From fc86345edb144289995dd404ac000c25ec1faa26 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 14:38:34 +0800 Subject: [PATCH 01/17] feat: add pd chunk draft --- lightllm/server/api_cli.py | 6 +++ .../httpserver_for_pd_master/manager.py | 40 ++++++++++++++----- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index e9943b05f..293958f5b 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -434,4 +434,10 @@ def make_argument_parser() -> argparse.ArgumentParser: but ensure that the model is compatible with the specified step count. currently, deepseekv3 model only support 1 step""", ) + parser.add_argument( + "--pd_chunk_size", + type=int, + default=0, + help="""Specifies the chunk size for pd mode.""", + ) return parser diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index 05b2d987c..e33ec8a8a 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -237,19 +237,37 @@ async def fetch_stream( raise ServerBusyError() sampling_params.move_kv_to_decode_node.initialize(None) - sampling_params.max_new_tokens = old_max_new_tokens - 1 sampling_params.suggested_dp_index = up_status_event.upkv_status.dp_index - await d_node.websocket.send_bytes(pickle.dumps((ObjType.REQ, (prompt_ids, sampling_params, multimodal_params)))) + remaining_tokens = old_max_new_tokens - 1 + pd_chunk_size = self.args.pd_chunk_size + current_prompt_ids = list(prompt_ids) - while True: - await req_status.wait_to_ready() - if await request.is_disconnected(): - raise Exception(f"req_id {group_request_id} disconnected") - if await req_status.can_read(self.req_id_to_out_inf): - token_list = await req_status.pop_all_tokens() - for sub_req_id, request_output, metadata, finish_status in token_list: - yield sub_req_id, request_output, metadata, finish_status + while remaining_tokens > 0: + chunk_size = min(remaining_tokens, pd_chunk_size) if pd_chunk_size > 0 else remaining_tokens + sampling_params.max_new_tokens = chunk_size + await d_node.websocket.send_bytes( + pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) + ) + + chunk_finished = False + while not chunk_finished: + await req_status.wait_to_ready() + if await request.is_disconnected(): + raise Exception(f"req_id {group_request_id} disconnected") + + if await req_status.can_read(self.req_id_to_out_inf): + token_list = await req_status.pop_all_tokens() + for sub_req_id, request_output, metadata, finish_status in token_list: + current_prompt_ids.append(metadata.get("id")) + remaining_tokens -= 1 + + if finish_status.is_finished() or remaining_tokens == 0: + chunk_finished = True + if remaining_tokens == 0: + finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH) + + yield sub_req_id, request_output, metadata, finish_status return @@ -269,6 +287,8 @@ async def _wait_to_token_package( unfinished_count = sampling_params.best_of is_first_token = True + max_new_tokens = sampling_params.max_new_tokens + async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( p_node, d_node, prompt, sampling_params, multimodal_params, request ): From 57ade254bbba5e32dce4d6c2ae25a5e1fd308608 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 14:47:45 +0800 Subject: [PATCH 02/17] fix: finish status fix --- .../server/httpserver_for_pd_master/manager.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index e33ec8a8a..85ac5d084 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -262,12 +262,21 @@ async def fetch_stream( current_prompt_ids.append(metadata.get("id")) remaining_tokens -= 1 - if finish_status.is_finished() or remaining_tokens == 0: - chunk_finished = True + final_finish_status = finish_status + + # reach max new tokens, really finished if remaining_tokens == 0: - finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH) + final_finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH) + chunk_finished = True + # reach chunk size, not really finished + elif final_finish_status.is_finished(): + final_finish_status = FinishStatus(FinishStatus.NO_FINISH) + chunk_finished = True + + yield sub_req_id, request_output, metadata, final_finish_status - yield sub_req_id, request_output, metadata, finish_status + if final_finish_status.is_finished(): + break return From e30587bed42eebc43d58cb34d7fa95700388e796 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 16:08:19 +0800 Subject: [PATCH 03/17] feat: add test sh --- server_d.sh | 13 +++++++++++++ server_master.sh | 5 +++++ server_p.sh | 14 ++++++++++++++ test.sh | 10 ++++++++++ 4 files changed, 42 insertions(+) create mode 100644 server_d.sh create mode 100644 server_master.sh create mode 100644 server_p.sh create mode 100644 test.sh diff --git a/server_d.sh b/server_d.sh new file mode 100644 index 000000000..1cb6d8c18 --- /dev/null +++ b/server_d.sh @@ -0,0 +1,13 @@ +CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=10 python3 -m lightllm.server.api_server \ + --model_dir /mtc/DeepSeek-V3 \ + --run_mode "decode" \ + --host 10.120.178.74 \ + --port 8118 \ + --nccl_port 12322 \ + --tp 1 \ + --max_total_token_num 400000 \ + --graph_max_len_in_batch 2048 \ + --graph_max_batch_size 16 \ + --tokenizer_mode fast \ + --pd_master_ip 10.120.178.74 \ + --pd_master_port 60011 \ No newline at end of file diff --git a/server_master.sh b/server_master.sh new file mode 100644 index 000000000..9e64de8c8 --- /dev/null +++ b/server_master.sh @@ -0,0 +1,5 @@ +python3 -m lightllm.server.api_server \ + --model_dir /mtc/DeepSeek-V3 \ + --run_mode "pd_master" \ + --host 10.120.178.74 \ + --port 60011 \ No newline at end of file diff --git a/server_p.sh b/server_p.sh new file mode 100644 index 000000000..3eec8f680 --- /dev/null +++ b/server_p.sh @@ -0,0 +1,14 @@ +CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ + --model_dir /mtc/DeepSeek-V3 \ + --run_mode "prefill" \ + --host 10.120.178.74 \ + --port 8017 \ + --tp 1 \ + --nccl_port 2732 \ + --max_total_token_num 400000 \ + --tokenizer_mode fast \ + --pd_master_ip 10.120.178.74 \ + --pd_master_port 60011 \ + --max_req_total_len 16000 \ + --running_max_req_size 128 \ + --disable_cudagraph \ No newline at end of file diff --git a/test.sh b/test.sh new file mode 100644 index 000000000..d072e5112 --- /dev/null +++ b/test.sh @@ -0,0 +1,10 @@ +python3 test/benchmark_client.py \ + --url http://10.120.178.74:60011/generate \ + --num_clients 100 \ + --tokenizer_path /mtc/DeepSeek-V3 \ + --input_num 2000 \ + --input_len 1024 \ + --output_len 16384 \ + --server_api lightllm \ + --dump_file result.json \ + --seed 42 \ No newline at end of file From a11c926e7d93a63bb8ff9fd86cba82c508d91133 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 16:23:32 +0800 Subject: [PATCH 04/17] fix oom --- server_d.sh | 2 +- server_p.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server_d.sh b/server_d.sh index 1cb6d8c18..39551beed 100644 --- a/server_d.sh +++ b/server_d.sh @@ -1,4 +1,4 @@ -CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=10 python3 -m lightllm.server.api_server \ +CUDA_VISIBLE_DEVICES=2,3 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ --model_dir /mtc/DeepSeek-V3 \ --run_mode "decode" \ --host 10.120.178.74 \ diff --git a/server_p.sh b/server_p.sh index 3eec8f680..82850a6fc 100644 --- a/server_p.sh +++ b/server_p.sh @@ -1,4 +1,4 @@ -CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ +CUDA_VISIBLE_DEVICES=0,1 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ --model_dir /mtc/DeepSeek-V3 \ --run_mode "prefill" \ --host 10.120.178.74 \ From f37f3ce27cf33d82e0b07180d380a5a5505cf665 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 16:37:01 +0800 Subject: [PATCH 05/17] fix: change model --- server_d.sh | 2 +- server_master.sh | 5 +++-- server_p.sh | 2 +- test.sh | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/server_d.sh b/server_d.sh index 39551beed..7966b2cbd 100644 --- a/server_d.sh +++ b/server_d.sh @@ -1,5 +1,5 @@ CUDA_VISIBLE_DEVICES=2,3 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mtc/DeepSeek-V3 \ + --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ --run_mode "decode" \ --host 10.120.178.74 \ --port 8118 \ diff --git a/server_master.sh b/server_master.sh index 9e64de8c8..d1bacb78a 100644 --- a/server_master.sh +++ b/server_master.sh @@ -1,5 +1,6 @@ python3 -m lightllm.server.api_server \ - --model_dir /mtc/DeepSeek-V3 \ + --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ --run_mode "pd_master" \ --host 10.120.178.74 \ - --port 60011 \ No newline at end of file + --port 60011 \ + --pd_chunk_size 4096 \ No newline at end of file diff --git a/server_p.sh b/server_p.sh index 82850a6fc..4c4754521 100644 --- a/server_p.sh +++ b/server_p.sh @@ -1,5 +1,5 @@ CUDA_VISIBLE_DEVICES=0,1 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mtc/DeepSeek-V3 \ + --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ --run_mode "prefill" \ --host 10.120.178.74 \ --port 8017 \ diff --git a/test.sh b/test.sh index d072e5112..31dd7d8d6 100644 --- a/test.sh +++ b/test.sh @@ -1,7 +1,7 @@ python3 test/benchmark_client.py \ --url http://10.120.178.74:60011/generate \ --num_clients 100 \ - --tokenizer_path /mtc/DeepSeek-V3 \ + --tokenizer_path /mtc/wufeiyang/Qwen2.5-72B-Instruct \ --input_num 2000 \ --input_len 1024 \ --output_len 16384 \ From 99bf811b7ebd08fc396bf939584a094e17412fd7 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 24 Jun 2025 19:57:20 +0800 Subject: [PATCH 06/17] fix: fix local test --- .gitignore | 4 ++++ server_d.sh | 10 +++++----- server_master.sh | 4 ++-- server_p.sh | 10 +++++----- test.sh | 8 +++----- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 6049c2cdb..1f0c31d19 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ dist .idea .vscode tmp/ + +*.log +*.sh +*.json \ No newline at end of file diff --git a/server_d.sh b/server_d.sh index 7966b2cbd..9c408e104 100644 --- a/server_d.sh +++ b/server_d.sh @@ -1,13 +1,13 @@ -CUDA_VISIBLE_DEVICES=2,3 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ +CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ + --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ --run_mode "decode" \ - --host 10.120.178.74 \ + --host 127.0.1.1 \ --port 8118 \ --nccl_port 12322 \ --tp 1 \ - --max_total_token_num 400000 \ + --max_total_token_num 16392 \ --graph_max_len_in_batch 2048 \ --graph_max_batch_size 16 \ --tokenizer_mode fast \ - --pd_master_ip 10.120.178.74 \ + --pd_master_ip 127.0.1.1 \ --pd_master_port 60011 \ No newline at end of file diff --git a/server_master.sh b/server_master.sh index d1bacb78a..1c2dacdea 100644 --- a/server_master.sh +++ b/server_master.sh @@ -1,6 +1,6 @@ python3 -m lightllm.server.api_server \ - --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ + --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ --run_mode "pd_master" \ - --host 10.120.178.74 \ + --host 127.0.1.1 \ --port 60011 \ --pd_chunk_size 4096 \ No newline at end of file diff --git a/server_p.sh b/server_p.sh index 4c4754521..f10c06f52 100644 --- a/server_p.sh +++ b/server_p.sh @@ -1,13 +1,13 @@ -CUDA_VISIBLE_DEVICES=0,1 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mtc/wufeiyang/Qwen2.5-72B-Instruct \ +CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ + --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ --run_mode "prefill" \ - --host 10.120.178.74 \ + --host 127.0.1.1 \ --port 8017 \ --tp 1 \ --nccl_port 2732 \ - --max_total_token_num 400000 \ + --max_total_token_num 16392 \ --tokenizer_mode fast \ - --pd_master_ip 10.120.178.74 \ + --pd_master_ip 127.0.1.1 \ --pd_master_port 60011 \ --max_req_total_len 16000 \ --running_max_req_size 128 \ diff --git a/test.sh b/test.sh index 31dd7d8d6..c16b3cc64 100644 --- a/test.sh +++ b/test.sh @@ -1,9 +1,7 @@ python3 test/benchmark_client.py \ - --url http://10.120.178.74:60011/generate \ - --num_clients 100 \ - --tokenizer_path /mtc/wufeiyang/Qwen2.5-72B-Instruct \ - --input_num 2000 \ - --input_len 1024 \ + --url http://127.0.1.1:60011/generate \ + --num_clients 1 \ + --tokenizer_path /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ --output_len 16384 \ --server_api lightllm \ --dump_file result.json \ From 255767559c6dd51b0b38bd6c07a4baabe6305f51 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:18:42 +0800 Subject: [PATCH 07/17] fix: add eos stop support --- lightllm/server/httpserver_for_pd_master/manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index 85ac5d084..295b52d5a 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -268,8 +268,12 @@ async def fetch_stream( if remaining_tokens == 0: final_finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH) chunk_finished = True + # reach stop token, really finished + elif finish_status == FinishStatus.FINISHED_STOP: + final_finish_status = FinishStatus(FinishStatus.FINISHED_STOP) + chunk_finished = True # reach chunk size, not really finished - elif final_finish_status.is_finished(): + elif finish_status == FinishStatus.FINISHED_LENGTH: final_finish_status = FinishStatus(FinishStatus.NO_FINISH) chunk_finished = True From d1c7396b606498e014f30434fde4faf220e0e461 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:20:28 +0800 Subject: [PATCH 08/17] feat: remove unnecessary files --- .gitignore | 6 +- pd_d.log | 2282 + pd_d_4096.log | 196230 ++++++++++++++++++++++++++++++++++++++++++ pd_master.log | 4529 + pd_master_4096.log | 15896 ++++ pd_p.log | 1512 + pd_p_4096.log | 2843 + server_d.sh | 13 - server_master.sh | 6 - server_p.sh | 14 - test.sh | 8 - 11 files changed, 223293 insertions(+), 46 deletions(-) create mode 100644 pd_d.log create mode 100644 pd_d_4096.log create mode 100644 pd_master.log create mode 100644 pd_master_4096.log create mode 100644 pd_p.log create mode 100644 pd_p_4096.log delete mode 100644 server_d.sh delete mode 100644 server_master.sh delete mode 100644 server_p.sh delete mode 100644 test.sh diff --git a/.gitignore b/.gitignore index 1f0c31d19..d07ab8183 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,4 @@ dist *.egg-info .idea .vscode -tmp/ - -*.log -*.sh -*.json \ No newline at end of file +tmp/ \ No newline at end of file diff --git a/pd_d.log b/pd_d.log new file mode 100644 index 000000000..2268f088d --- /dev/null +++ b/pd_d.log @@ -0,0 +1,2282 @@ +INFO 06-24 21:55:51 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:52 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:53 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:55:55 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:55 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:55 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:55:55 [api_start.py:79] zmq mode head: ipc:///tmp/_12322_0_ +INFO 06-24 21:55:55 [api_start.py:81] use tgi api: False +INFO 06-24 21:55:55 [api_start.py:192] alloced ports: [10011, 10239, 10144, 10176, 10271, 10117, 10125, 10205, 10126] +INFO 06-24 21:55:55 [api_start.py:233] all start args:Namespace(run_mode='decode', host='127.0.1.1', port=8118, httpserver_workers=1, zmq_mode='ipc:///tmp/_12322_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=12322, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=0, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=16, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=2048, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10011, detokenization_port=10239, detokenization_pub_port=10144, visual_port=10176, audio_port=10271, cache_port=10117, metric_port=10125, pd_node_infer_rpyc_ports=[10126], pd_node_id=148730891575017957868136796871489876076, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) +INFO 06-24 21:55:57 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 21:55:59 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:00 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:00 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:00 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:01 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 21:56:02 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:03 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:03 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:03 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:03 [manager.py:41] pub_to_httpserver sendhwm 1000 +INFO 06-24 21:56:04 [shm_req_manager.py:59] create lock shm 12322_0_req_shm_total +INFO 06-24 21:56:04 [atomic_array_lock.py:29] create lock shm 12322_0_array_reqs_lock +INFO 06-24 21:56:04 [atomic_lock.py:26] create lock shm 12322_0_shm_reqs_manager_lock +WARNING 06-24 21:56:04 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:04 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:04 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:04 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 21:56:04 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 21:56:04 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_mem_manger_can_use_token_num_0 +INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_shared_token_load +INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_shared_token_load_ext_infos +INFO 06-24 21:56:04 [model_rpc.py:70] Initialized RPC server for rank 0. +INFO 06-24 21:56:04 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 21:56:04 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 21:56:04 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 21:56:04 [model_rpc.py:184] use ContinuesBatchBackendForDecodeNode +INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_shared_token_load +INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos +INFO 06-24 21:56:06 [shared_arr.py:17] create shm 12322_0_dp_rank_0_lock_tp_infos +INFO 06-24 21:56:06 [basemodel.py:134] Initial quantization. The default quantization method is none +INFO 06-24 21:56:06 [mem_utils.py:11] mode setting params: [] +INFO 06-24 21:56:06 [mem_utils.py:25] Model kv cache using mode normal +INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_mem_manger_can_use_token_num_0 +INFO 06-24 21:56:17 [cuda_graph.py:45] cuda graph batch_sizes: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] +INFO 06-24 21:56:17 [cuda_graph.py:187] Begin capture cudagraph, use the --disable_cudagraph to disable it. +INFO 06-24 21:56:17 [cache_tensor_manager.py:75] pid 1413335 cuda graph alloc graph out mem (16, 152064) torch.float32 2433024 2433024 +INFO 06-24 21:56:17 [cache_tensor_manager.py:77] cuda graph managed_total_tensor_bytes: 9732096 +INFO 06-24 21:56:22 [cuda_graph.py:232] Capture cudagraph success, batch_size <=16 and max_len_in_batch <= 2048 will infer with cudagraph. +INFO 06-24 21:56:22 [basemodel.py:652] begin check max_len infer +INFO 06-24 21:56:23 [basemodel.py:680] check max_len 8448 infer ok +INFO 06-24 21:56:23 [shared_arr.py:17] create shm 12322_0_refed_tokens_num_0 +INFO 06-24 21:56:23 [shared_arr.py:17] create shm 12322_0_tree_total_tokens_num_0 +INFO 06-24 21:56:23 [base_backend.py:135] loaded model class +INFO 06-24 21:56:23 [decode_impl.py:36] lock_nccl_group ranks 0 +INFO 06-24 21:56:23 [shared_arr.py:20] link shm 12322_0_refed_tokens_num_0 +INFO 06-24 21:56:23 [shared_arr.py:20] link shm 12322_0_tree_total_tokens_num_0 +INFO 06-24 21:56:23 [manager.py:196] use req queue QueueForPDDecode +INFO 06-24 21:56:25 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:26 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:27 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:29 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:29 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:29 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:30 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 21:56:30 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 21:56:30 [decode_kv_move_manager.py:59] rpyc connect to port: 10126 ok +INFO 06-24 21:56:30 [up_status.py:122] up_kv_status_process start +INFO 06-24 21:56:30 [decode_trans_process.py:145] decode trans kv process for device: 0 start! +INFO 06-24 21:56:31 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:32 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:32 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:32 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:34 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 21:56:34 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:36 [up_status.py:112] up kv manager start ok +INFO 06-24 21:56:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 21:56:36 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:36 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:36 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:36 [decode_infer_rpyc.py:171] put mem manager to info_queues ok +INFO 06-24 21:56:36 [decode_kv_move_manager.py:388] decode kv move manager process started +INFO 06-24 21:56:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:36 [start_utils.py:37] init func start_router_process : init ok +INFO 06-24 21:56:36 [start_utils.py:37] init func start_detokenization_process : init ok +INFO 06-24 21:56:36 [api_start.py:57] start process pid 1412821 +INFO 06-24 21:56:36 [api_start.py:58] http server pid 1415861 +INFO 06-24 21:56:39 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:40 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:41 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:43 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:43 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:43 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:43 [api_http.py:326] server start up +INFO 06-24 21:56:43 [atomic_array_lock.py:32] link lock shm 12322_0_lightllm_resource_lock +INFO 06-24 21:56:43 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 21:56:43 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 21:56:43 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 21:56:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:43 [atomic_lock.py:29] link lock shm 12322_0_req_id_gen_lock +INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_latest_success_infer_time_mark +INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_shared_token_load +INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos +INFO 06-24 21:56:43 [api_http.py:330] server start up ok, loop use is +INFO 06-24 21:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:56:44 [pd_loop.py:92] Sent registration JSON: {'node_id': 148730891575017957868136796871489876076, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10011, 'detokenization_port': 10239, 'detokenization_pub_port': 10144, 'visual_port': 10176, 'audio_port': 10271, 'cache_port': 10117, 'metric_port': 10125, 'pd_node_infer_rpyc_ports': [10126], 'pd_node_id': 148730891575017957868136796871489876076, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 21:56:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:56:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 21:57:13 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:57:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:18 [rpyc_fix_utils.py:107] set nodelay mode +INFO 06-24 21:57:18 [rpyc_fix_utils.py:113] change socket buffer from 2626560 131072 change to 4194304 +INFO 06-24 21:57:18 [decode_kv_move_manager.py:225] build trans infos 287595743282619216970276961428881885738 127.0.1.1 20000 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:18 [decode_trans_process.py:57] connect start PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=0, prefill_id=287595743282619216970276961428881885738, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') +INFO 06-24 21:57:18 [decode_trans_process.py:67] connect src_id 287595743282619216970276961428881885738 dest_id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:18 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 +INFO 06-24 21:57:18 [pynccl.py:180] LightLLM is using nccl==2.21.5 +INFO 06-24 21:57:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:19 [decode_trans_process.py:85] PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=0, prefill_id=287595743282619216970276961428881885738, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') kv trans connected +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 8 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 16 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 24 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 8 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 16392 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 24 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 8 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 24 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 32 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 40 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 48 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 56 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 64 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 80 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 80 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +INFO 06-24 21:57:19 [decode_trans_obj.py:118] kv_move_loop get task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_trans_process.py:34] trans start: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 72 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 80 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 88 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 88 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 88 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 96 request_data_transfer fail, server is busy +INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 104 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 112 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 120 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_trans_process.py:43] trans finished: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc move len: 1055 +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 128 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 136 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_trans_process.py:45] trans cost time: 1.5387728214263916, id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_trans_obj.py:95] _transfer_kv ok id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 144 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 152 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:20 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 16 cost_time 0.02942347526550293 s +INFO 06-24 21:57:20 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 16 finished +INFO 06-24 21:57:20 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=16, dp_index=0, pd_master_node_id=0) +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:20 lightllm_req_id:16 +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 160 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 168 request_data_transfer fail, server is busy +INFO 06-24 21:57:20 [manager.py:224] router recive req id 16 cost time 0.08874297142028809 s +DEBUG 06-24 21:57:20 [manager.py:391] Prefill Batch: batch_id=287305880176870206574426824661545321596, time:1750773440.8170793s req_ids:[16] +DEBUG 06-24 21:57:20 [manager.py:391] +DEBUG 06-24 21:57:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 13.839 tokens/s +DEBUG 06-24 21:57:20 [stats.py:37] Avg prompt tokens throughput: 13.839 tokens/s +DEBUG 06-24 21:57:20 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 16 cost time 0.14529085159301758 s +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15307 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15307 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 176 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 184 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 192 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 200 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 208 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15305 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15305 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 216 request_data_transfer fail, server is busy +INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 224 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 token used ratio: 0.06710590531966813 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 token used ratio: 0.06710590531966813 contain prompt cache tree unrefed token +INFO 06-24 21:57:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 232 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 232 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 232 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 240 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 240 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 240 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 248 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 256 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 272 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 272 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 264 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 272 request_data_transfer fail, server is busy +INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 280 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 280 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 280 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 token used ratio: 0.0701561737432894 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 token used ratio: 0.0701561737432894 contain prompt cache tree unrefed token +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 288 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 288 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 288 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 296 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 296 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 296 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 304 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 312 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 320 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 328 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 336 request_data_transfer fail, server is busy +INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 344 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 token used ratio: 0.07320644216691069 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 token used ratio: 0.07320644216691069 contain prompt cache tree unrefed token +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 352 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 360 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 368 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 376 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 384 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15172 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15172 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 392 request_data_transfer fail, server is busy +INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 400 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.07625671059053196 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.07625671059053196 contain prompt cache tree unrefed token +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 408 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 408 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 408 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 416 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 424 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 416 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 424 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 416 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 424 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 432 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 440 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 432 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 440 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 432 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 440 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 448 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 456 request_data_transfer fail, server is busy +INFO 06-24 21:57:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 token used ratio: 0.07930697901415325 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 token used ratio: 0.07930697901415325 contain prompt cache tree unrefed token +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 464 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 472 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 488 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 472 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 488 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 472 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 480 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 488 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 496 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 504 request_data_transfer fail, server is busy +INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 512 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 512 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 512 request_data_transfer fail, server is busy +INFO 06-24 21:57:27 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 520 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:27 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 token used ratio: 0.08235724743777452 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 token used ratio: 0.08235724743777452 contain prompt cache tree unrefed token +INFO 06-24 21:57:27 [decode_infer_rpyc.py:75] req_id: id: 520 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:27 [decode_infer_rpyc.py:75] req_id: id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:27 [decode_kv_move_manager.py:273] req id 520 request_data_transfer fail, server is busy +INFO 06-24 21:57:27 [decode_kv_move_manager.py:273] req id 528 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 536 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 544 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 552 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 560 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 568 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15040 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 576 request_data_transfer fail, server is busy +INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 584 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 592 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 600 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 608 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 616 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14998 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14998 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 624 request_data_transfer fail, server is busy +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 632 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 token used ratio: 0.0854075158613958 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 token used ratio: 0.0854075158613958 contain prompt cache tree unrefed token +INFO 06-24 21:57:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14958 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 640 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14958 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 648 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14957 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 656 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 664 in_len:1043 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 664 in_len:1043 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 664 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 672 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 672 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 672 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 680 request_data_transfer fail, server is busy +INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14955 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 688 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 token used ratio: 0.08845778428501708 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 token used ratio: 0.08845778428501708 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 41.301 tokens/s +DEBUG 06-24 21:57:30 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:57:30 [stats.py:37] Avg generate tokens throughput: 41.301 tokens/s +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14910 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 696 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 704 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 712 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 720 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 728 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 736 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 744 request_data_transfer fail, server is busy +INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 752 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 token used ratio: 0.09150805270863836 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 token used ratio: 0.09150805270863836 contain prompt cache tree unrefed token +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 760 request_data_transfer fail, server is busy +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 768 request_data_transfer fail, server is busy +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 776 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 784 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 776 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 784 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 776 request_data_transfer fail, server is busy +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 784 request_data_transfer fail, server is busy +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14871 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14871 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 792 request_data_transfer fail, server is busy +INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 800 request_data_transfer fail, server is busy +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 token used ratio: 0.09455832113225963 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 token used ratio: 0.09455832113225963 contain prompt cache tree unrefed token +INFO 06-24 21:57:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 token used ratio: 0.09760858955588092 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 token used ratio: 0.09760858955588092 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 token used ratio: 0.10065885797950219 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 token used ratio: 0.10065885797950219 contain prompt cache tree unrefed token +INFO 06-24 21:57:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 token used ratio: 0.10370912640312348 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 token used ratio: 0.10370912640312348 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 token used ratio: 0.10675939482674475 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 token used ratio: 0.10675939482674475 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 token used ratio: 0.10980966325036604 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 token used ratio: 0.10980966325036604 contain prompt cache tree unrefed token +INFO 06-24 21:57:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 token used ratio: 0.11285993167398731 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 token used ratio: 0.11285993167398731 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 40.475 tokens/s +DEBUG 06-24 21:57:40 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:57:40 [stats.py:37] Avg generate tokens throughput: 40.475 tokens/s +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 token used ratio: 0.11591020009760859 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 token used ratio: 0.11591020009760859 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 token used ratio: 0.11896046852122986 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 token used ratio: 0.11896046852122986 contain prompt cache tree unrefed token +INFO 06-24 21:57:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:43 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 21:57:43 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 token used ratio: 0.12201073694485115 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 token used ratio: 0.12201073694485115 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 token used ratio: 0.12506100536847242 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 token used ratio: 0.12506100536847242 contain prompt cache tree unrefed token +INFO 06-24 21:57:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 token used ratio: 0.1281112737920937 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 token used ratio: 0.1281112737920937 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 token used ratio: 0.131161542215715 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 token used ratio: 0.131161542215715 contain prompt cache tree unrefed token +INFO 06-24 21:57:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 29.473 tokens/s +DEBUG 06-24 21:57:50 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:57:50 [stats.py:37] Avg generate tokens throughput: 29.473 tokens/s +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 token used ratio: 0.13421181063933627 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 token used ratio: 0.13421181063933627 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 token used ratio: 0.13726207906295754 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 token used ratio: 0.13726207906295754 contain prompt cache tree unrefed token +INFO 06-24 21:57:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 token used ratio: 0.1403123474865788 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 token used ratio: 0.1403123474865788 contain prompt cache tree unrefed token +INFO 06-24 21:57:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 token used ratio: 0.1433626159102001 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 token used ratio: 0.1433626159102001 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 token used ratio: 0.14641288433382138 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 token used ratio: 0.14641288433382138 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.256 tokens/s +DEBUG 06-24 21:58:00 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:00 [stats.py:37] Avg generate tokens throughput: 26.256 tokens/s +INFO 06-24 21:58:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 token used ratio: 0.14946315275744265 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 token used ratio: 0.14946315275744265 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 token used ratio: 0.15251342118106392 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 token used ratio: 0.15251342118106392 contain prompt cache tree unrefed token +INFO 06-24 21:58:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 token used ratio: 0.15556368960468522 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 token used ratio: 0.15556368960468522 contain prompt cache tree unrefed token +INFO 06-24 21:58:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 token used ratio: 0.1586139580283065 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 token used ratio: 0.1586139580283065 contain prompt cache tree unrefed token +INFO 06-24 21:58:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 token used ratio: 0.16166422645192777 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 token used ratio: 0.16166422645192777 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.558 tokens/s +DEBUG 06-24 21:58:10 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:10 [stats.py:37] Avg generate tokens throughput: 25.558 tokens/s +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 token used ratio: 0.16471449487554904 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 token used ratio: 0.16471449487554904 contain prompt cache tree unrefed token +INFO 06-24 21:58:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 token used ratio: 0.16776476329917034 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 token used ratio: 0.16776476329917034 contain prompt cache tree unrefed token +INFO 06-24 21:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:58:13 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 21:58:13 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:58:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 token used ratio: 0.1708150317227916 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 token used ratio: 0.1708150317227916 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 token used ratio: 0.17386530014641288 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 token used ratio: 0.17386530014641288 contain prompt cache tree unrefed token +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 808 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 808 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13513 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] +INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 808 request_data_transfer fail, server is busy +INFO 06-24 21:58:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 816 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 816 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] +INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] +INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 816 request_data_transfer fail, server is busy +INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 824 request_data_transfer fail, server is busy +INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] +INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 832 request_data_transfer fail, server is busy +INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type +INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13496 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 +DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] +INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 840 request_data_transfer fail, server is busy +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 token used ratio: 0.17691556857003415 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 token used ratio: 0.17691556857003415 contain prompt cache tree unrefed token +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.113 tokens/s +DEBUG 06-24 21:58:20 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:20 [stats.py:37] Avg generate tokens throughput: 26.113 tokens/s +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 token used ratio: 0.17996583699365545 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 token used ratio: 0.17996583699365545 contain prompt cache tree unrefed token +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 token used ratio: 0.18301610541727673 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 token used ratio: 0.18301610541727673 contain prompt cache tree unrefed token +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 token used ratio: 0.186066373840898 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 token used ratio: 0.186066373840898 contain prompt cache tree unrefed token +INFO 06-24 21:58:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 token used ratio: 0.18911664226451927 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 token used ratio: 0.18911664226451927 contain prompt cache tree unrefed token +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 token used ratio: 0.19216691068814057 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 token used ratio: 0.19216691068814057 contain prompt cache tree unrefed token +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 24.824 tokens/s +DEBUG 06-24 21:58:30 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:30 [stats.py:37] Avg generate tokens throughput: 24.824 tokens/s +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 token used ratio: 0.19521717911176184 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 token used ratio: 0.19521717911176184 contain prompt cache tree unrefed token +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 token used ratio: 0.1982674475353831 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 token used ratio: 0.1982674475353831 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 token used ratio: 0.20131771595900438 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 token used ratio: 0.20131771595900438 contain prompt cache tree unrefed token +INFO 06-24 21:58:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:58:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 token used ratio: 0.20436798438262568 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 token used ratio: 0.20436798438262568 contain prompt cache tree unrefed token +INFO 06-24 21:58:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 token used ratio: 0.20741825280624696 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 token used ratio: 0.20741825280624696 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 22.839 tokens/s +DEBUG 06-24 21:58:40 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:40 [stats.py:37] Avg generate tokens throughput: 22.839 tokens/s +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 token used ratio: 0.21046852122986823 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 token used ratio: 0.21046852122986823 contain prompt cache tree unrefed token +INFO 06-24 21:58:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 token used ratio: 0.2135187896534895 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 token used ratio: 0.2135187896534895 contain prompt cache tree unrefed token +INFO 06-24 21:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:58:44 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 21:58:44 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:58:44 [manager.py:620] left req id 16can release False refcount 4 +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 token used ratio: 0.2165690580771108 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 token used ratio: 0.2165690580771108 contain prompt cache tree unrefed token +INFO 06-24 21:58:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 token used ratio: 0.21961932650073207 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 token used ratio: 0.21961932650073207 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 token used ratio: 0.22266959492435334 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 token used ratio: 0.22266959492435334 contain prompt cache tree unrefed token +INFO 06-24 21:58:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.730 tokens/s +DEBUG 06-24 21:58:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:58:51 [stats.py:37] Avg generate tokens throughput: 26.730 tokens/s +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 token used ratio: 0.22571986334797461 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 token used ratio: 0.22571986334797461 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 token used ratio: 0.2287701317715959 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 token used ratio: 0.2287701317715959 contain prompt cache tree unrefed token +INFO 06-24 21:58:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 token used ratio: 0.23182040019521719 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 token used ratio: 0.23182040019521719 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 token used ratio: 0.23487066861883846 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 token used ratio: 0.23487066861883846 contain prompt cache tree unrefed token +INFO 06-24 21:58:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 token used ratio: 0.23792093704245973 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 token used ratio: 0.23792093704245973 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 token used ratio: 0.24097120546608103 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 token used ratio: 0.24097120546608103 contain prompt cache tree unrefed token +INFO 06-24 21:59:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.807 tokens/s +DEBUG 06-24 21:59:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:01 [stats.py:37] Avg generate tokens throughput: 26.807 tokens/s +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 token used ratio: 0.2440214738897023 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 token used ratio: 0.2440214738897023 contain prompt cache tree unrefed token +INFO 06-24 21:59:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 token used ratio: 0.24707174231332357 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 token used ratio: 0.24707174231332357 contain prompt cache tree unrefed token +INFO 06-24 21:59:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 token used ratio: 0.25012201073694484 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 token used ratio: 0.25012201073694484 contain prompt cache tree unrefed token +INFO 06-24 21:59:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 token used ratio: 0.2531722791605661 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 token used ratio: 0.2531722791605661 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 token used ratio: 0.2562225475841874 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 token used ratio: 0.2562225475841874 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.359 tokens/s +DEBUG 06-24 21:59:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:11 [stats.py:37] Avg generate tokens throughput: 25.359 tokens/s +INFO 06-24 21:59:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 token used ratio: 0.25927281600780866 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 token used ratio: 0.25927281600780866 contain prompt cache tree unrefed token +INFO 06-24 21:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:59:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 21:59:14 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 token used ratio: 0.26232308443143 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 token used ratio: 0.26232308443143 contain prompt cache tree unrefed token +INFO 06-24 21:59:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 token used ratio: 0.26537335285505126 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 token used ratio: 0.26537335285505126 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 token used ratio: 0.26842362127867253 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 token used ratio: 0.26842362127867253 contain prompt cache tree unrefed token +INFO 06-24 21:59:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 token used ratio: 0.2714738897022938 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 token used ratio: 0.2714738897022938 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.152 tokens/s +DEBUG 06-24 21:59:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:21 [stats.py:37] Avg generate tokens throughput: 26.152 tokens/s +INFO 06-24 21:59:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 token used ratio: 0.2745241581259151 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 token used ratio: 0.2745241581259151 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 token used ratio: 0.27757442654953635 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 token used ratio: 0.27757442654953635 contain prompt cache tree unrefed token +INFO 06-24 21:59:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 token used ratio: 0.2806246949731576 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 token used ratio: 0.2806246949731576 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 token used ratio: 0.2836749633967789 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 token used ratio: 0.2836749633967789 contain prompt cache tree unrefed token +INFO 06-24 21:59:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 token used ratio: 0.2867252318204002 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 token used ratio: 0.2867252318204002 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.161 tokens/s +DEBUG 06-24 21:59:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:31 [stats.py:37] Avg generate tokens throughput: 25.161 tokens/s +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 token used ratio: 0.2897755002440215 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 token used ratio: 0.2897755002440215 contain prompt cache tree unrefed token +INFO 06-24 21:59:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 token used ratio: 0.29282576866764276 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 token used ratio: 0.29282576866764276 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 token used ratio: 0.29587603709126403 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 token used ratio: 0.29587603709126403 contain prompt cache tree unrefed token +INFO 06-24 21:59:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:59:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 token used ratio: 0.2989263055148853 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 token used ratio: 0.2989263055148853 contain prompt cache tree unrefed token +INFO 06-24 21:59:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 token used ratio: 0.3019765739385066 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 token used ratio: 0.3019765739385066 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.809 tokens/s +DEBUG 06-24 21:59:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:41 [stats.py:37] Avg generate tokens throughput: 25.809 tokens/s +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 token used ratio: 0.30502684236212785 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 token used ratio: 0.30502684236212785 contain prompt cache tree unrefed token +INFO 06-24 21:59:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 token used ratio: 0.3080771107857491 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 token used ratio: 0.3080771107857491 contain prompt cache tree unrefed token +INFO 06-24 21:59:44 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:59:44 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 21:59:44 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 token used ratio: 0.31112737920937045 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 token used ratio: 0.31112737920937045 contain prompt cache tree unrefed token +INFO 06-24 21:59:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 token used ratio: 0.3141776476329917 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 token used ratio: 0.3141776476329917 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 token used ratio: 0.317227916056613 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 token used ratio: 0.317227916056613 contain prompt cache tree unrefed token +INFO 06-24 21:59:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 token used ratio: 0.32027818448023426 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 token used ratio: 0.32027818448023426 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.841 tokens/s +DEBUG 06-24 21:59:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 21:59:51 [stats.py:37] Avg generate tokens throughput: 26.841 tokens/s +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32332845290385553 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32332845290385553 contain prompt cache tree unrefed token +INFO 06-24 21:59:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3263787213274768 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3263787213274768 contain prompt cache tree unrefed token +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3294289897510981 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3294289897510981 contain prompt cache tree unrefed token +INFO 06-24 21:59:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 token used ratio: 0.33247925817471935 not contain prompt cache tree unrefed token +DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 token used ratio: 0.33247925817471935 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3355295265983407 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3355295265983407 contain prompt cache tree unrefed token +INFO 06-24 22:00:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 27.086 tokens/s +DEBUG 06-24 22:00:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:01 [stats.py:37] Avg generate tokens throughput: 27.086 tokens/s +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 token used ratio: 0.33857979502196195 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 token used ratio: 0.33857979502196195 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 token used ratio: 0.3416300634455832 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 token used ratio: 0.3416300634455832 contain prompt cache tree unrefed token +INFO 06-24 22:00:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 token used ratio: 0.3446803318692045 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 token used ratio: 0.3446803318692045 contain prompt cache tree unrefed token +INFO 06-24 22:00:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:00:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 token used ratio: 0.34773060029282576 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 token used ratio: 0.34773060029282576 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 token used ratio: 0.35078086871644704 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 token used ratio: 0.35078086871644704 contain prompt cache tree unrefed token +INFO 06-24 22:00:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 token used ratio: 0.3538311371400683 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 token used ratio: 0.3538311371400683 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 27.118 tokens/s +DEBUG 06-24 22:00:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:11 [stats.py:37] Avg generate tokens throughput: 27.118 tokens/s +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 contain prompt cache tree unrefed token +INFO 06-24 22:00:14 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:00:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 22:00:14 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 22:00:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 token used ratio: 0.3599316739873109 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 token used ratio: 0.3599316739873109 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 token used ratio: 0.3629819424109322 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 token used ratio: 0.3629819424109322 contain prompt cache tree unrefed token +INFO 06-24 22:00:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 token used ratio: 0.36603221083455345 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 token used ratio: 0.36603221083455345 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 token used ratio: 0.3690824792581747 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 token used ratio: 0.3690824792581747 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.744 tokens/s +DEBUG 06-24 22:00:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:21 [stats.py:37] Avg generate tokens throughput: 26.744 tokens/s +INFO 06-24 22:00:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 token used ratio: 0.372132747681796 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 token used ratio: 0.372132747681796 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 token used ratio: 0.37518301610541727 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 token used ratio: 0.37518301610541727 contain prompt cache tree unrefed token +INFO 06-24 22:00:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 token used ratio: 0.37823328452903854 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 token used ratio: 0.37823328452903854 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 token used ratio: 0.3812835529526598 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 token used ratio: 0.3812835529526598 contain prompt cache tree unrefed token +INFO 06-24 22:00:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 token used ratio: 0.38433382137628114 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 token used ratio: 0.38433382137628114 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.338 tokens/s +DEBUG 06-24 22:00:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:31 [stats.py:37] Avg generate tokens throughput: 26.338 tokens/s +INFO 06-24 22:00:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 token used ratio: 0.3873840897999024 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 token used ratio: 0.3873840897999024 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 token used ratio: 0.3904343582235237 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 token used ratio: 0.3904343582235237 contain prompt cache tree unrefed token +INFO 06-24 22:00:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 token used ratio: 0.39348462664714495 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 token used ratio: 0.39348462664714495 contain prompt cache tree unrefed token +INFO 06-24 22:00:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 token used ratio: 0.3965348950707662 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 token used ratio: 0.3965348950707662 contain prompt cache tree unrefed token +INFO 06-24 22:00:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 token used ratio: 0.3995851634943875 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 token used ratio: 0.3995851634943875 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.030 tokens/s +DEBUG 06-24 22:00:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:41 [stats.py:37] Avg generate tokens throughput: 26.030 tokens/s +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 token used ratio: 0.40263543191800877 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 token used ratio: 0.40263543191800877 contain prompt cache tree unrefed token +INFO 06-24 22:00:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 token used ratio: 0.40568570034163004 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 token used ratio: 0.40568570034163004 contain prompt cache tree unrefed token +INFO 06-24 22:00:44 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:00:44 [manager.py:620] left req id 16can release False refcount 4 +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 token used ratio: 0.40873596876525137 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 token used ratio: 0.40873596876525137 contain prompt cache tree unrefed token +INFO 06-24 22:00:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 token used ratio: 0.41178623718887264 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 token used ratio: 0.41178623718887264 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 token used ratio: 0.4148365056124939 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 token used ratio: 0.4148365056124939 contain prompt cache tree unrefed token +INFO 06-24 22:00:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 token used ratio: 0.4178867740361152 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 token used ratio: 0.4178867740361152 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.794 tokens/s +DEBUG 06-24 22:00:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:00:51 [stats.py:37] Avg generate tokens throughput: 26.794 tokens/s +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 token used ratio: 0.42093704245973645 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 token used ratio: 0.42093704245973645 contain prompt cache tree unrefed token +INFO 06-24 22:00:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 token used ratio: 0.4239873108833577 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 token used ratio: 0.4239873108833577 contain prompt cache tree unrefed token +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 token used ratio: 0.427037579306979 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 token used ratio: 0.427037579306979 contain prompt cache tree unrefed token +INFO 06-24 22:00:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 token used ratio: 0.43008784773060027 not contain prompt cache tree unrefed token +DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 token used ratio: 0.43008784773060027 contain prompt cache tree unrefed token +INFO 06-24 22:00:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 token used ratio: 0.4331381161542216 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 token used ratio: 0.4331381161542216 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.958 tokens/s +DEBUG 06-24 22:01:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:01:01 [stats.py:37] Avg generate tokens throughput: 26.958 tokens/s +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 token used ratio: 0.43618838457784287 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 token used ratio: 0.43618838457784287 contain prompt cache tree unrefed token +INFO 06-24 22:01:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 token used ratio: 0.43923865300146414 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 token used ratio: 0.43923865300146414 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 token used ratio: 0.4422889214250854 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 token used ratio: 0.4422889214250854 contain prompt cache tree unrefed token +INFO 06-24 22:01:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:01:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 token used ratio: 0.4453391898487067 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 token used ratio: 0.4453391898487067 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 token used ratio: 0.44838945827232796 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 token used ratio: 0.44838945827232796 contain prompt cache tree unrefed token +INFO 06-24 22:01:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 token used ratio: 0.45143972669594923 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 token used ratio: 0.45143972669594923 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.708 tokens/s +DEBUG 06-24 22:01:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:01:11 [stats.py:37] Avg generate tokens throughput: 26.708 tokens/s +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 token used ratio: 0.4544899951195705 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 token used ratio: 0.4544899951195705 contain prompt cache tree unrefed token +INFO 06-24 22:01:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 22:01:14 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:01:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms +INFO 06-24 22:01:14 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 token used ratio: 0.4575402635431918 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 token used ratio: 0.4575402635431918 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 token used ratio: 0.4605905319668131 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 token used ratio: 0.4605905319668131 contain prompt cache tree unrefed token +INFO 06-24 22:01:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 token used ratio: 0.46364080039043437 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 token used ratio: 0.46364080039043437 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 token used ratio: 0.46669106881405564 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 token used ratio: 0.46669106881405564 contain prompt cache tree unrefed token +INFO 06-24 22:01:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.467 tokens/s +DEBUG 06-24 22:01:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:01:21 [stats.py:37] Avg generate tokens throughput: 26.467 tokens/s +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 token used ratio: 0.4697413372376769 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 token used ratio: 0.4697413372376769 contain prompt cache tree unrefed token +INFO 06-24 22:01:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 token used ratio: 0.4727916056612982 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 token used ratio: 0.4727916056612982 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 token used ratio: 0.47584187408491946 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 token used ratio: 0.47584187408491946 contain prompt cache tree unrefed token +INFO 06-24 22:01:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 token used ratio: 0.47889214250854073 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 token used ratio: 0.47889214250854073 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 token used ratio: 0.48194241093216206 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 token used ratio: 0.48194241093216206 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.121 tokens/s +DEBUG 06-24 22:01:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:01:31 [stats.py:37] Avg generate tokens throughput: 26.121 tokens/s +INFO 06-24 22:01:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 token used ratio: 0.48499267935578333 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 token used ratio: 0.48499267935578333 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 token used ratio: 0.4880429477794046 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 token used ratio: 0.4880429477794046 contain prompt cache tree unrefed token +INFO 06-24 22:01:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 token used ratio: 0.4910932162030259 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 token used ratio: 0.4910932162030259 contain prompt cache tree unrefed token +INFO 06-24 22:01:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 token used ratio: 0.49414348462664714 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 token used ratio: 0.49414348462664714 contain prompt cache tree unrefed token +INFO 06-24 22:01:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +ERROR 06-24 22:01:39 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 22:01:39 [pd_loop.py:121] no close frame received or sent +ERROR 06-24 22:01:39 [pd_loop.py:121] ConnectionResetError: [Errno 104] Connection reset by peer +ERROR 06-24 22:01:39 [pd_loop.py:121] +ERROR 06-24 22:01:39 [pd_loop.py:121] The above exception was the direct cause of the following exception: +ERROR 06-24 22:01:39 [pd_loop.py:121] +ERROR 06-24 22:01:39 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task +ERROR 06-24 22:01:39 [pd_loop.py:121] recv_bytes = await websocket.recv() +ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv +ERROR 06-24 22:01:39 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc +ERROR 06-24 22:01:39 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 token used ratio: 0.4971937530502684 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 token used ratio: 0.4971937530502684 contain prompt cache tree unrefed token +DEBUG 06-24 22:01:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.706 tokens/s +DEBUG 06-24 22:01:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 22:01:41 [stats.py:37] Avg generate tokens throughput: 25.706 tokens/s +INFO 06-24 22:01:41 [decode_kv_move_manager.py:206] connect id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc disconnect +ERROR 06-24 22:01:41 [decode_trans_obj.py:180] put_to_radix_loop thread quit, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 +ERROR 06-24 22:01:41 [decode_trans_obj.py:136] kv_move_loop thread quit +ERROR 06-24 22:01:41 [decode_trans_obj.py:226] trans obj del start, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 +ERROR 06-24 22:01:41 [decode_trans_obj.py:249] trans obj deled, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 +INFO 06-24 22:01:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 token used ratio: 0.5002440214738897 not contain prompt cache tree unrefed token +DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 token used ratio: 0.5002440214738897 contain prompt cache tree unrefed token +INFO 06-24 22:01:42 [decode_trans_process.py:123] destory PDTransLeaveInfo(decode_id=148730891575017957868136796871489876076, prefill_id=287595743282619216970276961428881885738, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') nccl communicator. +INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +ERROR 06-24 22:01:43 [manager.py:487] Router Caught exception: {'message': 'Task exception was never retrieved', 'exception': KeyboardInterrupt(), 'future': exception=KeyboardInterrupt()>} +ERROR 06-24 22:01:43 [manager.py:487] NoneType: None +ERROR 06-24 22:01:43 [decode_kv_move_manager.py:301] +Traceback (most recent call last): + File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/decode_node_impl/decode_kv_move_manager.py", line 299, in timer_loop + time.sleep(3.5) +KeyboardInterrupt + +INFO 06-24 22:01:43 [start_utils.py:106] Killing child process 1415871 +INFO 06-24 22:01:43 [start_utils.py:106] Killing child process 1416220 +INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 +INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413171 +INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1414844 +INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1415305 +INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1415308 +INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 +INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 +INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 +INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 +INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 +INFO 06-24 22:01:43 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 22:01:43 [api_start.py:30] All processes have been forcefully terminated. +INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 +INFO 06-24 22:01:43 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 22:01:43 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_d_4096.log b/pd_d_4096.log new file mode 100644 index 000000000..ea644a687 --- /dev/null +++ b/pd_d_4096.log @@ -0,0 +1,196230 @@ +INFO 06-24 19:54:01 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:02 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:03 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:04 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:04 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:04 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:05 [api_start.py:79] zmq mode head: ipc:///tmp/_12322_0_ +INFO 06-24 19:54:05 [api_start.py:81] use tgi api: False +INFO 06-24 19:54:05 [api_start.py:192] alloced ports: [10135, 10143, 10207, 10165, 10059, 10114, 10051, 10001, 10236] +INFO 06-24 19:54:05 [api_start.py:233] all start args:Namespace(run_mode='decode', host='127.0.1.1', port=8118, httpserver_workers=1, zmq_mode='ipc:///tmp/_12322_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=12322, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=0, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=16, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=2048, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10135, detokenization_port=10143, detokenization_pub_port=10207, visual_port=10165, audio_port=10059, cache_port=10114, metric_port=10051, pd_node_infer_rpyc_ports=[10236], pd_node_id=147275795944234129756100418482494441380, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) +INFO 06-24 19:54:06 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 19:54:08 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:09 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:09 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:10 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:11 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 19:54:11 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:12 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:12 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:12 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:13 [shm_req_manager.py:59] create lock shm 12322_0_req_shm_total +INFO 06-24 19:54:13 [atomic_array_lock.py:29] create lock shm 12322_0_array_reqs_lock +INFO 06-24 19:54:13 [atomic_lock.py:26] create lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_mem_manger_can_use_token_num_0 +INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_shared_token_load +INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_shared_token_load_ext_infos +INFO 06-24 19:54:13 [model_rpc.py:70] Initialized RPC server for rank 0. +INFO 06-24 19:54:13 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 19:54:13 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 19:54:13 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 19:54:13 [model_rpc.py:184] use ContinuesBatchBackendForDecodeNode +WARNING 06-24 19:54:13 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:13 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:13 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:14 [manager.py:41] pub_to_httpserver sendhwm 1000 +INFO 06-24 19:54:14 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 19:54:14 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 19:54:14 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_shared_token_load +INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos +INFO 06-24 19:54:15 [shared_arr.py:17] create shm 12322_0_dp_rank_0_lock_tp_infos +INFO 06-24 19:54:15 [basemodel.py:134] Initial quantization. The default quantization method is none +INFO 06-24 19:54:15 [mem_utils.py:11] mode setting params: [] +INFO 06-24 19:54:15 [mem_utils.py:25] Model kv cache using mode normal +INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_mem_manger_can_use_token_num_0 +INFO 06-24 19:54:26 [cuda_graph.py:45] cuda graph batch_sizes: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] +INFO 06-24 19:54:26 [cuda_graph.py:187] Begin capture cudagraph, use the --disable_cudagraph to disable it. +INFO 06-24 19:54:26 [cache_tensor_manager.py:75] pid 1212190 cuda graph alloc graph out mem (16, 152064) torch.float32 2433024 2433024 +INFO 06-24 19:54:26 [cache_tensor_manager.py:77] cuda graph managed_total_tensor_bytes: 9732096 +INFO 06-24 19:54:31 [cuda_graph.py:232] Capture cudagraph success, batch_size <=16 and max_len_in_batch <= 2048 will infer with cudagraph. +INFO 06-24 19:54:31 [basemodel.py:652] begin check max_len infer +INFO 06-24 19:54:32 [basemodel.py:680] check max_len 8448 infer ok +INFO 06-24 19:54:32 [shared_arr.py:17] create shm 12322_0_refed_tokens_num_0 +INFO 06-24 19:54:32 [shared_arr.py:17] create shm 12322_0_tree_total_tokens_num_0 +INFO 06-24 19:54:32 [base_backend.py:135] loaded model class +INFO 06-24 19:54:32 [decode_impl.py:36] lock_nccl_group ranks 0 +INFO 06-24 19:54:32 [shared_arr.py:20] link shm 12322_0_refed_tokens_num_0 +INFO 06-24 19:54:32 [shared_arr.py:20] link shm 12322_0_tree_total_tokens_num_0 +INFO 06-24 19:54:32 [manager.py:196] use req queue QueueForPDDecode +INFO 06-24 19:54:34 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:35 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:36 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:38 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:38 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:38 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:38 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 19:54:38 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 19:54:38 [decode_kv_move_manager.py:59] rpyc connect to port: 10236 ok +INFO 06-24 19:54:38 [up_status.py:122] up_kv_status_process start +INFO 06-24 19:54:38 [decode_trans_process.py:145] decode trans kv process for device: 0 start! +INFO 06-24 19:54:40 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:40 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:41 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:41 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:43 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 19:54:43 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:44 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:44 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:44 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:45 [decode_infer_rpyc.py:171] put mem manager to info_queues ok +INFO 06-24 19:54:45 [decode_kv_move_manager.py:388] decode kv move manager process started +INFO 06-24 19:54:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:54:45 [start_utils.py:37] init func start_router_process : init ok +INFO 06-24 19:54:45 [start_utils.py:37] init func start_detokenization_process : init ok +INFO 06-24 19:54:45 [api_start.py:57] start process pid 1211578 +INFO 06-24 19:54:45 [api_start.py:58] http server pid 1214339 +WARNING 06-24 19:54:45 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:45 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:45 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:46 [up_status.py:112] up kv manager start ok +INFO 06-24 19:54:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:54:48 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:54:49 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:50 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:52 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:52 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:52 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:54:52 [api_http.py:326] server start up +INFO 06-24 19:54:52 [atomic_array_lock.py:32] link lock shm 12322_0_lightllm_resource_lock +INFO 06-24 19:54:52 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total +INFO 06-24 19:54:52 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock +INFO 06-24 19:54:52 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock +INFO 06-24 19:54:53 [atomic_lock.py:29] link lock shm 12322_0_req_id_gen_lock +INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_latest_success_infer_time_mark +INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_shared_token_load +INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos +INFO 06-24 19:54:53 [api_http.py:330] server start up ok, loop use is +INFO 06-24 19:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:54:53 [pd_loop.py:92] Sent registration JSON: {'node_id': 147275795944234129756100418482494441380, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10135, 'detokenization_port': 10143, 'detokenization_pub_port': 10207, 'visual_port': 10165, 'audio_port': 10059, 'cache_port': 10114, 'metric_port': 10051, 'pd_node_infer_rpyc_ports': [10236], 'pd_node_id': 147275795944234129756100418482494441380, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 19:54:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:54:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:23 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:23 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:55:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 19:55:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:55:51 [manager.py:283] +DEBUG 06-24 19:55:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:55:51 [manager.py:284] +INFO 06-24 19:55:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:53 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:53 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:55:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:55:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:23 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:56:23 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:56:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 19:56:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:56:51 [manager.py:283] +DEBUG 06-24 19:56:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:56:51 [manager.py:284] +INFO 06-24 19:56:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:53 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:56:53 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:56:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:56:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:23 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:57:23 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:57:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 19:57:52 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:57:52 [manager.py:283] +DEBUG 06-24 19:57:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:57:52 [manager.py:284] +INFO 06-24 19:57:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:53 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:57:53 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:57:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:57:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:53 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:58:53 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 19:58:53 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:58:53 [manager.py:283] +DEBUG 06-24 19:58:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:58:53 [manager.py:284] +INFO 06-24 19:58:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:58:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:40 [rpyc_fix_utils.py:107] set nodelay mode +INFO 06-24 19:59:40 [rpyc_fix_utils.py:113] change socket buffer from 2626560 131072 change to 4194304 +INFO 06-24 19:59:40 [decode_kv_move_manager.py:225] build trans infos 163479035537597727162519172725806046247 127.0.1.1 20000 f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:40 [decode_trans_process.py:57] connect start PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=0, prefill_id=163479035537597727162519172725806046247, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') +INFO 06-24 19:59:40 [decode_trans_process.py:67] connect src_id 163479035537597727162519172725806046247 dest_id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:40 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 +INFO 06-24 19:59:40 [pynccl.py:180] LightLLM is using nccl==2.21.5 +INFO 06-24 19:59:41 [decode_trans_process.py:85] PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=0, prefill_id=163479035537597727162519172725806046247, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') kv trans connected +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 8 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 16 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 16 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 16 request_data_transfer fail, server is busy +INFO 06-24 19:59:41 [decode_trans_obj.py:118] kv_move_loop get task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:41 [decode_trans_process.py:34] trans start: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 24 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 24 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 24 request_data_transfer fail, server is busy +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 32 request_data_transfer fail, server is busy +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 40 request_data_transfer fail, server is busy +INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 48 request_data_transfer fail, server is busy +INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 56 request_data_transfer fail, server is busy +INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 64 request_data_transfer fail, server is busy +INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 +DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 72 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1055 +INFO 06-24 19:59:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 1.5428881645202637, id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 8 cost_time 0.027128219604492188 s +INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 8 finished +INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=8, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:8 +INFO 06-24 19:59:43 [manager.py:224] router recive req id 8 cost time 0.09608936309814453 s +DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=78859837134492230658999430496479756877, time:1750766383.4995642s req_ids:[8] +DEBUG 06-24 19:59:43 [manager.py:391] +DEBUG 06-24 19:59:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 3.198 tokens/s +DEBUG 06-24 19:59:43 [stats.py:37] Avg prompt tokens throughput: 3.198 tokens/s +DEBUG 06-24 19:59:43 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 80 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 80 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 8 cost time 0.15932035446166992 s +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 80 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 88 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 88 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 88 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 96 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 104 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 112 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_trans_obj.py:118] kv_move_loop get task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_process.py:34] trans start: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 +INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 13229 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 6395 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 +DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 136 request_data_transfer fail, server is busy +INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 0.018827438354492188, id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:118] kv_move_loop get task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_process.py:34] trans start: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 0.021219968795776367, id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 120 cost_time 0.030753135681152344 s +INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 120 finished +INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=120, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:120 +INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 128 cost_time 0.030910253524780273 s +INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 128 finished +INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=128, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:128 +INFO 06-24 19:59:43 [manager.py:224] router recive req id 120 cost time 0.10020303726196289 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 120 cost time 0.10201883316040039 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 128 cost time 0.12596750259399414 s +DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=197246024226914264121713941442776912679, time:1750766383.760377s req_ids:[120] +DEBUG 06-24 19:59:43 [manager.py:391] +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 128 cost time 0.1275322437286377 s +DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=83240924680699306059748328113417723632, time:1750766383.7850044s req_ids:[128] +DEBUG 06-24 19:59:43 [manager.py:391] +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 128 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:128 first_token_cost:223.16217422485352ms total_cost_time:223.18410873413086ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [batch.py:51] router release req id 128 +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 token used ratio: 0.12981942410932162 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 token used ratio: 0.19387506100536847 contain prompt cache tree unrefed token +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 144 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 152 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 160 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 168 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_trans_obj.py:118] kv_move_loop get task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [decode_trans_process.py:34] trans start: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 184 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 192 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 200 request_data_transfer fail, server is busy +INFO 06-24 19:59:44 [decode_trans_process.py:43] trans finished: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 +INFO 06-24 19:59:44 [decode_trans_process.py:45] trans cost time: 0.0549924373626709, id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [decode_trans_obj.py:95] _transfer_kv ok id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 176 cost_time 0.029801130294799805 s +INFO 06-24 19:59:44 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 176 finished +INFO 06-24 19:59:44 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=176, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:44 lightllm_req_id:176 +WARNING 06-24 19:59:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again +INFO 06-24 19:59:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 +WARNING 06-24 19:59:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again +INFO 06-24 19:59:44 [shm_array.py:30] create shm 12322_0_shm_prompts_2 +INFO 06-24 19:59:44 [manager.py:224] router recive req id 176 cost time 0.09153246879577637 s +INFO 06-24 19:59:44 [manager.py:68] detokenization recv req id 176 cost time 0.09333586692810059 s +DEBUG 06-24 19:59:44 [manager.py:391] Prefill Batch: batch_id=144644668040586024174396603513996757795, time:1750766384.942718s req_ids:[176] +DEBUG 06-24 19:59:44 [manager.py:391] +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 176 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:44 lightllm_req_id:176 first_token_cost:193.2220458984375ms total_cost_time:193.24350357055664ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1057 prompt_cache_len:1056 prompt_cache_ratio:0.9990539262062441 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [batch.py:51] router release req id 176 +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 token used ratio: 0.13579795021961932 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 token used ratio: 0.26433626159102 contain prompt cache tree unrefed token +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 208 request_data_transfer fail, server is busy +INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 216 request_data_transfer fail, server is busy +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 232 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 232 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 224 request_data_transfer fail, server is busy +INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 232 request_data_transfer fail, server is busy +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 240 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 10934 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1064 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 248 request_data_transfer fail, server is busy +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_process.py:34] trans start: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_process.py:43] trans finished: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1063 +INFO 06-24 19:59:45 [decode_trans_process.py:45] trans cost time: 0.024710416793823242, id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_obj.py:95] _transfer_kv ok id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_process.py:34] trans start: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [decode_trans_process.py:43] trans finished: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 2108 +INFO 06-24 19:59:45 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 240 cost_time 0.02665853500366211 s +INFO 06-24 19:59:45 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 240 finished +INFO 06-24 19:59:45 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=240, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:240 +WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again +INFO 06-24 19:59:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 +WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again +INFO 06-24 19:59:45 [shm_array.py:30] create shm 12322_0_shm_prompts_2 +INFO 06-24 19:59:46 [decode_trans_process.py:45] trans cost time: 0.14300155639648438, id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:95] _transfer_kv ok id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 256 cost_time 0.051763296127319336 s +INFO 06-24 19:59:46 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 256 finished +INFO 06-24 19:59:46 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 264 cost_time 0.05242276191711426 s +INFO 06-24 19:59:46 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 264 finished +INFO 06-24 19:59:46 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=256, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:46 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=264, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:256 +INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:264 +INFO 06-24 19:59:46 [manager.py:224] router recive req id 240 cost time 0.18394184112548828 s +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 240 cost time 0.18611979484558105 s +INFO 06-24 19:59:46 [manager.py:224] router recive req id 256 cost time 0.06905341148376465 s +DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=67281202978481964711024831614577858867, time:1750766386.1856062s req_ids:[240] +DEBUG 06-24 19:59:46 [manager.py:391] +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 256 cost time 0.07062745094299316 s +INFO 06-24 19:59:46 [manager.py:224] router recive req id 264 cost time 0.08840799331665039 s +DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=181293788626017492202555092884360541225, time:1750766386.2110493s req_ids:[256] +DEBUG 06-24 19:59:46 [manager.py:391] +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 264 cost time 0.09010958671569824 s +DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=217589871489938957811806292531150173471, time:1750766386.237144s req_ids:[264] +DEBUG 06-24 19:59:46 [manager.py:391] +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 240 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:240 first_token_cost:288.67244720458984ms total_cost_time:288.6958122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1064 prompt_cache_len:1063 prompt_cache_ratio:0.9990601503759399 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [batch.py:51] router release req id 240 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 256 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:256 first_token_cost:148.67806434631348ms total_cost_time:148.6983299255371ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1055 prompt_cache_len:1054 prompt_cache_ratio:0.9990521327014218 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:46 [batch.py:51] router release req id 256 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 264 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:264 first_token_cost:168.3180332183838ms total_cost_time:168.33782196044922ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:1055 prompt_cache_len:1054 prompt_cache_ratio:0.9990521327014218 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [batch.py:51] router release req id 264 +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 token used ratio: 0.14153245485602733 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 token used ratio: 0.4637018057589068 contain prompt cache tree unrefed token +INFO 06-24 19:59:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 272 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 272 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 8749 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 272 request_data_transfer fail, server is busy +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 280 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 288 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 288 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 7691 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 2436 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 288 request_data_transfer fail, server is busy +INFO 06-24 19:59:46 [decode_trans_obj.py:118] kv_move_loop get task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_process.py:34] trans start: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 296 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 296 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 7689 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 2436 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:46 [decode_trans_process.py:43] trans finished: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 +INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 296 request_data_transfer fail, server is busy +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 6638 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 3488 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 312 request_data_transfer fail, server is busy +INFO 06-24 19:59:46 [decode_trans_process.py:45] trans cost time: 0.04500317573547363, id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:95] _transfer_kv ok id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [task_queue.py:39] queue ready_to_move_queue left size: 1 +INFO 06-24 19:59:46 [decode_trans_obj.py:118] kv_move_loop get task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_process.py:34] trans start: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_process.py:43] trans finished: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1051 +INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 280 cost_time 0.016895771026611328 s +INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 280 finished +INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=280, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:280 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_4 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_4 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_4 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_4 +INFO 06-24 19:59:47 [decode_trans_process.py:45] trans cost time: 0.07761478424072266, id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:95] _transfer_kv ok id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:118] kv_move_loop get task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_process.py:34] trans start: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_process.py:43] trans finished: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1057 +INFO 06-24 19:59:47 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 304 cost_time 0.02335834503173828 s +INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 304 finished +INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=304, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:304 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:47 [decode_trans_process.py:45] trans cost time: 0.04166221618652344, id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:95] _transfer_kv ok id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 320 cost_time 0.015163183212280273 s +INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 320 finished +INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=320, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:320 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_2 +INFO 06-24 19:59:47 [manager.py:224] router recive req id 280 cost time 0.12640666961669922 s +INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 280 cost time 0.12806081771850586 s +INFO 06-24 19:59:47 [manager.py:224] router recive req id 304 cost time 0.09123945236206055 s +DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=277777573062047994740171680530455957075, time:1750766387.1931908s req_ids:[280] +DEBUG 06-24 19:59:47 [manager.py:391] +INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 304 cost time 0.09308290481567383 s +INFO 06-24 19:59:47 [manager.py:224] router recive req id 320 cost time 0.08214735984802246 s +DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=319286642773941052764187011823303848839, time:1750766387.2187362s req_ids:[304] +DEBUG 06-24 19:59:47 [manager.py:391] +INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 320 cost time 0.08399581909179688 s +DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=291105873674365910161113312262977364089, time:1750766387.2456753s req_ids:[320] +DEBUG 06-24 19:59:47 [manager.py:391] +INFO 06-24 19:59:47 [manager.py:162] detoken release req id 304 +INFO 06-24 19:59:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:304 first_token_cost:171.4761257171631ms total_cost_time:171.49710655212402ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1052 prompt_cache_len:1051 prompt_cache_ratio:0.9990494296577946 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:47 [batch.py:51] router release req id 304 +INFO 06-24 19:59:47 [manager.py:162] detoken release req id 320 +INFO 06-24 19:59:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:320 first_token_cost:161.9546413421631ms total_cost_time:161.96465492248535ms,out_token_counter:1 mean_per_token_cost_time: 0.010013580322265625ms prompt_token_num:1058 prompt_cache_len:1057 prompt_cache_ratio:0.999054820415879 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:47 [batch.py:51] router release req id 320 +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 token used ratio: 0.21296974133723767 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 token used ratio: 0.6638604197169351 contain prompt cache tree unrefed token +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 328 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 336 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 344 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 352 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 360 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 368 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 376 request_data_transfer fail, server is busy +INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5453 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 384 request_data_transfer fail, server is busy +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 token used ratio: 0.22212054660810152 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 token used ratio: 0.673011224987799 contain prompt cache tree unrefed token +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 5327 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 392 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 408 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 416 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_trans_obj.py:118] kv_move_loop get task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 408 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 416 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_trans_process.py:34] trans start: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 408 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 416 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_trans_process.py:43] trans finished: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 424 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 432 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 424 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 432 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 424 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 432 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 440 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 440 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 +DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 440 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 448 request_data_transfer fail, server is busy +INFO 06-24 19:59:49 [decode_trans_process.py:45] trans cost time: 0.035636186599731445, id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [decode_trans_obj.py:95] _transfer_kv ok id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 400 cost_time 0.020232439041137695 s +INFO 06-24 19:59:49 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 400 finished +INFO 06-24 19:59:49 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=400, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:49 lightllm_req_id:400 +WARNING 06-24 19:59:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again +INFO 06-24 19:59:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 +WARNING 06-24 19:59:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again +INFO 06-24 19:59:49 [shm_array.py:30] create shm 12322_0_shm_prompts_2 +INFO 06-24 19:59:49 [manager.py:224] router recive req id 400 cost time 0.1079549789428711 s +INFO 06-24 19:59:49 [manager.py:68] detokenization recv req id 400 cost time 0.10963606834411621 s +DEBUG 06-24 19:59:49 [manager.py:391] Prefill Batch: batch_id=240562744484786649800446342328616789333, time:1750766389.5182493s req_ids:[400] +DEBUG 06-24 19:59:49 [manager.py:391] +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 token used ratio: 0.29685212298682284 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 token used ratio: 0.7477428013665203 contain prompt cache tree unrefed token +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3069 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 464 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_trans_obj.py:118] kv_move_loop get task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [decode_trans_process.py:34] trans start: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 472 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 472 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_trans_process.py:43] trans finished: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 472 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 480 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 488 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 488 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 488 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 496 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 504 request_data_transfer fail, server is busy +INFO 06-24 19:59:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:50 [decode_trans_process.py:45] trans cost time: 0.03783893585205078, id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [decode_trans_obj.py:95] _transfer_kv ok id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 456 cost_time 0.01764392852783203 s +INFO 06-24 19:59:50 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 456 finished +INFO 06-24 19:59:50 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=456, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:456 +WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:50 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:50 [manager.py:224] router recive req id 456 cost time 0.07205057144165039 s +INFO 06-24 19:59:50 [manager.py:68] detokenization recv req id 456 cost time 0.07368993759155273 s +DEBUG 06-24 19:59:50 [manager.py:391] Prefill Batch: batch_id=336204780369009327632129103991276574369, time:1750766390.4856179s req_ids:[456] +DEBUG 06-24 19:59:50 [manager.py:391] +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 456 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:456 first_token_cost:176.88751220703125ms total_cost_time:176.9094467163086ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1063 prompt_cache_len:1062 prompt_cache_ratio:0.9990592662276576 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [batch.py:51] router release req id 456 +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 512 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 512 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 2896 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 512 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 520 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 520 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 2892 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 520 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 536 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 544 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_trans_obj.py:118] kv_move_loop get task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_trans_process.py:34] trans start: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 552 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_trans_process.py:43] trans finished: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1045 +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 560 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 568 request_data_transfer fail, server is busy +INFO 06-24 19:59:51 [decode_trans_process.py:45] trans cost time: 0.023418903350830078, id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [decode_trans_obj.py:95] _transfer_kv ok id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 528 cost_time 0.014069318771362305 s +INFO 06-24 19:59:51 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 528 finished +INFO 06-24 19:59:51 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=528, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:528 +WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:51 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 frozen token num: 1046 +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 token used ratio: 0.372559785261103 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 token used ratio: 0.8882991703269888 contain prompt cache tree unrefed token +INFO 06-24 19:59:51 [manager.py:224] router recive req id 528 cost time 0.10735130310058594 s +INFO 06-24 19:59:51 [manager.py:68] detokenization recv req id 528 cost time 0.10909676551818848 s +DEBUG 06-24 19:59:51 [manager.py:391] Prefill Batch: batch_id=332065354239372522643271644973622948359, time:1750766391.653858s req_ids:[528] +DEBUG 06-24 19:59:51 [manager.py:391] +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 528 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:528 first_token_cost:212.17942237854004ms total_cost_time:212.20135688781738ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1046 prompt_cache_len:1045 prompt_cache_ratio:0.9990439770554493 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [batch.py:51] router release req id 528 +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 576 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 584 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 592 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 600 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 608 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 616 request_data_transfer fail, server is busy +INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 624 request_data_transfer fail, server is busy +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32076622742801364 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 token used ratio: 0.9003172279160566 contain prompt cache tree unrefed token +INFO 06-24 19:59:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:53 [statics_utils.py:24] mean first cost: 206.37761629544772 ms +INFO 06-24 19:59:53 [statics_utils.py:24] mean per token cost: 0.020186106363932293 ms +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 632 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 640 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 648 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 656 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 664 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 672 in_len:1043 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +DEBUG 06-24 19:59:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 1384.057 tokens/s +DEBUG 06-24 19:59:53 [stats.py:37] Avg prompt tokens throughput: 1266.761 tokens/s +DEBUG 06-24 19:59:53 [stats.py:37] Avg generate tokens throughput: 117.296 tokens/s +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 672 in_len:1043 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 464 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 672 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 464 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 680 request_data_transfer fail, server is busy +INFO 06-24 19:59:53 [decode_trans_obj.py:118] kv_move_loop get task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [decode_trans_process.py:34] trans start: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [decode_trans_process.py:43] trans finished: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 +INFO 06-24 19:59:53 [decode_trans_process.py:45] trans cost time: 0.0374302864074707, id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [decode_trans_obj.py:95] _transfer_kv ok id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 664 cost_time 0.018375873565673828 s +INFO 06-24 19:59:53 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 664 finished +INFO 06-24 19:59:53 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=664, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:664 +WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:53 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:53 [manager.py:224] router recive req id 664 cost time 0.07961368560791016 s +INFO 06-24 19:59:53 [manager.py:68] detokenization recv req id 664 cost time 0.08190369606018066 s +DEBUG 06-24 19:59:53 [manager.py:391] Prefill Batch: batch_id=56316389325537960521534077905631517256, time:1750766393.707428s req_ids:[664] +DEBUG 06-24 19:59:53 [manager.py:391] +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 664 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:664 first_token_cost:186.48576736450195ms total_cost_time:186.5079402923584ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1063 prompt_cache_len:1062 prompt_cache_ratio:0.9990592662276576 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [batch.py:51] router release req id 664 +INFO 06-24 19:59:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3327232796486091 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.9771229868228404 contain prompt cache tree unrefed token +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 291 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 291 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 688 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 696 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 704 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 712 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 720 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 728 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 736 request_data_transfer fail, server is busy +INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 744 request_data_transfer fail, server is busy +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 token used ratio: 0.3449243533430942 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 token used ratio: 0.9893240605173256 contain prompt cache tree unrefed token +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 132 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 760 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 776 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_trans_obj.py:118] kv_move_loop get task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [decode_trans_process.py:34] trans start: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 776 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 768 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 776 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_trans_process.py:43] trans finished: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1053 +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 784 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 784 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 784 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 792 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 800 request_data_transfer fail, server is busy +INFO 06-24 19:59:55 [decode_trans_process.py:45] trans cost time: 0.03745913505554199, id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [decode_trans_obj.py:95] _transfer_kv ok id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 752 cost_time 0.020582199096679688 s +INFO 06-24 19:59:55 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 752 finished +INFO 06-24 19:59:55 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=752, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:752 +WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:55 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:55 [manager.py:224] router recive req id 752 cost time 0.08124971389770508 s +INFO 06-24 19:59:55 [manager.py:68] detokenization recv req id 752 cost time 0.08315467834472656 s +DEBUG 06-24 19:59:55 [manager.py:391] Prefill Batch: batch_id=108854083105344818407372731453821830999, time:1750766395.8599865s req_ids:[752] +DEBUG 06-24 19:59:55 [manager.py:391] +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 752 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:752 first_token_cost:188.4760856628418ms total_cost_time:188.50088119506836ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:1054 prompt_cache_len:1053 prompt_cache_ratio:0.9990512333965844 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [batch.py:51] router release req id 752 +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 token used ratio: 0.9370424597364568 contain prompt cache tree unrefed token +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 808 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 808 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1016 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 808 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 816 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 816 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1012 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1012 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 816 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 824 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 840 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 848 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 856 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 848 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 856 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 848 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 856 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_trans_obj.py:118] kv_move_loop get task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [decode_trans_process.py:34] trans start: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 864 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 864 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1016 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 864 request_data_transfer fail, server is busy +INFO 06-24 19:59:56 [decode_trans_process.py:43] trans finished: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 +INFO 06-24 19:59:56 [decode_trans_process.py:45] trans cost time: 0.048836708068847656, id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [decode_trans_obj.py:95] _transfer_kv ok id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 832 cost_time 0.02505183219909668 s +INFO 06-24 19:59:56 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 832 finished +INFO 06-24 19:59:56 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=832, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:56 lightllm_req_id:832 +WARNING 06-24 19:59:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:56 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:57 [manager.py:224] router recive req id 832 cost time 0.09881830215454102 s +INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 832 cost time 0.10045814514160156 s +DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=6221752173663725315526152860676858504, time:1750766397.052792s req_ids:[832] +DEBUG 06-24 19:59:57 [manager.py:391] +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 832 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:56 lightllm_req_id:832 first_token_cost:204.74624633789062ms total_cost_time:204.76865768432617ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1057 prompt_cache_len:1056 prompt_cache_ratio:0.9990539262062441 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [batch.py:51] router release req id 832 +INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 872 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:57 [decode_trans_obj.py:118] kv_move_loop get task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [decode_trans_process.py:34] trans start: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 880 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 888 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 880 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 12668 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 953 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 888 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 12668 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 953 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:57 [decode_trans_process.py:43] trans finished: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 880 request_data_transfer fail, server is busy +INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 888 request_data_transfer fail, server is busy +INFO 06-24 19:59:57 [decode_trans_process.py:45] trans cost time: 0.03789687156677246, id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [decode_trans_obj.py:95] _transfer_kv ok id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 19:59:57 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 872 cost_time 0.03162407875061035 s +INFO 06-24 19:59:57 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 872 finished +INFO 06-24 19:59:57 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=872, dp_index=0, pd_master_node_id=0) +INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:872 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 19:59:57 [manager.py:224] router recive req id 872 cost time 0.09926819801330566 s +INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 872 cost time 0.10094022750854492 s +INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 896 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 896 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 5269 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 13717 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 925 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 896 request_data_transfer fail, server is busy +DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=88475449123448403812857265878733895974, time:1750766397.5221214s req_ids:[872] +DEBUG 06-24 19:59:57 [manager.py:391] +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 872 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:872 first_token_cost:205.86729049682617ms total_cost_time:205.8889865875244ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [batch.py:51] router release req id 872 +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 token used ratio: 0.3685944363103953 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 token used ratio: 0.9480234260614934 contain prompt cache tree unrefed token +INFO 06-24 19:59:58 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 904 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 19:59:58 [decode_infer_rpyc.py:75] req_id: id: 904 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] radix refed token num 4220 +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] radix hold token num 13718 +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] mem manager can alloc token num 840 +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] +INFO 06-24 19:59:58 [decode_kv_move_manager.py:273] req id 904 request_data_transfer fail, server is busy +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 token used ratio: 0.38079551000488043 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 token used ratio: 0.9602244997559786 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3929965836993655 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 token used ratio: 0.9724255734504637 contain prompt cache tree unrefed token +INFO 06-24 20:00:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 token used ratio: 0.40519765739385066 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 token used ratio: 0.9846266471449487 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 token used ratio: 0.41739873108833575 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 token used ratio: 0.9968277208394338 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 576.096 tokens/s +DEBUG 06-24 20:00:03 [stats.py:37] Avg prompt tokens throughput: 421.885 tokens/s +DEBUG 06-24 20:00:03 [stats.py:37] Avg generate tokens throughput: 154.212 tokens/s +INFO 06-24 20:00:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 token used ratio: 0.4295998047828209 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 token used ratio: 0.94466813079551 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 token used ratio: 0.441800878477306 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 token used ratio: 0.9568692044899951 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 token used ratio: 0.4540019521717911 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 token used ratio: 0.9690702781844802 contain prompt cache tree unrefed token +INFO 06-24 20:00:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 token used ratio: 0.4662030258662762 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 token used ratio: 0.9812713518789653 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 token used ratio: 0.47840409956076135 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 token used ratio: 0.9934724255734505 contain prompt cache tree unrefed token +INFO 06-24 20:00:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 token used ratio: 0.49060517325524644 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 token used ratio: 0.9414958516349439 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 122.406 tokens/s +DEBUG 06-24 20:00:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:00:13 [stats.py:37] Avg generate tokens throughput: 122.406 tokens/s +INFO 06-24 20:00:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 token used ratio: 0.5028062469497315 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 token used ratio: 0.953696925329429 contain prompt cache tree unrefed token +INFO 06-24 20:00:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 token used ratio: 0.5150073206442167 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 token used ratio: 0.9658979990239142 contain prompt cache tree unrefed token +INFO 06-24 20:00:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 token used ratio: 0.5272083943387018 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 token used ratio: 0.9780990727183992 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 token used ratio: 0.5394094680331869 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 token used ratio: 0.9903001464128843 contain prompt cache tree unrefed token +INFO 06-24 20:00:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:00:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:00:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms +INFO 06-24 20:00:23 [statics_utils.py:24] mean per token cost: 0.0209808349609375 ms +DEBUG 06-24 20:00:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 89.890 tokens/s +DEBUG 06-24 20:00:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:00:23 [stats.py:37] Avg generate tokens throughput: 89.890 tokens/s +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 token used ratio: 0.551610541727672 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 token used ratio: 0.9379575402635432 contain prompt cache tree unrefed token +INFO 06-24 20:00:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 token used ratio: 0.5638116154221572 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 token used ratio: 0.9501586139580283 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 token used ratio: 0.5760126891166423 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 token used ratio: 0.9623596876525135 contain prompt cache tree unrefed token +INFO 06-24 20:00:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 token used ratio: 0.5882137628111274 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 token used ratio: 0.9745607613469985 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 current batch size: 4 +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 token used ratio: 0.6004148365056124 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 token used ratio: 0.9867618350414836 contain prompt cache tree unrefed token +INFO 06-24 20:00:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:00:32 [manager.py:162] detoken release req id 280 +INFO 06-24 20:00:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:280 first_token_cost:231.58621788024902ms total_cost_time:45849.95889663696ms,out_token_counter:1378 mean_per_token_cost_time: 33.104769723335785ms prompt_token_num:1059 prompt_cache_len:1058 prompt_cache_ratio:0.9990557129367328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:00:32 [batch.py:51] router release req id 280 +INFO 06-24 20:00:33 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 912 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 20:00:33 [decode_infer_rpyc.py:75] req_id: id: 912 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] radix refed token num 3162 +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] radix hold token num 11931 +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] mem manager can alloc token num 138 +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] +INFO 06-24 20:00:33 [decode_kv_move_manager.py:273] req id 912 request_data_transfer fail, server is busy +DEBUG 06-24 20:00:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 92.605 tokens/s +DEBUG 06-24 20:00:33 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:00:33 [stats.py:37] Avg generate tokens throughput: 92.605 tokens/s +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 token used ratio: 0.46193265007320644 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 token used ratio: 0.9968887262079063 contain prompt cache tree unrefed token +INFO 06-24 20:00:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 token used ratio: 0.47108345534407026 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 token used ratio: 0.9411908247925818 contain prompt cache tree unrefed token +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 token used ratio: 0.4802342606149341 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 token used ratio: 0.9503416300634456 contain prompt cache tree unrefed token +INFO 06-24 20:00:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 token used ratio: 0.48938506588579794 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 token used ratio: 0.9594924353343094 contain prompt cache tree unrefed token +WARNING 06-24 20:00:40 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 920 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type +INFO 06-24 20:00:41 [decode_infer_rpyc.py:75] req_id: id: 920 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] radix refed token num 3162 +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] radix hold token num 10868 +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 637 +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] estimated peak token num 15243 +DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] +INFO 06-24 20:00:41 [decode_kv_move_manager.py:273] req id 920 request_data_transfer fail, server is busy +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 token used ratio: 0.4985358711566618 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 token used ratio: 0.9686432406051733 contain prompt cache tree unrefed token +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 69.053 tokens/s +DEBUG 06-24 20:00:43 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:00:43 [stats.py:37] Avg generate tokens throughput: 69.053 tokens/s +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 token used ratio: 0.5076866764275256 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 token used ratio: 0.9777940458760371 contain prompt cache tree unrefed token +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:00:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 token used ratio: 0.5168374816983895 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 token used ratio: 0.9869448511469009 contain prompt cache tree unrefed token +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 token used ratio: 0.5259882869692533 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 token used ratio: 0.9960956564177648 contain prompt cache tree unrefed token +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 token used ratio: 0.5351390922401171 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 token used ratio: 0.9414348462664714 contain prompt cache tree unrefed token +INFO 06-24 20:00:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:00:53 [statics_utils.py:24] mean first cost: 204.02849421781653 ms +INFO 06-24 20:00:53 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms +INFO 06-24 20:00:53 [manager.py:620] left req id 8can release False refcount 4 +INFO 06-24 20:00:53 [manager.py:620] left req id 120can release False refcount 4 +INFO 06-24 20:00:53 [manager.py:620] left req id 400can release False refcount 4 +INFO 06-24 20:00:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 59.813 tokens/s +DEBUG 06-24 20:00:53 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:00:53 [stats.py:37] Avg generate tokens throughput: 59.813 tokens/s +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 token used ratio: 0.544289897510981 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 token used ratio: 0.9505856515373353 contain prompt cache tree unrefed token +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 token used ratio: 0.5534407027818448 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 token used ratio: 0.9597364568081991 contain prompt cache tree unrefed token +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:58 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 token used ratio: 0.5625915080527086 not contain prompt cache tree unrefed token +DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 token used ratio: 0.968887262079063 contain prompt cache tree unrefed token +INFO 06-24 20:01:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 token used ratio: 0.5717423133235725 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 token used ratio: 0.9780380673499268 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 68.165 tokens/s +DEBUG 06-24 20:01:03 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:03 [stats.py:37] Avg generate tokens throughput: 68.165 tokens/s +INFO 06-24 20:01:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 token used ratio: 0.5808931185944363 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 token used ratio: 0.9871888726207906 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 token used ratio: 0.5900439238653001 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 token used ratio: 0.9963396778916545 contain prompt cache tree unrefed token +INFO 06-24 20:01:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 token used ratio: 0.599194729136164 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 token used ratio: 0.94064177647633 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 token used ratio: 0.6083455344070278 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 token used ratio: 0.9497925817471937 contain prompt cache tree unrefed token +INFO 06-24 20:01:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 token used ratio: 0.6174963396778916 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 token used ratio: 0.9589433870180576 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 69.460 tokens/s +DEBUG 06-24 20:01:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:13 [stats.py:37] Avg generate tokens throughput: 69.460 tokens/s +INFO 06-24 20:01:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 token used ratio: 0.6266471449487555 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 token used ratio: 0.9680941922889215 contain prompt cache tree unrefed token +INFO 06-24 20:01:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 token used ratio: 0.6357979502196194 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 token used ratio: 0.9772449975597852 contain prompt cache tree unrefed token +INFO 06-24 20:01:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 token used ratio: 0.6449487554904831 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 token used ratio: 0.9863958028306491 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 token used ratio: 0.654099560761347 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 token used ratio: 0.995546608101513 contain prompt cache tree unrefed token +INFO 06-24 20:01:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 token used ratio: 0.6632503660322109 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 token used ratio: 0.9403977550024402 contain prompt cache tree unrefed token +INFO 06-24 20:01:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:01:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms +INFO 06-24 20:01:23 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms +DEBUG 06-24 20:01:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 72.525 tokens/s +DEBUG 06-24 20:01:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:23 [stats.py:37] Avg generate tokens throughput: 72.525 tokens/s +INFO 06-24 20:01:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 token used ratio: 0.6724011713030746 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 token used ratio: 0.949548560273304 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 token used ratio: 0.6815519765739385 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 token used ratio: 0.9586993655441679 contain prompt cache tree unrefed token +INFO 06-24 20:01:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 token used ratio: 0.6907027818448024 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 token used ratio: 0.9678501708150318 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 token used ratio: 0.6998535871156661 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 token used ratio: 0.9770009760858955 contain prompt cache tree unrefed token +INFO 06-24 20:01:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:01:33 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 token used ratio: 0.70900439238653 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 token used ratio: 0.9861517813567594 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.637 tokens/s +DEBUG 06-24 20:01:33 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:33 [stats.py:37] Avg generate tokens throughput: 73.637 tokens/s +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 token used ratio: 0.7181551976573939 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 token used ratio: 0.9953025866276233 contain prompt cache tree unrefed token +INFO 06-24 20:01:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 token used ratio: 0.7273060029282576 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 token used ratio: 0.9399707174231332 contain prompt cache tree unrefed token +INFO 06-24 20:01:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 token used ratio: 0.7364568081991215 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 token used ratio: 0.9491215226939971 contain prompt cache tree unrefed token +WARNING 06-24 20:01:41 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 token used ratio: 0.7456076134699854 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 token used ratio: 0.958272327964861 contain prompt cache tree unrefed token +INFO 06-24 20:01:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 token used ratio: 0.7547584187408491 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 token used ratio: 0.9674231332357247 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.200 tokens/s +DEBUG 06-24 20:01:43 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:43 [stats.py:37] Avg generate tokens throughput: 73.200 tokens/s +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 token used ratio: 0.763909224011713 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 token used ratio: 0.9765739385065886 contain prompt cache tree unrefed token +INFO 06-24 20:01:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:01:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 token used ratio: 0.7730600292825769 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 token used ratio: 0.9857247437774525 contain prompt cache tree unrefed token +INFO 06-24 20:01:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 token used ratio: 0.7822108345534406 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 token used ratio: 0.9948755490483162 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 token used ratio: 0.7913616398243045 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 token used ratio: 0.9399707174231332 contain prompt cache tree unrefed token +INFO 06-24 20:01:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:01:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:01:53 [statics_utils.py:24] mean first cost: 204.02849421781653 ms +INFO 06-24 20:01:53 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms +DEBUG 06-24 20:01:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.341 tokens/s +DEBUG 06-24 20:01:53 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:01:53 [stats.py:37] Avg generate tokens throughput: 73.341 tokens/s +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 token used ratio: 0.8005124450951684 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 token used ratio: 0.9491215226939971 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 token used ratio: 0.8096632503660323 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 token used ratio: 0.958272327964861 contain prompt cache tree unrefed token +INFO 06-24 20:01:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 token used ratio: 0.818814055636896 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 token used ratio: 0.9674231332357247 contain prompt cache tree unrefed token +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 token used ratio: 0.8279648609077599 not contain prompt cache tree unrefed token +DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 token used ratio: 0.9765739385065886 contain prompt cache tree unrefed token +INFO 06-24 20:02:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 token used ratio: 0.8371156661786238 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 token used ratio: 0.9857247437774525 contain prompt cache tree unrefed token +INFO 06-24 20:02:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 70.168 tokens/s +DEBUG 06-24 20:02:03 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:02:03 [stats.py:37] Avg generate tokens throughput: 70.168 tokens/s +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 token used ratio: 0.8462664714494875 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 token used ratio: 0.9948755490483162 contain prompt cache tree unrefed token +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 token used ratio: 0.8554172767203514 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 token used ratio: 0.9199609565641776 contain prompt cache tree unrefed token +INFO 06-24 20:02:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 token used ratio: 0.8645680819912153 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 token used ratio: 0.9291117618350415 contain prompt cache tree unrefed token +INFO 06-24 20:02:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 token used ratio: 0.873718887262079 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 token used ratio: 0.9382625671059053 contain prompt cache tree unrefed token +DEBUG 06-24 20:02:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 64.428 tokens/s +DEBUG 06-24 20:02:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:02:13 [stats.py:37] Avg generate tokens throughput: 64.428 tokens/s +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 token used ratio: 0.8828696925329429 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 token used ratio: 0.9474133723767691 contain prompt cache tree unrefed token +INFO 06-24 20:02:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 token used ratio: 0.8920204978038068 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 token used ratio: 0.956564177647633 contain prompt cache tree unrefed token +INFO 06-24 20:02:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 token used ratio: 0.9011713030746705 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 token used ratio: 0.9657149829184968 contain prompt cache tree unrefed token +INFO 06-24 20:02:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 token used ratio: 0.9103221083455344 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 token used ratio: 0.9748657881893606 contain prompt cache tree unrefed token +INFO 06-24 20:02:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:02:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms +INFO 06-24 20:02:23 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms +DEBUG 06-24 20:02:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 57.594 tokens/s +DEBUG 06-24 20:02:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s +DEBUG 06-24 20:02:23 [stats.py:37] Avg generate tokens throughput: 57.594 tokens/s +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 token used ratio: 0.9194729136163983 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 token used ratio: 0.9840165934602245 contain prompt cache tree unrefed token +INFO 06-24 20:02:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:8 first_token_cost:273.547887802124ms total_cost_time:163344.9625968933ms,out_token_counter:4096 mean_per_token_cost_time: 39.812357106711715ms prompt_token_num:1056 prompt_cache_len:1055 prompt_cache_ratio:0.9990530303030303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:26 lightllm_req_id:8 +WARNING 06-24 20:02:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_4 and create again +INFO 06-24 20:02:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_4 +WARNING 06-24 20:02:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_4 and create again +INFO 06-24 20:02:26 [shm_array.py:30] create shm 12322_0_shm_prompts_4 +INFO 06-24 20:02:26 [batch.py:51] router release req id 8 +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 token used ratio: 0.6143240605173256 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 token used ratio: 0.9931063933626159 contain prompt cache tree unrefed token +INFO 06-24 20:02:26 [manager.py:224] router recive req id 8 cost time 0.15075016021728516 s +INFO 06-24 20:02:26 [manager.py:68] detokenization recv req id 8 cost time 0.15286540985107422 s +DEBUG 06-24 20:02:26 [manager.py:391] Prefill Batch: batch_id=58698570276155524760032194803622252344, time:1750766546.9490788s req_ids:[8] +DEBUG 06-24 20:02:26 [manager.py:391] +ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:26 lightllm_req_id:8 first_token_cost:295.5002784729004ms total_cost_time:295.53794860839844ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5152 prompt_cache_len:5151 prompt_cache_ratio:0.999805900621118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:120 first_token_cost:219.25711631774902ms total_cost_time:163406.2693119049ms,out_token_counter:4096 mean_per_token_cost_time: 39.84057914931327ms prompt_token_num:1059 prompt_cache_len:1058 prompt_cache_ratio:0.9990557129367328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 +WARNING 06-24 20:02:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again +INFO 06-24 20:02:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 +WARNING 06-24 20:02:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again +INFO 06-24 20:02:27 [shm_array.py:30] create shm 12322_0_shm_prompts_3 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 +INFO 06-24 20:02:27 [batch.py:51] router release req id 120 +INFO 06-24 20:02:27 [batch.py:51] router release req id 8 +INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18331217765808105 s +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.18489503860473633 s +INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.25066685676574707 s +DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=55151820991519834530335084348763236391, time:1750766547.2862108s req_ids:[8] +DEBUG 06-24 20:02:27 [manager.py:391] +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.2523775100708008 s +ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=324595234068718905327107771453927268720, time:1750766547.352423s req_ids:[120] +DEBUG 06-24 20:02:27 [manager.py:391] +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:327.23283767700195ms total_cost_time:327.27575302124023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5153 prompt_cache_len:5151 prompt_cache_ratio:0.9996118765767514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 +ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:27 [batch.py:51] router release req id 8 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:410.9804630279541ms total_cost_time:411.0243320465088ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5155 prompt_cache_len:5154 prompt_cache_ratio:0.9998060135790494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 +INFO 06-24 20:02:27 [batch.py:51] router release req id 120 +INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18894100189208984 s +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.19057464599609375 s +INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.17214608192443848 s +DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=177490213761783962427997777087416007060, time:1750766547.6244812s req_ids:[8] +DEBUG 06-24 20:02:27 [manager.py:391] +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.17384767532348633 s +ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=307868029576368645239940015534616110336, time:1750766547.6910472s req_ids:[120] +DEBUG 06-24 20:02:27 [manager.py:391] +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:332.98230171203613ms total_cost_time:333.0240249633789ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5154 prompt_cache_len:5151 prompt_cache_ratio:0.9994179278230501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 +ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:27 [batch.py:51] router release req id 8 +INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:334.6529006958008ms total_cost_time:334.69367027282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5156 prompt_cache_len:5154 prompt_cache_ratio:0.9996121024049651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 +INFO 06-24 20:02:27 [batch.py:51] router release req id 120 +INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18998241424560547 s +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.1916346549987793 s +INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.17139911651611328 s +DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=148107591610778313726679120311886011716, time:1750766547.96445s req_ids:[8] +DEBUG 06-24 20:02:27 [manager.py:391] +INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.17303133010864258 s +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=191872871025326045705711332854194569796, time:1750766548.0315557s req_ids:[120] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:334.2723846435547ms total_cost_time:334.31410789489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5155 prompt_cache_len:5151 prompt_cache_ratio:0.9992240543161979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:28 [batch.py:51] router release req id 8 +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:334.1801166534424ms total_cost_time:334.22327041625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5157 prompt_cache_len:5154 prompt_cache_ratio:0.9994182664339732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 +INFO 06-24 20:02:28 [batch.py:51] router release req id 120 +INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.19146418571472168 s +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.19313263893127441 s +INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.1751861572265625 s +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=19951010786248747427721704446616503021, time:1750766548.3064108s req_ids:[8] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.17684268951416016 s +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=9544498945230240798969547345878200462, time:1750766548.3725064s req_ids:[120] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:335.15071868896484ms total_cost_time:335.1907730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5156 prompt_cache_len:5151 prompt_cache_ratio:0.9990302560124127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:28 [batch.py:51] router release req id 8 +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:337.04614639282227ms total_cost_time:337.10575103759766ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5158 prompt_cache_len:5154 prompt_cache_ratio:0.9992245056223342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 +INFO 06-24 20:02:28 [batch.py:51] router release req id 120 +INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.18924427032470703 s +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.19082164764404297 s +INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.16993260383605957 s +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=206301375493748779292062980281842267970, time:1750766548.645843s req_ids:[8] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.17154550552368164 s +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=258367600656908120183520187698952271172, time:1750766548.711979s req_ids:[120] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:333.9109420776367ms total_cost_time:333.9529037475586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5157 prompt_cache_len:5151 prompt_cache_ratio:0.9988365328679465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 +ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:28 [batch.py:51] router release req id 8 +INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:330.7986259460449ms total_cost_time:330.8393955230713ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5159 prompt_cache_len:5154 prompt_cache_ratio:0.9990308199263424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 +INFO 06-24 20:02:28 [batch.py:51] router release req id 120 +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 10505 +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 token used ratio: 0.3026476329917033 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 token used ratio: 0.9958516349438751 contain prompt cache tree unrefed token +INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.19005727767944336 s +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.1916353702545166 s +INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.17285466194152832 s +DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=130994044296742924544660401690901123193, time:1750766548.985329s req_ids:[8] +DEBUG 06-24 20:02:28 [manager.py:391] +INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.174422025680542 s +ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=256343030381444045174742585450945114861, time:1750766549.0520992s req_ids:[120] +DEBUG 06-24 20:02:29 [manager.py:391] +INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:334.6133232116699ms total_cost_time:334.6571922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5158 prompt_cache_len:5151 prompt_cache_ratio:0.9986428848390849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 +ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:29 [batch.py:51] router release req id 8 +INFO 06-24 20:02:29 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:566.0884380340576ms total_cost_time:566.131591796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5160 prompt_cache_len:5154 prompt_cache_ratio:0.9988372093023256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 +INFO 06-24 20:02:29 [batch.py:51] router release req id 120 +INFO 06-24 20:02:29 [manager.py:224] router recive req id 8 cost time 0.42057371139526367 s +INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 8 cost time 0.42220330238342285 s +INFO 06-24 20:02:29 [manager.py:224] router recive req id 120 cost time 0.16965055465698242 s +DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=320710000408154836108755850928504862051, time:1750766549.5546157s req_ids:[8] +DEBUG 06-24 20:02:29 [manager.py:391] +INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 120 cost time 0.17153406143188477 s +ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=279463898450996377852457583534159633084, time:1750766549.6245277s req_ids:[120] +DEBUG 06-24 20:02:29 [manager.py:391] +INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:566.643476486206ms total_cost_time:566.6866302490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5159 prompt_cache_len:5151 prompt_cache_ratio:0.9984493118821477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 +ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:29 [batch.py:51] router release req id 8 +INFO 06-24 20:02:29 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 first_token_cost:334.5487117767334ms total_cost_time:334.60474014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5161 prompt_cache_len:5154 prompt_cache_ratio:0.998643673706646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 +INFO 06-24 20:02:29 [batch.py:51] router release req id 120 +INFO 06-24 20:02:29 [manager.py:224] router recive req id 8 cost time 0.19302701950073242 s +INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 8 cost time 0.19452691078186035 s +INFO 06-24 20:02:29 [manager.py:224] router recive req id 120 cost time 0.17338347434997559 s +DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=221046665309317463906105869468329545501, time:1750766549.8992717s req_ids:[8] +DEBUG 06-24 20:02:29 [manager.py:391] +INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 120 cost time 0.17486906051635742 s +ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=43310256160888501956184753382885587604, time:1750766549.9690757s req_ids:[120] +DEBUG 06-24 20:02:29 [manager.py:391] +INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:338.1624221801758ms total_cost_time:338.20295333862305ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5160 prompt_cache_len:5151 prompt_cache_ratio:0.9982558139534884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 +ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:30 [batch.py:51] router release req id 8 +INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 first_token_cost:342.17357635498047ms total_cost_time:342.21434593200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5162 prompt_cache_len:5154 prompt_cache_ratio:0.9984502130956994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 +INFO 06-24 20:02:30 [batch.py:51] router release req id 120 +INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.19750738143920898 s +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.19896626472473145 s +INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.18012619018554688 s +DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=233065189596150499436936420445503785771, time:1750766550.25376s req_ids:[8] +DEBUG 06-24 20:02:30 [manager.py:391] +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.18140125274658203 s +ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=57186440482347288099027679364484160941, time:1750766550.3238456s req_ids:[120] +DEBUG 06-24 20:02:30 [manager.py:391] +INFO 06-24 20:02:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:349.43580627441406ms total_cost_time:349.48039054870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5161 prompt_cache_len:5151 prompt_cache_ratio:0.9980623910094942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 +ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:30 [batch.py:51] router release req id 8 +INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:349.84445571899414ms total_cost_time:349.8857021331787ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5163 prompt_cache_len:5154 prompt_cache_ratio:0.9982568274259151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 +INFO 06-24 20:02:30 [batch.py:51] router release req id 120 +INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.19885563850402832 s +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.20014476776123047 s +INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.18237876892089844 s +DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=325166414549089171055933772292823555424, time:1750766550.6116781s req_ids:[8] +DEBUG 06-24 20:02:30 [manager.py:391] +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.1838054656982422 s +ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=182687286152867624407180658577576079028, time:1750766550.6806443s req_ids:[120] +DEBUG 06-24 20:02:30 [manager.py:391] +INFO 06-24 20:02:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 first_token_cost:349.63130950927734ms total_cost_time:349.6527671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:5162 prompt_cache_len:5151 prompt_cache_ratio:0.9978690430065866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 +ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:30 [batch.py:51] router release req id 8 +INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:351.529598236084ms total_cost_time:351.58514976501465ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:5164 prompt_cache_len:5154 prompt_cache_ratio:0.9980635166537568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 +INFO 06-24 20:02:30 [batch.py:51] router release req id 120 +INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.2012631893157959 s +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.2022998332977295 s +INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.17996978759765625 s +DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=181692654978745119352071937201076676150, time:1750766550.966496s req_ids:[8] +DEBUG 06-24 20:02:30 [manager.py:391] +INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.18112778663635254 s +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=137537163284179342353635003783964228096, time:1750766551.0392952s req_ids:[120] +DEBUG 06-24 20:02:31 [manager.py:391] +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 first_token_cost:353.787899017334ms total_cost_time:353.82986068725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5163 prompt_cache_len:5151 prompt_cache_ratio:0.9976757699012202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:31 [batch.py:51] router release req id 8 +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:352.89525985717773ms total_cost_time:352.9384136199951ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5165 prompt_cache_len:5154 prompt_cache_ratio:0.9978702807357211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 +INFO 06-24 20:02:31 [batch.py:51] router release req id 120 +INFO 06-24 20:02:31 [manager.py:224] router recive req id 8 cost time 0.1950840950012207 s +INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 8 cost time 0.19604825973510742 s +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 5153 +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 token used ratio: 0.3049658369936554 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 token used ratio: 0.9981698389458272 contain prompt cache tree unrefed token +INFO 06-24 20:02:31 [manager.py:224] router recive req id 120 cost time 0.17209434509277344 s +DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=197006888500978337676448670582637360287, time:1750766551.3175275s req_ids:[8] +DEBUG 06-24 20:02:31 [manager.py:391] +INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 120 cost time 0.1730031967163086 s +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=11670169896892292318513132832919621299, time:1750766551.3842564s req_ids:[120] +DEBUG 06-24 20:02:31 [manager.py:391] +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:338.67692947387695ms total_cost_time:338.7181758880615ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5164 prompt_cache_len:5151 prompt_cache_ratio:0.9974825716498839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:31 [batch.py:51] router release req id 8 +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:334.4714641571045ms total_cost_time:334.51390266418457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5166 prompt_cache_len:5154 prompt_cache_ratio:0.9976771196283392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 +INFO 06-24 20:02:31 [batch.py:51] router release req id 120 +INFO 06-24 20:02:31 [manager.py:224] router recive req id 8 cost time 0.19119596481323242 s +INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 8 cost time 0.19215726852416992 s +INFO 06-24 20:02:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:31 [manager.py:224] router recive req id 120 cost time 0.17340993881225586 s +DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=120668157758041636555107115534557259494, time:1750766551.658939s req_ids:[8] +DEBUG 06-24 20:02:31 [manager.py:391] +INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 120 cost time 0.17439556121826172 s +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=14423423920378056212830746976957857580, time:1750766551.7268872s req_ids:[120] +DEBUG 06-24 20:02:31 [manager.py:391] +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:337.3236656188965ms total_cost_time:337.3675346374512ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5165 prompt_cache_len:5151 prompt_cache_ratio:0.9972894482090997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 +ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:31 [batch.py:51] router release req id 8 +INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:500.0150203704834ms total_cost_time:500.0574588775635ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5167 prompt_cache_len:5154 prompt_cache_ratio:0.9974840332881749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 +INFO 06-24 20:02:32 [batch.py:51] router release req id 120 +INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.35269975662231445 s +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.35370349884033203 s +INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.17232894897460938 s +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=179823136526523305886515440313213155976, time:1750766552.163929s req_ids:[8] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.17338347434997559 s +ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=88046096357712232782403390132352545457, time:1750766552.2301753s req_ids:[120] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:497.23196029663086ms total_cost_time:497.27463722229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5166 prompt_cache_len:5151 prompt_cache_ratio:0.997096399535424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 +ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:32 [batch.py:51] router release req id 8 +INFO 06-24 20:02:32 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:333.2059383392334ms total_cost_time:333.2488536834717ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5168 prompt_cache_len:5154 prompt_cache_ratio:0.9972910216718266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 +INFO 06-24 20:02:32 [batch.py:51] router release req id 120 +INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.18969106674194336 s +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.19072461128234863 s +INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.1734631061553955 s +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=304560050187529194552626759112820445802, time:1750766552.5041142s req_ids:[8] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.17458558082580566 s +ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=70854333583937847052961646541117977776, time:1750766552.5711844s req_ids:[120] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:335.3080749511719ms total_cost_time:335.35122871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5167 prompt_cache_len:5151 prompt_cache_ratio:0.9969034255854461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 +ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:32 [batch.py:51] router release req id 8 +INFO 06-24 20:02:32 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 first_token_cost:339.2155170440674ms total_cost_time:339.25747871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5169 prompt_cache_len:5154 prompt_cache_ratio:0.9970980847359258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 +INFO 06-24 20:02:32 [batch.py:51] router release req id 120 +INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.19597721099853516 s +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.1969306468963623 s +INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.17893099784851074 s +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=153692536721252447721302630558412660203, time:1750766552.8546917s req_ids:[8] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.18005657196044922 s +ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=325272125246867870141921304363947877800, time:1750766552.9278076s req_ids:[120] +DEBUG 06-24 20:02:32 [manager.py:391] +INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:351.01842880249023ms total_cost_time:351.0599136352539ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5168 prompt_cache_len:5151 prompt_cache_ratio:0.9967105263157895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:33 [batch.py:51] router release req id 8 +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 first_token_cost:350.34632682800293ms total_cost_time:350.4002094268799ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:5170 prompt_cache_len:5154 prompt_cache_ratio:0.9969052224371373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 +INFO 06-24 20:02:33 [batch.py:51] router release req id 120 +INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.1944289207458496 s +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.1954362392425537 s +INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.17339372634887695 s +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=292150644875981341720552266159506310504, time:1750766553.2059548s req_ids:[8] +DEBUG 06-24 20:02:33 [manager.py:391] +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.17440032958984375 s +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=30636783448264997668087792726228861062, time:1750766553.2731233s req_ids:[120] +DEBUG 06-24 20:02:33 [manager.py:391] +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:338.33932876586914ms total_cost_time:338.38415145874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5169 prompt_cache_len:5151 prompt_cache_ratio:0.9965177016831108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:33 [batch.py:51] router release req id 8 +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:341.40872955322266ms total_cost_time:341.45116806030273ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5171 prompt_cache_len:5154 prompt_cache_ratio:0.9967124347321601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 +INFO 06-24 20:02:33 [batch.py:51] router release req id 120 +INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.19852948188781738 s +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.19960403442382812 s +INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.1758863925933838 s +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=284266565621990171334867632545315592012, time:1750766553.555611s req_ids:[8] +DEBUG 06-24 20:02:33 [manager.py:391] +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 5153 +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 token used ratio: 0.3072230356271352 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 token used ratio: 0.9358833577354807 contain prompt cache tree unrefed token +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.17690825462341309 s +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=148365955361019506689171581896907311253, time:1750766553.6230311s req_ids:[120] +DEBUG 06-24 20:02:33 [manager.py:391] +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:344.1734313964844ms total_cost_time:344.21753883361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5170 prompt_cache_len:5151 prompt_cache_ratio:0.9963249516441006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:33 [batch.py:51] router release req id 8 +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:338.58776092529297ms total_cost_time:338.62948417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5172 prompt_cache_len:5154 prompt_cache_ratio:0.9965197215777262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 +INFO 06-24 20:02:33 [batch.py:51] router release req id 120 +DEBUG 06-24 20:02:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 19104.743 tokens/s +DEBUG 06-24 20:02:33 [stats.py:37] Avg prompt tokens throughput: 19066.909 tokens/s +DEBUG 06-24 20:02:33 [stats.py:37] Avg generate tokens throughput: 37.834 tokens/s +INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.19037938117980957 s +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.19129514694213867 s +INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.17192554473876953 s +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=106275082060199716910361946551511747546, time:1750766553.8956792s req_ids:[8] +DEBUG 06-24 20:02:33 [manager.py:391] +INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.1729111671447754 s +ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=116522267955794727148873301070590193805, time:1750766553.9633794s req_ids:[120] +DEBUG 06-24 20:02:33 [manager.py:391] +INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:334.67769622802734ms total_cost_time:334.72204208374023ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5171 prompt_cache_len:5151 prompt_cache_ratio:0.9961322761554825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 +ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:34 [batch.py:51] router release req id 8 +INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:335.0250720977783ms total_cost_time:335.0677490234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5173 prompt_cache_len:5154 prompt_cache_ratio:0.9963270829306012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 +INFO 06-24 20:02:34 [batch.py:51] router release req id 120 +INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.190169095993042 s +INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.19116473197937012 s +INFO 06-24 20:02:34 [manager.py:224] router recive req id 120 cost time 0.17226386070251465 s +DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=216180060899162795446057214724923543950, time:1750766554.2366824s req_ids:[8] +DEBUG 06-24 20:02:34 [manager.py:391] +INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 120 cost time 0.17324280738830566 s +ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=133335284426750078538303677658081183383, time:1750766554.3066978s req_ids:[120] +DEBUG 06-24 20:02:34 [manager.py:391] +INFO 06-24 20:02:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:337.141752243042ms total_cost_time:337.18180656433105ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5172 prompt_cache_len:5151 prompt_cache_ratio:0.9959396751740139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 +ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:34 [batch.py:51] router release req id 8 +INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:499.93205070495605ms total_cost_time:499.97663497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5174 prompt_cache_len:5154 prompt_cache_ratio:0.9961345187475841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 +INFO 06-24 20:02:34 [batch.py:51] router release req id 120 +INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.3507866859436035 s +INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.35181355476379395 s +INFO 06-24 20:02:34 [manager.py:224] router recive req id 120 cost time 0.16745638847351074 s +DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=328487710547830764985725990226098219099, time:1750766554.7383745s req_ids:[8] +DEBUG 06-24 20:02:34 [manager.py:391] +INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 120 cost time 0.16847538948059082 s +ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=171092730690731262668778781094899037403, time:1750766554.8028111s req_ids:[120] +DEBUG 06-24 20:02:34 [manager.py:391] +INFO 06-24 20:02:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 first_token_cost:490.24319648742676ms total_cost_time:490.28682708740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5173 prompt_cache_len:5151 prompt_cache_ratio:0.9957471486564856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 +ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:34 [batch.py:51] router release req id 8 +INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:324.27501678466797ms total_cost_time:324.31697845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5175 prompt_cache_len:5154 prompt_cache_ratio:0.9959420289855072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 +INFO 06-24 20:02:34 [batch.py:51] router release req id 120 +INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.18387365341186523 s +INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.18485713005065918 s +INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.16688251495361328 s +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=308381661668696947110093909189302682866, time:1750766555.0676937s req_ids:[8] +DEBUG 06-24 20:02:35 [manager.py:391] +INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.16782617568969727 s +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=269123448390170420007340370954879016595, time:1750766555.1320918s req_ids:[120] +DEBUG 06-24 20:02:35 [manager.py:391] +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 first_token_cost:322.0694065093994ms total_cost_time:322.1457004547119ms,out_token_counter:1 mean_per_token_cost_time: 0.0762939453125ms prompt_token_num:5174 prompt_cache_len:5151 prompt_cache_ratio:0.9955546965597217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:35 [batch.py:51] router release req id 8 +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:329.40220832824707ms total_cost_time:329.44512367248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5176 prompt_cache_len:5154 prompt_cache_ratio:0.9957496136012365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 +INFO 06-24 20:02:35 [batch.py:51] router release req id 120 +INFO 06-24 20:02:35 [manager.py:224] router recive req id 8 cost time 0.1957552433013916 s +INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 8 cost time 0.1977832317352295 s +INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.17625045776367188 s +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=94067862915937339081023395007342340164, time:1750766555.4121013s req_ids:[8] +DEBUG 06-24 20:02:35 [manager.py:391] +INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.1781754493713379 s +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=87206748564518505841282697682956678676, time:1750766555.48042s req_ids:[120] +DEBUG 06-24 20:02:35 [manager.py:391] +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:343.7843322753906ms total_cost_time:343.8246250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5175 prompt_cache_len:5151 prompt_cache_ratio:0.9953623188405797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:35 [batch.py:51] router release req id 8 +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:340.26598930358887ms total_cost_time:340.30938148498535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5177 prompt_cache_len:5154 prompt_cache_ratio:0.9955572725516708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 +INFO 06-24 20:02:35 [batch.py:51] router release req id 120 +INFO 06-24 20:02:35 [manager.py:224] router recive req id 8 cost time 0.19130301475524902 s +INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 8 cost time 0.19298934936523438 s +INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.1761031150817871 s +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=147672100185494361982363591948111087835, time:1750766555.7579165s req_ids:[8] +DEBUG 06-24 20:02:35 [manager.py:391] +INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.17804574966430664 s +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=143047659011745498613406751312335334599, time:1750766555.8263934s req_ids:[120] +DEBUG 06-24 20:02:35 [manager.py:391] +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 10409 +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 token used ratio: 0.623718887262079 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 token used ratio: 0.9381405563689604 contain prompt cache tree unrefed token +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:340.7261371612549ms total_cost_time:340.76833724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5176 prompt_cache_len:5151 prompt_cache_ratio:0.9951700154559505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 +ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:35 [batch.py:51] router release req id 8 +INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:344.4535732269287ms total_cost_time:344.4943428039551ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5178 prompt_cache_len:5154 prompt_cache_ratio:0.9953650057937428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 +INFO 06-24 20:02:35 [batch.py:51] router release req id 120 +INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.19268131256103516 s +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.19474244117736816 s +INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.1732466220855713 s +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=319358495761877469808091573891295074292, time:1750766556.1048965s req_ids:[8] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.1749589443206787 s +ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=88442397095455837595288464995755154619, time:1750766556.173116s req_ids:[120] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:340.27099609375ms total_cost_time:340.31200408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5177 prompt_cache_len:5151 prompt_cache_ratio:0.9949777863627584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 +ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:36 [batch.py:51] router release req id 8 +INFO 06-24 20:02:36 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:337.16678619384766ms total_cost_time:337.2082710266113ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5179 prompt_cache_len:5154 prompt_cache_ratio:0.9951728132844179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 +INFO 06-24 20:02:36 [batch.py:51] router release req id 120 +INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.18986248970031738 s +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.19164085388183594 s +INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.17455458641052246 s +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=105715028260458016211972132122476947014, time:1750766556.4490292s req_ids:[8] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.17646193504333496 s +ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=42024943894738311677341859538473944952, time:1750766556.5168693s req_ids:[120] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:338.37127685546875ms total_cost_time:338.4137153625488ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5178 prompt_cache_len:5151 prompt_cache_ratio:0.9947856315179606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 +ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:36 [batch.py:51] router release req id 8 +INFO 06-24 20:02:36 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 first_token_cost:338.2561206817627ms total_cost_time:338.31024169921875ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5180 prompt_cache_len:5154 prompt_cache_ratio:0.994980694980695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 +INFO 06-24 20:02:36 [batch.py:51] router release req id 120 +INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.18833446502685547 s +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.1901721954345703 s +INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.17294073104858398 s +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=74474623728033823189024821135637606482, time:1750766556.7925127s req_ids:[8] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.1747579574584961 s +ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=238536329505933910770105629899399880214, time:1750766556.8604538s req_ids:[120] +DEBUG 06-24 20:02:36 [manager.py:391] +INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:336.4260196685791ms total_cost_time:336.47966384887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5179 prompt_cache_len:5151 prompt_cache_ratio:0.994593550878548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 +ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:37 [batch.py:51] router release req id 8 +INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 first_token_cost:495.8524703979492ms total_cost_time:495.8951473236084ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5181 prompt_cache_len:5154 prompt_cache_ratio:0.9947886508396062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 +INFO 06-24 20:02:37 [batch.py:51] router release req id 120 +INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.3491194248199463 s +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.35091090202331543 s +INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.17320775985717773 s +DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=189323941677795171105667660791104535233, time:1750766557.2944403s req_ids:[8] +DEBUG 06-24 20:02:37 [manager.py:391] +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.17506647109985352 s +ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=66057912821395346582374237591150179711, time:1750766557.362545s req_ids:[120] +DEBUG 06-24 20:02:37 [manager.py:391] +INFO 06-24 20:02:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:495.4962730407715ms total_cost_time:495.5401420593262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5180 prompt_cache_len:5151 prompt_cache_ratio:0.9944015444015444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 +ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:37 [batch.py:51] router release req id 8 +INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:336.8966579437256ms total_cost_time:336.93814277648926ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5182 prompt_cache_len:5154 prompt_cache_ratio:0.994596680818217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 +INFO 06-24 20:02:37 [batch.py:51] router release req id 120 +INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.1900639533996582 s +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.19208860397338867 s +INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.17496514320373535 s +DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=50082484109554152682919667096071512388, time:1750766557.638698s req_ids:[8] +DEBUG 06-24 20:02:37 [manager.py:391] +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.17653203010559082 s +ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=322850610058007483413692199976770092967, time:1750766557.7064953s req_ids:[120] +DEBUG 06-24 20:02:37 [manager.py:391] +INFO 06-24 20:02:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 first_token_cost:338.0165100097656ms total_cost_time:338.0591869354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5181 prompt_cache_len:5151 prompt_cache_ratio:0.9942096120440069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 +ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:37 [batch.py:51] router release req id 8 +INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:338.3982181549072ms total_cost_time:338.442325592041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5183 prompt_cache_len:5154 prompt_cache_ratio:0.9944047848736253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 +INFO 06-24 20:02:37 [batch.py:51] router release req id 120 +INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.1906728744506836 s +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.19226837158203125 s +INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.1736290454864502 s +DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=310037248984337995353741690915580331398, time:1750766557.9810693s req_ids:[8] +DEBUG 06-24 20:02:37 [manager.py:391] +INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.1753554344177246 s +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=89094203310283461523971066376083520727, time:1750766558.0492156s req_ids:[120] +DEBUG 06-24 20:02:38 [manager.py:391] +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 first_token_cost:337.5270366668701ms total_cost_time:337.5692367553711ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5182 prompt_cache_len:5151 prompt_cache_ratio:0.9940177537630258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:38 [batch.py:51] router release req id 8 +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:338.4726047515869ms total_cost_time:338.5307788848877ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5184 prompt_cache_len:5154 prompt_cache_ratio:0.9942129629629629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 +INFO 06-24 20:02:38 [batch.py:51] router release req id 120 +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 10380 +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 token used ratio: 0.3117984382625671 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 token used ratio: 0.9404587603709127 contain prompt cache tree unrefed token +INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.1923377513885498 s +INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19388699531555176 s +INFO 06-24 20:02:38 [manager.py:224] router recive req id 120 cost time 0.17340946197509766 s +DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=238723149873922316462447295905857588050, time:1750766558.3256867s req_ids:[8] +DEBUG 06-24 20:02:38 [manager.py:391] +INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 120 cost time 0.17495369911193848 s +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=182779175972599225594068117589919037104, time:1750766558.3929396s req_ids:[120] +DEBUG 06-24 20:02:38 [manager.py:391] +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:338.0553722381592ms total_cost_time:338.09757232666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5183 prompt_cache_len:5151 prompt_cache_ratio:0.9938259695157244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:38 [batch.py:51] router release req id 8 +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:335.74557304382324ms total_cost_time:335.7884883880615ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5185 prompt_cache_len:5154 prompt_cache_ratio:0.9940212150433944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 +INFO 06-24 20:02:38 [batch.py:51] router release req id 120 +INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.18851876258850098 s +INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19037199020385742 s +INFO 06-24 20:02:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:38 [manager.py:224] router recive req id 120 cost time 0.17420148849487305 s +DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=292276559148319682824863746895715753657, time:1750766558.6680949s req_ids:[8] +DEBUG 06-24 20:02:38 [manager.py:391] +INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 120 cost time 0.1759662628173828 s +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=277429260547364294425535208052553364166, time:1750766558.7361224s req_ids:[120] +DEBUG 06-24 20:02:38 [manager.py:391] +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:336.2894058227539ms total_cost_time:336.3327980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5184 prompt_cache_len:5151 prompt_cache_ratio:0.9936342592592593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 +ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:38 [batch.py:51] router release req id 8 +INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:340.6651020050049ms total_cost_time:340.70611000061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5186 prompt_cache_len:5154 prompt_cache_ratio:0.9938295410721172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 +INFO 06-24 20:02:38 [batch.py:51] router release req id 120 +INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.19263410568237305 s +INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19434475898742676 s +INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17420172691345215 s +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=118327325064722403699989140212815933934, time:1750766559.0144398s req_ids:[8] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.1767127513885498 s +ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=62729845736357706451825428816338906382, time:1750766559.0825098s req_ids:[120] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:339.8125171661377ms total_cost_time:339.84994888305664ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5185 prompt_cache_len:5151 prompt_cache_ratio:0.9934426229508196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 +ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:39 [batch.py:51] router release req id 8 +INFO 06-24 20:02:39 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:337.8791809082031ms total_cost_time:337.9225730895996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5187 prompt_cache_len:5154 prompt_cache_ratio:0.993637941006362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 +INFO 06-24 20:02:39 [batch.py:51] router release req id 120 +INFO 06-24 20:02:39 [manager.py:224] router recive req id 8 cost time 0.19096636772155762 s +INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 8 cost time 0.1929643154144287 s +INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17554926872253418 s +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=64437539872323005501070832531747967521, time:1750766559.3594937s req_ids:[8] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.1775045394897461 s +ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=195780276065028672541593939212991762257, time:1750766559.4271584s req_ids:[120] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:339.89930152893066ms total_cost_time:339.94197845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5186 prompt_cache_len:5151 prompt_cache_ratio:0.9932510605476282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 +ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:39 [batch.py:51] router release req id 8 +INFO 06-24 20:02:39 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 first_token_cost:501.38211250305176ms total_cost_time:501.42478942871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5188 prompt_cache_len:5154 prompt_cache_ratio:0.9934464148033925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 +INFO 06-24 20:02:39 [batch.py:51] router release req id 120 +INFO 06-24 20:02:39 [manager.py:224] router recive req id 8 cost time 0.3508286476135254 s +INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 8 cost time 0.35257458686828613 s +INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17219257354736328 s +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=62827745303995127719037238041028755552, time:1750766559.8635664s req_ids:[8] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.17403888702392578 s +ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=276211917171070532462303628244523093220, time:1750766559.931689s req_ids:[120] +DEBUG 06-24 20:02:39 [manager.py:391] +INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:497.9369640350342ms total_cost_time:497.97916412353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5187 prompt_cache_len:5151 prompt_cache_ratio:0.9930595720069404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [batch.py:51] router release req id 8 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 first_token_cost:336.1246585845947ms total_cost_time:336.167573928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5189 prompt_cache_len:5154 prompt_cache_ratio:0.9932549624205049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 +INFO 06-24 20:02:40 [batch.py:51] router release req id 120 +INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.18917346000671387 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.19168543815612793 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.16874957084655762 s +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=24829423517738258191934446368084965230, time:1750766560.2027774s req_ids:[8] +DEBUG 06-24 20:02:40 [manager.py:391] +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.17043805122375488 s +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=47183152602107509258019726468215310231, time:1750766560.267163s req_ids:[120] +DEBUG 06-24 20:02:40 [manager.py:391] +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:329.64324951171875ms total_cost_time:329.6961784362793ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5188 prompt_cache_len:5151 prompt_cache_ratio:0.9928681572860447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:49 lightllm_req_id:400 first_token_cost:213.96732330322266ms total_cost_time:170911.7624759674ms,out_token_counter:4096 mean_per_token_cost_time: 41.674266394693404ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [batch.py:51] router release req id 400 +INFO 06-24 20:02:40 [batch.py:51] router release req id 8 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:296.2765693664551ms total_cost_time:296.31829261779785ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5190 prompt_cache_len:5154 prompt_cache_ratio:0.9930635838150289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 +DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:40 [batch.py:51] router release req id 120 +INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.07571887969970703 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.0699927806854248 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.021096467971801758 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.07718205451965332 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.07282781600952148 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.02525782585144043 s +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=294615103522950980572974644756615694644, time:1750766560.3752382s req_ids:[8, 400, 120] +DEBUG 06-24 20:02:40 [manager.py:391] +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:127.0895004272461ms total_cost_time:127.13336944580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5189 prompt_cache_len:5151 prompt_cache_ratio:0.9926768163422625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:120.69869041442871ms total_cost_time:120.73659896850586ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5191 prompt_cache_len:5154 prompt_cache_ratio:0.9928722789443267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:170.52340507507324ms total_cost_time:170.5484390258789ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5146 prompt_cache_len:5145 prompt_cache_ratio:0.9998056743101438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 +DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:40 [batch.py:51] router release req id 8 +INFO 06-24 20:02:40 [batch.py:51] router release req id 400 +INFO 06-24 20:02:40 [batch.py:51] router release req id 120 +INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.12004637718200684 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.06810331344604492 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.06298112869262695 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.12163305282592773 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.07121467590332031 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.06729984283447266 s +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=230329129274426114032723946096066484947, time:1750766560.5529442s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:40 [manager.py:391] +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:199.72586631774902ms total_cost_time:199.7685432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5190 prompt_cache_len:5151 prompt_cache_ratio:0.992485549132948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:147.9017734527588ms total_cost_time:147.92823791503906ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5192 prompt_cache_len:5154 prompt_cache_ratio:0.9926810477657936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:163.41471672058105ms total_cost_time:163.4519100189209ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5147 prompt_cache_len:5145 prompt_cache_ratio:0.9996114241305615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 +DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:40 [batch.py:51] router release req id 8 +INFO 06-24 20:02:40 [batch.py:51] router release req id 120 +INFO 06-24 20:02:40 [batch.py:51] router release req id 400 +INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.08940482139587402 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.08448123931884766 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.07314324378967285 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.09107470512390137 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.08757710456848145 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.07760453224182129 s +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=54783462695815102979971716264587004769, time:1750766560.7301013s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:40 [manager.py:391] +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:145.1871395111084ms total_cost_time:145.2312469482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5191 prompt_cache_len:5151 prompt_cache_ratio:0.9922943556154884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:183.8662624359131ms total_cost_time:183.90297889709473ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5193 prompt_cache_len:5154 prompt_cache_ratio:0.9924898902368573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:172.61433601379395ms total_cost_time:172.64080047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5148 prompt_cache_len:5145 prompt_cache_ratio:0.9994172494172494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 +DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:40 [batch.py:51] router release req id 8 +INFO 06-24 20:02:40 [batch.py:51] router release req id 120 +INFO 06-24 20:02:40 [batch.py:51] router release req id 400 +INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.1146082878112793 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.06705021858215332 s +INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.061409950256347656 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.11623930931091309 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.07025885581970215 s +INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.0658421516418457 s +DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=212707212336415738926563630506934879922, time:1750766560.9067447s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:40 [manager.py:391] +ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:198.54259490966797ms total_cost_time:198.59981536865234ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5192 prompt_cache_len:5151 prompt_cache_ratio:0.9921032357473035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:334.0299129486084ms total_cost_time:334.0728282928467ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5194 prompt_cache_len:5154 prompt_cache_ratio:0.9922988063149788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:328.48310470581055ms total_cost_time:328.5074234008789ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5149 prompt_cache_len:5145 prompt_cache_ratio:0.9992231501262381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 +DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:41 [batch.py:51] router release req id 8 +INFO 06-24 20:02:41 [batch.py:51] router release req id 120 +INFO 06-24 20:02:41 [batch.py:51] router release req id 400 +INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.25322508811950684 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06767559051513672 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.06233024597167969 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.25475335121154785 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07062792778015137 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06661105155944824 s +DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=177120281547588888327394775277671623407, time:1750766561.2497702s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:41 [manager.py:391] +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:349.0462303161621ms total_cost_time:349.0874767303467ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5193 prompt_cache_len:5151 prompt_cache_ratio:0.9919121894858464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:163.50150108337402ms total_cost_time:163.5277271270752ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5195 prompt_cache_len:5154 prompt_cache_ratio:0.9921077959576516 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:163.64121437072754ms total_cost_time:163.67554664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.034332275390625ms prompt_token_num:5150 prompt_cache_len:5145 prompt_cache_ratio:0.9990291262135922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 +DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:41 [batch.py:51] router release req id 8 +INFO 06-24 20:02:41 [batch.py:51] router release req id 120 +INFO 06-24 20:02:41 [batch.py:51] router release req id 400 +INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.07371664047241211 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06903576850891113 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.05933523178100586 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.07530403137207031 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07195568084716797 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06350278854370117 s +DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=60040809617159010657647324259870243873, time:1750766561.4285986s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:41 [manager.py:391] +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:146.27623558044434ms total_cost_time:146.31986618041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5194 prompt_cache_len:5151 prompt_cache_ratio:0.9917212167886023 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:169.52157020568848ms total_cost_time:169.56043243408203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5196 prompt_cache_len:5154 prompt_cache_ratio:0.9919168591224018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:159.9252223968506ms total_cost_time:159.95025634765625ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5151 prompt_cache_len:5145 prompt_cache_ratio:0.9988351776354106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 +DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:41 [batch.py:51] router release req id 8 +INFO 06-24 20:02:41 [batch.py:51] router release req id 120 +INFO 06-24 20:02:41 [batch.py:51] router release req id 400 +INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.09943914413452148 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06870055198669434 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.06301021575927734 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.10176420211791992 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07337760925292969 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.0698850154876709 s +DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=137366473691382659998425482788712593774, time:1750766561.606176s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:41 [manager.py:391] +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:199.38158988952637ms total_cost_time:199.42402839660645ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5195 prompt_cache_len:5151 prompt_cache_ratio:0.9915303176130895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:168.6270236968994ms total_cost_time:168.6534881591797ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5197 prompt_cache_len:5154 prompt_cache_ratio:0.9917259957667885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:165.20404815673828ms total_cost_time:165.24767875671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5152 prompt_cache_len:5145 prompt_cache_ratio:0.998641304347826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 +DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:41 [batch.py:51] router release req id 8 +INFO 06-24 20:02:41 [batch.py:51] router release req id 120 +INFO 06-24 20:02:41 [batch.py:51] router release req id 400 +INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.0691370964050293 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06269288063049316 s +INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.05591845512390137 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.07076549530029297 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.0657815933227539 s +INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06028246879577637 s +DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=121350927446857115063884673502289609276, time:1750766561.7840889s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:41 [manager.py:391] +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:144.8225975036621ms total_cost_time:144.92011070251465ms,out_token_counter:1 mean_per_token_cost_time: 0.09751319885253906ms prompt_token_num:5196 prompt_cache_len:5151 prompt_cache_ratio:0.9913394919168591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:138.44585418701172ms total_cost_time:138.4735107421875ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5198 prompt_cache_len:5154 prompt_cache_ratio:0.9915352058484033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 +ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:155.85637092590332ms total_cost_time:155.89427947998047ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5153 prompt_cache_len:5145 prompt_cache_ratio:0.9984475063070056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 +DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:42 [batch.py:51] router release req id 8 +INFO 06-24 20:02:42 [batch.py:51] router release req id 120 +INFO 06-24 20:02:42 [batch.py:51] router release req id 400 +INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.25313591957092285 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.24799203872680664 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.23288559913635254 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.25470542907714844 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.25093746185302734 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.23709869384765625 s +DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=35260259955341358144012866797719747089, time:1750766562.122755s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:42 [manager.py:391] +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:347.13053703308105ms total_cost_time:347.17535972595215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5197 prompt_cache_len:5151 prompt_cache_ratio:0.9911487396574947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:341.9039249420166ms total_cost_time:341.9318199157715ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5199 prompt_cache_len:5154 prompt_cache_ratio:0.9913444893248702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:346.099853515625ms total_cost_time:346.14062309265137ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5154 prompt_cache_len:5145 prompt_cache_ratio:0.9982537834691502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 +DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:42 [batch.py:51] router release req id 8 +INFO 06-24 20:02:42 [batch.py:51] router release req id 120 +INFO 06-24 20:02:42 [batch.py:51] router release req id 400 +INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.07563138008117676 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.07058191299438477 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.059890031814575195 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.07723164558410645 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.0736396312713623 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.06416821479797363 s +DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=293881141402435172941675046230236215183, time:1750766562.3001063s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:42 [manager.py:391] +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:148.16975593566895ms total_cost_time:148.21267127990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5198 prompt_cache_len:5151 prompt_cache_ratio:0.9909580607926125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:170.34912109375ms total_cost_time:170.38989067077637ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5200 prompt_cache_len:5154 prompt_cache_ratio:0.9911538461538462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:159.7440242767334ms total_cost_time:159.76905822753906ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5155 prompt_cache_len:5145 prompt_cache_ratio:0.9980601357904947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 +DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:42 [batch.py:51] router release req id 8 +INFO 06-24 20:02:42 [batch.py:51] router release req id 120 +INFO 06-24 20:02:42 [batch.py:51] router release req id 400 +INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.09848308563232422 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.06808829307556152 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.06273770332336426 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.10004639625549316 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.07109308242797852 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.0669863224029541 s +DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=208206045579062200952449328727469213305, time:1750766562.4776666s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:42 [manager.py:391] +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:149.97053146362305ms total_cost_time:150.01511573791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5199 prompt_cache_len:5151 prompt_cache_ratio:0.9907674552798615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:169.48652267456055ms total_cost_time:169.525146484375ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5201 prompt_cache_len:5154 prompt_cache_ratio:0.9909632762930206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:164.20340538024902ms total_cost_time:164.229154586792ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5156 prompt_cache_len:5145 prompt_cache_ratio:0.997866563227308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 +DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:42 [batch.py:51] router release req id 8 +INFO 06-24 20:02:42 [batch.py:51] router release req id 120 +INFO 06-24 20:02:42 [batch.py:51] router release req id 400 +INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.12104439735412598 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.06738829612731934 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.06241774559020996 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.12282443046569824 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.07056784629821777 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.06677842140197754 s +DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=128806875567830028876767950250780831397, time:1750766562.6545167s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:42 [manager.py:391] +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:199.33533668518066ms total_cost_time:199.37610626220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5200 prompt_cache_len:5151 prompt_cache_ratio:0.990576923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:145.63512802124023ms total_cost_time:145.6613540649414ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5202 prompt_cache_len:5154 prompt_cache_ratio:0.9907727797001153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:161.62467002868652ms total_cost_time:161.66234016418457ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5157 prompt_cache_len:5145 prompt_cache_ratio:0.997673065735893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 +DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:42 [batch.py:51] router release req id 8 +INFO 06-24 20:02:42 [batch.py:51] router release req id 120 +INFO 06-24 20:02:42 [batch.py:51] router release req id 400 +INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.08949828147888184 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.08461380004882812 s +INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.07227039337158203 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.09049105644226074 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.08624386787414551 s +INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.07428646087646484 s +DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=228776173924554834851270083177592946646, time:1750766562.8317842s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:42 [manager.py:391] +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:145.65062522888184ms total_cost_time:145.7064151763916ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5201 prompt_cache_len:5151 prompt_cache_ratio:0.9903864641415112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 +ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:191.08128547668457ms total_cost_time:191.12396240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5203 prompt_cache_len:5154 prompt_cache_ratio:0.9905823563328848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:331.4690589904785ms total_cost_time:331.5122127532959ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5158 prompt_cache_len:5145 prompt_cache_ratio:0.9974796432725863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 +DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:43 [batch.py:51] router release req id 8 +INFO 06-24 20:02:43 [batch.py:51] router release req id 120 +INFO 06-24 20:02:43 [batch.py:51] router release req id 400 +INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.2753880023956299 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.22573065757751465 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.07296562194824219 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.27635765075683594 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.22722578048706055 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.07490873336791992 s +DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=148608290994619930076366228567200426137, time:1750766563.1680388s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:43 [manager.py:391] +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:347.6216793060303ms total_cost_time:347.66626358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5202 prompt_cache_len:5151 prompt_cache_ratio:0.9901960784313726 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:325.7572650909424ms total_cost_time:325.7937431335449ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5204 prompt_cache_len:5154 prompt_cache_ratio:0.990392006149116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:173.065185546875ms total_cost_time:173.08950424194336ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5159 prompt_cache_len:5145 prompt_cache_ratio:0.9972862957937585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 +DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:43 [batch.py:51] router release req id 8 +INFO 06-24 20:02:43 [batch.py:51] router release req id 120 +INFO 06-24 20:02:43 [batch.py:51] router release req id 400 +INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.09999465942382812 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.06880879402160645 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.10091376304626465 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.06396603584289551 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07018303871154785 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06577944755554199 s +DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=317668582527231393955332500802508033814, time:1750766563.3453636s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:43 [manager.py:391] +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:148.03028106689453ms total_cost_time:148.0727195739746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5203 prompt_cache_len:5151 prompt_cache_ratio:0.990005765904286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:167.3107147216797ms total_cost_time:167.35053062438965ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5205 prompt_cache_len:5154 prompt_cache_ratio:0.9902017291066283 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:164.6416187286377ms total_cost_time:164.69359397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:5160 prompt_cache_len:5145 prompt_cache_ratio:0.997093023255814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 +DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:43 [batch.py:51] router release req id 8 +INFO 06-24 20:02:43 [batch.py:51] router release req id 120 +INFO 06-24 20:02:43 [batch.py:51] router release req id 400 +INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.12473940849304199 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.0742034912109375 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.1256732940673828 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.0655219554901123 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07572269439697266 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06733393669128418 s +DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=53283205650233727839010787215544686011, time:1750766563.524954s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:43 [manager.py:391] +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:197.34644889831543ms total_cost_time:197.3886489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5204 prompt_cache_len:5151 prompt_cache_ratio:0.989815526518063 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:175.4145622253418ms total_cost_time:175.45175552368164ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5206 prompt_cache_len:5154 prompt_cache_ratio:0.9900115251632732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:166.67866706848145ms total_cost_time:166.7032241821289ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5161 prompt_cache_len:5145 prompt_cache_ratio:0.9968998256151909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 +DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:43 [batch.py:51] router release req id 8 +INFO 06-24 20:02:43 [batch.py:51] router release req id 120 +INFO 06-24 20:02:43 [batch.py:51] router release req id 400 +INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.10004329681396484 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.06942963600158691 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.06429123878479004 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.1009972095489502 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07090091705322266 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06582856178283691 s +DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=315500482468433114172399458886555902873, time:1750766563.7022219s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:43 [manager.py:391] +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:197.69787788391113ms total_cost_time:197.7403163909912ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5205 prompt_cache_len:5151 prompt_cache_ratio:0.9896253602305476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:167.12594032287598ms total_cost_time:167.15407371520996ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5207 prompt_cache_len:5154 prompt_cache_ratio:0.9898213942769349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:187.4704360961914ms total_cost_time:187.50882148742676ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5162 prompt_cache_len:5145 prompt_cache_ratio:0.996706702828361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 +DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:43 [batch.py:51] router release req id 8 +INFO 06-24 20:02:43 [batch.py:51] router release req id 120 +INFO 06-24 20:02:43 [batch.py:51] router release req id 400 +INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.1163320541381836 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.11059951782226562 s +INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.09522318840026855 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.11796021461486816 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.11366653442382812 s +INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.09942770004272461 s +DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=319947527375132010620479070993530412357, time:1750766563.9242387s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:43 [manager.py:391] +DEBUG 06-24 20:02:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 46186.739 tokens/s +DEBUG 06-24 20:02:43 [stats.py:37] Avg prompt tokens throughput: 46159.030 tokens/s +DEBUG 06-24 20:02:43 [stats.py:37] Avg generate tokens throughput: 27.710 tokens/s +ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:196.23327255249023ms total_cost_time:196.27642631530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5206 prompt_cache_len:5151 prompt_cache_ratio:0.9894352669996158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:438.7969970703125ms total_cost_time:438.8401508331299ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5208 prompt_cache_len:5154 prompt_cache_ratio:0.9896313364055299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:423.4936237335205ms total_cost_time:423.5203266143799ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5163 prompt_cache_len:5145 prompt_cache_ratio:0.9965136548518303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 +DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:44 [batch.py:51] router release req id 8 +INFO 06-24 20:02:44 [batch.py:51] router release req id 120 +INFO 06-24 20:02:44 [batch.py:51] router release req id 400 +INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.3417627811431885 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.09117412567138672 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.0856773853302002 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.34337449073791504 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.09405755996704102 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.08978867530822754 s +DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=230783454148511429882451945577098558046, time:1750766564.3544729s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:44 [manager.py:391] +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:401.20410919189453ms total_cost_time:401.247501373291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5207 prompt_cache_len:5151 prompt_cache_ratio:0.9892452467831765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:201.5233039855957ms total_cost_time:201.56216621398926ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5209 prompt_cache_len:5154 prompt_cache_ratio:0.9894413515070071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:209.26284790039062ms total_cost_time:209.30075645446777ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5164 prompt_cache_len:5145 prompt_cache_ratio:0.9963206816421378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 +DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:44 [batch.py:51] router release req id 8 +INFO 06-24 20:02:44 [batch.py:51] router release req id 120 +INFO 06-24 20:02:44 [batch.py:51] router release req id 400 +INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.15827465057373047 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.10768008232116699 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.09426021575927734 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.15978455543518066 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.11042118072509766 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.09823155403137207 s +DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=275655102703287675180080340832436846042, time:1750766564.5763066s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:44 [manager.py:391] +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:249.3112087249756ms total_cost_time:249.35460090637207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5208 prompt_cache_len:5151 prompt_cache_ratio:0.9890552995391705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:198.5642910003662ms total_cost_time:198.59051704406738ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5210 prompt_cache_len:5154 prompt_cache_ratio:0.9892514395393474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:215.7444953918457ms total_cost_time:215.78264236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5165 prompt_cache_len:5145 prompt_cache_ratio:0.9961277831558567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 +DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:44 [batch.py:51] router release req id 8 +INFO 06-24 20:02:44 [batch.py:51] router release req id 120 +INFO 06-24 20:02:44 [batch.py:51] router release req id 400 +INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.12160444259643555 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.11640071868896484 s +INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.09479784965515137 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.12312984466552734 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.11922335624694824 s +INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.09863018989562988 s +DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=49281106245312341984666419430909755772, time:1750766564.7995226s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:44 [manager.py:391] +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:196.4707374572754ms total_cost_time:196.51460647583008ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5209 prompt_cache_len:5151 prompt_cache_ratio:0.9888654252255711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:240.75651168823242ms total_cost_time:240.79442024230957ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5211 prompt_cache_len:5154 prompt_cache_ratio:0.9890616004605642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:219.03252601623535ms total_cost_time:219.0573215484619ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5166 prompt_cache_len:5145 prompt_cache_ratio:0.9959349593495935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 +INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 +DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:45 [batch.py:51] router release req id 8 +INFO 06-24 20:02:45 [batch.py:51] router release req id 120 +INFO 06-24 20:02:45 [batch.py:51] router release req id 400 +INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.1434493064880371 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.09111571311950684 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.08559679985046387 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.14496564865112305 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.09403395652770996 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.08971214294433594 s +DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=95099462047739173574208768820256951114, time:1750766565.0238433s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:45 [manager.py:391] +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:249.44019317626953ms total_cost_time:249.4826316833496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5210 prompt_cache_len:5151 prompt_cache_ratio:0.9886756238003839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:197.12543487548828ms total_cost_time:197.15142250061035ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5212 prompt_cache_len:5154 prompt_cache_ratio:0.988871834228703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:212.59140968322754ms total_cost_time:212.62860298156738ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5167 prompt_cache_len:5145 prompt_cache_ratio:0.9957422101799884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 +DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:45 [batch.py:51] router release req id 8 +INFO 06-24 20:02:45 [batch.py:51] router release req id 120 +INFO 06-24 20:02:45 [batch.py:51] router release req id 400 +INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.10842728614807129 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.10210609436035156 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09076905250549316 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.10999822616577148 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.10509061813354492 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.09489107131958008 s +DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=317818893624569935822494954495013423899, time:1750766565.2470171s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:45 [manager.py:391] +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:347.0473289489746ms total_cost_time:347.0919132232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5211 prompt_cache_len:5151 prompt_cache_ratio:0.9884858952216465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:391.62445068359375ms total_cost_time:391.6647434234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5213 prompt_cache_len:5154 prompt_cache_ratio:0.9886821408018416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:393.04447174072266ms total_cost_time:393.0854797363281ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5168 prompt_cache_len:5145 prompt_cache_ratio:0.9955495356037152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 +DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:45 [batch.py:51] router release req id 8 +INFO 06-24 20:02:45 [batch.py:51] router release req id 120 +INFO 06-24 20:02:45 [batch.py:51] router release req id 400 +INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.15468955039978027 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.10419964790344238 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09134817123413086 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.15636634826660156 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.10712957382202148 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.09556221961975098 s +DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=286179879701422339754763011790231859823, time:1750766565.644323s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:45 [manager.py:391] +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:249.9847412109375ms total_cost_time:250.02765655517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5212 prompt_cache_len:5151 prompt_cache_ratio:0.9882962394474291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:199.27287101745605ms total_cost_time:199.29957389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5214 prompt_cache_len:5154 prompt_cache_ratio:0.9884925201380897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:216.79091453552246ms total_cost_time:216.83382987976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5169 prompt_cache_len:5145 prompt_cache_ratio:0.9953569355774812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 +DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:45 [batch.py:51] router release req id 8 +INFO 06-24 20:02:45 [batch.py:51] router release req id 120 +INFO 06-24 20:02:45 [batch.py:51] router release req id 400 +INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.12239265441894531 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.1168978214263916 s +INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09710407257080078 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.1241767406463623 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.12004923820495605 s +INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.10146474838256836 s +DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=171311091287859552820779734640431589121, time:1750766565.8727746s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:45 [manager.py:391] +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:197.28970527648926ms total_cost_time:197.33381271362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5213 prompt_cache_len:5151 prompt_cache_ratio:0.9881066564358335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:240.74077606201172ms total_cost_time:240.7820224761963ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5215 prompt_cache_len:5154 prompt_cache_ratio:0.9883029721955896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:220.96943855285645ms total_cost_time:220.99590301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5170 prompt_cache_len:5145 prompt_cache_ratio:0.995164410058027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 +INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 +DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:46 [batch.py:51] router release req id 8 +INFO 06-24 20:02:46 [batch.py:51] router release req id 120 +INFO 06-24 20:02:46 [batch.py:51] router release req id 400 +INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.14344072341918945 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.0900886058807373 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.08398175239562988 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.14513826370239258 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.09312033653259277 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.08826327323913574 s +DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=72655901201623586805876811164415711376, time:1750766566.0961924s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:46 [manager.py:391] +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:202.3460865020752ms total_cost_time:202.38947868347168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5214 prompt_cache_len:5151 prompt_cache_ratio:0.9879171461449943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:199.71895217895508ms total_cost_time:199.75709915161133ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5216 prompt_cache_len:5154 prompt_cache_ratio:0.9881134969325154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:209.1236114501953ms total_cost_time:209.16104316711426ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5171 prompt_cache_len:5145 prompt_cache_ratio:0.9949719590021272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 +DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:46 [batch.py:51] router release req id 8 +INFO 06-24 20:02:46 [batch.py:51] router release req id 120 +INFO 06-24 20:02:46 [batch.py:51] router release req id 400 +INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.16060757637023926 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.11036467552185059 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.09540677070617676 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.16232705116271973 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.11345648765563965 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.09962940216064453 s +INFO 06-24 20:02:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=40453273424850931302616342077142105795, time:1750766566.3215234s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:46 [manager.py:391] +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:249.01771545410156ms total_cost_time:249.0713596343994ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5215 prompt_cache_len:5151 prompt_cache_ratio:0.9877277085330777 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:233.56938362121582ms total_cost_time:233.6099147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5217 prompt_cache_len:5154 prompt_cache_ratio:0.9879240943070731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:218.57571601867676ms total_cost_time:218.60146522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5172 prompt_cache_len:5145 prompt_cache_ratio:0.9947795823665894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 +DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:46 [batch.py:51] router release req id 8 +INFO 06-24 20:02:46 [batch.py:51] router release req id 120 +INFO 06-24 20:02:46 [batch.py:51] router release req id 400 +INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.28092455863952637 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.24422788619995117 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.23957157135009766 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.28275585174560547 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.24747443199157715 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.24407458305358887 s +DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=172230966633847778216040757786092878543, time:1750766566.696777s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:46 [manager.py:391] +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:350.7411479949951ms total_cost_time:350.7864475250244ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5216 prompt_cache_len:5151 prompt_cache_ratio:0.9875383435582822 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:346.33398056030273ms total_cost_time:346.3706970214844ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5218 prompt_cache_len:5154 prompt_cache_ratio:0.987734764277501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:341.71533584594727ms total_cost_time:341.7401313781738ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5173 prompt_cache_len:5145 prompt_cache_ratio:0.9945872801082544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 +DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:46 [batch.py:51] router release req id 8 +INFO 06-24 20:02:46 [batch.py:51] router release req id 120 +INFO 06-24 20:02:46 [batch.py:51] router release req id 400 +INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.10393142700195312 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.06840944290161133 s +INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.06266450881958008 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.10568881034851074 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.07154107093811035 s +INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.06706380844116211 s +DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=42485708086675373357309966435325513359, time:1750766566.8757374s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:46 [manager.py:391] +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:199.93972778320312ms total_cost_time:199.981689453125ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5217 prompt_cache_len:5151 prompt_cache_ratio:0.9873490511788384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:164.33358192443848ms total_cost_time:164.36004638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5219 prompt_cache_len:5154 prompt_cache_ratio:0.9875455068020693 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:162.92285919189453ms total_cost_time:162.95766830444336ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5174 prompt_cache_len:5145 prompt_cache_ratio:0.9943950521839969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 +INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 +DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:47 [batch.py:51] router release req id 8 +INFO 06-24 20:02:47 [batch.py:51] router release req id 120 +INFO 06-24 20:02:47 [batch.py:51] router release req id 400 +INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.07356977462768555 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06813430786132812 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06139397621154785 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.0751190185546875 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07128763198852539 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06577181816101074 s +DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=141621909717983627052793268080161525040, time:1750766567.0530863s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:47 [manager.py:391] +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:145.16496658325195ms total_cost_time:145.21026611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5218 prompt_cache_len:5151 prompt_cache_ratio:0.9871598313530088 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:167.7708625793457ms total_cost_time:167.80710220336914ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5220 prompt_cache_len:5154 prompt_cache_ratio:0.9873563218390805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:161.07988357543945ms total_cost_time:161.10491752624512ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5175 prompt_cache_len:5145 prompt_cache_ratio:0.9942028985507246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 +DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:47 [batch.py:51] router release req id 8 +INFO 06-24 20:02:47 [batch.py:51] router release req id 120 +INFO 06-24 20:02:47 [batch.py:51] router release req id 400 +INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.09864521026611328 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06913280487060547 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06261277198791504 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.10019087791442871 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.0721583366394043 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06686139106750488 s +DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=65744191368026087255134059996568159161, time:1750766567.2308347s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:47 [manager.py:391] +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:148.27466011047363ms total_cost_time:148.31852912902832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5219 prompt_cache_len:5151 prompt_cache_ratio:0.9869706840390879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:169.89970207214355ms total_cost_time:169.94023323059082ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5221 prompt_cache_len:5154 prompt_cache_ratio:0.9871672093468684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:163.47551345825195ms total_cost_time:163.50221633911133ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5176 prompt_cache_len:5145 prompt_cache_ratio:0.9940108191653787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 +DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:47 [batch.py:51] router release req id 8 +INFO 06-24 20:02:47 [batch.py:51] router release req id 120 +INFO 06-24 20:02:47 [batch.py:51] router release req id 400 +INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.12335062026977539 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06944751739501953 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06419968605041504 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.12493491172790527 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07249283790588379 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.0684814453125 s +DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=1804965171305388270106208883991935717, time:1750766567.4084427s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:47 [manager.py:391] +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:199.97620582580566ms total_cost_time:200.01840591430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5220 prompt_cache_len:5151 prompt_cache_ratio:0.9867816091954023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:146.071195602417ms total_cost_time:146.09718322753906ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5222 prompt_cache_len:5154 prompt_cache_ratio:0.9869781692837993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:329.0572166442871ms total_cost_time:329.1027545928955ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5177 prompt_cache_len:5145 prompt_cache_ratio:0.9938188139849333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 +DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:47 [batch.py:51] router release req id 8 +INFO 06-24 20:02:47 [batch.py:51] router release req id 120 +INFO 06-24 20:02:47 [batch.py:51] router release req id 400 +INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.25643396377563477 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.2510795593261719 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.07161736488342285 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.2581908702850342 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.2542910575866699 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.07609987258911133 s +DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=67843138287693751065499900990329021479, time:1750766567.751175s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:47 [manager.py:391] +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:347.55587577819824ms total_cost_time:347.59998321533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5221 prompt_cache_len:5151 prompt_cache_ratio:0.9865926067803102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:342.17238426208496ms total_cost_time:342.1976566314697ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5223 prompt_cache_len:5154 prompt_cache_ratio:0.9867892016082711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:181.56719207763672ms total_cost_time:181.60605430603027ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5178 prompt_cache_len:5145 prompt_cache_ratio:0.9936268829663963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 +DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:47 [batch.py:51] router release req id 8 +INFO 06-24 20:02:47 [batch.py:51] router release req id 120 +INFO 06-24 20:02:47 [batch.py:51] router release req id 400 +INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.07826685905456543 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.07308268547058105 s +INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06275081634521484 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.0798642635345459 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07611298561096191 s +INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06707406044006348 s +DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=113979492298701802096809321035682046342, time:1750766567.928425s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:47 [manager.py:391] +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:147.2771167755127ms total_cost_time:147.40657806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.12946128845214844ms prompt_token_num:5222 prompt_cache_len:5151 prompt_cache_ratio:0.9864036767522022 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:173.83193969726562ms total_cost_time:173.87104034423828ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5224 prompt_cache_len:5154 prompt_cache_ratio:0.9866003062787136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:163.59853744506836ms total_cost_time:163.62428665161133ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5179 prompt_cache_len:5145 prompt_cache_ratio:0.9934350260668082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 +DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:48 [batch.py:51] router release req id 8 +INFO 06-24 20:02:48 [batch.py:51] router release req id 120 +INFO 06-24 20:02:48 [batch.py:51] router release req id 400 +INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.10093259811401367 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06888270378112793 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06319141387939453 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.10255908966064453 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07193541526794434 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06752872467041016 s +DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=171026834703893758007870875179720792469, time:1750766568.1073458s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:48 [manager.py:391] +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:197.1752643585205ms total_cost_time:197.21746444702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5223 prompt_cache_len:5151 prompt_cache_ratio:0.9862148190695003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:165.09127616882324ms total_cost_time:165.1175022125244ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5225 prompt_cache_len:5154 prompt_cache_ratio:0.9864114832535885 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:164.1671657562256ms total_cost_time:164.2019748687744ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5180 prompt_cache_len:5145 prompt_cache_ratio:0.9932432432432432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 +DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:48 [batch.py:51] router release req id 8 +INFO 06-24 20:02:48 [batch.py:51] router release req id 120 +INFO 06-24 20:02:48 [batch.py:51] router release req id 400 +INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.07435035705566406 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06873035430908203 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.062180280685424805 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.07669281959533691 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07339644432067871 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06882047653198242 s +DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=161512257408937614815745953922568981624, time:1750766568.2853134s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:48 [manager.py:391] +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:145.45559883117676ms total_cost_time:145.49803733825684ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5224 prompt_cache_len:5151 prompt_cache_ratio:0.9860260336906586 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:170.00246047973633ms total_cost_time:170.0572967529297ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5226 prompt_cache_len:5154 prompt_cache_ratio:0.9862227324913893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:163.47169876098633ms total_cost_time:163.4986400604248ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5181 prompt_cache_len:5145 prompt_cache_ratio:0.9930515344528084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 +DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:48 [batch.py:51] router release req id 8 +INFO 06-24 20:02:48 [batch.py:51] router release req id 120 +INFO 06-24 20:02:48 [batch.py:51] router release req id 400 +INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.10375332832336426 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06914138793945312 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06386280059814453 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.1048882007598877 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.0706186294555664 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06560993194580078 s +DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=220389546080767873193089226288918592327, time:1750766568.4676797s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:48 [manager.py:391] +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:349.9183654785156ms total_cost_time:349.963903427124ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5225 prompt_cache_len:5151 prompt_cache_ratio:0.9858373205741627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:315.2749538421631ms total_cost_time:315.30237197875977ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5227 prompt_cache_len:5154 prompt_cache_ratio:0.9860340539506409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:329.6546936035156ms total_cost_time:329.6947479248047ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5182 prompt_cache_len:5145 prompt_cache_ratio:0.9928598996526438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 +DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:48 [batch.py:51] router release req id 8 +INFO 06-24 20:02:48 [batch.py:51] router release req id 120 +INFO 06-24 20:02:48 [batch.py:51] router release req id 400 +INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.09048676490783691 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.08513689041137695 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.07510995864868164 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.09225940704345703 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.08833074569702148 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.07944226264953613 s +DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=183814381258494528671191550415711091738, time:1750766568.813486s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:48 [manager.py:391] +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:148.40197563171387ms total_cost_time:148.44512939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5226 prompt_cache_len:5151 prompt_cache_ratio:0.9856486796785304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:189.38350677490234ms total_cost_time:189.4216537475586ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5228 prompt_cache_len:5154 prompt_cache_ratio:0.9858454475899006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:179.2891025543213ms total_cost_time:179.31342124938965ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5183 prompt_cache_len:5145 prompt_cache_ratio:0.9926683387999228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 +INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 +DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:48 [batch.py:51] router release req id 8 +INFO 06-24 20:02:48 [batch.py:51] router release req id 120 +INFO 06-24 20:02:48 [batch.py:51] router release req id 400 +INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.11712503433227539 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06935739517211914 s +INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06408143043518066 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.11897540092468262 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07263517379760742 s +INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06847476959228516 s +DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=193723613199783006716287912473031008588, time:1750766568.9948952s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:48 [manager.py:391] +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:197.25370407104492ms total_cost_time:197.2956657409668ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5227 prompt_cache_len:5151 prompt_cache_ratio:0.9854601109623111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:149.43504333496094ms total_cost_time:149.4605541229248ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5229 prompt_cache_len:5154 prompt_cache_ratio:0.9856569133677567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:167.88601875305176ms total_cost_time:167.9244041442871ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5184 prompt_cache_len:5145 prompt_cache_ratio:0.9924768518518519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 +DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:49 [batch.py:51] router release req id 8 +INFO 06-24 20:02:49 [batch.py:51] router release req id 120 +INFO 06-24 20:02:49 [batch.py:51] router release req id 400 +INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.09158658981323242 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.0866386890411377 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.0740041732788086 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.09335565567016602 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.0898129940032959 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.07839202880859375 s +DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=36108071487365200816681941474101197067, time:1750766569.1776986s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:49 [manager.py:391] +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:193.80664825439453ms total_cost_time:193.8493251800537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5228 prompt_cache_len:5151 prompt_cache_ratio:0.9852716143840857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:188.80295753479004ms total_cost_time:188.82989883422852ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5230 prompt_cache_len:5154 prompt_cache_ratio:0.9854684512428298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:178.21812629699707ms total_cost_time:178.2546043395996ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5185 prompt_cache_len:5145 prompt_cache_ratio:0.9922854387656702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 +DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:49 [batch.py:51] router release req id 8 +INFO 06-24 20:02:49 [batch.py:51] router release req id 120 +INFO 06-24 20:02:49 [batch.py:51] router release req id 400 +INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.07204031944274902 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.06410527229309082 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.055680274963378906 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.07362127304077148 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.06734776496887207 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.060156822204589844 s +DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=332687357680878472029442304223983177776, time:1750766569.3614087s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:49 [manager.py:391] +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:145.51758766174316ms total_cost_time:145.55978775024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5229 prompt_cache_len:5151 prompt_cache_ratio:0.985083189902467 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:168.54619979858398ms total_cost_time:168.58267784118652ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5231 prompt_cache_len:5154 prompt_cache_ratio:0.9852800611737718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:160.1412296295166ms total_cost_time:160.16697883605957ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5186 prompt_cache_len:5145 prompt_cache_ratio:0.9920940994986502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 +DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:49 [batch.py:51] router release req id 8 +INFO 06-24 20:02:49 [batch.py:51] router release req id 120 +INFO 06-24 20:02:49 [batch.py:51] router release req id 400 +INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.2644016742706299 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.23098254203796387 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.22561407089233398 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.2661874294281006 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.23415732383728027 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.22982549667358398 s +DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=54126910484437954321278348656260176441, time:1750766569.7047596s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:49 [manager.py:391] +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:350.5678176879883ms total_cost_time:350.61097145080566ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5230 prompt_cache_len:5151 prompt_cache_ratio:0.9848948374760994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:317.05760955810547ms total_cost_time:317.08431243896484ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5232 prompt_cache_len:5154 prompt_cache_ratio:0.9850917431192661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:331.2101364135742ms total_cost_time:331.24756813049316ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5187 prompt_cache_len:5145 prompt_cache_ratio:0.9919028340080972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 +DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:49 [batch.py:51] router release req id 8 +INFO 06-24 20:02:49 [batch.py:51] router release req id 120 +INFO 06-24 20:02:49 [batch.py:51] router release req id 400 +INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.0867466926574707 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.08163571357727051 s +INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.06937360763549805 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.08866572380065918 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.08495783805847168 s +INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.0737156867980957 s +DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=284584860480274361578153255982552055212, time:1750766569.885161s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:49 [manager.py:391] +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:148.48971366882324ms total_cost_time:148.53262901306152ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5231 prompt_cache_len:5151 prompt_cache_ratio:0.984706557063659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:184.68618392944336ms total_cost_time:184.73505973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:5233 prompt_cache_len:5154 prompt_cache_ratio:0.9849034970380279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:172.41263389587402ms total_cost_time:172.44458198547363ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:5188 prompt_cache_len:5145 prompt_cache_ratio:0.9917116422513492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 +INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 +DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:50 [batch.py:51] router release req id 8 +INFO 06-24 20:02:50 [batch.py:51] router release req id 120 +INFO 06-24 20:02:50 [batch.py:51] router release req id 400 +INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.11399674415588379 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.0692594051361084 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.06387782096862793 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.11558175086975098 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.07245659828186035 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.06834053993225098 s +DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=113887091680116134046690464418394073136, time:1750766570.0675366s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:50 [manager.py:391] +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:198.96626472473145ms total_cost_time:199.01132583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5232 prompt_cache_len:5151 prompt_cache_ratio:0.9845183486238532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:154.41489219665527ms total_cost_time:154.44087982177734ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5234 prompt_cache_len:5154 prompt_cache_ratio:0.9847153228888039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:168.75576972961426ms total_cost_time:168.7936782836914ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5189 prompt_cache_len:5145 prompt_cache_ratio:0.9915205241857776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 +DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:50 [batch.py:51] router release req id 8 +INFO 06-24 20:02:50 [batch.py:51] router release req id 120 +INFO 06-24 20:02:50 [batch.py:51] router release req id 400 +INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.08876347541809082 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.08342266082763672 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.07222270965576172 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.09048843383789062 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.08654928207397461 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.07658696174621582 s +DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=72154061987392291080148914565191156117, time:1750766570.250024s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:50 [manager.py:391] +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:146.91519737243652ms total_cost_time:146.93355560302734ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:5233 prompt_cache_len:5151 prompt_cache_ratio:0.9843302121154214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:187.8368854522705ms total_cost_time:187.87527084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5235 prompt_cache_len:5154 prompt_cache_ratio:0.9845272206303725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:176.70536041259766ms total_cost_time:176.73087120056152ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5190 prompt_cache_len:5145 prompt_cache_ratio:0.9913294797687862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 +DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:50 [batch.py:51] router release req id 8 +INFO 06-24 20:02:50 [batch.py:51] router release req id 120 +INFO 06-24 20:02:50 [batch.py:51] router release req id 400 +INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.11793732643127441 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.06876111030578613 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.06348848342895508 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.11950349807739258 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.07170939445495605 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.06775021553039551 s +DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=116340167260195613421598654793460460159, time:1750766570.4315321s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:50 [manager.py:391] +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:202.07953453063965ms total_cost_time:202.12221145629883ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5234 prompt_cache_len:5151 prompt_cache_ratio:0.9841421474971341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:152.82869338989258ms total_cost_time:152.85396575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5236 prompt_cache_len:5154 prompt_cache_ratio:0.9843391902215431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:169.85654830932617ms total_cost_time:169.9063777923584ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5191 prompt_cache_len:5145 prompt_cache_ratio:0.9911385089578116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 +DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:50 [batch.py:51] router release req id 8 +INFO 06-24 20:02:50 [batch.py:51] router release req id 120 +INFO 06-24 20:02:50 [batch.py:51] router release req id 400 +INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.2665128707885742 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.2609217166900635 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.24997472763061523 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.26821470260620117 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.2642223834991455 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.254561185836792 s +DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=114861829841792204182494582706844302284, time:1750766570.792475s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:50 [manager.py:391] +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:345.2742099761963ms total_cost_time:345.3185558319092ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5235 prompt_cache_len:5151 prompt_cache_ratio:0.9839541547277937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:339.80703353881836ms total_cost_time:339.832067489624ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5237 prompt_cache_len:5154 prompt_cache_ratio:0.9841512316211571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 +ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:352.7500629425049ms total_cost_time:352.78868675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5192 prompt_cache_len:5145 prompt_cache_ratio:0.9909476117103235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 +DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:50 [batch.py:51] router release req id 8 +INFO 06-24 20:02:50 [batch.py:51] router release req id 120 +INFO 06-24 20:02:50 [batch.py:51] router release req id 400 +INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.09287905693054199 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.08757781982421875 s +INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.07364726066589355 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.09470415115356445 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.09082150459289551 s +INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.07813429832458496 s +DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=258074016904941905616835299421948813108, time:1750766570.9736176s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:50 [manager.py:391] +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:194.53167915344238ms total_cost_time:194.57507133483887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5236 prompt_cache_len:5151 prompt_cache_ratio:0.9837662337662337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:189.13841247558594ms total_cost_time:189.1646385192871ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5238 prompt_cache_len:5154 prompt_cache_ratio:0.983963344788087 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:178.93052101135254ms total_cost_time:178.96628379821777ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:5193 prompt_cache_len:5145 prompt_cache_ratio:0.9907567879838244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 +DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:51 [batch.py:51] router release req id 8 +INFO 06-24 20:02:51 [batch.py:51] router release req id 120 +INFO 06-24 20:02:51 [batch.py:51] router release req id 400 +INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.07263016700744629 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.0673072338104248 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.05789756774902344 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.07437014579772949 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.07047247886657715 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.06220197677612305 s +DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=5945537567179164147587546669967627772, time:1750766571.157539s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:51 [manager.py:391] +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:145.4331874847412ms total_cost_time:145.4770565032959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5237 prompt_cache_len:5151 prompt_cache_ratio:0.9835783845713194 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:171.53310775756836ms total_cost_time:171.5712547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5239 prompt_cache_len:5154 prompt_cache_ratio:0.9837755296812368 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:162.07170486450195ms total_cost_time:162.09721565246582ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5194 prompt_cache_len:5145 prompt_cache_ratio:0.9905660377358491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 +DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:51 [batch.py:51] router release req id 8 +INFO 06-24 20:02:51 [batch.py:51] router release req id 120 +INFO 06-24 20:02:51 [batch.py:51] router release req id 400 +INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.10373950004577637 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.06980681419372559 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.06439328193664551 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.10528230667114258 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.07274293899536133 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.06854772567749023 s +DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=119026861543401500830279452836118553216, time:1750766571.33999s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:51 [manager.py:391] +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:199.16558265686035ms total_cost_time:199.20778274536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5238 prompt_cache_len:5151 prompt_cache_ratio:0.9833906071019473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:165.21811485290527ms total_cost_time:165.24481773376465ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5240 prompt_cache_len:5154 prompt_cache_ratio:0.983587786259542 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:183.20798873901367ms total_cost_time:183.2449436187744ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5195 prompt_cache_len:5145 prompt_cache_ratio:0.9903753609239654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 +DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:51 [batch.py:51] router release req id 8 +INFO 06-24 20:02:51 [batch.py:51] router release req id 120 +INFO 06-24 20:02:51 [batch.py:51] router release req id 400 +INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.23725247383117676 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.23113727569580078 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.22025251388549805 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.23898601531982422 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.23438167572021484 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.22449541091918945 s +DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=131024800894177316653257120401693257, time:1750766571.6836846s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:51 [manager.py:391] +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:295.058012008667ms total_cost_time:295.1023578643799ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5239 prompt_cache_len:5151 prompt_cache_ratio:0.9832029013170452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:335.446834564209ms total_cost_time:335.48808097839355ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5241 prompt_cache_len:5154 prompt_cache_ratio:0.9834001144819691 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:324.4318962097168ms total_cost_time:324.45812225341797ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5196 prompt_cache_len:5145 prompt_cache_ratio:0.9901847575057737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 +DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:51 [batch.py:51] router release req id 8 +INFO 06-24 20:02:51 [batch.py:51] router release req id 120 +INFO 06-24 20:02:51 [batch.py:51] router release req id 400 +INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.11945700645446777 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.0689542293548584 s +INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.06354188919067383 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.12111949920654297 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.0722959041595459 s +INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.0680394172668457 s +DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=9231044424691171558073278120236064217, time:1750766571.865661s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:51 [manager.py:391] +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:197.8168487548828ms total_cost_time:197.8588104248047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5240 prompt_cache_len:5151 prompt_cache_ratio:0.9830152671755725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:147.5076675415039ms total_cost_time:147.53413200378418ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5242 prompt_cache_len:5154 prompt_cache_ratio:0.9832125143075162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 +ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:166.87893867492676ms total_cost_time:166.9178009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5197 prompt_cache_len:5145 prompt_cache_ratio:0.9899942274389071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 +DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:52 [batch.py:51] router release req id 8 +INFO 06-24 20:02:52 [batch.py:51] router release req id 120 +INFO 06-24 20:02:52 [batch.py:51] router release req id 400 +INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.09177446365356445 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.08695030212402344 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.07159900665283203 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.09340262413024902 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.08996081352233887 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.0758049488067627 s +DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=3241771730246344814940865378067211563, time:1750766572.0470524s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:52 [manager.py:391] +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:193.70460510253906ms total_cost_time:193.74585151672363ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5241 prompt_cache_len:5151 prompt_cache_ratio:0.9828277046365197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:188.81797790527344ms total_cost_time:188.84515762329102ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5243 prompt_cache_len:5154 prompt_cache_ratio:0.9830249856952127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:175.2147674560547ms total_cost_time:175.2481460571289ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5198 prompt_cache_len:5145 prompt_cache_ratio:0.9898037706810312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 +DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:52 [batch.py:51] router release req id 8 +INFO 06-24 20:02:52 [batch.py:51] router release req id 120 +INFO 06-24 20:02:52 [batch.py:51] router release req id 400 +INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.07125329971313477 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.06663727760314941 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.0594332218170166 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.07299089431762695 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.06977391242980957 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06398510932922363 s +DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=83457029891860222385253277944389321619, time:1750766572.2273893s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:52 [manager.py:391] +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:146.76308631896973ms total_cost_time:146.80743217468262ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5242 prompt_cache_len:5151 prompt_cache_ratio:0.9826402136589089 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:169.48199272155762ms total_cost_time:169.53635215759277ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5244 prompt_cache_len:5154 prompt_cache_ratio:0.982837528604119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:162.68301010131836ms total_cost_time:162.7202033996582ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5199 prompt_cache_len:5145 prompt_cache_ratio:0.9896133871898442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 +DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:52 [batch.py:51] router release req id 8 +INFO 06-24 20:02:52 [batch.py:51] router release req id 120 +INFO 06-24 20:02:52 [batch.py:51] router release req id 400 +INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.09785985946655273 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.0672454833984375 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.059719085693359375 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.09942197799682617 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.07021665573120117 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06392478942871094 s +DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=161102350868659160106859563961468787287, time:1750766572.4088843s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:52 [manager.py:391] +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:197.93128967285156ms total_cost_time:197.98564910888672ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5243 prompt_cache_len:5151 prompt_cache_ratio:0.9824527942017929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:167.3884391784668ms total_cost_time:167.42205619812012ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:5245 prompt_cache_len:5154 prompt_cache_ratio:0.982650142993327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:326.54786109924316ms total_cost_time:326.59220695495605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5200 prompt_cache_len:5145 prompt_cache_ratio:0.989423076923077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 +DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:52 [batch.py:51] router release req id 8 +INFO 06-24 20:02:52 [batch.py:51] router release req id 120 +INFO 06-24 20:02:52 [batch.py:51] router release req id 400 +INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.2346498966217041 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.22957229614257812 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.07171273231506348 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.2364654541015625 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.23282957077026367 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.07621216773986816 s +DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=129884596516568863242925438522015958112, time:1750766572.7524042s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:52 [manager.py:391] +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:297.4991798400879ms total_cost_time:297.544002532959ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5244 prompt_cache_len:5151 prompt_cache_ratio:0.9822654462242563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:332.9939842224121ms total_cost_time:333.03308486938477ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5246 prompt_cache_len:5154 prompt_cache_ratio:0.9824628288219596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:175.19116401672363ms total_cost_time:175.2171516418457ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5201 prompt_cache_len:5145 prompt_cache_ratio:0.9892328398384926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 +INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 +DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:52 [batch.py:51] router release req id 8 +INFO 06-24 20:02:52 [batch.py:51] router release req id 120 +INFO 06-24 20:02:52 [batch.py:51] router release req id 400 +INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.11379218101501465 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.06990361213684082 s +INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.06427288055419922 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.1154327392578125 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.07317590713500977 s +INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06866884231567383 s +DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=209942190913813938067526097973070525471, time:1750766572.9348528s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:52 [manager.py:391] +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:200.65069198608398ms total_cost_time:200.69265365600586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5245 prompt_cache_len:5151 prompt_cache_ratio:0.9820781696854147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:156.9201946258545ms total_cost_time:156.94689750671387ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5247 prompt_cache_len:5154 prompt_cache_ratio:0.982275586049171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:170.2725887298584ms total_cost_time:170.31145095825195ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5202 prompt_cache_len:5145 prompt_cache_ratio:0.989042675893887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 +DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:53 [batch.py:51] router release req id 8 +INFO 06-24 20:02:53 [batch.py:51] router release req id 120 +INFO 06-24 20:02:53 [batch.py:51] router release req id 400 +INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.08655643463134766 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.081329345703125 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0688636302947998 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.08811497688293457 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.08447098731994629 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.07328128814697266 s +DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=2705697327813994178765300784631705432, time:1750766573.1153185s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:53 [manager.py:391] +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:149.04284477233887ms total_cost_time:149.08766746520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5246 prompt_cache_len:5151 prompt_cache_ratio:0.9818909645444148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:02:53 [statics_utils.py:24] mean first cost: 253.81479736502843 ms +INFO 06-24 20:02:53 [statics_utils.py:24] mean per token cost: 0.6276211114843245 ms +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:53 [manager.py:620] left req id 8can release False refcount 1 +INFO 06-24 20:02:53 [manager.py:620] left req id 120can release True refcount 3 +INFO 06-24 20:02:53 [manager.py:620] left req id 400can release False refcount 4 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:184.0071678161621ms total_cost_time:184.04626846313477ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5248 prompt_cache_len:5154 prompt_cache_ratio:0.9820884146341463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:171.50282859802246ms total_cost_time:171.5259552001953ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:5203 prompt_cache_len:5145 prompt_cache_ratio:0.9888525850470882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 +DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:53 [batch.py:51] router release req id 8 +INFO 06-24 20:02:53 [batch.py:51] router release req id 120 +INFO 06-24 20:02:53 [batch.py:51] router release req id 400 +INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.11163592338562012 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.0683906078338623 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0630185604095459 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.11339902877807617 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.0717473030090332 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.06750822067260742 s +DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=92134936354665294992778054666637416072, time:1750766573.2956557s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:53 [manager.py:391] +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:199.86629486083984ms total_cost_time:199.91159439086914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5247 prompt_cache_len:5151 prompt_cache_ratio:0.9817038307604345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:156.80170059204102ms total_cost_time:156.8281650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5249 prompt_cache_len:5154 prompt_cache_ratio:0.9819013145361021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:170.56655883789062ms total_cost_time:170.60399055480957ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5204 prompt_cache_len:5145 prompt_cache_ratio:0.9886625672559569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 +DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:53 [batch.py:51] router release req id 8 +INFO 06-24 20:02:53 [batch.py:51] router release req id 120 +INFO 06-24 20:02:53 [batch.py:51] router release req id 400 +INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.0836787223815918 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.07861518859863281 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.06662321090698242 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.08552145957946777 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.08181571960449219 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.0711214542388916 s +DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=13151274556950410961679043541694526760, time:1750766573.4754348s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:53 [manager.py:391] +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:147.11427688598633ms total_cost_time:147.1574306488037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5248 prompt_cache_len:5151 prompt_cache_ratio:0.981516768292683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:182.3885440826416ms total_cost_time:182.43002891540527ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5250 prompt_cache_len:5154 prompt_cache_ratio:0.9817142857142858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:170.49837112426758ms total_cost_time:170.52388191223145ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5205 prompt_cache_len:5145 prompt_cache_ratio:0.9884726224783862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 +DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:53 [batch.py:51] router release req id 8 +INFO 06-24 20:02:53 [batch.py:51] router release req id 120 +INFO 06-24 20:02:53 [batch.py:51] router release req id 400 +INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.274993896484375 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.23135662078857422 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.27588629722595215 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.22590875625610352 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.23252582550048828 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.22729897499084473 s +DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=81634246692624740912354441538916512075, time:1750766573.8191593s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:53 [manager.py:391] +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:350.5971431732178ms total_cost_time:350.64005851745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5249 prompt_cache_len:5151 prompt_cache_ratio:0.9813297771004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 +ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:333.5714340209961ms total_cost_time:333.61124992370605ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5251 prompt_cache_len:5154 prompt_cache_ratio:0.9815273281279756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:328.1712532043457ms total_cost_time:328.1974792480469ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5206 prompt_cache_len:5145 prompt_cache_ratio:0.9882827506723012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 +INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 +DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:53 [batch.py:51] router release req id 8 +INFO 06-24 20:02:53 [batch.py:51] router release req id 120 +INFO 06-24 20:02:53 [batch.py:51] router release req id 400 +INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.09893250465393066 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.06884479522705078 s +INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0640878677368164 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.10047078132629395 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.07195591926574707 s +INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.06844496726989746 s +DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=3265062220488357911602693387903255028, time:1750766573.9997535s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:53 [manager.py:391] +DEBUG 06-24 20:02:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 68344.947 tokens/s +DEBUG 06-24 20:02:54 [stats.py:37] Avg prompt tokens throughput: 68318.746 tokens/s +DEBUG 06-24 20:02:54 [stats.py:37] Avg generate tokens throughput: 26.202 tokens/s +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:199.83887672424316ms total_cost_time:199.88250732421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5250 prompt_cache_len:5151 prompt_cache_ratio:0.9811428571428571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:169.94500160217285ms total_cost_time:169.97194290161133ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5252 prompt_cache_len:5154 prompt_cache_ratio:0.9813404417364814 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:168.0431365966797ms total_cost_time:168.07866096496582ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5207 prompt_cache_len:5145 prompt_cache_ratio:0.9880929517956597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 +DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:54 [batch.py:51] router release req id 8 +INFO 06-24 20:02:54 [batch.py:51] router release req id 120 +INFO 06-24 20:02:54 [batch.py:51] router release req id 400 +INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.06940555572509766 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06410503387451172 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.0575556755065918 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.07092642784118652 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.06723570823669434 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06194353103637695 s +DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=206958994617528890900615359731581510637, time:1750766574.1810417s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:54 [manager.py:391] +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:143.72849464416504ms total_cost_time:143.77331733703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5251 prompt_cache_len:5151 prompt_cache_ratio:0.9809560083793563 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:166.96476936340332ms total_cost_time:167.0057773590088ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5253 prompt_cache_len:5154 prompt_cache_ratio:0.9811536264991434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:160.53104400634766ms total_cost_time:160.55798530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5208 prompt_cache_len:5145 prompt_cache_ratio:0.9879032258064516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 +DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:54 [batch.py:51] router release req id 8 +INFO 06-24 20:02:54 [batch.py:51] router release req id 120 +INFO 06-24 20:02:54 [batch.py:51] router release req id 400 +INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.0986175537109375 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06905198097229004 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06377983093261719 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.10032796859741211 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07214903831481934 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06863665580749512 s +DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=146170109155660599558531665483957543008, time:1750766574.3614078s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:54 [manager.py:391] +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:198.87113571166992ms total_cost_time:198.91619682312012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5252 prompt_cache_len:5151 prompt_cache_ratio:0.9807692307692307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:169.26050186157227ms total_cost_time:169.28625106811523ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5254 prompt_cache_len:5154 prompt_cache_ratio:0.980966882375333 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:166.87369346618652ms total_cost_time:166.90850257873535ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5209 prompt_cache_len:5145 prompt_cache_ratio:0.9877135726626992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 +DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:54 [batch.py:51] router release req id 8 +INFO 06-24 20:02:54 [batch.py:51] router release req id 120 +INFO 06-24 20:02:54 [batch.py:51] router release req id 400 +INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.07230687141418457 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06747937202453613 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06181001663208008 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.07379937171936035 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07049369812011719 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06596875190734863 s +DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=136919901274101448323875996927526254737, time:1750766574.542649s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:54 [manager.py:391] +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:146.38113975524902ms total_cost_time:146.42596244812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5253 prompt_cache_len:5151 prompt_cache_ratio:0.9805825242718447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:333.7271213531494ms total_cost_time:333.7712287902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5255 prompt_cache_len:5154 prompt_cache_ratio:0.9807802093244529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:328.10258865356445ms total_cost_time:328.1288146972656ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5210 prompt_cache_len:5145 prompt_cache_ratio:0.9875239923224568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 +DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:54 [batch.py:51] router release req id 8 +INFO 06-24 20:02:54 [batch.py:51] router release req id 120 +INFO 06-24 20:02:54 [batch.py:51] router release req id 400 +INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.2641136646270752 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.0690619945526123 s +INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06465697288513184 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.2651369571685791 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07157778739929199 s +INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06843113899230957 s +DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=34011374031020010090033538090945874728, time:1750766574.8859375s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:54 [manager.py:391] +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:348.7358093261719ms total_cost_time:348.77943992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5254 prompt_cache_len:5151 prompt_cache_ratio:0.9803958888465931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:153.68938446044922ms total_cost_time:153.7158489227295ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5256 prompt_cache_len:5154 prompt_cache_ratio:0.980593607305936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 +ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:170.21942138671875ms total_cost_time:170.2558994293213ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5211 prompt_cache_len:5145 prompt_cache_ratio:0.9873344847438111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 +DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:55 [batch.py:51] router release req id 8 +INFO 06-24 20:02:55 [batch.py:51] router release req id 120 +INFO 06-24 20:02:55 [batch.py:51] router release req id 400 +INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.08820199966430664 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.08188533782958984 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.0710752010345459 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.08975553512573242 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.08503437042236328 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07547402381896973 s +DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=278367308038101676331255405218844979123, time:1750766575.067711s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:55 [manager.py:391] +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:146.67940139770508ms total_cost_time:146.72422409057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5255 prompt_cache_len:5151 prompt_cache_ratio:0.980209324452902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:185.6391429901123ms total_cost_time:185.68754196166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5257 prompt_cache_len:5154 prompt_cache_ratio:0.9804070762792467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:175.0478744506836ms total_cost_time:175.08578300476074ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5212 prompt_cache_len:5145 prompt_cache_ratio:0.987145049884881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 +DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:55 [batch.py:51] router release req id 8 +INFO 06-24 20:02:55 [batch.py:51] router release req id 120 +INFO 06-24 20:02:55 [batch.py:51] router release req id 400 +INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.11642789840698242 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.06826424598693848 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.06376218795776367 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.11816811561584473 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.07135510444641113 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.06809425354003906 s +DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=324459728662323944494060785249951884936, time:1750766575.248322s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:55 [manager.py:391] +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:199.0346908569336ms total_cost_time:199.07665252685547ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5256 prompt_cache_len:5151 prompt_cache_ratio:0.9800228310502284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:150.79760551452637ms total_cost_time:150.82454681396484ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5258 prompt_cache_len:5154 prompt_cache_ratio:0.9802206162038798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:167.222261428833ms total_cost_time:167.26016998291016ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5213 prompt_cache_len:5145 prompt_cache_ratio:0.9869556877038174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 +DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:55 [batch.py:51] router release req id 8 +INFO 06-24 20:02:55 [batch.py:51] router release req id 120 +INFO 06-24 20:02:55 [batch.py:51] router release req id 400 +INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.09134721755981445 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.0855112075805664 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.07272052764892578 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.09288477897644043 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.08856606483459473 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07706713676452637 s +DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=65400348464549236068612662973453324390, time:1750766575.4307563s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:55 [manager.py:391] +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:146.88754081726074ms total_cost_time:146.93140983581543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5257 prompt_cache_len:5151 prompt_cache_ratio:0.9798364085980598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:192.00444221496582ms total_cost_time:192.0452117919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5259 prompt_cache_len:5154 prompt_cache_ratio:0.980034227039361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:338.1540775299072ms total_cost_time:338.1989002227783ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5214 prompt_cache_len:5145 prompt_cache_ratio:0.9867663981588032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 +DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:55 [batch.py:51] router release req id 8 +INFO 06-24 20:02:55 [batch.py:51] router release req id 120 +INFO 06-24 20:02:55 [batch.py:51] router release req id 400 +INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.2814028263092041 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.2294013500213623 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.07239556312561035 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.2832174301147461 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.23264408111572266 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07686209678649902 s +DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=284083830934789910735376228269160234085, time:1750766575.7746549s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:55 [manager.py:391] +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:349.6870994567871ms total_cost_time:349.7285842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5258 prompt_cache_len:5151 prompt_cache_ratio:0.9796500570559148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:332.72504806518555ms total_cost_time:332.7798843383789ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5260 prompt_cache_len:5154 prompt_cache_ratio:0.9798479087452472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:175.77242851257324ms total_cost_time:175.80032348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5215 prompt_cache_len:5145 prompt_cache_ratio:0.9865771812080537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 +INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 +DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:55 [batch.py:51] router release req id 8 +INFO 06-24 20:02:55 [batch.py:51] router release req id 120 +INFO 06-24 20:02:55 [batch.py:51] router release req id 400 +INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.10731053352355957 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.06700968742370605 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.10824441909790039 s +INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.06201624870300293 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.06818294525146484 s +INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.06318330764770508 s +DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=211766155860229120632619050207040578690, time:1750766575.955179s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:55 [manager.py:391] +ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:196.63190841674805ms total_cost_time:196.67482376098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5259 prompt_cache_len:5151 prompt_cache_ratio:0.9794637763833428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:156.2635898590088ms total_cost_time:156.29005432128906ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5261 prompt_cache_len:5154 prompt_cache_ratio:0.9796616612811253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:171.51188850402832ms total_cost_time:171.54979705810547ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5216 prompt_cache_len:5145 prompt_cache_ratio:0.9863880368098159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 +DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:56 [batch.py:51] router release req id 8 +INFO 06-24 20:02:56 [batch.py:51] router release req id 120 +INFO 06-24 20:02:56 [batch.py:51] router release req id 400 +INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.08247923851013184 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.07744526863098145 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06571388244628906 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.08420991897583008 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.08054614067077637 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.06988787651062012 s +DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=76294914724792995211805685286338746436, time:1750766576.136636s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:56 [manager.py:391] +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:148.37932586669922ms total_cost_time:148.423433303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5260 prompt_cache_len:5151 prompt_cache_ratio:0.9792775665399239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:181.94866180419922ms total_cost_time:181.9894313812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5262 prompt_cache_len:5154 prompt_cache_ratio:0.9794754846066135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:170.10998725891113ms total_cost_time:170.1350212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5217 prompt_cache_len:5145 prompt_cache_ratio:0.9861989649223691 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 +DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:56 [batch.py:51] router release req id 8 +INFO 06-24 20:02:56 [batch.py:51] router release req id 120 +INFO 06-24 20:02:56 [batch.py:51] router release req id 400 +INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.11077141761779785 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.06993722915649414 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06426572799682617 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.1122901439666748 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.07293963432312012 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.06844687461853027 s +DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=295722499348286923707146046955766765905, time:1750766576.3189332s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:56 [manager.py:391] +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:199.06997680664062ms total_cost_time:199.1140842437744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5261 prompt_cache_len:5151 prompt_cache_ratio:0.979091427485269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:158.27703475952148ms total_cost_time:158.30373764038086ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5263 prompt_cache_len:5154 prompt_cache_ratio:0.9792893786813605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:171.66519165039062ms total_cost_time:171.70405387878418ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5218 prompt_cache_len:5145 prompt_cache_ratio:0.9860099655040245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 +DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:56 [batch.py:51] router release req id 8 +INFO 06-24 20:02:56 [batch.py:51] router release req id 120 +INFO 06-24 20:02:56 [batch.py:51] router release req id 400 +INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.08559179306030273 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.0804300308227539 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.07039880752563477 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.08800148963928223 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.08546566963195801 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.07663488388061523 s +DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=67362860849785924080459254445982399049, time:1750766576.5014725s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:56 [manager.py:391] +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:147.3236083984375ms total_cost_time:147.36580848693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5262 prompt_cache_len:5151 prompt_cache_ratio:0.9789053591790193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:344.4664478302002ms total_cost_time:344.5086479187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5264 prompt_cache_len:5154 prompt_cache_ratio:0.9791033434650456 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:338.14406394958496ms total_cost_time:338.1788730621338ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5219 prompt_cache_len:5145 prompt_cache_ratio:0.9858210385131251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 +DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:56 [batch.py:51] router release req id 8 +INFO 06-24 20:02:56 [batch.py:51] router release req id 120 +INFO 06-24 20:02:56 [batch.py:51] router release req id 400 +INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.2792527675628662 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.07652473449707031 s +INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06608700752258301 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.280977725982666 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.07957220077514648 s +INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.07026386260986328 s +DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=149780053443978379004941183649294894592, time:1750766576.8479824s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:56 [manager.py:391] +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:351.7799377441406ms total_cost_time:351.8235683441162ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5263 prompt_cache_len:5151 prompt_cache_ratio:0.9787193615808474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:182.51872062683105ms total_cost_time:182.56020545959473ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5265 prompt_cache_len:5154 prompt_cache_ratio:0.978917378917379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:172.00994491577148ms total_cost_time:172.03593254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5220 prompt_cache_len:5145 prompt_cache_ratio:0.985632183908046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 +INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 +DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:57 [batch.py:51] router release req id 8 +INFO 06-24 20:02:57 [batch.py:51] router release req id 120 +INFO 06-24 20:02:57 [batch.py:51] router release req id 400 +INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.10492515563964844 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06873679161071777 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06345820426940918 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.10660004615783691 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07173395156860352 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06770086288452148 s +DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=193658042960038636184952126573462673029, time:1750766577.0308678s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:57 [manager.py:391] +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:199.90777969360352ms total_cost_time:199.9499797821045ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5264 prompt_cache_len:5151 prompt_cache_ratio:0.978533434650456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:163.67030143737793ms total_cost_time:163.6974811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5266 prompt_cache_len:5154 prompt_cache_ratio:0.978731484998101 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:179.65221405029297ms total_cost_time:179.6896457672119ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5221 prompt_cache_len:5145 prompt_cache_ratio:0.985443401647194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 +DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:57 [batch.py:51] router release req id 8 +INFO 06-24 20:02:57 [batch.py:51] router release req id 120 +INFO 06-24 20:02:57 [batch.py:51] router release req id 400 +INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.07663941383361816 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.07186698913574219 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.060872793197631836 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.07818078994750977 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07466983795166016 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06488370895385742 s +DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=238477062562069395972721753778868477347, time:1750766577.2123666s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:57 [manager.py:391] +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:145.30348777770996ms total_cost_time:145.34711837768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5265 prompt_cache_len:5151 prompt_cache_ratio:0.9783475783475784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:174.33881759643555ms total_cost_time:174.3764877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5267 prompt_cache_len:5154 prompt_cache_ratio:0.9785456616669831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:163.41018676757812ms total_cost_time:163.4359359741211ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5222 prompt_cache_len:5145 prompt_cache_ratio:0.985254691689008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 +DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:57 [batch.py:51] router release req id 8 +INFO 06-24 20:02:57 [batch.py:51] router release req id 120 +INFO 06-24 20:02:57 [batch.py:51] router release req id 400 +INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.1061849594116211 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06943631172180176 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06514430046081543 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.10779094696044922 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07232856750488281 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06926202774047852 s +DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=259411934968734860524228707761348215427, time:1750766577.393905s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:57 [manager.py:391] +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:198.2131004333496ms total_cost_time:198.25983047485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5266 prompt_cache_len:5151 prompt_cache_ratio:0.9781617926319788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:161.42702102661133ms total_cost_time:161.454439163208ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5268 prompt_cache_len:5154 prompt_cache_ratio:0.9783599088838268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:177.5836944580078ms total_cost_time:177.62207984924316ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5223 prompt_cache_len:5145 prompt_cache_ratio:0.9850660539919587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 +DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:57 [batch.py:51] router release req id 8 +INFO 06-24 20:02:57 [batch.py:51] router release req id 120 +INFO 06-24 20:02:57 [batch.py:51] router release req id 400 +INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.24242353439331055 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.23661541938781738 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.22551822662353516 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.24406909942626953 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.2397611141204834 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.22988033294677734 s +DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=198269870995469517647432146918450003320, time:1750766577.7364511s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:57 [manager.py:391] +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:296.5719699859619ms total_cost_time:296.6158390045166ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5267 prompt_cache_len:5151 prompt_cache_ratio:0.9779760774634517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:339.9538993835449ms total_cost_time:340.00587463378906ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:5269 prompt_cache_len:5154 prompt_cache_ratio:0.9781742266084646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:329.15639877319336ms total_cost_time:329.18858528137207ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:5224 prompt_cache_len:5145 prompt_cache_ratio:0.9848774885145483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 +DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:57 [batch.py:51] router release req id 8 +INFO 06-24 20:02:57 [batch.py:51] router release req id 120 +INFO 06-24 20:02:57 [batch.py:51] router release req id 400 +INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.1193990707397461 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06866121292114258 s +INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06359457969665527 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.12102293968200684 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.071563720703125 s +INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.0676732063293457 s +DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=57736344375771534419464930785140962856, time:1750766577.915871s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:57 [manager.py:391] +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:196.96831703186035ms total_cost_time:197.01051712036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5268 prompt_cache_len:5151 prompt_cache_ratio:0.9777904328018223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:146.16703987121582ms total_cost_time:146.1935043334961ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5270 prompt_cache_len:5154 prompt_cache_ratio:0.977988614800759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 +INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 +ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:165.84539413452148ms total_cost_time:165.88234901428223ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5225 prompt_cache_len:5145 prompt_cache_ratio:0.9846889952153111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 +DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:58 [batch.py:51] router release req id 8 +INFO 06-24 20:02:58 [batch.py:51] router release req id 120 +INFO 06-24 20:02:58 [batch.py:51] router release req id 400 +INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.09508013725280762 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.08972668647766113 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.07290792465209961 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.096832275390625 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.09283947944641113 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.07722759246826172 s +DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=163105448317614145423382377236093730641, time:1750766578.0977929s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:58 [manager.py:391] +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:195.89662551879883ms total_cost_time:195.9395408630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5269 prompt_cache_len:5151 prompt_cache_ratio:0.9776048586069462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:190.48380851745605ms total_cost_time:190.50979614257812ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5271 prompt_cache_len:5154 prompt_cache_ratio:0.9778030734206034 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:176.18703842163086ms total_cost_time:176.22089385986328ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5226 prompt_cache_len:5145 prompt_cache_ratio:0.9845005740528129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 +DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:58 [batch.py:51] router release req id 8 +INFO 06-24 20:02:58 [batch.py:51] router release req id 120 +INFO 06-24 20:02:58 [batch.py:51] router release req id 400 +INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.07253289222717285 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.06770753860473633 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.062004804611206055 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.07416534423828125 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.07058405876159668 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.06607985496520996 s +DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=107990464613445968187221303691437965877, time:1750766578.279516s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:58 [manager.py:391] +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:145.0951099395752ms total_cost_time:145.13802528381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5270 prompt_cache_len:5151 prompt_cache_ratio:0.9774193548387097 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:171.39554023742676ms total_cost_time:171.4344024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5272 prompt_cache_len:5154 prompt_cache_ratio:0.9776176024279211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:165.76814651489258ms total_cost_time:165.79270362854004ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5227 prompt_cache_len:5145 prompt_cache_ratio:0.9843122249856514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 +DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:58 [batch.py:51] router release req id 8 +INFO 06-24 20:02:58 [batch.py:51] router release req id 120 +INFO 06-24 20:02:58 [batch.py:51] router release req id 400 +INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.10302448272705078 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.06861424446105957 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.06339859962463379 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.10484886169433594 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.07177543640136719 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.06783032417297363 s +DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=252717552862111207250401123124524785553, time:1750766578.460681s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:58 [manager.py:391] +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:199.79596138000488ms total_cost_time:199.83887672424316ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5271 prompt_cache_len:5151 prompt_cache_ratio:0.977233921457029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:165.30752182006836ms total_cost_time:165.33446311950684ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5273 prompt_cache_len:5154 prompt_cache_ratio:0.9774322017826664 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:180.7103157043457ms total_cost_time:180.7551383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5228 prompt_cache_len:5145 prompt_cache_ratio:0.9841239479724561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 +DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:58 [batch.py:51] router release req id 8 +INFO 06-24 20:02:58 [batch.py:51] router release req id 120 +INFO 06-24 20:02:58 [batch.py:51] router release req id 400 +INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.24073100090026855 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.23489117622375488 s +INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.22488045692443848 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.2423253059387207 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.23803997039794922 s +INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.22934603691101074 s +DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=18173306447344486328654646520677566058, time:1750766578.8067722s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:58 [manager.py:391] +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:346.76575660705566ms total_cost_time:346.80962562561035ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5272 prompt_cache_len:5151 prompt_cache_ratio:0.9770485584218513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:340.9996032714844ms total_cost_time:341.02702140808105ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5274 prompt_cache_len:5154 prompt_cache_ratio:0.9772468714448237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 +ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:351.593017578125ms total_cost_time:351.62925720214844ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5229 prompt_cache_len:5145 prompt_cache_ratio:0.9839357429718876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 +DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:59 [batch.py:51] router release req id 8 +INFO 06-24 20:02:59 [batch.py:51] router release req id 120 +INFO 06-24 20:02:59 [batch.py:51] router release req id 400 +INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.11075305938720703 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.10550141334533691 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09362626075744629 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.11229586601257324 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.10835576057434082 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09774899482727051 s +DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=168181395587955718704358422693170453354, time:1750766579.0325258s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:59 [manager.py:391] +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:196.78163528442383ms total_cost_time:196.8247890472412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5273 prompt_cache_len:5151 prompt_cache_ratio:0.9768632656931538 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:230.74817657470703ms total_cost_time:230.7896614074707ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5275 prompt_cache_len:5154 prompt_cache_ratio:0.9770616113744076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:218.9624309539795ms total_cost_time:218.98818016052246ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5230 prompt_cache_len:5145 prompt_cache_ratio:0.9837476099426387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 +DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:59 [batch.py:51] router release req id 8 +INFO 06-24 20:02:59 [batch.py:51] router release req id 120 +INFO 06-24 20:02:59 [batch.py:51] router release req id 400 +INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.1333463191986084 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.0912010669708252 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.08359503746032715 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.134993314743042 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.09429264068603516 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.08779358863830566 s +DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=250623435912946141919493879886071853332, time:1750766579.2575517s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:59 [manager.py:391] +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:200.35767555236816ms total_cost_time:200.41942596435547ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5274 prompt_cache_len:5151 prompt_cache_ratio:0.9766780432309442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:209.25402641296387ms total_cost_time:209.29527282714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5276 prompt_cache_len:5154 prompt_cache_ratio:0.9768764215314633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:212.24594116210938ms total_cost_time:212.28361129760742ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5231 prompt_cache_len:5145 prompt_cache_ratio:0.9835595488434333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 +DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:59 [batch.py:51] router release req id 8 +INFO 06-24 20:02:59 [batch.py:51] router release req id 120 +INFO 06-24 20:02:59 [batch.py:51] router release req id 400 +INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.15273022651672363 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.10262823104858398 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09177160263061523 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.15452098846435547 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.10572266578674316 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09604763984680176 s +DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=60146283299106991422489108794745049196, time:1750766579.4845774s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:59 [manager.py:391] +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:249.2082118988037ms total_cost_time:249.25470352172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5275 prompt_cache_len:5151 prompt_cache_ratio:0.9764928909952607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:198.99535179138184ms total_cost_time:199.0222930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5277 prompt_cache_len:5154 prompt_cache_ratio:0.9766913018760659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:217.60129928588867ms total_cost_time:217.64135360717773ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5232 prompt_cache_len:5145 prompt_cache_ratio:0.9833715596330275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 +DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:02:59 [batch.py:51] router release req id 8 +INFO 06-24 20:02:59 [batch.py:51] router release req id 120 +INFO 06-24 20:02:59 [batch.py:51] router release req id 400 +INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.11905884742736816 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.11362624168395996 s +INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09503602981567383 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.12063336372375488 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.11663436889648438 s +INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09913206100463867 s +DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=335252716620284208802720739114582642921, time:1750766579.710724s req_ids:[8, 120, 400] +DEBUG 06-24 20:02:59 [manager.py:391] +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:195.0395107269287ms total_cost_time:195.0817108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5276 prompt_cache_len:5151 prompt_cache_ratio:0.9763078089461713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:391.30496978759766ms total_cost_time:391.34716987609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5278 prompt_cache_len:5154 prompt_cache_ratio:0.9765062523683213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 +ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:02:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 +INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:391.216516494751ms total_cost_time:391.25680923461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5233 prompt_cache_len:5145 prompt_cache_ratio:0.9831836422702083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 +DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:00 [batch.py:51] router release req id 8 +INFO 06-24 20:03:00 [batch.py:51] router release req id 120 +INFO 06-24 20:03:00 [batch.py:51] router release req id 400 +INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.31546807289123535 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.11297273635864258 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09592032432556152 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.31705236434936523 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.11597013473510742 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.10002803802490234 s +DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=243493336054955487679182620365720616481, time:1750766580.1080182s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:00 [manager.py:391] +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:400.73108673095703ms total_cost_time:400.7754325866699ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5277 prompt_cache_len:5151 prompt_cache_ratio:0.9761227970437749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:239.70341682434082ms total_cost_time:239.7456169128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5279 prompt_cache_len:5154 prompt_cache_ratio:0.9763212729683652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:222.6274013519287ms total_cost_time:222.65338897705078ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5234 prompt_cache_len:5145 prompt_cache_ratio:0.9829957967137944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 +DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:00 [batch.py:51] router release req id 8 +INFO 06-24 20:03:00 [batch.py:51] router release req id 120 +INFO 06-24 20:03:00 [batch.py:51] router release req id 400 +INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.1340494155883789 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.08982586860656738 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.08382964134216309 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.13568496704101562 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.09305906295776367 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.08835601806640625 s +DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=263235028361106040947328215850264471338, time:1750766580.33224s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:00 [manager.py:391] +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:199.5065212249756ms total_cost_time:199.5527744293213ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5278 prompt_cache_len:5151 prompt_cache_ratio:0.9759378552482001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:206.71725273132324ms total_cost_time:206.7587375640869ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5280 prompt_cache_len:5154 prompt_cache_ratio:0.9761363636363637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:211.7602825164795ms total_cost_time:211.79676055908203ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5235 prompt_cache_len:5145 prompt_cache_ratio:0.9828080229226361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 +DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:00 [batch.py:51] router release req id 8 +INFO 06-24 20:03:00 [batch.py:51] router release req id 120 +INFO 06-24 20:03:00 [batch.py:51] router release req id 400 +INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.15256118774414062 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.10103631019592285 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09058427810668945 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.15433216094970703 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.10418081283569336 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.09496068954467773 s +DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=220412517274802927186084874659131687305, time:1750766580.5570736s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:00 [manager.py:391] +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:251.68132781982422ms total_cost_time:251.7232894897461ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5279 prompt_cache_len:5151 prompt_cache_ratio:0.9757529835196059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:200.01578330993652ms total_cost_time:200.0417709350586ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5281 prompt_cache_len:5154 prompt_cache_ratio:0.9759515243325128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:215.86322784423828ms total_cost_time:215.90542793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5236 prompt_cache_len:5145 prompt_cache_ratio:0.982620320855615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 +DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:00 [batch.py:51] router release req id 8 +INFO 06-24 20:03:00 [batch.py:51] router release req id 120 +INFO 06-24 20:03:00 [batch.py:51] router release req id 400 +INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.1159822940826416 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.11146903038024902 s +INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09499955177307129 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.11766767501831055 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.11440825462341309 s +INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.09891843795776367 s +DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=29122925912562986131668313164799944060, time:1750766580.7816646s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:00 [manager.py:391] +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:195.54853439331055ms total_cost_time:195.59311866760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5280 prompt_cache_len:5151 prompt_cache_ratio:0.9755681818181818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:236.15574836730957ms total_cost_time:236.19413375854492ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5282 prompt_cache_len:5154 prompt_cache_ratio:0.975766755017039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:219.5131778717041ms total_cost_time:219.53892707824707ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5237 prompt_cache_len:5145 prompt_cache_ratio:0.982432690471644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 +INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 +DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:01 [batch.py:51] router release req id 8 +INFO 06-24 20:03:01 [batch.py:51] router release req id 120 +INFO 06-24 20:03:01 [batch.py:51] router release req id 400 +INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.3114128112792969 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.2635183334350586 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.25843238830566406 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.3139491081237793 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.2681081295013428 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.26425671577453613 s +DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=182335962842177281224558857469851022107, time:1750766581.1789913s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:01 [manager.py:391] +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:399.6856212615967ms total_cost_time:399.72782135009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5281 prompt_cache_len:5151 prompt_cache_ratio:0.975383450104147 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:390.51127433776855ms total_cost_time:390.55371284484863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5283 prompt_cache_len:5154 prompt_cache_ratio:0.9755820556501987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:385.41722297668457ms total_cost_time:385.44297218322754ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5238 prompt_cache_len:5145 prompt_cache_ratio:0.9822451317296678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 +DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:01 [batch.py:51] router release req id 8 +INFO 06-24 20:03:01 [batch.py:51] router release req id 120 +INFO 06-24 20:03:01 [batch.py:51] router release req id 400 +INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.13236498832702637 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.09107613563537598 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.08606386184692383 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.13392972946166992 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.09407854080200195 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.09026718139648438 s +DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=188293439928123939094173931805000715375, time:1750766581.4046385s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:01 [manager.py:391] +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:200.00052452087402ms total_cost_time:200.04606246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5282 prompt_cache_len:5151 prompt_cache_ratio:0.9751987883377509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:209.95163917541504ms total_cost_time:209.9928855895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5284 prompt_cache_len:5154 prompt_cache_ratio:0.9753974261922785 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:216.51196479797363ms total_cost_time:216.5522575378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5239 prompt_cache_len:5145 prompt_cache_ratio:0.982057644588662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 +DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:01 [batch.py:51] router release req id 8 +INFO 06-24 20:03:01 [batch.py:51] router release req id 120 +INFO 06-24 20:03:01 [batch.py:51] router release req id 400 +INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.15140771865844727 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.10064196586608887 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.08910465240478516 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.15315937995910645 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.10357975959777832 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.09314990043640137 s +DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=259194658151707794372523836442420707745, time:1750766581.630687s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:01 [manager.py:391] +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:250.9772777557373ms total_cost_time:251.02710723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5283 prompt_cache_len:5151 prompt_cache_ratio:0.9750141964792731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:200.1051902770996ms total_cost_time:200.14071464538574ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5285 prompt_cache_len:5154 prompt_cache_ratio:0.9752128666035951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:214.32018280029297ms total_cost_time:214.35904502868652ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5240 prompt_cache_len:5145 prompt_cache_ratio:0.9818702290076335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 +DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:01 [batch.py:51] router release req id 8 +INFO 06-24 20:03:01 [batch.py:51] router release req id 120 +INFO 06-24 20:03:01 [batch.py:51] router release req id 400 +INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.11613821983337402 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.11108255386352539 s +INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.09606814384460449 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.1177058219909668 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.11397910118103027 s +INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.10022997856140137 s +DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=64197573786525407112719760275378176415, time:1750766581.855237s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:01 [manager.py:391] +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:193.89581680297852ms total_cost_time:193.9396858215332ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5284 prompt_cache_len:5151 prompt_cache_ratio:0.9748296744890235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:236.18841171264648ms total_cost_time:236.22775077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5286 prompt_cache_len:5154 prompt_cache_ratio:0.9750283768444948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:221.28844261169434ms total_cost_time:221.3146686553955ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5241 prompt_cache_len:5145 prompt_cache_ratio:0.9816828849456211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 +INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 +DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:02 [batch.py:51] router release req id 8 +INFO 06-24 20:03:02 [batch.py:51] router release req id 120 +INFO 06-24 20:03:02 [batch.py:51] router release req id 400 +INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.14007067680358887 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.09129524230957031 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.08695363998413086 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.1417217254638672 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.09403705596923828 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.09047293663024902 s +DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=125094437991998793190289606147543117805, time:1750766582.0803537s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:02 [manager.py:391] +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:199.34797286987305ms total_cost_time:199.39351081848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5285 prompt_cache_len:5151 prompt_cache_ratio:0.9746452223273415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:201.54619216918945ms total_cost_time:201.62177085876465ms,out_token_counter:1 mean_per_token_cost_time: 0.07557868957519531ms prompt_token_num:5287 prompt_cache_len:5154 prompt_cache_ratio:0.9748439568753546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:210.06369590759277ms total_cost_time:210.10303497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5242 prompt_cache_len:5145 prompt_cache_ratio:0.981495612361694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 +DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:02 [batch.py:51] router release req id 8 +INFO 06-24 20:03:02 [batch.py:51] router release req id 120 +INFO 06-24 20:03:02 [batch.py:51] router release req id 400 +INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.33292460441589355 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.28167271614074707 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.26958537101745605 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.3346402645111084 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.2847588062286377 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.27356910705566406 s +DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=239136155835675422613035786088671133321, time:1750766582.4800446s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:02 [manager.py:391] +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:402.8663635253906ms total_cost_time:402.9114246368408ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5286 prompt_cache_len:5151 prompt_cache_ratio:0.9744608399545971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:402.71663665771484ms total_cost_time:402.7600288391113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5288 prompt_cache_len:5154 prompt_cache_ratio:0.9746596066565809 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:391.9949531555176ms total_cost_time:392.02880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5243 prompt_cache_len:5145 prompt_cache_ratio:0.9813084112149533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 +DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:02 [batch.py:51] router release req id 8 +INFO 06-24 20:03:02 [batch.py:51] router release req id 120 +INFO 06-24 20:03:02 [batch.py:51] router release req id 400 +INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.12467145919799805 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.07447981834411621 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.0682077407836914 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.1262655258178711 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.07736825942993164 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.07226347923278809 s +DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=281128153793894351413878658290320283819, time:1750766582.6805327s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:02 [manager.py:391] +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:199.27430152893066ms total_cost_time:199.31912422180176ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5287 prompt_cache_len:5151 prompt_cache_ratio:0.9742765273311897 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:177.9038906097412ms total_cost_time:177.94227600097656ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5289 prompt_cache_len:5154 prompt_cache_ratio:0.9744753261486103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:171.6759204864502ms total_cost_time:171.70143127441406ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5244 prompt_cache_len:5145 prompt_cache_ratio:0.9811212814645309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 +DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:02 [batch.py:51] router release req id 8 +INFO 06-24 20:03:02 [batch.py:51] router release req id 120 +INFO 06-24 20:03:02 [batch.py:51] router release req id 400 +INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.1013634204864502 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.06864643096923828 s +INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.06300497055053711 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.10288476943969727 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.07166266441345215 s +INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.06723737716674805 s +DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=91523970985601352697070865277596008677, time:1750766582.8603294s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:02 [manager.py:391] +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:200.02460479736328ms total_cost_time:200.06918907165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5288 prompt_cache_len:5151 prompt_cache_ratio:0.9740922844175491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:167.4044132232666ms total_cost_time:167.43206977844238ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5290 prompt_cache_len:5154 prompt_cache_ratio:0.9742911153119093 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:164.3974781036377ms total_cost_time:164.4306182861328ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:5245 prompt_cache_len:5145 prompt_cache_ratio:0.9809342230695901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 +INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 +DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:03 [batch.py:51] router release req id 8 +INFO 06-24 20:03:03 [batch.py:51] router release req id 120 +INFO 06-24 20:03:03 [batch.py:51] router release req id 400 +INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.07186365127563477 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06684017181396484 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06063389778137207 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.07349824905395508 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.06997442245483398 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06496000289916992 s +DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=288233766182506990015444085610493712215, time:1750766583.0410485s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:03 [manager.py:391] +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:146.42024040222168ms total_cost_time:146.46410942077637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5289 prompt_cache_len:5151 prompt_cache_ratio:0.973908111174135 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:171.48447036743164ms total_cost_time:171.5230941772461ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5291 prompt_cache_len:5154 prompt_cache_ratio:0.9741069741069741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:165.26389122009277ms total_cost_time:165.29035568237305ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5246 prompt_cache_len:5145 prompt_cache_ratio:0.9807472359893252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 +DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:03 [batch.py:51] router release req id 8 +INFO 06-24 20:03:03 [batch.py:51] router release req id 120 +INFO 06-24 20:03:03 [batch.py:51] router release req id 400 +INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.1012265682220459 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06892633438110352 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06318545341491699 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.10269975662231445 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.07202768325805664 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06740689277648926 s +DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=182149879844355413368528578342019487823, time:1750766583.2229466s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:03 [manager.py:391] +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:197.94797897338867ms total_cost_time:197.99208641052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5290 prompt_cache_len:5151 prompt_cache_ratio:0.9737240075614366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:317.48461723327637ms total_cost_time:317.53015518188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5292 prompt_cache_len:5154 prompt_cache_ratio:0.9739229024943311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:325.18744468688965ms total_cost_time:325.2289295196533ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5247 prompt_cache_len:5145 prompt_cache_ratio:0.9805603201829617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 +DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:03 [batch.py:51] router release req id 8 +INFO 06-24 20:03:03 [batch.py:51] router release req id 120 +INFO 06-24 20:03:03 [batch.py:51] router release req id 400 +INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.23842501640319824 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.08643913269042969 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.07294750213623047 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.24007129669189453 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.08942604064941406 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.07713079452514648 s +DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=217347339546027922626723107032738883650, time:1750766583.562186s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:03 [manager.py:391] +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:302.78849601745605ms total_cost_time:302.83212661743164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5291 prompt_cache_len:5151 prompt_cache_ratio:0.9735399735399736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:191.08033180236816ms total_cost_time:191.11943244934082ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5293 prompt_cache_len:5154 prompt_cache_ratio:0.9737389004345361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:177.67786979675293ms total_cost_time:177.7033805847168ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5248 prompt_cache_len:5145 prompt_cache_ratio:0.9803734756097561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 +DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:03 [batch.py:51] router release req id 8 +INFO 06-24 20:03:03 [batch.py:51] router release req id 120 +INFO 06-24 20:03:03 [batch.py:51] router release req id 400 +INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.11289215087890625 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06951093673706055 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06439065933227539 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.11450624465942383 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.0725090503692627 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.0685873031616211 s +DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=227679676313343204764042523904389103901, time:1750766583.745937s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:03 [manager.py:391] +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:199.91564750671387ms total_cost_time:199.97620582580566ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5292 prompt_cache_len:5151 prompt_cache_ratio:0.9733560090702947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:156.70180320739746ms total_cost_time:156.7401885986328ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5294 prompt_cache_len:5154 prompt_cache_ratio:0.9735549678881753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:176.6190528869629ms total_cost_time:176.65529251098633ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5249 prompt_cache_len:5145 prompt_cache_ratio:0.980186702228996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 +DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:03 [batch.py:51] router release req id 8 +INFO 06-24 20:03:03 [batch.py:51] router release req id 120 +INFO 06-24 20:03:03 [batch.py:51] router release req id 400 +INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.08400130271911621 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.07600045204162598 s +INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06231570243835449 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.08558297157287598 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.07914495468139648 s +INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06668877601623535 s +DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=275884247643486442865157346923580975197, time:1750766583.92665s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:03 [manager.py:391] +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:145.5838680267334ms total_cost_time:145.62582969665527ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5293 prompt_cache_len:5151 prompt_cache_ratio:0.9731721141129794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 +ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:03:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 67803.981 tokens/s +DEBUG 06-24 20:03:04 [stats.py:37] Avg prompt tokens throughput: 67777.902 tokens/s +DEBUG 06-24 20:03:04 [stats.py:37] Avg generate tokens throughput: 26.079 tokens/s +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:179.52299118041992ms total_cost_time:179.56161499023438ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5295 prompt_cache_len:5154 prompt_cache_ratio:0.973371104815864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:165.90023040771484ms total_cost_time:165.9252643585205ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5250 prompt_cache_len:5145 prompt_cache_ratio:0.98 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 +DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:04 [batch.py:51] router release req id 8 +INFO 06-24 20:03:04 [batch.py:51] router release req id 120 +INFO 06-24 20:03:04 [batch.py:51] router release req id 400 +INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.11393237113952637 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06885027885437012 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.06300020217895508 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.11544537544250488 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.0719749927520752 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.06738066673278809 s +DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=66708671877183112421689361876483998042, time:1750766584.1074975s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:04 [manager.py:391] +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:199.75829124450684ms total_cost_time:199.80168342590332ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5294 prompt_cache_len:5151 prompt_cache_ratio:0.9729882886286362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:154.83808517456055ms total_cost_time:154.8635959625244ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5296 prompt_cache_len:5154 prompt_cache_ratio:0.9731873111782477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:170.65811157226562ms total_cost_time:170.71175575256348ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5251 prompt_cache_len:5145 prompt_cache_ratio:0.9798133688821177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 +DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:04 [batch.py:51] router release req id 8 +INFO 06-24 20:03:04 [batch.py:51] router release req id 120 +INFO 06-24 20:03:04 [batch.py:51] router release req id 400 +INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.2448740005493164 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.23986124992370605 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.2283473014831543 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.24667954444885254 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.24306750297546387 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.23279142379760742 s +DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=149687600297356712041009137080545313648, time:1750766584.4473498s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:04 [manager.py:391] +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:345.71194648742676ms total_cost_time:345.75676918029785ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5295 prompt_cache_len:5151 prompt_cache_ratio:0.9728045325779037 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:340.63172340393066ms total_cost_time:340.65818786621094ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5297 prompt_cache_len:5154 prompt_cache_ratio:0.9730035869360015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:330.96957206726074ms total_cost_time:331.00438117980957ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5252 prompt_cache_len:5145 prompt_cache_ratio:0.9796268088347296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 +DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:04 [batch.py:51] router release req id 8 +INFO 06-24 20:03:04 [batch.py:51] router release req id 120 +INFO 06-24 20:03:04 [batch.py:51] router release req id 400 +INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.07063508033752441 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06555461883544922 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.05957508087158203 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.07228374481201172 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.06875753402709961 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.06395506858825684 s +DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=47316391409393921248033422328897065009, time:1750766584.6288254s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:04 [manager.py:391] +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:145.06840705871582ms total_cost_time:145.1106071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5296 prompt_cache_len:5151 prompt_cache_ratio:0.9726208459214502 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:168.3824062347412ms total_cost_time:168.42103004455566ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5298 prompt_cache_len:5154 prompt_cache_ratio:0.9728199320498301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:162.4002456665039ms total_cost_time:162.42527961730957ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5253 prompt_cache_len:5145 prompt_cache_ratio:0.9794403198172473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 +DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:04 [batch.py:51] router release req id 8 +INFO 06-24 20:03:04 [batch.py:51] router release req id 120 +INFO 06-24 20:03:04 [batch.py:51] router release req id 400 +INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.09999442100524902 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06928372383117676 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.06400632858276367 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.10173821449279785 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.07449841499328613 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.07265734672546387 s +DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=79892869252685447031901683572463103244, time:1750766584.8082302s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:04 [manager.py:391] +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:200.4532814025879ms total_cost_time:200.4988193511963ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5297 prompt_cache_len:5151 prompt_cache_ratio:0.9724372286199736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 +ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:169.69013214111328ms total_cost_time:169.71683502197266ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5299 prompt_cache_len:5154 prompt_cache_ratio:0.972636346480468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:166.36323928833008ms total_cost_time:166.4128303527832ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:5254 prompt_cache_len:5145 prompt_cache_ratio:0.9792539017891131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 +INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 +DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:04 [batch.py:51] router release req id 8 +INFO 06-24 20:03:04 [batch.py:51] router release req id 120 +INFO 06-24 20:03:04 [batch.py:51] router release req id 400 +INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.06858706474304199 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06291651725769043 s +INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.05619978904724121 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.07008528709411621 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.06586384773254395 s +INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.060288190841674805 s +DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=164075764231021247856704771629386601870, time:1750766584.9884338s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:04 [manager.py:391] +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:144.00506019592285ms total_cost_time:144.04654502868652ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5298 prompt_cache_len:5151 prompt_cache_ratio:0.9722536806342016 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:166.64743423461914ms total_cost_time:166.7020320892334ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:5300 prompt_cache_len:5154 prompt_cache_ratio:0.9724528301886792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:159.93785858154297ms total_cost_time:159.96360778808594ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5255 prompt_cache_len:5145 prompt_cache_ratio:0.9790675547098002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 +DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:05 [batch.py:51] router release req id 8 +INFO 06-24 20:03:05 [batch.py:51] router release req id 120 +INFO 06-24 20:03:05 [batch.py:51] router release req id 400 +INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.10055685043334961 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.06757378578186035 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.06247830390930176 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.10227465629577637 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.07069587707519531 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06672811508178711 s +DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=259431682458955373295122064125422602180, time:1750766585.1691797s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:05 [manager.py:391] +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.46908950805664ms total_cost_time:199.51486587524414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5299 prompt_cache_len:5151 prompt_cache_ratio:0.9720702019248915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:318.6924457550049ms total_cost_time:318.73512268066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5301 prompt_cache_len:5154 prompt_cache_ratio:0.9722693831352575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:325.64830780029297ms total_cost_time:325.68836212158203ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5256 prompt_cache_len:5145 prompt_cache_ratio:0.9788812785388128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 +DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:05 [batch.py:51] router release req id 8 +INFO 06-24 20:03:05 [batch.py:51] router release req id 120 +INFO 06-24 20:03:05 [batch.py:51] router release req id 400 +INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.2349088191986084 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.08285832405090332 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.07140970230102539 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.2364649772644043 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.08598685264587402 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.07569289207458496 s +DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=310158004849090102742443202022598035781, time:1750766585.5102398s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:05 [manager.py:391] +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:301.8653392791748ms total_cost_time:301.9092082977295ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5300 prompt_cache_len:5151 prompt_cache_ratio:0.9718867924528302 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:186.90824508666992ms total_cost_time:186.94829940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5302 prompt_cache_len:5154 prompt_cache_ratio:0.9720860052810261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:175.431489944458ms total_cost_time:175.45676231384277ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5257 prompt_cache_len:5145 prompt_cache_ratio:0.9786950732356857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 +DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:05 [batch.py:51] router release req id 8 +INFO 06-24 20:03:05 [batch.py:51] router release req id 120 +INFO 06-24 20:03:05 [batch.py:51] router release req id 400 +INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.10951685905456543 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.06955742835998535 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.0648806095123291 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.0724940299987793 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06974482536315918 s +DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=84577353042093161251123392718419548024, time:1750766585.692095s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:05 [manager.py:391] +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.65267181396484ms total_cost_time:199.69701766967773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5301 prompt_cache_len:5151 prompt_cache_ratio:0.9717034521788341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:159.52801704406738ms total_cost_time:159.55376625061035ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5303 prompt_cache_len:5154 prompt_cache_ratio:0.9719026965868376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:174.37219619750977ms total_cost_time:174.41177368164062ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5258 prompt_cache_len:5145 prompt_cache_ratio:0.9785089387599848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 +DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:05 [batch.py:51] router release req id 8 +INFO 06-24 20:03:05 [batch.py:51] router release req id 120 +INFO 06-24 20:03:05 [batch.py:51] router release req id 400 +INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.08170604705810547 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.07679009437561035 s +INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.06541228294372559 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.0833137035369873 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.07999515533447266 s +INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06974577903747559 s +DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=231194745762646292343607873640996808426, time:1750766585.8702967s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:05 [manager.py:391] +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:147.35794067382812ms total_cost_time:147.40419387817383ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5302 prompt_cache_len:5151 prompt_cache_ratio:0.9715201810637495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:178.8315773010254ms total_cost_time:178.87401580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5304 prompt_cache_len:5154 prompt_cache_ratio:0.9717194570135747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:167.42491722106934ms total_cost_time:167.4516201019287ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5259 prompt_cache_len:5145 prompt_cache_ratio:0.9783228750713063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 +INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 +DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:06 [batch.py:51] router release req id 8 +INFO 06-24 20:03:06 [batch.py:51] router release req id 120 +INFO 06-24 20:03:06 [batch.py:51] router release req id 400 +INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.0692598819732666 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.0637061595916748 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040830612182617 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07240819931030273 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06789445877075195 s +DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=160257433247289975480812048254156051702, time:1750766586.0519567s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:06 [manager.py:391] +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.94592666625977ms total_cost_time:199.99003410339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5303 prompt_cache_len:5151 prompt_cache_ratio:0.9713369790684518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:160.53271293640137ms total_cost_time:160.55870056152344ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5305 prompt_cache_len:5154 prompt_cache_ratio:0.971536286522149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:173.99215698242188ms total_cost_time:174.03030395507812ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5260 prompt_cache_len:5145 prompt_cache_ratio:0.9781368821292775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 +DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:06 [batch.py:51] router release req id 8 +INFO 06-24 20:03:06 [batch.py:51] router release req id 120 +INFO 06-24 20:03:06 [batch.py:51] router release req id 400 +INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.24316668510437012 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.23845696449279785 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.22804641723632812 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.24495697021484375 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.2417299747467041 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.23244285583496094 s +DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=190813674211183090339878898280079691919, time:1750766586.3956118s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:06 [manager.py:391] +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:298.02536964416504ms total_cost_time:298.07019233703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5304 prompt_cache_len:5151 prompt_cache_ratio:0.9711538461538461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:342.423677444458ms total_cost_time:342.4665927886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5306 prompt_cache_len:5154 prompt_cache_ratio:0.9713531850735017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:332.0116996765137ms total_cost_time:332.03911781311035ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5261 prompt_cache_len:5145 prompt_cache_ratio:0.9779509598935564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 +DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:06 [batch.py:51] router release req id 8 +INFO 06-24 20:03:06 [batch.py:51] router release req id 120 +INFO 06-24 20:03:06 [batch.py:51] router release req id 400 +INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.12110543251037598 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06941533088684082 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06402778625488281 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.12287569046020508 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07263469696044922 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06830286979675293 s +DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=88611357532451146026380542523366982005, time:1750766586.577605s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:06 [manager.py:391] +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:197.9985237121582ms total_cost_time:198.0421543121338ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5305 prompt_cache_len:5151 prompt_cache_ratio:0.9709707822808671 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:174.36647415161133ms total_cost_time:174.4062900543213ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5307 prompt_cache_len:5154 prompt_cache_ratio:0.9711701526286037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:168.88022422790527ms total_cost_time:168.90597343444824ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5262 prompt_cache_len:5145 prompt_cache_ratio:0.9777651083238312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 +DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:06 [batch.py:51] router release req id 8 +INFO 06-24 20:03:06 [batch.py:51] router release req id 120 +INFO 06-24 20:03:06 [batch.py:51] router release req id 400 +INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.0994877815246582 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06853818893432617 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06286787986755371 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.10113167762756348 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07184529304504395 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06729817390441895 s +DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=335940104050689606158566240719424468424, time:1750766586.7589724s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:06 [manager.py:391] +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:198.90069961547852ms total_cost_time:198.944091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5306 prompt_cache_len:5151 prompt_cache_ratio:0.9707877874104787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:168.14136505126953ms total_cost_time:168.1685447692871ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5308 prompt_cache_len:5154 prompt_cache_ratio:0.9709871891484552 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:166.00513458251953ms total_cost_time:166.0468578338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5263 prompt_cache_len:5145 prompt_cache_ratio:0.9775793273798214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 +DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:06 [batch.py:51] router release req id 8 +INFO 06-24 20:03:06 [batch.py:51] router release req id 120 +INFO 06-24 20:03:06 [batch.py:51] router release req id 400 +INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.07269501686096191 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06657648086547852 s +INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06083345413208008 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.07430529594421387 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.06975412368774414 s +INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06520557403564453 s +DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=170816745227154724700370166280682941226, time:1750766586.941039s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:06 [manager.py:391] +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:145.7841396331787ms total_cost_time:145.83396911621094ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5307 prompt_cache_len:5151 prompt_cache_ratio:0.9706048615036744 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:171.1738109588623ms total_cost_time:171.21291160583496ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5309 prompt_cache_len:5154 prompt_cache_ratio:0.9708042945940856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:165.4052734375ms total_cost_time:165.43173789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5264 prompt_cache_len:5145 prompt_cache_ratio:0.9773936170212766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 +DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:07 [batch.py:51] router release req id 8 +INFO 06-24 20:03:07 [batch.py:51] router release req id 120 +INFO 06-24 20:03:07 [batch.py:51] router release req id 400 +INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.10344791412353516 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06972265243530273 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.06460857391357422 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.10510468482971191 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.07271480560302734 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06862640380859375 s +DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=71340613666972578149316851485645079893, time:1750766587.1236358s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:07 [manager.py:391] +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:198.45008850097656ms total_cost_time:198.49443435668945ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5308 prompt_cache_len:5151 prompt_cache_ratio:0.970422004521477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:164.6888256072998ms total_cost_time:164.71505165100098ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5310 prompt_cache_len:5154 prompt_cache_ratio:0.9706214689265537 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:178.36523056030273ms total_cost_time:178.4040927886963ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5265 prompt_cache_len:5145 prompt_cache_ratio:0.9772079772079773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 +DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:07 [batch.py:51] router release req id 8 +INFO 06-24 20:03:07 [batch.py:51] router release req id 120 +INFO 06-24 20:03:07 [batch.py:51] router release req id 400 +INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.24206900596618652 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.2371683120727539 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.22643351554870605 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.2437143325805664 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.24038290977478027 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.230987548828125 s +DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=307441400527026226635470668807473442954, time:1750766587.4694526s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:07 [manager.py:391] +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:297.8386878967285ms total_cost_time:297.8825569152832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5309 prompt_cache_len:5151 prompt_cache_ratio:0.9702392164249388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:340.84439277648926ms total_cost_time:340.8844470977783ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5311 prompt_cache_len:5154 prompt_cache_ratio:0.9704387121069479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:330.20973205566406ms total_cost_time:330.2347660064697ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5266 prompt_cache_len:5145 prompt_cache_ratio:0.9770224078997342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 +DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:07 [batch.py:51] router release req id 8 +INFO 06-24 20:03:07 [batch.py:51] router release req id 120 +INFO 06-24 20:03:07 [batch.py:51] router release req id 400 +INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.11931276321411133 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06859779357910156 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.06405234336853027 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.12086820602416992 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.0715947151184082 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06826186180114746 s +DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=312672663945024470632483703707765056425, time:1750766587.6492965s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:07 [manager.py:391] +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:199.5561122894287ms total_cost_time:199.6002197265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5310 prompt_cache_len:5151 prompt_cache_ratio:0.9700564971751412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:148.8039493560791ms total_cost_time:148.83041381835938ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5312 prompt_cache_len:5154 prompt_cache_ratio:0.9702560240963856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:167.71340370178223ms total_cost_time:167.7529811859131ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5267 prompt_cache_len:5145 prompt_cache_ratio:0.9768369090563889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 +DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:07 [batch.py:51] router release req id 8 +INFO 06-24 20:03:07 [batch.py:51] router release req id 120 +INFO 06-24 20:03:07 [batch.py:51] router release req id 400 +INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.09345245361328125 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.08801460266113281 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.07297444343566895 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.09522056579589844 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.09141969680786133 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.07752132415771484 s +DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=193091722683291349466394464995447508586, time:1750766587.8326578s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:07 [manager.py:391] +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:195.34993171691895ms total_cost_time:195.39237022399902ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5311 prompt_cache_len:5151 prompt_cache_ratio:0.9698738467331952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:190.08588790893555ms total_cost_time:190.11354446411133ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5313 prompt_cache_len:5154 prompt_cache_ratio:0.9700734048560136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:176.88751220703125ms total_cost_time:176.92995071411133ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5268 prompt_cache_len:5145 prompt_cache_ratio:0.9766514806378133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 +INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 +DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:07 [batch.py:51] router release req id 8 +INFO 06-24 20:03:07 [batch.py:51] router release req id 120 +INFO 06-24 20:03:07 [batch.py:51] router release req id 400 +INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.06983780860900879 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06453132629394531 s +INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.05739402770996094 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.07161664962768555 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.06784510612487793 s +INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06203031539916992 s +DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=64303321026449279438288865713711737914, time:1750766588.012172s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:08 [manager.py:391] +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:145.30682563781738ms total_cost_time:145.35093307495117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5312 prompt_cache_len:5151 prompt_cache_ratio:0.969691265060241 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:168.3492660522461ms total_cost_time:168.38932037353516ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5314 prompt_cache_len:5154 prompt_cache_ratio:0.969890854347008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:161.27586364746094ms total_cost_time:161.3016128540039ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5269 prompt_cache_len:5145 prompt_cache_ratio:0.9764661226039096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 +DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:08 [batch.py:51] router release req id 8 +INFO 06-24 20:03:08 [batch.py:51] router release req id 120 +INFO 06-24 20:03:08 [batch.py:51] router release req id 400 +INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.10009527206420898 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.06900477409362793 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06406497955322266 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.10164761543273926 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.0722665786743164 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.06852245330810547 s +DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=327575761866047990773155610397381304783, time:1750766588.194093s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:08 [manager.py:391] +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:350.4462242126465ms total_cost_time:350.48985481262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5313 prompt_cache_len:5151 prompt_cache_ratio:0.9695087521174478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:319.5836544036865ms total_cost_time:319.6108341217041ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5315 prompt_cache_len:5154 prompt_cache_ratio:0.9697083725305738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:333.5709571838379ms total_cost_time:333.61077308654785ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5270 prompt_cache_len:5145 prompt_cache_ratio:0.976280834914611 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 +DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:08 [batch.py:51] router release req id 8 +INFO 06-24 20:03:08 [batch.py:51] router release req id 120 +INFO 06-24 20:03:08 [batch.py:51] router release req id 400 +INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.0843658447265625 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.07943940162658691 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06834888458251953 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.08600425720214844 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.08255338668823242 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.07269573211669922 s +DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=247661076971784525710063830289677839388, time:1750766588.5378354s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:08 [manager.py:391] +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:147.89772033691406ms total_cost_time:147.94111251831055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5314 prompt_cache_len:5151 prompt_cache_ratio:0.9693263078660143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:184.28349494934082ms total_cost_time:184.32331085205078ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5316 prompt_cache_len:5154 prompt_cache_ratio:0.9695259593679458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:173.2776165008545ms total_cost_time:173.30336570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5271 prompt_cache_len:5145 prompt_cache_ratio:0.9760956175298805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 +DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:08 [batch.py:51] router release req id 8 +INFO 06-24 20:03:08 [batch.py:51] router release req id 120 +INFO 06-24 20:03:08 [batch.py:51] router release req id 400 +INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.11314868927001953 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.06896233558654785 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06373453140258789 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.11490797996520996 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.07211685180664062 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.06814074516296387 s +DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=140511977231311428800538261355338594472, time:1750766588.7198162s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:08 [manager.py:391] +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:198.1799602508545ms total_cost_time:198.22072982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5315 prompt_cache_len:5151 prompt_cache_ratio:0.9691439322671684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:153.9597511291504ms total_cost_time:153.98621559143066ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5317 prompt_cache_len:5154 prompt_cache_ratio:0.9693436148203874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:170.24636268615723ms total_cost_time:170.28427124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5272 prompt_cache_len:5145 prompt_cache_ratio:0.9759104704097117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 +DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:08 [batch.py:51] router release req id 8 +INFO 06-24 20:03:08 [batch.py:51] router release req id 120 +INFO 06-24 20:03:08 [batch.py:51] router release req id 400 +INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.0884101390838623 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.082305908203125 s +INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.07048821449279785 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.09008431434631348 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.08532214164733887 s +INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.07485628128051758 s +DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=221469127664766532209913394809552239708, time:1750766588.901205s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:08 [manager.py:391] +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:147.83906936645508ms total_cost_time:147.88341522216797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5316 prompt_cache_len:5151 prompt_cache_ratio:0.968961625282167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:183.5784912109375ms total_cost_time:183.63213539123535ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5318 prompt_cache_len:5154 prompt_cache_ratio:0.9691613388491914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:172.09267616271973ms total_cost_time:172.12605476379395ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5273 prompt_cache_len:5145 prompt_cache_ratio:0.9757253935141286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 +INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 +DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:09 [batch.py:51] router release req id 8 +INFO 06-24 20:03:09 [batch.py:51] router release req id 120 +INFO 06-24 20:03:09 [batch.py:51] router release req id 400 +INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.11386823654174805 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.07115316390991211 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06558728218078613 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.11551022529602051 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07411003112792969 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06966829299926758 s +DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=20319516309224191970199725162782006250, time:1750766589.0809584s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:09 [manager.py:391] +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:198.61435890197754ms total_cost_time:198.66013526916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5317 prompt_cache_len:5151 prompt_cache_ratio:0.9687793868722964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:155.8363437652588ms total_cost_time:155.86304664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5319 prompt_cache_len:5154 prompt_cache_ratio:0.9689791314156796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:171.28586769104004ms total_cost_time:171.32306098937988ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5274 prompt_cache_len:5145 prompt_cache_ratio:0.9755403868031854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 +DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:09 [batch.py:51] router release req id 8 +INFO 06-24 20:03:09 [batch.py:51] router release req id 120 +INFO 06-24 20:03:09 [batch.py:51] router release req id 400 +INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.24782609939575195 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.24282050132751465 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.23194313049316406 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.24942564964294434 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.24591469764709473 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.23625755310058594 s +DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=235892825169247348311076354257188749409, time:1750766589.4237509s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:09 [manager.py:391] +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:345.47924995422363ms total_cost_time:345.5231189727783ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5318 prompt_cache_len:5151 prompt_cache_ratio:0.9685972169988718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:340.5113220214844ms total_cost_time:340.53707122802734ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5320 prompt_cache_len:5154 prompt_cache_ratio:0.968796992481203 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:334.89131927490234ms total_cost_time:334.92469787597656ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5275 prompt_cache_len:5145 prompt_cache_ratio:0.9753554502369668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 +DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:09 [batch.py:51] router release req id 8 +INFO 06-24 20:03:09 [batch.py:51] router release req id 120 +INFO 06-24 20:03:09 [batch.py:51] router release req id 400 +INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.07388758659362793 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.06824827194213867 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06154179573059082 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.07535743713378906 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07131409645080566 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06591558456420898 s +DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=81903583301967354346757096417475418656, time:1750766589.6045299s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:09 [manager.py:391] +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:146.39520645141602ms total_cost_time:146.4381217956543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5319 prompt_cache_len:5151 prompt_cache_ratio:0.9684151156232375 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:173.91061782836914ms total_cost_time:173.9494800567627ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5321 prompt_cache_len:5154 prompt_cache_ratio:0.9686149220071415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:167.280912399292ms total_cost_time:167.30690002441406ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5276 prompt_cache_len:5145 prompt_cache_ratio:0.9751705837755875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 +DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:09 [batch.py:51] router release req id 8 +INFO 06-24 20:03:09 [batch.py:51] router release req id 120 +INFO 06-24 20:03:09 [batch.py:51] router release req id 400 +INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.10403776168823242 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.06827974319458008 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06267166137695312 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.10580134391784668 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07145380973815918 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06708765029907227 s +DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=338763151939561981775034878317933767378, time:1750766589.7862425s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:09 [manager.py:391] +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:198.16231727600098ms total_cost_time:198.20594787597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5320 prompt_cache_len:5151 prompt_cache_ratio:0.9682330827067669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:162.35804557800293ms total_cost_time:162.3842716217041ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5322 prompt_cache_len:5154 prompt_cache_ratio:0.9684329199549042 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 +INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:178.1024932861328ms total_cost_time:178.14159393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5277 prompt_cache_len:5145 prompt_cache_ratio:0.9749857873791927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 +DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:09 [batch.py:51] router release req id 8 +INFO 06-24 20:03:09 [batch.py:51] router release req id 120 +INFO 06-24 20:03:09 [batch.py:51] router release req id 400 +INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.07581067085266113 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.07088828086853027 s +INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.05869793891906738 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.07762432098388672 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.0740969181060791 s +INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06306934356689453 s +DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=40901409512790474375518464984594005385, time:1750766589.9674773s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:09 [manager.py:391] +ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:145.59650421142578ms total_cost_time:145.64037322998047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5321 prompt_cache_len:5151 prompt_cache_ratio:0.9680511182108626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:175.22168159484863ms total_cost_time:175.26912689208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:5323 prompt_cache_len:5154 prompt_cache_ratio:0.968250986285929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:163.1004810333252ms total_cost_time:163.1300449371338ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:5278 prompt_cache_len:5145 prompt_cache_ratio:0.9748010610079576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 +DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:10 [batch.py:51] router release req id 8 +INFO 06-24 20:03:10 [batch.py:51] router release req id 120 +INFO 06-24 20:03:10 [batch.py:51] router release req id 400 +INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.10704231262207031 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.06838369369506836 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.06333398818969727 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.10885405540466309 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.07172966003417969 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.06785249710083008 s +DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=266292312149013768256993696514765246322, time:1750766590.1492386s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:10 [manager.py:391] +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:199.02324676513672ms total_cost_time:199.06854629516602ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5322 prompt_cache_len:5151 prompt_cache_ratio:0.967869222096956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:160.4321002960205ms total_cost_time:160.45761108398438ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5324 prompt_cache_len:5154 prompt_cache_ratio:0.968069120961683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:328.9916515350342ms total_cost_time:329.04577255249023ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5279 prompt_cache_len:5145 prompt_cache_ratio:0.9746164046220875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 +DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:10 [batch.py:51] router release req id 8 +INFO 06-24 20:03:10 [batch.py:51] router release req id 120 +INFO 06-24 20:03:10 [batch.py:51] router release req id 400 +INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.2432699203491211 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.23871374130249023 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.07324957847595215 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.24477672576904297 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.24170279502868652 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.07749390602111816 s +DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=322481225434558682014507328874036955519, time:1750766590.4936216s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:10 [manager.py:391] +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:297.9154586791992ms total_cost_time:297.9590892791748ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5323 prompt_cache_len:5151 prompt_cache_ratio:0.9676873943265076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:342.2665596008301ms total_cost_time:342.30685234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5325 prompt_cache_len:5154 prompt_cache_ratio:0.967887323943662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:176.88989639282227ms total_cost_time:176.91516876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5280 prompt_cache_len:5145 prompt_cache_ratio:0.9744318181818182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 +DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:10 [batch.py:51] router release req id 8 +INFO 06-24 20:03:10 [batch.py:51] router release req id 120 +INFO 06-24 20:03:10 [batch.py:51] router release req id 400 +INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.12222766876220703 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.06864333152770996 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.0626680850982666 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.12378525733947754 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.07193636894226074 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.06710553169250488 s +DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=220151859614946108368536776786202970805, time:1750766590.6743085s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:10 [manager.py:391] +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:200.00195503234863ms total_cost_time:200.04510879516602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5324 prompt_cache_len:5151 prompt_cache_ratio:0.9675056348610067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:146.6064453125ms total_cost_time:146.63290977478027ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5326 prompt_cache_len:5154 prompt_cache_ratio:0.9677055951933909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:165.28987884521484ms total_cost_time:165.3306484222412ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5281 prompt_cache_len:5145 prompt_cache_ratio:0.9742473016474152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 +DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:10 [batch.py:51] router release req id 8 +INFO 06-24 20:03:10 [batch.py:51] router release req id 120 +INFO 06-24 20:03:10 [batch.py:51] router release req id 400 +INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.09436726570129395 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.08864331245422363 s +INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.0736687183380127 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.09592103958129883 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.09162449836730957 s +INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.07790493965148926 s +DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=128487460138582329053149305547734082855, time:1750766590.855353s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:10 [manager.py:391] +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:195.4941749572754ms total_cost_time:195.63674926757812ms,out_token_counter:1 mean_per_token_cost_time: 0.14257431030273438ms prompt_token_num:5325 prompt_cache_len:5151 prompt_cache_ratio:0.9673239436619718 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:190.49668312072754ms total_cost_time:190.52457809448242ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5327 prompt_cache_len:5154 prompt_cache_ratio:0.9675239346724235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:178.20215225219727ms total_cost_time:178.2360076904297ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5282 prompt_cache_len:5145 prompt_cache_ratio:0.9740628549791746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 +INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 +DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:11 [batch.py:51] router release req id 8 +INFO 06-24 20:03:11 [batch.py:51] router release req id 120 +INFO 06-24 20:03:11 [batch.py:51] router release req id 400 +INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.06902575492858887 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06267142295837402 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.0565953254699707 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.07071208953857422 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.06584715843200684 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.060863494873046875 s +DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=290035317868547564990657383240947801738, time:1750766591.0376327s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:11 [manager.py:391] +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:142.7133083343506ms total_cost_time:142.75860786437988ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5326 prompt_cache_len:5151 prompt_cache_ratio:0.9671423206909501 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:166.78762435913086ms total_cost_time:166.82910919189453ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5328 prompt_cache_len:5154 prompt_cache_ratio:0.9673423423423423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:160.65025329589844ms total_cost_time:160.67814826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5283 prompt_cache_len:5145 prompt_cache_ratio:0.9738784781374219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 +DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:11 [batch.py:51] router release req id 8 +INFO 06-24 20:03:11 [batch.py:51] router release req id 120 +INFO 06-24 20:03:11 [batch.py:51] router release req id 400 +INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.10183286666870117 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06792640686035156 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.0631246566772461 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.10357499122619629 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.07109856605529785 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.06754469871520996 s +DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=24695960871113498125025115045639151037, time:1750766591.2185159s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:11 [manager.py:391] +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:199.5987892150879ms total_cost_time:199.64218139648438ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5327 prompt_cache_len:5151 prompt_cache_ratio:0.9669607659095175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:317.3503875732422ms total_cost_time:317.3949718475342ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5329 prompt_cache_len:5154 prompt_cache_ratio:0.9671608181647589 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:329.5876979827881ms total_cost_time:329.62703704833984ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5284 prompt_cache_len:5145 prompt_cache_ratio:0.9736941710825132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 +DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:11 [batch.py:51] router release req id 8 +INFO 06-24 20:03:11 [batch.py:51] router release req id 120 +INFO 06-24 20:03:11 [batch.py:51] router release req id 400 +INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.24199199676513672 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.08997678756713867 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.07400131225585938 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.24371337890625 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.0930783748626709 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.07832789421081543 s +DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=57661434145422022087137357498451544339, time:1750766591.563655s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:11 [manager.py:391] +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:300.83250999450684ms total_cost_time:300.8759021759033ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5328 prompt_cache_len:5151 prompt_cache_ratio:0.9667792792792793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:194.06867027282715ms total_cost_time:194.1089630126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5330 prompt_cache_len:5154 prompt_cache_ratio:0.9669793621013133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:178.14064025878906ms total_cost_time:178.1916618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:5285 prompt_cache_len:5145 prompt_cache_ratio:0.9735099337748344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 +DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:11 [batch.py:51] router release req id 8 +INFO 06-24 20:03:11 [batch.py:51] router release req id 120 +INFO 06-24 20:03:11 [batch.py:51] router release req id 400 +INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.11713314056396484 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06909584999084473 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.06452155113220215 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.1187899112701416 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.07206034660339355 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.06853246688842773 s +DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=317777703859871646095525156885254569624, time:1750766591.7457037s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:11 [manager.py:391] +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:199.34940338134766ms total_cost_time:199.39279556274414ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5329 prompt_cache_len:5151 prompt_cache_ratio:0.966597860761869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:151.2582302093506ms total_cost_time:151.28517150878906ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5331 prompt_cache_len:5154 prompt_cache_ratio:0.9667979741136747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:168.609619140625ms total_cost_time:168.64991188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5286 prompt_cache_len:5145 prompt_cache_ratio:0.9733257661748014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 +DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:11 [batch.py:51] router release req id 8 +INFO 06-24 20:03:11 [batch.py:51] router release req id 120 +INFO 06-24 20:03:11 [batch.py:51] router release req id 400 +INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.11312317848205566 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.10802197456359863 s +INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.09498333930969238 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.11467885971069336 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.1109166145324707 s +INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.09885668754577637 s +DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=313643772326803029637075869083573974472, time:1750766591.949918s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:11 [manager.py:391] +ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:195.23978233337402ms total_cost_time:195.2826976776123ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5330 prompt_cache_len:5151 prompt_cache_ratio:0.9664165103189494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:223.4516143798828ms total_cost_time:223.48880767822266ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5332 prompt_cache_len:5154 prompt_cache_ratio:0.9666166541635409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:210.21485328674316ms total_cost_time:210.24012565612793ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5287 prompt_cache_len:5145 prompt_cache_ratio:0.9731416682428599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 +DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:12 [batch.py:51] router release req id 8 +INFO 06-24 20:03:12 [batch.py:51] router release req id 120 +INFO 06-24 20:03:12 [batch.py:51] router release req id 400 +INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.10523557662963867 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.06858348846435547 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.06328058242797852 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.10685968399047852 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.07165408134460449 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.06772708892822266 s +DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=18784754208781915160655151353157979525, time:1750766592.14187s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:12 [manager.py:391] +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:199.97930526733398ms total_cost_time:200.02365112304688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5331 prompt_cache_len:5151 prompt_cache_ratio:0.9662352279122116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:163.30265998840332ms total_cost_time:163.3291244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5333 prompt_cache_len:5154 prompt_cache_ratio:0.9664354022126382 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:178.04765701293945ms total_cost_time:178.0855655670166ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5288 prompt_cache_len:5145 prompt_cache_ratio:0.9729576399394856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 +DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:12 [batch.py:51] router release req id 8 +INFO 06-24 20:03:12 [batch.py:51] router release req id 120 +INFO 06-24 20:03:12 [batch.py:51] router release req id 400 +INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.24019265174865723 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.23466897010803223 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.22370553016662598 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.24183392524719238 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.2377324104309082 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.22808003425598145 s +DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=156391083831002154803495374581616708161, time:1750766592.4854987s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:12 [manager.py:391] +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:296.7512607574463ms total_cost_time:296.7960834503174ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5332 prompt_cache_len:5151 prompt_cache_ratio:0.9660540135033758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:338.7103080749512ms total_cost_time:338.75060081481934ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5334 prompt_cache_len:5154 prompt_cache_ratio:0.9662542182227222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:327.8787136077881ms total_cost_time:327.90493965148926ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5289 prompt_cache_len:5145 prompt_cache_ratio:0.9727736812251844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 +DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:12 [batch.py:51] router release req id 8 +INFO 06-24 20:03:12 [batch.py:51] router release req id 120 +INFO 06-24 20:03:12 [batch.py:51] router release req id 400 +INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.11927652359008789 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.06856822967529297 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.06243705749511719 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.1210019588470459 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.0716392993927002 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.06669402122497559 s +DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=62189537672751645756612474582055411552, time:1750766592.6662848s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:12 [manager.py:391] +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:198.29964637756348ms total_cost_time:198.34208488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5333 prompt_cache_len:5151 prompt_cache_ratio:0.9658728670541908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:147.54295349121094ms total_cost_time:147.57013320922852ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5335 prompt_cache_len:5154 prompt_cache_ratio:0.9660731021555764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:164.5376682281494ms total_cost_time:164.57509994506836ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5290 prompt_cache_len:5145 prompt_cache_ratio:0.9725897920604915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 +DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:12 [batch.py:51] router release req id 8 +INFO 06-24 20:03:12 [batch.py:51] router release req id 120 +INFO 06-24 20:03:12 [batch.py:51] router release req id 400 +INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.09337854385375977 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.08781576156616211 s +INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.07291984558105469 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.09497404098510742 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.09160900115966797 s +INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.07830214500427246 s +DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=75055817195679535222948774519941722017, time:1750766592.8478618s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:12 [manager.py:391] +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:195.2962875366211ms total_cost_time:195.33967971801758ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5334 prompt_cache_len:5151 prompt_cache_ratio:0.9656917885264342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:189.76879119873047ms total_cost_time:189.79430198669434ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5336 prompt_cache_len:5154 prompt_cache_ratio:0.9658920539730135 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:176.99289321899414ms total_cost_time:177.04224586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:5291 prompt_cache_len:5145 prompt_cache_ratio:0.9724059724059724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 +INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 +DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:13 [batch.py:51] router release req id 8 +INFO 06-24 20:03:13 [batch.py:51] router release req id 120 +INFO 06-24 20:03:13 [batch.py:51] router release req id 400 +INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.07023143768310547 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06479072570800781 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.057845115661621094 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.07196855545043945 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.06797099113464355 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06226181983947754 s +DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=157606791495605492007818834588376794682, time:1750766593.030438s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:13 [manager.py:391] +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:144.97065544128418ms total_cost_time:145.01285552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5335 prompt_cache_len:5151 prompt_cache_ratio:0.9655107778819119 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:168.69854927062988ms total_cost_time:168.73621940612793ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5337 prompt_cache_len:5154 prompt_cache_ratio:0.9657110736368747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:161.8201732635498ms total_cost_time:161.84568405151367ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5292 prompt_cache_len:5145 prompt_cache_ratio:0.9722222222222222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 +DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:13 [batch.py:51] router release req id 8 +INFO 06-24 20:03:13 [batch.py:51] router release req id 120 +INFO 06-24 20:03:13 [batch.py:51] router release req id 400 +INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.10184240341186523 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06973958015441895 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.06485867500305176 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.1033639907836914 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.0726320743560791 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06896209716796875 s +DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=188242593780906835263794326487222722732, time:1750766593.2112694s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:13 [manager.py:391] +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:199.65195655822754ms total_cost_time:199.69582557678223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5336 prompt_cache_len:5151 prompt_cache_ratio:0.9653298350824587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:319.2763328552246ms total_cost_time:319.3178176879883ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5338 prompt_cache_len:5154 prompt_cache_ratio:0.9655301611090296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:326.72619819641113ms total_cost_time:326.7652988433838ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5293 prompt_cache_len:5145 prompt_cache_ratio:0.9720385414698659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 +DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:13 [batch.py:51] router release req id 8 +INFO 06-24 20:03:13 [batch.py:51] router release req id 120 +INFO 06-24 20:03:13 [batch.py:51] router release req id 400 +INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.23711180686950684 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.08566713333129883 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.0733346939086914 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.23889541625976562 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.08885645866394043 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.0777883529663086 s +DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=131936270174258965455591418755039847242, time:1750766593.5528991s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:13 [manager.py:391] +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:302.609920501709ms total_cost_time:302.65355110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5337 prompt_cache_len:5151 prompt_cache_ratio:0.9651489600899381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:190.64664840698242ms total_cost_time:190.68622589111328ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5339 prompt_cache_len:5154 prompt_cache_ratio:0.9653493163513767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:178.36570739746094ms total_cost_time:178.3902645111084ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5294 prompt_cache_len:5145 prompt_cache_ratio:0.971854930109558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 +DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:13 [batch.py:51] router release req id 8 +INFO 06-24 20:03:13 [batch.py:51] router release req id 120 +INFO 06-24 20:03:13 [batch.py:51] router release req id 400 +INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.11217737197875977 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06891465187072754 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.0636281967163086 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.11380553245544434 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.07216238975524902 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06802606582641602 s +DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=10450261760703601424695349161846293356, time:1750766593.7355967s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:13 [manager.py:391] +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:200.0892162322998ms total_cost_time:200.1335620880127ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5338 prompt_cache_len:5151 prompt_cache_ratio:0.964968152866242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:156.99243545532227ms total_cost_time:157.01842308044434ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5340 prompt_cache_len:5154 prompt_cache_ratio:0.9651685393258427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:172.64246940612793ms total_cost_time:172.68109321594238ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5295 prompt_cache_len:5145 prompt_cache_ratio:0.9716713881019831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 +DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:13 [batch.py:51] router release req id 8 +INFO 06-24 20:03:13 [batch.py:51] router release req id 120 +INFO 06-24 20:03:13 [batch.py:51] router release req id 400 +INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.085052490234375 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.07938790321350098 s +INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.06761765480041504 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.08659005165100098 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.08255577087402344 s +INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.07191276550292969 s +DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=169342856567867684146074955245936758200, time:1750766593.9175556s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:13 [manager.py:391] +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 current batch size: 3 +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:148.78106117248535ms total_cost_time:148.82421493530273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5339 prompt_cache_len:5151 prompt_cache_ratio:0.9647874133732909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 +ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:184.75866317749023ms total_cost_time:184.7987174987793ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5341 prompt_cache_len:5154 prompt_cache_ratio:0.964987829994383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:172.96719551086426ms total_cost_time:172.99222946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5296 prompt_cache_len:5145 prompt_cache_ratio:0.971487915407855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 +DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:14 [batch.py:51] router release req id 8 +INFO 06-24 20:03:14 [batch.py:51] router release req id 120 +INFO 06-24 20:03:14 [batch.py:51] router release req id 400 +INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.11292767524719238 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06888198852539062 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06382298469543457 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.11452674865722656 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07187414169311523 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.0680234432220459 s +DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=259080315080310031435413927249989612548, time:1750766594.0998464s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:14 [manager.py:391] +DEBUG 06-24 20:03:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 74123.011 tokens/s +DEBUG 06-24 20:03:14 [stats.py:37] Avg prompt tokens throughput: 74095.365 tokens/s +DEBUG 06-24 20:03:14 [stats.py:37] Avg generate tokens throughput: 27.646 tokens/s +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:198.92311096191406ms total_cost_time:198.96793365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5340 prompt_cache_len:5151 prompt_cache_ratio:0.9646067415730337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:154.84309196472168ms total_cost_time:154.86812591552734ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5342 prompt_cache_len:5154 prompt_cache_ratio:0.9648071883189816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:170.48883438110352ms total_cost_time:170.52817344665527ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5297 prompt_cache_len:5145 prompt_cache_ratio:0.9713045119879177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 +DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:14 [batch.py:51] router release req id 8 +INFO 06-24 20:03:14 [batch.py:51] router release req id 120 +INFO 06-24 20:03:14 [batch.py:51] router release req id 400 +INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.24832463264465332 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.24385905265808105 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.23159050941467285 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.2501237392425537 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.24709606170654297 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.23588991165161133 s +DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=141971760801296374767195326882432588540, time:1750766594.4423323s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:14 [manager.py:391] +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:346.5564250946045ms total_cost_time:346.6014862060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5341 prompt_cache_len:5151 prompt_cache_ratio:0.9644261374274481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:342.0062065124512ms total_cost_time:342.03243255615234ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5343 prompt_cache_len:5154 prompt_cache_ratio:0.9646266142616507 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:334.43188667297363ms total_cost_time:334.48076248168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:5298 prompt_cache_len:5145 prompt_cache_ratio:0.9711211778029445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 +DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:14 [batch.py:51] router release req id 8 +INFO 06-24 20:03:14 [batch.py:51] router release req id 120 +INFO 06-24 20:03:14 [batch.py:51] router release req id 400 +INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.07294654846191406 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06713199615478516 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06101107597351074 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.07456398010253906 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07023024559020996 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.06514811515808105 s +DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=40778025634488550235479753694553256226, time:1750766594.6247032s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:14 [manager.py:391] +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:143.79525184631348ms total_cost_time:143.83721351623535ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5342 prompt_cache_len:5151 prompt_cache_ratio:0.9642456008985398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:170.94969749450684ms total_cost_time:170.9880828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5344 prompt_cache_len:5154 prompt_cache_ratio:0.9644461077844312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:164.6571159362793ms total_cost_time:164.68238830566406ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5299 prompt_cache_len:5145 prompt_cache_ratio:0.9709379128137384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 +DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:14 [batch.py:51] router release req id 8 +INFO 06-24 20:03:14 [batch.py:51] router release req id 120 +INFO 06-24 20:03:14 [batch.py:51] router release req id 400 +INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.1055765151977539 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06861662864685059 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06290388107299805 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.1073000431060791 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07172536849975586 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.06704378128051758 s +DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=20284907941312092189839901193717122740, time:1750766594.8065596s req_ids:[8, 120, 400] +DEBUG 06-24 20:03:14 [manager.py:391] +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:200.00481605529785ms total_cost_time:200.04820823669434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5343 prompt_cache_len:5151 prompt_cache_ratio:0.9640651319483436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:162.99962997436523ms total_cost_time:163.0268096923828ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5345 prompt_cache_len:5154 prompt_cache_ratio:0.9642656688493919 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 +INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:172.1630096435547ms total_cost_time:172.20282554626465ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5300 prompt_cache_len:5145 prompt_cache_ratio:0.970754716981132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 +DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:14 [batch.py:51] router release req id 8 +INFO 06-24 20:03:14 [batch.py:51] router release req id 120 +INFO 06-24 20:03:14 [batch.py:51] router release req id 400 +INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.07947611808776855 s +INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06709527969360352 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.08098983764648438 s +INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.07028484344482422 s +DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=327174093909908919411840986831610256029, time:1750766594.9872627s req_ids:[8, 400] +DEBUG 06-24 20:03:14 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:147.60255813598633ms total_cost_time:147.6445198059082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5344 prompt_cache_len:5151 prompt_cache_ratio:0.9638847305389222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +WARNING 06-24 20:03:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_539 and create again +INFO 06-24 20:03:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_539 +WARNING 06-24 20:03:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_539 and create again +INFO 06-24 20:03:15 [shm_array.py:30] create shm 12322_0_shm_prompts_539 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:150.19869804382324ms total_cost_time:150.2377986907959ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5301 prompt_cache_len:5145 prompt_cache_ratio:0.9705715902659876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.05526137351989746 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.04001045227050781 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.05663442611694336 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04280972480773926 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=189951137416322866039773932100182786338, time:1750766595.1170263s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:132.490873336792ms total_cost_time:132.53450393676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5345 prompt_cache_len:5151 prompt_cache_ratio:0.9637043966323667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:117.30670928955078ms total_cost_time:117.33293533325195ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5302 prompt_cache_len:5145 prompt_cache_ratio:0.9703885326291966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.04217696189880371 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03687310218811035 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04360365867614746 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.03970789909362793 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=46692940531810719990292236079464638852, time:1750766595.2450464s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:259.655237197876ms total_cost_time:259.69982147216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5346 prompt_cache_len:5151 prompt_cache_ratio:0.9635241301907969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:272.1405029296875ms total_cost_time:272.18079566955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5303 prompt_cache_len:5145 prompt_cache_ratio:0.9702055440316801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.06503462791442871 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.04769539833068848 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.06670260429382324 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.051019906997680664 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=136432012742320644167027789481584047054, time:1750766595.5323694s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:142.6372528076172ms total_cost_time:142.68136024475098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5347 prompt_cache_len:5151 prompt_cache_ratio:0.9633439311763605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:125.47922134399414ms total_cost_time:125.50592422485352ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5304 prompt_cache_len:5145 prompt_cache_ratio:0.9700226244343891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.04376411437988281 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03836464881896973 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04517722129821777 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04124140739440918 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=40478381412344561412632383900827468522, time:1750766595.662321s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:102.98466682434082ms total_cost_time:103.02901268005371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5348 prompt_cache_len:5151 prompt_cache_ratio:0.9631637995512341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:113.42120170593262ms total_cost_time:113.45887184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5305 prompt_cache_len:5145 prompt_cache_ratio:0.9698397737983034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.06212925910949707 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.0465695858001709 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.06363844871520996 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04974842071533203 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=247422156102940647801278631174207531889, time:1750766595.7900999s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:139.8324966430664ms total_cost_time:139.8763656616211ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5349 prompt_cache_len:5151 prompt_cache_ratio:0.9629837352776219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:124.47714805603027ms total_cost_time:124.50242042541504ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5306 prompt_cache_len:5145 prompt_cache_ratio:0.9696569920844327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:15 [batch.py:51] router release req id 8 +INFO 06-24 20:03:15 [batch.py:51] router release req id 400 +INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.0438838005065918 s +INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03831219673156738 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04529142379760742 s +INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.041277170181274414 s +DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=98433266959917497981121697478807169414, time:1750766595.9207468s req_ids:[8, 400] +DEBUG 06-24 20:03:15 [manager.py:391] +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:104.61807250976562ms total_cost_time:104.66361045837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5350 prompt_cache_len:5151 prompt_cache_ratio:0.962803738317757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 +ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:114.96949195861816ms total_cost_time:115.00811576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5307 prompt_cache_len:5145 prompt_cache_ratio:0.9694742792538157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.06327080726623535 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.04785466194152832 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.06497001647949219 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.05095171928405762 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=35894921562136922557296646936210573730, time:1750766596.0499103s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:142.64822006225586ms total_cost_time:142.69018173217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5351 prompt_cache_len:5151 prompt_cache_ratio:0.9626238086339002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:127.12407112121582ms total_cost_time:127.14934349060059ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5308 prompt_cache_len:5145 prompt_cache_ratio:0.9692916352675207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.04305100440979004 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.037767648696899414 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.04459071159362793 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.04080605506896973 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=38888879662320504780947184851210148357, time:1750766596.1816561s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:101.00221633911133ms total_cost_time:101.04703903198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5352 prompt_cache_len:5151 prompt_cache_ratio:0.9624439461883408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:113.91162872314453ms total_cost_time:113.94977569580078ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5309 prompt_cache_len:5145 prompt_cache_ratio:0.9691090600866453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +INFO 06-24 20:03:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.227386474609375 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.2101593017578125 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.229170560836792 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.21335315704345703 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=270143158965125304432810081785770064385, time:1750766596.4736764s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:298.37608337402344ms total_cost_time:298.419713973999ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5353 prompt_cache_len:5151 prompt_cache_ratio:0.9622641509433962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:294.1858768463135ms total_cost_time:294.2242622375488ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5310 prompt_cache_len:5145 prompt_cache_ratio:0.9689265536723164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.05416274070739746 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.041588783264160156 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.05591702461242676 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.044759273529052734 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=311394143339911508401203537531421472913, time:1750766596.6023881s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:131.15596771240234ms total_cost_time:131.19935989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5354 prompt_cache_len:5151 prompt_cache_ratio:0.962084422861412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:118.56532096862793ms total_cost_time:118.59130859375ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5311 prompt_cache_len:5145 prompt_cache_ratio:0.9687441159856901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.04376959800720215 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.039160728454589844 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.045458316802978516 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.04223227500915527 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=21990912508586108754065992746704248702, time:1750766596.7321157s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:113.62671852111816ms total_cost_time:113.66939544677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5355 prompt_cache_len:5151 prompt_cache_ratio:0.9619047619047619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:121.90103530883789ms total_cost_time:121.94037437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5312 prompt_cache_len:5145 prompt_cache_ratio:0.9685617469879518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.05467033386230469 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.04224729537963867 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.05626034736633301 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.045326948165893555 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=229560545551786866641632425460018259376, time:1750766596.8612866s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:131.14666938781738ms total_cost_time:131.18934631347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5356 prompt_cache_len:5151 prompt_cache_ratio:0.9617251680358476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:118.79324913024902ms total_cost_time:118.8192367553711ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5313 prompt_cache_len:5145 prompt_cache_ratio:0.9683794466403162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 +INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 +DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:16 [batch.py:51] router release req id 8 +INFO 06-24 20:03:16 [batch.py:51] router release req id 400 +INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.042740821838378906 s +INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.03650379180908203 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.04443788528442383 s +INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.03966951370239258 s +DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=186243878365602043137147869268313065741, time:1750766596.989342s req_ids:[8, 400] +DEBUG 06-24 20:03:16 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:112.67781257629395ms total_cost_time:112.72048950195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5357 prompt_cache_len:5151 prompt_cache_ratio:0.9615456412170991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:117.2645092010498ms total_cost_time:117.30194091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5314 prompt_cache_len:5145 prompt_cache_ratio:0.9681972149040271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.053771257400512695 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.042955636978149414 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.055507659912109375 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.04607725143432617 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=230636701291539464809625689598923462140, time:1750766597.1193354s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:131.666898727417ms total_cost_time:131.71029090881348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5358 prompt_cache_len:5151 prompt_cache_ratio:0.9613661814109743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:120.93472480773926ms total_cost_time:120.97048759460449ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:5315 prompt_cache_len:5145 prompt_cache_ratio:0.9680150517403575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04317283630371094 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.03769993782043457 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.04466533660888672 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.04063582420349121 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=120869968107718192522984805409364279403, time:1750766597.2487464s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:264.00065422058105ms total_cost_time:264.04285430908203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5359 prompt_cache_len:5151 prompt_cache_ratio:0.961186788579959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:279.6156406402588ms total_cost_time:279.65474128723145ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5316 prompt_cache_len:5145 prompt_cache_ratio:0.9678329571106095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.06868243217468262 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04792928695678711 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.07042360305786133 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.051039695739746094 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=185462889001181234029293729776561510086, time:1750766597.544036s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:146.11434936523438ms total_cost_time:146.15726470947266ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5360 prompt_cache_len:5151 prompt_cache_ratio:0.9610074626865671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:125.28634071350098ms total_cost_time:125.31399726867676ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5317 prompt_cache_len:5145 prompt_cache_ratio:0.9676509309761143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04425168037414551 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.03928947448730469 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.045926570892333984 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.042326927185058594 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=188593728979199905353980186123864104054, time:1750766597.6732187s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:96.36425971984863ms total_cost_time:96.4057445526123ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5361 prompt_cache_len:5151 prompt_cache_ratio:0.9608282036933408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:116.02663993835449ms total_cost_time:116.06526374816895ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5318 prompt_cache_len:5145 prompt_cache_ratio:0.9674689732982324 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.07289242744445801 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04809832572937012 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.07454705238342285 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.05107879638671875 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=280100085227197225193040152187720737292, time:1750766597.8044372s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:148.35596084594727ms total_cost_time:148.40197563171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5362 prompt_cache_len:5151 prompt_cache_ratio:0.9606490115628497 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:125.0462532043457ms total_cost_time:125.08416175842285ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5319 prompt_cache_len:5145 prompt_cache_ratio:0.9672870840383531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 +INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 +DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:17 [batch.py:51] router release req id 8 +INFO 06-24 20:03:17 [batch.py:51] router release req id 400 +INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04773211479187012 s +INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04088282585144043 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.04934430122375488 s +INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.044008493423461914 s +DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=156165060869680668527641726546416142808, time:1750766597.934053s req_ids:[8, 400] +DEBUG 06-24 20:03:17 [manager.py:391] +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:125.30040740966797ms total_cost_time:125.34427642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5363 prompt_cache_len:5151 prompt_cache_ratio:0.9604698862576916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:118.55173110961914ms total_cost_time:118.57914924621582ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5320 prompt_cache_len:5145 prompt_cache_ratio:0.9671052631578947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.04442906379699707 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.03907513618469238 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.04649662971496582 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04331088066101074 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=273966127553962088038974822208457010276, time:1750766598.0649917s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:116.63603782653809ms total_cost_time:116.68062210083008ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5364 prompt_cache_len:5151 prompt_cache_ratio:0.9602908277404921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:122.51424789428711ms total_cost_time:122.55191802978516ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5321 prompt_cache_len:5145 prompt_cache_ratio:0.9669235106183048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.055776119232177734 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.04394197463989258 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.05722236633300781 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.046793222427368164 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=117949674048193035804295712480210034488, time:1750766598.1982203s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:134.31692123413086ms total_cost_time:134.35959815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5365 prompt_cache_len:5151 prompt_cache_ratio:0.9601118359739049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:122.57075309753418ms total_cost_time:122.59793281555176ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5322 prompt_cache_len:5145 prompt_cache_ratio:0.9667418263810598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.21005940437316895 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.20437359809875488 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.2119007110595703 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.2075343132019043 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=214011622827882394983153150710142588809, time:1750766598.4954398s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:288.6219024658203ms total_cost_time:288.6662483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5366 prompt_cache_len:5151 prompt_cache_ratio:0.9599329109206113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:282.7904224395752ms total_cost_time:282.81712532043457ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5323 prompt_cache_len:5145 prompt_cache_ratio:0.9665602104076648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.043175697326660156 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.03703474998474121 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.044617652893066406 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.03991079330444336 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=301697731384390924163457577554198952603, time:1750766598.6244838s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:114.58826065063477ms total_cost_time:114.63212966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5367 prompt_cache_len:5151 prompt_cache_ratio:0.9597540525433202 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:120.16773223876953ms total_cost_time:120.20540237426758ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5324 prompt_cache_len:5145 prompt_cache_ratio:0.9663786626596544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.052245378494262695 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.04084420204162598 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.053244590759277344 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04190850257873535 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=102727216634116764231115538267062852807, time:1750766598.7532914s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:129.8372745513916ms total_cost_time:129.87899780273438ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5368 prompt_cache_len:5151 prompt_cache_ratio:0.959575260804769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:118.35575103759766ms total_cost_time:118.38269233703613ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5325 prompt_cache_len:5145 prompt_cache_ratio:0.9661971830985916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.041342735290527344 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.036348581314086914 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.042851924896240234 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.039415836334228516 s +DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=98124483665698792647859000193566248540, time:1750766598.8847835s req_ids:[8, 400] +DEBUG 06-24 20:03:18 [manager.py:391] +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:110.37969589233398ms total_cost_time:110.43381690979004ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5369 prompt_cache_len:5151 prompt_cache_ratio:0.9593965356677221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 +ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:117.15388298034668ms total_cost_time:117.19298362731934ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5326 prompt_cache_len:5145 prompt_cache_ratio:0.9660157716860683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 +DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:18 [batch.py:51] router release req id 8 +INFO 06-24 20:03:18 [batch.py:51] router release req id 400 +INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.05618596076965332 s +INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.045150041580200195 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.05710721015930176 s +INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04642224311828613 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=147849587969594960881713478086376920888, time:1750766599.014787s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:134.51647758483887ms total_cost_time:134.55796241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5370 prompt_cache_len:5151 prompt_cache_ratio:0.9592178770949721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:123.38685989379883ms total_cost_time:123.4128475189209ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5327 prompt_cache_len:5145 prompt_cache_ratio:0.9658344283837057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.04154849052429199 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.036346435546875 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.04313373565673828 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03940391540527344 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=255306634295527561674110196835588646071, time:1750766599.1450534s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:106.36138916015625ms total_cost_time:106.40597343444824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5371 prompt_cache_len:5151 prompt_cache_ratio:0.959039285049339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:113.07692527770996ms total_cost_time:113.11459541320801ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5328 prompt_cache_len:5145 prompt_cache_ratio:0.9656531531531531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.05828714370727539 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.04680824279785156 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.05968165397644043 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.04972648620605469 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=30574143479048206985942298018467704674, time:1750766599.273315s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:146.4998722076416ms total_cost_time:146.5449333190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5372 prompt_cache_len:5151 prompt_cache_ratio:0.9588607594936709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:285.0158214569092ms total_cost_time:285.05873680114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5329 prompt_cache_len:5145 prompt_cache_ratio:0.9654719459560893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.19778752326965332 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.04796576499938965 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.199296236038208 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.0510556697845459 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=270871851661331763059887179467384104673, time:1750766599.5660577s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:275.1026153564453ms total_cost_time:275.1462459564209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5373 prompt_cache_len:5151 prompt_cache_ratio:0.9586823003908431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:125.35643577575684ms total_cost_time:125.38266181945801ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5330 prompt_cache_len:5145 prompt_cache_ratio:0.9652908067542214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.04194974899291992 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.035865068435668945 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.043467044830322266 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03886151313781738 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=112294743771034080161203355033738056235, time:1750766599.69536s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:115.8151626586914ms total_cost_time:115.85712432861328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5374 prompt_cache_len:5151 prompt_cache_ratio:0.9585039077037588 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:111.65165901184082ms total_cost_time:111.68527603149414ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:5331 prompt_cache_len:5145 prompt_cache_ratio:0.9651097355092854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.05036592483520508 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.043828487396240234 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.05179238319396973 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.046666860580444336 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=273266674224674944785396183209738039119, time:1750766599.8254528s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:128.71074676513672ms total_cost_time:128.7548542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5375 prompt_cache_len:5151 prompt_cache_ratio:0.9583255813953488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:122.37954139709473ms total_cost_time:122.41649627685547ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5332 prompt_cache_len:5145 prompt_cache_ratio:0.9649287321830458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 +INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 +DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:19 [batch.py:51] router release req id 8 +INFO 06-24 20:03:19 [batch.py:51] router release req id 400 +INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.041793107986450195 s +INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.03604006767272949 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.04351019859313965 s +INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03925728797912598 s +DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=297545294467207005180114693576846680623, time:1750766599.9568624s req_ids:[8, 400] +DEBUG 06-24 20:03:19 [manager.py:391] +ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:113.5258674621582ms total_cost_time:113.56949806213379ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5376 prompt_cache_len:5151 prompt_cache_ratio:0.9581473214285714 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:120.72896957397461ms total_cost_time:120.76687812805176ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5333 prompt_cache_len:5145 prompt_cache_ratio:0.964747796737296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.052423954010009766 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.040143728256225586 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.05401468276977539 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.0430912971496582 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=217055771786444408326993347723112055811, time:1750766600.0868733s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:130.37395477294922ms total_cost_time:130.418062210083ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5377 prompt_cache_len:5151 prompt_cache_ratio:0.9579691277664125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:118.05057525634766ms total_cost_time:118.07703971862793ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5334 prompt_cache_len:5145 prompt_cache_ratio:0.9645669291338582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.04285025596618652 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.03669095039367676 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04431319236755371 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.03964710235595703 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=112477783830639367390967217710355093768, time:1750766600.2170322s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:111.85002326965332ms total_cost_time:111.8929386138916ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5378 prompt_cache_len:5151 prompt_cache_ratio:0.9577910003718855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:118.47233772277832ms total_cost_time:118.51167678833008ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5335 prompt_cache_len:5145 prompt_cache_ratio:0.9643861293345829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.054384469985961914 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.04178619384765625 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.05599856376647949 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.04481172561645508 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=111760969116218091044910498362681902475, time:1750766600.3461525s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:291.4910316467285ms total_cost_time:291.55564308166504ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:5379 prompt_cache_len:5151 prompt_cache_ratio:0.9576129392080313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:278.98287773132324ms total_cost_time:279.0100574493408ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5336 prompt_cache_len:5145 prompt_cache_ratio:0.9642053973013494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.040621042251586914 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.035002946853637695 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04219484329223633 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.038141489028930664 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=276025679932591819049439770061506921453, time:1750766600.6369236s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:99.53069686889648ms total_cost_time:99.57313537597656ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5380 prompt_cache_len:5151 prompt_cache_ratio:0.9574349442379182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:112.55383491516113ms total_cost_time:112.59222030639648ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5337 prompt_cache_len:5145 prompt_cache_ratio:0.9640247329960652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.06434202194213867 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.04566383361816406 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.06608295440673828 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.048783302307128906 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=286135391832825627885999024620823385726, time:1750766600.7672696s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:144.6220874786377ms total_cost_time:144.6688175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5381 prompt_cache_len:5151 prompt_cache_ratio:0.9572570154246423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:125.84424018859863ms total_cost_time:125.87094306945801ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5338 prompt_cache_len:5145 prompt_cache_ratio:0.9638441363806669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:20 [batch.py:51] router release req id 8 +INFO 06-24 20:03:20 [batch.py:51] router release req id 400 +INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.04098677635192871 s +INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.03562450408935547 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04268312454223633 s +INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.038706064224243164 s +DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=78164605853841112862269592373220035213, time:1750766600.8976705s req_ids:[8, 400] +DEBUG 06-24 20:03:20 [manager.py:391] +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:96.87423706054688ms total_cost_time:96.91619873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5382 prompt_cache_len:5151 prompt_cache_ratio:0.9570791527313266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 +ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:113.4788990020752ms total_cost_time:113.51895332336426ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5339 prompt_cache_len:5145 prompt_cache_ratio:0.9636636074171193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.06828618049621582 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.04655122756958008 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.06995296478271484 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.04954338073730469 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=297213816336514699910842439804809839254, time:1750766601.0273569s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:145.74956893920898ms total_cost_time:145.79272270202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5383 prompt_cache_len:5151 prompt_cache_ratio:0.956901356121122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:123.9314079284668ms total_cost_time:123.95691871643066ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5340 prompt_cache_len:5145 prompt_cache_ratio:0.9634831460674157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.04177379608154297 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.03552532196044922 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.043331146240234375 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.03861212730407715 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=135925319698489064531153805945555175362, time:1750766601.157251s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:93.95551681518555ms total_cost_time:94.0089225769043ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5384 prompt_cache_len:5151 prompt_cache_ratio:0.9567236255572066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:112.30230331420898ms total_cost_time:112.33925819396973ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5341 prompt_cache_len:5145 prompt_cache_ratio:0.963302752293578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.07148361206054688 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.04772377014160156 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.07359766960144043 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.05205821990966797 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=134779627233529057769425696002701442732, time:1750766601.2885098s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:146.78692817687988ms total_cost_time:146.83008193969727ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5385 prompt_cache_len:5151 prompt_cache_ratio:0.9565459610027855 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:124.59278106689453ms total_cost_time:124.62759017944336ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5342 prompt_cache_len:5145 prompt_cache_ratio:0.9631224260576563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.20799803733825684 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.20207691192626953 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.2096102237701416 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.2052173614501953 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=186208313997580636810905768666178473861, time:1750766601.5780487s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:288.12170028686523ms total_cost_time:288.1660461425781ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5386 prompt_cache_len:5151 prompt_cache_ratio:0.9563683624210917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:282.26184844970703ms total_cost_time:282.2885513305664ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5343 prompt_cache_len:5145 prompt_cache_ratio:0.9629421673217293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.04269051551818848 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.0370328426361084 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.04415607452392578 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.0400547981262207 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=222480278121724777104444868214524344190, time:1750766601.7098036s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:106.95958137512207ms total_cost_time:107.00416564941406ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5387 prompt_cache_len:5151 prompt_cache_ratio:0.9561908297753852 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:129.85682487487793ms total_cost_time:129.8964023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5344 prompt_cache_len:5145 prompt_cache_ratio:0.9627619760479041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:21 [batch.py:51] router release req id 8 +INFO 06-24 20:03:21 [batch.py:51] router release req id 400 +INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.0907132625579834 s +INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.06250143051147461 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.09232044219970703 s +INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.0655050277709961 s +DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=124571461464393980073213979239828388332, time:1750766601.8685126s req_ids:[8, 400] +DEBUG 06-24 20:03:21 [manager.py:391] +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:150.7575511932373ms total_cost_time:150.79951286315918ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5388 prompt_cache_len:5151 prompt_cache_ratio:0.9560133630289532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 +ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:154.83736991882324ms total_cost_time:154.8764705657959ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5345 prompt_cache_len:5145 prompt_cache_ratio:0.9625818521983162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.09172987937927246 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.06131577491760254 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.09335494041442871 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.06451940536499023 s +DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=142457900377968623178329445699247551843, time:1750766602.0300777s req_ids:[8, 400] +DEBUG 06-24 20:03:22 [manager.py:391] +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:186.33580207824707ms total_cost_time:186.37871742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5389 prompt_cache_len:5151 prompt_cache_ratio:0.9558359621451105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:155.86423873901367ms total_cost_time:155.89189529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5346 prompt_cache_len:5145 prompt_cache_ratio:0.9624017957351291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05829787254333496 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05179548263549805 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.0599365234375 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05483746528625488 s +DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=234893896941947979002553866056014758351, time:1750766602.1918833s req_ids:[8, 400] +DEBUG 06-24 20:03:22 [manager.py:391] +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:150.9406566619873ms total_cost_time:150.9835720062256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5390 prompt_cache_len:5151 prompt_cache_ratio:0.9556586270871985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:144.44446563720703ms total_cost_time:144.4699764251709ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5347 prompt_cache_len:5145 prompt_cache_ratio:0.9622218066205349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05821728706359863 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05285143852233887 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.05965733528137207 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.0556492805480957 s +DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=177368139352387036710850507953880825522, time:1750766602.3511534s req_ids:[8, 400] +DEBUG 06-24 20:03:22 [manager.py:391] +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:147.5977897644043ms total_cost_time:147.64094352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5391 prompt_cache_len:5151 prompt_cache_ratio:0.9554813578185866 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:154.71291542053223ms total_cost_time:154.75702285766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5348 prompt_cache_len:5145 prompt_cache_ratio:0.9620418848167539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.06644487380981445 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05464601516723633 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.06823897361755371 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05769062042236328 s +DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=261174539934488379927568871207490788573, time:1750766602.5114079s req_ids:[8, 400] +DEBUG 06-24 20:03:22 [manager.py:391] +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:149.19114112854004ms total_cost_time:149.24335479736328ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:5392 prompt_cache_len:5151 prompt_cache_ratio:0.9553041543026706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:322.4632740020752ms total_cost_time:322.5069046020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5349 prompt_cache_len:5145 prompt_cache_ratio:0.9618620302860348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.24780631065368652 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.06302690505981445 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.24949955940246582 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.0659639835357666 s +DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=317924181214390439860970743783356698597, time:1750766602.8485909s req_ids:[8, 400] +DEBUG 06-24 20:03:22 [manager.py:391] +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:341.80521965026855ms total_cost_time:341.8586254119873ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5393 prompt_cache_len:5151 prompt_cache_ratio:0.9551270165028741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:157.02319145202637ms total_cost_time:157.06157684326172ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5350 prompt_cache_len:5145 prompt_cache_ratio:0.9616822429906542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 +INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 +DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:22 [batch.py:51] router release req id 8 +INFO 06-24 20:03:22 [batch.py:51] router release req id 400 +INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05843234062194824 s +INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05263257026672363 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.0601048469543457 s +INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05573582649230957 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=234783476287721002190218136072740691845, time:1750766603.0100996s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:151.06511116027832ms total_cost_time:151.1077880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5394 prompt_cache_len:5151 prompt_cache_ratio:0.9549499443826474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:145.22838592529297ms total_cost_time:145.25485038757324ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5351 prompt_cache_len:5145 prompt_cache_ratio:0.9615025228929173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 +DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:23 [batch.py:51] router release req id 8 +INFO 06-24 20:03:23 [batch.py:51] router release req id 400 +INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.05952906608581543 s +INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.05414128303527832 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.06118273735046387 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.05701899528503418 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=145475987631001248074123468473000341903, time:1750766603.1710052s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +INFO 06-24 20:03:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:03:23 [statics_utils.py:24] mean first cost: 220.15237989672426 ms +INFO 06-24 20:03:23 [statics_utils.py:24] mean per token cost: 0.27344202866706124 ms +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:144.13022994995117ms total_cost_time:144.17123794555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5395 prompt_cache_len:5151 prompt_cache_ratio:0.954772937905468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:150.2683162689209ms total_cost_time:150.30765533447266ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5352 prompt_cache_len:5145 prompt_cache_ratio:0.9613228699551569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 +DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:23 [batch.py:51] router release req id 8 +INFO 06-24 20:03:23 [batch.py:51] router release req id 400 +INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.07183074951171875 s +INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.05985760688781738 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.07356953620910645 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.0631256103515625 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=98074405731391139573687344266870355138, time:1750766603.3330717s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:149.07264709472656ms total_cost_time:149.11675453186035ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5396 prompt_cache_len:5151 prompt_cache_ratio:0.9545959970348407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:153.89466285705566ms total_cost_time:153.94091606140137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5353 prompt_cache_len:5145 prompt_cache_ratio:0.9611432841397347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 +DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:23 [batch.py:51] router release req id 8 +INFO 06-24 20:03:23 [batch.py:51] router release req id 400 +INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.07914233207702637 s +INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.06242012977600098 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0806882381439209 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06532621383666992 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=177230458683442775947328395995968344935, time:1750766603.4940922s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:149.7194766998291ms total_cost_time:149.7645378112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5397 prompt_cache_len:5151 prompt_cache_ratio:0.9544191217342969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:155.12943267822266ms total_cost_time:155.1685333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5354 prompt_cache_len:5145 prompt_cache_ratio:0.9609637654090399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 +DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:23 [batch.py:51] router release req id 8 +INFO 06-24 20:03:23 [batch.py:51] router release req id 400 +INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.08433914184570312 s +INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.0628652572631836 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0858919620513916 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06565260887145996 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=258063346900042113233655153145958006950, time:1750766603.6545858s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:150.58422088623047ms total_cost_time:150.62880516052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5398 prompt_cache_len:5151 prompt_cache_ratio:0.9542423119673953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 +ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:155.68113327026367ms total_cost_time:155.72094917297363ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5355 prompt_cache_len:5145 prompt_cache_ratio:0.9607843137254902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 +DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:23 [batch.py:51] router release req id 8 +INFO 06-24 20:03:23 [batch.py:51] router release req id 400 +INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.09006857872009277 s +INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.06382942199707031 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0915532112121582 s +INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06651949882507324 s +DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=249229290258155323088474967584757917971, time:1750766603.8153276s req_ids:[8, 400] +DEBUG 06-24 20:03:23 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:351.37438774108887ms total_cost_time:351.41777992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5399 prompt_cache_len:5151 prompt_cache_ratio:0.9540655676977218 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:329.23269271850586ms total_cost_time:329.2655944824219ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:5356 prompt_cache_len:5145 prompt_cache_ratio:0.9606049290515309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.06512832641601562 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.0543978214263916 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.06679320335388184 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.0573732852935791 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=52597344188287752416133989793924407275, time:1750766604.1495602s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +DEBUG 06-24 20:03:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 65490.148 tokens/s +DEBUG 06-24 20:03:24 [stats.py:37] Avg prompt tokens throughput: 65465.471 tokens/s +DEBUG 06-24 20:03:24 [stats.py:37] Avg generate tokens throughput: 24.677 tokens/s +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:147.5844383239746ms total_cost_time:147.6287841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5400 prompt_cache_len:5151 prompt_cache_ratio:0.9538888888888889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:149.15847778320312ms total_cost_time:149.19757843017578ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5357 prompt_cache_len:5145 prompt_cache_ratio:0.960425611349636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.07307100296020508 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06075239181518555 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.0745384693145752 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06353449821472168 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=182291598458812760724701045789992271971, time:1750766604.3106132s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:146.57258987426758ms total_cost_time:146.61598205566406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5401 prompt_cache_len:5151 prompt_cache_ratio:0.9537122755045362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:152.2524356842041ms total_cost_time:152.2986888885498ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5358 prompt_cache_len:5145 prompt_cache_ratio:0.9602463605823068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.07987260818481445 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06191205978393555 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.08144092559814453 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06473970413208008 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=332941924801027823445769937160025542787, time:1750766604.4717515s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:149.01494979858398ms total_cost_time:149.05905723571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5402 prompt_cache_len:5151 prompt_cache_ratio:0.9535357275083303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:159.64317321777344ms total_cost_time:159.682035446167ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5359 prompt_cache_len:5145 prompt_cache_ratio:0.9600671767120732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.08970522880554199 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06165480613708496 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.0912163257598877 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06441521644592285 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=305586345978787976424141272683256054825, time:1750766604.6364968s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:149.17564392089844ms total_cost_time:149.21951293945312ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5403 prompt_cache_len:5151 prompt_cache_ratio:0.9533592448639645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:154.50143814086914ms total_cost_time:154.5395851135254ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5360 prompt_cache_len:5145 prompt_cache_ratio:0.9598880597014925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.09705662727355957 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06425786018371582 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.09850859642028809 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.0669853687286377 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=115845301462857095522409489583683656259, time:1750766604.7983978s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:185.91952323913574ms total_cost_time:185.96291542053223ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5404 prompt_cache_len:5151 prompt_cache_ratio:0.9531828275351591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:152.97341346740723ms total_cost_time:153.0003547668457ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5361 prompt_cache_len:5145 prompt_cache_ratio:0.9597090095131505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 +DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:24 [batch.py:51] router release req id 8 +INFO 06-24 20:03:24 [batch.py:51] router release req id 400 +INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.04229736328125 s +INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.03718709945678711 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.043793678283691406 s +INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.040232181549072266 s +DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=188934003509489378439395106842846349720, time:1750766604.9398494s req_ids:[8, 400] +DEBUG 06-24 20:03:24 [manager.py:391] +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:106.69994354248047ms total_cost_time:106.74548149108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5405 prompt_cache_len:5151 prompt_cache_ratio:0.9530064754856614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 +ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:113.97814750671387ms total_cost_time:114.01557922363281ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5362 prompt_cache_len:5145 prompt_cache_ratio:0.9595300261096605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.05905604362487793 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04689764976501465 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.06046724319458008 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04974985122680664 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=106941296513059239210358134024724807212, time:1750766605.0696821s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:299.2374897003174ms total_cost_time:299.2825508117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5406 prompt_cache_len:5151 prompt_cache_ratio:0.9528301886792453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:288.59829902648926ms total_cost_time:288.6345386505127ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5363 prompt_cache_len:5145 prompt_cache_ratio:0.959351109453664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04875373840332031 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04164385795593262 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.05025887489318848 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04451394081115723 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=109518551772225882203425461416569783940, time:1750766605.3635254s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:126.4505386352539ms total_cost_time:126.49273872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5407 prompt_cache_len:5151 prompt_cache_ratio:0.9526539670797115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:119.39740180969238ms total_cost_time:119.42362785339355ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5364 prompt_cache_len:5145 prompt_cache_ratio:0.95917225950783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04301190376281738 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03754377365112305 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04456186294555664 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04060029983520508 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=301397941774356863546476412261306772242, time:1750766605.492426s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:116.78671836853027ms total_cost_time:116.83058738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5408 prompt_cache_len:5151 prompt_cache_ratio:0.9524778106508875 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:123.25358390808105ms total_cost_time:123.29983711242676ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5365 prompt_cache_len:5145 prompt_cache_ratio:0.9589934762348555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.05025053024291992 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03904008865356445 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.051779985427856445 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.0417790412902832 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=105624872389767533399666806461708471141, time:1750766605.6227462s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:129.0872097015381ms total_cost_time:129.13131713867188ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5409 prompt_cache_len:5151 prompt_cache_ratio:0.9523017193566279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:117.69700050354004ms total_cost_time:117.7220344543457ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5366 prompt_cache_len:5145 prompt_cache_ratio:0.9588147595974655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04254436492919922 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.0374608039855957 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04412531852722168 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04032325744628906 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=24229803649190843203552023773833686566, time:1750766605.7525249s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:113.68703842163086ms total_cost_time:113.72900009155273ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5410 prompt_cache_len:5151 prompt_cache_ratio:0.9521256931608133 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:119.02165412902832ms total_cost_time:119.05813217163086ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5367 prompt_cache_len:5145 prompt_cache_ratio:0.9586361095584125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.052778005599975586 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04199981689453125 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.05484318733215332 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04638528823852539 s +DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=177010643694584652993485228236392188125, time:1750766605.881679s req_ids:[8, 400] +DEBUG 06-24 20:03:25 [manager.py:391] +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:131.6516399383545ms total_cost_time:131.69455528259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5411 prompt_cache_len:5151 prompt_cache_ratio:0.9519497320273517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:120.88847160339355ms total_cost_time:120.91374397277832ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5368 prompt_cache_len:5145 prompt_cache_ratio:0.9584575260804769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 +INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 +DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:25 [batch.py:51] router release req id 8 +INFO 06-24 20:03:25 [batch.py:51] router release req id 400 +INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.0415797233581543 s +INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03612041473388672 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04372453689575195 s +INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04061317443847656 s +DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=9730814389562709508790817266911640232, time:1750766606.0124953s req_ids:[8, 400] +DEBUG 06-24 20:03:26 [manager.py:391] +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:112.6091480255127ms total_cost_time:112.65206336975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5412 prompt_cache_len:5151 prompt_cache_ratio:0.9517738359201774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:117.65170097351074ms total_cost_time:117.68770217895508ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5369 prompt_cache_len:5145 prompt_cache_ratio:0.9582790091264668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 +DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:26 [batch.py:51] router release req id 8 +INFO 06-24 20:03:26 [batch.py:51] router release req id 400 +INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.2191617488861084 s +INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.20856332778930664 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.22147083282470703 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.21335339546203613 s +DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=304176749670807226224668701127724980507, time:1750766606.30706s req_ids:[8, 400] +DEBUG 06-24 20:03:26 [manager.py:391] +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:297.47796058654785ms total_cost_time:297.52254486083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5413 prompt_cache_len:5151 prompt_cache_ratio:0.9515980048032514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:286.9689464569092ms total_cost_time:286.99541091918945ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5370 prompt_cache_len:5145 prompt_cache_ratio:0.9581005586592178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 +DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:26 [batch.py:51] router release req id 8 +INFO 06-24 20:03:26 [batch.py:51] router release req id 400 +INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.04301762580871582 s +INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.03847908973693848 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.04520916938781738 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.04186749458312988 s +DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=155768307003440881433306197369814046090, time:1750766606.438347s req_ids:[8, 400] +DEBUG 06-24 20:03:26 [manager.py:391] +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:148.1490135192871ms total_cost_time:148.193359375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5414 prompt_cache_len:5151 prompt_cache_ratio:0.9514222386405615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:157.11665153503418ms total_cost_time:157.15384483337402ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5371 prompt_cache_len:5145 prompt_cache_ratio:0.9579221746415938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 +DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:26 [batch.py:51] router release req id 8 +INFO 06-24 20:03:26 [batch.py:51] router release req id 400 +INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.09540843963623047 s +INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.08272886276245117 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.09698629379272461 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.08544135093688965 s +DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=115379664095328341896948840713000160689, time:1750766606.6432939s req_ids:[8, 400] +DEBUG 06-24 20:03:26 [manager.py:391] +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:199.63550567626953ms total_cost_time:199.6903419494629ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5415 prompt_cache_len:5151 prompt_cache_ratio:0.9512465373961219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:202.64840126037598ms total_cost_time:202.72135734558105ms,out_token_counter:1 mean_per_token_cost_time: 0.07295608520507812ms prompt_token_num:5372 prompt_cache_len:5145 prompt_cache_ratio:0.9577438570364855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 +DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:26 [batch.py:51] router release req id 8 +INFO 06-24 20:03:26 [batch.py:51] router release req id 400 +INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.08435773849487305 s +INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.06706047058105469 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.08589911460876465 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.06977581977844238 s +DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=293968352126310077614312109756214923554, time:1750766606.8370242s req_ids:[8, 400] +DEBUG 06-24 20:03:26 [manager.py:391] +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:147.58944511413574ms total_cost_time:147.63593673706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5416 prompt_cache_len:5151 prompt_cache_ratio:0.9510709010339734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 +ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:160.14909744262695ms total_cost_time:160.1850986480713ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5373 prompt_cache_len:5145 prompt_cache_ratio:0.9575656058068118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 +DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:26 [batch.py:51] router release req id 8 +INFO 06-24 20:03:26 [batch.py:51] router release req id 400 +INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.09514594078063965 s +INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.06626176834106445 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.0965878963470459 s +INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.06908774375915527 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=167325842124086770004192277942009369444, time:1750766607.0014303s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:188.58742713928223ms total_cost_time:188.63177299499512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5417 prompt_cache_len:5151 prompt_cache_ratio:0.9508953295181835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:159.67702865600586ms total_cost_time:159.70277786254883ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5374 prompt_cache_len:5145 prompt_cache_ratio:0.9573874209155192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 +DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:27 [batch.py:51] router release req id 8 +INFO 06-24 20:03:27 [batch.py:51] router release req id 400 +INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06267595291137695 s +INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.0576627254486084 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06410479545593262 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.060454368591308594 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=303111087904259815833922205561312474417, time:1750766607.1631618s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:157.99832344055176ms total_cost_time:158.04672241210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5418 prompt_cache_len:5151 prompt_cache_ratio:0.9507198228128461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:325.01983642578125ms total_cost_time:325.06251335144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5375 prompt_cache_len:5145 prompt_cache_ratio:0.9572093023255814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 +DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:27 [batch.py:51] router release req id 8 +INFO 06-24 20:03:27 [batch.py:51] router release req id 400 +INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.23650550842285156 s +INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.06451678276062012 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.2381153106689453 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06746101379394531 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=23737735877736789029864311632157714978, time:1750766607.5006576s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:301.5153408050537ms total_cost_time:301.5611171722412ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5419 prompt_cache_len:5151 prompt_cache_ratio:0.9505443808820816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:158.22863578796387ms total_cost_time:158.28299522399902ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5376 prompt_cache_len:5145 prompt_cache_ratio:0.95703125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 +DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:27 [batch.py:51] router release req id 8 +INFO 06-24 20:03:27 [batch.py:51] router release req id 400 +INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.09329581260681152 s +INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.06400322914123535 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.0947713851928711 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06665468215942383 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=184186379465645799903256587193586697744, time:1750766607.6640804s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:187.39676475524902ms total_cost_time:187.4384880065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5420 prompt_cache_len:5151 prompt_cache_ratio:0.9503690036900369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:157.944917678833ms total_cost_time:157.9723358154297ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5377 prompt_cache_len:5145 prompt_cache_ratio:0.956853263901804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 +DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:27 [batch.py:51] router release req id 8 +INFO 06-24 20:03:27 [batch.py:51] router release req id 400 +INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06305503845214844 s +INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.057465553283691406 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06470632553100586 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06039118766784668 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=26852431945214795417678461753140578788, time:1750766607.8283565s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 +ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:159.64293479919434ms total_cost_time:159.68775749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5421 prompt_cache_len:5151 prompt_cache_ratio:0.9501936912008855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:155.32898902893066ms total_cost_time:155.36212921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:5378 prompt_cache_len:5145 prompt_cache_ratio:0.9566753439940499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 +INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 +DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:27 [batch.py:51] router release req id 8 +INFO 06-24 20:03:27 [batch.py:51] router release req id 400 +INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06591558456420898 s +INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.058904409408569336 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06739091873168945 s +INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06159090995788574 s +DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=330791697677108148587356519040261344416, time:1750766607.9936192s req_ids:[8, 400] +DEBUG 06-24 20:03:27 [manager.py:391] +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:149.15871620178223ms total_cost_time:149.20306205749512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5422 prompt_cache_len:5151 prompt_cache_ratio:0.950018443378827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:152.6808738708496ms total_cost_time:152.71830558776855ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5379 prompt_cache_len:5145 prompt_cache_ratio:0.9564974902398216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 +DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:28 [batch.py:51] router release req id 8 +INFO 06-24 20:03:28 [batch.py:51] router release req id 400 +INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.07291269302368164 s +INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.0631561279296875 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.07444310188293457 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.06596684455871582 s +DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=338901319347185641443872713378132059222, time:1750766608.1535382s req_ids:[8, 400] +DEBUG 06-24 20:03:28 [manager.py:391] +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:148.37312698364258ms total_cost_time:148.41628074645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5423 prompt_cache_len:5151 prompt_cache_ratio:0.9498432601880877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:155.99584579467773ms total_cost_time:156.0525894165039ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:5380 prompt_cache_len:5145 prompt_cache_ratio:0.9563197026022305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 +DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:28 [batch.py:51] router release req id 8 +INFO 06-24 20:03:28 [batch.py:51] router release req id 400 +INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.08449220657348633 s +INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.06730389595031738 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.08582663536071777 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.06997990608215332 s +DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=247586718328975964051524694451745231976, time:1750766608.318872s req_ids:[8, 400] +DEBUG 06-24 20:03:28 [manager.py:391] +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:149.57404136657715ms total_cost_time:149.61743354797363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5424 prompt_cache_len:5151 prompt_cache_ratio:0.9496681415929203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:159.9726676940918ms total_cost_time:160.02678871154785ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5381 prompt_cache_len:5145 prompt_cache_ratio:0.9561419810444155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 +DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:28 [batch.py:51] router release req id 8 +INFO 06-24 20:03:28 [batch.py:51] router release req id 400 +INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.091552734375 s +INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.06432533264160156 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.0930032730102539 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.0669867992401123 s +DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=337814848559277299921959166467115357587, time:1750766608.4799948s req_ids:[8, 400] +DEBUG 06-24 20:03:28 [manager.py:391] +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:199.8891830444336ms total_cost_time:199.93185997009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5425 prompt_cache_len:5151 prompt_cache_ratio:0.9494930875576036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:326.31921768188477ms total_cost_time:326.37929916381836ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:5382 prompt_cache_len:5145 prompt_cache_ratio:0.9559643255295429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 +DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:28 [batch.py:51] router release req id 8 +INFO 06-24 20:03:28 [batch.py:51] router release req id 400 +INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.22453880310058594 s +INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.0705423355102539 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.22608232498168945 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.07349467277526855 s +DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=60154028013799029199347481649118869184, time:1750766608.818252s req_ids:[8, 400] +DEBUG 06-24 20:03:28 [manager.py:391] +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:300.0452518463135ms total_cost_time:300.08864402770996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5426 prompt_cache_len:5151 prompt_cache_ratio:0.9493180980464431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 +ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:165.14205932617188ms total_cost_time:165.19641876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5383 prompt_cache_len:5145 prompt_cache_ratio:0.9557867360208062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 +DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:28 [batch.py:51] router release req id 8 +INFO 06-24 20:03:28 [batch.py:51] router release req id 400 +INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.0726935863494873 s +INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.05223393440246582 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.07442879676818848 s +INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.0556330680847168 s +DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=154028495638990239280161464070004995579, time:1750766608.971124s req_ids:[8, 400] +DEBUG 06-24 20:03:28 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:149.59239959716797ms total_cost_time:149.63722229003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5427 prompt_cache_len:5151 prompt_cache_ratio:0.94914317302377 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:131.31308555603027ms total_cost_time:131.3624382019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:5384 prompt_cache_len:5145 prompt_cache_ratio:0.9556092124814265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.05073976516723633 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.04285001754760742 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.052385807037353516 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.04587292671203613 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=163135921896879067316561220007680792676, time:1750766609.1041005s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:131.39724731445312ms total_cost_time:131.45852088928223ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:5428 prompt_cache_len:5151 prompt_cache_ratio:0.9489683124539425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:123.66914749145508ms total_cost_time:123.70848655700684ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5385 prompt_cache_len:5145 prompt_cache_ratio:0.9554317548746518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.04875373840332031 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.042389631271362305 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.05027008056640625 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.04531431198120117 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=66245248201670615578028118172994514477, time:1750766609.2394724s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:115.2949333190918ms total_cost_time:115.33713340759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5429 prompt_cache_len:5151 prompt_cache_ratio:0.9487935163013447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:122.15805053710938ms total_cost_time:122.20287322998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5386 prompt_cache_len:5145 prompt_cache_ratio:0.9552543631637579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.061919212341308594 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.048467159271240234 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.0633692741394043 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.05130314826965332 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=20460130740402664492274137011520852336, time:1750766609.372251s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:140.0299072265625ms total_cost_time:140.0902271270752ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5430 prompt_cache_len:5151 prompt_cache_ratio:0.9486187845303867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:126.73234939575195ms total_cost_time:126.76453590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:5387 prompt_cache_len:5145 prompt_cache_ratio:0.9550770373120475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.042932748794555664 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.036199331283569336 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.04449129104614258 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.03898000717163086 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=183371005889916883990738994599749282125, time:1750766609.506414s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:104.80642318725586ms total_cost_time:104.85124588012695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5431 prompt_cache_len:5151 prompt_cache_ratio:0.9484441171055055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:118.35598945617676ms total_cost_time:118.4084415435791ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:5388 prompt_cache_len:5145 prompt_cache_ratio:0.9548997772828508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.06884407997131348 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.048369646072387695 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.07041501998901367 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.05144786834716797 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=45311421111920125492293462332825522287, time:1750766609.6423876s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:147.84669876098633ms total_cost_time:147.89390563964844ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:5432 prompt_cache_len:5151 prompt_cache_ratio:0.9482695139911634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:127.69746780395508ms total_cost_time:127.73752212524414ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5389 prompt_cache_len:5145 prompt_cache_ratio:0.954722583039525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 +INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 +DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:29 [batch.py:51] router release req id 8 +INFO 06-24 20:03:29 [batch.py:51] router release req id 400 +INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.20403599739074707 s +INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.1997208595275879 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.20572781562805176 s +INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.20275378227233887 s +DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=102080339307434120591735184785497978270, time:1750766609.9338508s req_ids:[8, 400] +DEBUG 06-24 20:03:29 [manager.py:391] +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:284.75284576416016ms total_cost_time:284.79504585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5433 prompt_cache_len:5151 prompt_cache_ratio:0.9480949751518498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:280.3466320037842ms total_cost_time:280.37452697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5390 prompt_cache_len:5145 prompt_cache_ratio:0.9545454545454546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0449519157409668 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.03904891014099121 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.046533823013305664 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.04213857650756836 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=126334105448398784146160656944666796615, time:1750766610.0661843s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:104.65312004089355ms total_cost_time:104.68101501464844ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5434 prompt_cache_len:5151 prompt_cache_ratio:0.9479205005520795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:116.09530448913574ms total_cost_time:116.11747741699219ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5391 prompt_cache_len:5145 prompt_cache_ratio:0.9543683917640512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.06585979461669922 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04912996292114258 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.06747126579284668 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.05209231376647949 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=242107810493557566891979709824189032318, time:1750766610.197466s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:146.9252109527588ms total_cost_time:146.95191383361816ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5435 prompt_cache_len:5151 prompt_cache_ratio:0.9477460901563938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:129.90307807922363ms total_cost_time:129.91714477539062ms,out_token_counter:1 mean_per_token_cost_time: 0.014066696166992188ms prompt_token_num:5392 prompt_cache_len:5145 prompt_cache_ratio:0.9541913946587537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0483396053314209 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04661202430725098 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.049868106842041016 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.0494232177734375 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=67592681871443124720309873873702244054, time:1750766610.334043s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:126.35612487792969ms total_cost_time:126.39927864074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5436 prompt_cache_len:5151 prompt_cache_ratio:0.9475717439293598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:124.53842163085938ms total_cost_time:124.56369400024414ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5393 prompt_cache_len:5145 prompt_cache_ratio:0.954014463193028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0446925163269043 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04045701026916504 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.046218156814575195 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.043189048767089844 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=260155226503569480391415024607049133174, time:1750766610.4645112s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:116.78838729858398ms total_cost_time:116.83058738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5437 prompt_cache_len:5151 prompt_cache_ratio:0.947397461835571 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:123.0618953704834ms total_cost_time:123.09694290161133ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5394 prompt_cache_len:5145 prompt_cache_ratio:0.9538375973303671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.05352020263671875 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.043241024017333984 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.0549314022064209 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.04600262641906738 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=313234555886742557445646386306637271280, time:1750766610.5965967s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:132.52925872802734ms total_cost_time:132.57145881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5438 prompt_cache_len:5151 prompt_cache_ratio:0.947223243839647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:122.29228019714355ms total_cost_time:122.31922149658203ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5395 prompt_cache_len:5145 prompt_cache_ratio:0.953660797034291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:30 [batch.py:51] router release req id 8 +INFO 06-24 20:03:30 [batch.py:51] router release req id 400 +INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.04629039764404297 s +INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.03888344764709473 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.04778409004211426 s +INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.041953086853027344 s +DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=198433364997030551283595780849039249513, time:1750766610.732075s req_ids:[8, 400] +DEBUG 06-24 20:03:30 [manager.py:391] +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:107.01417922973633ms total_cost_time:107.05995559692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5439 prompt_cache_len:5151 prompt_cache_ratio:0.9470490899062327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 +ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:304.6762943267822ms total_cost_time:304.7182559967041ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5396 prompt_cache_len:5145 prompt_cache_ratio:0.9534840622683469 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.2552757263183594 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.050196170806884766 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.25688695907592773 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05312943458557129 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=8463104666033899046106122913629846796, time:1750766611.0526986s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:333.13965797424316ms total_cost_time:333.18352699279785ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5440 prompt_cache_len:5151 prompt_cache_ratio:0.946875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:128.03030014038086ms total_cost_time:128.05986404418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:5397 prompt_cache_len:5145 prompt_cache_ratio:0.953307392996109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.04332733154296875 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.03737449645996094 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.044718265533447266 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04013657569885254 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=88066736761649289802742310716448559315, time:1750766611.1830666s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:110.96954345703125ms total_cost_time:110.99648475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5441 prompt_cache_len:5151 prompt_cache_ratio:0.946700974085646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:116.01614952087402ms total_cost_time:116.05310440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5398 prompt_cache_len:5145 prompt_cache_ratio:0.9531307891811782 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.0576629638671875 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.047483205795288086 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.05909895896911621 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05029010772705078 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=281492459761609073606103375947265986960, time:1750766611.3139722s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:140.0768756866455ms total_cost_time:140.12837409973145ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:5442 prompt_cache_len:5151 prompt_cache_ratio:0.9465270121278941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:130.11527061462402ms total_cost_time:130.15127182006836ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5399 prompt_cache_len:5145 prompt_cache_ratio:0.9529542507871828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.04406142234802246 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.03776431083679199 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.045690059661865234 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04068279266357422 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=762836077333495209866306699082543465, time:1750766611.449527s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:104.22706604003906ms total_cost_time:104.27021980285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5443 prompt_cache_len:5151 prompt_cache_ratio:0.9463531140914937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:115.49568176269531ms total_cost_time:115.53430557250977ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5400 prompt_cache_len:5145 prompt_cache_ratio:0.9527777777777777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.06519579887390137 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.04776334762573242 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.06674647331237793 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05061507225036621 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=21300543656571417567429651290240614390, time:1750766611.5809498s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:145.11632919311523ms total_cost_time:145.16019821166992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5444 prompt_cache_len:5151 prompt_cache_ratio:0.9461792799412196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:127.64120101928711ms total_cost_time:127.66695022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5401 prompt_cache_len:5145 prompt_cache_ratio:0.952601370116645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.20452284812927246 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.20039844512939453 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.20624089241027832 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.20335030555725098 s +DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=20733769253323551879912948740146819659, time:1750766611.8724234s req_ids:[8, 400] +DEBUG 06-24 20:03:31 [manager.py:391] +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:282.98497200012207ms total_cost_time:283.02764892578125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5445 prompt_cache_len:5151 prompt_cache_ratio:0.9460055096418732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:278.6588668823242ms total_cost_time:278.6848545074463ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5402 prompt_cache_len:5145 prompt_cache_ratio:0.9524250277674935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 +INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 +DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:31 [batch.py:51] router release req id 8 +INFO 06-24 20:03:31 [batch.py:51] router release req id 400 +INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.0439298152923584 s +INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.038420677185058594 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.046257734298706055 s +INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04876399040222168 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=337857081280152104816477595388679153727, time:1750766612.004838s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:112.43987083435059ms total_cost_time:112.48183250427246ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5446 prompt_cache_len:5151 prompt_cache_ratio:0.9458318031582813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:119.09031867980957ms total_cost_time:119.12846565246582ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5403 prompt_cache_len:5145 prompt_cache_ratio:0.9522487506940589 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:32 [batch.py:51] router release req id 8 +INFO 06-24 20:03:32 [batch.py:51] router release req id 400 +INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.058426618576049805 s +INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.047699689865112305 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.059938669204711914 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.05072283744812012 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=228066695473364257078105924335940953061, time:1750766612.1377354s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:136.1987590789795ms total_cost_time:136.25669479370117ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5447 prompt_cache_len:5151 prompt_cache_ratio:0.9456581604552965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:125.73456764221191ms total_cost_time:125.77176094055176ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5404 prompt_cache_len:5145 prompt_cache_ratio:0.9520725388601037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:32 [batch.py:51] router release req id 8 +INFO 06-24 20:03:32 [batch.py:51] router release req id 400 +INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.04080080986022949 s +INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.03333711624145508 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.04228067398071289 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.036275625228881836 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=247604284250055934613918963996553235052, time:1750766612.2668216s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:104.88533973693848ms total_cost_time:104.92920875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5448 prompt_cache_len:5151 prompt_cache_ratio:0.9454845814977973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:111.44757270812988ms total_cost_time:111.49168014526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5405 prompt_cache_len:5145 prompt_cache_ratio:0.9518963922294172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:32 [batch.py:51] router release req id 8 +INFO 06-24 20:03:32 [batch.py:51] router release req id 400 +INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.06156516075134277 s +INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.047151803970336914 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.06313347816467285 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.0499722957611084 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=174429969657558533135303064167703084055, time:1750766612.3980412s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:148.0262279510498ms total_cost_time:148.0691432952881ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5449 prompt_cache_len:5151 prompt_cache_ratio:0.9453110662506882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:145.70903778076172ms total_cost_time:145.75815200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:5406 prompt_cache_len:5145 prompt_cache_ratio:0.9517203107658158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:32 [batch.py:51] router release req id 8 +INFO 06-24 20:03:32 [batch.py:51] router release req id 400 +INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.07125639915466309 s +INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.05977034568786621 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.0728905200958252 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.06276249885559082 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=239972212520793441422113569732139637729, time:1750766612.5604982s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:149.22571182250977ms total_cost_time:149.26958084106445ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5450 prompt_cache_len:5151 prompt_cache_ratio:0.9451376146788991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:151.5524387359619ms total_cost_time:151.60584449768066ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5407 prompt_cache_len:5145 prompt_cache_ratio:0.9515442944331423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:32 [batch.py:51] router release req id 8 +INFO 06-24 20:03:32 [batch.py:51] router release req id 400 +INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.07585835456848145 s +INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.06166410446166992 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.07739901542663574 s +INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.06461405754089355 s +DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=145266479497705978809128689037259388380, time:1750766612.7202573s req_ids:[8, 400] +DEBUG 06-24 20:03:32 [manager.py:391] +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:149.8281955718994ms total_cost_time:149.8720645904541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5451 prompt_cache_len:5151 prompt_cache_ratio:0.9449642267473858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 +ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:155.55930137634277ms total_cost_time:155.61223030090332ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5408 prompt_cache_len:5145 prompt_cache_ratio:0.9513683431952663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.24834465980529785 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.22836756706237793 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.2501695156097412 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.23155927658081055 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=231897547603849720267055709208481616009, time:1750766613.0471766s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:341.75777435302734ms total_cost_time:341.80331230163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5452 prompt_cache_len:5151 prompt_cache_ratio:0.9447909024211298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:321.9008445739746ms total_cost_time:321.9411373138428ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5409 prompt_cache_len:5145 prompt_cache_ratio:0.9511924570160843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.05870652198791504 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.054108381271362305 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.06033134460449219 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.056955814361572266 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=228439960587663879943879096787604973611, time:1750766613.20849s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.4911651611328ms total_cost_time:151.5507698059082ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5453 prompt_cache_len:5151 prompt_cache_ratio:0.9446176416651385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:146.81506156921387ms total_cost_time:146.85988426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5410 prompt_cache_len:5145 prompt_cache_ratio:0.9510166358595195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.05869030952453613 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.05304574966430664 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.06087470054626465 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.057590484619140625 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=58569480998902752068275225342265434688, time:1750766613.3693695s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:144.19245719909668ms total_cost_time:144.23441886901855ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5454 prompt_cache_len:5151 prompt_cache_ratio:0.9444444444444444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:151.98588371276855ms total_cost_time:152.04191207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5411 prompt_cache_len:5145 prompt_cache_ratio:0.9508408796895214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.07071161270141602 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.057137489318847656 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.07224273681640625 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.05996823310852051 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=236890264408260102127111188309508912438, time:1750766613.5314384s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:146.73113822937012ms total_cost_time:146.7747688293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5455 prompt_cache_len:5151 prompt_cache_ratio:0.9442713107241063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:150.47836303710938ms total_cost_time:150.5270004272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:5412 prompt_cache_len:5145 prompt_cache_ratio:0.9506651884700665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.07972478866577148 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.062406301498413086 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08193325996398926 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06684494018554688 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=303902784344703244612003560601594996873, time:1750766613.6913793s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.5970230102539ms total_cost_time:151.6411304473877ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5456 prompt_cache_len:5151 prompt_cache_ratio:0.9440982404692082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:153.2003879547119ms total_cost_time:153.25331687927246ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5413 prompt_cache_len:5145 prompt_cache_ratio:0.950489562165158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.08222222328186035 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.06279706954956055 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08446049690246582 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06739950180053711 s +DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=36138175624287798503941517702568473641, time:1750766613.8523045s req_ids:[8, 400] +DEBUG 06-24 20:03:33 [manager.py:391] +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.93939208984375ms total_cost_time:151.98349952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5457 prompt_cache_len:5151 prompt_cache_ratio:0.9439252336448598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 +ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:156.13746643066406ms total_cost_time:156.1887264251709ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:5414 prompt_cache_len:5145 prompt_cache_ratio:0.9503140007388252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 +DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:33 [batch.py:51] router release req id 8 +INFO 06-24 20:03:33 [batch.py:51] router release req id 400 +INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.08626484870910645 s +INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.0625617504119873 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08783483505249023 s +INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06550312042236328 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=226418088539405689152697069365719665906, time:1750766614.0139081s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:150.85697174072266ms total_cost_time:150.89941024780273ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5458 prompt_cache_len:5151 prompt_cache_ratio:0.9437522902161964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:155.23576736450195ms total_cost_time:155.27749061584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5415 prompt_cache_len:5145 prompt_cache_ratio:0.9501385041551247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 +DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:34 [batch.py:51] router release req id 8 +INFO 06-24 20:03:34 [batch.py:51] router release req id 400 +INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.263899564743042 s +INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.23550772666931152 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.2656059265136719 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.2386183738708496 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=329762443465025919669621080661930544679, time:1750766614.346624s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +DEBUG 06-24 20:03:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 62610.791 tokens/s +DEBUG 06-24 20:03:34 [stats.py:37] Avg prompt tokens throughput: 62587.647 tokens/s +DEBUG 06-24 20:03:34 [stats.py:37] Avg generate tokens throughput: 23.144 tokens/s +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:349.78556632995605ms total_cost_time:349.895715713501ms,out_token_counter:1 mean_per_token_cost_time: 0.11014938354492188ms prompt_token_num:5459 prompt_cache_len:5151 prompt_cache_ratio:0.9435794101483789 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:325.7250785827637ms total_cost_time:325.7758617401123ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:5416 prompt_cache_len:5145 prompt_cache_ratio:0.9499630723781388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 +DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:34 [batch.py:51] router release req id 8 +INFO 06-24 20:03:34 [batch.py:51] router release req id 400 +INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.06442093849182129 s +INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.05374312400817871 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.06600570678710938 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.05669116973876953 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=195730936274349755573355273465168684015, time:1750766614.5051832s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:144.65665817260742ms total_cost_time:144.6988582611084ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5460 prompt_cache_len:5151 prompt_cache_ratio:0.9434065934065934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:150.8946418762207ms total_cost_time:150.94923973083496ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:5417 prompt_cache_len:5145 prompt_cache_ratio:0.9497877053719771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 +DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:34 [batch.py:51] router release req id 8 +INFO 06-24 20:03:34 [batch.py:51] router release req id 400 +INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.0860910415649414 s +INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.06761026382446289 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.0875704288482666 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.07041740417480469 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=181565894098771917202476306658473994742, time:1750766614.6742842s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:150.1750946044922ms total_cost_time:150.22039413452148ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5461 prompt_cache_len:5151 prompt_cache_ratio:0.943233839956052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:158.97560119628906ms total_cost_time:159.0280532836914ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:5418 prompt_cache_len:5145 prompt_cache_ratio:0.9496124031007752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 +DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:34 [batch.py:51] router release req id 8 +INFO 06-24 20:03:34 [batch.py:51] router release req id 400 +INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.09009885787963867 s +INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.0627443790435791 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.0922689437866211 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.06710219383239746 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=120703852401807362131516768463402938812, time:1750766614.8360612s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:151.49259567260742ms total_cost_time:151.5340805053711ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5462 prompt_cache_len:5151 prompt_cache_ratio:0.943061149761992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 +ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:157.11140632629395ms total_cost_time:157.1497917175293ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5419 prompt_cache_len:5145 prompt_cache_ratio:0.9494371655286953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 +DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:34 [batch.py:51] router release req id 8 +INFO 06-24 20:03:34 [batch.py:51] router release req id 400 +INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.09524822235107422 s +INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.06283330917358398 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.09691023826599121 s +INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.06598091125488281 s +DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=138612450913366828381385947570486151533, time:1750766614.9981973s req_ids:[8, 400] +DEBUG 06-24 20:03:34 [manager.py:391] +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:190.29664993286133ms total_cost_time:190.338134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5463 prompt_cache_len:5151 prompt_cache_ratio:0.942888522789676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:157.81736373901367ms total_cost_time:157.84311294555664ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5420 prompt_cache_len:5145 prompt_cache_ratio:0.9492619926199262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:35 [batch.py:51] router release req id 8 +INFO 06-24 20:03:35 [batch.py:51] router release req id 400 +INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.05727028846740723 s +INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.051714420318603516 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.0588831901550293 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05479764938354492 s +DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=73263162190928750566617320477825427881, time:1750766615.1584284s req_ids:[8, 400] +DEBUG 06-24 20:03:35 [manager.py:391] +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:151.34024620056152ms total_cost_time:151.38506889343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5464 prompt_cache_len:5151 prompt_cache_ratio:0.9427159590043924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:145.8280086517334ms total_cost_time:145.8570957183838ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5421 prompt_cache_len:5145 prompt_cache_ratio:0.9490868843386829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:35 [batch.py:51] router release req id 8 +INFO 06-24 20:03:35 [batch.py:51] router release req id 400 +INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.05920004844665527 s +INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.054059743881225586 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.06071758270263672 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05686688423156738 s +DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=316947990396636921151122982380940675288, time:1750766615.3188865s req_ids:[8, 400] +DEBUG 06-24 20:03:35 [manager.py:391] +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:146.7444896697998ms total_cost_time:146.7874050140381ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5465 prompt_cache_len:5151 prompt_cache_ratio:0.9425434583714547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:311.0802173614502ms total_cost_time:311.1400604248047ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5422 prompt_cache_len:5145 prompt_cache_ratio:0.948911840649207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:35 [batch.py:51] router release req id 8 +INFO 06-24 20:03:35 [batch.py:51] router release req id 400 +INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.21719074249267578 s +INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.04756736755371094 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.2189335823059082 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05070018768310547 s +DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=9189670993887045612737822298060446211, time:1750766615.6310532s req_ids:[8, 400] +DEBUG 06-24 20:03:35 [manager.py:391] +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:298.3372211456299ms total_cost_time:298.3822822570801ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5466 prompt_cache_len:5151 prompt_cache_ratio:0.942371020856202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:128.6458969116211ms total_cost_time:128.67283821105957ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5423 prompt_cache_len:5145 prompt_cache_ratio:0.9487368615157662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:35 [batch.py:51] router release req id 8 +INFO 06-24 20:03:35 [batch.py:51] router release req id 400 +INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.04225039482116699 s +INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.03630948066711426 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.04439592361450195 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.04091238975524902 s +DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=51538974472427304713807193592513952491, time:1750766615.7622607s req_ids:[8, 400] +DEBUG 06-24 20:03:35 [manager.py:391] +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:96.61436080932617ms total_cost_time:96.65513038635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5467 prompt_cache_len:5151 prompt_cache_ratio:0.9421986464239985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:114.78972434997559ms total_cost_time:114.82858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5424 prompt_cache_len:5145 prompt_cache_ratio:0.9485619469026548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:35 [batch.py:51] router release req id 8 +INFO 06-24 20:03:35 [batch.py:51] router release req id 400 +INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.07087564468383789 s +INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.047078847885131836 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.07310366630554199 s +INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.051564931869506836 s +DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=185429478749449503319306454850183956330, time:1750766615.891475s req_ids:[8, 400] +DEBUG 06-24 20:03:35 [manager.py:391] +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:148.2698917388916ms total_cost_time:148.31233024597168ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5468 prompt_cache_len:5151 prompt_cache_ratio:0.9420263350402341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:124.44233894348145ms total_cost_time:124.46975708007812ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5425 prompt_cache_len:5145 prompt_cache_ratio:0.9483870967741935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 +INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.04348111152648926 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03818321228027344 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.0456089973449707 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04280877113342285 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=210000177241489038742211947974033471283, time:1750766616.0221558s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:97.84054756164551ms total_cost_time:97.88155555725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5469 prompt_cache_len:5151 prompt_cache_ratio:0.9418540866703237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:117.5074577331543ms total_cost_time:117.54536628723145ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5426 prompt_cache_len:5145 prompt_cache_ratio:0.948212311094729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.07159090042114258 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.0473637580871582 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.07374429702758789 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.051725149154663086 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=162569698941648703211287072808277563915, time:1750766616.153649s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:148.31829071044922ms total_cost_time:148.3612060546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5470 prompt_cache_len:5151 prompt_cache_ratio:0.9416819012797075 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:126.03974342346191ms total_cost_time:126.08981132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:5427 prompt_cache_len:5145 prompt_cache_ratio:0.9480375898286346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.2066042423248291 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.1993255615234375 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.20892548561096191 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.20395541191101074 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=130010109753787456219633016630110609604, time:1750766616.4430048s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:286.9291305541992ms total_cost_time:286.9884967803955ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5471 prompt_cache_len:5151 prompt_cache_ratio:0.9415097788338512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:279.76536750793457ms total_cost_time:279.7966003417969ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:5428 prompt_cache_len:5145 prompt_cache_ratio:0.9478629329403095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.04249286651611328 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03691506385803223 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.04454183578491211 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04138541221618652 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=30932026737940502576981443611834848389, time:1750766616.5737557s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:109.66634750366211ms total_cost_time:109.7111701965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5472 prompt_cache_len:5151 prompt_cache_ratio:0.9413377192982456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:115.7221794128418ms total_cost_time:115.76342582702637ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5429 prompt_cache_len:5145 prompt_cache_ratio:0.9476883403941794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.05890941619873047 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.0468592643737793 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.06042885780334473 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04991865158081055 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=43002134030329428879573188245239996644, time:1750766616.7063823s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:140.33913612365723ms total_cost_time:140.40088653564453ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5473 prompt_cache_len:5151 prompt_cache_ratio:0.9411657226384067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:128.58343124389648ms total_cost_time:128.62610816955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5430 prompt_cache_len:5145 prompt_cache_ratio:0.9475138121546961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.043108463287353516 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03757143020629883 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.04462027549743652 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04056262969970703 s +DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=56348947202715350430636309754883468755, time:1750766616.8402448s req_ids:[8, 400] +DEBUG 06-24 20:03:36 [manager.py:391] +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:104.10261154174805ms total_cost_time:104.1266918182373ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:5474 prompt_cache_len:5151 prompt_cache_ratio:0.9409937888198758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 +ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:135.00475883483887ms total_cost_time:135.0269317626953ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5431 prompt_cache_len:5145 prompt_cache_ratio:0.9473393481863377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 +DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:36 [batch.py:51] router release req id 8 +INFO 06-24 20:03:36 [batch.py:51] router release req id 400 +INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.10050511360168457 s +INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.06466460227966309 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.1022646427154541 s +INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.06761837005615234 s +DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=48142095797838133697149078602854040769, time:1750766617.006159s req_ids:[8, 400] +DEBUG 06-24 20:03:37 [manager.py:391] +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:196.36249542236328ms total_cost_time:196.38848304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5475 prompt_cache_len:5151 prompt_cache_ratio:0.9408219178082192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:160.03799438476562ms total_cost_time:160.05229949951172ms,out_token_counter:1 mean_per_token_cost_time: 0.01430511474609375ms prompt_token_num:5432 prompt_cache_len:5145 prompt_cache_ratio:0.9471649484536082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 +DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:37 [batch.py:51] router release req id 8 +INFO 06-24 20:03:37 [batch.py:51] router release req id 400 +INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.06491494178771973 s +INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06207680702209473 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.0670013427734375 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06541562080383301 s +DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=49102353570290562279437135788005333577, time:1750766617.1713684s req_ids:[8, 400] +DEBUG 06-24 20:03:37 [manager.py:391] +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:150.54059028625488ms total_cost_time:150.56657791137695ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5476 prompt_cache_len:5151 prompt_cache_ratio:0.9406501095690285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:157.32622146606445ms total_cost_time:157.3467254638672ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:5433 prompt_cache_len:5145 prompt_cache_ratio:0.9469906129210381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 +DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:37 [batch.py:51] router release req id 8 +INFO 06-24 20:03:37 [batch.py:51] router release req id 400 +INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.07412052154541016 s +INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06517767906188965 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.07586669921875 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06809163093566895 s +DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=158460628801869444412491162713163251356, time:1750766617.3367357s req_ids:[8, 400] +DEBUG 06-24 20:03:37 [manager.py:391] +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:149.97220039367676ms total_cost_time:149.99794960021973ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5477 prompt_cache_len:5151 prompt_cache_ratio:0.9404783640679204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:161.37051582336426ms total_cost_time:161.3912582397461ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:5434 prompt_cache_len:5145 prompt_cache_ratio:0.9468163415531836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 +DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:37 [batch.py:51] router release req id 8 +INFO 06-24 20:03:37 [batch.py:51] router release req id 400 +INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.267611026763916 s +INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.24760913848876953 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.2696850299835205 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.2509434223175049 s +DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=322497241890493419874620467040701848401, time:1750766617.6848779s req_ids:[8, 400] +DEBUG 06-24 20:03:37 [manager.py:391] +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:350.9180545806885ms total_cost_time:350.9635925292969ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5478 prompt_cache_len:5151 prompt_cache_ratio:0.9403066812705367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:343.2471752166748ms total_cost_time:343.28627586364746ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5435 prompt_cache_len:5145 prompt_cache_ratio:0.9466421343146274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 +DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:37 [batch.py:51] router release req id 8 +INFO 06-24 20:03:37 [batch.py:51] router release req id 400 +INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.07410836219787598 s +INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06276607513427734 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.07597732543945312 s +INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06632637977600098 s +DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=94719355295057164348625767212695700820, time:1750766617.8478243s req_ids:[8, 400] +DEBUG 06-24 20:03:37 [manager.py:391] +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:149.43575859069824ms total_cost_time:149.48034286499023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5479 prompt_cache_len:5151 prompt_cache_ratio:0.9401350611425443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 +ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:164.99781608581543ms total_cost_time:165.0540828704834ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:5436 prompt_cache_len:5145 prompt_cache_ratio:0.9464679911699779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 +DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:38 [batch.py:51] router release req id 8 +INFO 06-24 20:03:38 [batch.py:51] router release req id 400 +INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.10403227806091309 s +INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.0771474838256836 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.1065359115600586 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.08168745040893555 s +DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=169128198364472471035314465327019068800, time:1750766618.0340252s req_ids:[8, 400] +DEBUG 06-24 20:03:38 [manager.py:391] +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:199.70464706420898ms total_cost_time:199.74923133850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5480 prompt_cache_len:5151 prompt_cache_ratio:0.939963503649635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:222.3520278930664ms total_cost_time:222.3968505859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5437 prompt_cache_len:5145 prompt_cache_ratio:0.9462939120838698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 +DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:38 [batch.py:51] router release req id 8 +INFO 06-24 20:03:38 [batch.py:51] router release req id 400 +INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.11401057243347168 s +INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06454849243164062 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.11652994155883789 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06919145584106445 s +DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=40486159350295905578338550203234388261, time:1750766618.2483828s req_ids:[8, 400] +DEBUG 06-24 20:03:38 [manager.py:391] +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:200.3791332244873ms total_cost_time:200.425386428833ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5481 prompt_cache_len:5151 prompt_cache_ratio:0.939792008757526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:163.77830505371094ms total_cost_time:163.8178825378418ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5438 prompt_cache_len:5145 prompt_cache_ratio:0.9461198970209636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 +DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:38 [batch.py:51] router release req id 8 +INFO 06-24 20:03:38 [batch.py:51] router release req id 400 +INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.07290983200073242 s +INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06025242805480957 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.07513570785522461 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06480908393859863 s +DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=301123155516952396020871412387181398298, time:1750766618.414426s req_ids:[8, 400] +DEBUG 06-24 20:03:38 [manager.py:391] +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:149.09625053405762ms total_cost_time:149.1401195526123ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5482 prompt_cache_len:5151 prompt_cache_ratio:0.9396205764319592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:153.53655815124512ms total_cost_time:153.57685089111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5439 prompt_cache_len:5145 prompt_cache_ratio:0.9459459459459459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 +DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:38 [batch.py:51] router release req id 8 +INFO 06-24 20:03:38 [batch.py:51] router release req id 400 +INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.0793769359588623 s +INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06363534927368164 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.08170771598815918 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06795048713684082 s +DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=116750067881656625557120399952146984721, time:1750766618.5762854s req_ids:[8, 400] +DEBUG 06-24 20:03:38 [manager.py:391] +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:150.56800842285156ms total_cost_time:150.61283111572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5483 prompt_cache_len:5151 prompt_cache_ratio:0.9394492066387015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:159.74164009094238ms total_cost_time:159.78264808654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5440 prompt_cache_len:5145 prompt_cache_ratio:0.9457720588235294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 +DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:38 [batch.py:51] router release req id 8 +INFO 06-24 20:03:38 [batch.py:51] router release req id 400 +INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.08829593658447266 s +INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06318473815917969 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.09060263633728027 s +INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06763386726379395 s +DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=278416267166014473420255664567043084141, time:1750766618.7411492s req_ids:[8, 400] +DEBUG 06-24 20:03:38 [manager.py:391] +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:190.9964084625244ms total_cost_time:191.0405158996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5484 prompt_cache_len:5151 prompt_cache_ratio:0.9392778993435449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:165.68994522094727ms total_cost_time:165.71617126464844ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5441 prompt_cache_len:5145 prompt_cache_ratio:0.9455982356184525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 +INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.2627263069152832 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.25689268112182617 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.2648928165435791 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.26111268997192383 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=129706347939593724599986550451753117018, time:1750766619.1145344s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:355.96179962158203ms total_cost_time:356.0066223144531ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5485 prompt_cache_len:5151 prompt_cache_ratio:0.9391066545123062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:349.85852241516113ms total_cost_time:349.8833179473877ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5442 prompt_cache_len:5145 prompt_cache_ratio:0.9454244762954797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.05801534652709961 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.052634239196777344 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.05999279022216797 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.05617570877075195 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=87146945186067301067864858556266369722, time:1750766619.275268s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:146.03495597839355ms total_cost_time:146.0568904876709ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:5486 prompt_cache_len:5151 prompt_cache_ratio:0.9389354721108275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:154.07395362854004ms total_cost_time:154.0968418121338ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:5443 prompt_cache_len:5145 prompt_cache_ratio:0.9452507808194011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.07618451118469238 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06422042846679688 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.07848453521728516 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06868958473205566 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=3134814338491348743743000294539162370, time:1750766619.4442506s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:150.85291862487793ms total_cost_time:150.895357131958ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5487 prompt_cache_len:5151 prompt_cache_ratio:0.9387643521049754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:166.31817817687988ms total_cost_time:166.36157035827637ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5444 prompt_cache_len:5145 prompt_cache_ratio:0.945077149155033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.08937716484069824 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06246542930603027 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.09199929237365723 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06730175018310547 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=6247217228465912397711997017917951727, time:1750766619.6143165s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:152.59122848510742ms total_cost_time:152.63652801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5488 prompt_cache_len:5151 prompt_cache_ratio:0.9385932944606414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:159.09409523010254ms total_cost_time:159.13748741149902ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5445 prompt_cache_len:5145 prompt_cache_ratio:0.9449035812672176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.09504103660583496 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06266522407531738 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.09713959693908691 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06722712516784668 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=310611555034683849966821181303323645239, time:1750766619.7790356s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:190.79136848449707ms total_cost_time:190.83523750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5489 prompt_cache_len:5151 prompt_cache_ratio:0.938422299143742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:158.48040580749512ms total_cost_time:158.5063934326172ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5446 prompt_cache_len:5145 prompt_cache_ratio:0.9447300771208226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 +INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 +DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:39 [batch.py:51] router release req id 8 +INFO 06-24 20:03:39 [batch.py:51] router release req id 400 +INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.058948516845703125 s +INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.053785085678100586 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.061005353927612305 s +INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.05808091163635254 s +DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=253867498428063382271975746685462740671, time:1750766619.9419792s req_ids:[8, 400] +DEBUG 06-24 20:03:39 [manager.py:391] +ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:153.4113883972168ms total_cost_time:153.4566879272461ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5490 prompt_cache_len:5151 prompt_cache_ratio:0.9382513661202185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:148.25987815856934ms total_cost_time:148.2863426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5447 prompt_cache_len:5145 prompt_cache_ratio:0.9445566366807417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.058005571365356445 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.05278468132019043 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.05971884727478027 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.054704904556274414 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=115700462757155041086192297804250255747, time:1750766620.1042194s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:145.2500820159912ms total_cost_time:145.2944278717041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5491 prompt_cache_len:5151 prompt_cache_ratio:0.9380804953560371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:298.1858253479004ms total_cost_time:298.2308864593506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5448 prompt_cache_len:5145 prompt_cache_ratio:0.9443832599118943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.20426201820373535 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.04655003547668457 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.20604443550109863 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.0496368408203125 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=149437139165406844475054096712746950522, time:1750766620.3992245s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:281.9509506225586ms total_cost_time:281.994104385376ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5492 prompt_cache_len:5151 prompt_cache_ratio:0.9379096868171887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:124.15003776550293ms total_cost_time:124.1767406463623ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5449 prompt_cache_len:5145 prompt_cache_ratio:0.9442099467792255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.04370570182800293 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.03839445114135742 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.04548001289367676 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.04150676727294922 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=192771443649093074155482288364382980342, time:1750766620.5299993s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:112.8549575805664ms total_cost_time:112.90121078491211ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5493 prompt_cache_len:5151 prompt_cache_ratio:0.9377389404696886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:118.52025985717773ms total_cost_time:118.55673789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5450 prompt_cache_len:5145 prompt_cache_ratio:0.9440366972477064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.05673837661743164 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.046599388122558594 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.05822181701660156 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.0494847297668457 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=17387028427260555587337005555667821935, time:1750766620.6615283s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:134.05156135559082ms total_cost_time:134.0937614440918ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5494 prompt_cache_len:5151 prompt_cache_ratio:0.9375682562795777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:124.00150299072266ms total_cost_time:124.02820587158203ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5451 prompt_cache_len:5145 prompt_cache_ratio:0.9438635112823335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.04505300521850586 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.03983449935913086 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.04656481742858887 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.04274725914001465 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=279914340483530208022775552399288758313, time:1750766620.7912085s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:110.8083724975586ms total_cost_time:110.85247993469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5495 prompt_cache_len:5151 prompt_cache_ratio:0.9373976342129209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:116.74213409423828ms total_cost_time:116.78004264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5452 prompt_cache_len:5145 prompt_cache_ratio:0.9436903888481292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:40 [batch.py:51] router release req id 8 +INFO 06-24 20:03:40 [batch.py:51] router release req id 400 +INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.05859804153442383 s +INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.048033952713012695 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.06009221076965332 s +INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.05092620849609375 s +DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=317940517780281782685719143350097904372, time:1750766620.9211283s req_ids:[8, 400] +DEBUG 06-24 20:03:40 [manager.py:391] +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:135.80894470214844ms total_cost_time:135.85162162780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5496 prompt_cache_len:5151 prompt_cache_ratio:0.9372270742358079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:125.33068656921387ms total_cost_time:125.35691261291504ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5453 prompt_cache_len:5145 prompt_cache_ratio:0.9435173299101413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 +INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.20488452911376953 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.19974970817565918 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.20643329620361328 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.2028522491455078 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=222248072915643865879193887607851236332, time:1750766621.2138107s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:284.5888137817383ms total_cost_time:284.6338748931885ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5497 prompt_cache_len:5151 prompt_cache_ratio:0.9370565763143532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:279.52051162719727ms total_cost_time:279.54769134521484ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5454 prompt_cache_len:5145 prompt_cache_ratio:0.9433443344334433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04375576972961426 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.03897881507873535 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.04532217979431152 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.042089223861694336 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=319518911672266648223702621450027877125, time:1750766621.345943s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:115.67831039428711ms total_cost_time:115.72122573852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5498 prompt_cache_len:5151 prompt_cache_ratio:0.9368861404146962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:121.96588516235352ms total_cost_time:122.00260162353516ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5455 prompt_cache_len:5145 prompt_cache_ratio:0.9431714023831348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.056416988372802734 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04623913764953613 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.05796480178833008 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.049021005630493164 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=278679238097332180285458105917212097973, time:1750766621.4798312s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:135.40983200073242ms total_cost_time:135.4525089263916ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5499 prompt_cache_len:5151 prompt_cache_ratio:0.9367157665030006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:125.19407272338867ms total_cost_time:125.21886825561523ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5456 prompt_cache_len:5145 prompt_cache_ratio:0.9429985337243402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04453921318054199 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.039920806884765625 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.046036720275878906 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.04279613494873047 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=56445385961245697677117280664771420070, time:1750766621.610605s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:107.49530792236328ms total_cost_time:107.54084587097168ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5500 prompt_cache_len:5151 prompt_cache_ratio:0.9365454545454546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:118.26539039611816ms total_cost_time:118.3018684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5457 prompt_cache_len:5145 prompt_cache_ratio:0.94282572842221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.06289219856262207 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04823446273803711 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.06427669525146484 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.05086827278137207 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=325098990388853035416101800897035623329, time:1750766621.7430327s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:143.16177368164062ms total_cost_time:143.2046890258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5501 prompt_cache_len:5151 prompt_cache_ratio:0.9363752045082713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:128.4658908843994ms total_cost_time:128.4935474395752ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5458 prompt_cache_len:5145 prompt_cache_ratio:0.9426529864419201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04438018798828125 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.03922462463378906 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.04580497741699219 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.04224562644958496 s +DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=14635539580292310731747326689292859442, time:1750766621.8758655s req_ids:[8, 400] +DEBUG 06-24 20:03:41 [manager.py:391] +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:97.8844165802002ms total_cost_time:97.92876243591309ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5502 prompt_cache_len:5151 prompt_cache_ratio:0.9362050163576882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 +ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:117.2182559967041ms total_cost_time:117.25473403930664ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5459 prompt_cache_len:5145 prompt_cache_ratio:0.9424803077486719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 +DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:41 [batch.py:51] router release req id 8 +INFO 06-24 20:03:41 [batch.py:51] router release req id 400 +INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.0717918872833252 s +INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04829549789428711 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.07334518432617188 s +INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.051096200942993164 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=1511357465828111383588353860684841422, time:1750766622.006007s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:146.93307876586914ms total_cost_time:146.97694778442383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5503 prompt_cache_len:5151 prompt_cache_ratio:0.9360348900599673 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:125.76103210449219ms total_cost_time:125.79607963562012ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5460 prompt_cache_len:5145 prompt_cache_ratio:0.9423076923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.21104717254638672 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.20490598678588867 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.21272015571594238 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.20803308486938477 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=90783295793158214268778335865938606617, time:1750766622.298684s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:290.88377952575684ms total_cost_time:290.9262180328369ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5504 prompt_cache_len:5151 prompt_cache_ratio:0.9358648255813954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:284.6791744232178ms total_cost_time:284.70468521118164ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5461 prompt_cache_len:5145 prompt_cache_ratio:0.9421351400842336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.04487919807434082 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03967595100402832 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04643511772155762 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04262709617614746 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=187981577892784661875476715935144497840, time:1750766622.4318779s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:102.94938087463379ms total_cost_time:102.99420356750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5505 prompt_cache_len:5151 prompt_cache_ratio:0.9356948228882834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:117.93255805969238ms total_cost_time:117.96808242797852ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5462 prompt_cache_len:5145 prompt_cache_ratio:0.9419626510435738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.06805729866027832 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.04794931411743164 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.06943297386169434 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.05077719688415527 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=67192859898432663458793980400878882239, time:1750766622.5619147s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:147.1996307373047ms total_cost_time:147.24349975585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5506 prompt_cache_len:5151 prompt_cache_ratio:0.9355248819469669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:127.28071212768555ms total_cost_time:127.30717658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5463 prompt_cache_len:5145 prompt_cache_ratio:0.941790225151016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.0445404052734375 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03985881805419922 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04602456092834473 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04290008544921875 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=119249942676460819860930712482683750717, time:1750766622.694668s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:123.16679954528809ms total_cost_time:123.21114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5507 prompt_cache_len:5151 prompt_cache_ratio:0.9353550027238061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:118.57032775878906ms total_cost_time:118.59679222106934ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5464 prompt_cache_len:5145 prompt_cache_ratio:0.9416178623718887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.04457402229309082 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03928971290588379 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04608941078186035 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04224729537963867 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=86997408004472544127746862513653604578, time:1750766622.8263876s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:116.52898788452148ms total_cost_time:116.57238006591797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5508 prompt_cache_len:5151 prompt_cache_ratio:0.9351851851851852 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 +INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:122.48420715332031ms total_cost_time:122.52163887023926ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5465 prompt_cache_len:5145 prompt_cache_ratio:0.9414455626715462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 +DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:42 [batch.py:51] router release req id 8 +INFO 06-24 20:03:42 [batch.py:51] router release req id 400 +INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.0525965690612793 s +INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.04213285446166992 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.05419492721557617 s +INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.0451512336730957 s +DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=231356186703579210864944373469635620353, time:1750766622.9579062s req_ids:[8, 400] +DEBUG 06-24 20:03:42 [manager.py:391] +ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:132.45058059692383ms total_cost_time:132.4927806854248ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5509 prompt_cache_len:5151 prompt_cache_ratio:0.9350154292975131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:121.94538116455078ms total_cost_time:121.97256088256836ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5466 prompt_cache_len:5145 prompt_cache_ratio:0.9412733260153677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.04428219795227051 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.03845548629760742 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.04574108123779297 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04154348373413086 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=186837258394108882879939874262131534433, time:1750766623.0881293s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:111.21487617492676ms total_cost_time:111.26089096069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5510 prompt_cache_len:5151 prompt_cache_ratio:0.9348457350272232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:117.1731948852539ms total_cost_time:117.21158027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5467 prompt_cache_len:5145 prompt_cache_ratio:0.941101152368758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.21978068351745605 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.20865702629089355 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.22155237197875977 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.21177148818969727 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=17604161524869871110427595730865289720, time:1750766623.3826926s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:297.1522808074951ms total_cost_time:297.1968650817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5511 prompt_cache_len:5151 prompt_cache_ratio:0.934676102340773 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:290.67516326904297ms total_cost_time:290.73309898376465ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5468 prompt_cache_len:5145 prompt_cache_ratio:0.9409290416971471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.05164051055908203 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.04220080375671387 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.05293536186218262 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04477095603942871 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=312604985944213681506895109499589382257, time:1750766623.516448s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:132.46941566467285ms total_cost_time:132.51352310180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5512 prompt_cache_len:5151 prompt_cache_ratio:0.9345065312046444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:122.98202514648438ms total_cost_time:123.00777435302734ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5469 prompt_cache_len:5145 prompt_cache_ratio:0.9407569939659901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.04400753974914551 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.0388491153717041 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.04533267021179199 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.041558027267456055 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=315082085694117032309544724066217879520, time:1750766623.6505597s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:111.83452606201172ms total_cost_time:111.87887191772461ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5513 prompt_cache_len:5151 prompt_cache_ratio:0.9343370215853437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:119.6587085723877ms total_cost_time:119.69518661499023ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5470 prompt_cache_len:5145 prompt_cache_ratio:0.9405850091407678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.06146049499511719 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.049262285232543945 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.06301474571228027 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.05216240882873535 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=78055535892047011246499798760042306458, time:1750766623.785757s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:144.48142051696777ms total_cost_time:144.52433586120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5514 prompt_cache_len:5151 prompt_cache_ratio:0.9341675734494015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:132.27033615112305ms total_cost_time:132.29703903198242ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5471 prompt_cache_len:5145 prompt_cache_ratio:0.9404130871869859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 +DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:43 [batch.py:51] router release req id 8 +INFO 06-24 20:03:43 [batch.py:51] router release req id 400 +INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.049451589584350586 s +INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.043874263763427734 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.05081796646118164 s +INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04680299758911133 s +DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=322251915806443079126708697199415980913, time:1750766623.9265716s req_ids:[8, 400] +DEBUG 06-24 20:03:43 [manager.py:391] +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:131.2546730041504ms total_cost_time:131.3004493713379ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5515 prompt_cache_len:5151 prompt_cache_ratio:0.9339981867633727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:125.88262557983398ms total_cost_time:125.90956687927246ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5472 prompt_cache_len:5145 prompt_cache_ratio:0.9402412280701754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.04454922676086426 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.03897547721862793 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.045972347259521484 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04199409484863281 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=299495826279835281760745670739021474310, time:1750766624.0616508s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:113.20281028747559ms total_cost_time:113.24453353881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5516 prompt_cache_len:5151 prompt_cache_ratio:0.9338288614938361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:119.94528770446777ms total_cost_time:119.98271942138672ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5473 prompt_cache_len:5145 prompt_cache_ratio:0.9400694317558925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.0609588623046875 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04912614822387695 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.06236624717712402 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.05178523063659668 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=116432272721049931657962064873459148633, time:1750766624.1966136s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:149.01041984558105ms total_cost_time:149.05261993408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5517 prompt_cache_len:5151 prompt_cache_ratio:0.9336595976073954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:03:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 63017.683 tokens/s +DEBUG 06-24 20:03:44 [stats.py:37] Avg prompt tokens throughput: 62994.439 tokens/s +DEBUG 06-24 20:03:44 [stats.py:37] Avg generate tokens throughput: 23.244 tokens/s +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:288.29431533813477ms total_cost_time:288.33794593811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5474 prompt_cache_len:5145 prompt_cache_ratio:0.9398976982097187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.1996145248413086 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04862332344055176 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.20099687576293945 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.05128026008605957 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=304880668639142176118440230530898120204, time:1750766624.4889433s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:297.84703254699707ms total_cost_time:297.89066314697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5518 prompt_cache_len:5151 prompt_cache_ratio:0.9334903950706778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:180.69219589233398ms total_cost_time:180.73153495788574ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5475 prompt_cache_len:5145 prompt_cache_ratio:0.9397260273972603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.045824527740478516 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04057025909423828 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.04743218421936035 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04332256317138672 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=83522112589896216874883248937495033633, time:1750766624.6760492s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:126.85799598693848ms total_cost_time:126.90234184265137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5519 prompt_cache_len:5151 prompt_cache_ratio:0.9333212538503352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:121.47402763366699ms total_cost_time:121.50073051452637ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5476 prompt_cache_len:5145 prompt_cache_ratio:0.9395544192841491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.043167829513549805 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.037203073501586914 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.04454374313354492 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04012250900268555 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=197995134768779456357079816216559976218, time:1750766624.8096952s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:114.8991584777832ms total_cost_time:114.9437427520752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5520 prompt_cache_len:5151 prompt_cache_ratio:0.9331521739130435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:120.85151672363281ms total_cost_time:120.88847160339355ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5477 prompt_cache_len:5145 prompt_cache_ratio:0.9393828738360416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 +DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:44 [batch.py:51] router release req id 8 +INFO 06-24 20:03:44 [batch.py:51] router release req id 400 +INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.05646634101867676 s +INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04578828811645508 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.057877540588378906 s +INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04868459701538086 s +DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=331227107796104284259476061553842188105, time:1750766624.9439874s req_ids:[8, 400] +DEBUG 06-24 20:03:44 [manager.py:391] +ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:138.11302185058594ms total_cost_time:138.15736770629883ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5521 prompt_cache_len:5151 prompt_cache_ratio:0.9329831552255027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:127.67314910888672ms total_cost_time:127.7010440826416ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5478 prompt_cache_len:5145 prompt_cache_ratio:0.93921139101862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.0426783561706543 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.0374298095703125 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04419422149658203 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04030585289001465 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=247484327087147223817135377377494183167, time:1750766625.0761447s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:102.44369506835938ms total_cost_time:102.48780250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5522 prompt_cache_len:5151 prompt_cache_ratio:0.9328141977544367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:115.73505401611328ms total_cost_time:115.77391624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5479 prompt_cache_len:5145 prompt_cache_ratio:0.9390399707975908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.22487568855285645 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.2073667049407959 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.22641658782958984 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.21023082733154297 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=153907764573784560787405508689444493967, time:1750766625.3675983s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:297.96576499938965ms total_cost_time:298.01130294799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5523 prompt_cache_len:5151 prompt_cache_ratio:0.9326453014665942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:292.02842712402344ms total_cost_time:292.0670509338379ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5480 prompt_cache_len:5145 prompt_cache_ratio:0.9388686131386861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.0554201602935791 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.0442962646484375 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.05676436424255371 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04711747169494629 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=187434277561789540605526981935759976782, time:1750766625.501658s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:137.85171508789062ms total_cost_time:137.8943920135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5524 prompt_cache_len:5151 prompt_cache_ratio:0.9324764663287473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:126.92117691040039ms total_cost_time:126.94644927978516ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5481 prompt_cache_len:5145 prompt_cache_ratio:0.9386973180076629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.04479622840881348 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.039304494857788086 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04622650146484375 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.0420534610748291 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=264308891410722713848022818762895973215, time:1750766625.6364613s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:107.63239860534668ms total_cost_time:107.67745971679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5525 prompt_cache_len:5151 prompt_cache_ratio:0.9323076923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:119.30274963378906ms total_cost_time:119.34113502502441ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5482 prompt_cache_len:5145 prompt_cache_ratio:0.9385260853703028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.06586790084838867 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.04921102523803711 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.06728625297546387 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.05193519592285156 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=18691014356808507469132335014769692393, time:1750766625.771753s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:148.7126350402832ms total_cost_time:148.7562656402588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5526 prompt_cache_len:5151 prompt_cache_ratio:0.9321389793702497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:132.02452659606934ms total_cost_time:132.05265998840332ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5483 prompt_cache_len:5145 prompt_cache_ratio:0.9383549151924129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:45 [batch.py:51] router release req id 8 +INFO 06-24 20:03:45 [batch.py:51] router release req id 400 +INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.04455828666687012 s +INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.03950905799865723 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04588031768798828 s +INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04236721992492676 s +DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=12676110873694008020192149709738807323, time:1750766625.9051952s req_ids:[8, 400] +DEBUG 06-24 20:03:45 [manager.py:391] +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:125.11587142944336ms total_cost_time:125.15950202941895ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5527 prompt_cache_len:5151 prompt_cache_ratio:0.931970327483264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:120.25666236877441ms total_cost_time:120.28217315673828ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5484 prompt_cache_len:5145 prompt_cache_ratio:0.938183807439825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 +INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04464387893676758 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.039578914642333984 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.046161651611328125 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.042708396911621094 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=201556259165941510903643945325035808616, time:1750766626.0400708s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:115.11421203613281ms total_cost_time:115.1571273803711ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5528 prompt_cache_len:5151 prompt_cache_ratio:0.9318017366136034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:121.15693092346191ms total_cost_time:121.19293212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5485 prompt_cache_len:5145 prompt_cache_ratio:0.9380127620783957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.05908489227294922 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.04814624786376953 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.060518741607666016 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05104660987854004 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=253770962005398030300629816593420802149, time:1750766626.1750672s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:140.1996612548828ms total_cost_time:140.2437686920166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5529 prompt_cache_len:5151 prompt_cache_ratio:0.9316332067281606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:129.44293022155762ms total_cost_time:129.4689178466797ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5486 prompt_cache_len:5145 prompt_cache_ratio:0.9378417790740066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +INFO 06-24 20:03:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.20795464515686035 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.20274591445922852 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.20946216583251953 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.2056889533996582 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=156680837650655014671997852344688153877, time:1750766626.4721198s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:288.5935306549072ms total_cost_time:288.6366844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5530 prompt_cache_len:5151 prompt_cache_ratio:0.9314647377938517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:283.2956314086914ms total_cost_time:283.3213806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5487 prompt_cache_len:5145 prompt_cache_ratio:0.9376708583925643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04379010200500488 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.03882551193237305 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.04519939422607422 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.04165530204772949 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=133392077922077514522657980236303511853, time:1750766626.6053221s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:107.7430248260498ms total_cost_time:107.7883243560791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5531 prompt_cache_len:5151 prompt_cache_ratio:0.9312963297776171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:117.47455596923828ms total_cost_time:117.51222610473633ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5488 prompt_cache_len:5145 prompt_cache_ratio:0.9375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.06258964538574219 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.048950910568237305 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.06399869918823242 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05165672302246094 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=101931795556715982969217131548027683321, time:1750766626.7395117s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:144.5009708404541ms total_cost_time:144.5446014404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5532 prompt_cache_len:5151 prompt_cache_ratio:0.9311279826464208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:130.81812858581543ms total_cost_time:130.8448314666748ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5489 prompt_cache_len:5145 prompt_cache_ratio:0.93732920386227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04419112205505371 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.03883171081542969 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.04554915428161621 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.041629791259765625 s +DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=314138048499919673355402826676390677180, time:1750766626.8723164s req_ids:[8, 400] +DEBUG 06-24 20:03:46 [manager.py:391] +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:98.74296188354492ms total_cost_time:98.785400390625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5533 prompt_cache_len:5151 prompt_cache_ratio:0.9309596963672511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 +ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:118.4380054473877ms total_cost_time:118.47472190856934ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5490 prompt_cache_len:5145 prompt_cache_ratio:0.9371584699453552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 +DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:46 [batch.py:51] router release req id 8 +INFO 06-24 20:03:46 [batch.py:51] router release req id 400 +INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.07383012771606445 s +INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.04933571815490723 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.07524251937866211 s +INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05212998390197754 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=310806778698467022174868719174041794873, time:1750766627.0079288s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:147.68552780151367ms total_cost_time:147.72582054138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5534 prompt_cache_len:5151 prompt_cache_ratio:0.9307914709071197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:133.26168060302734ms total_cost_time:133.2986354827881ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5491 prompt_cache_len:5145 prompt_cache_ratio:0.9369877982152613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.05562281608581543 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04543948173522949 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.057027578353881836 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04819226264953613 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=334942273163944649720874485700909627920, time:1750766627.141425s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:136.88945770263672ms total_cost_time:136.9326114654541ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5535 prompt_cache_len:5151 prompt_cache_ratio:0.9306233062330623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:126.69157981872559ms total_cost_time:126.71780586242676ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5492 prompt_cache_len:5145 prompt_cache_ratio:0.9368171886380189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.04271697998046875 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.03731036186218262 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.04421520233154297 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04021120071411133 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=53401666113286143618847854401605892820, time:1750766627.2767801s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:104.56609725952148ms total_cost_time:104.61068153381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5536 prompt_cache_len:5151 prompt_cache_ratio:0.9304552023121387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:118.47949028015137ms total_cost_time:118.51763725280762ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5493 prompt_cache_len:5145 prompt_cache_ratio:0.9366466411796832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.22628259658813477 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.20778775215148926 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.22774934768676758 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.21056890487670898 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=286394626559695203673277923630421906574, time:1750766627.5705795s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:299.0703582763672ms total_cost_time:299.11208152770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5537 prompt_cache_len:5151 prompt_cache_ratio:0.9302871591114322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:291.6295528411865ms total_cost_time:291.6688919067383ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5494 prompt_cache_len:5145 prompt_cache_ratio:0.9364761558063341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.05706977844238281 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04649829864501953 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.05867171287536621 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.049718379974365234 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=320127671036857911014605057862772085025, time:1750766627.7046661s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:138.3826732635498ms total_cost_time:138.4265422821045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5538 prompt_cache_len:5151 prompt_cache_ratio:0.9301191765980499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:127.88009643554688ms total_cost_time:127.90536880493164ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5495 prompt_cache_len:5145 prompt_cache_ratio:0.9363057324840764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.044013023376464844 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.03803229331970215 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.045525550842285156 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04123520851135254 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=12086117763728895102413116460746267287, time:1750766627.8372653s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:105.90934753417969ms total_cost_time:105.95273971557617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5539 prompt_cache_len:5151 prompt_cache_ratio:0.9299512547391225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 +ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:118.24178695678711ms total_cost_time:118.27921867370605ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5496 prompt_cache_len:5145 prompt_cache_ratio:0.9361353711790393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 +DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:47 [batch.py:51] router release req id 8 +INFO 06-24 20:03:47 [batch.py:51] router release req id 400 +INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.06685781478881836 s +INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04898667335510254 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.06833100318908691 s +INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.051708221435546875 s +DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=120041866166639342335179457361542587693, time:1750766627.9714532s req_ids:[8, 400] +DEBUG 06-24 20:03:47 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:147.21965789794922ms total_cost_time:147.2647190093994ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5540 prompt_cache_len:5151 prompt_cache_ratio:0.9297833935018051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:129.29749488830566ms total_cost_time:129.32467460632324ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5497 prompt_cache_len:5145 prompt_cache_ratio:0.9359650718573768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.04334235191345215 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.03780770301818848 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.04509329795837402 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04091644287109375 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=221936066483979035956775190440768114010, time:1750766628.1051788s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:95.16119956970215ms total_cost_time:95.20530700683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5541 prompt_cache_len:5151 prompt_cache_ratio:0.9296155928532756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:115.97657203674316ms total_cost_time:116.01519584655762ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5498 prompt_cache_len:5145 prompt_cache_ratio:0.9357948344852673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.07483887672424316 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.048903703689575195 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.07623744010925293 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.051641225814819336 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=152322144279589248641743862674881001419, time:1750766628.2377915s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:148.7255096435547ms total_cost_time:148.76723289489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5542 prompt_cache_len:5151 prompt_cache_ratio:0.9294478527607362 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:134.01174545288086ms total_cost_time:134.0482234954834ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5499 prompt_cache_len:5145 prompt_cache_ratio:0.9356246590289143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.05364990234375 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.042586326599121094 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.055147409439086914 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04562711715698242 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=215269824782322251363880896491424866789, time:1750766628.3708467s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:147.68671989440918ms total_cost_time:147.72748947143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5543 prompt_cache_len:5151 prompt_cache_ratio:0.9292801731914125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:283.672571182251ms total_cost_time:283.71644020080566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5500 prompt_cache_len:5145 prompt_cache_ratio:0.9354545454545454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.1965477466583252 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.04872417449951172 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.19802474975585938 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.05181384086608887 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=318372952371553185847151775354030162498, time:1750766628.6667292s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:279.89864349365234ms total_cost_time:279.94251251220703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5544 prompt_cache_len:5151 prompt_cache_ratio:0.9291125541125541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:132.14111328125ms total_cost_time:132.16710090637207ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5501 prompt_cache_len:5145 prompt_cache_ratio:0.935284493728413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.04356694221496582 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.03825020790100098 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.04512166976928711 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04124569892883301 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=296582109010489214940315681468494418463, time:1750766628.802788s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:113.4040355682373ms total_cost_time:113.46554756164551ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:5545 prompt_cache_len:5151 prompt_cache_ratio:0.9289449954914337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:124.0847110748291ms total_cost_time:124.12452697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5502 prompt_cache_len:5145 prompt_cache_ratio:0.9351145038167938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 +DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:48 [batch.py:51] router release req id 8 +INFO 06-24 20:03:48 [batch.py:51] router release req id 400 +INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.06295490264892578 s +INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.04826617240905762 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.0644371509552002 s +INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.05129122734069824 s +DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=79803719322006594055946657073260988836, time:1750766628.9417806s req_ids:[8, 400] +DEBUG 06-24 20:03:48 [manager.py:391] +ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:144.4876194000244ms total_cost_time:144.54078674316406ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:5546 prompt_cache_len:5151 prompt_cache_ratio:0.928777497295348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:130.08546829223633ms total_cost_time:130.12409210205078ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5503 prompt_cache_len:5145 prompt_cache_ratio:0.9349445756859894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.04366755485534668 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.036544084548950195 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04512953758239746 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.03946065902709961 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=230287926030944861198511329496486400930, time:1750766629.0752885s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:124.55892562866211ms total_cost_time:124.59397315979004ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5547 prompt_cache_len:5151 prompt_cache_ratio:0.9286100594916171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:117.35129356384277ms total_cost_time:117.37418174743652ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:5504 prompt_cache_len:5145 prompt_cache_ratio:0.9347747093023255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.0453035831451416 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.04056715965270996 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04673361778259277 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04362845420837402 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=289907555184314936234175598367136515337, time:1750766629.208247s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:115.77248573303223ms total_cost_time:115.79751968383789ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5548 prompt_cache_len:5151 prompt_cache_ratio:0.9284426820475847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:119.31490898132324ms total_cost_time:119.33517456054688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:5505 prompt_cache_len:5145 prompt_cache_ratio:0.9346049046321526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.21399998664855957 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.20673632621765137 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.21562457084655762 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.21011686325073242 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=313928895486517905047856224137802913716, time:1750766629.4994113s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:295.9709167480469ms total_cost_time:296.01454734802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5549 prompt_cache_len:5151 prompt_cache_ratio:0.9282753649306181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:288.91539573669434ms total_cost_time:288.9413833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5506 prompt_cache_len:5145 prompt_cache_ratio:0.9344351616418453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.043306589126586914 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.03699469566345215 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04485034942626953 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.03997516632080078 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=302165632055316284503673141368883412172, time:1750766629.634829s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:127.28691101074219ms total_cost_time:127.34794616699219ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5550 prompt_cache_len:5151 prompt_cache_ratio:0.9281081081081081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:121.10614776611328ms total_cost_time:121.1385726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:5507 prompt_cache_len:5145 prompt_cache_ratio:0.9342654802978028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.04432249069213867 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.03849601745605469 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.045792579650878906 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04160785675048828 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=179893346050329409575819012616851504298, time:1750766629.7717881s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:114.7909164428711ms total_cost_time:114.83335494995117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5551 prompt_cache_len:5151 prompt_cache_ratio:0.9279409115474689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:121.24443054199219ms total_cost_time:121.28448486328125ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5508 prompt_cache_len:5145 prompt_cache_ratio:0.9340958605664488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:49 [batch.py:51] router release req id 8 +INFO 06-24 20:03:49 [batch.py:51] router release req id 400 +INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.05814552307128906 s +INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.046526193618774414 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.059670448303222656 s +INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04970812797546387 s +DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=224037300127965857219291631110412050042, time:1750766629.904954s req_ids:[8, 400] +DEBUG 06-24 20:03:49 [manager.py:391] +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:138.37289810180664ms total_cost_time:138.41819763183594ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5552 prompt_cache_len:5151 prompt_cache_ratio:0.9277737752161384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:126.95980072021484ms total_cost_time:126.98554992675781ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5509 prompt_cache_len:5145 prompt_cache_ratio:0.9339263024142312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 +INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.045163869857788086 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.0397946834564209 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.046715736389160156 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.04283475875854492 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=248576220608251832721359498514563264115, time:1750766630.0397003s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:104.52818870544434ms total_cost_time:104.56991195678711ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5553 prompt_cache_len:5151 prompt_cache_ratio:0.9276066990815776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:122.15995788574219ms total_cost_time:122.20072746276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5510 prompt_cache_len:5145 prompt_cache_ratio:0.9337568058076225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.07105517387390137 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04810810089111328 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.07260012626647949 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.05129837989807129 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=287433237452974455422368717376177179954, time:1750766630.1743073s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:148.9541530609131ms total_cost_time:148.99659156799316ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5554 prompt_cache_len:5151 prompt_cache_ratio:0.9274396831112711 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:127.86078453063965ms total_cost_time:127.90322303771973ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5511 prompt_cache_len:5145 prompt_cache_ratio:0.9335873707131193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.04954338073730469 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04231762886047363 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.05103754997253418 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.0451962947845459 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=62352939666092407470671319705844274205, time:1750766630.306252s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:128.33070755004883ms total_cost_time:128.37553024291992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5555 prompt_cache_len:5151 prompt_cache_ratio:0.9272727272727272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:121.09827995300293ms total_cost_time:121.12569808959961ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5512 prompt_cache_len:5145 prompt_cache_ratio:0.9334179970972424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.20601224899291992 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.2006831169128418 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.20753169059753418 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.20346760749816895 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=85347856848935128290599672774571096314, time:1750766630.600038s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:264.86754417419434ms total_cost_time:264.91284370422363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5556 prompt_cache_len:5151 prompt_cache_ratio:0.9271058315334774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:281.42714500427246ms total_cost_time:281.4662456512451ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5513 prompt_cache_len:5145 prompt_cache_ratio:0.9332486849265372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.07044792175292969 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.0492548942565918 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.0717921257019043 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.051905155181884766 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=278795412201298918084725537181851329702, time:1750766630.7359498s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:146.41332626342773ms total_cost_time:146.45671844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5557 prompt_cache_len:5151 prompt_cache_ratio:0.9269389958610761 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:127.98404693603516ms total_cost_time:128.0190944671631ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5514 prompt_cache_len:5145 prompt_cache_ratio:0.9330794341675734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.05019736289978027 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04398465156555176 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.051659584045410156 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.04692268371582031 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=217362292710259913346326911155204926233, time:1750766630.8664691s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:129.0762424468994ms total_cost_time:129.1358470916748ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5558 prompt_cache_len:5151 prompt_cache_ratio:0.9267722202231018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:123.16155433654785ms total_cost_time:123.20518493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5515 prompt_cache_len:5145 prompt_cache_ratio:0.9329102447869447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 +INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 +DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:50 [batch.py:51] router release req id 8 +INFO 06-24 20:03:50 [batch.py:51] router release req id 400 +INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.04176926612854004 s +INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.03438711166381836 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.043443918228149414 s +INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.037467241287231445 s +DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=76538349414580394956434546424765338338, time:1750766630.9962099s req_ids:[8, 400] +DEBUG 06-24 20:03:50 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:112.7469539642334ms total_cost_time:112.78319358825684ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5559 prompt_cache_len:5151 prompt_cache_ratio:0.926605504587156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:116.64700508117676ms total_cost_time:116.67704582214355ms,out_token_counter:1 mean_per_token_cost_time: 0.030040740966796875ms prompt_token_num:5516 prompt_cache_len:5145 prompt_cache_ratio:0.932741116751269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05428123474121094 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.04339885711669922 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.055805206298828125 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.046477317810058594 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=168779598982240737530705505383963460604, time:1750766631.1274736s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:134.31024551391602ms total_cost_time:134.3369483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5560 prompt_cache_len:5151 prompt_cache_ratio:0.9264388489208633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:123.39329719543457ms total_cost_time:123.40927124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.015974044799804688ms prompt_token_num:5517 prompt_cache_len:5145 prompt_cache_ratio:0.9325720500271887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.04616212844848633 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.0428471565246582 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.04780006408691406 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04586291313171387 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=153338967591333737190287911065236901823, time:1750766631.2608087s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:112.12873458862305ms total_cost_time:112.17427253723145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5561 prompt_cache_len:5151 prompt_cache_ratio:0.926272253191872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:121.60754203796387ms total_cost_time:121.64568901062012ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5518 prompt_cache_len:5145 prompt_cache_ratio:0.93240304458137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05998349189758301 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.046689748764038086 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.06171107292175293 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04974627494812012 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=224155254661675059795262565558056929326, time:1750766631.3913925s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:138.44037055969238ms total_cost_time:138.48328590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5562 prompt_cache_len:5151 prompt_cache_ratio:0.9261057173678533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:125.13875961303711ms total_cost_time:125.1668930053711ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5519 prompt_cache_len:5145 prompt_cache_ratio:0.9322341003805037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.2050621509552002 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.19929218292236328 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.20676565170288086 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.202545166015625 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=189503719940156742628622832890297902010, time:1750766631.6838496s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:284.7471237182617ms total_cost_time:284.7902774810791ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5563 prompt_cache_len:5151 prompt_cache_ratio:0.9259392414165019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:279.0682315826416ms total_cost_time:279.09398078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5520 prompt_cache_len:5145 prompt_cache_ratio:0.9320652173913043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.04391121864318848 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.03834700584411621 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.0454249382019043 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04142618179321289 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=330447073011267437104863266520286198877, time:1750766631.8156574s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 current batch size: 2 +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:116.67895317077637ms total_cost_time:116.72306060791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5564 prompt_cache_len:5151 prompt_cache_ratio:0.9257728253055356 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 +INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:123.31032752990723ms total_cost_time:123.34966659545898ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5521 prompt_cache_len:5145 prompt_cache_ratio:0.9318963955805107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 +DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:51 [batch.py:51] router release req id 8 +INFO 06-24 20:03:51 [batch.py:51] router release req id 400 +INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05378365516662598 s +INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.04241490364074707 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.05529165267944336 s +INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.045420169830322266 s +DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=314057218962455482309286796550956194582, time:1750766631.946214s req_ids:[8, 400] +DEBUG 06-24 20:03:51 [manager.py:391] +ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:132.40289688110352ms total_cost_time:132.4610710144043ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5565 prompt_cache_len:5151 prompt_cache_ratio:0.9256064690026954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:121.3080883026123ms total_cost_time:121.34933471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5522 prompt_cache_len:5145 prompt_cache_ratio:0.9317276349148859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.04295539855957031 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.03507637977600098 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.04437541961669922 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.038108110427856445 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=229062865899602578597511652248994862537, time:1750766632.0796459s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:108.74557495117188ms total_cost_time:108.79230499267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5566 prompt_cache_len:5151 prompt_cache_ratio:0.9254401724757456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:113.79718780517578ms total_cost_time:113.83748054504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5523 prompt_cache_len:5145 prompt_cache_ratio:0.9315589353612167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.05973100662231445 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04776358604431152 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.0613408088684082 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.05068635940551758 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=41358319528901141233982019330024275285, time:1750766632.2092133s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:138.08107376098633ms total_cost_time:138.1094455718994ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:5567 prompt_cache_len:5151 prompt_cache_ratio:0.9252739356924735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:125.89120864868164ms total_cost_time:125.90932846069336ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:5524 prompt_cache_len:5145 prompt_cache_ratio:0.9313902968863143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.04565072059631348 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.042510032653808594 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.04734373092651367 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.045532941818237305 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=221491810908273780899804798753281648965, time:1750766632.3402593s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:105.50785064697266ms total_cost_time:105.53336143493652ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5568 prompt_cache_len:5151 prompt_cache_ratio:0.9251077586206896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:119.49658393859863ms total_cost_time:119.51732635498047ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:5525 prompt_cache_len:5145 prompt_cache_ratio:0.9312217194570136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.06690049171447754 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.050124168395996094 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.06833982467651367 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.053025007247924805 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=140035562548706990595932859983144222185, time:1750766632.4720235s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:149.66607093811035ms total_cost_time:149.68609809875488ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:5569 prompt_cache_len:5151 prompt_cache_ratio:0.9249416412282276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:306.80036544799805ms total_cost_time:306.84638023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5526 prompt_cache_len:5145 prompt_cache_ratio:0.9310532030401737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.2213153839111328 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04813241958618164 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.22272658348083496 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.051221370697021484 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=25093827216994407787298731174224802561, time:1750766632.7835267s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:298.2964515686035ms total_cost_time:298.3403205871582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5570 prompt_cache_len:5151 prompt_cache_ratio:0.9247755834829443 mtp_avg_token_per_step:1.0 +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:127.93445587158203ms total_cost_time:127.97045707702637ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5527 prompt_cache_len:5145 prompt_cache_ratio:0.9308847476026778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:52 [batch.py:51] router release req id 8 +INFO 06-24 20:03:52 [batch.py:51] router release req id 400 +INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.0499424934387207 s +INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04373764991760254 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.05132174491882324 s +INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.04667043685913086 s +DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=118070640230890672790058146712677872330, time:1750766632.915787s req_ids:[8, 400] +DEBUG 06-24 20:03:52 [manager.py:391] +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:129.42767143249512ms total_cost_time:129.4727325439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5571 prompt_cache_len:5151 prompt_cache_ratio:0.9246095853527194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:123.40521812438965ms total_cost_time:123.43168258666992ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5528 prompt_cache_len:5145 prompt_cache_ratio:0.9307163531114327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 +INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +INFO 06-24 20:03:53 [batch.py:51] router release req id 400 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.04362940788269043 s +INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.03808116912841797 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.04511308670043945 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.040969133377075195 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=246547327416313757871574971943264278749, time:1750766633.0468106s req_ids:[8, 400] +DEBUG 06-24 20:03:53 [manager.py:391] +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:113.75117301940918ms total_cost_time:113.79432678222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5572 prompt_cache_len:5151 prompt_cache_ratio:0.9244436468054559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:131.791353225708ms total_cost_time:131.82926177978516ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5529 prompt_cache_len:5145 prompt_cache_ratio:0.9305480195333695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +INFO 06-24 20:03:53 [batch.py:51] router release req id 400 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.07991933822631836 s +INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.056919097900390625 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.08159661293029785 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.0599365234375 s +INFO 06-24 20:03:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:03:53 [statics_utils.py:24] mean first cost: 199.25973987296916 ms +INFO 06-24 20:03:53 [statics_utils.py:24] mean per token cost: 0.1913509480589316 ms +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=271533157199108687666199393799358983866, time:1750766633.2024448s req_ids:[8, 400] +DEBUG 06-24 20:03:53 [manager.py:391] +INFO 06-24 20:03:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:151.09562873840332ms total_cost_time:151.1397361755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5573 prompt_cache_len:5151 prompt_cache_ratio:0.9242777678090794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:137.0244026184082ms total_cost_time:137.06111907958984ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5530 prompt_cache_len:5145 prompt_cache_ratio:0.930379746835443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +INFO 06-24 20:03:53 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_999 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_999 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_999 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_999 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.10678935050964355 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=118189855342107601929975265918989066578, time:1750766633.3677108s req_ids:[8] +DEBUG 06-24 20:03:53 [manager.py:391] +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.10851716995239258 s +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:152.89044380187988ms total_cost_time:152.93121337890625ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5574 prompt_cache_len:5151 prompt_cache_ratio:0.9241119483315393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.1755075454711914 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.17688369750976562 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=206890208886519794914466304678742276031, time:1750766633.4648275s req_ids:[400] +DEBUG 06-24 20:03:53 [manager.py:391] +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_999 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_999 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_999 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_999 +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:390.4855251312256ms total_cost_time:390.5303478240967ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5531 prompt_cache_len:5145 prompt_cache_ratio:0.930211534984632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 400 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.26761531829833984 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.26903581619262695 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=300086502488774677854934614702268158373, time:1750766633.7060769s req_ids:[8] +DEBUG 06-24 20:03:53 [manager.py:391] +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:319.516658782959ms total_cost_time:319.55838203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5575 prompt_cache_len:5151 prompt_cache_ratio:0.9239461883408072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.02121567726135254 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.02264857292175293 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=298668374296881365915358082417663999794, time:1750766633.784471s req_ids:[8] +DEBUG 06-24 20:03:53 [manager.py:391] +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:72.21007347106934ms total_cost_time:72.24917411804199ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5576 prompt_cache_len:5151 prompt_cache_ratio:0.9237804878048781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.20575737953186035 s +INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.20719575881958008 s +DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=120711897993918734575775504122451669840, time:1750766633.8951025s req_ids:[400] +DEBUG 06-24 20:03:53 [manager.py:391] +ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:277.09007263183594ms total_cost_time:277.1332263946533ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5532 prompt_cache_len:5145 prompt_cache_ratio:0.9300433839479393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 +DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:53 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20678138732910156 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.2082827091217041 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=175617659414096469247437226850579789817, time:1750766634.04318s req_ids:[8] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:268.36490631103516ms total_cost_time:268.40901374816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5577 prompt_cache_len:5151 prompt_cache_ratio:0.9236148466917697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.2080368995666504 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.20966148376464844 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=191458082002302381861759455538494431665, time:1750766634.1755795s req_ids:[400] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:279.50215339660645ms total_cost_time:279.54721450805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5533 prompt_cache_len:5145 prompt_cache_ratio:0.9298752936923911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20798039436340332 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.20949530601501465 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=120231025276007316275672584283598805958, time:1750766634.3156736s req_ids:[8] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:284.0301990509033ms total_cost_time:284.0754985809326ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5578 prompt_cache_len:5151 prompt_cache_ratio:0.9234492649695232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.20765185356140137 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.2092914581298828 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=209445341372169626103509412408618463930, time:1750766634.467842s req_ids:[400] +DEBUG 06-24 20:03:54 [manager.py:391] +DEBUG 06-24 20:03:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 66533.460 tokens/s +DEBUG 06-24 20:03:54 [stats.py:37] Avg prompt tokens throughput: 66509.391 tokens/s +DEBUG 06-24 20:03:54 [stats.py:37] Avg generate tokens throughput: 24.070 tokens/s +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:292.45805740356445ms total_cost_time:292.50192642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5534 prompt_cache_len:5145 prompt_cache_ratio:0.9297072641850379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.2077946662902832 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.20938658714294434 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=260413419221066590120254872903618999432, time:1750766634.6162775s req_ids:[8] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:295.87769508361816ms total_cost_time:295.928955078125ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:5579 prompt_cache_len:5151 prompt_cache_ratio:0.9232837426062018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.20644187927246094 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.2079486846923828 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=231282726551859208564745742786319460151, time:1750766634.7701323s req_ids:[400] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:298.5856533050537ms total_cost_time:298.6283302307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5535 prompt_cache_len:5145 prompt_cache_ratio:0.9295392953929539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 +DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:54 [batch.py:51] router release req id 400 +DEBUG 06-24 20:03:54 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:54 [manager.py:283] +DEBUG 06-24 20:03:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:54 [manager.py:284] +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20926380157470703 s +INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.21129131317138672 s +DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=5916925452140600368580878633475366511, time:1750766634.921155s req_ids:[8] +DEBUG 06-24 20:03:54 [manager.py:391] +ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:296.6790199279785ms total_cost_time:296.7219352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5580 prompt_cache_len:5151 prompt_cache_ratio:0.9231182795698925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.20787477493286133 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20943260192871094 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=131405400482022463597985776673580920487, time:1750766635.073419s req_ids:[400] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:294.4645881652832ms total_cost_time:294.5091724395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5536 prompt_cache_len:5145 prompt_cache_ratio:0.929371387283237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.2076876163482666 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20923233032226562 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=7300979067914406541592100823111090822, time:1750766635.2207983s req_ids:[8] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:290.7235622406006ms total_cost_time:290.7674312591553ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5581 prompt_cache_len:5151 prompt_cache_ratio:0.9229528758287046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.20677852630615234 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20830726623535156 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=152348736204183644698880921748109334448, time:1750766635.3683143s req_ids:[400] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:296.7367172241211ms total_cost_time:296.7798709869385ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5537 prompt_cache_len:5145 prompt_cache_ratio:0.9292035398230089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.20754265785217285 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20917081832885742 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=82231552322518582949077899934064058113, time:1750766635.5225294s req_ids:[8] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:299.1311550140381ms total_cost_time:299.1750240325928ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5582 prompt_cache_len:5151 prompt_cache_ratio:0.9227875313507703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.2080698013305664 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20959138870239258 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=194532406525690553316439469078000011290, time:1750766635.6719136s req_ids:[400] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:293.64848136901855ms total_cost_time:293.69115829467773ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5538 prompt_cache_len:5145 prompt_cache_ratio:0.929035752979415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 +DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:55 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.20764589309692383 s +INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20922613143920898 s +DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=89056654487569766537445314901610860260, time:1750766635.8223937s req_ids:[8] +DEBUG 06-24 20:03:55 [manager.py:391] +ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:300.92930793762207ms total_cost_time:300.97246170043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5583 prompt_cache_len:5151 prompt_cache_ratio:0.922622246104245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 +DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.4081413745880127 s +INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.40973734855651855 s +DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=159253867299297248986598983857718176201, time:1750766636.1493287s req_ids:[400] +DEBUG 06-24 20:03:56 [manager.py:391] +ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:470.92437744140625ms total_cost_time:470.966100692749ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5539 prompt_cache_len:5145 prompt_cache_ratio:0.9288680267196244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 +DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:56 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:56 [manager.py:224] router recive req id 8 cost time 0.4112677574157715 s +INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 8 cost time 0.4130704402923584 s +DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=290322282031980276679568861411399006067, time:1750766636.335271s req_ids:[8] +DEBUG 06-24 20:03:56 [manager.py:391] +ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:519.6452140808105ms total_cost_time:519.6881294250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5584 prompt_cache_len:5151 prompt_cache_ratio:0.9224570200573066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 +DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.30808353424072266 s +INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.30986714363098145 s +DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=213367066965451755239282467278423336233, time:1750766636.5411975s req_ids:[400] +DEBUG 06-24 20:03:56 [manager.py:391] +ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:56 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 first_token_cost:406.42809867858887ms total_cost_time:406.47006034851074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5540 prompt_cache_len:5145 prompt_cache_ratio:0.9287003610108303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 +DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:56 [batch.py:51] router release req id 400 +INFO 06-24 20:03:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:56 [manager.py:224] router recive req id 8 cost time 0.30820584297180176 s +INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 8 cost time 0.31000399589538574 s +DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=36784291884233071155657384924118802316, time:1750766636.748188s req_ids:[8] +DEBUG 06-24 20:03:56 [manager.py:391] +ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 first_token_cost:409.0914726257324ms total_cost_time:409.1348648071289ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5585 prompt_cache_len:5151 prompt_cache_ratio:0.9222918531781558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 +DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.3094446659088135 s +INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.3112037181854248 s +DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=47933076243050267512741088183108664072, time:1750766636.95525s req_ids:[400] +DEBUG 06-24 20:03:56 [manager.py:391] +ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 first_token_cost:411.13829612731934ms total_cost_time:411.2122058868408ms,out_token_counter:1 mean_per_token_cost_time: 0.07390975952148438ms prompt_token_num:5541 prompt_cache_len:5145 prompt_cache_ratio:0.9285327558202491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 +DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:57 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.30771684646606445 s +INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.30957651138305664 s +DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=85893696854207356519531292520676573317, time:1750766637.164642s req_ids:[8] +DEBUG 06-24 20:03:57 [manager.py:391] +ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 first_token_cost:412.52946853637695ms total_cost_time:412.57214546203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5586 prompt_cache_len:5151 prompt_cache_ratio:0.9221267454350162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 +DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:57 [manager.py:224] router recive req id 400 cost time 0.3083832263946533 s +INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 400 cost time 0.3101034164428711 s +DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=133049827962665090558556615733996681227, time:1750766637.3753276s req_ids:[400] +DEBUG 06-24 20:03:57 [manager.py:391] +ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:414.25585746765137ms total_cost_time:414.3080711364746ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:5542 prompt_cache_len:5145 prompt_cache_ratio:0.9283652111151209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 +DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:57 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.3082447052001953 s +INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.3100576400756836 s +DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=184752098914869427963870186203463903736, time:1750766637.5862875s req_ids:[8] +DEBUG 06-24 20:03:57 [manager.py:391] +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 first_token_cost:416.69726371765137ms total_cost_time:416.72635078430176ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5587 prompt_cache_len:5151 prompt_cache_ratio:0.9219616967961339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 +DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:57 [manager.py:224] router recive req id 400 cost time 0.30835843086242676 s +INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 400 cost time 0.31026625633239746 s +DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=337284436783423636467000215915826243165, time:1750766637.797384s req_ids:[400] +DEBUG 06-24 20:03:57 [manager.py:391] +ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:415.9364700317383ms total_cost_time:415.9808158874512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5543 prompt_cache_len:5145 prompt_cache_ratio:0.9281977268627097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 +DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:57 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.30741071701049805 s +INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.30914735794067383 s +DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=183381485783916380044657226258425736513, time:1750766638.0071425s req_ids:[8] +DEBUG 06-24 20:03:58 [manager.py:391] +ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 first_token_cost:416.25499725341797ms total_cost_time:416.29767417907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5588 prompt_cache_len:5151 prompt_cache_ratio:0.9217967072297781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 +DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:58 [manager.py:224] router recive req id 400 cost time 0.30820560455322266 s +INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 400 cost time 0.31025171279907227 s +DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=325009666800542668627770362727147024858, time:1750766638.217403s req_ids:[400] +DEBUG 06-24 20:03:58 [manager.py:391] +ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:416.4443016052246ms total_cost_time:416.4876937866211ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5544 prompt_cache_len:5145 prompt_cache_ratio:0.928030303030303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 +DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:58 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:58 [manager.py:224] router recive req id 8 cost time 0.3092689514160156 s +INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 8 cost time 0.311384916305542 s +DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=320695993755427275267903011700269169569, time:1750766638.4285886s req_ids:[8] +DEBUG 06-24 20:03:58 [manager.py:391] +ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 first_token_cost:416.8996810913086ms total_cost_time:416.9437885284424ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5589 prompt_cache_len:5151 prompt_cache_ratio:0.9216317767042405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 +DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:58 [manager.py:224] router recive req id 400 cost time 0.30898094177246094 s +INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 400 cost time 0.31093597412109375 s +DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=116259232714662220742437296011044703608, time:1750766638.6391995s req_ids:[400] +DEBUG 06-24 20:03:58 [manager.py:391] +ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:58 [manager.py:162] detoken release req id 400 +INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 first_token_cost:415.13705253601074ms total_cost_time:415.1802062988281ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5545 prompt_cache_len:5145 prompt_cache_ratio:0.9278629395852119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 +DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:58 [batch.py:51] router release req id 400 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:58 [manager.py:224] router recive req id 8 cost time 0.3078181743621826 s +INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 8 cost time 0.3097081184387207 s +DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=313887881215751464440488569991646795119, time:1750766638.8514457s req_ids:[8] +DEBUG 06-24 20:03:58 [manager.py:391] +ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 first_token_cost:584.6686363220215ms total_cost_time:584.7115516662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5590 prompt_cache_len:5151 prompt_cache_ratio:0.9214669051878355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 +DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10744333267211914 s +INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10941529273986816 s +DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=170229261247117821417554070380268408229, time:1750766639.2209778s req_ids:[8] +DEBUG 06-24 20:03:59 [manager.py:391] +ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:193.56203079223633ms total_cost_time:193.603515625ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5591 prompt_cache_len:5151 prompt_cache_ratio:0.9213020926489001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 +DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.1070556640625 s +INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10885930061340332 s +DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=256572012918454229085549620369003075712, time:1750766639.428526s req_ids:[8] +DEBUG 06-24 20:03:59 [manager.py:391] +ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.75411415100098ms total_cost_time:204.8037052154541ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:5592 prompt_cache_len:5151 prompt_cache_ratio:0.921137339055794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 +DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10832595825195312 s +INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.11004018783569336 s +DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=7017378000446258317538721738950539737, time:1750766639.6379635s req_ids:[8] +DEBUG 06-24 20:03:59 [manager.py:391] +ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.96273040771484ms total_cost_time:205.00564575195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5593 prompt_cache_len:5151 prompt_cache_ratio:0.9209726443768997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 +DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10710859298706055 s +INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10886669158935547 s +DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=207812318088513596096976772340931268235, time:1750766639.845761s req_ids:[8] +DEBUG 06-24 20:03:59 [manager.py:391] +ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.00428771972656ms total_cost_time:204.05030250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5594 prompt_cache_len:5151 prompt_cache_ratio:0.9208080085806221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 +DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:03:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.10685467720031738 s +INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.1087028980255127 s +DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=119500951787489351729722916769637496863, time:1750766640.058325s req_ids:[8] +DEBUG 06-24 20:04:00 [manager.py:391] +ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:207.11684226989746ms total_cost_time:207.15928077697754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5595 prompt_cache_len:5151 prompt_cache_ratio:0.9206434316353888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 +DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:00 [batch.py:51] router release req id 8 +INFO 06-24 20:04:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:00 [manager.py:224] router recive req id 400 cost time 1.5198016166687012 s +INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 400 cost time 1.5215346813201904 s +DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=312625324778887380231774992193424212118, time:1750766640.267414s req_ids:[400] +DEBUG 06-24 20:04:00 [manager.py:391] +ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 first_token_cost:1622.63822555542ms total_cost_time:1622.6818561553955ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5546 prompt_cache_len:5145 prompt_cache_ratio:0.927695636494771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 +DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:00 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.3097348213195801 s +INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.31153059005737305 s +DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=191448337359629793380209850348657511519, time:1750766640.4759295s req_ids:[8] +DEBUG 06-24 20:04:00 [manager.py:391] +ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:414.45374488830566ms total_cost_time:414.49880599975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5596 prompt_cache_len:5151 prompt_cache_ratio:0.9204789135096497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 +DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:00 [manager.py:224] router recive req id 400 cost time 0.3084886074066162 s +INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 400 cost time 0.3103458881378174 s +DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=167887182440130761946522220894601097220, time:1750766640.687438s req_ids:[400] +DEBUG 06-24 20:04:00 [manager.py:391] +ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:00 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 first_token_cost:412.19377517700195ms total_cost_time:412.23883628845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5547 prompt_cache_len:5145 prompt_cache_ratio:0.9275283937263386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 +DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:00 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.3095569610595703 s +INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.3113560676574707 s +DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=5289778497843780574292314570987780027, time:1750766640.8948448s req_ids:[8] +DEBUG 06-24 20:04:00 [manager.py:391] +ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:413.35344314575195ms total_cost_time:413.39898109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5597 prompt_cache_len:5151 prompt_cache_ratio:0.9203144541718777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 +DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:01 [manager.py:224] router recive req id 400 cost time 0.30982255935668945 s +INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 400 cost time 0.3119990825653076 s +DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=207913426562109027215512195925360584916, time:1750766641.1051707s req_ids:[400] +DEBUG 06-24 20:04:01 [manager.py:391] +ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 first_token_cost:409.6224308013916ms total_cost_time:409.6653461456299ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5548 prompt_cache_len:5145 prompt_cache_ratio:0.9273612112472963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 +DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:01 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:01 [manager.py:224] router recive req id 8 cost time 0.3093705177307129 s +INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 8 cost time 0.31146836280822754 s +DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=51395948235105543153758312289523783491, time:1750766641.3151486s req_ids:[8] +DEBUG 06-24 20:04:01 [manager.py:391] +ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:412.16206550598145ms total_cost_time:412.2049808502197ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5598 prompt_cache_len:5151 prompt_cache_ratio:0.920150053590568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:8 +DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:01 [manager.py:224] router recive req id 400 cost time 0.3088245391845703 s +INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 400 cost time 0.3106422424316406 s +DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=181999333882499944085172552825451111517, time:1750766641.5259075s req_ids:[400] +DEBUG 06-24 20:04:01 [manager.py:391] +ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:01 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 first_token_cost:621.3717460632324ms total_cost_time:621.4141845703125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5549 prompt_cache_len:5145 prompt_cache_ratio:0.9271940890250495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 +DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:01 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:01 [manager.py:224] router recive req id 8 cost time 0.5109429359436035 s +INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 8 cost time 0.5127873420715332 s +DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=34063390236824430431743071058540932817, time:1750766641.9308975s req_ids:[8] +DEBUG 06-24 20:04:01 [manager.py:391] +ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:8 first_token_cost:611.9697093963623ms total_cost_time:612.0131015777588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5599 prompt_cache_len:5151 prompt_cache_ratio:0.9199857117342383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 +DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.30853796005249023 s +INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.3106873035430908 s +DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=122162866983859921578777979631834896650, time:1750766642.1407092s req_ids:[400] +DEBUG 06-24 20:04:02 [manager.py:391] +ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 first_token_cost:404.829740524292ms total_cost_time:404.8733711242676ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5550 prompt_cache_len:5145 prompt_cache_ratio:0.927027027027027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 +DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:02 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:02 [manager.py:224] router recive req id 8 cost time 0.3110980987548828 s +INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 8 cost time 0.31302738189697266 s +DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=178531426736335887918101514346955716625, time:1750766642.3484359s req_ids:[8] +DEBUG 06-24 20:04:02 [manager.py:391] +ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:413.24806213378906ms total_cost_time:413.29169273376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5600 prompt_cache_len:5151 prompt_cache_ratio:0.9198214285714286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 +DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.309192419052124 s +INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.31116652488708496 s +DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=9356760427016274025068612944708979669, time:1750766642.560211s req_ids:[400] +DEBUG 06-24 20:04:02 [manager.py:391] +ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:02 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 first_token_cost:414.3857955932617ms total_cost_time:414.4296646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5551 prompt_cache_len:5145 prompt_cache_ratio:0.926860025220681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 +DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:02 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:02 [manager.py:224] router recive req id 8 cost time 0.3094029426574707 s +INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 8 cost time 0.311328649520874 s +DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=150881320521901634364777904695554824780, time:1750766642.7679093s req_ids:[8] +DEBUG 06-24 20:04:02 [manager.py:391] +ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:409.25073623657227ms total_cost_time:409.2881679534912ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5601 prompt_cache_len:5151 prompt_cache_ratio:0.9196572040707016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 +DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.3088548183441162 s +INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.3109605312347412 s +DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=201395187720541241109434204510260875519, time:1750766642.9753573s req_ids:[400] +DEBUG 06-24 20:04:02 [manager.py:391] +ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 first_token_cost:409.70611572265625ms total_cost_time:409.74998474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5552 prompt_cache_len:5145 prompt_cache_ratio:0.926693083573487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 +DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:03 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.307326078414917 s +INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 8 cost time 0.30927395820617676 s +DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=207794210652918811786776671903815756052, time:1750766643.1843364s req_ids:[8] +DEBUG 06-24 20:04:03 [manager.py:391] +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:414.50023651123047ms total_cost_time:414.54410552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5602 prompt_cache_len:5151 prompt_cache_ratio:0.9194930382006427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 +DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:03 [manager.py:224] router recive req id 400 cost time 0.30852389335632324 s +INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 400 cost time 0.3105156421661377 s +DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=303746443963813672154060956251192604052, time:1750766643.3955302s req_ids:[400] +DEBUG 06-24 20:04:03 [manager.py:391] +ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:414.3822193145752ms total_cost_time:414.4270420074463ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5553 prompt_cache_len:5145 prompt_cache_ratio:0.9265262020529443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 +DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:03 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.3101041316986084 s +INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 8 cost time 0.31215476989746094 s +DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=143059255189025818730587806413496586422, time:1750766643.6039033s req_ids:[8] +DEBUG 06-24 20:04:03 [manager.py:391] +ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 first_token_cost:413.632869720459ms total_cost_time:413.67626190185547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5603 prompt_cache_len:5151 prompt_cache_ratio:0.919328930929859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 +DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:03 [manager.py:224] router recive req id 400 cost time 0.3083205223083496 s +INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 400 cost time 0.3108999729156494 s +DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=159119699053717267174256553391988467768, time:1750766643.8156955s req_ids:[400] +DEBUG 06-24 20:04:03 [manager.py:391] +ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:414.597749710083ms total_cost_time:414.6406650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5554 prompt_cache_len:5145 prompt_cache_ratio:0.9263593806265754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 +DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:03 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.30870532989501953 s +INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 8 cost time 0.310497522354126 s +DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=137408571669613238069218075157386923670, time:1750766644.026565s req_ids:[8] +DEBUG 06-24 20:04:04 [manager.py:391] +ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 first_token_cost:416.58830642700195ms total_cost_time:416.6300296783447ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5604 prompt_cache_len:5151 prompt_cache_ratio:0.9191648822269807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 +DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:04 [manager.py:224] router recive req id 400 cost time 0.5117514133453369 s +INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 400 cost time 0.5139124393463135 s +DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=311570343597427374078576646439964112696, time:1750766644.4392066s req_ids:[400] +DEBUG 06-24 20:04:04 [manager.py:391] +ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:04:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 26054.055 tokens/s +DEBUG 06-24 20:04:04 [stats.py:37] Avg prompt tokens throughput: 26044.605 tokens/s +DEBUG 06-24 20:04:04 [stats.py:37] Avg generate tokens throughput: 9.450 tokens/s +INFO 06-24 20:04:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:620.6552982330322ms total_cost_time:620.6998825073242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5555 prompt_cache_len:5145 prompt_cache_ratio:0.9261926192619262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 +DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:04 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:04 [manager.py:224] router recive req id 8 cost time 0.5112729072570801 s +INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 8 cost time 0.5130696296691895 s +DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=258484979404999532749697903307901579785, time:1750766644.654779s req_ids:[8] +DEBUG 06-24 20:04:04 [manager.py:391] +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 first_token_cost:623.0220794677734ms total_cost_time:623.0645179748535ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5605 prompt_cache_len:5151 prompt_cache_ratio:0.9190008920606602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 +DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:04 [manager.py:224] router recive req id 400 cost time 0.30897068977355957 s +INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 400 cost time 0.31076908111572266 s +DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=202936217410018198300462329942163462523, time:1750766644.8643155s req_ids:[400] +DEBUG 06-24 20:04:04 [manager.py:391] +ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:04 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 first_token_cost:415.6372547149658ms total_cost_time:415.679931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5556 prompt_cache_len:5145 prompt_cache_ratio:0.9260259179265659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 +DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:04 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3082394599914551 s +INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.3101339340209961 s +DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=333318630767069807526149463415231793460, time:1750766645.070772s req_ids:[8] +DEBUG 06-24 20:04:05 [manager.py:391] +ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 first_token_cost:410.3398323059082ms total_cost_time:410.3813171386719ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5606 prompt_cache_len:5151 prompt_cache_ratio:0.9188369603995719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 +DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:05 [manager.py:224] router recive req id 400 cost time 0.3093225955963135 s +INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 400 cost time 0.31135058403015137 s +DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=292298541697273391556195931370633541622, time:1750766645.280804s req_ids:[400] +DEBUG 06-24 20:04:05 [manager.py:391] +ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 first_token_cost:409.59906578063965ms total_cost_time:409.64221954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5557 prompt_cache_len:5145 prompt_cache_ratio:0.9258592765880871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 +DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:05 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3061075210571289 s +INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.3079698085784912 s +DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=220642754003056481251368748772506181474, time:1750766645.4900327s req_ids:[8] +DEBUG 06-24 20:04:05 [manager.py:391] +ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 first_token_cost:414.9055480957031ms total_cost_time:414.9479866027832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5607 prompt_cache_len:5151 prompt_cache_ratio:0.918673087212413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 +DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:05 [manager.py:224] router recive req id 400 cost time 0.3074033260345459 s +INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 400 cost time 0.30938100814819336 s +DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=97411769377960097701004443046286862738, time:1750766645.7023854s req_ids:[400] +DEBUG 06-24 20:04:05 [manager.py:391] +ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:05 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 first_token_cost:416.32556915283203ms total_cost_time:416.3699150085449ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5558 prompt_cache_len:5145 prompt_cache_ratio:0.9256926952141058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 +DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:05 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3090088367462158 s +INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.31144118309020996 s +DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=127216266947550499551767509357218711452, time:1750766645.9182296s req_ids:[8] +DEBUG 06-24 20:04:05 [manager.py:391] +ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 first_token_cost:424.82495307922363ms total_cost_time:424.8678684234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5608 prompt_cache_len:5151 prompt_cache_ratio:0.918509272467903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:8 +DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.3076171875 s +INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.31009960174560547 s +DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=70710339150044694638913242786478349956, time:1750766646.0963833s req_ids:[400] +DEBUG 06-24 20:04:06 [manager.py:391] +ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 first_token_cost:384.6170902252197ms total_cost_time:384.6616744995117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5559 prompt_cache_len:5145 prompt_cache_ratio:0.9255261737722612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 +DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:06 [batch.py:51] router release req id 400 +INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10553979873657227 s +INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10801196098327637 s +DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=184800006221488638324880188008560939079, time:1750766646.2977662s req_ids:[400] +DEBUG 06-24 20:04:06 [manager.py:391] +ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:204.96511459350586ms total_cost_time:205.00874519348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5560 prompt_cache_len:5145 prompt_cache_ratio:0.9253597122302158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 +DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:06 [batch.py:51] router release req id 400 +INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10636520385742188 s +INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10876321792602539 s +DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=554085911765599510962848408216386845, time:1750766646.5093455s req_ids:[400] +DEBUG 06-24 20:04:06 [manager.py:391] +ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:206.5277099609375ms total_cost_time:206.57110214233398ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5561 prompt_cache_len:5145 prompt_cache_ratio:0.9251933105556555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 +DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:06 [batch.py:51] router release req id 400 +INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10669255256652832 s +INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10873794555664062 s +DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=200924581100489386741797026421560403360, time:1750766646.7189913s req_ids:[400] +DEBUG 06-24 20:04:06 [manager.py:391] +ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:202.8677463531494ms total_cost_time:202.9109001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5562 prompt_cache_len:5145 prompt_cache_ratio:0.9250269687162891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 +DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:06 [batch.py:51] router release req id 400 +INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10732865333557129 s +INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10935163497924805 s +DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=200704122696106381218141459743677525434, time:1750766646.9346104s req_ids:[400] +DEBUG 06-24 20:04:06 [manager.py:391] +ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:206.53438568115234ms total_cost_time:206.57777786254883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5563 prompt_cache_len:5145 prompt_cache_ratio:0.924860686679849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 +DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:07 [batch.py:51] router release req id 400 +INFO 06-24 20:04:07 [manager.py:224] router recive req id 400 cost time 0.10532832145690918 s +INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 400 cost time 0.10737133026123047 s +DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=83912536543841667595844539094836774719, time:1750766647.1395905s req_ids:[400] +DEBUG 06-24 20:04:07 [manager.py:391] +ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:206.1467170715332ms total_cost_time:206.1898708343506ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5564 prompt_cache_len:5145 prompt_cache_ratio:0.9246944644140905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 +DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:07 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 1.319875955581665 s +INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 1.3217723369598389 s +DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=48203246045638171774164962894794333738, time:1750766647.3597903s req_ids:[8] +DEBUG 06-24 20:04:07 [manager.py:391] +ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:8 first_token_cost:1436.5203380584717ms total_cost_time:1436.56325340271ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5609 prompt_cache_len:5151 prompt_cache_ratio:0.9183455161347834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 +DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:07 [manager.py:224] router recive req id 400 cost time 0.30842065811157227 s +INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 400 cost time 0.31039977073669434 s +DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=171329892020683745980329694996334406256, time:1750766647.5371072s req_ids:[400] +DEBUG 06-24 20:04:07 [manager.py:391] +ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:378.12304496765137ms total_cost_time:378.16762924194336ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5565 prompt_cache_len:5145 prompt_cache_ratio:0.9245283018867925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 +DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:07 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 0.20805764198303223 s +INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 0.2098839282989502 s +DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=178477640574207755194116447892704888316, time:1750766647.6621356s req_ids:[8] +DEBUG 06-24 20:04:07 [manager.py:391] +ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 first_token_cost:431.0300350189209ms total_cost_time:431.0739040374756ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5610 prompt_cache_len:5151 prompt_cache_ratio:0.9181818181818182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 +DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 0.10762858390808105 s +INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s +DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=186946421580586713852412184587159165742, time:1750766648.0047326s req_ids:[8] +DEBUG 06-24 20:04:08 [manager.py:391] +ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 first_token_cost:193.50433349609375ms total_cost_time:193.54867935180664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5611 prompt_cache_len:5151 prompt_cache_ratio:0.9180181785777937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 +DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10751056671142578 s +INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.10948300361633301 s +DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=84464561349473505979403028501226303687, time:1750766648.2057552s req_ids:[8] +DEBUG 06-24 20:04:08 [manager.py:391] +ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:201.83396339416504ms total_cost_time:201.8759250640869ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5612 prompt_cache_len:5151 prompt_cache_ratio:0.9178545972915182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 +DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10818624496459961 s +INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.11023426055908203 s +DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=24021638303123893874954171386458452313, time:1750766648.4140673s req_ids:[8] +DEBUG 06-24 20:04:08 [manager.py:391] +ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:207.09514617919922ms total_cost_time:207.1397304534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5613 prompt_cache_len:5151 prompt_cache_ratio:0.9176910742918225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 +DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s +INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.11104893684387207 s +DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=270707997427265943533780191389721800738, time:1750766648.6249912s req_ids:[8] +DEBUG 06-24 20:04:08 [manager.py:391] +ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:203.89366149902344ms total_cost_time:203.94277572631836ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:5614 prompt_cache_len:5151 prompt_cache_ratio:0.9175276095475596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 +DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10756587982177734 s +INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.1095733642578125 s +DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=164582218623876241791561632462622767483, time:1750766648.8353434s req_ids:[8] +DEBUG 06-24 20:04:08 [manager.py:391] +ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:205.1072120666504ms total_cost_time:205.13629913330078ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5615 prompt_cache_len:5151 prompt_cache_ratio:0.9173642030276047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 +DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.10462737083435059 s +INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.10654592514038086 s +DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=254881221985780640766236556553831137552, time:1750766649.0447145s req_ids:[8] +DEBUG 06-24 20:04:09 [manager.py:391] +ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:203.81903648376465ms total_cost_time:203.86290550231934ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5616 prompt_cache_len:5151 prompt_cache_ratio:0.9172008547008547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 +DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:09 [manager.py:224] router recive req id 400 cost time 1.621199607849121 s +INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 400 cost time 1.6231095790863037 s +DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=32125577664086894184896364509563615517, time:1750766649.2482486s req_ids:[400] +DEBUG 06-24 20:04:09 [manager.py:391] +ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:1720.1716899871826ms total_cost_time:1720.2157974243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5566 prompt_cache_len:5145 prompt_cache_ratio:0.9243621990657563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 +DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:09 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.3096799850463867 s +INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.3110342025756836 s +DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=46730805093707675538857214692509971952, time:1750766649.458498s req_ids:[8] +DEBUG 06-24 20:04:09 [manager.py:391] +ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:412.33348846435547ms total_cost_time:412.3809337615967ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:5617 prompt_cache_len:5151 prompt_cache_ratio:0.9170375645362293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 +DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:09 [manager.py:224] router recive req id 400 cost time 0.3077678680419922 s +INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 400 cost time 0.3090324401855469 s +DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=23474063618402929745215318299546271937, time:1750766649.6704204s req_ids:[400] +DEBUG 06-24 20:04:09 [manager.py:391] +ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:09 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 first_token_cost:417.39487648010254ms total_cost_time:417.4520969390869ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5567 prompt_cache_len:5145 prompt_cache_ratio:0.9241961559188072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 +DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:09 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.3105649948120117 s +INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.31255459785461426 s +DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=133798484089321286677542925208158411055, time:1750766649.881909s req_ids:[8] +DEBUG 06-24 20:04:09 [manager.py:391] +ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:417.1261787414551ms total_cost_time:417.1566963195801ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:5618 prompt_cache_len:5151 prompt_cache_ratio:0.91687433250267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 +DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:10 [manager.py:224] router recive req id 400 cost time 0.31122732162475586 s +INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 400 cost time 0.3131442070007324 s +DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=172288862546244909271305936984538185972, time:1750766650.0923932s req_ids:[400] +DEBUG 06-24 20:04:10 [manager.py:391] +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 first_token_cost:416.95690155029297ms total_cost_time:417.00077056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5568 prompt_cache_len:5145 prompt_cache_ratio:0.9240301724137931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 +DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:10 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:10 [manager.py:224] router recive req id 8 cost time 0.3083064556121826 s +INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 8 cost time 0.3102741241455078 s +DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=107904784171822393551071728358014687071, time:1750766650.305406s req_ids:[8] +DEBUG 06-24 20:04:10 [manager.py:391] +ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:417.01221466064453ms total_cost_time:417.055606842041ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5619 prompt_cache_len:5151 prompt_cache_ratio:0.9167111585691404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 +DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:10 [manager.py:224] router recive req id 400 cost time 0.3084132671356201 s +INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 400 cost time 0.3103957176208496 s +DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=238665974378854712962824593539115985316, time:1750766650.5179589s req_ids:[400] +DEBUG 06-24 20:04:10 [manager.py:391] +ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:10 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 first_token_cost:418.26868057250977ms total_cost_time:418.31302642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5569 prompt_cache_len:5145 prompt_cache_ratio:0.9238642485185851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 +DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:10 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:10 [manager.py:224] router recive req id 8 cost time 0.5093872547149658 s +INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 8 cost time 0.5114932060241699 s +DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=67063561849501242349282800334071899284, time:1750766650.9135842s req_ids:[8] +DEBUG 06-24 20:04:10 [manager.py:391] +ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 first_token_cost:599.778413772583ms total_cost_time:599.820613861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5620 prompt_cache_len:5151 prompt_cache_ratio:0.9165480427046263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 +DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:11 [manager.py:224] router recive req id 8 cost time 0.10658454895019531 s +INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 8 cost time 0.10840177536010742 s +DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=271885194208326370355595120234574521378, time:1750766651.1221745s req_ids:[8] +DEBUG 06-24 20:04:11 [manager.py:391] +ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 first_token_cost:202.90279388427734ms total_cost_time:202.94666290283203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5621 prompt_cache_len:5151 prompt_cache_ratio:0.9163849848781356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 +DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:11 [manager.py:224] router recive req id 8 cost time 0.10694456100463867 s +INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s +DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=85327986339808664354167931846470096604, time:1750766651.330387s req_ids:[8] +DEBUG 06-24 20:04:11 [manager.py:391] +ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 first_token_cost:210.88147163391113ms total_cost_time:210.93320846557617ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:5622 prompt_cache_len:5151 prompt_cache_ratio:0.916221985058698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 +DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.9161441326141357 s +DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=255114344367927461894644818240771348121, time:1750766651.5211604s req_ids:[400] +DEBUG 06-24 20:04:11 [manager.py:391] +INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.9184982776641846 s +ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 first_token_cost:987.316370010376ms total_cost_time:987.3597621917725ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5570 prompt_cache_len:5145 prompt_cache_ratio:0.9236983842010772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 +DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:11 [batch.py:51] router release req id 400 +INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.10622859001159668 s +INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.1085507869720459 s +DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=221375098914235331110466844929578447172, time:1750766651.717867s req_ids:[400] +DEBUG 06-24 20:04:11 [manager.py:391] +ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:11 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 first_token_cost:200.5767822265625ms total_cost_time:200.62017440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5571 prompt_cache_len:5145 prompt_cache_ratio:0.9235325794291869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 +DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:11 [batch.py:51] router release req id 400 +INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.10710501670837402 s +INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.10856413841247559 s +DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=200016491595426344810975366264990745223, time:1750766651.9256s req_ids:[400] +DEBUG 06-24 20:04:11 [manager.py:391] +ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 first_token_cost:205.04140853881836ms total_cost_time:205.08432388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5572 prompt_cache_len:5145 prompt_cache_ratio:0.9233668341708543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 +DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:12 [batch.py:51] router release req id 400 +INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10688185691833496 s +INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.10837268829345703 s +DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=290968300566471388616889901941260062635, time:1750766652.1350586s req_ids:[400] +DEBUG 06-24 20:04:12 [manager.py:391] +ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:204.4215202331543ms total_cost_time:204.46443557739258ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5573 prompt_cache_len:5145 prompt_cache_ratio:0.9232011483940427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 +DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:12 [batch.py:51] router release req id 400 +INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10579776763916016 s +INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.1070852279663086 s +DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=87038016299531989214465367225567892343, time:1750766652.3449104s req_ids:[400] +DEBUG 06-24 20:04:12 [manager.py:391] +ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:202.08096504211426ms total_cost_time:202.12316513061523ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5574 prompt_cache_len:5145 prompt_cache_ratio:0.9230355220667384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 +DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:12 [batch.py:51] router release req id 400 +INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10552120208740234 s +INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.10744905471801758 s +DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=157257869562054399718573486539702768882, time:1750766652.5505366s req_ids:[400] +DEBUG 06-24 20:04:12 [manager.py:391] +ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:205.1711082458496ms total_cost_time:205.214262008667ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5575 prompt_cache_len:5145 prompt_cache_ratio:0.9228699551569507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 +DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:12 [batch.py:51] router release req id 400 +INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10639142990112305 s +INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.1083984375 s +DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=228977404470933678350538036202567535770, time:1750766652.774231s req_ids:[400] +DEBUG 06-24 20:04:12 [manager.py:391] +ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:217.78535842895508ms total_cost_time:217.82875061035156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5576 prompt_cache_len:5145 prompt_cache_ratio:0.9227044476327116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 +DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:12 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:12 [manager.py:224] router recive req id 8 cost time 1.524458408355713 s +INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 8 cost time 1.5265793800354004 s +DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=205944119524255088767704860780994970925, time:1750766652.951166s req_ids:[8] +DEBUG 06-24 20:04:12 [manager.py:391] +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 first_token_cost:1596.1339473724365ms total_cost_time:1596.1766242980957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5623 prompt_cache_len:5151 prompt_cache_ratio:0.9160590432153655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 +DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.20746803283691406 s +INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.20907902717590332 s +DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=31616259705159304457888948372497427736, time:1750766653.0783455s req_ids:[400] +DEBUG 06-24 20:04:13 [manager.py:391] +ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:261.32655143737793ms total_cost_time:261.3687515258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5577 prompt_cache_len:5145 prompt_cache_ratio:0.9225389994620764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 +DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:13 [batch.py:51] router release req id 400 +INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.3084428310394287 s +INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.31079840660095215 s +DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=190970186933295161960179467510202146044, time:1750766653.4490385s req_ids:[400] +DEBUG 06-24 20:04:13 [manager.py:391] +ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:405.5614471435547ms total_cost_time:405.6057929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5578 prompt_cache_len:5145 prompt_cache_ratio:0.922373610613123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 +DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:13 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:13 [manager.py:224] router recive req id 8 cost time 0.6158738136291504 s +INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 8 cost time 0.618344783782959 s +DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=274311673038307767228916345367006234921, time:1750766653.6625578s req_ids:[8] +DEBUG 06-24 20:04:13 [manager.py:391] +ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 first_token_cost:718.9376354217529ms total_cost_time:718.9795970916748ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5624 prompt_cache_len:5151 prompt_cache_ratio:0.9158961593172119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 +DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.30918264389038086 s +INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.3112337589263916 s +DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=48818713253908030137800363645514311753, time:1750766653.8722582s req_ids:[400] +DEBUG 06-24 20:04:13 [manager.py:391] +ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:417.39559173583984ms total_cost_time:417.4387454986572ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5579 prompt_cache_len:5145 prompt_cache_ratio:0.9222082810539524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 +DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:13 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.3089447021484375 s +INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.3109443187713623 s +DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=329163786732395761775920919753492668433, time:1750766654.08399s req_ids:[8] +DEBUG 06-24 20:04:14 [manager.py:391] +ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 first_token_cost:414.6718978881836ms total_cost_time:414.7169589996338ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5625 prompt_cache_len:5151 prompt_cache_ratio:0.9157333333333333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 +DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:14 [manager.py:224] router recive req id 400 cost time 0.3087480068206787 s +INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 400 cost time 0.3111083507537842 s +DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=234297625513481218783368336363701821556, time:1750766654.2945468s req_ids:[400] +DEBUG 06-24 20:04:14 [manager.py:391] +ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:14 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:427.23655700683594ms total_cost_time:427.2916316986084ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:5580 prompt_cache_len:5145 prompt_cache_ratio:0.9220430107526881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:400 +DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:14 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.30880260467529297 s +INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.31030941009521484 s +DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=140714409445643900515517372517306199786, time:1750766654.4887865s req_ids:[8] +DEBUG 06-24 20:04:14 [manager.py:391] +ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:04:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 26195.741 tokens/s +DEBUG 06-24 20:04:14 [stats.py:37] Avg prompt tokens throughput: 26186.373 tokens/s +DEBUG 06-24 20:04:14 [stats.py:37] Avg generate tokens throughput: 9.369 tokens/s +INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:386.26694679260254ms total_cost_time:386.31248474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5626 prompt_cache_len:5151 prompt_cache_ratio:0.9155705652328475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 +DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.10758066177368164 s +INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.10879826545715332 s +DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=200228179790043914477172086933595455941, time:1750766654.6832614s req_ids:[8] +DEBUG 06-24 20:04:14 [manager.py:391] +ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:204.68807220458984ms total_cost_time:204.73217964172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5627 prompt_cache_len:5151 prompt_cache_ratio:0.9154078549848943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 +DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.1076350212097168 s +INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.10956835746765137 s +DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=128691561035046703622596447858747696117, time:1750766654.898549s req_ids:[8] +DEBUG 06-24 20:04:14 [manager.py:391] +ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:206.11953735351562ms total_cost_time:206.16531372070312ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5628 prompt_cache_len:5151 prompt_cache_ratio:0.9152452025586354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 +DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10662460327148438 s +INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.10849905014038086 s +DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=106639017692861415357043238974669823809, time:1750766655.1052353s req_ids:[8] +DEBUG 06-24 20:04:15 [manager.py:391] +ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:204.27489280700684ms total_cost_time:204.3170928955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5629 prompt_cache_len:5151 prompt_cache_ratio:0.9150826079232546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 +DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10686659812927246 s +INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.10877180099487305 s +DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=141085462522987770831758864760465568946, time:1750766655.3168218s req_ids:[8] +DEBUG 06-24 20:04:15 [manager.py:391] +ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:206.4509391784668ms total_cost_time:206.49433135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5630 prompt_cache_len:5151 prompt_cache_ratio:0.9149200710479574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 +DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s +INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.1096029281616211 s +DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=319052547375508828994819630930400019073, time:1750766655.5269716s req_ids:[8] +DEBUG 06-24 20:04:15 [manager.py:391] +ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:206.6199779510498ms total_cost_time:206.6650390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5631 prompt_cache_len:5151 prompt_cache_ratio:0.9147575919019713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 +DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:15 [manager.py:224] router recive req id 400 cost time 1.3225617408752441 s +INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 400 cost time 1.3246512413024902 s +DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=101624204149623261217881075467097066163, time:1750766655.7365716s req_ids:[400] +DEBUG 06-24 20:04:15 [manager.py:391] +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:15 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:400 first_token_cost:1586.2393379211426ms total_cost_time:1586.28511428833ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5581 prompt_cache_len:5145 prompt_cache_ratio:0.9218777996774772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:400 +DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:16 [batch.py:51] router release req id 400 +INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10750150680541992 s +INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.10942196846008301 s +DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=131142781887999190799469431461008130202, time:1750766656.1028008s req_ids:[400] +DEBUG 06-24 20:04:16 [manager.py:391] +ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:400 first_token_cost:198.8089084625244ms total_cost_time:198.85516166687012ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5582 prompt_cache_len:5145 prompt_cache_ratio:0.9217126477964888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 +DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:16 [batch.py:51] router release req id 400 +INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10784649848937988 s +INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.10967588424682617 s +DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=37783991900662089517054200824284359370, time:1750766656.3254364s req_ids:[400] +DEBUG 06-24 20:04:16 [manager.py:391] +INFO 06-24 20:04:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:219.4504737854004ms total_cost_time:219.49315071105957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5583 prompt_cache_len:5145 prompt_cache_ratio:0.9215475550779151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 +DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:16 [batch.py:51] router release req id 400 +INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10908699035644531 s +INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.11089563369750977 s +DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=5067486847551785240389677237162414539, time:1750766656.538432s req_ids:[400] +DEBUG 06-24 20:04:16 [manager.py:391] +ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:205.13319969177246ms total_cost_time:205.1858901977539ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:5584 prompt_cache_len:5145 prompt_cache_ratio:0.9213825214899714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 +DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:16 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:16 [manager.py:224] router recive req id 8 cost time 1.1151704788208008 s +INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 8 cost time 1.1170532703399658 s +DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=306730665257022193479385530307676922530, time:1750766656.7497196s req_ids:[8] +DEBUG 06-24 20:04:16 [manager.py:391] +ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:1215.9790992736816ms total_cost_time:1216.0212993621826ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5632 prompt_cache_len:5151 prompt_cache_ratio:0.9145951704545454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:8 +DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.30753374099731445 s +INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.30948758125305176 s +DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=71303722397184712584653631993487326899, time:1750766656.9596272s req_ids:[400] +DEBUG 06-24 20:04:16 [manager.py:391] +ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:413.42759132385254ms total_cost_time:413.47193717956543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5585 prompt_cache_len:5145 prompt_cache_ratio:0.9212175470008953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 +DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:17 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.3075568675994873 s +INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.3095417022705078 s +DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=252064728799863974062995502885127462607, time:1750766657.170206s req_ids:[8] +DEBUG 06-24 20:04:17 [manager.py:391] +ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:8 first_token_cost:415.8060550689697ms total_cost_time:415.85230827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5633 prompt_cache_len:5151 prompt_cache_ratio:0.9144328066749512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 +DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:17 [manager.py:224] router recive req id 400 cost time 0.30767273902893066 s +INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 400 cost time 0.3096649646759033 s +DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=66287034318572387062281965083639895851, time:1750766657.3795717s req_ids:[400] +DEBUG 06-24 20:04:17 [manager.py:391] +ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:414.63780403137207ms total_cost_time:414.68071937561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5586 prompt_cache_len:5145 prompt_cache_ratio:0.9210526315789473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 +DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:17 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.30847835540771484 s +INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.31049108505249023 s +DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=228938074943624491590312570889158603010, time:1750766657.5886242s req_ids:[8] +DEBUG 06-24 20:04:17 [manager.py:391] +ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 first_token_cost:410.4933738708496ms total_cost_time:410.536527633667ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5634 prompt_cache_len:5151 prompt_cache_ratio:0.9142705005324814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 +DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:17 [manager.py:224] router recive req id 400 cost time 0.3087329864501953 s +INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 400 cost time 0.3108234405517578 s +DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=60087535810734423348138057221743926182, time:1750766657.7949853s req_ids:[400] +DEBUG 06-24 20:04:17 [manager.py:391] +ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:408.66971015930176ms total_cost_time:408.71238708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5587 prompt_cache_len:5145 prompt_cache_ratio:0.9208877751924109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 +DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:17 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.30881237983703613 s +INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.31071996688842773 s +DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=302156104039420587193820993304785054710, time:1750766658.002426s req_ids:[8] +DEBUG 06-24 20:04:18 [manager.py:391] +ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 first_token_cost:409.6033573150635ms total_cost_time:409.64531898498535ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5635 prompt_cache_len:5151 prompt_cache_ratio:0.9141082519964507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 +DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.3086738586425781 s +INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.31061649322509766 s +DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=240258550456378011144236932454084892034, time:1750766658.2128952s req_ids:[400] +DEBUG 06-24 20:04:18 [manager.py:391] +ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:414.517879486084ms total_cost_time:414.56127166748047ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5588 prompt_cache_len:5145 prompt_cache_ratio:0.920722977809592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 +DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:18 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:18 [manager.py:224] router recive req id 8 cost time 0.30826854705810547 s +INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 8 cost time 0.3102855682373047 s +DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=196887082411526954351833857334841843938, time:1750766658.4303896s req_ids:[8] +DEBUG 06-24 20:04:18 [manager.py:391] +ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 first_token_cost:424.88932609558105ms total_cost_time:424.93295669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5636 prompt_cache_len:5151 prompt_cache_ratio:0.9139460610361959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 +DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.30877089500427246 s +INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.3108034133911133 s +DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=45976309311112245365689531616693751266, time:1750766658.6139958s req_ids:[400] +DEBUG 06-24 20:04:18 [manager.py:391] +ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:385.8602046966553ms total_cost_time:385.90240478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5589 prompt_cache_len:5145 prompt_cache_ratio:0.9205582393988191 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 +DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:18 [batch.py:51] router release req id 400 +INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.1065371036529541 s +INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.10845494270324707 s +DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=54542278724559732195745943898052701471, time:1750766658.8116658s req_ids:[400] +DEBUG 06-24 20:04:18 [manager.py:391] +ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:200.8652687072754ms total_cost_time:200.90913772583008ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5590 prompt_cache_len:5145 prompt_cache_ratio:0.9203935599284436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 +DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:18 [batch.py:51] router release req id 400 +INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.10749650001525879 s +INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.1094827651977539 s +DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=270565812123462728368077659759711507818, time:1750766659.0198867s req_ids:[400] +DEBUG 06-24 20:04:19 [manager.py:391] +ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:204.64134216308594ms total_cost_time:204.69999313354492ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:5591 prompt_cache_len:5145 prompt_cache_ratio:0.9202289393668396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 +DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:19 [batch.py:51] router release req id 400 +INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.10609102249145508 s +INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.10812973976135254 s +DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=153541719542779166919510374484718041959, time:1750766659.2338116s req_ids:[400] +DEBUG 06-24 20:04:19 [manager.py:391] +ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:207.10468292236328ms total_cost_time:207.12995529174805ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5592 prompt_cache_len:5145 prompt_cache_ratio:0.9200643776824035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 +DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:19 [batch.py:51] router release req id 400 +INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.30823493003845215 s +INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.31044840812683105 s +DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=324154969492212450185372527199611848940, time:1750766659.653571s req_ids:[400] +DEBUG 06-24 20:04:19 [manager.py:391] +ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:424.0915775299072ms total_cost_time:424.135684967041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5593 prompt_cache_len:5145 prompt_cache_ratio:0.9198998748435544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 +DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:19 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:19 [manager.py:224] router recive req id 8 cost time 1.314558982849121 s +INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 8 cost time 1.3165841102600098 s +DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=167409177748517338554553146583799168964, time:1750766659.836794s req_ids:[8] +DEBUG 06-24 20:04:19 [manager.py:391] +ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 first_token_cost:1386.7998123168945ms total_cost_time:1386.845350265503ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5637 prompt_cache_len:5151 prompt_cache_ratio:0.9137839276210751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:8 +DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.20762252807617188 s +INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.20920252799987793 s +DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=38653714603830349367107816191720438860, time:1750766659.9654753s req_ids:[400] +DEBUG 06-24 20:04:19 [manager.py:391] +ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:272.2601890563965ms total_cost_time:272.3045349121094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5594 prompt_cache_len:5145 prompt_cache_ratio:0.9197354308187343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 +DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:20 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.20933055877685547 s +INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.2111823558807373 s +DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=225095216543165772522562669608361056402, time:1750766660.1385944s req_ids:[8] +DEBUG 06-24 20:04:20 [manager.py:391] +ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:8 first_token_cost:298.11859130859375ms total_cost_time:298.16436767578125ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5638 prompt_cache_len:5151 prompt_cache_ratio:0.9136218517204683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 +DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.10911870002746582 s +INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.11103701591491699 s +DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=145729852968060938998594991669210077344, time:1750766660.3380792s req_ids:[8] +DEBUG 06-24 20:04:20 [manager.py:391] +ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:203.887939453125ms total_cost_time:203.93133163452148ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5639 prompt_cache_len:5151 prompt_cache_ratio:0.9134598333037772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 +DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.10652518272399902 s +INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.10875916481018066 s +DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=157120653617087176788172413152632552767, time:1750766660.547775s req_ids:[8] +DEBUG 06-24 20:04:20 [manager.py:391] +ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:203.8261890411377ms total_cost_time:203.86981964111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5640 prompt_cache_len:5151 prompt_cache_ratio:0.9132978723404256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 +DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:20 [manager.py:224] router recive req id 400 cost time 0.7133662700653076 s +INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 400 cost time 0.7155048847198486 s +DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=61239665237718427724579099054391844062, time:1750766660.757504s req_ids:[400] +DEBUG 06-24 20:04:20 [manager.py:391] +ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:20 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 first_token_cost:813.7984275817871ms total_cost_time:813.8401508331299ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5595 prompt_cache_len:5145 prompt_cache_ratio:0.9195710455764075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 +DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:20 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.3086857795715332 s +INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.3107452392578125 s +DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=103569202099643300315125404549459298966, time:1750766660.9634693s req_ids:[8] +DEBUG 06-24 20:04:20 [manager.py:391] +ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:413.16795349121094ms total_cost_time:413.2249355316162ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:5641 prompt_cache_len:5151 prompt_cache_ratio:0.9131359687998581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 +DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.3083314895629883 s +INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.31038761138916016 s +DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=243116708294120437174279276546882770620, time:1750766661.1802902s req_ids:[400] +DEBUG 06-24 20:04:21 [manager.py:391] +ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 first_token_cost:421.3275909423828ms total_cost_time:421.3700294494629ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5596 prompt_cache_len:5145 prompt_cache_ratio:0.9194067190850608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 +DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:21 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:21 [manager.py:224] router recive req id 8 cost time 0.30976271629333496 s +INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 8 cost time 0.3117525577545166 s +DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=159589017922822320368261474168818125164, time:1750766661.3951037s req_ids:[8] +DEBUG 06-24 20:04:21 [manager.py:391] +ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 first_token_cost:424.87502098083496ms total_cost_time:424.91841316223145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5642 prompt_cache_len:5151 prompt_cache_ratio:0.912974122651542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 +DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.30779099464416504 s +INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.3097355365753174 s +DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=204027139029410204625150196260025536466, time:1750766661.5850406s req_ids:[400] +DEBUG 06-24 20:04:21 [manager.py:391] +ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:391.99233055114746ms total_cost_time:392.03691482543945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5597 prompt_cache_len:5145 prompt_cache_ratio:0.9192424513132035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 +DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:21 [batch.py:51] router release req id 400 +INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.10668253898620605 s +INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.10908055305480957 s +DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=282459104284730930658656003323083253895, time:1750766661.7867818s req_ids:[400] +DEBUG 06-24 20:04:21 [manager.py:391] +ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:201.26986503601074ms total_cost_time:201.31325721740723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5598 prompt_cache_len:5145 prompt_cache_ratio:0.9190782422293676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 +DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:21 [batch.py:51] router release req id 400 +INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.10654735565185547 s +INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.10901069641113281 s +DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=307189154209660327102313172525819870474, time:1750766661.995562s req_ids:[400] +DEBUG 06-24 20:04:21 [manager.py:391] +ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:206.86888694763184ms total_cost_time:206.91204071044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5599 prompt_cache_len:5145 prompt_cache_ratio:0.9189140918021075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 +DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:22 [batch.py:51] router release req id 400 +INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.10664033889770508 s +INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.10859560966491699 s +DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=13886497406636090368314140559902134472, time:1750766662.2057035s req_ids:[400] +DEBUG 06-24 20:04:22 [manager.py:391] +ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:368.3967590332031ms total_cost_time:368.4399127960205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5600 prompt_cache_len:5145 prompt_cache_ratio:0.91875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 +DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:22 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:22 [manager.py:224] router recive req id 8 cost time 1.01613187789917 s +INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 8 cost time 1.0179226398468018 s +DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=131848877663081915398085650813740170950, time:1750766662.5382745s req_ids:[8] +DEBUG 06-24 20:04:22 [manager.py:391] +ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 first_token_cost:1121.265172958374ms total_cost_time:1121.309518814087ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5643 prompt_cache_len:5151 prompt_cache_ratio:0.9128123338649654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 +DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.20801854133605957 s +INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.21001982688903809 s +DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=58024617009333453881726821185111954685, time:1750766662.695757s req_ids:[400] +DEBUG 06-24 20:04:22 [manager.py:391] +ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:305.3700923919678ms total_cost_time:305.41157722473145ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5601 prompt_cache_len:5145 prompt_cache_ratio:0.9185859667916444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 +DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:22 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:22 [manager.py:224] router recive req id 8 cost time 0.2079474925994873 s +INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 8 cost time 0.20982789993286133 s +DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=267724025021584382246605540177094573446, time:1750766662.8270555s req_ids:[8] +DEBUG 06-24 20:04:22 [manager.py:391] +ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 first_token_cost:261.14869117736816ms total_cost_time:261.19232177734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5644 prompt_cache_len:5151 prompt_cache_ratio:0.9126506024096386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 +DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.20712733268737793 s +INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.20850658416748047 s +DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=259379537031038628830206513313025065573, time:1750766662.992749s req_ids:[400] +DEBUG 06-24 20:04:22 [manager.py:391] +ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:295.987606048584ms total_cost_time:296.0324287414551ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5602 prompt_cache_len:5145 prompt_cache_ratio:0.9184219921456622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 +DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:23 [batch.py:51] router release req id 400 +INFO 06-24 20:04:23 [manager.py:224] router recive req id 400 cost time 0.10604095458984375 s +INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 400 cost time 0.10753870010375977 s +INFO 06-24 20:04:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:04:23 [statics_utils.py:24] mean first cost: 225.2109487233796 ms +INFO 06-24 20:04:23 [statics_utils.py:24] mean per token cost: 0.172888190729092 ms +DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=222675924948564142134540015902361335341, time:1750766663.1987753s req_ids:[400] +DEBUG 06-24 20:04:23 [manager.py:391] +ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:212.1272087097168ms total_cost_time:212.17036247253418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5603 prompt_cache_len:5145 prompt_cache_ratio:0.9182580760306979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 +DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:23 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.5107765197753906 s +INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.5132813453674316 s +DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=289335265318048660066163943527052332403, time:1750766663.409441s req_ids:[8] +DEBUG 06-24 20:04:23 [manager.py:391] +ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 first_token_cost:612.8072738647461ms total_cost_time:612.8509044647217ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5645 prompt_cache_len:5151 prompt_cache_ratio:0.912488928255093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 +DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:23 [manager.py:224] router recive req id 400 cost time 0.30922818183898926 s +INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 400 cost time 0.31137776374816895 s +DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=60569115480878674443553201192736048164, time:1750766663.615808s req_ids:[400] +DEBUG 06-24 20:04:23 [manager.py:391] +ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:408.801794052124ms total_cost_time:408.846378326416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5604 prompt_cache_len:5145 prompt_cache_ratio:0.9180942184154176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 +DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:23 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.3077116012573242 s +INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.3091452121734619 s +DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=4066682712827294027601391126372753086, time:1750766663.8217094s req_ids:[8] +DEBUG 06-24 20:04:23 [manager.py:391] +ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 first_token_cost:371.9310760498047ms total_cost_time:371.97422981262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5646 prompt_cache_len:5151 prompt_cache_ratio:0.9123273113708821 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 +DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.10661888122558594 s +INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.10792207717895508 s +DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=96237076485747580019305067878142776834, time:1750766663.9900763s req_ids:[8] +DEBUG 06-24 20:04:23 [manager.py:391] +ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 first_token_cost:158.81991386413574ms total_cost_time:158.86187553405762ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5647 prompt_cache_len:5151 prompt_cache_ratio:0.9121657517265804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 +DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:24 [manager.py:224] router recive req id 400 cost time 0.41042613983154297 s +INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 400 cost time 0.4124715328216553 s +DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=289818615711437329685663686575810306959, time:1750766664.1209888s req_ids:[400] +DEBUG 06-24 20:04:24 [manager.py:391] +ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:479.60519790649414ms total_cost_time:479.65025901794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5605 prompt_cache_len:5145 prompt_cache_ratio:0.9179304192685103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 +DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:24 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.20775485038757324 s +INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.20942115783691406 s +DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=249438425325088372674096711372955382234, time:1750766664.273658s req_ids:[8] +DEBUG 06-24 20:04:24 [manager.py:391] +ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:294.5408821105957ms total_cost_time:294.583797454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5648 prompt_cache_len:5151 prompt_cache_ratio:0.9120042492917847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 +DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:24 [manager.py:224] router recive req id 400 cost time 0.20707249641418457 s +INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 400 cost time 0.20958900451660156 s +DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=333649231572607279468698784082731831287, time:1750766664.4237874s req_ids:[400] +DEBUG 06-24 20:04:24 [manager.py:391] +ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:04:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 26709.539 tokens/s +DEBUG 06-24 20:04:24 [stats.py:37] Avg prompt tokens throughput: 26700.027 tokens/s +DEBUG 06-24 20:04:24 [stats.py:37] Avg generate tokens throughput: 9.513 tokens/s +INFO 06-24 20:04:24 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 first_token_cost:463.6354446411133ms total_cost_time:463.6814594268799ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5606 prompt_cache_len:5145 prompt_cache_ratio:0.9177666785586871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 +DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:24 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.40807080268859863 s +INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.40987658500671387 s +DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=186044466313300840807587836412841909570, time:1750766664.7712786s req_ids:[8] +DEBUG 06-24 20:04:24 [manager.py:391] +ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:511.3251209259033ms total_cost_time:511.3685131072998ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5649 prompt_cache_len:5151 prompt_cache_ratio:0.9118428040361126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 +DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s +INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.11043643951416016 s +DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=183803393045587895134373374550283597211, time:1750766664.9759927s req_ids:[8] +DEBUG 06-24 20:04:24 [manager.py:391] +ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:201.8110752105713ms total_cost_time:201.85589790344238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5650 prompt_cache_len:5151 prompt_cache_ratio:0.9116814159292035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 +DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:25 [manager.py:224] router recive req id 8 cost time 0.10721635818481445 s +INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 8 cost time 0.10920286178588867 s +DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=37519645894131366662978579660104636506, time:1750766665.1972146s req_ids:[8] +DEBUG 06-24 20:04:25 [manager.py:391] +ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:219.00534629821777ms total_cost_time:219.04563903808594ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5651 prompt_cache_len:5151 prompt_cache_ratio:0.9115200849407185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 +DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:25 [manager.py:224] router recive req id 400 cost time 0.7219085693359375 s +INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 400 cost time 0.7239117622375488 s +DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=171345870804642682737352943122718561806, time:1750766665.409969s req_ids:[400] +DEBUG 06-24 20:04:25 [manager.py:391] +ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 first_token_cost:832.0600986480713ms total_cost_time:832.1030139923096ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5607 prompt_cache_len:5145 prompt_cache_ratio:0.9176029962546817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 +DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:25 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:25 [manager.py:224] router recive req id 8 cost time 0.30924534797668457 s +INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 8 cost time 0.3111088275909424 s +DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=193352093618118088583869029643011308453, time:1750766665.6208284s req_ids:[8] +DEBUG 06-24 20:04:25 [manager.py:391] +ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:418.0893898010254ms total_cost_time:418.14589500427246ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:5652 prompt_cache_len:5151 prompt_cache_ratio:0.9113588110403397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 +DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:25 [manager.py:224] router recive req id 400 cost time 0.3096654415130615 s +INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 400 cost time 0.31154608726501465 s +DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=274120091185543058063977585149068597229, time:1750766665.834958s req_ids:[400] +DEBUG 06-24 20:04:25 [manager.py:391] +ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:25 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 first_token_cost:426.99575424194336ms total_cost_time:427.04081535339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5608 prompt_cache_len:5145 prompt_cache_ratio:0.9174393723252496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 +DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:25 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.3108954429626465 s +INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.31281447410583496 s +DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=69613368012098856667054515992332491011, time:1750766666.048118s req_ids:[8] +DEBUG 06-24 20:04:26 [manager.py:391] +ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:419.79146003723145ms total_cost_time:419.8341369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5653 prompt_cache_len:5151 prompt_cache_ratio:0.9111975941977711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 +DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:26 [manager.py:224] router recive req id 400 cost time 0.30904102325439453 s +INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 400 cost time 0.31108784675598145 s +DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=102633915022225956462908525721125467616, time:1750766666.2586112s req_ids:[400] +DEBUG 06-24 20:04:26 [manager.py:391] +ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 first_token_cost:416.0346984863281ms total_cost_time:416.0771369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5609 prompt_cache_len:5145 prompt_cache_ratio:0.9172758067391692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 +DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:26 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.30986738204956055 s +INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.3118159770965576 s +DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=265487944058360102931858283804504114088, time:1750766666.4691482s req_ids:[8] +DEBUG 06-24 20:04:26 [manager.py:391] +ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:418.54381561279297ms total_cost_time:418.58816146850586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5654 prompt_cache_len:5151 prompt_cache_ratio:0.9110364343827378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 +DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:26 [manager.py:224] router recive req id 400 cost time 0.30727362632751465 s +INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 400 cost time 0.3091881275177002 s +DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=75879430666684743418787605388217885521, time:1750766666.6813915s req_ids:[400] +DEBUG 06-24 20:04:26 [manager.py:391] +ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:26 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 first_token_cost:417.93131828308105ms total_cost_time:417.97423362731934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5610 prompt_cache_len:5145 prompt_cache_ratio:0.9171122994652406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 +DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:26 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.30787110328674316 s +INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.3097517490386963 s +DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=66381436706951621725767110887667211672, time:1750766666.8923821s req_ids:[8] +DEBUG 06-24 20:04:26 [manager.py:391] +ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:417.3908233642578ms total_cost_time:417.4325466156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5655 prompt_cache_len:5151 prompt_cache_ratio:0.9108753315649868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 +DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:27 [manager.py:224] router recive req id 400 cost time 0.512087345123291 s +INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 400 cost time 0.5140905380249023 s +DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=313968677053965986341434509452258267899, time:1750766667.307603s req_ids:[400] +DEBUG 06-24 20:04:27 [manager.py:391] +ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 first_token_cost:623.9781379699707ms total_cost_time:624.0227222442627ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5611 prompt_cache_len:5145 prompt_cache_ratio:0.9169488504722866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 +DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:27 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:27 [manager.py:224] router recive req id 8 cost time 0.5114901065826416 s +INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 8 cost time 0.5134925842285156 s +DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=259692355608096860857597314863558876253, time:1750766667.522568s req_ids:[8] +DEBUG 06-24 20:04:27 [manager.py:391] +ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:622.0667362213135ms total_cost_time:622.1106052398682ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5656 prompt_cache_len:5151 prompt_cache_ratio:0.9107142857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:8 +DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:27 [manager.py:224] router recive req id 400 cost time 0.3098890781402588 s +INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 400 cost time 0.31192898750305176 s +DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=280832277086313392181954795847818229028, time:1750766667.729502s req_ids:[400] +DEBUG 06-24 20:04:27 [manager.py:391] +ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:27 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 first_token_cost:414.203405380249ms total_cost_time:414.2477512359619ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5612 prompt_cache_len:5145 prompt_cache_ratio:0.9167854597291518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 +DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:27 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:27 [manager.py:224] router recive req id 8 cost time 0.3082160949707031 s +INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 8 cost time 0.31084251403808594 s +DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=25547495440991790670954121898604414010, time:1750766667.940811s req_ids:[8] +DEBUG 06-24 20:04:27 [manager.py:391] +ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:8 first_token_cost:414.9432182312012ms total_cost_time:414.98732566833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5657 prompt_cache_len:5151 prompt_cache_ratio:0.9105532968004243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 +DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.30869483947753906 s +INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.3102104663848877 s +DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=19325133157743702338532232414436161151, time:1750766668.1507266s req_ids:[400] +DEBUG 06-24 20:04:28 [manager.py:391] +ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 first_token_cost:415.47417640686035ms total_cost_time:415.5158996582031ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5613 prompt_cache_len:5145 prompt_cache_ratio:0.9166221272047034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 +DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:28 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:28 [manager.py:224] router recive req id 8 cost time 0.3092689514160156 s +INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 8 cost time 0.31162071228027344 s +DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=274595003761940960646626444977832413736, time:1750766668.3598099s req_ids:[8] +DEBUG 06-24 20:04:28 [manager.py:391] +ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:414.22581672668457ms total_cost_time:414.26753997802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5658 prompt_cache_len:5151 prompt_cache_ratio:0.9103923647932132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 +DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.3091588020324707 s +INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.3114774227142334 s +INFO 06-24 20:04:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=17663044513857767370761652076563602295, time:1750766668.5701125s req_ids:[400] +DEBUG 06-24 20:04:28 [manager.py:391] +ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:28 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 first_token_cost:409.98125076293945ms total_cost_time:410.0227355957031ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5614 prompt_cache_len:5145 prompt_cache_ratio:0.9164588528678305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 +DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:28 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:28 [manager.py:224] router recive req id 8 cost time 0.3099038600921631 s +INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 8 cost time 0.3123795986175537 s +DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=50026042330341118691835468528673360440, time:1750766668.775991s req_ids:[8] +DEBUG 06-24 20:04:28 [manager.py:391] +ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:411.2386703491211ms total_cost_time:411.2820625305176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5659 prompt_cache_len:5151 prompt_cache_ratio:0.9102314896624846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 +DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.3096802234649658 s +INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.31191039085388184 s +DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=152405585892290894040670415658955982272, time:1750766668.9882126s req_ids:[400] +DEBUG 06-24 20:04:28 [manager.py:391] +ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 first_token_cost:415.50207138061523ms total_cost_time:415.5445098876953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5615 prompt_cache_len:5145 prompt_cache_ratio:0.9162956366874443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 +DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:29 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:29 [manager.py:224] router recive req id 8 cost time 0.3096752166748047 s +INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 8 cost time 0.31159234046936035 s +DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=146997980909769386210843937019735865025, time:1750766669.1992402s req_ids:[8] +DEBUG 06-24 20:04:29 [manager.py:391] +ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:419.7840690612793ms total_cost_time:419.827938079834ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5660 prompt_cache_len:5151 prompt_cache_ratio:0.9100706713780918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 +DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:29 [manager.py:224] router recive req id 400 cost time 0.308474063873291 s +INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 400 cost time 0.310366153717041 s +DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=228195534521292472823714719262961439497, time:1750766669.4134781s req_ids:[400] +DEBUG 06-24 20:04:29 [manager.py:391] +ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:421.5538501739502ms total_cost_time:421.5981960296631ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5616 prompt_cache_len:5145 prompt_cache_ratio:0.9161324786324786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 +DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:29 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:29 [manager.py:224] router recive req id 8 cost time 0.30808281898498535 s +INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 8 cost time 0.31003451347351074 s +DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=42054421374652623759736304095637773418, time:1750766669.6245844s req_ids:[8] +DEBUG 06-24 20:04:29 [manager.py:391] +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 first_token_cost:419.32058334350586ms total_cost_time:419.3615913391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5661 prompt_cache_len:5151 prompt_cache_ratio:0.9099099099099099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 +DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:29 [manager.py:224] router recive req id 400 cost time 0.30889320373535156 s +INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 400 cost time 0.3107783794403076 s +DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=108717596022332119038349818396712496673, time:1750766669.836832s req_ids:[400] +DEBUG 06-24 20:04:29 [manager.py:391] +ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:417.1173572540283ms total_cost_time:417.1617031097412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5617 prompt_cache_len:5145 prompt_cache_ratio:0.9159693786718889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 +DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:29 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:30 [manager.py:224] router recive req id 8 cost time 0.3091428279876709 s +INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 8 cost time 0.31108808517456055 s +DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=175723662860696045028588059268083472272, time:1750766670.048848s req_ids:[8] +DEBUG 06-24 20:04:30 [manager.py:391] +ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 first_token_cost:417.9821014404297ms total_cost_time:418.02525520324707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5662 prompt_cache_len:5151 prompt_cache_ratio:0.9097492052278346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 +DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:30 [manager.py:224] router recive req id 400 cost time 0.30869340896606445 s +INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 400 cost time 0.3109416961669922 s +DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=230019865189014978099274913506664142356, time:1750766670.260391s req_ids:[400] +DEBUG 06-24 20:04:30 [manager.py:391] +ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:420.01795768737793ms total_cost_time:420.0613498687744ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5618 prompt_cache_len:5145 prompt_cache_ratio:0.915806336774653 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 +DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:30 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:30 [manager.py:224] router recive req id 8 cost time 0.30882906913757324 s +INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 8 cost time 0.31121158599853516 s +DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=75358273206282626847728868763252732028, time:1750766670.4759884s req_ids:[8] +DEBUG 06-24 20:04:30 [manager.py:391] +ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 first_token_cost:425.137996673584ms total_cost_time:425.18115043640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5663 prompt_cache_len:5151 prompt_cache_ratio:0.9095885573017836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 +DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:30 [manager.py:224] router recive req id 400 cost time 0.30824971199035645 s +INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 400 cost time 0.31012630462646484 s +DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=275758295323467180306429924285007725737, time:1750766670.6900518s req_ids:[400] +DEBUG 06-24 20:04:30 [manager.py:391] +ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:30 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 first_token_cost:585.8213901519775ms total_cost_time:585.8657360076904ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5619 prompt_cache_len:5145 prompt_cache_ratio:0.9156433529097704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 +DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:30 [batch.py:51] router release req id 400 +INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10662460327148438 s +INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.1090700626373291 s +DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=245775026616329675552314482109721014078, time:1750766671.0574462s req_ids:[400] +DEBUG 06-24 20:04:31 [manager.py:391] +ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 first_token_cost:197.7367401123047ms total_cost_time:197.78084754943848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5620 prompt_cache_len:5145 prompt_cache_ratio:0.9154804270462633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 +DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:31 [batch.py:51] router release req id 400 +INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10666823387145996 s +INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10853815078735352 s +DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=104430840096773392534867993462466302863, time:1750766671.2651033s req_ids:[400] +DEBUG 06-24 20:04:31 [manager.py:391] +ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:207.52763748168945ms total_cost_time:207.57246017456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5621 prompt_cache_len:5145 prompt_cache_ratio:0.9153175591531756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 +DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:31 [batch.py:51] router release req id 400 +INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10657572746276855 s +INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10851931571960449 s +DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=101274259892882747630248991049910571147, time:1750766671.4846306s req_ids:[400] +DEBUG 06-24 20:04:31 [manager.py:391] +ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:217.04864501953125ms total_cost_time:217.09179878234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5622 prompt_cache_len:5145 prompt_cache_ratio:0.9151547491995731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 +DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:31 [batch.py:51] router release req id 400 +INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10630679130554199 s +INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10842442512512207 s +DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=27387733789106417195472477669736267062, time:1750766671.6976998s req_ids:[400] +DEBUG 06-24 20:04:31 [manager.py:391] +ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:207.4732780456543ms total_cost_time:207.51643180847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5623 prompt_cache_len:5145 prompt_cache_ratio:0.9149919971545438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 +DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:31 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:31 [manager.py:224] router recive req id 8 cost time 1.317298412322998 s +INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 8 cost time 1.31976318359375 s +DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=88067096955312514899097457428015425714, time:1750766671.9103594s req_ids:[8] +DEBUG 06-24 20:04:31 [manager.py:391] +ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 first_token_cost:1427.6151657104492ms total_cost_time:1427.657127380371ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5664 prompt_cache_len:5151 prompt_cache_ratio:0.909427966101695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:8 +DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:32 [batch.py:51] router release req id 8 +INFO 06-24 20:04:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.3099067211151123 s +INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.312009334564209 s +DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=210672878257417834276140693577676006786, time:1750766672.1201878s req_ids:[400] +DEBUG 06-24 20:04:32 [manager.py:391] +ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:420.1347827911377ms total_cost_time:420.1769828796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5624 prompt_cache_len:5145 prompt_cache_ratio:0.9148293029871978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 +DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:32 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:32 [manager.py:224] router recive req id 8 cost time 0.3080563545227051 s +INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 8 cost time 0.31051015853881836 s +DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=34684555807152716900325497140577943430, time:1750766672.3331628s req_ids:[8] +DEBUG 06-24 20:04:32 [manager.py:391] +ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:8 first_token_cost:418.5612201690674ms total_cost_time:418.60318183898926ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5665 prompt_cache_len:5151 prompt_cache_ratio:0.9092674315975287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:8 +DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.3078746795654297 s +INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.3094809055328369 s +DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=11875430463453379121647450097018435833, time:1750766672.5181153s req_ids:[400] +DEBUG 06-24 20:04:32 [manager.py:391] +ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:375.0605583190918ms total_cost_time:375.1039505004883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5625 prompt_cache_len:5145 prompt_cache_ratio:0.9146666666666666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 +DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:32 [batch.py:51] router release req id 400 +INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.10575532913208008 s +INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.1068563461303711 s +DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=330514215247724816831253146616404161418, time:1750766672.708729s req_ids:[400] +DEBUG 06-24 20:04:32 [manager.py:391] +ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:197.6935863494873ms total_cost_time:197.73626327514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5626 prompt_cache_len:5145 prompt_cache_ratio:0.9145040881621045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 +DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:32 [batch.py:51] router release req id 400 +INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.1054985523223877 s +INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.10739850997924805 s +DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=312758865148485819698060459175714972626, time:1750766672.9136446s req_ids:[400] +DEBUG 06-24 20:04:32 [manager.py:391] +ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:204.3013572692871ms total_cost_time:204.3447494506836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5627 prompt_cache_len:5145 prompt_cache_ratio:0.914341567442687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 +DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:33 [batch.py:51] router release req id 400 +INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.10535597801208496 s +INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.10726690292358398 s +DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=69962802871803665103388021206773155541, time:1750766673.125092s req_ids:[400] +DEBUG 06-24 20:04:33 [manager.py:391] +ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:206.3610553741455ms total_cost_time:206.4042091369629ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5628 prompt_cache_len:5145 prompt_cache_ratio:0.914179104477612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 +DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:33 [batch.py:51] router release req id 400 +INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.1056208610534668 s +INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.10749316215515137 s +DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=313351747199595949105718220789064124413, time:1750766673.3359003s req_ids:[400] +DEBUG 06-24 20:04:33 [manager.py:391] +ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:224.17807579040527ms total_cost_time:224.22075271606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5629 prompt_cache_len:5145 prompt_cache_ratio:0.9140166992360987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 +DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:33 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:33 [manager.py:224] router recive req id 8 cost time 1.1144230365753174 s +INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 8 cost time 1.1163041591644287 s +DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=260619455572711705380796894834199775779, time:1750766673.5556192s req_ids:[8] +DEBUG 06-24 20:04:33 [manager.py:391] +ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:8 first_token_cost:1202.5015354156494ms total_cost_time:1202.544927597046ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5666 prompt_cache_len:5151 prompt_cache_ratio:0.9091069537592658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:8 +DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.4094274044036865 s +INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.4111461639404297 s +DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=64030824649360195371290145436512644687, time:1750766673.880678s req_ids:[400] +DEBUG 06-24 20:04:33 [manager.py:391] +ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:509.01246070861816ms total_cost_time:509.05680656433105ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5630 prompt_cache_len:5145 prompt_cache_ratio:0.9138543516873889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 +DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:33 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.40862011909484863 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.4106321334838867 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=325290205884475920655626869544547932609, time:1750766674.0413604s req_ids:[8] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:8 first_token_cost:482.53345489501953ms total_cost_time:482.5766086578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5667 prompt_cache_len:5151 prompt_cache_ratio:0.9089465325569084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.2065746784210205 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.20849061012268066 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=171744086798174465394960038580020109525, time:1750766674.1996436s req_ids:[400] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:308.95519256591797ms total_cost_time:308.99786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5631 prompt_cache_len:5145 prompt_cache_ratio:0.9136920618007459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.20766568183898926 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20949316024780273 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=89325531173735694157580037301506625719, time:1750766674.3571036s req_ids:[8] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:284.5907211303711ms total_cost_time:284.6336364746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5668 prompt_cache_len:5151 prompt_cache_ratio:0.9087861679604798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.20780515670776367 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.20971226692199707 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=49606304116919569563348578611173658801, time:1750766674.4865568s req_ids:[400] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:277.24432945251465ms total_cost_time:277.28843688964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5632 prompt_cache_len:5145 prompt_cache_ratio:0.9135298295454546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.2059001922607422 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20777082443237305 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=265866957393977817062778671378972033982, time:1750766674.6404688s req_ids:[8] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:04:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 26365.262 tokens/s +DEBUG 06-24 20:04:34 [stats.py:37] Avg prompt tokens throughput: 26355.911 tokens/s +DEBUG 06-24 20:04:34 [stats.py:37] Avg generate tokens throughput: 9.351 tokens/s +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:296.97728157043457ms total_cost_time:297.01948165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5669 prompt_cache_len:5151 prompt_cache_ratio:0.9086258599400246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.20730829238891602 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.2088913917541504 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=246618846592341577453872614353058456766, time:1750766674.7658603s req_ids:[400] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:255.13339042663574ms total_cost_time:255.17511367797852ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5633 prompt_cache_len:5145 prompt_cache_ratio:0.913367654890822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 +DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:34 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.20806622505187988 s +INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20998001098632812 s +DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=266033674225234743379494724366949788510, time:1750766674.928068s req_ids:[8] +DEBUG 06-24 20:04:34 [manager.py:391] +ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:301.3298511505127ms total_cost_time:301.3739585876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5670 prompt_cache_len:5151 prompt_cache_ratio:0.9084656084656084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 +DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:35 [manager.py:224] router recive req id 8 cost time 0.10692334175109863 s +INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 8 cost time 0.10894179344177246 s +DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=127333895195900833682563255736194010035, time:1750766675.134944s req_ids:[8] +DEBUG 06-24 20:04:35 [manager.py:391] +ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:202.5163173675537ms total_cost_time:202.5587558746338ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5671 prompt_cache_len:5151 prompt_cache_ratio:0.908305413507318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 +DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:35 [manager.py:224] router recive req id 400 cost time 0.5143365859985352 s +INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 400 cost time 0.5163264274597168 s +DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=89562770169016691265677828974921242445, time:1750766675.344586s req_ids:[400] +DEBUG 06-24 20:04:35 [manager.py:391] +ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:615.959882736206ms total_cost_time:616.0030364990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5634 prompt_cache_len:5145 prompt_cache_ratio:0.9132055378061767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 +DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:35 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:35 [manager.py:224] router recive req id 8 cost time 0.31000399589538574 s +INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 8 cost time 0.31206798553466797 s +DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=164887647849996819659555358266871580144, time:1750766675.5574384s req_ids:[8] +DEBUG 06-24 20:04:35 [manager.py:391] +ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:417.73152351379395ms total_cost_time:417.77491569519043ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5672 prompt_cache_len:5151 prompt_cache_ratio:0.9081452750352609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 +DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:35 [manager.py:224] router recive req id 400 cost time 0.3086886405944824 s +INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 400 cost time 0.3106675148010254 s +DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=79571776836947761431111974952328200580, time:1750766675.768938s req_ids:[400] +DEBUG 06-24 20:04:35 [manager.py:391] +ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:35 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 first_token_cost:418.09749603271484ms total_cost_time:418.14184188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5635 prompt_cache_len:5145 prompt_cache_ratio:0.9130434782608695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 +DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:36 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:36 [manager.py:224] router recive req id 8 cost time 0.511737585067749 s +INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 8 cost time 0.5138809680938721 s +DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=2162508810407588903125049595172361574, time:1750766676.1846812s req_ids:[8] +DEBUG 06-24 20:04:36 [manager.py:391] +ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:627.1755695343018ms total_cost_time:627.2196769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5673 prompt_cache_len:5151 prompt_cache_ratio:0.9079851930195664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:8 +DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.510279655456543 s +INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.5121898651123047 s +DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=254655526889719640650951190293082509633, time:1750766676.3686175s req_ids:[400] +DEBUG 06-24 20:04:36 [manager.py:391] +ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 first_token_cost:578.1567096710205ms total_cost_time:578.1996250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5636 prompt_cache_len:5145 prompt_cache_ratio:0.9128814762242725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 +DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:36 [batch.py:51] router release req id 400 +INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10657835006713867 s +INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.1085822582244873 s +DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=221232665752490468542733048316117506841, time:1750766676.5676017s req_ids:[400] +DEBUG 06-24 20:04:36 [manager.py:391] +ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:210.08896827697754ms total_cost_time:210.13188362121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5637 prompt_cache_len:5145 prompt_cache_ratio:0.9127195316657797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 +DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:36 [batch.py:51] router release req id 400 +INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10637378692626953 s +INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.10827088356018066 s +DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=286724357129523429998803644379594978912, time:1750766676.786888s req_ids:[400] +DEBUG 06-24 20:04:36 [manager.py:391] +ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:216.97258949279785ms total_cost_time:217.01717376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5638 prompt_cache_len:5145 prompt_cache_ratio:0.9125576445548067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 +DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:36 [batch.py:51] router release req id 400 +INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10560345649719238 s +INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.10782718658447266 s +DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=191525821003922652859320717804982534719, time:1750766677.0100248s req_ids:[400] +DEBUG 06-24 20:04:37 [manager.py:391] +ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:216.31097793579102ms total_cost_time:216.3550853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5639 prompt_cache_len:5145 prompt_cache_ratio:0.9123958148607909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 +DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:37 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:37 [manager.py:224] router recive req id 8 cost time 0.9140686988830566 s +INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 8 cost time 0.9160575866699219 s +DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=163907820373443062416644321794240740359, time:1750766677.2219796s req_ids:[8] +DEBUG 06-24 20:04:37 [manager.py:391] +ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:8 first_token_cost:1024.9671936035156ms total_cost_time:1025.0091552734375ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5674 prompt_cache_len:5151 prompt_cache_ratio:0.9078251674303842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 +DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:37 [manager.py:224] router recive req id 400 cost time 0.30860328674316406 s +INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 400 cost time 0.31053948402404785 s +DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=319396226296364540450281355798139834019, time:1750766677.4376144s req_ids:[400] +DEBUG 06-24 20:04:37 [manager.py:391] +ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:421.6318130493164ms total_cost_time:421.6747283935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5640 prompt_cache_len:5145 prompt_cache_ratio:0.9122340425531915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 +DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:37 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:37 [manager.py:224] router recive req id 8 cost time 0.3094217777252197 s +INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 8 cost time 0.31142520904541016 s +DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=281282747343354252946288018410869502683, time:1750766677.6488605s req_ids:[8] +DEBUG 06-24 20:04:37 [manager.py:391] +ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 first_token_cost:424.4091510772705ms total_cost_time:424.4527816772461ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5675 prompt_cache_len:5151 prompt_cache_ratio:0.9076651982378855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 +DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:37 [manager.py:224] router recive req id 400 cost time 0.30932164192199707 s +INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 400 cost time 0.3112781047821045 s +DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=121468439610544082702813010234779980892, time:1750766677.8607125s req_ids:[400] +DEBUG 06-24 20:04:37 [manager.py:391] +ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:418.63322257995605ms total_cost_time:418.67709159851074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5641 prompt_cache_len:5145 prompt_cache_ratio:0.9120723276014892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 +DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:37 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.30885982513427734 s +INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.31072378158569336 s +DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=161802575735781331894724396562522092801, time:1750766678.0751016s req_ids:[8] +DEBUG 06-24 20:04:38 [manager.py:391] +ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 first_token_cost:419.76022720336914ms total_cost_time:419.8031425476074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5676 prompt_cache_len:5151 prompt_cache_ratio:0.9075052854122622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 +DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:38 [manager.py:224] router recive req id 400 cost time 0.3077247142791748 s +INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 400 cost time 0.3096005916595459 s +DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=198951561416077516317407409602318125521, time:1750766678.2862713s req_ids:[400] +DEBUG 06-24 20:04:38 [manager.py:391] +ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:38 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:420.4702377319336ms total_cost_time:420.5131530761719ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5642 prompt_cache_len:5145 prompt_cache_ratio:0.9119106699751861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:400 +DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:38 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.30901241302490234 s +INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.3110020160675049 s +DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=229601218436893746002106774031571183260, time:1750766678.5000477s req_ids:[8] +DEBUG 06-24 20:04:38 [manager.py:391] +ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:586.0247611999512ms total_cost_time:586.068868637085ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5677 prompt_cache_len:5151 prompt_cache_ratio:0.9073454289237273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 +DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.10721111297607422 s +INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.10932254791259766 s +DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=163240519829322029400849542008585715736, time:1750766678.8683822s req_ids:[8] +DEBUG 06-24 20:04:38 [manager.py:391] +ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:195.4202651977539ms total_cost_time:195.46127319335938ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5678 prompt_cache_len:5151 prompt_cache_ratio:0.907185628742515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 +DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.1069481372833252 s +INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.1097860336303711 s +DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=199709813388192927801921418909868392749, time:1750766679.0771613s req_ids:[8] +DEBUG 06-24 20:04:39 [manager.py:391] +ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:206.33435249328613ms total_cost_time:206.38227462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:5679 prompt_cache_len:5151 prompt_cache_ratio:0.9070258848388801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 +DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.10710477828979492 s +INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.10860323905944824 s +DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=184115028979038260271768415064738473035, time:1750766679.2982056s req_ids:[8] +DEBUG 06-24 20:04:39 [manager.py:391] +ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:217.01693534851074ms total_cost_time:217.0734405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:5680 prompt_cache_len:5151 prompt_cache_ratio:0.9068661971830986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 +DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:39 [manager.py:224] router recive req id 400 cost time 1.1173467636108398 s +INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 400 cost time 1.119295597076416 s +DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=76890548786841819057424085917244917185, time:1750766679.5088637s req_ids:[400] +DEBUG 06-24 20:04:39 [manager.py:391] +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:39 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:400 first_token_cost:1217.9288864135742ms total_cost_time:1217.9768085479736ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:5643 prompt_cache_len:5145 prompt_cache_ratio:0.9117490696438065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:400 +DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:39 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.3089413642883301 s +INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.3108973503112793 s +DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=238816029726489064559650873776002167569, time:1750766679.7193406s req_ids:[8] +DEBUG 06-24 20:04:39 [manager.py:391] +ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:411.9548797607422ms total_cost_time:412.01281547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5681 prompt_cache_len:5151 prompt_cache_ratio:0.9067065657454674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 +DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:39 [manager.py:224] router recive req id 400 cost time 0.3084983825683594 s +INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 400 cost time 0.31043124198913574 s +DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=198439397561961686508740764038122291163, time:1750766679.9275932s req_ids:[400] +DEBUG 06-24 20:04:39 [manager.py:391] +ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:400 first_token_cost:413.56778144836426ms total_cost_time:413.6159420013428ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:5644 prompt_cache_len:5145 prompt_cache_ratio:0.9115875265768958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 +DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:40 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.30963945388793945 s +INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3118703365325928 s +DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=284451545658476670028795054388903790287, time:1750766680.1369154s req_ids:[8] +DEBUG 06-24 20:04:40 [manager.py:391] +ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:414.442777633667ms total_cost_time:414.5050048828125ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:5682 prompt_cache_len:5151 prompt_cache_ratio:0.9065469904963042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 +DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:40 [manager.py:224] router recive req id 400 cost time 0.3061957359313965 s +INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 400 cost time 0.3081645965576172 s +DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=49281792639441259715908606114685642403, time:1750766680.3554153s req_ids:[400] +DEBUG 06-24 20:04:40 [manager.py:391] +ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:420.5350875854492ms total_cost_time:420.5927848815918ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5645 prompt_cache_len:5145 prompt_cache_ratio:0.9114260407440212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 +DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:40 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.308119535446167 s +INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3102080821990967 s +DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=77537507892950804258424326965689820137, time:1750766680.5689583s req_ids:[8] +DEBUG 06-24 20:04:40 [manager.py:391] +ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 first_token_cost:423.74539375305176ms total_cost_time:423.80261421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5683 prompt_cache_len:5151 prompt_cache_ratio:0.9063874714059476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 +DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:40 [manager.py:224] router recive req id 400 cost time 0.3093605041503906 s +INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 400 cost time 0.3114025592803955 s +DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=231077214220768226158390882545871859983, time:1750766680.7764683s req_ids:[400] +DEBUG 06-24 20:04:40 [manager.py:391] +ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:412.51087188720703ms total_cost_time:412.57166862487793ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:5646 prompt_cache_len:5145 prompt_cache_ratio:0.9112646121147715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 +DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:40 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.3090543746948242 s +INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3110494613647461 s +DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=213257354288457420099931071678105665424, time:1750766680.984003s req_ids:[8] +DEBUG 06-24 20:04:40 [manager.py:391] +ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 first_token_cost:410.5720520019531ms total_cost_time:410.6316566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5684 prompt_cache_len:5151 prompt_cache_ratio:0.9062280084447573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 +DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:41 [manager.py:224] router recive req id 400 cost time 0.3093991279602051 s +INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 400 cost time 0.3113586902618408 s +DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=314352178399198090909360555160418580868, time:1750766681.1980062s req_ids:[400] +DEBUG 06-24 20:04:41 [manager.py:391] +ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:417.0494079589844ms total_cost_time:417.10925102233887ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5647 prompt_cache_len:5145 prompt_cache_ratio:0.9111032406587568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 +DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:41 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:41 [manager.py:224] router recive req id 8 cost time 0.30893898010253906 s +INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 8 cost time 0.3108949661254883 s +DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=65949083496933556795388735210093816087, time:1750766681.4106402s req_ids:[8] +DEBUG 06-24 20:04:41 [manager.py:391] +ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:418.95580291748047ms total_cost_time:419.01636123657227ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5685 prompt_cache_len:5151 prompt_cache_ratio:0.9060686015831134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 +DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:41 [manager.py:224] router recive req id 400 cost time 0.3096637725830078 s +INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 400 cost time 0.3117501735687256 s +DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=177520096979157414252634423009530821455, time:1750766681.621189s req_ids:[400] +DEBUG 06-24 20:04:41 [manager.py:391] +ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:41 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 first_token_cost:417.65785217285156ms total_cost_time:417.71674156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5648 prompt_cache_len:5145 prompt_cache_ratio:0.9109419263456091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 +DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:41 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:41 [manager.py:224] router recive req id 8 cost time 0.3078150749206543 s +INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 8 cost time 0.30978965759277344 s +DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=202632747836797405902521990662265850731, time:1750766681.832451s req_ids:[8] +DEBUG 06-24 20:04:41 [manager.py:391] +ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:416.51391983032227ms total_cost_time:416.55993461608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5686 prompt_cache_len:5151 prompt_cache_ratio:0.9059092507914175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 +DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:42 [manager.py:224] router recive req id 400 cost time 0.30810976028442383 s +INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 400 cost time 0.3100616931915283 s +DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=70803459128417048406517131869305373762, time:1750766682.043486s req_ids:[400] +DEBUG 06-24 20:04:42 [manager.py:391] +ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 first_token_cost:415.08984565734863ms total_cost_time:415.1475429534912ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5649 prompt_cache_len:5145 prompt_cache_ratio:0.9107806691449815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 +DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:42 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:42 [manager.py:224] router recive req id 8 cost time 0.511451244354248 s +INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 8 cost time 0.5135776996612549 s +DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=338464011028521707890579194849210524668, time:1750766682.454394s req_ids:[8] +DEBUG 06-24 20:04:42 [manager.py:391] +ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:614.6280765533447ms total_cost_time:614.687442779541ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5687 prompt_cache_len:5151 prompt_cache_ratio:0.9057499560400915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 +DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:42 [manager.py:224] router recive req id 400 cost time 0.5124404430389404 s +INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 400 cost time 0.5148177146911621 s +DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=336682961697485079907097793222929059251, time:1750766682.6634202s req_ids:[400] +DEBUG 06-24 20:04:42 [manager.py:391] +ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:42 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 first_token_cost:615.0248050689697ms total_cost_time:615.0851249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5650 prompt_cache_len:5145 prompt_cache_ratio:0.9106194690265487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 +DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:42 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:42 [manager.py:224] router recive req id 8 cost time 0.30994224548339844 s +DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=121908194961370419794076150173976525776, time:1750766682.8500907s req_ids:[8] +DEBUG 06-24 20:04:42 [manager.py:391] +INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122062683105469 s +ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 first_token_cost:382.2512626647949ms total_cost_time:382.2979927062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5688 prompt_cache_len:5151 prompt_cache_ratio:0.9055907172995781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 +DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10645484924316406 s +INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10883474349975586 s +DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=330111146981880258855633662505339619234, time:1750766683.0493422s req_ids:[8] +DEBUG 06-24 20:04:43 [manager.py:391] +ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 first_token_cost:200.64043998718262ms total_cost_time:200.6993293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5689 prompt_cache_len:5151 prompt_cache_ratio:0.905431534540341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 +DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10669779777526855 s +INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10876584053039551 s +DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=206154178136398720364593122611215617596, time:1750766683.2587278s req_ids:[8] +DEBUG 06-24 20:04:43 [manager.py:391] +ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:205.4452896118164ms total_cost_time:205.4886817932129ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5690 prompt_cache_len:5151 prompt_cache_ratio:0.9052724077328647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 +DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s +INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10959935188293457 s +DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=207252447716160850565952498543545064374, time:1750766683.4787369s req_ids:[8] +DEBUG 06-24 20:04:43 [manager.py:391] +ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:219.04635429382324ms total_cost_time:219.09117698669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5691 prompt_cache_len:5151 prompt_cache_ratio:0.9051133368476542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 +DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:43 [manager.py:224] router recive req id 400 cost time 0.9144036769866943 s +INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 400 cost time 0.9162411689758301 s +DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=213672650856280428828660864356281812103, time:1750766683.6936774s req_ids:[400] +DEBUG 06-24 20:04:43 [manager.py:391] +ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:43 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 first_token_cost:1021.101713180542ms total_cost_time:1021.1467742919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5651 prompt_cache_len:5145 prompt_cache_ratio:0.9104583259600071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:400 +DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:43 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.30865025520324707 s +INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.31075358390808105 s +DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=163301353999026199169867054189724113463, time:1750766683.8990397s req_ids:[8] +DEBUG 06-24 20:04:43 [manager.py:391] +ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:414.55674171447754ms total_cost_time:414.5991802215576ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5692 prompt_cache_len:5151 prompt_cache_ratio:0.9049543218552354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 +DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:44 [manager.py:224] router recive req id 400 cost time 0.30908846855163574 s +INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 400 cost time 0.31117868423461914 s +DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=117612225119529810443846089762043338009, time:1750766684.113066s req_ids:[400] +DEBUG 06-24 20:04:44 [manager.py:391] +ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:44 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:400 first_token_cost:417.2031879425049ms total_cost_time:417.24681854248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5652 prompt_cache_len:5145 prompt_cache_ratio:0.9102972399150743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:400 +DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:44 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.3089888095855713 s +INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.31087708473205566 s +DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=218431612304564700257402266090414565888, time:1750766684.300993s req_ids:[8] +DEBUG 06-24 20:04:44 [manager.py:391] +ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:384.31239128112793ms total_cost_time:384.3567371368408ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5693 prompt_cache_len:5151 prompt_cache_ratio:0.9047953627261549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 +DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10764265060424805 s +INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10977363586425781 s +DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=110106381893062816804621591280419638100, time:1750766684.507227s req_ids:[8] +DEBUG 06-24 20:04:44 [manager.py:391] +ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:209.51199531555176ms total_cost_time:209.55514907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5694 prompt_cache_len:5151 prompt_cache_ratio:0.90463645943098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 +DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10616922378540039 s +INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10811662673950195 s +DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=326597459932964243164385754977380249797, time:1750766684.7166786s req_ids:[8] +DEBUG 06-24 20:04:44 [manager.py:391] +DEBUG 06-24 20:04:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 26021.774 tokens/s +DEBUG 06-24 20:04:44 [stats.py:37] Avg prompt tokens throughput: 26012.690 tokens/s +DEBUG 06-24 20:04:44 [stats.py:37] Avg generate tokens throughput: 9.084 tokens/s +ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:204.63800430297852ms total_cost_time:204.6821117401123ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5695 prompt_cache_len:5151 prompt_cache_ratio:0.9044776119402985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 +DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10760092735290527 s +INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10941934585571289 s +DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=10078288841663145562204914900154846088, time:1750766684.9391642s req_ids:[8] +DEBUG 06-24 20:04:44 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:220.09539604187012ms total_cost_time:220.1399803161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5696 prompt_cache_len:5151 prompt_cache_ratio:0.9043188202247191 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.9152262210845947 s +INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.9171676635742188 s +DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=163663124050652963259814320452496553198, time:1750766685.1184983s req_ids:[400] +DEBUG 06-24 20:04:45 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:400 first_token_cost:1149.8684883117676ms total_cost_time:1149.9123573303223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5653 prompt_cache_len:5145 prompt_cache_ratio:0.9101362108614894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:45 [manager.py:224] router recive req id 8 cost time 0.40941333770751953 s +INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 8 cost time 0.4111747741699219 s +DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=175437420378587792969565869366659016119, time:1750766685.4380581s req_ids:[8] +DEBUG 06-24 20:04:45 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:471.8492031097412ms total_cost_time:471.8914031982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5697 prompt_cache_len:5151 prompt_cache_ratio:0.9041600842548709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.20727205276489258 s +INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.20953583717346191 s +DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=172368450072793462988911448962309576158, time:1750766685.5909636s req_ids:[400] +DEBUG 06-24 20:04:45 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:299.3342876434326ms total_cost_time:299.3783950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5654 prompt_cache_len:5145 prompt_cache_ratio:0.9099752387690131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:45 [manager.py:224] router recive req id 8 cost time 0.20816802978515625 s +INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 8 cost time 0.21016597747802734 s +DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=216661643526117133661916650847868086868, time:1750766685.7196925s req_ids:[8] +DEBUG 06-24 20:04:45 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:257.5211524963379ms total_cost_time:257.56263732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5698 prompt_cache_len:5151 prompt_cache_ratio:0.904001404001404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.20802879333496094 s +INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.21039342880249023 s +DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=246863256532932488438424654702580039988, time:1750766685.8817806s req_ids:[400] +DEBUG 06-24 20:04:45 [manager.py:391] +ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:289.54362869262695ms total_cost_time:289.58678245544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5655 prompt_cache_len:5145 prompt_cache_ratio:0.9098143236074271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 +DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:45 [batch.py:51] router release req id 400 +INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10634613037109375 s +INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10883140563964844 s +DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=24651384845974106098525625217995412876, time:1750766686.0769413s req_ids:[400] +DEBUG 06-24 20:04:46 [manager.py:391] +ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:199.64361190795898ms total_cost_time:199.68652725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5656 prompt_cache_len:5145 prompt_cache_ratio:0.9096534653465347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 +DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:46 [batch.py:51] router release req id 400 +INFO 06-24 20:04:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10564398765563965 s +INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10811328887939453 s +DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=144159961740778618288810797570851666246, time:1750766686.283774s req_ids:[400] +DEBUG 06-24 20:04:46 [manager.py:391] +INFO 06-24 20:04:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:201.37524604797363ms total_cost_time:201.4179229736328ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5657 prompt_cache_len:5145 prompt_cache_ratio:0.9094926639561605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 +DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:46 [batch.py:51] router release req id 400 +INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10633087158203125 s +INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10842418670654297 s +DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=17284165915086370021538909296821719237, time:1750766686.4925659s req_ids:[400] +DEBUG 06-24 20:04:46 [manager.py:391] +ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:204.15425300598145ms total_cost_time:204.19788360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5658 prompt_cache_len:5145 prompt_cache_ratio:0.9093319194061505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 +DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:46 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:46 [manager.py:224] router recive req id 8 cost time 0.9133801460266113 s +INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 8 cost time 0.9158332347869873 s +DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=122817067944151802617181614890454090499, time:1750766686.701242s req_ids:[8] +DEBUG 06-24 20:04:46 [manager.py:391] +ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:1015.7935619354248ms total_cost_time:1015.836238861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5699 prompt_cache_len:5151 prompt_cache_ratio:0.9038427794349886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:8 +DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.3097188472747803 s +INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.3121352195739746 s +DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=329089550415523590411374788682737839562, time:1750766686.91025s req_ids:[400] +DEBUG 06-24 20:04:46 [manager.py:391] +ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:410.7358455657959ms total_cost_time:410.7794761657715ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5659 prompt_cache_len:5145 prompt_cache_ratio:0.9091712316663721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 +DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:47 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:47 [manager.py:224] router recive req id 8 cost time 0.30860233306884766 s +INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 8 cost time 0.31087613105773926 s +DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=232273989438799037360249230173836104102, time:1750766687.115208s req_ids:[8] +DEBUG 06-24 20:04:47 [manager.py:391] +ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:8 first_token_cost:407.0768356323242ms total_cost_time:407.1202278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5700 prompt_cache_len:5151 prompt_cache_ratio:0.9036842105263158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 +DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:47 [manager.py:224] router recive req id 400 cost time 0.3076043128967285 s +INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 400 cost time 0.3100552558898926 s +DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=265496327611751902510110317473881812156, time:1750766687.3257086s req_ids:[400] +DEBUG 06-24 20:04:47 [manager.py:391] +ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:47 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:411.834716796875ms total_cost_time:411.8795394897461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5660 prompt_cache_len:5145 prompt_cache_ratio:0.9090106007067138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:400 +DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:47 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:47 [manager.py:224] router recive req id 8 cost time 0.30820298194885254 s +INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 8 cost time 0.3107006549835205 s +DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=183049706644804370980583090330969853270, time:1750766687.5458608s req_ids:[8] +DEBUG 06-24 20:04:47 [manager.py:391] +ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 first_token_cost:626.4033317565918ms total_cost_time:626.446008682251ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5701 prompt_cache_len:5151 prompt_cache_ratio:0.9035256972460972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 +DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:47 [manager.py:224] router recive req id 400 cost time 0.5088727474212646 s +INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 400 cost time 0.511070728302002 s +DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=98250522113964867449966060801872254665, time:1750766687.9506626s req_ids:[400] +DEBUG 06-24 20:04:47 [manager.py:391] +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:400 first_token_cost:602.9376983642578ms total_cost_time:602.9801368713379ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5661 prompt_cache_len:5145 prompt_cache_ratio:0.9088500264970854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 +DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:48 [batch.py:51] router release req id 400 +INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10689663887023926 s +INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10939717292785645 s +DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=182089974327337710747881807405631920038, time:1750766688.1398792s req_ids:[400] +DEBUG 06-24 20:04:48 [manager.py:391] +ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:196.55108451843262ms total_cost_time:196.5944766998291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5662 prompt_cache_len:5145 prompt_cache_ratio:0.9086895090074179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 +DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:48 [batch.py:51] router release req id 400 +INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10655665397644043 s +INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10905027389526367 s +DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=215010597675904505882197426979035769356, time:1750766688.346191s req_ids:[400] +DEBUG 06-24 20:04:48 [manager.py:391] +ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:204.91957664489746ms total_cost_time:204.96225357055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5663 prompt_cache_len:5145 prompt_cache_ratio:0.9085290482076638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 +DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:48 [batch.py:51] router release req id 400 +INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10645341873168945 s +INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10881233215332031 s +DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=205277262191933819866665744085576678260, time:1750766688.5641122s req_ids:[400] +DEBUG 06-24 20:04:48 [manager.py:391] +ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:216.02320671081543ms total_cost_time:216.06802940368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5664 prompt_cache_len:5145 prompt_cache_ratio:0.9083686440677966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 +DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:48 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:48 [manager.py:224] router recive req id 8 cost time 0.9128780364990234 s +INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 8 cost time 0.9151980876922607 s +DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=227812101927005582835809075482377489280, time:1750766688.7788894s req_ids:[8] +DEBUG 06-24 20:04:48 [manager.py:391] +ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 first_token_cost:1024.5492458343506ms total_cost_time:1024.5928764343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5702 prompt_cache_len:5151 prompt_cache_ratio:0.9033672395650649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:8 +DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.3083226680755615 s +INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.31069517135620117 s +DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=148516326123153400229336644511095299264, time:1750766688.9903343s req_ids:[400] +DEBUG 06-24 20:04:48 [manager.py:391] +ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:49 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:420.60184478759766ms total_cost_time:420.64666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5665 prompt_cache_len:5145 prompt_cache_ratio:0.9082082965578111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:400 +DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:49 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.3084142208099365 s +INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.31072425842285156 s +DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=283983126973254979148276198952788970976, time:1750766689.1771278s req_ids:[8] +DEBUG 06-24 20:04:49 [manager.py:391] +ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:8 first_token_cost:385.6847286224365ms total_cost_time:385.7271671295166ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5703 prompt_cache_len:5151 prompt_cache_ratio:0.9032088374539716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 +DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10664892196655273 s +INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.10912060737609863 s +DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=29921964045408058613861902179592031636, time:1750766689.3765888s req_ids:[8] +DEBUG 06-24 20:04:49 [manager.py:391] +ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:198.75311851501465ms total_cost_time:198.79531860351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5704 prompt_cache_len:5151 prompt_cache_ratio:0.9030504908835905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 +DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s +INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s +DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=322576437281389737012380474863019993050, time:1750766689.5845346s req_ids:[8] +DEBUG 06-24 20:04:49 [manager.py:391] +ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:203.94372940063477ms total_cost_time:203.98712158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5705 prompt_cache_len:5151 prompt_cache_ratio:0.9028921998247151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 +DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:49 [batch.py:51] router release req id 8 +INFO 06-24 20:04:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10781621932983398 s +INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11023902893066406 s +DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=323816389533592988068489069445212040170, time:1750766689.7937713s req_ids:[8] +DEBUG 06-24 20:04:49 [manager.py:391] +ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:202.4238109588623ms total_cost_time:202.46553421020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5706 prompt_cache_len:5151 prompt_cache_ratio:0.9027339642481599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 +DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10784506797790527 s +INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11037206649780273 s +DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=22622370757039100382496238670709732894, time:1750766690.0020409s req_ids:[8] +DEBUG 06-24 20:04:50 [manager.py:391] +ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:205.70755004882812ms total_cost_time:205.74951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5707 prompt_cache_len:5151 prompt_cache_ratio:0.902575784124759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 +DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:50 [manager.py:224] router recive req id 8 cost time 0.3096926212310791 s +INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 8 cost time 0.3116261959075928 s +DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=292587818131219856968208427087618715976, time:1750766690.413841s req_ids:[8] +DEBUG 06-24 20:04:50 [manager.py:391] +ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:412.26673126220703ms total_cost_time:412.3117923736572ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5708 prompt_cache_len:5151 prompt_cache_ratio:0.9024176594253679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 +DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:50 [manager.py:224] router recive req id 400 cost time 1.5242524147033691 s +INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 400 cost time 1.5255200862884521 s +DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=173907113493410583194997226579373946351, time:1750766690.6311872s req_ids:[400] +DEBUG 06-24 20:04:50 [manager.py:391] +ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:50 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:400 first_token_cost:1634.0522766113281ms total_cost_time:1634.0985298156738ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5666 prompt_cache_len:5145 prompt_cache_ratio:0.9080480056477233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:400 +DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:50 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:50 [manager.py:224] router recive req id 8 cost time 0.3071877956390381 s +INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 8 cost time 0.3091301918029785 s +DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=33881538930883266712733723143691938371, time:1750766690.8412223s req_ids:[8] +DEBUG 06-24 20:04:50 [manager.py:391] +ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:417.85669326782227ms total_cost_time:417.89889335632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5709 prompt_cache_len:5151 prompt_cache_ratio:0.9022595901208618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 +DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.3080763816833496 s +INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.30994510650634766 s +DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=16051916616745021094404272860513880049, time:1750766691.0497572s req_ids:[400] +DEBUG 06-24 20:04:51 [manager.py:391] +ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:400 first_token_cost:402.0876884460449ms total_cost_time:402.1298885345459ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5667 prompt_cache_len:5145 prompt_cache_ratio:0.9078877713075701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 +DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:51 [batch.py:51] router release req id 400 +INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.10568571090698242 s +INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.10769367218017578 s +DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=128854531344332501434149094872965343677, time:1750766691.2456982s req_ids:[400] +DEBUG 06-24 20:04:51 [manager.py:391] +ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:199.27549362182617ms total_cost_time:199.31840896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5668 prompt_cache_len:5145 prompt_cache_ratio:0.9077275935074101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 +DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:51 [batch.py:51] router release req id 400 +INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.1060330867767334 s +INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.10811138153076172 s +DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=253381936533107567974633391698156350016, time:1750766691.4527936s req_ids:[400] +DEBUG 06-24 20:04:51 [manager.py:391] +ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:203.26495170593262ms total_cost_time:203.3083438873291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5669 prompt_cache_len:5145 prompt_cache_ratio:0.9075674722173223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 +DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:51 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:51 [manager.py:224] router recive req id 8 cost time 0.7141966819763184 s +INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 8 cost time 0.7162535190582275 s +DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=304702094745712667880901013369344024730, time:1750766691.6647267s req_ids:[8] +DEBUG 06-24 20:04:51 [manager.py:391] +ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:815.3431415557861ms total_cost_time:815.3870105743408ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5710 prompt_cache_len:5151 prompt_cache_ratio:0.9021015761821366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:8 +DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.30938267707824707 s +INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.3113985061645508 s +DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=300487454556998765228661300091106802215, time:1750766691.8712232s req_ids:[400] +DEBUG 06-24 20:04:51 [manager.py:391] +ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:415.20166397094727ms total_cost_time:415.2636528015137ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5670 prompt_cache_len:5145 prompt_cache_ratio:0.9074074074074074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 +DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:51 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.30792999267578125 s +INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.31049609184265137 s +DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=315081947474904341695195417872365038660, time:1750766692.0836608s req_ids:[8] +DEBUG 06-24 20:04:52 [manager.py:391] +ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:8 first_token_cost:413.3622646331787ms total_cost_time:413.4242534637451ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5711 prompt_cache_len:5151 prompt_cache_ratio:0.9019436175801085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 +DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:52 [manager.py:224] router recive req id 400 cost time 0.3092968463897705 s +INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 400 cost time 0.31174230575561523 s +DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=125930803334487563282495153173683742070, time:1750766692.2929304s req_ids:[400] +DEBUG 06-24 20:04:52 [manager.py:391] +ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:52 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:415.27557373046875ms total_cost_time:415.33803939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:5671 prompt_cache_len:5145 prompt_cache_ratio:0.907247399047787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:400 +DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:52 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.3092052936553955 s +INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.3114452362060547 s +DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=72472019645874224158999492481589443406, time:1750766692.502862s req_ids:[8] +DEBUG 06-24 20:04:52 [manager.py:391] +ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:375.37074089050293ms total_cost_time:375.4134178161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5712 prompt_cache_len:5151 prompt_cache_ratio:0.9017857142857143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 +DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.10750532150268555 s +INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s +DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=192894018930828817714540143392083110720, time:1750766692.6717808s req_ids:[8] +DEBUG 06-24 20:04:52 [manager.py:391] +ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:195.43814659118652ms total_cost_time:195.4793930053711ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5713 prompt_cache_len:5151 prompt_cache_ratio:0.9016278662699108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 +DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.1066884994506836 s +INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.10910654067993164 s +DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=223283102207699624486799217184405780381, time:1750766692.8780618s req_ids:[8] +DEBUG 06-24 20:04:52 [manager.py:391] +ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:203.70769500732422ms total_cost_time:203.75323295593262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5714 prompt_cache_len:5151 prompt_cache_ratio:0.9014700735036751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 +DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10788607597351074 s +INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.11030745506286621 s +DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=205526755471922629579056083789637944359, time:1750766693.086588s req_ids:[8] +DEBUG 06-24 20:04:53 [manager.py:391] +ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:204.00643348693848ms total_cost_time:204.05268669128418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5715 prompt_cache_len:5151 prompt_cache_ratio:0.9013123359580052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 +INFO 06-24 20:04:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:04:53 [statics_utils.py:24] mean first cost: 246.2682920538222 ms +INFO 06-24 20:04:53 [statics_utils.py:24] mean per token cost: 0.15915296835531711 ms +DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:53 [batch.py:51] router release req id 8 +INFO 06-24 20:04:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10720634460449219 s +INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.10953497886657715 s +DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=320357002696483650559604665863145892017, time:1750766693.2960813s req_ids:[8] +DEBUG 06-24 20:04:53 [manager.py:391] +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:202.37040519714355ms total_cost_time:202.41379737854004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5716 prompt_cache_len:5151 prompt_cache_ratio:0.9011546536039188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 +DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10757923126220703 s +INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.1099393367767334 s +DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=216161743593470336928733501896887501067, time:1750766693.504927s req_ids:[8] +DEBUG 06-24 20:04:53 [manager.py:391] +ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:198.20404052734375ms total_cost_time:198.24600219726562ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5717 prompt_cache_len:5151 prompt_cache_ratio:0.900997026412454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 +DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10776972770690918 s +INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.10998654365539551 s +DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=189314636555872345955745140823273754267, time:1750766693.708809s req_ids:[8] +DEBUG 06-24 20:04:53 [manager.py:391] +ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:368.9858913421631ms total_cost_time:369.02809143066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5718 prompt_cache_len:5151 prompt_cache_ratio:0.9008394543546695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 +DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 1.618631362915039 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 1.620178461074829 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=40009490660995977837610130489078534975, time:1750766694.0417767s req_ids:[400] +DEBUG 06-24 20:04:54 [manager.py:391] +ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:400 first_token_cost:1714.5402431488037ms total_cost_time:1714.5841121673584ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5672 prompt_cache_len:5145 prompt_cache_ratio:0.9070874471086037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 +DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:54 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 8 cost time 0.20849370956420898 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 8 cost time 0.21015381813049316 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=253010800630327560619489577778847290846, time:1750766694.1877847s req_ids:[8] +DEBUG 06-24 20:04:54 [manager.py:391] +ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:286.01717948913574ms total_cost_time:286.0586643218994ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5719 prompt_cache_len:5151 prompt_cache_ratio:0.9006819374016436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:8 +DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.2096419334411621 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.21126556396484375 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=116271670008784988759190857839146343925, time:1750766694.3468943s req_ids:[400] +DEBUG 06-24 20:04:54 [manager.py:391] +ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:322.7963447570801ms total_cost_time:322.85380363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:5673 prompt_cache_len:5145 prompt_cache_ratio:0.9069275515600211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 +DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:54 [batch.py:51] router release req id 400 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10844874382019043 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.11029458045959473 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=126248704643683339107502075368214882280, time:1750766694.5469823s req_ids:[400] +DEBUG 06-24 20:04:54 [manager.py:391] +ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:183.93754959106445ms total_cost_time:183.98404121398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5674 prompt_cache_len:5145 prompt_cache_ratio:0.9067677123722242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 +DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:54 [batch.py:51] router release req id 400 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10668373107910156 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.10847616195678711 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=198171503447834658918958200783168086027, time:1750766694.7434638s req_ids:[400] +DEBUG 06-24 20:04:54 [manager.py:391] +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:04:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 26662.052 tokens/s +DEBUG 06-24 20:04:54 [stats.py:37] Avg prompt tokens throughput: 26652.677 tokens/s +DEBUG 06-24 20:04:54 [stats.py:37] Avg generate tokens throughput: 9.374 tokens/s +ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:202.50892639160156ms total_cost_time:202.55184173583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5675 prompt_cache_len:5145 prompt_cache_ratio:0.9066079295154185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 +DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:54 [batch.py:51] router release req id 400 +INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10699200630187988 s +INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.10894918441772461 s +DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=161970145910271087535302473905871669583, time:1750766694.9530463s req_ids:[400] +DEBUG 06-24 20:04:54 [manager.py:391] +ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:205.56020736694336ms total_cost_time:205.6286334991455ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:5676 prompt_cache_len:5145 prompt_cache_ratio:0.9064482029598309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 +DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:55 [batch.py:51] router release req id 400 +INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10602784156799316 s +INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10803985595703125 s +DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=65066789377104650516429213773322431548, time:1750766695.1625385s req_ids:[400] +DEBUG 06-24 20:04:55 [manager.py:391] +ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:202.80861854553223ms total_cost_time:202.8520107269287ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5677 prompt_cache_len:5145 prompt_cache_ratio:0.906288532675709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 +DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:55 [batch.py:51] router release req id 400 +INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10705208778381348 s +INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10904884338378906 s +DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=176586124287956918208555003790325823595, time:1750766695.3724778s req_ids:[400] +DEBUG 06-24 20:04:55 [manager.py:391] +ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:204.59651947021484ms total_cost_time:204.63919639587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5678 prompt_cache_len:5145 prompt_cache_ratio:0.9061289186333216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 +DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:55 [batch.py:51] router release req id 400 +INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10549163818359375 s +INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10749077796936035 s +DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=136583679248283343175744675896981640847, time:1750766695.579152s req_ids:[400] +DEBUG 06-24 20:04:55 [manager.py:391] +ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 +INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:204.06723022460938ms total_cost_time:204.10966873168945ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5679 prompt_cache_len:5145 prompt_cache_ratio:0.9059693608029583 mtp_avg_token_per_step:1.0 +DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:55 [batch.py:51] router release req id 400 +WARNING 06-24 20:04:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:55 [manager.py:224] router recive req id 8 cost time 1.5194005966186523 s +INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 8 cost time 1.5213453769683838 s +DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=15041665363720838538079186944811534888, time:1750766695.7968402s req_ids:[8] +DEBUG 06-24 20:04:55 [manager.py:391] +ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:8 first_token_cost:1643.8179016113281ms total_cost_time:1643.8605785369873ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5720 prompt_cache_len:5151 prompt_cache_ratio:0.9005244755244756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:8 +DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.1068272590637207 s +INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.1088106632232666 s +DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=278347708599964483240077937720713792497, time:1750766696.0390255s req_ids:[8] +DEBUG 06-24 20:04:56 [manager.py:391] +ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:8 first_token_cost:202.28290557861328ms total_cost_time:202.32605934143066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5721 prompt_cache_len:5151 prompt_cache_ratio:0.9003670686942842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 +DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.10782361030578613 s +INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.1096649169921875 s +DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=153324905297797732512861729865462903889, time:1750766696.2310464s req_ids:[8] +DEBUG 06-24 20:04:56 [manager.py:391] +ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:199.63812828063965ms total_cost_time:199.68104362487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5722 prompt_cache_len:5151 prompt_cache_ratio:0.900209716882209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 +DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.10724329948425293 s +INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.10904407501220703 s +DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=305473172064898620442050375000787693269, time:1750766696.429837s req_ids:[8] +DEBUG 06-24 20:04:56 [manager.py:391] +ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:197.07036018371582ms total_cost_time:197.1139907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5723 prompt_cache_len:5151 prompt_cache_ratio:0.9000524200594094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 +DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:56 [batch.py:51] router release req id 8 +INFO 06-24 20:04:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.3091866970062256 s +INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.3110661506652832 s +DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=20657536978945875873694093644438157362, time:1750766696.841732s req_ids:[8] +DEBUG 06-24 20:04:56 [manager.py:391] +ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:416.89538955688477ms total_cost_time:416.93973541259766ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5724 prompt_cache_len:5151 prompt_cache_ratio:0.899895178197065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 +DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10754704475402832 s +INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10974979400634766 s +DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=85430479859416578367773771191346731104, time:1750766697.0585027s req_ids:[8] +DEBUG 06-24 20:04:57 [manager.py:391] +ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:199.20921325683594ms total_cost_time:199.25260543823242ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5725 prompt_cache_len:5151 prompt_cache_ratio:0.8997379912663755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 +DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s +INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.11005949974060059 s +DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=239271924188976622532618717111387774876, time:1750766697.2601724s req_ids:[8] +DEBUG 06-24 20:04:57 [manager.py:391] +ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:206.35318756103516ms total_cost_time:206.39514923095703ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5726 prompt_cache_len:5151 prompt_cache_ratio:0.8995808592385609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 +DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10774111747741699 s +INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10969734191894531 s +DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=255138084813131291229253304246351800296, time:1750766697.47109s req_ids:[8] +DEBUG 06-24 20:04:57 [manager.py:391] +ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:203.95541191101074ms total_cost_time:203.9968967437744ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5727 prompt_cache_len:5151 prompt_cache_ratio:0.8994237820848612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 +DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.1070702075958252 s +INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10900163650512695 s +DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=202071991438630213149720896562394242777, time:1750766697.6807415s req_ids:[8] +DEBUG 06-24 20:04:57 [manager.py:391] +ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:207.43083953857422ms total_cost_time:207.4739933013916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5728 prompt_cache_len:5151 prompt_cache_ratio:0.8992667597765364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 +DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10714483261108398 s +INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10911250114440918 s +DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=108591845078999475025865364389844744327, time:1750766697.8918238s req_ids:[8] +DEBUG 06-24 20:04:57 [manager.py:391] +ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:205.24311065673828ms total_cost_time:205.28578758239746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5729 prompt_cache_len:5151 prompt_cache_ratio:0.8991097922848664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 +DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10677218437194824 s +INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10875654220581055 s +DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=118012874673255617870726242782240703785, time:1750766698.1028883s req_ids:[8] +DEBUG 06-24 20:04:58 [manager.py:391] +ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:207.6582908630371ms total_cost_time:207.70263671875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5730 prompt_cache_len:5151 prompt_cache_ratio:0.8989528795811519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 +DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10684442520141602 s +INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10895943641662598 s +DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=61087500756971532013078352537064784191, time:1750766698.3204014s req_ids:[8] +DEBUG 06-24 20:04:58 [manager.py:391] +ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:215.37494659423828ms total_cost_time:215.41833877563477ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5731 prompt_cache_len:5151 prompt_cache_ratio:0.8987960216367126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 +DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.1074361801147461 s +INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10943603515625 s +DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=235024219290302201653255911622681890535, time:1750766698.5344603s req_ids:[8] +DEBUG 06-24 20:04:58 [manager.py:391] +ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:205.9457302093506ms total_cost_time:205.98840713500977ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5732 prompt_cache_len:5151 prompt_cache_ratio:0.8986392184228891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 +DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10622668266296387 s +INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10880160331726074 s +DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=104347069001241844731537851767817136887, time:1750766698.7452984s req_ids:[8] +DEBUG 06-24 20:04:58 [manager.py:391] +ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:205.88135719299316ms total_cost_time:205.92474937438965ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5733 prompt_cache_len:5151 prompt_cache_ratio:0.8984824699110413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 +DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10685157775878906 s +INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10872435569763184 s +DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=250335721755451031387941079474067836139, time:1750766698.9557574s req_ids:[8] +DEBUG 06-24 20:04:58 [manager.py:391] +ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:209.93471145629883ms total_cost_time:209.99670028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5734 prompt_cache_len:5151 prompt_cache_ratio:0.8983257760725497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 +DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10800528526306152 s +INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10999011993408203 s +DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=239711596511076242932222209802381732628, time:1750766699.170205s req_ids:[8] +DEBUG 06-24 20:04:59 [manager.py:391] +ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:207.79752731323242ms total_cost_time:207.8413963317871ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5735 prompt_cache_len:5151 prompt_cache_ratio:0.8981691368788143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 +DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.30968570709228516 s +INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.31178712844848633 s +DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=300218878288816692014235150037783802335, time:1750766699.581199s req_ids:[8] +DEBUG 06-24 20:04:59 [manager.py:391] +ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:410.930871963501ms total_cost_time:410.9766483306885ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5736 prompt_cache_len:5151 prompt_cache_ratio:0.8980125523012552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 +DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10744333267211914 s +INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s +DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=1919938180372308027723679799623066717, time:1750766699.800035s req_ids:[8] +DEBUG 06-24 20:04:59 [manager.py:391] +ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:211.6997241973877ms total_cost_time:211.74287796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5737 prompt_cache_len:5151 prompt_cache_ratio:0.8978560223113126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 +DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:04:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10733413696289062 s +INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10940718650817871 s +DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=203434089938694816062575501607869245256, time:1750766700.0145855s req_ids:[8] +DEBUG 06-24 20:05:00 [manager.py:391] +ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:208.50896835327148ms total_cost_time:208.55379104614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5738 prompt_cache_len:5151 prompt_cache_ratio:0.8976995468804462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 +DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s +INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.10906696319580078 s +DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=302872426694256758003294603731148177150, time:1750766700.2258925s req_ids:[8] +DEBUG 06-24 20:05:00 [manager.py:391] +ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:205.53922653198242ms total_cost_time:205.5821418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5739 prompt_cache_len:5151 prompt_cache_ratio:0.8975431259801359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 +DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s +INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.11002612113952637 s +DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=303808542095675626587744342309024702372, time:1750766700.4371383s req_ids:[8] +DEBUG 06-24 20:05:00 [manager.py:391] +ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:206.72321319580078ms total_cost_time:206.76708221435547ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5740 prompt_cache_len:5151 prompt_cache_ratio:0.8973867595818815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 +DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.1069943904876709 s +INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900378227233887 s +DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=111504520963685480952633465283710070091, time:1750766700.6589499s req_ids:[8] +DEBUG 06-24 20:05:00 [manager.py:391] +ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:217.74578094482422ms total_cost_time:217.789888381958ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5741 prompt_cache_len:5151 prompt_cache_ratio:0.8972304476572026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 +DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10703730583190918 s +INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.1090235710144043 s +DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=127996145972491537755493280075179957429, time:1750766700.8699112s req_ids:[8] +DEBUG 06-24 20:05:00 [manager.py:391] +ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:204.6818733215332ms total_cost_time:204.7252655029297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5742 prompt_cache_len:5151 prompt_cache_ratio:0.8970741901776385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 +DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.1079246997833252 s +INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.11011481285095215 s +DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=45895676423769608889288885297305455429, time:1750766701.080419s req_ids:[8] +DEBUG 06-24 20:05:01 [manager.py:391] +ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:207.67498016357422ms total_cost_time:207.719087600708ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5743 prompt_cache_len:5151 prompt_cache_ratio:0.8969179871147483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 +DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10787606239318848 s +INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10987472534179688 s +DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=278972452376312214600403569731339308678, time:1750766701.292256s req_ids:[8] +DEBUG 06-24 20:05:01 [manager.py:391] +ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:205.75547218322754ms total_cost_time:205.80005645751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5744 prompt_cache_len:5151 prompt_cache_ratio:0.8967618384401114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 +DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.1069791316986084 s +INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10891246795654297 s +DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=53723302638781883671031746079039398741, time:1750766701.5030367s req_ids:[8] +DEBUG 06-24 20:05:01 [manager.py:391] +ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:207.33094215393066ms total_cost_time:207.37433433532715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5745 prompt_cache_len:5151 prompt_cache_ratio:0.8966057441253263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 +DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:01 [batch.py:51] router release req id 8 +DEBUG 06-24 20:05:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:01 [manager.py:283] +DEBUG 06-24 20:05:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:01 [manager.py:284] +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10800743103027344 s +INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10996198654174805 s +DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=6583665372527310875395246951436451323, time:1750766701.7173142s req_ids:[8] +DEBUG 06-24 20:05:01 [manager.py:391] +ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:211.02190017700195ms total_cost_time:211.06505393981934ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5746 prompt_cache_len:5151 prompt_cache_ratio:0.8964497041420119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 +DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10840344429016113 s +INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s +DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=227961160379845112214730476415351999420, time:1750766701.9299242s req_ids:[8] +DEBUG 06-24 20:05:01 [manager.py:391] +ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:391.42560958862305ms total_cost_time:391.46971702575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5747 prompt_cache_len:5151 prompt_cache_ratio:0.8962937184618062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 +DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.1082773208618164 s +INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.11025810241699219 s +DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=63826387628659194673666553928481840964, time:1750766702.3187146s req_ids:[8] +DEBUG 06-24 20:05:02 [manager.py:391] +ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:192.4724578857422ms total_cost_time:192.51513481140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5748 prompt_cache_len:5151 prompt_cache_ratio:0.8961377870563675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 +DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.10725855827331543 s +INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10930609703063965 s +DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=242626655092276276549633438089888098613, time:1750766702.5230777s req_ids:[8] +DEBUG 06-24 20:05:02 [manager.py:391] +ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:204.559326171875ms total_cost_time:204.60176467895508ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5749 prompt_cache_len:5151 prompt_cache_ratio:0.8959819098973735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 +DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.10707783699035645 s +INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10912775993347168 s +DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=327704206048615379506805875646450782640, time:1750766702.7327387s req_ids:[8] +DEBUG 06-24 20:05:02 [manager.py:391] +ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:206.73584938049316ms total_cost_time:206.77924156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5750 prompt_cache_len:5151 prompt_cache_ratio:0.8958260869565218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 +DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.1070711612701416 s +INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10914158821105957 s +DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=274543162032134645142562845588714487038, time:1750766702.9418006s req_ids:[8] +DEBUG 06-24 20:05:02 [manager.py:391] +ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:201.80368423461914ms total_cost_time:201.84826850891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5751 prompt_cache_len:5151 prompt_cache_ratio:0.8956703182055294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 +DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10801315307617188 s +INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.11002492904663086 s +DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=293893490561595804141949037555889786040, time:1750766703.1522796s req_ids:[8] +DEBUG 06-24 20:05:03 [manager.py:391] +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:208.30321311950684ms total_cost_time:208.3451747894287ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5752 prompt_cache_len:5151 prompt_cache_ratio:0.8955146036161336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 +DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s +INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10963129997253418 s +DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=4964440078896330546164792007037856921, time:1750766703.3647814s req_ids:[8] +DEBUG 06-24 20:05:03 [manager.py:391] +ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:208.1897258758545ms total_cost_time:208.23240280151367ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5753 prompt_cache_len:5151 prompt_cache_ratio:0.8953589431600903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 +DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10685038566589355 s +INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10886311531066895 s +DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=323472937902062206953977693610557973810, time:1750766703.5760076s req_ids:[8] +DEBUG 06-24 20:05:03 [manager.py:391] +ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:206.634521484375ms total_cost_time:206.681489944458ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5754 prompt_cache_len:5151 prompt_cache_ratio:0.8952033368091762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 +DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10692191123962402 s +INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10872364044189453 s +DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=234837898369592640570817486594358598183, time:1750766703.7892866s req_ids:[8] +DEBUG 06-24 20:05:03 [manager.py:391] +ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:207.46755599975586ms total_cost_time:207.51142501831055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5755 prompt_cache_len:5151 prompt_cache_ratio:0.8950477845351867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 +DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10761642456054688 s +INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10946798324584961 s +DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=108919957625773694461047180063897201550, time:1750766704.0124083s req_ids:[8] +DEBUG 06-24 20:05:04 [manager.py:391] +ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:221.9388484954834ms total_cost_time:221.98200225830078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5756 prompt_cache_len:5151 prompt_cache_ratio:0.8948922863099374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 +DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10738134384155273 s +INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10931730270385742 s +DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=18799080369994022439404773831492439134, time:1750766704.2259586s req_ids:[8] +DEBUG 06-24 20:05:04 [manager.py:391] +ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:203.6902904510498ms total_cost_time:203.7355899810791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5757 prompt_cache_len:5151 prompt_cache_ratio:0.8947368421052632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 +DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10828614234924316 s +INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.11040067672729492 s +DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=218174013308053724178459402773829623128, time:1750766704.4359329s req_ids:[8] +DEBUG 06-24 20:05:04 [manager.py:391] +ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:205.54804801940918ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5758 prompt_cache_len:5151 prompt_cache_ratio:0.8945814518930184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 +DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s +INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s +DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=66466915453822609485099053518473648142, time:1750766704.6457856s req_ids:[8] +DEBUG 06-24 20:05:04 [manager.py:391] +ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:206.24732971191406ms total_cost_time:206.29167556762695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5759 prompt_cache_len:5151 prompt_cache_ratio:0.8944261156450772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 +DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10659122467041016 s +INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10861349105834961 s +DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=218936826633139380110908798735265870496, time:1750766704.8566241s req_ids:[8] +DEBUG 06-24 20:05:04 [manager.py:391] +DEBUG 06-24 20:05:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 25526.128 tokens/s +DEBUG 06-24 20:05:04 [stats.py:37] Avg prompt tokens throughput: 25517.229 tokens/s +DEBUG 06-24 20:05:04 [stats.py:37] Avg generate tokens throughput: 8.900 tokens/s +ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:206.22873306274414ms total_cost_time:206.27188682556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5760 prompt_cache_len:5151 prompt_cache_ratio:0.8942708333333333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 +DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10706615447998047 s +INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s +DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=177559919126004287006541243321830193542, time:1750766705.0709262s req_ids:[8] +DEBUG 06-24 20:05:05 [manager.py:391] +ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:208.28509330749512ms total_cost_time:208.3296775817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5761 prompt_cache_len:5151 prompt_cache_ratio:0.8941156049296997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 +DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10860252380371094 s +INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.11062359809875488 s +DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=260605159470986714372952081425709552275, time:1750766705.2805793s req_ids:[8] +DEBUG 06-24 20:05:05 [manager.py:391] +ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:206.01963996887207ms total_cost_time:206.06279373168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5762 prompt_cache_len:5151 prompt_cache_ratio:0.893960430406109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 +DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10709214210510254 s +INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.10913920402526855 s +DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=231842679769872168355842081128557101267, time:1750766705.4970627s req_ids:[8] +DEBUG 06-24 20:05:05 [manager.py:391] +ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:211.26532554626465ms total_cost_time:211.32373809814453ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:5763 prompt_cache_len:5151 prompt_cache_ratio:0.8938053097345132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 +DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.30991148948669434 s +INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.3120236396789551 s +DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=87612591875752402812856526846736536191, time:1750766705.9108155s req_ids:[8] +DEBUG 06-24 20:05:05 [manager.py:391] +ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:408.60533714294434ms total_cost_time:408.65063667297363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5764 prompt_cache_len:5151 prompt_cache_ratio:0.8936502428868841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 +DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10705685615539551 s +INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10927033424377441 s +DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=14730066086647046556245556632612497778, time:1750766706.1245298s req_ids:[8] +DEBUG 06-24 20:05:06 [manager.py:391] +ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:207.45110511779785ms total_cost_time:207.49568939208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5765 prompt_cache_len:5151 prompt_cache_ratio:0.8934952298352125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 +DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10709977149963379 s +INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10918474197387695 s +DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=98560683644153745234278928176951897804, time:1750766706.3351285s req_ids:[8] +DEBUG 06-24 20:05:06 [manager.py:391] +ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:206.7577838897705ms total_cost_time:206.8023681640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5766 prompt_cache_len:5151 prompt_cache_ratio:0.8933402705515089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 +DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10695767402648926 s +INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.1088707447052002 s +DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=91904649221834591631630286063090173675, time:1750766706.5488129s req_ids:[8] +DEBUG 06-24 20:05:06 [manager.py:391] +ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:206.6361904144287ms total_cost_time:206.6802978515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5767 prompt_cache_len:5151 prompt_cache_ratio:0.893185365007803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 +DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.1076207160949707 s +INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10965418815612793 s +DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=124920877431328838951323347641989458266, time:1750766706.760562s req_ids:[8] +DEBUG 06-24 20:05:06 [manager.py:391] +ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:208.3134651184082ms total_cost_time:208.3566188812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5768 prompt_cache_len:5151 prompt_cache_ratio:0.8930305131761442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 +DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10801982879638672 s +INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10996198654174805 s +DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=77603677738351411987356897578994262550, time:1750766706.9751596s req_ids:[8] +DEBUG 06-24 20:05:06 [manager.py:391] +ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:207.5350284576416ms total_cost_time:207.5800895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5769 prompt_cache_len:5151 prompt_cache_ratio:0.8928757150286012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 +DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.10751748085021973 s +INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10949850082397461 s +DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=79615452045447308888397770338320692495, time:1750766707.1871088s req_ids:[8] +DEBUG 06-24 20:05:07 [manager.py:391] +ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:204.21147346496582ms total_cost_time:204.2555809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5770 prompt_cache_len:5151 prompt_cache_ratio:0.8927209705372617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 +DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:07 [batch.py:51] router release req id 8 +INFO 06-24 20:05:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.1066596508026123 s +INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10865187644958496 s +DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=144468863874210372699079954971167395869, time:1750766707.396074s req_ids:[8] +DEBUG 06-24 20:05:07 [manager.py:391] +ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:206.22777938842773ms total_cost_time:206.2704563140869ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5771 prompt_cache_len:5151 prompt_cache_ratio:0.8925662796742332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 +DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.10692596435546875 s +INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10894227027893066 s +DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=25761989621393823015193604318921973648, time:1750766707.606788s req_ids:[8] +DEBUG 06-24 20:05:07 [manager.py:391] +ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:206.3465118408203ms total_cost_time:206.390380859375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5772 prompt_cache_len:5151 prompt_cache_ratio:0.8924116424116424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 +DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.1063992977142334 s +INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10853219032287598 s +DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=162610706470779863226333979744849588775, time:1750766707.8215358s req_ids:[8] +DEBUG 06-24 20:05:07 [manager.py:391] +ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:207.2300910949707ms total_cost_time:207.2734832763672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5773 prompt_cache_len:5151 prompt_cache_ratio:0.8922570587216352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 +DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10752677917480469 s +INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10966229438781738 s +DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=291940951167392547587030341205023199549, time:1750766708.0333269s req_ids:[8] +DEBUG 06-24 20:05:08 [manager.py:391] +ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:207.59034156799316ms total_cost_time:207.63325691223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5774 prompt_cache_len:5151 prompt_cache_ratio:0.8921025285763768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 +DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10712432861328125 s +INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10905694961547852 s +DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=275879283798856027872695398187194918483, time:1750766708.2449744s req_ids:[8] +DEBUG 06-24 20:05:08 [manager.py:391] +ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:203.65285873413086ms total_cost_time:203.69815826416016ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5775 prompt_cache_len:5151 prompt_cache_ratio:0.8919480519480519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 +DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10818028450012207 s +INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.11021614074707031 s +DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=28390632015667287064039146458720193527, time:1750766708.4557414s req_ids:[8] +DEBUG 06-24 20:05:08 [manager.py:391] +ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:205.6260108947754ms total_cost_time:205.67035675048828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5776 prompt_cache_len:5151 prompt_cache_ratio:0.8917936288088643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 +DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10655045509338379 s +INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10858750343322754 s +DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=146096954067290975391965013368975270309, time:1750766708.667068s req_ids:[8] +DEBUG 06-24 20:05:08 [manager.py:391] +ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:374.57799911499023ms total_cost_time:374.62329864501953ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5777 prompt_cache_len:5151 prompt_cache_ratio:0.8916392591310369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 +DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10678386688232422 s +INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10870599746704102 s +DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=36524539953869462874242074860281253601, time:1750766709.0393448s req_ids:[8] +DEBUG 06-24 20:05:09 [manager.py:391] +ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:196.34199142456055ms total_cost_time:196.38538360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5778 prompt_cache_len:5151 prompt_cache_ratio:0.891484942886812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 +DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10776853561401367 s +INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10972189903259277 s +DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=276618074281118259245716822964162521808, time:1750766709.2469084s req_ids:[8] +DEBUG 06-24 20:05:09 [manager.py:391] +ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:206.80522918701172ms total_cost_time:206.8490982055664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5779 prompt_cache_len:5151 prompt_cache_ratio:0.8913306800484513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 +DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10642623901367188 s +INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10843157768249512 s +DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=318736392974999057298735277582301699681, time:1750766709.4591906s req_ids:[8] +DEBUG 06-24 20:05:09 [manager.py:391] +ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:210.28375625610352ms total_cost_time:210.3271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5780 prompt_cache_len:5151 prompt_cache_ratio:0.8911764705882353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 +DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10671567916870117 s +INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10876822471618652 s +DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=132488056754414973470364588351876737565, time:1750766709.673921s req_ids:[8] +DEBUG 06-24 20:05:09 [manager.py:391] +ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:208.88400077819824ms total_cost_time:208.92906188964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5781 prompt_cache_len:5151 prompt_cache_ratio:0.8910223144784639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 +DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10674715042114258 s +INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10876774787902832 s +DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=90094379394264099241369854396910634783, time:1750766709.8875275s req_ids:[8] +DEBUG 06-24 20:05:09 [manager.py:391] +ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:209.21039581298828ms total_cost_time:209.25331115722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5782 prompt_cache_len:5151 prompt_cache_ratio:0.8908682116914562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 +DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10627365112304688 s +INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10871076583862305 s +DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=21057350097725668172911839561212442127, time:1750766710.1010895s req_ids:[8] +DEBUG 06-24 20:05:10 [manager.py:391] +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:206.038236618042ms total_cost_time:206.08234405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5783 prompt_cache_len:5151 prompt_cache_ratio:0.8907141621995504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 +DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10687136650085449 s +INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.1093435287475586 s +DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=35628506560609197654704605018266396970, time:1750766710.3096273s req_ids:[8] +DEBUG 06-24 20:05:10 [manager.py:391] +ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:206.1631679534912ms total_cost_time:206.207275390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5784 prompt_cache_len:5151 prompt_cache_ratio:0.8905601659751037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 +DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10755681991577148 s +INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10986495018005371 s +DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=152162783008818919106651546405641910318, time:1750766710.5231626s req_ids:[8] +DEBUG 06-24 20:05:10 [manager.py:391] +ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:208.27150344848633ms total_cost_time:208.31584930419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5785 prompt_cache_len:5151 prompt_cache_ratio:0.8904062229904927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 +DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10642790794372559 s +INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.1088871955871582 s +DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=216347873005092744382780977226077477487, time:1750766710.7363703s req_ids:[8] +DEBUG 06-24 20:05:10 [manager.py:391] +ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:208.91189575195312ms total_cost_time:208.95671844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5786 prompt_cache_len:5151 prompt_cache_ratio:0.8902523332181127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 +DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:10 [batch.py:51] router release req id 8 +INFO 06-24 20:05:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10697364807128906 s +INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940957069396973 s +DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=43943551713752290202805634881428227714, time:1750766710.9501908s req_ids:[8] +DEBUG 06-24 20:05:10 [manager.py:391] +ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:209.27953720092773ms total_cost_time:209.32316780090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5787 prompt_cache_len:5151 prompt_cache_ratio:0.8900984966303784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 +DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.1080322265625 s +INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s +DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=240832909227799961582119376593104367910, time:1750766711.1658766s req_ids:[8] +DEBUG 06-24 20:05:11 [manager.py:391] +ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:209.4900608062744ms total_cost_time:209.5344066619873ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5788 prompt_cache_len:5151 prompt_cache_ratio:0.8899447131997236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 +DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.10696601867675781 s +INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.10886812210083008 s +DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=289519504952717583805549600934727055522, time:1750766711.3785887s req_ids:[8] +DEBUG 06-24 20:05:11 [manager.py:391] +ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:362.9326820373535ms total_cost_time:362.9908561706543ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5789 prompt_cache_len:5151 prompt_cache_ratio:0.8897909828986008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 +DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.1066598892211914 s +INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.1087496280670166 s +DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=134766458977726517470176220755305125321, time:1750766711.7367752s req_ids:[8] +DEBUG 06-24 20:05:11 [manager.py:391] +ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:196.29645347595215ms total_cost_time:196.33817672729492ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5790 prompt_cache_len:5151 prompt_cache_ratio:0.8896373056994819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 +DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.10658574104309082 s +INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.10868191719055176 s +DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=47189270108033356106181443410887077415, time:1750766711.9459033s req_ids:[8] +DEBUG 06-24 20:05:11 [manager.py:391] +ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:205.51705360412598ms total_cost_time:205.56020736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5791 prompt_cache_len:5151 prompt_cache_ratio:0.8894836815748576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 +DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10634469985961914 s +INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10836625099182129 s +DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=189471481732654903033303870766754283024, time:1750766712.1566558s req_ids:[8] +DEBUG 06-24 20:05:12 [manager.py:391] +ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.34071731567383ms total_cost_time:207.38554000854492ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5792 prompt_cache_len:5151 prompt_cache_ratio:0.8893301104972375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 +DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.1066131591796875 s +INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.1086428165435791 s +DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=79423937492376416983331505730315262902, time:1750766712.367584s req_ids:[8] +DEBUG 06-24 20:05:12 [manager.py:391] +ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.19432830810547ms total_cost_time:207.23867416381836ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5793 prompt_cache_len:5151 prompt_cache_ratio:0.8891765924391507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 +DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10646390914916992 s +INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10858941078186035 s +DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=60991017761758826280836123093395979176, time:1750766712.5842266s req_ids:[8] +DEBUG 06-24 20:05:12 [manager.py:391] +ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.9479694366455ms total_cost_time:207.9923152923584ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5794 prompt_cache_len:5151 prompt_cache_ratio:0.8890231273731446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 +DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.1067509651184082 s +INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10883498191833496 s +DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=54328422634416349732020305227558359940, time:1750766712.7942815s req_ids:[8] +DEBUG 06-24 20:05:12 [manager.py:391] +ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:208.12439918518066ms total_cost_time:208.17017555236816ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5795 prompt_cache_len:5151 prompt_cache_ratio:0.888869715271786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 +DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10720133781433105 s +INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10956335067749023 s +DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=236316453303555827082710330540676193913, time:1750766713.0068474s req_ids:[8] +DEBUG 06-24 20:05:13 [manager.py:391] +ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:204.8788070678711ms total_cost_time:204.92291450500488ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5796 prompt_cache_len:5151 prompt_cache_ratio:0.8887163561076604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 +DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10703420639038086 s +INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.10920500755310059 s +DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=81941996111944769696434313259102623752, time:1750766713.2170575s req_ids:[8] +DEBUG 06-24 20:05:13 [manager.py:391] +ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:205.84654808044434ms total_cost_time:205.87682723999023ms,out_token_counter:1 mean_per_token_cost_time: 0.030279159545898438ms prompt_token_num:5797 prompt_cache_len:5151 prompt_cache_ratio:0.8885630498533724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 +DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10696887969970703 s +INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.1090691089630127 s +DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=254257039385188194563945046877701641528, time:1750766713.437088s req_ids:[8] +DEBUG 06-24 20:05:13 [manager.py:391] +ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:217.15712547302246ms total_cost_time:217.20290184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5798 prompt_cache_len:5151 prompt_cache_ratio:0.8884097964815454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 +DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10662555694580078 s +INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.10878634452819824 s +DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=55647634337077060957137484828783468822, time:1750766713.6504781s req_ids:[8] +DEBUG 06-24 20:05:13 [manager.py:391] +ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:206.96115493774414ms total_cost_time:207.02171325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5799 prompt_cache_len:5151 prompt_cache_ratio:0.8882565959648215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 +DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10782122611999512 s +INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.11004376411437988 s +DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=9610420739971923566883961934099402716, time:1750766713.8669808s req_ids:[8] +DEBUG 06-24 20:05:13 [manager.py:391] +ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:212.2344970703125ms total_cost_time:212.294340133667ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5800 prompt_cache_len:5151 prompt_cache_ratio:0.8881034482758621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 +DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.30803561210632324 s +INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.3106107711791992 s +DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=13436109040149684680829302530524818223, time:1750766714.282535s req_ids:[8] +DEBUG 06-24 20:05:14 [manager.py:391] +ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:413.24472427368164ms total_cost_time:413.29002380371094ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5801 prompt_cache_len:5151 prompt_cache_ratio:0.887950353387347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 +DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.10530900955200195 s +INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.10771965980529785 s +DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=329279760679769720433216955031007764877, time:1750766714.500566s req_ids:[8] +DEBUG 06-24 20:05:14 [manager.py:391] +ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:209.1238498687744ms total_cost_time:209.1691493988037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5802 prompt_cache_len:5151 prompt_cache_ratio:0.8877973112719751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 +DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.1066434383392334 s +INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.10902261734008789 s +DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=211524719874424797528463845671927998653, time:1750766714.7134264s req_ids:[8] +DEBUG 06-24 20:05:14 [manager.py:391] +ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:202.74686813354492ms total_cost_time:202.7912139892578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5803 prompt_cache_len:5151 prompt_cache_ratio:0.8876443219024642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 +DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.10783743858337402 s +INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s +DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=257126260772189521574354081748128420223, time:1750766714.9216049s req_ids:[8] +DEBUG 06-24 20:05:14 [manager.py:391] +DEBUG 06-24 20:05:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 25287.556 tokens/s +DEBUG 06-24 20:05:14 [stats.py:37] Avg prompt tokens throughput: 25278.813 tokens/s +DEBUG 06-24 20:05:14 [stats.py:37] Avg generate tokens throughput: 8.743 tokens/s +ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:205.67703247070312ms total_cost_time:205.7204246520996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5804 prompt_cache_len:5151 prompt_cache_ratio:0.8874913852515507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 +DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.1078336238861084 s +INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.11023712158203125 s +DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=13156001173514735912048462017979583811, time:1750766715.1319818s req_ids:[8] +DEBUG 06-24 20:05:15 [manager.py:391] +ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.44178581237793ms total_cost_time:204.49328422546387ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:5805 prompt_cache_len:5151 prompt_cache_ratio:0.8873385012919897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 +DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10854482650756836 s +INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.11105656623840332 s +DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=41588808320688186178318159976668018710, time:1750766715.3420794s req_ids:[8] +DEBUG 06-24 20:05:15 [manager.py:391] +ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.90074157714844ms total_cost_time:204.92291450500488ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5806 prompt_cache_len:5151 prompt_cache_ratio:0.8871856699965552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 +DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10726284980773926 s +INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.10971283912658691 s +DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=208163391892305609467255759563020615290, time:1750766715.554053s req_ids:[8] +DEBUG 06-24 20:05:15 [manager.py:391] +ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.96678352355957ms total_cost_time:205.02948760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:5807 prompt_cache_len:5151 prompt_cache_ratio:0.8870328913380403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 +DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10689473152160645 s +INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.10939645767211914 s +DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=217544170007707265945958517846435127239, time:1750766715.7624652s req_ids:[8] +DEBUG 06-24 20:05:15 [manager.py:391] +ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:206.8009376525879ms total_cost_time:206.8798542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.07891654968261719ms prompt_token_num:5808 prompt_cache_len:5151 prompt_cache_ratio:0.8868801652892562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 +DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10953927040100098 s +INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.1119527816772461 s +DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=205441924898933949804459501007173506549, time:1750766715.9742575s req_ids:[8] +DEBUG 06-24 20:05:15 [manager.py:391] +ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:205.03687858581543ms total_cost_time:205.0802707672119ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5809 prompt_cache_len:5151 prompt_cache_ratio:0.8867274918230332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 +DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10679507255554199 s +INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s +DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=300752411717777120746350395164648101424, time:1750766716.1856523s req_ids:[8] +DEBUG 06-24 20:05:16 [manager.py:391] +ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.4128646850586ms total_cost_time:205.4586410522461ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5810 prompt_cache_len:5151 prompt_cache_ratio:0.8865748709122203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 +DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:16 [batch.py:51] router release req id 8 +INFO 06-24 20:05:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10884547233581543 s +INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11116385459899902 s +DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=215626646824194162052921541144742565453, time:1750766716.3953905s req_ids:[8] +DEBUG 06-24 20:05:16 [manager.py:391] +ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.29699325561523ms total_cost_time:205.35516738891602ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5811 prompt_cache_len:5151 prompt_cache_ratio:0.8864223025296851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 +DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10906529426574707 s +INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11155939102172852 s +DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=3446350724196355247226250001055512086, time:1750766716.6058247s req_ids:[8] +DEBUG 06-24 20:05:16 [manager.py:391] +ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.75928688049316ms total_cost_time:205.81555366516113ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:5812 prompt_cache_len:5151 prompt_cache_ratio:0.8862697866483138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 +DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10892486572265625 s +INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s +DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=210129042323211614437430385285636176916, time:1750766716.816875s req_ids:[8] +DEBUG 06-24 20:05:16 [manager.py:391] +ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:203.16314697265625ms total_cost_time:203.2148838043213ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:5813 prompt_cache_len:5151 prompt_cache_ratio:0.8861173232410116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 +DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.11028552055358887 s +INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11284804344177246 s +DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=180970217023457905140559726872068014940, time:1750766717.0265222s req_ids:[8] +DEBUG 06-24 20:05:17 [manager.py:391] +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:220.8724021911621ms total_cost_time:220.92342376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:5814 prompt_cache_len:5151 prompt_cache_ratio:0.8859649122807017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 +DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.10945701599121094 s +INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11162209510803223 s +DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=193554039648567789626035390633491588137, time:1750766717.2510054s req_ids:[8] +DEBUG 06-24 20:05:17 [manager.py:391] +ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:200.3152370452881ms total_cost_time:200.36005973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5815 prompt_cache_len:5151 prompt_cache_ratio:0.8858125537403267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 +DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.11030697822570801 s +INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11234593391418457 s +DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=55238196607474429053847315643017236839, time:1750766717.4584095s req_ids:[8] +DEBUG 06-24 20:05:17 [manager.py:391] +ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:205.81603050231934ms total_cost_time:205.86013793945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5816 prompt_cache_len:5151 prompt_cache_ratio:0.8856602475928473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 +DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.10747265815734863 s +INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.10960149765014648 s +DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=292379887231049043727563229835918325595, time:1750766717.674671s req_ids:[8] +DEBUG 06-24 20:05:17 [manager.py:391] +ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:376.6369819641113ms total_cost_time:376.6825199127197ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5817 prompt_cache_len:5151 prompt_cache_ratio:0.8855079938112429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 +DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1075439453125 s +INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s +DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=334019733525901534901339728637892430523, time:1750766718.042526s req_ids:[8] +DEBUG 06-24 20:05:18 [manager.py:391] +ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:196.17557525634766ms total_cost_time:196.23541831970215ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5818 prompt_cache_len:5151 prompt_cache_ratio:0.8853557923685115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 +DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.10746884346008301 s +INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10949230194091797 s +DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=305836860098545249966328480463826565677, time:1750766718.2515116s req_ids:[8] +DEBUG 06-24 20:05:18 [manager.py:391] +ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:205.7344913482666ms total_cost_time:205.7955265045166ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5819 prompt_cache_len:5151 prompt_cache_ratio:0.8852036432376698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 +DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.10719537734985352 s +INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10932517051696777 s +DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=332796991343996048987693077930960817217, time:1750766718.4628248s req_ids:[8] +DEBUG 06-24 20:05:18 [manager.py:391] +ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:207.78203010559082ms total_cost_time:207.84306526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5820 prompt_cache_len:5151 prompt_cache_ratio:0.8850515463917525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 +DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s +INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.11083102226257324 s +DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=7520305571926688053102247409640375342, time:1750766718.6747494s req_ids:[8] +DEBUG 06-24 20:05:18 [manager.py:391] +ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:218.3535099029541ms total_cost_time:218.41096878051758ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:5821 prompt_cache_len:5151 prompt_cache_ratio:0.8848995018038138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 +DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1075887680053711 s +INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10993719100952148 s +DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=36497736523476442285915324166227208847, time:1750766718.8964424s req_ids:[8] +DEBUG 06-24 20:05:18 [manager.py:391] +ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:199.76496696472168ms total_cost_time:199.80835914611816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5822 prompt_cache_len:5151 prompt_cache_ratio:0.8847475094469255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 +DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10756778717041016 s +INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.10985994338989258 s +DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=284586521460332609945855395351023572554, time:1750766719.109991s req_ids:[8] +DEBUG 06-24 20:05:19 [manager.py:391] +ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:226.40442848205566ms total_cost_time:226.46617889404297ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5823 prompt_cache_len:5151 prompt_cache_ratio:0.8845955692941783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 +DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10872483253479004 s +INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.11112427711486816 s +DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=286819375068313634828754931559565596181, time:1750766719.3325064s req_ids:[8] +DEBUG 06-24 20:05:19 [manager.py:391] +ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:214.3564224243164ms total_cost_time:214.4174575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5824 prompt_cache_len:5151 prompt_cache_ratio:0.8844436813186813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 +DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10754227638244629 s +INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.10963273048400879 s +DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=82125310477825531762142640063632383831, time:1750766719.5512064s req_ids:[8] +DEBUG 06-24 20:05:19 [manager.py:391] +ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:214.08319473266602ms total_cost_time:214.1427993774414ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5825 prompt_cache_len:5151 prompt_cache_ratio:0.8842918454935622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 +DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10732102394104004 s +INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.1091463565826416 s +DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=189606707303042446458717177826965963969, time:1750766719.7695777s req_ids:[8] +DEBUG 06-24 20:05:19 [manager.py:391] +ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:217.70262718200684ms total_cost_time:217.74744987487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5826 prompt_cache_len:5151 prompt_cache_ratio:0.884140061791967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 +DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s +INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094660758972168 s +DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=55657284113546525446108847269412476574, time:1750766719.9930034s req_ids:[8] +DEBUG 06-24 20:05:19 [manager.py:391] +ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:212.53108978271484ms total_cost_time:212.59450912475586ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:5827 prompt_cache_len:5151 prompt_cache_ratio:0.8839883301870602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 +DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10719966888427734 s +INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10927724838256836 s +DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=249207288594956523524363858007463676643, time:1750766720.2126641s req_ids:[8] +DEBUG 06-24 20:05:20 [manager.py:391] +ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:213.87243270874023ms total_cost_time:213.93108367919922ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:5828 prompt_cache_len:5151 prompt_cache_ratio:0.8838366506520247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 +DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10686016082763672 s +INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10880136489868164 s +DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=50211751967394745518179093856042512969, time:1750766720.424666s req_ids:[8] +DEBUG 06-24 20:05:20 [manager.py:391] +ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:191.2245750427246ms total_cost_time:191.2863254547119ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5829 prompt_cache_len:5151 prompt_cache_ratio:0.8836850231600618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 +DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s +INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10937118530273438 s +DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=105172989624798812679579100617991249290, time:1750766720.6247327s req_ids:[8] +DEBUG 06-24 20:05:20 [manager.py:391] +ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:201.30491256713867ms total_cost_time:201.36547088623047ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5830 prompt_cache_len:5151 prompt_cache_ratio:0.8835334476843911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 +DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.31052231788635254 s +INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.3116328716278076 s +DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=29634646512671778184617612723094905530, time:1750766721.0317552s req_ids:[8] +DEBUG 06-24 20:05:21 [manager.py:391] +ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:407.0782661437988ms total_cost_time:407.14073181152344ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:5831 prompt_cache_len:5151 prompt_cache_ratio:0.8833819241982507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 +DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10670280456542969 s +INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.1086571216583252 s +DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=273613196677162498547765352273728690154, time:1750766721.2475622s req_ids:[8] +DEBUG 06-24 20:05:21 [manager.py:391] +ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:209.00964736938477ms total_cost_time:209.05542373657227ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5832 prompt_cache_len:5151 prompt_cache_ratio:0.8832304526748971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 +DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:21 [batch.py:51] router release req id 8 +INFO 06-24 20:05:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.1073005199432373 s +INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.10923051834106445 s +DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=186528521138944257094372448521661314318, time:1750766721.4587982s req_ids:[8] +DEBUG 06-24 20:05:21 [manager.py:391] +ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:203.7353515625ms total_cost_time:203.7956714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5833 prompt_cache_len:5151 prompt_cache_ratio:0.883079033087605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 +DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10814571380615234 s +INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.11010575294494629 s +DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=287868392629287258833983322258470314665, time:1750766721.6688561s req_ids:[8] +DEBUG 06-24 20:05:21 [manager.py:391] +ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.49583435058594ms total_cost_time:205.55520057678223ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5834 prompt_cache_len:5151 prompt_cache_ratio:0.8829276654096675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 +DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10844874382019043 s +INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s +DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=183426558836825497026989485321622697486, time:1750766721.8795202s req_ids:[8] +DEBUG 06-24 20:05:21 [manager.py:391] +ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.31606674194336ms total_cost_time:205.37209510803223ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5835 prompt_cache_len:5151 prompt_cache_ratio:0.8827763496143959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 +DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10737228393554688 s +INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10944461822509766 s +DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=323924746044384931008380151683726151909, time:1750766722.0887597s req_ids:[8] +DEBUG 06-24 20:05:22 [manager.py:391] +ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.5795192718506ms total_cost_time:205.63888549804688ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5836 prompt_cache_len:5151 prompt_cache_ratio:0.88262508567512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 +DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10657358169555664 s +INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10860443115234375 s +DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=4314745179259761099362783005863162849, time:1750766722.3078513s req_ids:[8] +DEBUG 06-24 20:05:22 [manager.py:391] +ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:221.96006774902344ms total_cost_time:222.0172882080078ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5837 prompt_cache_len:5151 prompt_cache_ratio:0.8824738735651876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 +DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10709643363952637 s +INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10907626152038574 s +DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=320889084037266710720023836220273636333, time:1750766722.5297847s req_ids:[8] +DEBUG 06-24 20:05:22 [manager.py:391] +ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:208.1303596496582ms total_cost_time:208.1761360168457ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5838 prompt_cache_len:5151 prompt_cache_ratio:0.8823227132579651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 +DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10703325271606445 s +INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10903215408325195 s +DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=318193857822376333687922538946690967845, time:1750766722.739481s req_ids:[8] +DEBUG 06-24 20:05:22 [manager.py:391] +ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:203.9961814880371ms total_cost_time:204.05817031860352ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5839 prompt_cache_len:5151 prompt_cache_ratio:0.8821716047268368 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 +DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10788345336914062 s +INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.11006331443786621 s +DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=285871223811438429528001651557791563795, time:1750766722.9635055s req_ids:[8] +DEBUG 06-24 20:05:22 [manager.py:391] +ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:220.4742431640625ms total_cost_time:220.5188274383545ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5840 prompt_cache_len:5151 prompt_cache_ratio:0.8820205479452055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 +DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.10673141479492188 s +INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.10866951942443848 s +DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=61199987539522993132786479677822146577, time:1750766723.17567s req_ids:[8] +DEBUG 06-24 20:05:23 [manager.py:391] +INFO 06-24 20:05:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:05:23 [statics_utils.py:24] mean first cost: 246.08994080085674 ms +INFO 06-24 20:05:23 [statics_utils.py:24] mean per token cost: 0.1487752992162806 ms +ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:206.679105758667ms total_cost_time:206.73727989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5841 prompt_cache_len:5151 prompt_cache_ratio:0.881869542886492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 +DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.10946226119995117 s +INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.11142492294311523 s +DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=302376348211471132063346124126741473894, time:1750766723.3861055s req_ids:[8] +DEBUG 06-24 20:05:23 [manager.py:391] +ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:203.66311073303223ms total_cost_time:203.71007919311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5842 prompt_cache_len:5151 prompt_cache_ratio:0.8817185895241356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 +DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.3092224597930908 s +INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.31124067306518555 s +DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=325893024976816570180956671979932518530, time:1750766723.7928898s req_ids:[8] +DEBUG 06-24 20:05:23 [manager.py:391] +ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:404.9248695373535ms total_cost_time:404.9839973449707ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5843 prompt_cache_len:5151 prompt_cache_ratio:0.8815676878315933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 +DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.1081240177154541 s +INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.11003232002258301 s +DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=140514976188979608081788129592750819575, time:1750766724.007185s req_ids:[8] +DEBUG 06-24 20:05:24 [manager.py:391] +ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:206.3581943511963ms total_cost_time:206.40230178833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5844 prompt_cache_len:5151 prompt_cache_ratio:0.8814168377823408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 +DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10740828514099121 s +INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.1094675064086914 s +DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=299218053319811646133950392668267074423, time:1750766724.2164278s req_ids:[8] +DEBUG 06-24 20:05:24 [manager.py:391] +ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:205.11126518249512ms total_cost_time:205.1548957824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5845 prompt_cache_len:5151 prompt_cache_ratio:0.8812660393498717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 +DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10695695877075195 s +INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.1090247631072998 s +DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=26565515681475904456185421761033144494, time:1750766724.4277542s req_ids:[8] +DEBUG 06-24 20:05:24 [manager.py:391] +ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:207.41558074951172ms total_cost_time:207.4582576751709ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5846 prompt_cache_len:5151 prompt_cache_ratio:0.8811152925076976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 +DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.11001420021057129 s +INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.11198973655700684 s +DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=137461144901055138580842537814338046640, time:1750766724.645591s req_ids:[8] +DEBUG 06-24 20:05:24 [manager.py:391] +ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:212.88204193115234ms total_cost_time:212.92638778686523ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5847 prompt_cache_len:5151 prompt_cache_ratio:0.8809645972293484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 +DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10644078254699707 s +INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.10825562477111816 s +DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=166883432195156598061953158148513638313, time:1750766724.85619s req_ids:[8] +DEBUG 06-24 20:05:24 [manager.py:391] +ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:168.95627975463867ms total_cost_time:168.99776458740234ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5848 prompt_cache_len:5151 prompt_cache_ratio:0.8808139534883721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 +DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:24 [batch.py:51] router release req id 8 +INFO 06-24 20:05:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10547590255737305 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10696935653686523 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=56530541912003010724294856250128985736, time:1750766725.0274258s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +DEBUG 06-24 20:05:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 25956.663 tokens/s +DEBUG 06-24 20:05:25 [stats.py:37] Avg prompt tokens throughput: 25947.757 tokens/s +DEBUG 06-24 20:05:25 [stats.py:37] Avg generate tokens throughput: 8.906 tokens/s +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:162.4436378479004ms total_cost_time:162.5041961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5849 prompt_cache_len:5151 prompt_cache_ratio:0.8806633612583348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 +DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10846948623657227 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.11008334159851074 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=87267703143384155545502246740985402850, time:1750766725.189954s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:153.9146900177002ms total_cost_time:153.95545959472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5850 prompt_cache_len:5151 prompt_cache_ratio:0.8805128205128205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 +DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10838460922241211 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10997509956359863 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=260575524142660679089000493264261182054, time:1750766725.349024s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:153.51176261901855ms total_cost_time:153.55634689331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5851 prompt_cache_len:5151 prompt_cache_ratio:0.8803623312254315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 +DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10769081115722656 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10959649085998535 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=64777232367542088665923281608286836412, time:1750766725.508749s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:189.50343132019043ms total_cost_time:189.5453929901123ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5852 prompt_cache_len:5151 prompt_cache_ratio:0.8802118933697881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 +DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10701632499694824 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10892820358276367 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=56673744498726400636949890867722889474, time:1750766725.7076936s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:201.39050483703613ms total_cost_time:201.432466506958ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5853 prompt_cache_len:5151 prompt_cache_ratio:0.8800615069195284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 +DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10679388046264648 s +INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886192321777344 s +DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=188389654727391739722603997664810042262, time:1750766725.9165316s req_ids:[8] +DEBUG 06-24 20:05:25 [manager.py:391] +ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:203.20391654968262ms total_cost_time:203.2461166381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5854 prompt_cache_len:5151 prompt_cache_ratio:0.8799111718483088 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 +DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.31009674072265625 s +INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.31215357780456543 s +DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=244491530443690671120648079858459053813, time:1750766726.319744s req_ids:[8] +DEBUG 06-24 20:05:26 [manager.py:391] +ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:404.25658226013184ms total_cost_time:404.30235862731934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5855 prompt_cache_len:5151 prompt_cache_ratio:0.8797608881298036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 +DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.1066434383392334 s +INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.1085808277130127 s +DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=216128223810542315090735730623472779013, time:1750766726.5354388s req_ids:[8] +DEBUG 06-24 20:05:26 [manager.py:391] +ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:209.34033393859863ms total_cost_time:209.38444137573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5856 prompt_cache_len:5151 prompt_cache_ratio:0.8796106557377049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 +DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.10633611679077148 s +INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.10825395584106445 s +DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=266760414679073014297838827265769629821, time:1750766726.7458067s req_ids:[8] +DEBUG 06-24 20:05:26 [manager.py:391] +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:202.99577713012695ms total_cost_time:203.05156707763672ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5857 prompt_cache_len:5151 prompt_cache_ratio:0.8794604746457231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 +DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.10837936401367188 s +INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.11048579216003418 s +DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=220466120611512142612463095873976691933, time:1750766726.9550085s req_ids:[8] +DEBUG 06-24 20:05:26 [manager.py:391] +ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:203.57227325439453ms total_cost_time:203.61733436584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5858 prompt_cache_len:5151 prompt_cache_ratio:0.8793103448275862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 +DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10771727561950684 s +INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097266674041748 s +DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=134821454466315863154539601422746433918, time:1750766727.1628861s req_ids:[8] +DEBUG 06-24 20:05:27 [manager.py:391] +ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:204.0553092956543ms total_cost_time:204.09631729125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5859 prompt_cache_len:5151 prompt_cache_ratio:0.8791602662570405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 +DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10698294639587402 s +INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s +DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=132862812230334863190403326526797720794, time:1750766727.3710592s req_ids:[8] +DEBUG 06-24 20:05:27 [manager.py:391] +ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:205.60050010681152ms total_cost_time:205.65533638000488ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5860 prompt_cache_len:5151 prompt_cache_ratio:0.8790102389078498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 +DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10823917388916016 s +INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s +DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=276831155914955110812739613017683809734, time:1750766727.5828426s req_ids:[8] +DEBUG 06-24 20:05:27 [manager.py:391] +ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:203.90558242797852ms total_cost_time:203.9499282836914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5861 prompt_cache_len:5151 prompt_cache_ratio:0.8788602627537963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 +DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10755062103271484 s +INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s +DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=158837560188965962129419693574975003791, time:1750766727.7916977s req_ids:[8] +DEBUG 06-24 20:05:27 [manager.py:391] +ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:205.20853996276855ms total_cost_time:205.25169372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5862 prompt_cache_len:5151 prompt_cache_ratio:0.8787103377686797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 +DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10861587524414062 s +INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106574535369873 s +DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=267365800964887204896803713617676680851, time:1750766728.006392s req_ids:[8] +DEBUG 06-24 20:05:28 [manager.py:391] +ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:207.3071002960205ms total_cost_time:207.3521614074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5863 prompt_cache_len:5151 prompt_cache_ratio:0.8785604639263176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 +DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10674691200256348 s +INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s +DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=99786576739328307645200362814535502059, time:1750766728.214684s req_ids:[8] +DEBUG 06-24 20:05:28 [manager.py:391] +ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:206.00223541259766ms total_cost_time:206.04515075683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5864 prompt_cache_len:5151 prompt_cache_ratio:0.8784106412005457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 +DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10743165016174316 s +INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10934901237487793 s +DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=164259632419712909047324340898853753917, time:1750766728.424846s req_ids:[8] +DEBUG 06-24 20:05:28 [manager.py:391] +ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:205.06787300109863ms total_cost_time:205.1100730895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5865 prompt_cache_len:5151 prompt_cache_ratio:0.8782608695652174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 +DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10778355598449707 s +INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s +DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=302518441080223523771180885206035461137, time:1750766728.6457758s req_ids:[8] +DEBUG 06-24 20:05:28 [manager.py:391] +ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:218.1985378265381ms total_cost_time:218.24097633361816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5866 prompt_cache_len:5151 prompt_cache_ratio:0.8781111489942038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 +DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.3093092441558838 s +INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.3112492561340332 s +DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=53001004075857478438966631503466376375, time:1750766729.055668s req_ids:[8] +DEBUG 06-24 20:05:29 [manager.py:391] +ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:405.87592124938965ms total_cost_time:405.92002868652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5867 prompt_cache_len:5151 prompt_cache_ratio:0.8779614794613942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 +DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10670900344848633 s +INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10856771469116211 s +DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=164708127857232841700698221882702931243, time:1750766729.268062s req_ids:[8] +DEBUG 06-24 20:05:29 [manager.py:391] +ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:207.7329158782959ms total_cost_time:207.77440071105957ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5868 prompt_cache_len:5151 prompt_cache_ratio:0.8778118609406953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 +DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10664701461791992 s +INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10817813873291016 s +DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=180964126688548559453482646540539033502, time:1750766729.4795063s req_ids:[8] +DEBUG 06-24 20:05:29 [manager.py:391] +ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:170.82476615905762ms total_cost_time:170.8657741546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5869 prompt_cache_len:5151 prompt_cache_ratio:0.8776622934060317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 +DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10749125480651855 s +INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10942888259887695 s +DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=81118989916174723421674683087830855928, time:1750766729.6521387s req_ids:[8] +DEBUG 06-24 20:05:29 [manager.py:391] +ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:195.43838500976562ms total_cost_time:195.48320770263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5870 prompt_cache_len:5151 prompt_cache_ratio:0.8775127768313459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 +DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10671496391296387 s +INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10876822471618652 s +DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=181945187667740630881186361410800597670, time:1750766729.8545318s req_ids:[8] +DEBUG 06-24 20:05:29 [manager.py:391] +ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:205.5642604827881ms total_cost_time:205.60908317565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5871 prompt_cache_len:5151 prompt_cache_ratio:0.8773633111905978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 +DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10664796829223633 s +INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10855674743652344 s +DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=29715160147586941010783745802076900099, time:1750766730.0643985s req_ids:[8] +DEBUG 06-24 20:05:30 [manager.py:391] +ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:205.42550086975098ms total_cost_time:205.47175407409668ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5872 prompt_cache_len:5151 prompt_cache_ratio:0.8772138964577657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 +DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s +INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10993838310241699 s +DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=124273127636215433551459457371888764151, time:1750766730.2756112s req_ids:[8] +DEBUG 06-24 20:05:30 [manager.py:391] +ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:206.1138153076172ms total_cost_time:206.15911483764648ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5873 prompt_cache_len:5151 prompt_cache_ratio:0.8770645326068449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 +DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.1075901985168457 s +INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10959625244140625 s +DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=134650803688637093581155315749523218553, time:1750766730.4856517s req_ids:[8] +DEBUG 06-24 20:05:30 [manager.py:391] +ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:206.09211921691895ms total_cost_time:206.13551139831543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5874 prompt_cache_len:5151 prompt_cache_ratio:0.8769152196118488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 +DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.1062462329864502 s +INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10814857482910156 s +DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=222140940433738275078013409828372511769, time:1750766730.695645s req_ids:[8] +DEBUG 06-24 20:05:30 [manager.py:391] +ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:204.1454315185547ms total_cost_time:204.18810844421387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5875 prompt_cache_len:5151 prompt_cache_ratio:0.8767659574468085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 +DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s +INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10866093635559082 s +DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=265221968152092291397755966274140640296, time:1750766730.9055886s req_ids:[8] +DEBUG 06-24 20:05:30 [manager.py:391] +ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:207.7949047088623ms total_cost_time:207.8378200531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5876 prompt_cache_len:5151 prompt_cache_ratio:0.8766167460857727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 +DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10646438598632812 s +INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.10852289199829102 s +DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=236270317846096033880888357995200803581, time:1750766731.114795s req_ids:[8] +DEBUG 06-24 20:05:31 [manager.py:391] +ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:202.65817642211914ms total_cost_time:202.70037651062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5877 prompt_cache_len:5151 prompt_cache_ratio:0.8764675855028076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 +DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10719609260559082 s +INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s +DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=164403856939229580166367368514261368258, time:1750766731.3288934s req_ids:[8] +DEBUG 06-24 20:05:31 [manager.py:391] +ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:211.03167533874512ms total_cost_time:211.0755443572998ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5878 prompt_cache_len:5151 prompt_cache_ratio:0.8763184756719973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 +DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.1076197624206543 s +INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.1094961166381836 s +DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=269635757327963367040826164988685440570, time:1750766731.538054s req_ids:[8] +DEBUG 06-24 20:05:31 [manager.py:391] +ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:367.10381507873535ms total_cost_time:367.14720726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5879 prompt_cache_len:5151 prompt_cache_ratio:0.8761694165674434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 +DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10816717147827148 s +INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.11020207405090332 s +DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=214866989383641299867387913467734368570, time:1750766731.9029057s req_ids:[8] +DEBUG 06-24 20:05:31 [manager.py:391] +ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:195.87206840515137ms total_cost_time:195.91856002807617ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5880 prompt_cache_len:5151 prompt_cache_ratio:0.8760204081632653 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 +DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s +INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.1100149154663086 s +DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=106725213212099907266868693060755539329, time:1750766732.1144238s req_ids:[8] +DEBUG 06-24 20:05:32 [manager.py:391] +ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:210.91699600219727ms total_cost_time:210.97517013549805ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5881 prompt_cache_len:5151 prompt_cache_ratio:0.8758714504335997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 +DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10773730278015137 s +INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10972929000854492 s +DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=101947368284228535189896198802988323202, time:1750766732.3278925s req_ids:[8] +DEBUG 06-24 20:05:32 [manager.py:391] +ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:208.09173583984375ms total_cost_time:208.14967155456543ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5882 prompt_cache_len:5151 prompt_cache_ratio:0.8757225433526011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 +DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10857439041137695 s +INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.11048221588134766 s +DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=97679567535992670821866787776492695721, time:1750766732.5382626s req_ids:[8] +DEBUG 06-24 20:05:32 [manager.py:391] +ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:205.1219940185547ms total_cost_time:205.18183708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5883 prompt_cache_len:5151 prompt_cache_ratio:0.8755736868944416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 +DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s +INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s +DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=178435416047852832337546773932555505404, time:1750766732.7475448s req_ids:[8] +DEBUG 06-24 20:05:32 [manager.py:391] +ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:204.9119472503662ms total_cost_time:204.9720287322998ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:5884 prompt_cache_len:5151 prompt_cache_ratio:0.8754248810333106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 +DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10698151588439941 s +INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10898375511169434 s +DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=123666270782056740528551928461279383569, time:1750766732.955824s req_ids:[8] +DEBUG 06-24 20:05:32 [manager.py:391] +ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:204.07986640930176ms total_cost_time:204.12826538085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5885 prompt_cache_len:5151 prompt_cache_ratio:0.8752761257434155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 +DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10792684555053711 s +INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10981273651123047 s +DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=23923425125933659962189477342226765404, time:1750766733.1656048s req_ids:[8] +DEBUG 06-24 20:05:33 [manager.py:391] +ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:205.38711547851562ms total_cost_time:205.4462432861328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5886 prompt_cache_len:5151 prompt_cache_ratio:0.8751274209989807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 +DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10747241973876953 s +INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10938882827758789 s +DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=21342355386349848596045215303548627103, time:1750766733.3752594s req_ids:[8] +DEBUG 06-24 20:05:33 [manager.py:391] +ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:203.85456085205078ms total_cost_time:203.91440391540527ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5887 prompt_cache_len:5151 prompt_cache_ratio:0.8749787667742484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 +DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.1076822280883789 s +INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10958719253540039 s +DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=109405726276601417605796498330202614, time:1750766733.5846536s req_ids:[8] +DEBUG 06-24 20:05:33 [manager.py:391] +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:204.209566116333ms total_cost_time:204.29134368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.08177757263183594ms prompt_token_num:5888 prompt_cache_len:5151 prompt_cache_ratio:0.8748301630434783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 +DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10813713073730469 s +INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.11012673377990723 s +DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=265887485452802363983375518624640218209, time:1750766733.793306s req_ids:[8] +DEBUG 06-24 20:05:33 [manager.py:391] +ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:206.98094367980957ms total_cost_time:207.03983306884766ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5889 prompt_cache_len:5151 prompt_cache_ratio:0.8746816097809476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 +DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10696673393249512 s +INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10896015167236328 s +DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=183284529631983018413602053033903009801, time:1750766734.0030892s req_ids:[8] +DEBUG 06-24 20:05:34 [manager.py:391] +ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:203.200101852417ms total_cost_time:203.2606601715088ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5890 prompt_cache_len:5151 prompt_cache_ratio:0.8745331069609508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 +DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.10656476020812988 s +INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10844945907592773 s +DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=329287718120830507429386941046441970615, time:1750766734.2107918s req_ids:[8] +DEBUG 06-24 20:05:34 [manager.py:391] +ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:204.87046241760254ms total_cost_time:204.91671562194824ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5891 prompt_cache_len:5151 prompt_cache_ratio:0.8743846545578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 +DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.1074211597442627 s +INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10935449600219727 s +DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=118783105674865520560722692819544422296, time:1750766734.420839s req_ids:[8] +DEBUG 06-24 20:05:34 [manager.py:391] +ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:405.9295654296875ms total_cost_time:405.9736728668213ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5892 prompt_cache_len:5151 prompt_cache_ratio:0.8742362525458248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 +DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.10655832290649414 s +INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10847234725952148 s +DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=42792763810505906401079886865951985128, time:1750766734.823878s req_ids:[8] +DEBUG 06-24 20:05:34 [manager.py:391] +ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:197.0210075378418ms total_cost_time:197.0658302307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5893 prompt_cache_len:5151 prompt_cache_ratio:0.8740879008993722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 +DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10727286338806152 s +INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10917186737060547 s +DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=321263043437776973982265304590274819667, time:1750766735.0438063s req_ids:[8] +DEBUG 06-24 20:05:35 [manager.py:391] +DEBUG 06-24 20:05:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 26388.493 tokens/s +DEBUG 06-24 20:05:35 [stats.py:37] Avg prompt tokens throughput: 26379.508 tokens/s +DEBUG 06-24 20:05:35 [stats.py:37] Avg generate tokens throughput: 8.985 tokens/s +ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:218.8894748687744ms total_cost_time:218.9333438873291ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5894 prompt_cache_len:5151 prompt_cache_ratio:0.8739395995928062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 +DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.1068718433380127 s +INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10873031616210938 s +DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=144628765856183523328501397702438528985, time:1750766735.2574515s req_ids:[8] +DEBUG 06-24 20:05:35 [manager.py:391] +ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:206.05969429016113ms total_cost_time:206.1018943786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5895 prompt_cache_len:5151 prompt_cache_ratio:0.8737913486005089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 +DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10648083686828613 s +INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10841155052185059 s +DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=259031312636184950882605828011309746693, time:1750766735.4679787s req_ids:[8] +DEBUG 06-24 20:05:35 [manager.py:391] +ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:211.1058235168457ms total_cost_time:211.1494541168213ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5896 prompt_cache_len:5151 prompt_cache_ratio:0.8736431478968792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 +DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10660958290100098 s +INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10854458808898926 s +DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=265383747771238565895225696460475206297, time:1750766735.694489s req_ids:[8] +DEBUG 06-24 20:05:35 [manager.py:391] +ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:219.5894718170166ms total_cost_time:219.6335792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5897 prompt_cache_len:5151 prompt_cache_ratio:0.8734949974563337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 +DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10738754272460938 s +INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10929608345031738 s +DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=292285804024102488685631352250269746294, time:1750766735.9105105s req_ids:[8] +DEBUG 06-24 20:05:35 [manager.py:391] +ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:209.5177173614502ms total_cost_time:209.55872535705566ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5898 prompt_cache_len:5151 prompt_cache_ratio:0.8733468972533062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 +DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10753750801086426 s +INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10939884185791016 s +DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=245816984363902994808846964949334554694, time:1750766736.1198466s req_ids:[8] +DEBUG 06-24 20:05:36 [manager.py:391] +ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:203.1242847442627ms total_cost_time:203.16720008850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5899 prompt_cache_len:5151 prompt_cache_ratio:0.8731988472622478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 +DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10772562026977539 s +INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10960125923156738 s +DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=168634482844164825401986559154295363480, time:1750766736.3298683s req_ids:[8] +DEBUG 06-24 20:05:36 [manager.py:391] +ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:205.79171180725098ms total_cost_time:205.83415031433105ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5900 prompt_cache_len:5151 prompt_cache_ratio:0.8730508474576271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 +DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10741281509399414 s +INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10930275917053223 s +DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=171193577296389203140549453621901726523, time:1750766736.5403395s req_ids:[8] +DEBUG 06-24 20:05:36 [manager.py:391] +ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:207.75318145751953ms total_cost_time:207.7951431274414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5901 prompt_cache_len:5151 prompt_cache_ratio:0.8729028978139298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 +DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10689401626586914 s +INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10878610610961914 s +DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=148517907394385037002585101021750028516, time:1750766736.7500126s req_ids:[8] +DEBUG 06-24 20:05:36 [manager.py:391] +ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:204.00190353393555ms total_cost_time:204.04481887817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5902 prompt_cache_len:5151 prompt_cache_ratio:0.8727549983056591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 +DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10775041580200195 s +INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10962390899658203 s +DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=178241337152403988583453577654269440205, time:1750766736.9606419s req_ids:[8] +DEBUG 06-24 20:05:36 [manager.py:391] +ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:206.09188079833984ms total_cost_time:206.13527297973633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5903 prompt_cache_len:5151 prompt_cache_ratio:0.8726071489073353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 +DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.10789990425109863 s +INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.10987186431884766 s +DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=217514073107859485860528390999687253932, time:1750766737.1712465s req_ids:[8] +DEBUG 06-24 20:05:37 [manager.py:391] +ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:204.92243766784668ms total_cost_time:204.98156547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5904 prompt_cache_len:5151 prompt_cache_ratio:0.8724593495934959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 +DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.30886101722717285 s +INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.31081414222717285 s +DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=151456831129273920842199557142009969426, time:1750766737.597954s req_ids:[8] +DEBUG 06-24 20:05:37 [manager.py:391] +ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:422.0590591430664ms total_cost_time:422.105073928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5905 prompt_cache_len:5151 prompt_cache_ratio:0.872311600338696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 +DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.10759091377258301 s +INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s +DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=38094264507912673617324947294864808263, time:1750766737.8069682s req_ids:[8] +DEBUG 06-24 20:05:37 [manager.py:391] +ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:204.30684089660645ms total_cost_time:204.36787605285645ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5906 prompt_cache_len:5151 prompt_cache_ratio:0.8721639011175076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 +DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10756897926330566 s +INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.10954737663269043 s +DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=163010358919744624737680257640292067652, time:1750766738.0158596s req_ids:[8] +DEBUG 06-24 20:05:38 [manager.py:391] +ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:220.81398963928223ms total_cost_time:220.87335586547852ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5907 prompt_cache_len:5151 prompt_cache_ratio:0.87201625190452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 +DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.11127209663391113 s +INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1132051944732666 s +DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=111244164488570237428228703712044603737, time:1750766738.234603s req_ids:[8] +DEBUG 06-24 20:05:38 [manager.py:391] +ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:195.7724094390869ms total_cost_time:195.93048095703125ms,out_token_counter:1 mean_per_token_cost_time: 0.15807151794433594ms prompt_token_num:5908 prompt_cache_len:5151 prompt_cache_ratio:0.8718686526743399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 +DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10906171798706055 s +INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1109914779663086 s +DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=257019394870488663321458114262251973497, time:1750766738.4406104s req_ids:[8] +DEBUG 06-24 20:05:38 [manager.py:391] +ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:195.86491584777832ms total_cost_time:195.9061622619629ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5909 prompt_cache_len:5151 prompt_cache_ratio:0.8717211034015908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 +DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10768747329711914 s +INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1095888614654541 s +DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=316151401935455185858536369403285693098, time:1750766738.645771s req_ids:[8] +DEBUG 06-24 20:05:38 [manager.py:391] +ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:214.85352516174316ms total_cost_time:214.89834785461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5910 prompt_cache_len:5151 prompt_cache_ratio:0.8715736040609137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 +DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10767364501953125 s +INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s +DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=322094231416616230583717242209418326981, time:1750766738.8593504s req_ids:[8] +DEBUG 06-24 20:05:38 [manager.py:391] +ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:198.1973648071289ms total_cost_time:198.25315475463867ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5911 prompt_cache_len:5151 prompt_cache_ratio:0.8714261546269667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 +DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10902285575866699 s +INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11087703704833984 s +INFO 06-24 20:05:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=114660758787471047487035244980950098389, time:1750766739.0680265s req_ids:[8] +DEBUG 06-24 20:05:39 [manager.py:391] +ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:204.21338081359863ms total_cost_time:204.25891876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5912 prompt_cache_len:5151 prompt_cache_ratio:0.8712787550744249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 +DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10720062255859375 s +INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.10924005508422852 s +DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=249176044338494554850261883115622713228, time:1750766739.2778916s req_ids:[8] +DEBUG 06-24 20:05:39 [manager.py:391] +ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:214.21146392822266ms total_cost_time:214.25628662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5913 prompt_cache_len:5151 prompt_cache_ratio:0.8711314053779807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 +DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10844969749450684 s +INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11038780212402344 s +DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=157419726146854861868801583056416366792, time:1750766739.4927173s req_ids:[8] +DEBUG 06-24 20:05:39 [manager.py:391] +ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:199.97692108154297ms total_cost_time:200.03247261047363ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:5914 prompt_cache_len:5151 prompt_cache_ratio:0.8709841055123436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 +DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10835099220275879 s +INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11035275459289551 s +DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=77273508830135499011469575554018049537, time:1750766739.7001789s req_ids:[8] +DEBUG 06-24 20:05:39 [manager.py:391] +ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:203.25064659118652ms total_cost_time:203.2949924468994ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5915 prompt_cache_len:5151 prompt_cache_ratio:0.8708368554522401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 +DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10684943199157715 s +INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.10877513885498047 s +DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=133179247841005326712862507837341167875, time:1750766739.9093688s req_ids:[8] +DEBUG 06-24 20:05:39 [manager.py:391] +ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:207.11803436279297ms total_cost_time:207.16142654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5916 prompt_cache_len:5151 prompt_cache_ratio:0.8706896551724138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 +DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10852932929992676 s +INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104896068572998 s +DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=181626636729052983992307442542036322159, time:1750766740.122249s req_ids:[8] +DEBUG 06-24 20:05:40 [manager.py:391] +ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:367.49744415283203ms total_cost_time:367.5415515899658ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5917 prompt_cache_len:5151 prompt_cache_ratio:0.8705425046476255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 +DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10830473899841309 s +INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.11044979095458984 s +DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=152001827886474994107589433056427983166, time:1750766740.4863083s req_ids:[8] +DEBUG 06-24 20:05:40 [manager.py:391] +ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:204.80799674987793ms total_cost_time:204.85353469848633ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5918 prompt_cache_len:5151 prompt_cache_ratio:0.8703954038526529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 +DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10707783699035645 s +INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.10896968841552734 s +DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=300633274443823352844279249176860097927, time:1750766740.698046s req_ids:[8] +DEBUG 06-24 20:05:40 [manager.py:391] +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:197.4780559539795ms total_cost_time:197.52001762390137ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5919 prompt_cache_len:5151 prompt_cache_ratio:0.870248352762291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 +DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10670804977416992 s +INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.10865283012390137 s +DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=108102634263331283575687312091738207278, time:1750766740.9043477s req_ids:[8] +DEBUG 06-24 20:05:40 [manager.py:391] +ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:204.82683181762695ms total_cost_time:204.87046241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5920 prompt_cache_len:5151 prompt_cache_ratio:0.8701013513513514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 +DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10911393165588379 s +INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s +DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=235916167943205028888782710612671262144, time:1750766741.1142182s req_ids:[8] +DEBUG 06-24 20:05:41 [manager.py:391] +ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:205.95335960388184ms total_cost_time:205.99651336669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5921 prompt_cache_len:5151 prompt_cache_ratio:0.869954399594663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 +DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.1074364185333252 s +INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10950422286987305 s +DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=301608564280635476313578938345141703166, time:1750766741.3290462s req_ids:[8] +DEBUG 06-24 20:05:41 [manager.py:391] +ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:211.7331027984619ms total_cost_time:211.7929458618164ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5922 prompt_cache_len:5151 prompt_cache_ratio:0.869807497467072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 +DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.1075282096862793 s +INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10944795608520508 s +DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=171429433126248174320469178699743283025, time:1750766741.5392385s req_ids:[8] +DEBUG 06-24 20:05:41 [manager.py:391] +ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:204.1032314300537ms total_cost_time:204.1473388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5923 prompt_cache_len:5151 prompt_cache_ratio:0.8696606449434409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 +DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10632443428039551 s +INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10831475257873535 s +DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=275252570970759150657999435858015477785, time:1750766741.7484965s req_ids:[8] +DEBUG 06-24 20:05:41 [manager.py:391] +ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:204.60772514343262ms total_cost_time:204.6511173248291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5924 prompt_cache_len:5151 prompt_cache_ratio:0.8695138419986496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 +DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10732293128967285 s +INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10935759544372559 s +DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=44404975909782431791128604979760313821, time:1750766741.9576838s req_ids:[8] +DEBUG 06-24 20:05:41 [manager.py:391] +ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:206.30502700805664ms total_cost_time:206.34913444519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5925 prompt_cache_len:5151 prompt_cache_ratio:0.8693670886075949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 +DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10872936248779297 s +INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s +DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=102912430039983633823064929478532407910, time:1750766742.1720471s req_ids:[8] +DEBUG 06-24 20:05:42 [manager.py:391] +ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:210.62040328979492ms total_cost_time:210.6640338897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5926 prompt_cache_len:5151 prompt_cache_ratio:0.8692203847451907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 +DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10639762878417969 s +INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.10830879211425781 s +DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=277821636332524685911340699948649125369, time:1750766742.3843956s req_ids:[8] +DEBUG 06-24 20:05:42 [manager.py:391] +ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:225.9845733642578ms total_cost_time:226.0286808013916ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5927 prompt_cache_len:5151 prompt_cache_ratio:0.8690737303863675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 +DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:42 [batch.py:51] router release req id 8 +INFO 06-24 20:05:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.1065208911895752 s +INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.10833454132080078 s +DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=35995670673423249789911032409996801814, time:1750766742.6158297s req_ids:[8] +DEBUG 06-24 20:05:42 [manager.py:391] +ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:201.30038261413574ms total_cost_time:201.34258270263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5928 prompt_cache_len:5151 prompt_cache_ratio:0.8689271255060729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 +DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10826253890991211 s +INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031079292297363 s +DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=16379239979929699217054309005453892902, time:1750766742.8247406s req_ids:[8] +DEBUG 06-24 20:05:42 [manager.py:391] +ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:206.3913345336914ms total_cost_time:206.4359188079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5929 prompt_cache_len:5151 prompt_cache_ratio:0.8687805700792713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 +DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.3093404769897461 s +INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.3112220764160156 s +DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=129439380047136193216685825403293327015, time:1750766743.251915s req_ids:[8] +DEBUG 06-24 20:05:43 [manager.py:391] +ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:427.47044563293457ms total_cost_time:427.52552032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:5930 prompt_cache_len:5151 prompt_cache_ratio:0.8686340640809443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 +DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s +INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.110137939453125 s +DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=127426075875698976452704002990004581271, time:1750766743.4636261s req_ids:[8] +DEBUG 06-24 20:05:43 [manager.py:391] +ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:205.5501937866211ms total_cost_time:205.59358596801758ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5931 prompt_cache_len:5151 prompt_cache_ratio:0.86848760748609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 +DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.10851263999938965 s +INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.1105799674987793 s +DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=201901603690389837099490448017098832166, time:1750766743.6756063s req_ids:[8] +DEBUG 06-24 20:05:43 [manager.py:391] +ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.36611366271973ms total_cost_time:202.4099826812744ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5932 prompt_cache_len:5151 prompt_cache_ratio:0.8683412002697235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 +DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.1070563793182373 s +INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s +DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=135160899731608524104247853024960416166, time:1750766743.8817742s req_ids:[8] +DEBUG 06-24 20:05:43 [manager.py:391] +ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.99744606018066ms total_cost_time:203.03964614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5933 prompt_cache_len:5151 prompt_cache_ratio:0.8681948424068768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 +DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10766291618347168 s +INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s +DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=163682811720249964848261335850125851149, time:1750766744.0892234s req_ids:[8] +DEBUG 06-24 20:05:44 [manager.py:391] +ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.41141319274902ms total_cost_time:202.45671272277832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5934 prompt_cache_len:5151 prompt_cache_ratio:0.8680485338725986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 +DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s +INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10920333862304688 s +DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=214140426092099590280341965821289001537, time:1750766744.302957s req_ids:[8] +DEBUG 06-24 20:05:44 [manager.py:391] +ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:212.5225067138672ms total_cost_time:212.56661415100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5935 prompt_cache_len:5151 prompt_cache_ratio:0.8679022746419545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 +DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10871243476867676 s +INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.11063480377197266 s +DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=124423844859483329659866094771703773225, time:1750766744.5143557s req_ids:[8] +DEBUG 06-24 20:05:44 [manager.py:391] +ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:205.07287979125977ms total_cost_time:205.11460304260254ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5936 prompt_cache_len:5151 prompt_cache_ratio:0.867756064690027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 +DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.1070559024810791 s +INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10898566246032715 s +DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=12073605763005068082579359030135786602, time:1750766744.7248528s req_ids:[8] +DEBUG 06-24 20:05:44 [manager.py:391] +ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:217.36812591552734ms total_cost_time:217.41366386413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5937 prompt_cache_len:5151 prompt_cache_ratio:0.8676099039919151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 +DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10693073272705078 s +INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10883164405822754 s +DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=90805269083624779918806064048377080440, time:1750766744.9490588s req_ids:[8] +DEBUG 06-24 20:05:44 [manager.py:391] +ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:207.7796459197998ms total_cost_time:207.83400535583496ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5938 prompt_cache_len:5151 prompt_cache_ratio:0.867463792522735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 +DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10896849632263184 s +INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s +DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=54128234162562651870199740421830130095, time:1750766745.1586773s req_ids:[8] +DEBUG 06-24 20:05:45 [manager.py:391] +DEBUG 06-24 20:05:45 [stats.py:37] Avg tokens(prompt+generate) throughput: 26333.194 tokens/s +DEBUG 06-24 20:05:45 [stats.py:37] Avg prompt tokens throughput: 26324.296 tokens/s +DEBUG 06-24 20:05:45 [stats.py:37] Avg generate tokens throughput: 8.898 tokens/s +ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:204.71572875976562ms total_cost_time:204.7584056854248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5939 prompt_cache_len:5151 prompt_cache_ratio:0.8673177302576192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 +DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s +INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.10935401916503906 s +DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=112842464064063731892480378855172805627, time:1750766745.368699s req_ids:[8] +DEBUG 06-24 20:05:45 [manager.py:391] +ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:208.58120918273926ms total_cost_time:208.62269401550293ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5940 prompt_cache_len:5151 prompt_cache_ratio:0.8671717171717171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 +DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10722136497497559 s +INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.1091463565826416 s +DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=160339710711583950389343699079330280289, time:1750766745.5796046s req_ids:[8] +DEBUG 06-24 20:05:45 [manager.py:391] +ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:209.84625816345215ms total_cost_time:209.88821983337402ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5941 prompt_cache_len:5151 prompt_cache_ratio:0.8670257532401953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 +DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.30837249755859375 s +INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.3104386329650879 s +DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=24260915096826942641782324216921801579, time:1750766745.9963193s req_ids:[8] +DEBUG 06-24 20:05:45 [manager.py:391] +ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:418.51139068603516ms total_cost_time:418.55502128601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5942 prompt_cache_len:5151 prompt_cache_ratio:0.8668798384382362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 +DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10840630531311035 s +INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.11040091514587402 s +DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=270284450233264779789483556926320525598, time:1750766746.230256s req_ids:[8] +DEBUG 06-24 20:05:46 [manager.py:391] +ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:217.2107696533203ms total_cost_time:217.2558307647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5943 prompt_cache_len:5151 prompt_cache_ratio:0.8667339727410399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 +INFO 06-24 20:05:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10868406295776367 s +INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.11052703857421875 s +DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=160130006833086861845403311815971520988, time:1750766746.4473963s req_ids:[8] +DEBUG 06-24 20:05:46 [manager.py:391] +ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:209.99526977539062ms total_cost_time:210.03961563110352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5944 prompt_cache_len:5151 prompt_cache_ratio:0.8665881561238223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 +DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10767602920532227 s +INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s +DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=141508397205182845527168418005562325703, time:1750766746.6605678s req_ids:[8] +DEBUG 06-24 20:05:46 [manager.py:391] +ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:212.493896484375ms total_cost_time:212.5382423400879ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5945 prompt_cache_len:5151 prompt_cache_ratio:0.8664423885618167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 +DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10787725448608398 s +INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s +DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=328975330132373566534277658549044379774, time:1750766746.8724654s req_ids:[8] +DEBUG 06-24 20:05:46 [manager.py:391] +ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:206.94327354431152ms total_cost_time:207.0004940032959ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5946 prompt_cache_len:5151 prompt_cache_ratio:0.8662966700302724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 +DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10690045356750488 s +INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.10879755020141602 s +DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=97554934084245309769905067245421374941, time:1750766747.0847585s req_ids:[8] +DEBUG 06-24 20:05:47 [manager.py:391] +ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:212.7671241760254ms total_cost_time:212.81170845031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5947 prompt_cache_len:5151 prompt_cache_ratio:0.866151000504456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 +DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10864901542663574 s +INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11059999465942383 s +DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=201741406272564656110662075331555046199, time:1750766747.297187s req_ids:[8] +DEBUG 06-24 20:05:47 [manager.py:391] +ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:195.6155300140381ms total_cost_time:195.67322731018066ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5948 prompt_cache_len:5151 prompt_cache_ratio:0.8660053799596503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 +DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10891103744506836 s +INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11089944839477539 s +DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=89413130611419990146106968658329412848, time:1750766747.5209644s req_ids:[8] +DEBUG 06-24 20:05:47 [manager.py:391] +ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:216.02249145507812ms total_cost_time:216.0813808441162ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5949 prompt_cache_len:5151 prompt_cache_ratio:0.8658598083711548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 +DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10759353637695312 s +INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.10949969291687012 s +DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=36496211028081786553224994863031676824, time:1750766747.7373848s req_ids:[8] +DEBUG 06-24 20:05:47 [manager.py:391] +ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:207.36384391784668ms total_cost_time:207.40675926208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5950 prompt_cache_len:5151 prompt_cache_ratio:0.8657142857142858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 +DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10863590240478516 s +INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s +DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=128278051825058905235174300962432237617, time:1750766747.9420974s req_ids:[8] +DEBUG 06-24 20:05:47 [manager.py:391] +ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:202.33607292175293ms total_cost_time:202.3794651031494ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5951 prompt_cache_len:5151 prompt_cache_ratio:0.8655688119643757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 +DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10887002944946289 s +INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11083364486694336 s +DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=99076477718657234264185631570161401892, time:1750766748.1506126s req_ids:[8] +DEBUG 06-24 20:05:48 [manager.py:391] +ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:213.29450607299805ms total_cost_time:213.34004402160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5952 prompt_cache_len:5151 prompt_cache_ratio:0.8654233870967742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 +DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.1094810962677002 s +INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11152768135070801 s +DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=41483792469898467468946724507271740079, time:1750766748.3643184s req_ids:[8] +DEBUG 06-24 20:05:48 [manager.py:391] +ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:198.3785629272461ms total_cost_time:198.43769073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5953 prompt_cache_len:5151 prompt_cache_ratio:0.865278011086847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 +DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10754990577697754 s +INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966134071350098 s +DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=125902274602996686839827660586087944252, time:1750766748.57264s req_ids:[8] +DEBUG 06-24 20:05:48 [manager.py:391] +ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:389.24503326416016ms total_cost_time:389.30416107177734ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5954 prompt_cache_len:5151 prompt_cache_ratio:0.8651326839099764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 +DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10943293571472168 s +INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11158585548400879 s +DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=93825880385553514424003276212867291768, time:1750766748.9620855s req_ids:[8] +DEBUG 06-24 20:05:48 [manager.py:391] +ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:200.7579803466797ms total_cost_time:200.80041885375977ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5955 prompt_cache_len:5151 prompt_cache_ratio:0.8649874055415617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 +DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.10632729530334473 s +INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.10819625854492188 s +DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=232116031451780088576439673604026543872, time:1750766749.1782868s req_ids:[8] +DEBUG 06-24 20:05:49 [manager.py:391] +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:212.9521369934082ms total_cost_time:213.0117416381836ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5956 prompt_cache_len:5151 prompt_cache_ratio:0.8648421759570182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 +DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.1081385612487793 s +INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.11017322540283203 s +DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=213150724174912118763928887621427491964, time:1750766749.392174s req_ids:[8] +DEBUG 06-24 20:05:49 [manager.py:391] +ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:214.97178077697754ms total_cost_time:215.0135040283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5957 prompt_cache_len:5151 prompt_cache_ratio:0.8646969951317778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 +DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.1094357967376709 s +INFO 06-24 20:05:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.11170125007629395 s +DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=290957100067134969791149684646401836800, time:1750766749.603784s req_ids:[8] +DEBUG 06-24 20:05:49 [manager.py:391] +ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:197.1290111541748ms total_cost_time:197.1719264984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5958 prompt_cache_len:5151 prompt_cache_ratio:0.8645518630412891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 +DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.10773658752441406 s +INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.1096949577331543 s +DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=265698227134966737820480521263344306228, time:1750766749.8126616s req_ids:[8] +DEBUG 06-24 20:05:49 [manager.py:391] +ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:206.65812492370605ms total_cost_time:206.70127868652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5959 prompt_cache_len:5151 prompt_cache_ratio:0.864406779661017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 +DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10831665992736816 s +INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.11037087440490723 s +DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=66509292563218109666350743280734973981, time:1750766750.0224252s req_ids:[8] +DEBUG 06-24 20:05:50 [manager.py:391] +ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:212.57567405700684ms total_cost_time:212.61930465698242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5960 prompt_cache_len:5151 prompt_cache_ratio:0.8642617449664429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 +DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.1101219654083252 s +INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.11220383644104004 s +DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=68311589154665885032269309568024454609, time:1750766750.2369223s req_ids:[8] +DEBUG 06-24 20:05:50 [manager.py:391] +ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:198.98462295532227ms total_cost_time:199.02634620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5961 prompt_cache_len:5151 prompt_cache_ratio:0.8641167589330649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 +DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10746598243713379 s +INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10944890975952148 s +DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=187089546669287271830158747387289628042, time:1750766750.4448397s req_ids:[8] +DEBUG 06-24 20:05:50 [manager.py:391] +ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:206.61354064941406ms total_cost_time:206.65717124938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5962 prompt_cache_len:5151 prompt_cache_ratio:0.8639718215363972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 +DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10704159736633301 s +INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10902833938598633 s +DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=336832597321368935844338338553773306677, time:1750766750.6548817s req_ids:[8] +DEBUG 06-24 20:05:50 [manager.py:391] +ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:203.58777046203613ms total_cost_time:203.63163948059082ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5963 prompt_cache_len:5151 prompt_cache_ratio:0.8638269327519705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 +DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10625648498535156 s +INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10817217826843262 s +DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=88711568420442278984566495200523889085, time:1750766750.8724267s req_ids:[8] +DEBUG 06-24 20:05:50 [manager.py:391] +ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:215.52801132202148ms total_cost_time:215.57188034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5964 prompt_cache_len:5151 prompt_cache_ratio:0.863682092555332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 +DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10653257369995117 s +INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.10875248908996582 s +DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=282462538562656376028030544333096649558, time:1750766751.0828886s req_ids:[8] +DEBUG 06-24 20:05:51 [manager.py:391] +ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:212.22829818725586ms total_cost_time:212.27264404296875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5965 prompt_cache_len:5151 prompt_cache_ratio:0.8635373009220453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 +DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s +INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.11126470565795898 s +DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=233528624480736105939881513180315876313, time:1750766751.2968867s req_ids:[8] +DEBUG 06-24 20:05:51 [manager.py:391] +ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:198.78625869750977ms total_cost_time:198.82917404174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5966 prompt_cache_len:5151 prompt_cache_ratio:0.8633925578276902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 +DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.3094794750213623 s +INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.3116569519042969 s +DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=204490699424441161818721750353064685123, time:1750766751.7089698s req_ids:[8] +DEBUG 06-24 20:05:51 [manager.py:391] +ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:413.13838958740234ms total_cost_time:413.18440437316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5967 prompt_cache_len:5151 prompt_cache_ratio:0.8632478632478633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 +DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10658478736877441 s +INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.1085357666015625 s +DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=310507340447272352776526822154154186446, time:1750766751.9393249s req_ids:[8] +DEBUG 06-24 20:05:51 [manager.py:391] +ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:222.6102352142334ms total_cost_time:222.65267372131348ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5968 prompt_cache_len:5151 prompt_cache_ratio:0.863103217158177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 +DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10782361030578613 s +INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.1099996566772461 s +DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=233544677597134714539503231736653218209, time:1750766752.1509347s req_ids:[8] +DEBUG 06-24 20:05:52 [manager.py:391] +ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:215.27099609375ms total_cost_time:215.3148651123047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5969 prompt_cache_len:5151 prompt_cache_ratio:0.8629586195342603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 +DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10892891883850098 s +INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.11084771156311035 s +DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=174444203205848138744276360695841723479, time:1750766752.37145s req_ids:[8] +DEBUG 06-24 20:05:52 [manager.py:391] +ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:205.51061630249023ms total_cost_time:205.5532932281494ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5970 prompt_cache_len:5151 prompt_cache_ratio:0.8628140703517588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 +DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10739898681640625 s +INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.10953855514526367 s +DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=112352746556105372410892225798683454658, time:1750766752.5811937s req_ids:[8] +DEBUG 06-24 20:05:52 [manager.py:391] +ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:207.19170570373535ms total_cost_time:207.23485946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5971 prompt_cache_len:5151 prompt_cache_ratio:0.8626695695863339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 +DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10709023475646973 s +INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.10908055305480957 s +DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=81601539618143760172780318311462308195, time:1750766752.7926688s req_ids:[8] +DEBUG 06-24 20:05:52 [manager.py:391] +ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:205.88088035583496ms total_cost_time:205.92427253723145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5972 prompt_cache_len:5151 prompt_cache_ratio:0.8625251172136638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 +DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10826730728149414 s +INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.11013412475585938 s +DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=87497033073507314089656603941486919313, time:1750766753.0036s req_ids:[8] +DEBUG 06-24 20:05:53 [manager.py:391] +ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:206.53915405273438ms total_cost_time:206.58254623413086ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5973 prompt_cache_len:5151 prompt_cache_ratio:0.8623807132094425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 +DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:53 [batch.py:51] router release req id 8 +INFO 06-24 20:05:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:05:53 [statics_utils.py:24] mean first cost: 243.96374326227877 ms +INFO 06-24 20:05:53 [statics_utils.py:24] mean per token cost: 0.14018381944647604 ms +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10816001892089844 s +INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.11017537117004395 s +DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=160541479694583813617532479330037742474, time:1750766753.2145755s req_ids:[8] +DEBUG 06-24 20:05:53 [manager.py:391] +ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:205.8546543121338ms total_cost_time:205.89852333068848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5974 prompt_cache_len:5151 prompt_cache_ratio:0.8622363575493807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 +DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.1065666675567627 s +INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.10859274864196777 s +DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=136995348244606884830980312973085187167, time:1750766753.4409115s req_ids:[8] +DEBUG 06-24 20:05:53 [manager.py:391] +ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:222.83363342285156ms total_cost_time:222.87821769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5975 prompt_cache_len:5151 prompt_cache_ratio:0.862092050209205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 +DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s +INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.10944604873657227 s +DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=50777332959909825465698093432459104499, time:1750766753.6536458s req_ids:[8] +DEBUG 06-24 20:05:53 [manager.py:391] +ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:219.46430206298828ms total_cost_time:219.50793266296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5976 prompt_cache_len:5151 prompt_cache_ratio:0.8619477911646586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 +DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10897660255432129 s +INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.11095976829528809 s +DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=232954780820797137435187514800106915712, time:1750766753.8694084s req_ids:[8] +DEBUG 06-24 20:05:53 [manager.py:391] +ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:195.71852684020996ms total_cost_time:195.76191902160645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5977 prompt_cache_len:5151 prompt_cache_ratio:0.8618035803915007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 +DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10754776000976562 s +INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.10950040817260742 s +DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=18527719879534534413897382772101630377, time:1750766754.0758874s req_ids:[8] +DEBUG 06-24 20:05:54 [manager.py:391] +ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:209.98454093933105ms total_cost_time:210.02650260925293ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5978 prompt_cache_len:5151 prompt_cache_ratio:0.8616594178655068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 +DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10907244682312012 s +INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.11116528511047363 s +DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=112919749214389839482693669424151537171, time:1750766754.287938s req_ids:[8] +DEBUG 06-24 20:05:54 [manager.py:391] +ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:358.3719730377197ms total_cost_time:358.414888381958ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5979 prompt_cache_len:5151 prompt_cache_ratio:0.8615153035624686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 +DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10930490493774414 s +INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.11124348640441895 s +DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=144461363616351480408530902084407063625, time:1750766754.6468432s req_ids:[8] +DEBUG 06-24 20:05:54 [manager.py:391] +ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:196.75254821777344ms total_cost_time:196.7945098876953ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5980 prompt_cache_len:5151 prompt_cache_ratio:0.861371237458194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 +DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10689306259155273 s +INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.10881638526916504 s +DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=154027440084857640457042252118385257471, time:1750766754.8560915s req_ids:[8] +DEBUG 06-24 20:05:54 [manager.py:391] +ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:214.1861915588379ms total_cost_time:214.23053741455078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5981 prompt_cache_len:5151 prompt_cache_ratio:0.8612272195285069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 +DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10979127883911133 s +INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.11171960830688477 s +DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=147797385138120558451791632165306027119, time:1750766755.0709698s req_ids:[8] +DEBUG 06-24 20:05:55 [manager.py:391] +ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:201.19714736938477ms total_cost_time:201.24053955078125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5982 prompt_cache_len:5151 prompt_cache_ratio:0.8610832497492478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 +DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10650348663330078 s +INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10848164558410645 s +DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=16381280202610690378562921273254971086, time:1750766755.281764s req_ids:[8] +DEBUG 06-24 20:05:55 [manager.py:391] +DEBUG 06-24 20:05:55 [stats.py:37] Avg tokens(prompt+generate) throughput: 25920.329 tokens/s +DEBUG 06-24 20:05:55 [stats.py:37] Avg prompt tokens throughput: 25911.636 tokens/s +DEBUG 06-24 20:05:55 [stats.py:37] Avg generate tokens throughput: 8.693 tokens/s +ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:205.52515983581543ms total_cost_time:205.5678367614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5983 prompt_cache_len:5151 prompt_cache_ratio:0.8609393280962728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 +DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10726261138916016 s +INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s +DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=274677425798838795707847528826137074219, time:1750766755.4967396s req_ids:[8] +DEBUG 06-24 20:05:55 [manager.py:391] +ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:208.60934257507324ms total_cost_time:208.65249633789062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5984 prompt_cache_len:5151 prompt_cache_ratio:0.8607954545454546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 +DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10795474052429199 s +INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10983943939208984 s +DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=137642812051166655080394664561366117838, time:1750766755.7114305s req_ids:[8] +DEBUG 06-24 20:05:55 [manager.py:391] +ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:211.4851474761963ms total_cost_time:211.5304470062256ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5985 prompt_cache_len:5151 prompt_cache_ratio:0.8606516290726817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 +DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10726571083068848 s +INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10927128791809082 s +DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=107752442356689597624157973513950714135, time:1750766755.920558s req_ids:[8] +DEBUG 06-24 20:05:55 [manager.py:391] +ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:206.92682266235352ms total_cost_time:206.9873809814453ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5986 prompt_cache_len:5151 prompt_cache_ratio:0.860507851653859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 +DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.1083228588104248 s +INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s +DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=205542709139994294980388602615776041534, time:1750766756.132257s req_ids:[8] +DEBUG 06-24 20:05:56 [manager.py:391] +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:210.50405502319336ms total_cost_time:210.54720878601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5987 prompt_cache_len:5151 prompt_cache_ratio:0.8603641222649073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 +DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10917329788208008 s +INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.11127543449401855 s +DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=246875837048353226761367920699651202360, time:1750766756.344702s req_ids:[8] +DEBUG 06-24 20:05:56 [manager.py:391] +ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:196.08736038208008ms total_cost_time:196.13122940063477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5988 prompt_cache_len:5151 prompt_cache_ratio:0.8602204408817635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 +DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s +INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.1097874641418457 s +DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=124258850992971139955494198883481897181, time:1750766756.548638s req_ids:[8] +DEBUG 06-24 20:05:56 [manager.py:391] +ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:201.03883743286133ms total_cost_time:201.0822296142578ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5989 prompt_cache_len:5151 prompt_cache_ratio:0.8600768074803807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 +DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:05:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10655832290649414 s +INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.10842108726501465 s +DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=212001235388484545402876139944954118167, time:1750766756.7544267s req_ids:[8] +DEBUG 06-24 20:05:56 [manager.py:391] +ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:210.4170322418213ms total_cost_time:210.46066284179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5990 prompt_cache_len:5151 prompt_cache_ratio:0.8599332220367278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 +DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10787153244018555 s +INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.10989618301391602 s +DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=195118263744690451613304927831181151582, time:1750766756.9678667s req_ids:[8] +DEBUG 06-24 20:05:56 [manager.py:391] +ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:366.00184440612793ms total_cost_time:366.04881286621094ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5991 prompt_cache_len:5151 prompt_cache_ratio:0.8597896845267902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 +DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10695695877075195 s +INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.1089930534362793 s +DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=121972919079684487616597755571680397213, time:1750766757.332667s req_ids:[8] +DEBUG 06-24 20:05:57 [manager.py:391] +ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:194.9460506439209ms total_cost_time:194.98944282531738ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5992 prompt_cache_len:5151 prompt_cache_ratio:0.8596461949265688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 +DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.1072087287902832 s +INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.10913610458374023 s +DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=8547435340866420949849757099965804813, time:1750766757.542335s req_ids:[8] +DEBUG 06-24 20:05:57 [manager.py:391] +ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:207.12828636169434ms total_cost_time:207.17406272888184ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5993 prompt_cache_len:5151 prompt_cache_ratio:0.8595027532120808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 +DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10664653778076172 s +INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.10862231254577637 s +DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=271941149118837530208832393733773513095, time:1750766757.7521827s req_ids:[8] +DEBUG 06-24 20:05:57 [manager.py:391] +ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:215.93117713928223ms total_cost_time:215.97647666931152ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5994 prompt_cache_len:5151 prompt_cache_ratio:0.8593593593593594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 +DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10994219779968262 s +INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.11186718940734863 s +DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=205601139107771026948904433732531077996, time:1750766757.9676685s req_ids:[8] +DEBUG 06-24 20:05:57 [manager.py:391] +ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:159.2123508453369ms total_cost_time:159.2557430267334ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5995 prompt_cache_len:5151 prompt_cache_ratio:0.8592160133444537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 +DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10760664939880371 s +INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.1096200942993164 s +DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=73719984015007487088031442765655411250, time:1750766758.1318653s req_ids:[8] +DEBUG 06-24 20:05:58 [manager.py:391] +ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:195.2533721923828ms total_cost_time:195.2979564666748ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5996 prompt_cache_len:5151 prompt_cache_ratio:0.859072715143429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 +DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10694169998168945 s +INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.1089327335357666 s +DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=159971970776071997799528475970192885580, time:1750766758.3341148s req_ids:[8] +DEBUG 06-24 20:05:58 [manager.py:391] +ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:200.5155086517334ms total_cost_time:200.55699348449707ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5997 prompt_cache_len:5151 prompt_cache_ratio:0.8589294647323662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 +DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10676407814025879 s +INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10884356498718262 s +DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=157241660816996135308371881641383297949, time:1750766758.5422552s req_ids:[8] +DEBUG 06-24 20:05:58 [manager.py:391] +ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:207.43274688720703ms total_cost_time:207.4747085571289ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5998 prompt_cache_len:5151 prompt_cache_ratio:0.8587862620873624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 +DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10662698745727539 s +INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10860681533813477 s +DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=111218329920431504922144730136015821807, time:1750766758.754807s req_ids:[8] +DEBUG 06-24 20:05:58 [manager.py:391] +ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:207.19122886657715ms total_cost_time:207.23509788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5999 prompt_cache_len:5151 prompt_cache_ratio:0.8586431071845307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 +DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10687136650085449 s +INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10888338088989258 s +DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=259461990585894679082593061496660544785, time:1750766758.963067s req_ids:[8] +DEBUG 06-24 20:05:58 [manager.py:391] +ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:209.4886302947998ms total_cost_time:209.53059196472168ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6000 prompt_cache_len:5151 prompt_cache_ratio:0.8585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 +DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10695028305053711 s +INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.10888791084289551 s +DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=163430304304878730975312654902442757575, time:1750766759.1738033s req_ids:[8] +DEBUG 06-24 20:05:59 [manager.py:391] +ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:202.6839256286621ms total_cost_time:202.7263641357422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6001 prompt_cache_len:5151 prompt_cache_ratio:0.8583569405099151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 +DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10641145706176758 s +INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s +DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=161240589611204228590144522119641037923, time:1750766759.3914907s req_ids:[8] +DEBUG 06-24 20:05:59 [manager.py:391] +ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:215.20304679870605ms total_cost_time:215.24453163146973ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6002 prompt_cache_len:5151 prompt_cache_ratio:0.8582139286904366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 +DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10641741752624512 s +INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.10846757888793945 s +DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=107875550869559230957324294041768019506, time:1750766759.6005511s req_ids:[8] +DEBUG 06-24 20:05:59 [manager.py:391] +ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:212.98813819885254ms total_cost_time:213.0300998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6003 prompt_cache_len:5151 prompt_cache_ratio:0.8580709645177411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 +DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:05:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10885977745056152 s +INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s +DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=110438577607883684908074342491993246647, time:1750766759.813092s req_ids:[8] +DEBUG 06-24 20:05:59 [manager.py:391] +ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:354.4738292694092ms total_cost_time:354.51793670654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6004 prompt_cache_len:5151 prompt_cache_ratio:0.8579280479680214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 +DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10744810104370117 s +INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10941052436828613 s +DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=123685105457773945532704575093532578410, time:1750766760.1686082s req_ids:[8] +DEBUG 06-24 20:06:00 [manager.py:391] +ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:195.87326049804688ms total_cost_time:195.91808319091797ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6005 prompt_cache_len:5151 prompt_cache_ratio:0.8577851790174854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 +DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10641646385192871 s +INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10831856727600098 s +DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=151111491359179037882126005651959245572, time:1750766760.3833294s req_ids:[8] +DEBUG 06-24 20:06:00 [manager.py:391] +ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:207.7465057373047ms total_cost_time:207.80706405639648ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6006 prompt_cache_len:5151 prompt_cache_ratio:0.8576423576423576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 +DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10820984840393066 s +INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.11015892028808594 s +DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=87715402846227758269307481141806007782, time:1750766760.5877888s req_ids:[8] +DEBUG 06-24 20:06:00 [manager.py:391] +ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:210.9675407409668ms total_cost_time:211.01045608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6007 prompt_cache_len:5151 prompt_cache_ratio:0.857499583818878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 +DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10796141624450684 s +INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.1098470687866211 s +DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=227833650595297036567008934024828968605, time:1750766760.8022716s req_ids:[8] +DEBUG 06-24 20:06:00 [manager.py:391] +ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:199.4950771331787ms total_cost_time:199.5375156402588ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6008 prompt_cache_len:5151 prompt_cache_ratio:0.8573568575233023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 +DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10747599601745605 s +INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10933423042297363 s +DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=336515560310489788587484226026842813644, time:1750766761.0088184s req_ids:[8] +DEBUG 06-24 20:06:01 [manager.py:391] +ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:208.65273475646973ms total_cost_time:208.6946964263916ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6009 prompt_cache_len:5151 prompt_cache_ratio:0.8572141787319022 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 +DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s +INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10909914970397949 s +DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=319563846693610181713917807249832365667, time:1750766761.219058s req_ids:[8] +DEBUG 06-24 20:06:01 [manager.py:391] +ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:197.25298881530762ms total_cost_time:197.2970962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6010 prompt_cache_len:5151 prompt_cache_ratio:0.857071547420965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 +DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10690975189208984 s +INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10890483856201172 s +DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=100321666177804041049961602043741686447, time:1750766761.4301834s req_ids:[8] +DEBUG 06-24 20:06:01 [manager.py:391] +ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:209.97977256774902ms total_cost_time:210.0234031677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6011 prompt_cache_len:5151 prompt_cache_ratio:0.8569289635667942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 +DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10692715644836426 s +INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10898065567016602 s +DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=332169097897630086266387728060200201779, time:1750766761.640435s req_ids:[8] +DEBUG 06-24 20:06:01 [manager.py:391] +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:205.73091506958008ms total_cost_time:205.77430725097656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6012 prompt_cache_len:5151 prompt_cache_ratio:0.8567864271457086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 +DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s +INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.11019253730773926 s +DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=41775081533758781953175881899275237685, time:1750766761.851978s req_ids:[8] +DEBUG 06-24 20:06:01 [manager.py:391] +ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:209.75136756896973ms total_cost_time:209.8104953765869ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6013 prompt_cache_len:5151 prompt_cache_ratio:0.8566439381340429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 +DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10854363441467285 s +INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.11044883728027344 s +DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=75410760041881952069521960672429947763, time:1750766762.0625553s req_ids:[8] +DEBUG 06-24 20:06:02 [manager.py:391] +ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:166.3646697998047ms total_cost_time:166.40353202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:6014 prompt_cache_len:5151 prompt_cache_ratio:0.8565014965081477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 +DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10667276382446289 s +INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10857677459716797 s +DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=30432970171464819289030910924875046901, time:1750766762.2331657s req_ids:[8] +DEBUG 06-24 20:06:02 [manager.py:391] +ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:162.2323989868164ms total_cost_time:162.27412223815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6015 prompt_cache_len:5151 prompt_cache_ratio:0.856359102244389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 +DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10741424560546875 s +INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.1093289852142334 s +DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=191506844905751641003751721247594467984, time:1750766762.3952725s req_ids:[8] +DEBUG 06-24 20:06:02 [manager.py:391] +ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:353.7178039550781ms total_cost_time:353.762149810791ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6016 prompt_cache_len:5151 prompt_cache_ratio:0.856216755319149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 +DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10684728622436523 s +INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10874176025390625 s +DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=270946205760246357945941888305565268850, time:1750766762.7534137s req_ids:[8] +DEBUG 06-24 20:06:02 [manager.py:391] +ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:195.12009620666504ms total_cost_time:195.16396522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6017 prompt_cache_len:5151 prompt_cache_ratio:0.856074455708825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 +DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10697412490844727 s +INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10887312889099121 s +DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=21704738408738167715889986739234348234, time:1750766762.9611528s req_ids:[8] +DEBUG 06-24 20:06:02 [manager.py:391] +ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:205.857515335083ms total_cost_time:205.9018611907959ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6018 prompt_cache_len:5151 prompt_cache_ratio:0.8559322033898306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 +DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10663151741027832 s +INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.10866212844848633 s +DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=311611103771604976056265245455257470882, time:1750766763.1723723s req_ids:[8] +DEBUG 06-24 20:06:03 [manager.py:391] +ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:214.49923515319824ms total_cost_time:214.54238891601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6019 prompt_cache_len:5151 prompt_cache_ratio:0.8557899983385945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 +DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10869503021240234 s +INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.11067461967468262 s +DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=94220829450184913536506285841266441674, time:1750766763.3867571s req_ids:[8] +DEBUG 06-24 20:06:03 [manager.py:391] +ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:200.69003105163574ms total_cost_time:200.73294639587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6020 prompt_cache_len:5151 prompt_cache_ratio:0.8556478405315615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 +DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10764884948730469 s +INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s +DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=56415503249021951910052789260636417351, time:1750766763.5967462s req_ids:[8] +DEBUG 06-24 20:06:03 [manager.py:391] +ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:209.8684310913086ms total_cost_time:209.91110801696777ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6021 prompt_cache_len:5151 prompt_cache_ratio:0.8555057299451918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 +DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:03 [batch.py:51] router release req id 8 +INFO 06-24 20:06:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.1061089038848877 s +INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.1080174446105957 s +DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=134213832146758924644754119611002437326, time:1750766763.8197763s req_ids:[8] +DEBUG 06-24 20:06:03 [manager.py:391] +ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:211.5938663482666ms total_cost_time:211.63558959960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6022 prompt_cache_len:5151 prompt_cache_ratio:0.8553636665559615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 +DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10748958587646484 s +INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10955405235290527 s +DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=102901567638538750537742707993187222280, time:1750766764.027554s req_ids:[8] +DEBUG 06-24 20:06:04 [manager.py:391] +ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:208.54949951171875ms total_cost_time:208.59384536743164ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6023 prompt_cache_len:5151 prompt_cache_ratio:0.8552216503403619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 +DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10725641250610352 s +INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10916709899902344 s +DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=91782245481498522849448084337209493358, time:1750766764.2506578s req_ids:[8] +DEBUG 06-24 20:06:04 [manager.py:391] +ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:215.19994735717773ms total_cost_time:215.26074409484863ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6024 prompt_cache_len:5151 prompt_cache_ratio:0.8550796812749004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 +DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.1073763370513916 s +INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10931038856506348 s +DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=266852163226625083481240219310839221658, time:1750766764.459633s req_ids:[8] +DEBUG 06-24 20:06:04 [manager.py:391] +ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:203.10592651367188ms total_cost_time:203.14908027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6025 prompt_cache_len:5151 prompt_cache_ratio:0.8549377593360996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 +DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10775899887084961 s +INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10973167419433594 s +DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=340080733285546621874399297211970847306, time:1750766764.6679301s req_ids:[8] +DEBUG 06-24 20:06:04 [manager.py:391] +ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:204.55098152160645ms total_cost_time:204.59389686584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6026 prompt_cache_len:5151 prompt_cache_ratio:0.8547958845004978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 +DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10724592208862305 s +INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s +DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=245105947362803867143756495800275066361, time:1750766764.881278s req_ids:[8] +DEBUG 06-24 20:06:04 [manager.py:391] +ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:204.9539089202881ms total_cost_time:205.0156593322754ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6027 prompt_cache_len:5151 prompt_cache_ratio:0.8546540567446491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 +DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s +INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s +DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=306419502479036330333451320681627632543, time:1750766765.0877676s req_ids:[8] +DEBUG 06-24 20:06:05 [manager.py:391] +ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:216.9969081878662ms total_cost_time:217.0393466949463ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6028 prompt_cache_len:5151 prompt_cache_ratio:0.8545122760451228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 +DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.20849990844726562 s +INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.2102954387664795 s +DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=99326755756285572003130029737357720463, time:1750766765.43657s req_ids:[8] +DEBUG 06-24 20:06:05 [manager.py:391] +DEBUG 06-24 20:06:05 [stats.py:37] Avg tokens(prompt+generate) throughput: 27218.602 tokens/s +DEBUG 06-24 20:06:05 [stats.py:37] Avg prompt tokens throughput: 27209.542 tokens/s +DEBUG 06-24 20:06:05 [stats.py:37] Avg generate tokens throughput: 9.060 tokens/s +ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:306.0033321380615ms total_cost_time:306.0462474822998ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6029 prompt_cache_len:5151 prompt_cache_ratio:0.8543705423785038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 +DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.1068732738494873 s +INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.10877823829650879 s +DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=75675991891991260095550043185155196406, time:1750766765.623562s req_ids:[8] +DEBUG 06-24 20:06:05 [manager.py:391] +ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:217.3304557800293ms total_cost_time:217.37337112426758ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6030 prompt_cache_len:5151 prompt_cache_ratio:0.8542288557213931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 +DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.10902070999145508 s +INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.11091446876525879 s +DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=154242974668682939103279576651745810799, time:1750766765.8366601s req_ids:[8] +DEBUG 06-24 20:06:05 [manager.py:391] +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:197.47400283813477ms total_cost_time:197.5235939025879ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:6031 prompt_cache_len:5151 prompt_cache_ratio:0.8540872160504063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 +DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10708832740783691 s +INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10910630226135254 s +DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=6655774158398050954417595977197143387, time:1750766766.0454202s req_ids:[8] +DEBUG 06-24 20:06:06 [manager.py:391] +ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:205.43169975280762ms total_cost_time:205.4755687713623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6032 prompt_cache_len:5151 prompt_cache_ratio:0.853945623342175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 +DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10643863677978516 s +INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10824823379516602 s +DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=151743558694802791331520334670078416812, time:1750766766.2707515s req_ids:[8] +DEBUG 06-24 20:06:06 [manager.py:391] +ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:220.88360786437988ms total_cost_time:220.9632396697998ms,out_token_counter:1 mean_per_token_cost_time: 0.07963180541992188ms prompt_token_num:6033 prompt_cache_len:5151 prompt_cache_ratio:0.8538040775733466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 +DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10901618003845215 s +INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.11097264289855957 s +DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=279400124865038890338536200820076921188, time:1750766766.4800735s req_ids:[8] +DEBUG 06-24 20:06:06 [manager.py:391] +ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:202.6515007019043ms total_cost_time:202.7144432067871ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:6034 prompt_cache_len:5151 prompt_cache_ratio:0.8536625787205834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 +DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10762691497802734 s +INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10955572128295898 s +DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=199150676529489813273115984516576434350, time:1750766766.6890802s req_ids:[8] +DEBUG 06-24 20:06:06 [manager.py:391] +ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:218.58716011047363ms total_cost_time:218.63222122192383ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6035 prompt_cache_len:5151 prompt_cache_ratio:0.8535211267605634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 +DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10685229301452637 s +INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10884475708007812 s +DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=243928687079722007172797608296389297938, time:1750766766.906342s req_ids:[8] +DEBUG 06-24 20:06:06 [manager.py:391] +ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:194.69833374023438ms total_cost_time:194.75555419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6036 prompt_cache_len:5151 prompt_cache_ratio:0.8533797216699801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 +DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10849905014038086 s +INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.11041975021362305 s +DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=159630121017897343577225404866710007114, time:1750766767.1100843s req_ids:[8] +DEBUG 06-24 20:06:07 [manager.py:391] +ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:200.87647438049316ms total_cost_time:200.91843605041504ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6037 prompt_cache_len:5151 prompt_cache_ratio:0.8532383634255425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 +DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:07 [batch.py:51] router release req id 8 +INFO 06-24 20:06:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10791015625 s +INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s +DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=89359874644234034230952322667151343683, time:1750766767.317074s req_ids:[8] +DEBUG 06-24 20:06:07 [manager.py:391] +ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:211.639404296875ms total_cost_time:211.68088912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6038 prompt_cache_len:5151 prompt_cache_ratio:0.8530970520039748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 +DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10653114318847656 s +INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10852456092834473 s +DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=275890797649249121971842471571247033934, time:1750766767.5315483s req_ids:[8] +DEBUG 06-24 20:06:07 [manager.py:391] +ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:201.78961753845215ms total_cost_time:201.83229446411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6039 prompt_cache_len:5151 prompt_cache_ratio:0.8529557873820169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 +DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10765457153320312 s +INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s +DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=208362326449834795111760892590877417449, time:1750766767.7452273s req_ids:[8] +DEBUG 06-24 20:06:07 [manager.py:391] +ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:210.93225479125977ms total_cost_time:210.97373962402344ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6040 prompt_cache_len:5151 prompt_cache_ratio:0.8528145695364239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 +DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10795807838439941 s +INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10999369621276855 s +DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=58035316657469180835239157941735372030, time:1750766767.955685s req_ids:[8] +DEBUG 06-24 20:06:07 [manager.py:391] +ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:362.180233001709ms total_cost_time:362.2410297393799ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6041 prompt_cache_len:5151 prompt_cache_ratio:0.8526733984439663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 +DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10658884048461914 s +INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.1086421012878418 s +DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=157975994899605062849202051970836880543, time:1750766768.316626s req_ids:[8] +DEBUG 06-24 20:06:08 [manager.py:391] +ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:198.49276542663574ms total_cost_time:198.53591918945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6042 prompt_cache_len:5151 prompt_cache_ratio:0.85253227408143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 +DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s +INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.11021566390991211 s +DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=179660772761188551798476275861152715091, time:1750766768.5318959s req_ids:[8] +DEBUG 06-24 20:06:08 [manager.py:391] +ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:212.25500106811523ms total_cost_time:212.30077743530273ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6043 prompt_cache_len:5151 prompt_cache_ratio:0.8523911964256164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 +DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:08 [batch.py:51] router release req id 8 +DEBUG 06-24 20:06:08 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:08 [manager.py:283] +DEBUG 06-24 20:06:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:08 [manager.py:284] +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10654163360595703 s +INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.10849785804748535 s +DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=12692235407278998001087392039979179807, time:1750766768.7447062s req_ids:[8] +DEBUG 06-24 20:06:08 [manager.py:391] +ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:206.26139640808105ms total_cost_time:206.30598068237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6044 prompt_cache_len:5151 prompt_cache_ratio:0.8522501654533422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 +DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10837578773498535 s +INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s +DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=162360349079261907905771395898871572899, time:1750766768.9540884s req_ids:[8] +DEBUG 06-24 20:06:08 [manager.py:391] +ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:205.55973052978516ms total_cost_time:205.60216903686523ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6045 prompt_cache_len:5151 prompt_cache_ratio:0.8521091811414392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 +DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.11013984680175781 s +INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11215329170227051 s +DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=131047694827510380239155065517779337875, time:1750766769.1639116s req_ids:[8] +DEBUG 06-24 20:06:09 [manager.py:391] +ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:203.13048362731934ms total_cost_time:203.1722068786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6046 prompt_cache_len:5151 prompt_cache_ratio:0.8519682434667549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 +DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.1082005500793457 s +INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s +DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=218160160220883725165295105687964819184, time:1750766769.3737366s req_ids:[8] +DEBUG 06-24 20:06:09 [manager.py:391] +ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:208.40001106262207ms total_cost_time:208.44173431396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6047 prompt_cache_len:5151 prompt_cache_ratio:0.8518273524061518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 +DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10958194732666016 s +INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.1115567684173584 s +DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=53057828346809511827489385185871316848, time:1750766769.5858638s req_ids:[8] +DEBUG 06-24 20:06:09 [manager.py:391] +ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:205.28101921081543ms total_cost_time:205.3239345550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6048 prompt_cache_len:5151 prompt_cache_ratio:0.8516865079365079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 +DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s +INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11006307601928711 s +DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=138425581106410269064914626258390239917, time:1750766769.7964027s req_ids:[8] +DEBUG 06-24 20:06:09 [manager.py:391] +ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:208.55164527893066ms total_cost_time:208.59479904174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6049 prompt_cache_len:5151 prompt_cache_ratio:0.8515457100347165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 +DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10650086402893066 s +INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.1083528995513916 s +DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=224298259361201211348792717512227495518, time:1750766770.0200956s req_ids:[8] +DEBUG 06-24 20:06:10 [manager.py:391] +ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:216.31431579589844ms total_cost_time:216.37248992919922ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:6050 prompt_cache_len:5151 prompt_cache_ratio:0.851404958677686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 +DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.1081082820892334 s +INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.1101071834564209 s +DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=238904984276494675904643293176135096303, time:1750766770.2286634s req_ids:[8] +DEBUG 06-24 20:06:10 [manager.py:391] +ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:203.7792205810547ms total_cost_time:203.82142066955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6051 prompt_cache_len:5151 prompt_cache_ratio:0.8512642538423401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 +DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.10772180557250977 s +INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.10970711708068848 s +DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=221655274772514624022504259940694232320, time:1750766770.4377048s req_ids:[8] +DEBUG 06-24 20:06:10 [manager.py:391] +ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:217.21744537353516ms total_cost_time:217.25916862487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6052 prompt_cache_len:5151 prompt_cache_ratio:0.851123595505618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 +DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.10675215721130371 s +INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.10876965522766113 s +DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=316861390726326815761557355728947650701, time:1750766770.6543322s req_ids:[8] +DEBUG 06-24 20:06:10 [manager.py:391] +ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:198.29273223876953ms total_cost_time:198.33827018737793ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6053 prompt_cache_len:5151 prompt_cache_ratio:0.8509829836444738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 +DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.30963826179504395 s +INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.3117208480834961 s +DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=151781003075594576265789595161954593258, time:1750766771.0629683s req_ids:[8] +DEBUG 06-24 20:06:11 [manager.py:391] +ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:406.2960147857666ms total_cost_time:406.3389301300049ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6054 prompt_cache_len:5151 prompt_cache_ratio:0.8508424182358771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 +DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10912775993347168 s +INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.11102628707885742 s +DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=321827521104182704746025711132435272079, time:1750766771.2758784s req_ids:[8] +DEBUG 06-24 20:06:11 [manager.py:391] +ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:206.94208145141602ms total_cost_time:206.9854736328125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6055 prompt_cache_len:5151 prompt_cache_ratio:0.8507018992568125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 +DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10611176490783691 s +INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.10805320739746094 s +DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=68861608703555874757328734142258805857, time:1750766771.4874628s req_ids:[8] +DEBUG 06-24 20:06:11 [manager.py:391] +ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:206.6643238067627ms total_cost_time:206.70723915100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6056 prompt_cache_len:5151 prompt_cache_ratio:0.85056142668428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 +DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10673284530639648 s +INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.10860633850097656 s +DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=169491117484089444130241157449145170975, time:1750766771.6982973s req_ids:[8] +DEBUG 06-24 20:06:11 [manager.py:391] +ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:222.20373153686523ms total_cost_time:222.24712371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6057 prompt_cache_len:5151 prompt_cache_ratio:0.8504210004952947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 +DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10990595817565918 s +INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.1118631362915039 s +DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=117162993049022160015391316251353143723, time:1750766771.9168928s req_ids:[8] +DEBUG 06-24 20:06:11 [manager.py:391] +ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:193.58587265014648ms total_cost_time:193.62926483154297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6058 prompt_cache_len:5151 prompt_cache_ratio:0.8502806206668868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 +DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10622549057006836 s +INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10801482200622559 s +DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=301093093537012768122185984318227343637, time:1750766772.1185257s req_ids:[8] +DEBUG 06-24 20:06:12 [manager.py:391] +ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:191.68853759765625ms total_cost_time:191.73240661621094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6059 prompt_cache_len:5151 prompt_cache_ratio:0.8501402871761017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 +DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10796427726745605 s +INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s +DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=325631143478273732789486490704266853008, time:1750766772.3168254s req_ids:[8] +DEBUG 06-24 20:06:12 [manager.py:391] +ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:200.03080368041992ms total_cost_time:200.0730037689209ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6060 prompt_cache_len:5151 prompt_cache_ratio:0.85 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 +DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s +INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10931134223937988 s +DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=124308350601463742845531964351284427057, time:1750766772.524571s req_ids:[8] +DEBUG 06-24 20:06:12 [manager.py:391] +ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:203.98378372192383ms total_cost_time:204.02765274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6061 prompt_cache_len:5151 prompt_cache_ratio:0.8498597591156575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 +DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s +INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10946822166442871 s +DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=23353637144100624278590679040910324258, time:1750766772.7329254s req_ids:[8] +DEBUG 06-24 20:06:12 [manager.py:391] +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:204.91456985473633ms total_cost_time:204.9577236175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6062 prompt_cache_len:5151 prompt_cache_ratio:0.849719564500165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 +DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.1079249382019043 s +INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s +DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=127819628385223202540910316486211405321, time:1750766772.943142s req_ids:[8] +DEBUG 06-24 20:06:12 [manager.py:391] +ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:206.18557929992676ms total_cost_time:206.2389850616455ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:6063 prompt_cache_len:5151 prompt_cache_ratio:0.8495794161306284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 +DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10665512084960938 s +INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10851263999938965 s +DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=171567262922505299513823119486732596219, time:1750766773.1544828s req_ids:[8] +DEBUG 06-24 20:06:13 [manager.py:391] +ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:205.98769187927246ms total_cost_time:206.03084564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6064 prompt_cache_len:5151 prompt_cache_ratio:0.8494393139841688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 +DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10744571685791016 s +INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10932493209838867 s +DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=260528880721881099362340415622763548570, time:1750766773.3639982s req_ids:[8] +DEBUG 06-24 20:06:13 [manager.py:391] +ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:205.40261268615723ms total_cost_time:205.4445743560791ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6065 prompt_cache_len:5151 prompt_cache_ratio:0.8492992580379225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 +DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10728693008422852 s +INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10917234420776367 s +DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=300043569360611283178988110519808921296, time:1750766773.5755944s req_ids:[8] +DEBUG 06-24 20:06:13 [manager.py:391] +ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:366.35351181030273ms total_cost_time:366.39881134033203ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6066 prompt_cache_len:5151 prompt_cache_ratio:0.8491592482690405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 +DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10702824592590332 s +INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10894966125488281 s +DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=323105297088449021781211148333292932588, time:1750766773.9380095s req_ids:[8] +DEBUG 06-24 20:06:13 [manager.py:391] +ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:199.0494728088379ms total_cost_time:199.10216331481934ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:6067 prompt_cache_len:5151 prompt_cache_ratio:0.8490192846546893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 +DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10783886909484863 s +INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1096656322479248 s +DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=31083084694115692945721492883722296898, time:1750766774.1551318s req_ids:[8] +DEBUG 06-24 20:06:14 [manager.py:391] +ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:209.68914031982422ms total_cost_time:209.7313404083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6068 prompt_cache_len:5151 prompt_cache_ratio:0.8488793671720501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 +DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10698962211608887 s +INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.10889911651611328 s +DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=291955693929551057049281708590529636693, time:1750766774.3644059s req_ids:[8] +DEBUG 06-24 20:06:14 [manager.py:391] +ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:206.25996589660645ms total_cost_time:206.30168914794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6069 prompt_cache_len:5151 prompt_cache_ratio:0.8487394957983193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 +DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:14 [batch.py:51] router release req id 8 +INFO 06-24 20:06:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10714554786682129 s +INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1091616153717041 s +DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=210093120209305620961475658908497460456, time:1750766774.5746593s req_ids:[8] +DEBUG 06-24 20:06:14 [manager.py:391] +ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:204.98418807983398ms total_cost_time:205.02686500549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6070 prompt_cache_len:5151 prompt_cache_ratio:0.8485996705107084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 +DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10711860656738281 s +INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1091606616973877 s +DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=68058616912046301648369426858999133725, time:1750766774.7847652s req_ids:[8] +DEBUG 06-24 20:06:14 [manager.py:391] +ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:205.4755687713623ms total_cost_time:205.5196762084961ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6071 prompt_cache_len:5151 prompt_cache_ratio:0.8484598912864437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 +DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10659432411193848 s +INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1085045337677002 s +DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=266869136503257706735618173599352139853, time:1750766774.9953895s req_ids:[8] +DEBUG 06-24 20:06:14 [manager.py:391] +ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:205.93500137329102ms total_cost_time:205.9950828552246ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6072 prompt_cache_len:5151 prompt_cache_ratio:0.8483201581027668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 +DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10963273048400879 s +INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s +DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=19534602718431580561814923327965498632, time:1750766775.2048912s req_ids:[8] +DEBUG 06-24 20:06:15 [manager.py:391] +ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:204.5152187347412ms total_cost_time:204.5602798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6073 prompt_cache_len:5151 prompt_cache_ratio:0.8481804709369339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 +DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10764241218566895 s +INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.10958504676818848 s +DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=228492006378238062042066841337975883367, time:1750766775.416704s req_ids:[8] +DEBUG 06-24 20:06:15 [manager.py:391] +ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:06:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 27075.462 tokens/s +DEBUG 06-24 20:06:15 [stats.py:37] Avg prompt tokens throughput: 27066.418 tokens/s +DEBUG 06-24 20:06:15 [stats.py:37] Avg generate tokens throughput: 9.044 tokens/s +INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:208.6033821105957ms total_cost_time:208.6479663848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6074 prompt_cache_len:5151 prompt_cache_ratio:0.8480408297662166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 +DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10997176170349121 s +INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.11201000213623047 s +DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=51643496317541814573320543684449009021, time:1750766775.6281173s req_ids:[8] +DEBUG 06-24 20:06:15 [manager.py:391] +ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:203.54938507080078ms total_cost_time:203.59253883361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6075 prompt_cache_len:5151 prompt_cache_ratio:0.8479012345679012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 +DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10733246803283691 s +INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.1094510555267334 s +DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=172076749074984437575466145834307755679, time:1750766775.8367238s req_ids:[8] +DEBUG 06-24 20:06:15 [manager.py:391] +ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:205.7514190673828ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6076 prompt_cache_len:5151 prompt_cache_ratio:0.847761685319289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 +DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.30954909324645996 s +INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.3115055561065674 s +DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=193559354369953011294342665744597919469, time:1750766776.2482228s req_ids:[8] +DEBUG 06-24 20:06:16 [manager.py:391] +ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:411.9577407836914ms total_cost_time:412.0030403137207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6077 prompt_cache_len:5151 prompt_cache_ratio:0.8476221819976962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 +INFO 06-24 20:06:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.10954904556274414 s +INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.1114511489868164 s +DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=230695407670521310928532476937615309847, time:1750766776.4668732s req_ids:[8] +DEBUG 06-24 20:06:16 [manager.py:391] +ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:206.78019523620605ms total_cost_time:206.81428909301758ms,out_token_counter:1 mean_per_token_cost_time: 0.03409385681152344ms prompt_token_num:6078 prompt_cache_len:5151 prompt_cache_ratio:0.8474827245804541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 +DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.1049497127532959 s +INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.10688304901123047 s +DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=271268013338566478107730195079943376472, time:1750766776.6821978s req_ids:[8] +DEBUG 06-24 20:06:16 [manager.py:391] +ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:214.33568000793457ms total_cost_time:214.3561840057373ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6079 prompt_cache_len:5151 prompt_cache_ratio:0.8473433130449087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 +DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.10604643821716309 s +INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.10781574249267578 s +DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=332304996389003127346489551757001399497, time:1750766776.894991s req_ids:[8] +DEBUG 06-24 20:06:16 [manager.py:391] +ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:205.8694362640381ms total_cost_time:205.916166305542ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6080 prompt_cache_len:5151 prompt_cache_ratio:0.8472039473684211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 +DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.1078634262084961 s +INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10965752601623535 s +DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=238444553988125691603373839760759709187, time:1750766777.1175091s req_ids:[8] +DEBUG 06-24 20:06:17 [manager.py:391] +ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:225.15010833740234ms total_cost_time:225.19397735595703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6081 prompt_cache_len:5151 prompt_cache_ratio:0.8470646275283671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 +DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10692858695983887 s +INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10859870910644531 s +DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=136989867071156774195720343540565107209, time:1750766777.344901s req_ids:[8] +DEBUG 06-24 20:06:17 [manager.py:391] +ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:206.60972595214844ms total_cost_time:206.65216445922852ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6082 prompt_cache_len:5151 prompt_cache_ratio:0.8469253535021375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 +DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10762357711791992 s +INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10931563377380371 s +DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=307858555659764366283867965664957438427, time:1750766777.5510867s req_ids:[8] +DEBUG 06-24 20:06:17 [manager.py:391] +ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:203.7034034729004ms total_cost_time:203.7487030029297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6083 prompt_cache_len:5151 prompt_cache_ratio:0.8467861252671379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 +DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10495376586914062 s +INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10667181015014648 s +DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=255122935323283699467144391108828202401, time:1750766777.7637253s req_ids:[8] +DEBUG 06-24 20:06:17 [manager.py:391] +ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:202.62598991394043ms total_cost_time:202.6498317718506ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6084 prompt_cache_len:5151 prompt_cache_ratio:0.846646942800789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 +DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10427260398864746 s +INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10612249374389648 s +DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=125637894730720629841251063555081598942, time:1750766777.9607599s req_ids:[8] +DEBUG 06-24 20:06:17 [manager.py:391] +ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:212.07904815673828ms total_cost_time:212.10193634033203ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6085 prompt_cache_len:5151 prompt_cache_ratio:0.8465078060805259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 +DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10420560836791992 s +INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10605216026306152 s +DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=164090879465811865734923686937803075974, time:1750766778.1825109s req_ids:[8] +DEBUG 06-24 20:06:18 [manager.py:391] +ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:208.40144157409668ms total_cost_time:208.42504501342773ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6086 prompt_cache_len:5151 prompt_cache_ratio:0.8463687150837989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 +DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10448503494262695 s +INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10638952255249023 s +DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=311849000398133340649487743385729532947, time:1750766778.3891091s req_ids:[8] +DEBUG 06-24 20:06:18 [manager.py:391] +ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:204.4525146484375ms total_cost_time:204.47468757629395ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6087 prompt_cache_len:5151 prompt_cache_ratio:0.8462296697880729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 +DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10472893714904785 s +INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10666227340698242 s +DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=92187267207809233539537458920588809941, time:1750766778.597368s req_ids:[8] +DEBUG 06-24 20:06:18 [manager.py:391] +ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:203.83715629577637ms total_cost_time:203.8590908050537ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6088 prompt_cache_len:5151 prompt_cache_ratio:0.8460906701708278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 +DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.20444226264953613 s +INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.2062511444091797 s +DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=206062541825738528709371198982046326987, time:1750766778.894758s req_ids:[8] +DEBUG 06-24 20:06:18 [manager.py:391] +ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:246.4456558227539ms total_cost_time:246.46902084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6089 prompt_cache_len:5151 prompt_cache_ratio:0.8459517162095582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 +DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10510683059692383 s +INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10703277587890625 s +DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=230953239405980865784183960803700014176, time:1750766779.040687s req_ids:[8] +DEBUG 06-24 20:06:19 [manager.py:391] +ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:177.89173126220703ms total_cost_time:177.91414260864258ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6090 prompt_cache_len:5151 prompt_cache_ratio:0.8458128078817734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 +DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10443329811096191 s +INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10633063316345215 s +DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=84761899105140769856861837934947310840, time:1750766779.2340703s req_ids:[8] +DEBUG 06-24 20:06:19 [manager.py:391] +ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:201.36070251464844ms total_cost_time:201.3843059539795ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6091 prompt_cache_len:5151 prompt_cache_ratio:0.8456739451649975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 +DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10441803932189941 s +INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.1063077449798584 s +DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=125005615929491505393152090411030572673, time:1750766779.441638s req_ids:[8] +DEBUG 06-24 20:06:19 [manager.py:391] +ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:200.89960098266602ms total_cost_time:200.92320442199707ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6092 prompt_cache_len:5151 prompt_cache_ratio:0.8455351280367696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 +DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10431599617004395 s +INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10630369186401367 s +DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=197508703828335778843817948177504437509, time:1750766779.6448328s req_ids:[8] +DEBUG 06-24 20:06:19 [manager.py:391] +ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:193.07923316955566ms total_cost_time:193.1009292602539ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6093 prompt_cache_len:5151 prompt_cache_ratio:0.8453963564746431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 +DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10539793968200684 s +INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10735702514648438 s +DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=124580408642116343837523537077441585861, time:1750766779.8427448s req_ids:[8] +DEBUG 06-24 20:06:19 [manager.py:391] +ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:205.38067817687988ms total_cost_time:205.40213584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6094 prompt_cache_len:5151 prompt_cache_ratio:0.8452576304561864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 +DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10436224937438965 s +INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10629153251647949 s +DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=10934076778701682580893587398551280918, time:1750766780.0554173s req_ids:[8] +DEBUG 06-24 20:06:20 [manager.py:391] +ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:210.27636528015137ms total_cost_time:210.2982997894287ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6095 prompt_cache_len:5151 prompt_cache_ratio:0.8451189499589827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 +DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10620927810668945 s +INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10808396339416504 s +DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=308841724527493195675627558880119212621, time:1750766780.2664921s req_ids:[8] +DEBUG 06-24 20:06:20 [manager.py:391] +ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:211.1341953277588ms total_cost_time:211.16042137145996ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6096 prompt_cache_len:5151 prompt_cache_ratio:0.84498031496063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 +DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10492682456970215 s +INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.1071019172668457 s +DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=188825993691424026567293187609409176411, time:1750766780.480337s req_ids:[8] +DEBUG 06-24 20:06:20 [manager.py:391] +ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:209.34438705444336ms total_cost_time:209.3672752380371ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6097 prompt_cache_len:5151 prompt_cache_ratio:0.8448417254387404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 +DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10441708564758301 s +INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10637331008911133 s +DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=334849299458908717533360891303073156047, time:1750766780.705667s req_ids:[8] +DEBUG 06-24 20:06:20 [manager.py:391] +ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:219.49410438537598ms total_cost_time:219.51580047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6098 prompt_cache_len:5151 prompt_cache_ratio:0.8447031813709412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 +DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10492897033691406 s +INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10697770118713379 s +DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=247251171324266444486086501888682516170, time:1750766780.9174411s req_ids:[8] +DEBUG 06-24 20:06:20 [manager.py:391] +ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:209.33914184570312ms total_cost_time:209.36155319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6099 prompt_cache_len:5151 prompt_cache_ratio:0.8445646827348746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 +DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10485672950744629 s +INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10689377784729004 s +DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=233826107457444844923875639856547905262, time:1750766781.1366055s req_ids:[8] +DEBUG 06-24 20:06:21 [manager.py:391] +ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:227.76436805725098ms total_cost_time:227.78725624084473ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6100 prompt_cache_len:5151 prompt_cache_ratio:0.8444262295081967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 +DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10472440719604492 s +INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.1067342758178711 s +DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=121978652246319463831904810709488873129, time:1750766781.3581998s req_ids:[8] +DEBUG 06-24 20:06:21 [manager.py:391] +ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:200.26731491088867ms total_cost_time:200.29044151306152ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6101 prompt_cache_len:5151 prompt_cache_ratio:0.844287821668579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 +DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10430002212524414 s +INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10643291473388672 s +INFO 06-24 20:06:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=148022403606126598876806728390213116473, time:1750766781.5776339s req_ids:[8] +DEBUG 06-24 20:06:21 [manager.py:391] +ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:383.4555149078369ms total_cost_time:383.47935676574707ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6102 prompt_cache_len:5151 prompt_cache_ratio:0.844149459193707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 +DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10547161102294922 s +INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10766482353210449 s +DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=167325784571661921149648466926566223250, time:1750766781.948074s req_ids:[8] +DEBUG 06-24 20:06:21 [manager.py:391] +ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:203.66406440734863ms total_cost_time:203.6874294281006ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6103 prompt_cache_len:5151 prompt_cache_ratio:0.8440111420612814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 +DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10441040992736816 s +INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10657358169555664 s +DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=278309699379054471619229642513523834756, time:1750766782.1691618s req_ids:[8] +DEBUG 06-24 20:06:22 [manager.py:391] +ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:218.82963180541992ms total_cost_time:218.86062622070312ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:6104 prompt_cache_len:5151 prompt_cache_ratio:0.843872870249017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 +DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.1043233871459961 s +INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10654139518737793 s +DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=34458947381605882051351193243915910112, time:1750766782.3847382s req_ids:[8] +DEBUG 06-24 20:06:22 [manager.py:391] +ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:210.9212875366211ms total_cost_time:210.94441413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6105 prompt_cache_len:5151 prompt_cache_ratio:0.8437346437346437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 +DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10333704948425293 s +INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10522675514221191 s +DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=7272999439659390420071915596219741207, time:1750766782.593363s req_ids:[8] +DEBUG 06-24 20:06:22 [manager.py:391] +ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:188.57383728027344ms total_cost_time:188.59338760375977ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6106 prompt_cache_len:5151 prompt_cache_ratio:0.8435964624959057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 +DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10317826271057129 s +INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10506844520568848 s +DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=141317760587143255624025853604179120910, time:1750766782.7940385s req_ids:[8] +DEBUG 06-24 20:06:22 [manager.py:391] +ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:208.00113677978516ms total_cost_time:208.02044868469238ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6107 prompt_cache_len:5151 prompt_cache_ratio:0.8434583265105616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 +DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10430788993835449 s +INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10638236999511719 s +DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=272977373493258621916293948208431436365, time:1750766782.9938126s req_ids:[8] +DEBUG 06-24 20:06:22 [manager.py:391] +ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:203.53412628173828ms total_cost_time:203.5536766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6108 prompt_cache_len:5151 prompt_cache_ratio:0.843320235756385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 +DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10313224792480469 s +INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10507392883300781 s +INFO 06-24 20:06:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=25300919702447270741370121062232038812, time:1750766783.2045593s req_ids:[8] +DEBUG 06-24 20:06:23 [manager.py:391] +ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:209.61236953735352ms total_cost_time:209.63215827941895ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6109 prompt_cache_len:5151 prompt_cache_ratio:0.8431821902111638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 +DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10340404510498047 s +INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10528254508972168 s +DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=305353340372624027929144639176119755243, time:1750766783.4176474s req_ids:[8] +DEBUG 06-24 20:06:23 [manager.py:391] +ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:204.96726036071777ms total_cost_time:204.9880027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6110 prompt_cache_len:5151 prompt_cache_ratio:0.8430441898527005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 +DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10335230827331543 s +INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10536670684814453 s +DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=290960491537300519251285696251829625472, time:1750766783.6261122s req_ids:[8] +DEBUG 06-24 20:06:23 [manager.py:391] +ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:208.93383026123047ms total_cost_time:208.9533805847168ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6111 prompt_cache_len:5151 prompt_cache_ratio:0.842906234658812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 +DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10455441474914551 s +INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10655093193054199 s +DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=218080960870294761331534899352510751129, time:1750766783.8440077s req_ids:[8] +DEBUG 06-24 20:06:23 [manager.py:391] +ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:214.10655975341797ms total_cost_time:214.1263484954834ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6112 prompt_cache_len:5151 prompt_cache_ratio:0.8427683246073299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 +DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10477519035339355 s +INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.1069033145904541 s +DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=224552355293082718388846205899768770206, time:1750766784.061049s req_ids:[8] +DEBUG 06-24 20:06:24 [manager.py:391] +ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:205.98769187927246ms total_cost_time:206.0093879699707ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6113 prompt_cache_len:5151 prompt_cache_ratio:0.8426304596761001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 +DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10348773002624512 s +INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.10551333427429199 s +DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=246947666861936024699748553683267397229, time:1750766784.2651703s req_ids:[8] +DEBUG 06-24 20:06:24 [manager.py:391] +ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:206.99667930603027ms total_cost_time:207.0169448852539ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6114 prompt_cache_len:5151 prompt_cache_ratio:0.8424926398429833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 +DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10422301292419434 s +INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.10622429847717285 s +DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=299756380200373611391072335718948034271, time:1750766784.481824s req_ids:[8] +DEBUG 06-24 20:06:24 [manager.py:391] +ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:382.73024559020996ms total_cost_time:382.7517032623291ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6115 prompt_cache_len:5151 prompt_cache_ratio:0.8423548650858544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 +DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10453963279724121 s +INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.1064455509185791 s +DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=309494970834871294122755161822399773162, time:1750766784.854899s req_ids:[8] +DEBUG 06-24 20:06:24 [manager.py:391] +ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:207.58342742919922ms total_cost_time:207.60369300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6116 prompt_cache_len:5151 prompt_cache_ratio:0.842217135382603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 +DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10311412811279297 s +INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10492730140686035 s +DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=135146763130120856991333150772475227718, time:1750766785.0594633s req_ids:[8] +DEBUG 06-24 20:06:25 [manager.py:391] +ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:176.2712001800537ms total_cost_time:176.29051208496094ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6117 prompt_cache_len:5151 prompt_cache_ratio:0.8420794507111329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 +DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.1046912670135498 s +INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10671329498291016 s +DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=194703729639997601733597106153453669572, time:1750766785.2571788s req_ids:[8] +DEBUG 06-24 20:06:25 [manager.py:391] +ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:213.38415145874023ms total_cost_time:213.40346336364746ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6118 prompt_cache_len:5151 prompt_cache_ratio:0.8419418110493625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 +DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10375404357910156 s +INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10602784156799316 s +DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=141128383125760913879594072874103801806, time:1750766785.4771478s req_ids:[8] +DEBUG 06-24 20:06:25 [manager.py:391] +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:06:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 27280.706 tokens/s +DEBUG 06-24 20:06:25 [stats.py:37] Avg prompt tokens throughput: 27271.760 tokens/s +DEBUG 06-24 20:06:25 [stats.py:37] Avg generate tokens throughput: 8.946 tokens/s +INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:215.24596214294434ms total_cost_time:215.26670455932617ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6119 prompt_cache_len:5151 prompt_cache_ratio:0.8418042163752247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 +DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10345172882080078 s +INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10543012619018555 s +DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=38810360127257287293362483462333117489, time:1750766785.6892889s req_ids:[8] +DEBUG 06-24 20:06:25 [manager.py:391] +ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:206.25734329223633ms total_cost_time:206.27713203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6120 prompt_cache_len:5151 prompt_cache_ratio:0.8416666666666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 +DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.1035909652709961 s +INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.1055753231048584 s +DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=226052987839706201670884770574816148378, time:1750766785.9013026s req_ids:[8] +DEBUG 06-24 20:06:25 [manager.py:391] +ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:209.53035354614258ms total_cost_time:209.5503807067871ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6121 prompt_cache_len:5151 prompt_cache_ratio:0.8415291619016501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 +DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10443997383117676 s +INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.1064460277557373 s +DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=22096028405950588065764586516796394258, time:1750766786.1141126s req_ids:[8] +DEBUG 06-24 20:06:26 [manager.py:391] +ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:206.12549781799316ms total_cost_time:206.1457633972168ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6122 prompt_cache_len:5151 prompt_cache_ratio:0.841391702058151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 +DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10439729690551758 s +INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10635900497436523 s +DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=325243897191519754945790619986661389796, time:1750766786.3255363s req_ids:[8] +DEBUG 06-24 20:06:26 [manager.py:391] +ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:207.55910873413086ms total_cost_time:207.5810432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6123 prompt_cache_len:5151 prompt_cache_ratio:0.8412542871141597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 +DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10320019721984863 s +INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10521268844604492 s +DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=84765910776844190678842431717733350882, time:1750766786.5344055s req_ids:[8] +DEBUG 06-24 20:06:26 [manager.py:391] +ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:201.52926445007324ms total_cost_time:201.55024528503418ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6124 prompt_cache_len:5151 prompt_cache_ratio:0.8411169170476812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 +DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10446548461914062 s +INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10644865036010742 s +DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=143383876673861167419868309980376649490, time:1750766786.740108s req_ids:[8] +DEBUG 06-24 20:06:26 [manager.py:391] +ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:207.60035514831543ms total_cost_time:207.62109756469727ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6125 prompt_cache_len:5151 prompt_cache_ratio:0.8409795918367347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 +DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.1042790412902832 s +INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10624027252197266 s +DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=125871245928418826983408541299915505592, time:1750766786.952758s req_ids:[8] +DEBUG 06-24 20:06:26 [manager.py:391] +ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:202.84652709960938ms total_cost_time:202.8663158416748ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6126 prompt_cache_len:5151 prompt_cache_ratio:0.8408423114593536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 +DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10442042350769043 s +INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10631179809570312 s +DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=331288092322122495617245829926784338048, time:1750766787.1602623s req_ids:[8] +DEBUG 06-24 20:06:27 [manager.py:391] +ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:409.299373626709ms total_cost_time:409.3189239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6127 prompt_cache_len:5151 prompt_cache_ratio:0.8407050758935858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 +DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10421133041381836 s +INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10604667663574219 s +DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=260568356287072644763237119143286650608, time:1750766787.5677457s req_ids:[8] +DEBUG 06-24 20:06:27 [manager.py:391] +ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:201.1582851409912ms total_cost_time:201.17807388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6128 prompt_cache_len:5151 prompt_cache_ratio:0.8405678851174935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 +DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10314106941223145 s +INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10496234893798828 s +DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=267458317937483801225361854082613453127, time:1750766787.7785275s req_ids:[8] +DEBUG 06-24 20:06:27 [manager.py:391] +ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:208.12082290649414ms total_cost_time:208.14132690429688ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6129 prompt_cache_len:5151 prompt_cache_ratio:0.8404307391091532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 +DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10388016700744629 s +INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10584092140197754 s +DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=331763519628841190231568367292577767933, time:1750766787.9901304s req_ids:[8] +DEBUG 06-24 20:06:27 [manager.py:391] +ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:207.94296264648438ms total_cost_time:207.9622745513916ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6130 prompt_cache_len:5151 prompt_cache_ratio:0.8402936378466558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 +DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10319399833679199 s +INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10501551628112793 s +DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=305752710936130591689075335204265638774, time:1750766788.2016904s req_ids:[8] +DEBUG 06-24 20:06:28 [manager.py:391] +ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:209.5024585723877ms total_cost_time:209.52272415161133ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6131 prompt_cache_len:5151 prompt_cache_ratio:0.8401565813081063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 +DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10292696952819824 s +INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10473394393920898 s +DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=145967706689425026602361702885872410523, time:1750766788.4141562s req_ids:[8] +DEBUG 06-24 20:06:28 [manager.py:391] +ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:206.97331428527832ms total_cost_time:206.99238777160645ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6132 prompt_cache_len:5151 prompt_cache_ratio:0.8400195694716243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 +DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.1044154167175293 s +INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10626769065856934 s +DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=191264099948528474948886321304434144103, time:1750766788.6259215s req_ids:[8] +DEBUG 06-24 20:06:28 [manager.py:391] +ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:210.4043960571289ms total_cost_time:210.42346954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6133 prompt_cache_len:5151 prompt_cache_ratio:0.8398826023153432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 +DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10449385643005371 s +INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10637187957763672 s +DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=242090911718453434807775366384303598317, time:1750766788.8500962s req_ids:[8] +DEBUG 06-24 20:06:28 [manager.py:391] +ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:219.9575901031494ms total_cost_time:219.97594833374023ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6134 prompt_cache_len:5151 prompt_cache_ratio:0.8397456798174111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 +DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.1030726432800293 s +INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.1049191951751709 s +DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=157431062221938925865839713271753476351, time:1750766789.0636861s req_ids:[8] +DEBUG 06-24 20:06:29 [manager.py:391] +ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:207.50665664672852ms total_cost_time:207.52644538879395ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6135 prompt_cache_len:5151 prompt_cache_ratio:0.8396088019559902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 +DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10311484336853027 s +INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.1049809455871582 s +DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=134929130697788601482045786854694649371, time:1750766789.2742567s req_ids:[8] +DEBUG 06-24 20:06:29 [manager.py:391] +ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:206.66742324829102ms total_cost_time:206.68601989746094ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6136 prompt_cache_len:5151 prompt_cache_ratio:0.8394719687092569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 +DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10301494598388672 s +INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.10478711128234863 s +DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=15777616477381629866767359523101059399, time:1750766789.4847536s req_ids:[8] +DEBUG 06-24 20:06:29 [manager.py:391] +ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:370.1438903808594ms total_cost_time:370.1643943786621ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6137 prompt_cache_len:5151 prompt_cache_ratio:0.8393351800554016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 +DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10407805442810059 s +INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.10587096214294434 s +DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=253517709683889396140375011504581507997, time:1750766789.8540235s req_ids:[8] +DEBUG 06-24 20:06:29 [manager.py:391] +ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:196.46215438842773ms total_cost_time:196.48003578186035ms,out_token_counter:1 mean_per_token_cost_time: 0.017881393432617188ms prompt_token_num:6138 prompt_cache_len:5151 prompt_cache_ratio:0.8391984359726296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 +DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10304665565490723 s +INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10489773750305176 s +DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=184257812429556298306099130896696400107, time:1750766790.0608459s req_ids:[8] +DEBUG 06-24 20:06:30 [manager.py:391] +ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:205.90758323669434ms total_cost_time:205.92665672302246ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6139 prompt_cache_len:5151 prompt_cache_ratio:0.8390617364391595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 +DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10294175148010254 s +INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.1047518253326416 s +DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=164203541249587692058122716266663712570, time:1750766790.269834s req_ids:[8] +DEBUG 06-24 20:06:30 [manager.py:391] +ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:205.7480812072754ms total_cost_time:205.76763153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6140 prompt_cache_len:5151 prompt_cache_ratio:0.8389250814332248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 +DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10336852073669434 s +INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10518383979797363 s +DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=231616877556518156345629522787451039788, time:1750766790.4834332s req_ids:[8] +DEBUG 06-24 20:06:30 [manager.py:391] +ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:208.9226245880127ms total_cost_time:208.94122123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6141 prompt_cache_len:5151 prompt_cache_ratio:0.8387884709330727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 +DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10297250747680664 s +INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10527801513671875 s +DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=238878848851664897259707789097795683145, time:1750766790.694265s req_ids:[8] +DEBUG 06-24 20:06:30 [manager.py:391] +ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:207.36098289489746ms total_cost_time:207.3800563812256ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6142 prompt_cache_len:5151 prompt_cache_ratio:0.8386519049169652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 +DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10459351539611816 s +INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10649776458740234 s +DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=70952653644051467278305454036591872139, time:1750766790.9066322s req_ids:[8] +DEBUG 06-24 20:06:30 [manager.py:391] +ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:215.03210067749023ms total_cost_time:215.05093574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6143 prompt_cache_len:5151 prompt_cache_ratio:0.8385153833631775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 +DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10431385040283203 s +INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10614824295043945 s +DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=295336196461553647570053591516784724948, time:1750766791.1214995s req_ids:[8] +DEBUG 06-24 20:06:31 [manager.py:391] +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:205.16204833984375ms total_cost_time:205.18183708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6144 prompt_cache_len:5151 prompt_cache_ratio:0.83837890625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 +DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10414886474609375 s +INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10599756240844727 s +DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=61852508994208769950654920921410801492, time:1750766791.3335516s req_ids:[8] +DEBUG 06-24 20:06:31 [manager.py:391] +ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:205.33370971679688ms total_cost_time:205.3532600402832ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6145 prompt_cache_len:5151 prompt_cache_ratio:0.8382424735557363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 +DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.1040341854095459 s +INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10590124130249023 s +DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=188559802601152452400496849733930747664, time:1750766791.5422516s req_ids:[8] +DEBUG 06-24 20:06:31 [manager.py:391] +ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:206.26163482666016ms total_cost_time:206.28023147583008ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6146 prompt_cache_len:5151 prompt_cache_ratio:0.8381060852587049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 +DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10401034355163574 s +INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10580015182495117 s +DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=1619228682943050669769246352976244528, time:1750766791.7528481s req_ids:[8] +DEBUG 06-24 20:06:31 [manager.py:391] +ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:206.04252815246582ms total_cost_time:206.06112480163574ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6147 prompt_cache_len:5151 prompt_cache_ratio:0.8379697413372377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 +DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10291433334350586 s +INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10476374626159668 s +DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=287959402040441899584032853745045000267, time:1750766791.9619412s req_ids:[8] +DEBUG 06-24 20:06:31 [manager.py:391] +ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:203.6137580871582ms total_cost_time:203.63378524780273ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6148 prompt_cache_len:5151 prompt_cache_ratio:0.8378334417696812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 +DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.3045976161956787 s +INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.30650949478149414 s +DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=316620463359896037430047501284133251457, time:1750766792.3726034s req_ids:[8] +DEBUG 06-24 20:06:32 [manager.py:391] +ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:412.89305686950684ms total_cost_time:412.91236877441406ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6149 prompt_cache_len:5151 prompt_cache_ratio:0.8376971865343958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 +DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10439634323120117 s +INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10628390312194824 s +DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=87037759884092886547309206577495653779, time:1750766792.5882473s req_ids:[8] +DEBUG 06-24 20:06:32 [manager.py:391] +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:209.57469940185547ms total_cost_time:209.5937728881836ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6150 prompt_cache_len:5151 prompt_cache_ratio:0.8375609756097561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 +DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10426449775695801 s +INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10615897178649902 s +DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=146792570732096537179118145993766077747, time:1750766792.8015797s req_ids:[8] +DEBUG 06-24 20:06:32 [manager.py:391] +ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:206.88343048095703ms total_cost_time:206.90202713012695ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6151 prompt_cache_len:5151 prompt_cache_ratio:0.8374248089741505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 +DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10303759574890137 s +INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10489201545715332 s +DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=59318418209261092103843418343049738002, time:1750766793.0117295s req_ids:[8] +DEBUG 06-24 20:06:33 [manager.py:391] +ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:209.16748046875ms total_cost_time:209.18822288513184ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6152 prompt_cache_len:5151 prompt_cache_ratio:0.8372886866059818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 +DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10385942459106445 s +INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10579681396484375 s +DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=114481994863591629468448857816390295088, time:1750766793.2241678s req_ids:[8] +DEBUG 06-24 20:06:33 [manager.py:391] +ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:206.43258094787598ms total_cost_time:206.4530849456787ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6153 prompt_cache_len:5151 prompt_cache_ratio:0.8371526084836665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 +DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10329389572143555 s +INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10515832901000977 s +DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=9572132528447915679900250264896881297, time:1750766793.4365184s req_ids:[8] +DEBUG 06-24 20:06:33 [manager.py:391] +ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:208.44197273254395ms total_cost_time:208.46056938171387ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6154 prompt_cache_len:5151 prompt_cache_ratio:0.8370165745856354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 +DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10425424575805664 s +INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10609817504882812 s +DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=90337888777422112167426687610347983383, time:1750766793.6470857s req_ids:[8] +DEBUG 06-24 20:06:33 [manager.py:391] +ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:207.63397216796875ms total_cost_time:207.65376091003418ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6155 prompt_cache_len:5151 prompt_cache_ratio:0.836880584890333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 +DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10345053672790527 s +INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10531258583068848 s +DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=252429212484329802484051078304187855971, time:1750766793.856358s req_ids:[8] +DEBUG 06-24 20:06:33 [manager.py:391] +ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:208.61434936523438ms total_cost_time:208.6338996887207ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6156 prompt_cache_len:5151 prompt_cache_ratio:0.8367446393762183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 +DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10409355163574219 s +INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.1061556339263916 s +DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=6472940603598857380552641733867814902, time:1750766794.0691113s req_ids:[8] +DEBUG 06-24 20:06:34 [manager.py:391] +ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:207.3190212249756ms total_cost_time:207.3376178741455ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6157 prompt_cache_len:5151 prompt_cache_ratio:0.8366087380217638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 +DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10341620445251465 s +INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10528111457824707 s +DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=4476805069943366652938146203316118081, time:1750766794.2795162s req_ids:[8] +DEBUG 06-24 20:06:34 [manager.py:391] +ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:207.2463035583496ms total_cost_time:207.26680755615234ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6158 prompt_cache_len:5151 prompt_cache_ratio:0.8364728808054563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 +DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10430383682250977 s +INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10614371299743652 s +DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=302005619389447012991952819556627360605, time:1750766794.4938734s req_ids:[8] +DEBUG 06-24 20:06:34 [manager.py:391] +ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:206.79545402526855ms total_cost_time:206.81452751159668ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6159 prompt_cache_len:5151 prompt_cache_ratio:0.8363370677057964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 +DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10430335998535156 s +INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10613608360290527 s +DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=332032272582930476422586949342274249412, time:1750766794.701615s req_ids:[8] +DEBUG 06-24 20:06:34 [manager.py:391] +ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:205.30390739440918ms total_cost_time:205.3244113922119ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6160 prompt_cache_len:5151 prompt_cache_ratio:0.8362012987012987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 +DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10301518440246582 s +INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10484004020690918 s +DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=333851615067819169426055893232590404915, time:1750766794.910514s req_ids:[8] +DEBUG 06-24 20:06:34 [manager.py:391] +ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:349.86233711242676ms total_cost_time:349.8823642730713ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6161 prompt_cache_len:5151 prompt_cache_ratio:0.8360655737704918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 +DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s +INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10494661331176758 s +DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=291336576570570190445050849118329221926, time:1750766795.2574146s req_ids:[8] +DEBUG 06-24 20:06:35 [manager.py:391] +ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:199.2471218109131ms total_cost_time:199.2659568786621ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6162 prompt_cache_len:5151 prompt_cache_ratio:0.8359298928919182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 +DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10412168502807617 s +INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10608029365539551 s +DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=156860295167591743292688129874894849012, time:1750766795.467346s req_ids:[8] +DEBUG 06-24 20:06:35 [manager.py:391] +ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:207.63754844665527ms total_cost_time:207.6573371887207ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6163 prompt_cache_len:5151 prompt_cache_ratio:0.8357942560441344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 +DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10349726676940918 s +INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10544228553771973 s +DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=334727225578316448577823985276164232942, time:1750766795.6793237s req_ids:[8] +DEBUG 06-24 20:06:35 [manager.py:391] +DEBUG 06-24 20:06:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 27318.235 tokens/s +DEBUG 06-24 20:06:35 [stats.py:37] Avg prompt tokens throughput: 27309.441 tokens/s +DEBUG 06-24 20:06:35 [stats.py:37] Avg generate tokens throughput: 8.794 tokens/s +ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:210.0365161895752ms total_cost_time:210.05535125732422ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6164 prompt_cache_len:5151 prompt_cache_ratio:0.8356586632057106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 +DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10286688804626465 s +INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10472989082336426 s +DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=143005380746227313454870115002179804716, time:1750766795.9022946s req_ids:[8] +DEBUG 06-24 20:06:35 [manager.py:391] +ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:214.3383026123047ms total_cost_time:214.35785293579102ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6165 prompt_cache_len:5151 prompt_cache_ratio:0.8355231143552312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 +DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10303282737731934 s +INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10496664047241211 s +DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=236052058903476792443385631655913286730, time:1750766796.1101658s req_ids:[8] +DEBUG 06-24 20:06:36 [manager.py:391] +ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:204.14185523986816ms total_cost_time:204.16021347045898ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6166 prompt_cache_len:5151 prompt_cache_ratio:0.8353876094712942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 +DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10411882400512695 s +INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10662460327148438 s +DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=242333186663282558248076690973249687968, time:1750766796.3190448s req_ids:[8] +DEBUG 06-24 20:06:36 [manager.py:391] +ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:207.48114585876465ms total_cost_time:207.50021934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6167 prompt_cache_len:5151 prompt_cache_ratio:0.8352521485325117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 +DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10408401489257812 s +INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.1060020923614502 s +DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=127334321396271924451989612847392834481, time:1750766796.5293477s req_ids:[8] +DEBUG 06-24 20:06:36 [manager.py:391] +ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:206.2242031097412ms total_cost_time:206.24518394470215ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6168 prompt_cache_len:5151 prompt_cache_ratio:0.8351167315175098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 +DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10327529907226562 s +INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10522007942199707 s +DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=299900336200277287405435521368851611491, time:1750766796.740181s req_ids:[8] +DEBUG 06-24 20:06:36 [manager.py:391] +ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:206.7258358001709ms total_cost_time:206.74586296081543ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6169 prompt_cache_len:5151 prompt_cache_ratio:0.8349813584049278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 +DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.1044614315032959 s +INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10641098022460938 s +DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=262983140690848707483768694406291156567, time:1750766796.9512544s req_ids:[8] +DEBUG 06-24 20:06:36 [manager.py:391] +ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:211.61174774169922ms total_cost_time:211.63177490234375ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6170 prompt_cache_len:5151 prompt_cache_ratio:0.8348460291734198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 +DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10322833061218262 s +INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10515427589416504 s +DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=184326619287352883060671791297834522162, time:1750766797.164035s req_ids:[8] +DEBUG 06-24 20:06:37 [manager.py:391] +ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:205.49535751342773ms total_cost_time:205.51490783691406ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6171 prompt_cache_len:5151 prompt_cache_ratio:0.8347107438016529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 +DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10317730903625488 s +INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10503506660461426 s +DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=110844453664365176085731817877030912463, time:1750766797.3733673s req_ids:[8] +DEBUG 06-24 20:06:37 [manager.py:391] +ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:208.7550163269043ms total_cost_time:208.77361297607422ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6172 prompt_cache_len:5151 prompt_cache_ratio:0.8345755022683085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 +DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10453629493713379 s +INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10655426979064941 s +DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=143021464239665314309801794821729374873, time:1750766797.585675s req_ids:[8] +DEBUG 06-24 20:06:37 [manager.py:391] +ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:203.4311294555664ms total_cost_time:203.45067977905273ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6173 prompt_cache_len:5151 prompt_cache_ratio:0.8344403045520816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 +DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10401487350463867 s +INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10593509674072266 s +DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=95783369034580814269639877441897335712, time:1750766797.7920573s req_ids:[8] +DEBUG 06-24 20:06:37 [manager.py:391] +ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:367.5997257232666ms total_cost_time:367.61927604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6174 prompt_cache_len:5151 prompt_cache_ratio:0.8343051506316812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 +DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10430145263671875 s +INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10635948181152344 s +DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=182465649665468414357396296172358654706, time:1750766798.1579986s req_ids:[8] +DEBUG 06-24 20:06:38 [manager.py:391] +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:197.03984260559082ms total_cost_time:197.06273078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6175 prompt_cache_len:5151 prompt_cache_ratio:0.83417004048583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 +DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10313987731933594 s +INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10503530502319336 s +DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=67433550744957107559143879422456748340, time:1750766798.3666937s req_ids:[8] +DEBUG 06-24 20:06:38 [manager.py:391] +ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:208.3439826965332ms total_cost_time:208.36353302001953ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6176 prompt_cache_len:5151 prompt_cache_ratio:0.8340349740932642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 +DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10311341285705566 s +INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10551595687866211 s +DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=274340827700731012035498431325864876539, time:1750766798.5774026s req_ids:[8] +DEBUG 06-24 20:06:38 [manager.py:391] +ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:206.76374435424805ms total_cost_time:206.78257942199707ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6177 prompt_cache_len:5151 prompt_cache_ratio:0.8338999514327343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 +DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10402250289916992 s +INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10584306716918945 s +DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=213846850821126457013818831797157702175, time:1750766798.78863s req_ids:[8] +DEBUG 06-24 20:06:38 [manager.py:391] +ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:203.45091819763184ms total_cost_time:203.46927642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6178 prompt_cache_len:5151 prompt_cache_ratio:0.8337649724830042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 +DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10430240631103516 s +INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10632991790771484 s +DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=68544995257468624353025646154177443132, time:1750766798.992736s req_ids:[8] +DEBUG 06-24 20:06:38 [manager.py:391] +ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:201.82037353515625ms total_cost_time:201.84063911437988ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6179 prompt_cache_len:5151 prompt_cache_ratio:0.8336300372228516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 +DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10308575630187988 s +INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10507011413574219 s +DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=252022908310939251057633468845722367738, time:1750766799.201477s req_ids:[8] +DEBUG 06-24 20:06:39 [manager.py:391] +ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:209.2607021331787ms total_cost_time:209.28001403808594ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6180 prompt_cache_len:5151 prompt_cache_ratio:0.833495145631068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 +DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10311412811279297 s +INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10517549514770508 s +DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=25219512254762808842410937951525288431, time:1750766799.4265058s req_ids:[8] +DEBUG 06-24 20:06:39 [manager.py:391] +ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:217.18883514404297ms total_cost_time:217.2069549560547ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6181 prompt_cache_len:5151 prompt_cache_ratio:0.8333602976864585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 +DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10399603843688965 s +INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10604739189147949 s +DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=271346729542096737699955016952924407771, time:1750766799.6428423s req_ids:[8] +DEBUG 06-24 20:06:39 [manager.py:391] +ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:208.94336700439453ms total_cost_time:208.96315574645996ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6182 prompt_cache_len:5151 prompt_cache_ratio:0.8332254933678421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 +DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10331344604492188 s +INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10523438453674316 s +DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=182671081899220230471401592469597276619, time:1750766799.8473756s req_ids:[8] +DEBUG 06-24 20:06:39 [manager.py:391] +ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:201.94315910339355ms total_cost_time:201.96294784545898ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6183 prompt_cache_len:5151 prompt_cache_ratio:0.8330907326540514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 +DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10319900512695312 s +INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10564565658569336 s +DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=277174947382323183378112580624573963225, time:1750766800.0534825s req_ids:[8] +DEBUG 06-24 20:06:40 [manager.py:391] +ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:208.09149742126465ms total_cost_time:208.11080932617188ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6184 prompt_cache_len:5151 prompt_cache_ratio:0.8329560155239327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 +DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10322165489196777 s +INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.1051323413848877 s +DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=220627652321598522111115160464918740206, time:1750766800.2663488s req_ids:[8] +DEBUG 06-24 20:06:40 [manager.py:391] +ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:206.50863647460938ms total_cost_time:206.5277099609375ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6185 prompt_cache_len:5151 prompt_cache_ratio:0.832821341956346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 +DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10337185859680176 s +INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10518670082092285 s +DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=78318963350391885158059624854948249041, time:1750766800.4801636s req_ids:[8] +DEBUG 06-24 20:06:40 [manager.py:391] +ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:214.39647674560547ms total_cost_time:214.4150733947754ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6186 prompt_cache_len:5151 prompt_cache_ratio:0.8326867119301649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 +DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10411834716796875 s +INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10598087310791016 s +DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=169647311881210589823113231037780987484, time:1750766800.6932726s req_ids:[8] +DEBUG 06-24 20:06:40 [manager.py:391] +ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:205.49821853637695ms total_cost_time:205.51729202270508ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6187 prompt_cache_len:5151 prompt_cache_ratio:0.8325521254242767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 +DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10323953628540039 s +INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10518479347229004 s +DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=254358326656507889614853692253052500646, time:1750766800.9027264s req_ids:[8] +DEBUG 06-24 20:06:40 [manager.py:391] +ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:395.1089382171631ms total_cost_time:395.1294422149658ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6188 prompt_cache_len:5151 prompt_cache_ratio:0.8324175824175825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 +DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10402226448059082 s +INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.1060795783996582 s +DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=230859904132794620911184000909303711837, time:1750766801.299617s req_ids:[8] +DEBUG 06-24 20:06:41 [manager.py:391] +ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:201.52044296264648ms total_cost_time:201.5397548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6189 prompt_cache_len:5151 prompt_cache_ratio:0.8322830828889967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 +DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10439944267272949 s +INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10636425018310547 s +DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=119316076468777548694444272363925472556, time:1750766801.5083835s req_ids:[8] +DEBUG 06-24 20:06:41 [manager.py:391] +ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:223.29211235046387ms total_cost_time:223.3123779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6190 prompt_cache_len:5151 prompt_cache_ratio:0.8321486268174475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 +DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10424399375915527 s +INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10613846778869629 s +DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=185920297840509855886648159365815234920, time:1750766801.7281063s req_ids:[8] +DEBUG 06-24 20:06:41 [manager.py:391] +ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:208.9974880218506ms total_cost_time:209.0170383453369ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6191 prompt_cache_len:5151 prompt_cache_ratio:0.8320142141818769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 +DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10422992706298828 s +INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10590147972106934 s +DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=145809805742492902515718657724567250407, time:1750766801.947688s req_ids:[8] +DEBUG 06-24 20:06:41 [manager.py:391] +ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:208.20069313049316ms total_cost_time:208.221435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6192 prompt_cache_len:5151 prompt_cache_ratio:0.8318798449612403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 +DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10312366485595703 s +INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10494232177734375 s +DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=50987435135096950734258525826769416672, time:1750766802.158477s req_ids:[8] +DEBUG 06-24 20:06:42 [manager.py:391] +ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:217.50664710998535ms total_cost_time:217.52595901489258ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6193 prompt_cache_len:5151 prompt_cache_ratio:0.8317455191345067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 +DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10332465171813965 s +INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10443711280822754 s +DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=334874776106186390169014285899188065114, time:1750766802.3751802s req_ids:[8] +DEBUG 06-24 20:06:42 [manager.py:391] +ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:214.39099311828613ms total_cost_time:214.41030502319336ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6194 prompt_cache_len:5151 prompt_cache_ratio:0.8316112366806587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 +DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10326886177062988 s +INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10509824752807617 s +DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=260422862321947458316288145973784263899, time:1750766802.5897987s req_ids:[8] +DEBUG 06-24 20:06:42 [manager.py:391] +ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:194.32640075683594ms total_cost_time:194.34523582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6195 prompt_cache_len:5151 prompt_cache_ratio:0.8314769975786925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 +DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10461735725402832 s +INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.1063685417175293 s +INFO 06-24 20:06:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=249308235477996095651910616905792056065, time:1750766802.7936606s req_ids:[8] +DEBUG 06-24 20:06:42 [manager.py:391] +ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:206.70318603515625ms total_cost_time:206.72249794006348ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6196 prompt_cache_len:5151 prompt_cache_ratio:0.8313428018076178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 +DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10488033294677734 s +INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10678529739379883 s +DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=181533786784591662249938083573719314708, time:1750766803.0051754s req_ids:[8] +DEBUG 06-24 20:06:43 [manager.py:391] +ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:207.15045928955078ms total_cost_time:207.1692943572998ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6197 prompt_cache_len:5151 prompt_cache_ratio:0.831208649346458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 +DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10412096977233887 s +INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10596418380737305 s +DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=109538553350496572726145691345586193581, time:1750766803.2160833s req_ids:[8] +DEBUG 06-24 20:06:43 [manager.py:391] +ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:207.48090744018555ms total_cost_time:207.50021934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6198 prompt_cache_len:5151 prompt_cache_ratio:0.8310745401742498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 +DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10294246673583984 s +INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10469818115234375 s +DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=238703736818791049971439601774946443404, time:1750766803.4310713s req_ids:[8] +DEBUG 06-24 20:06:43 [manager.py:391] +ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:210.35265922546387ms total_cost_time:210.3719711303711ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6199 prompt_cache_len:5151 prompt_cache_ratio:0.8309404742700436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 +DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10540556907653809 s +INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10732626914978027 s +DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=223678429410222826247402301657597914347, time:1750766803.6550295s req_ids:[8] +DEBUG 06-24 20:06:43 [manager.py:391] +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:404.99401092529297ms total_cost_time:405.03716468811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6200 prompt_cache_len:5151 prompt_cache_ratio:0.8308064516129032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 +DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10730195045471191 s +INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s +DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=129089206010392321674126505504979631485, time:1750766804.0544941s req_ids:[8] +DEBUG 06-24 20:06:44 [manager.py:391] +ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:209.48481559753418ms total_cost_time:209.52987670898438ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6201 prompt_cache_len:5151 prompt_cache_ratio:0.8306724721819061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 +DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10790276527404785 s +INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.11055445671081543 s +DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=274580558183632382412876717582796393967, time:1750766804.2763412s req_ids:[8] +DEBUG 06-24 20:06:44 [manager.py:391] +ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:213.32550048828125ms total_cost_time:213.38391304016113ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6202 prompt_cache_len:5151 prompt_cache_ratio:0.8305385359561431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 +DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10883355140686035 s +INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.1107327938079834 s +DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=29222527585448756392550174064691259774, time:1750766804.4776585s req_ids:[8] +DEBUG 06-24 20:06:44 [manager.py:391] +ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:210.4020118713379ms total_cost_time:210.4473114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6203 prompt_cache_len:5151 prompt_cache_ratio:0.8304046429147187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 +DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s +INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.11061882972717285 s +DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=311283468831619481868701538590607731076, time:1750766804.700409s req_ids:[8] +DEBUG 06-24 20:06:44 [manager.py:391] +ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:212.74065971374512ms total_cost_time:212.78786659240723ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6204 prompt_cache_len:5151 prompt_cache_ratio:0.8302707930367504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 +DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.1071622371673584 s +INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.10907649993896484 s +DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=11467304696616277608568626861512787192, time:1750766804.9270504s req_ids:[8] +DEBUG 06-24 20:06:44 [manager.py:391] +ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:221.24814987182617ms total_cost_time:221.29487991333008ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6205 prompt_cache_len:5151 prompt_cache_ratio:0.8301369863013699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 +DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10755300521850586 s +INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10967040061950684 s +DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=121899999699021260392889196242249340422, time:1750766805.1445925s req_ids:[8] +DEBUG 06-24 20:06:45 [manager.py:391] +ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:203.92584800720215ms total_cost_time:203.97329330444336ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6206 prompt_cache_len:5151 prompt_cache_ratio:0.8300032226877215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 +DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s +INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.11011457443237305 s +DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=62139265564817838173973606682531378410, time:1750766805.3625162s req_ids:[8] +DEBUG 06-24 20:06:45 [manager.py:391] +ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:213.14048767089844ms total_cost_time:213.18411827087402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6207 prompt_cache_len:5151 prompt_cache_ratio:0.8298695021749638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 +DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10736441612243652 s +INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10924363136291504 s +DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=171636578239590667322213797124729344498, time:1750766805.5734103s req_ids:[8] +DEBUG 06-24 20:06:45 [manager.py:391] +ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:204.57720756530762ms total_cost_time:204.6198844909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6208 prompt_cache_len:5151 prompt_cache_ratio:0.829735824742268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 +DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10900235176086426 s +INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.11114168167114258 s +DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=224668051744289019973723205584528606375, time:1750766805.7891595s req_ids:[8] +DEBUG 06-24 20:06:45 [manager.py:391] +DEBUG 06-24 20:06:45 [stats.py:37] Avg tokens(prompt+generate) throughput: 27548.044 tokens/s +DEBUG 06-24 20:06:45 [stats.py:37] Avg prompt tokens throughput: 27539.142 tokens/s +DEBUG 06-24 20:06:45 [stats.py:37] Avg generate tokens throughput: 8.902 tokens/s +ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:213.3009433746338ms total_cost_time:213.3462429046631ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6209 prompt_cache_len:5151 prompt_cache_ratio:0.8296021903688194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 +DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.1069183349609375 s +INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10888552665710449 s +DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=142026440483083702224366022291479131288, time:1750766806.000784s req_ids:[8] +DEBUG 06-24 20:06:46 [manager.py:391] +ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:366.5480613708496ms total_cost_time:366.5933609008789ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6210 prompt_cache_len:5151 prompt_cache_ratio:0.8294685990338164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 +DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:46 [batch.py:51] router release req id 8 +INFO 06-24 20:06:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10657691955566406 s +INFO 06-24 20:06:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10846877098083496 s +DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=273392915733129403376475945402901694394, time:1750766806.3655977s req_ids:[8] +DEBUG 06-24 20:06:46 [manager.py:391] +ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:196.54202461242676ms total_cost_time:196.58303260803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6211 prompt_cache_len:5151 prompt_cache_ratio:0.8293350507164707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 +DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10917329788208008 s +INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s +DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=305699887275326704054820597455406318012, time:1750766806.5731392s req_ids:[8] +DEBUG 06-24 20:06:46 [manager.py:391] +ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:198.47774505615234ms total_cost_time:198.52066040039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6212 prompt_cache_len:5151 prompt_cache_ratio:0.8292015453960078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 +DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10764551162719727 s +INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10951662063598633 s +DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=266878638444826133262585614104181434482, time:1750766806.7766533s req_ids:[8] +DEBUG 06-24 20:06:46 [manager.py:391] +ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:199.54538345336914ms total_cost_time:199.58806037902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6213 prompt_cache_len:5151 prompt_cache_ratio:0.8290680830516659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 +DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10656952857971191 s +INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10836505889892578 s +DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=246813748868512437775352192216081579281, time:1750766806.9820025s req_ids:[8] +DEBUG 06-24 20:06:46 [manager.py:391] +ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:200.48856735229492ms total_cost_time:200.5324363708496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6214 prompt_cache_len:5151 prompt_cache_ratio:0.8289346636626972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 +DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.1078941822052002 s +INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10967326164245605 s +DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=134260068625676113925637427492131411216, time:1750766807.1867747s req_ids:[8] +DEBUG 06-24 20:06:47 [manager.py:391] +ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:200.62756538391113ms total_cost_time:200.6702423095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6215 prompt_cache_len:5151 prompt_cache_ratio:0.8288012872083669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 +DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s +INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.11028814315795898 s +DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=308530330604852826819614108825896848946, time:1750766807.3954499s req_ids:[8] +DEBUG 06-24 20:06:47 [manager.py:391] +ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:205.66153526306152ms total_cost_time:205.7032585144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6216 prompt_cache_len:5151 prompt_cache_ratio:0.8286679536679536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 +DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.10776114463806152 s +INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10967135429382324 s +DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=85663535150515529166413223636137479560, time:1750766807.6048172s req_ids:[8] +DEBUG 06-24 20:06:47 [manager.py:391] +ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:202.73160934448242ms total_cost_time:202.77714729309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6217 prompt_cache_len:5151 prompt_cache_ratio:0.8285346630207495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 +DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s +INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10862588882446289 s +DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=109039265709184128189277658901280970833, time:1750766807.8154387s req_ids:[8] +DEBUG 06-24 20:06:47 [manager.py:391] +ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:205.98244667053223ms total_cost_time:206.02869987487793ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6218 prompt_cache_len:5151 prompt_cache_ratio:0.8284014152460598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 +DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10805654525756836 s +INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.11022210121154785 s +DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=221001382116346561839749205486953955098, time:1750766808.0252206s req_ids:[8] +DEBUG 06-24 20:06:48 [manager.py:391] +ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:202.30531692504883ms total_cost_time:202.347993850708ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6219 prompt_cache_len:5151 prompt_cache_ratio:0.8282682103232031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 +DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10684490203857422 s +INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.10876941680908203 s +DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=302157331811672016174837861696821348702, time:1750766808.2332687s req_ids:[8] +DEBUG 06-24 20:06:48 [manager.py:391] +ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:204.38170433044434ms total_cost_time:204.42438125610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6220 prompt_cache_len:5151 prompt_cache_ratio:0.8281350482315113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 +DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10763430595397949 s +INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.11004114151000977 s +DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=204819967202021584442150035400988437551, time:1750766808.4419105s req_ids:[8] +DEBUG 06-24 20:06:48 [manager.py:391] +ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:205.69634437561035ms total_cost_time:205.73997497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6221 prompt_cache_len:5151 prompt_cache_ratio:0.8280019289503295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 +DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.20728421211242676 s +INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.20887303352355957 s +DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=253577637024759995790024656554170786603, time:1750766808.7463598s req_ids:[8] +DEBUG 06-24 20:06:48 [manager.py:391] +ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:254.15349006652832ms total_cost_time:254.1940212249756ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:6222 prompt_cache_len:5151 prompt_cache_ratio:0.8278688524590164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 +DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10745429992675781 s +INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s +DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=148159794852021763307896848468820039632, time:1750766808.9071126s req_ids:[8] +DEBUG 06-24 20:06:48 [manager.py:391] +ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:193.20249557495117ms total_cost_time:193.24660301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6223 prompt_cache_len:5151 prompt_cache_ratio:0.8277358187369436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 +DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10662388801574707 s +INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10866904258728027 s +DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=209085834863442157280202752124128141146, time:1750766809.110937s req_ids:[8] +DEBUG 06-24 20:06:49 [manager.py:391] +ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:203.71294021606445ms total_cost_time:203.75609397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6224 prompt_cache_len:5151 prompt_cache_ratio:0.8276028277634961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 +DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10829401016235352 s +INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.11018681526184082 s +DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=335171324055220064096823808819897812607, time:1750766809.3196244s req_ids:[8] +DEBUG 06-24 20:06:49 [manager.py:391] +ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:202.64410972595215ms total_cost_time:202.74066925048828ms,out_token_counter:1 mean_per_token_cost_time: 0.09655952453613281ms prompt_token_num:6225 prompt_cache_len:5151 prompt_cache_ratio:0.8274698795180723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 +DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.1082005500793457 s +INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s +DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=164680843762860334860357277778222115978, time:1750766809.5286891s req_ids:[8] +DEBUG 06-24 20:06:49 [manager.py:391] +ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:202.66151428222656ms total_cost_time:202.70323753356934ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6226 prompt_cache_len:5151 prompt_cache_ratio:0.8273369739800835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 +DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10777735710144043 s +INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s +DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=87545499633224233074531612892889704748, time:1750766809.738937s req_ids:[8] +DEBUG 06-24 20:06:49 [manager.py:391] +ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:208.34994316101074ms total_cost_time:208.39238166809082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6227 prompt_cache_len:5151 prompt_cache_ratio:0.8272041111289545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 +DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10716462135314941 s +INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s +DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=296706002826935978710635342014476682588, time:1750766809.9653463s req_ids:[8] +DEBUG 06-24 20:06:49 [manager.py:391] +ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:219.23303604125977ms total_cost_time:219.27595138549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6228 prompt_cache_len:5151 prompt_cache_ratio:0.8270712909441233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 +DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.1068580150604248 s +INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10877799987792969 s +DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=300437126235879746737221230586241630333, time:1750766810.1783924s req_ids:[8] +DEBUG 06-24 20:06:50 [manager.py:391] +ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:207.77654647827148ms total_cost_time:207.81946182250977ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6229 prompt_cache_len:5151 prompt_cache_ratio:0.826938513405041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 +DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10719418525695801 s +INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10918426513671875 s +DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=246924350557468428398411045117208200383, time:1750766810.390594s req_ids:[8] +DEBUG 06-24 20:06:50 [manager.py:391] +ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:205.86395263671875ms total_cost_time:205.90758323669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6230 prompt_cache_len:5151 prompt_cache_ratio:0.8268057784911718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 +DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10747575759887695 s +INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10946869850158691 s +DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=291651901307532973010767736686075293355, time:1750766810.601247s req_ids:[8] +DEBUG 06-24 20:06:50 [manager.py:391] +ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:205.4286003112793ms total_cost_time:205.47151565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6231 prompt_cache_len:5151 prompt_cache_ratio:0.8266730861819933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 +DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10773587226867676 s +INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972976684570312 s +DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=96089753093821298213595850066179703477, time:1750766810.8131654s req_ids:[8] +DEBUG 06-24 20:06:50 [manager.py:391] +ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:206.81262016296387ms total_cost_time:206.85553550720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6232 prompt_cache_len:5151 prompt_cache_ratio:0.8265404364569962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 +DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10669112205505371 s +INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.10853266716003418 s +DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=40826812959059000403406219261630428467, time:1750766811.0239308s req_ids:[8] +DEBUG 06-24 20:06:51 [manager.py:391] +ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:215.85488319396973ms total_cost_time:215.8958911895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6233 prompt_cache_len:5151 prompt_cache_ratio:0.8264078292956842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 +DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10732531547546387 s +INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s +DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=164039272474673313599094395242539904108, time:1750766811.242305s req_ids:[8] +DEBUG 06-24 20:06:51 [manager.py:391] +ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:346.759557723999ms total_cost_time:346.8043804168701ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6234 prompt_cache_len:5151 prompt_cache_ratio:0.8262752646775746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 +DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10660624504089355 s +INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.1085355281829834 s +DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=300933581240107300780060504450370359901, time:1750766811.5903049s req_ids:[8] +DEBUG 06-24 20:06:51 [manager.py:391] +ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:195.3415870666504ms total_cost_time:195.38354873657227ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6235 prompt_cache_len:5151 prompt_cache_ratio:0.8261427425821972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 +DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s +INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=16910913011531094755825903362567027860, time:1750766811.796252s req_ids:[8] +DEBUG 06-24 20:06:51 [manager.py:391] +ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:199.65267181396484ms total_cost_time:199.69487190246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6236 prompt_cache_len:5151 prompt_cache_ratio:0.8260102629890955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 +DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.1065359115600586 s +INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.10842132568359375 s +DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=78818862649933872031442039225507011228, time:1750766811.9922535s req_ids:[8] +DEBUG 06-24 20:06:51 [manager.py:391] +ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:182.1267604827881ms total_cost_time:182.16991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6237 prompt_cache_len:5151 prompt_cache_ratio:0.8258778258778259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 +DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10797667503356934 s +INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986804962158203 s +DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=85419056267207516503026081972988258446, time:1750766812.1870546s req_ids:[8] +DEBUG 06-24 20:06:52 [manager.py:391] +ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:199.815034866333ms total_cost_time:199.85675811767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6238 prompt_cache_len:5151 prompt_cache_ratio:0.8257454312279576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 +DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.1072838306427002 s +INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918402671813965 s +DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=114045466805254336772843708094003856883, time:1750766812.4078274s req_ids:[8] +DEBUG 06-24 20:06:52 [manager.py:391] +ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:218.57929229736328ms total_cost_time:218.62196922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6239 prompt_cache_len:5151 prompt_cache_ratio:0.8256130790190735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 +DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10686635971069336 s +INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10872793197631836 s +DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=53933632271953530123873540843571492602, time:1750766812.619721s req_ids:[8] +DEBUG 06-24 20:06:52 [manager.py:391] +ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:202.91924476623535ms total_cost_time:202.97908782958984ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6240 prompt_cache_len:5151 prompt_cache_ratio:0.8254807692307692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 +DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s +INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.11093783378601074 s +DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=160183513984481951008537484254434947779, time:1750766812.823167s req_ids:[8] +DEBUG 06-24 20:06:52 [manager.py:391] +ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:195.20235061645508ms total_cost_time:195.25718688964844ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:6241 prompt_cache_len:5151 prompt_cache_ratio:0.8253485018426534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 +DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s +INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.11055445671081543 s +DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=166742508698764139519972053962035536941, time:1750766813.0258133s req_ids:[8] +DEBUG 06-24 20:06:53 [manager.py:391] +ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:197.1435546875ms total_cost_time:197.18647003173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6242 prompt_cache_len:5151 prompt_cache_ratio:0.825216276834348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 +DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:53 [batch.py:51] router release req id 8 +INFO 06-24 20:06:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:06:53 [statics_utils.py:24] mean first cost: 240.26094247783655 ms +INFO 06-24 20:06:53 [statics_utils.py:24] mean per token cost: 0.12467752169683273 ms +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10694599151611328 s +INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.10882019996643066 s +INFO 06-24 20:06:53 [manager.py:620] left req id 8can release False refcount 3 +DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=218407537650659866215547164604819415142, time:1750766813.2296014s req_ids:[8] +DEBUG 06-24 20:06:53 [manager.py:391] +ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:201.85470581054688ms total_cost_time:201.89666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6243 prompt_cache_len:5151 prompt_cache_ratio:0.8250840941854878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 +DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:53 [batch.py:51] router release req id 8 +INFO 06-24 20:06:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10700011253356934 s +INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.10888886451721191 s +DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=332238364146753330790334037284437236205, time:1750766813.4536078s req_ids:[8] +DEBUG 06-24 20:06:53 [manager.py:391] +ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:218.49966049194336ms total_cost_time:218.55688095092773ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6244 prompt_cache_len:5151 prompt_cache_ratio:0.8249519538757207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 +DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10814261436462402 s +INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.11003708839416504 s +DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=182149710840249271173379627299389147373, time:1750766813.6589174s req_ids:[8] +DEBUG 06-24 20:06:53 [manager.py:391] +ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:200.4873752593994ms total_cost_time:200.52862167358398ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6245 prompt_cache_len:5151 prompt_cache_ratio:0.8248198558847077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 +DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10765600204467773 s +INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.1095285415649414 s +DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=45256018481458876725907852320652525514, time:1750766813.865812s req_ids:[8] +DEBUG 06-24 20:06:53 [manager.py:391] +ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:201.10702514648438ms total_cost_time:201.14874839782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6246 prompt_cache_len:5151 prompt_cache_ratio:0.824687800192123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 +DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10970616340637207 s +INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.1114950180053711 s +DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=65634498828482663384644033816235928042, time:1750766814.0750844s req_ids:[8] +DEBUG 06-24 20:06:54 [manager.py:391] +ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:331.79616928100586ms total_cost_time:331.84051513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6247 prompt_cache_len:5151 prompt_cache_ratio:0.8245557867776533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 +DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10823464393615723 s +INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s +DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=332597275972844949813723299032572109235, time:1750766814.4025528s req_ids:[8] +DEBUG 06-24 20:06:54 [manager.py:391] +ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:186.49959564208984ms total_cost_time:186.54298782348633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6248 prompt_cache_len:5151 prompt_cache_ratio:0.8244238156209988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 +DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10712909698486328 s +INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.10910415649414062 s +DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=130040751941261851027312512392497380335, time:1750766814.6009617s req_ids:[8] +DEBUG 06-24 20:06:54 [manager.py:391] +ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:199.02896881103516ms total_cost_time:199.07259941101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6249 prompt_cache_len:5151 prompt_cache_ratio:0.8242918867018723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 +DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10797381401062012 s +INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s +DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=77434561325384223784516655122473469878, time:1750766814.8076317s req_ids:[8] +DEBUG 06-24 20:06:54 [manager.py:391] +ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:204.2853832244873ms total_cost_time:204.33878898620605ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:6250 prompt_cache_len:5151 prompt_cache_ratio:0.82416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 +DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s +INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11082124710083008 s +DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=105930493638842463816699007718508160007, time:1750766815.017161s req_ids:[8] +DEBUG 06-24 20:06:55 [manager.py:391] +ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:204.80632781982422ms total_cost_time:204.8487663269043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6251 prompt_cache_len:5151 prompt_cache_ratio:0.8240281554951208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 +DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10810351371765137 s +INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11004281044006348 s +DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=34186843696439327860141355779217284187, time:1750766815.2269428s req_ids:[8] +DEBUG 06-24 20:06:55 [manager.py:391] +ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:202.59785652160645ms total_cost_time:202.64434814453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:6252 prompt_cache_len:5151 prompt_cache_ratio:0.8238963531669866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 +DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.1072838306427002 s +INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.10922074317932129 s +DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=223347132229164407715252728174003840411, time:1750766815.4361951s req_ids:[8] +DEBUG 06-24 20:06:55 [manager.py:391] +ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:213.54317665100098ms total_cost_time:213.58561515808105ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6253 prompt_cache_len:5151 prompt_cache_ratio:0.8237645929953622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 +DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10922884941101074 s +INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.1112070083618164 s +DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=216880376403258175119033497322489105606, time:1750766815.6509798s req_ids:[8] +DEBUG 06-24 20:06:55 [manager.py:391] +ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:196.22468948364258ms total_cost_time:196.26808166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6254 prompt_cache_len:5151 prompt_cache_ratio:0.8236328749600256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 +DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10836195945739746 s +INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11027145385742188 s +DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=210224789270281068689775519564836292732, time:1750766815.856839s req_ids:[8] +DEBUG 06-24 20:06:55 [manager.py:391] +DEBUG 06-24 20:06:55 [stats.py:37] Avg tokens(prompt+generate) throughput: 28485.836 tokens/s +DEBUG 06-24 20:06:55 [stats.py:37] Avg prompt tokens throughput: 28476.698 tokens/s +DEBUG 06-24 20:06:55 [stats.py:37] Avg generate tokens throughput: 9.138 tokens/s +ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:202.15725898742676ms total_cost_time:202.20160484313965ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6255 prompt_cache_len:5151 prompt_cache_ratio:0.8235011990407674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 +DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.1064293384552002 s +INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10833048820495605 s +DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=10059680914770981924600929935817399605, time:1750766816.0652816s req_ids:[8] +DEBUG 06-24 20:06:56 [manager.py:391] +ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:211.09771728515625ms total_cost_time:211.14110946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6256 prompt_cache_len:5151 prompt_cache_ratio:0.8233695652173914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 +DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10664820671081543 s +INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10855364799499512 s +DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=250605462008026985249952580069170181870, time:1750766816.2881517s req_ids:[8] +DEBUG 06-24 20:06:56 [manager.py:391] +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:210.81256866455078ms total_cost_time:210.85691452026367ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6257 prompt_cache_len:5151 prompt_cache_ratio:0.823237973469714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 +DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10656118392944336 s +INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.1084437370300293 s +DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=266275702577405762715278288984290961619, time:1750766816.496682s req_ids:[8] +DEBUG 06-24 20:06:56 [manager.py:391] +ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:202.66222953796387ms total_cost_time:202.70442962646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6258 prompt_cache_len:5151 prompt_cache_ratio:0.8231064237775647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 +DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10759592056274414 s +INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s +DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=52680681458027716104963416745762809161, time:1750766816.7081451s req_ids:[8] +DEBUG 06-24 20:06:56 [manager.py:391] +ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:207.322359085083ms total_cost_time:207.3662281036377ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6259 prompt_cache_len:5151 prompt_cache_ratio:0.8229749161207861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 +DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:56 [batch.py:51] router release req id 8 +INFO 06-24 20:06:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10671734809875488 s +INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10856771469116211 s +DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=161459789615312660008044195983531698770, time:1750766816.9282625s req_ids:[8] +DEBUG 06-24 20:06:56 [manager.py:391] +ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:213.0436897277832ms total_cost_time:213.0882740020752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6260 prompt_cache_len:5151 prompt_cache_ratio:0.8228434504792332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 +DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10694146156311035 s +INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10882925987243652 s +DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=176693256279375892864322822773122197327, time:1750766817.137513s req_ids:[8] +DEBUG 06-24 20:06:57 [manager.py:391] +ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:366.4078712463379ms total_cost_time:366.45030975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6261 prompt_cache_len:5151 prompt_cache_ratio:0.8227120268327743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 +DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10706925392150879 s +INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10900592803955078 s +DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=87983332168444301666264180499709868541, time:1750766817.5006573s req_ids:[8] +DEBUG 06-24 20:06:57 [manager.py:391] +ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:196.25568389892578ms total_cost_time:196.29836082458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6262 prompt_cache_len:5151 prompt_cache_ratio:0.8225806451612904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 +DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10747027397155762 s +INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10936450958251953 s +DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=167020859558833719829316254360198560208, time:1750766817.7020197s req_ids:[8] +DEBUG 06-24 20:06:57 [manager.py:391] +ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:193.99094581604004ms total_cost_time:194.03457641601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6263 prompt_cache_len:5151 prompt_cache_ratio:0.8224493054446751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 +DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10661840438842773 s +INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.1085209846496582 s +DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=324110138131552846090648391225548136608, time:1750766817.9055629s req_ids:[8] +DEBUG 06-24 20:06:57 [manager.py:391] +ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:202.94427871704102ms total_cost_time:202.9893398284912ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6264 prompt_cache_len:5151 prompt_cache_ratio:0.8223180076628352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 +DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10723733901977539 s +INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.1091461181640625 s +DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=38251015215320874434302751048638888587, time:1750766818.1147664s req_ids:[8] +DEBUG 06-24 20:06:58 [manager.py:391] +ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:196.10095024108887ms total_cost_time:196.14434242248535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6265 prompt_cache_len:5151 prompt_cache_ratio:0.8221867517956903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 +DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s +INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.11023616790771484 s +DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=9660085581430418594847180062588788319, time:1750766818.3151138s req_ids:[8] +DEBUG 06-24 20:06:58 [manager.py:391] +ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:200.51956176757812ms total_cost_time:200.5615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6266 prompt_cache_len:5151 prompt_cache_ratio:0.8220555378231726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 +DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10734724998474121 s +INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.10916566848754883 s +DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=16772820196595019676616217413099751234, time:1750766818.5233934s req_ids:[8] +DEBUG 06-24 20:06:58 [manager.py:391] +ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:205.16347885131836ms total_cost_time:205.20544052124023ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6267 prompt_cache_len:5151 prompt_cache_ratio:0.8219243657252274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 +DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.1067202091217041 s +INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.1086726188659668 s +DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=271289577921490993594641244974333103262, time:1750766818.7322161s req_ids:[8] +DEBUG 06-24 20:06:58 [manager.py:391] +ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:221.5423583984375ms total_cost_time:221.58527374267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6268 prompt_cache_len:5151 prompt_cache_ratio:0.8217932354818124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 +DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10800552368164062 s +INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998249053955078 s +DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=124357690349298273630683956741641857356, time:1750766818.9531121s req_ids:[8] +DEBUG 06-24 20:06:58 [manager.py:391] +ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:194.4746971130371ms total_cost_time:194.5192813873291ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6269 prompt_cache_len:5151 prompt_cache_ratio:0.8216621470728984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 +DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10786676406860352 s +INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s +DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=132784692881031485489833489246918940542, time:1750766819.1633801s req_ids:[8] +DEBUG 06-24 20:06:59 [manager.py:391] +ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:209.5623016357422ms total_cost_time:209.60474014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6270 prompt_cache_len:5151 prompt_cache_ratio:0.8215311004784689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 +DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10734248161315918 s +INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10926032066345215 s +DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=123968070347101568990641047573881143169, time:1750766819.3750129s req_ids:[8] +DEBUG 06-24 20:06:59 [manager.py:391] +ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:212.02874183654785ms total_cost_time:212.07141876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6271 prompt_cache_len:5151 prompt_cache_ratio:0.8214000956785201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 +DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10727787017822266 s +INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10912442207336426 s +DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=307802305702148274201862609672065561028, time:1750766819.600406s req_ids:[8] +DEBUG 06-24 20:06:59 [manager.py:391] +ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:209.84244346618652ms total_cost_time:209.8853588104248ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6272 prompt_cache_len:5151 prompt_cache_ratio:0.8212691326530612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 +DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:06:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10697102546691895 s +INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10884475708007812 s +DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=302351527808587643478256935575585367699, time:1750766819.8060408s req_ids:[8] +DEBUG 06-24 20:06:59 [manager.py:391] +ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:366.01781845092773ms total_cost_time:366.0621643066406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6273 prompt_cache_len:5151 prompt_cache_ratio:0.8211382113821138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 +DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s +INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10961246490478516 s +DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=222700841902183682758413966446678588761, time:1750766820.1714184s req_ids:[8] +DEBUG 06-24 20:07:00 [manager.py:391] +ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:199.37920570373535ms total_cost_time:199.42307472229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6274 prompt_cache_len:5151 prompt_cache_ratio:0.8210073318457125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 +DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10677456855773926 s +INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10863423347473145 s +INFO 06-24 20:07:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=197730867870280356015220970242127861243, time:1750766820.381235s req_ids:[8] +DEBUG 06-24 20:07:00 [manager.py:391] +ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.55910873413086ms total_cost_time:207.60202407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6275 prompt_cache_len:5151 prompt_cache_ratio:0.8208764940239044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 +DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10723495483398438 s +INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10901212692260742 s +DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=233151851286911992311521907088453205328, time:1750766820.5982914s req_ids:[8] +DEBUG 06-24 20:07:00 [manager.py:391] +ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:211.95673942565918ms total_cost_time:211.99917793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6276 prompt_cache_len:5151 prompt_cache_ratio:0.8207456978967496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 +DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10640764236450195 s +INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10834455490112305 s +DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=285505395774361927297530496356465379467, time:1750766820.8113303s req_ids:[8] +DEBUG 06-24 20:07:00 [manager.py:391] +ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.43894577026367ms total_cost_time:207.48066902160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6277 prompt_cache_len:5151 prompt_cache_ratio:0.8206149434443205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 +DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.1066133975982666 s +INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10844779014587402 s +DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=335461953799354196498880748715811272009, time:1750766821.0242538s req_ids:[8] +DEBUG 06-24 20:07:01 [manager.py:391] +ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.3826789855957ms total_cost_time:207.4275016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6278 prompt_cache_len:5151 prompt_cache_ratio:0.8204842306467027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 +DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10675048828125 s +INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10863375663757324 s +DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=73641268550376281433009704859033172918, time:1750766821.233939s req_ids:[8] +DEBUG 06-24 20:07:01 [manager.py:391] +ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.97426795959473ms total_cost_time:207.01980590820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6279 prompt_cache_len:5151 prompt_cache_ratio:0.8203535594839942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 +DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10720539093017578 s +INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10905885696411133 s +DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=171288799635770241690022970561266420750, time:1750766821.4456205s req_ids:[8] +DEBUG 06-24 20:07:01 [manager.py:391] +ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.223726272583ms total_cost_time:206.2814235687256ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:6280 prompt_cache_len:5151 prompt_cache_ratio:0.8202229299363057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 +DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10727858543395996 s +INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.1091909408569336 s +DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=42353602582363933174833826509349783660, time:1750766821.6573157s req_ids:[8] +DEBUG 06-24 20:07:01 [manager.py:391] +ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:207.64470100402832ms total_cost_time:207.6873779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6281 prompt_cache_len:5151 prompt_cache_ratio:0.8200923419837606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 +DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10759615898132324 s +INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s +DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=25270987587876083140233900462483169127, time:1750766821.8686085s req_ids:[8] +DEBUG 06-24 20:07:01 [manager.py:391] +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.4368724822998ms total_cost_time:206.4800262451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6282 prompt_cache_len:5151 prompt_cache_ratio:0.8199617956064947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 +DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10627222061157227 s +INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10811400413513184 s +DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=79964922483371076737724541484025367847, time:1750766822.0804527s req_ids:[8] +DEBUG 06-24 20:07:02 [manager.py:391] +ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:370.09429931640625ms total_cost_time:370.13864517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6283 prompt_cache_len:5151 prompt_cache_ratio:0.819831290784657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 +DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s +INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10991287231445312 s +DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=267673378373259684656720675566839986487, time:1750766822.4501612s req_ids:[8] +DEBUG 06-24 20:07:02 [manager.py:391] +ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:199.59378242492676ms total_cost_time:199.63645935058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6284 prompt_cache_len:5151 prompt_cache_ratio:0.8197008274984087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 +DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10738730430603027 s +INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.1092677116394043 s +DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=85288884119915974765861794932711927272, time:1750766822.6613731s req_ids:[8] +DEBUG 06-24 20:07:02 [manager.py:391] +ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:206.69126510620117ms total_cost_time:206.71892166137695ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:6285 prompt_cache_len:5151 prompt_cache_ratio:0.8195704057279236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 +DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10660934448242188 s +INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10843873023986816 s +DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=250322631767396809232977903781263862389, time:1750766822.8744023s req_ids:[8] +DEBUG 06-24 20:07:02 [manager.py:391] +ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:209.98477935791016ms total_cost_time:210.01172065734863ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6286 prompt_cache_len:5151 prompt_cache_ratio:0.8194400254533885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 +DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10694265365600586 s +INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10878276824951172 s +DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=177574486411429186707302830692711678300, time:1750766823.086578s req_ids:[8] +DEBUG 06-24 20:07:03 [manager.py:391] +ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:207.69429206848145ms total_cost_time:207.72242546081543ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6287 prompt_cache_len:5151 prompt_cache_ratio:0.8193096866550024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 +DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10679078102111816 s +INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10862374305725098 s +DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=51277520975690333125523558163218814845, time:1750766823.3048594s req_ids:[8] +DEBUG 06-24 20:07:03 [manager.py:391] +ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:214.9331569671631ms total_cost_time:214.96081352233887ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:6288 prompt_cache_len:5151 prompt_cache_ratio:0.8191793893129771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 +DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10646820068359375 s +INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10824942588806152 s +DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=5766265000539410180438159610318032100, time:1750766823.5238473s req_ids:[8] +DEBUG 06-24 20:07:03 [manager.py:391] +ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:175.83990097045898ms total_cost_time:175.86827278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6289 prompt_cache_len:5151 prompt_cache_ratio:0.8190491334075369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 +DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10739779472351074 s +INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10916614532470703 s +DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=16684104428779221276763783555591099387, time:1750766823.692883s req_ids:[8] +DEBUG 06-24 20:07:03 [manager.py:391] +ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:216.68529510498047ms total_cost_time:216.71175956726074ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6290 prompt_cache_len:5151 prompt_cache_ratio:0.8189189189189189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 +DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:03 [batch.py:51] router release req id 8 +INFO 06-24 20:07:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10528826713562012 s +INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.1070716381072998 s +DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=24762815047925415713503284958301536277, time:1750766823.911648s req_ids:[8] +DEBUG 06-24 20:07:03 [manager.py:391] +ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:197.3257064819336ms total_cost_time:197.35407829284668ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6291 prompt_cache_len:5151 prompt_cache_ratio:0.8187887458273725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 +DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10687541961669922 s +INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10869812965393066 s +DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=255011625029333719041046047767468977013, time:1750766824.1245944s req_ids:[8] +DEBUG 06-24 20:07:04 [manager.py:391] +ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:214.02645111083984ms total_cost_time:214.05529975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:6292 prompt_cache_len:5151 prompt_cache_ratio:0.8186586141131595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 +DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s +INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10945653915405273 s +DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=266104762228652156360118773751536407822, time:1750766824.3392954s req_ids:[8] +DEBUG 06-24 20:07:04 [manager.py:391] +ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:209.02752876281738ms total_cost_time:209.05494689941406ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:6293 prompt_cache_len:5151 prompt_cache_ratio:0.8185285237565549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 +DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10691380500793457 s +INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10870242118835449 s +DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=97126666895751136968492784880361397351, time:1750766824.5633032s req_ids:[8] +DEBUG 06-24 20:07:04 [manager.py:391] +ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:219.9697494506836ms total_cost_time:220.01242637634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6294 prompt_cache_len:5151 prompt_cache_ratio:0.8183984747378455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 +DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.2067258358001709 s +INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.20825886726379395 s +DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=51649544520622392456444384656326100225, time:1750766824.8679576s req_ids:[8] +DEBUG 06-24 20:07:04 [manager.py:391] +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:257.5376033782959ms total_cost_time:257.56239891052246ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6295 prompt_cache_len:5151 prompt_cache_ratio:0.8182684670373312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 +DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10592269897460938 s +INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10780072212219238 s +DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=96468283147581372195952446556949833115, time:1750766825.028596s req_ids:[8] +DEBUG 06-24 20:07:05 [manager.py:391] +ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:193.63164901733398ms total_cost_time:193.65978240966797ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6296 prompt_cache_len:5151 prompt_cache_ratio:0.818138500635324 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 +DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10592317581176758 s +INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.1077120304107666 s +DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=120227863708073623699945365080452543162, time:1750766825.2335815s req_ids:[8] +DEBUG 06-24 20:07:05 [manager.py:391] +ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:203.60136032104492ms total_cost_time:203.64618301391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6297 prompt_cache_len:5151 prompt_cache_ratio:0.8180085755121487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 +DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.1078500747680664 s +INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10978126525878906 s +DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=276007112557467347048263257914014445458, time:1750766825.4390893s req_ids:[8] +DEBUG 06-24 20:07:05 [manager.py:391] +ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:205.16705513000488ms total_cost_time:205.20925521850586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6298 prompt_cache_len:5151 prompt_cache_ratio:0.8178786916481423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 +DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s +INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10953354835510254 s +DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=298385317018520693020262406703948482418, time:1750766825.6500566s req_ids:[8] +DEBUG 06-24 20:07:05 [manager.py:391] +ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:205.94000816345215ms total_cost_time:205.98602294921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6299 prompt_cache_len:5151 prompt_cache_ratio:0.8177488490236545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 +DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10706138610839844 s +INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10888218879699707 s +DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=103505822365411880605617934658433225513, time:1750766825.8600256s req_ids:[8] +DEBUG 06-24 20:07:05 [manager.py:391] +DEBUG 06-24 20:07:05 [stats.py:37] Avg tokens(prompt+generate) throughput: 28250.717 tokens/s +DEBUG 06-24 20:07:05 [stats.py:37] Avg prompt tokens throughput: 28241.720 tokens/s +DEBUG 06-24 20:07:05 [stats.py:37] Avg generate tokens throughput: 8.997 tokens/s +ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:206.23183250427246ms total_cost_time:206.27522468566895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6300 prompt_cache_len:5151 prompt_cache_ratio:0.8176190476190476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 +DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10689926147460938 s +INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10864400863647461 s +DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=230687595483476645233685325472748887999, time:1750766826.0723994s req_ids:[8] +DEBUG 06-24 20:07:06 [manager.py:391] +ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:204.40101623535156ms total_cost_time:204.44464683532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6301 prompt_cache_len:5151 prompt_cache_ratio:0.8174892874146961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 +DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10797691345214844 s +INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10982036590576172 s +DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=182785419055422512682711868491062587720, time:1750766826.2811992s req_ids:[8] +DEBUG 06-24 20:07:06 [manager.py:391] +ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:206.9571018218994ms total_cost_time:207.01074600219727ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:6302 prompt_cache_len:5151 prompt_cache_ratio:0.817359568390987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 +DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.1086881160736084 s +INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.1105799674987793 s +DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=297151291847982058467551477555438132616, time:1750766826.4940038s req_ids:[8] +DEBUG 06-24 20:07:06 [manager.py:391] +ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:205.19685745239258ms total_cost_time:205.23977279663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6303 prompt_cache_len:5151 prompt_cache_ratio:0.8172298905283198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 +DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10636687278747559 s +INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10826826095581055 s +DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=37990712776260456808002344871438098923, time:1750766826.7081707s req_ids:[8] +DEBUG 06-24 20:07:06 [manager.py:391] +ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:210.42275428771973ms total_cost_time:210.4642391204834ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6304 prompt_cache_len:5151 prompt_cache_ratio:0.8171002538071066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 +DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.1074073314666748 s +INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s +DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=274444876454159010134984836494717448998, time:1750766826.920513s req_ids:[8] +DEBUG 06-24 20:07:06 [manager.py:391] +ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:205.49678802490234ms total_cost_time:205.54018020629883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6305 prompt_cache_len:5151 prompt_cache_ratio:0.8169706582077716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 +DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s +INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s +DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=178641240275241983487802954710813495574, time:1750766827.1304538s req_ids:[8] +DEBUG 06-24 20:07:07 [manager.py:391] +ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:205.20257949829102ms total_cost_time:205.24930953979492ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6306 prompt_cache_len:5151 prompt_cache_ratio:0.8168411037107517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 +DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10707879066467285 s +INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10895323753356934 s +DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=17589881590357641309354845225860252106, time:1750766827.3413534s req_ids:[8] +DEBUG 06-24 20:07:07 [manager.py:391] +ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:356.14490509033203ms total_cost_time:356.189489364624ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6307 prompt_cache_len:5151 prompt_cache_ratio:0.816711590296496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 +DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.1071619987487793 s +INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10892271995544434 s +DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=163581982390625610938982657172341055317, time:1750766827.6944473s req_ids:[8] +DEBUG 06-24 20:07:07 [manager.py:391] +ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:154.15120124816895ms total_cost_time:154.19244766235352ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6308 prompt_cache_len:5151 prompt_cache_ratio:0.8165821179454661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 +DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10689616203308105 s +INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.1084136962890625 s +DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=191572045227264855488903072312529771122, time:1750766827.855373s req_ids:[8] +DEBUG 06-24 20:07:07 [manager.py:391] +ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:154.72054481506348ms total_cost_time:154.76369857788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6309 prompt_cache_len:5151 prompt_cache_ratio:0.816452686638136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 +DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10703372955322266 s +INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10900568962097168 s +DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=132789265975471341735305218110494931049, time:1750766828.015344s req_ids:[8] +DEBUG 06-24 20:07:08 [manager.py:391] +ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:191.03670120239258ms total_cost_time:191.08033180236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6310 prompt_cache_len:5151 prompt_cache_ratio:0.8163232963549921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 +DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s +INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10879850387573242 s +DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=192255500328984601628079668141315857591, time:1750766828.2172751s req_ids:[8] +DEBUG 06-24 20:07:08 [manager.py:391] +ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:202.1007537841797ms total_cost_time:202.14414596557617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6311 prompt_cache_len:5151 prompt_cache_ratio:0.816193947076533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 +DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10811519622802734 s +INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.1100454330444336 s +DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=18908211197279030093139072172310386004, time:1750766828.4267254s req_ids:[8] +DEBUG 06-24 20:07:08 [manager.py:391] +ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:205.25383949279785ms total_cost_time:205.29580116271973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6312 prompt_cache_len:5151 prompt_cache_ratio:0.81606463878327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 +DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s +INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s +DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=304734313958465676574906816411856615702, time:1750766828.6378846s req_ids:[8] +DEBUG 06-24 20:07:08 [manager.py:391] +ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:203.75776290893555ms total_cost_time:203.80187034606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6313 prompt_cache_len:5151 prompt_cache_ratio:0.8159353714557263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 +DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10929536819458008 s +INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.11126470565795898 s +DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=302046486410946255995924752177643996239, time:1750766828.846932s req_ids:[8] +DEBUG 06-24 20:07:08 [manager.py:391] +ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:206.14910125732422ms total_cost_time:206.19463920593262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6314 prompt_cache_len:5151 prompt_cache_ratio:0.8158061450744377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 +DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.1069483757019043 s +INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.108917236328125 s +DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=162904604133774740698790562668641650740, time:1750766829.0587995s req_ids:[8] +DEBUG 06-24 20:07:09 [manager.py:391] +ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:204.95963096618652ms total_cost_time:205.0023078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6315 prompt_cache_len:5151 prompt_cache_ratio:0.8156769596199525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 +DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s +INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10947585105895996 s +DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=25582089516851104834398826522381157047, time:1750766829.2689612s req_ids:[8] +DEBUG 06-24 20:07:09 [manager.py:391] +ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:205.61909675598145ms total_cost_time:205.66201210021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6316 prompt_cache_len:5151 prompt_cache_ratio:0.8155478150728309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 +DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10694742202758789 s +INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10881257057189941 s +DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=287963106526761434012182075965223740303, time:1750766829.4804862s req_ids:[8] +DEBUG 06-24 20:07:09 [manager.py:391] +ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:203.62019538879395ms total_cost_time:203.66215705871582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6317 prompt_cache_len:5151 prompt_cache_ratio:0.8154187114136457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 +DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10693550109863281 s +INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10880184173583984 s +DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=210133155114166104210233651792608782672, time:1750766829.6901078s req_ids:[8] +DEBUG 06-24 20:07:09 [manager.py:391] +ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:207.33046531677246ms total_cost_time:207.37385749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6318 prompt_cache_len:5151 prompt_cache_ratio:0.8152896486229819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 +DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10802721977233887 s +INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10995125770568848 s +DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=189851099262274948310761481398596293227, time:1750766829.9012098s req_ids:[8] +DEBUG 06-24 20:07:09 [manager.py:391] +ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:205.64889907836914ms total_cost_time:205.69419860839844ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6319 prompt_cache_len:5151 prompt_cache_ratio:0.815160626681437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 +DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10736727714538574 s +INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10868597030639648 s +DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=312173652376760028409097280408613978126, time:1750766830.1109817s req_ids:[8] +DEBUG 06-24 20:07:10 [manager.py:391] +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:368.61443519592285ms total_cost_time:368.6566352844238ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6320 prompt_cache_len:5151 prompt_cache_ratio:0.8150316455696203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 +DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.1066122055053711 s +INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.1091306209564209 s +DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=88423840053187159557205621470393843093, time:1750766830.4774318s req_ids:[8] +DEBUG 06-24 20:07:10 [manager.py:391] +ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:196.0165500640869ms total_cost_time:196.0604190826416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6321 prompt_cache_len:5151 prompt_cache_ratio:0.8149027052681538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 +DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10685968399047852 s +INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10883116722106934 s +DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=333099592941879735870532094106130606005, time:1750766830.6868808s req_ids:[8] +DEBUG 06-24 20:07:10 [manager.py:391] +ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:209.0911865234375ms total_cost_time:209.13386344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6322 prompt_cache_len:5151 prompt_cache_ratio:0.8147738057576717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 +DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10683274269104004 s +INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10876750946044922 s +DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=163465259530753467948429459369948400073, time:1750766830.899803s req_ids:[8] +DEBUG 06-24 20:07:10 [manager.py:391] +ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:205.69252967834473ms total_cost_time:205.73878288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6323 prompt_cache_len:5151 prompt_cache_ratio:0.8146449470188202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 +DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:11 [batch.py:51] router release req id 8 +INFO 06-24 20:07:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10798096656799316 s +INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.11010384559631348 s +DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=202894628804384893588529496768088320187, time:1750766831.1116807s req_ids:[8] +DEBUG 06-24 20:07:11 [manager.py:391] +ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:207.88121223449707ms total_cost_time:207.92341232299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6324 prompt_cache_len:5151 prompt_cache_ratio:0.8145161290322581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 +DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10676240921020508 s +INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10882115364074707 s +DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=177097290730940882150571417584864333509, time:1750766831.3237572s req_ids:[8] +DEBUG 06-24 20:07:11 [manager.py:391] +ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:205.39617538452148ms total_cost_time:205.43885231018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6325 prompt_cache_len:5151 prompt_cache_ratio:0.8143873517786562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 +DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10675740242004395 s +INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10855460166931152 s +DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=110014469419275104128822460684444345126, time:1750766831.5341318s req_ids:[8] +DEBUG 06-24 20:07:11 [manager.py:391] +ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:206.7577838897705ms total_cost_time:206.8004608154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6326 prompt_cache_len:5151 prompt_cache_ratio:0.8142586152386975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 +DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10686016082763672 s +INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10878109931945801 s +DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=257364788617188246510131075627668714136, time:1750766831.7453012s req_ids:[8] +DEBUG 06-24 20:07:11 [manager.py:391] +ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:205.37066459655762ms total_cost_time:205.4131031036377ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6327 prompt_cache_len:5151 prompt_cache_ratio:0.8141299193930773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 +DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10708928108215332 s +INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10901355743408203 s +DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=325648488257159057133851401469301939451, time:1750766831.9559932s req_ids:[8] +DEBUG 06-24 20:07:11 [manager.py:391] +ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:207.09633827209473ms total_cost_time:207.1387767791748ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6328 prompt_cache_len:5151 prompt_cache_ratio:0.8140012642225032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 +DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10672450065612793 s +INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10849714279174805 s +DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=132453781875957037588690724459947097817, time:1750766832.1788855s req_ids:[8] +DEBUG 06-24 20:07:12 [manager.py:391] +ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:214.79010581970215ms total_cost_time:214.83278274536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6329 prompt_cache_len:5151 prompt_cache_ratio:0.8138726497076947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 +DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10647225379943848 s +INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10846161842346191 s +DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=323409448048934599318098732198484163370, time:1750766832.3898735s req_ids:[8] +DEBUG 06-24 20:07:12 [manager.py:391] +ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:207.00860023498535ms total_cost_time:207.05223083496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6330 prompt_cache_len:5151 prompt_cache_ratio:0.8137440758293839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 +DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10744881629943848 s +INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10941863059997559 s +DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=322698242599262235056343798566421410464, time:1750766832.6010017s req_ids:[8] +DEBUG 06-24 20:07:12 [manager.py:391] +ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:201.85160636901855ms total_cost_time:201.89380645751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6331 prompt_cache_len:5151 prompt_cache_ratio:0.8136155425683146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 +DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10683369636535645 s +INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10871744155883789 s +DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=262486461884198547315762310011626530634, time:1750766832.8103404s req_ids:[8] +DEBUG 06-24 20:07:12 [manager.py:391] +ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:207.00478553771973ms total_cost_time:207.0457935333252ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6332 prompt_cache_len:5151 prompt_cache_ratio:0.8134870499052432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 +DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s +INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994625091552734 s +DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=163989098569810015552727274174789614259, time:1750766833.0214827s req_ids:[8] +DEBUG 06-24 20:07:13 [manager.py:391] +ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:206.1619758605957ms total_cost_time:206.2058448791504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6333 prompt_cache_len:5151 prompt_cache_ratio:0.8133585978209379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 +DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10739779472351074 s +INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10926175117492676 s +DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=209797224307846317215786823261265272329, time:1750766833.2323744s req_ids:[8] +DEBUG 06-24 20:07:13 [manager.py:391] +ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:407.4885845184326ms total_cost_time:407.53173828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6334 prompt_cache_len:5151 prompt_cache_ratio:0.8132301862961794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 +DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10700106620788574 s +INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10889363288879395 s +DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=60083835334385541856362906243124156144, time:1750766833.6380823s req_ids:[8] +DEBUG 06-24 20:07:13 [manager.py:391] +ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:198.8966464996338ms total_cost_time:198.93908500671387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6335 prompt_cache_len:5151 prompt_cache_ratio:0.8131018153117601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 +DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10657310485839844 s +INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10898518562316895 s +DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=293504249439672959360506913917727065091, time:1750766833.8493621s req_ids:[8] +DEBUG 06-24 20:07:13 [manager.py:391] +ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:207.66305923461914ms total_cost_time:207.70668983459473ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6336 prompt_cache_len:5151 prompt_cache_ratio:0.8129734848484849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 +DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10710978507995605 s +INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10901498794555664 s +DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=87974495286386069027186687485462158597, time:1750766834.0613477s req_ids:[8] +DEBUG 06-24 20:07:14 [manager.py:391] +ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:205.99126815795898ms total_cost_time:206.0372829437256ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6337 prompt_cache_len:5151 prompt_cache_ratio:0.8128451948871706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 +DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.1072545051574707 s +INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10909748077392578 s +DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=143219557366296097436126848172060224175, time:1750766834.2725668s req_ids:[8] +DEBUG 06-24 20:07:14 [manager.py:391] +ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:206.35056495666504ms total_cost_time:206.39371871948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6338 prompt_cache_len:5151 prompt_cache_ratio:0.8127169454086463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 +DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s +INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.11014318466186523 s +DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=173361394412458330951528551107039917015, time:1750766834.4885712s req_ids:[8] +DEBUG 06-24 20:07:14 [manager.py:391] +ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:208.27817916870117ms total_cost_time:208.32443237304688ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6339 prompt_cache_len:5151 prompt_cache_ratio:0.812588736393753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 +DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10757327079772949 s +INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10956501960754395 s +DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=292497859625989229548108690944400072367, time:1750766834.695327s req_ids:[8] +DEBUG 06-24 20:07:14 [manager.py:391] +ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:205.98602294921875ms total_cost_time:206.02655410766602ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:6340 prompt_cache_len:5151 prompt_cache_ratio:0.8124605678233439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 +DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10669374465942383 s +INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.1086270809173584 s +DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=35339774385412500604545992424730141804, time:1750766834.904591s req_ids:[8] +DEBUG 06-24 20:07:14 [manager.py:391] +ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:200.30927658081055ms total_cost_time:200.3500461578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:6341 prompt_cache_len:5151 prompt_cache_ratio:0.8123324396782842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 +DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10723614692687988 s +INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10921382904052734 s +DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=202134237878158183597786875001077339014, time:1750766835.110387s req_ids:[8] +DEBUG 06-24 20:07:15 [manager.py:391] +ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:205.54113388061523ms total_cost_time:205.5821418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6342 prompt_cache_len:5151 prompt_cache_ratio:0.8122043519394513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 +DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s +INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s +DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=88428806570050185851345105856444148325, time:1750766835.321182s req_ids:[8] +DEBUG 06-24 20:07:15 [manager.py:391] +ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:204.392671585083ms total_cost_time:204.4353485107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6343 prompt_cache_len:5151 prompt_cache_ratio:0.8120763045877345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 +DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10690093040466309 s +INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10879397392272949 s +DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=218191751874392554043143463791311183068, time:1750766835.529388s req_ids:[8] +DEBUG 06-24 20:07:15 [manager.py:391] +ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:203.19056510925293ms total_cost_time:203.2334804534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6344 prompt_cache_len:5151 prompt_cache_ratio:0.8119482976040353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 +DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10767173767089844 s +INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s +DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=309177496473773233353882741587049419028, time:1750766835.738482s req_ids:[8] +DEBUG 06-24 20:07:15 [manager.py:391] +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:205.7363986968994ms total_cost_time:205.7802677154541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6345 prompt_cache_len:5151 prompt_cache_ratio:0.8118203309692671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 +DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10633349418640137 s +INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10826826095581055 s +DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=294852681012369532813217791210735018956, time:1750766835.9492528s req_ids:[8] +DEBUG 06-24 20:07:15 [manager.py:391] +DEBUG 06-24 20:07:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 28840.002 tokens/s +DEBUG 06-24 20:07:15 [stats.py:37] Avg prompt tokens throughput: 28830.884 tokens/s +DEBUG 06-24 20:07:15 [stats.py:37] Avg generate tokens throughput: 9.119 tokens/s +ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:367.66576766967773ms total_cost_time:367.71154403686523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6346 prompt_cache_len:5151 prompt_cache_ratio:0.8116924046643555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 +DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.1073160171508789 s +INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.10920143127441406 s +DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=120921729330711434707927188879017047718, time:1750766836.3149257s req_ids:[8] +DEBUG 06-24 20:07:16 [manager.py:391] +INFO 06-24 20:07:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:198.17256927490234ms total_cost_time:198.21524620056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6347 prompt_cache_len:5151 prompt_cache_ratio:0.8115645186702379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 +DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10835099220275879 s +INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.11026167869567871 s +DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=141646032115225800210923034905016704361, time:1750766836.529948s req_ids:[8] +DEBUG 06-24 20:07:16 [manager.py:391] +ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:212.98837661743164ms total_cost_time:213.0296230316162ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6348 prompt_cache_len:5151 prompt_cache_ratio:0.8114366729678639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 +DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10669851303100586 s +INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.10860323905944824 s +DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=166448275712807393193990223908205624051, time:1750766836.7471905s req_ids:[8] +DEBUG 06-24 20:07:16 [manager.py:391] +ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:212.4464511871338ms total_cost_time:212.48817443847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6349 prompt_cache_len:5151 prompt_cache_ratio:0.811308867538195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 +DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10805416107177734 s +INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099860668182373 s +DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=139501197248710750614944743135828879782, time:1750766836.9598958s req_ids:[8] +DEBUG 06-24 20:07:16 [manager.py:391] +ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:209.63597297668457ms total_cost_time:209.67817306518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6350 prompt_cache_len:5151 prompt_cache_ratio:0.8111811023622048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 +DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10668635368347168 s +INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10855841636657715 s +DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=112490514022899890125824570279857099747, time:1750766837.1794329s req_ids:[8] +DEBUG 06-24 20:07:17 [manager.py:391] +ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:213.2546901702881ms total_cost_time:213.29665184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6351 prompt_cache_len:5151 prompt_cache_ratio:0.8110533774208786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 +DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10648345947265625 s +INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10846996307373047 s +DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=220229294329623826670807193264817125641, time:1750766837.391519s req_ids:[8] +DEBUG 06-24 20:07:17 [manager.py:391] +ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:205.63578605651855ms total_cost_time:205.67798614501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6352 prompt_cache_len:5151 prompt_cache_ratio:0.8109256926952141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 +DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s +INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10911345481872559 s +DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=16023825690020712549425117917313100034, time:1750766837.602196s req_ids:[8] +DEBUG 06-24 20:07:17 [manager.py:391] +ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:204.77032661437988ms total_cost_time:204.833984375ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:6353 prompt_cache_len:5151 prompt_cache_ratio:0.8107980481662207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 +DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10665130615234375 s +INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10867047309875488 s +DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=95677870683987626635732540697801840946, time:1750766837.8132424s req_ids:[8] +DEBUG 06-24 20:07:17 [manager.py:391] +ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:208.05835723876953ms total_cost_time:208.1000804901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6354 prompt_cache_len:5151 prompt_cache_ratio:0.8106704438149197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 +DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.1080622673034668 s +INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10997867584228516 s +DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=123627088178139103949308910765401113629, time:1750766838.024191s req_ids:[8] +DEBUG 06-24 20:07:18 [manager.py:391] +ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:202.74758338928223ms total_cost_time:202.7890682220459ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6355 prompt_cache_len:5151 prompt_cache_ratio:0.8105428796223446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 +DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.1071023941040039 s +INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10908746719360352 s +DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=123847528957609546147921281445750786101, time:1750766838.2357662s req_ids:[8] +DEBUG 06-24 20:07:18 [manager.py:391] +ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:382.43579864501953ms total_cost_time:382.4808597564697ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6356 prompt_cache_len:5151 prompt_cache_ratio:0.8104153555695406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 +DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.10815548896789551 s +INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.11003828048706055 s +DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=62133768182538830403883456505768189413, time:1750766838.6142645s req_ids:[8] +DEBUG 06-24 20:07:18 [manager.py:391] +ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:193.8459873199463ms total_cost_time:193.90177726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:6357 prompt_cache_len:5151 prompt_cache_ratio:0.8102878716375649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 +DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.10763406753540039 s +INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10969257354736328 s +DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=179135501778210700476988432898635020351, time:1750766838.819131s req_ids:[8] +DEBUG 06-24 20:07:18 [manager.py:391] +ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:204.12755012512207ms total_cost_time:204.17094230651855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6358 prompt_cache_len:5151 prompt_cache_ratio:0.8101604278074866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 +DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10785388946533203 s +INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10979819297790527 s +DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=65984342547863205415590248220489148013, time:1750766839.0274625s req_ids:[8] +DEBUG 06-24 20:07:19 [manager.py:391] +ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:199.05638694763184ms total_cost_time:199.09906387329102ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6359 prompt_cache_len:5151 prompt_cache_ratio:0.8100330240603869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 +DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10671067237854004 s +INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10861539840698242 s +DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=47975082915100702174733019989173075689, time:1750766839.2327614s req_ids:[8] +DEBUG 06-24 20:07:19 [manager.py:391] +ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:201.033353805542ms total_cost_time:201.07531547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6360 prompt_cache_len:5151 prompt_cache_ratio:0.8099056603773584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 +DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10798478126525879 s +INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s +DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=243077904758446502764099184827245061155, time:1750766839.4368393s req_ids:[8] +DEBUG 06-24 20:07:19 [manager.py:391] +ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:201.42841339111328ms total_cost_time:201.47037506103516ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6361 prompt_cache_len:5151 prompt_cache_ratio:0.8097783367395064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 +DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10754156112670898 s +INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10959219932556152 s +DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=205123648775903889972532964438178937365, time:1750766839.6459172s req_ids:[8] +DEBUG 06-24 20:07:19 [manager.py:391] +ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:202.6822566986084ms total_cost_time:202.72493362426758ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6362 prompt_cache_len:5151 prompt_cache_ratio:0.8096510531279472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 +DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10790205001831055 s +INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10994124412536621 s +DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=134284420754432855497992682180999440343, time:1750766839.8544524s req_ids:[8] +DEBUG 06-24 20:07:19 [manager.py:391] +ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:205.1706314086914ms total_cost_time:205.2140235900879ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6363 prompt_cache_len:5151 prompt_cache_ratio:0.8095238095238095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 +DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10717129707336426 s +INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10926532745361328 s +DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=184744521303190872415718353400539099429, time:1750766840.0665824s req_ids:[8] +DEBUG 06-24 20:07:20 [manager.py:391] +ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:207.67641067504883ms total_cost_time:207.7198028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6364 prompt_cache_len:5151 prompt_cache_ratio:0.8093966059082338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 +DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10633111000061035 s +INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10866570472717285 s +DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=101552565110780088886243180887610041320, time:1750766840.2785263s req_ids:[8] +DEBUG 06-24 20:07:20 [manager.py:391] +ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:182.85560607910156ms total_cost_time:182.89899826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6365 prompt_cache_len:5151 prompt_cache_ratio:0.8092694422623723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 +DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10821270942687988 s +INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.1102297306060791 s +DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=90239129122698199280613024996017023570, time:1750766840.4611707s req_ids:[8] +DEBUG 06-24 20:07:20 [manager.py:391] +ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:194.061279296875ms total_cost_time:194.10347938537598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6366 prompt_cache_len:5151 prompt_cache_ratio:0.8091423185673893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 +DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10751891136169434 s +INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10947084426879883 s +DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=194902336225767236775902628182845033582, time:1750766840.666747s req_ids:[8] +DEBUG 06-24 20:07:20 [manager.py:391] +ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:200.1965045928955ms total_cost_time:200.2410888671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6367 prompt_cache_len:5151 prompt_cache_ratio:0.8090152348044605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 +DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.20749855041503906 s +INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.20907974243164062 s +DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=26124297775799202445823680602694593347, time:1750766840.9656844s req_ids:[8] +DEBUG 06-24 20:07:20 [manager.py:391] +ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:255.76138496398926ms total_cost_time:255.80668449401855ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6368 prompt_cache_len:5151 prompt_cache_ratio:0.8088881909547738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 +DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10706520080566406 s +INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.10909414291381836 s +DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=337776873639168811494052602187561451099, time:1750766841.1237805s req_ids:[8] +DEBUG 06-24 20:07:21 [manager.py:391] +ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:184.64112281799316ms total_cost_time:184.68546867370605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6369 prompt_cache_len:5151 prompt_cache_ratio:0.8087611869995289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 +DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10754966735839844 s +INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.10940194129943848 s +DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=14911192895663488767947941062116202134, time:1750766841.3194096s req_ids:[8] +DEBUG 06-24 20:07:21 [manager.py:391] +ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:200.02412796020508ms total_cost_time:200.06823539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6370 prompt_cache_len:5151 prompt_cache_ratio:0.8086342229199373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 +DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10806417465209961 s +INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.11005210876464844 s +DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=122924971126051225713531411324070708049, time:1750766841.526016s req_ids:[8] +DEBUG 06-24 20:07:21 [manager.py:391] +ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:203.76920700073242ms total_cost_time:203.8123607635498ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6371 prompt_cache_len:5151 prompt_cache_ratio:0.8085072986972218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 +DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:21 [batch.py:51] router release req id 8 +INFO 06-24 20:07:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10654067993164062 s +INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.1084127426147461 s +DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=295376191670073971033604862462797949927, time:1750766841.7344024s req_ids:[8] +DEBUG 06-24 20:07:21 [manager.py:391] +ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:205.49321174621582ms total_cost_time:205.53898811340332ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6372 prompt_cache_len:5151 prompt_cache_ratio:0.8083804143126178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 +DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10669946670532227 s +INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.1087040901184082 s +DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=37940593202108407917900423591364897712, time:1750766841.9439292s req_ids:[8] +DEBUG 06-24 20:07:21 [manager.py:391] +ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:204.69951629638672ms total_cost_time:204.7433853149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6373 prompt_cache_len:5151 prompt_cache_ratio:0.8082535697473717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 +DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10732197761535645 s +INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10929989814758301 s +DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=316165221962432977187434463312088224185, time:1750766842.1528869s req_ids:[8] +DEBUG 06-24 20:07:22 [manager.py:391] +ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:203.0313014984131ms total_cost_time:203.07517051696777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6374 prompt_cache_len:5151 prompt_cache_ratio:0.8081267649827424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 +DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.1074533462524414 s +INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s +DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=67211706646516948832983196714321265062, time:1750766842.3607624s req_ids:[8] +DEBUG 06-24 20:07:22 [manager.py:391] +ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:206.24423027038574ms total_cost_time:206.28690719604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6375 prompt_cache_len:5151 prompt_cache_ratio:0.808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 +DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10659098625183105 s +INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10848021507263184 s +DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=258443828891221266878168040515043055088, time:1750766842.5725026s req_ids:[8] +DEBUG 06-24 20:07:22 [manager.py:391] +ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:202.25882530212402ms total_cost_time:202.301025390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6376 prompt_cache_len:5151 prompt_cache_ratio:0.8078732747804266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 +DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s +INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.11019158363342285 s +DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=87868904835949324238163358204285998800, time:1750766842.7833178s req_ids:[8] +DEBUG 06-24 20:07:22 [manager.py:391] +ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:211.85803413391113ms total_cost_time:211.9009494781494ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6377 prompt_cache_len:5151 prompt_cache_ratio:0.807746589305316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 +DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10758566856384277 s +INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10953187942504883 s +DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=254954695257033260540786436652530404236, time:1750766843.0026777s req_ids:[8] +DEBUG 06-24 20:07:23 [manager.py:391] +ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:214.25652503967285ms total_cost_time:214.29991722106934ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6378 prompt_cache_len:5151 prompt_cache_ratio:0.8076199435559737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 +DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:23 [batch.py:51] router release req id 8 +INFO 06-24 20:07:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:07:23 [statics_utils.py:24] mean first cost: 238.5498575294847 ms +INFO 06-24 20:07:23 [statics_utils.py:24] mean per token cost: 0.11900594229094989 ms +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.1067807674407959 s +INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10863876342773438 s +DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=295321640378937250801195798856079967885, time:1750766843.223434s req_ids:[8] +DEBUG 06-24 20:07:23 [manager.py:391] +ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:214.50185775756836ms total_cost_time:214.54715728759766ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6379 prompt_cache_len:5151 prompt_cache_ratio:0.8074933375137169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 +DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.1067051887512207 s +INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10854434967041016 s +DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=207689085523368735196677137944278563454, time:1750766843.434768s req_ids:[8] +DEBUG 06-24 20:07:23 [manager.py:391] +ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:357.27787017822266ms total_cost_time:357.32269287109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6380 prompt_cache_len:5151 prompt_cache_ratio:0.8073667711598747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 +DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.10730171203613281 s +INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10913777351379395 s +DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=101658396661300611045323289221455467678, time:1750766843.7879584s req_ids:[8] +DEBUG 06-24 20:07:23 [manager.py:391] +ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:195.60575485229492ms total_cost_time:195.62721252441406ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6381 prompt_cache_len:5151 prompt_cache_ratio:0.8072402444757875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 +DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.10783743858337402 s +INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10978078842163086 s +DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=338813048448525875163439705584924304477, time:1750766843.9942977s req_ids:[8] +DEBUG 06-24 20:07:23 [manager.py:391] +ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:205.6710720062256ms total_cost_time:205.71541786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6382 prompt_cache_len:5151 prompt_cache_ratio:0.8071137574428079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 +DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10778260231018066 s +INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s +DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=310836859909545211380722812137295434747, time:1750766844.2100923s req_ids:[8] +DEBUG 06-24 20:07:24 [manager.py:391] +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:209.33103561401367ms total_cost_time:209.37466621398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6383 prompt_cache_len:5151 prompt_cache_ratio:0.8069873100422998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 +DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10744976997375488 s +INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10937643051147461 s +DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=191853468497508748025345220208125563413, time:1750766844.41712s req_ids:[8] +DEBUG 06-24 20:07:24 [manager.py:391] +ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:199.13792610168457ms total_cost_time:199.17941093444824ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6384 prompt_cache_len:5151 prompt_cache_ratio:0.8068609022556391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 +DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10740399360656738 s +INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10941457748413086 s +DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=276586219381523023915976329825600895267, time:1750766844.6209106s req_ids:[8] +DEBUG 06-24 20:07:24 [manager.py:391] +ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:202.90207862854004ms total_cost_time:202.9428482055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:6385 prompt_cache_len:5151 prompt_cache_ratio:0.806734534064213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 +DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.1068572998046875 s +INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10864877700805664 s +DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=69964543967062815121492064754984586966, time:1750766844.8288844s req_ids:[8] +DEBUG 06-24 20:07:24 [manager.py:391] +ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:201.0507583618164ms total_cost_time:201.0955810546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6386 prompt_cache_len:5151 prompt_cache_ratio:0.8066082054494206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 +DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.1066596508026123 s +INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.10859131813049316 s +DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=126492723920003215010828325983851292807, time:1750766845.0362968s req_ids:[8] +DEBUG 06-24 20:07:25 [manager.py:391] +ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:204.41222190856934ms total_cost_time:204.4541835784912ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6387 prompt_cache_len:5151 prompt_cache_ratio:0.8064819163926726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 +DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10781979560852051 s +INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.11107778549194336 s +DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=7238148324664032186187916021557349381, time:1750766845.2590988s req_ids:[8] +DEBUG 06-24 20:07:25 [manager.py:391] +ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:218.22834014892578ms total_cost_time:218.27149391174316ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6388 prompt_cache_len:5151 prompt_cache_ratio:0.8063556668753914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 +DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s +INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.10998725891113281 s +DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=188447203118245659279288565994770417111, time:1750766845.4696946s req_ids:[8] +DEBUG 06-24 20:07:25 [manager.py:391] +ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:207.3822021484375ms total_cost_time:207.4282169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6389 prompt_cache_len:5151 prompt_cache_ratio:0.8062294568790108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 +DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.1071019172668457 s +INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.1089470386505127 s +DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=280726450229926050658059604256089491298, time:1750766845.6876192s req_ids:[8] +DEBUG 06-24 20:07:25 [manager.py:391] +ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:215.08216857910156ms total_cost_time:215.12794494628906ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6390 prompt_cache_len:5151 prompt_cache_ratio:0.8061032863849765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 +DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10803675651550293 s +INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.11008882522583008 s +DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=160188690540386492952828426374536732900, time:1750766845.9009745s req_ids:[8] +DEBUG 06-24 20:07:25 [manager.py:391] +ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:07:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 28570.013 tokens/s +DEBUG 06-24 20:07:25 [stats.py:37] Avg prompt tokens throughput: 28560.945 tokens/s +DEBUG 06-24 20:07:25 [stats.py:37] Avg generate tokens throughput: 9.068 tokens/s +INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:208.44101905822754ms total_cost_time:208.48464965820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6391 prompt_cache_len:5151 prompt_cache_ratio:0.8059771553747458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 +DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s +INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.10932230949401855 s +DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=228604054740154981689772448206816494487, time:1750766846.1310523s req_ids:[8] +DEBUG 06-24 20:07:26 [manager.py:391] +ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:222.02277183532715ms total_cost_time:222.06592559814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6392 prompt_cache_len:5151 prompt_cache_ratio:0.8058510638297872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 +DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.1095120906829834 s +INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11154937744140625 s +DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=312915713167388912111118957156371639118, time:1750766846.3414807s req_ids:[8] +DEBUG 06-24 20:07:26 [manager.py:391] +ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:391.4530277252197ms total_cost_time:391.4990425109863ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6393 prompt_cache_len:5151 prompt_cache_ratio:0.8057250117315814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 +DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10832071304321289 s +INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s +DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=102406642911270979826871072940478293446, time:1750766846.7300222s req_ids:[8] +DEBUG 06-24 20:07:26 [manager.py:391] +ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:196.09403610229492ms total_cost_time:196.1379051208496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6394 prompt_cache_len:5151 prompt_cache_ratio:0.8055989990616202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 +DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10853123664855957 s +INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11041951179504395 s +DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=196786156479095613644525651729462782558, time:1750766846.9373221s req_ids:[8] +DEBUG 06-24 20:07:26 [manager.py:391] +ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:207.87644386291504ms total_cost_time:207.92222023010254ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6395 prompt_cache_len:5151 prompt_cache_ratio:0.8054730258014073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 +DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10716009140014648 s +INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10893034934997559 s +DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=45001006765452936920707263297335184944, time:1750766847.1490228s req_ids:[8] +DEBUG 06-24 20:07:27 [manager.py:391] +ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:201.88379287719727ms total_cost_time:201.92790031433105ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6396 prompt_cache_len:5151 prompt_cache_ratio:0.8053470919324578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 +DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10765314102172852 s +INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10933279991149902 s +DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=94268636562644782262260521302255298770, time:1750766847.3564608s req_ids:[8] +DEBUG 06-24 20:07:27 [manager.py:391] +ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:203.31048965454102ms total_cost_time:203.3538818359375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6397 prompt_cache_len:5151 prompt_cache_ratio:0.8052211974362983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 +DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10724377632141113 s +INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10928487777709961 s +DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=43899599509596085957061115679107144678, time:1750766847.5634959s req_ids:[8] +DEBUG 06-24 20:07:27 [manager.py:391] +ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:202.90303230285645ms total_cost_time:202.94475555419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6398 prompt_cache_len:5151 prompt_cache_ratio:0.805095342294467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 +DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10718941688537598 s +INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10891366004943848 s +DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=83915149644297881688912840550406248485, time:1750766847.780428s req_ids:[8] +DEBUG 06-24 20:07:27 [manager.py:391] +ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:212.28647232055664ms total_cost_time:212.33105659484863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6399 prompt_cache_len:5151 prompt_cache_ratio:0.8049695264885138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 +DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10703325271606445 s +INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10871243476867676 s +DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=209476011459872347304076654898771444962, time:1750766847.9947937s req_ids:[8] +DEBUG 06-24 20:07:27 [manager.py:391] +ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:210.66999435424805ms total_cost_time:210.71338653564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6400 prompt_cache_len:5151 prompt_cache_ratio:0.80484375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 +DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10742998123168945 s +INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10912156105041504 s +DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=2262121424716448946689578322702580357, time:1750766848.2041864s req_ids:[8] +DEBUG 06-24 20:07:28 [manager.py:391] +ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.71532440185547ms total_cost_time:203.75919342041016ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6401 prompt_cache_len:5151 prompt_cache_ratio:0.8047180128104984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 +DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10802078247070312 s +INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005854606628418 s +DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=22364813314466253442878272072547929050, time:1750766848.4126894s req_ids:[8] +DEBUG 06-24 20:07:28 [manager.py:391] +ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.51910591125488ms total_cost_time:203.56345176696777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6402 prompt_cache_len:5151 prompt_cache_ratio:0.8045923149015932 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 +DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s +INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10951399803161621 s +DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=250155622624881252824915617003879091350, time:1750766848.6211555s req_ids:[8] +DEBUG 06-24 20:07:28 [manager.py:391] +ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:207.7162265777588ms total_cost_time:207.76009559631348ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6403 prompt_cache_len:5151 prompt_cache_ratio:0.8044666562548806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 +DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10750627517700195 s +INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10954046249389648 s +DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=38931049553816605501730715291273131191, time:1750766848.8326027s req_ids:[8] +DEBUG 06-24 20:07:28 [manager.py:391] +ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:198.07910919189453ms total_cost_time:198.1217861175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6404 prompt_cache_len:5151 prompt_cache_ratio:0.8043410368519676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 +DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10748672485351562 s +INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.10951066017150879 s +DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=161586170111866255721537388258675518262, time:1750766849.0367486s req_ids:[8] +DEBUG 06-24 20:07:29 [manager.py:391] +ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.16839218139648ms total_cost_time:203.2146453857422ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6405 prompt_cache_len:5151 prompt_cache_ratio:0.804215456674473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 +DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10824990272521973 s +INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s +DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=17251701210669346271855920599126243357, time:1750766849.2449346s req_ids:[8] +DEBUG 06-24 20:07:29 [manager.py:391] +ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.87203788757324ms total_cost_time:202.91590690612793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6406 prompt_cache_len:5151 prompt_cache_ratio:0.8040899157040274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 +DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s +INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.11011862754821777 s +DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=230030077836748532013066974298752324777, time:1750766849.45452s req_ids:[8] +DEBUG 06-24 20:07:29 [manager.py:391] +ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:372.114896774292ms total_cost_time:372.1587657928467ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6407 prompt_cache_len:5151 prompt_cache_ratio:0.8039644139222725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 +DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10744667053222656 s +INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.10946440696716309 s +DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=19327026418283520824285305625103672068, time:1750766849.8295848s req_ids:[8] +DEBUG 06-24 20:07:29 [manager.py:391] +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.25071907043457ms total_cost_time:202.29458808898926ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6408 prompt_cache_len:5151 prompt_cache_ratio:0.8038389513108615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 +DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.109527587890625 s +INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.11152815818786621 s +DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=155311556029215410727470590641399905063, time:1750766850.037623s req_ids:[8] +DEBUG 06-24 20:07:30 [manager.py:391] +ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.6810646057129ms total_cost_time:202.72421836853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6409 prompt_cache_len:5151 prompt_cache_ratio:0.8037135278514589 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 +DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10698485374450684 s +INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.1090095043182373 s +DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=243042772619747918448172936446622181532, time:1750766850.2441173s req_ids:[8] +DEBUG 06-24 20:07:30 [manager.py:391] +ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:202.8818130493164ms total_cost_time:202.92377471923828ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6410 prompt_cache_len:5151 prompt_cache_ratio:0.803588143525741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 +DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10778427124023438 s +INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.10981512069702148 s +DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=61099337780632933381892147634630145822, time:1750766850.453742s req_ids:[8] +DEBUG 06-24 20:07:30 [manager.py:391] +ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:204.4198513031006ms total_cost_time:204.46467399597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6411 prompt_cache_len:5151 prompt_cache_ratio:0.8034627983153955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 +DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10766291618347168 s +INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.1096959114074707 s +DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=18289494735402077466102731521670451904, time:1750766850.6624231s req_ids:[8] +DEBUG 06-24 20:07:30 [manager.py:391] +ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:204.5886516571045ms total_cost_time:204.63228225708008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6412 prompt_cache_len:5151 prompt_cache_ratio:0.803337492202121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 +DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s +INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s +DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=162766293716190275354455887563044039384, time:1750766850.8714647s req_ids:[8] +DEBUG 06-24 20:07:30 [manager.py:391] +ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:200.96921920776367ms total_cost_time:201.01213455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6413 prompt_cache_len:5151 prompt_cache_ratio:0.8032122251676282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 +DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10924148559570312 s +INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.11180567741394043 s +DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=174748775501565243194153653442195068240, time:1750766851.0758317s req_ids:[8] +DEBUG 06-24 20:07:31 [manager.py:391] +ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:202.7149200439453ms total_cost_time:202.7592658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6414 prompt_cache_len:5151 prompt_cache_ratio:0.803086997193639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 +DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10758638381958008 s +INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s +DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=219999819138247977150489355215866715369, time:1750766851.2862265s req_ids:[8] +DEBUG 06-24 20:07:31 [manager.py:391] +ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:209.64956283569336ms total_cost_time:209.71059799194336ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:6415 prompt_cache_len:5151 prompt_cache_ratio:0.8029618082618862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 +DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.11081480979919434 s +INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.11283278465270996 s +DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=46746523174745507555522880867120847269, time:1750766851.5102866s req_ids:[8] +DEBUG 06-24 20:07:31 [manager.py:391] +ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:226.93657875061035ms total_cost_time:226.98044776916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6416 prompt_cache_len:5151 prompt_cache_ratio:0.8028366583541147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 +DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10687565803527832 s +INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10877633094787598 s +DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=281149605712715575170090412663862597737, time:1750766851.7211468s req_ids:[8] +DEBUG 06-24 20:07:31 [manager.py:391] +ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:184.7665309906006ms total_cost_time:184.82661247253418ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6417 prompt_cache_len:5151 prompt_cache_ratio:0.8027115474520804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 +DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10501790046691895 s +INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10707521438598633 s +DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=320220794002356667151525472959349061768, time:1750766851.9367573s req_ids:[8] +DEBUG 06-24 20:07:31 [manager.py:391] +ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:201.99012756347656ms total_cost_time:202.03733444213867ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6418 prompt_cache_len:5151 prompt_cache_ratio:0.8025864755375507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 +DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s +INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.1108243465423584 s +DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=789083248672545067439897866642060409, time:1750766852.129937s req_ids:[8] +DEBUG 06-24 20:07:32 [manager.py:391] +ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:369.2138195037842ms total_cost_time:369.2584037780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6419 prompt_cache_len:5151 prompt_cache_ratio:0.8024614425923041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 +DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s +INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.10991716384887695 s +DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=58100478191498191975058167756618835645, time:1750766852.49919s req_ids:[8] +DEBUG 06-24 20:07:32 [manager.py:391] +ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:189.3751621246338ms total_cost_time:189.41903114318848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6420 prompt_cache_len:5151 prompt_cache_ratio:0.8023364485981308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 +DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:32 [batch.py:51] router release req id 8 +DEBUG 06-24 20:07:32 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:32 [manager.py:283] +DEBUG 06-24 20:07:32 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:32 [manager.py:284] +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.11073827743530273 s +INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.11358118057250977 s +DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=319430017784338069508014450912251849191, time:1750766852.6971173s req_ids:[8] +DEBUG 06-24 20:07:32 [manager.py:391] +ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:181.44679069519043ms total_cost_time:181.49375915527344ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6421 prompt_cache_len:5151 prompt_cache_ratio:0.8022114935368323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 +DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s +INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.10864424705505371 s +DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=88577200680938638912766003500699994522, time:1750766852.885413s req_ids:[8] +DEBUG 06-24 20:07:32 [manager.py:391] +ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:174.9260425567627ms total_cost_time:174.9706268310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6422 prompt_cache_len:5151 prompt_cache_ratio:0.8020865773902212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 +DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10654830932617188 s +INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.10848212242126465 s +DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=290329378628107991180201689580422536189, time:1750766853.0610497s req_ids:[8] +DEBUG 06-24 20:07:33 [manager.py:391] +ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:197.31974601745605ms total_cost_time:197.36170768737793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6423 prompt_cache_len:5151 prompt_cache_ratio:0.8019617001401215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 +DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s +INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s +DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=235416984454171649416937346371902768846, time:1750766853.2752743s req_ids:[8] +DEBUG 06-24 20:07:33 [manager.py:391] +ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:213.42229843139648ms total_cost_time:213.44232559204102ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6424 prompt_cache_len:5151 prompt_cache_ratio:0.8018368617683687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 +DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10640430450439453 s +INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1081845760345459 s +DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=181722341802678490307138560262165851373, time:1750766853.4837644s req_ids:[8] +DEBUG 06-24 20:07:33 [manager.py:391] +ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:213.19150924682617ms total_cost_time:213.23704719543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6425 prompt_cache_len:5151 prompt_cache_ratio:0.8017120622568094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 +DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s +INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1102755069732666 s +DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=312925680023984549522281534196669479487, time:1750766853.698264s req_ids:[8] +DEBUG 06-24 20:07:33 [manager.py:391] +ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:193.90606880187988ms total_cost_time:193.92871856689453ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6426 prompt_cache_len:5151 prompt_cache_ratio:0.8015873015873016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 +DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10847854614257812 s +INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.10965394973754883 s +DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=229135695201755924284186637707151974331, time:1750766853.8980339s req_ids:[8] +DEBUG 06-24 20:07:33 [manager.py:391] +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:194.48232650756836ms total_cost_time:194.50974464416504ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:6427 prompt_cache_len:5151 prompt_cache_ratio:0.8014625797417146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 +DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10703754425048828 s +INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.1089315414428711 s +DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=242998436925304072190618214014751954340, time:1750766854.0953164s req_ids:[8] +DEBUG 06-24 20:07:34 [manager.py:391] +ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:198.71759414672852ms total_cost_time:198.744535446167ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6428 prompt_cache_len:5151 prompt_cache_ratio:0.801337896701929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 +DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.1057596206665039 s +INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10777163505554199 s +DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=303348420371880266158676659486538769898, time:1750766854.2984066s req_ids:[8] +DEBUG 06-24 20:07:34 [manager.py:391] +ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:391.40796661376953ms total_cost_time:391.42823219299316ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6429 prompt_cache_len:5151 prompt_cache_ratio:0.8012132524498367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 +DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10339975357055664 s +INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10541796684265137 s +DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=154021285788498723307609437758763867769, time:1750766854.6887193s req_ids:[8] +DEBUG 06-24 20:07:34 [manager.py:391] +ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:209.01846885681152ms total_cost_time:209.04159545898438ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6430 prompt_cache_len:5151 prompt_cache_ratio:0.8010886469673406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 +DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10462045669555664 s +INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10613250732421875 s +DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=78589999635016606555057666509739248537, time:1750766854.9032788s req_ids:[8] +DEBUG 06-24 20:07:34 [manager.py:391] +ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:193.39346885681152ms total_cost_time:193.42637062072754ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:6431 prompt_cache_len:5151 prompt_cache_ratio:0.8009640802363551 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.1039276123046875 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10494732856750488 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=277651636101555860355521136388073706659, time:1750766855.1197765s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:174.38149452209473ms total_cost_time:174.40223693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6432 prompt_cache_len:5151 prompt_cache_ratio:0.800839552238806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10335040092468262 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10427260398864746 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=127003829063534249482766862879952724486, time:1750766855.282155s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:163.2828712463379ms total_cost_time:163.33436965942383ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6433 prompt_cache_len:5151 prompt_cache_ratio:0.8007150629566299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10391640663146973 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10492277145385742 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=152945352410245173994213632169770991675, time:1750766855.445187s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:179.7318458557129ms total_cost_time:179.75568771362305ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6434 prompt_cache_len:5151 prompt_cache_ratio:0.800590612371775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10452556610107422 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10578370094299316 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=184524012450491782012372307907848467272, time:1750766855.6294634s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:184.51476097106934ms total_cost_time:184.54599380493164ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:6435 prompt_cache_len:5151 prompt_cache_ratio:0.8004662004662004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10402798652648926 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10498666763305664 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=70282385645472796513511399873863791238, time:1750766855.8179278s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:165.47727584838867ms total_cost_time:165.4980182647705ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6436 prompt_cache_len:5151 prompt_cache_ratio:0.800341827221877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 +DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10463714599609375 s +INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10551190376281738 s +DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=42542426947358313940718517522071905179, time:1750766855.992846s req_ids:[8] +DEBUG 06-24 20:07:35 [manager.py:391] +DEBUG 06-24 20:07:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 29490.393 tokens/s +DEBUG 06-24 20:07:35 [stats.py:37] Avg prompt tokens throughput: 29481.300 tokens/s +DEBUG 06-24 20:07:35 [stats.py:37] Avg generate tokens throughput: 9.092 tokens/s +ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:188.60864639282227ms total_cost_time:188.6749267578125ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:6437 prompt_cache_len:5151 prompt_cache_ratio:0.8002174926207861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 +DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10462069511413574 s +INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10640382766723633 s +DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=272904143396359929035142605338255024995, time:1750766856.1916833s req_ids:[8] +DEBUG 06-24 20:07:36 [manager.py:391] +ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:193.71747970581055ms total_cost_time:193.75348091125488ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:6438 prompt_cache_len:5151 prompt_cache_ratio:0.8000931966449207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 +DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10660505294799805 s +INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10811328887939453 s +DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=29542705308469339236021555113426164790, time:1750766856.3891482s req_ids:[8] +DEBUG 06-24 20:07:36 [manager.py:391] +ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:190.80066680908203ms total_cost_time:190.83285331726074ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:6439 prompt_cache_len:5151 prompt_cache_ratio:0.7999689392762851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 +DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10590386390686035 s +INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10761308670043945 s +DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=183847986241234897755007620097506754741, time:1750766856.58589s req_ids:[8] +DEBUG 06-24 20:07:36 [manager.py:391] +ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:194.08059120178223ms total_cost_time:194.1068172454834ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6440 prompt_cache_len:5151 prompt_cache_ratio:0.7998447204968944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 +DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.30413079261779785 s +INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.30516481399536133 s +DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=243528582557082727624923433659228573093, time:1750766856.9848533s req_ids:[8] +DEBUG 06-24 20:07:36 [manager.py:391] +ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:396.390438079834ms total_cost_time:396.41523361206055ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6441 prompt_cache_len:5151 prompt_cache_ratio:0.799720540288775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 +DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10393023490905762 s +INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10497665405273438 s +DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=194638898884734219030451220826304650726, time:1750766857.182197s req_ids:[8] +DEBUG 06-24 20:07:37 [manager.py:391] +ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:195.8754062652588ms total_cost_time:195.9228515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6442 prompt_cache_len:5151 prompt_cache_ratio:0.7995963986339646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 +DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10329461097717285 s +INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10413932800292969 s +DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=340134401958962425068140661363234134989, time:1750766857.3802767s req_ids:[8] +DEBUG 06-24 20:07:37 [manager.py:391] +ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:194.23937797546387ms total_cost_time:194.26345825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6443 prompt_cache_len:5151 prompt_cache_ratio:0.7994722955145118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 +DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10335493087768555 s +INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10423803329467773 s +DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=146273797675852474904534626028972506895, time:1750766857.5782025s req_ids:[8] +DEBUG 06-24 20:07:37 [manager.py:391] +ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:196.50530815124512ms total_cost_time:196.52938842773438ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6444 prompt_cache_len:5151 prompt_cache_ratio:0.7993482309124768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 +DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10355377197265625 s +INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10455203056335449 s +DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=302558271918179564273532316603594788709, time:1750766857.7772455s req_ids:[8] +DEBUG 06-24 20:07:37 [manager.py:391] +ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:195.89781761169434ms total_cost_time:195.91999053955078ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6445 prompt_cache_len:5151 prompt_cache_ratio:0.7992242048099302 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 +DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10390257835388184 s +INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10486221313476562 s +DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=270572504772751118693218582419165926723, time:1750766857.9739037s req_ids:[8] +DEBUG 06-24 20:07:37 [manager.py:391] +ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:192.3084259033203ms total_cost_time:192.3379898071289ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:6446 prompt_cache_len:5151 prompt_cache_ratio:0.7991002171889544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 +DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10349249839782715 s +INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10440373420715332 s +DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=116178694139485277705486577136024336778, time:1750766858.173152s req_ids:[8] +DEBUG 06-24 20:07:38 [manager.py:391] +ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.9012279510498ms total_cost_time:194.92363929748535ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6447 prompt_cache_len:5151 prompt_cache_ratio:0.7989762680316427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 +DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10451436042785645 s +INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10538744926452637 s +DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=194128265109180259560578825727958352792, time:1750766858.3682892s req_ids:[8] +DEBUG 06-24 20:07:38 [manager.py:391] +ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:193.57848167419434ms total_cost_time:193.60041618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6448 prompt_cache_len:5151 prompt_cache_ratio:0.7988523573200993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 +DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10422682762145996 s +INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10503220558166504 s +DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=51658120734796558279444531196498436430, time:1750766858.5658338s req_ids:[8] +DEBUG 06-24 20:07:38 [manager.py:391] +ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:193.02678108215332ms total_cost_time:193.06540489196777ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:6449 prompt_cache_len:5151 prompt_cache_ratio:0.7987284850364398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 +DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10414910316467285 s +INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10494208335876465 s +DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=315696345731366669095935905320573377262, time:1750766858.7622507s req_ids:[8] +DEBUG 06-24 20:07:38 [manager.py:391] +ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.54312324523926ms total_cost_time:194.5657730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6450 prompt_cache_len:5151 prompt_cache_ratio:0.7986046511627907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 +DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10428786277770996 s +INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10522341728210449 s +DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=206908801593370767481508339372952323565, time:1750766858.9602518s req_ids:[8] +DEBUG 06-24 20:07:38 [manager.py:391] +ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.69785690307617ms total_cost_time:194.72074508666992ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6451 prompt_cache_len:5151 prompt_cache_ratio:0.7984808556812897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 +DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10310029983520508 s +INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10400819778442383 s +DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=132553718270098213108977223783422420076, time:1750766859.1570463s req_ids:[8] +DEBUG 06-24 20:07:39 [manager.py:391] +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:195.36590576171875ms total_cost_time:195.3887939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6452 prompt_cache_len:5151 prompt_cache_ratio:0.7983570985740855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 +DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10332798957824707 s +INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10425090789794922 s +DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=58385016245443314743285601264492238964, time:1750766859.35493s req_ids:[8] +DEBUG 06-24 20:07:39 [manager.py:391] +ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:393.7079906463623ms total_cost_time:393.73016357421875ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6453 prompt_cache_len:5151 prompt_cache_ratio:0.798233379823338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 +DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10347723960876465 s +INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10437154769897461 s +DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=298196186531360028699079477309244576656, time:1750766859.75108s req_ids:[8] +DEBUG 06-24 20:07:39 [manager.py:391] +ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:195.9521770477295ms total_cost_time:195.97291946411133ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6454 prompt_cache_len:5151 prompt_cache_ratio:0.7981096994112179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 +DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10316324234008789 s +INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10407018661499023 s +DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=116113632928286299315228017006581560825, time:1750766859.948663s req_ids:[8] +DEBUG 06-24 20:07:39 [manager.py:391] +ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:185.45985221862793ms total_cost_time:185.4841709136963ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6455 prompt_cache_len:5151 prompt_cache_ratio:0.797986057319907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10398268699645996 s +INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.1051173210144043 s +DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=223798825897450242420293689125319637877, time:1750766860.12405s req_ids:[8] +DEBUG 06-24 20:07:40 [manager.py:391] +ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:194.43273544311523ms total_cost_time:194.45490837097168ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6456 prompt_cache_len:5151 prompt_cache_ratio:0.7978624535315985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10289788246154785 s +INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10374879837036133 s +DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=245930913687896846212758576001524123335, time:1750766860.3314426s req_ids:[8] +DEBUG 06-24 20:07:40 [manager.py:391] +ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:181.4565658569336ms total_cost_time:181.47540092468262ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6457 prompt_cache_len:5151 prompt_cache_ratio:0.7977388880284962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10250973701477051 s +INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10334658622741699 s +DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=300814501955891480708230787553195268902, time:1750766860.5038974s req_ids:[8] +DEBUG 06-24 20:07:40 [manager.py:391] +ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:171.5857982635498ms total_cost_time:171.60415649414062ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6458 prompt_cache_len:5151 prompt_cache_ratio:0.7976153607928151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10261392593383789 s +INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10353565216064453 s +DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=328856043597080550726110006887545014462, time:1750766860.677881s req_ids:[8] +DEBUG 06-24 20:07:40 [manager.py:391] +ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:172.2245216369629ms total_cost_time:172.24502563476562ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6459 prompt_cache_len:5151 prompt_cache_ratio:0.7974918718067813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10254526138305664 s +INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10358953475952148 s +DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=26918721920605767040580489940094063371, time:1750766860.8488936s req_ids:[8] +DEBUG 06-24 20:07:40 [manager.py:391] +ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:187.08419799804688ms total_cost_time:187.1044635772705ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6460 prompt_cache_len:5151 prompt_cache_ratio:0.7973684210526316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 +DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10273194313049316 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10462546348571777 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=57578275140190219088614946745857418471, time:1750766861.0205421s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:147.7639675140381ms total_cost_time:147.78423309326172ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6461 prompt_cache_len:5151 prompt_cache_ratio:0.7972450085126142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 +DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.1022794246673584 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.1030263900756836 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=86719047522043758040005091706188635523, time:1750766861.1807175s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:160.45236587524414ms total_cost_time:160.47120094299316ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6462 prompt_cache_len:5151 prompt_cache_ratio:0.797121634168988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 +DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10258722305297852 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10351133346557617 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=181444785753583738642322945006367839524, time:1750766861.356838s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:173.39158058166504ms total_cost_time:173.41089248657227ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6463 prompt_cache_len:5151 prompt_cache_ratio:0.7969982980040229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 +DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10244941711425781 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.1033635139465332 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=17893851396138027308882101329374453316, time:1750766861.5269644s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:170.6695556640625ms total_cost_time:170.68934440612793ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6464 prompt_cache_len:5151 prompt_cache_ratio:0.796875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 +DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10333967208862305 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10422086715698242 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=135337036080186965582565406197571437930, time:1750766861.7009006s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:169.06380653381348ms total_cost_time:169.0833568572998ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6465 prompt_cache_len:5151 prompt_cache_ratio:0.7967517401392111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 +DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10337305068969727 s +INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10422825813293457 s +DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=45995322978030122009249596550023693257, time:1750766861.8719761s req_ids:[8] +DEBUG 06-24 20:07:41 [manager.py:391] +ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:424.64423179626465ms total_cost_time:424.663782119751ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6466 prompt_cache_len:5151 prompt_cache_ratio:0.7966285184039592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 +DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10356831550598145 s +INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.1044759750366211 s +DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=172093534301486729678058609197864293171, time:1750766862.2985606s req_ids:[8] +DEBUG 06-24 20:07:42 [manager.py:391] +ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:170.24946212768555ms total_cost_time:170.26972770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6467 prompt_cache_len:5151 prompt_cache_ratio:0.7965053347765579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 +DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.1034841537475586 s +INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.1043236255645752 s +DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=242272056309317997160533368002445237385, time:1750766862.4699495s req_ids:[8] +DEBUG 06-24 20:07:42 [manager.py:391] +ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:169.60573196411133ms total_cost_time:169.62647438049316ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6468 prompt_cache_len:5151 prompt_cache_ratio:0.7963821892393321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 +DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10242438316345215 s +INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10326004028320312 s +DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=301129148018629460522289228551652108236, time:1750766862.6481237s req_ids:[8] +DEBUG 06-24 20:07:42 [manager.py:391] +ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:175.79221725463867ms total_cost_time:175.8124828338623ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6469 prompt_cache_len:5151 prompt_cache_ratio:0.7962590817746175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 +DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10325741767883301 s +INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10408353805541992 s +DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=124810405975926903198085887044105251657, time:1750766862.820404s req_ids:[8] +DEBUG 06-24 20:07:42 [manager.py:391] +ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:170.84908485412598ms total_cost_time:170.8686351776123ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6470 prompt_cache_len:5151 prompt_cache_ratio:0.7961360123647604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 +DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.1024470329284668 s +INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10325360298156738 s +DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=173101263434430583469388973091177795010, time:1750766862.9929597s req_ids:[8] +DEBUG 06-24 20:07:42 [manager.py:391] +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:188.1082057952881ms total_cost_time:188.1265640258789ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6471 prompt_cache_len:5151 prompt_cache_ratio:0.7960129809921187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 +DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:43 [batch.py:51] router release req id 8 +INFO 06-24 20:07:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10419631004333496 s +INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10508179664611816 s +DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=21036456429622598966336122260429662214, time:1750766863.1802194s req_ids:[8] +DEBUG 06-24 20:07:43 [manager.py:391] +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:157.57250785827637ms total_cost_time:157.61661529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6472 prompt_cache_len:5151 prompt_cache_ratio:0.7958899876390606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 +DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10724711418151855 s +INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10822868347167969 s +DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=330701111064747031129313013078277290890, time:1750766863.333606s req_ids:[8] +DEBUG 06-24 20:07:43 [manager.py:391] +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:182.23023414611816ms total_cost_time:182.2524070739746ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6473 prompt_cache_len:5151 prompt_cache_ratio:0.7957670322879654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 +DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10329079627990723 s +INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10423421859741211 s +DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=206348574152710904833298126315754254957, time:1750766863.5302026s req_ids:[8] +DEBUG 06-24 20:07:43 [manager.py:391] +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:198.17018508911133ms total_cost_time:198.18902015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6474 prompt_cache_len:5151 prompt_cache_ratio:0.7956441149212233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 +DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10338377952575684 s +INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.1044011116027832 s +DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=14628350517036034029041546758611589131, time:1750766863.7297199s req_ids:[8] +DEBUG 06-24 20:07:43 [manager.py:391] +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:201.98464393615723ms total_cost_time:202.00490951538086ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6475 prompt_cache_len:5151 prompt_cache_ratio:0.7955212355212355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 +DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10248184204101562 s +INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10329771041870117 s +DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=116438294479522528102917667725403806416, time:1750766863.9369898s req_ids:[8] +DEBUG 06-24 20:07:43 [manager.py:391] +ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:190.69361686706543ms total_cost_time:190.7174587249756ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6476 prompt_cache_len:5151 prompt_cache_ratio:0.7953983940704138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 +DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10366106033325195 s +INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10451054573059082 s +DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=257773982139388132358875865290343669063, time:1750766864.1263807s req_ids:[8] +DEBUG 06-24 20:07:44 [manager.py:391] +ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:201.57980918884277ms total_cost_time:201.5979290008545ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6477 prompt_cache_len:5151 prompt_cache_ratio:0.7952755905511811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 +DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10302853584289551 s +INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10385489463806152 s +DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=126242766787683742505483276964342121391, time:1750766864.333875s req_ids:[8] +DEBUG 06-24 20:07:44 [manager.py:391] +ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:202.41689682006836ms total_cost_time:202.44598388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:6478 prompt_cache_len:5151 prompt_cache_ratio:0.795152824945971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 +DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10467195510864258 s +INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.1056675910949707 s +DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=184181375275239015472833738009614651858, time:1750766864.5355773s req_ids:[8] +DEBUG 06-24 20:07:44 [manager.py:391] +ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:180.2206039428711ms total_cost_time:180.24349212646484ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6479 prompt_cache_len:5151 prompt_cache_ratio:0.795030097237228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 +DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10301971435546875 s +INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10394024848937988 s +DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=224880872042154363729060713509458329734, time:1750766864.7275956s req_ids:[8] +DEBUG 06-24 20:07:44 [manager.py:391] +ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:474.4093418121338ms total_cost_time:474.4288921356201ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6480 prompt_cache_len:5151 prompt_cache_ratio:0.7949074074074074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 +DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10244035720825195 s +INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10339188575744629 s +DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=238614409872273377792341325968228639345, time:1750766865.1740808s req_ids:[8] +DEBUG 06-24 20:07:45 [manager.py:391] +ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:162.42098808288574ms total_cost_time:162.44029998779297ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6481 prompt_cache_len:5151 prompt_cache_ratio:0.7947847554389754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 +DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10355401039123535 s +INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10449981689453125 s +DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=193675608813001665297894471724137490471, time:1750766865.371104s req_ids:[8] +DEBUG 06-24 20:07:45 [manager.py:391] +ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:208.56308937072754ms total_cost_time:208.58311653137207ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6482 prompt_cache_len:5151 prompt_cache_ratio:0.7946621413144092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 +DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10247588157653809 s +INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.1033773422241211 s +DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=157937386824530053381217531711538744977, time:1750766865.5688033s req_ids:[8] +DEBUG 06-24 20:07:45 [manager.py:391] +ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:182.85226821899414ms total_cost_time:182.87229537963867ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6483 prompt_cache_len:5151 prompt_cache_ratio:0.7945395650161962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 +DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10266375541687012 s +INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10357666015625 s +DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=12752785488541668364279761820450439520, time:1750766865.765305s req_ids:[8] +DEBUG 06-24 20:07:45 [manager.py:391] +ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:194.76771354675293ms total_cost_time:194.78702545166016ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6484 prompt_cache_len:5151 prompt_cache_ratio:0.7944170265268353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 +DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.1042332649230957 s +INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10509419441223145 s +DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=325010508588490039617232469909551234557, time:1750766865.961748s req_ids:[8] +DEBUG 06-24 20:07:45 [manager.py:391] +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:07:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 30945.143 tokens/s +DEBUG 06-24 20:07:46 [stats.py:37] Avg prompt tokens throughput: 30935.468 tokens/s +DEBUG 06-24 20:07:46 [stats.py:37] Avg generate tokens throughput: 9.675 tokens/s +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:194.16165351867676ms total_cost_time:194.18621063232422ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6485 prompt_cache_len:5151 prompt_cache_ratio:0.7942945258288358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10251975059509277 s +INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10334253311157227 s +DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=210536128736576338302969145410798114958, time:1750766866.1284277s req_ids:[8] +DEBUG 06-24 20:07:46 [manager.py:391] +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:166.29838943481445ms total_cost_time:166.31793975830078ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6486 prompt_cache_len:5151 prompt_cache_ratio:0.7941720629047179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10255765914916992 s +INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.1034550666809082 s +DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=84915782470430408052689888535711959985, time:1750766866.3256173s req_ids:[8] +DEBUG 06-24 20:07:46 [manager.py:391] +INFO 06-24 20:07:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:182.97195434570312ms total_cost_time:182.99293518066406ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6487 prompt_cache_len:5151 prompt_cache_ratio:0.7940496377370125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10295248031616211 s +INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10389399528503418 s +DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=267643638216845622132249038847759953193, time:1750766866.5030222s req_ids:[8] +DEBUG 06-24 20:07:46 [manager.py:391] +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:186.02776527404785ms total_cost_time:186.05828285217285ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:6488 prompt_cache_len:5151 prompt_cache_ratio:0.7939272503082614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +INFO 06-24 20:07:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10259699821472168 s +INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10336923599243164 s +DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=239583653720130785327050135800804767963, time:1750766866.6853502s req_ids:[8] +DEBUG 06-24 20:07:46 [manager.py:391] +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:217.8034782409668ms total_cost_time:217.82255172729492ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6489 prompt_cache_len:5151 prompt_cache_ratio:0.7938049006010172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.1036078929901123 s +INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10441040992736816 s +DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=198961092386080139032010520120304111370, time:1750766866.903222s req_ids:[8] +DEBUG 06-24 20:07:46 [manager.py:391] +ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:161.41867637634277ms total_cost_time:161.4377498626709ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6490 prompt_cache_len:5151 prompt_cache_ratio:0.7936825885978428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 +DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10264968872070312 s +INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10354804992675781 s +DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=239179229814974888201377999842684913324, time:1750766867.0657022s req_ids:[8] +DEBUG 06-24 20:07:47 [manager.py:391] +ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:187.73150444030762ms total_cost_time:187.75033950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6491 prompt_cache_len:5151 prompt_cache_ratio:0.7935603142813126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 +DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10272336006164551 s +INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.1035912036895752 s +DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=103078364069653881977574875583836388135, time:1750766867.2584398s req_ids:[8] +DEBUG 06-24 20:07:47 [manager.py:391] +ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:438.89904022216797ms total_cost_time:438.9188289642334ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6492 prompt_cache_len:5151 prompt_cache_ratio:0.7934380776340111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 +DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10373973846435547 s +INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10455632209777832 s +DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=109159410370116480327486610713283133136, time:1750766867.6966515s req_ids:[8] +DEBUG 06-24 20:07:47 [manager.py:391] +ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:190.22774696350098ms total_cost_time:190.2477741241455ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6493 prompt_cache_len:5151 prompt_cache_ratio:0.7933158786385338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 +DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10334134101867676 s +INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10419917106628418 s +DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=140166208106447383022192479348029819820, time:1750766867.8912528s req_ids:[8] +DEBUG 06-24 20:07:47 [manager.py:391] +ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:191.88570976257324ms total_cost_time:191.90478324890137ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6494 prompt_cache_len:5151 prompt_cache_ratio:0.7931937172774869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 +DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10387253761291504 s +INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10473036766052246 s +DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=132906829058250632681023425364877577196, time:1750766868.0871184s req_ids:[8] +DEBUG 06-24 20:07:48 [manager.py:391] +ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:206.07376098632812ms total_cost_time:206.09450340270996ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6495 prompt_cache_len:5151 prompt_cache_ratio:0.7930715935334873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 +DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10361838340759277 s +INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10440587997436523 s +DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=326812407101579893880070637079805300251, time:1750766868.2971044s req_ids:[8] +DEBUG 06-24 20:07:48 [manager.py:391] +ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:185.5919361114502ms total_cost_time:185.61148643493652ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6496 prompt_cache_len:5151 prompt_cache_ratio:0.7929495073891626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 +DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10248780250549316 s +INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10331225395202637 s +DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=270063229367083878588308833398558635598, time:1750766868.494132s req_ids:[8] +DEBUG 06-24 20:07:48 [manager.py:391] +ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:193.8483715057373ms total_cost_time:193.86863708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6497 prompt_cache_len:5151 prompt_cache_ratio:0.792827458827151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 +DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10350632667541504 s +INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10425710678100586 s +DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=87368749600254114626615310403483453831, time:1750766868.6902633s req_ids:[8] +DEBUG 06-24 20:07:48 [manager.py:391] +ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:209.93399620056152ms total_cost_time:209.95378494262695ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6498 prompt_cache_len:5151 prompt_cache_ratio:0.7927054478301016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 +DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10268306732177734 s +INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.1034245491027832 s +DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=331275246263277867899907139286774510423, time:1750766868.8917224s req_ids:[8] +DEBUG 06-24 20:07:48 [manager.py:391] +ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:207.2732448577881ms total_cost_time:207.29398727416992ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6499 prompt_cache_len:5151 prompt_cache_ratio:0.792583474380674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 +DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10332179069519043 s +INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10424184799194336 s +DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=147686522597013606763777386451352467469, time:1750766869.1008904s req_ids:[8] +DEBUG 06-24 20:07:49 [manager.py:391] +ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:174.53360557556152ms total_cost_time:174.55410957336426ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6500 prompt_cache_len:5151 prompt_cache_ratio:0.7924615384615384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 +DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10369658470153809 s +INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10461044311523438 s +DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=197541847940923658611654503838558822155, time:1750766869.278513s req_ids:[8] +DEBUG 06-24 20:07:49 [manager.py:391] +ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:175.35758018493652ms total_cost_time:175.37760734558105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6501 prompt_cache_len:5151 prompt_cache_ratio:0.792339640055376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 +DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10256409645080566 s +INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10338973999023438 s +DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=132702706947485900854455702773939660389, time:1750766869.4572473s req_ids:[8] +DEBUG 06-24 20:07:49 [manager.py:391] +ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:433.2864284515381ms total_cost_time:433.31122398376465ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6502 prompt_cache_len:5151 prompt_cache_ratio:0.7922177791448785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 +DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.1038060188293457 s +INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10461235046386719 s +DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=171165086247102797236517380674753479330, time:1750766869.8937516s req_ids:[8] +DEBUG 06-24 20:07:49 [manager.py:391] +ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:177.9007911682129ms total_cost_time:177.92105674743652ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6503 prompt_cache_len:5151 prompt_cache_ratio:0.7920959557127479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 +DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.1025228500366211 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10327839851379395 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=196327177555609758556694261281867455577, time:1750766870.0731492s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:174.6354103088379ms total_cost_time:174.6542453765869ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6504 prompt_cache_len:5151 prompt_cache_ratio:0.7919741697416974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10346078872680664 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10420608520507812 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=207416091501988028743947181853378527167, time:1750766870.2453249s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:170.43542861938477ms total_cost_time:170.4549789428711ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6505 prompt_cache_len:5151 prompt_cache_ratio:0.7918524212144504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10283708572387695 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.1035912036895752 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=323945026505037003124992966061812271389, time:1750766870.4202967s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:173.45452308654785ms total_cost_time:173.475980758667ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6506 prompt_cache_len:5151 prompt_cache_ratio:0.7917307101137412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10260987281799316 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10342669486999512 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=113942899061612323935628313099834531792, time:1750766870.5894356s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:166.90754890441895ms total_cost_time:166.9290065765381ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6507 prompt_cache_len:5151 prompt_cache_ratio:0.7916090364223144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10246610641479492 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10323548316955566 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=274947130823457823363986664492025835745, time:1750766870.7652779s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:176.1176586151123ms total_cost_time:176.13720893859863ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6508 prompt_cache_len:5151 prompt_cache_ratio:0.7914874001229256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10254955291748047 s +INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10338950157165527 s +DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=130058276645781014263339357133166153295, time:1750766870.9402163s req_ids:[8] +DEBUG 06-24 20:07:50 [manager.py:391] +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:173.39444160461426ms total_cost_time:173.4147071838379ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6509 prompt_cache_len:5151 prompt_cache_ratio:0.7913658011983408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 +DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10358500480651855 s +INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10450530052185059 s +DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=5812738722704364118282413355817042452, time:1750766871.1172047s req_ids:[8] +DEBUG 06-24 20:07:51 [manager.py:391] +ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:175.1117706298828ms total_cost_time:175.13084411621094ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6510 prompt_cache_len:5151 prompt_cache_ratio:0.7912442396313364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 +DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10371518135070801 s +INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10462570190429688 s +DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=247592817930205042458930687386827020192, time:1750766871.2958677s req_ids:[8] +DEBUG 06-24 20:07:51 [manager.py:391] +ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:175.83870887756348ms total_cost_time:175.8594512939453ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6511 prompt_cache_len:5151 prompt_cache_ratio:0.7911227154046997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 +DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10241174697875977 s +INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10323238372802734 s +DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=304053334779035361387276040703562775692, time:1750766871.4723759s req_ids:[8] +DEBUG 06-24 20:07:51 [manager.py:391] +ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:174.79944229125977ms total_cost_time:174.8180389404297ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6512 prompt_cache_len:5151 prompt_cache_ratio:0.7910012285012284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 +DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10261321067810059 s +INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10345220565795898 s +DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=98817249459650791093243683381362720667, time:1750766871.6492512s req_ids:[8] +DEBUG 06-24 20:07:51 [manager.py:391] +ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:174.96109008789062ms total_cost_time:174.98064041137695ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6513 prompt_cache_len:5151 prompt_cache_ratio:0.790879778903731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 +DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.3042147159576416 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.3050215244293213 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=47834146034141635528381887216696462926, time:1750766872.0170374s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:365.4131889343262ms total_cost_time:365.433931350708ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6514 prompt_cache_len:5151 prompt_cache_ratio:0.7907583665950261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10378766059875488 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10460710525512695 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=21575093312374063629381745541516707452, time:1750766872.192353s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:174.21817779541016ms total_cost_time:174.23772811889648ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6515 prompt_cache_len:5151 prompt_cache_ratio:0.7906369915579432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10374283790588379 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10457229614257812 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=295222997128615938268548297679684524458, time:1750766872.371079s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.92453956604004ms total_cost_time:175.94432830810547ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6516 prompt_cache_len:5151 prompt_cache_ratio:0.7905156537753223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10378265380859375 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10456061363220215 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=65618126885616188691528652477715923791, time:1750766872.5473342s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:174.70502853393555ms total_cost_time:174.72434043884277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6517 prompt_cache_len:5151 prompt_cache_ratio:0.7903943532300138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10382294654846191 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10475444793701172 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=252896642010716564800578357476524136997, time:1750766872.7297418s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:179.66580390930176ms total_cost_time:179.68416213989258ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6518 prompt_cache_len:5151 prompt_cache_ratio:0.7902730899048788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10277605056762695 s +INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10357379913330078 s +DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=28642480268450966155805030762440984506, time:1750766872.9061286s req_ids:[8] +DEBUG 06-24 20:07:52 [manager.py:391] +ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.29749870300293ms total_cost_time:175.31728744506836ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6519 prompt_cache_len:5151 prompt_cache_ratio:0.7901518637827888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 +DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10249972343444824 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10328888893127441 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=304737385422966190443938002033288797537, time:1750766873.084035s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.37569999694824ms total_cost_time:175.39548873901367ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6520 prompt_cache_len:5151 prompt_cache_ratio:0.7900306748466258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 +DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:53 [batch.py:51] router release req id 8 +INFO 06-24 20:07:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:07:53 [statics_utils.py:24] mean first cost: 236.52147444883605 ms +INFO 06-24 20:07:53 [statics_utils.py:24] mean per token cost: 0.11301781988396738 ms +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10278987884521484 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10362768173217773 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=158842327934775242871236920039881476087, time:1750766873.262754s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:178.36284637451172ms total_cost_time:178.38263511657715ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6521 prompt_cache_len:5151 prompt_cache_ratio:0.7899095230792823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 +DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10351037979125977 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.1043095588684082 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=10357838863325675974813224689161317079, time:1750766873.440043s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:173.9809513092041ms total_cost_time:174.00097846984863ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6522 prompt_cache_len:5151 prompt_cache_ratio:0.7897884084636615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 +DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10332727432250977 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10410523414611816 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=289848153505505827798820199662533696319, time:1750766873.6173978s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:177.02484130859375ms total_cost_time:177.04439163208008ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6523 prompt_cache_len:5151 prompt_cache_ratio:0.7896673309826767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 +DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.1026606559753418 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10347604751586914 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=125892725516290441340947676297137038584, time:1750766873.7966766s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:177.98089981079102ms total_cost_time:178.00235748291016ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6524 prompt_cache_len:5151 prompt_cache_ratio:0.789546290619252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 +DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10268115997314453 s +INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10345125198364258 s +DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=254529937322885649592467878415780614853, time:1750766873.9737563s req_ids:[8] +DEBUG 06-24 20:07:53 [manager.py:391] +ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:174.01123046875ms total_cost_time:174.03101921081543ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6525 prompt_cache_len:5151 prompt_cache_ratio:0.7894252873563219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 +DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10265874862670898 s +INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10345745086669922 s +DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=51015691918098510435944764786793492115, time:1750766874.1501138s req_ids:[8] +DEBUG 06-24 20:07:54 [manager.py:391] +ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:426.3758659362793ms total_cost_time:426.39780044555664ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6526 prompt_cache_len:5151 prompt_cache_ratio:0.7893043211768311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 +DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10339879989624023 s +INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10421299934387207 s +DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=141315925142610384200663510540177604937, time:1750766874.573598s req_ids:[8] +DEBUG 06-24 20:07:54 [manager.py:391] +ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:175.7214069366455ms total_cost_time:175.73952674865723ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6527 prompt_cache_len:5151 prompt_cache_ratio:0.7891833920637352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 +DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10258793830871582 s +INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10336422920227051 s +DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=100355875104822616395829803571393076726, time:1750766874.7503808s req_ids:[8] +DEBUG 06-24 20:07:54 [manager.py:391] +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:155.55596351623535ms total_cost_time:155.57551383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6528 prompt_cache_len:5151 prompt_cache_ratio:0.7890625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 +DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10266566276550293 s +INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10358262062072754 s +DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=285742318986483779281097104917762138299, time:1750766874.9060862s req_ids:[8] +DEBUG 06-24 20:07:54 [manager.py:391] +ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:164.43634033203125ms total_cost_time:164.46232795715332ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:6529 prompt_cache_len:5151 prompt_cache_ratio:0.7889416449686016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 +DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10251736640930176 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10344433784484863 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=43538634372651627202407946678715001201, time:1750766875.078549s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:170.0155735015869ms total_cost_time:170.03583908081055ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6530 prompt_cache_len:5151 prompt_cache_ratio:0.7888208269525268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10277628898620605 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10362124443054199 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=271618805711854084481240478047996891232, time:1750766875.2505336s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:169.84820365905762ms total_cost_time:169.86894607543945ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6531 prompt_cache_len:5151 prompt_cache_ratio:0.7887000459347726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10404205322265625 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10500597953796387 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=231315357401635360308444963447657535406, time:1750766875.4223404s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.74847221374512ms total_cost_time:170.76849937438965ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6532 prompt_cache_len:5151 prompt_cache_ratio:0.7885793018983466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10335278511047363 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10417366027832031 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=147128586873853265288275953916815218265, time:1750766875.5946658s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.28355598449707ms total_cost_time:170.3052520751953ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6533 prompt_cache_len:5151 prompt_cache_ratio:0.7884585948262667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10343480110168457 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10420489311218262 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=284428589149872889558063286235950643025, time:1750766875.7661471s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:168.17426681518555ms total_cost_time:168.19357872009277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6534 prompt_cache_len:5151 prompt_cache_ratio:0.788337924701561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.1026608943939209 s +INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10346555709838867 s +DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=63933040122762846626464688629048596383, time:1750766875.9372768s req_ids:[8] +DEBUG 06-24 20:07:55 [manager.py:391] +ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:169.93021965026855ms total_cost_time:169.94953155517578ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6535 prompt_cache_len:5151 prompt_cache_ratio:0.7882172915072686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 +DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10241246223449707 s +INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10318970680236816 s +DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=138547024478328743551876514470640210897, time:1750766876.110058s req_ids:[8] +DEBUG 06-24 20:07:56 [manager.py:391] +DEBUG 06-24 20:07:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 32916.094 tokens/s +DEBUG 06-24 20:07:56 [stats.py:37] Avg prompt tokens throughput: 32906.085 tokens/s +DEBUG 06-24 20:07:56 [stats.py:37] Avg generate tokens throughput: 10.009 tokens/s +ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.49741744995117ms total_cost_time:170.5174446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6536 prompt_cache_len:5151 prompt_cache_ratio:0.7880966952264382 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 +DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10358166694641113 s +INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10453343391418457 s +DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=159648989565732161263113577589633625872, time:1750766876.2815194s req_ids:[8] +DEBUG 06-24 20:07:56 [manager.py:391] +ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:169.93021965026855ms total_cost_time:169.94857788085938ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6537 prompt_cache_len:5151 prompt_cache_ratio:0.7879761358421294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 +DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10334897041320801 s +INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10424160957336426 s +DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=289769732960280758261643545077930612867, time:1750766876.4532168s req_ids:[8] +DEBUG 06-24 20:07:56 [manager.py:391] +ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:169.04211044311523ms total_cost_time:169.06023025512695ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6538 prompt_cache_len:5151 prompt_cache_ratio:0.787855613337412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 +DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10336875915527344 s +INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10414791107177734 s +DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=211647016632928102464812513062562131563, time:1750766876.62437s req_ids:[8] +DEBUG 06-24 20:07:56 [manager.py:391] +ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:400.82526206970215ms total_cost_time:400.8443355560303ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6539 prompt_cache_len:5151 prompt_cache_ratio:0.7877351276953662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 +DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10285353660583496 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10371613502502441 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=288441894007253336103581840206777446839, time:1750766877.0275624s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:172.44482040405273ms total_cost_time:172.46675491333008ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6540 prompt_cache_len:5151 prompt_cache_ratio:0.7876146788990825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 +DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:57 [batch.py:51] router release req id 8 +INFO 06-24 20:07:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10265493392944336 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10341644287109375 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=194596496276751477478281780720891319031, time:1750766877.202853s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:171.68617248535156ms total_cost_time:171.7054843902588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6541 prompt_cache_len:5151 prompt_cache_ratio:0.7874942669316618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 +DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10521960258483887 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10711956024169922 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=164828262874223036048735912254273320823, time:1750766877.3745232s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:184.9040985107422ms total_cost_time:184.92412567138672ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6542 prompt_cache_len:5151 prompt_cache_ratio:0.7873738917762152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 +DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.104736328125 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10657811164855957 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=137674331790217986982697619566593818697, time:1750766877.561502s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:191.82157516479492ms total_cost_time:191.8647289276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6543 prompt_cache_len:5151 prompt_cache_ratio:0.7872535534158643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 +DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10742020606994629 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10930013656616211 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=293662292481314682657824029098564819856, time:1750766877.7555928s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:194.95129585266113ms total_cost_time:194.99611854553223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6544 prompt_cache_len:5151 prompt_cache_ratio:0.7871332518337408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 +DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.107940673828125 s +INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098635196685791 s +DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=114934815720830893613429674677034081803, time:1750766877.958956s req_ids:[8] +DEBUG 06-24 20:07:57 [manager.py:391] +ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:204.4389247894287ms total_cost_time:204.4832706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6545 prompt_cache_len:5151 prompt_cache_ratio:0.787012987012987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 +DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.1081538200378418 s +INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11033940315246582 s +DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=158581515700955884791283099870840440588, time:1750766878.1624103s req_ids:[8] +DEBUG 06-24 20:07:58 [manager.py:391] +ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:203.92942428588867ms total_cost_time:203.97210121154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6546 prompt_cache_len:5151 prompt_cache_ratio:0.7868927589367553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 +DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.1072695255279541 s +INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s +DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=151508599920298688002298122847873345598, time:1750766878.377107s req_ids:[8] +DEBUG 06-24 20:07:58 [manager.py:391] +ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:211.0598087310791ms total_cost_time:211.11059188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:6547 prompt_cache_len:5151 prompt_cache_ratio:0.7867725675882083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 +DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.10934662818908691 s +INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s +DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=166146776672678505851302311666809791106, time:1750766878.5869389s req_ids:[8] +DEBUG 06-24 20:07:58 [manager.py:391] +ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:203.4778594970703ms total_cost_time:203.521728515625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6548 prompt_cache_len:5151 prompt_cache_ratio:0.7866524129505192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 +DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.10992240905761719 s +INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11178469657897949 s +DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=134472441908852779603807620094580836635, time:1750766878.803043s req_ids:[8] +DEBUG 06-24 20:07:58 [manager.py:391] +ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:217.35215187072754ms total_cost_time:217.40388870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:6549 prompt_cache_len:5151 prompt_cache_ratio:0.7865322950068713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 +DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10872578620910645 s +INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s +DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=302152688899111280828711351932827939935, time:1750766879.0242462s req_ids:[8] +DEBUG 06-24 20:07:59 [manager.py:391] +ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:205.2772045135498ms total_cost_time:205.3220272064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6550 prompt_cache_len:5151 prompt_cache_ratio:0.786412213740458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 +DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10761308670043945 s +INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s +DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=157115331287803616465708494985069641858, time:1750766879.2332423s req_ids:[8] +DEBUG 06-24 20:07:59 [manager.py:391] +ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:206.0844898223877ms total_cost_time:206.11119270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:6551 prompt_cache_len:5151 prompt_cache_ratio:0.7862921691344833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 +DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.1066441535949707 s +INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.1088716983795166 s +DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=284172145492665634682701765775469285327, time:1750766879.4446352s req_ids:[8] +DEBUG 06-24 20:07:59 [manager.py:391] +ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:217.85783767700195ms total_cost_time:217.90146827697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6552 prompt_cache_len:5151 prompt_cache_ratio:0.7861721611721612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 +DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:07:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10570859909057617 s +INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.10671186447143555 s +DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=90993685465906767626893706212207034830, time:1750766879.6824048s req_ids:[8] +DEBUG 06-24 20:07:59 [manager.py:391] +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:433.7937831878662ms total_cost_time:433.81333351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6553 prompt_cache_len:5151 prompt_cache_ratio:0.786052189836716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 +DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10309886932373047 s +INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10398721694946289 s +DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=149886257626241376539418311650901892603, time:1750766880.1080983s req_ids:[8] +DEBUG 06-24 20:08:00 [manager.py:391] +ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:212.97597885131836ms total_cost_time:212.9983901977539ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6554 prompt_cache_len:5151 prompt_cache_ratio:0.7859322551113823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 +DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10407137870788574 s +INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10485458374023438 s +DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=256024165726407114040412341183948101373, time:1750766880.3303475s req_ids:[8] +DEBUG 06-24 20:08:00 [manager.py:391] +ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:219.4368839263916ms total_cost_time:219.45929527282715ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6555 prompt_cache_len:5151 prompt_cache_ratio:0.785812356979405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 +DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.1039581298828125 s +INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10474157333374023 s +DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=300762525322920996386191485192252530492, time:1750766880.5457523s req_ids:[8] +DEBUG 06-24 20:08:00 [manager.py:391] +ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:217.1928882598877ms total_cost_time:217.21434593200684ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6556 prompt_cache_len:5151 prompt_cache_ratio:0.7856924954240391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 +DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10271978378295898 s +INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10364174842834473 s +DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=227318149890494559415593650287784392178, time:1750766880.7664857s req_ids:[8] +DEBUG 06-24 20:08:00 [manager.py:391] +ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:214.768648147583ms total_cost_time:214.80846405029297ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:6557 prompt_cache_len:5151 prompt_cache_ratio:0.7855726704285496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 +DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10338187217712402 s +INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10436272621154785 s +DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=244868837653894440989355634197952921510, time:1750766880.9743795s req_ids:[8] +DEBUG 06-24 20:08:00 [manager.py:391] +ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:181.32376670837402ms total_cost_time:181.3504695892334ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:6558 prompt_cache_len:5151 prompt_cache_ratio:0.7854528819762122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 +DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10641312599182129 s +INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10723352432250977 s +DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=32215411473256686390517951858213228889, time:1750766881.1715913s req_ids:[8] +DEBUG 06-24 20:08:01 [manager.py:391] +ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:195.09363174438477ms total_cost_time:195.1162815093994ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6559 prompt_cache_len:5151 prompt_cache_ratio:0.7853331300503126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 +DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10326266288757324 s +INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10404109954833984 s +DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=103309180369724906960659546231745570996, time:1750766881.3687525s req_ids:[8] +DEBUG 06-24 20:08:01 [manager.py:391] +ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:195.32155990600586ms total_cost_time:195.3451633453369ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6560 prompt_cache_len:5151 prompt_cache_ratio:0.7852134146341463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 +DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10395693778991699 s +INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10489392280578613 s +DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=93991114498658739753755441224830960712, time:1750766881.5680447s req_ids:[8] +DEBUG 06-24 20:08:01 [manager.py:391] +ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:197.13950157165527ms total_cost_time:197.16644287109375ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6561 prompt_cache_len:5151 prompt_cache_ratio:0.7850937357110197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 +DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10436201095581055 s +INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10525846481323242 s +DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=269220682656883608215008651585342217569, time:1750766881.766165s req_ids:[8] +DEBUG 06-24 20:08:01 [manager.py:391] +ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:196.3801383972168ms total_cost_time:196.40064239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6562 prompt_cache_len:5151 prompt_cache_ratio:0.7849740932642487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 +DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10332465171813965 s +INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10425758361816406 s +DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=27057745429025615999948500940191507240, time:1750766881.9659078s req_ids:[8] +DEBUG 06-24 20:08:01 [manager.py:391] +ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:196.31361961364746ms total_cost_time:196.3369846343994ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6563 prompt_cache_len:5151 prompt_cache_ratio:0.7848544872771598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 +DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10347414016723633 s +INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.1043548583984375 s +DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=296527693582730257078243109686320094362, time:1750766882.162575s req_ids:[8] +DEBUG 06-24 20:08:02 [manager.py:391] +ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:195.57642936706543ms total_cost_time:195.59955596923828ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6564 prompt_cache_len:5151 prompt_cache_ratio:0.7847349177330896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 +DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10339570045471191 s +INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.1040952205657959 s +DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=33483788748351140146589963042480571007, time:1750766882.3294718s req_ids:[8] +DEBUG 06-24 20:08:02 [manager.py:391] +ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:447.11947441101074ms total_cost_time:447.1430778503418ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6565 prompt_cache_len:5151 prompt_cache_ratio:0.7846153846153846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 +DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10449981689453125 s +INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.10548734664916992 s +DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=304989367871509899525266204744141931004, time:1750766882.813156s req_ids:[8] +DEBUG 06-24 20:08:02 [manager.py:391] +ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:197.88312911987305ms total_cost_time:197.9050636291504ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6566 prompt_cache_len:5151 prompt_cache_ratio:0.7844958879074018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 +DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.1044623851776123 s +INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.10537910461425781 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=115118470218929170677551198842567753359, time:1750766883.0132272s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:200.0129222869873ms total_cost_time:200.03437995910645ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6567 prompt_cache_len:5151 prompt_cache_ratio:0.784376427592508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 +DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10448288917541504 s +INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10543084144592285 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=230181831405509754905413801276211858985, time:1750766883.2155313s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:198.44794273376465ms total_cost_time:198.4696388244629ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6568 prompt_cache_len:5151 prompt_cache_ratio:0.7842570036540804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 +DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.1034080982208252 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=33134982648399779774370338875152717013, time:1750766883.3820357s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10437512397766113 s +ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:164.88099098205566ms total_cost_time:164.90530967712402ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6569 prompt_cache_len:5151 prompt_cache_ratio:0.7841376160755061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 +DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.1043388843536377 s +INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10526895523071289 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=167901434297497913687850021988397450348, time:1750766883.5831628s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:198.5776424407959ms total_cost_time:198.60005378723145ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6570 prompt_cache_len:5151 prompt_cache_ratio:0.7840182648401827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 +DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10458755493164062 s +INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10553836822509766 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=94136648362729192975334448357501526764, time:1750766883.7845218s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:200.33025741577148ms total_cost_time:200.35243034362793ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6571 prompt_cache_len:5151 prompt_cache_ratio:0.7838989499315173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 +DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10350799560546875 s +INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10448670387268066 s +DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=289307394101204197849394223303276118495, time:1750766883.9560091s req_ids:[8] +DEBUG 06-24 20:08:03 [manager.py:391] +ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:167.5553321838379ms total_cost_time:167.586088180542ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:6572 prompt_cache_len:5151 prompt_cache_ratio:0.7837796713329276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 +DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.1033778190612793 s +INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10430407524108887 s +DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=201323526094499140131704555544006882246, time:1750766884.1225228s req_ids:[8] +DEBUG 06-24 20:08:04 [manager.py:391] +ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:165.7862663269043ms total_cost_time:165.80939292907715ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6573 prompt_cache_len:5151 prompt_cache_ratio:0.7836604290278412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 +DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.1036076545715332 s +DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=98671405032938867657604737324888287387, time:1750766884.2903838s req_ids:[8] +DEBUG 06-24 20:08:04 [manager.py:391] +INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10445094108581543 s +ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:164.2436981201172ms total_cost_time:164.26610946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6574 prompt_cache_len:5151 prompt_cache_ratio:0.7835412229996958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 +DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.10348010063171387 s +INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10436701774597168 s +DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=119329707580809012539896616823420033557, time:1750766884.4906654s req_ids:[8] +DEBUG 06-24 20:08:04 [manager.py:391] +ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:198.41957092285156ms total_cost_time:198.4424591064453ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6575 prompt_cache_len:5151 prompt_cache_ratio:0.7834220532319391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 +DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.10427093505859375 s +INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10523080825805664 s +DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=11017830086969121781467561119364908084, time:1750766884.6910179s req_ids:[8] +DEBUG 06-24 20:08:04 [manager.py:391] +ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:402.57811546325684ms total_cost_time:402.6014804840088ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6576 prompt_cache_len:5151 prompt_cache_ratio:0.7833029197080292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10341358184814453 s +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10424661636352539 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=150539983563989196837051547827601227710, time:1750766885.063207s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:164.44969177246094ms total_cost_time:164.47162628173828ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6577 prompt_cache_len:5151 prompt_cache_ratio:0.7831838224114338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10350227355957031 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=167570025489540151050218267461662009768, time:1750766885.229989s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10447192192077637 s +ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:163.86079788208008ms total_cost_time:163.88320922851562ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6578 prompt_cache_len:5151 prompt_cache_ratio:0.7830647613256309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10347270965576172 s +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10427165031433105 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=317389812520166892422619678154494894804, time:1750766885.4291837s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:198.08626174926758ms total_cost_time:198.10891151428223ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6579 prompt_cache_len:5151 prompt_cache_ratio:0.7829457364341085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10291624069213867 s +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10373187065124512 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=296462087924179389608161661769965199444, time:1750766885.6216543s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:178.99513244628906ms total_cost_time:179.0158748626709ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6580 prompt_cache_len:5151 prompt_cache_ratio:0.7828267477203648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.1031029224395752 s +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.1039421558380127 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=60413574130035240158253699094597767795, time:1750766885.8008034s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:177.2780418395996ms total_cost_time:177.30069160461426ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6581 prompt_cache_len:5151 prompt_cache_ratio:0.7827077951679076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 +DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10329365730285645 s +INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.1040806770324707 s +DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=66276631188760833674115637294585181631, time:1750766885.9826605s req_ids:[8] +DEBUG 06-24 20:08:05 [manager.py:391] +ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:180.9689998626709ms total_cost_time:180.99284172058105ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6582 prompt_cache_len:5151 prompt_cache_ratio:0.7825888787602553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 +DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10315942764282227 s +INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10401463508605957 s +DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=139215653085053419297325284996226614444, time:1750766886.1635964s req_ids:[8] +DEBUG 06-24 20:08:06 [manager.py:391] +DEBUG 06-24 20:08:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 30677.094 tokens/s +DEBUG 06-24 20:08:06 [stats.py:37] Avg prompt tokens throughput: 30667.744 tokens/s +DEBUG 06-24 20:08:06 [stats.py:37] Avg generate tokens throughput: 9.350 tokens/s +ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:176.93519592285156ms total_cost_time:176.9545078277588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6583 prompt_cache_len:5151 prompt_cache_ratio:0.7824699984809358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 +DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10346245765686035 s +INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10430788993835449 s +DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=297115931424411766673074528789680894868, time:1750766886.342778s req_ids:[8] +DEBUG 06-24 20:08:06 [manager.py:391] +ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:178.06482315063477ms total_cost_time:178.08938026428223ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6584 prompt_cache_len:5151 prompt_cache_ratio:0.7823511543134872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 +DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10317134857177734 s +INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10408687591552734 s +DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=209692352950660815403228627761921158643, time:1750766886.5237727s req_ids:[8] +DEBUG 06-24 20:08:06 [manager.py:391] +ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:177.7477264404297ms total_cost_time:177.77037620544434ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6585 prompt_cache_len:5151 prompt_cache_ratio:0.7822323462414579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 +DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.1035304069519043 s +INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10434651374816895 s +DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=289609579578588072205425068740855085820, time:1750766886.704047s req_ids:[8] +DEBUG 06-24 20:08:06 [manager.py:391] +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:179.27956581115723ms total_cost_time:179.29983139038086ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6586 prompt_cache_len:5151 prompt_cache_ratio:0.7821135742484057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 +DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.1035468578338623 s +INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10448288917541504 s +DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=293129712444990751025023324333042776492, time:1750766886.8852599s req_ids:[8] +DEBUG 06-24 20:08:06 [manager.py:391] +ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:445.36304473876953ms total_cost_time:445.3871250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6587 prompt_cache_len:5151 prompt_cache_ratio:0.7819948383178988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 +DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10301995277404785 s +INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.1038365364074707 s +DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=4495600393670880256734311935803767945, time:1750766887.3331206s req_ids:[8] +DEBUG 06-24 20:08:07 [manager.py:391] +ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:179.77285385131836ms total_cost_time:179.793119430542ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6588 prompt_cache_len:5151 prompt_cache_ratio:0.7818761384335154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 +DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10347890853881836 s +INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10431361198425293 s +DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=166688360723241871799307283720319327218, time:1750766887.5145986s req_ids:[8] +DEBUG 06-24 20:08:07 [manager.py:391] +ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:180.05776405334473ms total_cost_time:180.07683753967285ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6589 prompt_cache_len:5151 prompt_cache_ratio:0.7817574745788435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 +DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.1026756763458252 s +INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10354232788085938 s +INFO 06-24 20:08:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=246253059507484621452892384083505044897, time:1750766887.696044s req_ids:[8] +DEBUG 06-24 20:08:07 [manager.py:391] +ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:177.9465675354004ms total_cost_time:177.96611785888672ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6590 prompt_cache_len:5151 prompt_cache_ratio:0.7816388467374811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 +DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10301518440246582 s +INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10375452041625977 s +DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=111977189218298297154917991367338786883, time:1750766887.875277s req_ids:[8] +DEBUG 06-24 20:08:07 [manager.py:391] +ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:177.95920372009277ms total_cost_time:177.9789924621582ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6591 prompt_cache_len:5151 prompt_cache_ratio:0.781520254893036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 +DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10365486145019531 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10451006889343262 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=255903059030314638768994547491062777866, time:1750766888.0569398s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:178.3130168914795ms total_cost_time:178.33304405212402ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6592 prompt_cache_len:5151 prompt_cache_ratio:0.7814016990291263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 +DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10294103622436523 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10374617576599121 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=196964873592574270824987986178047185210, time:1750766888.2355106s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:177.72698402404785ms total_cost_time:177.7479648590088ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6593 prompt_cache_len:5151 prompt_cache_ratio:0.7812831791293796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 +DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10266518592834473 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10341143608093262 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=241875484116784925527753277529395867868, time:1750766888.4160109s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:178.78985404968262ms total_cost_time:178.80964279174805ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6594 prompt_cache_len:5151 prompt_cache_ratio:0.781164695177434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 +DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10264062881469727 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.1034703254699707 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=118916997724574188583551769676313403706, time:1750766888.5966334s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:178.06482315063477ms total_cost_time:178.08914184570312ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6595 prompt_cache_len:5151 prompt_cache_ratio:0.781046247156937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 +DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10382437705993652 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10496211051940918 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=258364177596127837824218742533220279316, time:1750766888.7769299s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:179.78310585021973ms total_cost_time:179.80504035949707ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6596 prompt_cache_len:5151 prompt_cache_ratio:0.7809278350515464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 +DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10369372367858887 s +INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10471844673156738 s +DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=151555169824861727898014115286026299808, time:1750766888.9572575s req_ids:[8] +DEBUG 06-24 20:08:08 [manager.py:391] +ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:176.50532722473145ms total_cost_time:176.52416229248047ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6597 prompt_cache_len:5151 prompt_cache_ratio:0.7808094588449295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 +DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10276484489440918 s +INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10384726524353027 s +DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=162944470859520650962697595420898744012, time:1750766889.1364498s req_ids:[8] +DEBUG 06-24 20:08:09 [manager.py:391] +ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:177.76846885681152ms total_cost_time:177.78825759887695ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6598 prompt_cache_len:5151 prompt_cache_ratio:0.7806911185207639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 +DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10268974304199219 s +INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10345578193664551 s +DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=74048051138518053639834027369433209462, time:1750766889.3190787s req_ids:[8] +DEBUG 06-24 20:08:09 [manager.py:391] +ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:376.4479160308838ms total_cost_time:376.4667510986328ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6599 prompt_cache_len:5151 prompt_cache_ratio:0.7805728140627368 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 +DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10375857353210449 s +INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10451221466064453 s +DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=14188385992404870168531881843669727848, time:1750766889.6986046s req_ids:[8] +DEBUG 06-24 20:08:09 [manager.py:391] +ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:181.2765598297119ms total_cost_time:181.29587173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6600 prompt_cache_len:5151 prompt_cache_ratio:0.7804545454545454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 +DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10363626480102539 s +INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10441303253173828 s +DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=67879041528280267818434537724890625298, time:1750766889.8798192s req_ids:[8] +DEBUG 06-24 20:08:09 [manager.py:391] +ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:180.03535270690918ms total_cost_time:180.05776405334473ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6601 prompt_cache_len:5151 prompt_cache_ratio:0.780336312679897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 +DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10311365127563477 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.1040492057800293 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=131685317018766945159821954781761140121, time:1750766890.0603228s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:177.68502235412598ms total_cost_time:177.70767211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6602 prompt_cache_len:5151 prompt_cache_ratio:0.7802181157225083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10274410247802734 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10351276397705078 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=256644791612131770199390578521525171997, time:1750766890.2396007s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:177.46424674987793ms total_cost_time:177.48689651489258ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6603 prompt_cache_len:5151 prompt_cache_ratio:0.7800999545661064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10282659530639648 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10361433029174805 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=216266420918941880875924200015291850098, time:1750766890.4202123s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:177.40201950073242ms total_cost_time:177.42204666137695ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6604 prompt_cache_len:5151 prompt_cache_ratio:0.7799818291944276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10255980491638184 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10336518287658691 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=153538677785110043256186593347632316745, time:1750766890.6002853s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:176.58686637878418ms total_cost_time:176.6073703765869ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6605 prompt_cache_len:5151 prompt_cache_ratio:0.7798637395912188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10318136215209961 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10396027565002441 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=280835678200776518496273460347817462003, time:1750766890.7757876s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:173.13742637634277ms total_cost_time:173.1586456298828ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6606 prompt_cache_len:5151 prompt_cache_ratio:0.7797456857402362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10281610488891602 s +INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10387015342712402 s +DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=21220642554357495732906291539070260379, time:1750766890.949562s req_ids:[8] +DEBUG 06-24 20:08:10 [manager.py:391] +ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:171.5095043182373ms total_cost_time:171.52953147888184ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6607 prompt_cache_len:5151 prompt_cache_ratio:0.7796276676252459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 +DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10353755950927734 s +INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10467290878295898 s +DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=36827867700163695493528678262358920762, time:1750766891.1236122s req_ids:[8] +DEBUG 06-24 20:08:11 [manager.py:391] +ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:173.53153228759766ms total_cost_time:173.5513210296631ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6608 prompt_cache_len:5151 prompt_cache_ratio:0.7795096852300242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 +DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10258340835571289 s +INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10365843772888184 s +DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=124606570890185929232611631494251867940, time:1750766891.298665s req_ids:[8] +DEBUG 06-24 20:08:11 [manager.py:391] +ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.70190811157227ms total_cost_time:171.7216968536377ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6609 prompt_cache_len:5151 prompt_cache_ratio:0.7793917385383567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 +DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10259151458740234 s +INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10367035865783691 s +DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=232945961897203821972240630802946431598, time:1750766891.4733114s req_ids:[8] +DEBUG 06-24 20:08:11 [manager.py:391] +ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:395.4133987426758ms total_cost_time:395.4334259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6610 prompt_cache_len:5151 prompt_cache_ratio:0.7792738275340393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 +DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10354828834533691 s +INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10467338562011719 s +DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=53101368892789926988651189164262826618, time:1750766891.8700006s req_ids:[8] +DEBUG 06-24 20:08:11 [manager.py:391] +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.41366004943848ms total_cost_time:171.4344024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6611 prompt_cache_len:5151 prompt_cache_ratio:0.7791559522008773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 +DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10256123542785645 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10346746444702148 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=302174623266224902665345525997973858971, time:1750766892.0436416s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.86212539672852ms total_cost_time:171.88453674316406ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6612 prompt_cache_len:5151 prompt_cache_ratio:0.7790381125226861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10287785530090332 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10374259948730469 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=3032045629400578820823193840322397544, time:1750766892.2170122s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.67360877990723ms total_cost_time:170.69530487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6613 prompt_cache_len:5151 prompt_cache_ratio:0.7789203084832905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.1036539077758789 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10462379455566406 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=279082285981614683865750171668569640259, time:1750766892.3901708s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.93181610107422ms total_cost_time:170.9582805633545ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6614 prompt_cache_len:5151 prompt_cache_ratio:0.7788025400665256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.1042630672454834 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10511112213134766 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=261742954679300659917031500397173604981, time:1750766892.5636303s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:174.6382713317871ms total_cost_time:174.66282844543457ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6615 prompt_cache_len:5151 prompt_cache_ratio:0.7786848072562358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10440301895141602 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10520243644714355 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=52609022134127389406442247711739875126, time:1750766892.7406607s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:171.431303024292ms total_cost_time:171.45705223083496ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:6616 prompt_cache_len:5151 prompt_cache_ratio:0.7785671100362757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10311579704284668 s +INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.1040494441986084 s +DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=235041608358570639903522591436355314691, time:1750766892.9145389s req_ids:[8] +DEBUG 06-24 20:08:12 [manager.py:391] +ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:172.09434509277344ms total_cost_time:172.1212863922119ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6617 prompt_cache_len:5151 prompt_cache_ratio:0.7784494483905093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 +DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10426878929138184 s +INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10519886016845703 s +DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=194773498160783962394009866550121854296, time:1750766893.0884979s req_ids:[8] +DEBUG 06-24 20:08:13 [manager.py:391] +ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.78471183776855ms total_cost_time:170.8052158355713ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6618 prompt_cache_len:5151 prompt_cache_ratio:0.7783318223028105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 +DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10252165794372559 s +INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10339879989624023 s +DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=68391446853682902637113617250917353863, time:1750766893.261682s req_ids:[8] +DEBUG 06-24 20:08:13 [manager.py:391] +ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:171.83279991149902ms total_cost_time:171.85688018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6619 prompt_cache_len:5151 prompt_cache_ratio:0.778214231757063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 +DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10317540168762207 s +INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10394597053527832 s +DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=232134761998851497740550972545339718604, time:1750766893.4364195s req_ids:[8] +DEBUG 06-24 20:08:13 [manager.py:391] +ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:174.53837394714355ms total_cost_time:174.5603084564209ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6620 prompt_cache_len:5151 prompt_cache_ratio:0.7780966767371601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 +DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10300922393798828 s +INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10384058952331543 s +DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=300258016195312242781607881392428318636, time:1750766893.612446s req_ids:[8] +DEBUG 06-24 20:08:13 [manager.py:391] +ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:171.44203186035156ms total_cost_time:171.4613437652588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6621 prompt_cache_len:5151 prompt_cache_ratio:0.777979157227005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 +DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.30655336380004883 s +INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.30755615234375 s +DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=76616850857632319776638515296303990014, time:1750766893.9724815s req_ids:[8] +DEBUG 06-24 20:08:13 [manager.py:391] +ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:374.4938373565674ms total_cost_time:374.5379447937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6622 prompt_cache_len:5151 prompt_cache_ratio:0.7778616732105105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 +DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10610008239746094 s +INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10734772682189941 s +DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=108516167701310695909846229215343784401, time:1750766894.1566823s req_ids:[8] +DEBUG 06-24 20:08:14 [manager.py:391] +ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:197.31950759887695ms total_cost_time:197.36480712890625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6623 prompt_cache_len:5151 prompt_cache_ratio:0.777744224671599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 +DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.1066889762878418 s +INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.1084146499633789 s +DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=99576434557634153304010861976032655950, time:1750766894.3741653s req_ids:[8] +DEBUG 06-24 20:08:14 [manager.py:391] +ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:213.9589786529541ms total_cost_time:214.00094032287598ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6624 prompt_cache_len:5151 prompt_cache_ratio:0.7776268115942029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 +DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10662484169006348 s +INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10846686363220215 s +DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=81390603297347369586015502132699054087, time:1750766894.5825822s req_ids:[8] +DEBUG 06-24 20:08:14 [manager.py:391] +ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:206.7403793334961ms total_cost_time:206.78400993347168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6625 prompt_cache_len:5151 prompt_cache_ratio:0.7775094339622641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 +DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:14 [batch.py:51] router release req id 8 +INFO 06-24 20:08:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10774016380310059 s +INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.1095573902130127 s +DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=238190361934916103154338218314280947890, time:1750766894.793305s req_ids:[8] +DEBUG 06-24 20:08:14 [manager.py:391] +ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:204.2081356048584ms total_cost_time:204.25057411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6626 prompt_cache_len:5151 prompt_cache_ratio:0.7773920917597343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 +DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10751938819885254 s +INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10940122604370117 s +DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=304291945790671430844889712059379354506, time:1750766895.002599s req_ids:[8] +DEBUG 06-24 20:08:15 [manager.py:391] +ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:217.2844409942627ms total_cost_time:217.33903884887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:6627 prompt_cache_len:5151 prompt_cache_ratio:0.7772747849705749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 +DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10763382911682129 s +INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s +DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=179281977347027537178778990850163758956, time:1750766895.2221758s req_ids:[8] +DEBUG 06-24 20:08:15 [manager.py:391] +ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:207.3519229888916ms total_cost_time:207.40222930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:6628 prompt_cache_len:5151 prompt_cache_ratio:0.7771575135787568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 +DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10854339599609375 s +INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.11043167114257812 s +DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=116141932841926246748307144103376131, time:1750766895.436326s req_ids:[8] +DEBUG 06-24 20:08:15 [manager.py:391] +ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:210.1418972015381ms total_cost_time:210.1907730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:6629 prompt_cache_len:5151 prompt_cache_ratio:0.7770402775682607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 +DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s +INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.11076021194458008 s +DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=313644453672583407323065939380400725469, time:1750766895.6425743s req_ids:[8] +DEBUG 06-24 20:08:15 [manager.py:391] +ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:202.17633247375488ms total_cost_time:202.22187042236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6630 prompt_cache_len:5151 prompt_cache_ratio:0.7769230769230769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 +DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10620403289794922 s +INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.10825181007385254 s +DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=178386253344971705894471970057605489820, time:1750766895.8554792s req_ids:[8] +DEBUG 06-24 20:08:15 [manager.py:391] +ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:222.16510772705078ms total_cost_time:222.1968173980713ms,out_token_counter:1 mean_per_token_cost_time: 0.03170967102050781ms prompt_token_num:6631 prompt_cache_len:5151 prompt_cache_ratio:0.7768059116272056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 +DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10491085052490234 s +INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10579061508178711 s +DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=100654872267871011793600096768215123932, time:1750766896.093473s req_ids:[8] +DEBUG 06-24 20:08:16 [manager.py:391] +ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:188.3232593536377ms total_cost_time:188.35163116455078ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6632 prompt_cache_len:5151 prompt_cache_ratio:0.7766887816646562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 +DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10452127456665039 s +INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10539364814758301 s +DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=331272067694196189234109459581233641012, time:1750766896.2733557s req_ids:[8] +DEBUG 06-24 20:08:16 [manager.py:391] +DEBUG 06-24 20:08:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 32693.724 tokens/s +DEBUG 06-24 20:08:16 [stats.py:37] Avg prompt tokens throughput: 32683.833 tokens/s +DEBUG 06-24 20:08:16 [stats.py:37] Avg generate tokens throughput: 9.891 tokens/s +INFO 06-24 20:08:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:366.90473556518555ms total_cost_time:366.9276237487793ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6633 prompt_cache_len:5151 prompt_cache_ratio:0.7765716870194482 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 +DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10358071327209473 s +INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10437870025634766 s +DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=314050034208254503942930170399987118245, time:1750766896.6405125s req_ids:[8] +DEBUG 06-24 20:08:16 [manager.py:391] +ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:173.50506782531738ms total_cost_time:173.53129386901855ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6634 prompt_cache_len:5151 prompt_cache_ratio:0.7764546276756105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 +DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10352206230163574 s +INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.1042184829711914 s +DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=339445298279271882749399439891659184736, time:1750766896.814091s req_ids:[8] +DEBUG 06-24 20:08:16 [manager.py:391] +ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:170.58706283569336ms total_cost_time:170.6104278564453ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6635 prompt_cache_len:5151 prompt_cache_ratio:0.7763376036171816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 +DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10346174240112305 s +INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10422515869140625 s +DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=121562355206119065467765885132694553295, time:1750766896.9900672s req_ids:[8] +DEBUG 06-24 20:08:16 [manager.py:391] +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:175.92597007751465ms total_cost_time:175.94647407531738ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6636 prompt_cache_len:5151 prompt_cache_ratio:0.7762206148282098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10375618934631348 s +INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10448598861694336 s +DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=41966788393262626009712097227105373913, time:1750766897.166263s req_ids:[8] +DEBUG 06-24 20:08:17 [manager.py:391] +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:172.20640182495117ms total_cost_time:172.23238945007324ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:6637 prompt_cache_len:5151 prompt_cache_ratio:0.7761036612927528 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10448217391967773 s +INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10520744323730469 s +DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=218651858331743370911797444053562495163, time:1750766897.3408914s req_ids:[8] +DEBUG 06-24 20:08:17 [manager.py:391] +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:172.6679801940918ms total_cost_time:172.69325256347656ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:6638 prompt_cache_len:5151 prompt_cache_ratio:0.775986742994878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10456490516662598 s +INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10544252395629883 s +DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=301851994734178787066283953352489382906, time:1750766897.5125735s req_ids:[8] +DEBUG 06-24 20:08:17 [manager.py:391] +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:169.2962646484375ms total_cost_time:169.32082176208496ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6639 prompt_cache_len:5151 prompt_cache_ratio:0.7758698599186624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10445165634155273 s +INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10521769523620605 s +DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=214612147480416088725447606320552949597, time:1750766897.6860292s req_ids:[8] +DEBUG 06-24 20:08:17 [manager.py:391] +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:170.99809646606445ms total_cost_time:171.02456092834473ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6640 prompt_cache_len:5151 prompt_cache_ratio:0.7757530120481928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10480189323425293 s +INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10550880432128906 s +DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=288837377376845741450783638332174716442, time:1750766897.8611555s req_ids:[8] +DEBUG 06-24 20:08:17 [manager.py:391] +ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:173.22945594787598ms total_cost_time:173.2501983642578ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6641 prompt_cache_len:5151 prompt_cache_ratio:0.7756361993675651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 +DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10285186767578125 s +INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10360407829284668 s +DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=328686461131071067099056829337821508011, time:1750766898.036024s req_ids:[8] +DEBUG 06-24 20:08:18 [manager.py:391] +ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:171.71931266784668ms total_cost_time:171.7393398284912ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6642 prompt_cache_len:5151 prompt_cache_ratio:0.7755194218608853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 +DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10268735885620117 s +INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10346508026123047 s +DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=81491088297838954735987998757817457308, time:1750766898.210867s req_ids:[8] +DEBUG 06-24 20:08:18 [manager.py:391] +ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:174.09157752990723ms total_cost_time:174.11160469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6643 prompt_cache_len:5151 prompt_cache_ratio:0.7754026795122686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 +DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10339236259460449 s +INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10413932800292969 s +DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=17660781774325945835768061267815100187, time:1750766898.3865294s req_ids:[8] +DEBUG 06-24 20:08:18 [manager.py:391] +ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:171.9064712524414ms total_cost_time:171.92769050598145ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6644 prompt_cache_len:5151 prompt_cache_ratio:0.7752859723058398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 +DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.30435895919799805 s +INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.3051755428314209 s +DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=210535194157748794671121346160281012819, time:1750766898.752301s req_ids:[8] +DEBUG 06-24 20:08:18 [manager.py:391] +ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:364.168643951416ms total_cost_time:364.18938636779785ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6645 prompt_cache_len:5151 prompt_cache_ratio:0.7751693002257336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 +DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10298967361450195 s +INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10371994972229004 s +DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=20501690227470305272477878612189158988, time:1750766898.9278543s req_ids:[8] +DEBUG 06-24 20:08:18 [manager.py:391] +ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:173.384428024292ms total_cost_time:173.40683937072754ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6646 prompt_cache_len:5151 prompt_cache_ratio:0.7750526632560939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10364818572998047 s +INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10438823699951172 s +DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=328369143440921323501441110847433197116, time:1750766899.1038973s req_ids:[8] +DEBUG 06-24 20:08:19 [manager.py:391] +ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:173.97117614746094ms total_cost_time:173.99215698242188ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6647 prompt_cache_len:5151 prompt_cache_ratio:0.7749360613810742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10349678993225098 s +INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.1042640209197998 s +DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=292253474812179226473531301739829822962, time:1750766899.2789533s req_ids:[8] +DEBUG 06-24 20:08:19 [manager.py:391] +ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:176.0251522064209ms total_cost_time:176.04565620422363ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6648 prompt_cache_len:5151 prompt_cache_ratio:0.7748194945848376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10339236259460449 s +INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10417342185974121 s +DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=270958768949171847383913317475610678651, time:1750766899.4568348s req_ids:[8] +DEBUG 06-24 20:08:19 [manager.py:391] +ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:183.93754959106445ms total_cost_time:183.9590072631836ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6649 prompt_cache_len:5151 prompt_cache_ratio:0.7747029628515566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10332274436950684 s +INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10415863990783691 s +DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=177971793414114685818885274700060432217, time:1750766899.654955s req_ids:[8] +DEBUG 06-24 20:08:19 [manager.py:391] +ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:196.45428657531738ms total_cost_time:196.47765159606934ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6650 prompt_cache_len:5151 prompt_cache_ratio:0.7745864661654135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10332489013671875 s +INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10413789749145508 s +DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=11842557855751636329965958002161259417, time:1750766899.8524349s req_ids:[8] +DEBUG 06-24 20:08:19 [manager.py:391] +ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:194.01240348815918ms total_cost_time:194.03553009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6651 prompt_cache_len:5151 prompt_cache_ratio:0.7744700045105999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 +DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10362505912780762 s +INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10444235801696777 s +DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=235311089144352083349883720089446389135, time:1750766900.0458918s req_ids:[8] +DEBUG 06-24 20:08:20 [manager.py:391] +ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:192.31939315795898ms total_cost_time:192.34371185302734ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6652 prompt_cache_len:5151 prompt_cache_ratio:0.7743535778713169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 +DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10342693328857422 s +INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10423541069030762 s +DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=291984512690000998264627330919083872621, time:1750766900.2438397s req_ids:[8] +DEBUG 06-24 20:08:20 [manager.py:391] +ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:194.66876983642578ms total_cost_time:194.69189643859863ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6653 prompt_cache_len:5151 prompt_cache_ratio:0.7742371862317752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 +DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.1035163402557373 s +INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10433268547058105 s +DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=28245207229027621910766393667237963024, time:1750766900.4409645s req_ids:[8] +DEBUG 06-24 20:08:20 [manager.py:391] +ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:193.53079795837402ms total_cost_time:193.55463981628418ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6654 prompt_cache_len:5151 prompt_cache_ratio:0.7741208295761948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 +DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10385274887084961 s +INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10463690757751465 s +DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=317514030739637553419146091810570466023, time:1750766900.638091s req_ids:[8] +DEBUG 06-24 20:08:20 [manager.py:391] +ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:471.22955322265625ms total_cost_time:471.2533950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6655 prompt_cache_len:5151 prompt_cache_ratio:0.7740045078888054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10342526435852051 s +INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10432195663452148 s +DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=30980645598591650455106540333409595111, time:1750766901.1110702s req_ids:[8] +DEBUG 06-24 20:08:21 [manager.py:391] +ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:194.4868564605713ms total_cost_time:194.50926780700684ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6656 prompt_cache_len:5151 prompt_cache_ratio:0.7738882211538461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10336017608642578 s +INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.1042332649230957 s +DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=34121926877449444250388161550306996865, time:1750766901.3078477s req_ids:[8] +DEBUG 06-24 20:08:21 [manager.py:391] +ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:195.55211067199707ms total_cost_time:195.5733299255371ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6657 prompt_cache_len:5151 prompt_cache_ratio:0.7737719693555656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10371208190917969 s +INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10457515716552734 s +DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=287432616830226294511326950660990517217, time:1750766901.5050535s req_ids:[8] +DEBUG 06-24 20:08:21 [manager.py:391] +ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:193.43280792236328ms total_cost_time:193.45808029174805ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:6658 prompt_cache_len:5151 prompt_cache_ratio:0.7736557524782217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.1029806137084961 s +INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10371804237365723 s +DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=175664264706789410897304020809037391109, time:1750766901.701097s req_ids:[8] +DEBUG 06-24 20:08:21 [manager.py:391] +ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:194.43726539611816ms total_cost_time:194.4587230682373ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6659 prompt_cache_len:5151 prompt_cache_ratio:0.773539570506082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10389161109924316 s +INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.104644775390625 s +DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=10630165977194723510697770059608746840, time:1750766901.8937702s req_ids:[8] +DEBUG 06-24 20:08:21 [manager.py:391] +ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:180.11474609375ms total_cost_time:180.13548851013184ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6660 prompt_cache_len:5151 prompt_cache_ratio:0.7734234234234234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 +DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10329294204711914 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10410022735595703 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=128063307483509719508947582742323474444, time:1750766902.069227s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:171.63372039794922ms total_cost_time:171.6594696044922ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:6661 prompt_cache_len:5151 prompt_cache_ratio:0.7733073112145323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10325241088867188 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10403227806091309 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=46927959820916164977552300981163483105, time:1750766902.2432866s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:173.1710433959961ms total_cost_time:173.19369316101074ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6662 prompt_cache_len:5151 prompt_cache_ratio:0.7731912338637046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10293006896972656 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10372734069824219 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=62987125412093965661292374868193298594, time:1750766902.4182777s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.2493896484375ms total_cost_time:171.26846313476562ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6663 prompt_cache_len:5151 prompt_cache_ratio:0.7730751913552454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10373187065124512 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10460710525512695 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=302858764404270177606600070937502381217, time:1750766902.5922801s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.84019088745117ms total_cost_time:171.8597412109375ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6664 prompt_cache_len:5151 prompt_cache_ratio:0.7729591836734694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10385394096374512 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10466408729553223 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=259728389754750789146250968403973284523, time:1750766902.766782s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:172.30916023254395ms total_cost_time:172.32966423034668ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6665 prompt_cache_len:5151 prompt_cache_ratio:0.7728432108027007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10370969772338867 s +INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10449647903442383 s +DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=2136601516786641120303337454849602120, time:1750766902.9407043s req_ids:[8] +DEBUG 06-24 20:08:22 [manager.py:391] +ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.33569717407227ms total_cost_time:171.3576316833496ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6666 prompt_cache_len:5151 prompt_cache_ratio:0.7727272727272727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 +DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.1028757095336914 s +INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10366630554199219 s +DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=122267241236350591908030647070893183518, time:1750766903.115273s req_ids:[8] +DEBUG 06-24 20:08:23 [manager.py:391] +INFO 06-24 20:08:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:364.73870277404785ms total_cost_time:364.78447914123535ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6667 prompt_cache_len:5151 prompt_cache_ratio:0.7726113694315284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 +DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10740351676940918 s +INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10833358764648438 s +DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=164102660930050860680511214386531734555, time:1750766903.4801629s req_ids:[8] +DEBUG 06-24 20:08:23 [manager.py:391] +ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:169.39020156860352ms total_cost_time:169.40927505493164ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6668 prompt_cache_len:5151 prompt_cache_ratio:0.77249550089982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 +DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10348629951477051 s +INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10426950454711914 s +DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=17869486175194536211254003213995010229, time:1750766903.6546123s req_ids:[8] +DEBUG 06-24 20:08:23 [manager.py:391] +ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:171.9977855682373ms total_cost_time:172.01972007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6669 prompt_cache_len:5151 prompt_cache_ratio:0.7723796671165092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 +DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10376715660095215 s +INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10466957092285156 s +DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=10259493781062500999804112004293445854, time:1750766903.8270166s req_ids:[8] +DEBUG 06-24 20:08:23 [manager.py:391] +ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:169.92759704589844ms total_cost_time:169.94810104370117ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6670 prompt_cache_len:5151 prompt_cache_ratio:0.772263868065967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 +DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10370993614196777 s +INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10449790954589844 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=220635371705624340100337866565929489887, time:1750766904.0005183s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:172.16253280639648ms total_cost_time:172.18351364135742ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6671 prompt_cache_len:5151 prompt_cache_ratio:0.7721481037325738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10272789001464844 s +INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10355091094970703 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=246183150744101542631592476689679010154, time:1750766904.1753678s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.90814018249512ms total_cost_time:171.92888259887695ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6672 prompt_cache_len:5151 prompt_cache_ratio:0.7720323741007195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10359883308410645 s +INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10427284240722656 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=323063720290417171130286498329359322088, time:1750766904.3478436s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.66972160339355ms total_cost_time:171.6899871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6673 prompt_cache_len:5151 prompt_cache_ratio:0.7719166791548029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10263442993164062 s +INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10336804389953613 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=234848190280298446323037109848195276285, time:1750766904.5229247s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:173.44999313354492ms total_cost_time:173.47049713134766ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6674 prompt_cache_len:5151 prompt_cache_ratio:0.7718010188792328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10278844833374023 s +INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10344481468200684 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=21845366977061307162928420200092222127, time:1750766904.6973526s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:174.4372844696045ms total_cost_time:174.45778846740723ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6675 prompt_cache_len:5151 prompt_cache_ratio:0.771685393258427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.1026163101196289 s +INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.1034235954284668 s +DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=222536560041210690881446825363828074025, time:1750766904.8749282s req_ids:[8] +DEBUG 06-24 20:08:24 [manager.py:391] +ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:173.46549034118652ms total_cost_time:173.48551750183105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6676 prompt_cache_len:5151 prompt_cache_ratio:0.7715698022768125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 +DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10376620292663574 s +INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10460615158081055 s +DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=76850591441180677852678766913997708516, time:1750766905.0495467s req_ids:[8] +DEBUG 06-24 20:08:25 [manager.py:391] +ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.45204544067383ms total_cost_time:171.47278785705566ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6677 prompt_cache_len:5151 prompt_cache_ratio:0.7714542459188258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 +DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10357403755187988 s +INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10443258285522461 s +DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=158138099099929713803848566779531729145, time:1750766905.222758s req_ids:[8] +DEBUG 06-24 20:08:25 [manager.py:391] +ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.52810096740723ms total_cost_time:171.54812812805176ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6678 prompt_cache_len:5151 prompt_cache_ratio:0.7713387241689128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 +DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.3035445213317871 s +INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.3043382167816162 s +DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=213742922549194738191601459810872188563, time:1750766905.589661s req_ids:[8] +DEBUG 06-24 20:08:25 [manager.py:391] +ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:364.54296112060547ms total_cost_time:364.5627498626709ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6679 prompt_cache_len:5151 prompt_cache_ratio:0.7712232370115286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 +DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10274505615234375 s +INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10355663299560547 s +DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=268013514714247676222980776834001766051, time:1750766905.7636595s req_ids:[8] +DEBUG 06-24 20:08:25 [manager.py:391] +ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.14734649658203ms total_cost_time:171.16880416870117ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6680 prompt_cache_len:5151 prompt_cache_ratio:0.7711077844311377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 +DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10281825065612793 s +INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10369467735290527 s +DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=4544020287834425213191641493530896407, time:1750766905.9374914s req_ids:[8] +DEBUG 06-24 20:08:25 [manager.py:391] +ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:172.18422889709473ms total_cost_time:172.20377922058105ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6681 prompt_cache_len:5151 prompt_cache_ratio:0.7709923664122137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10272216796875 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10355377197265625 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=302792190368058105885812858556684838605, time:1750766906.1119337s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.39577865600586ms total_cost_time:171.4150905609131ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6682 prompt_cache_len:5151 prompt_cache_ratio:0.7708769829392398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.1025991439819336 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10343623161315918 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=99641745374432970864789277615389258348, time:1750766906.2853358s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +DEBUG 06-24 20:08:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 33262.512 tokens/s +DEBUG 06-24 20:08:26 [stats.py:37] Avg prompt tokens throughput: 33252.524 tokens/s +DEBUG 06-24 20:08:26 [stats.py:37] Avg generate tokens throughput: 9.988 tokens/s +ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:172.50490188598633ms total_cost_time:172.52421379089355ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6683 prompt_cache_len:5151 prompt_cache_ratio:0.770761633996708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10355591773986816 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.1043710708618164 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=255537719101623284236138329010103564888, time:1750766906.4597776s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:170.81952095031738ms total_cost_time:170.8385944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6684 prompt_cache_len:5151 prompt_cache_ratio:0.7706463195691203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10346436500549316 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10428833961486816 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=255963418735040898194772303425898154574, time:1750766906.6313071s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:170.0420379638672ms total_cost_time:170.08519172668457ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6685 prompt_cache_len:5151 prompt_cache_ratio:0.7705310396409872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10663580894470215 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10767817497253418 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=67055651969537020383802311921946454342, time:1750766906.800675s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:166.78261756896973ms total_cost_time:166.8260097503662ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6686 prompt_cache_len:5151 prompt_cache_ratio:0.7704157941968292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 +DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10634708404541016 s +INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.1082160472869873 s +DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=21935383184900354097340083238716987528, time:1750766906.9691164s req_ids:[8] +DEBUG 06-24 20:08:26 [manager.py:391] +ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:181.05196952819824ms total_cost_time:181.09560012817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6687 prompt_cache_len:5151 prompt_cache_ratio:0.7703005832211754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 +DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.10605335235595703 s +INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.10798907279968262 s +DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=213359968021568292966472708527835167370, time:1750766907.1608782s req_ids:[8] +DEBUG 06-24 20:08:27 [manager.py:391] +ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:198.18115234375ms total_cost_time:198.2254981994629ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6688 prompt_cache_len:5151 prompt_cache_ratio:0.7701854066985646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 +DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.10673999786376953 s +INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.1088719367980957 s +DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=281303154695637182775781952614451809064, time:1750766907.37003s req_ids:[8] +DEBUG 06-24 20:08:27 [manager.py:391] +ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:211.62176132202148ms total_cost_time:211.66658401489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6689 prompt_cache_len:5151 prompt_cache_ratio:0.7700702646135447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 +DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.11572265625 s +INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.1182248592376709 s +DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=239751704778660776030788681335740157755, time:1750766907.5761216s req_ids:[8] +DEBUG 06-24 20:08:27 [manager.py:391] +ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:203.93681526184082ms total_cost_time:203.9813995361328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6690 prompt_cache_len:5151 prompt_cache_ratio:0.7699551569506726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 +DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.3085286617279053 s +INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.31034135818481445 s +DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=246395412899516577919414705014750691435, time:1750766907.9752414s req_ids:[8] +DEBUG 06-24 20:08:27 [manager.py:391] +ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:370.9242343902588ms total_cost_time:370.9678649902344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6691 prompt_cache_len:5151 prompt_cache_ratio:0.7698400836945151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 +DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10677194595336914 s +INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.10872364044189453 s +DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=282620410023735375457222546593774511403, time:1750766908.1566496s req_ids:[8] +DEBUG 06-24 20:08:28 [manager.py:391] +ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:196.72751426696777ms total_cost_time:196.77209854125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6692 prompt_cache_len:5151 prompt_cache_ratio:0.7697250448296473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 +DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10802602767944336 s +INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.11000442504882812 s +DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=252990733704484395681354265889919238969, time:1750766908.3592584s req_ids:[8] +DEBUG 06-24 20:08:28 [manager.py:391] +ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:210.81829071044922ms total_cost_time:210.86812019348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:6693 prompt_cache_len:5151 prompt_cache_ratio:0.7696100403406544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 +DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s +INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092526912689209 s +DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=42176902470794745955215985218822114854, time:1750766908.5746381s req_ids:[8] +DEBUG 06-24 20:08:28 [manager.py:391] +ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:205.11317253112793ms total_cost_time:205.15727996826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6694 prompt_cache_len:5151 prompt_cache_ratio:0.7694950702121303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 +DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10800719261169434 s +INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.11029195785522461 s +DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=24783726129431110386393331515845013217, time:1750766908.786741s req_ids:[8] +DEBUG 06-24 20:08:28 [manager.py:391] +ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:199.57685470581055ms total_cost_time:199.61881637573242ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6695 prompt_cache_len:5151 prompt_cache_ratio:0.7693801344286781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 +DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10670304298400879 s +INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.10862040519714355 s +DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=59928214765592580147659844039457142889, time:1750766908.9908593s req_ids:[8] +DEBUG 06-24 20:08:28 [manager.py:391] +ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:201.70235633850098ms total_cost_time:201.7230987548828ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6696 prompt_cache_len:5151 prompt_cache_ratio:0.7692652329749103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 +DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10657286643981934 s +INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.10802721977233887 s +DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=49264600708782802041134166380472424368, time:1750766909.1990623s req_ids:[8] +DEBUG 06-24 20:08:29 [manager.py:391] +ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:204.6678066253662ms total_cost_time:204.73599433898926ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:6697 prompt_cache_len:5151 prompt_cache_ratio:0.7691503658354487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 +DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10910367965698242 s +INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.11104536056518555 s +DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=292686549458837563439949141375681683571, time:1750766909.4069147s req_ids:[8] +DEBUG 06-24 20:08:29 [manager.py:391] +ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:204.19001579284668ms total_cost_time:204.23388481140137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6698 prompt_cache_len:5151 prompt_cache_ratio:0.7690355329949239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 +DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10703229904174805 s +INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.10890078544616699 s +DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=196819749059306390656920415950124368006, time:1750766909.6300597s req_ids:[8] +DEBUG 06-24 20:08:29 [manager.py:391] +ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:224.19118881225586ms total_cost_time:224.23672676086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6699 prompt_cache_len:5151 prompt_cache_ratio:0.7689207344379758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 +DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10824179649353027 s +INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s +DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=168406442583207974979581954703224952412, time:1750766909.8480458s req_ids:[8] +DEBUG 06-24 20:08:29 [manager.py:391] +ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:211.39764785766602ms total_cost_time:211.44366264343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6700 prompt_cache_len:5151 prompt_cache_ratio:0.7688059701492538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 +DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s +INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003565788269043 s +DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=147332535392471569285841464770054614332, time:1750766910.0680475s req_ids:[8] +DEBUG 06-24 20:08:30 [manager.py:391] +ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:213.8960361480713ms total_cost_time:213.94085884094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6701 prompt_cache_len:5151 prompt_cache_ratio:0.7686912401134159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 +DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10788178443908691 s +INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11001801490783691 s +DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=191959397599350931026694696406300295960, time:1750766910.2828557s req_ids:[8] +DEBUG 06-24 20:08:30 [manager.py:391] +ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:391.9696807861328ms total_cost_time:392.0145034790039ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6702 prompt_cache_len:5151 prompt_cache_ratio:0.7685765443151298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 +DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10800313949584961 s +INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11012554168701172 s +DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=318456931529128675692668461057963277990, time:1750766910.6727326s req_ids:[8] +DEBUG 06-24 20:08:30 [manager.py:391] +ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:197.2205638885498ms total_cost_time:197.2658634185791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6703 prompt_cache_len:5151 prompt_cache_ratio:0.768461882739072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 +DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10857272148132324 s +INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11085963249206543 s +DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=12813427073584617697323634515723354772, time:1750766910.8819442s req_ids:[8] +DEBUG 06-24 20:08:30 [manager.py:391] +ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:208.4197998046875ms total_cost_time:208.4660530090332ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6704 prompt_cache_len:5151 prompt_cache_ratio:0.7683472553699284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 +DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s +INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.10899066925048828 s +DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=330135876875456796282989728479313720206, time:1750766911.0954363s req_ids:[8] +DEBUG 06-24 20:08:31 [manager.py:391] +ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:207.30304718017578ms total_cost_time:207.34691619873047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6705 prompt_cache_len:5151 prompt_cache_ratio:0.7682326621923937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 +DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10723423957824707 s +INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.10912322998046875 s +DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=105608192307819772141236887839164647781, time:1750766911.317005s req_ids:[8] +DEBUG 06-24 20:08:31 [manager.py:391] +ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:214.94388580322266ms total_cost_time:214.98918533325195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6706 prompt_cache_len:5151 prompt_cache_ratio:0.7681181031911721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 +DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s +INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s +DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=295008873756857632056297240885271838370, time:1750766911.5304515s req_ids:[8] +DEBUG 06-24 20:08:31 [manager.py:391] +ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:206.12430572509766ms total_cost_time:206.16650581359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6707 prompt_cache_len:5151 prompt_cache_ratio:0.7680035783509765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 +DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10923147201538086 s +INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.11121416091918945 s +DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=170280736880172826854863411924707197143, time:1750766911.7496917s req_ids:[8] +DEBUG 06-24 20:08:31 [manager.py:391] +ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:216.45402908325195ms total_cost_time:216.49765968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6708 prompt_cache_len:5151 prompt_cache_ratio:0.7678890876565295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 +DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10793232917785645 s +INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.1100013256072998 s +DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=284962741641141091681157017492724357699, time:1750766911.9628575s req_ids:[8] +DEBUG 06-24 20:08:31 [manager.py:391] +ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:205.674409866333ms total_cost_time:205.7197093963623ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6709 prompt_cache_len:5151 prompt_cache_ratio:0.7677746310925623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 +DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10763907432556152 s +INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s +DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=197591840544237168808461760452749615432, time:1750766912.18445s req_ids:[8] +DEBUG 06-24 20:08:32 [manager.py:391] +ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:217.30542182922363ms total_cost_time:217.3483371734619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6710 prompt_cache_len:5151 prompt_cache_ratio:0.7676602086438152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 +DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s +INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s +DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=294090999140326597692134080247387816444, time:1750766912.3987265s req_ids:[8] +DEBUG 06-24 20:08:32 [manager.py:391] +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:207.57770538330078ms total_cost_time:207.62062072753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6711 prompt_cache_len:5151 prompt_cache_ratio:0.767545820295038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 +DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:32 [batch.py:51] router release req id 8 +INFO 06-24 20:08:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10788297653198242 s +INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.1097724437713623 s +DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=154088919688947973616282415618627949520, time:1750766912.6118033s req_ids:[8] +DEBUG 06-24 20:08:32 [manager.py:391] +ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:208.5883617401123ms total_cost_time:208.6503505706787ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:6712 prompt_cache_len:5151 prompt_cache_ratio:0.7674314660309892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 +DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s +INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10904908180236816 s +DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=106320657917346147897819065032855247952, time:1750766912.8234613s req_ids:[8] +DEBUG 06-24 20:08:32 [manager.py:391] +ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:205.4438591003418ms total_cost_time:205.48772811889648ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6713 prompt_cache_len:5151 prompt_cache_ratio:0.7673171458364367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 +DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10835957527160645 s +INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.11027336120605469 s +DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=161230471123807431630621270657695533810, time:1750766913.03567s req_ids:[8] +DEBUG 06-24 20:08:33 [manager.py:391] +ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:378.5703182220459ms total_cost_time:378.6156177520752ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6714 prompt_cache_len:5151 prompt_cache_ratio:0.7672028596961573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 +DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10730147361755371 s +INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.10979223251342773 s +DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=241516068784982826095322496324239754400, time:1750766913.4131095s req_ids:[8] +DEBUG 06-24 20:08:33 [manager.py:391] +ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:200.15525817871094ms total_cost_time:200.19841194152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6715 prompt_cache_len:5151 prompt_cache_ratio:0.7670886075949367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 +DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10809659957885742 s +INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.11013984680175781 s +DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=19411609795473690828923505217126781678, time:1750766913.6256084s req_ids:[8] +DEBUG 06-24 20:08:33 [manager.py:391] +ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:199.5542049407959ms total_cost_time:199.5992660522461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6716 prompt_cache_len:5151 prompt_cache_ratio:0.76697438951757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 +DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10680651664733887 s +INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.10880661010742188 s +DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=51453387134912828855516948669800398278, time:1750766913.8259282s req_ids:[8] +DEBUG 06-24 20:08:33 [manager.py:391] +ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:202.8799057006836ms total_cost_time:202.92282104492188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6717 prompt_cache_len:5151 prompt_cache_ratio:0.7668602054488611 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 +DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10848355293273926 s +INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.11042618751525879 s +DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=148889840732107309444267874568511472426, time:1750766914.0353277s req_ids:[8] +DEBUG 06-24 20:08:34 [manager.py:391] +ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:205.82103729248047ms total_cost_time:205.86681365966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6718 prompt_cache_len:5151 prompt_cache_ratio:0.7667460553736231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 +DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10651183128356934 s +INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.1084744930267334 s +DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=334973309206528870297413195019794328832, time:1750766914.2570422s req_ids:[8] +DEBUG 06-24 20:08:34 [manager.py:391] +ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:220.3505039215088ms total_cost_time:220.3960418701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6719 prompt_cache_len:5151 prompt_cache_ratio:0.766631939276678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 +DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s +INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994410514831543 s +DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=29452326635476435385003753671757923992, time:1750766914.4723237s req_ids:[8] +DEBUG 06-24 20:08:34 [manager.py:391] +ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:205.72686195373535ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6720 prompt_cache_len:5151 prompt_cache_ratio:0.7665178571428571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 +DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10782670974731445 s +INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10998344421386719 s +DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=294678871133913544834755032474214803707, time:1750766914.6806602s req_ids:[8] +DEBUG 06-24 20:08:34 [manager.py:391] +ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:208.30631256103516ms total_cost_time:208.35232734680176ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6721 prompt_cache_len:5151 prompt_cache_ratio:0.7664038089570004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 +DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10719728469848633 s +INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10911440849304199 s +DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=201595841732882854870594208350390536789, time:1750766914.8939748s req_ids:[8] +DEBUG 06-24 20:08:34 [manager.py:391] +ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:207.25607872009277ms total_cost_time:207.29994773864746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6722 prompt_cache_len:5151 prompt_cache_ratio:0.7662897947039572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 +DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10716581344604492 s +INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.10923075675964355 s +DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=8416288677439228005391345051958034976, time:1750766915.1061032s req_ids:[8] +DEBUG 06-24 20:08:35 [manager.py:391] +ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:205.89399337768555ms total_cost_time:205.93905448913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6723 prompt_cache_len:5151 prompt_cache_ratio:0.7661758143685855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 +DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:35 [batch.py:51] router release req id 8 +DEBUG 06-24 20:08:35 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:35 [manager.py:283] +DEBUG 06-24 20:08:35 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:35 [manager.py:284] +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s +INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.11012840270996094 s +DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=88507336782021023440795641149498961248, time:1750766915.3204677s req_ids:[8] +DEBUG 06-24 20:08:35 [manager.py:391] +ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:209.8546028137207ms total_cost_time:209.89990234375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6724 prompt_cache_len:5151 prompt_cache_ratio:0.7660618679357525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 +DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10892963409423828 s +INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.11083745956420898 s +DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=296357628105699983245747034866890123566, time:1750766915.533806s req_ids:[8] +DEBUG 06-24 20:08:35 [manager.py:391] +ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:207.54241943359375ms total_cost_time:207.60369300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:6725 prompt_cache_len:5151 prompt_cache_ratio:0.7659479553903346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 +DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.3115348815917969 s +INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.3135504722595215 s +DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=13678000044143905445337379650049336645, time:1750766915.942661s req_ids:[8] +DEBUG 06-24 20:08:35 [manager.py:391] +ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:404.27327156066895ms total_cost_time:404.31880950927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6726 prompt_cache_len:5151 prompt_cache_ratio:0.7658340767172168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 +DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:36 [batch.py:51] router release req id 8 +INFO 06-24 20:08:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10723447799682617 s +INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10917496681213379 s +DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=193495345133577661924044501876538101680, time:1750766916.157448s req_ids:[8] +DEBUG 06-24 20:08:36 [manager.py:391] +ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:206.6938877105713ms total_cost_time:206.73751831054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6727 prompt_cache_len:5151 prompt_cache_ratio:0.7657202319012933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 +DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s +INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10993790626525879 s +DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=502238838404425100428816804310129816, time:1750766916.3701265s req_ids:[8] +DEBUG 06-24 20:08:36 [manager.py:391] +DEBUG 06-24 20:08:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 29931.537 tokens/s +DEBUG 06-24 20:08:36 [stats.py:37] Avg prompt tokens throughput: 29922.613 tokens/s +DEBUG 06-24 20:08:36 [stats.py:37] Avg generate tokens throughput: 8.924 tokens/s +ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:208.4496021270752ms total_cost_time:208.4941864013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6728 prompt_cache_len:5151 prompt_cache_ratio:0.7656064209274673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 +DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10714316368103027 s +INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s +DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=19189512817642001562274960438499800950, time:1750766916.5954013s req_ids:[8] +DEBUG 06-24 20:08:36 [manager.py:391] +ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:221.49205207824707ms total_cost_time:221.55404090881348ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:6729 prompt_cache_len:5151 prompt_cache_ratio:0.765492643780651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 +DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.1089019775390625 s +INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.1108555793762207 s +DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=165444330422026395286628862689801865748, time:1750766916.8115408s req_ids:[8] +DEBUG 06-24 20:08:36 [manager.py:391] +ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:208.5425853729248ms total_cost_time:208.5862159729004ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6730 prompt_cache_len:5151 prompt_cache_ratio:0.7653789004457652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 +DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.1072244644165039 s +INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.10916733741760254 s +DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=253491670847908351536112789244110220432, time:1750766917.0247726s req_ids:[8] +DEBUG 06-24 20:08:37 [manager.py:391] +ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:209.02490615844727ms total_cost_time:209.06805992126465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6731 prompt_cache_len:5151 prompt_cache_ratio:0.7652651909077403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 +DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10931062698364258 s +INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.111572265625 s +DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=331747267810991488939375663638036191785, time:1750766917.2388408s req_ids:[8] +DEBUG 06-24 20:08:37 [manager.py:391] +ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:209.73849296569824ms total_cost_time:209.78403091430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6732 prompt_cache_len:5151 prompt_cache_ratio:0.7651515151515151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 +DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10712218284606934 s +INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914182662963867 s +DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=292301759242789518715217514065416443314, time:1750766917.4506118s req_ids:[8] +DEBUG 06-24 20:08:37 [manager.py:391] +ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:208.24551582336426ms total_cost_time:208.29057693481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6733 prompt_cache_len:5151 prompt_cache_ratio:0.7650378731620378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 +DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.11014699935913086 s +INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.11208868026733398 s +DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=240059531966130898876800830055992379952, time:1750766917.6628726s req_ids:[8] +DEBUG 06-24 20:08:37 [manager.py:391] +ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:203.30238342285156ms total_cost_time:203.34649085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6734 prompt_cache_len:5151 prompt_cache_ratio:0.7649242649242649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 +DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10933279991149902 s +INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.11142420768737793 s +DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=183685201305384668864030912205276770777, time:1750766917.8723195s req_ids:[8] +DEBUG 06-24 20:08:37 [manager.py:391] +ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:193.62521171569824ms total_cost_time:193.70460510253906ms,out_token_counter:1 mean_per_token_cost_time: 0.07939338684082031ms prompt_token_num:6735 prompt_cache_len:5151 prompt_cache_ratio:0.7648106904231626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 +DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10838985443115234 s +INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102895736694336 s +DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=246287987853511476426529927330699365189, time:1750766918.075125s req_ids:[8] +DEBUG 06-24 20:08:38 [manager.py:391] +ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:206.25877380371094ms total_cost_time:206.3007354736328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6736 prompt_cache_len:5151 prompt_cache_ratio:0.7646971496437055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 +DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s +INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.10979485511779785 s +DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=127372801947996544036491227655613577675, time:1750766918.2873216s req_ids:[8] +DEBUG 06-24 20:08:38 [manager.py:391] +ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:420.26615142822266ms total_cost_time:420.32384872436523ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:6737 prompt_cache_len:5151 prompt_cache_ratio:0.7645836425708773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 +DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s +INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s +DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=60047819696147762557686670744336811587, time:1750766918.7065413s req_ids:[8] +DEBUG 06-24 20:08:38 [manager.py:391] +ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:200.78182220458984ms total_cost_time:200.82640647888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6738 prompt_cache_len:5151 prompt_cache_ratio:0.7644701691896705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 +DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10904502868652344 s +INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.11105728149414062 s +DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=252497352531398885708197813635021907175, time:1750766918.9194736s req_ids:[8] +DEBUG 06-24 20:08:38 [manager.py:391] +ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:208.50443840026855ms total_cost_time:208.54997634887695ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6739 prompt_cache_len:5151 prompt_cache_ratio:0.7643567294850868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 +DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s +INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s +DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=115610693477718233507326140136722902166, time:1750766919.131967s req_ids:[8] +DEBUG 06-24 20:08:39 [manager.py:391] +ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:206.129789352417ms total_cost_time:206.17318153381348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6740 prompt_cache_len:5151 prompt_cache_ratio:0.7642433234421365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 +DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s +INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10969066619873047 s +DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=29717720000699866761248176799642494381, time:1750766919.3449202s req_ids:[8] +DEBUG 06-24 20:08:39 [manager.py:391] +ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:211.65013313293457ms total_cost_time:211.69567108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6741 prompt_cache_len:5151 prompt_cache_ratio:0.7641299510458389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 +DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10995030403137207 s +INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.11197638511657715 s +DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=322503750348632202975705303397126319806, time:1750766919.5671275s req_ids:[8] +DEBUG 06-24 20:08:39 [manager.py:391] +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:212.76497840881348ms total_cost_time:212.82505989074707ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6742 prompt_cache_len:5151 prompt_cache_ratio:0.7640166122812222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 +DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10605931282043457 s +INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10794281959533691 s +DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=10230627022835069444484710851566607728, time:1750766919.787143s req_ids:[8] +DEBUG 06-24 20:08:39 [manager.py:391] +ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:218.9924716949463ms total_cost_time:219.03491020202637ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6743 prompt_cache_len:5151 prompt_cache_ratio:0.7639033071333234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 +DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10812187194824219 s +INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.11025619506835938 s +DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=219568035245375300369524968770702965573, time:1750766920.0023599s req_ids:[8] +DEBUG 06-24 20:08:40 [manager.py:391] +ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:209.49721336364746ms total_cost_time:209.54251289367676ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6744 prompt_cache_len:5151 prompt_cache_ratio:0.7637900355871886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 +DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10781288146972656 s +INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.10973000526428223 s +DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=228273723028112542180981318754322040018, time:1750766920.2161644s req_ids:[8] +DEBUG 06-24 20:08:40 [manager.py:391] +ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:209.34104919433594ms total_cost_time:209.38682556152344ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6745 prompt_cache_len:5151 prompt_cache_ratio:0.7636767976278726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 +DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.1090383529663086 s +INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s +DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=104230606977252100823412701979251462906, time:1750766920.429848s req_ids:[8] +DEBUG 06-24 20:08:40 [manager.py:391] +ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:207.19408988952637ms total_cost_time:207.23819732666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6746 prompt_cache_len:5151 prompt_cache_ratio:0.7635635932404388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 +DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10814166069030762 s +INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.1100924015045166 s +DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=282316973358304690643278264402494620452, time:1750766920.642904s req_ids:[8] +DEBUG 06-24 20:08:40 [manager.py:391] +ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:208.74834060668945ms total_cost_time:208.79340171813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6747 prompt_cache_len:5151 prompt_cache_ratio:0.76345042240996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 +DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10831809043884277 s +INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s +DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=169237986028943535165137430126255829517, time:1750766920.8552425s req_ids:[8] +DEBUG 06-24 20:08:40 [manager.py:391] +ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:201.8909454345703ms total_cost_time:201.9338607788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6748 prompt_cache_len:5151 prompt_cache_ratio:0.7633372851215174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 +DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.309434175491333 s +INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.31143736839294434 s +DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=78320379318328277967915325647279185675, time:1750766921.2802517s req_ids:[8] +DEBUG 06-24 20:08:41 [manager.py:391] +ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:430.3269386291504ms total_cost_time:430.3874969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6749 prompt_cache_len:5151 prompt_cache_ratio:0.7632241813602015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 +DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.10868668556213379 s +INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.11049103736877441 s +DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=91094922499811045905987609088869949117, time:1750766921.5126514s req_ids:[8] +DEBUG 06-24 20:08:41 [manager.py:391] +ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:222.55468368530273ms total_cost_time:222.5971221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6750 prompt_cache_len:5151 prompt_cache_ratio:0.7631111111111111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 +DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.10689067840576172 s +INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s +DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=209984400732997473891358820165404926895, time:1750766921.7268076s req_ids:[8] +DEBUG 06-24 20:08:41 [manager.py:391] +ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:205.5513858795166ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6751 prompt_cache_len:5151 prompt_cache_ratio:0.7629980743593542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 +DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.1072847843170166 s +INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091163158416748 s +DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=313109080307095368606341924757917796701, time:1750766921.9455562s req_ids:[8] +DEBUG 06-24 20:08:41 [manager.py:391] +ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:173.61974716186523ms total_cost_time:173.66480827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6752 prompt_cache_len:5151 prompt_cache_ratio:0.7628850710900474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 +DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.1069488525390625 s +INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10880041122436523 s +DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=178909068906954253669261535474164004569, time:1750766922.1149204s req_ids:[8] +DEBUG 06-24 20:08:42 [manager.py:391] +ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:193.3763027191162ms total_cost_time:193.4225559234619ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6753 prompt_cache_len:5151 prompt_cache_ratio:0.7627721012883163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 +DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10831952095031738 s +INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.11026406288146973 s +DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=176572999536272374346531094946946535012, time:1750766922.3159695s req_ids:[8] +DEBUG 06-24 20:08:42 [manager.py:391] +ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.01422882080078ms total_cost_time:205.07216453552246ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:6754 prompt_cache_len:5151 prompt_cache_ratio:0.7626591649392952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 +DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.11023354530334473 s +INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.11216330528259277 s +DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=62672343667016140725304176744281383838, time:1750766922.524904s req_ids:[8] +DEBUG 06-24 20:08:42 [manager.py:391] +ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:203.3534049987793ms total_cost_time:203.3989429473877ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6755 prompt_cache_len:5151 prompt_cache_ratio:0.7625462620281274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 +DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10689973831176758 s +INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10880351066589355 s +DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=31613983164895435762734545647385562653, time:1750766922.735242s req_ids:[8] +DEBUG 06-24 20:08:42 [manager.py:391] +ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.57260513305664ms total_cost_time:205.6252956390381ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:6756 prompt_cache_len:5151 prompt_cache_ratio:0.7624333925399644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 +DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10717105865478516 s +INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10948967933654785 s +DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=119888786870526259577977256775036426299, time:1750766922.946584s req_ids:[8] +DEBUG 06-24 20:08:42 [manager.py:391] +ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.11388778686523ms total_cost_time:205.17230033874512ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6757 prompt_cache_len:5151 prompt_cache_ratio:0.7623205564599674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 +DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.10773324966430664 s +INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.10978198051452637 s +INFO 06-24 20:08:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=184512487882974925633390378294419813939, time:1750766923.1572506s req_ids:[8] +DEBUG 06-24 20:08:43 [manager.py:391] +ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:206.5298557281494ms total_cost_time:206.59136772155762ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:6758 prompt_cache_len:5151 prompt_cache_ratio:0.7622077537733057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 +DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.107391357421875 s +INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.10934066772460938 s +DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=177411912099606378266306546535816316849, time:1750766923.369534s req_ids:[8] +DEBUG 06-24 20:08:43 [manager.py:391] +ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:208.49084854125977ms total_cost_time:208.54997634887695ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6759 prompt_cache_len:5151 prompt_cache_ratio:0.7620949844651576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 +DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.3097422122955322 s +DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=61198221408760454322586257974543910418, time:1750766923.772991s req_ids:[8] +DEBUG 06-24 20:08:43 [manager.py:391] +INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.3116874694824219 s +ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:395.9174156188965ms total_cost_time:395.9622383117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6760 prompt_cache_len:5151 prompt_cache_ratio:0.7619822485207101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 +DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.10772514343261719 s +INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.1097402572631836 s +DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=335057589859209807641592147543714777312, time:1750766923.9870877s req_ids:[8] +DEBUG 06-24 20:08:43 [manager.py:391] +ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:208.72139930725098ms total_cost_time:208.76836776733398ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6761 prompt_cache_len:5151 prompt_cache_ratio:0.761869545925159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 +DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10708761215209961 s +INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10911822319030762 s +DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=35779362752252582917354885307538951500, time:1750766924.198649s req_ids:[8] +DEBUG 06-24 20:08:44 [manager.py:391] +ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:205.27076721191406ms total_cost_time:205.31392097473145ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6762 prompt_cache_len:5151 prompt_cache_ratio:0.761756876663709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 +DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10691070556640625 s +INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.1089942455291748 s +DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=63260897216070476343805153363406224280, time:1750766924.4098787s req_ids:[8] +DEBUG 06-24 20:08:44 [manager.py:391] +ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:204.15019989013672ms total_cost_time:204.19740676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6763 prompt_cache_len:5151 prompt_cache_ratio:0.7616442407215732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 +DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10646581649780273 s +INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10845637321472168 s +DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=187291577263790937726844947324173144257, time:1750766924.6195474s req_ids:[8] +DEBUG 06-24 20:08:44 [manager.py:391] +ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:207.55791664123535ms total_cost_time:207.60393142700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6764 prompt_cache_len:5151 prompt_cache_ratio:0.761531638083974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 +DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10716724395751953 s +INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10917997360229492 s +DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=274430659751551982977824336866464089830, time:1750766924.82964s req_ids:[8] +DEBUG 06-24 20:08:44 [manager.py:391] +ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:205.75523376464844ms total_cost_time:205.7967185974121ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6765 prompt_cache_len:5151 prompt_cache_ratio:0.761419068736142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 +DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10792946815490723 s +INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.11000204086303711 s +DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=195274094301156479846658081054958228848, time:1750766925.0416074s req_ids:[8] +DEBUG 06-24 20:08:45 [manager.py:391] +ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:206.80546760559082ms total_cost_time:206.8495750427246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6766 prompt_cache_len:5151 prompt_cache_ratio:0.7613065326633166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 +DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10720324516296387 s +INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10921907424926758 s +DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=194890738647437413925465202597311011598, time:1750766925.2535632s req_ids:[8] +DEBUG 06-24 20:08:45 [manager.py:391] +ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:205.7485580444336ms total_cost_time:205.79266548156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6767 prompt_cache_len:5151 prompt_cache_ratio:0.7611940298507462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 +DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10851359367370605 s +INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s +DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=143949160973121822218989297233902846223, time:1750766925.4638603s req_ids:[8] +DEBUG 06-24 20:08:45 [manager.py:391] +ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:203.8130760192871ms total_cost_time:203.8578987121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6768 prompt_cache_len:5151 prompt_cache_ratio:0.7610815602836879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 +DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10704565048217773 s +INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10903477668762207 s +DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=237750400561157892323276976140745843408, time:1750766925.6737409s req_ids:[8] +DEBUG 06-24 20:08:45 [manager.py:391] +ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:205.5661678314209ms total_cost_time:205.60932159423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6769 prompt_cache_len:5151 prompt_cache_ratio:0.7609691239474073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 +DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10779547691345215 s +INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10983681678771973 s +DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=189529664323192768928888967928959667068, time:1750766925.8851635s req_ids:[8] +DEBUG 06-24 20:08:45 [manager.py:391] +ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:206.1140537261963ms total_cost_time:206.15768432617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6770 prompt_cache_len:5151 prompt_cache_ratio:0.7608567208271787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 +DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10626983642578125 s +INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10800480842590332 s +DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=224287249814167857040856566119867663100, time:1750766926.0976348s req_ids:[8] +DEBUG 06-24 20:08:46 [manager.py:391] +ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:168.68257522583008ms total_cost_time:168.72501373291016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6771 prompt_cache_len:5151 prompt_cache_ratio:0.7607443509082853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 +DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:46 [batch.py:51] router release req id 8 +INFO 06-24 20:08:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.31071925163269043 s +INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.3133690357208252 s +DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=82035410088159140682469824966854678640, time:1750766926.4846282s req_ids:[8] +DEBUG 06-24 20:08:46 [manager.py:391] +DEBUG 06-24 20:08:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 29374.630 tokens/s +DEBUG 06-24 20:08:46 [stats.py:37] Avg prompt tokens throughput: 29365.930 tokens/s +DEBUG 06-24 20:08:46 [stats.py:37] Avg generate tokens throughput: 8.700 tokens/s +ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:421.76318168640137ms total_cost_time:421.8168258666992ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:6772 prompt_cache_len:5151 prompt_cache_ratio:0.7606320141760189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 +DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:46 [batch.py:51] router release req id 8 +INFO 06-24 20:08:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s +INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s +DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=313234008715192439293167780811178395306, time:1750766926.7005136s req_ids:[8] +DEBUG 06-24 20:08:46 [manager.py:391] +ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:207.5216770172119ms total_cost_time:207.5662612915039ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6773 prompt_cache_len:5151 prompt_cache_ratio:0.7605197106156799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 +DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10767555236816406 s +INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10966253280639648 s +DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=92477025772287950603912353368303292975, time:1750766926.912773s req_ids:[8] +DEBUG 06-24 20:08:46 [manager.py:391] +ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:204.00500297546387ms total_cost_time:204.05006408691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6774 prompt_cache_len:5151 prompt_cache_ratio:0.7604074402125776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 +DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.1072385311126709 s +INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.10925507545471191 s +DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=322910606595456292834510955635338698519, time:1750766927.122272s req_ids:[8] +DEBUG 06-24 20:08:47 [manager.py:391] +ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:207.2908878326416ms total_cost_time:207.33380317687988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6775 prompt_cache_len:5151 prompt_cache_ratio:0.7602952029520296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 +DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.1081242561340332 s +INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11072325706481934 s +DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=94265110629409966291883574646290877440, time:1750766927.334459s req_ids:[8] +DEBUG 06-24 20:08:47 [manager.py:391] +ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:204.44893836975098ms total_cost_time:204.49471473693848ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6776 prompt_cache_len:5151 prompt_cache_ratio:0.7601829988193625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 +DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s +INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.10980916023254395 s +DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=283530304557294388573507479508404600870, time:1750766927.5450237s req_ids:[8] +DEBUG 06-24 20:08:47 [manager.py:391] +ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:205.18112182617188ms total_cost_time:205.22618293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6777 prompt_cache_len:5151 prompt_cache_ratio:0.7600708277999114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 +DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10916948318481445 s +INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11110472679138184 s +DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=68868785949950384980426062600360074558, time:1750766927.7571042s req_ids:[8] +DEBUG 06-24 20:08:47 [manager.py:391] +ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:207.5815200805664ms total_cost_time:207.6256275177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6778 prompt_cache_len:5151 prompt_cache_ratio:0.7599586898790204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 +DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10992765426635742 s +INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11233949661254883 s +DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=152892528172023292954586396489725154654, time:1750766927.967879s req_ids:[8] +DEBUG 06-24 20:08:47 [manager.py:391] +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:205.15727996826172ms total_cost_time:205.2011489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6779 prompt_cache_len:5151 prompt_cache_ratio:0.7598465850420416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 +DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10751724243164062 s +INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s +DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=165488218452614913446654519369134634504, time:1750766928.1790571s req_ids:[8] +DEBUG 06-24 20:08:48 [manager.py:391] +ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:206.11000061035156ms total_cost_time:206.15386962890625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6780 prompt_cache_len:5151 prompt_cache_ratio:0.7597345132743363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 +DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10778355598449707 s +INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10981631278991699 s +DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=252450605717652009987969899972673146229, time:1750766928.3899784s req_ids:[8] +DEBUG 06-24 20:08:48 [manager.py:391] +ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:375.4286766052246ms total_cost_time:375.4911422729492ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:6781 prompt_cache_len:5151 prompt_cache_ratio:0.7596224745612742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 +DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10796713829040527 s +INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10994935035705566 s +DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=23516597177518177035405939292839748097, time:1750766928.7651787s req_ids:[8] +DEBUG 06-24 20:08:48 [manager.py:391] +ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:198.49681854248047ms total_cost_time:198.54092597961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6782 prompt_cache_len:5151 prompt_cache_ratio:0.7595104688882336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 +DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10778665542602539 s +INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10985922813415527 s +DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=203485221896755871233354274138310620708, time:1750766928.9750814s req_ids:[8] +DEBUG 06-24 20:08:48 [manager.py:391] +ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:206.74967765808105ms total_cost_time:206.79211616516113ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6783 prompt_cache_len:5151 prompt_cache_ratio:0.7593984962406015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 +DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10686278343200684 s +INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s +DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=254655720894725969309801202973118646342, time:1750766929.1878257s req_ids:[8] +DEBUG 06-24 20:08:49 [manager.py:391] +ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:208.44173431396484ms total_cost_time:208.48870277404785ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6784 prompt_cache_len:5151 prompt_cache_ratio:0.7592865566037735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 +DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10705327987670898 s +INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.10907459259033203 s +DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=96219160658219615205499946749251361804, time:1750766929.4013824s req_ids:[8] +DEBUG 06-24 20:08:49 [manager.py:391] +ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:206.74586296081543ms total_cost_time:206.80665969848633ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6785 prompt_cache_len:5151 prompt_cache_ratio:0.759174649963154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 +DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10830354690551758 s +INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.11021828651428223 s +DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=94380587574532285646200314595935828628, time:1750766929.6126935s req_ids:[8] +DEBUG 06-24 20:08:49 [manager.py:391] +ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:206.5408229827881ms total_cost_time:206.5868377685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6786 prompt_cache_len:5151 prompt_cache_ratio:0.7590627763041556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 +DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10819768905639648 s +INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.11020970344543457 s +DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=78197480041733542648039021190915062801, time:1750766929.8380508s req_ids:[8] +DEBUG 06-24 20:08:49 [manager.py:391] +ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:221.7421531677246ms total_cost_time:221.8027114868164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6787 prompt_cache_len:5151 prompt_cache_ratio:0.7589509356121998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 +DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s +INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11014485359191895 s +DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=147765756231835770340044809408454696949, time:1750766930.0530963s req_ids:[8] +DEBUG 06-24 20:08:50 [manager.py:391] +ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:205.6901454925537ms total_cost_time:205.7344913482666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6788 prompt_cache_len:5151 prompt_cache_ratio:0.7588391278727166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 +DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10959291458129883 s +INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11153912544250488 s +DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=190585900172372510502676911678700753440, time:1750766930.2658298s req_ids:[8] +DEBUG 06-24 20:08:50 [manager.py:391] +ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:208.2803249359131ms total_cost_time:208.32562446594238ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6789 prompt_cache_len:5151 prompt_cache_ratio:0.7587273530711445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 +DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10746288299560547 s +INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.10941505432128906 s +DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=5147996313211811105363772403616630933, time:1750766930.4791634s req_ids:[8] +DEBUG 06-24 20:08:50 [manager.py:391] +ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:207.23247528076172ms total_cost_time:207.2770595550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6790 prompt_cache_len:5151 prompt_cache_ratio:0.7586156111929307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 +DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10779356956481934 s +INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.10982370376586914 s +DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=286606952964628576938799592731346262469, time:1750766930.6908398s req_ids:[8] +DEBUG 06-24 20:08:50 [manager.py:391] +ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:206.76517486572266ms total_cost_time:206.81047439575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6791 prompt_cache_len:5151 prompt_cache_ratio:0.7585039022235311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 +DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.1078798770904541 s +INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11066484451293945 s +DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=230765914308757202153944771379198524618, time:1750766930.9033625s req_ids:[8] +DEBUG 06-24 20:08:50 [manager.py:391] +ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:379.7633647918701ms total_cost_time:379.8089027404785ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6792 prompt_cache_len:5151 prompt_cache_ratio:0.7583922261484098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 +DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10911059379577637 s +INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11124348640441895 s +DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=119612395175745845849597073514319604438, time:1750766931.2842295s req_ids:[8] +DEBUG 06-24 20:08:51 [manager.py:391] +ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:217.79155731201172ms total_cost_time:217.8349494934082ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6793 prompt_cache_len:5151 prompt_cache_ratio:0.7582805829530399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 +DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10807228088378906 s +INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11004281044006348 s +DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=286494089978722960493267422863171282923, time:1750766931.5057797s req_ids:[8] +DEBUG 06-24 20:08:51 [manager.py:391] +ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:195.30606269836426ms total_cost_time:195.35183906555176ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6794 prompt_cache_len:5151 prompt_cache_ratio:0.7581689726229026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 +DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s +INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11052322387695312 s +DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=260873694769622915261915499260171345399, time:1750766931.7134244s req_ids:[8] +DEBUG 06-24 20:08:51 [manager.py:391] +ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:205.3229808807373ms total_cost_time:205.3675651550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6795 prompt_cache_len:5151 prompt_cache_ratio:0.7580573951434879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 +DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10749626159667969 s +INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.10945987701416016 s +DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=133640124527346400198273736458967993871, time:1750766931.921991s req_ids:[8] +DEBUG 06-24 20:08:51 [manager.py:391] +ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:205.23810386657715ms total_cost_time:205.28268814086914ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6796 prompt_cache_len:5151 prompt_cache_ratio:0.7579458505002943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 +DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10841917991638184 s +INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s +DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=21778620527042892497097852364577625813, time:1750766932.1333263s req_ids:[8] +DEBUG 06-24 20:08:52 [manager.py:391] +ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:207.06605911254883ms total_cost_time:207.11064338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6797 prompt_cache_len:5151 prompt_cache_ratio:0.7578343386788289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 +DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10849881172180176 s +INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.1110239028930664 s +DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=125288009436500817618780378276220361105, time:1750766932.3468506s req_ids:[8] +DEBUG 06-24 20:08:52 [manager.py:391] +ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:207.18073844909668ms total_cost_time:207.22413063049316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6798 prompt_cache_len:5151 prompt_cache_ratio:0.7577228596646073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 +DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10761857032775879 s +INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.10953736305236816 s +DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=98304187566559320348146116816158238604, time:1750766932.558749s req_ids:[8] +DEBUG 06-24 20:08:52 [manager.py:391] +ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:205.87730407714844ms total_cost_time:205.92093467712402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6799 prompt_cache_len:5151 prompt_cache_ratio:0.7576114134431534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 +DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10853362083435059 s +INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.11053848266601562 s +DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=217080632940604623842237636984809670372, time:1750766932.7734692s req_ids:[8] +DEBUG 06-24 20:08:52 [manager.py:391] +ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:206.88724517822266ms total_cost_time:206.94565773010254ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6800 prompt_cache_len:5151 prompt_cache_ratio:0.7575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 +DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10735058784484863 s +INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.10947871208190918 s +DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=296738469189727214031625300847602126259, time:1750766932.9834988s req_ids:[8] +DEBUG 06-24 20:08:52 [manager.py:391] +ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:204.9269676208496ms total_cost_time:204.9720287322998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6801 prompt_cache_len:5151 prompt_cache_ratio:0.7573886193206881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 +DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10728693008422852 s +INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942673683166504 s +INFO 06-24 20:08:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:08:53 [statics_utils.py:24] mean first cost: 233.3704283006215 ms +INFO 06-24 20:08:53 [statics_utils.py:24] mean per token cost: 0.1037253025240309 ms +DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=309421786694701307951979883044879572168, time:1750766933.1935613s req_ids:[8] +DEBUG 06-24 20:08:53 [manager.py:391] +INFO 06-24 20:08:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:204.47659492492676ms total_cost_time:204.51998710632324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6802 prompt_cache_len:5151 prompt_cache_ratio:0.7572772713907674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 +DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.1067037582397461 s +INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s +DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=214407128989713558225604226204020395788, time:1750766933.4035594s req_ids:[8] +DEBUG 06-24 20:08:53 [manager.py:391] +ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:369.9917793273926ms total_cost_time:370.03564834594727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6803 prompt_cache_len:5151 prompt_cache_ratio:0.757165956195796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 +DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:08:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10904097557067871 s +INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.11112403869628906 s +DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=152945604529670885507645194524750962048, time:1750766933.7736366s req_ids:[8] +DEBUG 06-24 20:08:53 [manager.py:391] +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:199.55158233642578ms total_cost_time:199.59521293640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6804 prompt_cache_len:5151 prompt_cache_ratio:0.7570546737213404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 +DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10995936393737793 s +INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.11216163635253906 s +DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=37265643018336761248807333963735391818, time:1750766933.986626s req_ids:[8] +DEBUG 06-24 20:08:53 [manager.py:391] +ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:212.0048999786377ms total_cost_time:212.0521068572998ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6805 prompt_cache_len:5151 prompt_cache_ratio:0.7569434239529758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 +DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10766005516052246 s +INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s +DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=298547032732709273951645993427227226019, time:1750766934.2168617s req_ids:[8] +DEBUG 06-24 20:08:54 [manager.py:391] +ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:216.31860733032227ms total_cost_time:216.36056900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6806 prompt_cache_len:5151 prompt_cache_ratio:0.7568322068762856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 +DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s +INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.11019778251647949 s +DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=340277087876868199815569875476864188595, time:1750766934.4249306s req_ids:[8] +DEBUG 06-24 20:08:54 [manager.py:391] +ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:204.54812049865723ms total_cost_time:204.5919895172119ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6807 prompt_cache_len:5151 prompt_cache_ratio:0.756721022476862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 +DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10575222969055176 s +INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.10774803161621094 s +DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=75556858598528452147306081279350670220, time:1750766934.6351163s req_ids:[8] +DEBUG 06-24 20:08:54 [manager.py:391] +ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:205.95622062683105ms total_cost_time:206.00032806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6808 prompt_cache_len:5151 prompt_cache_ratio:0.7566098707403055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 +DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10661792755126953 s +INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.10867190361022949 s +DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=153441860747713807877371441881734185883, time:1750766934.8477068s req_ids:[8] +DEBUG 06-24 20:08:54 [manager.py:391] +ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:204.98895645141602ms total_cost_time:205.0337791442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6809 prompt_cache_len:5151 prompt_cache_ratio:0.756498751652225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 +DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s +INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11023402214050293 s +DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=117284022306644619603333435732420065603, time:1750766935.0608492s req_ids:[8] +DEBUG 06-24 20:08:55 [manager.py:391] +ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:209.8078727722168ms total_cost_time:209.85865592956543ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:6810 prompt_cache_len:5151 prompt_cache_ratio:0.7563876651982379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 +DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10847997665405273 s +INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081933975219727 s +DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=124068384156100191802701432997152032523, time:1750766935.275859s req_ids:[8] +DEBUG 06-24 20:08:55 [manager.py:391] +ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:226.94778442382812ms total_cost_time:226.9916534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6811 prompt_cache_len:5151 prompt_cache_ratio:0.7562766113639701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 +DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10743856430053711 s +INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.10943031311035156 s +DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=245254695863644878168693848881388718397, time:1750766935.5013602s req_ids:[8] +DEBUG 06-24 20:08:55 [manager.py:391] +ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:194.793701171875ms total_cost_time:194.85020637512207ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:6812 prompt_cache_len:5151 prompt_cache_ratio:0.7561655901350558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 +DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10984206199645996 s +INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11184835433959961 s +DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=149609619247020072101385771269713651258, time:1750766935.7130337s req_ids:[8] +DEBUG 06-24 20:08:55 [manager.py:391] +ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:213.55295181274414ms total_cost_time:213.59014511108398ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:6813 prompt_cache_len:5151 prompt_cache_ratio:0.7560546014971379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 +DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10763239860534668 s +INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.10960817337036133 s +DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=304787358479949976232689580980107115929, time:1750766935.92544s req_ids:[8] +DEBUG 06-24 20:08:55 [manager.py:391] +ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:201.9345760345459ms total_cost_time:201.98512077331543ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:6814 prompt_cache_len:5151 prompt_cache_ratio:0.7559436454358673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 +DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10815787315368652 s +INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.11001777648925781 s +DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=149849551543287818636116492078764238697, time:1750766936.137794s req_ids:[8] +DEBUG 06-24 20:08:56 [manager.py:391] +ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:208.57787132263184ms total_cost_time:208.62936973571777ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6815 prompt_cache_len:5151 prompt_cache_ratio:0.7558327219369039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 +DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.31054115295410156 s +INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.3125176429748535 s +DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=103416269575912463498794671518578235352, time:1750766936.5595899s req_ids:[8] +DEBUG 06-24 20:08:56 [manager.py:391] +DEBUG 06-24 20:08:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 29682.081 tokens/s +DEBUG 06-24 20:08:56 [stats.py:37] Avg prompt tokens throughput: 29673.347 tokens/s +DEBUG 06-24 20:08:56 [stats.py:37] Avg generate tokens throughput: 8.735 tokens/s +ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:421.1719036102295ms total_cost_time:421.2336540222168ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6816 prompt_cache_len:5151 prompt_cache_ratio:0.7557218309859155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 +DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10702180862426758 s +INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.10955953598022461 s +DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=43174193868915020687055380947455832254, time:1750766936.7763374s req_ids:[8] +DEBUG 06-24 20:08:56 [manager.py:391] +ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:224.73406791687012ms total_cost_time:224.78818893432617ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:6817 prompt_cache_len:5151 prompt_cache_ratio:0.7556109725685786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 +DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10881280899047852 s +INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.11111950874328613 s +DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=340106471239495112144221860027334925483, time:1750766937.006206s req_ids:[8] +DEBUG 06-24 20:08:57 [manager.py:391] +ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:196.56896591186523ms total_cost_time:196.6257095336914ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:6818 prompt_cache_len:5151 prompt_cache_ratio:0.7555001466705779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 +DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:57 [batch.py:51] router release req id 8 +INFO 06-24 20:08:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10696625709533691 s +INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.1088876724243164 s +DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=98128387816006441505230501916506021110, time:1750766937.2127125s req_ids:[8] +DEBUG 06-24 20:08:57 [manager.py:391] +ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:203.72653007507324ms total_cost_time:203.7796974182129ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:6819 prompt_cache_len:5151 prompt_cache_ratio:0.7553893532776067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 +DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s +INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.11032414436340332 s +DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=97578720243199485041838651943656236325, time:1750766937.4236917s req_ids:[8] +DEBUG 06-24 20:08:57 [manager.py:391] +ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:204.48660850524902ms total_cost_time:204.5307159423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6820 prompt_cache_len:5151 prompt_cache_ratio:0.7552785923753665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 +DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10671806335449219 s +INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.10860967636108398 s +DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=100091910470335941765493008615469347300, time:1750766937.6339097s req_ids:[8] +DEBUG 06-24 20:08:57 [manager.py:391] +ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:203.54819297790527ms total_cost_time:203.59277725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6821 prompt_cache_len:5151 prompt_cache_ratio:0.7551678639495675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 +DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10645532608032227 s +INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.10857105255126953 s +DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=224474604729714835671989505433088524141, time:1750766937.8435917s req_ids:[8] +DEBUG 06-24 20:08:57 [manager.py:391] +ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42685890197754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6822 prompt_cache_len:5151 prompt_cache_ratio:0.7550571679859279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 +DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10757827758789062 s +INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s +DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=199609835461489872909080927121389934457, time:1750766938.054261s req_ids:[8] +DEBUG 06-24 20:08:58 [manager.py:391] +ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:204.34188842773438ms total_cost_time:204.38623428344727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6823 prompt_cache_len:5151 prompt_cache_ratio:0.7549465044701744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 +DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10746264457702637 s +INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10984015464782715 s +DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=288733824546877206061754072661364099862, time:1750766938.26539s req_ids:[8] +DEBUG 06-24 20:08:58 [manager.py:391] +ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:205.04307746887207ms total_cost_time:205.08646965026855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6824 prompt_cache_len:5151 prompt_cache_ratio:0.7548358733880423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 +DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10722804069519043 s +INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10912728309631348 s +DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=178520428975840801954627838230231340903, time:1750766938.482138s req_ids:[8] +DEBUG 06-24 20:08:58 [manager.py:391] +ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:379.61316108703613ms total_cost_time:379.6572685241699ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6825 prompt_cache_len:5151 prompt_cache_ratio:0.7547252747252747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 +DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.1092538833618164 s +INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.11130690574645996 s +DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=73373417575017730238666576942647667645, time:1750766938.8548827s req_ids:[8] +DEBUG 06-24 20:08:58 [manager.py:391] +ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:198.24528694152832ms total_cost_time:198.29130172729492ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6826 prompt_cache_len:5151 prompt_cache_ratio:0.7546147084676238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 +DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10714983940124512 s +INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.1089472770690918 s +DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=59873730228461247030151046067252736987, time:1750766939.066585s req_ids:[8] +DEBUG 06-24 20:08:59 [manager.py:391] +ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:208.70399475097656ms total_cost_time:208.74881744384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6827 prompt_cache_len:5151 prompt_cache_ratio:0.7545041746008496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 +DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10799932479858398 s +INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.10986971855163574 s +DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=323417206050919408324145488795259418716, time:1750766939.2786152s req_ids:[8] +DEBUG 06-24 20:08:59 [manager.py:391] +ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:203.67813110351562ms total_cost_time:203.72271537780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6828 prompt_cache_len:5151 prompt_cache_ratio:0.7543936731107206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 +DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.1080775260925293 s +INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.11007094383239746 s +DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=147344583457436821196947579985878308915, time:1750766939.4896247s req_ids:[8] +DEBUG 06-24 20:08:59 [manager.py:391] +ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:207.95893669128418ms total_cost_time:208.00161361694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6829 prompt_cache_len:5151 prompt_cache_ratio:0.7542832039830136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 +DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10713648796081543 s +INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.1090245246887207 s +DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=23660667302062305760683070084689719247, time:1750766939.7025998s req_ids:[8] +DEBUG 06-24 20:08:59 [manager.py:391] +ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:207.43608474731445ms total_cost_time:207.4875831604004ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6830 prompt_cache_len:5151 prompt_cache_ratio:0.7541727672035139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 +DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:08:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10644268989562988 s +INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.10818147659301758 s +DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=183799344215477268207882276421314014863, time:1750766939.915935s req_ids:[8] +DEBUG 06-24 20:08:59 [manager.py:391] +ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:230.1347255706787ms total_cost_time:230.1807403564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6831 prompt_cache_len:5151 prompt_cache_ratio:0.754062362758015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 +DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10837030410766602 s +INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.11026239395141602 s +DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=119383577268180528697244155241791447470, time:1750766940.143927s req_ids:[8] +DEBUG 06-24 20:09:00 [manager.py:391] +ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:196.0914134979248ms total_cost_time:196.13361358642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6832 prompt_cache_len:5151 prompt_cache_ratio:0.7539519906323185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 +DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10720062255859375 s +INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900545120239258 s +DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=163951889323953808350912125679091832088, time:1750766940.352612s req_ids:[8] +DEBUG 06-24 20:09:00 [manager.py:391] +ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:205.93023300170898ms total_cost_time:205.98530769348145ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:6833 prompt_cache_len:5151 prompt_cache_ratio:0.7538416508122348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 +DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10826969146728516 s +INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100001335144043 s +DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=79849963693416527876517226336232406480, time:1750766940.5618372s req_ids:[8] +DEBUG 06-24 20:09:00 [manager.py:391] +ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:164.49689865112305ms total_cost_time:164.53814506530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6834 prompt_cache_len:5151 prompt_cache_ratio:0.753731343283582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 +DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:00 [batch.py:51] router release req id 8 +INFO 06-24 20:09:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10645294189453125 s +INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10864400863647461 s +DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=85094641878749162022666796669973486460, time:1750766940.7325542s req_ids:[8] +DEBUG 06-24 20:09:00 [manager.py:391] +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:194.08249855041504ms total_cost_time:194.12636756896973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6835 prompt_cache_len:5151 prompt_cache_ratio:0.7536210680321873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 +DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10729455947875977 s +INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10937047004699707 s +DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=76860467498452797639012823698468457235, time:1750766940.9344769s req_ids:[8] +DEBUG 06-24 20:09:00 [manager.py:391] +ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:419.10624504089355ms total_cost_time:419.15035247802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6836 prompt_cache_len:5151 prompt_cache_ratio:0.7535108250438853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 +DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.10732245445251465 s +INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s +DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=274781979090449504542227148969917865417, time:1750766941.3535109s req_ids:[8] +DEBUG 06-24 20:09:01 [manager.py:391] +ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:199.13363456726074ms total_cost_time:199.17798042297363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6837 prompt_cache_len:5151 prompt_cache_ratio:0.7534006143045195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 +DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.10813426971435547 s +INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10989689826965332 s +DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=62969786028292721098604257036862143940, time:1750766941.5628517s req_ids:[8] +DEBUG 06-24 20:09:01 [manager.py:391] +ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:225.75807571411133ms total_cost_time:225.80242156982422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6838 prompt_cache_len:5151 prompt_cache_ratio:0.7532904357999415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 +DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.1071007251739502 s +INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10882806777954102 s +DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=322686030477036829512376333636657150402, time:1750766941.7895255s req_ids:[8] +DEBUG 06-24 20:09:01 [manager.py:391] +ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:191.88761711120605ms total_cost_time:191.93220138549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6839 prompt_cache_len:5151 prompt_cache_ratio:0.7531802895160111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 +DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.1100006103515625 s +INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.11208724975585938 s +DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=250424276227907648336379964360058923222, time:1750766941.9914446s req_ids:[8] +DEBUG 06-24 20:09:01 [manager.py:391] +ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:201.51662826538086ms total_cost_time:201.56407356262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6840 prompt_cache_len:5151 prompt_cache_ratio:0.7530701754385964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 +DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10894370079040527 s +INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s +DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=236305860541665600868445785598499599432, time:1750766942.200533s req_ids:[8] +DEBUG 06-24 20:09:02 [manager.py:391] +ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:206.65335655212402ms total_cost_time:206.6974639892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6841 prompt_cache_len:5151 prompt_cache_ratio:0.7529600935535741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 +DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10677123069763184 s +INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s +DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=321468663311855045553034986311707378586, time:1750766942.4127455s req_ids:[8] +DEBUG 06-24 20:09:02 [manager.py:391] +ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:208.5282802581787ms total_cost_time:208.5719108581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6842 prompt_cache_len:5151 prompt_cache_ratio:0.7528500438468284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 +DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10765385627746582 s +INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s +DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=32482110370897785384533908274468476902, time:1750766942.626984s req_ids:[8] +DEBUG 06-24 20:09:02 [manager.py:391] +ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:207.15904235839844ms total_cost_time:207.2007656097412ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6843 prompt_cache_len:5151 prompt_cache_ratio:0.7527400263042525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 +DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10643839836120605 s +INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10841703414916992 s +DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=309063169396142430458222121257222346538, time:1750766942.8380742s req_ids:[8] +DEBUG 06-24 20:09:02 [manager.py:391] +ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:203.65452766418457ms total_cost_time:203.69744300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6844 prompt_cache_len:5151 prompt_cache_ratio:0.7526300409117476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 +DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10966181755065918 s +INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.11162686347961426 s +DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=116825656213073181334035216133489873534, time:1750766943.0487237s req_ids:[8] +DEBUG 06-24 20:09:03 [manager.py:391] +ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:204.65707778930664ms total_cost_time:204.71549034118652ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6845 prompt_cache_len:5151 prompt_cache_ratio:0.7525200876552228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 +DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10768723487854004 s +INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s +DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=259937918151389654459772075964894621910, time:1750766943.2730591s req_ids:[8] +DEBUG 06-24 20:09:03 [manager.py:391] +ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:218.2483673095703ms total_cost_time:218.2931900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6846 prompt_cache_len:5151 prompt_cache_ratio:0.752410166520596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 +DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10729837417602539 s +INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10911059379577637 s +DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=70849868679831145275039319567461479376, time:1750766943.4835455s req_ids:[8] +DEBUG 06-24 20:09:03 [manager.py:391] +ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:393.38088035583496ms total_cost_time:393.44048500061035ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:6847 prompt_cache_len:5151 prompt_cache_ratio:0.7523002774937929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 +DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10747075080871582 s +INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10949158668518066 s +DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=121929378039989573052113394819959477599, time:1750766943.8775961s req_ids:[8] +DEBUG 06-24 20:09:03 [manager.py:391] +ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:195.3420639038086ms total_cost_time:195.4021453857422ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6848 prompt_cache_len:5151 prompt_cache_ratio:0.7521904205607477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 +DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s +INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10939478874206543 s +DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=328724829950183079474138097475080964284, time:1750766944.0826952s req_ids:[8] +DEBUG 06-24 20:09:04 [manager.py:391] +ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:201.9493579864502ms total_cost_time:202.00800895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:6849 prompt_cache_len:5151 prompt_cache_ratio:0.7520805957074025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 +DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10774731636047363 s +INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s +DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=232347241197283823669963992043093891493, time:1750766944.3003318s req_ids:[8] +DEBUG 06-24 20:09:04 [manager.py:391] +ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:211.39812469482422ms total_cost_time:211.4424705505371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6850 prompt_cache_len:5151 prompt_cache_ratio:0.7519708029197081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 +DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10698461532592773 s +INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.1089639663696289 s +DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=65750638268671543037787516011075628749, time:1750766944.510203s req_ids:[8] +DEBUG 06-24 20:09:04 [manager.py:391] +ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:206.71987533569336ms total_cost_time:206.77924156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6851 prompt_cache_len:5151 prompt_cache_ratio:0.7518610421836228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 +DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s +INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.11028242111206055 s +DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=234222845476793630706353687329533810956, time:1750766944.7211623s req_ids:[8] +DEBUG 06-24 20:09:04 [manager.py:391] +ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:227.32257843017578ms total_cost_time:227.37431526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:6852 prompt_cache_len:5151 prompt_cache_ratio:0.7517513134851138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 +DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10767650604248047 s +INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s +DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=1170669171345206435821314363264910166, time:1750766944.9487925s req_ids:[8] +DEBUG 06-24 20:09:04 [manager.py:391] +ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:192.64888763427734ms total_cost_time:192.69442558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6853 prompt_cache_len:5151 prompt_cache_ratio:0.7516416168101562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 +DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10715842247009277 s +INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.10909366607666016 s +DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=263287990501476734327041944388234170440, time:1750766945.1522245s req_ids:[8] +DEBUG 06-24 20:09:05 [manager.py:391] +ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:201.75457000732422ms total_cost_time:201.80058479309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6854 prompt_cache_len:5151 prompt_cache_ratio:0.751531952144733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 +DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10840868949890137 s +INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11042022705078125 s +DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=257665570324946241474742966192998510411, time:1750766945.3609002s req_ids:[8] +DEBUG 06-24 20:09:05 [manager.py:391] +ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:203.0351161956787ms total_cost_time:203.07660102844238ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6855 prompt_cache_len:5151 prompt_cache_ratio:0.7514223194748358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 +DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s +INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11008810997009277 s +DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=286546985851962698788457962709141533939, time:1750766945.5706441s req_ids:[8] +DEBUG 06-24 20:09:05 [manager.py:391] +ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:202.17323303222656ms total_cost_time:202.21877098083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6856 prompt_cache_len:5151 prompt_cache_ratio:0.7513127187864644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 +DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10901427268981934 s +INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11105036735534668 s +DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=155611884862911419224665382034283406138, time:1750766945.7794344s req_ids:[8] +DEBUG 06-24 20:09:05 [manager.py:391] +ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:206.5272331237793ms total_cost_time:206.5727710723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6857 prompt_cache_len:5151 prompt_cache_ratio:0.7512031500656263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 +DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.11002707481384277 s +INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11203193664550781 s +DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=49175244881618462133460933587070937997, time:1750766945.9898036s req_ids:[8] +DEBUG 06-24 20:09:05 [manager.py:391] +ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:199.1562843322754ms total_cost_time:199.17678833007812ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6858 prompt_cache_len:5151 prompt_cache_ratio:0.7510936132983377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 +DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.1050572395324707 s +INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.10691261291503906 s +DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=69714902999983563607694563702086794760, time:1750766946.1942093s req_ids:[8] +DEBUG 06-24 20:09:06 [manager.py:391] +ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:204.8354148864746ms total_cost_time:204.8799991607666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6859 prompt_cache_len:5151 prompt_cache_ratio:0.7509841084706226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 +DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.3096740245819092 s +INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.3117990493774414 s +DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=37281641384380666703021203269817209625, time:1750766946.5993214s req_ids:[8] +DEBUG 06-24 20:09:06 [manager.py:391] +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:09:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 29978.461 tokens/s +DEBUG 06-24 20:09:06 [stats.py:37] Avg prompt tokens throughput: 29969.696 tokens/s +DEBUG 06-24 20:09:06 [stats.py:37] Avg generate tokens throughput: 8.765 tokens/s +ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:397.98450469970703ms total_cost_time:398.0603218078613ms,out_token_counter:1 mean_per_token_cost_time: 0.07581710815429688ms prompt_token_num:6860 prompt_cache_len:5151 prompt_cache_ratio:0.7508746355685131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 +DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.10731649398803711 s +INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.10928988456726074 s +DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=33120558522242884783691575350515062608, time:1750766946.8051884s req_ids:[8] +DEBUG 06-24 20:09:06 [manager.py:391] +ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:161.95297241210938ms total_cost_time:161.99660301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6861 prompt_cache_len:5151 prompt_cache_ratio:0.7507651945780498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 +DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.10732150077819824 s +INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.1093292236328125 s +DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=155198956505296983996858079202329459734, time:1750766946.9746046s req_ids:[8] +DEBUG 06-24 20:09:06 [manager.py:391] +ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:193.58587265014648ms total_cost_time:193.62878799438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6862 prompt_cache_len:5151 prompt_cache_ratio:0.7506557854852812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 +DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.1067512035369873 s +INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10921096801757812 s +DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=207074435996427102295964350657990814924, time:1750766947.174065s req_ids:[8] +DEBUG 06-24 20:09:07 [manager.py:391] +ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:203.57465744018555ms total_cost_time:203.61900329589844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6863 prompt_cache_len:5151 prompt_cache_ratio:0.7505464082762641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 +DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10803747177124023 s +INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.1099693775177002 s +DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=20402048368169934665425366240280627766, time:1750766947.3892162s req_ids:[8] +DEBUG 06-24 20:09:07 [manager.py:391] +ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:209.9456787109375ms total_cost_time:209.98859405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6864 prompt_cache_len:5151 prompt_cache_ratio:0.7504370629370629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 +DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10746312141418457 s +INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918307304382324 s +DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=197319660250053152617949901274091911221, time:1750766947.60713s req_ids:[8] +DEBUG 06-24 20:09:07 [manager.py:391] +ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:177.66165733337402ms total_cost_time:177.70719528198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6865 prompt_cache_len:5151 prompt_cache_ratio:0.7503277494537509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 +DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:07 [batch.py:51] router release req id 8 +INFO 06-24 20:09:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.1069784164428711 s +INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10892438888549805 s +DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=267338712490869647730092118815062869706, time:1750766947.7823315s req_ids:[8] +DEBUG 06-24 20:09:07 [manager.py:391] +ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:193.45998764038086ms total_cost_time:193.50361824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6866 prompt_cache_len:5151 prompt_cache_ratio:0.750218467812409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 +DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10831260681152344 s +INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.11018776893615723 s +DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=204035463261510792662876690861850653045, time:1750766947.9989896s req_ids:[8] +DEBUG 06-24 20:09:07 [manager.py:391] +ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:218.31727027893066ms total_cost_time:218.36137771606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6867 prompt_cache_len:5151 prompt_cache_ratio:0.7501092179991262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 +DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10693669319152832 s +INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10891032218933105 s +DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=230147653845344523764166485638477591089, time:1750766948.2102313s req_ids:[8] +DEBUG 06-24 20:09:08 [manager.py:391] +ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:207.7343463897705ms total_cost_time:207.7770233154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6868 prompt_cache_len:5151 prompt_cache_ratio:0.75 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 +DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10773301124572754 s +INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10959887504577637 s +DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=234047829248130842097119857624736495919, time:1750766948.4233541s req_ids:[8] +DEBUG 06-24 20:09:08 [manager.py:391] +ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:379.7280788421631ms total_cost_time:379.7736167907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6869 prompt_cache_len:5151 prompt_cache_ratio:0.7498908138011355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 +DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10706400871276855 s +INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10911965370178223 s +DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=222085876255501036351347160999741977617, time:1750766948.8017652s req_ids:[8] +DEBUG 06-24 20:09:08 [manager.py:391] +ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:200.37460327148438ms total_cost_time:200.42061805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6870 prompt_cache_len:5151 prompt_cache_ratio:0.7497816593886463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 +DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s +INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.10992431640625 s +DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=319863202815236015216974838829498862323, time:1750766949.0147793s req_ids:[8] +DEBUG 06-24 20:09:09 [manager.py:391] +ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:208.35518836975098ms total_cost_time:208.40048789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6871 prompt_cache_len:5151 prompt_cache_ratio:0.7496725367486538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 +DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.1088404655456543 s +INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.11135482788085938 s +DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=70540158241112889960598274416747593011, time:1750766949.2287843s req_ids:[8] +DEBUG 06-24 20:09:09 [manager.py:391] +ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:205.87897300720215ms total_cost_time:205.92260360717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6872 prompt_cache_len:5151 prompt_cache_ratio:0.7495634458672875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 +DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10782003402709961 s +INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.10984349250793457 s +DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=201733612295772734934790242833815407511, time:1750766949.4391215s req_ids:[8] +DEBUG 06-24 20:09:09 [manager.py:391] +ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:206.9566249847412ms total_cost_time:207.0000171661377ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6873 prompt_cache_len:5151 prompt_cache_ratio:0.7494543867306853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 +DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10796689987182617 s +INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.1098933219909668 s +DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=226601812317982880887238551623539738984, time:1750766949.651246s req_ids:[8] +DEBUG 06-24 20:09:09 [manager.py:391] +ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:214.9984836578369ms total_cost_time:215.043306350708ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6874 prompt_cache_len:5151 prompt_cache_ratio:0.7493453593249927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 +DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10877227783203125 s +INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.11069965362548828 s +DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=9104256372056234133142155551511902485, time:1750766949.8706276s req_ids:[8] +DEBUG 06-24 20:09:09 [manager.py:391] +ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:195.04141807556152ms total_cost_time:195.0831413269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6875 prompt_cache_len:5151 prompt_cache_ratio:0.7492363636363636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 +DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10768628120422363 s +INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.10963058471679688 s +DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=143529874133760861206366770873756909096, time:1750766950.0731673s req_ids:[8] +DEBUG 06-24 20:09:10 [manager.py:391] +ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:203.66859436035156ms total_cost_time:203.71174812316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6876 prompt_cache_len:5151 prompt_cache_ratio:0.7491273996509599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 +DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10804533958435059 s +INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.1102914810180664 s +DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=327287713082070026360185408397921363772, time:1750766950.2827823s req_ids:[8] +DEBUG 06-24 20:09:10 [manager.py:391] +ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:202.87299156188965ms total_cost_time:202.91757583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6877 prompt_cache_len:5151 prompt_cache_ratio:0.7490184673549513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 +DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10824990272521973 s +INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.11025214195251465 s +DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=290356987819548676593119064791683122350, time:1750766950.4911025s req_ids:[8] +DEBUG 06-24 20:09:10 [manager.py:391] +ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:200.45924186706543ms total_cost_time:200.5026340484619ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6878 prompt_cache_len:5151 prompt_cache_ratio:0.7489095667345158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 +DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10813188552856445 s +INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.11025047302246094 s +DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=273200868083007745610716192236653973761, time:1750766950.6973672s req_ids:[8] +DEBUG 06-24 20:09:10 [manager.py:391] +ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:205.77383041381836ms total_cost_time:205.81698417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6879 prompt_cache_len:5151 prompt_cache_ratio:0.7488006977758395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 +DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10717916488647461 s +INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.10932278633117676 s +DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=17284495574199326630179094213809081405, time:1750766950.908958s req_ids:[8] +DEBUG 06-24 20:09:10 [manager.py:391] +ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:373.74067306518555ms total_cost_time:373.7826347351074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6880 prompt_cache_len:5151 prompt_cache_ratio:0.7486918604651163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 +DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:11 [batch.py:51] router release req id 8 +INFO 06-24 20:09:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s +INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.11054682731628418 s +DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=8620235176817787976818710038643389818, time:1750766951.2825701s req_ids:[8] +DEBUG 06-24 20:09:11 [manager.py:391] +ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:219.10667419433594ms total_cost_time:219.15245056152344ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6881 prompt_cache_len:5151 prompt_cache_ratio:0.7485830547885481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 +DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10707664489746094 s +INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.1088857650756836 s +DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=252907209971013680774389553894057493820, time:1750766951.5209494s req_ids:[8] +DEBUG 06-24 20:09:11 [manager.py:391] +ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:211.4250659942627ms total_cost_time:211.4694118499756ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6882 prompt_cache_len:5151 prompt_cache_ratio:0.7484742807323452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 +DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.1071321964263916 s +INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.10899853706359863 s +DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=247052002028324460539124605807992495673, time:1750766951.7299132s req_ids:[8] +DEBUG 06-24 20:09:11 [manager.py:391] +ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:206.12645149230957ms total_cost_time:206.16984367370605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6883 prompt_cache_len:5151 prompt_cache_ratio:0.7483655382827256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 +DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10673737525939941 s +INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.10851359367370605 s +DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=137057678445348523698642094917231283017, time:1750766951.940449s req_ids:[8] +DEBUG 06-24 20:09:11 [manager.py:391] +ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:200.85430145263672ms total_cost_time:200.90031623840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6884 prompt_cache_len:5151 prompt_cache_ratio:0.7482568274259151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 +DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10753655433654785 s +INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.10952997207641602 s +DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=1659404641005524033557153981725153282, time:1750766952.1494915s req_ids:[8] +DEBUG 06-24 20:09:12 [manager.py:391] +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:206.44354820251465ms total_cost_time:206.48932456970215ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6885 prompt_cache_len:5151 prompt_cache_ratio:0.7481481481481481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 +DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10764527320861816 s +INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s +DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=216572230065717302837359203033343949958, time:1750766952.360585s req_ids:[8] +DEBUG 06-24 20:09:12 [manager.py:391] +ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:209.58614349365234ms total_cost_time:209.63096618652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6886 prompt_cache_len:5151 prompt_cache_ratio:0.7480395004356666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 +DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10689711570739746 s +INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1086876392364502 s +DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=246623927898465359324282066273072788907, time:1750766952.574779s req_ids:[8] +DEBUG 06-24 20:09:12 [manager.py:391] +ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:169.9545383453369ms total_cost_time:169.9965000152588ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6887 prompt_cache_len:5151 prompt_cache_ratio:0.7479308842747205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 +DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10834097862243652 s +INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1103358268737793 s +DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=243956624227053944202027504946768286228, time:1750766952.749194s req_ids:[8] +DEBUG 06-24 20:09:12 [manager.py:391] +ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:210.08849143981934ms total_cost_time:210.13259887695312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6888 prompt_cache_len:5151 prompt_cache_ratio:0.747822299651568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 +DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10877776145935059 s +INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1108391284942627 s +DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=289805519157454348748486061440403572758, time:1750766952.9717371s req_ids:[8] +DEBUG 06-24 20:09:12 [manager.py:391] +ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:212.11719512939453ms total_cost_time:212.16082572937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6889 prompt_cache_len:5151 prompt_cache_ratio:0.7477137465524749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 +DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10876154899597168 s +INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.11076807975769043 s +DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=257192159113495849581322003778988938223, time:1750766953.18366s req_ids:[8] +DEBUG 06-24 20:09:13 [manager.py:391] +ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:205.7807445526123ms total_cost_time:205.8238983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6890 prompt_cache_len:5151 prompt_cache_ratio:0.7476052249637155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 +DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.1078345775604248 s +INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.10988545417785645 s +DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=290147897035526126563697176154353902618, time:1750766953.3955362s req_ids:[8] +DEBUG 06-24 20:09:13 [manager.py:391] +ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:380.28979301452637ms total_cost_time:380.33318519592285ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6891 prompt_cache_len:5151 prompt_cache_ratio:0.7474967348715716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 +DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10729479789733887 s +INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.10923552513122559 s +DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=323716766473355109703222426048066109140, time:1750766953.7767453s req_ids:[8] +DEBUG 06-24 20:09:13 [manager.py:391] +ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:177.16336250305176ms total_cost_time:177.20532417297363ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6892 prompt_cache_len:5151 prompt_cache_ratio:0.7473882762623332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 +DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s +INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.1093900203704834 s +DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=43378016928329572813687070252260745346, time:1750766953.9596915s req_ids:[8] +DEBUG 06-24 20:09:13 [manager.py:391] +ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:191.34759902954102ms total_cost_time:191.392183303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6893 prompt_cache_len:5151 prompt_cache_ratio:0.747279849122298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 +DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10767960548400879 s +INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.10964226722717285 s +DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=289560743015190712296058488262278038096, time:1750766954.1729689s req_ids:[8] +DEBUG 06-24 20:09:14 [manager.py:391] +ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:224.24602508544922ms total_cost_time:224.2898941040039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6894 prompt_cache_len:5151 prompt_cache_ratio:0.747171453437772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 +DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10858798027038574 s +INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.1107337474822998 s +DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=338255403435816586150679977298720005636, time:1750766954.3899124s req_ids:[8] +DEBUG 06-24 20:09:14 [manager.py:391] +ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:208.0247402191162ms total_cost_time:208.0695629119873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6895 prompt_cache_len:5151 prompt_cache_ratio:0.7470630891950689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 +DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10733175277709961 s +INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.10930442810058594 s +DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=44909381193533248699894426576118612518, time:1750766954.6031084s req_ids:[8] +DEBUG 06-24 20:09:14 [manager.py:391] +ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:204.1170597076416ms total_cost_time:204.17404174804688ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6896 prompt_cache_len:5151 prompt_cache_ratio:0.7469547563805105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 +DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:14 [batch.py:51] router release req id 8 +INFO 06-24 20:09:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10976910591125488 s +INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.11162853240966797 s +DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=318899340255647978147881978148277003537, time:1750766954.8116658s req_ids:[8] +DEBUG 06-24 20:09:14 [manager.py:391] +ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:200.8841037750244ms total_cost_time:200.9267807006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6897 prompt_cache_len:5151 prompt_cache_ratio:0.7468464549804262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 +DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10575222969055176 s +INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.10673165321350098 s +DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=205081967549116427551053729990620260228, time:1750766955.0334196s req_ids:[8] +DEBUG 06-24 20:09:15 [manager.py:391] +ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:219.7573184967041ms total_cost_time:219.8007106781006ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6898 prompt_cache_len:5151 prompt_cache_ratio:0.746738184981154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 +DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10760235786437988 s +INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.10871553421020508 s +DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=51606372709773453561888983896277976951, time:1750766955.251612s req_ids:[8] +DEBUG 06-24 20:09:15 [manager.py:391] +ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:210.8633518218994ms total_cost_time:210.91556549072266ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:6899 prompt_cache_len:5151 prompt_cache_ratio:0.746629946369039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 +DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10944318771362305 s +INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11136126518249512 s +DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=248416926676330097048324237649015957789, time:1750766955.4652324s req_ids:[8] +DEBUG 06-24 20:09:15 [manager.py:391] +ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:208.16826820373535ms total_cost_time:208.21070671081543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6900 prompt_cache_len:5151 prompt_cache_ratio:0.7465217391304347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 +DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10890722274780273 s +INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11077380180358887 s +DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=190700846429975878505122255953311583677, time:1750766955.68941s req_ids:[8] +DEBUG 06-24 20:09:15 [manager.py:391] +ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:222.35941886901855ms total_cost_time:222.40543365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6901 prompt_cache_len:5151 prompt_cache_ratio:0.7464135632517026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 +DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10781979560852051 s +INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11097288131713867 s +DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=238411128429305238840896180064888003698, time:1750766955.9050894s req_ids:[8] +DEBUG 06-24 20:09:15 [manager.py:391] +ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:208.07218551635742ms total_cost_time:208.1167697906494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6902 prompt_cache_len:5151 prompt_cache_ratio:0.7463054187192119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 +DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10788130760192871 s +INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099236011505127 s +DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=138922657926341970864806457679063138580, time:1750766956.1171858s req_ids:[8] +DEBUG 06-24 20:09:16 [manager.py:391] +ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:195.09601593017578ms total_cost_time:195.13988494873047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6903 prompt_cache_len:5151 prompt_cache_ratio:0.7461973055193394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 +INFO 06-24 20:09:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.30887508392333984 s +INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.3106410503387451 s +DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=326096745887338067339182938990879355421, time:1750766956.5299559s req_ids:[8] +DEBUG 06-24 20:09:16 [manager.py:391] +ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:09:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 30259.254 tokens/s +DEBUG 06-24 20:09:16 [stats.py:37] Avg prompt tokens throughput: 30250.363 tokens/s +DEBUG 06-24 20:09:16 [stats.py:37] Avg generate tokens throughput: 8.890 tokens/s +INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:415.1158332824707ms total_cost_time:415.1618480682373ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6904 prompt_cache_len:5151 prompt_cache_ratio:0.7460892236384704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 +DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10692238807678223 s +INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.10878539085388184 s +DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=78713534651843993670620097241845470399, time:1750766956.738417s req_ids:[8] +DEBUG 06-24 20:09:16 [manager.py:391] +ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:206.86793327331543ms total_cost_time:206.91323280334473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6905 prompt_cache_len:5151 prompt_cache_ratio:0.7459811730629978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 +DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s +INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.11014580726623535 s +DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=207125816614046794154027677691358135447, time:1750766956.9508383s req_ids:[8] +DEBUG 06-24 20:09:16 [manager.py:391] +ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:206.72345161437988ms total_cost_time:206.76898956298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6906 prompt_cache_len:5151 prompt_cache_ratio:0.7458731537793223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 +DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10745477676391602 s +INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.1093134880065918 s +DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=166948275256378102374637279380540858950, time:1750766957.1645246s req_ids:[8] +DEBUG 06-24 20:09:17 [manager.py:391] +ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:170.87578773498535ms total_cost_time:170.91870307922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6907 prompt_cache_len:5151 prompt_cache_ratio:0.7457651657738527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 +DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10688591003417969 s +INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10886406898498535 s +DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=140786328696448609608918440601761507838, time:1750766957.3361902s req_ids:[8] +DEBUG 06-24 20:09:17 [manager.py:391] +ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:196.46358489990234ms total_cost_time:196.52152061462402ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:6908 prompt_cache_len:5151 prompt_cache_ratio:0.7456572090330053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 +DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10738682746887207 s +INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10918283462524414 s +DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=140511340718790453745836007395595072154, time:1750766957.5425222s req_ids:[8] +DEBUG 06-24 20:09:17 [manager.py:391] +ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:206.28714561462402ms total_cost_time:206.33172988891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6909 prompt_cache_len:5151 prompt_cache_ratio:0.7455492835432045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 +DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.11002516746520996 s +INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.1120290756225586 s +DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=248743935404132737802664156037262138498, time:1750766957.7560642s req_ids:[8] +DEBUG 06-24 20:09:17 [manager.py:391] +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:208.27746391296387ms total_cost_time:208.32276344299316ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6910 prompt_cache_len:5151 prompt_cache_ratio:0.7454413892908828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 +DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s +INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10982656478881836 s +DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=17057963950935521477427715846137666943, time:1750766957.9690385s req_ids:[8] +DEBUG 06-24 20:09:17 [manager.py:391] +ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:169.33107376098633ms total_cost_time:169.3727970123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6911 prompt_cache_len:5151 prompt_cache_ratio:0.7453335262624801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 +DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.10722470283508301 s +INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10918045043945312 s +DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=90082410373012013453206017776557508317, time:1750766958.142211s req_ids:[8] +DEBUG 06-24 20:09:18 [manager.py:391] +ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:203.44161987304688ms total_cost_time:203.48477363586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6912 prompt_cache_len:5151 prompt_cache_ratio:0.7452256944444444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 +DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.10779023170471191 s +INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s +DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=291103716472239899615552377186162032266, time:1750766958.3554807s req_ids:[8] +DEBUG 06-24 20:09:18 [manager.py:391] +ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:393.6293125152588ms total_cost_time:393.6734199523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6913 prompt_cache_len:5151 prompt_cache_ratio:0.7451178938232316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 +DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.1068875789642334 s +INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10873818397521973 s +DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=92612920590475344131013022557893615747, time:1750766958.749083s req_ids:[8] +DEBUG 06-24 20:09:18 [manager.py:391] +ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:166.01967811584473ms total_cost_time:166.0597324371338ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:6914 prompt_cache_len:5151 prompt_cache_ratio:0.7450101243853052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 +DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.1089925765991211 s +INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.11065363883972168 s +DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=175184431586980232786083489790913563920, time:1750766958.9189248s req_ids:[8] +DEBUG 06-24 20:09:18 [manager.py:391] +ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:163.5587215423584ms total_cost_time:163.61570358276367ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6915 prompt_cache_len:5151 prompt_cache_ratio:0.7449023861171367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 +DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10628199577331543 s +INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10824418067932129 s +DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=144038533184179620941465623302262351484, time:1750766959.0870543s req_ids:[8] +DEBUG 06-24 20:09:19 [manager.py:391] +ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:201.00879669189453ms total_cost_time:201.0519504547119ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6916 prompt_cache_len:5151 prompt_cache_ratio:0.7447946790052054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 +DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.1060936450958252 s +INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10773110389709473 s +DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=61763980811306878369260783804852400501, time:1750766959.2990544s req_ids:[8] +DEBUG 06-24 20:09:19 [manager.py:391] +ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:169.10481452941895ms total_cost_time:169.14701461791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6917 prompt_cache_len:5151 prompt_cache_ratio:0.7446870030359982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 +DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10651516914367676 s +INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10838627815246582 s +DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=47429139345182858383808607468568021253, time:1750766959.470113s req_ids:[8] +DEBUG 06-24 20:09:19 [manager.py:391] +ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:196.86484336853027ms total_cost_time:196.91014289855957ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6918 prompt_cache_len:5151 prompt_cache_ratio:0.7445793581960104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 +DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10749602317810059 s +INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10922670364379883 s +DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=100565686933187798535807967242389494034, time:1750766959.6809075s req_ids:[8] +DEBUG 06-24 20:09:19 [manager.py:391] +ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:209.6548080444336ms total_cost_time:209.69772338867188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6919 prompt_cache_len:5151 prompt_cache_ratio:0.7444717444717445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 +DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10738682746887207 s +INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.1093289852142334 s +DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=95968143407433037559571242837024856065, time:1750766959.8890162s req_ids:[8] +DEBUG 06-24 20:09:19 [manager.py:391] +ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:204.93412017822266ms total_cost_time:204.99134063720703ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6920 prompt_cache_len:5151 prompt_cache_ratio:0.744364161849711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 +DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.11037707328796387 s +INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.11228799819946289 s +DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=60634089658338092162473874348933465268, time:1750766960.1070666s req_ids:[8] +DEBUG 06-24 20:09:20 [manager.py:391] +ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:216.01319313049316ms total_cost_time:216.05777740478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6921 prompt_cache_len:5151 prompt_cache_ratio:0.7442566103164283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 +DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10679411888122559 s +INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.10862588882446289 s +DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=89951810445199885337027229627688685946, time:1750766960.3245802s req_ids:[8] +DEBUG 06-24 20:09:20 [manager.py:391] +ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:211.25197410583496ms total_cost_time:211.30895614624023ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6922 prompt_cache_len:5151 prompt_cache_ratio:0.7441490898584224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 +DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10959982872009277 s +INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.11153149604797363 s +DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=157279279165038591863805754921717179559, time:1750766960.5393896s req_ids:[8] +DEBUG 06-24 20:09:20 [manager.py:391] +ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:206.33244514465332ms total_cost_time:206.3765525817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6923 prompt_cache_len:5151 prompt_cache_ratio:0.7440416004622273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 +DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10747456550598145 s +INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.10942387580871582 s +DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=174758288416002072189864863234840683521, time:1750766960.7512164s req_ids:[8] +DEBUG 06-24 20:09:20 [manager.py:391] +ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:400.0568389892578ms total_cost_time:400.1152515411377ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6924 prompt_cache_len:5151 prompt_cache_ratio:0.7439341421143848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 +DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s +INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11032986640930176 s +DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=320888267519610555281189143097010194747, time:1750766961.151767s req_ids:[8] +DEBUG 06-24 20:09:21 [manager.py:391] +ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:201.6298770904541ms total_cost_time:201.6735076904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6925 prompt_cache_len:5151 prompt_cache_ratio:0.743826714801444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 +DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10711407661437988 s +INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.10901141166687012 s +DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=207287127815546269654234663340172237141, time:1750766961.3650198s req_ids:[8] +DEBUG 06-24 20:09:21 [manager.py:391] +ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:209.4118595123291ms total_cost_time:209.4554901123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6926 prompt_cache_len:5151 prompt_cache_ratio:0.7437193185099624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 +DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10863828659057617 s +INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040234565734863 s +DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=35929562461058030591916718192874871548, time:1750766961.5888963s req_ids:[8] +DEBUG 06-24 20:09:21 [manager.py:391] +ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:177.36005783081055ms total_cost_time:177.40631103515625ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6927 prompt_cache_len:5151 prompt_cache_ratio:0.743611953226505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 +DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.11058378219604492 s +INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11247730255126953 s +DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=135423499751192587588180300544723488528, time:1750766961.7581558s req_ids:[8] +DEBUG 06-24 20:09:21 [manager.py:391] +ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:195.2371597290039ms total_cost_time:195.29318809509277ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:6928 prompt_cache_len:5151 prompt_cache_ratio:0.7435046189376443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 +DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.11017489433288574 s +INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.1122288703918457 s +DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=17749891700881748912510547420416998015, time:1750766961.9704053s req_ids:[8] +DEBUG 06-24 20:09:21 [manager.py:391] +ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:216.73274040222168ms total_cost_time:216.79139137268066ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:6929 prompt_cache_len:5151 prompt_cache_ratio:0.743397315629961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 +DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.11032485961914062 s +INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.1122133731842041 s +DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=96170561581559741611959441122627898410, time:1750766962.186606s req_ids:[8] +DEBUG 06-24 20:09:22 [manager.py:391] +ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.76710319519043ms total_cost_time:209.8236083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:6930 prompt_cache_len:5151 prompt_cache_ratio:0.7432900432900433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 +DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.10929703712463379 s +INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.11138916015625 s +DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=292199263672672958146266758669996439660, time:1750766962.4028592s req_ids:[8] +DEBUG 06-24 20:09:22 [manager.py:391] +ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.5053195953369ms total_cost_time:209.55920219421387ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:6931 prompt_cache_len:5151 prompt_cache_ratio:0.743182801904487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 +DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.10923552513122559 s +INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.11129999160766602 s +DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=286061526647172331166079189505142377498, time:1750766962.6165218s req_ids:[8] +DEBUG 06-24 20:09:22 [manager.py:391] +ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:207.411527633667ms total_cost_time:207.47089385986328ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6932 prompt_cache_len:5151 prompt_cache_ratio:0.7430755914598961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 +DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.11026859283447266 s +INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.1121664047241211 s +DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=192833902552775011150571354853801803225, time:1750766962.8279874s req_ids:[8] +DEBUG 06-24 20:09:22 [manager.py:391] +ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:204.43248748779297ms total_cost_time:204.49161529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6933 prompt_cache_len:5151 prompt_cache_ratio:0.7429684119428819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 +DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.11007881164550781 s +INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11205935478210449 s +DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=271720725053971868565770338556972290025, time:1750766963.0397692s req_ids:[8] +DEBUG 06-24 20:09:23 [manager.py:391] +ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.5029354095459ms total_cost_time:209.5623016357422ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6934 prompt_cache_len:5151 prompt_cache_ratio:0.7428612633400634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 +DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:23 [batch.py:51] router release req id 8 +INFO 06-24 20:09:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:09:23 [statics_utils.py:24] mean first cost: 232.68165588378906 ms +INFO 06-24 20:09:23 [statics_utils.py:24] mean per token cost: 0.10072723610154566 ms +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.10782480239868164 s +INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11026597023010254 s +DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=195648951257120370286849110014651569748, time:1750766963.2543972s req_ids:[8] +DEBUG 06-24 20:09:23 [manager.py:391] +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:377.0885467529297ms total_cost_time:377.1328926086426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6935 prompt_cache_len:5151 prompt_cache_ratio:0.7427541456380677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 +DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.1070699691772461 s +INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.10921168327331543 s +DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=335158384099918207981021655922702029707, time:1750766963.6349595s req_ids:[8] +DEBUG 06-24 20:09:23 [manager.py:391] +ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:202.63290405273438ms total_cost_time:202.68726348876953ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:6936 prompt_cache_len:5151 prompt_cache_ratio:0.7426470588235294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 +DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.10824394226074219 s +INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11061930656433105 s +DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=294327538711928867078826592577168536712, time:1750766963.8495407s req_ids:[8] +DEBUG 06-24 20:09:23 [manager.py:391] +ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:170.34530639648438ms total_cost_time:170.38655281066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6937 prompt_cache_len:5151 prompt_cache_ratio:0.7425400028830906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 +DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.10738801956176758 s +INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s +DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=233634497428596586444807743092893542204, time:1750766964.0211234s req_ids:[8] +DEBUG 06-24 20:09:24 [manager.py:391] +ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:200.27852058410645ms total_cost_time:200.32191276550293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6938 prompt_cache_len:5151 prompt_cache_ratio:0.7424329778034016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 +DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.10926032066345215 s +INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11113333702087402 s +DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=274999008431250849009176309288609288878, time:1750766964.230775s req_ids:[8] +DEBUG 06-24 20:09:24 [manager.py:391] +ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:207.3037624359131ms total_cost_time:207.35812187194824ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:6939 prompt_cache_len:5151 prompt_cache_ratio:0.7423259835711198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 +DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11025500297546387 s +INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11198592185974121 s +DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=70549287475986639571941520420270440627, time:1750766964.453925s req_ids:[8] +DEBUG 06-24 20:09:24 [manager.py:391] +ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:219.37227249145508ms total_cost_time:219.42949295043945ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6940 prompt_cache_len:5151 prompt_cache_ratio:0.7422190201729106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 +DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11045241355895996 s +INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11247825622558594 s +DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=221345667760810605225683432600496978715, time:1750766964.66762s req_ids:[8] +DEBUG 06-24 20:09:24 [manager.py:391] +ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:205.7323455810547ms total_cost_time:205.79123497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:6941 prompt_cache_len:5151 prompt_cache_ratio:0.7421120875954473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 +DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11027932167053223 s +INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11237263679504395 s +DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=110506748024976645147247124505962285937, time:1750766964.8813798s req_ids:[8] +DEBUG 06-24 20:09:24 [manager.py:391] +ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:209.05017852783203ms total_cost_time:209.10906791687012ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:6942 prompt_cache_len:5151 prompt_cache_ratio:0.7420051858254105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 +DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s +INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.11039590835571289 s +DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=166740636188599530597183013295445101056, time:1750766965.0980039s req_ids:[8] +DEBUG 06-24 20:09:25 [manager.py:391] +ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:164.45207595825195ms total_cost_time:164.49308395385742ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6943 prompt_cache_len:5151 prompt_cache_ratio:0.7418983148494886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 +DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.11044907569885254 s +INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.1124732494354248 s +DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=153134180068656956769970409914680309911, time:1750766965.2644672s req_ids:[8] +DEBUG 06-24 20:09:25 [manager.py:391] +ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:202.70729064941406ms total_cost_time:202.7297019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6944 prompt_cache_len:5151 prompt_cache_ratio:0.7417914746543779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 +DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10792016983032227 s +INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.10978984832763672 s +DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=323837272937981191487168154030710788429, time:1750766965.4868433s req_ids:[8] +DEBUG 06-24 20:09:25 [manager.py:391] +ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:220.73698043823242ms total_cost_time:220.78180313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6945 prompt_cache_len:5151 prompt_cache_ratio:0.7416846652267819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 +DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10756206512451172 s +INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s +DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=222476395595585701786148228484142619695, time:1750766965.7046075s req_ids:[8] +DEBUG 06-24 20:09:25 [manager.py:391] +ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:208.1773281097412ms total_cost_time:208.21881294250488ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6946 prompt_cache_len:5151 prompt_cache_ratio:0.7415778865534121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 +DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10687923431396484 s +INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.10938239097595215 s +DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=109069371626195611967487238876797404441, time:1750766965.9163272s req_ids:[8] +DEBUG 06-24 20:09:25 [manager.py:391] +ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:206.72059059143066ms total_cost_time:206.76374435424805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6947 prompt_cache_len:5151 prompt_cache_ratio:0.7414711386209875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 +DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.3108196258544922 s +INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.3127915859222412 s +DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=147953532134185098139541498918780709382, time:1750766966.3391767s req_ids:[8] +DEBUG 06-24 20:09:26 [manager.py:391] +ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:418.80011558532715ms total_cost_time:418.84350776672363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6948 prompt_cache_len:5151 prompt_cache_ratio:0.7413644214162349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 +DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10737156867980957 s +INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.10929346084594727 s +DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=72148535345002952708956666187890394553, time:1750766966.5562384s req_ids:[8] +DEBUG 06-24 20:09:26 [manager.py:391] +ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:09:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 31081.671 tokens/s +DEBUG 06-24 20:09:26 [stats.py:37] Avg prompt tokens throughput: 31072.700 tokens/s +DEBUG 06-24 20:09:26 [stats.py:37] Avg generate tokens throughput: 8.971 tokens/s +INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:208.6639404296875ms total_cost_time:208.723783493042ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6949 prompt_cache_len:5151 prompt_cache_ratio:0.7412577349258886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 +DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10642313957214355 s +INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.1087346076965332 s +DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=254639638272510080486951607467912890903, time:1750766966.7685235s req_ids:[8] +DEBUG 06-24 20:09:26 [manager.py:391] +ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:198.2276439666748ms total_cost_time:198.2710361480713ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6950 prompt_cache_len:5151 prompt_cache_ratio:0.7411510791366906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 +DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10822582244873047 s +INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.11042547225952148 s +DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=179204686512772318719832661932022784952, time:1750766966.9787612s req_ids:[8] +DEBUG 06-24 20:09:26 [manager.py:391] +ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:228.39069366455078ms total_cost_time:228.45101356506348ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:6951 prompt_cache_len:5151 prompt_cache_ratio:0.7410444540353905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 +DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.1079556941986084 s +INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097869873046875 s +DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=140654937359732499131976907812900226960, time:1750766967.213281s req_ids:[8] +DEBUG 06-24 20:09:27 [manager.py:391] +ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:214.77055549621582ms total_cost_time:214.85400199890137ms,out_token_counter:1 mean_per_token_cost_time: 0.08344650268554688ms prompt_token_num:6952 prompt_cache_len:5151 prompt_cache_ratio:0.7409378596087457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 +DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10795712471008301 s +INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.11027359962463379 s +DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=250633257242078789352267710236054104706, time:1750766967.4331238s req_ids:[8] +DEBUG 06-24 20:09:27 [manager.py:391] +ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:174.80087280273438ms total_cost_time:174.84545707702637ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6953 prompt_cache_len:5151 prompt_cache_ratio:0.7408312958435208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 +DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s +INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.11091375350952148 s +DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=199177758026373373726423367977729069901, time:1750766967.614291s req_ids:[8] +DEBUG 06-24 20:09:27 [manager.py:391] +ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:210.71982383728027ms total_cost_time:210.76536178588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6954 prompt_cache_len:5151 prompt_cache_ratio:0.7407247627264884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 +DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10720205307006836 s +INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.1088874340057373 s +DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=8676309028398779381981767618697758672, time:1750766967.8321767s req_ids:[8] +DEBUG 06-24 20:09:27 [manager.py:391] +ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:214.22624588012695ms total_cost_time:214.27273750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:6955 prompt_cache_len:5151 prompt_cache_ratio:0.7406182602444285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 +DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s +INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.11023426055908203 s +DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=111332636945963924268903145981125856586, time:1750766968.0520105s req_ids:[8] +DEBUG 06-24 20:09:28 [manager.py:391] +ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:211.38596534729004ms total_cost_time:211.44676208496094ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6956 prompt_cache_len:5151 prompt_cache_ratio:0.7405117883841288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 +DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10715413093566895 s +INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.1090705394744873 s +DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=216271417722196592755908961979558241576, time:1750766968.2621663s req_ids:[8] +DEBUG 06-24 20:09:28 [manager.py:391] +ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:368.70741844177246ms total_cost_time:368.75247955322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6957 prompt_cache_len:5151 prompt_cache_ratio:0.7404053471323846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 +DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10851812362670898 s +INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.11017322540283203 s +DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=337211624246682531128525082782226120046, time:1750766968.6433308s req_ids:[8] +DEBUG 06-24 20:09:28 [manager.py:391] +ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:170.34554481506348ms total_cost_time:170.38655281066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6958 prompt_cache_len:5151 prompt_cache_ratio:0.7402989364759989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 +DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10801959037780762 s +INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.10977411270141602 s +DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=38476791923426436566418821364010559105, time:1750766968.8154457s req_ids:[8] +DEBUG 06-24 20:09:28 [manager.py:391] +ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:211.60292625427246ms total_cost_time:211.64870262145996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6959 prompt_cache_len:5151 prompt_cache_ratio:0.7401925564017818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 +DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10740494728088379 s +INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s +DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=198360749475603404131342523960186004621, time:1750766969.0389304s req_ids:[8] +DEBUG 06-24 20:09:29 [manager.py:391] +ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:210.36219596862793ms total_cost_time:210.41417121887207ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:6960 prompt_cache_len:5151 prompt_cache_ratio:0.7400862068965517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 +DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10890364646911621 s +INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11096334457397461 s +DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=39025271007477647371353754296646693976, time:1750766969.250993s req_ids:[8] +DEBUG 06-24 20:09:29 [manager.py:391] +ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:205.16109466552734ms total_cost_time:205.20615577697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6961 prompt_cache_len:5151 prompt_cache_ratio:0.739979887947134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 +DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10674333572387695 s +INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.10856485366821289 s +DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=95735099957216022983105811825329965616, time:1750766969.4630466s req_ids:[8] +DEBUG 06-24 20:09:29 [manager.py:391] +ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.06732368469238ms total_cost_time:206.11119270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6962 prompt_cache_len:5151 prompt_cache_ratio:0.7398735995403619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 +DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10825181007385254 s +INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s +DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=211863718066400655253570485250189148065, time:1750766969.6753507s req_ids:[8] +DEBUG 06-24 20:09:29 [manager.py:391] +ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.60948753356934ms total_cost_time:206.65383338928223ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6963 prompt_cache_len:5151 prompt_cache_ratio:0.7397673416630762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 +DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10793614387512207 s +INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11043882369995117 s +DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=12422594749623131341584589390751468642, time:1750766969.8867943s req_ids:[8] +DEBUG 06-24 20:09:29 [manager.py:391] +ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.15077018737793ms total_cost_time:206.193208694458ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6964 prompt_cache_len:5151 prompt_cache_ratio:0.7396611143021252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 +DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10778665542602539 s +INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s +DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=145069796834992381023761704493784268065, time:1750766970.0988657s req_ids:[8] +DEBUG 06-24 20:09:30 [manager.py:391] +ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:207.14783668518066ms total_cost_time:207.17597007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6965 prompt_cache_len:5151 prompt_cache_ratio:0.7395549174443646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 +DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10721230506896973 s +INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.1096944808959961 s +DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=103159980895018830692316190209700517025, time:1750766970.3109448s req_ids:[8] +DEBUG 06-24 20:09:30 [manager.py:391] +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:207.4904441833496ms total_cost_time:207.5343132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6966 prompt_cache_len:5151 prompt_cache_ratio:0.7394487510766581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 +DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10827398300170898 s +INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.11072301864624023 s +DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=175392749261144683863549251990985085587, time:1750766970.5217838s req_ids:[8] +DEBUG 06-24 20:09:30 [manager.py:391] +ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:205.74474334716797ms total_cost_time:205.78885078430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6967 prompt_cache_len:5151 prompt_cache_ratio:0.7393426151858763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 +DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10711097717285156 s +INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.10912895202636719 s +DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=119111949642591540425370972446168910753, time:1750766970.7333875s req_ids:[8] +DEBUG 06-24 20:09:30 [manager.py:391] +ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:375.12660026550293ms total_cost_time:375.1804828643799ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:6968 prompt_cache_len:5151 prompt_cache_ratio:0.7392365097588978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 +DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10806131362915039 s +INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010003089904785 s +DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=123228940517763598775949017972227999338, time:1750766971.1088696s req_ids:[8] +DEBUG 06-24 20:09:31 [manager.py:391] +ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:198.81916046142578ms total_cost_time:198.87924194335938ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6969 prompt_cache_len:5151 prompt_cache_ratio:0.7391304347826086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 +DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10688018798828125 s +INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.10885357856750488 s +DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=178153890777087341587262821169997035394, time:1750766971.3192205s req_ids:[8] +DEBUG 06-24 20:09:31 [manager.py:391] +ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:204.13875579833984ms total_cost_time:204.19931411743164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6970 prompt_cache_len:5151 prompt_cache_ratio:0.7390243902439024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 +DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s +INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11038827896118164 s +DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=259928739140403687249978736846855060101, time:1750766971.5372643s req_ids:[8] +DEBUG 06-24 20:09:31 [manager.py:391] +ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:213.39678764343262ms total_cost_time:213.43994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6971 prompt_cache_len:5151 prompt_cache_ratio:0.7389183761296801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 +DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10725569725036621 s +INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.1096963882446289 s +DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=10565937004940407226703449215169879609, time:1750766971.7503073s req_ids:[8] +DEBUG 06-24 20:09:31 [manager.py:391] +ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:205.56044578552246ms total_cost_time:205.60312271118164ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6972 prompt_cache_len:5151 prompt_cache_ratio:0.7388123924268503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 +DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10792183876037598 s +INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11002397537231445 s +DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=168194516618368278289754010730757320479, time:1750766971.9616761s req_ids:[8] +DEBUG 06-24 20:09:31 [manager.py:391] +ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:201.60889625549316ms total_cost_time:201.65181159973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6973 prompt_cache_len:5151 prompt_cache_ratio:0.738706439122329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 +DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10809206962585449 s +INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s +DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=157016632265416001099357344839958277751, time:1750766972.1703467s req_ids:[8] +DEBUG 06-24 20:09:32 [manager.py:391] +ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:205.24859428405762ms total_cost_time:205.3084373474121ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6974 prompt_cache_len:5151 prompt_cache_ratio:0.7386005162030399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 +DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.11090588569641113 s +INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11348652839660645 s +DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=258701100296728703800063755114376830065, time:1750766972.3801236s req_ids:[8] +DEBUG 06-24 20:09:32 [manager.py:391] +ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:213.46569061279297ms total_cost_time:213.52744102478027ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6975 prompt_cache_len:5151 prompt_cache_ratio:0.738494623655914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 +DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10808801651000977 s +INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s +DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=304169721027801386697519607523599451519, time:1750766972.6044662s req_ids:[8] +DEBUG 06-24 20:09:32 [manager.py:391] +ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:208.53328704833984ms total_cost_time:208.57858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6976 prompt_cache_len:5151 prompt_cache_ratio:0.7383887614678899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 +DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10920381546020508 s +INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11121153831481934 s +DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=286196213032217293205577654344663361900, time:1750766972.817637s req_ids:[8] +DEBUG 06-24 20:09:32 [manager.py:391] +ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:203.1397819519043ms total_cost_time:203.1846046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6977 prompt_cache_len:5151 prompt_cache_ratio:0.7382829296259137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 +DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10750555992126465 s +INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.10956645011901855 s +DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=261152835254390078385340898973950328560, time:1750766973.0260477s req_ids:[8] +DEBUG 06-24 20:09:33 [manager.py:391] +ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:205.7485580444336ms total_cost_time:205.79195022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6978 prompt_cache_len:5151 prompt_cache_ratio:0.7381771281169389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 +DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10771703720092773 s +INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.10969138145446777 s +DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=250056607442531606481754718267049377893, time:1750766973.236964s req_ids:[8] +DEBUG 06-24 20:09:33 [manager.py:391] +ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:409.4517230987549ms total_cost_time:409.4970226287842ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6979 prompt_cache_len:5151 prompt_cache_ratio:0.7380713569279267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 +DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10772275924682617 s +INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.11020708084106445 s +DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=177041933191104831327555416569420636289, time:1750766973.6506557s req_ids:[8] +DEBUG 06-24 20:09:33 [manager.py:391] +ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:196.88010215759277ms total_cost_time:196.92039489746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:6980 prompt_cache_len:5151 prompt_cache_ratio:0.7379656160458453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 +DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10817742347717285 s +INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s +DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=218745766024046803353739353880985413365, time:1750766973.8579645s req_ids:[8] +DEBUG 06-24 20:09:33 [manager.py:391] +ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:205.5497169494629ms total_cost_time:205.59310913085938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6981 prompt_cache_len:5151 prompt_cache_ratio:0.7378599054576708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 +DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1093759536743164 s +INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11125946044921875 s +DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=599292272217602083766337463703539381, time:1750766974.0714693s req_ids:[8] +DEBUG 06-24 20:09:34 [manager.py:391] +ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:208.65797996520996ms total_cost_time:208.71806144714355ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6982 prompt_cache_len:5151 prompt_cache_ratio:0.7377542251503867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 +DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.10860562324523926 s +INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11049032211303711 s +DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=203636070319301594395778703950407125825, time:1750766974.2837803s req_ids:[8] +DEBUG 06-24 20:09:34 [manager.py:391] +ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:202.4235725402832ms total_cost_time:202.4819850921631ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6983 prompt_cache_len:5151 prompt_cache_ratio:0.7376485751109838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 +DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1108095645904541 s +INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11267828941345215 s +DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=192737950361043851321723575816291530039, time:1750766974.4973862s req_ids:[8] +DEBUG 06-24 20:09:34 [manager.py:391] +ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:210.54363250732422ms total_cost_time:210.6020450592041ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6984 prompt_cache_len:5151 prompt_cache_ratio:0.7375429553264605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 +DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1110999584197998 s +INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11306285858154297 s +DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=201176628500201158513346679242497888119, time:1750766974.7085865s req_ids:[8] +DEBUG 06-24 20:09:34 [manager.py:391] +ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:206.49313926696777ms total_cost_time:206.55465126037598ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:6985 prompt_cache_len:5151 prompt_cache_ratio:0.7374373657838225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 +DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.10787487030029297 s +INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11038708686828613 s +DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=240948202692280021182180199113987502852, time:1750766974.924493s req_ids:[8] +DEBUG 06-24 20:09:34 [manager.py:391] +ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:202.4669647216797ms total_cost_time:202.5282382965088ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:6986 prompt_cache_len:5151 prompt_cache_ratio:0.737331806470083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 +DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10996460914611816 s +INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11197376251220703 s +DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=198135432441563189570564884917085807517, time:1750766975.1314347s req_ids:[8] +DEBUG 06-24 20:09:35 [manager.py:391] +ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:199.89752769470215ms total_cost_time:199.95379447937012ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:6987 prompt_cache_len:5151 prompt_cache_ratio:0.7372262773722628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 +DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.11095595359802246 s +INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11195731163024902 s +DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=102432406846420757366985783377264954113, time:1750766975.3409455s req_ids:[8] +DEBUG 06-24 20:09:35 [manager.py:391] +ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:195.44053077697754ms total_cost_time:195.51897048950195ms,out_token_counter:1 mean_per_token_cost_time: 0.07843971252441406ms prompt_token_num:6988 prompt_cache_len:5151 prompt_cache_ratio:0.7371207784773898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 +DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10727858543395996 s +INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s +DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=54035056628067357207579998115660089949, time:1750766975.5463512s req_ids:[8] +DEBUG 06-24 20:09:35 [manager.py:391] +ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:205.84988594055176ms total_cost_time:205.89208602905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6989 prompt_cache_len:5151 prompt_cache_ratio:0.7370153097724996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 +DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10758256912231445 s +INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.10947656631469727 s +DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=288648455980673662204845089783337947518, time:1750766975.7587504s req_ids:[8] +DEBUG 06-24 20:09:35 [manager.py:391] +ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:206.2537670135498ms total_cost_time:206.298828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6990 prompt_cache_len:5151 prompt_cache_ratio:0.7369098712446351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 +DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10798335075378418 s +INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11006593704223633 s +DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=248423882294222575476569912793031191179, time:1750766975.9713943s req_ids:[8] +DEBUG 06-24 20:09:35 [manager.py:391] +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:206.98213577270508ms total_cost_time:207.02505111694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6991 prompt_cache_len:5151 prompt_cache_ratio:0.7368044628808468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 +DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.3092825412750244 s +INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.3112952709197998 s +DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=203956867761942255643498051693877618256, time:1750766976.402257s req_ids:[8] +DEBUG 06-24 20:09:36 [manager.py:391] +ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:431.8583011627197ms total_cost_time:431.903600692749ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6992 prompt_cache_len:5151 prompt_cache_ratio:0.7366990846681922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 +DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10582518577575684 s +INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10760116577148438 s +DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=212014047260529389858333058757487263518, time:1750766976.6253958s req_ids:[8] +DEBUG 06-24 20:09:36 [manager.py:391] +ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:09:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 30592.531 tokens/s +DEBUG 06-24 20:09:36 [stats.py:37] Avg prompt tokens throughput: 30583.757 tokens/s +DEBUG 06-24 20:09:36 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s +INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:174.41368103027344ms total_cost_time:174.43513870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6993 prompt_cache_len:5151 prompt_cache_ratio:0.7365937365937366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 +DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10584735870361328 s +INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10781383514404297 s +DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=317990431194270743129708758431052549586, time:1750766976.7967572s req_ids:[8] +DEBUG 06-24 20:09:36 [manager.py:391] +ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:195.44291496276855ms total_cost_time:195.48535346984863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6994 prompt_cache_len:5151 prompt_cache_ratio:0.7364884186445525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 +DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10747766494750977 s +INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10955286026000977 s +DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=85245416132971713248762513453855472676, time:1750766976.9970174s req_ids:[8] +DEBUG 06-24 20:09:36 [manager.py:391] +ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:198.5175609588623ms total_cost_time:198.5604763031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6995 prompt_cache_len:5151 prompt_cache_ratio:0.7363831308077198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 +DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10686349868774414 s +INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10886216163635254 s +DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=310669307802095323216903216716115554466, time:1750766977.205106s req_ids:[8] +DEBUG 06-24 20:09:37 [manager.py:391] +ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:203.68123054504395ms total_cost_time:203.72414588928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6996 prompt_cache_len:5151 prompt_cache_ratio:0.7362778730703259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 +DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s +INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10897111892700195 s +DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=76207439430784735306067978913635058296, time:1750766977.416222s req_ids:[8] +DEBUG 06-24 20:09:37 [manager.py:391] +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:205.85227012634277ms total_cost_time:205.89709281921387ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6997 prompt_cache_len:5151 prompt_cache_ratio:0.7361726454194655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 +DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10820794105529785 s +INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102745532989502 s +DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=170546230080354542849015320167025605022, time:1750766977.630516s req_ids:[8] +DEBUG 06-24 20:09:37 [manager.py:391] +ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:209.5344066619873ms total_cost_time:209.5792293548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6998 prompt_cache_len:5151 prompt_cache_ratio:0.7360674478422407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 +DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10785794258117676 s +INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10991692543029785 s +DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=275940969347150611953668944432863123474, time:1750766977.8439746s req_ids:[8] +DEBUG 06-24 20:09:37 [manager.py:391] +ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:209.5324993133545ms total_cost_time:209.57684516906738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6999 prompt_cache_len:5151 prompt_cache_ratio:0.7359622803257608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 +DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s +INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.11139082908630371 s +DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=109969116172603545355387431760986067321, time:1750766978.069327s req_ids:[8] +DEBUG 06-24 20:09:38 [manager.py:391] +ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:233.43849182128906ms total_cost_time:233.48236083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7000 prompt_cache_len:5151 prompt_cache_ratio:0.7358571428571429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 +DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s +INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s +DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=233533177281800167923189218572262983328, time:1750766978.3195708s req_ids:[8] +DEBUG 06-24 20:09:38 [manager.py:391] +ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:445.5878734588623ms total_cost_time:445.6343650817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7001 prompt_cache_len:5151 prompt_cache_ratio:0.735752035423511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 +DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.1077260971069336 s +INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.1103205680847168 s +DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=29852215741968064702695415886492974499, time:1750766978.7431128s req_ids:[8] +DEBUG 06-24 20:09:38 [manager.py:391] +ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:198.1651782989502ms total_cost_time:198.2095241546631ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7002 prompt_cache_len:5151 prompt_cache_ratio:0.7356469580119965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 +DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.1074211597442627 s +INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.1096947193145752 s +DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=62184181445982319536237629206583589940, time:1750766978.9545083s req_ids:[8] +DEBUG 06-24 20:09:38 [manager.py:391] +ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:206.89630508422852ms total_cost_time:206.9411277770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7003 prompt_cache_len:5151 prompt_cache_ratio:0.7355419106097387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 +DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s +INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s +DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=221300506093925473206125020576025282748, time:1750766979.17306s req_ids:[8] +DEBUG 06-24 20:09:39 [manager.py:391] +ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:216.92585945129395ms total_cost_time:216.96853637695312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7004 prompt_cache_len:5151 prompt_cache_ratio:0.7354368932038835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 +DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10778546333312988 s +INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10986018180847168 s +DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=9565274677745250305264589072776311165, time:1750766979.3985596s req_ids:[8] +DEBUG 06-24 20:09:39 [manager.py:391] +ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:220.4720973968506ms total_cost_time:220.52836418151855ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7005 prompt_cache_len:5151 prompt_cache_ratio:0.7353319057815846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 +DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:39 [batch.py:51] router release req id 8 +INFO 06-24 20:09:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s +INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10980582237243652 s +DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=145742353410665672479627251815149880301, time:1750766979.615521s req_ids:[8] +DEBUG 06-24 20:09:39 [manager.py:391] +ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:210.34812927246094ms total_cost_time:210.39199829101562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7006 prompt_cache_len:5151 prompt_cache_ratio:0.7352269483300029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 +DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s +INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10942506790161133 s +DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=295453608528418028322228214570836746228, time:1750766979.8308558s req_ids:[8] +DEBUG 06-24 20:09:39 [manager.py:391] +ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:203.84669303894043ms total_cost_time:203.89175415039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7007 prompt_cache_len:5151 prompt_cache_ratio:0.7351220208363065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 +DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10746884346008301 s +INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.10948443412780762 s +DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=169824647646680485101925820862177075342, time:1750766980.041262s req_ids:[8] +DEBUG 06-24 20:09:40 [manager.py:391] +ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:203.04083824157715ms total_cost_time:203.08470726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7008 prompt_cache_len:5151 prompt_cache_ratio:0.7350171232876712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 +DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10655736923217773 s +INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.1083834171295166 s +DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=273997678334228137087699773507148443513, time:1750766980.2555933s req_ids:[8] +DEBUG 06-24 20:09:40 [manager.py:391] +ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:210.90102195739746ms total_cost_time:210.94369888305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7009 prompt_cache_len:5151 prompt_cache_ratio:0.7349122556712798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 +DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10744285583496094 s +INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.1092984676361084 s +DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=10498821165853357417995097129855956008, time:1750766980.467268s req_ids:[8] +DEBUG 06-24 20:09:40 [manager.py:391] +ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:204.82563972473145ms total_cost_time:204.86831665039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7010 prompt_cache_len:5151 prompt_cache_ratio:0.7348074179743224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 +DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10811519622802734 s +INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s +DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=2875633161752592044663104080438821551, time:1750766980.6768613s req_ids:[8] +DEBUG 06-24 20:09:40 [manager.py:391] +ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:203.0632495880127ms total_cost_time:203.10688018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7011 prompt_cache_len:5151 prompt_cache_ratio:0.7347026101839966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 +DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s +INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.10880517959594727 s +DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=227277140553399177127145677353846599201, time:1750766980.886514s req_ids:[8] +DEBUG 06-24 20:09:40 [manager.py:391] +ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:340.3136730194092ms total_cost_time:340.3587341308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7012 prompt_cache_len:5151 prompt_cache_ratio:0.7345978322875071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 +DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10779619216918945 s +INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s +DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=20408072357736825724115787358674884017, time:1750766981.2277904s req_ids:[8] +DEBUG 06-24 20:09:41 [manager.py:391] +ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:190.0317668914795ms total_cost_time:190.08302688598633ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:7013 prompt_cache_len:5151 prompt_cache_ratio:0.7344930842720662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 +DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10681271553039551 s +INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10930013656616211 s +DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=270841038525781732133971214150114997634, time:1750766981.4292374s req_ids:[8] +DEBUG 06-24 20:09:41 [manager.py:391] +ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:216.75634384155273ms total_cost_time:216.7990207672119ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7014 prompt_cache_len:5151 prompt_cache_ratio:0.7343883661248931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 +DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10716390609741211 s +INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091916561126709 s +DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=158424158616074215731579477275511466768, time:1750766981.6577358s req_ids:[8] +DEBUG 06-24 20:09:41 [manager.py:391] +ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:205.45434951782227ms total_cost_time:205.49750328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7015 prompt_cache_len:5151 prompt_cache_ratio:0.7342836778332146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 +DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10782527923583984 s +INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10983753204345703 s +DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=247131592517961113993600040711359329630, time:1750766981.8631s req_ids:[8] +DEBUG 06-24 20:09:41 [manager.py:391] +ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:201.61080360412598ms total_cost_time:201.66683197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7016 prompt_cache_len:5151 prompt_cache_ratio:0.7341790193842646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 +DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.11096954345703125 s +INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.1128995418548584 s +DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=216585471176658254948146543493173651465, time:1750766982.0709512s req_ids:[8] +DEBUG 06-24 20:09:42 [manager.py:391] +ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:204.76603507995605ms total_cost_time:204.82301712036133ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7017 prompt_cache_len:5151 prompt_cache_ratio:0.7340743907652844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 +DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10823297500610352 s +INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s +DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=44664904328901707916365973852833571992, time:1750766982.2825196s req_ids:[8] +DEBUG 06-24 20:09:42 [manager.py:391] +ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:206.12478256225586ms total_cost_time:206.18462562561035ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:7018 prompt_cache_len:5151 prompt_cache_ratio:0.7339697919635224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 +DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10662221908569336 s +INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.10856461524963379 s +DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=86823121673107716449771199287487425682, time:1750766982.506394s req_ids:[8] +DEBUG 06-24 20:09:42 [manager.py:391] +ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:216.47000312805176ms total_cost_time:216.51101112365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:7019 prompt_cache_len:5151 prompt_cache_ratio:0.7338652229662345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 +DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.1081686019897461 s +INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s +DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=72938464951548285394554736563170334019, time:1750766982.715524s req_ids:[8] +DEBUG 06-24 20:09:42 [manager.py:391] +ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:202.7730941772461ms total_cost_time:202.82673835754395ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:7020 prompt_cache_len:5151 prompt_cache_ratio:0.7337606837606837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 +DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10881757736206055 s +INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11084365844726562 s +DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=321433588193429024606349864537648851852, time:1750766982.925698s req_ids:[8] +DEBUG 06-24 20:09:42 [manager.py:391] +ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:207.11588859558105ms total_cost_time:207.16190338134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7021 prompt_cache_len:5151 prompt_cache_ratio:0.7336561743341404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 +DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:43 [batch.py:51] router release req id 8 +INFO 06-24 20:09:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s +INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100766658782959 s +DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=129285581661108605582588076291320214339, time:1750766983.1382234s req_ids:[8] +DEBUG 06-24 20:09:43 [manager.py:391] +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:206.5272331237793ms total_cost_time:206.58588409423828ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7022 prompt_cache_len:5151 prompt_cache_ratio:0.733551694673882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 +DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10907912254333496 s +INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11107110977172852 s +DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=223976299216702650389736003719716837816, time:1750766983.3560214s req_ids:[8] +DEBUG 06-24 20:09:43 [manager.py:391] +ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:379.31084632873535ms total_cost_time:379.35709953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7023 prompt_cache_len:5151 prompt_cache_ratio:0.7334472447671935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 +DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.11153697967529297 s +INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11349892616271973 s +DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=212574178696194885909043746347840892454, time:1750766983.7301126s req_ids:[8] +DEBUG 06-24 20:09:43 [manager.py:391] +ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:200.22106170654297ms total_cost_time:200.27899742126465ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:7024 prompt_cache_len:5151 prompt_cache_ratio:0.7333428246013668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 +DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10841131210327148 s +INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11044144630432129 s +DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=133893781925173173090075751705681009770, time:1750766983.9429348s req_ids:[8] +DEBUG 06-24 20:09:43 [manager.py:391] +ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:203.2606601715088ms total_cost_time:203.30548286437988ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7025 prompt_cache_len:5151 prompt_cache_ratio:0.733238434163701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 +DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s +INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s +DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=297123514128696453537523559760719515628, time:1750766984.1515064s req_ids:[8] +DEBUG 06-24 20:09:44 [manager.py:391] +ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:203.7038803100586ms total_cost_time:203.74727249145508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7026 prompt_cache_len:5151 prompt_cache_ratio:0.7331340734415029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 +DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.11017799377441406 s +INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.11210918426513672 s +DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=42637584963984286553057089134083885588, time:1750766984.3652472s req_ids:[8] +DEBUG 06-24 20:09:44 [manager.py:391] +ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:210.03389358520508ms total_cost_time:210.1118564605713ms,out_token_counter:1 mean_per_token_cost_time: 0.07796287536621094ms prompt_token_num:7027 prompt_cache_len:5151 prompt_cache_ratio:0.7330297424220862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 +DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10775613784790039 s +INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10984587669372559 s +DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=38565836076627402788130896525905266864, time:1750766984.5799851s req_ids:[8] +DEBUG 06-24 20:09:44 [manager.py:391] +ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:206.90298080444336ms total_cost_time:206.96067810058594ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7028 prompt_cache_len:5151 prompt_cache_ratio:0.7329254410927718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 +DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10861492156982422 s +INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.11050271987915039 s +DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=63290235803964911040438474994995393305, time:1750766984.7926052s req_ids:[8] +DEBUG 06-24 20:09:44 [manager.py:391] +ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:206.46405220031738ms total_cost_time:206.50768280029297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7029 prompt_cache_len:5151 prompt_cache_ratio:0.7328211694408877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 +DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s +INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10860586166381836 s +DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=249275468540962447202843775934637463001, time:1750766985.0068457s req_ids:[8] +DEBUG 06-24 20:09:45 [manager.py:391] +ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:208.5249423980713ms total_cost_time:208.58097076416016ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7030 prompt_cache_len:5151 prompt_cache_ratio:0.7327169274537696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 +DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s +INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10975384712219238 s +DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=59074255187604148962979577516464474336, time:1750766985.2205453s req_ids:[8] +DEBUG 06-24 20:09:45 [manager.py:391] +ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:205.6748867034912ms total_cost_time:205.718994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7031 prompt_cache_len:5151 prompt_cache_ratio:0.7326127151187598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 +DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.1074984073638916 s +INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10940432548522949 s +DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=115766482473309226226165846090986474542, time:1750766985.431283s req_ids:[8] +DEBUG 06-24 20:09:45 [manager.py:391] +ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:204.93555068969727ms total_cost_time:204.98037338256836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7032 prompt_cache_len:5151 prompt_cache_ratio:0.7325085324232082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 +DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s +INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.11043286323547363 s +DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=251860991257689450838886816552349828798, time:1750766985.6457691s req_ids:[8] +DEBUG 06-24 20:09:45 [manager.py:391] +ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:210.0510597229004ms total_cost_time:210.10780334472656ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7033 prompt_cache_len:5151 prompt_cache_ratio:0.7324043793544718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 +DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10745859146118164 s +INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10940384864807129 s +DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=321394784721419565652845163418924300160, time:1750766985.8593209s req_ids:[8] +DEBUG 06-24 20:09:45 [manager.py:391] +ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:206.94947242736816ms total_cost_time:206.99405670166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7034 prompt_cache_len:5151 prompt_cache_ratio:0.7323002558999147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 +DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.1090095043182373 s +INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s +DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=8019153211479629106314897447107924278, time:1750766986.0705528s req_ids:[8] +DEBUG 06-24 20:09:46 [manager.py:391] +ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:205.13606071472168ms total_cost_time:205.17992973327637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7035 prompt_cache_len:5151 prompt_cache_ratio:0.7321961620469083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 +DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:46 [batch.py:51] router release req id 8 +INFO 06-24 20:09:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.3093726634979248 s +INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.3113830089569092 s +DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=254185250865029745042799285138379544140, time:1750766986.4877672s req_ids:[8] +DEBUG 06-24 20:09:46 [manager.py:391] +ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:415.8670902252197ms total_cost_time:415.912389755249ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7036 prompt_cache_len:5151 prompt_cache_ratio:0.7320920977828311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 +DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.10840535163879395 s +INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.11080646514892578 s +DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=224861289895817570093112946243918685720, time:1750766986.7065287s req_ids:[8] +DEBUG 06-24 20:09:46 [manager.py:391] +DEBUG 06-24 20:09:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 30770.076 tokens/s +DEBUG 06-24 20:09:46 [stats.py:37] Avg prompt tokens throughput: 30761.406 tokens/s +DEBUG 06-24 20:09:46 [stats.py:37] Avg generate tokens throughput: 8.670 tokens/s +ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:208.8794708251953ms total_cost_time:208.9235782623291ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7037 prompt_cache_len:5151 prompt_cache_ratio:0.7319880630950689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 +DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.10781168937683105 s +INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s +DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=39086507492685723244897939402818620777, time:1750766986.919084s req_ids:[8] +DEBUG 06-24 20:09:46 [manager.py:391] +ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:208.25505256652832ms total_cost_time:208.298921585083ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7038 prompt_cache_len:5151 prompt_cache_ratio:0.7318840579710145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 +DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10778284072875977 s +INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.10979104042053223 s +DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=144112067349485135343119971217517788557, time:1750766987.1344683s req_ids:[8] +DEBUG 06-24 20:09:47 [manager.py:391] +ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:213.1030559539795ms total_cost_time:213.14764022827148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7039 prompt_cache_len:5151 prompt_cache_ratio:0.7317800823980679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 +DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.1077113151550293 s +INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.1103219985961914 s +DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=34153906626530375061910247054427422010, time:1750766987.347248s req_ids:[8] +DEBUG 06-24 20:09:47 [manager.py:391] +ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:201.32780075073242ms total_cost_time:201.3850212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7040 prompt_cache_len:5151 prompt_cache_ratio:0.7316761363636364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 +DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10864877700805664 s +INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s +DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=193271645915833834442346805406463976612, time:1750766987.5595512s req_ids:[8] +DEBUG 06-24 20:09:47 [manager.py:391] +ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:205.78956604003906ms total_cost_time:205.83367347717285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7041 prompt_cache_len:5151 prompt_cache_ratio:0.7315722198551342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 +DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.1080617904663086 s +INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.1099705696105957 s +DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=149474528920331779775155659984858842044, time:1750766987.7699776s req_ids:[8] +DEBUG 06-24 20:09:47 [manager.py:391] +ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:206.26091957092285ms total_cost_time:206.30240440368652ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7042 prompt_cache_len:5151 prompt_cache_ratio:0.7314683328599829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 +DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s +INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.11040520668029785 s +DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=71217097253853702939773252014690635086, time:1750766987.9828205s req_ids:[8] +DEBUG 06-24 20:09:47 [manager.py:391] +ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:202.76308059692383ms total_cost_time:202.80814170837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7043 prompt_cache_len:5151 prompt_cache_ratio:0.7313644753656112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 +DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10756468772888184 s +INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10949110984802246 s +DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=68165960913190061882280910302921811922, time:1750766988.1957924s req_ids:[8] +DEBUG 06-24 20:09:48 [manager.py:391] +ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:214.6279811859131ms total_cost_time:214.674711227417ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7044 prompt_cache_len:5151 prompt_cache_ratio:0.7312606473594548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 +DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10690045356750488 s +INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10863351821899414 s +DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=188178136399052333078211054209845536012, time:1750766988.4247224s req_ids:[8] +DEBUG 06-24 20:09:48 [manager.py:391] +ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:356.88304901123047ms total_cost_time:356.92739486694336ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7045 prompt_cache_len:5151 prompt_cache_ratio:0.7311568488289567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 +DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s +INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10933208465576172 s +DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=288692115567613850583141710337677405389, time:1750766988.7676241s req_ids:[8] +DEBUG 06-24 20:09:48 [manager.py:391] +ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:158.58793258666992ms total_cost_time:158.6306095123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7046 prompt_cache_len:5151 prompt_cache_ratio:0.7310530797615669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 +DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10745406150817871 s +INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.1090250015258789 s +DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=331250265777993948946108102614150056771, time:1750766988.9314344s req_ids:[8] +DEBUG 06-24 20:09:48 [manager.py:391] +ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:156.31985664367676ms total_cost_time:156.36277198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7047 prompt_cache_len:5151 prompt_cache_ratio:0.7309493401447424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10769033432006836 s +INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10982561111450195 s +DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=32205043930665808063507706587165837699, time:1750766989.0947518s req_ids:[8] +DEBUG 06-24 20:09:49 [manager.py:391] +ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:197.20053672790527ms total_cost_time:197.24297523498535ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7048 prompt_cache_len:5151 prompt_cache_ratio:0.7308456299659478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10632896423339844 s +INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10814166069030762 s +DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=62252368727601393800237313363757503139, time:1750766989.2996423s req_ids:[8] +DEBUG 06-24 20:09:49 [manager.py:391] +ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:165.00401496887207ms total_cost_time:165.04645347595215ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7049 prompt_cache_len:5151 prompt_cache_ratio:0.7307419492126542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10790109634399414 s +INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10969328880310059 s +DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=285115388595355004479184503824237321996, time:1750766989.4693289s req_ids:[8] +DEBUG 06-24 20:09:49 [manager.py:391] +ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:178.96056175231934ms total_cost_time:179.0030002593994ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7050 prompt_cache_len:5151 prompt_cache_ratio:0.7306382978723405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10732197761535645 s +INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10933041572570801 s +DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=99618431375576687749249584378704478292, time:1750766989.6511416s req_ids:[8] +DEBUG 06-24 20:09:49 [manager.py:391] +ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:187.02173233032227ms total_cost_time:187.06560134887695ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7051 prompt_cache_len:5151 prompt_cache_ratio:0.7305346759324919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10711240768432617 s +INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10888886451721191 s +DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=186464892543180224506767692052298905781, time:1750766989.8468451s req_ids:[8] +DEBUG 06-24 20:09:49 [manager.py:391] +ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:162.51230239868164ms total_cost_time:162.55474090576172ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7052 prompt_cache_len:5151 prompt_cache_ratio:0.7304310833806013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 +DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10682177543640137 s +INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10859322547912598 s +DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=331081502006136302528339927990028052165, time:1750766990.012144s req_ids:[8] +DEBUG 06-24 20:09:50 [manager.py:391] +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:158.58888626098633ms total_cost_time:158.6308479309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7053 prompt_cache_len:5151 prompt_cache_ratio:0.7303275202041685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 +DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.1069798469543457 s +INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10868024826049805 s +DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=241564626071293052156886647871064829037, time:1750766990.1771505s req_ids:[8] +DEBUG 06-24 20:09:50 [manager.py:391] +ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:158.4620475769043ms total_cost_time:158.50543975830078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7054 prompt_cache_len:5151 prompt_cache_ratio:0.7302239863907003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 +DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10781049728393555 s +INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10969066619873047 s +DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=250381936632162221198521456350413183183, time:1750766990.339713s req_ids:[8] +DEBUG 06-24 20:09:50 [manager.py:391] +ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:156.9061279296875ms total_cost_time:156.9499969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7055 prompt_cache_len:5151 prompt_cache_ratio:0.7301204819277108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 +DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10957837104797363 s +INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.1116032600402832 s +DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=289450376407739396891543488800770439243, time:1750766990.5032516s req_ids:[8] +DEBUG 06-24 20:09:50 [manager.py:391] +ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:373.4128475189209ms total_cost_time:373.457670211792ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7056 prompt_cache_len:5151 prompt_cache_ratio:0.7300170068027211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 +DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s +INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972261428833008 s +DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=166926929915001501458874246876964424846, time:1750766990.8827965s req_ids:[8] +DEBUG 06-24 20:09:50 [manager.py:391] +ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:199.41186904907227ms total_cost_time:199.45478439331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7057 prompt_cache_len:5151 prompt_cache_ratio:0.7299135610032592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10807490348815918 s +INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10982346534729004 s +DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=243798304501683709424806666007937849613, time:1750766991.0885587s req_ids:[8] +DEBUG 06-24 20:09:51 [manager.py:391] +ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:198.50850105285645ms total_cost_time:198.55356216430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7058 prompt_cache_len:5151 prompt_cache_ratio:0.7298101445168603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10636711120605469 s +INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10811972618103027 s +DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=206753105262050173169055719858943823414, time:1750766991.2953157s req_ids:[8] +DEBUG 06-24 20:09:51 [manager.py:391] +ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:167.222261428833ms total_cost_time:167.2656536102295ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7059 prompt_cache_len:5151 prompt_cache_ratio:0.7297067573310667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +DEBUG 06-24 20:09:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:51 [manager.py:283] +DEBUG 06-24 20:09:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:51 [manager.py:284] +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10506558418273926 s +INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10691666603088379 s +DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=313898699638901334526512905155452506375, time:1750766991.4720092s req_ids:[8] +DEBUG 06-24 20:09:51 [manager.py:391] +ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:205.81865310668945ms total_cost_time:205.84511756896973ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7060 prompt_cache_len:5151 prompt_cache_ratio:0.7296033994334278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10541939735412598 s +INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.1073312759399414 s +DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=182807026759947702512988392440017852853, time:1750766991.6790426s req_ids:[8] +DEBUG 06-24 20:09:51 [manager.py:391] +ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:171.7660427093506ms total_cost_time:171.80991172790527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7061 prompt_cache_len:5151 prompt_cache_ratio:0.7295000708114998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10789656639099121 s +INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.11008071899414062 s +DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=196182240311157225562385165972302170902, time:1750766991.8514597s req_ids:[8] +DEBUG 06-24 20:09:51 [manager.py:391] +ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:159.41119194030762ms total_cost_time:159.4550609588623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7062 prompt_cache_len:5151 prompt_cache_ratio:0.7293967714528462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 +DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10716867446899414 s +INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10949945449829102 s +DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=302703372957099633730743925568492783910, time:1750766992.0162678s req_ids:[8] +DEBUG 06-24 20:09:52 [manager.py:391] +ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:160.28547286987305ms total_cost_time:160.33077239990234ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7063 prompt_cache_len:5151 prompt_cache_ratio:0.7292935013450376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 +DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10603976249694824 s +INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10834789276123047 s +DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=253418889061719988393636136768012854595, time:1750766992.1826224s req_ids:[8] +DEBUG 06-24 20:09:52 [manager.py:391] +ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:157.32598304748535ms total_cost_time:157.35340118408203ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7064 prompt_cache_len:5151 prompt_cache_ratio:0.7291902604756512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 +DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10661768913269043 s +INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.1088569164276123 s +DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=3443360195119048305859640309605250915, time:1750766992.3617861s req_ids:[8] +DEBUG 06-24 20:09:52 [manager.py:391] +ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:175.18091201782227ms total_cost_time:175.22454261779785ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7065 prompt_cache_len:5151 prompt_cache_ratio:0.7290870488322717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 +DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10587453842163086 s +INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10802173614501953 s +DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=177798015500150624239925639168977529816, time:1750766992.5247648s req_ids:[8] +DEBUG 06-24 20:09:52 [manager.py:391] +ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:156.72612190246582ms total_cost_time:156.7530632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7066 prompt_cache_len:5151 prompt_cache_ratio:0.7289838664024908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 +DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10684490203857422 s +INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918951034545898 s +DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=301204671251040806212397617958475393477, time:1750766992.6902378s req_ids:[8] +DEBUG 06-24 20:09:52 [manager.py:391] +ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:346.18401527404785ms total_cost_time:346.2209701538086ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:7067 prompt_cache_len:5151 prompt_cache_ratio:0.7288807131739069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 +DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10605931282043457 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10823893547058105 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=91275080856245484722246561375573889001, time:1750766993.0414362s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:162.37735748291016ms total_cost_time:162.40310668945312ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:7068 prompt_cache_len:5151 prompt_cache_ratio:0.7287775891341256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 +DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:53 [batch.py:51] router release req id 8 +INFO 06-24 20:09:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:09:53 [statics_utils.py:24] mean first cost: 231.91998568904629 ms +INFO 06-24 20:09:53 [statics_utils.py:24] mean per token cost: 0.09801591679639993 ms +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10585689544677734 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10819530487060547 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=30424601777397132778491633213816572716, time:1750766993.2079294s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:156.9344997406006ms total_cost_time:156.96215629577637ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7069 prompt_cache_len:5151 prompt_cache_ratio:0.7286744942707597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 +DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10516571998596191 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10756969451904297 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=261790003214515784803493285501489270068, time:1750766993.374977s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:162.30535507202148ms total_cost_time:162.33181953430176ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7070 prompt_cache_len:5151 prompt_cache_ratio:0.7285714285714285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 +DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10466432571411133 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10694384574890137 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=126479945513051239574125682163455071749, time:1750766993.5413985s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:162.40787506103516ms total_cost_time:162.43481636047363ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7071 prompt_cache_len:5151 prompt_cache_ratio:0.728468392023759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 +DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10760045051574707 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100015640258789 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=64884687019624248573500707260782935831, time:1750766993.708866s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:211.73810958862305ms total_cost_time:211.78436279296875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7072 prompt_cache_len:5151 prompt_cache_ratio:0.7283653846153846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 +DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10813117027282715 s +INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.11033320426940918 s +DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=305228987426012316669365352995151230602, time:1750766993.9291317s req_ids:[8] +DEBUG 06-24 20:09:53 [manager.py:391] +ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:213.82975578308105ms total_cost_time:213.87434005737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7073 prompt_cache_len:5151 prompt_cache_ratio:0.728262406333946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 +DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10740447044372559 s +INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10982728004455566 s +DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=122004746788792347499561384702186841266, time:1750766994.1441474s req_ids:[8] +DEBUG 06-24 20:09:54 [manager.py:391] +ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:213.1040096282959ms total_cost_time:213.148832321167ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7074 prompt_cache_len:5151 prompt_cache_ratio:0.7281594571670907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 +DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10576248168945312 s +INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10813736915588379 s +DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=333308016617898820066244908809605307267, time:1750766994.364741s req_ids:[8] +DEBUG 06-24 20:09:54 [manager.py:391] +ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:188.12847137451172ms total_cost_time:188.1735324859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7075 prompt_cache_len:5151 prompt_cache_ratio:0.7280565371024735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 +DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s +INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s +DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=30434778279694482488887304214310495836, time:1750766994.5583978s req_ids:[8] +DEBUG 06-24 20:09:54 [manager.py:391] +ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:203.8884162902832ms total_cost_time:203.9315700531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7076 prompt_cache_len:5151 prompt_cache_ratio:0.7279536461277558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 +DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10727119445800781 s +INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10957622528076172 s +DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=242682439793286847111665614236775714354, time:1750766994.769921s req_ids:[8] +DEBUG 06-24 20:09:54 [manager.py:391] +ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:207.05914497375488ms total_cost_time:207.10325241088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7077 prompt_cache_len:5151 prompt_cache_ratio:0.7278507842306062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 +DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10812258720397949 s +INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.11047148704528809 s +DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=132979071581710553935111917518909292964, time:1750766994.9811187s req_ids:[8] +DEBUG 06-24 20:09:54 [manager.py:391] +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:206.13479614257812ms total_cost_time:206.18057250976562ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7078 prompt_cache_len:5151 prompt_cache_ratio:0.7277479513987002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 +DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.10833382606506348 s +INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.1106715202331543 s +DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=12379410039002637848782271546084553361, time:1750766995.205292s req_ids:[8] +DEBUG 06-24 20:09:55 [manager.py:391] +ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:221.91691398620605ms total_cost_time:221.96054458618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7079 prompt_cache_len:5151 prompt_cache_ratio:0.7276451476197203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 +DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.30980658531188965 s +DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=284150519502720374912508937930736402279, time:1750766995.6104312s req_ids:[8] +DEBUG 06-24 20:09:55 [manager.py:391] +INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.3123300075531006 s +ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:394.1361904144287ms total_cost_time:394.1817283630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7080 prompt_cache_len:5151 prompt_cache_ratio:0.7275423728813559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 +DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.10777783393859863 s +INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.10963153839111328 s +DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=230937608692170841542470437433688085535, time:1750766995.8246305s req_ids:[8] +DEBUG 06-24 20:09:55 [manager.py:391] +ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:210.43109893798828ms total_cost_time:210.47496795654297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7081 prompt_cache_len:5151 prompt_cache_ratio:0.7274396271713035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 +DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10862255096435547 s +INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11070847511291504 s +DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=90349251318207458683030239345966701490, time:1750766996.0496924s req_ids:[8] +DEBUG 06-24 20:09:56 [manager.py:391] +ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:223.42419624328613ms total_cost_time:223.46735000610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7082 prompt_cache_len:5151 prompt_cache_ratio:0.7273369104772663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 +DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10870242118835449 s +INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11126542091369629 s +DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=296523933662993490342538824803118486461, time:1750766996.2671366s req_ids:[8] +DEBUG 06-24 20:09:56 [manager.py:391] +ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:208.74381065368652ms total_cost_time:208.78863334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7083 prompt_cache_len:5151 prompt_cache_ratio:0.7272342227869547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 +DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10796070098876953 s +INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11027121543884277 s +DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=143133874909342421042249310486773561035, time:1750766996.4773555s req_ids:[8] +DEBUG 06-24 20:09:56 [manager.py:391] +ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:207.7808380126953ms total_cost_time:207.8239917755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7084 prompt_cache_len:5151 prompt_cache_ratio:0.7271315640880859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 +DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10755062103271484 s +INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.10998749732971191 s +DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=159127175986164890766557908686690992255, time:1750766996.691371s req_ids:[8] +DEBUG 06-24 20:09:56 [manager.py:391] +ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:09:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 33653.398 tokens/s +DEBUG 06-24 20:09:56 [stats.py:37] Avg prompt tokens throughput: 33643.770 tokens/s +DEBUG 06-24 20:09:56 [stats.py:37] Avg generate tokens throughput: 9.628 tokens/s +INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:211.92049980163574ms total_cost_time:211.96460723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7085 prompt_cache_len:5151 prompt_cache_ratio:0.7270289343683839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 +DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.1064913272857666 s +INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.10892057418823242 s +DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=116660142203876957422519773013296766280, time:1750766996.9073567s req_ids:[8] +DEBUG 06-24 20:09:56 [manager.py:391] +ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:202.3153305053711ms total_cost_time:202.3599147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7086 prompt_cache_len:5151 prompt_cache_ratio:0.72692633361558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 +DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10732316970825195 s +INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s +DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=175233227699193740443038301441889862525, time:1750766997.1230116s req_ids:[8] +DEBUG 06-24 20:09:57 [manager.py:391] +ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:211.8685245513916ms total_cost_time:211.91167831420898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7087 prompt_cache_len:5151 prompt_cache_ratio:0.7268237618174122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 +DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10704779624938965 s +INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.10946440696716309 s +DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=98909302763353059390833891356971687981, time:1750766997.338112s req_ids:[8] +DEBUG 06-24 20:09:57 [manager.py:391] +ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:208.1737518310547ms total_cost_time:208.21785926818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7088 prompt_cache_len:5151 prompt_cache_ratio:0.7267212189616253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 +DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10718512535095215 s +INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s +DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=314700914326133862294647399136865429122, time:1750766997.5496855s req_ids:[8] +DEBUG 06-24 20:09:57 [manager.py:391] +ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:406.96001052856445ms total_cost_time:406.9812297821045ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:7089 prompt_cache_len:5151 prompt_cache_ratio:0.7266187050359713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 +DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10350728034973145 s +INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.1057441234588623 s +DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=178255191269902766868388880060371012633, time:1750766997.95519s req_ids:[8] +DEBUG 06-24 20:09:57 [manager.py:391] +ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:201.54666900634766ms total_cost_time:201.59053802490234ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7090 prompt_cache_len:5151 prompt_cache_ratio:0.7265162200282087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 +DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10675621032714844 s +INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.10917186737060547 s +DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=132847899587549461555309619505983659335, time:1750766998.1771355s req_ids:[8] +DEBUG 06-24 20:09:58 [manager.py:391] +ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:238.2211685180664ms total_cost_time:238.2643222808838ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7091 prompt_cache_len:5151 prompt_cache_ratio:0.7264137639261035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 +DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10737037658691406 s +INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.1097867488861084 s +DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=315418887624735474318815576230088215970, time:1750766998.408618s req_ids:[8] +DEBUG 06-24 20:09:58 [manager.py:391] +ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:201.14493370056152ms total_cost_time:201.1876106262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7092 prompt_cache_len:5151 prompt_cache_ratio:0.7263113367174281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 +DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10841560363769531 s +INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.11083102226257324 s +DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=8258850778964970381986830952183533027, time:1750766998.6193967s req_ids:[8] +DEBUG 06-24 20:09:58 [manager.py:391] +ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:206.50362968444824ms total_cost_time:206.56371116638184ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7093 prompt_cache_len:5151 prompt_cache_ratio:0.7262089383899619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 +DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10534429550170898 s +INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.10771918296813965 s +DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=249823406403785494193284548172758073531, time:1750766998.830035s req_ids:[8] +DEBUG 06-24 20:09:58 [manager.py:391] +ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:205.2445411682129ms total_cost_time:205.28888702392578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7094 prompt_cache_len:5151 prompt_cache_ratio:0.7261065689314914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 +DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10730648040771484 s +INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.10958099365234375 s +DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=273608528422316564533548418979934368023, time:1750766999.048555s req_ids:[8] +DEBUG 06-24 20:09:59 [manager.py:391] +ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:214.07842636108398ms total_cost_time:214.12181854248047ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7095 prompt_cache_len:5151 prompt_cache_ratio:0.7260042283298097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 +DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10831522941589355 s +INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11031603813171387 s +DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=230914194810430733131020604214937675244, time:1750766999.2714353s req_ids:[8] +DEBUG 06-24 20:09:59 [manager.py:391] +ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:216.9184684753418ms total_cost_time:216.96114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7096 prompt_cache_len:5151 prompt_cache_ratio:0.7259019165727171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 +DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s +INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11054420471191406 s +DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=53720769683128440599646698110962512146, time:1750766999.4839795s req_ids:[8] +DEBUG 06-24 20:09:59 [manager.py:391] +ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:205.34467697143555ms total_cost_time:205.40189743041992ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7097 prompt_cache_len:5151 prompt_cache_ratio:0.7257996336480202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 +DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10876607894897461 s +INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11078786849975586 s +DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=322139315507372282227487931761803289286, time:1750766999.696448s req_ids:[8] +DEBUG 06-24 20:09:59 [manager.py:391] +ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:218.41192245483398ms total_cost_time:218.45602989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7098 prompt_cache_len:5151 prompt_cache_ratio:0.7256973795435334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 +DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:09:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10675263404846191 s +INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.10858440399169922 s +DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=197836052983697661160900405722389322420, time:1750766999.914774s req_ids:[8] +DEBUG 06-24 20:09:59 [manager.py:391] +ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:193.80593299865723ms total_cost_time:193.85147094726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7099 prompt_cache_len:5151 prompt_cache_ratio:0.7255951542470771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 +DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10581588745117188 s +INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.10826253890991211 s +DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=148742198960908114037739102838572146263, time:1750767000.118381s req_ids:[8] +DEBUG 06-24 20:10:00 [manager.py:391] +ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:377.7203559875488ms total_cost_time:377.7649402618408ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7100 prompt_cache_len:5151 prompt_cache_ratio:0.7254929577464789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 +DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.1080925464630127 s +INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.11050152778625488 s +DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=37550767906915893554883909801970245636, time:1750767000.4976037s req_ids:[8] +DEBUG 06-24 20:10:00 [manager.py:391] +ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:196.30789756774902ms total_cost_time:196.3496208190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7101 prompt_cache_len:5151 prompt_cache_ratio:0.7253907900295733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 +DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10767555236816406 s +INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.11003637313842773 s +DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=101428883375530902768809643651798357481, time:1750767000.703656s req_ids:[8] +DEBUG 06-24 20:10:00 [manager.py:391] +ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:202.32129096984863ms total_cost_time:202.3639678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7102 prompt_cache_len:5151 prompt_cache_ratio:0.7252886510842016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 +DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10772466659545898 s +INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100306510925293 s +DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=18934513631104061511468611443629907904, time:1750767000.9131138s req_ids:[8] +DEBUG 06-24 20:10:00 [manager.py:391] +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:208.4505558013916ms total_cost_time:208.49204063415527ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7103 prompt_cache_len:5151 prompt_cache_ratio:0.725186540898212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 +DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10950255393981934 s +INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.11156868934631348 s +DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=259610609344834905459112606133938913722, time:1750767001.1250837s req_ids:[8] +DEBUG 06-24 20:10:01 [manager.py:391] +ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:165.7874584197998ms total_cost_time:165.82751274108887ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:7104 prompt_cache_len:5151 prompt_cache_ratio:0.7250844594594594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 +DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10723376274108887 s +INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.10957098007202148 s +DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=66882572977318960984681088177092046486, time:1750767001.2957704s req_ids:[8] +DEBUG 06-24 20:10:01 [manager.py:391] +ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:197.05724716186523ms total_cost_time:197.10016250610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7105 prompt_cache_len:5151 prompt_cache_ratio:0.7249824067558057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 +DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10780215263366699 s +INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101067066192627 s +DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=133520236464024991491466422264837499737, time:1750767001.4988065s req_ids:[8] +DEBUG 06-24 20:10:01 [manager.py:391] +ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:202.22854614257812ms total_cost_time:202.2714614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7106 prompt_cache_len:5151 prompt_cache_ratio:0.7248803827751196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 +DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10849189758300781 s +INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.11085939407348633 s +DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=336014274809402884963973388019554214518, time:1750767001.7067683s req_ids:[8] +DEBUG 06-24 20:10:01 [manager.py:391] +ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:203.66573333740234ms total_cost_time:203.70888710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7107 prompt_cache_len:5151 prompt_cache_ratio:0.7247783875052765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 +DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10725140571594238 s +INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.10976409912109375 s +DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=178841979544913129152945944260351364455, time:1750767001.917101s req_ids:[8] +DEBUG 06-24 20:10:01 [manager.py:391] +ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:205.0316333770752ms total_cost_time:205.0759792327881ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7108 prompt_cache_len:5151 prompt_cache_ratio:0.7246764209341587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 +DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.1075131893157959 s +INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10952949523925781 s +DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=266439027130059369469566099634846372756, time:1750767002.1301055s req_ids:[8] +DEBUG 06-24 20:10:02 [manager.py:391] +ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:206.67266845703125ms total_cost_time:206.71796798706055ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7109 prompt_cache_len:5151 prompt_cache_ratio:0.7245744830496553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 +DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.10746502876281738 s +INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10948848724365234 s +DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=330550870813570894071692884842101098320, time:1750767002.342767s req_ids:[8] +DEBUG 06-24 20:10:02 [manager.py:391] +ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:207.36098289489746ms total_cost_time:207.40842819213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7110 prompt_cache_len:5151 prompt_cache_ratio:0.7244725738396625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 +DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.1098031997680664 s +INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.11171603202819824 s +DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=105342325395231877605480585374180517953, time:1750767002.5550268s req_ids:[8] +DEBUG 06-24 20:10:02 [manager.py:391] +ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:377.02322006225586ms total_cost_time:377.06851959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7111 prompt_cache_len:5151 prompt_cache_ratio:0.7243706932920827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 +DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.10738873481750488 s +INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10934972763061523 s +DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=70920666620170024826776621011409511558, time:1750767002.942237s req_ids:[8] +DEBUG 06-24 20:10:02 [manager.py:391] +ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:210.5538845062256ms total_cost_time:210.59846878051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7112 prompt_cache_len:5151 prompt_cache_ratio:0.7242688413948256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 +DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10734081268310547 s +INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10942983627319336 s +DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=150809633389647638864255792000559816358, time:1750767003.1551838s req_ids:[8] +DEBUG 06-24 20:10:03 [manager.py:391] +ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.46755599975586ms total_cost_time:207.51023292541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7113 prompt_cache_len:5151 prompt_cache_ratio:0.7241670181358076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 +DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10794305801391602 s +INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10991120338439941 s +DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=194140702077090127405579358861391543500, time:1750767003.366889s req_ids:[8] +DEBUG 06-24 20:10:03 [manager.py:391] +ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:201.18188858032227ms total_cost_time:201.22504234313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7114 prompt_cache_len:5151 prompt_cache_ratio:0.7240652235029519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 +DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10780477523803711 s +INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10969281196594238 s +DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=17381470317216304067070525988180645294, time:1750767003.5744162s req_ids:[8] +DEBUG 06-24 20:10:03 [manager.py:391] +ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.31377601623535ms total_cost_time:207.35692977905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7115 prompt_cache_len:5151 prompt_cache_ratio:0.7239634574841883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 +DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.1068735122680664 s +INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s +DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=314688588496185346754580480667134413986, time:1750767003.7954772s req_ids:[8] +DEBUG 06-24 20:10:03 [manager.py:391] +ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:212.61930465698242ms total_cost_time:212.6638889312744ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7116 prompt_cache_len:5151 prompt_cache_ratio:0.7238617200674536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 +DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.1070547103881836 s +INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10911083221435547 s +DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=279707048686434325747606007748573292003, time:1750767004.0062456s req_ids:[8] +DEBUG 06-24 20:10:04 [manager.py:391] +ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.0310115814209ms total_cost_time:207.08703994750977ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7117 prompt_cache_len:5151 prompt_cache_ratio:0.7237600112406913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 +DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10927510261535645 s +INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.1113123893737793 s +DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=280897291166937183142754931010607511219, time:1750767004.2183535s req_ids:[8] +DEBUG 06-24 20:10:04 [manager.py:391] +ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:224.03979301452637ms total_cost_time:224.08437728881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7118 prompt_cache_len:5151 prompt_cache_ratio:0.7236583309918516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 +DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10692644119262695 s +INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.10895466804504395 s +DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=150305683124795077441087728834588842407, time:1750767004.442925s req_ids:[8] +DEBUG 06-24 20:10:04 [manager.py:391] +ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:197.77822494506836ms total_cost_time:197.82066345214844ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7119 prompt_cache_len:5151 prompt_cache_ratio:0.7235566793088917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 +DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s +INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s +DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=58687478010630007145795110407163309477, time:1750767004.6539078s req_ids:[8] +DEBUG 06-24 20:10:04 [manager.py:391] +ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:205.03592491149902ms total_cost_time:205.0788402557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7120 prompt_cache_len:5151 prompt_cache_ratio:0.7234550561797752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 +DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10757994651794434 s +INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946941375732422 s +DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=110949885598389622226292911100575103825, time:1750767004.8633108s req_ids:[8] +DEBUG 06-24 20:10:04 [manager.py:391] +ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:192.09647178649902ms total_cost_time:192.1408176422119ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7121 prompt_cache_len:5151 prompt_cache_ratio:0.7233534615924729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 +DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.1072380542755127 s +INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.10909438133239746 s +DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=74167005708575374579923380212747340759, time:1750767005.0570939s req_ids:[8] +DEBUG 06-24 20:10:05 [manager.py:391] +ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:194.33093070983887ms total_cost_time:194.37599182128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7122 prompt_cache_len:5151 prompt_cache_ratio:0.723251895534962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 +DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.10817623138427734 s +INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.11003684997558594 s +DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=269285268965367244088909877791527324841, time:1750767005.2695458s req_ids:[8] +DEBUG 06-24 20:10:05 [manager.py:391] +ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:215.32654762268066ms total_cost_time:215.36517143249512ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:7123 prompt_cache_len:5151 prompt_cache_ratio:0.7231503579952268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 +DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.3100724220275879 s +INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.31199026107788086 s +DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=232982395339919514132300219642630077310, time:1750767005.6938589s req_ids:[8] +DEBUG 06-24 20:10:05 [manager.py:391] +ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:421.9181537628174ms total_cost_time:421.9627380371094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7124 prompt_cache_len:5151 prompt_cache_ratio:0.7230488489612578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 +DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.10701680183410645 s +INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.10880899429321289 s +DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=326622681737859601353889855588158573919, time:1750767005.8993278s req_ids:[8] +DEBUG 06-24 20:10:05 [manager.py:391] +ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:182.3709011077881ms total_cost_time:182.4171543121338ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7125 prompt_cache_len:5151 prompt_cache_ratio:0.7229473684210527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 +DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10747170448303223 s +INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10933375358581543 s +DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=129036071752079148896783508566670962042, time:1750767006.0958092s req_ids:[8] +DEBUG 06-24 20:10:06 [manager.py:391] +ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:208.4064483642578ms total_cost_time:208.451509475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7126 prompt_cache_len:5151 prompt_cache_ratio:0.7228459163626157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 +DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.1079704761505127 s +INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s +DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=290488082846029475565293271506841012760, time:1750767006.3095121s req_ids:[8] +DEBUG 06-24 20:10:06 [manager.py:391] +ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:210.30616760253906ms total_cost_time:210.35289764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7127 prompt_cache_len:5151 prompt_cache_ratio:0.7227444927739581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 +DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10725808143615723 s +INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10919976234436035 s +DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=293347923792413291535025684709885828334, time:1750767006.523225s req_ids:[8] +DEBUG 06-24 20:10:06 [manager.py:391] +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:201.59220695495605ms total_cost_time:201.63679122924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7128 prompt_cache_len:5151 prompt_cache_ratio:0.7226430976430976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 +DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10776448249816895 s +INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10968828201293945 s +DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=305020993537177111931357738681263566223, time:1750767006.7439685s req_ids:[8] +DEBUG 06-24 20:10:06 [manager.py:391] +ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:10:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 31137.346 tokens/s +DEBUG 06-24 20:10:06 [stats.py:37] Avg prompt tokens throughput: 31128.587 tokens/s +DEBUG 06-24 20:10:06 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s +INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:217.0412540435791ms total_cost_time:217.0851230621338ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7129 prompt_cache_len:5151 prompt_cache_ratio:0.7225417309580586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 +DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10730504989624023 s +INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10923457145690918 s +DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=238388200100258151270442128106986777960, time:1750767006.9574132s req_ids:[8] +DEBUG 06-24 20:10:06 [manager.py:391] +ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:209.3968391418457ms total_cost_time:209.4414234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7130 prompt_cache_len:5151 prompt_cache_ratio:0.7224403927068723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 +DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10738587379455566 s +INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.10946536064147949 s +DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=23181921022295476850797447240014940962, time:1750767007.1693873s req_ids:[8] +DEBUG 06-24 20:10:07 [manager.py:391] +ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:197.71742820739746ms total_cost_time:197.75962829589844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7131 prompt_cache_len:5151 prompt_cache_ratio:0.7223390828775768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 +DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10732722282409668 s +INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.10924172401428223 s +DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=315806402130743580461888335256824080900, time:1750767007.3734066s req_ids:[8] +DEBUG 06-24 20:10:07 [manager.py:391] +ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:223.82807731628418ms total_cost_time:223.87266159057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7132 prompt_cache_len:5151 prompt_cache_ratio:0.7222378014582165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 +DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10881352424621582 s +INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.11068058013916016 s +DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=176546428790214740621802520055242272636, time:1750767007.6101258s req_ids:[8] +DEBUG 06-24 20:10:07 [manager.py:391] +ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:409.56640243530273ms total_cost_time:409.61265563964844ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7133 prompt_cache_len:5151 prompt_cache_ratio:0.7221365484368428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 +DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10712885856628418 s +INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s +DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=318306890930790770495520493631439597028, time:1750767008.0142307s req_ids:[8] +DEBUG 06-24 20:10:08 [manager.py:391] +ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:191.51616096496582ms total_cost_time:191.5607452392578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7134 prompt_cache_len:5151 prompt_cache_ratio:0.7220353238015139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 +DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10833883285522461 s +INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=191904024574924427026689802931918122501, time:1750767008.2202315s req_ids:[8] +DEBUG 06-24 20:10:08 [manager.py:391] +ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:209.55371856689453ms total_cost_time:209.60164070129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:7135 prompt_cache_len:5151 prompt_cache_ratio:0.7219341275402943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 +DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10799002647399902 s +INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.1098778247833252 s +DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=257084366272053646165539103018325668475, time:1750767008.4312084s req_ids:[8] +DEBUG 06-24 20:10:08 [manager.py:391] +ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:208.12034606933594ms total_cost_time:208.16421508789062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7136 prompt_cache_len:5151 prompt_cache_ratio:0.7218329596412556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 +DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10794901847839355 s +INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.10990619659423828 s +DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=73995569084425848046208426251939111890, time:1750767008.644523s req_ids:[8] +DEBUG 06-24 20:10:08 [manager.py:391] +ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:223.36864471435547ms total_cost_time:223.41346740722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7137 prompt_cache_len:5151 prompt_cache_ratio:0.7217318200924758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 +DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10720109939575195 s +INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.1092977523803711 s +DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=276137271877318547874868435815613362142, time:1750767008.8668494s req_ids:[8] +DEBUG 06-24 20:10:08 [manager.py:391] +ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:191.58148765563965ms total_cost_time:191.62583351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7138 prompt_cache_len:5151 prompt_cache_ratio:0.7216307088820398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 +DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10808753967285156 s +INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.11002016067504883 s +DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=117382491611288558201896011651250459515, time:1750767009.0679352s req_ids:[8] +DEBUG 06-24 20:10:09 [manager.py:391] +ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:205.92713356018066ms total_cost_time:205.98149299621582ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7139 prompt_cache_len:5151 prompt_cache_ratio:0.7215296259980389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 +DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10872864723205566 s +INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.11072444915771484 s +DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=153017171538353874900998120190634285094, time:1750767009.2767518s req_ids:[8] +DEBUG 06-24 20:10:09 [manager.py:391] +ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:195.9857940673828ms total_cost_time:196.03252410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7140 prompt_cache_len:5151 prompt_cache_ratio:0.7214285714285714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 +DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10745024681091309 s +INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979080200195312 s +DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=42047959268200999976377909954876377860, time:1750767009.4818919s req_ids:[8] +DEBUG 06-24 20:10:09 [manager.py:391] +ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:200.54006576538086ms total_cost_time:200.58536529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7141 prompt_cache_len:5151 prompt_cache_ratio:0.721327545161742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 +DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.1068730354309082 s +INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.10879397392272949 s +DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=330096932053594308892812119960859029107, time:1750767009.688927s req_ids:[8] +DEBUG 06-24 20:10:09 [manager.py:391] +ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:202.5928497314453ms total_cost_time:202.6371955871582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7142 prompt_cache_len:5151 prompt_cache_ratio:0.7212265471856623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 +DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s +INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s +DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=305608827174941284113587090752990707681, time:1750767009.9055269s req_ids:[8] +DEBUG 06-24 20:10:09 [manager.py:391] +ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:214.7996425628662ms total_cost_time:214.84613418579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7143 prompt_cache_len:5151 prompt_cache_ratio:0.7211255774884502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 +DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10856389999389648 s +INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.11052584648132324 s +DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=24602101484355933588773515392067287557, time:1750767010.1177237s req_ids:[8] +DEBUG 06-24 20:10:10 [manager.py:391] +ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:366.4212226867676ms total_cost_time:366.46509170532227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7144 prompt_cache_len:5151 prompt_cache_ratio:0.7210246360582306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 +DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10800027847290039 s +INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.10982823371887207 s +DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=21201348492042893980037036695048958244, time:1750767010.4847126s req_ids:[8] +DEBUG 06-24 20:10:10 [manager.py:391] +ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:199.1877555847168ms total_cost_time:199.2504596710205ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:7145 prompt_cache_len:5151 prompt_cache_ratio:0.7209237228831351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 +DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10828638076782227 s +INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.11027240753173828 s +DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=160426337095385273733612065057675098173, time:1750767010.696382s req_ids:[8] +DEBUG 06-24 20:10:10 [manager.py:391] +ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:210.1461887359619ms total_cost_time:210.1917266845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7146 prompt_cache_len:5151 prompt_cache_ratio:0.7208228379513014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 +DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10698652267456055 s +INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.10810542106628418 s +DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=116900175550160533350180533268538788657, time:1750767010.9129367s req_ids:[8] +DEBUG 06-24 20:10:10 [manager.py:391] +ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:209.29360389709473ms total_cost_time:209.35416221618652ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:7147 prompt_cache_len:5151 prompt_cache_ratio:0.7207219812508745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 +DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10809493064880371 s +INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s +DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=248482763243138562391745014721123308021, time:1750767011.1320522s req_ids:[8] +DEBUG 06-24 20:10:11 [manager.py:391] +ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:211.66706085205078ms total_cost_time:211.70663833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:7148 prompt_cache_len:5151 prompt_cache_ratio:0.7206211527700056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 +DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.1065671443939209 s +INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10860967636108398 s +DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=190870487122387489309474453686741319734, time:1750767011.3455086s req_ids:[8] +DEBUG 06-24 20:10:11 [manager.py:391] +ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:208.98842811584473ms total_cost_time:209.03611183166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7149 prompt_cache_len:5151 prompt_cache_ratio:0.7205203524968528 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 +DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10767006874084473 s +INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10955929756164551 s +DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=225427075520997457843880120865797903053, time:1750767011.5601594s req_ids:[8] +DEBUG 06-24 20:10:11 [manager.py:391] +ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:206.09402656555176ms total_cost_time:206.13884925842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7150 prompt_cache_len:5151 prompt_cache_ratio:0.7204195804195804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 +DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s +INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.11046719551086426 s +DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=243342279008509018952944565838802223707, time:1750767011.7762108s req_ids:[8] +DEBUG 06-24 20:10:11 [manager.py:391] +ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:215.00778198242188ms total_cost_time:215.04998207092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7151 prompt_cache_len:5151 prompt_cache_ratio:0.72031883652636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 +DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s +INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10987544059753418 s +DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=96835342792509319395585885408570039088, time:1750767011.9984121s req_ids:[8] +DEBUG 06-24 20:10:11 [manager.py:391] +ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:213.20199966430664ms total_cost_time:213.2434844970703ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7152 prompt_cache_len:5151 prompt_cache_ratio:0.7202181208053692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 +DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10709333419799805 s +INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.10905122756958008 s +DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=224574353546801253410498667577537715500, time:1750767012.2069333s req_ids:[8] +DEBUG 06-24 20:10:12 [manager.py:391] +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:205.89542388916016ms total_cost_time:205.93857765197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7153 prompt_cache_len:5151 prompt_cache_ratio:0.7201174332447924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 +DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10823273658752441 s +INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.11028242111206055 s +DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=292732686581944479537784308821433568301, time:1750767012.4196084s req_ids:[8] +DEBUG 06-24 20:10:12 [manager.py:391] +ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:208.48369598388672ms total_cost_time:208.54616165161133ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:7154 prompt_cache_len:5151 prompt_cache_ratio:0.7200167738328208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 +DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s +INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.10975408554077148 s +DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=28317246882918998221432272335114133467, time:1750767012.643038s req_ids:[8] +DEBUG 06-24 20:10:12 [manager.py:391] +ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:362.424373626709ms total_cost_time:362.4711036682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7155 prompt_cache_len:5151 prompt_cache_ratio:0.719916142557652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 +DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10862851142883301 s +INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.11054372787475586 s +DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=209049702126121689527432925685733818285, time:1750767012.9997888s req_ids:[8] +DEBUG 06-24 20:10:12 [manager.py:391] +ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:196.6550350189209ms total_cost_time:196.6991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7156 prompt_cache_len:5151 prompt_cache_ratio:0.7198155394074902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 +DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10712027549743652 s +INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.10897350311279297 s +DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=181401506268972188852523975402527821510, time:1750767013.2124963s req_ids:[8] +DEBUG 06-24 20:10:13 [manager.py:391] +ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:212.84914016723633ms total_cost_time:212.89372444152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7157 prompt_cache_len:5151 prompt_cache_ratio:0.7197149643705463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 +DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s +INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.11030888557434082 s +DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=175813898411919452132436765316407837202, time:1750767013.4260056s req_ids:[8] +DEBUG 06-24 20:10:13 [manager.py:391] +ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:209.7475528717041ms total_cost_time:209.7926139831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7158 prompt_cache_len:5151 prompt_cache_ratio:0.7196144174350377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 +DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s +INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s +DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=211385711464919028396476986522095651354, time:1750767013.6496053s req_ids:[8] +DEBUG 06-24 20:10:13 [manager.py:391] +ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:217.09370613098145ms total_cost_time:217.13805198669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7159 prompt_cache_len:5151 prompt_cache_ratio:0.7195138985891885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 +DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10737347602844238 s +INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.10931396484375 s +DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=221518014329207604782594996932027391297, time:1750767013.8646321s req_ids:[8] +DEBUG 06-24 20:10:13 [manager.py:391] +ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:206.88343048095703ms total_cost_time:206.92873001098633ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7160 prompt_cache_len:5151 prompt_cache_ratio:0.7194134078212291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 +DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.1075444221496582 s +INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10938000679016113 s +DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=280595608036306510486759159359317774786, time:1750767014.078916s req_ids:[8] +DEBUG 06-24 20:10:14 [manager.py:391] +ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:207.7345848083496ms total_cost_time:207.7775001525879ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7161 prompt_cache_len:5151 prompt_cache_ratio:0.7193129451193967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 +DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10759282112121582 s +INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10939288139343262 s +DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=67938495537675149659507722131604408284, time:1750767014.2943754s req_ids:[8] +DEBUG 06-24 20:10:14 [manager.py:391] +ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:210.54983139038086ms total_cost_time:210.59632301330566ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7162 prompt_cache_len:5151 prompt_cache_ratio:0.7192125104719352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 +DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.1074514389038086 s +INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10936117172241211 s +DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=306563091185353362046616685526690681352, time:1750767014.508425s req_ids:[8] +DEBUG 06-24 20:10:14 [manager.py:391] +ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:208.1432342529297ms total_cost_time:208.1892490386963ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7163 prompt_cache_len:5151 prompt_cache_ratio:0.7191121038670948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 +DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s +INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10946917533874512 s +DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=43022290005250700893678364406478221217, time:1750767014.7211523s req_ids:[8] +DEBUG 06-24 20:10:14 [manager.py:391] +ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:207.8566551208496ms total_cost_time:207.9014778137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7164 prompt_cache_len:5151 prompt_cache_ratio:0.7190117252931323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 +DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10719156265258789 s +INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s +DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=277291343155307770143807634968099963263, time:1750767014.9347205s req_ids:[8] +DEBUG 06-24 20:10:14 [manager.py:391] +ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:213.40203285217285ms total_cost_time:213.42825889587402ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7165 prompt_cache_len:5151 prompt_cache_ratio:0.7189113747383112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 +DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.10596847534179688 s +INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.10802054405212402 s +DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=289661613349018392918383164196431019396, time:1750767015.1655276s req_ids:[8] +DEBUG 06-24 20:10:15 [manager.py:391] +ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:224.54261779785156ms total_cost_time:224.58744049072266ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7166 prompt_cache_len:5151 prompt_cache_ratio:0.7188110521909015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 +DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s +INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s +DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=271730576213155666345063098772542448954, time:1750767015.3820226s req_ids:[8] +DEBUG 06-24 20:10:15 [manager.py:391] +ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:208.78148078918457ms total_cost_time:208.82582664489746ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7167 prompt_cache_len:5151 prompt_cache_ratio:0.7187107576391796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 +DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.30916380882263184 s +INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.3112506866455078 s +DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=247636245169448681274982394630374559129, time:1750767015.8055675s req_ids:[8] +DEBUG 06-24 20:10:15 [manager.py:391] +ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:417.7429676055908ms total_cost_time:417.7863597869873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7168 prompt_cache_len:5151 prompt_cache_ratio:0.7186104910714286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 +DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s +DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=80719615642951113721846341072722340409, time:1750767016.0286608s req_ids:[8] +DEBUG 06-24 20:10:16 [manager.py:391] +ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:214.16473388671875ms total_cost_time:214.21098709106445ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7169 prompt_cache_len:5151 prompt_cache_ratio:0.718510252475938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 +DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s +INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11031055450439453 s +DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=180987930712356888811838622202771241091, time:1750767016.2424474s req_ids:[8] +DEBUG 06-24 20:10:16 [manager.py:391] +ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:206.91609382629395ms total_cost_time:206.95972442626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7170 prompt_cache_len:5151 prompt_cache_ratio:0.7184100418410042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 +DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:16 [batch.py:51] router release req id 8 +INFO 06-24 20:10:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10810708999633789 s +INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11025428771972656 s +DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=214080404080471687872836387010066170044, time:1750767016.4537523s req_ids:[8] +DEBUG 06-24 20:10:16 [manager.py:391] +ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:204.1475772857666ms total_cost_time:204.1914463043213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7171 prompt_cache_len:5151 prompt_cache_ratio:0.7183098591549296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 +DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10870623588562012 s +INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s +DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=332392529189229823549960462193953147603, time:1750767016.6638665s req_ids:[8] +DEBUG 06-24 20:10:16 [manager.py:391] +ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:207.13400840759277ms total_cost_time:207.17954635620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7172 prompt_cache_len:5151 prompt_cache_ratio:0.7182097044060234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 +DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.1072385311126709 s +INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.10926318168640137 s +DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=64018560222206016299779069699227649297, time:1750767016.8747811s req_ids:[8] +DEBUG 06-24 20:10:16 [manager.py:391] +DEBUG 06-24 20:10:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 31327.678 tokens/s +DEBUG 06-24 20:10:16 [stats.py:37] Avg prompt tokens throughput: 31319.019 tokens/s +DEBUG 06-24 20:10:16 [stats.py:37] Avg generate tokens throughput: 8.659 tokens/s +ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:206.85267448425293ms total_cost_time:206.89725875854492ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7173 prompt_cache_len:5151 prompt_cache_ratio:0.7181095775826014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 +DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10745930671691895 s +INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.1094667911529541 s +DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=288548564847830269233578903021808522298, time:1750767017.088555s req_ids:[8] +DEBUG 06-24 20:10:17 [manager.py:391] +ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:204.81491088867188ms total_cost_time:204.85806465148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7174 prompt_cache_len:5151 prompt_cache_ratio:0.7180094786729858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 +DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s +INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.10980701446533203 s +DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=334186496730858969894756895045469871523, time:1750767017.2976313s req_ids:[8] +DEBUG 06-24 20:10:17 [manager.py:391] +ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:199.68175888061523ms total_cost_time:199.7241973876953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7175 prompt_cache_len:5151 prompt_cache_ratio:0.7179094076655053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 +DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s +INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.10931992530822754 s +DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=18306681105736782257776387587061967468, time:1750767017.5098698s req_ids:[8] +DEBUG 06-24 20:10:17 [manager.py:391] +ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:209.5024585723877ms total_cost_time:209.5472812652588ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7176 prompt_cache_len:5151 prompt_cache_ratio:0.717809364548495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 +DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10798025131225586 s +INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.11020469665527344 s +DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=120362025528813721755658995554779320411, time:1750767017.7219095s req_ids:[8] +DEBUG 06-24 20:10:17 [manager.py:391] +ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:384.80663299560547ms total_cost_time:384.85193252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7177 prompt_cache_len:5151 prompt_cache_ratio:0.7177093493102967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 +DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10746073722839355 s +INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s +DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=305892352742916224250096128670539733067, time:1750767018.1035385s req_ids:[8] +DEBUG 06-24 20:10:18 [manager.py:391] +ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:201.826810836792ms total_cost_time:201.86924934387207ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7178 prompt_cache_len:5151 prompt_cache_ratio:0.7176093619392588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 +DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10703825950622559 s +INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10912251472473145 s +DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=120065790336193888102739497046430547834, time:1750767018.315625s req_ids:[8] +DEBUG 06-24 20:10:18 [manager.py:391] +ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:209.22350883483887ms total_cost_time:209.26713943481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7179 prompt_cache_len:5151 prompt_cache_ratio:0.7175094024237358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 +DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s +INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s +INFO 06-24 20:10:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=339490786432690786413431494579132048795, time:1750767018.5306323s req_ids:[8] +DEBUG 06-24 20:10:18 [manager.py:391] +ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:203.32026481628418ms total_cost_time:203.37986946105957ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7180 prompt_cache_len:5151 prompt_cache_ratio:0.7174094707520892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 +DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s +INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.11054754257202148 s +DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=232498509910486336261472994260498195806, time:1750767018.7451828s req_ids:[8] +DEBUG 06-24 20:10:18 [manager.py:391] +ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:208.2045078277588ms total_cost_time:208.266019821167ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:7181 prompt_cache_len:5151 prompt_cache_ratio:0.7173095669126862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 +DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10731768608093262 s +INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10918283462524414 s +DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=256977974851781653653669263487424330118, time:1750767018.9734213s req_ids:[8] +DEBUG 06-24 20:10:18 [manager.py:391] +ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:222.95141220092773ms total_cost_time:223.01340103149414ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:7182 prompt_cache_len:5151 prompt_cache_ratio:0.7172096908939014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 +DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10789346694946289 s +INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.10995078086853027 s +DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=332521767171950538506777204357045243607, time:1750767019.1863844s req_ids:[8] +DEBUG 06-24 20:10:19 [manager.py:391] +ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:197.11709022521973ms total_cost_time:197.16858863830566ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:7183 prompt_cache_len:5151 prompt_cache_ratio:0.7171098426841153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 +DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10996317863464355 s +INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.11208271980285645 s +DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=297669703632240953516048109199720389073, time:1750767019.3891451s req_ids:[8] +DEBUG 06-24 20:10:19 [manager.py:391] +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:200.2410888671875ms total_cost_time:200.3006935119629ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7184 prompt_cache_len:5151 prompt_cache_ratio:0.717010022271715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 +DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.11086797714233398 s +INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.1128854751586914 s +DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=14371472575789124717291915108220881311, time:1750767019.5936017s req_ids:[8] +DEBUG 06-24 20:10:19 [manager.py:391] +ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:200.9263038635254ms total_cost_time:200.98090171813965ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:7185 prompt_cache_len:5151 prompt_cache_ratio:0.716910229645094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 +DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10885262489318848 s +INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.11088871955871582 s +DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=303526261337772790197775478694906210796, time:1750767019.8088892s req_ids:[8] +DEBUG 06-24 20:10:19 [manager.py:391] +ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:212.41164207458496ms total_cost_time:212.45932579040527ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7186 prompt_cache_len:5151 prompt_cache_ratio:0.7168104647926524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 +DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10983777046203613 s +INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.11191916465759277 s +DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=56333294574531672692607443881386467465, time:1750767020.0238993s req_ids:[8] +DEBUG 06-24 20:10:20 [manager.py:391] +ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:205.84583282470703ms total_cost_time:205.90591430664062ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7187 prompt_cache_len:5151 prompt_cache_ratio:0.7167107277027968 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 +DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10804891586303711 s +INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.1105196475982666 s +DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=275587993470186061924883128807917108381, time:1750767020.24008s req_ids:[8] +DEBUG 06-24 20:10:20 [manager.py:391] +ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:386.81697845458984ms total_cost_time:386.859655380249ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7188 prompt_cache_len:5151 prompt_cache_ratio:0.7166110183639399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 +DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10769462585449219 s +INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s +DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=14457955551232871508301530811737527859, time:1750767020.6219192s req_ids:[8] +DEBUG 06-24 20:10:20 [manager.py:391] +ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:200.3650665283203ms total_cost_time:200.41871070861816ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:7189 prompt_cache_len:5151 prompt_cache_ratio:0.7165113367645013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 +DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.11113858222961426 s +INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.11323070526123047 s +DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=3215234047010236800195495082582627068, time:1750767020.8365908s req_ids:[8] +DEBUG 06-24 20:10:20 [manager.py:391] +ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:210.16693115234375ms total_cost_time:210.22391319274902ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7190 prompt_cache_len:5151 prompt_cache_ratio:0.7164116828929068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 +DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10957598686218262 s +INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.11176133155822754 s +DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=128746066182812263521487385908887434637, time:1750767021.049714s req_ids:[8] +DEBUG 06-24 20:10:21 [manager.py:391] +ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:204.41865921020508ms total_cost_time:204.46181297302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7191 prompt_cache_len:5151 prompt_cache_ratio:0.7163120567375887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 +DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10727429389953613 s +INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10939335823059082 s +DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=326285463919855974268593810677104993759, time:1750767021.261176s req_ids:[8] +DEBUG 06-24 20:10:21 [manager.py:391] +ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:203.6299705505371ms total_cost_time:203.6759853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7192 prompt_cache_len:5151 prompt_cache_ratio:0.7162124582869855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 +DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10591602325439453 s +INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10784745216369629 s +DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=43057916862155693040484303281464402648, time:1750767021.4726374s req_ids:[8] +DEBUG 06-24 20:10:21 [manager.py:391] +ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:208.34732055664062ms total_cost_time:208.39214324951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7193 prompt_cache_len:5151 prompt_cache_ratio:0.7161128875295426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 +DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.1085667610168457 s +INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.11104249954223633 s +DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=29042436613385024766462786427722377952, time:1750767021.6895385s req_ids:[8] +DEBUG 06-24 20:10:21 [manager.py:391] +ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:212.1596336364746ms total_cost_time:212.2032642364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7194 prompt_cache_len:5151 prompt_cache_ratio:0.7160133444537115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 +DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10733461380004883 s +INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10953998565673828 s +DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=312441112655158214139687751498353124083, time:1750767021.904089s req_ids:[8] +DEBUG 06-24 20:10:21 [manager.py:391] +ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:208.3151340484619ms total_cost_time:208.37879180908203ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:7195 prompt_cache_len:5151 prompt_cache_ratio:0.71591382904795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 +DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10904717445373535 s +INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11102581024169922 s +DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=638280569979463467694848177759158162, time:1750767022.1253746s req_ids:[8] +DEBUG 06-24 20:10:22 [manager.py:391] +ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:211.6076946258545ms total_cost_time:211.65013313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7196 prompt_cache_len:5151 prompt_cache_ratio:0.7158143413007226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 +DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10704517364501953 s +INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.1090700626373291 s +DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=168891913480738567038559936616958775430, time:1750767022.3402126s req_ids:[8] +DEBUG 06-24 20:10:22 [manager.py:391] +ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:212.97025680541992ms total_cost_time:213.0138874053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7197 prompt_cache_len:5151 prompt_cache_ratio:0.7157148812005002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 +DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10906386375427246 s +INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s +DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=267764532393415287241209309393756621666, time:1750767022.5575345s req_ids:[8] +DEBUG 06-24 20:10:22 [manager.py:391] +ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:210.10589599609375ms total_cost_time:210.14928817749023ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7198 prompt_cache_len:5151 prompt_cache_ratio:0.7156154487357599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 +DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s +INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s +DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=324710147842344023793192062781897973722, time:1750767022.7708187s req_ids:[8] +DEBUG 06-24 20:10:22 [manager.py:391] +ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:375.8392333984375ms total_cost_time:375.8819103240967ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7199 prompt_cache_len:5151 prompt_cache_ratio:0.7155160438949855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 +DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10745596885681152 s +INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.1094050407409668 s +DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=302984740435381875287107172358051071348, time:1750767023.148048s req_ids:[8] +DEBUG 06-24 20:10:23 [manager.py:391] +INFO 06-24 20:10:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:10:23 [statics_utils.py:24] mean first cost: 231.49240948393353 ms +INFO 06-24 20:10:23 [statics_utils.py:24] mean per token cost: 0.09554309566870256 ms +ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:199.30553436279297ms total_cost_time:199.34821128845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7200 prompt_cache_len:5151 prompt_cache_ratio:0.7154166666666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 +DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10734796524047852 s +INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.10925817489624023 s +DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=221237606668244531027029436085037952968, time:1750767023.3595424s req_ids:[8] +DEBUG 06-24 20:10:23 [manager.py:391] +ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:205.47747611999512ms total_cost_time:205.5225372314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7201 prompt_cache_len:5151 prompt_cache_ratio:0.7153173170393001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 +DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10808229446411133 s +INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.11000752449035645 s +DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=308580203769821789153304500767938642136, time:1750767023.5758736s req_ids:[8] +DEBUG 06-24 20:10:23 [manager.py:391] +ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:213.00649642944336ms total_cost_time:213.05036544799805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7202 prompt_cache_len:5151 prompt_cache_ratio:0.7152179950013885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 +DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s +INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.1087338924407959 s +DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=180349067545523260537495373194874626496, time:1750767023.78955s req_ids:[8] +DEBUG 06-24 20:10:23 [manager.py:391] +ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:205.09982109069824ms total_cost_time:205.14249801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7203 prompt_cache_len:5151 prompt_cache_ratio:0.7151187005414411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 +DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s +INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.11002349853515625 s +DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=161657790310231586776724987055712014107, time:1750767023.9990604s req_ids:[8] +DEBUG 06-24 20:10:23 [manager.py:391] +ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:201.89762115478516ms total_cost_time:201.94196701049805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7204 prompt_cache_len:5151 prompt_cache_ratio:0.7150194336479734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 +DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.1097569465637207 s +INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11174345016479492 s +DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=162933438657109837295909713186048139132, time:1750767024.212169s req_ids:[8] +DEBUG 06-24 20:10:24 [manager.py:391] +ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:212.28289604187012ms total_cost_time:212.33057975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7205 prompt_cache_len:5151 prompt_cache_ratio:0.7149201943095073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 +DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10838007926940918 s +INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s +DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=118792546758758780161445794108205256710, time:1750767024.4280128s req_ids:[8] +DEBUG 06-24 20:10:24 [manager.py:391] +ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:207.60178565979004ms total_cost_time:207.62872695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7206 prompt_cache_len:5151 prompt_cache_ratio:0.7148209825145712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 +DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10358405113220215 s +INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.10547900199890137 s +DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=92216390046390601306457832625247722278, time:1750767024.6502898s req_ids:[8] +DEBUG 06-24 20:10:24 [manager.py:391] +ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:215.93689918518066ms total_cost_time:215.98315238952637ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7207 prompt_cache_len:5151 prompt_cache_ratio:0.7147217982516998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 +DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s +INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11014246940612793 s +DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=328501875706381556995345619339009139193, time:1750767024.8711586s req_ids:[8] +DEBUG 06-24 20:10:24 [manager.py:391] +ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:216.06063842773438ms total_cost_time:216.1116600036621ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:7208 prompt_cache_len:5151 prompt_cache_ratio:0.714622641509434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 +DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.1073007583618164 s +INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.109222412109375 s +DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=234656382681164417105014294113369149439, time:1750767025.0868006s req_ids:[8] +DEBUG 06-24 20:10:25 [manager.py:391] +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:202.55351066589355ms total_cost_time:202.61669158935547ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:7209 prompt_cache_len:5151 prompt_cache_ratio:0.7145235122763213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 +DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.1071627140045166 s +INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.10919332504272461 s +DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=276193358371705551075212688797899138558, time:1750767025.2967112s req_ids:[8] +DEBUG 06-24 20:10:25 [manager.py:391] +ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:204.76531982421875ms total_cost_time:204.80799674987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7210 prompt_cache_len:5151 prompt_cache_ratio:0.7144244105409154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 +DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.10821151733398438 s +INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.11015629768371582 s +DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=232772307891548812685691808767956124537, time:1750767025.5086195s req_ids:[8] +DEBUG 06-24 20:10:25 [manager.py:391] +ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:209.78498458862305ms total_cost_time:209.82956886291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7211 prompt_cache_len:5151 prompt_cache_ratio:0.7143253362917764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 +DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.3096437454223633 s +INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.3116188049316406 s +DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=251799269907912582352124314833577177188, time:1750767025.9346156s req_ids:[8] +DEBUG 06-24 20:10:25 [manager.py:391] +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:420.5431938171387ms total_cost_time:420.58730125427246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7212 prompt_cache_len:5151 prompt_cache_ratio:0.7142262895174709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10733985900878906 s +INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s +DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=130494591751210110622407879704260863672, time:1750767026.1518312s req_ids:[8] +DEBUG 06-24 20:10:26 [manager.py:391] +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:205.8122158050537ms total_cost_time:205.85155487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:7213 prompt_cache_len:5151 prompt_cache_ratio:0.7141272702065715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10892915725708008 s +INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.1107180118560791 s +DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=38793771621875564070348385916962534616, time:1750767026.362907s req_ids:[8] +DEBUG 06-24 20:10:26 [manager.py:391] +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:169.37780380249023ms total_cost_time:169.4192886352539ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7214 prompt_cache_len:5151 prompt_cache_ratio:0.7140282783476574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10682797431945801 s +INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10856986045837402 s +DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=340226059752779660504320568643710369387, time:1750767026.5366027s req_ids:[8] +DEBUG 06-24 20:10:26 [manager.py:391] +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:162.08958625793457ms total_cost_time:162.13417053222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7215 prompt_cache_len:5151 prompt_cache_ratio:0.7139293139293139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s +INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.11039376258850098 s +DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=212537708616635262251282615363484367122, time:1750767026.7013197s req_ids:[8] +DEBUG 06-24 20:10:26 [manager.py:391] +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:192.80719757080078ms total_cost_time:192.85321235656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7216 prompt_cache_len:5151 prompt_cache_ratio:0.7138303769401331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10718154907226562 s +INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10921978950500488 s +DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=218654729717630473533709349939016141917, time:1750767026.9042196s req_ids:[8] +DEBUG 06-24 20:10:26 [manager.py:391] +DEBUG 06-24 20:10:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 31576.075 tokens/s +DEBUG 06-24 20:10:26 [stats.py:37] Avg prompt tokens throughput: 31567.301 tokens/s +DEBUG 06-24 20:10:26 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s +ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:202.26526260375977ms total_cost_time:202.31008529663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7217 prompt_cache_len:5151 prompt_cache_ratio:0.7137314673687127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 +DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.10702991485595703 s +INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.10892105102539062 s +DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=211973182059193687553247121749063789130, time:1750767027.1158285s req_ids:[8] +DEBUG 06-24 20:10:27 [manager.py:391] +ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:204.62870597839355ms total_cost_time:204.67233657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7218 prompt_cache_len:5151 prompt_cache_ratio:0.7136325852036576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 +DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s +INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.10929393768310547 s +DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=298911771670311815287180430790603675105, time:1750767027.324702s req_ids:[8] +DEBUG 06-24 20:10:27 [manager.py:391] +ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:202.7270793914795ms total_cost_time:202.76975631713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7219 prompt_cache_len:5151 prompt_cache_ratio:0.713533730433578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 +DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.1071012020111084 s +INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091609001159668 s +DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=23628964565569154186533181962007594750, time:1750767027.53783s req_ids:[8] +DEBUG 06-24 20:10:27 [manager.py:391] +ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:207.31067657470703ms total_cost_time:207.35406875610352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7220 prompt_cache_len:5151 prompt_cache_ratio:0.7134349030470915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 +DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.1074671745300293 s +INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.1095113754272461 s +DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=119896601454468249564775799677033631108, time:1750767027.750543s req_ids:[8] +DEBUG 06-24 20:10:27 [manager.py:391] +ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:380.95760345458984ms total_cost_time:381.00266456604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7221 prompt_cache_len:5151 prompt_cache_ratio:0.7133361030328209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 +DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10892295837402344 s +INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s +DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=93410540193450095456408940389700922066, time:1750767028.130421s req_ids:[8] +DEBUG 06-24 20:10:28 [manager.py:391] +ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:202.62813568115234ms total_cost_time:202.67271995544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7222 prompt_cache_len:5151 prompt_cache_ratio:0.7132373303793963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 +DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10788846015930176 s +INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988640785217285 s +DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=39464193718203905937358236735442189758, time:1750767028.3447165s req_ids:[8] +DEBUG 06-24 20:10:28 [manager.py:391] +ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:201.65252685546875ms total_cost_time:201.69615745544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7223 prompt_cache_len:5151 prompt_cache_ratio:0.7131385850754534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 +DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10877466201782227 s +INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s +DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=122662691721303781997121285494684971650, time:1750767028.5568492s req_ids:[8] +DEBUG 06-24 20:10:28 [manager.py:391] +ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:210.05582809448242ms total_cost_time:210.0989818572998ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7224 prompt_cache_len:5151 prompt_cache_ratio:0.7130398671096345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 +DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s +INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.11002564430236816 s +DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=15729667685554205224443925442438866255, time:1750767028.7681086s req_ids:[8] +DEBUG 06-24 20:10:28 [manager.py:391] +ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:201.97248458862305ms total_cost_time:202.01945304870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7225 prompt_cache_len:5151 prompt_cache_ratio:0.7129411764705882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 +DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.1078035831451416 s +INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.10998249053955078 s +DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=157369194917989399585955947836580024713, time:1750767028.975092s req_ids:[8] +DEBUG 06-24 20:10:28 [manager.py:391] +ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:205.43384552001953ms total_cost_time:205.4765224456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7226 prompt_cache_len:5151 prompt_cache_ratio:0.7128425131469692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 +DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:29 [batch.py:51] router release req id 8 +INFO 06-24 20:10:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s +INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.1091303825378418 s +DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=124855075529052620064855014365017169230, time:1750767029.1848948s req_ids:[8] +DEBUG 06-24 20:10:29 [manager.py:391] +ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42638206481934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7227 prompt_cache_len:5151 prompt_cache_ratio:0.7127438771274388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 +DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10744881629943848 s +INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.1095266342163086 s +DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=324192866249057640123247515126778826469, time:1750767029.3982937s req_ids:[8] +DEBUG 06-24 20:10:29 [manager.py:391] +ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:200.80208778381348ms total_cost_time:200.8492946624756ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7228 prompt_cache_len:5151 prompt_cache_ratio:0.7126452684006641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 +DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10861611366271973 s +INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.11068964004516602 s +DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=180048252551783266138792039764171005669, time:1750767029.6094892s req_ids:[8] +DEBUG 06-24 20:10:29 [manager.py:391] +ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:211.83037757873535ms total_cost_time:211.87639236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7229 prompt_cache_len:5151 prompt_cache_ratio:0.7125466869553189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 +DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.1069951057434082 s +INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.10897970199584961 s +DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=318325686242876287528242902171023528252, time:1750767029.8241057s req_ids:[8] +DEBUG 06-24 20:10:29 [manager.py:391] +ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:204.84423637390137ms total_cost_time:204.89001274108887ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7230 prompt_cache_len:5151 prompt_cache_ratio:0.712448132780083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 +DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10920882225036621 s +INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.11115670204162598 s +DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=324494310953011021023746819849373659221, time:1750767030.0353692s req_ids:[8] +DEBUG 06-24 20:10:30 [manager.py:391] +ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:205.01279830932617ms total_cost_time:205.05881309509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7231 prompt_cache_len:5151 prompt_cache_ratio:0.7123496058636426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 +DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10704183578491211 s +INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10891246795654297 s +DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=267027640250509903135018762451620827533, time:1750767030.2468457s req_ids:[8] +DEBUG 06-24 20:10:30 [manager.py:391] +ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:383.5582733154297ms total_cost_time:383.6038112640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7232 prompt_cache_len:5151 prompt_cache_ratio:0.7122511061946902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 +DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s +INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.1103353500366211 s +DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=162924482506875424748913019064898020679, time:1750767030.633132s req_ids:[8] +DEBUG 06-24 20:10:30 [manager.py:391] +ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:159.61050987243652ms total_cost_time:159.65652465820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7233 prompt_cache_len:5151 prompt_cache_ratio:0.7121526337619245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 +DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10731077194213867 s +INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10909295082092285 s +DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=186526876941831326948447572594480015686, time:1750767030.796385s req_ids:[8] +DEBUG 06-24 20:10:30 [manager.py:391] +ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:190.2024745941162ms total_cost_time:190.2477741241455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7234 prompt_cache_len:5151 prompt_cache_ratio:0.7120541885540503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 +DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.1077108383178711 s +INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s +DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=274481688101174119330160124376803015988, time:1750767030.9959865s req_ids:[8] +DEBUG 06-24 20:10:30 [manager.py:391] +ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:204.15091514587402ms total_cost_time:204.19812202453613ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7235 prompt_cache_len:5151 prompt_cache_ratio:0.7119557705597789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 +DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10736680030822754 s +INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s +DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=62316219555418782600933360323131736727, time:1750767031.208444s req_ids:[8] +DEBUG 06-24 20:10:31 [manager.py:391] +ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:201.07650756835938ms total_cost_time:201.12109184265137ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7236 prompt_cache_len:5151 prompt_cache_ratio:0.7118573797678275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 +DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s +INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10956740379333496 s +DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=74697170739964952193704113619547784909, time:1750767031.4148755s req_ids:[8] +DEBUG 06-24 20:10:31 [manager.py:391] +ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.80036735534668ms total_cost_time:204.84375953674316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7237 prompt_cache_len:5151 prompt_cache_ratio:0.7117590161669199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 +DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10768771171569824 s +INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s +DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=56741706128974825936677918356888018615, time:1750767031.6266813s req_ids:[8] +DEBUG 06-24 20:10:31 [manager.py:391] +ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.3769359588623ms total_cost_time:204.41746711730957ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:7238 prompt_cache_len:5151 prompt_cache_ratio:0.7116606797457862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 +DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10805654525756836 s +INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10999107360839844 s +DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=252434110650053846965505190618454983385, time:1750767031.8353941s req_ids:[8] +DEBUG 06-24 20:10:31 [manager.py:391] +ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.10537719726562ms total_cost_time:204.14996147155762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7239 prompt_cache_len:5151 prompt_cache_ratio:0.7115623704931621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 +DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10898637771606445 s +INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.1111764907836914 s +DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=22607813476252651674226409249720354387, time:1750767032.0439928s req_ids:[8] +DEBUG 06-24 20:10:32 [manager.py:391] +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.19740676879883ms total_cost_time:204.2393684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7240 prompt_cache_len:5151 prompt_cache_ratio:0.7114640883977901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 +DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10725021362304688 s +INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s +DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=314179247102126788612725897522878929399, time:1750767032.2612433s req_ids:[8] +DEBUG 06-24 20:10:32 [manager.py:391] +ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:173.16007614135742ms total_cost_time:173.2020378112793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7241 prompt_cache_len:5151 prompt_cache_ratio:0.7113658334484187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 +DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10866951942443848 s +INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.11076235771179199 s +DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=87865614705104011906865004172318973259, time:1750767032.4333034s req_ids:[8] +DEBUG 06-24 20:10:32 [manager.py:391] +ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:198.68040084838867ms total_cost_time:198.72570037841797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7242 prompt_cache_len:5151 prompt_cache_ratio:0.7112676056338029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 +DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:32 [batch.py:51] router release req id 8 +INFO 06-24 20:10:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s +INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.10953211784362793 s +DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=155436571193538276875067680947187297986, time:1750767032.6370661s req_ids:[8] +DEBUG 06-24 20:10:32 [manager.py:391] +ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:369.52781677246094ms total_cost_time:369.57406997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7243 prompt_cache_len:5151 prompt_cache_ratio:0.7111694049427033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 +DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10856461524963379 s +INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s +DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=336935784673416017800023648627878177954, time:1750767033.0074594s req_ids:[8] +DEBUG 06-24 20:10:33 [manager.py:391] +ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:197.30734825134277ms total_cost_time:197.35121726989746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7244 prompt_cache_len:5151 prompt_cache_ratio:0.7110712313638874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 +DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10713624954223633 s +INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10920381546020508 s +DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=209369300805153890953581975562276003969, time:1750767033.2155s req_ids:[8] +DEBUG 06-24 20:10:33 [manager.py:391] +ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:200.37603378295898ms total_cost_time:200.42061805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7245 prompt_cache_len:5151 prompt_cache_ratio:0.7109730848861283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 +DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10648012161254883 s +INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10804009437561035 s +DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=266898182224695576309616086475220382267, time:1750767033.4206557s req_ids:[8] +DEBUG 06-24 20:10:33 [manager.py:391] +ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:203.70888710021973ms total_cost_time:203.75418663024902ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7246 prompt_cache_len:5151 prompt_cache_ratio:0.710874965498206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 +DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10617375373840332 s +INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10814499855041504 s +DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=33333936293761564256867121031755046839, time:1750767033.6349773s req_ids:[8] +DEBUG 06-24 20:10:33 [manager.py:391] +ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:211.8818759918213ms total_cost_time:211.9009494781494ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:7247 prompt_cache_len:5151 prompt_cache_ratio:0.7107768731889057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 +DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10917878150939941 s +INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.11110424995422363 s +DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=2733833664530300635195244076976798363, time:1750767033.8469546s req_ids:[8] +DEBUG 06-24 20:10:33 [manager.py:391] +ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:202.20375061035156ms total_cost_time:202.24666595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7248 prompt_cache_len:5151 prompt_cache_ratio:0.7106788079470199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 +DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s +INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s +DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=325979888953816957162591866982931370752, time:1750767034.0554545s req_ids:[8] +DEBUG 06-24 20:10:34 [manager.py:391] +ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:168.75863075256348ms total_cost_time:168.80178451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7249 prompt_cache_len:5151 prompt_cache_ratio:0.7105807697613464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 +DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10898685455322266 s +INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.11130332946777344 s +DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=321049004300940147340612428139819631718, time:1750767034.2264402s req_ids:[8] +DEBUG 06-24 20:10:34 [manager.py:391] +ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:203.00769805908203ms total_cost_time:203.05132865905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7250 prompt_cache_len:5151 prompt_cache_ratio:0.7104827586206897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 +DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.1091756820678711 s +INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.11124229431152344 s +DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=233057664364048161071206213507931870307, time:1750767034.4363675s req_ids:[8] +DEBUG 06-24 20:10:34 [manager.py:391] +ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:201.68375968933105ms total_cost_time:201.72667503356934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7251 prompt_cache_len:5151 prompt_cache_ratio:0.7103847745138602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 +DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10716652870178223 s +INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.10918164253234863 s +DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=164413056384362301709939298631924542218, time:1750767034.645918s req_ids:[8] +DEBUG 06-24 20:10:34 [manager.py:391] +ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:211.56811714172363ms total_cost_time:211.61341667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7252 prompt_cache_len:5151 prompt_cache_ratio:0.7102868174296746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 +DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.1080026626586914 s +INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.1099698543548584 s +DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=328328421806607295482736433101757707892, time:1750767034.8594172s req_ids:[8] +DEBUG 06-24 20:10:34 [manager.py:391] +ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:204.2546272277832ms total_cost_time:204.2982578277588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7253 prompt_cache_len:5151 prompt_cache_ratio:0.7101888873569557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 +DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s +INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.11028552055358887 s +DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=69668054540230122915841868564786102137, time:1750767035.069043s req_ids:[8] +DEBUG 06-24 20:10:35 [manager.py:391] +ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:198.03547859191895ms total_cost_time:198.08053970336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7254 prompt_cache_len:5151 prompt_cache_ratio:0.7100909842845327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 +DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.1076955795288086 s +INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.1094350814819336 s +DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=48444522966012316390466421214308243207, time:1750767035.2744632s req_ids:[8] +DEBUG 06-24 20:10:35 [manager.py:391] +ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:166.80407524108887ms total_cost_time:166.83125495910645ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:7255 prompt_cache_len:5151 prompt_cache_ratio:0.7099931082012405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 +DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.30983519554138184 s +INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.3120441436767578 s +DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=19523284449129905030255925222745505602, time:1750767035.653979s req_ids:[8] +DEBUG 06-24 20:10:35 [manager.py:391] +ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:416.34416580200195ms total_cost_time:416.38827323913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7256 prompt_cache_len:5151 prompt_cache_ratio:0.7098952590959207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 +DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.10999369621276855 s +INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.11206793785095215 s +DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=197646652824363134957097674590760498934, time:1750767035.8692129s req_ids:[8] +DEBUG 06-24 20:10:35 [manager.py:391] +ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:200.73699951171875ms total_cost_time:200.77943801879883ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7257 prompt_cache_len:5151 prompt_cache_ratio:0.7097974369574204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 +DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10861921310424805 s +INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10996055603027344 s +DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=301940679035798947702126647785207321382, time:1750767036.0899715s req_ids:[8] +DEBUG 06-24 20:10:36 [manager.py:391] +ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:219.66290473937988ms total_cost_time:219.72179412841797ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7258 prompt_cache_len:5151 prompt_cache_ratio:0.7096996417745935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 +DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10868430137634277 s +INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.11057138442993164 s +DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=226595325471622424024653024689584587024, time:1750767036.3136182s req_ids:[8] +DEBUG 06-24 20:10:36 [manager.py:391] +ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:222.46861457824707ms total_cost_time:222.51224517822266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7259 prompt_cache_len:5151 prompt_cache_ratio:0.7096018735362998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 +DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10711359977722168 s +INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10921120643615723 s +DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=298886262915320744112737120774802138228, time:1750767036.5290208s req_ids:[8] +DEBUG 06-24 20:10:36 [manager.py:391] +ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:204.3755054473877ms total_cost_time:204.43344116210938ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:7260 prompt_cache_len:5151 prompt_cache_ratio:0.709504132231405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 +DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10895490646362305 s +INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.11085987091064453 s +DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=235150593230651662889508222522870167543, time:1750767036.7382686s req_ids:[8] +DEBUG 06-24 20:10:36 [manager.py:391] +ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:204.79750633239746ms total_cost_time:204.84328269958496ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7261 prompt_cache_len:5151 prompt_cache_ratio:0.7094064178487811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 +DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s +INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s +DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=220528497006738283949075094653301708227, time:1750767036.9499292s req_ids:[8] +DEBUG 06-24 20:10:36 [manager.py:391] +DEBUG 06-24 20:10:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 32440.702 tokens/s +DEBUG 06-24 20:10:36 [stats.py:37] Avg prompt tokens throughput: 32431.743 tokens/s +DEBUG 06-24 20:10:36 [stats.py:37] Avg generate tokens throughput: 8.959 tokens/s +ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:209.8388671875ms total_cost_time:209.883451461792ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7262 prompt_cache_len:5151 prompt_cache_ratio:0.7093087303773066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 +DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.10781431198120117 s +INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.10967803001403809 s +DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=90077169022495300155589441148934207215, time:1750767037.1622255s req_ids:[8] +DEBUG 06-24 20:10:37 [manager.py:391] +ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:209.15555953979492ms total_cost_time:209.1994285583496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7263 prompt_cache_len:5151 prompt_cache_ratio:0.7092110698058653 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 +DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.1071176528930664 s +INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.10887813568115234 s +DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=243431306123721172412479796478299610021, time:1750767037.3761737s req_ids:[8] +DEBUG 06-24 20:10:37 [manager.py:391] +ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:205.63054084777832ms total_cost_time:205.67631721496582ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7264 prompt_cache_len:5151 prompt_cache_ratio:0.709113436123348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 +DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.10825252532958984 s +INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.11010313034057617 s +DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=235663764225709031458581537271945563588, time:1750767037.5893896s req_ids:[8] +DEBUG 06-24 20:10:37 [manager.py:391] +ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:379.90880012512207ms total_cost_time:379.9548149108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7265 prompt_cache_len:5151 prompt_cache_ratio:0.709015829318651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 +DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.1084291934967041 s +INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.11028742790222168 s +DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=18099044438268323268202813297715585331, time:1750767037.9660473s req_ids:[8] +DEBUG 06-24 20:10:37 [manager.py:391] +ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:195.08767127990723ms total_cost_time:195.1448917388916ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7266 prompt_cache_len:5151 prompt_cache_ratio:0.7089182493806772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 +DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10971903800964355 s +INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11169052124023438 s +DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=112801311629044844999252805501410288395, time:1750767038.170881s req_ids:[8] +DEBUG 06-24 20:10:38 [manager.py:391] +ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:204.33807373046875ms total_cost_time:204.38551902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7267 prompt_cache_len:5151 prompt_cache_ratio:0.7088206962983349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 +DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10705018043518066 s +INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s +DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=209686261940783936477428539149758498168, time:1750767038.3808196s req_ids:[8] +DEBUG 06-24 20:10:38 [manager.py:391] +ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:208.73022079467773ms total_cost_time:208.77432823181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7268 prompt_cache_len:5151 prompt_cache_ratio:0.7087231700605393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 +DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s +INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s +DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=167877870579220179736183427106828961482, time:1750767038.593764s req_ids:[8] +DEBUG 06-24 20:10:38 [manager.py:391] +ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:207.3662281036377ms total_cost_time:207.41009712219238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7269 prompt_cache_len:5151 prompt_cache_ratio:0.7086256706562113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 +DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10982775688171387 s +INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11152338981628418 s +DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=72788187165892404612792987463776655359, time:1750767038.805863s req_ids:[8] +DEBUG 06-24 20:10:38 [manager.py:391] +ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:169.755220413208ms total_cost_time:169.7995662689209ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7270 prompt_cache_len:5151 prompt_cache_ratio:0.7085281980742778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 +DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10771036148071289 s +INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.10951542854309082 s +DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=237640782563767769112376076906352866175, time:1750767038.9784048s req_ids:[8] +DEBUG 06-24 20:10:38 [manager.py:391] +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:196.21515274047852ms total_cost_time:196.2599754333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7271 prompt_cache_len:5151 prompt_cache_ratio:0.7084307523036721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 +DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10825443267822266 s +INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s +DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=192314997140673783629424668319017003407, time:1750767039.1811612s req_ids:[8] +DEBUG 06-24 20:10:39 [manager.py:391] +ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:206.78997039794922ms total_cost_time:206.8338394165039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7272 prompt_cache_len:5151 prompt_cache_ratio:0.7083333333333334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 +DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10756731033325195 s +INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.10937666893005371 s +DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=48575477382155707358820825832496407423, time:1750767039.3924413s req_ids:[8] +DEBUG 06-24 20:10:39 [manager.py:391] +ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:209.03682708740234ms total_cost_time:209.08021926879883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7273 prompt_cache_len:5151 prompt_cache_ratio:0.7082359411522068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 +DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10834050178527832 s +INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.11032438278198242 s +DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=168370239596028939439786990877196622028, time:1750767039.6107357s req_ids:[8] +DEBUG 06-24 20:10:39 [manager.py:391] +ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:219.4812297821045ms total_cost_time:219.52486038208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7274 prompt_cache_len:5151 prompt_cache_ratio:0.7081385757492439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 +DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10720968246459961 s +INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s +DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=35173719214567630523763336769299016594, time:1750767039.8446443s req_ids:[8] +DEBUG 06-24 20:10:39 [manager.py:391] +ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:222.79119491577148ms total_cost_time:222.8376865386963ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7275 prompt_cache_len:5151 prompt_cache_ratio:0.708041237113402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 +DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.10679936408996582 s +INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10865998268127441 s +DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=186037920559638541463209925243644068529, time:1750767040.0587113s req_ids:[8] +DEBUG 06-24 20:10:40 [manager.py:391] +ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:373.95310401916504ms total_cost_time:373.9800453186035ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7276 prompt_cache_len:5151 prompt_cache_ratio:0.7079439252336449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 +DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.105194091796875 s +INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10732150077819824 s +DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=160500783290146718397027439758564782031, time:1750767040.4340706s req_ids:[8] +DEBUG 06-24 20:10:40 [manager.py:391] +ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:201.94363594055176ms total_cost_time:201.96866989135742ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7277 prompt_cache_len:5151 prompt_cache_ratio:0.7078466400989418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 +DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.10588622093200684 s +INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10793495178222656 s +DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=26450578057135793677165423474092906501, time:1750767040.6545265s req_ids:[8] +DEBUG 06-24 20:10:40 [manager.py:391] +ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:217.17405319213867ms total_cost_time:217.19813346862793ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7278 prompt_cache_len:5151 prompt_cache_ratio:0.7077493816982687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 +DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.1053471565246582 s +INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10740137100219727 s +DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=65111602603070663856775687233915174605, time:1750767040.8696058s req_ids:[8] +DEBUG 06-24 20:10:40 [manager.py:391] +ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:209.3832492828369ms total_cost_time:209.4438076019287ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:7279 prompt_cache_len:5151 prompt_cache_ratio:0.7076521500206072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 +DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10835027694702148 s +INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.1102135181427002 s +DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=340212487959947910105271440381458304022, time:1750767041.083428s req_ids:[8] +DEBUG 06-24 20:10:41 [manager.py:391] +ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:165.66967964172363ms total_cost_time:165.7125949859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7280 prompt_cache_len:5151 prompt_cache_ratio:0.7075549450549451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 +DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10791921615600586 s +INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.1098477840423584 s +DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=120705658204575691334420220369801096005, time:1750767041.2537646s req_ids:[8] +DEBUG 06-24 20:10:41 [manager.py:391] +ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:199.62644577026367ms total_cost_time:199.66959953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7281 prompt_cache_len:5151 prompt_cache_ratio:0.707457766790276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 +DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10724782943725586 s +INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.10930800437927246 s +DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=282924825497786259832217238419173368504, time:1750767041.460665s req_ids:[8] +DEBUG 06-24 20:10:41 [manager.py:391] +ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:205.11531829833984ms total_cost_time:205.16014099121094ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7282 prompt_cache_len:5151 prompt_cache_ratio:0.7073606152156001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 +DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10693979263305664 s +INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.10883164405822754 s +DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=103283245792934247857275274985882658373, time:1750767041.6732085s req_ids:[8] +DEBUG 06-24 20:10:41 [manager.py:391] +ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:205.39188385009766ms total_cost_time:205.45077323913574ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7283 prompt_cache_len:5151 prompt_cache_ratio:0.7072634903199231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 +DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10941433906555176 s +INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.11139178276062012 s +DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=80593836945201073127840683486257672087, time:1750767041.8846195s req_ids:[8] +DEBUG 06-24 20:10:41 [manager.py:391] +ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:200.61898231506348ms total_cost_time:200.65879821777344ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:7284 prompt_cache_len:5151 prompt_cache_ratio:0.707166392092257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 +DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10816264152526855 s +INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11006665229797363 s +DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=204473922872318872093972750179917253080, time:1750767042.0916972s req_ids:[8] +DEBUG 06-24 20:10:42 [manager.py:391] +ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:201.3876438140869ms total_cost_time:201.4334201812744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7285 prompt_cache_len:5151 prompt_cache_ratio:0.7070693205216197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 +DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10764122009277344 s +INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s +DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=128124623813229323274220139740691275408, time:1750767042.2988038s req_ids:[8] +DEBUG 06-24 20:10:42 [manager.py:391] +ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:361.8454933166504ms total_cost_time:361.9041442871094ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7286 prompt_cache_len:5151 prompt_cache_ratio:0.7069722755970355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 +DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10896444320678711 s +INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11142396926879883 s +DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=97926578070434944574522350007247309946, time:1750767042.664236s req_ids:[8] +DEBUG 06-24 20:10:42 [manager.py:391] +ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:195.53494453430176ms total_cost_time:195.57905197143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7287 prompt_cache_len:5151 prompt_cache_ratio:0.706875257307534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 +DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.1096341609954834 s +INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11160492897033691 s +DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=132158968149909586994938019318998920611, time:1750767042.876787s req_ids:[8] +DEBUG 06-24 20:10:42 [manager.py:391] +ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:211.32564544677734ms total_cost_time:211.37070655822754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7288 prompt_cache_len:5151 prompt_cache_ratio:0.7067782656421515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 +DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s +INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091609001159668 s +DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=138349920203990430016601246316735737527, time:1750767043.0899665s req_ids:[8] +DEBUG 06-24 20:10:43 [manager.py:391] +ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:201.6618251800537ms total_cost_time:201.7068862915039ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7289 prompt_cache_len:5151 prompt_cache_ratio:0.7066813005899301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 +DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.1072683334350586 s +INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091618537902832 s +DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=124741380771814824092382270157001046431, time:1750767043.298819s req_ids:[8] +DEBUG 06-24 20:10:43 [manager.py:391] +ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:205.76190948486328ms total_cost_time:205.80530166625977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7290 prompt_cache_len:5151 prompt_cache_ratio:0.7065843621399177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 +DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10787701606750488 s +INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.10998868942260742 s +DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=25476171138796315862199414570633814024, time:1750767043.5151877s req_ids:[8] +DEBUG 06-24 20:10:43 [manager.py:391] +ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:212.10646629333496ms total_cost_time:212.15319633483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7291 prompt_cache_len:5151 prompt_cache_ratio:0.7064874502811685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 +DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10770273208618164 s +INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.11013102531433105 s +DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=103668737836625702171761116550150663454, time:1750767043.7285688s req_ids:[8] +DEBUG 06-24 20:10:43 [manager.py:391] +ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:208.526611328125ms total_cost_time:208.5702419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7292 prompt_cache_len:5151 prompt_cache_ratio:0.7063905650027428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 +DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s +INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.11005091667175293 s +DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=85909545883973092041015216591173295903, time:1750767043.9410982s req_ids:[8] +DEBUG 06-24 20:10:43 [manager.py:391] +ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:206.86864852905273ms total_cost_time:206.91180229187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7293 prompt_cache_len:5151 prompt_cache_ratio:0.7062937062937062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 +DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.11012911796569824 s +INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.11256933212280273 s +DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=142901869390163619075359529293124477504, time:1750767044.1521049s req_ids:[8] +DEBUG 06-24 20:10:44 [manager.py:391] +ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:204.4088840484619ms total_cost_time:204.46443557739258ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:7294 prompt_cache_len:5151 prompt_cache_ratio:0.7061968741431314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 +DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.1071171760559082 s +INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s +DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=281543898630429601221932504852734354682, time:1750767044.3683124s req_ids:[8] +DEBUG 06-24 20:10:44 [manager.py:391] +ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:222.33104705810547ms total_cost_time:222.39017486572266ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7295 prompt_cache_len:5151 prompt_cache_ratio:0.706100068540096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 +DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.10967874526977539 s +INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.11212897300720215 s +DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=119883666103027908705969322485426229156, time:1750767044.5854666s req_ids:[8] +DEBUG 06-24 20:10:44 [manager.py:391] +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:201.3082504272461ms total_cost_time:201.35188102722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7296 prompt_cache_len:5151 prompt_cache_ratio:0.7060032894736842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 +DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.3094611167907715 s +INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.3120291233062744 s +DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=194823158417543623527934530994831563619, time:1750767045.0140455s req_ids:[8] +DEBUG 06-24 20:10:45 [manager.py:391] +ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:429.87537384033203ms total_cost_time:429.9194812774658ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7297 prompt_cache_len:5151 prompt_cache_ratio:0.7059065369329861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 +DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10968494415283203 s +INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.11201214790344238 s +DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=233477900293633246251300850828977870970, time:1750767045.2294223s req_ids:[8] +DEBUG 06-24 20:10:45 [manager.py:391] +ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:208.8766098022461ms total_cost_time:208.92047882080078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7298 prompt_cache_len:5151 prompt_cache_ratio:0.7058098109070978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 +DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s +INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s +DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=62271928968247115429868126046498150319, time:1750767045.441696s req_ids:[8] +DEBUG 06-24 20:10:45 [manager.py:391] +ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:209.68341827392578ms total_cost_time:209.74206924438477ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7299 prompt_cache_len:5151 prompt_cache_ratio:0.7057131113851213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 +DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10932350158691406 s +INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.11162996292114258 s +DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=283890927453082896090528697948699020209, time:1750767045.654972s req_ids:[8] +DEBUG 06-24 20:10:45 [manager.py:391] +ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:205.7938575744629ms total_cost_time:205.83701133728027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7300 prompt_cache_len:5151 prompt_cache_ratio:0.7056164383561644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 +DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10733652114868164 s +INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.10981154441833496 s +DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=37450076678544449488050399548789079055, time:1750767045.8679988s req_ids:[8] +DEBUG 06-24 20:10:45 [manager.py:391] +ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:208.04476737976074ms total_cost_time:208.08911323547363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7301 prompt_cache_len:5151 prompt_cache_ratio:0.7055197918093412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 +DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.1076059341430664 s +INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10993838310241699 s +DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=214245342262997392794930319917498776295, time:1750767046.080606s req_ids:[8] +DEBUG 06-24 20:10:46 [manager.py:391] +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:206.91800117492676ms total_cost_time:206.9406509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7302 prompt_cache_len:5151 prompt_cache_ratio:0.7054231717337716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 +DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10524439811706543 s +INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10753488540649414 s +DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=101718473352239467030873801629077513741, time:1750767046.2934191s req_ids:[8] +DEBUG 06-24 20:10:46 [manager.py:391] +ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:211.2421989440918ms total_cost_time:211.28582954406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7303 prompt_cache_len:5151 prompt_cache_ratio:0.7053265781185815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 +INFO 06-24 20:10:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10908842086791992 s +INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.11162447929382324 s +DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=203044898641526475654016607069188912737, time:1750767046.5072513s req_ids:[8] +DEBUG 06-24 20:10:46 [manager.py:391] +ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:209.32650566101074ms total_cost_time:209.3665599822998ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:7304 prompt_cache_len:5151 prompt_cache_ratio:0.7052300109529025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 +DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10796213150024414 s +INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999488830566406 s +INFO 06-24 20:10:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=32340213592411030954585997555498175887, time:1750767046.7198253s req_ids:[8] +DEBUG 06-24 20:10:46 [manager.py:391] +ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:203.83477210998535ms total_cost_time:203.89318466186523ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:7305 prompt_cache_len:5151 prompt_cache_ratio:0.7051334702258727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 +DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10955238342285156 s +INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s +DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=83335548004852674011446397348800612464, time:1750767046.925753s req_ids:[8] +DEBUG 06-24 20:10:46 [manager.py:391] +ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:10:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 31888.636 tokens/s +DEBUG 06-24 20:10:47 [stats.py:37] Avg prompt tokens throughput: 31879.784 tokens/s +DEBUG 06-24 20:10:47 [stats.py:37] Avg generate tokens throughput: 8.852 tokens/s +INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:199.49865341186523ms total_cost_time:199.540376663208ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7306 prompt_cache_len:5151 prompt_cache_ratio:0.7050369559266356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 +DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10735225677490234 s +INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.10929346084594727 s +DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=162247465693118716121961573930951560220, time:1750767047.1321516s req_ids:[8] +DEBUG 06-24 20:10:47 [manager.py:391] +ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:383.2435607910156ms total_cost_time:383.2898139953613ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7307 prompt_cache_len:5151 prompt_cache_ratio:0.7049404680443411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 +DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s +INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.1101689338684082 s +DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=222498418444786361982687246301777696453, time:1750767047.5158505s req_ids:[8] +DEBUG 06-24 20:10:47 [manager.py:391] +ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:195.90425491333008ms total_cost_time:195.95003128051758ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7308 prompt_cache_len:5151 prompt_cache_ratio:0.7048440065681445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 +DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10785698890686035 s +INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.10981583595275879 s +DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=131810273439665120696535019073103969246, time:1750767047.7264242s req_ids:[8] +DEBUG 06-24 20:10:47 [manager.py:391] +ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:208.65941047668457ms total_cost_time:208.70256423950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7309 prompt_cache_len:5151 prompt_cache_ratio:0.7047475714872076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 +DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.11016201972961426 s +INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.11211538314819336 s +DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=242558619468284472902352478849998760525, time:1750767047.9373255s req_ids:[8] +DEBUG 06-24 20:10:47 [manager.py:391] +ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:207.34763145446777ms total_cost_time:207.39245414733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7310 prompt_cache_len:5151 prompt_cache_ratio:0.7046511627906977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 +DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s +INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11032509803771973 s +DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=12884510638409178235254292396741050371, time:1750767048.1501095s req_ids:[8] +DEBUG 06-24 20:10:48 [manager.py:391] +ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:204.1158676147461ms total_cost_time:204.15997505187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7311 prompt_cache_len:5151 prompt_cache_ratio:0.7045547804677883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 +DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10796117782592773 s +INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s +DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=148057922452117575294072474300219806118, time:1750767048.3592095s req_ids:[8] +DEBUG 06-24 20:10:48 [manager.py:391] +ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:203.84740829467773ms total_cost_time:203.89032363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7312 prompt_cache_len:5151 prompt_cache_ratio:0.7044584245076586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 +DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10847043991088867 s +INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11024236679077148 s +DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=172975489887196558188711160745698945670, time:1750767048.5673492s req_ids:[8] +DEBUG 06-24 20:10:48 [manager.py:391] +ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:203.25112342834473ms total_cost_time:203.2938003540039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7313 prompt_cache_len:5151 prompt_cache_ratio:0.7043620948994941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 +DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s +INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.1095724105834961 s +DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=131807397670034056073256056305535833953, time:1750767048.7788599s req_ids:[8] +DEBUG 06-24 20:10:48 [manager.py:391] +ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:200.07824897766113ms total_cost_time:200.1323699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:7314 prompt_cache_len:5151 prompt_cache_ratio:0.7042657916324856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 +DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10791158676147461 s +INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.10936141014099121 s +DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=155511238821116433683993091218704466907, time:1750767048.9847224s req_ids:[8] +DEBUG 06-24 20:10:48 [manager.py:391] +ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:206.18653297424316ms total_cost_time:206.22920989990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7315 prompt_cache_len:5151 prompt_cache_ratio:0.7041695146958304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 +DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.11001396179199219 s +INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s +DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=214104080320373597442255604445414321819, time:1750767049.1997285s req_ids:[8] +DEBUG 06-24 20:10:49 [manager.py:391] +ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:205.89780807495117ms total_cost_time:205.93762397766113ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:7316 prompt_cache_len:5151 prompt_cache_ratio:0.7040732640787315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 +DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.10821032524108887 s +INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.10951757431030273 s +DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=216691462765695122594664936852641311230, time:1750767049.4068573s req_ids:[8] +DEBUG 06-24 20:10:49 [manager.py:391] +ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:409.5494747161865ms total_cost_time:409.5945358276367ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7317 prompt_cache_len:5151 prompt_cache_ratio:0.7039770397703977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 +DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.10844063758850098 s +INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s +DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=31662051254250471892806448142275701230, time:1750767049.8192294s req_ids:[8] +DEBUG 06-24 20:10:49 [manager.py:391] +ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:199.5689868927002ms total_cost_time:199.62096214294434ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:7318 prompt_cache_len:5151 prompt_cache_ratio:0.7038808417600437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 +DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.1084280014038086 s +INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.10986733436584473 s +DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=335360585898198737944900427118214954570, time:1750767050.0268033s req_ids:[8] +DEBUG 06-24 20:10:50 [manager.py:391] +ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:203.83977890014648ms total_cost_time:203.88412475585938ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7319 prompt_cache_len:5151 prompt_cache_ratio:0.7037846700368903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 +DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:50 [batch.py:51] router release req id 8 +INFO 06-24 20:10:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10906481742858887 s +INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.11031866073608398 s +DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=284093857157982465514860989567850654524, time:1750767050.2383218s req_ids:[8] +DEBUG 06-24 20:10:50 [manager.py:391] +ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:205.91068267822266ms total_cost_time:205.95550537109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7320 prompt_cache_len:5151 prompt_cache_ratio:0.703688524590164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 +DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s +INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.10928559303283691 s +DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=159647160188703735805016841660777369951, time:1750767050.4501858s req_ids:[8] +DEBUG 06-24 20:10:50 [manager.py:391] +ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:205.05237579345703ms total_cost_time:205.09672164916992ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7321 prompt_cache_len:5151 prompt_cache_ratio:0.7035924054090971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 +DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10866475105285645 s +INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.1103360652923584 s +DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=65021463197538769737667566934514872154, time:1750767050.6760638s req_ids:[8] +DEBUG 06-24 20:10:50 [manager.py:391] +ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:237.54262924194336ms total_cost_time:237.58649826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7322 prompt_cache_len:5151 prompt_cache_ratio:0.7034963124829282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 +DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10817289352416992 s +INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.11016368865966797 s +DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=204447481982750042091179497161814546802, time:1750767050.8989658s req_ids:[8] +DEBUG 06-24 20:10:50 [manager.py:391] +ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:200.20365715026855ms total_cost_time:200.2699375152588ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:7323 prompt_cache_len:5151 prompt_cache_ratio:0.7034002458009013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 +DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.11021018028259277 s +INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.1122748851776123 s +DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=279205993072441188444220509166110482086, time:1750767051.1119697s req_ids:[8] +DEBUG 06-24 20:10:51 [manager.py:391] +ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:206.6948413848877ms total_cost_time:206.73680305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7324 prompt_cache_len:5151 prompt_cache_ratio:0.7033042053522666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 +DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10750222206115723 s +INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.10950231552124023 s +DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=22380373094516319786997426597829126667, time:1750767051.3232763s req_ids:[8] +DEBUG 06-24 20:10:51 [manager.py:391] +ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:205.83486557006836ms total_cost_time:205.87944984436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7325 prompt_cache_len:5151 prompt_cache_ratio:0.7032081911262799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 +DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10957860946655273 s +INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.11160802841186523 s +DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=159896924075375755684379307928335316542, time:1750767051.5456898s req_ids:[8] +DEBUG 06-24 20:10:51 [manager.py:391] +ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:215.43574333190918ms total_cost_time:215.47484397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:7326 prompt_cache_len:5151 prompt_cache_ratio:0.7031122031122031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 +DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10700154304504395 s +INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.10908865928649902 s +DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=270898054671327502493741152687266374370, time:1750767051.754808s req_ids:[8] +DEBUG 06-24 20:10:51 [manager.py:391] +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:202.91876792907715ms total_cost_time:202.96406745910645ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7327 prompt_cache_len:5151 prompt_cache_ratio:0.703016241299304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 +DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.3110976219177246 s +INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.3131113052368164 s +DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=22735911283396578895209749089605901233, time:1750767052.17783s req_ids:[8] +DEBUG 06-24 20:10:52 [manager.py:391] +ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:426.2092113494873ms total_cost_time:426.2659549713135ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7328 prompt_cache_len:5151 prompt_cache_ratio:0.7029203056768559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 +DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10930109024047852 s +INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11132264137268066 s +DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=323241569393630522845609790233088084645, time:1750767052.3975043s req_ids:[8] +DEBUG 06-24 20:10:52 [manager.py:391] +ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.52444458007812ms total_cost_time:205.5683135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7329 prompt_cache_len:5151 prompt_cache_ratio:0.7028243962341384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 +DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10826683044433594 s +INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s +DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=173376044225472621086365261218701864554, time:1750767052.6120102s req_ids:[8] +DEBUG 06-24 20:10:52 [manager.py:391] +ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.8084011077881ms total_cost_time:205.8548927307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7330 prompt_cache_len:5151 prompt_cache_ratio:0.7027285129604366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 +DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10857319831848145 s +INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11063051223754883 s +DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=247706796193667989818515651279577926631, time:1750767052.8245676s req_ids:[8] +DEBUG 06-24 20:10:52 [manager.py:391] +ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.21283149719238ms total_cost_time:205.25646209716797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7331 prompt_cache_len:5151 prompt_cache_ratio:0.7026326558450416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 +DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s +INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s +DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=303874733997143244583882110787305815590, time:1750767053.0342126s req_ids:[8] +DEBUG 06-24 20:10:53 [manager.py:391] +ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:206.7413330078125ms total_cost_time:206.7859172821045ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7332 prompt_cache_len:5151 prompt_cache_ratio:0.7025368248772504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 +DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:53 [batch.py:51] router release req id 8 +INFO 06-24 20:10:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:10:53 [statics_utils.py:24] mean first cost: 230.9962014731095 ms +INFO 06-24 20:10:53 [statics_utils.py:24] mean per token cost: 0.09323150455453197 ms +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10877418518066406 s +INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.11076951026916504 s +INFO 06-24 20:10:53 [manager.py:620] left req id 8can release False refcount 3 +DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=80566298018534670368675466917469600892, time:1750767053.2463005s req_ids:[8] +DEBUG 06-24 20:10:53 [manager.py:391] +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:205.5346965789795ms total_cost_time:205.59954643249512ms,out_token_counter:1 mean_per_token_cost_time: 0.064849853515625ms prompt_token_num:7333 prompt_cache_len:5151 prompt_cache_ratio:0.7024410200463658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 +DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10708403587341309 s +INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.1089775562286377 s +DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=317839866243944364860789863817722667801, time:1750767053.4598207s req_ids:[8] +DEBUG 06-24 20:10:53 [manager.py:391] +ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:208.9989185333252ms total_cost_time:209.04207229614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7334 prompt_cache_len:5151 prompt_cache_ratio:0.7023452413416962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 +DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10699081420898438 s +INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.10904383659362793 s +DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=151879409406905662584552354619321757026, time:1750767053.6736982s req_ids:[8] +DEBUG 06-24 20:10:53 [manager.py:391] +ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:221.76122665405273ms total_cost_time:221.81987762451172ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7335 prompt_cache_len:5151 prompt_cache_ratio:0.7022494887525562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 +DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.1093897819519043 s +INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.11072707176208496 s +DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=57716537177508755309603139784448212464, time:1750767053.9091198s req_ids:[8] +DEBUG 06-24 20:10:53 [manager.py:391] +ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:218.83416175842285ms total_cost_time:218.87993812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7336 prompt_cache_len:5151 prompt_cache_ratio:0.7021537622682661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 +DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.11016392707824707 s +DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=131112938989285941164537059457064845759, time:1750767054.1251578s req_ids:[8] +DEBUG 06-24 20:10:54 [manager.py:391] +ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:209.9928855895996ms total_cost_time:210.0374698638916ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7337 prompt_cache_len:5151 prompt_cache_ratio:0.7020580618781518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 +DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10692691802978516 s +INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.1094052791595459 s +DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=212613693472918364232639730004469037145, time:1750767054.3402786s req_ids:[8] +DEBUG 06-24 20:10:54 [manager.py:391] +ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:378.12018394470215ms total_cost_time:378.16357612609863ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7338 prompt_cache_len:5151 prompt_cache_ratio:0.7019623875715454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 +DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10837459564208984 s +INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.11049795150756836 s +DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=1179205756127142338216323444751613379, time:1750767054.7193651s req_ids:[8] +DEBUG 06-24 20:10:54 [manager.py:391] +ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:202.06236839294434ms total_cost_time:202.10623741149902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7339 prompt_cache_len:5151 prompt_cache_ratio:0.7018667393377844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 +DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10769081115722656 s +INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.10984110832214355 s +DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=71641422852253229981606777297116572436, time:1750767054.9300795s req_ids:[8] +DEBUG 06-24 20:10:54 [manager.py:391] +ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:205.78813552856445ms total_cost_time:205.84583282470703ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7340 prompt_cache_len:5151 prompt_cache_ratio:0.7017711171662125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 +DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10889339447021484 s +INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11090278625488281 s +DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=201054428637749086976384757490202543929, time:1750767055.1429398s req_ids:[8] +DEBUG 06-24 20:10:55 [manager.py:391] +ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.05213737487793ms total_cost_time:205.0952911376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7341 prompt_cache_len:5151 prompt_cache_ratio:0.701675521046179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 +DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10735297203063965 s +INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.10945558547973633 s +DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=136333656841176072696256853514154589337, time:1750767055.3535612s req_ids:[8] +DEBUG 06-24 20:10:55 [manager.py:391] +ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.2309513092041ms total_cost_time:205.28507232666016ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:7342 prompt_cache_len:5151 prompt_cache_ratio:0.701579950967039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 +DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s +INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11039948463439941 s +DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=158370428512695886902722263078567266670, time:1750767055.5645573s req_ids:[8] +DEBUG 06-24 20:10:55 [manager.py:391] +ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.6751251220703ms total_cost_time:205.71613311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:7343 prompt_cache_len:5151 prompt_cache_ratio:0.7014844069181534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 +DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10776782035827637 s +INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11047887802124023 s +DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=202473020901166999540914998573824548304, time:1750767055.776688s req_ids:[8] +DEBUG 06-24 20:10:55 [manager.py:391] +ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:208.9860439300537ms total_cost_time:209.03873443603516ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7344 prompt_cache_len:5151 prompt_cache_ratio:0.7013888888888888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 +DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s +INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11045527458190918 s +DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=210499403461884205756609329565684921909, time:1750767055.9909363s req_ids:[8] +DEBUG 06-24 20:10:55 [manager.py:391] +ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.78861236572266ms total_cost_time:205.83295822143555ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7345 prompt_cache_len:5151 prompt_cache_ratio:0.7012933968686181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 +DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.10727882385253906 s +INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.10935759544372559 s +DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=214992066548604897783592603758629816394, time:1750767056.2052305s req_ids:[8] +DEBUG 06-24 20:10:56 [manager.py:391] +ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:209.97166633605957ms total_cost_time:210.01577377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7346 prompt_cache_len:5151 prompt_cache_ratio:0.7011979308467193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 +DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.10955357551574707 s +INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.11095857620239258 s +DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=252496551072470857929874306331235812320, time:1750767056.4200165s req_ids:[8] +DEBUG 06-24 20:10:56 [manager.py:391] +ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:207.57436752319336ms total_cost_time:207.61775970458984ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7347 prompt_cache_len:5151 prompt_cache_ratio:0.7011024908125766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 +DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.1080775260925293 s +INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.11006593704223633 s +DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=83842210521168743064324428769028079487, time:1750767056.631914s req_ids:[8] +DEBUG 06-24 20:10:56 [manager.py:391] +ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:382.60769844055176ms total_cost_time:382.65275955200195ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7348 prompt_cache_len:5151 prompt_cache_ratio:0.7010070767555797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 +DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s +INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10955929756164551 s +DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=128429492537027966226609494702577465483, time:1750767057.0157988s req_ids:[8] +DEBUG 06-24 20:10:57 [manager.py:391] +DEBUG 06-24 20:10:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 31481.431 tokens/s +DEBUG 06-24 20:10:57 [stats.py:37] Avg prompt tokens throughput: 31472.941 tokens/s +DEBUG 06-24 20:10:57 [stats.py:37] Avg generate tokens throughput: 8.490 tokens/s +ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:200.7002830505371ms total_cost_time:200.7436752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7349 prompt_cache_len:5151 prompt_cache_ratio:0.7009116886651245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 +DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10685420036315918 s +INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10940146446228027 s +DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=288418038020713596785352609430658875254, time:1750767057.2284973s req_ids:[8] +DEBUG 06-24 20:10:57 [manager.py:391] +ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:208.94718170166016ms total_cost_time:208.99200439453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7350 prompt_cache_len:5151 prompt_cache_ratio:0.7008163265306122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 +DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10704827308654785 s +INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10893797874450684 s +DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=148478776457784561871096967559142469882, time:1750767057.4491668s req_ids:[8] +DEBUG 06-24 20:10:57 [manager.py:391] +ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:216.23945236206055ms total_cost_time:216.28308296203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7351 prompt_cache_len:5151 prompt_cache_ratio:0.7007209903414502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 +DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10738039016723633 s +INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10949850082397461 s +DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=194783267836401790684942073979424805220, time:1750767057.6631773s req_ids:[8] +DEBUG 06-24 20:10:57 [manager.py:391] +ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:201.64895057678223ms total_cost_time:201.6913890838623ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7352 prompt_cache_len:5151 prompt_cache_ratio:0.7006256800870512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 +DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10724782943725586 s +INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10930109024047852 s +DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=321620402646718828637577748429759378866, time:1750767057.87115s req_ids:[8] +DEBUG 06-24 20:10:57 [manager.py:391] +ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:205.9316635131836ms total_cost_time:205.9946060180664ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7353 prompt_cache_len:5151 prompt_cache_ratio:0.700530395756834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 +DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s +INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s +DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=1813075882591651542096087047651490230, time:1750767058.0976398s req_ids:[8] +DEBUG 06-24 20:10:58 [manager.py:391] +ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:221.12226486206055ms total_cost_time:221.16613388061523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7354 prompt_cache_len:5151 prompt_cache_ratio:0.700435137340223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 +DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.11002707481384277 s +INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11221837997436523 s +DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=331777170448266398232102119832378698544, time:1750767058.311498s req_ids:[8] +DEBUG 06-24 20:10:58 [manager.py:391] +ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:205.93929290771484ms total_cost_time:205.98554611206055ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7355 prompt_cache_len:5151 prompt_cache_ratio:0.7003399048266485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 +DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s +INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s +DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=103223605056089905235631302160333488042, time:1750767058.5231457s req_ids:[8] +DEBUG 06-24 20:10:58 [manager.py:391] +ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:203.97138595581055ms total_cost_time:204.02765274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7356 prompt_cache_len:5151 prompt_cache_ratio:0.7002446982055465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 +DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.1081852912902832 s +INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012864112854004 s +DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=182632211483088519975840098218018721385, time:1750767058.7347765s req_ids:[8] +DEBUG 06-24 20:10:58 [manager.py:391] +ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:204.68759536743164ms total_cost_time:204.73241806030273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7357 prompt_cache_len:5151 prompt_cache_ratio:0.7001495174663586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 +DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s +INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.10902738571166992 s +DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=216356244524370284264384879171389254477, time:1750767058.9457476s req_ids:[8] +DEBUG 06-24 20:10:58 [manager.py:391] +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:219.1781997680664ms total_cost_time:219.2220687866211ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7358 prompt_cache_len:5151 prompt_cache_ratio:0.7000543625985323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 +DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.30957484245300293 s +INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.3116602897644043 s +DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=2264642607349805404401190042475125373, time:1750767059.3760958s req_ids:[8] +DEBUG 06-24 20:10:59 [manager.py:391] +ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:417.2382354736328ms total_cost_time:417.283296585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7359 prompt_cache_len:5151 prompt_cache_ratio:0.6999592335915206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 +DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s +INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.11086463928222656 s +DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=66519183249440458341463583743828010204, time:1750767059.592086s req_ids:[8] +DEBUG 06-24 20:10:59 [manager.py:391] +ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:206.94756507873535ms total_cost_time:206.99429512023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7360 prompt_cache_len:5151 prompt_cache_ratio:0.6998641304347826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 +DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.10973572731018066 s +INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.11189842224121094 s +DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=128010648796410410069244495630260560290, time:1750767059.8066742s req_ids:[8] +DEBUG 06-24 20:10:59 [manager.py:391] +ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:206.68530464172363ms total_cost_time:206.73155784606934ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7361 prompt_cache_len:5151 prompt_cache_ratio:0.6997690531177829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 +DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:10:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.1090555191040039 s +INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s +DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=73649524278685635958343730194231164109, time:1750767060.0386567s req_ids:[8] +DEBUG 06-24 20:11:00 [manager.py:391] +ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:225.81052780151367ms total_cost_time:225.86822509765625ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7362 prompt_cache_len:5151 prompt_cache_ratio:0.6996740016299918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 +DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.11015439033508301 s +INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11232709884643555 s +DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=83706735499495914136268620612917560818, time:1750767060.2527764s req_ids:[8] +DEBUG 06-24 20:11:00 [manager.py:391] +ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:213.3166790008545ms total_cost_time:213.3643627166748ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7363 prompt_cache_len:5151 prompt_cache_ratio:0.6995789759608855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 +DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10936665534973145 s +INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11142969131469727 s +DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=284050316983779356658319656710424368611, time:1750767060.4682941s req_ids:[8] +DEBUG 06-24 20:11:00 [manager.py:391] +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:200.6206512451172ms total_cost_time:200.66237449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7364 prompt_cache_len:5151 prompt_cache_ratio:0.6994839760999457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 +DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10686254501342773 s +INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.10883450508117676 s +DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=19753685728978327083420076091198538540, time:1750767060.679862s req_ids:[8] +DEBUG 06-24 20:11:00 [manager.py:391] +ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:201.65491104125977ms total_cost_time:201.69782638549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7365 prompt_cache_len:5151 prompt_cache_ratio:0.6993890020366599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 +DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:00 [batch.py:51] router release req id 8 +INFO 06-24 20:11:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s +INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s +DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=49545670795472131664474338655634924967, time:1750767060.8987358s req_ids:[8] +DEBUG 06-24 20:11:00 [manager.py:391] +ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:220.58963775634766ms total_cost_time:220.63231468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7366 prompt_cache_len:5151 prompt_cache_ratio:0.6992940537605213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 +DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10546112060546875 s +INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10737824440002441 s +DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=227299348526851670516411576296245009630, time:1750767061.1127481s req_ids:[8] +DEBUG 06-24 20:11:01 [manager.py:391] +ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:204.18882369995117ms total_cost_time:204.23364639282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7367 prompt_cache_len:5151 prompt_cache_ratio:0.699199131261029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 +DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10712885856628418 s +INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s +DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=294904393198683879510940222567556944702, time:1750767061.3243184s req_ids:[8] +DEBUG 06-24 20:11:01 [manager.py:391] +ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:203.16743850708008ms total_cost_time:203.21011543273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7368 prompt_cache_len:5151 prompt_cache_ratio:0.6991042345276873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 +DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s +INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.11115813255310059 s +DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=329111139101046428596288774370877026745, time:1750767061.5334024s req_ids:[8] +DEBUG 06-24 20:11:01 [manager.py:391] +ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:383.50439071655273ms total_cost_time:383.54945182800293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7369 prompt_cache_len:5151 prompt_cache_ratio:0.6990093635500068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 +DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10748457908630371 s +INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10957789421081543 s +DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=55895185674284968159937508345713491013, time:1750767061.918857s req_ids:[8] +DEBUG 06-24 20:11:01 [manager.py:391] +ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:201.11513137817383ms total_cost_time:201.16376876831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:7370 prompt_cache_len:5151 prompt_cache_ratio:0.6989145183175034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 +DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.11032509803771973 s +INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11241269111633301 s +DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=82191193730356992513365928255088525865, time:1750767062.130072s req_ids:[8] +DEBUG 06-24 20:11:02 [manager.py:391] +ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:201.37691497802734ms total_cost_time:201.4174461364746ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:7371 prompt_cache_len:5151 prompt_cache_ratio:0.6988196988196989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 +DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10887813568115234 s +INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11119318008422852 s +DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=138765018916252575226916705138083812411, time:1750767062.349686s req_ids:[8] +DEBUG 06-24 20:11:02 [manager.py:391] +ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:218.44983100891113ms total_cost_time:218.49322319030762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7372 prompt_cache_len:5151 prompt_cache_ratio:0.6987249050461205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 +DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s +INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093473434448242 s +DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=19661280138114560118248711856672004970, time:1750767062.5639071s req_ids:[8] +DEBUG 06-24 20:11:02 [manager.py:391] +ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:226.96638107299805ms total_cost_time:227.01311111450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7373 prompt_cache_len:5151 prompt_cache_ratio:0.6986301369863014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 +DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10653948783874512 s +INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.10851645469665527 s +DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=152400839019831143599401829167042997652, time:1750767062.790551s req_ids:[8] +DEBUG 06-24 20:11:02 [manager.py:391] +ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:202.80098915100098ms total_cost_time:202.84438133239746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7374 prompt_cache_len:5151 prompt_cache_ratio:0.6985353946297803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 +DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10629916191101074 s +INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.10836982727050781 s +DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=111804252221161114773714917822451462088, time:1750767063.014768s req_ids:[8] +DEBUG 06-24 20:11:03 [manager.py:391] +ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:210.43944358825684ms total_cost_time:210.45780181884766ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:7375 prompt_cache_len:5151 prompt_cache_ratio:0.6984406779661017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 +DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10505843162536621 s +INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.10683917999267578 s +DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=4638162854259765252694369501378054258, time:1750767063.2233026s req_ids:[8] +DEBUG 06-24 20:11:03 [manager.py:391] +ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:191.82348251342773ms total_cost_time:191.87045097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7376 prompt_cache_len:5151 prompt_cache_ratio:0.6983459869848156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 +DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s +INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.10959005355834961 s +DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=317523030494298247547354973342494453228, time:1750767063.4166574s req_ids:[8] +DEBUG 06-24 20:11:03 [manager.py:391] +ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:205.20472526550293ms total_cost_time:205.26385307312012ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7377 prompt_cache_len:5151 prompt_cache_ratio:0.6982513216754779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 +DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10801386833190918 s +INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s +DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=324472046932402268415492465854863902769, time:1750767063.6267533s req_ids:[8] +DEBUG 06-24 20:11:03 [manager.py:391] +ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:196.43759727478027ms total_cost_time:196.47932052612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7378 prompt_cache_len:5151 prompt_cache_ratio:0.6981566820276498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 +DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10714912414550781 s +INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s +DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=93925838868649264423400233645302644116, time:1750767063.8278341s req_ids:[8] +DEBUG 06-24 20:11:03 [manager.py:391] +ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:359.33876037597656ms total_cost_time:359.38334465026855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7379 prompt_cache_len:5151 prompt_cache_ratio:0.6980620680308985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 +DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10827159881591797 s +INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.11037421226501465 s +DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=291168360001639087648506427455969920477, time:1750767064.1908667s req_ids:[8] +DEBUG 06-24 20:11:04 [manager.py:391] +ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:202.8830051422119ms total_cost_time:202.927827835083ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7380 prompt_cache_len:5151 prompt_cache_ratio:0.6979674796747968 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 +DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:04 [batch.py:51] router release req id 8 +INFO 06-24 20:11:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10750436782836914 s +INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.10943198204040527 s +DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=87886597218279036644459730700986809497, time:1750767064.410748s req_ids:[8] +DEBUG 06-24 20:11:04 [manager.py:391] +ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:214.57195281982422ms total_cost_time:214.6139144897461ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7381 prompt_cache_len:5151 prompt_cache_ratio:0.697872916948923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 +DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10829567909240723 s +INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.1107473373413086 s +DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=3799367668225916929373851165032803228, time:1750767064.6259s req_ids:[8] +DEBUG 06-24 20:11:04 [manager.py:391] +ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:206.11572265625ms total_cost_time:206.1593532562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7382 prompt_cache_len:5151 prompt_cache_ratio:0.6977783798428611 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 +DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10827946662902832 s +INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s +DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=111627068176744698271439517244740906258, time:1750767064.8386378s req_ids:[8] +DEBUG 06-24 20:11:04 [manager.py:391] +ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:203.77326011657715ms total_cost_time:203.81593704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7383 prompt_cache_len:5151 prompt_cache_ratio:0.6976838683462008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 +DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.1067655086517334 s +INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.10935497283935547 s +DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=254904684821976854590610823142432295856, time:1750767065.047482s req_ids:[8] +DEBUG 06-24 20:11:05 [manager.py:391] +ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:204.5764923095703ms total_cost_time:204.6217918395996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7384 prompt_cache_len:5151 prompt_cache_ratio:0.6975893824485374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 +DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10688376426696777 s +INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.1089639663696289 s +DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=59771303662292638734836263390182195397, time:1750767065.2616735s req_ids:[8] +DEBUG 06-24 20:11:05 [manager.py:391] +ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:208.573579788208ms total_cost_time:208.6317539215088ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:7385 prompt_cache_len:5151 prompt_cache_ratio:0.697494922139472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 +DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.1101675033569336 s +INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11226344108581543 s +DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=16595835252883497813491293212124451774, time:1750767065.4747312s req_ids:[8] +DEBUG 06-24 20:11:05 [manager.py:391] +ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:207.62133598327637ms total_cost_time:207.66568183898926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7386 prompt_cache_len:5151 prompt_cache_ratio:0.6974004874086109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 +DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10902690887451172 s +INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11122322082519531 s +DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=157185814312803514271869536270425242219, time:1750767065.6896033s req_ids:[8] +DEBUG 06-24 20:11:05 [manager.py:391] +ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:220.05009651184082ms total_cost_time:220.0925350189209ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7387 prompt_cache_len:5151 prompt_cache_ratio:0.6973060782455666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 +DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10790824890136719 s +INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11027979850769043 s +DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=249767058022291023164933967361379440630, time:1750767065.9313347s req_ids:[8] +DEBUG 06-24 20:11:05 [manager.py:391] +ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:224.26986694335938ms total_cost_time:224.33018684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7388 prompt_cache_len:5151 prompt_cache_ratio:0.6972116946399567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 +DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s +INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s +DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=72988453671020820833492621132462256626, time:1750767066.151361s req_ids:[8] +DEBUG 06-24 20:11:06 [manager.py:391] +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:221.15564346313477ms total_cost_time:221.20070457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7389 prompt_cache_len:5151 prompt_cache_ratio:0.6971173365814048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 +DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.30930161476135254 s +INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.31130504608154297 s +DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=68609562879362198066827347485825263785, time:1750767066.5766842s req_ids:[8] +DEBUG 06-24 20:11:06 [manager.py:391] +ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:414.3202304840088ms total_cost_time:414.3640995025635ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7390 prompt_cache_len:5151 prompt_cache_ratio:0.6970230040595399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 +DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s +INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.10947847366333008 s +DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=333201663625202988012110722551252224850, time:1750767066.7924585s req_ids:[8] +DEBUG 06-24 20:11:06 [manager.py:391] +ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:204.40268516540527ms total_cost_time:204.44655418395996ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7391 prompt_cache_len:5151 prompt_cache_ratio:0.6969286970639967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 +DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10867071151733398 s +INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.11066508293151855 s +DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=214718624767574882782097065110452616956, time:1750767067.003334s req_ids:[8] +DEBUG 06-24 20:11:07 [manager.py:391] +ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:11:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 31479.013 tokens/s +DEBUG 06-24 20:11:07 [stats.py:37] Avg prompt tokens throughput: 31470.374 tokens/s +DEBUG 06-24 20:11:07 [stats.py:37] Avg generate tokens throughput: 8.638 tokens/s +INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:205.96885681152344ms total_cost_time:206.02750778198242ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7392 prompt_cache_len:5151 prompt_cache_ratio:0.6968344155844156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 +DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.11130332946777344 s +INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11334538459777832 s +DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=181972095624183535281932648711790474471, time:1750767067.216142s req_ids:[8] +DEBUG 06-24 20:11:07 [manager.py:391] +ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:215.81101417541504ms total_cost_time:215.8682346343994ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7393 prompt_cache_len:5151 prompt_cache_ratio:0.6967401596104423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 +DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.11033225059509277 s +INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11220502853393555 s +DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=199431724135214611544607853810347254679, time:1750767067.4325259s req_ids:[8] +DEBUG 06-24 20:11:07 [manager.py:391] +ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:156.98814392089844ms total_cost_time:157.0439338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:7394 prompt_cache_len:5151 prompt_cache_ratio:0.6966459291317284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 +DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.10992789268493652 s +INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11201977729797363 s +DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=209834024425992269867616880824656120165, time:1750767067.5966449s req_ids:[8] +DEBUG 06-24 20:11:07 [manager.py:391] +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:218.57547760009766ms total_cost_time:218.6279296875ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7395 prompt_cache_len:5151 prompt_cache_ratio:0.696551724137931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 +DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.10839295387268066 s +INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s +DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=180742489393682375246702233078365593616, time:1750767067.8252866s req_ids:[8] +DEBUG 06-24 20:11:07 [manager.py:391] +ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:198.90308380126953ms total_cost_time:198.94671440124512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7396 prompt_cache_len:5151 prompt_cache_ratio:0.6964575446187128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 +DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10776162147521973 s +INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.10971236228942871 s +DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=124965555470040271358904910163908282243, time:1750767068.0312061s req_ids:[8] +DEBUG 06-24 20:11:08 [manager.py:391] +ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:204.24413681030273ms total_cost_time:204.29682731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7397 prompt_cache_len:5151 prompt_cache_ratio:0.6963633905637421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 +DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s +INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.11103367805480957 s +DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=93154533368300523820641178779356754311, time:1750767068.2396228s req_ids:[8] +DEBUG 06-24 20:11:08 [manager.py:391] +ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:201.99966430664062ms total_cost_time:202.0413875579834ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7398 prompt_cache_len:5151 prompt_cache_ratio:0.6962692619626926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 +DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10927486419677734 s +INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.11123466491699219 s +DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=129381620587932354756920402419366880146, time:1750767068.4518588s req_ids:[8] +DEBUG 06-24 20:11:08 [manager.py:391] +ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:218.37139129638672ms total_cost_time:218.4302806854248ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7399 prompt_cache_len:5151 prompt_cache_ratio:0.6961751588052439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 +DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.11020779609680176 s +INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.1122291088104248 s +DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=236683490441226378863854356949677232959, time:1750767068.6766586s req_ids:[8] +DEBUG 06-24 20:11:08 [manager.py:391] +ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:378.2684803009033ms total_cost_time:378.3297538757324ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:7400 prompt_cache_len:5151 prompt_cache_ratio:0.6960810810810811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 +DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10901474952697754 s +INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.11107969284057617 s +DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=267939142040932546910369753345547863350, time:1750767069.056185s req_ids:[8] +DEBUG 06-24 20:11:09 [manager.py:391] +ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:206.70175552368164ms total_cost_time:206.74514770507812ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7401 prompt_cache_len:5151 prompt_cache_ratio:0.6959870287798946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 +DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10707855224609375 s +INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10902571678161621 s +DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=20764349457028876236182632729912366716, time:1750767069.2684903s req_ids:[8] +DEBUG 06-24 20:11:09 [manager.py:391] +ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:196.47812843322754ms total_cost_time:196.52104377746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7402 prompt_cache_len:5151 prompt_cache_ratio:0.6958930018913807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 +DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10776662826538086 s +INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979747772216797 s +DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=171483059124038557330035222879340141391, time:1750767069.4743934s req_ids:[8] +DEBUG 06-24 20:11:09 [manager.py:391] +ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:203.54223251342773ms total_cost_time:203.58538627624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7403 prompt_cache_len:5151 prompt_cache_ratio:0.6957990004052411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 +DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10772895812988281 s +INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s +DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=101431306433446687075158170020865578733, time:1750767069.6884556s req_ids:[8] +DEBUG 06-24 20:11:09 [manager.py:391] +ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:209.35964584350586ms total_cost_time:209.40232276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7404 prompt_cache_len:5151 prompt_cache_ratio:0.6957050243111832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 +DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10604548454284668 s +INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10797524452209473 s +DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=152705021199227741693674680543932485769, time:1750767069.90363s req_ids:[8] +DEBUG 06-24 20:11:09 [manager.py:391] +ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:212.05759048461914ms total_cost_time:212.10241317749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7405 prompt_cache_len:5151 prompt_cache_ratio:0.6956110735989196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 +DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10819077491760254 s +INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11017942428588867 s +DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=214049058359539040459960797503223730991, time:1750767070.1380618s req_ids:[8] +DEBUG 06-24 20:11:10 [manager.py:391] +ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:215.23141860961914ms total_cost_time:215.27624130249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7406 prompt_cache_len:5151 prompt_cache_ratio:0.695517148258169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 +DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10821723937988281 s +INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037039756774902 s +DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=292120518234624111530100389802582502124, time:1750767070.3432667s req_ids:[8] +DEBUG 06-24 20:11:10 [manager.py:391] +ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:211.05694770812988ms total_cost_time:211.09962463378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7407 prompt_cache_len:5151 prompt_cache_ratio:0.6954232482786553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 +DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.11038351058959961 s +INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11192965507507324 s +DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=174630318551087950671781791321461696608, time:1750767070.5567055s req_ids:[8] +DEBUG 06-24 20:11:10 [manager.py:391] +ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:197.75724411010742ms total_cost_time:197.84116744995117ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:7408 prompt_cache_len:5151 prompt_cache_ratio:0.6953293736501079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 +DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10664963722229004 s +INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.1079397201538086 s +DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=114849242951004337205286534813722412650, time:1750767070.776647s req_ids:[8] +DEBUG 06-24 20:11:10 [manager.py:391] +ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:220.4306125640869ms total_cost_time:220.47686576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7409 prompt_cache_len:5151 prompt_cache_ratio:0.6952355243622621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 +DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10818648338317871 s +INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940980911254883 s +DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=26131201015284449413325560935713121364, time:1750767070.9910412s req_ids:[8] +DEBUG 06-24 20:11:10 [manager.py:391] +ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:372.9691505432129ms total_cost_time:372.9894161224365ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:7410 prompt_cache_len:5151 prompt_cache_ratio:0.6951417004048583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 +DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.10274195671081543 s +INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.10380315780639648 s +DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=97925376814572714015306892286008678909, time:1750767071.3679955s req_ids:[8] +DEBUG 06-24 20:11:11 [manager.py:391] +ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:162.0182991027832ms total_cost_time:162.03832626342773ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:7411 prompt_cache_len:5151 prompt_cache_ratio:0.6950479017676426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 +DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.10619020462036133 s +INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.10744142532348633 s +DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=184130130615286016847116781770405684457, time:1750767071.5327094s req_ids:[8] +DEBUG 06-24 20:11:11 [manager.py:391] +ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:192.09837913513184ms total_cost_time:192.14224815368652ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7412 prompt_cache_len:5151 prompt_cache_ratio:0.694954128440367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 +DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.11013078689575195 s +INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.11205625534057617 s +DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=52809970427344754986774336602757269588, time:1750767071.7294004s req_ids:[8] +DEBUG 06-24 20:11:11 [manager.py:391] +ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:208.51850509643555ms total_cost_time:208.56499671936035ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7413 prompt_cache_len:5151 prompt_cache_ratio:0.6948603804127883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 +DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.1053769588470459 s +INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.1073904037475586 s +DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=34179166253060384155157933142749296764, time:1750767071.9433348s req_ids:[8] +DEBUG 06-24 20:11:11 [manager.py:391] +ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:201.90191268920898ms total_cost_time:201.94625854492188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7414 prompt_cache_len:5151 prompt_cache_ratio:0.6947666576746695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 +DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10776591300964355 s +INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10968875885009766 s +DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=157556507505558106713233822930765687305, time:1750767072.1567602s req_ids:[8] +DEBUG 06-24 20:11:12 [manager.py:391] +ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:207.85236358642578ms total_cost_time:207.89670944213867ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7415 prompt_cache_len:5151 prompt_cache_ratio:0.6946729602157788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 +DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s +INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s +DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=226310388708464242968343142667638750119, time:1750767072.3669736s req_ids:[8] +DEBUG 06-24 20:11:12 [manager.py:391] +ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:226.70555114746094ms total_cost_time:226.74989700317383ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7416 prompt_cache_len:5151 prompt_cache_ratio:0.69457928802589 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 +DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10812592506408691 s +INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.1101694107055664 s +DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=226088692224405038323400085663291837562, time:1750767072.593881s req_ids:[8] +DEBUG 06-24 20:11:12 [manager.py:391] +ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:196.3212490081787ms total_cost_time:196.36249542236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:7417 prompt_cache_len:5151 prompt_cache_ratio:0.6944856410947823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 +DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10728883743286133 s +INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10930466651916504 s +DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=286384624214673506357911175950543719865, time:1750767072.7984557s req_ids:[8] +DEBUG 06-24 20:11:12 [manager.py:391] +ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:207.95679092407227ms total_cost_time:207.99970626831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7418 prompt_cache_len:5151 prompt_cache_ratio:0.6943920194122405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 +DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10766029357910156 s +INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.10934829711914062 s +DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=84916617351802788885427210951537737186, time:1750767073.0118542s req_ids:[8] +DEBUG 06-24 20:11:13 [manager.py:391] +ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:162.6584529876709ms total_cost_time:162.7035140991211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7419 prompt_cache_len:5151 prompt_cache_ratio:0.694298422968055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 +DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10761117935180664 s +INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.10947656631469727 s +DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=87141610058439750630199920144190091425, time:1750767073.183684s req_ids:[8] +DEBUG 06-24 20:11:13 [manager.py:391] +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:200.14405250549316ms total_cost_time:200.18863677978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7420 prompt_cache_len:5151 prompt_cache_ratio:0.6942048517520215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 +DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.30913734436035156 s +INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.3111867904663086 s +DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=326211178319112308269114931908100826260, time:1750767073.596389s req_ids:[8] +DEBUG 06-24 20:11:13 [manager.py:391] +ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:438.3435249328613ms total_cost_time:438.3866786956787ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7421 prompt_cache_len:5151 prompt_cache_ratio:0.6941113057539415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 +DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10975146293640137 s +INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.11178946495056152 s +DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=51661140725856637525718398489677062871, time:1750767073.830754s req_ids:[8] +DEBUG 06-24 20:11:13 [manager.py:391] +ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:198.30679893493652ms total_cost_time:198.3499526977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7422 prompt_cache_len:5151 prompt_cache_ratio:0.6940177849636217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 +DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10762214660644531 s +INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s +DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=337922778637490160285430918550413108406, time:1750767074.0506709s req_ids:[8] +DEBUG 06-24 20:11:14 [manager.py:391] +ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:222.5050926208496ms total_cost_time:222.5501537322998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7423 prompt_cache_len:5151 prompt_cache_ratio:0.6939242893708744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 +DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s +INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s +DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=76539053179047477132070559233635810338, time:1750767074.2663903s req_ids:[8] +DEBUG 06-24 20:11:14 [manager.py:391] +ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:214.41149711608887ms total_cost_time:214.45417404174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7424 prompt_cache_len:5151 prompt_cache_ratio:0.6938308189655172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 +DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10786223411560059 s +INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10962533950805664 s +DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=180680170160272296930782298953702396896, time:1750767074.4827452s req_ids:[8] +DEBUG 06-24 20:11:14 [manager.py:391] +ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:199.28836822509766ms total_cost_time:199.33056831359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7425 prompt_cache_len:5151 prompt_cache_ratio:0.6937373737373738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 +DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10782170295715332 s +INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10989975929260254 s +DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=245509376800608299763815822673360042683, time:1750767074.6889277s req_ids:[8] +DEBUG 06-24 20:11:14 [manager.py:391] +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:198.66704940795898ms total_cost_time:198.68803024291992ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7426 prompt_cache_len:5151 prompt_cache_ratio:0.6936439536762725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 +DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10653877258300781 s +INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10848116874694824 s +DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=277915457632603868987146929440189743098, time:1750767074.8918285s req_ids:[8] +DEBUG 06-24 20:11:14 [manager.py:391] +ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:225.98743438720703ms total_cost_time:226.043701171875ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7427 prompt_cache_len:5151 prompt_cache_ratio:0.693550558772048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 +DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.1076657772064209 s +INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10950064659118652 s +DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=25926950429729095360007155456661765313, time:1750767075.1248958s req_ids:[8] +DEBUG 06-24 20:11:15 [manager.py:391] +ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:200.58202743530273ms total_cost_time:200.63090324401855ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:7428 prompt_cache_len:5151 prompt_cache_ratio:0.6934571890145396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 +DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.1073753833770752 s +INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10968637466430664 s +DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=67999856864234004939624225716167560111, time:1750767075.3329852s req_ids:[8] +DEBUG 06-24 20:11:15 [manager.py:391] +ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:206.77566528320312ms total_cost_time:206.82048797607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7429 prompt_cache_len:5151 prompt_cache_ratio:0.6933638443935927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 +DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.10675168037414551 s +INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10875368118286133 s +DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=79460822365509253196476621537295212197, time:1750767075.5446875s req_ids:[8] +DEBUG 06-24 20:11:15 [manager.py:391] +ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:202.8963565826416ms total_cost_time:202.9573917388916ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7430 prompt_cache_len:5151 prompt_cache_ratio:0.6932705248990578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 +DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.10844230651855469 s +INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s +DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=4503896045434052233063513679634790994, time:1750767075.7627897s req_ids:[8] +DEBUG 06-24 20:11:15 [manager.py:391] +ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:381.8814754486084ms total_cost_time:381.9265365600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7431 prompt_cache_len:5151 prompt_cache_ratio:0.6931772305207913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 +DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.1080014705657959 s +INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10982012748718262 s +DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=124455734227133753084207299854245542880, time:1750767076.1367433s req_ids:[8] +DEBUG 06-24 20:11:16 [manager.py:391] +ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:158.19287300109863ms total_cost_time:158.2345962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7432 prompt_cache_len:5151 prompt_cache_ratio:0.6930839612486545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 +DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:16 [batch.py:51] router release req id 8 +DEBUG 06-24 20:11:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:16 [manager.py:283] +DEBUG 06-24 20:11:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:16 [manager.py:284] +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10798978805541992 s +INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099090576171875 s +DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=181111378110961650159993324375680167398, time:1750767076.298811s req_ids:[8] +DEBUG 06-24 20:11:16 [manager.py:391] +ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:194.3991184234619ms total_cost_time:194.4422721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7433 prompt_cache_len:5151 prompt_cache_ratio:0.6929907170725145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 +DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:16 [batch.py:51] router release req id 8 +INFO 06-24 20:11:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10695552825927734 s +INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10882115364074707 s +DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=232659404386592625834311807803533803212, time:1750767076.503519s req_ids:[8] +DEBUG 06-24 20:11:16 [manager.py:391] +ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:202.82483100891113ms total_cost_time:202.86893844604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7434 prompt_cache_len:5151 prompt_cache_ratio:0.6928974979822438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 +DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10710835456848145 s +INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10903692245483398 s +DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=329355241449413619017569938229929147761, time:1750767076.7153666s req_ids:[8] +DEBUG 06-24 20:11:16 [manager.py:391] +ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:206.4192295074463ms total_cost_time:206.46429061889648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7435 prompt_cache_len:5151 prompt_cache_ratio:0.6928043039677202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 +DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10724806785583496 s +INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10914921760559082 s +DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=256184503709959002366324047395618549361, time:1750767076.9335728s req_ids:[8] +DEBUG 06-24 20:11:16 [manager.py:391] +ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:216.7816162109375ms total_cost_time:216.8259620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7436 prompt_cache_len:5151 prompt_cache_ratio:0.6927111350188273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 +DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s +INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s +DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=4548978999315013399410810697814234927, time:1750767077.1575794s req_ids:[8] +DEBUG 06-24 20:11:17 [manager.py:391] +DEBUG 06-24 20:11:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 33143.256 tokens/s +DEBUG 06-24 20:11:17 [stats.py:37] Avg prompt tokens throughput: 33134.419 tokens/s +DEBUG 06-24 20:11:17 [stats.py:37] Avg generate tokens throughput: 8.838 tokens/s +ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:218.3389663696289ms total_cost_time:218.4011936187744ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:7437 prompt_cache_len:5151 prompt_cache_ratio:0.6926179911254539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 +DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.1065680980682373 s +INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.10840845108032227 s +DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=73690730217841824382647300993480774425, time:1750767077.3747609s req_ids:[8] +DEBUG 06-24 20:11:17 [manager.py:391] +ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:169.7535514831543ms total_cost_time:169.7983741760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7438 prompt_cache_len:5151 prompt_cache_ratio:0.6925248722774939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 +DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s +INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s +DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=55373161161021486853917574618699261695, time:1750767077.5481749s req_ids:[8] +DEBUG 06-24 20:11:17 [manager.py:391] +ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:201.6761302947998ms total_cost_time:201.7202377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7439 prompt_cache_len:5151 prompt_cache_ratio:0.6924317784648474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 +DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.1088259220123291 s +INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.1107625961303711 s +DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=8834535322681766073089467998353454417, time:1750767077.755431s req_ids:[8] +DEBUG 06-24 20:11:17 [manager.py:391] +ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:205.50155639648438ms total_cost_time:205.56068420410156ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7440 prompt_cache_len:5151 prompt_cache_ratio:0.6923387096774194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 +DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10922956466674805 s +INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.11108207702636719 s +DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=4084781113246519992814150865807414548, time:1750767077.9766252s req_ids:[8] +DEBUG 06-24 20:11:17 [manager.py:391] +ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:378.11851501464844ms total_cost_time:378.16357612609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7441 prompt_cache_len:5151 prompt_cache_ratio:0.6922456659051203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 +DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10913872718811035 s +INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.11162185668945312 s +DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=108525833346023201694568122436972465913, time:1750767078.344695s req_ids:[8] +DEBUG 06-24 20:11:18 [manager.py:391] +ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:192.08145141601562ms total_cost_time:192.1248435974121ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7442 prompt_cache_len:5151 prompt_cache_ratio:0.6921526471378662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 +DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:18 [batch.py:51] router release req id 8 +INFO 06-24 20:11:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10696196556091309 s +INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10875058174133301 s +DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=99470848060880521869391212208684377943, time:1750767078.5464842s req_ids:[8] +DEBUG 06-24 20:11:18 [manager.py:391] +ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:166.85724258422852ms total_cost_time:166.8999195098877ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7443 prompt_cache_len:5151 prompt_cache_ratio:0.6920596533655784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 +DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10774803161621094 s +INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s +DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=130461255637545816791186424850565626532, time:1750767078.7157888s req_ids:[8] +DEBUG 06-24 20:11:18 [manager.py:391] +ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:198.8506317138672ms total_cost_time:198.89330863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7444 prompt_cache_len:5151 prompt_cache_ratio:0.6919666845781838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 +DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10785436630249023 s +INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10980916023254395 s +DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=162081000095136418946882237973577674770, time:1750767078.91929s req_ids:[8] +DEBUG 06-24 20:11:18 [manager.py:391] +ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:200.19936561584473ms total_cost_time:200.24418830871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7445 prompt_cache_len:5151 prompt_cache_ratio:0.6918737407656145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 +DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s +INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10953211784362793 s +DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=162595257166887762818683052027363676628, time:1750767079.131464s req_ids:[8] +DEBUG 06-24 20:11:19 [manager.py:391] +ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:208.85205268859863ms total_cost_time:208.89616012573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7446 prompt_cache_len:5151 prompt_cache_ratio:0.6917808219178082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 +DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10780525207519531 s +INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10961461067199707 s +DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=253277512845888381906924330504823816849, time:1750767079.3446136s req_ids:[8] +DEBUG 06-24 20:11:19 [manager.py:391] +ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:205.20377159118652ms total_cost_time:205.2462100982666ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7447 prompt_cache_len:5151 prompt_cache_ratio:0.6916879280247079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 +DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s +INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10999917984008789 s +DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=82380109372754230062951009038544942766, time:1750767079.5548475s req_ids:[8] +DEBUG 06-24 20:11:19 [manager.py:391] +ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:203.39035987854004ms total_cost_time:203.43470573425293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7448 prompt_cache_len:5151 prompt_cache_ratio:0.6915950590762621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 +DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.11012721061706543 s +INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.11200881004333496 s +DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=103645447359087133093166208600145588326, time:1750767079.7633677s req_ids:[8] +DEBUG 06-24 20:11:19 [manager.py:391] +ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:202.2995948791504ms total_cost_time:202.35228538513184ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7449 prompt_cache_len:5151 prompt_cache_ratio:0.6915022150624245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 +DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.11132621765136719 s +INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.11318039894104004 s +DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=243298994352140601102470509383565096354, time:1750767079.984716s req_ids:[8] +DEBUG 06-24 20:11:19 [manager.py:391] +ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:218.98913383483887ms total_cost_time:219.04754638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:7450 prompt_cache_len:5151 prompt_cache_ratio:0.6914093959731543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 +DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.10958313941955566 s +INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.11211585998535156 s +DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=263186011479994774986520148202932534887, time:1750767080.1980124s req_ids:[8] +DEBUG 06-24 20:11:20 [manager.py:391] +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:204.32257652282715ms total_cost_time:204.3769359588623ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7451 prompt_cache_len:5151 prompt_cache_ratio:0.6913166017984164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 +DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.31166982650756836 s +INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.31423354148864746 s +DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=269404262892281471926627286984742575383, time:1750767080.6199296s req_ids:[8] +DEBUG 06-24 20:11:20 [manager.py:391] +ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:437.56890296936035ms total_cost_time:437.61181831359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7452 prompt_cache_len:5151 prompt_cache_ratio:0.6912238325281803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 +DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s +INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.11075544357299805 s +DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=326802644971231985589783645817220190968, time:1750767080.8522918s req_ids:[8] +DEBUG 06-24 20:11:20 [manager.py:391] +ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:202.67319679260254ms total_cost_time:202.71849632263184ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7453 prompt_cache_len:5151 prompt_cache_ratio:0.6911310881524219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 +DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.1074366569519043 s +INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s +DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=305387787074210830426694086908688281117, time:1750767081.062045s req_ids:[8] +DEBUG 06-24 20:11:21 [manager.py:391] +ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:206.90321922302246ms total_cost_time:206.94637298583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7454 prompt_cache_len:5151 prompt_cache_ratio:0.6910383686611216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 +DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10714578628540039 s +INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s +DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=335538953564640558639313983719977406366, time:1750767081.274611s req_ids:[8] +DEBUG 06-24 20:11:21 [manager.py:391] +ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:216.41969680786133ms total_cost_time:216.46404266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7455 prompt_cache_len:5151 prompt_cache_ratio:0.6909456740442655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 +DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10731291770935059 s +INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965490341186523 s +DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=245071617843898293241549653545049277766, time:1750767081.4994216s req_ids:[8] +DEBUG 06-24 20:11:21 [manager.py:391] +ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:212.62454986572266ms total_cost_time:212.68010139465332ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:7456 prompt_cache_len:5151 prompt_cache_ratio:0.6908530042918455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 +DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10844779014587402 s +INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.1104135513305664 s +DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=124356320736153591286829794059769685363, time:1750767081.7180753s req_ids:[8] +DEBUG 06-24 20:11:21 [manager.py:391] +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:212.80932426452637ms total_cost_time:212.85247802734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7457 prompt_cache_len:5151 prompt_cache_ratio:0.6907603593938582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 +DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10788798332214355 s +INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.1099703311920166 s +DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=144240155567352064500614094323357681162, time:1750767081.9312556s req_ids:[8] +DEBUG 06-24 20:11:21 [manager.py:391] +ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:208.04643630981445ms total_cost_time:208.09102058410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7458 prompt_cache_len:5151 prompt_cache_ratio:0.6906677393403057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 +DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s +INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.11120104789733887 s +DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=232573094133731883191370021607807576027, time:1750767082.1584105s req_ids:[8] +DEBUG 06-24 20:11:22 [manager.py:391] +ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:221.5559482574463ms total_cost_time:221.60100936889648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7459 prompt_cache_len:5151 prompt_cache_ratio:0.6905751441211959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 +DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10775876045227051 s +INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s +DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=184912065706389910249888098198937428946, time:1750767082.3729267s req_ids:[8] +DEBUG 06-24 20:11:22 [manager.py:391] +ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:168.52879524230957ms total_cost_time:168.57028007507324ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7460 prompt_cache_len:5151 prompt_cache_ratio:0.6904825737265415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 +DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10914802551269531 s +INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.1110544204711914 s +DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=310569826816793906961358973129667899148, time:1750767082.5438545s req_ids:[8] +DEBUG 06-24 20:11:22 [manager.py:391] +ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:198.85492324829102ms total_cost_time:198.897123336792ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7461 prompt_cache_len:5151 prompt_cache_ratio:0.6903900281463611 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 +DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s +INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.11043095588684082 s +DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=8757093422609606970826310645553977256, time:1750767082.7522476s req_ids:[8] +DEBUG 06-24 20:11:22 [manager.py:391] +ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:373.24976921081543ms total_cost_time:373.2926845550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7462 prompt_cache_len:5151 prompt_cache_ratio:0.6902975073706781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 +DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s +INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.11038589477539062 s +DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=327421759691147094336038244002227703615, time:1750767083.1257067s req_ids:[8] +DEBUG 06-24 20:11:23 [manager.py:391] +INFO 06-24 20:11:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:11:23 [statics_utils.py:24] mean first cost: 230.6960552901547 ms +INFO 06-24 20:11:23 [statics_utils.py:24] mean per token cost: 0.09123260213915535 ms +ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:198.78888130187988ms total_cost_time:198.8520622253418ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:7463 prompt_cache_len:5151 prompt_cache_ratio:0.6902050113895216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 +DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10706543922424316 s +INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10910439491271973 s +DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=257869851516972095649333889571118661386, time:1750767083.338935s req_ids:[8] +DEBUG 06-24 20:11:23 [manager.py:391] +ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:209.35893058776855ms total_cost_time:209.40327644348145ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7464 prompt_cache_len:5151 prompt_cache_ratio:0.690112540192926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 +DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10680937767028809 s +INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10877466201782227 s +DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=210604851289719266499750429799966188411, time:1750767083.5520976s req_ids:[8] +DEBUG 06-24 20:11:23 [manager.py:391] +ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:206.77614212036133ms total_cost_time:206.817626953125ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7465 prompt_cache_len:5151 prompt_cache_ratio:0.690020093770931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 +DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10725927352905273 s +INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.1095116138458252 s +DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=137598415086722974131867197063478172252, time:1750767083.7657492s req_ids:[8] +DEBUG 06-24 20:11:23 [manager.py:391] +ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:211.5950584411621ms total_cost_time:211.6398811340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7466 prompt_cache_len:5151 prompt_cache_ratio:0.6899276721135815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 +DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.1072840690612793 s +INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10971522331237793 s +DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=308689576169670154108324346939187040182, time:1750767083.9808764s req_ids:[8] +DEBUG 06-24 20:11:23 [manager.py:391] +ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:207.10325241088867ms total_cost_time:207.14783668518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7467 prompt_cache_len:5151 prompt_cache_ratio:0.6898352752109281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 +DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10664486885070801 s +INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.10896682739257812 s +DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=84787565096822400543265844336594142702, time:1750767084.1936512s req_ids:[8] +DEBUG 06-24 20:11:24 [manager.py:391] +ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:207.25226402282715ms total_cost_time:207.29732513427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7468 prompt_cache_len:5151 prompt_cache_ratio:0.6897429030530262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 +DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s +INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.11062479019165039 s +DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=175643542312453904322308021175820148236, time:1750767084.4237344s req_ids:[8] +DEBUG 06-24 20:11:24 [manager.py:391] +ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:228.55281829833984ms total_cost_time:228.59668731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7469 prompt_cache_len:5151 prompt_cache_ratio:0.689650555629937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 +DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10729050636291504 s +INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.10970401763916016 s +DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=208989913259873464523209405562986098916, time:1750767084.641867s req_ids:[8] +DEBUG 06-24 20:11:24 [manager.py:391] +ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:207.2129249572754ms total_cost_time:207.2584629058838ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7470 prompt_cache_len:5151 prompt_cache_ratio:0.6895582329317269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 +DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10843729972839355 s +INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.1105043888092041 s +DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=188445056309105053618701648374319636945, time:1750767084.8526504s req_ids:[8] +DEBUG 06-24 20:11:24 [manager.py:391] +ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:201.68685913085938ms total_cost_time:201.73120498657227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7471 prompt_cache_len:5151 prompt_cache_ratio:0.6894659349484674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 +DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10793828964233398 s +INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10985016822814941 s +DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=99413025731883509549028864441292816538, time:1750767085.067927s req_ids:[8] +DEBUG 06-24 20:11:25 [manager.py:391] +ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:387.1722221374512ms total_cost_time:387.24756240844727ms,out_token_counter:1 mean_per_token_cost_time: 0.07534027099609375ms prompt_token_num:7472 prompt_cache_len:5151 prompt_cache_ratio:0.6893736616702355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 +DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s +INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s +DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=324676176774033674555688877787615452218, time:1750767085.452931s req_ids:[8] +DEBUG 06-24 20:11:25 [manager.py:391] +ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:203.4132480621338ms total_cost_time:203.45711708068848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7473 prompt_cache_len:5151 prompt_cache_ratio:0.6892814130871137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 +DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10664939880371094 s +INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10846543312072754 s +DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=334730216528391277831921803284621599088, time:1750767085.676555s req_ids:[8] +DEBUG 06-24 20:11:25 [manager.py:391] +ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:220.7179069519043ms total_cost_time:220.76177597045898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7474 prompt_cache_len:5151 prompt_cache_ratio:0.6891891891891891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 +DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10716867446899414 s +INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10887670516967773 s +DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=131079256920421875665600843456104989984, time:1750767085.8906987s req_ids:[8] +DEBUG 06-24 20:11:25 [manager.py:391] +ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:167.35219955444336ms total_cost_time:167.39654541015625ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7475 prompt_cache_len:5151 prompt_cache_ratio:0.6890969899665552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 +DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10800027847290039 s +INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1100006103515625 s +DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=114162242014997088566202584070837452204, time:1750767086.061665s req_ids:[8] +DEBUG 06-24 20:11:26 [manager.py:391] +ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:194.67544555664062ms total_cost_time:194.71096992492676ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:7476 prompt_cache_len:5151 prompt_cache_ratio:0.6890048154093098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 +DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10736870765686035 s +INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1092071533203125 s +DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=11936535065731273734170187497077689518, time:1750767086.261259s req_ids:[8] +DEBUG 06-24 20:11:26 [manager.py:391] +ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:167.16933250427246ms total_cost_time:167.21510887145996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7477 prompt_cache_len:5151 prompt_cache_ratio:0.6889126655075565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 +DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10825896263122559 s +INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.11023664474487305 s +DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=229544341479842231107019660074869882799, time:1750767086.4390116s req_ids:[8] +DEBUG 06-24 20:11:26 [manager.py:391] +ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:203.45664024353027ms total_cost_time:203.50027084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7478 prompt_cache_len:5151 prompt_cache_ratio:0.6888205402514042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 +DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10705971717834473 s +INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1089634895324707 s +DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=123198301956496034737157668314164997966, time:1750767086.6444924s req_ids:[8] +DEBUG 06-24 20:11:26 [manager.py:391] +ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:206.09760284423828ms total_cost_time:206.14123344421387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7479 prompt_cache_len:5151 prompt_cache_ratio:0.6887284396309667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 +DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10989570617675781 s +INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.11193203926086426 s +DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=64542103636832748140191840346770710671, time:1750767086.8549757s req_ids:[8] +DEBUG 06-24 20:11:26 [manager.py:391] +ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:211.1523151397705ms total_cost_time:211.198091506958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7480 prompt_cache_len:5151 prompt_cache_ratio:0.6886363636363636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 +DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10768365859985352 s +INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.10941195487976074 s +DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=82058919095171448801317716285573376702, time:1750767087.0699363s req_ids:[8] +DEBUG 06-24 20:11:27 [manager.py:391] +ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:162.54186630249023ms total_cost_time:162.58573532104492ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7481 prompt_cache_len:5151 prompt_cache_ratio:0.6885443122577195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 +DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10779142379760742 s +INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s +DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=118189453328384174911612043717663907831, time:1750767087.2380433s req_ids:[8] +DEBUG 06-24 20:11:27 [manager.py:391] +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:11:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 33309.372 tokens/s +DEBUG 06-24 20:11:27 [stats.py:37] Avg prompt tokens throughput: 33300.445 tokens/s +DEBUG 06-24 20:11:27 [stats.py:37] Avg generate tokens throughput: 8.928 tokens/s +ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:198.31442832946777ms total_cost_time:198.35853576660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7482 prompt_cache_len:5151 prompt_cache_ratio:0.6884522854851644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 +DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10681772232055664 s +INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.1086418628692627 s +DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=295166475996341443049158862173550238938, time:1750767087.4505174s req_ids:[8] +DEBUG 06-24 20:11:27 [manager.py:391] +ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:372.89929389953613ms total_cost_time:372.94459342956543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7483 prompt_cache_len:5151 prompt_cache_ratio:0.6883602833088334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 +DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s +INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s +DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=214533361182107069727169985622831811017, time:1750767087.8181543s req_ids:[8] +DEBUG 06-24 20:11:27 [manager.py:391] +ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:204.63275909423828ms total_cost_time:204.67782020568848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7484 prompt_cache_len:5151 prompt_cache_ratio:0.6882683057188669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 +DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10764098167419434 s +INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s +DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=12773058526286456511775650609802154919, time:1750767088.040381s req_ids:[8] +DEBUG 06-24 20:11:28 [manager.py:391] +ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:217.33999252319336ms total_cost_time:217.38290786743164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7485 prompt_cache_len:5151 prompt_cache_ratio:0.6881763527054108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 +DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s +INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.11039233207702637 s +DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=254281285714112820972224071806047020584, time:1750767088.2532353s req_ids:[8] +DEBUG 06-24 20:11:28 [manager.py:391] +ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:212.95976638793945ms total_cost_time:213.02056312561035ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:7486 prompt_cache_len:5151 prompt_cache_ratio:0.688084424258616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 +DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10738515853881836 s +INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10944533348083496 s +DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=40799480694245296062838350882707044361, time:1750767088.4840286s req_ids:[8] +DEBUG 06-24 20:11:28 [manager.py:391] +ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:216.27068519592285ms total_cost_time:216.31360054016113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7487 prompt_cache_len:5151 prompt_cache_ratio:0.687992520368639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 +DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10977029800415039 s +DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=129384408894430325270825048918733684249, time:1750767088.6984239s req_ids:[8] +DEBUG 06-24 20:11:28 [manager.py:391] +ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:207.7648639678955ms total_cost_time:207.8094482421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7488 prompt_cache_len:5151 prompt_cache_ratio:0.6879006410256411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 +DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10870885848999023 s +INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.11074256896972656 s +DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=29604001605695794923323631120808993684, time:1750767088.932468s req_ids:[8] +DEBUG 06-24 20:11:28 [manager.py:391] +ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:232.32483863830566ms total_cost_time:232.36799240112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7489 prompt_cache_len:5151 prompt_cache_ratio:0.687808786219789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 +DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10772967338562012 s +INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.10995650291442871 s +DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=182831017166580633193818875579910703007, time:1750767089.1505642s req_ids:[8] +DEBUG 06-24 20:11:29 [manager.py:391] +ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:210.16335487365723ms total_cost_time:210.2067470550537ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7490 prompt_cache_len:5151 prompt_cache_ratio:0.687716955941255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 +DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s +INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.11062192916870117 s +DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=15615732650749503656476020085573723261, time:1750767089.367472s req_ids:[8] +DEBUG 06-24 20:11:29 [manager.py:391] +ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:207.64398574829102ms total_cost_time:207.6883316040039ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7491 prompt_cache_len:5151 prompt_cache_ratio:0.6876251501802163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 +DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10740160942077637 s +INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.10868215560913086 s +DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=318122953185650221205474782875383564674, time:1750767089.577351s req_ids:[8] +DEBUG 06-24 20:11:29 [manager.py:391] +ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:206.93302154541016ms total_cost_time:206.97712898254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7492 prompt_cache_len:5151 prompt_cache_ratio:0.6875333689268553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 +DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10736703872680664 s +INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.1088874340057373 s +DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=187166761824039047294628319431594801377, time:1750767089.7890775s req_ids:[8] +DEBUG 06-24 20:11:29 [manager.py:391] +ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:378.8154125213623ms total_cost_time:378.8430690765381ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7493 prompt_cache_len:5151 prompt_cache_ratio:0.6874416121713599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 +DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.1045064926147461 s +INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.10646700859069824 s +DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=332405169542008112923217216009417027023, time:1750767090.170222s req_ids:[8] +DEBUG 06-24 20:11:30 [manager.py:391] +ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:203.68099212646484ms total_cost_time:203.7062644958496ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:7494 prompt_cache_len:5151 prompt_cache_ratio:0.6873498799039232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 +DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.1045072078704834 s +INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.1064760684967041 s +DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=303114048412922466176841591166455847461, time:1750767090.3816154s req_ids:[8] +DEBUG 06-24 20:11:30 [manager.py:391] +ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:206.03251457214355ms total_cost_time:206.05707168579102ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:7495 prompt_cache_len:5151 prompt_cache_ratio:0.6872581721147432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 +DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.10559296607971191 s +INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.1081080436706543 s +DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=26337156931803281927853578809734759500, time:1750767090.5929859s req_ids:[8] +DEBUG 06-24 20:11:30 [manager.py:391] +ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:210.1123332977295ms total_cost_time:210.13736724853516ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7496 prompt_cache_len:5151 prompt_cache_ratio:0.6871664887940235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 +DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.10436773300170898 s +INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.10628604888916016 s +DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=288449249694049706828524221040679568578, time:1750767090.8113515s req_ids:[8] +DEBUG 06-24 20:11:30 [manager.py:391] +ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:213.66143226623535ms total_cost_time:213.68646621704102ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7497 prompt_cache_len:5151 prompt_cache_ratio:0.6870748299319728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 +DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.1052398681640625 s +INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10728645324707031 s +DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=148742170778064327355130550573034594329, time:1750767091.0237875s req_ids:[8] +DEBUG 06-24 20:11:31 [manager.py:391] +ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:201.77960395812988ms total_cost_time:201.80583000183105ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7498 prompt_cache_len:5151 prompt_cache_ratio:0.686983195518805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 +DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10531330108642578 s +INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.1078486442565918 s +DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=106859183797288018885583155067256657616, time:1750767091.2316837s req_ids:[8] +DEBUG 06-24 20:11:31 [manager.py:391] +ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:210.30235290527344ms total_cost_time:210.33334732055664ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:7499 prompt_cache_len:5151 prompt_cache_ratio:0.6868915855447393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 +DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10526418685913086 s +INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10725760459899902 s +DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=49529748292441829881381191867094834059, time:1750767091.4458873s req_ids:[8] +DEBUG 06-24 20:11:31 [manager.py:391] +ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:204.73289489746094ms total_cost_time:204.7586441040039ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:7500 prompt_cache_len:5151 prompt_cache_ratio:0.6868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 +DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10531926155090332 s +INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10719704627990723 s +DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=115221810126877614795910595506306583049, time:1750767091.655888s req_ids:[8] +DEBUG 06-24 20:11:31 [manager.py:391] +ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:207.32903480529785ms total_cost_time:207.35549926757812ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7501 prompt_cache_len:5151 prompt_cache_ratio:0.6867084388748167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 +DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10444140434265137 s +INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.1063833236694336 s +DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=260062700437060553398605326927876665386, time:1750767091.8686435s req_ids:[8] +DEBUG 06-24 20:11:31 [manager.py:391] +ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:174.882173538208ms total_cost_time:174.9119758605957ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:7502 prompt_cache_len:5151 prompt_cache_ratio:0.6866169021594242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 +DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10431885719299316 s +INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10614132881164551 s +DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=211957748278428885069148269148538428227, time:1750767092.048504s req_ids:[8] +DEBUG 06-24 20:11:32 [manager.py:391] +ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:382.4605941772461ms total_cost_time:382.48753547668457ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7503 prompt_cache_len:5151 prompt_cache_ratio:0.6865253898440624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 +DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10559415817260742 s +INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10757589340209961 s +DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=295971484134711392985058761331930024081, time:1750767092.4298196s req_ids:[8] +DEBUG 06-24 20:11:32 [manager.py:391] +ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:204.78582382202148ms total_cost_time:204.80895042419434ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7504 prompt_cache_len:5151 prompt_cache_ratio:0.6864339019189766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 +DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:32 [batch.py:51] router release req id 8 +INFO 06-24 20:11:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10549354553222656 s +INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10752511024475098 s +DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=195268353624139903076023955487563032689, time:1750767092.6445408s req_ids:[8] +DEBUG 06-24 20:11:32 [manager.py:391] +ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:208.94694328308105ms total_cost_time:208.97364616394043ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:7505 prompt_cache_len:5151 prompt_cache_ratio:0.6863424383744171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 +DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10540103912353516 s +INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10739970207214355 s +DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=112160947376472336544252143786192419154, time:1750767092.862713s req_ids:[8] +DEBUG 06-24 20:11:32 [manager.py:391] +ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:209.6409797668457ms total_cost_time:209.66553688049316ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:7506 prompt_cache_len:5151 prompt_cache_ratio:0.6862509992006395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 +DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10442042350769043 s +INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.1063838005065918 s +DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=258279668907049344798782978390288345325, time:1750767093.0751154s req_ids:[8] +DEBUG 06-24 20:11:33 [manager.py:391] +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:212.00823783874512ms total_cost_time:212.0344638824463ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7507 prompt_cache_len:5151 prompt_cache_ratio:0.6861595843879046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 +DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10449528694152832 s +INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10650968551635742 s +DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=81699811517547993716394177816612957527, time:1750767093.2871106s req_ids:[8] +DEBUG 06-24 20:11:33 [manager.py:391] +ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:205.6715488433838ms total_cost_time:205.69467544555664ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7508 prompt_cache_len:5151 prompt_cache_ratio:0.6860681939264784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 +DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10442471504211426 s +INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10634374618530273 s +DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=190290976791288828654431990360719391443, time:1750767093.4977794s req_ids:[8] +DEBUG 06-24 20:11:33 [manager.py:391] +ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:205.75618743896484ms total_cost_time:205.7805061340332ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:7509 prompt_cache_len:5151 prompt_cache_ratio:0.685976827806632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 +DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.1049196720123291 s +INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.1069495677947998 s +DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=11793138929972073944316525303950363704, time:1750767093.7122924s req_ids:[8] +DEBUG 06-24 20:11:33 [manager.py:391] +ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:209.7482681274414ms total_cost_time:209.77020263671875ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7510 prompt_cache_len:5151 prompt_cache_ratio:0.6858854860186419 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 +DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10476851463317871 s +INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10694265365600586 s +DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=307708771651216880233589300116879201600, time:1750767093.9284694s req_ids:[8] +DEBUG 06-24 20:11:33 [manager.py:391] +ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:214.34330940246582ms total_cost_time:214.36572074890137ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:7511 prompt_cache_len:5151 prompt_cache_ratio:0.6857941685527893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 +DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10448598861694336 s +INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10652923583984375 s +DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=30600763680293083363578683379073791214, time:1750767094.1431246s req_ids:[8] +DEBUG 06-24 20:11:34 [manager.py:391] +ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:212.8288745880127ms total_cost_time:212.85533905029297ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7512 prompt_cache_len:5151 prompt_cache_ratio:0.685702875399361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 +DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10454583168029785 s +INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10652470588684082 s +DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=265964139982383590784195405489511878612, time:1750767094.3621747s req_ids:[8] +DEBUG 06-24 20:11:34 [manager.py:391] +ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:370.0528144836426ms total_cost_time:370.0745105743408ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:7513 prompt_cache_len:5151 prompt_cache_ratio:0.685611606548649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 +DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.1051325798034668 s +INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10709118843078613 s +DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=7063782699805092857792049105384080889, time:1750767094.726963s req_ids:[8] +DEBUG 06-24 20:11:34 [manager.py:391] +ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:201.98750495910645ms total_cost_time:202.0111083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7514 prompt_cache_len:5151 prompt_cache_ratio:0.6855203619909502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 +DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10419821739196777 s +INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10611510276794434 s +DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=311779049452557566036070398878018274550, time:1750767094.9387593s req_ids:[8] +DEBUG 06-24 20:11:34 [manager.py:391] +ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:206.85863494873047ms total_cost_time:206.8796157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7515 prompt_cache_len:5151 prompt_cache_ratio:0.6854291417165669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 +DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10629749298095703 s +INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.10837244987487793 s +DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=299071190233896129594866488433455035880, time:1750767095.151677s req_ids:[8] +DEBUG 06-24 20:11:35 [manager.py:391] +ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:212.843656539917ms total_cost_time:212.88728713989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7516 prompt_cache_len:5151 prompt_cache_ratio:0.6853379457158063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 +DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10713768005371094 s +INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.1090383529663086 s +DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=287004762905690154214341945911835012485, time:1750767095.3674567s req_ids:[8] +DEBUG 06-24 20:11:35 [manager.py:391] +ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:201.07221603393555ms total_cost_time:201.11727714538574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7517 prompt_cache_len:5151 prompt_cache_ratio:0.6852467739789809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 +DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10812616348266602 s +INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.11048054695129395 s +DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=117829934997772956437382426471130174788, time:1750767095.5762248s req_ids:[8] +DEBUG 06-24 20:11:35 [manager.py:391] +ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:206.65311813354492ms total_cost_time:206.70580863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7518 prompt_cache_len:5151 prompt_cache_ratio:0.6851556264964086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 +DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.1086435317993164 s +INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.11118030548095703 s +DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=231881408363574442044207623135395878581, time:1750767095.7877162s req_ids:[8] +DEBUG 06-24 20:11:35 [manager.py:391] +ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:206.8009376525879ms total_cost_time:206.8462371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7519 prompt_cache_len:5151 prompt_cache_ratio:0.6850645032584121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 +DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10734796524047852 s +INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944104194641113 s +DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=206436451016202294829374883891855345838, time:1750767096.0001028s req_ids:[8] +DEBUG 06-24 20:11:36 [manager.py:391] +ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:208.50849151611328ms total_cost_time:208.55116844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7520 prompt_cache_len:5151 prompt_cache_ratio:0.6849734042553192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 +DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s +INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10971713066101074 s +DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=36419375788548306324888363376126952575, time:1750767096.224387s req_ids:[8] +DEBUG 06-24 20:11:36 [manager.py:391] +ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:218.96052360534668ms total_cost_time:219.02036666870117ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:7521 prompt_cache_len:5151 prompt_cache_ratio:0.6848823294774631 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 +DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10792851448059082 s +INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10989141464233398 s +DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=65494866243394028663424782378883036630, time:1750767096.4399028s req_ids:[8] +DEBUG 06-24 20:11:36 [manager.py:391] +ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:208.50777626037598ms total_cost_time:208.55236053466797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7522 prompt_cache_len:5151 prompt_cache_ratio:0.6847912789151821 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 +DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10727715492248535 s +INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10921669006347656 s +DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=113294654407010233336789794421959043848, time:1750767096.6536722s req_ids:[8] +DEBUG 06-24 20:11:36 [manager.py:391] +ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:380.4318904876709ms total_cost_time:380.4774284362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7523 prompt_cache_len:5151 prompt_cache_ratio:0.6847002525588196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 +DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10796737670898438 s +INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.10996437072753906 s +DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=185489745184099290425601377004170766334, time:1750767097.0340314s req_ids:[8] +DEBUG 06-24 20:11:37 [manager.py:391] +ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:198.9598274230957ms total_cost_time:199.00226593017578ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7524 prompt_cache_len:5151 prompt_cache_ratio:0.6846092503987241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 +DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10871219635009766 s +INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11089444160461426 s +DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=285073187843206962474167598254521177838, time:1750767097.2414315s req_ids:[8] +DEBUG 06-24 20:11:37 [manager.py:391] +DEBUG 06-24 20:11:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 32266.478 tokens/s +DEBUG 06-24 20:11:37 [stats.py:37] Avg prompt tokens throughput: 32257.881 tokens/s +DEBUG 06-24 20:11:37 [stats.py:37] Avg generate tokens throughput: 8.598 tokens/s +ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:203.9337158203125ms total_cost_time:203.97686958312988ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7525 prompt_cache_len:5151 prompt_cache_ratio:0.6845182724252492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 +DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s +INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11011815071105957 s +DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=42078760260490209465379607529668271940, time:1750767097.451762s req_ids:[8] +DEBUG 06-24 20:11:37 [manager.py:391] +ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:205.580472946167ms total_cost_time:205.6272029876709ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7526 prompt_cache_len:5151 prompt_cache_ratio:0.6844273186287536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 +DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s +INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099848747253418 s +DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=160840032468572075283996328480700834242, time:1750767097.6638014s req_ids:[8] +DEBUG 06-24 20:11:37 [manager.py:391] +ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:206.96544647216797ms total_cost_time:207.00883865356445ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7527 prompt_cache_len:5151 prompt_cache_ratio:0.6843363889996015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 +DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10991859436035156 s +INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11197042465209961 s +DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=301518563105008016666437752492078553157, time:1750767097.8765974s req_ids:[8] +DEBUG 06-24 20:11:37 [manager.py:391] +ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:207.67712593078613ms total_cost_time:207.72218704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7528 prompt_cache_len:5151 prompt_cache_ratio:0.6842454835281615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 +DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s +INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.11092162132263184 s +DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=68758664152356652860138478944933548676, time:1750767098.0902777s req_ids:[8] +DEBUG 06-24 20:11:38 [manager.py:391] +ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:209.43331718444824ms total_cost_time:209.4864845275879ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:7529 prompt_cache_len:5151 prompt_cache_ratio:0.6841546022048081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 +DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.11102914810180664 s +INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.11359858512878418 s +DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=218956699181932895097716661028125152524, time:1750767098.3051922s req_ids:[8] +DEBUG 06-24 20:11:38 [manager.py:391] +ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:209.76567268371582ms total_cost_time:209.8097801208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7530 prompt_cache_len:5151 prompt_cache_ratio:0.6840637450199203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 +DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10788798332214355 s +INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.1101222038269043 s +DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=61625995151615594766650670104290739021, time:1750767098.519472s req_ids:[8] +DEBUG 06-24 20:11:38 [manager.py:391] +ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:207.37743377685547ms total_cost_time:207.4110507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:7531 prompt_cache_len:5151 prompt_cache_ratio:0.6839729119638827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 +DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10763835906982422 s +INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.10962843894958496 s +DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=13973968831911278706293974065734950827, time:1750767098.733651s req_ids:[8] +DEBUG 06-24 20:11:38 [manager.py:391] +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:207.87906646728516ms total_cost_time:207.92555809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7532 prompt_cache_len:5151 prompt_cache_ratio:0.6838821030270844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 +DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10722851753234863 s +INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.10943794250488281 s +DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=113025326024336420487456254158916326313, time:1750767098.9467137s req_ids:[8] +DEBUG 06-24 20:11:38 [manager.py:391] +ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:204.82373237609863ms total_cost_time:204.86879348754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7533 prompt_cache_len:5151 prompt_cache_ratio:0.6837913181999203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 +DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.30832552909851074 s +INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.31042003631591797 s +DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=216369464472762570067837417006755498285, time:1750767099.3705409s req_ids:[8] +DEBUG 06-24 20:11:39 [manager.py:391] +ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:427.26826667785645ms total_cost_time:427.31356620788574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7534 prompt_cache_len:5151 prompt_cache_ratio:0.68370055747279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 +DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.10771679878234863 s +INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.10969042778015137 s +INFO 06-24 20:11:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=238311092774122782516572350187902785928, time:1750767099.590941s req_ids:[8] +DEBUG 06-24 20:11:39 [manager.py:391] +ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:209.47551727294922ms total_cost_time:209.5189094543457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7535 prompt_cache_len:5151 prompt_cache_ratio:0.6836098208360982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 +DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.10835886001586914 s +INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.11038613319396973 s +DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=152539768811612996889041455011969975972, time:1750767099.8051589s req_ids:[8] +DEBUG 06-24 20:11:39 [manager.py:391] +ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:207.5784206390381ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7536 prompt_cache_len:5151 prompt_cache_ratio:0.6835191082802548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 +DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s +INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11115241050720215 s +DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=338177010296469615718055883587469689926, time:1750767100.0188751s req_ids:[8] +DEBUG 06-24 20:11:40 [manager.py:391] +ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:208.77718925476074ms total_cost_time:208.82010459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7537 prompt_cache_len:5151 prompt_cache_ratio:0.6834284197956747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 +DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10960793495178223 s +INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11184453964233398 s +DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=152018828105213930908298841951801194192, time:1750767100.2322216s req_ids:[8] +DEBUG 06-24 20:11:40 [manager.py:391] +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:208.33849906921387ms total_cost_time:208.38475227355957ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7538 prompt_cache_len:5151 prompt_cache_ratio:0.683337755372778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 +DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10802984237670898 s +INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s +DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=282522593512654099620333723473342585014, time:1750767100.453024s req_ids:[8] +DEBUG 06-24 20:11:40 [manager.py:391] +ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:211.56644821166992ms total_cost_time:211.6100788116455ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7539 prompt_cache_len:5151 prompt_cache_ratio:0.6832471150019896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 +DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10759425163269043 s +INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.10956716537475586 s +DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=47214779218334967578483089504079496138, time:1750767100.6636777s req_ids:[8] +DEBUG 06-24 20:11:40 [manager.py:391] +ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:206.36272430419922ms total_cost_time:206.4075469970703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7540 prompt_cache_len:5151 prompt_cache_ratio:0.6831564986737401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 +DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.1087038516998291 s +INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s +DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=30757906366604137415553569177955159724, time:1750767100.876402s req_ids:[8] +DEBUG 06-24 20:11:40 [manager.py:391] +ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:207.70788192749023ms total_cost_time:207.75222778320312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7541 prompt_cache_len:5151 prompt_cache_ratio:0.6830659063784644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 +DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.11059093475341797 s +INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.1126549243927002 s +DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=127068417032066405177607604255771580738, time:1750767101.0890172s req_ids:[8] +DEBUG 06-24 20:11:41 [manager.py:391] +ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:205.88278770446777ms total_cost_time:205.9159278869629ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:7542 prompt_cache_len:5151 prompt_cache_ratio:0.682975338106603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 +DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.10812687873840332 s +INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s +DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=245845754647715021886777661706995994807, time:1750767101.2992468s req_ids:[8] +DEBUG 06-24 20:11:41 [manager.py:391] +ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:207.32426643371582ms total_cost_time:207.38530158996582ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7543 prompt_cache_len:5151 prompt_cache_ratio:0.6828847938486013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 +DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.3088817596435547 s +DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=80510929785446294805860615412067534417, time:1750767101.7020075s req_ids:[8] +DEBUG 06-24 20:11:41 [manager.py:391] +INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.31073546409606934 s +ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:391.14999771118164ms total_cost_time:391.19410514831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7544 prompt_cache_len:5151 prompt_cache_ratio:0.6827942735949099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 +DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.10804080963134766 s +INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.10985875129699707 s +DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=281879656881785090116938225451205953164, time:1750767101.910918s req_ids:[8] +DEBUG 06-24 20:11:41 [manager.py:391] +ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:204.27465438842773ms total_cost_time:204.32066917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7545 prompt_cache_len:5151 prompt_cache_ratio:0.6827037773359841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 +DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10866284370422363 s +INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.1106119155883789 s +DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=42259830633954016871273066399335502987, time:1750767102.1210551s req_ids:[8] +DEBUG 06-24 20:11:42 [manager.py:391] +ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:207.2775363922119ms total_cost_time:207.3209285736084ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7546 prompt_cache_len:5151 prompt_cache_ratio:0.6826133050622847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 +DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10869193077087402 s +INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11075258255004883 s +DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=215726875174416369074625016089047465238, time:1750767102.3349273s req_ids:[8] +DEBUG 06-24 20:11:42 [manager.py:391] +ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:204.12588119506836ms total_cost_time:204.17094230651855ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7547 prompt_cache_len:5151 prompt_cache_ratio:0.6825228567642772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 +DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10876870155334473 s +INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11080527305603027 s +DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=320313802556848223172971554747640775661, time:1750767102.550362s req_ids:[8] +DEBUG 06-24 20:11:42 [manager.py:391] +ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:215.01898765563965ms total_cost_time:215.06404876708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7548 prompt_cache_len:5151 prompt_cache_ratio:0.6824324324324325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 +DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10975289344787598 s +INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11162018775939941 s +DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=313359995978491942668677638544510187266, time:1750767102.7674503s req_ids:[8] +DEBUG 06-24 20:11:42 [manager.py:391] +ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:205.8999538421631ms total_cost_time:205.9464454650879ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7549 prompt_cache_len:5151 prompt_cache_ratio:0.6823420320572261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 +DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10744285583496094 s +INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.10928511619567871 s +DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=236825976613469005315546055762565439020, time:1750767102.976845s req_ids:[8] +DEBUG 06-24 20:11:42 [manager.py:391] +ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:205.885648727417ms total_cost_time:205.92951774597168ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7550 prompt_cache_len:5151 prompt_cache_ratio:0.682251655629139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 +DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.10826611518859863 s +INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.11011958122253418 s +DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=88132883812326784471063410761757039423, time:1750767103.191181s req_ids:[8] +DEBUG 06-24 20:11:43 [manager.py:391] +ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:209.75089073181152ms total_cost_time:209.794282913208ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7551 prompt_cache_len:5151 prompt_cache_ratio:0.6821613031386571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 +DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.10725665092468262 s +INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.10895681381225586 s +DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=66356796860217165281483759098578758348, time:1750767103.4195976s req_ids:[8] +DEBUG 06-24 20:11:43 [manager.py:391] +ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:227.25939750671387ms total_cost_time:227.30422019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7552 prompt_cache_len:5151 prompt_cache_ratio:0.6820709745762712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 +DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.1074371337890625 s +INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.10940742492675781 s +DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=126705362625043862283877706389894545841, time:1750767103.6429734s req_ids:[8] +DEBUG 06-24 20:11:43 [manager.py:391] +ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:374.84216690063477ms total_cost_time:374.88842010498047ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7553 prompt_cache_len:5151 prompt_cache_ratio:0.6819806699324772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 +DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10911941528320312 s +INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.11129951477050781 s +DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=186556218471417168246214132513990802721, time:1750767104.0130157s req_ids:[8] +DEBUG 06-24 20:11:44 [manager.py:391] +ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:200.9432315826416ms total_cost_time:200.9880542755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7554 prompt_cache_len:5151 prompt_cache_ratio:0.681890389197776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 +DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s +INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.1102285385131836 s +DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=169988591339832919397211719879761852865, time:1750767104.2266111s req_ids:[8] +DEBUG 06-24 20:11:44 [manager.py:391] +ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:215.70873260498047ms total_cost_time:215.7454490661621ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:7555 prompt_cache_len:5151 prompt_cache_ratio:0.6818001323626737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 +DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.1071772575378418 s +INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.10895824432373047 s +DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=76418448813543687857113272721814682787, time:1750767104.459956s req_ids:[8] +DEBUG 06-24 20:11:44 [manager.py:391] +ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:179.49700355529785ms total_cost_time:179.52322959899902ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7556 prompt_cache_len:5151 prompt_cache_ratio:0.6817098994176813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 +DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10622930526733398 s +INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.1081995964050293 s +DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=121887375925958240352771498361143910745, time:1750767104.629618s req_ids:[8] +DEBUG 06-24 20:11:44 [manager.py:391] +ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:198.26602935791016ms total_cost_time:198.29368591308594ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7557 prompt_cache_len:5151 prompt_cache_ratio:0.6816196903533148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 +DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10699081420898438 s +INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.10898280143737793 s +DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=294791716470291628319199793820009573133, time:1750767104.8342476s req_ids:[8] +DEBUG 06-24 20:11:44 [manager.py:391] +ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:209.25021171569824ms total_cost_time:209.27762985229492ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7558 prompt_cache_len:5151 prompt_cache_ratio:0.6815295051600953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 +DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10629391670227051 s +INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10812830924987793 s +DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=133862789411179326842288275223241386351, time:1750767105.0505066s req_ids:[8] +DEBUG 06-24 20:11:45 [manager.py:391] +ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:209.53369140625ms total_cost_time:209.5620632171631ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:7559 prompt_cache_len:5151 prompt_cache_ratio:0.6814393438285488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 +DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10702371597290039 s +INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10891437530517578 s +DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=179617423170358196531557431207277395617, time:1750767105.2633886s req_ids:[8] +DEBUG 06-24 20:11:45 [manager.py:391] +ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:209.18798446655273ms total_cost_time:209.21587944030762ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:7560 prompt_cache_len:5151 prompt_cache_ratio:0.6813492063492064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 +DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10806560516357422 s +INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10988736152648926 s +DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=145768680628490351418632385925288715384, time:1750767105.4791107s req_ids:[8] +DEBUG 06-24 20:11:45 [manager.py:391] +ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:212.5697135925293ms total_cost_time:212.6142978668213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7561 prompt_cache_len:5151 prompt_cache_ratio:0.6812590927126041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 +DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10834622383117676 s +INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034440994262695 s +DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=138648251566974517984286031088452072751, time:1750767105.6970541s req_ids:[8] +DEBUG 06-24 20:11:45 [manager.py:391] +ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:211.6379737854004ms total_cost_time:211.68208122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7562 prompt_cache_len:5151 prompt_cache_ratio:0.6811690029092833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 +DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.1078345775604248 s +INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s +DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=134283487158023206086155534826987988275, time:1750767105.9123216s req_ids:[8] +DEBUG 06-24 20:11:45 [manager.py:391] +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:404.76417541503906ms total_cost_time:404.80685234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7563 prompt_cache_len:5151 prompt_cache_ratio:0.6810789369297897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 +DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10766768455505371 s +INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.10963678359985352 s +DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=325059121281060223623189607702544014239, time:1750767106.3145409s req_ids:[8] +DEBUG 06-24 20:11:46 [manager.py:391] +ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:205.06739616394043ms total_cost_time:205.11388778686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7564 prompt_cache_len:5151 prompt_cache_ratio:0.6809888947646747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 +INFO 06-24 20:11:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10802888870239258 s +INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11025404930114746 s +DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=186911982696731476522228026045104293876, time:1750767106.5277236s req_ids:[8] +DEBUG 06-24 20:11:46 [manager.py:391] +ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:204.0121555328369ms total_cost_time:204.0560245513916ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7565 prompt_cache_len:5151 prompt_cache_ratio:0.6808988764044944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 +DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.1082913875579834 s +INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s +DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=91165102410507712713925566915790908705, time:1750767106.7484918s req_ids:[8] +DEBUG 06-24 20:11:46 [manager.py:391] +ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:219.69151496887207ms total_cost_time:219.73562240600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7566 prompt_cache_len:5151 prompt_cache_ratio:0.6808088818398097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 +DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10805869102478027 s +INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s +DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=5552189506530361009785273682136623347, time:1750767106.9661233s req_ids:[8] +DEBUG 06-24 20:11:46 [manager.py:391] +ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:208.53281021118164ms total_cost_time:208.57596397399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7567 prompt_cache_len:5151 prompt_cache_ratio:0.6807189110611868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 +DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1072683334350586 s +INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.1093595027923584 s +DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=261840028535865296972554656400637385727, time:1750767107.178723s req_ids:[8] +DEBUG 06-24 20:11:47 [manager.py:391] +ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:11:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 32398.280 tokens/s +DEBUG 06-24 20:11:47 [stats.py:37] Avg prompt tokens throughput: 32389.597 tokens/s +DEBUG 06-24 20:11:47 [stats.py:37] Avg generate tokens throughput: 8.683 tokens/s +INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:206.29239082336426ms total_cost_time:206.33578300476074ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7568 prompt_cache_len:5151 prompt_cache_ratio:0.6806289640591966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 +DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1073460578918457 s +INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10930418968200684 s +DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=334830395248494753189233186727094003830, time:1750767107.3902807s req_ids:[8] +DEBUG 06-24 20:11:47 [manager.py:391] +ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:206.892728805542ms total_cost_time:206.93469047546387ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7569 prompt_cache_len:5151 prompt_cache_ratio:0.6805390408244154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 +DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1075127124786377 s +INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10863971710205078 s +DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=3640695315095079824981478921322872342, time:1750767107.6019921s req_ids:[8] +DEBUG 06-24 20:11:47 [manager.py:391] +ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:205.45649528503418ms total_cost_time:205.49964904785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7570 prompt_cache_len:5151 prompt_cache_ratio:0.6804491413474241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 +DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.10765957832336426 s +INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10934925079345703 s +DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=259176706692511777023216105829224292808, time:1750767107.8130593s req_ids:[8] +DEBUG 06-24 20:11:47 [manager.py:391] +ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:208.45770835876465ms total_cost_time:208.50253105163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7571 prompt_cache_len:5151 prompt_cache_ratio:0.6803592656188087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 +DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s +INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.11025476455688477 s +DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=269065755103071112415442395774572655020, time:1750767108.0271504s req_ids:[8] +DEBUG 06-24 20:11:48 [manager.py:391] +ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:203.34267616271973ms total_cost_time:203.3863067626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7572 prompt_cache_len:5151 prompt_cache_ratio:0.68026941362916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 +DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.1074068546295166 s +INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.10947895050048828 s +DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=292513892436549303524914500434221788481, time:1750767108.2419765s req_ids:[8] +DEBUG 06-24 20:11:48 [manager.py:391] +ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:212.76211738586426ms total_cost_time:212.80455589294434ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7573 prompt_cache_len:5151 prompt_cache_ratio:0.6801795853690743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 +DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.30934977531433105 s +INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.3115513324737549 s +DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=55627887512008716128218348700463282596, time:1750767108.6658175s req_ids:[8] +DEBUG 06-24 20:11:48 [manager.py:391] +ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:425.94265937805176ms total_cost_time:425.98652839660645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7574 prompt_cache_len:5151 prompt_cache_ratio:0.6800897808291524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 +DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.10815787315368652 s +INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s +DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=159034666683320048925259149267094192233, time:1750767108.8889012s req_ids:[8] +DEBUG 06-24 20:11:48 [manager.py:391] +ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:212.13340759277344ms total_cost_time:212.17823028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7575 prompt_cache_len:5151 prompt_cache_ratio:0.68 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 +DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10863566398620605 s +INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065936088562012 s +DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=105745694030715500471268079531589838319, time:1750767109.1039073s req_ids:[8] +DEBUG 06-24 20:11:49 [manager.py:391] +ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:207.5655460357666ms total_cost_time:207.60846138000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7576 prompt_cache_len:5151 prompt_cache_ratio:0.6799102428722281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 +DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10725212097167969 s +INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s +DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=282135909232614285931765542060023114506, time:1750767109.316409s req_ids:[8] +DEBUG 06-24 20:11:49 [manager.py:391] +ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:205.9769630432129ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7577 prompt_cache_len:5151 prompt_cache_ratio:0.6798205094364524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 +DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10726594924926758 s +INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s +DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=188216836749803026845342195855905657739, time:1750767109.5292065s req_ids:[8] +DEBUG 06-24 20:11:49 [manager.py:391] +ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:205.60789108276367ms total_cost_time:205.65080642700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7578 prompt_cache_len:5151 prompt_cache_ratio:0.6797307996832938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 +DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10712385177612305 s +INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.1095280647277832 s +DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=336313110730404584116123823137357753181, time:1750767109.7404168s req_ids:[8] +DEBUG 06-24 20:11:49 [manager.py:391] +ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:204.86688613891602ms total_cost_time:204.9100399017334ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7579 prompt_cache_len:5151 prompt_cache_ratio:0.6796411136033778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 +DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10727143287658691 s +INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.10964322090148926 s +DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=206080538548647739792168901607128742659, time:1750767109.9512205s req_ids:[8] +DEBUG 06-24 20:11:49 [manager.py:391] +ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:207.3988914489746ms total_cost_time:207.4434757232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7580 prompt_cache_len:5151 prompt_cache_ratio:0.6795514511873351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 +DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:50 [batch.py:51] router release req id 8 +INFO 06-24 20:11:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10822415351867676 s +INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.11013269424438477 s +DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=80145215812688536994433277270156737848, time:1750767110.1638384s req_ids:[8] +DEBUG 06-24 20:11:50 [manager.py:391] +ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:208.0380916595459ms total_cost_time:208.0826759338379ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7581 prompt_cache_len:5151 prompt_cache_ratio:0.6794618124258014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 +DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s +INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.10914778709411621 s +DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=99644866612821332998155598073138688888, time:1750767110.3906262s req_ids:[8] +DEBUG 06-24 20:11:50 [manager.py:391] +ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:222.015380859375ms total_cost_time:222.05758094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7582 prompt_cache_len:5151 prompt_cache_ratio:0.679372197309417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 +DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10823392868041992 s +INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s +DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=176151888092917954387613094052147616261, time:1750767110.6053104s req_ids:[8] +DEBUG 06-24 20:11:50 [manager.py:391] +ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:206.31098747253418ms total_cost_time:206.35604858398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7583 prompt_cache_len:5151 prompt_cache_ratio:0.6792826058288276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 +DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.31011343002319336 s +INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.3121047019958496 s +DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=307182656201380843456086862858552060262, time:1750767111.0246234s req_ids:[8] +DEBUG 06-24 20:11:51 [manager.py:391] +ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:419.76261138916016ms total_cost_time:419.80624198913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7584 prompt_cache_len:5151 prompt_cache_ratio:0.6791930379746836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 +DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s +INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.10997939109802246 s +DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=306002531093132578532058517711718487539, time:1750767111.2456334s req_ids:[8] +DEBUG 06-24 20:11:51 [manager.py:391] +ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:210.26873588562012ms total_cost_time:210.3121280670166ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7585 prompt_cache_len:5151 prompt_cache_ratio:0.6791034937376401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 +DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10836124420166016 s +INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.11037492752075195 s +DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=47301069928570188080534922027948646235, time:1750767111.4572446s req_ids:[8] +DEBUG 06-24 20:11:51 [manager.py:391] +ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:200.98257064819336ms total_cost_time:201.02572441101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7586 prompt_cache_len:5151 prompt_cache_ratio:0.6790139731083575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 +DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10720705986022949 s +INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.10921883583068848 s +DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=20063513931273837474188628653112903437, time:1750767111.6647258s req_ids:[8] +DEBUG 06-24 20:11:51 [manager.py:391] +ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:206.5417766571045ms total_cost_time:206.58588409423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7587 prompt_cache_len:5151 prompt_cache_ratio:0.678924476077501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 +DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10907506942749023 s +INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.1110982894897461 s +DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=331378806526777745438802288831548262516, time:1750767111.8865666s req_ids:[8] +DEBUG 06-24 20:11:51 [manager.py:391] +ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:215.82365036010742ms total_cost_time:215.867280960083ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7588 prompt_cache_len:5151 prompt_cache_ratio:0.6788350026357406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 +DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s +INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10974454879760742 s +DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=134589987200880221805656027682236387595, time:1750767112.0985985s req_ids:[8] +DEBUG 06-24 20:11:52 [manager.py:391] +ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:201.0822296142578ms total_cost_time:201.12919807434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7589 prompt_cache_len:5151 prompt_cache_ratio:0.6787455527737515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 +DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10866641998291016 s +INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.11063885688781738 s +DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=265795104378361882361495694846097974291, time:1750767112.304473s req_ids:[8] +DEBUG 06-24 20:11:52 [manager.py:391] +ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:204.71692085266113ms total_cost_time:204.76412773132324ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7590 prompt_cache_len:5151 prompt_cache_ratio:0.6786561264822134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 +DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10732603073120117 s +INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10934734344482422 s +DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=326500589668166019911679618073762534047, time:1750767112.5148087s req_ids:[8] +DEBUG 06-24 20:11:52 [manager.py:391] +ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:205.76024055480957ms total_cost_time:205.80363273620605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7591 prompt_cache_len:5151 prompt_cache_ratio:0.6785667237518114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 +DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.1073613166809082 s +INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10937070846557617 s +DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=225464502858199488910453500859387784207, time:1750767112.7271273s req_ids:[8] +DEBUG 06-24 20:11:52 [manager.py:391] +ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:191.90573692321777ms total_cost_time:191.95127487182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7592 prompt_cache_len:5151 prompt_cache_ratio:0.6784773445732349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 +DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.1114962100982666 s +INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.11359786987304688 s +DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=177648526989387100364248466222953646722, time:1750767112.9252858s req_ids:[8] +DEBUG 06-24 20:11:52 [manager.py:391] +ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:359.00402069091797ms total_cost_time:359.12060737609863ms,out_token_counter:1 mean_per_token_cost_time: 0.11658668518066406ms prompt_token_num:7593 prompt_cache_len:5151 prompt_cache_ratio:0.678387988937179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 +INFO 06-24 20:11:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:11:53 [statics_utils.py:24] mean first cost: 230.45424360725747 ms +INFO 06-24 20:11:53 [statics_utils.py:24] mean per token cost: 0.08918244930490664 ms +DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s +INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.11029934883117676 s +DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=247120571047092908805065118308696284587, time:1750767113.2921638s req_ids:[8] +DEBUG 06-24 20:11:53 [manager.py:391] +ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:201.8113136291504ms total_cost_time:201.8566131591797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7594 prompt_cache_len:5151 prompt_cache_ratio:0.6782986568343429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 +DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10799646377563477 s +INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10996532440185547 s +DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=156741713883222348929408060229348054386, time:1750767113.5038817s req_ids:[8] +DEBUG 06-24 20:11:53 [manager.py:391] +ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:207.9160213470459ms total_cost_time:207.98015594482422ms,out_token_counter:1 mean_per_token_cost_time: 0.06413459777832031ms prompt_token_num:7595 prompt_cache_len:5151 prompt_cache_ratio:0.6782093482554312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 +DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:11:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s +INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10963296890258789 s +DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=22160039114344207846908912752363658009, time:1750767113.7199109s req_ids:[8] +DEBUG 06-24 20:11:53 [manager.py:391] +ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:194.5345401763916ms total_cost_time:194.5805549621582ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7596 prompt_cache_len:5151 prompt_cache_ratio:0.6781200631911533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 +DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10690903663635254 s +INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10880517959594727 s +DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=318069371706745337923303718372962645116, time:1750767113.918359s req_ids:[8] +DEBUG 06-24 20:11:53 [manager.py:391] +ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:205.275297164917ms total_cost_time:205.3208351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7597 prompt_cache_len:5151 prompt_cache_ratio:0.6780308016322233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 +DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10772275924682617 s +INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974574089050293 s +DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=294179280277235238044681467199038308348, time:1750767114.129605s req_ids:[8] +DEBUG 06-24 20:11:54 [manager.py:391] +ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:207.5364589691162ms total_cost_time:207.5817584991455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7598 prompt_cache_len:5151 prompt_cache_ratio:0.6779415635693603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 +DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10748744010925293 s +INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10944557189941406 s +DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=191603652701972640168689369227027687579, time:1750767114.3409472s req_ids:[8] +DEBUG 06-24 20:11:54 [manager.py:391] +ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:208.08172225952148ms total_cost_time:208.12726020812988ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7599 prompt_cache_len:5151 prompt_cache_ratio:0.6778523489932886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 +DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10667276382446289 s +INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10857391357421875 s +DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=54371763715272126649577982863107339549, time:1750767114.5551345s req_ids:[8] +DEBUG 06-24 20:11:54 [manager.py:391] +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:204.96869087219238ms total_cost_time:205.01303672790527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7600 prompt_cache_len:5151 prompt_cache_ratio:0.6777631578947368 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 +DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10717558860778809 s +INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10929083824157715 s +DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=262799399220093104716059103035021439543, time:1750767114.7641528s req_ids:[8] +DEBUG 06-24 20:11:54 [manager.py:391] +ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:197.54457473754883ms total_cost_time:197.59011268615723ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7601 prompt_cache_len:5151 prompt_cache_ratio:0.6776739902644389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 +DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10757279396057129 s +INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10966157913208008 s +DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=110870420036702772096070893083722228545, time:1750767114.969113s req_ids:[8] +DEBUG 06-24 20:11:54 [manager.py:391] +ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:205.0192356109619ms total_cost_time:205.0638198852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7602 prompt_cache_len:5151 prompt_cache_ratio:0.6775848460931334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 +DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10756349563598633 s +INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10939645767211914 s +DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=210220237649531196875526394833870996893, time:1750767115.182449s req_ids:[8] +DEBUG 06-24 20:11:55 [manager.py:391] +ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:361.6604804992676ms total_cost_time:361.7062568664551ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7603 prompt_cache_len:5151 prompt_cache_ratio:0.6774957253715639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 +DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10712647438049316 s +INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10901856422424316 s +DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=175504085763459210803417320798174422131, time:1750767115.545004s req_ids:[8] +DEBUG 06-24 20:11:55 [manager.py:391] +ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:201.48563385009766ms total_cost_time:201.53021812438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7604 prompt_cache_len:5151 prompt_cache_ratio:0.6774066280904787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 +DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s +INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10972809791564941 s +DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=246909247659818984089727209313713175225, time:1750767115.7549067s req_ids:[8] +DEBUG 06-24 20:11:55 [manager.py:391] +ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:167.2191619873047ms total_cost_time:167.26303100585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7605 prompt_cache_len:5151 prompt_cache_ratio:0.6773175542406311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 +DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10723996162414551 s +INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s +DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=157032801151998973443450838199777836797, time:1750767115.926761s req_ids:[8] +DEBUG 06-24 20:11:55 [manager.py:391] +ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:202.64434814453125ms total_cost_time:202.68917083740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7606 prompt_cache_len:5151 prompt_cache_ratio:0.6772285038127794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 +DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10741066932678223 s +INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.10977816581726074 s +DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=240035920349394977824314060253533495167, time:1750767116.1468995s req_ids:[8] +DEBUG 06-24 20:11:56 [manager.py:391] +ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:220.50857543945312ms total_cost_time:220.55315971374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7607 prompt_cache_len:5151 prompt_cache_ratio:0.6771394767976864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 +DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.1080935001373291 s +INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11004972457885742 s +DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=69803246132653740458522977531718677978, time:1750767116.3658953s req_ids:[8] +DEBUG 06-24 20:11:56 [manager.py:391] +ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:207.82232284545898ms total_cost_time:207.86786079406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7608 prompt_cache_len:5151 prompt_cache_ratio:0.6770504731861199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 +DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s +INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11011123657226562 s +DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=161361274899183701647745323484916032548, time:1750767116.584003s req_ids:[8] +DEBUG 06-24 20:11:56 [manager.py:391] +ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:216.62139892578125ms total_cost_time:216.66669845581055ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7609 prompt_cache_len:5151 prompt_cache_ratio:0.6769614929688527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 +DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10826539993286133 s +INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11025524139404297 s +DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=224161456428403044134976587197314518946, time:1750767116.7989717s req_ids:[8] +DEBUG 06-24 20:11:56 [manager.py:391] +ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:205.9805393218994ms total_cost_time:206.0248851776123ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7610 prompt_cache_len:5151 prompt_cache_ratio:0.6768725361366623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 +DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10729455947875977 s +INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s +DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=230468343094795497387444710483104175921, time:1750767117.0107594s req_ids:[8] +DEBUG 06-24 20:11:57 [manager.py:391] +ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:206.28714561462402ms total_cost_time:206.33244514465332ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7611 prompt_cache_len:5151 prompt_cache_ratio:0.6767836026803311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 +DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:57 [batch.py:51] router release req id 8 +INFO 06-24 20:11:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.11011266708374023 s +INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.11212778091430664 s +DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=138092859862136241658117489900640823354, time:1750767117.2220254s req_ids:[8] +DEBUG 06-24 20:11:57 [manager.py:391] +ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:11:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 33253.528 tokens/s +DEBUG 06-24 20:11:57 [stats.py:37] Avg prompt tokens throughput: 33244.768 tokens/s +DEBUG 06-24 20:11:57 [stats.py:37] Avg generate tokens throughput: 8.760 tokens/s +INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:206.5145969390869ms total_cost_time:206.5596580505371ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7612 prompt_cache_len:5151 prompt_cache_ratio:0.6766946925906463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 +DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.1081092357635498 s +INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.11008954048156738 s +DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=307895635031848941299399201531484764646, time:1750767117.4424143s req_ids:[8] +DEBUG 06-24 20:11:57 [manager.py:391] +ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:214.8268222808838ms total_cost_time:214.86926078796387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7613 prompt_cache_len:5151 prompt_cache_ratio:0.6766058058584001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 +DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.308910608291626 s +INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.3109011650085449 s +DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=136917777098879793241144358799949189195, time:1750767117.8635302s req_ids:[8] +DEBUG 06-24 20:11:57 [manager.py:391] +ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:420.8080768585205ms total_cost_time:420.8526611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7614 prompt_cache_len:5151 prompt_cache_ratio:0.6765169424743893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 +DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10798335075378418 s +INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s +DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=180865609731338338075095484017174829893, time:1750767118.0840385s req_ids:[8] +DEBUG 06-24 20:11:58 [manager.py:391] +ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:210.04533767700195ms total_cost_time:210.08920669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7615 prompt_cache_len:5151 prompt_cache_ratio:0.6764281024294156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 +DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10816621780395508 s +INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s +DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=145160727108707129997868071611482595468, time:1750767118.2986574s req_ids:[8] +DEBUG 06-24 20:11:58 [manager.py:391] +ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:204.01453971862793ms total_cost_time:204.05888557434082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7616 prompt_cache_len:5151 prompt_cache_ratio:0.6763392857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 +DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10793232917785645 s +INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.1100320816040039 s +DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=319578231464490195713350762204816044405, time:1750767118.5067341s req_ids:[8] +DEBUG 06-24 20:11:58 [manager.py:391] +ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:204.132080078125ms total_cost_time:204.1773796081543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7617 prompt_cache_len:5151 prompt_cache_ratio:0.6762504923198109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 +DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10730981826782227 s +INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.10927486419677734 s +DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=206517665322153722171346162811302919141, time:1750767118.7168157s req_ids:[8] +DEBUG 06-24 20:11:58 [manager.py:391] +ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:205.3208351135254ms total_cost_time:205.36565780639648ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7618 prompt_cache_len:5151 prompt_cache_ratio:0.6761617222368076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 +DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10852384567260742 s +INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11053061485290527 s +DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=182806468058710080011925634816711119674, time:1750767118.9331715s req_ids:[8] +DEBUG 06-24 20:11:58 [manager.py:391] +ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:213.98615837097168ms total_cost_time:214.03098106384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7619 prompt_cache_len:5151 prompt_cache_ratio:0.6760729754560966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 +DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s +INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947751998901367 s +DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=125058214398531642428446553485042514064, time:1750767119.1481361s req_ids:[8] +DEBUG 06-24 20:11:59 [manager.py:391] +ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:205.12819290161133ms total_cost_time:205.17396926879883ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7620 prompt_cache_len:5151 prompt_cache_ratio:0.675984251968504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 +DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s +INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.11013245582580566 s +DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=266503259897760547314518672065873128037, time:1750767119.3667228s req_ids:[8] +DEBUG 06-24 20:11:59 [manager.py:391] +ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:217.32163429260254ms total_cost_time:217.38266944885254ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7621 prompt_cache_len:5151 prompt_cache_ratio:0.6758955517648603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 +DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10742998123168945 s +INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10950732231140137 s +DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=96431240528635776001762764699951333799, time:1750767119.5825422s req_ids:[8] +DEBUG 06-24 20:11:59 [manager.py:391] +ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:202.8813362121582ms total_cost_time:202.9252052307129ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7622 prompt_cache_len:5151 prompt_cache_ratio:0.6758068748360011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 +DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:11:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.1074988842010498 s +INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10958600044250488 s +DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=65827784738082953039640762525229544695, time:1750767119.7984495s req_ids:[8] +DEBUG 06-24 20:11:59 [manager.py:391] +ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:210.25753021240234ms total_cost_time:210.31785011291504ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7623 prompt_cache_len:5151 prompt_cache_ratio:0.6757182211727666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 +DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.3103930950164795 s +INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.31258440017700195 s +DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=20141742625225259944710266829088301339, time:1750767120.2114785s req_ids:[8] +DEBUG 06-24 20:12:00 [manager.py:391] +ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:410.63976287841797ms total_cost_time:410.68577766418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7624 prompt_cache_len:5151 prompt_cache_ratio:0.6756295907660022 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 +DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.10883092880249023 s +INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085295677185059 s +DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=90148409981598706303780972151094956415, time:1750767120.4277575s req_ids:[8] +DEBUG 06-24 20:12:00 [manager.py:391] +ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:210.47687530517578ms total_cost_time:210.52145957946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7625 prompt_cache_len:5151 prompt_cache_ratio:0.6755409836065573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 +DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.10720205307006836 s +INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.10912775993347168 s +INFO 06-24 20:12:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=84636828358444117249686718153847102598, time:1750767120.6417718s req_ids:[8] +DEBUG 06-24 20:12:00 [manager.py:391] +ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:209.49268341064453ms total_cost_time:209.53989028930664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7626 prompt_cache_len:5151 prompt_cache_ratio:0.6754523996852871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 +DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s +INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.10981082916259766 s +DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=315821611015599099231123208784346763090, time:1750767120.8559885s req_ids:[8] +DEBUG 06-24 20:12:00 [manager.py:391] +ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:208.13274383544922ms total_cost_time:208.17828178405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7627 prompt_cache_len:5151 prompt_cache_ratio:0.675363838993051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 +DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10771894454956055 s +INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s +DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=202394250765275040888192629281793333950, time:1750767121.069141s req_ids:[8] +DEBUG 06-24 20:12:01 [manager.py:391] +ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:208.41169357299805ms total_cost_time:208.45651626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7628 prompt_cache_len:5151 prompt_cache_ratio:0.6752753015207131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 +DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10761618614196777 s +INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.10958719253540039 s +DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=154807317957381621669301476022717652428, time:1750767121.2832618s req_ids:[8] +DEBUG 06-24 20:12:01 [manager.py:391] +ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:204.00357246398926ms total_cost_time:204.04767990112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7629 prompt_cache_len:5151 prompt_cache_ratio:0.6751867872591427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 +DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10805916786193848 s +INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.11003541946411133 s +DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=29302947999594689954497796196213616164, time:1750767121.4928668s req_ids:[8] +DEBUG 06-24 20:12:01 [manager.py:391] +ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:208.71925354003906ms total_cost_time:208.76407623291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7630 prompt_cache_len:5151 prompt_cache_ratio:0.6750982961992136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 +DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10754013061523438 s +INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.1095438003540039 s +DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=138418184835968109005212377708670425308, time:1750767121.7076094s req_ids:[8] +DEBUG 06-24 20:12:01 [manager.py:391] +ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:204.85424995422363ms total_cost_time:204.90074157714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7631 prompt_cache_len:5151 prompt_cache_ratio:0.6750098283318045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 +DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10854339599609375 s +INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.11053705215454102 s +DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=251343756333749997203949398879495622773, time:1750767121.9175932s req_ids:[8] +DEBUG 06-24 20:12:01 [manager.py:391] +ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:207.7937126159668ms total_cost_time:207.83662796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7632 prompt_cache_len:5151 prompt_cache_ratio:0.6749213836477987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 +DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.1076517105102539 s +INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s +DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=105915941757655465419408989286278896191, time:1750767122.1317947s req_ids:[8] +DEBUG 06-24 20:12:02 [manager.py:391] +ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:359.1322898864746ms total_cost_time:359.1773509979248ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7633 prompt_cache_len:5151 prompt_cache_ratio:0.6748329621380846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 +DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s +INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096043586730957 s +DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=292466292324528418532920161671775069269, time:1750767122.4912443s req_ids:[8] +DEBUG 06-24 20:12:02 [manager.py:391] +ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:198.82726669311523ms total_cost_time:198.8697052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7634 prompt_cache_len:5151 prompt_cache_ratio:0.6747445637935552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 +DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s +INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s +DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=251356633320645433807908352290572514401, time:1750767122.7008545s req_ids:[8] +DEBUG 06-24 20:12:02 [manager.py:391] +ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:206.88843727111816ms total_cost_time:206.94279670715332ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7635 prompt_cache_len:5151 prompt_cache_ratio:0.674656188605108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 +DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.10819005966186523 s +INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.11022734642028809 s +DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=228999123688250112746799481675882906663, time:1750767122.915243s req_ids:[8] +DEBUG 06-24 20:12:02 [manager.py:391] +ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:213.81688117980957ms total_cost_time:213.86098861694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7636 prompt_cache_len:5151 prompt_cache_ratio:0.6745678365636459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 +DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10842037200927734 s +INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.11054277420043945 s +DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=222487139147151296085300585172215025662, time:1750767123.129836s req_ids:[8] +DEBUG 06-24 20:12:03 [manager.py:391] +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:197.14689254760742ms total_cost_time:197.1902847290039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7637 prompt_cache_len:5151 prompt_cache_ratio:0.6744795076600759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 +DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10741138458251953 s +INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955047607421875 s +DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=201567458647512280476042599797373910715, time:1750767123.3382328s req_ids:[8] +DEBUG 06-24 20:12:03 [manager.py:391] +ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:206.2857151031494ms total_cost_time:206.3305377960205ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7638 prompt_cache_len:5151 prompt_cache_ratio:0.6743912018853103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 +DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10785889625549316 s +INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10998892784118652 s +DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=59434207318227779999270853848336715770, time:1750767123.5517983s req_ids:[8] +DEBUG 06-24 20:12:03 [manager.py:391] +ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:205.5060863494873ms total_cost_time:205.5490016937256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7639 prompt_cache_len:5151 prompt_cache_ratio:0.6743029192302658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 +DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s +INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s +DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=17700833921834285883276378318189027449, time:1750767123.7606602s req_ids:[8] +DEBUG 06-24 20:12:03 [manager.py:391] +ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:203.60040664672852ms total_cost_time:203.6449909210205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7640 prompt_cache_len:5151 prompt_cache_ratio:0.6742146596858639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 +DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.1075904369354248 s +INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.11019110679626465 s +DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=286149354074145959431110702610805139441, time:1750767123.9697864s req_ids:[8] +DEBUG 06-24 20:12:03 [manager.py:391] +ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:202.79431343078613ms total_cost_time:202.8367519378662ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7641 prompt_cache_len:5151 prompt_cache_ratio:0.6741264232430311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 +DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:04 [batch.py:51] router release req id 8 +INFO 06-24 20:12:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10877656936645508 s +INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.11089229583740234 s +DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=272417992743359392322715543009164785209, time:1750767124.1806426s req_ids:[8] +DEBUG 06-24 20:12:04 [manager.py:391] +ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:206.83598518371582ms total_cost_time:206.88199996948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7642 prompt_cache_len:5151 prompt_cache_ratio:0.6740382098926982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 +DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10679244995117188 s +INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.1085963249206543 s +DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=45954975210836917177091388612510632464, time:1750767124.3916602s req_ids:[8] +DEBUG 06-24 20:12:04 [manager.py:391] +ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:343.69969367980957ms total_cost_time:343.72615814208984ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7643 prompt_cache_len:5151 prompt_cache_ratio:0.6739500196258014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 +DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10479950904846191 s +INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.10668444633483887 s +DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=293091237295508245519746781460967692957, time:1750767124.737938s req_ids:[8] +DEBUG 06-24 20:12:04 [manager.py:391] +ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:186.66505813598633ms total_cost_time:186.692476272583ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7644 prompt_cache_len:5151 prompt_cache_ratio:0.673861852433281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 +DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10714316368103027 s +INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.10845398902893066 s +DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=337865258369175204784794031953478057126, time:1750767124.9302933s req_ids:[8] +DEBUG 06-24 20:12:04 [manager.py:391] +ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:203.68027687072754ms total_cost_time:203.72509956359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7645 prompt_cache_len:5151 prompt_cache_ratio:0.6737737083060824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 +DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10900211334228516 s +INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.1111598014831543 s +DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=194631978790029223599206045056590355021, time:1750767125.1390193s req_ids:[8] +DEBUG 06-24 20:12:05 [manager.py:391] +ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:205.50990104675293ms total_cost_time:205.55567741394043ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7646 prompt_cache_len:5151 prompt_cache_ratio:0.6736855872351556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 +DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s +INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063909530639648 s +DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=308001038756916865905015348005747319476, time:1750767125.350848s req_ids:[8] +DEBUG 06-24 20:12:05 [manager.py:391] +ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:210.052490234375ms total_cost_time:210.0963592529297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7647 prompt_cache_len:5151 prompt_cache_ratio:0.6735974892114555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 +DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10803365707397461 s +INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10923099517822266 s +DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=127060175205574686085944506796575274085, time:1750767125.5634408s req_ids:[8] +DEBUG 06-24 20:12:05 [manager.py:391] +ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:207.98707008361816ms total_cost_time:208.03260803222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7648 prompt_cache_len:5151 prompt_cache_ratio:0.6735094142259415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 +DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10728073120117188 s +INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10932040214538574 s +DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=213957160664273498099397797333643204139, time:1750767125.7777202s req_ids:[8] +DEBUG 06-24 20:12:05 [manager.py:391] +ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:207.83090591430664ms total_cost_time:207.87644386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7649 prompt_cache_len:5151 prompt_cache_ratio:0.6734213622695777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 +DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10753250122070312 s +INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10967326164245605 s +DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=13061883082707811418455032452779136763, time:1750767125.9915092s req_ids:[8] +DEBUG 06-24 20:12:05 [manager.py:391] +ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:204.47278022766113ms total_cost_time:204.51736450195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7650 prompt_cache_len:5151 prompt_cache_ratio:0.6733333333333333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 +DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.10706090927124023 s +INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10899138450622559 s +DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=256030095120020264533364286603217459869, time:1750767126.206879s req_ids:[8] +DEBUG 06-24 20:12:06 [manager.py:391] +ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:209.35964584350586ms total_cost_time:209.40446853637695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7651 prompt_cache_len:5151 prompt_cache_ratio:0.6732453274081819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 +DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.10760188102722168 s +INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s +DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=77668998125334631537902299014074144355, time:1750767126.41798s req_ids:[8] +DEBUG 06-24 20:12:06 [manager.py:391] +ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:206.41303062438965ms total_cost_time:206.43925666809082ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7652 prompt_cache_len:5151 prompt_cache_ratio:0.6731573444851019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 +DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.1050422191619873 s +INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10705232620239258 s +DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=99039694711886695764364046387431626529, time:1750767126.634283s req_ids:[8] +DEBUG 06-24 20:12:06 [manager.py:391] +ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:210.53385734558105ms total_cost_time:210.5538845062256ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:7653 prompt_cache_len:5151 prompt_cache_ratio:0.6730693845550765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 +DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.31047582626342773 s +INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.3126389980316162 s +DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=157716120506166927391665716434283762813, time:1750767127.058635s req_ids:[8] +DEBUG 06-24 20:12:07 [manager.py:391] +ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:425.78625679016113ms total_cost_time:425.83179473876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7654 prompt_cache_len:5151 prompt_cache_ratio:0.6729814476090933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 +DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10737419128417969 s +INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s +DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=107131856995001008665159195517854166514, time:1750767127.2778528s req_ids:[8] +DEBUG 06-24 20:12:07 [manager.py:391] +ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:12:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 32650.535 tokens/s +DEBUG 06-24 20:12:07 [stats.py:37] Avg prompt tokens throughput: 32641.983 tokens/s +DEBUG 06-24 20:12:07 [stats.py:37] Avg generate tokens throughput: 8.552 tokens/s +INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:209.21754837036133ms total_cost_time:209.26427841186523ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7655 prompt_cache_len:5151 prompt_cache_ratio:0.672893533638145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 +DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10753488540649414 s +INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10886025428771973 s +DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=64058647012786924515775172497937836678, time:1750767127.4913738s req_ids:[8] +DEBUG 06-24 20:12:07 [manager.py:391] +ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:203.23824882507324ms total_cost_time:203.28211784362793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7656 prompt_cache_len:5151 prompt_cache_ratio:0.6728056426332288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 +DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:07 [batch.py:51] router release req id 8 +INFO 06-24 20:12:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10768985748291016 s +INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s +DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=310553461627661597263289366800552946098, time:1750767127.7002566s req_ids:[8] +DEBUG 06-24 20:12:07 [manager.py:391] +ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:204.05912399291992ms total_cost_time:204.08129692077637ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7657 prompt_cache_len:5151 prompt_cache_ratio:0.6727177745853468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 +DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10602450370788574 s +INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10721468925476074 s +DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=17602799060686228765768248359399999234, time:1750767127.9095516s req_ids:[8] +DEBUG 06-24 20:12:07 [manager.py:391] +ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:212.22567558288574ms total_cost_time:212.26882934570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7658 prompt_cache_len:5151 prompt_cache_ratio:0.6726299294855054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 +DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.1080634593963623 s +INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.11032533645629883 s +DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=168358859685662161197510960453472682439, time:1750767128.1249661s req_ids:[8] +DEBUG 06-24 20:12:08 [manager.py:391] +ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:212.3239040374756ms total_cost_time:212.3696804046631ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7659 prompt_cache_len:5151 prompt_cache_ratio:0.672542107324716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 +DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10715460777282715 s +INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10905909538269043 s +DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=177548496048096521164403243664799188756, time:1750767128.3417008s req_ids:[8] +DEBUG 06-24 20:12:08 [manager.py:391] +ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:164.5498275756836ms total_cost_time:164.59298133850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7660 prompt_cache_len:5151 prompt_cache_ratio:0.6724543080939948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 +DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10836148262023926 s +INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s +DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=99453293519797623160995451792571262350, time:1750767128.5119169s req_ids:[8] +DEBUG 06-24 20:12:08 [manager.py:391] +ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:204.50091361999512ms total_cost_time:204.5290470123291ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:7661 prompt_cache_len:5151 prompt_cache_ratio:0.6723665317843623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 +DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10555791854858398 s +INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10746908187866211 s +DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=277848746396038846409600628438658162249, time:1750767128.7217531s req_ids:[8] +DEBUG 06-24 20:12:08 [manager.py:391] +ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:204.00071144104004ms total_cost_time:204.0231227874756ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:7662 prompt_cache_len:5151 prompt_cache_ratio:0.6722787783868441 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 +DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10603570938110352 s +INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10817313194274902 s +DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=14829012066021962959277559814804674120, time:1750767128.931706s req_ids:[8] +DEBUG 06-24 20:12:08 [manager.py:391] +ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:211.59863471984863ms total_cost_time:211.64178848266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7663 prompt_cache_len:5151 prompt_cache_ratio:0.6721910478924703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 +DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.30983853340148926 s +INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.3120276927947998 s +DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=148716461557774016630600037119486825080, time:1750767129.3526757s req_ids:[8] +DEBUG 06-24 20:12:09 [manager.py:391] +ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:414.55531120300293ms total_cost_time:414.60251808166504ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7664 prompt_cache_len:5151 prompt_cache_ratio:0.6721033402922756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 +DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10713529586791992 s +INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s +DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=102939958713478243095372399820662756318, time:1750767129.5714526s req_ids:[8] +DEBUG 06-24 20:12:09 [manager.py:391] +ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:182.1737289428711ms total_cost_time:182.21640586853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7665 prompt_cache_len:5151 prompt_cache_ratio:0.6720156555772994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 +DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10753655433654785 s +INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.10950660705566406 s +DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=267231234815814363712815229519286381196, time:1750767129.75272s req_ids:[8] +DEBUG 06-24 20:12:09 [manager.py:391] +ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:195.14942169189453ms total_cost_time:195.19519805908203ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7666 prompt_cache_len:5151 prompt_cache_ratio:0.6719279937385859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 +DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10741758346557617 s +INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.10999536514282227 s +DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=170566789586579577405896794004604895525, time:1750767129.9568775s req_ids:[8] +DEBUG 06-24 20:12:09 [manager.py:391] +ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:206.69150352478027ms total_cost_time:206.73775672912598ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7667 prompt_cache_len:5151 prompt_cache_ratio:0.6718403547671841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 +DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10818648338317871 s +INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11029410362243652 s +DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=4218543985224994588653419520821831190, time:1750767130.1686184s req_ids:[8] +DEBUG 06-24 20:12:10 [manager.py:391] +ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:211.56597137451172ms total_cost_time:211.61365509033203ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7668 prompt_cache_len:5151 prompt_cache_ratio:0.6717527386541471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 +DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.1087486743927002 s +INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s +DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=133130163678548114148860185451658996365, time:1750767130.384927s req_ids:[8] +DEBUG 06-24 20:12:10 [manager.py:391] +ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:164.26324844360352ms total_cost_time:164.3667221069336ms,out_token_counter:1 mean_per_token_cost_time: 0.10347366333007812ms prompt_token_num:7669 prompt_cache_len:5151 prompt_cache_ratio:0.6716651453905333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 +DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.1072995662689209 s +INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.10864686965942383 s +DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=158684119483686090009265898146213015132, time:1750767130.557638s req_ids:[8] +DEBUG 06-24 20:12:10 [manager.py:391] +ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:200.8810043334961ms total_cost_time:200.9272575378418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7670 prompt_cache_len:5151 prompt_cache_ratio:0.6715775749674054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 +DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10821962356567383 s +INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956811904907227 s +DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=235949549936487960068486758082145082128, time:1750767130.7630873s req_ids:[8] +DEBUG 06-24 20:12:10 [manager.py:391] +ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:206.0713768005371ms total_cost_time:206.1171531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7671 prompt_cache_len:5151 prompt_cache_ratio:0.6714900273758311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 +DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s +INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11005616188049316 s +DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=184004980944507935669551138043796048011, time:1750767130.9766197s req_ids:[8] +DEBUG 06-24 20:12:10 [manager.py:391] +ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:208.72735977172852ms total_cost_time:208.7728977203369ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7672 prompt_cache_len:5151 prompt_cache_ratio:0.6714025026068822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 +DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:11 [batch.py:51] router release req id 8 +INFO 06-24 20:12:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10737156867980957 s +INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.1095740795135498 s +DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=229408986791501043074889080270158216643, time:1750767131.18818s req_ids:[8] +DEBUG 06-24 20:12:11 [manager.py:391] +ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:376.5888214111328ms total_cost_time:376.6167163848877ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:7673 prompt_cache_len:5151 prompt_cache_ratio:0.6713150006516356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 +DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10551071166992188 s +INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.10756969451904297 s +DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=206072783669655577257534812506327375500, time:1750767131.5705001s req_ids:[8] +DEBUG 06-24 20:12:11 [manager.py:391] +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:204.9424648284912ms total_cost_time:204.9863338470459ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7674 prompt_cache_len:5151 prompt_cache_ratio:0.6712275215011728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 +DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10942339897155762 s +INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.11137795448303223 s +DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=130202095981086571737380495270041754654, time:1750767131.7792754s req_ids:[8] +DEBUG 06-24 20:12:11 [manager.py:391] +ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:206.22587203979492ms total_cost_time:206.2702178955078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7675 prompt_cache_len:5151 prompt_cache_ratio:0.6711400651465798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 +DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s +INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.11037397384643555 s +DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=121103546113517516161015130683696696512, time:1750767131.9910636s req_ids:[8] +DEBUG 06-24 20:12:11 [manager.py:391] +ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:206.82287216186523ms total_cost_time:206.86769485473633ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7676 prompt_cache_len:5151 prompt_cache_ratio:0.6710526315789473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 +DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10876703262329102 s +INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11081337928771973 s +DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=232835002047103208372584284681637773844, time:1750767132.2038646s req_ids:[8] +DEBUG 06-24 20:12:12 [manager.py:391] +ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:208.64462852478027ms total_cost_time:208.69064331054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7677 prompt_cache_len:5151 prompt_cache_ratio:0.6709652207893708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 +DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10805964469909668 s +INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11000943183898926 s +DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=98536507745594849840746674159457798075, time:1750767132.4180126s req_ids:[8] +DEBUG 06-24 20:12:12 [manager.py:391] +ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:210.20746231079102ms total_cost_time:210.25419235229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7678 prompt_cache_len:5151 prompt_cache_ratio:0.6708778327689503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 +DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10870909690856934 s +INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.1106724739074707 s +DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=46269012733314900325424536999501968109, time:1750767132.63283s req_ids:[8] +DEBUG 06-24 20:12:12 [manager.py:391] +ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:201.16472244262695ms total_cost_time:201.20811462402344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7679 prompt_cache_len:5151 prompt_cache_ratio:0.6707904675087902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 +DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10878801345825195 s +INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11066699028015137 s +DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=21370404631229234843900594685176242183, time:1750767132.8399675s req_ids:[8] +DEBUG 06-24 20:12:12 [manager.py:391] +ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:205.3818702697754ms total_cost_time:205.4276466369629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7680 prompt_cache_len:5151 prompt_cache_ratio:0.670703125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 +DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10796570777893066 s +INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11004972457885742 s +DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=7960748760137472986901542241846735523, time:1750767133.0535378s req_ids:[8] +DEBUG 06-24 20:12:13 [manager.py:391] +ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:212.77689933776855ms total_cost_time:212.82172203063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7681 prompt_cache_len:5151 prompt_cache_ratio:0.6706158052336936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 +DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.1076669692993164 s +INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.1096959114074707 s +DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=253761194687533787300615710681600011697, time:1750767133.267918s req_ids:[8] +DEBUG 06-24 20:12:13 [manager.py:391] +ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:200.47783851623535ms total_cost_time:200.52266120910645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7682 prompt_cache_len:5151 prompt_cache_ratio:0.6705285082009893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 +DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10928630828857422 s +INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11135578155517578 s +DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=320017400431626443251218785640864818234, time:1750767133.4771929s req_ids:[8] +DEBUG 06-24 20:12:13 [manager.py:391] +ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:380.59544563293457ms total_cost_time:380.64050674438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7683 prompt_cache_len:5151 prompt_cache_ratio:0.6704412338930106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 +DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10880637168884277 s +INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11016345024108887 s +DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=39548840540517503657260333740346636623, time:1750767133.860254s req_ids:[8] +DEBUG 06-24 20:12:13 [manager.py:391] +ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:202.54278182983398ms total_cost_time:202.58784294128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7684 prompt_cache_len:5151 prompt_cache_ratio:0.6703539823008849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 +DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10897946357727051 s +INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.11058378219604492 s +DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=218850475855003369037865176542166040708, time:1750767134.0789475s req_ids:[8] +DEBUG 06-24 20:12:14 [manager.py:391] +ERROR 06-24 20:12:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:213.79446983337402ms total_cost_time:213.8388156890869ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7685 prompt_cache_len:5151 prompt_cache_ratio:0.6702667534157449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:14 [manager.py:106] timer detokenize batch cost time 405.87592124938965 ms +INFO 06-24 20:12:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 +DEBUG 06-24 20:12:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:14 [batch.py:51] router release req id 8 +INFO 06-24 20:12:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10557174682617188 s +INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.10760688781738281 s +DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=310767629775045048878038319543665394526, time:1750767134.699976s req_ids:[8] +DEBUG 06-24 20:12:14 [manager.py:391] +ERROR 06-24 20:12:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 first_token_cost:213.48023414611816ms total_cost_time:213.50765228271484ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7686 prompt_cache_len:5151 prompt_cache_ratio:0.6701795472287275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 +DEBUG 06-24 20:12:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10688400268554688 s +INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.10906672477722168 s +DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=180654914250473417241057671004655026311, time:1750767134.9177299s req_ids:[8] +DEBUG 06-24 20:12:14 [manager.py:391] +ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 first_token_cost:211.99798583984375ms total_cost_time:212.04400062561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7687 prompt_cache_len:5151 prompt_cache_ratio:0.6700923637309744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 +DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10992050170898438 s +INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11266183853149414 s +DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=133785205864378210191389362876875571011, time:1750767135.1301816s req_ids:[8] +DEBUG 06-24 20:12:15 [manager.py:391] +ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:206.6192626953125ms total_cost_time:206.6652774810791ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7688 prompt_cache_len:5151 prompt_cache_ratio:0.6700052029136316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 +DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10812544822692871 s +INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s +DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=261013022283967441306782793176483601438, time:1750767135.3546646s req_ids:[8] +DEBUG 06-24 20:12:15 [manager.py:391] +ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:216.59374237060547ms total_cost_time:216.63856506347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7689 prompt_cache_len:5151 prompt_cache_ratio:0.6699180647678502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 +DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10773611068725586 s +INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.1096184253692627 s +DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=246975148987580736987947544367019044143, time:1750767135.565465s req_ids:[8] +DEBUG 06-24 20:12:15 [manager.py:391] +ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:207.74054527282715ms total_cost_time:207.78703689575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7690 prompt_cache_len:5151 prompt_cache_ratio:0.6698309492847855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 +DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10917162895202637 s +INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11115694046020508 s +DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=230622756890826987753124950719996624279, time:1750767135.7781389s req_ids:[8] +DEBUG 06-24 20:12:15 [manager.py:391] +ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:206.06660842895508ms total_cost_time:206.11214637756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7691 prompt_cache_len:5151 prompt_cache_ratio:0.6697438564555974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 +DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10837030410766602 s +INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11024665832519531 s +DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=234589154091863519003143456498446367496, time:1750767135.9904277s req_ids:[8] +DEBUG 06-24 20:12:15 [manager.py:391] +ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:200.2401351928711ms total_cost_time:200.2854347229004ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7692 prompt_cache_len:5151 prompt_cache_ratio:0.6696567862714509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 +DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.1098325252532959 s +INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.11195087432861328 s +DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=192520768049687979528146644289515731775, time:1750767136.2001874s req_ids:[8] +DEBUG 06-24 20:12:16 [manager.py:391] +ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:209.7034454345703ms total_cost_time:209.75041389465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7693 prompt_cache_len:5151 prompt_cache_ratio:0.6695697387235149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 +INFO 06-24 20:12:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.31085777282714844 s +INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.3128995895385742 s +DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=308741460652673925749285390957746583388, time:1750767136.6203775s req_ids:[8] +DEBUG 06-24 20:12:16 [manager.py:391] +ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:419.5363521575928ms total_cost_time:419.58045959472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7694 prompt_cache_len:5151 prompt_cache_ratio:0.6694827138029633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 +DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.10721468925476074 s +INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.10908961296081543 s +DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=264058196261211212583347829409656135085, time:1750767136.839376s req_ids:[8] +DEBUG 06-24 20:12:16 [manager.py:391] +ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:209.17272567749023ms total_cost_time:209.21850204467773ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7695 prompt_cache_len:5151 prompt_cache_ratio:0.6693957115009747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 +DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10735630989074707 s +INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10927581787109375 s +DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=150606508140671605389677804750096182026, time:1750767137.0517712s req_ids:[8] +DEBUG 06-24 20:12:17 [manager.py:391] +ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:203.34935188293457ms total_cost_time:203.39250564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7696 prompt_cache_len:5151 prompt_cache_ratio:0.6693087318087318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 +DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10749530792236328 s +INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10958266258239746 s +DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=268460407826695096637248993225957928846, time:1750767137.2606175s req_ids:[8] +DEBUG 06-24 20:12:17 [manager.py:391] +ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:205.9767246246338ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7697 prompt_cache_len:5151 prompt_cache_ratio:0.6692217747174224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 +DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10762858390808105 s +INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10958552360534668 s +DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=296825565490880410815270795500349032426, time:1750767137.4707358s req_ids:[8] +DEBUG 06-24 20:12:17 [manager.py:391] +DEBUG 06-24 20:12:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 32668.766 tokens/s +DEBUG 06-24 20:12:17 [stats.py:37] Avg prompt tokens throughput: 32660.357 tokens/s +DEBUG 06-24 20:12:17 [stats.py:37] Avg generate tokens throughput: 8.410 tokens/s +ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:205.07574081420898ms total_cost_time:205.11817932128906ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7698 prompt_cache_len:5151 prompt_cache_ratio:0.6691348402182385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 +DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10821342468261719 s +INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.11021924018859863 s +DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=252351336283897341091789067156162653979, time:1750767137.6836154s req_ids:[8] +DEBUG 06-24 20:12:17 [manager.py:391] +ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:204.44965362548828ms total_cost_time:204.49519157409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7699 prompt_cache_len:5151 prompt_cache_ratio:0.6690479283023769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 +DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10996174812316895 s +INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.11191773414611816 s +DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=119558839409258222953942602133175770821, time:1750767137.8948174s req_ids:[8] +DEBUG 06-24 20:12:17 [manager.py:391] +ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:208.46891403198242ms total_cost_time:208.5118293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7700 prompt_cache_len:5151 prompt_cache_ratio:0.668961038961039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 +DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10960507392883301 s +INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.11094236373901367 s +DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=261640368785005968656837102395284740508, time:1750767138.1096537s req_ids:[8] +DEBUG 06-24 20:12:18 [manager.py:391] +ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:210.07251739501953ms total_cost_time:210.11829376220703ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7701 prompt_cache_len:5151 prompt_cache_ratio:0.6688741721854304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 +DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10744738578796387 s +INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.10941767692565918 s +DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=205171123453491851650481666414018619079, time:1750767138.3314912s req_ids:[8] +DEBUG 06-24 20:12:18 [manager.py:391] +ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:215.61193466186523ms total_cost_time:215.65699577331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7702 prompt_cache_len:5151 prompt_cache_ratio:0.6687873279667619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 +DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10752558708190918 s +INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.1094503402709961 s +DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=132006334873363944205563863252642714645, time:1750767138.5454113s req_ids:[8] +DEBUG 06-24 20:12:18 [manager.py:391] +ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:205.25312423706055ms total_cost_time:205.29913902282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7703 prompt_cache_len:5151 prompt_cache_ratio:0.6687005062962482 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 +DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.30987095832824707 s +INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.31196093559265137 s +DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=240661992237263133656300588294017518136, time:1750767138.9639163s req_ids:[8] +DEBUG 06-24 20:12:18 [manager.py:391] +ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:420.123815536499ms total_cost_time:420.17626762390137ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7704 prompt_cache_len:5151 prompt_cache_ratio:0.668613707165109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 +DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10863184928894043 s +INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.11055755615234375 s +DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=302495773197347062738671692802458817917, time:1750767139.179904s req_ids:[8] +DEBUG 06-24 20:12:19 [manager.py:391] +ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:205.29627799987793ms total_cost_time:205.34133911132812ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7705 prompt_cache_len:5151 prompt_cache_ratio:0.6685269305645685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 +DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s +INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.10954713821411133 s +DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=12897941176936508498152477794098447621, time:1750767139.3916538s req_ids:[8] +DEBUG 06-24 20:12:19 [manager.py:391] +ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:168.67685317993164ms total_cost_time:168.71976852416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7706 prompt_cache_len:5151 prompt_cache_ratio:0.6684401764858552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 +DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.1074838638305664 s +INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.10933780670166016 s +DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=180589104954624473766497886043115660844, time:1750767139.5653625s req_ids:[8] +DEBUG 06-24 20:12:19 [manager.py:391] +ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:198.52781295776367ms total_cost_time:198.56977462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7707 prompt_cache_len:5151 prompt_cache_ratio:0.6683534449202024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 +DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10831499099731445 s +INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.1103978157043457 s +DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=230211948437807052932271922465470976024, time:1750767139.7691188s req_ids:[8] +DEBUG 06-24 20:12:19 [manager.py:391] +ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:201.15923881530762ms total_cost_time:201.20620727539062ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7708 prompt_cache_len:5151 prompt_cache_ratio:0.668266735858848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 +DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10763978958129883 s +INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094048023223877 s +DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=332211914892233867929005211017714547721, time:1750767139.9764647s req_ids:[8] +DEBUG 06-24 20:12:19 [manager.py:391] +ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:204.09536361694336ms total_cost_time:204.14090156555176ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7709 prompt_cache_len:5151 prompt_cache_ratio:0.6681800492930341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 +DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10725879669189453 s +INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.10940909385681152 s +DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=149564056068108416224796820993696048419, time:1750767140.1865942s req_ids:[8] +DEBUG 06-24 20:12:20 [manager.py:391] +ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:207.3678970336914ms total_cost_time:207.4124813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7710 prompt_cache_len:5151 prompt_cache_ratio:0.6680933852140077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 +DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10846519470214844 s +INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.11063575744628906 s +DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=229798759804972537941191959631736045427, time:1750767140.3997397s req_ids:[8] +DEBUG 06-24 20:12:20 [manager.py:391] +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:207.30018615722656ms total_cost_time:207.35979080200195ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7711 prompt_cache_len:5151 prompt_cache_ratio:0.6680067436130204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 +DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.1108713150024414 s +INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.11307740211486816 s +DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=241340085920983260504230117320264374974, time:1750767140.613256s req_ids:[8] +DEBUG 06-24 20:12:20 [manager.py:391] +ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:204.65588569641113ms total_cost_time:204.70094680786133ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7712 prompt_cache_len:5151 prompt_cache_ratio:0.6679201244813278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 +DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10714578628540039 s +INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.10882282257080078 s +DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=248639011001771450836073963359765139428, time:1750767140.8201125s req_ids:[8] +DEBUG 06-24 20:12:20 [manager.py:391] +ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:338.34385871887207ms total_cost_time:338.38868141174316ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7713 prompt_cache_len:5151 prompt_cache_ratio:0.6678335278101906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 +DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10871028900146484 s +INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11068439483642578 s +DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=144120124511206351885462145130861118921, time:1750767141.1623044s req_ids:[8] +DEBUG 06-24 20:12:21 [manager.py:391] +ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:195.56093215942383ms total_cost_time:195.59407234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:7714 prompt_cache_len:5151 prompt_cache_ratio:0.6677469535908738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 +DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10694384574890137 s +INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.10892462730407715 s +DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=208840935093514335426674163763014891756, time:1750767141.366234s req_ids:[8] +DEBUG 06-24 20:12:21 [manager.py:391] +ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:201.84326171875ms total_cost_time:201.887845993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7715 prompt_cache_len:5151 prompt_cache_ratio:0.6676604018146468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 +DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s +INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11117959022521973 s +DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=285504034967834411243699953463678513157, time:1750767141.575837s req_ids:[8] +DEBUG 06-24 20:12:21 [manager.py:391] +ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:205.338716506958ms total_cost_time:205.3830623626709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7716 prompt_cache_len:5151 prompt_cache_ratio:0.6675738724727839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 +DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:21 [batch.py:51] router release req id 8 +INFO 06-24 20:12:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s +INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042308807373047 s +DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=174464156689809822244051483718707031920, time:1750767141.7871873s req_ids:[8] +DEBUG 06-24 20:12:21 [manager.py:391] +ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:206.1479091644287ms total_cost_time:206.1920166015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7717 prompt_cache_len:5151 prompt_cache_ratio:0.6674873655565634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 +DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.1105356216430664 s +INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.1125020980834961 s +DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=26686273271799114541198605393210678670, time:1750767141.9996161s req_ids:[8] +DEBUG 06-24 20:12:21 [manager.py:391] +ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:206.5446376800537ms total_cost_time:206.5901756286621ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7718 prompt_cache_len:5151 prompt_cache_ratio:0.6674008810572687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 +DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s +INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.10995125770568848 s +DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=148557605442988544374942217686259573352, time:1750767142.2116847s req_ids:[8] +DEBUG 06-24 20:12:22 [manager.py:391] +ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:203.79018783569336ms total_cost_time:203.83405685424805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7719 prompt_cache_len:5151 prompt_cache_ratio:0.6673144189661874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 +DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10939669609069824 s +INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.11067676544189453 s +DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=126733585052604101675858451730984682268, time:1750767142.4187284s req_ids:[8] +DEBUG 06-24 20:12:22 [manager.py:391] +ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:204.7417163848877ms total_cost_time:204.7865390777588ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7720 prompt_cache_len:5151 prompt_cache_ratio:0.6672279792746114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 +DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10797595977783203 s +INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.10955095291137695 s +DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=86298599241888146922444121195539820547, time:1750767142.630999s req_ids:[8] +DEBUG 06-24 20:12:22 [manager.py:391] +ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:205.76953887939453ms total_cost_time:205.7936191558838ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7721 prompt_cache_len:5151 prompt_cache_ratio:0.6671415619738376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 +DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10894560813903809 s +INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s +DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=9505275690836169138130013725301276836, time:1750767142.8449075s req_ids:[8] +DEBUG 06-24 20:12:22 [manager.py:391] +ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:206.2525749206543ms total_cost_time:206.2971591949463ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7722 prompt_cache_len:5151 prompt_cache_ratio:0.6670551670551671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 +DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.1074216365814209 s +INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.10885047912597656 s +DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=259952194685564632073835239786928160486, time:1750767143.0609267s req_ids:[8] +DEBUG 06-24 20:12:23 [manager.py:391] +INFO 06-24 20:12:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:12:23 [statics_utils.py:24] mean first cost: 230.12276527711066 ms +INFO 06-24 20:12:23 [statics_utils.py:24] mean per token cost: 0.08743647966695665 ms +ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:385.7707977294922ms total_cost_time:385.8165740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7723 prompt_cache_len:5151 prompt_cache_ratio:0.6669687945099054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 +DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10878491401672363 s +INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s +DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=169273566874592448716949563324576011950, time:1750767143.4428437s req_ids:[8] +DEBUG 06-24 20:12:23 [manager.py:391] +ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:201.88164710998535ms total_cost_time:201.92527770996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7724 prompt_cache_len:5151 prompt_cache_ratio:0.666882444329363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 +DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10835146903991699 s +INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.10967659950256348 s +DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=245600736928763126660960009319826389092, time:1750767143.652081s req_ids:[8] +DEBUG 06-24 20:12:23 [manager.py:391] +ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:204.73551750183105ms total_cost_time:204.78391647338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:7725 prompt_cache_len:5151 prompt_cache_ratio:0.6667961165048544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 +DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10957837104797363 s +INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088252067565918 s +DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=79715703878086391527046643944288800918, time:1750767143.864175s req_ids:[8] +DEBUG 06-24 20:12:23 [manager.py:391] +ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:206.32624626159668ms total_cost_time:206.39467239379883ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:7726 prompt_cache_len:5151 prompt_cache_ratio:0.6667098110276987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 +DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10866737365722656 s +INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005306243896484 s +DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=176965807144545710478674132240663513675, time:1750767144.076287s req_ids:[8] +DEBUG 06-24 20:12:24 [manager.py:391] +ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:205.83248138427734ms total_cost_time:205.87825775146484ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7727 prompt_cache_len:5151 prompt_cache_ratio:0.6666235278892196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 +DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s +INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s +DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=279049199565125134186941059577293749666, time:1750767144.287476s req_ids:[8] +DEBUG 06-24 20:12:24 [manager.py:391] +ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:204.16998863220215ms total_cost_time:204.21481132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7728 prompt_cache_len:5151 prompt_cache_ratio:0.6665372670807453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 +DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s +INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019587516784668 s +DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=143448438893372300916084685539371458777, time:1750767144.4993286s req_ids:[8] +DEBUG 06-24 20:12:24 [manager.py:391] +ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:206.5730094909668ms total_cost_time:206.6185474395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7729 prompt_cache_len:5151 prompt_cache_ratio:0.6664510285936085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 +DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:24 [batch.py:51] router release req id 8 +DEBUG 06-24 20:12:24 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:24 [manager.py:283] +DEBUG 06-24 20:12:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:24 [manager.py:284] +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.11134552955627441 s +INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11343932151794434 s +DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=88999850516548248954728590606272073656, time:1750767144.7137043s req_ids:[8] +DEBUG 06-24 20:12:24 [manager.py:391] +ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:209.00869369506836ms total_cost_time:209.05804634094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:7730 prompt_cache_len:5151 prompt_cache_ratio:0.6663648124191461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 +DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10801482200622559 s +INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11011385917663574 s +DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=4682599189205451547800957209215186178, time:1750767144.9265842s req_ids:[8] +DEBUG 06-24 20:12:24 [manager.py:391] +ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:205.91330528259277ms total_cost_time:205.97243309020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7731 prompt_cache_len:5151 prompt_cache_ratio:0.6662786185487001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 +DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.10938358306884766 s +INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.11140847206115723 s +DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=335529262386809074759956794830698865310, time:1750767145.1380873s req_ids:[8] +DEBUG 06-24 20:12:25 [manager.py:391] +ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:206.95090293884277ms total_cost_time:206.99620246887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7732 prompt_cache_len:5151 prompt_cache_ratio:0.6661924469736161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 +DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.10702657699584961 s +INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.1083521842956543 s +DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=224084969392382344109697068137149680109, time:1750767145.364397s req_ids:[8] +DEBUG 06-24 20:12:25 [manager.py:391] +ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:220.31092643737793ms total_cost_time:220.3371524810791ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7733 prompt_cache_len:5151 prompt_cache_ratio:0.6661062976852451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 +DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.3071630001068115 s +INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.3090696334838867 s +DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=313963277010549572786154352066598814281, time:1750767145.7864816s req_ids:[8] +DEBUG 06-24 20:12:25 [manager.py:391] +ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:373.1262683868408ms total_cost_time:373.1725215911865ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7734 prompt_cache_len:5151 prompt_cache_ratio:0.6660201706749418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 +DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.1074821949005127 s +INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.1094813346862793 s +DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=215035803195593453922325933986124224781, time:1750767145.9510503s req_ids:[8] +DEBUG 06-24 20:12:25 [manager.py:391] +ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:192.97003746032715ms total_cost_time:193.01652908325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7735 prompt_cache_len:5151 prompt_cache_ratio:0.6659340659340659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 +DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10812568664550781 s +INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.11020541191101074 s +DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=224545698279231772613087379879108321542, time:1750767146.154076s req_ids:[8] +DEBUG 06-24 20:12:26 [manager.py:391] +ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:203.97043228149414ms total_cost_time:204.01644706726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7736 prompt_cache_len:5151 prompt_cache_ratio:0.6658479834539814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 +DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10738110542297363 s +INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s +DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=11158265035190960162037378457957103384, time:1750767146.3628578s req_ids:[8] +DEBUG 06-24 20:12:26 [manager.py:391] +ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:204.79083061218262ms total_cost_time:204.83636856079102ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7737 prompt_cache_len:5151 prompt_cache_ratio:0.6657619232260567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 +DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s +INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10927271842956543 s +DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=7269796909582166636603784686954752189, time:1750767146.575159s req_ids:[8] +DEBUG 06-24 20:12:26 [manager.py:391] +ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:204.76150512695312ms total_cost_time:204.80680465698242ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7738 prompt_cache_len:5151 prompt_cache_ratio:0.6656758852416645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 +DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10741353034973145 s +INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s +DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=316266665537376092956464913607339418477, time:1750767146.7871783s req_ids:[8] +DEBUG 06-24 20:12:26 [manager.py:391] +ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:203.2008171081543ms total_cost_time:203.2465934753418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7739 prompt_cache_len:5151 prompt_cache_ratio:0.6655898694921825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 +DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10771036148071289 s +INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s +DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=126411687016370857589073125108218174947, time:1750767146.9985607s req_ids:[8] +DEBUG 06-24 20:12:26 [manager.py:391] +ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:208.42885971069336ms total_cost_time:208.47511291503906ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7740 prompt_cache_len:5151 prompt_cache_ratio:0.6655038759689923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 +DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.1076359748840332 s +INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.10892248153686523 s +DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=17402659547059232416830831147864305262, time:1750767147.2127025s req_ids:[8] +DEBUG 06-24 20:12:27 [manager.py:391] +ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:210.89673042297363ms total_cost_time:210.94083786010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7741 prompt_cache_len:5151 prompt_cache_ratio:0.6654179046634802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 +DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s +INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.10982751846313477 s +DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=22603639528273711943121190585010557166, time:1750767147.4257321s req_ids:[8] +DEBUG 06-24 20:12:27 [manager.py:391] +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:12:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 33851.921 tokens/s +DEBUG 06-24 20:12:27 [stats.py:37] Avg prompt tokens throughput: 33843.054 tokens/s +DEBUG 06-24 20:12:27 [stats.py:37] Avg generate tokens throughput: 8.867 tokens/s +INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:203.8273811340332ms total_cost_time:203.8719654083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7742 prompt_cache_len:5151 prompt_cache_ratio:0.665331955567037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 +DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.10746526718139648 s +INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.1094365119934082 s +DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=133782690042444740467600107048354996762, time:1750767147.635496s req_ids:[8] +DEBUG 06-24 20:12:27 [manager.py:391] +ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:204.47468757629395ms total_cost_time:204.51903343200684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7743 prompt_cache_len:5151 prompt_cache_ratio:0.6652460286710578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 +DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.3091590404510498 s +INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.31121110916137695 s +DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=130245217265561447649736767689436308715, time:1750767148.0482833s req_ids:[8] +DEBUG 06-24 20:12:28 [manager.py:391] +ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:412.13274002075195ms total_cost_time:412.17708587646484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7744 prompt_cache_len:5151 prompt_cache_ratio:0.6651601239669421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 +DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10729217529296875 s +INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10968756675720215 s +DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=157193302478056662613369267669173880795, time:1750767148.2702112s req_ids:[8] +DEBUG 06-24 20:12:28 [manager.py:391] +ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:214.6470546722412ms total_cost_time:214.6921157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7745 prompt_cache_len:5151 prompt_cache_ratio:0.6650742414460943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 +DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10764551162719727 s +INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10962462425231934 s +DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=1704467605426256655837637487786126806, time:1750767148.4832547s req_ids:[8] +DEBUG 06-24 20:12:28 [manager.py:391] +ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:205.3995132446289ms total_cost_time:205.4445743560791ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7746 prompt_cache_len:5151 prompt_cache_ratio:0.6649883810999225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 +DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.1076817512512207 s +INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s +DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=6434365157893586288999586432007946816, time:1750767148.6931903s req_ids:[8] +DEBUG 06-24 20:12:28 [manager.py:391] +ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:216.2320613861084ms total_cost_time:216.2761688232422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7747 prompt_cache_len:5151 prompt_cache_ratio:0.66490254291984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 +DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10848093032836914 s +INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.11082887649536133 s +DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=253371907630901544703125539572568389707, time:1750767148.921384s req_ids:[8] +DEBUG 06-24 20:12:28 [manager.py:391] +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:200.87790489196777ms total_cost_time:200.92153549194336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7748 prompt_cache_len:5151 prompt_cache_ratio:0.6648167268972638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 +DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s +INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.1104283332824707 s +DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=294760328714458150904326144520642097709, time:1750767149.1180046s req_ids:[8] +DEBUG 06-24 20:12:29 [manager.py:391] +ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:200.00171661376953ms total_cost_time:200.02079010009766ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:7749 prompt_cache_len:5151 prompt_cache_ratio:0.664730933023616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 +DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s +INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.10857534408569336 s +DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=182197754750730165326018836625523681415, time:1750767149.3273027s req_ids:[8] +DEBUG 06-24 20:12:29 [manager.py:391] +ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:207.200288772583ms total_cost_time:207.26323127746582ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7750 prompt_cache_len:5151 prompt_cache_ratio:0.6646451612903226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 +DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10785531997680664 s +INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.1098642349243164 s +DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=325301622988792319447016016894741091713, time:1750767149.5395696s req_ids:[8] +DEBUG 06-24 20:12:29 [manager.py:391] +ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:206.7737579345703ms total_cost_time:206.8178653717041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7751 prompt_cache_len:5151 prompt_cache_ratio:0.6645594116888144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 +DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10776567459106445 s +INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.10900306701660156 s +DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=90143514973807723855892648219674968306, time:1750767149.7529294s req_ids:[8] +DEBUG 06-24 20:12:29 [manager.py:391] +ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:201.24292373657227ms total_cost_time:201.28726959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7752 prompt_cache_len:5151 prompt_cache_ratio:0.6644736842105263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 +DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s +INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005449295043945 s +DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=27666137300216555166214798127999450547, time:1750767149.9574769s req_ids:[8] +DEBUG 06-24 20:12:29 [manager.py:391] +ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:366.65892601013184ms total_cost_time:366.7027950286865ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7753 prompt_cache_len:5151 prompt_cache_ratio:0.6643879788468979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 +DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10871458053588867 s +INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11086153984069824 s +DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=240265696609253545021720102462037834059, time:1750767150.325432s req_ids:[8] +DEBUG 06-24 20:12:30 [manager.py:391] +ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:195.845365524292ms total_cost_time:195.88875770568848ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7754 prompt_cache_len:5151 prompt_cache_ratio:0.6643022955893733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 +DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s +INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.10926270484924316 s +DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=72977870460239377973795779508969227193, time:1750767150.5287335s req_ids:[8] +DEBUG 06-24 20:12:30 [manager.py:391] +ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:201.33256912231445ms total_cost_time:201.37739181518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7755 prompt_cache_len:5151 prompt_cache_ratio:0.6642166344294004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 +DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10946059226989746 s +INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11147427558898926 s +DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=199341405449302213243985967462397805978, time:1750767150.7394466s req_ids:[8] +DEBUG 06-24 20:12:30 [manager.py:391] +ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:208.88400077819824ms total_cost_time:208.92786979675293ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7756 prompt_cache_len:5151 prompt_cache_ratio:0.6641309953584322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 +DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10812044143676758 s +INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s +DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=187695372359525935045798531110759600929, time:1750767150.9533205s req_ids:[8] +DEBUG 06-24 20:12:30 [manager.py:391] +ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:204.31184768676758ms total_cost_time:204.36429977416992ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7757 prompt_cache_len:5151 prompt_cache_ratio:0.6640453783679258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 +DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10888242721557617 s +INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s +DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=135950551838856282707149986398196395175, time:1750767151.1627944s req_ids:[8] +DEBUG 06-24 20:12:31 [manager.py:391] +ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.44195175170898ms total_cost_time:205.48725128173828ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7758 prompt_cache_len:5151 prompt_cache_ratio:0.6639597834493426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 +DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.1094508171081543 s +INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11143136024475098 s +DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=181811441888191325594103853691638711493, time:1750767151.3747268s req_ids:[8] +DEBUG 06-24 20:12:31 [manager.py:391] +ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.73997497558594ms total_cost_time:205.78527450561523ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7759 prompt_cache_len:5151 prompt_cache_ratio:0.6638742105941488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 +DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10869550704956055 s +INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s +DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=53156924217010073971313872542626871691, time:1750767151.5875807s req_ids:[8] +DEBUG 06-24 20:12:31 [manager.py:391] +ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.97505569458008ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7760 prompt_cache_len:5151 prompt_cache_ratio:0.6637886597938144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 +DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10748052597045898 s +INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.10946774482727051 s +DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=22357898136628181695422432697323554644, time:1750767151.7974072s req_ids:[8] +DEBUG 06-24 20:12:31 [manager.py:391] +ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:202.7263641357422ms total_cost_time:202.76999473571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7761 prompt_cache_len:5151 prompt_cache_ratio:0.6637031310398145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 +DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10904574394226074 s +INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11095952987670898 s +DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=257084032899589677067335827102224164149, time:1750767152.0070477s req_ids:[8] +DEBUG 06-24 20:12:32 [manager.py:391] +ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.49678802490234ms total_cost_time:205.53922653198242ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7762 prompt_cache_len:5151 prompt_cache_ratio:0.6636176243236279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 +DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.1072990894317627 s +INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.10857272148132324 s +DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=180878602208322106040086715830079363267, time:1750767152.219672s req_ids:[8] +DEBUG 06-24 20:12:32 [manager.py:391] +ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:378.05914878845215ms total_cost_time:378.10468673706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7763 prompt_cache_len:5151 prompt_cache_ratio:0.6635321396367384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 +DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.10700464248657227 s +INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.1089162826538086 s +DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=210890120535656640545125953461980127135, time:1750767152.6039314s req_ids:[8] +DEBUG 06-24 20:12:32 [manager.py:391] +ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:205.43551445007324ms total_cost_time:205.47962188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7764 prompt_cache_len:5151 prompt_cache_ratio:0.6634466769706336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 +DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s +INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s +DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=80634286891850296675645363892271664036, time:1750767152.8122385s req_ids:[8] +DEBUG 06-24 20:12:32 [manager.py:391] +ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:203.65071296691895ms total_cost_time:203.69505882263184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7765 prompt_cache_len:5151 prompt_cache_ratio:0.6633612363168062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 +DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10745573043823242 s +INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10939455032348633 s +DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=153560186904683195565756766824542719587, time:1750767153.0251248s req_ids:[8] +DEBUG 06-24 20:12:33 [manager.py:391] +ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:207.66735076904297ms total_cost_time:207.71193504333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7766 prompt_cache_len:5151 prompt_cache_ratio:0.6632758176667525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 +DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s +INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.110076904296875 s +DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=270881466284648982921449910782393816994, time:1750767153.2375104s req_ids:[8] +DEBUG 06-24 20:12:33 [manager.py:391] +ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:206.99238777160645ms total_cost_time:207.01289176940918ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:7767 prompt_cache_len:5151 prompt_cache_ratio:0.6631904210119738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 +DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10776233673095703 s +INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10962939262390137 s +DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=193418318937954715154867499288586150488, time:1750767153.451543s req_ids:[8] +DEBUG 06-24 20:12:33 [manager.py:391] +ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:207.96895027160645ms total_cost_time:208.01305770874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7768 prompt_cache_len:5151 prompt_cache_ratio:0.6631050463439753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 +DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10694694519042969 s +INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10904693603515625 s +DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=117157342473209478339769115234884940205, time:1750767153.6646492s req_ids:[8] +DEBUG 06-24 20:12:33 [manager.py:391] +ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:205.11174201965332ms total_cost_time:205.15775680541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7769 prompt_cache_len:5151 prompt_cache_ratio:0.6630196936542669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 +DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10735154151916504 s +INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.1093301773071289 s +DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=77828536841264767715912798409615444473, time:1750767153.8733015s req_ids:[8] +DEBUG 06-24 20:12:33 [manager.py:391] +ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:203.67980003356934ms total_cost_time:203.72366905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7770 prompt_cache_len:5151 prompt_cache_ratio:0.6629343629343629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 +DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.10811924934387207 s +INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s +DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=201837767333182358225249047165835573782, time:1750767154.0818121s req_ids:[8] +DEBUG 06-24 20:12:34 [manager.py:391] +ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:200.04606246948242ms total_cost_time:200.09112358093262ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7771 prompt_cache_len:5151 prompt_cache_ratio:0.6628490541757818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 +DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.10912919044494629 s +INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.11119580268859863 s +DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=281489527916733601197115499436543554149, time:1750767154.2872586s req_ids:[8] +DEBUG 06-24 20:12:34 [manager.py:391] +ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:211.0421657562256ms total_cost_time:211.08770370483398ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7772 prompt_cache_len:5151 prompt_cache_ratio:0.6627637673700463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 +DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.1073155403137207 s +INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.1092538833618164 s +DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=176443349013428303898407700054557740915, time:1750767154.5122185s req_ids:[8] +DEBUG 06-24 20:12:34 [manager.py:391] +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:214.52617645263672ms total_cost_time:214.57195281982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7773 prompt_cache_len:5151 prompt_cache_ratio:0.6626785025086839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 +DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.3106222152709961 s +INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.3123915195465088 s +DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=129921337774109002810425281359679226152, time:1750767154.9278512s req_ids:[8] +DEBUG 06-24 20:12:34 [manager.py:391] +ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:357.1920394897461ms total_cost_time:357.23328590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:7774 prompt_cache_len:5151 prompt_cache_ratio:0.6625932595832261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 +DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10799431800842285 s +DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=260307923561819129592192910237041806741, time:1750767155.07599s req_ids:[8] +DEBUG 06-24 20:12:35 [manager.py:391] +INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10993480682373047 s +ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:180.46259880065918ms total_cost_time:180.50670623779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7775 prompt_cache_len:5151 prompt_cache_ratio:0.662508038585209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 +DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.1080474853515625 s +INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.11003637313842773 s +DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=275895040678148417252126426520469427253, time:1750767155.2777753s req_ids:[8] +DEBUG 06-24 20:12:35 [manager.py:391] +ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:205.92951774597168ms total_cost_time:205.97410202026367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7776 prompt_cache_len:5151 prompt_cache_ratio:0.6624228395061729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 +DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.1070864200592041 s +INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10905838012695312 s +DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=303300826189958568204005249402960769560, time:1750767155.4842093s req_ids:[8] +DEBUG 06-24 20:12:35 [manager.py:391] +ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:206.96210861206055ms total_cost_time:207.01909065246582ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7777 prompt_cache_len:5151 prompt_cache_ratio:0.6623376623376623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 +DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10723567008972168 s +INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10923385620117188 s +DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=241088646162826871141598090224176662276, time:1750767155.698701s req_ids:[8] +DEBUG 06-24 20:12:35 [manager.py:391] +ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:206.26235008239746ms total_cost_time:206.30502700805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7778 prompt_cache_len:5151 prompt_cache_ratio:0.6622525070712265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 +DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10809946060180664 s +INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s +DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=220943618113280144075856033904954626563, time:1750767155.9082694s req_ids:[8] +DEBUG 06-24 20:12:35 [manager.py:391] +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:204.92887496948242ms total_cost_time:204.95152473449707ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7779 prompt_cache_len:5151 prompt_cache_ratio:0.6621673736984188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 +DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10387206077575684 s +INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10563850402832031 s +DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=153072631190134629818925698619498487620, time:1750767156.1211104s req_ids:[8] +DEBUG 06-24 20:12:36 [manager.py:391] +ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:210.0512981414795ms total_cost_time:210.09588241577148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7780 prompt_cache_len:5151 prompt_cache_ratio:0.6620822622107969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 +DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10711550712585449 s +INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10898876190185547 s +DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=56054287968830783638118173843652018438, time:1750767156.3335786s req_ids:[8] +DEBUG 06-24 20:12:36 [manager.py:391] +ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:208.11080932617188ms total_cost_time:208.15443992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7781 prompt_cache_len:5151 prompt_cache_ratio:0.6619971725999229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 +DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10720109939575195 s +INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10901713371276855 s +DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=28647881830278230452660578503466640222, time:1750767156.546369s req_ids:[8] +DEBUG 06-24 20:12:36 [manager.py:391] +ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:202.35037803649902ms total_cost_time:202.3932933807373ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7782 prompt_cache_len:5151 prompt_cache_ratio:0.6619121048573632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 +DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10718464851379395 s +INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10903191566467285 s +DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=276846434074140928183005385687770531311, time:1750767156.754601s req_ids:[8] +DEBUG 06-24 20:12:36 [manager.py:391] +ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:206.92873001098633ms total_cost_time:206.9721221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7783 prompt_cache_len:5151 prompt_cache_ratio:0.6618270589746884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.20819950103759766 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.21006560325622559 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=254734296519192551992241417985805698524, time:1750767157.06225s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:257.28774070739746ms total_cost_time:257.34663009643555ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7784 prompt_cache_len:5151 prompt_cache_ratio:0.6617420349434738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10843873023986816 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11045074462890625 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=97058257592607103553118273396713083541, time:1750767157.224329s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:188.84706497192383ms total_cost_time:188.89260292053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7785 prompt_cache_len:5151 prompt_cache_ratio:0.6616570327552986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11054277420043945 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=256131329317586280008538418931330994118, time:1750767157.4238315s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:169.87323760986328ms total_cost_time:169.91615295410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7786 prompt_cache_len:5151 prompt_cache_ratio:0.6615720524017468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10767412185668945 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.10871481895446777 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=65455097099429867480787763502281785780, time:1750767157.5987215s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +DEBUG 06-24 20:12:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 34639.451 tokens/s +DEBUG 06-24 20:12:37 [stats.py:37] Avg prompt tokens throughput: 34630.631 tokens/s +DEBUG 06-24 20:12:37 [stats.py:37] Avg generate tokens throughput: 8.821 tokens/s +ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:162.6131534576416ms total_cost_time:162.65606880187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7787 prompt_cache_len:5151 prompt_cache_ratio:0.6614870938744061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10680270195007324 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.10870742797851562 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=238117005491855037305178138852115182568, time:1750767157.7652826s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:190.52934646606445ms total_cost_time:190.57440757751465ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7788 prompt_cache_len:5151 prompt_cache_ratio:0.661402157164869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 +DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.11024713516235352 s +INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11217045783996582 s +DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=121074299000091681149389648773347520305, time:1750767157.961781s req_ids:[8] +DEBUG 06-24 20:12:37 [manager.py:391] +ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:199.86653327941895ms total_cost_time:199.9490261077881ms,out_token_counter:1 mean_per_token_cost_time: 0.08249282836914062ms prompt_token_num:7789 prompt_cache_len:5151 prompt_cache_ratio:0.6613172422647323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 +DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.10735106468200684 s +INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s +DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=226875499815262637808329613381709186994, time:1750767158.171505s req_ids:[8] +DEBUG 06-24 20:12:38 [manager.py:391] +ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:204.27656173706055ms total_cost_time:204.32209968566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7790 prompt_cache_len:5151 prompt_cache_ratio:0.6612323491655969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 +DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.1064910888671875 s +INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.10843586921691895 s +DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=248913798969780591702474178697412042669, time:1750767158.3914535s req_ids:[8] +DEBUG 06-24 20:12:38 [manager.py:391] +ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:208.2197666168213ms total_cost_time:208.27984809875488ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7791 prompt_cache_len:5151 prompt_cache_ratio:0.6611474778590681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 +DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.11180996894836426 s +INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.11304974555969238 s +DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=151660816767079489641917233573865364065, time:1750767158.608534s req_ids:[8] +DEBUG 06-24 20:12:38 [manager.py:391] +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:210.5121612548828ms total_cost_time:210.69598197937012ms,out_token_counter:1 mean_per_token_cost_time: 0.1838207244873047ms prompt_token_num:7792 prompt_cache_len:5151 prompt_cache_ratio:0.6610626283367557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 +DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.10995340347290039 s +INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.11208152770996094 s +DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=140696447984941706432247929926933686536, time:1750767158.8200881s req_ids:[8] +DEBUG 06-24 20:12:38 [manager.py:391] +ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:358.59203338623047ms total_cost_time:358.6394786834717ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7793 prompt_cache_len:5151 prompt_cache_ratio:0.6609778005902733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 +DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.11174178123474121 s +INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11391520500183105 s +DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=290166508850084323246739154706895201051, time:1750767159.1830564s req_ids:[8] +DEBUG 06-24 20:12:39 [manager.py:391] +ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:199.75876808166504ms total_cost_time:199.8155117034912ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7794 prompt_cache_len:5151 prompt_cache_ratio:0.6608929946112394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 +DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.10836052894592285 s +INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11015701293945312 s +DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=310206031373922942876128471285687560017, time:1750767159.3933094s req_ids:[8] +DEBUG 06-24 20:12:39 [manager.py:391] +ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:204.6835422515869ms total_cost_time:204.72955703735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7795 prompt_cache_len:5151 prompt_cache_ratio:0.6608082103912765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 +DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s +INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11055874824523926 s +DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=284318134349137126464202645957340367228, time:1750767159.6054559s req_ids:[8] +DEBUG 06-24 20:12:39 [manager.py:391] +ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:208.99081230163574ms total_cost_time:209.03682708740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7796 prompt_cache_len:5151 prompt_cache_ratio:0.6607234479220113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 +DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.11321520805358887 s +INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11528158187866211 s +DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=163468458279842602933693085200637565145, time:1750767159.8204372s req_ids:[8] +DEBUG 06-24 20:12:39 [manager.py:391] +ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:209.7485065460205ms total_cost_time:209.85150337219238ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:7797 prompt_cache_len:5151 prompt_cache_ratio:0.660638707195075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 +DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s +INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.1100921630859375 s +DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=329266234108695424827554144819913739540, time:1750767160.0452194s req_ids:[8] +DEBUG 06-24 20:12:40 [manager.py:391] +ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:216.89844131469727ms total_cost_time:216.94111824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7798 prompt_cache_len:5151 prompt_cache_ratio:0.6605539882021031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 +DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s +INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.11010146141052246 s +DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=277837382740778638043234555736824370474, time:1750767160.2572002s req_ids:[8] +DEBUG 06-24 20:12:40 [manager.py:391] +ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:204.56409454345703ms total_cost_time:204.6067714691162ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7799 prompt_cache_len:5151 prompt_cache_ratio:0.6604692909347353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 +DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10883879661560059 s +INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.11078977584838867 s +DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=288971633494187793332790798573218423484, time:1750767160.4674702s req_ids:[8] +DEBUG 06-24 20:12:40 [manager.py:391] +ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:206.72345161437988ms total_cost_time:206.76827430725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7800 prompt_cache_len:5151 prompt_cache_ratio:0.6603846153846153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 +DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10765361785888672 s +INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959291458129883 s +DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=59754223598116099589598867257513819109, time:1750767160.6790953s req_ids:[8] +DEBUG 06-24 20:12:40 [manager.py:391] +ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:205.5494785308838ms total_cost_time:205.59310913085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7801 prompt_cache_len:5151 prompt_cache_ratio:0.6602999615433919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 +DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10789132118225098 s +INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s +DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=313314583547577911770182655310939576242, time:1750767160.8909373s req_ids:[8] +DEBUG 06-24 20:12:40 [manager.py:391] +ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:206.07304573059082ms total_cost_time:206.1166763305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7802 prompt_cache_len:5151 prompt_cache_ratio:0.6602153294027172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 +DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10886621475219727 s +INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.1108088493347168 s +DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=287262326593647541029907940415650754223, time:1750767161.103584s req_ids:[8] +DEBUG 06-24 20:12:41 [manager.py:391] +ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:374.6631145477295ms total_cost_time:374.7081756591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7803 prompt_cache_len:5151 prompt_cache_ratio:0.6601307189542484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 +DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10940361022949219 s +INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.11131906509399414 s +DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=8444240540389227913382922186749930798, time:1750767161.4795408s req_ids:[8] +DEBUG 06-24 20:12:41 [manager.py:391] +ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.0924301147461ms total_cost_time:205.1386833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7804 prompt_cache_len:5151 prompt_cache_ratio:0.6600461301896463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 +DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10970306396484375 s +INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s +DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=322950922185280246242499609836644840532, time:1750767161.692342s req_ids:[8] +DEBUG 06-24 20:12:41 [manager.py:391] +ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.8258056640625ms total_cost_time:205.8694362640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7805 prompt_cache_len:5151 prompt_cache_ratio:0.6599615631005765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 +DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10847926139831543 s +INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.1104745864868164 s +DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=166950637527419435414020971497767797773, time:1750767161.9040914s req_ids:[8] +DEBUG 06-24 20:12:41 [manager.py:391] +ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.75571060180664ms total_cost_time:205.79910278320312ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7806 prompt_cache_len:5151 prompt_cache_ratio:0.6598770176787087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 +DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s +INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11052465438842773 s +DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=87715912668220206645377143317363891508, time:1750767162.117333s req_ids:[8] +DEBUG 06-24 20:12:42 [manager.py:391] +ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.91998100280762ms total_cost_time:205.963134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7807 prompt_cache_len:5151 prompt_cache_ratio:0.6597924939157167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 +DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10818910598754883 s +INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11009931564331055 s +DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=206859655588565935639958019697132375743, time:1750767162.3277104s req_ids:[8] +DEBUG 06-24 20:12:42 [manager.py:391] +ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:203.4900188446045ms total_cost_time:203.53388786315918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7808 prompt_cache_len:5151 prompt_cache_ratio:0.6597079918032787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 +DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10830044746398926 s +INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s +DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=105027466097580820553409443519945041224, time:1750767162.5361772s req_ids:[8] +DEBUG 06-24 20:12:42 [manager.py:391] +ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:206.60758018493652ms total_cost_time:206.65264129638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7809 prompt_cache_len:5151 prompt_cache_ratio:0.6596235113330772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 +DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10750699043273926 s +INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.10945916175842285 s +DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=338869147424300712385214900071881679254, time:1750767162.7489462s req_ids:[8] +DEBUG 06-24 20:12:42 [manager.py:391] +ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:208.63914489746094ms total_cost_time:208.665132522583ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:7810 prompt_cache_len:5151 prompt_cache_ratio:0.659539052496799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 +DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s +INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s +DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=273643189204603867902547357672135946779, time:1750767162.9625251s req_ids:[8] +DEBUG 06-24 20:12:42 [manager.py:391] +ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:209.04016494750977ms total_cost_time:209.08522605895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7811 prompt_cache_len:5151 prompt_cache_ratio:0.6594546152861349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 +DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.10741043090820312 s +INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.1094810962677002 s +DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=209662032974470144078574535756495997810, time:1750767163.184626s req_ids:[8] +DEBUG 06-24 20:12:43 [manager.py:391] +ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:216.8440818786621ms total_cost_time:216.8865203857422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7812 prompt_cache_len:5151 prompt_cache_ratio:0.6593701996927803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 +DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.10881900787353516 s +INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.11075806617736816 s +DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=66348501409928737007563581828109310192, time:1750767163.3994532s req_ids:[8] +DEBUG 06-24 20:12:43 [manager.py:391] +ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:204.49209213256836ms total_cost_time:204.53453063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7813 prompt_cache_len:5151 prompt_cache_ratio:0.6592858057084346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 +DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.31018972396850586 s +INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.3122715950012207 s +DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=282802671165673485803220431081463154781, time:1750767163.8082213s req_ids:[8] +DEBUG 06-24 20:12:43 [manager.py:391] +ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:410.7203483581543ms total_cost_time:410.7632637023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7814 prompt_cache_len:5151 prompt_cache_ratio:0.6592014333248016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 +DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10756707191467285 s +INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.11001849174499512 s +DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=307054873825381039363732680820361925079, time:1750767164.026393s req_ids:[8] +DEBUG 06-24 20:12:44 [manager.py:391] +ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:208.6312770843506ms total_cost_time:208.67443084716797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7815 prompt_cache_len:5151 prompt_cache_ratio:0.6591170825335892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 +DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10797810554504395 s +INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.10997533798217773 s +DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=112390849237437208587327163039240949921, time:1750767164.239071s req_ids:[8] +DEBUG 06-24 20:12:44 [manager.py:391] +ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:207.7314853668213ms total_cost_time:207.77535438537598ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7816 prompt_cache_len:5151 prompt_cache_ratio:0.6590327533265097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 +DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10856461524963379 s +INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s +DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=113759178430491661709473551108915496947, time:1750767164.4567106s req_ids:[8] +DEBUG 06-24 20:12:44 [manager.py:391] +ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:211.82727813720703ms total_cost_time:211.87210083007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7817 prompt_cache_len:5151 prompt_cache_ratio:0.6589484456952795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 +DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10897397994995117 s +INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.1109311580657959 s +DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=57724224046778320720279463542096146490, time:1750767164.6691864s req_ids:[8] +DEBUG 06-24 20:12:44 [manager.py:391] +ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:205.4297924041748ms total_cost_time:205.4731845855713ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7818 prompt_cache_len:5151 prompt_cache_ratio:0.6588641596316194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 +DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10769510269165039 s +INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.10958504676818848 s +DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=292588218726670146515795331089167792488, time:1750767164.8796887s req_ids:[8] +DEBUG 06-24 20:12:44 [manager.py:391] +ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:201.68447494506836ms total_cost_time:201.72882080078125ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7819 prompt_cache_len:5151 prompt_cache_ratio:0.6587798951272541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 +DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10802650451660156 s +INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s +DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=286169459701384622266023073823860286096, time:1750767165.0856876s req_ids:[8] +DEBUG 06-24 20:12:45 [manager.py:391] +ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:201.53212547302246ms total_cost_time:201.57551765441895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7820 prompt_cache_len:5151 prompt_cache_ratio:0.658695652173913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 +DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s +INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.11058354377746582 s +DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=235034714627007687674623075359729493511, time:1750767165.2935016s req_ids:[8] +DEBUG 06-24 20:12:45 [manager.py:391] +ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:204.60987091064453ms total_cost_time:204.65373992919922ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7821 prompt_cache_len:5151 prompt_cache_ratio:0.6586114307633295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 +DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10772347450256348 s +INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10961556434631348 s +DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=82528607743229659664222798962670511548, time:1750767165.5035062s req_ids:[8] +DEBUG 06-24 20:12:45 [manager.py:391] +ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:204.85591888427734ms total_cost_time:204.8795223236084ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7822 prompt_cache_len:5151 prompt_cache_ratio:0.6585272308872411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 +DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10699963569641113 s +INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10897159576416016 s +DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=244572937814260211114096187091869256825, time:1750767165.7132423s req_ids:[8] +DEBUG 06-24 20:12:45 [manager.py:391] +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:207.88908004760742ms total_cost_time:207.9336643218994ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7823 prompt_cache_len:5151 prompt_cache_ratio:0.6584430525373898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 +DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.31109118461608887 s +INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.3131904602050781 s +DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=124631973272479657648092345425973111333, time:1750767166.1239002s req_ids:[8] +DEBUG 06-24 20:12:46 [manager.py:391] +ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:406.83746337890625ms total_cost_time:406.9020748138428ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:7824 prompt_cache_len:5151 prompt_cache_ratio:0.6583588957055214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 +DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10747361183166504 s +INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10948896408081055 s +DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=229035754067486776153710022692966777221, time:1750767166.3412209s req_ids:[8] +DEBUG 06-24 20:12:46 [manager.py:391] +ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.0229778289795ms total_cost_time:206.0678005218506ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7825 prompt_cache_len:5151 prompt_cache_ratio:0.6582747603833866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 +DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.1079871654510498 s +INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999488830566406 s +DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=75116731492428493444384759793893627504, time:1750767166.55168s req_ids:[8] +DEBUG 06-24 20:12:46 [manager.py:391] +ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.61234855651855ms total_cost_time:206.65574073791504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7826 prompt_cache_len:5151 prompt_cache_ratio:0.6581906465627396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 +DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10775208473205566 s +INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10971760749816895 s +DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=24430523302828009429234383318060190626, time:1750767166.7773473s req_ids:[8] +DEBUG 06-24 20:12:46 [manager.py:391] +ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:223.97589683532715ms total_cost_time:224.02167320251465ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7827 prompt_cache_len:5151 prompt_cache_ratio:0.6581065542353393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 +DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10783267021179199 s +INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10982656478881836 s +DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=202067431516456296234557740695017272739, time:1750767166.9950624s req_ids:[8] +DEBUG 06-24 20:12:46 [manager.py:391] +ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.45928382873535ms total_cost_time:206.50124549865723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7828 prompt_cache_len:5151 prompt_cache_ratio:0.6580224833929484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 +DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10797858238220215 s +INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s +DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=173437858391750328019739884941327596300, time:1750767167.2053876s req_ids:[8] +DEBUG 06-24 20:12:47 [manager.py:391] +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:205.63745498657227ms total_cost_time:205.68156242370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7829 prompt_cache_len:5151 prompt_cache_ratio:0.6579384340273343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 +DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s +INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s +DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=74453388191925478653278384864177402429, time:1750767167.4185927s req_ids:[8] +DEBUG 06-24 20:12:47 [manager.py:391] +ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:204.40340042114258ms total_cost_time:204.44846153259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7830 prompt_cache_len:5151 prompt_cache_ratio:0.6578544061302682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 +DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10787391662597656 s +INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.1099708080291748 s +DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=282155727067769280782993866955231938843, time:1750767167.628151s req_ids:[8] +DEBUG 06-24 20:12:47 [manager.py:391] +DEBUG 06-24 20:12:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 34268.414 tokens/s +DEBUG 06-24 20:12:47 [stats.py:37] Avg prompt tokens throughput: 34259.640 tokens/s +DEBUG 06-24 20:12:47 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s +ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:206.35747909545898ms total_cost_time:206.40087127685547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7831 prompt_cache_len:5151 prompt_cache_ratio:0.6577703996935257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 +DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s +INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.10948538780212402 s +DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=108920066960515860144407616421605717055, time:1750767167.8410118s req_ids:[8] +DEBUG 06-24 20:12:47 [manager.py:391] +ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:207.8542709350586ms total_cost_time:207.89813995361328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7832 prompt_cache_len:5151 prompt_cache_ratio:0.6576864147088867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 +DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s +INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.1107170581817627 s +DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=236193475925226025747992817951678770063, time:1750767168.0548775s req_ids:[8] +DEBUG 06-24 20:12:48 [manager.py:391] +ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:358.74032974243164ms total_cost_time:358.7837219238281ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7833 prompt_cache_len:5151 prompt_cache_ratio:0.6576024511681348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 +DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10766434669494629 s +INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966897010803223 s +DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=6837067223728868161548059738235814118, time:1750767168.4143562s req_ids:[8] +DEBUG 06-24 20:12:48 [manager.py:391] +ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:202.64267921447754ms total_cost_time:202.68607139587402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7834 prompt_cache_len:5151 prompt_cache_ratio:0.6575185090630584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 +DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s +INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10928058624267578 s +DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=173510149407034626369929831193475074888, time:1750767168.6263971s req_ids:[8] +DEBUG 06-24 20:12:48 [manager.py:391] +ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:205.04474639892578ms total_cost_time:205.06572723388672ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7835 prompt_cache_len:5151 prompt_cache_ratio:0.6574345883854499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 +DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.106842041015625 s +INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10872197151184082 s +DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=221223644396528069925247365640292506895, time:1750767168.8385878s req_ids:[8] +DEBUG 06-24 20:12:48 [manager.py:391] +ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:207.32474327087402ms total_cost_time:207.3671817779541ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7836 prompt_cache_len:5151 prompt_cache_ratio:0.6573506891271057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 +DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s +INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10968565940856934 s +DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=281517890659331691620816104545894738764, time:1750767169.0551896s req_ids:[8] +DEBUG 06-24 20:12:49 [manager.py:391] +ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:212.385892868042ms total_cost_time:212.43023872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7837 prompt_cache_len:5151 prompt_cache_ratio:0.6572668112798264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 +DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.1076512336730957 s +INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10953330993652344 s +DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=268326555053036279353775213955998023840, time:1750767169.270256s req_ids:[8] +DEBUG 06-24 20:12:49 [manager.py:391] +ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:209.21063423156738ms total_cost_time:209.25474166870117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7838 prompt_cache_len:5151 prompt_cache_ratio:0.6571829548354172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 +DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10802793502807617 s +INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10988783836364746 s +DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=307489675214198564511685809218087725720, time:1750767169.4838462s req_ids:[8] +DEBUG 06-24 20:12:49 [manager.py:391] +ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:207.52382278442383ms total_cost_time:207.5479030609131ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7839 prompt_cache_len:5151 prompt_cache_ratio:0.6570991197856869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 +DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10446023941040039 s +INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10647201538085938 s +DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=231708289772467728640701324307162813532, time:1750767169.6993258s req_ids:[8] +DEBUG 06-24 20:12:49 [manager.py:391] +ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:210.81280708312988ms total_cost_time:210.83426475524902ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:7840 prompt_cache_len:5151 prompt_cache_ratio:0.657015306122449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 +DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10384535789489746 s +INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10575532913208008 s +DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=141116724313608378497178101585232724247, time:1750767169.923279s req_ids:[8] +DEBUG 06-24 20:12:49 [manager.py:391] +ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:220.87478637695312ms total_cost_time:220.89695930480957ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7841 prompt_cache_len:5151 prompt_cache_ratio:0.6569315138375207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 +DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.1054677963256836 s +INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10742735862731934 s +DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=303587160006366534194467963279072552093, time:1750767170.1389878s req_ids:[8] +DEBUG 06-24 20:12:50 [manager.py:391] +ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:209.57040786743164ms total_cost_time:209.5935344696045ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7842 prompt_cache_len:5151 prompt_cache_ratio:0.6568477429227237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 +DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10389113426208496 s +INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10590410232543945 s +DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=24512486689610865578662831877832408314, time:1750767170.3536935s req_ids:[8] +DEBUG 06-24 20:12:50 [manager.py:391] +ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:419.89946365356445ms total_cost_time:420.0477600097656ms,out_token_counter:1 mean_per_token_cost_time: 0.14829635620117188ms prompt_token_num:7843 prompt_cache_len:5151 prompt_cache_ratio:0.656763993369884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 +DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10954093933105469 s +INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.11141633987426758 s +DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=40115888345967496188149387683151479576, time:1750767170.7707872s req_ids:[8] +DEBUG 06-24 20:12:50 [manager.py:391] +ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:185.6365203857422ms total_cost_time:185.6846809387207ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:7844 prompt_cache_len:5151 prompt_cache_ratio:0.6566802651708312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 +DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s +INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10990571975708008 s +DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=197401546882747500383506495433550272837, time:1750767170.9653738s req_ids:[8] +DEBUG 06-24 20:12:50 [manager.py:391] +ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:199.86772537231445ms total_cost_time:199.89323616027832ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:7845 prompt_cache_len:5151 prompt_cache_ratio:0.6565965583173996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 +DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10799360275268555 s +INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.10997724533081055 s +DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=307534822013909809222983270193302572045, time:1750767171.1752682s req_ids:[8] +DEBUG 06-24 20:12:51 [manager.py:391] +ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:205.25574684143066ms total_cost_time:205.29890060424805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7846 prompt_cache_len:5151 prompt_cache_ratio:0.6565128728014274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 +DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10863518714904785 s +INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.11071467399597168 s +DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=5329549067393894531597531817611664621, time:1750767171.3861141s req_ids:[8] +DEBUG 06-24 20:12:51 [manager.py:391] +ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:206.06040954589844ms total_cost_time:206.10332489013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7847 prompt_cache_len:5151 prompt_cache_ratio:0.6564292086147573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 +DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.11042332649230957 s +INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.11253833770751953 s +DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=307273961558421150940248929403324173681, time:1750767171.6003299s req_ids:[8] +DEBUG 06-24 20:12:51 [manager.py:391] +ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:207.83734321594238ms total_cost_time:207.88121223449707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7848 prompt_cache_len:5151 prompt_cache_ratio:0.6563455657492355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 +DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10757637023925781 s +INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.10957503318786621 s +DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=237644735317629934630430100903887691055, time:1750767171.816964s req_ids:[8] +DEBUG 06-24 20:12:51 [manager.py:391] +ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:212.48126029968262ms total_cost_time:212.5253677368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7849 prompt_cache_len:5151 prompt_cache_ratio:0.6562619441967129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 +DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.10747790336608887 s +INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s +DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=315041943837192311843458098095749415180, time:1750767172.0288498s req_ids:[8] +DEBUG 06-24 20:12:52 [manager.py:391] +ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:200.76680183410645ms total_cost_time:200.78420639038086ms,out_token_counter:1 mean_per_token_cost_time: 0.017404556274414062ms prompt_token_num:7850 prompt_cache_len:5151 prompt_cache_ratio:0.6561783439490446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 +DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.1056528091430664 s +INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.10761499404907227 s +DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=253688449457827194157943166102316284723, time:1750767172.2339876s req_ids:[8] +DEBUG 06-24 20:12:52 [manager.py:391] +ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:197.22747802734375ms total_cost_time:197.27182388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7851 prompt_cache_len:5151 prompt_cache_ratio:0.6560947649980894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 +DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.11025881767272949 s +INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.11274528503417969 s +DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=12431557209936470488714697809467793092, time:1750767172.43621s req_ids:[8] +DEBUG 06-24 20:12:52 [manager.py:391] +ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:193.8316822052002ms total_cost_time:193.8765048980713ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7852 prompt_cache_len:5151 prompt_cache_ratio:0.6560112073357106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 +DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.11105155944824219 s +INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.1139066219329834 s +DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=163246012947028245846152416365491824755, time:1750767172.635632s req_ids:[8] +DEBUG 06-24 20:12:52 [manager.py:391] +ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:194.3669319152832ms total_cost_time:194.4105625152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7853 prompt_cache_len:5151 prompt_cache_ratio:0.6559276709537756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 +DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.3107764720916748 s +INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.31225156784057617 s +DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=154965861668476664084580937826507589001, time:1750767173.0613384s req_ids:[8] +DEBUG 06-24 20:12:53 [manager.py:391] +ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:430.57847023010254ms total_cost_time:430.6020736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7854 prompt_cache_len:5151 prompt_cache_ratio:0.6558441558441559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 +DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:53 [batch.py:51] router release req id 8 +INFO 06-24 20:12:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:12:53 [statics_utils.py:24] mean first cost: 229.8583200950235 ms +INFO 06-24 20:12:53 [statics_utils.py:24] mean per token cost: 0.08583945283048107 ms +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.1072380542755127 s +INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10808849334716797 s +DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=296597469887509482248910859973923037228, time:1750767173.2782252s req_ids:[8] +DEBUG 06-24 20:12:53 [manager.py:391] +ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:172.61576652526855ms total_cost_time:172.65987396240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7855 prompt_cache_len:5151 prompt_cache_ratio:0.655760661998727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 +DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.11081051826477051 s +INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.1128232479095459 s +DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=268361294092545847191864485264169355821, time:1750767173.4565158s req_ids:[8] +DEBUG 06-24 20:12:53 [manager.py:391] +ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:188.3225440979004ms total_cost_time:188.36593627929688ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7856 prompt_cache_len:5151 prompt_cache_ratio:0.6556771894093686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 +DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.10772824287414551 s +INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10971498489379883 s +DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=251681310256606377179874524905202762770, time:1750767173.6421583s req_ids:[8] +DEBUG 06-24 20:12:53 [manager.py:391] +ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:204.22792434692383ms total_cost_time:204.2853832244873ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:7857 prompt_cache_len:5151 prompt_cache_ratio:0.6555937380679648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 +DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.1049950122833252 s +INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10697484016418457 s +DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=124473054105582977757357013000252202510, time:1750767173.8729613s req_ids:[8] +DEBUG 06-24 20:12:53 [manager.py:391] +ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:230.40080070495605ms total_cost_time:230.44800758361816ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7858 prompt_cache_len:5151 prompt_cache_ratio:0.6555103079664036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 +DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10741066932678223 s +INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s +DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=126360516433953115765849857296199212361, time:1750767174.0914202s req_ids:[8] +DEBUG 06-24 20:12:54 [manager.py:391] +ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:216.02845191955566ms total_cost_time:216.050386428833ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7859 prompt_cache_len:5151 prompt_cache_ratio:0.6554268990965771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 +DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10408782958984375 s +INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10613632202148438 s +DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=22547531148127120559754087384139046939, time:1750767174.310107s req_ids:[8] +DEBUG 06-24 20:12:54 [manager.py:391] +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:207.38506317138672ms total_cost_time:207.40723609924316ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7860 prompt_cache_len:5151 prompt_cache_ratio:0.6553435114503817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 +DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10633254051208496 s +INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10843443870544434 s +DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=263023415094047271043780797761014411231, time:1750767174.5220256s req_ids:[8] +DEBUG 06-24 20:12:54 [manager.py:391] +ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:206.82358741760254ms total_cost_time:206.84552192687988ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7861 prompt_cache_len:5151 prompt_cache_ratio:0.6552601450197176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 +DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.1042168140411377 s +INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10651326179504395 s +DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=300604934885119637634836726330644569553, time:1750767174.7398539s req_ids:[8] +DEBUG 06-24 20:12:54 [manager.py:391] +ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:217.3776626586914ms total_cost_time:217.39912033081055ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:7862 prompt_cache_len:5151 prompt_cache_ratio:0.6551767997964895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 +DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.1043400764465332 s +INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10589241981506348 s +DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=190357288578506274695988281917095770923, time:1750767174.9545193s req_ids:[8] +DEBUG 06-24 20:12:54 [manager.py:391] +ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:204.93769645690918ms total_cost_time:204.9582004547119ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:7863 prompt_cache_len:5151 prompt_cache_ratio:0.6550934757726059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 +DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.3059842586517334 s +INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.30852556228637695 s +DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=230141403210811406881505948612778582404, time:1750767175.3794127s req_ids:[8] +DEBUG 06-24 20:12:55 [manager.py:391] +ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:428.67207527160645ms total_cost_time:428.6983013153076ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7864 prompt_cache_len:5151 prompt_cache_ratio:0.6550101729399797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 +DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.10584163665771484 s +INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.10785794258117676 s +DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=264321377950579036539958183155249432759, time:1750767175.5973854s req_ids:[8] +DEBUG 06-24 20:12:55 [manager.py:391] +ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:192.63839721679688ms total_cost_time:192.69180297851562ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:7865 prompt_cache_len:5151 prompt_cache_ratio:0.6549268912905276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 +DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.10584592819213867 s +INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.10764598846435547 s +DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=217045488101720811049419811924290649885, time:1750767175.816658s req_ids:[8] +DEBUG 06-24 20:12:55 [manager.py:391] +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:205.68275451660156ms total_cost_time:205.74569702148438ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7866 prompt_cache_len:5151 prompt_cache_ratio:0.6548436308161708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 +DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.11109757423400879 s +INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.11252498626708984 s +DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=298134573649110912622343440084621034180, time:1750767176.0103226s req_ids:[8] +DEBUG 06-24 20:12:56 [manager.py:391] +ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:202.03065872192383ms total_cost_time:202.07595825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7867 prompt_cache_len:5151 prompt_cache_ratio:0.6547603915088344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 +DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.1062161922454834 s +INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10811352729797363 s +DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=184728238813309456036900917964433343305, time:1750767176.219719s req_ids:[8] +DEBUG 06-24 20:12:56 [manager.py:391] +ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:201.86352729797363ms total_cost_time:201.91001892089844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7868 prompt_cache_len:5151 prompt_cache_ratio:0.6546771733604474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 +DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10471677780151367 s +INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10607218742370605 s +DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=111950901616908714130900100285260309411, time:1750767176.4368408s req_ids:[8] +DEBUG 06-24 20:12:56 [manager.py:391] +ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:170.7768440246582ms total_cost_time:170.79639434814453ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:7869 prompt_cache_len:5151 prompt_cache_ratio:0.6545939763629431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 +DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10854768753051758 s +INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.11049771308898926 s +DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=78415624879051548732735224168072897535, time:1750767176.6045241s req_ids:[8] +DEBUG 06-24 20:12:56 [manager.py:391] +ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:199.60999488830566ms total_cost_time:199.65553283691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7870 prompt_cache_len:5151 prompt_cache_ratio:0.6545108005082592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 +DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10727787017822266 s +INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10885024070739746 s +DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=233414516269008725324805499745081472802, time:1750767176.8186522s req_ids:[8] +DEBUG 06-24 20:12:56 [manager.py:391] +ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:205.0018310546875ms total_cost_time:205.02448081970215ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7871 prompt_cache_len:5151 prompt_cache_ratio:0.6544276457883369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 +DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10424470901489258 s +INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10634493827819824 s +DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=247953068239166334887973339496606245823, time:1750767177.025059s req_ids:[8] +DEBUG 06-24 20:12:57 [manager.py:391] +ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:212.68701553344727ms total_cost_time:212.7082347869873ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:7872 prompt_cache_len:5151 prompt_cache_ratio:0.6543445121951219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 +DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10459303855895996 s +INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10650992393493652 s +DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=302238872280038165254578060229914404602, time:1750767177.2418559s req_ids:[8] +DEBUG 06-24 20:12:57 [manager.py:391] +ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:394.3946361541748ms total_cost_time:394.41680908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7873 prompt_cache_len:5151 prompt_cache_ratio:0.654261399720564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 +DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10344314575195312 s +INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10525107383728027 s +DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=286503473111675095607754861358278990518, time:1750767177.635418s req_ids:[8] +DEBUG 06-24 20:12:57 [manager.py:391] +DEBUG 06-24 20:12:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 33752.300 tokens/s +DEBUG 06-24 20:12:57 [stats.py:37] Avg prompt tokens throughput: 33743.706 tokens/s +DEBUG 06-24 20:12:57 [stats.py:37] Avg generate tokens throughput: 8.594 tokens/s +ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:199.71442222595215ms total_cost_time:199.7373104095459ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:7874 prompt_cache_len:5151 prompt_cache_ratio:0.6541783083566167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 +DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10605216026306152 s +INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10798931121826172 s +DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=127078506917606954820349593645339128213, time:1750767177.8405166s req_ids:[8] +DEBUG 06-24 20:12:57 [manager.py:391] +ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:205.74307441711426ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7875 prompt_cache_len:5151 prompt_cache_ratio:0.6540952380952381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 +DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10694503784179688 s +INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10920977592468262 s +DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=233992111268628424688566563356844023088, time:1750767178.052152s req_ids:[8] +DEBUG 06-24 20:12:58 [manager.py:391] +ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:217.02051162719727ms total_cost_time:217.04649925231934ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:7876 prompt_cache_len:5151 prompt_cache_ratio:0.6540121889283901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 +DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10575175285339355 s +INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10756134986877441 s +DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=251061990466641925608856123154577490800, time:1750767178.271426s req_ids:[8] +DEBUG 06-24 20:12:58 [manager.py:391] +ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:205.11794090270996ms total_cost_time:205.14535903930664ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7877 prompt_cache_len:5151 prompt_cache_ratio:0.6539291608480386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 +DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10613703727722168 s +DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=51097521555061455467387534951474416660, time:1750767178.46793s req_ids:[8] +DEBUG 06-24 20:12:58 [manager.py:391] +INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10809063911437988 s +ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:181.502103805542ms total_cost_time:181.54668807983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7878 prompt_cache_len:5151 prompt_cache_ratio:0.6538461538461539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 +DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10791659355163574 s +INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10947346687316895 s +DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=146551847265213755202561051179619212524, time:1750767178.6773195s req_ids:[8] +DEBUG 06-24 20:12:58 [manager.py:391] +ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:197.2217559814453ms total_cost_time:197.2670555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7879 prompt_cache_len:5151 prompt_cache_ratio:0.65376316791471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 +DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10718035697937012 s +INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10902762413024902 s +DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=89115862987678142568656268587083587720, time:1750767178.8684704s req_ids:[8] +DEBUG 06-24 20:12:58 [manager.py:391] +ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:201.29919052124023ms total_cost_time:201.34282112121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7880 prompt_cache_len:5151 prompt_cache_ratio:0.6536802030456853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 +DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.1101839542388916 s +INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.11231207847595215 s +DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=162986498573728941537294242118658574543, time:1750767179.0723193s req_ids:[8] +DEBUG 06-24 20:12:59 [manager.py:391] +ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:158.28657150268555ms total_cost_time:158.32996368408203ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7881 prompt_cache_len:5151 prompt_cache_ratio:0.653597259231062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 +DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.1068410873413086 s +INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.10806107521057129 s +DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=185412020946522520068467839363895827249, time:1750767179.2357886s req_ids:[8] +DEBUG 06-24 20:12:59 [manager.py:391] +ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:189.55397605895996ms total_cost_time:189.57757949829102ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7882 prompt_cache_len:5151 prompt_cache_ratio:0.6535143364628266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 +DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10394978523254395 s +INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.1055293083190918 s +DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=142059734378155772776807005061767237422, time:1750767179.432318s req_ids:[8] +DEBUG 06-24 20:12:59 [manager.py:391] +ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:374.5393753051758ms total_cost_time:374.58276748657227ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7883 prompt_cache_len:5151 prompt_cache_ratio:0.6534314347329697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 +DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10754704475402832 s +INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.10938000679016113 s +DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=123713240672608311838517953530146017920, time:1750767179.8118942s req_ids:[8] +DEBUG 06-24 20:12:59 [manager.py:391] +ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:188.19832801818848ms total_cost_time:188.24338912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7884 prompt_cache_len:5151 prompt_cache_ratio:0.6533485540334856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 +DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:12:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10844039916992188 s +INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.11043953895568848 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=67427140879333357418330329786273903796, time:1750767180.00242s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:188.52758407592773ms total_cost_time:188.57288360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7885 prompt_cache_len:5151 prompt_cache_ratio:0.6532656943563728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 +DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10796689987182617 s +INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.10975480079650879 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=2900130359649208526240365784995170837, time:1750767180.209241s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:200.38962364196777ms total_cost_time:200.43253898620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7886 prompt_cache_len:5151 prompt_cache_ratio:0.6531828556936343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 +DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10701131820678711 s +INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.10850691795349121 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=46424425140334738714158305292346791267, time:1750767180.402475s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:186.9971752166748ms total_cost_time:187.0410442352295ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7887 prompt_cache_len:5151 prompt_cache_ratio:0.6531000380372766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 +DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10892844200134277 s +INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.1109616756439209 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=302313526594270025396732375261683113736, time:1750767180.594776s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:185.89496612548828ms total_cost_time:185.9586238861084ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:7888 prompt_cache_len:5151 prompt_cache_ratio:0.6530172413793104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 +DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.1083219051361084 s +INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.11023306846618652 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=152033509839383854772700794251605781894, time:1750767180.790537s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:198.99439811706543ms total_cost_time:199.03993606567383ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7889 prompt_cache_len:5151 prompt_cache_ratio:0.6529344657117505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 +DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.1084446907043457 s +INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.11056041717529297 s +DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=253908231683318308956088896497234494859, time:1750767180.997681s req_ids:[8] +DEBUG 06-24 20:13:00 [manager.py:391] +ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:214.94698524475098ms total_cost_time:215.00849723815918ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:7890 prompt_cache_len:5151 prompt_cache_ratio:0.652851711026616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 +DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s +INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.11059403419494629 s +DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=182488562083900766787788617840230840474, time:1750767181.212677s req_ids:[8] +DEBUG 06-24 20:13:01 [manager.py:391] +ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:196.36821746826172ms total_cost_time:196.4733600616455ms,out_token_counter:1 mean_per_token_cost_time: 0.10514259338378906ms prompt_token_num:7891 prompt_cache_len:5151 prompt_cache_ratio:0.6527689773159295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 +DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10830569267272949 s +INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s +DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=80303069115567512710135163929348897385, time:1750767181.4186814s req_ids:[8] +DEBUG 06-24 20:13:01 [manager.py:391] +ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:197.70026206970215ms total_cost_time:197.74293899536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7892 prompt_cache_len:5151 prompt_cache_ratio:0.6526862645717182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 +DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s +INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.10953450202941895 s +DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=90066832745162117730290751837561427420, time:1750767181.62345s req_ids:[8] +DEBUG 06-24 20:13:01 [manager.py:391] +ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:209.19036865234375ms total_cost_time:209.23566818237305ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7893 prompt_cache_len:5151 prompt_cache_ratio:0.6526035727860129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 +DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.31040382385253906 s +INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.3123970031738281 s +DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=39778133448029726069810616260304593093, time:1750767182.0531948s req_ids:[8] +DEBUG 06-24 20:13:02 [manager.py:391] +ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:422.976016998291ms total_cost_time:423.0198860168457ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7894 prompt_cache_len:5151 prompt_cache_ratio:0.6525209019508488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 +DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10775566101074219 s +INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s +DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=44698441075349810791249508441735903748, time:1750767182.2686117s req_ids:[8] +DEBUG 06-24 20:13:02 [manager.py:391] +ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:204.42795753479004ms total_cost_time:204.47421073913574ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7895 prompt_cache_len:5151 prompt_cache_ratio:0.6524382520582648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 +DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10716891288757324 s +INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.1090552806854248 s +DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=177655514827792966030129165712151640473, time:1750767182.4973726s req_ids:[8] +DEBUG 06-24 20:13:02 [manager.py:391] +ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:208.7094783782959ms total_cost_time:208.7538242340088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7896 prompt_cache_len:5151 prompt_cache_ratio:0.6523556231003039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 +DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10757899284362793 s +INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.10967493057250977 s +DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=62159712570324668989569367859642289889, time:1750767182.6907918s req_ids:[8] +DEBUG 06-24 20:13:02 [manager.py:391] +ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:200.35243034362793ms total_cost_time:200.3955841064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7897 prompt_cache_len:5151 prompt_cache_ratio:0.6522730150690136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 +DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10933208465576172 s +INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.11121678352355957 s +DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=108348845364740487634179052781422269795, time:1750767182.8985214s req_ids:[8] +DEBUG 06-24 20:13:02 [manager.py:391] +ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:203.49597930908203ms total_cost_time:203.54056358337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7898 prompt_cache_len:5151 prompt_cache_ratio:0.6521904279564447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 +DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s +INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.11079144477844238 s +DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=6605860236587641384885165777443432748, time:1750767183.1096194s req_ids:[8] +DEBUG 06-24 20:13:03 [manager.py:391] +ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:201.32732391357422ms total_cost_time:201.3697624206543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7899 prompt_cache_len:5151 prompt_cache_ratio:0.6521078617546525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 +DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10791945457458496 s +INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.10991621017456055 s +DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=289821486839042290796382636304548662413, time:1750767183.3158336s req_ids:[8] +DEBUG 06-24 20:13:03 [manager.py:391] +ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:206.1479091644287ms total_cost_time:206.1939239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7900 prompt_cache_len:5151 prompt_cache_ratio:0.6520253164556962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 +DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10901093482971191 s +INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.11100316047668457 s +DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=275711844514365880620415851361824461795, time:1750767183.5266416s req_ids:[8] +DEBUG 06-24 20:13:03 [manager.py:391] +ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:204.11133766174316ms total_cost_time:204.15568351745605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7901 prompt_cache_len:5151 prompt_cache_ratio:0.6519427920516391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 +DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10836672782897949 s +INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102902889251709 s +DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=123694756116836612678339531542087918998, time:1750767183.7368822s req_ids:[8] +DEBUG 06-24 20:13:03 [manager.py:391] +ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:197.0236301422119ms total_cost_time:197.0670223236084ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7902 prompt_cache_len:5151 prompt_cache_ratio:0.6518602885345482 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 +DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s +INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s +DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=123775485109388652976857619359068068796, time:1750767183.9393003s req_ids:[8] +DEBUG 06-24 20:13:03 [manager.py:391] +ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:203.72915267944336ms total_cost_time:203.78541946411133ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7903 prompt_cache_len:5151 prompt_cache_ratio:0.651777805896495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 +DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.3103783130645752 s +INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.3123209476470947 s +DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=335894166381298754951731075756251215310, time:1750767184.3479855s req_ids:[8] +DEBUG 06-24 20:13:04 [manager.py:391] +ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:374.713659286499ms total_cost_time:374.7575283050537ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7904 prompt_cache_len:5151 prompt_cache_ratio:0.6516953441295547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 +DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10678696632385254 s +INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.1085667610168457 s +DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=237800872036648745128847456497235724796, time:1750767184.5361698s req_ids:[8] +DEBUG 06-24 20:13:04 [manager.py:391] +ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:173.8452911376953ms total_cost_time:173.89750480651855ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:7905 prompt_cache_len:5151 prompt_cache_ratio:0.6516129032258065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 +DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10713076591491699 s +INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s +DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=39255547682258642672138592418649323969, time:1750767184.7130153s req_ids:[8] +DEBUG 06-24 20:13:04 [manager.py:391] +ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:179.38971519470215ms total_cost_time:179.43620681762695ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7906 prompt_cache_len:5151 prompt_cache_ratio:0.6515304831773336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 +DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10713577270507812 s +INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.1089634895324707 s +DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=173481927397527039614188545363467387475, time:1750767184.902614s req_ids:[8] +DEBUG 06-24 20:13:04 [manager.py:391] +ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:185.92357635498047ms total_cost_time:185.96696853637695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7907 prompt_cache_len:5151 prompt_cache_ratio:0.6514480839762236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 +DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10714173316955566 s +INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10898113250732422 s +DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=190578080940050250896555868563495249006, time:1750767185.0922885s req_ids:[8] +DEBUG 06-24 20:13:05 [manager.py:391] +ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:182.1463108062744ms total_cost_time:182.1916103363037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7908 prompt_cache_len:5151 prompt_cache_ratio:0.6513657056145675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 +DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s +INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10920333862304688 s +DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=266639939143359683303859665175877895688, time:1750767185.2803514s req_ids:[8] +DEBUG 06-24 20:13:05 [manager.py:391] +ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:184.91744995117188ms total_cost_time:184.96417999267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7909 prompt_cache_len:5151 prompt_cache_ratio:0.6512833480844608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 +DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10914444923400879 s +INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.11112594604492188 s +DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=340250830668207411367088568228091772786, time:1750767185.465908s req_ids:[8] +DEBUG 06-24 20:13:05 [manager.py:391] +ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:204.15782928466797ms total_cost_time:204.20360565185547ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7910 prompt_cache_len:5151 prompt_cache_ratio:0.6512010113780026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 +DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10770893096923828 s +INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10984253883361816 s +DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=152551029912292411840390979526489326153, time:1750767185.6765435s req_ids:[8] +DEBUG 06-24 20:13:05 [manager.py:391] +ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:202.4819850921631ms total_cost_time:202.52680778503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7911 prompt_cache_len:5151 prompt_cache_ratio:0.6511186954872962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 +DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10759139060974121 s +INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s +DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=121832564358685040198901278251330890652, time:1750767185.8840709s req_ids:[8] +DEBUG 06-24 20:13:05 [manager.py:391] +ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:206.82978630065918ms total_cost_time:206.90298080444336ms,out_token_counter:1 mean_per_token_cost_time: 0.07319450378417969ms prompt_token_num:7912 prompt_cache_len:5151 prompt_cache_ratio:0.651036400404449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 +DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10760879516601562 s +INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.10966038703918457 s +DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=13153126015800782008200206057395262524, time:1750767186.096664s req_ids:[8] +DEBUG 06-24 20:13:06 [manager.py:391] +ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:359.50756072998047ms total_cost_time:359.5540523529053ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7913 prompt_cache_len:5151 prompt_cache_ratio:0.6509541261215721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 +DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.1082000732421875 s +INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023831367492676 s +DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=226346503673033865273859597184193016819, time:1750767186.4575088s req_ids:[8] +DEBUG 06-24 20:13:06 [manager.py:391] +ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:197.99566268920898ms total_cost_time:198.03905487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7914 prompt_cache_len:5151 prompt_cache_ratio:0.650871872630781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 +DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s +INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.11020445823669434 s +DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=209418053219207086454191845195469947598, time:1750767186.6653643s req_ids:[8] +DEBUG 06-24 20:13:06 [manager.py:391] +ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:201.39050483703613ms total_cost_time:201.43532752990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7915 prompt_cache_len:5151 prompt_cache_ratio:0.6507896399241946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 +DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10780835151672363 s +INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s +DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=172098795589316693942510050371878336921, time:1750767186.873043s req_ids:[8] +DEBUG 06-24 20:13:06 [manager.py:391] +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:205.95598220825195ms total_cost_time:206.00104331970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7916 prompt_cache_len:5151 prompt_cache_ratio:0.6507074279939363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 +DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10806965827941895 s +INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10998010635375977 s +DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=65473686777600051045740449582883380537, time:1750767187.0843024s req_ids:[8] +DEBUG 06-24 20:13:07 [manager.py:391] +ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:205.81698417663574ms total_cost_time:205.86085319519043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7917 prompt_cache_len:5151 prompt_cache_ratio:0.6506252368321334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 +DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.1078944206237793 s +INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.1099245548248291 s +DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=171341821113669339273368265654742421590, time:1750767187.2954495s req_ids:[8] +DEBUG 06-24 20:13:07 [manager.py:391] +ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:205.60932159423828ms total_cost_time:205.65390586853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7918 prompt_cache_len:5151 prompt_cache_ratio:0.6505430664309169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 +DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10785794258117676 s +INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10975098609924316 s +DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=272346332865439823118449374424592895924, time:1750767187.5076268s req_ids:[8] +DEBUG 06-24 20:13:07 [manager.py:391] +ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:202.61573791503906ms total_cost_time:202.66008377075195ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7919 prompt_cache_len:5151 prompt_cache_ratio:0.650460916782422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 +DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10987687110900879 s +INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.11219382286071777 s +DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=81938992990779597663101351136034808955, time:1750767187.714619s req_ids:[8] +DEBUG 06-24 20:13:07 [manager.py:391] +DEBUG 06-24 20:13:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 36051.913 tokens/s +DEBUG 06-24 20:13:07 [stats.py:37] Avg prompt tokens throughput: 36042.786 tokens/s +DEBUG 06-24 20:13:07 [stats.py:37] Avg generate tokens throughput: 9.128 tokens/s +ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:204.15234565734863ms total_cost_time:204.19836044311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7920 prompt_cache_len:5151 prompt_cache_ratio:0.6503787878787879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 +DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10734820365905762 s +INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10941362380981445 s +DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=230638070399174263728168433577006515524, time:1750767187.935181s req_ids:[8] +DEBUG 06-24 20:13:07 [manager.py:391] +ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:213.0739688873291ms total_cost_time:213.1185531616211ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7921 prompt_cache_len:5151 prompt_cache_ratio:0.6502966797121575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 +DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.1088099479675293 s +INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.11080026626586914 s +DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=236806128764871171734798511644940751512, time:1750767188.1417224s req_ids:[8] +DEBUG 06-24 20:13:08 [manager.py:391] +ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:203.0937671661377ms total_cost_time:203.13763618469238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7922 prompt_cache_len:5151 prompt_cache_ratio:0.6502145922746781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 +DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10749149322509766 s +INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.10953092575073242 s +DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=240696199067258421943873084107753893066, time:1750767188.3506975s req_ids:[8] +DEBUG 06-24 20:13:08 [manager.py:391] +ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:367.6939010620117ms total_cost_time:367.74158477783203ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7923 prompt_cache_len:5151 prompt_cache_ratio:0.6501325255585005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 +DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10718536376953125 s +INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.10913324356079102 s +DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=46218260897524850275658883935598911374, time:1750767188.72318s req_ids:[8] +DEBUG 06-24 20:13:08 [manager.py:391] +ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:203.67884635925293ms total_cost_time:203.72247695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7924 prompt_cache_len:5151 prompt_cache_ratio:0.6500504795557799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 +DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10895037651062012 s +INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.11003494262695312 s +DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=87202976777824432516454560716265878975, time:1750767188.9416347s req_ids:[8] +DEBUG 06-24 20:13:08 [manager.py:391] +ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:214.57767486572266ms total_cost_time:214.62297439575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7925 prompt_cache_len:5151 prompt_cache_ratio:0.6499684542586751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 +DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10826468467712402 s +INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.11015796661376953 s +DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=235785113419115753343401632089706793226, time:1750767189.158465s req_ids:[8] +DEBUG 06-24 20:13:09 [manager.py:391] +ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:209.26785469055176ms total_cost_time:209.31458473205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7926 prompt_cache_len:5151 prompt_cache_ratio:0.6498864496593489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 +DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10793662071228027 s +INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.10944557189941406 s +DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=246578625919351810075038972413302716947, time:1750767189.3711963s req_ids:[8] +DEBUG 06-24 20:13:09 [manager.py:391] +ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:205.60860633850098ms total_cost_time:205.65247535705566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7927 prompt_cache_len:5151 prompt_cache_ratio:0.6498044657499684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 +DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10806536674499512 s +INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.10982680320739746 s +DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=169929738287265903565011866029648777119, time:1750767189.5820098s req_ids:[8] +DEBUG 06-24 20:13:09 [manager.py:391] +ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:206.95090293884277ms total_cost_time:206.99787139892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7928 prompt_cache_len:5151 prompt_cache_ratio:0.6497225025227044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 +DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10829949378967285 s +INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024141311645508 s +DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=12643162153161367092062928905874082559, time:1750767189.8050375s req_ids:[8] +DEBUG 06-24 20:13:09 [manager.py:391] +ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:218.69254112243652ms total_cost_time:218.75262260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7929 prompt_cache_len:5151 prompt_cache_ratio:0.6496405599697314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 +DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10791230201721191 s +INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.10975050926208496 s +DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=172499203099025787549400814559674835046, time:1750767190.0256903s req_ids:[8] +DEBUG 06-24 20:13:10 [manager.py:391] +ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:214.1721248626709ms total_cost_time:214.2188549041748ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7930 prompt_cache_len:5151 prompt_cache_ratio:0.6495586380832282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 +DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10918116569519043 s +INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.11094164848327637 s +DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=82108129192379092560528808504768085724, time:1750767190.2386286s req_ids:[8] +DEBUG 06-24 20:13:10 [manager.py:391] +ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:170.58277130126953ms total_cost_time:170.6857681274414ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:7931 prompt_cache_len:5151 prompt_cache_ratio:0.6494767368553777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 +DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10782790184020996 s +INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.10999035835266113 s +DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=190329048025523621231116025914761801299, time:1750767190.413228s req_ids:[8] +DEBUG 06-24 20:13:10 [manager.py:391] +ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:196.61879539489746ms total_cost_time:196.66337966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7932 prompt_cache_len:5151 prompt_cache_ratio:0.6493948562783661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 +DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10846233367919922 s +INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.11048293113708496 s +DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=3605447209226160400306186525110061324, time:1750767190.616704s req_ids:[8] +DEBUG 06-24 20:13:10 [manager.py:391] +ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:203.51839065551758ms total_cost_time:203.56440544128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7933 prompt_cache_len:5151 prompt_cache_ratio:0.6493129963443842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 +DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.3096907138824463 s +INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.3118479251861572 s +DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=50382169885984366261941334393795361844, time:1750767191.0408227s req_ids:[8] +DEBUG 06-24 20:13:11 [manager.py:391] +ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:420.7601547241211ms total_cost_time:420.8204746246338ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7934 prompt_cache_len:5151 prompt_cache_ratio:0.6492311570456264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 +DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.10867834091186523 s +INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11068081855773926 s +DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=45859994211609285005085830790199660753, time:1750767191.2545052s req_ids:[8] +DEBUG 06-24 20:13:11 [manager.py:391] +ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:206.18057250976562ms total_cost_time:206.21871948242188ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:7935 prompt_cache_len:5151 prompt_cache_ratio:0.6491493383742911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 +DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.11047768592834473 s +INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11294674873352051 s +DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=189644505788787314905127435530256993724, time:1750767191.4665513s req_ids:[8] +DEBUG 06-24 20:13:11 [manager.py:391] +ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:204.70213890075684ms total_cost_time:204.74672317504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7936 prompt_cache_len:5151 prompt_cache_ratio:0.6490675403225806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 +DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:11 [batch.py:51] router release req id 8 +INFO 06-24 20:13:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s +INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11016082763671875 s +DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=219821321002343569697730238889961466858, time:1750767191.6753488s req_ids:[8] +DEBUG 06-24 20:13:11 [manager.py:391] +ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:204.57887649536133ms total_cost_time:204.60963249206543ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:7937 prompt_cache_len:5151 prompt_cache_ratio:0.6489857628827013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 +DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.1078028678894043 s +INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s +DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=298207810121136073618616016893981759736, time:1750767191.8858616s req_ids:[8] +DEBUG 06-24 20:13:11 [manager.py:391] +ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:202.73661613464355ms total_cost_time:202.78334617614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7938 prompt_cache_len:5151 prompt_cache_ratio:0.6489040060468632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 +DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1087496280670166 s +INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.11087179183959961 s +DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=251706297057370749193957462732433719124, time:1750767192.095412s req_ids:[8] +DEBUG 06-24 20:13:12 [manager.py:391] +ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:205.4281234741211ms total_cost_time:205.4729461669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7939 prompt_cache_len:5151 prompt_cache_ratio:0.6488222698072805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 +DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1075582504272461 s +INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.10955047607421875 s +DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=261201989736215395061009724072290674442, time:1750767192.306056s req_ids:[8] +DEBUG 06-24 20:13:12 [manager.py:391] +ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:204.02836799621582ms total_cost_time:204.07366752624512ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7940 prompt_cache_len:5151 prompt_cache_ratio:0.6487405541561713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 +DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s +INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.11104106903076172 s +DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=157037935830945172001923291864364440951, time:1750767192.5170796s req_ids:[8] +DEBUG 06-24 20:13:12 [manager.py:391] +ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:203.6001682281494ms total_cost_time:203.6597728729248ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7941 prompt_cache_len:5151 prompt_cache_ratio:0.6486588590857575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 +DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.10979104042053223 s +INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.1118476390838623 s +DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=100849140383447369085005502856360103607, time:1750767192.7250402s req_ids:[8] +DEBUG 06-24 20:13:12 [manager.py:391] +ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:198.75741004943848ms total_cost_time:198.80247116088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7942 prompt_cache_len:5151 prompt_cache_ratio:0.648577184588265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 +DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1079094409942627 s +INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.10998392105102539 s +DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=188509554225064819795100941825890153293, time:1750767192.9326692s req_ids:[8] +DEBUG 06-24 20:13:12 [manager.py:391] +ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:205.7960033416748ms total_cost_time:205.8405876159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7943 prompt_cache_len:5151 prompt_cache_ratio:0.6484955306559235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 +DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.3096151351928711 s +INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.311603307723999 s +DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=165924940871535103958487311751476384802, time:1750767193.3451605s req_ids:[8] +DEBUG 06-24 20:13:13 [manager.py:391] +ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:411.43321990966797ms total_cost_time:411.47923469543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7944 prompt_cache_len:5151 prompt_cache_ratio:0.6484138972809668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 +DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.10728573799133301 s +INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.10907793045043945 s +DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=72000306954325854122893826628957377457, time:1750767193.5611944s req_ids:[8] +DEBUG 06-24 20:13:13 [manager.py:391] +ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:176.60236358642578ms total_cost_time:176.64480209350586ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7945 prompt_cache_len:5151 prompt_cache_ratio:0.6483322844556325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 +DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.11027312278747559 s +INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.11226415634155273 s +DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=26976301927374775297573615978889401296, time:1750767193.7499294s req_ids:[8] +DEBUG 06-24 20:13:13 [manager.py:391] +ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:196.70963287353516ms total_cost_time:196.75374031066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7946 prompt_cache_len:5151 prompt_cache_ratio:0.6482506921721621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 +DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.10876059532165527 s +INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.1107943058013916 s +DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=17795288803662155671368493058997001428, time:1750767193.9432232s req_ids:[8] +DEBUG 06-24 20:13:13 [manager.py:391] +ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:194.3838596343994ms total_cost_time:194.42105293273926ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:7947 prompt_cache_len:5151 prompt_cache_ratio:0.6481691204228011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 +DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s +INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.10978436470031738 s +DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=102365839971098244946502808781547108031, time:1750767194.149403s req_ids:[8] +DEBUG 06-24 20:13:14 [manager.py:391] +ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:205.3685188293457ms total_cost_time:205.4131031036377ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7948 prompt_cache_len:5151 prompt_cache_ratio:0.6480875691997987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 +DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10853886604309082 s +INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.1105196475982666 s +DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=108852590672583081808096388461669505108, time:1750767194.3614717s req_ids:[8] +DEBUG 06-24 20:13:14 [manager.py:391] +ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:205.60407638549805ms total_cost_time:205.64842224121094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7949 prompt_cache_len:5151 prompt_cache_ratio:0.6480060384954083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 +DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s +INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.11044073104858398 s +DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=193820767272036912060725222124523004983, time:1750767194.5727732s req_ids:[8] +DEBUG 06-24 20:13:14 [manager.py:391] +ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:204.72121238708496ms total_cost_time:204.76484298706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7950 prompt_cache_len:5151 prompt_cache_ratio:0.6479245283018867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 +DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s +INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.10897445678710938 s +DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=100201376188519901765503312076035811969, time:1750767194.7837389s req_ids:[8] +DEBUG 06-24 20:13:14 [manager.py:391] +ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:168.532133102417ms total_cost_time:168.5769557952881ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7951 prompt_cache_len:5151 prompt_cache_ratio:0.6478430386114954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 +DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.1085958480834961 s +INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.1105966567993164 s +DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=80175787666720453699911710298814146851, time:1750767194.9575622s req_ids:[8] +DEBUG 06-24 20:13:14 [manager.py:391] +ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:195.47295570373535ms total_cost_time:195.51801681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7952 prompt_cache_len:5151 prompt_cache_ratio:0.647761569416499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 +DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:15 [batch.py:51] router release req id 8 +INFO 06-24 20:13:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s +INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.1103367805480957 s +DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=291063739834872920566854934898680732024, time:1750767195.1586053s req_ids:[8] +DEBUG 06-24 20:13:15 [manager.py:391] +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:362.4105453491211ms total_cost_time:362.4553680419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7953 prompt_cache_len:5151 prompt_cache_ratio:0.6476801207091664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 +DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.1086263656616211 s +INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.11065411567687988 s +DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=241528825606693664583590914976001702960, time:1750767195.5245929s req_ids:[8] +DEBUG 06-24 20:13:15 [manager.py:391] +ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:203.05347442626953ms total_cost_time:203.0966281890869ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7954 prompt_cache_len:5151 prompt_cache_ratio:0.6475986924817702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 +DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s +INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s +DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=19452618694001067806174437045959048889, time:1750767195.7363954s req_ids:[8] +DEBUG 06-24 20:13:15 [manager.py:391] +ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:205.1067352294922ms total_cost_time:205.15131950378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7955 prompt_cache_len:5151 prompt_cache_ratio:0.6475172847265871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 +DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10867929458618164 s +INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.11081171035766602 s +DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=311462142156239411046205829092249669040, time:1750767195.948825s req_ids:[8] +DEBUG 06-24 20:13:15 [manager.py:391] +ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:207.8399658203125ms total_cost_time:207.8852653503418ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7956 prompt_cache_len:5151 prompt_cache_ratio:0.6474358974358975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 +DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10894131660461426 s +INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11071896553039551 s +DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=177499050341133244490451836185537531683, time:1750767196.1605725s req_ids:[8] +DEBUG 06-24 20:13:16 [manager.py:391] +ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:167.80614852905273ms total_cost_time:167.8483486175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7957 prompt_cache_len:5151 prompt_cache_ratio:0.6473545306019857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 +DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10842585563659668 s +INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s +DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=178673114598216798993756536284522377100, time:1750767196.3331385s req_ids:[8] +DEBUG 06-24 20:13:16 [manager.py:391] +ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:199.65696334838867ms total_cost_time:199.70202445983887ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7958 prompt_cache_len:5151 prompt_cache_ratio:0.64727318421714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 +INFO 06-24 20:13:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.1084892749786377 s +INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11057305335998535 s +DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=323102430961154705342918125938112755382, time:1750767196.5380726s req_ids:[8] +DEBUG 06-24 20:13:16 [manager.py:391] +ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:204.66208457946777ms total_cost_time:204.70690727233887ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7959 prompt_cache_len:5151 prompt_cache_ratio:0.6471918582736524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 +DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10920429229736328 s +INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11141395568847656 s +DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=84207559269411626679544389255632117579, time:1750767196.7503664s req_ids:[8] +DEBUG 06-24 20:13:16 [manager.py:391] +ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:204.3752670288086ms total_cost_time:204.41865921020508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7960 prompt_cache_len:5151 prompt_cache_ratio:0.6471105527638191 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 +DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10753083229064941 s +INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.10955405235290527 s +DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=10991299536171667354359562329823648315, time:1750767196.9578123s req_ids:[8] +DEBUG 06-24 20:13:16 [manager.py:391] +ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:205.98673820495605ms total_cost_time:206.03084564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7961 prompt_cache_len:5151 prompt_cache_ratio:0.6470292676799397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 +DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10987234115600586 s +INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.11175227165222168 s +DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=76897185658495584524723295597724586468, time:1750767197.170621s req_ids:[8] +DEBUG 06-24 20:13:17 [manager.py:391] +ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:209.35392379760742ms total_cost_time:209.4588279724121ms,out_token_counter:1 mean_per_token_cost_time: 0.1049041748046875ms prompt_token_num:7962 prompt_cache_len:5151 prompt_cache_ratio:0.646948003014318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 +DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10850858688354492 s +INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.11055994033813477 s +DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=162023126535662793328347509828152377396, time:1750767197.3830712s req_ids:[8] +DEBUG 06-24 20:13:17 [manager.py:391] +ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:373.0955123901367ms total_cost_time:373.1415271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7963 prompt_cache_len:5151 prompt_cache_ratio:0.6468667587592616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 +DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10791873931884766 s +INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.10986161231994629 s +DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=324684405530242042566443508761391729032, time:1750767197.761375s req_ids:[8] +DEBUG 06-24 20:13:17 [manager.py:391] +DEBUG 06-24 20:13:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 34793.244 tokens/s +DEBUG 06-24 20:13:17 [stats.py:37] Avg prompt tokens throughput: 34784.485 tokens/s +DEBUG 06-24 20:13:17 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s +ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:199.9685764312744ms total_cost_time:200.0138759613037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7964 prompt_cache_len:5151 prompt_cache_ratio:0.6467855349070819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 +DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s +INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.1101539134979248 s +DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=162766083698922772247394849587732474131, time:1750767197.972497s req_ids:[8] +DEBUG 06-24 20:13:17 [manager.py:391] +ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:204.81300354003906ms total_cost_time:204.85591888427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7965 prompt_cache_len:5151 prompt_cache_ratio:0.6467043314500942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 +DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s +INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s +DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=33312534504588364210253046676642202074, time:1750767198.1811037s req_ids:[8] +DEBUG 06-24 20:13:18 [manager.py:391] +ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:206.2242031097412ms total_cost_time:206.2702178955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7966 prompt_cache_len:5151 prompt_cache_ratio:0.6466231483806176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 +DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s +INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.1118166446685791 s +DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=136079266570731363741327080388681070218, time:1750767198.3936095s req_ids:[8] +DEBUG 06-24 20:13:18 [manager.py:391] +ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:207.21983909606934ms total_cost_time:207.26370811462402ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7967 prompt_cache_len:5151 prompt_cache_ratio:0.6465419856909753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 +DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10869240760803223 s +INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11053967475891113 s +DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=185906662755667593526781665256658650992, time:1750767198.6071663s req_ids:[8] +DEBUG 06-24 20:13:18 [manager.py:391] +ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:219.13981437683105ms total_cost_time:219.18702125549316ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7968 prompt_cache_len:5151 prompt_cache_ratio:0.646460843373494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 +DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s +INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11064028739929199 s +DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=132403851612439993135478933216699860822, time:1750767198.8411586s req_ids:[8] +DEBUG 06-24 20:13:18 [manager.py:391] +ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:217.43011474609375ms total_cost_time:217.47398376464844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7969 prompt_cache_len:5151 prompt_cache_ratio:0.6463797214205045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 +DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10910558700561523 s +INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.11117362976074219 s +DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=252219083471049875361079567845645639394, time:1750767199.0516593s req_ids:[8] +DEBUG 06-24 20:13:19 [manager.py:391] +ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:203.60255241394043ms total_cost_time:203.64665985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7970 prompt_cache_len:5151 prompt_cache_ratio:0.6462986198243412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 +DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10843896865844727 s +INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.11037373542785645 s +DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=144389132066047642571520266776842730411, time:1750767199.264224s req_ids:[8] +DEBUG 06-24 20:13:19 [manager.py:391] +ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:206.02703094482422ms total_cost_time:206.0716152191162ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7971 prompt_cache_len:5151 prompt_cache_ratio:0.6462175385773429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 +DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.1077725887298584 s +INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.1098170280456543 s +DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=273780341120515948085496226309683162227, time:1750767199.4756942s req_ids:[8] +DEBUG 06-24 20:13:19 [manager.py:391] +ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:210.5717658996582ms total_cost_time:210.61468124389648ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7972 prompt_cache_len:5151 prompt_cache_ratio:0.6461364776718515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 +DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10817337036132812 s +INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.1101233959197998 s +DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=220345837017607607103329194752039998753, time:1750767199.7025595s req_ids:[8] +DEBUG 06-24 20:13:19 [manager.py:391] +ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:219.36297416687012ms total_cost_time:219.40875053405762ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7973 prompt_cache_len:5151 prompt_cache_ratio:0.6460554371002132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 +DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.3097507953643799 s +INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.3119313716888428 s +DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=253200887723432504106460219835936193951, time:1750767200.1237783s req_ids:[8] +DEBUG 06-24 20:13:20 [manager.py:391] +ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:421.9388961791992ms total_cost_time:421.9832420349121ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7974 prompt_cache_len:5151 prompt_cache_ratio:0.6459744168547781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 +DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10828685760498047 s +INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.11050748825073242 s +DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=318400449752076669691656270146283111966, time:1750767200.3446972s req_ids:[8] +DEBUG 06-24 20:13:20 [manager.py:391] +ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:204.50353622436523ms total_cost_time:204.54859733581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7975 prompt_cache_len:5151 prompt_cache_ratio:0.6458934169278997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 +DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10860872268676758 s +INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.11049008369445801 s +DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=311269633079447175632913857033269319962, time:1750767200.5615158s req_ids:[8] +DEBUG 06-24 20:13:20 [manager.py:391] +ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:213.07730674743652ms total_cost_time:213.13953399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:7976 prompt_cache_len:5151 prompt_cache_ratio:0.6458124373119358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 +DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10779881477355957 s +INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.10982728004455566 s +DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=191985133987512044440306912255200162810, time:1750767200.7734928s req_ids:[8] +DEBUG 06-24 20:13:20 [manager.py:391] +ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:206.60042762756348ms total_cost_time:206.64429664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7977 prompt_cache_len:5151 prompt_cache_ratio:0.6457314779992478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 +DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10785651206970215 s +INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.10977029800415039 s +DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=57672486614255653457036316272276538107, time:1750767200.9856186s req_ids:[8] +DEBUG 06-24 20:13:20 [manager.py:391] +ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:206.9869041442871ms total_cost_time:207.0310115814209ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7978 prompt_cache_len:5151 prompt_cache_ratio:0.645650538982201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 +DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.11068034172058105 s +INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.1126868724822998 s +DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=225618012719256400572124766861540711218, time:1750767201.1983144s req_ids:[8] +DEBUG 06-24 20:13:21 [manager.py:391] +ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:206.6514492034912ms total_cost_time:206.6962718963623ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7979 prompt_cache_len:5151 prompt_cache_ratio:0.6455696202531646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 +DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s +INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.10996317863464355 s +DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=258356704852361665615646251548933254746, time:1750767201.4103487s req_ids:[8] +DEBUG 06-24 20:13:21 [manager.py:391] +ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:208.5404396057129ms total_cost_time:208.58359336853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7980 prompt_cache_len:5151 prompt_cache_ratio:0.6454887218045112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 +DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10837483406066895 s +INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s +DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=108367910749049095762624023086147352603, time:1750767201.623241s req_ids:[8] +DEBUG 06-24 20:13:21 [manager.py:391] +ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:206.39729499816895ms total_cost_time:206.44235610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7981 prompt_cache_len:5151 prompt_cache_ratio:0.6454078436286179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 +DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10700106620788574 s +INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.10878109931945801 s +DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=316821462377410352800186330713157359991, time:1750767201.8359358s req_ids:[8] +DEBUG 06-24 20:13:21 [manager.py:391] +ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:165.3287410736084ms total_cost_time:165.37213325500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7982 prompt_cache_len:5151 prompt_cache_ratio:0.6453269857178652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 +DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s +INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040139198303223 s +DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=16792091570059411058082003683852118587, time:1750767202.0052469s req_ids:[8] +DEBUG 06-24 20:13:22 [manager.py:391] +ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:198.2433795928955ms total_cost_time:198.2886791229248ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7983 prompt_cache_len:5151 prompt_cache_ratio:0.6452461480646373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 +DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.3109469413757324 s +INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.31298017501831055 s +DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=183208197559468521810446693559550644944, time:1750767202.419358s req_ids:[8] +DEBUG 06-24 20:13:22 [manager.py:391] +ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:420.7897186279297ms total_cost_time:420.8345413208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7984 prompt_cache_len:5151 prompt_cache_ratio:0.6451653306613226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 +DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.10910367965698242 s +INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.11121320724487305 s +DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=233162659706203182095491710370881821821, time:1750767202.6382375s req_ids:[8] +DEBUG 06-24 20:13:22 [manager.py:391] +ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:210.29925346374512ms total_cost_time:210.3445529937744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7985 prompt_cache_len:5151 prompt_cache_ratio:0.6450845335003131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 +DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s +INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.10964298248291016 s +DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=175047108610562315219631427288663882275, time:1750767202.8521001s req_ids:[8] +DEBUG 06-24 20:13:22 [manager.py:391] +ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:208.36496353149414ms total_cost_time:208.40930938720703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7986 prompt_cache_len:5151 prompt_cache_ratio:0.6450037565740045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 +DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10775923728942871 s +INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10974526405334473 s +DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=89107591834513802921518294138822072337, time:1750767203.0652199s req_ids:[8] +DEBUG 06-24 20:13:23 [manager.py:391] +ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:203.17339897155762ms total_cost_time:203.2170295715332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7987 prompt_cache_len:5151 prompt_cache_ratio:0.6449229998747965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 +DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:23 [batch.py:51] router release req id 8 +INFO 06-24 20:13:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:13:23 [statics_utils.py:24] mean first cost: 229.48906573588252 ms +INFO 06-24 20:13:23 [statics_utils.py:24] mean per token cost: 0.08431392781199956 ms +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10917448997497559 s +INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.11129927635192871 s +DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=71308346544288660219036677005860516361, time:1750767203.2821803s req_ids:[8] +DEBUG 06-24 20:13:23 [manager.py:391] +ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:206.68816566467285ms total_cost_time:206.74896240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:7988 prompt_cache_len:5151 prompt_cache_ratio:0.6448422633950927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 +DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10909271240234375 s +INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s +DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=230830846054799028040890274283817565569, time:1750767203.4892309s req_ids:[8] +DEBUG 06-24 20:13:23 [manager.py:391] +ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:204.8323154449463ms total_cost_time:204.87618446350098ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7989 prompt_cache_len:5151 prompt_cache_ratio:0.6447615471273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 +DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10789275169372559 s +INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10983514785766602 s +DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=191704778279780603684049451138835019044, time:1750767203.6991696s req_ids:[8] +DEBUG 06-24 20:13:23 [manager.py:391] +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:206.88652992248535ms total_cost_time:206.93039894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7990 prompt_cache_len:5151 prompt_cache_ratio:0.6446808510638298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 +DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10717439651489258 s +INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10902690887451172 s +DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=43003146286534761905898694334188232933, time:1750767203.914639s req_ids:[8] +DEBUG 06-24 20:13:23 [manager.py:391] +ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:200.76966285705566ms total_cost_time:200.81496238708496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7991 prompt_cache_len:5151 prompt_cache_ratio:0.6446001751970968 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 +DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10965108871459961 s +INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11164569854736328 s +DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=18367124191187797340708065209219967734, time:1750767204.1204052s req_ids:[8] +DEBUG 06-24 20:13:24 [manager.py:391] +ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:204.31923866271973ms total_cost_time:204.36406135559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7992 prompt_cache_len:5151 prompt_cache_ratio:0.6445195195195195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 +DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10828542709350586 s +INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11034154891967773 s +DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=44964723072825769166666211452241269907, time:1750767204.329934s req_ids:[8] +DEBUG 06-24 20:13:24 [manager.py:391] +ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:211.6379737854004ms total_cost_time:211.6849422454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7993 prompt_cache_len:5151 prompt_cache_ratio:0.6444388840235206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 +DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.31157779693603516 s +INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.3135087490081787 s +DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=9328866232456109829399433226299765447, time:1750767204.7530556s req_ids:[8] +DEBUG 06-24 20:13:24 [manager.py:391] +ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:418.00379753112793ms total_cost_time:418.0605411529541ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7994 prompt_cache_len:5151 prompt_cache_ratio:0.6443582687015261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 +DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10833311080932617 s +INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11092782020568848 s +DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=154038376663072427616142803706380359878, time:1750767204.971421s req_ids:[8] +DEBUG 06-24 20:13:24 [manager.py:391] +ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:207.00407028198242ms total_cost_time:207.05032348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7995 prompt_cache_len:5151 prompt_cache_ratio:0.6442776735459662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 +DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s +INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.11118459701538086 s +DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=7534830560099383221859710888313675221, time:1750767205.184723s req_ids:[8] +DEBUG 06-24 20:13:25 [manager.py:391] +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:208.06527137756348ms total_cost_time:208.11104774475098ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7996 prompt_cache_len:5151 prompt_cache_ratio:0.6441970985492746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 +DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10689687728881836 s +INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886645317077637 s +DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=276594682596052906015886712976346641591, time:1750767205.4019175s req_ids:[8] +DEBUG 06-24 20:13:25 [manager.py:391] +ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:214.57529067993164ms total_cost_time:214.62011337280273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7997 prompt_cache_len:5151 prompt_cache_ratio:0.6441165437038889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 +DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10746264457702637 s +INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.1093595027923584 s +DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=337967489359525602128814517579388664542, time:1750767205.6165328s req_ids:[8] +DEBUG 06-24 20:13:25 [manager.py:391] +ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:208.77599716186523ms total_cost_time:208.82010459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7998 prompt_cache_len:5151 prompt_cache_ratio:0.6440360090022506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 +DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:25 [batch.py:51] router release req id 8 +INFO 06-24 20:13:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10781717300415039 s +INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.1098320484161377 s +DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=68138939121714499093178933998493010849, time:1750767205.829303s req_ids:[8] +DEBUG 06-24 20:13:25 [manager.py:391] +ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:199.751615524292ms total_cost_time:199.79596138000488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7999 prompt_cache_len:5151 prompt_cache_ratio:0.6439554944368046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 +DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.1088552474975586 s +INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s +DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=337770350031183940183620976415810615760, time:1750767206.034402s req_ids:[8] +DEBUG 06-24 20:13:26 [manager.py:391] +ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:205.71184158325195ms total_cost_time:205.77120780944824ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:8000 prompt_cache_len:5151 prompt_cache_ratio:0.643875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 +DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10732841491699219 s +INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.10937738418579102 s +DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=205096531034771445278059301635308550610, time:1750767206.2481425s req_ids:[8] +DEBUG 06-24 20:13:26 [manager.py:391] +ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:201.73382759094238ms total_cost_time:201.77817344665527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8001 prompt_cache_len:5151 prompt_cache_ratio:0.6437945256842894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 +DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10805034637451172 s +INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s +DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=301276533795778854086758299293579705845, time:1750767206.4560614s req_ids:[8] +DEBUG 06-24 20:13:26 [manager.py:391] +ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:207.29708671569824ms total_cost_time:207.34238624572754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8002 prompt_cache_len:5151 prompt_cache_ratio:0.6437140714821294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 +DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s +INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055207252502441 s +DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=324273851657995431666401824684728740649, time:1750767206.6699553s req_ids:[8] +DEBUG 06-24 20:13:26 [manager.py:391] +ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:364.02392387390137ms total_cost_time:364.13049697875977ms,out_token_counter:1 mean_per_token_cost_time: 0.10657310485839844ms prompt_token_num:8003 prompt_cache_len:5151 prompt_cache_ratio:0.6436336373859802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 +DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.1087644100189209 s +INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s +DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=189669802997694798807429187492652562204, time:1750767207.0344183s req_ids:[8] +DEBUG 06-24 20:13:27 [manager.py:391] +ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:201.77054405212402ms total_cost_time:201.81632041931152ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8004 prompt_cache_len:5151 prompt_cache_ratio:0.6435532233883059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 +DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10774064064025879 s +INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s +DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=85299969823290596692718017623202377358, time:1750767207.249235s req_ids:[8] +DEBUG 06-24 20:13:27 [manager.py:391] +ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:211.09342575073242ms total_cost_time:211.13920211791992ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8005 prompt_cache_len:5151 prompt_cache_ratio:0.643472829481574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 +DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10740804672241211 s +INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091604232788086 s +DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=189792808382240699496813175578660920062, time:1750767207.4628148s req_ids:[8] +DEBUG 06-24 20:13:27 [manager.py:391] +ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:166.86320304870605ms total_cost_time:166.90635681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8006 prompt_cache_len:5151 prompt_cache_ratio:0.6433924556582563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 +DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10839605331420898 s +INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.11035776138305664 s +DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=170986225010251020910561319639399839689, time:1750767207.636874s req_ids:[8] +DEBUG 06-24 20:13:27 [manager.py:391] +ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:199.1877555847168ms total_cost_time:199.23138618469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8007 prompt_cache_len:5151 prompt_cache_ratio:0.643312101910828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 +DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10945844650268555 s +INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1114344596862793 s +DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=75593178513738109247761338142377745273, time:1750767207.845318s req_ids:[8] +DEBUG 06-24 20:13:27 [manager.py:391] +DEBUG 06-24 20:13:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 34856.687 tokens/s +DEBUG 06-24 20:13:27 [stats.py:37] Avg prompt tokens throughput: 34847.961 tokens/s +DEBUG 06-24 20:13:27 [stats.py:37] Avg generate tokens throughput: 8.727 tokens/s +ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:205.20973205566406ms total_cost_time:205.25383949279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8008 prompt_cache_len:5151 prompt_cache_ratio:0.6432317682317682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 +DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10815954208374023 s +INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.11006617546081543 s +DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=28755440544609540965911731894135949409, time:1750767208.051244s req_ids:[8] +DEBUG 06-24 20:13:28 [manager.py:391] +ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:203.7208080291748ms total_cost_time:203.7651538848877ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8009 prompt_cache_len:5151 prompt_cache_ratio:0.6431514546135597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 +DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s +INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.11016321182250977 s +DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=311237116191207968242434546976502323983, time:1750767208.2658818s req_ids:[8] +DEBUG 06-24 20:13:28 [manager.py:391] +ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:214.29872512817383ms total_cost_time:214.3573760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:8010 prompt_cache_len:5151 prompt_cache_ratio:0.6430711610486891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 +DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10785222053527832 s +INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.10986137390136719 s +DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=298065199028497056066814213255053417322, time:1750767208.4818952s req_ids:[8] +DEBUG 06-24 20:13:28 [manager.py:391] +ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:205.31201362609863ms total_cost_time:205.35564422607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8011 prompt_cache_len:5151 prompt_cache_ratio:0.6429908875296467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 +DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10765743255615234 s +INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.10885310173034668 s +DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=248753396601127990204836315934974615862, time:1750767208.704955s req_ids:[8] +DEBUG 06-24 20:13:28 [manager.py:391] +ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:372.20048904418945ms total_cost_time:372.24602699279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8012 prompt_cache_len:5151 prompt_cache_ratio:0.6429106340489266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 +DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10778951644897461 s +INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.10983562469482422 s +DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=62356335541022719459942403410366310392, time:1750767209.0688243s req_ids:[8] +DEBUG 06-24 20:13:29 [manager.py:391] +ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:203.68480682373047ms total_cost_time:203.72939109802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8013 prompt_cache_len:5151 prompt_cache_ratio:0.6428304005990266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 +DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10899496078491211 s +INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11028265953063965 s +DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=132568857199084135860643036116585255987, time:1750767209.2864473s req_ids:[8] +DEBUG 06-24 20:13:29 [manager.py:391] +ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:225.76236724853516ms total_cost_time:225.82364082336426ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8014 prompt_cache_len:5151 prompt_cache_ratio:0.6427501871724483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 +DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10742568969726562 s +INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s +DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=75760665572769204223699169450466371408, time:1750767209.5185494s req_ids:[8] +DEBUG 06-24 20:13:29 [manager.py:391] +ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:213.62018585205078ms total_cost_time:213.66524696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8015 prompt_cache_len:5151 prompt_cache_ratio:0.6426699937616969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 +DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s +INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11053895950317383 s +DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=295603085150416418505031132224439432380, time:1750767209.7340767s req_ids:[8] +DEBUG 06-24 20:13:29 [manager.py:391] +ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:213.37461471557617ms total_cost_time:213.41991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8016 prompt_cache_len:5151 prompt_cache_ratio:0.6425898203592815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 +DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10859918594360352 s +INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s +DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=96874062735073194553146675415417018416, time:1750767209.9483294s req_ids:[8] +DEBUG 06-24 20:13:29 [manager.py:391] +ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:205.9154510498047ms total_cost_time:205.96051216125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8017 prompt_cache_len:5151 prompt_cache_ratio:0.6425096669577148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 +DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s +INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.11094260215759277 s +DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=241011568176580033652275330164393240203, time:1750767210.160697s req_ids:[8] +DEBUG 06-24 20:13:30 [manager.py:391] +ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:206.85863494873047ms total_cost_time:206.92038536071777ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:8018 prompt_cache_len:5151 prompt_cache_ratio:0.6424295335495136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 +DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10845661163330078 s +INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s +DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=91908757559130525624874254999915434982, time:1750767210.3803387s req_ids:[8] +DEBUG 06-24 20:13:30 [manager.py:391] +ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:210.2665901184082ms total_cost_time:210.3114128112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8019 prompt_cache_len:5151 prompt_cache_ratio:0.6423494201271979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 +DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10772395133972168 s +INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.1098027229309082 s +DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=82546041890165863939652097839496998136, time:1750767210.593952s req_ids:[8] +DEBUG 06-24 20:13:30 [manager.py:391] +ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:206.68268203735352ms total_cost_time:206.72893524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8020 prompt_cache_len:5151 prompt_cache_ratio:0.6422693266832917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 +DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.3095567226409912 s +INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.31077003479003906 s +DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=92071940266793824855499522807865262678, time:1750767211.0057893s req_ids:[8] +DEBUG 06-24 20:13:31 [manager.py:391] +ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:413.41614723205566ms total_cost_time:413.46120834350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8021 prompt_cache_len:5151 prompt_cache_ratio:0.6421892532103229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 +DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s +INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.11078023910522461 s +DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=232439080478786691291367156715416963605, time:1750767211.223302s req_ids:[8] +DEBUG 06-24 20:13:31 [manager.py:391] +ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:205.5830955505371ms total_cost_time:205.6264877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8022 prompt_cache_len:5151 prompt_cache_ratio:0.6421091997008227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 +DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10797405242919922 s +INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.10987138748168945 s +DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=80115564579417612783810801856783968278, time:1750767211.4359064s req_ids:[8] +DEBUG 06-24 20:13:31 [manager.py:391] +ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:212.51821517944336ms total_cost_time:212.56422996520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8023 prompt_cache_len:5151 prompt_cache_ratio:0.6420291661473264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 +DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10848045349121094 s +INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.11026144027709961 s +DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=293943441949701320686664402720610770772, time:1750767211.6528914s req_ids:[8] +DEBUG 06-24 20:13:31 [manager.py:391] +ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:203.3693790435791ms total_cost_time:203.4139633178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8024 prompt_cache_len:5151 prompt_cache_ratio:0.6419491525423728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 +DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10758447647094727 s +INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.10954022407531738 s +DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=171561965923437039977024084455466477433, time:1750767211.8630927s req_ids:[8] +DEBUG 06-24 20:13:31 [manager.py:391] +ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:204.21791076660156ms total_cost_time:204.26201820373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8025 prompt_cache_len:5151 prompt_cache_ratio:0.6418691588785047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 +DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10892868041992188 s +INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.11079859733581543 s +DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=163393512212347573480059928391264765222, time:1750767212.0808082s req_ids:[8] +DEBUG 06-24 20:13:32 [manager.py:391] +ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:216.19558334350586ms total_cost_time:216.24374389648438ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:8026 prompt_cache_len:5151 prompt_cache_ratio:0.6417891851482681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 +DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10766339302062988 s +INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s +DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=27567450746955744522232242561639321977, time:1750767212.3012102s req_ids:[8] +DEBUG 06-24 20:13:32 [manager.py:391] +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:220.05391120910645ms total_cost_time:220.09801864624023ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8027 prompt_cache_len:5151 prompt_cache_ratio:0.6417092313442133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 +DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10823369026184082 s +INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.11010909080505371 s +DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=171102489936591826138659442117401955408, time:1750767212.5201666s req_ids:[8] +DEBUG 06-24 20:13:32 [manager.py:391] +ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:169.95000839233398ms total_cost_time:169.99316215515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8028 prompt_cache_len:5151 prompt_cache_ratio:0.6416292974588939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 +DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10801458358764648 s +INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.10981464385986328 s +DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=169796326162823785676721683044717755850, time:1750767212.697953s req_ids:[8] +DEBUG 06-24 20:13:32 [manager.py:391] +ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:208.99462699890137ms total_cost_time:209.04111862182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8029 prompt_cache_len:5151 prompt_cache_ratio:0.6415493834848673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 +DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.3095996379852295 s +INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.31188511848449707 s +DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=330063706882244893526928047670001724203, time:1750767213.1190164s req_ids:[8] +DEBUG 06-24 20:13:33 [manager.py:391] +ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:417.2329902648926ms total_cost_time:417.2811508178711ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:8030 prompt_cache_len:5151 prompt_cache_ratio:0.641469489414695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 +DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s +INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10964775085449219 s +DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=24472232322048738001534544070683166294, time:1750767213.339208s req_ids:[8] +DEBUG 06-24 20:13:33 [manager.py:391] +ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:213.5946750640869ms total_cost_time:213.6402130126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8031 prompt_cache_len:5151 prompt_cache_ratio:0.6413896152409414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 +DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10765647888183594 s +INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10948395729064941 s +DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=271701817042320459685940580244195258829, time:1750767213.5552058s req_ids:[8] +DEBUG 06-24 20:13:33 [manager.py:391] +ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:206.8326473236084ms total_cost_time:206.8774700164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8032 prompt_cache_len:5151 prompt_cache_ratio:0.6413097609561753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 +DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10721659660339355 s +INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10896587371826172 s +DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=259117447860807868689616613469691422374, time:1750767213.7701948s req_ids:[8] +DEBUG 06-24 20:13:33 [manager.py:391] +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:218.2292938232422ms total_cost_time:218.27459335327148ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8033 prompt_cache_len:5151 prompt_cache_ratio:0.641229926552969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 +DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10794472694396973 s +INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10982871055603027 s +DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=37808013535902847515610826973021025300, time:1750767213.9884288s req_ids:[8] +DEBUG 06-24 20:13:33 [manager.py:391] +ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:205.36017417907715ms total_cost_time:205.40738105773926ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8034 prompt_cache_len:5151 prompt_cache_ratio:0.6411501120238984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 +DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10774040222167969 s +INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.10932111740112305 s +DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=315084858904030512185046253995798691755, time:1750767214.2176788s req_ids:[8] +DEBUG 06-24 20:13:34 [manager.py:391] +ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:182.18350410461426ms total_cost_time:182.22904205322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8035 prompt_cache_len:5151 prompt_cache_ratio:0.6410703173615433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 +DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10826969146728516 s +INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.11013627052307129 s +DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=19148182157460675066394449996674157111, time:1750767214.3901174s req_ids:[8] +DEBUG 06-24 20:13:34 [manager.py:391] +ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:200.08158683776855ms total_cost_time:200.12640953063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8036 prompt_cache_len:5151 prompt_cache_ratio:0.6409905425584869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 +DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.11209464073181152 s +INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.1137993335723877 s +DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=119397928418846814921679876938158418701, time:1750767214.6058753s req_ids:[8] +DEBUG 06-24 20:13:34 [manager.py:391] +ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:208.71448516845703ms total_cost_time:208.79602432250977ms,out_token_counter:1 mean_per_token_cost_time: 0.08153915405273438ms prompt_token_num:8037 prompt_cache_len:5151 prompt_cache_ratio:0.6409107876073161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 +DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s +INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s +DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=212586833083281771472720913898747214808, time:1750767214.8126934s req_ids:[8] +DEBUG 06-24 20:13:34 [manager.py:391] +ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:204.67448234558105ms total_cost_time:204.71858978271484ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8038 prompt_cache_len:5151 prompt_cache_ratio:0.640831052500622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 +DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10861563682556152 s +INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.11058545112609863 s +DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=126600861762066455238703936800154693957, time:1750767215.0257301s req_ids:[8] +DEBUG 06-24 20:13:35 [manager.py:391] +ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:209.99598503112793ms total_cost_time:210.04199981689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8039 prompt_cache_len:5151 prompt_cache_ratio:0.6407513372309989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 +DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s +INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.11072826385498047 s +DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=178861688980500179736056911155426334307, time:1750767215.2415404s req_ids:[8] +DEBUG 06-24 20:13:35 [manager.py:391] +ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:415.13991355895996ms total_cost_time:415.18688201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8040 prompt_cache_len:5151 prompt_cache_ratio:0.6406716417910447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 +DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10831832885742188 s +INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103360652923584 s +DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=313483101149062552106507321958323623725, time:1750767215.6610317s req_ids:[8] +DEBUG 06-24 20:13:35 [manager.py:391] +ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:199.04637336730957ms total_cost_time:199.1097927093506ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:8041 prompt_cache_len:5151 prompt_cache_ratio:0.6405919661733616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 +DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10947322845458984 s +INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.1114356517791748 s +DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=160943622196415814569415706327143919507, time:1750767215.8656864s req_ids:[8] +DEBUG 06-24 20:13:35 [manager.py:391] +ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:202.47626304626465ms total_cost_time:202.52108573913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8042 prompt_cache_len:5151 prompt_cache_ratio:0.6405123103705546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 +DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.1073598861694336 s +INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.10905051231384277 s +DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=215495799654406256577686628097255323083, time:1750767216.0800626s req_ids:[8] +DEBUG 06-24 20:13:36 [manager.py:391] +ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:207.04150199890137ms total_cost_time:207.08537101745605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8043 prompt_cache_len:5151 prompt_cache_ratio:0.6404326743752331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 +DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10731029510498047 s +INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.1090700626373291 s +DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=135729905471102727047599571379728491656, time:1750767216.298246s req_ids:[8] +DEBUG 06-24 20:13:36 [manager.py:391] +ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:214.8873805999756ms total_cost_time:214.93077278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8044 prompt_cache_len:5151 prompt_cache_ratio:0.64035305818001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 +DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10834431648254395 s +INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s +INFO 06-24 20:13:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=218653994900652475462081452584280600779, time:1750767216.5263934s req_ids:[8] +DEBUG 06-24 20:13:36 [manager.py:391] +ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:225.31366348266602ms total_cost_time:225.35991668701172ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8045 prompt_cache_len:5151 prompt_cache_ratio:0.6402734617775016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 +DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10830545425415039 s +INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s +DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=329454382465862086743430419182510531469, time:1750767216.741093s req_ids:[8] +DEBUG 06-24 20:13:36 [manager.py:391] +ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:206.83765411376953ms total_cost_time:206.88128471374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8046 prompt_cache_len:5151 prompt_cache_ratio:0.6401938851603282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 +DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10912752151489258 s +INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s +DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=138989923197628097627799582094864550929, time:1750767216.956738s req_ids:[8] +DEBUG 06-24 20:13:36 [manager.py:391] +ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:212.17751502990723ms total_cost_time:212.22209930419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8047 prompt_cache_len:5151 prompt_cache_ratio:0.6401143283211135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 +DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10907220840454102 s +INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.11097025871276855 s +DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=29119291134425823645822612133371008602, time:1750767217.1700191s req_ids:[8] +DEBUG 06-24 20:13:37 [manager.py:391] +ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:205.000638961792ms total_cost_time:205.0485610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:8048 prompt_cache_len:5151 prompt_cache_ratio:0.6400347912524851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 +DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s +INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s +DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=109790732004628960669754922390521204587, time:1750767217.385679s req_ids:[8] +DEBUG 06-24 20:13:37 [manager.py:391] +ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:214.17713165283203ms total_cost_time:214.22243118286133ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8049 prompt_cache_len:5151 prompt_cache_ratio:0.6399552739470742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 +DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10813307762145996 s +INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.10985231399536133 s +DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=322464502042012871840696675926425672648, time:1750767217.604803s req_ids:[8] +DEBUG 06-24 20:13:37 [manager.py:391] +ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:13:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 33704.165 tokens/s +DEBUG 06-24 20:13:37 [stats.py:37] Avg prompt tokens throughput: 33695.672 tokens/s +DEBUG 06-24 20:13:37 [stats.py:37] Avg generate tokens throughput: 8.493 tokens/s +INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:372.5893497467041ms total_cost_time:372.6356029510498ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8050 prompt_cache_len:5151 prompt_cache_ratio:0.6398757763975156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 +DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10793542861938477 s +INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.10982799530029297 s +DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=231065977064941673351029818506398440574, time:1750767217.977116s req_ids:[8] +DEBUG 06-24 20:13:37 [manager.py:391] +ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:208.2195281982422ms total_cost_time:208.2662582397461ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:8051 prompt_cache_len:5151 prompt_cache_ratio:0.6397962985964476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 +DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s +INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10966300964355469 s +DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=147389770831574650088893201530362838930, time:1750767218.1944437s req_ids:[8] +DEBUG 06-24 20:13:38 [manager.py:391] +ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:208.22572708129883ms total_cost_time:208.28580856323242ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8052 prompt_cache_len:5151 prompt_cache_ratio:0.6397168405365127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 +DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10769414901733398 s +INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10968661308288574 s +DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=305374590684094392150729281460347867612, time:1750767218.4148922s req_ids:[8] +DEBUG 06-24 20:13:38 [manager.py:391] +ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:211.7440700531006ms total_cost_time:211.76695823669434ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:8053 prompt_cache_len:5151 prompt_cache_ratio:0.6396374022103564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 +DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10475301742553711 s +INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10663676261901855 s +DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=133189338529028411515339446870221423781, time:1750767218.626101s req_ids:[8] +DEBUG 06-24 20:13:38 [manager.py:391] +ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:210.89887619018555ms total_cost_time:210.94632148742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8054 prompt_cache_len:5151 prompt_cache_ratio:0.6395579836106282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 +DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10790419578552246 s +INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10973048210144043 s +DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=235679107474053359259283436901151122929, time:1750767218.8414223s req_ids:[8] +DEBUG 06-24 20:13:38 [manager.py:391] +ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:204.91909980773926ms total_cost_time:204.96559143066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8055 prompt_cache_len:5151 prompt_cache_ratio:0.6394785847299814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 +DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10909795761108398 s +INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.11096954345703125 s +DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=305589090865375666920949736731106693642, time:1750767219.0565562s req_ids:[8] +DEBUG 06-24 20:13:39 [manager.py:391] +ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:205.93881607055664ms total_cost_time:205.98411560058594ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8056 prompt_cache_len:5151 prompt_cache_ratio:0.6393992055610725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 +DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.1076662540435791 s +INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096951961517334 s +DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=151767450900899089826474746433593334684, time:1750767219.2669268s req_ids:[8] +DEBUG 06-24 20:13:39 [manager.py:391] +ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:212.1875286102295ms total_cost_time:212.233304977417ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8057 prompt_cache_len:5151 prompt_cache_ratio:0.639319846096562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 +DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10685420036315918 s +INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.10872578620910645 s +DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=211842810678018342263127347609701582751, time:1750767219.4814346s req_ids:[8] +DEBUG 06-24 20:13:39 [manager.py:391] +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:196.80261611938477ms total_cost_time:196.8250274658203ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:8058 prompt_cache_len:5151 prompt_cache_ratio:0.6392405063291139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 +DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10632634162902832 s +INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1081697940826416 s +DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=88591534724784794718488785318291708654, time:1750767219.6838667s req_ids:[8] +DEBUG 06-24 20:13:39 [manager.py:391] +ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:199.39875602722168ms total_cost_time:199.44334030151367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8059 prompt_cache_len:5151 prompt_cache_ratio:0.639161186251396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 +DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s +INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1100766658782959 s +DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=261998647676513450391335054519485690806, time:1750767219.8888779s req_ids:[8] +DEBUG 06-24 20:13:39 [manager.py:391] +ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:205.4159641265869ms total_cost_time:205.460786819458ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8060 prompt_cache_len:5151 prompt_cache_ratio:0.6390818858560794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 +DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.3099782466888428 s +INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.31195569038391113 s +DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=254125505485077604956992921997470737741, time:1750767220.3018322s req_ids:[8] +DEBUG 06-24 20:13:40 [manager.py:391] +ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:409.5466136932373ms total_cost_time:409.5914363861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8061 prompt_cache_len:5151 prompt_cache_ratio:0.6390026051358392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 +DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.1072845458984375 s +INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s +DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=295609575480806864960928682887784431310, time:1750767220.5287113s req_ids:[8] +DEBUG 06-24 20:13:40 [manager.py:391] +ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:181.4250946044922ms total_cost_time:181.46872520446777ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8062 prompt_cache_len:5151 prompt_cache_ratio:0.638923344083354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 +DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s +INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959029197692871 s +DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=198778040995821253941882945232673870698, time:1750767220.6995816s req_ids:[8] +DEBUG 06-24 20:13:40 [manager.py:391] +ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:199.5084285736084ms total_cost_time:199.55182075500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8063 prompt_cache_len:5151 prompt_cache_ratio:0.638844102691306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 +DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.10851788520812988 s +INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.11049413681030273 s +DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=49064737822486721512052770965989559594, time:1750767220.9076686s req_ids:[8] +DEBUG 06-24 20:13:40 [manager.py:391] +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:206.45952224731445ms total_cost_time:206.50506019592285ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8064 prompt_cache_len:5151 prompt_cache_ratio:0.6387648809523809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 +DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10778975486755371 s +INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.10974502563476562 s +DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=301305406224313279995046004031161261926, time:1750767221.119459s req_ids:[8] +DEBUG 06-24 20:13:41 [manager.py:391] +ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:206.51817321777344ms total_cost_time:206.56418800354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8065 prompt_cache_len:5151 prompt_cache_ratio:0.6386856788592684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 +DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10805702209472656 s +INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s +DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=71840767398812457825887583429247173729, time:1750767221.336955s req_ids:[8] +DEBUG 06-24 20:13:41 [manager.py:391] +ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:212.37921714782715ms total_cost_time:212.42260932922363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8066 prompt_cache_len:5151 prompt_cache_ratio:0.6386064964046615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 +DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10795164108276367 s +INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.10991454124450684 s +DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=262640897278135896921666262285222479589, time:1750767221.549863s req_ids:[8] +DEBUG 06-24 20:13:41 [manager.py:391] +ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:203.74345779418945ms total_cost_time:203.78780364990234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8067 prompt_cache_len:5151 prompt_cache_ratio:0.638527333581257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 +DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10877060890197754 s +INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11073660850524902 s +DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=320349095467898117374916659957531924132, time:1750767221.7579339s req_ids:[8] +DEBUG 06-24 20:13:41 [manager.py:391] +ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:205.0940990447998ms total_cost_time:205.16443252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8068 prompt_cache_len:5151 prompt_cache_ratio:0.638448190381755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 +DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10901927947998047 s +INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s +DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=203255366901912356807155428081710494454, time:1750767221.9690363s req_ids:[8] +DEBUG 06-24 20:13:41 [manager.py:391] +ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:203.22060585021973ms total_cost_time:203.263521194458ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8069 prompt_cache_len:5151 prompt_cache_ratio:0.6383690667988599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 +DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.10870885848999023 s +INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066079139709473 s +DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=330590240013976199529664246209082262821, time:1750767222.1791081s req_ids:[8] +DEBUG 06-24 20:13:42 [manager.py:391] +ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:205.42049407958984ms total_cost_time:205.46674728393555ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8070 prompt_cache_len:5151 prompt_cache_ratio:0.6382899628252788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 +DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.31090641021728516 s +INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122224807739258 s +DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=161336232136907122838365072471345207341, time:1750767222.60948s req_ids:[8] +DEBUG 06-24 20:13:42 [manager.py:391] +ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:431.6565990447998ms total_cost_time:431.7011833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8071 prompt_cache_len:5151 prompt_cache_ratio:0.6382108784537232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 +DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.1084742546081543 s +INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.11041069030761719 s +DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=194174635208848866564785900138590427762, time:1750767222.8305907s req_ids:[8] +DEBUG 06-24 20:13:42 [manager.py:391] +ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:210.26968955993652ms total_cost_time:210.3135585784912ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8072 prompt_cache_len:5151 prompt_cache_ratio:0.6381318136769079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 +DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10826420783996582 s +INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.10992193222045898 s +DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=70019449877000478808206091789589732597, time:1750767223.0435102s req_ids:[8] +DEBUG 06-24 20:13:43 [manager.py:391] +ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:171.6468334197998ms total_cost_time:171.6938018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8073 prompt_cache_len:5151 prompt_cache_ratio:0.6380527684875511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 +DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s +INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.10986661911010742 s +DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=159349141074113812258836080997744185009, time:1750767223.2213142s req_ids:[8] +DEBUG 06-24 20:13:43 [manager.py:391] +ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:202.85677909851074ms total_cost_time:202.90160179138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8074 prompt_cache_len:5151 prompt_cache_ratio:0.637973742878375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 +DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10805773735046387 s +INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100778579711914 s +DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=308266019018937835188037493226891837083, time:1750767223.4299216s req_ids:[8] +DEBUG 06-24 20:13:43 [manager.py:391] +ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:206.21776580810547ms total_cost_time:206.26401901245117ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8075 prompt_cache_len:5151 prompt_cache_ratio:0.6378947368421053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 +DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10716605186462402 s +INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1090848445892334 s +DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=209930011095205863501781125335445617491, time:1750767223.6416006s req_ids:[8] +DEBUG 06-24 20:13:43 [manager.py:391] +ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:204.3149471282959ms total_cost_time:204.34260368347168ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8076 prompt_cache_len:5151 prompt_cache_ratio:0.637815750371471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 +DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10679864883422852 s +INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1087641716003418 s +DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=190721890390853134489228581767649819552, time:1750767223.8610218s req_ids:[8] +DEBUG 06-24 20:13:43 [manager.py:391] +ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:213.7596607208252ms total_cost_time:213.78779411315918ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8077 prompt_cache_len:5151 prompt_cache_ratio:0.6377367834592051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 +DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.1047365665435791 s +INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.10672664642333984 s +DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=107489743280337487220905519960400172082, time:1750767224.0701993s req_ids:[8] +DEBUG 06-24 20:13:44 [manager.py:391] +ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:202.6221752166748ms total_cost_time:202.6503086090088ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8078 prompt_cache_len:5151 prompt_cache_ratio:0.637657836098044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 +DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.10600566864013672 s +INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.10810065269470215 s +DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=202015832633553004289211074090195720630, time:1750767224.2802904s req_ids:[8] +DEBUG 06-24 20:13:44 [manager.py:391] +ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:201.59626007080078ms total_cost_time:201.63202285766602ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:8079 prompt_cache_len:5151 prompt_cache_ratio:0.6375789082807278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 +DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.10818314552307129 s +INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.11006522178649902 s +DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=203334258871617260662525445579672495712, time:1750767224.49161s req_ids:[8] +DEBUG 06-24 20:13:44 [manager.py:391] +ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:203.7971019744873ms total_cost_time:203.84454727172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8080 prompt_cache_len:5151 prompt_cache_ratio:0.6375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 +DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.1088247299194336 s +INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s +DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=294482469373479857291287404792816699961, time:1750767224.7010427s req_ids:[8] +DEBUG 06-24 20:13:44 [manager.py:391] +ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:363.36755752563477ms total_cost_time:363.4159564971924ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:8081 prompt_cache_len:5151 prompt_cache_ratio:0.6374211112486079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 +DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10929203033447266 s +INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.1113898754119873 s +DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=241194734210511093161432585006183178886, time:1750767225.0655677s req_ids:[8] +DEBUG 06-24 20:13:45 [manager.py:391] +ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:197.91603088378906ms total_cost_time:197.96085357666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8082 prompt_cache_len:5151 prompt_cache_ratio:0.6373422420193021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 +DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10849666595458984 s +INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s +DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=146978215860167253039851290843201513281, time:1750767225.2720957s req_ids:[8] +DEBUG 06-24 20:13:45 [manager.py:391] +ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.4464817047119ms total_cost_time:205.4903507232666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8083 prompt_cache_len:5151 prompt_cache_ratio:0.6372633923048373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 +DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10818219184875488 s +INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.11010217666625977 s +DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=218889070414234497520232911618951723960, time:1750767225.484616s req_ids:[8] +DEBUG 06-24 20:13:45 [manager.py:391] +ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.99651336669922ms total_cost_time:206.04300498962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8084 prompt_cache_len:5151 prompt_cache_ratio:0.6371845620979713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 +DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10773944854736328 s +INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.10968947410583496 s +DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=225968309768180993327228550401013305867, time:1750767225.695383s req_ids:[8] +DEBUG 06-24 20:13:45 [manager.py:391] +ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.31821250915527ms total_cost_time:205.36398887634277ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8085 prompt_cache_len:5151 prompt_cache_ratio:0.6371057513914656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 +DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10732555389404297 s +INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.10925912857055664 s +DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=160033239014355013892670493839584912528, time:1750767225.9065716s req_ids:[8] +DEBUG 06-24 20:13:45 [manager.py:391] +ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.84416389465332ms total_cost_time:205.8887481689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8086 prompt_cache_len:5151 prompt_cache_ratio:0.6370269601780856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 +DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.1104738712310791 s +INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11252474784851074 s +DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=31386857032168329666902550638757640541, time:1750767226.118384s req_ids:[8] +DEBUG 06-24 20:13:46 [manager.py:391] +ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:203.90558242797852ms total_cost_time:203.963041305542ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:8087 prompt_cache_len:5151 prompt_cache_ratio:0.6369481884505998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 +DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s +INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11064386367797852 s +DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=133551726946520198733980140423400851976, time:1750767226.328581s req_ids:[8] +DEBUG 06-24 20:13:46 [manager.py:391] +ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:206.42876625061035ms total_cost_time:206.4962387084961ms,out_token_counter:1 mean_per_token_cost_time: 0.06747245788574219ms prompt_token_num:8088 prompt_cache_len:5151 prompt_cache_ratio:0.6368694362017804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 +INFO 06-24 20:13:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s +INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s +DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=279137965828223822875134489357048231187, time:1750767226.5410378s req_ids:[8] +DEBUG 06-24 20:13:46 [manager.py:391] +ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:201.5066146850586ms total_cost_time:201.54976844787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8089 prompt_cache_len:5151 prompt_cache_ratio:0.6367907034244035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 +DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10738563537597656 s +INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.10937952995300293 s +DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=133140476523604600798955754364762278591, time:1750767226.7490964s req_ids:[8] +DEBUG 06-24 20:13:46 [manager.py:391] +ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:203.57346534729004ms total_cost_time:203.61828804016113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8090 prompt_cache_len:5151 prompt_cache_ratio:0.6367119901112485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 +DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s +INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.10970473289489746 s +DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=199175326906523073053260394173069683128, time:1750767226.9585898s req_ids:[8] +DEBUG 06-24 20:13:46 [manager.py:391] +ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:363.095760345459ms total_cost_time:363.1410598754883ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8091 prompt_cache_len:5151 prompt_cache_ratio:0.6366332962550982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 +DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s +INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11034512519836426 s +DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=96058079967577283384805348767671051698, time:1750767227.326059s req_ids:[8] +DEBUG 06-24 20:13:47 [manager.py:391] +ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:204.69236373901367ms total_cost_time:204.73647117614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8092 prompt_cache_len:5151 prompt_cache_ratio:0.6365546218487395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 +DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s +INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11073541641235352 s +DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=105523112436045670858046030849275993520, time:1750767227.539137s req_ids:[8] +DEBUG 06-24 20:13:47 [manager.py:391] +ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:208.0233097076416ms total_cost_time:208.0678939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8093 prompt_cache_len:5151 prompt_cache_ratio:0.6364759668849623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 +DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10761046409606934 s +INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.10950446128845215 s +DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=73850314913349288611568059276265842885, time:1750767227.7534378s req_ids:[8] +DEBUG 06-24 20:13:47 [manager.py:391] +ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:209.72013473510742ms total_cost_time:209.7647190093994ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8094 prompt_cache_len:5151 prompt_cache_ratio:0.6363973313565604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 +DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10832905769348145 s +INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11021876335144043 s +DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=121527046461158447765531230086802271831, time:1750767227.9661796s req_ids:[8] +DEBUG 06-24 20:13:47 [manager.py:391] +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:13:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 35932.030 tokens/s +DEBUG 06-24 20:13:47 [stats.py:37] Avg prompt tokens throughput: 35923.229 tokens/s +DEBUG 06-24 20:13:47 [stats.py:37] Avg generate tokens throughput: 8.801 tokens/s +ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:205.40499687194824ms total_cost_time:205.45053482055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8095 prompt_cache_len:5151 prompt_cache_ratio:0.6363187152563311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 +DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s +INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.10999464988708496 s +DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=314332061752910045469982093564041865488, time:1750767228.1750445s req_ids:[8] +DEBUG 06-24 20:13:48 [manager.py:391] +ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:203.97496223449707ms total_cost_time:204.02026176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8096 prompt_cache_len:5151 prompt_cache_ratio:0.6362401185770751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 +DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.1085348129272461 s +INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058545112609863 s +DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=270553068105683448834996038342538367326, time:1750767228.3865838s req_ids:[8] +DEBUG 06-24 20:13:48 [manager.py:391] +ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:205.7032585144043ms total_cost_time:205.7483196258545ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8097 prompt_cache_len:5151 prompt_cache_ratio:0.6361615413115969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 +DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10804080963134766 s +INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.11022686958312988 s +DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=211954486614562433502001795866789504837, time:1750767228.5986605s req_ids:[8] +DEBUG 06-24 20:13:48 [manager.py:391] +ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.30311965942383ms total_cost_time:206.345796585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8098 prompt_cache_len:5151 prompt_cache_ratio:0.6360829834527044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 +DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10732579231262207 s +INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.1092672348022461 s +DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=155124168504351182725194535457896336114, time:1750767228.810134s req_ids:[8] +DEBUG 06-24 20:13:48 [manager.py:391] +ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.02941513061523ms total_cost_time:206.07328414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8099 prompt_cache_len:5151 prompt_cache_ratio:0.6360044449932091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 +DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10775995254516602 s +INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.10976719856262207 s +DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=106071030128258246122994942756063897024, time:1750767229.0220828s req_ids:[8] +DEBUG 06-24 20:13:49 [manager.py:391] +ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.12025260925293ms total_cost_time:206.16459846496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8100 prompt_cache_len:5151 prompt_cache_ratio:0.6359259259259259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 +DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.31028151512145996 s +INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.3122560977935791 s +DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=98627209788490222932552879441707546660, time:1750767229.4442008s req_ids:[8] +DEBUG 06-24 20:13:49 [manager.py:391] +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:424.4811534881592ms total_cost_time:424.52526092529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8101 prompt_cache_len:5151 prompt_cache_ratio:0.6358474262436736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 +DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10773062705993652 s +INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.10921597480773926 s +DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=121194993821246751232135909639346608298, time:1750767229.6659307s req_ids:[8] +DEBUG 06-24 20:13:49 [manager.py:391] +ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:208.4791660308838ms total_cost_time:208.52398872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8102 prompt_cache_len:5151 prompt_cache_ratio:0.6357689459392742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 +DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10701775550842285 s +INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.108978271484375 s +DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=221921391047084398129342456341527162847, time:1750767229.8775554s req_ids:[8] +DEBUG 06-24 20:13:49 [manager.py:391] +ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:208.86492729187012ms total_cost_time:208.9085578918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8103 prompt_cache_len:5151 prompt_cache_ratio:0.6356904850055535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 +DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.11203742027282715 s +INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.1140599250793457 s +DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=252668172921633708187507389841897805641, time:1750767230.0908892s req_ids:[8] +DEBUG 06-24 20:13:50 [manager.py:391] +ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:209.12837982177734ms total_cost_time:209.26737785339355ms,out_token_counter:1 mean_per_token_cost_time: 0.13899803161621094ms prompt_token_num:8104 prompt_cache_len:5151 prompt_cache_ratio:0.6356120434353406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 +DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.10787224769592285 s +INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.10981941223144531 s +DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=37321015057118796252739577025384193267, time:1750767230.3028307s req_ids:[8] +DEBUG 06-24 20:13:50 [manager.py:391] +ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:209.88988876342773ms total_cost_time:209.93447303771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8105 prompt_cache_len:5151 prompt_cache_ratio:0.6355336212214683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 +DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.1075437068939209 s +INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.10955810546875 s +DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=222030678851909785274861156665840841904, time:1750767230.51725s req_ids:[8] +DEBUG 06-24 20:13:50 [manager.py:391] +ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:203.48095893859863ms total_cost_time:203.52435111999512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8106 prompt_cache_len:5151 prompt_cache_ratio:0.6354552183567728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 +DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.1087188720703125 s +INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.11078500747680664 s +DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=312983583374346170053176241200150171805, time:1750767230.7239282s req_ids:[8] +DEBUG 06-24 20:13:50 [manager.py:391] +ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:204.79297637939453ms total_cost_time:204.83732223510742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8107 prompt_cache_len:5151 prompt_cache_ratio:0.635376834834094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 +DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.10879945755004883 s +INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.11074399948120117 s +DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=170544941664411486915413319735891134310, time:1750767230.9484057s req_ids:[8] +DEBUG 06-24 20:13:50 [manager.py:391] +ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:222.89609909057617ms total_cost_time:222.94092178344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8108 prompt_cache_len:5151 prompt_cache_ratio:0.6352984706462753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 +DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10770821571350098 s +INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.10974907875061035 s +DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=93902888379961682109737453866265062270, time:1750767231.1609998s req_ids:[8] +DEBUG 06-24 20:13:51 [manager.py:391] +ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:208.5719108581543ms total_cost_time:208.61577987670898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8109 prompt_cache_len:5151 prompt_cache_ratio:0.6352201257861635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 +DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.3106982707977295 s +INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.31279563903808594 s +DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=170289657816857018488338971290654525701, time:1750767231.5765553s req_ids:[8] +DEBUG 06-24 20:13:51 [manager.py:391] +ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:413.257360458374ms total_cost_time:413.30456733703613ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8110 prompt_cache_len:5151 prompt_cache_ratio:0.6351418002466092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 +DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s +INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.1107168197631836 s +DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=323111179435295905993405715949752953950, time:1750767231.7944765s req_ids:[8] +DEBUG 06-24 20:13:51 [manager.py:391] +ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:209.97953414916992ms total_cost_time:210.0236415863037ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8111 prompt_cache_len:5151 prompt_cache_ratio:0.635063494020466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 +DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s +INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.10992670059204102 s +DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=337363253957841892024073588369729281564, time:1750767232.0061998s req_ids:[8] +DEBUG 06-24 20:13:52 [manager.py:391] +ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:202.73876190185547ms total_cost_time:202.79908180236816ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8112 prompt_cache_len:5151 prompt_cache_ratio:0.6349852071005917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 +DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.11054372787475586 s +INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11249423027038574 s +DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=315769267226245320941435438690373019161, time:1750767232.215018s req_ids:[8] +DEBUG 06-24 20:13:52 [manager.py:391] +ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:202.7900218963623ms total_cost_time:202.8346061706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8113 prompt_cache_len:5151 prompt_cache_ratio:0.6349069394798471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 +DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10931086540222168 s +INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11178398132324219 s +DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=249247419710835236530314416190627642121, time:1750767232.4252582s req_ids:[8] +DEBUG 06-24 20:13:52 [manager.py:391] +ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:205.76143264770508ms total_cost_time:205.80625534057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8114 prompt_cache_len:5151 prompt_cache_ratio:0.6348286911510969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 +DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10795187950134277 s +INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11035752296447754 s +DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=78808598712691966219372036419946189344, time:1750767232.6374888s req_ids:[8] +DEBUG 06-24 20:13:52 [manager.py:391] +ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:207.48329162597656ms total_cost_time:207.52811431884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8115 prompt_cache_len:5151 prompt_cache_ratio:0.6347504621072089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 +DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10850644111633301 s +INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s +DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=273276107363366275342978176810852430166, time:1750767232.8499973s req_ids:[8] +DEBUG 06-24 20:13:52 [manager.py:391] +ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:203.35817337036133ms total_cost_time:203.40228080749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8116 prompt_cache_len:5151 prompt_cache_ratio:0.6346722523410547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 +DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10728931427001953 s +INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10935616493225098 s +DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=83269596180312643652643871002693855222, time:1750767233.0581539s req_ids:[8] +DEBUG 06-24 20:13:53 [manager.py:391] +ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:207.4739933013916ms total_cost_time:207.5178623199463ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8117 prompt_cache_len:5151 prompt_cache_ratio:0.6345940618455094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 +DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:53 [batch.py:51] router release req id 8 +INFO 06-24 20:13:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10751700401306152 s +INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10982465744018555 s +DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=152022429020625600733726434338023991900, time:1750767233.2724051s req_ids:[8] +DEBUG 06-24 20:13:53 [manager.py:391] +ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:374.78041648864746ms total_cost_time:374.82500076293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8118 prompt_cache_len:5151 prompt_cache_ratio:0.6345158906134516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 +DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10859441757202148 s +INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.11058688163757324 s +DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=139669373905826357507293369115477898259, time:1750767233.6479955s req_ids:[8] +DEBUG 06-24 20:13:53 [manager.py:391] +ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:204.6663761138916ms total_cost_time:204.7119140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8119 prompt_cache_len:5151 prompt_cache_ratio:0.6344377386377633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 +DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10773491859436035 s +INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10980701446533203 s +DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=192703656837568399344039916382832874153, time:1750767233.8604848s req_ids:[8] +DEBUG 06-24 20:13:53 [manager.py:391] +ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:201.69878005981445ms total_cost_time:201.74288749694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8120 prompt_cache_len:5151 prompt_cache_ratio:0.63435960591133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 +DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s +INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.10977697372436523 s +DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=93883353495894323270944665940305027394, time:1750767234.0687232s req_ids:[8] +DEBUG 06-24 20:13:54 [manager.py:391] +ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:212.7540111541748ms total_cost_time:212.81051635742188ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:8121 prompt_cache_len:5151 prompt_cache_ratio:0.634281492427041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 +DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10798382759094238 s +INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998058319091797 s +DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=120185731613970969993399398035329968520, time:1750767234.286131s req_ids:[8] +DEBUG 06-24 20:13:54 [manager.py:391] +ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:201.2033462524414ms total_cost_time:201.246976852417ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8122 prompt_cache_len:5151 prompt_cache_ratio:0.6342033981777887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 +DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10770392417907715 s +INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s +DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=35888724780471275313928195310894683225, time:1750767234.511684s req_ids:[8] +DEBUG 06-24 20:13:54 [manager.py:391] +ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:222.6095199584961ms total_cost_time:222.6581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:8123 prompt_cache_len:5151 prompt_cache_ratio:0.6341253231564693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 +DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.11200356483459473 s +INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.11416745185852051 s +DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=323760679570478905223602029173197638473, time:1750767234.7270212s req_ids:[8] +DEBUG 06-24 20:13:54 [manager.py:391] +ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:207.5190544128418ms total_cost_time:207.61752128601074ms,out_token_counter:1 mean_per_token_cost_time: 0.09846687316894531ms prompt_token_num:8124 prompt_cache_len:5151 prompt_cache_ratio:0.6340472673559823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 +DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10851311683654785 s +INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.11048150062561035 s +DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=144232891784506052291040991711523305370, time:1750767234.9395273s req_ids:[8] +DEBUG 06-24 20:13:54 [manager.py:391] +ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:207.14902877807617ms total_cost_time:207.19504356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8125 prompt_cache_len:5151 prompt_cache_ratio:0.6339692307692307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 +DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.10739564895629883 s +INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s +DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=213472795848757023692836494534585206047, time:1750767235.159325s req_ids:[8] +DEBUG 06-24 20:13:55 [manager.py:391] +ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:217.1652317047119ms total_cost_time:217.21172332763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8126 prompt_cache_len:5151 prompt_cache_ratio:0.6338912133891214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 +DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.10814499855041504 s +INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.1100912094116211 s +DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=184152968178471537931364003203748091832, time:1750767235.3752327s req_ids:[8] +DEBUG 06-24 20:13:55 [manager.py:391] +ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:209.3331813812256ms total_cost_time:209.37728881835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8127 prompt_cache_len:5151 prompt_cache_ratio:0.633813215208564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 +DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.3099689483642578 s +INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.3119676113128662 s +DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=107749667326992076180421441000421056615, time:1750767235.8063462s req_ids:[8] +DEBUG 06-24 20:13:55 [manager.py:391] +ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:429.027795791626ms total_cost_time:429.07238006591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8128 prompt_cache_len:5151 prompt_cache_ratio:0.6337352362204725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 +DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10877585411071777 s +INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.11067652702331543 s +DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=306315429860793680452079003117736245369, time:1750767236.0260406s req_ids:[8] +DEBUG 06-24 20:13:56 [manager.py:391] +ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:207.19337463378906ms total_cost_time:207.23676681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8129 prompt_cache_len:5151 prompt_cache_ratio:0.6336572764177636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 +DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10743141174316406 s +INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10941052436828613 s +DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=194206650407103124662620301303233951784, time:1750767236.2360225s req_ids:[8] +DEBUG 06-24 20:13:56 [manager.py:391] +ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:204.8027515411377ms total_cost_time:204.8473358154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8130 prompt_cache_len:5151 prompt_cache_ratio:0.6335793357933579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 +DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s +INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10985898971557617 s +DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=171769448427063724711245677916851727497, time:1750767236.4470484s req_ids:[8] +DEBUG 06-24 20:13:56 [manager.py:391] +ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:206.75039291381836ms total_cost_time:206.79497718811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8131 prompt_cache_len:5151 prompt_cache_ratio:0.6335014143401796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 +DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10873651504516602 s +INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.11079764366149902 s +DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=168692241335269363455072678160687170632, time:1750767236.6600668s req_ids:[8] +DEBUG 06-24 20:13:56 [manager.py:391] +ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:209.05470848083496ms total_cost_time:209.09881591796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8132 prompt_cache_len:5151 prompt_cache_ratio:0.6334235120511559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 +DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10744166374206543 s +INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10937261581420898 s +DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=56391575287625588199529243349556454808, time:1750767236.879535s req_ids:[8] +DEBUG 06-24 20:13:56 [manager.py:391] +ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:211.15970611572266ms total_cost_time:211.20500564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8133 prompt_cache_len:5151 prompt_cache_ratio:0.633345628919218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 +DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s +INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s +DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=236141038221640238371131846152720113750, time:1750767237.0933435s req_ids:[8] +DEBUG 06-24 20:13:57 [manager.py:391] +ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:211.93361282348633ms total_cost_time:211.96651458740234ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:8134 prompt_cache_len:5151 prompt_cache_ratio:0.6332677649373002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 +DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10651326179504395 s +INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.10872602462768555 s +DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=280094735408299385741857150805681554139, time:1750767237.3088446s req_ids:[8] +DEBUG 06-24 20:13:57 [manager.py:391] +ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:205.17706871032715ms total_cost_time:205.21950721740723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8135 prompt_cache_len:5151 prompt_cache_ratio:0.6331899200983405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 +DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10834670066833496 s +INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11030459403991699 s +DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=247827023491710030646849402339736665414, time:1750767237.5185273s req_ids:[8] +DEBUG 06-24 20:13:57 [manager.py:391] +ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:210.6621265411377ms total_cost_time:210.7079029083252ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8136 prompt_cache_len:5151 prompt_cache_ratio:0.6331120943952803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 +DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10869622230529785 s +INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11071562767028809 s +DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=171020838325629176785303915474818867972, time:1750767237.735349s req_ids:[8] +DEBUG 06-24 20:13:57 [manager.py:391] +ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:209.05518531799316ms total_cost_time:209.10120010375977ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8137 prompt_cache_len:5151 prompt_cache_ratio:0.6330342878210643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 +DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:57 [batch.py:51] router release req id 8 +INFO 06-24 20:13:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s +INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.10966658592224121 s +DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=247857284863722506367431758765285983902, time:1750767237.9537423s req_ids:[8] +DEBUG 06-24 20:13:57 [manager.py:391] +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:13:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 34143.317 tokens/s +DEBUG 06-24 20:13:58 [stats.py:37] Avg prompt tokens throughput: 34134.808 tokens/s +DEBUG 06-24 20:13:58 [stats.py:37] Avg generate tokens throughput: 8.508 tokens/s +INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:363.0220890045166ms total_cost_time:363.0683422088623ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8138 prompt_cache_len:5151 prompt_cache_ratio:0.6329565003686409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 +DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s +INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.11066389083862305 s +DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=187487224563553522143730522357142036644, time:1750767238.314153s req_ids:[8] +DEBUG 06-24 20:13:58 [manager.py:391] +ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:204.07390594482422ms total_cost_time:204.1182518005371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8139 prompt_cache_len:5151 prompt_cache_ratio:0.632878732030962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 +DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10882019996643066 s +INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s +DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=314945835761892214818346075508883585304, time:1750767238.5274227s req_ids:[8] +DEBUG 06-24 20:13:58 [manager.py:391] +ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:206.7878246307373ms total_cost_time:206.8307399749756ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8140 prompt_cache_len:5151 prompt_cache_ratio:0.6328009828009828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 +DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10771703720092773 s +INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.10958027839660645 s +DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=263417574246561362819796126301078586497, time:1750767238.7400277s req_ids:[8] +DEBUG 06-24 20:13:58 [manager.py:391] +ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:204.33926582336426ms total_cost_time:204.38385009765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8141 prompt_cache_len:5151 prompt_cache_ratio:0.632723252671662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 +DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s +INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.10968232154846191 s +DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=215016467225992275090012779667877494795, time:1750767238.9563518s req_ids:[8] +DEBUG 06-24 20:13:58 [manager.py:391] +ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:210.02674102783203ms total_cost_time:210.07108688354492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8142 prompt_cache_len:5151 prompt_cache_ratio:0.6326455416359617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 +DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.10844302177429199 s +INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11056399345397949 s +DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=36770850189304221059211412149047832042, time:1750767239.177923s req_ids:[8] +DEBUG 06-24 20:13:59 [manager.py:391] +ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:218.5971736907959ms total_cost_time:218.6427116394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8143 prompt_cache_len:5151 prompt_cache_ratio:0.6325678496868476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 +DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.10827469825744629 s +INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s +DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=299121397611924274749621096919289546738, time:1750767239.3912458s req_ids:[8] +DEBUG 06-24 20:13:59 [manager.py:391] +ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:206.024169921875ms total_cost_time:206.0678005218506ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8144 prompt_cache_len:5151 prompt_cache_ratio:0.6324901768172888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 +DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.1089179515838623 s +INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s +DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=109876234039614677024550444379341383400, time:1750767239.6031022s req_ids:[8] +DEBUG 06-24 20:13:59 [manager.py:391] +ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:203.59015464782715ms total_cost_time:203.63521575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8145 prompt_cache_len:5151 prompt_cache_ratio:0.6324125230202579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 +DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.1079263687133789 s +INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.10975527763366699 s +DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=108296032935899573809964476709793020728, time:1750767239.8128839s req_ids:[8] +DEBUG 06-24 20:13:59 [manager.py:391] +ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:200.6673812866211ms total_cost_time:200.71005821228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8146 prompt_cache_len:5151 prompt_cache_ratio:0.6323348882887306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 +DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:13:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10916948318481445 s +INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.11116528511047363 s +DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=30166649213215935523335351540027366351, time:1750767240.0184507s req_ids:[8] +DEBUG 06-24 20:14:00 [manager.py:391] +ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:207.71121978759766ms total_cost_time:207.75556564331055ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8147 prompt_cache_len:5151 prompt_cache_ratio:0.6322572726156868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 +DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10744524002075195 s +INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.1093745231628418 s +DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=66687902070621836592232725297456087870, time:1750767240.231232s req_ids:[8] +DEBUG 06-24 20:14:00 [manager.py:391] +ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:365.54718017578125ms total_cost_time:365.59271812438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8148 prompt_cache_len:5151 prompt_cache_ratio:0.632179675994109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 +DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10818886756896973 s +INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.1101377010345459 s +DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=124206705536624127938749662369392206157, time:1750767240.5986862s req_ids:[8] +DEBUG 06-24 20:14:00 [manager.py:391] +ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:199.2659568786621ms total_cost_time:199.32317733764648ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:8149 prompt_cache_len:5151 prompt_cache_ratio:0.6321020984169837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 +DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10762882232666016 s +INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.10964536666870117 s +DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=232137978353780083554307627247549918360, time:1750767240.8090641s req_ids:[8] +DEBUG 06-24 20:14:00 [manager.py:391] +ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:204.75339889526367ms total_cost_time:204.79869842529297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8150 prompt_cache_len:5151 prompt_cache_ratio:0.6320245398773006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 +DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s +INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.1100625991821289 s +DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=82097101414835021965599505539140264462, time:1750767241.0209415s req_ids:[8] +DEBUG 06-24 20:14:01 [manager.py:391] +ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:205.99961280822754ms total_cost_time:206.04491233825684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8151 prompt_cache_len:5151 prompt_cache_ratio:0.631947000368053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 +DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s +INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s +DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=119557458893656592067856531594900213445, time:1750767241.23264s req_ids:[8] +DEBUG 06-24 20:14:01 [manager.py:391] +ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:211.6868495941162ms total_cost_time:211.73405647277832ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8152 prompt_cache_len:5151 prompt_cache_ratio:0.6318694798822375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 +DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:01 [batch.py:51] router release req id 8 +INFO 06-24 20:14:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10933756828308105 s +INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.11141347885131836 s +DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=210002580486205887339835189436195781795, time:1750767241.4465356s req_ids:[8] +DEBUG 06-24 20:14:01 [manager.py:391] +ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:203.62281799316406ms total_cost_time:203.66597175598145ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8153 prompt_cache_len:5151 prompt_cache_ratio:0.6317919784128542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 +DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s +INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.11004638671875 s +DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=136195498455880647709803462812207535773, time:1750767241.656808s req_ids:[8] +DEBUG 06-24 20:14:01 [manager.py:391] +ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:203.446626663208ms total_cost_time:203.4902572631836ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8154 prompt_cache_len:5151 prompt_cache_ratio:0.6317144959529065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 +DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s +INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.10999011993408203 s +DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=222816441817315718288396958081052877764, time:1750767241.8676915s req_ids:[8] +DEBUG 06-24 20:14:01 [manager.py:391] +ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:193.29547882080078ms total_cost_time:193.34006309509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8155 prompt_cache_len:5151 prompt_cache_ratio:0.6316370324954016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 +DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10818672180175781 s +INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11019086837768555 s +DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=204118815246304273799472226266466998678, time:1750767242.0661912s req_ids:[8] +DEBUG 06-24 20:14:02 [manager.py:391] +ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:205.72471618652344ms total_cost_time:205.7664394378662ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8156 prompt_cache_len:5151 prompt_cache_ratio:0.6315595880333497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 +DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10853719711303711 s +INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11060047149658203 s +DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=85815348899662107165800156844841642807, time:1750767242.278215s req_ids:[8] +DEBUG 06-24 20:14:02 [manager.py:391] +ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:209.50555801391602ms total_cost_time:209.55252647399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8157 prompt_cache_len:5151 prompt_cache_ratio:0.6314821625597646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 +DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s +INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11053848266601562 s +DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=265564970458232278766696871673682265576, time:1750767242.5081575s req_ids:[8] +DEBUG 06-24 20:14:02 [manager.py:391] +ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:220.38626670837402ms total_cost_time:220.43085098266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8158 prompt_cache_len:5151 prompt_cache_ratio:0.6314047560676637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 +DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.31031346321105957 s +INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.3123600482940674 s +DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=291321288946418798067932882494203163434, time:1750767242.9429576s req_ids:[8] +DEBUG 06-24 20:14:02 [manager.py:391] +ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:431.03551864624023ms total_cost_time:431.1201572418213ms,out_token_counter:1 mean_per_token_cost_time: 0.08463859558105469ms prompt_token_num:8159 prompt_cache_len:5151 prompt_cache_ratio:0.6313273685500674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 +DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10814476013183594 s +INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102902889251709 s +DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=123380920499488466507541365330463193228, time:1750767243.1581955s req_ids:[8] +DEBUG 06-24 20:14:03 [manager.py:391] +ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:195.48416137695312ms total_cost_time:195.5277919769287ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8160 prompt_cache_len:5151 prompt_cache_ratio:0.63125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 +DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10854482650756836 s +INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.11057138442993164 s +DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=295859059032607079717317744804826931656, time:1750767243.361531s req_ids:[8] +DEBUG 06-24 20:14:03 [manager.py:391] +ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:197.2215175628662ms total_cost_time:197.2653865814209ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8161 prompt_cache_len:5151 prompt_cache_ratio:0.6311726504104889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 +DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10789775848388672 s +INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s +DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=158060897042739489710531752338729260904, time:1750767243.5683115s req_ids:[8] +DEBUG 06-24 20:14:03 [manager.py:391] +ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:204.3445110321045ms total_cost_time:204.38814163208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8162 prompt_cache_len:5151 prompt_cache_ratio:0.631095319774565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 +DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10597372055053711 s +INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.1080019474029541 s +DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=235162392724253059263539254686898747124, time:1750767243.7760034s req_ids:[8] +DEBUG 06-24 20:14:03 [manager.py:391] +ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:203.77159118652344ms total_cost_time:203.8135528564453ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8163 prompt_cache_len:5151 prompt_cache_ratio:0.6310180080852628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 +DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10777449607849121 s +INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962677001953125 s +DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=292629814067217144237502741386069374171, time:1750767243.9983857s req_ids:[8] +DEBUG 06-24 20:14:03 [manager.py:391] +ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:190.66977500915527ms total_cost_time:190.71412086486816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8164 prompt_cache_len:5151 prompt_cache_ratio:0.6309407153356198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 +DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10720038414001465 s +INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10909485816955566 s +DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=191433332946761968458717048529716102269, time:1750767244.1889997s req_ids:[8] +DEBUG 06-24 20:14:04 [manager.py:391] +ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:184.7519874572754ms total_cost_time:184.79537963867188ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8165 prompt_cache_len:5151 prompt_cache_ratio:0.6308634415186772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 +DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10744166374206543 s +INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10933661460876465 s +DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=258997889155956506282448517417427670301, time:1750767244.3711169s req_ids:[8] +DEBUG 06-24 20:14:04 [manager.py:391] +ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:172.27745056152344ms total_cost_time:172.32203483581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8166 prompt_cache_len:5151 prompt_cache_ratio:0.6307861866274798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 +DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10735678672790527 s +INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.1094062328338623 s +DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=37727982923305833901555603148115169449, time:1750767244.547978s req_ids:[8] +DEBUG 06-24 20:14:04 [manager.py:391] +ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:178.0240535736084ms total_cost_time:178.06744575500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8167 prompt_cache_len:5151 prompt_cache_ratio:0.6307089506550753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 +DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10743284225463867 s +INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10945582389831543 s +DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=249115819185616173968662537037994923968, time:1750767244.7388084s req_ids:[8] +DEBUG 06-24 20:14:04 [manager.py:391] +ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:212.67271041870117ms total_cost_time:212.71681785583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8168 prompt_cache_len:5151 prompt_cache_ratio:0.6306317335945152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 +DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:04 [batch.py:51] router release req id 8 +INFO 06-24 20:14:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10772371292114258 s +INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10971260070800781 s +DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=201972237846244439557616264509139902619, time:1750767244.9525092s req_ids:[8] +DEBUG 06-24 20:14:04 [manager.py:391] +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:363.07334899902344ms total_cost_time:363.11912536621094ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8169 prompt_cache_len:5151 prompt_cache_ratio:0.6305545354388542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 +DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s +INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021280288696289 s +DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=7119149069212245141719419761246496855, time:1750767245.3185558s req_ids:[8] +DEBUG 06-24 20:14:05 [manager.py:391] +ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:201.9331455230713ms total_cost_time:201.97463035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8170 prompt_cache_len:5151 prompt_cache_ratio:0.6304773561811505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 +DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10893368721008301 s +INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11098885536193848 s +DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=173053887485502014893886567756538319646, time:1750767245.5305269s req_ids:[8] +DEBUG 06-24 20:14:05 [manager.py:391] +ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:207.66472816467285ms total_cost_time:207.70788192749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8171 prompt_cache_len:5151 prompt_cache_ratio:0.6304001958144658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 +DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s +INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11046099662780762 s +DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=130770835150352112341883891056301955236, time:1750767245.7455034s req_ids:[8] +DEBUG 06-24 20:14:05 [manager.py:391] +ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:202.70323753356934ms total_cost_time:202.7449607849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8172 prompt_cache_len:5151 prompt_cache_ratio:0.6303230543318649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 +DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.1082160472869873 s +INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.1101384162902832 s +DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=251779298724697805889993182398210140621, time:1750767245.9533567s req_ids:[8] +DEBUG 06-24 20:14:05 [manager.py:391] +ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:208.909273147583ms total_cost_time:208.953857421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8173 prompt_cache_len:5151 prompt_cache_ratio:0.6302459317264163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 +DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10827398300170898 s +INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014747619628906 s +DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=31221589617152319978797119831902836282, time:1750767246.1648116s req_ids:[8] +DEBUG 06-24 20:14:06 [manager.py:391] +ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:203.13787460327148ms total_cost_time:203.18102836608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8174 prompt_cache_len:5151 prompt_cache_ratio:0.6301688279911916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 +DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10723757743835449 s +INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s +DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=35085441372502870374992534548054757330, time:1750767246.37842s req_ids:[8] +DEBUG 06-24 20:14:06 [manager.py:391] +ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:210.3259563446045ms total_cost_time:210.37030220031738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8175 prompt_cache_len:5151 prompt_cache_ratio:0.6300917431192661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 +DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:06 [batch.py:51] router release req id 8 +DEBUG 06-24 20:14:06 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:06 [manager.py:283] +DEBUG 06-24 20:14:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:06 [manager.py:284] +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10761165618896484 s +INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963797569274902 s +DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=239538971643298834180927939180398719612, time:1750767246.5926855s req_ids:[8] +DEBUG 06-24 20:14:06 [manager.py:391] +ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:212.39948272705078ms total_cost_time:212.44525909423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8176 prompt_cache_len:5151 prompt_cache_ratio:0.6300146771037182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 +DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.30980801582336426 s +INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.3118412494659424 s +DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=195563440773596450115787238403469999120, time:1750767247.0107722s req_ids:[8] +DEBUG 06-24 20:14:07 [manager.py:391] +ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:410.7322692871094ms total_cost_time:410.7778072357178ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8177 prompt_cache_len:5151 prompt_cache_ratio:0.6299376299376299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 +DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10856127738952637 s +INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11006402969360352 s +DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=323956641456327166886418403578493900429, time:1750767247.2368467s req_ids:[8] +DEBUG 06-24 20:14:07 [manager.py:391] +ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:220.0922966003418ms total_cost_time:220.15666961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:8178 prompt_cache_len:5151 prompt_cache_ratio:0.6298606016140865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 +DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10838127136230469 s +INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s +DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=238740210514643135315666247753766729533, time:1750767247.451941s req_ids:[8] +DEBUG 06-24 20:14:07 [manager.py:391] +ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:205.2443027496338ms total_cost_time:205.28674125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8179 prompt_cache_len:5151 prompt_cache_ratio:0.6297835921261767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 +DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10870504379272461 s +INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11093282699584961 s +DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=161814794533151180275238584937388708918, time:1750767247.6649897s req_ids:[8] +DEBUG 06-24 20:14:07 [manager.py:391] +ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:207.36026763916016ms total_cost_time:207.42034912109375ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8180 prompt_cache_len:5151 prompt_cache_ratio:0.6297066014669926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 +DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10823798179626465 s +INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11041474342346191 s +DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=31551757430701560686430636243854652421, time:1750767247.8768175s req_ids:[8] +DEBUG 06-24 20:14:07 [manager.py:391] +ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:207.08847045898438ms total_cost_time:207.13424682617188ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8181 prompt_cache_len:5151 prompt_cache_ratio:0.6296296296296297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 +DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.10760188102722168 s +INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.10944724082946777 s +DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=88992985543877452403508850705920334389, time:1750767248.0906332s req_ids:[8] +DEBUG 06-24 20:14:08 [manager.py:391] +ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:206.04658126831055ms total_cost_time:206.08949661254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8182 prompt_cache_len:5151 prompt_cache_ratio:0.6295526766071865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 +DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.10908031463623047 s +INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11165714263916016 s +DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=292683208528598043607404119324130709373, time:1750767248.3036668s req_ids:[8] +DEBUG 06-24 20:14:08 [manager.py:391] +DEBUG 06-24 20:14:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 36326.220 tokens/s +DEBUG 06-24 20:14:08 [stats.py:37] Avg prompt tokens throughput: 36317.419 tokens/s +DEBUG 06-24 20:14:08 [stats.py:37] Avg generate tokens throughput: 8.801 tokens/s +ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:365.9791946411133ms total_cost_time:366.0237789154053ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8183 prompt_cache_len:5151 prompt_cache_ratio:0.6294757423927655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 +DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.1081998348236084 s +INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s +DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=8271224489075767715714486620883965716, time:1750767248.6767516s req_ids:[8] +DEBUG 06-24 20:14:08 [manager.py:391] +ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:201.08389854431152ms total_cost_time:201.1275291442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8184 prompt_cache_len:5151 prompt_cache_ratio:0.6293988269794721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 +DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s +INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s +DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=60147356553226541158890653771445803652, time:1750767248.8808453s req_ids:[8] +DEBUG 06-24 20:14:08 [manager.py:391] +ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:208.05859565734863ms total_cost_time:208.10365676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8185 prompt_cache_len:5151 prompt_cache_ratio:0.6293219303604154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 +DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s +INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.11067771911621094 s +DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=87896529675835842931984396752353875373, time:1750767249.0933948s req_ids:[8] +DEBUG 06-24 20:14:09 [manager.py:391] +ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:201.76315307617188ms total_cost_time:201.80797576904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8186 prompt_cache_len:5151 prompt_cache_ratio:0.6292450525287075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 +DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10827112197875977 s +INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s +DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=11852556071448998926506365861048590445, time:1750767249.304143s req_ids:[8] +DEBUG 06-24 20:14:09 [manager.py:391] +ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:207.88121223449707ms total_cost_time:207.92651176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8187 prompt_cache_len:5151 prompt_cache_ratio:0.6291681934774642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 +DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10780072212219238 s +INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.10976672172546387 s +DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=287418870297530242715208631967386355799, time:1750767249.5179174s req_ids:[8] +DEBUG 06-24 20:14:09 [manager.py:391] +ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:206.83956146240234ms total_cost_time:206.88152313232422ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8188 prompt_cache_len:5151 prompt_cache_ratio:0.6290913531998046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 +DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10835814476013184 s +INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.11041402816772461 s +DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=338202895807794559099934398269674607235, time:1750767249.7306821s req_ids:[8] +DEBUG 06-24 20:14:09 [manager.py:391] +ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:371.8433380126953ms total_cost_time:371.8881607055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8189 prompt_cache_len:5151 prompt_cache_ratio:0.6290145316888509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 +DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s +INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983800888061523 s +DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=94871757423233156945319531700478610751, time:1750767250.1038203s req_ids:[8] +DEBUG 06-24 20:14:10 [manager.py:391] +ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:181.35309219360352ms total_cost_time:181.3967227935791ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8190 prompt_cache_len:5151 prompt_cache_ratio:0.6289377289377289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 +DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10856246948242188 s +INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s +DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=93978342822466537734810043780781309390, time:1750767250.2922082s req_ids:[8] +DEBUG 06-24 20:14:10 [manager.py:391] +ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:201.52783393859863ms total_cost_time:201.5702724456787ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8191 prompt_cache_len:5151 prompt_cache_ratio:0.6288609449395678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 +DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10747694969177246 s +INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11003804206848145 s +DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=163152984329460008773210291647198671893, time:1750767250.5013144s req_ids:[8] +DEBUG 06-24 20:14:10 [manager.py:391] +ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:207.15975761413574ms total_cost_time:207.21793174743652ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8192 prompt_cache_len:5151 prompt_cache_ratio:0.6287841796875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 +DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.11188268661499023 s +INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11404538154602051 s +DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=216061748797111718729639123577209416088, time:1750767250.7148685s req_ids:[8] +DEBUG 06-24 20:14:10 [manager.py:391] +ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:207.92245864868164ms total_cost_time:208.0237865447998ms,out_token_counter:1 mean_per_token_cost_time: 0.10132789611816406ms prompt_token_num:8193 prompt_cache_len:5151 prompt_cache_ratio:0.6287074331746613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 +DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10775589942932129 s +INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.10970067977905273 s +DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=278247609722941237089342914051659026378, time:1750767250.9275854s req_ids:[8] +DEBUG 06-24 20:14:10 [manager.py:391] +ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:206.9568634033203ms total_cost_time:207.0009708404541ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8194 prompt_cache_len:5151 prompt_cache_ratio:0.6286307053941909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 +DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.10873222351074219 s +INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.1106879711151123 s +DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=9589251780774518106927254503588327133, time:1750767251.1398485s req_ids:[8] +DEBUG 06-24 20:14:11 [manager.py:391] +ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:204.86974716186523ms total_cost_time:204.91361618041992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8195 prompt_cache_len:5151 prompt_cache_ratio:0.6285539963392313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 +DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.10881590843200684 s +INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.11082577705383301 s +DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=139860195202762718650448344765985069076, time:1750767251.3538775s req_ids:[8] +DEBUG 06-24 20:14:11 [manager.py:391] +ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:208.88328552246094ms total_cost_time:208.92667770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8196 prompt_cache_len:5151 prompt_cache_ratio:0.6284773060029283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 +DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.31079816818237305 s +INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.31270885467529297 s +DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=283716800539442594557754057455017025459, time:1750767251.7616658s req_ids:[8] +DEBUG 06-24 20:14:11 [manager.py:391] +ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:376.6818046569824ms total_cost_time:376.7259120941162ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8197 prompt_cache_len:5151 prompt_cache_ratio:0.6284006343784312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 +DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.1074686050415039 s +INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s +DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=309199642275444589512129230919881262491, time:1750767251.9454248s req_ids:[8] +DEBUG 06-24 20:14:11 [manager.py:391] +ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:190.57011604309082ms total_cost_time:190.6137466430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8198 prompt_cache_len:5151 prompt_cache_ratio:0.6283239814588925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 +DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10809516906738281 s +INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s +DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=41261312792576203565358700047348206384, time:1750767252.1430855s req_ids:[8] +DEBUG 06-24 20:14:12 [manager.py:391] +ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:203.53436470031738ms total_cost_time:203.57584953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8199 prompt_cache_len:5151 prompt_cache_ratio:0.628247347237468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 +DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10726428031921387 s +INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.10926389694213867 s +DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=11101219059342487458976537520204190429, time:1750767252.3562844s req_ids:[8] +DEBUG 06-24 20:14:12 [manager.py:391] +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:208.5745334625244ms total_cost_time:208.6169719696045ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8200 prompt_cache_len:5151 prompt_cache_ratio:0.6281707317073171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 +DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10827302932739258 s +INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11030721664428711 s +DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=257465763184886632177255511979938622553, time:1750767252.5679746s req_ids:[8] +DEBUG 06-24 20:14:12 [manager.py:391] +ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:204.91409301757812ms total_cost_time:204.9562931060791ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8201 prompt_cache_len:5151 prompt_cache_ratio:0.6280941348616023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 +DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.1081991195678711 s +INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11079120635986328 s +DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=125565558197597165677612537791340702063, time:1750767252.7779922s req_ids:[8] +DEBUG 06-24 20:14:12 [manager.py:391] +ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:206.22706413269043ms total_cost_time:206.2692642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8202 prompt_cache_len:5151 prompt_cache_ratio:0.6280175566934894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 +DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.1077721118927002 s +INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.10992860794067383 s +DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=287135127654882255258564632646566994497, time:1750767252.9914038s req_ids:[8] +DEBUG 06-24 20:14:12 [manager.py:391] +ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:206.94351196289062ms total_cost_time:206.9845199584961ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8203 prompt_cache_len:5151 prompt_cache_ratio:0.6279409971961477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 +DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.1081533432006836 s +INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.11029958724975586 s +DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=115936952661952553947224920661484405447, time:1750767253.2038233s req_ids:[8] +DEBUG 06-24 20:14:13 [manager.py:391] +ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:206.64596557617188ms total_cost_time:206.68816566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8204 prompt_cache_len:5151 prompt_cache_ratio:0.6278644563627499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 +DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10834622383117676 s +INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.11039519309997559 s +DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=190919211405241075570177676424753789204, time:1750767253.416654s req_ids:[8] +DEBUG 06-24 20:14:13 [manager.py:391] +ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:207.26680755615234ms total_cost_time:207.32998847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:8205 prompt_cache_len:5151 prompt_cache_ratio:0.6277879341864717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 +DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10870003700256348 s +INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.1107950210571289 s +DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=322303341283369671817303233250120732131, time:1750767253.628697s req_ids:[8] +DEBUG 06-24 20:14:13 [manager.py:391] +ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:209.8846435546875ms total_cost_time:209.92779731750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8206 prompt_cache_len:5151 prompt_cache_ratio:0.6277114306604923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 +DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s +INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s +DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=80904245895879391559551529713843570840, time:1750767253.8510287s req_ids:[8] +DEBUG 06-24 20:14:13 [manager.py:391] +ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:215.81792831420898ms total_cost_time:215.86084365844727ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8207 prompt_cache_len:5151 prompt_cache_ratio:0.6276349457779944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 +DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10705065727233887 s +INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10907602310180664 s +DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=253125965026509362139665168163568890401, time:1750767254.0669687s req_ids:[8] +DEBUG 06-24 20:14:14 [manager.py:391] +ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:205.08289337158203ms total_cost_time:205.1255702972412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8208 prompt_cache_len:5151 prompt_cache_ratio:0.6275584795321637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 +DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.1074686050415039 s +INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.11006450653076172 s +DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=160537196442207926638327708239825641714, time:1750767254.277483s req_ids:[8] +DEBUG 06-24 20:14:14 [manager.py:391] +ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:208.68253707885742ms total_cost_time:208.7252140045166ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8209 prompt_cache_len:5151 prompt_cache_ratio:0.6274820319161896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 +DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10750818252563477 s +INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10958695411682129 s +DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=15534810910617778980985037523048271680, time:1750767254.492441s req_ids:[8] +DEBUG 06-24 20:14:14 [manager.py:391] +ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:207.11731910705566ms total_cost_time:207.16142654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8210 prompt_cache_len:5151 prompt_cache_ratio:0.6274056029232643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 +DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10750031471252441 s +INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s +DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=178290631810559916833148922978289016388, time:1750767254.7044828s req_ids:[8] +DEBUG 06-24 20:14:14 [manager.py:391] +ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:377.6285648345947ms total_cost_time:377.6721954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8211 prompt_cache_len:5151 prompt_cache_ratio:0.6273291925465838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 +DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10750293731689453 s +INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.10943055152893066 s +DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=255349555432442777664593291734636731032, time:1750767255.0836663s req_ids:[8] +DEBUG 06-24 20:14:15 [manager.py:391] +ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:202.55470275878906ms total_cost_time:202.59857177734375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8212 prompt_cache_len:5151 prompt_cache_ratio:0.6272528007793473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 +DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10868954658508301 s +INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s +DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=167519846264033505778519093198355888213, time:1750767255.296174s req_ids:[8] +DEBUG 06-24 20:14:15 [manager.py:391] +ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:206.71939849853516ms total_cost_time:206.76350593566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8213 prompt_cache_len:5151 prompt_cache_ratio:0.6271764276147571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 +DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10826683044433594 s +INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.11033415794372559 s +DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=295995382628478015370261587843171751805, time:1750767255.5086071s req_ids:[8] +DEBUG 06-24 20:14:15 [manager.py:391] +ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:211.47632598876953ms total_cost_time:211.5466594696045ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8214 prompt_cache_len:5151 prompt_cache_ratio:0.627100073046019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 +DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s +INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013031005859375 s +DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=155453941317106121589221654295195309468, time:1750767255.731334s req_ids:[8] +DEBUG 06-24 20:14:15 [manager.py:391] +ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:179.002046585083ms total_cost_time:179.0473461151123ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8215 prompt_cache_len:5151 prompt_cache_ratio:0.6270237370663421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 +DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10717415809631348 s +INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.10920119285583496 s +DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=6533685983715925204155624854801809804, time:1750767255.9090567s req_ids:[8] +DEBUG 06-24 20:14:15 [manager.py:391] +ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:194.00715827941895ms total_cost_time:194.04983520507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8216 prompt_cache_len:5151 prompt_cache_ratio:0.6269474196689386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 +DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10762786865234375 s +INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1095588207244873 s +DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=300283714207460228834679828023080769626, time:1750767256.1206934s req_ids:[8] +DEBUG 06-24 20:14:16 [manager.py:391] +ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:211.24649047851562ms total_cost_time:211.29202842712402ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8217 prompt_cache_len:5151 prompt_cache_ratio:0.6268711208470245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 +DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10800671577453613 s +INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099848747253418 s +DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=109778357837254034357768967643383568761, time:1750767256.3332043s req_ids:[8] +DEBUG 06-24 20:14:16 [manager.py:391] +ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:168.15757751464844ms total_cost_time:168.1997776031494ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8218 prompt_cache_len:5151 prompt_cache_ratio:0.6267948405938184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 +DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:16 [batch.py:51] router release req id 8 +INFO 06-24 20:14:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10783529281616211 s +INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.10969233512878418 s +DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=5055397033538650112742120216689907639, time:1750767256.5079765s req_ids:[8] +DEBUG 06-24 20:14:16 [manager.py:391] +ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:199.01061058044434ms total_cost_time:199.05376434326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8219 prompt_cache_len:5151 prompt_cache_ratio:0.6267185789025429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 +DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10752010345458984 s +INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.10950398445129395 s +DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=9947145703033310605896879809801799068, time:1750767256.7120574s req_ids:[8] +DEBUG 06-24 20:14:16 [manager.py:391] +ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:205.28173446655273ms total_cost_time:205.32608032226562ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8220 prompt_cache_len:5151 prompt_cache_ratio:0.6266423357664234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 +DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.1080164909362793 s +INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1101224422454834 s +DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=226676471529500660787201094188944670688, time:1750767256.9246058s req_ids:[8] +DEBUG 06-24 20:14:16 [manager.py:391] +ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:208.1894874572754ms total_cost_time:208.23168754577637ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8221 prompt_cache_len:5151 prompt_cache_ratio:0.6265661111786888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 +DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10817980766296387 s +INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.11015081405639648 s +DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=101035877380059583711512576753120789996, time:1750767257.1380749s req_ids:[8] +DEBUG 06-24 20:14:17 [manager.py:391] +ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:204.0235996246338ms total_cost_time:204.06651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8222 prompt_cache_len:5151 prompt_cache_ratio:0.6264899051325712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 +DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10762524604797363 s +INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.1097564697265625 s +DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=97230976004576439437522054495135724801, time:1750767257.3581214s req_ids:[8] +DEBUG 06-24 20:14:17 [manager.py:391] +ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:218.46604347229004ms total_cost_time:218.50895881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8223 prompt_cache_len:5151 prompt_cache_ratio:0.626413717621306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 +DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10696935653686523 s +INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.10883212089538574 s +DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=321673893865463017831728517079690072985, time:1750767257.5723488s req_ids:[8] +DEBUG 06-24 20:14:17 [manager.py:391] +ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:389.79458808898926ms total_cost_time:389.83964920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8224 prompt_cache_len:5151 prompt_cache_ratio:0.6263375486381323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 +DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10845732688903809 s +INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.11050844192504883 s +DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=149695382554569083848457790860284428069, time:1750767257.9630108s req_ids:[8] +DEBUG 06-24 20:14:17 [manager.py:391] +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:199.80239868164062ms total_cost_time:199.84745979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8225 prompt_cache_len:5151 prompt_cache_ratio:0.6262613981762918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 +DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s +INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11095333099365234 s +DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=221254139614361055759412105128705395804, time:1750767258.1729584s req_ids:[8] +DEBUG 06-24 20:14:18 [manager.py:391] +ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:200.2253532409668ms total_cost_time:200.26922225952148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8226 prompt_cache_len:5151 prompt_cache_ratio:0.6261852662290299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 +DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.1085975170135498 s +INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046504974365234 s +DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=65235300840212817652322914723492353706, time:1750767258.3791482s req_ids:[8] +DEBUG 06-24 20:14:18 [manager.py:391] +DEBUG 06-24 20:14:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 35843.772 tokens/s +DEBUG 06-24 20:14:18 [stats.py:37] Avg prompt tokens throughput: 35835.037 tokens/s +DEBUG 06-24 20:14:18 [stats.py:37] Avg generate tokens throughput: 8.734 tokens/s +ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:166.45073890686035ms total_cost_time:166.49436950683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8227 prompt_cache_len:5151 prompt_cache_ratio:0.6261091527895952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 +DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10725903511047363 s +INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923290252685547 s +DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=315172784806527849080130116182337377788, time:1750767258.548727s req_ids:[8] +DEBUG 06-24 20:14:18 [manager.py:391] +ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:195.41430473327637ms total_cost_time:195.45722007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8228 prompt_cache_len:5151 prompt_cache_ratio:0.6260330578512396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 +DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10753631591796875 s +INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s +DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=140352905812694760434308362871764846989, time:1750767258.7517388s req_ids:[8] +DEBUG 06-24 20:14:18 [manager.py:391] +ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:206.31742477416992ms total_cost_time:206.3617706298828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8229 prompt_cache_len:5151 prompt_cache_ratio:0.6259569814072183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 +DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.1087944507598877 s +INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11092829704284668 s +DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=290956557832134340733766902606977100570, time:1750767258.9648273s req_ids:[8] +DEBUG 06-24 20:14:18 [manager.py:391] +ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:206.50458335876465ms total_cost_time:206.54964447021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8230 prompt_cache_len:5151 prompt_cache_ratio:0.6258809234507898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 +DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:19 [batch.py:51] router release req id 8 +INFO 06-24 20:14:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10880088806152344 s +INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11085772514343262 s +DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=447598318239783903004791516437181127, time:1750767259.176396s req_ids:[8] +DEBUG 06-24 20:14:19 [manager.py:391] +ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:206.10618591308594ms total_cost_time:206.14886283874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8231 prompt_cache_len:5151 prompt_cache_ratio:0.6258048839752156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 +DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10781145095825195 s +INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.10961008071899414 s +DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=187418386908774338035271253996348562646, time:1750767259.3909872s req_ids:[8] +DEBUG 06-24 20:14:19 [manager.py:391] +ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:170.2268123626709ms total_cost_time:170.26925086975098ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8232 prompt_cache_len:5151 prompt_cache_ratio:0.625728862973761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 +DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10814571380615234 s +INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11033248901367188 s +DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=253447295995371973390112169892863222402, time:1750767259.5643253s req_ids:[8] +DEBUG 06-24 20:14:19 [manager.py:391] +ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:199.36609268188477ms total_cost_time:199.46885108947754ms,out_token_counter:1 mean_per_token_cost_time: 0.10275840759277344ms prompt_token_num:8233 prompt_cache_len:5151 prompt_cache_ratio:0.6256528604396939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 +DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10892462730407715 s +INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11092472076416016 s +DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=82258284478348215431311361104312834180, time:1750767259.768058s req_ids:[8] +DEBUG 06-24 20:14:19 [manager.py:391] +ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:203.81760597229004ms total_cost_time:203.86052131652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8234 prompt_cache_len:5151 prompt_cache_ratio:0.6255768763662861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 +DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10844206809997559 s +INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11023783683776855 s +DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=92954427773002363299141466529320844484, time:1750767259.9854786s req_ids:[8] +DEBUG 06-24 20:14:19 [manager.py:391] +ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:214.7824764251709ms total_cost_time:214.8275375366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8235 prompt_cache_len:5151 prompt_cache_ratio:0.6255009107468124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 +DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.31051158905029297 s +INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.31244874000549316 s +DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=259760011847428101482912530310620493266, time:1750767260.4085298s req_ids:[8] +DEBUG 06-24 20:14:20 [manager.py:391] +ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:427.0789623260498ms total_cost_time:427.1233081817627ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8236 prompt_cache_len:5151 prompt_cache_ratio:0.6254249635745508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 +DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.1078040599822998 s +INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.10976982116699219 s +DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=71313572895144100455817948563024647518, time:1750767260.6392076s req_ids:[8] +DEBUG 06-24 20:14:20 [manager.py:391] +ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:177.9806613922119ms total_cost_time:178.02166938781738ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8237 prompt_cache_len:5151 prompt_cache_ratio:0.6253490348427826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 +DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.10767316818237305 s +INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.10929441452026367 s +DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=2887190597889122890984753067612664319, time:1750767260.8201468s req_ids:[8] +DEBUG 06-24 20:14:20 [manager.py:391] +ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:167.10972785949707ms total_cost_time:167.15264320373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8238 prompt_cache_len:5151 prompt_cache_ratio:0.6252731245447924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 +DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s +INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.11052846908569336 s +DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=312197214977796842315867089994688204412, time:1750767260.985843s req_ids:[8] +DEBUG 06-24 20:14:20 [manager.py:391] +ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:195.16587257385254ms total_cost_time:195.20974159240723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8239 prompt_cache_len:5151 prompt_cache_ratio:0.6251972326738682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 +DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10851025581359863 s +INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11055898666381836 s +DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=229704925054678512378765839987309525486, time:1750767261.1883245s req_ids:[8] +DEBUG 06-24 20:14:21 [manager.py:391] +ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:209.89227294921875ms total_cost_time:209.93804931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8240 prompt_cache_len:5151 prompt_cache_ratio:0.6251213592233009 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 +DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.1075289249420166 s +INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.10940814018249512 s +DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=180794770876922782244201066993298748507, time:1750767261.4023495s req_ids:[8] +DEBUG 06-24 20:14:21 [manager.py:391] +ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:205.17849922180176ms total_cost_time:205.22165298461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8241 prompt_cache_len:5151 prompt_cache_ratio:0.6250455041863852 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 +DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s +INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=188577339144872038960020900946029490097, time:1750767261.613874s req_ids:[8] +DEBUG 06-24 20:14:21 [manager.py:391] +ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:204.39672470092773ms total_cost_time:204.43964004516602ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8242 prompt_cache_len:5151 prompt_cache_ratio:0.6249696675564184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 +DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10811972618103027 s +INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s +DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=263832933928543762401941698108738763233, time:1750767261.834627s req_ids:[8] +DEBUG 06-24 20:14:21 [manager.py:391] +ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:215.71826934814453ms total_cost_time:215.7604694366455ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8243 prompt_cache_len:5151 prompt_cache_ratio:0.6248938493267014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 +DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10656547546386719 s +INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.10846757888793945 s +DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=131235268036437667941543454761610152737, time:1750767262.0531976s req_ids:[8] +DEBUG 06-24 20:14:22 [manager.py:391] +ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:206.99620246887207ms total_cost_time:207.03959465026855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8244 prompt_cache_len:5151 prompt_cache_ratio:0.6248180494905385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 +DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10750865936279297 s +INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s +DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=180550156393750232738339251266751671679, time:1750767262.2627933s req_ids:[8] +DEBUG 06-24 20:14:22 [manager.py:391] +ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:207.43441581726074ms total_cost_time:207.47780799865723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8245 prompt_cache_len:5151 prompt_cache_ratio:0.6247422680412371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 +DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:22 [batch.py:51] router release req id 8 +INFO 06-24 20:14:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.3101494312286377 s +INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.31208348274230957 s +DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=309096607483377877924253773921989934079, time:1750767262.6878161s req_ids:[8] +DEBUG 06-24 20:14:22 [manager.py:391] +ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:425.5216121673584ms total_cost_time:425.5659580230713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8246 prompt_cache_len:5151 prompt_cache_ratio:0.6246665049721077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 +DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s +INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s +DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=214336013238935208013151337043050903740, time:1750767262.9071925s req_ids:[8] +DEBUG 06-24 20:14:22 [manager.py:391] +ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:211.6241455078125ms total_cost_time:211.66729927062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8247 prompt_cache_len:5151 prompt_cache_ratio:0.6245907602764642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 +DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s +INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.11038875579833984 s +DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=71926704239353949756396946235142243440, time:1750767263.1236458s req_ids:[8] +DEBUG 06-24 20:14:23 [manager.py:391] +INFO 06-24 20:14:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:14:23 [statics_utils.py:24] mean first cost: 229.1424531623558 ms +INFO 06-24 20:14:23 [statics_utils.py:24] mean per token cost: 0.08177307755679901 ms +ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:213.36603164672852ms total_cost_time:213.4072780609131ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8248 prompt_cache_len:5151 prompt_cache_ratio:0.6245150339476236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 +DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10755252838134766 s +INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.1094655990600586 s +DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=129629145510785701664431823672365451520, time:1750767263.3415504s req_ids:[8] +DEBUG 06-24 20:14:23 [manager.py:391] +ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:203.57966423034668ms total_cost_time:203.62472534179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8249 prompt_cache_len:5151 prompt_cache_ratio:0.6244393259789065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 +DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s +INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.10915112495422363 s +DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=299453358395684065531327476054548970987, time:1750767263.5596383s req_ids:[8] +DEBUG 06-24 20:14:23 [manager.py:391] +ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:214.2322063446045ms total_cost_time:214.27440643310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8250 prompt_cache_len:5151 prompt_cache_ratio:0.6243636363636363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 +DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10797619819641113 s +INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.10981130599975586 s +DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=225000609200119526222412495525724153030, time:1750767263.7734196s req_ids:[8] +DEBUG 06-24 20:14:23 [manager.py:391] +ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:208.51373672485352ms total_cost_time:208.5561752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8251 prompt_cache_len:5151 prompt_cache_ratio:0.62428796509514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 +DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s +INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s +DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=211503971166047731307337885990702781615, time:1750767263.9868767s req_ids:[8] +DEBUG 06-24 20:14:23 [manager.py:391] +ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:207.11946487426758ms total_cost_time:207.16118812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8252 prompt_cache_len:5151 prompt_cache_ratio:0.6242123121667474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 +DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10796880722045898 s +INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.10969901084899902 s +DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=67402228077757464222528501877411027566, time:1750767264.2000492s req_ids:[8] +DEBUG 06-24 20:14:24 [manager.py:391] +ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:170.99261283874512ms total_cost_time:171.0355281829834ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8253 prompt_cache_len:5151 prompt_cache_ratio:0.624136677571792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 +DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.1082315444946289 s +INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.11031961441040039 s +DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=93038811055957424303285982173718902292, time:1750767264.3747873s req_ids:[8] +DEBUG 06-24 20:14:24 [manager.py:391] +ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:215.3012752532959ms total_cost_time:215.34466743469238ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8254 prompt_cache_len:5151 prompt_cache_ratio:0.6240610613036104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 +DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s +INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s +DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=43529484863156197402552797155113384608, time:1750767264.6065595s req_ids:[8] +DEBUG 06-24 20:14:24 [manager.py:391] +ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:224.7765064239502ms total_cost_time:224.8239517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8255 prompt_cache_len:5151 prompt_cache_ratio:0.6239854633555421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 +DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10848069190979004 s +INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s +DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=214465736342507255828468403906776192055, time:1750767264.8246212s req_ids:[8] +DEBUG 06-24 20:14:24 [manager.py:391] +ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:206.44545555114746ms total_cost_time:206.48670196533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8256 prompt_cache_len:5151 prompt_cache_ratio:0.6239098837209303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 +DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10762310028076172 s +INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s +DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=307488381447178661372840019066980107206, time:1750767265.0386279s req_ids:[8] +DEBUG 06-24 20:14:25 [manager.py:391] +ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:210.3099822998047ms total_cost_time:210.35170555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8257 prompt_cache_len:5151 prompt_cache_ratio:0.623834322393121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 +DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10818266868591309 s +INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.11029767990112305 s +DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=263465528909816318169942742778347583501, time:1750767265.258069s req_ids:[8] +DEBUG 06-24 20:14:25 [manager.py:391] +ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.7885398864746ms total_cost_time:206.8500518798828ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8258 prompt_cache_len:5151 prompt_cache_ratio:0.6237587793654638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 +DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10738492012023926 s +INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.108734130859375 s +DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=106745785907896658005997409885388092530, time:1750767265.4700048s req_ids:[8] +DEBUG 06-24 20:14:25 [manager.py:391] +ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.7568302154541ms total_cost_time:206.79926872253418ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8259 prompt_cache_len:5151 prompt_cache_ratio:0.6236832546313112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 +DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.3093998432159424 s +INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.3109891414642334 s +DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=234763077732606254503033857667197305946, time:1750767265.8925054s req_ids:[8] +DEBUG 06-24 20:14:25 [manager.py:391] +ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:422.38450050354004ms total_cost_time:422.4283695220947ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8260 prompt_cache_len:5151 prompt_cache_ratio:0.6236077481840193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 +DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s +INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030840873718262 s +DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=17240633472292672143665764368059250320, time:1750767266.1111033s req_ids:[8] +DEBUG 06-24 20:14:26 [manager.py:391] +ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.04395866394043ms total_cost_time:206.08830451965332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8261 prompt_cache_len:5151 prompt_cache_ratio:0.6235322600169471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 +DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s +INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.10979866981506348 s +DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=86927474474676127669923044268970747394, time:1750767266.3223767s req_ids:[8] +DEBUG 06-24 20:14:26 [manager.py:391] +ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:209.90419387817383ms total_cost_time:209.94853973388672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8262 prompt_cache_len:5151 prompt_cache_ratio:0.6234567901234568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 +DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10804343223571777 s +INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.1101233959197998 s +DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=3150598490457491812277847727764372341, time:1750767266.5382953s req_ids:[8] +DEBUG 06-24 20:14:26 [manager.py:391] +ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:210.58058738708496ms total_cost_time:210.62564849853516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8263 prompt_cache_len:5151 prompt_cache_ratio:0.6233813384969139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 +DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10896849632263184 s +INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11104869842529297 s +DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=239566653616966397240727414952784915557, time:1750767266.7536767s req_ids:[8] +DEBUG 06-24 20:14:26 [manager.py:391] +ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:208.1770896911621ms total_cost_time:208.21785926818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8264 prompt_cache_len:5151 prompt_cache_ratio:0.6233059051306873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 +DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10832047462463379 s +INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11035537719726562 s +DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=100180753851236980139415075938875614750, time:1750767266.979056s req_ids:[8] +DEBUG 06-24 20:14:26 [manager.py:391] +ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:224.39312934875488ms total_cost_time:224.43890571594238ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8265 prompt_cache_len:5151 prompt_cache_ratio:0.6232304900181488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 +DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s +INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11065864562988281 s +DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=124623409531242877131000662623302193333, time:1750767267.1968863s req_ids:[8] +DEBUG 06-24 20:14:27 [manager.py:391] +ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:202.48937606811523ms total_cost_time:202.5320529937744ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8266 prompt_cache_len:5151 prompt_cache_ratio:0.6231550931526736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 +DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10868096351623535 s +INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11013412475585938 s +DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=327817020758628935242704148188284735278, time:1750767267.4100006s req_ids:[8] +DEBUG 06-24 20:14:27 [manager.py:391] +ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:210.97707748413086ms total_cost_time:211.01975440979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8267 prompt_cache_len:5151 prompt_cache_ratio:0.62307971452764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 +DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.1084139347076416 s +INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s +DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=16901268603879458421438969290504287105, time:1750767267.6184614s req_ids:[8] +DEBUG 06-24 20:14:27 [manager.py:391] +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:193.68624687194824ms total_cost_time:193.72940063476562ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8268 prompt_cache_len:5151 prompt_cache_ratio:0.6230043541364296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 +DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s +INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11147642135620117 s +DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=211159885629803706771634935212835108818, time:1750767267.8217897s req_ids:[8] +DEBUG 06-24 20:14:27 [manager.py:391] +ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:207.36169815063477ms total_cost_time:207.40342140197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8269 prompt_cache_len:5151 prompt_cache_ratio:0.6229290119724271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 +DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10817670822143555 s +INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.11025643348693848 s +DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=170304497000061674757970776327095639772, time:1750767268.0360324s req_ids:[8] +DEBUG 06-24 20:14:28 [manager.py:391] +ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:212.2206687927246ms total_cost_time:212.2635841369629ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8270 prompt_cache_len:5151 prompt_cache_ratio:0.6228536880290205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 +DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10811829566955566 s +INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101527214050293 s +DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=261242068489394431808768014005859942328, time:1750767268.2529294s req_ids:[8] +DEBUG 06-24 20:14:28 [manager.py:391] +ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:14:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 35757.238 tokens/s +DEBUG 06-24 20:14:28 [stats.py:37] Avg prompt tokens throughput: 35748.473 tokens/s +DEBUG 06-24 20:14:28 [stats.py:37] Avg generate tokens throughput: 8.765 tokens/s +INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:400.2413749694824ms total_cost_time:400.2852439880371ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8271 prompt_cache_len:5151 prompt_cache_ratio:0.622778382299601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 +DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10861802101135254 s +INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.11068129539489746 s +DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=212190245655084559465324612830320502117, time:1750767268.6567762s req_ids:[8] +DEBUG 06-24 20:14:28 [manager.py:391] +ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:206.07495307922363ms total_cost_time:206.1178684234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8272 prompt_cache_len:5151 prompt_cache_ratio:0.6227030947775629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 +DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s +INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s +DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=255036863182439795815741971756166669901, time:1750767268.8774004s req_ids:[8] +DEBUG 06-24 20:14:28 [manager.py:391] +ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:211.33184432983398ms total_cost_time:211.37475967407227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8273 prompt_cache_len:5151 prompt_cache_ratio:0.6226278254563037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 +DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10758471488952637 s +INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.10974645614624023 s +DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=24171152060961190799496513964774866282, time:1750767269.0941477s req_ids:[8] +DEBUG 06-24 20:14:29 [manager.py:391] +ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:211.34328842163086ms total_cost_time:211.38763427734375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8274 prompt_cache_len:5151 prompt_cache_ratio:0.6225525743292241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 +DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10880017280578613 s +INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.1108100414276123 s +DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=208588402509499513113172047790400436350, time:1750767269.3137026s req_ids:[8] +DEBUG 06-24 20:14:29 [manager.py:391] +ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:215.6527042388916ms total_cost_time:215.75617790222168ms,out_token_counter:1 mean_per_token_cost_time: 0.10347366333007812ms prompt_token_num:8275 prompt_cache_len:5151 prompt_cache_ratio:0.6224773413897281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 +DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10863494873046875 s +INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11060500144958496 s +DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=107522958879956309643900032138484948880, time:1750767269.535218s req_ids:[8] +DEBUG 06-24 20:14:29 [manager.py:391] +ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:215.7437801361084ms total_cost_time:215.7883644104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8276 prompt_cache_len:5151 prompt_cache_ratio:0.6224021266312229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 +DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:29 [batch.py:51] router release req id 8 +INFO 06-24 20:14:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10823273658752441 s +INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11080193519592285 s +DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=331023510023733657338219015572884901752, time:1750767269.7505565s req_ids:[8] +DEBUG 06-24 20:14:29 [manager.py:391] +ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:206.24923706054688ms total_cost_time:206.29215240478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8277 prompt_cache_len:5151 prompt_cache_ratio:0.6223269300471185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 +DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10905647277832031 s +INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11113715171813965 s +DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=111459193273702210075804418297171762800, time:1750767269.9626977s req_ids:[8] +DEBUG 06-24 20:14:29 [manager.py:391] +ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:205.3811550140381ms total_cost_time:205.42287826538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8278 prompt_cache_len:5151 prompt_cache_ratio:0.6222517516308287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 +DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10811614990234375 s +INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.1100625991821289 s +DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=235318352632475267082330510763280703969, time:1750767270.1743755s req_ids:[8] +DEBUG 06-24 20:14:30 [manager.py:391] +ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:210.7243537902832ms total_cost_time:210.7696533203125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8279 prompt_cache_len:5151 prompt_cache_ratio:0.62217659137577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 +DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10775947570800781 s +INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s +DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=268666975616545456718146529699510418, time:1750767270.391569s req_ids:[8] +DEBUG 06-24 20:14:30 [manager.py:391] +ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:208.10723304748535ms total_cost_time:208.15086364746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8280 prompt_cache_len:5151 prompt_cache_ratio:0.6221014492753624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 +DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10767984390258789 s +INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s +DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=49819110030416092095459864328612333161, time:1750767270.6047833s req_ids:[8] +DEBUG 06-24 20:14:30 [manager.py:391] +ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:369.74620819091797ms total_cost_time:369.78793144226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8281 prompt_cache_len:5151 prompt_cache_ratio:0.6220263253230286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 +DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10841679573059082 s +INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110222339630127 s +DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=298677913122716067951241139475505847516, time:1750767270.976117s req_ids:[8] +DEBUG 06-24 20:14:30 [manager.py:391] +ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:204.4961452484131ms total_cost_time:204.53667640686035ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:8282 prompt_cache_len:5151 prompt_cache_ratio:0.6219512195121951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 +DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s +INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010146141052246 s +DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=131337185881214976532187266676560497071, time:1750767271.189098s req_ids:[8] +DEBUG 06-24 20:14:31 [manager.py:391] +ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:206.48550987243652ms total_cost_time:206.55584335327148ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8283 prompt_cache_len:5151 prompt_cache_ratio:0.6218761318362912 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 +DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10838603973388672 s +INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11054754257202148 s +DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=206879154648522365262448133744336640793, time:1750767271.4012792s req_ids:[8] +DEBUG 06-24 20:14:31 [manager.py:391] +ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:203.54676246643066ms total_cost_time:203.58920097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8284 prompt_cache_len:5151 prompt_cache_ratio:0.6218010622887494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 +DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10805845260620117 s +INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010932922363281 s +DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=194677959630076490721400359373378351942, time:1750767271.6098862s req_ids:[8] +DEBUG 06-24 20:14:31 [manager.py:391] +ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:204.98371124267578ms total_cost_time:205.02638816833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8285 prompt_cache_len:5151 prompt_cache_ratio:0.6217260108630054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 +DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10866212844848633 s +INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11059236526489258 s +DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=144117828241798914200188985456885003077, time:1750767271.8229425s req_ids:[8] +DEBUG 06-24 20:14:31 [manager.py:391] +ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:207.51690864562988ms total_cost_time:207.55934715270996ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8286 prompt_cache_len:5151 prompt_cache_ratio:0.6216509775524982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 +DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.10919046401977539 s +INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.11148500442504883 s +DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=210026803768110602240005798992642147381, time:1750767272.0332983s req_ids:[8] +DEBUG 06-24 20:14:32 [manager.py:391] +ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:205.06572723388672ms total_cost_time:205.1072120666504ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8287 prompt_cache_len:5151 prompt_cache_ratio:0.6215759623506697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 +DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.1069631576538086 s +INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.10886931419372559 s +DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=269520222326580752155272498254908090933, time:1750767272.2449923s req_ids:[8] +DEBUG 06-24 20:14:32 [manager.py:391] +ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:207.23557472229004ms total_cost_time:207.27825164794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8288 prompt_cache_len:5151 prompt_cache_ratio:0.6215009652509652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 +DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s +INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.10958743095397949 s +DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=68457551406381783675109409507429387027, time:1750767272.458577s req_ids:[8] +DEBUG 06-24 20:14:32 [manager.py:391] +ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:203.41253280639648ms total_cost_time:203.45497131347656ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8289 prompt_cache_len:5151 prompt_cache_ratio:0.6214259862468332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 +DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.3110806941986084 s +INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.3133068084716797 s +DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=311861998419128760240120615040794478039, time:1750767272.873438s req_ids:[8] +DEBUG 06-24 20:14:32 [manager.py:391] +ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:415.0123596191406ms total_cost_time:415.0543212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8290 prompt_cache_len:5151 prompt_cache_ratio:0.6213510253317249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 +DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s +INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11092305183410645 s +DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=304775374273399976944644037986543762210, time:1750767273.0888023s req_ids:[8] +DEBUG 06-24 20:14:33 [manager.py:391] +ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:207.91912078857422ms total_cost_time:207.9794406890869ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8291 prompt_cache_len:5151 prompt_cache_ratio:0.6212760824990954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 +DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10836505889892578 s +INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11060380935668945 s +DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=109481890043208347135331468406609282952, time:1750767273.3025763s req_ids:[8] +DEBUG 06-24 20:14:33 [manager.py:391] +ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:206.13765716552734ms total_cost_time:206.19750022888184ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8292 prompt_cache_len:5151 prompt_cache_ratio:0.6212011577424024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 +DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10810208320617676 s +INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11006021499633789 s +DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=160556306680002546400351079511857133113, time:1750767273.512699s req_ids:[8] +DEBUG 06-24 20:14:33 [manager.py:391] +ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:202.91876792907715ms total_cost_time:202.96120643615723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8293 prompt_cache_len:5151 prompt_cache_ratio:0.6211262510551068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 +DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.1075899600982666 s +INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.10979104042053223 s +DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=32074529192952349618100839597978615920, time:1750767273.7267318s req_ids:[8] +DEBUG 06-24 20:14:33 [manager.py:391] +ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:211.94052696228027ms total_cost_time:211.98534965515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8294 prompt_cache_len:5151 prompt_cache_ratio:0.6210513624306728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 +DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10739302635192871 s +INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.10944819450378418 s +DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=243760956130616791617009914156589848019, time:1750767273.9389782s req_ids:[8] +DEBUG 06-24 20:14:33 [manager.py:391] +ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:208.13679695129395ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8295 prompt_cache_len:5151 prompt_cache_ratio:0.6209764918625678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 +DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10746598243713379 s +INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.10936212539672852 s +DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=189770559000292328682272700480913325791, time:1750767274.1518123s req_ids:[8] +DEBUG 06-24 20:14:34 [manager.py:391] +ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:211.21573448181152ms total_cost_time:211.2584114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8296 prompt_cache_len:5151 prompt_cache_ratio:0.6209016393442623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 +DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s +INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.11012148857116699 s +DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=169396735050538373371631332208990012691, time:1750767274.36697s req_ids:[8] +DEBUG 06-24 20:14:34 [manager.py:391] +ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:368.1051731109619ms total_cost_time:368.1497573852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8297 prompt_cache_len:5151 prompt_cache_ratio:0.6208268048692298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 +DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10783123970031738 s +INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s +DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=167956627076954069194956913411280167363, time:1750767274.7405546s req_ids:[8] +DEBUG 06-24 20:14:34 [manager.py:391] +ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:200.6845474243164ms total_cost_time:200.7300853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8298 prompt_cache_len:5151 prompt_cache_ratio:0.6207519884309473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 +DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s +INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.1105194091796875 s +DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=301337830344655309401875836347015862071, time:1750767274.954106s req_ids:[8] +DEBUG 06-24 20:14:34 [manager.py:391] +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:212.02421188354492ms total_cost_time:212.0678424835205ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8299 prompt_cache_len:5151 prompt_cache_ratio:0.6206771900228943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 +DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10830998420715332 s +INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.11098384857177734 s +DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=65587622665194329579089570487194253022, time:1750767275.1678586s req_ids:[8] +DEBUG 06-24 20:14:35 [manager.py:391] +ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:219.67768669128418ms total_cost_time:219.71988677978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8300 prompt_cache_len:5151 prompt_cache_ratio:0.6206024096385542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 +DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10743093490600586 s +INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.10927939414978027 s +DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=63493732624778298924648741448306459558, time:1750767275.4087806s req_ids:[8] +DEBUG 06-24 20:14:35 [manager.py:391] +ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:220.22652626037598ms total_cost_time:220.27063369750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8301 prompt_cache_len:5151 prompt_cache_ratio:0.6205276472714131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 +DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10848736763000488 s +INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.11052966117858887 s +DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=211688468286845774453822823612595637726, time:1750767275.6236768s req_ids:[8] +DEBUG 06-24 20:14:35 [manager.py:391] +ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:211.63105964660645ms total_cost_time:211.67373657226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8302 prompt_cache_len:5151 prompt_cache_ratio:0.6204529029149602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 +DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10779428482055664 s +INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.10966897010803223 s +DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=6904799698057618520689627287651683187, time:1750767275.836534s req_ids:[8] +DEBUG 06-24 20:14:35 [manager.py:391] +ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:169.9512004852295ms total_cost_time:169.99411582946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8303 prompt_cache_len:5151 prompt_cache_ratio:0.6203781765626882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 +DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.31055521965026855 s +INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.3125629425048828 s +DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=277802937496219477041554422591933751385, time:1750767276.2197196s req_ids:[8] +DEBUG 06-24 20:14:36 [manager.py:391] +ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:421.2357997894287ms total_cost_time:421.2782382965088ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8304 prompt_cache_len:5151 prompt_cache_ratio:0.6203034682080925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 +DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10714888572692871 s +INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.10903310775756836 s +DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=255252996458118514134069515587603915157, time:1750767276.4395554s req_ids:[8] +DEBUG 06-24 20:14:36 [manager.py:391] +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:209.80405807495117ms total_cost_time:209.8684310913086ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:8305 prompt_cache_len:5151 prompt_cache_ratio:0.6202287778446719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 +DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10686445236206055 s +INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.10863447189331055 s +DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=315718203251225857716152392641209894182, time:1750767276.652343s req_ids:[8] +DEBUG 06-24 20:14:36 [manager.py:391] +ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:167.88148880004883ms total_cost_time:167.9251194000244ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8306 prompt_cache_len:5151 prompt_cache_ratio:0.6201541054659282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 +DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s +INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.11016082763671875 s +DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=35520727223488568443893203475961670141, time:1750767276.8285089s req_ids:[8] +DEBUG 06-24 20:14:36 [manager.py:391] +ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:197.03364372253418ms total_cost_time:197.07679748535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8307 prompt_cache_len:5151 prompt_cache_ratio:0.6200794510653665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 +DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10570096969604492 s +INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.1066579818725586 s +DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=274696903048460602471430352125434336653, time:1750767277.027263s req_ids:[8] +DEBUG 06-24 20:14:37 [manager.py:391] +ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:162.8549098968506ms total_cost_time:162.87612915039062ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8308 prompt_cache_len:5151 prompt_cache_ratio:0.6200048146364949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 +DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10597825050354004 s +INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.10806965827941895 s +DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=66087891383537191982797430514580655308, time:1750767277.194863s req_ids:[8] +DEBUG 06-24 20:14:37 [manager.py:391] +ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:194.40293312072754ms total_cost_time:194.44775581359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8309 prompt_cache_len:5151 prompt_cache_ratio:0.6199301961728246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 +DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s +INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.11049866676330566 s +DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=251522397432514335940512181293752170135, time:1750767277.3953865s req_ids:[8] +DEBUG 06-24 20:14:37 [manager.py:391] +ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:365.2081489562988ms total_cost_time:365.2515411376953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8310 prompt_cache_len:5151 prompt_cache_ratio:0.6198555956678701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 +DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.1099708080291748 s +INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.11186432838439941 s +DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=161021902991964345720263375301572541431, time:1750767277.7721167s req_ids:[8] +DEBUG 06-24 20:14:37 [manager.py:391] +ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:213.8359546661377ms total_cost_time:213.87124061584473ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:8311 prompt_cache_len:5151 prompt_cache_ratio:0.6197810131151485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 +DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10755538940429688 s +INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.10964846611022949 s +DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=295648583996366004321371936179937087096, time:1750767277.9884977s req_ids:[8] +DEBUG 06-24 20:14:37 [manager.py:391] +ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:224.6232032775879ms total_cost_time:224.66421127319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8312 prompt_cache_len:5151 prompt_cache_ratio:0.6197064485081809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 +DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s +INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s +DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=298479220233737810524018769446339314478, time:1750767278.2154572s req_ids:[8] +DEBUG 06-24 20:14:38 [manager.py:391] +ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:202.3754119873047ms total_cost_time:202.41928100585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8313 prompt_cache_len:5151 prompt_cache_ratio:0.6196319018404908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 +DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.1082923412322998 s +INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s +DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=252102363990941462669573429678951581757, time:1750767278.4254768s req_ids:[8] +DEBUG 06-24 20:14:38 [manager.py:391] +ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:218.89925003051758ms total_cost_time:218.94168853759766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8314 prompt_cache_len:5151 prompt_cache_ratio:0.619557373105605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 +DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10834932327270508 s +INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11026597023010254 s +DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=312271822081104715641743960189888542274, time:1750767278.6513693s req_ids:[8] +DEBUG 06-24 20:14:38 [manager.py:391] +DEBUG 06-24 20:14:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 36072.292 tokens/s +DEBUG 06-24 20:14:38 [stats.py:37] Avg prompt tokens throughput: 36063.694 tokens/s +DEBUG 06-24 20:14:38 [stats.py:37] Avg generate tokens throughput: 8.598 tokens/s +ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:202.10552215576172ms total_cost_time:202.1486759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8315 prompt_cache_len:5151 prompt_cache_ratio:0.6194828622970535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 +DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10877037048339844 s +INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11081075668334961 s +DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=138931135602462018121515202530146619532, time:1750767278.8584652s req_ids:[8] +DEBUG 06-24 20:14:38 [manager.py:391] +ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:207.3657512664795ms total_cost_time:207.40818977355957ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8316 prompt_cache_len:5151 prompt_cache_ratio:0.6194083694083694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 +DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1085202693939209 s +INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.11047053337097168 s +DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=218982852697502198158719175564218522053, time:1750767279.072043s req_ids:[8] +DEBUG 06-24 20:14:39 [manager.py:391] +ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:374.18341636657715ms total_cost_time:374.22704696655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8317 prompt_cache_len:5151 prompt_cache_ratio:0.6193338944330888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 +DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1084434986114502 s +INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103827953338623 s +DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=300991428693439536423626809739855937382, time:1750767279.4470572s req_ids:[8] +DEBUG 06-24 20:14:39 [manager.py:391] +ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:202.6839256286621ms total_cost_time:202.7280330657959ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8318 prompt_cache_len:5151 prompt_cache_ratio:0.6192594373647512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 +DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1073768138885498 s +INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.1094367504119873 s +DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=163824231564315183873231817994491432707, time:1750767279.6601083s req_ids:[8] +DEBUG 06-24 20:14:39 [manager.py:391] +ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:205.12080192565918ms total_cost_time:205.16395568847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8319 prompt_cache_len:5151 prompt_cache_ratio:0.6191849981968987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 +DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s +INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952043533325195 s +DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=258402454133813239926229406276900726653, time:1750767279.870637s req_ids:[8] +DEBUG 06-24 20:14:39 [manager.py:391] +ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:205.98125457763672ms total_cost_time:206.0253620147705ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8320 prompt_cache_len:5151 prompt_cache_ratio:0.6191105769230769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 +DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10741162300109863 s +INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940837860107422 s +DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=15768748149000020772775328294597903454, time:1750767280.0876532s req_ids:[8] +DEBUG 06-24 20:14:40 [manager.py:391] +ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:213.46163749694824ms total_cost_time:213.50502967834473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8321 prompt_cache_len:5151 prompt_cache_ratio:0.6190361735368345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 +DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:40 [batch.py:51] router release req id 8 +INFO 06-24 20:14:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10871005058288574 s +INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106112003326416 s +DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=35748051260586873718890216062716901531, time:1750767280.30079s req_ids:[8] +DEBUG 06-24 20:14:40 [manager.py:391] +ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:206.08973503112793ms total_cost_time:206.13336563110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8322 prompt_cache_len:5151 prompt_cache_ratio:0.6189617880317232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 +DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s +INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s +DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=203062104970345147628927787627688872702, time:1750767280.513943s req_ids:[8] +DEBUG 06-24 20:14:40 [manager.py:391] +ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:205.20377159118652ms total_cost_time:205.24907112121582ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8323 prompt_cache_len:5151 prompt_cache_ratio:0.6188874204012976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 +DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10747337341308594 s +INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.10849165916442871 s +DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=92721954162718563247843241532194790696, time:1750767280.7225084s req_ids:[8] +DEBUG 06-24 20:14:40 [manager.py:391] +ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:185.90521812438965ms total_cost_time:185.94908714294434ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8324 prompt_cache_len:5151 prompt_cache_ratio:0.6188130706391158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 +DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10887598991394043 s +INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.1108696460723877 s +DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=27910674266167320054595452940333283810, time:1750767280.9121764s req_ids:[8] +DEBUG 06-24 20:14:40 [manager.py:391] +ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:201.6618251800537ms total_cost_time:201.76339149475098ms,out_token_counter:1 mean_per_token_cost_time: 0.10156631469726562ms prompt_token_num:8325 prompt_cache_len:5151 prompt_cache_ratio:0.6187387387387387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 +DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10860204696655273 s +INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11075520515441895 s +DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=39104579483530487045217095644296521828, time:1750767281.119557s req_ids:[8] +DEBUG 06-24 20:14:41 [manager.py:391] +ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:206.68601989746094ms total_cost_time:206.72941207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8326 prompt_cache_len:5151 prompt_cache_ratio:0.6186644246937305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 +DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10857677459716797 s +INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11064720153808594 s +DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=127764584215628286168403979539007149501, time:1750767281.333407s req_ids:[8] +DEBUG 06-24 20:14:41 [manager.py:391] +ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.10112762451172ms total_cost_time:210.1449966430664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8327 prompt_cache_len:5151 prompt_cache_ratio:0.6185901284976583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 +DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10875415802001953 s +INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11067628860473633 s +DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=4958469037022221137828440278288455800, time:1750767281.5466924s req_ids:[8] +DEBUG 06-24 20:14:41 [manager.py:391] +ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.4480266571045ms total_cost_time:210.4935646057129ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8328 prompt_cache_len:5151 prompt_cache_ratio:0.6185158501440923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 +DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10725069046020508 s +INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.10926938056945801 s +DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=272250437461255759268203854122825154056, time:1750767281.7616775s req_ids:[8] +DEBUG 06-24 20:14:41 [manager.py:391] +ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.14738082885742ms total_cost_time:210.1914882659912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8329 prompt_cache_len:5151 prompt_cache_ratio:0.6184415896266058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 +DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s +INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.10949468612670898 s +DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=189423115506842996907537481977005635238, time:1750767281.9772346s req_ids:[8] +DEBUG 06-24 20:14:41 [manager.py:391] +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:203.86242866516113ms total_cost_time:203.90605926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8330 prompt_cache_len:5151 prompt_cache_ratio:0.6183673469387755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 +DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.1076972484588623 s +INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.10976147651672363 s +DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=231737037246298845956164207363075937542, time:1750767282.189191s req_ids:[8] +DEBUG 06-24 20:14:42 [manager.py:391] +ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:205.69658279418945ms total_cost_time:205.74069023132324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8331 prompt_cache_len:5151 prompt_cache_ratio:0.6182931220741807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 +DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10817527770996094 s +INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.11019253730773926 s +DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=204833891265901063353923436250621708968, time:1750767282.3999195s req_ids:[8] +DEBUG 06-24 20:14:42 [manager.py:391] +ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:364.6695613861084ms total_cost_time:364.7119998931885ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8332 prompt_cache_len:5151 prompt_cache_ratio:0.6182189150264042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 +DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10700535774230957 s +INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.10903620719909668 s +DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=153021718533794694911774247721165952692, time:1750767282.766928s req_ids:[8] +DEBUG 06-24 20:14:42 [manager.py:391] +ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:202.74639129638672ms total_cost_time:202.7895450592041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8333 prompt_cache_len:5151 prompt_cache_ratio:0.6181447257890316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 +DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10812854766845703 s +INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017179489135742 s +DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=307875200559235023189034847413241857077, time:1750767282.977633s req_ids:[8] +DEBUG 06-24 20:14:42 [manager.py:391] +ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:211.81178092956543ms total_cost_time:211.85302734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8334 prompt_cache_len:5151 prompt_cache_ratio:0.6180705543556515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 +DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.1084592342376709 s +INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s +DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=244938656511849479856530537959277250774, time:1750767283.1920686s req_ids:[8] +DEBUG 06-24 20:14:43 [manager.py:391] +ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:202.30746269226074ms total_cost_time:202.35013961791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8335 prompt_cache_len:5151 prompt_cache_ratio:0.6179964007198561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 +DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10822558403015137 s +INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.1102132797241211 s +DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=30774481689037904860931704222377865461, time:1750767283.4027514s req_ids:[8] +DEBUG 06-24 20:14:43 [manager.py:391] +ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:205.64770698547363ms total_cost_time:205.6906223297119ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8336 prompt_cache_len:5151 prompt_cache_ratio:0.6179222648752399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 +DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10813593864440918 s +INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s +DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=339001691673391085264550844361019495214, time:1750767283.6140687s req_ids:[8] +DEBUG 06-24 20:14:43 [manager.py:391] +ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:212.65101432800293ms total_cost_time:212.69536018371582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8337 prompt_cache_len:5151 prompt_cache_ratio:0.6178481468154012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 +DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:43 [batch.py:51] router release req id 8 +INFO 06-24 20:14:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s +INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100313663482666 s +DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=261940475427338622658296194332473660030, time:1750767283.8327336s req_ids:[8] +DEBUG 06-24 20:14:43 [manager.py:391] +ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:200.1497745513916ms total_cost_time:200.19268989562988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8338 prompt_cache_len:5151 prompt_cache_ratio:0.617774046533941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 +DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10875749588012695 s +INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.1107792854309082 s +DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=337781020124583152190153779169000766806, time:1750767284.0380256s req_ids:[8] +DEBUG 06-24 20:14:44 [manager.py:391] +ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:203.39345932006836ms total_cost_time:203.43661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8339 prompt_cache_len:5151 prompt_cache_ratio:0.6176999640244634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 +DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10731673240661621 s +INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.1092982292175293 s +DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=273895288870707467973267963694377667971, time:1750767284.2479763s req_ids:[8] +DEBUG 06-24 20:14:44 [manager.py:391] +ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:209.8081111907959ms total_cost_time:209.85102653503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8340 prompt_cache_len:5151 prompt_cache_ratio:0.6176258992805755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 +DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10702204704284668 s +INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.10901165008544922 s +DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=73239091614846805757568216182305887005, time:1750767284.4646623s req_ids:[8] +DEBUG 06-24 20:14:44 [manager.py:391] +ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:207.46827125549316ms total_cost_time:207.51142501831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8341 prompt_cache_len:5151 prompt_cache_ratio:0.6175518522958878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 +DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10699844360351562 s +INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.10890674591064453 s +DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=276057004754196455625317364162476827661, time:1750767284.676558s req_ids:[8] +DEBUG 06-24 20:14:44 [manager.py:391] +ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:208.73117446899414ms total_cost_time:208.7724208831787ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8342 prompt_cache_len:5151 prompt_cache_ratio:0.6174778230640134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 +DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10847115516662598 s +INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.11041617393493652 s +DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=167386728822424099788423938465730215994, time:1750767284.8904045s req_ids:[8] +DEBUG 06-24 20:14:44 [manager.py:391] +ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:211.11011505126953ms total_cost_time:211.1525535583496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8343 prompt_cache_len:5151 prompt_cache_ratio:0.6174038115785688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 +DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10886025428771973 s +INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11068582534790039 s +DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=248199366923918170706963929973859510082, time:1750767285.1048195s req_ids:[8] +DEBUG 06-24 20:14:45 [manager.py:391] +ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:367.0048713684082ms total_cost_time:367.0461177825928ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8344 prompt_cache_len:5151 prompt_cache_ratio:0.6173298178331735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 +DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10794305801391602 s +INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11001968383789062 s +DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=337267696374520296586378518285888365504, time:1750767285.47608s req_ids:[8] +DEBUG 06-24 20:14:45 [manager.py:391] +ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 first_token_cost:201.66969299316406ms total_cost_time:201.71093940734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8345 prompt_cache_len:5151 prompt_cache_ratio:0.61725584182145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 +DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10873889923095703 s +INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11086297035217285 s +DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=99011530542658766002368443279198558949, time:1750767285.6876237s req_ids:[8] +DEBUG 06-24 20:14:45 [manager.py:391] +ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 first_token_cost:224.52926635742188ms total_cost_time:224.57051277160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8346 prompt_cache_len:5151 prompt_cache_ratio:0.6171818835370237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:14:46 [manager.py:106] timer detokenize batch cost time 1095.607042312622 ms +INFO 06-24 20:14:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:46 lightllm_req_id:8 +DEBUG 06-24 20:14:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s +INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s +DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=160364814654248776765176563932240351650, time:1750767287.0144155s req_ids:[8] +DEBUG 06-24 20:14:47 [manager.py:391] +ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:46 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.3935432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8347 prompt_cache_len:5151 prompt_cache_ratio:0.6171079429735234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 +DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10779523849487305 s +INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s +DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=144620008047979263520750210776378677178, time:1750767287.2318034s req_ids:[8] +DEBUG 06-24 20:14:47 [manager.py:391] +ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.30209350585938ms total_cost_time:207.35979080200195ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:8348 prompt_cache_len:5151 prompt_cache_ratio:0.6170340201245808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 +DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s +INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.11042547225952148 s +DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=160944829997916299009439741916119532297, time:1750767287.453559s req_ids:[8] +DEBUG 06-24 20:14:47 [manager.py:391] +ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:216.45808219909668ms total_cost_time:216.49885177612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8349 prompt_cache_len:5151 prompt_cache_ratio:0.6169601149838304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 +DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10754942893981934 s +INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.10944414138793945 s +DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=75719440186970097660606079336839045869, time:1750767287.6681027s req_ids:[8] +DEBUG 06-24 20:14:47 [manager.py:391] +ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.98587799072266ms total_cost_time:208.03141593933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8350 prompt_cache_len:5151 prompt_cache_ratio:0.6168862275449102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 +DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10752749443054199 s +INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s +DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=218792159429198702307235322663060631927, time:1750767287.8833733s req_ids:[8] +DEBUG 06-24 20:14:47 [manager.py:391] +ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.15999603271484ms total_cost_time:207.20434188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8351 prompt_cache_len:5151 prompt_cache_ratio:0.6168123578014609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 +DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10816121101379395 s +INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s +DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=254979686932485161415815347422465485103, time:1750767288.1009836s req_ids:[8] +DEBUG 06-24 20:14:48 [manager.py:391] +ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:211.17210388183594ms total_cost_time:211.21525764465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8352 prompt_cache_len:5151 prompt_cache_ratio:0.6167385057471264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 +DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10988593101501465 s +INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.11238360404968262 s +DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=297813054370892844616543908556902429450, time:1750767288.3134503s req_ids:[8] +DEBUG 06-24 20:14:48 [manager.py:391] +ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:206.6657543182373ms total_cost_time:206.7110538482666ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8353 prompt_cache_len:5151 prompt_cache_ratio:0.6166646713755537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 +DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10724020004272461 s +INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.10899758338928223 s +DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=185173315891996855591033788824963142451, time:1750767288.5246487s req_ids:[8] +DEBUG 06-24 20:14:48 [manager.py:391] +ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:172.7902889251709ms total_cost_time:172.8341579437256ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8354 prompt_cache_len:5151 prompt_cache_ratio:0.6165908546803927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 +DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10793781280517578 s +INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s +DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=120665254578290436136896004776420250263, time:1750767288.7016625s req_ids:[8] +DEBUG 06-24 20:14:48 [manager.py:391] +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:14:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 33182.379 tokens/s +DEBUG 06-24 20:14:48 [stats.py:37] Avg prompt tokens throughput: 33174.420 tokens/s +DEBUG 06-24 20:14:48 [stats.py:37] Avg generate tokens throughput: 7.960 tokens/s +ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:160.81666946411133ms total_cost_time:160.8600616455078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8355 prompt_cache_len:5151 prompt_cache_ratio:0.6165170556552962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 +DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.3103959560394287 s +INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.3124523162841797 s +DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=297253794906935847718989086604809386366, time:1750767289.072066s req_ids:[8] +DEBUG 06-24 20:14:49 [manager.py:391] +ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:414.48330879211426ms total_cost_time:414.52527046203613ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8356 prompt_cache_len:5151 prompt_cache_ratio:0.6164432742939205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 +DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10755109786987305 s +INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.10960149765014648 s +DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=96331022194745966552273779111210363613, time:1750767289.290974s req_ids:[8] +DEBUG 06-24 20:14:49 [manager.py:391] +ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:204.2560577392578ms total_cost_time:204.2980194091797ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8357 prompt_cache_len:5151 prompt_cache_ratio:0.6163695105899246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 +DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s +INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016678810119629 s +DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=168997122465318584218215688668621920189, time:1750767289.5015683s req_ids:[8] +DEBUG 06-24 20:14:49 [manager.py:391] +ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2007656097412ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8358 prompt_cache_len:5151 prompt_cache_ratio:0.6162957645369705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 +DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s +INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.11050152778625488 s +DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=296702129191292968734477058715179826979, time:1750767289.7148378s req_ids:[8] +DEBUG 06-24 20:14:49 [manager.py:391] +ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:210.57605743408203ms total_cost_time:210.63685417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8359 prompt_cache_len:5151 prompt_cache_ratio:0.6162220361287235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 +DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10753488540649414 s +INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.10964584350585938 s +DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=115688155636789223614638512260721104005, time:1750767289.926333s req_ids:[8] +DEBUG 06-24 20:14:49 [manager.py:391] +ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:204.26225662231445ms total_cost_time:204.30755615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8360 prompt_cache_len:5151 prompt_cache_ratio:0.6161483253588517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 +DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s +INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.10969114303588867 s +DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=264133553713560860929982643855878300920, time:1750767290.1420069s req_ids:[8] +DEBUG 06-24 20:14:50 [manager.py:391] +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:167.55223274230957ms total_cost_time:167.59395599365234ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8361 prompt_cache_len:5151 prompt_cache_ratio:0.6160746322210262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 +DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10890054702758789 s +INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11092042922973633 s +DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=57929572887054814088646815737964715958, time:1750767290.311229s req_ids:[8] +DEBUG 06-24 20:14:50 [manager.py:391] +ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:200.65665245056152ms total_cost_time:200.700044631958ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8362 prompt_cache_len:5151 prompt_cache_ratio:0.6160009567089213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 +DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10888075828552246 s +INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140680313110352 s +DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=168627028583777841942655497440907742276, time:1750767290.5188572s req_ids:[8] +DEBUG 06-24 20:14:50 [manager.py:391] +ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:200.39033889770508ms total_cost_time:200.4373073577881ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8363 prompt_cache_len:5151 prompt_cache_ratio:0.6159272988162143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 +DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10813260078430176 s +INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s +DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=325934028108119775500361158950074120832, time:1750767290.7251697s req_ids:[8] +DEBUG 06-24 20:14:50 [manager.py:391] +ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:206.4075469970703ms total_cost_time:206.4497470855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8364 prompt_cache_len:5151 prompt_cache_ratio:0.6158536585365854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 +DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10829019546508789 s +INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s +DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=308675889738169970898209975872523305190, time:1750767290.9383552s req_ids:[8] +DEBUG 06-24 20:14:50 [manager.py:391] +ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:207.61847496032715ms total_cost_time:207.65995979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8365 prompt_cache_len:5151 prompt_cache_ratio:0.6157800358637179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 +DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.3103630542755127 s +INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.31244659423828125 s +DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=246077629767251498303869218290539575363, time:1750767291.350653s req_ids:[8] +DEBUG 06-24 20:14:51 [manager.py:391] +ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:406.74543380737305ms total_cost_time:406.7883491516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8366 prompt_cache_len:5151 prompt_cache_ratio:0.6157064307912982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 +DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.10696649551391602 s +INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10817837715148926 s +DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=21434543799022693081551161487831978689, time:1750767291.5654447s req_ids:[8] +DEBUG 06-24 20:14:51 [manager.py:391] +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:209.69462394714355ms total_cost_time:209.73873138427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8367 prompt_cache_len:5151 prompt_cache_ratio:0.6156328433130154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 +DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.10743880271911621 s +INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10864853858947754 s +DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=151796774140644941281476469251469813652, time:1750767291.7821524s req_ids:[8] +DEBUG 06-24 20:14:51 [manager.py:391] +ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:211.25221252441406ms total_cost_time:211.29512786865234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8368 prompt_cache_len:5151 prompt_cache_ratio:0.6155592734225621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 +DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.1068124771118164 s +INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10881900787353516 s +DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=261262458726762182673500422414928363311, time:1750767291.9968534s req_ids:[8] +DEBUG 06-24 20:14:51 [manager.py:391] +ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:209.4264030456543ms total_cost_time:209.46907997131348ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8369 prompt_cache_len:5151 prompt_cache_ratio:0.6154857211136336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 +DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.1084146499633789 s +INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10968708992004395 s +DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=169319382204054847427064048251158443429, time:1750767292.2101078s req_ids:[8] +DEBUG 06-24 20:14:52 [manager.py:391] +ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:202.52084732055664ms total_cost_time:202.56423950195312ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8370 prompt_cache_len:5151 prompt_cache_ratio:0.6154121863799283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 +DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.1072540283203125 s +INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10987067222595215 s +DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=231120673683649118079756514706535394070, time:1750767292.4177868s req_ids:[8] +DEBUG 06-24 20:14:52 [manager.py:391] +ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:207.91387557983398ms total_cost_time:207.95845985412598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8371 prompt_cache_len:5151 prompt_cache_ratio:0.6153386692151476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 +DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s +INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10880279541015625 s +DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=328689474952543886466471767609274016914, time:1750767292.6460326s req_ids:[8] +DEBUG 06-24 20:14:52 [manager.py:391] +ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:225.88253021240234ms total_cost_time:225.92902183532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8372 prompt_cache_len:5151 prompt_cache_ratio:0.6152651696129957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 +DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.11045432090759277 s +INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.11163544654846191 s +DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=267770518497033857782039773375141812507, time:1750767292.8618057s req_ids:[8] +DEBUG 06-24 20:14:52 [manager.py:391] +ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:204.7557830810547ms total_cost_time:204.79965209960938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8373 prompt_cache_len:5151 prompt_cache_ratio:0.6151916875671802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 +DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10892891883850098 s +INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.11017274856567383 s +DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=227773135032657690402595550675335516755, time:1750767293.0695074s req_ids:[8] +DEBUG 06-24 20:14:53 [manager.py:391] +ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:200.50430297851562ms total_cost_time:200.54960250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8374 prompt_cache_len:5151 prompt_cache_ratio:0.6151182230714115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 +INFO 06-24 20:14:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:14:53 [statics_utils.py:24] mean first cost: 228.9509126247103 ms +INFO 06-24 20:14:53 [statics_utils.py:24] mean per token cost: 0.0805954998983492 ms +DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10893130302429199 s +INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.11007571220397949 s +DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=229087766411493737261611588252799228858, time:1750767293.2928994s req_ids:[8] +DEBUG 06-24 20:14:53 [manager.py:391] +ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:216.30406379699707ms total_cost_time:216.35890007019043ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:8375 prompt_cache_len:5151 prompt_cache_ratio:0.615044776119403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 +DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10812067985534668 s +INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10922050476074219 s +DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=129537030089034207998032222786913640221, time:1750767293.5096447s req_ids:[8] +DEBUG 06-24 20:14:53 [manager.py:391] +ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:210.94822883605957ms total_cost_time:210.99448204040527ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8376 prompt_cache_len:5151 prompt_cache_ratio:0.6149713467048711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 +DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10728216171264648 s +INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10836052894592285 s +DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=165266274327625847730226731789770395319, time:1750767293.7259486s req_ids:[8] +DEBUG 06-24 20:14:53 [manager.py:391] +ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:211.5194797515869ms total_cost_time:211.5652561187744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8377 prompt_cache_len:5151 prompt_cache_ratio:0.6148979348215352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 +DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10747385025024414 s +INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10867190361022949 s +DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=258691949810301513525959011385237201284, time:1750767293.9384341s req_ids:[8] +DEBUG 06-24 20:14:53 [manager.py:391] +ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:208.18161964416504ms total_cost_time:208.22715759277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8378 prompt_cache_len:5151 prompt_cache_ratio:0.6148245404631177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 +DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.10882234573364258 s +INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.11013221740722656 s +DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=191262296906833126698612474128247323370, time:1750767294.1518295s req_ids:[8] +DEBUG 06-24 20:14:54 [manager.py:391] +ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:206.1898708343506ms total_cost_time:206.23350143432617ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8379 prompt_cache_len:5151 prompt_cache_ratio:0.614751163623344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 +DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.31197428703308105 s +INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.3131895065307617 s +DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=240628571668215059209245563739069729640, time:1750767294.568028s req_ids:[8] +DEBUG 06-24 20:14:54 [manager.py:391] +ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:427.8912544250488ms total_cost_time:427.9353618621826ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8380 prompt_cache_len:5151 prompt_cache_ratio:0.6146778042959428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 +DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.10899066925048828 s +INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.11098504066467285 s +DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=54355864517937947992768217391971135850, time:1750767294.8007379s req_ids:[8] +DEBUG 06-24 20:14:54 [manager.py:391] +ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:210.82782745361328ms total_cost_time:210.87241172790527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8381 prompt_cache_len:5151 prompt_cache_ratio:0.6146044624746451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 +DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10777497291564941 s +INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.10876846313476562 s +DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=263781650599547323521369778180731406609, time:1750767295.009364s req_ids:[8] +DEBUG 06-24 20:14:55 [manager.py:391] +ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:198.96340370178223ms total_cost_time:199.0053653717041ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8382 prompt_cache_len:5151 prompt_cache_ratio:0.6145311381531854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 +DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10738754272460938 s +INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.10932254791259766 s +DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=124992048622619558696193766618707086575, time:1750767295.2172015s req_ids:[8] +DEBUG 06-24 20:14:55 [manager.py:391] +ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:199.78928565979004ms total_cost_time:199.83267784118652ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8383 prompt_cache_len:5151 prompt_cache_ratio:0.6144578313253012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 +DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10921335220336914 s +INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11159729957580566 s +DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=193523000075637311573989991248064101422, time:1750767295.4257834s req_ids:[8] +DEBUG 06-24 20:14:55 [manager.py:391] +ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:202.20661163330078ms total_cost_time:202.24928855895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8384 prompt_cache_len:5151 prompt_cache_ratio:0.6143845419847328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 +DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10804438591003418 s +INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11002802848815918 s +DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=8907332747248072743063263310547633692, time:1750767295.62989s req_ids:[8] +DEBUG 06-24 20:14:55 [manager.py:391] +ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:203.9949893951416ms total_cost_time:204.03814315795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8385 prompt_cache_len:5151 prompt_cache_ratio:0.6143112701252236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 +DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10847043991088867 s +INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11044430732727051 s +DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=240996194893828963635763653932288036861, time:1750767295.838813s req_ids:[8] +DEBUG 06-24 20:14:55 [manager.py:391] +ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:205.45530319213867ms total_cost_time:205.49726486206055ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8386 prompt_cache_len:5151 prompt_cache_ratio:0.61423801574052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 +DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s +INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10971331596374512 s +DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=37632074984236548834336501665566198293, time:1750767296.0514276s req_ids:[8] +DEBUG 06-24 20:14:56 [manager.py:391] +ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:208.6179256439209ms total_cost_time:208.6637020111084ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8387 prompt_cache_len:5151 prompt_cache_ratio:0.614164778824371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 +DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s +INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.11086487770080566 s +DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=248851822877703466818619519155165118854, time:1750767296.261816s req_ids:[8] +DEBUG 06-24 20:14:56 [manager.py:391] +ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:203.5691738128662ms total_cost_time:203.6142349243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8388 prompt_cache_len:5151 prompt_cache_ratio:0.6140915593705293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 +DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10698580741882324 s +INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10884881019592285 s +DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=56177992392048865470351637431435911958, time:1750767296.4714868s req_ids:[8] +DEBUG 06-24 20:14:56 [manager.py:391] +ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:197.18146324157715ms total_cost_time:197.22390174865723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8389 prompt_cache_len:5151 prompt_cache_ratio:0.61401835737275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 +DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10841798782348633 s +INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037135124206543 s +DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=232692198900104402470385540106632706254, time:1750767296.6851459s req_ids:[8] +DEBUG 06-24 20:14:56 [manager.py:391] +ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:208.77933502197266ms total_cost_time:208.82296562194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8390 prompt_cache_len:5151 prompt_cache_ratio:0.6139451728247914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 +DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.1071012020111084 s +INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10942292213439941 s +DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=290898094398610724015060743423334700994, time:1750767296.897478s req_ids:[8] +DEBUG 06-24 20:14:56 [manager.py:391] +ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:370.6681728363037ms total_cost_time:370.7118034362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8391 prompt_cache_len:5151 prompt_cache_ratio:0.6138720057204148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 +DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10814356803894043 s +INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11028432846069336 s +DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=118080281514754145619810589840061686976, time:1750767297.266583s req_ids:[8] +DEBUG 06-24 20:14:57 [manager.py:391] +ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:205.23667335510254ms total_cost_time:205.27911186218262ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8392 prompt_cache_len:5151 prompt_cache_ratio:0.6137988560533841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 +DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.1081233024597168 s +INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s +DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=270615625680643213940768743007461210296, time:1750767297.4789965s req_ids:[8] +DEBUG 06-24 20:14:57 [manager.py:391] +ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:204.4045925140381ms total_cost_time:204.44869995117188ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8393 prompt_cache_len:5151 prompt_cache_ratio:0.6137257238174669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 +DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10827851295471191 s +INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.1103219985961914 s +DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=208165520351236135149491979903072636517, time:1750767297.6904929s req_ids:[8] +DEBUG 06-24 20:14:57 [manager.py:391] +ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:203.9508819580078ms total_cost_time:203.9966583251953ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8394 prompt_cache_len:5151 prompt_cache_ratio:0.6136526090064331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 +DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s +INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11022233963012695 s +DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=325989771635030704425533231108756568641, time:1750767297.907327s req_ids:[8] +DEBUG 06-24 20:14:57 [manager.py:391] +ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:221.46868705749512ms total_cost_time:221.53377532958984ms,out_token_counter:1 mean_per_token_cost_time: 0.06508827209472656ms prompt_token_num:8395 prompt_cache_len:5151 prompt_cache_ratio:0.613579511614056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 +DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10776233673095703 s +INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.10970234870910645 s +DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=61790169856867526185735406743537073462, time:1750767298.1492138s req_ids:[8] +DEBUG 06-24 20:14:58 [manager.py:391] +ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:228.38616371154785ms total_cost_time:228.43122482299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8396 prompt_cache_len:5151 prompt_cache_ratio:0.6135064316341114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 +DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10759520530700684 s +INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.10950136184692383 s +DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=337543175081435050361682484932943899218, time:1750767298.3638692s req_ids:[8] +DEBUG 06-24 20:14:58 [manager.py:391] +ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:206.9075107574463ms total_cost_time:206.9711685180664ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:8397 prompt_cache_len:5151 prompt_cache_ratio:0.6134333690603787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 +DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10845017433166504 s +INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.11011910438537598 s +DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=17590771758064458560663809220335429279, time:1750767298.5764377s req_ids:[8] +DEBUG 06-24 20:14:58 [manager.py:391] +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:168.1840419769287ms total_cost_time:168.2283878326416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8398 prompt_cache_len:5151 prompt_cache_ratio:0.6133603238866396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 +DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10730242729187012 s +INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.1091454029083252 s +DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=55275128846947129734103868336165707921, time:1750767298.7486298s req_ids:[8] +DEBUG 06-24 20:14:58 [manager.py:391] +DEBUG 06-24 20:14:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 36697.345 tokens/s +DEBUG 06-24 20:14:58 [stats.py:37] Avg prompt tokens throughput: 36688.587 tokens/s +DEBUG 06-24 20:14:58 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s +ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:200.87027549743652ms total_cost_time:200.913667678833ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8399 prompt_cache_len:5151 prompt_cache_ratio:0.6132872961066793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 +DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10864400863647461 s +INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052227020263672 s +DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=54161961121955347321221076462912968396, time:1750767298.953305s req_ids:[8] +DEBUG 06-24 20:14:58 [manager.py:391] +ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:201.9021511077881ms total_cost_time:201.94649696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8400 prompt_cache_len:5151 prompt_cache_ratio:0.6132142857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 +DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10792207717895508 s +INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.11030006408691406 s +DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=59594178784249676758607097844937174089, time:1750767299.1693125s req_ids:[8] +DEBUG 06-24 20:14:59 [manager.py:391] +ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:373.40712547302246ms total_cost_time:373.45194816589355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8401 prompt_cache_len:5151 prompt_cache_ratio:0.6131412927032496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 +DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s +INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060738563537598 s +DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=10241095929983728542422852066010047376, time:1750767299.5401566s req_ids:[8] +DEBUG 06-24 20:14:59 [manager.py:391] +ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:204.0271759033203ms total_cost_time:204.0688991546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8402 prompt_cache_len:5151 prompt_cache_ratio:0.6130683170673649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 +DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10727477073669434 s +INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.10913205146789551 s +DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=224034263545368386471211661971263470861, time:1750767299.7539341s req_ids:[8] +DEBUG 06-24 20:14:59 [manager.py:391] +ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:177.87623405456543ms total_cost_time:177.93011665344238ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:8403 prompt_cache_len:5151 prompt_cache_ratio:0.6129953588004284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 +DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:14:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10712265968322754 s +INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.10819649696350098 s +DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=237413407909576479746994851068780530194, time:1750767299.934086s req_ids:[8] +DEBUG 06-24 20:14:59 [manager.py:391] +ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:196.30885124206543ms total_cost_time:196.35343551635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8404 prompt_cache_len:5151 prompt_cache_ratio:0.6129224178962399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 +DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.1076807975769043 s +INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10956597328186035 s +DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=308123753582056048531065578681317644641, time:1750767300.1434653s req_ids:[8] +DEBUG 06-24 20:15:00 [manager.py:391] +ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:207.48376846313477ms total_cost_time:207.52739906311035ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8405 prompt_cache_len:5151 prompt_cache_ratio:0.612849494348602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 +DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10744905471801758 s +INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10932421684265137 s +DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=137197264246329344014321356755399205236, time:1750767300.3545356s req_ids:[8] +DEBUG 06-24 20:15:00 [manager.py:391] +ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:209.8677158355713ms total_cost_time:209.91086959838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8406 prompt_cache_len:5151 prompt_cache_ratio:0.6127765881513205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 +DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.1072547435760498 s +INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10924887657165527 s +DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=101885760199815517281230032851123741432, time:1750767300.5690255s req_ids:[8] +DEBUG 06-24 20:15:00 [manager.py:391] +ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:203.643798828125ms total_cost_time:203.6881446838379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8407 prompt_cache_len:5151 prompt_cache_ratio:0.6127036992982039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 +DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s +INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.11147570610046387 s +DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=14885947683403805306594203694629827631, time:1750767300.7812178s req_ids:[8] +DEBUG 06-24 20:15:00 [manager.py:391] +ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:209.0001106262207ms total_cost_time:209.04278755187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8408 prompt_cache_len:5151 prompt_cache_ratio:0.6126308277830638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 +DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s +INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.11019206047058105 s +DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=3223486102555821366067162042301857276, time:1750767300.9932365s req_ids:[8] +DEBUG 06-24 20:15:00 [manager.py:391] +ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:208.50658416748047ms total_cost_time:208.54926109313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8409 prompt_cache_len:5151 prompt_cache_ratio:0.6125579735997146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 +DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.3110232353210449 s +INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.312283992767334 s +DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=129538661814799410571869211096606593282, time:1750767301.4131074s req_ids:[8] +DEBUG 06-24 20:15:01 [manager.py:391] +ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:418.21837425231934ms total_cost_time:418.2605743408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8410 prompt_cache_len:5151 prompt_cache_ratio:0.6124851367419738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 +DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s +INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.11006569862365723 s +DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=244235888560123911845585581849849240793, time:1750767301.630859s req_ids:[8] +DEBUG 06-24 20:15:01 [manager.py:391] +ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:207.02648162841797ms total_cost_time:207.06939697265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8411 prompt_cache_len:5151 prompt_cache_ratio:0.6124123172036618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 +DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.10769319534301758 s +INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.1089482307434082 s +DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=54519829939308747405056562360300828622, time:1750767301.8409214s req_ids:[8] +DEBUG 06-24 20:15:01 [manager.py:391] +ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:208.1136703491211ms total_cost_time:208.15682411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8412 prompt_cache_len:5151 prompt_cache_ratio:0.612339514978602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 +DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10814070701599121 s +INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.11016035079956055 s +DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=158095796602175375334813036413737706951, time:1750767302.0515876s req_ids:[8] +DEBUG 06-24 20:15:02 [manager.py:391] +ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:200.77753067016602ms total_cost_time:200.8211612701416ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8413 prompt_cache_len:5151 prompt_cache_ratio:0.6122667300606205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 +DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10800385475158691 s +INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10998320579528809 s +DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=221934834960834592626627247088426473337, time:1750767302.2627792s req_ids:[8] +DEBUG 06-24 20:15:02 [manager.py:391] +ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:208.3725929260254ms total_cost_time:208.4181308746338ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8414 prompt_cache_len:5151 prompt_cache_ratio:0.6121939624435465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 +DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.1079258918762207 s +INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10961413383483887 s +DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=295339679434532293143780347051093598663, time:1750767302.476235s req_ids:[8] +DEBUG 06-24 20:15:02 [manager.py:391] +ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:175.39525032043457ms total_cost_time:175.43697357177734ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8415 prompt_cache_len:5151 prompt_cache_ratio:0.6121212121212121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 +DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s +INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10934257507324219 s +DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=310665530674894052116640070306649479440, time:1750767302.6542237s req_ids:[8] +DEBUG 06-24 20:15:02 [manager.py:391] +ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:193.10379028320312ms total_cost_time:193.1476593017578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8416 prompt_cache_len:5151 prompt_cache_ratio:0.6120484790874525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 +DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s +INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.1107492446899414 s +DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=86426648157262359196472262747095022900, time:1750767302.8553464s req_ids:[8] +DEBUG 06-24 20:15:02 [manager.py:391] +ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:368.99447441101074ms total_cost_time:369.03834342956543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8417 prompt_cache_len:5151 prompt_cache_ratio:0.6119757633361055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 +DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s +INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11009645462036133 s +DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=116645101844515112408615210018666121642, time:1750767303.2273679s req_ids:[8] +DEBUG 06-24 20:15:03 [manager.py:391] +ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:203.43852043151855ms total_cost_time:203.48000526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8418 prompt_cache_len:5151 prompt_cache_ratio:0.6119030648610121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 +DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10825324058532715 s +INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s +DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=290852715701895200239015688235404418295, time:1750767303.4411628s req_ids:[8] +DEBUG 06-24 20:15:03 [manager.py:391] +ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:203.83024215698242ms total_cost_time:203.8707733154297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:8419 prompt_cache_len:5151 prompt_cache_ratio:0.6118303836560162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 +DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10701465606689453 s +INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.1088418960571289 s +DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=157217911038384463259229455355640947255, time:1750767303.6496248s req_ids:[8] +DEBUG 06-24 20:15:03 [manager.py:391] +ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:204.90765571594238ms total_cost_time:204.95033264160156ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8420 prompt_cache_len:5151 prompt_cache_ratio:0.6117577197149644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 +DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10895085334777832 s +INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089587211608887 s +DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=319585823200911135751075547187108423502, time:1750767303.860282s req_ids:[8] +DEBUG 06-24 20:15:03 [manager.py:391] +ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:206.90417289733887ms total_cost_time:206.94947242736816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8421 prompt_cache_len:5151 prompt_cache_ratio:0.6116850730317065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 +DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10723400115966797 s +INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.10927343368530273 s +DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=297867023668335867617050734588457361992, time:1750767304.0747416s req_ids:[8] +DEBUG 06-24 20:15:04 [manager.py:391] +ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:205.90710639953613ms total_cost_time:205.9495449066162ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8422 prompt_cache_len:5151 prompt_cache_ratio:0.611612443600095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 +DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10744118690490723 s +INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.10948848724365234 s +DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=224037054886901283131742567933337772403, time:1750767304.284884s req_ids:[8] +DEBUG 06-24 20:15:04 [manager.py:391] +ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:206.47096633911133ms total_cost_time:206.5141201019287ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8423 prompt_cache_len:5151 prompt_cache_ratio:0.6115398314139855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 +DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.31050705909729004 s +INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.31285834312438965 s +DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=250127663779496055721299875716886669029, time:1750767304.707225s req_ids:[8] +DEBUG 06-24 20:15:04 [manager.py:391] +ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:423.7239360809326ms total_cost_time:423.7651824951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8424 prompt_cache_len:5151 prompt_cache_ratio:0.6114672364672364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 +DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10828733444213867 s +INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.1102590560913086 s +DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=328532782729164800199216219570759092521, time:1750767304.9299095s req_ids:[8] +DEBUG 06-24 20:15:04 [manager.py:391] +ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:211.38882637023926ms total_cost_time:211.43341064453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8425 prompt_cache_len:5151 prompt_cache_ratio:0.6113946587537092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 +DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10788679122924805 s +INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.11036539077758789 s +DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=317426667700253739143720540712034569793, time:1750767305.1479073s req_ids:[8] +DEBUG 06-24 20:15:05 [manager.py:391] +ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:211.44533157348633ms total_cost_time:211.4884853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8426 prompt_cache_len:5151 prompt_cache_ratio:0.611322098267268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 +DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10812115669250488 s +INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.11005473136901855 s +DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=208698387415990703524925066827822658945, time:1750767305.360969s req_ids:[8] +DEBUG 06-24 20:15:05 [manager.py:391] +ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:206.81405067443848ms total_cost_time:206.85625076293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8427 prompt_cache_len:5151 prompt_cache_ratio:0.61124955500178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 +DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10798907279968262 s +INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.1099851131439209 s +DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=299413205335424934533331883544922609080, time:1750767305.5721238s req_ids:[8] +DEBUG 06-24 20:15:05 [manager.py:391] +ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:200.47807693481445ms total_cost_time:200.52433013916016ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8428 prompt_cache_len:5151 prompt_cache_ratio:0.6111770289511154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 +DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.11088323593139648 s +INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.1128392219543457 s +DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=213006831136104534703128364769423283324, time:1750767305.780454s req_ids:[8] +DEBUG 06-24 20:15:05 [manager.py:391] +ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:202.5439739227295ms total_cost_time:202.62718200683594ms,out_token_counter:1 mean_per_token_cost_time: 0.08320808410644531ms prompt_token_num:8429 prompt_cache_len:5151 prompt_cache_ratio:0.611104520109147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 +DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10747718811035156 s +INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.10953688621520996 s +DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=150703326799795933127368813083694868111, time:1750767305.9936242s req_ids:[8] +DEBUG 06-24 20:15:05 [manager.py:391] +ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:362.5922203063965ms total_cost_time:362.6365661621094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8430 prompt_cache_len:5151 prompt_cache_ratio:0.6110320284697509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 +DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10839605331420898 s +INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11036109924316406 s +DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=295351742298487985870258412058856895801, time:1750767306.3593252s req_ids:[8] +DEBUG 06-24 20:15:06 [manager.py:391] +ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:205.20973205566406ms total_cost_time:205.25431632995605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8431 prompt_cache_len:5151 prompt_cache_ratio:0.6109595540268058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 +DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.1082000732421875 s +INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11009383201599121 s +DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=187028261677400707019438634439829499872, time:1750767306.5734897s req_ids:[8] +DEBUG 06-24 20:15:06 [manager.py:391] +ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:203.03702354431152ms total_cost_time:203.08160781860352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8432 prompt_cache_len:5151 prompt_cache_ratio:0.6108870967741935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 +DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10884976387023926 s +INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075949668884277 s +DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=16977108093388773543187093875157760379, time:1750767306.7825294s req_ids:[8] +DEBUG 06-24 20:15:06 [manager.py:391] +ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:205.9328556060791ms total_cost_time:205.97553253173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8433 prompt_cache_len:5151 prompt_cache_ratio:0.6108146567057986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 +DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10910415649414062 s +INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11111116409301758 s +DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=35186303771664444843385352262013707111, time:1750767306.9942148s req_ids:[8] +DEBUG 06-24 20:15:06 [manager.py:391] +ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:209.19322967529297ms total_cost_time:209.23781394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8434 prompt_cache_len:5151 prompt_cache_ratio:0.6107422338155086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 +DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s +INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.11007118225097656 s +DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=212458558143022334187553734603168329978, time:1750767307.208388s req_ids:[8] +DEBUG 06-24 20:15:07 [manager.py:391] +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:206.6512107849121ms total_cost_time:206.6943645477295ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8435 prompt_cache_len:5151 prompt_cache_ratio:0.610669828097214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 +DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10773158073425293 s +INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.10949325561523438 s +DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=311047565757583755063146628048837157573, time:1750767307.4208276s req_ids:[8] +DEBUG 06-24 20:15:07 [manager.py:391] +ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:202.99220085144043ms total_cost_time:203.0353546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8436 prompt_cache_len:5151 prompt_cache_ratio:0.6105974395448079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 +DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s +INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.11033344268798828 s +DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=70525317052197544717407791725934589966, time:1750767307.6295514s req_ids:[8] +DEBUG 06-24 20:15:07 [manager.py:391] +ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:373.9476203918457ms total_cost_time:373.9917278289795ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8437 prompt_cache_len:5151 prompt_cache_ratio:0.6105250681521868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 +DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10785627365112305 s +INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.109893798828125 s +DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=295426365423609987860349739567250163495, time:1750767308.0050988s req_ids:[8] +DEBUG 06-24 20:15:08 [manager.py:391] +ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:196.03323936462402ms total_cost_time:196.07806205749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8438 prompt_cache_len:5151 prompt_cache_ratio:0.6104527139132496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 +DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10577726364135742 s +INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.10772323608398438 s +DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=130595018932769216662651260710651922661, time:1750767308.2106023s req_ids:[8] +DEBUG 06-24 20:15:08 [manager.py:391] +ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:205.98983764648438ms total_cost_time:206.03370666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8439 prompt_cache_len:5151 prompt_cache_ratio:0.6103803768218983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 +DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10849189758300781 s +INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.11099791526794434 s +DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=160734116978551564000830878190382787404, time:1750767308.423383s req_ids:[8] +DEBUG 06-24 20:15:08 [manager.py:391] +ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:207.83042907714844ms total_cost_time:207.87334442138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8440 prompt_cache_len:5151 prompt_cache_ratio:0.6103080568720379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 +DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:08 [batch.py:51] router release req id 8 +INFO 06-24 20:15:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10791397094726562 s +INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s +DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=316873265787230008035939602988657140647, time:1750767308.6370327s req_ids:[8] +DEBUG 06-24 20:15:08 [manager.py:391] +ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:210.27660369873047ms total_cost_time:210.31975746154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8441 prompt_cache_len:5151 prompt_cache_ratio:0.6102357540575761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 +DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10810351371765137 s +INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.1099402904510498 s +DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=257433987272126904175471229630861215682, time:1750767308.8514311s req_ids:[8] +DEBUG 06-24 20:15:08 [manager.py:391] +DEBUG 06-24 20:15:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 35849.960 tokens/s +DEBUG 06-24 20:15:08 [stats.py:37] Avg prompt tokens throughput: 35841.448 tokens/s +DEBUG 06-24 20:15:08 [stats.py:37] Avg generate tokens throughput: 8.512 tokens/s +ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:203.66501808166504ms total_cost_time:203.70817184448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8442 prompt_cache_len:5151 prompt_cache_ratio:0.6101634683724236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 +DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s +INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.10981631278991699 s +DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=170949480989550182365094535809482750049, time:1750767309.0619617s req_ids:[8] +DEBUG 06-24 20:15:09 [manager.py:391] +ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:206.3138484954834ms total_cost_time:206.3581943511963ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8443 prompt_cache_len:5151 prompt_cache_ratio:0.6100911998104939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 +DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.1084451675415039 s +INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11036300659179688 s +DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=195792750900178838713643202427079486527, time:1750767309.2735946s req_ids:[8] +DEBUG 06-24 20:15:09 [manager.py:391] +ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:211.24911308288574ms total_cost_time:211.29560470581055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8444 prompt_cache_len:5151 prompt_cache_ratio:0.6100189483657035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 +DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10817909240722656 s +INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11081838607788086 s +DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=15353624148820505063541424233944416766, time:1750767309.4869719s req_ids:[8] +DEBUG 06-24 20:15:09 [manager.py:391] +ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:200.29950141906738ms total_cost_time:200.34408569335938ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8445 prompt_cache_len:5151 prompt_cache_ratio:0.6099467140319715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 +DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10745525360107422 s +INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.10932540893554688 s +DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=320950916089381884813448321355557982584, time:1750767309.6973982s req_ids:[8] +DEBUG 06-24 20:15:09 [manager.py:391] +ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:203.2158374786377ms total_cost_time:203.25851440429688ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8446 prompt_cache_len:5151 prompt_cache_ratio:0.6098744968032205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 +DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s +INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11045408248901367 s +DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=96690897178817779332938651548598764994, time:1750767309.9054024s req_ids:[8] +DEBUG 06-24 20:15:09 [manager.py:391] +ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:202.5461196899414ms total_cost_time:202.5899887084961ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8447 prompt_cache_len:5151 prompt_cache_ratio:0.6098022966733752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 +DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.11004924774169922 s +INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11198139190673828 s +DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=302327857834520467008844304676849488523, time:1750767310.1111403s req_ids:[8] +DEBUG 06-24 20:15:10 [manager.py:391] +ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:203.76110076904297ms total_cost_time:203.8249969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:8448 prompt_cache_len:5151 prompt_cache_ratio:0.6097301136363636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 +DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10928583145141602 s +INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11123204231262207 s +DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=143226974034097371617636099111396372073, time:1750767310.3229887s req_ids:[8] +DEBUG 06-24 20:15:10 [manager.py:391] +ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:206.27689361572266ms total_cost_time:206.32219314575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8449 prompt_cache_len:5151 prompt_cache_ratio:0.6096579476861167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 +DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s +INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11018133163452148 s +DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=271057018768547075267753270214297248275, time:1750767310.537294s req_ids:[8] +DEBUG 06-24 20:15:10 [manager.py:391] +ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:209.75494384765625ms total_cost_time:209.79952812194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8450 prompt_cache_len:5151 prompt_cache_ratio:0.609585798816568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 +DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10807132720947266 s +INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11001229286193848 s +DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=152788734019961385649920779463865574522, time:1750767310.749922s req_ids:[8] +DEBUG 06-24 20:15:10 [manager.py:391] +ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:202.94928550720215ms total_cost_time:202.99458503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8451 prompt_cache_len:5151 prompt_cache_ratio:0.6095136670216542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 +DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.1072092056274414 s +INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.10906267166137695 s +DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=54521800750868192945465877837284109421, time:1750767310.9676654s req_ids:[8] +DEBUG 06-24 20:15:10 [manager.py:391] +ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:374.6373653411865ms total_cost_time:374.6829032897949ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8452 prompt_cache_len:5151 prompt_cache_ratio:0.6094415522953147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 +DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10817241668701172 s +INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.11021685600280762 s +DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=155400084622596006515751292754945294661, time:1750767311.3379538s req_ids:[8] +DEBUG 06-24 20:15:11 [manager.py:391] +ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:203.78875732421875ms total_cost_time:203.83262634277344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8453 prompt_cache_len:5151 prompt_cache_ratio:0.6093694546314917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 +DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10749006271362305 s +INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.10959362983703613 s +DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=246866300748825310386851339264167772556, time:1750767311.5514486s req_ids:[8] +DEBUG 06-24 20:15:11 [manager.py:391] +ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:207.40628242492676ms total_cost_time:207.45015144348145ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8454 prompt_cache_len:5151 prompt_cache_ratio:0.6092973740241305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 +DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10779953002929688 s +INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.10988879203796387 s +DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=90405470861237417620041826766799821619, time:1750767311.7645261s req_ids:[8] +DEBUG 06-24 20:15:11 [manager.py:391] +ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:206.4671516418457ms total_cost_time:206.50982856750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8455 prompt_cache_len:5151 prompt_cache_ratio:0.6092253104671792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 +DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10887336730957031 s +INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.1108405590057373 s +DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=92521648342276699864805984054588147693, time:1750767311.978629s req_ids:[8] +DEBUG 06-24 20:15:11 [manager.py:391] +ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:209.03944969177246ms total_cost_time:209.08427238464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8456 prompt_cache_len:5151 prompt_cache_ratio:0.6091532639545885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 +DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10832571983337402 s +INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11016154289245605 s +DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=306821650775601618321656819602050241520, time:1750767312.1932003s req_ids:[8] +DEBUG 06-24 20:15:12 [manager.py:391] +ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:206.77709579467773ms total_cost_time:206.82311058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8457 prompt_cache_len:5151 prompt_cache_ratio:0.6090812344803122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 +DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10801172256469727 s +INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.10984253883361816 s +DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=58619328649905711436624774804867760944, time:1750767312.4106028s req_ids:[8] +DEBUG 06-24 20:15:12 [manager.py:391] +ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:213.0730152130127ms total_cost_time:213.11664581298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8458 prompt_cache_len:5151 prompt_cache_ratio:0.6090092220383069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 +DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.1085515022277832 s +INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11048746109008789 s +DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=165015383856099417288269464491488748107, time:1750767312.6243236s req_ids:[8] +DEBUG 06-24 20:15:12 [manager.py:391] +ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:207.442045211792ms total_cost_time:207.48639106750488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8459 prompt_cache_len:5151 prompt_cache_ratio:0.6089372266225322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 +DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10841798782348633 s +INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11050105094909668 s +DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=159767453362005887747708929850226556457, time:1750767312.8363004s req_ids:[8] +DEBUG 06-24 20:15:12 [manager.py:391] +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:206.62951469421387ms total_cost_time:206.67433738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8460 prompt_cache_len:5151 prompt_cache_ratio:0.6088652482269503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 +DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10735774040222168 s +INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.10933923721313477 s +DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=115532773430623435681575884018880805220, time:1750767313.04956s req_ids:[8] +DEBUG 06-24 20:15:13 [manager.py:391] +ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:207.60750770568848ms total_cost_time:207.65161514282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8461 prompt_cache_len:5151 prompt_cache_ratio:0.6087932868455266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 +DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10860180854797363 s +INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s +DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=298894202637373361076223219613686520612, time:1750767313.2677681s req_ids:[8] +DEBUG 06-24 20:15:13 [manager.py:391] +ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:214.24007415771484ms total_cost_time:214.28322792053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8462 prompt_cache_len:5151 prompt_cache_ratio:0.6087213424722288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 +DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10805249214172363 s +INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s +DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=58954914252949819266978352605403739981, time:1750767313.4887788s req_ids:[8] +DEBUG 06-24 20:15:13 [manager.py:391] +ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:212.87846565246582ms total_cost_time:212.9230499267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8463 prompt_cache_len:5151 prompt_cache_ratio:0.608649415101028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 +DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10867047309875488 s +INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s +DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=60888327154630375669830410076507793305, time:1750767313.7013774s req_ids:[8] +DEBUG 06-24 20:15:13 [manager.py:391] +ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:387.2199058532715ms total_cost_time:387.2649669647217ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8464 prompt_cache_len:5151 prompt_cache_ratio:0.6085775047258979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 +DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10889792442321777 s +INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11089253425598145 s +DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=283953288730117449369057787490348485431, time:1750767314.090492s req_ids:[8] +DEBUG 06-24 20:15:14 [manager.py:391] +ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:208.47678184509277ms total_cost_time:208.5251808166504ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:8465 prompt_cache_len:5151 prompt_cache_ratio:0.6085056113408152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 +DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10806775093078613 s +INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.10997605323791504 s +DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=22071053498242958339452132597611902174, time:1750767314.3107696s req_ids:[8] +DEBUG 06-24 20:15:14 [manager.py:391] +ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:200.0107765197754ms total_cost_time:200.0570297241211ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8466 prompt_cache_len:5151 prompt_cache_ratio:0.608433734939759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 +DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10996365547180176 s +INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11196446418762207 s +DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=173114695993577092812998530470225851035, time:1750767314.5127943s req_ids:[8] +DEBUG 06-24 20:15:14 [manager.py:391] +ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:206.21180534362793ms total_cost_time:206.25567436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8467 prompt_cache_len:5151 prompt_cache_ratio:0.608361875516712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 +DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10863852500915527 s +INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11069989204406738 s +DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=280071428544312896133544943344611301108, time:1750767314.725252s req_ids:[8] +DEBUG 06-24 20:15:14 [manager.py:391] +ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:204.35142517089844ms total_cost_time:204.39529418945312ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8468 prompt_cache_len:5151 prompt_cache_ratio:0.608290033065659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 +DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10763955116271973 s +INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.10954809188842773 s +DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=86440286479720367570019129694858265234, time:1750767314.9408438s req_ids:[8] +DEBUG 06-24 20:15:14 [manager.py:391] +ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:207.33022689819336ms total_cost_time:207.37171173095703ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8469 prompt_cache_len:5151 prompt_cache_ratio:0.608218207580588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 +DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s +INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.1102285385131836 s +DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=52800548585344657027239445122208056954, time:1750767315.1515646s req_ids:[8] +DEBUG 06-24 20:15:15 [manager.py:391] +ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:169.42548751831055ms total_cost_time:169.46840286254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8470 prompt_cache_len:5151 prompt_cache_ratio:0.60814639905549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 +DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10683631896972656 s +INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10877180099487305 s +DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=261284322877345006314508110249412414775, time:1750767315.324305s req_ids:[8] +DEBUG 06-24 20:15:15 [manager.py:391] +ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:196.08306884765625ms total_cost_time:196.12622261047363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8471 prompt_cache_len:5151 prompt_cache_ratio:0.6080746074843584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 +DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10768699645996094 s +INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s +DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=12269258894682624816136188255232647632, time:1750767315.5255272s req_ids:[8] +DEBUG 06-24 20:15:15 [manager.py:391] +ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:202.6808261871338ms total_cost_time:202.7263641357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8472 prompt_cache_len:5151 prompt_cache_ratio:0.6080028328611898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 +DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:15 [batch.py:51] router release req id 8 +DEBUG 06-24 20:15:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:15 [manager.py:283] +DEBUG 06-24 20:15:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:15 [manager.py:284] +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10805964469909668 s +INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.11002826690673828 s +DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=165807093125889596731618697130676111141, time:1750767315.7361126s req_ids:[8] +DEBUG 06-24 20:15:15 [manager.py:391] +ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:207.31806755065918ms total_cost_time:207.36241340637207ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8473 prompt_cache_len:5151 prompt_cache_ratio:0.6079310751799835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 +DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10732078552246094 s +INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10940051078796387 s +DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=2803494314530524709596668655958977477, time:1750767315.9491577s req_ids:[8] +DEBUG 06-24 20:15:15 [manager.py:391] +ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:203.8130760192871ms total_cost_time:203.8586139678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8474 prompt_cache_len:5151 prompt_cache_ratio:0.6078593344347416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 +DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10854220390319824 s +INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11040973663330078 s +DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=91036598762403695309762399747701284438, time:1750767316.1576025s req_ids:[8] +DEBUG 06-24 20:15:16 [manager.py:391] +ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:220.9019660949707ms total_cost_time:220.94488143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8475 prompt_cache_len:5151 prompt_cache_ratio:0.6077876106194691 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 +DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.3099963665008545 s +INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.31188297271728516 s +DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=210730471232504555605085140701265999772, time:1750767316.5884387s req_ids:[8] +DEBUG 06-24 20:15:16 [manager.py:391] +ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:386.704683303833ms total_cost_time:386.7485523223877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8476 prompt_cache_len:5151 prompt_cache_ratio:0.6077159037281736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 +DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10864734649658203 s +INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s +DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=321442465610725303638229306882563553601, time:1750767316.773562s req_ids:[8] +DEBUG 06-24 20:15:16 [manager.py:391] +ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:194.67926025390625ms total_cost_time:194.72503662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8477 prompt_cache_len:5151 prompt_cache_ratio:0.6076442137548661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 +DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:16 [batch.py:51] router release req id 8 +INFO 06-24 20:15:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10824704170227051 s +INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017656326293945 s +DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=236176162028475234781851806680295783278, time:1750767316.9777544s req_ids:[8] +DEBUG 06-24 20:15:16 [manager.py:391] +ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:204.88929748535156ms total_cost_time:204.93340492248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8478 prompt_cache_len:5151 prompt_cache_ratio:0.6075725406935598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 +DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s +INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11129307746887207 s +DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=1125608822110276401943498766983581029, time:1750767317.1898224s req_ids:[8] +DEBUG 06-24 20:15:17 [manager.py:391] +ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:208.64415168762207ms total_cost_time:208.68802070617676ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8479 prompt_cache_len:5151 prompt_cache_ratio:0.607500884538271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 +DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10887384414672852 s +INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11079645156860352 s +DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=303643803246148690888786967502782939238, time:1750767317.4039333s req_ids:[8] +DEBUG 06-24 20:15:17 [manager.py:391] +ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:205.76047897338867ms total_cost_time:205.86299896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.10251998901367188ms prompt_token_num:8480 prompt_cache_len:5151 prompt_cache_ratio:0.6074292452830189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 +DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10893440246582031 s +INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s +DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=157943569294492984502555634239809805970, time:1750767317.6145113s req_ids:[8] +DEBUG 06-24 20:15:17 [manager.py:391] +ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:203.6266326904297ms total_cost_time:203.67074012756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8481 prompt_cache_len:5151 prompt_cache_ratio:0.6073576229218253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 +DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.1080772876739502 s +INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11020040512084961 s +DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=173741878135272598941561213663270583986, time:1750767317.8238358s req_ids:[8] +DEBUG 06-24 20:15:17 [manager.py:391] +ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:205.07192611694336ms total_cost_time:205.13176918029785ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8482 prompt_cache_len:5151 prompt_cache_ratio:0.6072860174487149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 +DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10827064514160156 s +INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.11030912399291992 s +DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=195946146804776235230595986216628919743, time:1750767318.037506s req_ids:[8] +DEBUG 06-24 20:15:18 [manager.py:391] +ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:204.3924331665039ms total_cost_time:204.4353485107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8483 prompt_cache_len:5151 prompt_cache_ratio:0.6072144288577155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 +DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10776805877685547 s +INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.10968446731567383 s +DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=154750650718031942259477710082548771845, time:1750767318.2587488s req_ids:[8] +DEBUG 06-24 20:15:18 [manager.py:391] +ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:220.81708908081055ms total_cost_time:220.86000442504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8484 prompt_cache_len:5151 prompt_cache_ratio:0.6071428571428571 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 +DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10806560516357422 s +INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.10996723175048828 s +DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=77800032504309576078524225678141175589, time:1750767318.47809s req_ids:[8] +DEBUG 06-24 20:15:18 [manager.py:391] +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:211.91668510437012ms total_cost_time:211.9598388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8485 prompt_cache_len:5151 prompt_cache_ratio:0.6070713022981733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 +DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.3107593059539795 s +INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.3128194808959961 s +DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=151485347633670953105748647304515732091, time:1750767318.8961577s req_ids:[8] +DEBUG 06-24 20:15:18 [manager.py:391] +DEBUG 06-24 20:15:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 37086.855 tokens/s +DEBUG 06-24 20:15:18 [stats.py:37] Avg prompt tokens throughput: 37078.094 tokens/s +DEBUG 06-24 20:15:18 [stats.py:37] Avg generate tokens throughput: 8.761 tokens/s +ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:412.8682613372803ms total_cost_time:412.91356086730957ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8486 prompt_cache_len:5151 prompt_cache_ratio:0.6069997643176998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 +DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.11022090911865234 s +INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.11217188835144043 s +DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=239152782978784526261356649094679474939, time:1750767319.1107416s req_ids:[8] +DEBUG 06-24 20:15:19 [manager.py:391] +ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:216.81690216064453ms total_cost_time:216.8593406677246ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8487 prompt_cache_len:5151 prompt_cache_ratio:0.6069282431954754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 +DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10955166816711426 s +INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.1115579605102539 s +DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=208035959298632149078947995208968925946, time:1750767319.337627s req_ids:[8] +DEBUG 06-24 20:15:19 [manager.py:391] +ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:212.4018669128418ms total_cost_time:212.4459743499756ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8488 prompt_cache_len:5151 prompt_cache_ratio:0.6068567389255419 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 +DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10800457000732422 s +INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993337631225586 s +DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=128612469570573097013692548871673272703, time:1750767319.5492456s req_ids:[8] +DEBUG 06-24 20:15:19 [manager.py:391] +ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:204.49042320251465ms total_cost_time:204.53405380249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8489 prompt_cache_len:5151 prompt_cache_ratio:0.6067852515019437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 +DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10846066474914551 s +INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.11035776138305664 s +DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=294771058514122612498391887698161269515, time:1750767319.758234s req_ids:[8] +DEBUG 06-24 20:15:19 [manager.py:391] +ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:207.2005271911621ms total_cost_time:207.2453498840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8490 prompt_cache_len:5151 prompt_cache_ratio:0.606713780918728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 +DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s +INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s +DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=5619122663642315377369401010211490363, time:1750767319.971431s req_ids:[8] +DEBUG 06-24 20:15:19 [manager.py:391] +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:201.06220245361328ms total_cost_time:201.10535621643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8491 prompt_cache_len:5151 prompt_cache_ratio:0.6066423271699446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 +DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10716485977172852 s +INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10951375961303711 s +DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=139793673587263089691533404429428714574, time:1750767320.1853154s req_ids:[8] +DEBUG 06-24 20:15:20 [manager.py:391] +ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:209.6257209777832ms total_cost_time:209.6688747406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8492 prompt_cache_len:5151 prompt_cache_ratio:0.6065708902496467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 +DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10730266571044922 s +INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10915446281433105 s +DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=70718289190808696025515795977135806550, time:1750767320.3997424s req_ids:[8] +DEBUG 06-24 20:15:20 [manager.py:391] +ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:209.64860916137695ms total_cost_time:209.69176292419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8493 prompt_cache_len:5151 prompt_cache_ratio:0.6064994701518898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 +DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10724711418151855 s +INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10913610458374023 s +DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=89085277759505408142025224176861574031, time:1750767320.6124532s req_ids:[8] +DEBUG 06-24 20:15:20 [manager.py:391] +ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:205.41763305664062ms total_cost_time:205.4603099822998ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8494 prompt_cache_len:5151 prompt_cache_ratio:0.6064280668707323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 +DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10781502723693848 s +INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s +DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=92916704179941237488806598321225664813, time:1750767320.829385s req_ids:[8] +DEBUG 06-24 20:15:20 [manager.py:391] +ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:212.51416206359863ms total_cost_time:212.5563621520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8495 prompt_cache_len:5151 prompt_cache_ratio:0.6063566804002354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 +DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10965752601623535 s +INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.11220622062683105 s +DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=140906510723481573319937088547607829840, time:1750767321.0414336s req_ids:[8] +DEBUG 06-24 20:15:21 [manager.py:391] +ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:207.91363716125488ms total_cost_time:207.95679092407227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8496 prompt_cache_len:5151 prompt_cache_ratio:0.6062853107344632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 +DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.1080319881439209 s +INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10970449447631836 s +DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=323068116461531204609375460164869341927, time:1750767321.2607837s req_ids:[8] +DEBUG 06-24 20:15:21 [manager.py:391] +ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:177.63423919677734ms total_cost_time:177.67596244812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8497 prompt_cache_len:5151 prompt_cache_ratio:0.6062139578674827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 +DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10791468620300293 s +INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965108871459961 s +DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=119454969091182412363521423605531501278, time:1750767321.4376922s req_ids:[8] +DEBUG 06-24 20:15:21 [manager.py:391] +ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:162.48345375061035ms total_cost_time:162.52732276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8498 prompt_cache_len:5151 prompt_cache_ratio:0.6061426217933631 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 +DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10746407508850098 s +INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10956668853759766 s +DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=118945864247282320913323320385437838865, time:1750767321.6031396s req_ids:[8] +DEBUG 06-24 20:15:21 [manager.py:391] +ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:191.15352630615234ms total_cost_time:191.19763374328613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8499 prompt_cache_len:5151 prompt_cache_ratio:0.6060713025061772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 +DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.31008005142211914 s +INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.3126652240753174 s +DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=236196416598195270259149948309937087996, time:1750767322.0249398s req_ids:[8] +DEBUG 06-24 20:15:22 [manager.py:391] +ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:432.2967529296875ms total_cost_time:432.342529296875ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8500 prompt_cache_len:5151 prompt_cache_ratio:0.606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 +DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10975432395935059 s +INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.11237764358520508 s +DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=29553248500851598115511775970752351938, time:1750767322.2451496s req_ids:[8] +DEBUG 06-24 20:15:22 [manager.py:391] +ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:206.1011791229248ms total_cost_time:206.1452865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8501 prompt_cache_len:5151 prompt_cache_ratio:0.6059287142689095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 +DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10796380043029785 s +INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s +DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=331758296688782613664854051868210306741, time:1750767322.454536s req_ids:[8] +DEBUG 06-24 20:15:22 [manager.py:391] +ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:206.16436004638672ms total_cost_time:206.2070369720459ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8502 prompt_cache_len:5151 prompt_cache_ratio:0.6058574453069866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 +DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10791468620300293 s +INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10997939109802246 s +DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=53853493672397416991690197629247613602, time:1750767322.6689768s req_ids:[8] +DEBUG 06-24 20:15:22 [manager.py:391] +ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:212.0647430419922ms total_cost_time:212.10932731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8503 prompt_cache_len:5151 prompt_cache_ratio:0.6057861931083147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 +DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10798788070678711 s +INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10991144180297852 s +DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=312577998458600255496097158259796688924, time:1750767322.8920121s req_ids:[8] +DEBUG 06-24 20:15:22 [manager.py:391] +ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:212.1286392211914ms total_cost_time:212.1717929840088ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8504 prompt_cache_len:5151 prompt_cache_ratio:0.6057149576669802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 +DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s +INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040163040161133 s +DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=217993710780494814174624110127196104236, time:1750767323.1065829s req_ids:[8] +DEBUG 06-24 20:15:23 [manager.py:391] +ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:207.81421661376953ms total_cost_time:207.85903930664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8505 prompt_cache_len:5151 prompt_cache_ratio:0.6056437389770724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:15:23 [statics_utils.py:24] mean first cost: 228.77406631596386 ms +INFO 06-24 20:15:23 [statics_utils.py:24] mean per token cost: 0.0794685119446591 ms +INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 +DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10848784446716309 s +INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s +DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=129247782875002465410019254433696615145, time:1750767323.3214767s req_ids:[8] +DEBUG 06-24 20:15:23 [manager.py:391] +ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:207.02171325683594ms total_cost_time:207.06629753112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8506 prompt_cache_len:5151 prompt_cache_ratio:0.6055725370326828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 +DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10709810256958008 s +INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10911154747009277 s +DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=132656349958959940570247664895628701529, time:1750767323.5356083s req_ids:[8] +DEBUG 06-24 20:15:23 [manager.py:391] +ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:206.62689208984375ms total_cost_time:206.66956901550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8507 prompt_cache_len:5151 prompt_cache_ratio:0.6055013518279064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 +DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s +INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10977053642272949 s +DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=209991409122001065423055592853578051718, time:1750767323.7486072s req_ids:[8] +DEBUG 06-24 20:15:23 [manager.py:391] +ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:205.74355125427246ms total_cost_time:205.78646659851074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8508 prompt_cache_len:5151 prompt_cache_ratio:0.6054301833568406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 +DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10754060745239258 s +INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10933113098144531 s +DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=545429987252481740797419247378719823, time:1750767323.9614074s req_ids:[8] +DEBUG 06-24 20:15:23 [manager.py:391] +ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:204.6341896057129ms total_cost_time:204.67758178710938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8509 prompt_cache_len:5151 prompt_cache_ratio:0.6053590316135856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 +DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10900712013244629 s +INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s +DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=16392692472748186276922570269306858353, time:1750767324.1703444s req_ids:[8] +DEBUG 06-24 20:15:24 [manager.py:391] +ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:206.49361610412598ms total_cost_time:206.53581619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8510 prompt_cache_len:5151 prompt_cache_ratio:0.6052878965922445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 +DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10696744918823242 s +INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.10900473594665527 s +DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=264358132955926801498855851406260134625, time:1750767324.383713s req_ids:[8] +DEBUG 06-24 20:15:24 [manager.py:391] +ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:379.11510467529297ms total_cost_time:379.17256355285645ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:8511 prompt_cache_len:5151 prompt_cache_ratio:0.6052167782869228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 +DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10718250274658203 s +INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.10905051231384277 s +DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=307384835284001986686130560529391821686, time:1750767324.7678869s req_ids:[8] +DEBUG 06-24 20:15:24 [manager.py:391] +ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:201.60412788391113ms total_cost_time:201.64871215820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8512 prompt_cache_len:5151 prompt_cache_ratio:0.6051456766917294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 +DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10816192626953125 s +INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.11013555526733398 s +DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=157596596356067655189486930346008664597, time:1750767324.976057s req_ids:[8] +DEBUG 06-24 20:15:24 [manager.py:391] +ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:208.2669734954834ms total_cost_time:208.30869674682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8513 prompt_cache_len:5151 prompt_cache_ratio:0.6050745918007753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 +DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10870790481567383 s +INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s +DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=136807545754151353094143520203897769620, time:1750767325.1892252s req_ids:[8] +DEBUG 06-24 20:15:25 [manager.py:391] +ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:208.48727226257324ms total_cost_time:208.52947235107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8514 prompt_cache_len:5151 prompt_cache_ratio:0.6050035236081748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 +DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10793948173522949 s +INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10996174812316895 s +DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=190668272506028533420422347869940039755, time:1750767325.403062s req_ids:[8] +DEBUG 06-24 20:15:25 [manager.py:391] +ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:208.68444442749023ms total_cost_time:208.72902870178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8515 prompt_cache_len:5151 prompt_cache_ratio:0.6049324721080446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 +DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10694670677185059 s +INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886240005493164 s +DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=295735398739757235695089759779826457435, time:1750767325.6214058s req_ids:[8] +DEBUG 06-24 20:15:25 [manager.py:391] +ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:209.94949340820312ms total_cost_time:209.9921703338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8516 prompt_cache_len:5151 prompt_cache_ratio:0.6048614372945045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 +DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10792183876037598 s +INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10997772216796875 s +DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=159856648931109268603093480541227155024, time:1750767325.8343954s req_ids:[8] +DEBUG 06-24 20:15:25 [manager.py:391] +ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:207.08608627319336ms total_cost_time:207.12947845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8517 prompt_cache_len:5151 prompt_cache_ratio:0.6047904191616766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 +DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.11120939254760742 s +DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=153381652037389632006705761380732530862, time:1750767326.0464973s req_ids:[8] +DEBUG 06-24 20:15:26 [manager.py:391] +ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:211.93742752075195ms total_cost_time:211.98177337646484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8518 prompt_cache_len:5151 prompt_cache_ratio:0.6047194177036863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 +DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s +INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.11084365844726562 s +DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=52974734607087009590033185457552262859, time:1750767326.2623494s req_ids:[8] +DEBUG 06-24 20:15:26 [manager.py:391] +ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:210.05773544311523ms total_cost_time:210.10184288024902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8519 prompt_cache_len:5151 prompt_cache_ratio:0.6046484329146613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 +DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.10731816291809082 s +INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.10934042930603027 s +DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=186729565521199244258777162153374024553, time:1750767326.4896312s req_ids:[8] +DEBUG 06-24 20:15:26 [manager.py:391] +ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:220.2146053314209ms total_cost_time:220.27254104614258ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:8520 prompt_cache_len:5151 prompt_cache_ratio:0.6045774647887324 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 +DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.1069338321685791 s +INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.10888218879699707 s +DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=230520383117236471779716580503064090288, time:1750767326.7091699s req_ids:[8] +DEBUG 06-24 20:15:26 [manager.py:391] +ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:370.4872131347656ms total_cost_time:370.530366897583ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8521 prompt_cache_len:5151 prompt_cache_ratio:0.6045065133200329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 +DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10833072662353516 s +INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11028170585632324 s +DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=188583129675801176551374106725754953476, time:1750767327.0792832s req_ids:[8] +DEBUG 06-24 20:15:27 [manager.py:391] +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:204.50687408447266ms total_cost_time:204.54764366149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8522 prompt_cache_len:5151 prompt_cache_ratio:0.6044355785026989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 +DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10864448547363281 s +INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11068391799926758 s +DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=21417598207230193205827627196295039304, time:1750767327.2929027s req_ids:[8] +DEBUG 06-24 20:15:27 [manager.py:391] +ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:208.86778831481934ms total_cost_time:208.91141891479492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8523 prompt_cache_len:5151 prompt_cache_ratio:0.6043646603308694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 +DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10826826095581055 s +INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11036849021911621 s +DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=204157378981048470601218725884134895874, time:1750767327.5084636s req_ids:[8] +DEBUG 06-24 20:15:27 [manager.py:391] +ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:209.5775604248047ms total_cost_time:209.62262153625488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8524 prompt_cache_len:5151 prompt_cache_ratio:0.6042937587986861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 +DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10720229148864746 s +INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.10923910140991211 s +DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=196925882664769043045292876473712605708, time:1750767327.7219563s req_ids:[8] +DEBUG 06-24 20:15:27 [manager.py:391] +ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:212.20731735229492ms total_cost_time:212.2495174407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8525 prompt_cache_len:5151 prompt_cache_ratio:0.6042228739002933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 +DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10759973526000977 s +INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.10956144332885742 s +DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=333347743545925540421858354844586208734, time:1750767327.9414845s req_ids:[8] +DEBUG 06-24 20:15:27 [manager.py:391] +ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:209.57088470458984ms total_cost_time:209.61499214172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8526 prompt_cache_len:5151 prompt_cache_ratio:0.6041520056298382 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 +DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.10722780227661133 s +INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.10918569564819336 s +DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=173394053266153671737294083980024516414, time:1750767328.1564298s req_ids:[8] +DEBUG 06-24 20:15:28 [manager.py:391] +ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:206.79044723510742ms total_cost_time:206.8338394165039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8527 prompt_cache_len:5151 prompt_cache_ratio:0.6040811539814707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 +DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.10726308822631836 s +INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092684268951416 s +DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=30303021994612302613101917295478603769, time:1750767328.369641s req_ids:[8] +DEBUG 06-24 20:15:28 [manager.py:391] +ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:207.13019371032715ms total_cost_time:207.17334747314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8528 prompt_cache_len:5151 prompt_cache_ratio:0.6040103189493433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 +DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.1081399917602539 s +INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101694107055664 s +DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=87859193108282997470079537069547101951, time:1750767328.5805612s req_ids:[8] +DEBUG 06-24 20:15:28 [manager.py:391] +ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:204.9391269683838ms total_cost_time:204.98085021972656ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8529 prompt_cache_len:5151 prompt_cache_ratio:0.6039395005276117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 +DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.3103761672973633 s +INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.31238365173339844 s +DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=192378186404137852813846886238068411766, time:1750767328.9998293s req_ids:[8] +DEBUG 06-24 20:15:28 [manager.py:391] +DEBUG 06-24 20:15:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 37060.934 tokens/s +DEBUG 06-24 20:15:29 [stats.py:37] Avg prompt tokens throughput: 37052.224 tokens/s +DEBUG 06-24 20:15:29 [stats.py:37] Avg generate tokens throughput: 8.709 tokens/s +ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:420.90392112731934ms total_cost_time:420.9468364715576ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8530 prompt_cache_len:5151 prompt_cache_ratio:0.6038686987104338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 +DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10927295684814453 s +INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s +DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=13630894034014213155189222552455811800, time:1750767329.2193623s req_ids:[8] +DEBUG 06-24 20:15:29 [manager.py:391] +ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:215.56591987609863ms total_cost_time:215.6083583831787ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8531 prompt_cache_len:5151 prompt_cache_ratio:0.6037979134919704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 +DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10687112808227539 s +INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.1089940071105957 s +DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=123939434798998087351274025107418030535, time:1750767329.4379287s req_ids:[8] +DEBUG 06-24 20:15:29 [manager.py:391] +ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:206.0568332672119ms total_cost_time:206.0995101928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8532 prompt_cache_len:5151 prompt_cache_ratio:0.6037271448663853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 +DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10761880874633789 s +INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.10955572128295898 s +DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=243310875721114746564180993787040486386, time:1750767329.6489604s req_ids:[8] +DEBUG 06-24 20:15:29 [manager.py:391] +ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:204.34260368347168ms total_cost_time:204.38599586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8533 prompt_cache_len:5151 prompt_cache_ratio:0.6036563928278449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 +DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10699176788330078 s +INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.1089179515838623 s +INFO 06-24 20:15:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=332983164824958923548316388721037697989, time:1750767329.867805s req_ids:[8] +DEBUG 06-24 20:15:29 [manager.py:391] +ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:212.85510063171387ms total_cost_time:212.89634704589844ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8534 prompt_cache_len:5151 prompt_cache_ratio:0.603585657370518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 +DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10807514190673828 s +INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.1099996566772461 s +DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=204690613152714575734141582294821453951, time:1750767330.0830824s req_ids:[8] +DEBUG 06-24 20:15:30 [manager.py:391] +ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:210.8631134033203ms total_cost_time:210.9203338623047ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:8535 prompt_cache_len:5151 prompt_cache_ratio:0.6035149384885764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 +DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s +INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s +DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=264596307124406072009738350548654803927, time:1750767330.2997074s req_ids:[8] +DEBUG 06-24 20:15:30 [manager.py:391] +ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:205.33299446105957ms total_cost_time:205.37614822387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8536 prompt_cache_len:5151 prompt_cache_ratio:0.6034442361761949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 +DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10949897766113281 s +INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.11140990257263184 s +DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=7656796507038843357386589394699898810, time:1750767330.5143342s req_ids:[8] +DEBUG 06-24 20:15:30 [manager.py:391] +ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:377.9163360595703ms total_cost_time:378.05795669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.14162063598632812ms prompt_token_num:8537 prompt_cache_len:5151 prompt_cache_ratio:0.6033735504275507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 +DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10921645164489746 s +INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.11147499084472656 s +DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=72463650261458093323445190087648720601, time:1750767330.8881588s req_ids:[8] +DEBUG 06-24 20:15:30 [manager.py:391] +ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:203.48834991455078ms total_cost_time:203.53174209594727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8538 prompt_cache_len:5151 prompt_cache_ratio:0.6033028812368236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 +DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10732603073120117 s +INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.10932707786560059 s +DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=294024890204422956578584808167364229279, time:1750767331.0994558s req_ids:[8] +DEBUG 06-24 20:15:31 [manager.py:391] +ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:202.6362419128418ms total_cost_time:202.67939567565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8539 prompt_cache_len:5151 prompt_cache_ratio:0.6032322285981965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 +DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.1081993579864502 s +INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11025023460388184 s +DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=280495079524397457292074127742404385637, time:1750767331.3198676s req_ids:[8] +DEBUG 06-24 20:15:31 [manager.py:391] +ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:217.99111366271973ms total_cost_time:218.03593635559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8540 prompt_cache_len:5151 prompt_cache_ratio:0.6031615925058548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 +DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s +INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.10984945297241211 s +DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=96600855143595073290606236396454472021, time:1750767331.5370731s req_ids:[8] +DEBUG 06-24 20:15:31 [manager.py:391] +ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:209.59162712097168ms total_cost_time:209.63549613952637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8541 prompt_cache_len:5151 prompt_cache_ratio:0.6030909729539866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 +DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10858273506164551 s +INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11062145233154297 s +DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=42693446784899497132833166372419739678, time:1750767331.7514043s req_ids:[8] +DEBUG 06-24 20:15:31 [manager.py:391] +ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:209.0129852294922ms total_cost_time:209.05709266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8542 prompt_cache_len:5151 prompt_cache_ratio:0.603020369936783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 +DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10814070701599121 s +INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11011576652526855 s +DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=215624332510421590446244100583823725723, time:1750767331.9646316s req_ids:[8] +DEBUG 06-24 20:15:31 [manager.py:391] +ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:210.8144760131836ms total_cost_time:210.85739135742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8543 prompt_cache_len:5151 prompt_cache_ratio:0.6029497834484373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 +DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.3100240230560303 s +INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.3120403289794922 s +DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=46321486772111671361927148212392424674, time:1750767332.3850904s req_ids:[8] +DEBUG 06-24 20:15:32 [manager.py:391] +ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:417.1028137207031ms total_cost_time:417.1490669250488ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8544 prompt_cache_len:5151 prompt_cache_ratio:0.6028792134831461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 +DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.1090548038482666 s +INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.11089825630187988 s +DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=306033276156440412874305306476695504459, time:1750767332.6066794s req_ids:[8] +DEBUG 06-24 20:15:32 [manager.py:391] +ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:207.89813995361328ms total_cost_time:207.94153213500977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8545 prompt_cache_len:5151 prompt_cache_ratio:0.6028086600351082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 +DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.1074361801147461 s +INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.10928082466125488 s +DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=20162927309447926211624322733699390612, time:1750767332.818703s req_ids:[8] +DEBUG 06-24 20:15:32 [manager.py:391] +ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:208.5130214691162ms total_cost_time:208.5549831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8546 prompt_cache_len:5151 prompt_cache_ratio:0.6027381230985256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 +DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10717391967773438 s +INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10886669158935547 s +DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=249745067147078360916428441156672066786, time:1750767333.030117s req_ids:[8] +DEBUG 06-24 20:15:33 [manager.py:391] +ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:206.0108184814453ms total_cost_time:206.0544490814209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8547 prompt_cache_len:5151 prompt_cache_ratio:0.6026676026676027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 +DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10731363296508789 s +INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10934734344482422 s +DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=11482566865862535043272814229549764421, time:1750767333.2413735s req_ids:[8] +DEBUG 06-24 20:15:33 [manager.py:391] +ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:201.12967491149902ms total_cost_time:201.1730670928955ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8548 prompt_cache_len:5151 prompt_cache_ratio:0.6025970987365465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 +DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:33 [batch.py:51] router release req id 8 +INFO 06-24 20:15:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10761499404907227 s +INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10944437980651855 s +DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=132949898404242696109846845492448455548, time:1750767333.4491472s req_ids:[8] +DEBUG 06-24 20:15:33 [manager.py:391] +ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:205.80339431762695ms total_cost_time:205.84821701049805ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8549 prompt_cache_len:5151 prompt_cache_ratio:0.6025266112995672 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 +DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10796189308166504 s +INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.1101076602935791 s +DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=114858188229823419875342144611938573990, time:1750767333.6585143s req_ids:[8] +DEBUG 06-24 20:15:33 [manager.py:391] +ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:356.0502529144287ms total_cost_time:356.0957908630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8550 prompt_cache_len:5151 prompt_cache_ratio:0.6024561403508772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 +DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10864830017089844 s +INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11056327819824219 s +DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=190316705489918916240635013301405709730, time:1750767334.0184188s req_ids:[8] +DEBUG 06-24 20:15:34 [manager.py:391] +ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:204.8182487487793ms total_cost_time:204.89215850830078ms,out_token_counter:1 mean_per_token_cost_time: 0.07390975952148438ms prompt_token_num:8551 prompt_cache_len:5151 prompt_cache_ratio:0.6023856858846919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 +DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10746121406555176 s +INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.10939455032348633 s +DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=44094763010231987726551674294648442065, time:1750767334.2314978s req_ids:[8] +DEBUG 06-24 20:15:34 [manager.py:391] +ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:206.2854766845703ms total_cost_time:206.3298225402832ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8552 prompt_cache_len:5151 prompt_cache_ratio:0.6023152478952292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 +DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10814142227172852 s +INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s +DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=238787319704647890257745665277201401324, time:1750767334.445922s req_ids:[8] +DEBUG 06-24 20:15:34 [manager.py:391] +ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:207.10134506225586ms total_cost_time:207.14521408081055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8553 prompt_cache_len:5151 prompt_cache_ratio:0.60224482637671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 +DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10856938362121582 s +INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s +DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=230193925894755696956863277133683705673, time:1750767334.656881s req_ids:[8] +DEBUG 06-24 20:15:34 [manager.py:391] +ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:203.9942741394043ms total_cost_time:204.0390968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8554 prompt_cache_len:5151 prompt_cache_ratio:0.6021744213233575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 +DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10873913764953613 s +INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.1106414794921875 s +DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=75850696276421864765907797025711940246, time:1750767334.8785403s req_ids:[8] +DEBUG 06-24 20:15:34 [manager.py:391] +ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:220.08633613586426ms total_cost_time:220.13044357299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8555 prompt_cache_len:5151 prompt_cache_ratio:0.602104032729398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 +DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10839223861694336 s +INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s +DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=249491654645543725449353112108620191805, time:1750767335.0942698s req_ids:[8] +DEBUG 06-24 20:15:35 [manager.py:391] +ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:221.77767753601074ms total_cost_time:221.82106971740723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8556 prompt_cache_len:5151 prompt_cache_ratio:0.6020336605890603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 +DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10730099678039551 s +INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10916018486022949 s +DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=143019255305219443752567633993119414304, time:1750767335.3205013s req_ids:[8] +DEBUG 06-24 20:15:35 [manager.py:391] +ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:369.60816383361816ms total_cost_time:369.65084075927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8557 prompt_cache_len:5151 prompt_cache_ratio:0.6019633048965759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 +DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10713911056518555 s +INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10909199714660645 s +DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=242926426628296345919738222321851993142, time:1750767335.692064s req_ids:[8] +DEBUG 06-24 20:15:35 [manager.py:391] +ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:200.57272911071777ms total_cost_time:200.636625289917ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:8558 prompt_cache_len:5151 prompt_cache_ratio:0.601892965646179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 +DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10735130310058594 s +INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s +DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=297944609435722229786627409721087980954, time:1750767335.9061604s req_ids:[8] +DEBUG 06-24 20:15:35 [manager.py:391] +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:189.9120807647705ms total_cost_time:189.9728775024414ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8559 prompt_cache_len:5151 prompt_cache_ratio:0.6018226428321065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 +DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s +INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11106276512145996 s +DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=115144012956924170296528295967195715759, time:1750767336.0984185s req_ids:[8] +DEBUG 06-24 20:15:36 [manager.py:391] +ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:200.5321979522705ms total_cost_time:200.5774974822998ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8560 prompt_cache_len:5151 prompt_cache_ratio:0.6017523364485982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 +DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10886454582214355 s +INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092972755432129 s +DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=320430095561094426093602546940470333338, time:1750767336.3137348s req_ids:[8] +DEBUG 06-24 20:15:36 [manager.py:391] +ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:214.25318717956543ms total_cost_time:214.2956256866455ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8561 prompt_cache_len:5151 prompt_cache_ratio:0.6016820464898961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 +DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10876703262329102 s +INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s +DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=159549008973744229682345434415274231323, time:1750767336.5287848s req_ids:[8] +DEBUG 06-24 20:15:36 [manager.py:391] +ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:204.0703296661377ms total_cost_time:204.12921905517578ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8562 prompt_cache_len:5151 prompt_cache_ratio:0.6016117729502453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 +DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10885500907897949 s +INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11072564125061035 s +DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=246482161281524109034759150350025120444, time:1750767336.7375507s req_ids:[8] +DEBUG 06-24 20:15:36 [manager.py:391] +ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:207.37338066101074ms total_cost_time:207.43584632873535ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8563 prompt_cache_len:5151 prompt_cache_ratio:0.6015415158238935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 +DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:36 [batch.py:51] router release req id 8 +INFO 06-24 20:15:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10849761962890625 s +INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11033082008361816 s +DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=281939244502849994131118807085798143521, time:1750767336.952223s req_ids:[8] +DEBUG 06-24 20:15:36 [manager.py:391] +ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:207.00788497924805ms total_cost_time:207.05389976501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8564 prompt_cache_len:5151 prompt_cache_ratio:0.6014712751050911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 +DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10722565650939941 s +INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.10927534103393555 s +DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=321476571598713713911984069092578972050, time:1750767337.1646285s req_ids:[8] +DEBUG 06-24 20:15:37 [manager.py:391] +ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:207.30900764465332ms total_cost_time:207.3690891265869ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8565 prompt_cache_len:5151 prompt_cache_ratio:0.601401050788091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 +DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10891103744506836 s +INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.11089110374450684 s +DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=301093304575456325421500402413931644072, time:1750767337.3827157s req_ids:[8] +DEBUG 06-24 20:15:37 [manager.py:391] +ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:212.432861328125ms total_cost_time:212.4931812286377ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8566 prompt_cache_len:5151 prompt_cache_ratio:0.6013308428671492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 +DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10717988014221191 s +INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914850234985352 s +DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=199186451515677796896414973713591242148, time:1750767337.5962982s req_ids:[8] +DEBUG 06-24 20:15:37 [manager.py:391] +ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:209.48457717895508ms total_cost_time:209.54275131225586ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8567 prompt_cache_len:5151 prompt_cache_ratio:0.6012606513365238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 +DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.11002993583679199 s +INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.11197090148925781 s +DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=243929967683984173325726418544622365672, time:1750767337.8114219s req_ids:[8] +DEBUG 06-24 20:15:37 [manager.py:391] +ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:207.54337310791016ms total_cost_time:207.63206481933594ms,out_token_counter:1 mean_per_token_cost_time: 0.08869171142578125ms prompt_token_num:8568 prompt_cache_len:5151 prompt_cache_ratio:0.6011904761904762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 +DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10728788375854492 s +INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10916757583618164 s +DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=53004489249112506426435280025071636276, time:1750767338.0247083s req_ids:[8] +DEBUG 06-24 20:15:38 [manager.py:391] +ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:206.0554027557373ms total_cost_time:206.14099502563477ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:8569 prompt_cache_len:5151 prompt_cache_ratio:0.6011203174232699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 +DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10764575004577637 s +INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10944104194641113 s +DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=311776384963781421621041571655868517066, time:1750767338.2370148s req_ids:[8] +DEBUG 06-24 20:15:38 [manager.py:391] +ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:168.93625259399414ms total_cost_time:168.99585723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8570 prompt_cache_len:5151 prompt_cache_ratio:0.6010501750291716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 +DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s +INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.11005473136901855 s +DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=218580787118050884884352546886344104858, time:1750767338.4094918s req_ids:[8] +DEBUG 06-24 20:15:38 [manager.py:391] +ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:193.14956665039062ms total_cost_time:193.21012496948242ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8571 prompt_cache_len:5151 prompt_cache_ratio:0.6009800490024502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 +DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s +INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.11043143272399902 s +DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=170231516057042826970898834271448194058, time:1750767338.6086905s req_ids:[8] +DEBUG 06-24 20:15:38 [manager.py:391] +ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:355.67188262939453ms total_cost_time:355.7169437408447ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8572 prompt_cache_len:5151 prompt_cache_ratio:0.6009099393373775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 +DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s +INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10950732231140137 s +DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=210511229865047557556495184117328315576, time:1750767338.9716892s req_ids:[8] +DEBUG 06-24 20:15:38 [manager.py:391] +ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:15:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 36572.764 tokens/s +DEBUG 06-24 20:15:39 [stats.py:37] Avg prompt tokens throughput: 36564.113 tokens/s +DEBUG 06-24 20:15:39 [stats.py:37] Avg generate tokens throughput: 8.650 tokens/s +INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:203.41086387634277ms total_cost_time:203.45425605773926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8573 prompt_cache_len:5151 prompt_cache_ratio:0.6008398460282282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 +DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10854673385620117 s +INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056852340698242 s +DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=75056801216580952783662098251538391641, time:1750767339.1838658s req_ids:[8] +DEBUG 06-24 20:15:39 [manager.py:391] +ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:204.8492431640625ms total_cost_time:204.8947811126709ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8574 prompt_cache_len:5151 prompt_cache_ratio:0.6007697690692793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 +DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10810518264770508 s +INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s +DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=163924599713165710140080591958966886862, time:1750767339.393993s req_ids:[8] +DEBUG 06-24 20:15:39 [manager.py:391] +ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:173.9063262939453ms total_cost_time:173.96783828735352ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8575 prompt_cache_len:5151 prompt_cache_ratio:0.6006997084548105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 +DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10794901847839355 s +INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.10953044891357422 s +DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=89551969239288181716671943866777561815, time:1750767339.575326s req_ids:[8] +DEBUG 06-24 20:15:39 [manager.py:391] +ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:202.82888412475586ms total_cost_time:202.87275314331055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8576 prompt_cache_len:5151 prompt_cache_ratio:0.6006296641791045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 +DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10716629028320312 s +INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.10916686058044434 s +DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=28138723432161160509557309431187563911, time:1750767339.7832804s req_ids:[8] +DEBUG 06-24 20:15:39 [manager.py:391] +ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:204.3466567993164ms total_cost_time:204.3898105621338ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8577 prompt_cache_len:5151 prompt_cache_ratio:0.6005596362364463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 +DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10756325721740723 s +INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096506118774414 s +DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=104645560876462385456351091684670193820, time:1750767339.9949799s req_ids:[8] +DEBUG 06-24 20:15:39 [manager.py:391] +ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:206.44450187683105ms total_cost_time:206.48789405822754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8578 prompt_cache_len:5151 prompt_cache_ratio:0.6004896246211238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 +DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10828137397766113 s +INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11016154289245605 s +DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=40205074096559871779339078023710601417, time:1750767340.2076926s req_ids:[8] +DEBUG 06-24 20:15:40 [manager.py:391] +ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:208.19854736328125ms total_cost_time:208.25743675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8579 prompt_cache_len:5151 prompt_cache_ratio:0.6004196293274274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 +DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:40 [batch.py:51] router release req id 8 +INFO 06-24 20:15:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10867428779602051 s +INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11059784889221191 s +DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=299444953691497921732522428330361829093, time:1750767340.421328s req_ids:[8] +DEBUG 06-24 20:15:40 [manager.py:391] +ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:205.75284957885742ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8580 prompt_cache_len:5151 prompt_cache_ratio:0.6003496503496504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 +DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10962677001953125 s +INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11159539222717285 s +DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=298113432649806000816871885751269445462, time:1750767340.6340387s req_ids:[8] +DEBUG 06-24 20:15:40 [manager.py:391] +ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:208.50086212158203ms total_cost_time:208.54473114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8581 prompt_cache_len:5151 prompt_cache_ratio:0.6002796876820883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 +DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s +INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11042141914367676 s +DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=67606458072795178270782355267272470688, time:1750767340.8462453s req_ids:[8] +DEBUG 06-24 20:15:40 [manager.py:391] +ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:206.89797401428223ms total_cost_time:206.96020126342773ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8582 prompt_cache_len:5151 prompt_cache_ratio:0.6002097413190398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 +DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s +INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.11038827896118164 s +DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=59716419677110937178594468400947965854, time:1750767341.058243s req_ids:[8] +DEBUG 06-24 20:15:41 [manager.py:391] +ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:204.41794395446777ms total_cost_time:204.46348190307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8583 prompt_cache_len:5151 prompt_cache_ratio:0.600139811254806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 +DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s +DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=219318013247674326003980485475133496555, time:1750767341.2689927s req_ids:[8] +DEBUG 06-24 20:15:41 [manager.py:391] +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:371.25253677368164ms total_cost_time:371.29664421081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8584 prompt_cache_len:5151 prompt_cache_ratio:0.6000698974836906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 +DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.10729312896728516 s +INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10914826393127441 s +DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=20858375911226063548689145554477107170, time:1750767341.6436179s req_ids:[8] +DEBUG 06-24 20:15:41 [manager.py:391] +ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:202.15272903442383ms total_cost_time:202.21424102783203ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8585 prompt_cache_len:5151 prompt_cache_ratio:0.6 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 +DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.10736250877380371 s +INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10925126075744629 s +DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=200731905809083425961225210310501200279, time:1750767341.8565917s req_ids:[8] +DEBUG 06-24 20:15:41 [manager.py:391] +ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:209.62905883789062ms total_cost_time:209.67388153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8586 prompt_cache_len:5151 prompt_cache_ratio:0.5999301187980434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 +DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10831928253173828 s +INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11024999618530273 s +DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=27311638682989539438306748475180700865, time:1750767342.0713277s req_ids:[8] +DEBUG 06-24 20:15:42 [manager.py:391] +ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:204.44130897521973ms total_cost_time:204.4839859008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8587 prompt_cache_len:5151 prompt_cache_ratio:0.5998602538721323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 +DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.11011385917663574 s +INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11210441589355469 s +DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=329406751854207152957844446401031356306, time:1750767342.2797034s req_ids:[8] +DEBUG 06-24 20:15:42 [manager.py:391] +ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:204.72025871276855ms total_cost_time:204.76222038269043ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8588 prompt_cache_len:5151 prompt_cache_ratio:0.5997904052165812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 +DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s +INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s +DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=103453187026908672081498915049816639244, time:1750767342.4918904s req_ids:[8] +DEBUG 06-24 20:15:42 [manager.py:391] +ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:206.23493194580078ms total_cost_time:206.27903938293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8589 prompt_cache_len:5151 prompt_cache_ratio:0.5997205728257073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 +DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10736870765686035 s +INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.10936188697814941 s +DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=222679028378662759729419216246031457564, time:1750767342.704289s req_ids:[8] +DEBUG 06-24 20:15:42 [manager.py:391] +ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:209.7301483154297ms total_cost_time:209.77306365966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8590 prompt_cache_len:5151 prompt_cache_ratio:0.5996507566938301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 +DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10755157470703125 s +INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11011815071105957 s +DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=252183364096878671407651159214985288889, time:1750767342.9198287s req_ids:[8] +DEBUG 06-24 20:15:42 [manager.py:391] +ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:207.7035903930664ms total_cost_time:207.7462673187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8591 prompt_cache_len:5151 prompt_cache_ratio:0.5995809568152718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 +DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10781407356262207 s +INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.1098785400390625 s +DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=237381923391058047345873965357739994921, time:1750767343.135878s req_ids:[8] +DEBUG 06-24 20:15:43 [manager.py:391] +ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:209.63120460510254ms total_cost_time:209.67507362365723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8592 prompt_cache_len:5151 prompt_cache_ratio:0.5995111731843575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 +DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10786104202270508 s +INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.10969352722167969 s +DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=95811665106767276770797142424348274625, time:1750767343.3543055s req_ids:[8] +DEBUG 06-24 20:15:43 [manager.py:391] +ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:214.27607536315918ms total_cost_time:214.32065963745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8593 prompt_cache_len:5151 prompt_cache_ratio:0.5994414057954148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 +DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10824370384216309 s +INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s +DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=164390461516707769514663475345394406834, time:1750767343.5694385s req_ids:[8] +DEBUG 06-24 20:15:43 [manager.py:391] +ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:206.56585693359375ms total_cost_time:206.61067962646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8594 prompt_cache_len:5151 prompt_cache_ratio:0.599371654642774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 +DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10872650146484375 s +INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.1109933853149414 s +DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=222259092305952009232631941802004750841, time:1750767343.7823122s req_ids:[8] +DEBUG 06-24 20:15:43 [manager.py:391] +ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:207.03792572021484ms total_cost_time:207.08227157592773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8595 prompt_cache_len:5151 prompt_cache_ratio:0.5993019197207679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 +DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.3097259998321533 s +INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.31168603897094727 s +DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=188805496688941522141198763281802615338, time:1750767344.1944442s req_ids:[8] +DEBUG 06-24 20:15:44 [manager.py:391] +ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:411.13734245300293ms total_cost_time:411.18359565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8596 prompt_cache_len:5151 prompt_cache_ratio:0.599232201023732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 +DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.1083517074584961 s +INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11099696159362793 s +DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=225682591460667085788851878430834158903, time:1750767344.4108748s req_ids:[8] +DEBUG 06-24 20:15:44 [manager.py:391] +ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:206.96783065795898ms total_cost_time:207.0138454437256ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8597 prompt_cache_len:5151 prompt_cache_ratio:0.5991624985460045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 +DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.10881209373474121 s +INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s +DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=196682584126845284413674532490968553575, time:1750767344.6248553s req_ids:[8] +DEBUG 06-24 20:15:44 [manager.py:391] +ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:207.03721046447754ms total_cost_time:207.08250999450684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8598 prompt_cache_len:5151 prompt_cache_ratio:0.599092812281926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 +DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.1081552505493164 s +INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11017847061157227 s +DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=223858395401425842925905640274644645428, time:1750767344.8375568s req_ids:[8] +DEBUG 06-24 20:15:44 [manager.py:391] +ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:204.51116561889648ms total_cost_time:204.55336570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8599 prompt_cache_len:5151 prompt_cache_ratio:0.5990231422258402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 +DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s +INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1106114387512207 s +DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=20850219675375486767706871389917731487, time:1750767345.0451617s req_ids:[8] +DEBUG 06-24 20:15:45 [manager.py:391] +ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:202.56972312927246ms total_cost_time:202.61216163635254ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8600 prompt_cache_len:5151 prompt_cache_ratio:0.5989534883720931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 +DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10725831985473633 s +INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1091611385345459 s +DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=293515760268833438742091879849543317162, time:1750767345.2556036s req_ids:[8] +DEBUG 06-24 20:15:45 [manager.py:391] +ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:204.83994483947754ms total_cost_time:204.88524436950684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8601 prompt_cache_len:5151 prompt_cache_ratio:0.5988838507150331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 +DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10742855072021484 s +INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1093144416809082 s +DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=84380401665391759316985189299352539421, time:1750767345.4661472s req_ids:[8] +DEBUG 06-24 20:15:45 [manager.py:391] +ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:204.4520378112793ms total_cost_time:204.4963836669922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8602 prompt_cache_len:5151 prompt_cache_ratio:0.5988142292490118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 +DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10878443717956543 s +INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s +DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=79237542835125964022601807237201019938, time:1750767345.6788263s req_ids:[8] +DEBUG 06-24 20:15:45 [manager.py:391] +ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:205.54804801940918ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8603 prompt_cache_len:5151 prompt_cache_ratio:0.5987446239683831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 +DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10827112197875977 s +INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.11027789115905762 s +DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=100013118033941836532123354560368255615, time:1750767345.8921201s req_ids:[8] +DEBUG 06-24 20:15:45 [manager.py:391] +ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:207.0302963256836ms total_cost_time:207.07416534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8604 prompt_cache_len:5151 prompt_cache_ratio:0.5986750348675035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 +DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10726284980773926 s +INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.10929536819458008 s +DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=325703733726895029902083958698920612783, time:1750767346.1054127s req_ids:[8] +DEBUG 06-24 20:15:46 [manager.py:391] +ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:202.17108726501465ms total_cost_time:202.193021774292ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:8605 prompt_cache_len:5151 prompt_cache_ratio:0.5986054619407322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 +DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.3094618320465088 s +INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.3114497661590576 s +DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=3890015597716843859054928432341133673, time:1750767346.5108368s req_ids:[8] +DEBUG 06-24 20:15:46 [manager.py:391] +ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:406.827449798584ms total_cost_time:406.87060356140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8606 prompt_cache_len:5151 prompt_cache_ratio:0.5985359051824308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 +DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10828638076782227 s +INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.11067986488342285 s +DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=192015898212516578439792107589247682627, time:1750767346.7272089s req_ids:[8] +DEBUG 06-24 20:15:46 [manager.py:391] +ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:207.12661743164062ms total_cost_time:207.1692943572998ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8607 prompt_cache_len:5151 prompt_cache_ratio:0.5984663645869641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 +DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:46 [batch.py:51] router release req id 8 +INFO 06-24 20:15:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10808467864990234 s +INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.10943388938903809 s +DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=56221773896007466417818778916375672063, time:1750767346.941499s req_ids:[8] +DEBUG 06-24 20:15:46 [manager.py:391] +ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:208.40144157409668ms total_cost_time:208.44459533691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8608 prompt_cache_len:5151 prompt_cache_ratio:0.5983968401486989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 +DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10941267013549805 s +INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.11085391044616699 s +DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=145506447045775515652484870123089572861, time:1750767347.1563153s req_ids:[8] +DEBUG 06-24 20:15:47 [manager.py:391] +ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:206.8309783935547ms total_cost_time:206.88486099243164ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:8609 prompt_cache_len:5151 prompt_cache_ratio:0.5983273318620049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 +DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10898399353027344 s +INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.11042118072509766 s +DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=147354888719752725993282252457687051201, time:1750767347.370712s req_ids:[8] +DEBUG 06-24 20:15:47 [manager.py:391] +ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:208.44554901123047ms total_cost_time:208.47272872924805ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8610 prompt_cache_len:5151 prompt_cache_ratio:0.5982578397212543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 +DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s +INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.10828399658203125 s +INFO 06-24 20:15:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=98893103449333088770465065003625350011, time:1750767347.5875823s req_ids:[8] +DEBUG 06-24 20:15:47 [manager.py:391] +ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:209.64741706848145ms total_cost_time:209.67507362365723ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8611 prompt_cache_len:5151 prompt_cache_ratio:0.5981883637208222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 +DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10707592964172363 s +INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.10837316513061523 s +DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=134816064425607312544796702415860207762, time:1750767347.8028426s req_ids:[8] +DEBUG 06-24 20:15:47 [manager.py:391] +ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:209.52367782592773ms total_cost_time:209.55181121826172ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8612 prompt_cache_len:5151 prompt_cache_ratio:0.5981189038550859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 +DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.1069183349609375 s +INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10822081565856934 s +DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=127545821474840010215304932526415060315, time:1750767348.016962s req_ids:[8] +DEBUG 06-24 20:15:48 [manager.py:391] +ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:210.07442474365234ms total_cost_time:210.10351181030273ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:8613 prompt_cache_len:5151 prompt_cache_ratio:0.5980494601184256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 +DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.1067662239074707 s +INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10810685157775879 s +DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=76707537922744350734486545071765588201, time:1750767348.2327988s req_ids:[8] +DEBUG 06-24 20:15:48 [manager.py:391] +ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:209.5181941986084ms total_cost_time:209.54489707946777ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8614 prompt_cache_len:5151 prompt_cache_ratio:0.597980032505224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 +DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10725879669189453 s +INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10860252380371094 s +DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=132889472464794341510608824653082723641, time:1750767348.4482346s req_ids:[8] +DEBUG 06-24 20:15:48 [manager.py:391] +ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:213.3018970489502ms total_cost_time:213.32907676696777ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8615 prompt_cache_len:5151 prompt_cache_ratio:0.5979106210098665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 +DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10726690292358398 s +INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10848641395568848 s +DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=27353820628027354836925368631840326932, time:1750767348.664918s req_ids:[8] +DEBUG 06-24 20:15:48 [manager.py:391] +ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:214.43581581115723ms total_cost_time:214.49732780456543ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8616 prompt_cache_len:5151 prompt_cache_ratio:0.5978412256267409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 +DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s +INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10919308662414551 s +DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=80834130963619343026104698173639495931, time:1750767348.8791428s req_ids:[8] +DEBUG 06-24 20:15:48 [manager.py:391] +ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:203.58729362487793ms total_cost_time:203.6144733428955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8617 prompt_cache_len:5151 prompt_cache_ratio:0.597771846350238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 +DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s +INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10976290702819824 s +DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=327947773210663783207989291382117020807, time:1750767349.091376s req_ids:[8] +DEBUG 06-24 20:15:49 [manager.py:391] +DEBUG 06-24 20:15:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 38559.720 tokens/s +DEBUG 06-24 20:15:49 [stats.py:37] Avg prompt tokens throughput: 38550.850 tokens/s +DEBUG 06-24 20:15:49 [stats.py:37] Avg generate tokens throughput: 8.870 tokens/s +ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:212.01086044311523ms total_cost_time:212.0378017425537ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8618 prompt_cache_len:5151 prompt_cache_ratio:0.5977024831747505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 +DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s +INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10959124565124512 s +DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=195110582542508504166370787372578000150, time:1750767349.3075764s req_ids:[8] +DEBUG 06-24 20:15:49 [manager.py:391] +ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:211.98606491088867ms total_cost_time:212.04590797424316ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8619 prompt_cache_len:5151 prompt_cache_ratio:0.5976331360946746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 +DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.3115818500518799 s +INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.31351208686828613 s +DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=4675993680884649036898655705398425481, time:1750767349.7262797s req_ids:[8] +DEBUG 06-24 20:15:49 [manager.py:391] +ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:415.8141613006592ms total_cost_time:415.84014892578125ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:8620 prompt_cache_len:5151 prompt_cache_ratio:0.5975638051044083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 +DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.1069791316986084 s +INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10884737968444824 s +DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=336463105600932342479240478027388247716, time:1750767349.9503822s req_ids:[8] +DEBUG 06-24 20:15:49 [manager.py:391] +ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:213.47546577453613ms total_cost_time:213.5019302368164ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8621 prompt_cache_len:5151 prompt_cache_ratio:0.5974944901983529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 +DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10730576515197754 s +INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.10967278480529785 s +DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=177511773517448391718539318962146872220, time:1750767350.1670706s req_ids:[8] +DEBUG 06-24 20:15:50 [manager.py:391] +ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:210.69049835205078ms total_cost_time:210.71720123291016ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8622 prompt_cache_len:5151 prompt_cache_ratio:0.5974251913709117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 +DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10719943046569824 s +INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.109100341796875 s +DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=210839971689128467243315078449690056794, time:1750767350.3833394s req_ids:[8] +DEBUG 06-24 20:15:50 [manager.py:391] +ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:211.01021766662598ms total_cost_time:211.03644371032715ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8623 prompt_cache_len:5151 prompt_cache_ratio:0.5973559086164908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 +DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.1074674129486084 s +INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.10946178436279297 s +DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=35467724990460301039167846945245487855, time:1750767350.597482s req_ids:[8] +DEBUG 06-24 20:15:50 [manager.py:391] +ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:211.30824089050293ms total_cost_time:211.3358974456787ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8624 prompt_cache_len:5151 prompt_cache_ratio:0.5972866419294991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 +DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10677528381347656 s +INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.1085672378540039 s +DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=187635642155877287331304612064724676632, time:1750767350.8128176s req_ids:[8] +DEBUG 06-24 20:15:50 [manager.py:391] +ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:166.40019416809082ms total_cost_time:166.4261817932129ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:8625 prompt_cache_len:5151 prompt_cache_ratio:0.5972173913043478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 +DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10712671279907227 s +INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.1091756820678711 s +DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=145838419759000090318337819669025115810, time:1750767350.9809206s req_ids:[8] +DEBUG 06-24 20:15:50 [manager.py:391] +ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:204.6494483947754ms total_cost_time:204.67591285705566ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8626 prompt_cache_len:5151 prompt_cache_ratio:0.597148156735451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 +DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10735964775085449 s +INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.1092691421508789 s +DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=305902874484456029029440808145185695119, time:1750767351.1936886s req_ids:[8] +DEBUG 06-24 20:15:51 [manager.py:391] +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.80319786071777ms total_cost_time:211.83109283447266ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8627 prompt_cache_len:5151 prompt_cache_ratio:0.597078938217225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 +DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10698461532592773 s +INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10890388488769531 s +DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=80570532653570611367847552903431975869, time:1750767351.4103572s req_ids:[8] +DEBUG 06-24 20:15:51 [manager.py:391] +ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.49182319641113ms total_cost_time:211.51995658874512ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8628 prompt_cache_len:5151 prompt_cache_ratio:0.597009735744089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 +DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10724449157714844 s +INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s +DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=10089758097853426141610710946775075217, time:1750767351.6261542s req_ids:[8] +DEBUG 06-24 20:15:51 [manager.py:391] +ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.49396896362305ms total_cost_time:211.52114868164062ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8629 prompt_cache_len:5151 prompt_cache_ratio:0.5969405493104647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 +DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10691308975219727 s +INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10881423950195312 s +DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=285404613080003683239576750062296706496, time:1750767351.8433146s req_ids:[8] +DEBUG 06-24 20:15:51 [manager.py:391] +ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:214.09273147583008ms total_cost_time:214.12229537963867ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:8630 prompt_cache_len:5151 prompt_cache_ratio:0.5968713789107764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 +DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10705399513244629 s +INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1089177131652832 s +DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=61198116048990528694271310173769103177, time:1750767352.0603604s req_ids:[8] +DEBUG 06-24 20:15:52 [manager.py:391] +ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:377.07042694091797ms total_cost_time:377.09665298461914ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8631 prompt_cache_len:5151 prompt_cache_ratio:0.5968022245394509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 +DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10684728622436523 s +INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s +DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=89910616842636114760477870414949685048, time:1750767352.4374418s req_ids:[8] +DEBUG 06-24 20:15:52 [manager.py:391] +ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:205.5208683013916ms total_cost_time:205.54852485656738ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8632 prompt_cache_len:5151 prompt_cache_ratio:0.5967330861909175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 +DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10734868049621582 s +INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1092081069946289 s +DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=266617003662529885924990680195403131116, time:1750767352.6528072s req_ids:[8] +DEBUG 06-24 20:15:52 [manager.py:391] +ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:212.4636173248291ms total_cost_time:212.49079704284668ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8633 prompt_cache_len:5151 prompt_cache_ratio:0.5966639638596085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 +DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10766434669494629 s +INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s +DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=236414599979207992362211421766389776763, time:1750767352.8662076s req_ids:[8] +DEBUG 06-24 20:15:52 [manager.py:391] +ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:211.75885200500488ms total_cost_time:211.78698539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8634 prompt_cache_len:5151 prompt_cache_ratio:0.5965948575399583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 +DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10729670524597168 s +INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s +DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=155870433669323759513484788182705282360, time:1750767353.0824475s req_ids:[8] +DEBUG 06-24 20:15:53 [manager.py:391] +ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:209.57326889038086ms total_cost_time:209.59877967834473ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8635 prompt_cache_len:5151 prompt_cache_ratio:0.5965257672264042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 +INFO 06-24 20:15:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:15:53 [statics_utils.py:24] mean first cost: 228.65261757368168 ms +INFO 06-24 20:15:53 [statics_utils.py:24] mean per token cost: 0.07839269675614903 ms +DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10674834251403809 s +INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10862064361572266 s +DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=304191109677564794397697662014726846233, time:1750767353.2970173s req_ids:[8] +DEBUG 06-24 20:15:53 [manager.py:391] +ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:211.03453636169434ms total_cost_time:211.06410026550293ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:8636 prompt_cache_len:5151 prompt_cache_ratio:0.5964566929133859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 +DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s +INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.1096646785736084 s +DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=310087298978583217011911034363535990094, time:1750767353.5119967s req_ids:[8] +DEBUG 06-24 20:15:53 [manager.py:391] +ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:210.88242530822754ms total_cost_time:210.91032028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8637 prompt_cache_len:5151 prompt_cache_ratio:0.5963876345953456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 +DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10732030868530273 s +INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10922122001647949 s +DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=155887026690505235392210963667584678908, time:1750767353.7277205s req_ids:[8] +DEBUG 06-24 20:15:53 [manager.py:391] +ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:211.92693710327148ms total_cost_time:211.95435523986816ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8638 prompt_cache_len:5151 prompt_cache_ratio:0.5963185922667285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 +DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10707974433898926 s +INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10883784294128418 s +DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=141046997045262487931318167717964286773, time:1750767353.943042s req_ids:[8] +DEBUG 06-24 20:15:53 [manager.py:391] +ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:172.56975173950195ms total_cost_time:172.59693145751953ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8639 prompt_cache_len:5151 prompt_cache_ratio:0.5962495659219818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 +DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10758209228515625 s +INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10924577713012695 s +DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=126523020401569277934202558477030988759, time:1750767354.1188173s req_ids:[8] +DEBUG 06-24 20:15:54 [manager.py:391] +ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:168.1206226348877ms total_cost_time:168.14708709716797ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8640 prompt_cache_len:5151 prompt_cache_ratio:0.5961805555555556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 +DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.1068108081817627 s +INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10861063003540039 s +DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=325895951143817156589602412438011297519, time:1750767354.2898452s req_ids:[8] +DEBUG 06-24 20:15:54 [manager.py:391] +ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:354.04253005981445ms total_cost_time:354.0680408477783ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8641 prompt_cache_len:5151 prompt_cache_ratio:0.5961115611619026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 +DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:54 [batch.py:51] router release req id 8 +INFO 06-24 20:15:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10723423957824707 s +INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.1091611385345459 s +DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=335950042744354890384831743976553383240, time:1750767354.647487s req_ids:[8] +DEBUG 06-24 20:15:54 [manager.py:391] +ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:203.71031761169434ms total_cost_time:203.7370204925537ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8642 prompt_cache_len:5151 prompt_cache_ratio:0.5960425827354779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 +DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10797977447509766 s +INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10989880561828613 s +DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=75479750132709722285116054173934217801, time:1750767354.85957s req_ids:[8] +DEBUG 06-24 20:15:54 [manager.py:391] +ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:209.81478691101074ms total_cost_time:209.8410129547119ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8643 prompt_cache_len:5151 prompt_cache_ratio:0.5959736202707393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 +DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10711431503295898 s +INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10898470878601074 s +DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=296958158523771365378641166377561885391, time:1750767355.0735836s req_ids:[8] +DEBUG 06-24 20:15:55 [manager.py:391] +ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:210.34884452819824ms total_cost_time:210.37626266479492ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8644 prompt_cache_len:5151 prompt_cache_ratio:0.5959046737621472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 +DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10794830322265625 s +INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10994148254394531 s +DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=102357261601462566273949875584226125655, time:1750767355.3002286s req_ids:[8] +DEBUG 06-24 20:15:55 [manager.py:391] +ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:223.7377166748047ms total_cost_time:223.76012802124023ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:8645 prompt_cache_len:5151 prompt_cache_ratio:0.5958357432041642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 +DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10712385177612305 s +INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s +DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=211027751720498640499274939534724818704, time:1750767355.519101s req_ids:[8] +DEBUG 06-24 20:15:55 [manager.py:391] +ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:212.7857208251953ms total_cost_time:212.8145694732666ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:8646 prompt_cache_len:5151 prompt_cache_ratio:0.5957668285912561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 +DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10724210739135742 s +INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10933160781860352 s +DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=168972539811369224694438541514754513152, time:1750767355.734947s req_ids:[8] +DEBUG 06-24 20:15:55 [manager.py:391] +ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:211.46059036254883ms total_cost_time:211.4884853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8647 prompt_cache_len:5151 prompt_cache_ratio:0.5956979299178906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 +DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10657978057861328 s +INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10850024223327637 s +DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=102156733295605558217938263674384558340, time:1750767355.956836s req_ids:[8] +DEBUG 06-24 20:15:55 [manager.py:391] +ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:217.99802780151367ms total_cost_time:218.02377700805664ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8648 prompt_cache_len:5151 prompt_cache_ratio:0.5956290471785384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 +DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.10700392723083496 s +INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.10892295837402344 s +DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=45415003266625811108577610172499233463, time:1750767356.1747308s req_ids:[8] +DEBUG 06-24 20:15:56 [manager.py:391] +ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:210.5996608734131ms total_cost_time:210.62707901000977ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8649 prompt_cache_len:5151 prompt_cache_ratio:0.5955601803676726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 +DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.30879640579223633 s +INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.31078219413757324 s +DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=238811191861258323942676912626546299049, time:1750767356.5888302s req_ids:[8] +DEBUG 06-24 20:15:56 [manager.py:391] +ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:415.82345962524414ms total_cost_time:415.90118408203125ms,out_token_counter:1 mean_per_token_cost_time: 0.07772445678710938ms prompt_token_num:8650 prompt_cache_len:5151 prompt_cache_ratio:0.5954913294797688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 +DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.10813140869140625 s +INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.11083292961120605 s +DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=265007676071601060982558305109637889232, time:1750767356.8096282s req_ids:[8] +DEBUG 06-24 20:15:56 [manager.py:391] +ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:207.02075958251953ms total_cost_time:207.0484161376953ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8651 prompt_cache_len:5151 prompt_cache_ratio:0.5954224945093053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 +DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10718894004821777 s +INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10924386978149414 s +DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=322469694216646632210091238024816269063, time:1750767357.0232863s req_ids:[8] +DEBUG 06-24 20:15:57 [manager.py:391] +ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:211.81774139404297ms total_cost_time:211.86208724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8652 prompt_cache_len:5151 prompt_cache_ratio:0.5953536754507628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 +DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s +INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.11073803901672363 s +DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=56816474326029314301617780521708812376, time:1750767357.237419s req_ids:[8] +DEBUG 06-24 20:15:57 [manager.py:391] +ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:204.32496070861816ms total_cost_time:204.36906814575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8653 prompt_cache_len:5151 prompt_cache_ratio:0.5952848722986247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 +DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s +INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.11053133010864258 s +DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=25346880510727909014291123664824867652, time:1750767357.4468966s req_ids:[8] +DEBUG 06-24 20:15:57 [manager.py:391] +ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:204.2715549468994ms total_cost_time:204.3159008026123ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8654 prompt_cache_len:5151 prompt_cache_ratio:0.5952160850473769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 +DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10801410675048828 s +INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10997891426086426 s +DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=281232410824694217759969696144858928011, time:1750767357.6576092s req_ids:[8] +DEBUG 06-24 20:15:57 [manager.py:391] +ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:206.39872550964355ms total_cost_time:206.44235610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8655 prompt_cache_len:5151 prompt_cache_ratio:0.5951473136915078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 +DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10784459114074707 s +INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10973072052001953 s +DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=240856594904333817151454836605617472116, time:1750767357.8764405s req_ids:[8] +DEBUG 06-24 20:15:57 [manager.py:391] +ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:215.72566032409668ms total_cost_time:215.76905250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8656 prompt_cache_len:5151 prompt_cache_ratio:0.5950785582255084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 +DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.109344482421875 s +INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.11129450798034668 s +DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=299768019684239505048008319246407127342, time:1750767358.0925808s req_ids:[8] +DEBUG 06-24 20:15:58 [manager.py:391] +ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:376.6744136810303ms total_cost_time:376.71828269958496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8657 prompt_cache_len:5151 prompt_cache_ratio:0.595009818643872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 +DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10904812812805176 s +INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.1110067367553711 s +DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=264986679659096398218824789331350931220, time:1750767358.4745119s req_ids:[8] +DEBUG 06-24 20:15:58 [manager.py:391] +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:199.97930526733398ms total_cost_time:200.02412796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8658 prompt_cache_len:5151 prompt_cache_ratio:0.5949410949410949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 +DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10773396492004395 s +INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.10977816581726074 s +DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=162980380089519838335840101450101071635, time:1750767358.6924164s req_ids:[8] +DEBUG 06-24 20:15:58 [manager.py:391] +ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:217.44441986083984ms total_cost_time:217.48900413513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8659 prompt_cache_len:5151 prompt_cache_ratio:0.5948723871116757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 +DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10896730422973633 s +INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.11114668846130371 s +DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=63886982681376039620085478176367845467, time:1750767358.9095316s req_ids:[8] +DEBUG 06-24 20:15:58 [manager.py:391] +ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:208.07814598083496ms total_cost_time:208.12058448791504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8660 prompt_cache_len:5151 prompt_cache_ratio:0.5948036951501154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 +DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10745525360107422 s +INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s +DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=281904837146979810894191269082872227960, time:1750767359.1242416s req_ids:[8] +DEBUG 06-24 20:15:59 [manager.py:391] +DEBUG 06-24 20:15:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 37039.050 tokens/s +DEBUG 06-24 20:15:59 [stats.py:37] Avg prompt tokens throughput: 37030.478 tokens/s +DEBUG 06-24 20:15:59 [stats.py:37] Avg generate tokens throughput: 8.572 tokens/s +ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:208.8186740875244ms total_cost_time:208.8615894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8661 prompt_cache_len:5151 prompt_cache_ratio:0.5947350190509179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 +DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10762786865234375 s +INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.10952901840209961 s +DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=262071714936887769086296028453352968731, time:1750767359.3404036s req_ids:[8] +DEBUG 06-24 20:15:59 [manager.py:391] +ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:209.1360092163086ms total_cost_time:209.1810703277588ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8662 prompt_cache_len:5151 prompt_cache_ratio:0.5946663588085892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 +DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10784721374511719 s +INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s +DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=184024350559996861939459891170327758071, time:1750767359.5564532s req_ids:[8] +DEBUG 06-24 20:15:59 [manager.py:391] +ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:205.8548927307129ms total_cost_time:205.89900016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8663 prompt_cache_len:5151 prompt_cache_ratio:0.5945977144176382 mtp_avg_token_per_step:1.0 +INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 +DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:15:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.310746431350708 s +INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.31266188621520996 s +DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=215917235149705184108547697784927502634, time:1750767359.9797094s req_ids:[8] +DEBUG 06-24 20:15:59 [manager.py:391] +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:425.08935928344727ms total_cost_time:425.13227462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8664 prompt_cache_len:5151 prompt_cache_ratio:0.5945290858725761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 +DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10819792747497559 s +INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11010050773620605 s +DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=136147127035164528318686398312869560033, time:1750767360.1970098s req_ids:[8] +DEBUG 06-24 20:16:00 [manager.py:391] +ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:208.2345485687256ms total_cost_time:208.27960968017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8665 prompt_cache_len:5151 prompt_cache_ratio:0.594460473167917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 +DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10801577568054199 s +INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s +DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=180758557791862045687178387349191666464, time:1750767360.410471s req_ids:[8] +DEBUG 06-24 20:16:00 [manager.py:391] +ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:209.89727973937988ms total_cost_time:209.94138717651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8666 prompt_cache_len:5151 prompt_cache_ratio:0.5943918762981768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 +DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10722923278808594 s +INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900115966796875 s +DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=264424419732114430214278448015456873755, time:1750767360.6251872s req_ids:[8] +DEBUG 06-24 20:16:00 [manager.py:391] +ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:207.0600986480713ms total_cost_time:207.10420608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8667 prompt_cache_len:5151 prompt_cache_ratio:0.5943232952578748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 +DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10905838012695312 s +INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11100101470947266 s +DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=318207036420058163763350942030782280447, time:1750767360.837229s req_ids:[8] +DEBUG 06-24 20:16:00 [manager.py:391] +ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:207.85975456237793ms total_cost_time:207.8876495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8668 prompt_cache_len:5151 prompt_cache_ratio:0.5942547300415321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 +DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10723304748535156 s +INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10925006866455078 s +DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=127163040213379171266489653912401960636, time:1750767361.0534785s req_ids:[8] +DEBUG 06-24 20:16:01 [manager.py:391] +ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:209.38897132873535ms total_cost_time:209.41734313964844ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8669 prompt_cache_len:5151 prompt_cache_ratio:0.5941861806436729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 +DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10711216926574707 s +INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10894656181335449 s +DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=255821967515769890219147620404690002674, time:1750767361.2678485s req_ids:[8] +DEBUG 06-24 20:16:01 [manager.py:391] +ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:379.76789474487305ms total_cost_time:379.7943592071533ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8670 prompt_cache_len:5151 prompt_cache_ratio:0.5941176470588235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 +DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10705971717834473 s +INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10810732841491699 s +DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=149495825451803480385567228029064539290, time:1750767361.6488042s req_ids:[8] +DEBUG 06-24 20:16:01 [manager.py:391] +ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:209.14316177368164ms total_cost_time:209.17105674743652ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8671 prompt_cache_len:5151 prompt_cache_ratio:0.5940491292815131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 +DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10714292526245117 s +INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.1089487075805664 s +INFO 06-24 20:16:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=189302794967851091212629870885199358219, time:1750767361.8647485s req_ids:[8] +DEBUG 06-24 20:16:01 [manager.py:391] +ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:210.1898193359375ms total_cost_time:210.21056175231934ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8672 prompt_cache_len:5151 prompt_cache_ratio:0.5939806273062731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 +DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10576415061950684 s +INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.10767340660095215 s +DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=181424321466035071202026380552985488323, time:1750767362.0779436s req_ids:[8] +DEBUG 06-24 20:16:02 [manager.py:391] +ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:209.28287506103516ms total_cost_time:209.32698249816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8673 prompt_cache_len:5151 prompt_cache_ratio:0.5939121411276375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 +DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10823345184326172 s +INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.11013150215148926 s +DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=113611843110585115971311669170005158087, time:1750767362.2917545s req_ids:[8] +DEBUG 06-24 20:16:02 [manager.py:391] +ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:210.61372756958008ms total_cost_time:210.65950393676758ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8674 prompt_cache_len:5151 prompt_cache_ratio:0.593843670740143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 +DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.11094880104064941 s +INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.11320042610168457 s +DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=4001469884290988289042580270407328078, time:1750767362.507861s req_ids:[8] +DEBUG 06-24 20:16:02 [manager.py:391] +ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:209.55348014831543ms total_cost_time:209.5966339111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8675 prompt_cache_len:5151 prompt_cache_ratio:0.5937752161383285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 +DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10718703269958496 s +INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.10923242568969727 s +DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=330765037508280068294476077613368300209, time:1750767362.723427s req_ids:[8] +DEBUG 06-24 20:16:02 [manager.py:391] +ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:383.8794231414795ms total_cost_time:383.9235305786133ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8676 prompt_cache_len:5151 prompt_cache_ratio:0.5937067773167358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 +DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10804200172424316 s +INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11053204536437988 s +DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=177213500750246411712767087063684572205, time:1750767363.1107733s req_ids:[8] +DEBUG 06-24 20:16:03 [manager.py:391] +ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:200.9139060974121ms total_cost_time:200.9563446044922ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8677 prompt_cache_len:5151 prompt_cache_ratio:0.593638354269909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 +DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s +INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11030101776123047 s +DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=203163806683013812110058414340153594354, time:1750767363.3197696s req_ids:[8] +DEBUG 06-24 20:16:03 [manager.py:391] +ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:209.35416221618652ms total_cost_time:209.39898490905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8678 prompt_cache_len:5151 prompt_cache_ratio:0.5935699469923945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 +DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.1096808910369873 s +INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.111602783203125 s +DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=5067769829712468248354023280183657293, time:1750767363.5347543s req_ids:[8] +DEBUG 06-24 20:16:03 [manager.py:391] +ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:201.6754150390625ms total_cost_time:201.7204761505127ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8679 prompt_cache_len:5151 prompt_cache_ratio:0.5935015554787418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 +DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10851836204528809 s +INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11040997505187988 s +DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=258089597404816309814586125524604010746, time:1750767363.7430842s req_ids:[8] +DEBUG 06-24 20:16:03 [manager.py:391] +ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:208.34922790527344ms total_cost_time:208.39619636535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8680 prompt_cache_len:5151 prompt_cache_ratio:0.5934331797235023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 +DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10818028450012207 s +INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.1101083755493164 s +DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=204218946128987072182407181653151540657, time:1750767363.9575243s req_ids:[8] +DEBUG 06-24 20:16:03 [manager.py:391] +ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:207.94034004211426ms total_cost_time:207.98420906066895ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8681 prompt_cache_len:5151 prompt_cache_ratio:0.5933648197212302 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 +DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s +INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s +DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=236517805150392480682572797221682854369, time:1750767364.1699874s req_ids:[8] +DEBUG 06-24 20:16:04 [manager.py:391] +ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:206.07757568359375ms total_cost_time:206.12168312072754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8682 prompt_cache_len:5151 prompt_cache_ratio:0.5932964754664823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 +DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10804462432861328 s +INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.10991764068603516 s +DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=51431258918605310321494633830180937774, time:1750767364.3896003s req_ids:[8] +DEBUG 06-24 20:16:04 [manager.py:391] +ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:212.7225399017334ms total_cost_time:212.7666473388672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8683 prompt_cache_len:5151 prompt_cache_ratio:0.5932281469538178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 +DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10983657836914062 s +INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.11180520057678223 s +DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=170874834403489074314604561210507021655, time:1750767364.6024904s req_ids:[8] +DEBUG 06-24 20:16:04 [manager.py:391] +ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:209.40160751342773ms total_cost_time:209.44571495056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8684 prompt_cache_len:5151 prompt_cache_ratio:0.5931598341777983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 +DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.30855894088745117 s +INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.3106086254119873 s +DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=310247974285068922102516162120012122234, time:1750767365.0220683s req_ids:[8] +DEBUG 06-24 20:16:05 [manager.py:391] +ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:419.85535621643066ms total_cost_time:419.88277435302734ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8685 prompt_cache_len:5151 prompt_cache_ratio:0.5930915371329879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 +DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10738539695739746 s +INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10921978950500488 s +DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=207372117307222087664790296364762701120, time:1750767365.2452528s req_ids:[8] +DEBUG 06-24 20:16:05 [manager.py:391] +ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:213.49501609802246ms total_cost_time:213.52052688598633ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8686 prompt_cache_len:5151 prompt_cache_ratio:0.5930232558139535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 +DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.1069488525390625 s +INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s +DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=169609836848961795597468318677719612174, time:1750767365.4632072s req_ids:[8] +DEBUG 06-24 20:16:05 [manager.py:391] +ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:211.52901649475098ms total_cost_time:211.55667304992676ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8687 prompt_cache_len:5151 prompt_cache_ratio:0.5929549902152642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 +DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10489583015441895 s +INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10698080062866211 s +DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=202239432366594529834013516959295320576, time:1750767365.6761682s req_ids:[8] +DEBUG 06-24 20:16:05 [manager.py:391] +ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:207.41510391235352ms total_cost_time:207.4434757232666ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8688 prompt_cache_len:5151 prompt_cache_ratio:0.5928867403314917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 +DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10578298568725586 s +INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10781383514404297 s +DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=288751712017526861861479764673698867775, time:1750767365.8899243s req_ids:[8] +DEBUG 06-24 20:16:05 [manager.py:391] +ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:208.9378833770752ms total_cost_time:208.96553993225098ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8689 prompt_cache_len:5151 prompt_cache_ratio:0.5928185061572102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 +DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10756659507751465 s +INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s +DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=208920756717776334734611819740183739442, time:1750767366.1011784s req_ids:[8] +DEBUG 06-24 20:16:06 [manager.py:391] +ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:203.86075973510742ms total_cost_time:203.8886547088623ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8690 prompt_cache_len:5151 prompt_cache_ratio:0.5927502876869966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 +DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10629487037658691 s +INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10841989517211914 s +DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=259455445063889171371942406201898698242, time:1750767366.3110995s req_ids:[8] +DEBUG 06-24 20:16:06 [manager.py:391] +ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:210.07609367370605ms total_cost_time:210.10375022888184ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8691 prompt_cache_len:5151 prompt_cache_ratio:0.5926820849154297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 +DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10675621032714844 s +INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10857462882995605 s +DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=32362421047188359885756907501820259508, time:1750767366.5277376s req_ids:[8] +DEBUG 06-24 20:16:06 [manager.py:391] +ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:209.73801612854004ms total_cost_time:209.77544784545898ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:8692 prompt_cache_len:5151 prompt_cache_ratio:0.5926138978370916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 +DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10706329345703125 s +INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10959291458129883 s +DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=311100661761634716503769196727668022713, time:1750767366.742244s req_ids:[8] +DEBUG 06-24 20:16:06 [manager.py:391] +ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:370.6338405609131ms total_cost_time:370.65863609313965ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:8693 prompt_cache_len:5151 prompt_cache_ratio:0.5925457264465662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 +DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10666203498840332 s +INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10868978500366211 s +DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=79784031661188739511220751542289004333, time:1750767367.114693s req_ids:[8] +DEBUG 06-24 20:16:07 [manager.py:391] +ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:205.0039768218994ms total_cost_time:205.0473690032959ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8694 prompt_cache_len:5151 prompt_cache_ratio:0.5924775707384403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 +DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10744500160217285 s +INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10947108268737793 s +DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=257767445152528213482133776975009908613, time:1750767367.3265533s req_ids:[8] +DEBUG 06-24 20:16:07 [manager.py:391] +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:205.64031600952148ms total_cost_time:205.68490028381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8695 prompt_cache_len:5151 prompt_cache_ratio:0.592409430707303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 +DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10780167579650879 s +INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10973429679870605 s +DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=195625561088872498217914411626738598735, time:1750767367.5370066s req_ids:[8] +DEBUG 06-24 20:16:07 [manager.py:391] +ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:202.6519775390625ms total_cost_time:202.6960849761963ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8696 prompt_cache_len:5151 prompt_cache_ratio:0.5923413063477461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 +DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s +INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.11024069786071777 s +DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=19283396263920187241399721999610692776, time:1750767367.755712s req_ids:[8] +DEBUG 06-24 20:16:07 [manager.py:391] +ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:217.79799461364746ms total_cost_time:217.84019470214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8697 prompt_cache_len:5151 prompt_cache_ratio:0.5922731976543636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 +DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10722088813781738 s +INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918879508972168 s +DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=271790284896158510476850575334084442174, time:1750767367.9705822s req_ids:[8] +DEBUG 06-24 20:16:07 [manager.py:391] +ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:206.75325393676758ms total_cost_time:206.79831504821777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8698 prompt_cache_len:5151 prompt_cache_ratio:0.5922051046217521 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 +DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10728669166564941 s +INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.10923266410827637 s +DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=47382662291830107211040676635290520277, time:1750767368.1816206s req_ids:[8] +DEBUG 06-24 20:16:08 [manager.py:391] +ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:204.76174354553223ms total_cost_time:204.80585098266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8699 prompt_cache_len:5151 prompt_cache_ratio:0.5921370272445109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 +DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10866141319274902 s +INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11075639724731445 s +DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=302629048100371886877782436519795339402, time:1750767368.3906238s req_ids:[8] +DEBUG 06-24 20:16:08 [manager.py:391] +ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:202.11386680603027ms total_cost_time:202.15988159179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8700 prompt_cache_len:5151 prompt_cache_ratio:0.5920689655172414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 +DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10893964767456055 s +INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11090612411499023 s +DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=303501947060614020763465538572715567884, time:1750767368.5989063s req_ids:[8] +DEBUG 06-24 20:16:08 [manager.py:391] +ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:204.0235996246338ms total_cost_time:204.06651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8701 prompt_cache_len:5151 prompt_cache_ratio:0.5920009194345478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 +DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10833120346069336 s +INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11029338836669922 s +DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=315477166867868578492856780249458172545, time:1750767368.8109736s req_ids:[8] +DEBUG 06-24 20:16:08 [manager.py:391] +ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:210.93106269836426ms total_cost_time:210.97636222839355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8702 prompt_cache_len:5151 prompt_cache_ratio:0.5919328889910366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 +DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s +INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979247093200684 s +DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=16482676512270489144640571868669405872, time:1750767369.038684s req_ids:[8] +DEBUG 06-24 20:16:09 [manager.py:391] +ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:16:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 35897.968 tokens/s +DEBUG 06-24 20:16:09 [stats.py:37] Avg prompt tokens throughput: 35889.602 tokens/s +DEBUG 06-24 20:16:09 [stats.py:37] Avg generate tokens throughput: 8.366 tokens/s +INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:377.9129981994629ms total_cost_time:377.96616554260254ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:8703 prompt_cache_len:5151 prompt_cache_ratio:0.5918648741813168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 +DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s +INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.11006641387939453 s +DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=131724145680361051239260274794207620583, time:1750767369.4084675s req_ids:[8] +DEBUG 06-24 20:16:09 [manager.py:391] +ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:203.66716384887695ms total_cost_time:203.70984077453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8704 prompt_cache_len:5151 prompt_cache_ratio:0.591796875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 +DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10864090919494629 s +INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.11057400703430176 s +DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=103997066129633895867027092551755400213, time:1750767369.6216373s req_ids:[8] +DEBUG 06-24 20:16:09 [manager.py:391] +ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:206.18152618408203ms total_cost_time:206.2244415283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8705 prompt_cache_len:5151 prompt_cache_ratio:0.5917288914417002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 +DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.1069650650024414 s +INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s +DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=194549958552717845118952086581843546921, time:1750767369.8354504s req_ids:[8] +DEBUG 06-24 20:16:09 [manager.py:391] +ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:212.04662322998047ms total_cost_time:212.07237243652344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8706 prompt_cache_len:5151 prompt_cache_ratio:0.5916609235010337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 +DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10604190826416016 s +INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10821986198425293 s +DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=46121748165978154648408797380860218320, time:1750767370.052102s req_ids:[8] +DEBUG 06-24 20:16:10 [manager.py:391] +ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:210.37936210632324ms total_cost_time:210.40773391723633ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8707 prompt_cache_len:5151 prompt_cache_ratio:0.5915929711726198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 +DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10545992851257324 s +INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10731244087219238 s +DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=329168963753702867377021078792865332110, time:1750767370.2668214s req_ids:[8] +DEBUG 06-24 20:16:10 [manager.py:391] +ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:205.35802841186523ms total_cost_time:205.3837776184082ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8708 prompt_cache_len:5151 prompt_cache_ratio:0.5915250344510795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 +DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10583877563476562 s +INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10796618461608887 s +DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=233384485912848265270322361903112607844, time:1750767370.4785018s req_ids:[8] +DEBUG 06-24 20:16:10 [manager.py:391] +ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:211.05480194091797ms total_cost_time:211.08341217041016ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:8709 prompt_cache_len:5151 prompt_cache_ratio:0.5914571133310369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 +DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10612893104553223 s +INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10837912559509277 s +DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=7420920920516343220003201347939572463, time:1750767370.6944659s req_ids:[8] +DEBUG 06-24 20:16:10 [manager.py:391] +ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:209.88798141479492ms total_cost_time:209.9156379699707ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8710 prompt_cache_len:5151 prompt_cache_ratio:0.5913892078071182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 +DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10608386993408203 s +INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10830497741699219 s +DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=243535940201473379212920217087159912199, time:1750767370.910673s req_ids:[8] +DEBUG 06-24 20:16:10 [manager.py:391] +ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:211.90643310546875ms total_cost_time:211.93337440490723ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8711 prompt_cache_len:5151 prompt_cache_ratio:0.5913213178739525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 +DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.3087458610534668 s +INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.31102561950683594 s +DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=144063290390221686119286330991390126674, time:1750767371.3406572s req_ids:[8] +DEBUG 06-24 20:16:11 [manager.py:391] +ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:428.07936668395996ms total_cost_time:428.10654640197754ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8712 prompt_cache_len:5151 prompt_cache_ratio:0.5912534435261708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 +DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10510563850402832 s +INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10730385780334473 s +DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=19249680141597300110959663883363574542, time:1750767371.5593305s req_ids:[8] +DEBUG 06-24 20:16:11 [manager.py:391] +ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:212.04686164855957ms total_cost_time:212.07404136657715ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8713 prompt_cache_len:5151 prompt_cache_ratio:0.591185584758407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 +DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10614824295043945 s +INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10812520980834961 s +DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=172177723434462251239210485406265618413, time:1750767371.7754364s req_ids:[8] +DEBUG 06-24 20:16:11 [manager.py:391] +ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:209.10215377807617ms total_cost_time:209.13004875183105ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8714 prompt_cache_len:5151 prompt_cache_ratio:0.5911177415652972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 +DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10571885108947754 s +INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10768508911132812 s +DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=223909422759655011471442408260330310183, time:1750767371.9887822s req_ids:[8] +DEBUG 06-24 20:16:11 [manager.py:391] +ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:207.62324333190918ms total_cost_time:207.65018463134766ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8715 prompt_cache_len:5151 prompt_cache_ratio:0.5910499139414802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 +DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10627436637878418 s +INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10730338096618652 s +DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=271852047108543187908820001499860862915, time:1750767372.2010179s req_ids:[8] +DEBUG 06-24 20:16:12 [manager.py:391] +ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.5425853729248ms total_cost_time:208.5702419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8716 prompt_cache_len:5151 prompt_cache_ratio:0.590982101881597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 +DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10498785972595215 s +INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10691118240356445 s +INFO 06-24 20:16:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=187256827203914294803876434169418556722, time:1750767372.4131129s req_ids:[8] +DEBUG 06-24 20:16:12 [manager.py:391] +ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.62054824829102ms total_cost_time:208.6472511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8717 prompt_cache_len:5151 prompt_cache_ratio:0.5909143053802914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 +DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s +INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10729742050170898 s +DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=13884611575273157329040532088828212280, time:1750767372.6283948s req_ids:[8] +DEBUG 06-24 20:16:12 [manager.py:391] +ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.63986015319824ms total_cost_time:208.66751670837402ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8718 prompt_cache_len:5151 prompt_cache_ratio:0.5908465244322092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 +DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10469317436218262 s +INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10655355453491211 s +DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=140476871476550883239163342624988036280, time:1750767372.8416803s req_ids:[8] +DEBUG 06-24 20:16:12 [manager.py:391] +ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:210.04438400268555ms total_cost_time:210.07108688354492ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8719 prompt_cache_len:5151 prompt_cache_ratio:0.5907787590319991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 +DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.1052546501159668 s +INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10753989219665527 s +DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=332791027690318538794781537968684689773, time:1750767373.0555527s req_ids:[8] +DEBUG 06-24 20:16:13 [manager.py:391] +ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:364.9423122406006ms total_cost_time:364.96543884277344ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:8720 prompt_cache_len:5151 prompt_cache_ratio:0.590711009174312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 +DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10478782653808594 s +INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10681843757629395 s +DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=4631933807010807125183525636181255813, time:1750767373.4213347s req_ids:[8] +DEBUG 06-24 20:16:13 [manager.py:391] +ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:204.09393310546875ms total_cost_time:204.1168212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:8721 prompt_cache_len:5151 prompt_cache_ratio:0.5906432748538012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 +DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10667252540588379 s +INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10883951187133789 s +DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=34043117042336419745321865639889501708, time:1750767373.6342566s req_ids:[8] +DEBUG 06-24 20:16:13 [manager.py:391] +ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:211.26866340637207ms total_cost_time:211.31420135498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8722 prompt_cache_len:5151 prompt_cache_ratio:0.5905755560651227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 +DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s +INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10941195487976074 s +DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=9249543655199973640489540039419633258, time:1750767373.848966s req_ids:[8] +DEBUG 06-24 20:16:13 [manager.py:391] +ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:207.1220874786377ms total_cost_time:207.1816921234131ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8723 prompt_cache_len:5151 prompt_cache_ratio:0.5905078528029347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 +DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10972309112548828 s +INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11177182197570801 s +DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=326921199938883247148799735788973904828, time:1750767374.0674932s req_ids:[8] +DEBUG 06-24 20:16:14 [manager.py:391] +ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:212.1729850769043ms total_cost_time:212.2180461883545ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8724 prompt_cache_len:5151 prompt_cache_ratio:0.5904401650618982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 +DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s +INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11042642593383789 s +DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=200133532509875701156239908622072334871, time:1750767374.2803633s req_ids:[8] +DEBUG 06-24 20:16:14 [manager.py:391] +ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:207.45086669921875ms total_cost_time:207.49545097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8725 prompt_cache_len:5151 prompt_cache_ratio:0.5903724928366763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 +DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10899949073791504 s +INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11115360260009766 s +DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=135315976197620084233604134598492109448, time:1750767374.4936907s req_ids:[8] +DEBUG 06-24 20:16:14 [manager.py:391] +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:208.2517147064209ms total_cost_time:208.2970142364502ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8726 prompt_cache_len:5151 prompt_cache_ratio:0.5903048361219344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 +DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.1071934700012207 s +INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.10894465446472168 s +DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=286862521401608182539655590773229034014, time:1750767374.7074695s req_ids:[8] +DEBUG 06-24 20:16:14 [manager.py:391] +ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:168.77341270446777ms total_cost_time:168.81537437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8727 prompt_cache_len:5151 prompt_cache_ratio:0.5902371949123411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 +DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10729241371154785 s +INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.1093740463256836 s +DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=158992273611537624406086570137397483174, time:1750767374.8799243s req_ids:[8] +DEBUG 06-24 20:16:14 [manager.py:391] +ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:196.63381576538086ms total_cost_time:196.67530059814453ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8728 prompt_cache_len:5151 prompt_cache_ratio:0.5901695692025665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 +DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10527539253234863 s +INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10721206665039062 s +DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=168880936568733871627491218112729785383, time:1750767375.0829747s req_ids:[8] +DEBUG 06-24 20:16:15 [manager.py:391] +ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:206.35724067687988ms total_cost_time:206.3758373260498ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8729 prompt_cache_len:5151 prompt_cache_ratio:0.5901019589872838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 +DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10484600067138672 s +INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10686612129211426 s +DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=302227700900147570840535272781260276347, time:1750767375.3026495s req_ids:[8] +DEBUG 06-24 20:16:15 [manager.py:391] +ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:421.6330051422119ms total_cost_time:421.65374755859375ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8730 prompt_cache_len:5151 prompt_cache_ratio:0.5900343642611684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 +DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10430073738098145 s +INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10616922378540039 s +DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=63482562015916414113306188278525560586, time:1750767375.7192562s req_ids:[8] +DEBUG 06-24 20:16:15 [manager.py:391] +ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:162.85014152526855ms total_cost_time:162.86969184875488ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8731 prompt_cache_len:5151 prompt_cache_ratio:0.5899667850188982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 +DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10403323173522949 s +INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10598111152648926 s +DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=336997077797936274652031650712566758865, time:1750767375.8871655s req_ids:[8] +DEBUG 06-24 20:16:15 [manager.py:391] +ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:202.77094841003418ms total_cost_time:202.803373336792ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:8732 prompt_cache_len:5151 prompt_cache_ratio:0.5898992212551535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 +DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:16 [batch.py:51] router release req id 8 +DEBUG 06-24 20:16:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:16 [manager.py:283] +DEBUG 06-24 20:16:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:16 [manager.py:284] +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10474252700805664 s +INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10690736770629883 s +DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=170774377588143166325422058446289834445, time:1750767376.1103315s req_ids:[8] +DEBUG 06-24 20:16:16 [manager.py:391] +ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:224.67279434204102ms total_cost_time:224.69282150268555ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8733 prompt_cache_len:5151 prompt_cache_ratio:0.5898316729646169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 +DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10485458374023438 s +INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10703325271606445 s +DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=332294470804965214014087820768410649528, time:1750767376.3275404s req_ids:[8] +DEBUG 06-24 20:16:16 [manager.py:391] +ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.55221557617188ms total_cost_time:210.5715274810791ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8734 prompt_cache_len:5151 prompt_cache_ratio:0.5897641401419739 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 +DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10464143753051758 s +INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10667777061462402 s +DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=101295379088705130650927365669849669152, time:1750767376.5415006s req_ids:[8] +DEBUG 06-24 20:16:16 [manager.py:391] +ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.46781539916992ms total_cost_time:210.48665046691895ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8735 prompt_cache_len:5151 prompt_cache_ratio:0.5896966227819118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 +DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10372543334960938 s +INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.1056509017944336 s +DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=295518405310055073146589171151809110219, time:1750767376.756528s req_ids:[8] +DEBUG 06-24 20:16:16 [manager.py:391] +ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.55841445922852ms total_cost_time:210.57939529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8736 prompt_cache_len:5151 prompt_cache_ratio:0.5896291208791209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 +DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:16 [batch.py:51] router release req id 8 +INFO 06-24 20:16:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10360527038574219 s +INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10576343536376953 s +DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=94503661745512680274114572090088447911, time:1750767376.9700577s req_ids:[8] +DEBUG 06-24 20:16:16 [manager.py:391] +ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:209.49935913085938ms total_cost_time:209.52630043029785ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8737 prompt_cache_len:5151 prompt_cache_ratio:0.5895616344282935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 +DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10488033294677734 s +INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.1069498062133789 s +DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=171763503198930521907460316055785763951, time:1750767377.1816206s req_ids:[8] +DEBUG 06-24 20:16:17 [manager.py:391] +ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:211.1220359802246ms total_cost_time:211.14230155944824ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8738 prompt_cache_len:5151 prompt_cache_ratio:0.5894941634241245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 +DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.30524754524230957 s +INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.3073999881744385 s +DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=215837103245348277297928525518434552758, time:1750767377.595684s req_ids:[8] +DEBUG 06-24 20:16:17 [manager.py:391] +ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:416.69487953186035ms total_cost_time:416.715145111084ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8739 prompt_cache_len:5151 prompt_cache_ratio:0.5894267078613113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 +DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10466599464416504 s +INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.10650253295898438 s +DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=162089664782168773349651029167792305258, time:1750767377.8154814s req_ids:[8] +DEBUG 06-24 20:16:17 [manager.py:391] +ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:175.7352352142334ms total_cost_time:175.75407028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8740 prompt_cache_len:5151 prompt_cache_ratio:0.5893592677345538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 +DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10465288162231445 s +INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.10663151741027832 s +DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=1599071863362262453522374957829331511, time:1750767377.9907365s req_ids:[8] +DEBUG 06-24 20:16:17 [manager.py:391] +ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:200.20794868469238ms total_cost_time:200.22916793823242ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8741 prompt_cache_len:5151 prompt_cache_ratio:0.589291843038554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 +DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.1046442985534668 s +INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.1066122055053711 s +DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=294996034983037430652853083310942822325, time:1750767378.1944015s req_ids:[8] +DEBUG 06-24 20:16:18 [manager.py:391] +ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:208.21833610534668ms total_cost_time:208.2374095916748ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8742 prompt_cache_len:5151 prompt_cache_ratio:0.5892244337680165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 +DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10406684875488281 s +INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10614824295043945 s +DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=280572983269317351810717964606967105386, time:1750767378.406361s req_ids:[8] +DEBUG 06-24 20:16:18 [manager.py:391] +ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:212.39995956420898ms total_cost_time:212.41998672485352ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8743 prompt_cache_len:5151 prompt_cache_ratio:0.5891570399176485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 +DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10390901565551758 s +INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10599231719970703 s +DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=276191162601313425422058820333468261833, time:1750767378.6203165s req_ids:[8] +DEBUG 06-24 20:16:18 [manager.py:391] +ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:210.53004264831543ms total_cost_time:210.55054664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8744 prompt_cache_len:5151 prompt_cache_ratio:0.5890896614821592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 +DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10494637489318848 s +INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10599446296691895 s +DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=247941048554051494990060859181010391591, time:1750767378.8325007s req_ids:[8] +DEBUG 06-24 20:16:18 [manager.py:391] +ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:208.67228507995605ms total_cost_time:208.69112014770508ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8745 prompt_cache_len:5151 prompt_cache_ratio:0.5890222984562608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 +DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10383343696594238 s +INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10591936111450195 s +DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=273348610996156805508150395931525832654, time:1750767379.0467112s req_ids:[8] +DEBUG 06-24 20:16:19 [manager.py:391] +ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:210.52861213684082ms total_cost_time:210.54863929748535ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8746 prompt_cache_len:5151 prompt_cache_ratio:0.5889549508346673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 +DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10515952110290527 s +INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10713791847229004 s +DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=252981414446781748076929619620919768907, time:1750767379.2579706s req_ids:[8] +DEBUG 06-24 20:16:19 [manager.py:391] +ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:16:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 37591.918 tokens/s +DEBUG 06-24 20:16:19 [stats.py:37] Avg prompt tokens throughput: 37583.304 tokens/s +DEBUG 06-24 20:16:19 [stats.py:37] Avg generate tokens throughput: 8.615 tokens/s +INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:365.71407318115234ms total_cost_time:365.7352924346924ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8747 prompt_cache_len:5151 prompt_cache_ratio:0.5888876186120956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 +DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10472226142883301 s +INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10680270195007324 s +DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=317641688166258195193937745914572189230, time:1750767379.6245546s req_ids:[8] +DEBUG 06-24 20:16:19 [manager.py:391] +ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:208.3423137664795ms total_cost_time:208.36210250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8748 prompt_cache_len:5151 prompt_cache_ratio:0.5888203017832647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 +DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.1046137809753418 s +INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10658407211303711 s +DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=144329472682892245180671667966026191821, time:1750767379.8385735s req_ids:[8] +DEBUG 06-24 20:16:19 [manager.py:391] +ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:213.11020851135254ms total_cost_time:213.12999725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8749 prompt_cache_len:5151 prompt_cache_ratio:0.5887530003428963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 +DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10480546951293945 s +INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10673904418945312 s +DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=235626832170164669715842314633802476442, time:1750767380.0527763s req_ids:[8] +DEBUG 06-24 20:16:20 [manager.py:391] +ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:211.15970611572266ms total_cost_time:211.17877960205078ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8750 prompt_cache_len:5151 prompt_cache_ratio:0.5886857142857143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 +DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10426712036132812 s +INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10603213310241699 s +DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=167413973569693302042290786962719356422, time:1750767380.268253s req_ids:[8] +DEBUG 06-24 20:16:20 [manager.py:391] +ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:210.94965934753418ms total_cost_time:210.9694480895996ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8751 prompt_cache_len:5151 prompt_cache_ratio:0.588618443606445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 +DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10378193855285645 s +INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10575699806213379 s +DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=286362606315066561801943840118291955688, time:1750767380.4810402s req_ids:[8] +DEBUG 06-24 20:16:20 [manager.py:391] +ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:212.21256256103516ms total_cost_time:212.23139762878418ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8752 prompt_cache_len:5151 prompt_cache_ratio:0.5885511882998172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 +DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.1040952205657959 s +INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10635948181152344 s +DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=191432946535640696478067250426176412232, time:1750767380.696372s req_ids:[8] +DEBUG 06-24 20:16:20 [manager.py:391] +ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:215.06834030151367ms total_cost_time:215.0874137878418ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8753 prompt_cache_len:5151 prompt_cache_ratio:0.5884839483605621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 +DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10411977767944336 s +INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10587573051452637 s +DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=88092127503740288236844686559287892016, time:1750767380.9120975s req_ids:[8] +DEBUG 06-24 20:16:20 [manager.py:391] +ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:169.76022720336914ms total_cost_time:169.77882385253906ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8754 prompt_cache_len:5151 prompt_cache_ratio:0.5884167237834133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 +DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10489106178283691 s +INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10696220397949219 s +DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=294203860385911019585072928586074079954, time:1750767381.083713s req_ids:[8] +DEBUG 06-24 20:16:21 [manager.py:391] +ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:201.54690742492676ms total_cost_time:201.5671730041504ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8755 prompt_cache_len:5151 prompt_cache_ratio:0.5883495145631068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 +DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.1046302318572998 s +INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10649275779724121 s +DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=131168778024007096473233232306347925352, time:1750767381.289694s req_ids:[8] +DEBUG 06-24 20:16:21 [manager.py:391] +ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:208.86874198913574ms total_cost_time:208.88757705688477ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8756 prompt_cache_len:5151 prompt_cache_ratio:0.588282320694381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 +DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10311722755432129 s +INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10496735572814941 s +DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=66519058456742941817911103978958069628, time:1750767381.513503s req_ids:[8] +DEBUG 06-24 20:16:21 [manager.py:391] +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:388.045072555542ms total_cost_time:388.06605339050293ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8757 prompt_cache_len:5151 prompt_cache_ratio:0.5882151421719767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 +DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10496950149536133 s +INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10691165924072266 s +DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=204434587636336291910313496141720463984, time:1750767381.8909197s req_ids:[8] +DEBUG 06-24 20:16:21 [manager.py:391] +ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:207.48043060302734ms total_cost_time:207.49974250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8758 prompt_cache_len:5151 prompt_cache_ratio:0.5881479789906371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 +DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s +INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10591554641723633 s +DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=165244173061017888312944627697171620074, time:1750767382.113133s req_ids:[8] +DEBUG 06-24 20:16:22 [manager.py:391] +ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:215.50369262695312ms total_cost_time:215.52371978759766ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8759 prompt_cache_len:5151 prompt_cache_ratio:0.5880808311451079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 +DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10477709770202637 s +INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10653543472290039 s +DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=319181208121993822473453360483614274991, time:1750767382.326287s req_ids:[8] +DEBUG 06-24 20:16:22 [manager.py:391] +ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:199.33414459228516ms total_cost_time:199.3541717529297ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8760 prompt_cache_len:5151 prompt_cache_ratio:0.588013698630137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 +DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10375332832336426 s +INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10541892051696777 s +DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=94071744550325598922276257492388202909, time:1750767382.530367s req_ids:[8] +DEBUG 06-24 20:16:22 [manager.py:391] +ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:205.36136627197266ms total_cost_time:205.3813934326172ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8761 prompt_cache_len:5151 prompt_cache_ratio:0.5879465814404748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 +DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10384249687194824 s +INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10559821128845215 s +DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=53098447361049124127382189744973606075, time:1750767382.7405095s req_ids:[8] +DEBUG 06-24 20:16:22 [manager.py:391] +ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:210.73079109191895ms total_cost_time:210.75129508972168ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8762 prompt_cache_len:5151 prompt_cache_ratio:0.5878794795708743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 +DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10338616371154785 s +INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10505104064941406 s +DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=141680682515886791874495618927637929146, time:1750767382.9531124s req_ids:[8] +DEBUG 06-24 20:16:22 [manager.py:391] +ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:206.35151863098145ms total_cost_time:206.37130737304688ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8763 prompt_cache_len:5151 prompt_cache_ratio:0.5878123930160903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 +DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.10464811325073242 s +INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.10631752014160156 s +DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=249004431784392409067435279394965296702, time:1750767383.1654112s req_ids:[8] +DEBUG 06-24 20:16:23 [manager.py:391] +INFO 06-24 20:16:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:209.4593048095703ms total_cost_time:209.47813987731934ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8764 prompt_cache_len:5151 prompt_cache_ratio:0.5877453217708809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 +DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.1036221981048584 s +INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.10536456108093262 s +DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=11609722059271124313419090558757325430, time:1750767383.3790019s req_ids:[8] +DEBUG 06-24 20:16:23 [manager.py:391] +ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:209.98597145080566ms total_cost_time:210.0064754486084ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8765 prompt_cache_len:5151 prompt_cache_ratio:0.5876782658300057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 +DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.30646324157714844 s +INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.30840301513671875 s +DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=109716450048481134079151200332423457731, time:1750767383.7983263s req_ids:[8] +DEBUG 06-24 20:16:23 [manager.py:391] +ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:418.1685447692871ms total_cost_time:418.18857192993164ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8766 prompt_cache_len:5151 prompt_cache_ratio:0.5876112251882273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 +DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.10481548309326172 s +INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10664510726928711 s +DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=204791949817607283898888766678076877504, time:1750767384.0150526s req_ids:[8] +DEBUG 06-24 20:16:24 [manager.py:391] +ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:208.93049240112305ms total_cost_time:208.94980430603027ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8767 prompt_cache_len:5151 prompt_cache_ratio:0.5875441998403103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 +DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10427236557006836 s +INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10607147216796875 s +DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=152195190818401465144187580916051028767, time:1750767384.2286193s req_ids:[8] +DEBUG 06-24 20:16:24 [manager.py:391] +ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:169.4321632385254ms total_cost_time:169.45219039916992ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8768 prompt_cache_len:5151 prompt_cache_ratio:0.5874771897810219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 +DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10375404357910156 s +INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10554265975952148 s +DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=304787887041618853777309058110288350723, time:1750767384.39944s req_ids:[8] +DEBUG 06-24 20:16:24 [manager.py:391] +ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:197.28851318359375ms total_cost_time:197.30782508850098ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8769 prompt_cache_len:5151 prompt_cache_ratio:0.5874101950051317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 +DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10514974594116211 s +INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10708189010620117 s +DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=249345828863053844020423460869549491031, time:1750767384.6020055s req_ids:[8] +DEBUG 06-24 20:16:24 [manager.py:391] +ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:210.89482307434082ms total_cost_time:210.91413497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8770 prompt_cache_len:5151 prompt_cache_ratio:0.5873432155074116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 +DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10459423065185547 s +INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10632681846618652 s +DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=56133271329187069498374586045577948728, time:1750767384.8311439s req_ids:[8] +DEBUG 06-24 20:16:24 [manager.py:391] +ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:224.5187759399414ms total_cost_time:224.53784942626953ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8771 prompt_cache_len:5151 prompt_cache_ratio:0.5872762512826359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 +DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10343217849731445 s +INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10505175590515137 s +DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=89660768129950293395287248560988318949, time:1750767385.0484066s req_ids:[8] +DEBUG 06-24 20:16:25 [manager.py:391] +ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:211.33136749267578ms total_cost_time:211.35234832763672ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8772 prompt_cache_len:5151 prompt_cache_ratio:0.5872093023255814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 +DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10459017753601074 s +INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10635209083557129 s +DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=47849658358945076852346753505784044149, time:1750767385.264066s req_ids:[8] +DEBUG 06-24 20:16:25 [manager.py:391] +ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:210.1120948791504ms total_cost_time:210.13140678405762ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8773 prompt_cache_len:5151 prompt_cache_ratio:0.587142368631027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 +DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.1034085750579834 s +INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10529303550720215 s +DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=74183999344847385141801403082104224026, time:1750767385.478397s req_ids:[8] +DEBUG 06-24 20:16:25 [manager.py:391] +ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:370.042085647583ms total_cost_time:370.06235122680664ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8774 prompt_cache_len:5151 prompt_cache_ratio:0.5870754501937543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 +DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10485267639160156 s +INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10669231414794922 s +DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=272544114785116394160225213551452294788, time:1750767385.8482215s req_ids:[8] +DEBUG 06-24 20:16:25 [manager.py:391] +ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:203.66835594177246ms total_cost_time:203.6879062652588ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8775 prompt_cache_len:5151 prompt_cache_ratio:0.587008547008547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 +DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10463356971740723 s +INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10641670227050781 s +DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=99249653751804177961496670406427403703, time:1750767386.0595105s req_ids:[8] +DEBUG 06-24 20:16:26 [manager.py:391] +ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:206.9103717803955ms total_cost_time:206.94351196289062ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:8776 prompt_cache_len:5151 prompt_cache_ratio:0.5869416590701915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 +DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10458111763000488 s +INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10641193389892578 s +DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=141260591270106173224662149232128788010, time:1750767386.2735436s req_ids:[8] +DEBUG 06-24 20:16:26 [manager.py:391] +ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:211.89141273498535ms total_cost_time:211.9121551513672ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8777 prompt_cache_len:5151 prompt_cache_ratio:0.5868747863734761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 +DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10479331016540527 s +INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.1066582202911377 s +DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=220727153197882344242724035165312102565, time:1750767386.4888551s req_ids:[8] +DEBUG 06-24 20:16:26 [manager.py:391] +ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:209.1062068939209ms total_cost_time:209.12528038024902ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8778 prompt_cache_len:5151 prompt_cache_ratio:0.5868079289131921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 +DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10424017906188965 s +INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10614705085754395 s +DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=92669549218114962188388017205752186781, time:1750767386.7029374s req_ids:[8] +DEBUG 06-24 20:16:26 [manager.py:391] +ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:173.0809211730957ms total_cost_time:173.09975624084473ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8779 prompt_cache_len:5151 prompt_cache_ratio:0.5867410866841326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 +DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10455751419067383 s +INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10622549057006836 s +DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=116808246936285906623955355588607232853, time:1750767386.8763094s req_ids:[8] +DEBUG 06-24 20:16:26 [manager.py:391] +ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:165.27581214904785ms total_cost_time:165.29583930969238ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8780 prompt_cache_len:5151 prompt_cache_ratio:0.5866742596810934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 +DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10466766357421875 s +INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10653090476989746 s +DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=308526404622330065319904016450984911265, time:1750767387.047423s req_ids:[8] +DEBUG 06-24 20:16:27 [manager.py:391] +ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:200.73390007019043ms total_cost_time:200.75535774230957ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:8781 prompt_cache_len:5151 prompt_cache_ratio:0.5866074478988725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 +DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10506296157836914 s +INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10688519477844238 s +DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=315016866289753239878057919839284217177, time:1750767387.25336s req_ids:[8] +DEBUG 06-24 20:16:27 [manager.py:391] +ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:208.9407444000244ms total_cost_time:208.96005630493164ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8782 prompt_cache_len:5151 prompt_cache_ratio:0.5865406513322705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 +DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10479354858398438 s +INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10657191276550293 s +DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=40077149989252318110167162392132544835, time:1750767387.479015s req_ids:[8] +DEBUG 06-24 20:16:27 [manager.py:391] +ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:225.73494911193848ms total_cost_time:225.7537841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8783 prompt_cache_len:5151 prompt_cache_ratio:0.5864738699760902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 +DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.1041257381439209 s +INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10512471199035645 s +DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=5773166514640876433548213832257652289, time:1750767387.694719s req_ids:[8] +DEBUG 06-24 20:16:27 [manager.py:391] +ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:339.4918441772461ms total_cost_time:339.5123481750488ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8784 prompt_cache_len:5151 prompt_cache_ratio:0.5864071038251366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 +DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1046438217163086 s +INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10630655288696289 s +DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=208704371763789621224403990918087754640, time:1750767388.0358486s req_ids:[8] +DEBUG 06-24 20:16:28 [manager.py:391] +ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:193.4802532196045ms total_cost_time:193.50194931030273ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8785 prompt_cache_len:5151 prompt_cache_ratio:0.5863403528742174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 +DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1048130989074707 s +INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.1067664623260498 s +DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=312136464773319852682973084497703641753, time:1750767388.2409534s req_ids:[8] +DEBUG 06-24 20:16:28 [manager.py:391] +ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:208.30273628234863ms total_cost_time:208.32228660583496ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8786 prompt_cache_len:5151 prompt_cache_ratio:0.5862736171181425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 +DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.10377693176269531 s +INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10563921928405762 s +DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=337601474522438756842260111276025163081, time:1750767388.453865s req_ids:[8] +DEBUG 06-24 20:16:28 [manager.py:391] +ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:209.76758003234863ms total_cost_time:209.78665351867676ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8787 prompt_cache_len:5151 prompt_cache_ratio:0.5862068965517241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 +DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.10399413108825684 s +INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10561823844909668 s +DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=223429798951520209433591624666148734166, time:1750767388.6852233s req_ids:[8] +DEBUG 06-24 20:16:28 [manager.py:391] +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:229.295015335083ms total_cost_time:229.31528091430664ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8788 prompt_cache_len:5151 prompt_cache_ratio:0.586140191169777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 +DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1045689582824707 s +INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10635972023010254 s +DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=207396175817713021326841319391094247496, time:1750767388.902168s req_ids:[8] +DEBUG 06-24 20:16:28 [manager.py:391] +ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:211.2863063812256ms total_cost_time:211.30609512329102ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8789 prompt_cache_len:5151 prompt_cache_ratio:0.586073500967118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 +DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10355138778686523 s +INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10531854629516602 s +DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=60575587449976754069523328598613846306, time:1750767389.117616s req_ids:[8] +DEBUG 06-24 20:16:29 [manager.py:391] +ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:210.97040176391602ms total_cost_time:210.98947525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8790 prompt_cache_len:5151 prompt_cache_ratio:0.5860068259385666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 +DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10452079772949219 s +INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10623693466186523 s +DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=312540868062217735163241866263049271418, time:1750767389.3335767s req_ids:[8] +DEBUG 06-24 20:16:29 [manager.py:391] +ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:210.8142375946045ms total_cost_time:210.83378791809082ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8791 prompt_cache_len:5151 prompt_cache_ratio:0.5859401660789444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 +DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10476541519165039 s +INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10655045509338379 s +DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=259723317891891195970569752834540119650, time:1750767389.5493307s req_ids:[8] +DEBUG 06-24 20:16:29 [manager.py:391] +DEBUG 06-24 20:16:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 39280.059 tokens/s +DEBUG 06-24 20:16:29 [stats.py:37] Avg prompt tokens throughput: 39271.203 tokens/s +DEBUG 06-24 20:16:29 [stats.py:37] Avg generate tokens throughput: 8.856 tokens/s +ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:212.15486526489258ms total_cost_time:212.174654006958ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8792 prompt_cache_len:5151 prompt_cache_ratio:0.5858735213830755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 +DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.305072546005249 s +INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.30690622329711914 s +DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=40578652665155967973819427498442073667, time:1750767389.9553926s req_ids:[8] +DEBUG 06-24 20:16:29 [manager.py:391] +ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:394.4094181060791ms total_cost_time:394.4284915924072ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8793 prompt_cache_len:5151 prompt_cache_ratio:0.5858068918457864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 +DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:30 [batch.py:51] router release req id 8 +INFO 06-24 20:16:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10373520851135254 s +INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10554218292236328 s +DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=339822657400394861750711768947973783395, time:1750767390.1610081s req_ids:[8] +DEBUG 06-24 20:16:30 [manager.py:391] +ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:207.21077919006348ms total_cost_time:207.2300910949707ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8794 prompt_cache_len:5151 prompt_cache_ratio:0.5857402774619058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 +DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10360860824584961 s +INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10528182983398438 s +DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=226272608757914475370867309104144677031, time:1750767390.372933s req_ids:[8] +DEBUG 06-24 20:16:30 [manager.py:391] +ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:209.2416286468506ms total_cost_time:209.26165580749512ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8795 prompt_cache_len:5151 prompt_cache_ratio:0.5856736782262649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 +DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10392498970031738 s +INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10568690299987793 s +DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=28835755361875341592108171298952713476, time:1750767390.5859728s req_ids:[8] +DEBUG 06-24 20:16:30 [manager.py:391] +ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:207.54051208496094ms total_cost_time:207.56006240844727ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8796 prompt_cache_len:5151 prompt_cache_ratio:0.5856070941336972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 +DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10471987724304199 s +INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10640454292297363 s +DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=307747629390181273451922907191906202392, time:1750767390.798153s req_ids:[8] +DEBUG 06-24 20:16:30 [manager.py:391] +ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:209.53011512756348ms total_cost_time:209.5503807067871ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8797 prompt_cache_len:5151 prompt_cache_ratio:0.5855405251790383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 +DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10442852973937988 s +INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10609745979309082 s +DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=304944213721777500798656876618708118014, time:1750767391.0128114s req_ids:[8] +DEBUG 06-24 20:16:31 [manager.py:391] +ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:208.85515213012695ms total_cost_time:208.87517929077148ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8798 prompt_cache_len:5151 prompt_cache_ratio:0.5854739713571266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 +DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10422801971435547 s +INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10547852516174316 s +DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=103180211320769869563425088920794833794, time:1750767391.2275288s req_ids:[8] +DEBUG 06-24 20:16:31 [manager.py:391] +ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:170.21822929382324ms total_cost_time:170.23849487304688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8799 prompt_cache_len:5151 prompt_cache_ratio:0.5854074326628026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 +DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.1033775806427002 s +INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10492801666259766 s +DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=193255226241230815017993224650949095534, time:1750767391.4004557s req_ids:[8] +DEBUG 06-24 20:16:31 [manager.py:391] +ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:206.32290840148926ms total_cost_time:206.3431739807129ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8800 prompt_cache_len:5151 prompt_cache_ratio:0.5853409090909091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 +DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10471296310424805 s +INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10641598701477051 s +DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=255434347819822951052762368778676545409, time:1750767391.6112704s req_ids:[8] +DEBUG 06-24 20:16:31 [manager.py:391] +ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:364.3062114715576ms total_cost_time:364.32623863220215ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8801 prompt_cache_len:5151 prompt_cache_ratio:0.5852744006362913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 +DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10463857650756836 s +INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10628962516784668 s +DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=297786129285146124572162186225087022901, time:1750767391.9783964s req_ids:[8] +DEBUG 06-24 20:16:31 [manager.py:391] +ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:205.72447776794434ms total_cost_time:205.74450492858887ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8802 prompt_cache_len:5151 prompt_cache_ratio:0.5852079072937969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 +DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10479140281677246 s +INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10651254653930664 s +DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=58166223878735851830397291476238206219, time:1750767392.1898077s req_ids:[8] +DEBUG 06-24 20:16:32 [manager.py:391] +ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:207.86762237548828ms total_cost_time:207.8862190246582ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8803 prompt_cache_len:5151 prompt_cache_ratio:0.5851414290582756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 +DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10389447212219238 s +INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10556435585021973 s +DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=187962914697178080270694169678475419136, time:1750767392.4015887s req_ids:[8] +DEBUG 06-24 20:16:32 [manager.py:391] +ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:209.34534072875977ms total_cost_time:209.3651294708252ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8804 prompt_cache_len:5151 prompt_cache_ratio:0.5850749659245797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 +DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10369753837585449 s +INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10545635223388672 s +DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=236678181613034067494605680322394829588, time:1750767392.6157513s req_ids:[8] +DEBUG 06-24 20:16:32 [manager.py:391] +ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:211.19236946105957ms total_cost_time:211.2114429473877ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8805 prompt_cache_len:5151 prompt_cache_ratio:0.5850085178875639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 +DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10386109352111816 s +INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10544943809509277 s +DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=210340224440324938728608345569770828259, time:1750767392.829011s req_ids:[8] +DEBUG 06-24 20:16:32 [manager.py:391] +ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:191.4525032043457ms total_cost_time:191.47157669067383ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8806 prompt_cache_len:5151 prompt_cache_ratio:0.584942084942085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 +DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10380196571350098 s +INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10571694374084473 s +DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=308353486822531169101062417156737327353, time:1750767393.024687s req_ids:[8] +DEBUG 06-24 20:16:33 [manager.py:391] +ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:212.22686767578125ms total_cost_time:212.24617958068848ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8807 prompt_cache_len:5151 prompt_cache_ratio:0.5848756670830022 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 +DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.1044614315032959 s +INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10650300979614258 s +DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=84630901579156035836851274580048383936, time:1750767393.2395406s req_ids:[8] +DEBUG 06-24 20:16:33 [manager.py:391] +ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:205.82818984985352ms total_cost_time:205.84678649902344ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8808 prompt_cache_len:5151 prompt_cache_ratio:0.5848092643051771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 +DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10492753982543945 s +INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10693907737731934 s +DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=276147475871266061044697032850987544470, time:1750767393.4508243s req_ids:[8] +DEBUG 06-24 20:16:33 [manager.py:391] +ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:208.91737937927246ms total_cost_time:208.93573760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8809 prompt_cache_len:5151 prompt_cache_ratio:0.5847428766034737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 +DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:33 [batch.py:51] router release req id 8 +INFO 06-24 20:16:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10495209693908691 s +INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10706472396850586 s +DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=137073501901715090507185354789866282187, time:1750767393.6641302s req_ids:[8] +DEBUG 06-24 20:16:33 [manager.py:391] +ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:213.59801292419434ms total_cost_time:213.61637115478516ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8810 prompt_cache_len:5151 prompt_cache_ratio:0.5846765039727583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 +DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10477375984191895 s +INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10671091079711914 s +DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=19143163195591878355582258715949879318, time:1750767393.8798652s req_ids:[8] +DEBUG 06-24 20:16:33 [manager.py:391] +ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:365.9818172454834ms total_cost_time:366.0016059875488ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8811 prompt_cache_len:5151 prompt_cache_ratio:0.5846101464078992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 +DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10363912582397461 s +INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10549044609069824 s +DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=127666171004250215499789485444500759210, time:1750767394.2478504s req_ids:[8] +DEBUG 06-24 20:16:34 [manager.py:391] +ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:206.13598823547363ms total_cost_time:206.15601539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8812 prompt_cache_len:5151 prompt_cache_ratio:0.5845438039037676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 +DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10422730445861816 s +INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10622835159301758 s +DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=282014173667933612999771397970384953737, time:1750767394.4615579s req_ids:[8] +DEBUG 06-24 20:16:34 [manager.py:391] +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:208.03332328796387ms total_cost_time:208.05740356445312ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:8813 prompt_cache_len:5151 prompt_cache_ratio:0.5844774764552366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 +DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.1037750244140625 s +INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10574221611022949 s +DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=70106468550658532288802452627722681366, time:1750767394.6753662s req_ids:[8] +DEBUG 06-24 20:16:34 [manager.py:391] +ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:209.07902717590332ms total_cost_time:209.09953117370605ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8814 prompt_cache_len:5151 prompt_cache_ratio:0.5844111640571817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 +DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10462236404418945 s +INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10663843154907227 s +DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=204993705148776029419516887194964731953, time:1750767394.8905427s req_ids:[8] +DEBUG 06-24 20:16:34 [manager.py:391] +ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:215.15488624572754ms total_cost_time:215.17395973205566ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8815 prompt_cache_len:5151 prompt_cache_ratio:0.584344866704481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 +DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10475730895996094 s +INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.1067037582397461 s +DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=299625396579342395573599578292802477759, time:1750767395.104231s req_ids:[8] +DEBUG 06-24 20:16:35 [manager.py:391] +ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:205.27315139770508ms total_cost_time:205.2912712097168ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:8816 prompt_cache_len:5151 prompt_cache_ratio:0.5842785843920145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 +DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10374069213867188 s +INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10559892654418945 s +DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=283674224862485783665140796794994506073, time:1750767395.3205547s req_ids:[8] +DEBUG 06-24 20:16:35 [manager.py:391] +ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:212.33010292053223ms total_cost_time:212.34989166259766ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8817 prompt_cache_len:5151 prompt_cache_ratio:0.5842123171146648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 +DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10499453544616699 s +INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10706472396850586 s +DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=170725035517218595382162291972276622406, time:1750767395.5360687s req_ids:[8] +DEBUG 06-24 20:16:35 [manager.py:391] +ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:214.17593955993652ms total_cost_time:214.19429779052734ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8818 prompt_cache_len:5151 prompt_cache_ratio:0.5841460648673169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 +DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10378313064575195 s +INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10566186904907227 s +DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=131608194390991472688069427284636497936, time:1750767395.7624066s req_ids:[8] +DEBUG 06-24 20:16:35 [manager.py:391] +ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:228.1937599182129ms total_cost_time:228.21545600891113ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8819 prompt_cache_len:5151 prompt_cache_ratio:0.5840798276448577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 +DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.3063206672668457 s +INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.3084678649902344 s +DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=259674633504515716307373476339123297260, time:1750767396.1812396s req_ids:[8] +DEBUG 06-24 20:16:36 [manager.py:391] +ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:409.5134735107422ms total_cost_time:409.5327854156494ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8820 prompt_cache_len:5151 prompt_cache_ratio:0.5840136054421768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 +DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10466265678405762 s +INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10669922828674316 s +DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=245515372743785362468523164696002866630, time:1750767396.399474s req_ids:[8] +DEBUG 06-24 20:16:36 [manager.py:391] +ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:214.45226669311523ms total_cost_time:214.47110176086426ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8821 prompt_cache_len:5151 prompt_cache_ratio:0.5839473982541662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 +DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10449433326721191 s +INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10635828971862793 s +DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=283473949696293549861105790889982474077, time:1750767396.6155508s req_ids:[8] +DEBUG 06-24 20:16:36 [manager.py:391] +ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:212.27097511291504ms total_cost_time:212.29004859924316ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8822 prompt_cache_len:5151 prompt_cache_ratio:0.5838812060757198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 +DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10393285751342773 s +INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10576295852661133 s +DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=162770612379230473001523855757170336381, time:1750767396.8295727s req_ids:[8] +DEBUG 06-24 20:16:36 [manager.py:391] +ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:206.30908012390137ms total_cost_time:206.3279151916504ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8823 prompt_cache_len:5151 prompt_cache_ratio:0.5838150289017341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 +DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10371184349060059 s +INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10552716255187988 s +DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=109590026684623195051473853864703762715, time:1750767397.0416324s req_ids:[8] +DEBUG 06-24 20:16:37 [manager.py:391] +ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:207.34333992004395ms total_cost_time:207.36265182495117ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8824 prompt_cache_len:5151 prompt_cache_ratio:0.5837488667271079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 +DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10460257530212402 s +INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10644841194152832 s +DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=114394081305104645478759791613039329994, time:1750767397.2720864s req_ids:[8] +DEBUG 06-24 20:16:37 [manager.py:391] +ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:229.15387153625488ms total_cost_time:229.17413711547852ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8825 prompt_cache_len:5151 prompt_cache_ratio:0.5836827195467422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 +DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10352158546447754 s +INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10539937019348145 s +DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=143263901701319551427814554020459952277, time:1750767397.4907007s req_ids:[8] +DEBUG 06-24 20:16:37 [manager.py:391] +ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:209.64646339416504ms total_cost_time:209.66577529907227ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8826 prompt_cache_len:5151 prompt_cache_ratio:0.5836165873555405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 +DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10483193397521973 s +INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10669445991516113 s +DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=136181734587534091121086275678260560741, time:1750767397.7017734s req_ids:[8] +DEBUG 06-24 20:16:37 [manager.py:391] +ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:208.2996368408203ms total_cost_time:208.31823348999023ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8827 prompt_cache_len:5151 prompt_cache_ratio:0.5835504701484083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 +DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10407733917236328 s +INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10521602630615234 s +DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=27048329408268098867669136642947898749, time:1750767397.9158554s req_ids:[8] +DEBUG 06-24 20:16:37 [manager.py:391] +ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:365.94414710998535ms total_cost_time:365.9639358520508ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8828 prompt_cache_len:5151 prompt_cache_ratio:0.5834843679202537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 +DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10360574722290039 s +INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10550236701965332 s +DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=15049161101907821679235322609179138380, time:1750767398.2821703s req_ids:[8] +DEBUG 06-24 20:16:38 [manager.py:391] +ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:198.23265075683594ms total_cost_time:198.25148582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8829 prompt_cache_len:5151 prompt_cache_ratio:0.5834182806659871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 +DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10488224029541016 s +INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.1067347526550293 s +DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=35169564321805584275233690160675784002, time:1750767398.4927359s req_ids:[8] +DEBUG 06-24 20:16:38 [manager.py:391] +ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:211.27963066101074ms total_cost_time:211.29894256591797ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8830 prompt_cache_len:5151 prompt_cache_ratio:0.5833522083805209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 +DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10374879837036133 s +INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10560226440429688 s +DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=210542581009229946575912674407740530007, time:1750767398.7038364s req_ids:[8] +DEBUG 06-24 20:16:38 [manager.py:391] +ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:207.5819969177246ms total_cost_time:207.60154724121094ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8831 prompt_cache_len:5151 prompt_cache_ratio:0.5832861510587702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 +DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10487771034240723 s +INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10689735412597656 s +DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=131167741982988970437691125072495916103, time:1750767398.91603s req_ids:[8] +DEBUG 06-24 20:16:38 [manager.py:391] +ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:208.58240127563477ms total_cost_time:208.6031436920166ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8832 prompt_cache_len:5151 prompt_cache_ratio:0.5832201086956522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 +DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10381484031677246 s +INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10565853118896484 s +DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=223353594043057109881424729889617159111, time:1750767399.1293898s req_ids:[8] +DEBUG 06-24 20:16:39 [manager.py:391] +ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:209.05065536499023ms total_cost_time:209.06925201416016ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8833 prompt_cache_len:5151 prompt_cache_ratio:0.5831540812860863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 +DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10398173332214355 s +INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10587263107299805 s +DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=264577059700914300962971012242922819357, time:1750767399.3437293s req_ids:[8] +DEBUG 06-24 20:16:39 [manager.py:391] +ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:209.2435359954834ms total_cost_time:209.26427841186523ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8834 prompt_cache_len:5151 prompt_cache_ratio:0.5830880688249943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 +DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10385608673095703 s +INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10574460029602051 s +DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=28654973021796738562290714145069808892, time:1750767399.5623739s req_ids:[8] +DEBUG 06-24 20:16:39 [manager.py:391] +DEBUG 06-24 20:16:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 37859.789 tokens/s +DEBUG 06-24 20:16:39 [stats.py:37] Avg prompt tokens throughput: 37851.200 tokens/s +DEBUG 06-24 20:16:39 [stats.py:37] Avg generate tokens throughput: 8.589 tokens/s +ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:217.34976768493652ms total_cost_time:217.37146377563477ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8835 prompt_cache_len:5151 prompt_cache_ratio:0.5830220713073005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 +DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10383081436157227 s +INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10582327842712402 s +DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=246337646210155313187594556677981933975, time:1750767399.7782652s req_ids:[8] +DEBUG 06-24 20:16:39 [manager.py:391] +ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:212.9356861114502ms total_cost_time:212.95642852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8836 prompt_cache_len:5151 prompt_cache_ratio:0.5829560887279311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 +DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10350203514099121 s +INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10544085502624512 s +DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=125029425744185041451188223092067272870, time:1750767399.9932396s req_ids:[8] +DEBUG 06-24 20:16:39 [manager.py:391] +ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:211.06958389282227ms total_cost_time:211.0888957977295ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8837 prompt_cache_len:5151 prompt_cache_ratio:0.5828901210818151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 +DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10472512245178223 s +INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10666632652282715 s +DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=108739890810296384291883431736135630121, time:1750767400.2146661s req_ids:[8] +DEBUG 06-24 20:16:40 [manager.py:391] +ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:372.6067543029785ms total_cost_time:372.62678146362305ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8838 prompt_cache_len:5151 prompt_cache_ratio:0.5828241683638832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 +DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10480642318725586 s +INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10683774948120117 s +DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=137182879272665246365222353747197067933, time:1750767400.5910578s req_ids:[8] +DEBUG 06-24 20:16:40 [manager.py:391] +ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:223.97947311401367ms total_cost_time:223.9995002746582ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8839 prompt_cache_len:5151 prompt_cache_ratio:0.582758230569069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 +DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10363221168518066 s +INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10560035705566406 s +DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=43623040478605321900639345151403157762, time:1750767400.8228626s req_ids:[8] +DEBUG 06-24 20:16:40 [manager.py:391] +ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:215.90185165405273ms total_cost_time:215.92307090759277ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8840 prompt_cache_len:5151 prompt_cache_ratio:0.5826923076923077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 +DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.10401391983032227 s +INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10594487190246582 s +DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=162256754651009415115314769247972693438, time:1750767401.0461504s req_ids:[8] +DEBUG 06-24 20:16:41 [manager.py:391] +ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:219.75326538085938ms total_cost_time:219.7725772857666ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8841 prompt_cache_len:5151 prompt_cache_ratio:0.5826263997285375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 +DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1053001880645752 s +INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10736322402954102 s +DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=254538388498492174788304656452027119200, time:1750767401.2610793s req_ids:[8] +DEBUG 06-24 20:16:41 [manager.py:391] +ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:208.22930335998535ms total_cost_time:208.2505226135254ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8842 prompt_cache_len:5151 prompt_cache_ratio:0.5825605066726984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 +DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1048133373260498 s +INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10672450065612793 s +DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=7716106725998897597704434543669567369, time:1750767401.4738746s req_ids:[8] +DEBUG 06-24 20:16:41 [manager.py:391] +ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:212.8303050994873ms total_cost_time:212.84937858581543ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8843 prompt_cache_len:5151 prompt_cache_ratio:0.5824946285197331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 +DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.10459351539611816 s +INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10648560523986816 s +DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=294638686245784400283276032346098376840, time:1750767401.6894193s req_ids:[8] +DEBUG 06-24 20:16:41 [manager.py:391] +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:210.49261093139648ms total_cost_time:210.51359176635742ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8844 prompt_cache_len:5151 prompt_cache_ratio:0.5824287652645862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 +DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1035163402557373 s +INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10536384582519531 s +DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=334334543676120450436359452421978621661, time:1750767401.9036028s req_ids:[8] +DEBUG 06-24 20:16:41 [manager.py:391] +ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:212.61024475097656ms total_cost_time:212.62884140014648ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8845 prompt_cache_len:5151 prompt_cache_ratio:0.5823629169022047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 +DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10465669631958008 s +INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.10654997825622559 s +DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=201531809068318213141755797680642456468, time:1750767402.1179054s req_ids:[8] +DEBUG 06-24 20:16:42 [manager.py:391] +ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:205.92212677001953ms total_cost_time:205.94191551208496ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8846 prompt_cache_len:5151 prompt_cache_ratio:0.5822970834275378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 +DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.3053457736968994 s +INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.3073160648345947 s +DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=15051369468374919355692436498628890880, time:1750767402.5313802s req_ids:[8] +DEBUG 06-24 20:16:42 [manager.py:391] +ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:411.0136032104492ms total_cost_time:411.03267669677734ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8847 prompt_cache_len:5151 prompt_cache_ratio:0.5822312648355374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 +DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10471534729003906 s +INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.1066884994506836 s +DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=63887917890302927817416729281654573746, time:1750767402.747182s req_ids:[8] +DEBUG 06-24 20:16:42 [manager.py:391] +ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:208.4176540374756ms total_cost_time:208.4369659423828ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8848 prompt_cache_len:5151 prompt_cache_ratio:0.5821654611211573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 +DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10439324378967285 s +INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.10666942596435547 s +DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=216554087575526819063725601505553162961, time:1750767402.9593923s req_ids:[8] +DEBUG 06-24 20:16:42 [manager.py:391] +ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:155.1969051361084ms total_cost_time:155.23838996887207ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8849 prompt_cache_len:5151 prompt_cache_ratio:0.5820996722793536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 +DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10394620895385742 s +INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10590600967407227 s +DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=76743676554788380930598546864352706572, time:1750767403.107903s req_ids:[8] +DEBUG 06-24 20:16:43 [manager.py:391] +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:180.8948516845703ms total_cost_time:180.91440200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8850 prompt_cache_len:5151 prompt_cache_ratio:0.5820338983050848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 +DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10469651222229004 s +INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.1066141128540039 s +DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=152393125699855840421739180105974931880, time:1750767403.3042023s req_ids:[8] +DEBUG 06-24 20:16:43 [manager.py:391] +ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:203.460693359375ms total_cost_time:203.48191261291504ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8851 prompt_cache_len:5151 prompt_cache_ratio:0.5819681391933115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 +DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.1046745777130127 s +INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10648918151855469 s +DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=94896021534387880590814201143537496803, time:1750767403.5137482s req_ids:[8] +DEBUG 06-24 20:16:43 [manager.py:391] +ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:207.89837837219238ms total_cost_time:207.9179286956787ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8852 prompt_cache_len:5151 prompt_cache_ratio:0.5819023949389969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 +DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10469985008239746 s +INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.1065220832824707 s +DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=280050071233375162656695273907039330622, time:1750767403.726173s req_ids:[8] +DEBUG 06-24 20:16:43 [manager.py:391] +ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:210.62850952148438ms total_cost_time:210.6485366821289ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8853 prompt_cache_len:5151 prompt_cache_ratio:0.5818366655371061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 +DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.1048436164855957 s +INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10674571990966797 s +DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=302825814543166830955819650169748163577, time:1750767403.9407737s req_ids:[8] +DEBUG 06-24 20:16:43 [manager.py:391] +ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:209.80215072631836ms total_cost_time:209.8228931427002ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8854 prompt_cache_len:5151 prompt_cache_ratio:0.5817709509826068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 +DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.1039879322052002 s +INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10601520538330078 s +DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=30968407067180982044031480715023409910, time:1750767404.155743s req_ids:[8] +DEBUG 06-24 20:16:44 [manager.py:391] +ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:372.23243713378906ms total_cost_time:372.2541332244873ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8855 prompt_cache_len:5151 prompt_cache_ratio:0.5817052512704687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 +DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10376596450805664 s +INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.1055753231048584 s +DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=13431603512594475036307027071388676733, time:1750767404.5293074s req_ids:[8] +DEBUG 06-24 20:16:44 [manager.py:391] +ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:195.4789161682129ms total_cost_time:195.4977512359619ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8856 prompt_cache_len:5151 prompt_cache_ratio:0.5816395663956639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 +DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10516762733459473 s +INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10706400871276855 s +DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=194646685453142923901148799521286369227, time:1750767404.729637s req_ids:[8] +DEBUG 06-24 20:16:44 [manager.py:391] +ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:205.2299976348877ms total_cost_time:205.24859428405762ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8857 prompt_cache_len:5151 prompt_cache_ratio:0.581573896353167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 +DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10390806198120117 s +INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10580658912658691 s +DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=265369481400982825314228131935786264454, time:1750767404.9390323s req_ids:[8] +DEBUG 06-24 20:16:44 [manager.py:391] +ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:211.0896110534668ms total_cost_time:211.11011505126953ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8858 prompt_cache_len:5151 prompt_cache_ratio:0.5815082411379544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 +DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10492920875549316 s +INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10681581497192383 s +DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=140419740045284432005756807358752268198, time:1750767405.153852s req_ids:[8] +DEBUG 06-24 20:16:45 [manager.py:391] +ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:207.29613304138184ms total_cost_time:207.31592178344727ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8859 prompt_cache_len:5151 prompt_cache_ratio:0.5814426007450051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 +DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10389375686645508 s +INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.1057901382446289 s +DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=241322000246597304240476955131738362531, time:1750767405.371206s req_ids:[8] +DEBUG 06-24 20:16:45 [manager.py:391] +ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:215.00182151794434ms total_cost_time:215.02137184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8860 prompt_cache_len:5151 prompt_cache_ratio:0.5813769751693002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 +DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10375308990478516 s +INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10569286346435547 s +DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=194202088129139056798334056657092975400, time:1750767405.5869563s req_ids:[8] +DEBUG 06-24 20:16:45 [manager.py:391] +ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:214.33568000793457ms total_cost_time:214.35546875ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8861 prompt_cache_len:5151 prompt_cache_ratio:0.5813113644058233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 +DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10451149940490723 s +INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10636067390441895 s +DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=284682553372056024422139272034783985704, time:1750767405.8031723s req_ids:[8] +DEBUG 06-24 20:16:45 [manager.py:391] +ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:206.69317245483398ms total_cost_time:206.71319961547852ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8862 prompt_cache_len:5151 prompt_cache_ratio:0.5812457684495599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 +DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10395431518554688 s +INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.10574555397033691 s +DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=268323881653810775414952847802068330695, time:1750767406.0159044s req_ids:[8] +DEBUG 06-24 20:16:46 [manager.py:391] +ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:209.05113220214844ms total_cost_time:209.07211303710938ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8863 prompt_cache_len:5151 prompt_cache_ratio:0.5811801872954981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 +DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10363435745239258 s +INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.1055450439453125 s +DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=93669106560416661597049720443583595761, time:1750767406.228901s req_ids:[8] +DEBUG 06-24 20:16:46 [manager.py:391] +ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:211.69805526733398ms total_cost_time:211.72618865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8864 prompt_cache_len:5151 prompt_cache_ratio:0.5811146209386282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 +DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10480451583862305 s +INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.1066749095916748 s +DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=79452523022682796777905236322953823783, time:1750767406.4440258s req_ids:[8] +DEBUG 06-24 20:16:46 [manager.py:391] +ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:370.0978755950928ms total_cost_time:370.1174259185791ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8865 prompt_cache_len:5151 prompt_cache_ratio:0.5810490693739425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 +DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10348701477050781 s +INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.10527491569519043 s +DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=103297020694432281771462674145471856787, time:1750767406.8150291s req_ids:[8] +DEBUG 06-24 20:16:46 [manager.py:391] +ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:198.87638092041016ms total_cost_time:198.89521598815918ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8866 prompt_cache_len:5151 prompt_cache_ratio:0.5809835325964359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 +INFO 06-24 20:16:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10459518432617188 s +INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.1064763069152832 s +DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=59511156612033797311193696909033272538, time:1750767407.027178s req_ids:[8] +DEBUG 06-24 20:16:47 [manager.py:391] +ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:211.17281913757324ms total_cost_time:211.19308471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8867 prompt_cache_len:5151 prompt_cache_ratio:0.5809180106011053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 +DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s +INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10575056076049805 s +DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=65216870026988162575463225909234083661, time:1750767407.2381s req_ids:[8] +DEBUG 06-24 20:16:47 [manager.py:391] +ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:211.26937866210938ms total_cost_time:211.289644241333ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8868 prompt_cache_len:5151 prompt_cache_ratio:0.5808525033829499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 +DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10445523262023926 s +INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.1061551570892334 s +DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=101550760431193320321205862337742963104, time:1750767407.4532542s req_ids:[8] +DEBUG 06-24 20:16:47 [manager.py:391] +ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:209.3801498413086ms total_cost_time:209.39970016479492ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8869 prompt_cache_len:5151 prompt_cache_ratio:0.5807870109369715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 +DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10376787185668945 s +INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10548567771911621 s +DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=134766213092950017905986676544824762022, time:1750767407.670551s req_ids:[8] +DEBUG 06-24 20:16:47 [manager.py:391] +ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:206.47668838500977ms total_cost_time:206.4957618713379ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8870 prompt_cache_len:5151 prompt_cache_ratio:0.5807215332581737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 +DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10480737686157227 s +INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10650205612182617 s +DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=120576872507559406119096675302525861864, time:1750767407.8770566s req_ids:[8] +DEBUG 06-24 20:16:47 [manager.py:391] +ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:205.39379119873047ms total_cost_time:205.4128646850586ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8871 prompt_cache_len:5151 prompt_cache_ratio:0.5806560703415624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 +DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10469222068786621 s +INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10646343231201172 s +DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=177647161323810744699679365650847905483, time:1750767408.0991855s req_ids:[8] +DEBUG 06-24 20:16:48 [manager.py:391] +ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:226.17149353027344ms total_cost_time:226.19128227233887ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8872 prompt_cache_len:5151 prompt_cache_ratio:0.5805906221821461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 +DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.1045684814453125 s +INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10635733604431152 s +DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=209578003027673564330038902945213060539, time:1750767408.3167858s req_ids:[8] +DEBUG 06-24 20:16:48 [manager.py:391] +ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:376.16491317749023ms total_cost_time:376.1858940124512ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8873 prompt_cache_len:5151 prompt_cache_ratio:0.5805251887749352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 +DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10478472709655762 s +INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10660290718078613 s +DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=230569964652894574798238065813458826228, time:1750767408.6950932s req_ids:[8] +DEBUG 06-24 20:16:48 [manager.py:391] +ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:202.5277614593506ms total_cost_time:202.54778861999512ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8874 prompt_cache_len:5151 prompt_cache_ratio:0.5804597701149425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 +DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10381793975830078 s +INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.1056816577911377 s +DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=238393472863888964353804205166056023551, time:1750767408.9107988s req_ids:[8] +DEBUG 06-24 20:16:48 [manager.py:391] +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:214.1425609588623ms total_cost_time:214.202880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8875 prompt_cache_len:5151 prompt_cache_ratio:0.5803943661971831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 +DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10945320129394531 s +INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.11137104034423828 s +DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=65750811711056253505751207091851935002, time:1750767409.1231465s req_ids:[8] +DEBUG 06-24 20:16:49 [manager.py:391] +ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:203.7336826324463ms total_cost_time:203.7956714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8876 prompt_cache_len:5151 prompt_cache_ratio:0.5803289770166742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 +DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.1078953742980957 s +INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10978126525878906 s +DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=23083063524322175496351897776305135026, time:1750767409.332368s req_ids:[8] +DEBUG 06-24 20:16:49 [manager.py:391] +ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:209.57422256469727ms total_cost_time:209.63406562805176ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8877 prompt_cache_len:5151 prompt_cache_ratio:0.5802636025684352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 +DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10747480392456055 s +INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10945940017700195 s +DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=254825678638041634889819029363112995762, time:1750767409.547216s req_ids:[8] +DEBUG 06-24 20:16:49 [manager.py:391] +ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:16:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 37830.726 tokens/s +DEBUG 06-24 20:16:49 [stats.py:37] Avg prompt tokens throughput: 37822.086 tokens/s +DEBUG 06-24 20:16:49 [stats.py:37] Avg generate tokens throughput: 8.640 tokens/s +INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:202.91900634765625ms total_cost_time:202.98004150390625ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8878 prompt_cache_len:5151 prompt_cache_ratio:0.5801982428474882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 +DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10749506950378418 s +INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10941147804260254 s +DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=220977894733042176549695653389239948043, time:1750767409.757163s req_ids:[8] +DEBUG 06-24 20:16:49 [manager.py:391] +ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:204.9703598022461ms total_cost_time:205.0302028656006ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8879 prompt_cache_len:5151 prompt_cache_ratio:0.5801328978488568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 +DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10914397239685059 s +INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s +DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=102342554391041541289847739531866024393, time:1750767409.9684293s req_ids:[8] +DEBUG 06-24 20:16:49 [manager.py:391] +ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:211.97891235351562ms total_cost_time:212.04090118408203ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8880 prompt_cache_len:5151 prompt_cache_ratio:0.5800675675675676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 +DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.10838770866394043 s +INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s +DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=77379161617885543387170311483269266961, time:1750767410.1845493s req_ids:[8] +DEBUG 06-24 20:16:50 [manager.py:391] +ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:198.43196868896484ms total_cost_time:198.49181175231934ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8881 prompt_cache_len:5151 prompt_cache_ratio:0.5800022519986489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 +DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.31032609939575195 s +INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.31241750717163086 s +DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=161146434150085977531548893645227060842, time:1750767410.5975466s req_ids:[8] +DEBUG 06-24 20:16:50 [manager.py:391] +ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:419.28863525390625ms total_cost_time:419.34871673583984ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8882 prompt_cache_len:5151 prompt_cache_ratio:0.5799369511371313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 +DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.10771846771240234 s +INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s +DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=223533037483694452704536624846233538098, time:1750767410.8171737s req_ids:[8] +DEBUG 06-24 20:16:50 [manager.py:391] +ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:205.69992065429688ms total_cost_time:205.76190948486328ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8883 prompt_cache_len:5151 prompt_cache_ratio:0.579871664978048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 +DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10814189910888672 s +INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.1100759506225586 s +DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=325960077848963605311963975656039667236, time:1750767411.0276473s req_ids:[8] +DEBUG 06-24 20:16:51 [manager.py:391] +ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:204.23030853271484ms total_cost_time:204.29039001464844ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8884 prompt_cache_len:5151 prompt_cache_ratio:0.579806393516434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 +DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10739517211914062 s +INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s +DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=72846018538006314650065173458323404836, time:1750767411.2403064s req_ids:[8] +DEBUG 06-24 20:16:51 [manager.py:391] +ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:208.9991569519043ms total_cost_time:209.0590000152588ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8885 prompt_cache_len:5151 prompt_cache_ratio:0.579741136747327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 +DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.1084601879119873 s +INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s +DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=254973895708894659982170889728110681850, time:1750767411.4542308s req_ids:[8] +DEBUG 06-24 20:16:51 [manager.py:391] +ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:208.8949680328369ms total_cost_time:208.9557647705078ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8886 prompt_cache_len:5151 prompt_cache_ratio:0.5796758946657664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 +DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s +INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11073422431945801 s +DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=65266010499210479839951921874118006657, time:1750767411.688298s req_ids:[8] +DEBUG 06-24 20:16:51 [manager.py:391] +ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:230.24630546569824ms total_cost_time:230.30710220336914ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8887 prompt_cache_len:5151 prompt_cache_ratio:0.5796106672667942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 +DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s +INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s +DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=26305488056991098575544296758668740659, time:1750767411.9046948s req_ids:[8] +DEBUG 06-24 20:16:51 [manager.py:391] +ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:206.12740516662598ms total_cost_time:206.2089443206787ms,out_token_counter:1 mean_per_token_cost_time: 0.08153915405273438ms prompt_token_num:8888 prompt_cache_len:5151 prompt_cache_ratio:0.5795454545454546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 +DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10830187797546387 s +INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s +DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=198116328097361737523860085723017705778, time:1750767412.1289146s req_ids:[8] +DEBUG 06-24 20:16:52 [manager.py:391] +ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:218.91450881958008ms total_cost_time:218.97339820861816ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8889 prompt_cache_len:5151 prompt_cache_ratio:0.5794802564967938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 +DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.1072239875793457 s +INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10906600952148438 s +DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=198520961871868292016833177284348416371, time:1750767412.3547006s req_ids:[8] +DEBUG 06-24 20:16:52 [manager.py:391] +ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:431.73837661743164ms total_cost_time:431.7595958709717ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8890 prompt_cache_len:5151 prompt_cache_ratio:0.5794150731158605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 +DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10363602638244629 s +INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10537362098693848 s +DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=71144932878219058634035291584882570372, time:1750767412.7787616s req_ids:[8] +DEBUG 06-24 20:16:52 [manager.py:391] +ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:184.03267860412598ms total_cost_time:184.0522289276123ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8891 prompt_cache_len:5151 prompt_cache_ratio:0.5793499043977055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 +DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10348629951477051 s +INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10547852516174316 s +DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=153600737672794742458455272146577399276, time:1750767412.9681041s req_ids:[8] +DEBUG 06-24 20:16:52 [manager.py:391] +ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:201.71713829040527ms total_cost_time:201.7369270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8892 prompt_cache_len:5151 prompt_cache_ratio:0.5792847503373819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 +DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10386228561401367 s +INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10591769218444824 s +DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=327409902056847920467439628655131409168, time:1750767413.1867535s req_ids:[8] +DEBUG 06-24 20:16:53 [manager.py:391] +INFO 06-24 20:16:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:16:53 [statics_utils.py:24] mean first cost: 228.63510679831708 ms +INFO 06-24 20:16:53 [statics_utils.py:24] mean per token cost: 0.07553394364940436 ms +INFO 06-24 20:16:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:220.98207473754883ms total_cost_time:221.00257873535156ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8893 prompt_cache_len:5151 prompt_cache_ratio:0.5792196109299449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 +DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10386157035827637 s +INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10567188262939453 s +DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=126333554365285730588813938880852083247, time:1750767413.4021406s req_ids:[8] +DEBUG 06-24 20:16:53 [manager.py:391] +ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:211.01617813110352ms total_cost_time:211.03739738464355ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8894 prompt_cache_len:5151 prompt_cache_ratio:0.579154486170452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 +DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10481858253479004 s +INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10680890083312988 s +DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=141236306381527821731829417056961246032, time:1750767413.61614s req_ids:[8] +DEBUG 06-24 20:16:53 [manager.py:391] +ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:208.6324691772461ms total_cost_time:208.65154266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8895 prompt_cache_len:5151 prompt_cache_ratio:0.5790893760539629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 +DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10359716415405273 s +INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10563015937805176 s +DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=283248732016914005400940595114035059812, time:1750767413.830578s req_ids:[8] +DEBUG 06-24 20:16:53 [manager.py:391] +ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:212.10741996765137ms total_cost_time:212.127685546875ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8896 prompt_cache_len:5151 prompt_cache_ratio:0.5790242805755396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 +DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10489249229431152 s +INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10672855377197266 s +DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=218385129957549424183202662383248988496, time:1750767414.0442324s req_ids:[8] +DEBUG 06-24 20:16:54 [manager.py:391] +ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:206.4380645751953ms total_cost_time:206.45713806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8897 prompt_cache_len:5151 prompt_cache_ratio:0.5789591997302461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 +DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10338759422302246 s +INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10506391525268555 s +DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=191428750527290991069837316898020542357, time:1750767414.2572935s req_ids:[8] +DEBUG 06-24 20:16:54 [manager.py:391] +ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:205.6427001953125ms total_cost_time:205.66320419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8898 prompt_cache_len:5151 prompt_cache_ratio:0.578894133513149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 +DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10454893112182617 s +INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10633492469787598 s +DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=118029275559865095275624757838938465751, time:1750767414.4665225s req_ids:[8] +DEBUG 06-24 20:16:54 [manager.py:391] +ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:376.0397434234619ms total_cost_time:376.06143951416016ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8899 prompt_cache_len:5151 prompt_cache_ratio:0.5788290819193168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 +DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10453557968139648 s +INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.1064293384552002 s +DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=52128857476596176353068296351094791082, time:1750767414.8445861s req_ids:[8] +DEBUG 06-24 20:16:54 [manager.py:391] +ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:199.36490058898926ms total_cost_time:199.3856430053711ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8900 prompt_cache_len:5151 prompt_cache_ratio:0.5787640449438203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 +DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.10363078117370605 s +INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10544633865356445 s +INFO 06-24 20:16:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=33406292991481172648410339890085150551, time:1750767415.0504093s req_ids:[8] +DEBUG 06-24 20:16:55 [manager.py:391] +ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:208.21762084960938ms total_cost_time:208.2383632659912ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8901 prompt_cache_len:5151 prompt_cache_ratio:0.5786990225817323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 +DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1047217845916748 s +INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.1064906120300293 s +DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=93803866099495995578927176410997910072, time:1750767415.264898s req_ids:[8] +DEBUG 06-24 20:16:55 [manager.py:391] +ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:208.8930606842041ms total_cost_time:208.91451835632324ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:8902 prompt_cache_len:5151 prompt_cache_ratio:0.5786340148281285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 +DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1039731502532959 s +INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.1059565544128418 s +DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=132243958957585845351225120375591536808, time:1750767415.4772036s req_ids:[8] +DEBUG 06-24 20:16:55 [manager.py:391] +ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:210.22891998291016ms total_cost_time:210.24799346923828ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8903 prompt_cache_len:5151 prompt_cache_ratio:0.578569021678086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 +DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.10372495651245117 s +INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10572385787963867 s +DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=306141301356205742773285364268045159607, time:1750767415.6886394s req_ids:[8] +DEBUG 06-24 20:16:55 [manager.py:391] +ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:202.93045043945312ms total_cost_time:202.95119285583496ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8904 prompt_cache_len:5151 prompt_cache_ratio:0.5785040431266847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 +DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1049342155456543 s +INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10688138008117676 s +DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=128943705185753112795225162597262618703, time:1750767415.9034827s req_ids:[8] +DEBUG 06-24 20:16:55 [manager.py:391] +ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:217.24390983581543ms total_cost_time:217.26369857788086ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8905 prompt_cache_len:5151 prompt_cache_ratio:0.5784390791690062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 +DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.1043250560760498 s +INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10641336441040039 s +DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=269739212138248251170632620236071610367, time:1750767416.1211972s req_ids:[8] +DEBUG 06-24 20:16:56 [manager.py:391] +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:211.72833442687988ms total_cost_time:211.74860000610352ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8906 prompt_cache_len:5151 prompt_cache_ratio:0.5783741298001347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 +DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.10384654998779297 s +INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10519909858703613 s +DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=258508451126726824213222623803758099614, time:1750767416.3383834s req_ids:[8] +DEBUG 06-24 20:16:56 [manager.py:391] +ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:395.71166038513184ms total_cost_time:395.73121070861816ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8907 prompt_cache_len:5151 prompt_cache_ratio:0.5783091950151567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 +DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.10404109954833984 s +INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10604572296142578 s +DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=92245419213367687007050010692471114560, time:1750767416.7341688s req_ids:[8] +DEBUG 06-24 20:16:56 [manager.py:391] +ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:205.18064498901367ms total_cost_time:205.1997184753418ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8908 prompt_cache_len:5151 prompt_cache_ratio:0.5782442748091603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 +DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.1052248477935791 s +INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10723423957824707 s +DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=261456838854199833172417584909902952380, time:1750767416.951939s req_ids:[8] +DEBUG 06-24 20:16:56 [manager.py:391] +ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:216.08448028564453ms total_cost_time:216.10355377197266ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8909 prompt_cache_len:5151 prompt_cache_ratio:0.5781793691772366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 +DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10483694076538086 s +INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10669279098510742 s +DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=5678412599399719150484060546967361395, time:1750767417.1648765s req_ids:[8] +DEBUG 06-24 20:16:57 [manager.py:391] +ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:209.63215827941895ms total_cost_time:209.65147018432617ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8910 prompt_cache_len:5151 prompt_cache_ratio:0.5781144781144781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 +DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10432052612304688 s +INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.1064443588256836 s +DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=274094934530483401352735122898718705961, time:1750767417.3805385s req_ids:[8] +DEBUG 06-24 20:16:57 [manager.py:391] +ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:210.17074584960938ms total_cost_time:210.1898193359375ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8911 prompt_cache_len:5151 prompt_cache_ratio:0.5780496016159803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 +DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.1039268970489502 s +INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10576772689819336 s +DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=226172706906735974955229582283256859477, time:1750767417.5931454s req_ids:[8] +DEBUG 06-24 20:16:57 [manager.py:391] +ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:209.90419387817383ms total_cost_time:209.92422103881836ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8912 prompt_cache_len:5151 prompt_cache_ratio:0.5779847396768402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 +DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10412359237670898 s +INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10606908798217773 s +DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=293381907861407826719254971643435041269, time:1750767417.8057432s req_ids:[8] +DEBUG 06-24 20:16:57 [manager.py:391] +ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:210.16407012939453ms total_cost_time:210.18433570861816ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8913 prompt_cache_len:5151 prompt_cache_ratio:0.5779198922921576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 +DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10502433776855469 s +INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.10705208778381348 s +DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=310946579013909743027242563710376532228, time:1750767418.0210376s req_ids:[8] +DEBUG 06-24 20:16:58 [manager.py:391] +ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:207.51953125ms total_cost_time:207.53931999206543ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8914 prompt_cache_len:5151 prompt_cache_ratio:0.5778550594570339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 +DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10654377937316895 s +INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.10862350463867188 s +DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=77772935397058980906716150862557068636, time:1750767418.2367756s req_ids:[8] +DEBUG 06-24 20:16:58 [manager.py:391] +ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:211.46583557128906ms total_cost_time:211.50875091552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8915 prompt_cache_len:5151 prompt_cache_ratio:0.5777902411665732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 +DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10906577110290527 s +INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.11095929145812988 s +DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=229380487342592059473794077439139877362, time:1750767418.448589s req_ids:[8] +DEBUG 06-24 20:16:58 [manager.py:391] +ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:372.59364128112793ms total_cost_time:372.6377487182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8916 prompt_cache_len:5151 prompt_cache_ratio:0.5777254374158816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 +DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10831546783447266 s +INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101830005645752 s +DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=233375322972774472048694040053618011096, time:1750767418.8250642s req_ids:[8] +DEBUG 06-24 20:16:58 [manager.py:391] +ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:193.53485107421875ms total_cost_time:193.57943534851074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8917 prompt_cache_len:5151 prompt_cache_ratio:0.5776606482000672 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 +DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10781192779541016 s +INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s +DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=307005661884289463220043092330619282146, time:1750767419.0303292s req_ids:[8] +DEBUG 06-24 20:16:59 [manager.py:391] +ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:211.28559112548828ms total_cost_time:211.34686470031738ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8918 prompt_cache_len:5151 prompt_cache_ratio:0.5775958735142409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 +DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10736584663391113 s +INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10921335220336914 s +DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=30755010886150805956282936688883985615, time:1750767419.2480152s req_ids:[8] +DEBUG 06-24 20:16:59 [manager.py:391] +ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:218.52564811706543ms total_cost_time:218.58620643615723ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8919 prompt_cache_len:5151 prompt_cache_ratio:0.5775311133535149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 +DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10774779319763184 s +INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10946917533874512 s +DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=160303467202031100191546329919921789217, time:1750767419.4659355s req_ids:[8] +DEBUG 06-24 20:16:59 [manager.py:391] +ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:166.61620140075684ms total_cost_time:166.67771339416504ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8920 prompt_cache_len:5151 prompt_cache_ratio:0.5774663677130045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 +DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10836958885192871 s +INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.1100759506225586 s +DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=19315419347180410015663360530496610774, time:1750767419.6416104s req_ids:[8] +DEBUG 06-24 20:16:59 [manager.py:391] +DEBUG 06-24 20:16:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 38243.306 tokens/s +DEBUG 06-24 20:16:59 [stats.py:37] Avg prompt tokens throughput: 38234.814 tokens/s +DEBUG 06-24 20:16:59 [stats.py:37] Avg generate tokens throughput: 8.492 tokens/s +ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:170.0572967529297ms total_cost_time:170.1183319091797ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8921 prompt_cache_len:5151 prompt_cache_ratio:0.5774016365878265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 +DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s +INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s +DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=319220720399587162835807293767505459398, time:1750767419.813506s req_ids:[8] +DEBUG 06-24 20:16:59 [manager.py:391] +ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:201.8752098083496ms total_cost_time:201.93743705749512ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8922 prompt_cache_len:5151 prompt_cache_ratio:0.5773369199731002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 +DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:16:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10869240760803223 s +INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s +DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=317787984170760226683943948325725029628, time:1750767420.0185385s req_ids:[8] +DEBUG 06-24 20:17:00 [manager.py:391] +ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:211.63344383239746ms total_cost_time:211.69304847717285ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8923 prompt_cache_len:5151 prompt_cache_ratio:0.5772722178639471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 +DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s +INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11091446876525879 s +DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=42331215773084319277829802411603563280, time:1750767420.233102s req_ids:[8] +DEBUG 06-24 20:17:00 [manager.py:391] +ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:205.23357391357422ms total_cost_time:205.291748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8924 prompt_cache_len:5151 prompt_cache_ratio:0.5772075302554908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 +DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10750651359558105 s +INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s +DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=282766992145340919586161690440461338436, time:1750767420.442701s req_ids:[8] +DEBUG 06-24 20:17:00 [manager.py:391] +ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:359.5857620239258ms total_cost_time:359.6305847167969ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8925 prompt_cache_len:5151 prompt_cache_ratio:0.5771428571428572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 +DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10858154296875 s +INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11070418357849121 s +DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=88681147693703614893608177227914178313, time:1750767420.8084419s req_ids:[8] +DEBUG 06-24 20:17:00 [manager.py:391] +ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:204.5130729675293ms total_cost_time:204.5576572418213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8926 prompt_cache_len:5151 prompt_cache_ratio:0.5770781985211741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 +DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10773921012878418 s +INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.10979866981506348 s +DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=124778285220701490301268569749554298348, time:1750767421.0209086s req_ids:[8] +DEBUG 06-24 20:17:01 [manager.py:391] +ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:202.59881019592285ms total_cost_time:202.64196395874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8927 prompt_cache_len:5151 prompt_cache_ratio:0.5770135543855719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 +DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10986852645874023 s +INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.11184334754943848 s +DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=271529714286811458371990989738542139090, time:1750767421.228776s req_ids:[8] +DEBUG 06-24 20:17:01 [manager.py:391] +ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:204.12468910217285ms total_cost_time:204.20455932617188ms,out_token_counter:1 mean_per_token_cost_time: 0.07987022399902344ms prompt_token_num:8928 prompt_cache_len:5151 prompt_cache_ratio:0.5769489247311828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 +DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10862350463867188 s +INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.11059761047363281 s +DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=94065213730399421592732381490742650244, time:1750767421.4409108s req_ids:[8] +DEBUG 06-24 20:17:01 [manager.py:391] +ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:205.72733879089355ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8929 prompt_cache_len:5151 prompt_cache_ratio:0.5768843095531414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 +DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10886764526367188 s +INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.1108255386352539 s +DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=176143583590473473350966258398461638983, time:1750767421.6582363s req_ids:[8] +DEBUG 06-24 20:17:01 [manager.py:391] +ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:213.24896812438965ms total_cost_time:213.29474449157715ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8930 prompt_cache_len:5151 prompt_cache_ratio:0.5768197088465845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 +DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10754132270812988 s +INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.1097116470336914 s +DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=254146057358101458130434997105210517413, time:1750767421.8734066s req_ids:[8] +DEBUG 06-24 20:17:01 [manager.py:391] +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:209.87319946289062ms total_cost_time:209.93995666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0667572021484375ms prompt_token_num:8931 prompt_cache_len:5151 prompt_cache_ratio:0.576755122606651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 +DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10810542106628418 s +INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005926132202148 s +DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=87262195068118814638037801191140550472, time:1750767422.0885563s req_ids:[8] +DEBUG 06-24 20:17:02 [manager.py:391] +ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:207.55743980407715ms total_cost_time:207.60226249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8932 prompt_cache_len:5151 prompt_cache_ratio:0.5766905508284819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 +DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.30936336517333984 s +INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.31136536598205566 s +DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=330993943936161122929188293806647668386, time:1750767422.5048394s req_ids:[8] +DEBUG 06-24 20:17:02 [manager.py:391] +ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:413.3129119873047ms total_cost_time:413.3586883544922ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8933 prompt_cache_len:5151 prompt_cache_ratio:0.5766259935072204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 +DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10788488388061523 s +INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.10979819297790527 s +DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=267373276406362962481736055622660704725, time:1750767422.7207217s req_ids:[8] +DEBUG 06-24 20:17:02 [manager.py:391] +ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:206.31957054138184ms total_cost_time:206.36534690856934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8934 prompt_cache_len:5151 prompt_cache_ratio:0.5765614506380121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 +DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10776710510253906 s +INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.10980963706970215 s +DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=215351417917954018970827477105021411992, time:1750767422.9345574s req_ids:[8] +DEBUG 06-24 20:17:02 [manager.py:391] +ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:204.1923999786377ms total_cost_time:204.2388916015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8935 prompt_cache_len:5151 prompt_cache_ratio:0.5764969222160045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 +DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.1069483757019043 s +INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.10881519317626953 s +DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=11619329866567844193776717127063708485, time:1750767423.141983s req_ids:[8] +DEBUG 06-24 20:17:03 [manager.py:391] +ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:204.48040962219238ms total_cost_time:204.52380180358887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8936 prompt_cache_len:5151 prompt_cache_ratio:0.5764324082363473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 +DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.1084129810333252 s +INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11026692390441895 s +DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=122322695847459535119690543336852838010, time:1750767423.3523335s req_ids:[8] +DEBUG 06-24 20:17:03 [manager.py:391] +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:207.12780952453613ms total_cost_time:207.17144012451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8937 prompt_cache_len:5151 prompt_cache_ratio:0.5763679086941926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 +DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10873985290527344 s +INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11067819595336914 s +DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=32850394343742274739563683495382795667, time:1750767423.566508s req_ids:[8] +DEBUG 06-24 20:17:03 [manager.py:391] +ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:208.59885215759277ms total_cost_time:208.64272117614746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8938 prompt_cache_len:5151 prompt_cache_ratio:0.5763034235846946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 +DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10757231712341309 s +INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.10936284065246582 s +DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=161904701201333922480716624142633716983, time:1750767423.781403s req_ids:[8] +DEBUG 06-24 20:17:03 [manager.py:391] +ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:206.68506622314453ms total_cost_time:206.72917366027832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8939 prompt_cache_len:5151 prompt_cache_ratio:0.5762389529030093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 +DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10871124267578125 s +INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11063432693481445 s +DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=264584927080220929741692407812855701077, time:1750767423.9925942s req_ids:[8] +DEBUG 06-24 20:17:03 [manager.py:391] +ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:205.46841621398926ms total_cost_time:205.51204681396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8940 prompt_cache_len:5151 prompt_cache_ratio:0.5761744966442953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 +DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10752177238464355 s +INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.10940361022949219 s +DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=130158855593065881367684946236579652488, time:1750767424.2064216s req_ids:[8] +DEBUG 06-24 20:17:04 [manager.py:391] +ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:379.6501159667969ms total_cost_time:379.6954154968262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8941 prompt_cache_len:5151 prompt_cache_ratio:0.5761100548037132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 +DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10875964164733887 s +INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s +DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=318680504543133720992798802114768743113, time:1750767424.584957s req_ids:[8] +DEBUG 06-24 20:17:04 [manager.py:391] +ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:204.85901832580566ms total_cost_time:204.90336418151855ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8942 prompt_cache_len:5151 prompt_cache_ratio:0.5760456273764258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 +DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10762333869934082 s +INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.10960125923156738 s +DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=337005028610997234267978534964101998304, time:1750767424.7983696s req_ids:[8] +DEBUG 06-24 20:17:04 [manager.py:391] +ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:206.82716369628906ms total_cost_time:206.87174797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8943 prompt_cache_len:5151 prompt_cache_ratio:0.5759812143575981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 +DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10777044296264648 s +INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s +DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=239378083890809986108219071636611794478, time:1750767425.0132086s req_ids:[8] +DEBUG 06-24 20:17:05 [manager.py:391] +ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:205.37233352661133ms total_cost_time:205.4152488708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8944 prompt_cache_len:5151 prompt_cache_ratio:0.5759168157423972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 +DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10763835906982422 s +INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.10951018333435059 s +DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=273087776931210503476202860879448796078, time:1750767425.2226858s req_ids:[8] +DEBUG 06-24 20:17:05 [manager.py:391] +ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:207.43417739868164ms total_cost_time:207.47876167297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8945 prompt_cache_len:5151 prompt_cache_ratio:0.5758524315259922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 +DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10783839225769043 s +INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.10973763465881348 s +DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=161288152311888900138671598490094586865, time:1750767425.4357536s req_ids:[8] +DEBUG 06-24 20:17:05 [manager.py:391] +ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:206.34698867797852ms total_cost_time:206.3906192779541ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8946 prompt_cache_len:5151 prompt_cache_ratio:0.5757880617035547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 +DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10864925384521484 s +INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063289642333984 s +DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=150764785052589225061479550015254395552, time:1750767425.6488895s req_ids:[8] +DEBUG 06-24 20:17:05 [manager.py:391] +ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:204.04696464538574ms total_cost_time:204.09131050109863ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8947 prompt_cache_len:5151 prompt_cache_ratio:0.5757237062702582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 +DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s +INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.11030292510986328 s +DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=128240808934864838081752262430029952182, time:1750767425.8701012s req_ids:[8] +DEBUG 06-24 20:17:05 [manager.py:391] +ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:220.184326171875ms total_cost_time:220.2298641204834ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8948 prompt_cache_len:5151 prompt_cache_ratio:0.5756593652212785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 +DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10833930969238281 s +INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.1101994514465332 s +DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=147854794251447613253983833023172051708, time:1750767426.0857434s req_ids:[8] +DEBUG 06-24 20:17:06 [manager.py:391] +ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:210.01505851745605ms total_cost_time:210.07537841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8949 prompt_cache_len:5151 prompt_cache_ratio:0.5755950385517935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 +DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10926938056945801 s +INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.11120438575744629 s +DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=4639161228803901275697230139548544674, time:1750767426.3014596s req_ids:[8] +DEBUG 06-24 20:17:06 [manager.py:391] +ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:381.9706439971924ms total_cost_time:382.016658782959ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8950 prompt_cache_len:5151 prompt_cache_ratio:0.5755307262569832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 +DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s +INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.11018729209899902 s +DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=336611141573958973034701297104283935118, time:1750767426.6861308s req_ids:[8] +DEBUG 06-24 20:17:06 [manager.py:391] +ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:211.24577522277832ms total_cost_time:211.29250526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:8951 prompt_cache_len:5151 prompt_cache_ratio:0.57546642833203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 +DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10782551765441895 s +INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.10980486869812012 s +DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=162029279459218751810730563047351020255, time:1750767426.9059296s req_ids:[8] +DEBUG 06-24 20:17:06 [manager.py:391] +ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:206.8805694580078ms total_cost_time:206.9237232208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8952 prompt_cache_len:5151 prompt_cache_ratio:0.5754021447721179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 +DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1091313362121582 s +INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023974418640137 s +DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=252357408677479524072174775130571251815, time:1750767427.1188245s req_ids:[8] +DEBUG 06-24 20:17:07 [manager.py:391] +ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:208.77861976623535ms total_cost_time:208.84132385253906ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:8953 prompt_cache_len:5151 prompt_cache_ratio:0.5753378755724338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 +DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1088566780090332 s +INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11098074913024902 s +DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=304378158466525731755261021306525801988, time:1750767427.3383908s req_ids:[8] +DEBUG 06-24 20:17:07 [manager.py:391] +ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:212.94045448303223ms total_cost_time:212.9843235015869ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8954 prompt_cache_len:5151 prompt_cache_ratio:0.5752736207281662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 +DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.10798907279968262 s +INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.10992693901062012 s +DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=220194227559716799541579462165990975640, time:1750767427.5532007s req_ids:[8] +DEBUG 06-24 20:17:07 [manager.py:391] +ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:206.1326503753662ms total_cost_time:206.15458488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:8955 prompt_cache_len:5151 prompt_cache_ratio:0.5752093802345059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 +DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1077566146850586 s +INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.10979771614074707 s +DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=105720818530875730109728932282672909083, time:1750767427.7648869s req_ids:[8] +DEBUG 06-24 20:17:07 [manager.py:391] +ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:206.9849967956543ms total_cost_time:207.02815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8956 prompt_cache_len:5151 prompt_cache_ratio:0.5751451540866458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 +DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1081993579864502 s +INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s +DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=242058706025797307114532665330738583874, time:1750767427.9790566s req_ids:[8] +DEBUG 06-24 20:17:07 [manager.py:391] +ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:207.98277854919434ms total_cost_time:208.04309844970703ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8957 prompt_cache_len:5151 prompt_cache_ratio:0.5750809422797811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 +DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10831499099731445 s +INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s +DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=257195158114178892403542433549024805195, time:1750767428.1923118s req_ids:[8] +DEBUG 06-24 20:17:08 [manager.py:391] +ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:375.61917304992676ms total_cost_time:375.68116188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8958 prompt_cache_len:5151 prompt_cache_ratio:0.5750167448091091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 +DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10812783241271973 s +INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11004018783569336 s +DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=170627348129163633396542544179569303694, time:1750767428.5711615s req_ids:[8] +DEBUG 06-24 20:17:08 [manager.py:391] +ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:202.03518867492676ms total_cost_time:202.09527015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8959 prompt_cache_len:5151 prompt_cache_ratio:0.5749525616698292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 +DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s +INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11054563522338867 s +DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=180196441877832190385726831487869867362, time:1750767428.781529s req_ids:[8] +DEBUG 06-24 20:17:08 [manager.py:391] +ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:207.5796127319336ms total_cost_time:207.6406478881836ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8960 prompt_cache_len:5151 prompt_cache_ratio:0.5748883928571429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 +DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10787105560302734 s +INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s +DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=151547087549632477647200108727397404230, time:1750767428.9915733s req_ids:[8] +DEBUG 06-24 20:17:08 [manager.py:391] +ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:203.5653591156006ms total_cost_time:203.62591743469238ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8961 prompt_cache_len:5151 prompt_cache_ratio:0.5748242383662537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 +DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s +INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s +DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=143536032062813711634125537969059759941, time:1750767429.1996453s req_ids:[8] +DEBUG 06-24 20:17:09 [manager.py:391] +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:203.8888931274414ms total_cost_time:203.9487361907959ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8962 prompt_cache_len:5151 prompt_cache_ratio:0.5747600981923677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 +DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s +INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.10955548286437988 s +DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=327357447050371120880584020934600147968, time:1750767429.4100945s req_ids:[8] +DEBUG 06-24 20:17:09 [manager.py:391] +ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:207.26871490478516ms total_cost_time:207.33118057250977ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8963 prompt_cache_len:5151 prompt_cache_ratio:0.5746959723306928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 +DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10740828514099121 s +INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.10932064056396484 s +DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=317751659001078979533008830335795931463, time:1750767429.622811s req_ids:[8] +DEBUG 06-24 20:17:09 [manager.py:391] +ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:17:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 38204.903 tokens/s +DEBUG 06-24 20:17:09 [stats.py:37] Avg prompt tokens throughput: 38196.262 tokens/s +DEBUG 06-24 20:17:09 [stats.py:37] Avg generate tokens throughput: 8.641 tokens/s +INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:208.01329612731934ms total_cost_time:208.07361602783203ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8964 prompt_cache_len:5151 prompt_cache_ratio:0.5746318607764391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 +DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10842132568359375 s +INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.11028861999511719 s +DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=170109001635047167598307515247845342046, time:1750767429.834858s req_ids:[8] +DEBUG 06-24 20:17:09 [manager.py:391] +ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:207.65280723571777ms total_cost_time:207.71288871765137ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8965 prompt_cache_len:5151 prompt_cache_ratio:0.5745677635248188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 +DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10710358619689941 s +INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s +DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=94941777096042333939239926973690463079, time:1750767430.0464866s req_ids:[8] +DEBUG 06-24 20:17:10 [manager.py:391] +ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:205.82008361816406ms total_cost_time:205.86299896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8966 prompt_cache_len:5151 prompt_cache_ratio:0.5745036805710462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 +DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s +INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100919246673584 s +DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=20948888814035641340630326814713687000, time:1750767430.2565775s req_ids:[8] +DEBUG 06-24 20:17:10 [manager.py:391] +ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:321.32434844970703ms total_cost_time:321.37036323547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8967 prompt_cache_len:5151 prompt_cache_ratio:0.5744396119103379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 +DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10753393173217773 s +DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=319780607465907028802153902613469241116, time:1750767430.571691s req_ids:[8] +DEBUG 06-24 20:17:10 [manager.py:391] +INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940790176391602 s +ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:178.66015434265137ms total_cost_time:178.70283126831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8968 prompt_cache_len:5151 prompt_cache_ratio:0.5743755575379126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 +DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.11011791229248047 s +INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.11207294464111328 s +DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=95237488466607486815177656623541483809, time:1750767430.7627072s req_ids:[8] +DEBUG 06-24 20:17:10 [manager.py:391] +ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:204.81610298156738ms total_cost_time:204.85877990722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8969 prompt_cache_len:5151 prompt_cache_ratio:0.574311517448991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 +DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.1054530143737793 s +INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.10735964775085449 s +DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=194471783958277788296526476928783097899, time:1750767430.973444s req_ids:[8] +DEBUG 06-24 20:17:10 [manager.py:391] +ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:206.85553550720215ms total_cost_time:206.89916610717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8970 prompt_cache_len:5151 prompt_cache_ratio:0.574247491638796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 +DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10900115966796875 s +INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11103010177612305 s +DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=56372432435871421302477774095305757951, time:1750767431.1847541s req_ids:[8] +DEBUG 06-24 20:17:11 [manager.py:391] +ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:205.72185516357422ms total_cost_time:205.76763153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8971 prompt_cache_len:5151 prompt_cache_ratio:0.5741834801025527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 +DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10796046257019043 s +INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.10991525650024414 s +DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=245126801952321825184825781904625560542, time:1750767431.3977723s req_ids:[8] +DEBUG 06-24 20:17:11 [manager.py:391] +ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:207.17167854309082ms total_cost_time:207.23247528076172ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8972 prompt_cache_len:5151 prompt_cache_ratio:0.5741194828354882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 +DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10748577117919922 s +INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.10933446884155273 s +DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=320381136194951906015885340203962231488, time:1750767431.6124935s req_ids:[8] +DEBUG 06-24 20:17:11 [manager.py:391] +ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:167.47331619262695ms total_cost_time:167.51742362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8973 prompt_cache_len:5151 prompt_cache_ratio:0.5740554998328319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 +DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s +INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022782325744629 s +DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=326111066798307479613743618530441851660, time:1750767431.7862663s req_ids:[8] +DEBUG 06-24 20:17:11 [manager.py:391] +ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:203.48882675170898ms total_cost_time:203.5503387451172ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8974 prompt_cache_len:5151 prompt_cache_ratio:0.573991531089815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 +DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10819005966186523 s +INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022138595581055 s +DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=25370381696809871478274298444054125041, time:1750767431.9942765s req_ids:[8] +DEBUG 06-24 20:17:11 [manager.py:391] +ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:207.35478401184082ms total_cost_time:207.3993682861328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8975 prompt_cache_len:5151 prompt_cache_ratio:0.5739275766016713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 +DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10856842994689941 s +INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.1110682487487793 s +DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=5168625437787586946947508355080883226, time:1750767432.208292s req_ids:[8] +DEBUG 06-24 20:17:12 [manager.py:391] +ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:362.560510635376ms total_cost_time:362.6229763031006ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8976 prompt_cache_len:5151 prompt_cache_ratio:0.5738636363636364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 +DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10741543769836426 s +INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.10935306549072266 s +DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=167974589616316682929243987443939473914, time:1750767432.573994s req_ids:[8] +DEBUG 06-24 20:17:12 [manager.py:391] +ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:201.59125328063965ms total_cost_time:201.65300369262695ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:8977 prompt_cache_len:5151 prompt_cache_ratio:0.573799710370948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 +DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.1091909408569336 s +INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.11119747161865234 s +DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=28282993139065527106023617924931212631, time:1750767432.7834418s req_ids:[8] +DEBUG 06-24 20:17:12 [manager.py:391] +ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:204.73766326904297ms total_cost_time:204.78034019470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8978 prompt_cache_len:5151 prompt_cache_ratio:0.573735798618846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 +DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10888886451721191 s +INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.11105895042419434 s +DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=39209003267617238746486925946491960509, time:1750767432.9954407s req_ids:[8] +DEBUG 06-24 20:17:12 [manager.py:391] +ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:209.303617477417ms total_cost_time:209.34772491455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8979 prompt_cache_len:5151 prompt_cache_ratio:0.5736719011025727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 +DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.11058306694030762 s +INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.11206984519958496 s +DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=147854703863899957613959827721668984010, time:1750767433.2199225s req_ids:[8] +DEBUG 06-24 20:17:13 [manager.py:391] +ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:221.86756134033203ms total_cost_time:221.9560146331787ms,out_token_counter:1 mean_per_token_cost_time: 0.08845329284667969ms prompt_token_num:8980 prompt_cache_len:5151 prompt_cache_ratio:0.5736080178173719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 +DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.10795736312866211 s +INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.10993409156799316 s +DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=30966462781944354513156019909001183947, time:1750767433.436091s req_ids:[8] +DEBUG 06-24 20:17:13 [manager.py:391] +ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:207.66115188598633ms total_cost_time:207.70502090454102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8981 prompt_cache_len:5151 prompt_cache_ratio:0.5735441487584901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 +DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.1087651252746582 s +INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.11070942878723145 s +DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=336216294804511645343909080101336282157, time:1750767433.6485589s req_ids:[8] +DEBUG 06-24 20:17:13 [manager.py:391] +ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:211.25388145446777ms total_cost_time:211.29679679870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8982 prompt_cache_len:5151 prompt_cache_ratio:0.5734802939211757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 +DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.10705280303955078 s +INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.10893011093139648 s +DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=58390068932930900808980097322551633293, time:1750767433.8705475s req_ids:[8] +DEBUG 06-24 20:17:13 [manager.py:391] +ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:172.346830368042ms total_cost_time:172.38974571228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8983 prompt_cache_len:5151 prompt_cache_ratio:0.573416453300679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 +DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.31026673316955566 s +INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.3122293949127197 s +DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=236177969380402165124684040469374380689, time:1750767434.2561026s req_ids:[8] +DEBUG 06-24 20:17:14 [manager.py:391] +ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:422.3446846008301ms total_cost_time:422.38879203796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8984 prompt_cache_len:5151 prompt_cache_ratio:0.5733526268922529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 +DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10797691345214844 s +INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s +DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=237143991917641885445852323576828894334, time:1750767434.474272s req_ids:[8] +DEBUG 06-24 20:17:14 [manager.py:391] +ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:208.67109298706055ms total_cost_time:208.71400833129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8985 prompt_cache_len:5151 prompt_cache_ratio:0.5732888146911519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 +DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10833191871643066 s +INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.11031770706176758 s +DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=138984329150426342176995524047403727780, time:1750767434.6878843s req_ids:[8] +DEBUG 06-24 20:17:14 [manager.py:391] +ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:220.61920166015625ms total_cost_time:220.66259384155273ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8986 prompt_cache_len:5151 prompt_cache_ratio:0.573225016692633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 +DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10757899284362793 s +INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.10960054397583008 s +DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=269644690554574174219149255460480978028, time:1750767434.917639s req_ids:[8] +DEBUG 06-24 20:17:14 [manager.py:391] +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:218.49632263183594ms total_cost_time:218.53876113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8987 prompt_cache_len:5151 prompt_cache_ratio:0.5731612328919551 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 +DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10743427276611328 s +INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10957050323486328 s +DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=102922138129445785441771554311907151348, time:1750767435.1353962s req_ids:[8] +DEBUG 06-24 20:17:15 [manager.py:391] +ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:208.7390422821045ms total_cost_time:208.7841033935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8988 prompt_cache_len:5151 prompt_cache_ratio:0.5730974632843792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 +DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10801506042480469 s +INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10991835594177246 s +DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=291958425610691376673525108379897308714, time:1750767435.3505938s req_ids:[8] +DEBUG 06-24 20:17:15 [manager.py:391] +ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:202.94618606567383ms total_cost_time:203.0038833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:8989 prompt_cache_len:5151 prompt_cache_ratio:0.5730337078651685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 +DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10979413986206055 s +INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.11176419258117676 s +DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=320846410685894208290744524425102739160, time:1750767435.561704s req_ids:[8] +DEBUG 06-24 20:17:15 [manager.py:391] +ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:206.6805362701416ms total_cost_time:206.7253589630127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8990 prompt_cache_len:5151 prompt_cache_ratio:0.5729699666295884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 +DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10731959342956543 s +INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10929369926452637 s +DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=319386890125927505938048275963745384882, time:1750767435.7882884s req_ids:[8] +DEBUG 06-24 20:17:15 [manager.py:391] +ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:222.58663177490234ms total_cost_time:222.62954711914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8991 prompt_cache_len:5151 prompt_cache_ratio:0.5729062395729062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 +DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s +INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10964441299438477 s +DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=195956202944059190015444435539391641456, time:1750767436.0037181s req_ids:[8] +DEBUG 06-24 20:17:16 [manager.py:391] +ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:382.1301460266113ms total_cost_time:382.1756839752197ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8992 prompt_cache_len:5151 prompt_cache_ratio:0.5728425266903915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 +DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10847806930541992 s +INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.1104423999786377 s +DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=49357955995182287461900278190609818441, time:1750767436.3856146s req_ids:[8] +DEBUG 06-24 20:17:16 [manager.py:391] +ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:213.00888061523438ms total_cost_time:213.05298805236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8993 prompt_cache_len:5151 prompt_cache_ratio:0.5727788279773157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 +DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:16 [batch.py:51] router release req id 8 +DEBUG 06-24 20:17:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:16 [manager.py:283] +DEBUG 06-24 20:17:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:16 [manager.py:284] +INFO 06-24 20:17:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10714864730834961 s +INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.10892081260681152 s +DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=84574821323557679604401101532113065562, time:1750767436.6248374s req_ids:[8] +DEBUG 06-24 20:17:16 [manager.py:391] +ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:185.96982955932617ms total_cost_time:186.03110313415527ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8994 prompt_cache_len:5151 prompt_cache_ratio:0.5727151434289527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 +DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.1082773208618164 s +INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.11006617546081543 s +DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=140785608006063803498228717442618505207, time:1750767436.800333s req_ids:[8] +DEBUG 06-24 20:17:16 [manager.py:391] +ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:197.953462600708ms total_cost_time:198.014497756958ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8995 prompt_cache_len:5151 prompt_cache_ratio:0.5726514730405781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 +INFO 06-24 20:17:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s +INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.11030411720275879 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=39291744285264861009561210828910134939, time:1750767437.0102444s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:211.2751007080078ms total_cost_time:211.33732795715332ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8996 prompt_cache_len:5151 prompt_cache_ratio:0.57258781680747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 +DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.1082315444946289 s +INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10998821258544922 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=216708081498569652965055329754715418135, time:1750767437.2232928s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:179.1555881500244ms total_cost_time:179.1985034942627ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8997 prompt_cache_len:5151 prompt_cache_ratio:0.5725241747249084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 +DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10799098014831543 s +INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10960793495178223 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=304732051042790933190058072696307991216, time:1750767437.4154117s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:175.51207542419434ms total_cost_time:175.55522918701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8998 prompt_cache_len:5151 prompt_cache_ratio:0.5724605467881752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 +DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.1068735122680664 s +INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10867500305175781 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=257843741810405947796855847109888483563, time:1750767437.597309s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:187.91961669921875ms total_cost_time:187.96181678771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8999 prompt_cache_len:5151 prompt_cache_ratio:0.5723969329925547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 +DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10790395736694336 s +INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=15663230959827011995206672374632577466, time:1750767437.7875779s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:184.26966667175293ms total_cost_time:184.3111515045166ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9000 prompt_cache_len:5151 prompt_cache_ratio:0.5723333333333334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 +DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s +INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s +DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=62629272663282118140314877088396506222, time:1750767437.9801936s req_ids:[8] +DEBUG 06-24 20:17:17 [manager.py:391] +ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:365.93103408813477ms total_cost_time:365.97442626953125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9001 prompt_cache_len:5151 prompt_cache_ratio:0.5722697478057993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 +DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s +INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.10983872413635254 s +DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=85980072883571051902589798313651817770, time:1750767438.3426404s req_ids:[8] +DEBUG 06-24 20:17:18 [manager.py:391] +ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:184.10015106201172ms total_cost_time:184.1440200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9002 prompt_cache_len:5151 prompt_cache_ratio:0.5722061764052433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 +DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s +INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.11060881614685059 s +DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=152755467072835928158139155363844326983, time:1750767438.5326786s req_ids:[8] +DEBUG 06-24 20:17:18 [manager.py:391] +ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:196.08235359191895ms total_cost_time:196.14124298095703ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9003 prompt_cache_len:5151 prompt_cache_ratio:0.5721426191269576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 +DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10917854309082031 s +INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.11104226112365723 s +DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=241118503997975820102745188354966991075, time:1750767438.7390392s req_ids:[8] +DEBUG 06-24 20:17:18 [manager.py:391] +ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:200.46114921569824ms total_cost_time:200.52146911621094ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9004 prompt_cache_len:5151 prompt_cache_ratio:0.5720790759662372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 +DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10772991180419922 s +INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.10966253280639648 s +DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=33147665566299173786912972765177007072, time:1750767438.949131s req_ids:[8] +DEBUG 06-24 20:17:18 [manager.py:391] +ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:205.10411262512207ms total_cost_time:205.16633987426758ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:9005 prompt_cache_len:5151 prompt_cache_ratio:0.5720155469183786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 +DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.1074063777923584 s +INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.10925436019897461 s +DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=128409507262889350094567875217795717011, time:1750767439.1589413s req_ids:[8] +DEBUG 06-24 20:17:19 [manager.py:391] +ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:207.12590217590332ms total_cost_time:207.1704864501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9006 prompt_cache_len:5151 prompt_cache_ratio:0.5719520319786808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 +DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.10898900032043457 s +INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.11092376708984375 s +DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=136934274041532793254251756157230925124, time:1750767439.3780718s req_ids:[8] +DEBUG 06-24 20:17:19 [manager.py:391] +ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:213.0272388458252ms total_cost_time:213.0897045135498ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:9007 prompt_cache_len:5151 prompt_cache_ratio:0.5718885311424448 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 +DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.10892391204833984 s +INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.11095309257507324 s +DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=249250275298257318043637550316592501130, time:1750767439.5923343s req_ids:[8] +DEBUG 06-24 20:17:19 [manager.py:391] +ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:210.15286445617676ms total_cost_time:210.19577980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9008 prompt_cache_len:5151 prompt_cache_ratio:0.5718250444049734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 +DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.1071922779083252 s +INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.10924553871154785 s +DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=198666066097588211875632792731312869956, time:1750767439.8079598s req_ids:[8] +DEBUG 06-24 20:17:19 [manager.py:391] +DEBUG 06-24 20:17:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 40054.030 tokens/s +DEBUG 06-24 20:17:19 [stats.py:37] Avg prompt tokens throughput: 40045.218 tokens/s +DEBUG 06-24 20:17:19 [stats.py:37] Avg generate tokens throughput: 8.813 tokens/s +ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:381.58631324768066ms total_cost_time:381.63161277770996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9009 prompt_cache_len:5151 prompt_cache_ratio:0.5717615717615717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 +DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10896515846252441 s +INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.11098122596740723 s +DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=198733991319755332806791202154025012942, time:1750767440.201033s req_ids:[8] +DEBUG 06-24 20:17:20 [manager.py:391] +ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:210.06011962890625ms total_cost_time:210.10541915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9010 prompt_cache_len:5151 prompt_cache_ratio:0.5716981132075472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 +DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10785126686096191 s +INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.10985112190246582 s +DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=133655503512312833281338068472638587025, time:1750767440.41831s req_ids:[8] +DEBUG 06-24 20:17:20 [manager.py:391] +ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:208.64486694335938ms total_cost_time:208.68897438049316ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9011 prompt_cache_len:5151 prompt_cache_ratio:0.5716346687382089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 +DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10863208770751953 s +INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.11052298545837402 s +DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=140633077150729094966817065045580390780, time:1750767440.6324441s req_ids:[8] +DEBUG 06-24 20:17:20 [manager.py:391] +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:207.54528045654297ms total_cost_time:207.58962631225586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9012 prompt_cache_len:5151 prompt_cache_ratio:0.5715712383488681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 +DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10838818550109863 s +INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103830337524414 s +DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=151872465838131387182803064336637216896, time:1750767440.8476827s req_ids:[8] +DEBUG 06-24 20:17:20 [manager.py:391] +ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:206.51817321777344ms total_cost_time:206.56371116638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9013 prompt_cache_len:5151 prompt_cache_ratio:0.5715078220348385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 +DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10859489440917969 s +INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11052942276000977 s +DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=1215350245682702007953321924069838026, time:1750767441.0620859s req_ids:[8] +DEBUG 06-24 20:17:21 [manager.py:391] +ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:208.1902027130127ms total_cost_time:208.235502243042ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9014 prompt_cache_len:5151 prompt_cache_ratio:0.5714444197914356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 +DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s +INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.1098184585571289 s +DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=200057129046233163701636388622520631242, time:1750767441.2764268s req_ids:[8] +DEBUG 06-24 20:17:21 [manager.py:391] +ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:209.41996574401855ms total_cost_time:209.46598052978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9015 prompt_cache_len:5151 prompt_cache_ratio:0.5713810316139767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 +DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10886144638061523 s +INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093521118164062 s +DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=19280522459072846009628019658793759557, time:1750767441.49288s req_ids:[8] +DEBUG 06-24 20:17:21 [manager.py:391] +ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:204.79059219360352ms total_cost_time:204.833984375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9016 prompt_cache_len:5151 prompt_cache_ratio:0.5713176574977817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 +DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10880160331726074 s +INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11057925224304199 s +DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=281370543332253051238967638396506410158, time:1750767441.7012935s req_ids:[8] +DEBUG 06-24 20:17:21 [manager.py:391] +ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:205.53207397460938ms total_cost_time:205.57570457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9017 prompt_cache_len:5151 prompt_cache_ratio:0.5712542974381724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 +DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s +INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s +DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=112737093018953084253985439686167327786, time:1750767441.9127367s req_ids:[8] +DEBUG 06-24 20:17:21 [manager.py:391] +ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:380.78808784484863ms total_cost_time:380.83386421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9018 prompt_cache_len:5151 prompt_cache_ratio:0.5711909514304724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 +DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.1091303825378418 s +INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11105918884277344 s +DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=156059677279708698171622819195425621317, time:1750767442.298076s req_ids:[8] +DEBUG 06-24 20:17:22 [manager.py:391] +ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:206.1631679534912ms total_cost_time:206.2084674835205ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9019 prompt_cache_len:5151 prompt_cache_ratio:0.5711276194700078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 +DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.1071929931640625 s +INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.1091768741607666 s +DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=120376976019836288580240986842697140808, time:1750767442.511536s req_ids:[8] +DEBUG 06-24 20:17:22 [manager.py:391] +ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:211.49635314941406ms total_cost_time:211.54260635375977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9020 prompt_cache_len:5151 prompt_cache_ratio:0.5710643015521064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 +DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.10979819297790527 s +INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11175346374511719 s +DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=183790117633128580297676593823407639860, time:1750767442.7292178s req_ids:[8] +DEBUG 06-24 20:17:22 [manager.py:391] +ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:205.31415939331055ms total_cost_time:205.3697109222412ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:9021 prompt_cache_len:5151 prompt_cache_ratio:0.5710009976720984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 +DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.10855817794799805 s +INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11048388481140137 s +DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=195418052297677259974204386944888487026, time:1750767442.940089s req_ids:[8] +DEBUG 06-24 20:17:22 [manager.py:391] +ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:204.77294921875ms total_cost_time:204.8165798187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9022 prompt_cache_len:5151 prompt_cache_ratio:0.5709377078253159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 +DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10881948471069336 s +INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.11065077781677246 s +DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=1757326764188527763622896894796764475, time:1750767443.154273s req_ids:[8] +DEBUG 06-24 20:17:23 [manager.py:391] +INFO 06-24 20:17:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:213.08016777038574ms total_cost_time:213.12355995178223ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9023 prompt_cache_len:5151 prompt_cache_ratio:0.570874432007093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 +DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10898113250732422 s +INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.11083245277404785 s +DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=246196932323326183899368089162140857770, time:1750767443.3705597s req_ids:[8] +DEBUG 06-24 20:17:23 [manager.py:391] +ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:206.06732368469238ms total_cost_time:206.11166954040527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9024 prompt_cache_len:5151 prompt_cache_ratio:0.570811170212766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 +DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10822367668151855 s +INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.1103522777557373 s +DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=195846588865691015174616264513569790305, time:1750767443.5838308s req_ids:[8] +DEBUG 06-24 20:17:23 [manager.py:391] +ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:214.4794464111328ms total_cost_time:214.5226001739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9025 prompt_cache_len:5151 prompt_cache_ratio:0.5707479224376731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 +DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s +INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.10938382148742676 s +DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=21168327506130646957229349341382917958, time:1750767443.8168314s req_ids:[8] +DEBUG 06-24 20:17:23 [manager.py:391] +ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:222.15700149536133ms total_cost_time:222.1992015838623ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9026 prompt_cache_len:5151 prompt_cache_ratio:0.5706846886771549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 +DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.1083230972290039 s +INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s +DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=255905150786887199098845183028234185825, time:1750767444.0340273s req_ids:[8] +DEBUG 06-24 20:17:24 [manager.py:391] +ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:363.8937473297119ms total_cost_time:363.95716667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:9027 prompt_cache_len:5151 prompt_cache_ratio:0.5706214689265536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 +DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10850405693054199 s +INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.11040639877319336 s +DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=182285771540374548568624860982144453646, time:1750767444.4017656s req_ids:[8] +DEBUG 06-24 20:17:24 [manager.py:391] +ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:206.1941623687744ms total_cost_time:206.2368392944336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9028 prompt_cache_len:5151 prompt_cache_ratio:0.570558263181214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 +DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10766720771789551 s +INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s +DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=24169608865987072195238911802103291561, time:1750767444.6247532s req_ids:[8] +DEBUG 06-24 20:17:24 [manager.py:391] +ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:212.71681785583496ms total_cost_time:212.75925636291504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9029 prompt_cache_len:5151 prompt_cache_ratio:0.5704950714364825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 +DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10879993438720703 s +INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.11068439483642578 s +DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=291012521357822056089159012780735255280, time:1750767444.846235s req_ids:[8] +DEBUG 06-24 20:17:24 [manager.py:391] +ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:214.83206748962402ms total_cost_time:214.87712860107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9030 prompt_cache_len:5151 prompt_cache_ratio:0.5704318936877076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 +DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10991501808166504 s +INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s +DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=106195945024620536298013656357598419643, time:1750767445.057377s req_ids:[8] +DEBUG 06-24 20:17:25 [manager.py:391] +ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:203.89604568481445ms total_cost_time:203.94086837768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9031 prompt_cache_len:5151 prompt_cache_ratio:0.5703687299302402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 +DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10819649696350098 s +INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11005353927612305 s +DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=143383355291374166620225071413529534936, time:1750767445.2665021s req_ids:[8] +DEBUG 06-24 20:17:25 [manager.py:391] +ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:205.02924919128418ms total_cost_time:205.0769329071045ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:9032 prompt_cache_len:5151 prompt_cache_ratio:0.5703055801594331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 +DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10988163948059082 s +INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11184239387512207 s +DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=212993884098863265608577302874415928432, time:1750767445.4804173s req_ids:[8] +DEBUG 06-24 20:17:25 [manager.py:391] +ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:205.04188537597656ms total_cost_time:205.05952835083008ms,out_token_counter:1 mean_per_token_cost_time: 0.017642974853515625ms prompt_token_num:9033 prompt_cache_len:5151 prompt_cache_ratio:0.570242444370641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 +DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10689139366149902 s +INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.10867929458618164 s +DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=125589872394213683625029000943628122230, time:1750767445.6948245s req_ids:[8] +DEBUG 06-24 20:17:25 [manager.py:391] +ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:216.00866317749023ms total_cost_time:216.05229377746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9034 prompt_cache_len:5151 prompt_cache_ratio:0.5701793225592208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 +DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.3091757297515869 s +INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.3111307621002197 s +DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=108651578238852569605351403456609193733, time:1750767446.1142063s req_ids:[8] +DEBUG 06-24 20:17:26 [manager.py:391] +ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:418.5481071472168ms total_cost_time:418.5929298400879ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9035 prompt_cache_len:5151 prompt_cache_ratio:0.5701162147205313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 +DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s +INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.10962319374084473 s +DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=62901160930639870454073417731847839307, time:1750767446.3348627s req_ids:[8] +DEBUG 06-24 20:17:26 [manager.py:391] +ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:202.03042030334473ms total_cost_time:202.09312438964844ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:9036 prompt_cache_len:5151 prompt_cache_ratio:0.5700531208499336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 +DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.10986542701721191 s +INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.11174917221069336 s +DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=86593469892047858363957617796270065255, time:1750767446.54246s req_ids:[8] +DEBUG 06-24 20:17:26 [manager.py:391] +ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:208.26077461242676ms total_cost_time:208.30416679382324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9037 prompt_cache_len:5151 prompt_cache_ratio:0.5699900409427907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 +DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.1077268123626709 s +INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s +DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=254274687396927669781078363049304102166, time:1750767446.7533488s req_ids:[8] +DEBUG 06-24 20:17:26 [manager.py:391] +ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:165.28844833374023ms total_cost_time:165.33517837524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9038 prompt_cache_len:5151 prompt_cache_ratio:0.5699269749944678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 +DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.109405517578125 s +INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.1112985610961914 s +DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=172830358942321650316837731707075342368, time:1750767446.9250538s req_ids:[8] +DEBUG 06-24 20:17:26 [manager.py:391] +ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:195.87278366088867ms total_cost_time:195.91736793518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9039 prompt_cache_len:5151 prompt_cache_ratio:0.5698639230003318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 +DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.1081991195678711 s +INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.11019468307495117 s +DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=184066609769409526265405192645799088222, time:1750767447.126606s req_ids:[8] +DEBUG 06-24 20:17:27 [manager.py:391] +ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:200.03056526184082ms total_cost_time:200.0751495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9040 prompt_cache_len:5151 prompt_cache_ratio:0.5698008849557522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 +DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10763955116271973 s +INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964417457580566 s +DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=309811716278438147874745734196503976459, time:1750767447.3448002s req_ids:[8] +DEBUG 06-24 20:17:27 [manager.py:391] +ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:214.28823471069336ms total_cost_time:214.33234214782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9041 prompt_cache_len:5151 prompt_cache_ratio:0.5697378608561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 +DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10972857475280762 s +INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.11169075965881348 s +DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=171341820387311616198659689439275757973, time:1750767447.5529075s req_ids:[8] +DEBUG 06-24 20:17:27 [manager.py:391] +ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:206.89082145690918ms total_cost_time:206.93612098693848ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9042 prompt_cache_len:5151 prompt_cache_ratio:0.5696748506967485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 +DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s +INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.10989713668823242 s +DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=204175858112969646678721911364068548984, time:1750767447.765457s req_ids:[8] +DEBUG 06-24 20:17:27 [manager.py:391] +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:372.9212284088135ms total_cost_time:372.9667663574219ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9043 prompt_cache_len:5151 prompt_cache_ratio:0.569611854473073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 +DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10844302177429199 s +INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11043357849121094 s +DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=102148160034655764285792572463043678115, time:1750767448.1383677s req_ids:[8] +DEBUG 06-24 20:17:28 [manager.py:391] +ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:207.23581314086914ms total_cost_time:207.27968215942383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9044 prompt_cache_len:5151 prompt_cache_ratio:0.5695488721804511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 +DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s +INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11036992073059082 s +DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=294823730050369668290038911640181721084, time:1750767448.3540196s req_ids:[8] +DEBUG 06-24 20:17:28 [manager.py:391] +ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:207.2749137878418ms total_cost_time:207.3192596435547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9045 prompt_cache_len:5151 prompt_cache_ratio:0.569485903814262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 +DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10800004005432129 s +INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s +DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=121740804808151401357241240308012416184, time:1750767448.5651655s req_ids:[8] +DEBUG 06-24 20:17:28 [manager.py:391] +ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:205.33251762390137ms total_cost_time:205.37686347961426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9046 prompt_cache_len:5151 prompt_cache_ratio:0.5694229493698872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 +DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.1090395450592041 s +INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11165952682495117 s +DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=22273556943361234544611201048355700871, time:1750767448.777193s req_ids:[8] +DEBUG 06-24 20:17:28 [manager.py:391] +ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:203.34672927856445ms total_cost_time:203.39059829711914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9047 prompt_cache_len:5151 prompt_cache_ratio:0.5693600088427103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 +DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s +INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s +DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=176365281971759274054800114141409402130, time:1750767448.9841177s req_ids:[8] +DEBUG 06-24 20:17:28 [manager.py:391] +ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:205.93738555908203ms total_cost_time:205.98173141479492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9048 prompt_cache_len:5151 prompt_cache_ratio:0.5692970822281167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 +DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.106719970703125 s +INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.10845565795898438 s +DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=69539051245927013469841173789718370101, time:1750767449.1968307s req_ids:[8] +DEBUG 06-24 20:17:29 [manager.py:391] +ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:166.42332077026367ms total_cost_time:166.46432876586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9049 prompt_cache_len:5151 prompt_cache_ratio:0.5692341695214941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 +DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10751485824584961 s +INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.10939741134643555 s +DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=248696540663083424477640112466331200452, time:1750767449.3686295s req_ids:[8] +DEBUG 06-24 20:17:29 [manager.py:391] +ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:202.26216316223145ms total_cost_time:202.30555534362793ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9050 prompt_cache_len:5151 prompt_cache_ratio:0.569171270718232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 +DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10850191116333008 s +INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.11038804054260254 s +DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=90237852150894263542821758350988211241, time:1750767449.5745094s req_ids:[8] +DEBUG 06-24 20:17:29 [manager.py:391] +ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:205.17325401306152ms total_cost_time:205.2159309387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9051 prompt_cache_len:5151 prompt_cache_ratio:0.5691083858137223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 +DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10884952545166016 s +INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.11072039604187012 s +DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=112798261252937456210403558858631971237, time:1750767449.784551s req_ids:[8] +DEBUG 06-24 20:17:29 [manager.py:391] +ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:17:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 37959.698 tokens/s +DEBUG 06-24 20:17:30 [stats.py:37] Avg prompt tokens throughput: 37951.196 tokens/s +DEBUG 06-24 20:17:30 [stats.py:37] Avg generate tokens throughput: 8.502 tokens/s +INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:374.25947189331055ms total_cost_time:374.30334091186523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9052 prompt_cache_len:5151 prompt_cache_ratio:0.5690455148033584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 +DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10845637321472168 s +INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.11043190956115723 s +DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=46172499855835102535638605376491679381, time:1750767450.1641493s req_ids:[8] +DEBUG 06-24 20:17:30 [manager.py:391] +ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:207.9174518585205ms total_cost_time:207.95965194702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9053 prompt_cache_len:5151 prompt_cache_ratio:0.5689826576825362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 +DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10796356201171875 s +INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s +DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=150432740938159030885871041910383665118, time:1750767450.3774915s req_ids:[8] +DEBUG 06-24 20:17:30 [manager.py:391] +ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:201.35855674743652ms total_cost_time:201.4012336730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9054 prompt_cache_len:5151 prompt_cache_ratio:0.5689198144466534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 +DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10838580131530762 s +INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031365394592285 s +DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=263058377155870401741826142622502305159, time:1750767450.5855207s req_ids:[8] +DEBUG 06-24 20:17:30 [manager.py:391] +ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:205.00802993774414ms total_cost_time:205.05023002624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9055 prompt_cache_len:5151 prompt_cache_ratio:0.5688569850911099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 +DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:30 [batch.py:51] router release req id 8 +INFO 06-24 20:17:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10722827911376953 s +INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s +DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=230652721732071936982121543856970753777, time:1750767450.7956138s req_ids:[8] +DEBUG 06-24 20:17:30 [manager.py:391] +ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:210.04891395568848ms total_cost_time:210.09159088134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9056 prompt_cache_len:5151 prompt_cache_ratio:0.5687941696113075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 +DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10843086242675781 s +INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.11037874221801758 s +DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=278207321136303310530199541732912331009, time:1750767451.0104725s req_ids:[8] +DEBUG 06-24 20:17:31 [manager.py:391] +ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:203.37390899658203ms total_cost_time:203.4165859222412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9057 prompt_cache_len:5151 prompt_cache_ratio:0.5687313680026499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 +DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10772299766540527 s +INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s +DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=155752698134876873499115911755912711820, time:1750767451.2209694s req_ids:[8] +DEBUG 06-24 20:17:31 [manager.py:391] +ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:206.41136169433594ms total_cost_time:206.45427703857422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9058 prompt_cache_len:5151 prompt_cache_ratio:0.5686685802605431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 +DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10894918441772461 s +INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.11092782020568848 s +DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=143443123846673624039053526833371009288, time:1750767451.4346554s req_ids:[8] +DEBUG 06-24 20:17:31 [manager.py:391] +ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:207.75938034057617ms total_cost_time:207.80134201049805ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9059 prompt_cache_len:5151 prompt_cache_ratio:0.5686058063803952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 +DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10797429084777832 s +INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.10985302925109863 s +DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=282167227268283711297495119859275404138, time:1750767451.6453416s req_ids:[8] +DEBUG 06-24 20:17:31 [manager.py:391] +ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:366.6999340057373ms total_cost_time:366.741418838501ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9060 prompt_cache_len:5151 prompt_cache_ratio:0.5685430463576159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 +DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.1090693473815918 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.11113476753234863 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=240630527962494075863602263035231183536, time:1750767452.0170205s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:197.45516777038574ms total_cost_time:197.49760627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9061 prompt_cache_len:5151 prompt_cache_ratio:0.5684803001876173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 +DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10878276824951172 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.11053228378295898 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=79683932674048073758332838157254158076, time:1750767452.229957s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:176.06449127197266ms total_cost_time:176.10621452331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9062 prompt_cache_len:5151 prompt_cache_ratio:0.5684175678658133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 +DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10784244537353516 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10953879356384277 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=130564116145833584033082582601019075580, time:1750767452.4035175s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:163.3918285369873ms total_cost_time:163.4347438812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9063 prompt_cache_len:5151 prompt_cache_ratio:0.56835484938762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 +DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10952544212341309 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=234526844808907882324166229756105218512, time:1750767452.5742404s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:194.0765380859375ms total_cost_time:194.1208839416504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9064 prompt_cache_len:5151 prompt_cache_ratio:0.5682921447484555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 +DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10748624801635742 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10972380638122559 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=119075549861745558479836765483475474716, time:1750767452.773023s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:200.042724609375ms total_cost_time:200.08468627929688ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9065 prompt_cache_len:5151 prompt_cache_ratio:0.5682294539437397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 +DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10761618614196777 s +INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10954093933105469 s +DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=162918449311404895902465367164190345699, time:1750767452.9807253s req_ids:[8] +DEBUG 06-24 20:17:32 [manager.py:391] +ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:203.20534706115723ms total_cost_time:203.24969291687012ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9066 prompt_cache_len:5151 prompt_cache_ratio:0.5681667769688947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 +DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10988783836364746 s +INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.11181783676147461 s +DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=146179967116824440059055427832698202671, time:1750767453.1911597s req_ids:[8] +DEBUG 06-24 20:17:33 [manager.py:391] +ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:205.30033111572266ms total_cost_time:205.38902282714844ms,out_token_counter:1 mean_per_token_cost_time: 0.08869171142578125ms prompt_token_num:9067 prompt_cache_len:5151 prompt_cache_ratio:0.5681041138193449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 +DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.1073455810546875 s +INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.10901021957397461 s +DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=144658769682933511972550424461314205895, time:1750767453.4035618s req_ids:[8] +DEBUG 06-24 20:17:33 [manager.py:391] +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:205.8427333831787ms total_cost_time:205.8861255645752ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9068 prompt_cache_len:5151 prompt_cache_ratio:0.5680414644905161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 +DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10862994194030762 s +INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.11048126220703125 s +DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=211262395551471538955593498382367406063, time:1750767453.6156483s req_ids:[8] +DEBUG 06-24 20:17:33 [manager.py:391] +ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:367.1834468841553ms total_cost_time:367.22660064697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9069 prompt_cache_len:5151 prompt_cache_ratio:0.5679788289778366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 +DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10819196701049805 s +INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s +DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=107820975240906965561470660412339631837, time:1750767453.9860559s req_ids:[8] +DEBUG 06-24 20:17:33 [manager.py:391] +ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:200.2890110015869ms total_cost_time:200.3335952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9070 prompt_cache_len:5151 prompt_cache_ratio:0.5679162072767365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 +DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10507702827453613 s +INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10679769515991211 s +DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=205225209936931002587392856928062240427, time:1750767454.1950545s req_ids:[8] +DEBUG 06-24 20:17:34 [manager.py:391] +ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:205.68013191223145ms total_cost_time:205.7020664215088ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:9071 prompt_cache_len:5151 prompt_cache_ratio:0.567853599382648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 +DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10712933540344238 s +INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10892057418823242 s +DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=158194142036279451302455411303820262690, time:1750767454.4101603s req_ids:[8] +DEBUG 06-24 20:17:34 [manager.py:391] +ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:207.57675170898438ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9072 prompt_cache_len:5151 prompt_cache_ratio:0.5677910052910053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 +DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10757827758789062 s +INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10940098762512207 s +DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=44347171820135762684373692154377166590, time:1750767454.6227002s req_ids:[8] +DEBUG 06-24 20:17:34 [manager.py:391] +ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:210.30497550964355ms total_cost_time:210.34812927246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9073 prompt_cache_len:5151 prompt_cache_ratio:0.5677284249972445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 +DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10779452323913574 s +INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s +DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=46577318760181191084934310514382168482, time:1750767454.8394735s req_ids:[8] +DEBUG 06-24 20:17:34 [manager.py:391] +ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:206.35986328125ms total_cost_time:206.4046859741211ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9074 prompt_cache_len:5151 prompt_cache_ratio:0.5676658584968041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 +DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10792279243469238 s +INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.10965871810913086 s +DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=303056134939306169761253453337698456772, time:1750767455.0517132s req_ids:[8] +DEBUG 06-24 20:17:35 [manager.py:391] +ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:204.6523094177246ms total_cost_time:204.6959400177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9075 prompt_cache_len:5151 prompt_cache_ratio:0.567603305785124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 +DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.1083219051361084 s +INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.11013603210449219 s +DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=297931845880827676857265525178087800212, time:1750767455.2621672s req_ids:[8] +DEBUG 06-24 20:17:35 [manager.py:391] +ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:206.160306930542ms total_cost_time:206.20274543762207ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9076 prompt_cache_len:5151 prompt_cache_ratio:0.5675407668576465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 +DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10890460014343262 s +INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s +DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=204897955006555471481438489697139057746, time:1750767455.4739044s req_ids:[8] +DEBUG 06-24 20:17:35 [manager.py:391] +ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:208.65845680236816ms total_cost_time:208.70208740234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9077 prompt_cache_len:5151 prompt_cache_ratio:0.567478241709816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 +DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10822010040283203 s +INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.10999894142150879 s +DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=336487824731695015526660155724652317351, time:1750767455.6898077s req_ids:[8] +DEBUG 06-24 20:17:35 [manager.py:391] +ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:359.75050926208496ms total_cost_time:359.79557037353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9078 prompt_cache_len:5151 prompt_cache_ratio:0.5674157303370787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 +DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s +INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s +DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=25067442058673772039438090688048044975, time:1750767456.0530531s req_ids:[8] +DEBUG 06-24 20:17:36 [manager.py:391] +ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:205.09004592895508ms total_cost_time:205.13510704040527ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9079 prompt_cache_len:5151 prompt_cache_ratio:0.5673532327348827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 +DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10714077949523926 s +INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10876750946044922 s +DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=172061770431736595010632408677400606391, time:1750767456.2652411s req_ids:[8] +DEBUG 06-24 20:17:36 [manager.py:391] +ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:165.95458984375ms total_cost_time:165.99559783935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9080 prompt_cache_len:5151 prompt_cache_ratio:0.5672907488986784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 +DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s +INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10992598533630371 s +DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=208117479228312208466612396778261736848, time:1750767456.4358087s req_ids:[8] +DEBUG 06-24 20:17:36 [manager.py:391] +ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:196.24924659729004ms total_cost_time:196.29359245300293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9081 prompt_cache_len:5151 prompt_cache_ratio:0.567228278823918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 +DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s +INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10978412628173828 s +DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=127525080092998326386041601173677015839, time:1750767456.6366534s req_ids:[8] +DEBUG 06-24 20:17:36 [manager.py:391] +ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:200.66285133361816ms total_cost_time:200.70672035217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9082 prompt_cache_len:5151 prompt_cache_ratio:0.567165822506056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 +DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10831427574157715 s +INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.11000680923461914 s +DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=206351178039516672771856355312574216531, time:1750767456.8456137s req_ids:[8] +DEBUG 06-24 20:17:36 [manager.py:391] +ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:205.61456680297852ms total_cost_time:205.6746482849121ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9083 prompt_cache_len:5151 prompt_cache_ratio:0.5671033799405483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 +DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10999631881713867 s +INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.11179184913635254 s +DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=128704728056128551549436374752652465424, time:1750767457.0579705s req_ids:[8] +DEBUG 06-24 20:17:37 [manager.py:391] +ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:202.2109031677246ms total_cost_time:202.2531032562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9084 prompt_cache_len:5151 prompt_cache_ratio:0.5670409511228534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 +DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10807204246520996 s +INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.10982942581176758 s +DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=170928825256746817523069897019407902532, time:1750767457.2780888s req_ids:[8] +DEBUG 06-24 20:17:37 [manager.py:391] +ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:219.6662425994873ms total_cost_time:219.710111618042ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9085 prompt_cache_len:5151 prompt_cache_ratio:0.5669785360484315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 +DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.3105344772338867 s +INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.31235814094543457 s +DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=115373669173046901443542345329097316294, time:1750767457.696327s req_ids:[8] +DEBUG 06-24 20:17:37 [manager.py:391] +ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:415.76671600341797ms total_cost_time:415.81130027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9086 prompt_cache_len:5151 prompt_cache_ratio:0.5669161347127449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 +DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10874485969543457 s +INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s +DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=176432542799910577045655493018298909826, time:1750767457.9167128s req_ids:[8] +DEBUG 06-24 20:17:37 [manager.py:391] +ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:211.62009239196777ms total_cost_time:211.66467666625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9087 prompt_cache_len:5151 prompt_cache_ratio:0.5668537471112578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 +DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10857939720153809 s +INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11076688766479492 s +DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=182146823058740913217817137935532365988, time:1750767458.1316423s req_ids:[8] +DEBUG 06-24 20:17:38 [manager.py:391] +ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:207.46111869812012ms total_cost_time:207.5057029724121ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9088 prompt_cache_len:5151 prompt_cache_ratio:0.5667913732394366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 +DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s +INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.10953974723815918 s +DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=89878620528512706800593880300899142713, time:1750767458.3464398s req_ids:[8] +DEBUG 06-24 20:17:38 [manager.py:391] +ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:207.1681022644043ms total_cost_time:207.18860626220703ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9089 prompt_cache_len:5151 prompt_cache_ratio:0.5667290130927495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 +DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10614609718322754 s +INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.10801076889038086 s +DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=142634653769112240282900307423188055642, time:1750767458.5582223s req_ids:[8] +DEBUG 06-24 20:17:38 [manager.py:391] +ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:209.13219451904297ms total_cost_time:209.17582511901855ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9090 prompt_cache_len:5151 prompt_cache_ratio:0.5666666666666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 +DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.11035585403442383 s +INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11220788955688477 s +DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=138316223372688956716404561153924373845, time:1750767458.7721753s req_ids:[8] +DEBUG 06-24 20:17:38 [manager.py:391] +ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:208.3299160003662ms total_cost_time:208.3740234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9091 prompt_cache_len:5151 prompt_cache_ratio:0.5666043339566604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 +DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10854911804199219 s +INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s +DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=263362447210243111690473103560753751188, time:1750767458.986477s req_ids:[8] +DEBUG 06-24 20:17:38 [manager.py:391] +ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:208.37640762329102ms total_cost_time:208.4212303161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9092 prompt_cache_len:5151 prompt_cache_ratio:0.566542014958205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 +DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10800814628601074 s +INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10989236831665039 s +DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=76227912907781212266002389760860021171, time:1750767459.200261s req_ids:[8] +DEBUG 06-24 20:17:39 [manager.py:391] +ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:208.53757858276367ms total_cost_time:208.58025550842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9093 prompt_cache_len:5151 prompt_cache_ratio:0.5664797096667766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 +DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s +INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10979557037353516 s +DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=242238091212291235386295463381145533090, time:1750767459.415203s req_ids:[8] +DEBUG 06-24 20:17:39 [manager.py:391] +ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:380.14984130859375ms total_cost_time:380.19442558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9094 prompt_cache_len:5151 prompt_cache_ratio:0.5664174180778535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 +DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10776996612548828 s +INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10909104347229004 s +DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=302640422053831647328045485026408915332, time:1750767459.7989516s req_ids:[8] +DEBUG 06-24 20:17:39 [manager.py:391] +ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:197.05891609191895ms total_cost_time:197.10230827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9095 prompt_cache_len:5151 prompt_cache_ratio:0.5663551401869159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 +DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10747122764587402 s +INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10871553421020508 s +DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=263564323815847502650207188799560671159, time:1750767460.010209s req_ids:[8] +DEBUG 06-24 20:17:40 [manager.py:391] +ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:17:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 39715.214 tokens/s +DEBUG 06-24 20:17:40 [stats.py:37] Avg prompt tokens throughput: 39706.462 tokens/s +DEBUG 06-24 20:17:40 [stats.py:37] Avg generate tokens throughput: 8.751 tokens/s +INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:210.89959144592285ms total_cost_time:210.94393730163574ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9096 prompt_cache_len:5151 prompt_cache_ratio:0.5662928759894459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 +DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10834217071533203 s +INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10960221290588379 s +DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=70963243783554287864233748988416880656, time:1750767460.2310975s req_ids:[8] +DEBUG 06-24 20:17:40 [manager.py:391] +ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:212.6145362854004ms total_cost_time:212.6600742340088ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9097 prompt_cache_len:5151 prompt_cache_ratio:0.5662306254809277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 +DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10781097412109375 s +INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10892009735107422 s +DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=51976469388299548267846981846796998420, time:1750767460.45682s req_ids:[8] +DEBUG 06-24 20:17:40 [manager.py:391] +ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:223.53863716125488ms total_cost_time:223.58274459838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9098 prompt_cache_len:5151 prompt_cache_ratio:0.5661683886568477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 +DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10772061347961426 s +INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10902929306030273 s +DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=223775799852905756568018494896860267300, time:1750767460.6696396s req_ids:[8] +DEBUG 06-24 20:17:40 [manager.py:391] +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:204.43201065063477ms total_cost_time:204.47659492492676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9099 prompt_cache_len:5151 prompt_cache_ratio:0.5661061655126937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 +DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10688972473144531 s +INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.108123779296875 s +DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=58518247834017251204217895661955953350, time:1750767460.8881118s req_ids:[8] +DEBUG 06-24 20:17:40 [manager.py:391] +ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:195.45221328735352ms total_cost_time:195.4967975616455ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9100 prompt_cache_len:5151 prompt_cache_ratio:0.5660439560439561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 +DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s +INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091775894165039 s +DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=150296117049281581506110310316176293477, time:1750767461.0849931s req_ids:[8] +DEBUG 06-24 20:17:41 [manager.py:391] +ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:208.18161964416504ms total_cost_time:208.22620391845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9101 prompt_cache_len:5151 prompt_cache_ratio:0.5659817602461268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 +DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.10841155052185059 s +INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.10972237586975098 s +DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=283807308141732662757355879473149139330, time:1750767461.2936542s req_ids:[8] +DEBUG 06-24 20:17:41 [manager.py:391] +ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:211.56024932861328ms total_cost_time:211.61866188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9102 prompt_cache_len:5151 prompt_cache_ratio:0.5659195781147001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 +DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.1072230339050293 s +INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.1084749698638916 s +DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=260761783970519086594320656913962518965, time:1750767461.522846s req_ids:[8] +DEBUG 06-24 20:17:41 [manager.py:391] +ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:445.6913471221924ms total_cost_time:445.7359313964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9103 prompt_cache_len:5151 prompt_cache_ratio:0.565857409645172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 +DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.1087038516998291 s +INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.10989832878112793 s +DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=228751951960390180023537572176241025601, time:1750767461.9604895s req_ids:[8] +DEBUG 06-24 20:17:41 [manager.py:391] +ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:204.85234260559082ms total_cost_time:204.8962116241455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9104 prompt_cache_len:5151 prompt_cache_ratio:0.5657952548330404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 +DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10744142532348633 s +INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.10933899879455566 s +DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=29451791423767860307010991991050173493, time:1750767462.1734905s req_ids:[8] +DEBUG 06-24 20:17:42 [manager.py:391] +ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:204.1487693786621ms total_cost_time:204.1916847229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9105 prompt_cache_len:5151 prompt_cache_ratio:0.5657331136738056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 +DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s +INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11037683486938477 s +DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=272304765429070754483715665818444208693, time:1750767462.3926804s req_ids:[8] +DEBUG 06-24 20:17:42 [manager.py:391] +ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:210.64472198486328ms total_cost_time:210.68763732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9106 prompt_cache_len:5151 prompt_cache_ratio:0.5656709861629695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 +DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10882401466369629 s +INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11069965362548828 s +DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=200383950542084731665368227552392277527, time:1750767462.59986s req_ids:[8] +DEBUG 06-24 20:17:42 [manager.py:391] +ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:216.48216247558594ms total_cost_time:216.52722358703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9107 prompt_cache_len:5151 prompt_cache_ratio:0.5656088722960361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 +DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10847091674804688 s +INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031556129455566 s +DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=44745802921034371653264177543152351047, time:1750767462.8273537s req_ids:[8] +DEBUG 06-24 20:17:42 [manager.py:391] +ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:208.91714096069336ms total_cost_time:208.96124839782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9108 prompt_cache_len:5151 prompt_cache_ratio:0.5655467720685112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 +DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10837960243225098 s +INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.10955262184143066 s +DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=263217397880628502499983181186969279215, time:1750767463.0443947s req_ids:[8] +DEBUG 06-24 20:17:43 [manager.py:391] +ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:213.53483200073242ms total_cost_time:213.58013153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9109 prompt_cache_len:5151 prompt_cache_ratio:0.5654846854759029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 +DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10771608352661133 s +INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s +DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=193410434992252893072806644306609567462, time:1750767463.2641819s req_ids:[8] +DEBUG 06-24 20:17:43 [manager.py:391] +ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:216.5396213531494ms total_cost_time:216.60518646240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:9110 prompt_cache_len:5151 prompt_cache_ratio:0.5654226125137212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 +DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10950517654418945 s +INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.1106252670288086 s +DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=237669666995255187717755512307023890439, time:1750767463.477472s req_ids:[8] +DEBUG 06-24 20:17:43 [manager.py:391] +ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:375.3175735473633ms total_cost_time:375.37360191345215ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9111 prompt_cache_len:5151 prompt_cache_ratio:0.5653605531774778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 +DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10981297492980957 s +INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.11100244522094727 s +DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=63843366113483220398848954350055386240, time:1750767463.8617978s req_ids:[8] +DEBUG 06-24 20:17:43 [manager.py:391] +ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:202.47125625610352ms total_cost_time:202.5151252746582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9112 prompt_cache_len:5151 prompt_cache_ratio:0.5652985074626866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 +DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s +INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10955262184143066 s +DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=89653658001103304936097936224465830486, time:1750767464.0794227s req_ids:[8] +DEBUG 06-24 20:17:44 [manager.py:391] +ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:215.33608436584473ms total_cost_time:215.378999710083ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9113 prompt_cache_len:5151 prompt_cache_ratio:0.5652364753648634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 +DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s +INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10882139205932617 s +DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=128607251516059381620689092756968946354, time:1750767464.2904258s req_ids:[8] +DEBUG 06-24 20:17:44 [manager.py:391] +ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:173.07281494140625ms total_cost_time:173.11477661132812ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9114 prompt_cache_len:5151 prompt_cache_ratio:0.565174456879526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 +DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.1074223518371582 s +INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10846853256225586 s +DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=31223166606576611599743235604451514682, time:1750767464.4679406s req_ids:[8] +DEBUG 06-24 20:17:44 [manager.py:391] +ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:195.7530975341797ms total_cost_time:195.79744338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9115 prompt_cache_len:5151 prompt_cache_ratio:0.5651124520021942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 +DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10872054100036621 s +INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10981893539428711 s +DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=336084515208951996784736391515857103132, time:1750767464.680144s req_ids:[8] +DEBUG 06-24 20:17:44 [manager.py:391] +ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:213.5787010192871ms total_cost_time:213.63162994384766ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:9116 prompt_cache_len:5151 prompt_cache_ratio:0.5650504607283896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 +DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10743880271911621 s +INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.1084895133972168 s +INFO 06-24 20:17:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=249687192207439366523226706485167352983, time:1750767464.9037137s req_ids:[8] +DEBUG 06-24 20:17:44 [manager.py:391] +ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:212.3849391937256ms total_cost_time:212.42761611938477ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9117 prompt_cache_len:5151 prompt_cache_ratio:0.564988483053636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 +DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s +INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s +DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=137303332310323471266158825118051753771, time:1750767465.1168795s req_ids:[8] +DEBUG 06-24 20:17:45 [manager.py:391] +ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:207.43131637573242ms total_cost_time:207.4742317199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9118 prompt_cache_len:5151 prompt_cache_ratio:0.5649265189734591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 +DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.1082618236541748 s +INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10927772521972656 s +DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=248893422368750251349530465773989141894, time:1750767465.3226542s req_ids:[8] +DEBUG 06-24 20:17:45 [manager.py:391] +ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:221.28534317016602ms total_cost_time:221.32635116577148ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9119 prompt_cache_len:5151 prompt_cache_ratio:0.5648645684833863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 +DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s +INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10936641693115234 s +DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=175471572388505947408846434978229123936, time:1750767465.5620322s req_ids:[8] +DEBUG 06-24 20:17:45 [manager.py:391] +ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:385.8215808868408ms total_cost_time:385.8823776245117ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:9120 prompt_cache_len:5151 prompt_cache_ratio:0.5648026315789474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 +DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10846948623657227 s +INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10959267616271973 s +DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=229000686109359084199154972758708999837, time:1750767465.9417825s req_ids:[8] +DEBUG 06-24 20:17:45 [manager.py:391] +ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:200.00505447387695ms total_cost_time:200.04844665527344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9121 prompt_cache_len:5151 prompt_cache_ratio:0.5647407082556737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 +DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s +INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s +DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=321196718147821671106974363826428519690, time:1750767466.1471102s req_ids:[8] +DEBUG 06-24 20:17:46 [manager.py:391] +ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:217.70811080932617ms total_cost_time:217.75126457214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9122 prompt_cache_len:5151 prompt_cache_ratio:0.5646787985090989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 +DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10815286636352539 s +INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10911417007446289 s +DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=26260114944191526090112052080083379405, time:1750767466.3696215s req_ids:[8] +DEBUG 06-24 20:17:46 [manager.py:391] +ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:212.6443386077881ms total_cost_time:212.68582344055176ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9123 prompt_cache_len:5151 prompt_cache_ratio:0.5646169023347583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 +DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10736417770385742 s +INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10934853553771973 s +DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=130571995628882504245055712227674726714, time:1750767466.5869458s req_ids:[8] +DEBUG 06-24 20:17:46 [manager.py:391] +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:195.23859024047852ms total_cost_time:195.2815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9124 prompt_cache_len:5151 prompt_cache_ratio:0.5645550197281894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 +DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10753417015075684 s +INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938525199890137 s +DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=48138046015850802785012282112927102814, time:1750767466.7869596s req_ids:[8] +DEBUG 06-24 20:17:46 [manager.py:391] +ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:200.8800506591797ms total_cost_time:200.92177391052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9125 prompt_cache_len:5151 prompt_cache_ratio:0.5644931506849316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 +DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:46 [batch.py:51] router release req id 8 +INFO 06-24 20:17:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10873937606811523 s +INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.11086440086364746 s +DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=159136781388559923265721056429741570705, time:1750767467.0088549s req_ids:[8] +DEBUG 06-24 20:17:47 [manager.py:391] +ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:216.9356346130371ms total_cost_time:216.97640419006348ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9126 prompt_cache_len:5151 prompt_cache_ratio:0.5644312952005259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 +DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10831403732299805 s +INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s +DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=89080046073269820237940655444263389399, time:1750767467.217674s req_ids:[8] +DEBUG 06-24 20:17:47 [manager.py:391] +ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:204.59890365600586ms total_cost_time:204.63895797729492ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9127 prompt_cache_len:5151 prompt_cache_ratio:0.564369453270516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 +DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10799431800842285 s +INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994219779968262 s +DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=314897341560074333632355274740919162265, time:1750767467.4281774s req_ids:[8] +DEBUG 06-24 20:17:47 [manager.py:391] +ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:206.31742477416992ms total_cost_time:206.3593864440918ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9128 prompt_cache_len:5151 prompt_cache_ratio:0.564307624890447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 +DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10794615745544434 s +INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s +DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=179681104245930564168593553247355939673, time:1750767467.6568062s req_ids:[8] +DEBUG 06-24 20:17:47 [manager.py:391] +ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:401.17835998535156ms total_cost_time:401.22103691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9129 prompt_cache_len:5151 prompt_cache_ratio:0.5642458100558659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 +DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s +INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.10995364189147949 s +DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=163339092325223953140991650558096356050, time:1750767468.0450165s req_ids:[8] +DEBUG 06-24 20:17:48 [manager.py:391] +ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:201.14469528198242ms total_cost_time:201.1857032775879ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9130 prompt_cache_len:5151 prompt_cache_ratio:0.564184008762322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 +DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10825681686401367 s +INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11035370826721191 s +DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=264392873418988787551537759616035734224, time:1750767468.2613184s req_ids:[8] +DEBUG 06-24 20:17:48 [manager.py:391] +ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:212.92400360107422ms total_cost_time:212.9662036895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9131 prompt_cache_len:5151 prompt_cache_ratio:0.5641222210053664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 +DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:48 [batch.py:51] router release req id 8 +INFO 06-24 20:17:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s +INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11011695861816406 s +DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=151232507364924187237706629753431039614, time:1750767468.4736757s req_ids:[8] +DEBUG 06-24 20:17:48 [manager.py:391] +ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.97879791259766ms total_cost_time:207.01956748962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9132 prompt_cache_len:5151 prompt_cache_ratio:0.5640604467805519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 +DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10829973220825195 s +INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11043930053710938 s +DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=79540876517241164630107096143610535091, time:1750767468.687593s req_ids:[8] +DEBUG 06-24 20:17:48 [manager.py:391] +ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:204.93698120117188ms total_cost_time:204.97870445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9133 prompt_cache_len:5151 prompt_cache_ratio:0.5639986860834337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 +DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10817217826843262 s +INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11029314994812012 s +DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=116460816963910177617645820307223448997, time:1750767468.8991795s req_ids:[8] +DEBUG 06-24 20:17:48 [manager.py:391] +ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.1934471130371ms total_cost_time:206.2373161315918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9134 prompt_cache_len:5151 prompt_cache_ratio:0.5639369389095686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 +DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10797429084777832 s +INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.1099083423614502 s +DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=169626425627265771429907825004631984783, time:1750767469.1138191s req_ids:[8] +DEBUG 06-24 20:17:49 [manager.py:391] +ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.15673065185547ms total_cost_time:206.19678497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9135 prompt_cache_len:5151 prompt_cache_ratio:0.5638752052545156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 +DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10873222351074219 s +INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.11085367202758789 s +DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=268093803980831210387764013211877021655, time:1750767469.3255413s req_ids:[8] +DEBUG 06-24 20:17:49 [manager.py:391] +ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:203.57894897460938ms total_cost_time:203.62091064453125ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9136 prompt_cache_len:5151 prompt_cache_ratio:0.5638134851138353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 +DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.3112161159515381 s +INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.3132617473602295 s +DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=40780508849527415984221058469774820982, time:1750767469.7391853s req_ids:[8] +DEBUG 06-24 20:17:49 [manager.py:391] +ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:417.52171516418457ms total_cost_time:417.5453186035156ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:9137 prompt_cache_len:5151 prompt_cache_ratio:0.5637517784830908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 +DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10578680038452148 s +INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.10777139663696289 s +DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=31742633132422482604816317503867970792, time:1750767469.959137s req_ids:[8] +DEBUG 06-24 20:17:49 [manager.py:391] +ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:209.54442024230957ms total_cost_time:209.56945419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:9138 prompt_cache_len:5151 prompt_cache_ratio:0.5636900853578464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 +DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.10494494438171387 s +INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.10709381103515625 s +DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=224426812676846252755451243756798516722, time:1750767470.189711s req_ids:[8] +DEBUG 06-24 20:17:50 [manager.py:391] +DEBUG 06-24 20:17:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 38852.019 tokens/s +DEBUG 06-24 20:17:50 [stats.py:37] Avg prompt tokens throughput: 38843.598 tokens/s +DEBUG 06-24 20:17:50 [stats.py:37] Avg generate tokens throughput: 8.421 tokens/s +ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:225.65460205078125ms total_cost_time:225.68106651306152ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:9139 prompt_cache_len:5151 prompt_cache_ratio:0.5636284057336689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 +DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.1061105728149414 s +INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.10815787315368652 s +DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=151839675575688813695308174843151410742, time:1750767470.4111671s req_ids:[8] +DEBUG 06-24 20:17:50 [manager.py:391] +ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:216.06135368347168ms total_cost_time:216.10474586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9140 prompt_cache_len:5151 prompt_cache_ratio:0.5635667396061269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 +DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s +INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.11080288887023926 s +DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=267268160903167560583673742182398741362, time:1750767470.6247582s req_ids:[8] +DEBUG 06-24 20:17:50 [manager.py:391] +ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:203.64689826965332ms total_cost_time:203.69315147399902ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9141 prompt_cache_len:5151 prompt_cache_ratio:0.5635050869707909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 +DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.10885405540466309 s +INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.11076545715332031 s +DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=318366147512324683774626486903332458750, time:1750767470.8458142s req_ids:[8] +DEBUG 06-24 20:17:50 [manager.py:391] +ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:218.42074394226074ms total_cost_time:218.46485137939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9142 prompt_cache_len:5151 prompt_cache_ratio:0.5634434478232334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 +DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10855865478515625 s +INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.110595703125 s +DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=310012664102105101403075701128513841867, time:1750767471.0596693s req_ids:[8] +DEBUG 06-24 20:17:51 [manager.py:391] +ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:211.90905570983887ms total_cost_time:211.95340156555176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9143 prompt_cache_len:5151 prompt_cache_ratio:0.5633818221590288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 +DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s +INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.10993790626525879 s +DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=318784245780716834934045844335098572798, time:1750767471.2862723s req_ids:[8] +DEBUG 06-24 20:17:51 [manager.py:391] +ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:215.69252014160156ms total_cost_time:215.73805809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9144 prompt_cache_len:5151 prompt_cache_ratio:0.5633202099737533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 +DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10901212692260742 s +INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.11103296279907227 s +DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=200541937897549555525460644966036483649, time:1750767471.513238s req_ids:[8] +DEBUG 06-24 20:17:51 [manager.py:391] +ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:411.73243522644043ms total_cost_time:411.7765426635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9145 prompt_cache_len:5151 prompt_cache_ratio:0.5632586112629853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 +DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +INFO 06-24 20:17:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10890722274780273 s +INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.1104881763458252 s +DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=51817998049738479400287841593904923331, time:1750767471.9167204s req_ids:[8] +DEBUG 06-24 20:17:51 [manager.py:391] +ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:202.6212215423584ms total_cost_time:202.6650905609131ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9146 prompt_cache_len:5151 prompt_cache_ratio:0.5631970260223048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 +DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s +INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.10972261428833008 s +DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=125243099393884674255731487202836969405, time:1750767472.1270342s req_ids:[8] +DEBUG 06-24 20:17:52 [manager.py:391] +ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:207.3037624359131ms total_cost_time:207.3497772216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9147 prompt_cache_len:5151 prompt_cache_ratio:0.5631354542472942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 +DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.107696533203125 s +INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.10968756675720215 s +DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=30639912477286841283430553634645288861, time:1750767472.3412244s req_ids:[8] +DEBUG 06-24 20:17:52 [manager.py:391] +ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:203.216552734375ms total_cost_time:203.2601833343506ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9148 prompt_cache_len:5151 prompt_cache_ratio:0.5630738959335374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 +DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s +INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.11066746711730957 s +DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=266707414069044576990849343319810443864, time:1750767472.5515623s req_ids:[8] +DEBUG 06-24 20:17:52 [manager.py:391] +ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:206.63094520568848ms total_cost_time:206.67386054992676ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9149 prompt_cache_len:5151 prompt_cache_ratio:0.5630123510766204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 +DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10834503173828125 s +INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.1103827953338623 s +DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=94734751028820478190516509802155873537, time:1750767472.7643964s req_ids:[8] +DEBUG 06-24 20:17:52 [manager.py:391] +ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:206.132173538208ms total_cost_time:206.1758041381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9150 prompt_cache_len:5151 prompt_cache_ratio:0.5629508196721311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 +DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s +INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.1107335090637207 s +DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=9552556883086670841568969081011581313, time:1750767472.9903944s req_ids:[8] +DEBUG 06-24 20:17:52 [manager.py:391] +ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:221.23169898986816ms total_cost_time:221.27366065979004ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9151 prompt_cache_len:5151 prompt_cache_ratio:0.5628893017156595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 +DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s +INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.11030721664428711 s +INFO 06-24 20:17:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:17:53 [statics_utils.py:24] mean first cost: 228.4931887659228 ms +INFO 06-24 20:17:53 [statics_utils.py:24] mean per token cost: 0.07382688314471175 ms +DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=324711151569472514536706116248160378323, time:1750767473.203997s req_ids:[8] +DEBUG 06-24 20:17:53 [manager.py:391] +ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:209.30719375610352ms total_cost_time:209.35988426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:9152 prompt_cache_len:5151 prompt_cache_ratio:0.5628277972027972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 +DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10935235023498535 s +INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.11139464378356934 s +DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=214006993374620486464232341518666973826, time:1750767473.417888s req_ids:[8] +DEBUG 06-24 20:17:53 [manager.py:391] +ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:209.87915992736816ms total_cost_time:209.92398262023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9153 prompt_cache_len:5151 prompt_cache_ratio:0.562766306129138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 +DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10793447494506836 s +INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100761890411377 s +DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=253767855611546421588786042887253644750, time:1750767473.6321132s req_ids:[8] +DEBUG 06-24 20:17:53 [manager.py:391] +ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:372.35212326049805ms total_cost_time:372.3728656768799ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:9154 prompt_cache_len:5151 prompt_cache_ratio:0.5627048284902775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 +DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10731840133666992 s +INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10938501358032227 s +DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=331909048137418625658946409003102388320, time:1750767474.0119374s req_ids:[8] +DEBUG 06-24 20:17:54 [manager.py:391] +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:207.20505714416504ms total_cost_time:207.24964141845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9155 prompt_cache_len:5151 prompt_cache_ratio:0.5626433642818132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 +DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10929989814758301 s +INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.11120271682739258 s +DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=287390887733117223773472737635453980604, time:1750767474.2265604s req_ids:[8] +DEBUG 06-24 20:17:54 [manager.py:391] +ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:207.94296264648438ms total_cost_time:207.98683166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9156 prompt_cache_len:5151 prompt_cache_ratio:0.5625819134993447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 +DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10787606239318848 s +INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.11002779006958008 s +DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=283487759017220183620895517911980688769, time:1750767474.441972s req_ids:[8] +DEBUG 06-24 20:17:54 [manager.py:391] +ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:209.20085906982422ms total_cost_time:209.2432975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9157 prompt_cache_len:5151 prompt_cache_ratio:0.5625204761384733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 +DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10753440856933594 s +INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10953354835510254 s +DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=277191049498297397188555628965409990833, time:1750767474.6522064s req_ids:[8] +DEBUG 06-24 20:17:54 [manager.py:391] +ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:200.75011253356934ms total_cost_time:200.79421997070312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9158 prompt_cache_len:5151 prompt_cache_ratio:0.5624590521948024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 +DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10764384269714355 s +INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10975313186645508 s +DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=303703254543840744832812142557198748455, time:1750767474.8732293s req_ids:[8] +DEBUG 06-24 20:17:54 [manager.py:391] +ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:218.07098388671875ms total_cost_time:218.11461448669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9159 prompt_cache_len:5151 prompt_cache_ratio:0.5623976416639371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 +DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10857176780700684 s +INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.110565185546875 s +DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=240360103760360458085029011617726902207, time:1750767475.0846636s req_ids:[8] +DEBUG 06-24 20:17:55 [manager.py:391] +ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:202.8360366821289ms total_cost_time:202.8806209564209ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9160 prompt_cache_len:5151 prompt_cache_ratio:0.5623362445414847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 +DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s +INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s +DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=58917766878084295456849795659540730102, time:1750767475.2919686s req_ids:[8] +DEBUG 06-24 20:17:55 [manager.py:391] +ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:202.9721736907959ms total_cost_time:203.0172348022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9161 prompt_cache_len:5151 prompt_cache_ratio:0.5622748608230542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 +DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s +INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.11113643646240234 s +DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=305639944509063170281314874698033301728, time:1750767475.5022302s req_ids:[8] +DEBUG 06-24 20:17:55 [manager.py:391] +ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:384.38940048217773ms total_cost_time:384.43517684936523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9162 prompt_cache_len:5151 prompt_cache_ratio:0.5622134905042567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 +DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s +INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.10993170738220215 s +DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=29690471294648345669967766610822702366, time:1750767475.8896215s req_ids:[8] +DEBUG 06-24 20:17:55 [manager.py:391] +ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:204.56910133361816ms total_cost_time:204.61440086364746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9163 prompt_cache_len:5151 prompt_cache_ratio:0.562152133580705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 +DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s +INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.1098015308380127 s +DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=220611319126008586915767920987530128428, time:1750767476.1014805s req_ids:[8] +DEBUG 06-24 20:17:56 [manager.py:391] +ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:207.08250999450684ms total_cost_time:207.1242332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9164 prompt_cache_len:5151 prompt_cache_ratio:0.562090790048014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 +DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10835552215576172 s +INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s +DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=236789039643149174638226162559960928145, time:1750767476.3129618s req_ids:[8] +DEBUG 06-24 20:17:56 [manager.py:391] +ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:207.52644538879395ms total_cost_time:207.57079124450684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9165 prompt_cache_len:5151 prompt_cache_ratio:0.5620294599018003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 +DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10904622077941895 s +INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.11103224754333496 s +DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=308589171101176568547402010041205559053, time:1750767476.527668s req_ids:[8] +DEBUG 06-24 20:17:56 [manager.py:391] +ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:206.63166046142578ms total_cost_time:206.6953182220459ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:9166 prompt_cache_len:5151 prompt_cache_ratio:0.5619681431376827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 +DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10864520072937012 s +INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.11070895195007324 s +DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=330792659487416461321722315400079216502, time:1750767476.7409718s req_ids:[8] +DEBUG 06-24 20:17:56 [manager.py:391] +ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:203.98283004760742ms total_cost_time:204.0269374847412ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9167 prompt_cache_len:5151 prompt_cache_ratio:0.5619068397512818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 +DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10741710662841797 s +INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.10938501358032227 s +DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=47901682726416836059836200692660336761, time:1750767476.9498265s req_ids:[8] +DEBUG 06-24 20:17:56 [manager.py:391] +ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:202.3603916168213ms total_cost_time:202.409029006958ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:9168 prompt_cache_len:5151 prompt_cache_ratio:0.5618455497382199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 +DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10722565650939941 s +INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.10907697677612305 s +DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=2751666521234312501967093984730327320, time:1750767477.170453s req_ids:[8] +DEBUG 06-24 20:17:57 [manager.py:391] +ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:220.99018096923828ms total_cost_time:221.03643417358398ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9169 prompt_cache_len:5151 prompt_cache_ratio:0.5617842730941215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 +DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10926604270935059 s +INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.11126208305358887 s +DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=120130646417052014959436074914889918533, time:1750767477.3853238s req_ids:[8] +DEBUG 06-24 20:17:57 [manager.py:391] +ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:205.7507038116455ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9170 prompt_cache_len:5151 prompt_cache_ratio:0.5617230098146129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 +DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s +INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s +DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=187012901259584873121280241010995897899, time:1750767477.5939145s req_ids:[8] +DEBUG 06-24 20:17:57 [manager.py:391] +ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:366.91737174987793ms total_cost_time:366.9595718383789ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9171 prompt_cache_len:5151 prompt_cache_ratio:0.5616617598953222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 +DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10728216171264648 s +INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.10853028297424316 s +DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=145257320452728365833048809989216592140, time:1750767477.9662383s req_ids:[8] +DEBUG 06-24 20:17:57 [manager.py:391] +ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:204.3168544769287ms total_cost_time:204.3609619140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9172 prompt_cache_len:5151 prompt_cache_ratio:0.5616005233318796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 +DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10955238342285156 s +INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11149144172668457 s +DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=95427247315366658458260292105757213725, time:1750767478.1785378s req_ids:[8] +DEBUG 06-24 20:17:58 [manager.py:391] +ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:205.17849922180176ms total_cost_time:205.26409149169922ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:9173 prompt_cache_len:5151 prompt_cache_ratio:0.5615393001199172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 +DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s +INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11025571823120117 s +DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=100975865134194915189570895870596366508, time:1750767478.4067347s req_ids:[8] +DEBUG 06-24 20:17:58 [manager.py:391] +ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:225.87919235229492ms total_cost_time:225.9237766265869ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9174 prompt_cache_len:5151 prompt_cache_ratio:0.5614780902550687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 +DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10902166366577148 s +INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11071252822875977 s +DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=96322102084514881656518143232030772973, time:1750767478.6241958s req_ids:[8] +DEBUG 06-24 20:17:58 [manager.py:391] +ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:207.53240585327148ms total_cost_time:207.5819969177246ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:9175 prompt_cache_len:5151 prompt_cache_ratio:0.56141689373297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 +DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10872745513916016 s +INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s +DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=318394982054173243714165842305838472535, time:1750767478.8371236s req_ids:[8] +DEBUG 06-24 20:17:58 [manager.py:391] +ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:204.70714569091797ms total_cost_time:204.75006103515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9176 prompt_cache_len:5151 prompt_cache_ratio:0.5613557105492589 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 +DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s +INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947918891906738 s +DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=137287051620844474141744064446558746008, time:1750767479.0458934s req_ids:[8] +DEBUG 06-24 20:17:59 [manager.py:391] +ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:205.45434951782227ms total_cost_time:205.49750328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9177 prompt_cache_len:5151 prompt_cache_ratio:0.5612945406995751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 +DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10834145545959473 s +INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103677749633789 s +DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=303656569711345954085768466428423768664, time:1750767479.2584782s req_ids:[8] +DEBUG 06-24 20:17:59 [manager.py:391] +ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:208.5881233215332ms total_cost_time:208.6319923400879ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9178 prompt_cache_len:5151 prompt_cache_ratio:0.5612333841795598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 +DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10766220092773438 s +INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.11025714874267578 s +DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=121588650703532696032183635134485630204, time:1750767479.4725976s req_ids:[8] +DEBUG 06-24 20:17:59 [manager.py:391] +ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:210.56652069091797ms total_cost_time:210.61134338378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9179 prompt_cache_len:5151 prompt_cache_ratio:0.5611722409848567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 +DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s +INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.11002111434936523 s +DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=78649853741524488062543666123288661923, time:1750767479.6935399s req_ids:[8] +DEBUG 06-24 20:17:59 [manager.py:391] +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:374.2671012878418ms total_cost_time:374.3126392364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9180 prompt_cache_len:5151 prompt_cache_ratio:0.5611111111111111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 +DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:17:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10827326774597168 s +INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.11011338233947754 s +DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=334951981988306762992992621850953188183, time:1750767480.0709212s req_ids:[8] +DEBUG 06-24 20:18:00 [manager.py:391] +ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:212.01562881469727ms total_cost_time:212.06068992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9181 prompt_cache_len:5151 prompt_cache_ratio:0.5610499945539702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 +DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10805463790893555 s +INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006879806518555 s +DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=141463028044222706786599367214542703915, time:1750767480.2843537s req_ids:[8] +DEBUG 06-24 20:18:00 [manager.py:391] +DEBUG 06-24 20:18:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 39031.682 tokens/s +DEBUG 06-24 20:18:00 [stats.py:37] Avg prompt tokens throughput: 39023.162 tokens/s +DEBUG 06-24 20:18:00 [stats.py:37] Avg generate tokens throughput: 8.519 tokens/s +ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:210.74295043945312ms total_cost_time:210.7858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9182 prompt_cache_len:5151 prompt_cache_ratio:0.560988891309083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 +DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.1053628921508789 s +INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10784459114074707 s +DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=123848837855498994080969459403205099170, time:1750767480.502519s req_ids:[8] +DEBUG 06-24 20:18:00 [manager.py:391] +ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:202.67486572265625ms total_cost_time:202.71944999694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9183 prompt_cache_len:5151 prompt_cache_ratio:0.5609278013721006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 +DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.1080477237701416 s +INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10992217063903809 s +DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=146032783821635420479968329603284437169, time:1750767480.7128665s req_ids:[8] +DEBUG 06-24 20:18:00 [manager.py:391] +ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:170.00269889831543ms total_cost_time:170.0448989868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9184 prompt_cache_len:5151 prompt_cache_ratio:0.5608667247386759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 +DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10802102088928223 s +INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10985898971557617 s +DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=310822315137735517657485655362343319827, time:1750767480.8869317s req_ids:[8] +DEBUG 06-24 20:18:00 [manager.py:391] +ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:198.7283229827881ms total_cost_time:198.81248474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.08416175842285156ms prompt_token_num:9185 prompt_cache_len:5151 prompt_cache_ratio:0.5608056614044638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 +DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s +INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.11035466194152832 s +DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=216477383643683980862385346214404857276, time:1750767481.0965877s req_ids:[8] +DEBUG 06-24 20:18:01 [manager.py:391] +ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:210.2828025817871ms total_cost_time:210.3273868560791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9186 prompt_cache_len:5151 prompt_cache_ratio:0.5607446113651209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 +DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.10789704322814941 s +INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.10989069938659668 s +DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=157400207346203648751317866692448464223, time:1750767481.3097582s req_ids:[8] +DEBUG 06-24 20:18:01 [manager.py:391] +ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:206.2993049621582ms total_cost_time:206.345796585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9187 prompt_cache_len:5151 prompt_cache_ratio:0.5606835746163057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 +DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.3104102611541748 s +INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.31241559982299805 s +DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=337259564766646680565441084991899011358, time:1750767481.7277277s req_ids:[8] +DEBUG 06-24 20:18:01 [manager.py:391] +ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:419.4505214691162ms total_cost_time:419.4951057434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9188 prompt_cache_len:5151 prompt_cache_ratio:0.5606225511536788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 +DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.1077888011932373 s +INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.10938882827758789 s +DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=245808586545818793940237183825811874109, time:1750767481.9486392s req_ids:[8] +DEBUG 06-24 20:18:01 [manager.py:391] +ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:209.63335037231445ms total_cost_time:209.67721939086914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9189 prompt_cache_len:5151 prompt_cache_ratio:0.5605615409729023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 +DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10892820358276367 s +INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s +DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=9127106633765892834038504792990398407, time:1750767482.1621401s req_ids:[8] +DEBUG 06-24 20:18:02 [manager.py:391] +ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:208.92596244812012ms total_cost_time:208.9710235595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9190 prompt_cache_len:5151 prompt_cache_ratio:0.5605005440696409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 +DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s +INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11028337478637695 s +DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=253111041487356381843795679361273236578, time:1750767482.375281s req_ids:[8] +DEBUG 06-24 20:18:02 [manager.py:391] +ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:205.02424240112305ms total_cost_time:205.06811141967773ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9191 prompt_cache_len:5151 prompt_cache_ratio:0.5604395604395604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 +DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10712623596191406 s +INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.10904693603515625 s +DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=295304094055455162681260328310239442188, time:1750767482.5975747s req_ids:[8] +DEBUG 06-24 20:18:02 [manager.py:391] +ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:222.4125862121582ms total_cost_time:222.4557399749756ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9192 prompt_cache_len:5151 prompt_cache_ratio:0.560378590078329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 +DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10883975028991699 s +INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11096668243408203 s +DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=214888043985544017507894540235459245545, time:1750767482.8149986s req_ids:[8] +DEBUG 06-24 20:18:02 [manager.py:391] +ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:209.46741104125977ms total_cost_time:209.51223373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9193 prompt_cache_len:5151 prompt_cache_ratio:0.5603176329816164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 +DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10790586471557617 s +INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100149154663086 s +DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=83934191685152002874158912726003421552, time:1750767483.0288465s req_ids:[8] +DEBUG 06-24 20:18:03 [manager.py:391] +ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:206.99691772460938ms total_cost_time:207.04317092895508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9194 prompt_cache_len:5151 prompt_cache_ratio:0.5602566891450946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 +DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s +INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s +DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=196632631739059630910480213559346553875, time:1750767483.243221s req_ids:[8] +DEBUG 06-24 20:18:03 [manager.py:391] +ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:207.5943946838379ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9195 prompt_cache_len:5151 prompt_cache_ratio:0.5601957585644372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 +DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10853862762451172 s +INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s +DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=263542503111660210103797759936809571995, time:1750767483.4565628s req_ids:[8] +DEBUG 06-24 20:18:03 [manager.py:391] +ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:379.7135353088379ms total_cost_time:379.7571659088135ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9196 prompt_cache_len:5151 prompt_cache_ratio:0.5601348412353198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 +DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s +INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100318431854248 s +DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=10866354140679370042787661828376891234, time:1750767483.8377013s req_ids:[8] +DEBUG 06-24 20:18:03 [manager.py:391] +ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:186.171293258667ms total_cost_time:186.19036674499512ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:9197 prompt_cache_len:5151 prompt_cache_ratio:0.5600739371534196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 +DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10355091094970703 s +INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10531949996948242 s +DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=116403008540925730974961226649763478141, time:1750767484.0284977s req_ids:[8] +DEBUG 06-24 20:18:04 [manager.py:391] +ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:161.64445877075195ms total_cost_time:161.66973114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:9198 prompt_cache_len:5151 prompt_cache_ratio:0.5600130463144162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 +DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10378670692443848 s +INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10484933853149414 s +DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=288617315235715295294410479010533969353, time:1750767484.1955159s req_ids:[8] +DEBUG 06-24 20:18:04 [manager.py:391] +ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:193.2976245880127ms total_cost_time:193.31908226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:9199 prompt_cache_len:5151 prompt_cache_ratio:0.5599521687139907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 +DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.1043548583984375 s +INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10623621940612793 s +DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=109473945284411648595648756599354109041, time:1750767484.3974924s req_ids:[8] +DEBUG 06-24 20:18:04 [manager.py:391] +ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:195.57499885559082ms total_cost_time:195.59597969055176ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9200 prompt_cache_len:5151 prompt_cache_ratio:0.5598913043478261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 +DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10421562194824219 s +INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10519051551818848 s +DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=253448908328166926368113286808427068371, time:1750767484.5957174s req_ids:[8] +DEBUG 06-24 20:18:04 [manager.py:391] +ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:209.14649963378906ms total_cost_time:209.17057991027832ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:9201 prompt_cache_len:5151 prompt_cache_ratio:0.5598304532116074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 +DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10439085960388184 s +INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10626935958862305 s +DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=209749523721773349881434321096166843830, time:1750767484.8089638s req_ids:[8] +DEBUG 06-24 20:18:04 [manager.py:391] +ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:209.5654010772705ms total_cost_time:209.58590507507324ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9202 prompt_cache_len:5151 prompt_cache_ratio:0.5597696153010215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 +DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10353398323059082 s +INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10541343688964844 s +DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=276266234381847925214626001471024049600, time:1750767485.0224288s req_ids:[8] +DEBUG 06-24 20:18:05 [manager.py:391] +ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:168.57004165649414ms total_cost_time:168.59102249145508ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9203 prompt_cache_len:5151 prompt_cache_ratio:0.559708790611757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 +DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10354781150817871 s +INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10534286499023438 s +DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=286509355390923617819774299002137245791, time:1750767485.1954172s req_ids:[8] +DEBUG 06-24 20:18:05 [manager.py:391] +ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:169.17133331298828ms total_cost_time:169.19326782226562ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:9204 prompt_cache_len:5151 prompt_cache_ratio:0.5596479791395046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 +DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10430097579956055 s +INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10606932640075684 s +DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=301363027013557716718759043022598039087, time:1750767485.368912s req_ids:[8] +DEBUG 06-24 20:18:05 [manager.py:391] +ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:373.7328052520752ms total_cost_time:373.75593185424805ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:9205 prompt_cache_len:5151 prompt_cache_ratio:0.5595871808799565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 +DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10645341873168945 s +INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10819268226623535 s +DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=243044375339155104131630395535847049845, time:1750767485.7443063s req_ids:[8] +DEBUG 06-24 20:18:05 [manager.py:391] +ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:179.96478080749512ms total_cost_time:179.99267578125ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9206 prompt_cache_len:5151 prompt_cache_ratio:0.5595263958288073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 +DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10833430290222168 s +INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.11029529571533203 s +DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=291116813354111419392828936215741224674, time:1750767485.9328382s req_ids:[8] +DEBUG 06-24 20:18:05 [manager.py:391] +ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:210.22534370422363ms total_cost_time:210.28447151184082ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:9207 prompt_cache_len:5151 prompt_cache_ratio:0.559465623981753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 +DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.1106710433959961 s +INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11278367042541504 s +DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=157300921725614050422063393098954047532, time:1750767486.147935s req_ids:[8] +DEBUG 06-24 20:18:06 [manager.py:391] +ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:208.96291732788086ms total_cost_time:209.00893211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9208 prompt_cache_len:5151 prompt_cache_ratio:0.5594048653344917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 +DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10781669616699219 s +INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.10979843139648438 s +DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=136540907133243327421181140505822317347, time:1750767486.3594477s req_ids:[8] +DEBUG 06-24 20:18:06 [manager.py:391] +ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:204.2832374572754ms total_cost_time:204.32686805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9209 prompt_cache_len:5151 prompt_cache_ratio:0.5593441198827234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 +DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10934066772460938 s +INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11043119430541992 s +DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=38847635736323849600539643608520480090, time:1750767486.572793s req_ids:[8] +DEBUG 06-24 20:18:06 [manager.py:391] +ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:203.63831520080566ms total_cost_time:203.68146896362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9210 prompt_cache_len:5151 prompt_cache_ratio:0.5592833876221498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 +DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10918641090393066 s +INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11118936538696289 s +DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=278353172333314132252100784832366968289, time:1750767486.7797415s req_ids:[8] +DEBUG 06-24 20:18:06 [manager.py:391] +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:207.59153366088867ms total_cost_time:207.63373374938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9211 prompt_cache_len:5151 prompt_cache_ratio:0.5592226685484747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 +DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s +INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s +DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=291041939818782909066901000207242699234, time:1750767486.9952748s req_ids:[8] +DEBUG 06-24 20:18:06 [manager.py:391] +ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:208.02879333496094ms total_cost_time:208.07147026062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9212 prompt_cache_len:5151 prompt_cache_ratio:0.5591619626574034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 +DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10822534561157227 s +INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.11018252372741699 s +DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=54775796579205562981517156095891831232, time:1750767487.2134013s req_ids:[8] +DEBUG 06-24 20:18:07 [manager.py:391] +ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:381.70766830444336ms total_cost_time:381.75082206726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9213 prompt_cache_len:5151 prompt_cache_ratio:0.5591012699446435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 +DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10726642608642578 s +INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.1090385913848877 s +DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=291840191099463998705193355317684656286, time:1750767487.5977561s req_ids:[8] +DEBUG 06-24 20:18:07 [manager.py:391] +ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:171.67425155639648ms total_cost_time:171.71549797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9214 prompt_cache_len:5151 prompt_cache_ratio:0.559040590405904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 +DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10785436630249023 s +INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.10976839065551758 s +DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=313584767816956809456261967902595767443, time:1750767487.7722263s req_ids:[8] +DEBUG 06-24 20:18:07 [manager.py:391] +ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:197.4942684173584ms total_cost_time:197.53766059875488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9215 prompt_cache_len:5151 prompt_cache_ratio:0.5589799240368963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 +DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s +INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052632331848145 s +DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=294917274875824626067374536945453251015, time:1750767487.9754372s req_ids:[8] +DEBUG 06-24 20:18:07 [manager.py:391] +ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:203.4780979156494ms total_cost_time:203.5212516784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9216 prompt_cache_len:5151 prompt_cache_ratio:0.5589192708333334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 +DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10718655586242676 s +INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.10896897315979004 s +DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=249188649074010266052419688548737651785, time:1750767488.1889348s req_ids:[8] +DEBUG 06-24 20:18:08 [manager.py:391] +ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:169.996976852417ms total_cost_time:170.04132270812988ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9217 prompt_cache_len:5151 prompt_cache_ratio:0.5588586307909298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 +DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10850834846496582 s +INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11036348342895508 s +DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=113807042986531951054382870618652886722, time:1750767488.3604047s req_ids:[8] +DEBUG 06-24 20:18:08 [manager.py:391] +ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:200.98400115966797ms total_cost_time:201.02810859680176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9218 prompt_cache_len:5151 prompt_cache_ratio:0.5587980039054025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 +DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10933995246887207 s +INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11136507987976074 s +DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=245462981656364611925886645624053020232, time:1750767488.569732s req_ids:[8] +DEBUG 06-24 20:18:08 [manager.py:391] +ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:210.60657501220703ms total_cost_time:210.65402030944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9219 prompt_cache_len:5151 prompt_cache_ratio:0.5587373901724699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 +DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10848426818847656 s +INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11044025421142578 s +DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=142689946323887981608749569061928626350, time:1750767488.7905803s req_ids:[8] +DEBUG 06-24 20:18:08 [manager.py:391] +ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:209.57398414611816ms total_cost_time:209.61856842041016ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9220 prompt_cache_len:5151 prompt_cache_ratio:0.5586767895878525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 +DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10766053199768066 s +INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s +DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=155902034642491250222671982729313440036, time:1750767489.0026062s req_ids:[8] +DEBUG 06-24 20:18:09 [manager.py:391] +ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:199.93853569030762ms total_cost_time:199.98455047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9221 prompt_cache_len:5151 prompt_cache_ratio:0.5586162021472725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 +DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10877132415771484 s +INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.11083006858825684 s +DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=318814423123739856498830528638324771759, time:1750767489.208464s req_ids:[8] +DEBUG 06-24 20:18:09 [manager.py:391] +ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:373.1358051300049ms total_cost_time:373.1799125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9222 prompt_cache_len:5151 prompt_cache_ratio:0.5585556278464542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 +DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:09 [batch.py:51] router release req id 8 +INFO 06-24 20:18:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10887670516967773 s +INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s +DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=68798332249564284410481520464894479180, time:1750767489.5835423s req_ids:[8] +DEBUG 06-24 20:18:09 [manager.py:391] +ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:199.30291175842285ms total_cost_time:199.34487342834473ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9223 prompt_cache_len:5151 prompt_cache_ratio:0.5584950666811233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 +DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10753130912780762 s +INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.10953783988952637 s +DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=187937522190474941985785309124092816112, time:1750767489.791203s req_ids:[8] +DEBUG 06-24 20:18:09 [manager.py:391] +ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:208.6939811706543ms total_cost_time:208.75144004821777ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9224 prompt_cache_len:5151 prompt_cache_ratio:0.5584345186470078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 +DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10855317115783691 s +INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.11047530174255371 s +DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=61687263355418844533672509070856761787, time:1750767490.0062573s req_ids:[8] +DEBUG 06-24 20:18:10 [manager.py:391] +ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:209.9595069885254ms total_cost_time:210.00266075134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9225 prompt_cache_len:5151 prompt_cache_ratio:0.5583739837398374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 +DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10782790184020996 s +INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.11033797264099121 s +DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=270942227467139598072154930580873847292, time:1750767490.2213788s req_ids:[8] +DEBUG 06-24 20:18:10 [manager.py:391] +ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:18:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 40406.846 tokens/s +DEBUG 06-24 20:18:10 [stats.py:37] Avg prompt tokens throughput: 40397.969 tokens/s +DEBUG 06-24 20:18:10 [stats.py:37] Avg generate tokens throughput: 8.878 tokens/s +INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:207.85975456237793ms total_cost_time:207.90433883666992ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9226 prompt_cache_len:5151 prompt_cache_ratio:0.5583134619553436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 +DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10894584655761719 s +INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s +DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=161604242942835847877440478245941411520, time:1750767490.4347165s req_ids:[8] +DEBUG 06-24 20:18:10 [manager.py:391] +ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:205.6138515472412ms total_cost_time:205.6584358215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9227 prompt_cache_len:5151 prompt_cache_ratio:0.5582529532892598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 +DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.107818603515625 s +INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956764221191406 s +DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=41541567059871594269309513321040468585, time:1750767490.6470358s req_ids:[8] +DEBUG 06-24 20:18:10 [manager.py:391] +ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:206.68745040893555ms total_cost_time:206.73060417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9228 prompt_cache_len:5151 prompt_cache_ratio:0.5581924577373212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 +DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10746645927429199 s +INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.10937380790710449 s +DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=294037725165213729733911474611206253000, time:1750767490.8599105s req_ids:[8] +DEBUG 06-24 20:18:10 [manager.py:391] +ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:168.32256317138672ms total_cost_time:168.3657169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9229 prompt_cache_len:5151 prompt_cache_ratio:0.5581319752952649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 +DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s +INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s +DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=333407896447195116791964486331549569617, time:1750767491.033047s req_ids:[8] +DEBUG 06-24 20:18:11 [manager.py:391] +ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:200.3951072692871ms total_cost_time:200.4525661468506ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9230 prompt_cache_len:5151 prompt_cache_ratio:0.5580715059588299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 +DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10868453979492188 s +INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.11070799827575684 s +DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=202693955467251572401102332806698819724, time:1750767491.2391808s req_ids:[8] +DEBUG 06-24 20:18:11 [manager.py:391] +ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:366.76025390625ms total_cost_time:366.804838180542ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9231 prompt_cache_len:5151 prompt_cache_ratio:0.5580110497237569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 +DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10784006118774414 s +INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.10991334915161133 s +DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=186146418654037744484623706774939876899, time:1750767491.610858s req_ids:[8] +DEBUG 06-24 20:18:11 [manager.py:391] +ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:208.60004425048828ms total_cost_time:208.64367485046387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9232 prompt_cache_len:5151 prompt_cache_ratio:0.5579506065857885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 +DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.1072852611541748 s +INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.1093437671661377 s +DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=6149988071369590703869920764234230863, time:1750767491.8253691s req_ids:[8] +DEBUG 06-24 20:18:11 [manager.py:391] +ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:208.80532264709473ms total_cost_time:208.8484764099121ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9233 prompt_cache_len:5151 prompt_cache_ratio:0.5578901765406693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 +DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10888051986694336 s +INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s +DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=8945734480304843139971568632826844243, time:1750767492.0410483s req_ids:[8] +DEBUG 06-24 20:18:12 [manager.py:391] +ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:206.53080940246582ms total_cost_time:206.5749168395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9234 prompt_cache_len:5151 prompt_cache_ratio:0.5578297595841456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 +DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10751771926879883 s +INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950446128845215 s +DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=121662816512365697660177215695735140528, time:1750767492.2681649s req_ids:[8] +DEBUG 06-24 20:18:12 [manager.py:391] +ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:223.8781452178955ms total_cost_time:223.93417358398438ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9235 prompt_cache_len:5151 prompt_cache_ratio:0.5577693557119654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 +DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10810589790344238 s +INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s +DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=28106967925632819298593912517321093571, time:1750767492.4884746s req_ids:[8] +DEBUG 06-24 20:18:12 [manager.py:391] +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:172.30868339538574ms total_cost_time:172.35231399536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9236 prompt_cache_len:5151 prompt_cache_ratio:0.5577089649198788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 +DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10757255554199219 s +INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959076881408691 s +DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=2563500130423229341058323665593143397, time:1750767492.6611607s req_ids:[8] +DEBUG 06-24 20:18:12 [manager.py:391] +ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:201.88498497009277ms total_cost_time:201.92718505859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9237 prompt_cache_len:5151 prompt_cache_ratio:0.5576485872036375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 +DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10846853256225586 s +INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11049723625183105 s +DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=224317777876410552669034916666169451278, time:1750767492.8704154s req_ids:[8] +DEBUG 06-24 20:18:12 [manager.py:391] +ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:209.47527885437012ms total_cost_time:209.5198631286621ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9238 prompt_cache_len:5151 prompt_cache_ratio:0.5575882225589954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 +DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.3102076053619385 s +INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.3121776580810547 s +DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=134881263633653972088042414046087733416, time:1750767493.2959967s req_ids:[8] +DEBUG 06-24 20:18:13 [manager.py:391] +ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:428.6642074584961ms total_cost_time:428.7071228027344ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9239 prompt_cache_len:5151 prompt_cache_ratio:0.557527870981708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 +DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.10733175277709961 s +INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s +DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=236522868321740353771811365255938366896, time:1750767493.5195332s req_ids:[8] +DEBUG 06-24 20:18:13 [manager.py:391] +ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:210.6010913848877ms total_cost_time:210.64448356628418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9240 prompt_cache_len:5151 prompt_cache_ratio:0.5574675324675324 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 +DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.10880756378173828 s +INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s +DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=272753175952379862801614731178030771490, time:1750767493.744156s req_ids:[8] +DEBUG 06-24 20:18:13 [manager.py:391] +ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:216.40920639038086ms total_cost_time:216.45236015319824ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9241 prompt_cache_len:5151 prompt_cache_ratio:0.5574072070122281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 +DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s +INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.11057710647583008 s +DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=61308780382437033073465857022945156385, time:1750767493.9592402s req_ids:[8] +DEBUG 06-24 20:18:13 [manager.py:391] +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:208.50777626037598ms total_cost_time:208.55164527893066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9242 prompt_cache_len:5151 prompt_cache_ratio:0.557346894611556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 +DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10876655578613281 s +INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072850227355957 s +DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=162267478617837692775161738158906774165, time:1750767494.1726017s req_ids:[8] +DEBUG 06-24 20:18:14 [manager.py:391] +ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:207.9598903656006ms total_cost_time:207.98277854919434ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:9243 prompt_cache_len:5151 prompt_cache_ratio:0.5572865952612788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 +DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10710334777832031 s +INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.1090860366821289 s +DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=52915046624392844168256738402097734950, time:1750767494.3875766s req_ids:[8] +DEBUG 06-24 20:18:14 [manager.py:391] +ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:212.02325820922852ms total_cost_time:212.0671272277832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9244 prompt_cache_len:5151 prompt_cache_ratio:0.5572263089571614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 +DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10860586166381836 s +INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11048126220703125 s +DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=307328457360070613572742122512232295155, time:1750767494.6012526s req_ids:[8] +DEBUG 06-24 20:18:14 [manager.py:391] +ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:173.07376861572266ms total_cost_time:173.1276512145996ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:9245 prompt_cache_len:5151 prompt_cache_ratio:0.5571660356949703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 +DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.1083214282989502 s +INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11020350456237793 s +DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=245947172845256045016585494392602646452, time:1750767494.7781713s req_ids:[8] +DEBUG 06-24 20:18:14 [manager.py:391] +ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:199.2805004119873ms total_cost_time:199.32174682617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9246 prompt_cache_len:5151 prompt_cache_ratio:0.5571057754704737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 +DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s +INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072921752929688 s +DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=299830371531881070758015621079097548133, time:1750767494.9832478s req_ids:[8] +DEBUG 06-24 20:18:14 [manager.py:391] +ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:381.1075687408447ms total_cost_time:381.1523914337158ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9247 prompt_cache_len:5151 prompt_cache_ratio:0.557045528279442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 +DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10730576515197754 s +INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.10923504829406738 s +DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=16670438905765915958486712514952063797, time:1750767495.3720822s req_ids:[8] +DEBUG 06-24 20:18:15 [manager.py:391] +ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:205.94477653503418ms total_cost_time:205.98769187927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9248 prompt_cache_len:5151 prompt_cache_ratio:0.5569852941176471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 +DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10808324813842773 s +INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013364791870117 s +DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=6869835389927009416340042045619703461, time:1750767495.582303s req_ids:[8] +DEBUG 06-24 20:18:15 [manager.py:391] +ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:205.7027816772461ms total_cost_time:205.74665069580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9249 prompt_cache_len:5151 prompt_cache_ratio:0.5569250729808628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 +DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s +INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s +DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=159869955967810791958616804271818440393, time:1750767495.7965612s req_ids:[8] +DEBUG 06-24 20:18:15 [manager.py:391] +ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:209.60450172424316ms total_cost_time:209.64765548706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9250 prompt_cache_len:5151 prompt_cache_ratio:0.5568648648648649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 +DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10600805282592773 s +INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.10790634155273438 s +DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=237468569893546502185594882489795657096, time:1750767496.0130298s req_ids:[8] +DEBUG 06-24 20:18:16 [manager.py:391] +ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:174.6382713317871ms total_cost_time:174.6695041656494ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:9251 prompt_cache_len:5151 prompt_cache_ratio:0.5568046697654307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 +DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.10894203186035156 s +INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11080813407897949 s +DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=123835282712431282502350845761591512436, time:1750767496.187982s req_ids:[8] +DEBUG 06-24 20:18:16 [manager.py:391] +ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:193.94230842590332ms total_cost_time:193.98736953735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9252 prompt_cache_len:5151 prompt_cache_ratio:0.5567444876783398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 +DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.10850286483764648 s +INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047959327697754 s +DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=286216554820334194591642683786481301623, time:1750767496.3889387s req_ids:[8] +DEBUG 06-24 20:18:16 [manager.py:391] +ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:205.77669143676758ms total_cost_time:205.82151412963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9253 prompt_cache_len:5151 prompt_cache_ratio:0.5566843185993732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 +DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.11022305488586426 s +INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11215329170227051 s +DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=35689826949743344270340906774914469563, time:1750767496.5983038s req_ids:[8] +DEBUG 06-24 20:18:16 [manager.py:391] +ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:205.5976390838623ms total_cost_time:205.6419849395752ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9254 prompt_cache_len:5151 prompt_cache_ratio:0.5566241625243138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 +DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:16 [batch.py:51] router release req id 8 +DEBUG 06-24 20:18:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:16 [manager.py:283] +DEBUG 06-24 20:18:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:16 [manager.py:284] +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.11145830154418945 s +INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.1133875846862793 s +DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=137219438306877450289715106310044438445, time:1750767496.826416s req_ids:[8] +DEBUG 06-24 20:18:16 [manager.py:391] +ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:212.4176025390625ms total_cost_time:212.4629020690918ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9255 prompt_cache_len:5151 prompt_cache_ratio:0.5565640194489465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 +INFO 06-24 20:18:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10646414756774902 s +INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.10833477973937988 s +DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=139765296529452748325281898390301738771, time:1750767497.037263s req_ids:[8] +DEBUG 06-24 20:18:17 [manager.py:391] +ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:360.5999946594238ms total_cost_time:360.64624786376953ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9256 prompt_cache_len:5151 prompt_cache_ratio:0.5565038893690579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 +DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10972166061401367 s +INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.11159968376159668 s +DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=13479720253126084349936411105145005324, time:1750767497.3968194s req_ids:[8] +DEBUG 06-24 20:18:17 [manager.py:391] +ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:200.44875144958496ms total_cost_time:200.49309730529785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9257 prompt_cache_len:5151 prompt_cache_ratio:0.5564437722804364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 +DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s +INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.11004519462585449 s +DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=144089109438440321818126075292956114440, time:1750767497.6275244s req_ids:[8] +DEBUG 06-24 20:18:17 [manager.py:391] +ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:189.2240047454834ms total_cost_time:189.28194046020508ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9258 prompt_cache_len:5151 prompt_cache_ratio:0.5563836681788723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 +DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10775613784790039 s +INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.10898590087890625 s +DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=72921378620789925350388034805777535770, time:1750767497.8224013s req_ids:[8] +DEBUG 06-24 20:18:17 [manager.py:391] +ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:237.3814582824707ms total_cost_time:237.4093532562256ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9259 prompt_cache_len:5151 prompt_cache_ratio:0.5563235770601577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 +DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10494184494018555 s +INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10596060752868652 s +DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=167507405522539109322776770635082025472, time:1750767498.036946s req_ids:[8] +DEBUG 06-24 20:18:18 [manager.py:391] +ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:207.02767372131348ms total_cost_time:207.05485343933105ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:9260 prompt_cache_len:5151 prompt_cache_ratio:0.5562634989200864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 +DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.1051173210144043 s +INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10646891593933105 s +DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=99475044803566855321305779250944595585, time:1750767498.251523s req_ids:[8] +DEBUG 06-24 20:18:18 [manager.py:391] +ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:211.96484565734863ms total_cost_time:211.99345588684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:9261 prompt_cache_len:5151 prompt_cache_ratio:0.5562034337544541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 +DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10596013069152832 s +INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10733699798583984 s +DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=235535005008876972047763154644867852441, time:1750767498.470751s req_ids:[8] +DEBUG 06-24 20:18:18 [manager.py:391] +ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:197.20721244812012ms total_cost_time:197.25418090820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9262 prompt_cache_len:5151 prompt_cache_ratio:0.5561433815590585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 +DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.1062314510345459 s +INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10717487335205078 s +DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=94180847616631123256090805014619159555, time:1750767498.6722412s req_ids:[8] +DEBUG 06-24 20:18:18 [manager.py:391] +ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:167.39130020141602ms total_cost_time:167.43874549865723ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9263 prompt_cache_len:5151 prompt_cache_ratio:0.5560833423296988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 +DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10508942604064941 s +INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10714268684387207 s +DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=185553661054948646830759762350409376006, time:1750767498.8456492s req_ids:[8] +DEBUG 06-24 20:18:18 [manager.py:391] +ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:376.3432502746582ms total_cost_time:376.3887882232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9264 prompt_cache_len:5151 prompt_cache_ratio:0.5560233160621761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 +DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.1087956428527832 s +INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s +DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=79251223731374372924200933267263716182, time:1750767499.2361445s req_ids:[8] +DEBUG 06-24 20:18:19 [manager.py:391] +ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:211.4865779876709ms total_cost_time:211.5318775177002ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9265 prompt_cache_len:5151 prompt_cache_ratio:0.5559633027522936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 +DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.11125540733337402 s +INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.1130983829498291 s +DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=50923862431308956081764183268362327832, time:1750767499.4541466s req_ids:[8] +DEBUG 06-24 20:18:19 [manager.py:391] +ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:202.18992233276367ms total_cost_time:202.23331451416016ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9266 prompt_cache_len:5151 prompt_cache_ratio:0.5559033023958558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 +DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.10988903045654297 s +INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.11139440536499023 s +DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=235871802952763893913245885880066581351, time:1750767499.6495616s req_ids:[8] +DEBUG 06-24 20:18:19 [manager.py:391] +ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:183.08210372924805ms total_cost_time:183.12764167785645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9267 prompt_cache_len:5151 prompt_cache_ratio:0.5558433149886695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 +DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s +INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.10954689979553223 s +DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=151278103815947906254878416266465318475, time:1750767499.8375328s req_ids:[8] +DEBUG 06-24 20:18:19 [manager.py:391] +ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:189.58592414855957ms total_cost_time:189.63241577148438ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9268 prompt_cache_len:5151 prompt_cache_ratio:0.555783340526543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 +DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10874652862548828 s +INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.11016178131103516 s +DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=167090878864262363791757727692719337491, time:1750767500.0425417s req_ids:[8] +DEBUG 06-24 20:18:20 [manager.py:391] +ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:213.8054370880127ms total_cost_time:213.8497829437256ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9269 prompt_cache_len:5151 prompt_cache_ratio:0.5557233790052865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 +DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:20 [batch.py:51] router release req id 8 +INFO 06-24 20:18:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s +INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10922718048095703 s +DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=213622004282571873146338095809093423526, time:1750767500.256435s req_ids:[8] +DEBUG 06-24 20:18:20 [manager.py:391] +ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:18:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 40552.159 tokens/s +DEBUG 06-24 20:18:20 [stats.py:37] Avg prompt tokens throughput: 40543.392 tokens/s +DEBUG 06-24 20:18:20 [stats.py:37] Avg generate tokens throughput: 8.768 tokens/s +INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:211.82513236999512ms total_cost_time:211.89284324645996ms,out_token_counter:1 mean_per_token_cost_time: 0.06771087646484375ms prompt_token_num:9270 prompt_cache_len:5151 prompt_cache_ratio:0.555663430420712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 +DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10743951797485352 s +INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10899853706359863 s +DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=306435043135976212889729874607454252705, time:1750767500.478957s req_ids:[8] +DEBUG 06-24 20:18:20 [manager.py:391] +ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:211.18593215942383ms total_cost_time:211.23147010803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9271 prompt_cache_len:5151 prompt_cache_ratio:0.5556034947686334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 +DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10786890983581543 s +INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10983014106750488 s +DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=28582706080833806698273810992275762909, time:1750767500.6879413s req_ids:[8] +DEBUG 06-24 20:18:20 [manager.py:391] +ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:195.4348087310791ms total_cost_time:195.4934597015381ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:9272 prompt_cache_len:5151 prompt_cache_ratio:0.5555435720448663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 +DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.11024308204650879 s +INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.111541748046875 s +DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=234425215806055000723350193049643644508, time:1750767500.8986363s req_ids:[8] +DEBUG 06-24 20:18:20 [manager.py:391] +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:442.69871711730957ms total_cost_time:442.74425506591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9273 prompt_cache_len:5151 prompt_cache_ratio:0.555483662245228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 +DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10820770263671875 s +INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.10960888862609863 s +DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=173472924936806867508056562063117831374, time:1750767501.3493648s req_ids:[8] +DEBUG 06-24 20:18:21 [manager.py:391] +ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:190.75751304626465ms total_cost_time:190.80090522766113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9274 prompt_cache_len:5151 prompt_cache_ratio:0.5554237653655381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 +DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10833573341369629 s +INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s +DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=248877013934999467466663353685816589997, time:1750767501.5447094s req_ids:[8] +DEBUG 06-24 20:18:21 [manager.py:391] +ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:210.15667915344238ms total_cost_time:210.20030975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9275 prompt_cache_len:5151 prompt_cache_ratio:0.5553638814016173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 +DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s +INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.11026644706726074 s +DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=206697942667618486938747599179932142208, time:1750767501.7642777s req_ids:[8] +DEBUG 06-24 20:18:21 [manager.py:391] +ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:207.11565017700195ms total_cost_time:207.15832710266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9276 prompt_cache_len:5151 prompt_cache_ratio:0.5553040103492884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 +DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10628151893615723 s +INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.10829019546508789 s +DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=79602965760426490859251933426994669030, time:1750767501.9781897s req_ids:[8] +DEBUG 06-24 20:18:21 [manager.py:391] +ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:193.54987144470215ms total_cost_time:193.59421730041504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9277 prompt_cache_len:5151 prompt_cache_ratio:0.5552441522043764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 +DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s +INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.11005330085754395 s +DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=218868102160626047326833669659735298845, time:1750767502.1653292s req_ids:[8] +DEBUG 06-24 20:18:22 [manager.py:391] +ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:167.61541366577148ms total_cost_time:167.6352024078369ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9278 prompt_cache_len:5151 prompt_cache_ratio:0.5551843069627075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 +DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10601425170898438 s +INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.10805964469909668 s +DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=333066603158334479207830083779621765869, time:1750767502.3386338s req_ids:[8] +DEBUG 06-24 20:18:22 [manager.py:391] +ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:203.94206047058105ms total_cost_time:203.96184921264648ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9279 prompt_cache_len:5151 prompt_cache_ratio:0.5551244746201099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 +DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10566854476928711 s +INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.10762691497802734 s +DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=56036461237177231349389298812581987410, time:1750767502.5464158s req_ids:[8] +DEBUG 06-24 20:18:22 [manager.py:391] +ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:206.08854293823242ms total_cost_time:206.14886283874512ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9280 prompt_cache_len:5151 prompt_cache_ratio:0.5550646551724138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 +DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s +INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014533042907715 s +DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=261543143878827586351449428631907676263, time:1750767502.7578053s req_ids:[8] +DEBUG 06-24 20:18:22 [manager.py:391] +ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:190.68598747253418ms total_cost_time:190.75632095336914ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:9281 prompt_cache_len:5151 prompt_cache_ratio:0.555004848615451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 +DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s +INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.1103050708770752 s +DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=49218571529429249530037835628964195033, time:1750767502.9537623s req_ids:[8] +DEBUG 06-24 20:18:22 [manager.py:391] +INFO 06-24 20:18:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:461.90738677978516ms total_cost_time:461.95244789123535ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9282 prompt_cache_len:5151 prompt_cache_ratio:0.554945054945055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 +DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10889053344726562 s +INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.11084556579589844 s +DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=218259182610396984124906220540967509014, time:1750767503.4218028s req_ids:[8] +DEBUG 06-24 20:18:23 [manager.py:391] +ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:192.19660758972168ms total_cost_time:192.23928451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9283 prompt_cache_len:5151 prompt_cache_ratio:0.5548852741570613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 +DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10771393775939941 s +INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s +DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=256973600115054141520035110761201180734, time:1750767503.6188889s req_ids:[8] +DEBUG 06-24 20:18:23 [manager.py:391] +ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:206.11023902893066ms total_cost_time:206.15458488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9284 prompt_cache_len:5151 prompt_cache_ratio:0.5548255062473072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 +DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10799455642700195 s +INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s +DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=157510512380801049924596876221921057106, time:1750767503.8452344s req_ids:[8] +DEBUG 06-24 20:18:23 [manager.py:391] +ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:219.4230556488037ms total_cost_time:219.465970993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9285 prompt_cache_len:5151 prompt_cache_ratio:0.5547657512116316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 +DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10682368278503418 s +INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s +DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=104763866741169426899088457190312884850, time:1750767504.0593114s req_ids:[8] +DEBUG 06-24 20:18:24 [manager.py:391] +ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:208.1167697906494ms total_cost_time:208.1601619720459ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9286 prompt_cache_len:5151 prompt_cache_ratio:0.5547060090458755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 +DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10815072059631348 s +INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.11007118225097656 s +DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=145942437893780209477351729661652265046, time:1750767504.2735126s req_ids:[8] +DEBUG 06-24 20:18:24 [manager.py:391] +ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:207.63826370239258ms total_cost_time:207.68070220947266ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9287 prompt_cache_len:5151 prompt_cache_ratio:0.5546462797458813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 +DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.11002445220947266 s +INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.1118314266204834 s +DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=2010277666601747235759860607133754849, time:1750767504.488255s req_ids:[8] +DEBUG 06-24 20:18:24 [manager.py:391] +ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:196.64716720581055ms total_cost_time:196.7294216156006ms,out_token_counter:1 mean_per_token_cost_time: 0.08225440979003906ms prompt_token_num:9288 prompt_cache_len:5151 prompt_cache_ratio:0.5545865633074936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 +DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s +INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.10941386222839355 s +DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=321205629702195802134837893422444120690, time:1750767504.6965811s req_ids:[8] +DEBUG 06-24 20:18:24 [manager.py:391] +ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.71743965148926ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9289 prompt_cache_len:5151 prompt_cache_ratio:0.5545268597265582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 +DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.30936360359191895 s +INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.3112771511077881 s +DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=216598510945938762506284028978742102057, time:1750767505.1171427s req_ids:[8] +DEBUG 06-24 20:18:25 [manager.py:391] +ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:417.59777069091797ms total_cost_time:417.64163970947266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9290 prompt_cache_len:5151 prompt_cache_ratio:0.5544671689989236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 +DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10881257057189941 s +INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.11099767684936523 s +DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=13860048375773957268377798216700581130, time:1750767505.3362734s req_ids:[8] +DEBUG 06-24 20:18:25 [manager.py:391] +ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:208.9977264404297ms total_cost_time:209.04254913330078ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9291 prompt_cache_len:5151 prompt_cache_ratio:0.5544074911204392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 +DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10749053955078125 s +INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.10885119438171387 s +DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=109527754818997647442026457489800682961, time:1750767505.5643132s req_ids:[8] +DEBUG 06-24 20:18:25 [manager.py:391] +ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:231.45270347595215ms total_cost_time:231.49824142456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9292 prompt_cache_len:5151 prompt_cache_ratio:0.5543478260869565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 +DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10773015022277832 s +INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.10973262786865234 s +DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=99489189420439418206205310491258387303, time:1750767505.7867193s req_ids:[8] +DEBUG 06-24 20:18:25 [manager.py:391] +ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:209.55276489257812ms total_cost_time:209.5956802368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9293 prompt_cache_len:5151 prompt_cache_ratio:0.554288173894329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 +DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.1088552474975586 s +INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.11071157455444336 s +DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=277961791235706237973368595058590240302, time:1750767506.0015595s req_ids:[8] +DEBUG 06-24 20:18:26 [manager.py:391] +ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:208.45484733581543ms total_cost_time:208.50014686584473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9294 prompt_cache_len:5151 prompt_cache_ratio:0.5542285345384119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 +DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s +INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11002850532531738 s +DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=205051634422228306511857245631748981867, time:1750767506.21907s req_ids:[8] +DEBUG 06-24 20:18:26 [manager.py:391] +ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:212.78667449951172ms total_cost_time:212.83292770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9295 prompt_cache_len:5151 prompt_cache_ratio:0.5541689080150619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 +DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10893726348876953 s +INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11103367805480957 s +DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=229365423320898490452490995590992021882, time:1750767506.43325s req_ids:[8] +DEBUG 06-24 20:18:26 [manager.py:391] +ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:212.0378017425537ms total_cost_time:212.0823860168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9296 prompt_cache_len:5151 prompt_cache_ratio:0.5541092943201377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 +DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10761380195617676 s +INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s +DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=234825169486191299430931135766740615847, time:1750767506.6493888s req_ids:[8] +DEBUG 06-24 20:18:26 [manager.py:391] +ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:205.91306686401367ms total_cost_time:205.96718788146973ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:9297 prompt_cache_len:5151 prompt_cache_ratio:0.5540496934494998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 +DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10991501808166504 s +INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11198067665100098 s +DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=229628546010705264986746719872545878502, time:1750767506.8626723s req_ids:[8] +DEBUG 06-24 20:18:26 [manager.py:391] +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:445.1429843902588ms total_cost_time:445.1878070831299ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9298 prompt_cache_len:5151 prompt_cache_ratio:0.5539901053990105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 +DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.10818338394165039 s +INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.11002349853515625 s +DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=128692169339911869054071585537008417943, time:1750767507.3132699s req_ids:[8] +DEBUG 06-24 20:18:27 [manager.py:391] +ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:186.33222579956055ms total_cost_time:186.37633323669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9299 prompt_cache_len:5151 prompt_cache_ratio:0.5539305301645339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 +DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s +INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s +DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=7770038333936731247008112935368640563, time:1750767507.503761s req_ids:[8] +DEBUG 06-24 20:18:27 [manager.py:391] +ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:206.52461051940918ms total_cost_time:206.58516883850098ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:9300 prompt_cache_len:5151 prompt_cache_ratio:0.5538709677419354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 +DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.10881304740905762 s +INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.1107797622680664 s +DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=277655607905477690693443742949139009905, time:1750767507.7287114s req_ids:[8] +DEBUG 06-24 20:18:27 [manager.py:391] +ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:217.7283763885498ms total_cost_time:217.7729606628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9301 prompt_cache_len:5151 prompt_cache_ratio:0.5538114181270831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 +DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.1086578369140625 s +INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.11062479019165039 s +DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=297475880019424184746866488691393176874, time:1750767507.9440336s req_ids:[8] +DEBUG 06-24 20:18:27 [manager.py:391] +ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:212.8291130065918ms total_cost_time:212.8753662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9302 prompt_cache_len:5151 prompt_cache_ratio:0.5537518813158461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 +DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s +INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s +DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=241969782247805231732837880334125039754, time:1750767508.1608758s req_ids:[8] +DEBUG 06-24 20:18:28 [manager.py:391] +ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:201.25293731689453ms total_cost_time:201.2951374053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9303 prompt_cache_len:5151 prompt_cache_ratio:0.5536923573040955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 +DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10933423042297363 s +INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.11117267608642578 s +DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=246465716219894826919124601409653288353, time:1750767508.365621s req_ids:[8] +DEBUG 06-24 20:18:28 [manager.py:391] +ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:178.969144821167ms total_cost_time:179.01134490966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9304 prompt_cache_len:5151 prompt_cache_ratio:0.5536328460877042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 +DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10780048370361328 s +INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10975456237792969 s +DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=128021281595461283110850988560047475623, time:1750767508.5656278s req_ids:[8] +DEBUG 06-24 20:18:28 [manager.py:391] +ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:214.80751037597656ms total_cost_time:214.85066413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9305 prompt_cache_len:5151 prompt_cache_ratio:0.5535733476625471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 +DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10764622688293457 s +INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10966849327087402 s +DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=202596029832712123338483804742122003053, time:1750767508.7796519s req_ids:[8] +DEBUG 06-24 20:18:28 [manager.py:391] +ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:218.98174285888672ms total_cost_time:219.0268039703369ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9306 prompt_cache_len:5151 prompt_cache_ratio:0.5535138620245004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 +DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s +INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10995650291442871 s +DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=300314773720501405388706156056359889001, time:1750767508.998383s req_ids:[8] +DEBUG 06-24 20:18:28 [manager.py:391] +ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:416.99838638305664ms total_cost_time:417.04440116882324ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9307 prompt_cache_len:5151 prompt_cache_ratio:0.5534543891694423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 +DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.10834741592407227 s +INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11021995544433594 s +DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=156134443441179910532476980756793851694, time:1750767509.417482s req_ids:[8] +DEBUG 06-24 20:18:29 [manager.py:391] +ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:190.11688232421875ms total_cost_time:190.16265869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9308 prompt_cache_len:5151 prompt_cache_ratio:0.5533949290932532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 +DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.11026859283447266 s +INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11221957206726074 s +DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=29741714824452702047626919819519651661, time:1750767509.6132147s req_ids:[8] +DEBUG 06-24 20:18:29 [manager.py:391] +ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:206.4664363861084ms total_cost_time:206.5110206604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9309 prompt_cache_len:5151 prompt_cache_ratio:0.5533354817918144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 +DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s +INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11044096946716309 s +DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=142904512957857027768425589480482727394, time:1750767509.8271465s req_ids:[8] +DEBUG 06-24 20:18:29 [manager.py:391] +ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:209.41567420959473ms total_cost_time:209.4581127166748ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9310 prompt_cache_len:5151 prompt_cache_ratio:0.5532760472610097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 +DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10794925689697266 s +INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.10990381240844727 s +DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=235328718225795424197232008588189723202, time:1750767510.0445127s req_ids:[8] +DEBUG 06-24 20:18:30 [manager.py:391] +ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:208.88113975524902ms total_cost_time:208.92715454101562ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9311 prompt_cache_len:5151 prompt_cache_ratio:0.5532166254967243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 +DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10751795768737793 s +INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.10961222648620605 s +DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=69298779961062641417040540879669527204, time:1750767510.2584512s req_ids:[8] +DEBUG 06-24 20:18:30 [manager.py:391] +ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:18:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 39027.657 tokens/s +DEBUG 06-24 20:18:30 [stats.py:37] Avg prompt tokens throughput: 39019.258 tokens/s +DEBUG 06-24 20:18:30 [stats.py:37] Avg generate tokens throughput: 8.399 tokens/s +INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:209.37609672546387ms total_cost_time:209.42211151123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9312 prompt_cache_len:5151 prompt_cache_ratio:0.5531572164948454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 +DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10875153541564941 s +INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11082983016967773 s +DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=170612911993628356115839532835378819843, time:1750767510.4744725s req_ids:[8] +DEBUG 06-24 20:18:30 [manager.py:391] +ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:214.00070190429688ms total_cost_time:214.04671669006348ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9313 prompt_cache_len:5151 prompt_cache_ratio:0.5530978202512616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 +DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.1086275577545166 s +INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11059856414794922 s +DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=34912333179990918236291747625646930366, time:1750767510.6942954s req_ids:[8] +DEBUG 06-24 20:18:30 [manager.py:391] +ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:210.6003761291504ms total_cost_time:210.64352989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9314 prompt_cache_len:5151 prompt_cache_ratio:0.5530384367618638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 +DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10834860801696777 s +INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11030817031860352 s +DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=181406102390634474070741000622698709786, time:1750767510.9251325s req_ids:[8] +DEBUG 06-24 20:18:30 [manager.py:391] +ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:442.89708137512207ms total_cost_time:442.94023513793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9315 prompt_cache_len:5151 prompt_cache_ratio:0.5529790660225443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 +DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10807108879089355 s +INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10985684394836426 s +DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=137837470209392868632115177107509220677, time:1750767511.3546028s req_ids:[8] +DEBUG 06-24 20:18:31 [manager.py:391] +ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:186.47313117980957ms total_cost_time:186.51318550109863ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9316 prompt_cache_len:5151 prompt_cache_ratio:0.5529197080291971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 +DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.1076512336730957 s +INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s +DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=76203475649878561557883432830425158775, time:1750767511.547423s req_ids:[8] +DEBUG 06-24 20:18:31 [manager.py:391] +ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:207.3805332183838ms total_cost_time:207.4265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9317 prompt_cache_len:5151 prompt_cache_ratio:0.5528603627777181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 +DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10741329193115234 s +INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10932397842407227 s +DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=40654939884451686189700077947709147658, time:1750767511.7605965s req_ids:[8] +DEBUG 06-24 20:18:31 [manager.py:391] +ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:205.05857467651367ms total_cost_time:205.10268211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9318 prompt_cache_len:5151 prompt_cache_ratio:0.5528010302640052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 +DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10809087753295898 s +INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10986971855163574 s +DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=98541880123704918089254753171065239816, time:1750767511.9777577s req_ids:[8] +DEBUG 06-24 20:18:31 [manager.py:391] +ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:213.52076530456543ms total_cost_time:213.56511116027832ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9319 prompt_cache_len:5151 prompt_cache_ratio:0.5527417104839575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 +DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s +INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.10982084274291992 s +DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=76720681156965560673183795948250117124, time:1750767512.1991284s req_ids:[8] +DEBUG 06-24 20:18:32 [manager.py:391] +ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:215.45028686523438ms total_cost_time:215.49510955810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9320 prompt_cache_len:5151 prompt_cache_ratio:0.5526824034334764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 +DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10825514793395996 s +INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.11030054092407227 s +DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=36275727703857046198552211349742062891, time:1750767512.4153743s req_ids:[8] +DEBUG 06-24 20:18:32 [manager.py:391] +ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:209.02228355407715ms total_cost_time:209.06710624694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9321 prompt_cache_len:5151 prompt_cache_ratio:0.5526231091084648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 +DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10814690589904785 s +INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.11009573936462402 s +DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=95650636783919024741987046886419605073, time:1750767512.6303039s req_ids:[8] +DEBUG 06-24 20:18:32 [manager.py:391] +ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:209.48004722595215ms total_cost_time:209.53369140625ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:9322 prompt_cache_len:5151 prompt_cache_ratio:0.5525638275048272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 +DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.1079254150390625 s +INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.10995674133300781 s +DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=289532035324924961885950948972866132163, time:1750767512.8454669s req_ids:[8] +DEBUG 06-24 20:18:32 [manager.py:391] +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:219.01988983154297ms total_cost_time:219.06447410583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9323 prompt_cache_len:5151 prompt_cache_ratio:0.5525045586184705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 +DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10896658897399902 s +INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s +DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=92887215491084387930586888064477053107, time:1750767513.0668216s req_ids:[8] +DEBUG 06-24 20:18:33 [manager.py:391] +ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:401.4897346496582ms total_cost_time:401.5324115753174ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9324 prompt_cache_len:5151 prompt_cache_ratio:0.5524453024453024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 +DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.11154389381408691 s +INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11363959312438965 s +DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=63527918430597368618261515189355407709, time:1750767513.474653s req_ids:[8] +DEBUG 06-24 20:18:33 [manager.py:391] +ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:205.85203170776367ms total_cost_time:205.89733123779297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9325 prompt_cache_len:5151 prompt_cache_ratio:0.5523860589812333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 +DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s +INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11087441444396973 s +DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=113150326052703750499165396307381437021, time:1750767513.6899714s req_ids:[8] +DEBUG 06-24 20:18:33 [manager.py:391] +ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:209.76948738098145ms total_cost_time:209.81287956237793ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9326 prompt_cache_len:5151 prompt_cache_ratio:0.5523268282221746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 +DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10864043235778809 s +INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11075663566589355 s +DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=62799057106168825341701852419915411600, time:1750767513.908324s req_ids:[8] +DEBUG 06-24 20:18:33 [manager.py:391] +ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:209.0778350830078ms total_cost_time:209.13076400756836ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:9327 prompt_cache_len:5151 prompt_cache_ratio:0.5522676101640399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 +DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10821986198425293 s +INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11040449142456055 s +DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=124776404089529678682760472780511614881, time:1750767514.1200025s req_ids:[8] +DEBUG 06-24 20:18:34 [manager.py:391] +ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:208.2815170288086ms total_cost_time:208.3263397216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9328 prompt_cache_len:5151 prompt_cache_ratio:0.5522084048027445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 +DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:34 [batch.py:51] router release req id 8 +INFO 06-24 20:18:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10935163497924805 s +INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.1112971305847168 s +DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=315537113531127486414261624523771889181, time:1750767514.3334146s req_ids:[8] +DEBUG 06-24 20:18:34 [manager.py:391] +ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:212.68773078918457ms total_cost_time:212.73255348205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9329 prompt_cache_len:5151 prompt_cache_ratio:0.5521492121342052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 +DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10846114158630371 s +INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061692237854004 s +DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=32979992957941090545508609990595984356, time:1750767514.5508199s req_ids:[8] +DEBUG 06-24 20:18:34 [manager.py:391] +ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:207.5488567352295ms total_cost_time:207.59344100952148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9330 prompt_cache_len:5151 prompt_cache_ratio:0.5520900321543408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 +DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10885047912597656 s +INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11108112335205078 s +DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=170470851419015322578034815119610885777, time:1750767514.772356s req_ids:[8] +DEBUG 06-24 20:18:34 [manager.py:391] +ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:216.66955947875977ms total_cost_time:216.71247482299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9331 prompt_cache_len:5151 prompt_cache_ratio:0.5520308648590719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 +DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10915899276733398 s +INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s +DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=178736315697040475494833979055037879660, time:1750767514.986667s req_ids:[8] +DEBUG 06-24 20:18:34 [manager.py:391] +ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:209.24687385559082ms total_cost_time:209.2916965484619ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9332 prompt_cache_len:5151 prompt_cache_ratio:0.5519717102443206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 +DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.1079397201538086 s +INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.10989212989807129 s +DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=247711944520664981764574613839250111909, time:1750767515.2101607s req_ids:[8] +DEBUG 06-24 20:18:35 [manager.py:391] +ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:439.8694038391113ms total_cost_time:439.9127960205078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9333 prompt_cache_len:5151 prompt_cache_ratio:0.5519125683060109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 +DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.10918641090393066 s +INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.11126565933227539 s +DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=27283157052080378889087002154835988353, time:1750767515.646029s req_ids:[8] +DEBUG 06-24 20:18:35 [manager.py:391] +ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:213.83953094482422ms total_cost_time:213.88578414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9334 prompt_cache_len:5151 prompt_cache_ratio:0.5518534390400686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 +DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s +INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034464836120605 s +DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=6003502344676777078635133244805083622, time:1750767515.8646538s req_ids:[8] +DEBUG 06-24 20:18:35 [manager.py:391] +ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:199.8591423034668ms total_cost_time:199.90229606628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9335 prompt_cache_len:5151 prompt_cache_ratio:0.551794322442421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 +DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10879635810852051 s +INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s +DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=286500980243029802175118015059578604034, time:1750767516.0720313s req_ids:[8] +DEBUG 06-24 20:18:36 [manager.py:391] +ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:207.7338695526123ms total_cost_time:207.75628089904785ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:9336 prompt_cache_len:5151 prompt_cache_ratio:0.5517352185089974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 +DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10849475860595703 s +INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s +DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=204267680588585439730087053501275674940, time:1750767516.2864718s req_ids:[8] +DEBUG 06-24 20:18:36 [manager.py:391] +ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:208.96410942077637ms total_cost_time:209.00750160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9337 prompt_cache_len:5151 prompt_cache_ratio:0.5516761272357288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 +DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10759305953979492 s +INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.1095890998840332 s +DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=168800904832590199896499429484171025660, time:1750767516.5007553s req_ids:[8] +DEBUG 06-24 20:18:36 [manager.py:391] +ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:202.87823677062988ms total_cost_time:202.92019844055176ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9338 prompt_cache_len:5151 prompt_cache_ratio:0.5516170486185479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 +DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10832834243774414 s +INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.11032247543334961 s +DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=42025246364622437227302490396544114000, time:1750767516.709963s req_ids:[8] +DEBUG 06-24 20:18:36 [manager.py:391] +ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:211.8513584136963ms total_cost_time:211.87448501586914ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:9339 prompt_cache_len:5151 prompt_cache_ratio:0.5515579826533891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 +DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.1059417724609375 s +INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.10794925689697266 s +DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=104057965778393181074431870685340784025, time:1750767516.9262657s req_ids:[8] +DEBUG 06-24 20:18:36 [manager.py:391] +ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:206.12359046936035ms total_cost_time:206.16888999938965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9340 prompt_cache_len:5151 prompt_cache_ratio:0.5514989293361884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 +DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.31062889099121094 s +INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.31249213218688965 s +DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=25499900574824098872545825137498387261, time:1750767517.3445113s req_ids:[8] +DEBUG 06-24 20:18:37 [manager.py:391] +ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:421.9679832458496ms total_cost_time:422.0123291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9341 prompt_cache_len:5151 prompt_cache_ratio:0.5514398886628841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 +DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10869860649108887 s +INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s +DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=166555585389564763292371171382386431482, time:1750767517.5735655s req_ids:[8] +DEBUG 06-24 20:18:37 [manager.py:391] +ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:218.86610984802246ms total_cost_time:218.91093254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9342 prompt_cache_len:5151 prompt_cache_ratio:0.5513808606294155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 +DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10706019401550293 s +INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.10889029502868652 s +DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=130581220891805976431492587565184113702, time:1750767517.790377s req_ids:[8] +DEBUG 06-24 20:18:37 [manager.py:391] +ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:169.97814178466797ms total_cost_time:170.02272605895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9343 prompt_cache_len:5151 prompt_cache_ratio:0.5513218452317243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 +DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10769343376159668 s +INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s +DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=109506230651132938181981831808426416821, time:1750767517.9632144s req_ids:[8] +DEBUG 06-24 20:18:37 [manager.py:391] +ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:207.94272422790527ms total_cost_time:207.96942710876465ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:9344 prompt_cache_len:5151 prompt_cache_ratio:0.5512628424657534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 +DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10770702362060547 s +INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10967350006103516 s +DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=301399392725949253400427459311522439131, time:1750767518.1788976s req_ids:[8] +DEBUG 06-24 20:18:38 [manager.py:391] +ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:205.74188232421875ms total_cost_time:205.78622817993164ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9345 prompt_cache_len:5151 prompt_cache_ratio:0.5512038523274478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 +DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10753536224365234 s +INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10942363739013672 s +DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=13218318386344334593966450951141386357, time:1750767518.388607s req_ids:[8] +DEBUG 06-24 20:18:38 [manager.py:391] +ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:201.08580589294434ms total_cost_time:201.12872123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9346 prompt_cache_len:5151 prompt_cache_ratio:0.5511448748127541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 +DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10760927200317383 s +INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10945677757263184 s +DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=70530452077290694520202534944311138704, time:1750767518.5950859s req_ids:[8] +DEBUG 06-24 20:18:38 [manager.py:391] +ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:204.32686805725098ms total_cost_time:204.36906814575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9347 prompt_cache_len:5151 prompt_cache_ratio:0.5510859099176206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 +DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10796475410461426 s +INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10994291305541992 s +DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=332207915498795661502667307258914507765, time:1750767518.8071723s req_ids:[8] +DEBUG 06-24 20:18:38 [manager.py:391] +ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:211.93838119506836ms total_cost_time:211.98296546936035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9348 prompt_cache_len:5151 prompt_cache_ratio:0.5510269576379975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 +DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10814428329467773 s +INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11002182960510254 s +DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=310147536882246356484416830328790103349, time:1750767519.0245087s req_ids:[8] +DEBUG 06-24 20:18:39 [manager.py:391] +ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:423.3407974243164ms total_cost_time:423.3858585357666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9349 prompt_cache_len:5151 prompt_cache_ratio:0.5509680179698363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 +DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10873913764953613 s +INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s +DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=238971824242477823997992666575446067972, time:1750767519.4494917s req_ids:[8] +DEBUG 06-24 20:18:39 [manager.py:391] +ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:204.04553413391113ms total_cost_time:204.09035682678223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9350 prompt_cache_len:5151 prompt_cache_ratio:0.5509090909090909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 +DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10822677612304688 s +INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11039257049560547 s +DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=127989588424597249568666812976295338326, time:1750767519.6629457s req_ids:[8] +DEBUG 06-24 20:18:39 [manager.py:391] +ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:211.3635540008545ms total_cost_time:211.4090919494629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9351 prompt_cache_len:5151 prompt_cache_ratio:0.5508501764517164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 +DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10748291015625 s +INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.10936212539672852 s +DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=25893295861398403746239887537156817257, time:1750767519.8897254s req_ids:[8] +DEBUG 06-24 20:18:39 [manager.py:391] +ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:217.24390983581543ms total_cost_time:217.2873020172119ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9352 prompt_cache_len:5151 prompt_cache_ratio:0.5507912745936698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 +DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10776853561401367 s +INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.10974001884460449 s +DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=308317648888152489951184035022976224901, time:1750767520.1026177s req_ids:[8] +DEBUG 06-24 20:18:40 [manager.py:391] +ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:207.28707313537598ms total_cost_time:207.33189582824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9353 prompt_cache_len:5151 prompt_cache_ratio:0.5507323853309098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 +DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.1092219352722168 s +INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11133742332458496 s +DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=218821704727674153128340409062844407954, time:1750767520.315385s req_ids:[8] +DEBUG 06-24 20:18:40 [manager.py:391] +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:18:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 39003.346 tokens/s +DEBUG 06-24 20:18:40 [stats.py:37] Avg prompt tokens throughput: 38994.990 tokens/s +DEBUG 06-24 20:18:40 [stats.py:37] Avg generate tokens throughput: 8.356 tokens/s +INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:205.9774398803711ms total_cost_time:206.0232162475586ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9354 prompt_cache_len:5151 prompt_cache_ratio:0.550673508659397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 +DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s +INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11085867881774902 s +DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=226575243793892133848431750106548334429, time:1750767520.5258307s req_ids:[8] +DEBUG 06-24 20:18:40 [manager.py:391] +ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:205.6262493133545ms total_cost_time:205.68513870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9355 prompt_cache_len:5151 prompt_cache_ratio:0.5506146445750936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 +DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.1089935302734375 s +INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11098623275756836 s +DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=66272512420969035470347482591649382126, time:1750767520.7386668s req_ids:[8] +DEBUG 06-24 20:18:40 [manager.py:391] +ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:208.44388008117676ms total_cost_time:208.49108695983887ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9356 prompt_cache_len:5151 prompt_cache_ratio:0.5505557930739632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 +DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10799527168273926 s +INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11007523536682129 s +DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=229036434022024464609025832765807767815, time:1750767520.9517496s req_ids:[8] +DEBUG 06-24 20:18:40 [manager.py:391] +ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:207.18979835510254ms total_cost_time:207.23509788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9357 prompt_cache_len:5151 prompt_cache_ratio:0.5504969541519718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 +DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10941338539123535 s +INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11125898361206055 s +DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=87114537866521618636260549898888358894, time:1750767521.1657667s req_ids:[8] +DEBUG 06-24 20:18:41 [manager.py:391] +ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:383.85462760925293ms total_cost_time:383.90159606933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9358 prompt_cache_len:5151 prompt_cache_ratio:0.5504381278050866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 +DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10951876640319824 s +INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11127638816833496 s +DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=48552633070679790607339979998988219511, time:1750767521.555132s req_ids:[8] +DEBUG 06-24 20:18:41 [manager.py:391] +ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:199.7838020324707ms total_cost_time:199.8128890991211ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:9359 prompt_cache_len:5151 prompt_cache_ratio:0.5503793140292766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 +DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10812091827392578 s +INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.10992288589477539 s +DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=98609587146583736099450961868090129595, time:1750767521.760574s req_ids:[8] +DEBUG 06-24 20:18:41 [manager.py:391] +ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:213.6392593383789ms total_cost_time:213.6697769165039ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:9360 prompt_cache_len:5151 prompt_cache_ratio:0.5503205128205129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 +DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.11086583137512207 s +INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11292386054992676 s +DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=181592482738529884555520050847254447139, time:1750767521.977503s req_ids:[8] +DEBUG 06-24 20:18:41 [manager.py:391] +ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:204.84399795532227ms total_cost_time:204.88858222961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9361 prompt_cache_len:5151 prompt_cache_ratio:0.5502617241747677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 +DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1089169979095459 s +INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11077046394348145 s +DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=267151359357687867087498539843923783841, time:1750767522.1893723s req_ids:[8] +DEBUG 06-24 20:18:42 [manager.py:391] +ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:202.68797874450684ms total_cost_time:202.73113250732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9362 prompt_cache_len:5151 prompt_cache_ratio:0.5502029480880154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 +DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.10802245140075684 s +INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s +DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=133598150387002545302050663775136369399, time:1750767522.3971968s req_ids:[8] +DEBUG 06-24 20:18:42 [manager.py:391] +ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:207.2298526763916ms total_cost_time:207.2741985321045ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9363 prompt_cache_len:5151 prompt_cache_ratio:0.5501441845562319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 +DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1107182502746582 s +INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11272120475769043 s +DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=157302896165237297351661399855202390176, time:1750767522.6169362s req_ids:[8] +DEBUG 06-24 20:18:42 [manager.py:391] +ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:214.5845890045166ms total_cost_time:214.613676071167ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:9364 prompt_cache_len:5151 prompt_cache_ratio:0.5500854335753951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 +DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1088252067565918 s +INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11083674430847168 s +DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=255563398915611487234220896017189281182, time:1750767522.8328726s req_ids:[8] +DEBUG 06-24 20:18:42 [manager.py:391] +ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:202.47554779052734ms total_cost_time:202.51917839050293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9365 prompt_cache_len:5151 prompt_cache_ratio:0.5500266951414843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 +DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.10991597175598145 s +INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.11195826530456543 s +DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=314184928531339062925874346514626080480, time:1750767523.0391216s req_ids:[8] +DEBUG 06-24 20:18:43 [manager.py:391] +ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:206.44807815551758ms total_cost_time:206.49147033691406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9366 prompt_cache_len:5151 prompt_cache_ratio:0.5499679692504804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 +DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.30935072898864746 s +INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.3113832473754883 s +DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=139197000991035658328178546052033031064, time:1750767523.4658768s req_ids:[8] +DEBUG 06-24 20:18:43 [manager.py:391] +ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:423.9389896392822ms total_cost_time:423.9802360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9367 prompt_cache_len:5151 prompt_cache_ratio:0.5499092558983666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 +DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.11211156845092773 s +INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.1140298843383789 s +DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=122832590025042177571353688047423274681, time:1750767523.6851263s req_ids:[8] +DEBUG 06-24 20:18:43 [manager.py:391] +ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7086181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9368 prompt_cache_len:5151 prompt_cache_ratio:0.5498505550811272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 +DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.11084985733032227 s +INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.11280035972595215 s +DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=120220290847923262846426664980744685994, time:1750767523.8983703s req_ids:[8] +DEBUG 06-24 20:18:43 [manager.py:391] +ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:207.32975006103516ms total_cost_time:207.3824405670166ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:9369 prompt_cache_len:5151 prompt_cache_ratio:0.5497918667947487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 +DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.1091146469116211 s +INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11119389533996582 s +DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=52079594539728887853133797607053525671, time:1750767524.1128175s req_ids:[8] +DEBUG 06-24 20:18:44 [manager.py:391] +ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:208.28843116760254ms total_cost_time:208.33420753479004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9370 prompt_cache_len:5151 prompt_cache_ratio:0.5497331910352188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 +DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s +INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s +DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=248199669595846345552003821994408247405, time:1750767524.328374s req_ids:[8] +DEBUG 06-24 20:18:44 [manager.py:391] +ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:169.68417167663574ms total_cost_time:169.7239875793457ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:9371 prompt_cache_len:5151 prompt_cache_ratio:0.5496745277985273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 +DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10805249214172363 s +INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s +DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=225642183276704916980652706082082690436, time:1750767524.502245s req_ids:[8] +DEBUG 06-24 20:18:44 [manager.py:391] +ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:204.4217586517334ms total_cost_time:204.4832706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:9372 prompt_cache_len:5151 prompt_cache_ratio:0.5496158770806658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 +DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.11166977882385254 s +INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.1136016845703125 s +DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=264403272174568280559111571776879344908, time:1750767524.7099493s req_ids:[8] +DEBUG 06-24 20:18:44 [manager.py:391] +ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:198.29106330871582ms total_cost_time:198.3344554901123ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9373 prompt_cache_len:5151 prompt_cache_ratio:0.5495572388776272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 +DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10821819305419922 s +INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11019420623779297 s +DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=330136369390958067927249883902542692257, time:1750767524.916127s req_ids:[8] +DEBUG 06-24 20:18:44 [manager.py:391] +ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:201.60770416259766ms total_cost_time:201.65395736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9374 prompt_cache_len:5151 prompt_cache_ratio:0.5494986131854065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 +DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10972070693969727 s +INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11091494560241699 s +DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=199307586066732721805485094986834489661, time:1750767525.138953s req_ids:[8] +DEBUG 06-24 20:18:45 [manager.py:391] +ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:417.9110527038574ms total_cost_time:417.9561138153076ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9375 prompt_cache_len:5151 prompt_cache_ratio:0.54944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 +DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.1088111400604248 s +INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s +DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=109023194330465550049776881491839779585, time:1750767525.5454657s req_ids:[8] +DEBUG 06-24 20:18:45 [manager.py:391] +ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:205.98125457763672ms total_cost_time:206.024169921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9376 prompt_cache_len:5151 prompt_cache_ratio:0.5493813993174061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 +DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10960984230041504 s +INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11161208152770996 s +DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=16195195559669248089523741743603130201, time:1750767525.7616897s req_ids:[8] +DEBUG 06-24 20:18:45 [manager.py:391] +ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:207.08703994750977ms total_cost_time:207.13067054748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9377 prompt_cache_len:5151 prompt_cache_ratio:0.5493228111336248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 +DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10966229438781738 s +INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.1115868091583252 s +DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=9615418946363929900161907388398816718, time:1750767525.98121s req_ids:[8] +DEBUG 06-24 20:18:45 [manager.py:391] +ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:215.19827842712402ms total_cost_time:215.2423858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9378 prompt_cache_len:5151 prompt_cache_ratio:0.5492642354446577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 +DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s +INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11122393608093262 s +DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=135172372355074243145447348303505900241, time:1750767526.1937318s req_ids:[8] +DEBUG 06-24 20:18:46 [manager.py:391] +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:207.0639133453369ms total_cost_time:207.1084976196289ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9379 prompt_cache_len:5151 prompt_cache_ratio:0.5492056722465082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 +DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.1081092357635498 s +INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11007857322692871 s +DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=299605073543821601526506511985845887223, time:1750767526.4068062s req_ids:[8] +DEBUG 06-24 20:18:46 [manager.py:391] +ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:207.7922821044922ms total_cost_time:207.83638954162598ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9380 prompt_cache_len:5151 prompt_cache_ratio:0.5491471215351812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 +DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10841178894042969 s +INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s +DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=4133840953107508809564462407498961705, time:1750767526.621742s req_ids:[8] +DEBUG 06-24 20:18:46 [manager.py:391] +ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:210.71982383728027ms total_cost_time:210.77322959899902ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:9381 prompt_cache_len:5151 prompt_cache_ratio:0.5490885833066838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 +DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10928010940551758 s +INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.1114192008972168 s +DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=42426426853873469032012346939218910241, time:1750767526.837774s req_ids:[8] +DEBUG 06-24 20:18:46 [manager.py:391] +ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:209.76805686950684ms total_cost_time:209.81454849243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9382 prompt_cache_len:5151 prompt_cache_ratio:0.549030057557024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 +DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10871076583862305 s +INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11073851585388184 s +DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=182824948200295845000190611337664749412, time:1750767527.0577874s req_ids:[8] +DEBUG 06-24 20:18:47 [manager.py:391] +ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:213.98019790649414ms total_cost_time:214.02597427368164ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9383 prompt_cache_len:5151 prompt_cache_ratio:0.5489715442822125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 +DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10923576354980469 s +INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s +DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=151856997356907522226943261919843619495, time:1750767527.2728353s req_ids:[8] +DEBUG 06-24 20:18:47 [manager.py:391] +ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:374.50599670410156ms total_cost_time:374.54867362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9384 prompt_cache_len:5151 prompt_cache_ratio:0.5489130434782609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 +DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.1082010269165039 s +INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11027312278747559 s +DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=233871465672500407220371451279721491773, time:1750767527.6512487s req_ids:[8] +DEBUG 06-24 20:18:47 [manager.py:391] +ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:205.95979690551758ms total_cost_time:206.00318908691406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9385 prompt_cache_len:5151 prompt_cache_ratio:0.5488545551411828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 +DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s +INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11117696762084961 s +DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=297309026873538633858659643157580554756, time:1750767527.866224s req_ids:[8] +DEBUG 06-24 20:18:47 [manager.py:391] +ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:217.09728240966797ms total_cost_time:217.15736389160156ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9386 prompt_cache_len:5151 prompt_cache_ratio:0.5487960792669934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 +DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.11004376411437988 s +INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.1119997501373291 s +DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=162733558480038583898720770607588723769, time:1750767528.0852365s req_ids:[8] +DEBUG 06-24 20:18:48 [manager.py:391] +ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:203.40919494628906ms total_cost_time:203.45306396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9387 prompt_cache_len:5151 prompt_cache_ratio:0.5487376158517098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 +DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.1091451644897461 s +INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11127972602844238 s +DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=169623573602419946838825179666390091926, time:1750767528.3049574s req_ids:[8] +DEBUG 06-24 20:18:48 [manager.py:391] +ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:216.15242958068848ms total_cost_time:216.20965003967285ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:9388 prompt_cache_len:5151 prompt_cache_ratio:0.5486791648913507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 +DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:48 [batch.py:51] router release req id 8 +INFO 06-24 20:18:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.109832763671875 s +INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11170387268066406 s +DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=297018891785275048985309785701515093129, time:1750767528.5212207s req_ids:[8] +DEBUG 06-24 20:18:48 [manager.py:391] +ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:203.46713066101074ms total_cost_time:203.51219177246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9389 prompt_cache_len:5151 prompt_cache_ratio:0.5486207263819363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 +DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.10860276222229004 s +INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11062216758728027 s +DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=267181301379852238359380246302893777478, time:1750767528.7373126s req_ids:[8] +DEBUG 06-24 20:18:48 [manager.py:391] +ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:213.06490898132324ms total_cost_time:213.11092376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9390 prompt_cache_len:5151 prompt_cache_ratio:0.5485623003194888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 +DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.1126554012298584 s +INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11465835571289062 s +DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=186548386531207567525495565350509579688, time:1750767528.9518423s req_ids:[8] +DEBUG 06-24 20:18:48 [manager.py:391] +ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:207.42154121398926ms total_cost_time:207.46517181396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9391 prompt_cache_len:5151 prompt_cache_ratio:0.5485038867000319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 +DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10889291763305664 s +INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.11064529418945312 s +DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=89390263682661142275801071360181944707, time:1750767529.1626248s req_ids:[8] +DEBUG 06-24 20:18:49 [manager.py:391] +ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:203.28426361083984ms total_cost_time:203.32646369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9392 prompt_cache_len:5151 prompt_cache_ratio:0.5484454855195912 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 +DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.3094174861907959 s +INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.31140756607055664 s +DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=297934755376552635271450498841153617337, time:1750767529.5788825s req_ids:[8] +DEBUG 06-24 20:18:49 [manager.py:391] +ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:417.18554496765137ms total_cost_time:417.2508716583252ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:9393 prompt_cache_len:5151 prompt_cache_ratio:0.5483870967741935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 +DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10480403900146484 s +INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.10668563842773438 s +DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=132068247613130601702428966680131553729, time:1750767529.795925s req_ids:[8] +DEBUG 06-24 20:18:49 [manager.py:391] +ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:209.02538299560547ms total_cost_time:209.06972885131836ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9394 prompt_cache_len:5151 prompt_cache_ratio:0.548328720459868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 +DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10793519020080566 s +INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972857475280762 s +DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=316164893744554260328226341423848644545, time:1750767530.009122s req_ids:[8] +DEBUG 06-24 20:18:50 [manager.py:391] +ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:207.74507522583008ms total_cost_time:207.76605606079102ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9395 prompt_cache_len:5151 prompt_cache_ratio:0.548270356572645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 +DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.11018586158752441 s +INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.11193156242370605 s +DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=315354061486305444181261415085774655567, time:1750767530.2287476s req_ids:[8] +DEBUG 06-24 20:18:50 [manager.py:391] +ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:210.45613288879395ms total_cost_time:210.50024032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9396 prompt_cache_len:5151 prompt_cache_ratio:0.5482120051085568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 +DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.10770535469055176 s +INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.10960245132446289 s +DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=114158765976512418394771001316948076572, time:1750767530.439466s req_ids:[8] +DEBUG 06-24 20:18:50 [manager.py:391] +DEBUG 06-24 20:18:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 40169.092 tokens/s +DEBUG 06-24 20:18:50 [stats.py:37] Avg prompt tokens throughput: 40160.625 tokens/s +DEBUG 06-24 20:18:50 [stats.py:37] Avg generate tokens throughput: 8.467 tokens/s +ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:208.68372917175293ms total_cost_time:208.72807502746582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9397 prompt_cache_len:5151 prompt_cache_ratio:0.5481536660636374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 +DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.10834312438964844 s +INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s +DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=139199871069183012273871281835521743878, time:1750767530.6544921s req_ids:[8] +DEBUG 06-24 20:18:50 [manager.py:391] +ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:209.3522548675537ms total_cost_time:209.39898490905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9398 prompt_cache_len:5151 prompt_cache_ratio:0.5480953394339221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 +DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.11009550094604492 s +INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.11172819137573242 s +DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=199532604982527366255059577840737705965, time:1750767530.870041s req_ids:[8] +DEBUG 06-24 20:18:50 [manager.py:391] +ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:211.93289756774902ms total_cost_time:211.98725700378418ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:9399 prompt_cache_len:5151 prompt_cache_ratio:0.5480370252154485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 +DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10924887657165527 s +INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.1113734245300293 s +DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=182991710461050797466267311570039038538, time:1750767531.0863528s req_ids:[8] +DEBUG 06-24 20:18:51 [manager.py:391] +ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:206.51841163635254ms total_cost_time:206.57610893249512ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:9400 prompt_cache_len:5151 prompt_cache_ratio:0.5479787234042554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 +DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10970067977905273 s +INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11171579360961914 s +DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=339655276680888302437381787282246012443, time:1750767531.2998667s req_ids:[8] +DEBUG 06-24 20:18:51 [manager.py:391] +ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:394.90699768066406ms total_cost_time:394.95062828063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9401 prompt_cache_len:5151 prompt_cache_ratio:0.5479204339963833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 +DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10926032066345215 s +INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11127591133117676 s +DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=262788721655121252534215125111773818001, time:1750767531.6992686s req_ids:[8] +DEBUG 06-24 20:18:51 [manager.py:391] +ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:206.4039707183838ms total_cost_time:206.44879341125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9402 prompt_cache_len:5151 prompt_cache_ratio:0.5478621569878749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 +DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10871410369873047 s +INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11070585250854492 s +DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=292959802285941863907899806875229480893, time:1750767531.9140542s req_ids:[8] +DEBUG 06-24 20:18:51 [manager.py:391] +ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:219.57707405090332ms total_cost_time:219.6202278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9403 prompt_cache_len:5151 prompt_cache_ratio:0.547803892374774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 +DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s +INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11033439636230469 s +DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=187459358324289468524560066457744687584, time:1750767532.1515164s req_ids:[8] +DEBUG 06-24 20:18:52 [manager.py:391] +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:223.4935760498047ms total_cost_time:223.5565185546875ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:9404 prompt_cache_len:5151 prompt_cache_ratio:0.5477456401531263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 +DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s +INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11053824424743652 s +DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=261923118487710086690324391739921401622, time:1750767532.3681855s req_ids:[8] +DEBUG 06-24 20:18:52 [manager.py:391] +ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:210.22963523864746ms total_cost_time:210.27398109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9405 prompt_cache_len:5151 prompt_cache_ratio:0.5476874003189792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 +DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10806679725646973 s +INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s +DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=203780571424773688482413383182967370344, time:1750767532.5842621s req_ids:[8] +DEBUG 06-24 20:18:52 [manager.py:391] +ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:208.38236808776855ms total_cost_time:208.42719078063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9406 prompt_cache_len:5151 prompt_cache_ratio:0.5476291728683819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 +DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.1079559326171875 s +INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.10992765426635742 s +DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=92210435780845335127759300314314792120, time:1750767532.7988975s req_ids:[8] +DEBUG 06-24 20:18:52 [manager.py:391] +ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:207.23271369934082ms total_cost_time:207.2765827178955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9407 prompt_cache_len:5151 prompt_cache_ratio:0.547570957797385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 +DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10776758193969727 s +INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s +DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=278687665012442664750509981945815305378, time:1750767533.0158496s req_ids:[8] +DEBUG 06-24 20:18:53 [manager.py:391] +ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:216.22300148010254ms total_cost_time:216.26663208007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9408 prompt_cache_len:5151 prompt_cache_ratio:0.5475127551020408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 +INFO 06-24 20:18:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:18:53 [statics_utils.py:24] mean first cost: 228.46019012876977 ms +INFO 06-24 20:18:53 [statics_utils.py:24] mean per token cost: 0.07230114423918113 ms +INFO 06-24 20:18:53 [manager.py:620] left req id 8can release True refcount 3 +DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.3087015151977539 s +INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.31076693534851074 s +DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=265539280178491077772461977828877762888, time:1750767533.4396644s req_ids:[8] +DEBUG 06-24 20:18:53 [manager.py:391] +ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:420.426607131958ms total_cost_time:420.47119140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9409 prompt_cache_len:5151 prompt_cache_ratio:0.5474545647784037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 +DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10936355590820312 s +INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.11156678199768066 s +DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=97403972049179491936060101463389399531, time:1750767533.6609626s req_ids:[8] +DEBUG 06-24 20:18:53 [manager.py:391] +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:211.20238304138184ms total_cost_time:211.24815940856934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9410 prompt_cache_len:5151 prompt_cache_ratio:0.5473963868225292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 +DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10793066024780273 s +INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.11013913154602051 s +DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=31175632113729125041931272519313904898, time:1750767533.8777382s req_ids:[8] +DEBUG 06-24 20:18:53 [manager.py:391] +ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:213.53840827941895ms total_cost_time:213.60445022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.06604194641113281ms prompt_token_num:9411 prompt_cache_len:5151 prompt_cache_ratio:0.5473382212304749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 +DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10778617858886719 s +INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.1097252368927002 s +DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=4729224428327037314363497422221951586, time:1750767534.0984704s req_ids:[8] +DEBUG 06-24 20:18:54 [manager.py:391] +ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:207.56125450134277ms total_cost_time:207.60726928710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9412 prompt_cache_len:5151 prompt_cache_ratio:0.5472800679983001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 +DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.1089482307434082 s +INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11090445518493652 s +DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=321831177419449398569006073744884989978, time:1750767534.3093169s req_ids:[8] +DEBUG 06-24 20:18:54 [manager.py:391] +ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:207.1363925933838ms total_cost_time:207.1833610534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9413 prompt_cache_len:5151 prompt_cache_ratio:0.5472219271220652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 +DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10808348655700684 s +INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11010575294494629 s +DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=89893149663159545215300810750263035418, time:1750767534.5232947s req_ids:[8] +DEBUG 06-24 20:18:54 [manager.py:391] +ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:208.73236656188965ms total_cost_time:208.77671241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9414 prompt_cache_len:5151 prompt_cache_ratio:0.547163798597833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 +DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10962390899658203 s +INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11159110069274902 s +DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=54683786452400120704727034693169809602, time:1750767534.7369127s req_ids:[8] +DEBUG 06-24 20:18:54 [manager.py:391] +ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:210.49952507019043ms total_cost_time:210.54434776306152ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9415 prompt_cache_len:5151 prompt_cache_ratio:0.5471056824216676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 +DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s +INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11021161079406738 s +DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=148011747951849578399687801810939653708, time:1750767534.964154s req_ids:[8] +DEBUG 06-24 20:18:54 [manager.py:391] +ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:219.05231475830078ms total_cost_time:219.09523010253906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9416 prompt_cache_len:5151 prompt_cache_ratio:0.5470475785896347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 +DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10786843299865723 s +INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.10985231399536133 s +DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=204865736885565238525231608572197216964, time:1750767535.1773098s req_ids:[8] +DEBUG 06-24 20:18:55 [manager.py:391] +ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:377.5053024291992ms total_cost_time:377.5506019592285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9417 prompt_cache_len:5151 prompt_cache_ratio:0.5469894870978018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 +DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:55 [batch.py:51] router release req id 8 +INFO 06-24 20:18:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10969090461730957 s +INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11163568496704102 s +DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=11092848218925159794959842236554362811, time:1750767535.5578635s req_ids:[8] +DEBUG 06-24 20:18:55 [manager.py:391] +ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:197.92580604553223ms total_cost_time:198.03094863891602ms,out_token_counter:1 mean_per_token_cost_time: 0.10514259338378906ms prompt_token_num:9418 prompt_cache_len:5151 prompt_cache_ratio:0.5469314079422383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 +DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10831356048583984 s +INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11027789115905762 s +DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=319850299488603346951369610268920219756, time:1750767535.76459s req_ids:[8] +DEBUG 06-24 20:18:55 [manager.py:391] +ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:206.06017112731934ms total_cost_time:206.10618591308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9419 prompt_cache_len:5151 prompt_cache_ratio:0.5468733411190148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 +DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10834050178527832 s +INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11037397384643555 s +DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=238603301335089879016021315979476664983, time:1750767535.977811s req_ids:[8] +DEBUG 06-24 20:18:55 [manager.py:391] +ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:211.6084098815918ms total_cost_time:211.6537094116211ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9420 prompt_cache_len:5151 prompt_cache_ratio:0.5468152866242039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 +DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s +INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.11059308052062988 s +DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=53781545484616126902212899771880517662, time:1750767536.19361s req_ids:[8] +DEBUG 06-24 20:18:56 [manager.py:391] +ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:205.86609840393066ms total_cost_time:205.90996742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9421 prompt_cache_len:5151 prompt_cache_ratio:0.5467572444538796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 +DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10859203338623047 s +INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.11063337326049805 s +DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=57756363042208772537728315925561342116, time:1750767536.4048595s req_ids:[8] +DEBUG 06-24 20:18:56 [manager.py:391] +ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:203.31430435180664ms total_cost_time:203.35888862609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9422 prompt_cache_len:5151 prompt_cache_ratio:0.546699214604118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 +DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10839700698852539 s +INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s +DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=305158957503334160047298534300741861132, time:1750767536.6167164s req_ids:[8] +DEBUG 06-24 20:18:56 [manager.py:391] +ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:210.76416969299316ms total_cost_time:210.80994606018066ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9423 prompt_cache_len:5151 prompt_cache_ratio:0.5466411970709965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 +DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10758256912231445 s +INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.1102304458618164 s +DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=183581129594587028601362430169446285478, time:1750767536.8312707s req_ids:[8] +DEBUG 06-24 20:18:56 [manager.py:391] +ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:205.8863639831543ms total_cost_time:205.9321403503418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9424 prompt_cache_len:5151 prompt_cache_ratio:0.5465831918505942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 +DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10754275321960449 s +INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s +DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=22903453100214854687007838486022663608, time:1750767537.0434313s req_ids:[8] +DEBUG 06-24 20:18:57 [manager.py:391] +ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:207.73720741271973ms total_cost_time:207.7794075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9425 prompt_cache_len:5151 prompt_cache_ratio:0.5465251989389921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 +DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10801863670349121 s +INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s +DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=152759777809005182149710808316383880828, time:1750767537.257061s req_ids:[8] +DEBUG 06-24 20:18:57 [manager.py:391] +ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:412.3709201812744ms total_cost_time:412.4152660369873ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9426 prompt_cache_len:5151 prompt_cache_ratio:0.5464672183322724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 +DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10882282257080078 s +INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11108613014221191 s +DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=325258290724073984265378628684246156202, time:1750767537.6715803s req_ids:[8] +DEBUG 06-24 20:18:57 [manager.py:391] +ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:202.681303024292ms total_cost_time:202.75163650512695ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:9427 prompt_cache_len:5151 prompt_cache_ratio:0.5464092500265195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 +DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10939645767211914 s +INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11139297485351562 s +DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=47817126347833342274045891292977599211, time:1750767537.8843465s req_ids:[8] +DEBUG 06-24 20:18:57 [manager.py:391] +ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:205.57594299316406ms total_cost_time:205.62171936035156ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9428 prompt_cache_len:5151 prompt_cache_ratio:0.5463512940178192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 +DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10871505737304688 s +INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s +DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=222374452230323870745990206841735553668, time:1750767538.0955043s req_ids:[8] +DEBUG 06-24 20:18:58 [manager.py:391] +ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:205.81841468811035ms total_cost_time:205.86776733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:9429 prompt_cache_len:5151 prompt_cache_ratio:0.546293350302259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 +DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s +INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11097216606140137 s +DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=153768267260050529550081842154272344564, time:1750767538.3067088s req_ids:[8] +DEBUG 06-24 20:18:58 [manager.py:391] +ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:209.83529090881348ms total_cost_time:209.87868309020996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9430 prompt_cache_len:5151 prompt_cache_ratio:0.5462354188759279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 +DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.1081700325012207 s +INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11021614074707031 s +DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=303762531824133431773636501771497935694, time:1750767538.5231895s req_ids:[8] +DEBUG 06-24 20:18:58 [manager.py:391] +ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:207.83543586730957ms total_cost_time:207.88073539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9431 prompt_cache_len:5151 prompt_cache_ratio:0.5461774997349168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 +DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10811829566955566 s +INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s +DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=65437858423016728761309742618881641393, time:1750767538.7374206s req_ids:[8] +DEBUG 06-24 20:18:58 [manager.py:391] +ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:207.05008506774902ms total_cost_time:207.0937156677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9432 prompt_cache_len:5151 prompt_cache_ratio:0.5461195928753181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 +DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10936498641967773 s +INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s +DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=108333826383628663131756463913541451914, time:1750767538.949054s req_ids:[8] +DEBUG 06-24 20:18:58 [manager.py:391] +ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:209.60068702697754ms total_cost_time:209.64622497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9433 prompt_cache_len:5151 prompt_cache_ratio:0.5460616982932259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 +DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10907554626464844 s +INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.1108694076538086 s +DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=314316401261113685739685346255097421033, time:1750767539.1785715s req_ids:[8] +DEBUG 06-24 20:18:59 [manager.py:391] +ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:370.0385093688965ms total_cost_time:370.0826168060303ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9434 prompt_cache_len:5151 prompt_cache_ratio:0.546003815984736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 +DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10898995399475098 s +INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.11100888252258301 s +DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=268943082935752230655200174301782136504, time:1750767539.5378783s req_ids:[8] +DEBUG 06-24 20:18:59 [manager.py:391] +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:200.51336288452148ms total_cost_time:200.55747032165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9435 prompt_cache_len:5151 prompt_cache_ratio:0.5459459459459459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 +DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.107452392578125 s +INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s +DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=17277010042072423231340309437570342852, time:1750767539.746249s req_ids:[8] +DEBUG 06-24 20:18:59 [manager.py:391] +ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:206.39896392822266ms total_cost_time:206.44283294677734ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9436 prompt_cache_len:5151 prompt_cache_ratio:0.5458880881729546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 +DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:18:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10831236839294434 s +INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.1102139949798584 s +DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=59702198927824160580232890571734388647, time:1750767539.9583795s req_ids:[8] +DEBUG 06-24 20:18:59 [manager.py:391] +ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:206.91752433776855ms total_cost_time:206.9721221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:9437 prompt_cache_len:5151 prompt_cache_ratio:0.5458302426618629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 +DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10961127281188965 s +INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.11148548126220703 s +DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=242225668455424046068694241133695064255, time:1750767540.1704946s req_ids:[8] +DEBUG 06-24 20:19:00 [manager.py:391] +ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:205.0337791442871ms total_cost_time:205.0797939300537ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9438 prompt_cache_len:5151 prompt_cache_ratio:0.545772409408773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 +DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10842084884643555 s +INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s +DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=263133332819952811469962226474279628575, time:1750767540.383601s req_ids:[8] +DEBUG 06-24 20:19:00 [manager.py:391] +ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:19:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 39441.173 tokens/s +DEBUG 06-24 20:19:00 [stats.py:37] Avg prompt tokens throughput: 39432.700 tokens/s +DEBUG 06-24 20:19:00 [stats.py:37] Avg generate tokens throughput: 8.473 tokens/s +INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:207.89837837219238ms total_cost_time:207.94177055358887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9439 prompt_cache_len:5151 prompt_cache_ratio:0.5457145884097891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 +DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10970330238342285 s +INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.1118171215057373 s +DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=226329075419561500816913074426180506792, time:1750767540.6078472s req_ids:[8] +DEBUG 06-24 20:19:00 [manager.py:391] +ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:222.59521484375ms total_cost_time:222.63836860656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9440 prompt_cache_len:5151 prompt_cache_ratio:0.545656779661017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 +DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s +INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.11065411567687988 s +DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=238816902533675331945785924356883807428, time:1750767540.8258314s req_ids:[8] +DEBUG 06-24 20:19:00 [manager.py:391] +ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:209.20634269714355ms total_cost_time:209.25259590148926ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9441 prompt_cache_len:5151 prompt_cache_ratio:0.5455989831585637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 +DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10836386680603027 s +INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.1103971004486084 s +DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=15733038250147580558669482168204162376, time:1750767541.040767s req_ids:[8] +DEBUG 06-24 20:19:01 [manager.py:391] +ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:207.83638954162598ms total_cost_time:207.88192749023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9442 prompt_cache_len:5151 prompt_cache_ratio:0.5455411988985385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 +DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.3105044364929199 s +INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.3126044273376465 s +DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=271529283589110587012337969316096141285, time:1750767541.4614305s req_ids:[8] +DEBUG 06-24 20:19:01 [manager.py:391] +ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:419.94166374206543ms total_cost_time:419.9855327606201ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9443 prompt_cache_len:5151 prompt_cache_ratio:0.5454834268770518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 +DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10896015167236328 s +INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s +DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=158195400641797000310195576083438978260, time:1750767541.6831138s req_ids:[8] +DEBUG 06-24 20:19:01 [manager.py:391] +ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:208.571195602417ms total_cost_time:208.61458778381348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9444 prompt_cache_len:5151 prompt_cache_ratio:0.545425667090216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 +DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s +INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.11076998710632324 s +DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=282688474064382112335567784890489238390, time:1750767541.8951578s req_ids:[8] +DEBUG 06-24 20:19:01 [manager.py:391] +ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:209.63048934936523ms total_cost_time:209.67650413513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9445 prompt_cache_len:5151 prompt_cache_ratio:0.5453679195341451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 +DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s +INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11104369163513184 s +DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=291015430165318849778810117084862349157, time:1750767542.1092677s req_ids:[8] +DEBUG 06-24 20:19:02 [manager.py:391] +ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:206.47096633911133ms total_cost_time:206.5136432647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9446 prompt_cache_len:5151 prompt_cache_ratio:0.5453101842049545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 +DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10803437232971191 s +INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.10984230041503906 s +DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=61872181520849338771502150678026515647, time:1750767542.3224409s req_ids:[8] +DEBUG 06-24 20:19:02 [manager.py:391] +ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.8174819946289ms total_cost_time:208.8611125946045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9447 prompt_cache_len:5151 prompt_cache_ratio:0.5452524610987615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 +DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10890746116638184 s +INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s +DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=116608658700308673101809449414433011588, time:1750767542.5380657s req_ids:[8] +DEBUG 06-24 20:19:02 [manager.py:391] +ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.06384086608887ms total_cost_time:208.10770988464355ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9448 prompt_cache_len:5151 prompt_cache_ratio:0.5451947502116851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 +DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.1086874008178711 s +INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11060333251953125 s +DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=295424685662002956594976529346418163893, time:1750767542.7639275s req_ids:[8] +DEBUG 06-24 20:19:02 [manager.py:391] +ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:223.24848175048828ms total_cost_time:223.29306602478027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9449 prompt_cache_len:5151 prompt_cache_ratio:0.5451370515398455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 +DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10746431350708008 s +INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.10933303833007812 s +DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=295167941398399488695322548363752242229, time:1750767542.9805148s req_ids:[8] +DEBUG 06-24 20:19:02 [manager.py:391] +ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.5418701171875ms total_cost_time:208.5862159729004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9450 prompt_cache_len:5151 prompt_cache_ratio:0.545079365079365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 +DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s +INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.11149716377258301 s +DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=4268324458217295308772877589307294260, time:1750767543.1943116s req_ids:[8] +DEBUG 06-24 20:19:03 [manager.py:391] +ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:362.43414878845215ms total_cost_time:362.47873306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9451 prompt_cache_len:5151 prompt_cache_ratio:0.5450216908263675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 +DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.1077430248260498 s +INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s +DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=96906300838378948526988093718372400457, time:1750767543.559697s req_ids:[8] +DEBUG 06-24 20:19:03 [manager.py:391] +ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:203.8254737854004ms total_cost_time:203.8705348968506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9452 prompt_cache_len:5151 prompt_cache_ratio:0.5449640287769785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 +DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.11033368110656738 s +INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.11282134056091309 s +DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=199669966303887786433219487880555653168, time:1750767543.7714417s req_ids:[8] +DEBUG 06-24 20:19:03 [manager.py:391] +ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:205.60431480407715ms total_cost_time:205.64723014831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9453 prompt_cache_len:5151 prompt_cache_ratio:0.5449063789273246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 +DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.10771346092224121 s +INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.1096799373626709 s +DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=247111579861919871318120546939081245571, time:1750767543.982007s req_ids:[8] +DEBUG 06-24 20:19:03 [manager.py:391] +ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:206.03466033935547ms total_cost_time:206.07829093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9454 prompt_cache_len:5151 prompt_cache_ratio:0.544848741273535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 +DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.1084599494934082 s +INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11040973663330078 s +DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=273016106680339891853076998086805456151, time:1750767544.1938465s req_ids:[8] +DEBUG 06-24 20:19:04 [manager.py:391] +ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:207.31472969055176ms total_cost_time:207.35931396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9455 prompt_cache_len:5151 prompt_cache_ratio:0.5447911158117398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 +DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.1080021858215332 s +INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.10993194580078125 s +DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=240333772975707828403398244207926738996, time:1750767544.408639s req_ids:[8] +DEBUG 06-24 20:19:04 [manager.py:391] +ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:207.71169662475586ms total_cost_time:207.75580406188965ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9456 prompt_cache_len:5151 prompt_cache_ratio:0.5447335025380711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 +DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.10868000984191895 s +INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11065006256103516 s +DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=209200369564992502423188406848762028987, time:1750767544.6251745s req_ids:[8] +DEBUG 06-24 20:19:04 [manager.py:391] +ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:210.2367877960205ms total_cost_time:210.2828025817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9457 prompt_cache_len:5151 prompt_cache_ratio:0.5446759014486624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 +DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.10830855369567871 s +INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s +DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=324149106799743092640747740386183345474, time:1750767544.8375664s req_ids:[8] +DEBUG 06-24 20:19:04 [manager.py:391] +ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:209.72561836242676ms total_cost_time:209.76996421813965ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9458 prompt_cache_len:5151 prompt_cache_ratio:0.544618312539649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 +DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.30976319313049316 s +INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.3116185665130615 s +DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=41503862466856879213908494385940195898, time:1750767545.2591815s req_ids:[8] +DEBUG 06-24 20:19:05 [manager.py:391] +ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:416.28026962280273ms total_cost_time:416.3224697113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9459 prompt_cache_len:5151 prompt_cache_ratio:0.5445607358071678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 +DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10837793350219727 s +INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11024618148803711 s +DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=131699589002594736062749373963028149548, time:1750767545.476052s req_ids:[8] +DEBUG 06-24 20:19:05 [manager.py:391] +ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:209.37132835388184ms total_cost_time:209.41686630249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9460 prompt_cache_len:5151 prompt_cache_ratio:0.5445031712473573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 +DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10815834999084473 s +INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s +DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=264795559358717156597182826990643116323, time:1750767545.69836s req_ids:[8] +DEBUG 06-24 20:19:05 [manager.py:391] +ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:218.7039852142334ms total_cost_time:218.7492847442627ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9461 prompt_cache_len:5151 prompt_cache_ratio:0.5444456188563577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 +DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s +INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11064863204956055 s +DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=203902309525946269025761712424853015704, time:1750767545.9189591s req_ids:[8] +DEBUG 06-24 20:19:05 [manager.py:391] +ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:215.40331840515137ms total_cost_time:215.44623374938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9462 prompt_cache_len:5151 prompt_cache_ratio:0.5443880786303107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 +DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10906291007995605 s +INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11094069480895996 s +INFO 06-24 20:19:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=322587704771296475903020819518451033441, time:1750767546.1347663s req_ids:[8] +DEBUG 06-24 20:19:06 [manager.py:391] +ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:209.36250686645508ms total_cost_time:209.40566062927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9463 prompt_cache_len:5151 prompt_cache_ratio:0.5443305505653598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 +DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10847878456115723 s +INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11031651496887207 s +DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=294645377803875822034081822554445740978, time:1750767546.3500075s req_ids:[8] +DEBUG 06-24 20:19:06 [manager.py:391] +ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:209.26928520202637ms total_cost_time:209.31506156921387ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9464 prompt_cache_len:5151 prompt_cache_ratio:0.5442730346576501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 +DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10892868041992188 s +INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075615882873535 s +DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=295782618736663910957284834744563470319, time:1750767546.5653384s req_ids:[8] +DEBUG 06-24 20:19:06 [manager.py:391] +ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:205.04474639892578ms total_cost_time:205.08885383605957ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9465 prompt_cache_len:5151 prompt_cache_ratio:0.5442155309033281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 +DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.1080169677734375 s +INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.10976362228393555 s +DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=56991221673068594709085878753444164988, time:1750767546.7745323s req_ids:[8] +DEBUG 06-24 20:19:06 [manager.py:391] +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:205.90496063232422ms total_cost_time:205.9471607208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9466 prompt_cache_len:5151 prompt_cache_ratio:0.5441580392985421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 +DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10750150680541992 s +INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.1093897819519043 s +DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=267574377036043559422860292007781721319, time:1750767546.9868793s req_ids:[8] +DEBUG 06-24 20:19:06 [manager.py:391] +ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:403.3827781677246ms total_cost_time:403.4271240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9467 prompt_cache_len:5151 prompt_cache_ratio:0.5441005598394423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 +DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10743188858032227 s +INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.10907721519470215 s +DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=225326639399567361155247027550066138265, time:1750767547.3933082s req_ids:[8] +DEBUG 06-24 20:19:07 [manager.py:391] +ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:189.45550918579102ms total_cost_time:189.5136833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:9468 prompt_cache_len:5151 prompt_cache_ratio:0.54404309252218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 +DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10745549201965332 s +INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.10935425758361816 s +DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=56186086545072274754574405586959243690, time:1750767547.589018s req_ids:[8] +DEBUG 06-24 20:19:07 [manager.py:391] +ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:205.07359504699707ms total_cost_time:205.12843132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9469 prompt_cache_len:5151 prompt_cache_ratio:0.5439856373429084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 +DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10938835144042969 s +INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.11147403717041016 s +DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=278393525547507602372702473068399333662, time:1750767547.8034744s req_ids:[8] +DEBUG 06-24 20:19:07 [manager.py:391] +ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:209.82766151428223ms total_cost_time:209.87176895141602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9470 prompt_cache_len:5151 prompt_cache_ratio:0.5439281942977825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 +DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.11033773422241211 s +INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11284065246582031 s +DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=30861567242428310679956261929504483368, time:1750767548.0184994s req_ids:[8] +DEBUG 06-24 20:19:08 [manager.py:391] +ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:208.72902870178223ms total_cost_time:208.77432823181152ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9471 prompt_cache_len:5151 prompt_cache_ratio:0.5438707633829585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 +DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s +INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.1104891300201416 s +DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=213848588314479385216982733295015197075, time:1750767548.2323484s req_ids:[8] +DEBUG 06-24 20:19:08 [manager.py:391] +ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:206.9847583770752ms total_cost_time:207.02815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9472 prompt_cache_len:5151 prompt_cache_ratio:0.5438133445945946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 +DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.1094660758972168 s +INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11162710189819336 s +DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=226394144107209850889417979980786010794, time:1750767548.4448607s req_ids:[8] +DEBUG 06-24 20:19:08 [manager.py:391] +ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:206.15434646606445ms total_cost_time:206.20012283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9473 prompt_cache_len:5151 prompt_cache_ratio:0.5437559379288505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 +DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10860443115234375 s +INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.1106109619140625 s +DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=134474692013101829275825186048702199760, time:1750767548.6643627s req_ids:[8] +DEBUG 06-24 20:19:08 [manager.py:391] +ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:216.98999404907227ms total_cost_time:217.0417308807373ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:9474 prompt_cache_len:5151 prompt_cache_ratio:0.5436985433818873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 +DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s +INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11027240753173828 s +DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=83528304455845781622705596875364155863, time:1750767548.8934457s req_ids:[8] +DEBUG 06-24 20:19:08 [manager.py:391] +ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:222.62978553771973ms total_cost_time:222.6734161376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9475 prompt_cache_len:5151 prompt_cache_ratio:0.543641160949868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 +DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10805892944335938 s +INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11013460159301758 s +DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=238106543101396802876624115340730734705, time:1750767549.1095243s req_ids:[8] +DEBUG 06-24 20:19:09 [manager.py:391] +ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:372.93124198913574ms total_cost_time:372.9748725891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9476 prompt_cache_len:5151 prompt_cache_ratio:0.5435837906289573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 +DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s +INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.10981011390686035 s +DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=282077581832763734508013699435374229069, time:1750767549.4863234s req_ids:[8] +DEBUG 06-24 20:19:09 [manager.py:391] +ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:205.15179634094238ms total_cost_time:205.19614219665527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9477 prompt_cache_len:5151 prompt_cache_ratio:0.5435264324153213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 +DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:09 [batch.py:51] router release req id 8 +INFO 06-24 20:19:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10847854614257812 s +INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092901229858398 s +DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=85327576226851586285838097394048263372, time:1750767549.699758s req_ids:[8] +DEBUG 06-24 20:19:09 [manager.py:391] +ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:207.9770565032959ms total_cost_time:208.03380012512207ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:9478 prompt_cache_len:5151 prompt_cache_ratio:0.5434690863051277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 +DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10799145698547363 s +INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11026763916015625 s +DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=247560034264618077555436327086463475058, time:1750767549.9136674s req_ids:[8] +DEBUG 06-24 20:19:09 [manager.py:391] +ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:206.37845993041992ms total_cost_time:206.42995834350586ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:9479 prompt_cache_len:5151 prompt_cache_ratio:0.5434117522945459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 +DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.11108636856079102 s +INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.11324763298034668 s +DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=180825912745911338013169056130461679319, time:1750767550.1280706s req_ids:[8] +DEBUG 06-24 20:19:10 [manager.py:391] +ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:205.9471607208252ms total_cost_time:205.9915065765381ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9480 prompt_cache_len:5151 prompt_cache_ratio:0.5433544303797468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 +DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.10816287994384766 s +INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.11032557487487793 s +DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=101835608740629359015223320197928803454, time:1750767550.3416312s req_ids:[8] +DEBUG 06-24 20:19:10 [manager.py:391] +ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:209.42974090576172ms total_cost_time:209.4733715057373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9481 prompt_cache_len:5151 prompt_cache_ratio:0.5432971205569033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 +DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.10552549362182617 s +INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10720968246459961 s +DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=228273762864539407426857273064807498466, time:1750767550.5549245s req_ids:[8] +DEBUG 06-24 20:19:10 [manager.py:391] +DEBUG 06-24 20:19:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 40354.344 tokens/s +DEBUG 06-24 20:19:10 [stats.py:37] Avg prompt tokens throughput: 40345.914 tokens/s +DEBUG 06-24 20:19:10 [stats.py:37] Avg generate tokens throughput: 8.430 tokens/s +ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:169.13342475891113ms total_cost_time:169.1567897796631ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9482 prompt_cache_len:5151 prompt_cache_ratio:0.5432398228221894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 +DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.1047368049621582 s +INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10667061805725098 s +DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=288252154001088804211405016006749618686, time:1750767550.7278805s req_ids:[8] +DEBUG 06-24 20:19:10 [manager.py:391] +ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:167.92869567871094ms total_cost_time:167.95921325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:9483 prompt_cache_len:5151 prompt_cache_ratio:0.5431825371717811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 +DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.1066136360168457 s +INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10862851142883301 s +DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=247737292023069941320950625484644864714, time:1750767550.9017634s req_ids:[8] +DEBUG 06-24 20:19:10 [manager.py:391] +ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:367.02537536621094ms total_cost_time:367.07162857055664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9484 prompt_cache_len:5151 prompt_cache_ratio:0.5431252636018558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 +DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s +INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11083602905273438 s +DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=44098795136216788450014540460655884495, time:1750767551.2704563s req_ids:[8] +DEBUG 06-24 20:19:11 [manager.py:391] +ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:205.52873611450195ms total_cost_time:205.57308197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9485 prompt_cache_len:5151 prompt_cache_ratio:0.5430680021085925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 +DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.1091923713684082 s +INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11120176315307617 s +DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=177442249279889811091579178205460721184, time:1750767551.4848654s req_ids:[8] +DEBUG 06-24 20:19:11 [manager.py:391] +ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:212.0215892791748ms total_cost_time:212.083101272583ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:9486 prompt_cache_len:5151 prompt_cache_ratio:0.543010752688172 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 +DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10817384719848633 s +INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029434204101562 s +DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=336038313640778110676563599774092670938, time:1750767551.701238s req_ids:[8] +DEBUG 06-24 20:19:11 [manager.py:391] +ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:209.55443382263184ms total_cost_time:209.59925651550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9487 prompt_cache_len:5151 prompt_cache_ratio:0.5429535153367766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 +DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10802698135375977 s +INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11002445220947266 s +DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=303804628563870263405600602250252311117, time:1750767551.9175334s req_ids:[8] +DEBUG 06-24 20:19:11 [manager.py:391] +ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:211.20429039001465ms total_cost_time:211.24839782714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9488 prompt_cache_len:5151 prompt_cache_ratio:0.5428962900505903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 +DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10898590087890625 s +INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11110591888427734 s +DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=132238584770086110177881922986494741513, time:1750767552.1332054s req_ids:[8] +DEBUG 06-24 20:19:12 [manager.py:391] +ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:203.43351364135742ms total_cost_time:203.4759521484375ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9489 prompt_cache_len:5151 prompt_cache_ratio:0.5428390768257982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 +DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s +INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11053037643432617 s +DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=315894240340197927946327027547828814518, time:1750767552.3441603s req_ids:[8] +DEBUG 06-24 20:19:12 [manager.py:391] +ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:203.17721366882324ms total_cost_time:203.22132110595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9490 prompt_cache_len:5151 prompt_cache_ratio:0.542781875658588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 +DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10885214805603027 s +INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11083221435546875 s +DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=204405236055117453385275209946975602480, time:1750767552.5552635s req_ids:[8] +DEBUG 06-24 20:19:12 [manager.py:391] +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:212.15200424194336ms total_cost_time:212.19587326049805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9491 prompt_cache_len:5151 prompt_cache_ratio:0.542724686545148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 +DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.1088871955871582 s +INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11100602149963379 s +DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=34227615085832645541041483757012205983, time:1750767552.7697768s req_ids:[8] +DEBUG 06-24 20:19:12 [manager.py:391] +ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:208.98699760437012ms total_cost_time:209.0299129486084ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9492 prompt_cache_len:5151 prompt_cache_ratio:0.5426675094816688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 +DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:13 [batch.py:51] router release req id 8 +INFO 06-24 20:19:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.31023097038269043 s +INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.3125762939453125 s +DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=97002728500330342113595149147540985558, time:1750767553.1910343s req_ids:[8] +DEBUG 06-24 20:19:13 [manager.py:391] +ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:422.1360683441162ms total_cost_time:422.1808910369873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9493 prompt_cache_len:5151 prompt_cache_ratio:0.5426103444643422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 +DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.10722112655639648 s +INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.10917949676513672 s +DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=225222273515392958428850864945066509604, time:1750767553.414655s req_ids:[8] +DEBUG 06-24 20:19:13 [manager.py:391] +ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:215.1482105255127ms total_cost_time:215.19231796264648ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9494 prompt_cache_len:5151 prompt_cache_ratio:0.5425531914893617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 +DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.10870981216430664 s +INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.1108095645904541 s +DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=296007100506641432479422898517724582854, time:1750767553.6320462s req_ids:[8] +DEBUG 06-24 20:19:13 [manager.py:391] +ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:213.63091468811035ms total_cost_time:213.67740631103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9495 prompt_cache_len:5151 prompt_cache_ratio:0.5424960505529226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 +DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.11020994186401367 s +INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.11221861839294434 s +DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=146773525895336688697443739669053541394, time:1750767553.850315s req_ids:[8] +DEBUG 06-24 20:19:13 [manager.py:391] +ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:209.2602252960205ms total_cost_time:209.3203067779541ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9496 prompt_cache_len:5151 prompt_cache_ratio:0.5424389216512215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 +DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.1103522777557373 s +INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11238718032836914 s +DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=214805554404507599878926251264682041329, time:1750767554.0687454s req_ids:[8] +DEBUG 06-24 20:19:14 [manager.py:391] +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:210.44301986694336ms total_cost_time:210.48617362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9497 prompt_cache_len:5151 prompt_cache_ratio:0.5423818047804569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 +DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10722899436950684 s +INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.10880637168884277 s +DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=318326850601847422570142924270443736419, time:1750767554.287194s req_ids:[8] +DEBUG 06-24 20:19:14 [manager.py:391] +ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:201.8606662750244ms total_cost_time:201.9050121307373ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9498 prompt_cache_len:5151 prompt_cache_ratio:0.5423246999368289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 +DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10889935493469238 s +INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11104607582092285 s +DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=183688773839862824106086197837875434139, time:1750767554.4893334s req_ids:[8] +DEBUG 06-24 20:19:14 [manager.py:391] +ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:203.58586311340332ms total_cost_time:203.63259315490723ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9499 prompt_cache_len:5151 prompt_cache_ratio:0.5422676071165385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 +DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10922122001647949 s +INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s +DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=310808304937008975223275583377398731745, time:1750767554.7005208s req_ids:[8] +DEBUG 06-24 20:19:14 [manager.py:391] +ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:202.99887657165527ms total_cost_time:203.04274559020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9500 prompt_cache_len:5151 prompt_cache_ratio:0.5422105263157895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 +DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10844230651855469 s +INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11062192916870117 s +DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=239244549908637628386442658515458888733, time:1750767554.911279s req_ids:[8] +DEBUG 06-24 20:19:14 [manager.py:391] +ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:410.0067615509033ms total_cost_time:410.0496768951416ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9501 prompt_cache_len:5151 prompt_cache_ratio:0.5421534575307863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 +DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10775256156921387 s +INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.10965538024902344 s +DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=191338304622728367090180797029487094315, time:1750767555.324572s req_ids:[8] +DEBUG 06-24 20:19:15 [manager.py:391] +ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:205.57689666748047ms total_cost_time:205.62076568603516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9502 prompt_cache_len:5151 prompt_cache_ratio:0.5420964007577352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 +DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.1077115535736084 s +INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.10961246490478516 s +DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=218936684090068143895796848420811936729, time:1750767555.542097s req_ids:[8] +DEBUG 06-24 20:19:15 [manager.py:391] +ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:213.39035034179688ms total_cost_time:213.43302726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9503 prompt_cache_len:5151 prompt_cache_ratio:0.5420393559928444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 +DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10808229446411133 s +INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.11019086837768555 s +DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=142486151678572551032481283114735252395, time:1750767555.7547362s req_ids:[8] +DEBUG 06-24 20:19:15 [manager.py:391] +ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:206.71415328979492ms total_cost_time:206.7577838897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9504 prompt_cache_len:5151 prompt_cache_ratio:0.5419823232323232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 +DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s +INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098172664642334 s +DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=288090420595560667864837319391484491471, time:1750767555.9673603s req_ids:[8] +DEBUG 06-24 20:19:15 [manager.py:391] +ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:213.40465545654297ms total_cost_time:213.44971656799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9505 prompt_cache_len:5151 prompt_cache_ratio:0.5419253024723829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 +DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.10886001586914062 s +INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083221435546875 s +DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=207724348780860885516595774927431818758, time:1750767556.1854522s req_ids:[8] +DEBUG 06-24 20:19:16 [manager.py:391] +ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:205.2781581878662ms total_cost_time:205.322265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9506 prompt_cache_len:5151 prompt_cache_ratio:0.5418682937092363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 +DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.1080629825592041 s +INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017942428588867 s +DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=130734616665938504041972883646328416702, time:1750767556.3937082s req_ids:[8] +DEBUG 06-24 20:19:16 [manager.py:391] +ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:198.4117031097412ms total_cost_time:198.4546184539795ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9507 prompt_cache_len:5151 prompt_cache_ratio:0.5418112969390975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 +DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.10869455337524414 s +INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11001014709472656 s +DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=13971209076046169750798382194947000057, time:1750767556.607396s req_ids:[8] +DEBUG 06-24 20:19:16 [manager.py:391] +ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:220.4296588897705ms total_cost_time:220.475435256958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9508 prompt_cache_len:5151 prompt_cache_ratio:0.5417543121581826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 +DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:16 [batch.py:51] router release req id 8 +INFO 06-24 20:19:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.3106698989868164 s +INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.31276607513427734 s +DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=55453135206861195895021387691924791469, time:1750767557.0442412s req_ids:[8] +DEBUG 06-24 20:19:17 [manager.py:391] +ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:430.9651851654053ms total_cost_time:431.01000785827637ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9509 prompt_cache_len:5151 prompt_cache_ratio:0.541697339362709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 +DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10848641395568848 s +INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.10988187789916992 s +DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=199971673062834587022588979287134911493, time:1750767557.2681487s req_ids:[8] +DEBUG 06-24 20:19:17 [manager.py:391] +ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:209.84721183776855ms total_cost_time:209.89298820495605ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9510 prompt_cache_len:5151 prompt_cache_ratio:0.5416403785488959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 +DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s +INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.11002039909362793 s +DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=71989213771161549090560814828924794690, time:1750767557.4869645s req_ids:[8] +DEBUG 06-24 20:19:17 [manager.py:391] +ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:210.77895164489746ms total_cost_time:210.82329750061035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9511 prompt_cache_len:5151 prompt_cache_ratio:0.5415834297129639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 +DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10845375061035156 s +INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.1102607250213623 s +DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=113755833423514972031954830890727553652, time:1750767557.697606s req_ids:[8] +DEBUG 06-24 20:19:17 [manager.py:391] +ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:167.23871231079102ms total_cost_time:167.2821044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9512 prompt_cache_len:5151 prompt_cache_ratio:0.5415264928511354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 +DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s +INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.10973024368286133 s +DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=52209263675876751794618345501765567275, time:1750767557.8718507s req_ids:[8] +DEBUG 06-24 20:19:17 [manager.py:391] +ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:200.4692554473877ms total_cost_time:200.51336288452148ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9513 prompt_cache_len:5151 prompt_cache_ratio:0.5414695679596342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 +DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10806655883789062 s +INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.10998988151550293 s +DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=224784590751389838433745243158358563839, time:1750767558.0914156s req_ids:[8] +DEBUG 06-24 20:19:18 [manager.py:391] +ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:221.65250778198242ms total_cost_time:221.69828414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9514 prompt_cache_len:5151 prompt_cache_ratio:0.5414126550346857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 +DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10782504081726074 s +INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.10928106307983398 s +DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=17292155745083185855352960589703203170, time:1750767558.306342s req_ids:[8] +DEBUG 06-24 20:19:18 [manager.py:391] +ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:209.92112159729004ms total_cost_time:209.96761322021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9515 prompt_cache_len:5151 prompt_cache_ratio:0.5413557540725171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 +DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:18 [batch.py:51] router release req id 8 +DEBUG 06-24 20:19:18 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:18 [manager.py:283] +DEBUG 06-24 20:19:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:18 [manager.py:284] +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10889410972595215 s +INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.11099004745483398 s +DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=63366768640992635962556872624868741592, time:1750767558.5225437s req_ids:[8] +DEBUG 06-24 20:19:18 [manager.py:391] +ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:212.5074863433838ms total_cost_time:212.55207061767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9516 prompt_cache_len:5151 prompt_cache_ratio:0.5412988650693569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 +DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s +INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s +DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=194052217673907494389417897687899324300, time:1750767558.738362s req_ids:[8] +DEBUG 06-24 20:19:18 [manager.py:391] +ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:394.6666717529297ms total_cost_time:394.7134017944336ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9517 prompt_cache_len:5151 prompt_cache_ratio:0.5412419880214353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 +DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.11365795135498047 s +DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=28059836524251615797012362759534471435, time:1750767559.137432s req_ids:[8] +DEBUG 06-24 20:19:19 [manager.py:391] +INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.1158287525177002 s +ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:205.28316497802734ms total_cost_time:205.40165901184082ms,out_token_counter:1 mean_per_token_cost_time: 0.11849403381347656ms prompt_token_num:9518 prompt_cache_len:5151 prompt_cache_ratio:0.5411851229249842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 +DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10947799682617188 s +INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.11166143417358398 s +DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=11790123669005909731693445998418181061, time:1750767559.3500266s req_ids:[8] +DEBUG 06-24 20:19:19 [manager.py:391] +ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:206.0532569885254ms total_cost_time:206.09760284423828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9519 prompt_cache_len:5151 prompt_cache_ratio:0.541128269776237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 +DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10766863822937012 s +INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.10904765129089355 s +DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=115384545291841855096144685263377372201, time:1750767559.5649986s req_ids:[8] +DEBUG 06-24 20:19:19 [manager.py:391] +ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:209.28192138671875ms total_cost_time:209.32793617248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9520 prompt_cache_len:5151 prompt_cache_ratio:0.5410714285714285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 +DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10901808738708496 s +INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.11021900177001953 s +DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=9476506861008827652652614131979112897, time:1750767559.7779527s req_ids:[8] +DEBUG 06-24 20:19:19 [manager.py:391] +ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:205.95121383666992ms total_cost_time:205.99603652954102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9521 prompt_cache_len:5151 prompt_cache_ratio:0.5410145993067955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 +DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10802936553955078 s +INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.10995340347290039 s +DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=252137954223621262321451064199893388429, time:1750767559.9908183s req_ids:[8] +DEBUG 06-24 20:19:19 [manager.py:391] +ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:208.57834815979004ms total_cost_time:208.62388610839844ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9522 prompt_cache_len:5151 prompt_cache_ratio:0.5409577819785759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 +DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10810279846191406 s +INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s +DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=130160574120391630341066702531193944386, time:1750767560.2063177s req_ids:[8] +DEBUG 06-24 20:19:20 [manager.py:391] +ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:211.0464572906494ms total_cost_time:211.0910415649414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9523 prompt_cache_len:5151 prompt_cache_ratio:0.5409009765830095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 +DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10767173767089844 s +INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.1099088191986084 s +DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=100423744268563885973072802906235578345, time:1750767560.4226995s req_ids:[8] +DEBUG 06-24 20:19:20 [manager.py:391] +ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:210.3407382965088ms total_cost_time:210.3862762451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9524 prompt_cache_len:5151 prompt_cache_ratio:0.5408441831163376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 +DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10889577865600586 s +INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11086201667785645 s +DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=24865963991163631886412013540410867617, time:1750767560.6381278s req_ids:[8] +DEBUG 06-24 20:19:20 [manager.py:391] +DEBUG 06-24 20:19:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 40537.004 tokens/s +DEBUG 06-24 20:19:20 [stats.py:37] Avg prompt tokens throughput: 40528.475 tokens/s +DEBUG 06-24 20:19:20 [stats.py:37] Avg generate tokens throughput: 8.529 tokens/s +ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:208.50515365600586ms total_cost_time:208.54949951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9525 prompt_cache_len:5151 prompt_cache_ratio:0.5407874015748031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 +DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s +INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11077427864074707 s +DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=136914751062597012646751259227228527018, time:1750767560.8526795s req_ids:[8] +DEBUG 06-24 20:19:20 [manager.py:391] +ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:368.2398796081543ms total_cost_time:368.2827949523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9526 prompt_cache_len:5151 prompt_cache_ratio:0.5407306319546504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 +DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10880064964294434 s +INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s +DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=80738908151693307333219153041502589941, time:1750767561.2244606s req_ids:[8] +DEBUG 06-24 20:19:21 [manager.py:391] +ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:206.30645751953125ms total_cost_time:206.34961128234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9527 prompt_cache_len:5151 prompt_cache_ratio:0.5406738742521255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 +DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s +INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.11132526397705078 s +DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=181606426344102808309183871147793687985, time:1750767561.4390697s req_ids:[8] +DEBUG 06-24 20:19:21 [manager.py:391] +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:208.1446647644043ms total_cost_time:208.1918716430664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9528 prompt_cache_len:5151 prompt_cache_ratio:0.5406171284634761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 +DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10818052291870117 s +INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.11021733283996582 s +DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=160365818577538819032419899103222203738, time:1750767561.6532326s req_ids:[8] +DEBUG 06-24 20:19:21 [manager.py:391] +ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:205.5966854095459ms total_cost_time:205.6412696838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9529 prompt_cache_len:5151 prompt_cache_ratio:0.5405603945849512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 +DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.1076664924621582 s +INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.1097254753112793 s +DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=194070719926577308181044475807128642788, time:1750767561.8626502s req_ids:[8] +DEBUG 06-24 20:19:21 [manager.py:391] +ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:203.81402969360352ms total_cost_time:203.86028289794922ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9530 prompt_cache_len:5151 prompt_cache_ratio:0.5405036726128016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 +DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s +INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.10939931869506836 s +DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=126213006280750415131768862422010611418, time:1750767562.0719163s req_ids:[8] +DEBUG 06-24 20:19:22 [manager.py:391] +ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:206.23016357421875ms total_cost_time:206.27641677856445ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9531 prompt_cache_len:5151 prompt_cache_ratio:0.5404469625432798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 +DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.1090552806854248 s +INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11110067367553711 s +DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=79424146977121731248240642927187242761, time:1750767562.2847207s req_ids:[8] +DEBUG 06-24 20:19:22 [manager.py:391] +ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:207.3347568511963ms total_cost_time:207.37910270690918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9532 prompt_cache_len:5151 prompt_cache_ratio:0.5403902643726395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 +DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s +INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11073803901672363 s +DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=297479554818252738698913234966264014214, time:1750767562.499257s req_ids:[8] +DEBUG 06-24 20:19:22 [manager.py:391] +ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:209.0737819671631ms total_cost_time:209.11836624145508ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9533 prompt_cache_len:5151 prompt_cache_ratio:0.5403335780971362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 +DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10875797271728516 s +INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089181900024414 s +DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=90693199049472413933671555881737149274, time:1750767562.7133358s req_ids:[8] +DEBUG 06-24 20:19:22 [manager.py:391] +ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:377.06637382507324ms total_cost_time:377.11143493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9534 prompt_cache_len:5151 prompt_cache_ratio:0.5402769037130271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 +DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s +INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.11072087287902832 s +DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=257001935623558950551430280768637795232, time:1750767563.094092s req_ids:[8] +DEBUG 06-24 20:19:23 [manager.py:391] +ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:210.15214920043945ms total_cost_time:210.19840240478516ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9535 prompt_cache_len:5151 prompt_cache_ratio:0.5402202412165705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 +INFO 06-24 20:19:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:19:23 [statics_utils.py:24] mean first cost: 228.52410058048707 ms +INFO 06-24 20:19:23 [statics_utils.py:24] mean per token cost: 0.07166113834181964 ms +DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10849547386169434 s +INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.110626220703125 s +DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=80094487189028559078407482541304070027, time:1750767563.3237152s req_ids:[8] +DEBUG 06-24 20:19:23 [manager.py:391] +ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:221.86684608459473ms total_cost_time:221.9107151031494ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9536 prompt_cache_len:5151 prompt_cache_ratio:0.5401635906040269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 +DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10872888565063477 s +INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.11081767082214355 s +DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=111626047785192724796877611334782543585, time:1750767563.5400145s req_ids:[8] +DEBUG 06-24 20:19:23 [manager.py:391] +ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:208.15467834472656ms total_cost_time:208.19902420043945ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9537 prompt_cache_len:5151 prompt_cache_ratio:0.5401069518716578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 +DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s +INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.10961151123046875 s +DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=108966486587751156095835843742284198978, time:1750767563.7544625s req_ids:[8] +DEBUG 06-24 20:19:23 [manager.py:391] +ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:208.6658477783203ms total_cost_time:208.7087631225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9538 prompt_cache_len:5151 prompt_cache_ratio:0.5400503250157266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 +DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10812950134277344 s +INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.10941147804260254 s +DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=212201549310206662969522477041190209317, time:1750767563.9870532s req_ids:[8] +DEBUG 06-24 20:19:23 [manager.py:391] +ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:231.83012008666992ms total_cost_time:231.87518119812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9539 prompt_cache_len:5151 prompt_cache_ratio:0.5399937100324982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 +DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10798168182373047 s +INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s +DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=124557792094451792280938614199486510096, time:1750767564.2057323s req_ids:[8] +DEBUG 06-24 20:19:24 [manager.py:391] +ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:210.48450469970703ms total_cost_time:210.53004264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9540 prompt_cache_len:5151 prompt_cache_ratio:0.539937106918239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 +DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10900306701660156 s +INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.11091899871826172 s +DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=243273264063016273493189198844713274205, time:1750767564.4221249s req_ids:[8] +DEBUG 06-24 20:19:24 [manager.py:391] +ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:208.4803581237793ms total_cost_time:208.526611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9541 prompt_cache_len:5151 prompt_cache_ratio:0.539880515669217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 +DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10789036750793457 s +INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.10928082466125488 s +DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=269677482003745466953213236010634757687, time:1750767564.6356347s req_ids:[8] +DEBUG 06-24 20:19:24 [manager.py:391] +ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:207.10349082946777ms total_cost_time:207.14926719665527ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9542 prompt_cache_len:5151 prompt_cache_ratio:0.539823936281702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 +DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.3095424175262451 s +INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.31163716316223145 s +DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=292533113241390086186084715142591626153, time:1750767565.0527327s req_ids:[8] +DEBUG 06-24 20:19:25 [manager.py:391] +ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:414.92724418640137ms total_cost_time:414.97230529785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9543 prompt_cache_len:5151 prompt_cache_ratio:0.5397673687519647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 +DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10739898681640625 s +INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.10953974723815918 s +DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=129773338545182260910553542519800138269, time:1750767565.2676132s req_ids:[8] +DEBUG 06-24 20:19:25 [manager.py:391] +ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.1065902709961ms total_cost_time:207.1511745452881ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9544 prompt_cache_len:5151 prompt_cache_ratio:0.5397108130762783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 +DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10579442977905273 s +INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.1070091724395752 s +DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=74123601327788204749057157391408544246, time:1750767565.4809144s req_ids:[8] +DEBUG 06-24 20:19:25 [manager.py:391] +ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.9331874847412ms total_cost_time:207.95917510986328ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:9545 prompt_cache_len:5151 prompt_cache_ratio:0.5396542692509168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 +DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s +INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.10716891288757324 s +DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=334029106264724612480043666535895921578, time:1750767565.694826s req_ids:[8] +DEBUG 06-24 20:19:25 [manager.py:391] +ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.19647407531738ms total_cost_time:207.24081993103027ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9546 prompt_cache_len:5151 prompt_cache_ratio:0.5395977372721559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 +DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10828471183776855 s +INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.11030364036560059 s +DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=205611890943692983669794516857737120455, time:1750767565.9102569s req_ids:[8] +DEBUG 06-24 20:19:25 [manager.py:391] +ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:213.61494064331055ms total_cost_time:213.66119384765625ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9547 prompt_cache_len:5151 prompt_cache_ratio:0.5395412171362731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 +DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10779547691345215 s +INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10987567901611328 s +DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=313802351801410313454618246998111029132, time:1750767566.1262188s req_ids:[8] +DEBUG 06-24 20:19:26 [manager.py:391] +ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:209.7339630126953ms total_cost_time:209.7783088684082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9548 prompt_cache_len:5151 prompt_cache_ratio:0.5394847088395476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 +DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10685610771179199 s +INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10838532447814941 s +DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=338483364382048143294704091766177915800, time:1750767566.3410172s req_ids:[8] +DEBUG 06-24 20:19:26 [manager.py:391] +ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:168.44654083251953ms total_cost_time:168.49040985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9549 prompt_cache_len:5151 prompt_cache_ratio:0.5394282123782596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 +DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10874652862548828 s +INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.11086010932922363 s +DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=110130595648166600230264661782633902792, time:1750767566.514189s req_ids:[8] +DEBUG 06-24 20:19:26 [manager.py:391] +ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:198.69494438171387ms total_cost_time:198.73714447021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9550 prompt_cache_len:5151 prompt_cache_ratio:0.5393717277486911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 +DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10772919654846191 s +INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s +DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=183456611728232452377588134043477587051, time:1750767566.718857s req_ids:[8] +DEBUG 06-24 20:19:26 [manager.py:391] +ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:408.28990936279297ms total_cost_time:408.33401679992676ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9551 prompt_cache_len:5151 prompt_cache_ratio:0.539315254947126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 +DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10887289047241211 s +INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.1108551025390625 s +DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=231885829364772420925828996676835353467, time:1750767567.1310544s req_ids:[8] +DEBUG 06-24 20:19:27 [manager.py:391] +ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:205.81889152526855ms total_cost_time:205.86156845092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9552 prompt_cache_len:5151 prompt_cache_ratio:0.5392587939698492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 +DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s +INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s +INFO 06-24 20:19:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=230519776746442261614442812435782099247, time:1750767567.3507257s req_ids:[8] +DEBUG 06-24 20:19:27 [manager.py:391] +ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:214.37835693359375ms total_cost_time:214.42151069641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9553 prompt_cache_len:5151 prompt_cache_ratio:0.5392023448131477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 +DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10761857032775879 s +INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10957598686218262 s +DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=288984510766525124591215800061005886763, time:1750767567.5653968s req_ids:[8] +DEBUG 06-24 20:19:27 [manager.py:391] +ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:209.12909507751465ms total_cost_time:209.15579795837402ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:9554 prompt_cache_len:5151 prompt_cache_ratio:0.5391459074733096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 +DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10548710823059082 s +INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10752391815185547 s +DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=52218567995083725170193084871394546637, time:1750767567.7789428s req_ids:[8] +DEBUG 06-24 20:19:27 [manager.py:391] +ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:204.16927337646484ms total_cost_time:204.19573783874512ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:9555 prompt_cache_len:5151 prompt_cache_ratio:0.5390894819466248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 +DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10528802871704102 s +INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10711431503295898 s +DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=157585441127913167258305155106687687915, time:1750767567.9876807s req_ids:[8] +DEBUG 06-24 20:19:27 [manager.py:391] +ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:167.93274879455566ms total_cost_time:167.97399520874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9556 prompt_cache_len:5151 prompt_cache_ratio:0.5390330682293847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 +DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.10731768608093262 s +INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092996597290039 s +DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=280119468062835181607479627794192860184, time:1750767568.1598387s req_ids:[8] +DEBUG 06-24 20:19:28 [manager.py:391] +ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:197.54648208618164ms total_cost_time:197.59273529052734ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9557 prompt_cache_len:5151 prompt_cache_ratio:0.5389766663178822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 +DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.10871362686157227 s +INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.11076211929321289 s +DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=97628362716929959377425897611923726797, time:1750767568.3639367s req_ids:[8] +DEBUG 06-24 20:19:28 [manager.py:391] +ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:206.6347599029541ms total_cost_time:206.6788673400879ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9558 prompt_cache_len:5151 prompt_cache_ratio:0.5389202762084118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 +DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.3106393814086914 s +INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.31271839141845703 s +DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=33576749409770292392699614730198400965, time:1750767568.7915688s req_ids:[8] +DEBUG 06-24 20:19:28 [manager.py:391] +ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:428.8289546966553ms total_cost_time:428.8756847381592ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9559 prompt_cache_len:5151 prompt_cache_ratio:0.5388638978972696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 +DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10970783233642578 s +INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11167287826538086 s +DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=297082691612105092246512813917874413429, time:1750767569.0141037s req_ids:[8] +DEBUG 06-24 20:19:29 [manager.py:391] +ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:211.40313148498535ms total_cost_time:211.45009994506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9560 prompt_cache_len:5151 prompt_cache_ratio:0.5388075313807531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 +DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10769104957580566 s +INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.10964608192443848 s +DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=196269175707732820190716200779654786997, time:1750767569.230571s req_ids:[8] +DEBUG 06-24 20:19:29 [manager.py:391] +ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:204.73027229309082ms total_cost_time:204.7741413116455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9561 prompt_cache_len:5151 prompt_cache_ratio:0.5387511766551616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 +DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10869479179382324 s +INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11064982414245605 s +DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=54062305983063016907630298902198025130, time:1750767569.4407966s req_ids:[8] +DEBUG 06-24 20:19:29 [manager.py:391] +ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:209.2912197113037ms total_cost_time:209.3358039855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9562 prompt_cache_len:5151 prompt_cache_ratio:0.5386948337167956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 +DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10982155799865723 s +INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11184358596801758 s +DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=301855681546919206607842265377921371713, time:1750767569.6558874s req_ids:[8] +DEBUG 06-24 20:19:29 [manager.py:391] +ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:206.88748359680176ms total_cost_time:206.94446563720703ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:9563 prompt_cache_len:5151 prompt_cache_ratio:0.5386385025619576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 +DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10836148262023926 s +INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11038517951965332 s +DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=245796407903921052772078424454213224369, time:1750767569.8700001s req_ids:[8] +DEBUG 06-24 20:19:29 [manager.py:391] +ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:207.5631618499756ms total_cost_time:207.60726928710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9564 prompt_cache_len:5151 prompt_cache_ratio:0.5385821831869511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 +DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10905790328979492 s +INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s +DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=212273581476518115166191443412697119141, time:1750767570.084183s req_ids:[8] +DEBUG 06-24 20:19:30 [manager.py:391] +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:207.14187622070312ms total_cost_time:207.1843147277832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9565 prompt_cache_len:5151 prompt_cache_ratio:0.5385258755880815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 +DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10780835151672363 s +INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.10980820655822754 s +DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=250025269233431222942089966411710680897, time:1750767570.2964118s req_ids:[8] +DEBUG 06-24 20:19:30 [manager.py:391] +ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:206.71749114990234ms total_cost_time:206.76064491271973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9566 prompt_cache_len:5151 prompt_cache_ratio:0.5384695797616559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 +DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10832524299621582 s +INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11042594909667969 s +DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=52585544584111991476653927595130533793, time:1750767570.5083225s req_ids:[8] +DEBUG 06-24 20:19:30 [manager.py:391] +ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:19:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 39525.673 tokens/s +DEBUG 06-24 20:19:30 [stats.py:37] Avg prompt tokens throughput: 39517.295 tokens/s +DEBUG 06-24 20:19:30 [stats.py:37] Avg generate tokens throughput: 8.377 tokens/s +INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:396.05212211608887ms total_cost_time:396.09551429748535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9567 prompt_cache_len:5151 prompt_cache_ratio:0.5384132957039824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 +DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:30 [batch.py:51] router release req id 8 +INFO 06-24 20:19:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10800790786743164 s +INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003613471984863 s +DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=263333304832050029111477575052525426633, time:1750767570.910313s req_ids:[8] +DEBUG 06-24 20:19:30 [manager.py:391] +ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:210.48736572265625ms total_cost_time:210.53171157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9568 prompt_cache_len:5151 prompt_cache_ratio:0.5383570234113713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 +DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10956621170043945 s +INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.1116175651550293 s +DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=287340723267448761295025781415023718825, time:1750767571.1271217s req_ids:[8] +DEBUG 06-24 20:19:31 [manager.py:391] +ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:206.1927318572998ms total_cost_time:206.2361240386963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9569 prompt_cache_len:5151 prompt_cache_ratio:0.5383007628801337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 +DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10833024978637695 s +INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11043787002563477 s +DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=17760565847091197084904475265661525694, time:1750767571.339339s req_ids:[8] +DEBUG 06-24 20:19:31 [manager.py:391] +ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:208.99367332458496ms total_cost_time:209.03873443603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9570 prompt_cache_len:5151 prompt_cache_ratio:0.538244514106583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 +DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.1095888614654541 s +INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11133456230163574 s +DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=117789067196858661312594493710677531381, time:1750767571.5537932s req_ids:[8] +DEBUG 06-24 20:19:31 [manager.py:391] +ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:168.3812141418457ms total_cost_time:168.42174530029297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9571 prompt_cache_len:5151 prompt_cache_ratio:0.5381882770870338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 +DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10822796821594238 s +INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11014819145202637 s +DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=97424972194900291150588369756505001331, time:1750767571.7277553s req_ids:[8] +DEBUG 06-24 20:19:31 [manager.py:391] +ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:201.24483108520508ms total_cost_time:201.28703117370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9572 prompt_cache_len:5151 prompt_cache_ratio:0.5381320518178019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 +DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10737776756286621 s +INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.10928106307983398 s +DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=288528960775953015444062707085079258345, time:1750767571.934154s req_ids:[8] +DEBUG 06-24 20:19:31 [manager.py:391] +ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:204.85258102416992ms total_cost_time:204.8969268798828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9573 prompt_cache_len:5151 prompt_cache_ratio:0.5380758382952052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 +DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10795903205871582 s +INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11003899574279785 s +DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=44216944368792910296516066325835168438, time:1750767572.1459544s req_ids:[8] +DEBUG 06-24 20:19:32 [manager.py:391] +ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:209.78260040283203ms total_cost_time:209.82909202575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9574 prompt_cache_len:5151 prompt_cache_ratio:0.538019636515563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 +DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s +INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11100149154663086 s +DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=317957170919018299222671624538445976168, time:1750767572.3677666s req_ids:[8] +DEBUG 06-24 20:19:32 [manager.py:391] +ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:379.1632652282715ms total_cost_time:379.209041595459ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9575 prompt_cache_len:5151 prompt_cache_ratio:0.5379634464751958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 +DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.1084294319152832 s +INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s +DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=36089649378477838612247118710426139676, time:1750767572.7478623s req_ids:[8] +DEBUG 06-24 20:19:32 [manager.py:391] +ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:205.6279182434082ms total_cost_time:205.6715488433838ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9576 prompt_cache_len:5151 prompt_cache_ratio:0.5379072681704261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 +DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10768270492553711 s +INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.10957598686218262 s +DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=198763118647607762005862418134122915282, time:1750767572.963563s req_ids:[8] +DEBUG 06-24 20:19:32 [manager.py:391] +ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:209.36894416809082ms total_cost_time:209.4125747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9577 prompt_cache_len:5151 prompt_cache_ratio:0.5378511015975775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 +DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10787320137023926 s +INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.10976696014404297 s +DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=214163634930176399251974770767307927226, time:1750767573.1844647s req_ids:[8] +DEBUG 06-24 20:19:33 [manager.py:391] +ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:216.37582778930664ms total_cost_time:216.41850471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9578 prompt_cache_len:5151 prompt_cache_ratio:0.5377949467529756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 +DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10867905616760254 s +INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11063575744628906 s +DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=121387143422924937791818423750414505546, time:1750767573.4008336s req_ids:[8] +DEBUG 06-24 20:19:33 [manager.py:391] +ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:205.17325401306152ms total_cost_time:205.21855354309082ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9579 prompt_cache_len:5151 prompt_cache_ratio:0.5377388036329471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 +DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10829663276672363 s +INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11023902893066406 s +DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=259602775629928287214063378682794537207, time:1750767573.616585s req_ids:[8] +DEBUG 06-24 20:19:33 [manager.py:391] +ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:213.85645866394043ms total_cost_time:213.90080451965332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9580 prompt_cache_len:5151 prompt_cache_ratio:0.5376826722338205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 +DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10869026184082031 s +INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11096405982971191 s +DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=315242433687348694896291394848095769635, time:1750767573.8304882s req_ids:[8] +DEBUG 06-24 20:19:33 [manager.py:391] +ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:167.6466464996338ms total_cost_time:167.68765449523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9581 prompt_cache_len:5151 prompt_cache_ratio:0.5376265525519257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 +DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.1082453727722168 s +INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11026930809020996 s +DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=258432937727112355898745677113389726167, time:1750767574.0031037s req_ids:[8] +DEBUG 06-24 20:19:34 [manager.py:391] +ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:198.55833053588867ms total_cost_time:198.60148429870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9582 prompt_cache_len:5151 prompt_cache_ratio:0.5375704445835943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 +DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.1087195873260498 s +INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s +DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=210982940087692593205265939631316583737, time:1750767574.2077737s req_ids:[8] +DEBUG 06-24 20:19:34 [manager.py:391] +ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:377.1669864654541ms total_cost_time:377.2149085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:9583 prompt_cache_len:5151 prompt_cache_ratio:0.5375143483251591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 +DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.1089010238647461 s +INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.11097145080566406 s +DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=109907447556258030749571459732535756548, time:1750767574.5974846s req_ids:[8] +DEBUG 06-24 20:19:34 [manager.py:391] +ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:213.4850025177002ms total_cost_time:213.52863311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9584 prompt_cache_len:5151 prompt_cache_ratio:0.5374582637729549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 +DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.10787844657897949 s +INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994791984558105 s +DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=38372522123268998299215385022443258171, time:1750767574.8116379s req_ids:[8] +DEBUG 06-24 20:19:34 [manager.py:391] +ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:206.8185806274414ms total_cost_time:206.8636417388916ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9585 prompt_cache_len:5151 prompt_cache_ratio:0.5374021909233176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 +DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10898995399475098 s +INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.1110222339630127 s +DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=5223430205994202387819560736507652473, time:1750767575.0241735s req_ids:[8] +DEBUG 06-24 20:19:35 [manager.py:391] +ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:209.60521697998047ms total_cost_time:209.65003967285156ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9586 prompt_cache_len:5151 prompt_cache_ratio:0.537346129772585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 +DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10707974433898926 s +INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10904955863952637 s +DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=273720352666573770646096172097715243995, time:1750767575.237574s req_ids:[8] +DEBUG 06-24 20:19:35 [manager.py:391] +ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:204.21481132507324ms total_cost_time:204.26058769226074ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9587 prompt_cache_len:5151 prompt_cache_ratio:0.537290080317096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 +DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s +INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10979771614074707 s +DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=238753019418245568751878399799239336327, time:1750767575.4485145s req_ids:[8] +DEBUG 06-24 20:19:35 [manager.py:391] +ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:207.18812942504883ms total_cost_time:207.2310447692871ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9588 prompt_cache_len:5151 prompt_cache_ratio:0.5372340425531915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 +DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s +INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10988044738769531 s +DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=305836452495562443389262573850058974031, time:1750767575.661738s req_ids:[8] +DEBUG 06-24 20:19:35 [manager.py:391] +ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:208.5120677947998ms total_cost_time:208.5561752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9589 prompt_cache_len:5151 prompt_cache_ratio:0.5371780164772135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 +DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s +INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.11011552810668945 s +DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=181763942477498207314635101086111422094, time:1750767575.875582s req_ids:[8] +DEBUG 06-24 20:19:35 [manager.py:391] +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:168.32995414733887ms total_cost_time:168.37430000305176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9590 prompt_cache_len:5151 prompt_cache_ratio:0.5371220020855058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 +DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10794878005981445 s +INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.10995316505432129 s +DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=93906630767897306992456986658360172706, time:1750767576.0496395s req_ids:[8] +DEBUG 06-24 20:19:36 [manager.py:391] +ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:202.41355895996094ms total_cost_time:202.45838165283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9591 prompt_cache_len:5151 prompt_cache_ratio:0.5370659993744135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 +DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10797667503356934 s +INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s +DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=303985534831315510848588688480693363609, time:1750767576.2572515s req_ids:[8] +DEBUG 06-24 20:19:36 [manager.py:391] +ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:375.39124488830566ms total_cost_time:375.43749809265137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9592 prompt_cache_len:5151 prompt_cache_ratio:0.5370100083402836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 +DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.1093149185180664 s +INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.1115114688873291 s +DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=170592797656311299238016900463283825150, time:1750767576.636555s req_ids:[8] +DEBUG 06-24 20:19:36 [manager.py:391] +ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:207.122802734375ms total_cost_time:207.1676254272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9593 prompt_cache_len:5151 prompt_cache_ratio:0.5369540289794642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 +DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10947227478027344 s +INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.11193680763244629 s +DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=64360228190931432567425016605874837677, time:1750767576.853484s req_ids:[8] +DEBUG 06-24 20:19:36 [manager.py:391] +ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:211.3351821899414ms total_cost_time:211.3807201385498ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9594 prompt_cache_len:5151 prompt_cache_ratio:0.5368980612883052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 +DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10926604270935059 s +INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s +DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=207897350207409508537204026300791898488, time:1750767577.0707061s req_ids:[8] +DEBUG 06-24 20:19:37 [manager.py:391] +ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:211.57240867614746ms total_cost_time:211.61603927612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9595 prompt_cache_len:5151 prompt_cache_ratio:0.5368421052631579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 +DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10646939277648926 s +INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10856175422668457 s +DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=151763881482794762220657913381451240919, time:1750767577.2855875s req_ids:[8] +DEBUG 06-24 20:19:37 [manager.py:391] +ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:206.87580108642578ms total_cost_time:206.90035820007324ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:9596 prompt_cache_len:5151 prompt_cache_ratio:0.5367861609003751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 +DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10506772994995117 s +INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10704326629638672 s +DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=51301851881255104305251584834485696610, time:1750767577.5004802s req_ids:[8] +DEBUG 06-24 20:19:37 [manager.py:391] +ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:210.0660800933838ms total_cost_time:210.09039878845215ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:9597 prompt_cache_len:5151 prompt_cache_ratio:0.5367302281963113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 +DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10460305213928223 s +INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10661578178405762 s +DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=236579386352676430611043429926459770089, time:1750767577.7144868s req_ids:[8] +DEBUG 06-24 20:19:37 [manager.py:391] +ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:210.68525314331055ms total_cost_time:210.7095718383789ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:9598 prompt_cache_len:5151 prompt_cache_ratio:0.5366743071473223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 +DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10498881340026855 s +INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10705161094665527 s +DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=75817467312477508519932669955118119489, time:1750767577.9297729s req_ids:[8] +DEBUG 06-24 20:19:37 [manager.py:391] +ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:221.8313217163086ms total_cost_time:221.85373306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:9599 prompt_cache_len:5151 prompt_cache_ratio:0.5366183977497656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 +DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10483479499816895 s +INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10686635971069336 s +DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=88536805584247665376282388142544233056, time:1750767578.161501s req_ids:[8] +DEBUG 06-24 20:19:38 [manager.py:391] +ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:373.1272220611572ms total_cost_time:373.1505870819092ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9600 prompt_cache_len:5151 prompt_cache_ratio:0.5365625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 +DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10686826705932617 s +INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10893988609313965 s +DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=150395789900956025810420455991126483628, time:1750767578.53168s req_ids:[8] +DEBUG 06-24 20:19:38 [manager.py:391] +ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:209.1827392578125ms total_cost_time:209.22613143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9601 prompt_cache_len:5151 prompt_cache_ratio:0.536506613894386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 +DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10860419273376465 s +INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.11052918434143066 s +DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=146761882375458639845954091863197012843, time:1750767578.758405s req_ids:[8] +DEBUG 06-24 20:19:38 [manager.py:391] +ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:223.54555130004883ms total_cost_time:223.59132766723633ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9602 prompt_cache_len:5151 prompt_cache_ratio:0.5364507394292856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 +DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10777544975280762 s +INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10980081558227539 s +DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=159859511184832409921937340587114494483, time:1750767578.9766366s req_ids:[8] +DEBUG 06-24 20:19:38 [manager.py:391] +ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:208.113431930542ms total_cost_time:208.15682411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9603 prompt_cache_len:5151 prompt_cache_ratio:0.5363948766010622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 +DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s +INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11008429527282715 s +DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=175633452015431345896353213018620224902, time:1750767579.1903057s req_ids:[8] +DEBUG 06-24 20:19:39 [manager.py:391] +ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.25505256652832ms total_cost_time:208.2986831665039ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9604 prompt_cache_len:5151 prompt_cache_ratio:0.5363390254060808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 +DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10990667343139648 s +INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11195731163024902 s +DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=178076400401502766169889305514435031838, time:1750767579.4037645s req_ids:[8] +DEBUG 06-24 20:19:39 [manager.py:391] +ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.4963321685791ms total_cost_time:208.5425853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9605 prompt_cache_len:5151 prompt_cache_ratio:0.536283185840708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 +DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s +INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s +DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=36459739328261783013923791821027753616, time:1750767579.6179442s req_ids:[8] +DEBUG 06-24 20:19:39 [manager.py:391] +ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.81986618041992ms total_cost_time:208.8637351989746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9606 prompt_cache_len:5151 prompt_cache_ratio:0.5362273579013117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 +DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10804915428161621 s +INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11017060279846191 s +DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=18844510223548048568892691055504029337, time:1750767579.8327348s req_ids:[8] +DEBUG 06-24 20:19:39 [manager.py:391] +ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:209.1388702392578ms total_cost_time:209.1834545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9607 prompt_cache_len:5151 prompt_cache_ratio:0.5361715415842615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 +DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10931921005249023 s +INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11142230033874512 s +DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=268007091527590011736570927146725383483, time:1750767580.0461495s req_ids:[8] +DEBUG 06-24 20:19:40 [manager.py:391] +ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:377.69269943237305ms total_cost_time:377.73799896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9608 prompt_cache_len:5151 prompt_cache_ratio:0.5361157368859284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 +DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10853052139282227 s +INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11058163642883301 s +DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=229352083071119222617866018625946274470, time:1750767580.4286027s req_ids:[8] +DEBUG 06-24 20:19:40 [manager.py:391] +ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:201.29752159118652ms total_cost_time:201.3411521911621ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9609 prompt_cache_len:5151 prompt_cache_ratio:0.536059943802685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 +DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s +INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11035346984863281 s +DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=31637374943393758850933222485018680898, time:1750767580.6359897s req_ids:[8] +DEBUG 06-24 20:19:40 [manager.py:391] +ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:206.17318153381348ms total_cost_time:206.21728897094727ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9610 prompt_cache_len:5151 prompt_cache_ratio:0.5360041623309053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 +DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10934758186340332 s +INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11137151718139648 s +DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=233943322740278592188911471722080358872, time:1750767580.8497343s req_ids:[8] +DEBUG 06-24 20:19:40 [manager.py:391] +DEBUG 06-24 20:19:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 41928.831 tokens/s +DEBUG 06-24 20:19:40 [stats.py:37] Avg prompt tokens throughput: 41920.187 tokens/s +DEBUG 06-24 20:19:40 [stats.py:37] Avg generate tokens throughput: 8.644 tokens/s +ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:209.73682403564453ms total_cost_time:209.78212356567383ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9611 prompt_cache_len:5151 prompt_cache_ratio:0.5359483924669649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 +DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s +INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s +DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=185494743997697275557776341276094814104, time:1750767581.064922s req_ids:[8] +DEBUG 06-24 20:19:41 [manager.py:391] +ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:208.2674503326416ms total_cost_time:208.30774307250977ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:9612 prompt_cache_len:5151 prompt_cache_ratio:0.535892634207241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 +DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10728740692138672 s +INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.10913562774658203 s +DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=175102617024324890490308687935322662722, time:1750767581.2819605s req_ids:[8] +DEBUG 06-24 20:19:41 [manager.py:391] +ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:207.84687995910645ms total_cost_time:207.89074897766113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9613 prompt_cache_len:5151 prompt_cache_ratio:0.535836887548112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 +DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10780453681945801 s +INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.1098785400390625 s +DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=269670587385769109448781059973211664034, time:1750767581.4929845s req_ids:[8] +DEBUG 06-24 20:19:41 [manager.py:391] +ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:212.39519119262695ms total_cost_time:212.45145797729492ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:9614 prompt_cache_len:5151 prompt_cache_ratio:0.5357811524859579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 +DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10951662063598633 s +INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11159110069274902 s +DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=77081444725224209734323118311909887603, time:1750767581.7214305s req_ids:[8] +DEBUG 06-24 20:19:41 [manager.py:391] +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:218.81890296936035ms total_cost_time:218.86444091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9615 prompt_cache_len:5151 prompt_cache_ratio:0.5357254290171607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 +DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s +INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s +DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=11777221909152026451009235240742205842, time:1750767581.9367328s req_ids:[8] +DEBUG 06-24 20:19:41 [manager.py:391] +ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:410.8567237854004ms total_cost_time:410.9020233154297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9616 prompt_cache_len:5151 prompt_cache_ratio:0.5356697171381032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 +DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.107452392578125 s +INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10946321487426758 s +DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=248237562646056106519644143213199493711, time:1750767582.3506112s req_ids:[8] +DEBUG 06-24 20:19:42 [manager.py:391] +ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:183.5329532623291ms total_cost_time:183.577299118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9617 prompt_cache_len:5151 prompt_cache_ratio:0.53561401684517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 +DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.10807561874389648 s +INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10997867584228516 s +DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=1332380366734783349342886585031602093, time:1750767582.5448482s req_ids:[8] +DEBUG 06-24 20:19:42 [manager.py:391] +ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:207.78918266296387ms total_cost_time:207.83352851867676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9618 prompt_cache_len:5151 prompt_cache_ratio:0.5355583281347474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 +DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.10608553886413574 s +INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10804009437561035 s +DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=255841502713761468798142083990519807549, time:1750767582.7551804s req_ids:[8] +DEBUG 06-24 20:19:42 [manager.py:391] +ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:207.29851722717285ms total_cost_time:207.3218822479248ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9619 prompt_cache_len:5151 prompt_cache_ratio:0.5355026510032228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 +DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.1047048568725586 s +INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10686159133911133 s +DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=252199975673626752452097158141448863182, time:1750767582.9680789s req_ids:[8] +DEBUG 06-24 20:19:42 [manager.py:391] +ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:209.11073684692383ms total_cost_time:209.13338661193848ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:9620 prompt_cache_len:5151 prompt_cache_ratio:0.5354469854469854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 +DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10455536842346191 s +INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10656976699829102 s +DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=103296147603089221884845970984593799546, time:1750767583.1825933s req_ids:[8] +DEBUG 06-24 20:19:43 [manager.py:391] +ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:210.6165885925293ms total_cost_time:210.63876152038574ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9621 prompt_cache_len:5151 prompt_cache_ratio:0.5353913314624259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 +DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10435891151428223 s +INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10607600212097168 s +DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=68497284328355858332725188410175934439, time:1750767583.400153s req_ids:[8] +DEBUG 06-24 20:19:43 [manager.py:391] +ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:169.81887817382812ms total_cost_time:169.84105110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9622 prompt_cache_len:5151 prompt_cache_ratio:0.5353356890459364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 +DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.1042783260345459 s +INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10596060752868652 s +DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=32377755342894174844317467030654539845, time:1750767583.5694542s req_ids:[8] +DEBUG 06-24 20:19:43 [manager.py:391] +ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:168.3330535888672ms total_cost_time:168.37835311889648ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9623 prompt_cache_len:5151 prompt_cache_ratio:0.5352800581939104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 +DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10821843147277832 s +INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10990452766418457 s +DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=167175564499737749048255909064311163788, time:1750767583.7411168s req_ids:[8] +DEBUG 06-24 20:19:43 [manager.py:391] +ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:193.86601448059082ms total_cost_time:193.91369819641113ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:9624 prompt_cache_len:5151 prompt_cache_ratio:0.5352244389027432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 +DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10806441307067871 s +INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s +DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=107380764574381396375119990113434129122, time:1750767583.942508s req_ids:[8] +DEBUG 06-24 20:19:43 [manager.py:391] +ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:372.9238510131836ms total_cost_time:372.9679584503174ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9625 prompt_cache_len:5151 prompt_cache_ratio:0.5351688311688312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 +DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s +INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11017656326293945 s +DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=119787481166582829081045979957857650818, time:1750767584.321902s req_ids:[8] +DEBUG 06-24 20:19:44 [manager.py:391] +ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:201.8585205078125ms total_cost_time:201.9050121307373ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9626 prompt_cache_len:5151 prompt_cache_ratio:0.5351132349885727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 +DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1086740493774414 s +INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11048316955566406 s +DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=190169904185174574302182704166990217707, time:1750767584.5300026s req_ids:[8] +DEBUG 06-24 20:19:44 [manager.py:391] +ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:203.71437072753906ms total_cost_time:203.7365436553955ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9627 prompt_cache_len:5151 prompt_cache_ratio:0.535057650358367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 +DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.10483598709106445 s +INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.10669517517089844 s +DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=81192098810479394604549223639369035146, time:1750767584.741568s req_ids:[8] +DEBUG 06-24 20:19:44 [manager.py:391] +ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:209.31100845336914ms total_cost_time:209.35797691345215ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9628 prompt_cache_len:5151 prompt_cache_ratio:0.5350020772746157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 +DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s +INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11005592346191406 s +DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=90929364060730447218088175153487262607, time:1750767584.9540465s req_ids:[8] +DEBUG 06-24 20:19:44 [manager.py:391] +ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:207.81493186950684ms total_cost_time:207.85856246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9629 prompt_cache_len:5151 prompt_cache_ratio:0.5349465157337211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 +DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:45 [batch.py:51] router release req id 8 +INFO 06-24 20:19:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10886859893798828 s +INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.11109542846679688 s +DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=266265460499678806136626940419325990771, time:1750767585.169764s req_ids:[8] +DEBUG 06-24 20:19:45 [manager.py:391] +ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:209.98835563659668ms total_cost_time:210.03103256225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9630 prompt_cache_len:5151 prompt_cache_ratio:0.5348909657320873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 +DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10800457000732422 s +INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10962057113647461 s +DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=26206371376692073210248424946010359137, time:1750767585.385586s req_ids:[8] +DEBUG 06-24 20:19:45 [manager.py:391] +ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:208.46319198608398ms total_cost_time:208.52160453796387ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9631 prompt_cache_len:5151 prompt_cache_ratio:0.5348354272661198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 +DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10795426368713379 s +INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s +DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=247775928449296558252822840896997218541, time:1750767585.5987737s req_ids:[8] +DEBUG 06-24 20:19:45 [manager.py:391] +ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:209.81264114379883ms total_cost_time:209.8560333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9632 prompt_cache_len:5151 prompt_cache_ratio:0.5347799003322259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 +DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.1079416275024414 s +INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10979652404785156 s +DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=82041442209767478687089909084370622680, time:1750767585.8140917s req_ids:[8] +DEBUG 06-24 20:19:45 [manager.py:391] +ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:393.59259605407715ms total_cost_time:393.6350345611572ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9633 prompt_cache_len:5151 prompt_cache_ratio:0.5347243849268141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 +DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.10912013053894043 s +INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11094117164611816 s +DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=10816723901541761915552143528320509050, time:1750767586.2128546s req_ids:[8] +DEBUG 06-24 20:19:46 [manager.py:391] +ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:206.2673568725586ms total_cost_time:206.3138484954834ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9634 prompt_cache_len:5151 prompt_cache_ratio:0.5346688810462944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 +DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.1077127456665039 s +INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.10964798927307129 s +DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=219409397432468855112201426791909145260, time:1750767586.424971s req_ids:[8] +DEBUG 06-24 20:19:46 [manager.py:391] +ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:204.9872875213623ms total_cost_time:205.0333023071289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9635 prompt_cache_len:5151 prompt_cache_ratio:0.5346133886870783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 +DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s +INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s +DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=61213992967660673274579254354653416875, time:1750767586.638555s req_ids:[8] +DEBUG 06-24 20:19:46 [manager.py:391] +ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:211.49206161499023ms total_cost_time:211.53569221496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9636 prompt_cache_len:5151 prompt_cache_ratio:0.5345579078455791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 +DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.10981345176696777 s +INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11157822608947754 s +DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=82216709151957465245689891190725870592, time:1750767586.8546174s req_ids:[8] +DEBUG 06-24 20:19:46 [manager.py:391] +ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:168.28155517578125ms total_cost_time:168.32375526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9637 prompt_cache_len:5151 prompt_cache_ratio:0.5345024385182111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 +DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:46 [batch.py:51] router release req id 8 +INFO 06-24 20:19:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10820412635803223 s +INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.10933494567871094 s +DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=5909871157666746440511889913313135747, time:1750767587.0280712s req_ids:[8] +DEBUG 06-24 20:19:47 [manager.py:391] +ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:199.72777366638184ms total_cost_time:199.7697353363037ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9638 prompt_cache_len:5151 prompt_cache_ratio:0.5344469807013903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 +DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10823774337768555 s +INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s +DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=338969082829078649723663384628191990768, time:1750767587.2337573s req_ids:[8] +DEBUG 06-24 20:19:47 [manager.py:391] +ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:205.85250854492188ms total_cost_time:205.89685440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9639 prompt_cache_len:5151 prompt_cache_ratio:0.5343915343915344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 +DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10940694808959961 s +INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.11127281188964844 s +DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=77075791345459186315944657609144642559, time:1750767587.445883s req_ids:[8] +DEBUG 06-24 20:19:47 [manager.py:391] +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:209.7799777984619ms total_cost_time:209.8245620727539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9640 prompt_cache_len:5151 prompt_cache_ratio:0.5343360995850622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 +DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.30974411964416504 s +INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.3116919994354248 s +DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=337176811945769070226098979939527429647, time:1750767587.8643425s req_ids:[8] +DEBUG 06-24 20:19:47 [manager.py:391] +ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:414.9456024169922ms total_cost_time:414.9911403656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9641 prompt_cache_len:5151 prompt_cache_ratio:0.5342806762783944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 +DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10763859748840332 s +INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s +DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=277845337380524985396413421470870837342, time:1750767588.082584s req_ids:[8] +DEBUG 06-24 20:19:48 [manager.py:391] +ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:208.3914279937744ms total_cost_time:208.4345817565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9642 prompt_cache_len:5151 prompt_cache_ratio:0.5342252644679527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 +DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10906314849853516 s +INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.1110994815826416 s +DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=178330296721344943658068439239499366502, time:1750767588.2978003s req_ids:[8] +DEBUG 06-24 20:19:48 [manager.py:391] +ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:208.81414413452148ms total_cost_time:208.8615894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9643 prompt_cache_len:5151 prompt_cache_ratio:0.5341698641501608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 +DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.1104133129119873 s +INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.11239743232727051 s +DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=328505392454928217944061255683414460129, time:1750767588.5152035s req_ids:[8] +DEBUG 06-24 20:19:48 [manager.py:391] +ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:210.96515655517578ms total_cost_time:211.0116481781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9644 prompt_cache_len:5151 prompt_cache_ratio:0.5341144753214434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 +DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10952258110046387 s +INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.11135435104370117 s +DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=240716525428923366772979994225780464591, time:1750767588.7297585s req_ids:[8] +DEBUG 06-24 20:19:48 [manager.py:391] +ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:204.4689655303955ms total_cost_time:204.55241203308105ms,out_token_counter:1 mean_per_token_cost_time: 0.08344650268554688ms prompt_token_num:9645 prompt_cache_len:5151 prompt_cache_ratio:0.534059097978227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 +DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10913419723510742 s +INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.1110525131225586 s +DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=105953088862040771555172251900164209198, time:1750767588.9494414s req_ids:[8] +DEBUG 06-24 20:19:48 [manager.py:391] +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:214.10703659057617ms total_cost_time:214.15328979492188ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9646 prompt_cache_len:5151 prompt_cache_ratio:0.5340037321169396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 +DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10718011856079102 s +INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.1092064380645752 s +DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=5793026601148823038621048170859671440, time:1750767589.1635518s req_ids:[8] +DEBUG 06-24 20:19:49 [manager.py:391] +ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:207.75079727172852ms total_cost_time:207.7934741973877ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9647 prompt_cache_len:5151 prompt_cache_ratio:0.5339483777340106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 +DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s +INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.11015439033508301 s +DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=189290661953232890015189260527305789059, time:1750767589.3778455s req_ids:[8] +DEBUG 06-24 20:19:49 [manager.py:391] +ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:378.7095546722412ms total_cost_time:378.7548542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9648 prompt_cache_len:5151 prompt_cache_ratio:0.5338930348258707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 +DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10819745063781738 s +INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.10997843742370605 s +DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=108538101536752074888691480378199928414, time:1750767589.75791s req_ids:[8] +DEBUG 06-24 20:19:49 [manager.py:391] +ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:205.44004440307617ms total_cost_time:205.48391342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9649 prompt_cache_len:5151 prompt_cache_ratio:0.5338377033889522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 +DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10827207565307617 s +INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.11020135879516602 s +DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=91914723675173458180240889002944990615, time:1750767589.9707835s req_ids:[8] +DEBUG 06-24 20:19:49 [manager.py:391] +ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:205.17802238464355ms total_cost_time:205.23571968078613ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:9650 prompt_cache_len:5151 prompt_cache_ratio:0.5337823834196891 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 +DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.1084902286529541 s +INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105508804321289 s +DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=118622405010069062828969351452705723509, time:1750767590.1838999s req_ids:[8] +DEBUG 06-24 20:19:50 [manager.py:391] +ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:208.56785774230957ms total_cost_time:208.61220359802246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9651 prompt_cache_len:5151 prompt_cache_ratio:0.5337270749145167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 +DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.1076650619506836 s +INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.10958623886108398 s +DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=88527693400179694043787640988555732381, time:1750767590.3957908s req_ids:[8] +DEBUG 06-24 20:19:50 [manager.py:391] +ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:165.15398025512695ms total_cost_time:165.19594192504883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9652 prompt_cache_len:5151 prompt_cache_ratio:0.5336717778698715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 +DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10805988311767578 s +INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11009454727172852 s +DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=238477254660501812556750645205057513737, time:1750767590.5671477s req_ids:[8] +DEBUG 06-24 20:19:50 [manager.py:391] +ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:198.6837387084961ms total_cost_time:198.72713088989258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9653 prompt_cache_len:5151 prompt_cache_ratio:0.5336164922821921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 +DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s +INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11141037940979004 s +DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=120190807129728072917122877568634250347, time:1750767590.7731225s req_ids:[8] +DEBUG 06-24 20:19:50 [manager.py:391] +ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:19:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 41386.095 tokens/s +DEBUG 06-24 20:19:50 [stats.py:37] Avg prompt tokens throughput: 41377.404 tokens/s +DEBUG 06-24 20:19:50 [stats.py:37] Avg generate tokens throughput: 8.691 tokens/s +INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:205.95550537109375ms total_cost_time:205.99842071533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9654 prompt_cache_len:5151 prompt_cache_ratio:0.533561218147918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 +DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s +INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11003565788269043 s +DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=289606991163884517033352215602545557612, time:1750767590.9866502s req_ids:[8] +DEBUG 06-24 20:19:50 [manager.py:391] +ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:205.413818359375ms total_cost_time:205.4600715637207ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9655 prompt_cache_len:5151 prompt_cache_ratio:0.5335059554634904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 +DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10761260986328125 s +INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.10953593254089355 s +DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=252240909737154361592616327307883508517, time:1750767591.1984615s req_ids:[8] +DEBUG 06-24 20:19:51 [manager.py:391] +ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:208.1279754638672ms total_cost_time:208.17279815673828ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9656 prompt_cache_len:5151 prompt_cache_ratio:0.5334507042253521 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 +DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10926938056945801 s +INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.11123514175415039 s +DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=55450587156414847949319668924742244432, time:1750767591.4135814s req_ids:[8] +DEBUG 06-24 20:19:51 [manager.py:391] +ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:379.9633979797363ms total_cost_time:380.01084327697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9657 prompt_cache_len:5151 prompt_cache_ratio:0.5333954644299472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 +DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s +INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s +DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=289482793539444389640029991272583324012, time:1750767591.796596s req_ids:[8] +DEBUG 06-24 20:19:51 [manager.py:391] +ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:206.32386207580566ms total_cost_time:206.36796951293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9658 prompt_cache_len:5151 prompt_cache_ratio:0.5333402360737213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 +DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10834980010986328 s +INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.11035370826721191 s +DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=274274551328341439320814498529382498710, time:1750767592.0126216s req_ids:[8] +DEBUG 06-24 20:19:52 [manager.py:391] +ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:210.9549045562744ms total_cost_time:210.9990119934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9659 prompt_cache_len:5151 prompt_cache_ratio:0.5332850191531214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 +DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10761070251464844 s +INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10961461067199707 s +DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=290258757460850802994861219246012240054, time:1750767592.2294054s req_ids:[8] +DEBUG 06-24 20:19:52 [manager.py:391] +ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:211.78531646728516ms total_cost_time:211.83037757873535ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9660 prompt_cache_len:5151 prompt_cache_ratio:0.5332298136645963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 +DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10919737815856934 s +INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.11133623123168945 s +DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=4057051399639971067341323173359020704, time:1750767592.4452207s req_ids:[8] +DEBUG 06-24 20:19:52 [manager.py:391] +ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:208.7228298187256ms total_cost_time:208.7686061859131ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9661 prompt_cache_len:5151 prompt_cache_ratio:0.5331746196045958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 +DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10785746574401855 s +INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10957622528076172 s +DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=257679984017375814772329898543858974518, time:1750767592.6610968s req_ids:[8] +DEBUG 06-24 20:19:52 [manager.py:391] +ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:211.94052696228027ms total_cost_time:211.9905948638916ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:9662 prompt_cache_len:5151 prompt_cache_ratio:0.5331194369695715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 +DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.1080777645111084 s +INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10955238342285156 s +DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=43310941231281132321980867946359944188, time:1750767592.8800669s req_ids:[8] +DEBUG 06-24 20:19:52 [manager.py:391] +ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:209.50651168823242ms total_cost_time:209.55395698547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9663 prompt_cache_len:5151 prompt_cache_ratio:0.5330642657559764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 +DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10901665687561035 s +INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037921905517578 s +DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=183327004959039561849445538396153208268, time:1750767593.0960507s req_ids:[8] +DEBUG 06-24 20:19:53 [manager.py:391] +ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:210.74843406677246ms total_cost_time:210.79182624816895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9664 prompt_cache_len:5151 prompt_cache_ratio:0.5330091059602649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 +INFO 06-24 20:19:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:19:53 [statics_utils.py:24] mean first cost: 228.4807292764012 ms +INFO 06-24 20:19:53 [statics_utils.py:24] mean per token cost: 0.07096020363066266 ms +DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10826587677001953 s +INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s +DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=61534437760081381791483118735268215302, time:1750767593.311766s req_ids:[8] +DEBUG 06-24 20:19:53 [manager.py:391] +ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:370.9876537322998ms total_cost_time:371.0362911224365ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:9665 prompt_cache_len:5151 prompt_cache_ratio:0.532953957578893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 +DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.1088414192199707 s +INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11076903343200684 s +DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=116500071940017242191780697842006230253, time:1750767593.6870768s req_ids:[8] +DEBUG 06-24 20:19:53 [manager.py:391] +ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:205.92212677001953ms total_cost_time:205.9652805328369ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9666 prompt_cache_len:5151 prompt_cache_ratio:0.5328988206083178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 +DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10887527465820312 s +INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11103081703186035 s +DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=30854642515604314840476333541084311588, time:1750767593.9008296s req_ids:[8] +DEBUG 06-24 20:19:53 [manager.py:391] +ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:207.45110511779785ms total_cost_time:207.49568939208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9667 prompt_cache_len:5151 prompt_cache_ratio:0.5328436950449984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 +DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10738492012023926 s +INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.10953545570373535 s +DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=226836570111958039524872607850970301475, time:1750767594.1134188s req_ids:[8] +DEBUG 06-24 20:19:54 [manager.py:391] +ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:205.88994026184082ms total_cost_time:205.9330940246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9668 prompt_cache_len:5151 prompt_cache_ratio:0.5327885808853952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 +DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10756158828735352 s +INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s +DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=82264845266696754626794600739316367045, time:1750767594.3326564s req_ids:[8] +DEBUG 06-24 20:19:54 [manager.py:391] +ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:214.68544006347656ms total_cost_time:214.72930908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9669 prompt_cache_len:5151 prompt_cache_ratio:0.5327334781259696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 +DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10922956466674805 s +INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.1119394302368164 s +DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=11008737174155588908555256577496295557, time:1750767594.5472918s req_ids:[8] +DEBUG 06-24 20:19:54 [manager.py:391] +ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:207.41844177246094ms total_cost_time:207.46326446533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9670 prompt_cache_len:5151 prompt_cache_ratio:0.5326783867631851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 +DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10850667953491211 s +INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.11050701141357422 s +DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=33699697577897786755012014422252778798, time:1750767594.7625592s req_ids:[8] +DEBUG 06-24 20:19:54 [manager.py:391] +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:213.31238746643066ms total_cost_time:213.35816383361816ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9671 prompt_cache_len:5151 prompt_cache_ratio:0.5326233067935063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 +DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10765314102172852 s +INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.10915923118591309 s +DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=257682736714229978696341492184405662504, time:1750767594.980271s req_ids:[8] +DEBUG 06-24 20:19:54 [manager.py:391] +ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:208.3144187927246ms total_cost_time:208.357572555542ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9672 prompt_cache_len:5151 prompt_cache_ratio:0.5325682382133995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 +DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s +INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.1099393367767334 s +DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=122103183999943483690161780774362462390, time:1750767595.1965408s req_ids:[8] +DEBUG 06-24 20:19:55 [manager.py:391] +ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:378.44347953796387ms total_cost_time:378.50141525268555ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9673 prompt_cache_len:5151 prompt_cache_ratio:0.5325131810193322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 +DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10935091972351074 s +INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.11134171485900879 s +DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=151140804814309667086523599918860505684, time:1750767595.5769887s req_ids:[8] +DEBUG 06-24 20:19:55 [manager.py:391] +ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:204.6210765838623ms total_cost_time:204.6670913696289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9674 prompt_cache_len:5151 prompt_cache_ratio:0.5324581352077734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 +DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10819649696350098 s +INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.1100771427154541 s +DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=196112063531861301829800378987176710025, time:1750767595.8050344s req_ids:[8] +DEBUG 06-24 20:19:55 [manager.py:391] +ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:222.7628231048584ms total_cost_time:222.80573844909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9675 prompt_cache_len:5151 prompt_cache_ratio:0.5324031007751938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 +DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s +INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s +DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=49368387533370030219862401609699854618, time:1750767596.0219862s req_ids:[8] +DEBUG 06-24 20:19:56 [manager.py:391] +ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:212.30173110961914ms total_cost_time:212.34607696533203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9676 prompt_cache_len:5151 prompt_cache_ratio:0.5323480777180654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 +DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10780096054077148 s +INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.1098487377166748 s +DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=291426017437452501690655650071724354074, time:1750767596.2438524s req_ids:[8] +DEBUG 06-24 20:19:56 [manager.py:391] +ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:204.5419216156006ms total_cost_time:204.58412170410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9677 prompt_cache_len:5151 prompt_cache_ratio:0.5322930660328614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 +DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10704183578491211 s +INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.10912060737609863 s +DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=132331023165531569777583027104103853008, time:1750767596.4543126s req_ids:[8] +DEBUG 06-24 20:19:56 [manager.py:391] +ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:210.95538139343262ms total_cost_time:210.9990119934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9678 prompt_cache_len:5151 prompt_cache_ratio:0.532238065716057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 +DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10843658447265625 s +INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.11041522026062012 s +DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=105535843362200375441152399740705343610, time:1750767596.6680608s req_ids:[8] +DEBUG 06-24 20:19:56 [manager.py:391] +ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:209.0318202972412ms total_cost_time:209.0766429901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9679 prompt_cache_len:5151 prompt_cache_ratio:0.5321830767641286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 +DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10870575904846191 s +INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.11064457893371582 s +DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=316450409325792394578830814589121240415, time:1750767596.8813577s req_ids:[8] +DEBUG 06-24 20:19:56 [manager.py:391] +ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:208.2517147064209ms total_cost_time:208.2967758178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9680 prompt_cache_len:5151 prompt_cache_ratio:0.5321280991735537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 +DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10721921920776367 s +INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.10911226272583008 s +DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=151435867354600278596016892986945710528, time:1750767597.0956845s req_ids:[8] +DEBUG 06-24 20:19:57 [manager.py:391] +ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:378.7839412689209ms total_cost_time:378.8266181945801ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9681 prompt_cache_len:5151 prompt_cache_ratio:0.5320731329408119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 +DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10887265205383301 s +INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.11090707778930664 s +DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=216428318150012989131646076857912083580, time:1750767597.4785457s req_ids:[8] +DEBUG 06-24 20:19:57 [manager.py:391] +ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:207.51571655273438ms total_cost_time:207.55982398986816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9682 prompt_cache_len:5151 prompt_cache_ratio:0.5320181780623838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 +DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.1089317798614502 s +INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.11025166511535645 s +DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=284942597387553562974535114275199676351, time:1750767597.6952114s req_ids:[8] +DEBUG 06-24 20:19:57 [manager.py:391] +ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:210.41607856750488ms total_cost_time:210.46161651611328ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9683 prompt_cache_len:5151 prompt_cache_ratio:0.5319632345347516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 +DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10808873176574707 s +INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.10999822616577148 s +DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=314745124465930407969631336400647724769, time:1750767597.9147997s req_ids:[8] +DEBUG 06-24 20:19:57 [manager.py:391] +ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:212.68510818481445ms total_cost_time:212.73088455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9684 prompt_cache_len:5151 prompt_cache_ratio:0.531908302354399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 +DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10717177391052246 s +INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.1090695858001709 s +DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=298352208137805440241609976595646173245, time:1750767598.1310885s req_ids:[8] +DEBUG 06-24 20:19:58 [manager.py:391] +ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.0593318939209ms total_cost_time:211.1051082611084ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9685 prompt_cache_len:5151 prompt_cache_ratio:0.531853381517811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 +DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10740971565246582 s +INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.10944247245788574 s +DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=193257439957698840387376032147910334337, time:1750767598.3481166s req_ids:[8] +DEBUG 06-24 20:19:58 [manager.py:391] +ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:215.25239944458008ms total_cost_time:215.29483795166016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9686 prompt_cache_len:5151 prompt_cache_ratio:0.5317984720214743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 +DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10818743705749512 s +INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11005377769470215 s +DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=112530607752741615985389883147609090559, time:1750767598.5670755s req_ids:[8] +DEBUG 06-24 20:19:58 [manager.py:391] +ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.22503280639648ms total_cost_time:211.26937866210938ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9687 prompt_cache_len:5151 prompt_cache_ratio:0.5317435738618768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 +DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10843396186828613 s +INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s +DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=88414819670413727865434900862166015933, time:1750767598.7858176s req_ids:[8] +DEBUG 06-24 20:19:58 [manager.py:391] +ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.15612983703613ms total_cost_time:211.20142936706543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9688 prompt_cache_len:5151 prompt_cache_ratio:0.5316886870355079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 +DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10820984840393066 s +INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11040639877319336 s +DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=99584855930640605521930884488617686947, time:1750767599.0019405s req_ids:[8] +DEBUG 06-24 20:19:59 [manager.py:391] +ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:210.21771430969238ms total_cost_time:210.26134490966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9689 prompt_cache_len:5151 prompt_cache_ratio:0.5316338115388585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 +DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:59 [batch.py:51] router release req id 8 +INFO 06-24 20:19:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s +INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.11027741432189941 s +DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=3002714152209460191439921782906540841, time:1750767599.2250035s req_ids:[8] +DEBUG 06-24 20:19:59 [manager.py:391] +ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:420.9277629852295ms total_cost_time:420.9721088409424ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9690 prompt_cache_len:5151 prompt_cache_ratio:0.531578947368421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 +DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10848808288574219 s +INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.11047148704528809 s +DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=28300136114432458160799638570859105825, time:1750767599.6430626s req_ids:[8] +DEBUG 06-24 20:19:59 [manager.py:391] +ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:203.93657684326172ms total_cost_time:203.9799690246582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9691 prompt_cache_len:5151 prompt_cache_ratio:0.5315240945206893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 +DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10787534713745117 s +INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s +DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=294119150604922861310329704527725479104, time:1750767599.8577213s req_ids:[8] +DEBUG 06-24 20:19:59 [manager.py:391] +ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:215.35849571228027ms total_cost_time:215.41595458984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9692 prompt_cache_len:5151 prompt_cache_ratio:0.5314692529921585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 +DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:19:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10817480087280273 s +INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006760597229004 s +DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=278389338409202568406036434146731462022, time:1750767600.07603s req_ids:[8] +DEBUG 06-24 20:20:00 [manager.py:391] +ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:201.32923126220703ms total_cost_time:201.3709545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9693 prompt_cache_len:5151 prompt_cache_ratio:0.5314144227793253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 +DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10808563232421875 s +INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11000180244445801 s +DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=207063587305631109318568896515575232334, time:1750767600.2866547s req_ids:[8] +DEBUG 06-24 20:20:00 [manager.py:391] +ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:211.42959594726562ms total_cost_time:211.4734649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9694 prompt_cache_len:5151 prompt_cache_ratio:0.5313596038786879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 +DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10993242263793945 s +INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11201906204223633 s +DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=286798964550320642445133678155355526126, time:1750767600.5044332s req_ids:[8] +DEBUG 06-24 20:20:00 [manager.py:391] +ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:210.83378791809082ms total_cost_time:210.8771800994873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9695 prompt_cache_len:5151 prompt_cache_ratio:0.5313047962867458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 +DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10873270034790039 s +INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100771427154541 s +DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=66789275500910736260065763294017829934, time:1750767600.7201176s req_ids:[8] +DEBUG 06-24 20:20:00 [manager.py:391] +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:213.00125122070312ms total_cost_time:213.04678916931152ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9696 prompt_cache_len:5151 prompt_cache_ratio:0.53125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 +DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10827422142028809 s +INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s +DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=320413334884219757723972715791519580452, time:1750767600.9377885s req_ids:[8] +DEBUG 06-24 20:20:00 [manager.py:391] +DEBUG 06-24 20:20:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 41295.184 tokens/s +DEBUG 06-24 20:20:00 [stats.py:37] Avg prompt tokens throughput: 41286.749 tokens/s +DEBUG 06-24 20:20:00 [stats.py:37] Avg generate tokens throughput: 8.435 tokens/s +ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:207.40675926208496ms total_cost_time:207.45158195495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9697 prompt_cache_len:5151 prompt_cache_ratio:0.531195215014953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 +DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10836577415466309 s +INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s +DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=71626440281056613528981303272735811647, time:1750767601.1539295s req_ids:[8] +DEBUG 06-24 20:20:01 [manager.py:391] +ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:387.91608810424805ms total_cost_time:387.95948028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9698 prompt_cache_len:5151 prompt_cache_ratio:0.5311404413281089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 +DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10857081413269043 s +INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11046648025512695 s +DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=317786889022225189547959250526869868959, time:1750767601.5469432s req_ids:[8] +DEBUG 06-24 20:20:01 [manager.py:391] +ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:205.38949966430664ms total_cost_time:205.43217658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9699 prompt_cache_len:5151 prompt_cache_ratio:0.5310856789359728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 +DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s +INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11019349098205566 s +DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=180216853736114142907152478111313433814, time:1750767601.762811s req_ids:[8] +DEBUG 06-24 20:20:01 [manager.py:391] +ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:210.9987735748291ms total_cost_time:211.045503616333ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9700 prompt_cache_len:5151 prompt_cache_ratio:0.5310309278350516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 +DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.1072690486907959 s +INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.10918712615966797 s +DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=116212148515364075124477238192984311207, time:1750767601.9770606s req_ids:[8] +DEBUG 06-24 20:20:01 [manager.py:391] +ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:209.39970016479492ms total_cost_time:209.4428539276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9701 prompt_cache_len:5151 prompt_cache_ratio:0.5309761880218534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 +DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s +INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.11078715324401855 s +DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=277410707678491262230001769956113740678, time:1750767602.1921508s req_ids:[8] +DEBUG 06-24 20:20:02 [manager.py:391] +ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:211.4722728729248ms total_cost_time:211.5161418914795ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9702 prompt_cache_len:5151 prompt_cache_ratio:0.530921459492888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 +DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10790181159973145 s +INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.1098783016204834 s +DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=257867785472341609180314496652977881833, time:1750767602.409584s req_ids:[8] +DEBUG 06-24 20:20:02 [manager.py:391] +ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:207.81373977661133ms total_cost_time:207.8573703765869ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9703 prompt_cache_len:5151 prompt_cache_ratio:0.5308667422446666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 +DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.1087956428527832 s +INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s +DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=284368287669520152223162542718497634385, time:1750767602.624502s req_ids:[8] +DEBUG 06-24 20:20:02 [manager.py:391] +ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:209.5036506652832ms total_cost_time:209.54585075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9704 prompt_cache_len:5151 prompt_cache_ratio:0.5308120362737015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 +DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10782718658447266 s +INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.10980463027954102 s +DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=75812024211430808579145112321927363575, time:1750767602.850213s req_ids:[8] +DEBUG 06-24 20:20:02 [manager.py:391] +ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:222.06878662109375ms total_cost_time:222.11313247680664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9705 prompt_cache_len:5151 prompt_cache_ratio:0.530757341576507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 +DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.30924367904663086 s +INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.31187939643859863 s +DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=268524233455071503148872553174597338444, time:1750767603.2704127s req_ids:[8] +DEBUG 06-24 20:20:03 [manager.py:391] +ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:419.6591377258301ms total_cost_time:419.70300674438477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9706 prompt_cache_len:5151 prompt_cache_ratio:0.5307026581495982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 +DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10849547386169434 s +INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052656173706055 s +DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=87058785314371425299572973846072185675, time:1750767603.4954422s req_ids:[8] +DEBUG 06-24 20:20:03 [manager.py:391] +ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:213.0870819091797ms total_cost_time:213.13023567199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9707 prompt_cache_len:5151 prompt_cache_ratio:0.5306479859894921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 +DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10860037803649902 s +INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s +DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=180067148754010167147546538725913019379, time:1750767603.7122824s req_ids:[8] +DEBUG 06-24 20:20:03 [manager.py:391] +ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:211.29751205444336ms total_cost_time:211.33923530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9708 prompt_cache_len:5151 prompt_cache_ratio:0.5305933250927071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 +DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10843038558959961 s +INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s +DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=320780667185077450600428379004137468125, time:1750767603.9307284s req_ids:[8] +DEBUG 06-24 20:20:03 [manager.py:391] +ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:212.73422241210938ms total_cost_time:212.77737617492676ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9709 prompt_cache_len:5151 prompt_cache_ratio:0.5305386754557627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 +DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10729622840881348 s +INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.10938119888305664 s +DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=304119155003335547984900864040435993815, time:1750767604.1480935s req_ids:[8] +DEBUG 06-24 20:20:04 [manager.py:391] +ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:211.70282363891602ms total_cost_time:211.745023727417ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9710 prompt_cache_len:5151 prompt_cache_ratio:0.5304840370751802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 +DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10798478126525879 s +INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.1100165843963623 s +DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=57956615167176663607350909813748036224, time:1750767604.3646386s req_ids:[8] +DEBUG 06-24 20:20:04 [manager.py:391] +ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:206.4492702484131ms total_cost_time:206.49361610412598ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9711 prompt_cache_len:5151 prompt_cache_ratio:0.5304294099474822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 +DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10833120346069336 s +INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s +DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=197773222691356057595139139091455200404, time:1750767604.577911s req_ids:[8] +DEBUG 06-24 20:20:04 [manager.py:391] +ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:209.0616226196289ms total_cost_time:209.1047763824463ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9712 prompt_cache_len:5151 prompt_cache_ratio:0.5303747940691927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 +DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10735583305358887 s +INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.10958647727966309 s +DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=133865151070505433415795782479915516220, time:1750767604.795446s req_ids:[8] +DEBUG 06-24 20:20:04 [manager.py:391] +ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:404.6018123626709ms total_cost_time:404.6444892883301ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9713 prompt_cache_len:5151 prompt_cache_ratio:0.5303201894368372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 +DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s +INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s +DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=261384487814129794694582142692399419620, time:1750767605.2019336s req_ids:[8] +DEBUG 06-24 20:20:05 [manager.py:391] +ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:202.57830619812012ms total_cost_time:202.6386260986328ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9714 prompt_cache_len:5151 prompt_cache_ratio:0.5302655960469426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 +DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.11020708084106445 s +INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.11228585243225098 s +DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=142412961228486802287719729961435390319, time:1750767605.4108384s req_ids:[8] +DEBUG 06-24 20:20:05 [manager.py:391] +ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:197.62372970581055ms total_cost_time:197.66592979431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9715 prompt_cache_len:5151 prompt_cache_ratio:0.5302110138960371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 +DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10779213905334473 s +INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.10984182357788086 s +DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=139089787080126564143897494991910015649, time:1750767605.6238022s req_ids:[8] +DEBUG 06-24 20:20:05 [manager.py:391] +ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:217.88859367370605ms total_cost_time:217.93174743652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9716 prompt_cache_len:5151 prompt_cache_ratio:0.5301564429806505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 +DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10772871971130371 s +INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.10964083671569824 s +DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=20662626919278309107181029170062485971, time:1750767605.8387673s req_ids:[8] +DEBUG 06-24 20:20:05 [manager.py:391] +ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:210.5274200439453ms total_cost_time:210.5717658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9717 prompt_cache_len:5151 prompt_cache_ratio:0.530101883297314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 +DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10916566848754883 s +INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.11130118370056152 s +DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=275328782566432825580440050425848449424, time:1750767606.0566213s req_ids:[8] +DEBUG 06-24 20:20:06 [manager.py:391] +ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:207.5340747833252ms total_cost_time:207.55457878112793ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9718 prompt_cache_len:5151 prompt_cache_ratio:0.5300473348425602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 +DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:06 [batch.py:51] router release req id 8 +INFO 06-24 20:20:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10758185386657715 s +INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.10864138603210449 s +DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=311670395322849918340537004391606645662, time:1750767606.271722s req_ids:[8] +DEBUG 06-24 20:20:06 [manager.py:391] +ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:215.31391143798828ms total_cost_time:215.35921096801758ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9719 prompt_cache_len:5151 prompt_cache_ratio:0.5299927976129232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 +DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.11006689071655273 s +INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.11207365989685059 s +DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=227032008099157770842561474505562710616, time:1750767606.4877062s req_ids:[8] +DEBUG 06-24 20:20:06 [manager.py:391] +ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:208.83965492248535ms total_cost_time:208.88304710388184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9720 prompt_cache_len:5151 prompt_cache_ratio:0.5299382716049382 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 +DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10750770568847656 s +INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s +DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=167659032782312380567456520451444155887, time:1750767606.7103145s req_ids:[8] +DEBUG 06-24 20:20:06 [manager.py:391] +ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:217.96393394470215ms total_cost_time:218.00780296325684ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9721 prompt_cache_len:5151 prompt_cache_ratio:0.5298837568151424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 +DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.1091160774230957 s +INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s +DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=231589246666996406668481052321002255627, time:1750767606.9278843s req_ids:[8] +DEBUG 06-24 20:20:06 [manager.py:391] +ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:379.05001640319824ms total_cost_time:379.09579277038574ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9722 prompt_cache_len:5151 prompt_cache_ratio:0.529829253240074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 +DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10877776145935059 s +INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.11014008522033691 s +DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=2225338221513613032582196025480608493, time:1750767607.3097916s req_ids:[8] +DEBUG 06-24 20:20:07 [manager.py:391] +ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:201.27224922180176ms total_cost_time:201.31611824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9723 prompt_cache_len:5151 prompt_cache_ratio:0.5297747608762727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 +DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10768795013427734 s +INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.10944771766662598 s +DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=98023260160296989839209806430023744184, time:1750767607.5199409s req_ids:[8] +DEBUG 06-24 20:20:07 [manager.py:391] +ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:206.62593841552734ms total_cost_time:206.67243003845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9724 prompt_cache_len:5151 prompt_cache_ratio:0.5297202797202797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 +DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s +INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.10987663269042969 s +DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=222003945428386060345322280753844824522, time:1750767607.7354288s req_ids:[8] +DEBUG 06-24 20:20:07 [manager.py:391] +ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:206.46286010742188ms total_cost_time:206.50911331176758ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9725 prompt_cache_len:5151 prompt_cache_ratio:0.5296658097686375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 +DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10958075523376465 s +INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.11166143417358398 s +DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=7506650279647652430122012581182043003, time:1750767607.9453754s req_ids:[8] +DEBUG 06-24 20:20:07 [manager.py:391] +ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:209.57708358764648ms total_cost_time:209.62071418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9726 prompt_cache_len:5151 prompt_cache_ratio:0.5296113510178901 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 +DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10826992988586426 s +INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.11015701293945312 s +DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=320924440453652897726031525962464717816, time:1750767608.162904s req_ids:[8] +DEBUG 06-24 20:20:08 [manager.py:391] +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:211.73954010009766ms total_cost_time:211.78269386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9727 prompt_cache_len:5151 prompt_cache_ratio:0.5295569034645832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 +DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s +INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005640029907227 s +DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=128795407385784873548843615826024573754, time:1750767608.3789635s req_ids:[8] +DEBUG 06-24 20:20:08 [manager.py:391] +ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:211.5039825439453ms total_cost_time:211.5485668182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9728 prompt_cache_len:5151 prompt_cache_ratio:0.5295024671052632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 +DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10843563079833984 s +INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.1104273796081543 s +DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=71782776674557046093692865703428277706, time:1750767608.5975492s req_ids:[8] +DEBUG 06-24 20:20:08 [manager.py:391] +ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:212.25333213806152ms total_cost_time:212.2976779937744ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9729 prompt_cache_len:5151 prompt_cache_ratio:0.5294480419364785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 +DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10800671577453613 s +INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.10994529724121094 s +DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=73004945621707262405562216788078786334, time:1750767608.816916s req_ids:[8] +DEBUG 06-24 20:20:08 [manager.py:391] +ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:368.8981533050537ms total_cost_time:368.9446449279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9730 prompt_cache_len:5151 prompt_cache_ratio:0.5293936279547791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 +DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10924386978149414 s +INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092066764831543 s +DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=199719459417463798778382736204090210748, time:1750767609.1886976s req_ids:[8] +DEBUG 06-24 20:20:09 [manager.py:391] +ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:209.80286598205566ms total_cost_time:209.86342430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:9731 prompt_cache_len:5151 prompt_cache_ratio:0.5293392251567156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 +DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10919904708862305 s +INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.11160755157470703 s +DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=184553118692978129706589994601828174068, time:1750767609.4075675s req_ids:[8] +DEBUG 06-24 20:20:09 [manager.py:391] +ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:209.59734916687012ms total_cost_time:209.6405029296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9732 prompt_cache_len:5151 prompt_cache_ratio:0.5292848335388409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 +DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10779762268066406 s +INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s +DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=300798638160309421166442466786889989121, time:1750767609.62327s req_ids:[8] +DEBUG 06-24 20:20:09 [manager.py:391] +ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:210.12496948242188ms total_cost_time:210.16764640808105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9733 prompt_cache_len:5151 prompt_cache_ratio:0.5292304530977088 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 +DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.1069486141204834 s +INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.10860681533813477 s +DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=231850447464200916526537811520732908171, time:1750767609.8438127s req_ids:[8] +DEBUG 06-24 20:20:09 [manager.py:391] +ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:213.02342414855957ms total_cost_time:213.06681632995605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9734 prompt_cache_len:5151 prompt_cache_ratio:0.5291760838298747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 +DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10913634300231934 s +INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11109685897827148 s +DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=289202097216434212225687696875073447604, time:1750767610.0580661s req_ids:[8] +DEBUG 06-24 20:20:10 [manager.py:391] +ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:210.85596084594727ms total_cost_time:210.89792251586914ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9735 prompt_cache_len:5151 prompt_cache_ratio:0.5291217257318952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 +DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10852670669555664 s +INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.1098325252532959 s +DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=168811509918177182176994056287941076826, time:1750767610.2750983s req_ids:[8] +DEBUG 06-24 20:20:10 [manager.py:391] +ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:209.62786674499512ms total_cost_time:209.6724510192871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9736 prompt_cache_len:5151 prompt_cache_ratio:0.5290673788003287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 +DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.1086888313293457 s +INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s +DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=145214871140402672326201844330547131825, time:1750767610.4909701s req_ids:[8] +DEBUG 06-24 20:20:10 [manager.py:391] +ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:212.04280853271484ms total_cost_time:212.08906173706055ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9737 prompt_cache_len:5151 prompt_cache_ratio:0.5290130430317346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 +DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10897374153137207 s +INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086082458496094 s +DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=66533355568975188491230653003823496247, time:1750767610.715345s req_ids:[8] +DEBUG 06-24 20:20:10 [manager.py:391] +ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:20:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 39689.050 tokens/s +DEBUG 06-24 20:20:10 [stats.py:37] Avg prompt tokens throughput: 39680.784 tokens/s +DEBUG 06-24 20:20:10 [stats.py:37] Avg generate tokens throughput: 8.266 tokens/s +INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:391.3445472717285ms total_cost_time:391.3888931274414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9738 prompt_cache_len:5151 prompt_cache_ratio:0.5289587184226741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 +DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10860300064086914 s +INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11061906814575195 s +DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=278175583104001119323103361152710516508, time:1750767611.1027768s req_ids:[8] +DEBUG 06-24 20:20:11 [manager.py:391] +ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:205.24024963378906ms total_cost_time:205.28483390808105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9739 prompt_cache_len:5151 prompt_cache_ratio:0.5289044049697094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 +DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.1078789234161377 s +INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990262031555176 s +DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=121969728957628981588249574813757099980, time:1750767611.3190236s req_ids:[8] +DEBUG 06-24 20:20:11 [manager.py:391] +ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:208.62603187561035ms total_cost_time:208.67109298706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9740 prompt_cache_len:5151 prompt_cache_ratio:0.5288501026694045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 +DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10906982421875 s +INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11112666130065918 s +DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=202613982771872757106878612523922298752, time:1750767611.531273s req_ids:[8] +DEBUG 06-24 20:20:11 [manager.py:391] +ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:207.64994621276855ms total_cost_time:207.69500732421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9741 prompt_cache_len:5151 prompt_cache_ratio:0.5287958115183246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 +DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10886812210083008 s +INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.1108698844909668 s +DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=200415677098959675704101023206881190065, time:1750767611.7458744s req_ids:[8] +DEBUG 06-24 20:20:11 [manager.py:391] +ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:207.60798454284668ms total_cost_time:207.65209197998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9742 prompt_cache_len:5151 prompt_cache_ratio:0.5287415315130364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 +DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s +INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11102557182312012 s +DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=133174399326987779142144286260679923670, time:1750767611.9606745s req_ids:[8] +DEBUG 06-24 20:20:11 [manager.py:391] +ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:204.33878898620605ms total_cost_time:204.38265800476074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9743 prompt_cache_len:5151 prompt_cache_ratio:0.5286872626501078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 +DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.10783267021179199 s +INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.10978102684020996 s +DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=37728146127731746141090949004683340721, time:1750767612.1694937s req_ids:[8] +DEBUG 06-24 20:20:12 [manager.py:391] +ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:208.52112770080566ms total_cost_time:208.56499671936035ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9744 prompt_cache_len:5151 prompt_cache_ratio:0.5286330049261084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 +DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.10953593254089355 s +INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.11155128479003906 s +DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=320185153706412132023007155815806705641, time:1750767612.384032s req_ids:[8] +DEBUG 06-24 20:20:12 [manager.py:391] +ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:206.7255973815918ms total_cost_time:206.7697048187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9745 prompt_cache_len:5151 prompt_cache_ratio:0.528578758337609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 +DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.108154296875 s +INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.11012005805969238 s +DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=237416041686495269323906896404812372160, time:1750767612.6001775s req_ids:[8] +DEBUG 06-24 20:20:12 [manager.py:391] +ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:379.03594970703125ms total_cost_time:379.08005714416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9746 prompt_cache_len:5151 prompt_cache_ratio:0.528524522881182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 +DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.1080007553100586 s +INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s +DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=154080746999841864462739118397049498419, time:1750767612.9819417s req_ids:[8] +DEBUG 06-24 20:20:12 [manager.py:391] +ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:200.23465156555176ms total_cost_time:200.28042793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9747 prompt_cache_len:5151 prompt_cache_ratio:0.528470298553401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 +DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10796737670898438 s +INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.10976910591125488 s +DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=199767120005644601791785465444193265663, time:1750767613.1892571s req_ids:[8] +DEBUG 06-24 20:20:13 [manager.py:391] +ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:213.3004665374756ms total_cost_time:213.34481239318848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9748 prompt_cache_len:5151 prompt_cache_ratio:0.5284160853508412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 +DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10779476165771484 s +INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s +DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=334036386015699906219525108573329421028, time:1750767613.4173067s req_ids:[8] +DEBUG 06-24 20:20:13 [manager.py:391] +ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:215.61431884765625ms total_cost_time:215.66081047058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9749 prompt_cache_len:5151 prompt_cache_ratio:0.5283618832700789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 +DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10934042930603027 s +INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.11146330833435059 s +DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=288433600148769554101834860726783745077, time:1750767613.6319258s req_ids:[8] +DEBUG 06-24 20:20:13 [manager.py:391] +ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:209.03301239013672ms total_cost_time:209.0756893157959ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9750 prompt_cache_len:5151 prompt_cache_ratio:0.5283076923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 +DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s +INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.11056923866271973 s +DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=145421800037629520217053451588917517742, time:1750767613.8485296s req_ids:[8] +DEBUG 06-24 20:20:13 [manager.py:391] +ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:211.77244186401367ms total_cost_time:211.81702613830566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9751 prompt_cache_len:5151 prompt_cache_ratio:0.5282535124602605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 +DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10895276069641113 s +INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11111855506896973 s +DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=97827461153038952754172966853357628156, time:1750767614.06513s req_ids:[8] +DEBUG 06-24 20:20:14 [manager.py:391] +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:209.90324020385742ms total_cost_time:209.9473476409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9752 prompt_cache_len:5151 prompt_cache_ratio:0.5281993437243643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 +DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s +INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s +DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=303765686469944090660755388017390322347, time:1750767614.2807088s req_ids:[8] +DEBUG 06-24 20:20:14 [manager.py:391] +ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:206.17341995239258ms total_cost_time:206.21728897094727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9753 prompt_cache_len:5151 prompt_cache_ratio:0.5281451860965857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 +DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10798501968383789 s +INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.10979843139648438 s +DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=202412384200120253298412522299552946512, time:1750767614.4897735s req_ids:[8] +DEBUG 06-24 20:20:14 [manager.py:391] +ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:166.57066345214844ms total_cost_time:166.611909866333ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9754 prompt_cache_len:5151 prompt_cache_ratio:0.5280910395735083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 +DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10865497589111328 s +INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11060643196105957 s +DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=327809107792174381303047551066176707403, time:1750767614.6610632s req_ids:[8] +DEBUG 06-24 20:20:14 [manager.py:391] +ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:360.2261543273926ms total_cost_time:360.27002334594727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9755 prompt_cache_len:5151 prompt_cache_ratio:0.5280369041517171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 +DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s +INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11051559448242188 s +DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=117519926021217637261315152730966681155, time:1750767615.0275495s req_ids:[8] +DEBUG 06-24 20:20:15 [manager.py:391] +ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:213.37127685546875ms total_cost_time:213.41514587402344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9756 prompt_cache_len:5151 prompt_cache_ratio:0.5279827798277983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 +DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s +INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.10980653762817383 s +DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=95861185672663450850397823444368368132, time:1750767615.250367s req_ids:[8] +DEBUG 06-24 20:20:15 [manager.py:391] +ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:205.53994178771973ms total_cost_time:205.58404922485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9757 prompt_cache_len:5151 prompt_cache_ratio:0.5279286665983397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 +DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10919451713562012 s +INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11130881309509277 s +DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=145356293704953526278090116327852215972, time:1750767615.460488s req_ids:[8] +DEBUG 06-24 20:20:15 [manager.py:391] +ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:205.6727409362793ms total_cost_time:205.7168483734131ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9758 prompt_cache_len:5151 prompt_cache_ratio:0.5278745644599303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 +DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10807156562805176 s +INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098935604095459 s +DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=63229927499695075163877259537715446299, time:1750767615.6711202s req_ids:[8] +DEBUG 06-24 20:20:15 [manager.py:391] +ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42852783203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9759 prompt_cache_len:5151 prompt_cache_ratio:0.5278204734091608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 +DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.1088099479675293 s +INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11082744598388672 s +DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=215663421535686870401861607311448518734, time:1750767615.8831227s req_ids:[8] +DEBUG 06-24 20:20:15 [manager.py:391] +ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:207.69333839416504ms total_cost_time:207.73816108703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9760 prompt_cache_len:5151 prompt_cache_ratio:0.527766393442623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 +DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10880136489868164 s +INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11086702346801758 s +DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=131907548108518511329528664180910140578, time:1750767616.0979292s req_ids:[8] +DEBUG 06-24 20:20:16 [manager.py:391] +ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:207.6129913330078ms total_cost_time:207.6585292816162ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9761 prompt_cache_len:5151 prompt_cache_ratio:0.5277123245569102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 +DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.1086893081665039 s +INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11069178581237793 s +DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=302371100660683400851857243964768711469, time:1750767616.3113856s req_ids:[8] +DEBUG 06-24 20:20:16 [manager.py:391] +ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:210.65568923950195ms total_cost_time:210.69979667663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9762 prompt_cache_len:5151 prompt_cache_ratio:0.527658266748617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 +DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10814595222473145 s +INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.10964632034301758 s +DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=33560323923669897701082508714031620270, time:1750767616.527047s req_ids:[8] +DEBUG 06-24 20:20:16 [manager.py:391] +ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:375.7026195526123ms total_cost_time:375.748872756958ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9763 prompt_cache_len:5151 prompt_cache_ratio:0.5276042200143398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 +DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10852193832397461 s +INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11051058769226074 s +DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=81425868376065752073435208167205443781, time:1750767616.9066594s req_ids:[8] +DEBUG 06-24 20:20:16 [manager.py:391] +INFO 06-24 20:20:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:207.72123336791992ms total_cost_time:207.76724815368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9764 prompt_cache_len:5151 prompt_cache_ratio:0.5275501843506759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 +DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s +INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11054301261901855 s +DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=207371732129449384608120600511036528264, time:1750767617.1217468s req_ids:[8] +DEBUG 06-24 20:20:17 [manager.py:391] +ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:207.09753036499023ms total_cost_time:207.139253616333ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9765 prompt_cache_len:5151 prompt_cache_ratio:0.5274961597542243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 +DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s +INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.1110084056854248 s +DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=289128333664217103081796475139398685184, time:1750767617.335392s req_ids:[8] +DEBUG 06-24 20:20:17 [manager.py:391] +ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:210.7090950012207ms total_cost_time:210.75153350830078ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9766 prompt_cache_len:5151 prompt_cache_ratio:0.5274421462215851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 +DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10854935646057129 s +INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11040258407592773 s +DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=75049211106508628423885749013290240635, time:1750767617.551092s req_ids:[8] +DEBUG 06-24 20:20:17 [manager.py:391] +ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:206.88652992248535ms total_cost_time:206.92968368530273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9767 prompt_cache_len:5151 prompt_cache_ratio:0.52738814374936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 +DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10755372047424316 s +INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.10950636863708496 s +DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=246156348777491631219598781280866151910, time:1750767617.7630782s req_ids:[8] +DEBUG 06-24 20:20:17 [manager.py:391] +ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:211.32707595825195ms total_cost_time:211.37213706970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9768 prompt_cache_len:5151 prompt_cache_ratio:0.5273341523341524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 +DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.1085352897644043 s +INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11045718193054199 s +DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=305521883373357154193388552541911291996, time:1750767617.9792042s req_ids:[8] +DEBUG 06-24 20:20:17 [manager.py:391] +ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:205.40976524353027ms total_cost_time:205.45482635498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9769 prompt_cache_len:5151 prompt_cache_ratio:0.5272801719725663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 +DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s +INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.10985708236694336 s +DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=207562563613590190357008297471056069914, time:1750767618.191489s req_ids:[8] +DEBUG 06-24 20:20:18 [manager.py:391] +ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:169.41452026367188ms total_cost_time:169.45695877075195ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9770 prompt_cache_len:5151 prompt_cache_ratio:0.5272262026612078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 +DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.31064558029174805 s +INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.312760591506958 s +DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=125098793639138365764655329189827525280, time:1750767618.5791636s req_ids:[8] +DEBUG 06-24 20:20:18 [manager.py:391] +ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:422.19018936157227ms total_cost_time:422.23453521728516ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9771 prompt_cache_len:5151 prompt_cache_ratio:0.5271722443966841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 +DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10791611671447754 s +INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.1103813648223877 s +DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=97105121925774154059657236269240374964, time:1750767618.794579s req_ids:[8] +DEBUG 06-24 20:20:18 [manager.py:391] +ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:206.85791969299316ms total_cost_time:206.91680908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9772 prompt_cache_len:5151 prompt_cache_ratio:0.5271182971756038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 +DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10720038414001465 s +INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.10927534103393555 s +DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=107320750101515810262680710997068668178, time:1750767619.0118597s req_ids:[8] +DEBUG 06-24 20:20:19 [manager.py:391] +ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:212.55135536193848ms total_cost_time:212.59665489196777ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9773 prompt_cache_len:5151 prompt_cache_ratio:0.5270643609945769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 +DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s +INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11104989051818848 s +DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=70653187323093500766273813701306117462, time:1750767619.22569s req_ids:[8] +DEBUG 06-24 20:20:19 [manager.py:391] +ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:208.84418487548828ms total_cost_time:208.88805389404297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9774 prompt_cache_len:5151 prompt_cache_ratio:0.5270104358502149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 +DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10796141624450684 s +INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993146896362305 s +DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=32355482779220167620655311851573586916, time:1750767619.4394698s req_ids:[8] +DEBUG 06-24 20:20:19 [manager.py:391] +ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:213.37580680847168ms total_cost_time:213.42110633850098ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9775 prompt_cache_len:5151 prompt_cache_ratio:0.5269565217391304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 +DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10910892486572266 s +INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11127471923828125 s +DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=17853183955154101609264010542592806431, time:1750767619.6577818s req_ids:[8] +DEBUG 06-24 20:20:19 [manager.py:391] +ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:206.62450790405273ms total_cost_time:206.66742324829102ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9776 prompt_cache_len:5151 prompt_cache_ratio:0.5269026186579379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 +DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:19 [batch.py:51] router release req id 8 +DEBUG 06-24 20:20:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:19 [manager.py:283] +DEBUG 06-24 20:20:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:19 [manager.py:284] +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s +INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11057090759277344 s +DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=273764909050809210803730026898423823178, time:1750767619.8759189s req_ids:[8] +DEBUG 06-24 20:20:19 [manager.py:391] +ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:213.5486602783203ms total_cost_time:213.5915756225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9777 prompt_cache_len:5151 prompt_cache_ratio:0.5268487266032525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 +DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10806488990783691 s +INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s +DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=56420925025816450662159351236814640231, time:1750767620.0921218s req_ids:[8] +DEBUG 06-24 20:20:20 [manager.py:391] +ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:422.2087860107422ms total_cost_time:422.2533702850342ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9778 prompt_cache_len:5151 prompt_cache_ratio:0.5267948455716915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 +DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10817432403564453 s +INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s +DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=136265769951535728799517873709343292744, time:1750767620.5133588s req_ids:[8] +DEBUG 06-24 20:20:20 [manager.py:391] +ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:200.76370239257812ms total_cost_time:200.80900192260742ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9779 prompt_cache_len:5151 prompt_cache_ratio:0.5267409755598732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 +DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s +INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s +DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=204351107882877299643086804897031632554, time:1750767620.7296488s req_ids:[8] +DEBUG 06-24 20:20:20 [manager.py:391] +ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:209.22470092773438ms total_cost_time:209.27000045776367ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9780 prompt_cache_len:5151 prompt_cache_ratio:0.5266871165644171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 +DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10907840728759766 s +INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1112210750579834 s +DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=125110857008695838466598375136438888116, time:1750767620.9396293s req_ids:[8] +DEBUG 06-24 20:20:20 [manager.py:391] +ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:20:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 41768.579 tokens/s +DEBUG 06-24 20:20:21 [stats.py:37] Avg prompt tokens throughput: 41760.021 tokens/s +DEBUG 06-24 20:20:21 [stats.py:37] Avg generate tokens throughput: 8.557 tokens/s +INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:209.86390113830566ms total_cost_time:209.90872383117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9781 prompt_cache_len:5151 prompt_cache_ratio:0.5266332685819446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 +DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s +INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11080789566040039 s +DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=157585264792120494257916827978896740898, time:1750767621.154516s req_ids:[8] +DEBUG 06-24 20:20:21 [manager.py:391] +ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:207.93461799621582ms total_cost_time:207.9787254333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9782 prompt_cache_len:5151 prompt_cache_ratio:0.5265794316090779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 +DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.1088714599609375 s +INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11101698875427246 s +DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=17677222310119034072956419658191149520, time:1750767621.3689423s req_ids:[8] +DEBUG 06-24 20:20:21 [manager.py:391] +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:208.0848217010498ms total_cost_time:208.1305980682373ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9783 prompt_cache_len:5151 prompt_cache_ratio:0.526525605642441 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 +DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10829401016235352 s +INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s +DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=321994167095462075870398911145069528169, time:1750767621.5824456s req_ids:[8] +DEBUG 06-24 20:20:21 [manager.py:391] +ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:209.20157432556152ms total_cost_time:209.244966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9784 prompt_cache_len:5151 prompt_cache_ratio:0.526471790678659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 +DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s +INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11006283760070801 s +DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=116340645852147583221677812670388553045, time:1750767621.8004498s req_ids:[8] +DEBUG 06-24 20:20:21 [manager.py:391] +ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:207.3664665222168ms total_cost_time:207.4110507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9785 prompt_cache_len:5151 prompt_cache_ratio:0.5264179867143587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 +DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10824036598205566 s +INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11017704010009766 s +DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=208883472621056742786344414080557084607, time:1750767622.011575s req_ids:[8] +DEBUG 06-24 20:20:22 [manager.py:391] +ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:212.92519569396973ms total_cost_time:212.9685878753662ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9786 prompt_cache_len:5151 prompt_cache_ratio:0.5263641937461679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 +DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10841703414916992 s +INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11057090759277344 s +DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=103049623068262838665217529845026182488, time:1750767622.2284205s req_ids:[8] +DEBUG 06-24 20:20:22 [manager.py:391] +ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:371.8533515930176ms total_cost_time:371.89745903015137ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9787 prompt_cache_len:5151 prompt_cache_ratio:0.5263104117707162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 +DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10903811454772949 s +INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11100912094116211 s +DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=86248591457833197710644900335035567232, time:1750767622.604795s req_ids:[8] +DEBUG 06-24 20:20:22 [manager.py:391] +ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:213.3958339691162ms total_cost_time:213.4389877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9788 prompt_cache_len:5151 prompt_cache_ratio:0.5262566407846343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 +DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s +INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11091732978820801 s +DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=76905037032621645064775549485879512866, time:1750767622.824462s req_ids:[8] +DEBUG 06-24 20:20:22 [manager.py:391] +ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:206.085205078125ms total_cost_time:206.1288356781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9789 prompt_cache_len:5151 prompt_cache_ratio:0.5262028807845541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 +DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10819768905639648 s +INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.11032652854919434 s +DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=150434956329889859531696042569191554330, time:1750767623.0371916s req_ids:[8] +DEBUG 06-24 20:20:23 [manager.py:391] +ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:210.74604988098145ms total_cost_time:210.79277992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9790 prompt_cache_len:5151 prompt_cache_ratio:0.5261491317671093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 +DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:23 [batch.py:51] router release req id 8 +INFO 06-24 20:20:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:20:23 [statics_utils.py:24] mean first cost: 228.55497230674405 ms +INFO 06-24 20:20:23 [statics_utils.py:24] mean per token cost: 0.07034210481983891 ms +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10877752304077148 s +INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.11068272590637207 s +DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=326957584174311689796673248110270754317, time:1750767623.2515244s req_ids:[8] +DEBUG 06-24 20:20:23 [manager.py:391] +ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:207.98492431640625ms total_cost_time:208.02807807922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9791 prompt_cache_len:5151 prompt_cache_ratio:0.5260953937289348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 +DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10785365104675293 s +INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.10973358154296875 s +DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=174811010164042578254361644033611800985, time:1750767623.4669185s req_ids:[8] +DEBUG 06-24 20:20:23 [manager.py:391] +ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:206.30288124084473ms total_cost_time:206.34770393371582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9792 prompt_cache_len:5151 prompt_cache_ratio:0.5260416666666666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 +DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10794401168823242 s +INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s +DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=318824003125236192598000679272060087072, time:1750767623.6812394s req_ids:[8] +DEBUG 06-24 20:20:23 [manager.py:391] +ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:212.48364448547363ms total_cost_time:212.52703666687012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9793 prompt_cache_len:5151 prompt_cache_ratio:0.5259879505769427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 +DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10878515243530273 s +INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.1108546257019043 s +DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=48151011333779218242864024445519024527, time:1750767623.8955107s req_ids:[8] +DEBUG 06-24 20:20:23 [manager.py:391] +ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:208.18185806274414ms total_cost_time:208.22525024414062ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9794 prompt_cache_len:5151 prompt_cache_ratio:0.5259342454564019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 +DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10798263549804688 s +INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s +DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=147640214283413526253516770315794689703, time:1750767624.1231794s req_ids:[8] +DEBUG 06-24 20:20:24 [manager.py:391] +ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:390.77186584472656ms total_cost_time:390.81764221191406ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9795 prompt_cache_len:5151 prompt_cache_ratio:0.5258805513016845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 +DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10931658744812012 s +INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s +DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=132185907965054953535872976648421573318, time:1750767624.505492s req_ids:[8] +DEBUG 06-24 20:20:24 [manager.py:391] +ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:207.5040340423584ms total_cost_time:207.54766464233398ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9796 prompt_cache_len:5151 prompt_cache_ratio:0.5258268681094325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 +DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10922741889953613 s +INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11113548278808594 s +DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=93988963094899952369079101234938533334, time:1750767624.720883s req_ids:[8] +DEBUG 06-24 20:20:24 [manager.py:391] +ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:210.81089973449707ms total_cost_time:210.85572242736816ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9797 prompt_cache_len:5151 prompt_cache_ratio:0.5257731958762887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 +DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10846304893493652 s +INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s +DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=93796764862019116683755273855541655499, time:1750767624.9371994s req_ids:[8] +DEBUG 06-24 20:20:24 [manager.py:391] +ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:208.13298225402832ms total_cost_time:208.1770896911621ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9798 prompt_cache_len:5151 prompt_cache_ratio:0.5257195345988978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 +DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s +INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s +DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=47472019500088227784608418801057187546, time:1750767625.1513755s req_ids:[8] +DEBUG 06-24 20:20:25 [manager.py:391] +ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:208.73618125915527ms total_cost_time:208.78076553344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9799 prompt_cache_len:5151 prompt_cache_ratio:0.5256658842739055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 +DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s +INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.10987162590026855 s +DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=111426244481711219880549100700927874793, time:1750767625.3656385s req_ids:[8] +DEBUG 06-24 20:20:25 [manager.py:391] +ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:209.18893814086914ms total_cost_time:209.23328399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9800 prompt_cache_len:5151 prompt_cache_ratio:0.5256122448979592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 +DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10913419723510742 s +INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111152172088623 s +DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=54153523113677443566912752204703749372, time:1750767625.5809858s req_ids:[8] +DEBUG 06-24 20:20:25 [manager.py:391] +ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:211.08579635620117ms total_cost_time:211.13133430480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9801 prompt_cache_len:5151 prompt_cache_ratio:0.5255586164677074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 +DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10812163352966309 s +INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.11011171340942383 s +DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=90066343870038748186176984786366386990, time:1750767625.7977364s req_ids:[8] +DEBUG 06-24 20:20:25 [manager.py:391] +ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:210.6034755706787ms total_cost_time:210.6480598449707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9802 prompt_cache_len:5151 prompt_cache_ratio:0.5255049989798001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 +DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10855221748352051 s +INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s +DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=170388547711268976016585658125538062117, time:1750767626.01283s req_ids:[8] +DEBUG 06-24 20:20:26 [manager.py:391] +ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:391.2055492401123ms total_cost_time:391.2489414215088ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9803 prompt_cache_len:5151 prompt_cache_ratio:0.5254513924308885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 +DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10794734954833984 s +INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.10990118980407715 s +DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=272922261260460602336001289038308907645, time:1750767626.4105554s req_ids:[8] +DEBUG 06-24 20:20:26 [manager.py:391] +ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:202.11482048034668ms total_cost_time:202.1770477294922ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:9804 prompt_cache_len:5151 prompt_cache_ratio:0.5253977968176254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 +DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10906267166137695 s +INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11021757125854492 s +DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=245039867733681188862055160217377739558, time:1750767626.6206322s req_ids:[8] +DEBUG 06-24 20:20:26 [manager.py:391] +ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:220.2756404876709ms total_cost_time:220.3207015991211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9805 prompt_cache_len:5151 prompt_cache_ratio:0.5253442121366649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 +DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.1088247299194336 s +INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11078596115112305 s +DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=293456262429453376833136859382916705405, time:1750767626.844889s req_ids:[8] +DEBUG 06-24 20:20:26 [manager.py:391] +ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:200.64330101013184ms total_cost_time:200.68740844726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9806 prompt_cache_len:5151 prompt_cache_ratio:0.5252906383846625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 +DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.1080634593963623 s +INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11000776290893555 s +DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=160751587742383152512342875336158542690, time:1750767627.0526578s req_ids:[8] +DEBUG 06-24 20:20:27 [manager.py:391] +ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:208.7228298187256ms total_cost_time:208.76693725585938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9807 prompt_cache_len:5151 prompt_cache_ratio:0.5252370755582747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 +DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10763049125671387 s +INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.10952997207641602 s +DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=339069058622311765922511838405776414884, time:1750767627.2667212s req_ids:[8] +DEBUG 06-24 20:20:27 [manager.py:391] +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:201.65324211120605ms total_cost_time:201.69591903686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9808 prompt_cache_len:5151 prompt_cache_ratio:0.5251835236541599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 +DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10782146453857422 s +INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.10980725288391113 s +DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=193403816358995635362265405681291672318, time:1750767627.4749835s req_ids:[8] +DEBUG 06-24 20:20:27 [manager.py:391] +ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:206.1011791229248ms total_cost_time:206.1469554901123ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9809 prompt_cache_len:5151 prompt_cache_ratio:0.5251299826689775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 +DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s +INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s +DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=286261063991233896328525595859785562276, time:1750767627.6866248s req_ids:[8] +DEBUG 06-24 20:20:27 [manager.py:391] +ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:207.92841911315918ms total_cost_time:207.97085762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9810 prompt_cache_len:5151 prompt_cache_ratio:0.5250764525993884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 +DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s +INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11008405685424805 s +DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=340240703996619726981146317295080010198, time:1750767627.9008553s req_ids:[8] +DEBUG 06-24 20:20:27 [manager.py:391] +ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:409.4376564025879ms total_cost_time:409.4820022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9811 prompt_cache_len:5151 prompt_cache_ratio:0.5250229334420549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 +DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10784292221069336 s +INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s +DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=286847843633475391638477118772544232958, time:1750767628.3136668s req_ids:[8] +DEBUG 06-24 20:20:28 [manager.py:391] +ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:162.4279022216797ms total_cost_time:162.47153282165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9812 prompt_cache_len:5151 prompt_cache_ratio:0.5249694251936404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 +DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s +INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.1105203628540039 s +DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=166152652480891661699124313696404818514, time:1750767628.4819105s req_ids:[8] +DEBUG 06-24 20:20:28 [manager.py:391] +ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:198.99702072143555ms total_cost_time:199.0518569946289ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9813 prompt_cache_len:5151 prompt_cache_ratio:0.5249159278508102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 +DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10885071754455566 s +INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.11068987846374512 s +DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=320224752019504705425281495072960923808, time:1750767628.6958935s req_ids:[8] +DEBUG 06-24 20:20:28 [manager.py:391] +ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:211.95316314697266ms total_cost_time:211.99822425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9814 prompt_cache_len:5151 prompt_cache_ratio:0.5248624414102303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 +DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10910892486572266 s +INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098527908325195 s +DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=141411309940188619781739269547183197588, time:1750767628.9102902s req_ids:[8] +DEBUG 06-24 20:20:28 [manager.py:391] +ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:207.40413665771484ms total_cost_time:207.45611190795898ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:9815 prompt_cache_len:5151 prompt_cache_ratio:0.5248089658685685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 +DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10896062850952148 s +INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11025834083557129 s +DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=335251184606819733764496071821458711383, time:1750767629.126696s req_ids:[8] +DEBUG 06-24 20:20:29 [manager.py:391] +ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:215.1801586151123ms total_cost_time:215.2249813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9816 prompt_cache_len:5151 prompt_cache_ratio:0.5247555012224939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 +DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.11427855491638184 s +INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.1157076358795166 s +DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=63116750155687860411820697032352944097, time:1750767629.343398s req_ids:[8] +DEBUG 06-24 20:20:29 [manager.py:391] +ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:209.70535278320312ms total_cost_time:209.80429649353027ms,out_token_counter:1 mean_per_token_cost_time: 0.09894371032714844ms prompt_token_num:9817 prompt_cache_len:5151 prompt_cache_ratio:0.5247020474686768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 +DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10829758644104004 s +INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.1102914810180664 s +DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=297419112478891465939957762744821745548, time:1750767629.558996s req_ids:[8] +DEBUG 06-24 20:20:29 [manager.py:391] +ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:209.69343185424805ms total_cost_time:209.74016189575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9818 prompt_cache_len:5151 prompt_cache_ratio:0.5246486046037889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 +DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10995650291442871 s +INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11198091506958008 s +DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=97266016639221576983154022186993573303, time:1750767629.7779675s req_ids:[8] +DEBUG 06-24 20:20:29 [manager.py:391] +ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:211.41672134399414ms total_cost_time:211.46059036254883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9819 prompt_cache_len:5151 prompt_cache_ratio:0.5245951726245035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 +DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10900735855102539 s +INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11094880104064941 s +DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=246860284430075259600618888057837398864, time:1750767629.9944608s req_ids:[8] +DEBUG 06-24 20:20:29 [manager.py:391] +ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:369.89665031433105ms total_cost_time:369.9531555175781ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:9820 prompt_cache_len:5151 prompt_cache_ratio:0.5245417515274949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 +DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.10876083374023438 s +INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.11068248748779297 s +DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=149708187322455662487276173552151934686, time:1750767630.3696084s req_ids:[8] +DEBUG 06-24 20:20:30 [manager.py:391] +ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:208.54949951171875ms total_cost_time:208.59408378601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9821 prompt_cache_len:5151 prompt_cache_ratio:0.524488341309439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 +DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.11060047149658203 s +INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.11270356178283691 s +DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=67683242613066182270213141512976074559, time:1750767630.5833068s req_ids:[8] +DEBUG 06-24 20:20:30 [manager.py:391] +ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:209.77544784545898ms total_cost_time:209.82003211975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9822 prompt_cache_len:5151 prompt_cache_ratio:0.5244349419670128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 +DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.1075754165649414 s +INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.1095583438873291 s +DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=229364621618973955746910840269263571553, time:1750767630.7986398s req_ids:[8] +DEBUG 06-24 20:20:30 [manager.py:391] +ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:209.47694778442383ms total_cost_time:209.5181941986084ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9823 prompt_cache_len:5151 prompt_cache_ratio:0.5243815534968951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 +DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10992312431335449 s +INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11184287071228027 s +DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=152587293620394071600994084970168883214, time:1750767631.0144174s req_ids:[8] +DEBUG 06-24 20:20:31 [manager.py:391] +ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 +DEBUG 06-24 20:20:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 41835.348 tokens/s +DEBUG 06-24 20:20:31 [stats.py:37] Avg prompt tokens throughput: 41826.814 tokens/s +DEBUG 06-24 20:20:31 [stats.py:37] Avg generate tokens throughput: 8.533 tokens/s +INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:211.57550811767578ms total_cost_time:211.64393424987793ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:9824 prompt_cache_len:5151 prompt_cache_ratio:0.5243281758957655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 +DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10873150825500488 s +INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11069369316101074 s +DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=83546231035720741649149733392079134143, time:1750767631.243892s req_ids:[8] +DEBUG 06-24 20:20:31 [manager.py:391] +ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:224.42126274108887ms total_cost_time:224.46656227111816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9825 prompt_cache_len:5151 prompt_cache_ratio:0.5242748091603053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 +DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10757112503051758 s +INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.10963869094848633 s +DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=14564678536693774603978609421203651799, time:1750767631.4625828s req_ids:[8] +DEBUG 06-24 20:20:31 [manager.py:391] +ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:209.43951606750488ms total_cost_time:209.48386192321777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9826 prompt_cache_len:5151 prompt_cache_ratio:0.5242214532871973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 +DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.1086735725402832 s +INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11070656776428223 s +DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=296145160860894103250871002745783382680, time:1750767631.6775823s req_ids:[8] +DEBUG 06-24 20:20:31 [manager.py:391] +ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:210.13450622558594ms total_cost_time:210.1762294769287ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9827 prompt_cache_len:5151 prompt_cache_ratio:0.5241681082731251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 +DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s +INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11079788208007812 s +DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=291404921401300082657533418019495111242, time:1750767631.8933513s req_ids:[8] +DEBUG 06-24 20:20:31 [manager.py:391] +ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:375.4565715789795ms total_cost_time:375.5018711090088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9828 prompt_cache_len:5151 prompt_cache_ratio:0.5241147741147741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 +DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s +INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11039400100708008 s +DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=305031747116538201747409735416777483557, time:1750767632.2716286s req_ids:[8] +DEBUG 06-24 20:20:32 [manager.py:391] +ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:203.37772369384766ms total_cost_time:203.42373847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9829 prompt_cache_len:5151 prompt_cache_ratio:0.524061450808831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 +DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s +INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11006355285644531 s +DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=57136484573265414604000683062063195642, time:1750767632.4890945s req_ids:[8] +DEBUG 06-24 20:20:32 [manager.py:391] +ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:213.72008323669434ms total_cost_time:213.76609802246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9830 prompt_cache_len:5151 prompt_cache_ratio:0.5240081383519837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 +DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10954952239990234 s +INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11150813102722168 s +DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=16984100778801417762478380666952846194, time:1750767632.7032015s req_ids:[8] +DEBUG 06-24 20:20:32 [manager.py:391] +ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:206.99238777160645ms total_cost_time:207.03721046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9831 prompt_cache_len:5151 prompt_cache_ratio:0.5239548367409216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 +DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10895872116088867 s +INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s +DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=103827813637938114863672103537791564273, time:1750767632.9181502s req_ids:[8] +DEBUG 06-24 20:20:32 [manager.py:391] +ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:207.33904838562012ms total_cost_time:207.3826789855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9832 prompt_cache_len:5151 prompt_cache_ratio:0.5239015459723352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 +DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10949373245239258 s +INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11147117614746094 s +DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=269713370630256404373954428594009822731, time:1750767633.1315007s req_ids:[8] +DEBUG 06-24 20:20:33 [manager.py:391] +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:209.05303955078125ms total_cost_time:209.09643173217773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9833 prompt_cache_len:5151 prompt_cache_ratio:0.5238482660429167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 +DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10938763618469238 s +INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11140036582946777 s +DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=331487190744281758171180091463261590600, time:1750767633.3457603s req_ids:[8] +DEBUG 06-24 20:20:33 [manager.py:391] +ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:202.5470733642578ms total_cost_time:202.592134475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9834 prompt_cache_len:5151 prompt_cache_ratio:0.5237949969493594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 +DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10939407348632812 s +INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11140275001525879 s +DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=141709234723696794358221923432692018043, time:1750767633.5555518s req_ids:[8] +DEBUG 06-24 20:20:33 [manager.py:391] +ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:207.5185775756836ms total_cost_time:207.5653076171875ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9835 prompt_cache_len:5151 prompt_cache_ratio:0.5237417386883579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 +DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.3118305206298828 s +INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.31439781188964844 s +DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=44593544588382884354724034125251082542, time:1750767633.9745944s req_ids:[8] +DEBUG 06-24 20:20:33 [manager.py:391] +ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:416.4111614227295ms total_cost_time:416.4576530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9836 prompt_cache_len:5151 prompt_cache_ratio:0.5236884912566083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 +DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10799813270568848 s +INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s +DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=278173540455071295297888750275793111741, time:1750767634.1925979s req_ids:[8] +DEBUG 06-24 20:20:34 [manager.py:391] +ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:210.25586128234863ms total_cost_time:210.30068397521973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9837 prompt_cache_len:5151 prompt_cache_ratio:0.5236352546508082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 +DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10789370536804199 s +INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s +DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=121175320744780678283327625287460900507, time:1750767634.4076784s req_ids:[8] +DEBUG 06-24 20:20:34 [manager.py:391] +ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:206.35294914245605ms total_cost_time:206.39824867248535ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9838 prompt_cache_len:5151 prompt_cache_ratio:0.523582028867656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 +DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10833621025085449 s +INFO 06-24 20:20:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.11023712158203125 s +DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=316282947894223674005178801565186239309, time:1750767634.622619s req_ids:[8] +DEBUG 06-24 20:20:34 [manager.py:391] +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:211.2584114074707ms total_cost_time:211.3046646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9839 prompt_cache_len:5151 prompt_cache_ratio:0.523528813903852 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 +DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.11083984375 s +INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.11342048645019531 s +DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=217072354813086774953979272487721024322, time:1750767634.8397508s req_ids:[8] +DEBUG 06-24 20:20:34 [manager.py:391] +ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:209.8526954650879ms total_cost_time:209.8991870880127ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9840 prompt_cache_len:5151 prompt_cache_ratio:0.5234756097560975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 +DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.10886096954345703 s +INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11093449592590332 s +DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=339370780111758554673234968269496834561, time:1750767635.053557s req_ids:[8] +DEBUG 06-24 20:20:35 [manager.py:391] +ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:203.6585807800293ms total_cost_time:203.70244979858398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9841 prompt_cache_len:5151 prompt_cache_ratio:0.5234224164210954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 +DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.1092061996459961 s +INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11118865013122559 s +DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=325788816525945214746441594029746809847, time:1750767635.2632673s req_ids:[8] +DEBUG 06-24 20:20:35 [manager.py:391] +ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:206.4645290374756ms total_cost_time:206.50744438171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9842 prompt_cache_len:5151 prompt_cache_ratio:0.5233692338955497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 +DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s +INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11079978942871094 s +DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=161422872285320130485490148285864108616, time:1750767635.4776294s req_ids:[8] +DEBUG 06-24 20:20:35 [manager.py:391] +ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:380.0981044769287ms total_cost_time:380.1426887512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9843 prompt_cache_len:5151 prompt_cache_ratio:0.5233160621761658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 +DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.10795402526855469 s +INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11012721061706543 s +DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=188554156942973155624809852656070165639, time:1750767635.859789s req_ids:[8] +DEBUG 06-24 20:20:35 [manager.py:391] +ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:205.65009117126465ms total_cost_time:205.69229125976562ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9844 prompt_cache_len:5151 prompt_cache_ratio:0.5232629012596506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 +DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s +INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.11101317405700684 s +DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=259447047004667800648840476073097527162, time:1750767636.0749018s req_ids:[8] +DEBUG 06-24 20:20:36 [manager.py:391] +ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:206.67529106140137ms total_cost_time:206.71844482421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9845 prompt_cache_len:5151 prompt_cache_ratio:0.5232097511427121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 +DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10952162742614746 s +INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s +DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=252974452959142126170074782789905306596, time:1750767636.2868354s req_ids:[8] +DEBUG 06-24 20:20:36 [manager.py:391] +ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:207.23557472229004ms total_cost_time:207.27920532226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9846 prompt_cache_len:5151 prompt_cache_ratio:0.5231566118220597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 +DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10776901245117188 s +INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10902929306030273 s +DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=186829927064031739364695002353175291408, time:1750767636.4999228s req_ids:[8] +DEBUG 06-24 20:20:36 [manager.py:391] +ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:207.66520500183105ms total_cost_time:207.71098136901855ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9847 prompt_cache_len:5151 prompt_cache_ratio:0.5231034832944044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 +DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10792040824890137 s +INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s +DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=127039352766353270495954388025317877139, time:1750767636.7140849s req_ids:[8] +DEBUG 06-24 20:20:36 [manager.py:391] +ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:206.35581016540527ms total_cost_time:206.39944076538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9848 prompt_cache_len:5151 prompt_cache_ratio:0.5230503655564581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 +DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s +INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10997605323791504 s +DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=175289394642325575729716716597674809733, time:1750767636.9314957s req_ids:[8] +DEBUG 06-24 20:20:36 [manager.py:391] +ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:214.19239044189453ms total_cost_time:214.23602104187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9849 prompt_cache_len:5151 prompt_cache_ratio:0.5229972586049345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 +DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.1087644100189209 s +INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075115203857422 s +DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=93794999698347656964797352548668484564, time:1750767637.1472352s req_ids:[8] +DEBUG 06-24 20:20:37 [manager.py:391] +ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:210.11734008789062ms total_cost_time:210.1614475250244ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9850 prompt_cache_len:5151 prompt_cache_ratio:0.5229441624365482 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 +DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s +INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.1111443042755127 s +DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=51006539445990156205023680618176401893, time:1750767637.366126s req_ids:[8] +DEBUG 06-24 20:20:37 [manager.py:391] +ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:212.16797828674316ms total_cost_time:212.21303939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9851 prompt_cache_len:5151 prompt_cache_ratio:0.5228910770480154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 +DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.10804510116577148 s +INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.10994410514831543 s +DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=87152793400464007447782248749656807151, time:1750767637.5794404s req_ids:[8] +DEBUG 06-24 20:20:37 [manager.py:391] +ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:380.48696517944336ms total_cost_time:380.53178787231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9852 prompt_cache_len:5151 prompt_cache_ratio:0.5228380024360536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 +DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.10836648941040039 s +INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s +DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=6912084223442481052783565917609126859, time:1750767637.9647207s req_ids:[8] +DEBUG 06-24 20:20:37 [manager.py:391] +ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:208.65392684936523ms total_cost_time:208.69803428649902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9853 prompt_cache_len:5151 prompt_cache_ratio:0.5227849385973815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 +DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:38 [batch.py:51] router release req id 8 +INFO 06-24 20:20:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10946941375732422 s +INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11136579513549805 s +DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=136808972309748891854450292745875917321, time:1750767638.1802363s req_ids:[8] +DEBUG 06-24 20:20:38 [manager.py:391] +ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:208.38594436645508ms total_cost_time:208.43076705932617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9854 prompt_cache_len:5151 prompt_cache_ratio:0.5227318855287193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 +DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10810494422912598 s +INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s +DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=338846449146995771767465987364617165023, time:1750767638.396286s req_ids:[8] +DEBUG 06-24 20:20:38 [manager.py:391] +ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:209.52343940734863ms total_cost_time:209.56707000732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9855 prompt_cache_len:5151 prompt_cache_ratio:0.5226788432267885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 +DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008572578430176 s +DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=283666213461422392242081606385119331258, time:1750767638.6117744s req_ids:[8] +DEBUG 06-24 20:20:38 [manager.py:391] +ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:171.08631134033203ms total_cost_time:171.12994194030762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9856 prompt_cache_len:5151 prompt_cache_ratio:0.5226258116883117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 +DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.1077260971069336 s +INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s +DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=254932023129837077409726046965486703673, time:1750767638.7875166s req_ids:[8] +DEBUG 06-24 20:20:38 [manager.py:391] +ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:202.29053497314453ms total_cost_time:202.33464241027832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9857 prompt_cache_len:5151 prompt_cache_ratio:0.5225727909100132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 +DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.11051297187805176 s +INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11240506172180176 s +DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=234005756795913159361874983629903291277, time:1750767639.0004761s req_ids:[8] +DEBUG 06-24 20:20:39 [manager.py:391] +ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:209.88082885742188ms total_cost_time:209.92422103881836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9858 prompt_cache_len:5151 prompt_cache_ratio:0.5225197808886184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 +DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.1091451644897461 s +INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110162734985352 s +DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=331257924281503179256106769152840237726, time:1750767639.2108104s req_ids:[8] +DEBUG 06-24 20:20:39 [manager.py:391] +ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:207.01098442077637ms total_cost_time:207.05628395080566ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9859 prompt_cache_len:5151 prompt_cache_ratio:0.522466781620854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 +DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10889220237731934 s +INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.11095523834228516 s +DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=149457176586173820492060779123933035311, time:1750767639.42298s req_ids:[8] +DEBUG 06-24 20:20:39 [manager.py:391] +ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:371.8738555908203ms total_cost_time:371.9189167022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9860 prompt_cache_len:5151 prompt_cache_ratio:0.5224137931034483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 +DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10610008239746094 s +INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.10729217529296875 s +DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=298013877519844892731659186438830181986, time:1750767639.7999816s req_ids:[8] +DEBUG 06-24 20:20:39 [manager.py:391] +ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:163.8026237487793ms total_cost_time:163.82241249084473ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9861 prompt_cache_len:5151 prompt_cache_ratio:0.5223608153331305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 +DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10590362548828125 s +INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.10789132118225098 s +DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=264764386632736105083480191454632587505, time:1750767639.9696183s req_ids:[8] +DEBUG 06-24 20:20:39 [manager.py:391] +ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:195.7104206085205ms total_cost_time:195.7564353942871ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9862 prompt_cache_len:5151 prompt_cache_ratio:0.5223078483066315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 +DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10863137245178223 s +INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106717586517334 s +DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=307259256675055424279737802234669274196, time:1750767640.1706963s req_ids:[8] +DEBUG 06-24 20:20:40 [manager.py:391] +ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:202.29554176330566ms total_cost_time:202.33964920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9863 prompt_cache_len:5151 prompt_cache_ratio:0.5222548920206833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 +DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10933184623718262 s +INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11138296127319336 s +DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=156895608538144327447908960179691300966, time:1750767640.3894298s req_ids:[8] +DEBUG 06-24 20:20:40 [manager.py:391] +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:218.75905990600586ms total_cost_time:218.80269050598145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9864 prompt_cache_len:5151 prompt_cache_ratio:0.5222019464720195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 +DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s +INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11080479621887207 s +DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=9752078054635430684531351365962402658, time:1750767640.6056838s req_ids:[8] +DEBUG 06-24 20:20:40 [manager.py:391] +ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:209.78927612304688ms total_cost_time:209.83195304870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9865 prompt_cache_len:5151 prompt_cache_ratio:0.5221490116573746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 +DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10702347755432129 s +INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.1088094711303711 s +DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=2780910235941728093575381039294561682, time:1750767640.8217697s req_ids:[8] +DEBUG 06-24 20:20:40 [manager.py:391] +ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:170.34626007080078ms total_cost_time:170.38822174072266ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9866 prompt_cache_len:5151 prompt_cache_ratio:0.5220960875734847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 +DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10869812965393066 s +INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11081051826477051 s +DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=129604416244387845644769763547787524816, time:1750767640.9955535s req_ids:[8] +DEBUG 06-24 20:20:40 [manager.py:391] +ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:204.76865768432617ms total_cost_time:204.81228828430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9867 prompt_cache_len:5151 prompt_cache_ratio:0.5220431742170872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 +DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s +INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.11014342308044434 s +DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=20772459192545164914069084465998517447, time:1750767641.207221s req_ids:[8] +DEBUG 06-24 20:20:41 [manager.py:391] +DEBUG 06-24 20:20:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 42902.348 tokens/s +DEBUG 06-24 20:20:41 [stats.py:37] Avg prompt tokens throughput: 42893.734 tokens/s +DEBUG 06-24 20:20:41 [stats.py:37] Avg generate tokens throughput: 8.613 tokens/s +ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:377.6357173919678ms total_cost_time:377.68077850341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9868 prompt_cache_len:5151 prompt_cache_ratio:0.521990271584921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 +DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10770916938781738 s +INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.10953950881958008 s +DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=307878850242575289067884355466355414894, time:1750767641.5885391s req_ids:[8] +DEBUG 06-24 20:20:41 [manager.py:391] +ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:162.9774570465088ms total_cost_time:163.01989555358887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9869 prompt_cache_len:5151 prompt_cache_ratio:0.5219373796737258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 +DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10884237289428711 s +INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.11084985733032227 s +DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=12089587263252223100125819026189013065, time:1750767641.76152s req_ids:[8] +DEBUG 06-24 20:20:41 [manager.py:391] +ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:201.28297805786133ms total_cost_time:201.32756233215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9870 prompt_cache_len:5151 prompt_cache_ratio:0.5218844984802432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 +DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10781645774841309 s +INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s +DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=259176926614692279723400673668586282655, time:1750767641.9662097s req_ids:[8] +DEBUG 06-24 20:20:41 [manager.py:391] +ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:207.14902877807617ms total_cost_time:207.20744132995605ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9871 prompt_cache_len:5151 prompt_cache_ratio:0.5218316280012156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 +DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s +INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11060762405395508 s +DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=5868944616832487380587426543255908956, time:1750767642.1808763s req_ids:[8] +DEBUG 06-24 20:20:42 [manager.py:391] +ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:204.53715324401855ms total_cost_time:204.58221435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9872 prompt_cache_len:5151 prompt_cache_ratio:0.5217787682333873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 +DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10855412483215332 s +INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11075544357299805 s +DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=310221211109891676950394085893901676252, time:1750767642.3890727s req_ids:[8] +DEBUG 06-24 20:20:42 [manager.py:391] +ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:207.00621604919434ms total_cost_time:207.05103874206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9873 prompt_cache_len:5151 prompt_cache_ratio:0.5217259191735035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 +DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s +INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.10957455635070801 s +DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=190098878393544036093794269424953305809, time:1750767642.6188695s req_ids:[8] +DEBUG 06-24 20:20:42 [manager.py:391] +ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:227.21219062805176ms total_cost_time:227.25653648376465ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9874 prompt_cache_len:5151 prompt_cache_ratio:0.5216730808183108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 +DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10882234573364258 s +INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11093616485595703 s +DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=21196999728586172224207675544167470291, time:1750767642.8376992s req_ids:[8] +DEBUG 06-24 20:20:42 [manager.py:391] +ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:205.99889755249023ms total_cost_time:206.04228973388672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9875 prompt_cache_len:5151 prompt_cache_ratio:0.521620253164557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 +DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10737037658691406 s +INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s +DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=320648744224014371053406123493583358585, time:1750767643.0478897s req_ids:[8] +DEBUG 06-24 20:20:43 [manager.py:391] +ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:374.87220764160156ms total_cost_time:374.91655349731445ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9876 prompt_cache_len:5151 prompt_cache_ratio:0.5215674362089915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 +DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10786914825439453 s +INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.10996794700622559 s +DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=304273158954173830938695181251070496230, time:1750767643.4268143s req_ids:[8] +DEBUG 06-24 20:20:43 [manager.py:391] +ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:206.30812644958496ms total_cost_time:206.35247230529785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9877 prompt_cache_len:5151 prompt_cache_ratio:0.5215146299483648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 +DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10673904418945312 s +INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.10860610008239746 s +DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=188135838357065998143554875599503838116, time:1750767643.6388216s req_ids:[8] +DEBUG 06-24 20:20:43 [manager.py:391] +ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:165.11940956115723ms total_cost_time:165.1625633239746ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9878 prompt_cache_len:5151 prompt_cache_ratio:0.5214618343794291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 +DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.1072540283203125 s +INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091301441192627 s +DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=74261669375289386687300662149375477014, time:1750767643.8102007s req_ids:[8] +DEBUG 06-24 20:20:43 [manager.py:391] +ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:200.9902000427246ms total_cost_time:201.03216171264648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9879 prompt_cache_len:5151 prompt_cache_ratio:0.5214090494989372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 +DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10801362991333008 s +INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11001777648925781 s +DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=164803211597925524100263864492898658049, time:1750767644.0165825s req_ids:[8] +DEBUG 06-24 20:20:44 [manager.py:391] +ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:204.41889762878418ms total_cost_time:204.46348190307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9880 prompt_cache_len:5151 prompt_cache_ratio:0.5213562753036437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 +DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10867190361022949 s +INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11074280738830566 s +DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=188125718408042883991489142682868166758, time:1750767644.2268527s req_ids:[8] +DEBUG 06-24 20:20:44 [manager.py:391] +ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:207.0612907409668ms total_cost_time:207.10515975952148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9881 prompt_cache_len:5151 prompt_cache_ratio:0.5213035117903047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 +DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10909414291381836 s +INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.1110830307006836 s +DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=321062933222949637566173065974442666502, time:1750767644.4425356s req_ids:[8] +DEBUG 06-24 20:20:44 [manager.py:391] +ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:210.44230461120605ms total_cost_time:210.48712730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9882 prompt_cache_len:5151 prompt_cache_ratio:0.521250758955677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 +DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10823988914489746 s +INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11024832725524902 s +DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=162440463483307804226659245769572298390, time:1750767644.6567304s req_ids:[8] +DEBUG 06-24 20:20:44 [manager.py:391] +ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:208.61101150512695ms total_cost_time:208.65488052368164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9883 prompt_cache_len:5151 prompt_cache_ratio:0.5211980167965192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 +DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.11024999618530273 s +INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11221480369567871 s +DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=294384953879268681190450713043747543078, time:1750767644.8723514s req_ids:[8] +DEBUG 06-24 20:20:44 [manager.py:391] +ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:210.67333221435547ms total_cost_time:210.71839332580566ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9884 prompt_cache_len:5151 prompt_cache_ratio:0.5211452853095913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 +DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.11099910736083984 s +INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11284756660461426 s +DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=191386931295212723312556467212755032414, time:1750767645.0882506s req_ids:[8] +DEBUG 06-24 20:20:45 [manager.py:391] +ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:408.4815979003906ms total_cost_time:408.5254669189453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9885 prompt_cache_len:5151 prompt_cache_ratio:0.521092564491654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 +DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10951733589172363 s +INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11160802841186523 s +DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=81663825210486118114073163964727236765, time:1750767645.501231s req_ids:[8] +DEBUG 06-24 20:20:45 [manager.py:391] +ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:204.0395736694336ms total_cost_time:204.0998935699463ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9886 prompt_cache_len:5151 prompt_cache_ratio:0.52103985433947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 +DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10838937759399414 s +INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11016654968261719 s +DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=168677982381934386854181754710081848340, time:1750767645.7109559s req_ids:[8] +DEBUG 06-24 20:20:45 [manager.py:391] +ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:205.25479316711426ms total_cost_time:205.30033111572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9887 prompt_cache_len:5151 prompt_cache_ratio:0.5209871548498027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 +DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10902523994445801 s +INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11073899269104004 s +DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=333624429488186261856074132625158568426, time:1750767645.9302318s req_ids:[8] +DEBUG 06-24 20:20:45 [manager.py:391] +ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:216.36629104614258ms total_cost_time:216.41111373901367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9888 prompt_cache_len:5151 prompt_cache_ratio:0.5209344660194175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 +DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10846805572509766 s +INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11040925979614258 s +DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=163631020007746300915768704500755498749, time:1750767646.1458554s req_ids:[8] +DEBUG 06-24 20:20:46 [manager.py:391] +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:210.22605895996094ms total_cost_time:210.28494834899902ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9889 prompt_cache_len:5151 prompt_cache_ratio:0.5208817878450804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 +DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s +INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11053633689880371 s +DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=243161681238343816480894071986445783410, time:1750767646.3674445s req_ids:[8] +DEBUG 06-24 20:20:46 [manager.py:391] +ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:217.18811988830566ms total_cost_time:217.23103523254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9890 prompt_cache_len:5151 prompt_cache_ratio:0.5208291203235591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 +DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10822343826293945 s +INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11022257804870605 s +DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=13380787787805788429719768356090830890, time:1750767646.5867062s req_ids:[8] +DEBUG 06-24 20:20:46 [manager.py:391] +ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:210.46781539916992ms total_cost_time:210.51287651062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9891 prompt_cache_len:5151 prompt_cache_ratio:0.5207764634516227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 +DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10948920249938965 s +INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11154460906982422 s +DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=189841376659603989693115274284082644605, time:1750767646.8008347s req_ids:[8] +DEBUG 06-24 20:20:46 [manager.py:391] +ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:207.7009677886963ms total_cost_time:207.74579048156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9892 prompt_cache_len:5151 prompt_cache_ratio:0.5207238172260412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 +DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:46 [batch.py:51] router release req id 8 +INFO 06-24 20:20:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10839319229125977 s +INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s +DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=121393687532653621760617977440406562892, time:1750767647.0154874s req_ids:[8] +DEBUG 06-24 20:20:47 [manager.py:391] +ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:379.66012954711914ms total_cost_time:379.70590591430664ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9893 prompt_cache_len:5151 prompt_cache_ratio:0.5206711816435864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 +DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.1092677116394043 s +INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11111807823181152 s +DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=321092132993971158357821639423962274450, time:1750767647.3973281s req_ids:[8] +DEBUG 06-24 20:20:47 [manager.py:391] +ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:205.3520679473877ms total_cost_time:205.39331436157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9894 prompt_cache_len:5151 prompt_cache_ratio:0.520618556701031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 +DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10869526863098145 s +INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.110687255859375 s +DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=164058486827198979877252200023749882637, time:1750767647.61033s req_ids:[8] +DEBUG 06-24 20:20:47 [manager.py:391] +ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:208.35232734680176ms total_cost_time:208.39595794677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9895 prompt_cache_len:5151 prompt_cache_ratio:0.5205659423951491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 +DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10845208168029785 s +INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11040091514587402 s +DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=146880981361548222439618596416026006337, time:1750767647.8229754s req_ids:[8] +DEBUG 06-24 20:20:47 [manager.py:391] +ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:204.7121524810791ms total_cost_time:204.75482940673828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9896 prompt_cache_len:5151 prompt_cache_ratio:0.5205133387227162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 +DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.1089634895324707 s +INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11137270927429199 s +DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=61218077597539260524514589169240912483, time:1750767648.0365417s req_ids:[8] +DEBUG 06-24 20:20:48 [manager.py:391] +ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:210.30378341674805ms total_cost_time:210.34765243530273ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9897 prompt_cache_len:5151 prompt_cache_ratio:0.5204607456805093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 +DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s +INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11069774627685547 s +DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=7654666468082137691885753503010683951, time:1750767648.2517345s req_ids:[8] +DEBUG 06-24 20:20:48 [manager.py:391] +ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:211.79485321044922ms total_cost_time:211.85684204101562ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:9898 prompt_cache_len:5151 prompt_cache_ratio:0.5204081632653061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 +DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.10976791381835938 s +INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11162590980529785 s +DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=226164015312736010738695072021288213789, time:1750767648.4679751s req_ids:[8] +DEBUG 06-24 20:20:48 [manager.py:391] +ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:206.41756057739258ms total_cost_time:206.46047592163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9899 prompt_cache_len:5151 prompt_cache_ratio:0.5203555914738862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 +DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s +INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11014962196350098 s +DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=311516804142280656061973400366153985787, time:1750767648.681982s req_ids:[8] +DEBUG 06-24 20:20:48 [manager.py:391] +ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:209.08355712890625ms total_cost_time:209.12671089172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9900 prompt_cache_len:5151 prompt_cache_ratio:0.5203030303030303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 +DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.3102424144744873 s +INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.31230950355529785 s +DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=317320175677442533329948112406176659206, time:1750767649.114971s req_ids:[8] +DEBUG 06-24 20:20:49 [manager.py:391] +ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:430.039644241333ms total_cost_time:430.0832748413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9901 prompt_cache_len:5151 prompt_cache_ratio:0.5202504797495202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 +DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s +INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.11058497428894043 s +DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=273342720085073954242036592856850556877, time:1750767649.331593s req_ids:[8] +DEBUG 06-24 20:20:49 [manager.py:391] +ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:207.83519744873047ms total_cost_time:207.88049697875977ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9902 prompt_cache_len:5151 prompt_cache_ratio:0.5201979398101394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 +DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10814976692199707 s +INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.1094202995300293 s +DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=51326364993960170451700342698613703650, time:1750767649.5472248s req_ids:[8] +DEBUG 06-24 20:20:49 [manager.py:391] +ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:208.34589004516602ms total_cost_time:208.3895206451416ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9903 prompt_cache_len:5151 prompt_cache_ratio:0.5201454104816722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 +DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10816025733947754 s +INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025714874267578 s +DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=314088385529207409621295462312068046265, time:1750767649.7614405s req_ids:[8] +DEBUG 06-24 20:20:49 [manager.py:391] +ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:208.5108757019043ms total_cost_time:208.5866928100586ms,out_token_counter:1 mean_per_token_cost_time: 0.07581710815429688ms prompt_token_num:9904 prompt_cache_len:5151 prompt_cache_ratio:0.5200928917609047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 +DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10775375366210938 s +INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.10945653915405273 s +DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=173785239330932506444723210247542134277, time:1750767649.9750614s req_ids:[8] +DEBUG 06-24 20:20:49 [manager.py:391] +ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:206.7122459411621ms total_cost_time:206.75897598266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9905 prompt_cache_len:5151 prompt_cache_ratio:0.5200403836446239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 +DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10947942733764648 s +INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140608787536621 s +DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=306505746546170394459764457884315686306, time:1750767650.1884894s req_ids:[8] +DEBUG 06-24 20:20:50 [manager.py:391] +ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:208.48727226257324ms total_cost_time:208.54687690734863ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:9906 prompt_cache_len:5151 prompt_cache_ratio:0.5199878861296184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 +DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10834908485412598 s +INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11004447937011719 s +DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=277306760785593645747118556100421222228, time:1750767650.4037344s req_ids:[8] +DEBUG 06-24 20:20:50 [manager.py:391] +ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:206.25042915344238ms total_cost_time:206.29429817199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9907 prompt_cache_len:5151 prompt_cache_ratio:0.519935399212678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 +DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s +INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11022329330444336 s +DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=231771345956104574097618857690044203648, time:1750767650.615426s req_ids:[8] +DEBUG 06-24 20:20:50 [manager.py:391] +ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:380.59163093566895ms total_cost_time:380.63693046569824ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9908 prompt_cache_len:5151 prompt_cache_ratio:0.5198829228905935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 +DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s +INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.10998082160949707 s +DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=261548013875112434465631004391562673004, time:1750767650.9996867s req_ids:[8] +DEBUG 06-24 20:20:50 [manager.py:391] +ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:162.41788864135742ms total_cost_time:162.46318817138672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9909 prompt_cache_len:5151 prompt_cache_ratio:0.5198304571601574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 +DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.1088402271270752 s +INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11102724075317383 s +DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=309018862987142025115431894535816575948, time:1750767651.1689436s req_ids:[8] +DEBUG 06-24 20:20:51 [manager.py:391] +ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:20:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 41378.152 tokens/s +DEBUG 06-24 20:20:51 [stats.py:37] Avg prompt tokens throughput: 41369.686 tokens/s +DEBUG 06-24 20:20:51 [stats.py:37] Avg generate tokens throughput: 8.466 tokens/s +INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:195.45292854309082ms total_cost_time:195.51658630371094ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:9910 prompt_cache_len:5151 prompt_cache_ratio:0.5197780020181635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 +DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.10827326774597168 s +INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000847816467285 s +DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=94489174169281886247160007134792499541, time:1750767651.370296s req_ids:[8] +DEBUG 06-24 20:20:51 [manager.py:391] +ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:198.7473964691162ms total_cost_time:198.7912654876709ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9911 prompt_cache_len:5151 prompt_cache_ratio:0.5197255574614065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 +DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.11065983772277832 s +INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11197614669799805 s +DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=335807280223569339657691140883727269331, time:1750767651.5781727s req_ids:[8] +DEBUG 06-24 20:20:51 [manager.py:391] +ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:207.14592933654785ms total_cost_time:207.18955993652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9912 prompt_cache_len:5151 prompt_cache_ratio:0.5196731234866828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 +DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.10956597328186035 s +INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.1114192008972168 s +DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=320745414109206876440773494153284768016, time:1750767651.7941086s req_ids:[8] +DEBUG 06-24 20:20:51 [manager.py:391] +ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:210.07394790649414ms total_cost_time:210.11805534362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9913 prompt_cache_len:5151 prompt_cache_ratio:0.5196207000907899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 +DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10977506637573242 s +INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.1116793155670166 s +DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=172244253849782513583953329480053825896, time:1750767652.0086656s req_ids:[8] +DEBUG 06-24 20:20:52 [manager.py:391] +ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:208.56976509094238ms total_cost_time:208.61244201660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9914 prompt_cache_len:5151 prompt_cache_ratio:0.5195682872705265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 +DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10772299766540527 s +INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.10955381393432617 s +DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=270987464514143766364327673023628163689, time:1750767652.2292407s req_ids:[8] +DEBUG 06-24 20:20:52 [manager.py:391] +ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:215.58475494384766ms total_cost_time:215.62790870666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9915 prompt_cache_len:5151 prompt_cache_ratio:0.5195158850226929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 +DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.11062407493591309 s +INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.11256957054138184 s +DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=254126646303119867233221327869802306417, time:1750767652.445792s req_ids:[8] +DEBUG 06-24 20:20:52 [manager.py:391] +ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:217.43512153625488ms total_cost_time:217.48018264770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9916 prompt_cache_len:5151 prompt_cache_ratio:0.5194634933440904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 +DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10887980461120605 s +INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.11092400550842285 s +DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=69246610673891094944948994819703108954, time:1750767652.6751306s req_ids:[8] +DEBUG 06-24 20:20:52 [manager.py:391] +ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:407.1533679962158ms total_cost_time:407.1977138519287ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9917 prompt_cache_len:5151 prompt_cache_ratio:0.5194111122315216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 +DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10956811904907227 s +INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11143136024475098 s +DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=107666578661603905235050777174683000406, time:1750767653.0789864s req_ids:[8] +DEBUG 06-24 20:20:53 [manager.py:391] +ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:207.69238471984863ms total_cost_time:207.73577690124512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9918 prompt_cache_len:5151 prompt_cache_ratio:0.5193587416817906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 +INFO 06-24 20:20:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:20:53 [statics_utils.py:24] mean first cost: 228.55945710240874 ms +INFO 06-24 20:20:53 [statics_utils.py:24] mean per token cost: 0.06977781880966027 ms +DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10997915267944336 s +INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11199450492858887 s +INFO 06-24 20:20:53 [manager.py:620] left req id 8can release False refcount 3 +DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=41412148109367468713269515340594385636, time:1750767653.2951975s req_ids:[8] +DEBUG 06-24 20:20:53 [manager.py:391] +ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:210.1731300354004ms total_cost_time:210.21795272827148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9919 prompt_cache_len:5151 prompt_cache_ratio:0.5193063816917028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 +DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10846924781799316 s +INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11048078536987305 s +DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=22290365138575200200818037451803950864, time:1750767653.5114722s req_ids:[8] +DEBUG 06-24 20:20:53 [manager.py:391] +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:207.9756259918213ms total_cost_time:208.0214023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9920 prompt_cache_len:5151 prompt_cache_ratio:0.5192540322580645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 +DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10810089111328125 s +INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11024928092956543 s +DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=40686012056419928807309908434534712101, time:1750767653.7233381s req_ids:[8] +DEBUG 06-24 20:20:53 [manager.py:391] +ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:208.19664001464844ms total_cost_time:208.24265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9921 prompt_cache_len:5151 prompt_cache_ratio:0.5192016933776837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 +DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10889339447021484 s +INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11108589172363281 s +DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=297837684477516239657388566648269175007, time:1750767653.9395359s req_ids:[8] +DEBUG 06-24 20:20:53 [manager.py:391] +ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:209.65957641601562ms total_cost_time:209.70559120178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9922 prompt_cache_len:5151 prompt_cache_ratio:0.5191493650473695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 +DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10936951637268066 s +INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11138248443603516 s +DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=10970470048835329913858813642914984453, time:1750767654.1553917s req_ids:[8] +DEBUG 06-24 20:20:54 [manager.py:391] +ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:209.95163917541504ms total_cost_time:209.99693870544434ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9923 prompt_cache_len:5151 prompt_cache_ratio:0.5190970472639322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 +DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10816335678100586 s +INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012053489685059 s +DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=301056452421119756747039332133775401391, time:1750767654.3700624s req_ids:[8] +DEBUG 06-24 20:20:54 [manager.py:391] +ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:209.84601974487305ms total_cost_time:209.88988876342773ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9924 prompt_cache_len:5151 prompt_cache_ratio:0.5190447400241838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 +DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s +INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s +DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=135924630575370879627074432169980386401, time:1750767654.5877664s req_ids:[8] +DEBUG 06-24 20:20:54 [manager.py:391] +ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:369.5874214172363ms total_cost_time:369.63367462158203ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9925 prompt_cache_len:5151 prompt_cache_ratio:0.518992443324937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 +DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10813546180725098 s +INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11005163192749023 s +DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=190992739326582510060511799598515707255, time:1750767654.959295s req_ids:[8] +DEBUG 06-24 20:20:54 [manager.py:391] +ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:207.5192928314209ms total_cost_time:207.5636386871338ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9926 prompt_cache_len:5151 prompt_cache_ratio:0.5189401571630062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 +DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10828781127929688 s +INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1102452278137207 s +DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=289454582770496445817708896174862167366, time:1750767655.1750066s req_ids:[8] +DEBUG 06-24 20:20:55 [manager.py:391] +ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:210.16550064086914ms total_cost_time:210.20984649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9927 prompt_cache_len:5151 prompt_cache_ratio:0.518887881535207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 +DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10693240165710449 s +INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1087641716003418 s +DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=280295909085302520328724668154749677591, time:1750767655.391663s req_ids:[8] +DEBUG 06-24 20:20:55 [manager.py:391] +ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:209.38515663146973ms total_cost_time:209.43093299865723ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9928 prompt_cache_len:5151 prompt_cache_ratio:0.5188356164383562 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 +DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10798430442810059 s +INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s +DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=15569096682967370885726323017318071405, time:1750767655.607369s req_ids:[8] +DEBUG 06-24 20:20:55 [manager.py:391] +ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:210.89863777160645ms total_cost_time:210.94465255737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9929 prompt_cache_len:5151 prompt_cache_ratio:0.5187833618692719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 +DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10831308364868164 s +INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103055477142334 s +DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=278633271057273486695497892105762031023, time:1750767655.8240693s req_ids:[8] +DEBUG 06-24 20:20:55 [manager.py:391] +ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:207.535982131958ms total_cost_time:207.5803279876709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9930 prompt_cache_len:5151 prompt_cache_ratio:0.5187311178247734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 +DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10807061195373535 s +INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11006784439086914 s +DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=60560146148668280968771922280226871867, time:1750767656.0383618s req_ids:[8] +DEBUG 06-24 20:20:56 [manager.py:391] +ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:211.9302749633789ms total_cost_time:211.9762897491455ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9931 prompt_cache_len:5151 prompt_cache_ratio:0.5186788843016816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 +DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s +INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11044812202453613 s +DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=11204574451932175979217886445579166321, time:1750767656.2615516s req_ids:[8] +DEBUG 06-24 20:20:56 [manager.py:391] +ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:178.88283729553223ms total_cost_time:178.9388656616211ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9932 prompt_cache_len:5151 prompt_cache_ratio:0.5186266612968183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 +DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10929179191589355 s +INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11136865615844727 s +DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=198318554860585403016489114115363768310, time:1750767656.4393396s req_ids:[8] +DEBUG 06-24 20:20:56 [manager.py:391] +ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:371.33073806762695ms total_cost_time:371.37532234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9933 prompt_cache_len:5151 prompt_cache_ratio:0.518574448807007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 +DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s +INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11069107055664062 s +DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=330590804664262865529786039201684416981, time:1750767656.8159032s req_ids:[8] +DEBUG 06-24 20:20:56 [manager.py:391] +ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:188.62175941467285ms total_cost_time:188.66705894470215ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9934 prompt_cache_len:5151 prompt_cache_ratio:0.5185222468290719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 +DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10749483108520508 s +INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.10854244232177734 s +DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=123965026933437333118654804617355171576, time:1750767657.0100503s req_ids:[8] +DEBUG 06-24 20:20:57 [manager.py:391] +ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:206.3882350921631ms total_cost_time:206.43091201782227ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9935 prompt_cache_len:5151 prompt_cache_ratio:0.5184700553598389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 +DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s +INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.11084103584289551 s +DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=173571683204316120938873125343524767670, time:1750767657.2216396s req_ids:[8] +DEBUG 06-24 20:20:57 [manager.py:391] +ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:206.2366008758545ms total_cost_time:206.2821388244629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9936 prompt_cache_len:5151 prompt_cache_ratio:0.5184178743961353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 +DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10791563987731934 s +INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096951961517334 s +DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=111840610641777610316120718853855267976, time:1750767657.434745s req_ids:[8] +DEBUG 06-24 20:20:57 [manager.py:391] +ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:168.1978702545166ms total_cost_time:168.23863983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9937 prompt_cache_len:5151 prompt_cache_ratio:0.5183657039347892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 +DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10871315002441406 s +INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.11058807373046875 s +DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=110501370039389872827039626051623097239, time:1750767657.6085336s req_ids:[8] +DEBUG 06-24 20:20:57 [manager.py:391] +ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:200.1338005065918ms total_cost_time:200.17623901367188ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9938 prompt_cache_len:5151 prompt_cache_ratio:0.5183135439726303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 +DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10793113708496094 s +INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.10984683036804199 s +DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=10193515210701057199399957373723113977, time:1750767657.813302s req_ids:[8] +DEBUG 06-24 20:20:57 [manager.py:391] +ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:199.4149684906006ms total_cost_time:199.45907592773438ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9939 prompt_cache_len:5151 prompt_cache_ratio:0.5182613945064896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 +DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10789108276367188 s +INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.10990023612976074 s +DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=258878670054681111014811377985024597995, time:1750767658.020507s req_ids:[8] +DEBUG 06-24 20:20:58 [manager.py:391] +ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:209.38777923583984ms total_cost_time:209.43045616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9940 prompt_cache_len:5151 prompt_cache_ratio:0.5182092555331992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 +DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s +INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11026382446289062 s +DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=40536146178302451164866466184241225559, time:1750767658.2347372s req_ids:[8] +DEBUG 06-24 20:20:58 [manager.py:391] +ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:369.07172203063965ms total_cost_time:369.11606788635254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9941 prompt_cache_len:5151 prompt_cache_ratio:0.5181571270495926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 +DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s +INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s +DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=266016124966263148606793095131775562043, time:1750767658.6085687s req_ids:[8] +DEBUG 06-24 20:20:58 [manager.py:391] +ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:203.87506484985352ms total_cost_time:203.92107963562012ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9942 prompt_cache_len:5151 prompt_cache_ratio:0.5181050090525046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 +DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10862040519714355 s +INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11054205894470215 s +DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=303657712685032260571406807054286837166, time:1750767658.8234777s req_ids:[8] +DEBUG 06-24 20:20:58 [manager.py:391] +ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:207.8866958618164ms total_cost_time:207.9448699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:9943 prompt_cache_len:5151 prompt_cache_ratio:0.518052901538771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 +DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10811805725097656 s +INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.10971856117248535 s +DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=36443218352821080560374997261471680640, time:1750767659.036104s req_ids:[8] +DEBUG 06-24 20:20:59 [manager.py:391] +ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:207.86786079406738ms total_cost_time:207.91387557983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9944 prompt_cache_len:5151 prompt_cache_ratio:0.5180008045052292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 +DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10793256759643555 s +INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.10988950729370117 s +DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=183136565938974985875453033110202944807, time:1750767659.2506392s req_ids:[8] +DEBUG 06-24 20:20:59 [manager.py:391] +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.81597900390625ms total_cost_time:209.86056327819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9945 prompt_cache_len:5151 prompt_cache_ratio:0.517948717948718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 +DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10762524604797363 s +INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.1095426082611084 s +DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=132336491755839788229827732163589310296, time:1750767659.466637s req_ids:[8] +DEBUG 06-24 20:20:59 [manager.py:391] +ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.54632759094238ms total_cost_time:209.59830284118652ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:9946 prompt_cache_len:5151 prompt_cache_ratio:0.5178966418660769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 +DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:59 [batch.py:51] router release req id 8 +INFO 06-24 20:20:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10875463485717773 s +INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s +DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=181758097636460314387901909898746724908, time:1750767659.682529s req_ids:[8] +DEBUG 06-24 20:20:59 [manager.py:391] +ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.00297164916992ms total_cost_time:209.04827117919922ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9947 prompt_cache_len:5151 prompt_cache_ratio:0.5178445762541469 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 +DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:20:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.11018967628479004 s +INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.11212468147277832 s +DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=273274709059460904365627622650169209396, time:1750767659.8981586s req_ids:[8] +DEBUG 06-24 20:20:59 [manager.py:391] +ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:204.4076919555664ms total_cost_time:204.451322555542ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9948 prompt_cache_len:5151 prompt_cache_ratio:0.5177925211097708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 +DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10813689231872559 s +INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.11008691787719727 s +DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=185222268399279423144052114080611671725, time:1750767660.1057796s req_ids:[8] +DEBUG 06-24 20:21:00 [manager.py:391] +ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:204.7281265258789ms total_cost_time:204.7715187072754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9949 prompt_cache_len:5151 prompt_cache_ratio:0.5177404764297919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 +DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10974764823913574 s +INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.1116189956665039 s +DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=332464861543650424361201588821276783734, time:1750767660.3179812s req_ids:[8] +DEBUG 06-24 20:21:00 [manager.py:391] +ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:356.2922477722168ms total_cost_time:356.3370704650879ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9950 prompt_cache_len:5151 prompt_cache_ratio:0.5176884422110553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 +DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10858464241027832 s +INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s +DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=129166784603824183022138822911491398708, time:1750767660.6817322s req_ids:[8] +DEBUG 06-24 20:21:00 [manager.py:391] +ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:202.83007621765137ms total_cost_time:202.87060737609863ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9951 prompt_cache_len:5151 prompt_cache_ratio:0.517636418450407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 +DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10750055313110352 s +INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.10927462577819824 s +DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=139641029111335068301324851956824432353, time:1750767660.8900936s req_ids:[8] +DEBUG 06-24 20:21:00 [manager.py:391] +ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:204.05268669128418ms total_cost_time:204.09727096557617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9952 prompt_cache_len:5151 prompt_cache_ratio:0.5175844051446945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 +DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.1095573902130127 s +INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144638061523438 s +DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=26394718543908845080344080704352155005, time:1750767661.1009364s req_ids:[8] +DEBUG 06-24 20:21:01 [manager.py:391] +ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:204.1783332824707ms total_cost_time:204.23626899719238ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9953 prompt_cache_len:5151 prompt_cache_ratio:0.5175324022907666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 +DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10763168334960938 s +INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.1095285415649414 s +DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=129480446570101706200776640659649642876, time:1750767661.3101473s req_ids:[8] +DEBUG 06-24 20:21:01 [manager.py:391] +DEBUG 06-24 20:21:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 43438.683 tokens/s +DEBUG 06-24 20:21:01 [stats.py:37] Avg prompt tokens throughput: 43430.037 tokens/s +DEBUG 06-24 20:21:01 [stats.py:37] Avg generate tokens throughput: 8.646 tokens/s +ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:209.00321006774902ms total_cost_time:209.04970169067383ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9954 prompt_cache_len:5151 prompt_cache_ratio:0.5174804098854732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 +DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.11225128173828125 s +INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11424922943115234 s +DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=234080343148501379406998050925219224952, time:1750767661.536944s req_ids:[8] +DEBUG 06-24 20:21:01 [manager.py:391] +ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:224.49326515197754ms total_cost_time:224.53832626342773ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9955 prompt_cache_len:5151 prompt_cache_ratio:0.5174284279256655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 +DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10790848731994629 s +INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s +DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=60172369518621274140902527900406187761, time:1750767661.7570713s req_ids:[8] +DEBUG 06-24 20:21:01 [manager.py:391] +ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:205.19328117370605ms total_cost_time:205.23881912231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9956 prompt_cache_len:5151 prompt_cache_ratio:0.5173764564081961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 +DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10923910140991211 s +INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11113166809082031 s +DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=112305038055508124378197539320394231454, time:1750767661.9681523s req_ids:[8] +DEBUG 06-24 20:21:01 [manager.py:391] +ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:208.34994316101074ms total_cost_time:208.39309692382812ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9957 prompt_cache_len:5151 prompt_cache_ratio:0.5173244953299186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 +DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10889005661010742 s +INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s +DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=69800990742856507034415933235353276116, time:1750767662.1824415s req_ids:[8] +DEBUG 06-24 20:21:02 [manager.py:391] +ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:371.83356285095215ms total_cost_time:371.87647819519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9958 prompt_cache_len:5151 prompt_cache_ratio:0.5172725446876882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 +DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10792708396911621 s +INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s +DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=118423450614302879731655470062462990466, time:1750767662.5589702s req_ids:[8] +DEBUG 06-24 20:21:02 [manager.py:391] +ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:205.38663864135742ms total_cost_time:205.4286003112793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9959 prompt_cache_len:5151 prompt_cache_ratio:0.5172206044783613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 +DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s +INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11082792282104492 s +DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=162848021179699535963005010316348445442, time:1750767662.7718565s req_ids:[8] +DEBUG 06-24 20:21:02 [manager.py:391] +ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:207.62181282043457ms total_cost_time:207.66496658325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9960 prompt_cache_len:5151 prompt_cache_ratio:0.5171686746987951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 +DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10903406143188477 s +INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11103057861328125 s +DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=172428507386319781555218910924323746751, time:1750767662.9845843s req_ids:[8] +DEBUG 06-24 20:21:02 [manager.py:391] +ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:207.55815505981445ms total_cost_time:207.60202407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9961 prompt_cache_len:5151 prompt_cache_ratio:0.5171167553458488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 +DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:03 [batch.py:51] router release req id 8 +INFO 06-24 20:21:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10780954360961914 s +INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.10966944694519043 s +DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=24463816073667950368156730698820009816, time:1750767663.1980839s req_ids:[8] +DEBUG 06-24 20:21:03 [manager.py:391] +ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:206.94231986999512ms total_cost_time:206.9871425628662ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9962 prompt_cache_len:5151 prompt_cache_ratio:0.5170648464163823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 +DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s +INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.10973930358886719 s +DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=169975598456152039738536182986348594070, time:1750767663.410929s req_ids:[8] +DEBUG 06-24 20:21:03 [manager.py:391] +ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:209.29884910583496ms total_cost_time:209.33914184570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:9963 prompt_cache_len:5151 prompt_cache_ratio:0.5170129479072568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 +DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10853981971740723 s +INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.11044859886169434 s +DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=143862426211613561403549361754594968269, time:1750767663.6276522s req_ids:[8] +DEBUG 06-24 20:21:03 [manager.py:391] +ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:209.75875854492188ms total_cost_time:209.80286598205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9964 prompt_cache_len:5151 prompt_cache_ratio:0.5169610598153352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 +DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s +INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s +DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=93552411160066230399512086584808525191, time:1750767663.8560488s req_ids:[8] +DEBUG 06-24 20:21:03 [manager.py:391] +ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:224.93457794189453ms total_cost_time:224.9772548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9965 prompt_cache_len:5151 prompt_cache_ratio:0.5169091821374812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 +DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.30924248695373535 s +INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.31040191650390625 s +DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=279714164212704705235431471574101401370, time:1750767664.2726552s req_ids:[8] +DEBUG 06-24 20:21:04 [manager.py:391] +ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:413.6035442352295ms total_cost_time:413.6490821838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9966 prompt_cache_len:5151 prompt_cache_ratio:0.5168573148705599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 +DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s +INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.10991311073303223 s +DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=135690711032428744047720222660508084969, time:1750767664.491355s req_ids:[8] +DEBUG 06-24 20:21:04 [manager.py:391] +ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:209.7623348236084ms total_cost_time:209.8076343536377ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9967 prompt_cache_len:5151 prompt_cache_ratio:0.5168054580114377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 +DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.11158490180969238 s +INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.11353611946105957 s +DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=183748781579142167714979272542056348758, time:1750767664.7064996s req_ids:[8] +DEBUG 06-24 20:21:04 [manager.py:391] +ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:209.8398208618164ms total_cost_time:209.8827362060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9968 prompt_cache_len:5151 prompt_cache_ratio:0.5167536115569823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 +DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s +INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.11001324653625488 s +DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=282814132950444738923495225033291252789, time:1750767664.9222932s req_ids:[8] +DEBUG 06-24 20:21:04 [manager.py:391] +ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:210.71791648864746ms total_cost_time:210.77871322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:9969 prompt_cache_len:5151 prompt_cache_ratio:0.5167017755040626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 +DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10755729675292969 s +INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.10951709747314453 s +DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=23388699106980153411544501890576106891, time:1750767665.1380618s req_ids:[8] +DEBUG 06-24 20:21:05 [manager.py:391] +ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:208.3725929260254ms total_cost_time:208.41360092163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9970 prompt_cache_len:5151 prompt_cache_ratio:0.5166499498495486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 +DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s +INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.11057806015014648 s +DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=22878211419665127922682799821278860024, time:1750767665.3526773s req_ids:[8] +DEBUG 06-24 20:21:05 [manager.py:391] +ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:209.4407081604004ms total_cost_time:209.49554443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9971 prompt_cache_len:5151 prompt_cache_ratio:0.5165981345903119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 +DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s +INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.11117863655090332 s +DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=232207132289008827513977614181120207721, time:1750767665.567956s req_ids:[8] +DEBUG 06-24 20:21:05 [manager.py:391] +ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:208.01281929016113ms total_cost_time:208.0550193786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9972 prompt_cache_len:5151 prompt_cache_ratio:0.516546329723225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 +DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10769248008728027 s +INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.10959625244140625 s +DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=215006537970871500549139802932536460796, time:1750767665.7822976s req_ids:[8] +DEBUG 06-24 20:21:05 [manager.py:391] +ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:377.66075134277344ms total_cost_time:377.70533561706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9973 prompt_cache_len:5151 prompt_cache_ratio:0.516494535245162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 +DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10881829261779785 s +INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11079621315002441 s +DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=17100289891285958506334936168275958895, time:1750767666.1631534s req_ids:[8] +DEBUG 06-24 20:21:06 [manager.py:391] +ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:206.4826488494873ms total_cost_time:206.52484893798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9974 prompt_cache_len:5151 prompt_cache_ratio:0.5164427511529978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 +DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10811972618103027 s +INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11015677452087402 s +DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=179813701737561185777273573297107603470, time:1750767666.37906s req_ids:[8] +DEBUG 06-24 20:21:06 [manager.py:391] +ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:209.8236083984375ms total_cost_time:209.8681926727295ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9975 prompt_cache_len:5151 prompt_cache_ratio:0.5163909774436091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 +DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s +INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s +DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=250937847908470706637058327255795703214, time:1750767666.594391s req_ids:[8] +DEBUG 06-24 20:21:06 [manager.py:391] +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:220.86191177368164ms total_cost_time:220.90411186218262ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9976 prompt_cache_len:5151 prompt_cache_ratio:0.5163392141138733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 +DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10848808288574219 s +INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11041259765625 s +DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=168796196435107336272721574124022867492, time:1750767666.8322642s req_ids:[8] +DEBUG 06-24 20:21:06 [manager.py:391] +ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:222.2137451171875ms total_cost_time:222.25642204284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9977 prompt_cache_len:5151 prompt_cache_ratio:0.5162874611606696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 +DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s +INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.10986924171447754 s +DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=307576314839161240363538225544365554187, time:1750767667.0494096s req_ids:[8] +DEBUG 06-24 20:21:07 [manager.py:391] +ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:206.03346824645996ms total_cost_time:206.07829093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9978 prompt_cache_len:5151 prompt_cache_ratio:0.5162357185808779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 +DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s +INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.11013531684875488 s +DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=108183174978379412043035103833611140736, time:1750767667.2684855s req_ids:[8] +DEBUG 06-24 20:21:07 [manager.py:391] +ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:219.07758712768555ms total_cost_time:219.12288665771484ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9979 prompt_cache_len:5151 prompt_cache_ratio:0.5161839863713799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 +DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10893917083740234 s +INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s +DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=282577177561919562890152058072844039607, time:1750767667.4852571s req_ids:[8] +DEBUG 06-24 20:21:07 [manager.py:391] +ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:210.87145805358887ms total_cost_time:210.91866493225098ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9980 prompt_cache_len:5151 prompt_cache_ratio:0.5161322645290581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 +DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10757708549499512 s +INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s +DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=120046370712717207346296916590439979355, time:1750767667.6976185s req_ids:[8] +DEBUG 06-24 20:21:07 [manager.py:391] +ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:204.37121391296387ms total_cost_time:204.41675186157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9981 prompt_cache_len:5151 prompt_cache_ratio:0.5160805530507965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 +DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10858297348022461 s +INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.1105186939239502 s +DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=222506289079808776796965038722241532514, time:1750767667.9145157s req_ids:[8] +DEBUG 06-24 20:21:07 [manager.py:391] +ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:375.5908012390137ms total_cost_time:375.63633918762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9982 prompt_cache_len:5151 prompt_cache_ratio:0.5160288519334802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 +DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10915088653564453 s +INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.1110074520111084 s +DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=94483011821250945046934461359916865649, time:1750767668.288998s req_ids:[8] +DEBUG 06-24 20:21:08 [manager.py:391] +ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:203.04465293884277ms total_cost_time:203.09019088745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9983 prompt_cache_len:5151 prompt_cache_ratio:0.5159771611739958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 +DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10854697227478027 s +INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11046671867370605 s +DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=5297760170437189769376425518692500176, time:1750767668.5004077s req_ids:[8] +DEBUG 06-24 20:21:08 [manager.py:391] +ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:207.02123641967773ms total_cost_time:207.06605911254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9984 prompt_cache_len:5151 prompt_cache_ratio:0.5159254807692307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 +DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s +INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11028051376342773 s +DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=276812833751982453210791032269573598053, time:1750767668.7128441s req_ids:[8] +DEBUG 06-24 20:21:08 [manager.py:391] +ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:207.3996067047119ms total_cost_time:207.41605758666992ms,out_token_counter:1 mean_per_token_cost_time: 0.016450881958007812ms prompt_token_num:9985 prompt_cache_len:5151 prompt_cache_ratio:0.5158738107160741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 +DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s +INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11006879806518555 s +DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=39765604879132248614607728293641060354, time:1750767668.9272919s req_ids:[8] +DEBUG 06-24 20:21:08 [manager.py:391] +ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:211.12680435180664ms total_cost_time:211.16971969604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9986 prompt_cache_len:5151 prompt_cache_ratio:0.5158221510114159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 +DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10746955871582031 s +INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.10936903953552246 s +DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=294372783429016060194793841420548569083, time:1750767669.1445508s req_ids:[8] +DEBUG 06-24 20:21:09 [manager.py:391] +ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:207.17644691467285ms total_cost_time:207.22270011901855ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9987 prompt_cache_len:5151 prompt_cache_ratio:0.5157705016521478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 +DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s +INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.1091310977935791 s +DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=140881631131160804137596483978217227902, time:1750767669.354957s req_ids:[8] +DEBUG 06-24 20:21:09 [manager.py:391] +ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:163.1300449371338ms total_cost_time:163.17367553710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9988 prompt_cache_len:5151 prompt_cache_ratio:0.5157188626351622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 +DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10692048072814941 s +INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.10885047912597656 s +DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=138661740903121516764081382344389267930, time:1750767669.5257733s req_ids:[8] +DEBUG 06-24 20:21:09 [manager.py:391] +ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:195.5397129058838ms total_cost_time:195.58215141296387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9989 prompt_cache_len:5151 prompt_cache_ratio:0.5156672339573531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 +DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10903811454772949 s +INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s +DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=166849256525591806119343712592361928164, time:1750767669.7309976s req_ids:[8] +DEBUG 06-24 20:21:09 [manager.py:391] +ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:363.4190559387207ms total_cost_time:363.4629249572754ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9990 prompt_cache_len:5151 prompt_cache_ratio:0.5156156156156156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 +DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10833311080932617 s +INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.1096644401550293 s +DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=57587758008193816251025902434036326038, time:1750767670.0981374s req_ids:[8] +DEBUG 06-24 20:21:10 [manager.py:391] +ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:204.8776149749756ms total_cost_time:204.91981506347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9991 prompt_cache_len:5151 prompt_cache_ratio:0.5155640076068462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 +DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:10 [batch.py:51] router release req id 8 +INFO 06-24 20:21:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s +INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10923027992248535 s +DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=280835558985208009245880622555525650595, time:1750767670.3100107s req_ids:[8] +DEBUG 06-24 20:21:10 [manager.py:391] +ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:203.52697372436523ms total_cost_time:203.57155799865723ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9992 prompt_cache_len:5151 prompt_cache_ratio:0.5155124099279423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 +DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10964322090148926 s +DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=90450806788128003340951744083752884362, time:1750767670.520659s req_ids:[8] +DEBUG 06-24 20:21:10 [manager.py:391] +ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:205.32751083374023ms total_cost_time:205.3694725036621ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9993 prompt_cache_len:5151 prompt_cache_ratio:0.5154608225758031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 +DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10808920860290527 s +INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s +DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=180899965111927181935825818141597714707, time:1750767670.742921s req_ids:[8] +DEBUG 06-24 20:21:10 [manager.py:391] +ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:219.99502182006836ms total_cost_time:220.03912925720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9994 prompt_cache_len:5151 prompt_cache_ratio:0.5154092455473284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 +DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.1080024242401123 s +INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10934114456176758 s +DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=40104557225523290884687562116290541949, time:1750767670.958038s req_ids:[8] +DEBUG 06-24 20:21:10 [manager.py:391] +ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:208.94765853881836ms total_cost_time:208.99105072021484ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9995 prompt_cache_len:5151 prompt_cache_ratio:0.5153576788394197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 +DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10812520980834961 s +INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.10949373245239258 s +DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=241801547083469787472972769137870442853, time:1750767671.1746902s req_ids:[8] +DEBUG 06-24 20:21:11 [manager.py:391] +ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:207.68404006958008ms total_cost_time:207.72790908813477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9996 prompt_cache_len:5151 prompt_cache_ratio:0.5153061224489796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 +DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10889649391174316 s +INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s +DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=58879665799645463204103263682261571893, time:1750767671.3969471s req_ids:[8] +DEBUG 06-24 20:21:11 [manager.py:391] +DEBUG 06-24 20:21:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 42536.888 tokens/s +DEBUG 06-24 20:21:11 [stats.py:37] Avg prompt tokens throughput: 42528.362 tokens/s +DEBUG 06-24 20:21:11 [stats.py:37] Avg generate tokens throughput: 8.526 tokens/s +ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:220.68381309509277ms total_cost_time:220.72625160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9997 prompt_cache_len:5151 prompt_cache_ratio:0.5152545763729118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 +DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10813021659851074 s +INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.10937356948852539 s +DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=38477199961946528073559778566850430395, time:1750767671.6136253s req_ids:[8] +DEBUG 06-24 20:21:11 [manager.py:391] +ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:373.7192153930664ms total_cost_time:373.762845993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9998 prompt_cache_len:5151 prompt_cache_ratio:0.5152030406081216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 +DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10929417610168457 s +INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s +DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=203684489208498424483161072921823719551, time:1750767671.9913633s req_ids:[8] +DEBUG 06-24 20:21:11 [manager.py:391] +ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:207.56864547729492ms total_cost_time:207.6106071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9999 prompt_cache_len:5151 prompt_cache_ratio:0.5151515151515151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 +DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.11094117164611816 s +INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.11201906204223633 s +DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=196829641810847984290401703276510273359, time:1750767672.2064517s req_ids:[8] +DEBUG 06-24 20:21:12 [manager.py:391] +ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.51469039916992ms total_cost_time:208.59551429748535ms,out_token_counter:1 mean_per_token_cost_time: 0.08082389831542969ms prompt_token_num:10000 prompt_cache_len:5151 prompt_cache_ratio:0.5151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 +DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10743951797485352 s +INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10855865478515625 s +DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=243736951514448719717173695058871981784, time:1750767672.4224358s req_ids:[8] +DEBUG 06-24 20:21:12 [manager.py:391] +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.41240882873535ms total_cost_time:208.45556259155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10001 prompt_cache_len:5151 prompt_cache_ratio:0.5150484951504849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 +DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10831999778747559 s +INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950946807861328 s +DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=207492440192782490309536909041136545524, time:1750767672.6367188s req_ids:[8] +DEBUG 06-24 20:21:12 [manager.py:391] +ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:207.8382968902588ms total_cost_time:207.88145065307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10002 prompt_cache_len:5151 prompt_cache_ratio:0.5149970005998801 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 +DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10789656639099121 s +INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10902714729309082 s +DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=190319853639039492489269438048347113176, time:1750767672.8516507s req_ids:[8] +DEBUG 06-24 20:21:12 [manager.py:391] +ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:209.66863632202148ms total_cost_time:209.72084999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:10003 prompt_cache_len:5151 prompt_cache_ratio:0.5149455163450964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 +DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s +INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10923933982849121 s +DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=221056692862367080316260394023245105224, time:1750767673.0665367s req_ids:[8] +DEBUG 06-24 20:21:13 [manager.py:391] +ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.76836776733398ms total_cost_time:208.81104469299316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10004 prompt_cache_len:5151 prompt_cache_ratio:0.5148940423830468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 +DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s +INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10877776145935059 s +DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=320674779231369677908385547073158256476, time:1750767673.2812364s req_ids:[8] +DEBUG 06-24 20:21:13 [manager.py:391] +ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2000503540039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10005 prompt_cache_len:5151 prompt_cache_ratio:0.5148425787106446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 +DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10855579376220703 s +INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10974597930908203 s +DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=33524976610381482095798127027695539967, time:1750767673.4932196s req_ids:[8] +DEBUG 06-24 20:21:13 [manager.py:391] +ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:371.32978439331055ms total_cost_time:371.37413024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10006 prompt_cache_len:5151 prompt_cache_ratio:0.5147911253248051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 +DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10773015022277832 s +INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10886120796203613 s +DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=333394434092002072566714290280727377900, time:1750767673.8692977s req_ids:[8] +DEBUG 06-24 20:21:13 [manager.py:391] +ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:207.31043815612793ms total_cost_time:207.352876663208ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10007 prompt_cache_len:5151 prompt_cache_ratio:0.5147396822224443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 +DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.1076500415802002 s +INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10875654220581055 s +DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=309755671929120129278934526914585655267, time:1750767674.0825815s req_ids:[8] +DEBUG 06-24 20:21:14 [manager.py:391] +ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:209.74969863891602ms total_cost_time:209.794282913208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10008 prompt_cache_len:5151 prompt_cache_ratio:0.5146882494004796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 +DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s +INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10917401313781738 s +DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=1713855337256672981006411168032889607, time:1750767674.2980914s req_ids:[8] +DEBUG 06-24 20:21:14 [manager.py:391] +ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:207.72957801818848ms total_cost_time:207.77130126953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10009 prompt_cache_len:5151 prompt_cache_ratio:0.5146368268558298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 +DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10766983032226562 s +INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10874605178833008 s +DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=55819940519552811489077764129527821583, time:1750767674.5118341s req_ids:[8] +DEBUG 06-24 20:21:14 [manager.py:391] +ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:208.18305015563965ms total_cost_time:208.22548866271973ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10010 prompt_cache_len:5151 prompt_cache_ratio:0.5145854145854146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 +DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10849499702453613 s +INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10955595970153809 s +DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=272493082653217517005898172507864264139, time:1750767674.7393186s req_ids:[8] +DEBUG 06-24 20:21:14 [manager.py:391] +ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:226.426362991333ms total_cost_time:226.4697551727295ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10011 prompt_cache_len:5151 prompt_cache_ratio:0.5145340125861553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 +DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10840082168579102 s +INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10949969291687012 s +DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=166510964683360023381071405971394095634, time:1750767674.959557s req_ids:[8] +DEBUG 06-24 20:21:14 [manager.py:391] +ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:210.55078506469727ms total_cost_time:210.5729579925537ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:10012 prompt_cache_len:5151 prompt_cache_ratio:0.514482620854974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 +DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10811352729797363 s +INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.11001467704772949 s +DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=14913738956526192091614062670473285126, time:1750767675.1756477s req_ids:[8] +DEBUG 06-24 20:21:15 [manager.py:391] +ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:210.39891242980957ms total_cost_time:210.44373512268066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10013 prompt_cache_len:5151 prompt_cache_ratio:0.5144312393887945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 +DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10783505439758301 s +INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s +DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=203186622563361582424386144580601664820, time:1750767675.3903215s req_ids:[8] +DEBUG 06-24 20:21:15 [manager.py:391] +ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:208.86921882629395ms total_cost_time:208.91523361206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10014 prompt_cache_len:5151 prompt_cache_ratio:0.5143798681845416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 +DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10798192024230957 s +INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s +DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=11419226073631827366246229797585728499, time:1750767675.6058543s req_ids:[8] +DEBUG 06-24 20:21:15 [manager.py:391] +ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:402.16970443725586ms total_cost_time:402.21309661865234ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10015 prompt_cache_len:5151 prompt_cache_ratio:0.5143285072391413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 +DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10824799537658691 s +INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11034226417541504 s +DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=35136070204536014558746874922082147397, time:1750767676.012616s req_ids:[8] +DEBUG 06-24 20:21:16 [manager.py:391] +ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:203.40204238891602ms total_cost_time:203.44805717468262ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10016 prompt_cache_len:5151 prompt_cache_ratio:0.5142771565495208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 +DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10773706436157227 s +INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.10965132713317871 s +DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=56638152722488877609078004718479959184, time:1750767676.2218294s req_ids:[8] +DEBUG 06-24 20:21:16 [manager.py:391] +ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:202.8980255126953ms total_cost_time:202.9399871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10017 prompt_cache_len:5151 prompt_cache_ratio:0.5142258161126085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 +DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10808801651000977 s +INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.10997247695922852 s +DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=11701099602097811231512969437967722286, time:1750767676.4320555s req_ids:[8] +DEBUG 06-24 20:21:16 [manager.py:391] +ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:204.85711097717285ms total_cost_time:204.90097999572754ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10018 prompt_cache_len:5151 prompt_cache_ratio:0.5141744859253344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 +DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s +INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11041522026062012 s +DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=182227443091019224901679317752640373229, time:1750767676.6429405s req_ids:[8] +DEBUG 06-24 20:21:16 [manager.py:391] +ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:206.84266090393066ms total_cost_time:206.88652992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10019 prompt_cache_len:5151 prompt_cache_ratio:0.5141231659846291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 +DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.1093287467956543 s +INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s +DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=148329797919666168834807102624215072789, time:1750767676.8583493s req_ids:[8] +DEBUG 06-24 20:21:16 [manager.py:391] +ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:207.4108123779297ms total_cost_time:207.4570655822754ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10020 prompt_cache_len:5151 prompt_cache_ratio:0.5140718562874251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 +INFO 06-24 20:21:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s +INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.1104884147644043 s +DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=140381707006270261437390205709327570297, time:1750767677.0698934s req_ids:[8] +DEBUG 06-24 20:21:17 [manager.py:391] +ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:202.83174514770508ms total_cost_time:202.87585258483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10021 prompt_cache_len:5151 prompt_cache_ratio:0.5140205568306556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 +DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:17 [batch.py:51] router release req id 8 +INFO 06-24 20:21:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10772323608398438 s +INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.10965847969055176 s +DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=279588767738352105954396921341654986589, time:1750767677.2788773s req_ids:[8] +DEBUG 06-24 20:21:17 [manager.py:391] +ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:205.56235313415527ms total_cost_time:205.60765266418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10022 prompt_cache_len:5151 prompt_cache_ratio:0.5139692676112553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 +DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10792398452758789 s +INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.10994768142700195 s +DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=83750187988892482383377707812776377455, time:1750767677.492415s req_ids:[8] +DEBUG 06-24 20:21:17 [manager.py:391] +ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:371.2608814239502ms total_cost_time:371.3047504425049ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10023 prompt_cache_len:5151 prompt_cache_ratio:0.5139179886261598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 +DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10926437377929688 s +INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.11119294166564941 s +DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=97888943178495349940089458728766288825, time:1750767677.8678362s req_ids:[8] +DEBUG 06-24 20:21:17 [manager.py:391] +ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:204.0398120880127ms total_cost_time:204.08344268798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10024 prompt_cache_len:5151 prompt_cache_ratio:0.5138667198723065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 +DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10876917839050293 s +INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s +DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=74609362827850303254627722039856694498, time:1750767678.0775187s req_ids:[8] +DEBUG 06-24 20:21:18 [manager.py:391] +ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:204.6823501586914ms total_cost_time:204.72478866577148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10025 prompt_cache_len:5151 prompt_cache_ratio:0.5138154613466334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 +DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s +INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s +DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=197676765076988470015547424773743435579, time:1750767678.288581s req_ids:[8] +DEBUG 06-24 20:21:18 [manager.py:391] +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:206.8500518798828ms total_cost_time:206.8920135498047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10026 prompt_cache_len:5151 prompt_cache_ratio:0.5137642130460802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 +DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10864424705505371 s +INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11062765121459961 s +DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=218786671522666401582876905405076478179, time:1750767678.503636s req_ids:[8] +DEBUG 06-24 20:21:18 [manager.py:391] +ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.27190780639648ms total_cost_time:209.3186378479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:10027 prompt_cache_len:5151 prompt_cache_ratio:0.5137129749675875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 +DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.1094670295715332 s +INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11155986785888672 s +DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=156214872504664219314625755335517720760, time:1750767678.7193499s req_ids:[8] +DEBUG 06-24 20:21:18 [manager.py:391] +ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.85102653503418ms total_cost_time:209.89489555358887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10028 prompt_cache_len:5151 prompt_cache_ratio:0.5136617471080973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 +DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s +INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11045622825622559 s +DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=101983033354726193911074334637439386235, time:1750767678.9340477s req_ids:[8] +DEBUG 06-24 20:21:18 [manager.py:391] +ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.42044258117676ms total_cost_time:209.46669578552246ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10029 prompt_cache_len:5151 prompt_cache_ratio:0.5136105294645528 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 +DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.10811114311218262 s +INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s +DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=46514375001138887762635820552688981950, time:1750767679.1505723s req_ids:[8] +DEBUG 06-24 20:21:19 [manager.py:391] +ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:212.2490406036377ms total_cost_time:212.3098373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10030 prompt_cache_len:5151 prompt_cache_ratio:0.5135593220338983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 +DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.31372690200805664 s +INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.31608033180236816 s +DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=300772357665255402163766131268156828510, time:1750767679.576192s req_ids:[8] +DEBUG 06-24 20:21:19 [manager.py:391] +ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:420.94874382019043ms total_cost_time:420.9935665130615ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10031 prompt_cache_len:5151 prompt_cache_ratio:0.5135081248130795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 +DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.10795783996582031 s +INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.10981202125549316 s +DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=214689524642129865984303445807051624990, time:1750767679.7948742s req_ids:[8] +DEBUG 06-24 20:21:19 [manager.py:391] +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:211.0157012939453ms total_cost_time:211.0612392425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10032 prompt_cache_len:5151 prompt_cache_ratio:0.5134569377990431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 +DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.1106102466583252 s +INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11255002021789551 s +DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=60845672961888697452006653917860628399, time:1750767680.0109658s req_ids:[8] +DEBUG 06-24 20:21:20 [manager.py:391] +ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:207.61632919311523ms total_cost_time:207.66091346740723ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10033 prompt_cache_len:5151 prompt_cache_ratio:0.5134057609887371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 +DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.11033987998962402 s +INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11236858367919922 s +DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=274418926839260882035796834976540417292, time:1750767680.2266855s req_ids:[8] +DEBUG 06-24 20:21:20 [manager.py:391] +ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:206.99286460876465ms total_cost_time:207.03816413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10034 prompt_cache_len:5151 prompt_cache_ratio:0.513354594379111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 +DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10864782333374023 s +INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11064577102661133 s +DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=180548011243471030438661503036389484699, time:1750767680.437794s req_ids:[8] +DEBUG 06-24 20:21:20 [manager.py:391] +ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:202.50535011291504ms total_cost_time:202.57258415222168ms,out_token_counter:1 mean_per_token_cost_time: 0.06723403930664062ms prompt_token_num:10035 prompt_cache_len:5151 prompt_cache_ratio:0.5133034379671151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 +DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10869574546813965 s +INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11046695709228516 s +DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=200486883765685384610084020189093497944, time:1750767680.6474435s req_ids:[8] +DEBUG 06-24 20:21:20 [manager.py:391] +ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:166.26310348510742ms total_cost_time:166.3055419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10036 prompt_cache_len:5151 prompt_cache_ratio:0.5132522917497011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 +DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:20 [batch.py:51] router release req id 8 +INFO 06-24 20:21:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10906791687011719 s +INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.1109163761138916 s +DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=294965701930188687970334612481347691767, time:1750767680.8197846s req_ids:[8] +DEBUG 06-24 20:21:20 [manager.py:391] +ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:205.6879997253418ms total_cost_time:205.72876930236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10037 prompt_cache_len:5151 prompt_cache_ratio:0.5132011557238219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 +DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.10898303985595703 s +INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11095356941223145 s +DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=100659100704734653746762311064242140258, time:1750767681.0301135s req_ids:[8] +DEBUG 06-24 20:21:21 [manager.py:391] +ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:376.8141269683838ms total_cost_time:376.87230110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:10038 prompt_cache_len:5151 prompt_cache_ratio:0.5131500298864315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 +DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:21 [batch.py:51] router release req id 8 +DEBUG 06-24 20:21:21 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:21 [manager.py:283] +DEBUG 06-24 20:21:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:21 [manager.py:284] +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.11017560958862305 s +INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.1121528148651123 s +DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=35295932470831007959154153158677129875, time:1750767681.4098794s req_ids:[8] +DEBUG 06-24 20:21:21 [manager.py:391] +DEBUG 06-24 20:21:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 42031.247 tokens/s +DEBUG 06-24 20:21:21 [stats.py:37] Avg prompt tokens throughput: 42022.858 tokens/s +DEBUG 06-24 20:21:21 [stats.py:37] Avg generate tokens throughput: 8.389 tokens/s +ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:202.7261257171631ms total_cost_time:202.77118682861328ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10039 prompt_cache_len:5151 prompt_cache_ratio:0.5130989142344855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 +DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.10851335525512695 s +INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042237281799316 s +DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=265116536251123485920688320572071467933, time:1750767681.624575s req_ids:[8] +DEBUG 06-24 20:21:21 [manager.py:391] +ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:209.29861068725586ms total_cost_time:209.34176445007324ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10040 prompt_cache_len:5151 prompt_cache_ratio:0.5130478087649403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 +DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.1091604232788086 s +INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11104679107666016 s +DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=100863527701970576721803157899893907785, time:1750767681.8394673s req_ids:[8] +DEBUG 06-24 20:21:21 [manager.py:391] +ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:209.29670333862305ms total_cost_time:209.33938026428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10041 prompt_cache_len:5151 prompt_cache_ratio:0.5129967134747535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 +DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s +INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.10947084426879883 s +DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=40544905080563988853088973662104220682, time:1750767682.0554178s req_ids:[8] +DEBUG 06-24 20:21:22 [manager.py:391] +ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:210.2034091949463ms total_cost_time:210.24799346923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10042 prompt_cache_len:5151 prompt_cache_ratio:0.5129456283608843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 +DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10867619514465332 s +INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11047697067260742 s +DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=184777596034010017581833146526827337886, time:1750767682.2715046s req_ids:[8] +DEBUG 06-24 20:21:22 [manager.py:391] +ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:209.60068702697754ms total_cost_time:209.64503288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10043 prompt_cache_len:5151 prompt_cache_ratio:0.5128945534202928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 +DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10893654823303223 s +INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11083602905273438 s +DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=4247196397283856712166793552981517271, time:1750767682.4868393s req_ids:[8] +DEBUG 06-24 20:21:22 [manager.py:391] +ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:210.56222915649414ms total_cost_time:210.60585975646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10044 prompt_cache_len:5151 prompt_cache_ratio:0.5128434886499402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 +DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.1079404354095459 s +INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.1097254753112793 s +DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=322491184833666962516974785292692919732, time:1750767682.70236s req_ids:[8] +DEBUG 06-24 20:21:22 [manager.py:391] +ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:209.8088264465332ms total_cost_time:209.8519802093506ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10045 prompt_cache_len:5151 prompt_cache_ratio:0.5127924340467894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 +DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10824751853942871 s +INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11027884483337402 s +DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=299665894762378377012958077671039109994, time:1750767682.9185252s req_ids:[8] +DEBUG 06-24 20:21:22 [manager.py:391] +ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:210.37888526916504ms total_cost_time:210.42394638061523ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10046 prompt_cache_len:5151 prompt_cache_ratio:0.5127413896078041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 +DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.10821390151977539 s +INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11014485359191895 s +DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=287179751297200290519753863543706986456, time:1750767683.1344461s req_ids:[8] +DEBUG 06-24 20:21:23 [manager.py:391] +INFO 06-24 20:21:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:21:23 [statics_utils.py:24] mean first cost: 228.526973546725 ms +INFO 06-24 20:21:23 [statics_utils.py:24] mean per token cost: 0.06921560093324891 ms +ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:380.9094429016113ms total_cost_time:380.9523582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10047 prompt_cache_len:5151 prompt_cache_ratio:0.5126903553299492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 +DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.10894441604614258 s +INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s +DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=303547777474213759955366294495700183861, time:1750767683.5184271s req_ids:[8] +DEBUG 06-24 20:21:23 [manager.py:391] +ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:207.66949653625488ms total_cost_time:207.71360397338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10048 prompt_cache_len:5151 prompt_cache_ratio:0.5126393312101911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 +DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.11051583290100098 s +INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11236310005187988 s +DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=174089199723783062845283306768373385238, time:1750767683.7348313s req_ids:[8] +DEBUG 06-24 20:21:23 [manager.py:391] +ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:171.39816284179688ms total_cost_time:171.44536972045898ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10049 prompt_cache_len:5151 prompt_cache_ratio:0.512588317245497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 +DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.11118006706237793 s +INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11280369758605957 s +DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=277400912700728182880664065744754812631, time:1750767683.9107313s req_ids:[8] +DEBUG 06-24 20:21:23 [manager.py:391] +ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:165.37165641784668ms total_cost_time:165.41600227355957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10050 prompt_cache_len:5151 prompt_cache_ratio:0.5125373134328358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 +DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10824370384216309 s +INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11016845703125 s +DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=281318903391158202926042078210960709111, time:1750767684.0818222s req_ids:[8] +DEBUG 06-24 20:21:24 [manager.py:391] +ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:195.3134536743164ms total_cost_time:195.3575611114502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10051 prompt_cache_len:5151 prompt_cache_ratio:0.5124863197691772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 +DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:24 [batch.py:51] router release req id 8 +INFO 06-24 20:21:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10954117774963379 s +INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11153411865234375 s +DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=336596084924390563913965478354008531509, time:1750767684.2821875s req_ids:[8] +DEBUG 06-24 20:21:24 [manager.py:391] +ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:205.11269569396973ms total_cost_time:205.15680313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10052 prompt_cache_len:5151 prompt_cache_ratio:0.5124353362514923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 +DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s +INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s +DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=139117914993178353133929287899439663027, time:1750767684.4962888s req_ids:[8] +DEBUG 06-24 20:21:24 [manager.py:391] +ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:210.78205108642578ms total_cost_time:210.82353591918945ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10053 prompt_cache_len:5151 prompt_cache_ratio:0.5123843628767533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 +DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10919833183288574 s +INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11106109619140625 s +DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=160088530373999995126289933252483621141, time:1750767684.710222s req_ids:[8] +DEBUG 06-24 20:21:24 [manager.py:391] +ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:208.09292793273926ms total_cost_time:208.14871788024902ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:10054 prompt_cache_len:5151 prompt_cache_ratio:0.5123333996419336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 +DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s +INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11001157760620117 s +DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=176572674145125742239267296485982782640, time:1750767684.9253588s req_ids:[8] +DEBUG 06-24 20:21:24 [manager.py:391] +ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:386.19279861450195ms total_cost_time:386.25192642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:10055 prompt_cache_len:5151 prompt_cache_ratio:0.512282446544008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 +DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.1101226806640625 s +INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11214423179626465 s +DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=299224584559672665683965250736448623562, time:1750767685.3151038s req_ids:[8] +DEBUG 06-24 20:21:25 [manager.py:391] +ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:196.16174697875977ms total_cost_time:196.1979866027832ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:10056 prompt_cache_len:5151 prompt_cache_ratio:0.5122315035799523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 +DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s +INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11045479774475098 s +DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=79424366191923499171041814660667259058, time:1750767685.520111s req_ids:[8] +DEBUG 06-24 20:21:25 [manager.py:391] +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:207.275390625ms total_cost_time:207.31878280639648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10057 prompt_cache_len:5151 prompt_cache_ratio:0.5121805707467436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 +DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10790133476257324 s +INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.10976362228393555 s +DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=214631679299171028309678258331186956528, time:1750767685.7337265s req_ids:[8] +DEBUG 06-24 20:21:25 [manager.py:391] +ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:208.49347114562988ms total_cost_time:208.53829383850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10058 prompt_cache_len:5151 prompt_cache_ratio:0.5121296480413601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 +DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s +INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11007952690124512 s +DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=179346366814783116467619421880454085909, time:1750767685.949073s req_ids:[8] +DEBUG 06-24 20:21:25 [manager.py:391] +ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:208.76574516296387ms total_cost_time:208.80842208862305ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10059 prompt_cache_len:5151 prompt_cache_ratio:0.5120787354607814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 +DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10963153839111328 s +INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11166167259216309 s +DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=194597401526203248726829121736755434946, time:1750767686.163391s req_ids:[8] +DEBUG 06-24 20:21:26 [manager.py:391] +ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:208.7993621826172ms total_cost_time:208.84299278259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10060 prompt_cache_len:5151 prompt_cache_ratio:0.5120278330019881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 +DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10854601860046387 s +INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11063599586486816 s +DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=297298980173384097366042720721692644236, time:1750767686.3796s req_ids:[8] +DEBUG 06-24 20:21:26 [manager.py:391] +ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:210.69884300231934ms total_cost_time:210.74438095092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10061 prompt_cache_len:5151 prompt_cache_ratio:0.511976940661962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 +DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s +INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11092448234558105 s +DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=92706566194223126333068329853782741279, time:1750767686.5944939s req_ids:[8] +DEBUG 06-24 20:21:26 [manager.py:391] +ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:206.6483497619629ms total_cost_time:206.6938877105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10062 prompt_cache_len:5151 prompt_cache_ratio:0.5119260584376863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 +DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s +INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055326461791992 s +DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=17967257669469969246100229176614469242, time:1750767686.8067243s req_ids:[8] +DEBUG 06-24 20:21:26 [manager.py:391] +ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:376.48677825927734ms total_cost_time:376.53064727783203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10063 prompt_cache_len:5151 prompt_cache_ratio:0.5118751863261453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 +DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.1085057258605957 s +INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11059427261352539 s +DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=7200054131084324765188545611968458852, time:1750767687.1868775s req_ids:[8] +DEBUG 06-24 20:21:27 [manager.py:391] +ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:205.93714714050293ms total_cost_time:205.98220825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10064 prompt_cache_len:5151 prompt_cache_ratio:0.5118243243243243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 +DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.11086678504943848 s +INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11277651786804199 s +DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=37203945455403723882523268096833036239, time:1750767687.4026315s req_ids:[8] +DEBUG 06-24 20:21:27 [manager.py:391] +ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:204.99014854431152ms total_cost_time:205.0340175628662ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10065 prompt_cache_len:5151 prompt_cache_ratio:0.5117734724292101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 +DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.10983061790466309 s +INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.1116647720336914 s +DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=120232757590422401504228173982753513999, time:1750767687.6123137s req_ids:[8] +DEBUG 06-24 20:21:27 [manager.py:391] +ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:207.18884468078613ms total_cost_time:207.23199844360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10066 prompt_cache_len:5151 prompt_cache_ratio:0.5117226306377906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 +DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:27 [batch.py:51] router release req id 8 +INFO 06-24 20:21:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.10816669464111328 s +INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11012578010559082 s +DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=131606207316451692458795530323234626549, time:1750767687.824804s req_ids:[8] +DEBUG 06-24 20:21:27 [manager.py:391] +ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:206.53820037841797ms total_cost_time:206.58135414123535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10067 prompt_cache_len:5151 prompt_cache_ratio:0.5116717989470547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 +DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10899972915649414 s +INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11099386215209961 s +DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=234026135713167960366335546678673767083, time:1750767688.0524561s req_ids:[8] +DEBUG 06-24 20:21:28 [manager.py:391] +ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:225.94308853149414ms total_cost_time:225.98886489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10068 prompt_cache_len:5151 prompt_cache_ratio:0.5116209773539928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 +DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10869646072387695 s +INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11069583892822266 s +DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=304959298521922266918311050979615680493, time:1750767688.2713532s req_ids:[8] +DEBUG 06-24 20:21:28 [manager.py:391] +ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:208.8947296142578ms total_cost_time:208.9536190032959ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10069 prompt_cache_len:5151 prompt_cache_ratio:0.5115701658555963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 +DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10811448097229004 s +INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11007261276245117 s +DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=112585559579808366174336296295767711100, time:1750767688.4867325s req_ids:[8] +DEBUG 06-24 20:21:28 [manager.py:391] +ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:209.60283279418945ms total_cost_time:209.64622497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10070 prompt_cache_len:5151 prompt_cache_ratio:0.511519364448858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 +DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.1086876392364502 s +INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11067390441894531 s +DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=286316830919673610697039027217866780400, time:1750767688.7025733s req_ids:[8] +DEBUG 06-24 20:21:28 [manager.py:391] +ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:375.4258155822754ms total_cost_time:375.46825408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10071 prompt_cache_len:5151 prompt_cache_ratio:0.5114685731307715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 +DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10812616348266602 s +INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11012005805969238 s +DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=92891636877492041921651033582298098354, time:1750767689.0808687s req_ids:[8] +DEBUG 06-24 20:21:29 [manager.py:391] +ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:203.28998565673828ms total_cost_time:203.33290100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10072 prompt_cache_len:5151 prompt_cache_ratio:0.511417791898332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 +DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10799360275268555 s +INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s +DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=165887855038438303852356340589238204514, time:1750767689.2915168s req_ids:[8] +DEBUG 06-24 20:21:29 [manager.py:391] +ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:205.6889533996582ms total_cost_time:205.7335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10073 prompt_cache_len:5151 prompt_cache_ratio:0.5113670207485357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 +DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10788702964782715 s +INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.10997152328491211 s +DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=244282371890683699895524948660143192664, time:1750767689.5037596s req_ids:[8] +DEBUG 06-24 20:21:29 [manager.py:391] +ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2012424468994ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10074 prompt_cache_len:5151 prompt_cache_ratio:0.51131625967838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 +DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.11013126373291016 s +INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11208105087280273 s +DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=122056371073381441588836800094874385254, time:1750767689.724118s req_ids:[8] +DEBUG 06-24 20:21:29 [manager.py:391] +ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:217.7290916442871ms total_cost_time:217.7717685699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10075 prompt_cache_len:5151 prompt_cache_ratio:0.5112655086848635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 +DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s +INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.10978960990905762 s +DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=6315164233497349545243221144006092776, time:1750767689.9410348s req_ids:[8] +DEBUG 06-24 20:21:29 [manager.py:391] +ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:205.98244667053223ms total_cost_time:206.0403823852539ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10076 prompt_cache_len:5151 prompt_cache_ratio:0.5112147677649861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 +DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.11011505126953125 s +INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11220383644104004 s +DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=111518823631385236392230933709093442634, time:1750767690.1507657s req_ids:[8] +DEBUG 06-24 20:21:30 [manager.py:391] +ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:205.3365707397461ms total_cost_time:205.39569854736328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:10077 prompt_cache_len:5151 prompt_cache_ratio:0.5111640369157487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 +DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.1114809513092041 s +INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11359524726867676 s +DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=298061475193586900689244199838334770632, time:1750767690.3747528s req_ids:[8] +DEBUG 06-24 20:21:30 [manager.py:391] +ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:218.92857551574707ms total_cost_time:218.97149085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10078 prompt_cache_len:5151 prompt_cache_ratio:0.5111133161341536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 +DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.1082923412322998 s +INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11012983322143555 s +DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=331716534586550348579023054532073534859, time:1750767690.5911942s req_ids:[8] +DEBUG 06-24 20:21:30 [manager.py:391] +ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:208.8465690612793ms total_cost_time:208.8906764984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10079 prompt_cache_len:5151 prompt_cache_ratio:0.511062605417204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 +DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.10916876792907715 s +INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.1109468936920166 s +DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=61858417265926975758481934947827921843, time:1750767690.8173528s req_ids:[8] +DEBUG 06-24 20:21:30 [manager.py:391] +ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:375.5214214324951ms total_cost_time:375.5674362182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10080 prompt_cache_len:5151 prompt_cache_ratio:0.5110119047619047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 +DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.10855698585510254 s +INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11070060729980469 s +DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=79888184635082909339674954571069002676, time:1750767691.1846051s req_ids:[8] +DEBUG 06-24 20:21:31 [manager.py:391] +ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:188.67230415344238ms total_cost_time:188.71712684631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10081 prompt_cache_len:5151 prompt_cache_ratio:0.5109612141652614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 +DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.10771369934082031 s +INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s +DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=300508760914918063409485340260319963028, time:1750767691.3840997s req_ids:[8] +DEBUG 06-24 20:21:31 [manager.py:391] +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:21:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 43014.791 tokens/s +DEBUG 06-24 20:21:31 [stats.py:37] Avg prompt tokens throughput: 43006.143 tokens/s +DEBUG 06-24 20:21:31 [stats.py:37] Avg generate tokens throughput: 8.648 tokens/s +INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:206.72369003295898ms total_cost_time:206.76612854003906ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10082 prompt_cache_len:5151 prompt_cache_ratio:0.5109105336242808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 +DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s +INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11094975471496582 s +DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=170469209074526881710686611792377875569, time:1750767691.592726s req_ids:[8] +DEBUG 06-24 20:21:31 [manager.py:391] +ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:205.44815063476562ms total_cost_time:205.4905891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10083 prompt_cache_len:5151 prompt_cache_ratio:0.5108598631359714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 +DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.1087334156036377 s +INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11064434051513672 s +DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=129629005656283229777702711253780644293, time:1750767691.8045309s req_ids:[8] +DEBUG 06-24 20:21:31 [manager.py:391] +ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:206.21132850646973ms total_cost_time:206.2525749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10084 prompt_cache_len:5151 prompt_cache_ratio:0.5108092026973423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 +DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10755228996276855 s +INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.10948061943054199 s +DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=143642637379412976413601307716440487873, time:1750767692.019238s req_ids:[8] +DEBUG 06-24 20:21:32 [manager.py:391] +ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:210.85238456726074ms total_cost_time:210.89744567871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10085 prompt_cache_len:5151 prompt_cache_ratio:0.510758552305404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 +DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10807371139526367 s +INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s +DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=256154671319462269841832064768910285416, time:1750767692.2348306s req_ids:[8] +DEBUG 06-24 20:21:32 [manager.py:391] +ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:210.4175090789795ms total_cost_time:210.46161651611328ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10086 prompt_cache_len:5151 prompt_cache_ratio:0.5107079119571684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 +DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10767865180969238 s +INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s +DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=118886874068040384157817845070743212987, time:1750767692.4525623s req_ids:[8] +DEBUG 06-24 20:21:32 [manager.py:391] +ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:207.0333957672119ms total_cost_time:207.0760726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10087 prompt_cache_len:5151 prompt_cache_ratio:0.5106572816496481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 +DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10852527618408203 s +INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s +DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=335306741314306037779474740091012797355, time:1750767692.663629s req_ids:[8] +DEBUG 06-24 20:21:32 [manager.py:391] +ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:365.87023735046387ms total_cost_time:365.91649055480957ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10088 prompt_cache_len:5151 prompt_cache_ratio:0.5106066613798572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 +DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s +INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s +DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=264820560990082970572277477303144795442, time:1750767693.043305s req_ids:[8] +DEBUG 06-24 20:21:33 [manager.py:391] +ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:219.15006637573242ms total_cost_time:219.1929817199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10089 prompt_cache_len:5151 prompt_cache_ratio:0.5105560511448112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 +DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s +INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.10983848571777344 s +DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=130701378259747965856741412539277597082, time:1750767693.2675397s req_ids:[8] +DEBUG 06-24 20:21:33 [manager.py:391] +ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:218.78504753112793ms total_cost_time:218.82939338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10090 prompt_cache_len:5151 prompt_cache_ratio:0.5105054509415262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 +DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10799241065979004 s +INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s +DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=279428142944436910302240759853843510097, time:1750767693.4851456s req_ids:[8] +DEBUG 06-24 20:21:33 [manager.py:391] +ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:207.747220993042ms total_cost_time:207.79156684875488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10091 prompt_cache_len:5151 prompt_cache_ratio:0.5104548607670201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 +DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.1083364486694336 s +INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.11035346984863281 s +DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=113374117859218219784320365427376479668, time:1750767693.7000391s req_ids:[8] +DEBUG 06-24 20:21:33 [manager.py:391] +ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:206.72106742858887ms total_cost_time:206.78210258483887ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10092 prompt_cache_len:5151 prompt_cache_ratio:0.5104042806183116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 +DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10768485069274902 s +INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.1097266674041748 s +DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=191613240756096007719658622299793828887, time:1750767693.924504s req_ids:[8] +DEBUG 06-24 20:21:33 [manager.py:391] +ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:221.25935554504395ms total_cost_time:221.30203247070312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10093 prompt_cache_len:5151 prompt_cache_ratio:0.5103537104924205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 +DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10917091369628906 s +INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11127543449401855 s +DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=319277156119376248216460916082578855936, time:1750767694.1404011s req_ids:[8] +DEBUG 06-24 20:21:34 [manager.py:391] +ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:206.59542083740234ms total_cost_time:206.63857460021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10094 prompt_cache_len:5151 prompt_cache_ratio:0.5103031503863681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 +DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s +INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11107373237609863 s +DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=4135892073947698169525723417846813570, time:1750767694.3539965s req_ids:[8] +DEBUG 06-24 20:21:34 [manager.py:391] +ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:207.28826522827148ms total_cost_time:207.33237266540527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10095 prompt_cache_len:5151 prompt_cache_ratio:0.5102526002971768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 +DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10818719863891602 s +INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.10989260673522949 s +DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=226497570214325670989996984532858612195, time:1750767694.5646572s req_ids:[8] +DEBUG 06-24 20:21:34 [manager.py:391] +ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:363.5573387145996ms total_cost_time:363.6033535003662ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10096 prompt_cache_len:5151 prompt_cache_ratio:0.5102020602218701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 +DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10860300064086914 s +INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11040282249450684 s +DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=14935135176430777046342855340645245349, time:1750767694.9402506s req_ids:[8] +DEBUG 06-24 20:21:34 [manager.py:391] +ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:212.3713493347168ms total_cost_time:212.4154567718506ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10097 prompt_cache_len:5151 prompt_cache_ratio:0.5101515301574725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 +DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s +INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.10952281951904297 s +DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=282820079747458981836030076136828667165, time:1750767695.152686s req_ids:[8] +DEBUG 06-24 20:21:35 [manager.py:391] +ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:164.48378562927246ms total_cost_time:164.52503204345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10098 prompt_cache_len:5151 prompt_cache_ratio:0.51010101010101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 +DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10914421081542969 s +INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s +DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=201336936424233267674965533192099887897, time:1750767695.3232732s req_ids:[8] +DEBUG 06-24 20:21:35 [manager.py:391] +ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:197.4184513092041ms total_cost_time:197.4625587463379ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10099 prompt_cache_len:5151 prompt_cache_ratio:0.5100505000495098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 +DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s +INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11057329177856445 s +DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=309803038197483354678558607182292816702, time:1750767695.5274568s req_ids:[8] +DEBUG 06-24 20:21:35 [manager.py:391] +ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:206.80713653564453ms total_cost_time:206.85219764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10100 prompt_cache_len:5151 prompt_cache_ratio:0.51 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 +DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10872507095336914 s +INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s +DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=157645074527919689235193345428391882414, time:1750767695.7425334s req_ids:[8] +DEBUG 06-24 20:21:35 [manager.py:391] +ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:210.42728424072266ms total_cost_time:210.47186851501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10101 prompt_cache_len:5151 prompt_cache_ratio:0.5099495099495099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 +DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10873723030090332 s +INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s +DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=225060325160732444418554665517828245520, time:1750767695.958877s req_ids:[8] +DEBUG 06-24 20:21:35 [manager.py:391] +ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:207.55434036254883ms total_cost_time:207.59892463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10102 prompt_cache_len:5151 prompt_cache_ratio:0.5098990298950703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 +DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.10800909996032715 s +INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.10981440544128418 s +DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=190329637547370070365289770444499947269, time:1750767696.1688066s req_ids:[8] +DEBUG 06-24 20:21:36 [manager.py:391] +ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:199.2652416229248ms total_cost_time:199.3091106414795ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10103 prompt_cache_len:5151 prompt_cache_ratio:0.5098485598337128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 +DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.108428955078125 s +INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s +DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=59374709559217071290271197130847305544, time:1750767696.3755941s req_ids:[8] +DEBUG 06-24 20:21:36 [manager.py:391] +ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:361.00292205810547ms total_cost_time:361.04822158813477ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10104 prompt_cache_len:5151 prompt_cache_ratio:0.5097980997624703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 +DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.11113739013671875 s +INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.1130683422088623 s +DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=261850602028187585112047889552634727812, time:1750767696.7415636s req_ids:[8] +DEBUG 06-24 20:21:36 [manager.py:391] +ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:207.12995529174805ms total_cost_time:207.17263221740723ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10105 prompt_cache_len:5151 prompt_cache_ratio:0.509747649678377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 +DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.10809755325317383 s +INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.11001253128051758 s +DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=205001415949656960870601665210233184403, time:1750767696.9557235s req_ids:[8] +DEBUG 06-24 20:21:36 [manager.py:391] +ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:207.65328407287598ms total_cost_time:207.71121978759766ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10106 prompt_cache_len:5151 prompt_cache_ratio:0.5096972095784682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 +DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10825848579406738 s +INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s +DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=230390370823724051593308616579455274792, time:1750767697.1711361s req_ids:[8] +DEBUG 06-24 20:21:37 [manager.py:391] +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:208.28747749328613ms total_cost_time:208.33063125610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10107 prompt_cache_len:5151 prompt_cache_ratio:0.5096467794597803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 +DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10770845413208008 s +INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s +DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=296049167846621257719703681367170025270, time:1750767697.3843527s req_ids:[8] +DEBUG 06-24 20:21:37 [manager.py:391] +ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:207.1223258972168ms total_cost_time:207.16571807861328ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10108 prompt_cache_len:5151 prompt_cache_ratio:0.509596359319351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 +DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10924506187438965 s +INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.11127281188964844 s +DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=95441286448768445164103536021692416739, time:1750767697.6047597s req_ids:[8] +DEBUG 06-24 20:21:37 [manager.py:391] +ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:214.36476707458496ms total_cost_time:214.40911293029785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10109 prompt_cache_len:5151 prompt_cache_ratio:0.509545949154219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 +DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10805630683898926 s +INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099863052368164 s +DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=36605886147250795582364140084630623331, time:1750767697.818904s req_ids:[8] +DEBUG 06-24 20:21:37 [manager.py:391] +ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:209.46955680847168ms total_cost_time:209.51461791992188ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10110 prompt_cache_len:5151 prompt_cache_ratio:0.5094955489614243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 +DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10912632942199707 s +INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.11100482940673828 s +DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=197695263134732850504251537393565290994, time:1750767698.033769s req_ids:[8] +DEBUG 06-24 20:21:38 [manager.py:391] +ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:366.4257526397705ms total_cost_time:366.4698600769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10111 prompt_cache_len:5151 prompt_cache_ratio:0.5094451587380081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 +DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:38 [batch.py:51] router release req id 8 +INFO 06-24 20:21:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.11111092567443848 s +INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.11310458183288574 s +DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=283934971650228186582675623401445386604, time:1750767698.4041662s req_ids:[8] +DEBUG 06-24 20:21:38 [manager.py:391] +ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:206.36606216430664ms total_cost_time:206.41112327575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10112 prompt_cache_len:5151 prompt_cache_ratio:0.5093947784810127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 +DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10784554481506348 s +INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.1097111701965332 s +DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=164449744627675095512392983723759750336, time:1750767698.6191638s req_ids:[8] +DEBUG 06-24 20:21:38 [manager.py:391] +ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:206.3119411468506ms total_cost_time:206.35437965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10113 prompt_cache_len:5151 prompt_cache_ratio:0.5093444081874815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 +DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10743045806884766 s +INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.10926127433776855 s +DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=288633960513105798776415814023379386339, time:1750767698.830983s req_ids:[8] +DEBUG 06-24 20:21:38 [manager.py:391] +ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:205.4903507232666ms total_cost_time:205.5344581604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10114 prompt_cache_len:5151 prompt_cache_ratio:0.5092940478544592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 +DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10814619064331055 s +INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.10994601249694824 s +DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=173706481182719942940021081714847040786, time:1750767699.0430298s req_ids:[8] +DEBUG 06-24 20:21:39 [manager.py:391] +ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:207.26728439331055ms total_cost_time:207.30948448181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10115 prompt_cache_len:5151 prompt_cache_ratio:0.5092436974789916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 +DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10780930519104004 s +INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.10912609100341797 s +DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=332655904589645261763248773447313795386, time:1750767699.2549374s req_ids:[8] +DEBUG 06-24 20:21:39 [manager.py:391] +ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:205.71255683898926ms total_cost_time:205.75690269470215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10116 prompt_cache_len:5151 prompt_cache_ratio:0.5091933570581257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 +DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10863685607910156 s +INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.1106569766998291 s +DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=325104750316030565870346665723654718032, time:1750767699.4659076s req_ids:[8] +DEBUG 06-24 20:21:39 [manager.py:391] +ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:204.43224906921387ms total_cost_time:204.47421073913574ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10117 prompt_cache_len:5151 prompt_cache_ratio:0.5091430265889098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 +DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10936379432678223 s +INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110782623291016 s +DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=314890534290048280687948159627939184885, time:1750767699.6758273s req_ids:[8] +DEBUG 06-24 20:21:39 [manager.py:391] +ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:206.52461051940918ms total_cost_time:206.56657218933105ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10118 prompt_cache_len:5151 prompt_cache_ratio:0.509092706068393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 +DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10737013816833496 s +INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.1092071533203125 s +DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=43226466367538415541417858036634786327, time:1750767699.8911033s req_ids:[8] +DEBUG 06-24 20:21:39 [manager.py:391] +ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:362.335205078125ms total_cost_time:362.35928535461426ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:10119 prompt_cache_len:5151 prompt_cache_ratio:0.5090423954936258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 +DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s +INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11126351356506348 s +DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=166336810497066430244437046254189755467, time:1750767700.2569265s req_ids:[8] +DEBUG 06-24 20:21:40 [manager.py:391] +ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:209.51032638549805ms total_cost_time:209.55610275268555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10120 prompt_cache_len:5151 prompt_cache_ratio:0.5089920948616601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 +DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10831880569458008 s +INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11021924018859863 s +DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=106403930248651607666069980985364745510, time:1750767700.4718983s req_ids:[8] +DEBUG 06-24 20:21:40 [manager.py:391] +ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:205.85393905639648ms total_cost_time:205.89685440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10121 prompt_cache_len:5151 prompt_cache_ratio:0.5089418041695485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 +DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10861325263977051 s +INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11052274703979492 s +DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=8974130209623849790482187180423655616, time:1750767700.6862683s req_ids:[8] +DEBUG 06-24 20:21:40 [manager.py:391] +ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:205.63793182373047ms total_cost_time:205.68132400512695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10122 prompt_cache_len:5151 prompt_cache_ratio:0.508891523414345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 +DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10776281356811523 s +INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.10978221893310547 s +DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=148455892889619662892698805540155674336, time:1750767700.8962057s req_ids:[8] +DEBUG 06-24 20:21:40 [manager.py:391] +ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:207.58819580078125ms total_cost_time:207.63158798217773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10123 prompt_cache_len:5151 prompt_cache_ratio:0.5088412525931049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 +DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s +INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11081552505493164 s +DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=151584382138132060594754411575380425124, time:1750767701.1122565s req_ids:[8] +DEBUG 06-24 20:21:41 [manager.py:391] +ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:208.77742767333984ms total_cost_time:208.82272720336914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10124 prompt_cache_len:5151 prompt_cache_ratio:0.5087909917028842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 +DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.1078805923461914 s +INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.10974955558776855 s +DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=72702485005993852021724459423577230947, time:1750767701.3263645s req_ids:[8] +DEBUG 06-24 20:21:41 [manager.py:391] +ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:208.78863334655762ms total_cost_time:208.8322639465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10125 prompt_cache_len:5151 prompt_cache_ratio:0.5087407407407407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 +DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10910773277282715 s +INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11106157302856445 s +DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=105555294745046929330579415626330631802, time:1750767701.5405877s req_ids:[8] +DEBUG 06-24 20:21:41 [manager.py:391] +DEBUG 06-24 20:21:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 44154.760 tokens/s +DEBUG 06-24 20:21:41 [stats.py:37] Avg prompt tokens throughput: 44146.121 tokens/s +DEBUG 06-24 20:21:41 [stats.py:37] Avg generate tokens throughput: 8.639 tokens/s +ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:202.1503448486328ms total_cost_time:202.1937370300293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10126 prompt_cache_len:5151 prompt_cache_ratio:0.5086904997037329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 +DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10892009735107422 s +INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11082744598388672 s +DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=293805536961258826844948065298290622296, time:1750767701.7486289s req_ids:[8] +DEBUG 06-24 20:21:41 [manager.py:391] +ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:368.31116676330566ms total_cost_time:368.35622787475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10127 prompt_cache_len:5151 prompt_cache_ratio:0.5086402685889208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 +DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s +INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11037063598632812 s +DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=54144249806072607201261505484498455589, time:1750767702.1220245s req_ids:[8] +DEBUG 06-24 20:21:42 [manager.py:391] +ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:204.67066764831543ms total_cost_time:204.71549034118652ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10128 prompt_cache_len:5151 prompt_cache_ratio:0.5085900473933649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 +DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10775375366210938 s +INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.10958075523376465 s +DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=324461330008477282178186543620538000204, time:1750767702.3337145s req_ids:[8] +DEBUG 06-24 20:21:42 [manager.py:391] +ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:164.83092308044434ms total_cost_time:164.8707389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10129 prompt_cache_len:5151 prompt_cache_ratio:0.5085398361141278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 +DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10750579833984375 s +INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.10938525199890137 s +DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=120654422921445400278997315909668341801, time:1750767702.5026917s req_ids:[8] +DEBUG 06-24 20:21:42 [manager.py:391] +ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:196.3038444519043ms total_cost_time:196.37203216552734ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:10130 prompt_cache_len:5151 prompt_cache_ratio:0.5084896347482725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 +DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10912394523620605 s +INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11092329025268555 s +DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=81235468923126583492175219618464557839, time:1750767702.7066839s req_ids:[8] +DEBUG 06-24 20:21:42 [manager.py:391] +ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:201.83563232421875ms total_cost_time:201.88331604003906ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:10131 prompt_cache_len:5151 prompt_cache_ratio:0.5084394432928635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 +DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10843157768249512 s +INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s +DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=240529802433699469972229452236451614887, time:1750767702.9152942s req_ids:[8] +DEBUG 06-24 20:21:42 [manager.py:391] +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:206.8033218383789ms total_cost_time:206.8467140197754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10132 prompt_cache_len:5151 prompt_cache_ratio:0.5083892617449665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 +DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10851168632507324 s +INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.1104578971862793 s +DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=195755025892400101355005074492519400520, time:1750767703.1259995s req_ids:[8] +DEBUG 06-24 20:21:43 [manager.py:391] +ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:205.20639419555664ms total_cost_time:205.2445411682129ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:10133 prompt_cache_len:5151 prompt_cache_ratio:0.5083390901016481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 +DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s +INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.11049890518188477 s +DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=320273511243223078608185442378006472281, time:1750767703.3398168s req_ids:[8] +DEBUG 06-24 20:21:43 [manager.py:391] +ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:206.93373680114746ms total_cost_time:206.97617530822754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10134 prompt_cache_len:5151 prompt_cache_ratio:0.5082889283599763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 +DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.20868420600891113 s +INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.21079039573669434 s +DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=169960743377358416228472878042144367918, time:1750767703.682854s req_ids:[8] +DEBUG 06-24 20:21:43 [manager.py:391] +ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:327.3181915283203ms total_cost_time:327.3634910583496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10135 prompt_cache_len:5151 prompt_cache_ratio:0.5082387765170202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 +DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10889816284179688 s +INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.11055850982666016 s +DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=262609319509659942003145030118556452624, time:1750767703.8846385s req_ids:[8] +DEBUG 06-24 20:21:43 [manager.py:391] +ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:207.2441577911377ms total_cost_time:207.28683471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10136 prompt_cache_len:5151 prompt_cache_ratio:0.5081886345698501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 +DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10828232765197754 s +INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10981082916259766 s +DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=123312667081070237568459580423686935075, time:1750767704.0987656s req_ids:[8] +DEBUG 06-24 20:21:44 [manager.py:391] +ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:207.00335502624512ms total_cost_time:207.0634365081787ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:10137 prompt_cache_len:5151 prompt_cache_ratio:0.5081385025155372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 +DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10768485069274902 s +INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s +DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=40495947664485255597714034583587539764, time:1750767704.317576s req_ids:[8] +DEBUG 06-24 20:21:44 [manager.py:391] +ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:216.63331985473633ms total_cost_time:216.6762351989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10138 prompt_cache_len:5151 prompt_cache_ratio:0.5080883803511541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 +DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10746121406555176 s +INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10952425003051758 s +DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=172981181791695416483957084520320403600, time:1750767704.5340722s req_ids:[8] +DEBUG 06-24 20:21:44 [manager.py:391] +ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:210.00313758850098ms total_cost_time:210.04867553710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10139 prompt_cache_len:5151 prompt_cache_ratio:0.5080382680737745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 +DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.1082763671875 s +INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029863357543945 s +DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=131169783785352733046111397100057899373, time:1750767704.7483761s req_ids:[8] +DEBUG 06-24 20:21:44 [manager.py:391] +ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:207.55743980407715ms total_cost_time:207.60226249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10140 prompt_cache_len:5151 prompt_cache_ratio:0.5079881656804733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 +DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10889291763305664 s +INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s +DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=115811425222702606777810986042052353041, time:1750767704.9646556s req_ids:[8] +DEBUG 06-24 20:21:44 [manager.py:391] +ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:209.2571258544922ms total_cost_time:209.30099487304688ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10141 prompt_cache_len:5151 prompt_cache_ratio:0.5079380731683266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 +DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10887289047241211 s +INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.11075782775878906 s +DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=132508977081290864847569260779378056284, time:1750767705.179054s req_ids:[8] +DEBUG 06-24 20:21:45 [manager.py:391] +ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:375.38719177246094ms total_cost_time:375.43177604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10142 prompt_cache_len:5151 prompt_cache_ratio:0.5078879905344114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 +DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:45 [batch.py:51] router release req id 8 +INFO 06-24 20:21:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s +INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.11038851737976074 s +DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=22145420406558370586398438984783546823, time:1750767705.5566025s req_ids:[8] +DEBUG 06-24 20:21:45 [manager.py:391] +ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:161.56554222106934ms total_cost_time:161.6058349609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10143 prompt_cache_len:5151 prompt_cache_ratio:0.507837917775806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 +DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10735464096069336 s +INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.1092376708984375 s +DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=313712040281680162188102139771420179062, time:1750767705.7251568s req_ids:[8] +DEBUG 06-24 20:21:45 [manager.py:391] +ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:196.5653896331787ms total_cost_time:196.6094970703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10144 prompt_cache_len:5151 prompt_cache_ratio:0.5077878548895899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 +DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10772466659545898 s +INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976719856262207 s +DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=330362428935782045468292297518601909011, time:1750767705.928117s req_ids:[8] +DEBUG 06-24 20:21:45 [manager.py:391] +ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:206.37917518615723ms total_cost_time:206.42423629760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10145 prompt_cache_len:5151 prompt_cache_ratio:0.5077378018728438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 +DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10794758796691895 s +INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s +DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=29180433877557666447985475617404811289, time:1750767706.143307s req_ids:[8] +DEBUG 06-24 20:21:46 [manager.py:391] +ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:210.16263961791992ms total_cost_time:210.2072238922119ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10146 prompt_cache_len:5151 prompt_cache_ratio:0.5076877587226494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 +DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10790538787841797 s +INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10986089706420898 s +DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=274659681424589417645776321804664797426, time:1750767706.3585832s req_ids:[8] +DEBUG 06-24 20:21:46 [manager.py:391] +ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:208.37163925170898ms total_cost_time:208.41574668884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10147 prompt_cache_len:5151 prompt_cache_ratio:0.5076377254360895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 +DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10791230201721191 s +INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10975241661071777 s +DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=329676922568742266876793498249649662445, time:1750767706.570521s req_ids:[8] +DEBUG 06-24 20:21:46 [manager.py:391] +ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:202.3310661315918ms total_cost_time:202.37445831298828ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10148 prompt_cache_len:5151 prompt_cache_ratio:0.5075877020102483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 +DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.11007261276245117 s +INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.11208224296569824 s +DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=108188104191186310020818999519498292593, time:1750767706.778726s req_ids:[8] +DEBUG 06-24 20:21:46 [manager.py:391] +ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:202.4984359741211ms total_cost_time:202.54039764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10149 prompt_cache_len:5151 prompt_cache_ratio:0.507537688442211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 +DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:46 [batch.py:51] router release req id 8 +INFO 06-24 20:21:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10917258262634277 s +INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.11107277870178223 s +DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=192491815574463681595600070727692657857, time:1750767706.9876206s req_ids:[8] +DEBUG 06-24 20:21:46 [manager.py:391] +ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:371.40846252441406ms total_cost_time:371.45447731018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10150 prompt_cache_len:5151 prompt_cache_ratio:0.507487684729064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 +DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s +INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.1108701229095459 s +DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=221824144923647474597465254393548597416, time:1750767707.3654897s req_ids:[8] +DEBUG 06-24 20:21:47 [manager.py:391] +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:203.88174057006836ms total_cost_time:203.92584800720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10151 prompt_cache_len:5151 prompt_cache_ratio:0.5074376908678948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 +DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10907578468322754 s +INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11089920997619629 s +DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=55690150147460318077901751025777496396, time:1750767707.5768464s req_ids:[8] +DEBUG 06-24 20:21:47 [manager.py:391] +ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:204.41770553588867ms total_cost_time:204.45847511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10152 prompt_cache_len:5151 prompt_cache_ratio:0.5073877068557919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 +DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10849905014038086 s +INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11039566993713379 s +DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=114600798920360626666962762159538598071, time:1750767707.7860758s req_ids:[8] +DEBUG 06-24 20:21:47 [manager.py:391] +ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:203.47833633422852ms total_cost_time:203.5236358642578ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10153 prompt_cache_len:5151 prompt_cache_ratio:0.5073377326898454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 +DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s +INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11026263236999512 s +DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=264635228822724950254656684438632077668, time:1750767707.9974406s req_ids:[8] +DEBUG 06-24 20:21:47 [manager.py:391] +ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:211.17758750915527ms total_cost_time:211.22312545776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10154 prompt_cache_len:5151 prompt_cache_ratio:0.507287768367146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 +DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10880494117736816 s +INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.1108999252319336 s +DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=236765120341545939154367353873775367629, time:1750767708.2189193s req_ids:[8] +DEBUG 06-24 20:21:48 [manager.py:391] +ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:213.22917938232422ms total_cost_time:213.2742404937744ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10155 prompt_cache_len:5151 prompt_cache_ratio:0.5072378138847858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 +DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s +INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11145448684692383 s +DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=91883959434081743059970374307395749092, time:1750767708.4319596s req_ids:[8] +DEBUG 06-24 20:21:48 [manager.py:391] +ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:204.99944686889648ms total_cost_time:205.04474639892578ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10156 prompt_cache_len:5151 prompt_cache_ratio:0.5071878692398583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 +DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10863447189331055 s +INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058473587036133 s +DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=326062611992093738680094295157482954815, time:1750767708.6449058s req_ids:[8] +DEBUG 06-24 20:21:48 [manager.py:391] +ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:206.8791389465332ms total_cost_time:206.9227695465088ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10157 prompt_cache_len:5151 prompt_cache_ratio:0.5071379344294575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 +DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10817289352416992 s +INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11031389236450195 s +DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=121666342581204545602804745496802067528, time:1750767708.8547683s req_ids:[8] +DEBUG 06-24 20:21:48 [manager.py:391] +ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:362.31517791748047ms total_cost_time:362.35904693603516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10158 prompt_cache_len:5151 prompt_cache_ratio:0.5070880094506792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 +DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10782313346862793 s +INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10983729362487793 s +DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=200214342817685868408717861021666873381, time:1750767709.2235832s req_ids:[8] +DEBUG 06-24 20:21:49 [manager.py:391] +ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:197.64089584350586ms total_cost_time:197.68595695495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10159 prompt_cache_len:5151 prompt_cache_ratio:0.5070380943006202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 +DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10890889167785645 s +INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.11096453666687012 s +DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=107399705044772354379231881816944385808, time:1750767709.425727s req_ids:[8] +DEBUG 06-24 20:21:49 [manager.py:391] +ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:204.38909530639648ms total_cost_time:204.43367958068848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10160 prompt_cache_len:5151 prompt_cache_ratio:0.506988188976378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 +DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s +INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10934281349182129 s +DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=39171556448706185149996231046413106475, time:1750767709.6371737s req_ids:[8] +DEBUG 06-24 20:21:49 [manager.py:391] +ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:206.4363956451416ms total_cost_time:206.47954940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10161 prompt_cache_len:5151 prompt_cache_ratio:0.5069382934750517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 +DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s +INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10950660705566406 s +DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=246151613058528285861338106235448375026, time:1750767709.8503969s req_ids:[8] +DEBUG 06-24 20:21:49 [manager.py:391] +ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:203.50265502929688ms total_cost_time:203.54723930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10162 prompt_cache_len:5151 prompt_cache_ratio:0.5068884077937413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 +DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10870170593261719 s +INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.11012125015258789 s +DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=62611630133935355224573563501472198736, time:1750767710.0587509s req_ids:[8] +DEBUG 06-24 20:21:50 [manager.py:391] +ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:204.8792839050293ms total_cost_time:204.9243450164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10163 prompt_cache_len:5151 prompt_cache_ratio:0.5068385319295483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 +DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10873198509216309 s +INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.11067581176757812 s +DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=95319955804261135385267686831907450335, time:1750767710.2726073s req_ids:[8] +DEBUG 06-24 20:21:50 [manager.py:391] +ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:209.1991901397705ms total_cost_time:209.244966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10164 prompt_cache_len:5151 prompt_cache_ratio:0.5067886658795749 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 +DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s +INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.10939288139343262 s +DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=143161653485812012324160082007040947348, time:1750767710.4848094s req_ids:[8] +DEBUG 06-24 20:21:50 [manager.py:391] +ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:205.43885231018066ms total_cost_time:205.48248291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10165 prompt_cache_len:5151 prompt_cache_ratio:0.5067388096409248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 +DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.31098198890686035 s +INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.312960147857666 s +DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=59417629579731564947259900426237265669, time:1750767710.90077s req_ids:[8] +DEBUG 06-24 20:21:50 [manager.py:391] +ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:418.19214820861816ms total_cost_time:418.23816299438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10166 prompt_cache_len:5151 prompt_cache_ratio:0.5066889632107023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 +DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10958170890808105 s +INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11151480674743652 s +DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=181690066750847179644252591866895893861, time:1750767711.123757s req_ids:[8] +DEBUG 06-24 20:21:51 [manager.py:391] +ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:210.12187004089355ms total_cost_time:210.16716957092285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10167 prompt_cache_len:5151 prompt_cache_ratio:0.5066391265860136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 +DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10908341407775879 s +INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11112666130065918 s +DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=141108360591117724241443149582779438395, time:1750767711.3396575s req_ids:[8] +DEBUG 06-24 20:21:51 [manager.py:391] +ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:209.06782150268555ms total_cost_time:209.11216735839844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10168 prompt_cache_len:5151 prompt_cache_ratio:0.5065892997639654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 +DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10872173309326172 s +INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11089777946472168 s +DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=306874814561215855798336839244601092985, time:1750767711.5562913s req_ids:[8] +DEBUG 06-24 20:21:51 [manager.py:391] +DEBUG 06-24 20:21:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 43575.933 tokens/s +DEBUG 06-24 20:21:51 [stats.py:37] Avg prompt tokens throughput: 43567.347 tokens/s +DEBUG 06-24 20:21:51 [stats.py:37] Avg generate tokens throughput: 8.586 tokens/s +ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:210.4470729827881ms total_cost_time:210.48951148986816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10169 prompt_cache_len:5151 prompt_cache_ratio:0.5065394827416658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 +DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s +INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s +DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=110390041671043797051975586016149013721, time:1750767711.7702425s req_ids:[8] +DEBUG 06-24 20:21:51 [manager.py:391] +ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:206.07233047485352ms total_cost_time:206.1169147491455ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10170 prompt_cache_len:5151 prompt_cache_ratio:0.5064896755162241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 +DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.1081244945526123 s +INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s +DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=207589740932310923478679326210970218798, time:1750767711.9834073s req_ids:[8] +DEBUG 06-24 20:21:51 [manager.py:391] +ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:209.122896194458ms total_cost_time:209.16748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10171 prompt_cache_len:5151 prompt_cache_ratio:0.5064398780847508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 +DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.10817599296569824 s +INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.11030149459838867 s +DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=250082861282191818878850867351228700242, time:1750767712.1983495s req_ids:[8] +DEBUG 06-24 20:21:52 [manager.py:391] +ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:206.9263458251953ms total_cost_time:206.9690227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10172 prompt_cache_len:5151 prompt_cache_ratio:0.506390090444357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 +DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.10918068885803223 s +INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125326156616211 s +DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=281921791089613627703830368274671382019, time:1750767712.413499s req_ids:[8] +DEBUG 06-24 20:21:52 [manager.py:391] +ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:210.07466316223145ms total_cost_time:210.11948585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10173 prompt_cache_len:5151 prompt_cache_ratio:0.5063403125921557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 +DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.3103041648864746 s +INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.3122215270996094 s +DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=132038998096165543760404663160512558533, time:1750767712.8346462s req_ids:[8] +DEBUG 06-24 20:21:52 [manager.py:391] +ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:408.4193706512451ms total_cost_time:408.4639549255371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10174 prompt_cache_len:5151 prompt_cache_ratio:0.5062905445252605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 +DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10894083976745605 s +INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11083841323852539 s +DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=217183463166192805370728066747468332152, time:1750767713.0422575s req_ids:[8] +DEBUG 06-24 20:21:53 [manager.py:391] +ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:209.35463905334473ms total_cost_time:209.41567420959473ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10175 prompt_cache_len:5151 prompt_cache_ratio:0.5062407862407863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 +DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:53 [batch.py:51] router release req id 8 +INFO 06-24 20:21:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10963582992553711 s +INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11186671257019043 s +DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=239194813887653020895748570470620342739, time:1750767713.2581968s req_ids:[8] +DEBUG 06-24 20:21:53 [manager.py:391] +ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:206.18891716003418ms total_cost_time:206.23445510864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10176 prompt_cache_len:5151 prompt_cache_ratio:0.5061910377358491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 +DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10833549499511719 s +INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s +DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=5937566445910207894640637634133177337, time:1750767713.4691164s req_ids:[8] +DEBUG 06-24 20:21:53 [manager.py:391] +ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:206.5727710723877ms total_cost_time:206.618070602417ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10177 prompt_cache_len:5151 prompt_cache_ratio:0.5061412990075661 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 +DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.1075894832611084 s +INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.10971879959106445 s +DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=54947203383929137795381133179969680500, time:1750767713.6841764s req_ids:[8] +DEBUG 06-24 20:21:53 [manager.py:391] +ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:208.77861976623535ms total_cost_time:208.80675315856934ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:10178 prompt_cache_len:5151 prompt_cache_ratio:0.5060915700530556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 +DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10849404335021973 s +INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11066031455993652 s +DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=327989025836206708986247450474680594918, time:1750767713.899737s req_ids:[8] +DEBUG 06-24 20:21:53 [manager.py:391] +ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:223.9692211151123ms total_cost_time:224.0147590637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10179 prompt_cache_len:5151 prompt_cache_ratio:0.5060418508694371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 +DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10809850692749023 s +INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.11019420623779297 s +DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=154274389491382623239164633297486486447, time:1750767714.148191s req_ids:[8] +DEBUG 06-24 20:21:54 [manager.py:391] +ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:219.62237358093262ms total_cost_time:219.6669578552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10180 prompt_cache_len:5151 prompt_cache_ratio:0.5059921414538311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 +DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10608458518981934 s +INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.1078493595123291 s +DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=128063657254144586533000277854839495315, time:1750767714.351337s req_ids:[8] +DEBUG 06-24 20:21:54 [manager.py:391] +ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:335.7722759246826ms total_cost_time:335.8170986175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10181 prompt_cache_len:5151 prompt_cache_ratio:0.5059424418033592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 +DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10908198356628418 s +INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.1113426685333252 s +DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=279689998078639681657998157505518402627, time:1750767714.6907172s req_ids:[8] +DEBUG 06-24 20:21:54 [manager.py:391] +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:195.67298889160156ms total_cost_time:195.72043418884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10182 prompt_cache_len:5151 prompt_cache_ratio:0.5058927519151444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 +DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10828709602355957 s +INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.11037468910217285 s +DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=189583021769601880643778622638744457130, time:1750767714.8958888s req_ids:[8] +DEBUG 06-24 20:21:54 [manager.py:391] +ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:207.6132297515869ms total_cost_time:207.65948295593262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10183 prompt_cache_len:5151 prompt_cache_ratio:0.5058430717863105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 +DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.1076357364654541 s +INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.10966849327087402 s +DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=149850188785011090092291004190266491427, time:1750767715.1082509s req_ids:[8] +DEBUG 06-24 20:21:55 [manager.py:391] +ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:209.34343338012695ms total_cost_time:209.38801765441895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10184 prompt_cache_len:5151 prompt_cache_ratio:0.5057934014139828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 +DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s +INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.10986065864562988 s +DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=120125875770447005251387818846730992887, time:1750767715.3284495s req_ids:[8] +DEBUG 06-24 20:21:55 [manager.py:391] +ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:209.94257926940918ms total_cost_time:209.98764038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10185 prompt_cache_len:5151 prompt_cache_ratio:0.5057437407952872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 +DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s +INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11026573181152344 s +DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=38533931238485029816214169885775389096, time:1750767715.5410726s req_ids:[8] +DEBUG 06-24 20:21:55 [manager.py:391] +ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:210.4494571685791ms total_cost_time:210.5097770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:10186 prompt_cache_len:5151 prompt_cache_ratio:0.5056940899273513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 +DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.1103827953338623 s +INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11236381530761719 s +DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=47294859779598998354675238836512849143, time:1750767715.7572296s req_ids:[8] +DEBUG 06-24 20:21:55 [manager.py:391] +ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:167.8328514099121ms total_cost_time:167.87481307983398ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10187 prompt_cache_len:5151 prompt_cache_ratio:0.5056444488073034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 +DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10814785957336426 s +INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11013913154602051 s +DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=183486134893157251971364779623943165028, time:1750767715.9320982s req_ids:[8] +DEBUG 06-24 20:21:55 [manager.py:391] +ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:198.73499870300293ms total_cost_time:198.78149032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10188 prompt_cache_len:5151 prompt_cache_ratio:0.5055948174322733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 +DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10853099822998047 s +INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.11054348945617676 s +DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=184456700281138368338089968995373025651, time:1750767716.1349926s req_ids:[8] +DEBUG 06-24 20:21:56 [manager.py:391] +ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:204.64229583740234ms total_cost_time:204.68640327453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10189 prompt_cache_len:5151 prompt_cache_ratio:0.5055451957993915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 +DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:21:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.3071279525756836 s +INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.3089561462402344 s +DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=119639971667094751148004675908539669559, time:1750767716.5551603s req_ids:[8] +DEBUG 06-24 20:21:56 [manager.py:391] +ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:373.9924430847168ms total_cost_time:374.0499019622803ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:10190 prompt_cache_len:5151 prompt_cache_ratio:0.50549558390579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 +DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s +INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.11015009880065918 s +DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=228065973816863935871663455940005107890, time:1750767716.724513s req_ids:[8] +DEBUG 06-24 20:21:56 [manager.py:391] +ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:197.0040798187256ms total_cost_time:197.0505714416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10191 prompt_cache_len:5151 prompt_cache_ratio:0.5054459817486017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 +DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s +INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.10995745658874512 s +DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=127021827906575341326721937884342472059, time:1750767716.927526s req_ids:[8] +DEBUG 06-24 20:21:56 [manager.py:391] +ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:204.01668548583984ms total_cost_time:204.06031608581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10192 prompt_cache_len:5151 prompt_cache_ratio:0.5053963893249608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 +DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10954976081848145 s +INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11158561706542969 s +DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=191153412754150930387837211417413830895, time:1750767717.1404629s req_ids:[8] +DEBUG 06-24 20:21:57 [manager.py:391] +ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:207.9176902770996ms total_cost_time:208.0233097076416ms,out_token_counter:1 mean_per_token_cost_time: 0.10561943054199219ms prompt_token_num:10193 prompt_cache_len:5151 prompt_cache_ratio:0.5053468066320024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 +DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.1095125675201416 s +INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.1115577220916748 s +DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=151890497980791179063594521867435290218, time:1750767717.3539455s req_ids:[8] +DEBUG 06-24 20:21:57 [manager.py:391] +ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:205.93667030334473ms total_cost_time:205.98101615905762ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10194 prompt_cache_len:5151 prompt_cache_ratio:0.5052972336668629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 +DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.1084756851196289 s +INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11057162284851074 s +DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=324985861580833485035245486627321442665, time:1750767717.5661154s req_ids:[8] +DEBUG 06-24 20:21:57 [manager.py:391] +ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:204.72359657287598ms total_cost_time:204.78200912475586ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10195 prompt_cache_len:5151 prompt_cache_ratio:0.5052476704266797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 +DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s +INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098184585571289 s +DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=334435263317403939663933301836415221103, time:1750767717.7864873s req_ids:[8] +DEBUG 06-24 20:21:57 [manager.py:391] +ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:218.4736728668213ms total_cost_time:218.5196876525879ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10196 prompt_cache_len:5151 prompt_cache_ratio:0.5051981169085916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 +DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10912394523620605 s +INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11124157905578613 s +DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=130574240565199710585131735138744884282, time:1750767718.0032194s req_ids:[8] +DEBUG 06-24 20:21:58 [manager.py:391] +ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:377.35724449157715ms total_cost_time:377.40278244018555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10197 prompt_cache_len:5151 prompt_cache_ratio:0.5051485731097382 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 +DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10932350158691406 s +INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.11142802238464355 s +DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=157090004188417983524334565221009191323, time:1750767718.3862798s req_ids:[8] +DEBUG 06-24 20:21:58 [manager.py:391] +ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:207.35406875610352ms total_cost_time:207.3974609375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10198 prompt_cache_len:5151 prompt_cache_ratio:0.5050990390272603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 +DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10788917541503906 s +INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.10983967781066895 s +DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=316010042423684139405218087127307100707, time:1750767718.6064842s req_ids:[8] +DEBUG 06-24 20:21:58 [manager.py:391] +ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:215.00778198242188ms total_cost_time:215.05260467529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10199 prompt_cache_len:5151 prompt_cache_ratio:0.5050495146582998 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 +DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10806822776794434 s +INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.11021280288696289 s +DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=224647700800854691780972548085507260734, time:1750767718.822453s req_ids:[8] +DEBUG 06-24 20:21:58 [manager.py:391] +ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:206.41469955444336ms total_cost_time:206.45976066589355ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10200 prompt_cache_len:5151 prompt_cache_ratio:0.505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 +DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s +INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11006498336791992 s +DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=147427983136895024557591037632266843627, time:1750767719.0338118s req_ids:[8] +DEBUG 06-24 20:21:59 [manager.py:391] +ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:207.31115341186523ms total_cost_time:207.35645294189453ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10201 prompt_cache_len:5151 prompt_cache_ratio:0.504950495049505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 +DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10795950889587402 s +INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11064958572387695 s +DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=260332699668594343608360380236410457888, time:1750767719.2463562s req_ids:[8] +DEBUG 06-24 20:21:59 [manager.py:391] +ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:204.1494846343994ms total_cost_time:204.1945457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10202 prompt_cache_len:5151 prompt_cache_ratio:0.50490099980396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 +DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s +INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11070466041564941 s +DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=260230835040301276240579915124230505773, time:1750767719.4573379s req_ids:[8] +DEBUG 06-24 20:21:59 [manager.py:391] +ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:208.8601589202881ms total_cost_time:208.90402793884277ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10203 prompt_cache_len:5151 prompt_cache_ratio:0.5048515142605117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 +DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10782957077026367 s +INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.10998201370239258 s +DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=153313817155264135467096212655365307807, time:1750767719.6713607s req_ids:[8] +DEBUG 06-24 20:21:59 [manager.py:391] +ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:204.4847011566162ms total_cost_time:204.5295238494873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10204 prompt_cache_len:5151 prompt_cache_ratio:0.5048020384163073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 +DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:21:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10742378234863281 s +INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.10963749885559082 s +DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=72309810925689308879123621374612041555, time:1750767719.8825214s req_ids:[8] +DEBUG 06-24 20:21:59 [manager.py:391] +ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:364.6209239959717ms total_cost_time:364.68052864074707ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:10205 prompt_cache_len:5151 prompt_cache_ratio:0.5047525722684958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 +DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10870194435119629 s +INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s +DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=192109418991214771039446883347904298435, time:1750767720.2531226s req_ids:[8] +DEBUG 06-24 20:22:00 [manager.py:391] +ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:198.2593536376953ms total_cost_time:198.3034610748291ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10206 prompt_cache_len:5151 prompt_cache_ratio:0.5047031158142269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 +DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10747885704040527 s +INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.10944294929504395 s +DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=129729238269397932006761705385547293121, time:1750767720.4562047s req_ids:[8] +DEBUG 06-24 20:22:00 [manager.py:391] +ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:206.5284252166748ms total_cost_time:206.5892219543457ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10207 prompt_cache_len:5151 prompt_cache_ratio:0.5046536690506516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 +DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10808277130126953 s +INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100454330444336 s +DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=254475762851233455950344337717014695834, time:1750767720.6699603s req_ids:[8] +DEBUG 06-24 20:22:00 [manager.py:391] +ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:203.61971855163574ms total_cost_time:203.66477966308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10208 prompt_cache_len:5151 prompt_cache_ratio:0.5046042319749217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 +DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10911417007446289 s +INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.1110391616821289 s +DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=10258791185918051292096629133600954734, time:1750767720.8882546s req_ids:[8] +DEBUG 06-24 20:22:00 [manager.py:391] +ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:217.56887435913086ms total_cost_time:217.61178970336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10209 prompt_cache_len:5151 prompt_cache_ratio:0.5045548045841904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 +DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s +INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s +DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=318605398706177215223499471910484711821, time:1750767721.104027s req_ids:[8] +DEBUG 06-24 20:22:01 [manager.py:391] +ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:203.57012748718262ms total_cost_time:203.6125659942627ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10210 prompt_cache_len:5151 prompt_cache_ratio:0.5045053868756122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 +DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10910177230834961 s +INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11108994483947754 s +DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=314011069988068644181715927510292270202, time:1750767721.3135314s req_ids:[8] +DEBUG 06-24 20:22:01 [manager.py:391] +ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:206.9103717803955ms total_cost_time:206.9535255432129ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10211 prompt_cache_len:5151 prompt_cache_ratio:0.5044559788463422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 +DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10814881324768066 s +INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s +DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=303818005635972922274620839295545195667, time:1750767721.5283315s req_ids:[8] +DEBUG 06-24 20:22:01 [manager.py:391] +ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:22:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 43561.657 tokens/s +DEBUG 06-24 20:22:01 [stats.py:37] Avg prompt tokens throughput: 43553.010 tokens/s +DEBUG 06-24 20:22:01 [stats.py:37] Avg generate tokens throughput: 8.647 tokens/s +INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:211.36140823364258ms total_cost_time:211.40527725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10212 prompt_cache_len:5151 prompt_cache_ratio:0.504406580493537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 +DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.1076047420501709 s +INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s +DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=75970751537961997592694919467677988980, time:1750767721.744334s req_ids:[8] +DEBUG 06-24 20:22:01 [manager.py:391] +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:365.8406734466553ms total_cost_time:365.88597297668457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10213 prompt_cache_len:5151 prompt_cache_ratio:0.5043571918143542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 +DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s +INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976457595825195 s +DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=107989291788391464910735229822148876319, time:1750767722.1124654s req_ids:[8] +DEBUG 06-24 20:22:02 [manager.py:391] +ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:203.216552734375ms total_cost_time:203.25994491577148ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10214 prompt_cache_len:5151 prompt_cache_ratio:0.5043078128059526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 +DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10857725143432617 s +INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061835289001465 s +DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=107974031084677637687774769860780245952, time:1750767722.3252752s req_ids:[8] +DEBUG 06-24 20:22:02 [manager.py:391] +ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:203.95374298095703ms total_cost_time:203.99951934814453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10215 prompt_cache_len:5151 prompt_cache_ratio:0.504258443465492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 +DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.1074674129486084 s +INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.10944056510925293 s +DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=287625476360597362245278457823962472121, time:1750767722.5337555s req_ids:[8] +DEBUG 06-24 20:22:02 [manager.py:391] +ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:208.1589698791504ms total_cost_time:208.20307731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10216 prompt_cache_len:5151 prompt_cache_ratio:0.5042090837901331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 +DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10928845405578613 s +INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.11115384101867676 s +DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=239985181381730499148495444469618714068, time:1750767722.747441s req_ids:[8] +DEBUG 06-24 20:22:02 [manager.py:391] +ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:206.77804946899414ms total_cost_time:206.82191848754883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10217 prompt_cache_len:5151 prompt_cache_ratio:0.5041597337770383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 +DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s +INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.1105036735534668 s +DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=104676612471079284225607484851989523785, time:1750767722.960841s req_ids:[8] +DEBUG 06-24 20:22:02 [manager.py:391] +ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:204.23531532287598ms total_cost_time:204.27918434143066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10218 prompt_cache_len:5151 prompt_cache_ratio:0.5041103934233705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 +DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10896015167236328 s +INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11092162132263184 s +DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=316419618625399408460749509896687761630, time:1750767723.1691246s req_ids:[8] +DEBUG 06-24 20:22:03 [manager.py:391] +ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:204.3285369873047ms total_cost_time:204.37169075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10219 prompt_cache_len:5151 prompt_cache_ratio:0.5040610627262941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 +DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10750508308410645 s +INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.10929751396179199 s +DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=168651627738625351384139706124913322363, time:1750767723.3800666s req_ids:[8] +DEBUG 06-24 20:22:03 [manager.py:391] +ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:165.62962532043457ms total_cost_time:165.67230224609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10220 prompt_cache_len:5151 prompt_cache_ratio:0.5040117416829746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 +DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10802149772644043 s +INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11001062393188477 s +DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=257351464449025043258036408178703522858, time:1750767723.5496976s req_ids:[8] +DEBUG 06-24 20:22:03 [manager.py:391] +ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:370.3761100769043ms total_cost_time:370.4209327697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10221 prompt_cache_len:5151 prompt_cache_ratio:0.5039624302905782 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 +DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.11108255386352539 s +INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11302518844604492 s +DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=174259209055580641974967543231458468488, time:1750767723.9259326s req_ids:[8] +DEBUG 06-24 20:22:03 [manager.py:391] +ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:201.6751766204834ms total_cost_time:201.7202377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10222 prompt_cache_len:5151 prompt_cache_ratio:0.5039131285462728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 +DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.1079566478729248 s +INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10990190505981445 s +DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=235643982268707508209970922522028557091, time:1750767724.135265s req_ids:[8] +DEBUG 06-24 20:22:04 [manager.py:391] +ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:204.97608184814453ms total_cost_time:205.00850677490234ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:10223 prompt_cache_len:5151 prompt_cache_ratio:0.5038638364472269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 +DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10766029357910156 s +INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10965776443481445 s +DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=48476886572061274099378784083312012222, time:1750767724.3460057s req_ids:[8] +DEBUG 06-24 20:22:04 [manager.py:391] +ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:207.31544494628906ms total_cost_time:207.36026763916016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10224 prompt_cache_len:5151 prompt_cache_ratio:0.5038145539906104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 +DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10860157012939453 s +INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.1106109619140625 s +DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=200476096092235810820775155040679686310, time:1750767724.5594409s req_ids:[8] +DEBUG 06-24 20:22:04 [manager.py:391] +ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:207.6582908630371ms total_cost_time:207.7012062072754ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10225 prompt_cache_len:5151 prompt_cache_ratio:0.5037652811735941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 +DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10790777206420898 s +INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10983657836914062 s +DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=161883859476673767464460776695429900353, time:1750767724.7730112s req_ids:[8] +DEBUG 06-24 20:22:04 [manager.py:391] +ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:209.98907089233398ms total_cost_time:210.03365516662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10226 prompt_cache_len:5151 prompt_cache_ratio:0.5037160179933503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 +DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10895061492919922 s +INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.11091494560241699 s +DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=113656605777687224642386074159050625189, time:1750767724.9906356s req_ids:[8] +DEBUG 06-24 20:22:04 [manager.py:391] +ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:209.49292182922363ms total_cost_time:209.51485633850098ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:10227 prompt_cache_len:5151 prompt_cache_ratio:0.5036667644470519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 +DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s +INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097116470336914 s +DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=42246799337171411802174871268835855027, time:1750767725.206292s req_ids:[8] +DEBUG 06-24 20:22:05 [manager.py:391] +ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:209.78355407714844ms total_cost_time:209.82861518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10228 prompt_cache_len:5151 prompt_cache_ratio:0.5036175205318733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 +DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.10733532905578613 s +INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.10919785499572754 s +DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=8721238828797903302586038883418131499, time:1750767725.4203289s req_ids:[8] +DEBUG 06-24 20:22:05 [manager.py:391] +ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:367.6156997680664ms total_cost_time:367.6612377166748ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10229 prompt_cache_len:5151 prompt_cache_ratio:0.5035682862449897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 +DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s +INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.10989069938659668 s +DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=106525218158279199168253248006685147668, time:1750767725.7908587s req_ids:[8] +DEBUG 06-24 20:22:05 [manager.py:391] +ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:206.7854404449463ms total_cost_time:206.82930946350098ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10230 prompt_cache_len:5151 prompt_cache_ratio:0.5035190615835777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 +DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.1080930233001709 s +INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.11016416549682617 s +DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=329642262179184151283354890990399705301, time:1750767726.0066218s req_ids:[8] +DEBUG 06-24 20:22:06 [manager.py:391] +ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:205.5490016937256ms total_cost_time:205.59358596801758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10231 prompt_cache_len:5151 prompt_cache_ratio:0.5034698465448147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 +DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.1092996597290039 s +INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.1113889217376709 s +DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=233333697384713715876170146428131205256, time:1750767726.2168474s req_ids:[8] +DEBUG 06-24 20:22:06 [manager.py:391] +ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:202.1772861480713ms total_cost_time:202.2233009338379ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10232 prompt_cache_len:5151 prompt_cache_ratio:0.5034206411258796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 +DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10860633850097656 s +INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075234413146973 s +DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=113359060590504514651390816215956225575, time:1750767726.4248521s req_ids:[8] +DEBUG 06-24 20:22:06 [manager.py:391] +ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:207.0167064666748ms total_cost_time:207.061767578125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10233 prompt_cache_len:5151 prompt_cache_ratio:0.503371445323952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 +DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s +INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s +DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=38560652732732527171251416027733831092, time:1750767726.6375473s req_ids:[8] +DEBUG 06-24 20:22:06 [manager.py:391] +ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:203.61924171447754ms total_cost_time:203.66287231445312ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10234 prompt_cache_len:5151 prompt_cache_ratio:0.5033222591362126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 +DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10834336280822754 s +INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.1103830337524414 s +DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=86730363826149479647827825782195051178, time:1750767726.8478534s req_ids:[8] +DEBUG 06-24 20:22:06 [manager.py:391] +ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:208.03189277648926ms total_cost_time:208.07743072509766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10235 prompt_cache_len:5151 prompt_cache_ratio:0.5032730825598437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 +DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10842704772949219 s +INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11045241355895996 s +DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=187787795358477786349138944684288949952, time:1750767727.0620134s req_ids:[8] +DEBUG 06-24 20:22:07 [manager.py:391] +ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:206.1784267425537ms total_cost_time:206.2222957611084ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10236 prompt_cache_len:5151 prompt_cache_ratio:0.5032239155920282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 +DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10881853103637695 s +INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11097979545593262 s +DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=221182453411316454599036893611322321598, time:1750767727.2743232s req_ids:[8] +DEBUG 06-24 20:22:07 [manager.py:391] +ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:369.6126937866211ms total_cost_time:369.657039642334ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10237 prompt_cache_len:5151 prompt_cache_ratio:0.5031747582299502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 +DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10831689834594727 s +INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11027050018310547 s +DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=312775591316998553067620147458162063250, time:1750767727.6489344s req_ids:[8] +DEBUG 06-24 20:22:07 [manager.py:391] +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:201.10249519348145ms total_cost_time:201.14731788635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10238 prompt_cache_len:5151 prompt_cache_ratio:0.5031256104707951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 +DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.1077277660369873 s +INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094655990600586 s +DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=146454716266056223197323622324079044423, time:1750767727.855233s req_ids:[8] +DEBUG 06-24 20:22:07 [manager.py:391] +ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:200.75392723083496ms total_cost_time:200.79684257507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10239 prompt_cache_len:5151 prompt_cache_ratio:0.5030764723117492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 +DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10940408706665039 s +INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11137104034423828 s +DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=72883751692198195468150224239538313170, time:1750767728.062727s req_ids:[8] +DEBUG 06-24 20:22:08 [manager.py:391] +ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:205.93929290771484ms total_cost_time:205.98363876342773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10240 prompt_cache_len:5151 prompt_cache_ratio:0.50302734375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 +DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10640192031860352 s +INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.10761260986328125 s +DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=4868146908764873148012394317783736212, time:1750767728.2743766s req_ids:[8] +DEBUG 06-24 20:22:08 [manager.py:391] +ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:205.12866973876953ms total_cost_time:205.1718235015869ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10241 prompt_cache_len:5151 prompt_cache_ratio:0.502978224782736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 +DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10842657089233398 s +INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11036539077758789 s +DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=200504998753112038203372185856995901251, time:1750767728.4874542s req_ids:[8] +DEBUG 06-24 20:22:08 [manager.py:391] +ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:210.19983291625977ms total_cost_time:210.24274826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10242 prompt_cache_len:5151 prompt_cache_ratio:0.502929115407147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 +DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10885238647460938 s +INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11075735092163086 s +DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=101126981036147944592795205662388662384, time:1750767728.701009s req_ids:[8] +DEBUG 06-24 20:22:08 [manager.py:391] +ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:203.83596420288086ms total_cost_time:203.87959480285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10243 prompt_cache_len:5151 prompt_cache_ratio:0.5028800156204237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 +DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10879039764404297 s +INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s +DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=73293308006869813752099840756692673174, time:1750767728.9089744s req_ids:[8] +DEBUG 06-24 20:22:08 [manager.py:391] +ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:367.9049015045166ms total_cost_time:367.9492473602295ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10244 prompt_cache_len:5151 prompt_cache_ratio:0.5028309254197579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 +DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10838794708251953 s +INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024689674377441 s +DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=50274572810304302519805148129903612458, time:1750767729.284289s req_ids:[8] +DEBUG 06-24 20:22:09 [manager.py:391] +ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:208.40883255004883ms total_cost_time:208.45317840576172ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10245 prompt_cache_len:5151 prompt_cache_ratio:0.5027818448023426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 +DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10929727554321289 s +INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11112070083618164 s +DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=262020928575820431123970926129717187165, time:1750767729.499709s req_ids:[8] +DEBUG 06-24 20:22:09 [manager.py:391] +ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:209.61880683898926ms total_cost_time:209.66315269470215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10246 prompt_cache_len:5151 prompt_cache_ratio:0.5027327737653718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 +DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10936474800109863 s +INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11121559143066406 s +DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=184255663300813413374191363012734676561, time:1750767729.7150693s req_ids:[8] +DEBUG 06-24 20:22:09 [manager.py:391] +ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:210.3862762451172ms total_cost_time:210.43014526367188ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10247 prompt_cache_len:5151 prompt_cache_ratio:0.5026837123060408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 +DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.1083674430847168 s +INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11017608642578125 s +DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=119689195309379197764747738086371071536, time:1750767729.9287121s req_ids:[8] +DEBUG 06-24 20:22:09 [manager.py:391] +ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:207.8537940979004ms total_cost_time:207.89790153503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10248 prompt_cache_len:5151 prompt_cache_ratio:0.5026346604215457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 +DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10852217674255371 s +INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11045384407043457 s +DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=120681180088671193582983245044616050580, time:1750767730.140855s req_ids:[8] +DEBUG 06-24 20:22:10 [manager.py:391] +ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.77709579467773ms total_cost_time:206.82311058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10249 prompt_cache_len:5151 prompt_cache_ratio:0.5025856181090839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 +DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10823702812194824 s +INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11002874374389648 s +DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=263471411795505560058855517541088360478, time:1750767730.3569775s req_ids:[8] +DEBUG 06-24 20:22:10 [manager.py:391] +ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.48694038391113ms total_cost_time:206.53033256530762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10250 prompt_cache_len:5151 prompt_cache_ratio:0.5025365853658537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 +DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.11025309562683105 s +INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11238670349121094 s +DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=313826642341131770435029087243898610909, time:1750767730.5669968s req_ids:[8] +DEBUG 06-24 20:22:10 [manager.py:391] +ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.2661647796631ms total_cost_time:206.30908012390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10251 prompt_cache_len:5151 prompt_cache_ratio:0.5024875621890548 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 +DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10705161094665527 s +INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.10875248908996582 s +DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=69283379468962504050718329789364605222, time:1750767730.7793877s req_ids:[8] +DEBUG 06-24 20:22:10 [manager.py:391] +ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:363.9380931854248ms total_cost_time:363.9845848083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10252 prompt_cache_len:5151 prompt_cache_ratio:0.5024385485758877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 +DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s +INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10930681228637695 s +DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=86354164656274642082945519685718996903, time:1750767731.1498017s req_ids:[8] +DEBUG 06-24 20:22:11 [manager.py:391] +ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:209.11884307861328ms total_cost_time:209.16509628295898ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10253 prompt_cache_len:5151 prompt_cache_ratio:0.5023895445235541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 +DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10789752006530762 s +INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s +DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=44936262327005927380570783316101625049, time:1750767731.3663714s req_ids:[8] +DEBUG 06-24 20:22:11 [manager.py:391] +ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:210.5429172515869ms total_cost_time:210.5875015258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10254 prompt_cache_len:5151 prompt_cache_ratio:0.5023405500292569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 +DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10896730422973633 s +INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s +DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=150493458547882697654070984417728837376, time:1750767731.5829113s req_ids:[8] +DEBUG 06-24 20:22:11 [manager.py:391] +ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:22:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 43782.752 tokens/s +DEBUG 06-24 20:22:11 [stats.py:37] Avg prompt tokens throughput: 43774.197 tokens/s +DEBUG 06-24 20:22:11 [stats.py:37] Avg generate tokens throughput: 8.555 tokens/s +INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:208.61554145812988ms total_cost_time:208.66036415100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10255 prompt_cache_len:5151 prompt_cache_ratio:0.5022915650901999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 +DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10741400718688965 s +INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10959720611572266 s +DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=63885352170483338346688268442786315651, time:1750767731.7954724s req_ids:[8] +DEBUG 06-24 20:22:11 [manager.py:391] +ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:208.13846588134766ms total_cost_time:208.18138122558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10256 prompt_cache_len:5151 prompt_cache_ratio:0.5022425897035881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 +DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10816526412963867 s +INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11024641990661621 s +DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=157305407585719661145844634786554562101, time:1750767732.0087693s req_ids:[8] +DEBUG 06-24 20:22:12 [manager.py:391] +ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:203.87721061706543ms total_cost_time:203.92251014709473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10257 prompt_cache_len:5151 prompt_cache_ratio:0.5021936238666277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 +DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10832023620605469 s +INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11047816276550293 s +DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=16074861369786859127646378120335819979, time:1750767732.2188797s req_ids:[8] +DEBUG 06-24 20:22:12 [manager.py:391] +ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:206.7575454711914ms total_cost_time:206.8021297454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10258 prompt_cache_len:5151 prompt_cache_ratio:0.5021446675765256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 +DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10847878456115723 s +INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s +DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=146998822295428622619543684175371976206, time:1750767732.4335449s req_ids:[8] +DEBUG 06-24 20:22:12 [manager.py:391] +ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:207.89027214050293ms total_cost_time:207.93437957763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10259 prompt_cache_len:5151 prompt_cache_ratio:0.5020957208304903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 +DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s +INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s +DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=158558053363817602165064488744067653815, time:1750767732.6469445s req_ids:[8] +DEBUG 06-24 20:22:12 [manager.py:391] +ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:335.2031707763672ms total_cost_time:335.2482318878174ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10260 prompt_cache_len:5151 prompt_cache_ratio:0.502046783625731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 +DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10782408714294434 s +INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959744453430176 s +DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=329893388024098318479951063285567523408, time:1750767732.9848046s req_ids:[8] +DEBUG 06-24 20:22:12 [manager.py:391] +ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:202.64244079589844ms total_cost_time:202.68774032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10261 prompt_cache_len:5151 prompt_cache_ratio:0.5019978559594581 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 +DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10813713073730469 s +INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.11007070541381836 s +DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=313374967784078418398425981119381224697, time:1750767733.1976862s req_ids:[8] +DEBUG 06-24 20:22:13 [manager.py:391] +ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:208.20069313049316ms total_cost_time:208.24265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10262 prompt_cache_len:5151 prompt_cache_ratio:0.5019489378288833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 +DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10907268524169922 s +INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.11114025115966797 s +DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=103361930918123696123472490777643683246, time:1750767733.422159s req_ids:[8] +DEBUG 06-24 20:22:13 [manager.py:391] +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:220.21770477294922ms total_cost_time:220.2625274658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10263 prompt_cache_len:5151 prompt_cache_ratio:0.5019000292312189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 +DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s +INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.1106572151184082 s +DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=93316368097765262139110981880372248384, time:1750767733.6362405s req_ids:[8] +DEBUG 06-24 20:22:13 [manager.py:391] +ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:207.09490776062012ms total_cost_time:207.1394920349121ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10264 prompt_cache_len:5151 prompt_cache_ratio:0.5018511301636789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 +DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10743427276611328 s +INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.10950136184692383 s +DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=257614060921682427594205754949900729768, time:1750767733.8500996s req_ids:[8] +DEBUG 06-24 20:22:13 [manager.py:391] +ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:207.91125297546387ms total_cost_time:207.95536041259766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10265 prompt_cache_len:5151 prompt_cache_ratio:0.5018022406234779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 +DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10798048973083496 s +INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.10991263389587402 s +DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=254346960570548724651195692950773432735, time:1750767734.0625298s req_ids:[8] +DEBUG 06-24 20:22:14 [manager.py:391] +ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:204.66303825378418ms total_cost_time:204.70690727233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10266 prompt_cache_len:5151 prompt_cache_ratio:0.5017533606078317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 +DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s +INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.1112213134765625 s +DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=289802829278611423102773138686342498026, time:1750767734.2740417s req_ids:[8] +DEBUG 06-24 20:22:14 [manager.py:391] +ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:205.75261116027832ms total_cost_time:205.7974338531494ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10267 prompt_cache_len:5151 prompt_cache_ratio:0.5017044901139573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 +DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:14 [batch.py:51] router release req id 8 +INFO 06-24 20:22:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.3109145164489746 s +INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.31287550926208496 s +DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=23870346092718010681965928639307286375, time:1750767734.6874986s req_ids:[8] +DEBUG 06-24 20:22:14 [manager.py:391] +ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:415.5890941619873ms total_cost_time:415.6339168548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10268 prompt_cache_len:5151 prompt_cache_ratio:0.5016556291390728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 +DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10797548294067383 s +INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.10978198051452637 s +DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=31173844393602952100244297731096556051, time:1750767734.9096694s req_ids:[8] +DEBUG 06-24 20:22:14 [manager.py:391] +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:209.7318172454834ms total_cost_time:209.7756862640381ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10269 prompt_cache_len:5151 prompt_cache_ratio:0.5016067776803973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 +DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10770082473754883 s +INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1093893051147461 s +DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=196644230493390246230935138837953203705, time:1750767735.1240141s req_ids:[8] +DEBUG 06-24 20:22:15 [manager.py:391] +ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:167.55366325378418ms total_cost_time:167.59753227233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10270 prompt_cache_len:5151 prompt_cache_ratio:0.501557935735151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 +DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10828399658203125 s +INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022782325744629 s +DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=33853320136259992533841961752353559037, time:1750767735.2958357s req_ids:[8] +DEBUG 06-24 20:22:15 [manager.py:391] +ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:200.55747032165527ms total_cost_time:200.60372352600098ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10271 prompt_cache_len:5151 prompt_cache_ratio:0.501509103300555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 +DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10780692100524902 s +INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s +DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=122125232397529444202375984015789356726, time:1750767735.5052178s req_ids:[8] +DEBUG 06-24 20:22:15 [manager.py:391] +ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:208.31632614135742ms total_cost_time:208.36210250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10272 prompt_cache_len:5151 prompt_cache_ratio:0.5014602803738317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 +DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s +INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.11045479774475098 s +DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=58858929114961699880501059172692946852, time:1750767735.7188945s req_ids:[8] +DEBUG 06-24 20:22:15 [manager.py:391] +ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:205.20544052124023ms total_cost_time:205.24859428405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10273 prompt_cache_len:5151 prompt_cache_ratio:0.5014114669522048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 +DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10851240158081055 s +INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s +DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=165605065468315248266370372075708944722, time:1750767735.931981s req_ids:[8] +DEBUG 06-24 20:22:15 [manager.py:391] +ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:208.71210098266602ms total_cost_time:208.7554931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10274 prompt_cache_len:5151 prompt_cache_ratio:0.5013626630328986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 +DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10880112648010254 s +INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083030700683594 s +DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=554535734519393799943423381313276459, time:1750767736.1582665s req_ids:[8] +DEBUG 06-24 20:22:16 [manager.py:391] +ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:387.3727321624756ms total_cost_time:387.4177932739258ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10275 prompt_cache_len:5151 prompt_cache_ratio:0.5013138686131386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 +DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10781359672546387 s +INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.10978150367736816 s +DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=49886438634246760885947694933797887742, time:1750767736.5392172s req_ids:[8] +DEBUG 06-24 20:22:16 [manager.py:391] +ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:208.3141803741455ms total_cost_time:208.3566188812256ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10276 prompt_cache_len:5151 prompt_cache_ratio:0.5012650836901518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 +DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10871720314025879 s +INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.11071133613586426 s +DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=129926623472035919997000197592832891459, time:1750767736.7627668s req_ids:[8] +DEBUG 06-24 20:22:16 [manager.py:391] +ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:219.7723388671875ms total_cost_time:219.8169231414795ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10277 prompt_cache_len:5151 prompt_cache_ratio:0.5012163082611657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 +DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10707497596740723 s +INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.10891509056091309 s +DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=313980723514558435763066028293727914177, time:1750767736.9791117s req_ids:[8] +DEBUG 06-24 20:22:16 [manager.py:391] +ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:206.27093315124512ms total_cost_time:206.315279006958ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10278 prompt_cache_len:5151 prompt_cache_ratio:0.5011675423234092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 +DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10842156410217285 s +INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11056661605834961 s +DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=298013927179041960902023171726822679443, time:1750767737.1915488s req_ids:[8] +DEBUG 06-24 20:22:17 [manager.py:391] +ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:208.77337455749512ms total_cost_time:208.8301181793213ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:10279 prompt_cache_len:5151 prompt_cache_ratio:0.5011187858741123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 +DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s +INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s +DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=281980234241351428324921683522295552104, time:1750767737.405839s req_ids:[8] +DEBUG 06-24 20:22:17 [manager.py:391] +ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:206.82883262634277ms total_cost_time:206.87270164489746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10280 prompt_cache_len:5151 prompt_cache_ratio:0.5010700389105058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 +DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10956811904907227 s +INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11158370971679688 s +DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=238465566818917715473067542760257592925, time:1750767737.6186085s req_ids:[8] +DEBUG 06-24 20:22:17 [manager.py:391] +ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:205.92570304870605ms total_cost_time:205.96885681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10281 prompt_cache_len:5151 prompt_cache_ratio:0.501021301429822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 +DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10846877098083496 s +INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.1105496883392334 s +DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=161568975263501689945010728613233130396, time:1750767737.8321395s req_ids:[8] +DEBUG 06-24 20:22:17 [manager.py:391] +ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:364.0451431274414ms total_cost_time:364.0925884246826ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10282 prompt_cache_len:5151 prompt_cache_ratio:0.5009725734292939 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 +DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10827445983886719 s +INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11015462875366211 s +DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=329914915758111595283891041044952266875, time:1750767738.2110536s req_ids:[8] +DEBUG 06-24 20:22:18 [manager.py:391] +ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:226.30643844604492ms total_cost_time:226.3507843017578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10283 prompt_cache_len:5151 prompt_cache_ratio:0.5009238549061558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 +DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10829639434814453 s +INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11027097702026367 s +DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=301804600742518351186226696390072293639, time:1750767738.4322872s req_ids:[8] +DEBUG 06-24 20:22:18 [manager.py:391] +ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:206.496000289917ms total_cost_time:206.53891563415527ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10284 prompt_cache_len:5151 prompt_cache_ratio:0.5008751458576429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 +DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10814285278320312 s +INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11017632484436035 s +DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=21925370174256355441288257377919713421, time:1750767738.644661s req_ids:[8] +DEBUG 06-24 20:22:18 [manager.py:391] +ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:205.9495449066162ms total_cost_time:205.9950828552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10285 prompt_cache_len:5151 prompt_cache_ratio:0.5008264462809917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 +DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10821652412414551 s +INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11006355285644531 s +DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=250165149359517451626687889527235289342, time:1750767738.858237s req_ids:[8] +DEBUG 06-24 20:22:18 [manager.py:391] +ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:209.85770225524902ms total_cost_time:209.9020481109619ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10286 prompt_cache_len:5151 prompt_cache_ratio:0.5007777561734397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 +DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10851860046386719 s +INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11052465438842773 s +DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=205136005706830345389167522242309785212, time:1750767739.0799248s req_ids:[8] +DEBUG 06-24 20:22:19 [manager.py:391] +ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:214.56527709960938ms total_cost_time:214.60890769958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10287 prompt_cache_len:5151 prompt_cache_ratio:0.5007290755322251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 +DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10886955261230469 s +INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11066389083862305 s +DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=10241466212906694874199316097219585421, time:1750767739.2945216s req_ids:[8] +DEBUG 06-24 20:22:19 [manager.py:391] +ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:206.0873508453369ms total_cost_time:206.1305046081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10288 prompt_cache_len:5151 prompt_cache_ratio:0.5006804043545878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 +DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.1092681884765625 s +INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11133480072021484 s +DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=199519415269606847818021463458208936423, time:1750767739.5075781s req_ids:[8] +DEBUG 06-24 20:22:19 [manager.py:391] +ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:377.46500968933105ms total_cost_time:377.51102447509766ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10289 prompt_cache_len:5151 prompt_cache_ratio:0.5006317426377684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 +DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10747170448303223 s +INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.10933065414428711 s +DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=38893829181488244030219277355429765050, time:1750767739.8891523s req_ids:[8] +DEBUG 06-24 20:22:19 [manager.py:391] +ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:200.88815689086914ms total_cost_time:200.9294033050537ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10290 prompt_cache_len:5151 prompt_cache_ratio:0.5005830903790087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 +DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10808324813842773 s +INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.1096041202545166 s +DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=121505252080224643244078088261954986865, time:1750767740.1063125s req_ids:[8] +DEBUG 06-24 20:22:20 [manager.py:391] +ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:186.37824058532715ms total_cost_time:186.42091751098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10291 prompt_cache_len:5151 prompt_cache_ratio:0.5005344475755514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 +DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10882568359375 s +INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.1105794906616211 s +DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=82575235355024122005167262335914070838, time:1750767740.2955408s req_ids:[8] +DEBUG 06-24 20:22:20 [manager.py:391] +ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:200.0417709350586ms total_cost_time:200.08516311645508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10292 prompt_cache_len:5151 prompt_cache_ratio:0.5004858142246404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 +DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10845041275024414 s +INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s +DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=208575206317893509792888435729128572495, time:1750767740.4953501s req_ids:[8] +DEBUG 06-24 20:22:20 [manager.py:391] +ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:208.0075740814209ms total_cost_time:208.04953575134277ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10293 prompt_cache_len:5151 prompt_cache_ratio:0.5004371903235209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 +DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10822200775146484 s +INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.10999655723571777 s +DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=250369875193676304520556204386907758406, time:1750767740.71641s req_ids:[8] +DEBUG 06-24 20:22:20 [manager.py:391] +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:217.0083522796631ms total_cost_time:217.05031394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10294 prompt_cache_len:5151 prompt_cache_ratio:0.5003885758694385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 +DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10842347145080566 s +INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.10994386672973633 s +DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=154885277650339155207194404052257143634, time:1750767740.931785s req_ids:[8] +DEBUG 06-24 20:22:20 [manager.py:391] +ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:165.63010215759277ms total_cost_time:165.67111015319824ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10295 prompt_cache_len:5151 prompt_cache_ratio:0.5003399708596405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 +DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s +INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.11031556129455566 s +DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=308534654132256305042290277092515060481, time:1750767741.1028113s req_ids:[8] +DEBUG 06-24 20:22:21 [manager.py:391] +ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:198.43626022338867ms total_cost_time:198.47869873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10296 prompt_cache_len:5151 prompt_cache_ratio:0.5002913752913752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 +DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s +INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s +DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=66197736360831328427488129173146641571, time:1750767741.3069665s req_ids:[8] +DEBUG 06-24 20:22:21 [manager.py:391] +ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:366.40381813049316ms total_cost_time:366.44864082336426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10297 prompt_cache_len:5151 prompt_cache_ratio:0.5002427891618918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 +DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10840463638305664 s +INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.1101539134979248 s +DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=265377633317718680268647285829868327465, time:1750767741.680321s req_ids:[8] +DEBUG 06-24 20:22:21 [manager.py:391] +DEBUG 06-24 20:22:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 44158.180 tokens/s +DEBUG 06-24 20:22:21 [stats.py:37] Avg prompt tokens throughput: 44149.688 tokens/s +DEBUG 06-24 20:22:21 [stats.py:37] Avg generate tokens throughput: 8.492 tokens/s +ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:209.61689949035645ms total_cost_time:209.66029167175293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10298 prompt_cache_len:5151 prompt_cache_ratio:0.5001942124684404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 +DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10810565948486328 s +INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.1098027229309082 s +DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=122785721189510170987315232710289558622, time:1750767741.8996363s req_ids:[8] +DEBUG 06-24 20:22:21 [manager.py:391] +ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:212.43667602539062ms total_cost_time:212.48269081115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10299 prompt_cache_len:5151 prompt_cache_ratio:0.5001456452082726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 +DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10799312591552734 s +INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s +DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=174845202187782950859567887247743806230, time:1750767742.1155999s req_ids:[8] +DEBUG 06-24 20:22:22 [manager.py:391] +ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:212.23092079162598ms total_cost_time:212.27383613586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10300 prompt_cache_len:5151 prompt_cache_ratio:0.5000970873786408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 +DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:22 [batch.py:51] router release req id 8 +DEBUG 06-24 20:22:22 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:22 [manager.py:283] +DEBUG 06-24 20:22:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:22 [manager.py:284] +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10791158676147461 s +INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10992312431335449 s +DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=31846366582046265711656508342113361118, time:1750767742.3334842s req_ids:[8] +DEBUG 06-24 20:22:22 [manager.py:391] +ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:209.20515060424805ms total_cost_time:209.24806594848633ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10301 prompt_cache_len:5151 prompt_cache_ratio:0.5000485389767984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 +DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10907316207885742 s +INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.11109018325805664 s +DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=255958905651420051754103079129364504700, time:1750767742.547657s req_ids:[8] +DEBUG 06-24 20:22:22 [manager.py:391] +ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:207.81707763671875ms total_cost_time:207.85999298095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10302 prompt_cache_len:5151 prompt_cache_ratio:0.5 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 +DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10781526565551758 s +INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10983610153198242 s +DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=114126566620706963311631077410174920024, time:1750767742.7637403s req_ids:[8] +DEBUG 06-24 20:22:22 [manager.py:391] +ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:206.1774730682373ms total_cost_time:206.2218189239502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10303 prompt_cache_len:5151 prompt_cache_ratio:0.4999514704455013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 +DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10793519020080566 s +INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s +DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=163699608436570609887097916997682510572, time:1750767742.9783657s req_ids:[8] +DEBUG 06-24 20:22:22 [manager.py:391] +ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:212.0819091796875ms total_cost_time:212.1257781982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10304 prompt_cache_len:5151 prompt_cache_ratio:0.499902950310559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 +DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s +INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s +DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=208990138659131123260733264382370260611, time:1750767743.195763s req_ids:[8] +DEBUG 06-24 20:22:23 [manager.py:391] +INFO 06-24 20:22:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:23 [statics_utils.py:24] mean first cost: 228.4506684546758 ms +INFO 06-24 20:22:23 [statics_utils.py:24] mean per token cost: 0.06815599078567418 ms +ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:376.68418884277344ms total_cost_time:376.7426013946533ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10305 prompt_cache_len:5151 prompt_cache_ratio:0.49985443959243087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 +DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10951113700866699 s +INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.1115875244140625 s +DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=126039944702570846460710720030489876343, time:1750767743.5768766s req_ids:[8] +DEBUG 06-24 20:22:23 [manager.py:391] +ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:205.9171199798584ms total_cost_time:205.9648036956787ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:10306 prompt_cache_len:5151 prompt_cache_ratio:0.4998059382883757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 +DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10881567001342773 s +INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088776588439941 s +DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=244203032497385553012173206708891756280, time:1750767743.792903s req_ids:[8] +DEBUG 06-24 20:22:23 [manager.py:391] +ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:212.34440803527832ms total_cost_time:212.388277053833ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10307 prompt_cache_len:5151 prompt_cache_ratio:0.49975744639565345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 +DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10779714584350586 s +INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s +DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=69741261299761267808557202108976926020, time:1750767744.0089617s req_ids:[8] +DEBUG 06-24 20:22:24 [manager.py:391] +ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:200.32548904418945ms total_cost_time:200.36768913269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10308 prompt_cache_len:5151 prompt_cache_ratio:0.49970896391152503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 +DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s +INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.11007261276245117 s +DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=98975056219619573849370661682914439005, time:1750767744.215854s req_ids:[8] +DEBUG 06-24 20:22:24 [manager.py:391] +ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:208.97316932678223ms total_cost_time:209.01894569396973ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10309 prompt_cache_len:5151 prompt_cache_ratio:0.4996604908332525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 +DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10784602165222168 s +INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10991168022155762 s +DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=220273299568137152097911005850653866062, time:1750767744.4353983s req_ids:[8] +DEBUG 06-24 20:22:24 [manager.py:391] +ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:205.86109161376953ms total_cost_time:205.9025764465332ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10310 prompt_cache_len:5151 prompt_cache_ratio:0.49961202715809894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 +DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10803961753845215 s +INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.11015152931213379 s +DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=9982364954087842745798978268399770415, time:1750767744.6413734s req_ids:[8] +DEBUG 06-24 20:22:24 [manager.py:391] +ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:203.05681228637695ms total_cost_time:203.09877395629883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10311 prompt_cache_len:5151 prompt_cache_ratio:0.4995635728833285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 +DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s +INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10993123054504395 s +DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=130468150686709755216027306805361528812, time:1750767744.855029s req_ids:[8] +DEBUG 06-24 20:22:24 [manager.py:391] +ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:168.99704933166504ms total_cost_time:169.0382957458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10312 prompt_cache_len:5151 prompt_cache_ratio:0.49951512800620634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 +DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10899186134338379 s +INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.11089849472045898 s +DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=260355014748932165591810164110156752293, time:1750767745.0275855s req_ids:[8] +DEBUG 06-24 20:22:25 [manager.py:391] +ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:208.93573760986328ms total_cost_time:208.97746086120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10313 prompt_cache_len:5151 prompt_cache_ratio:0.49946669252399883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 +DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s +INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s +DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=224501941774865367286307918825286970399, time:1750767745.2511392s req_ids:[8] +DEBUG 06-24 20:22:25 [manager.py:391] +ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:374.4499683380127ms total_cost_time:374.4935989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10314 prompt_cache_len:5151 prompt_cache_ratio:0.49941826643397325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 +DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10799574851989746 s +INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.10995864868164062 s +DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=172666715562177365570960651566326762540, time:1750767745.6217852s req_ids:[8] +DEBUG 06-24 20:22:25 [manager.py:391] +ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:207.14426040649414ms total_cost_time:207.18812942504883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10315 prompt_cache_len:5151 prompt_cache_ratio:0.499369849733398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 +DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10773825645446777 s +INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.1097259521484375 s +DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=91160378675212571757982830841370027548, time:1750767745.8337877s req_ids:[8] +DEBUG 06-24 20:22:25 [manager.py:391] +ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:200.72364807128906ms total_cost_time:200.76584815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10316 prompt_cache_len:5151 prompt_cache_ratio:0.49932144241954246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 +DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s +INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11085009574890137 s +DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=320883836373186921317180501895490250335, time:1750767746.0427434s req_ids:[8] +DEBUG 06-24 20:22:26 [manager.py:391] +ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:211.97032928466797ms total_cost_time:212.01467514038086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10317 prompt_cache_len:5151 prompt_cache_ratio:0.49927304448967724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 +DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10814094543457031 s +INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s +DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=68407154439700804799213699883633472357, time:1750767746.2597349s req_ids:[8] +DEBUG 06-24 20:22:26 [manager.py:391] +ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:207.1669101715088ms total_cost_time:207.20934867858887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10318 prompt_cache_len:5151 prompt_cache_ratio:0.49922465594107385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 +DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.1089632511138916 s +INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11100530624389648 s +DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=294625448838183442552014483913045551464, time:1750767746.4716535s req_ids:[8] +DEBUG 06-24 20:22:26 [manager.py:391] +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:214.0216827392578ms total_cost_time:214.0657901763916ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10319 prompt_cache_len:5151 prompt_cache_ratio:0.49917627677100496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 +DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s +INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11089229583740234 s +DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=63436086032351738149397686368699925569, time:1750767746.6890297s req_ids:[8] +DEBUG 06-24 20:22:26 [manager.py:391] +ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:203.26828956604004ms total_cost_time:203.31239700317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10320 prompt_cache_len:5151 prompt_cache_ratio:0.4991279069767442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 +DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10803008079528809 s +INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11008596420288086 s +DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=128642204472296785825428752584970832287, time:1750767746.8998265s req_ids:[8] +DEBUG 06-24 20:22:26 [manager.py:391] +ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:377.3789405822754ms total_cost_time:377.4247169494629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10321 prompt_cache_len:5151 prompt_cache_ratio:0.4990795465555663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 +DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10783672332763672 s +INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.10988616943359375 s +DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=73461674454890354100181805387096865509, time:1750767747.2817729s req_ids:[8] +DEBUG 06-24 20:22:27 [manager.py:391] +ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:201.7970085144043ms total_cost_time:201.8423080444336ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10322 prompt_cache_len:5151 prompt_cache_ratio:0.4990311955047471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 +DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10875558853149414 s +INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.11001753807067871 s +DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=240367748368764043544440764879147296010, time:1750767747.5031447s req_ids:[8] +DEBUG 06-24 20:22:27 [manager.py:391] +ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:221.00067138671875ms total_cost_time:221.04573249816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10323 prompt_cache_len:5151 prompt_cache_ratio:0.4989828538215635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 +DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10805892944335938 s +INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020350456237793 s +DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=313076179508639764375288314961895668028, time:1750767747.720255s req_ids:[8] +DEBUG 06-24 20:22:27 [manager.py:391] +ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:211.25173568725586ms total_cost_time:211.29631996154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10324 prompt_cache_len:5151 prompt_cache_ratio:0.4989345215032933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 +DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10803937911987305 s +INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s +DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=197981968021660726835889293629654436938, time:1750767747.9346037s req_ids:[8] +DEBUG 06-24 20:22:27 [manager.py:391] +ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:162.5385284423828ms total_cost_time:162.59407997131348ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:10325 prompt_cache_len:5151 prompt_cache_ratio:0.4988861985472155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 +DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s +INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10979580879211426 s +DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=317699360792474351072496784488527266591, time:1750767748.1027286s req_ids:[8] +DEBUG 06-24 20:22:28 [manager.py:391] +ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:197.30830192565918ms total_cost_time:197.35240936279297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10326 prompt_cache_len:5151 prompt_cache_ratio:0.4988378849506101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 +DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10747551918029785 s +INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10917520523071289 s +DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=186163459001219566064830966149439004907, time:1750767748.3090603s req_ids:[8] +DEBUG 06-24 20:22:28 [manager.py:391] +ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:207.79943466186523ms total_cost_time:207.84330368041992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10327 prompt_cache_len:5151 prompt_cache_ratio:0.4987895807107582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 +DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10794806480407715 s +INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10974287986755371 s +DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=163861439566978617340597609768246352247, time:1750767748.5209s req_ids:[8] +DEBUG 06-24 20:22:28 [manager.py:391] +ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:373.78954887390137ms total_cost_time:373.83484840393066ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10328 prompt_cache_len:5151 prompt_cache_ratio:0.4987412858249419 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 +DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10881161689758301 s +INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.1106107234954834 s +DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=195338795940714724733914543550023223251, time:1750767748.9029186s req_ids:[8] +DEBUG 06-24 20:22:28 [manager.py:391] +ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:204.10871505737305ms total_cost_time:204.15377616882324ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10329 prompt_cache_len:5151 prompt_cache_ratio:0.4986930002904444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 +DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10907840728759766 s +INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s +DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=295255866699181792380412017212270903082, time:1750767749.1119401s req_ids:[8] +DEBUG 06-24 20:22:29 [manager.py:391] +ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:210.2653980255127ms total_cost_time:210.3102207183838ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10330 prompt_cache_len:5151 prompt_cache_ratio:0.49864472410454985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 +DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s +INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s +DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=121087541024802860329966955500409284694, time:1750767749.3291287s req_ids:[8] +DEBUG 06-24 20:22:29 [manager.py:391] +ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:168.0467128753662ms total_cost_time:168.0901050567627ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10331 prompt_cache_len:5151 prompt_cache_ratio:0.4985964572645436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 +DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.1076056957244873 s +INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s +DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=164893767722855489744220284806006295916, time:1750767749.5021951s req_ids:[8] +DEBUG 06-24 20:22:29 [manager.py:391] +ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:163.88535499572754ms total_cost_time:163.9273166656494ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10332 prompt_cache_len:5151 prompt_cache_ratio:0.498548199767712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 +DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.11025118827819824 s +INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.11222982406616211 s +DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=65560349686911681658934658032153356946, time:1750767749.6748888s req_ids:[8] +DEBUG 06-24 20:22:29 [manager.py:391] +ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:202.87442207336426ms total_cost_time:202.91757583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10333 prompt_cache_len:5151 prompt_cache_ratio:0.4984999516113423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 +DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10866403579711914 s +INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s +DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=28480617321864542271562399776639258880, time:1750767749.8812187s req_ids:[8] +DEBUG 06-24 20:22:29 [manager.py:391] +ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:201.39074325561523ms total_cost_time:201.43461227416992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10334 prompt_cache_len:5151 prompt_cache_ratio:0.49845171279272305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 +DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10776185989379883 s +INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.10971951484680176 s +DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=11582177308451012744215777984298933547, time:1750767750.0917037s req_ids:[8] +DEBUG 06-24 20:22:30 [manager.py:391] +ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:212.10169792175293ms total_cost_time:212.14628219604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10335 prompt_cache_len:5151 prompt_cache_ratio:0.4984034833091437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 +DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.3115060329437256 s +INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.31355905532836914 s +DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=35273692193330766977074673194617282265, time:1750767750.5087545s req_ids:[8] +DEBUG 06-24 20:22:30 [manager.py:391] +ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:417.5593852996826ms total_cost_time:417.6034927368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10336 prompt_cache_len:5151 prompt_cache_ratio:0.49835526315789475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 +DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10763692855834961 s +INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.10976076126098633 s +DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=334347332458898272621366859657519121522, time:1750767750.730738s req_ids:[8] +DEBUG 06-24 20:22:30 [manager.py:391] +ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:208.67228507995605ms total_cost_time:208.71758460998535ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10337 prompt_cache_len:5151 prompt_cache_ratio:0.49830705233626776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 +DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s +INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.11017370223999023 s +DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=43203262180246446197206706236438566724, time:1750767750.9441006s req_ids:[8] +DEBUG 06-24 20:22:30 [manager.py:391] +ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:207.24773406982422ms total_cost_time:207.2916030883789ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10338 prompt_cache_len:5151 prompt_cache_ratio:0.4982588508415554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 +DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10782265663146973 s +INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.10988378524780273 s +DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=16542570089010733181376663330825691080, time:1750767751.1584325s req_ids:[8] +DEBUG 06-24 20:22:31 [manager.py:391] +ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:208.7728977203369ms total_cost_time:208.8160514831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10339 prompt_cache_len:5151 prompt_cache_ratio:0.49821065867105135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 +DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s +INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s +DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=35588095463613294945725117312591505899, time:1750767751.3737185s req_ids:[8] +DEBUG 06-24 20:22:31 [manager.py:391] +ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:211.20071411132812ms total_cost_time:211.24505996704102ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10340 prompt_cache_len:5151 prompt_cache_ratio:0.4981624758220503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 +DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s +INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s +DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=38830237430708855680287110073681220014, time:1750767751.589177s req_ids:[8] +DEBUG 06-24 20:22:31 [manager.py:391] +ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:165.23408889770508ms total_cost_time:165.27438163757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10341 prompt_cache_len:5151 prompt_cache_ratio:0.498114302291848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 +DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.11124110221862793 s +INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.11335515975952148 s +DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=268351695856400630840101911890586519184, time:1750767751.7599645s req_ids:[8] +DEBUG 06-24 20:22:31 [manager.py:391] +DEBUG 06-24 20:22:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 45060.282 tokens/s +DEBUG 06-24 20:22:31 [stats.py:37] Avg prompt tokens throughput: 45051.552 tokens/s +DEBUG 06-24 20:22:31 [stats.py:37] Avg generate tokens throughput: 8.730 tokens/s +ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:202.25024223327637ms total_cost_time:202.29291915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10342 prompt_cache_len:5151 prompt_cache_ratio:0.49806613807774125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 +DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10857415199279785 s +INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s +DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=150076075180652426484534194360203352199, time:1750767751.966243s req_ids:[8] +DEBUG 06-24 20:22:31 [manager.py:391] +ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:361.54627799987793ms total_cost_time:361.61160469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:10343 prompt_cache_len:5151 prompt_cache_ratio:0.49801798317702795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 +DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:32 [batch.py:51] router release req id 8 +INFO 06-24 20:22:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10849618911743164 s +INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s +DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=257336771039768912987092749540052854179, time:1750767752.3332877s req_ids:[8] +DEBUG 06-24 20:22:32 [manager.py:391] +ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:207.12542533874512ms total_cost_time:207.1688175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10344 prompt_cache_len:5151 prompt_cache_ratio:0.49796983758700697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 +DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10831665992736816 s +INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.11048007011413574 s +DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=250717237158648149939676946563682262513, time:1750767752.5588596s req_ids:[8] +DEBUG 06-24 20:22:32 [manager.py:391] +ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:225.05640983581543ms total_cost_time:225.10027885437012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10345 prompt_cache_len:5151 prompt_cache_ratio:0.49792170130497826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 +DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10779213905334473 s +INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.10985279083251953 s +DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=268337710649910725284406591789103686737, time:1750767752.7777896s req_ids:[8] +DEBUG 06-24 20:22:32 [manager.py:391] +ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:203.39536666870117ms total_cost_time:203.43732833862305ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10346 prompt_cache_len:5151 prompt_cache_ratio:0.4978735743282428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 +DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10774445533752441 s +INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.10970377922058105 s +DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=173138731795658629343972024572512214313, time:1750767752.9873455s req_ids:[8] +DEBUG 06-24 20:22:32 [manager.py:391] +ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:205.11198043823242ms total_cost_time:205.1694393157959ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:10347 prompt_cache_len:5151 prompt_cache_ratio:0.49782545665410266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 +DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10764169692993164 s +INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11034560203552246 s +DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=83739402286627884308481944804561816234, time:1750767753.1995344s req_ids:[8] +DEBUG 06-24 20:22:33 [manager.py:391] +ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:207.57675170898438ms total_cost_time:207.61942863464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10348 prompt_cache_len:5151 prompt_cache_ratio:0.49777734827986087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 +DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s +INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11075592041015625 s +DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=291965758908185270691687565653434800912, time:1750767753.4139185s req_ids:[8] +DEBUG 06-24 20:22:33 [manager.py:391] +ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:212.43739128112793ms total_cost_time:212.49628067016602ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10349 prompt_cache_len:5151 prompt_cache_ratio:0.4977292492028215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 +DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10814833641052246 s +INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.10976815223693848 s +DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=246908911873727272078964649699445807867, time:1750767753.631525s req_ids:[8] +DEBUG 06-24 20:22:33 [manager.py:391] +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:166.43905639648438ms total_cost_time:166.48101806640625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10350 prompt_cache_len:5151 prompt_cache_ratio:0.49768115942028984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 +DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s +INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11031317710876465 s +DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=13580222801466031512273372410953187833, time:1750767753.8032863s req_ids:[8] +DEBUG 06-24 20:22:33 [manager.py:391] +ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:367.1088218688965ms total_cost_time:367.15149879455566ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10351 prompt_cache_len:5151 prompt_cache_ratio:0.49763307892957204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 +DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s +INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11083698272705078 s +DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=200651820041839026377652842415654156519, time:1750767754.1750822s req_ids:[8] +DEBUG 06-24 20:22:34 [manager.py:391] +ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:204.99300956726074ms total_cost_time:205.03640174865723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10352 prompt_cache_len:5151 prompt_cache_ratio:0.49758500772797526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 +DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10944247245788574 s +INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11207199096679688 s +DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=241917812701794860302151998051664334017, time:1750767754.386984s req_ids:[8] +DEBUG 06-24 20:22:34 [manager.py:391] +ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:211.17615699768066ms total_cost_time:211.22074127197266ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10353 prompt_cache_len:5151 prompt_cache_ratio:0.4975369458128079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 +DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.1088106632232666 s +INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11084604263305664 s +DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=191089809990868524168833930790470483477, time:1750767754.605447s req_ids:[8] +DEBUG 06-24 20:22:34 [manager.py:391] +ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:207.98468589782715ms total_cost_time:208.02783966064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10354 prompt_cache_len:5151 prompt_cache_ratio:0.4974888931813792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 +DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10904359817504883 s +INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11125016212463379 s +DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=269989263421000906117485194464522506677, time:1750767754.8309176s req_ids:[8] +DEBUG 06-24 20:22:34 [manager.py:391] +ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:223.8941192626953ms total_cost_time:223.94108772277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:10355 prompt_cache_len:5151 prompt_cache_ratio:0.4974408498309995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 +DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s +INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s +DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=114931424319703533334877654731112586586, time:1750767755.0504596s req_ids:[8] +DEBUG 06-24 20:22:35 [manager.py:391] +ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:209.9628448486328ms total_cost_time:210.0057601928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10356 prompt_cache_len:5151 prompt_cache_ratio:0.4973928157589803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 +DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s +INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.11225295066833496 s +DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=194020831668532145025468100928642168278, time:1750767755.2651596s req_ids:[8] +DEBUG 06-24 20:22:35 [manager.py:391] +ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:211.50922775268555ms total_cost_time:211.55428886413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10357 prompt_cache_len:5151 prompt_cache_ratio:0.49734479096263395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 +DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10762667655944824 s +INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.1095590591430664 s +DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=260435290179522882675273827721914900035, time:1750767755.4812856s req_ids:[8] +DEBUG 06-24 20:22:35 [manager.py:391] +ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:165.33350944519043ms total_cost_time:165.3749942779541ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10358 prompt_cache_len:5151 prompt_cache_ratio:0.497296775439274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 +DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s +INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.1098470687866211 s +DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=152532607541201060370658523858501765498, time:1750767755.6515634s req_ids:[8] +DEBUG 06-24 20:22:35 [manager.py:391] +ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:201.22003555297852ms total_cost_time:201.2653350830078ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10359 prompt_cache_len:5151 prompt_cache_ratio:0.4972487691862149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 +DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.3097038269042969 s +INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.31162428855895996 s +DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=49033115129791760927870708781634549489, time:1750767756.0620825s req_ids:[8] +DEBUG 06-24 20:22:36 [manager.py:391] +ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:414.32905197143555ms total_cost_time:414.37411308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10360 prompt_cache_len:5151 prompt_cache_ratio:0.4972007722007722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 +DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s +INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s +DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=63540603265704405844453718881465339435, time:1750767756.2801273s req_ids:[8] +DEBUG 06-24 20:22:36 [manager.py:391] +ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:207.6432704925537ms total_cost_time:207.7028751373291ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:10361 prompt_cache_len:5151 prompt_cache_ratio:0.4971527844802625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 +DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s +INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.10999035835266113 s +DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=309673494027345793833177531987089616344, time:1750767756.4945261s req_ids:[8] +DEBUG 06-24 20:22:36 [manager.py:391] +ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:212.43619918823242ms total_cost_time:212.4803066253662ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10362 prompt_cache_len:5151 prompt_cache_ratio:0.49710480602200346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 +DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10894179344177246 s +INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s +DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=238694357498597557865724648557873600935, time:1750767756.7107904s req_ids:[8] +DEBUG 06-24 20:22:36 [manager.py:391] +ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:205.33370971679688ms total_cost_time:205.37900924682617ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10363 prompt_cache_len:5151 prompt_cache_ratio:0.4970568368233137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 +DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10792994499206543 s +INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s +DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=135707565040266280925665906191334458973, time:1750767756.9236083s req_ids:[8] +DEBUG 06-24 20:22:36 [manager.py:391] +ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:208.11843872070312ms total_cost_time:208.16326141357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10364 prompt_cache_len:5151 prompt_cache_ratio:0.4970088768815129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 +DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.1072397232055664 s +INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914826393127441 s +DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=131440745439312728152478431304230977580, time:1750767757.1364512s req_ids:[8] +DEBUG 06-24 20:22:37 [manager.py:391] +ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:162.61744499206543ms total_cost_time:162.6596450805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10365 prompt_cache_len:5151 prompt_cache_ratio:0.49696092619392185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 +DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.1081247329711914 s +INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s +DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=189343553643351174697631666878034050680, time:1750767757.3060403s req_ids:[8] +DEBUG 06-24 20:22:37 [manager.py:391] +ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:198.81868362426758ms total_cost_time:198.87638092041016ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:10366 prompt_cache_len:5151 prompt_cache_ratio:0.4969129847578622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 +DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.10904526710510254 s +INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.11103439331054688 s +DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=287566056518895171573620605533759385613, time:1750767757.5114832s req_ids:[8] +DEBUG 06-24 20:22:37 [manager.py:391] +ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:369.98820304870605ms total_cost_time:370.03135681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10367 prompt_cache_len:5151 prompt_cache_ratio:0.4968650525706569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 +DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s +INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.11045241355895996 s +DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=191381530966494656238064068975601008873, time:1750767757.886067s req_ids:[8] +DEBUG 06-24 20:22:37 [manager.py:391] +ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:207.43513107299805ms total_cost_time:207.47756958007812ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10368 prompt_cache_len:5151 prompt_cache_ratio:0.49681712962962965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 +DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s +INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s +DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=94221755782992239755155540692380086222, time:1750767758.1010854s req_ids:[8] +DEBUG 06-24 20:22:38 [manager.py:391] +ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:208.25457572937012ms total_cost_time:208.2967758178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10369 prompt_cache_len:5151 prompt_cache_ratio:0.4967692159321053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 +DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10841250419616699 s +INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s +DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=111717341550881893506543347610036031397, time:1750767758.3156047s req_ids:[8] +DEBUG 06-24 20:22:38 [manager.py:391] +ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:208.28866958618164ms total_cost_time:208.33277702331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10370 prompt_cache_len:5151 prompt_cache_ratio:0.4967213114754098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 +DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10834074020385742 s +INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s +DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=236470228113403493009977736800627042858, time:1750767758.5294502s req_ids:[8] +DEBUG 06-24 20:22:38 [manager.py:391] +ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:207.10062980651855ms total_cost_time:207.14402198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10371 prompt_cache_len:5151 prompt_cache_ratio:0.4966734162568701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 +DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10852861404418945 s +INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s +DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=105076126124137557891165488676981637224, time:1750767758.749048s req_ids:[8] +DEBUG 06-24 20:22:38 [manager.py:391] +ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:215.99364280700684ms total_cost_time:216.01390838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:10372 prompt_cache_len:5151 prompt_cache_ratio:0.4966255302738141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 +DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s +INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.10840392112731934 s +DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=51983285751805140217907341864661589055, time:1750767758.966502s req_ids:[8] +DEBUG 06-24 20:22:38 [manager.py:391] +ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:200.08373260498047ms total_cost_time:200.12593269348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10373 prompt_cache_len:5151 prompt_cache_ratio:0.4965776535235708 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 +DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10847210884094238 s +INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11029911041259766 s +DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=299417465918477213483861331001562030897, time:1750767759.1686184s req_ids:[8] +DEBUG 06-24 20:22:39 [manager.py:391] +ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:361.99307441711426ms total_cost_time:362.03765869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10374 prompt_cache_len:5151 prompt_cache_ratio:0.4965297860034702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 +DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:39 [batch.py:51] router release req id 8 +INFO 06-24 20:22:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10876011848449707 s +INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11058354377746582 s +DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=51417506593072752060489619094449772311, time:1750767759.5350707s req_ids:[8] +DEBUG 06-24 20:22:39 [manager.py:391] +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:209.92660522460938ms total_cost_time:209.97023582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10375 prompt_cache_len:5151 prompt_cache_ratio:0.4964819277108434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 +DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10879659652709961 s +INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s +DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=165496056998650210461027873656268197863, time:1750767759.7523823s req_ids:[8] +DEBUG 06-24 20:22:39 [manager.py:391] +ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:207.3063850402832ms total_cost_time:207.3495388031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10376 prompt_cache_len:5151 prompt_cache_ratio:0.49643407864302236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 +DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10758614540100098 s +INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.10932064056396484 s +DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=251603891124680208239392832697422523312, time:1750767759.964518s req_ids:[8] +DEBUG 06-24 20:22:39 [manager.py:391] +ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:204.54072952270508ms total_cost_time:204.58340644836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10377 prompt_cache_len:5151 prompt_cache_ratio:0.4963862387973403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 +DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10840654373168945 s +INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.1102149486541748 s +DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=318423228381345826766951926946577405489, time:1750767760.1759562s req_ids:[8] +DEBUG 06-24 20:22:40 [manager.py:391] +ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:203.6149501800537ms total_cost_time:203.6592960357666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10378 prompt_cache_len:5151 prompt_cache_ratio:0.49633840817113123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 +DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10730433464050293 s +INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.10903501510620117 s +DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=99354618613098526477662078356212708650, time:1750767760.3848908s req_ids:[8] +DEBUG 06-24 20:22:40 [manager.py:391] +ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:202.0254135131836ms total_cost_time:202.0885944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:10379 prompt_cache_len:5151 prompt_cache_ratio:0.49629058676173043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 +DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s +INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.10895085334777832 s +DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=247339359758741247886879341973693801372, time:1750767760.5972955s req_ids:[8] +DEBUG 06-24 20:22:40 [manager.py:391] +ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:209.14554595947266ms total_cost_time:209.19013023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10380 prompt_cache_len:5151 prompt_cache_ratio:0.496242774566474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 +DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10872912406921387 s +INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104426383972168 s +DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=297604454597966095426785159653358283907, time:1750767760.8089724s req_ids:[8] +DEBUG 06-24 20:22:40 [manager.py:391] +ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:341.8080806732178ms total_cost_time:341.85171127319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10381 prompt_cache_len:5151 prompt_cache_ratio:0.49619497158269915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 +DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s +INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11005353927612305 s +DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=310143746084296752404464710043421217265, time:1750767761.1567204s req_ids:[8] +DEBUG 06-24 20:22:41 [manager.py:391] +ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:203.41253280639648ms total_cost_time:203.45401763916016ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10382 prompt_cache_len:5151 prompt_cache_ratio:0.4961471778077442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 +DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10950016975402832 s +INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s +DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=235449628526656197694447854520887458303, time:1750767761.368506s req_ids:[8] +DEBUG 06-24 20:22:41 [manager.py:391] +ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:207.169771194458ms total_cost_time:207.2138786315918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10383 prompt_cache_len:5151 prompt_cache_ratio:0.4960993932389483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 +DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10831284523010254 s +INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11029410362243652 s +DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=159722831330546396644859523699728712487, time:1750767761.5808394s req_ids:[8] +DEBUG 06-24 20:22:41 [manager.py:391] +ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:203.9780616760254ms total_cost_time:204.02264595031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10384 prompt_cache_len:5151 prompt_cache_ratio:0.49605161787365176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 +DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10920047760009766 s +INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11128544807434082 s +DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=240584616464094947768473992828521159306, time:1750767761.8018107s req_ids:[8] +DEBUG 06-24 20:22:41 [manager.py:391] +DEBUG 06-24 20:22:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 44388.222 tokens/s +DEBUG 06-24 20:22:41 [stats.py:37] Avg prompt tokens throughput: 44379.658 tokens/s +DEBUG 06-24 20:22:41 [stats.py:37] Avg generate tokens throughput: 8.564 tokens/s +ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:223.8442897796631ms total_cost_time:223.88815879821777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10385 prompt_cache_len:5151 prompt_cache_ratio:0.496003851709196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 +DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10802721977233887 s +INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s +DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=239061471339755844093469635866648094250, time:1750767762.0226786s req_ids:[8] +DEBUG 06-24 20:22:42 [manager.py:391] +ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:212.03994750976562ms total_cost_time:212.083101272583ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10386 prompt_cache_len:5151 prompt_cache_ratio:0.4959560947429232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 +DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10802197456359863 s +INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10994195938110352 s +DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=87398471436767039242142754522288795150, time:1750767762.2398617s req_ids:[8] +DEBUG 06-24 20:22:42 [manager.py:391] +ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:209.15794372558594ms total_cost_time:209.20276641845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10387 prompt_cache_len:5151 prompt_cache_ratio:0.4959083469721768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 +DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10837531089782715 s +INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.1105353832244873 s +DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=251323818111197751307017457251095458666, time:1750767762.4553828s req_ids:[8] +DEBUG 06-24 20:22:42 [manager.py:391] +ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:208.8305950164795ms total_cost_time:208.87422561645508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10388 prompt_cache_len:5151 prompt_cache_ratio:0.49586060839430113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 +DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10580992698669434 s +INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10754537582397461 s +DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=146176246297469748188130174631754789957, time:1750767762.6693764s req_ids:[8] +DEBUG 06-24 20:22:42 [manager.py:391] +ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:366.7149543762207ms total_cost_time:366.7581081390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10389 prompt_cache_len:5151 prompt_cache_ratio:0.4958128790066416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 +DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:42 [batch.py:51] router release req id 8 +INFO 06-24 20:22:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.1089928150177002 s +INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11114263534545898 s +DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=210261252165401293905842155245538837465, time:1750767763.0417302s req_ids:[8] +DEBUG 06-24 20:22:43 [manager.py:391] +ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:202.30650901794434ms total_cost_time:202.35061645507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10390 prompt_cache_len:5151 prompt_cache_ratio:0.49576515880654476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 +DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10835862159729004 s +INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s +DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=5460438190186362292226597957233559975, time:1750767763.2514791s req_ids:[8] +DEBUG 06-24 20:22:43 [manager.py:391] +ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:207.24773406982422ms total_cost_time:207.291841506958ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10391 prompt_cache_len:5151 prompt_cache_ratio:0.4957174477913579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 +DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10724186897277832 s +INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.10911369323730469 s +DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=178057043617394164313002345635573152615, time:1750767763.4637008s req_ids:[8] +DEBUG 06-24 20:22:43 [manager.py:391] +ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:165.50803184509277ms total_cost_time:165.55047035217285ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10392 prompt_cache_len:5151 prompt_cache_ratio:0.49566974595842955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 +DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.1086428165435791 s +INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11076068878173828 s +DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=83634940458071114933573786257754091595, time:1750767763.6395838s req_ids:[8] +DEBUG 06-24 20:22:43 [manager.py:391] +ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:207.47017860412598ms total_cost_time:207.51285552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10393 prompt_cache_len:5151 prompt_cache_ratio:0.4956220533051092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 +DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10850214958190918 s +INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.1104896068572998 s +DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=20481680301060264165034861047194207514, time:1750767763.8498523s req_ids:[8] +DEBUG 06-24 20:22:43 [manager.py:391] +ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:208.95862579345703ms total_cost_time:209.00249481201172ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10394 prompt_cache_len:5151 prompt_cache_ratio:0.49557436982874736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 +DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10790872573852539 s +INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10985422134399414 s +DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=323595257289467470771758082766740219426, time:1750767764.0672977s req_ids:[8] +DEBUG 06-24 20:22:44 [manager.py:391] +ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:212.61906623840332ms total_cost_time:212.66436576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10395 prompt_cache_len:5151 prompt_cache_ratio:0.49552669552669554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 +DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10738778114318848 s +INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10942912101745605 s +DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=59156255639356315159514022224525904432, time:1750767764.2822669s req_ids:[8] +DEBUG 06-24 20:22:44 [manager.py:391] +ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:209.20562744140625ms total_cost_time:209.24925804138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10396 prompt_cache_len:5151 prompt_cache_ratio:0.49547903039630625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 +DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10748815536499023 s +INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10939240455627441 s +DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=69624064984972370827450359108516402773, time:1750767764.5028427s req_ids:[8] +DEBUG 06-24 20:22:44 [manager.py:391] +ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:374.79686737060547ms total_cost_time:374.83739852905273ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10397 prompt_cache_len:5151 prompt_cache_ratio:0.4954313744349332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 +DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10746407508850098 s +INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10932278633117676 s +DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=186889013300742734525179701844894886664, time:1750767764.876438s req_ids:[8] +DEBUG 06-24 20:22:44 [manager.py:391] +ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:207.03625679016113ms total_cost_time:207.09466934204102ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10398 prompt_cache_len:5151 prompt_cache_ratio:0.49538372763993077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 +DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s +INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.1097571849822998 s +DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=12909657544039380466092619614552745530, time:1750767765.0898335s req_ids:[8] +DEBUG 06-24 20:22:45 [manager.py:391] +ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:204.970121383667ms total_cost_time:205.01327514648438ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10399 prompt_cache_len:5151 prompt_cache_ratio:0.4953360900086547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 +DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10857057571411133 s +INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11060047149658203 s +DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=318409688853941498638946132176251857335, time:1750767765.3006153s req_ids:[8] +DEBUG 06-24 20:22:45 [manager.py:391] +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:201.43651962280273ms total_cost_time:201.47967338562012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10400 prompt_cache_len:5151 prompt_cache_ratio:0.4952884615384615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 +DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.1087179183959961 s +INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11074113845825195 s +DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=228926717532228007552077052775030216516, time:1750767765.5090423s req_ids:[8] +DEBUG 06-24 20:22:45 [manager.py:391] +ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:205.7168483734131ms total_cost_time:205.76000213623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10401 prompt_cache_len:5151 prompt_cache_ratio:0.49524084222670894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 +DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s +INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.1103525161743164 s +DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=328429048975347538906994701743098777503, time:1750767765.7182033s req_ids:[8] +DEBUG 06-24 20:22:45 [manager.py:391] +ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:201.87711715698242ms total_cost_time:201.9200325012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10402 prompt_cache_len:5151 prompt_cache_ratio:0.4951932320707556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 +DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10844659805297852 s +INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11043190956115723 s +DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=155443869086096911093776483229810947857, time:1750767765.9292936s req_ids:[8] +DEBUG 06-24 20:22:45 [manager.py:391] +ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:208.1735134124756ms total_cost_time:208.21762084960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10403 prompt_cache_len:5151 prompt_cache_ratio:0.49514563106796117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 +DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10894131660461426 s +INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11094856262207031 s +DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=264829523068263391088666724062988282778, time:1750767766.1422453s req_ids:[8] +DEBUG 06-24 20:22:46 [manager.py:391] +ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:206.67457580566406ms total_cost_time:206.72035217285156ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10404 prompt_cache_len:5151 prompt_cache_ratio:0.4950980392156863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 +DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s +INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.10971426963806152 s +DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=124257638709429001144386082009509276255, time:1750767766.3552089s req_ids:[8] +DEBUG 06-24 20:22:46 [manager.py:391] +ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:208.1003189086914ms total_cost_time:208.1460952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10405 prompt_cache_len:5151 prompt_cache_ratio:0.49505045651129265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 +DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:46 [batch.py:51] router release req id 8 +INFO 06-24 20:22:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10867857933044434 s +INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s +DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=116995072487734720922352423795703241754, time:1750767766.567579s req_ids:[8] +DEBUG 06-24 20:22:46 [manager.py:391] +ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:362.1079921722412ms total_cost_time:362.1535301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10406 prompt_cache_len:5151 prompt_cache_ratio:0.495002882952143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 +DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10918331146240234 s +INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11118388175964355 s +DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=88819206029422537532015685153907971491, time:1750767766.936467s req_ids:[8] +DEBUG 06-24 20:22:46 [manager.py:391] +INFO 06-24 20:22:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:209.71131324768066ms total_cost_time:209.75542068481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10407 prompt_cache_len:5151 prompt_cache_ratio:0.49495531853560104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 +DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10837197303771973 s +INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.11031365394592285 s +DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=16271885427411925168492342617434444384, time:1750767767.151465s req_ids:[8] +DEBUG 06-24 20:22:47 [manager.py:391] +ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:210.70241928100586ms total_cost_time:210.74533462524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10408 prompt_cache_len:5151 prompt_cache_ratio:0.4949077632590315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 +DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10803365707397461 s +INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994601249694824 s +DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=38822038359984714596728068735187240472, time:1750767767.3651364s req_ids:[8] +DEBUG 06-24 20:22:47 [manager.py:391] +ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:206.21204376220703ms total_cost_time:206.2551975250244ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10409 prompt_cache_len:5151 prompt_cache_ratio:0.49486021711980016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 +DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.1075751781463623 s +INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.1095895767211914 s +DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=280812206711169103902733729860267969838, time:1750767767.5762281s req_ids:[8] +DEBUG 06-24 20:22:47 [manager.py:391] +ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.8076343536377ms total_cost_time:209.85150337219238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10410 prompt_cache_len:5151 prompt_cache_ratio:0.4948126801152738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 +DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10891914367675781 s +INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s +DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=147385449822408347189565731094173806334, time:1750767767.790348s req_ids:[8] +DEBUG 06-24 20:22:47 [manager.py:391] +ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.98191833496094ms total_cost_time:210.02578735351562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10411 prompt_cache_len:5151 prompt_cache_ratio:0.4947651522428201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 +DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10765433311462402 s +INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.10961556434631348 s +DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=227370528430795809304782549255296297747, time:1750767768.0033023s req_ids:[8] +DEBUG 06-24 20:22:48 [manager.py:391] +ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.75732803344727ms total_cost_time:209.80191230773926ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10412 prompt_cache_len:5151 prompt_cache_ratio:0.4947176334998079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 +DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10846781730651855 s +INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049675941467285 s +DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=240829012355463897240808543394434970127, time:1750767768.2195964s req_ids:[8] +DEBUG 06-24 20:22:48 [manager.py:391] +ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:391.1724090576172ms total_cost_time:391.2177085876465ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10413 prompt_cache_len:5151 prompt_cache_ratio:0.49467012388360704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 +DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10822486877441406 s +INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.1102895736694336 s +DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=71830897086584306879759081330078242774, time:1750767768.6108136s req_ids:[8] +DEBUG 06-24 20:22:48 [manager.py:391] +ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:206.02869987487793ms total_cost_time:206.07280731201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10414 prompt_cache_len:5151 prompt_cache_ratio:0.49462262339158825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 +DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s +INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s +DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=246987984134468666153071729283832652315, time:1750767768.823078s req_ids:[8] +DEBUG 06-24 20:22:48 [manager.py:391] +ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:206.23159408569336ms total_cost_time:206.27403259277344ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10415 prompt_cache_len:5151 prompt_cache_ratio:0.49457513202112335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 +DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s +INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.1107175350189209 s +DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=180186609349216854940815857423545582389, time:1750767769.036628s req_ids:[8] +DEBUG 06-24 20:22:49 [manager.py:391] +ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:209.7158432006836ms total_cost_time:209.75852012634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10416 prompt_cache_len:5151 prompt_cache_ratio:0.49452764976958524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 +DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10825872421264648 s +INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025285720825195 s +DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=243503898184221356737906925237216812246, time:1750767769.2507477s req_ids:[8] +DEBUG 06-24 20:22:49 [manager.py:391] +ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:205.16300201416016ms total_cost_time:205.20710945129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10417 prompt_cache_len:5151 prompt_cache_ratio:0.4944801766343477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 +DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10890388488769531 s +INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065888404846191 s +DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=249625965274669926640242695938844330257, time:1750767769.4613028s req_ids:[8] +DEBUG 06-24 20:22:49 [manager.py:391] +ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:165.9567356109619ms total_cost_time:166.05496406555176ms,out_token_counter:1 mean_per_token_cost_time: 0.09822845458984375ms prompt_token_num:10418 prompt_cache_len:5151 prompt_cache_ratio:0.49443271261278554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 +DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10818600654602051 s +INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s +DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=148852134762508122417736352467842303500, time:1750767769.6321716s req_ids:[8] +DEBUG 06-24 20:22:49 [manager.py:391] +ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:196.43425941467285ms total_cost_time:196.47884368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10419 prompt_cache_len:5151 prompt_cache_ratio:0.4943852577022747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 +DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10740923881530762 s +INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.1093144416809082 s +DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=327703986457878781049287798450116800348, time:1750767769.8367155s req_ids:[8] +DEBUG 06-24 20:22:49 [manager.py:391] +ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:372.74765968322754ms total_cost_time:372.7917671203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10420 prompt_cache_len:5151 prompt_cache_ratio:0.49433781190019194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 +DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.1084444522857666 s +INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s +DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=139079399488545447464800389498791243400, time:1750767770.2134678s req_ids:[8] +DEBUG 06-24 20:22:50 [manager.py:391] +ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:205.14583587646484ms total_cost_time:205.18755912780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10421 prompt_cache_len:5151 prompt_cache_ratio:0.49429037520391517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 +DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s +INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11083126068115234 s +DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=86973358106598838811594499086279411004, time:1750767770.4241383s req_ids:[8] +DEBUG 06-24 20:22:50 [manager.py:391] +ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:204.68688011169434ms total_cost_time:204.73027229309082ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10422 prompt_cache_len:5151 prompt_cache_ratio:0.49424294761082327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 +DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.1076819896697998 s +INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s +DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=139958710593008038286319395373188238541, time:1750767770.6410074s req_ids:[8] +DEBUG 06-24 20:22:50 [manager.py:391] +ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:213.2716178894043ms total_cost_time:213.31405639648438ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10423 prompt_cache_len:5151 prompt_cache_ratio:0.49419552911829606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 +DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.10920190811157227 s +INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11125731468200684 s +DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=145042264192795053439458397349048134922, time:1750767770.8538065s req_ids:[8] +DEBUG 06-24 20:22:50 [manager.py:391] +ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:207.7949047088623ms total_cost_time:207.8378200531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10424 prompt_cache_len:5151 prompt_cache_ratio:0.49414811972371453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 +DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10817646980285645 s +INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.11019134521484375 s +DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=202766691988266413196423251581896815749, time:1750767771.0682747s req_ids:[8] +DEBUG 06-24 20:22:51 [manager.py:391] +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:207.9489231109619ms total_cost_time:207.9932689666748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10425 prompt_cache_len:5151 prompt_cache_ratio:0.49410071942446043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 +DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10806536674499512 s +INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s +DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=146316257230826306477061750623240975556, time:1750767771.2797039s req_ids:[8] +DEBUG 06-24 20:22:51 [manager.py:391] +ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:206.41517639160156ms total_cost_time:206.45785331726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10426 prompt_cache_len:5151 prompt_cache_ratio:0.49405332821791675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 +DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10801148414611816 s +INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.10994482040405273 s +DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=23624167420889058281137695709302306861, time:1750767771.4950533s req_ids:[8] +DEBUG 06-24 20:22:51 [manager.py:391] +ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:210.30545234680176ms total_cost_time:210.34717559814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10427 prompt_cache_len:5151 prompt_cache_ratio:0.49400594610146736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 +DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.20879268646240234 s +INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.2104027271270752 s +DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=99879166003109065707071330531033644487, time:1750767771.8404758s req_ids:[8] +DEBUG 06-24 20:22:51 [manager.py:391] +DEBUG 06-24 20:22:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 44587.991 tokens/s +DEBUG 06-24 20:22:51 [stats.py:37] Avg prompt tokens throughput: 44579.423 tokens/s +DEBUG 06-24 20:22:51 [stats.py:37] Avg generate tokens throughput: 8.567 tokens/s +ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:295.8862781524658ms total_cost_time:295.9272861480713ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10428 prompt_cache_len:5151 prompt_cache_ratio:0.49395857307249713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 +DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s +INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.11017608642578125 s +DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=315385173719523273442675599510100875527, time:1750767772.0097556s req_ids:[8] +DEBUG 06-24 20:22:52 [manager.py:391] +ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:194.7479248046875ms total_cost_time:194.793701171875ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10429 prompt_cache_len:5151 prompt_cache_ratio:0.493911209128392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 +DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10792136192321777 s +INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s +DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=336860438054043947475534170342487314642, time:1750767772.2100503s req_ids:[8] +DEBUG 06-24 20:22:52 [manager.py:391] +ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:203.6590576171875ms total_cost_time:203.7029266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10430 prompt_cache_len:5151 prompt_cache_ratio:0.49386385426653884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 +DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10780143737792969 s +INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.1096503734588623 s +DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=101515308155050829823066307878377886044, time:1750767772.420043s req_ids:[8] +DEBUG 06-24 20:22:52 [manager.py:391] +ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:203.74059677124023ms total_cost_time:203.7830352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10431 prompt_cache_len:5151 prompt_cache_ratio:0.49381650848432557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 +DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10745477676391602 s +INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10962343215942383 s +DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=272820416551857047257517459736564969014, time:1750767772.631151s req_ids:[8] +DEBUG 06-24 20:22:52 [manager.py:391] +ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:207.40818977355957ms total_cost_time:207.45277404785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10432 prompt_cache_len:5151 prompt_cache_ratio:0.4937691717791411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 +DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10788512229919434 s +INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10991239547729492 s +DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=248979808605345491094647678818473388570, time:1750767772.8438094s req_ids:[8] +DEBUG 06-24 20:22:52 [manager.py:391] +ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:206.8502902984619ms total_cost_time:206.8941593170166ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10433 prompt_cache_len:5151 prompt_cache_ratio:0.49372184414837533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 +DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10816836357116699 s +INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.11022663116455078 s +DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=249465721101337701861493548868766225409, time:1750767773.0570183s req_ids:[8] +DEBUG 06-24 20:22:53 [manager.py:391] +ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:210.01029014587402ms total_cost_time:210.0527286529541ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10434 prompt_cache_len:5151 prompt_cache_ratio:0.4936745255894192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 +DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:53 [batch.py:51] router release req id 8 +INFO 06-24 20:22:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:53 [statics_utils.py:24] mean first cost: 228.3881372238096 ms +INFO 06-24 20:22:53 [statics_utils.py:24] mean per token cost: 0.06765558074446179 ms +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10744404792785645 s +INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942673683166504 s +DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=318046112563954782749007296590387814937, time:1750767773.2735572s req_ids:[8] +DEBUG 06-24 20:22:53 [manager.py:391] +INFO 06-24 20:22:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:371.7174530029297ms total_cost_time:371.7615604400635ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10435 prompt_cache_len:5151 prompt_cache_ratio:0.4936272160996646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 +DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:53 [batch.py:51] router release req id 8 +INFO 06-24 20:22:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s +INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.11038398742675781 s +DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=220678945150700335371915804890000657273, time:1750767773.6516666s req_ids:[8] +DEBUG 06-24 20:22:53 [manager.py:391] +ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:213.12236785888672ms total_cost_time:213.18340301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10436 prompt_cache_len:5151 prompt_cache_ratio:0.4935799156765044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 +DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10768389701843262 s +INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.1096353530883789 s +DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=59188974628554415539707732469254003781, time:1750767773.8818514s req_ids:[8] +DEBUG 06-24 20:22:53 [manager.py:391] +ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:227.38385200500488ms total_cost_time:227.42891311645508ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10437 prompt_cache_len:5151 prompt_cache_ratio:0.4935326243173326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 +DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s +INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11025285720825195 s +DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=19325045056632669235205311910440370122, time:1750767774.101485s req_ids:[8] +DEBUG 06-24 20:22:54 [manager.py:391] +ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:209.08236503601074ms total_cost_time:209.12408828735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10438 prompt_cache_len:5151 prompt_cache_ratio:0.49348534201954397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 +DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10877180099487305 s +INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s +DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=88782648626342577758299477127353334893, time:1750767774.3165982s req_ids:[8] +DEBUG 06-24 20:22:54 [manager.py:391] +ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:207.92818069458008ms total_cost_time:207.97276496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10439 prompt_cache_len:5151 prompt_cache_ratio:0.49343806878053453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 +DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10912680625915527 s +INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11122798919677734 s +DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=272424104696004674183005754171202374518, time:1750767774.5278761s req_ids:[8] +DEBUG 06-24 20:22:54 [manager.py:391] +ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:209.9165916442871ms total_cost_time:209.9611759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10440 prompt_cache_len:5151 prompt_cache_ratio:0.4933908045977011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 +DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s +INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s +DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=128848024472553492963760173015557870097, time:1750767774.7420876s req_ids:[8] +DEBUG 06-24 20:22:54 [manager.py:391] +ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:213.31405639648438ms total_cost_time:213.35887908935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10441 prompt_cache_len:5151 prompt_cache_ratio:0.4933435494684417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 +DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10727071762084961 s +INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s +DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=182947162379528098610882196974559594017, time:1750767774.9769313s req_ids:[8] +DEBUG 06-24 20:22:54 [manager.py:391] +ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:230.00144958496094ms total_cost_time:230.04531860351562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10442 prompt_cache_len:5151 prompt_cache_ratio:0.4932963033901551 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 +DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10948300361633301 s +INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.1114342212677002 s +DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=98453330293958664740898350931813979507, time:1750767775.1945434s req_ids:[8] +DEBUG 06-24 20:22:55 [manager.py:391] +ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:376.0819435119629ms total_cost_time:376.1265277862549ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10443 prompt_cache_len:5151 prompt_cache_ratio:0.4932490663602413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 +DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10871076583862305 s +INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.11072254180908203 s +DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=184795237481759773573843833181978124666, time:1750767775.5734842s req_ids:[8] +DEBUG 06-24 20:22:55 [manager.py:391] +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:210.1001739501953ms total_cost_time:210.14118194580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10444 prompt_cache_len:5151 prompt_cache_ratio:0.4932018383761011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 +DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10772371292114258 s +INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.10968661308288574 s +DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=2027963967725044653957012527503245660, time:1750767775.7895231s req_ids:[8] +DEBUG 06-24 20:22:55 [manager.py:391] +ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:212.24665641784668ms total_cost_time:212.29052543640137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10445 prompt_cache_len:5151 prompt_cache_ratio:0.49315461943513644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 +DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10800528526306152 s +INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.11001420021057129 s +DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=124382591521699313176766934786640915775, time:1750767776.0067759s req_ids:[8] +DEBUG 06-24 20:22:56 [manager.py:391] +ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:209.3179225921631ms total_cost_time:209.36083793640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10446 prompt_cache_len:5151 prompt_cache_ratio:0.49310740953475013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 +DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10764122009277344 s +INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.10956168174743652 s +DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=132535163713605869227513659293106035121, time:1750767776.218991s req_ids:[8] +DEBUG 06-24 20:22:56 [manager.py:391] +ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:210.22558212280273ms total_cost_time:210.2677822113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10447 prompt_cache_len:5151 prompt_cache_ratio:0.4930602086723461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 +DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10781359672546387 s +INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.1096954345703125 s +DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=324212774519960139960774985571884455850, time:1750767776.4352353s req_ids:[8] +DEBUG 06-24 20:22:56 [manager.py:391] +ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.86447143554688ms total_cost_time:211.90571784973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10448 prompt_cache_len:5151 prompt_cache_ratio:0.4930130168453293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 +DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s +INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.11081051826477051 s +DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=30848355640080317684631723310004568333, time:1750767776.649874s req_ids:[8] +DEBUG 06-24 20:22:56 [manager.py:391] +ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:209.00630950927734ms total_cost_time:209.04803276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10449 prompt_cache_len:5151 prompt_cache_ratio:0.4929658340511054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 +DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s +INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.10947465896606445 s +DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=335622299302438022868866674138545793902, time:1750767776.8647614s req_ids:[8] +DEBUG 06-24 20:22:56 [manager.py:391] +ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.23123168945312ms total_cost_time:211.2736701965332ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10450 prompt_cache_len:5151 prompt_cache_ratio:0.49291866028708137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 +DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s +INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099545955657959 s +DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=166886130383756693301267210465919408749, time:1750767777.0787702s req_ids:[8] +DEBUG 06-24 20:22:57 [manager.py:391] +ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.31157875061035ms total_cost_time:211.35354042053223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10451 prompt_cache_len:5151 prompt_cache_ratio:0.492871495550665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 +DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.3103499412536621 s +INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.31239795684814453 s +DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=169021123394621979471360071303725217999, time:1750767777.5054705s req_ids:[8] +DEBUG 06-24 20:22:57 [manager.py:391] +ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:428.2495975494385ms total_cost_time:428.29394340515137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10452 prompt_cache_len:5151 prompt_cache_ratio:0.4928243398392652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 +DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10771727561950684 s +INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.1097409725189209 s +DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=91810470405329715056891898998079490428, time:1750767777.727743s req_ids:[8] +DEBUG 06-24 20:22:57 [manager.py:391] +ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:211.30919456481934ms total_cost_time:211.350679397583ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10453 prompt_cache_len:5151 prompt_cache_ratio:0.4927771931502918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 +DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10802078247070312 s +INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.10995340347290039 s +DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=5706802542527761967783740111423268254, time:1750767777.9442964s req_ids:[8] +DEBUG 06-24 20:22:57 [manager.py:391] +ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:212.18442916870117ms total_cost_time:212.22519874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10454 prompt_cache_len:5151 prompt_cache_ratio:0.49273005548115556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 +DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10858607292175293 s +INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11070036888122559 s +DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=260124197293030158320695005734089767279, time:1750767778.1595263s req_ids:[8] +DEBUG 06-24 20:22:58 [manager.py:391] +ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:207.72099494934082ms total_cost_time:207.7641487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10455 prompt_cache_len:5151 prompt_cache_ratio:0.4926829268292683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 +DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10747456550598145 s +INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.10947561264038086 s +DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=284506228455272881463741301845932095545, time:1750767778.3708541s req_ids:[8] +DEBUG 06-24 20:22:58 [manager.py:391] +ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:209.38491821289062ms total_cost_time:209.4266414642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10456 prompt_cache_len:5151 prompt_cache_ratio:0.49263580719204286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 +DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10828375816345215 s +INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s +DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=156171080608819117782932720389248611509, time:1750767778.5856686s req_ids:[8] +DEBUG 06-24 20:22:58 [manager.py:391] +ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:209.41948890686035ms total_cost_time:209.46145057678223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10457 prompt_cache_len:5151 prompt_cache_ratio:0.49258869656689297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 +DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10863661766052246 s +INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11047744750976562 s +DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=269519166771300874000092730839186656302, time:1750767778.8052545s req_ids:[8] +DEBUG 06-24 20:22:58 [manager.py:391] +ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:216.19176864624023ms total_cost_time:216.23587608337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10458 prompt_cache_len:5151 prompt_cache_ratio:0.4925415949512335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 +DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10763788223266602 s +INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.10963726043701172 s +DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=272580322249888308427890228088957698166, time:1750767779.0249705s req_ids:[8] +DEBUG 06-24 20:22:59 [manager.py:391] +ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:382.07101821899414ms total_cost_time:382.11727142333984ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10459 prompt_cache_len:5151 prompt_cache_ratio:0.49249450234248016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 +DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10936546325683594 s +INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.11151838302612305 s +DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=151809333693191699252600625198969621802, time:1750767779.4054992s req_ids:[8] +DEBUG 06-24 20:22:59 [manager.py:391] +ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:208.64295959472656ms total_cost_time:208.68921279907227ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10460 prompt_cache_len:5151 prompt_cache_ratio:0.4924474187380497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 +DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10772490501403809 s +INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s +DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=127315185224529253195029196074611261460, time:1750767779.6194077s req_ids:[8] +DEBUG 06-24 20:22:59 [manager.py:391] +ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:211.8544578552246ms total_cost_time:211.897611618042ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10461 prompt_cache_len:5151 prompt_cache_ratio:0.49240034413535994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 +DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10922050476074219 s +INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.11114192008972168 s +DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=80138613385132679163632645306056136619, time:1750767779.8344874s req_ids:[8] +DEBUG 06-24 20:22:59 [manager.py:391] +ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:201.58672332763672ms total_cost_time:201.6289234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10462 prompt_cache_len:5151 prompt_cache_ratio:0.4923532785318295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 +DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:22:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10767722129821777 s +INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s +DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=10792118494910909596023623338620067069, time:1750767780.0423303s req_ids:[8] +DEBUG 06-24 20:23:00 [manager.py:391] +ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:212.89348602294922ms total_cost_time:212.9373550415039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10463 prompt_cache_len:5151 prompt_cache_ratio:0.49230622192487816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 +DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s +INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.10946345329284668 s +DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=101626184689385828579756632635243552850, time:1750767780.2699656s req_ids:[8] +DEBUG 06-24 20:23:00 [manager.py:391] +ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:218.80555152893066ms total_cost_time:218.84846687316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10464 prompt_cache_len:5151 prompt_cache_ratio:0.4922591743119266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 +DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10758352279663086 s +INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.10966753959655762 s +DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=57605862771144922217086814013438927517, time:1750767780.4865959s req_ids:[8] +DEBUG 06-24 20:23:00 [manager.py:391] +ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:212.50510215759277ms total_cost_time:212.54968643188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10465 prompt_cache_len:5151 prompt_cache_ratio:0.49221213569039657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 +DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:00 [batch.py:51] router release req id 8 +INFO 06-24 20:23:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10919952392578125 s +INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.11125564575195312 s +DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=41438589456498988321709129699360881836, time:1750767780.704985s req_ids:[8] +DEBUG 06-24 20:23:00 [manager.py:391] +ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:370.46194076538086ms total_cost_time:370.50509452819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10466 prompt_cache_len:5151 prompt_cache_ratio:0.4921651060577107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 +DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10880327224731445 s +INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s +DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=41163294749033431170213538925187515621, time:1750767781.0783868s req_ids:[8] +DEBUG 06-24 20:23:01 [manager.py:391] +ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:206.45403861999512ms total_cost_time:206.4971923828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10467 prompt_cache_len:5151 prompt_cache_ratio:0.49211808541129265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 +DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10809493064880371 s +INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.1103200912475586 s +DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=173632704893600197874911090120457634365, time:1750767781.2912803s req_ids:[8] +DEBUG 06-24 20:23:01 [manager.py:391] +ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:210.56842803955078ms total_cost_time:210.61420440673828ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10468 prompt_cache_len:5151 prompt_cache_ratio:0.49207107374856707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 +DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.1089630126953125 s +INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11108183860778809 s +DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=215738003461033960067048973984508832112, time:1750767781.5060785s req_ids:[8] +DEBUG 06-24 20:23:01 [manager.py:391] +ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:205.47890663146973ms total_cost_time:205.5211067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10469 prompt_cache_len:5151 prompt_cache_ratio:0.4920240710669596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 +DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10775017738342285 s +INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s +DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=241713275119806770689790357155624972844, time:1750767781.7196922s req_ids:[8] +DEBUG 06-24 20:23:01 [manager.py:391] +ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:213.2251262664795ms total_cost_time:213.2706642150879ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10470 prompt_cache_len:5151 prompt_cache_ratio:0.49197707736389684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 +DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.1092219352722168 s +INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11122632026672363 s +DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=291639848862992160669210662031483363667, time:1750767781.936995s req_ids:[8] +DEBUG 06-24 20:23:01 [manager.py:391] +DEBUG 06-24 20:23:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 44512.380 tokens/s +DEBUG 06-24 20:23:01 [stats.py:37] Avg prompt tokens throughput: 44503.863 tokens/s +DEBUG 06-24 20:23:01 [stats.py:37] Avg generate tokens throughput: 8.517 tokens/s +ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:200.73914527893066ms total_cost_time:200.78015327453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10471 prompt_cache_len:5151 prompt_cache_ratio:0.4919300926368064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 +DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10751748085021973 s +INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10916972160339355 s +DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=278853586091680011417897211241521476387, time:1750767782.1441522s req_ids:[8] +DEBUG 06-24 20:23:02 [manager.py:391] +ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:169.76141929626465ms total_cost_time:169.80290412902832ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10472 prompt_cache_len:5151 prompt_cache_ratio:0.49188311688311687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 +DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10759472846984863 s +INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10938096046447754 s +DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=306152459111477007214166901652572626413, time:1750767782.3230655s req_ids:[8] +DEBUG 06-24 20:23:02 [manager.py:391] +ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:365.7214641571045ms total_cost_time:365.7674789428711ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10473 prompt_cache_len:5151 prompt_cache_ratio:0.49183615010025783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 +DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.1078646183013916 s +INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10965180397033691 s +DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=252022900533972657594114832070296127450, time:1750767782.687387s req_ids:[8] +DEBUG 06-24 20:23:02 [manager.py:391] +ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:163.83814811706543ms total_cost_time:163.8798713684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10474 prompt_cache_len:5151 prompt_cache_ratio:0.4917891922856597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 +DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10855317115783691 s +INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s +DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=266100389871840263774030788711563824288, time:1750767782.8577383s req_ids:[8] +DEBUG 06-24 20:23:02 [manager.py:391] +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:199.85413551330566ms total_cost_time:199.89705085754395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10475 prompt_cache_len:5151 prompt_cache_ratio:0.49174224343675416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 +DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10884881019592285 s +INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089563369750977 s +DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=81240748417478525688089750528474676864, time:1750767783.0652025s req_ids:[8] +DEBUG 06-24 20:23:03 [manager.py:391] +ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:204.00500297546387ms total_cost_time:204.04958724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10476 prompt_cache_len:5151 prompt_cache_ratio:0.49169530355097363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 +DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10811710357666016 s +INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11011695861816406 s +DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=310797368702450596962733250694149652952, time:1750767783.2790608s req_ids:[8] +DEBUG 06-24 20:23:03 [manager.py:391] +ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:211.07172966003418ms total_cost_time:211.11440658569336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10477 prompt_cache_len:5151 prompt_cache_ratio:0.4916483726257517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 +DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10810065269470215 s +INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11017513275146484 s +DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=225300777549212817003056728676960185560, time:1750767783.4926603s req_ids:[8] +DEBUG 06-24 20:23:03 [manager.py:391] +ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:205.7352066040039ms total_cost_time:205.7812213897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10478 prompt_cache_len:5151 prompt_cache_ratio:0.4916014506585226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 +DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10791897773742676 s +INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11005520820617676 s +DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=191792093288073743015213700754746311756, time:1750767783.7023287s req_ids:[8] +DEBUG 06-24 20:23:03 [manager.py:391] +ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:206.1171531677246ms total_cost_time:206.15911483764648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10479 prompt_cache_len:5151 prompt_cache_ratio:0.491554537646722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 +DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s +INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.10989999771118164 s +DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=294477285376858161126122115592757642872, time:1750767783.9156473s req_ids:[8] +DEBUG 06-24 20:23:03 [manager.py:391] +ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:212.56566047668457ms total_cost_time:212.60929107666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10480 prompt_cache_len:5151 prompt_cache_ratio:0.49150763358778626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 +DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10884714126586914 s +INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11080050468444824 s +DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=50663458582324664747167879331006438306, time:1750767784.1326072s req_ids:[8] +DEBUG 06-24 20:23:04 [manager.py:391] +ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:367.51747131347656ms total_cost_time:367.56277084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10481 prompt_cache_len:5151 prompt_cache_ratio:0.4914607384791527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 +DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10897040367126465 s +INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11092543601989746 s +DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=43145789675735818702076241218680629402, time:1750767784.5107534s req_ids:[8] +DEBUG 06-24 20:23:04 [manager.py:391] +ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:207.2737216949463ms total_cost_time:207.31687545776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10482 prompt_cache_len:5151 prompt_cache_ratio:0.49141385231825985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 +DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10878944396972656 s +INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11093568801879883 s +DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=150575292758730623191428989415665335439, time:1750767784.7262588s req_ids:[8] +DEBUG 06-24 20:23:04 [manager.py:391] +ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:209.5630168914795ms total_cost_time:209.60688591003418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10483 prompt_cache_len:5151 prompt_cache_ratio:0.49136697510254695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 +DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10810184478759766 s +INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.1100456714630127 s +DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=36846644281662749423563983180010028874, time:1750767784.939978s req_ids:[8] +DEBUG 06-24 20:23:04 [manager.py:391] +ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:207.777738571167ms total_cost_time:207.8235149383545ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10484 prompt_cache_len:5151 prompt_cache_ratio:0.49132010682945443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 +DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s +INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10979700088500977 s +DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=213064408220166020891579270914192875526, time:1750767785.14998s req_ids:[8] +DEBUG 06-24 20:23:05 [manager.py:391] +ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:206.52174949645996ms total_cost_time:206.56538009643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10485 prompt_cache_len:5151 prompt_cache_ratio:0.4912732474964235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 +DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s +INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.11000657081604004 s +DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=218780981206242701420356822070161866058, time:1750767785.3631122s req_ids:[8] +DEBUG 06-24 20:23:05 [manager.py:391] +ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:208.9996337890625ms total_cost_time:209.0439796447754ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10486 prompt_cache_len:5151 prompt_cache_ratio:0.49122639710089644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 +DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10928821563720703 s +INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.11124801635742188 s +DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=229545892437754620000139027760733219508, time:1750767785.577492s req_ids:[8] +DEBUG 06-24 20:23:05 [manager.py:391] +ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:204.1771411895752ms total_cost_time:204.21862602233887ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10487 prompt_cache_len:5151 prompt_cache_ratio:0.49117955564031657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 +DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10535836219787598 s +INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10728192329406738 s +DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=30093450963617115161656054501090529296, time:1750767785.7879674s req_ids:[8] +DEBUG 06-24 20:23:05 [manager.py:391] +ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:205.39402961730957ms total_cost_time:205.43789863586426ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10488 prompt_cache_len:5151 prompt_cache_ratio:0.49113272311212813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 +DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s +INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10880780220031738 s +DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=137723828481734535150389609558279898463, time:1750767785.9999073s req_ids:[8] +DEBUG 06-24 20:23:06 [manager.py:391] +ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:375.3066062927246ms total_cost_time:375.3511905670166ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10489 prompt_cache_len:5151 prompt_cache_ratio:0.49108589951377635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 +DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.1089777946472168 s +INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s +DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=190431015586751524762106285687722969794, time:1750767786.3782377s req_ids:[8] +DEBUG 06-24 20:23:06 [manager.py:391] +ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:208.1315517425537ms total_cost_time:208.1742286682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10490 prompt_cache_len:5151 prompt_cache_ratio:0.49103908484270736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 +DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.10699129104614258 s +INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1088259220123291 s +DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=120594010899663691325773467268633223684, time:1750767786.5925155s req_ids:[8] +DEBUG 06-24 20:23:06 [manager.py:391] +ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:169.4033145904541ms total_cost_time:169.44384574890137ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10491 prompt_cache_len:5151 prompt_cache_ratio:0.49099227909636833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 +DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.10834908485412598 s +INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.11026811599731445 s +DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=263983791762126028247243620869660656210, time:1750767786.7644932s req_ids:[8] +DEBUG 06-24 20:23:06 [manager.py:391] +ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:200.99902153015137ms total_cost_time:201.0185718536377ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:10492 prompt_cache_len:5151 prompt_cache_ratio:0.4909454822722074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 +DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.1085512638092041 s +INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s +DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=240028344186681621554208991752933577679, time:1750767786.9706728s req_ids:[8] +DEBUG 06-24 20:23:06 [manager.py:391] +ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:210.42442321777344ms total_cost_time:210.46853065490723ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10493 prompt_cache_len:5151 prompt_cache_ratio:0.4908986943676737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 +DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s +INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.10976457595825195 s +DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=262747532434907506117884079407748875214, time:1750767787.183103s req_ids:[8] +DEBUG 06-24 20:23:07 [manager.py:391] +ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:210.11590957641602ms total_cost_time:210.1764678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10494 prompt_cache_len:5151 prompt_cache_ratio:0.4908519153802173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 +DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s +INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11105012893676758 s +DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=284577445736752928889769921535287803629, time:1750767787.3979647s req_ids:[8] +DEBUG 06-24 20:23:07 [manager.py:391] +ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:210.7858657836914ms total_cost_time:210.82782745361328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10495 prompt_cache_len:5151 prompt_cache_ratio:0.4908051453072892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 +DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10880398750305176 s +INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11085629463195801 s +DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=154481376097807786742278662746323362318, time:1750767787.613465s req_ids:[8] +DEBUG 06-24 20:23:07 [manager.py:391] +ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:209.92612838745117ms total_cost_time:209.97118949890137ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10496 prompt_cache_len:5151 prompt_cache_ratio:0.4907583841463415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 +DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s +INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11106991767883301 s +DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=37382853190393095923771544333408533512, time:1750767787.826955s req_ids:[8] +DEBUG 06-24 20:23:07 [manager.py:391] +ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:207.5049877166748ms total_cost_time:207.54766464233398ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10497 prompt_cache_len:5151 prompt_cache_ratio:0.4907116318948271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 +DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.1074838638305664 s +INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s +DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=143062463020457958938120092966961442471, time:1750767788.0508592s req_ids:[8] +DEBUG 06-24 20:23:08 [manager.py:391] +ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:377.3953914642334ms total_cost_time:377.4404525756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10498 prompt_cache_len:5151 prompt_cache_ratio:0.4906648885502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 +DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s +INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.11040854454040527 s +DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=151898880458988532233913271108351243350, time:1750767788.418869s req_ids:[8] +DEBUG 06-24 20:23:08 [manager.py:391] +ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:165.03167152404785ms total_cost_time:165.07363319396973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10499 prompt_cache_len:5151 prompt_cache_ratio:0.4906181541099152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 +DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10852670669555664 s +INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s +DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=220034366819782506788911284879478910035, time:1750767788.5876021s req_ids:[8] +DEBUG 06-24 20:23:08 [manager.py:391] +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:196.58255577087402ms total_cost_time:196.624755859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10500 prompt_cache_len:5151 prompt_cache_ratio:0.49057142857142855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 +DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10750532150268555 s +INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.1099696159362793 s +DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=165370249540247868389171365045703401023, time:1750767788.7913153s req_ids:[8] +DEBUG 06-24 20:23:08 [manager.py:391] +ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:210.31832695007324ms total_cost_time:210.36314964294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10501 prompt_cache_len:5151 prompt_cache_ratio:0.49052471193219693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 +DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10743188858032227 s +INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.10949254035949707 s +DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=46914548390978496425318454641099302077, time:1750767789.0054648s req_ids:[8] +DEBUG 06-24 20:23:09 [manager.py:391] +ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:208.7841033935547ms total_cost_time:208.82463455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10502 prompt_cache_len:5151 prompt_cache_ratio:0.49047800418967813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 +DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10836029052734375 s +INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s +DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=103392783865096425300454508626029030234, time:1750767789.2194126s req_ids:[8] +DEBUG 06-24 20:23:09 [manager.py:391] +ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:210.48998832702637ms total_cost_time:210.51263809204102ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:10503 prompt_cache_len:5151 prompt_cache_ratio:0.49043130534133106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 +DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10521197319030762 s +INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.10705161094665527 s +DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=111043631714963482903786914078158498538, time:1750767789.4341786s req_ids:[8] +DEBUG 06-24 20:23:09 [manager.py:391] +ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:210.0672721862793ms total_cost_time:210.11114120483398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10504 prompt_cache_len:5151 prompt_cache_ratio:0.49038461538461536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 +DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10747933387756348 s +INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.1093757152557373 s +DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=24059588089287389854361894552918399712, time:1750767789.6478791s req_ids:[8] +DEBUG 06-24 20:23:09 [manager.py:391] +ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:375.7820129394531ms total_cost_time:375.8258819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10505 prompt_cache_len:5151 prompt_cache_ratio:0.4903379343169919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 +DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s +INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.1104116439819336 s +DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=55212438606298325580934466211135352144, time:1750767790.0256085s req_ids:[8] +DEBUG 06-24 20:23:10 [manager.py:391] +ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:165.21215438842773ms total_cost_time:165.27271270751953ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10506 prompt_cache_len:5151 prompt_cache_ratio:0.49029126213592233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 +DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10851144790649414 s +INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s +DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=304434084715376283973451933997430115545, time:1750767790.1957963s req_ids:[8] +DEBUG 06-24 20:23:10 [manager.py:391] +ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:200.20079612731934ms total_cost_time:200.2429962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10507 prompt_cache_len:5151 prompt_cache_ratio:0.49024459883886934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 +DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s +INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s +DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=281535885799806600632874478105830657322, time:1750767790.400307s req_ids:[8] +DEBUG 06-24 20:23:10 [manager.py:391] +ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:202.38637924194336ms total_cost_time:202.42905616760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10508 prompt_cache_len:5151 prompt_cache_ratio:0.49019794442329656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 +DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s +INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037182807922363 s +DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=15741999783332113079593550735428721288, time:1750767790.6075797s req_ids:[8] +DEBUG 06-24 20:23:10 [manager.py:391] +ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:213.37461471557617ms total_cost_time:213.41848373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10509 prompt_cache_len:5151 prompt_cache_ratio:0.4901512988866686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 +DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10882687568664551 s +INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11090445518493652 s +DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=75524396585592636383898815363052284148, time:1750767790.8243484s req_ids:[8] +DEBUG 06-24 20:23:10 [manager.py:391] +ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:208.23907852172852ms total_cost_time:208.2819938659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10510 prompt_cache_len:5151 prompt_cache_ratio:0.490104662226451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 +DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.1081385612487793 s +INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11015939712524414 s +DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=139821773293256259985018704396819698635, time:1750767791.0376763s req_ids:[8] +DEBUG 06-24 20:23:11 [manager.py:391] +ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:209.14864540100098ms total_cost_time:209.19203758239746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10511 prompt_cache_len:5151 prompt_cache_ratio:0.49005803444011037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 +DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10781240463256836 s +INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.10995745658874512 s +DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=63553487088312537059409365035282471181, time:1750767791.2517588s req_ids:[8] +DEBUG 06-24 20:23:11 [manager.py:391] +ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:374.93014335632324ms total_cost_time:374.97520446777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10512 prompt_cache_len:5151 prompt_cache_ratio:0.4900114155251142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 +DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10903644561767578 s +INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11108827590942383 s +DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=89570451453719896526957637692585509599, time:1750767791.6315482s req_ids:[8] +DEBUG 06-24 20:23:11 [manager.py:391] +ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:207.6404094696045ms total_cost_time:207.68475532531738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10513 prompt_cache_len:5151 prompt_cache_ratio:0.48996480547893084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 +DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s +INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s +DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=5659944395461967708871410630708063660, time:1750767791.8421195s req_ids:[8] +DEBUG 06-24 20:23:11 [manager.py:391] +ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:209.84339714050293ms total_cost_time:209.88702774047852ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10514 prompt_cache_len:5151 prompt_cache_ratio:0.4899182042990299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 +DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10714864730834961 s +INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.10872173309326172 s +DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=57675362637170016775144474276464150143, time:1750767792.0561054s req_ids:[8] +DEBUG 06-24 20:23:12 [manager.py:391] +DEBUG 06-24 20:23:12 [stats.py:37] Avg tokens(prompt+generate) throughput: 45638.292 tokens/s +DEBUG 06-24 20:23:12 [stats.py:37] Avg prompt tokens throughput: 45629.595 tokens/s +DEBUG 06-24 20:23:12 [stats.py:37] Avg generate tokens throughput: 8.697 tokens/s +ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:206.31790161132812ms total_cost_time:206.3612937927246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10515 prompt_cache_len:5151 prompt_cache_ratio:0.4898716119828816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 +DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.1087808609008789 s +INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.11095190048217773 s +DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=177663106032884925446632845769406428673, time:1750767792.2669654s req_ids:[8] +DEBUG 06-24 20:23:12 [manager.py:391] +ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:209.74230766296387ms total_cost_time:209.78665351867676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10516 prompt_cache_len:5151 prompt_cache_ratio:0.4898250285279574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 +DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10909175872802734 s +INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.1111760139465332 s +DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=135133832850244019395752144513071703506, time:1750767792.482907s req_ids:[8] +DEBUG 06-24 20:23:12 [manager.py:391] +ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:212.22543716430664ms total_cost_time:212.26906776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10517 prompt_cache_len:5151 prompt_cache_ratio:0.4897784539317296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 +DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10767626762390137 s +INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.10969352722167969 s +DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=207174962002017792972199957986713731324, time:1750767792.6973548s req_ids:[8] +DEBUG 06-24 20:23:12 [manager.py:391] +ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:208.5108757019043ms total_cost_time:208.5549831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10518 prompt_cache_len:5151 prompt_cache_ratio:0.4897318881916714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 +DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10821151733398438 s +INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.11024260520935059 s +DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=107353707203886606969466179385401519479, time:1750767792.9124122s req_ids:[8] +DEBUG 06-24 20:23:12 [manager.py:391] +ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:211.75146102905273ms total_cost_time:211.79509162902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10519 prompt_cache_len:5151 prompt_cache_ratio:0.48968533130525715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 +DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.30990147590637207 s +INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.3121316432952881 s +DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=68359018073290960545308886771519367661, time:1750767793.3278317s req_ids:[8] +DEBUG 06-24 20:23:13 [manager.py:391] +ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:414.87932205200195ms total_cost_time:414.92271423339844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10520 prompt_cache_len:5151 prompt_cache_ratio:0.48963878326996196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 +DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10780072212219238 s +INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.10975384712219238 s +DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=145957623393675098924069957399053325304, time:1750767793.546723s req_ids:[8] +DEBUG 06-24 20:23:13 [manager.py:391] +ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:208.10341835021973ms total_cost_time:208.14871788024902ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10521 prompt_cache_len:5151 prompt_cache_ratio:0.4895922440832621 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 +DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10798382759094238 s +INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.1100015640258789 s +DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=293650215581887429273889851938353575045, time:1750767793.7598019s req_ids:[8] +DEBUG 06-24 20:23:13 [manager.py:391] +ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:210.0965976715088ms total_cost_time:210.14142036437988ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10522 prompt_cache_len:5151 prompt_cache_ratio:0.4895457137426345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 +DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10884332656860352 s +INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.11082005500793457 s +DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=100975903948495585413790924824280540358, time:1750767793.9728243s req_ids:[8] +DEBUG 06-24 20:23:13 [manager.py:391] +ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:207.14592933654785ms total_cost_time:207.19027519226074ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10523 prompt_cache_len:5151 prompt_cache_ratio:0.4894991922455573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 +DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s +INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s +DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=153453962920608169506472202848014528148, time:1750767794.184267s req_ids:[8] +DEBUG 06-24 20:23:14 [manager.py:391] +ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:207.96513557434082ms total_cost_time:208.01019668579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10524 prompt_cache_len:5151 prompt_cache_ratio:0.48945267958950966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 +DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10914397239685059 s +INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.11098837852478027 s +DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=19794675849032432206496704358834112755, time:1750767794.3985043s req_ids:[8] +DEBUG 06-24 20:23:14 [manager.py:391] +ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:210.96038818359375ms total_cost_time:211.00568771362305ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10525 prompt_cache_len:5151 prompt_cache_ratio:0.4894061757719715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 +DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10784387588500977 s +INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s +DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=181922587957900643269876737508005884083, time:1750767794.6149604s req_ids:[8] +DEBUG 06-24 20:23:14 [manager.py:391] +ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:212.74828910827637ms total_cost_time:212.79215812683105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10526 prompt_cache_len:5151 prompt_cache_ratio:0.4893596807904237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 +DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10597610473632812 s +INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.10796594619750977 s +DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=323006020968430285436397838857655278831, time:1750767794.8301084s req_ids:[8] +DEBUG 06-24 20:23:14 [manager.py:391] +ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:364.92252349853516ms total_cost_time:364.96710777282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10527 prompt_cache_len:5151 prompt_cache_ratio:0.48931319464234824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 +DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10895466804504395 s +INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102509498596191 s +DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=102201014220438848733240371042768520752, time:1750767795.1982744s req_ids:[8] +DEBUG 06-24 20:23:15 [manager.py:391] +ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:201.7369270324707ms total_cost_time:201.7807960510254ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10528 prompt_cache_len:5151 prompt_cache_ratio:0.489266717325228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 +DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10917949676513672 s +INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102151870727539 s +DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=8658408790011574165926964006319192123, time:1750767795.4109485s req_ids:[8] +DEBUG 06-24 20:23:15 [manager.py:391] +ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:212.59593963623047ms total_cost_time:212.63885498046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10529 prompt_cache_len:5151 prompt_cache_ratio:0.4892202488365467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 +DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10801982879638672 s +INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013579368591309 s +DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=77542708209534337192191955309151391650, time:1750767795.6236284s req_ids:[8] +DEBUG 06-24 20:23:15 [manager.py:391] +ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:211.57503128051758ms total_cost_time:211.63010597229004ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:10530 prompt_cache_len:5151 prompt_cache_ratio:0.4891737891737892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 +DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10848355293273926 s +INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.1103982925415039 s +DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=205930217717701043080726446567956603082, time:1750767795.8382573s req_ids:[8] +DEBUG 06-24 20:23:15 [manager.py:391] +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:210.5088233947754ms total_cost_time:210.55293083190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10531 prompt_cache_len:5151 prompt_cache_ratio:0.48912733833444116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 +DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10814929008483887 s +INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11003756523132324 s +DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=207788433510289404428365533763712853781, time:1750767796.053094s req_ids:[8] +DEBUG 06-24 20:23:16 [manager.py:391] +ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:208.59527587890625ms total_cost_time:208.63938331604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10532 prompt_cache_len:5151 prompt_cache_ratio:0.4890808963159894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 +DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10886836051940918 s +INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s +DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=256434674743195982114371851166603146737, time:1750767796.266375s req_ids:[8] +DEBUG 06-24 20:23:16 [manager.py:391] +ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:208.8487148284912ms total_cost_time:208.8923454284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10533 prompt_cache_len:5151 prompt_cache_ratio:0.4890344631159214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 +DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10881400108337402 s +INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11084771156311035 s +DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=94169743975114847569830510820187016435, time:1750767796.4793234s req_ids:[8] +DEBUG 06-24 20:23:16 [manager.py:391] +ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:207.61370658874512ms total_cost_time:207.6563835144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10534 prompt_cache_len:5151 prompt_cache_ratio:0.48898803873172586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 +DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10792875289916992 s +INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11015033721923828 s +DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=225691613820244617850568937976307226938, time:1750767796.6911275s req_ids:[8] +DEBUG 06-24 20:23:16 [manager.py:391] +ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:366.84584617614746ms total_cost_time:366.89209938049316ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10535 prompt_cache_len:5151 prompt_cache_ratio:0.4889416231608923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 +DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:16 [batch.py:51] router release req id 8 +INFO 06-24 20:23:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10950303077697754 s +INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.11141800880432129 s +DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=154499013009289501225264090005648834604, time:1750767797.0611374s req_ids:[8] +DEBUG 06-24 20:23:17 [manager.py:391] +ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:210.44468879699707ms total_cost_time:210.48712730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10536 prompt_cache_len:5151 prompt_cache_ratio:0.48889521640091116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 +DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10916352272033691 s +INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.1111912727355957 s +DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=163983926928818824706495166708941450984, time:1750767797.2772782s req_ids:[8] +DEBUG 06-24 20:23:17 [manager.py:391] +ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:208.41193199157715ms total_cost_time:208.45508575439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10537 prompt_cache_len:5151 prompt_cache_ratio:0.48884881844927397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 +DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10790801048278809 s +INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10983133316040039 s +DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=76070627260413980981175792313749074692, time:1750767797.4907858s req_ids:[8] +DEBUG 06-24 20:23:17 [manager.py:391] +ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:210.27159690856934ms total_cost_time:210.31498908996582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10538 prompt_cache_len:5151 prompt_cache_ratio:0.4888024293034731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 +DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10717511177062988 s +INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10902643203735352 s +DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=271685061277719993712582177868272290572, time:1750767797.705348s req_ids:[8] +DEBUG 06-24 20:23:17 [manager.py:391] +ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:165.85183143615723ms total_cost_time:165.8928394317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10539 prompt_cache_len:5151 prompt_cache_ratio:0.488756048961002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 +DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.1081075668334961 s +INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997128486633301 s +DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=261552380299926029620334085981128685999, time:1750767797.8752513s req_ids:[8] +DEBUG 06-24 20:23:17 [manager.py:391] +ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:199.42259788513184ms total_cost_time:199.46694374084473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10540 prompt_cache_len:5151 prompt_cache_ratio:0.48870967741935484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 +DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10916519165039062 s +INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.1111297607421875 s +DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=232785756382693521180138208655647740286, time:1750767798.0819907s req_ids:[8] +DEBUG 06-24 20:23:18 [manager.py:391] +ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:206.528902053833ms total_cost_time:206.5894603729248ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10541 prompt_cache_len:5151 prompt_cache_ratio:0.4886633146760269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 +DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10783815383911133 s +INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.10984992980957031 s +DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=280379045392154242532510553331309286785, time:1750767798.293527s req_ids:[8] +DEBUG 06-24 20:23:18 [manager.py:391] +ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:203.64832878112793ms total_cost_time:203.6893367767334ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10542 prompt_cache_len:5151 prompt_cache_ratio:0.4886169607285145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 +DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10780787467956543 s +INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.1099240779876709 s +DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=58916834652057400942515487938459448049, time:1750767798.504073s req_ids:[8] +DEBUG 06-24 20:23:18 [manager.py:391] +ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:209.14363861083984ms total_cost_time:209.18750762939453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10543 prompt_cache_len:5151 prompt_cache_ratio:0.4885706155743147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 +DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.3103036880493164 s +INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.31230664253234863 s +DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=122757105661462498315367893442956340916, time:1750767798.925997s req_ids:[8] +DEBUG 06-24 20:23:18 [manager.py:391] +ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:418.88976097106934ms total_cost_time:418.9324378967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10544 prompt_cache_len:5151 prompt_cache_ratio:0.48852427921092567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 +DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s +INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019182205200195 s +DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=289668057849387587738812066190441142892, time:1750767799.144604s req_ids:[8] +DEBUG 06-24 20:23:19 [manager.py:391] +ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.9313735961914ms total_cost_time:209.9757194519043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10545 prompt_cache_len:5151 prompt_cache_ratio:0.4884779516358464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 +DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10834431648254395 s +INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11027336120605469 s +DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=230445750422319362798883249835658808677, time:1750767799.3600543s req_ids:[8] +DEBUG 06-24 20:23:19 [manager.py:391] +ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.19513702392578ms total_cost_time:209.23781394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10546 prompt_cache_len:5151 prompt_cache_ratio:0.4884316328465769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 +DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10862970352172852 s +INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.1108396053314209 s +DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=27761550774756758275482656260098989074, time:1750767799.575164s req_ids:[8] +DEBUG 06-24 20:23:19 [manager.py:391] +ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:207.98683166503906ms total_cost_time:208.02998542785645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10547 prompt_cache_len:5151 prompt_cache_ratio:0.4883853228406182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 +DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10860228538513184 s +INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11061239242553711 s +DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=230169112702596070491529601845165931442, time:1750767799.787658s req_ids:[8] +DEBUG 06-24 20:23:19 [manager.py:391] +ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.49578285217285ms total_cost_time:209.54179763793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10548 prompt_cache_len:5151 prompt_cache_ratio:0.4883390216154721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 +DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10814666748046875 s +INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11004304885864258 s +DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=200096191489901066678812064764128600112, time:1750767800.0016806s req_ids:[8] +DEBUG 06-24 20:23:20 [manager.py:391] +ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:204.50401306152344ms total_cost_time:204.54716682434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10549 prompt_cache_len:5151 prompt_cache_ratio:0.4882927291686416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 +DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.1071479320526123 s +INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.10896968841552734 s +DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=140621637521862760995481892467228551119, time:1750767800.2124295s req_ids:[8] +DEBUG 06-24 20:23:20 [manager.py:391] +ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:166.36371612548828ms total_cost_time:166.40496253967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10550 prompt_cache_len:5151 prompt_cache_ratio:0.48824644549763035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 +DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.1080629825592041 s +INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11005377769470215 s +DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=74824843944110781401531505610786734471, time:1750767800.383586s req_ids:[8] +DEBUG 06-24 20:23:20 [manager.py:391] +ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:365.23914337158203ms total_cost_time:365.2834892272949ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10551 prompt_cache_len:5151 prompt_cache_ratio:0.48820017059994314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 +DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.10902810096740723 s +INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11103272438049316 s +DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=193120891851973332498648182633609516190, time:1750767800.75355s req_ids:[8] +DEBUG 06-24 20:23:20 [manager.py:391] +ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:206.0999870300293ms total_cost_time:206.1440944671631ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10552 prompt_cache_len:5151 prompt_cache_ratio:0.48815390447308565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 +DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.10897541046142578 s +INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11097311973571777 s +DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=258036025765972859156200257858143711777, time:1750767800.9670439s req_ids:[8] +DEBUG 06-24 20:23:20 [manager.py:391] +ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:208.5883617401123ms total_cost_time:208.6331844329834ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10553 prompt_cache_len:5151 prompt_cache_ratio:0.48810764711456456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 +DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10776066780090332 s +INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.10967063903808594 s +DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=297206624226052054167233196753259654715, time:1750767801.1924891s req_ids:[8] +DEBUG 06-24 20:23:21 [manager.py:391] +ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:219.54870223999023ms total_cost_time:219.59233283996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10554 prompt_cache_len:5151 prompt_cache_ratio:0.48806139852188746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 +DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10935735702514648 s +INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.11148405075073242 s +DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=298425519050738719949869265975783826697, time:1750767801.4076734s req_ids:[8] +DEBUG 06-24 20:23:21 [manager.py:391] +ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:209.34271812438965ms total_cost_time:209.38587188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10555 prompt_cache_len:5151 prompt_cache_ratio:0.4880151586925628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 +DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10911273956298828 s +INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s +DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=79932727332359486502320936854070590208, time:1750767801.6230245s req_ids:[8] +DEBUG 06-24 20:23:21 [manager.py:391] +ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:207.71455764770508ms total_cost_time:207.75675773620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10556 prompt_cache_len:5151 prompt_cache_ratio:0.48796892762410005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 +DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10787248611450195 s +INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.10994982719421387 s +DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=55003324000668117483760985119531555770, time:1750767801.8347437s req_ids:[8] +DEBUG 06-24 20:23:21 [manager.py:391] +ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:208.07194709777832ms total_cost_time:208.1146240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10557 prompt_cache_len:5151 prompt_cache_ratio:0.48792270531400966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 +DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10726261138916016 s +INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.10909557342529297 s +DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=316414676733546544463998249901221899824, time:1750767802.0506444s req_ids:[8] +DEBUG 06-24 20:23:22 [manager.py:391] +ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:23:22 [stats.py:37] Avg tokens(prompt+generate) throughput: 44387.100 tokens/s +DEBUG 06-24 20:23:22 [stats.py:37] Avg prompt tokens throughput: 44378.579 tokens/s +DEBUG 06-24 20:23:22 [stats.py:37] Avg generate tokens throughput: 8.521 tokens/s +INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:333.85205268859863ms total_cost_time:333.8966369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10558 prompt_cache_len:5151 prompt_cache_ratio:0.48787649175980297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 +DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s +INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.10979080200195312 s +DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=281390346247682185979026902050459277796, time:1750767802.38847s req_ids:[8] +DEBUG 06-24 20:23:22 [manager.py:391] +ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:193.6802864074707ms total_cost_time:193.7251091003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10559 prompt_cache_len:5151 prompt_cache_ratio:0.48783028695899233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 +DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s +INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.11113619804382324 s +DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=35478402712308417166143843208469603603, time:1750767802.5893872s req_ids:[8] +DEBUG 06-24 20:23:22 [manager.py:391] +ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:205.81889152526855ms total_cost_time:205.86371421813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10560 prompt_cache_len:5151 prompt_cache_ratio:0.4877840909090909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 +DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10867547988891602 s +INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.11062288284301758 s +DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=196644603409985866450823290147185251338, time:1750767802.8003201s req_ids:[8] +DEBUG 06-24 20:23:22 [manager.py:391] +ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:204.57220077514648ms total_cost_time:204.61535453796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10561 prompt_cache_len:5151 prompt_cache_ratio:0.48773790360761293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 +DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10889482498168945 s +INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.1109776496887207 s +DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=243676183230123385063439111350281356370, time:1750767803.0105016s req_ids:[8] +DEBUG 06-24 20:23:23 [manager.py:391] +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:206.38132095336914ms total_cost_time:206.42375946044922ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10562 prompt_cache_len:5151 prompt_cache_ratio:0.4876917250520735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 +DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:23 [batch.py:51] router release req id 8 +INFO 06-24 20:23:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:23:23 [statics_utils.py:24] mean first cost: 228.40219983119954 ms +INFO 06-24 20:23:23 [statics_utils.py:24] mean per token cost: 0.06715881479464118 ms +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10754632949829102 s +INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.10965466499328613 s +DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=5870643599275590524862380036959005878, time:1750767803.2238207s req_ids:[8] +DEBUG 06-24 20:23:23 [manager.py:391] +ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:207.0775032043457ms total_cost_time:207.11946487426758ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10563 prompt_cache_len:5151 prompt_cache_ratio:0.48764555523998865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 +DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10800886154174805 s +INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s +DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=105230735345434795187875896015195942851, time:1750767803.4367042s req_ids:[8] +DEBUG 06-24 20:23:23 [manager.py:391] +ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:208.6644172668457ms total_cost_time:208.7085247039795ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10564 prompt_cache_len:5151 prompt_cache_ratio:0.48759939416887543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 +DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.1082766056060791 s +INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s +DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=38289967942352887997750733340412377668, time:1750767803.651139s req_ids:[8] +DEBUG 06-24 20:23:23 [manager.py:391] +ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:385.2496147155762ms total_cost_time:385.2953910827637ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10565 prompt_cache_len:5151 prompt_cache_ratio:0.48755324183625176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 +DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s +INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s +DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=130898138005803516014141643763082471336, time:1750767804.051288s req_ids:[8] +DEBUG 06-24 20:23:24 [manager.py:391] +ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:178.18546295166016ms total_cost_time:178.22718620300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10566 prompt_cache_len:5151 prompt_cache_ratio:0.48750709823963656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 +DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s +INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s +DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=339517920910523521811597183965644957664, time:1750767804.2253027s req_ids:[8] +DEBUG 06-24 20:23:24 [manager.py:391] +ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:194.6084499359131ms total_cost_time:194.65208053588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10567 prompt_cache_len:5151 prompt_cache_ratio:0.48746096337654965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 +DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.1086721420288086 s +INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11075162887573242 s +DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=61941015305183658364223702153394623461, time:1750767804.4225938s req_ids:[8] +DEBUG 06-24 20:23:24 [manager.py:391] +ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:201.49874687194824ms total_cost_time:201.5397548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10568 prompt_cache_len:5151 prompt_cache_ratio:0.48741483724451173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 +DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10868644714355469 s +INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11084723472595215 s +DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=179471736249085892245051118598414867791, time:1750767804.630419s req_ids:[8] +DEBUG 06-24 20:23:24 [manager.py:391] +ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:206.5720558166504ms total_cost_time:206.59780502319336ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:10569 prompt_cache_len:5151 prompt_cache_ratio:0.4873687198410446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 +DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10706186294555664 s +INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.10909080505371094 s +DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=310183199270551091418164842669890798865, time:1750767804.8465369s req_ids:[8] +DEBUG 06-24 20:23:24 [manager.py:391] +ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:204.6051025390625ms total_cost_time:204.63013648986816ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:10570 prompt_cache_len:5151 prompt_cache_ratio:0.48732261116367076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 +DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10719609260559082 s +INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10936093330383301 s +DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=268060445851607364708633959200458605647, time:1750767805.053917s req_ids:[8] +DEBUG 06-24 20:23:25 [manager.py:391] +ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:208.9698314666748ms total_cost_time:208.99605751037598ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:10571 prompt_cache_len:5151 prompt_cache_ratio:0.48727651120991394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 +DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10809087753295898 s +INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s +DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=82041548072377950844375702232596312562, time:1750767805.2699723s req_ids:[8] +DEBUG 06-24 20:23:25 [manager.py:391] +ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:207.99851417541504ms total_cost_time:208.02617073059082ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:10572 prompt_cache_len:5151 prompt_cache_ratio:0.48723041997729855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 +DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10698366165161133 s +INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10906767845153809 s +DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=277143668288910730814503220056621954306, time:1750767805.483336s req_ids:[8] +DEBUG 06-24 20:23:25 [manager.py:391] +ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:417.11974143981934ms total_cost_time:417.1462059020996ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:10573 prompt_cache_len:5151 prompt_cache_ratio:0.48718433746335005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 +DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10743594169616699 s +INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10943746566772461 s +DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=186367774265478952694738383213051807404, time:1750767805.9081852s req_ids:[8] +DEBUG 06-24 20:23:25 [manager.py:391] +ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:215.88540077209473ms total_cost_time:215.9113883972168ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:10574 prompt_cache_len:5151 prompt_cache_ratio:0.4871382636655949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 +DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10758638381958008 s +INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10956048965454102 s +DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=260159047027369305044269385414180651744, time:1750767806.123888s req_ids:[8] +DEBUG 06-24 20:23:26 [manager.py:391] +ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:211.10916137695312ms total_cost_time:211.134672164917ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:10575 prompt_cache_len:5151 prompt_cache_ratio:0.4870921985815603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 +DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10678219795227051 s +INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10883617401123047 s +DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=63895385407009942945228935080394885652, time:1750767806.341533s req_ids:[8] +DEBUG 06-24 20:23:26 [manager.py:391] +ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:208.8625431060791ms total_cost_time:208.88900756835938ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:10576 prompt_cache_len:5151 prompt_cache_ratio:0.48704614220877457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 +DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10753870010375977 s +INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10946846008300781 s +DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=138891744844753793670024022334827493990, time:1750767806.5557919s req_ids:[8] +DEBUG 06-24 20:23:26 [manager.py:391] +ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:213.1509780883789ms total_cost_time:213.19580078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10577 prompt_cache_len:5151 prompt_cache_ratio:0.48700009454476695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 +DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10870146751403809 s +INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.11069035530090332 s +DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=233933099612379178827063512492090623776, time:1750767806.772135s req_ids:[8] +DEBUG 06-24 20:23:26 [manager.py:391] +ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:209.1667652130127ms total_cost_time:209.19418334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:10578 prompt_cache_len:5151 prompt_cache_ratio:0.4869540555870675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 +DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10773181915283203 s +INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10970902442932129 s +DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=76633448612257674462946007913337663421, time:1750767806.9880245s req_ids:[8] +DEBUG 06-24 20:23:26 [manager.py:391] +ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:210.70098876953125ms total_cost_time:210.72626113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:10579 prompt_cache_len:5151 prompt_cache_ratio:0.4869080253332073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 +DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.1075592041015625 s +INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10967683792114258 s +DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=240835447256370895263204467819486178862, time:1750767807.202556s req_ids:[8] +DEBUG 06-24 20:23:27 [manager.py:391] +ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:210.0231647491455ms total_cost_time:210.0508213043213ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:10580 prompt_cache_len:5151 prompt_cache_ratio:0.4868620037807183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 +DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.1074669361114502 s +INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10967493057250977 s +DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=73455545078193044399765939502571278176, time:1750767807.4185908s req_ids:[8] +DEBUG 06-24 20:23:27 [manager.py:391] +ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:378.8790702819824ms total_cost_time:378.9057731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:10581 prompt_cache_len:5151 prompt_cache_ratio:0.48681599092713357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 +DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.10722684860229492 s +INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10934042930603027 s +DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=287109929602429456417041128143866934349, time:1750767807.801624s req_ids:[8] +DEBUG 06-24 20:23:27 [manager.py:391] +ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:207.99851417541504ms total_cost_time:208.02545547485352ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:10582 prompt_cache_len:5151 prompt_cache_ratio:0.4867699867699868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 +DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10712623596191406 s +INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.10898256301879883 s +DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=320503679515350941497512093487547089939, time:1750767808.0156s req_ids:[8] +DEBUG 06-24 20:23:28 [manager.py:391] +ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:209.46049690246582ms total_cost_time:209.48266983032227ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:10583 prompt_cache_len:5151 prompt_cache_ratio:0.4867239913068128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 +DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.1068272590637207 s +INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.10893821716308594 s +DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=213639688990821776167126999704675522673, time:1750767808.2301686s req_ids:[8] +DEBUG 06-24 20:23:28 [manager.py:391] +ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:193.8636302947998ms total_cost_time:193.90416145324707ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10584 prompt_cache_len:5151 prompt_cache_ratio:0.48667800453514737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 +DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10840320587158203 s +INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11036419868469238 s +DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=90950471142427081649407821093144862325, time:1750767808.4258635s req_ids:[8] +DEBUG 06-24 20:23:28 [manager.py:391] +ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:208.33873748779297ms total_cost_time:208.38117599487305ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10585 prompt_cache_len:5151 prompt_cache_ratio:0.4866320264525272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 +DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s +INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11063122749328613 s +DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=171074563834729134138899235368959273302, time:1750767808.6405563s req_ids:[8] +DEBUG 06-24 20:23:28 [manager.py:391] +ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:202.03256607055664ms total_cost_time:202.07762718200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10586 prompt_cache_len:5151 prompt_cache_ratio:0.4865860570564897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 +DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10809993743896484 s +INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11000466346740723 s +DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=107040977703879795177255903976388551349, time:1750767808.849747s req_ids:[8] +DEBUG 06-24 20:23:28 [manager.py:391] +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:206.831693649292ms total_cost_time:206.87389373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10587 prompt_cache_len:5151 prompt_cache_ratio:0.4865400963445735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 +DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10877466201782227 s +INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095690727233887 s +DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=334223754105166662184624036780787201810, time:1750767809.060972s req_ids:[8] +DEBUG 06-24 20:23:29 [manager.py:391] +ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:198.8990306854248ms total_cost_time:198.9421844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10588 prompt_cache_len:5151 prompt_cache_ratio:0.4864941443143181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 +DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10899925231933594 s +INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.1110072135925293 s +DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=142120503313511755528130963689549811088, time:1750767809.267129s req_ids:[8] +DEBUG 06-24 20:23:29 [manager.py:391] +ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:206.87484741210938ms total_cost_time:206.91776275634766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10589 prompt_cache_len:5151 prompt_cache_ratio:0.48644820096326374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 +DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s +INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.11034178733825684 s +DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=283039935415706212664921653027795952089, time:1750767809.4805195s req_ids:[8] +DEBUG 06-24 20:23:29 [manager.py:391] +ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:352.4484634399414ms total_cost_time:352.4930477142334ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10590 prompt_cache_len:5151 prompt_cache_ratio:0.48640226628895183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 +DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10902261734008789 s +INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.1110677719116211 s +DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=42079004799456599193261064600250232129, time:1750767809.8659406s req_ids:[8] +DEBUG 06-24 20:23:29 [manager.py:391] +ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:234.1439723968506ms total_cost_time:234.18855667114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10591 prompt_cache_len:5151 prompt_cache_ratio:0.48635634028892455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 +DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10889101028442383 s +INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.11081385612487793 s +DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=126775619072673405241715471735740029887, time:1750767810.0777638s req_ids:[8] +DEBUG 06-24 20:23:30 [manager.py:391] +ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:206.05850219726562ms total_cost_time:206.1009407043457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10592 prompt_cache_len:5151 prompt_cache_ratio:0.4863104229607251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 +DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10877323150634766 s +INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.11083030700683594 s +DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=166323288787399514626021471323322929834, time:1750767810.290168s req_ids:[8] +DEBUG 06-24 20:23:30 [manager.py:391] +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:206.1154842376709ms total_cost_time:206.1600685119629ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10593 prompt_cache_len:5151 prompt_cache_ratio:0.4862645143018975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 +DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10884332656860352 s +INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.1109318733215332 s +DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=198335673821234505339724825430855145339, time:1750767810.501339s req_ids:[8] +DEBUG 06-24 20:23:30 [manager.py:391] +ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:201.77793502807617ms total_cost_time:201.82228088378906ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10594 prompt_cache_len:5151 prompt_cache_ratio:0.4862186143099868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 +DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.1074223518371582 s +INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.10935688018798828 s +DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=48102405358065047708546072187677442745, time:1750767810.70928s req_ids:[8] +DEBUG 06-24 20:23:30 [manager.py:391] +ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:199.67269897460938ms total_cost_time:199.71632957458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10595 prompt_cache_len:5151 prompt_cache_ratio:0.48617272298253894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 +DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10909032821655273 s +INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110081672668457 s +DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=208379217200951301345986660449640277309, time:1750767810.9157777s req_ids:[8] +DEBUG 06-24 20:23:30 [manager.py:391] +ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:205.69992065429688ms total_cost_time:205.74498176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10596 prompt_cache_len:5151 prompt_cache_ratio:0.4861268403171008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 +DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s +INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11094284057617188 s +DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=34507118452913979053388996677929473451, time:1750767811.1277444s req_ids:[8] +DEBUG 06-24 20:23:31 [manager.py:391] +ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:372.27654457092285ms total_cost_time:372.32208251953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10597 prompt_cache_len:5151 prompt_cache_ratio:0.48608096631122016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 +DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10842514038085938 s +INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11046552658081055 s +DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=327505332942232623446345904272220035545, time:1750767811.5060194s req_ids:[8] +DEBUG 06-24 20:23:31 [manager.py:391] +ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:206.6490650177002ms total_cost_time:206.6946029663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10598 prompt_cache_len:5151 prompt_cache_ratio:0.48603510096244573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 +DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10845232009887695 s +INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s +DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=240139758206292863285115753611639092354, time:1750767811.7210624s req_ids:[8] +DEBUG 06-24 20:23:31 [manager.py:391] +ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:208.39929580688477ms total_cost_time:208.44435691833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10599 prompt_cache_len:5151 prompt_cache_ratio:0.4859892442683272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 +DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:31 [batch.py:51] router release req id 8 +DEBUG 06-24 20:23:31 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:31 [manager.py:283] +DEBUG 06-24 20:23:31 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:31 [manager.py:284] +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10869789123535156 s +INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11061882972717285 s +DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=272148052655120584761169423874977634142, time:1750767811.9344182s req_ids:[8] +DEBUG 06-24 20:23:31 [manager.py:391] +ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:207.38983154296875ms total_cost_time:207.43441581726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10600 prompt_cache_len:5151 prompt_cache_ratio:0.4859433962264151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 +DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.1087801456451416 s +INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.11084532737731934 s +DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=271417524959698733290066442598351556951, time:1750767812.1493838s req_ids:[8] +DEBUG 06-24 20:23:32 [manager.py:391] +ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:205.77478408813477ms total_cost_time:205.81698417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10601 prompt_cache_len:5151 prompt_cache_ratio:0.48589755683426095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 +DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s +INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.10999536514282227 s +DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=222678921241954460341327115475400846160, time:1750767812.3599384s req_ids:[8] +DEBUG 06-24 20:23:32 [manager.py:391] +DEBUG 06-24 20:23:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 46126.509 tokens/s +DEBUG 06-24 20:23:32 [stats.py:37] Avg prompt tokens throughput: 46117.891 tokens/s +DEBUG 06-24 20:23:32 [stats.py:37] Avg generate tokens throughput: 8.618 tokens/s +ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:208.22834968566895ms total_cost_time:208.27269554138184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10602 prompt_cache_len:5151 prompt_cache_ratio:0.48585172608941707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 +DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.1091618537902832 s +INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.11108589172363281 s +DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=316326715217238276791746487440939986756, time:1750767812.575488s req_ids:[8] +DEBUG 06-24 20:23:32 [manager.py:391] +ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:209.24091339111328ms total_cost_time:209.28406715393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10603 prompt_cache_len:5151 prompt_cache_ratio:0.48580590398943696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 +DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.10836577415466309 s +INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.1104574203491211 s +DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=139749630265199994937330590862862922937, time:1750767812.7903078s req_ids:[8] +DEBUG 06-24 20:23:32 [manager.py:391] +ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:378.5426616668701ms total_cost_time:378.5865306854248ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10604 prompt_cache_len:5151 prompt_cache_ratio:0.48576009053187474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 +DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10768985748291016 s +INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.10965514183044434 s +DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=340097525144110809251933922488635915205, time:1750767813.1737452s req_ids:[8] +DEBUG 06-24 20:23:33 [manager.py:391] +ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:200.30784606933594ms total_cost_time:200.35028457641602ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10605 prompt_cache_len:5151 prompt_cache_ratio:0.4857142857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 +DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s +INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11021018028259277 s +DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=297397482124910181749692685116025943499, time:1750767813.3803527s req_ids:[8] +DEBUG 06-24 20:23:33 [manager.py:391] +ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:203.78589630126953ms total_cost_time:203.83048057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10606 prompt_cache_len:5151 prompt_cache_ratio:0.4856684895342259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 +DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10802316665649414 s +INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11011195182800293 s +DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=84917473804953672582943777154564435861, time:1750767813.593819s req_ids:[8] +DEBUG 06-24 20:23:33 [manager.py:391] +ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:204.62679862976074ms total_cost_time:204.67138290405273ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10607 prompt_cache_len:5151 prompt_cache_ratio:0.4856227019892524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 +DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s +INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11055707931518555 s +DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=137115911546530998967570934236193167372, time:1750767813.802926s req_ids:[8] +DEBUG 06-24 20:23:33 [manager.py:391] +ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:206.22706413269043ms total_cost_time:206.27164840698242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10608 prompt_cache_len:5151 prompt_cache_ratio:0.4855769230769231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 +DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10810399055480957 s +INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.10992884635925293 s +DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=162908782544598867845808621660361357414, time:1750767814.016541s req_ids:[8] +DEBUG 06-24 20:23:34 [manager.py:391] +ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:165.35210609436035ms total_cost_time:165.39406776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10609 prompt_cache_len:5151 prompt_cache_ratio:0.48553115279479686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 +DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10814261436462402 s +INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.11020112037658691 s +DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=235723158115036734154778957805550911242, time:1750767814.1856308s req_ids:[8] +DEBUG 06-24 20:23:34 [manager.py:391] +ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:201.1110782623291ms total_cost_time:201.155424118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10610 prompt_cache_len:5151 prompt_cache_ratio:0.48548539114043354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 +DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10821938514709473 s +INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.11027860641479492 s +DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=303073645720057297861643302979422893529, time:1750767814.3947806s req_ids:[8] +DEBUG 06-24 20:23:34 [manager.py:391] +ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:211.33899688720703ms total_cost_time:211.38644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10611 prompt_cache_len:5151 prompt_cache_ratio:0.48543963811139385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 +DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.3105807304382324 s +INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.31245851516723633 s +DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=302156197188704868612635996696577633416, time:1750767814.8125212s req_ids:[8] +DEBUG 06-24 20:23:34 [manager.py:391] +ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:366.11318588256836ms total_cost_time:366.15467071533203ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10612 prompt_cache_len:5151 prompt_cache_ratio:0.48539389370523933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 +DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.1081240177154541 s +INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.1098322868347168 s +DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=255032505572229513336141334685824311779, time:1750767814.9817455s req_ids:[8] +DEBUG 06-24 20:23:34 [manager.py:391] +ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:164.60776329040527ms total_cost_time:164.65115547180176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10613 prompt_cache_len:5151 prompt_cache_ratio:0.4853481579195327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 +DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10796833038330078 s +INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.10997247695922852 s +DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=129505343074159542362675074786112375968, time:1750767815.1528082s req_ids:[8] +DEBUG 06-24 20:23:35 [manager.py:391] +ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:200.37293434143066ms total_cost_time:200.41537284851074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10614 prompt_cache_len:5151 prompt_cache_ratio:0.4853024307518372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 +DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10848784446716309 s +INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.1104578971862793 s +DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=27466650968321358944828197415683535379, time:1750767815.3589172s req_ids:[8] +DEBUG 06-24 20:23:35 [manager.py:391] +ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:207.1366310119629ms total_cost_time:207.1826457977295ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10615 prompt_cache_len:5151 prompt_cache_ratio:0.4852567121997174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 +DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10842752456665039 s +INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s +DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=112957043977688005619213469462042723155, time:1750767815.5723581s req_ids:[8] +DEBUG 06-24 20:23:35 [manager.py:391] +ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:202.47960090637207ms total_cost_time:202.52227783203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10616 prompt_cache_len:5151 prompt_cache_ratio:0.4852110022607385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 +DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10964655876159668 s +INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.11170506477355957 s +DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=125655741680200497391371399699036762248, time:1750767815.7820084s req_ids:[8] +DEBUG 06-24 20:23:35 [manager.py:391] +ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:204.17165756225586ms total_cost_time:204.21361923217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10617 prompt_cache_len:5151 prompt_cache_ratio:0.4851653009324668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 +DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10821366310119629 s +INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.10993695259094238 s +DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=273062731695426888825031236330384365444, time:1750767815.9910028s req_ids:[8] +DEBUG 06-24 20:23:35 [manager.py:391] +ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:164.9177074432373ms total_cost_time:164.9608612060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10618 prompt_cache_len:5151 prompt_cache_ratio:0.4851196082124694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 +DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10890078544616699 s +INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092710494995117 s +DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=221439180676689136195214543215193167380, time:1750767816.1637483s req_ids:[8] +DEBUG 06-24 20:23:36 [manager.py:391] +ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:364.1057014465332ms total_cost_time:364.1490936279297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10619 prompt_cache_len:5151 prompt_cache_ratio:0.48507392409831435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 +DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.1085364818572998 s +INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s +DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=89229329646768808914903790808376194295, time:1750767816.5318065s req_ids:[8] +DEBUG 06-24 20:23:36 [manager.py:391] +ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:208.22834968566895ms total_cost_time:208.27269554138184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10620 prompt_cache_len:5151 prompt_cache_ratio:0.4850282485875706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 +DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10809969902038574 s +INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11031246185302734 s +DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=145472464764696407640405133442187164578, time:1750767816.747533s req_ids:[8] +DEBUG 06-24 20:23:36 [manager.py:391] +ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:206.82764053344727ms total_cost_time:206.87103271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10621 prompt_cache_len:5151 prompt_cache_ratio:0.4849825816778081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 +DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s +INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s +DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=193270003046653424977718838941624939591, time:1750767816.9591281s req_ids:[8] +DEBUG 06-24 20:23:36 [manager.py:391] +ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:206.32624626159668ms total_cost_time:206.36892318725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10622 prompt_cache_len:5151 prompt_cache_ratio:0.4849369233665976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 +DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10931873321533203 s +INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.11132574081420898 s +DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=256367619262365976854395701806166182831, time:1750767817.171969s req_ids:[8] +DEBUG 06-24 20:23:37 [manager.py:391] +ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.63833618164062ms total_cost_time:206.6824436187744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10623 prompt_cache_len:5151 prompt_cache_ratio:0.48489127365151086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 +DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10870862007141113 s +INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.11061954498291016 s +DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=5554489246022689885220731856145702269, time:1750767817.3835218s req_ids:[8] +DEBUG 06-24 20:23:37 [manager.py:391] +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.0258388519287ms total_cost_time:206.0694694519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10624 prompt_cache_len:5151 prompt_cache_ratio:0.4848456325301205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 +DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10889530181884766 s +INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.1109619140625 s +DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=313150806907564746184138131911282627875, time:1750767817.597566s req_ids:[8] +DEBUG 06-24 20:23:37 [manager.py:391] +ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.36892318725586ms total_cost_time:206.41207695007324ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10625 prompt_cache_len:5151 prompt_cache_ratio:0.4848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 +DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.1079869270324707 s +INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.10995793342590332 s +DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=250245316967746710561712901272111363475, time:1750767817.8092964s req_ids:[8] +DEBUG 06-24 20:23:37 [manager.py:391] +ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:210.06321907043457ms total_cost_time:210.10637283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10626 prompt_cache_len:5151 prompt_cache_ratio:0.4847543760587239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 +DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.1076967716217041 s +INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.10941481590270996 s +DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=221147738148463133489210833662887678276, time:1750767818.0257697s req_ids:[8] +DEBUG 06-24 20:23:38 [manager.py:391] +ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:375.1864433288574ms total_cost_time:375.230073928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10627 prompt_cache_len:5151 prompt_cache_ratio:0.4847087607038675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 +DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s +INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11020040512084961 s +DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=152099942067070344777453022695172355520, time:1750767818.4054303s req_ids:[8] +DEBUG 06-24 20:23:38 [manager.py:391] +ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:207.32402801513672ms total_cost_time:207.37075805664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:10628 prompt_cache_len:5151 prompt_cache_ratio:0.48466315393300713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 +DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.10865902900695801 s +INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s +DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=168975119348926875282782134754947189000, time:1750767818.618262s req_ids:[8] +DEBUG 06-24 20:23:38 [manager.py:391] +ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:206.8798542022705ms total_cost_time:206.9227695465088ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10629 prompt_cache_len:5151 prompt_cache_ratio:0.48461755574372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 +DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.10803723335266113 s +INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008930206298828 s +DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=267731213131224281606172980302531083481, time:1750767818.831792s req_ids:[8] +DEBUG 06-24 20:23:38 [manager.py:391] +ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:209.42306518554688ms total_cost_time:209.46598052978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10630 prompt_cache_len:5151 prompt_cache_ratio:0.4845719661335842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 +DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s +INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103515625 s +DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=330001906014905614014905661425570973309, time:1750767819.0456243s req_ids:[8] +DEBUG 06-24 20:23:39 [manager.py:391] +ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:208.06336402893066ms total_cost_time:208.10532569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10631 prompt_cache_len:5151 prompt_cache_ratio:0.4845263851001787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 +DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10813689231872559 s +INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024355888366699 s +DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=278826529962881305696837775261834205935, time:1750767819.2605176s req_ids:[8] +DEBUG 06-24 20:23:39 [manager.py:391] +ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:209.6705436706543ms total_cost_time:209.7160816192627ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10632 prompt_cache_len:5151 prompt_cache_ratio:0.4844808126410835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 +DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10958695411682129 s +INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11166715621948242 s +DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=93392969545736111248099606069292205590, time:1750767819.475019s req_ids:[8] +DEBUG 06-24 20:23:39 [manager.py:391] +ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:206.80522918701172ms total_cost_time:206.8476676940918ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10633 prompt_cache_len:5151 prompt_cache_ratio:0.48443524875387944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 +DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10943174362182617 s +INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11159801483154297 s +DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=208545011489377269650163257881000511195, time:1750767819.6900053s req_ids:[8] +DEBUG 06-24 20:23:39 [manager.py:391] +ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:209.2292308807373ms total_cost_time:209.27071571350098ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10634 prompt_cache_len:5151 prompt_cache_ratio:0.4843896934361482 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 +DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.1079559326171875 s +INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s +DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=180676490857397177878682984812351728203, time:1750767819.9048176s req_ids:[8] +DEBUG 06-24 20:23:39 [manager.py:391] +ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:204.30803298950195ms total_cost_time:204.35261726379395ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10635 prompt_cache_len:5151 prompt_cache_ratio:0.4843441466854725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 +DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.3109130859375 s +INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.3130512237548828 s +DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=150348215278820549945523686304392075076, time:1750767820.3175418s req_ids:[8] +DEBUG 06-24 20:23:40 [manager.py:391] +ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:415.00234603881836ms total_cost_time:415.04597663879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10636 prompt_cache_len:5151 prompt_cache_ratio:0.4842986084994359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 +DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.10831451416015625 s +INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.11026358604431152 s +DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=262329211750554130783153273786557884932, time:1750767820.5357897s req_ids:[8] +DEBUG 06-24 20:23:40 [manager.py:391] +ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:208.56618881225586ms total_cost_time:208.61172676086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10637 prompt_cache_len:5151 prompt_cache_ratio:0.4842530788756228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 +DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.10927391052246094 s +INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.1113123893737793 s +DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=287897987217858922400792256789562553996, time:1750767820.7516203s req_ids:[8] +DEBUG 06-24 20:23:40 [manager.py:391] +ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:210.4330062866211ms total_cost_time:210.47544479370117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10638 prompt_cache_len:5151 prompt_cache_ratio:0.4842075578116187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 +DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.1082754135131836 s +INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s +DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=137085167271240889512760319724886421900, time:1750767820.965626s req_ids:[8] +DEBUG 06-24 20:23:40 [manager.py:391] +ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:204.22887802124023ms total_cost_time:204.27179336547852ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10639 prompt_cache_len:5151 prompt_cache_ratio:0.4841620453050099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 +DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10870933532714844 s +INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.11121797561645508 s +DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=333118848593950508715951355151827529269, time:1750767821.1781464s req_ids:[8] +DEBUG 06-24 20:23:41 [manager.py:391] +ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:211.5168571472168ms total_cost_time:211.5612030029297ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10640 prompt_cache_len:5151 prompt_cache_ratio:0.48411654135338344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 +DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10859847068786621 s +INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.11055421829223633 s +DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=86397651907940271866265643598529758351, time:1750767821.3931897s req_ids:[8] +DEBUG 06-24 20:23:41 [manager.py:391] +ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:212.1131420135498ms total_cost_time:212.1579647064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10641 prompt_cache_len:5151 prompt_cache_ratio:0.4840710459543276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 +DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10524606704711914 s +INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.10726404190063477 s +DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=206011775873037584811703789515482801084, time:1750767821.6120894s req_ids:[8] +DEBUG 06-24 20:23:41 [manager.py:391] +ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:214.23816680908203ms total_cost_time:214.2617702484131ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:10642 prompt_cache_len:5151 prompt_cache_ratio:0.4840255591054313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 +DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10766077041625977 s +INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.10966753959655762 s +DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=311694196846412224260593748937920000616, time:1750767821.84658s req_ids:[8] +DEBUG 06-24 20:23:41 [manager.py:391] +ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:443.0243968963623ms total_cost_time:443.0694580078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10643 prompt_cache_len:5151 prompt_cache_ratio:0.4839800808042845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 +DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s +INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.11008048057556152 s +DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=148544355907827009474108630982604618703, time:1750767822.2804937s req_ids:[8] +DEBUG 06-24 20:23:42 [manager.py:391] +ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:188.31300735473633ms total_cost_time:188.3561611175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10644 prompt_cache_len:5151 prompt_cache_ratio:0.483934611048478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 +DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10843491554260254 s +INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s +DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=151621553213416391732111555500045983368, time:1750767822.4809873s req_ids:[8] +DEBUG 06-24 20:23:42 [manager.py:391] +DEBUG 06-24 20:23:42 [stats.py:37] Avg tokens(prompt+generate) throughput: 45145.946 tokens/s +DEBUG 06-24 20:23:42 [stats.py:37] Avg prompt tokens throughput: 45137.449 tokens/s +DEBUG 06-24 20:23:42 [stats.py:37] Avg generate tokens throughput: 8.497 tokens/s +ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:212.3270034790039ms total_cost_time:212.3708724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10645 prompt_cache_len:5151 prompt_cache_ratio:0.48388914983560355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 +DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10813021659851074 s +INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.1100163459777832 s +DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=214238780706192751055494745746163040799, time:1750767822.6998286s req_ids:[8] +DEBUG 06-24 20:23:42 [manager.py:391] +ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:210.57677268981934ms total_cost_time:210.6163501739502ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:10646 prompt_cache_len:5151 prompt_cache_ratio:0.4838436971632538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 +DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10864853858947754 s +INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.1104884147644043 s +DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=339446908376033998910198224103806212095, time:1750767822.9275203s req_ids:[8] +DEBUG 06-24 20:23:42 [manager.py:391] +ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:208.08696746826172ms total_cost_time:208.1167697906494ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:10647 prompt_cache_len:5151 prompt_cache_ratio:0.48379825302902224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 +DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.11120367050170898 s +INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11307263374328613 s +DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=124290830385889220092693526906476368879, time:1750767823.1358042s req_ids:[8] +DEBUG 06-24 20:23:43 [manager.py:391] +ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:215.97981452941895ms total_cost_time:216.0499095916748ms,out_token_counter:1 mean_per_token_cost_time: 0.07009506225585938ms prompt_token_num:10648 prompt_cache_len:5151 prompt_cache_ratio:0.48375281743050336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 +DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.10850048065185547 s +INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s +DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=101625934798140967401666486975154215225, time:1750767823.3450735s req_ids:[8] +DEBUG 06-24 20:23:43 [manager.py:391] +ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:203.42063903808594ms total_cost_time:203.46379280090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10649 prompt_cache_len:5151 prompt_cache_ratio:0.48370739036529253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 +DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.11176323890686035 s +INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11481523513793945 s +DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=252313877636946710484831720037112346053, time:1750767823.562718s req_ids:[8] +DEBUG 06-24 20:23:43 [manager.py:391] +ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:368.07703971862793ms total_cost_time:368.1197166442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10650 prompt_cache_len:5151 prompt_cache_ratio:0.4836619718309859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 +DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.10887813568115234 s +INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11077547073364258 s +DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=21840133844030543679182071304337891150, time:1750767823.9292629s req_ids:[8] +DEBUG 06-24 20:23:43 [manager.py:391] +ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:185.7903003692627ms total_cost_time:185.8351230621338ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10651 prompt_cache_len:5151 prompt_cache_ratio:0.48361656182518076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10435867309570312 s +INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.10625720024108887 s +DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=15873991106523347331687712062790263459, time:1750767824.1263695s req_ids:[8] +DEBUG 06-24 20:23:44 [manager.py:391] +ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:179.63600158691406ms total_cost_time:179.68082427978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10652 prompt_cache_len:5151 prompt_cache_ratio:0.48357116034547504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10965681076049805 s +INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11171793937683105 s +DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=38579495468997730526565888989880967732, time:1750767824.3051748s req_ids:[8] +DEBUG 06-24 20:23:44 [manager.py:391] +ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:204.0245532989502ms total_cost_time:204.09631729125977ms,out_token_counter:1 mean_per_token_cost_time: 0.07176399230957031ms prompt_token_num:10653 prompt_cache_len:5151 prompt_cache_ratio:0.48352576738946773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10858893394470215 s +INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.1112070083618164 s +DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=227405391970080179708346661690043954595, time:1750767824.5178235s req_ids:[8] +DEBUG 06-24 20:23:44 [manager.py:391] +ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:180.1128387451172ms total_cost_time:180.15694618225098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10654 prompt_cache_len:5151 prompt_cache_ratio:0.48348038295475876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.11009860038757324 s +INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11256837844848633 s +DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=63041697949929062079756114457479767921, time:1750767824.7020116s req_ids:[8] +DEBUG 06-24 20:23:44 [manager.py:391] +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:200.10614395141602ms total_cost_time:200.16145706176758ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:10655 prompt_cache_len:5151 prompt_cache_ratio:0.48343500703894887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.1092381477355957 s +INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118054389953613 s +DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=228312034285978187570696014221866018889, time:1750767824.9196234s req_ids:[8] +DEBUG 06-24 20:23:44 [manager.py:391] +ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:182.65557289123535ms total_cost_time:182.71493911743164ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:10656 prompt_cache_len:5151 prompt_cache_ratio:0.48338963963963966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 +DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10949420928955078 s +INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11132454872131348 s +DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=81243740425104727258934805178689139985, time:1750767825.1082983s req_ids:[8] +DEBUG 06-24 20:23:45 [manager.py:391] +ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:378.48472595214844ms total_cost_time:378.5390853881836ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:10657 prompt_cache_len:5151 prompt_cache_ratio:0.4833442807544337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 +DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.11299276351928711 s +INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11409974098205566 s +DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=234860185096409239695385623524272842084, time:1750767825.4916956s req_ids:[8] +DEBUG 06-24 20:23:45 [manager.py:391] +ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:178.36785316467285ms total_cost_time:178.41529846191406ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10658 prompt_cache_len:5151 prompt_cache_ratio:0.4832989303809345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 +DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10800552368164062 s +INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.10989522933959961 s +DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=91612672969963075368081108278523311072, time:1750767825.6717844s req_ids:[8] +DEBUG 06-24 20:23:45 [manager.py:391] +ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:192.47078895568848ms total_cost_time:192.51465797424316ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10659 prompt_cache_len:5151 prompt_cache_ratio:0.48325358851674644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 +DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10889625549316406 s +INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11060571670532227 s +DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=147832979923536364460403225239871503360, time:1750767825.8670907s req_ids:[8] +DEBUG 06-24 20:23:45 [manager.py:391] +ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:204.4699192047119ms total_cost_time:204.5285701751709ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:10660 prompt_cache_len:5151 prompt_cache_ratio:0.4832082551594747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 +DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10883593559265137 s +INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s +DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=270229440439650814110729001981561275464, time:1750767826.0822852s req_ids:[8] +DEBUG 06-24 20:23:46 [manager.py:391] +ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:212.7690315246582ms total_cost_time:212.80670166015625ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:10661 prompt_cache_len:5151 prompt_cache_ratio:0.48316293030672547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 +DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s +INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11031484603881836 s +DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=42937518761587788007493082736520631652, time:1750767826.3016825s req_ids:[8] +DEBUG 06-24 20:23:46 [manager.py:391] +ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:208.88090133666992ms total_cost_time:208.939790725708ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10662 prompt_cache_len:5151 prompt_cache_ratio:0.4831176139561058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 +DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10854458808898926 s +INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11061620712280273 s +DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=117863431167037035758909124788019241721, time:1750767826.5178077s req_ids:[8] +DEBUG 06-24 20:23:46 [manager.py:391] +ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:194.70787048339844ms total_cost_time:194.75269317626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10663 prompt_cache_len:5151 prompt_cache_ratio:0.48307230610522367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 +DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10834836959838867 s +INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.1101534366607666 s +DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=97662689417604623489117637824285863802, time:1750767826.7126985s req_ids:[8] +DEBUG 06-24 20:23:46 [manager.py:391] +ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:203.06730270385742ms total_cost_time:203.1118869781494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10664 prompt_cache_len:5151 prompt_cache_ratio:0.4830270067516879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 +DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10912418365478516 s +INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11102676391601562 s +DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=169749224531654366591446790406140803226, time:1750767826.9267373s req_ids:[8] +DEBUG 06-24 20:23:46 [manager.py:391] +INFO 06-24 20:23:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:379.31132316589355ms total_cost_time:379.41956520080566ms,out_token_counter:1 mean_per_token_cost_time: 0.10824203491210938ms prompt_token_num:10665 prompt_cache_len:5151 prompt_cache_ratio:0.4829817158931083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 +DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.11178827285766602 s +INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11414098739624023 s +DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=81153556601566133706049282912444126125, time:1750767827.3071413s req_ids:[8] +DEBUG 06-24 20:23:47 [manager.py:391] +ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:208.51922035217285ms total_cost_time:208.540678024292ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:10666 prompt_cache_len:5151 prompt_cache_ratio:0.4829364335270954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 +DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10912132263183594 s +INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s +DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=52871520637463701419448039172645026839, time:1750767827.5219233s req_ids:[8] +DEBUG 06-24 20:23:47 [manager.py:391] +ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:213.57369422912598ms total_cost_time:213.63425254821777ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10667 prompt_cache_len:5151 prompt_cache_ratio:0.4828911596512609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 +DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10794949531555176 s +INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s +DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=108186006971086299309977487232989540047, time:1750767827.7466364s req_ids:[8] +DEBUG 06-24 20:23:47 [manager.py:391] +ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:212.32008934020996ms total_cost_time:212.38112449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10668 prompt_cache_len:5151 prompt_cache_ratio:0.4828458942632171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 +DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10969066619873047 s +INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11169958114624023 s +DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=72610180719680424435563766604314066350, time:1750767827.960047s req_ids:[8] +DEBUG 06-24 20:23:47 [manager.py:391] +ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:213.94085884094238ms total_cost_time:213.96446228027344ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:10669 prompt_cache_len:5151 prompt_cache_ratio:0.4828006373605774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 +DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s +INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.10969400405883789 s +DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=333772980623695511661739735097211941704, time:1750767828.1818814s req_ids:[8] +DEBUG 06-24 20:23:48 [manager.py:391] +ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:206.5715789794922ms total_cost_time:206.61377906799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10670 prompt_cache_len:5151 prompt_cache_ratio:0.48275538894095593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 +DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.11216974258422852 s +INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11537933349609375 s +DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=288415668871127338726786066776950821893, time:1750767828.3950696s req_ids:[8] +DEBUG 06-24 20:23:48 [manager.py:391] +ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:203.50241661071777ms total_cost_time:203.5224437713623ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:10671 prompt_cache_len:5151 prompt_cache_ratio:0.48271014900196796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 +DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.10844779014587402 s +INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11052083969116211 s +DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=322413904694870279300954650204570840441, time:1750767828.6135628s req_ids:[8] +DEBUG 06-24 20:23:48 [manager.py:391] +ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:221.06289863586426ms total_cost_time:221.10819816589355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10672 prompt_cache_len:5151 prompt_cache_ratio:0.48266491754122937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 +DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.10811400413513184 s +INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11016225814819336 s +DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=270226922518184353445799859756704897199, time:1750767828.8298934s req_ids:[8] +DEBUG 06-24 20:23:48 [manager.py:391] +ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:391.86549186706543ms total_cost_time:391.9100761413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10673 prompt_cache_len:5151 prompt_cache_ratio:0.48261969455635717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 +DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10824322700500488 s +INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987162590026855 s +DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=197906275834260757336317568461567777400, time:1750767829.2272139s req_ids:[8] +DEBUG 06-24 20:23:49 [manager.py:391] +ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:162.62149810791016ms total_cost_time:162.66179084777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10674 prompt_cache_len:5151 prompt_cache_ratio:0.48257448004496906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 +DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10820603370666504 s +INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016035079956055 s +DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=67734143250773433821065929062806834011, time:1750767829.3961296s req_ids:[8] +DEBUG 06-24 20:23:49 [manager.py:391] +ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:193.81284713745117ms total_cost_time:193.85337829589844ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10675 prompt_cache_len:5151 prompt_cache_ratio:0.4825292740046838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 +DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s +INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11000609397888184 s +DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=203829916245572850256466937165894313433, time:1750767829.5960543s req_ids:[8] +DEBUG 06-24 20:23:49 [manager.py:391] +ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:203.9964199066162ms total_cost_time:204.0388584136963ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10676 prompt_cache_len:5151 prompt_cache_ratio:0.482484076433121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 +DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s +INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s +DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=186698064789001746432979593242088340498, time:1750767829.8082798s req_ids:[8] +DEBUG 06-24 20:23:49 [manager.py:391] +ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:200.65927505493164ms total_cost_time:200.7005214691162ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10677 prompt_cache_len:5151 prompt_cache_ratio:0.4824388873279011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 +DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10858273506164551 s +INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.10976386070251465 s +DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=7913404158046129302988941109694578346, time:1750767830.0157604s req_ids:[8] +DEBUG 06-24 20:23:50 [manager.py:391] +ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:207.06439018249512ms total_cost_time:207.11159706115723ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10678 prompt_cache_len:5151 prompt_cache_ratio:0.48239370668664544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 +DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s +INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.11083722114562988 s +DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=135210640638269778821161138223910911827, time:1750767830.2430506s req_ids:[8] +DEBUG 06-24 20:23:50 [manager.py:391] +ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:220.2448844909668ms total_cost_time:220.289945602417ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10679 prompt_cache_len:5151 prompt_cache_ratio:0.4823485345069763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 +DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10802435874938965 s +INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.1099398136138916 s +DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=198760087066867120295020696037565098432, time:1750767830.4569502s req_ids:[8] +DEBUG 06-24 20:23:50 [manager.py:391] +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:215.58785438537598ms total_cost_time:215.62933921813965ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10680 prompt_cache_len:5151 prompt_cache_ratio:0.48230337078651686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 +DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10905122756958008 s +INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.11125516891479492 s +DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=237169233008131878616511920714083933474, time:1750767830.68014s req_ids:[8] +DEBUG 06-24 20:23:50 [manager.py:391] +ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:209.02228355407715ms total_cost_time:209.06496047973633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10681 prompt_cache_len:5151 prompt_cache_ratio:0.4822582155228911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 +DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10776400566101074 s +INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.10981345176696777 s +DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=210315163464066059603919360789301688202, time:1750767830.8939602s req_ids:[8] +DEBUG 06-24 20:23:50 [manager.py:391] +ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:390.20299911499023ms total_cost_time:390.2461528778076ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10682 prompt_cache_len:5151 prompt_cache_ratio:0.48221306871372405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 +DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.1086738109588623 s +INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11079525947570801 s +DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=251660503780118854255518607567236877300, time:1750767831.294425s req_ids:[8] +DEBUG 06-24 20:23:51 [manager.py:391] +ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:211.97104454040527ms total_cost_time:212.02540397644043ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:10683 prompt_cache_len:5151 prompt_cache_ratio:0.4821679303566414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 +DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.11112451553344727 s +INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.1130685806274414 s +DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=234111943529222761671528715282875900742, time:1750767831.5079129s req_ids:[8] +DEBUG 06-24 20:23:51 [manager.py:391] +ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:203.9949893951416ms total_cost_time:204.0390968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10684 prompt_cache_len:5151 prompt_cache_ratio:0.4821228004492699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 +DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.10926389694213867 s +INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11130666732788086 s +DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=222999318035116701983728321023642763620, time:1750767831.7233515s req_ids:[8] +DEBUG 06-24 20:23:51 [manager.py:391] +ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:208.4331512451172ms total_cost_time:208.47654342651367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10685 prompt_cache_len:5151 prompt_cache_ratio:0.48207767898923726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 +DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.10855579376220703 s +INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11057019233703613 s +DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=307835549042614321149055445939970271297, time:1750767831.9341273s req_ids:[8] +DEBUG 06-24 20:23:51 [manager.py:391] +ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:204.8506736755371ms total_cost_time:204.8933506011963ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10686 prompt_cache_len:5151 prompt_cache_ratio:0.4820325659741718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 +DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.1083683967590332 s +INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.11037802696228027 s +DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=91772844773337578898989083111474286775, time:1750767832.150269s req_ids:[8] +DEBUG 06-24 20:23:52 [manager.py:391] +ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:210.91341972351074ms total_cost_time:210.97421646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10687 prompt_cache_len:5151 prompt_cache_ratio:0.481987461401703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 +DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.1079409122467041 s +INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s +DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=75832547725745977133393852346002912123, time:1750767832.3746183s req_ids:[8] +DEBUG 06-24 20:23:52 [manager.py:391] +ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:174.84331130981445ms total_cost_time:174.88574981689453ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10688 prompt_cache_len:5151 prompt_cache_ratio:0.4819423652694611 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 +DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.10880041122436523 s +INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.11082625389099121 s +DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=270789590240959522949107079358323926497, time:1750767832.5441716s req_ids:[8] +DEBUG 06-24 20:23:52 [manager.py:391] +DEBUG 06-24 20:23:52 [stats.py:37] Avg tokens(prompt+generate) throughput: 46650.816 tokens/s +DEBUG 06-24 20:23:52 [stats.py:37] Avg prompt tokens throughput: 46642.071 tokens/s +DEBUG 06-24 20:23:52 [stats.py:37] Avg generate tokens throughput: 8.745 tokens/s +ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:369.83656883239746ms total_cost_time:369.88162994384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10689 prompt_cache_len:5151 prompt_cache_ratio:0.48189727757507717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 +DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s +INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.10990452766418457 s +DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=128986558449055108404606543307590100671, time:1750767832.9201627s req_ids:[8] +DEBUG 06-24 20:23:52 [manager.py:391] +ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:210.27898788452148ms total_cost_time:210.32404899597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10690 prompt_cache_len:5151 prompt_cache_ratio:0.48185219831618337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 +DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10891389846801758 s +INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.1110692024230957 s +DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=230348149291445634281189962399330557415, time:1750767833.1364863s req_ids:[8] +DEBUG 06-24 20:23:53 [manager.py:391] +INFO 06-24 20:23:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:23:53 [statics_utils.py:24] mean first cost: 228.3885136965524 ms +INFO 06-24 20:23:53 [statics_utils.py:24] mean per token cost: 0.06666320272762045 ms +ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:190.32955169677734ms total_cost_time:190.37127494812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10691 prompt_cache_len:5151 prompt_cache_ratio:0.4818071274904125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 +DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10895562171936035 s +INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11089825630187988 s +DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=264700196572954455242747262863448840469, time:1750767833.3327696s req_ids:[8] +DEBUG 06-24 20:23:53 [manager.py:391] +ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:205.0917148590088ms total_cost_time:205.13606071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10692 prompt_cache_len:5151 prompt_cache_ratio:0.48176206509539843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 +DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10802769660949707 s +INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11028504371643066 s +DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=129180678680853555258553645214875593432, time:1750767833.544922s req_ids:[8] +DEBUG 06-24 20:23:53 [manager.py:391] +ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:191.91670417785645ms total_cost_time:191.95890426635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10693 prompt_cache_len:5151 prompt_cache_ratio:0.48171701112877585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 +DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10835886001586914 s +INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11041688919067383 s +DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=155518454137526704989845314060892491960, time:1750767833.7422945s req_ids:[8] +DEBUG 06-24 20:23:53 [manager.py:391] +ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:203.36437225341797ms total_cost_time:203.43661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.07224082946777344ms prompt_token_num:10694 prompt_cache_len:5151 prompt_cache_ratio:0.48167196558818026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 +DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10913515090942383 s +INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11118769645690918 s +DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=249169401981683144285093603531271845146, time:1750767833.9566832s req_ids:[8] +DEBUG 06-24 20:23:53 [manager.py:391] +ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:195.97434997558594ms total_cost_time:196.01798057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10695 prompt_cache_len:5151 prompt_cache_ratio:0.4816269284712483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 +DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10731911659240723 s +INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.10935163497924805 s +DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=107128998757613472926782605086548178787, time:1750767834.1547458s req_ids:[8] +DEBUG 06-24 20:23:54 [manager.py:391] +ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:381.55436515808105ms total_cost_time:381.60181045532227ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10696 prompt_cache_len:5151 prompt_cache_ratio:0.48158189977561705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 +DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10867738723754883 s +INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11065816879272461 s +DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=64008715101679227716321198699870385136, time:1750767834.5568168s req_ids:[8] +DEBUG 06-24 20:23:54 [manager.py:391] +ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:217.66996383666992ms total_cost_time:217.7135944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10697 prompt_cache_len:5151 prompt_cache_ratio:0.48153687949892493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 +DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s +INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11092901229858398 s +DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=265376003787397788659127011570572251459, time:1750767834.7715409s req_ids:[8] +DEBUG 06-24 20:23:54 [manager.py:391] +ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:218.30081939697266ms total_cost_time:218.34397315979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10698 prompt_cache_len:5151 prompt_cache_ratio:0.48149186763881097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 +DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10814213752746582 s +INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11013364791870117 s +DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=269278623022823872825412091353666923420, time:1750767834.994287s req_ids:[8] +DEBUG 06-24 20:23:54 [manager.py:391] +ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:202.409029006958ms total_cost_time:202.4533748626709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10699 prompt_cache_len:5151 prompt_cache_ratio:0.4814468641929152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 +DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10808968544006348 s +INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11014080047607422 s +DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=59701431398920068084865099236450417992, time:1750767835.20299s req_ids:[8] +DEBUG 06-24 20:23:55 [manager.py:391] +ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:218.45293045043945ms total_cost_time:218.50013732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10700 prompt_cache_len:5151 prompt_cache_ratio:0.4814018691588785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 +DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10853910446166992 s +INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11063551902770996 s +DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=126398336310759061144477729822954681834, time:1750767835.431107s req_ids:[8] +DEBUG 06-24 20:23:55 [manager.py:391] +ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:208.30965042114258ms total_cost_time:208.35089683532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10701 prompt_cache_len:5151 prompt_cache_ratio:0.4813568825343426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 +DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s +INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s +DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=98813940618275102018649396405603939756, time:1750767835.645553s req_ids:[8] +DEBUG 06-24 20:23:55 [manager.py:391] +ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:211.1976146697998ms total_cost_time:211.2410068511963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10702 prompt_cache_len:5151 prompt_cache_ratio:0.4813119043169501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 +DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10791206359863281 s +INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.1099691390991211 s +DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=118024339126130997941646945762081180453, time:1750767835.859777s req_ids:[8] +DEBUG 06-24 20:23:55 [manager.py:391] +ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:207.14092254638672ms total_cost_time:207.1821689605713ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10703 prompt_cache_len:5151 prompt_cache_ratio:0.4812669345043446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 +DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.3102278709411621 s +INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.31241798400878906 s +DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=296087630125455127672542242979753064293, time:1750767836.2770848s req_ids:[8] +DEBUG 06-24 20:23:56 [manager.py:391] +ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:410.22467613220215ms total_cost_time:410.26830673217773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10704 prompt_cache_len:5151 prompt_cache_ratio:0.4812219730941704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 +DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10864377021789551 s +INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.1106266975402832 s +DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=15595981713255265585960477553742326014, time:1750767836.4983408s req_ids:[8] +DEBUG 06-24 20:23:56 [manager.py:391] +ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:212.39829063415527ms total_cost_time:212.44239807128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10705 prompt_cache_len:5151 prompt_cache_ratio:0.48117702008407287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 +DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10819292068481445 s +INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.11032724380493164 s +DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=230267695599085021433248977537529177777, time:1750767836.707282s req_ids:[8] +DEBUG 06-24 20:23:56 [manager.py:391] +ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:212.08739280700684ms total_cost_time:212.1284008026123ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10706 prompt_cache_len:5151 prompt_cache_ratio:0.4811320754716981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 +DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10916376113891602 s +INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.11125349998474121 s +DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=275548206350685697968163711597966995333, time:1750767836.924826s req_ids:[8] +DEBUG 06-24 20:23:56 [manager.py:391] +ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:205.42597770690918ms total_cost_time:205.48415184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:10707 prompt_cache_len:5151 prompt_cache_ratio:0.4810871392546932 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 +DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s +INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063098907470703 s +DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=277774122060853528414412325314303042339, time:1750767837.1497808s req_ids:[8] +DEBUG 06-24 20:23:57 [manager.py:391] +ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:213.25206756591797ms total_cost_time:213.29522132873535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10708 prompt_cache_len:5151 prompt_cache_ratio:0.481042211430706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 +DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10910558700561523 s +INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.11112499237060547 s +DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=270231197413791574474571152139501127584, time:1750767837.3587458s req_ids:[8] +DEBUG 06-24 20:23:57 [manager.py:391] +ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:222.52392768859863ms total_cost_time:222.56708145141602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10709 prompt_cache_len:5151 prompt_cache_ratio:0.4809972919973854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 +DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.1083674430847168 s +INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.1104121208190918 s +DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=3058379246399612404117581275065109711, time:1750767837.585144s req_ids:[8] +DEBUG 06-24 20:23:57 [manager.py:391] +ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:200.76799392700195ms total_cost_time:200.81090927124023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10710 prompt_cache_len:5151 prompt_cache_ratio:0.48095238095238096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 +DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10744214057922363 s +INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.10933876037597656 s +DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=103714880411373908796091733900248391461, time:1750767837.7932312s req_ids:[8] +DEBUG 06-24 20:23:57 [manager.py:391] +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:366.8181896209717ms total_cost_time:366.86205863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10711 prompt_cache_len:5151 prompt_cache_ratio:0.4809074782933433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 +DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10797309875488281 s +INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007976531982422 s +DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=324966021014922013572157646460848739285, time:1750767838.1649745s req_ids:[8] +DEBUG 06-24 20:23:58 [manager.py:391] +ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:202.71539688110352ms total_cost_time:202.7592658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10712 prompt_cache_len:5151 prompt_cache_ratio:0.4808625840179238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 +DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10894632339477539 s +INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.1109156608581543 s +DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=299072658331046332733107882349381210608, time:1750767838.385487s req_ids:[8] +DEBUG 06-24 20:23:58 [manager.py:391] +ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:219.0418243408203ms total_cost_time:219.0854549407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10713 prompt_cache_len:5151 prompt_cache_ratio:0.48081769812377484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 +DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10895299911499023 s +INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11100554466247559 s +DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=156143741523281114105600389034384419784, time:1750767838.6058166s req_ids:[8] +DEBUG 06-24 20:23:58 [manager.py:391] +ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:216.6738510131836ms total_cost_time:216.71795845031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10714 prompt_cache_len:5151 prompt_cache_ratio:0.48077282060854953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 +DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10933256149291992 s +INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11138224601745605 s +DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=264393583638041428580717229068760801382, time:1750767838.8222845s req_ids:[8] +DEBUG 06-24 20:23:58 [manager.py:391] +ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:200.11305809020996ms total_cost_time:200.15692710876465ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10715 prompt_cache_len:5151 prompt_cache_ratio:0.480727951469902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 +DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s +INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s +DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=198529135340993877520313291630329503733, time:1750767839.0271547s req_ids:[8] +DEBUG 06-24 20:23:59 [manager.py:391] +ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:201.73311233520508ms total_cost_time:201.77745819091797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10716 prompt_cache_len:5151 prompt_cache_ratio:0.48068309070548715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 +DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s +INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.1106414794921875 s +DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=284198784862633461632271629538957405243, time:1750767839.237208s req_ids:[8] +DEBUG 06-24 20:23:59 [manager.py:391] +ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:193.94683837890625ms total_cost_time:193.99094581604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10717 prompt_cache_len:5151 prompt_cache_ratio:0.4806382383129607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 +DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s +INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.11083579063415527 s +DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=267053340457785520782626839315947033665, time:1750767839.4338164s req_ids:[8] +DEBUG 06-24 20:23:59 [manager.py:391] +ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:198.9130973815918ms total_cost_time:198.95601272583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10718 prompt_cache_len:5151 prompt_cache_ratio:0.4805933942899795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 +DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.10761022567749023 s +INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.10955452919006348 s +DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=51221652377391903701067630840939121394, time:1750767839.6410067s req_ids:[8] +DEBUG 06-24 20:23:59 [manager.py:391] +ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:372.9057312011719ms total_cost_time:372.94840812683105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10719 prompt_cache_len:5151 prompt_cache_ratio:0.48054855863420093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 +DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:23:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s +INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11067795753479004 s +DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=251332350983646552796903468876581772474, time:1750767840.0199685s req_ids:[8] +DEBUG 06-24 20:24:00 [manager.py:391] +ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:203.90772819519043ms total_cost_time:203.95207405090332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10720 prompt_cache_len:5151 prompt_cache_ratio:0.48050373134328356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 +DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s +INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085367202758789 s +DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=146686470691144052386594523921544551465, time:1750767840.2307036s req_ids:[8] +DEBUG 06-24 20:24:00 [manager.py:391] +ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:207.95512199401855ms total_cost_time:207.99827575683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10721 prompt_cache_len:5151 prompt_cache_ratio:0.4804589124148867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 +DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10887432098388672 s +INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11080503463745117 s +DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=207760888865004936068370717202676193070, time:1750767840.4599965s req_ids:[8] +DEBUG 06-24 20:24:00 [manager.py:391] +ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:218.22381019592285ms total_cost_time:218.26577186584473ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10722 prompt_cache_len:5151 prompt_cache_ratio:0.4804141018466704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 +DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10783123970031738 s +INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.10993814468383789 s +DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=220613603311843405958996491093145288629, time:1750767840.6702724s req_ids:[8] +DEBUG 06-24 20:24:00 [manager.py:391] +ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:204.83851432800293ms total_cost_time:204.8799991607666ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10723 prompt_cache_len:5151 prompt_cache_ratio:0.4803692996362958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 +DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s +INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11046481132507324 s +DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=217772716362626640885134701392074434266, time:1750767840.880608s req_ids:[8] +DEBUG 06-24 20:24:00 [manager.py:391] +ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:210.48569679260254ms total_cost_time:210.53075790405273ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10724 prompt_cache_len:5151 prompt_cache_ratio:0.4803245057814248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 +DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10857367515563965 s +INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055231094360352 s +DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=192468556486101290387226371131989197327, time:1750767841.096291s req_ids:[8] +DEBUG 06-24 20:24:01 [manager.py:391] +ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:204.79130744934082ms total_cost_time:204.8332691192627ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10725 prompt_cache_len:5151 prompt_cache_ratio:0.48027972027972027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 +DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10924100875854492 s +INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11117362976074219 s +DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=42554461838680618714175192599270902236, time:1750767841.3109107s req_ids:[8] +DEBUG 06-24 20:24:01 [manager.py:391] +ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:219.66171264648438ms total_cost_time:219.70510482788086ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10726 prompt_cache_len:5151 prompt_cache_ratio:0.4802349431288458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 +DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10892248153686523 s +INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11087870597839355 s +DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=199828450158011667643137030113230233506, time:1750767841.5344734s req_ids:[8] +DEBUG 06-24 20:24:01 [manager.py:391] +ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:199.4497776031494ms total_cost_time:199.4919776916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10727 prompt_cache_len:5151 prompt_cache_ratio:0.48019017432646594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 +DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.31087636947631836 s +INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.31309008598327637 s +DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=49265467270527786959968994580994255557, time:1750767841.9461951s req_ids:[8] +DEBUG 06-24 20:24:01 [manager.py:391] +ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:417.0718193054199ms total_cost_time:417.1159267425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10728 prompt_cache_len:5151 prompt_cache_ratio:0.48014541387024606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 +DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10925054550170898 s +INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s +DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=172617693533985912818214671510273411065, time:1750767842.1655045s req_ids:[8] +DEBUG 06-24 20:24:02 [manager.py:391] +ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:194.59009170532227ms total_cost_time:194.63181495666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10729 prompt_cache_len:5151 prompt_cache_ratio:0.48010066175785254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 +DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10784101486206055 s +INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.10970711708068848 s +DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=196427752106409739217779195260570260309, time:1750767842.364776s req_ids:[8] +DEBUG 06-24 20:24:02 [manager.py:391] +ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:201.00688934326172ms total_cost_time:201.0490894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10730 prompt_cache_len:5151 prompt_cache_ratio:0.48005591798695246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 +DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10786056518554688 s +INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976409912109375 s +DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=200100508965565780367737855160578777980, time:1750767842.571383s req_ids:[8] +DEBUG 06-24 20:24:02 [manager.py:391] +DEBUG 06-24 20:24:02 [stats.py:37] Avg tokens(prompt+generate) throughput: 44870.521 tokens/s +DEBUG 06-24 20:24:02 [stats.py:37] Avg prompt tokens throughput: 44862.143 tokens/s +DEBUG 06-24 20:24:02 [stats.py:37] Avg generate tokens throughput: 8.377 tokens/s +ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:217.94700622558594ms total_cost_time:217.9892063140869ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10731 prompt_cache_len:5151 prompt_cache_ratio:0.48001118255521386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 +DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10897374153137207 s +INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.11100220680236816 s +DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=51138943565381830818218484550439705829, time:1750767842.7980304s req_ids:[8] +DEBUG 06-24 20:24:02 [manager.py:391] +ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:205.18827438354492ms total_cost_time:205.2314281463623ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10732 prompt_cache_len:5151 prompt_cache_ratio:0.47996645546030564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 +DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s +INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10982131958007812 s +DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=25388630355155905461096916028627604075, time:1750767843.009123s req_ids:[8] +DEBUG 06-24 20:24:03 [manager.py:391] +ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:204.17141914367676ms total_cost_time:204.21481132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10733 prompt_cache_len:5151 prompt_cache_ratio:0.4799217366998975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 +DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.1081843376159668 s +INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102912425994873 s +DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=201818268034994996651493012410627372106, time:1750767843.2208703s req_ids:[8] +DEBUG 06-24 20:24:03 [manager.py:391] +ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:227.69641876220703ms total_cost_time:227.74076461791992ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10734 prompt_cache_len:5151 prompt_cache_ratio:0.4798770262716601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 +DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10765838623046875 s +INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s +DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=332401348142539793786797219627372981415, time:1750767843.4607944s req_ids:[8] +DEBUG 06-24 20:24:03 [manager.py:391] +ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:371.9935417175293ms total_cost_time:372.0369338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10735 prompt_cache_len:5151 prompt_cache_ratio:0.47983232417326505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 +DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10782670974731445 s +INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s +DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=302604484712196872849682937475553863818, time:1750767843.831173s req_ids:[8] +DEBUG 06-24 20:24:03 [manager.py:391] +ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:205.49607276916504ms total_cost_time:205.53851127624512ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10736 prompt_cache_len:5151 prompt_cache_ratio:0.4797876304023845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 +DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10784792900085449 s +INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.10971331596374512 s +DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=148748523330451631860314822461356595214, time:1750767844.0504136s req_ids:[8] +DEBUG 06-24 20:24:04 [manager.py:391] +ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:210.21509170532227ms total_cost_time:210.27278900146484ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:10737 prompt_cache_len:5151 prompt_cache_ratio:0.4797429449566918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 +DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.11085748672485352 s +INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.11281442642211914 s +DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=317170953728911831547418954705786886814, time:1750767844.260936s req_ids:[8] +DEBUG 06-24 20:24:04 [manager.py:391] +ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:211.01927757263184ms total_cost_time:211.06815338134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:10738 prompt_cache_len:5151 prompt_cache_ratio:0.47969826783386105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 +DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10924696922302246 s +INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.1113121509552002 s +DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=171496180919496861921981393857607261372, time:1750767844.476972s req_ids:[8] +DEBUG 06-24 20:24:04 [manager.py:391] +ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:202.64530181884766ms total_cost_time:202.68988609313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10739 prompt_cache_len:5151 prompt_cache_ratio:0.4796535990315672 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 +DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10798096656799316 s +INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098928451538086 s +DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=257501631688147900391998460770447640441, time:1750767844.684372s req_ids:[8] +DEBUG 06-24 20:24:04 [manager.py:391] +ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:233.78276824951172ms total_cost_time:233.8271141052246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10740 prompt_cache_len:5151 prompt_cache_ratio:0.47960893854748604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 +DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10812091827392578 s +INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.11015486717224121 s +DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=160780646666661868479626756908500760630, time:1750767844.9286509s req_ids:[8] +DEBUG 06-24 20:24:04 [manager.py:391] +ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:206.7420482635498ms total_cost_time:206.7859172821045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10741 prompt_cache_len:5151 prompt_cache_ratio:0.47956428637929427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 +DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10817193984985352 s +INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021161079406738 s +DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=164088075040354100001853579320587468265, time:1750767845.145833s req_ids:[8] +DEBUG 06-24 20:24:05 [manager.py:391] +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:364.78161811828613ms total_cost_time:364.8266792297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10742 prompt_cache_len:5151 prompt_cache_ratio:0.4795196425246695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 +DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10856819152832031 s +INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s +DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=251211423270579980946242852751081873387, time:1750767845.5102105s req_ids:[8] +DEBUG 06-24 20:24:05 [manager.py:391] +ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:194.28086280822754ms total_cost_time:194.32711601257324ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10743 prompt_cache_len:5151 prompt_cache_ratio:0.47947500698129014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 +DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10876989364624023 s +INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s +DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=174416136732395153499323649493608178910, time:1750767845.7100375s req_ids:[8] +DEBUG 06-24 20:24:05 [manager.py:391] +ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:198.28391075134277ms total_cost_time:198.32730293273926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10744 prompt_cache_len:5151 prompt_cache_ratio:0.47943037974683544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 +DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s +INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063933372497559 s +DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=65585749970514938972855377332042222990, time:1750767845.9154105s req_ids:[8] +DEBUG 06-24 20:24:05 [manager.py:391] +ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:200.88791847229004ms total_cost_time:200.93178749084473ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10745 prompt_cache_len:5151 prompt_cache_ratio:0.4793857608189856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 +DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10838985443115234 s +INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11026787757873535 s +DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=142783692068970976280737925044834053913, time:1750767846.1254692s req_ids:[8] +DEBUG 06-24 20:24:06 [manager.py:391] +ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:208.0838680267334ms total_cost_time:208.12487602233887ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10746 prompt_cache_len:5151 prompt_cache_ratio:0.47934115019542156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 +DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10851097106933594 s +INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s +DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=287238441196153784219250544650975410489, time:1750767846.3405678s req_ids:[8] +DEBUG 06-24 20:24:06 [manager.py:391] +ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:206.56609535217285ms total_cost_time:206.60924911499023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10747 prompt_cache_len:5151 prompt_cache_ratio:0.47929654787382525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 +DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10890984535217285 s +INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11104226112365723 s +DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=167911722959536020331312950892771906409, time:1750767846.5506027s req_ids:[8] +DEBUG 06-24 20:24:06 [manager.py:391] +ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:205.22499084472656ms total_cost_time:205.26432991027832ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:10748 prompt_cache_len:5151 prompt_cache_ratio:0.4792519538518794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 +DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.11003661155700684 s +INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11189913749694824 s +DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=130344737612505861085711954689333776351, time:1750767846.774207s req_ids:[8] +DEBUG 06-24 20:24:06 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:384.3824863433838ms total_cost_time:384.4285011291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10749 prompt_cache_len:5151 prompt_cache_ratio:0.47920736812726766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 +DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10897707939147949 s +INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.11103510856628418 s +DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=182038272605659286343312156967195314663, time:1750767847.1523287s req_ids:[8] +DEBUG 06-24 20:24:07 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:204.18739318847656ms total_cost_time:204.2226791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:10750 prompt_cache_len:5151 prompt_cache_ratio:0.4791627906976744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 +DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10920238494873047 s +INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.1112985610961914 s +DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=291382247164871731104609929825451669320, time:1750767847.363522s req_ids:[8] +DEBUG 06-24 20:24:07 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:205.8556079864502ms total_cost_time:205.89971542358398ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10751 prompt_cache_len:5151 prompt_cache_ratio:0.47911822156078504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 +DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s +INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.10988473892211914 s +DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=16592229574058624099318811922377014957, time:1750767847.5763505s req_ids:[8] +DEBUG 06-24 20:24:07 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:165.92049598693848ms total_cost_time:165.96102714538574ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10752 prompt_cache_len:5151 prompt_cache_ratio:0.4790736607142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 +DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10703134536743164 s +INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.1085355281829834 s +DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=139637892613551318759074546906306002270, time:1750767847.7476578s req_ids:[8] +DEBUG 06-24 20:24:07 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:163.75112533569336ms total_cost_time:163.77949714660645ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:10753 prompt_cache_len:5151 prompt_cache_ratio:0.47902910815586347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 +DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s +INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.11090588569641113 s +DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=249652190936004082800217767480564630748, time:1750767847.9178398s req_ids:[8] +DEBUG 06-24 20:24:07 [manager.py:391] +ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:194.7028636932373ms total_cost_time:194.7472095489502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10754 prompt_cache_len:5151 prompt_cache_ratio:0.47898456388320626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 +DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10929250717163086 s +INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.11130595207214355 s +DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=221781913911829846660381239241377729184, time:1750767848.1203308s req_ids:[8] +DEBUG 06-24 20:24:08 [manager.py:391] +ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:205.70755004882812ms total_cost_time:205.75332641601562ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10755 prompt_cache_len:5151 prompt_cache_ratio:0.4789400278940028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 +DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10740208625793457 s +INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.10946083068847656 s +DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=302761909522084610481286261636426979422, time:1750767848.3341181s req_ids:[8] +DEBUG 06-24 20:24:08 [manager.py:391] +ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:209.04994010925293ms total_cost_time:209.09428596496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10756 prompt_cache_len:5151 prompt_cache_ratio:0.47889550018594274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 +DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s +INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s +DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=89202504642758512683915851653559567091, time:1750767848.5532866s req_ids:[8] +DEBUG 06-24 20:24:08 [manager.py:391] +ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:370.56446075439453ms total_cost_time:370.6066608428955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10757 prompt_cache_len:5151 prompt_cache_ratio:0.47885098075671656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 +DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:08 [batch.py:51] router release req id 8 +INFO 06-24 20:24:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s +INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.10974907875061035 s +DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=289694805972595820869536730231962615714, time:1750767848.9226048s req_ids:[8] +DEBUG 06-24 20:24:08 [manager.py:391] +ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:204.80799674987793ms total_cost_time:204.8506736755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10758 prompt_cache_len:5151 prompt_cache_ratio:0.4788064696040156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 +DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.1088418960571289 s +INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11078190803527832 s +DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=293912928956826575103246580631463832972, time:1750767849.134654s req_ids:[8] +DEBUG 06-24 20:24:09 [manager.py:391] +ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:206.44164085388184ms total_cost_time:206.4833641052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10759 prompt_cache_len:5151 prompt_cache_ratio:0.4787619667255321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 +DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10816764831542969 s +INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s +DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=212794437148878723669159633300920283150, time:1750767849.3490028s req_ids:[8] +DEBUG 06-24 20:24:09 [manager.py:391] +ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:234.96198654174805ms total_cost_time:235.00633239746094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10760 prompt_cache_len:5151 prompt_cache_ratio:0.4787174721189591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 +DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10879993438720703 s +INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11084747314453125 s +DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=109239641811708365810478842460697069849, time:1750767849.5909824s req_ids:[8] +DEBUG 06-24 20:24:09 [manager.py:391] +ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:208.17112922668457ms total_cost_time:208.21261405944824ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10761 prompt_cache_len:5151 prompt_cache_ratio:0.4786729857819905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 +DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10876655578613281 s +INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s +DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=98552917317254311449898763242008320623, time:1750767849.8021007s req_ids:[8] +DEBUG 06-24 20:24:09 [manager.py:391] +ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:205.33418655395508ms total_cost_time:205.37662506103516ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10762 prompt_cache_len:5151 prompt_cache_ratio:0.4786285077123211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 +DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10750269889831543 s +INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.10943102836608887 s +DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=127564470252352346717876962623455585077, time:1750767850.0156348s req_ids:[8] +DEBUG 06-24 20:24:10 [manager.py:391] +ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:209.6536159515381ms total_cost_time:209.69867706298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10763 prompt_cache_len:5151 prompt_cache_ratio:0.4785840379076466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 +DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10892701148986816 s +INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11088204383850098 s +DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=150868658036208617431732372825691779449, time:1750767850.2288513s req_ids:[8] +DEBUG 06-24 20:24:10 [manager.py:391] +ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:199.51152801513672ms total_cost_time:199.5561122894287ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10764 prompt_cache_len:5151 prompt_cache_ratio:0.4785395763656633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 +DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10897326469421387 s +INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100163459777832 s +DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=71380276209538384830328682670042530080, time:1750767850.436105s req_ids:[8] +DEBUG 06-24 20:24:10 [manager.py:391] +ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:353.6221981048584ms total_cost_time:353.6550998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:10765 prompt_cache_len:5151 prompt_cache_ratio:0.4784951230840687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 +DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.11044502258300781 s +INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11248135566711426 s +DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=75211316245926906009737088829900074981, time:1750767850.7941175s req_ids:[8] +DEBUG 06-24 20:24:10 [manager.py:391] +ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:202.8036117553711ms total_cost_time:202.84605026245117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10766 prompt_cache_len:5151 prompt_cache_ratio:0.478450678060561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 +DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s +INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11000227928161621 s +DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=168022682579162153315700332558665629202, time:1750767851.0038307s req_ids:[8] +DEBUG 06-24 20:24:11 [manager.py:391] +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:208.07123184204102ms total_cost_time:208.1143856048584ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10767 prompt_cache_len:5151 prompt_cache_ratio:0.4784062412928392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 +DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s +INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s +DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=126051779508853090757710073925539046807, time:1750767851.2241144s req_ids:[8] +DEBUG 06-24 20:24:11 [manager.py:391] +ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:213.58489990234375ms total_cost_time:213.61231803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:10768 prompt_cache_len:5151 prompt_cache_ratio:0.4783618127786033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 +DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10929489135742188 s +INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.1110990047454834 s +DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=290568228657800051681832406438492987728, time:1750767851.4583247s req_ids:[8] +DEBUG 06-24 20:24:11 [manager.py:391] +ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:226.86028480529785ms total_cost_time:226.90320014953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10769 prompt_cache_len:5151 prompt_cache_ratio:0.4783173925155539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 +DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10819077491760254 s +INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s +DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=188288922335868629308890191897948687337, time:1750767851.6718009s req_ids:[8] +DEBUG 06-24 20:24:11 [manager.py:391] +ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:207.83400535583496ms total_cost_time:207.87644386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10770 prompt_cache_len:5151 prompt_cache_ratio:0.4782729805013928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 +DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10868310928344727 s +INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s +DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=42682203722834391342788395589040948005, time:1750767851.8861687s req_ids:[8] +DEBUG 06-24 20:24:11 [manager.py:391] +ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:194.88120079040527ms total_cost_time:194.92244720458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10771 prompt_cache_len:5151 prompt_cache_ratio:0.4782285767338223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 +DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10906195640563965 s +INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.11097097396850586 s +DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=194133092976506195659437451908033849594, time:1750767852.086288s req_ids:[8] +DEBUG 06-24 20:24:12 [manager.py:391] +ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:202.56447792053223ms total_cost_time:202.6052474975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10772 prompt_cache_len:5151 prompt_cache_ratio:0.47818418121054584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 +DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s +INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.11005139350891113 s +DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=172318334408079033269481663472478622004, time:1750767852.297389s req_ids:[8] +DEBUG 06-24 20:24:12 [manager.py:391] +ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:209.05041694641113ms total_cost_time:209.0930938720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10773 prompt_cache_len:5151 prompt_cache_ratio:0.4781397939292676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 +DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10735440254211426 s +INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.10921335220336914 s +DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=129358916107991694452287585063631588375, time:1750767852.5187638s req_ids:[8] +DEBUG 06-24 20:24:12 [manager.py:391] +ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:24:12 [stats.py:37] Avg tokens(prompt+generate) throughput: 45365.636 tokens/s +DEBUG 06-24 20:24:12 [stats.py:37] Avg prompt tokens throughput: 45357.102 tokens/s +DEBUG 06-24 20:24:12 [stats.py:37] Avg generate tokens throughput: 8.534 tokens/s +INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:371.4449405670166ms total_cost_time:371.4883327484131ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10774 prompt_cache_len:5151 prompt_cache_ratio:0.4780954148876926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 +DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10786890983581543 s +INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.1098935604095459 s +DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=209086027277713716773403183486383613773, time:1750767852.8877873s req_ids:[8] +DEBUG 06-24 20:24:12 [manager.py:391] +ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:215.61050415039062ms total_cost_time:215.64030647277832ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:10775 prompt_cache_len:5151 prompt_cache_ratio:0.47805104408352667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 +DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10780024528503418 s +INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.10969948768615723 s +DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=63163234909618854643246430838169750122, time:1750767853.108974s req_ids:[8] +DEBUG 06-24 20:24:13 [manager.py:391] +ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:205.3239345550537ms total_cost_time:205.3661346435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10776 prompt_cache_len:5151 prompt_cache_ratio:0.4780066815144766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 +DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10835409164428711 s +INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s +DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=182144463427977873176742853175542331291, time:1750767853.3218887s req_ids:[8] +DEBUG 06-24 20:24:13 [manager.py:391] +ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:207.7767848968506ms total_cost_time:207.81636238098145ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:10777 prompt_cache_len:5151 prompt_cache_ratio:0.47796232717825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 +DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.1089780330657959 s +INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080503463745117 s +DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=200079961560476587568593564455483042450, time:1750767853.535132s req_ids:[8] +DEBUG 06-24 20:24:13 [manager.py:391] +ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:184.2961311340332ms total_cost_time:184.33713912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10778 prompt_cache_len:5151 prompt_cache_ratio:0.47791798107255523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 +DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10849308967590332 s +INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11051130294799805 s +DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=78306049111434984175527622770959907871, time:1750767853.7255378s req_ids:[8] +DEBUG 06-24 20:24:13 [manager.py:391] +ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:201.8263339996338ms total_cost_time:201.8873691558838ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10779 prompt_cache_len:5151 prompt_cache_ratio:0.47787364319510156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 +DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s +INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.10995292663574219 s +DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=98419043005070836349699499373112244255, time:1750767853.9336083s req_ids:[8] +DEBUG 06-24 20:24:13 [manager.py:391] +ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:224.86233711242676ms total_cost_time:224.90715980529785ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10780 prompt_cache_len:5151 prompt_cache_ratio:0.47782931354359925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 +DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10914802551269531 s +INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.1110680103302002 s +DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=71978612241389346520688122590416603505, time:1750767854.1627066s req_ids:[8] +DEBUG 06-24 20:24:14 [manager.py:391] +ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:201.45845413208008ms total_cost_time:201.50208473205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10781 prompt_cache_len:5151 prompt_cache_ratio:0.4777849921157592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 +DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.1086421012878418 s +INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.11053013801574707 s +DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=48498053969596050254934332781112936155, time:1750767854.3718393s req_ids:[8] +DEBUG 06-24 20:24:14 [manager.py:391] +ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:372.7872371673584ms total_cost_time:372.8322982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10782 prompt_cache_len:5151 prompt_cache_ratio:0.4777406789092933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 +DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10810112953186035 s +INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.1100468635559082 s +DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=166070689113236393782053105208444626906, time:1750767854.751016s req_ids:[8] +DEBUG 06-24 20:24:14 [manager.py:391] +ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:205.60145378112793ms total_cost_time:205.64532279968262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10783 prompt_cache_len:5151 prompt_cache_ratio:0.4776963739219141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 +DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10870075225830078 s +INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.11064267158508301 s +DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=142801626985577905746739838597226807366, time:1750767854.9640563s req_ids:[8] +DEBUG 06-24 20:24:14 [manager.py:391] +ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:215.37184715270996ms total_cost_time:215.41619300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10784 prompt_cache_len:5151 prompt_cache_ratio:0.4776520771513353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 +DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s +INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022472381591797 s +DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=264430456725722859580643647089455610414, time:1750767855.1835248s req_ids:[8] +DEBUG 06-24 20:24:15 [manager.py:391] +ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:199.48792457580566ms total_cost_time:199.53036308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10785 prompt_cache_len:5151 prompt_cache_ratio:0.4776077885952712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 +DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10924196243286133 s +INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11119627952575684 s +DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=62600360220748935168774393988508228985, time:1750767855.3931115s req_ids:[8] +DEBUG 06-24 20:24:15 [manager.py:391] +ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:202.95238494873047ms total_cost_time:202.98075675964355ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:10786 prompt_cache_len:5151 prompt_cache_ratio:0.47756350825143706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 +DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10815954208374023 s +INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11017441749572754 s +DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=9818516937104409440033434300834038600, time:1750767855.601111s req_ids:[8] +DEBUG 06-24 20:24:15 [manager.py:391] +ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:206.97450637817383ms total_cost_time:207.017183303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10787 prompt_cache_len:5151 prompt_cache_ratio:0.47751923611754893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 +DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10911917686462402 s +INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11114931106567383 s +DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=227450569402351813193579353115699229835, time:1750767855.8142185s req_ids:[8] +DEBUG 06-24 20:24:15 [manager.py:391] +ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:208.82534980773926ms total_cost_time:208.86969566345215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10788 prompt_cache_len:5151 prompt_cache_ratio:0.4774749721913237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 +DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10760331153869629 s +INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s +DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=189305704757693236948657209878271655758, time:1750767856.0288367s req_ids:[8] +DEBUG 06-24 20:24:16 [manager.py:391] +ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:205.36494255065918ms total_cost_time:205.40881156921387ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10789 prompt_cache_len:5151 prompt_cache_ratio:0.4774307164704792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 +DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10852360725402832 s +INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11055135726928711 s +DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=97795670632794732542871006523381587464, time:1750767856.2398174s req_ids:[8] +DEBUG 06-24 20:24:16 [manager.py:391] +ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:385.1635456085205ms total_cost_time:385.2086067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10790 prompt_cache_len:5151 prompt_cache_ratio:0.477386468952734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 +DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10811853408813477 s +INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s +DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=245601044069298272555660850083387609869, time:1750767856.6311982s req_ids:[8] +DEBUG 06-24 20:24:16 [manager.py:391] +ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:206.85887336730957ms total_cost_time:206.90083503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10791 prompt_cache_len:5151 prompt_cache_ratio:0.47734222963580764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 +DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s +INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11104822158813477 s +DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=208715796897808203111431737026342476467, time:1750767856.8446205s req_ids:[8] +DEBUG 06-24 20:24:16 [manager.py:391] +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:199.44477081298828ms total_cost_time:199.48983192443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10792 prompt_cache_len:5151 prompt_cache_ratio:0.47729799851742033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 +DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:16 [batch.py:51] router release req id 8 +INFO 06-24 20:24:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10918974876403809 s +INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.111083984375 s +DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=234866872768250299188643969761781890590, time:1750767857.0524092s req_ids:[8] +DEBUG 06-24 20:24:17 [manager.py:391] +ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:207.40747451782227ms total_cost_time:207.43608474731445ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:10793 prompt_cache_len:5151 prompt_cache_ratio:0.47725377559529325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 +DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10675525665283203 s +INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.10866498947143555 s +DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=18986666977046008601585727637914488933, time:1750767857.265152s req_ids:[8] +DEBUG 06-24 20:24:17 [manager.py:391] +ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:207.75175094604492ms total_cost_time:207.794189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10794 prompt_cache_len:5151 prompt_cache_ratio:0.47720956086714844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 +DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10979485511779785 s +INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11211514472961426 s +DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=115280758120090967896910607412664957338, time:1750767857.4821784s req_ids:[8] +DEBUG 06-24 20:24:17 [manager.py:391] +ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:211.4264965057373ms total_cost_time:211.4698886871338ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10795 prompt_cache_len:5151 prompt_cache_ratio:0.47716535433070867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 +DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10953760147094727 s +INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s +DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=68331389803577880124722455639779393750, time:1750767857.7135818s req_ids:[8] +DEBUG 06-24 20:24:17 [manager.py:391] +ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:220.9789752960205ms total_cost_time:221.0237979888916ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10796 prompt_cache_len:5151 prompt_cache_ratio:0.47712115598369764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 +DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10817432403564453 s +INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11014366149902344 s +DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=259004581662586880751021761513001114818, time:1750767857.9310448s req_ids:[8] +DEBUG 06-24 20:24:17 [manager.py:391] +ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:216.38154983520508ms total_cost_time:216.42494201660156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10797 prompt_cache_len:5151 prompt_cache_ratio:0.47707696582383996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 +DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s +INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11003661155700684 s +DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=270475521476875308147399873503183429687, time:1750767858.161151s req_ids:[8] +DEBUG 06-24 20:24:18 [manager.py:391] +ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:416.7647361755371ms total_cost_time:416.8078899383545ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10798 prompt_cache_len:5151 prompt_cache_ratio:0.4770327838488609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 +DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.1099541187286377 s +INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11181950569152832 s +DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=224147780121581683835399099229732185137, time:1750767858.570062s req_ids:[8] +DEBUG 06-24 20:24:18 [manager.py:391] +ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:204.58221435546875ms total_cost_time:204.62608337402344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10799 prompt_cache_len:5151 prompt_cache_ratio:0.47698861005648674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 +DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.10919642448425293 s +INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118865013122559 s +DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=291544573839569231486880046926706173409, time:1750767858.7961416s req_ids:[8] +DEBUG 06-24 20:24:18 [manager.py:391] +ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:223.49834442138672ms total_cost_time:223.5417366027832ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10800 prompt_cache_len:5151 prompt_cache_ratio:0.47694444444444445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 +DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10874152183532715 s +INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1107320785522461 s +DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=126882741137505825170664382651963786491, time:1750767859.034263s req_ids:[8] +DEBUG 06-24 20:24:19 [manager.py:391] +ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:230.8969497680664ms total_cost_time:230.9410572052002ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10801 prompt_cache_len:5151 prompt_cache_ratio:0.476900287010462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 +DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10783863067626953 s +INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1097707748413086 s +DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=33274191379613403005663937931994357244, time:1750767859.2522402s req_ids:[8] +DEBUG 06-24 20:24:19 [manager.py:391] +ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:209.30790901184082ms total_cost_time:209.3510627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10802 prompt_cache_len:5151 prompt_cache_ratio:0.4768561377522681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 +DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:19 [batch.py:51] router release req id 8 +INFO 06-24 20:24:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10868239402770996 s +INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.11055159568786621 s +DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=110838222292944246721338427152658399573, time:1750767859.4667668s req_ids:[8] +DEBUG 06-24 20:24:19 [manager.py:391] +ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:206.70247077941895ms total_cost_time:206.74514770507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10803 prompt_cache_len:5151 prompt_cache_ratio:0.47681199666759233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 +DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10910487174987793 s +INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1110539436340332 s +DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=73995933859631482736998456554585832709, time:1750767859.6807106s req_ids:[8] +DEBUG 06-24 20:24:19 [manager.py:391] +ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:206.10594749450684ms total_cost_time:206.16722106933594ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:10804 prompt_cache_len:5151 prompt_cache_ratio:0.47676786375416513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 +DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s +INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.10981202125549316 s +DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=269082745501199068373247816502917595734, time:1750767859.8946364s req_ids:[8] +DEBUG 06-24 20:24:19 [manager.py:391] +ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:223.7532138824463ms total_cost_time:223.79493713378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10805 prompt_cache_len:5151 prompt_cache_ratio:0.4767237390097177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 +DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.31000757217407227 s +INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.3120899200439453 s +DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=128583745450943816864648631344109646878, time:1750767860.3265762s req_ids:[8] +DEBUG 06-24 20:24:20 [manager.py:391] +ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:422.09768295288086ms total_cost_time:422.13964462280273ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10806 prompt_cache_len:5151 prompt_cache_ratio:0.47667962243198225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 +DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.1086423397064209 s +INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s +DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=131178656667762733696958885419669019068, time:1750767860.5643415s req_ids:[8] +DEBUG 06-24 20:24:20 [manager.py:391] +ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:214.53619003295898ms total_cost_time:214.57886695861816ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10807 prompt_cache_len:5151 prompt_cache_ratio:0.4766355140186916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 +DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.10837078094482422 s +INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.11042451858520508 s +DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=124617747324273846603562059079430451410, time:1750767860.7769513s req_ids:[8] +DEBUG 06-24 20:24:20 [manager.py:391] +ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:201.93123817443848ms total_cost_time:201.97319984436035ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10808 prompt_cache_len:5151 prompt_cache_ratio:0.4765914137675796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 +DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.10834479331970215 s +INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.1102609634399414 s +DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=186971792895048784046568185742548046328, time:1750767860.9850655s req_ids:[8] +DEBUG 06-24 20:24:20 [manager.py:391] +ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:200.1030445098877ms total_cost_time:200.14476776123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10809 prompt_cache_len:5151 prompt_cache_ratio:0.4765473216763808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 +DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10957145690917969 s +INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.11165475845336914 s +DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=181522422479894113185170750578680887712, time:1750767861.1932395s req_ids:[8] +DEBUG 06-24 20:24:21 [manager.py:391] +ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:202.08168029785156ms total_cost_time:202.12459564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10810 prompt_cache_len:5151 prompt_cache_ratio:0.4765032377428307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 +DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10766005516052246 s +INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.10960054397583008 s +DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=297937917307387728715662755789434559943, time:1750767861.4026966s req_ids:[8] +DEBUG 06-24 20:24:21 [manager.py:391] +ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:190.96922874450684ms total_cost_time:191.01333618164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10811 prompt_cache_len:5151 prompt_cache_ratio:0.47645916196466565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 +DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10881233215332031 s +INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.11065316200256348 s +DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=236401982470537470583050246191566533518, time:1750767861.6034505s req_ids:[8] +DEBUG 06-24 20:24:21 [manager.py:391] +ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:198.31109046936035ms total_cost_time:198.35472106933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10812 prompt_cache_len:5151 prompt_cache_ratio:0.47641509433962265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 +DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.3103361129760742 s +INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.3123133182525635 s +DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=136728510666566027234889955202915071360, time:1750767862.0228534s req_ids:[8] +DEBUG 06-24 20:24:22 [manager.py:391] +ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:426.8004894256592ms total_cost_time:426.84412002563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10813 prompt_cache_len:5151 prompt_cache_ratio:0.47637103486543975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 +DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.1089932918548584 s +INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.11092209815979004 s +DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=2662429270266680719154682225776745589, time:1750767862.2457292s req_ids:[8] +DEBUG 06-24 20:24:22 [manager.py:391] +ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:206.21681213378906ms total_cost_time:206.26091957092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10814 prompt_cache_len:5151 prompt_cache_ratio:0.47632698353985575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 +DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.1079854965209961 s +INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.10985994338989258 s +DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=164783899033060553524355598594921154399, time:1750767862.4712558s req_ids:[8] +DEBUG 06-24 20:24:22 [manager.py:391] +ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:219.87175941467285ms total_cost_time:219.91634368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10815 prompt_cache_len:5151 prompt_cache_ratio:0.47628294036061025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 +DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.10911345481872559 s +INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.1111457347869873 s +DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=24853920658371752522957514594811717838, time:1750767862.6859393s req_ids:[8] +DEBUG 06-24 20:24:22 [manager.py:391] +ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:187.76917457580566ms total_cost_time:187.81208992004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10816 prompt_cache_len:5151 prompt_cache_ratio:0.4762389053254438 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 +DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.10830020904541016 s +INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.1102604866027832 s +DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=208016824970155666301884642583086207468, time:1750767862.8779793s req_ids:[8] +DEBUG 06-24 20:24:22 [manager.py:391] +DEBUG 06-24 20:24:22 [stats.py:37] Avg tokens(prompt+generate) throughput: 45915.352 tokens/s +DEBUG 06-24 20:24:22 [stats.py:37] Avg prompt tokens throughput: 45906.947 tokens/s +DEBUG 06-24 20:24:22 [stats.py:37] Avg generate tokens throughput: 8.406 tokens/s +ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:199.46599006652832ms total_cost_time:199.50628280639648ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10817 prompt_cache_len:5151 prompt_cache_ratio:0.4761948784320976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 +DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s +INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s +DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=64780947764510240840013128748340278325, time:1750767863.0952823s req_ids:[8] +DEBUG 06-24 20:24:23 [manager.py:391] +ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:225.15082359313965ms total_cost_time:225.19636154174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10818 prompt_cache_len:5151 prompt_cache_ratio:0.47615085967831394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 +INFO 06-24 20:24:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:24:23 [statics_utils.py:24] mean first cost: 228.4070945117157 ms +INFO 06-24 20:24:23 [statics_utils.py:24] mean per token cost: 0.06619702225697116 ms +DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10828232765197754 s +INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s +DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=201399538075378348725515446597072728738, time:1750767863.3216166s req_ids:[8] +DEBUG 06-24 20:24:23 [manager.py:391] +ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:210.47711372375488ms total_cost_time:210.51859855651855ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10819 prompt_cache_len:5151 prompt_cache_ratio:0.47610684906183565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 +DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s +INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.11110401153564453 s +DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=323737998160608752689168661959055636562, time:1750767863.5333922s req_ids:[8] +DEBUG 06-24 20:24:23 [manager.py:391] +ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:377.899169921875ms total_cost_time:377.9451847076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10820 prompt_cache_len:5151 prompt_cache_ratio:0.47606284658040665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 +DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10791707038879395 s +INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.10990691184997559 s +DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=247958635171693639557141313031594123404, time:1750767863.9181013s req_ids:[8] +DEBUG 06-24 20:24:23 [manager.py:391] +ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:206.82406425476074ms total_cost_time:206.8498134613037ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:10821 prompt_cache_len:5151 prompt_cache_ratio:0.47601885223177154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 +DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10807561874389648 s +INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005020141601562 s +DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=254421710127020056069583577876363249980, time:1750767864.1325543s req_ids:[8] +DEBUG 06-24 20:24:24 [manager.py:391] +ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:196.9776153564453ms total_cost_time:197.0210075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10822 prompt_cache_len:5151 prompt_cache_ratio:0.47597486601367583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 +DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10811567306518555 s +INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s +DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=76938493226455344663435973903319391108, time:1750767864.3362625s req_ids:[8] +DEBUG 06-24 20:24:24 [manager.py:391] +ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:198.12774658203125ms total_cost_time:198.17018508911133ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10823 prompt_cache_len:5151 prompt_cache_ratio:0.47593088792386584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 +DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10819172859191895 s +INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s +DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=268485964219194106055190182159183929100, time:1750767864.540481s req_ids:[8] +DEBUG 06-24 20:24:24 [manager.py:391] +ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:210.16478538513184ms total_cost_time:210.21175384521484ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:10824 prompt_cache_len:5151 prompt_cache_ratio:0.4758869179600887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 +DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10902976989746094 s +INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11102294921875 s +DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=135887280350873173955036710447765478091, time:1750767864.76122s req_ids:[8] +DEBUG 06-24 20:24:24 [manager.py:391] +ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:205.72972297668457ms total_cost_time:205.77311515808105ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10825 prompt_cache_len:5151 prompt_cache_ratio:0.47584295612009236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 +DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s +INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11053133010864258 s +DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=154818544692671508774787500468294520812, time:1750767864.973172s req_ids:[8] +DEBUG 06-24 20:24:24 [manager.py:391] +ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:206.52246475219727ms total_cost_time:206.56847953796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10826 prompt_cache_len:5151 prompt_cache_ratio:0.4757990024016257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 +DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10811471939086914 s +INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.1102144718170166 s +DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=326911286291237827358936726961541192141, time:1750767865.1901429s req_ids:[8] +DEBUG 06-24 20:24:25 [manager.py:391] +ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:380.7556629180908ms total_cost_time:380.8012008666992ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10827 prompt_cache_len:5151 prompt_cache_ratio:0.47575505680243835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 +DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10895633697509766 s +INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.11104869842529297 s +DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=337412359032131072961262888199619662754, time:1750767865.5777507s req_ids:[8] +DEBUG 06-24 20:24:25 [manager.py:391] +ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:202.87060737609863ms total_cost_time:202.91376113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10828 prompt_cache_len:5151 prompt_cache_ratio:0.47571111932028076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 +DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10910940170288086 s +INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.11105942726135254 s +DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=319063083188676789548815247138352182551, time:1750767865.7810936s req_ids:[8] +DEBUG 06-24 20:24:25 [manager.py:391] +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:219.32172775268555ms total_cost_time:219.36631202697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10829 prompt_cache_len:5151 prompt_cache_ratio:0.47566718995290425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 +DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.1095271110534668 s +INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11162400245666504 s +DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=40358148500700225298516758828592415216, time:1750767866.0184412s req_ids:[8] +DEBUG 06-24 20:24:26 [manager.py:391] +ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:217.69380569458008ms total_cost_time:217.73648262023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10830 prompt_cache_len:5151 prompt_cache_ratio:0.47562326869806093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 +DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10947203636169434 s +INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.1115105152130127 s +DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=299282416069555826910865675271940206953, time:1750767866.2331536s req_ids:[8] +DEBUG 06-24 20:24:26 [manager.py:391] +ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:207.1092128753662ms total_cost_time:207.1549892425537ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10831 prompt_cache_len:5151 prompt_cache_ratio:0.47557935555350384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 +DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10925984382629395 s +INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11108970642089844 s +DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=262306183436909091257918624539846342954, time:1750767866.448401s req_ids:[8] +DEBUG 06-24 20:24:26 [manager.py:391] +ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:205.37114143371582ms total_cost_time:205.4150104522705ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10832 prompt_cache_len:5151 prompt_cache_ratio:0.4755354505169867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 +DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10962820053100586 s +INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11157536506652832 s +DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=311413846545641472282743657615750024213, time:1750767866.6608236s req_ids:[8] +DEBUG 06-24 20:24:26 [manager.py:391] +ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:205.17420768737793ms total_cost_time:205.22022247314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10833 prompt_cache_len:5151 prompt_cache_ratio:0.4754915535862642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 +DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10821533203125 s +INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.10998201370239258 s +DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=231355119758628361943229308906396102166, time:1750767866.8702745s req_ids:[8] +DEBUG 06-24 20:24:26 [manager.py:391] +ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:367.57349967956543ms total_cost_time:367.61927604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10834 prompt_cache_len:5151 prompt_cache_ratio:0.4754476647590917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 +DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s +INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11068916320800781 s +DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=228738129159180230158689076537900587386, time:1750767867.244832s req_ids:[8] +DEBUG 06-24 20:24:27 [manager.py:391] +ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:222.10192680358887ms total_cost_time:222.14651107788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10835 prompt_cache_len:5151 prompt_cache_ratio:0.47540378403322564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 +DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10954618453979492 s +INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11162996292114258 s +DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=273688623905911706260616684668085505188, time:1750767867.4802883s req_ids:[8] +DEBUG 06-24 20:24:27 [manager.py:391] +ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:213.0577564239502ms total_cost_time:213.10186386108398ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10836 prompt_cache_len:5151 prompt_cache_ratio:0.47535991140642303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 +DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10900640487670898 s +INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11101651191711426 s +DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=114264990221561923589977140161619552313, time:1750767867.694045s req_ids:[8] +DEBUG 06-24 20:24:27 [manager.py:391] +ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:217.07630157470703ms total_cost_time:217.1194553375244ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10837 prompt_cache_len:5151 prompt_cache_ratio:0.4753160468764418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 +DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10773372650146484 s +INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.10971546173095703 s +DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=174939914628176378268606220059850549582, time:1750767867.9182785s req_ids:[8] +DEBUG 06-24 20:24:27 [manager.py:391] +ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:202.15868949890137ms total_cost_time:202.20398902893066ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10838 prompt_cache_len:5151 prompt_cache_ratio:0.4752721904410408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 +DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s +INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.11119365692138672 s +DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=306437313048454210550639792996461335026, time:1750767868.130565s req_ids:[8] +DEBUG 06-24 20:24:28 [manager.py:391] +ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:209.2735767364502ms total_cost_time:209.3188762664795ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10839 prompt_cache_len:5151 prompt_cache_ratio:0.47522834209797954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 +DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.1076655387878418 s +INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s +DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=220685543712029237566020074931051307607, time:1750767868.3420935s req_ids:[8] +DEBUG 06-24 20:24:28 [manager.py:391] +ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:185.62960624694824ms total_cost_time:185.67299842834473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10840 prompt_cache_len:5151 prompt_cache_ratio:0.4751845018450184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 +DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10771465301513672 s +INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.10969090461730957 s +DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=196195095218914494071318671488747480319, time:1750767868.5307157s req_ids:[8] +DEBUG 06-24 20:24:28 [manager.py:391] +ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:356.75716400146484ms total_cost_time:356.80246353149414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10841 prompt_cache_len:5151 prompt_cache_ratio:0.4751406696799188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 +DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10843276977539062 s +INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.11054396629333496 s +DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=335029670910414896917996479968678177303, time:1750767868.895319s req_ids:[8] +DEBUG 06-24 20:24:28 [manager.py:391] +ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:204.76818084716797ms total_cost_time:204.80918884277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10842 prompt_cache_len:5151 prompt_cache_ratio:0.47509684560044274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 +DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10820579528808594 s +INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11022067070007324 s +DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=87887203475549119739862138476102386856, time:1750767869.1084838s req_ids:[8] +DEBUG 06-24 20:24:29 [manager.py:391] +ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:206.146240234375ms total_cost_time:206.18867874145508ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10843 prompt_cache_len:5151 prompt_cache_ratio:0.475053029604353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 +DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10922098159790039 s +INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11134052276611328 s +DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=219388262463495088475327393191995932341, time:1750767869.3206773s req_ids:[8] +DEBUG 06-24 20:24:29 [manager.py:391] +ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:192.36469268798828ms total_cost_time:192.40951538085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10844 prompt_cache_len:5151 prompt_cache_ratio:0.4750092216894135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 +DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10803961753845215 s +INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.10990405082702637 s +DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=239073059555298522504648309710936953492, time:1750767869.517839s req_ids:[8] +DEBUG 06-24 20:24:29 [manager.py:391] +ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:198.3029842376709ms total_cost_time:198.34589958190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10845 prompt_cache_len:5151 prompt_cache_ratio:0.47496542185338864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 +DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10922908782958984 s +INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11121726036071777 s +DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=283292565900355797946275534999830850510, time:1750767869.724987s req_ids:[8] +DEBUG 06-24 20:24:29 [manager.py:391] +ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:209.37633514404297ms total_cost_time:209.42068099975586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10846 prompt_cache_len:5151 prompt_cache_ratio:0.47492163009404387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 +DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10853314399719238 s +INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11050009727478027 s +DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=300581467551556192100840381984206946016, time:1750767869.939804s req_ids:[8] +DEBUG 06-24 20:24:29 [manager.py:391] +ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:208.3871364593506ms total_cost_time:208.43029022216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10847 prompt_cache_len:5151 prompt_cache_ratio:0.4748778464091454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 +DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10950374603271484 s +INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11115026473999023 s +DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=75666362721861881167432266497485201020, time:1750767870.154767s req_ids:[8] +DEBUG 06-24 20:24:30 [manager.py:391] +ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:370.0287342071533ms total_cost_time:370.0721263885498ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10848 prompt_cache_len:5151 prompt_cache_ratio:0.4748340707964602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 +DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10784745216369629 s +INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.10950183868408203 s +DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=228779083446625926579925303326570201982, time:1750767870.5300694s req_ids:[8] +DEBUG 06-24 20:24:30 [manager.py:391] +ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:206.76779747009277ms total_cost_time:206.80904388427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10849 prompt_cache_len:5151 prompt_cache_ratio:0.4747903032537561 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 +DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10873889923095703 s +INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11064743995666504 s +DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=299665707491752528389684804570363213050, time:1750767870.7438598s req_ids:[8] +DEBUG 06-24 20:24:30 [manager.py:391] +ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:207.3671817779541ms total_cost_time:207.4127197265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10850 prompt_cache_len:5151 prompt_cache_ratio:0.4747465437788018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 +DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.1087653636932373 s +INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11069345474243164 s +DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=138937758268697022350421193032769165751, time:1750767870.9585888s req_ids:[8] +DEBUG 06-24 20:24:30 [manager.py:391] +ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:210.51859855651855ms total_cost_time:210.56151390075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10851 prompt_cache_len:5151 prompt_cache_ratio:0.47470279236936685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 +DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.1081852912902832 s +INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11017584800720215 s +DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=99505193865291110544526335879655372723, time:1750767871.171841s req_ids:[8] +DEBUG 06-24 20:24:31 [manager.py:391] +ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:207.3376178741455ms total_cost_time:207.3831558227539ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10852 prompt_cache_len:5151 prompt_cache_ratio:0.47465904902322154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 +DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.1076209545135498 s +INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s +DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=139318953999652032244206114862282420416, time:1750767871.392703s req_ids:[8] +DEBUG 06-24 20:24:31 [manager.py:391] +ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:214.34688568115234ms total_cost_time:214.38956260681152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10853 prompt_cache_len:5151 prompt_cache_ratio:0.4746153137381369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 +DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.10822153091430664 s +INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s +DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=59289135456067394338648058823749583710, time:1750767871.6070938s req_ids:[8] +DEBUG 06-24 20:24:31 [manager.py:391] +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:206.2056064605713ms total_cost_time:206.24828338623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10854 prompt_cache_len:5151 prompt_cache_ratio:0.474571586511885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 +DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.10850119590759277 s +INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s +DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=74041607491872898807968152182603757038, time:1750767871.8200922s req_ids:[8] +DEBUG 06-24 20:24:31 [manager.py:391] +ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:396.71993255615234ms total_cost_time:396.76594734191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10855 prompt_cache_len:5151 prompt_cache_ratio:0.4745278673422386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 +DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s +INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.10970377922058105 s +DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=214618193198167544858507238909870046152, time:1750767872.2227414s req_ids:[8] +DEBUG 06-24 20:24:32 [manager.py:391] +ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:185.88733673095703ms total_cost_time:185.9285831451416ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10856 prompt_cache_len:5151 prompt_cache_ratio:0.47448415622697127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 +DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10811543464660645 s +INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11002397537231445 s +DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=267221460617357159327128193690877951519, time:1750767872.4123871s req_ids:[8] +DEBUG 06-24 20:24:32 [manager.py:391] +ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:199.74851608276367ms total_cost_time:199.78976249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10857 prompt_cache_len:5151 prompt_cache_ratio:0.4744404531638574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 +DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10833239555358887 s +INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019396781921387 s +DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=160885142628664941763942458081259491150, time:1750767872.6202855s req_ids:[8] +DEBUG 06-24 20:24:32 [manager.py:391] +ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:200.8349895477295ms total_cost_time:200.87766647338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10858 prompt_cache_len:5151 prompt_cache_ratio:0.47439675815067234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 +DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10890507698059082 s +INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11117053031921387 s +DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=89893478114435880745364933513180263699, time:1750767872.826025s req_ids:[8] +DEBUG 06-24 20:24:32 [manager.py:391] +ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:24:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 45404.392 tokens/s +DEBUG 06-24 20:24:32 [stats.py:37] Avg prompt tokens throughput: 45395.916 tokens/s +DEBUG 06-24 20:24:32 [stats.py:37] Avg generate tokens throughput: 8.477 tokens/s +INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:195.48940658569336ms total_cost_time:195.52969932556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10859 prompt_cache_len:5151 prompt_cache_ratio:0.474353071185192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 +DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10757255554199219 s +INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.10939407348632812 s +DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=68020185219981003421405515163188924086, time:1750767873.0274575s req_ids:[8] +DEBUG 06-24 20:24:33 [manager.py:391] +ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:194.37670707702637ms total_cost_time:194.41723823547363ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10860 prompt_cache_len:5151 prompt_cache_ratio:0.47430939226519336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 +DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:33 [batch.py:51] router release req id 8 +DEBUG 06-24 20:24:33 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:33 [manager.py:283] +DEBUG 06-24 20:24:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:33 [manager.py:284] +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.11083436012268066 s +INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.11270833015441895 s +DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=278280742067585235608230908389161684435, time:1750767873.229916s req_ids:[8] +DEBUG 06-24 20:24:33 [manager.py:391] +ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:203.9356231689453ms total_cost_time:203.9775848388672ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10861 prompt_cache_len:5151 prompt_cache_ratio:0.4742657213884541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 +DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10883522033691406 s +INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.11086559295654297 s +DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=293444444034670357492184309375589593003, time:1750767873.4397569s req_ids:[8] +DEBUG 06-24 20:24:33 [manager.py:391] +ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:373.75426292419434ms total_cost_time:373.798131942749ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10862 prompt_cache_len:5151 prompt_cache_ratio:0.47422205855275273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 +DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10762286186218262 s +INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s +DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=83792886780307532386538439711702016073, time:1750767873.8217402s req_ids:[8] +DEBUG 06-24 20:24:33 [manager.py:391] +ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:216.4895534515381ms total_cost_time:216.53175354003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10863 prompt_cache_len:5151 prompt_cache_ratio:0.47417840375586856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 +DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10820603370666504 s +INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s +DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=280553196755246944952367061962623130830, time:1750767874.0470817s req_ids:[8] +DEBUG 06-24 20:24:34 [manager.py:391] +ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:208.49227905273438ms total_cost_time:208.53710174560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10864 prompt_cache_len:5151 prompt_cache_ratio:0.4741347569955817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 +DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10782003402709961 s +INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.10973310470581055 s +DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=222644551111961605753970706817532874320, time:1750767874.2590318s req_ids:[8] +DEBUG 06-24 20:24:34 [manager.py:391] +ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:207.73005485534668ms total_cost_time:207.77583122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10865 prompt_cache_len:5151 prompt_cache_ratio:0.47409111826967326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 +DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s +INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11067724227905273 s +DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=36384078836160988772567403311637459425, time:1750767874.487443s req_ids:[8] +DEBUG 06-24 20:24:34 [manager.py:391] +ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:216.01057052612305ms total_cost_time:216.05324745178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10866 prompt_cache_len:5151 prompt_cache_ratio:0.4740474875759249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 +DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10866141319274902 s +INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11053347587585449 s +DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=170675586608628021412004808859431347869, time:1750767874.6986504s req_ids:[8] +DEBUG 06-24 20:24:34 [manager.py:391] +ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:200.91843605041504ms total_cost_time:200.9599208831787ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10867 prompt_cache_len:5151 prompt_cache_ratio:0.47400386491211927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 +DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10876798629760742 s +INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11076784133911133 s +DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=80997773735846761352858698576908829805, time:1750767874.9066362s req_ids:[8] +DEBUG 06-24 20:24:34 [manager.py:391] +ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:198.96888732910156ms total_cost_time:199.01275634765625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10868 prompt_cache_len:5151 prompt_cache_ratio:0.4739602502760398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 +DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10892057418823242 s +INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s +DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=230168416306254965909628278873836029935, time:1750767875.111541s req_ids:[8] +DEBUG 06-24 20:24:35 [manager.py:391] +ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:373.3396530151367ms total_cost_time:373.3832836151123ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10869 prompt_cache_len:5151 prompt_cache_ratio:0.4739166436654706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 +DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10798788070678711 s +INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.10998272895812988 s +DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=5374375838922691930202817543991614742, time:1750767875.4946747s req_ids:[8] +DEBUG 06-24 20:24:35 [manager.py:391] +ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:207.04293251037598ms total_cost_time:207.08703994750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10870 prompt_cache_len:5151 prompt_cache_ratio:0.4738730450781969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 +DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10774350166320801 s +INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.10958409309387207 s +DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=106502040168456818274107782591062261966, time:1750767875.7204733s req_ids:[8] +DEBUG 06-24 20:24:35 [manager.py:391] +ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:214.26987648010254ms total_cost_time:214.3115997314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10871 prompt_cache_len:5151 prompt_cache_ratio:0.4738294545120044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 +DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.1096189022064209 s +INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.11168193817138672 s +DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=119186931908005883974108318337670792288, time:1750767875.9322429s req_ids:[8] +DEBUG 06-24 20:24:35 [manager.py:391] +ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:206.44044876098633ms total_cost_time:206.4824104309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10872 prompt_cache_len:5151 prompt_cache_ratio:0.4737858719646799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 +DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.1089925765991211 s +INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.11110568046569824 s +DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=177435800410538846829024406610637805353, time:1750767876.145778s req_ids:[8] +DEBUG 06-24 20:24:36 [manager.py:391] +ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:208.4822654724121ms total_cost_time:208.5261344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10873 prompt_cache_len:5151 prompt_cache_ratio:0.47374229743401086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 +DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10831594467163086 s +INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.11030244827270508 s +DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=69923881956386399398367749383381656968, time:1750767876.359894s req_ids:[8] +DEBUG 06-24 20:24:36 [manager.py:391] +ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:209.08594131469727ms total_cost_time:209.12933349609375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10874 prompt_cache_len:5151 prompt_cache_ratio:0.4736987309177855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 +DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s +INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.1105501651763916 s +DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=107627588829166425275469481876762792898, time:1750767876.5763588s req_ids:[8] +DEBUG 06-24 20:24:36 [manager.py:391] +ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:204.56719398498535ms total_cost_time:204.60915565490723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10875 prompt_cache_len:5151 prompt_cache_ratio:0.4736551724137931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 +DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10785317420959473 s +INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s +DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=126200423720258727818610355713433679090, time:1750767876.7894304s req_ids:[8] +DEBUG 06-24 20:24:36 [manager.py:391] +ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:378.223180770874ms total_cost_time:378.2694339752197ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10876 prompt_cache_len:5151 prompt_cache_ratio:0.4736116219198235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 +DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10834455490112305 s +INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102745532989502 s +DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=339971271615870211527931684935746623285, time:1750767877.1773806s req_ids:[8] +DEBUG 06-24 20:24:37 [manager.py:391] +ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:208.74881744384766ms total_cost_time:208.79459381103516ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10877 prompt_cache_len:5151 prompt_cache_ratio:0.47356807943366735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 +DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10852956771850586 s +INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s +DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=92492642501727646258440637661620296617, time:1750767877.4000585s req_ids:[8] +DEBUG 06-24 20:24:37 [manager.py:391] +ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:217.02241897583008ms total_cost_time:217.06557273864746ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10878 prompt_cache_len:5151 prompt_cache_ratio:0.47352454495311636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 +DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s +INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.11017894744873047 s +DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=208590541246164892376004351661097941947, time:1750767877.614999s req_ids:[8] +DEBUG 06-24 20:24:37 [manager.py:391] +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:209.45453643798828ms total_cost_time:209.50007438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10879 prompt_cache_len:5151 prompt_cache_ratio:0.47348101847596286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 +DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.1089475154876709 s +INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s +DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=305262356473397094457770243311911652873, time:1750767877.8309855s req_ids:[8] +DEBUG 06-24 20:24:37 [manager.py:391] +ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:229.77781295776367ms total_cost_time:229.82192039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10880 prompt_cache_len:5151 prompt_cache_ratio:0.4734375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 +DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10840272903442383 s +INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11026120185852051 s +DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=282365897368069762220675826924039925104, time:1750767878.0698886s req_ids:[8] +DEBUG 06-24 20:24:38 [manager.py:391] +ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:203.38106155395508ms total_cost_time:203.42564582824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10881 prompt_cache_len:5151 prompt_cache_ratio:0.4733939895230218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 +DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10917949676513672 s +INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11115241050720215 s +DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=259802676199441936582986270984547564185, time:1750767878.278576s req_ids:[8] +DEBUG 06-24 20:24:38 [manager.py:391] +ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:206.77757263183594ms total_cost_time:206.82406425476074ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10882 prompt_cache_len:5151 prompt_cache_ratio:0.473350487042823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 +DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.1078791618347168 s +INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s +DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=34047204652206304886801678833756286288, time:1750767878.4930499s req_ids:[8] +DEBUG 06-24 20:24:38 [manager.py:391] +ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:369.3690299987793ms total_cost_time:369.4119453430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10883 prompt_cache_len:5151 prompt_cache_ratio:0.4733069925571993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 +DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10839176177978516 s +INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11031889915466309 s +DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=317120276315686861681276196595491231137, time:1750767878.8678834s req_ids:[8] +DEBUG 06-24 20:24:38 [manager.py:391] +ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:204.24914360046387ms total_cost_time:204.29205894470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10884 prompt_cache_len:5151 prompt_cache_ratio:0.47326350606394707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 +DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s +INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.1095733642578125 s +DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=33944030376262388927398925524128089636, time:1750767879.0941868s req_ids:[8] +DEBUG 06-24 20:24:39 [manager.py:391] +ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:218.2607650756836ms total_cost_time:218.30391883850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10885 prompt_cache_len:5151 prompt_cache_ratio:0.47322002756086357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 +DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10925984382629395 s +INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11127638816833496 s +DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=107262527788658022358999065285327695054, time:1750767879.3045833s req_ids:[8] +DEBUG 06-24 20:24:39 [manager.py:391] +ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:203.23991775512695ms total_cost_time:203.28164100646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10886 prompt_cache_len:5151 prompt_cache_ratio:0.47317655704574685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 +DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s +INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11035990715026855 s +DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=46081308819362227579002331584933992547, time:1750767879.5124154s req_ids:[8] +DEBUG 06-24 20:24:39 [manager.py:391] +ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:201.03716850280762ms total_cost_time:201.0817527770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10887 prompt_cache_len:5151 prompt_cache_ratio:0.4731330945163957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 +DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.1090705394744873 s +INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110377311706543 s +DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=10021133459217627545228506299245332245, time:1750767879.7217019s req_ids:[8] +DEBUG 06-24 20:24:39 [manager.py:391] +ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:208.01305770874023ms total_cost_time:208.0554962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10888 prompt_cache_len:5151 prompt_cache_ratio:0.4730896399706098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 +DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10734415054321289 s +INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.1093454360961914 s +DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=263822063955114068278851596442481232629, time:1750767879.9365194s req_ids:[8] +DEBUG 06-24 20:24:39 [manager.py:391] +ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:207.3495388031006ms total_cost_time:207.39245414733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10889 prompt_cache_len:5151 prompt_cache_ratio:0.47304619340618975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 +DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10822892189025879 s +INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.11028289794921875 s +DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=210280134501110067164155157742607221410, time:1750767880.1484609s req_ids:[8] +DEBUG 06-24 20:24:40 [manager.py:391] +ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:380.5241584777832ms total_cost_time:380.5663585662842ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10890 prompt_cache_len:5151 prompt_cache_ratio:0.4730027548209366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 +DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10809874534606934 s +INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.10922479629516602 s +DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=265352066313410183063206468012034204845, time:1750767880.5355566s req_ids:[8] +DEBUG 06-24 20:24:40 [manager.py:391] +ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:165.5733585357666ms total_cost_time:165.61603546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10891 prompt_cache_len:5151 prompt_cache_ratio:0.47295932421265263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 +DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10839176177978516 s +INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.11030769348144531 s +DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=251456888557177258443813027067746786420, time:1750767880.7052724s req_ids:[8] +DEBUG 06-24 20:24:40 [manager.py:391] +ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:185.05501747131348ms total_cost_time:185.09721755981445ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10892 prompt_cache_len:5151 prompt_cache_ratio:0.47291590157914065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 +DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10866189002990723 s +INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.10973453521728516 s +DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=245914695606664708775576781268815400974, time:1750767880.9008198s req_ids:[8] +DEBUG 06-24 20:24:40 [manager.py:391] +ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:206.09450340270996ms total_cost_time:206.15243911743164ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10893 prompt_cache_len:5151 prompt_cache_ratio:0.4728724869182043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 +DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10891294479370117 s +INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s +DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=101887825261589304007208350160206693217, time:1750767881.1199906s req_ids:[8] +DEBUG 06-24 20:24:41 [manager.py:391] +ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:224.50494766235352ms total_cost_time:224.5476245880127ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10894 prompt_cache_len:5151 prompt_cache_ratio:0.47282908022764825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 +DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s +INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11063098907470703 s +DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=214954898533342632061536902634519897295, time:1750767881.3558893s req_ids:[8] +DEBUG 06-24 20:24:41 [manager.py:391] +ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:211.1659049987793ms total_cost_time:211.20762825012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10895 prompt_cache_len:5151 prompt_cache_ratio:0.47278568150527767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 +DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10734391212463379 s +INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.10932683944702148 s +DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=218337513485535596845324685750473561560, time:1750767881.5618024s req_ids:[8] +DEBUG 06-24 20:24:41 [manager.py:391] +ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:192.4586296081543ms total_cost_time:192.49963760375977ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10896 prompt_cache_len:5151 prompt_cache_ratio:0.4727422907488987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 +DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.1085808277130127 s +INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11056733131408691 s +DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=10029732828477544196174222636642369191, time:1750767881.7576942s req_ids:[8] +DEBUG 06-24 20:24:41 [manager.py:391] +ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:365.9493923187256ms total_cost_time:365.99135398864746ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10897 prompt_cache_len:5151 prompt_cache_ratio:0.47269890795631825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 +DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10866427421569824 s +INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11053729057312012 s +DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=89298714437943802111078223951949266152, time:1750767882.130663s req_ids:[8] +DEBUG 06-24 20:24:42 [manager.py:391] +ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:208.2347869873047ms total_cost_time:208.27746391296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10898 prompt_cache_len:5151 prompt_cache_ratio:0.4726555331253441 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 +DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10860943794250488 s +INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11050939559936523 s +DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=125287686484093639407802940085021454986, time:1750767882.3441849s req_ids:[8] +DEBUG 06-24 20:24:42 [manager.py:391] +ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:202.35157012939453ms total_cost_time:202.3937702178955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10899 prompt_cache_len:5151 prompt_cache_ratio:0.47261216625378477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 +DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10972309112548828 s +INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11174726486206055 s +DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=194015431259488164473908552060062006398, time:1750767882.5541115s req_ids:[8] +DEBUG 06-24 20:24:42 [manager.py:391] +ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:214.09916877746582ms total_cost_time:214.1401767730713ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10900 prompt_cache_len:5151 prompt_cache_ratio:0.47256880733944956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 +DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10827279090881348 s +INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017990112304688 s +DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=101445429387619800060986762653156020863, time:1750767882.786726s req_ids:[8] +DEBUG 06-24 20:24:42 [manager.py:391] +ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:212.62145042419434ms total_cost_time:212.66531944274902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10901 prompt_cache_len:5151 prompt_cache_ratio:0.4725254563801486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 +DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10784077644348145 s +INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.10982441902160645 s +DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=159820124881652022348939545354119478391, time:1750767882.9928703s req_ids:[8] +DEBUG 06-24 20:24:42 [manager.py:391] +DEBUG 06-24 20:24:42 [stats.py:37] Avg tokens(prompt+generate) throughput: 46392.159 tokens/s +DEBUG 06-24 20:24:42 [stats.py:37] Avg prompt tokens throughput: 46383.732 tokens/s +DEBUG 06-24 20:24:42 [stats.py:37] Avg generate tokens throughput: 8.427 tokens/s +ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:202.67033576965332ms total_cost_time:202.7122974395752ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10902 prompt_cache_len:5151 prompt_cache_ratio:0.4724821133736929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 +DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.1087350845336914 s +INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.1107933521270752 s +DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=272252779466097949260122015523550826908, time:1750767883.200577s req_ids:[8] +DEBUG 06-24 20:24:43 [manager.py:391] +ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:198.2250213623047ms total_cost_time:198.26698303222656ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10903 prompt_cache_len:5151 prompt_cache_ratio:0.47243877831789416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 +DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10765767097473145 s +INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.10964107513427734 s +DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=169695719517349931775478722777873260303, time:1750767883.4069605s req_ids:[8] +DEBUG 06-24 20:24:43 [manager.py:391] +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:370.1894283294678ms total_cost_time:370.23210525512695ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10904 prompt_cache_len:5151 prompt_cache_ratio:0.4723954512105649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 +DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s +INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.11008429527282715 s +DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=51330402607780362524125506241512606780, time:1750767883.788702s req_ids:[8] +DEBUG 06-24 20:24:43 [manager.py:391] +ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:210.27612686157227ms total_cost_time:210.31594276428223ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10905 prompt_cache_len:5151 prompt_cache_ratio:0.4723521320495186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 +DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10840129852294922 s +INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s +DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=212466844149246405288824435608906560238, time:1750767883.999424s req_ids:[8] +DEBUG 06-24 20:24:43 [manager.py:391] +ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:206.44402503967285ms total_cost_time:206.49027824401855ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10906 prompt_cache_len:5151 prompt_cache_ratio:0.47230882083256925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 +DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10775232315063477 s +INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.1096491813659668 s +DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=136852280205437360063724410282738171784, time:1750767884.238032s req_ids:[8] +DEBUG 06-24 20:24:44 [manager.py:391] +ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:240.92507362365723ms total_cost_time:240.9684658050537ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10907 prompt_cache_len:5151 prompt_cache_ratio:0.4722655175575319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 +DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10857486724853516 s +INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.1106255054473877 s +DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=229181597960853191363201697234373304377, time:1750767884.470303s req_ids:[8] +DEBUG 06-24 20:24:44 [manager.py:391] +ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:193.66693496704102ms total_cost_time:193.7112808227539ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10908 prompt_cache_len:5151 prompt_cache_ratio:0.4722222222222222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 +DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10897970199584961 s +INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.11080479621887207 s +DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=205035126968644651400140422837560906596, time:1750767884.66123s req_ids:[8] +DEBUG 06-24 20:24:44 [manager.py:391] +ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:203.91297340393066ms total_cost_time:203.95398139953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10909 prompt_cache_len:5151 prompt_cache_ratio:0.4721789348244569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 +DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10928225517272949 s +INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118388175964355 s +DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=82967006429085936762468577930320448417, time:1750767884.8732862s req_ids:[8] +DEBUG 06-24 20:24:44 [manager.py:391] +ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:221.62818908691406ms total_cost_time:221.67062759399414ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10910 prompt_cache_len:5151 prompt_cache_ratio:0.47213565536205315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 +DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10862040519714355 s +INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.11044979095458984 s +DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=315273219568503966902951962669287164399, time:1750767885.0999763s req_ids:[8] +DEBUG 06-24 20:24:45 [manager.py:391] +ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:367.6140308380127ms total_cost_time:367.6578998565674ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10911 prompt_cache_len:5151 prompt_cache_ratio:0.47209238383282925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 +DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10786843299865723 s +INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.10966992378234863 s +DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=106450795924799869861383999177691144656, time:1750767885.4955642s req_ids:[8] +DEBUG 06-24 20:24:45 [manager.py:391] +ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:225.27790069580078ms total_cost_time:225.32081604003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10912 prompt_cache_len:5151 prompt_cache_ratio:0.4720491202346041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 +DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10841584205627441 s +INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s +DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=132659461931309042086297242559113777168, time:1750767885.7193222s req_ids:[8] +DEBUG 06-24 20:24:45 [manager.py:391] +ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:229.84886169433594ms total_cost_time:229.89249229431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10913 prompt_cache_len:5151 prompt_cache_ratio:0.4720058645651975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 +DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10925698280334473 s +INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.11110305786132812 s +DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=41940932306153716515687513974792653497, time:1750767885.9501789s req_ids:[8] +DEBUG 06-24 20:24:45 [manager.py:391] +ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:211.1225128173828ms total_cost_time:211.16256713867188ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:10914 prompt_cache_len:5151 prompt_cache_ratio:0.4719626168224299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 +DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10851192474365234 s +INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11046600341796875 s +DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=233605971989351610780515739303899514610, time:1750767886.161082s req_ids:[8] +DEBUG 06-24 20:24:46 [manager.py:391] +ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:205.3513526916504ms total_cost_time:205.39259910583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10915 prompt_cache_len:5151 prompt_cache_ratio:0.4719193770041228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 +DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10812211036682129 s +INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.10992598533630371 s +DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=58231030304581054856252346791869147386, time:1750767886.3827894s req_ids:[8] +DEBUG 06-24 20:24:46 [manager.py:391] +ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:213.4850025177002ms total_cost_time:213.52791786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10916 prompt_cache_len:5151 prompt_cache_ratio:0.4718761451080982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 +DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10854196548461914 s +INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11049127578735352 s +DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=11656777408027231745733726448285505640, time:1750767886.5932484s req_ids:[8] +DEBUG 06-24 20:24:46 [manager.py:391] +ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:210.16645431518555ms total_cost_time:210.2217674255371ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:10917 prompt_cache_len:5151 prompt_cache_ratio:0.4718329211321792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 +DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.1080315113067627 s +INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11004114151000977 s +DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=100139812795096300803749440061667380995, time:1750767886.8087354s req_ids:[8] +DEBUG 06-24 20:24:46 [manager.py:391] +INFO 06-24 20:24:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:368.83020401000977ms total_cost_time:368.87216567993164ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10918 prompt_cache_len:5151 prompt_cache_ratio:0.47178970507418944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 +DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10795092582702637 s +INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.10996627807617188 s +DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=285788565624526851227342902177717252195, time:1750767887.1830673s req_ids:[8] +DEBUG 06-24 20:24:47 [manager.py:391] +ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:208.41693878173828ms total_cost_time:208.45890045166016ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10919 prompt_cache_len:5151 prompt_cache_ratio:0.4717464969319535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 +DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10846972465515137 s +INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s +DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=97698324848107576033834270346391985528, time:1750767887.3978343s req_ids:[8] +DEBUG 06-24 20:24:47 [manager.py:391] +ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:204.32162284851074ms total_cost_time:204.36358451843262ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10920 prompt_cache_len:5151 prompt_cache_ratio:0.4717032967032967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 +DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s +INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.11063194274902344 s +DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=14140219593966169772838459958713913796, time:1750767887.6083336s req_ids:[8] +DEBUG 06-24 20:24:47 [manager.py:391] +ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:205.69634437561035ms total_cost_time:205.7361602783203ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10921 prompt_cache_len:5151 prompt_cache_ratio:0.4716601043860452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 +DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10739755630493164 s +INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.10937190055847168 s +DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=216412995334650003412381185075546777784, time:1750767887.8192883s req_ids:[8] +DEBUG 06-24 20:24:47 [manager.py:391] +ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:204.0078639984131ms total_cost_time:204.05220985412598ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10922 prompt_cache_len:5151 prompt_cache_ratio:0.471616919978026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 +DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.1085500717163086 s +INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.11059188842773438 s +DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=208946483766619121099759892243246796513, time:1750767888.0428193s req_ids:[8] +DEBUG 06-24 20:24:48 [manager.py:391] +ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:220.7636833190918ms total_cost_time:220.80683708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10923 prompt_cache_len:5151 prompt_cache_ratio:0.47157374347706676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 +DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10859084129333496 s +INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.11051416397094727 s +DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=87935912499322842865915877007782072511, time:1750767888.258504s req_ids:[8] +DEBUG 06-24 20:24:48 [manager.py:391] +ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:207.08727836608887ms total_cost_time:207.12924003601074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10924 prompt_cache_len:5151 prompt_cache_ratio:0.471530574880996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 +DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10897445678710938 s +INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s +DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=264784128148306195022092143807307220878, time:1750767888.4706998s req_ids:[8] +DEBUG 06-24 20:24:48 [manager.py:391] +ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:373.9173412322998ms total_cost_time:373.9604949951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10925 prompt_cache_len:5151 prompt_cache_ratio:0.471487414187643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 +DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10450053215026855 s +INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.106414794921875 s +DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=229260501903678432970489491099494196926, time:1750767888.8507211s req_ids:[8] +DEBUG 06-24 20:24:48 [manager.py:391] +ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:208.4367275238037ms total_cost_time:208.4805965423584ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10926 prompt_cache_len:5151 prompt_cache_ratio:0.471444261394838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 +DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10819602012634277 s +INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025381088256836 s +DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=220604183703092022419360267287316948963, time:1750767889.0725574s req_ids:[8] +DEBUG 06-24 20:24:49 [manager.py:391] +ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:228.8060188293457ms total_cost_time:228.84654998779297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10927 prompt_cache_len:5151 prompt_cache_ratio:0.47140111650041183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 +DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10865187644958496 s +INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11059689521789551 s +DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=318720838009962767872372776613120418306, time:1750767889.3001342s req_ids:[8] +DEBUG 06-24 20:24:49 [manager.py:391] +ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:205.352783203125ms total_cost_time:205.39402961730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10928 prompt_cache_len:5151 prompt_cache_ratio:0.4713579795021962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 +DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10882830619812012 s +INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11098957061767578 s +DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=68855842015969667642684444504385199708, time:1750767889.511678s req_ids:[8] +DEBUG 06-24 20:24:49 [manager.py:391] +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:207.0331573486328ms total_cost_time:207.0748805999756ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10929 prompt_cache_len:5151 prompt_cache_ratio:0.4713148503980236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 +DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10819530487060547 s +INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11022734642028809 s +DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=246159267438483442789715534316995109189, time:1750767889.7456493s req_ids:[8] +DEBUG 06-24 20:24:49 [manager.py:391] +ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:220.60227394104004ms total_cost_time:220.64495086669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10930 prompt_cache_len:5151 prompt_cache_ratio:0.47127172918572735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 +DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s +INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s +DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=164118021191299944436920485053693659340, time:1750767889.953331s req_ids:[8] +DEBUG 06-24 20:24:49 [manager.py:391] +ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:201.80559158325195ms total_cost_time:201.8454074859619ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10931 prompt_cache_len:5151 prompt_cache_ratio:0.4712286158631415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 +DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.10887384414672852 s +INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11072826385498047 s +DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=102928109006884212720348205073220444329, time:1750767890.1596918s req_ids:[8] +DEBUG 06-24 20:24:50 [manager.py:391] +ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:366.08290672302246ms total_cost_time:366.12606048583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10932 prompt_cache_len:5151 prompt_cache_ratio:0.471185510428101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 +DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.1084897518157959 s +INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11046099662780762 s +DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=19069971729744856425886734276091272120, time:1750767890.532725s req_ids:[8] +DEBUG 06-24 20:24:50 [manager.py:391] +ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:211.6093635559082ms total_cost_time:211.65013313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10933 prompt_cache_len:5151 prompt_cache_ratio:0.47114241287844144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 +DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.1089169979095459 s +INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11091804504394531 s +DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=148679000241434196806065301444801950136, time:1750767890.7508476s req_ids:[8] +DEBUG 06-24 20:24:50 [manager.py:391] +ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:203.59206199645996ms total_cost_time:203.63521575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10934 prompt_cache_len:5151 prompt_cache_ratio:0.47109932321199927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 +DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s +INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s +DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=105561534597595022551962044313357621383, time:1750767890.961276s req_ids:[8] +DEBUG 06-24 20:24:50 [manager.py:391] +ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:204.60128784179688ms total_cost_time:204.64539527893066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10935 prompt_cache_len:5151 prompt_cache_ratio:0.4710562414266118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 +DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10811114311218262 s +INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11023640632629395 s +DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=135692795865675350237709745462432514149, time:1750767891.1706388s req_ids:[8] +DEBUG 06-24 20:24:51 [manager.py:391] +ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:211.25006675720215ms total_cost_time:211.29322052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10936 prompt_cache_len:5151 prompt_cache_ratio:0.47101316752011707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 +DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10840392112731934 s +INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s +DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=32380444910491916643482462498259533801, time:1750767891.3868294s req_ids:[8] +DEBUG 06-24 20:24:51 [manager.py:391] +ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:211.5461826324463ms total_cost_time:211.58933639526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10937 prompt_cache_len:5151 prompt_cache_ratio:0.47097010149035384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 +DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10880613327026367 s +INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s +DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=68937887959954623273703679621972462857, time:1750767891.6181188s req_ids:[8] +DEBUG 06-24 20:24:51 [manager.py:391] +ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:220.68047523498535ms total_cost_time:220.72291374206543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10938 prompt_cache_len:5151 prompt_cache_ratio:0.47092704333516183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 +DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s +INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11058640480041504 s +DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=294588956503756199855085351343378663918, time:1750767891.832062s req_ids:[8] +DEBUG 06-24 20:24:51 [manager.py:391] +ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:377.41804122924805ms total_cost_time:377.4607181549072ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10939 prompt_cache_len:5151 prompt_cache_ratio:0.4708839930523814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 +DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10870814323425293 s +INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.11069846153259277 s +DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=201435220906460308546142884443913596009, time:1750767892.2160795s req_ids:[8] +DEBUG 06-24 20:24:52 [manager.py:391] +ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:207.98945426940918ms total_cost_time:208.09245109558105ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:10940 prompt_cache_len:5151 prompt_cache_ratio:0.47084095063985376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 +DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10914111137390137 s +INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.1111905574798584 s +DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=116164649243462971563752933598663948326, time:1750767892.4299443s req_ids:[8] +DEBUG 06-24 20:24:52 [manager.py:391] +ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:211.5030288696289ms total_cost_time:211.54499053955078ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10941 prompt_cache_len:5151 prompt_cache_ratio:0.4707979160954209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 +DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10729479789733887 s +INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.10904979705810547 s +DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=91735492980976720210806838287820443763, time:1750767892.6713095s req_ids:[8] +DEBUG 06-24 20:24:52 [manager.py:391] +ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:223.03104400634766ms total_cost_time:223.07586669921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10942 prompt_cache_len:5151 prompt_cache_ratio:0.4707548894169256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 +DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10902881622314453 s +INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.1109917163848877 s +DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=176785476877511775210970924633655974045, time:1750767892.8790798s req_ids:[8] +DEBUG 06-24 20:24:52 [manager.py:391] +ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:204.60939407348633ms total_cost_time:204.6496868133545ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10943 prompt_cache_len:5151 prompt_cache_ratio:0.4707118706022115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 +DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10828566551208496 s +INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s +DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=171353771779874514667167471840540004477, time:1750767893.0931783s req_ids:[8] +DEBUG 06-24 20:24:53 [manager.py:391] +DEBUG 06-24 20:24:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 45432.913 tokens/s +DEBUG 06-24 20:24:53 [stats.py:37] Avg prompt tokens throughput: 45424.596 tokens/s +DEBUG 06-24 20:24:53 [stats.py:37] Avg generate tokens throughput: 8.317 tokens/s +ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:205.48677444458008ms total_cost_time:205.52825927734375ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10944 prompt_cache_len:5151 prompt_cache_ratio:0.4706688596491228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 +INFO 06-24 20:24:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:24:53 [statics_utils.py:24] mean first cost: 228.46504027549784 ms +INFO 06-24 20:24:53 [statics_utils.py:24] mean per token cost: 0.0657624344455944 ms +DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.1084754467010498 s +INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11040163040161133 s +INFO 06-24 20:24:53 [manager.py:620] left req id 8can release False refcount 3 +DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=26652434400936807727026221652818171390, time:1750767893.3039105s req_ids:[8] +DEBUG 06-24 20:24:53 [manager.py:391] +ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:209.181547164917ms total_cost_time:209.22517776489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10945 prompt_cache_len:5151 prompt_cache_ratio:0.4706258565555048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 +DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10866808891296387 s +INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11049532890319824 s +DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=161599805650350540285922550843091259, time:1750767893.5191762s req_ids:[8] +DEBUG 06-24 20:24:53 [manager.py:391] +ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:390.44833183288574ms total_cost_time:390.49315452575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10946 prompt_cache_len:5151 prompt_cache_ratio:0.47058286131920335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 +DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10817718505859375 s +INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11020755767822266 s +DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=239773195633103254504682491980729821090, time:1750767893.916511s req_ids:[8] +DEBUG 06-24 20:24:53 [manager.py:391] +ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:209.3362808227539ms total_cost_time:209.3815803527832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10947 prompt_cache_len:5151 prompt_cache_ratio:0.47053987393806523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 +DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10760760307312012 s +INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s +DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=35663467200833528578369743059649874493, time:1750767894.1325288s req_ids:[8] +DEBUG 06-24 20:24:54 [manager.py:391] +ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:190.5364990234375ms total_cost_time:190.5820369720459ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10948 prompt_cache_len:5151 prompt_cache_ratio:0.4704968944099379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 +DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s +INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11052370071411133 s +DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=300993446520370801341520454025337202270, time:1750767894.3283625s req_ids:[8] +DEBUG 06-24 20:24:54 [manager.py:391] +ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:194.69308853149414ms total_cost_time:194.73552703857422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10949 prompt_cache_len:5151 prompt_cache_ratio:0.47045392273266967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 +DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10808134078979492 s +INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012125015258789 s +DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=41200956394980182714851784649436247696, time:1750767894.5304635s req_ids:[8] +DEBUG 06-24 20:24:54 [manager.py:391] +ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:205.66058158874512ms total_cost_time:205.69872856140137ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:10950 prompt_cache_len:5151 prompt_cache_ratio:0.4704109589041096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 +DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s +INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11129140853881836 s +DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=93226806135687424030961946094031877936, time:1750767894.7577457s req_ids:[8] +DEBUG 06-24 20:24:54 [manager.py:391] +ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:220.02291679382324ms total_cost_time:220.06773948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10951 prompt_cache_len:5151 prompt_cache_ratio:0.47036800292210756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 +DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10859441757202148 s +INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11052250862121582 s +DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=284450256605183148976424778323510617375, time:1750767894.9723022s req_ids:[8] +DEBUG 06-24 20:24:54 [manager.py:391] +ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:209.03420448303223ms total_cost_time:209.0754508972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10952 prompt_cache_len:5151 prompt_cache_ratio:0.47032505478451425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 +DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.10802817344665527 s +INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.10996294021606445 s +DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=140589585542792401983205584725270870725, time:1750767895.199469s req_ids:[8] +DEBUG 06-24 20:24:55 [manager.py:391] +ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:390.2087211608887ms total_cost_time:390.25187492370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10953 prompt_cache_len:5151 prompt_cache_ratio:0.47028211448918106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 +DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.1087501049041748 s +INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.11075949668884277 s +DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=237743788539726318792599382159318905158, time:1750767895.583275s req_ids:[8] +DEBUG 06-24 20:24:55 [manager.py:391] +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:205.86919784545898ms total_cost_time:205.91425895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10954 prompt_cache_len:5151 prompt_cache_ratio:0.4702391820339602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 +DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.10922002792358398 s +INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.11127829551696777 s +DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=182349176677916534389067219809131330255, time:1750767895.795664s req_ids:[8] +DEBUG 06-24 20:24:55 [manager.py:391] +ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:205.23667335510254ms total_cost_time:205.27982711791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10955 prompt_cache_len:5151 prompt_cache_ratio:0.4701962574167047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 +DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.1088104248046875 s +INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11084842681884766 s +DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=92339679472969628116797528462646294274, time:1750767896.0085642s req_ids:[8] +DEBUG 06-24 20:24:56 [manager.py:391] +ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:222.320556640625ms total_cost_time:222.3665714263916ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10956 prompt_cache_len:5151 prompt_cache_ratio:0.47015334063526837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 +DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s +INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.10983538627624512 s +DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=198390400286481662728399578131965336993, time:1750767896.2364495s req_ids:[8] +DEBUG 06-24 20:24:56 [manager.py:391] +ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.24909210205078ms total_cost_time:208.29272270202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10957 prompt_cache_len:5151 prompt_cache_ratio:0.4701104316875057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 +DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10831904411315918 s +INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11027884483337402 s +DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=207322269254523254893023325303703983713, time:1750767896.451326s req_ids:[8] +DEBUG 06-24 20:24:56 [manager.py:391] +ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.8165283203125ms total_cost_time:208.85992050170898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10958 prompt_cache_len:5151 prompt_cache_ratio:0.47006753057127215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 +DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10866928100585938 s +INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11073923110961914 s +DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=246486173048261224009126858526853336847, time:1750767896.6664515s req_ids:[8] +DEBUG 06-24 20:24:56 [manager.py:391] +ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.4944248199463ms total_cost_time:208.53853225708008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10959 prompt_cache_len:5151 prompt_cache_ratio:0.4700246372844238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 +DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10956621170043945 s +INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.1117401123046875 s +DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=191009822351454702493897905849106268357, time:1750767896.8803384s req_ids:[8] +DEBUG 06-24 20:24:56 [manager.py:391] +ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:373.00777435302734ms total_cost_time:373.05164337158203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10960 prompt_cache_len:5151 prompt_cache_ratio:0.4699817518248175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 +DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.1080784797668457 s +INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098487377166748 s +DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=1643617299687514190975862263622052154, time:1750767897.2585936s req_ids:[8] +DEBUG 06-24 20:24:57 [manager.py:391] +ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:207.5824737548828ms total_cost_time:207.61513710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:10961 prompt_cache_len:5151 prompt_cache_ratio:0.4699388741903111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 +DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.10710334777832031 s +INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.10876941680908203 s +DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=125416550724025508630337956911477623945, time:1750767897.4721107s req_ids:[8] +DEBUG 06-24 20:24:57 [manager.py:391] +ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:208.44197273254395ms total_cost_time:208.48631858825684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10962 prompt_cache_len:5151 prompt_cache_ratio:0.469896004378763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 +DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.1078500747680664 s +INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.10970807075500488 s +DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=302507011890399032147690736652154968658, time:1750767897.6886485s req_ids:[8] +DEBUG 06-24 20:24:57 [manager.py:391] +ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:208.55379104614258ms total_cost_time:208.59766006469727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10963 prompt_cache_len:5151 prompt_cache_ratio:0.4698531423880325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 +DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s +INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s +DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=9134990359299375051154067743545609088, time:1750767897.9038312s req_ids:[8] +DEBUG 06-24 20:24:57 [manager.py:391] +ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:204.0257453918457ms total_cost_time:204.0688991546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10964 prompt_cache_len:5151 prompt_cache_ratio:0.4698102882159796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 +DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10914182662963867 s +INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.11113429069519043 s +DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=150727366386609507334011677771605134922, time:1750767898.1154895s req_ids:[8] +DEBUG 06-24 20:24:58 [manager.py:391] +ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:203.5074234008789ms total_cost_time:203.54938507080078ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10965 prompt_cache_len:5151 prompt_cache_ratio:0.4697674418604651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 +DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10922551155090332 s +INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.11120390892028809 s +DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=208945096398424598201240716281623350743, time:1750767898.3277435s req_ids:[8] +DEBUG 06-24 20:24:58 [manager.py:391] +ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:219.76995468139648ms total_cost_time:219.81215476989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10966 prompt_cache_len:5151 prompt_cache_ratio:0.46972460331935073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 +DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10775995254516602 s +INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s +DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=41783980007221942872971874837054524652, time:1750767898.5518513s req_ids:[8] +DEBUG 06-24 20:24:58 [manager.py:391] +ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:375.5145072937012ms total_cost_time:375.55885314941406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10967 prompt_cache_len:5151 prompt_cache_ratio:0.46968177259049876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 +DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10774803161621094 s +INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s +DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=112061913371024427504242625445728366824, time:1750767898.9353757s req_ids:[8] +DEBUG 06-24 20:24:58 [manager.py:391] +ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:206.90059661865234ms total_cost_time:206.94255828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10968 prompt_cache_len:5151 prompt_cache_ratio:0.46963894967177244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 +DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s +INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s +DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=26394943478729843253093543010660655240, time:1750767899.1584845s req_ids:[8] +DEBUG 06-24 20:24:59 [manager.py:391] +ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:229.39252853393555ms total_cost_time:229.43568229675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10969 prompt_cache_len:5151 prompt_cache_ratio:0.46959613456103566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 +DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10863995552062988 s +INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s +DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=327490710558609932982459782384752831582, time:1750767899.383993s req_ids:[8] +DEBUG 06-24 20:24:59 [manager.py:391] +ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:205.27219772338867ms total_cost_time:205.31272888183594ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10970 prompt_cache_len:5151 prompt_cache_ratio:0.4695533272561531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 +DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s +INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11079931259155273 s +DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=299803539333341661093336690054214816510, time:1750767899.5986736s req_ids:[8] +DEBUG 06-24 20:24:59 [manager.py:391] +ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:202.96120643615723ms total_cost_time:203.0034065246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10971 prompt_cache_len:5151 prompt_cache_ratio:0.4695105277549904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 +DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10849213600158691 s +INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11058807373046875 s +DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=65341757955831360629614135361614667782, time:1750767899.810125s req_ids:[8] +DEBUG 06-24 20:24:59 [manager.py:391] +ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:204.33759689331055ms total_cost_time:204.38075065612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10972 prompt_cache_len:5151 prompt_cache_ratio:0.46946773605541375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 +DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:24:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10849738121032715 s +INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11044692993164062 s +DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=132520184134146435247367707385271597786, time:1750767900.0213165s req_ids:[8] +DEBUG 06-24 20:25:00 [manager.py:391] +ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:207.17787742614746ms total_cost_time:207.22103118896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10973 prompt_cache_len:5151 prompt_cache_ratio:0.46942495215529023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 +DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10801076889038086 s +INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11015439033508301 s +DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=125508405662339974468075661643086097510, time:1750767900.2417955s req_ids:[8] +DEBUG 06-24 20:25:00 [manager.py:391] +ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:387.3906135559082ms total_cost_time:387.434720993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10974 prompt_cache_len:5151 prompt_cache_ratio:0.4693821760524877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 +DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10835576057434082 s +INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11040234565734863 s +DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=186726039870728504177576506634984368872, time:1750767900.6309497s req_ids:[8] +DEBUG 06-24 20:25:00 [manager.py:391] +ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:206.8312168121338ms total_cost_time:206.87341690063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10975 prompt_cache_len:5151 prompt_cache_ratio:0.4693394077448747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 +DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.1087336540222168 s +INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s +DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=24915973430362592166573701504421552533, time:1750767900.8476505s req_ids:[8] +DEBUG 06-24 20:25:00 [manager.py:391] +ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:208.52088928222656ms total_cost_time:208.56475830078125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10976 prompt_cache_len:5151 prompt_cache_ratio:0.4692966472303207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 +DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10948896408081055 s +INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144614219665527 s +DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=44386261259921118586040133650130393220, time:1750767901.0774035s req_ids:[8] +DEBUG 06-24 20:25:01 [manager.py:391] +ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:226.48358345031738ms total_cost_time:226.52602195739746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10977 prompt_cache_len:5151 prompt_cache_ratio:0.4692538945066958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 +DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10762333869934082 s +INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s +DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=59637866277869091910743124309150793871, time:1750767901.297847s req_ids:[8] +DEBUG 06-24 20:25:01 [manager.py:391] +ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:202.8827667236328ms total_cost_time:202.9266357421875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10978 prompt_cache_len:5151 prompt_cache_ratio:0.469211149571871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 +DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10781240463256836 s +INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973000526428223 s +DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=162252473971949375278815576958788518130, time:1750767901.50635s req_ids:[8] +DEBUG 06-24 20:25:01 [manager.py:391] +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:208.74619483947754ms total_cost_time:208.78958702087402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10979 prompt_cache_len:5151 prompt_cache_ratio:0.469168412423718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 +DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10882711410522461 s +INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11078834533691406 s +DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=179892210370198919613086942812545402799, time:1750767901.721192s req_ids:[8] +DEBUG 06-24 20:25:01 [manager.py:391] +ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:195.91569900512695ms total_cost_time:195.95909118652344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10980 prompt_cache_len:5151 prompt_cache_ratio:0.46912568306010927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 +DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.108612060546875 s +INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s +DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=223490053546818487648360716521758253475, time:1750767901.9234662s req_ids:[8] +DEBUG 06-24 20:25:01 [manager.py:391] +ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:367.02561378479004ms total_cost_time:367.0690059661865ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10981 prompt_cache_len:5151 prompt_cache_ratio:0.46908296147891815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 +DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10926270484924316 s +INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11128449440002441 s +DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=176429076499462402004017456885856899520, time:1750767902.2992656s req_ids:[8] +DEBUG 06-24 20:25:02 [manager.py:391] +ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:205.21974563598633ms total_cost_time:205.2614688873291ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10982 prompt_cache_len:5151 prompt_cache_ratio:0.46904024767801855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 +DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10751652717590332 s +INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.10948038101196289 s +DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=212953398713861741680910848673347632066, time:1750767902.5192158s req_ids:[8] +DEBUG 06-24 20:25:02 [manager.py:391] +ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:211.65871620178223ms total_cost_time:211.7023468017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10983 prompt_cache_len:5151 prompt_cache_ratio:0.46899754165528545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 +DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10882115364074707 s +INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11083841323852539 s +DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=186513123454523548710071360104218554888, time:1750767902.7329795s req_ids:[8] +DEBUG 06-24 20:25:02 [manager.py:391] +ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:208.01234245300293ms total_cost_time:208.05621147155762ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10984 prompt_cache_len:5151 prompt_cache_ratio:0.46895484340859434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 +DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10842680931091309 s +INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11035442352294922 s +DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=203513209906631409851744046602084943175, time:1750767902.963063s req_ids:[8] +DEBUG 06-24 20:25:02 [manager.py:391] +ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:220.3836441040039ms total_cost_time:220.43967247009277ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:10985 prompt_cache_len:5151 prompt_cache_ratio:0.46891215293582156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 +DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10895872116088867 s +INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.11109042167663574 s +DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=167347070558806449834273486071973947720, time:1750767903.1761827s req_ids:[8] +DEBUG 06-24 20:25:03 [manager.py:391] +DEBUG 06-24 20:25:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 45682.125 tokens/s +DEBUG 06-24 20:25:03 [stats.py:37] Avg prompt tokens throughput: 45673.795 tokens/s +DEBUG 06-24 20:25:03 [stats.py:37] Avg generate tokens throughput: 8.330 tokens/s +ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:205.6446075439453ms total_cost_time:205.7044506072998ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:10986 prompt_cache_len:5151 prompt_cache_ratio:0.4688694702348443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 +DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10750818252563477 s +INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.10954999923706055 s +DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=38524974098975734154968948485018014751, time:1750767903.388591s req_ids:[8] +DEBUG 06-24 20:25:03 [manager.py:391] +ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:202.99053192138672ms total_cost_time:203.0327320098877ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10987 prompt_cache_len:5151 prompt_cache_ratio:0.46882679530354054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 +DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.2085108757019043 s +INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.2102367877960205 s +DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=310771573391976478561448508599262084015, time:1750767903.7340581s req_ids:[8] +DEBUG 06-24 20:25:03 [manager.py:391] +ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:342.49186515808105ms total_cost_time:342.53549575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10988 prompt_cache_len:5151 prompt_cache_ratio:0.46878412813978887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 +DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10930466651916504 s +INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.11124801635742188 s +DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=284015968686562445575221995188374653639, time:1750767903.9495783s req_ids:[8] +DEBUG 06-24 20:25:03 [manager.py:391] +ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:201.7674446105957ms total_cost_time:201.80988311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10989 prompt_cache_len:5151 prompt_cache_ratio:0.4687414687414687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 +DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10774779319763184 s +INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946965217590332 s +DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=32008894832648051164680964201439440802, time:1750767904.1595325s req_ids:[8] +DEBUG 06-24 20:25:04 [manager.py:391] +ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:162.09101676940918ms total_cost_time:162.13393211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10990 prompt_cache_len:5151 prompt_cache_ratio:0.46869881710646044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 +DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10866045951843262 s +INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s +DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=101473945456999423199478965036264695402, time:1750767904.3298225s req_ids:[8] +DEBUG 06-24 20:25:04 [manager.py:391] +ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:189.44168090820312ms total_cost_time:189.4848346710205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10991 prompt_cache_len:5151 prompt_cache_ratio:0.4686561732326449 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 +DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10752391815185547 s +INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10953330993652344 s +DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=239067842247763105509475293057826580218, time:1750767904.527247s req_ids:[8] +DEBUG 06-24 20:25:04 [manager.py:391] +ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:198.7326145172119ms total_cost_time:198.7764835357666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10992 prompt_cache_len:5151 prompt_cache_ratio:0.46861353711790393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 +DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s +INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.11020588874816895 s +DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=332050113503828533682898100832298004327, time:1750767904.73525s req_ids:[8] +DEBUG 06-24 20:25:04 [manager.py:391] +ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:206.79402351379395ms total_cost_time:206.83789253234863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10993 prompt_cache_len:5151 prompt_cache_ratio:0.4685709087601201 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 +DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.1077578067779541 s +INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s +DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=290831358805764053386973951263269502628, time:1750767904.9486945s req_ids:[8] +DEBUG 06-24 20:25:04 [manager.py:391] +ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:203.80306243896484ms total_cost_time:203.84669303894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10994 prompt_cache_len:5151 prompt_cache_ratio:0.4685282881571766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 +DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.10921978950500488 s +INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.1112368106842041 s +DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=246283051504272877428242662469774744072, time:1750767905.1610184s req_ids:[8] +DEBUG 06-24 20:25:05 [manager.py:391] +ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:204.7407627105713ms total_cost_time:204.7867774963379ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10995 prompt_cache_len:5151 prompt_cache_ratio:0.4684856753069577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 +DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.3110203742980957 s +INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.3131449222564697 s +DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=236199623741823595863242590982543956334, time:1750767905.586208s req_ids:[8] +DEBUG 06-24 20:25:05 [manager.py:391] +ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:426.6078472137451ms total_cost_time:426.6524314880371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10996 prompt_cache_len:5151 prompt_cache_ratio:0.4684430702073481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 +DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.10963702201843262 s +INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.11136126518249512 s +DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=296991886895930450874436553332949911822, time:1750767905.8101702s req_ids:[8] +DEBUG 06-24 20:25:05 [manager.py:391] +ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:209.4876766204834ms total_cost_time:209.53035354614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10997 prompt_cache_len:5151 prompt_cache_ratio:0.46840047285623354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 +DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s +INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s +DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=315968899715823804983495787382043978343, time:1750767906.032475s req_ids:[8] +DEBUG 06-24 20:25:06 [manager.py:391] +ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:171.38123512268066ms total_cost_time:171.42462730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10998 prompt_cache_len:5151 prompt_cache_ratio:0.4683578832515003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 +DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s +INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.10939502716064453 s +DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=32366584276683570796796148143078077241, time:1750767906.2055047s req_ids:[8] +DEBUG 06-24 20:25:06 [manager.py:391] +ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:202.5439739227295ms total_cost_time:202.58712768554688ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10999 prompt_cache_len:5151 prompt_cache_ratio:0.46831530139103555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 +DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s +INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s +DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=159689113489781304891688737690563233235, time:1750767906.4192116s req_ids:[8] +DEBUG 06-24 20:25:06 [manager.py:391] +ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:215.77763557434082ms total_cost_time:215.8198356628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11000 prompt_cache_len:5151 prompt_cache_ratio:0.4682727272727273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 +DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.1086890697479248 s +INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11051034927368164 s +DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=293444267596285824321227691670693505253, time:1750767906.6461842s req_ids:[8] +DEBUG 06-24 20:25:06 [manager.py:391] +ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:206.1319351196289ms total_cost_time:206.1760425567627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11001 prompt_cache_len:5151 prompt_cache_ratio:0.4682301608944641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 +DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10898375511169434 s +INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11067986488342285 s +DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=215899965806028378155742028925079266761, time:1750767906.8533325s req_ids:[8] +DEBUG 06-24 20:25:06 [manager.py:391] +ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:206.87294006347656ms total_cost_time:206.91752433776855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11002 prompt_cache_len:5151 prompt_cache_ratio:0.46818760225413564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 +DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s +INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s +DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=141023548823535960373480117120129702161, time:1750767907.0671422s req_ids:[8] +DEBUG 06-24 20:25:07 [manager.py:391] +ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:354.72846031188965ms total_cost_time:354.7663688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:11003 prompt_cache_len:5151 prompt_cache_ratio:0.4681450513496319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 +DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10873866081237793 s +INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11034965515136719 s +DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=13423068330213695900549575188583955766, time:1750767907.4289787s req_ids:[8] +DEBUG 06-24 20:25:07 [manager.py:391] +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:203.64618301391602ms total_cost_time:203.690767288208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11004 prompt_cache_len:5151 prompt_cache_ratio:0.4681025081788441 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 +DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10787296295166016 s +INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967397689819336 s +DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=301726762223398247403587400384164024815, time:1750767907.6419141s req_ids:[8] +DEBUG 06-24 20:25:07 [manager.py:391] +ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:210.51669120788574ms total_cost_time:210.57939529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:11005 prompt_cache_len:5151 prompt_cache_ratio:0.46805997273966377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 +DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10864424705505371 s +INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s +DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=166441483505318824770184638606147183388, time:1750767907.8575377s req_ids:[8] +DEBUG 06-24 20:25:07 [manager.py:391] +ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:202.0699977874756ms total_cost_time:202.12650299072266ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11006 prompt_cache_len:5151 prompt_cache_ratio:0.46801744502998366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 +DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s +INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.11011743545532227 s +DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=2355817118054547944584222565196555268, time:1750767908.0685067s req_ids:[8] +DEBUG 06-24 20:25:08 [manager.py:391] +ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:200.2277374267578ms total_cost_time:200.2713680267334ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11007 prompt_cache_len:5151 prompt_cache_ratio:0.46797492504769694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 +DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10778975486755371 s +INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.10967254638671875 s +DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=126171459759067692380093046443747151458, time:1750767908.2791915s req_ids:[8] +DEBUG 06-24 20:25:08 [manager.py:391] +ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:208.7085247039795ms total_cost_time:208.7533473968506ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11008 prompt_cache_len:5151 prompt_cache_ratio:0.4679324127906977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 +DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s +INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.11108756065368652 s +DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=125602237716375221927895824662390007591, time:1750767908.4954042s req_ids:[8] +DEBUG 06-24 20:25:08 [manager.py:391] +ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:221.47417068481445ms total_cost_time:221.51923179626465ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11009 prompt_cache_len:5151 prompt_cache_ratio:0.46788990825688076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 +DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.30959033966064453 s +INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.3115084171295166 s +DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=265292549760178803862155251834285446603, time:1750767908.9287045s req_ids:[8] +DEBUG 06-24 20:25:08 [manager.py:391] +ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:414.170503616333ms total_cost_time:414.2143726348877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11010 prompt_cache_len:5151 prompt_cache_ratio:0.4678474114441417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 +DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10877442359924316 s +INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s +DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=140794219055071588158487676512780845043, time:1750767909.1559234s req_ids:[8] +DEBUG 06-24 20:25:09 [manager.py:391] +ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:211.37285232543945ms total_cost_time:211.42244338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:11011 prompt_cache_len:5151 prompt_cache_ratio:0.4678049223503769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 +DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10944294929504395 s +INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11141252517700195 s +DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=37054347876872359630693905365473309072, time:1750767909.3656437s req_ids:[8] +DEBUG 06-24 20:25:09 [manager.py:391] +ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:206.53414726257324ms total_cost_time:206.5865993499756ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:11012 prompt_cache_len:5151 prompt_cache_ratio:0.46776244097348346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 +DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10843896865844727 s +INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11033439636230469 s +DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=23044828172233357758090435000118500730, time:1750767909.5838363s req_ids:[8] +DEBUG 06-24 20:25:09 [manager.py:391] +ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:207.40246772766113ms total_cost_time:207.46231079101562ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:11013 prompt_cache_len:5151 prompt_cache_ratio:0.4677199673113593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 +DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10840630531311035 s +INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11041784286499023 s +DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=127320018042921417183111771500204408536, time:1750767909.7965522s req_ids:[8] +DEBUG 06-24 20:25:09 [manager.py:391] +ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:204.6334743499756ms total_cost_time:204.69331741333008ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:11014 prompt_cache_len:5151 prompt_cache_ratio:0.46767750136190306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 +DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.1079099178314209 s +INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983037948608398 s +DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=150388860240897527475752073312404715211, time:1750767910.0073388s req_ids:[8] +DEBUG 06-24 20:25:10 [manager.py:391] +ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:203.31454277038574ms total_cost_time:203.3684253692627ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:11015 prompt_cache_len:5151 prompt_cache_ratio:0.46763504312301407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 +DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10911846160888672 s +INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11112523078918457 s +DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=71343971786385498784624178730287540414, time:1750767910.2221158s req_ids:[8] +DEBUG 06-24 20:25:10 [manager.py:391] +ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:373.46410751342773ms total_cost_time:373.50988388061523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11016 prompt_cache_len:5151 prompt_cache_ratio:0.4675925925925926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 +DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10854005813598633 s +INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s +DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=340071322014091706224156906589741976364, time:1750767910.600805s req_ids:[8] +DEBUG 06-24 20:25:10 [manager.py:391] +ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:199.73111152648926ms total_cost_time:199.77545738220215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11017 prompt_cache_len:5151 prompt_cache_ratio:0.46755014976853954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 +DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10879087448120117 s +INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083006858825684 s +DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=170133167789734103335699522806250943683, time:1750767910.8093421s req_ids:[8] +DEBUG 06-24 20:25:10 [manager.py:391] +ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:200.87313652038574ms total_cost_time:200.91700553894043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11018 prompt_cache_len:5151 prompt_cache_ratio:0.46750771464875657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 +DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10863947868347168 s +INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.11050748825073242 s +DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=252096996960790285640134065687128052954, time:1750767911.0181422s req_ids:[8] +DEBUG 06-24 20:25:11 [manager.py:391] +ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:201.35903358459473ms total_cost_time:201.4024257659912ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11019 prompt_cache_len:5151 prompt_cache_ratio:0.4674652872311462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 +DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s +INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.10989522933959961 s +DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=218576145005571795574538850280361506027, time:1750767911.2274828s req_ids:[8] +DEBUG 06-24 20:25:11 [manager.py:391] +ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:203.8097381591797ms total_cost_time:203.8710117340088ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:11020 prompt_cache_len:5151 prompt_cache_ratio:0.4674228675136116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 +DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10775423049926758 s +INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.10971641540527344 s +DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=17935944016900144544572488286861400602, time:1750767911.4394884s req_ids:[8] +DEBUG 06-24 20:25:11 [manager.py:391] +ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:203.8886547088623ms total_cost_time:203.94587516784668ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:11021 prompt_cache_len:5151 prompt_cache_ratio:0.4673804554940568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 +DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10902047157287598 s +INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.11106252670288086 s +DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=104053406936977228035362061297123958608, time:1750767911.6667821s req_ids:[8] +DEBUG 06-24 20:25:11 [manager.py:391] +ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:227.4765968322754ms total_cost_time:227.5223731994629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11022 prompt_cache_len:5151 prompt_cache_ratio:0.4673380511703865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 +DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10854411125183105 s +INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.110443115234375 s +DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=267124607333919216056254147109271017199, time:1750767911.8849857s req_ids:[8] +DEBUG 06-24 20:25:11 [manager.py:391] +ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:210.5708122253418ms total_cost_time:210.6163501739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11023 prompt_cache_len:5151 prompt_cache_ratio:0.4672956545405062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 +DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10791397094726562 s +INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.10988593101501465 s +DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=152399012636591430237987181666178272072, time:1750767912.1027646s req_ids:[8] +DEBUG 06-24 20:25:12 [manager.py:391] +ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:200.68717002868652ms total_cost_time:200.73223114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11024 prompt_cache_len:5151 prompt_cache_ratio:0.4672532656023222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 +DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10937929153442383 s +INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11139297485351562 s +DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=147773605902685727343334701270613743133, time:1750767912.3132493s req_ids:[8] +DEBUG 06-24 20:25:12 [manager.py:391] +ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:366.1487102508545ms total_cost_time:366.1956787109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11025 prompt_cache_len:5151 prompt_cache_ratio:0.4672108843537415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 +DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s +INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11041712760925293 s +DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=22438155217631460827440440838400347236, time:1750767912.6847022s req_ids:[8] +DEBUG 06-24 20:25:12 [manager.py:391] +ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:199.61023330688477ms total_cost_time:199.65887069702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:11026 prompt_cache_len:5151 prompt_cache_ratio:0.46716851079267185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 +DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s +DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=47895542750260608607403270965683851029, time:1750767912.8924437s req_ids:[8] +DEBUG 06-24 20:25:12 [manager.py:391] +INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11156988143920898 s +ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:201.08723640441895ms total_cost_time:201.13158226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11027 prompt_cache_len:5151 prompt_cache_ratio:0.46712614491702187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 +DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10831165313720703 s +INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11024093627929688 s +DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=291493530953703379327499073516200638955, time:1750767913.1026127s req_ids:[8] +DEBUG 06-24 20:25:13 [manager.py:391] +ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:163.53178024291992ms total_cost_time:163.59210014343262ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:11028 prompt_cache_len:5151 prompt_cache_ratio:0.46708378672470074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 +DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10914325714111328 s +INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11087656021118164 s +DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=330774999887377105411399425737617302340, time:1750767913.2731836s req_ids:[8] +DEBUG 06-24 20:25:13 [manager.py:391] +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:25:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 46888.836 tokens/s +DEBUG 06-24 20:25:13 [stats.py:37] Avg prompt tokens throughput: 46880.319 tokens/s +DEBUG 06-24 20:25:13 [stats.py:37] Avg generate tokens throughput: 8.517 tokens/s +ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:162.34087944030762ms total_cost_time:162.3859405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11029 prompt_cache_len:5151 prompt_cache_ratio:0.46704143621361865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 +DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10892438888549805 s +INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11088752746582031 s +DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=96850845085649871216899343035006957172, time:1750767913.4444385s req_ids:[8] +DEBUG 06-24 20:25:13 [manager.py:391] +ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:199.75852966308594ms total_cost_time:199.80239868164062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11030 prompt_cache_len:5151 prompt_cache_ratio:0.4669990933816863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 +DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10799050331115723 s +INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s +DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=311921249046831160339682210038473275200, time:1750767913.6532633s req_ids:[8] +DEBUG 06-24 20:25:13 [manager.py:391] +ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:207.62228965759277ms total_cost_time:207.66639709472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11031 prompt_cache_len:5151 prompt_cache_ratio:0.4669567582268153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 +DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10788750648498535 s +INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.10983824729919434 s +DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=174245706390833024609646349073037230517, time:1750767913.8684309s req_ids:[8] +DEBUG 06-24 20:25:13 [manager.py:391] +ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:381.45899772644043ms total_cost_time:381.5033435821533ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11032 prompt_cache_len:5151 prompt_cache_ratio:0.4669144307469181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 +DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s +INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11106061935424805 s +DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=265928188380368634629488755356110149565, time:1750767914.258097s req_ids:[8] +DEBUG 06-24 20:25:14 [manager.py:391] +ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:202.5585174560547ms total_cost_time:202.60119438171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11033 prompt_cache_len:5151 prompt_cache_ratio:0.46687211093990755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 +DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10926342010498047 s +INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s +DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=158436454127789123398916276811409387678, time:1750767914.4693985s req_ids:[8] +DEBUG 06-24 20:25:14 [manager.py:391] +ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:211.4732265472412ms total_cost_time:211.50588989257812ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:11034 prompt_cache_len:5151 prompt_cache_ratio:0.4668297988036977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 +DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.1082925796508789 s +INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11020755767822266 s +DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=98900805525056713815711249718806638451, time:1750767914.6879966s req_ids:[8] +DEBUG 06-24 20:25:14 [manager.py:391] +ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:204.64658737182617ms total_cost_time:204.69045639038086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11035 prompt_cache_len:5151 prompt_cache_ratio:0.466787494336203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 +DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10917925834655762 s +INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11108994483947754 s +DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=300692489463183932016449859238483987510, time:1750767914.8997097s req_ids:[8] +DEBUG 06-24 20:25:14 [manager.py:391] +ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:203.08613777160645ms total_cost_time:203.13096046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11036 prompt_cache_len:5151 prompt_cache_ratio:0.4667451975353389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 +DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10804963111877441 s +INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s +DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=163049181786745697808295311451197836306, time:1750767915.1137922s req_ids:[8] +DEBUG 06-24 20:25:15 [manager.py:391] +ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:204.5729160308838ms total_cost_time:204.61726188659668ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11037 prompt_cache_len:5151 prompt_cache_ratio:0.46670290839902145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 +DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10807108879089355 s +INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10991740226745605 s +DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=320480809806303284898901144646976126681, time:1750767915.3226473s req_ids:[8] +DEBUG 06-24 20:25:15 [manager.py:391] +ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:201.87854766845703ms total_cost_time:201.92313194274902ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11038 prompt_cache_len:5151 prompt_cache_ratio:0.4666606269251676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 +DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s +INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.11068940162658691 s +DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=69609281931315877409300297575156938131, time:1750767915.5422692s req_ids:[8] +DEBUG 06-24 20:25:15 [manager.py:391] +ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:381.09540939331055ms total_cost_time:381.119966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:11039 prompt_cache_len:5151 prompt_cache_ratio:0.4666183531116949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 +DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s +INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10984444618225098 s +DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=339656269842694207095725495281503580912, time:1750767915.9255369s req_ids:[8] +DEBUG 06-24 20:25:15 [manager.py:391] +ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:206.96425437927246ms total_cost_time:207.00764656066895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11040 prompt_cache_len:5151 prompt_cache_ratio:0.46657608695652175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 +DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.11191177368164062 s +INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11389040946960449 s +DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=115968914764257379043226683810409339471, time:1750767916.143424s req_ids:[8] +DEBUG 06-24 20:25:16 [manager.py:391] +ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:216.22943878173828ms total_cost_time:216.28069877624512ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11041 prompt_cache_len:5151 prompt_cache_ratio:0.46653382845756725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 +DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.1082305908203125 s +INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11023640632629395 s +DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=6786558978228475158633591213856061927, time:1750767916.3636193s req_ids:[8] +DEBUG 06-24 20:25:16 [manager.py:391] +ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:203.9804458618164ms total_cost_time:204.0245532989502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11042 prompt_cache_len:5151 prompt_cache_ratio:0.46649157761275134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 +DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10851168632507324 s +INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s +DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=189398100750898496546498054606022776907, time:1750767916.5765371s req_ids:[8] +DEBUG 06-24 20:25:16 [manager.py:391] +ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:199.55134391784668ms total_cost_time:199.5999813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:11043 prompt_cache_len:5151 prompt_cache_ratio:0.4664493344199946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 +DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10899019241333008 s +INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11087822914123535 s +DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=110350007444522521049195243340796643427, time:1750767916.7829885s req_ids:[8] +DEBUG 06-24 20:25:16 [manager.py:391] +ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:202.9271125793457ms total_cost_time:202.9581069946289ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:11044 prompt_cache_len:5151 prompt_cache_ratio:0.4664070988772184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 +DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10925054550170898 s +INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s +DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=141790355407290772649623370791432942647, time:1750767916.9954743s req_ids:[8] +DEBUG 06-24 20:25:16 [manager.py:391] +INFO 06-24 20:25:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:208.8296413421631ms total_cost_time:208.86898040771484ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:11045 prompt_cache_len:5151 prompt_cache_ratio:0.46636487098234497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 +DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.31320858001708984 s +INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.3151977062225342 s +DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=147970453522678204358955391066890193122, time:1750767917.426935s req_ids:[8] +DEBUG 06-24 20:25:17 [manager.py:391] +ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:427.7620315551758ms total_cost_time:427.7822971343994ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11046 prompt_cache_len:5151 prompt_cache_ratio:0.4663226507332971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 +DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.10931539535522461 s +INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s +DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=50351564120266722813972483734561850647, time:1750767917.6527383s req_ids:[8] +DEBUG 06-24 20:25:17 [manager.py:391] +ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:209.96642112731934ms total_cost_time:210.01172065734863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11047 prompt_cache_len:5151 prompt_cache_ratio:0.4662804381279986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 +DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.10875391960144043 s +INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s +DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=218812451139121876736373793069470096762, time:1750767917.8658092s req_ids:[8] +DEBUG 06-24 20:25:17 [manager.py:391] +ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:214.48612213134766ms total_cost_time:214.52903747558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11048 prompt_cache_len:5151 prompt_cache_ratio:0.4662382331643736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 +DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10839343070983887 s +INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11015462875366211 s +DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=231073611885592840595329162406489065542, time:1750767918.1087198s req_ids:[8] +DEBUG 06-24 20:25:18 [manager.py:391] +ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:219.681978225708ms total_cost_time:219.7260856628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11049 prompt_cache_len:5151 prompt_cache_ratio:0.4661960358403475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 +DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10791540145874023 s +INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.10980510711669922 s +DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=109633651086520927716503125412899517118, time:1750767918.3140824s req_ids:[8] +DEBUG 06-24 20:25:18 [manager.py:391] +ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:197.79539108276367ms total_cost_time:197.83806800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11050 prompt_cache_len:5151 prompt_cache_ratio:0.46615384615384614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 +DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10983920097351074 s +INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11186075210571289 s +DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=247344740288634562219542175442343136680, time:1750767918.5208998s req_ids:[8] +DEBUG 06-24 20:25:18 [manager.py:391] +ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:196.6390609741211ms total_cost_time:196.6836452484131ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11051 prompt_cache_len:5151 prompt_cache_ratio:0.46611166410279614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 +DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s +INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11030912399291992 s +DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=157408501703582063740007155728725523775, time:1750767918.7263312s req_ids:[8] +DEBUG 06-24 20:25:18 [manager.py:391] +ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:198.87018203735352ms total_cost_time:198.9133358001709ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11052 prompt_cache_len:5151 prompt_cache_ratio:0.46606948968512485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 +DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.1090688705444336 s +INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.1109781265258789 s +DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=105829950909169031213261361866851910650, time:1750767918.9309134s req_ids:[8] +DEBUG 06-24 20:25:18 [manager.py:391] +ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:211.0307216644287ms total_cost_time:211.0753059387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11053 prompt_cache_len:5151 prompt_cache_ratio:0.46602732289876053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 +DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.310549259185791 s +INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.3125417232513428 s +DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=195421280554318518100734172537429406999, time:1750767919.3553078s req_ids:[8] +DEBUG 06-24 20:25:19 [manager.py:391] +ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:417.9105758666992ms total_cost_time:417.9539680480957ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11054 prompt_cache_len:5151 prompt_cache_ratio:0.465985163741632 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 +DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.10904431343078613 s +INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.1109614372253418 s +DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=62027748841707886598867492590792261745, time:1750767919.5777316s req_ids:[8] +DEBUG 06-24 20:25:19 [manager.py:391] +ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:209.30242538452148ms total_cost_time:209.34677124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11055 prompt_cache_len:5151 prompt_cache_ratio:0.46594301221166895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 +DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.10767960548400879 s +INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.10970854759216309 s +DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=78624740772480355262160082468327271760, time:1750767919.7950985s req_ids:[8] +DEBUG 06-24 20:25:19 [manager.py:391] +ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:205.5981159210205ms total_cost_time:205.65223693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:11056 prompt_cache_len:5151 prompt_cache_ratio:0.4659008683068017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 +DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:19 [batch.py:51] router release req id 8 +INFO 06-24 20:25:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.11041069030761719 s +INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11235713958740234 s +DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=240187883049473306771525049756512655464, time:1750767920.0088003s req_ids:[8] +DEBUG 06-24 20:25:20 [manager.py:391] +ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:207.67903327941895ms total_cost_time:207.72147178649902ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11057 prompt_cache_len:5151 prompt_cache_ratio:0.4658587320249616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 +DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.1089324951171875 s +INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s +DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=57641011239707614968513979134791314606, time:1750767920.2235348s req_ids:[8] +DEBUG 06-24 20:25:20 [manager.py:391] +ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:204.20193672180176ms total_cost_time:204.2231559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11058 prompt_cache_len:5151 prompt_cache_ratio:0.4658166033640803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 +DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10805296897888184 s +INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.10930562019348145 s +DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=185355541823603023135636544889825030700, time:1750767920.4424906s req_ids:[8] +DEBUG 06-24 20:25:20 [manager.py:391] +ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:217.46587753295898ms total_cost_time:217.50998497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11059 prompt_cache_len:5151 prompt_cache_ratio:0.4657744823220906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 +DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s +INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s +DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=160945961856777328717147829912851632710, time:1750767920.6624548s req_ids:[8] +DEBUG 06-24 20:25:20 [manager.py:391] +ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:207.21840858459473ms total_cost_time:207.2601318359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11060 prompt_cache_len:5151 prompt_cache_ratio:0.4657323688969259 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 +DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10838031768798828 s +INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11020278930664062 s +DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=118473117702927182272277196891368324162, time:1750767920.8764567s req_ids:[8] +DEBUG 06-24 20:25:20 [manager.py:391] +ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:368.82901191711426ms total_cost_time:368.87216567993164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11061 prompt_cache_len:5151 prompt_cache_ratio:0.46569026308652023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 +DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10848546028137207 s +INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11053586006164551 s +DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=214513630186461970401251132675502210801, time:1750767921.2508235s req_ids:[8] +DEBUG 06-24 20:25:21 [manager.py:391] +ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:205.4421901702881ms total_cost_time:205.46746253967285ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11062 prompt_cache_len:5151 prompt_cache_ratio:0.46564816488880856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 +DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s +INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11089920997619629 s +DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=43784840294194501859005622296924316960, time:1750767921.4655201s req_ids:[8] +DEBUG 06-24 20:25:21 [manager.py:391] +ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:209.43593978881836ms total_cost_time:209.47813987731934ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11063 prompt_cache_len:5151 prompt_cache_ratio:0.46560607430172646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 +DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s +INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s +DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=24126909912162452126179353321195390044, time:1750767921.6831574s req_ids:[8] +DEBUG 06-24 20:25:21 [manager.py:391] +ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:210.4172706604004ms total_cost_time:210.4625701904297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11064 prompt_cache_len:5151 prompt_cache_ratio:0.4655639913232104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 +DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10938763618469238 s +INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11146354675292969 s +DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=308812589413045109722906406993545244190, time:1750767921.8963435s req_ids:[8] +DEBUG 06-24 20:25:21 [manager.py:391] +ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:208.54926109313965ms total_cost_time:208.58478546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:11065 prompt_cache_len:5151 prompt_cache_ratio:0.46552191595119746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 +DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10896515846252441 s +INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089706420898438 s +DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=283255848241193956429740388411759440391, time:1750767922.1134622s req_ids:[8] +DEBUG 06-24 20:25:22 [manager.py:391] +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:211.73977851867676ms total_cost_time:211.78197860717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11066 prompt_cache_len:5151 prompt_cache_ratio:0.46547984818362553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 +DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10912871360778809 s +INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11118650436401367 s +DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=270325970034754073609856668479527343402, time:1750767922.3298197s req_ids:[8] +DEBUG 06-24 20:25:22 [manager.py:391] +ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:204.91981506347656ms total_cost_time:204.94508743286133ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11067 prompt_cache_len:5151 prompt_cache_ratio:0.46543778801843316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 +DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.20946812629699707 s +INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.2110605239868164 s +DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=133469832019886332970065160001084835426, time:1750767922.675348s req_ids:[8] +DEBUG 06-24 20:25:22 [manager.py:391] +ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:328.2022476196289ms total_cost_time:328.2475471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11068 prompt_cache_len:5151 prompt_cache_ratio:0.46539573545355983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 +DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10953760147094727 s +INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11147904396057129 s +DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=108235777082007959142269489808914289363, time:1750767922.880083s req_ids:[8] +DEBUG 06-24 20:25:22 [manager.py:391] +ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:214.19286727905273ms total_cost_time:214.22815322875977ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:11069 prompt_cache_len:5151 prompt_cache_ratio:0.4653536904869455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 +DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10860633850097656 s +INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11095690727233887 s +DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=190533959141344948774104734191331908886, time:1750767923.0962203s req_ids:[8] +DEBUG 06-24 20:25:23 [manager.py:391] +ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:210.64472198486328ms total_cost_time:210.68978309631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11070 prompt_cache_len:5151 prompt_cache_ratio:0.46531165311653117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 +INFO 06-24 20:25:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10838079452514648 s +INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11039376258850098 s +DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=62324914675680166012592345846195423792, time:1750767923.311654s req_ids:[8] +DEBUG 06-24 20:25:23 [manager.py:391] +DEBUG 06-24 20:25:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 46241.738 tokens/s +DEBUG 06-24 20:25:23 [stats.py:37] Avg prompt tokens throughput: 46233.370 tokens/s +DEBUG 06-24 20:25:23 [stats.py:37] Avg generate tokens throughput: 8.368 tokens/s +ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:206.21275901794434ms total_cost_time:206.25543594360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11071 prompt_cache_len:5151 prompt_cache_ratio:0.46526962334025834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 +DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:23 [batch.py:51] router release req id 8 +INFO 06-24 20:25:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s +INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11052131652832031 s +DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=49218787953997543610336323362450078156, time:1750767923.5254507s req_ids:[8] +DEBUG 06-24 20:25:23 [manager.py:391] +ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:208.24027061462402ms total_cost_time:208.2836627960205ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11072 prompt_cache_len:5151 prompt_cache_ratio:0.46522760115606937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 +DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.11001729965209961 s +INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11201858520507812 s +DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=120041814334311103601898999032968909665, time:1750767923.7401342s req_ids:[8] +DEBUG 06-24 20:25:23 [manager.py:391] +ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:208.86874198913574ms total_cost_time:208.91404151916504ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11073 prompt_cache_len:5151 prompt_cache_ratio:0.46518558656190734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 +DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s +INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11070108413696289 s +DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=279846507558977257047905484755083750700, time:1750767923.9555442s req_ids:[8] +DEBUG 06-24 20:25:23 [manager.py:391] +ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:393.51606369018555ms total_cost_time:393.55993270874023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11074 prompt_cache_len:5151 prompt_cache_ratio:0.4651435795557161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 +DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s +INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.10991239547729492 s +DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=119782361379696284897016117227795364534, time:1750767924.354608s req_ids:[8] +DEBUG 06-24 20:25:24 [manager.py:391] +ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:209.7337245941162ms total_cost_time:209.7773551940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11075 prompt_cache_len:5151 prompt_cache_ratio:0.4651015801354402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 +DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.11052966117858887 s +INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.11258244514465332 s +DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=145639881033555246450459613603699706198, time:1750767924.571384s req_ids:[8] +DEBUG 06-24 20:25:24 [manager.py:391] +ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:209.0606689453125ms total_cost_time:209.0909481048584ms,out_token_counter:1 mean_per_token_cost_time: 0.030279159545898438ms prompt_token_num:11076 prompt_cache_len:5151 prompt_cache_ratio:0.46505958829902494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 +DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10882115364074707 s +INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s +DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=258926410392211647189162317411903107225, time:1750767924.7879057s req_ids:[8] +DEBUG 06-24 20:25:24 [manager.py:391] +ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:211.4708423614502ms total_cost_time:211.51375770568848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11077 prompt_cache_len:5151 prompt_cache_ratio:0.46501760404441633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 +DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10876178741455078 s +INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11064958572387695 s +DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=125958852354001450414928985445268669239, time:1750767925.0042796s req_ids:[8] +DEBUG 06-24 20:25:25 [manager.py:391] +ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:203.50313186645508ms total_cost_time:203.54723930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11078 prompt_cache_len:5151 prompt_cache_ratio:0.4649756273695613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 +DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s +INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11045360565185547 s +DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=269678914457357181411670260256868690077, time:1750767925.212702s req_ids:[8] +DEBUG 06-24 20:25:25 [manager.py:391] +ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:206.59422874450684ms total_cost_time:206.63809776306152ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11079 prompt_cache_len:5151 prompt_cache_ratio:0.46493365827240724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 +DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.1090846061706543 s +INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11115384101867676 s +DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=197382431154624714820614900134569532808, time:1750767925.4279888s req_ids:[8] +DEBUG 06-24 20:25:25 [manager.py:391] +ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:209.54656600952148ms total_cost_time:209.5932960510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11080 prompt_cache_len:5151 prompt_cache_ratio:0.46489169675090253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 +DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.109130859375 s +INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11117196083068848 s +DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=219254516025300040524167287147177090162, time:1750767925.6429906s req_ids:[8] +DEBUG 06-24 20:25:25 [manager.py:391] +ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:208.50229263305664ms total_cost_time:208.54640007019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11081 prompt_cache_len:5151 prompt_cache_ratio:0.4648497428029961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 +DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.10936880111694336 s +INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11142396926879883 s +DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=309250596194093480125393263703027001964, time:1750767925.858672s req_ids:[8] +DEBUG 06-24 20:25:25 [manager.py:391] +ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:210.2832794189453ms total_cost_time:210.3266716003418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11082 prompt_cache_len:5151 prompt_cache_ratio:0.4648077964266378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 +DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s +INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11096906661987305 s +DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=207157506587033543506168475684329136912, time:1750767926.0745623s req_ids:[8] +DEBUG 06-24 20:25:26 [manager.py:391] +ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:366.06860160827637ms total_cost_time:366.11294746398926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11083 prompt_cache_len:5151 prompt_cache_ratio:0.464765857619778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 +DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10832548141479492 s +INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030173301696777 s +DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=237840922273794409516084193594128282556, time:1750767926.4445338s req_ids:[8] +DEBUG 06-24 20:25:26 [manager.py:391] +ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:205.78289031982422ms total_cost_time:205.8253288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11084 prompt_cache_len:5151 prompt_cache_ratio:0.4647239263803681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 +DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10832929611206055 s +INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11033987998962402 s +DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=187977930147848187257274476633530567724, time:1750767926.6597478s req_ids:[8] +DEBUG 06-24 20:25:26 [manager.py:391] +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:210.62374114990234ms total_cost_time:210.66761016845703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11085 prompt_cache_len:5151 prompt_cache_ratio:0.46468200270635995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 +DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10859179496765137 s +INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s +DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=167017124625254406992575970536084199568, time:1750767926.8764267s req_ids:[8] +DEBUG 06-24 20:25:26 [manager.py:391] +ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:210.62707901000977ms total_cost_time:210.67070960998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11086 prompt_cache_len:5151 prompt_cache_ratio:0.46464008659570627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 +DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10912537574768066 s +INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s +DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=303839045978607697831287277491128577907, time:1750767927.097608s req_ids:[8] +DEBUG 06-24 20:25:27 [manager.py:391] +ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:213.23919296264648ms total_cost_time:213.28401565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11087 prompt_cache_len:5151 prompt_cache_ratio:0.4645981780463606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 +DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.1087641716003418 s +INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11069107055664062 s +DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=290093537135643762700485837043146974348, time:1750767927.3118103s req_ids:[8] +DEBUG 06-24 20:25:27 [manager.py:391] +ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:208.27054977416992ms total_cost_time:208.30583572387695ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:11088 prompt_cache_len:5151 prompt_cache_ratio:0.46455627705627706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 +DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10898351669311523 s +INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11097955703735352 s +DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=75745730969974428267303103263423555694, time:1750767927.5279648s req_ids:[8] +DEBUG 06-24 20:25:27 [manager.py:391] +ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:210.7090950012207ms total_cost_time:210.7548713684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11089 prompt_cache_len:5151 prompt_cache_ratio:0.4645143836234106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 +DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s +INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106715202331543 s +DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=245168853283421910559697992856874886657, time:1750767927.7417164s req_ids:[8] +DEBUG 06-24 20:25:27 [manager.py:391] +ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:373.0051517486572ms total_cost_time:373.0490207672119ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11090 prompt_cache_len:5151 prompt_cache_ratio:0.46447249774571686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 +DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10890889167785645 s +INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11089372634887695 s +DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=221447922227528537556424716756044270055, time:1750767928.122231s req_ids:[8] +DEBUG 06-24 20:25:28 [manager.py:391] +ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:208.57000350952148ms total_cost_time:208.61148834228516ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11091 prompt_cache_len:5151 prompt_cache_ratio:0.46443061942115227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 +DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10917544364929199 s +INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11123156547546387 s +DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=245191141696883479329481177620378910005, time:1750767928.3382533s req_ids:[8] +DEBUG 06-24 20:25:28 [manager.py:391] +ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:208.3873748779297ms total_cost_time:208.43100547790527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11092 prompt_cache_len:5151 prompt_cache_ratio:0.464388748647674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 +DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10801100730895996 s +INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988473892211914 s +DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=172588350429751307805867297552769050388, time:1750767928.5529335s req_ids:[8] +DEBUG 06-24 20:25:28 [manager.py:391] +ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:209.20991897583008ms total_cost_time:209.25545692443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11093 prompt_cache_len:5151 prompt_cache_ratio:0.4643468854232399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 +DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.1093606948852539 s +INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s +DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=317517874249556394587978533129933962262, time:1750767928.7827284s req_ids:[8] +DEBUG 06-24 20:25:28 [manager.py:391] +ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:226.0568141937256ms total_cost_time:226.10116004943848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11094 prompt_cache_len:5151 prompt_cache_ratio:0.46430502974580856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 +DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10859394073486328 s +INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044883728027344 s +DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=318818292330889325153635039285397752544, time:1750767929.000657s req_ids:[8] +DEBUG 06-24 20:25:29 [manager.py:391] +ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:210.21580696105957ms total_cost_time:210.26062965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11095 prompt_cache_len:5151 prompt_cache_ratio:0.46426318161333935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 +DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10895967483520508 s +INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095523834228516 s +DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=38161358032602167707836339268522799583, time:1750767929.2165122s req_ids:[8] +DEBUG 06-24 20:25:29 [manager.py:391] +ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:209.42950248718262ms total_cost_time:209.4743251800537ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11096 prompt_cache_len:5151 prompt_cache_ratio:0.46422134102379237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 +DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s +INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11043906211853027 s +DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=124416931320129021169732395325475745920, time:1750767929.4325705s req_ids:[8] +DEBUG 06-24 20:25:29 [manager.py:391] +ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:372.85685539245605ms total_cost_time:372.90072441101074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11097 prompt_cache_len:5151 prompt_cache_ratio:0.4641795079751284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 +DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10817313194274902 s +INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s +DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=51560055976150133814045539790501777640, time:1750767929.8101168s req_ids:[8] +DEBUG 06-24 20:25:29 [manager.py:391] +ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:207.19218254089355ms total_cost_time:207.23438262939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11098 prompt_cache_len:5151 prompt_cache_ratio:0.46413768246530906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 +DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10866975784301758 s +INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s +DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=116171309960766227373065629902343167505, time:1750767930.0237982s req_ids:[8] +DEBUG 06-24 20:25:30 [manager.py:391] +ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:206.99357986450195ms total_cost_time:207.03721046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11099 prompt_cache_len:5151 prompt_cache_ratio:0.4640958644922966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 +DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10843992233276367 s +INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11040043830871582 s +DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=182196979992663665022198272781392172937, time:1750767930.2389524s req_ids:[8] +DEBUG 06-24 20:25:30 [manager.py:391] +ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:207.71193504333496ms total_cost_time:207.75437355041504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11100 prompt_cache_len:5151 prompt_cache_ratio:0.46405405405405403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 +DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10913443565368652 s +INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110076904296875 s +DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=233354501475540860522152146710280193868, time:1750767930.4527864s req_ids:[8] +DEBUG 06-24 20:25:30 [manager.py:391] +ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:199.16987419128418ms total_cost_time:199.21302795410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11101 prompt_cache_len:5151 prompt_cache_ratio:0.46401225114854516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 +DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10828685760498047 s +INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11017847061157227 s +DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=179281723532368675573574127619412070963, time:1750767930.6649694s req_ids:[8] +DEBUG 06-24 20:25:30 [manager.py:391] +ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:216.55583381652832ms total_cost_time:216.599702835083ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11102 prompt_cache_len:5151 prompt_cache_ratio:0.46397045577373447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 +DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10846257209777832 s +INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11097407341003418 s +DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=87235901923087418351773157862292902954, time:1750767930.8799567s req_ids:[8] +DEBUG 06-24 20:25:30 [manager.py:391] +ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:209.70749855041504ms total_cost_time:209.7492218017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11103 prompt_cache_len:5151 prompt_cache_ratio:0.46392866792758714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 +DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.20918488502502441 s +INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.21095752716064453 s +DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=226373184080802225085790709708248195854, time:1750767931.2281322s req_ids:[8] +DEBUG 06-24 20:25:31 [manager.py:391] +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:327.3022174835205ms total_cost_time:327.359676361084ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:11104 prompt_cache_len:5151 prompt_cache_ratio:0.4638868876080692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 +DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.11044073104858398 s +INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.11236310005187988 s +DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=15633322016495255087389502373661203996, time:1750767931.4371145s req_ids:[8] +DEBUG 06-24 20:25:31 [manager.py:391] +ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:210.68120002746582ms total_cost_time:210.723876953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11105 prompt_cache_len:5151 prompt_cache_ratio:0.4638451148131472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 +DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s +INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997796058654785 s +DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=295886789954009101897328696003224553245, time:1750767931.6477757s req_ids:[8] +DEBUG 06-24 20:25:31 [manager.py:391] +ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:204.65731620788574ms total_cost_time:204.69951629638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11106 prompt_cache_len:5151 prompt_cache_ratio:0.4638033495407888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 +DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.10925722122192383 s +INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.11120438575744629 s +DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=107915076213899794467901080151949180408, time:1750767931.8589225s req_ids:[8] +DEBUG 06-24 20:25:31 [manager.py:391] +ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:209.2154026031494ms total_cost_time:209.2571258544922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11107 prompt_cache_len:5151 prompt_cache_ratio:0.4637615917889619 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 +DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10756874084472656 s +INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.10936951637268066 s +DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=305710140614759888826700020649970614928, time:1750767932.0727541s req_ids:[8] +DEBUG 06-24 20:25:32 [manager.py:391] +ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:166.85914993286133ms total_cost_time:166.9018268585205ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11108 prompt_cache_len:5151 prompt_cache_ratio:0.46371984155563556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 +DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10837244987487793 s +INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11022830009460449 s +DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=212736682107379962593283161525997070800, time:1750767932.2486763s req_ids:[8] +DEBUG 06-24 20:25:32 [manager.py:391] +ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:168.196439743042ms total_cost_time:168.23697090148926ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:11109 prompt_cache_len:5151 prompt_cache_ratio:0.46367809883877936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 +DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10849571228027344 s +INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11035013198852539 s +DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=195347392297703852837044612140960351594, time:1750767932.4185035s req_ids:[8] +DEBUG 06-24 20:25:32 [manager.py:391] +ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:195.25671005249023ms total_cost_time:195.2989101409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11110 prompt_cache_len:5151 prompt_cache_ratio:0.4636363636363636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 +DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.1103060245513916 s +INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11270642280578613 s +DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=84596657245073357369268745342761533559, time:1750767932.6204383s req_ids:[8] +DEBUG 06-24 20:25:32 [manager.py:391] +ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:203.81522178649902ms total_cost_time:203.8578987121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11111 prompt_cache_len:5151 prompt_cache_ratio:0.46359463594635947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 +DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.20949673652648926 s +INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.2112898826599121 s +DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=128503035966156720912968903011065700237, time:1750767932.9635081s req_ids:[8] +DEBUG 06-24 20:25:32 [manager.py:391] +ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:324.446439743042ms total_cost_time:324.4905471801758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11112 prompt_cache_len:5151 prompt_cache_ratio:0.4635529157667387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 +DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10955357551574707 s +INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.11199784278869629 s +DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=145404330724008354895274998548086240101, time:1750767933.1598861s req_ids:[8] +DEBUG 06-24 20:25:33 [manager.py:391] +ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:206.15458488464355ms total_cost_time:206.20012283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11113 prompt_cache_len:5151 prompt_cache_ratio:0.4635112030954738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 +DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10813426971435547 s +INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.1100316047668457 s +DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=9144302920476584488922873767895384919, time:1750767933.3737514s req_ids:[8] +DEBUG 06-24 20:25:33 [manager.py:391] +DEBUG 06-24 20:25:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 47413.887 tokens/s +DEBUG 06-24 20:25:33 [stats.py:37] Avg prompt tokens throughput: 47405.340 tokens/s +DEBUG 06-24 20:25:33 [stats.py:37] Avg generate tokens throughput: 8.547 tokens/s +ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:208.00018310546875ms total_cost_time:208.04500579833984ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11114 prompt_cache_len:5151 prompt_cache_ratio:0.46346949793053804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 +DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10966324806213379 s +INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.1116340160369873 s +DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=95782170374879522558022526129305773215, time:1750767933.5885692s req_ids:[8] +DEBUG 06-24 20:25:33 [manager.py:391] +ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:206.02774620056152ms total_cost_time:206.07280731201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11115 prompt_cache_len:5151 prompt_cache_ratio:0.46342780026990554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 +DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10850310325622559 s +INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s +DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=43193749227152974251874695833952128618, time:1750767933.801056s req_ids:[8] +DEBUG 06-24 20:25:33 [manager.py:391] +ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:203.42469215393066ms total_cost_time:203.46760749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11116 prompt_cache_len:5151 prompt_cache_ratio:0.4633861101115509 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 +DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10806870460510254 s +INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.10992574691772461 s +DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=115772606167034794302507912061604900434, time:1750767934.0105765s req_ids:[8] +DEBUG 06-24 20:25:34 [manager.py:391] +ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:207.65924453735352ms total_cost_time:207.70716667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:11117 prompt_cache_len:5151 prompt_cache_ratio:0.46334442745344967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 +DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s +INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s +DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=248177335806546126270314759246855419441, time:1750767934.2265623s req_ids:[8] +DEBUG 06-24 20:25:34 [manager.py:391] +ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:210.77990531921387ms total_cost_time:210.82448959350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11118 prompt_cache_len:5151 prompt_cache_ratio:0.463302752293578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 +DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10895705223083496 s +INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.11095118522644043 s +DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=252095948888419129872148335923691411078, time:1750767934.4432082s req_ids:[8] +DEBUG 06-24 20:25:34 [manager.py:391] +ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:367.7494525909424ms total_cost_time:367.7937984466553ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11119 prompt_cache_len:5151 prompt_cache_ratio:0.4632610846299128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 +DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10952091217041016 s +INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.11141562461853027 s +DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=32435762298879189182419070200553278517, time:1750767934.8157332s req_ids:[8] +DEBUG 06-24 20:25:34 [manager.py:391] +ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:202.52084732055664ms total_cost_time:202.56447792053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11120 prompt_cache_len:5151 prompt_cache_ratio:0.46321942446043163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 +DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10845088958740234 s +INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11047840118408203 s +DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=127026865755350757135458866933897548759, time:1750767935.0268588s req_ids:[8] +DEBUG 06-24 20:25:35 [manager.py:391] +ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:206.0871124267578ms total_cost_time:206.129789352417ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11121 prompt_cache_len:5151 prompt_cache_ratio:0.46317777178311303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 +DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10774755477905273 s +INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s +DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=145121599873230649645292789453545336048, time:1750767935.238132s req_ids:[8] +DEBUG 06-24 20:25:35 [manager.py:391] +ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:211.17615699768066ms total_cost_time:211.21954917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11122 prompt_cache_len:5151 prompt_cache_ratio:0.463136126595936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 +DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10825848579406738 s +INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11020827293395996 s +DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=130288643086958652924169537159688768963, time:1750767935.458125s req_ids:[8] +DEBUG 06-24 20:25:35 [manager.py:391] +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:221.4512825012207ms total_cost_time:221.4961051940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11123 prompt_cache_len:5151 prompt_cache_ratio:0.4630944888968803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 +DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10793161392211914 s +INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.1099998950958252 s +DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=31164107278039846269554156588571841989, time:1750767935.6903238s req_ids:[8] +DEBUG 06-24 20:25:35 [manager.py:391] +ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:218.73784065246582ms total_cost_time:218.7809944152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11124 prompt_cache_len:5151 prompt_cache_ratio:0.46305285868392665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 +DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10925626754760742 s +INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11129117012023926 s +DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=12673321060624182540699073370241257001, time:1750767935.9079998s req_ids:[8] +DEBUG 06-24 20:25:35 [manager.py:391] +ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:209.73801612854004ms total_cost_time:209.78236198425293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11125 prompt_cache_len:5151 prompt_cache_ratio:0.46301123595505617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 +DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.31128692626953125 s +INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.3132343292236328 s +DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=308635690007744316379971799515130099524, time:1750767936.3317842s req_ids:[8] +DEBUG 06-24 20:25:36 [manager.py:391] +ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:422.9094982147217ms total_cost_time:422.95360565185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11126 prompt_cache_len:5151 prompt_cache_ratio:0.46296962070825093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 +DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.1078193187713623 s +INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.10979127883911133 s +DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=120979393721561685099125683157420093553, time:1750767936.5567918s req_ids:[8] +DEBUG 06-24 20:25:36 [manager.py:391] +ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:207.17358589172363ms total_cost_time:207.2160243988037ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11127 prompt_cache_len:5151 prompt_cache_ratio:0.46292801294149366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 +DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.10902285575866699 s +INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092972755432129 s +DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=221382809260110510572216169854992769527, time:1750767936.7786467s req_ids:[8] +DEBUG 06-24 20:25:36 [manager.py:391] +ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:223.93465042114258ms total_cost_time:223.97971153259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11128 prompt_cache_len:5151 prompt_cache_ratio:0.4628864126527678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 +DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.10931897163391113 s +INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.11124277114868164 s +DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=86994068965633756317730874767842180970, time:1750767937.000422s req_ids:[8] +DEBUG 06-24 20:25:37 [manager.py:391] +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:207.48567581176758ms total_cost_time:207.53026008605957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11129 prompt_cache_len:5151 prompt_cache_ratio:0.4628448198400575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 +DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10770726203918457 s +INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s +DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=196797045874225654180459128228331497825, time:1750767937.2104974s req_ids:[8] +DEBUG 06-24 20:25:37 [manager.py:391] +ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:202.3320198059082ms total_cost_time:202.3768424987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11130 prompt_cache_len:5151 prompt_cache_ratio:0.4628032345013477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 +DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s +INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10954999923706055 s +DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=339690355215540402093425954345440820495, time:1750767937.4222012s req_ids:[8] +DEBUG 06-24 20:25:37 [manager.py:391] +ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:210.9365463256836ms total_cost_time:210.9813690185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11131 prompt_cache_len:5151 prompt_cache_ratio:0.46276165663462404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 +DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:37 [batch.py:51] router release req id 8 +INFO 06-24 20:25:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10801529884338379 s +INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10974574089050293 s +DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=17244442465584114171153167631917277710, time:1750767937.6389496s req_ids:[8] +DEBUG 06-24 20:25:37 [manager.py:391] +ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:379.67681884765625ms total_cost_time:379.72092628479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11132 prompt_cache_len:5151 prompt_cache_ratio:0.4627200862378728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 +DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10887956619262695 s +INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11067485809326172 s +DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=282248845161600412671559526758315249379, time:1750767938.0227494s req_ids:[8] +DEBUG 06-24 20:25:38 [manager.py:391] +ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:204.6351432800293ms total_cost_time:204.67901229858398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11133 prompt_cache_len:5151 prompt_cache_ratio:0.4626785233090811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 +DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s +INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.10982298851013184 s +DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=271121871529044472867643759891134182395, time:1750767938.2393641s req_ids:[8] +DEBUG 06-24 20:25:38 [manager.py:391] +ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:216.8567180633545ms total_cost_time:216.9020175933838ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11134 prompt_cache_len:5151 prompt_cache_ratio:0.46263696784623676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 +DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10843729972839355 s +INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s +DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=124942762680647127012528418742735616080, time:1750767938.4636104s req_ids:[8] +DEBUG 06-24 20:25:38 [manager.py:391] +ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:219.4969654083252ms total_cost_time:219.54083442687988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11135 prompt_cache_len:5151 prompt_cache_ratio:0.46259541984732827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 +DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10860395431518555 s +INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11023378372192383 s +DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=121423580737532697336283195293369227221, time:1750767938.6822505s req_ids:[8] +DEBUG 06-24 20:25:38 [manager.py:391] +ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:211.6711139678955ms total_cost_time:211.7166519165039ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11136 prompt_cache_len:5151 prompt_cache_ratio:0.4625538793103448 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 +DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10858154296875 s +INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11037635803222656 s +DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=3125788643256561957500492272091304599, time:1750767938.9003363s req_ids:[8] +DEBUG 06-24 20:25:38 [manager.py:391] +ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:210.91604232788086ms total_cost_time:210.95991134643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11137 prompt_cache_len:5151 prompt_cache_ratio:0.4625123462332765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 +DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10831594467163086 s +INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.11016631126403809 s +DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=61874171904735315196740611368707132493, time:1750767939.1180692s req_ids:[8] +DEBUG 06-24 20:25:39 [manager.py:391] +ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:209.34247970581055ms total_cost_time:209.38491821289062ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11138 prompt_cache_len:5151 prompt_cache_ratio:0.4624708206141138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 +DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10866570472717285 s +INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.1104438304901123 s +DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=186881242741176309753745312091076405172, time:1750767939.3299415s req_ids:[8] +DEBUG 06-24 20:25:39 [manager.py:391] +ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:205.02614974975586ms total_cost_time:205.06954193115234ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11139 prompt_cache_len:5151 prompt_cache_ratio:0.46242930245084835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 +DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10898280143737793 s +INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s +DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=262671635216697498895206479420789481210, time:1750767939.5406687s req_ids:[8] +DEBUG 06-24 20:25:39 [manager.py:391] +ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:200.9906768798828ms total_cost_time:201.0347843170166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11140 prompt_cache_len:5151 prompt_cache_ratio:0.46238779174147215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 +DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10828852653503418 s +INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.10997629165649414 s +DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=188044664937380550806604728511659837786, time:1750767939.7468073s req_ids:[8] +DEBUG 06-24 20:25:39 [manager.py:391] +ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:362.0104789733887ms total_cost_time:362.05577850341797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11141 prompt_cache_len:5151 prompt_cache_ratio:0.4623462884839781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 +DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10895848274230957 s +INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11063981056213379 s +DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=337952715152166917315863431125231126135, time:1750767940.1153479s req_ids:[8] +DEBUG 06-24 20:25:40 [manager.py:391] +ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:205.6131362915039ms total_cost_time:205.65509796142578ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11142 prompt_cache_len:5151 prompt_cache_ratio:0.4623047926763597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 +DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10972762107849121 s +INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11156535148620605 s +DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=259900849962681352694072210396452252238, time:1750767940.3273528s req_ids:[8] +DEBUG 06-24 20:25:40 [manager.py:391] +ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:206.73680305480957ms total_cost_time:206.77947998046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11143 prompt_cache_len:5151 prompt_cache_ratio:0.46226330431661133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 +DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10932683944702148 s +INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s +DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=25046574423748518068777328530932896507, time:1750767940.540604s req_ids:[8] +DEBUG 06-24 20:25:40 [manager.py:391] +ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:206.0708999633789ms total_cost_time:206.11333847045898ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11144 prompt_cache_len:5151 prompt_cache_ratio:0.46222182340272794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 +DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s +INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s +DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=160821965760916896419444989153616375382, time:1750767940.75258s req_ids:[8] +DEBUG 06-24 20:25:40 [manager.py:391] +ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:202.77714729309082ms total_cost_time:202.83055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:11145 prompt_cache_len:5151 prompt_cache_ratio:0.46218034993270524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 +DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10945606231689453 s +INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11123228073120117 s +DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=259626571560270840883194891904077338875, time:1750767940.9622366s req_ids:[8] +DEBUG 06-24 20:25:40 [manager.py:391] +ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:215.3785228729248ms total_cost_time:215.42119979858398ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11146 prompt_cache_len:5151 prompt_cache_ratio:0.4621388839045397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 +DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:41 [batch.py:51] router release req id 8 +INFO 06-24 20:25:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s +INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.10964155197143555 s +DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=96998974849376860085037391574407460002, time:1750767941.183218s req_ids:[8] +DEBUG 06-24 20:25:41 [manager.py:391] +ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:206.24303817749023ms total_cost_time:206.28762245178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11147 prompt_cache_len:5151 prompt_cache_ratio:0.4620974253162286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 +DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s +INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.11078333854675293 s +DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=3653920264216194599469784658078446045, time:1750767941.397143s req_ids:[8] +DEBUG 06-24 20:25:41 [manager.py:391] +ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:368.4954643249512ms total_cost_time:368.54004859924316ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11148 prompt_cache_len:5151 prompt_cache_ratio:0.46205597416576966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 +DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s +INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030316352844238 s +DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=36642264767058044978014565974041359019, time:1750767941.7707489s req_ids:[8] +DEBUG 06-24 20:25:41 [manager.py:391] +ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:204.90264892578125ms total_cost_time:204.94604110717773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11149 prompt_cache_len:5151 prompt_cache_ratio:0.46201453045116153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 +DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s +INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s +DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=118839201808375947487591079649151722695, time:1750767941.9840114s req_ids:[8] +DEBUG 06-24 20:25:41 [manager.py:391] +ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:181.2129020690918ms total_cost_time:181.2584400177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11150 prompt_cache_len:5151 prompt_cache_ratio:0.4619730941704036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 +DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s +INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s +DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=107828279208627329649730299709555950553, time:1750767942.1694126s req_ids:[8] +DEBUG 06-24 20:25:42 [manager.py:391] +ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:201.4937400817871ms total_cost_time:201.53498649597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11151 prompt_cache_len:5151 prompt_cache_ratio:0.4619316653214958 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 +DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10853290557861328 s +INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11046028137207031 s +DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=54046315724757524841583394225002929322, time:1750767942.3775918s req_ids:[8] +DEBUG 06-24 20:25:42 [manager.py:391] +ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:207.2126865386963ms total_cost_time:207.25560188293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11152 prompt_cache_len:5151 prompt_cache_ratio:0.46189024390243905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 +DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s +INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11025691032409668 s +DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=189812344605512561412568087932388314541, time:1750767942.5932176s req_ids:[8] +DEBUG 06-24 20:25:42 [manager.py:391] +ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:203.19390296936035ms total_cost_time:203.23657989501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11153 prompt_cache_len:5151 prompt_cache_ratio:0.46184882991123466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 +DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10958409309387207 s +INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11149787902832031 s +DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=172551300430105657011214463668115915341, time:1750767942.801311s req_ids:[8] +DEBUG 06-24 20:25:42 [manager.py:391] +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:207.10301399230957ms total_cost_time:207.14759826660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11154 prompt_cache_len:5151 prompt_cache_ratio:0.46180742334588487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 +DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10775566101074219 s +INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.10972476005554199 s +DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=139648324707433889983516894084049022510, time:1750767943.015711s req_ids:[8] +DEBUG 06-24 20:25:43 [manager.py:391] +ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:377.8820037841797ms total_cost_time:377.9277801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11155 prompt_cache_len:5151 prompt_cache_ratio:0.46176602420439267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 +DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10924839973449707 s +INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.1111762523651123 s +DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=5203874965181619320558996852881165799, time:1750767943.3993924s req_ids:[8] +DEBUG 06-24 20:25:43 [manager.py:391] +DEBUG 06-24 20:25:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 46658.702 tokens/s +DEBUG 06-24 20:25:43 [stats.py:37] Avg prompt tokens throughput: 46650.324 tokens/s +DEBUG 06-24 20:25:43 [stats.py:37] Avg generate tokens throughput: 8.379 tokens/s +ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:208.48512649536133ms total_cost_time:208.5280418395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11156 prompt_cache_len:5151 prompt_cache_ratio:0.4617246324847616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 +DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10820555686950684 s +INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.11027288436889648 s +DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=49961118885089864852284929686999496263, time:1750767943.6125195s req_ids:[8] +DEBUG 06-24 20:25:43 [manager.py:391] +ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:207.21817016601562ms total_cost_time:207.2598934173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11157 prompt_cache_len:5151 prompt_cache_ratio:0.46168324818499595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 +DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10920524597167969 s +INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.11132979393005371 s +DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=329996763042401809050833955923634684589, time:1750767943.8286061s req_ids:[8] +DEBUG 06-24 20:25:43 [manager.py:391] +ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:208.87184143066406ms total_cost_time:208.91571044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11158 prompt_cache_len:5151 prompt_cache_ratio:0.4616418713031009 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 +DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s +INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.1098475456237793 s +DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=244804736377187328062874283071988572325, time:1750767944.0455852s req_ids:[8] +DEBUG 06-24 20:25:44 [manager.py:391] +ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:210.89863777160645ms total_cost_time:210.94250679016113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11159 prompt_cache_len:5151 prompt_cache_ratio:0.4616005018370822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 +DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.10902547836303711 s +INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s +DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=330148272395400578762552907403444886990, time:1750767944.270929s req_ids:[8] +DEBUG 06-24 20:25:44 [manager.py:391] +ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:221.82679176330566ms total_cost_time:221.86923027038574ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11160 prompt_cache_len:5151 prompt_cache_ratio:0.4615591397849462 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 +DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:44 [batch.py:51] router release req id 8 +DEBUG 06-24 20:25:44 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:44 [manager.py:283] +DEBUG 06-24 20:25:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:44 [manager.py:284] +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.10596561431884766 s +INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.10811305046081543 s +DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=57409527709465018589801429172703123517, time:1750767944.490232s req_ids:[8] +DEBUG 06-24 20:25:44 [manager.py:391] +ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:212.0990753173828ms total_cost_time:212.1427059173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11161 prompt_cache_len:5151 prompt_cache_ratio:0.4615177851447003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 +DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.20920705795288086 s +INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.21117901802062988 s +DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=217664819202473743652153413774704696039, time:1750767944.840448s req_ids:[8] +DEBUG 06-24 20:25:44 [manager.py:391] +ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:323.2393264770508ms total_cost_time:323.2836723327637ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11162 prompt_cache_len:5151 prompt_cache_ratio:0.4614764379143523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 +DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10878229141235352 s +INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11085176467895508 s +DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=46445724330574002254492969347188547297, time:1750767945.0343122s req_ids:[8] +DEBUG 06-24 20:25:45 [manager.py:391] +ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:210.28566360473633ms total_cost_time:210.32953262329102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11163 prompt_cache_len:5151 prompt_cache_ratio:0.4614350980919108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 +DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10823440551757812 s +INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11016654968261719 s +DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=149074844862896461652644093698713273875, time:1750767945.2513912s req_ids:[8] +DEBUG 06-24 20:25:45 [manager.py:391] +ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:207.85021781921387ms total_cost_time:207.89337158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11164 prompt_cache_len:5151 prompt_cache_ratio:0.46139376567538515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 +DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10852193832397461 s +INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s +DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=119763768197880086668122498636452411216, time:1750767945.4649055s req_ids:[8] +DEBUG 06-24 20:25:45 [manager.py:391] +ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:218.07098388671875ms total_cost_time:218.11485290527344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11165 prompt_cache_len:5151 prompt_cache_ratio:0.46135244066278547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 +DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s +INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s +DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=84079207668812155783665912506435679124, time:1750767945.6950543s req_ids:[8] +DEBUG 06-24 20:25:45 [manager.py:391] +ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:213.6697769165039ms total_cost_time:213.71173858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11166 prompt_cache_len:5151 prompt_cache_ratio:0.4613111230521225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 +DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10805106163024902 s +INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.10978078842163086 s +DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=165883030364847782361472892921901271213, time:1750767945.910225s req_ids:[8] +DEBUG 06-24 20:25:45 [manager.py:391] +ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:169.32439804077148ms total_cost_time:169.36659812927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11167 prompt_cache_len:5151 prompt_cache_ratio:0.4612698128414077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 +DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.10834860801696777 s +INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.1100151538848877 s +DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=54132622284027162537311498691708173450, time:1750767946.0890448s req_ids:[8] +DEBUG 06-24 20:25:46 [manager.py:391] +ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:199.88489151000977ms total_cost_time:199.93138313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11168 prompt_cache_len:5151 prompt_cache_ratio:0.4612285100286533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 +DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.1088714599609375 s +INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.11089110374450684 s +DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=24537302393121698266634140219236330365, time:1750767946.29093s req_ids:[8] +DEBUG 06-24 20:25:46 [manager.py:391] +ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:201.5364170074463ms total_cost_time:201.57909393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11169 prompt_cache_len:5151 prompt_cache_ratio:0.4611872146118721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 +DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.3106367588043213 s +INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.3124217987060547 s +DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=123982638266107499824677076915041570758, time:1750767946.7068677s req_ids:[8] +DEBUG 06-24 20:25:46 [manager.py:391] +ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:422.3780632019043ms total_cost_time:422.4209785461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11170 prompt_cache_len:5151 prompt_cache_ratio:0.4611459265890779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 +DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.10792160034179688 s +INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.10965633392333984 s +DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=106452595989471815785371611743839577572, time:1750767946.9303849s req_ids:[8] +DEBUG 06-24 20:25:46 [manager.py:391] +INFO 06-24 20:25:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:209.23089981079102ms total_cost_time:209.275484085083ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11171 prompt_cache_len:5151 prompt_cache_ratio:0.4611046459582848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 +DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.1084897518157959 s +INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s +DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=114685928309222512310488314422587807485, time:1750767947.1451664s req_ids:[8] +DEBUG 06-24 20:25:47 [manager.py:391] +ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:209.01155471801758ms total_cost_time:209.05637741088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11172 prompt_cache_len:5151 prompt_cache_ratio:0.4610633727175081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 +DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10873126983642578 s +INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049079895019531 s +DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=72316770941450827702610844038710298986, time:1750767947.36655s req_ids:[8] +DEBUG 06-24 20:25:47 [manager.py:391] +ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:214.21480178833008ms total_cost_time:214.25747871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11173 prompt_cache_len:5151 prompt_cache_ratio:0.46102210686476325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 +DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10784077644348145 s +INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.10956072807312012 s +DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=87817464921592979368196152741620303663, time:1750767947.581398s req_ids:[8] +DEBUG 06-24 20:25:47 [manager.py:391] +ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:205.74522018432617ms total_cost_time:205.78789710998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11174 prompt_cache_len:5151 prompt_cache_ratio:0.46098084839806697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 +DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10854935646057129 s +INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11048412322998047 s +DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=148082750295088411465971644469628405429, time:1750767947.8032556s req_ids:[8] +DEBUG 06-24 20:25:47 [manager.py:391] +ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:220.78561782836914ms total_cost_time:220.82948684692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11175 prompt_cache_len:5151 prompt_cache_ratio:0.46093959731543627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 +DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s +INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.11111927032470703 s +DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=128340550515642918757719751375923611887, time:1750767948.021261s req_ids:[8] +DEBUG 06-24 20:25:48 [manager.py:391] +ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:209.98811721801758ms total_cost_time:210.03031730651855ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11176 prompt_cache_len:5151 prompt_cache_ratio:0.46089835361488907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 +DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10781693458557129 s +INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.10961723327636719 s +DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=193775786252250670810907511983021223711, time:1750767948.235575s req_ids:[8] +DEBUG 06-24 20:25:48 [manager.py:391] +ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:365.0550842285156ms total_cost_time:365.100622177124ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11177 prompt_cache_len:5151 prompt_cache_ratio:0.46085711729444395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 +DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s +INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s +DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=319276296615278593772124852534154969861, time:1750767948.6057298s req_ids:[8] +DEBUG 06-24 20:25:48 [manager.py:391] +ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:198.70710372924805ms total_cost_time:198.75144958496094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11178 prompt_cache_len:5151 prompt_cache_ratio:0.46081588835212023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 +DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10766768455505371 s +INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.10952353477478027 s +DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=147860356139436930635487677766282570104, time:1750767948.814506s req_ids:[8] +DEBUG 06-24 20:25:48 [manager.py:391] +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:201.08962059020996ms total_cost_time:201.13468170166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11179 prompt_cache_len:5151 prompt_cache_ratio:0.4607746667859379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 +DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10838532447814941 s +INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11033797264099121 s +DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=189293136119145321284639818961578370738, time:1750767949.019157s req_ids:[8] +DEBUG 06-24 20:25:49 [manager.py:391] +ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:206.23135566711426ms total_cost_time:206.27641677856445ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11180 prompt_cache_len:5151 prompt_cache_ratio:0.46073345259391774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 +DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10854458808898926 s +INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11053681373596191 s +DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=160956794957745848934778320432889249197, time:1750767949.2341528s req_ids:[8] +DEBUG 06-24 20:25:49 [manager.py:391] +ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:206.5746784210205ms total_cost_time:206.6178321838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11181 prompt_cache_len:5151 prompt_cache_ratio:0.460692245774081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 +DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10897278785705566 s +INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11072397232055664 s +DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=227258375166824062241581769556234726236, time:1750767949.4453437s req_ids:[8] +DEBUG 06-24 20:25:49 [manager.py:391] +ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:207.41772651672363ms total_cost_time:207.4596881866455ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11182 prompt_cache_len:5151 prompt_cache_ratio:0.46065104632445003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 +DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s +INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11041808128356934 s +DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=311479246608194945273891753388793926207, time:1750767949.6600506s req_ids:[8] +DEBUG 06-24 20:25:49 [manager.py:391] +ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:208.97388458251953ms total_cost_time:209.01799201965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11183 prompt_cache_len:5151 prompt_cache_ratio:0.46060985424304746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 +DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.3114607334136963 s +INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.31347131729125977 s +DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=280762290074625127239324814370163256611, time:1750767950.075504s req_ids:[8] +DEBUG 06-24 20:25:50 [manager.py:391] +ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:416.165828704834ms total_cost_time:416.2101745605469ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11184 prompt_cache_len:5151 prompt_cache_ratio:0.460568669527897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 +DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10894536972045898 s +INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11070537567138672 s +DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=188519534426275655655265502203566423407, time:1750767950.298984s req_ids:[8] +DEBUG 06-24 20:25:50 [manager.py:391] +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:212.0816707611084ms total_cost_time:212.1264934539795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11185 prompt_cache_len:5151 prompt_cache_ratio:0.4605274921770228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 +DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.11035990715026855 s +INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.1122446060180664 s +DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=64999397793335607774646753802047469436, time:1750767950.5160794s req_ids:[8] +DEBUG 06-24 20:25:50 [manager.py:391] +ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:210.99400520324707ms total_cost_time:211.03954315185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11186 prompt_cache_len:5151 prompt_cache_ratio:0.46048632218844987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 +DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10893917083740234 s +INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11069536209106445 s +DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=49968410958006549512276453064409451426, time:1750767950.7346478s req_ids:[8] +DEBUG 06-24 20:25:50 [manager.py:391] +ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:208.99343490600586ms total_cost_time:209.03968811035156ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11187 prompt_cache_len:5151 prompt_cache_ratio:0.4604451595602038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 +DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s +INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11144113540649414 s +DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=265119435943804343477315773219832739768, time:1750767950.9482555s req_ids:[8] +DEBUG 06-24 20:25:50 [manager.py:391] +ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:209.77091789245605ms total_cost_time:209.81478691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11188 prompt_cache_len:5151 prompt_cache_ratio:0.46040400429031103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 +DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10801911354064941 s +INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s +DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=74034475414137444883916761557159536649, time:1750767951.1714928s req_ids:[8] +DEBUG 06-24 20:25:51 [manager.py:391] +ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:218.77312660217285ms total_cost_time:218.82987022399902ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:11189 prompt_cache_len:5151 prompt_cache_ratio:0.46036285637679863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 +DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.11174511909484863 s +INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.11390519142150879 s +DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=144891372487941409104192668260801218380, time:1750767951.38967s req_ids:[8] +DEBUG 06-24 20:25:51 [manager.py:391] +ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:372.99084663391113ms total_cost_time:373.035192489624ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11190 prompt_cache_len:5151 prompt_cache_ratio:0.4603217158176944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 +DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10808205604553223 s +INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.10991144180297852 s +DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=338841170270227160347942460682715518200, time:1750767951.7677221s req_ids:[8] +DEBUG 06-24 20:25:51 [manager.py:391] +ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:211.62080764770508ms total_cost_time:211.66563034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11191 prompt_cache_len:5151 prompt_cache_ratio:0.4602805826110267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 +DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10843706130981445 s +INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.11047792434692383 s +DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=149352152380683973233077147035485192505, time:1750767951.9864156s req_ids:[8] +DEBUG 06-24 20:25:51 [manager.py:391] +ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:207.29517936706543ms total_cost_time:207.3378562927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11192 prompt_cache_len:5151 prompt_cache_ratio:0.46023945675482486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 +DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s +INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11047196388244629 s +DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=73488640495548751367252325088386536898, time:1750767952.2127275s req_ids:[8] +DEBUG 06-24 20:25:52 [manager.py:391] +ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:221.29225730895996ms total_cost_time:221.34947776794434ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:11193 prompt_cache_len:5151 prompt_cache_ratio:0.46019833824711875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 +DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s +INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11184406280517578 s +DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=130893527658173060171342307907102459381, time:1750767952.4365778s req_ids:[8] +DEBUG 06-24 20:25:52 [manager.py:391] +ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:215.70634841918945ms total_cost_time:215.74854850769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11194 prompt_cache_len:5151 prompt_cache_ratio:0.46015722708593887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 +DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10840082168579102 s +INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11023139953613281 s +DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=170637831079713259155186482168321421346, time:1750767952.6523867s req_ids:[8] +DEBUG 06-24 20:25:52 [manager.py:391] +ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:211.87734603881836ms total_cost_time:211.93528175354004ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:11195 prompt_cache_len:5151 prompt_cache_ratio:0.46011612326931667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 +DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10932016372680664 s +INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s +DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=80690822138922442665920115762477770843, time:1750767952.8757017s req_ids:[8] +DEBUG 06-24 20:25:52 [manager.py:391] +ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:215.50393104553223ms total_cost_time:215.5461311340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11196 prompt_cache_len:5151 prompt_cache_ratio:0.460075026795284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 +DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10933327674865723 s +INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11133050918579102 s +DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=85829892216819711307675662536282017392, time:1750767953.0908775s req_ids:[8] +DEBUG 06-24 20:25:53 [manager.py:391] +ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:205.80267906188965ms total_cost_time:205.85393905639648ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11197 prompt_cache_len:5151 prompt_cache_ratio:0.4600339376618737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 +INFO 06-24 20:25:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:25:53 [statics_utils.py:24] mean first cost: 228.53138703912163 ms +INFO 06-24 20:25:53 [statics_utils.py:24] mean per token cost: 0.06495310405514507 ms +DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10874748229980469 s +INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.1105809211730957 s +DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=263731771409684962267467292344641673315, time:1750767953.306762s req_ids:[8] +DEBUG 06-24 20:25:53 [manager.py:391] +ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:212.3258113861084ms total_cost_time:212.3711109161377ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11198 prompt_cache_len:5151 prompt_cache_ratio:0.45999285586711913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 +DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s +INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s +DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=133018165551858372697049718840142825231, time:1750767953.5218735s req_ids:[8] +DEBUG 06-24 20:25:53 [manager.py:391] +DEBUG 06-24 20:25:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 47492.335 tokens/s +DEBUG 06-24 20:25:53 [stats.py:37] Avg prompt tokens throughput: 47483.839 tokens/s +DEBUG 06-24 20:25:53 [stats.py:37] Avg generate tokens throughput: 8.496 tokens/s +ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:366.7020797729492ms total_cost_time:366.7478561401367ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11199 prompt_cache_len:5151 prompt_cache_ratio:0.4599517814090544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 +DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10956907272338867 s +INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11136507987976074 s +DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=80880279672633625537068992083431662177, time:1750767953.895029s req_ids:[8] +DEBUG 06-24 20:25:53 [manager.py:391] +ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:206.09712600708008ms total_cost_time:206.14075660705566ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11200 prompt_cache_len:5151 prompt_cache_ratio:0.4599107142857143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 +DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10913801193237305 s +INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11109447479248047 s +DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=317843542865908022763827005595154390778, time:1750767954.1077976s req_ids:[8] +DEBUG 06-24 20:25:54 [manager.py:391] +ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:208.16850662231445ms total_cost_time:208.21285247802734ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11201 prompt_cache_len:5151 prompt_cache_ratio:0.45986965449513434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 +DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s +INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11093807220458984 s +DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=286875971114440177236939576407735518126, time:1750767954.323387s req_ids:[8] +DEBUG 06-24 20:25:54 [manager.py:391] +ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:210.73579788208008ms total_cost_time:210.78038215637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11202 prompt_cache_len:5151 prompt_cache_ratio:0.4598286020353508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 +DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10886478424072266 s +INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s +DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=231109430398236148908368708013247645028, time:1750767954.5386887s req_ids:[8] +DEBUG 06-24 20:25:54 [manager.py:391] +ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:203.91607284545898ms total_cost_time:203.95755767822266ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11203 prompt_cache_len:5151 prompt_cache_ratio:0.4597875569044006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 +DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10821819305419922 s +INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11023163795471191 s +DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=77626396419253498207176511821291314283, time:1750767954.7493284s req_ids:[8] +DEBUG 06-24 20:25:54 [manager.py:391] +ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:208.12463760375977ms total_cost_time:208.16683769226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11204 prompt_cache_len:5151 prompt_cache_ratio:0.4597465191003213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 +DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10900020599365234 s +INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s +DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=23150143155328957043200738156119095887, time:1750767954.9646504s req_ids:[8] +DEBUG 06-24 20:25:54 [manager.py:391] +ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:211.8968963623047ms total_cost_time:211.94195747375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11205 prompt_cache_len:5151 prompt_cache_ratio:0.4597054886211513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 +DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10993266105651855 s +INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11190581321716309 s +DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=55767900246292417211155152748313203398, time:1750767955.1884246s req_ids:[8] +DEBUG 06-24 20:25:55 [manager.py:391] +ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:392.2703266143799ms total_cost_time:392.3149108886719ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11206 prompt_cache_len:5151 prompt_cache_ratio:0.4596644654649295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 +DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s +INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11023950576782227 s +DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=164527572803977870745295268566261406795, time:1750767955.5812283s req_ids:[8] +DEBUG 06-24 20:25:55 [manager.py:391] +ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:210.73412895202637ms total_cost_time:210.77799797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11207 prompt_cache_len:5151 prompt_cache_ratio:0.4596234496296957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 +DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10875511169433594 s +INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s +DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=249332543064611217522547137842540590165, time:1750767955.81096s req_ids:[8] +DEBUG 06-24 20:25:55 [manager.py:391] +ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:225.1737117767334ms total_cost_time:225.21710395812988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11208 prompt_cache_len:5151 prompt_cache_ratio:0.45958244111349034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 +DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10975861549377441 s +INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11146712303161621 s +DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=217816344161342361305824930591571254752, time:1750767956.02838s req_ids:[8] +DEBUG 06-24 20:25:56 [manager.py:391] +ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:207.55887031555176ms total_cost_time:207.60273933410645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11209 prompt_cache_len:5151 prompt_cache_ratio:0.45954143991435453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 +DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10924673080444336 s +INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11103963851928711 s +DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=123399736346916210884584408429838890547, time:1750767956.242102s req_ids:[8] +DEBUG 06-24 20:25:56 [manager.py:391] +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:226.98593139648438ms total_cost_time:227.03099250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11210 prompt_cache_len:5151 prompt_cache_ratio:0.4595004460303301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 +DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s +INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.10961222648620605 s +DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=192619073638595655446233641955157924952, time:1750767956.4752457s req_ids:[8] +DEBUG 06-24 20:25:56 [manager.py:391] +ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:208.62746238708496ms total_cost_time:208.67061614990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11211 prompt_cache_len:5151 prompt_cache_ratio:0.4594594594594595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 +DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s +INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11108040809631348 s +DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=32951570599200842330775672443641526263, time:1750767956.6913674s req_ids:[8] +DEBUG 06-24 20:25:56 [manager.py:391] +ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:209.41853523254395ms total_cost_time:209.46049690246582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11212 prompt_cache_len:5151 prompt_cache_ratio:0.45941848019978593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 +DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s +INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11007857322692871 s +DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=166118293358450063680863634343748821579, time:1750767956.9082463s req_ids:[8] +DEBUG 06-24 20:25:56 [manager.py:391] +ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:364.4275665283203ms total_cost_time:364.4838333129883ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:11213 prompt_cache_len:5151 prompt_cache_ratio:0.4593775082493534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 +DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.11087226867675781 s +INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11270666122436523 s +DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=293739073556052417241431033732295422498, time:1750767957.2756562s req_ids:[8] +DEBUG 06-24 20:25:57 [manager.py:391] +ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:210.9227180480957ms total_cost_time:210.9668254852295ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11214 prompt_cache_len:5151 prompt_cache_ratio:0.4593365436062065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 +DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10880589485168457 s +INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11055517196655273 s +DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=52909281713978269269218247155557999196, time:1750767957.4925122s req_ids:[8] +DEBUG 06-24 20:25:57 [manager.py:391] +ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:199.72801208496094ms total_cost_time:199.77116584777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11215 prompt_cache_len:5151 prompt_cache_ratio:0.45929558626839057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 +DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s +INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.10980749130249023 s +DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=319350523074503099941646626693374884664, time:1750767957.69783s req_ids:[8] +DEBUG 06-24 20:25:57 [manager.py:391] +ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:202.26502418518066ms total_cost_time:202.30770111083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11216 prompt_cache_len:5151 prompt_cache_ratio:0.4592546362339515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 +DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10936689376831055 s +INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11116886138916016 s +DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=22957089167045619743112453645183691208, time:1750767957.9077053s req_ids:[8] +DEBUG 06-24 20:25:57 [manager.py:391] +ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:206.3891887664795ms total_cost_time:206.43258094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11217 prompt_cache_len:5151 prompt_cache_ratio:0.4592136935009361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 +DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s +INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11003684997558594 s +DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=163067805529481262471084503223152821526, time:1750767958.1205783s req_ids:[8] +DEBUG 06-24 20:25:58 [manager.py:391] +ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:205.81698417663574ms total_cost_time:205.85989952087402ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11218 prompt_cache_len:5151 prompt_cache_ratio:0.4591727580673917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 +DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.10839676856994629 s +INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007428169250488 s +DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=39764052644217033536907195100089712839, time:1750767958.332651s req_ids:[8] +DEBUG 06-24 20:25:58 [manager.py:391] +ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:199.9807357788086ms total_cost_time:200.0260353088379ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11219 prompt_cache_len:5151 prompt_cache_ratio:0.4591318299313664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 +DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.20824027061462402 s +INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.2098243236541748 s +DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=299066448191850611521187492018277222152, time:1750767958.6729167s req_ids:[8] +DEBUG 06-24 20:25:58 [manager.py:391] +ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:315.0198459625244ms total_cost_time:315.0625228881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11220 prompt_cache_len:5151 prompt_cache_ratio:0.4590909090909091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 +DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.10952544212341309 s +INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11131620407104492 s +DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=307380978469258506185536755668808576869, time:1750767958.8622751s req_ids:[8] +DEBUG 06-24 20:25:58 [manager.py:391] +ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:200.0105381011963ms total_cost_time:200.05416870117188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11221 prompt_cache_len:5151 prompt_cache_ratio:0.45904999554406917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 +DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.1086118221282959 s +INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.1105804443359375 s +DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=158084384804882526376986441716304266200, time:1750767959.0707095s req_ids:[8] +DEBUG 06-24 20:25:59 [manager.py:391] +ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:208.160400390625ms total_cost_time:208.2047462463379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11222 prompt_cache_len:5151 prompt_cache_ratio:0.45900908928889683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 +DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:25:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.1090390682220459 s +INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11094546318054199 s +DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=301473186249150482031430453549109251318, time:1750767959.2845762s req_ids:[8] +DEBUG 06-24 20:25:59 [manager.py:391] +ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:205.2004337310791ms total_cost_time:205.2445411682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11223 prompt_cache_len:5151 prompt_cache_ratio:0.45896819032344294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 +DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10946536064147949 s +INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11149406433105469 s +DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=28504294760429664993602488854901057735, time:1750767959.495689s req_ids:[8] +DEBUG 06-24 20:25:59 [manager.py:391] +ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:206.23183250427246ms total_cost_time:206.27427101135254ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11224 prompt_cache_len:5151 prompt_cache_ratio:0.4589272986457591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 +DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s +INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11065816879272461 s +DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=183686639091116065320504674348004302075, time:1750767959.7092662s req_ids:[8] +DEBUG 06-24 20:25:59 [manager.py:391] +ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:204.16688919067383ms total_cost_time:204.2231559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:11225 prompt_cache_len:5151 prompt_cache_ratio:0.45888641425389753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 +DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:25:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10824942588806152 s +INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101081371307373 s +DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=17229421081879705955672328151934889576, time:1750767959.9185324s req_ids:[8] +DEBUG 06-24 20:25:59 [manager.py:391] +ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:204.20312881469727ms total_cost_time:204.24699783325195ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11226 prompt_cache_len:5151 prompt_cache_ratio:0.45884553714591125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 +DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.10955500602722168 s +INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.11152005195617676 s +DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=31659782244698662635339579640156553230, time:1750767960.1293104s req_ids:[8] +DEBUG 06-24 20:26:00 [manager.py:391] +ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:207.0779800415039ms total_cost_time:207.20577239990234ms,out_token_counter:1 mean_per_token_cost_time: 0.1277923583984375ms prompt_token_num:11227 prompt_cache_len:5151 prompt_cache_ratio:0.4588046673198539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 +DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.31215572357177734 s +INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.3131403923034668 s +DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=244888835574884074259595726065343259247, time:1750767960.554022s req_ids:[8] +DEBUG 06-24 20:26:00 [manager.py:391] +ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:388.5037899017334ms total_cost_time:388.5462284088135ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11228 prompt_cache_len:5151 prompt_cache_ratio:0.4587638047737798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 +DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s +INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s +DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=262334479289199117656458803378175858252, time:1750767960.7410038s req_ids:[8] +DEBUG 06-24 20:26:00 [manager.py:391] +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:205.430269241333ms total_cost_time:205.4729461669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11229 prompt_cache_len:5151 prompt_cache_ratio:0.45872294950574405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 +DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.10819125175476074 s +INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.10941743850708008 s +DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=301890032422770017905165957991238444691, time:1750767960.9533818s req_ids:[8] +DEBUG 06-24 20:26:00 [manager.py:391] +ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:203.4003734588623ms total_cost_time:203.4444808959961ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11230 prompt_cache_len:5151 prompt_cache_ratio:0.45868210151380234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 +DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s +INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.10985302925109863 s +DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=305673189611192845774694390486587168902, time:1750767961.1626606s req_ids:[8] +DEBUG 06-24 20:26:01 [manager.py:391] +ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:196.35820388793945ms total_cost_time:196.40016555786133ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11231 prompt_cache_len:5151 prompt_cache_ratio:0.45864126079601103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 +DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s +INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.1105949878692627 s +DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=174772808380784528423380686181676252198, time:1750767961.36501s req_ids:[8] +DEBUG 06-24 20:26:01 [manager.py:391] +ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:201.9329071044922ms total_cost_time:201.97439193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11232 prompt_cache_len:5151 prompt_cache_ratio:0.45860042735042733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 +DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.11081147193908691 s +INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11329960823059082 s +DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=272905921339039769932231050802913662488, time:1750767961.573702s req_ids:[8] +DEBUG 06-24 20:26:01 [manager.py:391] +ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:201.37739181518555ms total_cost_time:201.42078399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11233 prompt_cache_len:5151 prompt_cache_ratio:0.4585596011751091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 +DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10803723335266113 s +INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11013126373291016 s +DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=114638664198897981987831844213670532265, time:1750767961.7821412s req_ids:[8] +DEBUG 06-24 20:26:01 [manager.py:391] +ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:205.20544052124023ms total_cost_time:205.26385307312012ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:11234 prompt_cache_len:5151 prompt_cache_ratio:0.45851878226811466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 +DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10880136489868164 s +INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089563369750977 s +DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=12365548228051085034294240206831260674, time:1750767961.9940774s req_ids:[8] +DEBUG 06-24 20:26:01 [manager.py:391] +ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:370.73755264282227ms total_cost_time:370.78070640563965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11235 prompt_cache_len:5151 prompt_cache_ratio:0.45847797062750334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 +DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10906291007995605 s +INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s +DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=148676860203763872233086019394765005131, time:1750767962.370223s req_ids:[8] +DEBUG 06-24 20:26:02 [manager.py:391] +ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:212.47076988220215ms total_cost_time:212.51463890075684ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11236 prompt_cache_len:5151 prompt_cache_ratio:0.458437166251335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 +DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10925436019897461 s +INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.11114811897277832 s +DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=221046936391536458802155584223302532467, time:1750767962.595492s req_ids:[8] +DEBUG 06-24 20:26:02 [manager.py:391] +ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:215.87252616882324ms total_cost_time:215.91567993164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11237 prompt_cache_len:5151 prompt_cache_ratio:0.4583963691376702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 +DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10960984230041504 s +INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.11165714263916016 s +DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=242569650793657085624521258133671531599, time:1750767962.8130102s req_ids:[8] +DEBUG 06-24 20:26:02 [manager.py:391] +ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:212.3270034790039ms total_cost_time:212.3711109161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11238 prompt_cache_len:5151 prompt_cache_ratio:0.4583555792845702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 +DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10921549797058105 s +INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.11115503311157227 s +DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=308993720512887489016386776826550996017, time:1750767963.0311773s req_ids:[8] +DEBUG 06-24 20:26:03 [manager.py:391] +ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:203.59396934509277ms total_cost_time:203.64022254943848ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11239 prompt_cache_len:5151 prompt_cache_ratio:0.458314796690097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 +DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10904431343078613 s +INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.1111452579498291 s +DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=99044722305033676493800813008332797978, time:1750767963.2415202s req_ids:[8] +DEBUG 06-24 20:26:03 [manager.py:391] +ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:211.12871170043945ms total_cost_time:211.17281913757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11240 prompt_cache_len:5151 prompt_cache_ratio:0.45827402135231315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 +DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10951042175292969 s +INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.11151885986328125 s +DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=55375619301397570329370247801168319216, time:1750767963.457796s req_ids:[8] +DEBUG 06-24 20:26:03 [manager.py:391] +ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:26:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 47032.278 tokens/s +DEBUG 06-24 20:26:03 [stats.py:37] Avg prompt tokens throughput: 47023.796 tokens/s +DEBUG 06-24 20:26:03 [stats.py:37] Avg generate tokens throughput: 8.482 tokens/s +INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:200.67381858825684ms total_cost_time:200.71649551391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11241 prompt_cache_len:5151 prompt_cache_ratio:0.4582332532692821 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 +DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.3119161128997803 s +INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.31391477584838867 s +DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=153901134923433956679032741580141758220, time:1750767963.8687677s req_ids:[8] +DEBUG 06-24 20:26:03 [manager.py:391] +ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:415.91429710388184ms total_cost_time:415.9576892852783ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11242 prompt_cache_len:5151 prompt_cache_ratio:0.4581924924390678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 +DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10954427719116211 s +INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11155509948730469 s +DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=291187297143050862178532218198044812676, time:1750767964.0920463s req_ids:[8] +DEBUG 06-24 20:26:04 [manager.py:391] +ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:211.93981170654297ms total_cost_time:211.98534965515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11243 prompt_cache_len:5151 prompt_cache_ratio:0.4581517388597349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 +DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10998868942260742 s +INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11206722259521484 s +DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=270423514890344864862399757824728430659, time:1750767964.3119063s req_ids:[8] +DEBUG 06-24 20:26:04 [manager.py:391] +ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:212.27216720581055ms total_cost_time:212.31627464294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11244 prompt_cache_len:5151 prompt_cache_ratio:0.458110992529349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 +DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10653829574584961 s +INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.10851645469665527 s +DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=195776544748432086561086287990080593810, time:1750767964.5288057s req_ids:[8] +DEBUG 06-24 20:26:04 [manager.py:391] +ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:210.56723594665527ms total_cost_time:210.5886936187744ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11245 prompt_cache_len:5151 prompt_cache_ratio:0.458070253445976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 +DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.1096639633178711 s +INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s +DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=219274744085130042291280004643340980841, time:1750767964.7509363s req_ids:[8] +DEBUG 06-24 20:26:04 [manager.py:391] +ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:208.73188972473145ms total_cost_time:208.77504348754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11246 prompt_cache_len:5151 prompt_cache_ratio:0.45802952160768273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 +DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s +INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11030268669128418 s +DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=335649374393550114723614249435161696338, time:1750767964.9576435s req_ids:[8] +DEBUG 06-24 20:26:04 [manager.py:391] +ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:209.7790241241455ms total_cost_time:209.8255157470703ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11247 prompt_cache_len:5151 prompt_cache_ratio:0.4579887970125367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 +DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10883975028991699 s +INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11080694198608398 s +DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=193489682365858112513942442325673003307, time:1750767965.1785867s req_ids:[8] +DEBUG 06-24 20:26:05 [manager.py:391] +ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:377.579927444458ms total_cost_time:377.6247501373291ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11248 prompt_cache_len:5151 prompt_cache_ratio:0.45794807965860596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 +DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10851073265075684 s +INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.10966610908508301 s +DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=147072921569340290482623928426686444997, time:1750767965.5567265s req_ids:[8] +DEBUG 06-24 20:26:05 [manager.py:391] +ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:206.50196075439453ms total_cost_time:206.54559135437012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11249 prompt_cache_len:5151 prompt_cache_ratio:0.45790736954395944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 +DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10911059379577637 s +INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021947860717773 s +DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=338437693473026976019893062755860321127, time:1750767965.7717526s req_ids:[8] +DEBUG 06-24 20:26:05 [manager.py:391] +ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:207.83662796020508ms total_cost_time:207.88025856018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11250 prompt_cache_len:5151 prompt_cache_ratio:0.45786666666666664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 +DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10976719856262207 s +INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11104297637939453 s +DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=160647839520372619481269839033520612532, time:1750767965.9868262s req_ids:[8] +DEBUG 06-24 20:26:05 [manager.py:391] +ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:205.50203323364258ms total_cost_time:205.54685592651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11251 prompt_cache_len:5151 prompt_cache_ratio:0.4578259710247978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 +DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10916495323181152 s +INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11030387878417969 s +DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=126556329494024880215464756933356050355, time:1750767966.198195s req_ids:[8] +DEBUG 06-24 20:26:06 [manager.py:391] +ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:219.54798698425293ms total_cost_time:219.59209442138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11252 prompt_cache_len:5151 prompt_cache_ratio:0.45778528261642376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 +DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s +INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11048054695129395 s +DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=62187457971661103540520164136833608530, time:1750767966.4298642s req_ids:[8] +DEBUG 06-24 20:26:06 [manager.py:391] +ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:215.7280445098877ms total_cost_time:215.7738208770752ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11253 prompt_cache_len:5151 prompt_cache_ratio:0.4577446014396161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 +DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10944795608520508 s +INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11074185371398926 s +DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=131838611028285146385984241394885899351, time:1750767966.64681s req_ids:[8] +DEBUG 06-24 20:26:06 [manager.py:391] +ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:208.62460136413574ms total_cost_time:208.66823196411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11254 prompt_cache_len:5151 prompt_cache_ratio:0.45770392749244715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 +DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10847711563110352 s +INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963821411132812 s +DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=241857908542651133902165598773542387523, time:1750767966.8622508s req_ids:[8] +DEBUG 06-24 20:26:06 [manager.py:391] +ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:206.4988613128662ms total_cost_time:206.5443992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11255 prompt_cache_len:5151 prompt_cache_ratio:0.4576632607729898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 +DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10992908477783203 s +INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s +DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=138051315716398893646675880817545965873, time:1750767967.0732079s req_ids:[8] +DEBUG 06-24 20:26:07 [manager.py:391] +ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:202.2709846496582ms total_cost_time:202.3155689239502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11256 prompt_cache_len:5151 prompt_cache_ratio:0.4576226012793177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 +DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10862278938293457 s +INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.10985612869262695 s +DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=1494732066993444599016582861791172614, time:1750767967.2836668s req_ids:[8] +DEBUG 06-24 20:26:07 [manager.py:391] +ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:379.406213760376ms total_cost_time:379.4515132904053ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11257 prompt_cache_len:5151 prompt_cache_ratio:0.4575819490095052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 +DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10995006561279297 s +INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11110949516296387 s +DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=203125469333152460123102920036630721850, time:1750767967.668061s req_ids:[8] +DEBUG 06-24 20:26:07 [manager.py:391] +ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:213.69552612304688ms total_cost_time:213.73915672302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11258 prompt_cache_len:5151 prompt_cache_ratio:0.4575413039616273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 +DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.1096796989440918 s +INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s +DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=217966987924815486190498413785445177438, time:1750767967.8890185s req_ids:[8] +DEBUG 06-24 20:26:07 [manager.py:391] +ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:204.84375953674316ms total_cost_time:204.88691329956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11259 prompt_cache_len:5151 prompt_cache_ratio:0.45750066613375967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 +DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10817575454711914 s +INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.1092677116394043 s +DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=308127273991097065733287733968035147704, time:1750767968.100468s req_ids:[8] +DEBUG 06-24 20:26:08 [manager.py:391] +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:204.5130729675293ms total_cost_time:204.55574989318848ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11260 prompt_cache_len:5151 prompt_cache_ratio:0.4574600355239787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 +DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10820341110229492 s +INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.10946798324584961 s +DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=258179461002424836209556900699704093445, time:1750767968.312114s req_ids:[8] +DEBUG 06-24 20:26:08 [manager.py:391] +ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:205.31797409057617ms total_cost_time:205.36208152770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11261 prompt_cache_len:5151 prompt_cache_ratio:0.4574194121303614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 +DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10839557647705078 s +INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s +DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=63949427930367751873803710878523381977, time:1750767968.5304134s req_ids:[8] +DEBUG 06-24 20:26:08 [manager.py:391] +ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:219.02799606323242ms total_cost_time:219.0711498260498ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11262 prompt_cache_len:5151 prompt_cache_ratio:0.45737879595098563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 +DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10937762260437012 s +INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s +DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=56850275852242140050308302253577113672, time:1750767968.7629683s req_ids:[8] +DEBUG 06-24 20:26:08 [manager.py:391] +ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:225.83484649658203ms total_cost_time:225.87919235229492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11263 prompt_cache_len:5151 prompt_cache_ratio:0.4573381869839297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 +DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10933399200439453 s +INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s +DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=34105963039754750066749136282018883461, time:1750767968.9821277s req_ids:[8] +DEBUG 06-24 20:26:08 [manager.py:391] +ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:377.7334690093994ms total_cost_time:377.7790069580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11264 prompt_cache_len:5151 prompt_cache_ratio:0.4572975852272727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 +DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10851573944091797 s +INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.10964155197143555 s +DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=19095889817465579136343533300142380175, time:1750767969.3661919s req_ids:[8] +DEBUG 06-24 20:26:09 [manager.py:391] +ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:209.69605445861816ms total_cost_time:209.73825454711914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11265 prompt_cache_len:5151 prompt_cache_ratio:0.45725699067909453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 +DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10933375358581543 s +INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.11044454574584961 s +DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=79274668974497416484897064565463450212, time:1750767969.5841417s req_ids:[8] +DEBUG 06-24 20:26:09 [manager.py:391] +ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:210.82210540771484ms total_cost_time:210.86549758911133ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11266 prompt_cache_len:5151 prompt_cache_ratio:0.4572164033374756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 +DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10828256607055664 s +INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.10935091972351074 s +DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=260560438635004129331078285518322478479, time:1750767969.8007002s req_ids:[8] +DEBUG 06-24 20:26:09 [manager.py:391] +ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:204.13756370544434ms total_cost_time:204.18334007263184ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11267 prompt_cache_len:5151 prompt_cache_ratio:0.45717582320049704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 +DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s +INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10894465446472168 s +DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=251752899469460670908613343792427179307, time:1750767970.0219915s req_ids:[8] +DEBUG 06-24 20:26:10 [manager.py:391] +ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:180.8645725250244ms total_cost_time:180.91845512390137ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:11268 prompt_cache_len:5151 prompt_cache_ratio:0.4571352502662407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 +DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s +INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s +DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=209597318491448379436580926226140439772, time:1750767970.1973817s req_ids:[8] +DEBUG 06-24 20:26:10 [manager.py:391] +ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:198.90213012695312ms total_cost_time:198.9455223083496ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11269 prompt_cache_len:5151 prompt_cache_ratio:0.45709468453278906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 +DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10851001739501953 s +INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10958385467529297 s +DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=161346480565352301828588657394768184066, time:1750767970.400968s req_ids:[8] +DEBUG 06-24 20:26:10 [manager.py:391] +ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:201.36570930480957ms total_cost_time:201.41148567199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11270 prompt_cache_len:5151 prompt_cache_ratio:0.45705412599822537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 +DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10963797569274902 s +INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083054542541504 s +DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=331527834065189892939502591772632032253, time:1750767970.6090841s req_ids:[8] +DEBUG 06-24 20:26:10 [manager.py:391] +ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:372.2808361053467ms total_cost_time:372.32398986816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11271 prompt_cache_len:5151 prompt_cache_ratio:0.45701357466063347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 +DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s +INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.11051726341247559 s +DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=276671189589423619105974982118657083169, time:1750767970.9888742s req_ids:[8] +DEBUG 06-24 20:26:10 [manager.py:391] +ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:207.34477043151855ms total_cost_time:207.3655128479004ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11272 prompt_cache_len:5151 prompt_cache_ratio:0.45697303051809796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 +DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10885500907897949 s +INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11050128936767578 s +DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=144061461213553357965332449036236461709, time:1750767971.203853s req_ids:[8] +DEBUG 06-24 20:26:11 [manager.py:391] +ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:210.81066131591797ms total_cost_time:210.85739135742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11273 prompt_cache_len:5151 prompt_cache_ratio:0.45693249356870397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 +DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10847926139831543 s +INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11026525497436523 s +DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=163339919744459332966536289086932056875, time:1750767971.420313s req_ids:[8] +DEBUG 06-24 20:26:11 [manager.py:391] +ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:210.0660800933838ms total_cost_time:210.1123332977295ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11274 prompt_cache_len:5151 prompt_cache_ratio:0.45689196381053754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 +DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10905981063842773 s +INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.1107020378112793 s +DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=185837218332341883511079298510321949953, time:1750767971.649145s req_ids:[8] +DEBUG 06-24 20:26:11 [manager.py:391] +ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:231.27174377441406ms total_cost_time:231.31632804870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11275 prompt_cache_len:5151 prompt_cache_ratio:0.45685144124168514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 +DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10891318321228027 s +INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11055874824523926 s +DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=86824818261893391895231446447607193171, time:1750767971.877605s req_ids:[8] +DEBUG 06-24 20:26:11 [manager.py:391] +ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:206.62379264831543ms total_cost_time:206.66837692260742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11276 prompt_cache_len:5151 prompt_cache_ratio:0.45681092586023414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 +DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:11 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10988378524780273 s +INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.11172008514404297 s +DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=308306770146295562239145635502111898243, time:1750767972.0860767s req_ids:[8] +DEBUG 06-24 20:26:12 [manager.py:391] +ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:207.23247528076172ms total_cost_time:207.2761058807373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11277 prompt_cache_len:5151 prompt_cache_ratio:0.4567704176642724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 +DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.30823588371276855 s +INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.3101463317871094 s +DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=125344495659288145192733369506081803725, time:1750767972.5095296s req_ids:[8] +DEBUG 06-24 20:26:12 [manager.py:391] +ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:422.97816276550293ms total_cost_time:423.0232238769531ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11278 prompt_cache_len:5151 prompt_cache_ratio:0.4567299166518886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 +DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10869956016540527 s +INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s +DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=160738129914993255726466849061252261033, time:1750767972.731867s req_ids:[8] +DEBUG 06-24 20:26:12 [manager.py:391] +ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:216.24112129211426ms total_cost_time:216.28642082214355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11279 prompt_cache_len:5151 prompt_cache_ratio:0.4566894228211721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 +DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:12 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s +INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.1105961799621582 s +DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=185304419832789553591628579434099127523, time:1750767972.9518573s req_ids:[8] +DEBUG 06-24 20:26:12 [manager.py:391] +ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:207.3228359222412ms total_cost_time:207.3667049407959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11280 prompt_cache_len:5151 prompt_cache_ratio:0.4566489361702128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 +DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10724878311157227 s +INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.10888242721557617 s +DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=186264232746282001577870847187809657723, time:1750767973.1643958s req_ids:[8] +DEBUG 06-24 20:26:13 [manager.py:391] +ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:202.2378444671631ms total_cost_time:202.29077339172363ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:11281 prompt_cache_len:5151 prompt_cache_ratio:0.4566084566971013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 +DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10848832130432129 s +INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11010956764221191 s +DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=322860355188726963150795917259081146176, time:1750767973.3744392s req_ids:[8] +DEBUG 06-24 20:26:13 [manager.py:391] +ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:206.8345546722412ms total_cost_time:206.8803310394287ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11282 prompt_cache_len:5151 prompt_cache_ratio:0.45656798439992907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 +DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10892033576965332 s +INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11047625541687012 s +DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=118125867046129939159039507677707706969, time:1750767973.5999134s req_ids:[8] +DEBUG 06-24 20:26:13 [manager.py:391] +DEBUG 06-24 20:26:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 47047.488 tokens/s +DEBUG 06-24 20:26:13 [stats.py:37] Avg prompt tokens throughput: 47039.234 tokens/s +DEBUG 06-24 20:26:13 [stats.py:37] Avg generate tokens throughput: 8.254 tokens/s +ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:181.84471130371094ms total_cost_time:181.88881874084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11283 prompt_cache_len:5151 prompt_cache_ratio:0.4565275192767881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 +DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s +INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11078047752380371 s +DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=112873597615610054254562740479559641027, time:1750767973.7802098s req_ids:[8] +DEBUG 06-24 20:26:13 [manager.py:391] +ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:206.39824867248535ms total_cost_time:206.44426345825195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11284 prompt_cache_len:5151 prompt_cache_ratio:0.456487061325771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 +DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:13 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.1096494197845459 s +INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11161208152770996 s +DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=205180190037667318600096996415815111178, time:1750767973.9948084s req_ids:[8] +DEBUG 06-24 20:26:13 [manager.py:391] +ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:215.37494659423828ms total_cost_time:215.41905403137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11285 prompt_cache_len:5151 prompt_cache_ratio:0.4564466105449712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 +DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.3105800151824951 s +INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.3125650882720947 s +DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=137810114838088679488165386796430432505, time:1750767974.4229722s req_ids:[8] +DEBUG 06-24 20:26:14 [manager.py:391] +ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:427.6111125946045ms total_cost_time:427.6549816131592ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11286 prompt_cache_len:5151 prompt_cache_ratio:0.4564061669324827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 +DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s +INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.11088013648986816 s +DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=297796101534190454609186611644242683749, time:1750767974.6524305s req_ids:[8] +DEBUG 06-24 20:26:14 [manager.py:391] +ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:212.158203125ms total_cost_time:212.2037410736084ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11287 prompt_cache_len:5151 prompt_cache_ratio:0.4563657304864003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 +DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.10953116416931152 s +INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.11121964454650879 s +DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=72693179056495963781051038105227078731, time:1750767974.8650627s req_ids:[8] +DEBUG 06-24 20:26:14 [manager.py:391] +ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:204.4229507446289ms total_cost_time:204.46467399597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11288 prompt_cache_len:5151 prompt_cache_ratio:0.4563253012048193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 +DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:14 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10900068283081055 s +INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s +DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=229281614202594749884833054060028505629, time:1750767975.0783257s req_ids:[8] +DEBUG 06-24 20:26:15 [manager.py:391] +ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:208.88590812683105ms total_cost_time:208.93073081970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11289 prompt_cache_len:5151 prompt_cache_ratio:0.45628487908583576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 +DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10843944549560547 s +INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.10967159271240234 s +DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=7252026405452961440475041215343050690, time:1750767975.2889245s req_ids:[8] +DEBUG 06-24 20:26:15 [manager.py:391] +ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:205.62291145324707ms total_cost_time:205.66630363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11290 prompt_cache_len:5151 prompt_cache_ratio:0.4562444641275465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 +DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10802292823791504 s +INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.10920166969299316 s +DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=292121671372678296948743167208940814162, time:1750767975.501563s req_ids:[8] +DEBUG 06-24 20:26:15 [manager.py:391] +ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:215.78693389892578ms total_cost_time:215.83080291748047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11291 prompt_cache_len:5151 prompt_cache_ratio:0.4562040563280489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 +DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s +INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.1106269359588623 s +DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=167126543321187815025510790059819837797, time:1750767975.7346318s req_ids:[8] +DEBUG 06-24 20:26:15 [manager.py:391] +ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:217.84520149230957ms total_cost_time:217.89002418518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11292 prompt_cache_len:5151 prompt_cache_ratio:0.45616365568544104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 +DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:15 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s +INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.1101067066192627 s +DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=286776391979479844886627327610146560863, time:1750767975.9701903s req_ids:[8] +DEBUG 06-24 20:26:15 [manager.py:391] +ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:402.7695655822754ms total_cost_time:402.8127193450928ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11293 prompt_cache_len:5151 prompt_cache_ratio:0.4561232621978217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 +DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.10854601860046387 s +INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.10978102684020996 s +DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=252107103326517643006759469335937164295, time:1750767976.358605s req_ids:[8] +DEBUG 06-24 20:26:16 [manager.py:391] +ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:217.11373329162598ms total_cost_time:217.15593338012695ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11294 prompt_cache_len:5151 prompt_cache_ratio:0.4560828758632902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 +DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.10895252227783203 s +INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017966270446777 s +DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=170531223112125849082843449353061117680, time:1750767976.5894241s req_ids:[8] +DEBUG 06-24 20:26:16 [manager.py:391] +ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:214.44225311279297ms total_cost_time:214.48445320129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11295 prompt_cache_len:5151 prompt_cache_ratio:0.4560424966799469 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 +DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:16 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.108917236328125 s +INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.11008596420288086 s +DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=314118861202967707367364989739656188126, time:1750767976.802385s req_ids:[8] +DEBUG 06-24 20:26:16 [manager.py:391] +ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:207.01932907104492ms total_cost_time:207.0610523223877ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11296 prompt_cache_len:5151 prompt_cache_ratio:0.45600212464589235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 +DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:16 [batch.py:51] router release req id 8 +INFO 06-24 20:26:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10770916938781738 s +INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.10885000228881836 s +DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=178327848448299990497374716595544087587, time:1750767977.0161476s req_ids:[8] +DEBUG 06-24 20:26:17 [manager.py:391] +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +INFO 06-24 20:26:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:203.66978645324707ms total_cost_time:203.71317863464355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11297 prompt_cache_len:5151 prompt_cache_ratio:0.4559617597592281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 +DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s +INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s +DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=43316320791307593159388745178545435379, time:1750767977.2269416s req_ids:[8] +DEBUG 06-24 20:26:17 [manager.py:391] +ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:208.4674835205078ms total_cost_time:208.5113525390625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11298 prompt_cache_len:5151 prompt_cache_ratio:0.4559214020180563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 +DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10912966728210449 s +INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s +DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=127887462431862415738984607234380205949, time:1750767977.4425018s req_ids:[8] +DEBUG 06-24 20:26:17 [manager.py:391] +ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:202.9898166656494ms total_cost_time:203.0341625213623ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11299 prompt_cache_len:5151 prompt_cache_ratio:0.4558810514204797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 +DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:17 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.31160950660705566 s +INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.3129100799560547 s +DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=228611041444497815714782792390180076051, time:1750767977.865634s req_ids:[8] +DEBUG 06-24 20:26:17 [manager.py:391] +ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:431.5640926361084ms total_cost_time:431.6103458404541ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11300 prompt_cache_len:5151 prompt_cache_ratio:0.45584070796460174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 +DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.1097419261932373 s +INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.11102819442749023 s +DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=324781071075089926809079762976872438874, time:1750767978.0904195s req_ids:[8] +DEBUG 06-24 20:26:18 [manager.py:391] +ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:227.88715362548828ms total_cost_time:227.93269157409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11301 prompt_cache_len:5151 prompt_cache_ratio:0.45580037164852666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 +DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s +INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.11031055450439453 s +DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=217889621557458946267952590344456185602, time:1750767978.3285296s req_ids:[8] +DEBUG 06-24 20:26:18 [manager.py:391] +ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:219.17200088500977ms total_cost_time:219.21491622924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11302 prompt_cache_len:5151 prompt_cache_ratio:0.45576004247035923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 +DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10755014419555664 s +INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.10865950584411621 s +DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=14910185193590493480396849653693461633, time:1750767978.571229s req_ids:[8] +DEBUG 06-24 20:26:18 [manager.py:391] +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:188.71283531188965ms total_cost_time:188.75551223754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11303 prompt_cache_len:5151 prompt_cache_ratio:0.4557197204282049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 +DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10828781127929688 s +INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.10927796363830566 s +DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=79820120496242415191842462846185701598, time:1750767978.7425067s req_ids:[8] +DEBUG 06-24 20:26:18 [manager.py:391] +ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:166.65172576904297ms total_cost_time:166.69321060180664ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11304 prompt_cache_len:5151 prompt_cache_ratio:0.45567940552016983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 +DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:18 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10799193382263184 s +INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.109039306640625 s +DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=179704393806689913170615001582257983838, time:1750767978.916534s req_ids:[8] +DEBUG 06-24 20:26:18 [manager.py:391] +ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:167.58203506469727ms total_cost_time:167.62447357177734ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11305 prompt_cache_len:5151 prompt_cache_ratio:0.4556390977443609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 +DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10766983032226562 s +INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.10878896713256836 s +DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=241508012542157698679710058487985212370, time:1750767979.0940008s req_ids:[8] +DEBUG 06-24 20:26:19 [manager.py:391] +ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:371.7687129974365ms total_cost_time:371.8135356903076ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11306 prompt_cache_len:5151 prompt_cache_ratio:0.45559879709888557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 +DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.1110987663269043 s +INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.1122431755065918 s +DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=214429699826335761947874168521675479824, time:1750767979.467753s req_ids:[8] +DEBUG 06-24 20:26:19 [manager.py:391] +ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:219.11954879760742ms total_cost_time:219.1624641418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11307 prompt_cache_len:5151 prompt_cache_ratio:0.45555850358185196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 +DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10937237739562988 s +INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.11053276062011719 s +DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=91671198261330310031839677121591964614, time:1750767979.7045693s req_ids:[8] +DEBUG 06-24 20:26:19 [manager.py:391] +ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:215.30914306640625ms total_cost_time:215.35205841064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11308 prompt_cache_len:5151 prompt_cache_ratio:0.4555182171913689 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 +DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:19 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10691666603088379 s +INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.10784244537353516 s +DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=251071743910972663839529809746892314462, time:1750767979.9208558s req_ids:[8] +DEBUG 06-24 20:26:19 [manager.py:391] +ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:196.05255126953125ms total_cost_time:196.09713554382324ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11309 prompt_cache_len:5151 prompt_cache_ratio:0.45547793792554603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 +DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10841155052185059 s +INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s +DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=40930484799322893938813653789957674048, time:1750767980.11918s req_ids:[8] +DEBUG 06-24 20:26:20 [manager.py:391] +ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:202.41308212280273ms total_cost_time:202.45718955993652ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11310 prompt_cache_len:5151 prompt_cache_ratio:0.4554376657824934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 +DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10751128196716309 s +INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.1099696159362793 s +DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=161011448909021956858177146153711865943, time:1750767980.3309908s req_ids:[8] +DEBUG 06-24 20:26:20 [manager.py:391] +ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:203.23872566223145ms total_cost_time:203.28259468078613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11311 prompt_cache_len:5151 prompt_cache_ratio:0.4553974007603218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 +DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:20 [batch.py:51] router release req id 8 +INFO 06-24 20:26:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10833954811096191 s +INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103668212890625 s +DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=327643006633026172537630322673166151125, time:1750767980.5408645s req_ids:[8] +DEBUG 06-24 20:26:20 [manager.py:391] +ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:209.3367576599121ms total_cost_time:209.3803882598877ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11312 prompt_cache_len:5151 prompt_cache_ratio:0.45535714285714285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 +DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10852575302124023 s +INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11081743240356445 s +DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=140703759801477550523870686876265536940, time:1750767980.7657156s req_ids:[8] +DEBUG 06-24 20:26:20 [manager.py:391] +ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:221.77720069885254ms total_cost_time:221.82106971740723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11313 prompt_cache_len:5151 prompt_cache_ratio:0.4553168920710687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 +DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:20 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10939669609069824 s +INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11151862144470215 s +DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=200654988338487005953524189509481411246, time:1750767980.98896s req_ids:[8] +DEBUG 06-24 20:26:20 [manager.py:391] +ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:214.40863609313965ms total_cost_time:214.45226669311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11314 prompt_cache_len:5151 prompt_cache_ratio:0.45527664840021215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 +DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10774922370910645 s +INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s +DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=337308567436354901208803084212329339179, time:1750767981.2054055s req_ids:[8] +DEBUG 06-24 20:26:21 [manager.py:391] +ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:367.5389289855957ms total_cost_time:367.5847053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11315 prompt_cache_len:5151 prompt_cache_ratio:0.4552364118426867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 +DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10843276977539062 s +INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056399345397949 s +DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=105221000426500780698965551045841528316, time:1750767981.5778193s req_ids:[8] +DEBUG 06-24 20:26:21 [manager.py:391] +ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:205.8546543121338ms total_cost_time:205.89756965637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11316 prompt_cache_len:5151 prompt_cache_ratio:0.4551961823966066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 +DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s +INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11151766777038574 s +DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=36465977988968600892308478293091944700, time:1750767981.791205s req_ids:[8] +DEBUG 06-24 20:26:21 [manager.py:391] +ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:206.9544792175293ms total_cost_time:206.9993019104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11317 prompt_cache_len:5151 prompt_cache_ratio:0.4551559600600866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 +DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:21 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10930657386779785 s +INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s +DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=130269794980461272780526004639887080436, time:1750767982.0038283s req_ids:[8] +DEBUG 06-24 20:26:22 [manager.py:391] +ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:207.12590217590332ms total_cost_time:207.1690559387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11318 prompt_cache_len:5151 prompt_cache_ratio:0.4551157448312423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 +DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10969305038452148 s +INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11173176765441895 s +DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=202768348669964710432896394972755337439, time:1750767982.2158418s req_ids:[8] +DEBUG 06-24 20:26:22 [manager.py:391] +ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:206.6037654876709ms total_cost_time:206.6478729248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11319 prompt_cache_len:5151 prompt_cache_ratio:0.4550755367081898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 +DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10843372344970703 s +INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11035585403442383 s +DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=312177278267008731944846057538717953174, time:1750767982.430606s req_ids:[8] +DEBUG 06-24 20:26:22 [manager.py:391] +ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:207.98230171203613ms total_cost_time:208.02807807922363ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11320 prompt_cache_len:5151 prompt_cache_ratio:0.4550353356890459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 +DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10565781593322754 s +INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.10756993293762207 s +DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=60917728067241092268470813058031508247, time:1750767982.652749s req_ids:[8] +DEBUG 06-24 20:26:22 [manager.py:391] +ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:218.47152709960938ms total_cost_time:218.53184700012207ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:11321 prompt_cache_len:5151 prompt_cache_ratio:0.4549951417719283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 +DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:22 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10898494720458984 s +INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11101698875427246 s +DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=82432998254571733186314519522606324271, time:1750767982.868839s req_ids:[8] +DEBUG 06-24 20:26:22 [manager.py:391] +ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:369.2936897277832ms total_cost_time:369.33135986328125ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:11322 prompt_cache_len:5151 prompt_cache_ratio:0.45495495495495497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 +DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:23 [batch.py:51] router release req id 8 +INFO 06-24 20:26:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10802960395812988 s +INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.10998368263244629 s +DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=9695969472066297818038024760146791449, time:1750767983.2446148s req_ids:[8] +DEBUG 06-24 20:26:23 [manager.py:391] +ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:203.31573486328125ms total_cost_time:203.35984230041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11323 prompt_cache_len:5151 prompt_cache_ratio:0.4549147752362448 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 +DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10913705825805664 s +INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.11101198196411133 s +DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=304131336952290925737796181334933443959, time:1750767983.4543283s req_ids:[8] +DEBUG 06-24 20:26:23 [manager.py:391] +ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:209.04850959777832ms total_cost_time:209.09476280212402ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11324 prompt_cache_len:5151 prompt_cache_ratio:0.4548746026139173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 +DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.1082909107208252 s +INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s +DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=205178083546841053321471601687807998437, time:1750767983.669755s req_ids:[8] +DEBUG 06-24 20:26:23 [manager.py:391] +DEBUG 06-24 20:26:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 47155.767 tokens/s +DEBUG 06-24 20:26:23 [stats.py:37] Avg prompt tokens throughput: 47147.426 tokens/s +DEBUG 06-24 20:26:23 [stats.py:37] Avg generate tokens throughput: 8.341 tokens/s +ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:206.8483829498291ms total_cost_time:206.89058303833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11325 prompt_cache_len:5151 prompt_cache_ratio:0.4548344370860927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 +DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:23 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10956335067749023 s +INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.1114048957824707 s +DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=260175145077191704055382274588877738234, time:1750767983.8822963s req_ids:[8] +DEBUG 06-24 20:26:23 [manager.py:391] +ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:208.53018760681152ms total_cost_time:208.573579788208ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11326 prompt_cache_len:5151 prompt_cache_ratio:0.4547942786508918 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 +DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.1092689037322998 s +INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11125850677490234 s +DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=39682747746281319206905361057939582000, time:1750767984.0996873s req_ids:[8] +DEBUG 06-24 20:26:24 [manager.py:391] +ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:210.52813529968262ms total_cost_time:210.5724811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11327 prompt_cache_len:5151 prompt_cache_ratio:0.45475412730643594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 +DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s +INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11048293113708496 s +DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=311340011915937694398643413874466027315, time:1750767984.312459s req_ids:[8] +DEBUG 06-24 20:26:24 [manager.py:391] +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:206.7086696624756ms total_cost_time:206.75230026245117ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11328 prompt_cache_len:5151 prompt_cache_ratio:0.4547139830508475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 +DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10832738876342773 s +INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s +DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=309287973592991387652661829860412963241, time:1750767984.5274093s req_ids:[8] +DEBUG 06-24 20:26:24 [manager.py:391] +ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:377.579927444458ms total_cost_time:377.6240348815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11329 prompt_cache_len:5151 prompt_cache_ratio:0.4546738458822491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 +DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:24 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10851454734802246 s +INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11035871505737305 s +DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=27603626724685253889428863651976169081, time:1750767984.9116282s req_ids:[8] +DEBUG 06-24 20:26:24 [manager.py:391] +ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:209.5491886138916ms total_cost_time:209.59234237670898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11330 prompt_cache_len:5151 prompt_cache_ratio:0.4546337157987643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 +DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10896658897399902 s +INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s +DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=34178364314481396363755139553507762015, time:1750767985.1284096s req_ids:[8] +DEBUG 06-24 20:26:25 [manager.py:391] +ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:204.63204383850098ms total_cost_time:204.67615127563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11331 prompt_cache_len:5151 prompt_cache_ratio:0.45459359279851735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 +DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s +INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11097335815429688 s +DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=249296467447821938308198099825357915319, time:1750767985.3384955s req_ids:[8] +DEBUG 06-24 20:26:25 [manager.py:391] +ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:204.25701141357422ms total_cost_time:204.3163776397705ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:11332 prompt_cache_len:5151 prompt_cache_ratio:0.4545534768796329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 +DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10921812057495117 s +INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s +DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=78316491577682309637965559131496201583, time:1750767985.5491347s req_ids:[8] +DEBUG 06-24 20:26:25 [manager.py:391] +ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:205.62386512756348ms total_cost_time:205.66749572753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11333 prompt_cache_len:5151 prompt_cache_ratio:0.45451336804023645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 +DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10871505737304688 s +INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11046481132507324 s +DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=122065705189992464653415244186399711763, time:1750767985.7599568s req_ids:[8] +DEBUG 06-24 20:26:25 [manager.py:391] +ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:209.92088317871094ms total_cost_time:209.96546745300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11334 prompt_cache_len:5151 prompt_cache_ratio:0.4544732662784542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 +DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:25 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.1089777946472168 s +INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11072158813476562 s +DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=143003878709639294044029488484888653732, time:1750767985.9822032s req_ids:[8] +DEBUG 06-24 20:26:25 [manager.py:391] +ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:214.02597427368164ms total_cost_time:214.0491008758545ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:11335 prompt_cache_len:5151 prompt_cache_ratio:0.45443317159241287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 +DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.31182003021240234 s +INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.31377577781677246 s +DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=32236844390206068601112508178367091998, time:1750767986.40213s req_ids:[8] +DEBUG 06-24 20:26:26 [manager.py:391] +ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:423.19512367248535ms total_cost_time:423.23994636535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11336 prompt_cache_len:5151 prompt_cache_ratio:0.4543930839802399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 +DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s +INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055564880371094 s +DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=169410883395063664411798780232010256983, time:1750767986.6263075s req_ids:[8] +DEBUG 06-24 20:26:26 [manager.py:391] +ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:212.70370483398438ms total_cost_time:212.74995803833008ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11337 prompt_cache_len:5151 prompt_cache_ratio:0.4543530034400635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 +DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.10803914070129395 s +INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s +DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=216687525975914802886077759414369259106, time:1750767986.8498383s req_ids:[8] +DEBUG 06-24 20:26:26 [manager.py:391] +ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:217.75555610656738ms total_cost_time:217.79942512512207ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11338 prompt_cache_len:5151 prompt_cache_ratio:0.4543129299700123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 +DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:26 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10854077339172363 s +INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s +DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=338129568213209360859016629128822091365, time:1750767987.0666065s req_ids:[8] +DEBUG 06-24 20:26:27 [manager.py:391] +ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:209.5339298248291ms total_cost_time:209.578275680542ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11339 prompt_cache_len:5151 prompt_cache_ratio:0.4542728635682159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 +DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.1084892749786377 s +INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s +DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=171116618902568112251584663188528449727, time:1750767987.2832882s req_ids:[8] +DEBUG 06-24 20:26:27 [manager.py:391] +ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:210.7853889465332ms total_cost_time:210.82758903503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11340 prompt_cache_len:5151 prompt_cache_ratio:0.4542328042328042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 +DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10910153388977051 s +INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11072969436645508 s +DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=334730239873310975360380266584526570352, time:1750767987.501249s req_ids:[8] +DEBUG 06-24 20:26:27 [manager.py:391] +ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:205.5039405822754ms total_cost_time:205.52492141723633ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11341 prompt_cache_len:5151 prompt_cache_ratio:0.4541927519619081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 +DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10885810852050781 s +INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11057686805725098 s +DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=298943297872181446800450271937263463798, time:1750767987.7214313s req_ids:[8] +DEBUG 06-24 20:26:27 [manager.py:391] +ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:218.65200996398926ms total_cost_time:218.69564056396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11342 prompt_cache_len:5151 prompt_cache_ratio:0.454152706753659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 +DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:27 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s +INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101832389831543 s +DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=196405579000194876979895677647670296953, time:1750767987.93744s req_ids:[8] +DEBUG 06-24 20:26:27 [manager.py:391] +ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:211.57217025756836ms total_cost_time:211.61508560180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11343 prompt_cache_len:5151 prompt_cache_ratio:0.4541126686061888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 +DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.311542272567749 s +INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.31353259086608887 s +DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=248457901287186131751066140030955032277, time:1750767988.3580234s req_ids:[8] +DEBUG 06-24 20:26:28 [manager.py:391] +ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:421.7522144317627ms total_cost_time:421.7956066131592ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11344 prompt_cache_len:5151 prompt_cache_ratio:0.45407263751763044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 +DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s +INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.11063790321350098 s +DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=259658653019970542523968057709153410302, time:1750767988.5809s req_ids:[8] +DEBUG 06-24 20:26:28 [manager.py:391] +ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:205.8999538421631ms total_cost_time:205.92808723449707ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:11345 prompt_cache_len:5151 prompt_cache_ratio:0.45403261348611723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 +DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.10797905921936035 s +INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.10991764068603516 s +DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=222440150659913816449069914640604637324, time:1750767988.7921424s req_ids:[8] +DEBUG 06-24 20:26:28 [manager.py:391] +ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:211.35330200195312ms total_cost_time:211.3964557647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11346 prompt_cache_len:5151 prompt_cache_ratio:0.4539925965097832 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 +DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:28 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10892534255981445 s +INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11107707023620605 s +DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=222292560635342062546591543331922077526, time:1750767989.0079732s req_ids:[8] +DEBUG 06-24 20:26:29 [manager.py:391] +ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:209.09404754638672ms total_cost_time:209.1386318206787ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11347 prompt_cache_len:5151 prompt_cache_ratio:0.453952586586763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 +DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10895252227783203 s +INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11097502708435059 s +DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=338230677424122649872558070560957660399, time:1750767989.2222083s req_ids:[8] +DEBUG 06-24 20:26:29 [manager.py:391] +ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:207.77583122253418ms total_cost_time:207.81779289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11348 prompt_cache_len:5151 prompt_cache_ratio:0.4539125837151921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 +DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10911893844604492 s +INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s +DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=4626725752643212933268018661862218008, time:1750767989.4383032s req_ids:[8] +DEBUG 06-24 20:26:29 [manager.py:391] +ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:206.8188190460205ms total_cost_time:206.862211227417ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11349 prompt_cache_len:5151 prompt_cache_ratio:0.45387258789320645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 +DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10837316513061523 s +INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.1102747917175293 s +DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=297461174187350274559017096912900356462, time:1750767989.650257s req_ids:[8] +DEBUG 06-24 20:26:29 [manager.py:391] +ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:202.7604579925537ms total_cost_time:202.8062343597412ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11350 prompt_cache_len:5151 prompt_cache_ratio:0.45383259911894275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 +DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:29 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10965394973754883 s +INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11154770851135254 s +DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=105161746586994409708630787358915763210, time:1750767989.858172s req_ids:[8] +DEBUG 06-24 20:26:29 [manager.py:391] +ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:366.93644523620605ms total_cost_time:366.98126792907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11351 prompt_cache_len:5151 prompt_cache_ratio:0.4537926173905383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 +DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.1083536148071289 s +INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11027407646179199 s +DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=60630480102621067251015614715213106818, time:1750767990.2315123s req_ids:[8] +DEBUG 06-24 20:26:30 [manager.py:391] +ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:206.75063133239746ms total_cost_time:206.79306983947754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11352 prompt_cache_len:5151 prompt_cache_ratio:0.4537526427061311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 +DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.1104733943939209 s +INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.1124274730682373 s +DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=132806970684671152990470722869136624712, time:1750767990.445534s req_ids:[8] +DEBUG 06-24 20:26:30 [manager.py:391] +ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:209.16056632995605ms total_cost_time:209.20729637145996ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11353 prompt_cache_len:5151 prompt_cache_ratio:0.45371267506385976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 +DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.10949921607971191 s +INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11154699325561523 s +DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=136385300513875509685261741832367442213, time:1750767990.660243s req_ids:[8] +DEBUG 06-24 20:26:30 [manager.py:391] +ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:208.13417434692383ms total_cost_time:208.17828178405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11354 prompt_cache_len:5151 prompt_cache_ratio:0.45367271446186364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 +DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:30 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.10901641845703125 s +INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s +DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=172886899845265051946288289449321298856, time:1750767990.8819432s req_ids:[8] +DEBUG 06-24 20:26:30 [manager.py:391] +ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:216.4146900177002ms total_cost_time:216.45760536193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11355 prompt_cache_len:5151 prompt_cache_ratio:0.4536327608982827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 +DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10955286026000977 s +INFO 06-24 20:26:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11154699325561523 s +DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=335865772468804170213563667899280026517, time:1750767991.099628s req_ids:[8] +DEBUG 06-24 20:26:31 [manager.py:391] +ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:209.36894416809082ms total_cost_time:209.4130516052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11356 prompt_cache_len:5151 prompt_cache_ratio:0.4535928143712575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 +DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10839104652404785 s +INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11033916473388672 s +DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=290859144232830616038600295663153287429, time:1750767991.3142734s req_ids:[8] +DEBUG 06-24 20:26:31 [manager.py:391] +ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:207.25035667419434ms total_cost_time:207.29660987854004ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11357 prompt_cache_len:5151 prompt_cache_ratio:0.4535528748789293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 +DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.3114175796508789 s +INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.31343603134155273 s +DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=181844160483954208423074101142688983172, time:1750767991.736137s req_ids:[8] +DEBUG 06-24 20:26:31 [manager.py:391] +ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:423.42233657836914ms total_cost_time:423.46668243408203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11358 prompt_cache_len:5151 prompt_cache_ratio:0.45351294241944007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 +DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:31 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s +INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11147737503051758 s +DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=326225473762673937009723440728985659466, time:1750767991.9602845s req_ids:[8] +DEBUG 06-24 20:26:31 [manager.py:391] +ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:211.96556091308594ms total_cost_time:212.00966835021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11359 prompt_cache_len:5151 prompt_cache_ratio:0.4534730169909323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 +DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10866165161132812 s +INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11064934730529785 s +DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=279074985643360394393559222810769976533, time:1750767992.1766987s req_ids:[8] +DEBUG 06-24 20:26:32 [manager.py:391] +ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:208.36997032165527ms total_cost_time:208.41431617736816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11360 prompt_cache_len:5151 prompt_cache_ratio:0.4534330985915493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 +DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10968160629272461 s +INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11177682876586914 s +DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=261988646200074728369550623351821313202, time:1750767992.3928876s req_ids:[8] +DEBUG 06-24 20:26:32 [manager.py:391] +ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:205.4741382598877ms total_cost_time:205.5184841156006ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11361 prompt_cache_len:5151 prompt_cache_ratio:0.4533931872194349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 +DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10889220237731934 s +INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11086153984069824 s +DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=321489768770417620956852882477783075083, time:1750767992.603351s req_ids:[8] +DEBUG 06-24 20:26:32 [manager.py:391] +ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:207.97252655029297ms total_cost_time:208.01663398742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11362 prompt_cache_len:5151 prompt_cache_ratio:0.4533532828727337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 +DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.1093134880065918 s +INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s +DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=54906911637315129445403298455417082941, time:1750767992.8157206s req_ids:[8] +DEBUG 06-24 20:26:32 [manager.py:391] +ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:205.94453811645508ms total_cost_time:205.99031448364258ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11363 prompt_cache_len:5151 prompt_cache_ratio:0.4533133855495908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 +DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:32 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10937333106994629 s +INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11137938499450684 s +DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=241134323400175779274229919301016180857, time:1750767993.0300534s req_ids:[8] +DEBUG 06-24 20:26:33 [manager.py:391] +ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:378.4646987915039ms total_cost_time:378.5083293914795ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11364 prompt_cache_len:5151 prompt_cache_ratio:0.4532734952481521 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 +DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10859012603759766 s +INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11057782173156738 s +DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=273199403309646748187973744250050924557, time:1750767993.4138806s req_ids:[8] +DEBUG 06-24 20:26:33 [manager.py:391] +ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:207.26943016052246ms total_cost_time:207.3063850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:11365 prompt_cache_len:5151 prompt_cache_ratio:0.45323361196656403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 +DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10879206657409668 s +INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11094546318054199 s +DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=107272170421819284538234374938743824210, time:1750767993.6293507s req_ids:[8] +DEBUG 06-24 20:26:33 [manager.py:391] +ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:26:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 46294.618 tokens/s +DEBUG 06-24 20:26:33 [stats.py:37] Avg prompt tokens throughput: 46286.360 tokens/s +DEBUG 06-24 20:26:33 [stats.py:37] Avg generate tokens throughput: 8.259 tokens/s +INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:208.44483375549316ms total_cost_time:208.48965644836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11366 prompt_cache_len:5151 prompt_cache_ratio:0.4531937357029738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 +DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s +INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11078953742980957 s +DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=148499893491984726916772807895403790720, time:1750767993.8429947s req_ids:[8] +DEBUG 06-24 20:26:33 [manager.py:391] +ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:207.37910270690918ms total_cost_time:207.42416381835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11367 prompt_cache_len:5151 prompt_cache_ratio:0.4531538664555292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 +DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:33 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.1089622974395752 s +INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11093544960021973 s +DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=205426324059627278166729247241098073496, time:1750767994.0570447s req_ids:[8] +DEBUG 06-24 20:26:34 [manager.py:391] +ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:205.3356170654297ms total_cost_time:205.37877082824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11368 prompt_cache_len:5151 prompt_cache_ratio:0.45311400422237863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 +DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10974240303039551 s +INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11173748970031738 s +DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=248443153045428672825345312726372391444, time:1750767994.2689846s req_ids:[8] +DEBUG 06-24 20:26:34 [manager.py:391] +ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:208.14967155456543ms total_cost_time:208.19544792175293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11369 prompt_cache_len:5151 prompt_cache_ratio:0.4530741490016712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 +DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10836076736450195 s +INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.1104133129119873 s +DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=291476315973823668338462929636608152168, time:1750767994.4854543s req_ids:[8] +DEBUG 06-24 20:26:34 [manager.py:391] +ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:210.74891090393066ms total_cost_time:210.79492568969727ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11370 prompt_cache_len:5151 prompt_cache_ratio:0.4530343007915567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 +DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:34 [batch.py:51] router release req id 8 +INFO 06-24 20:26:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10911965370178223 s +INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11114192008972168 s +DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=264584476397341822822803240795818981259, time:1750767994.7011971s req_ids:[8] +DEBUG 06-24 20:26:34 [manager.py:391] +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:208.42337608337402ms total_cost_time:208.46939086914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11371 prompt_cache_len:5151 prompt_cache_ratio:0.4529944595901856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 +DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:34 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s +INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11081218719482422 s +DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=307213387413008843593500556736448595602, time:1750767994.9145882s req_ids:[8] +DEBUG 06-24 20:26:34 [manager.py:391] +ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:202.73280143737793ms total_cost_time:202.77714729309082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11372 prompt_cache_len:5151 prompt_cache_ratio:0.45295462539570874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 +DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10908842086791992 s +INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11111235618591309 s +DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=25521592814777134978935121426408989843, time:1750767995.1259975s req_ids:[8] +DEBUG 06-24 20:26:35 [manager.py:391] +ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:362.5319004058838ms total_cost_time:362.5774383544922ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11373 prompt_cache_len:5151 prompt_cache_ratio:0.452914798206278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 +DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s +INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11037445068359375 s +DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=114171028174874015045170094747811605847, time:1750767995.4957058s req_ids:[8] +DEBUG 06-24 20:26:35 [manager.py:391] +ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:208.61053466796875ms total_cost_time:208.65654945373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11374 prompt_cache_len:5151 prompt_cache_ratio:0.4528749780200457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 +DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10834693908691406 s +INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034703254699707 s +DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=186737104494599180258515166641893672656, time:1750767995.712553s req_ids:[8] +DEBUG 06-24 20:26:35 [manager.py:391] +ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:204.38504219055176ms total_cost_time:204.43034172058105ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11375 prompt_cache_len:5151 prompt_cache_ratio:0.45283516483516484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 +DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:35 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.11012601852416992 s +INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.1120750904083252 s +DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=128634963514083866325217959331542497750, time:1750767995.92352s req_ids:[8] +DEBUG 06-24 20:26:35 [manager.py:391] +ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:206.9869041442871ms total_cost_time:207.0307731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11376 prompt_cache_len:5151 prompt_cache_ratio:0.45279535864978904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 +DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10995841026306152 s +INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11197876930236816 s +DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=87795253211386982363984227314366980904, time:1750767996.135674s req_ids:[8] +DEBUG 06-24 20:26:36 [manager.py:391] +ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:206.45713806152344ms total_cost_time:206.50172233581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11377 prompt_cache_len:5151 prompt_cache_ratio:0.4527555594620726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 +DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10920000076293945 s +INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11117291450500488 s +DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=301826638614852868588612436839899234328, time:1750767996.3500583s req_ids:[8] +DEBUG 06-24 20:26:36 [manager.py:391] +ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:208.45484733581543ms total_cost_time:208.49943161010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11378 prompt_cache_len:5151 prompt_cache_ratio:0.4527157672701705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 +DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10819053649902344 s +INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025023460388184 s +DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=90844823201287249829461701521362730125, time:1750767996.5659115s req_ids:[8] +DEBUG 06-24 20:26:36 [manager.py:391] +ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:210.85143089294434ms total_cost_time:210.89529991149902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11379 prompt_cache_len:5151 prompt_cache_ratio:0.45267598207223836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 +DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:36 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s +INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.111419677734375 s +DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=204707555654713909214322936798646306507, time:1750767996.7813227s req_ids:[8] +DEBUG 06-24 20:26:36 [manager.py:391] +ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:370.10788917541504ms total_cost_time:370.15295028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11380 prompt_cache_len:5151 prompt_cache_ratio:0.45263620386643233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 +DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10904479026794434 s +INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11105465888977051 s +DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=52519310238188551038995325150239412051, time:1750767997.1558664s req_ids:[8] +DEBUG 06-24 20:26:37 [manager.py:391] +ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:204.63871955871582ms total_cost_time:204.6811580657959ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11381 prompt_cache_len:5151 prompt_cache_ratio:0.45259643265090943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 +DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10858583450317383 s +INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11052322387695312 s +DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=258501726409303998206374352036702716262, time:1750767997.3686936s req_ids:[8] +DEBUG 06-24 20:26:37 [manager.py:391] +ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:201.2655735015869ms total_cost_time:201.3101577758789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11382 prompt_cache_len:5151 prompt_cache_ratio:0.4525566684238271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 +DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10866022109985352 s +INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s +DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=20894106409733256911119845056825164036, time:1750767997.5757205s req_ids:[8] +DEBUG 06-24 20:26:37 [manager.py:391] +ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:202.99863815307617ms total_cost_time:203.04155349731445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11383 prompt_cache_len:5151 prompt_cache_ratio:0.4525169111833436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 +DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.1092841625213623 s +INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11115097999572754 s +DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=115214968010863144661984277475568275754, time:1750767997.7856598s req_ids:[8] +DEBUG 06-24 20:26:37 [manager.py:391] +ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:209.43140983581543ms total_cost_time:209.4738483428955ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11384 prompt_cache_len:5151 prompt_cache_ratio:0.4524771609276177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 +DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:37 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10804915428161621 s +INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.10999059677124023 s +DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=257445135311906933558898804516976076100, time:1750767998.001561s req_ids:[8] +DEBUG 06-24 20:26:38 [manager.py:391] +ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:207.02695846557617ms total_cost_time:207.06987380981445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11385 prompt_cache_len:5151 prompt_cache_ratio:0.45243741765480894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 +DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.10863876342773438 s +INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11054873466491699 s +DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=281490540142519963759154562484838144044, time:1750767998.2157903s req_ids:[8] +DEBUG 06-24 20:26:38 [manager.py:391] +ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:210.3729248046875ms total_cost_time:210.4172706604004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11386 prompt_cache_len:5151 prompt_cache_ratio:0.45239768136307745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 +DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.1085047721862793 s +INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s +DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=148884076342120898899233499658111567467, time:1750767998.432516s req_ids:[8] +DEBUG 06-24 20:26:38 [manager.py:391] +ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:374.1121292114258ms total_cost_time:374.157190322876ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11387 prompt_cache_len:5151 prompt_cache_ratio:0.452357952050584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 +DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.10919356346130371 s +INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11113786697387695 s +DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=27374442269757230621831463518361342881, time:1750767998.8111942s req_ids:[8] +DEBUG 06-24 20:26:38 [manager.py:391] +ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:206.37965202331543ms total_cost_time:206.39920234680176ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11388 prompt_cache_len:5151 prompt_cache_ratio:0.45231822971549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 +DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:38 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10820770263671875 s +INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.1101827621459961 s +DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=213192048238280243935100427740786931934, time:1750767999.024374s req_ids:[8] +DEBUG 06-24 20:26:39 [manager.py:391] +ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:209.8240852355957ms total_cost_time:209.86557006835938ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11389 prompt_cache_len:5151 prompt_cache_ratio:0.4522785143559575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 +DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10762572288513184 s +INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.10945487022399902 s +DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=206440229183086917267791875277986428336, time:1750767999.2401114s req_ids:[8] +DEBUG 06-24 20:26:39 [manager.py:391] +ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:165.33756256103516ms total_cost_time:165.37928581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11390 prompt_cache_len:5151 prompt_cache_ratio:0.45223880597014926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 +DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10698175430297852 s +INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.1088414192199707 s +DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=140382066545869588582599414852405393603, time:1750767999.4119606s req_ids:[8] +DEBUG 06-24 20:26:39 [manager.py:391] +ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:194.8864459991455ms total_cost_time:194.94390487670898ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:11391 prompt_cache_len:5151 prompt_cache_ratio:0.4521991045562286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 +DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10831832885742188 s +INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.11076164245605469 s +DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=76133194832878272861788721531963478199, time:1750767999.6237864s req_ids:[8] +DEBUG 06-24 20:26:39 [manager.py:391] +ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:217.76151657104492ms total_cost_time:217.8058624267578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11392 prompt_cache_len:5151 prompt_cache_ratio:0.45215941011235955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 +DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:39 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s +INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.11044740676879883 s +DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=204949515390140145931873366284051228650, time:1750767999.8412514s req_ids:[8] +DEBUG 06-24 20:26:39 [manager.py:391] +ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:215.8217430114746ms total_cost_time:215.8651351928711ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11393 prompt_cache_len:5151 prompt_cache_ratio:0.45211972263670674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 +DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.30970191955566406 s +INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.3116495609283447 s +DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=150427684650236453665033685190671536992, time:1750768000.2657373s req_ids:[8] +DEBUG 06-24 20:26:40 [manager.py:391] +ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:418.7474250793457ms total_cost_time:418.7924861907959ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11394 prompt_cache_len:5151 prompt_cache_ratio:0.45208004212743547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 +DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10851621627807617 s +INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.11084198951721191 s +DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=130781456027489091625715341011975799698, time:1750768000.4956565s req_ids:[8] +DEBUG 06-24 20:26:40 [manager.py:391] +ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:217.58174896240234ms total_cost_time:217.62514114379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11395 prompt_cache_len:5151 prompt_cache_ratio:0.45204036858271174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 +DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10803055763244629 s +INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.10998296737670898 s +DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=299077365785168116130587742559388680871, time:1750768000.7119126s req_ids:[8] +DEBUG 06-24 20:26:40 [manager.py:391] +ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:213.8376235961914ms total_cost_time:213.8814926147461ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11396 prompt_cache_len:5151 prompt_cache_ratio:0.452000702000702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 +DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:40 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10790705680847168 s +INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s +DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=196218395509997873495202234878257590502, time:1750768000.9460695s req_ids:[8] +DEBUG 06-24 20:26:40 [manager.py:391] +ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:226.98497772216797ms total_cost_time:227.03051567077637ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11397 prompt_cache_len:5151 prompt_cache_ratio:0.4519610423795736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 +DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s +INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.10873651504516602 s +DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=212507571335281422355347999807802694187, time:1750768001.1855285s req_ids:[8] +DEBUG 06-24 20:26:41 [manager.py:391] +ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:231.99772834777832ms total_cost_time:232.04302787780762ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11398 prompt_cache_len:5151 prompt_cache_ratio:0.4519213897174943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 +DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s +INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11086654663085938 s +DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=213600899489594284358647393024741369149, time:1750768001.4043288s req_ids:[8] +DEBUG 06-24 20:26:41 [manager.py:391] +ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:204.95367050170898ms total_cost_time:204.9996852874756ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11399 prompt_cache_len:5151 prompt_cache_ratio:0.4518817440126327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 +DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10845947265625 s +INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11044502258300781 s +DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=159107525586297725010270990157540721132, time:1750768001.6252797s req_ids:[8] +DEBUG 06-24 20:26:41 [manager.py:391] +ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:210.52932739257812ms total_cost_time:210.5724811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11400 prompt_cache_len:5151 prompt_cache_ratio:0.4518421052631579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 +DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:41 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10933613777160645 s +INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11127448081970215 s +DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=227527269978586458485432297574598978644, time:1750768001.8337352s req_ids:[8] +DEBUG 06-24 20:26:41 [manager.py:391] +ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:205.4150104522705ms total_cost_time:205.4576873779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11401 prompt_cache_len:5151 prompt_cache_ratio:0.45180247346723973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 +DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.3108491897583008 s +INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.31271815299987793 s +DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=286860496175689513962416210175661979854, time:1750768002.2453833s req_ids:[8] +DEBUG 06-24 20:26:42 [manager.py:391] +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:403.84507179260254ms total_cost_time:403.88989448547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11402 prompt_cache_len:5151 prompt_cache_ratio:0.4517628486230486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 +DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10840129852294922 s +INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.1103048324584961 s +DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=176163417493011799583658813601979509312, time:1750768002.457731s req_ids:[8] +DEBUG 06-24 20:26:42 [manager.py:391] +ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:201.6594409942627ms total_cost_time:201.71165466308594ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:11403 prompt_cache_len:5151 prompt_cache_ratio:0.4517232307287556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 +DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s +INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s +DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=106128599891318826603399998058830146012, time:1750768002.6623335s req_ids:[8] +DEBUG 06-24 20:26:42 [manager.py:391] +ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:223.01411628723145ms total_cost_time:223.04844856262207ms,out_token_counter:1 mean_per_token_cost_time: 0.034332275390625ms prompt_token_num:11404 prompt_cache_len:5151 prompt_cache_ratio:0.45168361978253246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 +DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:42 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10883474349975586 s +INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.11072516441345215 s +DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=3181749269374408313580529893380950792, time:1750768002.8914652s req_ids:[8] +DEBUG 06-24 20:26:42 [manager.py:391] +ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:212.60643005371094ms total_cost_time:212.65697479248047ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:11405 prompt_cache_len:5151 prompt_cache_ratio:0.45164401578255153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 +DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.11085867881774902 s +INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11284160614013672 s +DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=126213053735992297747727765233620748022, time:1750768003.11114s req_ids:[8] +DEBUG 06-24 20:26:43 [manager.py:391] +ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:206.70223236083984ms total_cost_time:206.74538612365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11406 prompt_cache_len:5151 prompt_cache_ratio:0.4516044187269858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 +DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10862207412719727 s +INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11069226264953613 s +DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=182526447339702735058013246334959616999, time:1750768003.3246064s req_ids:[8] +DEBUG 06-24 20:26:43 [manager.py:391] +ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:217.20576286315918ms total_cost_time:217.26131439208984ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:11407 prompt_cache_len:5151 prompt_cache_ratio:0.45156482861400893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 +DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s +INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11086726188659668 s +DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=190338994372865627187810971480252783295, time:1750768003.5499256s req_ids:[8] +DEBUG 06-24 20:26:43 [manager.py:391] +ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:203.7954330444336ms total_cost_time:203.82452011108398ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:11408 prompt_cache_len:5151 prompt_cache_ratio:0.45152524544179523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 +DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:43 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10924029350280762 s +INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11170077323913574 s +DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=258024537748378165907671864074395131937, time:1750768003.7615721s req_ids:[8] +DEBUG 06-24 20:26:43 [manager.py:391] +DEBUG 06-24 20:26:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 48774.682 tokens/s +DEBUG 06-24 20:26:43 [stats.py:37] Avg prompt tokens throughput: 48766.217 tokens/s +DEBUG 06-24 20:26:43 [stats.py:37] Avg generate tokens throughput: 8.465 tokens/s +ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:371.15025520324707ms total_cost_time:371.1967468261719ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11409 prompt_cache_len:5151 prompt_cache_ratio:0.45148566920851957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 +DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10888147354125977 s +INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11086130142211914 s +DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=311922921466040430043422932988566209656, time:1750768004.1368408s req_ids:[8] +DEBUG 06-24 20:26:44 [manager.py:391] +ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:209.5491886138916ms total_cost_time:209.5925807952881ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11410 prompt_cache_len:5151 prompt_cache_ratio:0.45144609991235757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 +DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10971546173095703 s +INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11107087135314941 s +DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=120879733504583710329324544207586062309, time:1750768004.3547466s req_ids:[8] +DEBUG 06-24 20:26:44 [manager.py:391] +ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:207.11350440979004ms total_cost_time:207.13567733764648ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:11411 prompt_cache_len:5151 prompt_cache_ratio:0.4514065375514854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 +DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.1065976619720459 s +INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.10849571228027344 s +DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=291734879934071544148210507075532637455, time:1750768004.566464s req_ids:[8] +DEBUG 06-24 20:26:44 [manager.py:391] +ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:209.9325656890869ms total_cost_time:209.9778652191162ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11412 prompt_cache_len:5151 prompt_cache_ratio:0.45136698212407994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 +DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.11020016670227051 s +INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11215686798095703 s +DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=87323746205618040945949114337264768957, time:1750768004.7813244s req_ids:[8] +DEBUG 06-24 20:26:44 [manager.py:391] +ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:210.0822925567627ms total_cost_time:210.12544631958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11413 prompt_cache_len:5151 prompt_cache_ratio:0.45132743362831856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 +DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:44 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10901474952697754 s +INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.1110537052154541 s +DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=230258467232759542547302720330070511707, time:1750768004.9965813s req_ids:[8] +DEBUG 06-24 20:26:44 [manager.py:391] +ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:206.3300609588623ms total_cost_time:206.3760757446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11414 prompt_cache_len:5151 prompt_cache_ratio:0.4512878920623795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 +DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.10862517356872559 s +INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.11072111129760742 s +DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=219321759911789689682295614936008568054, time:1750768005.209134s req_ids:[8] +DEBUG 06-24 20:26:45 [manager.py:391] +ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:207.82208442687988ms total_cost_time:207.86690711975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11415 prompt_cache_len:5151 prompt_cache_ratio:0.4512483574244415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 +DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.31070661544799805 s +INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.31270909309387207 s +DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=36450839091420756264867220889890774424, time:1750768005.6396902s req_ids:[8] +DEBUG 06-24 20:26:45 [manager.py:391] +ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:430.55152893066406ms total_cost_time:430.59587478637695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11416 prompt_cache_len:5151 prompt_cache_ratio:0.45120882971268395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 +DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s +INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s +DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=303341939372676893112978830545252178676, time:1750768005.8645077s req_ids:[8] +DEBUG 06-24 20:26:45 [manager.py:391] +ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:213.22131156921387ms total_cost_time:213.26613426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11417 prompt_cache_len:5151 prompt_cache_ratio:0.45116930892528684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 +DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:45 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s +INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11105871200561523 s +DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=248047106192503922537643561133400774504, time:1750768006.0796413s req_ids:[8] +DEBUG 06-24 20:26:46 [manager.py:391] +ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:205.73163032531738ms total_cost_time:205.80124855041504ms,out_token_counter:1 mean_per_token_cost_time: 0.06961822509765625ms prompt_token_num:11418 prompt_cache_len:5151 prompt_cache_ratio:0.4511297950604309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 +DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10999226570129395 s +INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11195850372314453 s +DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=303525403802918620619032291181053833173, time:1750768006.2931178s req_ids:[8] +DEBUG 06-24 20:26:46 [manager.py:391] +ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:205.7502269744873ms total_cost_time:205.7943344116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11419 prompt_cache_len:5151 prompt_cache_ratio:0.4510902881162974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 +DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.1082453727722168 s +INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.1102604866027832 s +DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=100219738425038210171617287914864328515, time:1750768006.5054529s req_ids:[8] +DEBUG 06-24 20:26:46 [manager.py:391] +ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:209.14721488952637ms total_cost_time:209.19084548950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11420 prompt_cache_len:5151 prompt_cache_ratio:0.4510507880910683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 +DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:46 [batch.py:51] router release req id 8 +DEBUG 06-24 20:26:46 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:46 [manager.py:283] +DEBUG 06-24 20:26:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:46 [manager.py:284] +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10875248908996582 s +INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.1108248233795166 s +DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=279852271004735822952112466336953152437, time:1750768006.7226346s req_ids:[8] +DEBUG 06-24 20:26:46 [manager.py:391] +ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:236.45448684692383ms total_cost_time:236.4962100982666ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11421 prompt_cache_len:5151 prompt_cache_ratio:0.4510112949829262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 +DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:46 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10879373550415039 s +INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11072301864624023 s +DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=305205635422123650950816419876549788098, time:1750768006.9650004s req_ids:[8] +DEBUG 06-24 20:26:46 [manager.py:391] +INFO 06-24 20:26:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:377.07042694091797ms total_cost_time:377.11358070373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11422 prompt_cache_len:5151 prompt_cache_ratio:0.4509718087900543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 +DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s +INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s +DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=333926148951189250711962467588047871261, time:1750768007.3492296s req_ids:[8] +DEBUG 06-24 20:26:47 [manager.py:391] +ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:211.61270141601562ms total_cost_time:211.6560935974121ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11423 prompt_cache_len:5151 prompt_cache_ratio:0.45093232951063644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 +DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10814476013183594 s +INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.11011648178100586 s +DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=55658512300784675739254653756980415550, time:1750768007.5651648s req_ids:[8] +DEBUG 06-24 20:26:47 [manager.py:391] +ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:208.88996124267578ms total_cost_time:208.93526077270508ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11424 prompt_cache_len:5151 prompt_cache_ratio:0.45089285714285715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 +DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10985422134399414 s +INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.11188435554504395 s +DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=237362733282102233471689686941624240488, time:1750768007.7897766s req_ids:[8] +DEBUG 06-24 20:26:47 [manager.py:391] +ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:221.01187705993652ms total_cost_time:221.05669975280762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11425 prompt_cache_len:5151 prompt_cache_ratio:0.4508533916849015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 +DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:47 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10961675643920898 s +INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11180782318115234 s +DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=114049171174467687963947708250224734256, time:1750768008.0093539s req_ids:[8] +DEBUG 06-24 20:26:48 [manager.py:391] +ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:218.73116493225098ms total_cost_time:218.77336502075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11426 prompt_cache_len:5151 prompt_cache_ratio:0.4508139331349554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 +DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10853314399719238 s +INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058449745178223 s +DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=196520198194620165377355846199711763385, time:1750768008.2341673s req_ids:[8] +DEBUG 06-24 20:26:48 [manager.py:391] +ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:211.1365795135498ms total_cost_time:211.1814022064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11427 prompt_cache_len:5151 prompt_cache_ratio:0.450774481491205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 +DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10811281204223633 s +INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11017990112304688 s +DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=32757324444749637181646689254531061584, time:1750768008.4508927s req_ids:[8] +DEBUG 06-24 20:26:48 [manager.py:391] +ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:208.45961570739746ms total_cost_time:208.50372314453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11428 prompt_cache_len:5151 prompt_cache_ratio:0.45073503675183757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 +DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:48 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10965847969055176 s +INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11160421371459961 s +DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=305702368448471810369552601750959548542, time:1750768008.6656163s req_ids:[8] +DEBUG 06-24 20:26:48 [manager.py:391] +ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:202.43573188781738ms total_cost_time:202.47817039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11429 prompt_cache_len:5151 prompt_cache_ratio:0.4506955989150407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 +DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:48 [batch.py:51] router release req id 8 +INFO 06-24 20:26:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s +INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11050200462341309 s +DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=4521175117241422025448549874625258358, time:1750768008.8800495s req_ids:[8] +DEBUG 06-24 20:26:48 [manager.py:391] +ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:214.5235538482666ms total_cost_time:214.5674228668213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11430 prompt_cache_len:5151 prompt_cache_ratio:0.4506561679790026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 +DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10989189147949219 s +INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.1119835376739502 s +DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=63514199755677022723127054143908885287, time:1750768009.098402s req_ids:[8] +DEBUG 06-24 20:26:49 [manager.py:391] +ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:380.2375793457031ms total_cost_time:380.2812099456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11431 prompt_cache_len:5151 prompt_cache_ratio:0.45061674394191237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 +DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10895848274230957 s +INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.11121821403503418 s +DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=41853940536315301038744106336320683341, time:1750768009.4829752s req_ids:[8] +DEBUG 06-24 20:26:49 [manager.py:391] +ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:213.1514549255371ms total_cost_time:213.1936550140381ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11432 prompt_cache_len:5151 prompt_cache_ratio:0.4505773268019594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 +DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.1089181900024414 s +INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.1110846996307373 s +DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=27459495678707253398485182674393685392, time:1750768009.702209s req_ids:[8] +DEBUG 06-24 20:26:49 [manager.py:391] +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:209.45000648498535ms total_cost_time:209.49363708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11433 prompt_cache_len:5151 prompt_cache_ratio:0.45053791655733405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 +DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:49 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10866260528564453 s +INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.11062884330749512 s +DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=81542483225300755427449891853681682264, time:1750768009.9243603s req_ids:[8] +DEBUG 06-24 20:26:49 [manager.py:391] +ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:212.14914321899414ms total_cost_time:212.1884822845459ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:11434 prompt_cache_len:5151 prompt_cache_ratio:0.450498513206227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 +DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.1092071533203125 s +INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.1112823486328125 s +DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=197084051234444713678637824712257515374, time:1750768010.1371236s req_ids:[8] +DEBUG 06-24 20:26:50 [manager.py:391] +ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:207.1986198425293ms total_cost_time:207.24177360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11435 prompt_cache_len:5151 prompt_cache_ratio:0.4504591167468299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 +DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10924458503723145 s +INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11135411262512207 s +DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=201670840651413510233811322827439234604, time:1750768010.3512115s req_ids:[8] +DEBUG 06-24 20:26:50 [manager.py:391] +ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:205.69252967834473ms total_cost_time:205.7352066040039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11436 prompt_cache_len:5151 prompt_cache_ratio:0.45041972717733475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 +DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10809135437011719 s +INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11013293266296387 s +DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=70450764736635334753073304445525320932, time:1750768010.5642529s req_ids:[8] +DEBUG 06-24 20:26:50 [manager.py:391] +ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:209.52939987182617ms total_cost_time:209.57279205322266ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11437 prompt_cache_len:5151 prompt_cache_ratio:0.4503803444959342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 +DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:50 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10941457748413086 s +INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11160469055175781 s +DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=61631857586846066140267766410269495383, time:1750768010.7781177s req_ids:[8] +DEBUG 06-24 20:26:50 [manager.py:391] +ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:372.424840927124ms total_cost_time:372.4701404571533ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11438 prompt_cache_len:5151 prompt_cache_ratio:0.4503409687008218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 +DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10923218727111816 s +INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.1111307144165039 s +DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=139610691711192668916302126633545156275, time:1750768011.1583805s req_ids:[8] +DEBUG 06-24 20:26:51 [manager.py:391] +ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:207.1998119354248ms total_cost_time:207.2439193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11439 prompt_cache_len:5151 prompt_cache_ratio:0.45030159979019146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 +DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.1074526309967041 s +INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.10922622680664062 s +DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=331591899575203965379308775778010219342, time:1750768011.3728445s req_ids:[8] +DEBUG 06-24 20:26:51 [manager.py:391] +ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:167.33479499816895ms total_cost_time:167.37794876098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11440 prompt_cache_len:5151 prompt_cache_ratio:0.4502622377622378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 +DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10809683799743652 s +INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.10990262031555176 s +DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=238774578876722170465811159921886031137, time:1750768011.545501s req_ids:[8] +DEBUG 06-24 20:26:51 [manager.py:391] +ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:165.5418872833252ms total_cost_time:165.58313369750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11441 prompt_cache_len:5151 prompt_cache_ratio:0.45022288261515603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 +DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10931634902954102 s +INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.11132502555847168 s +DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=96912978304753535353150470575578194729, time:1750768011.7168686s req_ids:[8] +DEBUG 06-24 20:26:51 [manager.py:391] +ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:202.44097709655762ms total_cost_time:202.4829387664795ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11442 prompt_cache_len:5151 prompt_cache_ratio:0.4501835343471421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 +DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:51 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10929250717163086 s +INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.1112518310546875 s +DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=97691593939246167262052618685552095628, time:1750768011.9264734s req_ids:[8] +DEBUG 06-24 20:26:51 [manager.py:391] +ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:208.52065086364746ms total_cost_time:208.56285095214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11443 prompt_cache_len:5151 prompt_cache_ratio:0.4501441929563926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 +DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10970067977905273 s +INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11170268058776855 s +DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=95794465918697418231805359040113649018, time:1750768012.1407182s req_ids:[8] +DEBUG 06-24 20:26:52 [manager.py:391] +ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.82113075256348ms total_cost_time:207.86356925964355ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11444 prompt_cache_len:5151 prompt_cache_ratio:0.4501048584411045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 +DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:52 [batch.py:51] router release req id 8 +INFO 06-24 20:26:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10911035537719727 s +INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11138081550598145 s +DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=86217182640072320305634902100594652241, time:1750768012.3571422s req_ids:[8] +DEBUG 06-24 20:26:52 [manager.py:391] +ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:409.83009338378906ms total_cost_time:409.87396240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11445 prompt_cache_len:5151 prompt_cache_ratio:0.45006553079947575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 +DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10810637474060059 s +INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s +DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=20573420085522550803710680408427342150, time:1750768012.772401s req_ids:[8] +DEBUG 06-24 20:26:52 [manager.py:391] +ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.15999603271484ms total_cost_time:207.20362663269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11446 prompt_cache_len:5151 prompt_cache_ratio:0.4500262100297047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 +DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:52 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10821127891540527 s +INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s +DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=290568827336460463864548603896084177145, time:1750768012.9853816s req_ids:[8] +DEBUG 06-24 20:26:52 [manager.py:391] +ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.78846740722656ms total_cost_time:207.83162117004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11447 prompt_cache_len:5151 prompt_cache_ratio:0.4499868961299904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 +DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10890388488769531 s +INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11088705062866211 s +DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=249091325483057760909907931116052622912, time:1750768013.2011693s req_ids:[8] +DEBUG 06-24 20:26:53 [manager.py:391] +INFO 06-24 20:26:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:26:53 [statics_utils.py:24] mean first cost: 228.69557909546808 ms +INFO 06-24 20:26:53 [statics_utils.py:24] mean per token cost: 0.06421429555557658 ms +ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:203.08899879455566ms total_cost_time:203.13262939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11448 prompt_cache_len:5151 prompt_cache_ratio:0.4499475890985325 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 +INFO 06-24 20:26:53 [manager.py:620] left req id 8can release True refcount 3 +DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.1081545352935791 s +INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100926399230957 s +DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=14602717524417206026431386274730183660, time:1750768013.4103122s req_ids:[8] +DEBUG 06-24 20:26:53 [manager.py:391] +ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:208.7841033935547ms total_cost_time:208.82630348205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11449 prompt_cache_len:5151 prompt_cache_ratio:0.4499082889335313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 +DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10936760902404785 s +INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11135458946228027 s +DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=301209802871501452267321032428093171548, time:1750768013.6254554s req_ids:[8] +DEBUG 06-24 20:26:53 [manager.py:391] +ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:208.9521884918213ms total_cost_time:208.99724960327148ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11450 prompt_cache_len:5151 prompt_cache_ratio:0.4498689956331878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 +DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:53 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s +INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11019015312194824 s +DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=127511485200745841045030443192024394061, time:1750768013.841671s req_ids:[8] +DEBUG 06-24 20:26:53 [manager.py:391] +DEBUG 06-24 20:26:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 47634.439 tokens/s +DEBUG 06-24 20:26:53 [stats.py:37] Avg prompt tokens throughput: 47626.106 tokens/s +DEBUG 06-24 20:26:53 [stats.py:37] Avg generate tokens throughput: 8.333 tokens/s +ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:209.91015434265137ms total_cost_time:209.95402336120605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11451 prompt_cache_len:5151 prompt_cache_ratio:0.4498297091957034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 +DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.31118130683898926 s +INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.3131897449493408 s +DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=54933909058169892662337345102072916851, time:1750768014.2642703s req_ids:[8] +DEBUG 06-24 20:26:54 [manager.py:391] +ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:422.76859283447266ms total_cost_time:422.81460762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11452 prompt_cache_len:5151 prompt_cache_ratio:0.44979042961928045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 +DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s +INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s +DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=104407485998132974389324877879190732666, time:1750768014.486629s req_ids:[8] +DEBUG 06-24 20:26:54 [manager.py:391] +ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:211.39216423034668ms total_cost_time:211.43698692321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11453 prompt_cache_len:5151 prompt_cache_ratio:0.4497511569021217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 +DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10896873474121094 s +INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11106705665588379 s +DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=183700484889021835634171427931783141796, time:1750768014.7209835s req_ids:[8] +DEBUG 06-24 20:26:54 [manager.py:391] +ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:226.31144523620605ms total_cost_time:226.35769844055176ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11454 prompt_cache_len:5151 prompt_cache_ratio:0.4497118910424306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 +DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:54 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10884284973144531 s +INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11078310012817383 s +DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=332955413409934623315870900375326021640, time:1750768014.9356692s req_ids:[8] +DEBUG 06-24 20:26:54 [manager.py:391] +ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:208.37163925170898ms total_cost_time:208.41550827026367ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11455 prompt_cache_len:5151 prompt_cache_ratio:0.4496726320384112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 +DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10902118682861328 s +INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11123847961425781 s +DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=76991108028334758460887047233862964570, time:1750768015.151376s req_ids:[8] +DEBUG 06-24 20:26:55 [manager.py:391] +ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:207.83567428588867ms total_cost_time:207.87930488586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11456 prompt_cache_len:5151 prompt_cache_ratio:0.4496333798882682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 +DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10898041725158691 s +INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11104655265808105 s +DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=164460399628108252834782306793214210204, time:1750768015.3664188s req_ids:[8] +DEBUG 06-24 20:26:55 [manager.py:391] +ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:211.26508712768555ms total_cost_time:211.29488945007324ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:11457 prompt_cache_len:5151 prompt_cache_ratio:0.44959413459020686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 +DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10901260375976562 s +INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11107301712036133 s +DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=122346015397080565204665814221656297970, time:1750768015.583846s req_ids:[8] +DEBUG 06-24 20:26:55 [manager.py:391] +ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:380.13410568237305ms total_cost_time:380.18012046813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11458 prompt_cache_len:5151 prompt_cache_ratio:0.4495548961424332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 +DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:55 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10832929611206055 s +INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11036896705627441 s +DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=162949972449399555629281289122563008946, time:1750768015.9686754s req_ids:[8] +DEBUG 06-24 20:26:55 [manager.py:391] +ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:202.88944244384766ms total_cost_time:202.93235778808594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11459 prompt_cache_len:5151 prompt_cache_ratio:0.44951566454315384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 +DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10936355590820312 s +INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11149811744689941 s +DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=123319388752502892192770330812774526229, time:1750768016.1792185s req_ids:[8] +DEBUG 06-24 20:26:56 [manager.py:391] +ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:208.86826515197754ms total_cost_time:208.91356468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11460 prompt_cache_len:5151 prompt_cache_ratio:0.44947643979057594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 +DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10965323448181152 s +INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11177706718444824 s +DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=296957135344586340759362021529344435991, time:1750768016.393971s req_ids:[8] +DEBUG 06-24 20:26:56 [manager.py:391] +ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:210.36434173583984ms total_cost_time:210.40773391723633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11461 prompt_cache_len:5151 prompt_cache_ratio:0.44943722188290725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 +DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10876250267028809 s +INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11084485054016113 s +DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=62971099288697461982971183526039039182, time:1750768016.6118228s req_ids:[8] +DEBUG 06-24 20:26:56 [manager.py:391] +ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:211.03930473327637ms total_cost_time:211.08555793762207ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11462 prompt_cache_len:5151 prompt_cache_ratio:0.4493980108183563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 +DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10858440399169922 s +INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s +DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=271953514829246812225434739937763886317, time:1750768016.8297834s req_ids:[8] +DEBUG 06-24 20:26:56 [manager.py:391] +ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:208.19687843322754ms total_cost_time:208.24027061462402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11463 prompt_cache_len:5151 prompt_cache_ratio:0.44935880659513217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 +DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:56 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10863256454467773 s +INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063647270202637 s +DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=317904880198610201939453221010727710400, time:1750768017.0421329s req_ids:[8] +DEBUG 06-24 20:26:57 [manager.py:391] +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:210.56199073791504ms total_cost_time:210.60657501220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11464 prompt_cache_len:5151 prompt_cache_ratio:0.44931960921144454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 +DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10934090614318848 s +INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11148715019226074 s +DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=241919702743359286942008974342283114563, time:1750768017.2598357s req_ids:[8] +DEBUG 06-24 20:26:57 [manager.py:391] +ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:210.9525203704834ms total_cost_time:210.99591255187988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11465 prompt_cache_len:5151 prompt_cache_ratio:0.4492804186655037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 +DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10924744606018066 s +INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11145973205566406 s +DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=139762340319683233120175363109060948924, time:1750768017.47796s req_ids:[8] +DEBUG 06-24 20:26:57 [manager.py:391] +ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:378.31950187683105ms total_cost_time:378.36217880249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11466 prompt_cache_len:5151 prompt_cache_ratio:0.44924123495552065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 +DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10906815528869629 s +INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11096906661987305 s +DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=23560303376656970149894723636876651575, time:1750768017.8612015s req_ids:[8] +DEBUG 06-24 20:26:57 [manager.py:391] +ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:204.6647071838379ms total_cost_time:204.70905303955078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11467 prompt_cache_len:5151 prompt_cache_ratio:0.449202058079707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 +DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:57 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s +INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11132693290710449 s +DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=294987311005685315031048737806368933803, time:1750768018.0714636s req_ids:[8] +DEBUG 06-24 20:26:58 [manager.py:391] +ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:206.1934471130371ms total_cost_time:206.23779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11468 prompt_cache_len:5151 prompt_cache_ratio:0.44916288803627485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 +DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.11154961585998535 s +INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11357450485229492 s +DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=95892266543681043296887734795121190359, time:1750768018.2837808s req_ids:[8] +DEBUG 06-24 20:26:58 [manager.py:391] +ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:209.57517623901367ms total_cost_time:209.61928367614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11469 prompt_cache_len:5151 prompt_cache_ratio:0.4491237248234371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 +DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10914754867553711 s +INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.1111001968383789 s +DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=53864448307674733828095615930610936574, time:1750768018.5002954s req_ids:[8] +DEBUG 06-24 20:26:58 [manager.py:391] +ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:212.39995956420898ms total_cost_time:212.44549751281738ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11470 prompt_cache_len:5151 prompt_cache_ratio:0.44908456843940714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 +DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10998201370239258 s +INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11192917823791504 s +DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=78028409208472113890678082194289196987, time:1750768018.7185657s req_ids:[8] +DEBUG 06-24 20:26:58 [manager.py:391] +ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:211.81964874267578ms total_cost_time:211.8661403656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11471 prompt_cache_len:5151 prompt_cache_ratio:0.44904541888239907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 +DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:58 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10844826698303223 s +INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.1104576587677002 s +DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=172054009128581549265216702023217005363, time:1750768018.9396365s req_ids:[8] +DEBUG 06-24 20:26:58 [manager.py:391] +ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:213.64808082580566ms total_cost_time:213.69338035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11472 prompt_cache_len:5151 prompt_cache_ratio:0.4490062761506276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 +DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10804891586303711 s +INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11006903648376465 s +DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=91415839762164015225259061108122062990, time:1750768019.156237s req_ids:[8] +DEBUG 06-24 20:26:59 [manager.py:391] +ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:209.88011360168457ms total_cost_time:209.92517471313477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11473 prompt_cache_len:5151 prompt_cache_ratio:0.44896714024230805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 +DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:59 [batch.py:51] router release req id 8 +INFO 06-24 20:26:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s +INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11079597473144531 s +DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=129901199283494959515081024600784296108, time:1750768019.3737192s req_ids:[8] +DEBUG 06-24 20:26:59 [manager.py:391] +ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:384.23776626586914ms total_cost_time:384.28425788879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11474 prompt_cache_len:5151 prompt_cache_ratio:0.4489280111556563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 +DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s +INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11062097549438477 s +DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=169223438173379246264727006865381613353, time:1750768019.763277s req_ids:[8] +DEBUG 06-24 20:26:59 [manager.py:391] +ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:211.03906631469727ms total_cost_time:211.08198165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11475 prompt_cache_len:5151 prompt_cache_ratio:0.4488888888888889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 +DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:26:59 [batch.py:51] router release req id 8 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10915875434875488 s +INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s +DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=251407935840355163020497406823261075091, time:1750768019.986652s req_ids:[8] +DEBUG 06-24 20:26:59 [manager.py:391] +ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:209.6419334411621ms total_cost_time:209.6867561340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11476 prompt_cache_len:5151 prompt_cache_ratio:0.4488497734402231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 +DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10870194435119629 s +INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11080551147460938 s +DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=194157459069776705000224371996220439046, time:1750768020.1963158s req_ids:[8] +DEBUG 06-24 20:27:00 [manager.py:391] +ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:208.49871635437012ms total_cost_time:208.5421085357666ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11477 prompt_cache_len:5151 prompt_cache_ratio:0.4488106648078766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 +DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10886740684509277 s +INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11082768440246582 s +DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=68079542440286809875940611800686408043, time:1750768020.413054s req_ids:[8] +DEBUG 06-24 20:27:00 [manager.py:391] +ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:209.7156047821045ms total_cost_time:209.75828170776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11478 prompt_cache_len:5151 prompt_cache_ratio:0.44877156299006793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 +DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10971808433532715 s +INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11183667182922363 s +DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=177578027295320291958028599555213766346, time:1750768020.6295843s req_ids:[8] +DEBUG 06-24 20:27:00 [manager.py:391] +ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:210.90149879455566ms total_cost_time:211.00926399230957ms,out_token_counter:1 mean_per_token_cost_time: 0.10776519775390625ms prompt_token_num:11479 prompt_cache_len:5151 prompt_cache_ratio:0.4487324679850161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 +DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:00 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10942411422729492 s +INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11150288581848145 s +DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=216318949101922255458039149893658851954, time:1750768020.8469996s req_ids:[8] +DEBUG 06-24 20:27:00 [manager.py:391] +ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:213.02151679992676ms total_cost_time:213.06443214416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11480 prompt_cache_len:5151 prompt_cache_ratio:0.44869337979094076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 +DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.20850300788879395 s +INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.21024560928344727 s +DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=110416980243581446351796552496244602956, time:1750768021.200284s req_ids:[8] +DEBUG 06-24 20:27:01 [manager.py:391] +ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:330.5649757385254ms total_cost_time:330.6107521057129ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11481 prompt_cache_len:5151 prompt_cache_ratio:0.4486542984060622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 +DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.10973644256591797 s +INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.11170530319213867 s +DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=17380211930572365758083962206461553192, time:1750768021.403385s req_ids:[8] +DEBUG 06-24 20:27:01 [manager.py:391] +ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:202.49390602111816ms total_cost_time:202.56853103637695ms,out_token_counter:1 mean_per_token_cost_time: 0.07462501525878906ms prompt_token_num:11482 prompt_cache_len:5151 prompt_cache_ratio:0.4486152238286013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 +DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.10879135131835938 s +INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.11086392402648926 s +DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=204806298526561401523409151778371299627, time:1750768021.6134336s req_ids:[8] +DEBUG 06-24 20:27:01 [manager.py:391] +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:201.22289657592773ms total_cost_time:201.26605033874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11483 prompt_cache_len:5151 prompt_cache_ratio:0.4485761560567796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 +DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.1082756519317627 s +INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101827621459961 s +DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=46912868657940928169528706513222068588, time:1750768021.8216321s req_ids:[8] +DEBUG 06-24 20:27:01 [manager.py:391] +ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:215.9709930419922ms total_cost_time:216.01581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11484 prompt_cache_len:5151 prompt_cache_ratio:0.4485370950888192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 +DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:01 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.10811758041381836 s +INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.11007523536682129 s +DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=205906403815117856552481537645435332078, time:1750768022.04394s req_ids:[8] +DEBUG 06-24 20:27:02 [manager.py:391] +ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:210.11805534362793ms total_cost_time:210.1612091064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11485 prompt_cache_len:5151 prompt_cache_ratio:0.448498040922943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 +DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.10732793807983398 s +INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s +DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=59450376973971665913999176740413981644, time:1750768022.2612457s req_ids:[8] +DEBUG 06-24 20:27:02 [manager.py:391] +ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:212.06068992614746ms total_cost_time:212.10432052612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11486 prompt_cache_len:5151 prompt_cache_ratio:0.4484589935573742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 +DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.1087198257446289 s +INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.1107034683227539 s +DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=278351599202882621199781221391778667169, time:1750768022.4795702s req_ids:[8] +DEBUG 06-24 20:27:02 [manager.py:391] +ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:206.9528102874756ms total_cost_time:206.99810981750488ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11487 prompt_cache_len:5151 prompt_cache_ratio:0.4484199529903369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 +DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.20937347412109375 s +INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.21111392974853516 s +DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=251300340778364654720902304334192948559, time:1750768022.8253553s req_ids:[8] +DEBUG 06-24 20:27:02 [manager.py:391] +ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:338.0894660949707ms total_cost_time:338.1328582763672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11488 prompt_cache_len:5151 prompt_cache_ratio:0.4483809192200557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 +DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:02 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10823369026184082 s +INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11015605926513672 s +DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=231535289920529318999046183834708902709, time:1750768023.0452106s req_ids:[8] +DEBUG 06-24 20:27:03 [manager.py:391] +ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:217.451810836792ms total_cost_time:217.4975872039795ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11489 prompt_cache_len:5151 prompt_cache_ratio:0.44834189224475585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 +DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10831689834594727 s +INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11030435562133789 s +DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=114345775613969438442008431666323932670, time:1750768023.259938s req_ids:[8] +DEBUG 06-24 20:27:03 [manager.py:391] +ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:209.65266227722168ms total_cost_time:209.69748497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11490 prompt_cache_len:5151 prompt_cache_ratio:0.44830287206266317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 +DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.11020994186401367 s +INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11211633682250977 s +DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=269449351930330736333523385808196046439, time:1750768023.4769177s req_ids:[8] +DEBUG 06-24 20:27:03 [manager.py:391] +ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:209.09428596496582ms total_cost_time:209.1374397277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11491 prompt_cache_len:5151 prompt_cache_ratio:0.4482638586720042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 +DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10909914970397949 s +INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11120057106018066 s +DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=267834929978489600763842642546862785466, time:1750768023.6944804s req_ids:[8] +DEBUG 06-24 20:27:03 [manager.py:391] +ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:210.21008491516113ms total_cost_time:210.25586128234863ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11492 prompt_cache_len:5151 prompt_cache_ratio:0.44822485207100593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 +DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:03 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10952234268188477 s +INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11163568496704102 s +DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=213933980562856640474120110401903940428, time:1750768023.9101877s req_ids:[8] +DEBUG 06-24 20:27:03 [manager.py:391] +DEBUG 06-24 20:27:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 47865.350 tokens/s +DEBUG 06-24 20:27:03 [stats.py:37] Avg prompt tokens throughput: 47857.007 tokens/s +DEBUG 06-24 20:27:03 [stats.py:37] Avg generate tokens throughput: 8.343 tokens/s +ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:211.48300170898438ms total_cost_time:211.52591705322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11493 prompt_cache_len:5151 prompt_cache_ratio:0.4481858522578961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 +DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10856342315673828 s +INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.11051464080810547 s +DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=197765049441637236395995545614509678971, time:1750768024.127005s req_ids:[8] +DEBUG 06-24 20:27:04 [manager.py:391] +ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:392.4744129180908ms total_cost_time:392.5197124481201ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11494 prompt_cache_len:5151 prompt_cache_ratio:0.4481468592309031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 +DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10774636268615723 s +INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s +DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=11536550526298161207457285899490840812, time:1750768024.5304654s req_ids:[8] +DEBUG 06-24 20:27:04 [manager.py:391] +ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:214.98370170593262ms total_cost_time:215.0249481201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11495 prompt_cache_len:5151 prompt_cache_ratio:0.4481078729882558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 +DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10864830017089844 s +INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.11054491996765137 s +DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=131730048106509425257644850606384390465, time:1750768024.7485874s req_ids:[8] +DEBUG 06-24 20:27:04 [manager.py:391] +ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:204.6835422515869ms total_cost_time:204.7276496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11496 prompt_cache_len:5151 prompt_cache_ratio:0.44806889352818374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 +DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:04 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10985016822814941 s +INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.1118013858795166 s +DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=3606936866228866317805580588946842939, time:1750768024.957365s req_ids:[8] +DEBUG 06-24 20:27:04 [manager.py:391] +ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:208.38046073913574ms total_cost_time:208.42480659484863ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11497 prompt_cache_len:5151 prompt_cache_ratio:0.44802992084891713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 +DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s +INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11084747314453125 s +DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=4855946702249088777521689259667289334, time:1750768025.1709967s req_ids:[8] +DEBUG 06-24 20:27:05 [manager.py:391] +ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:205.6434154510498ms total_cost_time:205.69586753845215ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:11498 prompt_cache_len:5151 prompt_cache_ratio:0.44799095494868674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 +DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s +INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.1114354133605957 s +DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=213736072517534173356378361908157795940, time:1750768025.3827944s req_ids:[8] +DEBUG 06-24 20:27:05 [manager.py:391] +ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:215.60931205749512ms total_cost_time:215.6524658203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11499 prompt_cache_len:5151 prompt_cache_ratio:0.447951995825724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 +DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10831189155578613 s +INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s +DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=270169810724592860314252392366219539407, time:1750768025.6190133s req_ids:[8] +DEBUG 06-24 20:27:05 [manager.py:391] +ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:213.98186683654785ms total_cost_time:214.02573585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11500 prompt_cache_len:5151 prompt_cache_ratio:0.4479130434782609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 +DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:05 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10850977897644043 s +INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s +DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=140047481700162450342605131418296836585, time:1750768025.8471203s req_ids:[8] +DEBUG 06-24 20:27:05 [manager.py:391] +ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:403.7513732910156ms total_cost_time:403.7942886352539ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11501 prompt_cache_len:5151 prompt_cache_ratio:0.44787409790453003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 +DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10891222953796387 s +INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s +DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=257724131350751158734646409481255075029, time:1750768026.2372186s req_ids:[8] +DEBUG 06-24 20:27:06 [manager.py:391] +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:202.81434059143066ms total_cost_time:202.85606384277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11502 prompt_cache_len:5151 prompt_cache_ratio:0.4478351591027647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 +DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:06 [batch.py:51] router release req id 8 +INFO 06-24 20:27:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10863947868347168 s +INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s +DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=22240959118440316298059906489897165692, time:1750768026.4532208s req_ids:[8] +DEBUG 06-24 20:27:06 [manager.py:391] +ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:213.29784393310547ms total_cost_time:213.34147453308105ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11503 prompt_cache_len:5151 prompt_cache_ratio:0.4477962270711988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 +DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10897636413574219 s +INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.1109464168548584 s +DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=182696868454419069693083116218630994617, time:1750768026.664432s req_ids:[8] +DEBUG 06-24 20:27:06 [manager.py:391] +ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:205.4002285003662ms total_cost_time:205.4443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11504 prompt_cache_len:5151 prompt_cache_ratio:0.4477573018080668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 +DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:06 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10844850540161133 s +INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11051082611083984 s +DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=216123952455913904641642629521920238091, time:1750768026.8766768s req_ids:[8] +DEBUG 06-24 20:27:06 [manager.py:391] +ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:211.72428131103516ms total_cost_time:211.76862716674805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11505 prompt_cache_len:5151 prompt_cache_ratio:0.44771838331160363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 +DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s +INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11068153381347656 s +DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=197536237760918745383568581636620113664, time:1750768027.0957658s req_ids:[8] +DEBUG 06-24 20:27:07 [manager.py:391] +ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:205.6267261505127ms total_cost_time:205.67059516906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11506 prompt_cache_len:5151 prompt_cache_ratio:0.4476794715800452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 +DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10852408409118652 s +INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11063432693481445 s +DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=89828281685318031161752566169101147716, time:1750768027.3118255s req_ids:[8] +DEBUG 06-24 20:27:07 [manager.py:391] +ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:214.6742343902588ms total_cost_time:214.71881866455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11507 prompt_cache_len:5151 prompt_cache_ratio:0.4476405666116277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 +DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s +DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=140498526619812386526613841456780909575, time:1750768027.5279782s req_ids:[8] +DEBUG 06-24 20:27:07 [manager.py:391] +ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:199.13816452026367ms total_cost_time:199.18251037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11508 prompt_cache_len:5151 prompt_cache_ratio:0.4476016684045881 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 +DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.20930218696594238 s +INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.21102356910705566 s +DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=84107992851990607567501875144729538640, time:1750768027.8693511s req_ids:[8] +DEBUG 06-24 20:27:07 [manager.py:391] +ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:313.54808807373047ms total_cost_time:313.5943412780762ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11509 prompt_cache_len:5151 prompt_cache_ratio:0.44756277695716395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 +DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:07 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s +INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s +DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=89590166486592492012769836778292336381, time:1750768028.0530016s req_ids:[8] +DEBUG 06-24 20:27:08 [manager.py:391] +ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:199.28503036499023ms total_cost_time:199.32937622070312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11510 prompt_cache_len:5151 prompt_cache_ratio:0.44752389226759337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 +DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s +INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11025762557983398 s +DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=163638271657110360675943853040788606812, time:1750768028.2602093s req_ids:[8] +DEBUG 06-24 20:27:08 [manager.py:391] +ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:203.7358283996582ms total_cost_time:203.7801742553711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11511 prompt_cache_len:5151 prompt_cache_ratio:0.4474850143341152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 +DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10927891731262207 s +INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112830638885498 s +DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=182209689488403169261110316523182841843, time:1750768028.4690893s req_ids:[8] +DEBUG 06-24 20:27:08 [manager.py:391] +ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:207.59868621826172ms total_cost_time:207.6423168182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11512 prompt_cache_len:5151 prompt_cache_ratio:0.4474461431549687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 +DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10888218879699707 s +INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11094331741333008 s +DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=137129771991069977376591417104966075220, time:1750768028.684868s req_ids:[8] +DEBUG 06-24 20:27:08 [manager.py:391] +ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:207.10992813110352ms total_cost_time:207.1518898010254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11513 prompt_cache_len:5151 prompt_cache_ratio:0.447407278728394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 +DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:08 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10908651351928711 s +INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112055778503418 s +DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=183129512022226391617747331139427311333, time:1750768028.89688s req_ids:[8] +DEBUG 06-24 20:27:08 [manager.py:391] +ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:190.56415557861328ms total_cost_time:190.60826301574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11514 prompt_cache_len:5151 prompt_cache_ratio:0.4473684210526316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 +DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10892271995544434 s +INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.11073160171508789 s +DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=260037951453583052709221745229004179368, time:1750768029.0936055s req_ids:[8] +DEBUG 06-24 20:27:09 [manager.py:391] +ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:362.2126579284668ms total_cost_time:362.2567653656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11515 prompt_cache_len:5151 prompt_cache_ratio:0.4473295701259227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 +DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10850334167480469 s +INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.1103818416595459 s +DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=261454611022190730171017719536613147025, time:1750768029.461922s req_ids:[8] +DEBUG 06-24 20:27:09 [manager.py:391] +ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:187.6664161682129ms total_cost_time:187.71028518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11516 prompt_cache_len:5151 prompt_cache_ratio:0.4472907259465092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 +DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s +INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.1096043586730957 s +DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=195564776759058681558391963633513822618, time:1750768029.6575105s req_ids:[8] +DEBUG 06-24 20:27:09 [manager.py:391] +ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:199.63955879211426ms total_cost_time:199.68247413635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11517 prompt_cache_len:5151 prompt_cache_ratio:0.4472518885126335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 +DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10915780067443848 s +INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.11107707023620605 s +DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=7539896173252525941614578352514043508, time:1750768029.8626633s req_ids:[8] +DEBUG 06-24 20:27:09 [manager.py:391] +ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:204.48589324951172ms total_cost_time:204.5302391052246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11518 prompt_cache_len:5151 prompt_cache_ratio:0.4472130578225386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 +DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:09 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10808253288269043 s +INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100466251373291 s +DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=241238806878254744391135727420671444463, time:1750768030.0836942s req_ids:[8] +DEBUG 06-24 20:27:10 [manager.py:391] +ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:219.9561595916748ms total_cost_time:220.001220703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11519 prompt_cache_len:5151 prompt_cache_ratio:0.44717423387446825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 +DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:10 [batch.py:51] router release req id 8 +WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again +INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 +WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again +INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 +INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s +INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11166238784790039 s +DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=324159688702362866898453155766500746673, time:1750768030.301429s req_ids:[8] +DEBUG 06-24 20:27:10 [manager.py:391] +ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:209.38348770141602ms total_cost_time:209.4278335571289ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11520 prompt_cache_len:5151 prompt_cache_ratio:0.4471354166666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 +DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:10 [batch.py:51] router release req id 8 +INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s +INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11090469360351562 s +DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=273314963072987937624466043001015779581, time:1750768030.5395844s req_ids:[8] +DEBUG 06-24 20:27:10 [manager.py:391] +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:230.15928268432617ms total_cost_time:230.20315170288086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11521 prompt_cache_len:5151 prompt_cache_ratio:0.4470966061973787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 +DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:10 [batch.py:51] router release req id 8 +INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10875582695007324 s +INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083793640136719 s +DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=62874336445114855445315672648053757520, time:1750768030.752677s req_ids:[8] +DEBUG 06-24 20:27:10 [manager.py:391] +ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:378.4186840057373ms total_cost_time:378.4632682800293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11522 prompt_cache_len:5151 prompt_cache_ratio:0.44705780246484983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 +DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:11 [batch.py:51] router release req id 8 +INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10847353935241699 s +INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.11044168472290039 s +DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=181342935191488314782691934929057284834, time:1750768031.146899s req_ids:[8] +DEBUG 06-24 20:27:11 [manager.py:391] +ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:214.90788459777832ms total_cost_time:214.9515151977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11523 prompt_cache_len:5151 prompt_cache_ratio:0.4470190054673262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 +DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:11 [batch.py:51] router release req id 8 +INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.1078798770904541 s +INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990047454833984 s +DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=9589314894294385190675499494374968129, time:1750768031.3593307s req_ids:[8] +DEBUG 06-24 20:27:11 [manager.py:391] +ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:207.57436752319336ms total_cost_time:207.62085914611816ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11524 prompt_cache_len:5151 prompt_cache_ratio:0.4469802152030545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 +DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:11 [batch.py:51] router release req id 8 +INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10930323600769043 s +INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.11125397682189941 s +DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=29830870207625210821487779867829939521, time:1750768031.581609s req_ids:[8] +DEBUG 06-24 20:27:11 [manager.py:391] +ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:218.02854537963867ms total_cost_time:218.07241439819336ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11525 prompt_cache_len:5151 prompt_cache_ratio:0.446941431670282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 +DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:11 [batch.py:51] router release req id 8 +INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10734272003173828 s +INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.10927748680114746 s +DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=184960704546135925761441682165797363529, time:1750768031.805424s req_ids:[8] +DEBUG 06-24 20:27:11 [manager.py:391] +ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:213.92297744750977ms total_cost_time:213.96541595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11526 prompt_cache_len:5151 prompt_cache_ratio:0.4469026548672566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 +DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:11 [batch.py:51] router release req id 8 +INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10758519172668457 s +INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s +DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=136612049340751542490230307301212965529, time:1750768032.0206873s req_ids:[8] +DEBUG 06-24 20:27:12 [manager.py:391] +ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:213.71054649353027ms total_cost_time:213.75489234924316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11527 prompt_cache_len:5151 prompt_cache_ratio:0.44686388479222694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 +DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:12 [batch.py:51] router release req id 8 +INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10884284973144531 s +INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.11070561408996582 s +DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=271018319268510207474466365842486396230, time:1750768032.238665s req_ids:[8] +DEBUG 06-24 20:27:12 [manager.py:391] +ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:209.23137664794922ms total_cost_time:209.27691459655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11528 prompt_cache_len:5151 prompt_cache_ratio:0.44682512144344205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 +DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:12 [batch.py:51] router release req id 8 +INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10699796676635742 s +INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.10888099670410156 s +DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=27029226230246066736506496277015071706, time:1750768032.4791806s req_ids:[8] +DEBUG 06-24 20:27:12 [manager.py:391] +ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:229.20823097229004ms total_cost_time:229.25090789794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11529 prompt_cache_len:5151 prompt_cache_ratio:0.4467863648191517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 +DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:12 [batch.py:51] router release req id 8 +INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.309694766998291 s +INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.31163454055786133 s +DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=251227467450080321586560580780172480615, time:1750768032.8942409s req_ids:[8] +DEBUG 06-24 20:27:12 [manager.py:391] +ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:415.6961441040039ms total_cost_time:415.7402515411377ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11530 prompt_cache_len:5151 prompt_cache_ratio:0.44674761491760623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 +DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:13 [batch.py:51] router release req id 8 +INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10782456398010254 s +INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10972428321838379 s +DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=329394360501775278836708721118079774711, time:1750768033.1151454s req_ids:[8] +DEBUG 06-24 20:27:13 [manager.py:391] +ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:208.43005180358887ms total_cost_time:208.47535133361816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11531 prompt_cache_len:5151 prompt_cache_ratio:0.44670887173705665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 +DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:13 [batch.py:51] router release req id 8 +INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10821390151977539 s +INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.11025333404541016 s +DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=224143599464996496941801128111624638780, time:1750768033.328239s req_ids:[8] +DEBUG 06-24 20:27:13 [manager.py:391] +ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:206.15696907043457ms total_cost_time:206.20250701904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11532 prompt_cache_len:5151 prompt_cache_ratio:0.44667013527575444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 +DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:13 [batch.py:51] router release req id 8 +INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10745549201965332 s +INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10939431190490723 s +DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=26253948729714786583888846471302946988, time:1750768033.563854s req_ids:[8] +DEBUG 06-24 20:27:13 [manager.py:391] +ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:221.55284881591797ms total_cost_time:221.59695625305176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11533 prompt_cache_len:5151 prompt_cache_ratio:0.4466314055319518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 +DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:13 [batch.py:51] router release req id 8 +INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s +INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10976862907409668 s +DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=197400315803058788094577083226503200948, time:1750768033.7699008s req_ids:[8] +DEBUG 06-24 20:27:13 [manager.py:391] +ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:205.05952835083008ms total_cost_time:205.10435104370117ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11534 prompt_cache_len:5151 prompt_cache_ratio:0.4465926825039015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 +DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:13 [batch.py:51] router release req id 8 +INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10730147361755371 s +INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10968303680419922 s +DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=220418320867655774254720220618153093837, time:1750768033.979639s req_ids:[8] +DEBUG 06-24 20:27:13 [manager.py:391] +DEBUG 06-24 20:27:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 48035.746 tokens/s +DEBUG 06-24 20:27:13 [stats.py:37] Avg prompt tokens throughput: 48027.404 tokens/s +DEBUG 06-24 20:27:13 [stats.py:37] Avg generate tokens throughput: 8.342 tokens/s +ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:201.94077491760254ms total_cost_time:201.98392868041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11535 prompt_cache_len:5151 prompt_cache_ratio:0.44655396618985693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 +DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:14 [batch.py:51] router release req id 8 +INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.10815262794494629 s +INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s +DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=72722018168235309752317118410661161236, time:1750768034.192789s req_ids:[8] +DEBUG 06-24 20:27:14 [manager.py:391] +ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:379.84156608581543ms total_cost_time:379.8868656158447ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11536 prompt_cache_len:5151 prompt_cache_ratio:0.4465152565880721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 +DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:14 [batch.py:51] router release req id 8 +INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.1072850227355957 s +INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.10918378829956055 s +DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=121949659619199036932854757905768669939, time:1750768034.5762315s req_ids:[8] +DEBUG 06-24 20:27:14 [manager.py:391] +ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:224.92718696594238ms total_cost_time:224.97200965881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11537 prompt_cache_len:5151 prompt_cache_ratio:0.4464765536968016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 +DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:14 [batch.py:51] router release req id 8 +INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.10879373550415039 s +INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072850227355957 s +DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=195821545213852940111713587102067954925, time:1750768034.8096938s req_ids:[8] +DEBUG 06-24 20:27:14 [manager.py:391] +ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:205.3837776184082ms total_cost_time:205.4283618927002ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11538 prompt_cache_len:5151 prompt_cache_ratio:0.4464378575143006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 +DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:14 [batch.py:51] router release req id 8 +INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10788559913635254 s +INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10989809036254883 s +DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=65515364993187780731950899083747939304, time:1750768035.0190854s req_ids:[8] +DEBUG 06-24 20:27:15 [manager.py:391] +ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:212.16368675231934ms total_cost_time:212.20707893371582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11539 prompt_cache_len:5151 prompt_cache_ratio:0.44639916803882485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 +DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:15 [batch.py:51] router release req id 8 +INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10696196556091309 s +INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10897016525268555 s +DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=327278611138202212038166339920846279975, time:1750768035.262717s req_ids:[8] +DEBUG 06-24 20:27:15 [manager.py:391] +ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:225.32248497009277ms total_cost_time:225.36420822143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11540 prompt_cache_len:5151 prompt_cache_ratio:0.44636048526863087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 +DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:15 [batch.py:51] router release req id 8 +INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10893416404724121 s +INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.11101770401000977 s +DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=217050675394868006263311026451562650133, time:1750768035.470755s req_ids:[8] +DEBUG 06-24 20:27:15 [manager.py:391] +ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:208.59503746032715ms total_cost_time:208.64009857177734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11541 prompt_cache_len:5151 prompt_cache_ratio:0.44632180920197556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 +DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:15 [batch.py:51] router release req id 8 +INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10810327529907227 s +INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10995626449584961 s +DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=336366292626247426675723278064317327664, time:1750768035.684561s req_ids:[8] +DEBUG 06-24 20:27:15 [manager.py:391] +ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:203.83524894714355ms total_cost_time:203.88007164001465ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11542 prompt_cache_len:5151 prompt_cache_ratio:0.4462831398371166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 +DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:15 [batch.py:51] router release req id 8 +INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10823202133178711 s +INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.1104433536529541 s +DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=181279718564383556660070747150368292165, time:1750768035.8942478s req_ids:[8] +DEBUG 06-24 20:27:15 [manager.py:391] +ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:386.26885414123535ms total_cost_time:386.31486892700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11543 prompt_cache_len:5151 prompt_cache_ratio:0.44624447717231225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 +DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:16 [batch.py:51] router release req id 8 +INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10723447799682617 s +INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.1090095043182373 s +DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=276346951025133259099899817001547061955, time:1750768036.3079555s req_ids:[8] +DEBUG 06-24 20:27:16 [manager.py:391] +ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:222.30982780456543ms total_cost_time:222.3525047302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11544 prompt_cache_len:5151 prompt_cache_ratio:0.4462058212058212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 +DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:16 [batch.py:51] router release req id 8 +INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10828185081481934 s +INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.11039257049560547 s +DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=76095344927514376388129964169372026501, time:1750768036.5169027s req_ids:[8] +DEBUG 06-24 20:27:16 [manager.py:391] +ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:200.6700038909912ms total_cost_time:200.7136344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11545 prompt_cache_len:5151 prompt_cache_ratio:0.446167171935903 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 +DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:16 [batch.py:51] router release req id 8 +INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10806083679199219 s +INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s +DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=60529722807640789506344857535411921298, time:1750768036.7338393s req_ids:[8] +DEBUG 06-24 20:27:16 [manager.py:391] +ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:206.80928230285645ms total_cost_time:206.85458183288574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11546 prompt_cache_len:5151 prompt_cache_ratio:0.4461285293608176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 +DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:16 [batch.py:51] router release req id 8 +INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.1068568229675293 s +INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.10885882377624512 s +DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=26877513934762374613534303091180945659, time:1750768036.937134s req_ids:[8] +DEBUG 06-24 20:27:16 [manager.py:391] +ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:195.27268409729004ms total_cost_time:195.31774520874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11547 prompt_cache_len:5151 prompt_cache_ratio:0.44608989347882566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 +INFO 06-24 20:27:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:17 [batch.py:51] router release req id 8 +INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.10778093338012695 s +INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.10957860946655273 s +DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=91978862725742220083039217607748335256, time:1750768037.1470807s req_ids:[8] +DEBUG 06-24 20:27:17 [manager.py:391] +ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:218.30296516418457ms total_cost_time:218.34802627563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11548 prompt_cache_len:5151 prompt_cache_ratio:0.4460512642881884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 +DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:17 [batch.py:51] router release req id 8 +INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.10806012153625488 s +INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.1102592945098877 s +DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=93217065297045032840663842086971177265, time:1750768037.363575s req_ids:[8] +DEBUG 06-24 20:27:17 [manager.py:391] +ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:199.37753677368164ms total_cost_time:199.42283630371094ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11549 prompt_cache_len:5151 prompt_cache_ratio:0.4460126417871677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 +DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:17 [batch.py:51] router release req id 8 +INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.107330322265625 s +INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s +DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=296261573025029293541474962605177454661, time:1750768037.5687678s req_ids:[8] +DEBUG 06-24 20:27:17 [manager.py:391] +ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:199.4023323059082ms total_cost_time:199.44500923156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11550 prompt_cache_len:5151 prompt_cache_ratio:0.44597402597402597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 +DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:17 [batch.py:51] router release req id 8 +INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.3097109794616699 s +INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.3120126724243164 s +DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=262527193116415170111231484764237816702, time:1750768037.979559s req_ids:[8] +DEBUG 06-24 20:27:17 [manager.py:391] +ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:412.4901294708252ms total_cost_time:412.5347137451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11551 prompt_cache_len:5151 prompt_cache_ratio:0.44593541684702626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 +DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:18 [batch.py:51] router release req id 8 +INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.1081545352935791 s +INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.1101374626159668 s +DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=186627008545597276962196107084991165609, time:1750768038.1962545s req_ids:[8] +DEBUG 06-24 20:27:18 [manager.py:391] +ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:209.7952365875244ms total_cost_time:209.8402976989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11552 prompt_cache_len:5151 prompt_cache_ratio:0.44589681440443213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 +DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:18 [batch.py:51] router release req id 8 +INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10725164413452148 s +INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923576354980469 s +DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=72574700691997711299962612133736973536, time:1750768038.4132843s req_ids:[8] +DEBUG 06-24 20:27:18 [manager.py:391] +ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:214.4765853881836ms total_cost_time:214.52021598815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11553 prompt_cache_len:5151 prompt_cache_ratio:0.4458582186445079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 +DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:18 [batch.py:51] router release req id 8 +INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10808968544006348 s +INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s +DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=275281208950806706563697152706406251724, time:1750768038.647819s req_ids:[8] +DEBUG 06-24 20:27:18 [manager.py:391] +ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:221.53162956237793ms total_cost_time:221.57573699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11554 prompt_cache_len:5151 prompt_cache_ratio:0.44581962956551846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 +DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:18 [batch.py:51] router release req id 8 +INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10864472389221191 s +INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.11060905456542969 s +DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=160619746254918374206772895103828979640, time:1750768038.8613305s req_ids:[8] +DEBUG 06-24 20:27:18 [manager.py:391] +ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:210.48545837402344ms total_cost_time:210.52908897399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11555 prompt_cache_len:5151 prompt_cache_ratio:0.44578104716572914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 +DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:18 [batch.py:51] router release req id 8 +INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.1076052188873291 s +INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s +DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=192055939863602628503867255009910303973, time:1750768039.07979s req_ids:[8] +DEBUG 06-24 20:27:19 [manager.py:391] +ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:207.76796340942383ms total_cost_time:207.81230926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11556 prompt_cache_len:5151 prompt_cache_ratio:0.445742471443406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 +DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:19 [batch.py:51] router release req id 8 +INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10738205909729004 s +INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.10958147048950195 s +DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=64476610540374886490576086221244465614, time:1750768039.2919707s req_ids:[8] +DEBUG 06-24 20:27:19 [manager.py:391] +ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:396.6038227081299ms total_cost_time:396.6481685638428ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11557 prompt_cache_len:5151 prompt_cache_ratio:0.4457039023968158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 +DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:19 [batch.py:51] router release req id 8 +INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s +INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.11015796661376953 s +DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=144945953252063376310999124512265698804, time:1750768039.6948233s req_ids:[8] +DEBUG 06-24 20:27:19 [manager.py:391] +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:206.79903030395508ms total_cost_time:206.84313774108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11558 prompt_cache_len:5151 prompt_cache_ratio:0.44566534002422564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 +DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:19 [batch.py:51] router release req id 8 +INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10876345634460449 s +INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.11082863807678223 s +DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=50152142214698984454296710994991332251, time:1750768039.9090242s req_ids:[8] +DEBUG 06-24 20:27:19 [manager.py:391] +ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:204.07366752624512ms total_cost_time:204.1189670562744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11559 prompt_cache_len:5151 prompt_cache_ratio:0.44562678432390346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 +DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:20 [batch.py:51] router release req id 8 +INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10718369483947754 s +INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10926389694213867 s +DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=316562154061129852225973958895333755448, time:1750768040.1188781s req_ids:[8] +DEBUG 06-24 20:27:20 [manager.py:391] +ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:212.04471588134766ms total_cost_time:212.08882331848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11560 prompt_cache_len:5151 prompt_cache_ratio:0.4455882352941177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 +DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:20 [batch.py:51] router release req id 8 +INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10814523696899414 s +INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.11010289192199707 s +DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=260644065771999606106858530331104083594, time:1750768040.3502462s req_ids:[8] +DEBUG 06-24 20:27:20 [manager.py:391] +ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:213.64736557006836ms total_cost_time:213.69147300720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11561 prompt_cache_len:5151 prompt_cache_ratio:0.4455496929331373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 +DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:20 [batch.py:51] router release req id 8 +INFO 06-24 20:27:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10795998573303223 s +INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10988593101501465 s +DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=303818388784223970108427069095913305705, time:1750768040.5591733s req_ids:[8] +DEBUG 06-24 20:27:20 [manager.py:391] +ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:206.65788650512695ms total_cost_time:206.70199394226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11562 prompt_cache_len:5151 prompt_cache_ratio:0.44551115723923196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 +DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:20 [batch.py:51] router release req id 8 +INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10726237297058105 s +INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10938644409179688 s +DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=89243430758099310534105994542739508426, time:1750768040.7708855s req_ids:[8] +DEBUG 06-24 20:27:20 [manager.py:391] +ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:202.8939723968506ms total_cost_time:202.93951034545898ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11563 prompt_cache_len:5151 prompt_cache_ratio:0.44547262821067196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 +DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:20 [batch.py:51] router release req id 8 +INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10714459419250488 s +INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.1092081069946289 s +DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=250172097261122938508033012674261263899, time:1750768040.9817164s req_ids:[8] +DEBUG 06-24 20:27:20 [manager.py:391] +ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:370.33724784851074ms total_cost_time:370.38254737854004ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11564 prompt_cache_len:5151 prompt_cache_ratio:0.4454341058457281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 +DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:21 [batch.py:51] router release req id 8 +INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.10703277587890625 s +INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s +DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=279325152564711842479679128962507927131, time:1750768041.3591006s req_ids:[8] +DEBUG 06-24 20:27:21 [manager.py:391] +ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:216.58730506896973ms total_cost_time:216.6311740875244ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11565 prompt_cache_len:5151 prompt_cache_ratio:0.4453955901426719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 +DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:21 [batch.py:51] router release req id 8 +INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.1073751449584961 s +INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s +DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=143851844614001003819053237413619527548, time:1750768041.583676s req_ids:[8] +DEBUG 06-24 20:27:21 [manager.py:391] +ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:206.27450942993164ms total_cost_time:206.31814002990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11566 prompt_cache_len:5151 prompt_cache_ratio:0.4453570810997752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 +DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:21 [batch.py:51] router release req id 8 +INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.1075589656829834 s +INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.10957026481628418 s +DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=72282653088081514264529457862941423130, time:1750768041.7936687s req_ids:[8] +DEBUG 06-24 20:27:21 [manager.py:391] +ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:203.92632484436035ms total_cost_time:203.96900177001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11567 prompt_cache_len:5151 prompt_cache_ratio:0.4453185787153108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 +DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:21 [batch.py:51] router release req id 8 +INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s +INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.11056947708129883 s +DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=207241863347605668628549669304742112662, time:1750768042.004275s req_ids:[8] +DEBUG 06-24 20:27:22 [manager.py:391] +ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:196.86603546142578ms total_cost_time:196.90752029418945ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11568 prompt_cache_len:5151 prompt_cache_ratio:0.44528008298755184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 +DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:22 [batch.py:51] router release req id 8 +INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.1075742244720459 s +INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10971903800964355 s +DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=94883886052511634203832416524860209822, time:1750768042.2066646s req_ids:[8] +DEBUG 06-24 20:27:22 [manager.py:391] +ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:204.3936252593994ms total_cost_time:204.4358253479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11569 prompt_cache_len:5151 prompt_cache_ratio:0.44524159391477225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 +DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:22 [batch.py:51] router release req id 8 +INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.10750293731689453 s +INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10943198204040527 s +DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=244564710304726301394168319461730603879, time:1750768042.418144s req_ids:[8] +DEBUG 06-24 20:27:22 [manager.py:391] +ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:204.10847663879395ms total_cost_time:204.15186882019043ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11570 prompt_cache_len:5151 prompt_cache_ratio:0.44520311149524633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 +DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:22 [batch.py:51] router release req id 8 +INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.10728621482849121 s +INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10933303833007812 s +DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=71119514468147665464207116744884685416, time:1750768042.6296477s req_ids:[8] +DEBUG 06-24 20:27:22 [manager.py:391] +ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:207.92007446289062ms total_cost_time:207.9637050628662ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11571 prompt_cache_len:5151 prompt_cache_ratio:0.4451646357272492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 +DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:22 [batch.py:51] router release req id 8 +INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.2079322338104248 s +INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.20972371101379395 s +DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=183762078713383513045662939774384217537, time:1750768042.9455245s req_ids:[8] +DEBUG 06-24 20:27:22 [manager.py:391] +ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:265.60211181640625ms total_cost_time:265.64598083496094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11572 prompt_cache_len:5151 prompt_cache_ratio:0.44512616660905635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 +DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:23 [batch.py:51] router release req id 8 +INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10907602310180664 s +DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=115511640357816040732083717822729157731, time:1750768043.114903s req_ids:[8] +DEBUG 06-24 20:27:23 [manager.py:391] +INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s +ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:187.93749809265137ms total_cost_time:187.99519538879395ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:11573 prompt_cache_len:5151 prompt_cache_ratio:0.4450877041389441 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 +INFO 06-24 20:27:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:23 [batch.py:51] router release req id 8 +INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10815811157226562 s +INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s +DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=241558242360958393427601253541415694380, time:1750768043.3128421s req_ids:[8] +DEBUG 06-24 20:27:23 [manager.py:391] +ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:204.13780212402344ms total_cost_time:204.18071746826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11574 prompt_cache_len:5151 prompt_cache_ratio:0.4450492483151892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 +DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:23 [batch.py:51] router release req id 8 +INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s +INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s +DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=274041578602925132066295646625399381678, time:1750768043.5245295s req_ids:[8] +DEBUG 06-24 20:27:23 [manager.py:391] +ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:200.85859298706055ms total_cost_time:200.90436935424805ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11575 prompt_cache_len:5151 prompt_cache_ratio:0.4450107991360691 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 +DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:23 [batch.py:51] router release req id 8 +INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s +INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.1099081039428711 s +DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=56556481522618134304960793097616046544, time:1750768043.7324297s req_ids:[8] +DEBUG 06-24 20:27:23 [manager.py:391] +ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:195.4667568206787ms total_cost_time:195.5125331878662ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11576 prompt_cache_len:5151 prompt_cache_ratio:0.4449723565998618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 +DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:23 [batch.py:51] router release req id 8 +INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10698962211608887 s +INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.10892295837402344 s +DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=31227861322949417292173771675011710711, time:1750768043.9330065s req_ids:[8] +DEBUG 06-24 20:27:23 [manager.py:391] +ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:27:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 48372.263 tokens/s +DEBUG 06-24 20:27:24 [stats.py:37] Avg prompt tokens throughput: 48363.793 tokens/s +DEBUG 06-24 20:27:24 [stats.py:37] Avg generate tokens throughput: 8.470 tokens/s +INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:198.41599464416504ms total_cost_time:198.4579563140869ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11577 prompt_cache_len:5151 prompt_cache_ratio:0.44493392070484583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 +DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:24 [batch.py:51] router release req id 8 +INFO 06-24 20:27:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.10773730278015137 s +INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s +DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=274742277334670146552303954623949235435, time:1750768044.1397195s req_ids:[8] +DEBUG 06-24 20:27:24 [manager.py:391] +ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:352.75745391845703ms total_cost_time:352.8013229370117ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11578 prompt_cache_len:5151 prompt_cache_ratio:0.4448954914493004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 +DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:24 [batch.py:51] router release req id 8 +INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.1072840690612793 s +INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s +DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=162556685905550370792891674121758006562, time:1750768044.512599s req_ids:[8] +DEBUG 06-24 20:27:24 [manager.py:391] +ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:219.72155570983887ms total_cost_time:219.76470947265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11579 prompt_cache_len:5151 prompt_cache_ratio:0.44485706883150533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 +DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:24 [batch.py:51] router release req id 8 +INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.1084291934967041 s +INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s +DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=298290611333074028584773485507199370942, time:1750768044.7244189s req_ids:[8] +DEBUG 06-24 20:27:24 [manager.py:391] +ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:203.63116264343262ms total_cost_time:203.6740779876709ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11580 prompt_cache_len:5151 prompt_cache_ratio:0.4448186528497409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 +DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:24 [batch.py:51] router release req id 8 +INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.10866951942443848 s +INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.11063694953918457 s +DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=156000369759218646748078207246018214432, time:1750768044.9333236s req_ids:[8] +DEBUG 06-24 20:27:24 [manager.py:391] +ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:203.10258865356445ms total_cost_time:203.14502716064453ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11581 prompt_cache_len:5151 prompt_cache_ratio:0.44478024350228823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 +DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:25 [batch.py:51] router release req id 8 +INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.1067967414855957 s +INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10858321189880371 s +DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=40796801150445118791521308297924028147, time:1750768045.1634939s req_ids:[8] +DEBUG 06-24 20:27:25 [manager.py:391] +ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:214.70332145690918ms total_cost_time:214.74742889404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11582 prompt_cache_len:5151 prompt_cache_ratio:0.4447418407874288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 +DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:25 [batch.py:51] router release req id 8 +INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.1076357364654541 s +INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10957145690917969 s +DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=104016520454205578847334899591728510589, time:1750768045.366401s req_ids:[8] +DEBUG 06-24 20:27:25 [manager.py:391] +ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:206.12239837646484ms total_cost_time:206.16602897644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11583 prompt_cache_len:5151 prompt_cache_ratio:0.4447034447034447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 +DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:25 [batch.py:51] router release req id 8 +INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.10800480842590332 s +INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.11003589630126953 s +DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=178911064734333362884920715512175663434, time:1750768045.576824s req_ids:[8] +DEBUG 06-24 20:27:25 [manager.py:391] +ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:210.39080619812012ms total_cost_time:210.4318141937256ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:11584 prompt_cache_len:5151 prompt_cache_ratio:0.44466505524861877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 +DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:25 [batch.py:51] router release req id 8 +INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s +INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10984468460083008 s +DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=300681427374263850466168022733179785059, time:1750768045.793788s req_ids:[8] +DEBUG 06-24 20:27:25 [manager.py:391] +ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:369.5406913757324ms total_cost_time:369.5824146270752ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11585 prompt_cache_len:5151 prompt_cache_ratio:0.4446266724212344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 +DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:26 [batch.py:51] router release req id 8 +INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s +INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10968279838562012 s +DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=31521118003979778319064988090260603436, time:1750768046.1700404s req_ids:[8] +DEBUG 06-24 20:27:26 [manager.py:391] +ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:184.66591835021973ms total_cost_time:184.708833694458ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11586 prompt_cache_len:5151 prompt_cache_ratio:0.44458829621957535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 +DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:26 [batch.py:51] router release req id 8 +INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10674476623535156 s +INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10858821868896484 s +DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=78949006112851565014238646171618040356, time:1750768046.362417s req_ids:[8] +DEBUG 06-24 20:27:26 [manager.py:391] +ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:197.59249687194824ms total_cost_time:197.63755798339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11587 prompt_cache_len:5151 prompt_cache_ratio:0.4445499266419263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 +DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:26 [batch.py:51] router release req id 8 +INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10857105255126953 s +INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.11091113090515137 s +DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=300631166909671122698182972879438502380, time:1750768046.5659935s req_ids:[8] +DEBUG 06-24 20:27:26 [manager.py:391] +ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:212.41116523742676ms total_cost_time:212.45479583740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11588 prompt_cache_len:5151 prompt_cache_ratio:0.4445115636865723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 +DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:26 [batch.py:51] router release req id 8 +INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10729122161865234 s +INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10931515693664551 s +DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=237955721063345191969582371542458077500, time:1750768046.7842207s req_ids:[8] +DEBUG 06-24 20:27:26 [manager.py:391] +ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:202.85654067993164ms total_cost_time:202.89945602416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11589 prompt_cache_len:5151 prompt_cache_ratio:0.4444732073517991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 +DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:26 [batch.py:51] router release req id 8 +INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s +INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.11021971702575684 s +DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=71974388105541650596215070283234472618, time:1750768046.9946878s req_ids:[8] +DEBUG 06-24 20:27:26 [manager.py:391] +ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:209.32888984680176ms total_cost_time:209.37228202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11590 prompt_cache_len:5151 prompt_cache_ratio:0.444434857635893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 +DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:27 [batch.py:51] router release req id 8 +INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.10712862014770508 s +INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.10897684097290039 s +DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=219074054481902241515778082479039405365, time:1750768047.221946s req_ids:[8] +DEBUG 06-24 20:27:27 [manager.py:391] +ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:216.68338775634766ms total_cost_time:216.72630310058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11591 prompt_cache_len:5151 prompt_cache_ratio:0.4443965145371409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 +DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:27 [batch.py:51] router release req id 8 +INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s +INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.11034417152404785 s +DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=12926021598124466231100127117502429968, time:1750768047.4331398s req_ids:[8] +DEBUG 06-24 20:27:27 [manager.py:391] +ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:203.31120491027832ms total_cost_time:203.3545970916748ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11592 prompt_cache_len:5151 prompt_cache_ratio:0.4443581780538302 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 +DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:27 [batch.py:51] router release req id 8 +INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.31017088890075684 s +INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.312105655670166 s +DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=182634462158383355267829956423271799078, time:1750768047.8676383s req_ids:[8] +DEBUG 06-24 20:27:27 [manager.py:391] +ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:437.9279613494873ms total_cost_time:437.9720687866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11593 prompt_cache_len:5151 prompt_cache_ratio:0.44431984818424913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 +DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:28 [batch.py:51] router release req id 8 +INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10793805122375488 s +INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s +DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=318190909249535082790665831484784221501, time:1750768048.0876975s req_ids:[8] +DEBUG 06-24 20:27:28 [manager.py:391] +ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:216.20559692382812ms total_cost_time:216.2489891052246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11594 prompt_cache_len:5151 prompt_cache_ratio:0.4442815249266862 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 +DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:28 [batch.py:51] router release req id 8 +INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s +INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10958313941955566 s +DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=166508227249825367995833035185159250695, time:1750768048.324057s req_ids:[8] +DEBUG 06-24 20:27:28 [manager.py:391] +ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:222.4874496459961ms total_cost_time:222.53179550170898ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11595 prompt_cache_len:5151 prompt_cache_ratio:0.4442432082794308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 +DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:28 [batch.py:51] router release req id 8 +INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10849976539611816 s +INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.11043906211853027 s +DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=197673383012150948979995698131697699393, time:1750768048.5382812s req_ids:[8] +DEBUG 06-24 20:27:28 [manager.py:391] +ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:213.3941650390625ms total_cost_time:213.4389877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11596 prompt_cache_len:5151 prompt_cache_ratio:0.4442048982407727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 +DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:28 [batch.py:51] router release req id 8 +INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10799622535705566 s +INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.1100308895111084 s +DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=285586748372275578014292457786425409505, time:1750768048.7645957s req_ids:[8] +DEBUG 06-24 20:27:28 [manager.py:391] +ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:211.29870414733887ms total_cost_time:211.34257316589355ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11597 prompt_cache_len:5151 prompt_cache_ratio:0.44416659480900234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 +DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:28 [batch.py:51] router release req id 8 +INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10719776153564453 s +INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10905027389526367 s +DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=223944548428252722684262410638180091709, time:1750768048.9752784s req_ids:[8] +DEBUG 06-24 20:27:28 [manager.py:391] +ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:206.17103576660156ms total_cost_time:206.21514320373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11598 prompt_cache_len:5151 prompt_cache_ratio:0.44412829798241077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 +DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:29 [batch.py:51] router release req id 8 +INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10823178291320801 s +INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.11018490791320801 s +DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=115412276080002100122989988797892935842, time:1750768049.1916304s req_ids:[8] +DEBUG 06-24 20:27:29 [manager.py:391] +ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:376.33299827575684ms total_cost_time:376.37782096862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11599 prompt_cache_len:5151 prompt_cache_ratio:0.4440900077592896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 +DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:29 [batch.py:51] router release req id 8 +INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10693502426147461 s +INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.10908389091491699 s +DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=11742553500485497552810688810487013243, time:1750768049.5703387s req_ids:[8] +DEBUG 06-24 20:27:29 [manager.py:391] +ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:207.74221420288086ms total_cost_time:207.78465270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11600 prompt_cache_len:5151 prompt_cache_ratio:0.44405172413793104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 +DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:29 [batch.py:51] router release req id 8 +INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s +INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.11094403266906738 s +DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=322537680115503013770236431501456919416, time:1750768049.786758s req_ids:[8] +DEBUG 06-24 20:27:29 [manager.py:391] +ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:208.04238319396973ms total_cost_time:208.085298538208ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11601 prompt_cache_len:5151 prompt_cache_ratio:0.4440134471166279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 +DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:29 [batch.py:51] router release req id 8 +INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.1068582534790039 s +INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.10875678062438965 s +DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=120080690139516164345846875822576873465, time:1750768049.9994538s req_ids:[8] +DEBUG 06-24 20:27:29 [manager.py:391] +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:208.72950553894043ms total_cost_time:208.7721824645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11602 prompt_cache_len:5151 prompt_cache_ratio:0.4439751766936735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 +DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:30 [batch.py:51] router release req id 8 +INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10735726356506348 s +INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.10944485664367676 s +DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=316670526295762277438936446950291869044, time:1750768050.2150285s req_ids:[8] +DEBUG 06-24 20:27:30 [manager.py:391] +ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:210.8299732208252ms total_cost_time:210.87336540222168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11603 prompt_cache_len:5151 prompt_cache_ratio:0.44393691286736187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 +DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:30 [batch.py:51] router release req id 8 +INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10837101936340332 s +INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.11028599739074707 s +DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=254820825928489375547819754731130235356, time:1750768050.431994s req_ids:[8] +DEBUG 06-24 20:27:30 [manager.py:391] +ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:208.93454551696777ms total_cost_time:208.97722244262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11604 prompt_cache_len:5151 prompt_cache_ratio:0.44389865563598757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 +DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:30 [batch.py:51] router release req id 8 +INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10819387435913086 s +INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.11019325256347656 s +DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=12698200465022304349315194008020105329, time:1750768050.6480536s req_ids:[8] +DEBUG 06-24 20:27:30 [manager.py:391] +ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:231.39691352844238ms total_cost_time:231.44102096557617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11605 prompt_cache_len:5151 prompt_cache_ratio:0.44386040499784574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 +DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:30 [batch.py:51] router release req id 8 +INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s +INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.10936474800109863 s +DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=204571012891682551788771618421997697623, time:1750768050.8845394s req_ids:[8] +DEBUG 06-24 20:27:30 [manager.py:391] +ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:376.68824195861816ms total_cost_time:376.73091888427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11606 prompt_cache_len:5151 prompt_cache_ratio:0.4438221609512321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 +DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:31 [batch.py:51] router release req id 8 +INFO 06-24 20:27:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10794496536254883 s +INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10998988151550293 s +DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=4383387008904089726718803169148363854, time:1750768051.2664027s req_ids:[8] +DEBUG 06-24 20:27:31 [manager.py:391] +ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:204.8189640045166ms total_cost_time:204.86211776733398ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11607 prompt_cache_len:5151 prompt_cache_ratio:0.443783923494443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 +DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:31 [batch.py:51] router release req id 8 +INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10748577117919922 s +INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s +DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=100341077498308047138766490815062392620, time:1750768051.477281s req_ids:[8] +DEBUG 06-24 20:27:31 [manager.py:391] +ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:202.74043083190918ms total_cost_time:202.78501510620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11608 prompt_cache_len:5151 prompt_cache_ratio:0.44374569262577535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 +DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:31 [batch.py:51] router release req id 8 +INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10749173164367676 s +INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10952115058898926 s +DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=253028566486781346174647396143351243236, time:1750768051.6854234s req_ids:[8] +DEBUG 06-24 20:27:31 [manager.py:391] +ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:205.69396018981934ms total_cost_time:205.73925971984863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11609 prompt_cache_len:5151 prompt_cache_ratio:0.4437074683435266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 +DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:31 [batch.py:51] router release req id 8 +INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s +INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.11056828498840332 s +DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=198608662432604478060866516883725317096, time:1750768051.900921s req_ids:[8] +DEBUG 06-24 20:27:31 [manager.py:391] +ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:207.62348175048828ms total_cost_time:207.66687393188477ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11610 prompt_cache_len:5151 prompt_cache_ratio:0.44366925064599483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 +DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:32 [batch.py:51] router release req id 8 +INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s +INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.11100149154663086 s +DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=102616921554648671128379959344541566492, time:1750768052.1183193s req_ids:[8] +DEBUG 06-24 20:27:32 [manager.py:391] +ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:215.4693603515625ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11611 prompt_cache_len:5151 prompt_cache_ratio:0.44363103953147875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 +DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:32 [batch.py:51] router release req id 8 +INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10886335372924805 s +INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.11103105545043945 s +DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=52429840949596818735614702125341845413, time:1750768052.3347113s req_ids:[8] +DEBUG 06-24 20:27:32 [manager.py:391] +ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:204.6184539794922ms total_cost_time:204.66089248657227ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11612 prompt_cache_len:5151 prompt_cache_ratio:0.4435928349982776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 +DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:32 [batch.py:51] router release req id 8 +INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10697531700134277 s +INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.10903620719909668 s +DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=326088551832837516119518108822307277858, time:1750768052.5462503s req_ids:[8] +DEBUG 06-24 20:27:32 [manager.py:391] +ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:205.62982559204102ms total_cost_time:205.6746482849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11613 prompt_cache_len:5151 prompt_cache_ratio:0.4435546370446913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 +DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:32 [batch.py:51] router release req id 8 +INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.30941033363342285 s +INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.31136608123779297 s +DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=327655206803896732243082537203167451047, time:1750768052.9610136s req_ids:[8] +DEBUG 06-24 20:27:32 [manager.py:391] +ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:397.7804183959961ms total_cost_time:397.7999687194824ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11614 prompt_cache_len:5151 prompt_cache_ratio:0.44351644566902015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 +DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:33 [batch.py:51] router release req id 8 +INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10566926002502441 s +INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.10780835151672363 s +DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=159609690722808824453833847064038400204, time:1750768053.161927s req_ids:[8] +DEBUG 06-24 20:27:33 [manager.py:391] +ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:198.5797882080078ms total_cost_time:198.6246109008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11615 prompt_cache_len:5151 prompt_cache_ratio:0.4434782608695652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 +DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:33 [batch.py:51] router release req id 8 +INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s +INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.11032581329345703 s +DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=3704880289774479520575625657065861735, time:1750768053.3656929s req_ids:[8] +DEBUG 06-24 20:27:33 [manager.py:391] +ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:207.72647857666016ms total_cost_time:207.77153968811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11616 prompt_cache_len:5151 prompt_cache_ratio:0.4434400826446281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 +DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:33 [batch.py:51] router release req id 8 +INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s +INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.11020469665527344 s +DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=170078736722523670359689344307826376593, time:1750768053.5787885s req_ids:[8] +DEBUG 06-24 20:27:33 [manager.py:391] +ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:206.26425743103027ms total_cost_time:206.30741119384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11617 prompt_cache_len:5151 prompt_cache_ratio:0.443401910992511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 +DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:33 [batch.py:51] router release req id 8 +INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10519742965698242 s +INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.10724759101867676 s +DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=173046256490098839109927107496841236876, time:1750768053.793487s req_ids:[8] +DEBUG 06-24 20:27:33 [manager.py:391] +ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:210.37626266479492ms total_cost_time:210.4203701019287ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11618 prompt_cache_len:5151 prompt_cache_ratio:0.4433637459115166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 +DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:33 [batch.py:51] router release req id 8 +INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10780787467956543 s +INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10977053642272949 s +DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=249524091125512902513428867676842004096, time:1750768054.0079775s req_ids:[8] +DEBUG 06-24 20:27:34 [manager.py:391] +ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:27:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48323.971 tokens/s +DEBUG 06-24 20:27:34 [stats.py:37] Avg prompt tokens throughput: 48315.640 tokens/s +DEBUG 06-24 20:27:34 [stats.py:37] Avg generate tokens throughput: 8.331 tokens/s +INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:205.16061782836914ms total_cost_time:205.20281791687012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11619 prompt_cache_len:5151 prompt_cache_ratio:0.4433255873999484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 +DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:34 [batch.py:51] router release req id 8 +INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s +INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10957169532775879 s +DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=322042146884898295650035879953288667107, time:1750768054.2203221s req_ids:[8] +DEBUG 06-24 20:27:34 [manager.py:391] +ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:370.2967166900635ms total_cost_time:370.34082412719727ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11620 prompt_cache_len:5151 prompt_cache_ratio:0.44328743545611016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 +DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:34 [batch.py:51] router release req id 8 +INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10757708549499512 s +INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s +DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=92967563308563899860405267896005405938, time:1750768054.5977654s req_ids:[8] +DEBUG 06-24 20:27:34 [manager.py:391] +ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:202.54826545715332ms total_cost_time:202.59356498718262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11621 prompt_cache_len:5151 prompt_cache_ratio:0.4432492900783065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 +DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:34 [batch.py:51] router release req id 8 +INFO 06-24 20:27:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s +INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10966229438781738 s +DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=26086783698301372003153037311722628367, time:1750768054.8068175s req_ids:[8] +DEBUG 06-24 20:27:34 [manager.py:391] +ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:206.60877227783203ms total_cost_time:206.65264129638672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11622 prompt_cache_len:5151 prompt_cache_ratio:0.44321115126484256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 +DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:34 [batch.py:51] router release req id 8 +INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10788369178771973 s +INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985493659973145 s +DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=56463665435554600794035382258834081678, time:1750768055.02479s req_ids:[8] +DEBUG 06-24 20:27:35 [manager.py:391] +ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:216.0806655883789ms total_cost_time:216.1235809326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11623 prompt_cache_len:5151 prompt_cache_ratio:0.4431730190140239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 +DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:35 [batch.py:51] router release req id 8 +INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10741233825683594 s +INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944914817810059 s +DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=185788916005317189978595667364847927825, time:1750768055.244312s req_ids:[8] +DEBUG 06-24 20:27:35 [manager.py:391] +ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:210.98756790161133ms total_cost_time:211.0297679901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11624 prompt_cache_len:5151 prompt_cache_ratio:0.4431348933241569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 +DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:35 [batch.py:51] router release req id 8 +INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10820126533508301 s +INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.11024236679077148 s +DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=314519376579015344753851039302029320004, time:1750768055.4617643s req_ids:[8] +DEBUG 06-24 20:27:35 [manager.py:391] +ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:210.41393280029297ms total_cost_time:210.45923233032227ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11625 prompt_cache_len:5151 prompt_cache_ratio:0.44309677419354837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 +DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:35 [batch.py:51] router release req id 8 +INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s +INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s +DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=28589246543198290154796260264159352669, time:1750768055.676811s req_ids:[8] +DEBUG 06-24 20:27:35 [manager.py:391] +ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:209.02037620544434ms total_cost_time:209.06424522399902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11626 prompt_cache_len:5151 prompt_cache_ratio:0.4430586616205058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 +DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:35 [batch.py:51] router release req id 8 +INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10779094696044922 s +INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s +DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=278139419228630883080986344573144226336, time:1750768055.8920612s req_ids:[8] +DEBUG 06-24 20:27:35 [manager.py:391] +ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:376.01375579833984ms total_cost_time:376.05762481689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11627 prompt_cache_len:5151 prompt_cache_ratio:0.4430205556033371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 +DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:36 [batch.py:51] router release req id 8 +INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10822272300720215 s +INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s +DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=75417688038995294896801884289730820000, time:1750768056.272834s req_ids:[8] +DEBUG 06-24 20:27:36 [manager.py:391] +ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:208.0078125ms total_cost_time:208.05120468139648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11628 prompt_cache_len:5151 prompt_cache_ratio:0.44298245614035087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 +DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:36 [batch.py:51] router release req id 8 +INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.1085054874420166 s +INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s +DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=197132007653966627037539083395863976809, time:1750768056.4887707s req_ids:[8] +DEBUG 06-24 20:27:36 [manager.py:391] +ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:206.85338973999023ms total_cost_time:206.8953514099121ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11629 prompt_cache_len:5151 prompt_cache_ratio:0.4429443632298564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 +DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:36 [batch.py:51] router release req id 8 +INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s +INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11060714721679688 s +DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=17709891307908481128429084620117407001, time:1750768056.7017605s req_ids:[8] +DEBUG 06-24 20:27:36 [manager.py:391] +ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:208.68778228759766ms total_cost_time:208.73236656188965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11630 prompt_cache_len:5151 prompt_cache_ratio:0.4429062768701634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 +DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:36 [batch.py:51] router release req id 8 +INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10807538032531738 s +INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11016345024108887 s +DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=141985797526126276629775214750975334031, time:1750768056.9170542s req_ids:[8] +DEBUG 06-24 20:27:36 [manager.py:391] +ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:205.83724975585938ms total_cost_time:205.88159561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11631 prompt_cache_len:5151 prompt_cache_ratio:0.44286819705958214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 +DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:37 [batch.py:51] router release req id 8 +INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10785746574401855 s +INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099550724029541 s +DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=150568358759909845589863602091934173328, time:1750768057.1306329s req_ids:[8] +DEBUG 06-24 20:27:37 [manager.py:391] +ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:203.2313346862793ms total_cost_time:203.2756805419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11632 prompt_cache_len:5151 prompt_cache_ratio:0.44283012379642367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 +DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:37 [batch.py:51] router release req id 8 +INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10842752456665039 s +INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.11037206649780273 s +DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=255468635855261129336253297370492248097, time:1750768057.3393357s req_ids:[8] +DEBUG 06-24 20:27:37 [manager.py:391] +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:207.0162296295166ms total_cost_time:207.0600986480713ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11633 prompt_cache_len:5151 prompt_cache_ratio:0.4427920570789994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 +DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:37 [batch.py:51] router release req id 8 +INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10868239402770996 s +INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.1106572151184082 s +DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=205861260231369315554372277450926284052, time:1750768057.5556757s req_ids:[8] +DEBUG 06-24 20:27:37 [manager.py:391] +ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:212.52751350402832ms total_cost_time:212.5718593597412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11634 prompt_cache_len:5151 prompt_cache_ratio:0.44275399690562145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 +DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:37 [batch.py:51] router release req id 8 +INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.2084054946899414 s +INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.21020102500915527 s +DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=61193095940179990327734526297592337342, time:1750768057.9048717s req_ids:[8] +DEBUG 06-24 20:27:37 [manager.py:391] +ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:319.77248191833496ms total_cost_time:319.81778144836426ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11635 prompt_cache_len:5151 prompt_cache_ratio:0.4427159432746025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 +DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:38 [batch.py:51] router release req id 8 +INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.1077573299407959 s +INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10983848571777344 s +DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=269947313797313670973089204710357317795, time:1750768058.097356s req_ids:[8] +DEBUG 06-24 20:27:38 [manager.py:391] +ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:201.41363143920898ms total_cost_time:201.45726203918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11636 prompt_cache_len:5151 prompt_cache_ratio:0.44267789618425574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 +DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:38 [batch.py:51] router release req id 8 +INFO 06-24 20:27:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.1074681282043457 s +INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10943865776062012 s +DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=111155728540221233148814307519971143076, time:1750768058.3056908s req_ids:[8] +DEBUG 06-24 20:27:38 [manager.py:391] +ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:205.74712753295898ms total_cost_time:205.78980445861816ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11637 prompt_cache_len:5151 prompt_cache_ratio:0.4426398556328951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 +DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:38 [batch.py:51] router release req id 8 +INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10755538940429688 s +INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s +DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=255039111609182138701053776779626005196, time:1750768058.5166001s req_ids:[8] +DEBUG 06-24 20:27:38 [manager.py:391] +ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:207.78203010559082ms total_cost_time:207.8251838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11638 prompt_cache_len:5151 prompt_cache_ratio:0.4426018216188349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 +DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:38 [batch.py:51] router release req id 8 +INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10800337791442871 s +INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.1100623607635498 s +DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=15185878056657217581301681413855575314, time:1750768058.731673s req_ids:[8] +DEBUG 06-24 20:27:38 [manager.py:391] +ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:209.96713638305664ms total_cost_time:210.01219749450684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11639 prompt_cache_len:5151 prompt_cache_ratio:0.44256379414039004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 +DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:38 [batch.py:51] router release req id 8 +INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10798001289367676 s +INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.1100914478302002 s +DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=139004765710079065135606118722259411212, time:1750768058.9483297s req_ids:[8] +DEBUG 06-24 20:27:38 [manager.py:391] +ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:210.05606651306152ms total_cost_time:210.10184288024902ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11640 prompt_cache_len:5151 prompt_cache_ratio:0.44252577319587627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 +DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:39 [batch.py:51] router release req id 8 +INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10822701454162598 s +INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.1101372241973877 s +DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=274473753694687324176758882147147889501, time:1750768059.1711378s req_ids:[8] +DEBUG 06-24 20:27:39 [manager.py:391] +ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:372.12538719177246ms total_cost_time:372.17044830322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11641 prompt_cache_len:5151 prompt_cache_ratio:0.44248775878360963 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 +DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:39 [batch.py:51] router release req id 8 +INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s +INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.11091160774230957 s +DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=80492152616491568875311284530108596007, time:1750768059.542152s req_ids:[8] +DEBUG 06-24 20:27:39 [manager.py:391] +ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:207.74126052856445ms total_cost_time:207.78465270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11642 prompt_cache_len:5151 prompt_cache_ratio:0.4424497509019069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 +DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:39 [batch.py:51] router release req id 8 +INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10876297950744629 s +INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.11085271835327148 s +DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=336383143086348206744468767669111793819, time:1750768059.7568507s req_ids:[8] +DEBUG 06-24 20:27:39 [manager.py:391] +ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:202.7606964111328ms total_cost_time:202.78167724609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11643 prompt_cache_len:5151 prompt_cache_ratio:0.4424117495490853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 +DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:39 [batch.py:51] router release req id 8 +INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10585665702819824 s +INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.10787177085876465 s +DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=79193535528960203107071687578790542104, time:1750768059.9663005s req_ids:[8] +DEBUG 06-24 20:27:39 [manager.py:391] +ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:205.24168014526367ms total_cost_time:205.2628993988037ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11644 prompt_cache_len:5151 prompt_cache_ratio:0.44237375472346274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 +DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:40 [batch.py:51] router release req id 8 +INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10426092147827148 s +INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.10619974136352539 s +DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=295558104385778927500801051260715079703, time:1750768060.1757567s req_ids:[8] +DEBUG 06-24 20:27:40 [manager.py:391] +ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:211.53926849365234ms total_cost_time:211.5957736968994ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11645 prompt_cache_len:5151 prompt_cache_ratio:0.44233576642335765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 +DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:40 [batch.py:51] router release req id 8 +INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10790348052978516 s +INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.10982537269592285 s +DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=280646328702124015446546387565036822621, time:1750768060.3915617s req_ids:[8] +DEBUG 06-24 20:27:40 [manager.py:391] +ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:208.7557315826416ms total_cost_time:208.8010311126709ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11646 prompt_cache_len:5151 prompt_cache_ratio:0.44229778464708913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 +DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:40 [batch.py:51] router release req id 8 +INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.11104679107666016 s +INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.11320042610168457 s +DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=62268459116684508482025122141809193249, time:1750768060.6072395s req_ids:[8] +DEBUG 06-24 20:27:40 [manager.py:391] +ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:203.5071849822998ms total_cost_time:203.5524845123291ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11647 prompt_cache_len:5151 prompt_cache_ratio:0.4422598093929767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 +DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:40 [batch.py:51] router release req id 8 +INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s +INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s +DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=61079119373762392419367431873905208071, time:1750768060.8220289s req_ids:[8] +DEBUG 06-24 20:27:40 [manager.py:391] +ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:376.1124610900879ms total_cost_time:376.1570453643799ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11648 prompt_cache_len:5151 prompt_cache_ratio:0.44222184065934067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 +DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:41 [batch.py:51] router release req id 8 +INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10858726501464844 s +INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.11051774024963379 s +DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=299593004177238963388715736403263946494, time:1750768061.1999803s req_ids:[8] +DEBUG 06-24 20:27:41 [manager.py:391] +ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:209.85984802246094ms total_cost_time:209.90347862243652ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11649 prompt_cache_len:5151 prompt_cache_ratio:0.4421838784445017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 +DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:41 [batch.py:51] router release req id 8 +INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.1081840991973877 s +INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s +DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=316781868197800707089498737181832943379, time:1750768061.416961s req_ids:[8] +DEBUG 06-24 20:27:41 [manager.py:391] +ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:206.64405822753906ms total_cost_time:206.68721199035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11650 prompt_cache_len:5151 prompt_cache_ratio:0.4421459227467811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 +DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:41 [batch.py:51] router release req id 8 +INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10854816436767578 s +INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s +DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=270251469595641011912024118746084410715, time:1750768061.6294148s req_ids:[8] +DEBUG 06-24 20:27:41 [manager.py:391] +ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:208.46080780029297ms total_cost_time:208.50443840026855ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11651 prompt_cache_len:5151 prompt_cache_ratio:0.4421079735645009 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 +DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:41 [batch.py:51] router release req id 8 +INFO 06-24 20:27:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10954117774963379 s +INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.11166214942932129 s +DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=204731399901107882064521222102199610353, time:1750768061.8463502s req_ids:[8] +DEBUG 06-24 20:27:41 [manager.py:391] +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:210.57462692260742ms total_cost_time:210.62016487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11652 prompt_cache_len:5151 prompt_cache_ratio:0.4420700308959835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 +DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:41 [batch.py:51] router release req id 8 +INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10795974731445312 s +INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.10989904403686523 s +DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=257933142827629647630890189838073963588, time:1750768062.0591762s req_ids:[8] +DEBUG 06-24 20:27:42 [manager.py:391] +ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:207.16476440429688ms total_cost_time:207.20720291137695ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11653 prompt_cache_len:5151 prompt_cache_ratio:0.4420320947395521 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 +DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:42 [batch.py:51] router release req id 8 +INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10914087295532227 s +INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.1111302375793457 s +DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=17937069243415260362399538069278821502, time:1750768062.2755115s req_ids:[8] +DEBUG 06-24 20:27:42 [manager.py:391] +ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:210.28399467468262ms total_cost_time:210.32953262329102ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11654 prompt_cache_len:5151 prompt_cache_ratio:0.4419941650935301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 +DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:42 [batch.py:51] router release req id 8 +INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10788989067077637 s +INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.10980486869812012 s +DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=312647226188170999033993674702095941198, time:1750768062.4916286s req_ids:[8] +DEBUG 06-24 20:27:42 [manager.py:391] +ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:207.88979530334473ms total_cost_time:207.9324722290039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11655 prompt_cache_len:5151 prompt_cache_ratio:0.44195624195624195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 +DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:42 [batch.py:51] router release req id 8 +INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.30853700637817383 s +INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.31052422523498535 s +DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=299740526944787088239288135503753498887, time:1750768062.9138064s req_ids:[8] +DEBUG 06-24 20:27:42 [manager.py:391] +ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:407.29713439941406ms total_cost_time:407.34100341796875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11656 prompt_cache_len:5151 prompt_cache_ratio:0.44191832532601233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 +DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:43 [batch.py:51] router release req id 8 +INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10860013961791992 s +INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.11050987243652344 s +DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=337446504550859803029660026187667420749, time:1750768063.1204126s req_ids:[8] +DEBUG 06-24 20:27:43 [manager.py:391] +ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:207.14616775512695ms total_cost_time:207.19051361083984ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11657 prompt_cache_len:5151 prompt_cache_ratio:0.44188041520116667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 +DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:43 [batch.py:51] router release req id 8 +INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10731959342956543 s +INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10938763618469238 s +DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=130485175085857028088939489140531399497, time:1750768063.3351586s req_ids:[8] +DEBUG 06-24 20:27:43 [manager.py:391] +ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:210.31713485717773ms total_cost_time:210.36338806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11658 prompt_cache_len:5151 prompt_cache_ratio:0.4418425115800309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 +DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:43 [batch.py:51] router release req id 8 +INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.1075592041015625 s +INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s +DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=11096509143887670593093691096838479192, time:1750768063.551984s req_ids:[8] +DEBUG 06-24 20:27:43 [manager.py:391] +ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:209.15889739990234ms total_cost_time:209.20157432556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11659 prompt_cache_len:5151 prompt_cache_ratio:0.44180461446093144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 +DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:43 [batch.py:51] router release req id 8 +INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10948467254638672 s +INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.11144590377807617 s +DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=122142627285848471446769339946885118300, time:1750768063.7694595s req_ids:[8] +DEBUG 06-24 20:27:43 [manager.py:391] +ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:208.31775665283203ms total_cost_time:208.36186408996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11660 prompt_cache_len:5151 prompt_cache_ratio:0.44176672384219556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 +DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:43 [batch.py:51] router release req id 8 +INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10779523849487305 s +INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s +DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=314066956482224143493314398315864182556, time:1750768063.9817142s req_ids:[8] +DEBUG 06-24 20:27:43 [manager.py:391] +ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:208.05811882019043ms total_cost_time:208.1010341644287ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11661 prompt_cache_len:5151 prompt_cache_ratio:0.44172883972215077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 +DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:44 [batch.py:51] router release req id 8 +INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10868573188781738 s +INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.11003851890563965 s +DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=43716856319837955467461962905487194665, time:1750768064.198382s req_ids:[8] +DEBUG 06-24 20:27:44 [manager.py:391] +DEBUG 06-24 20:27:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 49567.098 tokens/s +DEBUG 06-24 20:27:44 [stats.py:37] Avg prompt tokens throughput: 49558.683 tokens/s +DEBUG 06-24 20:27:44 [stats.py:37] Avg generate tokens throughput: 8.416 tokens/s +ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:367.81787872314453ms total_cost_time:367.8615093231201ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11662 prompt_cache_len:5151 prompt_cache_ratio:0.44169096209912534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 +DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:44 [batch.py:51] router release req id 8 +INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10796308517456055 s +INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10994935035705566 s +DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=265482422708807595817312816396387340617, time:1750768064.5722618s req_ids:[8] +DEBUG 06-24 20:27:44 [manager.py:391] +ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:210.59942245483398ms total_cost_time:210.6456756591797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11663 prompt_cache_len:5151 prompt_cache_ratio:0.44165309097144817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 +DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:44 [batch.py:51] router release req id 8 +INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.1072998046875 s +INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10930252075195312 s +DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=140715960182771354796120416256631862347, time:1750768064.7888014s req_ids:[8] +DEBUG 06-24 20:27:44 [manager.py:391] +ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:209.71083641052246ms total_cost_time:209.7342014312744ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:11664 prompt_cache_len:5151 prompt_cache_ratio:0.44161522633744854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 +DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:44 [batch.py:51] router release req id 8 +INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10576009750366211 s +INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10772371292114258 s +DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=38939872212084040627938081887060034009, time:1750768065.0060368s req_ids:[8] +DEBUG 06-24 20:27:45 [manager.py:391] +ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:213.16981315612793ms total_cost_time:213.21368217468262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11665 prompt_cache_len:5151 prompt_cache_ratio:0.4415773681954565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 +DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:45 [batch.py:51] router release req id 8 +INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10756874084472656 s +INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.10960698127746582 s +DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=121522200574996270201540644699303111517, time:1750768065.221665s req_ids:[8] +DEBUG 06-24 20:27:45 [manager.py:391] +ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:208.81175994873047ms total_cost_time:208.85539054870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11666 prompt_cache_len:5151 prompt_cache_ratio:0.4415395165438025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 +DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:45 [batch.py:51] router release req id 8 +INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10688066482543945 s +INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.10885500907897949 s +DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=296434976523445833842283782897358076960, time:1750768065.4510627s req_ids:[8] +DEBUG 06-24 20:27:45 [manager.py:391] +ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:227.17595100402832ms total_cost_time:227.2202968597412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11667 prompt_cache_len:5151 prompt_cache_ratio:0.4415016713808177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 +DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:45 [batch.py:51] router release req id 8 +INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.1093912124633789 s +INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.11139631271362305 s +DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=16062869560735165174523373097118204279, time:1750768065.6721177s req_ids:[8] +DEBUG 06-24 20:27:45 [manager.py:391] +ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:212.32008934020996ms total_cost_time:212.36515045166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11668 prompt_cache_len:5151 prompt_cache_ratio:0.44146383270483375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 +DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:45 [batch.py:51] router release req id 8 +INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10849952697753906 s +INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.11048078536987305 s +DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=322430848760108596701433256098931005177, time:1750768065.8877454s req_ids:[8] +DEBUG 06-24 20:27:45 [manager.py:391] +ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:374.65786933898926ms total_cost_time:374.70197677612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11669 prompt_cache_len:5151 prompt_cache_ratio:0.4414260005141829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 +DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:46 [batch.py:51] router release req id 8 +INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s +INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.11054825782775879 s +DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=175355592149943647442503956285560439579, time:1750768066.2710464s req_ids:[8] +DEBUG 06-24 20:27:46 [manager.py:391] +ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:211.287260055542ms total_cost_time:211.3327980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11670 prompt_cache_len:5151 prompt_cache_ratio:0.44138817480719794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 +DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:46 [batch.py:51] router release req id 8 +INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10736536979675293 s +INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.1094813346862793 s +DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=178275448747769996930753701572404118296, time:1750768066.4893088s req_ids:[8] +DEBUG 06-24 20:27:46 [manager.py:391] +ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:210.53099632263184ms total_cost_time:210.57605743408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11671 prompt_cache_len:5151 prompt_cache_ratio:0.4413503555822123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 +DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:46 [batch.py:51] router release req id 8 +INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10888814926147461 s +INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.11098051071166992 s +DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=104647567097353884675832059871995132250, time:1750768066.7051613s req_ids:[8] +DEBUG 06-24 20:27:46 [manager.py:391] +ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:209.43641662597656ms total_cost_time:209.47909355163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11672 prompt_cache_len:5151 prompt_cache_ratio:0.44131254283756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 +DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:46 [batch.py:51] router release req id 8 +INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10856199264526367 s +INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.1105494499206543 s +DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=286136379708286627664648265022458877770, time:1750768066.921517s req_ids:[8] +DEBUG 06-24 20:27:46 [manager.py:391] +ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:209.0163230895996ms total_cost_time:209.0601921081543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11673 prompt_cache_len:5151 prompt_cache_ratio:0.44127473657157545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 +INFO 06-24 20:27:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:47 [batch.py:51] router release req id 8 +INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10799169540405273 s +INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11010169982910156 s +DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=333092236254768544864245833229603047152, time:1750768067.1354423s req_ids:[8] +DEBUG 06-24 20:27:47 [manager.py:391] +ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:207.83305168151855ms total_cost_time:207.87668228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11674 prompt_cache_len:5151 prompt_cache_ratio:0.4412369367825938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 +DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:47 [batch.py:51] router release req id 8 +INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10848402976989746 s +INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11039614677429199 s +DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=242066419917160546179103428843145465146, time:1750768067.3528771s req_ids:[8] +DEBUG 06-24 20:27:47 [manager.py:391] +ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:210.77370643615723ms total_cost_time:210.81948280334473ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11675 prompt_cache_len:5151 prompt_cache_ratio:0.4411991434689507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 +DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:47 [batch.py:51] router release req id 8 +INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10789918899536133 s +INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.10973668098449707 s +DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=30946175742304727602675341187884965927, time:1750768067.570659s req_ids:[8] +DEBUG 06-24 20:27:47 [manager.py:391] +ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:379.8513412475586ms total_cost_time:379.8949718475342ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11676 prompt_cache_len:5151 prompt_cache_ratio:0.44116135662898254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 +DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:47 [batch.py:51] router release req id 8 +INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.1092216968536377 s +INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11127710342407227 s +DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=73313771434464797223728399194716805674, time:1750768067.9539516s req_ids:[8] +DEBUG 06-24 20:27:47 [manager.py:391] +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:209.05041694641113ms total_cost_time:209.09523963928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11677 prompt_cache_len:5151 prompt_cache_ratio:0.44112357626102594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 +DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:48 [batch.py:51] router release req id 8 +INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s +INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11123347282409668 s +DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=288001262650343206408843595607394306170, time:1750768068.1796744s req_ids:[8] +DEBUG 06-24 20:27:48 [manager.py:391] +ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:220.64876556396484ms total_cost_time:220.69406509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11678 prompt_cache_len:5151 prompt_cache_ratio:0.4410858023634184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 +DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:48 [batch.py:51] router release req id 8 +INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.1083071231842041 s +INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049509048461914 s +DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=26752544059038432113733400857799309496, time:1750768068.3982942s req_ids:[8] +DEBUG 06-24 20:27:48 [manager.py:391] +ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:211.79533004760742ms total_cost_time:211.84110641479492ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11679 prompt_cache_len:5151 prompt_cache_ratio:0.4410480349344978 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 +DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:48 [batch.py:51] router release req id 8 +INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.10710597038269043 s +INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s +DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=217638077499561130651481766754466672082, time:1750768068.615498s req_ids:[8] +DEBUG 06-24 20:27:48 [manager.py:391] +ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:217.72456169128418ms total_cost_time:217.76986122131348ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11680 prompt_cache_len:5151 prompt_cache_ratio:0.44101027397260273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 +DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:48 [batch.py:51] router release req id 8 +INFO 06-24 20:27:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s +INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11078357696533203 s +DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=183484381546618447873154783003017080773, time:1750768068.842046s req_ids:[8] +DEBUG 06-24 20:27:48 [manager.py:391] +ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:216.7809009552002ms total_cost_time:216.80068969726562ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11681 prompt_cache_len:5151 prompt_cache_ratio:0.44097251947607224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 +DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:48 [batch.py:51] router release req id 8 +INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10581374168395996 s +INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769104957580566 s +DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=166073478561886356450542614144698214248, time:1750768069.06763s req_ids:[8] +DEBUG 06-24 20:27:49 [manager.py:391] +ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:213.01507949829102ms total_cost_time:213.06085586547852ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11682 prompt_cache_len:5151 prompt_cache_ratio:0.440934771443246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 +DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:49 [batch.py:51] router release req id 8 +INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10723304748535156 s +INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10907983779907227 s +DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=227622986544389012314626857110713569781, time:1750768069.2822154s req_ids:[8] +DEBUG 06-24 20:27:49 [manager.py:391] +ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:377.5901794433594ms total_cost_time:377.63381004333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11683 prompt_cache_len:5151 prompt_cache_ratio:0.44089702987246426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 +DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:49 [batch.py:51] router release req id 8 +DEBUG 06-24 20:27:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:49 [manager.py:283] +DEBUG 06-24 20:27:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:49 [manager.py:284] +INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10852766036987305 s +INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.11050963401794434 s +DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=191548575499890481294140533313106696880, time:1750768069.6664155s req_ids:[8] +DEBUG 06-24 20:27:49 [manager.py:391] +ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:202.59404182434082ms total_cost_time:202.6379108428955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11684 prompt_cache_len:5151 prompt_cache_ratio:0.4408592947620678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 +DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:49 [batch.py:51] router release req id 8 +INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10742926597595215 s +INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10953426361083984 s +DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=43187319509054787744039514623587645003, time:1750768069.872239s req_ids:[8] +DEBUG 06-24 20:27:49 [manager.py:391] +ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:200.2246379852295ms total_cost_time:200.26874542236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11685 prompt_cache_len:5151 prompt_cache_ratio:0.44082156611039797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 +DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:49 [batch.py:51] router release req id 8 +INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10787844657897949 s +INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10983443260192871 s +DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=298384835260827150318491690840360067745, time:1750768070.0794733s req_ids:[8] +DEBUG 06-24 20:27:50 [manager.py:391] +ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:206.43162727355957ms total_cost_time:206.47644996643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11686 prompt_cache_len:5151 prompt_cache_ratio:0.44078384391579667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 +DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:50 [batch.py:51] router release req id 8 +INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10764265060424805 s +INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s +DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=225063350445177027224107117064696904193, time:1750768070.2917519s req_ids:[8] +DEBUG 06-24 20:27:50 [manager.py:391] +ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:207.49378204345703ms total_cost_time:207.54003524780273ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11687 prompt_cache_len:5151 prompt_cache_ratio:0.44074612817660647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 +DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:50 [batch.py:51] router release req id 8 +INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s +INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s +DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=137225826317094076687923812881092625321, time:1750768070.506479s req_ids:[8] +DEBUG 06-24 20:27:50 [manager.py:391] +ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:208.59742164611816ms total_cost_time:208.64105224609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11688 prompt_cache_len:5151 prompt_cache_ratio:0.4407084188911704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 +DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:50 [batch.py:51] router release req id 8 +INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s +INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s +DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=80187774504198863934088397354980675605, time:1750768070.7215064s req_ids:[8] +DEBUG 06-24 20:27:50 [manager.py:391] +ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:206.7418098449707ms total_cost_time:206.7849636077881ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11689 prompt_cache_len:5151 prompt_cache_ratio:0.44067071605783215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 +DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:50 [batch.py:51] router release req id 8 +INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10505175590515137 s +INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10721921920776367 s +DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=147825844061204254257736059546139281692, time:1750768070.9347677s req_ids:[8] +DEBUG 06-24 20:27:50 [manager.py:391] +ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:384.75513458251953ms total_cost_time:384.8001956939697ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11690 prompt_cache_len:5151 prompt_cache_ratio:0.44063301967493584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 +DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:51 [batch.py:51] router release req id 8 +INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10833454132080078 s +INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.11092710494995117 s +DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=47780690576181283297400113084221155824, time:1750768071.3247705s req_ids:[8] +DEBUG 06-24 20:27:51 [manager.py:391] +ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:205.37257194519043ms total_cost_time:205.4157257080078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11691 prompt_cache_len:5151 prompt_cache_ratio:0.44059532974082627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 +DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:51 [batch.py:51] router release req id 8 +INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.1075129508972168 s +INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10955381393432617 s +DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=192028328961669777164971296976631988864, time:1750768071.5369785s req_ids:[8] +DEBUG 06-24 20:27:51 [manager.py:391] +ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:207.37075805664062ms total_cost_time:207.41510391235352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11692 prompt_cache_len:5151 prompt_cache_ratio:0.4405576462538488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 +DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:51 [batch.py:51] router release req id 8 +INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10739350318908691 s +INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10927605628967285 s +DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=226644578963362568769550394731141433005, time:1750768071.7513175s req_ids:[8] +DEBUG 06-24 20:27:51 [manager.py:391] +ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:209.96475219726562ms total_cost_time:210.00981330871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11693 prompt_cache_len:5151 prompt_cache_ratio:0.44051996921234926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 +DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:51 [batch.py:51] router release req id 8 +INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10618782043457031 s +INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10780477523803711 s +DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=297762266278033280858054259708847652046, time:1750768071.9663367s req_ids:[8] +DEBUG 06-24 20:27:51 [manager.py:391] +ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:199.5100975036621ms total_cost_time:199.5542049407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11694 prompt_cache_len:5151 prompt_cache_ratio:0.4404822986146742 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 +DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:52 [batch.py:51] router release req id 8 +INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10714960098266602 s +INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10897254943847656 s +DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=31195497378965806952722866384171260587, time:1750768072.1725655s req_ids:[8] +DEBUG 06-24 20:27:52 [manager.py:391] +ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:203.4473419189453ms total_cost_time:203.4924030303955ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11695 prompt_cache_len:5151 prompt_cache_ratio:0.4404446344591706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 +DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:52 [batch.py:51] router release req id 8 +INFO 06-24 20:27:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.1074531078338623 s +INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10943388938903809 s +DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=41961555260971651765030443494808349220, time:1750768072.382983s req_ids:[8] +DEBUG 06-24 20:27:52 [manager.py:391] +ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:206.2997817993164ms total_cost_time:206.3436508178711ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11696 prompt_cache_len:5151 prompt_cache_ratio:0.44040697674418605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 +DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:52 [batch.py:51] router release req id 8 +INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10802459716796875 s +INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10960602760314941 s +DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=259764074609678528213901087170832004807, time:1750768072.595922s req_ids:[8] +DEBUG 06-24 20:27:52 [manager.py:391] +ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:375.8530616760254ms total_cost_time:375.90718269348145ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:11697 prompt_cache_len:5151 prompt_cache_ratio:0.44036932546806873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 +DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:52 [batch.py:51] router release req id 8 +INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10901856422424316 s +INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s +DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=924113102782929902878716484726148624, time:1750768072.976297s req_ids:[8] +DEBUG 06-24 20:27:52 [manager.py:391] +ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:207.92508125305176ms total_cost_time:207.96895027160645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11698 prompt_cache_len:5151 prompt_cache_ratio:0.4403316806291674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 +DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:53 [batch.py:51] router release req id 8 +INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s +INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11006307601928711 s +DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=26583854484608831339809600195653965698, time:1750768073.1925404s req_ids:[8] +DEBUG 06-24 20:27:53 [manager.py:391] +INFO 06-24 20:27:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:27:53 [statics_utils.py:24] mean first cost: 228.82940530744267 ms +INFO 06-24 20:27:53 [statics_utils.py:24] mean per token cost: 0.06352233204585778 ms +ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:207.9448699951172ms total_cost_time:207.98921585083008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11699 prompt_cache_len:5151 prompt_cache_ratio:0.4402940422258313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 +DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:53 [batch.py:51] router release req id 8 +INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10805463790893555 s +INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11008477210998535 s +DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=172343902097966924872925216147226074346, time:1750768073.4061286s req_ids:[8] +DEBUG 06-24 20:27:53 [manager.py:391] +ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:211.3957405090332ms total_cost_time:211.4405632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11700 prompt_cache_len:5151 prompt_cache_ratio:0.44025641025641027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 +DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:53 [batch.py:51] router release req id 8 +INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s +INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s +DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=225758045825775236494367558165643973930, time:1750768073.62264s req_ids:[8] +DEBUG 06-24 20:27:53 [manager.py:391] +ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:204.5266628265381ms total_cost_time:204.5729160308838ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11701 prompt_cache_len:5151 prompt_cache_ratio:0.44021878471925474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 +DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:53 [batch.py:51] router release req id 8 +INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10791921615600586 s +INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.10985159873962402 s +DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=141120055016717393029069260794318309936, time:1750768073.8354192s req_ids:[8] +DEBUG 06-24 20:27:53 [manager.py:391] +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:210.10446548461914ms total_cost_time:210.14952659606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11702 prompt_cache_len:5151 prompt_cache_ratio:0.44018116561271575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 +DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:53 [batch.py:51] router release req id 8 +INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10765218734741211 s +INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10961055755615234 s +DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=7351774205792844063141412301094091625, time:1750768074.0521193s req_ids:[8] +DEBUG 06-24 20:27:54 [manager.py:391] +ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:206.79521560668945ms total_cost_time:206.83956146240234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11703 prompt_cache_len:5151 prompt_cache_ratio:0.44014355293514484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 +DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:54 [batch.py:51] router release req id 8 +INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.1075749397277832 s +INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s +DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=118458180301205669185265281303196868164, time:1750768074.263249s req_ids:[8] +DEBUG 06-24 20:27:54 [manager.py:391] +DEBUG 06-24 20:27:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 48762.511 tokens/s +DEBUG 06-24 20:27:54 [stats.py:37] Avg prompt tokens throughput: 48754.165 tokens/s +DEBUG 06-24 20:27:54 [stats.py:37] Avg generate tokens throughput: 8.346 tokens/s +ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:366.4867877960205ms total_cost_time:366.5306568145752ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11704 prompt_cache_len:5151 prompt_cache_ratio:0.44010594668489406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 +DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:54 [batch.py:51] router release req id 8 +INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10773992538452148 s +INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s +DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=322676427113483016485968396162406141022, time:1750768074.6353645s req_ids:[8] +DEBUG 06-24 20:27:54 [manager.py:391] +ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:209.87915992736816ms total_cost_time:209.92350578308105ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11705 prompt_cache_len:5151 prompt_cache_ratio:0.4400683468603161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 +DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:54 [batch.py:51] router release req id 8 +INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10766053199768066 s +INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10933947563171387 s +DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=19948050275451180531787381997900946558, time:1750768074.8507817s req_ids:[8] +DEBUG 06-24 20:27:54 [manager.py:391] +ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:206.1176300048828ms total_cost_time:206.1624526977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11706 prompt_cache_len:5151 prompt_cache_ratio:0.4400307534597642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 +DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:54 [batch.py:51] router release req id 8 +INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10815119743347168 s +INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11020803451538086 s +DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=283485054329856772655617090060025275979, time:1750768075.065433s req_ids:[8] +DEBUG 06-24 20:27:55 [manager.py:391] +ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:202.06379890441895ms total_cost_time:202.10647583007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11707 prompt_cache_len:5151 prompt_cache_ratio:0.4399931664815922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 +DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:55 [batch.py:51] router release req id 8 +INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10750317573547363 s +INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.1095283031463623 s +DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=178332392696331114758401130756182138498, time:1750768075.274174s req_ids:[8] +DEBUG 06-24 20:27:55 [manager.py:391] +ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:202.8651237487793ms total_cost_time:202.9094696044922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11708 prompt_cache_len:5151 prompt_cache_ratio:0.4399555859241544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 +DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:55 [batch.py:51] router release req id 8 +INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s +INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.10997557640075684 s +DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=154680509763442937881017367558262132366, time:1750768075.4838557s req_ids:[8] +DEBUG 06-24 20:27:55 [manager.py:391] +ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:207.11183547973633ms total_cost_time:207.1537971496582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11709 prompt_cache_len:5151 prompt_cache_ratio:0.4399180117858058 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 +DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:55 [batch.py:51] router release req id 8 +INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10807204246520996 s +INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s +DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=125496945061914756773189028925257561314, time:1750768075.696993s req_ids:[8] +DEBUG 06-24 20:27:55 [manager.py:391] +ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:204.20193672180176ms total_cost_time:204.24532890319824ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11710 prompt_cache_len:5151 prompt_cache_ratio:0.4398804440649018 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 +DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:55 [batch.py:51] router release req id 8 +INFO 06-24 20:27:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10872483253479004 s +INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11066985130310059 s +DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=137413808159458721799519624010656247786, time:1750768075.9059122s req_ids:[8] +DEBUG 06-24 20:27:55 [manager.py:391] +ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:359.12036895751953ms total_cost_time:359.1644763946533ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11711 prompt_cache_len:5151 prompt_cache_ratio:0.4398428827597985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 +DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:56 [batch.py:51] router release req id 8 +INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s +INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s +DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=291802753025922360764161142604911838441, time:1750768076.2741618s req_ids:[8] +DEBUG 06-24 20:27:56 [manager.py:391] +ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:210.280179977417ms total_cost_time:210.30688285827637ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:11712 prompt_cache_len:5151 prompt_cache_ratio:0.43980532786885246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 +DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:56 [batch.py:51] router release req id 8 +INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10703682899475098 s +INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.10864973068237305 s +DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=8536152989871503548453711768574260706, time:1750768076.4896755s req_ids:[8] +DEBUG 06-24 20:27:56 [manager.py:391] +ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:209.29598808288574ms total_cost_time:209.34057235717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11713 prompt_cache_len:5151 prompt_cache_ratio:0.4397677793904209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 +DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:56 [batch.py:51] router release req id 8 +INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10812187194824219 s +INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s +DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=133367602897878706255786749831695528410, time:1750768076.7065353s req_ids:[8] +DEBUG 06-24 20:27:56 [manager.py:391] +ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:206.618070602417ms total_cost_time:206.6507339477539ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:11714 prompt_cache_len:5151 prompt_cache_ratio:0.43973023732286154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 +DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:56 [batch.py:51] router release req id 8 +INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10730767250061035 s +INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.1085960865020752 s +DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=221753444052221687053046271775160557634, time:1750768076.9187286s req_ids:[8] +DEBUG 06-24 20:27:56 [manager.py:391] +ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:167.38629341125488ms total_cost_time:167.43087768554688ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11715 prompt_cache_len:5151 prompt_cache_ratio:0.43969270166453267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 +DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:57 [batch.py:51] router release req id 8 +INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.1071317195892334 s +INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.10866498947143555 s +DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=123114096874721942607245944560030692846, time:1750768077.0925298s req_ids:[8] +DEBUG 06-24 20:27:57 [manager.py:391] +ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:167.5724983215332ms total_cost_time:167.6158905029297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11716 prompt_cache_len:5151 prompt_cache_ratio:0.4396551724137931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 +DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:57 [batch.py:51] router release req id 8 +INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10835003852844238 s +INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s +DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=298595673115991259221302050929734639141, time:1750768077.2649608s req_ids:[8] +DEBUG 06-24 20:27:57 [manager.py:391] +ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:196.00868225097656ms total_cost_time:196.05088233947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11717 prompt_cache_len:5151 prompt_cache_ratio:0.4396176495690023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 +DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:57 [batch.py:51] router release req id 8 +INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10825037956237793 s +INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s +DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=109180299173523834166190172675225781427, time:1750768077.4673362s req_ids:[8] +DEBUG 06-24 20:27:57 [manager.py:391] +ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:380.82122802734375ms total_cost_time:380.86748123168945ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11718 prompt_cache_len:5151 prompt_cache_ratio:0.43958013312852023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 +DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:57 [batch.py:51] router release req id 8 +INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s +INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.11049175262451172 s +DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=169985071211483921028627489385777737921, time:1750768077.8557432s req_ids:[8] +DEBUG 06-24 20:27:57 [manager.py:391] +ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:211.68112754821777ms total_cost_time:211.70282363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11719 prompt_cache_len:5151 prompt_cache_ratio:0.4395426230907074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 +DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:57 [batch.py:51] router release req id 8 +INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10806989669799805 s +INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s +DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=17175807067889291154752751360815925362, time:1750768078.073327s req_ids:[8] +DEBUG 06-24 20:27:58 [manager.py:391] +ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:211.0602855682373ms total_cost_time:211.1055850982666ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11720 prompt_cache_len:5151 prompt_cache_ratio:0.4395051194539249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 +DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:58 [batch.py:51] router release req id 8 +INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10856151580810547 s +INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s +DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=268431165464257031607871144566536213502, time:1750768078.2901986s req_ids:[8] +DEBUG 06-24 20:27:58 [manager.py:391] +ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:211.08317375183105ms total_cost_time:211.12799644470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11721 prompt_cache_len:5151 prompt_cache_ratio:0.4394676222165344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 +DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:58 [batch.py:51] router release req id 8 +INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s +INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975933074951172 s +DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=300250570234671355712060741176340386179, time:1750768078.506851s req_ids:[8] +DEBUG 06-24 20:27:58 [manager.py:391] +ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:209.4857692718506ms total_cost_time:209.5315456390381ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11722 prompt_cache_len:5151 prompt_cache_ratio:0.4394301313768981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 +DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:58 [batch.py:51] router release req id 8 +INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10726428031921387 s +INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10916948318481445 s +DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=141265128740661020313837351670295099714, time:1750768078.7236798s req_ids:[8] +DEBUG 06-24 20:27:58 [manager.py:391] +ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:210.58893203735352ms total_cost_time:210.6337547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11723 prompt_cache_len:5151 prompt_cache_ratio:0.4393926469333788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 +DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:58 [batch.py:51] router release req id 8 +INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10782718658447266 s +INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10967826843261719 s +DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=247539787347195394274234829680720956274, time:1750768078.9413345s req_ids:[8] +DEBUG 06-24 20:27:58 [manager.py:391] +ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:369.11535263061523ms total_cost_time:369.159460067749ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11724 prompt_cache_len:5151 prompt_cache_ratio:0.43935516888433984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 +DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:59 [batch.py:51] router release req id 8 +INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10908937454223633 s +INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11113405227661133 s +DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=82746934489195862702655088086284734950, time:1750768079.3170936s req_ids:[8] +DEBUG 06-24 20:27:59 [manager.py:391] +ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:207.8683376312256ms total_cost_time:207.91244506835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11725 prompt_cache_len:5151 prompt_cache_ratio:0.439317697228145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 +DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:59 [batch.py:51] router release req id 8 +INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10849928855895996 s +INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.1105189323425293 s +DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=180243610687250467102093286486070958357, time:1750768079.539834s req_ids:[8] +DEBUG 06-24 20:27:59 [manager.py:391] +ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:220.04008293151855ms total_cost_time:220.08609771728516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11726 prompt_cache_len:5151 prompt_cache_ratio:0.4392802319631588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 +DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:59 [batch.py:51] router release req id 8 +INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10826921463012695 s +INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11030960083007812 s +DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=41800577272912925396776910926254179331, time:1750768079.7587826s req_ids:[8] +DEBUG 06-24 20:27:59 [manager.py:391] +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:211.1492156982422ms total_cost_time:211.19213104248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11727 prompt_cache_len:5151 prompt_cache_ratio:0.43924277308774623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 +DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:27:59 [batch.py:51] router release req id 8 +INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10863304138183594 s +INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061239242553711 s +DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=193009151310181925319208914731340858143, time:1750768079.9938014s req_ids:[8] +DEBUG 06-24 20:27:59 [manager.py:391] +ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:233.74342918395996ms total_cost_time:233.78825187683105ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11728 prompt_cache_len:5151 prompt_cache_ratio:0.43920532060027284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 +DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:00 [batch.py:51] router release req id 8 +INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10854983329772949 s +INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.11054039001464844 s +DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=137931543481817374150475295196355490582, time:1750768080.2216394s req_ids:[8] +DEBUG 06-24 20:28:00 [manager.py:391] +ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:218.90759468078613ms total_cost_time:218.95146369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11729 prompt_cache_len:5151 prompt_cache_ratio:0.43916787449910477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 +DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:00 [batch.py:51] router release req id 8 +INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10710811614990234 s +INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.10911822319030762 s +DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=16887711482166531621416952486146028622, time:1750768080.4509778s req_ids:[8] +DEBUG 06-24 20:28:00 [manager.py:391] +ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:222.98216819763184ms total_cost_time:223.02722930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11730 prompt_cache_len:5151 prompt_cache_ratio:0.4391304347826087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 +DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:00 [batch.py:51] router release req id 8 +INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s +INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.11063623428344727 s +DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=219186160019658772655613434929116824641, time:1750768080.6715834s req_ids:[8] +DEBUG 06-24 20:28:00 [manager.py:391] +ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:377.87652015686035ms total_cost_time:377.92134284973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11731 prompt_cache_len:5151 prompt_cache_ratio:0.4390930014491518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 +DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:00 [batch.py:51] router release req id 8 +INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10775876045227051 s +INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10984969139099121 s +DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=232963095576801606711003416079581040665, time:1750768081.054746s req_ids:[8] +DEBUG 06-24 20:28:01 [manager.py:391] +ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:208.6923122406006ms total_cost_time:208.73618125915527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11732 prompt_cache_len:5151 prompt_cache_ratio:0.4390555744971019 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 +DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:01 [batch.py:51] router release req id 8 +INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10822010040283203 s +INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s +DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=22902933349116449031905180184210097322, time:1750768081.2850924s req_ids:[8] +DEBUG 06-24 20:28:01 [manager.py:391] +ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:225.97765922546387ms total_cost_time:226.02295875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11733 prompt_cache_len:5151 prompt_cache_ratio:0.4390181539248274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 +DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:01 [batch.py:51] router release req id 8 +INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.1077873706817627 s +INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10933446884155273 s +DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=302437762865757099324525960350452340073, time:1750768081.5047631s req_ids:[8] +DEBUG 06-24 20:28:01 [manager.py:391] +ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:209.60259437561035ms total_cost_time:209.64598655700684ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11734 prompt_cache_len:5151 prompt_cache_ratio:0.4389807397306971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 +DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:01 [batch.py:51] router release req id 8 +INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10709691047668457 s +INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10917520523071289 s +DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=273676361814544893762249845273690597355, time:1750768081.721005s req_ids:[8] +DEBUG 06-24 20:28:01 [manager.py:391] +ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:213.23156356811523ms total_cost_time:213.27590942382812ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11735 prompt_cache_len:5151 prompt_cache_ratio:0.43894333191308055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 +DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:01 [batch.py:51] router release req id 8 +INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10862922668457031 s +INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.11074018478393555 s +DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=277230823868757438219625573536573640479, time:1750768081.9398036s req_ids:[8] +DEBUG 06-24 20:28:01 [manager.py:391] +ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:211.05599403381348ms total_cost_time:211.09986305236816ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11736 prompt_cache_len:5151 prompt_cache_ratio:0.43890593047034765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 +DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:02 [batch.py:51] router release req id 8 +INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10837483406066895 s +INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s +DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=13659204043033578966617922654145241138, time:1750768082.1585457s req_ids:[8] +DEBUG 06-24 20:28:02 [manager.py:391] +ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:208.28723907470703ms total_cost_time:208.33301544189453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11737 prompt_cache_len:5151 prompt_cache_ratio:0.43886853540086906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 +DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:02 [batch.py:51] router release req id 8 +INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.1074528694152832 s +INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s +DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=216741883326970456432805906282972959813, time:1750768082.3714192s req_ids:[8] +DEBUG 06-24 20:28:02 [manager.py:391] +ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:363.6476993560791ms total_cost_time:363.6929988861084ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11738 prompt_cache_len:5151 prompt_cache_ratio:0.4388311467030159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 +DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:02 [batch.py:51] router release req id 8 +INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10821032524108887 s +INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s +DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=199879998557326096911081695023540657160, time:1750768082.7401228s req_ids:[8] +DEBUG 06-24 20:28:02 [manager.py:391] +ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:207.2005271911621ms total_cost_time:207.2451114654541ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11739 prompt_cache_len:5151 prompt_cache_ratio:0.4387937643751597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 +DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:02 [batch.py:51] router release req id 8 +INFO 06-24 20:28:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10881209373474121 s +INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11099910736083984 s +DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=141765117333804516487137733271404625967, time:1750768082.9591322s req_ids:[8] +DEBUG 06-24 20:28:02 [manager.py:391] +ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:215.6517505645752ms total_cost_time:215.69538116455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11740 prompt_cache_len:5151 prompt_cache_ratio:0.43875638841567294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 +DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:03 [batch.py:51] router release req id 8 +INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10770082473754883 s +INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s +DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=116470087494442029273376740998088900323, time:1750768083.1755373s req_ids:[8] +DEBUG 06-24 20:28:03 [manager.py:391] +ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.3677520751953ms total_cost_time:209.4120979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11741 prompt_cache_len:5151 prompt_cache_ratio:0.4387190188229282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 +DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:03 [batch.py:51] router release req id 8 +INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10846614837646484 s +INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.11050963401794434 s +DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=174600575084647101509398252152531689411, time:1750768083.3925076s req_ids:[8] +DEBUG 06-24 20:28:03 [manager.py:391] +ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.31291580200195ms total_cost_time:209.35654640197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11742 prompt_cache_len:5151 prompt_cache_ratio:0.4386816555952989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 +DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:03 [batch.py:51] router release req id 8 +INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10750269889831543 s +INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.10956215858459473 s +DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=171084241270683905995775587476250109729, time:1750768083.6075315s req_ids:[8] +DEBUG 06-24 20:28:03 [manager.py:391] +ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.5780372619629ms total_cost_time:209.6245288848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11743 prompt_cache_len:5151 prompt_cache_ratio:0.438644298731159 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 +DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:03 [batch.py:51] router release req id 8 +INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10937333106994629 s +INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.11142587661743164 s +DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=279051663761698478629226270718037497749, time:1750768083.8243406s req_ids:[8] +DEBUG 06-24 20:28:03 [manager.py:391] +ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.62238311767578ms total_cost_time:209.66458320617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11744 prompt_cache_len:5151 prompt_cache_ratio:0.43860694822888285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 +DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:03 [batch.py:51] router release req id 8 +INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.1070394515991211 s +INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.1087961196899414 s +DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=39357457925060152693190935701929217526, time:1750768084.039799s req_ids:[8] +DEBUG 06-24 20:28:04 [manager.py:391] +ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:28:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 47960.012 tokens/s +DEBUG 06-24 20:28:04 [stats.py:37] Avg prompt tokens throughput: 47951.733 tokens/s +DEBUG 06-24 20:28:04 [stats.py:37] Avg generate tokens throughput: 8.279 tokens/s +INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:364.18843269348145ms total_cost_time:364.23325538635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11745 prompt_cache_len:5151 prompt_cache_ratio:0.4385696040868455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 +DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:04 [batch.py:51] router release req id 8 +INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s +INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.11075687408447266 s +DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=248365660691166201675798718354588871138, time:1750768084.411614s req_ids:[8] +DEBUG 06-24 20:28:04 [manager.py:391] +ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:209.43021774291992ms total_cost_time:209.4733715057373ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11746 prompt_cache_len:5151 prompt_cache_ratio:0.43853226630342246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 +DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:04 [batch.py:51] router release req id 8 +INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s +INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946536064147949 s +DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=219002505410073411822065592928498680227, time:1750768084.6277595s req_ids:[8] +DEBUG 06-24 20:28:04 [manager.py:391] +ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:209.32602882385254ms total_cost_time:209.36846733093262ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11747 prompt_cache_len:5151 prompt_cache_ratio:0.4384949348769899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 +DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:04 [batch.py:51] router release req id 8 +INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s +INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s +DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=232389293755963417898113195872904166252, time:1750768084.852123s req_ids:[8] +DEBUG 06-24 20:28:04 [manager.py:391] +ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:220.10564804077148ms total_cost_time:220.16215324401855ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11748 prompt_cache_len:5151 prompt_cache_ratio:0.4384576098059244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 +DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:04 [batch.py:51] router release req id 8 +INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10694551467895508 s +INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.10907530784606934 s +DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=16345388320489121440813828911658635315, time:1750768085.0783532s req_ids:[8] +DEBUG 06-24 20:28:05 [manager.py:391] +ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:216.4173126220703ms total_cost_time:216.461181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11749 prompt_cache_len:5151 prompt_cache_ratio:0.4384202910886033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 +DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:05 [batch.py:51] router release req id 8 +INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s +INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.11056995391845703 s +DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=185321277351292029103244790367876229750, time:1750768085.292266s req_ids:[8] +DEBUG 06-24 20:28:05 [manager.py:391] +ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:209.48219299316406ms total_cost_time:209.52701568603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11750 prompt_cache_len:5151 prompt_cache_ratio:0.43838297872340426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 +DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:05 [batch.py:51] router release req id 8 +INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10714459419250488 s +INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.1092221736907959 s +DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=237472826402998696739489461193778554670, time:1750768085.5088234s req_ids:[8] +DEBUG 06-24 20:28:05 [manager.py:391] +ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:203.66287231445312ms total_cost_time:203.7060260772705ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11751 prompt_cache_len:5151 prompt_cache_ratio:0.43834567270870567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 +DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:05 [batch.py:51] router release req id 8 +INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10820198059082031 s +INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.11025142669677734 s +DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=198220053094540015205345008884535216068, time:1750768085.7191164s req_ids:[8] +DEBUG 06-24 20:28:05 [manager.py:391] +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:373.42143058776855ms total_cost_time:373.47960472106934ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:11752 prompt_cache_len:5151 prompt_cache_ratio:0.43830837304288633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 +DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:06 [batch.py:51] router release req id 8 +INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10702800750732422 s +INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.10899138450622559 s +DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=60737041665867790769967088915342167743, time:1750768086.0985873s req_ids:[8] +DEBUG 06-24 20:28:06 [manager.py:391] +ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:209.09714698791504ms total_cost_time:209.14006233215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11753 prompt_cache_len:5151 prompt_cache_ratio:0.4382710797243257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 +DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:06 [batch.py:51] router release req id 8 +INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10803699493408203 s +INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11010026931762695 s +DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=310576628088192920844417999186137943529, time:1750768086.3162565s req_ids:[8] +DEBUG 06-24 20:28:06 [manager.py:391] +ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:211.1837863922119ms total_cost_time:211.2886905670166ms,out_token_counter:1 mean_per_token_cost_time: 0.1049041748046875ms prompt_token_num:11754 prompt_cache_len:5151 prompt_cache_ratio:0.4382337927514038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 +DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:06 [batch.py:51] router release req id 8 +INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10841774940490723 s +INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040496826171875 s +DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=331253815600497617905794142965636441641, time:1750768086.5411499s req_ids:[8] +DEBUG 06-24 20:28:06 [manager.py:391] +ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:219.4066047668457ms total_cost_time:219.46191787719727ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:11755 prompt_cache_len:5151 prompt_cache_ratio:0.43819651212250105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 +DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:06 [batch.py:51] router release req id 8 +INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10896539688110352 s +INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.1110689640045166 s +DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=317551484947557617882648294896856165063, time:1750768086.7597365s req_ids:[8] +DEBUG 06-24 20:28:06 [manager.py:391] +ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:210.3722095489502ms total_cost_time:210.42346954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11756 prompt_cache_len:5151 prompt_cache_ratio:0.43815923783599864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 +DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:06 [batch.py:51] router release req id 8 +INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10811877250671387 s +INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023473739624023 s +DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=194318930416105455896631513294107967569, time:1750768086.9769719s req_ids:[8] +DEBUG 06-24 20:28:06 [manager.py:391] +ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:209.34247970581055ms total_cost_time:209.38825607299805ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11757 prompt_cache_len:5151 prompt_cache_ratio:0.43812196989027813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 +DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:07 [batch.py:51] router release req id 8 +INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s +INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11034345626831055 s +DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=97768641497861275360251603350891425098, time:1750768087.1934514s req_ids:[8] +DEBUG 06-24 20:28:07 [manager.py:391] +ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:209.4864845275879ms total_cost_time:209.52820777893066ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11758 prompt_cache_len:5151 prompt_cache_ratio:0.4380847082837217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 +DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:07 [batch.py:51] router release req id 8 +INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.209092378616333 s +INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.21081900596618652 s +DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=73182450133938064867049573723693014090, time:1750768087.5421376s req_ids:[8] +DEBUG 06-24 20:28:07 [manager.py:391] +ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:330.7652473449707ms total_cost_time:330.82032203674316ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:11759 prompt_cache_len:5151 prompt_cache_ratio:0.43804745301471215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 +DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:07 [batch.py:51] router release req id 8 +INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10844659805297852 s +INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052298545837402 s +DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=250708015961930964165876165364309421444, time:1750768087.7461414s req_ids:[8] +DEBUG 06-24 20:28:07 [manager.py:391] +ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:208.6927890777588ms total_cost_time:208.73570442199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11760 prompt_cache_len:5151 prompt_cache_ratio:0.43801020408163266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 +DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:07 [batch.py:51] router release req id 8 +INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s +INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11030960083007812 s +DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=287084226738558720981629586349275663346, time:1750768087.9614441s req_ids:[8] +DEBUG 06-24 20:28:07 [manager.py:391] +ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:213.78350257873535ms total_cost_time:213.8066291809082ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:11761 prompt_cache_len:5151 prompt_cache_ratio:0.43797296148286713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 +DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:08 [batch.py:51] router release req id 8 +INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10530710220336914 s +INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.1074521541595459 s +DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=323240945704873588669385453196188929316, time:1750768088.181811s req_ids:[8] +DEBUG 06-24 20:28:08 [manager.py:391] +ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:210.31975746154785ms total_cost_time:210.36386489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11762 prompt_cache_len:5151 prompt_cache_ratio:0.43793572521679985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 +DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:08 [batch.py:51] router release req id 8 +INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.1080317497253418 s +INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s +DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=206130770807291696312720620133921694370, time:1750768088.4068727s req_ids:[8] +DEBUG 06-24 20:28:08 [manager.py:391] +ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:223.92630577087402ms total_cost_time:223.9689826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11763 prompt_cache_len:5151 prompt_cache_ratio:0.43789849528181585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 +DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:08 [batch.py:51] router release req id 8 +INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s +INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11029219627380371 s +DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=201400141774151527664629195418059646792, time:1750768088.6269028s req_ids:[8] +DEBUG 06-24 20:28:08 [manager.py:391] +ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:209.62977409362793ms total_cost_time:209.6731662750244ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11764 prompt_cache_len:5151 prompt_cache_ratio:0.43786127167630057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 +DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:08 [batch.py:51] router release req id 8 +INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10841631889343262 s +INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s +DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=279945297872058651323189462055745554212, time:1750768088.8443935s req_ids:[8] +DEBUG 06-24 20:28:08 [manager.py:391] +ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:378.57985496520996ms total_cost_time:378.62610816955566ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11765 prompt_cache_len:5151 prompt_cache_ratio:0.43782405439864003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 +DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:09 [batch.py:51] router release req id 8 +INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s +INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s +DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=266711111893400671486174516663542324103, time:1750768089.2280886s req_ids:[8] +DEBUG 06-24 20:28:09 [manager.py:391] +ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:206.74824714660645ms total_cost_time:206.79378509521484ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11766 prompt_cache_len:5151 prompt_cache_ratio:0.4377868434472208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 +DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:09 [batch.py:51] router release req id 8 +INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10811853408813477 s +INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.11001968383789062 s +DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=198777377601040667024239575301316246241, time:1750768089.44658s req_ids:[8] +DEBUG 06-24 20:28:09 [manager.py:391] +ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:213.54246139526367ms total_cost_time:213.58418464660645ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11767 prompt_cache_len:5151 prompt_cache_ratio:0.43774963882043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 +DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:09 [batch.py:51] router release req id 8 +INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10827183723449707 s +INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.1105051040649414 s +DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=179055969670093476718412706870680100848, time:1750768089.6621714s req_ids:[8] +DEBUG 06-24 20:28:09 [manager.py:391] +ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:210.01338958740234ms total_cost_time:210.05678176879883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11768 prompt_cache_len:5151 prompt_cache_ratio:0.4377124405166553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 +DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:09 [batch.py:51] router release req id 8 +INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10811710357666016 s +INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s +DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=155224937576569538224866764275714556448, time:1750768089.8765314s req_ids:[8] +DEBUG 06-24 20:28:09 [manager.py:391] +ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:203.39298248291016ms total_cost_time:203.43828201293945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11769 prompt_cache_len:5151 prompt_cache_ratio:0.43767524853428497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 +DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:10 [batch.py:51] router release req id 8 +INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10827755928039551 s +INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=172156680615047922087924619668949849756, time:1750768090.098802s req_ids:[8] +DEBUG 06-24 20:28:10 [manager.py:391] +ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:221.9533920288086ms total_cost_time:221.99654579162598ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11770 prompt_cache_len:5151 prompt_cache_ratio:0.43763806287170776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 +DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:10 [batch.py:51] router release req id 8 +INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10965251922607422 s +INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11163115501403809 s +DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=70288172948057389566139017981008272936, time:1750768090.3166525s req_ids:[8] +DEBUG 06-24 20:28:10 [manager.py:391] +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:205.08360862731934ms total_cost_time:205.12843132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11771 prompt_cache_len:5151 prompt_cache_ratio:0.4376008835273129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 +DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:10 [batch.py:51] router release req id 8 +INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10818147659301758 s +INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s +DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=245776520327159263872278801652839137733, time:1750768090.5273278s req_ids:[8] +DEBUG 06-24 20:28:10 [manager.py:391] +ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:367.7351474761963ms total_cost_time:367.779016494751ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11772 prompt_cache_len:5151 prompt_cache_ratio:0.43756371049949033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 +DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:10 [batch.py:51] router release req id 8 +INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10880804061889648 s +INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11085057258605957 s +DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=151540708380727966336750095080137415759, time:1750768090.901097s req_ids:[8] +DEBUG 06-24 20:28:10 [manager.py:391] +ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:208.9221477508545ms total_cost_time:208.96601676940918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11773 prompt_cache_len:5151 prompt_cache_ratio:0.4375265437866304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 +DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:11 [batch.py:51] router release req id 8 +INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.10836935043334961 s +INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.1105356216430664 s +DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=292828333093874639914792846125745919281, time:1750768091.1174147s req_ids:[8] +DEBUG 06-24 20:28:11 [manager.py:391] +ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:207.2579860687256ms total_cost_time:207.30137825012207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11774 prompt_cache_len:5151 prompt_cache_ratio:0.4374893833871242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 +DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:11 [batch.py:51] router release req id 8 +INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.10823297500610352 s +INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s +DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=155677717968568859003853376743592313835, time:1750768091.3332522s req_ids:[8] +DEBUG 06-24 20:28:11 [manager.py:391] +ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:211.8062973022461ms total_cost_time:211.8511199951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11775 prompt_cache_len:5151 prompt_cache_ratio:0.43745222929936306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 +DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:11 [batch.py:51] router release req id 8 +INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.1081235408782959 s +INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11011362075805664 s +DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=81386659402402937026926758588876546006, time:1750768091.560997s req_ids:[8] +DEBUG 06-24 20:28:11 [manager.py:391] +ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:223.18434715270996ms total_cost_time:223.22845458984375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11776 prompt_cache_len:5151 prompt_cache_ratio:0.43741508152173914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 +DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:11 [batch.py:51] router release req id 8 +INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.1081547737121582 s +INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022567749023438 s +DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=262425262463702254538402996595748333561, time:1750768091.7916586s req_ids:[8] +DEBUG 06-24 20:28:11 [manager.py:391] +ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:224.32518005371094ms total_cost_time:224.36833381652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11777 prompt_cache_len:5151 prompt_cache_ratio:0.43737794005264496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 +DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:11 [batch.py:51] router release req id 8 +INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s +INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.10989999771118164 s +DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=75346722665892757104002061879379297048, time:1750768092.0109773s req_ids:[8] +DEBUG 06-24 20:28:12 [manager.py:391] +ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:210.86382865905762ms total_cost_time:210.9086513519287ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11778 prompt_cache_len:5151 prompt_cache_ratio:0.4373408048904738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 +DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:12 [batch.py:51] router release req id 8 +INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10826396942138672 s +INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.11035728454589844 s +DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=18353629191092810280625008056037171545, time:1750768092.2273655s req_ids:[8] +DEBUG 06-24 20:28:12 [manager.py:391] +ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:364.14384841918945ms total_cost_time:364.18867111206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11779 prompt_cache_len:5151 prompt_cache_ratio:0.43730367603361914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 +DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:12 [batch.py:51] router release req id 8 +INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10778522491455078 s +INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.10982370376586914 s +DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=154272610327282765054887513662055465125, time:1750768092.599072s req_ids:[8] +DEBUG 06-24 20:28:12 [manager.py:391] +ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:209.57684516906738ms total_cost_time:209.62023735046387ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11780 prompt_cache_len:5151 prompt_cache_ratio:0.4372665534804754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 +DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:12 [batch.py:51] router release req id 8 +INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10825753211975098 s +INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.1103217601776123 s +DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=305688429886744147085185185482314613580, time:1750768092.8162234s req_ids:[8] +DEBUG 06-24 20:28:12 [manager.py:391] +ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:211.07792854309082ms total_cost_time:211.12346649169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11781 prompt_cache_len:5151 prompt_cache_ratio:0.43722943722943725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 +DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:12 [batch.py:51] router release req id 8 +INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.1073305606842041 s +INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.10935115814208984 s +DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=77087504656372173359191015234698670801, time:1750768093.0333297s req_ids:[8] +DEBUG 06-24 20:28:13 [manager.py:391] +ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:209.60664749145508ms total_cost_time:209.65123176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11782 prompt_cache_len:5151 prompt_cache_ratio:0.4371923272789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 +DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:13 [batch.py:51] router release req id 8 +INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s +INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.11041808128356934 s +DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=199551473864985628619796616302214400179, time:1750768093.249657s req_ids:[8] +DEBUG 06-24 20:28:13 [manager.py:391] +ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:209.50055122375488ms total_cost_time:209.54489707946777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11783 prompt_cache_len:5151 prompt_cache_ratio:0.4371552236272596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 +DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:13 [batch.py:51] router release req id 8 +INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10724830627441406 s +INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.1094064712524414 s +DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=303566339384537851143977123240505415120, time:1750768093.4643607s req_ids:[8] +DEBUG 06-24 20:28:13 [manager.py:391] +ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:200.93393325805664ms total_cost_time:200.97684860229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11784 prompt_cache_len:5151 prompt_cache_ratio:0.4371181262729124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 +DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:13 [batch.py:51] router release req id 8 +INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.1077723503112793 s +INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.10993528366088867 s +DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=16969174652299307381901726374040017416, time:1750768093.6806145s req_ids:[8] +DEBUG 06-24 20:28:13 [manager.py:391] +ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:216.7508602142334ms total_cost_time:216.7954444885254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11785 prompt_cache_len:5151 prompt_cache_ratio:0.4370810352142554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 +DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:13 [batch.py:51] router release req id 8 +INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s +INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s +DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=318423791699726281093029830294773070732, time:1750768093.896857s req_ids:[8] +DEBUG 06-24 20:28:13 [manager.py:391] +ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:358.80303382873535ms total_cost_time:358.84952545166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11786 prompt_cache_len:5151 prompt_cache_ratio:0.4370439504496861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 +DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:14 [batch.py:51] router release req id 8 +INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10734319686889648 s +INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10933423042297363 s +DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=332543445246028786268529534042257138541, time:1750768094.2621803s req_ids:[8] +DEBUG 06-24 20:28:14 [manager.py:391] +ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:28:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 49118.793 tokens/s +DEBUG 06-24 20:28:14 [stats.py:37] Avg prompt tokens throughput: 49110.445 tokens/s +DEBUG 06-24 20:28:14 [stats.py:37] Avg generate tokens throughput: 8.348 tokens/s +INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:206.16579055786133ms total_cost_time:206.19440078735352ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:11787 prompt_cache_len:5151 prompt_cache_ratio:0.43700687197760246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 +DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:14 [batch.py:51] router release req id 8 +INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10663533210754395 s +INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10866832733154297 s +DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=237253711178498269077398061694357211369, time:1750768094.4734743s req_ids:[8] +DEBUG 06-24 20:28:14 [manager.py:391] +ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:207.10372924804688ms total_cost_time:207.14902877807617ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11788 prompt_cache_len:5151 prompt_cache_ratio:0.4369697997964031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 +DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:14 [batch.py:51] router release req id 8 +INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.1080009937286377 s +INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s +DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=29883778546119663816269898175348468876, time:1750768094.6860876s req_ids:[8] +DEBUG 06-24 20:28:14 [manager.py:391] +ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:210.22677421569824ms total_cost_time:210.27135848999023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11789 prompt_cache_len:5151 prompt_cache_ratio:0.4369327339044872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 +DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:14 [batch.py:51] router release req id 8 +INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10596108436584473 s +INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10810160636901855 s +DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=4516006987389106702425820282185963035, time:1750768094.9009302s req_ids:[8] +DEBUG 06-24 20:28:14 [manager.py:391] +ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:207.8700065612793ms total_cost_time:207.9143524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11790 prompt_cache_len:5151 prompt_cache_ratio:0.43689567430025444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 +DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:15 [batch.py:51] router release req id 8 +INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s +INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.11025500297546387 s +DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=156086780781323452555912073324277609630, time:1750768095.1155972s req_ids:[8] +DEBUG 06-24 20:28:15 [manager.py:391] +ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:209.6271514892578ms total_cost_time:209.6719741821289ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11791 prompt_cache_len:5151 prompt_cache_ratio:0.436858620982105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 +DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:15 [batch.py:51] router release req id 8 +INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.10849714279174805 s +INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.11052966117858887 s +DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=17948471344240426159765086055091613871, time:1750768095.3318112s req_ids:[8] +DEBUG 06-24 20:28:15 [manager.py:391] +ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:208.94932746887207ms total_cost_time:208.99510383605957ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11792 prompt_cache_len:5151 prompt_cache_ratio:0.4368215739484396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 +DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:15 [batch.py:51] router release req id 8 +INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.20846271514892578 s +INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.2101142406463623 s +DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=143071561961617562834656650991597682719, time:1750768095.6811976s req_ids:[8] +DEBUG 06-24 20:28:15 [manager.py:391] +ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:326.51209831237793ms total_cost_time:326.5392780303955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11793 prompt_cache_len:5151 prompt_cache_ratio:0.43678453319765964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 +DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:15 [batch.py:51] router release req id 8 +INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.10681748390197754 s +INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.10880923271179199 s +DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=139562136213725144745066632861752395607, time:1750768095.8797278s req_ids:[8] +DEBUG 06-24 20:28:15 [manager.py:391] +ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:205.5964469909668ms total_cost_time:205.6412696838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11794 prompt_cache_len:5151 prompt_cache_ratio:0.43674749872816687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 +DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:16 [batch.py:51] router release req id 8 +INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s +INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10972833633422852 s +DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=100661841516723656848895906294567805886, time:1750768096.089697s req_ids:[8] +DEBUG 06-24 20:28:16 [manager.py:391] +ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:207.30280876159668ms total_cost_time:207.34930038452148ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11795 prompt_cache_len:5151 prompt_cache_ratio:0.4367104705383637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 +DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:16 [batch.py:51] router release req id 8 +INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10730433464050293 s +INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10918903350830078 s +DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=251771144552519883419114771388866462358, time:1750768096.3046787s req_ids:[8] +DEBUG 06-24 20:28:16 [manager.py:391] +ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:209.13171768188477ms total_cost_time:209.17415618896484ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11796 prompt_cache_len:5151 prompt_cache_ratio:0.4366734486266531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 +DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:16 [batch.py:51] router release req id 8 +INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10874557495117188 s +INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.11082100868225098 s +DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=92744913844185650375440115844941880659, time:1750768096.5200891s req_ids:[8] +DEBUG 06-24 20:28:16 [manager.py:391] +ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:208.48870277404785ms total_cost_time:208.53209495544434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11797 prompt_cache_len:5151 prompt_cache_ratio:0.4366364329914385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 +DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:16 [batch.py:51] router release req id 8 +INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10706806182861328 s +INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.1091909408569336 s +DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=70582019436436509846692293290874934186, time:1750768096.7407508s req_ids:[8] +DEBUG 06-24 20:28:16 [manager.py:391] +ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:209.6545696258545ms total_cost_time:209.7005844116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11798 prompt_cache_len:5151 prompt_cache_ratio:0.4365994236311239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 +DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:16 [batch.py:51] router release req id 8 +INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s +INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10985374450683594 s +DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=105109380332829771648179799339229659551, time:1750768096.9502997s req_ids:[8] +DEBUG 06-24 20:28:16 [manager.py:391] +INFO 06-24 20:28:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:375.4255771636963ms total_cost_time:375.4580020904541ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:11799 prompt_cache_len:5151 prompt_cache_ratio:0.4365624205441139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 +DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:17 [batch.py:51] router release req id 8 +INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10823321342468262 s +INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.11028552055358887 s +DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=242688798368276591537192831802729369142, time:1750768097.3388922s req_ids:[8] +DEBUG 06-24 20:28:17 [manager.py:391] +ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:214.60986137390137ms total_cost_time:214.65277671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11800 prompt_cache_len:5151 prompt_cache_ratio:0.43652542372881353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 +DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:17 [batch.py:51] router release req id 8 +INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s +INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.10983538627624512 s +DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=260862670787389057212463206165816702392, time:1750768097.550916s req_ids:[8] +DEBUG 06-24 20:28:17 [manager.py:391] +ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:199.9659538269043ms total_cost_time:200.00743865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11801 prompt_cache_len:5151 prompt_cache_ratio:0.4364884331836285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 +DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:17 [batch.py:51] router release req id 8 +INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.1082301139831543 s +INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.11033821105957031 s +DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=117357242846041769403085059704811999069, time:1750768097.7580574s req_ids:[8] +DEBUG 06-24 20:28:17 [manager.py:391] +ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:203.00054550170898ms total_cost_time:203.04250717163086ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11802 prompt_cache_len:5151 prompt_cache_ratio:0.4364514489069649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 +DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:17 [batch.py:51] router release req id 8 +INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10737204551696777 s +INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.10946989059448242 s +DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=193994694527072481628987739397096957690, time:1750768097.969149s req_ids:[8] +DEBUG 06-24 20:28:17 [manager.py:391] +ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:203.29833030700684ms total_cost_time:203.34386825561523ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11803 prompt_cache_len:5151 prompt_cache_ratio:0.4364144708972295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 +DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:18 [batch.py:51] router release req id 8 +INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.10716652870178223 s +INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10914850234985352 s +DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=282470026420590865566027773141518107796, time:1750768098.178189s req_ids:[8] +DEBUG 06-24 20:28:18 [manager.py:391] +ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:202.53872871398926ms total_cost_time:202.58116722106934ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11804 prompt_cache_len:5151 prompt_cache_ratio:0.43637749915282953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 +DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:18 [batch.py:51] router release req id 8 +INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.10714077949523926 s +INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923171043395996 s +DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=273456528324718792725456617688895046678, time:1750768098.3888886s req_ids:[8] +DEBUG 06-24 20:28:18 [manager.py:391] +ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:210.53791046142578ms total_cost_time:210.57629585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:11805 prompt_cache_len:5151 prompt_cache_ratio:0.4363405336721728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 +DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:18 [batch.py:51] router release req id 8 +INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.1079549789428711 s +INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10989665985107422 s +DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=310544552687069659392370717896671726830, time:1750768098.6038795s req_ids:[8] +DEBUG 06-24 20:28:18 [manager.py:391] +ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:380.60712814331055ms total_cost_time:380.65171241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11806 prompt_cache_len:5151 prompt_cache_ratio:0.43630357445366763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 +DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:18 [batch.py:51] router release req id 8 +INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.1082601547241211 s +INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.11040663719177246 s +DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=55522329061015137872685148641918215246, time:1750768098.990727s req_ids:[8] +DEBUG 06-24 20:28:18 [manager.py:391] +ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:211.06576919555664ms total_cost_time:211.10939979553223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11807 prompt_cache_len:5151 prompt_cache_ratio:0.4362666214957229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 +DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:19 [batch.py:51] router release req id 8 +INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10822105407714844 s +INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s +DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=111318464653381069148485436183812852771, time:1750768099.2082531s req_ids:[8] +DEBUG 06-24 20:28:19 [manager.py:391] +ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:207.40962028503418ms total_cost_time:207.45372772216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11808 prompt_cache_len:5151 prompt_cache_ratio:0.43622967479674796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 +DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:19 [batch.py:51] router release req id 8 +INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10794281959533691 s +INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.10980105400085449 s +DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=243178789299844776893965681617541168567, time:1750768099.4340687s req_ids:[8] +DEBUG 06-24 20:28:19 [manager.py:391] +ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:223.90103340148926ms total_cost_time:223.94514083862305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11809 prompt_cache_len:5151 prompt_cache_ratio:0.4361927343551528 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 +DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:19 [batch.py:51] router release req id 8 +INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s +INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.11002492904663086 s +DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=201992601072673294364673796158752252436, time:1750768099.6541033s req_ids:[8] +DEBUG 06-24 20:28:19 [manager.py:391] +ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:207.09562301635742ms total_cost_time:207.1380615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11810 prompt_cache_len:5151 prompt_cache_ratio:0.436155800169348 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 +DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:19 [batch.py:51] router release req id 8 +INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10807633399963379 s +INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.10999155044555664 s +DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=82793900537183976138319200893850777751, time:1750768099.8670921s req_ids:[8] +DEBUG 06-24 20:28:19 [manager.py:391] +ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:202.66437530517578ms total_cost_time:202.70895957946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11811 prompt_cache_len:5151 prompt_cache_ratio:0.4361188722377445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 +DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:19 [batch.py:51] router release req id 8 +INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10685157775878906 s +INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10877561569213867 s +DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=12649352316153921278264767403344495674, time:1750768100.0781999s req_ids:[8] +DEBUG 06-24 20:28:20 [manager.py:391] +ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:209.2888355255127ms total_cost_time:209.33270454406738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11812 prompt_cache_len:5151 prompt_cache_ratio:0.4360819505587538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 +DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:20 [batch.py:51] router release req id 8 +INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10708308219909668 s +INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10919189453125 s +DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=214776661518536471666954201656035617250, time:1750768100.294392s req_ids:[8] +DEBUG 06-24 20:28:20 [manager.py:391] +ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:364.41755294799805ms total_cost_time:364.46309089660645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11813 prompt_cache_len:5151 prompt_cache_ratio:0.43604503513078813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 +DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:20 [batch.py:51] router release req id 8 +INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10857439041137695 s +INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.11054468154907227 s +DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=94264381613404482839769812177087145027, time:1750768100.6637533s req_ids:[8] +DEBUG 06-24 20:28:20 [manager.py:391] +ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:204.15568351745605ms total_cost_time:204.19907569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11814 prompt_cache_len:5151 prompt_cache_ratio:0.43600812595226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 +DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:20 [batch.py:51] router release req id 8 +INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s +INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10951089859008789 s +DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=14341633962450682208156990771597436272, time:1750768100.8882954s req_ids:[8] +DEBUG 06-24 20:28:20 [manager.py:391] +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:224.470853805542ms total_cost_time:224.51448440551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11815 prompt_cache_len:5151 prompt_cache_ratio:0.43597122302158275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 +DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:21 [batch.py:51] router release req id 8 +INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10834360122680664 s +INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11031484603881836 s +DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=246786587703049836143906817268312435184, time:1750768101.1075695s req_ids:[8] +DEBUG 06-24 20:28:21 [manager.py:391] +ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:211.96985244750977ms total_cost_time:212.01348304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11816 prompt_cache_len:5151 prompt_cache_ratio:0.43593432633716994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 +DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:21 [batch.py:51] router release req id 8 +INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10851764678955078 s +INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11060357093811035 s +DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=224037271839791912419627107898530973012, time:1750768101.3253396s req_ids:[8] +DEBUG 06-24 20:28:21 [manager.py:391] +ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:210.50763130187988ms total_cost_time:210.55340766906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11817 prompt_cache_len:5151 prompt_cache_ratio:0.4358974358974359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 +DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:21 [batch.py:51] router release req id 8 +INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10740280151367188 s +INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.10937142372131348 s +DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=9882563576556506230902678797810835039, time:1750768101.5412252s req_ids:[8] +DEBUG 06-24 20:28:21 [manager.py:391] +ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:210.7870578765869ms total_cost_time:210.8299732208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11818 prompt_cache_len:5151 prompt_cache_ratio:0.4358605517007954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 +DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:21 [batch.py:51] router release req id 8 +INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10851645469665527 s +INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.1107490062713623 s +DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=171547387199592456693803666897434015487, time:1750768101.7568007s req_ids:[8] +DEBUG 06-24 20:28:21 [manager.py:391] +ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:203.92775535583496ms total_cost_time:203.97162437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11819 prompt_cache_len:5151 prompt_cache_ratio:0.4358236737456638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 +DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:21 [batch.py:51] router release req id 8 +INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.11027097702026367 s +INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11229228973388672 s +DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=69558427659541815398142551226342704803, time:1750768101.967057s req_ids:[8] +DEBUG 06-24 20:28:21 [manager.py:391] +ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:371.1273670196533ms total_cost_time:371.1724281311035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11820 prompt_cache_len:5151 prompt_cache_ratio:0.43578680203045683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 +DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:22 [batch.py:51] router release req id 8 +INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10829854011535645 s +INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11032319068908691 s +DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=334967454049806759731423596811636923108, time:1750768102.344881s req_ids:[8] +DEBUG 06-24 20:28:22 [manager.py:391] +ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:211.77244186401367ms total_cost_time:211.81750297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11821 prompt_cache_len:5151 prompt_cache_ratio:0.43574993655359107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 +DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:22 [batch.py:51] router release req id 8 +INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10731863975524902 s +INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.10928535461425781 s +DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=257809259817211060856424739495937956189, time:1750768102.5615969s req_ids:[8] +DEBUG 06-24 20:28:22 [manager.py:391] +ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:166.03636741638184ms total_cost_time:166.07999801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11822 prompt_cache_len:5151 prompt_cache_ratio:0.43571307731348335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 +DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:22 [batch.py:51] router release req id 8 +INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10818958282470703 s +INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11009883880615234 s +DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=23729815718666782677920038640430574626, time:1750768102.7339983s req_ids:[8] +DEBUG 06-24 20:28:22 [manager.py:391] +ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:198.47464561462402ms total_cost_time:198.5163688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11823 prompt_cache_len:5151 prompt_cache_ratio:0.43567622430855113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 +DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:22 [batch.py:51] router release req id 8 +INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10834598541259766 s +INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11038088798522949 s +DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=108733328886161969055624172024253861267, time:1750768102.938689s req_ids:[8] +DEBUG 06-24 20:28:22 [manager.py:391] +ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:210.17956733703613ms total_cost_time:210.22391319274902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11824 prompt_cache_len:5151 prompt_cache_ratio:0.43563937753721244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 +DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:23 [batch.py:51] router release req id 8 +INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s +INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10963010787963867 s +DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=133769847089815306359347501621000728365, time:1750768103.1543956s req_ids:[8] +DEBUG 06-24 20:28:23 [manager.py:391] +INFO 06-24 20:28:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:28:23 [statics_utils.py:24] mean first cost: 228.87291303777405 ms +INFO 06-24 20:28:23 [statics_utils.py:24] mean per token cost: 0.06319268006874545 ms +ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:207.8413963317871ms total_cost_time:207.8859806060791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11825 prompt_cache_len:5151 prompt_cache_ratio:0.4356025369978858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 +DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:23 [batch.py:51] router release req id 8 +INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10840868949890137 s +INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s +DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=13774356303798020589186248076877749007, time:1750768103.3688507s req_ids:[8] +DEBUG 06-24 20:28:23 [manager.py:391] +ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:203.12118530273438ms total_cost_time:203.16600799560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11826 prompt_cache_len:5151 prompt_cache_ratio:0.43556570268899036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 +DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:23 [batch.py:51] router release req id 8 +INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10772037506103516 s +INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10953712463378906 s +DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=277460259278322029788430456298451407760, time:1750768103.578867s req_ids:[8] +DEBUG 06-24 20:28:23 [manager.py:391] +ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:364.6361827850342ms total_cost_time:364.67981338500977ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11827 prompt_cache_len:5151 prompt_cache_ratio:0.4355288746089456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 +DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:23 [batch.py:51] router release req id 8 +INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s +INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10977983474731445 s +DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=232335364309974174322263073830646631733, time:1750768103.9492073s req_ids:[8] +DEBUG 06-24 20:28:23 [manager.py:391] +ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:207.09753036499023ms total_cost_time:207.14068412780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11828 prompt_cache_len:5151 prompt_cache_ratio:0.4354920527561718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 +DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:24 [batch.py:51] router release req id 8 +INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10756397247314453 s +INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.1096193790435791 s +DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=243058501548682557211064948786590477480, time:1750768104.1618147s req_ids:[8] +DEBUG 06-24 20:28:24 [manager.py:391] +ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:201.89356803894043ms total_cost_time:201.93815231323242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11829 prompt_cache_len:5151 prompt_cache_ratio:0.4354552371290895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 +DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:24 [batch.py:51] router release req id 8 +INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s +INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s +DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=122837956804860988712092617742540758829, time:1750768104.370877s req_ids:[8] +DEBUG 06-24 20:28:24 [manager.py:391] +DEBUG 06-24 20:28:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 50688.747 tokens/s +DEBUG 06-24 20:28:24 [stats.py:37] Avg prompt tokens throughput: 50680.264 tokens/s +DEBUG 06-24 20:28:24 [stats.py:37] Avg generate tokens throughput: 8.484 tokens/s +ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.09833908081055ms total_cost_time:209.14268493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11830 prompt_cache_len:5151 prompt_cache_ratio:0.43541842772612005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 +DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:24 [batch.py:51] router release req id 8 +INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10765242576599121 s +INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10945415496826172 s +DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=104515876624913089004030360096655924995, time:1750768104.587176s req_ids:[8] +DEBUG 06-24 20:28:24 [manager.py:391] +ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.4717025756836ms total_cost_time:209.51437950134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11831 prompt_cache_len:5151 prompt_cache_ratio:0.4353816245456851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 +DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:24 [batch.py:51] router release req id 8 +INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s +INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10976481437683105 s +DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=288045694277706391797641348825570546609, time:1750768104.802586s req_ids:[8] +DEBUG 06-24 20:28:24 [manager.py:391] +ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.91945266723633ms total_cost_time:209.9616527557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11832 prompt_cache_len:5151 prompt_cache_ratio:0.4353448275862069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 +DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:24 [batch.py:51] router release req id 8 +INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10834193229675293 s +INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.11029696464538574 s +DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=131752891130281073793744434635496113375, time:1750768105.0190384s req_ids:[8] +DEBUG 06-24 20:28:25 [manager.py:391] +ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.53798294067383ms total_cost_time:209.58423614501953ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11833 prompt_cache_len:5151 prompt_cache_ratio:0.43530803684610836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 +DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:25 [batch.py:51] router release req id 8 +INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10739827156066895 s +INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.10914158821105957 s +DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=212893383406487969015545359751583718168, time:1750768105.2341962s req_ids:[8] +DEBUG 06-24 20:28:25 [manager.py:391] +ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:369.2042827606201ms total_cost_time:369.2517280578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:11834 prompt_cache_len:5151 prompt_cache_ratio:0.43527125232381275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 +DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:25 [batch.py:51] router release req id 8 +INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10823178291320801 s +INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.11018013954162598 s +DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=56993645224287758651976250850504000378, time:1750768105.6080446s req_ids:[8] +DEBUG 06-24 20:28:25 [manager.py:391] +ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:210.5083465576172ms total_cost_time:210.55293083190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11835 prompt_cache_len:5151 prompt_cache_ratio:0.43523447401774396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 +DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:25 [batch.py:51] router release req id 8 +INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10751581192016602 s +INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.10961699485778809 s +DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=265975935421197008784365063592200285825, time:1750768105.8247821s req_ids:[8] +DEBUG 06-24 20:28:25 [manager.py:391] +ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:208.13369750976562ms total_cost_time:208.1763744354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11836 prompt_cache_len:5151 prompt_cache_ratio:0.43519770192632645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 +DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:25 [batch.py:51] router release req id 8 +INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10786676406860352 s +INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s +DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=183294235721316073751470009447935104429, time:1750768106.0408154s req_ids:[8] +DEBUG 06-24 20:28:26 [manager.py:391] +ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:211.64608001708984ms total_cost_time:211.70663833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:11837 prompt_cache_len:5151 prompt_cache_ratio:0.4351609360479851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 +DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:26 [batch.py:51] router release req id 8 +INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.11120033264160156 s +INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.11325764656066895 s +DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=188789225399506397195645297269025473984, time:1750768106.2580295s req_ids:[8] +DEBUG 06-24 20:28:26 [manager.py:391] +ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:207.95011520385742ms total_cost_time:208.0066204071045ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11838 prompt_cache_len:5151 prompt_cache_ratio:0.4351241763811455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 +DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:26 [batch.py:51] router release req id 8 +INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s +INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.11059856414794922 s +DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=6252123952698092410013501360165782055, time:1750768106.4740617s req_ids:[8] +DEBUG 06-24 20:28:26 [manager.py:391] +ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:210.33096313476562ms total_cost_time:210.38317680358887ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:11839 prompt_cache_len:5151 prompt_cache_ratio:0.43508742292423347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 +DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:26 [batch.py:51] router release req id 8 +INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10774087905883789 s +INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.1097726821899414 s +DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=190641257117425534058824980797202168848, time:1750768106.6901624s req_ids:[8] +DEBUG 06-24 20:28:26 [manager.py:391] +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:209.52963829040527ms total_cost_time:209.57398414611816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11840 prompt_cache_len:5151 prompt_cache_ratio:0.4350506756756757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 +DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:26 [batch.py:51] router release req id 8 +INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10764956474304199 s +INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s +DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=151714238301907987982092593321977103980, time:1750768106.9058564s req_ids:[8] +DEBUG 06-24 20:28:26 [manager.py:391] +ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:363.74664306640625ms total_cost_time:363.79194259643555ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11841 prompt_cache_len:5151 prompt_cache_ratio:0.43501393463389915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 +DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:27 [batch.py:51] router release req id 8 +INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.1081080436706543 s +INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101982593536377 s +DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=25392068842753214668133826172919159850, time:1750768107.276169s req_ids:[8] +DEBUG 06-24 20:28:27 [manager.py:391] +ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:209.89418029785156ms total_cost_time:209.93995666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11842 prompt_cache_len:5151 prompt_cache_ratio:0.4349771997973315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 +DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:27 [batch.py:51] router release req id 8 +INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s +INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11043310165405273 s +DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=188952107873609749293299689582146219866, time:1750768107.4922912s req_ids:[8] +DEBUG 06-24 20:28:27 [manager.py:391] +ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:202.57258415222168ms total_cost_time:202.61693000793457ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11843 prompt_cache_len:5151 prompt_cache_ratio:0.43494047116440093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 +DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:27 [batch.py:51] router release req id 8 +INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10962986946105957 s +INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11179256439208984 s +DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=137199173143918372879743460047223152319, time:1750768107.7008302s req_ids:[8] +DEBUG 06-24 20:28:27 [manager.py:391] +ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:215.22808074951172ms total_cost_time:215.2717113494873ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11844 prompt_cache_len:5151 prompt_cache_ratio:0.434903748733536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 +DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:27 [batch.py:51] router release req id 8 +INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10940361022949219 s +INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s +DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=113998909102099155203770548422585652596, time:1750768107.938854s req_ids:[8] +DEBUG 06-24 20:28:27 [manager.py:391] +ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:227.5681495666504ms total_cost_time:227.61297225952148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11845 prompt_cache_len:5151 prompt_cache_ratio:0.4348670325031659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 +DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:28 [batch.py:51] router release req id 8 +INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.1095426082611084 s +INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11071610450744629 s +DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=265243073473811722256365139136524401430, time:1750768108.1573012s req_ids:[8] +DEBUG 06-24 20:28:28 [manager.py:391] +ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:209.51294898986816ms total_cost_time:209.56826210021973ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:11846 prompt_cache_len:5151 prompt_cache_ratio:0.43483032247172043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 +DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:28 [batch.py:51] router release req id 8 +INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.11229896545410156 s +INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11433148384094238 s +DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=91742669439278449955476834714760492758, time:1750768108.384648s req_ids:[8] +DEBUG 06-24 20:28:28 [manager.py:391] +ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:387.09449768066406ms total_cost_time:387.13884353637695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11847 prompt_cache_len:5151 prompt_cache_ratio:0.4347936186376298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 +DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:28 [batch.py:51] router release req id 8 +INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11019277572631836 s +DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=79882619272615391415717038818087429414, time:1750768108.7673495s req_ids:[8] +DEBUG 06-24 20:28:28 [manager.py:391] +ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:210.85071563720703ms total_cost_time:210.89649200439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11848 prompt_cache_len:5151 prompt_cache_ratio:0.4347569209993248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 +DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:28 [batch.py:51] router release req id 8 +INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s +INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11018013954162598 s +DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=188828207219635695086545947796986337882, time:1750768108.9842227s req_ids:[8] +DEBUG 06-24 20:28:28 [manager.py:391] +ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:205.32965660095215ms total_cost_time:205.37519454956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11849 prompt_cache_len:5151 prompt_cache_ratio:0.4347202295552367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 +DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:29 [batch.py:51] router release req id 8 +INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s +INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.10928034782409668 s +DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=138995906279715476315869694458443052957, time:1750768109.1963947s req_ids:[8] +DEBUG 06-24 20:28:29 [manager.py:391] +ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:209.28549766540527ms total_cost_time:209.33175086975098ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11850 prompt_cache_len:5151 prompt_cache_ratio:0.43468354430379746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 +DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:29 [batch.py:51] router release req id 8 +INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10859465599060059 s +INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.11075830459594727 s +DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=193244642764779444199998440016540857095, time:1750768109.4114962s req_ids:[8] +DEBUG 06-24 20:28:29 [manager.py:391] +ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:214.7960662841797ms total_cost_time:214.84112739562988ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11851 prompt_cache_len:5151 prompt_cache_ratio:0.4346468652434394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 +DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:29 [batch.py:51] router release req id 8 +INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10977506637573242 s +INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.11147403717041016 s +DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=230843736025076563184255870416772139743, time:1750768109.6499074s req_ids:[8] +DEBUG 06-24 20:28:29 [manager.py:391] +ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:190.8884048461914ms total_cost_time:190.9327507019043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11852 prompt_cache_len:5151 prompt_cache_ratio:0.43461019237259535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 +DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:29 [batch.py:51] router release req id 8 +INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10678720474243164 s +INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.1087179183959961 s +DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=19531097533351095739633717940783927452, time:1750768109.8298264s req_ids:[8] +DEBUG 06-24 20:28:29 [manager.py:391] +ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:167.0665740966797ms total_cost_time:167.1133041381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11853 prompt_cache_len:5151 prompt_cache_ratio:0.4345735256896988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 +DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:29 [batch.py:51] router release req id 8 +INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s +INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.10960268974304199 s +DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=298867485143769618783581751208625689141, time:1750768110.0050213s req_ids:[8] +DEBUG 06-24 20:28:30 [manager.py:391] +ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:392.99654960632324ms total_cost_time:393.04256439208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11854 prompt_cache_len:5151 prompt_cache_ratio:0.43453686519318374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 +DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:30 [batch.py:51] router release req id 8 +INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.10880279541015625 s +INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s +DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=292589598680450143269398590098992001728, time:1750768110.4024403s req_ids:[8] +DEBUG 06-24 20:28:30 [manager.py:391] +ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:209.6114158630371ms total_cost_time:209.68961715698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0782012939453125ms prompt_token_num:11855 prompt_cache_len:5151 prompt_cache_ratio:0.4345002108814846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 +DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:30 [batch.py:51] router release req id 8 +INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.10976409912109375 s +INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.11205029487609863 s +DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=56871444571994861550835807716257990985, time:1750768110.6332257s req_ids:[8] +DEBUG 06-24 20:28:30 [manager.py:391] +ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:228.11603546142578ms total_cost_time:228.1651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:11856 prompt_cache_len:5151 prompt_cache_ratio:0.43446356275303644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 +DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:30 [batch.py:51] router release req id 8 +INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.11135554313659668 s +INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.11368703842163086 s +DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=174837334016425638866942082311145806441, time:1750768110.8705413s req_ids:[8] +DEBUG 06-24 20:28:30 [manager.py:391] +ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:228.76787185668945ms total_cost_time:228.81627082824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:11857 prompt_cache_len:5151 prompt_cache_ratio:0.43442692080627476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 +DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:31 [batch.py:51] router release req id 8 +INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10927224159240723 s +INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138534545898438 s +DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=242135513759416570430729438734053233340, time:1750768111.1040351s req_ids:[8] +DEBUG 06-24 20:28:31 [manager.py:391] +ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:230.70883750915527ms total_cost_time:230.75532913208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11858 prompt_cache_len:5151 prompt_cache_ratio:0.43439028503963567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 +DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:31 [batch.py:51] router release req id 8 +INFO 06-24 20:28:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10867547988891602 s +INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.1109931468963623 s +DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=273366704237232413912962972500563289375, time:1750768111.3428593s req_ids:[8] +DEBUG 06-24 20:28:31 [manager.py:391] +ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:229.9816608428955ms total_cost_time:230.0264835357666ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11859 prompt_cache_len:5151 prompt_cache_ratio:0.4343536554515558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 +DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:31 [batch.py:51] router release req id 8 +INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.11008310317993164 s +INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.11231160163879395 s +DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=98208872831204647830177040958943145217, time:1750768111.5806077s req_ids:[8] +DEBUG 06-24 20:28:31 [manager.py:391] +ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:233.05153846740723ms total_cost_time:233.09850692749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11860 prompt_cache_len:5151 prompt_cache_ratio:0.4343170320404722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 +DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:31 [batch.py:51] router release req id 8 +INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10791850090026855 s +INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997653007507324 s +DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=97936047980822285077980742942771859182, time:1750768111.8040445s req_ids:[8] +DEBUG 06-24 20:28:31 [manager.py:391] +ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:364.84432220458984ms total_cost_time:364.88890647888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11861 prompt_cache_len:5151 prompt_cache_ratio:0.43428041480482255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 +DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:32 [batch.py:51] router release req id 8 +INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s +INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105806827545166 s +DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=11142585919447462739376373840096223320, time:1750768112.171773s req_ids:[8] +DEBUG 06-24 20:28:32 [manager.py:391] +ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:207.9486846923828ms total_cost_time:207.9939842224121ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11862 prompt_cache_len:5151 prompt_cache_ratio:0.434243803743045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 +DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:32 [batch.py:51] router release req id 8 +INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.1097416877746582 s +INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.11161470413208008 s +DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=110034234514243308030885337181554907301, time:1750768112.3862817s req_ids:[8] +DEBUG 06-24 20:28:32 [manager.py:391] +ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:166.6276454925537ms total_cost_time:166.6719913482666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11863 prompt_cache_len:5151 prompt_cache_ratio:0.43420719885357834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 +DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:32 [batch.py:51] router release req id 8 +INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10724115371704102 s +INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10936951637268066 s +DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=326034097724481502834778145619153073234, time:1750768112.5592637s req_ids:[8] +DEBUG 06-24 20:28:32 [manager.py:391] +ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:199.41210746765137ms total_cost_time:199.43857192993164ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:11864 prompt_cache_len:5151 prompt_cache_ratio:0.43417060013486175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 +DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:32 [batch.py:51] router release req id 8 +INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10683774948120117 s +INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10781574249267578 s +DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=179351694559500666814396036757856168726, time:1750768112.7673483s req_ids:[8] +DEBUG 06-24 20:28:32 [manager.py:391] +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:211.2438678741455ms total_cost_time:211.26580238342285ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11865 prompt_cache_len:5151 prompt_cache_ratio:0.43413400758533505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 +DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:32 [batch.py:51] router release req id 8 +INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10457754135131836 s +INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10544490814208984 s +DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=54379912887677411096561046016714697823, time:1750768112.984503s req_ids:[8] +DEBUG 06-24 20:28:32 [manager.py:391] +ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:212.47625350952148ms total_cost_time:212.50009536743164ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:11866 prompt_cache_len:5151 prompt_cache_ratio:0.4340974212034384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 +DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:33 [batch.py:51] router release req id 8 +INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.10356712341308594 s +INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.10439300537109375 s +DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=179937542494342056592850181103454616182, time:1750768113.2155564s req_ids:[8] +DEBUG 06-24 20:28:33 [manager.py:391] +ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:216.18413925170898ms total_cost_time:216.20559692382812ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11867 prompt_cache_len:5151 prompt_cache_ratio:0.4340608409876127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 +DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:33 [batch.py:51] router release req id 8 +INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.10451388359069824 s +DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=211653993442948043769921238633971639543, time:1750768113.4037392s req_ids:[8] +DEBUG 06-24 20:28:33 [manager.py:391] +INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.10536503791809082 s +ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:431.15901947021484ms total_cost_time:431.1847686767578ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:11868 prompt_cache_len:5151 prompt_cache_ratio:0.4340242669362993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 +DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:33 [batch.py:51] router release req id 8 +INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.1039581298828125 s +INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.1048593521118164 s +DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=293945058288205504111290336290387204763, time:1750768113.8673863s req_ids:[8] +DEBUG 06-24 20:28:33 [manager.py:391] +ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:214.2784595489502ms total_cost_time:214.30230140686035ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:11869 prompt_cache_len:5151 prompt_cache_ratio:0.43398769904794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 +DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:33 [batch.py:51] router release req id 8 +INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10369157791137695 s +INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10447406768798828 s +DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=6363734903778108591894042619698550698, time:1750768114.085345s req_ids:[8] +DEBUG 06-24 20:28:34 [manager.py:391] +ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:214.59150314331055ms total_cost_time:214.6134376525879ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11870 prompt_cache_len:5151 prompt_cache_ratio:0.43395113732097723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 +DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:34 [batch.py:51] router release req id 8 +INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.106109619140625 s +INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10734701156616211 s +DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=205892406970596528245782018845836049778, time:1750768114.304136s req_ids:[8] +DEBUG 06-24 20:28:34 [manager.py:391] +ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:28:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48521.220 tokens/s +DEBUG 06-24 20:28:34 [stats.py:37] Avg prompt tokens throughput: 48512.933 tokens/s +DEBUG 06-24 20:28:34 [stats.py:37] Avg generate tokens throughput: 8.287 tokens/s +INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:220.56055068969727ms total_cost_time:220.60585021972656ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11871 prompt_cache_len:5151 prompt_cache_ratio:0.43391458175385395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 +DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:34 [batch.py:51] router release req id 8 +INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s +INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10723495483398438 s +DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=220597745432292173285239366751729748883, time:1750768114.5218058s req_ids:[8] +DEBUG 06-24 20:28:34 [manager.py:391] +ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:222.35417366027832ms total_cost_time:222.37753868103027ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:11872 prompt_cache_len:5151 prompt_cache_ratio:0.4338780323450135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 +DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:34 [batch.py:51] router release req id 8 +INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.1039271354675293 s +INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10494041442871094 s +DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=303682631336364528079548651584249749075, time:1750768114.7535691s req_ids:[8] +DEBUG 06-24 20:28:34 [manager.py:391] +ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:215.80195426940918ms total_cost_time:215.82460403442383ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:11873 prompt_cache_len:5151 prompt_cache_ratio:0.4338414890928999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 +DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:34 [batch.py:51] router release req id 8 +INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10386419296264648 s +INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10472226142883301 s +DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=133238998156980978862911398731284604320, time:1750768114.9762044s req_ids:[8] +DEBUG 06-24 20:28:34 [manager.py:391] +ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:219.33317184448242ms total_cost_time:219.35534477233887ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:11874 prompt_cache_len:5151 prompt_cache_ratio:0.43380495199595753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 +DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:35 [batch.py:51] router release req id 8 +INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10379481315612793 s +INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10469818115234375 s +DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=178173195218545079165805977127711689883, time:1750768115.198263s req_ids:[8] +DEBUG 06-24 20:28:35 [manager.py:391] +ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:430.2067756652832ms total_cost_time:430.22775650024414ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11875 prompt_cache_len:5151 prompt_cache_ratio:0.4337684210526316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 +DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:35 [batch.py:51] router release req id 8 +INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10480284690856934 s +INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10572147369384766 s +DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=158044971044148494786218747835784622104, time:1750768115.6323247s req_ids:[8] +DEBUG 06-24 20:28:35 [manager.py:391] +ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:220.4289436340332ms total_cost_time:220.45421600341797ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11876 prompt_cache_len:5151 prompt_cache_ratio:0.4337318962613675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 +DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:35 [batch.py:51] router release req id 8 +INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10471010208129883 s +INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10570812225341797 s +DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=175923898943493976333115882125782362181, time:1750768115.8556015s req_ids:[8] +DEBUG 06-24 20:28:35 [manager.py:391] +ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:220.57223320007324ms total_cost_time:220.59965133666992ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:11877 prompt_cache_len:5151 prompt_cache_ratio:0.4336953776206113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 +DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:35 [batch.py:51] router release req id 8 +INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10354948043823242 s +INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10442376136779785 s +DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=291573683928380045270192115578318541141, time:1750768116.0778732s req_ids:[8] +DEBUG 06-24 20:28:36 [manager.py:391] +ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:218.96791458129883ms total_cost_time:218.9943790435791ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:11878 prompt_cache_len:5151 prompt_cache_ratio:0.4336588651288096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 +DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:36 [batch.py:51] router release req id 8 +INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10524845123291016 s +INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10628771781921387 s +DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=308021029786919301525346610842204144491, time:1750768116.2824674s req_ids:[8] +DEBUG 06-24 20:28:36 [manager.py:391] +ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:203.17339897155762ms total_cost_time:203.2005786895752ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11879 prompt_cache_len:5151 prompt_cache_ratio:0.43362235878440947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 +DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:36 [batch.py:51] router release req id 8 +INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10391378402709961 s +INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10484695434570312 s +DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=87678905147441289569508797526573543352, time:1750768116.487401s req_ids:[8] +DEBUG 06-24 20:28:36 [manager.py:391] +ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:200.15239715576172ms total_cost_time:200.17147064208984ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11880 prompt_cache_len:5151 prompt_cache_ratio:0.4335858585858586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 +DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:36 [batch.py:51] router release req id 8 +INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10352921485900879 s +INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10434770584106445 s +DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=1154044498906886573778504009953608319, time:1750768116.6910193s req_ids:[8] +DEBUG 06-24 20:28:36 [manager.py:391] +ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:206.97855949401855ms total_cost_time:206.9990634918213ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:11881 prompt_cache_len:5151 prompt_cache_ratio:0.4335493645316051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 +DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:36 [batch.py:51] router release req id 8 +INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.304229736328125 s +INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.3050069808959961 s +DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=315163521652213426658964823810151462492, time:1750768117.1118753s req_ids:[8] +DEBUG 06-24 20:28:37 [manager.py:391] +ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:410.48479080200195ms total_cost_time:410.5041027069092ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11882 prompt_cache_len:5151 prompt_cache_ratio:0.43351287662009763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 +DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:37 [batch.py:51] router release req id 8 +INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10379481315612793 s +DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=264284800079406942670858333695686328181, time:1750768117.2995176s req_ids:[8] +DEBUG 06-24 20:28:37 [manager.py:391] +INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10478949546813965 s +ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:185.39714813232422ms total_cost_time:185.41622161865234ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11883 prompt_cache_len:5151 prompt_cache_ratio:0.4334763948497854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 +DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:37 [batch.py:51] router release req id 8 +INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10296773910522461 s +INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10394597053527832 s +DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=111194200685913567443083064226978910299, time:1750768117.5203545s req_ids:[8] +DEBUG 06-24 20:28:37 [manager.py:391] +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:217.53764152526855ms total_cost_time:217.5581455230713ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:11884 prompt_cache_len:5151 prompt_cache_ratio:0.4334399192191181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 +DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:37 [batch.py:51] router release req id 8 +INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10298776626586914 s +INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10373616218566895 s +DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=252069406791972656238954777312853159992, time:1750768117.7395692s req_ids:[8] +DEBUG 06-24 20:28:37 [manager.py:391] +ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:215.75617790222168ms total_cost_time:215.77763557434082ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11885 prompt_cache_len:5151 prompt_cache_ratio:0.43340344972654604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 +DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:37 [batch.py:51] router release req id 8 +INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10303401947021484 s +INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10387301445007324 s +DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=139493725814063699601069818821416952512, time:1750768117.9583132s req_ids:[8] +DEBUG 06-24 20:28:37 [manager.py:391] +ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:216.84932708740234ms total_cost_time:216.86959266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11886 prompt_cache_len:5151 prompt_cache_ratio:0.43336698637051996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 +DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:38 [batch.py:51] router release req id 8 +INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10387134552001953 s +INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.10461997985839844 s +DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=92878499258711925766107713668096769541, time:1750768118.1783955s req_ids:[8] +DEBUG 06-24 20:28:38 [manager.py:391] +ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:214.59698677062988ms total_cost_time:214.61868286132812ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11887 prompt_cache_len:5151 prompt_cache_ratio:0.43333052914949105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 +DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:38 [batch.py:51] router release req id 8 +INFO 06-24 20:28:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10263776779174805 s +INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.1034245491027832 s +DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=186727004882905897419108820902417162138, time:1750768118.3953588s req_ids:[8] +DEBUG 06-24 20:28:38 [manager.py:391] +ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:419.61145401000977ms total_cost_time:419.6295738220215ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:11888 prompt_cache_len:5151 prompt_cache_ratio:0.43329407806191117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 +DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:38 [batch.py:51] router release req id 8 +INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10374951362609863 s +INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.1044778823852539 s +DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=225027464847716798447343676528676637766, time:1750768118.8176684s req_ids:[8] +DEBUG 06-24 20:28:38 [manager.py:391] +ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:212.74209022521973ms total_cost_time:212.76211738586426ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11889 prompt_cache_len:5151 prompt_cache_ratio:0.43325763310623266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 +DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:38 [batch.py:51] router release req id 8 +INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10383486747741699 s +INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10457634925842285 s +DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=16074684672064765379546276517470713510, time:1750768119.0338006s req_ids:[8] +DEBUG 06-24 20:28:39 [manager.py:391] +ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:214.76483345031738ms total_cost_time:214.78557586669922ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11890 prompt_cache_len:5151 prompt_cache_ratio:0.43322119428090833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 +DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:39 [batch.py:51] router release req id 8 +INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10288023948669434 s +INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10366606712341309 s +DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=285439775702852421388817878679544362556, time:1750768119.2514436s req_ids:[8] +DEBUG 06-24 20:28:39 [manager.py:391] +ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:214.60890769958496ms total_cost_time:214.6289348602295ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11891 prompt_cache_len:5151 prompt_cache_ratio:0.43318476158439156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 +DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:39 [batch.py:51] router release req id 8 +INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10285329818725586 s +INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10359406471252441 s +DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=318004118626866687502908502583474110159, time:1750768119.469981s req_ids:[8] +DEBUG 06-24 20:28:39 [manager.py:391] +ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:214.71118927001953ms total_cost_time:214.73217010498047ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11892 prompt_cache_len:5151 prompt_cache_ratio:0.4331483350151362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 +DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:39 [batch.py:51] router release req id 8 +INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10289382934570312 s +INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10366392135620117 s +DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=27786646989689724984413693468009177999, time:1750768119.6552756s req_ids:[8] +DEBUG 06-24 20:28:39 [manager.py:391] +ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:185.27674674987793ms total_cost_time:185.29653549194336ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11893 prompt_cache_len:5151 prompt_cache_ratio:0.43311191457159676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 +DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:39 [batch.py:51] router release req id 8 +INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10391473770141602 s +INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10465574264526367 s +DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=215372438020006911783971177387505560473, time:1750768119.8432872s req_ids:[8] +DEBUG 06-24 20:28:39 [manager.py:391] +ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:186.3267421722412ms total_cost_time:186.34605407714844ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11894 prompt_cache_len:5151 prompt_cache_ratio:0.433075500252228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 +DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:39 [batch.py:51] router release req id 8 +INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10273075103759766 s +INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10346341133117676 s +DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=94149045766095927298669277190631441307, time:1750768120.031504s req_ids:[8] +DEBUG 06-24 20:28:40 [manager.py:391] +ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:396.63124084472656ms total_cost_time:396.6507911682129ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11895 prompt_cache_len:5151 prompt_cache_ratio:0.4330390920554855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 +DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:40 [batch.py:51] router release req id 8 +INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s +INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10464334487915039 s +DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=294727548836427237896901735155172937731, time:1750768120.4312015s req_ids:[8] +DEBUG 06-24 20:28:40 [manager.py:391] +ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:185.64414978027344ms total_cost_time:185.66226959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:11896 prompt_cache_len:5151 prompt_cache_ratio:0.43300268997982516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 +DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:40 [batch.py:51] router release req id 8 +INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10303568840026855 s +INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10377955436706543 s +DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=193815743549806285059070736392735113758, time:1750768120.617825s req_ids:[8] +DEBUG 06-24 20:28:40 [manager.py:391] +ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:184.70144271850586ms total_cost_time:184.7207546234131ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11897 prompt_cache_len:5151 prompt_cache_ratio:0.43296629402370346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 +DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:40 [batch.py:51] router release req id 8 +INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10303282737731934 s +DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=66009073030139411667477631772325113051, time:1750768120.8048763s req_ids:[8] +DEBUG 06-24 20:28:40 [manager.py:391] +INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10388350486755371 s +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:184.5862865447998ms total_cost_time:184.60583686828613ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11898 prompt_cache_len:5151 prompt_cache_ratio:0.4329299041855774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 +DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:40 [batch.py:51] router release req id 8 +INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10399484634399414 s +INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10473823547363281 s +DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=304931695665280690564269659223799389780, time:1750768120.9929843s req_ids:[8] +DEBUG 06-24 20:28:40 [manager.py:391] +ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:186.12146377563477ms total_cost_time:186.1422061920166ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11899 prompt_cache_len:5151 prompt_cache_ratio:0.43289352046390456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 +DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:41 [batch.py:51] router release req id 8 +INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10398221015930176 s +DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=91869575234673034233369237604297197940, time:1750768121.1813273s req_ids:[8] +DEBUG 06-24 20:28:41 [manager.py:391] +INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10483932495117188 s +ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:186.0513687133789ms total_cost_time:186.07115745544434ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11900 prompt_cache_len:5151 prompt_cache_ratio:0.4328571428571429 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 +DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:41 [batch.py:51] router release req id 8 +INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10289621353149414 s +INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10368871688842773 s +DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=293735260567805192594385009315586009925, time:1750768121.3701365s req_ids:[8] +DEBUG 06-24 20:28:41 [manager.py:391] +ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:185.74285507202148ms total_cost_time:185.76383590698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11901 prompt_cache_len:5151 prompt_cache_ratio:0.43282077136375097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 +DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:41 [batch.py:51] router release req id 8 +INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10282731056213379 s +INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10369467735290527 s +DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=133231803494484504470395985042425411248, time:1750768121.5576954s req_ids:[8] +DEBUG 06-24 20:28:41 [manager.py:391] +ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:406.8021774291992ms total_cost_time:406.82244300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11902 prompt_cache_len:5151 prompt_cache_ratio:0.43278440598218787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 +DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:41 [batch.py:51] router release req id 8 +INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10396695137023926 s +DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=14362376582423461109887799211070417666, time:1750768121.967087s req_ids:[8] +DEBUG 06-24 20:28:41 [manager.py:391] +INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.1047511100769043 s +ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:185.03522872924805ms total_cost_time:185.05620956420898ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11903 prompt_cache_len:5151 prompt_cache_ratio:0.4327480467109132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 +DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:42 [batch.py:51] router release req id 8 +INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10300755500793457 s +INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10376834869384766 s +DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=196042952659276185954510919503217220697, time:1750768122.187142s req_ids:[8] +DEBUG 06-24 20:28:42 [manager.py:391] +ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:216.72797203063965ms total_cost_time:216.74680709838867ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11904 prompt_cache_len:5151 prompt_cache_ratio:0.4327116935483871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 +DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:42 [batch.py:51] router release req id 8 +INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10305380821228027 s +DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=51072148814772939830098363549592479794, time:1750768122.3728938s req_ids:[8] +DEBUG 06-24 20:28:42 [manager.py:391] +INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10382604598999023 s +ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:183.90846252441406ms total_cost_time:183.9296817779541ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11905 prompt_cache_len:5151 prompt_cache_ratio:0.43267534649307016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 +DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:42 [batch.py:51] router release req id 8 +INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10294365882873535 s +INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10368561744689941 s +DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=179196303910933287213935143701919788889, time:1750768122.5592992s req_ids:[8] +DEBUG 06-24 20:28:42 [manager.py:391] +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:184.1733455657959ms total_cost_time:184.19241905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11906 prompt_cache_len:5151 prompt_cache_ratio:0.4326390055434235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 +DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:42 [batch.py:51] router release req id 8 +INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10312676429748535 s +INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.1038978099822998 s +DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=147129783462552214023951574258481299419, time:1750768122.7792697s req_ids:[8] +DEBUG 06-24 20:28:42 [manager.py:391] +ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:217.94939041137695ms total_cost_time:217.96894073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11907 prompt_cache_len:5151 prompt_cache_ratio:0.4326026706979088 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 +DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:42 [batch.py:51] router release req id 8 +INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s +INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10386800765991211 s +DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=249285835926311955843662330564777077985, time:1750768122.9995666s req_ids:[8] +DEBUG 06-24 20:28:42 [manager.py:391] +ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:217.47827529907227ms total_cost_time:217.4971103668213ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11908 prompt_cache_len:5151 prompt_cache_ratio:0.4325663419549882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 +DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:43 [batch.py:51] router release req id 8 +INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.1026608943939209 s +INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10341238975524902 s +DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=229628226113436194133592346352402657513, time:1750768123.2067904s req_ids:[8] +DEBUG 06-24 20:28:43 [manager.py:391] +ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:376.5110969543457ms total_cost_time:376.53112411499023ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11909 prompt_cache_len:5151 prompt_cache_ratio:0.4325300193131245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 +DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:43 [batch.py:51] router release req id 8 +INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10271787643432617 s +INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.1034994125366211 s +DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=219670195906739416862186270876773721924, time:1750768123.586386s req_ids:[8] +DEBUG 06-24 20:28:43 [manager.py:391] +ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:186.95306777954102ms total_cost_time:186.97142601013184ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:11910 prompt_cache_len:5151 prompt_cache_ratio:0.43249370277078086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 +DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:43 [batch.py:51] router release req id 8 +INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10295701026916504 s +INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10371160507202148 s +DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=109108641233032409777627110540003684663, time:1750768123.7755115s req_ids:[8] +DEBUG 06-24 20:28:43 [manager.py:391] +ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:187.1178150177002ms total_cost_time:187.1469020843506ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:11911 prompt_cache_len:5151 prompt_cache_ratio:0.43245739232642094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 +DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:43 [batch.py:51] router release req id 8 +INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10373187065124512 s +INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10450029373168945 s +DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=126136171161668277662895032290498948939, time:1750768123.9630594s req_ids:[8] +DEBUG 06-24 20:28:43 [manager.py:391] +ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:185.05454063415527ms total_cost_time:185.0748062133789ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11912 prompt_cache_len:5151 prompt_cache_ratio:0.4324210879785091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 +DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:44 [batch.py:51] router release req id 8 +INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.1041414737701416 s +INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.1050574779510498 s +DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=101904346663866664492163981770199337905, time:1750768124.1519098s req_ids:[8] +DEBUG 06-24 20:28:44 [manager.py:391] +ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:185.77861785888672ms total_cost_time:185.79959869384766ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11913 prompt_cache_len:5151 prompt_cache_ratio:0.4323847897255099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 +DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:44 [batch.py:51] router release req id 8 +INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.10367274284362793 s +INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10445904731750488 s +DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=105959152308765602551435942516272708875, time:1750768124.3401246s req_ids:[8] +DEBUG 06-24 20:28:44 [manager.py:391] +ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:28:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 51072.416 tokens/s +DEBUG 06-24 20:28:44 [stats.py:37] Avg prompt tokens throughput: 51063.828 tokens/s +DEBUG 06-24 20:28:44 [stats.py:37] Avg generate tokens throughput: 8.587 tokens/s +INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:186.10668182373047ms total_cost_time:186.1276626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11914 prompt_cache_len:5151 prompt_cache_ratio:0.4323484975658889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 +DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:44 [batch.py:51] router release req id 8 +INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.1065988540649414 s +INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10744047164916992 s +DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=150338943219137824466646260291292578170, time:1750768124.5294368s req_ids:[8] +DEBUG 06-24 20:28:44 [manager.py:391] +ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:181.13470077514648ms total_cost_time:181.16521835327148ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:11915 prompt_cache_len:5151 prompt_cache_ratio:0.4323122114981116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 +DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:44 [batch.py:51] router release req id 8 +INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.20542168617248535 s +INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.20613527297973633 s +DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=217487315321687900054528096871513963017, time:1750768124.8341997s req_ids:[8] +DEBUG 06-24 20:28:44 [manager.py:391] +ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:285.7646942138672ms total_cost_time:285.7856750488281ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11916 prompt_cache_len:5151 prompt_cache_ratio:0.43227593152064453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 +DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:44 [batch.py:51] router release req id 8 +INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.10442137718200684 s +INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10544300079345703 s +DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=290524089958705897788470446060157217293, time:1750768124.9884398s req_ids:[8] +DEBUG 06-24 20:28:44 [manager.py:391] +ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:163.35177421569824ms total_cost_time:163.37180137634277ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11917 prompt_cache_len:5151 prompt_cache_ratio:0.4322396576319544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 +DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:45 [batch.py:51] router release req id 8 +INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10406017303466797 s +INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10481667518615723 s +DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=30692006335710302791810082906723519783, time:1750768125.1534283s req_ids:[8] +DEBUG 06-24 20:28:45 [manager.py:391] +ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:168.41912269592285ms total_cost_time:168.43771934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:11918 prompt_cache_len:5151 prompt_cache_ratio:0.4322033898305085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 +DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:45 [batch.py:51] router release req id 8 +INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10372376441955566 s +INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10448122024536133 s +DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=72490887313163765474611746369139095922, time:1750768125.3297367s req_ids:[8] +DEBUG 06-24 20:28:45 [manager.py:391] +ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:156.89325332641602ms total_cost_time:156.91423416137695ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11919 prompt_cache_len:5151 prompt_cache_ratio:0.43216712811477476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 +DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:45 [batch.py:51] router release req id 8 +INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10349678993225098 s +INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10440278053283691 s +DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=104551186219817458142548401733315601718, time:1750768125.4851012s req_ids:[8] +DEBUG 06-24 20:28:45 [manager.py:391] +ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:184.56506729125977ms total_cost_time:184.5846176147461ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11920 prompt_cache_len:5151 prompt_cache_ratio:0.43213087248322146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 +DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:45 [batch.py:51] router release req id 8 +INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10286378860473633 s +INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10376334190368652 s +DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=285439590067661428086108302901341192029, time:1750768125.67804s req_ids:[8] +DEBUG 06-24 20:28:45 [manager.py:391] +ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:191.59483909606934ms total_cost_time:191.61677360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11921 prompt_cache_len:5151 prompt_cache_ratio:0.4320946229343176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 +DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:45 [batch.py:51] router release req id 8 +INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10417461395263672 s +INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.1050724983215332 s +DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=330880769612056719860483041211692931428, time:1750768125.8728566s req_ids:[8] +DEBUG 06-24 20:28:45 [manager.py:391] +ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:371.7951774597168ms total_cost_time:371.8152046203613ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11922 prompt_cache_len:5151 prompt_cache_ratio:0.43205837946653247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 +DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:46 [batch.py:51] router release req id 8 +INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10330057144165039 s +INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10409116744995117 s +DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=101432892994023791577161635378135733177, time:1750768126.254141s req_ids:[8] +DEBUG 06-24 20:28:46 [manager.py:391] +ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:187.8063678741455ms total_cost_time:187.82806396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11923 prompt_cache_len:5151 prompt_cache_ratio:0.432022142078336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 +DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:46 [batch.py:51] router release req id 8 +INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10390281677246094 s +INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.1046912670135498 s +DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=194569489297379704269325406132162936346, time:1750768126.4438913s req_ids:[8] +DEBUG 06-24 20:28:46 [manager.py:391] +ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.4046115875244ms total_cost_time:184.42440032958984ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11924 prompt_cache_len:5151 prompt_cache_ratio:0.4319859107681986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 +DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:46 [batch.py:51] router release req id 8 +INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10378146171569824 s +INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10457301139831543 s +DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=179306132277893662537467714436218454634, time:1750768126.6318889s req_ids:[8] +DEBUG 06-24 20:28:46 [manager.py:391] +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:185.32323837280273ms total_cost_time:185.34207344055176ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11925 prompt_cache_len:5151 prompt_cache_ratio:0.4319496855345912 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 +DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:46 [batch.py:51] router release req id 8 +INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.1040191650390625 s +INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10579252243041992 s +DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=98198369865994734633178925837271803866, time:1750768126.818987s req_ids:[8] +DEBUG 06-24 20:28:46 [manager.py:391] +ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.4651699066162ms total_cost_time:184.4959259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11926 prompt_cache_len:5151 prompt_cache_ratio:0.43191346637598527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 +DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:46 [batch.py:51] router release req id 8 +INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10430574417114258 s +INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10512351989746094 s +DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=39941762660234327624813944061963785239, time:1750768127.0060763s req_ids:[8] +DEBUG 06-24 20:28:47 [manager.py:391] +INFO 06-24 20:28:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.53502655029297ms total_cost_time:184.56006050109863ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:11927 prompt_cache_len:5151 prompt_cache_ratio:0.4318772532908527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 +DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:47 [batch.py:51] router release req id 8 +INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10611820220947266 s +INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10719561576843262 s +DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=254371351300487063249411515892349847721, time:1750768127.1945481s req_ids:[8] +DEBUG 06-24 20:28:47 [manager.py:391] +ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:200.6247043609619ms total_cost_time:200.6673812866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11928 prompt_cache_len:5151 prompt_cache_ratio:0.431841046277666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 +DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:47 [batch.py:51] router release req id 8 +INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10622334480285645 s +INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10711431503295898 s +DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=180769309607235874900552505442133146658, time:1750768127.390995s req_ids:[8] +DEBUG 06-24 20:28:47 [manager.py:391] +ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:359.03429985046387ms total_cost_time:359.07840728759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11929 prompt_cache_len:5151 prompt_cache_ratio:0.43180484533489816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 +DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:47 [batch.py:51] router release req id 8 +INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10712313652038574 s +INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10915040969848633 s +DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=183451054074156367393599271568668816449, time:1750768127.7498627s req_ids:[8] +DEBUG 06-24 20:28:47 [manager.py:391] +ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:186.98954582214355ms total_cost_time:187.02030181884766ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11930 prompt_cache_len:5151 prompt_cache_ratio:0.43176865046102264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 +DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:47 [batch.py:51] router release req id 8 +INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10822248458862305 s +INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.11017537117004395 s +DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=262289064727497717295385751286343736738, time:1750768127.954026s req_ids:[8] +DEBUG 06-24 20:28:47 [manager.py:391] +ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:214.22171592712402ms total_cost_time:214.26773071289062ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11931 prompt_cache_len:5151 prompt_cache_ratio:0.43173246165451346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 +DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:48 [batch.py:51] router release req id 8 +INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1095893383026123 s +INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11162662506103516 s +DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=10955442552283379595091094048822561627, time:1750768128.169632s req_ids:[8] +DEBUG 06-24 20:28:48 [manager.py:391] +ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:211.67969703674316ms total_cost_time:211.72380447387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11932 prompt_cache_len:5151 prompt_cache_ratio:0.4316962789138451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 +DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:48 [batch.py:51] router release req id 8 +INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1094825267791748 s +INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11165213584899902 s +DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=30701902382265071494347345381379766020, time:1750768128.3848386s req_ids:[8] +DEBUG 06-24 20:28:48 [manager.py:391] +ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:211.04764938354492ms total_cost_time:211.090087890625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11933 prompt_cache_len:5151 prompt_cache_ratio:0.4316601022374927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 +DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:48 [batch.py:51] router release req id 8 +INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.10813164710998535 s +INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11011147499084473 s +DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=264220764458383977328336085653241699902, time:1750768128.6066256s req_ids:[8] +DEBUG 06-24 20:28:48 [manager.py:391] +ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:212.75782585144043ms total_cost_time:212.81981468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:11934 prompt_cache_len:5151 prompt_cache_ratio:0.43162393162393164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 +DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:48 [batch.py:51] router release req id 8 +INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1090703010559082 s +INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11097431182861328 s +DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=226970020615295283576571519100874197849, time:1750768128.82386s req_ids:[8] +DEBUG 06-24 20:28:48 [manager.py:391] +ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:225.9652614593506ms total_cost_time:226.00626945495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:11935 prompt_cache_len:5151 prompt_cache_ratio:0.43158776707163804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 +DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:48 [batch.py:51] router release req id 8 +INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10972976684570312 s +INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.11196017265319824 s +DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=56558388786481784097447248110183976064, time:1750768129.0512826s req_ids:[8] +DEBUG 06-24 20:28:49 [manager.py:391] +ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:373.2337951660156ms total_cost_time:373.2795715332031ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11936 prompt_cache_len:5151 prompt_cache_ratio:0.4315516085790885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 +DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:49 [batch.py:51] router release req id 8 +INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10651302337646484 s +INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10838103294372559 s +DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=249677948696364611735669091539764887871, time:1750768129.4364083s req_ids:[8] +DEBUG 06-24 20:28:49 [manager.py:391] +ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:162.73903846740723ms total_cost_time:162.78576850891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11937 prompt_cache_len:5151 prompt_cache_ratio:0.43151545614476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 +DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:49 [batch.py:51] router release req id 8 +INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.1071782112121582 s +INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10917830467224121 s +DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=193901099944717543853453843433311174277, time:1750768129.597522s req_ids:[8] +DEBUG 06-24 20:28:49 [manager.py:391] +ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:192.71159172058105ms total_cost_time:192.75712966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11938 prompt_cache_len:5151 prompt_cache_ratio:0.43147930976713017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 +DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:49 [batch.py:51] router release req id 8 +INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10732078552246094 s +INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10909366607666016 s +DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=105622941769307149745216722497816046707, time:1750768129.8114686s req_ids:[8] +DEBUG 06-24 20:28:49 [manager.py:391] +ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:187.64710426330566ms total_cost_time:187.69335746765137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11939 prompt_cache_len:5151 prompt_cache_ratio:0.4314431694446771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 +DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:49 [batch.py:51] router release req id 8 +INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10663914680480957 s +INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769391059875488 s +DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=32829283500757086317448305008622266516, time:1750768129.999136s req_ids:[8] +DEBUG 06-24 20:28:49 [manager.py:391] +ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:184.11564826965332ms total_cost_time:184.16166305541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11940 prompt_cache_len:5151 prompt_cache_ratio:0.4314070351758794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 +DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:50 [batch.py:51] router release req id 8 +INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10706734657287598 s +INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.10807466506958008 s +DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=218307670817421332996924828182541919691, time:1750768130.1863377s req_ids:[8] +DEBUG 06-24 20:28:50 [manager.py:391] +ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:193.1002140045166ms total_cost_time:193.16411018371582ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:11941 prompt_cache_len:5151 prompt_cache_ratio:0.43137090695921615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 +DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:50 [batch.py:51] router release req id 8 +INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10703134536743164 s +INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.1085963249206543 s +DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=8075253747766070727967962765713576120, time:1750768130.3880155s req_ids:[8] +DEBUG 06-24 20:28:50 [manager.py:391] +ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:204.17213439941406ms total_cost_time:204.21552658081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11942 prompt_cache_len:5151 prompt_cache_ratio:0.43133478479316695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 +DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:50 [batch.py:51] router release req id 8 +INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s +INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.11028790473937988 s +DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=298479681842835287749325519964436205646, time:1750768130.596268s req_ids:[8] +DEBUG 06-24 20:28:50 [manager.py:391] +ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:362.25199699401855ms total_cost_time:362.29538917541504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11943 prompt_cache_len:5151 prompt_cache_ratio:0.431298668676212 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 +DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:50 [batch.py:51] router release req id 8 +INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10841035842895508 s +DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=177426162998192559358030897943835490159, time:1750768130.957746s req_ids:[8] +DEBUG 06-24 20:28:50 [manager.py:391] +INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s +ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:197.24130630493164ms total_cost_time:197.2970962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:11944 prompt_cache_len:5151 prompt_cache_ratio:0.4312625586068319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 +DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:51 [batch.py:51] router release req id 8 +INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10709810256958008 s +INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.10895037651062012 s +DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=249981216753730869745852507468848513451, time:1750768131.16743s req_ids:[8] +DEBUG 06-24 20:28:51 [manager.py:391] +ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:192.49606132507324ms total_cost_time:192.53969192504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11945 prompt_cache_len:5151 prompt_cache_ratio:0.4312264545835077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 +DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:51 [batch.py:51] router release req id 8 +INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10904741287231445 s +INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.11110615730285645 s +DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=4334819489007811238289904869235204874, time:1750768131.3669422s req_ids:[8] +DEBUG 06-24 20:28:51 [manager.py:391] +ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:209.0611457824707ms total_cost_time:209.1076374053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11946 prompt_cache_len:5151 prompt_cache_ratio:0.43119035660472127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 +DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:51 [batch.py:51] router release req id 8 +INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10863590240478516 s +INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s +DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=187049276141074335429132119649016305600, time:1750768131.5853672s req_ids:[8] +DEBUG 06-24 20:28:51 [manager.py:391] +ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:204.3013572692871ms total_cost_time:204.3440341949463ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11947 prompt_cache_len:5151 prompt_cache_ratio:0.43115426466895457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 +DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:51 [batch.py:51] router release req id 8 +INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10752534866333008 s +INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.10944914817810059 s +DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=32083930597534349419754367776652719316, time:1750768131.7934196s req_ids:[8] +DEBUG 06-24 20:28:51 [manager.py:391] +ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:207.75246620178223ms total_cost_time:207.7958583831787ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11948 prompt_cache_len:5151 prompt_cache_ratio:0.43111817877469033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 +DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:51 [batch.py:51] router release req id 8 +INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10822105407714844 s +INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s +DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=82932988976142750099513244572096112892, time:1750768132.014584s req_ids:[8] +DEBUG 06-24 20:28:52 [manager.py:391] +ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:216.16816520690918ms total_cost_time:216.21298789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11949 prompt_cache_len:5151 prompt_cache_ratio:0.43108209892041177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 +DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:52 [batch.py:51] router release req id 8 +INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10789871215820312 s +INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s +DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=53293644116887765529528007494403420690, time:1750768132.2312596s req_ids:[8] +DEBUG 06-24 20:28:52 [manager.py:391] +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:373.5618591308594ms total_cost_time:373.60668182373047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11950 prompt_cache_len:5151 prompt_cache_ratio:0.4310460251046025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 +DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:52 [batch.py:51] router release req id 8 +INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10766887664794922 s +INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986828804016113 s +DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=299085699666324630620667964424971663928, time:1750768132.6142173s req_ids:[8] +DEBUG 06-24 20:28:52 [manager.py:391] +ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:213.83905410766602ms total_cost_time:213.88912200927734ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:11951 prompt_cache_len:5151 prompt_cache_ratio:0.4310099573257468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 +DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:52 [batch.py:51] router release req id 8 +INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10742497444152832 s +INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10931229591369629 s +DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=144798307928284488434488280874079884879, time:1750768132.8307838s req_ids:[8] +DEBUG 06-24 20:28:52 [manager.py:391] +ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:210.98780632019043ms total_cost_time:211.0309600830078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11952 prompt_cache_len:5151 prompt_cache_ratio:0.4309738955823293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 +DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:52 [batch.py:51] router release req id 8 +INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10730409622192383 s +INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10931992530822754 s +DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=283566256457937003956700817055337061115, time:1750768133.052218s req_ids:[8] +DEBUG 06-24 20:28:53 [manager.py:391] +ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:209.32388305664062ms total_cost_time:209.37156677246094ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:11953 prompt_cache_len:5151 prompt_cache_ratio:0.4309378398728353 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 +DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:53 [batch.py:51] router release req id 8 +INFO 06-24 20:28:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s +INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.11046814918518066 s +DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=321079679069355810564997312921033860538, time:1750768133.2700834s req_ids:[8] +DEBUG 06-24 20:28:53 [manager.py:391] +INFO 06-24 20:28:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:201.04026794433594ms total_cost_time:201.0631561279297ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:11954 prompt_cache_len:5151 prompt_cache_ratio:0.43090179019575037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 +DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:53 [batch.py:51] router release req id 8 +INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10402393341064453 s +INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10510063171386719 s +DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=214015021856281163025284845259801141914, time:1750768133.4948883s req_ids:[8] +DEBUG 06-24 20:28:53 [manager.py:391] +ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:215.09408950805664ms total_cost_time:215.15870094299316ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:11955 prompt_cache_len:5151 prompt_cache_ratio:0.43086574654956084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 +DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:53 [batch.py:51] router release req id 8 +INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10417962074279785 s +INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.1058039665222168 s +DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=278922859122595349551139641541674955634, time:1750768133.7029865s req_ids:[8] +DEBUG 06-24 20:28:53 [manager.py:391] +ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:208.16516876220703ms total_cost_time:208.21022987365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11956 prompt_cache_len:5151 prompt_cache_ratio:0.4308297089327534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 +DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:53 [batch.py:51] router release req id 8 +INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10745906829833984 s +INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10846972465515137 s +DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=265470482435672432438706593366940899124, time:1750768133.9040763s req_ids:[8] +DEBUG 06-24 20:28:53 [manager.py:391] +ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:355.8318614959717ms total_cost_time:355.87477684020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11957 prompt_cache_len:5151 prompt_cache_ratio:0.43079367734381535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 +DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:54 [batch.py:51] router release req id 8 +INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10811686515808105 s +INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s +DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=233804298028408121718787241589612477328, time:1750768134.2658517s req_ids:[8] +DEBUG 06-24 20:28:54 [manager.py:391] +ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:199.7823715209961ms total_cost_time:199.82671737670898ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11958 prompt_cache_len:5151 prompt_cache_ratio:0.4307576517812343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 +DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:54 [batch.py:51] router release req id 8 +INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s +INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.11061215400695801 s +DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=70171910777716891133016673777406270311, time:1750768134.4771082s req_ids:[8] +DEBUG 06-24 20:28:54 [manager.py:391] +DEBUG 06-24 20:28:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 53321.297 tokens/s +DEBUG 06-24 20:28:54 [stats.py:37] Avg prompt tokens throughput: 53312.464 tokens/s +DEBUG 06-24 20:28:54 [stats.py:37] Avg generate tokens throughput: 8.833 tokens/s +ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:215.33989906311035ms total_cost_time:215.38352966308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11959 prompt_cache_len:5151 prompt_cache_ratio:0.43072163224349863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 +DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:54 [batch.py:51] router release req id 8 +INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10713386535644531 s +INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.10915851593017578 s +DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=231323215789748381961888343141972574034, time:1750768134.6927128s req_ids:[8] +DEBUG 06-24 20:28:54 [manager.py:391] +ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:205.79195022583008ms total_cost_time:205.83748817443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11960 prompt_cache_len:5151 prompt_cache_ratio:0.430685618729097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 +DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:54 [batch.py:51] router release req id 8 +INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10983586311340332 s +INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.11195635795593262 s +DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=238224137516115694925448643174657560197, time:1750768134.911102s req_ids:[8] +DEBUG 06-24 20:28:54 [manager.py:391] +ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:216.70222282409668ms total_cost_time:216.74561500549316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11961 prompt_cache_len:5151 prompt_cache_ratio:0.4306496112365187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 +DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:55 [batch.py:51] router release req id 8 +INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s +INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.10985636711120605 s +DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=15819547354600753324859031452129940274, time:1750768135.1281812s req_ids:[8] +DEBUG 06-24 20:28:55 [manager.py:391] +ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:211.52687072753906ms total_cost_time:211.57121658325195ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11962 prompt_cache_len:5151 prompt_cache_ratio:0.43061360976425345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 +DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:55 [batch.py:51] router release req id 8 +INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10875082015991211 s +INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.1110377311706543 s +DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=95043965739271115239721168430332272280, time:1750768135.3508294s req_ids:[8] +DEBUG 06-24 20:28:55 [manager.py:391] +ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:212.9056453704834ms total_cost_time:212.95642852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:11963 prompt_cache_len:5151 prompt_cache_ratio:0.4305776143107916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 +DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:55 [batch.py:51] router release req id 8 +INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10718369483947754 s +INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.1090691089630127 s +DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=258777417804640485526976366306366551919, time:1750768135.563974s req_ids:[8] +DEBUG 06-24 20:28:55 [manager.py:391] +ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:367.98858642578125ms total_cost_time:368.03293228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11964 prompt_cache_len:5151 prompt_cache_ratio:0.4305416248746239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 +DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:55 [batch.py:51] router release req id 8 +INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10790467262268066 s +INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.10987687110900879 s +DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=273973186652445984431349803298722936564, time:1750768135.9396412s req_ids:[8] +DEBUG 06-24 20:28:55 [manager.py:391] +ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:202.00705528259277ms total_cost_time:202.04973220825195ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11965 prompt_cache_len:5151 prompt_cache_ratio:0.4305056414542415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 +DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:56 [batch.py:51] router release req id 8 +INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10832691192626953 s +INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s +DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=300113675189918618686274143071557316507, time:1750768136.1523914s req_ids:[8] +DEBUG 06-24 20:28:56 [manager.py:391] +ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:212.8605842590332ms total_cost_time:212.9039764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11966 prompt_cache_len:5151 prompt_cache_ratio:0.4304696640481364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 +DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:56 [batch.py:51] router release req id 8 +INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10562682151794434 s +INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.10763883590698242 s +DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=283607000119770431190850984979214755571, time:1750768136.3669546s req_ids:[8] +DEBUG 06-24 20:28:56 [manager.py:391] +ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:211.67397499084473ms total_cost_time:211.71903610229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11967 prompt_cache_len:5151 prompt_cache_ratio:0.4304336926548007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 +DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:56 [batch.py:51] router release req id 8 +INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.11092638969421387 s +INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.11307263374328613 s +DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=313994612745509528983755209127736581597, time:1750768136.588752s req_ids:[8] +DEBUG 06-24 20:28:56 [manager.py:391] +ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:210.4470729827881ms total_cost_time:210.5412483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.09417533874511719ms prompt_token_num:11968 prompt_cache_len:5151 prompt_cache_ratio:0.4303977272727273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 +DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:56 [batch.py:51] router release req id 8 +INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10787248611450195 s +INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.10982179641723633 s +DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=314901115796672256137962829382313540444, time:1750768136.8019927s req_ids:[8] +DEBUG 06-24 20:28:56 [manager.py:391] +ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:206.85744285583496ms total_cost_time:206.9244384765625ms,out_token_counter:1 mean_per_token_cost_time: 0.06699562072753906ms prompt_token_num:11969 prompt_cache_len:5151 prompt_cache_ratio:0.43036176790040936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 +DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:56 [batch.py:51] router release req id 8 +INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10453939437866211 s +INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.10548019409179688 s +DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=16478308163660516422606655363763646896, time:1750768137.0170758s req_ids:[8] +DEBUG 06-24 20:28:57 [manager.py:391] +ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:348.22535514831543ms total_cost_time:348.27208518981934ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11970 prompt_cache_len:5151 prompt_cache_ratio:0.43032581453634083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 +DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:57 [batch.py:51] router release req id 8 +INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10858654975891113 s +INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s +DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=263853919847717316374060382865263945853, time:1750768137.3716505s req_ids:[8] +DEBUG 06-24 20:28:57 [manager.py:391] +ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:202.7304172515869ms total_cost_time:202.7742862701416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11971 prompt_cache_len:5151 prompt_cache_ratio:0.43028986717901596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 +DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:57 [batch.py:51] router release req id 8 +INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10835957527160645 s +INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027741432189941 s +DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=193372557165191265446377080303334493274, time:1750768137.587858s req_ids:[8] +DEBUG 06-24 20:28:57 [manager.py:391] +ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:216.3369655609131ms total_cost_time:216.39394760131836ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:11972 prompt_cache_len:5151 prompt_cache_ratio:0.4302539258269295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 +DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:57 [batch.py:51] router release req id 8 +INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s +INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11002635955810547 s +DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=118579312505353612546134618222646505980, time:1750768137.8058567s req_ids:[8] +DEBUG 06-24 20:28:57 [manager.py:391] +ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:206.66933059692383ms total_cost_time:206.71439170837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11973 prompt_cache_len:5151 prompt_cache_ratio:0.4302179904785768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 +DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:57 [batch.py:51] router release req id 8 +INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10770940780639648 s +INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975456237792969 s +DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=79815428260900973984936112290918699288, time:1750768138.0176246s req_ids:[8] +DEBUG 06-24 20:28:58 [manager.py:391] +ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:194.23246383666992ms total_cost_time:194.2763328552246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11974 prompt_cache_len:5151 prompt_cache_ratio:0.43018206113245366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 +DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:58 [batch.py:51] router release req id 8 +INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.1081693172454834 s +INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.11011099815368652 s +DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=141026568049117553427483524231658933207, time:1750768138.2232552s req_ids:[8] +DEBUG 06-24 20:28:58 [manager.py:391] +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:207.01980590820312ms total_cost_time:207.0631980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11975 prompt_cache_len:5151 prompt_cache_ratio:0.43014613778705635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 +DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:58 [batch.py:51] router release req id 8 +INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.1073915958404541 s +INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.1090843677520752 s +DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=55606224309751468707965949573460652671, time:1750768138.4412284s req_ids:[8] +DEBUG 06-24 20:28:58 [manager.py:391] +ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:210.4339599609375ms total_cost_time:210.47568321228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11976 prompt_cache_len:5151 prompt_cache_ratio:0.43011022044088176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 +DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:58 [batch.py:51] router release req id 8 +INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10776519775390625 s +INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10976004600524902 s +DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=152217960475603516027913100532359953267, time:1750768138.653451s req_ids:[8] +DEBUG 06-24 20:28:58 [manager.py:391] +ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:345.16239166259766ms total_cost_time:345.20983695983887ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:11977 prompt_cache_len:5151 prompt_cache_ratio:0.43007430909242716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 +DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:58 [batch.py:51] router release req id 8 +INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10803437232971191 s +INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10989212989807129 s +DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=48966923906306713373175589894371429371, time:1750768139.0058842s req_ids:[8] +DEBUG 06-24 20:28:59 [manager.py:391] +ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:205.68132400512695ms total_cost_time:205.72543144226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11978 prompt_cache_len:5151 prompt_cache_ratio:0.43003840374019037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 +DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:59 [batch.py:51] router release req id 8 +INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.10694766044616699 s +INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10892820358276367 s +DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=216320136538372147515387893965576566722, time:1750768139.2158518s req_ids:[8] +DEBUG 06-24 20:28:59 [manager.py:391] +ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:222.61357307434082ms total_cost_time:222.6581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11979 prompt_cache_len:5151 prompt_cache_ratio:0.4300025043826697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 +DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:59 [batch.py:51] router release req id 8 +INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.10758781433105469 s +INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s +DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=331044038621114045701302461255031648443, time:1750768139.448361s req_ids:[8] +DEBUG 06-24 20:28:59 [manager.py:391] +ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:209.8381519317627ms total_cost_time:209.8689079284668ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11980 prompt_cache_len:5151 prompt_cache_ratio:0.4299666110183639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 +DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:59 [batch.py:51] router release req id 8 +INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.1075432300567627 s +INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10949993133544922 s +DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=113667671425225498233490098555284725959, time:1750768139.671358s req_ids:[8] +DEBUG 06-24 20:28:59 [manager.py:391] +ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:230.69238662719727ms total_cost_time:230.73816299438477ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11981 prompt_cache_len:5151 prompt_cache_ratio:0.42993072364577245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 +DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:28:59 [batch.py:51] router release req id 8 +INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.1069028377532959 s +INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10873532295227051 s +DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=146945229886474275764179234189008314897, time:1750768139.9196985s req_ids:[8] +DEBUG 06-24 20:28:59 [manager.py:391] +ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:225.06237030029297ms total_cost_time:225.0981330871582ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:11982 prompt_cache_len:5151 prompt_cache_ratio:0.4298948422633951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 +DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:00 [batch.py:51] router release req id 8 +INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10735011100769043 s +INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.10937213897705078 s +DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=325836289117998328200526547618961335684, time:1750768140.1273289s req_ids:[8] +DEBUG 06-24 20:29:00 [manager.py:391] +ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:206.91776275634766ms total_cost_time:206.97546005249023ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:11983 prompt_cache_len:5151 prompt_cache_ratio:0.42985896686973213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 +DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:00 [batch.py:51] router release req id 8 +INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10825681686401367 s +INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s +DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=105004664256981886755727146224252441224, time:1750768140.342499s req_ids:[8] +DEBUG 06-24 20:29:00 [manager.py:391] +ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:380.04326820373535ms total_cost_time:380.07044792175293ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11984 prompt_cache_len:5151 prompt_cache_ratio:0.4298230974632844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 +DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:00 [batch.py:51] router release req id 8 +INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10687518119812012 s +INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.10879969596862793 s +DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=318198177319329947172993549742013383874, time:1750768140.728171s req_ids:[8] +DEBUG 06-24 20:29:00 [manager.py:391] +ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:211.85755729675293ms total_cost_time:211.90452575683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11985 prompt_cache_len:5151 prompt_cache_ratio:0.4297872340425532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 +DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:00 [batch.py:51] router release req id 8 +INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s +INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.1111607551574707 s +DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=76821386562948014192899590590822769055, time:1750768140.9437013s req_ids:[8] +DEBUG 06-24 20:29:00 [manager.py:391] +ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:206.22014999389648ms total_cost_time:206.2661647796631ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11986 prompt_cache_len:5151 prompt_cache_ratio:0.4297513766060404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 +DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:01 [batch.py:51] router release req id 8 +INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10844159126281738 s +INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s +DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=267626692958540132130191980178323698848, time:1750768141.1602192s req_ids:[8] +DEBUG 06-24 20:29:01 [manager.py:391] +ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:212.80479431152344ms total_cost_time:212.8603458404541ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:11987 prompt_cache_len:5151 prompt_cache_ratio:0.42971552515224826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 +DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:01 [batch.py:51] router release req id 8 +DEBUG 06-24 20:29:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:01 [manager.py:283] +DEBUG 06-24 20:29:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:01 [manager.py:284] +INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s +INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s +DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=11444672340918047402183984021272924069, time:1750768141.380888s req_ids:[8] +DEBUG 06-24 20:29:01 [manager.py:391] +ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:229.41064834594727ms total_cost_time:229.45666313171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11988 prompt_cache_len:5151 prompt_cache_ratio:0.4296796796796797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 +DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:01 [batch.py:51] router release req id 8 +INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10780811309814453 s +INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.1096944808959961 s +DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=212977370290146390311203075431462752876, time:1750768141.6157515s req_ids:[8] +DEBUG 06-24 20:29:01 [manager.py:391] +ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:207.23962783813477ms total_cost_time:207.28468894958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11989 prompt_cache_len:5151 prompt_cache_ratio:0.42964384018683793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 +DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:01 [batch.py:51] router release req id 8 +INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10717463493347168 s +INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.10912346839904785 s +DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=58508124273961706056465349581785378507, time:1750768141.8248682s req_ids:[8] +DEBUG 06-24 20:29:01 [manager.py:391] +ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:224.1840362548828ms total_cost_time:224.2283821105957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11990 prompt_cache_len:5151 prompt_cache_ratio:0.42960800667222687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 +DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:01 [batch.py:51] router release req id 8 +INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10739445686340332 s +INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.10936617851257324 s +DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=51066918067910285056303256494526161282, time:1750768142.0605953s req_ids:[8] +DEBUG 06-24 20:29:02 [manager.py:391] +ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:386.4421844482422ms total_cost_time:386.48486137390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11991 prompt_cache_len:5151 prompt_cache_ratio:0.42957217913435075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 +DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:02 [batch.py:51] router release req id 8 +INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10811543464660645 s +INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005759239196777 s +DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=79179214805524201961806513535136917339, time:1750768142.447242s req_ids:[8] +DEBUG 06-24 20:29:02 [manager.py:391] +ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:203.91392707824707ms total_cost_time:203.95803451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11992 prompt_cache_len:5151 prompt_cache_ratio:0.4295363575717145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 +DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:02 [batch.py:51] router release req id 8 +INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10744047164916992 s +INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.1095266342163086 s +DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=13266818944621714751726430285231528355, time:1750768142.657891s req_ids:[8] +DEBUG 06-24 20:29:02 [manager.py:391] +ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:207.23938941955566ms total_cost_time:207.26537704467773ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:11993 prompt_cache_len:5151 prompt_cache_ratio:0.42950054198282334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 +DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:02 [batch.py:51] router release req id 8 +INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10683083534240723 s +INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.10882091522216797 s +DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=320029545402889721960625033169100896847, time:1750768142.8742738s req_ids:[8] +DEBUG 06-24 20:29:02 [manager.py:391] +ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:213.50622177124023ms total_cost_time:213.531494140625ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11994 prompt_cache_len:5151 prompt_cache_ratio:0.4294647323661831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 +DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:03 [batch.py:51] router release req id 8 +INFO 06-24 20:29:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.10591912269592285 s +INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.10715079307556152 s +DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=119988958224795863777424935459438210855, time:1750768143.092618s req_ids:[8] +DEBUG 06-24 20:29:03 [manager.py:391] +ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:217.04697608947754ms total_cost_time:217.0724868774414ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:11995 prompt_cache_len:5151 prompt_cache_ratio:0.42942892872030014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 +DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:03 [batch.py:51] router release req id 8 +INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.106475830078125 s +INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.10770678520202637 s +DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=100171455838181181804051801913479669772, time:1750768143.3204806s req_ids:[8] +DEBUG 06-24 20:29:03 [manager.py:391] +ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:216.89200401306152ms total_cost_time:216.96114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.06914138793945312ms prompt_token_num:11996 prompt_cache_len:5151 prompt_cache_ratio:0.4293931310436812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 +DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:03 [batch.py:51] router release req id 8 +INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.10833597183227539 s +INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.11051011085510254 s +DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=175076794872094989025733257240616189561, time:1750768143.5383315s req_ids:[8] +DEBUG 06-24 20:29:03 [manager.py:391] +ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:204.8318386077881ms total_cost_time:204.87689971923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11997 prompt_cache_len:5151 prompt_cache_ratio:0.4293573393348337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 +DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:03 [batch.py:51] router release req id 8 +INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.1096961498260498 s +INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.11219978332519531 s +DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=130083271177277859764878193174491548986, time:1750768143.7494287s req_ids:[8] +DEBUG 06-24 20:29:03 [manager.py:391] +ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:405.4999351501465ms total_cost_time:405.5440425872803ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11998 prompt_cache_len:5151 prompt_cache_ratio:0.42932155359226537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 +DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:04 [batch.py:51] router release req id 8 +INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.1074836254119873 s +INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.10932135581970215 s +DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=327384162895898243084195067892052374415, time:1750768144.1623502s req_ids:[8] +DEBUG 06-24 20:29:04 [manager.py:391] +ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:183.47454071044922ms total_cost_time:183.5179328918457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11999 prompt_cache_len:5151 prompt_cache_ratio:0.4292857738144845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 +DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:04 [batch.py:51] router release req id 8 +INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10811185836791992 s +INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.11004638671875 s +DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=156339597023225659232311034211781884008, time:1750768144.3497717s req_ids:[8] +DEBUG 06-24 20:29:04 [manager.py:391] +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:195.55115699768066ms total_cost_time:195.61147689819336ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12000 prompt_cache_len:5151 prompt_cache_ratio:0.42925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 +DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:04 [batch.py:51] router release req id 8 +INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10849332809448242 s +INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s +DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=259177005694312922991527212354066977009, time:1750768144.5484993s req_ids:[8] +DEBUG 06-24 20:29:04 [manager.py:391] +DEBUG 06-24 20:29:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 49969.533 tokens/s +DEBUG 06-24 20:29:04 [stats.py:37] Avg prompt tokens throughput: 49961.193 tokens/s +DEBUG 06-24 20:29:04 [stats.py:37] Avg generate tokens throughput: 8.340 tokens/s +ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:215.5599594116211ms total_cost_time:215.60359001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12001 prompt_cache_len:5151 prompt_cache_ratio:0.4292142321473211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 +DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:04 [batch.py:51] router release req id 8 +INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s +INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.10995268821716309 s +DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=114486568357166733845298271866179694713, time:1750768144.7710772s req_ids:[8] +DEBUG 06-24 20:29:04 [manager.py:391] +ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:213.04869651794434ms total_cost_time:213.10973167419434ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12002 prompt_cache_len:5151 prompt_cache_ratio:0.42917847025495753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 +DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:04 [batch.py:51] router release req id 8 +INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10711002349853516 s +INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.1092214584350586 s +DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=245066641539354190372768831541904874427, time:1750768144.9931011s req_ids:[8] +DEBUG 06-24 20:29:04 [manager.py:391] +ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:215.86322784423828ms total_cost_time:215.90876579284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12003 prompt_cache_len:5151 prompt_cache_ratio:0.42914271432141965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 +DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:05 [batch.py:51] router release req id 8 +INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.10851883888244629 s +INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s +DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=206808503113083642025994083146073104112, time:1750768145.2141945s req_ids:[8] +DEBUG 06-24 20:29:05 [manager.py:391] +ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:215.68012237548828ms total_cost_time:215.72494506835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12004 prompt_cache_len:5151 prompt_cache_ratio:0.4291069643452183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 +DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:05 [batch.py:51] router release req id 8 +INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.3118901252746582 s +INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.31380271911621094 s +DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=121136995746701749841183191537177903345, time:1750768145.6369538s req_ids:[8] +DEBUG 06-24 20:29:05 [manager.py:391] +ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:393.2313919067383ms total_cost_time:393.27549934387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12005 prompt_cache_len:5151 prompt_cache_ratio:0.42907122032486467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 +DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:05 [batch.py:51] router release req id 8 +INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.10801410675048828 s +INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s +DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=8413969729335408194508939558546892983, time:1750768145.8373215s req_ids:[8] +DEBUG 06-24 20:29:05 [manager.py:391] +ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:212.3887538909912ms total_cost_time:212.4350070953369ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12006 prompt_cache_len:5151 prompt_cache_ratio:0.42903548225887056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 +DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:05 [batch.py:51] router release req id 8 +INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10972046852111816 s +INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11176323890686035 s +DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=19542191938045469950862621955606415612, time:1750768146.0523705s req_ids:[8] +DEBUG 06-24 20:29:06 [manager.py:391] +ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:211.68088912963867ms total_cost_time:211.72618865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12007 prompt_cache_len:5151 prompt_cache_ratio:0.4289997501457483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 +DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:06 [batch.py:51] router release req id 8 +INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10818195343017578 s +INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s +DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=95407010081928218771993102772963095359, time:1750768146.2953484s req_ids:[8] +DEBUG 06-24 20:29:06 [manager.py:391] +ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:235.61954498291016ms total_cost_time:235.66317558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12008 prompt_cache_len:5151 prompt_cache_ratio:0.4289640239840107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 +DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:06 [batch.py:51] router release req id 8 +INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10865092277526855 s +INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11003971099853516 s +DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=269685452943100156085240263733822960384, time:1750768146.5354066s req_ids:[8] +DEBUG 06-24 20:29:06 [manager.py:391] +ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:225.0828742980957ms total_cost_time:225.1286506652832ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12009 prompt_cache_len:5151 prompt_cache_ratio:0.4289283037721709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 +DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:06 [batch.py:51] router release req id 8 +INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10861396789550781 s +INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11070990562438965 s +DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=15199493082351808299793569383798071069, time:1750768146.7585313s req_ids:[8] +DEBUG 06-24 20:29:06 [manager.py:391] +ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:222.34559059143066ms total_cost_time:222.39351272583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:12010 prompt_cache_len:5151 prompt_cache_ratio:0.4288925895087427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 +DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:06 [batch.py:51] router release req id 8 +INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10854554176330566 s +INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11081624031066895 s +DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=247672839698063844752436114003918206221, time:1750768146.9780264s req_ids:[8] +DEBUG 06-24 20:29:06 [manager.py:391] +ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:423.6428737640381ms total_cost_time:423.7046241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12011 prompt_cache_len:5151 prompt_cache_ratio:0.42885688119224047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 +DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:07 [batch.py:51] router release req id 8 +INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10865068435668945 s +INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s +DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=80163403286976088376911342295070824939, time:1750768147.4084656s req_ids:[8] +DEBUG 06-24 20:29:07 [manager.py:391] +ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:191.2829875946045ms total_cost_time:191.3473606109619ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:12012 prompt_cache_len:5151 prompt_cache_ratio:0.4288211788211788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 +DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:07 [batch.py:51] router release req id 8 +INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s +INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s +DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=133095015546961401256545974772499051861, time:1750768147.6049411s req_ids:[8] +DEBUG 06-24 20:29:07 [manager.py:391] +ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:209.3672752380371ms total_cost_time:209.4137668609619ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12013 prompt_cache_len:5151 prompt_cache_ratio:0.4287854823940731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 +DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:07 [batch.py:51] router release req id 8 +INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10808062553405762 s +INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.10911035537719727 s +DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=140641072025147824017690391817133506494, time:1750768147.8322158s req_ids:[8] +DEBUG 06-24 20:29:07 [manager.py:391] +ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:220.91937065124512ms total_cost_time:220.96490859985352ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12014 prompt_cache_len:5151 prompt_cache_ratio:0.428749791909439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 +DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:07 [batch.py:51] router release req id 8 +INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.108184814453125 s +INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s +DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=52815742649395797252158054318416893819, time:1750768148.0522652s req_ids:[8] +DEBUG 06-24 20:29:08 [manager.py:391] +ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:211.8208408355713ms total_cost_time:211.8673324584961ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12015 prompt_cache_len:5151 prompt_cache_ratio:0.42871410736579274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 +DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:08 [batch.py:51] router release req id 8 +INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.10674309730529785 s +INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.10875582695007324 s +DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=321049370361993069559888626532201931712, time:1750768148.2929409s req_ids:[8] +DEBUG 06-24 20:29:08 [manager.py:391] +ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:240.81873893737793ms total_cost_time:240.8616542816162ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12016 prompt_cache_len:5151 prompt_cache_ratio:0.42867842876165113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 +DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:08 [batch.py:51] router release req id 8 +INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.11030960083007812 s +INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.11245393753051758 s +DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=86860259430355401702264045010013662451, time:1750768148.5394244s req_ids:[8] +DEBUG 06-24 20:29:08 [manager.py:391] +ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:236.26470565795898ms total_cost_time:236.30595207214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12017 prompt_cache_len:5151 prompt_cache_ratio:0.4286427560955313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 +DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:08 [batch.py:51] router release req id 8 +INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.10876250267028809 s +INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.1109161376953125 s +DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=793972570785084083736208901986077079, time:1750768148.7645762s req_ids:[8] +DEBUG 06-24 20:29:08 [manager.py:391] +ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:399.9342918395996ms total_cost_time:399.9795913696289ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12018 prompt_cache_len:5151 prompt_cache_ratio:0.4286070893659511 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 +DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:09 [batch.py:51] router release req id 8 +INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s +INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.11112403869628906 s +DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=128765853299447464181801748618125117338, time:1750768149.1748664s req_ids:[8] +DEBUG 06-24 20:29:09 [manager.py:391] +ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:221.8005657196045ms total_cost_time:221.8470573425293ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12019 prompt_cache_len:5151 prompt_cache_ratio:0.42857142857142855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 +DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:09 [batch.py:51] router release req id 8 +INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10792899131774902 s +INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10940837860107422 s +DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=56849835682524159301660500157799077454, time:1750768149.4053035s req_ids:[8] +DEBUG 06-24 20:29:09 [manager.py:391] +ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:213.84000778198242ms total_cost_time:213.88602256774902ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12020 prompt_cache_len:5151 prompt_cache_ratio:0.4285357737104825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 +DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:09 [batch.py:51] router release req id 8 +INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10712933540344238 s +INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10871577262878418 s +DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=59376223215732959262459505900379532525, time:1750768149.6104329s req_ids:[8] +DEBUG 06-24 20:29:09 [manager.py:391] +ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:188.57645988464355ms total_cost_time:188.6000633239746ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12021 prompt_cache_len:5151 prompt_cache_ratio:0.42850012478163213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 +DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:09 [batch.py:51] router release req id 8 +INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s +INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10462784767150879 s +DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=44788431383541676326584608368813368494, time:1750768149.8205647s req_ids:[8] +DEBUG 06-24 20:29:09 [manager.py:391] +ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:221.53973579406738ms total_cost_time:221.5728759765625ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12022 prompt_cache_len:5151 prompt_cache_ratio:0.4284644817833971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 +DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:09 [batch.py:51] router release req id 8 +INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.1052103042602539 s +INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10740923881530762 s +DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=66917883417043659105248542192226972827, time:1750768150.0533133s req_ids:[8] +DEBUG 06-24 20:29:10 [manager.py:391] +ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:233.32548141479492ms total_cost_time:233.34813117980957ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:12023 prompt_cache_len:5151 prompt_cache_ratio:0.4284288447142976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 +DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:10 [batch.py:51] router release req id 8 +INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.1051032543182373 s +INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10729074478149414 s +DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=176253158548196712953843253094605844050, time:1750768150.2896163s req_ids:[8] +DEBUG 06-24 20:29:10 [manager.py:391] +ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:228.27434539794922ms total_cost_time:228.29914093017578ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12024 prompt_cache_len:5151 prompt_cache_ratio:0.4283932135728543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 +DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:10 [batch.py:51] router release req id 8 +INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.10510611534118652 s +INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10730719566345215 s +DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=240172713313360568779019341364643632157, time:1750768150.5266142s req_ids:[8] +DEBUG 06-24 20:29:10 [manager.py:391] +ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:444.75793838500977ms total_cost_time:444.7815418243408ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12025 prompt_cache_len:5151 prompt_cache_ratio:0.42835758835758836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 +DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:10 [batch.py:51] router release req id 8 +INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.10425353050231934 s +INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10620880126953125 s +DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=289666635358025803404998151710080379977, time:1750768150.9643679s req_ids:[8] +DEBUG 06-24 20:29:10 [manager.py:391] +ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:216.5670394897461ms total_cost_time:216.6142463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12026 prompt_cache_len:5151 prompt_cache_ratio:0.42832196906702147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 +DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:11 [batch.py:51] router release req id 8 +INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.10611534118652344 s +INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.10829520225524902 s +DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=316442442297969193194089483756271252402, time:1750768151.2001643s req_ids:[8] +DEBUG 06-24 20:29:11 [manager.py:391] +ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:235.08310317993164ms total_cost_time:235.1076602935791ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12027 prompt_cache_len:5151 prompt_cache_ratio:0.4282863556996757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 +DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:11 [batch.py:51] router release req id 8 +INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1050877571105957 s +INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.10724759101867676 s +DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=109565752340182290113671562410930437067, time:1750768151.4411745s req_ids:[8] +DEBUG 06-24 20:29:11 [manager.py:391] +ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:235.5632781982422ms total_cost_time:235.58545112609863ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12028 prompt_cache_len:5151 prompt_cache_ratio:0.42825074825407383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 +DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:11 [batch.py:51] router release req id 8 +INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1059572696685791 s +INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.108154296875 s +DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=219929110004615796080298254369893575243, time:1750768151.664816s req_ids:[8] +DEBUG 06-24 20:29:11 [manager.py:391] +ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:213.2554054260254ms total_cost_time:213.3004665374756ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12029 prompt_cache_len:5151 prompt_cache_ratio:0.4282151467287389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 +DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:11 [batch.py:51] router release req id 8 +INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1102914810180664 s +INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.11255836486816406 s +DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=222455836025050646891414014265765181027, time:1750768151.895965s req_ids:[8] +DEBUG 06-24 20:29:11 [manager.py:391] +ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:231.51803016662598ms total_cost_time:231.54282569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12030 prompt_cache_len:5151 prompt_cache_ratio:0.4281795511221945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 +DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:12 [batch.py:51] router release req id 8 +INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10507941246032715 s +INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10720467567443848 s +DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=58720643316398380131466173958936005866, time:1750768152.1218276s req_ids:[8] +DEBUG 06-24 20:29:12 [manager.py:391] +ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:217.71740913391113ms total_cost_time:217.75102615356445ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12031 prompt_cache_len:5151 prompt_cache_ratio:0.42814396143296485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 +DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:12 [batch.py:51] router release req id 8 +INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10420107841491699 s +INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10635113716125488 s +DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=112135186220016747848402029810673593702, time:1750768152.3421726s req_ids:[8] +DEBUG 06-24 20:29:12 [manager.py:391] +ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:366.2266731262207ms total_cost_time:366.2524223327637ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:12032 prompt_cache_len:5151 prompt_cache_ratio:0.4281083776595745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 +DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:12 [batch.py:51] router release req id 8 +INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10545206069946289 s +INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10729169845581055 s +DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=65361160067735883182817186396964541122, time:1750768152.7145388s req_ids:[8] +DEBUG 06-24 20:29:12 [manager.py:391] +ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:217.12827682495117ms total_cost_time:217.15593338012695ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12033 prompt_cache_len:5151 prompt_cache_ratio:0.42807279980054846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 +DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:12 [batch.py:51] router release req id 8 +INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10755681991577148 s +INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10973072052001953 s +DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=195909627552150442832864324375666989883, time:1750768152.94206s req_ids:[8] +DEBUG 06-24 20:29:12 [manager.py:391] +ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:234.0710163116455ms total_cost_time:234.09080505371094ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12034 prompt_cache_len:5151 prompt_cache_ratio:0.4280372278544125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 +DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:13 [batch.py:51] router release req id 8 +INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10854029655456543 s +INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s +DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=108602508995428510405783371312931566988, time:1750768153.176855s req_ids:[8] +DEBUG 06-24 20:29:13 [manager.py:391] +ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:206.13527297973633ms total_cost_time:206.15577697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12035 prompt_cache_len:5151 prompt_cache_ratio:0.42800166181969257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 +DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:13 [batch.py:51] router release req id 8 +INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10504484176635742 s +INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10699295997619629 s +DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=210828453444395192216928843050162273174, time:1750768153.3883357s req_ids:[8] +DEBUG 06-24 20:29:13 [manager.py:391] +ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:210.9203338623047ms total_cost_time:210.94107627868652ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12036 prompt_cache_len:5151 prompt_cache_ratio:0.4279661016949153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 +DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:13 [batch.py:51] router release req id 8 +INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10497140884399414 s +INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10689210891723633 s +DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=191768512400196359097141788981957252732, time:1750768153.6023083s req_ids:[8] +DEBUG 06-24 20:29:13 [manager.py:391] +ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:213.87457847595215ms total_cost_time:213.89389038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:12037 prompt_cache_len:5151 prompt_cache_ratio:0.42793054747860765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 +DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:13 [batch.py:51] router release req id 8 +INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10607290267944336 s +INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10805654525756836 s +DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=229931616283184504954360598259935564468, time:1750768153.8209498s req_ids:[8] +DEBUG 06-24 20:29:13 [manager.py:391] +ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:203.6118507385254ms total_cost_time:203.65500450134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12038 prompt_cache_len:5151 prompt_cache_ratio:0.42789499916929724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 +DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:14 [batch.py:51] router release req id 8 +INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.30750536918640137 s +DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=318304723762559595147455348375842731295, time:1750768154.2220309s req_ids:[8] +DEBUG 06-24 20:29:14 [manager.py:391] +INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.3097724914550781 s +ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:415.8947467803955ms total_cost_time:415.91358184814453ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:12039 prompt_cache_len:5151 prompt_cache_ratio:0.42785945676551207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 +DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:14 [batch.py:51] router release req id 8 +INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10582423210144043 s +INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.10779023170471191 s +DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=215419642741544631244834772759152345221, time:1750768154.4667826s req_ids:[8] +DEBUG 06-24 20:29:14 [manager.py:391] +ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:29:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 46881.166 tokens/s +DEBUG 06-24 20:29:14 [stats.py:37] Avg prompt tokens throughput: 46873.267 tokens/s +DEBUG 06-24 20:29:14 [stats.py:37] Avg generate tokens throughput: 7.899 tokens/s +INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:218.75977516174316ms total_cost_time:218.80483627319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12040 prompt_cache_len:5151 prompt_cache_ratio:0.42782392026578075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 +DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:14 [batch.py:51] router release req id 8 +INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s +INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s +DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=71084735116213594234740656544589837216, time:1750768154.6732526s req_ids:[8] +DEBUG 06-24 20:29:14 [manager.py:391] +ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:207.20338821411133ms total_cost_time:207.24916458129883ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12041 prompt_cache_len:5151 prompt_cache_ratio:0.4277883896686322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 +DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:14 [batch.py:51] router release req id 8 +INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10753011703491211 s +INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s +DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=203265624047166060000088244312364300826, time:1750768154.887515s req_ids:[8] +DEBUG 06-24 20:29:14 [manager.py:391] +ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:212.7835750579834ms total_cost_time:212.82696723937988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12042 prompt_cache_len:5151 prompt_cache_ratio:0.4277528649725959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 +DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:15 [batch.py:51] router release req id 8 +INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.1079103946685791 s +INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097860336303711 s +DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=37472561879213758118610936963017647562, time:1750768155.112771s req_ids:[8] +DEBUG 06-24 20:29:15 [manager.py:391] +ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:214.60509300231934ms total_cost_time:214.71452713012695ms,out_token_counter:1 mean_per_token_cost_time: 0.10943412780761719ms prompt_token_num:12043 prompt_cache_len:5151 prompt_cache_ratio:0.42771734617620194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 +DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:15 [batch.py:51] router release req id 8 +INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10738062858581543 s +INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10923957824707031 s +DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=2177836956162719931539281275010271740, time:1750768155.3265784s req_ids:[8] +DEBUG 06-24 20:29:15 [manager.py:391] +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:202.99506187438965ms total_cost_time:203.03988456726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12044 prompt_cache_len:5151 prompt_cache_ratio:0.42768183327798076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 +DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:15 [batch.py:51] router release req id 8 +INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10605907440185547 s +INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10773134231567383 s +DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=70371748387237070635964710645186554897, time:1750768155.537984s req_ids:[8] +DEBUG 06-24 20:29:15 [manager.py:391] +ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:450.99520683288574ms total_cost_time:451.0171413421631ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12045 prompt_cache_len:5151 prompt_cache_ratio:0.42764632627646326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 +DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:15 [batch.py:51] router release req id 8 +INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10618162155151367 s +INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10750532150268555 s +DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=60602649264901184097240858216265112250, time:1750768155.9953952s req_ids:[8] +DEBUG 06-24 20:29:15 [manager.py:391] +ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:213.62042427062988ms total_cost_time:213.66524696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12046 prompt_cache_len:5151 prompt_cache_ratio:0.42761082517018095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 +DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:16 [batch.py:51] router release req id 8 +INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10809683799743652 s +INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.11012387275695801 s +DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=37680172604040798269840570150761587201, time:1750768156.212346s req_ids:[8] +DEBUG 06-24 20:29:16 [manager.py:391] +ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:203.14502716064453ms total_cost_time:203.18984985351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12047 prompt_cache_len:5151 prompt_cache_ratio:0.4275753299576658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 +DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:16 [batch.py:51] router release req id 8 +INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.1084442138671875 s +INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s +DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=208199708294903876538876743869264051722, time:1750768156.42323s req_ids:[8] +DEBUG 06-24 20:29:16 [manager.py:391] +ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:210.39390563964844ms total_cost_time:210.43658256530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12048 prompt_cache_len:5151 prompt_cache_ratio:0.4275398406374502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 +DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:16 [batch.py:51] router release req id 8 +INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10856938362121582 s +INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047863960266113 s +DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=92822937002517172786060448355481926368, time:1750768156.6393826s req_ids:[8] +DEBUG 06-24 20:29:16 [manager.py:391] +ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:221.8616008758545ms total_cost_time:221.91429138183594ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:12049 prompt_cache_len:5151 prompt_cache_ratio:0.42750435720806707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 +DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:16 [batch.py:51] router release req id 8 +INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10782694816589355 s +INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.1097097396850586 s +DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=310947837100557592641503380457711807419, time:1750768156.8919666s req_ids:[8] +DEBUG 06-24 20:29:16 [manager.py:391] +ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:229.64930534362793ms total_cost_time:229.69317436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12050 prompt_cache_len:5151 prompt_cache_ratio:0.4274688796680498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 +DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:17 [batch.py:51] router release req id 8 +INFO 06-24 20:29:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10872602462768555 s +INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.11072182655334473 s +DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=305620391520721147686760542501942210076, time:1750768157.1050603s req_ids:[8] +DEBUG 06-24 20:29:17 [manager.py:391] +ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:221.31872177124023ms total_cost_time:221.36282920837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12051 prompt_cache_len:5151 prompt_cache_ratio:0.42743340801593227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 +DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:17 [batch.py:51] router release req id 8 +INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10784482955932617 s +INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.10984420776367188 s +DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=313431163150635069735657004655510782201, time:1750768157.3480477s req_ids:[8] +DEBUG 06-24 20:29:17 [manager.py:391] +ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:471.56262397766113ms total_cost_time:471.6055393218994ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12052 prompt_cache_len:5151 prompt_cache_ratio:0.4273979422502489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 +DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:17 [batch.py:51] router release req id 8 +INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10857510566711426 s +INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.11044025421142578 s +DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=300533308143897871514302169705225126671, time:1750768157.8111422s req_ids:[8] +DEBUG 06-24 20:29:17 [manager.py:391] +ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:195.1589584350586ms total_cost_time:195.18280029296875ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12053 prompt_cache_len:5151 prompt_cache_ratio:0.42736248236953456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 +DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:17 [batch.py:51] router release req id 8 +INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10557794570922852 s +INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.10748457908630371 s +DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=324349048255213809808410296926647616750, time:1750768158.0120595s req_ids:[8] +DEBUG 06-24 20:29:18 [manager.py:391] +ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:213.00768852233887ms total_cost_time:213.04988861083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12054 prompt_cache_len:5151 prompt_cache_ratio:0.42732702837232456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 +DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:18 [batch.py:51] router release req id 8 +INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s +INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.11133146286010742 s +DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=253957478304694350385564161484063562088, time:1750768158.229874s req_ids:[8] +DEBUG 06-24 20:29:18 [manager.py:391] +ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:211.3940715789795ms total_cost_time:211.44723892211914ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12055 prompt_cache_len:5151 prompt_cache_ratio:0.4272915802571547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 +DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:18 [batch.py:51] router release req id 8 +INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s +INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.1102139949798584 s +DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=110474368305055151758111211003144719934, time:1750768158.4484684s req_ids:[8] +DEBUG 06-24 20:29:18 [manager.py:391] +ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:210.2830410003662ms total_cost_time:210.30688285827637ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12056 prompt_cache_len:5151 prompt_cache_ratio:0.4272561380225614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 +DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:18 [batch.py:51] router release req id 8 +INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10441327095031738 s +INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.1064596176147461 s +DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=48845524495415014801840548309088550512, time:1750768158.6622002s req_ids:[8] +DEBUG 06-24 20:29:18 [manager.py:391] +ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:209.4719409942627ms total_cost_time:209.49554443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12057 prompt_cache_len:5151 prompt_cache_ratio:0.42722070166708137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 +DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:18 [batch.py:51] router release req id 8 +INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10660529136657715 s +INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.10858416557312012 s +DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=131436859228488649833513952173289443574, time:1750768158.8765733s req_ids:[8] +DEBUG 06-24 20:29:18 [manager.py:391] +ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:204.92243766784668ms total_cost_time:204.96630668640137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12058 prompt_cache_len:5151 prompt_cache_ratio:0.4271852711892519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 +DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:18 [batch.py:51] router release req id 8 +INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10883808135986328 s +INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.11071658134460449 s +DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=162984558546202167601570167255323412228, time:1750768159.086143s req_ids:[8] +DEBUG 06-24 20:29:19 [manager.py:391] +ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:457.5049877166748ms total_cost_time:457.52835273742676ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:12059 prompt_cache_len:5151 prompt_cache_ratio:0.4271498465876109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 +DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:19 [batch.py:51] router release req id 8 +INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10425853729248047 s +INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10607409477233887 s +DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=131598932469307510893186364922493749113, time:1750768159.5529115s req_ids:[8] +DEBUG 06-24 20:29:19 [manager.py:391] +ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:198.00806045532227ms total_cost_time:198.03214073181152ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:12060 prompt_cache_len:5151 prompt_cache_ratio:0.42711442786069653 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 +DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:19 [batch.py:51] router release req id 8 +INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10450339317321777 s +INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10643696784973145 s +DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=277817653406641260519340770778147166251, time:1750768159.7655668s req_ids:[8] +DEBUG 06-24 20:29:19 [manager.py:391] +ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:237.59913444519043ms total_cost_time:237.62273788452148ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12061 prompt_cache_len:5151 prompt_cache_ratio:0.42707901500704754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 +DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:19 [batch.py:51] router release req id 8 +INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10444092750549316 s +INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10648751258850098 s +DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=34021106094258512512574366866143348040, time:1750768159.989338s req_ids:[8] +DEBUG 06-24 20:29:19 [manager.py:391] +ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:182.51562118530273ms total_cost_time:182.5399398803711ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12062 prompt_cache_len:5151 prompt_cache_ratio:0.42704360802520314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 +DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:20 [batch.py:51] router release req id 8 +INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.1044015884399414 s +INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10629606246948242 s +DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=119329534314632534343580831602930570378, time:1750768160.1847932s req_ids:[8] +DEBUG 06-24 20:29:20 [manager.py:391] +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:208.95981788635254ms total_cost_time:208.9846134185791ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12063 prompt_cache_len:5151 prompt_cache_ratio:0.42700820691370306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 +DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:20 [batch.py:51] router release req id 8 +INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.10443496704101562 s +INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10647106170654297 s +DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=11231435938311089827499400375638693969, time:1750768160.3993225s req_ids:[8] +DEBUG 06-24 20:29:20 [manager.py:391] +ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:207.0763111114502ms total_cost_time:207.09991455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12064 prompt_cache_len:5151 prompt_cache_ratio:0.4269728116710875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 +DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:20 [batch.py:51] router release req id 8 +INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.1066441535949707 s +INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10855984687805176 s +DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=320422109395247778923031136796160633754, time:1750768160.616411s req_ids:[8] +DEBUG 06-24 20:29:20 [manager.py:391] +ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:216.69864654541016ms total_cost_time:216.75992012023926ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12065 prompt_cache_len:5151 prompt_cache_ratio:0.4269374222958972 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 +DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:20 [batch.py:51] router release req id 8 +INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.10817170143127441 s +INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.11020517349243164 s +DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=159022458051294909731831602136372019600, time:1750768160.8344412s req_ids:[8] +DEBUG 06-24 20:29:20 [manager.py:391] +ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:412.28389739990234ms total_cost_time:412.32776641845703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12066 prompt_cache_len:5151 prompt_cache_ratio:0.4269020387866733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 +DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:21 [batch.py:51] router release req id 8 +INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10825705528259277 s +INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s +DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=214715682293418638077830333096195373968, time:1750768161.2534053s req_ids:[8] +DEBUG 06-24 20:29:21 [manager.py:391] +ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:204.35357093811035ms total_cost_time:204.39863204956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12067 prompt_cache_len:5151 prompt_cache_ratio:0.4268666611419574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 +DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:21 [batch.py:51] router release req id 8 +INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10783958435058594 s +INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.10970735549926758 s +DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=172864066548881656145281451871871722721, time:1750768161.4623842s req_ids:[8] +DEBUG 06-24 20:29:21 [manager.py:391] +ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:206.62569999694824ms total_cost_time:206.67099952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12068 prompt_cache_len:5151 prompt_cache_ratio:0.4268312893602917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 +DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:21 [batch.py:51] router release req id 8 +INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10787320137023926 s +INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.10995841026306152 s +DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=208200350788771513456819010406465496911, time:1750768161.680891s req_ids:[8] +DEBUG 06-24 20:29:21 [manager.py:391] +ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:210.99209785461426ms total_cost_time:211.0445499420166ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:12069 prompt_cache_len:5151 prompt_cache_ratio:0.4267959234402187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 +DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:21 [batch.py:51] router release req id 8 +INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.1087183952331543 s +INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s +DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=72467525143756329508687410543427155130, time:1750768161.8967774s req_ids:[8] +DEBUG 06-24 20:29:21 [manager.py:391] +ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:212.36205101013184ms total_cost_time:212.40711212158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12070 prompt_cache_len:5151 prompt_cache_ratio:0.4267605633802817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 +DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:22 [batch.py:51] router release req id 8 +INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10986876487731934 s +INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.11180472373962402 s +DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=142927193493091226580217571044359556618, time:1750768162.117711s req_ids:[8] +DEBUG 06-24 20:29:22 [manager.py:391] +ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:206.21609687805176ms total_cost_time:206.26282691955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12071 prompt_cache_len:5151 prompt_cache_ratio:0.4267252091790241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 +DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:22 [batch.py:51] router release req id 8 +INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10825157165527344 s +INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.10936927795410156 s +DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=82130531750689544051400682293092908715, time:1750768162.3310297s req_ids:[8] +DEBUG 06-24 20:29:22 [manager.py:391] +ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:211.75312995910645ms total_cost_time:211.81392669677734ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12072 prompt_cache_len:5151 prompt_cache_ratio:0.42668986083499005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 +DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:22 [batch.py:51] router release req id 8 +INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s +INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.10945510864257812 s +DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=42464442916728141648791610871069417774, time:1750768162.550109s req_ids:[8] +DEBUG 06-24 20:29:22 [manager.py:391] +ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:463.3471965789795ms total_cost_time:463.392972946167ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12073 prompt_cache_len:5151 prompt_cache_ratio:0.4266545183467241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 +DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:22 [batch.py:51] router release req id 8 +INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s +INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.10973405838012695 s +DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=11875089089050135980899569461200050751, time:1750768163.019186s req_ids:[8] +DEBUG 06-24 20:29:23 [manager.py:391] +ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:195.48678398132324ms total_cost_time:195.53112983703613ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12074 prompt_cache_len:5151 prompt_cache_ratio:0.42661918171277124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 +DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:23 [batch.py:51] router release req id 8 +INFO 06-24 20:29:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:29:23 [statics_utils.py:24] mean first cost: 229.07014882261274 ms +INFO 06-24 20:29:23 [statics_utils.py:24] mean per token cost: 0.06237495480393663 ms +INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10840463638305664 s +INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s +DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=152694082299877739306963894349550102956, time:1750768163.2206206s req_ids:[8] +DEBUG 06-24 20:29:23 [manager.py:391] +ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:214.81776237487793ms total_cost_time:214.8730754852295ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12075 prompt_cache_len:5151 prompt_cache_ratio:0.426583850931677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 +DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:23 [batch.py:51] router release req id 8 +INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10779953002929688 s +INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.10980010032653809 s +DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=319965628701148084169393494720153804147, time:1750768163.4645429s req_ids:[8] +DEBUG 06-24 20:29:23 [manager.py:391] +ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:234.53354835510254ms total_cost_time:234.57789421081543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12076 prompt_cache_len:5151 prompt_cache_ratio:0.4265485260019874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 +DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:23 [batch.py:51] router release req id 8 +INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10821199417114258 s +INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040544509887695 s +DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=113672483453253660360409101686411607088, time:1750768163.68329s req_ids:[8] +DEBUG 06-24 20:29:23 [manager.py:391] +ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:206.6812515258789ms total_cost_time:206.7255973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12077 prompt_cache_len:5151 prompt_cache_ratio:0.4265132069222489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 +DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:23 [batch.py:51] router release req id 8 +INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10892724990844727 s +INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s +DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=157106017807927445966542583362375327607, time:1750768163.8981445s req_ids:[8] +DEBUG 06-24 20:29:23 [manager.py:391] +ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:212.5871181488037ms total_cost_time:212.64100074768066ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:12078 prompt_cache_len:5151 prompt_cache_ratio:0.42647789369100847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 +DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:24 [batch.py:51] router release req id 8 +INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10681271553039551 s +INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.1086890697479248 s +DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=17715277591360210217182938731529702404, time:1750768164.1298437s req_ids:[8] +DEBUG 06-24 20:29:24 [manager.py:391] +ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:217.6811695098877ms total_cost_time:217.7278995513916ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12079 prompt_cache_len:5151 prompt_cache_ratio:0.4264425863068135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 +DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:24 [batch.py:51] router release req id 8 +INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10833358764648438 s +INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.11029505729675293 s +DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=323529041036324189747276972179529962387, time:1750768164.341399s req_ids:[8] +DEBUG 06-24 20:29:24 [manager.py:391] +ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:29:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 47815.716 tokens/s +DEBUG 06-24 20:29:24 [stats.py:37] Avg prompt tokens throughput: 47807.788 tokens/s +DEBUG 06-24 20:29:24 [stats.py:37] Avg generate tokens throughput: 7.928 tokens/s +INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:418.7803268432617ms total_cost_time:418.8404083251953ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12080 prompt_cache_len:5151 prompt_cache_ratio:0.42640728476821194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 +DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:24 [batch.py:51] router release req id 8 +INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10815024375915527 s +INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.10997486114501953 s +DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=105431283973533029059556518663764142091, time:1750768164.7661805s req_ids:[8] +DEBUG 06-24 20:29:24 [manager.py:391] +ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:196.6707706451416ms total_cost_time:196.7294216156006ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12081 prompt_cache_len:5151 prompt_cache_ratio:0.4263719890737522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 +DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:24 [batch.py:51] router release req id 8 +INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s +INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.10993337631225586 s +DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=5474515331079488267474148108644037105, time:1750768164.9769785s req_ids:[8] +DEBUG 06-24 20:29:24 [manager.py:391] +ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:216.28069877624512ms total_cost_time:216.33100509643555ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:12082 prompt_cache_len:5151 prompt_cache_ratio:0.42633669922198314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 +DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:25 [batch.py:51] router release req id 8 +INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10871267318725586 s +INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.11061978340148926 s +DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=117938332335158875872318982564111383158, time:1750768165.189563s req_ids:[8] +DEBUG 06-24 20:29:25 [manager.py:391] +ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:206.08115196228027ms total_cost_time:206.14314079284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12083 prompt_cache_len:5151 prompt_cache_ratio:0.4263014152114541 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 +DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:25 [batch.py:51] router release req id 8 +INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.1084756851196289 s +INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.11056232452392578 s +DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=308371571774252700494012599243960686339, time:1750768165.413208s req_ids:[8] +DEBUG 06-24 20:29:25 [manager.py:391] +ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:214.7078514099121ms total_cost_time:214.79368209838867ms,out_token_counter:1 mean_per_token_cost_time: 0.0858306884765625ms prompt_token_num:12084 prompt_cache_len:5151 prompt_cache_ratio:0.426266137040715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 +DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:25 [batch.py:51] router release req id 8 +INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10851526260375977 s +INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s +DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=14826908049336166937465773595437351736, time:1750768165.6274846s req_ids:[8] +DEBUG 06-24 20:29:25 [manager.py:391] +ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:200.35934448242188ms total_cost_time:200.38866996765137ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12085 prompt_cache_len:5151 prompt_cache_ratio:0.4262308647083161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 +DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:25 [batch.py:51] router release req id 8 +INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10788464546203613 s +INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.10989117622375488 s +DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=42454665239996806178864134455623708326, time:1750768165.836484s req_ids:[8] +DEBUG 06-24 20:29:25 [manager.py:391] +ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:208.15682411193848ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12086 prompt_cache_len:5151 prompt_cache_ratio:0.4261955982128082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 +DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:25 [batch.py:51] router release req id 8 +INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.1065680980682373 s +INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.10867595672607422 s +DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=289209341916191498533210314876866415664, time:1750768166.0491052s req_ids:[8] +DEBUG 06-24 20:29:26 [manager.py:391] +ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:413.3586883544922ms total_cost_time:413.4037494659424ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12087 prompt_cache_len:5151 prompt_cache_ratio:0.42616033755274263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 +DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:26 [batch.py:51] router release req id 8 +INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10830211639404297 s +INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.11024665832519531 s +DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=321115342579625876846707929218114651575, time:1750768166.4668515s req_ids:[8] +DEBUG 06-24 20:29:26 [manager.py:391] +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:207.44729042053223ms total_cost_time:207.49282836914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12088 prompt_cache_len:5151 prompt_cache_ratio:0.4261250827266711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 +DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:26 [batch.py:51] router release req id 8 +INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10830569267272949 s +INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.11023330688476562 s +DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=275085722100747506563322977584746962634, time:1750768166.6807287s req_ids:[8] +DEBUG 06-24 20:29:26 [manager.py:391] +ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:207.55505561828613ms total_cost_time:207.61466026306152ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12089 prompt_cache_len:5151 prompt_cache_ratio:0.42608983373314585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 +DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:26 [batch.py:51] router release req id 8 +INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10739541053771973 s +INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.10869193077087402 s +DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=55515117597908489011926213656375366351, time:1750768166.8971303s req_ids:[8] +DEBUG 06-24 20:29:26 [manager.py:391] +ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:208.9991569519043ms total_cost_time:209.0473175048828ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:12090 prompt_cache_len:5151 prompt_cache_ratio:0.4260545905707196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 +DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:27 [batch.py:51] router release req id 8 +INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10710597038269043 s +INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.10898351669311523 s +DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=8251264736707970196660598392124922615, time:1750768167.1313734s req_ids:[8] +DEBUG 06-24 20:29:27 [manager.py:391] +ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:225.8920669555664ms total_cost_time:225.9531021118164ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12091 prompt_cache_len:5151 prompt_cache_ratio:0.4260193532379456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 +DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:27 [batch.py:51] router release req id 8 +INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s +INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.10949039459228516 s +DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=326397334034004834174791147072178272707, time:1750768167.3501437s req_ids:[8] +DEBUG 06-24 20:29:27 [manager.py:391] +ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:215.26265144348145ms total_cost_time:215.32344818115234ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12092 prompt_cache_len:5151 prompt_cache_ratio:0.42598412173337746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 +DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:27 [batch.py:51] router release req id 8 +INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10903143882751465 s +INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.11093974113464355 s +DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=317861357271912375431931615560231996682, time:1750768167.5728202s req_ids:[8] +DEBUG 06-24 20:29:27 [manager.py:391] +ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:443.6612129211426ms total_cost_time:443.7212944030762ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12093 prompt_cache_len:5151 prompt_cache_ratio:0.42594889605556935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 +DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:27 [batch.py:51] router release req id 8 +INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s +INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s +DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=238720545174291634697460011375431586903, time:1750768168.0163062s req_ids:[8] +DEBUG 06-24 20:29:28 [manager.py:391] +ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:202.21638679504395ms total_cost_time:202.27670669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12094 prompt_cache_len:5151 prompt_cache_ratio:0.4259136762030759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 +DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:28 [batch.py:51] router release req id 8 +INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.1084744930267334 s +INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s +DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=320799013707563251951913806250915446825, time:1750768168.238635s req_ids:[8] +DEBUG 06-24 20:29:28 [manager.py:391] +ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:224.14374351501465ms total_cost_time:224.20072555541992ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:12095 prompt_cache_len:5151 prompt_cache_ratio:0.42587846217445224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 +DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:28 [batch.py:51] router release req id 8 +INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10819315910339355 s +INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11028146743774414 s +DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=230088508560147385679389331321578852045, time:1750768168.4582658s req_ids:[8] +DEBUG 06-24 20:29:28 [manager.py:391] +ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:200.60968399047852ms total_cost_time:200.6537914276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12096 prompt_cache_len:5151 prompt_cache_ratio:0.42584325396825395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 +DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:28 [batch.py:51] router release req id 8 +INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10874128341674805 s +INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11065936088562012 s +DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=102872741345721765490195758442613110878, time:1750768168.6643283s req_ids:[8] +DEBUG 06-24 20:29:28 [manager.py:391] +ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:200.00433921813965ms total_cost_time:200.04892349243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12097 prompt_cache_len:5151 prompt_cache_ratio:0.42580805158303714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 +DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:28 [batch.py:51] router release req id 8 +INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.1099398136138916 s +INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11197566986083984 s +DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=299172048898581842078129629047901137834, time:1750768168.8710449s req_ids:[8] +DEBUG 06-24 20:29:28 [manager.py:391] +ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:205.88231086730957ms total_cost_time:205.92641830444336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12098 prompt_cache_len:5151 prompt_cache_ratio:0.42577285501735823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 +DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:29 [batch.py:51] router release req id 8 +INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s +INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.11036252975463867 s +DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=297067691350516832870386775203643160804, time:1750768169.0889473s req_ids:[8] +DEBUG 06-24 20:29:29 [manager.py:391] +ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:215.67106246948242ms total_cost_time:215.7149314880371ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12099 prompt_cache_len:5151 prompt_cache_ratio:0.4257376642697744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 +DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:29 [batch.py:51] router release req id 8 +INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10750579833984375 s +INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.1096491813659668 s +DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=142020250452107006202537844893237747961, time:1750768169.3162944s req_ids:[8] +DEBUG 06-24 20:29:29 [manager.py:391] +ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:411.1766815185547ms total_cost_time:411.2215042114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12100 prompt_cache_len:5151 prompt_cache_ratio:0.425702479338843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 +DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:29 [batch.py:51] router release req id 8 +INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10729217529296875 s +INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.10941529273986816 s +DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=83921928101375895334645024480018832977, time:1750768169.7231796s req_ids:[8] +DEBUG 06-24 20:29:29 [manager.py:391] +ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:209.34534072875977ms total_cost_time:209.3679904937744ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:12101 prompt_cache_len:5151 prompt_cache_ratio:0.42566730022312205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 +DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:29 [batch.py:51] router release req id 8 +INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s +INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.11036062240600586 s +DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=124916319686568152445270978310806969274, time:1750768169.950586s req_ids:[8] +DEBUG 06-24 20:29:29 [manager.py:391] +ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:226.6838550567627ms total_cost_time:226.72700881958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12102 prompt_cache_len:5151 prompt_cache_ratio:0.42563212692117003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 +DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:30 [batch.py:51] router release req id 8 +INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.1083533763885498 s +INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11033010482788086 s +DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=120944583438043153840471468222508828211, time:1750768170.18188s req_ids:[8] +DEBUG 06-24 20:29:30 [manager.py:391] +ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:224.56693649291992ms total_cost_time:224.61175918579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12103 prompt_cache_len:5151 prompt_cache_ratio:0.4255969594315459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 +DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:30 [batch.py:51] router release req id 8 +INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s +INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11072921752929688 s +DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=154163153159015020155589951346534126410, time:1750768170.401171s req_ids:[8] +DEBUG 06-24 20:29:30 [manager.py:391] +ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:213.95611763000488ms total_cost_time:213.99807929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12104 prompt_cache_len:5151 prompt_cache_ratio:0.425561797752809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 +DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:30 [batch.py:51] router release req id 8 +INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10839533805847168 s +INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11038804054260254 s +DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=132007744675232204669686743326833615490, time:1750768170.6323035s req_ids:[8] +DEBUG 06-24 20:29:30 [manager.py:391] +ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:221.83561325073242ms total_cost_time:221.8799591064453ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12105 prompt_cache_len:5151 prompt_cache_ratio:0.4255266418835192 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 +DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:30 [batch.py:51] router release req id 8 +INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10609841346740723 s +INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.10800647735595703 s +DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=136896352720739087550699817782484750452, time:1750768170.8497176s req_ids:[8] +DEBUG 06-24 20:29:30 [manager.py:391] +ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:210.3898525238037ms total_cost_time:210.4339599609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12106 prompt_cache_len:5151 prompt_cache_ratio:0.4254914918222369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 +DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:30 [batch.py:51] router release req id 8 +INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10666179656982422 s +INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10842728614807129 s +DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=72441886944905688025705656820394854575, time:1750768171.0663579s req_ids:[8] +DEBUG 06-24 20:29:31 [manager.py:391] +ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:373.6233711242676ms total_cost_time:373.64888191223145ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12107 prompt_cache_len:5151 prompt_cache_ratio:0.42545634756752293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 +DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:31 [batch.py:51] router release req id 8 +INFO 06-24 20:29:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10673332214355469 s +INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10863447189331055 s +DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=242626674229220205993105340406056708179, time:1750768171.4454274s req_ids:[8] +DEBUG 06-24 20:29:31 [manager.py:391] +ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:206.83693885803223ms total_cost_time:206.8798542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12108 prompt_cache_len:5151 prompt_cache_ratio:0.42542120911793857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 +DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:31 [batch.py:51] router release req id 8 +INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10852646827697754 s +INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045527458190918 s +DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=48758708796930037597639548439056340982, time:1750768171.6596942s req_ids:[8] +DEBUG 06-24 20:29:31 [manager.py:391] +ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:207.4294090270996ms total_cost_time:207.48305320739746ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12109 prompt_cache_len:5151 prompt_cache_ratio:0.42538607647204557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 +DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:31 [batch.py:51] router release req id 8 +INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10778522491455078 s +INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10981559753417969 s +DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=272955928308009136555634738260367954766, time:1750768171.8713665s req_ids:[8] +DEBUG 06-24 20:29:31 [manager.py:391] +ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:208.08029174804688ms total_cost_time:208.1005573272705ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12110 prompt_cache_len:5151 prompt_cache_ratio:0.42535094962840625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 +DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:31 [batch.py:51] router release req id 8 +INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10798001289367676 s +INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10903787612915039 s +DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=318367565156790952807932832103755302053, time:1750768172.085849s req_ids:[8] +DEBUG 06-24 20:29:32 [manager.py:391] +ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:210.8771800994873ms total_cost_time:210.9205722808838ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12111 prompt_cache_len:5151 prompt_cache_ratio:0.42531582858558337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 +DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:32 [batch.py:51] router release req id 8 +INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.1084296703338623 s +INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.11047840118408203 s +DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=170958238831819508335816881486197010368, time:1750768172.305708s req_ids:[8] +DEBUG 06-24 20:29:32 [manager.py:391] +ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:214.141845703125ms total_cost_time:214.18499946594238ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12112 prompt_cache_len:5151 prompt_cache_ratio:0.42528071334214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 +DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:32 [batch.py:51] router release req id 8 +INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10783791542053223 s +INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s +DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=167358468822389908429829839415235154314, time:1750768172.5217946s req_ids:[8] +DEBUG 06-24 20:29:32 [manager.py:391] +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:214.5087718963623ms total_cost_time:214.52856063842773ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12113 prompt_cache_len:5151 prompt_cache_ratio:0.42524560389663996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 +DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:32 [batch.py:51] router release req id 8 +INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10714292526245117 s +INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10904979705810547 s +DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=182252764770211330427295001337290632939, time:1750768172.7527468s req_ids:[8] +DEBUG 06-24 20:29:32 [manager.py:391] +ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:466.52674674987793ms total_cost_time:466.5699005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12114 prompt_cache_len:5151 prompt_cache_ratio:0.42521050024764734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 +DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:33 [batch.py:51] router release req id 8 +INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10701942443847656 s +INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.10898375511169434 s +DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=211870241969576258847396309048078760472, time:1750768173.220652s req_ids:[8] +DEBUG 06-24 20:29:33 [manager.py:391] +ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:199.15318489074707ms total_cost_time:199.19419288635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:12115 prompt_cache_len:5151 prompt_cache_ratio:0.4251754023937268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 +DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:33 [batch.py:51] router release req id 8 +INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s +INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s +DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=129042883847863911997991559641541396522, time:1750768173.4225862s req_ids:[8] +DEBUG 06-24 20:29:33 [manager.py:391] +ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:210.34550666809082ms total_cost_time:210.39080619812012ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12116 prompt_cache_len:5151 prompt_cache_ratio:0.4251403103334434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 +DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:33 [batch.py:51] router release req id 8 +INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s +INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.11077713966369629 s +DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=188578579090110593454401424097026925564, time:1750768173.6380475s req_ids:[8] +DEBUG 06-24 20:29:33 [manager.py:391] +ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:195.79195976257324ms total_cost_time:195.83511352539062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12117 prompt_cache_len:5151 prompt_cache_ratio:0.4251052240653627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 +DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:33 [batch.py:51] router release req id 8 +INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10697126388549805 s +INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.10905814170837402 s +DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=83991496340752764788674241838320348895, time:1750768173.8392944s req_ids:[8] +DEBUG 06-24 20:29:33 [manager.py:391] +ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:198.9607810974121ms total_cost_time:199.0034580230713ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12118 prompt_cache_len:5151 prompt_cache_ratio:0.42507014358805084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 +DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:33 [batch.py:51] router release req id 8 +INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10820174217224121 s +INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.11004185676574707 s +DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=177838862255100211575082958917290642402, time:1750768174.0454166s req_ids:[8] +DEBUG 06-24 20:29:34 [manager.py:391] +ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:212.32175827026367ms total_cost_time:212.36634254455566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12119 prompt_cache_len:5151 prompt_cache_ratio:0.42503506890007425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 +DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:34 [batch.py:51] router release req id 8 +INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10479593276977539 s +INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10675048828125 s +DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=93604372281114938889007454708273917158, time:1750768174.2800455s req_ids:[8] +DEBUG 06-24 20:29:34 [manager.py:391] +ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:223.32167625427246ms total_cost_time:223.3431339263916ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12120 prompt_cache_len:5151 prompt_cache_ratio:0.425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 +DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:34 [batch.py:51] router release req id 8 +INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10472822189331055 s +INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10650777816772461 s +DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=237680985195980529432043951656984835388, time:1750768174.4935327s req_ids:[8] +DEBUG 06-24 20:29:34 [manager.py:391] +ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:29:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48981.434 tokens/s +DEBUG 06-24 20:29:34 [stats.py:37] Avg prompt tokens throughput: 48973.340 tokens/s +DEBUG 06-24 20:29:34 [stats.py:37] Avg generate tokens throughput: 8.094 tokens/s +INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:397.5844383239746ms total_cost_time:397.60541915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12121 prompt_cache_len:5151 prompt_cache_ratio:0.42496493688639553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 +DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:34 [batch.py:51] router release req id 8 +INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10394072532653809 s +INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10608887672424316 s +DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=340251787563024515904180343620191232042, time:1750768174.89506s req_ids:[8] +DEBUG 06-24 20:29:34 [manager.py:391] +ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:213.59729766845703ms total_cost_time:213.623046875ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:12122 prompt_cache_len:5151 prompt_cache_ratio:0.42492987955782874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 +DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:35 [batch.py:51] router release req id 8 +INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s +INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959959030151367 s +DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=38087546243068647744450745187189798854, time:1750768175.1265378s req_ids:[8] +DEBUG 06-24 20:29:35 [manager.py:391] +ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:222.55682945251465ms total_cost_time:222.59855270385742ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12123 prompt_cache_len:5151 prompt_cache_ratio:0.4248948280128681 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 +DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:35 [batch.py:51] router release req id 8 +INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.1068277359008789 s +INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s +DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=247974355530550800301929459138736217589, time:1750768175.3402405s req_ids:[8] +DEBUG 06-24 20:29:35 [manager.py:391] +ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:166.17608070373535ms total_cost_time:166.20326042175293ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:12124 prompt_cache_len:5151 prompt_cache_ratio:0.4248597822500825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 +DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:35 [batch.py:51] router release req id 8 +INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10379743576049805 s +INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.1058194637298584 s +DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=330505091437266184205680050235447745184, time:1750768175.5147407s req_ids:[8] +DEBUG 06-24 20:29:35 [manager.py:391] +ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:189.93330001831055ms total_cost_time:189.97883796691895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12125 prompt_cache_len:5151 prompt_cache_ratio:0.42482474226804123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 +DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:35 [batch.py:51] router release req id 8 +INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10780191421508789 s +INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10963988304138184 s +DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=185816330843267776792410407042811653170, time:1750768175.7088065s req_ids:[8] +DEBUG 06-24 20:29:35 [manager.py:391] +ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:207.86213874816895ms total_cost_time:207.92460441589355ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12126 prompt_cache_len:5151 prompt_cache_ratio:0.4247897080653142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 +DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:35 [batch.py:51] router release req id 8 +INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10345077514648438 s +INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10556721687316895 s +DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=296696299523953309787553340212737802537, time:1750768175.9229565s req_ids:[8] +DEBUG 06-24 20:29:35 [manager.py:391] +ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:206.58469200134277ms total_cost_time:206.6943645477295ms,out_token_counter:1 mean_per_token_cost_time: 0.10967254638671875ms prompt_token_num:12127 prompt_cache_len:5151 prompt_cache_ratio:0.4247546796404717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 +DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:36 [batch.py:51] router release req id 8 +INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.3104877471923828 s +INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.3124821186065674 s +DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=266823302585691637180315172780938891650, time:1750768176.3454165s req_ids:[8] +DEBUG 06-24 20:29:36 [manager.py:391] +ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:429.2929172515869ms total_cost_time:429.3365478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12128 prompt_cache_len:5151 prompt_cache_ratio:0.4247196569920844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 +DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:36 [batch.py:51] router release req id 8 +INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10708117485046387 s +INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10895419120788574 s +DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=101337991100535601861372599709578179226, time:1750768176.5711436s req_ids:[8] +DEBUG 06-24 20:29:36 [manager.py:391] +ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:204.98013496398926ms total_cost_time:205.02400398254395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12129 prompt_cache_len:5151 prompt_cache_ratio:0.4246846401187237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 +DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:36 [batch.py:51] router release req id 8 +INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10731315612792969 s +INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10942864418029785 s +DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=320588487500075745209625008448723033215, time:1750768176.784979s req_ids:[8] +DEBUG 06-24 20:29:36 [manager.py:391] +ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:202.34346389770508ms total_cost_time:202.38494873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12130 prompt_cache_len:5151 prompt_cache_ratio:0.42464962901896125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 +DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:36 [batch.py:51] router release req id 8 +INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s +INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10958194732666016 s +DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=274478493294682165936667351053412392201, time:1750768176.9906409s req_ids:[8] +DEBUG 06-24 20:29:36 [manager.py:391] +ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:208.52351188659668ms total_cost_time:208.5566520690918ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12131 prompt_cache_len:5151 prompt_cache_ratio:0.4246146236913692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 +DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:37 [batch.py:51] router release req id 8 +INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10922741889953613 s +INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.1116025447845459 s +DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=303396176667515746647835122489615290390, time:1750768177.2081375s req_ids:[8] +DEBUG 06-24 20:29:37 [manager.py:391] +ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:214.01047706604004ms total_cost_time:214.05529975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12132 prompt_cache_len:5151 prompt_cache_ratio:0.4245796241345203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 +DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:37 [batch.py:51] router release req id 8 +INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10756254196166992 s +INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s +DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=211517298371529451079678221545897883939, time:1750768177.4400837s req_ids:[8] +DEBUG 06-24 20:29:37 [manager.py:391] +ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:215.7762050628662ms total_cost_time:215.8203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12133 prompt_cache_len:5151 prompt_cache_ratio:0.42454463034698753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 +DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:37 [batch.py:51] router release req id 8 +INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10770273208618164 s +INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.10964846611022949 s +DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=287526713420067493771072525195589226624, time:1750768177.6485612s req_ids:[8] +DEBUG 06-24 20:29:37 [manager.py:391] +ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:433.3231449127197ms total_cost_time:433.3674907684326ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12134 prompt_cache_len:5151 prompt_cache_ratio:0.42450964232734467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 +DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:37 [batch.py:51] router release req id 8 +INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.1068880558013916 s +INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10875082015991211 s +DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=299019802727512495401886066888243376768, time:1750768178.0881457s req_ids:[8] +DEBUG 06-24 20:29:38 [manager.py:391] +ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:194.4119930267334ms total_cost_time:194.45514678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12135 prompt_cache_len:5151 prompt_cache_ratio:0.4244746600741656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 +DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:38 [batch.py:51] router release req id 8 +INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.11001801490783691 s +INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.11194562911987305 s +DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=293640633088328358692089650157625841013, time:1750768178.2882721s req_ids:[8] +DEBUG 06-24 20:29:38 [manager.py:391] +ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:202.27742195129395ms total_cost_time:202.32057571411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12136 prompt_cache_len:5151 prompt_cache_ratio:0.4244396835860251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 +DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:38 [batch.py:51] router release req id 8 +INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10827493667602539 s +INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.11028718948364258 s +INFO 06-24 20:29:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=91474710794998932117667021703695323800, time:1750768178.497649s req_ids:[8] +DEBUG 06-24 20:29:38 [manager.py:391] +ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:210.10208129882812ms total_cost_time:210.1449966430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12137 prompt_cache_len:5151 prompt_cache_ratio:0.4244047128614979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 +DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:38 [batch.py:51] router release req id 8 +INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10760807991027832 s +INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10959100723266602 s +DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=273654388223973306707767411786764149111, time:1750768178.71371s req_ids:[8] +DEBUG 06-24 20:29:38 [manager.py:391] +ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:214.2951488494873ms total_cost_time:214.32876586914062ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12138 prompt_cache_len:5151 prompt_cache_ratio:0.42436974789915966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 +DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:38 [batch.py:51] router release req id 8 +INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10804414749145508 s +INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10996055603027344 s +DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=299812576089764118226394740390914428456, time:1750768178.9318209s req_ids:[8] +DEBUG 06-24 20:29:38 [manager.py:391] +ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:210.98995208740234ms total_cost_time:211.03525161743164ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12139 prompt_cache_len:5151 prompt_cache_ratio:0.4243347886975863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 +DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:39 [batch.py:51] router release req id 8 +INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10892844200134277 s +INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11086010932922363 s +DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=154744768876847331147203381500540538521, time:1750768179.1485958s req_ids:[8] +DEBUG 06-24 20:29:39 [manager.py:391] +ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:213.76967430114746ms total_cost_time:213.81282806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12140 prompt_cache_len:5151 prompt_cache_ratio:0.4242998352553542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 +DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:39 [batch.py:51] router release req id 8 +INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10664749145507812 s +INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.10846304893493652 s +DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=73931406336905467925483704022051540666, time:1750768179.3809977s req_ids:[8] +DEBUG 06-24 20:29:39 [manager.py:391] +ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:398.29087257385254ms total_cost_time:398.33521842956543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12141 prompt_cache_len:5151 prompt_cache_ratio:0.42426488757104025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 +DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:39 [batch.py:51] router release req id 8 +INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10951638221740723 s +INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11161923408508301 s +DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=277083700891311873169974563267248248947, time:1750768179.7729344s req_ids:[8] +DEBUG 06-24 20:29:39 [manager.py:391] +ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:219.86889839172363ms total_cost_time:219.91348266601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12142 prompt_cache_len:5151 prompt_cache_ratio:0.42422994564322186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 +DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:39 [batch.py:51] router release req id 8 +INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s +INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s +DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=243846156091639138550355281703165646622, time:1750768180.0062387s req_ids:[8] +DEBUG 06-24 20:29:40 [manager.py:391] +ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:213.56201171875ms total_cost_time:213.6056423187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12143 prompt_cache_len:5151 prompt_cache_ratio:0.42419500947047684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 +DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:40 [batch.py:51] router release req id 8 +INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10751962661743164 s +INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.10953688621520996 s +DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=320506661473848973693081198587941181604, time:1750768180.2206395s req_ids:[8] +DEBUG 06-24 20:29:40 [manager.py:391] +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:212.005615234375ms total_cost_time:212.0504379272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12144 prompt_cache_len:5151 prompt_cache_ratio:0.4241600790513834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 +DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:40 [batch.py:51] router release req id 8 +INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10741853713989258 s +INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940098762512207 s +DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=101530776342567304696827395056945737142, time:1750768180.4375465s req_ids:[8] +DEBUG 06-24 20:29:40 [manager.py:391] +ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:214.39194679260254ms total_cost_time:214.43796157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12145 prompt_cache_len:5151 prompt_cache_ratio:0.42412515438452036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 +DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:40 [batch.py:51] router release req id 8 +INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10986089706420898 s +INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.11182475090026855 s +DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=99749869772888058729032452796643206262, time:1750768180.6585805s req_ids:[8] +DEBUG 06-24 20:29:40 [manager.py:391] +ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:210.3593349456787ms total_cost_time:210.4039192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12146 prompt_cache_len:5151 prompt_cache_ratio:0.42409023546846697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 +DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:40 [batch.py:51] router release req id 8 +INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10764169692993164 s +INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.1095728874206543 s +DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=34509305398129527457885702384426558269, time:1750768180.8735676s req_ids:[8] +DEBUG 06-24 20:29:40 [manager.py:391] +ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:209.59901809692383ms total_cost_time:209.65218544006348ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12147 prompt_cache_len:5151 prompt_cache_ratio:0.42405532230180293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 +DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:41 [batch.py:51] router release req id 8 +INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.10712289810180664 s +INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.1095585823059082 s +DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=160614028960100045664904650843145082380, time:1750768181.090413s req_ids:[8] +DEBUG 06-24 20:29:41 [manager.py:391] +ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:372.50685691833496ms total_cost_time:372.5428581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:12148 prompt_cache_len:5151 prompt_cache_ratio:0.4240204148831083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 +DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:41 [batch.py:51] router release req id 8 +INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s +INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s +DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=335037007657085370143853193853880506168, time:1750768181.4700856s req_ids:[8] +DEBUG 06-24 20:29:41 [manager.py:391] +ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:219.80977058410645ms total_cost_time:219.85220909118652ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12149 prompt_cache_len:5151 prompt_cache_ratio:0.42398551321096384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 +DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:41 [batch.py:51] router release req id 8 +INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s +INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s +DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=209488674385187664357349825387953496696, time:1750768181.711448s req_ids:[8] +DEBUG 06-24 20:29:41 [manager.py:391] +ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:223.9232063293457ms total_cost_time:223.9665985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12150 prompt_cache_len:5151 prompt_cache_ratio:0.4239506172839506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 +DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:41 [batch.py:51] router release req id 8 +INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.1082618236541748 s +INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11025333404541016 s +DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=272302216997286973164207061678113043520, time:1750768181.9262633s req_ids:[8] +DEBUG 06-24 20:29:41 [manager.py:391] +ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:206.6061496734619ms total_cost_time:206.6507339477539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12151 prompt_cache_len:5151 prompt_cache_ratio:0.4239157271006502 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 +DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:42 [batch.py:51] router release req id 8 +INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10799884796142578 s +INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s +DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=266167416153714575625739143886908559783, time:1750768182.144679s req_ids:[8] +DEBUG 06-24 20:29:42 [manager.py:391] +ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:214.5977020263672ms total_cost_time:214.64204788208008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12152 prompt_cache_len:5151 prompt_cache_ratio:0.4238808426596445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 +DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:42 [batch.py:51] router release req id 8 +INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.1085977554321289 s +INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.11084485054016113 s +DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=174877347978795779002794556765975467165, time:1750768182.3599591s req_ids:[8] +DEBUG 06-24 20:29:42 [manager.py:391] +ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:207.9782485961914ms total_cost_time:208.0214023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12153 prompt_cache_len:5151 prompt_cache_ratio:0.4238459639595162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 +DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:42 [batch.py:51] router release req id 8 +INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10742831230163574 s +INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.10930538177490234 s +DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=233632911218233230919337225802050433634, time:1750768182.575026s req_ids:[8] +DEBUG 06-24 20:29:42 [manager.py:391] +ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:207.93581008911133ms total_cost_time:207.9601287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12154 prompt_cache_len:5151 prompt_cache_ratio:0.4238110909988481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 +DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:42 [batch.py:51] router release req id 8 +INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10899043083190918 s +INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.11174154281616211 s +DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=242474598248843056928570337155729084008, time:1750768182.797736s req_ids:[8] +DEBUG 06-24 20:29:42 [manager.py:391] +ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:436.2657070159912ms total_cost_time:436.3124370574951ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12155 prompt_cache_len:5151 prompt_cache_ratio:0.42377622377622376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 +DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:43 [batch.py:51] router release req id 8 +INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s +INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.10961651802062988 s +DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=10465098573057244097341410946427095255, time:1750768183.2338896s req_ids:[8] +DEBUG 06-24 20:29:43 [manager.py:391] +ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:189.26334381103516ms total_cost_time:189.30792808532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12156 prompt_cache_len:5151 prompt_cache_ratio:0.4237413622902271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 +DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:43 [batch.py:51] router release req id 8 +INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10841226577758789 s +INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s +DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=294499036573211132589107662243983826616, time:1750768183.432093s req_ids:[8] +DEBUG 06-24 20:29:43 [manager.py:391] +ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:205.97147941589355ms total_cost_time:206.00152015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.030040740966796875ms prompt_token_num:12157 prompt_cache_len:5151 prompt_cache_ratio:0.4237065065394423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 +DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:43 [batch.py:51] router release req id 8 +INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10593271255493164 s +INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.10737156867980957 s +DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=135947984617785326558541513592512919015, time:1750768183.6573155s req_ids:[8] +DEBUG 06-24 20:29:43 [manager.py:391] +ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:226.92060470581055ms total_cost_time:226.96781158447266ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12158 prompt_cache_len:5151 prompt_cache_ratio:0.42367165652245437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 +DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:43 [batch.py:51] router release req id 8 +INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10831475257873535 s +INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.11059975624084473 s +DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=224292292182442271426088769871114855854, time:1750768183.878471s req_ids:[8] +DEBUG 06-24 20:29:43 [manager.py:391] +ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:214.66565132141113ms total_cost_time:214.71047401428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12159 prompt_cache_len:5151 prompt_cache_ratio:0.4236368122378485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 +DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:44 [batch.py:51] router release req id 8 +INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.10890579223632812 s +INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.11093759536743164 s +DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=189790924707943558271519743040481770936, time:1750768184.1055315s req_ids:[8] +DEBUG 06-24 20:29:44 [manager.py:391] +ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:216.11809730529785ms total_cost_time:216.16268157958984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12160 prompt_cache_len:5151 prompt_cache_ratio:0.42360197368421054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 +DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:44 [batch.py:51] router release req id 8 +INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.1073462963104248 s +INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.10948419570922852 s +DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=102542918702157905810401951991910124658, time:1750768184.3209329s req_ids:[8] +DEBUG 06-24 20:29:44 [manager.py:391] +ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:204.85758781433105ms total_cost_time:204.90097999572754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12161 prompt_cache_len:5151 prompt_cache_ratio:0.42356714086012665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 +DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:44 [batch.py:51] router release req id 8 +INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.3101236820220947 s +INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.3122124671936035 s +DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=126447758517731243043021672157564236232, time:1750768184.7301917s req_ids:[8] +DEBUG 06-24 20:29:44 [manager.py:391] +ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:29:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 49553.073 tokens/s +DEBUG 06-24 20:29:44 [stats.py:37] Avg prompt tokens throughput: 49544.912 tokens/s +DEBUG 06-24 20:29:44 [stats.py:37] Avg generate tokens throughput: 8.161 tokens/s +INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:403.98502349853516ms total_cost_time:404.02936935424805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12162 prompt_cache_len:5151 prompt_cache_ratio:0.42353231376418354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 +DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:44 [batch.py:51] router release req id 8 +INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.10929608345031738 s +INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118340492248535 s +DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=320082874408314361850912757727754183200, time:1750768184.9445252s req_ids:[8] +DEBUG 06-24 20:29:44 [manager.py:391] +ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:204.32567596435547ms total_cost_time:204.36978340148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12163 prompt_cache_len:5151 prompt_cache_ratio:0.42349749239496837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 +DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:45 [batch.py:51] router release req id 8 +INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.11109209060668945 s +INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.11311793327331543 s +DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=85731737004447914737908609810465876105, time:1750768185.1537778s req_ids:[8] +DEBUG 06-24 20:29:45 [manager.py:391] +ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:204.93006706237793ms total_cost_time:204.97560501098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12164 prompt_cache_len:5151 prompt_cache_ratio:0.42346267675106875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 +DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:45 [batch.py:51] router release req id 8 +INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10690593719482422 s +INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10880446434020996 s +DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=267938917893489809970455281780547095519, time:1750768185.3749826s req_ids:[8] +DEBUG 06-24 20:29:45 [manager.py:391] +ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:210.56151390075684ms total_cost_time:210.6027603149414ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12165 prompt_cache_len:5151 prompt_cache_ratio:0.42342786683107275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 +DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:45 [batch.py:51] router release req id 8 +INFO 06-24 20:29:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10727405548095703 s +INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10934710502624512 s +DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=78118183127760232846884953276331547799, time:1750768185.5794668s req_ids:[8] +DEBUG 06-24 20:29:45 [manager.py:391] +ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:197.24559783935547ms total_cost_time:197.27540016174316ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:12166 prompt_cache_len:5151 prompt_cache_ratio:0.42339306263356896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 +DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:45 [batch.py:51] router release req id 8 +INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s +INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10910558700561523 s +DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=217005223284509705024797721609925231750, time:1750768185.7862613s req_ids:[8] +DEBUG 06-24 20:29:45 [manager.py:391] +ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:208.32538604736328ms total_cost_time:208.36901664733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12167 prompt_cache_len:5151 prompt_cache_ratio:0.4233582641571464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 +DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:45 [batch.py:51] router release req id 8 +INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10714221000671387 s +INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10832548141479492 s +DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=76818082194802183936069854855328380604, time:1750768185.9987824s req_ids:[8] +DEBUG 06-24 20:29:45 [manager.py:391] +ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:390.32793045043945ms total_cost_time:390.37179946899414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12168 prompt_cache_len:5151 prompt_cache_ratio:0.4233234714003945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 +DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:46 [batch.py:51] router release req id 8 +INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10760164260864258 s +INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s +DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=168314316334823366951202047765362901887, time:1750768186.397484s req_ids:[8] +DEBUG 06-24 20:29:46 [manager.py:391] +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:205.55591583251953ms total_cost_time:205.60002326965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12169 prompt_cache_len:5151 prompt_cache_ratio:0.4232886843619032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 +DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:46 [batch.py:51] router release req id 8 +INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10843205451965332 s +INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.11043238639831543 s +DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=170881595729915399115352785711680323964, time:1750768186.6088967s req_ids:[8] +DEBUG 06-24 20:29:46 [manager.py:391] +ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:210.5264663696289ms total_cost_time:210.5705738067627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12170 prompt_cache_len:5151 prompt_cache_ratio:0.42325390304026295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 +DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:46 [batch.py:51] router release req id 8 +INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10804629325866699 s +INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.10986661911010742 s +DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=116492470160440233095906006976801441343, time:1750768186.8278282s req_ids:[8] +DEBUG 06-24 20:29:46 [manager.py:391] +ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:196.10261917114258ms total_cost_time:196.15817070007324ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:12171 prompt_cache_len:5151 prompt_cache_ratio:0.4232191274340646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 +DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:46 [batch.py:51] router release req id 8 +INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10854530334472656 s +INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11046648025512695 s +DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=218320981371046368683635872008856679111, time:1750768187.0387578s req_ids:[8] +DEBUG 06-24 20:29:47 [manager.py:391] +INFO 06-24 20:29:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:219.59638595581055ms total_cost_time:219.64097023010254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12172 prompt_cache_len:5151 prompt_cache_ratio:0.4231843575418994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 +DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:47 [batch.py:51] router release req id 8 +INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s +INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11041784286499023 s +DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=61003533996318641876576730223892620553, time:1750768187.2587843s req_ids:[8] +DEBUG 06-24 20:29:47 [manager.py:391] +ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:214.0650749206543ms total_cost_time:214.1265869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:12173 prompt_cache_len:5151 prompt_cache_ratio:0.4231495933623593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 +DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:47 [batch.py:51] router release req id 8 +INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10982322692871094 s +INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.1124720573425293 s +DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=180633088556351709089101623329278888726, time:1750768187.477732s req_ids:[8] +DEBUG 06-24 20:29:47 [manager.py:391] +ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:213.75417709350586ms total_cost_time:213.79828453063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12174 prompt_cache_len:5151 prompt_cache_ratio:0.42311483489403645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 +DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:47 [batch.py:51] router release req id 8 +INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10857558250427246 s +INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11053204536437988 s +DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=309088754221688251971910421420722188861, time:1750768187.6985695s req_ids:[8] +DEBUG 06-24 20:29:47 [manager.py:391] +ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:385.6179714202881ms total_cost_time:385.6487274169922ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:12175 prompt_cache_len:5151 prompt_cache_ratio:0.4230800821355236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 +DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:48 [batch.py:51] router release req id 8 +INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10617232322692871 s +INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10718226432800293 s +DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=221719335839050511271809210786160998330, time:1750768188.0934258s req_ids:[8] +DEBUG 06-24 20:29:48 [manager.py:391] +ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:215.88397026062012ms total_cost_time:215.93070030212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12176 prompt_cache_len:5151 prompt_cache_ratio:0.4230453350854139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 +DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:48 [batch.py:51] router release req id 8 +INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10999631881713867 s +INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.11194968223571777 s +DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=34323341395267746680765741202392449331, time:1750768188.3122678s req_ids:[8] +DEBUG 06-24 20:29:48 [manager.py:391] +ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:211.5931510925293ms total_cost_time:211.6372585296631ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12177 prompt_cache_len:5151 prompt_cache_ratio:0.42301059374230104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 +DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:48 [batch.py:51] router release req id 8 +INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.1068727970123291 s +INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10817503929138184 s +DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=181755108791294670926485546829099233105, time:1750768188.531519s req_ids:[8] +DEBUG 06-24 20:29:48 [manager.py:391] +ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:208.1904411315918ms total_cost_time:208.2235813140869ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12178 prompt_cache_len:5151 prompt_cache_ratio:0.4229758581047791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 +DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:48 [batch.py:51] router release req id 8 +INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10853028297424316 s +INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.11055850982666016 s +DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=193632884841415902248969811673320232515, time:1750768188.7459388s req_ids:[8] +DEBUG 06-24 20:29:48 [manager.py:391] +ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:212.91685104370117ms total_cost_time:212.96119689941406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12179 prompt_cache_len:5151 prompt_cache_ratio:0.42294112817144264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 +DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:48 [batch.py:51] router release req id 8 +INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10788846015930176 s +INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10929298400878906 s +DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=175758928480388158086502245296753172541, time:1750768188.9642086s req_ids:[8] +DEBUG 06-24 20:29:48 [manager.py:391] +ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:211.0581398010254ms total_cost_time:211.10129356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12180 prompt_cache_len:5151 prompt_cache_ratio:0.4229064039408867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 +DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:49 [batch.py:51] router release req id 8 +INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10778284072875977 s +INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.10960984230041504 s +DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=33160263357180468239623693256781480660, time:1750768189.1826067s req_ids:[8] +DEBUG 06-24 20:29:49 [manager.py:391] +ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:191.8947696685791ms total_cost_time:191.9384002685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12181 prompt_cache_len:5151 prompt_cache_ratio:0.4228716854117068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 +DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:49 [batch.py:51] router release req id 8 +INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10841012001037598 s +INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11052632331848145 s +DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=330685325507145879947394611796803360776, time:1750768189.3750062s req_ids:[8] +DEBUG 06-24 20:29:49 [manager.py:391] +ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:416.5351390838623ms total_cost_time:416.5785312652588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12182 prompt_cache_len:5151 prompt_cache_ratio:0.42283697258249875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 +DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:49 [batch.py:51] router release req id 8 +INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10864400863647461 s +INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11057043075561523 s +DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=42395687684910660019181109703880261296, time:1750768189.7965722s req_ids:[8] +DEBUG 06-24 20:29:49 [manager.py:391] +ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:195.43051719665527ms total_cost_time:195.47343254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12183 prompt_cache_len:5151 prompt_cache_ratio:0.42280226545185917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 +DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:49 [batch.py:51] router release req id 8 +INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s +INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11137890815734863 s +DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=220969251188201438835985218328290347006, time:1750768190.0065565s req_ids:[8] +DEBUG 06-24 20:29:50 [manager.py:391] +ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:214.45178985595703ms total_cost_time:214.4942283630371ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12184 prompt_cache_len:5151 prompt_cache_ratio:0.4227675640183848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 +DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:50 [batch.py:51] router release req id 8 +INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10765385627746582 s +INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.10962677001953125 s +DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=271015334585051035747435236562315210014, time:1750768190.2225957s req_ids:[8] +DEBUG 06-24 20:29:50 [manager.py:391] +ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:209.56850051879883ms total_cost_time:209.6114158630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12185 prompt_cache_len:5151 prompt_cache_ratio:0.42273286828067297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 +DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:50 [batch.py:51] router release req id 8 +INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10761880874633789 s +INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s +DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=263104274312922737206454966593609951467, time:1750768190.438858s req_ids:[8] +DEBUG 06-24 20:29:50 [manager.py:391] +ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:205.64651489257812ms total_cost_time:205.6901454925537ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12186 prompt_cache_len:5151 prompt_cache_ratio:0.42269817823732153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 +DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:50 [batch.py:51] router release req id 8 +INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s +INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.11152839660644531 s +DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=312109309368404542595604790255909398049, time:1750768190.6521397s req_ids:[8] +DEBUG 06-24 20:29:50 [manager.py:391] +ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:214.7970199584961ms total_cost_time:214.83898162841797ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12187 prompt_cache_len:5151 prompt_cache_ratio:0.4226634938869287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 +DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:50 [batch.py:51] router release req id 8 +INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10756492614746094 s +INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s +DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=194698969904767767711353166367489058982, time:1750768190.8709557s req_ids:[8] +DEBUG 06-24 20:29:50 [manager.py:391] +ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:211.84754371643066ms total_cost_time:211.89308166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12188 prompt_cache_len:5151 prompt_cache_ratio:0.4226288152280932 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 +DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:51 [batch.py:51] router release req id 8 +INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10759758949279785 s +INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.1094980239868164 s +DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=155051735574091712619333999205373242602, time:1750768191.0907052s req_ids:[8] +DEBUG 06-24 20:29:51 [manager.py:391] +ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:398.8053798675537ms total_cost_time:398.8497257232666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12189 prompt_cache_len:5151 prompt_cache_ratio:0.4225941422594142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 +DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:51 [batch.py:51] router release req id 8 +INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10907363891601562 s +INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.11108851432800293 s +DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=182754504918799968094821158261553265218, time:1750768191.4948857s req_ids:[8] +DEBUG 06-24 20:29:51 [manager.py:391] +ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:215.46316146850586ms total_cost_time:215.50822257995605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12190 prompt_cache_len:5151 prompt_cache_ratio:0.42255947497949137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 +DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:51 [batch.py:51] router release req id 8 +INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10598063468933105 s +INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.1080014705657959 s +DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=272234118373456590691380941996986651667, time:1750768191.7162604s req_ids:[8] +DEBUG 06-24 20:29:51 [manager.py:391] +ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:211.9293212890625ms total_cost_time:211.9755744934082ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12191 prompt_cache_len:5151 prompt_cache_ratio:0.42252481338692477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 +DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:51 [batch.py:51] router release req id 8 +INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10761785507202148 s +INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.10962533950805664 s +DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=100019831845121334773935005948475127847, time:1750768191.9334652s req_ids:[8] +DEBUG 06-24 20:29:51 [manager.py:391] +ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:208.59789848327637ms total_cost_time:208.64176750183105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12192 prompt_cache_len:5151 prompt_cache_ratio:0.422490157480315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 +DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:52 [batch.py:51] router release req id 8 +INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.1091618537902832 s +INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.11111879348754883 s +DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=296326609781880951083228869860197627991, time:1750768192.1484108s req_ids:[8] +DEBUG 06-24 20:29:52 [manager.py:391] +ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:210.4947566986084ms total_cost_time:210.5398178100586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12193 prompt_cache_len:5151 prompt_cache_ratio:0.4224555072582629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 +DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:52 [batch.py:51] router release req id 8 +INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.1084127426147461 s +INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.11028766632080078 s +DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=134360504680921163504800492198622094739, time:1750768192.3647637s req_ids:[8] +DEBUG 06-24 20:29:52 [manager.py:391] +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:212.42904663085938ms total_cost_time:212.47529983520508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12194 prompt_cache_len:5151 prompt_cache_ratio:0.4224208627193702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 +DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:52 [batch.py:51] router release req id 8 +INFO 06-24 20:29:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.10878872871398926 s +INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.10988306999206543 s +DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=213878484901855483289117411883993635866, time:1750768192.583026s req_ids:[8] +DEBUG 06-24 20:29:52 [manager.py:391] +ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:388.7009620666504ms total_cost_time:388.7448310852051ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12195 prompt_cache_len:5151 prompt_cache_ratio:0.4223862238622386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 +DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:52 [batch.py:51] router release req id 8 +INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.10746550559997559 s +INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.10958170890808105 s +DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=198849002999616071786872246818190122225, time:1750768192.9792356s req_ids:[8] +DEBUG 06-24 20:29:52 [manager.py:391] +ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:211.49587631225586ms total_cost_time:211.53950691223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12196 prompt_cache_len:5151 prompt_cache_ratio:0.4223515906854706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 +DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:53 [batch.py:51] router release req id 8 +INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10799837112426758 s +INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.10996270179748535 s +DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=256295755621699275400233179874920948272, time:1750768193.203448s req_ids:[8] +DEBUG 06-24 20:29:53 [manager.py:391] +INFO 06-24 20:29:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:29:53 [statics_utils.py:24] mean first cost: 229.2316868352051 ms +INFO 06-24 20:29:53 [statics_utils.py:24] mean per token cost: 0.06209368451184434 ms +ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:221.13037109375ms total_cost_time:221.18401527404785ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12197 prompt_cache_len:5151 prompt_cache_ratio:0.4223169631876691 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 +DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:53 [batch.py:51] router release req id 8 +INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.11002993583679199 s +INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11200380325317383 s +DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=230212472641820579308381706472208574813, time:1750768193.4235344s req_ids:[8] +DEBUG 06-24 20:29:53 [manager.py:391] +ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:212.54420280456543ms total_cost_time:212.59093284606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12198 prompt_cache_len:5151 prompt_cache_ratio:0.4222823413674373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 +DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:53 [batch.py:51] router release req id 8 +INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s +INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s +DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=243377964981261607834274361776529131413, time:1750768193.6438625s req_ids:[8] +DEBUG 06-24 20:29:53 [manager.py:391] +ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:202.87823677062988ms total_cost_time:202.92353630065918ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12199 prompt_cache_len:5151 prompt_cache_ratio:0.42224772522337894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 +DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:53 [batch.py:51] router release req id 8 +INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10809111595153809 s +INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s +DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=166287887725784252309711182642872837359, time:1750768193.8531113s req_ids:[8] +DEBUG 06-24 20:29:53 [manager.py:391] +ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:212.0966911315918ms total_cost_time:212.14056015014648ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12200 prompt_cache_len:5151 prompt_cache_ratio:0.42221311475409834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 +DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:53 [batch.py:51] router release req id 8 +INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10737371444702148 s +INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.10951876640319824 s +DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=270470000415247780029787379749601948666, time:1750768194.070722s req_ids:[8] +DEBUG 06-24 20:29:54 [manager.py:391] +ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:215.12341499328613ms total_cost_time:215.1656150817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12201 prompt_cache_len:5151 prompt_cache_ratio:0.42217850995820017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 +DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:54 [batch.py:51] router release req id 8 +INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10961484909057617 s +INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.11087155342102051 s +DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=120973974663546999344761708560767831082, time:1750768194.2927775s req_ids:[8] +DEBUG 06-24 20:29:54 [manager.py:391] +ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:381.44898414611816ms total_cost_time:381.49499893188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12202 prompt_cache_len:5151 prompt_cache_ratio:0.4221439108342895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 +DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:54 [batch.py:51] router release req id 8 +INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10790729522705078 s +INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s +DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=230845293765229685668772367903587622216, time:1750768194.692315s req_ids:[8] +DEBUG 06-24 20:29:54 [manager.py:391] +ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:229.59136962890625ms total_cost_time:229.63786125183105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12203 prompt_cache_len:5151 prompt_cache_ratio:0.4221093173809719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 +DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:54 [batch.py:51] router release req id 8 +INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10763049125671387 s +INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974860191345215 s +DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=64532123653523451403975341756351049315, time:1750768194.915187s req_ids:[8] +DEBUG 06-24 20:29:54 [manager.py:391] +DEBUG 06-24 20:29:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 50696.768 tokens/s +DEBUG 06-24 20:29:54 [stats.py:37] Avg prompt tokens throughput: 50688.546 tokens/s +DEBUG 06-24 20:29:54 [stats.py:37] Avg generate tokens throughput: 8.222 tokens/s +ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:200.68669319152832ms total_cost_time:200.7291316986084ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12204 prompt_cache_len:5151 prompt_cache_ratio:0.4220747295968535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 +DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:55 [batch.py:51] router release req id 8 +INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.1075139045715332 s +INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.10956621170043945 s +DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=174789071221659748748659808691613022960, time:1750768195.1233928s req_ids:[8] +DEBUG 06-24 20:29:55 [manager.py:391] +ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:211.03429794311523ms total_cost_time:211.08198165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:12205 prompt_cache_len:5151 prompt_cache_ratio:0.42204014748054075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 +DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:55 [batch.py:51] router release req id 8 +INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10753846168518066 s +INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s +DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=21946604903621948101558953481608279943, time:1750768195.3488533s req_ids:[8] +DEBUG 06-24 20:29:55 [manager.py:391] +ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:223.59466552734375ms total_cost_time:223.63877296447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12206 prompt_cache_len:5151 prompt_cache_ratio:0.4220055710306407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 +DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:55 [batch.py:51] router release req id 8 +INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10864949226379395 s +INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.1106104850769043 s +DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=324877073254419139717140680023012489456, time:1750768195.571614s req_ids:[8] +DEBUG 06-24 20:29:55 [manager.py:391] +ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:215.12389183044434ms total_cost_time:215.16704559326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12207 prompt_cache_len:5151 prompt_cache_ratio:0.4219710002457606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 +DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:55 [batch.py:51] router release req id 8 +INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s +INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.11033987998962402 s +DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=88709957330802666707882512597175323037, time:1750768195.7911735s req_ids:[8] +DEBUG 06-24 20:29:55 [manager.py:391] +ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:213.0591869354248ms total_cost_time:213.1044864654541ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12208 prompt_cache_len:5151 prompt_cache_ratio:0.4219364351245085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 +DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:56 [batch.py:51] router release req id 8 +INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.30864405632019043 s +INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.30989933013916016 s +DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=149911680844091086834960921970536942243, time:1750768196.2311616s req_ids:[8] +DEBUG 06-24 20:29:56 [manager.py:391] +ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:439.4218921661377ms total_cost_time:439.4674301147461ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12209 prompt_cache_len:5151 prompt_cache_ratio:0.42190187566549264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 +DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:56 [batch.py:51] router release req id 8 +INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s +INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s +DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=314928825654595878395294572656663279924, time:1750768196.4573262s req_ids:[8] +DEBUG 06-24 20:29:56 [manager.py:391] +ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:213.26255798339844ms total_cost_time:213.30618858337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12210 prompt_cache_len:5151 prompt_cache_ratio:0.42186732186732184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 +DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:56 [batch.py:51] router release req id 8 +INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10867643356323242 s +INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s +DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=212974228945281010053650328097761563742, time:1750768196.6761694s req_ids:[8] +DEBUG 06-24 20:29:56 [manager.py:391] +ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:213.1185531616211ms total_cost_time:213.13858032226562ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12211 prompt_cache_len:5151 prompt_cache_ratio:0.42183277372860534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 +DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:56 [batch.py:51] router release req id 8 +INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10543107986450195 s +INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.10740208625793457 s +DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=186497737881388926214587045756563372766, time:1750768196.8972974s req_ids:[8] +DEBUG 06-24 20:29:56 [manager.py:391] +ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:211.93337440490723ms total_cost_time:211.9770050048828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12212 prompt_cache_len:5151 prompt_cache_ratio:0.42179823124795285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 +DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:57 [batch.py:51] router release req id 8 +INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10774612426757812 s +INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.10979723930358887 s +DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=42302541233567716255078482049575838002, time:1750768197.1114984s req_ids:[8] +DEBUG 06-24 20:29:57 [manager.py:391] +ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:212.21137046813965ms total_cost_time:212.25428581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12213 prompt_cache_len:5151 prompt_cache_ratio:0.42176369442397443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 +DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:57 [batch.py:51] router release req id 8 +INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10866260528564453 s +INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.1108863353729248 s +DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=59991711168008145214565920835719570270, time:1750768197.3304152s req_ids:[8] +DEBUG 06-24 20:29:57 [manager.py:391] +ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:212.80694007873535ms total_cost_time:212.85033226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12214 prompt_cache_len:5151 prompt_cache_ratio:0.4217291632552808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 +DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:57 [batch.py:51] router release req id 8 +INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10835456848144531 s +INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.11040687561035156 s +DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=138189075710236636975551262908225617544, time:1750768197.5490565s req_ids:[8] +DEBUG 06-24 20:29:57 [manager.py:391] +ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:426.67102813720703ms total_cost_time:426.6924858093262ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12215 prompt_cache_len:5151 prompt_cache_ratio:0.421694637740483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 +DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:57 [batch.py:51] router release req id 8 +INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10684442520141602 s +INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.10890817642211914 s +DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=269424159929223819430223750631840536781, time:1750768197.9795697s req_ids:[8] +DEBUG 06-24 20:29:57 [manager.py:391] +ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:196.4249610900879ms total_cost_time:196.46763801574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12216 prompt_cache_len:5151 prompt_cache_ratio:0.4216601178781925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 +DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:58 [batch.py:51] router release req id 8 +INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.1089639663696289 s +INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.11094450950622559 s +DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=212289360542422141815130932947205913914, time:1750768198.1820931s req_ids:[8] +DEBUG 06-24 20:29:58 [manager.py:391] +ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:206.42423629760742ms total_cost_time:206.465482711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12217 prompt_cache_len:5151 prompt_cache_ratio:0.42162560366702134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 +DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:58 [batch.py:51] router release req id 8 +INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10757565498352051 s +INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975098609924316 s +DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=31322629924996369606285235890291950074, time:1750768198.3979747s req_ids:[8] +DEBUG 06-24 20:29:58 [manager.py:391] +ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:209.78569984436035ms total_cost_time:209.82742309570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12218 prompt_cache_len:5151 prompt_cache_ratio:0.4215910951055819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 +DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:58 [batch.py:51] router release req id 8 +INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10819792747497559 s +INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.11033511161804199 s +DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=186976929980878033613835932806704051648, time:1750768198.611314s req_ids:[8] +DEBUG 06-24 20:29:58 [manager.py:391] +ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:209.7926139831543ms total_cost_time:209.83600616455078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12219 prompt_cache_len:5151 prompt_cache_ratio:0.4215565921924871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 +DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:58 [batch.py:51] router release req id 8 +INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10823535919189453 s +INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.1095738410949707 s +DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=281022501244372775081175534750421134954, time:1750768198.8274364s req_ids:[8] +DEBUG 06-24 20:29:58 [manager.py:391] +ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:211.5161418914795ms total_cost_time:211.5774154663086ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12220 prompt_cache_len:5151 prompt_cache_ratio:0.42152209492635023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 +DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:58 [batch.py:51] router release req id 8 +INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10717892646789551 s +INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.10920190811157227 s +DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=209068910636064759632128654394353618319, time:1750768199.0570753s req_ids:[8] +DEBUG 06-24 20:29:59 [manager.py:391] +ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:225.85511207580566ms total_cost_time:225.91781616210938ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12221 prompt_cache_len:5151 prompt_cache_ratio:0.4214876033057851 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 +DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:59 [batch.py:51] router release req id 8 +INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s +INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103363037109375 s +DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=124682717165854309284680093298754718710, time:1750768199.275804s req_ids:[8] +DEBUG 06-24 20:29:59 [manager.py:391] +ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:383.21518898010254ms total_cost_time:383.2581043243408ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12222 prompt_cache_len:5151 prompt_cache_ratio:0.421453117329406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 +DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:59 [batch.py:51] router release req id 8 +INFO 06-24 20:29:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10812139511108398 s +INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.11014556884765625 s +DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=257629345262822897237682290826919725156, time:1750768199.6749105s req_ids:[8] +DEBUG 06-24 20:29:59 [manager.py:391] +ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:220.60632705688477ms total_cost_time:220.65162658691406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12223 prompt_cache_len:5151 prompt_cache_ratio:0.42141863699582754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 +DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:29:59 [batch.py:51] router release req id 8 +INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10750102996826172 s +INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.10946416854858398 s +DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=105733838948125109711221831472642206304, time:1750768199.9011562s req_ids:[8] +DEBUG 06-24 20:29:59 [manager.py:391] +ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:221.19688987731934ms total_cost_time:221.24099731445312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12224 prompt_cache_len:5151 prompt_cache_ratio:0.42138416230366493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 +DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:00 [batch.py:51] router release req id 8 +INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10985517501831055 s +INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.1116797924041748 s +DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=239150139542468687021986318228541490250, time:1750768200.1236277s req_ids:[8] +DEBUG 06-24 20:30:00 [manager.py:391] +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:210.40749549865723ms total_cost_time:210.4930877685547ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:12225 prompt_cache_len:5151 prompt_cache_ratio:0.42134969325153376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 +DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:00 [batch.py:51] router release req id 8 +INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10820627212524414 s +INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.11021041870117188 s +DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=250776445460641421919660783214769869221, time:1750768200.341464s req_ids:[8] +DEBUG 06-24 20:30:00 [manager.py:391] +ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:211.78507804870605ms total_cost_time:211.84563636779785ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:12226 prompt_cache_len:5151 prompt_cache_ratio:0.42131522983805003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 +DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:00 [batch.py:51] router release req id 8 +INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10751128196716309 s +INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.10954570770263672 s +DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=3137178737192594429649629810434968238, time:1750768200.5603104s req_ids:[8] +DEBUG 06-24 20:30:00 [manager.py:391] +ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:211.11226081848145ms total_cost_time:211.15756034851074ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12227 prompt_cache_len:5151 prompt_cache_ratio:0.4212807720618304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 +DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:00 [batch.py:51] router release req id 8 +INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10753989219665527 s +INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.10951042175292969 s +DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=56765746329624138983222567213829190126, time:1750768200.7787929s req_ids:[8] +DEBUG 06-24 20:30:00 [manager.py:391] +ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:207.17096328735352ms total_cost_time:207.2162628173828ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12228 prompt_cache_len:5151 prompt_cache_ratio:0.42124631992149164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 +DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:01 [batch.py:51] router release req id 8 +INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.30974626541137695 s +INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.3117485046386719 s +DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=80295407941590718457609173205414950581, time:1750768201.220171s req_ids:[8] +DEBUG 06-24 20:30:01 [manager.py:391] +ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:447.0634460449219ms total_cost_time:447.1089839935303ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12229 prompt_cache_len:5151 prompt_cache_ratio:0.4212118734156513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 +DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:01 [batch.py:51] router release req id 8 +INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10863971710205078 s +INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.11047911643981934 s +DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=47003045478135918197875118563808060911, time:1750768201.4457228s req_ids:[8] +DEBUG 06-24 20:30:01 [manager.py:391] +ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:215.45791625976562ms total_cost_time:215.5013084411621ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12230 prompt_cache_len:5151 prompt_cache_ratio:0.4211774325429272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 +DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:01 [batch.py:51] router release req id 8 +INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10921883583068848 s +INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.11119771003723145 s +DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=70643724831300006794159550924948553056, time:1750768201.665076s req_ids:[8] +DEBUG 06-24 20:30:01 [manager.py:391] +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:206.44211769104004ms total_cost_time:206.48884773254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12231 prompt_cache_len:5151 prompt_cache_ratio:0.4211429973019377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 +DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:01 [batch.py:51] router release req id 8 +INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10700225830078125 s +INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.10881161689758301 s +DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=109000147315457364259830850608393302140, time:1750768201.87676s req_ids:[8] +DEBUG 06-24 20:30:01 [manager.py:391] +ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:168.440580368042ms total_cost_time:168.4863567352295ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12232 prompt_cache_len:5151 prompt_cache_ratio:0.4211085676913015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 +DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:01 [batch.py:51] router release req id 8 +INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10832786560058594 s +INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s +DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=93610471073346529630925938967718199442, time:1750768202.051116s req_ids:[8] +DEBUG 06-24 20:30:02 [manager.py:391] +ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:208.22548866271973ms total_cost_time:208.27031135559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12233 prompt_cache_len:5151 prompt_cache_ratio:0.42107414370963786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 +DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:02 [batch.py:51] router release req id 8 +INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10794425010681152 s +INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s +DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=26347632179070337265035697964086891923, time:1750768202.2656589s req_ids:[8] +DEBUG 06-24 20:30:02 [manager.py:391] +ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:212.97335624694824ms total_cost_time:213.03439140319824ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12234 prompt_cache_len:5151 prompt_cache_ratio:0.42103972535556644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 +DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:02 [batch.py:51] router release req id 8 +INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s +INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.11065387725830078 s +DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=235928491134191707130881427958066580049, time:1750768202.4862142s req_ids:[8] +DEBUG 06-24 20:30:02 [manager.py:391] +ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:387.8004550933838ms total_cost_time:387.84313201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12235 prompt_cache_len:5151 prompt_cache_ratio:0.4210053126277074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 +DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:02 [batch.py:51] router release req id 8 +INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.1082460880279541 s +INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s +DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=187532577999371130932475151552017991985, time:1750768202.8797696s req_ids:[8] +DEBUG 06-24 20:30:02 [manager.py:391] +ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:213.59539031982422ms total_cost_time:213.6375904083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12236 prompt_cache_len:5151 prompt_cache_ratio:0.42097090552468125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 +DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:03 [batch.py:51] router release req id 8 +INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10901260375976562 s +INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11109161376953125 s +DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=330992609334842800756070959800509745407, time:1750768203.1018188s req_ids:[8] +DEBUG 06-24 20:30:03 [manager.py:391] +ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:215.64960479736328ms total_cost_time:215.7003879547119ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12237 prompt_cache_len:5151 prompt_cache_ratio:0.4209365040451091 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 +DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:03 [batch.py:51] router release req id 8 +INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10756039619445801 s +INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955977439880371 s +DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=119095991258329836554252970702074307728, time:1750768203.3333497s req_ids:[8] +DEBUG 06-24 20:30:03 [manager.py:391] +ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:223.921537399292ms total_cost_time:223.96516799926758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12238 prompt_cache_len:5151 prompt_cache_ratio:0.42090210818761237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 +DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:03 [batch.py:51] router release req id 8 +INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10901880264282227 s +INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11094927787780762 s +DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=302626258717630095944716405526187505033, time:1750768203.5525932s req_ids:[8] +DEBUG 06-24 20:30:03 [manager.py:391] +ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:214.95509147644043ms total_cost_time:215.00349044799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:12239 prompt_cache_len:5151 prompt_cache_ratio:0.42086771795081296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 +DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:03 [batch.py:51] router release req id 8 +INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10814094543457031 s +INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11014866828918457 s +DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=190428246061767179941517820551913878189, time:1750768203.7740333s req_ids:[8] +DEBUG 06-24 20:30:03 [manager.py:391] +ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:215.9872055053711ms total_cost_time:216.03941917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12240 prompt_cache_len:5151 prompt_cache_ratio:0.42083333333333334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 +DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:03 [batch.py:51] router release req id 8 +INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10788679122924805 s +INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.10993170738220215 s +DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=92574910114674893320954211055605365078, time:1750768203.9936142s req_ids:[8] +DEBUG 06-24 20:30:03 [manager.py:391] +ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:392.29846000671387ms total_cost_time:392.3196792602539ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12241 prompt_cache_len:5151 prompt_cache_ratio:0.42079895433379627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 +DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:04 [batch.py:51] router release req id 8 +INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10653567314147949 s +INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.10910224914550781 s +DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=306653002806314186990580466101328386228, time:1750768204.3956168s req_ids:[8] +DEBUG 06-24 20:30:04 [manager.py:391] +ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:215.9860134124756ms total_cost_time:216.03059768676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12242 prompt_cache_len:5151 prompt_cache_ratio:0.42076458095082503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 +DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:04 [batch.py:51] router release req id 8 +INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10900402069091797 s +INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.11116576194763184 s +DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=1038856400656002782278862147208797843, time:1750768204.6171536s req_ids:[8] +DEBUG 06-24 20:30:04 [manager.py:391] +ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:208.9364528656006ms total_cost_time:208.97984504699707ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12243 prompt_cache_len:5151 prompt_cache_ratio:0.4207302131830434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 +DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:04 [batch.py:51] router release req id 8 +INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10942864418029785 s +INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s +DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=166725632115383027526645103125973393015, time:1750768204.8319354s req_ids:[8] +DEBUG 06-24 20:30:04 [manager.py:391] +ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:30:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 48858.834 tokens/s +DEBUG 06-24 20:30:04 [stats.py:37] Avg prompt tokens throughput: 48850.741 tokens/s +DEBUG 06-24 20:30:04 [stats.py:37] Avg generate tokens throughput: 8.092 tokens/s +INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:212.3281955718994ms total_cost_time:212.39113807678223ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:12244 prompt_cache_len:5151 prompt_cache_ratio:0.4206958510290755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 +DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:04 [batch.py:51] router release req id 8 +INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.1081843376159668 s +INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11002802848815918 s +DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=269316204123715533953208734203234650233, time:1750768205.0486505s req_ids:[8] +DEBUG 06-24 20:30:05 [manager.py:391] +ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:213.31429481506348ms total_cost_time:213.35887908935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12245 prompt_cache_len:5151 prompt_cache_ratio:0.4206614944875459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 +DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:05 [batch.py:51] router release req id 8 +INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s +INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11130237579345703 s +DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=52688240178524939505394464797015232811, time:1750768205.270461s req_ids:[8] +DEBUG 06-24 20:30:05 [manager.py:391] +ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:213.5481834411621ms total_cost_time:213.5932445526123ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12246 prompt_cache_len:5151 prompt_cache_ratio:0.42062714355707986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 +DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:05 [batch.py:51] router release req id 8 +INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10935044288635254 s +INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11130881309509277 s +DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=202087013659869897460412097078758596931, time:1750768205.4883513s req_ids:[8] +DEBUG 06-24 20:30:05 [manager.py:391] +ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:214.87808227539062ms total_cost_time:214.9221897125244ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12247 prompt_cache_len:5151 prompt_cache_ratio:0.42059279823630275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 +DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:05 [batch.py:51] router release req id 8 +INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s +INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s +DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=60859406898512209685746753944047818801, time:1750768205.7104766s req_ids:[8] +DEBUG 06-24 20:30:05 [manager.py:391] +ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:388.5009288787842ms total_cost_time:388.5457515716553ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12248 prompt_cache_len:5151 prompt_cache_ratio:0.4205584585238406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 +DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:06 [batch.py:51] router release req id 8 +INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.1081552505493164 s +INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11027646064758301 s +DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=70348708539558374347036348308610846131, time:1750768206.1045418s req_ids:[8] +DEBUG 06-24 20:30:06 [manager.py:391] +ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:202.97551155090332ms total_cost_time:203.0181884765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12249 prompt_cache_len:5151 prompt_cache_ratio:0.4205241244183199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 +DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:06 [batch.py:51] router release req id 8 +DEBUG 06-24 20:30:06 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:06 [manager.py:283] +DEBUG 06-24 20:30:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:06 [manager.py:284] +INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10988640785217285 s +INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11189556121826172 s +DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=291711606870264346976443458499033741340, time:1750768206.3153477s req_ids:[8] +DEBUG 06-24 20:30:06 [manager.py:391] +ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:211.09819412231445ms total_cost_time:211.1198902130127ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12250 prompt_cache_len:5151 prompt_cache_ratio:0.4204897959183673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 +DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:06 [batch.py:51] router release req id 8 +INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10431385040283203 s +INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.10567784309387207 s +DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=164728346363067285708802167510264490354, time:1750768206.5321105s req_ids:[8] +DEBUG 06-24 20:30:06 [manager.py:391] +ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:215.1780128479004ms total_cost_time:215.22283554077148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12251 prompt_cache_len:5151 prompt_cache_ratio:0.4204554730226104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 +DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:06 [batch.py:51] router release req id 8 +INFO 06-24 20:30:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10775518417358398 s +INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.10988402366638184 s +DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=49955382392383198358322884733962557779, time:1750768206.7506554s req_ids:[8] +DEBUG 06-24 20:30:06 [manager.py:391] +ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:212.74328231811523ms total_cost_time:212.77308464050293ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:12252 prompt_cache_len:5151 prompt_cache_ratio:0.4204211557296768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 +DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:06 [batch.py:51] router release req id 8 +INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10827207565307617 s +INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11032366752624512 s +DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=68896067827600173667486211999822503174, time:1750768206.9716234s req_ids:[8] +DEBUG 06-24 20:30:06 [manager.py:391] +ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:215.59596061706543ms total_cost_time:215.6236171722412ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12253 prompt_cache_len:5151 prompt_cache_ratio:0.4203868440381947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 +DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:07 [batch.py:51] router release req id 8 +INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10726523399353027 s +INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.10924649238586426 s +DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=145185381265212426596394092547838932601, time:1750768207.2036583s req_ids:[8] +DEBUG 06-24 20:30:07 [manager.py:391] +ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:225.30412673950195ms total_cost_time:225.36444664001465ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12254 prompt_cache_len:5151 prompt_cache_ratio:0.4203525379467929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 +DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:07 [batch.py:51] router release req id 8 +INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10876750946044922 s +INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.11066579818725586 s +DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=195030242213755093576653937022891884723, time:1750768207.4211543s req_ids:[8] +DEBUG 06-24 20:30:07 [manager.py:391] +ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:412.9354953765869ms total_cost_time:412.9812717437744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12255 prompt_cache_len:5151 prompt_cache_ratio:0.42031823745410035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 +DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:07 [batch.py:51] router release req id 8 +INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10909008979797363 s +INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.11108613014221191 s +DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=226834278698162554795832408421805947899, time:1750768207.8397315s req_ids:[8] +DEBUG 06-24 20:30:07 [manager.py:391] +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:210.65974235534668ms total_cost_time:210.70265769958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12256 prompt_cache_len:5151 prompt_cache_ratio:0.42028394255874674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 +DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:07 [batch.py:51] router release req id 8 +INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10799264907836914 s +INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.10993218421936035 s +DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=121834125848010825864435878704461604788, time:1750768208.057165s req_ids:[8] +DEBUG 06-24 20:30:08 [manager.py:391] +ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:168.93935203552246ms total_cost_time:168.98107528686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12257 prompt_cache_len:5151 prompt_cache_ratio:0.420249653259362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 +DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:08 [batch.py:51] router release req id 8 +INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10718059539794922 s +INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.10908365249633789 s +DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=83955664704928414327781374524796270783, time:1750768208.2346065s req_ids:[8] +DEBUG 06-24 20:30:08 [manager.py:391] +ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:171.45228385925293ms total_cost_time:171.4949607849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12258 prompt_cache_len:5151 prompt_cache_ratio:0.4202153695545766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 +DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:08 [batch.py:51] router release req id 8 +INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s +INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s +DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=126970056594558218454843025430544765818, time:1750768208.4130774s req_ids:[8] +DEBUG 06-24 20:30:08 [manager.py:391] +ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:212.36109733581543ms total_cost_time:212.40687370300293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12259 prompt_cache_len:5151 prompt_cache_ratio:0.42018109144302146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 +DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:08 [batch.py:51] router release req id 8 +INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.1087195873260498 s +INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11092805862426758 s +DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=116769097747641247597735877954382197320, time:1750768208.6321757s req_ids:[8] +DEBUG 06-24 20:30:08 [manager.py:391] +ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:212.2941017150879ms total_cost_time:212.3396396636963ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12260 prompt_cache_len:5151 prompt_cache_ratio:0.4201468189233279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 +DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:08 [batch.py:51] router release req id 8 +INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10870480537414551 s +INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11073946952819824 s +DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=327094586887805217270978341566685377373, time:1750768208.848913s req_ids:[8] +DEBUG 06-24 20:30:08 [manager.py:391] +ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:390.1326656341553ms total_cost_time:390.17677307128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12261 prompt_cache_len:5151 prompt_cache_ratio:0.4201125519941277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 +DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:09 [batch.py:51] router release req id 8 +INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s +INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s +DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=8567574741461879866753610962329878327, time:1750768209.245811s req_ids:[8] +DEBUG 06-24 20:30:09 [manager.py:391] +ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:214.951753616333ms total_cost_time:214.9956226348877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12262 prompt_cache_len:5151 prompt_cache_ratio:0.42007829065405317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 +DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:09 [batch.py:51] router release req id 8 +INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s +INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.10993432998657227 s +DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=311263609536021027412294858963794908240, time:1750768209.4660754s req_ids:[8] +DEBUG 06-24 20:30:09 [manager.py:391] +ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:212.6791477203369ms total_cost_time:212.7244472503662ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12263 prompt_cache_len:5151 prompt_cache_ratio:0.4200440349017369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 +DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:09 [batch.py:51] router release req id 8 +INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10905599594116211 s +INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.11109256744384766 s +DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=204759345154378090691675856619123849807, time:1750768209.685196s req_ids:[8] +DEBUG 06-24 20:30:09 [manager.py:391] +ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:212.89610862731934ms total_cost_time:212.94164657592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12264 prompt_cache_len:5151 prompt_cache_ratio:0.42000978473581213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 +DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:09 [batch.py:51] router release req id 8 +INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10842776298522949 s +INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.11048173904418945 s +DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=217393415738087548518167525004335582086, time:1750768209.9042857s req_ids:[8] +DEBUG 06-24 20:30:09 [manager.py:391] +ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:209.580659866333ms total_cost_time:209.62786674499512ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12265 prompt_cache_len:5151 prompt_cache_ratio:0.41997554015491234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 +DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:10 [batch.py:51] router release req id 8 +INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s +INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.11000561714172363 s +DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=73769647781656002862499770319571827903, time:1750768210.1209624s req_ids:[8] +DEBUG 06-24 20:30:10 [manager.py:391] +ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:215.95358848571777ms total_cost_time:215.99960327148438ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12266 prompt_cache_len:5151 prompt_cache_ratio:0.4199413011576716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 +DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:10 [batch.py:51] router release req id 8 +INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s +INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.10986709594726562 s +DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=119270155802948820070728089537598370295, time:1750768210.3516078s req_ids:[8] +DEBUG 06-24 20:30:10 [manager.py:391] +ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:220.24822235107422ms total_cost_time:220.2916145324707ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12267 prompt_cache_len:5151 prompt_cache_ratio:0.4199070677427244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 +DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:10 [batch.py:51] router release req id 8 +INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10886907577514648 s +INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.11085677146911621 s +DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=311457074105700765898417825635140374519, time:1750768210.571553s req_ids:[8] +DEBUG 06-24 20:30:10 [manager.py:391] +ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:404.46925163269043ms total_cost_time:404.5143127441406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12268 prompt_cache_len:5151 prompt_cache_ratio:0.41987283990870555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 +DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:10 [batch.py:51] router release req id 8 +INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10792899131774902 s +INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.10988616943359375 s +DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=106523650154662671249854788398950984555, time:1750768210.9827905s req_ids:[8] +DEBUG 06-24 20:30:10 [manager.py:391] +ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:209.85770225524902ms total_cost_time:209.90300178527832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12269 prompt_cache_len:5151 prompt_cache_ratio:0.41983861765425057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 +DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:11 [batch.py:51] router release req id 8 +INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10760784149169922 s +INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.10960268974304199 s +DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=34625511700880369267905303733132738007, time:1750768211.2095954s req_ids:[8] +DEBUG 06-24 20:30:11 [manager.py:391] +ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:223.74582290649414ms total_cost_time:223.79016876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12270 prompt_cache_len:5151 prompt_cache_ratio:0.4198044009779951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 +DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:11 [batch.py:51] router release req id 8 +INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10821723937988281 s +INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.11035513877868652 s +DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=187964616290520193354582041924638940361, time:1750768211.4278483s req_ids:[8] +DEBUG 06-24 20:30:11 [manager.py:391] +ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:209.00917053222656ms total_cost_time:209.06376838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:12271 prompt_cache_len:5151 prompt_cache_ratio:0.4197701898785755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 +DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:11 [batch.py:51] router release req id 8 +INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s +INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.11073970794677734 s +DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=327209994397368789317369608647500120634, time:1750768211.6439855s req_ids:[8] +DEBUG 06-24 20:30:11 [manager.py:391] +ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:212.67366409301758ms total_cost_time:212.71610260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12272 prompt_cache_len:5151 prompt_cache_ratio:0.41973598435462844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 +DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:11 [batch.py:51] router release req id 8 +INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10771489143371582 s +INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.1098637580871582 s +DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=152566384265876434408300939557025266447, time:1750768211.8718204s req_ids:[8] +DEBUG 06-24 20:30:11 [manager.py:391] +ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:224.49469566345215ms total_cost_time:224.531888961792ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:12273 prompt_cache_len:5151 prompt_cache_ratio:0.419701784404791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 +DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:12 [batch.py:51] router release req id 8 +INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10841608047485352 s +INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s +DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=26611703205459395120396464541420283727, time:1750768212.0996208s req_ids:[8] +DEBUG 06-24 20:30:12 [manager.py:391] +ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:217.43273735046387ms total_cost_time:217.47612953186035ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12274 prompt_cache_len:5151 prompt_cache_ratio:0.4196675900277008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 +DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:12 [batch.py:51] router release req id 8 +INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.1086878776550293 s +INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11076712608337402 s +DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=284324273951095787320748538978298563633, time:1750768212.3225946s req_ids:[8] +DEBUG 06-24 20:30:12 [manager.py:391] +ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:379.76670265197754ms total_cost_time:379.81200218200684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12275 prompt_cache_len:5151 prompt_cache_ratio:0.41963340122199594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 +DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:12 [batch.py:51] router release req id 8 +INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10868525505065918 s +INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11060142517089844 s +DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=326579203069190064052218573364872873754, time:1750768212.7029383s req_ids:[8] +DEBUG 06-24 20:30:12 [manager.py:391] +ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:213.3960723876953ms total_cost_time:213.4411334991455ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12276 prompt_cache_len:5151 prompt_cache_ratio:0.4195992179863148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 +DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:12 [batch.py:51] router release req id 8 +INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10784053802490234 s +INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.1098170280456543 s +DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=334427082786688717670328296191861956164, time:1750768212.9231236s req_ids:[8] +DEBUG 06-24 20:30:12 [manager.py:391] +ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:210.75868606567383ms total_cost_time:210.82210540771484ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:12277 prompt_cache_len:5151 prompt_cache_ratio:0.41956504031929626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 +DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:13 [batch.py:51] router release req id 8 +INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s +INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080193519592285 s +DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=220147819322291395145810741222888342634, time:1750768213.139639s req_ids:[8] +DEBUG 06-24 20:30:13 [manager.py:391] +ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:209.75971221923828ms total_cost_time:209.8069190979004ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12278 prompt_cache_len:5151 prompt_cache_ratio:0.41953086821957974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 +DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:13 [batch.py:51] router release req id 8 +INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10899972915649414 s +INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11113405227661133 s +DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=316927324915558698703921048075067128267, time:1750768213.3576663s req_ids:[8] +DEBUG 06-24 20:30:13 [manager.py:391] +ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:208.30869674682617ms total_cost_time:208.35256576538086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12279 prompt_cache_len:5151 prompt_cache_ratio:0.41949670168580505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 +DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:13 [batch.py:51] router release req id 8 +INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.1088724136352539 s +INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11107540130615234 s +DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=206139606078525187179088752187312733038, time:1750768213.5721948s req_ids:[8] +DEBUG 06-24 20:30:13 [manager.py:391] +ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:209.059476852417ms total_cost_time:209.10239219665527ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12280 prompt_cache_len:5151 prompt_cache_ratio:0.4194625407166124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 +DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:13 [batch.py:51] router release req id 8 +INFO 06-24 20:30:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s +INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11068177223205566 s +DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=152871604667802670178516378231573534170, time:1750768213.7874155s req_ids:[8] +DEBUG 06-24 20:30:13 [manager.py:391] +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:212.53490447998047ms total_cost_time:212.57877349853516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12281 prompt_cache_len:5151 prompt_cache_ratio:0.41942838531064247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 +DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:14 [batch.py:51] router release req id 8 +INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.3106234073638916 s +INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.31250548362731934 s +DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=35408899537013279882574449970144099957, time:1750768214.2278163s req_ids:[8] +DEBUG 06-24 20:30:14 [manager.py:391] +ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:434.6632957458496ms total_cost_time:434.7085952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12282 prompt_cache_len:5151 prompt_cache_ratio:0.41939423546653637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 +DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:14 [batch.py:51] router release req id 8 +INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10974693298339844 s +INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s +DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=255322595240908329061791178551874471116, time:1750768214.4459279s req_ids:[8] +DEBUG 06-24 20:30:14 [manager.py:391] +ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:207.72957801818848ms total_cost_time:207.7772617340088ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:12283 prompt_cache_len:5151 prompt_cache_ratio:0.41936009118293577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 +DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:14 [batch.py:51] router release req id 8 +INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s +INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11047625541687012 s +DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=319180040553376592275858233686237598524, time:1750768214.6595943s req_ids:[8] +DEBUG 06-24 20:30:14 [manager.py:391] +ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:205.0940990447998ms total_cost_time:205.1372528076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12284 prompt_cache_len:5151 prompt_cache_ratio:0.4193259524584826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 +DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:14 [batch.py:51] router release req id 8 +INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s +INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11063003540039062 s +DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=165248541861191840379271098035928298562, time:1750768214.8722734s req_ids:[8] +DEBUG 06-24 20:30:14 [manager.py:391] +ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:30:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 50085.260 tokens/s +DEBUG 06-24 20:30:14 [stats.py:37] Avg prompt tokens throughput: 50077.094 tokens/s +DEBUG 06-24 20:30:14 [stats.py:37] Avg generate tokens throughput: 8.166 tokens/s +INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:211.4541530609131ms total_cost_time:211.49706840515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12285 prompt_cache_len:5151 prompt_cache_ratio:0.4192918192918193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 +DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:15 [batch.py:51] router release req id 8 +INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10718083381652832 s +INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.1092524528503418 s +DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=142439062501918909999817282146898394487, time:1750768215.0885985s req_ids:[8] +DEBUG 06-24 20:30:15 [manager.py:391] +ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:210.04796028137207ms total_cost_time:210.09397506713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12286 prompt_cache_len:5151 prompt_cache_ratio:0.4192576916815888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 +DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:15 [batch.py:51] router release req id 8 +INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10892486572265625 s +INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11083388328552246 s +DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=323717495803503646380329902279718453848, time:1750768215.3075383s req_ids:[8] +DEBUG 06-24 20:30:15 [manager.py:391] +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:215.6851291656494ms total_cost_time:215.7275676727295ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12287 prompt_cache_len:5151 prompt_cache_ratio:0.4192235696264344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 +DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:15 [batch.py:51] router release req id 8 +INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s +INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11011362075805664 s +DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=280498292470406645513611029609521955612, time:1750768215.5295215s req_ids:[8] +DEBUG 06-24 20:30:15 [manager.py:391] +ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:385.80894470214844ms total_cost_time:385.85567474365234ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12288 prompt_cache_len:5151 prompt_cache_ratio:0.419189453125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 +DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:15 [batch.py:51] router release req id 8 +INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10908889770507812 s +INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022496223449707 s +DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=159541928179216326227174337285333288617, time:1750768215.9230154s req_ids:[8] +DEBUG 06-24 20:30:15 [manager.py:391] +ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:222.3207950592041ms total_cost_time:222.36299514770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12289 prompt_cache_len:5151 prompt_cache_ratio:0.4191553421759297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 +DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:16 [batch.py:51] router release req id 8 +INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10857319831848145 s +INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s +DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=317547751233177573621313497514028922574, time:1750768216.1565542s req_ids:[8] +DEBUG 06-24 20:30:16 [manager.py:391] +ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:216.98307991027832ms total_cost_time:217.0271873474121ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12290 prompt_cache_len:5151 prompt_cache_ratio:0.41912123677786817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 +DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:16 [batch.py:51] router release req id 8 +INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10961174964904785 s +INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.1115882396697998 s +DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=146630690807243504140236523240371656616, time:1750768216.3739161s req_ids:[8] +DEBUG 06-24 20:30:16 [manager.py:391] +ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.46891403198242ms total_cost_time:208.5118293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12291 prompt_cache_len:5151 prompt_cache_ratio:0.4190871369294606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 +DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:16 [batch.py:51] router release req id 8 +INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10622119903564453 s +INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.10732769966125488 s +DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=183913818176938679173598097788599764881, time:1750768216.5880268s req_ids:[8] +DEBUG 06-24 20:30:16 [manager.py:391] +ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.60934257507324ms total_cost_time:208.634614944458ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12292 prompt_cache_len:5151 prompt_cache_ratio:0.41905304262935245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 +DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:16 [batch.py:51] router release req id 8 +INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10464000701904297 s +INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.10652327537536621 s +DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=252749128555520377695457858129818006532, time:1750768216.8018572s req_ids:[8] +DEBUG 06-24 20:30:16 [manager.py:391] +ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.93144607543945ms total_cost_time:208.95767211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12293 prompt_cache_len:5151 prompt_cache_ratio:0.4190189538761897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 +DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:16 [batch.py:51] router release req id 8 +INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10602879524230957 s +INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.1077885627746582 s +DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=305500924411200803363995687139668718167, time:1750768217.0210962s req_ids:[8] +DEBUG 06-24 20:30:17 [manager.py:391] +INFO 06-24 20:30:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:218.07479858398438ms total_cost_time:218.09935569763184ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12294 prompt_cache_len:5151 prompt_cache_ratio:0.41898487066861884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 +DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:17 [batch.py:51] router release req id 8 +INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.1044456958770752 s +INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10638999938964844 s +INFO 06-24 20:30:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=190907607336372260987264323645116104204, time:1750768217.2442908s req_ids:[8] +DEBUG 06-24 20:30:17 [manager.py:391] +ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:382.17735290527344ms total_cost_time:382.2033405303955ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12295 prompt_cache_len:5151 prompt_cache_ratio:0.4189507930052867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 +DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:17 [batch.py:51] router release req id 8 +INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10595846176147461 s +INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10791015625 s +DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=175694482358923262402626680987191023810, time:1750768217.626134s req_ids:[8] +DEBUG 06-24 20:30:17 [manager.py:391] +ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:210.22653579711914ms total_cost_time:210.25323867797852ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12296 prompt_cache_len:5151 prompt_cache_ratio:0.4189167208848406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 +DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:17 [batch.py:51] router release req id 8 +INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10581612586975098 s +INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10776090621948242 s +DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=203332834193548005834256147153747306924, time:1750768217.8408647s req_ids:[8] +DEBUG 06-24 20:30:17 [manager.py:391] +ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:207.43489265441895ms total_cost_time:207.47876167297363ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12297 prompt_cache_len:5151 prompt_cache_ratio:0.41888265430592825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 +DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:17 [batch.py:51] router release req id 8 +INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10782098770141602 s +INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10978937149047852 s +DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=309721090638467427126788955681082847325, time:1750768218.0526311s req_ids:[8] +DEBUG 06-24 20:30:18 [manager.py:391] +ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:212.7523422241211ms total_cost_time:212.79549598693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12298 prompt_cache_len:5151 prompt_cache_ratio:0.4188485932671979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 +DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:18 [batch.py:51] router release req id 8 +INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.1074228286743164 s +INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10951972007751465 s +DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=231964861893324105947416580115367687232, time:1750768218.270329s req_ids:[8] +DEBUG 06-24 20:30:18 [manager.py:391] +ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:208.19330215454102ms total_cost_time:208.2362174987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12299 prompt_cache_len:5151 prompt_cache_ratio:0.41881453776729816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 +DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:18 [batch.py:51] router release req id 8 +INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10730552673339844 s +INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s +DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=90084042079271187875812918590054601047, time:1750768218.4847622s req_ids:[8] +DEBUG 06-24 20:30:18 [manager.py:391] +ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:212.08763122558594ms total_cost_time:212.13316917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12300 prompt_cache_len:5151 prompt_cache_ratio:0.41878048780487803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 +DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:18 [batch.py:51] router release req id 8 +INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10750222206115723 s +INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10939955711364746 s +DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=289885995674930448297700330981269122105, time:1750768218.7024384s req_ids:[8] +DEBUG 06-24 20:30:18 [manager.py:391] +ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:209.0892791748047ms total_cost_time:209.13338661193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12301 prompt_cache_len:5151 prompt_cache_ratio:0.41874644337858713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 +DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:19 [batch.py:51] router release req id 8 +INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.3092012405395508 s +INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.31110239028930664 s +DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=38127030148488292311809444343364004316, time:1750768219.135195s req_ids:[8] +DEBUG 06-24 20:30:19 [manager.py:391] +ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:431.77151679992676ms total_cost_time:431.7903518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:12302 prompt_cache_len:5151 prompt_cache_ratio:0.41871240448707525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 +DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:19 [batch.py:51] router release req id 8 +INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10590791702270508 s +INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10767960548400879 s +DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=45000330983815278105684239013301663375, time:1750768219.3538237s req_ids:[8] +DEBUG 06-24 20:30:19 [manager.py:391] +ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:206.5718173980713ms total_cost_time:206.61664009094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12303 prompt_cache_len:5151 prompt_cache_ratio:0.4186783711289929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 +DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:19 [batch.py:51] router release req id 8 +INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s +INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.1099860668182373 s +DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=309989169019740880780485155574810673688, time:1750768219.565168s req_ids:[8] +DEBUG 06-24 20:30:19 [manager.py:391] +ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:205.9030532836914ms total_cost_time:205.9488296508789ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12304 prompt_cache_len:5151 prompt_cache_ratio:0.4186443433029909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 +DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:19 [batch.py:51] router release req id 8 +INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10740423202514648 s +INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10986208915710449 s +DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=311964729791632439200582052398223360061, time:1750768219.7761662s req_ids:[8] +DEBUG 06-24 20:30:19 [manager.py:391] +ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:208.16850662231445ms total_cost_time:208.21261405944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12305 prompt_cache_len:5151 prompt_cache_ratio:0.41861032100772044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 +DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:19 [batch.py:51] router release req id 8 +INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10737133026123047 s +INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10938715934753418 s +DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=271777441607213166242967730699355693145, time:1750768219.9894078s req_ids:[8] +DEBUG 06-24 20:30:19 [manager.py:391] +ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:204.66899871826172ms total_cost_time:204.7126293182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12306 prompt_cache_len:5151 prompt_cache_ratio:0.41857630424183323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 +DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:20 [batch.py:51] router release req id 8 +INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10681414604187012 s +INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10869765281677246 s +DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=290199570875732600437797211086623048126, time:1750768220.2007053s req_ids:[8] +DEBUG 06-24 20:30:20 [manager.py:391] +ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:207.94177055358887ms total_cost_time:207.9620361328125ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12307 prompt_cache_len:5151 prompt_cache_ratio:0.4185422930039815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 +DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:20 [batch.py:51] router release req id 8 +INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10592150688171387 s +INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10789203643798828 s +DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=235226966213028544069639166104770228557, time:1750768220.4158187s req_ids:[8] +DEBUG 06-24 20:30:20 [manager.py:391] +ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:385.8811855316162ms total_cost_time:385.9260082244873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12308 prompt_cache_len:5151 prompt_cache_ratio:0.4185082872928177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 +DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:20 [batch.py:51] router release req id 8 +INFO 06-24 20:30:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10760235786437988 s +INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10938811302185059 s +DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=277900629765222991500043776884889623013, time:1750768220.8055282s req_ids:[8] +DEBUG 06-24 20:30:20 [manager.py:391] +ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:211.6565704345703ms total_cost_time:211.7006778717041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12309 prompt_cache_len:5151 prompt_cache_ratio:0.41847428710699486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 +DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:20 [batch.py:51] router release req id 8 +INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10920333862304688 s +INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.11106681823730469 s +DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=128748271447325596884409514291324043654, time:1750768221.023272s req_ids:[8] +DEBUG 06-24 20:30:21 [manager.py:391] +ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:213.13714981079102ms total_cost_time:213.1798267364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12310 prompt_cache_len:5151 prompt_cache_ratio:0.4184402924451665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 +DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:21 [batch.py:51] router release req id 8 +INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10619068145751953 s +INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10793685913085938 s +DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=276425693268558130205846185341956890668, time:1750768221.2412093s req_ids:[8] +DEBUG 06-24 20:30:21 [manager.py:391] +ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:209.63215827941895ms total_cost_time:209.65266227722168ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12311 prompt_cache_len:5151 prompt_cache_ratio:0.4184063033059865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 +DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:21 [batch.py:51] router release req id 8 +INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.1039571762084961 s +INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10574054718017578 s +DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=30019235178860542505242026576238862951, time:1750768221.468738s req_ids:[8] +DEBUG 06-24 20:30:21 [manager.py:391] +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:231.60195350646973ms total_cost_time:231.64749145507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12312 prompt_cache_len:5151 prompt_cache_ratio:0.4183723196881092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 +DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:21 [batch.py:51] router release req id 8 +INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10718297958374023 s +INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10903096199035645 s +DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=6741900432493218484736035659638790538, time:1750768221.6883554s req_ids:[8] +DEBUG 06-24 20:30:21 [manager.py:391] +ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:210.58225631713867ms total_cost_time:210.62850952148438ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12313 prompt_cache_len:5151 prompt_cache_ratio:0.41833834159018923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 +DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:21 [batch.py:51] router release req id 8 +INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10804009437561035 s +INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10983681678771973 s +DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=11000193433863802559194163045036323685, time:1750768221.9032564s req_ids:[8] +DEBUG 06-24 20:30:21 [manager.py:391] +ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:385.2221965789795ms total_cost_time:385.2674961090088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12314 prompt_cache_len:5151 prompt_cache_ratio:0.4183043690108819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 +DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:22 [batch.py:51] router release req id 8 +INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10812878608703613 s +INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s +DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=141112722764993977758868702564397449941, time:1750768222.2928276s req_ids:[8] +DEBUG 06-24 20:30:22 [manager.py:391] +ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:211.60483360290527ms total_cost_time:211.64941787719727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12315 prompt_cache_len:5151 prompt_cache_ratio:0.41827040194884285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 +DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:22 [batch.py:51] router release req id 8 +INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10987401008605957 s +INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.11174726486206055 s +DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=125452825545383334236345434751927397468, time:1750768222.5105457s req_ids:[8] +DEBUG 06-24 20:30:22 [manager.py:391] +ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:214.28728103637695ms total_cost_time:214.33091163635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12316 prompt_cache_len:5151 prompt_cache_ratio:0.41823644040272817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 +DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:22 [batch.py:51] router release req id 8 +INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10790562629699707 s +INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.10974335670471191 s +DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=192811308354748180395200290406349671320, time:1750768222.7301226s req_ids:[8] +DEBUG 06-24 20:30:22 [manager.py:391] +ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:210.2980613708496ms total_cost_time:210.3421688079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12317 prompt_cache_len:5151 prompt_cache_ratio:0.4182024843711943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 +DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:22 [batch.py:51] router release req id 8 +INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10889387130737305 s +INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.11087632179260254 s +DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=268798155189771828048132465972705914485, time:1750768222.9463332s req_ids:[8] +DEBUG 06-24 20:30:22 [manager.py:391] +ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:210.0825309753418ms total_cost_time:210.13379096984863ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:12318 prompt_cache_len:5151 prompt_cache_ratio:0.4181685338528982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 +DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:23 [batch.py:51] router release req id 8 +INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10793662071228027 s +INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10984659194946289 s +DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=183527022583178955269496482693212970114, time:1750768223.1624532s req_ids:[8] +DEBUG 06-24 20:30:23 [manager.py:391] +INFO 06-24 20:30:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:211.02404594421387ms total_cost_time:211.05027198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12319 prompt_cache_len:5151 prompt_cache_ratio:0.4181345888464973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 +DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:23 [batch.py:51] router release req id 8 +INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10472369194030762 s +INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10656523704528809 s +DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=164709275216824695765698985800874702156, time:1750768223.3795993s req_ids:[8] +DEBUG 06-24 20:30:23 [manager.py:391] +ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:210.09588241577148ms total_cost_time:210.11781692504883ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12320 prompt_cache_len:5151 prompt_cache_ratio:0.41810064935064933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 +DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:23 [batch.py:51] router release req id 8 +INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10663127899169922 s +INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10852575302124023 s +DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=68698354258610469532954112814209678665, time:1750768223.5964773s req_ids:[8] +DEBUG 06-24 20:30:23 [manager.py:391] +ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:395.0848579406738ms total_cost_time:395.129919052124ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12321 prompt_cache_len:5151 prompt_cache_ratio:0.4180667153640127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 +DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:23 [batch.py:51] router release req id 8 +INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10791945457458496 s +INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.11002564430236816 s +DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=267181954252160671907080280023731073419, time:1750768223.9958658s req_ids:[8] +DEBUG 06-24 20:30:23 [manager.py:391] +ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:211.75074577331543ms total_cost_time:211.81654930114746ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:12322 prompt_cache_len:5151 prompt_cache_ratio:0.4180327868852459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 +DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:24 [batch.py:51] router release req id 8 +INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10635256767272949 s +INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10809111595153809 s +DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=159796958108498451253758886484286906480, time:1750768224.2190518s req_ids:[8] +DEBUG 06-24 20:30:24 [manager.py:391] +ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:218.20950508117676ms total_cost_time:218.25456619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12323 prompt_cache_len:5151 prompt_cache_ratio:0.4179988639130082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 +DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:24 [batch.py:51] router release req id 8 +INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10738348960876465 s +INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10918140411376953 s +DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=150905350408843317622404754928711601137, time:1750768224.4372444s req_ids:[8] +DEBUG 06-24 20:30:24 [manager.py:391] +ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:167.56677627563477ms total_cost_time:167.60969161987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12324 prompt_cache_len:5151 prompt_cache_ratio:0.4179649464459591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 +DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:24 [batch.py:51] router release req id 8 +INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10768246650695801 s +INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10867881774902344 s +DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=163827043402540936352270611200631441723, time:1750768224.6118188s req_ids:[8] +DEBUG 06-24 20:30:24 [manager.py:391] +ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:206.4507007598877ms total_cost_time:206.49409294128418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12325 prompt_cache_len:5151 prompt_cache_ratio:0.41793103448275865 mtp_avg_token_per_step:1.0 +DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:26 [batch.py:51] router release req id 8 +INFO 06-24 20:30:26 [manager.py:88] detokenize batch cost time 1346.8925952911377 ms +INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 +INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.007501125335693359 s +INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.009353399276733398 s +DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=149971353677183372869037845085130413049, time:1750768226.069404s req_ids:[8] +DEBUG 06-24 20:30:26 [manager.py:391] +DEBUG 06-24 20:30:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 45451.710 tokens/s +DEBUG 06-24 20:30:26 [stats.py:37] Avg prompt tokens throughput: 45444.414 tokens/s +DEBUG 06-24 20:30:26 [stats.py:37] Avg generate tokens throughput: 7.296 tokens/s +ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:70.87516784667969ms total_cost_time:70.91641426086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12326 prompt_cache_len:5151 prompt_cache_ratio:0.4178971280220672 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 +DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:26 [batch.py:51] router release req id 8 +INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.11015725135803223 s +INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.11197423934936523 s +DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=11641086166201680642182721610707953798, time:1750768226.2472377s req_ids:[8] +DEBUG 06-24 20:30:26 [manager.py:391] +ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:202.26192474365234ms total_cost_time:202.30674743652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12327 prompt_cache_len:5151 prompt_cache_ratio:0.41786322706254564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 +DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:26 [batch.py:51] router release req id 8 +INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.10840177536010742 s +INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.10940337181091309 s +DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=4827784159558355039442597672753264100, time:1750768226.4566085s req_ids:[8] +DEBUG 06-24 20:30:26 [manager.py:391] +ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:340.548038482666ms total_cost_time:340.5919075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12328 prompt_cache_len:5151 prompt_cache_ratio:0.4178293316028553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 +DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:26 [batch.py:51] router release req id 8 +INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.10769104957580566 s +INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.10893058776855469 s +DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=179908133745322955268760531501305073618, time:1750768226.802731s req_ids:[8] +DEBUG 06-24 20:30:26 [manager.py:391] +ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:200.16765594482422ms total_cost_time:200.2108097076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12329 prompt_cache_len:5151 prompt_cache_ratio:0.4177954416416579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 +DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:26 [batch.py:51] router release req id 8 +INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10858917236328125 s +INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s +DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=238628165189564810994990000698402897339, time:1750768227.0096383s req_ids:[8] +DEBUG 06-24 20:30:27 [manager.py:391] +ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:209.76805686950684ms total_cost_time:209.81168746948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12330 prompt_cache_len:5151 prompt_cache_ratio:0.4177615571776156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 +DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:27 [batch.py:51] router release req id 8 +INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10841631889343262 s +INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11014986038208008 s +DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=285358479764353040924748186395470928963, time:1750768227.2264001s req_ids:[8] +DEBUG 06-24 20:30:27 [manager.py:391] +ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:209.24854278564453ms total_cost_time:209.30075645446777ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12331 prompt_cache_len:5151 prompt_cache_ratio:0.41772767820939094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 +DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:27 [batch.py:51] router release req id 8 +INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.1079704761505127 s +INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.1090087890625 s +DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=135789942995746476933530366063026894731, time:1750768227.4434001s req_ids:[8] +DEBUG 06-24 20:30:27 [manager.py:391] +ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:192.9464340209961ms total_cost_time:192.98863410949707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12332 prompt_cache_len:5151 prompt_cache_ratio:0.4176938047356471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 +DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:27 [batch.py:51] router release req id 8 +INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10725140571594238 s +INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.1090240478515625 s +DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=147088582560991178967695306638244271497, time:1750768227.645021s req_ids:[8] +DEBUG 06-24 20:30:27 [manager.py:391] +ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:210.6029987335205ms total_cost_time:210.6473445892334ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12333 prompt_cache_len:5151 prompt_cache_ratio:0.4176599367550474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 +DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:27 [batch.py:51] router release req id 8 +INFO 06-24 20:30:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10877656936645508 s +INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s +DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=294972682221820793108185879684440117626, time:1750768227.8602748s req_ids:[8] +DEBUG 06-24 20:30:27 [manager.py:391] +ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:382.7393054962158ms total_cost_time:382.784366607666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12334 prompt_cache_len:5151 prompt_cache_ratio:0.41762607426625586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 +DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:28 [batch.py:51] router release req id 8 +INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10840559005737305 s +INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.11033058166503906 s +DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=213266453450289569996508969475129114344, time:1750768228.2468197s req_ids:[8] +DEBUG 06-24 20:30:28 [manager.py:391] +ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:207.87334442138672ms total_cost_time:207.9172134399414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12335 prompt_cache_len:5151 prompt_cache_ratio:0.41759221726793677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 +DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:28 [batch.py:51] router release req id 8 +INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10845589637756348 s +INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10938358306884766 s +DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=183097842022108625875050875535606588837, time:1750768228.4586284s req_ids:[8] +DEBUG 06-24 20:30:28 [manager.py:391] +ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:168.21789741516113ms total_cost_time:168.259859085083ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12336 prompt_cache_len:5151 prompt_cache_ratio:0.4175583657587549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 +DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:28 [batch.py:51] router release req id 8 +INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10693359375 s +INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10874533653259277 s +DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=23468331502140754638496891012924100072, time:1750768228.6327863s req_ids:[8] +DEBUG 06-24 20:30:28 [manager.py:391] +ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:204.16760444641113ms total_cost_time:204.209566116333ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12337 prompt_cache_len:5151 prompt_cache_ratio:0.41752451973737537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 +DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:28 [batch.py:51] router release req id 8 +INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10789990425109863 s +INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10881543159484863 s +DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=142365426414513507668573665076643776753, time:1750768228.8559191s req_ids:[8] +DEBUG 06-24 20:30:28 [manager.py:391] +ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:187.92176246643066ms total_cost_time:187.97063827514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:12338 prompt_cache_len:5151 prompt_cache_ratio:0.4174906792024639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 +DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:28 [batch.py:51] router release req id 8 +INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10633468627929688 s +INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.10735964775085449 s +DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=174920678958286956482544384087575912741, time:1750768229.038595s req_ids:[8] +DEBUG 06-24 20:30:29 [manager.py:391] +ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:208.76622200012207ms total_cost_time:208.80985260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12339 prompt_cache_len:5151 prompt_cache_ratio:0.4174568441526866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 +DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:29 [batch.py:51] router release req id 8 +INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10641622543334961 s +INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.10736322402954102 s +DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=38090421814484206465011050811261456010, time:1750768229.2516158s req_ids:[8] +DEBUG 06-24 20:30:29 [manager.py:391] +ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:173.1410026550293ms total_cost_time:173.16222190856934ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12340 prompt_cache_len:5151 prompt_cache_ratio:0.4174230145867099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 +DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:29 [batch.py:51] router release req id 8 +INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10596561431884766 s +INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.1079103946685791 s +DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=182901315737257360021298706793562932647, time:1750768229.4308355s req_ids:[8] +DEBUG 06-24 20:30:29 [manager.py:391] +ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:373.4912872314453ms total_cost_time:373.537540435791ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12341 prompt_cache_len:5151 prompt_cache_ratio:0.4173891905032007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 +DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:29 [batch.py:51] router release req id 8 +INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10645508766174316 s +INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.1074526309967041 s +DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=41613462274429965579517442123484210180, time:1750768229.8099973s req_ids:[8] +DEBUG 06-24 20:30:29 [manager.py:391] +ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:209.3663215637207ms total_cost_time:209.4097137451172ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12342 prompt_cache_len:5151 prompt_cache_ratio:0.41735537190082644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 +DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:29 [batch.py:51] router release req id 8 +INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10798931121826172 s +INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10933732986450195 s +DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=172254792676380606999643884807252589719, time:1750768230.0256999s req_ids:[8] +DEBUG 06-24 20:30:30 [manager.py:391] +ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:212.13507652282715ms total_cost_time:212.18109130859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12343 prompt_cache_len:5151 prompt_cache_ratio:0.4173215587782549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 +DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:30 [batch.py:51] router release req id 8 +INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10710430145263672 s +INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10889983177185059 s +DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=168098677183327268161574994210680427808, time:1750768230.2472036s req_ids:[8] +DEBUG 06-24 20:30:30 [manager.py:391] +ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:216.86410903930664ms total_cost_time:216.90773963928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12344 prompt_cache_len:5151 prompt_cache_ratio:0.41728775113415423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 +DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:30 [batch.py:51] router release req id 8 +INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10759329795837402 s +INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.1086585521697998 s +DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=338907649277683956906523230870232238747, time:1750768230.4691293s req_ids:[8] +DEBUG 06-24 20:30:30 [manager.py:391] +ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:211.68875694274902ms total_cost_time:211.71283721923828ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:12345 prompt_cache_len:5151 prompt_cache_ratio:0.4172539489671932 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 +DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:30 [batch.py:51] router release req id 8 +INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10361886024475098 s +INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10492300987243652 s +DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=261001364713037148241904907513870401650, time:1750768230.6861336s req_ids:[8] +DEBUG 06-24 20:30:30 [manager.py:391] +ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:216.43471717834473ms total_cost_time:216.45593643188477ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12346 prompt_cache_len:5151 prompt_cache_ratio:0.4172201522760408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 +DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:30 [batch.py:51] router release req id 8 +INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10332202911376953 s +INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.1045830249786377 s +DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=177285919370903052506238714961177905800, time:1750768230.907219s req_ids:[8] +DEBUG 06-24 20:30:30 [manager.py:391] +ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:222.0780849456787ms total_cost_time:222.09835052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12347 prompt_cache_len:5151 prompt_cache_ratio:0.41718636105936663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 +DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:31 [batch.py:51] router release req id 8 +INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10271596908569336 s +INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10384774208068848 s +DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=289365083330138975732603467309515462637, time:1750768231.1142542s req_ids:[8] +DEBUG 06-24 20:30:31 [manager.py:391] +ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:334.9034786224365ms total_cost_time:334.92350578308105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12348 prompt_cache_len:5151 prompt_cache_ratio:0.4171525753158406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 +DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:31 [batch.py:51] router release req id 8 +INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10408473014831543 s +INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10537362098693848 s +DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=337988136231583324214519889707238234349, time:1750768231.467587s req_ids:[8] +DEBUG 06-24 20:30:31 [manager.py:391] +ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:214.2794132232666ms total_cost_time:214.30230140686035ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:12349 prompt_cache_len:5151 prompt_cache_ratio:0.41711879504413313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 +DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:31 [batch.py:51] router release req id 8 +INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10551285743713379 s +INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10677957534790039 s +DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=65266063218336614979421059502422687484, time:1750768231.6832716s req_ids:[8] +DEBUG 06-24 20:30:31 [manager.py:391] +ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:188.73095512390137ms total_cost_time:188.7519359588623ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12350 prompt_cache_len:5151 prompt_cache_ratio:0.417085020242915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 +DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:31 [batch.py:51] router release req id 8 +INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10391736030578613 s +INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10515713691711426 s +DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=152229579294515821701900622242121055818, time:1750768231.8749394s req_ids:[8] +DEBUG 06-24 20:30:31 [manager.py:391] +ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:209.93781089782715ms total_cost_time:209.95759963989258ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12351 prompt_cache_len:5151 prompt_cache_ratio:0.41705125091085743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 +DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:31 [batch.py:51] router release req id 8 +INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10301542282104492 s +INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.10418176651000977 s +DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=306317128709785822871030362901159817571, time:1750768232.0866063s req_ids:[8] +DEBUG 06-24 20:30:32 [manager.py:391] +ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:203.39202880859375ms total_cost_time:203.4132480621338ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12352 prompt_cache_len:5151 prompt_cache_ratio:0.4170174870466321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 +DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:32 [batch.py:51] router release req id 8 +INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10312438011169434 s +INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.10429239273071289 s +DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=100911896635480136607813043007245329642, time:1750768232.295276s req_ids:[8] +DEBUG 06-24 20:30:32 [manager.py:391] +ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:203.74369621276855ms total_cost_time:203.78899574279785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12353 prompt_cache_len:5151 prompt_cache_ratio:0.4169837286489112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 +DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:32 [batch.py:51] router release req id 8 +INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10602736473083496 s +INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.1073141098022461 s +DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=282869368542816713727081358954775360489, time:1750768232.5012953s req_ids:[8] +DEBUG 06-24 20:30:32 [manager.py:391] +ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:207.7333927154541ms total_cost_time:207.75461196899414ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12354 prompt_cache_len:5151 prompt_cache_ratio:0.41694997571636716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 +DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:32 [batch.py:51] router release req id 8 +INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.3062744140625 s +INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.3077511787414551 s +DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=198537307991279013509757405322646960959, time:1750768232.9232235s req_ids:[8] +DEBUG 06-24 20:30:32 [manager.py:391] +ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:428.6651611328125ms total_cost_time:428.68542671203613ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12355 prompt_cache_len:5151 prompt_cache_ratio:0.416916228247673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 +DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:33 [batch.py:51] router release req id 8 +INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10606837272644043 s +INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10723352432250977 s +DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=8190218448681302252991488561925840390, time:1750768233.1494133s req_ids:[8] +DEBUG 06-24 20:30:33 [manager.py:391] +ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:216.70842170715332ms total_cost_time:216.7530059814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12356 prompt_cache_len:5151 prompt_cache_ratio:0.4168824862415021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 +DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:33 [batch.py:51] router release req id 8 +INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10499191284179688 s +INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10617876052856445 s +DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=308470909684577249286722433319794328234, time:1750768233.3678327s req_ids:[8] +DEBUG 06-24 20:30:33 [manager.py:391] +ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:214.31541442871094ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12357 prompt_cache_len:5151 prompt_cache_ratio:0.4168487496965283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 +DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:33 [batch.py:51] router release req id 8 +INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10573339462280273 s +INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10697412490844727 s +DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=63654941481730843458478830735348255155, time:1750768233.5872405s req_ids:[8] +DEBUG 06-24 20:30:33 [manager.py:391] +ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:214.46919441223145ms total_cost_time:214.51210975646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12358 prompt_cache_len:5151 prompt_cache_ratio:0.4168150186114258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 +DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:33 [batch.py:51] router release req id 8 +INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10632133483886719 s +INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10740113258361816 s +DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=9011368820392217543164404922953405978, time:1750768233.8061125s req_ids:[8] +DEBUG 06-24 20:30:33 [manager.py:391] +ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:213.76299858093262ms total_cost_time:213.78540992736816ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:12359 prompt_cache_len:5151 prompt_cache_ratio:0.4167812929848693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 +DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:33 [batch.py:51] router release req id 8 +INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10360836982727051 s +INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10462737083435059 s +DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=90139554579142640498108624029452065721, time:1750768234.025219s req_ids:[8] +DEBUG 06-24 20:30:34 [manager.py:391] +ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:212.72611618041992ms total_cost_time:212.7695083618164ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12360 prompt_cache_len:5151 prompt_cache_ratio:0.416747572815534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 +DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:34 [batch.py:51] router release req id 8 +INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10592889785766602 s +INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10702967643737793 s +DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=320220457744279792418772654605338968470, time:1750768234.2656755s req_ids:[8] +DEBUG 06-24 20:30:34 [manager.py:391] +ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:436.0630512237549ms total_cost_time:436.107873916626ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12361 prompt_cache_len:5151 prompt_cache_ratio:0.4167138581020953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 +DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:34 [batch.py:51] router release req id 8 +INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.1073911190032959 s +INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10863041877746582 s +DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=205387999372103311414378260568559217514, time:1750768234.684641s req_ids:[8] +DEBUG 06-24 20:30:34 [manager.py:391] +ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:218.66416931152344ms total_cost_time:218.68610382080078ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12362 prompt_cache_len:5151 prompt_cache_ratio:0.41668014884322924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 +DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:34 [batch.py:51] router release req id 8 +INFO 06-24 20:30:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10439586639404297 s +INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.1054999828338623 s +DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=245275210252574526833533952848575797243, time:1750768234.9287624s req_ids:[8] +DEBUG 06-24 20:30:34 [manager.py:391] +ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:229.72893714904785ms total_cost_time:229.7499179840088ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12363 prompt_cache_len:5151 prompt_cache_ratio:0.4166464450376122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 +DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:35 [batch.py:51] router release req id 8 +INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10576248168945312 s +INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10697531700134277 s +DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=299427331904696112256235850327934306033, time:1750768235.1433938s req_ids:[8] +DEBUG 06-24 20:30:35 [manager.py:391] +ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:211.09652519226074ms total_cost_time:211.14134788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12364 prompt_cache_len:5151 prompt_cache_ratio:0.41661274668392106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 +DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:35 [batch.py:51] router release req id 8 +INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10757803916931152 s +INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.1088552474975586 s +DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=208634691544268811886730059796801350262, time:1750768235.3580174s req_ids:[8] +DEBUG 06-24 20:30:35 [manager.py:391] +ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:207.73959159851074ms total_cost_time:207.78393745422363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12365 prompt_cache_len:5151 prompt_cache_ratio:0.416579053780833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 +DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:35 [batch.py:51] router release req id 8 +INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.1075277328491211 s +INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s +DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=298706225274550401283626875310686272409, time:1750768235.5722456s req_ids:[8] +DEBUG 06-24 20:30:35 [manager.py:391] +ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:209.6259593963623ms total_cost_time:209.6693515777588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12366 prompt_cache_len:5151 prompt_cache_ratio:0.4165453663270257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 +DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:35 [batch.py:51] router release req id 8 +INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s +INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10896873474121094 s +DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=137600441149220813638864710772597850762, time:1750768235.7880757s req_ids:[8] +DEBUG 06-24 20:30:35 [manager.py:391] +ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:388.40341567993164ms total_cost_time:388.4472846984863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12367 prompt_cache_len:5151 prompt_cache_ratio:0.4165116843211773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 +DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:36 [batch.py:51] router release req id 8 +INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10806465148925781 s +INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.10923576354980469 s +DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=81847973083828879770347336654938798229, time:1750768236.184393s req_ids:[8] +DEBUG 06-24 20:30:36 [manager.py:391] +DEBUG 06-24 20:30:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 51275.875 tokens/s +DEBUG 06-24 20:30:36 [stats.py:37] Avg prompt tokens throughput: 51267.571 tokens/s +DEBUG 06-24 20:30:36 [stats.py:37] Avg generate tokens throughput: 8.304 tokens/s +ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:207.80086517333984ms total_cost_time:207.84306526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12368 prompt_cache_len:5151 prompt_cache_ratio:0.41647800776196636 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 +DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:36 [batch.py:51] router release req id 8 +INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10672211647033691 s +INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.10782814025878906 s +DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=288476144712504447676342809205188021050, time:1750768236.4184856s req_ids:[8] +DEBUG 06-24 20:30:36 [manager.py:391] +ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:233.1368923187256ms total_cost_time:233.20317268371582ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:12369 prompt_cache_len:5151 prompt_cache_ratio:0.4164443366480718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 +DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:36 [batch.py:51] router release req id 8 +INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.11191463470458984 s +INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.11411190032958984 s +DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=28183811273717150005544274854243955198, time:1750768236.636406s req_ids:[8] +DEBUG 06-24 20:30:36 [manager.py:391] +ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:213.87052536010742ms total_cost_time:213.91725540161133ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12370 prompt_cache_len:5151 prompt_cache_ratio:0.416410670978173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 +DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:36 [batch.py:51] router release req id 8 +INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10965418815612793 s +INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.1116335391998291 s +DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=180572414323650452194351021888365398796, time:1750768236.8562768s req_ids:[8] +DEBUG 06-24 20:30:36 [manager.py:391] +ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:222.7783203125ms total_cost_time:222.8255271911621ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12371 prompt_cache_len:5151 prompt_cache_ratio:0.4163770107509498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 +DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:36 [batch.py:51] router release req id 8 +INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10724306106567383 s +INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.10928940773010254 s +DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=153165229990062536082565109022490064788, time:1750768237.0853343s req_ids:[8] +DEBUG 06-24 20:30:37 [manager.py:391] +ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:209.25569534301758ms total_cost_time:209.29932594299316ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12372 prompt_cache_len:5151 prompt_cache_ratio:0.41634335596508243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 +DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:37 [batch.py:51] router release req id 8 +INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10744094848632812 s +INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.1095132827758789 s +DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=44295151827756841104849175182254358371, time:1750768237.298769s req_ids:[8] +DEBUG 06-24 20:30:37 [manager.py:391] +ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:224.1671085357666ms total_cost_time:224.2114543914795ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12373 prompt_cache_len:5151 prompt_cache_ratio:0.4163097066192516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 +DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:37 [batch.py:51] router release req id 8 +INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10984158515930176 s +INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.11179614067077637 s +DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=77709106296866416241723508400878984082, time:1750768237.5302694s req_ids:[8] +DEBUG 06-24 20:30:37 [manager.py:391] +ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:397.17698097229004ms total_cost_time:397.22394943237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12374 prompt_cache_len:5151 prompt_cache_ratio:0.41627606271213835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 +DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:37 [batch.py:51] router release req id 8 +INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.1081078052520752 s +INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.11019062995910645 s +DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=236357821768055236541250965045651833025, time:1750768237.9399734s req_ids:[8] +DEBUG 06-24 20:30:37 [manager.py:391] +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:232.19680786132812ms total_cost_time:232.24115371704102ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12375 prompt_cache_len:5151 prompt_cache_ratio:0.41624242424242425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 +DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:38 [batch.py:51] router release req id 8 +INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10889625549316406 s +INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11083149909973145 s +DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=173037545317111123673230256039236075076, time:1750768238.1744602s req_ids:[8] +DEBUG 06-24 20:30:38 [manager.py:391] +ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:206.5291404724121ms total_cost_time:206.5744400024414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12376 prompt_cache_len:5151 prompt_cache_ratio:0.41620879120879123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 +DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:38 [batch.py:51] router release req id 8 +INFO 06-24 20:30:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10878276824951172 s +INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11086702346801758 s +DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=227180665237759584436180706520736692175, time:1750768238.3855667s req_ids:[8] +DEBUG 06-24 20:30:38 [manager.py:391] +ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:224.95317459106445ms total_cost_time:225.00014305114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12377 prompt_cache_len:5151 prompt_cache_ratio:0.41617516360992163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 +DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:38 [batch.py:51] router release req id 8 +INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10823345184326172 s +INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11024212837219238 s +DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=262686065833497363036027619225484525425, time:1750768238.6162245s req_ids:[8] +DEBUG 06-24 20:30:38 [manager.py:391] +ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:205.95312118530273ms total_cost_time:205.99818229675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12378 prompt_cache_len:5151 prompt_cache_ratio:0.41614154144449833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 +DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:38 [batch.py:51] router release req id 8 +INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10790491104125977 s +INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.10984134674072266 s +DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=132396989491180303618751575096153106756, time:1750768238.828684s req_ids:[8] +DEBUG 06-24 20:30:38 [manager.py:391] +ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:217.64659881591797ms total_cost_time:217.70596504211426ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:12379 prompt_cache_len:5151 prompt_cache_ratio:0.41610792471120445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 +DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:38 [batch.py:51] router release req id 8 +INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10881567001342773 s +INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.11071538925170898 s +DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=144908386806746057462448044733643998590, time:1750768239.0532694s req_ids:[8] +DEBUG 06-24 20:30:39 [manager.py:391] +ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:205.59263229370117ms total_cost_time:205.65485954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:12380 prompt_cache_len:5151 prompt_cache_ratio:0.41607431340872375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 +DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:39 [batch.py:51] router release req id 8 +INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10609555244445801 s +INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.10787677764892578 s +DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=127969320652302352167934815481669424845, time:1750768239.2647762s req_ids:[8] +DEBUG 06-24 20:30:39 [manager.py:391] +ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:169.10457611083984ms total_cost_time:169.14749145507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12381 prompt_cache_len:5151 prompt_cache_ratio:0.41604070753574024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 +DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:39 [batch.py:51] router release req id 8 +INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s +INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s +DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=62925540422142650555947171552577332489, time:1750768239.4405591s req_ids:[8] +DEBUG 06-24 20:30:39 [manager.py:391] +ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:380.62024116516113ms total_cost_time:380.6648254394531ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12382 prompt_cache_len:5151 prompt_cache_ratio:0.41600710709093847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 +DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:39 [batch.py:51] router release req id 8 +INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10865497589111328 s +INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.11052608489990234 s +DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=275353133760815678107014535410645894741, time:1750768239.8274662s req_ids:[8] +DEBUG 06-24 20:30:39 [manager.py:391] +ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:209.15460586547852ms total_cost_time:209.21564102172852ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12383 prompt_cache_len:5151 prompt_cache_ratio:0.4159735120730033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 +DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:39 [batch.py:51] router release req id 8 +INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10993385314941406 s +INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.11185526847839355 s +DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=73968033608722786198843102166076255361, time:1750768240.0470455s req_ids:[8] +DEBUG 06-24 20:30:40 [manager.py:391] +ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:214.19167518615723ms total_cost_time:214.23745155334473ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12384 prompt_cache_len:5151 prompt_cache_ratio:0.4159399224806202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 +DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:40 [batch.py:51] router release req id 8 +INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.1079566478729248 s +INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s +DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=257056328959941863730615964363766608010, time:1750768240.2619216s req_ids:[8] +DEBUG 06-24 20:30:40 [manager.py:391] +ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:205.60002326965332ms total_cost_time:205.66248893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12385 prompt_cache_len:5151 prompt_cache_ratio:0.41590633831247475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 +DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:40 [batch.py:51] router release req id 8 +INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10840034484863281 s +INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s +DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=261890870281898470008089117178608868160, time:1750768240.4783971s req_ids:[8] +DEBUG 06-24 20:30:40 [manager.py:391] +ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:226.69124603271484ms total_cost_time:226.75204277038574ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12386 prompt_cache_len:5151 prompt_cache_ratio:0.41587275956725334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 +DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:40 [batch.py:51] router release req id 8 +INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10736274719238281 s +INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s +DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=180963687478566458771244654659720782819, time:1750768240.7192714s req_ids:[8] +DEBUG 06-24 20:30:40 [manager.py:391] +ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:179.8539161682129ms total_cost_time:179.89850044250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12387 prompt_cache_len:5151 prompt_cache_ratio:0.4158391862436425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 +DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:40 [batch.py:51] router release req id 8 +INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10738921165466309 s +INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940718650817871 s +DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=236168828180638463261332425787617481177, time:1750768240.8926275s req_ids:[8] +DEBUG 06-24 20:30:40 [manager.py:391] +ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:204.66113090515137ms total_cost_time:204.70452308654785ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12388 prompt_cache_len:5151 prompt_cache_ratio:0.41580561834032936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 +DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:41 [batch.py:51] router release req id 8 +INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10950040817260742 s +INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11145615577697754 s +DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=120626538080941431071783547492119207052, time:1750768241.1037586s req_ids:[8] +DEBUG 06-24 20:30:41 [manager.py:391] +ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:380.08999824523926ms total_cost_time:380.13553619384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12389 prompt_cache_len:5151 prompt_cache_ratio:0.4157720558560013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 +DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:41 [batch.py:51] router release req id 8 +INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s +INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s +DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=110020199165629193763493888133133459669, time:1750768241.4900498s req_ids:[8] +DEBUG 06-24 20:30:41 [manager.py:391] +ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:209.8689079284668ms total_cost_time:209.9130153656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12390 prompt_cache_len:5151 prompt_cache_ratio:0.41573849878934627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 +DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:41 [batch.py:51] router release req id 8 +INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10819125175476074 s +INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.1100473403930664 s +DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=65938755141866541824212237420187829454, time:1750768241.7069113s req_ids:[8] +DEBUG 06-24 20:30:41 [manager.py:391] +ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:199.39422607421875ms total_cost_time:199.43737983703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12391 prompt_cache_len:5151 prompt_cache_ratio:0.41570494713905254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 +DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:41 [batch.py:51] router release req id 8 +INFO 06-24 20:30:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10864090919494629 s +INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s +DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=210897852412438953906054543254678154155, time:1750768241.9128823s req_ids:[8] +DEBUG 06-24 20:30:41 [manager.py:391] +ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:206.51674270629883ms total_cost_time:206.56251907348633ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12392 prompt_cache_len:5151 prompt_cache_ratio:0.4156714009038089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 +DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:42 [batch.py:51] router release req id 8 +INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.10822772979736328 s +INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031532287597656 s +DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=124411267878287064489663481005004225584, time:1750768242.124851s req_ids:[8] +DEBUG 06-24 20:30:42 [manager.py:391] +ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:210.93368530273438ms total_cost_time:210.97898483276367ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12393 prompt_cache_len:5151 prompt_cache_ratio:0.4156378600823045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 +DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:42 [batch.py:51] router release req id 8 +INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.1077413558959961 s +INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s +DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=149873082516063976852542145395832716985, time:1750768242.3417823s req_ids:[8] +DEBUG 06-24 20:30:42 [manager.py:391] +ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:217.59700775146484ms total_cost_time:217.64111518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12394 prompt_cache_len:5151 prompt_cache_ratio:0.415604324673229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 +DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:42 [batch.py:51] router release req id 8 +INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.10873270034790039 s +INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.1102757453918457 s +DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=42779543406357055263280614720480529403, time:1750768242.5686698s req_ids:[8] +DEBUG 06-24 20:30:42 [manager.py:391] +ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:209.22231674194336ms total_cost_time:209.26761627197266ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12395 prompt_cache_len:5151 prompt_cache_ratio:0.4155707946752723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 +DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:42 [batch.py:51] router release req id 8 +INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.3112678527832031 s +INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.3134334087371826 s +DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=10759551587808412460960988926643521750, time:1750768243.0040483s req_ids:[8] +DEBUG 06-24 20:30:43 [manager.py:391] +ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:438.673734664917ms total_cost_time:438.7032985687256ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:12396 prompt_cache_len:5151 prompt_cache_ratio:0.4155372700871249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 +DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:43 [batch.py:51] router release req id 8 +INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.10658693313598633 s +INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.10851097106933594 s +DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=63276058981788484916974324733299827600, time:1750768243.2296104s req_ids:[8] +DEBUG 06-24 20:30:43 [manager.py:391] +ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:217.41604804992676ms total_cost_time:217.44394302368164ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12397 prompt_cache_len:5151 prompt_cache_ratio:0.4155037509074776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 +DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:43 [batch.py:51] router release req id 8 +INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.1067495346069336 s +INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.1086118221282959 s +DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=55098871330572320890231241812078703423, time:1750768243.4618495s req_ids:[8] +DEBUG 06-24 20:30:43 [manager.py:391] +ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:220.95155715942383ms total_cost_time:220.9937572479248ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12398 prompt_cache_len:5151 prompt_cache_ratio:0.4154702371350218 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 +DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:43 [batch.py:51] router release req id 8 +INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.10846567153930664 s +INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s +DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=161754550453038845847990137693515122418, time:1750768243.6758664s req_ids:[8] +DEBUG 06-24 20:30:43 [manager.py:391] +ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:207.34167098999023ms total_cost_time:207.37051963806152ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12399 prompt_cache_len:5151 prompt_cache_ratio:0.41543672876844906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 +DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:43 [batch.py:51] router release req id 8 +INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.1051476001739502 s +INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.10701179504394531 s +DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=28805820990165881385321090527940094040, time:1750768243.8948019s req_ids:[8] +DEBUG 06-24 20:30:43 [manager.py:391] +ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:224.17902946472168ms total_cost_time:224.20692443847656ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12400 prompt_cache_len:5151 prompt_cache_ratio:0.4154032258064516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 +DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:44 [batch.py:51] router release req id 8 +INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.10650944709777832 s +INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.10841250419616699 s +DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=243036985258189450962206370568579713789, time:1750768244.1334283s req_ids:[8] +DEBUG 06-24 20:30:44 [manager.py:391] +ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:230.82637786865234ms total_cost_time:230.87048530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12401 prompt_cache_len:5151 prompt_cache_ratio:0.41536972824772195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 +DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:44 [batch.py:51] router release req id 8 +INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.30990171432495117 s +INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.3119776248931885 s +DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=168131587641461640583884149406272044847, time:1750768244.5673828s req_ids:[8] +DEBUG 06-24 20:30:44 [manager.py:391] +ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:430.31978607177734ms total_cost_time:430.36389350891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12402 prompt_cache_len:5151 prompt_cache_ratio:0.41533623609095305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 +DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:44 [batch.py:51] router release req id 8 +INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.10913801193237305 s +INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.11108899116516113 s +DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=55252438555026391132161758853269804957, time:1750768244.7916949s req_ids:[8] +DEBUG 06-24 20:30:44 [manager.py:391] +ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:216.32957458496094ms total_cost_time:216.3827419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12403 prompt_cache_len:5151 prompt_cache_ratio:0.41530274933483835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 +DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:44 [batch.py:51] router release req id 8 +INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.1076650619506836 s +INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976648330688477 s +DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=329196008846586408128785990914723288890, time:1750768245.0117621s req_ids:[8] +DEBUG 06-24 20:30:45 [manager.py:391] +ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:210.557222366333ms total_cost_time:210.5865478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12404 prompt_cache_len:5151 prompt_cache_ratio:0.41526926797807157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 +DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:45 [batch.py:51] router release req id 8 +INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.10683393478393555 s +INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s +DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=222756538951228158458806087948864317506, time:1750768245.2326164s req_ids:[8] +DEBUG 06-24 20:30:45 [manager.py:391] +ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:212.34416961669922ms total_cost_time:212.40592002868652ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12405 prompt_cache_len:5151 prompt_cache_ratio:0.415235792019347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 +DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:45 [batch.py:51] router release req id 8 +INFO 06-24 20:30:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.1065816879272461 s +INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10854315757751465 s +DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=198286134788743756410891419174621468611, time:1750768245.449496s req_ids:[8] +DEBUG 06-24 20:30:45 [manager.py:391] +ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:211.55166625976562ms total_cost_time:211.5805149078369ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12406 prompt_cache_len:5151 prompt_cache_ratio:0.41520232145735936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 +DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:45 [batch.py:51] router release req id 8 +INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.10601353645324707 s +INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10806822776794434 s +DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=180145663125960373097703813473687933942, time:1750768245.6680968s req_ids:[8] +DEBUG 06-24 20:30:45 [manager.py:391] +ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:375.654935836792ms total_cost_time:375.7014274597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12407 prompt_cache_len:5151 prompt_cache_ratio:0.4151688562908036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 +DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:45 [batch.py:51] router release req id 8 +INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.1073904037475586 s +INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s +DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=73372549880568580573604827578107182624, time:1750768246.0493965s req_ids:[8] +DEBUG 06-24 20:30:46 [manager.py:391] +ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:214.56217765808105ms total_cost_time:214.58840370178223ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12408 prompt_cache_len:5151 prompt_cache_ratio:0.4151353965183752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 +DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:46 [batch.py:51] router release req id 8 +INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10620713233947754 s +INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10832738876342773 s +DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=197839674640486488135865539723351628993, time:1750768246.2711918s req_ids:[8] +DEBUG 06-24 20:30:46 [manager.py:391] +DEBUG 06-24 20:30:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 50366.272 tokens/s +DEBUG 06-24 20:30:46 [stats.py:37] Avg prompt tokens throughput: 50358.143 tokens/s +DEBUG 06-24 20:30:46 [stats.py:37] Avg generate tokens throughput: 8.129 tokens/s +ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:216.7351245880127ms total_cost_time:216.75562858581543ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12409 prompt_cache_len:5151 prompt_cache_ratio:0.4151019421387703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 +DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:46 [batch.py:51] router release req id 8 +INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s +INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999107360839844 s +DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=303824174823379533974414946467445820060, time:1750768246.4922338s req_ids:[8] +DEBUG 06-24 20:30:46 [manager.py:391] +ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:213.24396133422852ms total_cost_time:213.2887840270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12410 prompt_cache_len:5151 prompt_cache_ratio:0.41506849315068495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 +DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:46 [batch.py:51] router release req id 8 +INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s +INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s +DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=27285384022431238117225281410687554845, time:1750768246.711126s req_ids:[8] +DEBUG 06-24 20:30:46 [manager.py:391] +ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.73579788208008ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12411 prompt_cache_len:5151 prompt_cache_ratio:0.41503504955281606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 +DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:46 [batch.py:51] router release req id 8 +INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10684871673583984 s +INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10884428024291992 s +DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=212822754960327124933689600490084433479, time:1750768246.9311352s req_ids:[8] +DEBUG 06-24 20:30:46 [manager.py:391] +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:212.34369277954102ms total_cost_time:212.36872673034668ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12412 prompt_cache_len:5151 prompt_cache_ratio:0.41500161134386077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 +DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:47 [batch.py:51] router release req id 8 +INFO 06-24 20:30:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10586118698120117 s +INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10785293579101562 s +DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=170469524889344175611430581420731266800, time:1750768247.1467924s req_ids:[8] +DEBUG 06-24 20:30:47 [manager.py:391] +ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:378.31568717956543ms total_cost_time:378.3597946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12413 prompt_cache_len:5151 prompt_cache_ratio:0.4149681785225167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 +DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:47 [batch.py:51] router release req id 8 +INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10698509216308594 s +INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10900115966796875 s +DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=322008835789722919910529588966293359657, time:1750768247.5312066s req_ids:[8] +DEBUG 06-24 20:30:47 [manager.py:391] +ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:216.0942554473877ms total_cost_time:216.11857414245605ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12414 prompt_cache_len:5151 prompt_cache_ratio:0.4149347510874819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 +DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:47 [batch.py:51] router release req id 8 +INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10652637481689453 s +INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10841965675354004 s +DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=250707333723645332776176872619605545177, time:1750768247.752067s req_ids:[8] +DEBUG 06-24 20:30:47 [manager.py:391] +ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:215.04497528076172ms total_cost_time:215.0704860687256ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12415 prompt_cache_len:5151 prompt_cache_ratio:0.4149013290374547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 +DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:47 [batch.py:51] router release req id 8 +INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10735011100769043 s +INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s +DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=315843729027809932568577697666010751441, time:1750768247.9725795s req_ids:[8] +DEBUG 06-24 20:30:47 [manager.py:391] +ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:211.15684509277344ms total_cost_time:211.1835479736328ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12416 prompt_cache_len:5151 prompt_cache_ratio:0.414867912371134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 +DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:48 [batch.py:51] router release req id 8 +INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10600900650024414 s +INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10826849937438965 s +DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=264249146717129752881221445046787960385, time:1750768248.1897984s req_ids:[8] +DEBUG 06-24 20:30:48 [manager.py:391] +ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:211.12656593322754ms total_cost_time:211.1530303955078ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:12417 prompt_cache_len:5151 prompt_cache_ratio:0.41483450108721914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 +DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:48 [batch.py:51] router release req id 8 +INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10802960395812988 s +INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s +DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=153399232804048404410135696227341055770, time:1750768248.413649s req_ids:[8] +DEBUG 06-24 20:30:48 [manager.py:391] +ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:216.76278114318848ms total_cost_time:216.78900718688965ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12418 prompt_cache_len:5151 prompt_cache_ratio:0.4148010951844097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 +DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:48 [batch.py:51] router release req id 8 +INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10686731338500977 s +INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.1089789867401123 s +DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=136553845076708875648177866334516880661, time:1750768248.629487s req_ids:[8] +DEBUG 06-24 20:30:48 [manager.py:391] +ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:215.96002578735352ms total_cost_time:215.98482131958008ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12419 prompt_cache_len:5151 prompt_cache_ratio:0.4147676946614059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 +DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:48 [batch.py:51] router release req id 8 +INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10718154907226562 s +INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10900235176086426 s +DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=135000725457591029293110401028152904900, time:1750768248.8498118s req_ids:[8] +DEBUG 06-24 20:30:48 [manager.py:391] +ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:389.845609664917ms total_cost_time:389.86682891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12420 prompt_cache_len:5151 prompt_cache_ratio:0.4147342995169082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 +DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:49 [batch.py:51] router release req id 8 +INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10668230056762695 s +INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10815310478210449 s +DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=216706761507401565380923230029035613989, time:1750768249.2456555s req_ids:[8] +DEBUG 06-24 20:30:49 [manager.py:391] +ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:215.48771858215332ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12421 prompt_cache_len:5151 prompt_cache_ratio:0.4147009097496176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 +DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:49 [batch.py:51] router release req id 8 +INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10638618469238281 s +INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10836172103881836 s +DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=316488262775581693765581093426104177885, time:1750768249.464567s req_ids:[8] +DEBUG 06-24 20:30:49 [manager.py:391] +ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:212.26143836975098ms total_cost_time:212.28694915771484ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12422 prompt_cache_len:5151 prompt_cache_ratio:0.41466752535823537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 +DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:49 [batch.py:51] router release req id 8 +INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10631036758422852 s +INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10805130004882812 s +DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=62359354607109446271810233168689849528, time:1750768249.6812391s req_ids:[8] +DEBUG 06-24 20:30:49 [manager.py:391] +ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:171.7050075531006ms total_cost_time:171.73099517822266ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12423 prompt_cache_len:5151 prompt_cache_ratio:0.4146341463414634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 +DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:49 [batch.py:51] router release req id 8 +INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10665440559387207 s +INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10858726501464844 s +DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=15801549039811831264668260924930652050, time:1750768249.8589277s req_ids:[8] +DEBUG 06-24 20:30:49 [manager.py:391] +ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:205.63745498657227ms total_cost_time:205.66248893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12424 prompt_cache_len:5151 prompt_cache_ratio:0.4146007726980039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 +DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:49 [batch.py:51] router release req id 8 +INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10552072525024414 s +INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.10753917694091797 s +DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=17090920995912503692514837986536188180, time:1750768250.0677605s req_ids:[8] +DEBUG 06-24 20:30:50 [manager.py:391] +ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:211.3204002380371ms total_cost_time:211.3656997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12425 prompt_cache_len:5151 prompt_cache_ratio:0.41456740442655937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 +DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:50 [batch.py:51] router release req id 8 +INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10870242118835449 s +INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s +DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=108988323394701074706965042620812931970, time:1750768250.2834623s req_ids:[8] +DEBUG 06-24 20:30:50 [manager.py:391] +ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:205.55830001831055ms total_cost_time:205.61623573303223ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:12426 prompt_cache_len:5151 prompt_cache_ratio:0.41453404152583295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 +DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:50 [batch.py:51] router release req id 8 +INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10912227630615234 s +INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.11104416847229004 s +DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=94412808368673861688587777379546520535, time:1750768250.4959133s req_ids:[8] +DEBUG 06-24 20:30:50 [manager.py:391] +ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:372.9138374328613ms total_cost_time:372.95985221862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12427 prompt_cache_len:5151 prompt_cache_ratio:0.41450068399452805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 +DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:50 [batch.py:51] router release req id 8 +INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10796880722045898 s +INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s +DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=59890976463626219263848214727335376126, time:1750768250.8831663s req_ids:[8] +DEBUG 06-24 20:30:50 [manager.py:391] +ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:222.66602516174316ms total_cost_time:222.71156311035156ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12428 prompt_cache_len:5151 prompt_cache_ratio:0.4144673318313486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 +DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:51 [batch.py:51] router release req id 8 +INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10866737365722656 s +INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11076903343200684 s +DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=137955553706391832181224902759114507482, time:1750768251.104259s req_ids:[8] +DEBUG 06-24 20:30:51 [manager.py:391] +ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:208.94932746887207ms total_cost_time:208.99367332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12429 prompt_cache_len:5151 prompt_cache_ratio:0.4144339850349988 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 +DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:51 [batch.py:51] router release req id 8 +INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10726714134216309 s +INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.10960936546325684 s +DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=168862639791533896737272550085581433719, time:1750768251.319687s req_ids:[8] +DEBUG 06-24 20:30:51 [manager.py:391] +ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:207.29804039001465ms total_cost_time:207.34429359436035ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12430 prompt_cache_len:5151 prompt_cache_ratio:0.41440064360418344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 +DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:51 [batch.py:51] router release req id 8 +INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s +INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s +DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=310277880230842525860281971646576196544, time:1750768251.533921s req_ids:[8] +DEBUG 06-24 20:30:51 [manager.py:391] +ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:210.79468727111816ms total_cost_time:210.82687377929688ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:12431 prompt_cache_len:5151 prompt_cache_ratio:0.4143673075376076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 +DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:51 [batch.py:51] router release req id 8 +INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10781288146972656 s +INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s +DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=19869017209201943601350144444009316020, time:1750768251.7535386s req_ids:[8] +DEBUG 06-24 20:30:51 [manager.py:391] +ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:213.62662315368652ms total_cost_time:213.67168426513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12432 prompt_cache_len:5151 prompt_cache_ratio:0.4143339768339768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 +DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:51 [batch.py:51] router release req id 8 +INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.108795166015625 s +INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11119890213012695 s +DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=206350130390277966876210286803694310395, time:1750768251.971545s req_ids:[8] +DEBUG 06-24 20:30:51 [manager.py:391] +ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:424.85809326171875ms total_cost_time:424.90291595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12433 prompt_cache_len:5151 prompt_cache_ratio:0.41430065149199713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 +DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:52 [batch.py:51] router release req id 8 +INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10709285736083984 s +INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.10895609855651855 s +DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=105076168867210197287992874263543637594, time:1750768252.4010627s req_ids:[8] +DEBUG 06-24 20:30:52 [manager.py:391] +ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:186.0647201538086ms total_cost_time:186.10835075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12434 prompt_cache_len:5151 prompt_cache_ratio:0.41426733151037476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 +DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:52 [batch.py:51] router release req id 8 +INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10860061645507812 s +INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.1105356216430664 s +DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=246169260518551535103012322927546634563, time:1750768252.6010728s req_ids:[8] +DEBUG 06-24 20:30:52 [manager.py:391] +ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:211.34543418884277ms total_cost_time:211.39073371887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12435 prompt_cache_len:5151 prompt_cache_ratio:0.41423401688781664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 +DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:52 [batch.py:51] router release req id 8 +INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10873675346374512 s +INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s +DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=285354833993391727964296131033507397623, time:1750768252.8356729s req_ids:[8] +DEBUG 06-24 20:30:52 [manager.py:391] +ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:231.37879371643066ms total_cost_time:231.42600059509277ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12436 prompt_cache_len:5151 prompt_cache_ratio:0.4142007076230299 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 +DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:52 [batch.py:51] router release req id 8 +INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10920858383178711 s +INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.11125969886779785 s +DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=292811820465889058451570500480640416304, time:1750768253.0572267s req_ids:[8] +DEBUG 06-24 20:30:53 [manager.py:391] +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:208.1892490386963ms total_cost_time:208.23359489440918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12437 prompt_cache_len:5151 prompt_cache_ratio:0.4141674037147222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 +DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:53 [batch.py:51] router release req id 8 +INFO 06-24 20:30:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:30:53 [statics_utils.py:24] mean first cost: 229.48896463201245 ms +INFO 06-24 20:30:53 [statics_utils.py:24] mean per token cost: 0.06147854232694194 ms +INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s +INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.10895133018493652 s +DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=153901125562153148640576937610769178711, time:1750768253.2826965s req_ids:[8] +DEBUG 06-24 20:30:53 [manager.py:391] +INFO 06-24 20:30:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:224.36952590942383ms total_cost_time:224.41506385803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12438 prompt_cache_len:5151 prompt_cache_ratio:0.41413410516160154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 +DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:53 [batch.py:51] router release req id 8 +INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10917854309082031 s +INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.11120939254760742 s +DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=2977416372958569687901322992556419180, time:1750768253.5016842s req_ids:[8] +DEBUG 06-24 20:30:53 [manager.py:391] +ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:380.01370429992676ms total_cost_time:380.05924224853516ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12439 prompt_cache_len:5151 prompt_cache_ratio:0.4141008119623764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 +DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:53 [batch.py:51] router release req id 8 +INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10558843612670898 s +INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.10747694969177246 s +DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=88302537858980346026392574819039396894, time:1750768253.8901713s req_ids:[8] +DEBUG 06-24 20:30:53 [manager.py:391] +ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:215.38829803466797ms total_cost_time:215.43264389038086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12440 prompt_cache_len:5151 prompt_cache_ratio:0.41406752411575565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 +DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:54 [batch.py:51] router release req id 8 +INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10879325866699219 s +INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10985255241394043 s +DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=104349589476044304526519769986205150134, time:1750768254.1110094s req_ids:[8] +DEBUG 06-24 20:30:54 [manager.py:391] +ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:214.13922309875488ms total_cost_time:214.16115760803223ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12441 prompt_cache_len:5151 prompt_cache_ratio:0.4140342416204485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 +DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:54 [batch.py:51] router release req id 8 +INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10630059242248535 s +INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10735154151916504 s +DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=278766359291833571942018372099257772224, time:1750768254.3333023s req_ids:[8] +DEBUG 06-24 20:30:54 [manager.py:391] +ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:216.5226936340332ms total_cost_time:216.57323837280273ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:12442 prompt_cache_len:5151 prompt_cache_ratio:0.41400096447516477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 +DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:54 [batch.py:51] router release req id 8 +INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10769820213317871 s +INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10872435569763184 s +DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=32780791958348762896703829874477001705, time:1750768254.5549147s req_ids:[8] +DEBUG 06-24 20:30:54 [manager.py:391] +ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:209.75494384765625ms total_cost_time:209.8388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:12443 prompt_cache_len:5151 prompt_cache_ratio:0.4139676926786145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 +DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:54 [batch.py:51] router release req id 8 +INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10804605484008789 s +INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s +DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=271498013673586120717850864451052667245, time:1750768254.7698402s req_ids:[8] +DEBUG 06-24 20:30:54 [manager.py:391] +ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:213.47594261169434ms total_cost_time:213.53554725646973ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12444 prompt_cache_len:5151 prompt_cache_ratio:0.4139344262295082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 +DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:54 [batch.py:51] router release req id 8 +INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.1078495979309082 s +INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10976743698120117 s +DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=70788377027025119580891402998515300081, time:1750768255.0028844s req_ids:[8] +DEBUG 06-24 20:30:55 [manager.py:391] +ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:228.3174991607666ms total_cost_time:228.3637523651123ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12445 prompt_cache_len:5151 prompt_cache_ratio:0.41390116512655684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 +DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:55 [batch.py:51] router release req id 8 +INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.10842204093933105 s +INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s +DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=234855720580925033029772938785652241281, time:1750768255.2261772s req_ids:[8] +DEBUG 06-24 20:30:55 [manager.py:391] +ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:385.36882400512695ms total_cost_time:385.41531562805176ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12446 prompt_cache_len:5151 prompt_cache_ratio:0.4138679093684718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 +DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:55 [batch.py:51] router release req id 8 +INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.11071610450744629 s +INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11318588256835938 s +DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=36157953441211592161737506075216100820, time:1750768255.617071s req_ids:[8] +DEBUG 06-24 20:30:55 [manager.py:391] +ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:214.16401863098145ms total_cost_time:214.20836448669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12447 prompt_cache_len:5151 prompt_cache_ratio:0.4138346589539648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 +DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:55 [batch.py:51] router release req id 8 +INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.10915756225585938 s +INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11118149757385254 s +DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=8730518838044392911086779260491852017, time:1750768255.836698s req_ids:[8] +DEBUG 06-24 20:30:55 [manager.py:391] +ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:211.05551719665527ms total_cost_time:211.11726760864258ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12448 prompt_cache_len:5151 prompt_cache_ratio:0.41380141388174807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 +DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:55 [batch.py:51] router release req id 8 +INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10796070098876953 s +INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10976290702819824 s +DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=267677606146649080910038008611125835213, time:1750768256.0558462s req_ids:[8] +DEBUG 06-24 20:30:56 [manager.py:391] +ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:168.75267028808594ms total_cost_time:168.80464553833008ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:12449 prompt_cache_len:5151 prompt_cache_ratio:0.4137681741505342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 +DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:56 [batch.py:51] router release req id 8 +INFO 06-24 20:30:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10722136497497559 s +INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10825014114379883 s +DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=320678951449459179676738989625905705163, time:1750768256.229986s req_ids:[8] +DEBUG 06-24 20:30:56 [manager.py:391] +ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:30:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 50717.753 tokens/s +DEBUG 06-24 20:30:56 [stats.py:37] Avg prompt tokens throughput: 50709.494 tokens/s +DEBUG 06-24 20:30:56 [stats.py:37] Avg generate tokens throughput: 8.259 tokens/s +INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:207.71455764770508ms total_cost_time:207.75938034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12450 prompt_cache_len:5151 prompt_cache_ratio:0.41373493975903614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 +DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:56 [batch.py:51] router release req id 8 +INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10771369934082031 s +INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10885453224182129 s +DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=33876324607224621097591329003323461536, time:1750768256.4531665s req_ids:[8] +DEBUG 06-24 20:30:56 [manager.py:391] +ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:220.02315521240234ms total_cost_time:220.06654739379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12451 prompt_cache_len:5151 prompt_cache_ratio:0.4137017107059674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 +DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:56 [batch.py:51] router release req id 8 +INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10858297348022461 s +INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.11067438125610352 s +DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=333166459254446381887622689016573680598, time:1750768256.6697693s req_ids:[8] +DEBUG 06-24 20:30:56 [manager.py:391] +ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:398.3585834503174ms total_cost_time:398.41747283935547ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12452 prompt_cache_len:5151 prompt_cache_ratio:0.4136684869900418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 +DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:56 [batch.py:51] router release req id 8 +INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10870552062988281 s +INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072731018066406 s +DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=55297323732811713121349310387988955956, time:1750768257.0761204s req_ids:[8] +DEBUG 06-24 20:30:57 [manager.py:391] +ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:209.75804328918457ms total_cost_time:209.81740951538086ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:12453 prompt_cache_len:5151 prompt_cache_ratio:0.4136352686099735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 +DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:57 [batch.py:51] router release req id 8 +INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10803890228271484 s +INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.10909914970397949 s +DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=6027848488110771488588448605070394739, time:1750768257.3056743s req_ids:[8] +DEBUG 06-24 20:30:57 [manager.py:391] +ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:191.7872428894043ms total_cost_time:191.8480396270752ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12454 prompt_cache_len:5151 prompt_cache_ratio:0.41360205556447727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 +DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:57 [batch.py:51] router release req id 8 +INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10795021057128906 s +INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099538803100586 s +DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=96539375724546523582061131714159474420, time:1750768257.4909606s req_ids:[8] +DEBUG 06-24 20:30:57 [manager.py:391] +ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:205.0192356109619ms total_cost_time:205.078125ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12455 prompt_cache_len:5151 prompt_cache_ratio:0.41356884785226816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 +DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:57 [batch.py:51] router release req id 8 +INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s +INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s +DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=19641034874896495754987573314729150902, time:1750768257.7016096s req_ids:[8] +DEBUG 06-24 20:30:57 [manager.py:391] +ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:233.78610610961914ms total_cost_time:233.8414192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12456 prompt_cache_len:5151 prompt_cache_ratio:0.4135356454720617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 +DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:57 [batch.py:51] router release req id 8 +INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10707473754882812 s +INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.10915350914001465 s +DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=150222392040460624467828775336681107920, time:1750768257.9418857s req_ids:[8] +DEBUG 06-24 20:30:57 [manager.py:391] +ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:204.9853801727295ms total_cost_time:205.04403114318848ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12457 prompt_cache_len:5151 prompt_cache_ratio:0.41350244842257367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 +DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:58 [batch.py:51] router release req id 8 +INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10807514190673828 s +INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012530326843262 s +DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=224223952918295977811671035724449390854, time:1750768258.154356s req_ids:[8] +DEBUG 06-24 20:30:58 [manager.py:391] +ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:211.74001693725586ms total_cost_time:211.80105209350586ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12458 prompt_cache_len:5151 prompt_cache_ratio:0.4134692567025205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 +DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:58 [batch.py:51] router release req id 8 +INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10714173316955566 s +INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.10918831825256348 s +DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=19072953528025188445697028030248571201, time:1750768258.3796756s req_ids:[8] +DEBUG 06-24 20:30:58 [manager.py:391] +ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:216.28570556640625ms total_cost_time:216.33219718933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12459 prompt_cache_len:5151 prompt_cache_ratio:0.41343607031061885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 +DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:58 [batch.py:51] router release req id 8 +INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10715794563293457 s +INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.10844206809997559 s +DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=205282701281080427714189009668290844515, time:1750768258.6151636s req_ids:[8] +DEBUG 06-24 20:30:58 [manager.py:391] +ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:416.9301986694336ms total_cost_time:416.98455810546875ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:12460 prompt_cache_len:5151 prompt_cache_ratio:0.4134028892455859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 +DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:58 [batch.py:51] router release req id 8 +INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10933303833007812 s +INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11135530471801758 s +DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=111631596694140023051178298499666686482, time:1750768259.0269754s req_ids:[8] +DEBUG 06-24 20:30:59 [manager.py:391] +ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:232.35225677490234ms total_cost_time:232.4669361114502ms,out_token_counter:1 mean_per_token_cost_time: 0.11467933654785156ms prompt_token_num:12461 prompt_cache_len:5151 prompt_cache_ratio:0.41336971350613916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 +DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:59 [batch.py:51] router release req id 8 +INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10863780975341797 s +INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s +DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=230545898368270800857131714662829256750, time:1750768259.258932s req_ids:[8] +DEBUG 06-24 20:30:59 [manager.py:391] +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:208.13608169555664ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12462 prompt_cache_len:5151 prompt_cache_ratio:0.41333654309099666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 +DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:59 [batch.py:51] router release req id 8 +INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10841941833496094 s +INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11037087440490723 s +DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=301940003077655646692862439318984552192, time:1750768259.4748392s req_ids:[8] +DEBUG 06-24 20:30:59 [manager.py:391] +ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:205.04021644592285ms total_cost_time:205.09839057922363ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:12463 prompt_cache_len:5151 prompt_cache_ratio:0.4133033779988767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 +DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:59 [batch.py:51] router release req id 8 +INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10687112808227539 s +INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.10877227783203125 s +DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=312023169252105053714454258865728239354, time:1750768259.687395s req_ids:[8] +DEBUG 06-24 20:30:59 [manager.py:391] +ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:213.67859840393066ms total_cost_time:213.72103691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12464 prompt_cache_len:5151 prompt_cache_ratio:0.4132702182284981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 +DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:30:59 [batch.py:51] router release req id 8 +INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10720419883728027 s +INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.10908126831054688 s +DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=191479274459631523706888553801464698544, time:1750768259.918769s req_ids:[8] +DEBUG 06-24 20:30:59 [manager.py:391] +ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:182.7373504638672ms total_cost_time:182.78026580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12465 prompt_cache_len:5151 prompt_cache_ratio:0.41323706377858005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 +DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:00 [batch.py:51] router release req id 8 +INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10785961151123047 s +INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s +DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=181450869902078814669365954901950285319, time:1750768260.0963042s req_ids:[8] +DEBUG 06-24 20:31:00 [manager.py:391] +ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:202.37231254577637ms total_cost_time:202.41403579711914ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12466 prompt_cache_len:5151 prompt_cache_ratio:0.4132039146478421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 +DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:00 [batch.py:51] router release req id 8 +INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s +INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s +DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=47661244253780091280466442354464114652, time:1750768260.3056054s req_ids:[8] +DEBUG 06-24 20:31:00 [manager.py:391] +ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:387.26258277893066ms total_cost_time:387.30454444885254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12467 prompt_cache_len:5151 prompt_cache_ratio:0.4131707708350044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 +DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:00 [batch.py:51] router release req id 8 +INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10769820213317871 s +INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.10935163497924805 s +DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=110918307799342330819032168563745187750, time:1750768260.697488s req_ids:[8] +DEBUG 06-24 20:31:00 [manager.py:391] +ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:188.8713836669922ms total_cost_time:188.91572952270508ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12468 prompt_cache_len:5151 prompt_cache_ratio:0.4131376323387873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 +DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:00 [batch.py:51] router release req id 8 +INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10829663276672363 s +INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.11019396781921387 s +DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=34399715534402867248186878155374888836, time:1750768260.893034s req_ids:[8] +DEBUG 06-24 20:31:00 [manager.py:391] +ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:210.15334129333496ms total_cost_time:210.19911766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12469 prompt_cache_len:5151 prompt_cache_ratio:0.41310449915791164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 +DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:01 [batch.py:51] router release req id 8 +INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10797643661499023 s +INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.1098175048828125 s +DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=136530538120919030954199829759840254207, time:1750768261.1096392s req_ids:[8] +DEBUG 06-24 20:31:01 [manager.py:391] +ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:196.60449028015137ms total_cost_time:196.64597511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12470 prompt_cache_len:5151 prompt_cache_ratio:0.4130713712910986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 +DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:01 [batch.py:51] router release req id 8 +INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10767102241516113 s +INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.10952115058898926 s +DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=72896634457730056549489012188130781931, time:1750768261.3115654s req_ids:[8] +DEBUG 06-24 20:31:01 [manager.py:391] +ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:201.6470432281494ms total_cost_time:201.6899585723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12471 prompt_cache_len:5151 prompt_cache_ratio:0.41303824873707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 +DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:01 [batch.py:51] router release req id 8 +INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10753154754638672 s +INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.1096181869506836 s +DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=291413236124409637913569519175813281511, time:1750768261.5211315s req_ids:[8] +DEBUG 06-24 20:31:01 [manager.py:391] +ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:200.21629333496094ms total_cost_time:200.25992393493652ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12472 prompt_cache_len:5151 prompt_cache_ratio:0.41300513149454776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 +DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:01 [batch.py:51] router release req id 8 +INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10804104804992676 s +INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.10991740226745605 s +DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=64811599997679441320889351947244011926, time:1750768261.7257676s req_ids:[8] +DEBUG 06-24 20:31:01 [manager.py:391] +ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:205.25789260864258ms total_cost_time:205.30080795288086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12473 prompt_cache_len:5151 prompt_cache_ratio:0.4129720195622545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 +DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:02 [batch.py:51] router release req id 8 +INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.3098106384277344 s +DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=335463417253242771115536386454889212723, time:1750768262.13208s req_ids:[8] +DEBUG 06-24 20:31:02 [manager.py:391] +INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.3117959499359131 s +ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:408.4603786468506ms total_cost_time:408.5052013397217ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12474 prompt_cache_len:5151 prompt_cache_ratio:0.41293891293891294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 +DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:02 [batch.py:51] router release req id 8 +INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10832071304321289 s +INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11067509651184082 s +DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=176962549358007825171441692736422913689, time:1750768262.351674s req_ids:[8] +DEBUG 06-24 20:31:02 [manager.py:391] +ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:212.45265007019043ms total_cost_time:212.49675750732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12475 prompt_cache_len:5151 prompt_cache_ratio:0.41290581162324647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 +DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:02 [batch.py:51] router release req id 8 +INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s +INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11001849174499512 s +DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=213912654825097230301175148773544035533, time:1750768262.5901873s req_ids:[8] +DEBUG 06-24 20:31:02 [manager.py:391] +ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:231.86922073364258ms total_cost_time:231.91142082214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12476 prompt_cache_len:5151 prompt_cache_ratio:0.4128727156139788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 +DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:02 [batch.py:51] router release req id 8 +INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10843992233276367 s +INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036205291748047 s +DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=229431855920629484941947963491354180915, time:1750768262.80846s req_ids:[8] +DEBUG 06-24 20:31:02 [manager.py:391] +ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:215.3298854827881ms total_cost_time:215.37160873413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12477 prompt_cache_len:5151 prompt_cache_ratio:0.4128396249098341 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 +DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:02 [batch.py:51] router release req id 8 +INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.1068413257598877 s +INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10870766639709473 s +DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=197706533817026976268769589133822344974, time:1750768263.03608s req_ids:[8] +DEBUG 06-24 20:31:03 [manager.py:391] +ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:216.53270721435547ms total_cost_time:216.57395362854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12478 prompt_cache_len:5151 prompt_cache_ratio:0.4128065395095368 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 +DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:03 [batch.py:51] router release req id 8 +INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.1077427864074707 s +INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10977411270141602 s +DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=156521254108722658419979682263078928984, time:1750768263.2536135s req_ids:[8] +DEBUG 06-24 20:31:03 [manager.py:391] +ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:209.10167694091797ms total_cost_time:209.14316177368164ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12479 prompt_cache_len:5151 prompt_cache_ratio:0.41277345941181187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 +DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:03 [batch.py:51] router release req id 8 +INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.3097202777862549 s +INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.3116645812988281 s +DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=308365057621095315206623764433903436197, time:1750768263.6856482s req_ids:[8] +DEBUG 06-24 20:31:03 [manager.py:391] +ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:437.7598762512207ms total_cost_time:437.8011226654053ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12480 prompt_cache_len:5151 prompt_cache_ratio:0.4127403846153846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 +DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:03 [batch.py:51] router release req id 8 +INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.10805344581604004 s +INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s +DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=127514572583305619428074956107137422643, time:1750768263.9105392s req_ids:[8] +DEBUG 06-24 20:31:03 [manager.py:391] +ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:222.69272804260254ms total_cost_time:222.73492813110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12481 prompt_cache_len:5151 prompt_cache_ratio:0.41270731511898084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 +DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:04 [batch.py:51] router release req id 8 +INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10782217979431152 s +INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s +DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=68816819384489690662885386207278877162, time:1750768264.1495395s req_ids:[8] +DEBUG 06-24 20:31:04 [manager.py:391] +ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:219.6512222290039ms total_cost_time:219.6958065032959ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12482 prompt_cache_len:5151 prompt_cache_ratio:0.4126742509213267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 +DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:04 [batch.py:51] router release req id 8 +INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.1082756519317627 s +INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.11081671714782715 s +DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=209516937181353585384012023219293272177, time:1750768264.3684156s req_ids:[8] +DEBUG 06-24 20:31:04 [manager.py:391] +ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:214.67947959899902ms total_cost_time:214.7378921508789ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:12483 prompt_cache_len:5151 prompt_cache_ratio:0.41264119202114874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 +DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:04 [batch.py:51] router release req id 8 +INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s +INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.11096334457397461 s +DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=316753395785497313493132876187578569870, time:1750768264.5831099s req_ids:[8] +DEBUG 06-24 20:31:04 [manager.py:391] +ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:213.16170692443848ms total_cost_time:213.20796012878418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12484 prompt_cache_len:5151 prompt_cache_ratio:0.412608138417174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 +DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:04 [batch.py:51] router release req id 8 +INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10727500915527344 s +INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s +DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=233676097618175219885476897963355789361, time:1750768264.8225307s req_ids:[8] +DEBUG 06-24 20:31:04 [manager.py:391] +ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:390.55633544921875ms total_cost_time:390.59996604919434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12485 prompt_cache_len:5151 prompt_cache_ratio:0.41257509010812976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 +DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:05 [batch.py:51] router release req id 8 +INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10948657989501953 s +INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11155486106872559 s +DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=25420626096970156692914414548132759823, time:1750768265.1987534s req_ids:[8] +DEBUG 06-24 20:31:05 [manager.py:391] +ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:211.1203670501709ms total_cost_time:211.1644744873047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12486 prompt_cache_len:5151 prompt_cache_ratio:0.4125420470927439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 +DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:05 [batch.py:51] router release req id 8 +INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10743403434753418 s +INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.10947108268737793 s +DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=331959798810267994369950450106391193076, time:1750768265.4165535s req_ids:[8] +DEBUG 06-24 20:31:05 [manager.py:391] +ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:206.42352104187012ms total_cost_time:206.4688205718994ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12487 prompt_cache_len:5151 prompt_cache_ratio:0.41250900936974455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 +DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:05 [batch.py:51] router release req id 8 +INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.1088094711303711 s +INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11103177070617676 s +DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=208698978373464275442925134444079464268, time:1750768265.6291256s req_ids:[8] +DEBUG 06-24 20:31:05 [manager.py:391] +ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:212.52179145812988ms total_cost_time:212.56542205810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12488 prompt_cache_len:5151 prompt_cache_ratio:0.41247597693786037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 +DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:05 [batch.py:51] router release req id 8 +INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s +INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11030840873718262 s +DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=3222537072395876234533459496520552559, time:1750768265.8481362s req_ids:[8] +DEBUG 06-24 20:31:05 [manager.py:391] +ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:209.80024337768555ms total_cost_time:209.85817909240723ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:12489 prompt_cache_len:5151 prompt_cache_ratio:0.4124429497958203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 +DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:05 [batch.py:51] router release req id 8 +INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.1085200309753418 s +INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.11059188842773438 s +DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=177084588228214653286181802012143280478, time:1750768266.0644772s req_ids:[8] +DEBUG 06-24 20:31:06 [manager.py:391] +ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:210.23225784301758ms total_cost_time:210.27660369873047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12490 prompt_cache_len:5151 prompt_cache_ratio:0.41240992794235387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 +DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:06 [batch.py:51] router release req id 8 +INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10744619369506836 s +INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.10945940017700195 s +DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=27885679134886115713469702724236210516, time:1750768266.2812974s req_ids:[8] +DEBUG 06-24 20:31:06 [manager.py:391] +ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:31:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 50055.312 tokens/s +DEBUG 06-24 20:31:06 [stats.py:37] Avg prompt tokens throughput: 50047.286 tokens/s +DEBUG 06-24 20:31:06 [stats.py:37] Avg generate tokens throughput: 8.026 tokens/s +INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:372.51925468444824ms total_cost_time:372.5621700286865ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12491 prompt_cache_len:5151 prompt_cache_ratio:0.41237691137619087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 +DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:06 [batch.py:51] router release req id 8 +INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s +INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.1112680435180664 s +DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=201377200954779068346558203775480764757, time:1750768266.6612456s req_ids:[8] +DEBUG 06-24 20:31:06 [manager.py:391] +ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:213.82689476013184ms total_cost_time:213.87052536010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12492 prompt_cache_len:5151 prompt_cache_ratio:0.4123439000960615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 +DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:06 [batch.py:51] router release req id 8 +INFO 06-24 20:31:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10704708099365234 s +INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.1089024543762207 s +DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=281275185465685633668293628360356835821, time:1750768266.882115s req_ids:[8] +DEBUG 06-24 20:31:06 [manager.py:391] +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:211.61437034606934ms total_cost_time:211.65943145751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12493 prompt_cache_len:5151 prompt_cache_ratio:0.4123108941006964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 +DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:07 [batch.py:51] router release req id 8 +INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s +INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.1093451976776123 s +DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=332813435533580022291468381962780081408, time:1750768267.0995321s req_ids:[8] +DEBUG 06-24 20:31:07 [manager.py:391] +ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:208.36949348449707ms total_cost_time:208.41312408447266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12494 prompt_cache_len:5151 prompt_cache_ratio:0.41227789338882664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 +DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:07 [batch.py:51] router release req id 8 +INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10843753814697266 s +INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052203178405762 s +DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=41927179067663395116973905055266446041, time:1750768267.313437s req_ids:[8] +DEBUG 06-24 20:31:07 [manager.py:391] +ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:205.70659637451172ms total_cost_time:205.75308799743652ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12495 prompt_cache_len:5151 prompt_cache_ratio:0.4122448979591837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 +DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:07 [batch.py:51] router release req id 8 +INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.103546142578125 s +INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.10538387298583984 s +DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=166465240543926596858572255214284927103, time:1750768267.5263383s req_ids:[8] +DEBUG 06-24 20:31:07 [manager.py:391] +ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:222.20349311828613ms total_cost_time:222.24879264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12496 prompt_cache_len:5151 prompt_cache_ratio:0.4122119078104994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 +DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:07 [batch.py:51] router release req id 8 +INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10827279090881348 s +INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.1101844310760498 s +DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=196392207537966235939666587918125761220, time:1750768267.753386s req_ids:[8] +DEBUG 06-24 20:31:07 [manager.py:391] +ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:207.08870887756348ms total_cost_time:207.13305473327637ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12497 prompt_cache_len:5151 prompt_cache_ratio:0.412178922941506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 +DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:07 [batch.py:51] router release req id 8 +INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10721349716186523 s +INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918354988098145 s +DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=209963547930889658587163866150168609014, time:1750768267.9681652s req_ids:[8] +DEBUG 06-24 20:31:07 [manager.py:391] +ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:382.3723793029785ms total_cost_time:382.4167251586914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12498 prompt_cache_len:5151 prompt_cache_ratio:0.41214594335093613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 +DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:08 [batch.py:51] router release req id 8 +INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.1077730655670166 s +INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10968494415283203 s +DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=309380588135326408861170842434463727074, time:1750768268.3566535s req_ids:[8] +DEBUG 06-24 20:31:08 [manager.py:391] +ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:185.6536865234375ms total_cost_time:185.70446968078613ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12499 prompt_cache_len:5151 prompt_cache_ratio:0.412112969037523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 +DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:08 [batch.py:51] router release req id 8 +INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s +INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10988950729370117 s +DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=308771310589489724622061215605645790678, time:1750768268.5492675s req_ids:[8] +DEBUG 06-24 20:31:08 [manager.py:391] +ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:207.71360397338867ms total_cost_time:207.75818824768066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12500 prompt_cache_len:5151 prompt_cache_ratio:0.41208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 +DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:08 [batch.py:51] router release req id 8 +INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10793495178222656 s +INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10987567901611328 s +DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=157605382485507449445995574528165771359, time:1750768268.7633877s req_ids:[8] +DEBUG 06-24 20:31:08 [manager.py:391] +ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:213.78326416015625ms total_cost_time:213.82617950439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12501 prompt_cache_len:5151 prompt_cache_ratio:0.41204703623710104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 +DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:08 [batch.py:51] router release req id 8 +INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10767078399658203 s +INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10973763465881348 s +DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=156319905130828988385209150612355490410, time:1750768268.9844146s req_ids:[8] +DEBUG 06-24 20:31:08 [manager.py:391] +ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:226.78399085998535ms total_cost_time:226.82785987854004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12502 prompt_cache_len:5151 prompt_cache_ratio:0.4120140777475604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 +DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:09 [batch.py:51] router release req id 8 +INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s +INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.10984373092651367 s +DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=295492755154484424061258697106642599943, time:1750768269.2215552s req_ids:[8] +DEBUG 06-24 20:31:09 [manager.py:391] +ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:210.35528182983398ms total_cost_time:210.4012966156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12503 prompt_cache_len:5151 prompt_cache_ratio:0.41198112453011276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 +DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:09 [batch.py:51] router release req id 8 +INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s +INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.10977387428283691 s +DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=334363851398461936880807734011136347128, time:1750768269.4344208s req_ids:[8] +DEBUG 06-24 20:31:09 [manager.py:391] +ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:210.47067642211914ms total_cost_time:210.51526069641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12504 prompt_cache_len:5151 prompt_cache_ratio:0.4119481765834933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 +DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:09 [batch.py:51] router release req id 8 +INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10805153846740723 s +INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.1099843978881836 s +DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=274886550352700284025755126154914969379, time:1750768269.6512759s req_ids:[8] +DEBUG 06-24 20:31:09 [manager.py:391] +ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:406.8949222564697ms total_cost_time:406.9180488586426ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:12505 prompt_cache_len:5151 prompt_cache_ratio:0.4119152339064374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 +DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:09 [batch.py:51] router release req id 8 +INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10771989822387695 s +INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.10965275764465332 s +DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=75615430330593713225362058400767954110, time:1750768270.0636318s req_ids:[8] +DEBUG 06-24 20:31:10 [manager.py:391] +ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:209.9459171295166ms total_cost_time:209.98883247375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12506 prompt_cache_len:5151 prompt_cache_ratio:0.4118822964976811 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 +DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:10 [batch.py:51] router release req id 8 +INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10843348503112793 s +INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11040353775024414 s +DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=262658503894260619744906117307045909717, time:1750768270.283475s req_ids:[8] +DEBUG 06-24 20:31:10 [manager.py:391] +ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:216.0928249359131ms total_cost_time:216.13669395446777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12507 prompt_cache_len:5151 prompt_cache_ratio:0.41184936435596065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 +DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:10 [batch.py:51] router release req id 8 +INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s +INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956096649169922 s +DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=284068088712209531769875984138252560222, time:1750768270.5112612s req_ids:[8] +DEBUG 06-24 20:31:10 [manager.py:391] +ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:221.62652015686035ms total_cost_time:221.67038917541504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12508 prompt_cache_len:5151 prompt_cache_ratio:0.4118164374800128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 +DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:10 [batch.py:51] router release req id 8 +INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10805845260620117 s +INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11020636558532715 s +DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=290638582771012901185844741655489464104, time:1750768270.7334328s req_ids:[8] +DEBUG 06-24 20:31:10 [manager.py:391] +ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:209.54155921936035ms total_cost_time:209.59711074829102ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:12509 prompt_cache_len:5151 prompt_cache_ratio:0.41178351586857465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 +DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:10 [batch.py:51] router release req id 8 +INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.11052513122558594 s +INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11241459846496582 s +DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=206136282999498309293515217584519768747, time:1750768270.94695s req_ids:[8] +DEBUG 06-24 20:31:10 [manager.py:391] +ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:210.9987735748291ms total_cost_time:211.0443115234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12510 prompt_cache_len:5151 prompt_cache_ratio:0.4117505995203837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 +DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:11 [batch.py:51] router release req id 8 +INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s +INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10962247848510742 s +DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=138472392076730866881253495914941814021, time:1750768271.1621392s req_ids:[8] +DEBUG 06-24 20:31:11 [manager.py:391] +ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:382.504940032959ms total_cost_time:382.5514316558838ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12511 prompt_cache_len:5151 prompt_cache_ratio:0.41171768843417794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 +DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:11 [batch.py:51] router release req id 8 +DEBUG 06-24 20:31:11 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:11 [manager.py:283] +DEBUG 06-24 20:31:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:11 [manager.py:284] +INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.1089928150177002 s +INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.11092591285705566 s +DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=274842610773323748006693584392995860369, time:1750768271.5527139s req_ids:[8] +DEBUG 06-24 20:31:11 [manager.py:391] +ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:217.94819831848145ms total_cost_time:217.99182891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12512 prompt_cache_len:5151 prompt_cache_ratio:0.4116847826086957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 +DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:11 [batch.py:51] router release req id 8 +INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10698986053466797 s +INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10892844200134277 s +DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=328499169006924377949282002478703492110, time:1750768271.7818563s req_ids:[8] +DEBUG 06-24 20:31:11 [manager.py:391] +ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:220.14927864074707ms total_cost_time:220.19362449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12513 prompt_cache_len:5151 prompt_cache_ratio:0.4116518820426756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 +DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:11 [batch.py:51] router release req id 8 +INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10772085189819336 s +INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10964393615722656 s +DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=338074776588502273919273700022555585288, time:1750768271.9988298s req_ids:[8] +DEBUG 06-24 20:31:11 [manager.py:391] +ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:205.66987991333008ms total_cost_time:205.71327209472656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12514 prompt_cache_len:5151 prompt_cache_ratio:0.411618986734857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 +DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:12 [batch.py:51] router release req id 8 +INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.1093599796295166 s +INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11138606071472168 s +DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=285326432658005579272735091469556887831, time:1750768272.2109773s req_ids:[8] +DEBUG 06-24 20:31:12 [manager.py:391] +ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:204.3745517730713ms total_cost_time:204.41865921020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12515 prompt_cache_len:5151 prompt_cache_ratio:0.4115860966839792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 +DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:12 [batch.py:51] router release req id 8 +INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s +INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11059045791625977 s +DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=190255958124451904774178780099135527288, time:1750768272.417846s req_ids:[8] +DEBUG 06-24 20:31:12 [manager.py:391] +ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:206.16459846496582ms total_cost_time:206.2079906463623ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12516 prompt_cache_len:5151 prompt_cache_ratio:0.41155321188878236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 +DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:12 [batch.py:51] router release req id 8 +INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.10843157768249512 s +INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11037755012512207 s +DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=131422421860615437810100093198019348114, time:1750768272.6311183s req_ids:[8] +DEBUG 06-24 20:31:12 [manager.py:391] +ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:415.50731658935547ms total_cost_time:415.55142402648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12517 prompt_cache_len:5151 prompt_cache_ratio:0.4115203323480067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 +DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:12 [batch.py:51] router release req id 8 +INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10846590995788574 s +INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11074709892272949 s +DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=277106865176068151334441545449356057205, time:1750768273.0545304s req_ids:[8] +DEBUG 06-24 20:31:13 [manager.py:391] +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:211.19403839111328ms total_cost_time:211.23862266540527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12518 prompt_cache_len:5151 prompt_cache_ratio:0.41148745806039305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 +DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:13 [batch.py:51] router release req id 8 +INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s +INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11036300659179688 s +DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=220293240658358206876506297548075410536, time:1750768273.2801838s req_ids:[8] +DEBUG 06-24 20:31:13 [manager.py:391] +ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:222.6853370666504ms total_cost_time:222.72920608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12519 prompt_cache_len:5151 prompt_cache_ratio:0.41145458902468246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 +DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:13 [batch.py:51] router release req id 8 +INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s +INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11139512062072754 s +DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=209489413563033150713709822564619335117, time:1750768273.4999242s req_ids:[8] +DEBUG 06-24 20:31:13 [manager.py:391] +ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:170.00555992126465ms total_cost_time:170.0570583343506ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:12520 prompt_cache_len:5151 prompt_cache_ratio:0.4114217252396166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 +DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:13 [batch.py:51] router release req id 8 +INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10794854164123535 s +INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.10965347290039062 s +DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=227931483414505929216758720466042156691, time:1750768273.676312s req_ids:[8] +DEBUG 06-24 20:31:13 [manager.py:391] +ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:202.98242568969727ms total_cost_time:203.02557945251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12521 prompt_cache_len:5151 prompt_cache_ratio:0.4113888667039374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 +DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:13 [batch.py:51] router release req id 8 +INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10867929458618164 s +INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11017775535583496 s +DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=116931807011560776590899074982640281284, time:1750768273.885679s req_ids:[8] +DEBUG 06-24 20:31:13 [manager.py:391] +ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:215.67106246948242ms total_cost_time:215.72375297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:12522 prompt_cache_len:5151 prompt_cache_ratio:0.41135601341638717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 +DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:14 [batch.py:51] router release req id 8 +INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.1069638729095459 s +INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.10841870307922363 s +DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=178997456884958683885664304534773038616, time:1750768274.1331282s req_ids:[8] +DEBUG 06-24 20:31:14 [manager.py:391] +ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:228.3787727355957ms total_cost_time:228.43575477600098ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:12523 prompt_cache_len:5151 prompt_cache_ratio:0.4113231653757087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 +DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:14 [batch.py:51] router release req id 8 +INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s +INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.11024713516235352 s +DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=295605562536982205220024503344360061749, time:1750768274.3413801s req_ids:[8] +DEBUG 06-24 20:31:14 [manager.py:391] +ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:389.8179531097412ms total_cost_time:389.8627758026123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12524 prompt_cache_len:5151 prompt_cache_ratio:0.4112903225806452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 +DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:14 [batch.py:51] router release req id 8 +INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10848259925842285 s +INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s +DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=331242976173159817323475117226824780284, time:1750768274.7359161s req_ids:[8] +DEBUG 06-24 20:31:14 [manager.py:391] +ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:210.03007888793945ms total_cost_time:210.07442474365234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12525 prompt_cache_len:5151 prompt_cache_ratio:0.41125748502994014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 +DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:14 [batch.py:51] router release req id 8 +INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s +INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.10891294479370117 s +DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=3741966754158941271781177073025609991, time:1750768274.9651766s req_ids:[8] +DEBUG 06-24 20:31:14 [manager.py:391] +ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:224.73812103271484ms total_cost_time:224.78199005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12526 prompt_cache_len:5151 prompt_cache_ratio:0.41122465272233755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 +DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:15 [batch.py:51] router release req id 8 +INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10805273056030273 s +INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.10916495323181152 s +DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=123976354871854654732500638602692739225, time:1750768275.1838672s req_ids:[8] +DEBUG 06-24 20:31:15 [manager.py:391] +ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:231.1117649078369ms total_cost_time:231.15801811218262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12527 prompt_cache_len:5151 prompt_cache_ratio:0.4111918256565818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 +DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:15 [batch.py:51] router release req id 8 +INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s +INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.10989975929260254 s +DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=327224276748437850533417176729005399906, time:1750768275.4256055s req_ids:[8] +DEBUG 06-24 20:31:15 [manager.py:391] +ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:226.72462463378906ms total_cost_time:226.77040100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12528 prompt_cache_len:5151 prompt_cache_ratio:0.4111590038314176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 +DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:15 [batch.py:51] router release req id 8 +INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10826563835144043 s +INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013960838317871 s +DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=97931355185587339517844902398606599557, time:1750768275.6577344s req_ids:[8] +DEBUG 06-24 20:31:15 [manager.py:391] +ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:230.30591011047363ms total_cost_time:230.38244247436523ms,out_token_counter:1 mean_per_token_cost_time: 0.07653236389160156ms prompt_token_num:12529 prompt_cache_len:5151 prompt_cache_ratio:0.41112618724559025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 +DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:15 [batch.py:51] router release req id 8 +INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10790061950683594 s +INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.1099386215209961 s +DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=105037887369281595108128063475452842141, time:1750768275.8985052s req_ids:[8] +DEBUG 06-24 20:31:15 [manager.py:391] +ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:400.8526802062988ms total_cost_time:400.907039642334ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:12530 prompt_cache_len:5151 prompt_cache_ratio:0.41109337589784517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 +DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:16 [batch.py:51] router release req id 8 +INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s +INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.11004233360290527 s +DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=236481050959080865530757516517222148744, time:1750768276.2994266s req_ids:[8] +DEBUG 06-24 20:31:16 [manager.py:391] +ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:204.27393913269043ms total_cost_time:204.2992115020752ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12531 prompt_cache_len:5151 prompt_cache_ratio:0.4110605697869284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 +DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:16 [batch.py:51] router release req id 8 +INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10483622550964355 s +INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.10672521591186523 s +DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=175265139455072007620372557336351049246, time:1750768276.5095196s req_ids:[8] +DEBUG 06-24 20:31:16 [manager.py:391] +ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:31:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 50992.712 tokens/s +DEBUG 06-24 20:31:16 [stats.py:37] Avg prompt tokens throughput: 50984.563 tokens/s +DEBUG 06-24 20:31:16 [stats.py:37] Avg generate tokens throughput: 8.150 tokens/s +INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.7555866241455ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12532 prompt_cache_len:5151 prompt_cache_ratio:0.4110277689115863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 +DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:16 [batch.py:51] router release req id 8 +INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10831809043884277 s +INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.1102302074432373 s +DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=230442108666386900555144758553057681256, time:1750768276.7473354s req_ids:[8] +DEBUG 06-24 20:31:16 [manager.py:391] +ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:228.79648208618164ms total_cost_time:228.84297370910645ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12533 prompt_cache_len:5151 prompt_cache_ratio:0.4109949732705657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 +DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:16 [batch.py:51] router release req id 8 +INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10748934745788574 s +INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.1091604232788086 s +DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=174620604022056359089597685458447835929, time:1750768276.9688103s req_ids:[8] +DEBUG 06-24 20:31:16 [manager.py:391] +ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:223.30451011657715ms total_cost_time:223.36983680725098ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:12534 prompt_cache_len:5151 prompt_cache_ratio:0.4109621828626137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 +INFO 06-24 20:31:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:17 [batch.py:51] router release req id 8 +INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10846614837646484 s +INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.1103672981262207 s +DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=154217849949316213949446032002320211087, time:1750768277.1921399s req_ids:[8] +DEBUG 06-24 20:31:17 [manager.py:391] +ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:207.72790908813477ms total_cost_time:207.77392387390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12535 prompt_cache_len:5151 prompt_cache_ratio:0.41092939768647785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 +DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:17 [batch.py:51] router release req id 8 +INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10950636863708496 s +INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.11150050163269043 s +DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=108137090411318116023750754203297368051, time:1750768277.4012213s req_ids:[8] +DEBUG 06-24 20:31:17 [manager.py:391] +ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:211.29107475280762ms total_cost_time:211.3347053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12536 prompt_cache_len:5151 prompt_cache_ratio:0.4108966177409062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 +DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:17 [batch.py:51] router release req id 8 +INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10752296447753906 s +INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.10860657691955566 s +DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=112182156355757783619233039777198366976, time:1750768277.6225874s req_ids:[8] +DEBUG 06-24 20:31:17 [manager.py:391] +ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:219.49338912963867ms total_cost_time:219.53654289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12537 prompt_cache_len:5151 prompt_cache_ratio:0.41086384302464707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 +DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:17 [batch.py:51] router release req id 8 +INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s +INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s +DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=159536824877776508465751281580135407903, time:1750768277.8636575s req_ids:[8] +DEBUG 06-24 20:31:17 [manager.py:391] +ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:404.67190742492676ms total_cost_time:404.79207038879395ms,out_token_counter:1 mean_per_token_cost_time: 0.1201629638671875ms prompt_token_num:12538 prompt_cache_len:5151 prompt_cache_ratio:0.4108310735364492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 +DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:18 [batch.py:51] router release req id 8 +INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10717201232910156 s +INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10812878608703613 s +DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=231164334226629571980830937996094749944, time:1750768278.2539432s req_ids:[8] +DEBUG 06-24 20:31:18 [manager.py:391] +ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:186.29717826843262ms total_cost_time:186.3405704498291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12539 prompt_cache_len:5151 prompt_cache_ratio:0.41079830927506183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 +DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:18 [batch.py:51] router release req id 8 +INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10762977600097656 s +INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s +DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=173555751811260018138083162465825119318, time:1750768278.448421s req_ids:[8] +DEBUG 06-24 20:31:18 [manager.py:391] +ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:205.46483993530273ms total_cost_time:205.51037788391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12540 prompt_cache_len:5151 prompt_cache_ratio:0.41076555023923444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 +DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:18 [batch.py:51] router release req id 8 +INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.1071467399597168 s +INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10817146301269531 s +DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=254011236643366864011433757633352885268, time:1750768278.6584747s req_ids:[8] +DEBUG 06-24 20:31:18 [manager.py:391] +ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:211.0762596130371ms total_cost_time:211.1222743988037ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12541 prompt_cache_len:5151 prompt_cache_ratio:0.4107327964277171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 +DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:18 [batch.py:51] router release req id 8 +INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10761690139770508 s +INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10951519012451172 s +DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=257394938601223862008762757272571695009, time:1750768278.8756688s req_ids:[8] +DEBUG 06-24 20:31:18 [manager.py:391] +ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:214.7042751312256ms total_cost_time:214.74862098693848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12542 prompt_cache_len:5151 prompt_cache_ratio:0.41070004783926006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 +DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:19 [batch.py:51] router release req id 8 +INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10759091377258301 s +INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.10868692398071289 s +DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=312292214621825617157837977250841117739, time:1750768279.0958283s req_ids:[8] +DEBUG 06-24 20:31:19 [manager.py:391] +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:215.0275707244873ms total_cost_time:215.07024765014648ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12543 prompt_cache_len:5151 prompt_cache_ratio:0.4106673044726142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 +DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:19 [batch.py:51] router release req id 8 +INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10758185386657715 s +INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s +DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=88137748764892093413558632388837982573, time:1750768279.3142118s req_ids:[8] +DEBUG 06-24 20:31:19 [manager.py:391] +ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:212.77880668640137ms total_cost_time:212.82243728637695ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12544 prompt_cache_len:5151 prompt_cache_ratio:0.4106345663265306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 +DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:19 [batch.py:51] router release req id 8 +INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10888910293579102 s +INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.11080741882324219 s +DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=151110584988018278342670598681910506629, time:1750768279.5333657s req_ids:[8] +DEBUG 06-24 20:31:19 [manager.py:391] +ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:395.0936794281006ms total_cost_time:395.1535224914551ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12545 prompt_cache_len:5151 prompt_cache_ratio:0.41060183339976086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 +DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:19 [batch.py:51] router release req id 8 +INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.11097025871276855 s +INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.11311078071594238 s +DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=270091429903016462769162260505912333415, time:1750768279.9354692s req_ids:[8] +DEBUG 06-24 20:31:19 [manager.py:391] +ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:208.2540988922119ms total_cost_time:208.2960605621338ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12546 prompt_cache_len:5151 prompt_cache_ratio:0.4105691056910569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 +DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:20 [batch.py:51] router release req id 8 +INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10844159126281738 s +INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11037015914916992 s +DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=123512294390957702757395496350686527781, time:1750768280.1535735s req_ids:[8] +DEBUG 06-24 20:31:20 [manager.py:391] +ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:213.4108543395996ms total_cost_time:213.4532928466797ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12547 prompt_cache_len:5151 prompt_cache_ratio:0.41053638319917113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 +DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:20 [batch.py:51] router release req id 8 +INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10864973068237305 s +INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11067676544189453 s +DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=38621624746936721233358258632667348431, time:1750768280.3733773s req_ids:[8] +DEBUG 06-24 20:31:20 [manager.py:391] +ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:216.00031852722168ms total_cost_time:216.04323387145996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12548 prompt_cache_len:5151 prompt_cache_ratio:0.41050366592285625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 +DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:20 [batch.py:51] router release req id 8 +INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10907292366027832 s +INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11106491088867188 s +DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=7556144568986281925685504038794234393, time:1750768280.5954952s req_ids:[8] +DEBUG 06-24 20:31:20 [manager.py:391] +ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:214.30397033691406ms total_cost_time:214.34760093688965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12549 prompt_cache_len:5151 prompt_cache_ratio:0.4104709538608654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 +DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:20 [batch.py:51] router release req id 8 +INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10805797576904297 s +INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s +DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=34825489587725977828500866992980680493, time:1750768280.8304672s req_ids:[8] +DEBUG 06-24 20:31:20 [manager.py:391] +ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:229.47025299072266ms total_cost_time:229.51602935791016ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12550 prompt_cache_len:5151 prompt_cache_ratio:0.4104382470119522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 +DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:20 [batch.py:51] router release req id 8 +INFO 06-24 20:31:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.10766458511352539 s +INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.1096196174621582 s +DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=31727066068721349117508171555635264683, time:1750768281.0552557s req_ids:[8] +DEBUG 06-24 20:31:21 [manager.py:391] +ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:215.84534645080566ms total_cost_time:215.88802337646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12551 prompt_cache_len:5151 prompt_cache_ratio:0.4104055453748705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 +DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:21 [batch.py:51] router release req id 8 +INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.3108539581298828 s +INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.31293320655822754 s +DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=8967043047885518195309717220378685095, time:1750768281.4952662s req_ids:[8] +DEBUG 06-24 20:31:21 [manager.py:391] +ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:442.0514106750488ms total_cost_time:442.09766387939453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12552 prompt_cache_len:5151 prompt_cache_ratio:0.4103728489483748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 +DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:21 [batch.py:51] router release req id 8 +INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.10778617858886719 s +INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s +DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=230481473998731284805538974717477629765, time:1750768281.7270124s req_ids:[8] +DEBUG 06-24 20:31:21 [manager.py:391] +ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:212.82267570495605ms total_cost_time:212.86702156066895ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12553 prompt_cache_len:5151 prompt_cache_ratio:0.4103401577312196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 +DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:21 [batch.py:51] router release req id 8 +INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.1076962947845459 s +INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.10955357551574707 s +DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=44791872304156656974901722217437230052, time:1750768281.9459798s req_ids:[8] +DEBUG 06-24 20:31:21 [manager.py:391] +ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:217.67020225524902ms total_cost_time:217.7138328552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12554 prompt_cache_len:5151 prompt_cache_ratio:0.41030747172216026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 +DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:22 [batch.py:51] router release req id 8 +INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.11064553260803223 s +INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.11272835731506348 s +DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=7440126245509333649937650584883862396, time:1750768282.169322s req_ids:[8] +DEBUG 06-24 20:31:22 [manager.py:391] +ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:214.90073204040527ms total_cost_time:214.94269371032715ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12555 prompt_cache_len:5151 prompt_cache_ratio:0.4102747909199522 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 +DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:22 [batch.py:51] router release req id 8 +INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.10876870155334473 s +INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s +DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=56990272556954084287474442613095706910, time:1750768282.3946886s req_ids:[8] +DEBUG 06-24 20:31:22 [manager.py:391] +ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:213.7296199798584ms total_cost_time:213.77325057983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12556 prompt_cache_len:5151 prompt_cache_ratio:0.41024211532335136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 +DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:22 [batch.py:51] router release req id 8 +INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.10727763175964355 s +INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.10930800437927246 s +DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=209551074770314779817449554963110097504, time:1750768282.6150904s req_ids:[8] +DEBUG 06-24 20:31:22 [manager.py:391] +ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:215.9578800201416ms total_cost_time:216.00008010864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12557 prompt_cache_len:5151 prompt_cache_ratio:0.4102094449311141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 +DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:22 [batch.py:51] router release req id 8 +INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.3088219165802002 s +INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.31073999404907227 s +DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=165473555168745689181655006668900495607, time:1750768283.0528193s req_ids:[8] +DEBUG 06-24 20:31:23 [manager.py:391] +ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:446.4552402496338ms total_cost_time:446.49791717529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12558 prompt_cache_len:5151 prompt_cache_ratio:0.4101767797419971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 +DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:23 [batch.py:51] router release req id 8 +INFO 06-24 20:31:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:31:23 [statics_utils.py:24] mean first cost: 229.67158661164862 ms +INFO 06-24 20:31:23 [statics_utils.py:24] mean per token cost: 0.06127567081356578 ms +INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10859560966491699 s +INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s +DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=163784857226672687579813413995041346623, time:1750768283.2889993s req_ids:[8] +DEBUG 06-24 20:31:23 [manager.py:391] +ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:208.89973640441895ms total_cost_time:208.94384384155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12559 prompt_cache_len:5151 prompt_cache_ratio:0.41014411975475756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 +DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:23 [batch.py:51] router release req id 8 +INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10858798027038574 s +INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11020541191101074 s +DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=161455055624283841016538751990982376338, time:1750768283.505844s req_ids:[8] +DEBUG 06-24 20:31:23 [manager.py:391] +ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:210.8311653137207ms total_cost_time:210.8759880065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12560 prompt_cache_len:5151 prompt_cache_ratio:0.4101114649681529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 +DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:23 [batch.py:51] router release req id 8 +INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10980725288391113 s +INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s +DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=264470177085633845006263746562317023140, time:1750768283.7231448s req_ids:[8] +DEBUG 06-24 20:31:23 [manager.py:391] +ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:208.6317539215088ms total_cost_time:208.67586135864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12561 prompt_cache_len:5151 prompt_cache_ratio:0.410078815380941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 +DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:23 [batch.py:51] router release req id 8 +INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10868501663208008 s +INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11061620712280273 s +DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=305335943424951821310252269238071115684, time:1750768283.939183s req_ids:[8] +DEBUG 06-24 20:31:23 [manager.py:391] +ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:212.70108222961426ms total_cost_time:212.74662017822266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12562 prompt_cache_len:5151 prompt_cache_ratio:0.4100461709918803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 +DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:24 [batch.py:51] router release req id 8 +INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10942602157592773 s +INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s +DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=230688461594259607233972332783572607387, time:1750768284.1595075s req_ids:[8] +DEBUG 06-24 20:31:24 [manager.py:391] +ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:381.70599937438965ms total_cost_time:381.74939155578613ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12563 prompt_cache_len:5151 prompt_cache_ratio:0.4100135317997294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 +DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:24 [batch.py:51] router release req id 8 +INFO 06-24 20:31:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.11082220077514648 s +INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.11296558380126953 s +DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=4235264440530786418340947760829135116, time:1750768284.548638s req_ids:[8] +DEBUG 06-24 20:31:24 [manager.py:391] +ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:216.43733978271484ms total_cost_time:216.48168563842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12564 prompt_cache_len:5151 prompt_cache_ratio:0.40998089780324737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 +DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:24 [batch.py:51] router release req id 8 +INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s +INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.11071181297302246 s +DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=176193743172511927873914752714383000113, time:1750768284.7757754s req_ids:[8] +DEBUG 06-24 20:31:24 [manager.py:391] +ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:217.49377250671387ms total_cost_time:217.53811836242676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12565 prompt_cache_len:5151 prompt_cache_ratio:0.4099482690011938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 +DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:24 [batch.py:51] router release req id 8 +INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10686206817626953 s +INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.10892176628112793 s +DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=69446486642018248990431353107734448130, time:1750768284.9949796s req_ids:[8] +DEBUG 06-24 20:31:24 [manager.py:391] +ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:211.48014068603516ms total_cost_time:211.52663230895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12566 prompt_cache_len:5151 prompt_cache_ratio:0.4099156453923285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 +DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:25 [batch.py:51] router release req id 8 +INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10872077941894531 s +INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.11070084571838379 s +DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=328678817198159626523087031810306343181, time:1750768285.2133377s req_ids:[8] +DEBUG 06-24 20:31:25 [manager.py:391] +ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:214.11871910095215ms total_cost_time:214.13898468017578ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12567 prompt_cache_len:5151 prompt_cache_ratio:0.4098830269754118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 +DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:25 [batch.py:51] router release req id 8 +INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10793733596801758 s +INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s +DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=84175638531695706572957095835258471430, time:1750768285.4323394s req_ids:[8] +DEBUG 06-24 20:31:25 [manager.py:391] +ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:170.75228691101074ms total_cost_time:170.79472541809082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12568 prompt_cache_len:5151 prompt_cache_ratio:0.40985041374920433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 +DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:25 [batch.py:51] router release req id 8 +INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10772085189819336 s +INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.10960578918457031 s +DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=265504156831964183044608640167717948435, time:1750768285.611286s req_ids:[8] +DEBUG 06-24 20:31:25 [manager.py:391] +ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:374.91536140441895ms total_cost_time:374.95970726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12569 prompt_cache_len:5151 prompt_cache_ratio:0.4098178057124672 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 +DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:25 [batch.py:51] router release req id 8 +INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10776805877685547 s +INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.10979270935058594 s +DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=284357627798559516179485403685622927428, time:1750768285.9909537s req_ids:[8] +DEBUG 06-24 20:31:25 [manager.py:391] +ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:214.4618034362793ms total_cost_time:214.48349952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12570 prompt_cache_len:5151 prompt_cache_ratio:0.4097852028639618 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 +DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:26 [batch.py:51] router release req id 8 +INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10873293876647949 s +INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11074995994567871 s +DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=331567525282126948751008732231542746042, time:1750768286.2130823s req_ids:[8] +DEBUG 06-24 20:31:26 [manager.py:391] +ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:214.3697738647461ms total_cost_time:214.42103385925293ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:12571 prompt_cache_len:5151 prompt_cache_ratio:0.4097526052024501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 +DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:26 [batch.py:51] router release req id 8 +INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10855531692504883 s +INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s +DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=180147962116622310113189789929379873694, time:1750768286.447992s req_ids:[8] +DEBUG 06-24 20:31:26 [manager.py:391] +ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:227.0054817199707ms total_cost_time:227.0512580871582ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12572 prompt_cache_len:5151 prompt_cache_ratio:0.4097200127266942 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 +DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:26 [batch.py:51] router release req id 8 +INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.1084146499633789 s +INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11034393310546875 s +DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=287667223120414895802343739041102217604, time:1750768286.680165s req_ids:[8] +DEBUG 06-24 20:31:26 [manager.py:391] +DEBUG 06-24 20:31:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 51062.732 tokens/s +DEBUG 06-24 20:31:26 [stats.py:37] Avg prompt tokens throughput: 51054.697 tokens/s +DEBUG 06-24 20:31:26 [stats.py:37] Avg generate tokens throughput: 8.035 tokens/s +ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:229.4926643371582ms total_cost_time:229.5379638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12573 prompt_cache_len:5151 prompt_cache_ratio:0.40968742543545694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 +DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:26 [batch.py:51] router release req id 8 +INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10782861709594727 s +INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.1089012622833252 s +DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=255240291397660744534081115180308686188, time:1750768286.9055686s req_ids:[8] +DEBUG 06-24 20:31:26 [manager.py:391] +ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:207.61919021606445ms total_cost_time:207.66592025756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12574 prompt_cache_len:5151 prompt_cache_ratio:0.4096548433275012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 +DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:27 [batch.py:51] router release req id 8 +INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10822033882141113 s +INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11026215553283691 s +DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=127516161619346948698272387698425244521, time:1750768287.1214705s req_ids:[8] +DEBUG 06-24 20:31:27 [manager.py:391] +ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:207.26418495178223ms total_cost_time:207.32474327087402ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:12575 prompt_cache_len:5151 prompt_cache_ratio:0.40962226640159044 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 +DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:27 [batch.py:51] router release req id 8 +INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.11001396179199219 s +INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11203432083129883 s +DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=243020015261130800642486192670525958042, time:1750768287.3432753s req_ids:[8] +DEBUG 06-24 20:31:27 [manager.py:391] +ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:402.2393226623535ms total_cost_time:402.2812843322754ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12576 prompt_cache_len:5151 prompt_cache_ratio:0.40958969465648853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 +DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:27 [batch.py:51] router release req id 8 +INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10833430290222168 s +INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s +DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=335242273285806892006830332171244712358, time:1750768287.7415798s req_ids:[8] +DEBUG 06-24 20:31:27 [manager.py:391] +ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:217.25130081176758ms total_cost_time:217.29564666748047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12577 prompt_cache_len:5151 prompt_cache_ratio:0.4095571280909597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 +DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:27 [batch.py:51] router release req id 8 +INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10951519012451172 s +INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11144495010375977 s +DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=51303382814631372679617565969901822866, time:1750768287.965721s req_ids:[8] +DEBUG 06-24 20:31:27 [manager.py:391] +ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:182.2037696838379ms total_cost_time:182.24835395812988ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12578 prompt_cache_len:5151 prompt_cache_ratio:0.40952456670376847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 +DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:28 [batch.py:51] router release req id 8 +INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s +INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.10984277725219727 s +DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=197487877513946119539464559256284067769, time:1750768288.1528404s req_ids:[8] +DEBUG 06-24 20:31:28 [manager.py:391] +ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:207.85069465637207ms total_cost_time:207.89551734924316ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12579 prompt_cache_len:5151 prompt_cache_ratio:0.40949201049367995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 +DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:28 [batch.py:51] router release req id 8 +INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.1091461181640625 s +INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.11127209663391113 s +DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=104131051347779816479625752324733237580, time:1750768288.3669758s req_ids:[8] +DEBUG 06-24 20:31:28 [manager.py:391] +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:212.0075225830078ms total_cost_time:212.05472946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12580 prompt_cache_len:5151 prompt_cache_ratio:0.40945945945945944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 +DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:28 [batch.py:51] router release req id 8 +INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.10755515098571777 s +INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.10943818092346191 s +DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=324297709927787439945452727901622424637, time:1750768288.5954754s req_ids:[8] +DEBUG 06-24 20:31:28 [manager.py:391] +ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:222.84531593322754ms total_cost_time:222.8987216949463ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:12581 prompt_cache_len:5151 prompt_cache_ratio:0.4094269135998728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 +DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:28 [batch.py:51] router release req id 8 +INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.11018800735473633 s +INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.11210060119628906 s +DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=322516579833666015439633711027431332100, time:1750768288.827038s req_ids:[8] +DEBUG 06-24 20:31:28 [manager.py:391] +ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:230.77678680419922ms total_cost_time:230.8206558227539ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12582 prompt_cache_len:5151 prompt_cache_ratio:0.40939437291368624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 +DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:28 [batch.py:51] router release req id 8 +INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s +INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.1103355884552002 s +DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=166311768373964590449937115278238895555, time:1750768289.0540247s req_ids:[8] +DEBUG 06-24 20:31:29 [manager.py:391] +ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:381.1817169189453ms total_cost_time:381.2253475189209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12583 prompt_cache_len:5151 prompt_cache_ratio:0.4093618373996662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 +DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:29 [batch.py:51] router release req id 8 +INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s +INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.11220908164978027 s +DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=280943928641385432892860780483912264860, time:1750768289.4410005s req_ids:[8] +DEBUG 06-24 20:31:29 [manager.py:391] +ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:214.44964408874512ms total_cost_time:214.4927978515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12584 prompt_cache_len:5151 prompt_cache_ratio:0.40932930705657977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 +DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:29 [batch.py:51] router release req id 8 +INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10741519927978516 s +INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.10979533195495605 s +DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=107483609448302520521400425450512137487, time:1750768289.6620047s req_ids:[8] +DEBUG 06-24 20:31:29 [manager.py:391] +ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:214.96224403381348ms total_cost_time:215.00539779663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12585 prompt_cache_len:5151 prompt_cache_ratio:0.4092967818831943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 +DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:29 [batch.py:51] router release req id 8 +INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10830402374267578 s +INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005711555480957 s +DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=6694361489045797251076872386723220597, time:1750768289.8824768s req_ids:[8] +DEBUG 06-24 20:31:29 [manager.py:391] +ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:170.78590393066406ms total_cost_time:170.82881927490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12586 prompt_cache_len:5151 prompt_cache_ratio:0.40926426187827747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 +DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:29 [batch.py:51] router release req id 8 +INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s +INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.10993480682373047 s +DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=29662953976377615438430970645064532984, time:1750768290.0605233s req_ids:[8] +DEBUG 06-24 20:31:30 [manager.py:391] +ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:204.60271835327148ms total_cost_time:204.65683937072754ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:12587 prompt_cache_len:5151 prompt_cache_ratio:0.40923174704059745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 +DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:30 [batch.py:51] router release req id 8 +INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.11028337478637695 s +INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11228299140930176 s +DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=137804907117672319639438934689464398080, time:1750768290.270396s req_ids:[8] +DEBUG 06-24 20:31:30 [manager.py:391] +ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:210.28661727905273ms total_cost_time:210.33072471618652ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12588 prompt_cache_len:5151 prompt_cache_ratio:0.4091992373689228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 +DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:30 [batch.py:51] router release req id 8 +INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10830020904541016 s +INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031413078308105 s +DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=204934078685628162780595746907253670256, time:1750768290.488244s req_ids:[8] +DEBUG 06-24 20:31:30 [manager.py:391] +ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:389.1465663909912ms total_cost_time:389.1909122467041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12589 prompt_cache_len:5151 prompt_cache_ratio:0.4091667328620224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 +DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:30 [batch.py:51] router release req id 8 +INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10947084426879883 s +INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11156773567199707 s +DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=263747253370599758403886059081224390866, time:1750768290.8844552s req_ids:[8] +DEBUG 06-24 20:31:30 [manager.py:391] +ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:219.36607360839844ms total_cost_time:219.40922737121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12590 prompt_cache_len:5151 prompt_cache_ratio:0.4091342335186656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 +DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:31 [batch.py:51] router release req id 8 +INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10883593559265137 s +INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11081266403198242 s +DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=81222966924710462973498499514896319919, time:1750768291.1108537s req_ids:[8] +DEBUG 06-24 20:31:31 [manager.py:391] +ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:218.37973594665527ms total_cost_time:218.42122077941895ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12591 prompt_cache_len:5151 prompt_cache_ratio:0.4091017393376221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 +DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:31 [batch.py:51] router release req id 8 +INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10838842391967773 s +INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103062629699707 s +DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=213524110672395144227356493622693993170, time:1750768291.3384123s req_ids:[8] +DEBUG 06-24 20:31:31 [manager.py:391] +ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:215.8212661743164ms total_cost_time:215.8658504486084ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12592 prompt_cache_len:5151 prompt_cache_ratio:0.409069250317662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 +DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:31 [batch.py:51] router release req id 8 +INFO 06-24 20:31:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10917544364929199 s +INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11111807823181152 s +DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=227393303705009321525320324786662152630, time:1750768291.5590465s req_ids:[8] +DEBUG 06-24 20:31:31 [manager.py:391] +ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:219.42758560180664ms total_cost_time:219.47216987609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12593 prompt_cache_len:5151 prompt_cache_ratio:0.40903676645755577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 +DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:31 [batch.py:51] router release req id 8 +INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10886287689208984 s +INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s +DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=191694708510019816298433550817819558622, time:1750768291.7846642s req_ids:[8] +DEBUG 06-24 20:31:31 [manager.py:391] +ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:207.79967308044434ms total_cost_time:207.84354209899902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12594 prompt_cache_len:5151 prompt_cache_ratio:0.40900428775607434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 +DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:31 [batch.py:51] router release req id 8 +INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10893082618713379 s +INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11081695556640625 s +DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=176403923553384781517880922750209277091, time:1750768291.999398s req_ids:[8] +DEBUG 06-24 20:31:31 [manager.py:391] +ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:384.69386100769043ms total_cost_time:384.74011421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12595 prompt_cache_len:5151 prompt_cache_ratio:0.40897181421198886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 +DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:32 [batch.py:51] router release req id 8 +INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10855388641357422 s +INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s +DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=42619443031892006306929523972689808340, time:1750768292.3936365s req_ids:[8] +DEBUG 06-24 20:31:32 [manager.py:391] +ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:215.7583236694336ms total_cost_time:215.80266952514648ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12596 prompt_cache_len:5151 prompt_cache_ratio:0.40893934582407115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 +DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:32 [batch.py:51] router release req id 8 +INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10793423652648926 s +INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s +DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=211951306842679428969452697780604626848, time:1750768292.628538s req_ids:[8] +DEBUG 06-24 20:31:32 [manager.py:391] +ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:224.73859786987305ms total_cost_time:224.78294372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12597 prompt_cache_len:5151 prompt_cache_ratio:0.4089068825910931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 +DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:32 [batch.py:51] router release req id 8 +INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s +INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.11075329780578613 s +DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=267389676640857197854135979600310912569, time:1750768292.846806s req_ids:[8] +DEBUG 06-24 20:31:32 [manager.py:391] +ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:211.7900848388672ms total_cost_time:211.83490753173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12598 prompt_cache_len:5151 prompt_cache_ratio:0.40887442451182726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 +DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:32 [batch.py:51] router release req id 8 +INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10745000839233398 s +INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.10975933074951172 s +DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=13380925876325099253765580395316687642, time:1750768293.0660636s req_ids:[8] +DEBUG 06-24 20:31:33 [manager.py:391] +ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:213.2432460784912ms total_cost_time:213.2880687713623ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12599 prompt_cache_len:5151 prompt_cache_ratio:0.4088419715850464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 +DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:33 [batch.py:51] router release req id 8 +INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s +INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.1108856201171875 s +DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=172688852117008446934263000394113719866, time:1750768293.2856596s req_ids:[8] +DEBUG 06-24 20:31:33 [manager.py:391] +ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:213.3927345275879ms total_cost_time:213.45281600952148ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12600 prompt_cache_len:5151 prompt_cache_ratio:0.4088095238095238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 +DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:33 [batch.py:51] router release req id 8 +INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10820579528808594 s +INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.1101992130279541 s +DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=215183729230698782003611422892713412938, time:1750768293.5142765s req_ids:[8] +DEBUG 06-24 20:31:33 [manager.py:391] +ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:212.60905265808105ms total_cost_time:212.65625953674316ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12601 prompt_cache_len:5151 prompt_cache_ratio:0.408777081184033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 +DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:33 [batch.py:51] router release req id 8 +INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10876321792602539 s +INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.11068201065063477 s +DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=246391954737351516394429210280621599632, time:1750768293.724613s req_ids:[8] +DEBUG 06-24 20:31:33 [manager.py:391] +ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:378.69954109191895ms total_cost_time:378.74555587768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12602 prompt_cache_len:5151 prompt_cache_ratio:0.40874464370734803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 +DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:34 [batch.py:51] router release req id 8 +INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10871195793151855 s +INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11063861846923828 s +DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=4479869930058026851675974494744672441, time:1750768294.1113396s req_ids:[8] +DEBUG 06-24 20:31:34 [manager.py:391] +ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:211.49396896362305ms total_cost_time:211.53807640075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12603 prompt_cache_len:5151 prompt_cache_ratio:0.40871221137824326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 +DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:34 [batch.py:51] router release req id 8 +INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10725045204162598 s +INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.10913515090942383 s +DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=98374254496721231379667898280863423693, time:1750768294.3297024s req_ids:[8] +DEBUG 06-24 20:31:34 [manager.py:391] +ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:213.4718894958496ms total_cost_time:213.51909637451172ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12604 prompt_cache_len:5151 prompt_cache_ratio:0.4086797841954935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 +DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:34 [batch.py:51] router release req id 8 +INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.11089706420898438 s +INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11214733123779297 s +DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=39509969314765756603375269342544092063, time:1750768294.549787s req_ids:[8] +DEBUG 06-24 20:31:34 [manager.py:391] +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:213.8369083404541ms total_cost_time:213.8817310333252ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12605 prompt_cache_len:5151 prompt_cache_ratio:0.40864736215787384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 +DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:34 [batch.py:51] router release req id 8 +INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10840320587158203 s +INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11043596267700195 s +DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=56589186161222404721289733482045726887, time:1750768294.770313s req_ids:[8] +DEBUG 06-24 20:31:34 [manager.py:391] +ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:215.43478965759277ms total_cost_time:215.47913551330566ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12606 prompt_cache_len:5151 prompt_cache_ratio:0.4086149452641599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 +DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:34 [batch.py:51] router release req id 8 +INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10842108726501465 s +INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11032271385192871 s +DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=311817808803823164999071232003172973560, time:1750768294.990627s req_ids:[8] +DEBUG 06-24 20:31:34 [manager.py:391] +ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:222.88155555725098ms total_cost_time:222.92423248291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12607 prompt_cache_len:5151 prompt_cache_ratio:0.4085825335131276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 +DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:35 [batch.py:51] router release req id 8 +INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10779500007629395 s +INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s +DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=277651683383830551115542736121476538187, time:1750768295.225302s req_ids:[8] +DEBUG 06-24 20:31:35 [manager.py:391] +ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:405.9169292449951ms total_cost_time:405.961275100708ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12608 prompt_cache_len:5151 prompt_cache_ratio:0.4085501269035533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 +DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:35 [batch.py:51] router release req id 8 +INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10963010787963867 s +INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.11173057556152344 s +DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=100945803705115710375740433042445865933, time:1750768295.6297054s req_ids:[8] +DEBUG 06-24 20:31:35 [manager.py:391] +ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:219.29287910461426ms total_cost_time:219.33698654174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12609 prompt_cache_len:5151 prompt_cache_ratio:0.40851772543421366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 +DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:35 [batch.py:51] router release req id 8 +INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10914349555969238 s +INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.11135268211364746 s +DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=208096663102930731180491535232959140779, time:1750768295.8511267s req_ids:[8] +DEBUG 06-24 20:31:35 [manager.py:391] +ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:215.88659286499023ms total_cost_time:215.93093872070312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12610 prompt_cache_len:5151 prompt_cache_ratio:0.4084853291038858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 +DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:35 [batch.py:51] router release req id 8 +INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10940742492675781 s +INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11138200759887695 s +DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=19190277716743124981175776702309379122, time:1750768296.0750487s req_ids:[8] +DEBUG 06-24 20:31:36 [manager.py:391] +ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:210.55293083190918ms total_cost_time:210.59894561767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12611 prompt_cache_len:5151 prompt_cache_ratio:0.40845293791134724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 +DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:36 [batch.py:51] router release req id 8 +INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10948371887207031 s +INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11157917976379395 s +DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=128186724662158171630735639117363867303, time:1750768296.303151s req_ids:[8] +DEBUG 06-24 20:31:36 [manager.py:391] +ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:228.73711585998535ms total_cost_time:228.78575325012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:12612 prompt_cache_len:5151 prompt_cache_ratio:0.40842055185537585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 +DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:36 [batch.py:51] router release req id 8 +INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10833549499511719 s +INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s +DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=275177509666351406933765296553645368133, time:1750768296.5252721s req_ids:[8] +DEBUG 06-24 20:31:36 [manager.py:391] +ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:203.79185676574707ms total_cost_time:203.83596420288086ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12613 prompt_cache_len:5151 prompt_cache_ratio:0.4083881709347499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 +DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:36 [batch.py:51] router release req id 8 +INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s +INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.10972142219543457 s +DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=66684521766955871359378496415934350622, time:1750768296.7348616s req_ids:[8] +DEBUG 06-24 20:31:36 [manager.py:391] +DEBUG 06-24 20:31:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 51363.424 tokens/s +DEBUG 06-24 20:31:36 [stats.py:37] Avg prompt tokens throughput: 51355.268 tokens/s +DEBUG 06-24 20:31:36 [stats.py:37] Avg generate tokens throughput: 8.156 tokens/s +ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:207.6246738433838ms total_cost_time:207.66973495483398ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12614 prompt_cache_len:5151 prompt_cache_ratio:0.408355795148248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 +DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:36 [batch.py:51] router release req id 8 +INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.1076195240020752 s +INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.10949182510375977 s +DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=314904055925532987378438403082054922362, time:1750768296.9516168s req_ids:[8] +DEBUG 06-24 20:31:36 [manager.py:391] +ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:208.83417129516602ms total_cost_time:208.8785171508789ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12615 prompt_cache_len:5151 prompt_cache_ratio:0.4083234244946492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 +DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:37 [batch.py:51] router release req id 8 +INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102912425994873 s +DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=130424800144092031385697553977849962366, time:1750768297.1666281s req_ids:[8] +DEBUG 06-24 20:31:37 [manager.py:391] +ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:422.1012592315674ms total_cost_time:422.1458435058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12616 prompt_cache_len:5151 prompt_cache_ratio:0.40829105897273305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 +DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:37 [batch.py:51] router release req id 8 +INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10746240615844727 s +INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092832088470459 s +DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=49385675762759096937812373597030655581, time:1750768297.5948493s req_ids:[8] +DEBUG 06-24 20:31:37 [manager.py:391] +ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:198.89426231384277ms total_cost_time:198.94099235534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12617 prompt_cache_len:5151 prompt_cache_ratio:0.4082586985812792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 +DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:37 [batch.py:51] router release req id 8 +INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10738992691040039 s +INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s +DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=254991211651364340267426115037189287333, time:1750768297.8047986s req_ids:[8] +DEBUG 06-24 20:31:37 [manager.py:391] +ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:216.50123596191406ms total_cost_time:216.54582023620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12618 prompt_cache_len:5151 prompt_cache_ratio:0.408226343319068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 +DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:37 [batch.py:51] router release req id 8 +INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10979819297790527 s +INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.11103105545043945 s +DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=128282666838351181239466874871923139196, time:1750768298.0243397s req_ids:[8] +DEBUG 06-24 20:31:38 [manager.py:391] +ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:213.7911319732666ms total_cost_time:213.8378620147705ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12619 prompt_cache_len:5151 prompt_cache_ratio:0.4081939931848799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 +DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:38 [batch.py:51] router release req id 8 +INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10780215263366699 s +INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10972476005554199 s +DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=236396750779510924908427445948401879365, time:1750768298.2437232s req_ids:[8] +DEBUG 06-24 20:31:38 [manager.py:391] +ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:212.1715545654297ms total_cost_time:212.21613883972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12620 prompt_cache_len:5151 prompt_cache_ratio:0.40816164817749606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 +DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:38 [batch.py:51] router release req id 8 +INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10762906074523926 s +INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10953521728515625 s +DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=112164838036872430418340581767390249956, time:1750768298.4629874s req_ids:[8] +DEBUG 06-24 20:31:38 [manager.py:391] +ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:215.5766487121582ms total_cost_time:215.620756149292ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12621 prompt_cache_len:5151 prompt_cache_ratio:0.40812930829569766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 +DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:38 [batch.py:51] router release req id 8 +INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10799145698547363 s +INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10919332504272461 s +DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=291818276245563997605504094071066155941, time:1750768298.6802568s req_ids:[8] +DEBUG 06-24 20:31:38 [manager.py:391] +ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:181.1995506286621ms total_cost_time:181.243896484375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12622 prompt_cache_len:5151 prompt_cache_ratio:0.4080969735382665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 +DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:38 [batch.py:51] router release req id 8 +INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10691118240356445 s +INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10873770713806152 s +DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=83247834203312958373580611365910668147, time:1750768298.8741672s req_ids:[8] +DEBUG 06-24 20:31:38 [manager.py:391] +ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:386.6455554962158ms total_cost_time:386.6894245147705ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12623 prompt_cache_len:5151 prompt_cache_ratio:0.4080646439039848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 +DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:39 [batch.py:51] router release req id 8 +INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.1079719066619873 s +INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.10982608795166016 s +DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=172995786502920152628159237456591888276, time:1750768299.26852s req_ids:[8] +DEBUG 06-24 20:31:39 [manager.py:391] +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:215.8188819885254ms total_cost_time:215.86298942565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12624 prompt_cache_len:5151 prompt_cache_ratio:0.408032319391635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 +DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:39 [batch.py:51] router release req id 8 +INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10767245292663574 s +INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.10922408103942871 s +DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=204387326474375111136189974230427065687, time:1750768299.4933753s req_ids:[8] +DEBUG 06-24 20:31:39 [manager.py:391] +ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:173.1557846069336ms total_cost_time:173.19917678833008ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12625 prompt_cache_len:5151 prompt_cache_ratio:0.408 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 +DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:39 [batch.py:51] router release req id 8 +INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10878467559814453 s +INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060929298400879 s +DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=304642818181010983898201194283979085085, time:1750768299.6713095s req_ids:[8] +DEBUG 06-24 20:31:39 [manager.py:391] +ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:208.3911895751953ms total_cost_time:208.4343433380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12626 prompt_cache_len:5151 prompt_cache_ratio:0.4079676857278631 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 +DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:39 [batch.py:51] router release req id 8 +INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10953617095947266 s +INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.11142945289611816 s +DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=229894501546547682786608550757233509770, time:1750768299.88258s req_ids:[8] +DEBUG 06-24 20:31:39 [manager.py:391] +ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:207.77273178100586ms total_cost_time:207.81779289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12627 prompt_cache_len:5151 prompt_cache_ratio:0.4079353765740081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 +DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:40 [batch.py:51] router release req id 8 +INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.10801196098327637 s +INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.11036086082458496 s +DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=136062935555223989384343637909698895697, time:1750768300.096595s req_ids:[8] +DEBUG 06-24 20:31:40 [manager.py:391] +ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:209.23113822937012ms total_cost_time:209.27810668945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12628 prompt_cache_len:5151 prompt_cache_ratio:0.40790307253721886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 +DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:40 [batch.py:51] router release req id 8 +INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.1084127426147461 s +INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.11043596267700195 s +DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=270474597520245467067732075223890167514, time:1750768300.3128815s req_ids:[8] +DEBUG 06-24 20:31:40 [manager.py:391] +ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:212.29290962219238ms total_cost_time:212.33892440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12629 prompt_cache_len:5151 prompt_cache_ratio:0.40787077361628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 +DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:40 [batch.py:51] router release req id 8 +INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.3120231628417969 s +INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.3141918182373047 s +DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=67244638823611783429480534003193936194, time:1750768300.7550068s req_ids:[8] +DEBUG 06-24 20:31:40 [manager.py:391] +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:442.2643184661865ms total_cost_time:442.3098564147949ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12630 prompt_cache_len:5151 prompt_cache_ratio:0.40783847980997623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 +DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:40 [batch.py:51] router release req id 8 +INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.10859560966491699 s +INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106257438659668 s +DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=329334651340126069787057424736291684654, time:1750768300.9837117s req_ids:[8] +DEBUG 06-24 20:31:40 [manager.py:391] +ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:211.1196517944336ms total_cost_time:211.16280555725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12631 prompt_cache_len:5151 prompt_cache_ratio:0.4078061911170929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 +DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:41 [batch.py:51] router release req id 8 +INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s +INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s +DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=149420506903031264561203140609186492280, time:1750768301.2039337s req_ids:[8] +DEBUG 06-24 20:31:41 [manager.py:391] +ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:210.34550666809082ms total_cost_time:210.39175987243652ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12632 prompt_cache_len:5151 prompt_cache_ratio:0.40777390753641546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 +DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:41 [batch.py:51] router release req id 8 +INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s +INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.11020708084106445 s +DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=69386299104513770406853117549657735794, time:1750768301.420113s req_ids:[8] +DEBUG 06-24 20:31:41 [manager.py:391] +ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:212.59641647338867ms total_cost_time:212.64147758483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12633 prompt_cache_len:5151 prompt_cache_ratio:0.40774162906673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 +DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:41 [batch.py:51] router release req id 8 +INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10882091522216797 s +INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.11072635650634766 s +DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=109523942067844912314154747091131213825, time:1750768301.6392484s req_ids:[8] +DEBUG 06-24 20:31:41 [manager.py:391] +ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:214.7073745727539ms total_cost_time:214.7500514984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12634 prompt_cache_len:5151 prompt_cache_ratio:0.4077093557068229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 +DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:41 [batch.py:51] router release req id 8 +INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10736441612243652 s +INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.10914349555969238 s +DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=257722918136644318243733046937363596137, time:1750768301.8586848s req_ids:[8] +DEBUG 06-24 20:31:41 [manager.py:391] +ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:215.4226303100586ms total_cost_time:215.46363830566406ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:12635 prompt_cache_len:5151 prompt_cache_ratio:0.4076770874554808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 +DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:42 [batch.py:51] router release req id 8 +INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.31010913848876953 s +INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122293949127197 s +DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=171882036930096393939942073787469503324, time:1750768302.2937193s req_ids:[8] +DEBUG 06-24 20:31:42 [manager.py:391] +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:432.2371482849121ms total_cost_time:432.2810173034668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12636 prompt_cache_len:5151 prompt_cache_ratio:0.40764482431149096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 +DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:42 [batch.py:51] router release req id 8 +INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.10843443870544434 s +INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066246032714844 s +DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=248272909618591885139621258374474596962, time:1750768302.5200408s req_ids:[8] +DEBUG 06-24 20:31:42 [manager.py:391] +ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:214.60914611816406ms total_cost_time:214.65134620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12637 prompt_cache_len:5151 prompt_cache_ratio:0.4076125662736409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 +DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:42 [batch.py:51] router release req id 8 +INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s +INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11045145988464355 s +DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=330430758385035665620981689425030000775, time:1750768302.7392192s req_ids:[8] +DEBUG 06-24 20:31:42 [manager.py:391] +ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:214.79487419128418ms total_cost_time:214.83850479125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12638 prompt_cache_len:5151 prompt_cache_ratio:0.4075803133407185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 +DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:42 [batch.py:51] router release req id 8 +INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.1080935001373291 s +INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11078071594238281 s +DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=336640953550511299264891062090898515991, time:1750768302.9625185s req_ids:[8] +DEBUG 06-24 20:31:42 [manager.py:391] +ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:216.34769439697266ms total_cost_time:216.39156341552734ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12639 prompt_cache_len:5151 prompt_cache_ratio:0.407548065511512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 +DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:43 [batch.py:51] router release req id 8 +INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s +INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100010871887207 s +DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=230705201634629089095159724397706240062, time:1750768303.1857138s req_ids:[8] +DEBUG 06-24 20:31:43 [manager.py:391] +ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:210.13569831848145ms total_cost_time:210.18075942993164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12640 prompt_cache_len:5151 prompt_cache_ratio:0.40751582278481013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 +DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:43 [batch.py:51] router release req id 8 +INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10761380195617676 s +INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.10983586311340332 s +DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=334315985789690237470355374025820469899, time:1750768303.4103446s req_ids:[8] +DEBUG 06-24 20:31:43 [manager.py:391] +ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:387.9108428955078ms total_cost_time:387.97712326049805ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:12641 prompt_cache_len:5151 prompt_cache_ratio:0.407483585159402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 +DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:43 [batch.py:51] router release req id 8 +INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10854125022888184 s +INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.11081242561340332 s +DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=206346782021594085184786600562290783007, time:1750768303.7976635s req_ids:[8] +DEBUG 06-24 20:31:43 [manager.py:391] +ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:218.96004676818848ms total_cost_time:219.00367736816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12642 prompt_cache_len:5151 prompt_cache_ratio:0.4074513526340769 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 +DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:43 [batch.py:51] router release req id 8 +INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10906219482421875 s +INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11114883422851562 s +DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=107375983674889816447451499802973515993, time:1750768304.0184083s req_ids:[8] +DEBUG 06-24 20:31:44 [manager.py:391] +ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:213.40417861938477ms total_cost_time:213.44852447509766ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12643 prompt_cache_len:5151 prompt_cache_ratio:0.4074191252076248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 +DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:44 [batch.py:51] router release req id 8 +INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s +INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.10973286628723145 s +DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=114647246359924551708550263009200473238, time:1750768304.238656s req_ids:[8] +DEBUG 06-24 20:31:44 [manager.py:391] +ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:208.96363258361816ms total_cost_time:209.00774002075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12644 prompt_cache_len:5151 prompt_cache_ratio:0.40738690287883583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 +DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:44 [batch.py:51] router release req id 8 +INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10766315460205078 s +INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.1097269058227539 s +DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=201920586067294815178528743635256730166, time:1750768304.4562602s req_ids:[8] +DEBUG 06-24 20:31:44 [manager.py:391] +ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:212.91708946228027ms total_cost_time:212.99052238464355ms,out_token_counter:1 mean_per_token_cost_time: 0.07343292236328125ms prompt_token_num:12645 prompt_cache_len:5151 prompt_cache_ratio:0.40735468564650057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 +DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:44 [batch.py:51] router release req id 8 +INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10894894599914551 s +INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11088824272155762 s +DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=102036719866630716199268313169755550851, time:1750768304.6750374s req_ids:[8] +DEBUG 06-24 20:31:44 [manager.py:391] +ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:216.86148643493652ms total_cost_time:216.904878616333ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12646 prompt_cache_len:5151 prompt_cache_ratio:0.4073224735094101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 +DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:44 [batch.py:51] router release req id 8 +INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.1087334156036377 s +INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11079001426696777 s +DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=237289973530030445464194296609661123651, time:1750768304.8974686s req_ids:[8] +DEBUG 06-24 20:31:44 [manager.py:391] +ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:397.2899913787842ms total_cost_time:397.3357677459717ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12647 prompt_cache_len:5151 prompt_cache_ratio:0.40729026646635563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 +DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:45 [batch.py:51] router release req id 8 +INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10901689529418945 s +INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11100959777832031 s +DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=225514930938261948423374102781879818224, time:1750768305.3026752s req_ids:[8] +DEBUG 06-24 20:31:45 [manager.py:391] +ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:216.22705459594727ms total_cost_time:216.27020835876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12648 prompt_cache_len:5151 prompt_cache_ratio:0.40725806451612906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 +DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:45 [batch.py:51] router release req id 8 +INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10810184478759766 s +INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s +DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=203140632764145214237619762604360214064, time:1750768305.525989s req_ids:[8] +DEBUG 06-24 20:31:45 [manager.py:391] +ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:212.02850341796875ms total_cost_time:212.07213401794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12649 prompt_cache_len:5151 prompt_cache_ratio:0.40722586765752233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 +DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:45 [batch.py:51] router release req id 8 +INFO 06-24 20:31:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s +INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11053109169006348 s +DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=113474873977158108725341840617779561671, time:1750768305.7442186s req_ids:[8] +DEBUG 06-24 20:31:45 [manager.py:391] +ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:213.96183967590332ms total_cost_time:214.0064239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12650 prompt_cache_len:5151 prompt_cache_ratio:0.4071936758893281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 +DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:45 [batch.py:51] router release req id 8 +INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10895609855651855 s +INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11105108261108398 s +DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=34859635389651686775303458706713338653, time:1750768305.9623508s req_ids:[8] +DEBUG 06-24 20:31:45 [manager.py:391] +ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:207.82017707824707ms total_cost_time:207.84783363342285ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12651 prompt_cache_len:5151 prompt_cache_ratio:0.4071614892103391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 +DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:46 [batch.py:51] router release req id 8 +INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10808396339416504 s +INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938215255737305 s +DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=79184847473269647403676145899067018123, time:1750768306.175273s req_ids:[8] +DEBUG 06-24 20:31:46 [manager.py:391] +ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:217.04769134521484ms total_cost_time:217.08989143371582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12652 prompt_cache_len:5151 prompt_cache_ratio:0.40712930761934873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 +DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:46 [batch.py:51] router release req id 8 +INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.1085512638092041 s +INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10960006713867188 s +DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=288670148886631215790036199712377442612, time:1750768306.3961313s req_ids:[8] +DEBUG 06-24 20:31:46 [manager.py:391] +ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:168.40815544128418ms total_cost_time:168.45107078552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12653 prompt_cache_len:5151 prompt_cache_ratio:0.40709713111515056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 +DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:46 [batch.py:51] router release req id 8 +INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s +INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10944032669067383 s +DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=222792922835404461972003460922181995091, time:1750768306.5729573s req_ids:[8] +DEBUG 06-24 20:31:46 [manager.py:391] +ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:31:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 50019.223 tokens/s +DEBUG 06-24 20:31:46 [stats.py:37] Avg prompt tokens throughput: 50011.207 tokens/s +DEBUG 06-24 20:31:46 [stats.py:37] Avg generate tokens throughput: 8.016 tokens/s +INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:386.08407974243164ms total_cost_time:386.1274719238281ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12654 prompt_cache_len:5151 prompt_cache_ratio:0.40706495969653866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 +DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:46 [batch.py:51] router release req id 8 +INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10803937911987305 s +INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.11021256446838379 s +DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=241427336653067322443084062843377361305, time:1750768306.9655347s req_ids:[8] +DEBUG 06-24 20:31:46 [manager.py:391] +ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:216.25542640686035ms total_cost_time:216.29810333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12655 prompt_cache_len:5151 prompt_cache_ratio:0.4070327933623074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 +INFO 06-24 20:31:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:47 [batch.py:51] router release req id 8 +INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10820508003234863 s +INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.1100006103515625 s +DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=15052656183451195163903035685505686307, time:1750768307.1866841s req_ids:[8] +DEBUG 06-24 20:31:47 [manager.py:391] +ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:171.4019775390625ms total_cost_time:171.4463233947754ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12656 prompt_cache_len:5151 prompt_cache_ratio:0.4070006321112516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 +DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:47 [batch.py:51] router release req id 8 +INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10941553115844727 s +INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.1114494800567627 s +DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=175385716349436320833296663404865624103, time:1750768307.3646085s req_ids:[8] +DEBUG 06-24 20:31:47 [manager.py:391] +ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:204.31900024414062ms total_cost_time:204.3454647064209ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:12657 prompt_cache_len:5151 prompt_cache_ratio:0.4069684759421664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 +DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:47 [batch.py:51] router release req id 8 +INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10741281509399414 s +INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.10927486419677734 s +DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=189011367133681998259886823417022571699, time:1750768307.5761268s req_ids:[8] +DEBUG 06-24 20:31:47 [manager.py:391] +ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:211.54165267944336ms total_cost_time:211.57050132751465ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12658 prompt_cache_len:5151 prompt_cache_ratio:0.40693632485384734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 +DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:47 [batch.py:51] router release req id 8 +INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10930466651916504 s +INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.11123132705688477 s +DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=103805325351368421629367651384329074511, time:1750768307.7913852s req_ids:[8] +DEBUG 06-24 20:31:47 [manager.py:391] +ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:215.61050415039062ms total_cost_time:215.63124656677246ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12659 prompt_cache_len:5151 prompt_cache_ratio:0.40690417884509045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 +DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:47 [batch.py:51] router release req id 8 +INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10879802703857422 s +INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11072254180908203 s +DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=211933906126234192575776803144193416354, time:1750768308.0121512s req_ids:[8] +DEBUG 06-24 20:31:48 [manager.py:391] +ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:211.991548538208ms total_cost_time:212.01324462890625ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12660 prompt_cache_len:5151 prompt_cache_ratio:0.40687203791469195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 +DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:48 [batch.py:51] router release req id 8 +INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10790467262268066 s +INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.10986804962158203 s +DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=24670051913982760462006229758086957340, time:1750768308.232683s req_ids:[8] +DEBUG 06-24 20:31:48 [manager.py:391] +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:381.38699531555176ms total_cost_time:381.43348693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12661 prompt_cache_len:5151 prompt_cache_ratio:0.40683990206144854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 +DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:48 [batch.py:51] router release req id 8 +INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10827994346618652 s +INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11026358604431152 s +DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=126083402449422554401665426936836769630, time:1750768308.6183367s req_ids:[8] +DEBUG 06-24 20:31:48 [manager.py:391] +ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:213.0563259124756ms total_cost_time:213.10043334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12662 prompt_cache_len:5151 prompt_cache_ratio:0.4068077712841573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 +DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:48 [batch.py:51] router release req id 8 +INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s +INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11012840270996094 s +DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=338761029901149407147707023323693528291, time:1750768308.8387291s req_ids:[8] +DEBUG 06-24 20:31:48 [manager.py:391] +ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:213.40012550354004ms total_cost_time:213.44494819641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12663 prompt_cache_len:5151 prompt_cache_ratio:0.4067756455816157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 +DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:48 [batch.py:51] router release req id 8 +INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10886764526367188 s +INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.11089253425598145 s +DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=179643077432267788246854826850319669097, time:1750768309.0572512s req_ids:[8] +DEBUG 06-24 20:31:49 [manager.py:391] +ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:214.84708786010742ms total_cost_time:214.8904800415039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12664 prompt_cache_len:5151 prompt_cache_ratio:0.4067435249526216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 +DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:49 [batch.py:51] router release req id 8 +INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10624527931213379 s +INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.10817766189575195 s +DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=151060297139733678092011687537845316169, time:1750768309.2781818s req_ids:[8] +DEBUG 06-24 20:31:49 [manager.py:391] +ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:215.67964553833008ms total_cost_time:215.72303771972656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12665 prompt_cache_len:5151 prompt_cache_ratio:0.40671140939597317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 +DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:49 [batch.py:51] router release req id 8 +INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10843420028686523 s +INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.11045432090759277 s +DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=223412114021006797894461846542853570042, time:1750768309.5000458s req_ids:[8] +DEBUG 06-24 20:31:49 [manager.py:391] +ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:214.29729461669922ms total_cost_time:214.34426307678223ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12666 prompt_cache_len:5151 prompt_cache_ratio:0.40667929891046894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 +DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:49 [batch.py:51] router release req id 8 +INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10710525512695312 s +INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.10902166366577148 s +DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=247148326681128066113319362166926025056, time:1750768309.722744s req_ids:[8] +DEBUG 06-24 20:31:49 [manager.py:391] +ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:347.5487232208252ms total_cost_time:347.5940227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12667 prompt_cache_len:5151 prompt_cache_ratio:0.406647193494908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 +DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:49 [batch.py:51] router release req id 8 +INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10783839225769043 s +INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.10988974571228027 s +DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=103032247160829965697417290584232196604, time:1750768310.0752413s req_ids:[8] +DEBUG 06-24 20:31:50 [manager.py:391] +ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:210.51788330078125ms total_cost_time:210.56294441223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12668 prompt_cache_len:5151 prompt_cache_ratio:0.4066150931480897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 +DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:50 [batch.py:51] router release req id 8 +INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10957884788513184 s +INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11151528358459473 s +DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=154016029002090736858651509580717042448, time:1750768310.2934964s req_ids:[8] +DEBUG 06-24 20:31:50 [manager.py:391] +ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:210.1879119873047ms total_cost_time:210.27469635009766ms,out_token_counter:1 mean_per_token_cost_time: 0.08678436279296875ms prompt_token_num:12669 prompt_cache_len:5151 prompt_cache_ratio:0.40658299786881363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 +DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:50 [batch.py:51] router release req id 8 +INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10894203186035156 s +INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11097598075866699 s +DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=294052799059985612734264527520075482852, time:1750768310.5149634s req_ids:[8] +DEBUG 06-24 20:31:50 [manager.py:391] +ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:214.67113494873047ms total_cost_time:214.71381187438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12670 prompt_cache_len:5151 prompt_cache_ratio:0.40655090765588003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 +DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:50 [batch.py:51] router release req id 8 +INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10744619369506836 s +INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.1094353199005127 s +DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=261407527087926550242535254081976893256, time:1750768310.7337236s req_ids:[8] +DEBUG 06-24 20:31:50 [manager.py:391] +ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:213.35315704345703ms total_cost_time:213.39893341064453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12671 prompt_cache_len:5151 prompt_cache_ratio:0.40651882250808935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 +DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:50 [batch.py:51] router release req id 8 +INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s +INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s +DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=314205030846017005052871133587295736800, time:1750768310.954373s req_ids:[8] +DEBUG 06-24 20:31:50 [manager.py:391] +ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:214.86902236938477ms total_cost_time:214.91241455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12672 prompt_cache_len:5151 prompt_cache_ratio:0.40648674242424243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 +DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:51 [batch.py:51] router release req id 8 +INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.10738229751586914 s +INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10934972763061523 s +DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=157931708247133893830656865215721452517, time:1750768311.1769524s req_ids:[8] +DEBUG 06-24 20:31:51 [manager.py:391] +ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:382.5359344482422ms total_cost_time:382.5807571411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12673 prompt_cache_len:5151 prompt_cache_ratio:0.4064546674031405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 +DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:51 [batch.py:51] router release req id 8 +INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.1079092025756836 s +INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10990381240844727 s +DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=167077517496578843993007422321866780804, time:1750768311.5629737s req_ids:[8] +DEBUG 06-24 20:31:51 [manager.py:391] +ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:210.04438400268555ms total_cost_time:210.08992195129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12674 prompt_cache_len:5151 prompt_cache_ratio:0.4064225974435853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 +DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:51 [batch.py:51] router release req id 8 +INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.10710835456848145 s +INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10903143882751465 s +DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=295740367576957540609261981152313585471, time:1750768311.7938097s req_ids:[8] +DEBUG 06-24 20:31:51 [manager.py:391] +ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:228.9111614227295ms total_cost_time:228.95479202270508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12675 prompt_cache_len:5151 prompt_cache_ratio:0.4063905325443787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 +DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:51 [batch.py:51] router release req id 8 +INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10876965522766113 s +INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11072874069213867 s +DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=298061572569535717755348666789268831610, time:1750768312.0154076s req_ids:[8] +DEBUG 06-24 20:31:52 [manager.py:391] +ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:212.09955215454102ms total_cost_time:212.144136428833ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12676 prompt_cache_len:5151 prompt_cache_ratio:0.4063584727043231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 +DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:52 [batch.py:51] router release req id 8 +INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10800957679748535 s +INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11001944541931152 s +DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=136602193909651840998475188496251938787, time:1750768312.234483s req_ids:[8] +DEBUG 06-24 20:31:52 [manager.py:391] +ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:214.76435661315918ms total_cost_time:214.80894088745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12677 prompt_cache_len:5151 prompt_cache_ratio:0.40632641792222135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 +DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:52 [batch.py:51] router release req id 8 +INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10886740684509277 s +INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11098861694335938 s +DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=141251011661322143949347651505877564268, time:1750768312.457678s req_ids:[8] +DEBUG 06-24 20:31:52 [manager.py:391] +ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:208.33468437194824ms total_cost_time:208.37903022766113ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12678 prompt_cache_len:5151 prompt_cache_ratio:0.4062943681968765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 +DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:52 [batch.py:51] router release req id 8 +INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s +INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.10865044593811035 s +DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=336715324598648123939928572162448288918, time:1750768312.6725504s req_ids:[8] +DEBUG 06-24 20:31:52 [manager.py:391] +ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:173.01654815673828ms total_cost_time:173.05898666381836ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12679 prompt_cache_len:5151 prompt_cache_ratio:0.406262323527092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 +DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:52 [batch.py:51] router release req id 8 +INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10798311233520508 s +INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.10997819900512695 s +DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=182147693893813811509103063486495442975, time:1750768312.8503416s req_ids:[8] +DEBUG 06-24 20:31:52 [manager.py:391] +ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:371.82164192199707ms total_cost_time:371.86670303344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12680 prompt_cache_len:5151 prompt_cache_ratio:0.40623028391167193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 +DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:53 [batch.py:51] router release req id 8 +INFO 06-24 20:31:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:31:53 [statics_utils.py:24] mean first cost: 229.8075781236348 ms +INFO 06-24 20:31:53 [statics_utils.py:24] mean per token cost: 0.061028083524552496 ms +INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10729265213012695 s +INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s +DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=100533128434540306893089057140164652921, time:1750768313.228158s req_ids:[8] +DEBUG 06-24 20:31:53 [manager.py:391] +ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:212.59117126464844ms total_cost_time:212.63432502746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12681 prompt_cache_len:5151 prompt_cache_ratio:0.4061982493494204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 +DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:53 [batch.py:51] router release req id 8 +INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10828590393066406 s +INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.11043643951416016 s +DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=113012843392126164701435233802788079741, time:1750768313.4473777s req_ids:[8] +DEBUG 06-24 20:31:53 [manager.py:391] +ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:212.3394012451172ms total_cost_time:212.38398551940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12682 prompt_cache_len:5151 prompt_cache_ratio:0.4061662198391421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 +DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:53 [batch.py:51] router release req id 8 +INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10792946815490723 s +INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.10992121696472168 s +DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=187476524882524153908230745172328359621, time:1750768313.666465s req_ids:[8] +DEBUG 06-24 20:31:53 [manager.py:391] +ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:214.44272994995117ms total_cost_time:214.48779106140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12683 prompt_cache_len:5151 prompt_cache_ratio:0.406134195379642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 +DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:53 [batch.py:51] router release req id 8 +INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.1084144115447998 s +INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.11034107208251953 s +DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=194151084748129204874002022291785510731, time:1750768313.8838358s req_ids:[8] +DEBUG 06-24 20:31:53 [manager.py:391] +ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:216.7351245880127ms total_cost_time:216.77899360656738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12684 prompt_cache_len:5151 prompt_cache_ratio:0.40610217596972564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 +DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:54 [batch.py:51] router release req id 8 +INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10815763473510742 s +INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s +DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=231502852230472474379554727890179288947, time:1750768314.1058154s req_ids:[8] +DEBUG 06-24 20:31:54 [manager.py:391] +ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:214.7071361541748ms total_cost_time:214.7519588470459ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12685 prompt_cache_len:5151 prompt_cache_ratio:0.40607016160819864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 +DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:54 [batch.py:51] router release req id 8 +INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10867619514465332 s +INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11065554618835449 s +DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=337640953838531791581590691259983693978, time:1750768314.3249s req_ids:[8] +DEBUG 06-24 20:31:54 [manager.py:391] +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:387.33482360839844ms total_cost_time:387.3770236968994ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12686 prompt_cache_len:5151 prompt_cache_ratio:0.40603815229386725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 +DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:54 [batch.py:51] router release req id 8 +INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.1087198257446289 s +INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11047959327697754 s +DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=225979910358022397298435134781090866206, time:1750768314.7197163s req_ids:[8] +DEBUG 06-24 20:31:54 [manager.py:391] +ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:213.49549293518066ms total_cost_time:213.54007720947266ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12687 prompt_cache_len:5151 prompt_cache_ratio:0.40600614802553797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 +DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:54 [batch.py:51] router release req id 8 +INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10914015769958496 s +INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11112761497497559 s +DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=236939109757381976148698086753640529674, time:1750768314.941101s req_ids:[8] +DEBUG 06-24 20:31:54 [manager.py:391] +ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:213.96422386169434ms total_cost_time:214.00928497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12688 prompt_cache_len:5151 prompt_cache_ratio:0.40597414880201765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 +DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:55 [batch.py:51] router release req id 8 +INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.1079399585723877 s +INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.10996723175048828 s +DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=213850440299692461833806423811240665997, time:1750768315.1596014s req_ids:[8] +DEBUG 06-24 20:31:55 [manager.py:391] +ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:214.60819244384766ms total_cost_time:214.65277671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12689 prompt_cache_len:5151 prompt_cache_ratio:0.40594215462211364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 +DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:55 [batch.py:51] router release req id 8 +INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10898303985595703 s +INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.11091184616088867 s +DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=134765064418347960226781513563734415505, time:1750768315.3802814s req_ids:[8] +DEBUG 06-24 20:31:55 [manager.py:391] +ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:207.16023445129395ms total_cost_time:207.22222328186035ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12690 prompt_cache_len:5151 prompt_cache_ratio:0.40591016548463354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 +DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:55 [batch.py:51] router release req id 8 +INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10769867897033691 s +INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s +DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=200159742716216395068022680282094185185, time:1750768315.5951662s req_ids:[8] +DEBUG 06-24 20:31:55 [manager.py:391] +ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:214.72716331481934ms total_cost_time:214.74814414978027ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12691 prompt_cache_len:5151 prompt_cache_ratio:0.4058781813883855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 +DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:55 [batch.py:51] router release req id 8 +INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10953640937805176 s +INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.11148357391357422 s +DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=36977750861533174894416739268900112294, time:1750768315.8162622s req_ids:[8] +DEBUG 06-24 20:31:55 [manager.py:391] +ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:216.8738842010498ms total_cost_time:216.9182300567627ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12692 prompt_cache_len:5151 prompt_cache_ratio:0.4058462023321778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 +DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:55 [batch.py:51] router release req id 8 +INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.1089329719543457 s +INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11046433448791504 s +DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=320355215567574202974514810384319495552, time:1750768316.0366042s req_ids:[8] +DEBUG 06-24 20:31:56 [manager.py:391] +ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:211.99369430541992ms total_cost_time:212.0356559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12693 prompt_cache_len:5151 prompt_cache_ratio:0.40581422831481917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 +DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:56 [batch.py:51] router release req id 8 +INFO 06-24 20:31:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.1080942153930664 s +INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11009907722473145 s +DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=86927203244066313732480696205449161596, time:1750768316.25664s req_ids:[8] +DEBUG 06-24 20:31:56 [manager.py:391] +ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:389.25766944885254ms total_cost_time:389.301061630249ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12694 prompt_cache_len:5151 prompt_cache_ratio:0.40578225933511897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 +DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:56 [batch.py:51] router release req id 8 +INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.10785484313964844 s +INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.10982513427734375 s +DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=250796340074148087698914176544532451194, time:1750768316.6511972s req_ids:[8] +DEBUG 06-24 20:31:56 [manager.py:391] +ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:216.7665958404541ms total_cost_time:216.82167053222656ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:12695 prompt_cache_len:5151 prompt_cache_ratio:0.40575029539188656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 +DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:56 [batch.py:51] router release req id 8 +INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.11043453216552734 s +INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11251473426818848 s +DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=251084081853436642648179789449691400558, time:1750768316.8758671s req_ids:[8] +DEBUG 06-24 20:31:56 [manager.py:391] +DEBUG 06-24 20:31:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 53055.943 tokens/s +DEBUG 06-24 20:31:56 [stats.py:37] Avg prompt tokens throughput: 53047.673 tokens/s +DEBUG 06-24 20:31:56 [stats.py:37] Avg generate tokens throughput: 8.270 tokens/s +ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:215.82436561584473ms total_cost_time:215.8679962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12696 prompt_cache_len:5151 prompt_cache_ratio:0.40571833648393196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 +DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:57 [batch.py:51] router release req id 8 +INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10769319534301758 s +INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.10972714424133301 s +DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=195134987161558625318607420898782625239, time:1750768317.0956604s req_ids:[8] +DEBUG 06-24 20:31:57 [manager.py:391] +ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:213.348388671875ms total_cost_time:213.3936882019043ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12697 prompt_cache_len:5151 prompt_cache_ratio:0.40568638261006534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 +DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:57 [batch.py:51] router release req id 8 +INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10779118537902832 s +INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.10969185829162598 s +DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=229315824314945755078829511737606730224, time:1750768317.3148115s req_ids:[8] +DEBUG 06-24 20:31:57 [manager.py:391] +ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:212.40901947021484ms total_cost_time:212.46767044067383ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12698 prompt_cache_len:5151 prompt_cache_ratio:0.4056544337690975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 +DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:57 [batch.py:51] router release req id 8 +INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10948705673217773 s +INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.11167073249816895 s +DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=280036031425414493869496818798353662600, time:1750768317.5381594s req_ids:[8] +DEBUG 06-24 20:31:57 [manager.py:391] +ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:216.43805503845215ms total_cost_time:216.4902687072754ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12699 prompt_cache_len:5151 prompt_cache_ratio:0.40562248995983935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 +DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:57 [batch.py:51] router release req id 8 +INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.1077885627746582 s +INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s +DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=297255116071985496714719757389536534677, time:1750768317.7591586s req_ids:[8] +DEBUG 06-24 20:31:57 [manager.py:391] +ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:214.86496925354004ms total_cost_time:214.9221897125244ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:12700 prompt_cache_len:5151 prompt_cache_ratio:0.4055905511811024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 +DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:57 [batch.py:51] router release req id 8 +INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10833477973937988 s +INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027908325195312 s +DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=176513291546951222860882989056596064022, time:1750768317.9795287s req_ids:[8] +DEBUG 06-24 20:31:57 [manager.py:391] +ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:386.7781162261963ms total_cost_time:386.80028915405273ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12701 prompt_cache_len:5151 prompt_cache_ratio:0.4055586174316983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 +DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:58 [batch.py:51] router release req id 8 +INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10522961616516113 s +INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.10681509971618652 s +DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=211785063235053528479761411671688898845, time:1750768318.3747344s req_ids:[8] +DEBUG 06-24 20:31:58 [manager.py:391] +ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:213.32573890686035ms total_cost_time:213.36960792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12702 prompt_cache_len:5151 prompt_cache_ratio:0.4055266887104393 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 +DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:58 [batch.py:51] router release req id 8 +INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s +INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.11023664474487305 s +DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=33870205628208237864658473636951548574, time:1750768318.5912044s req_ids:[8] +DEBUG 06-24 20:31:58 [manager.py:391] +ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:211.20381355285645ms total_cost_time:211.24744415283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12703 prompt_cache_len:5151 prompt_cache_ratio:0.4054947650161379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 +DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:58 [batch.py:51] router release req id 8 +INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10725784301757812 s +INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.10927462577819824 s +DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=189804254040746653902469843935530309801, time:1750768318.8098285s req_ids:[8] +DEBUG 06-24 20:31:58 [manager.py:391] +ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:213.32144737243652ms total_cost_time:213.36603164672852ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12704 prompt_cache_len:5151 prompt_cache_ratio:0.40546284634760704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 +DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:58 [batch.py:51] router release req id 8 +INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10828542709350586 s +INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11020994186401367 s +DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=37839808140394174915566136243964434851, time:1750768319.0312932s req_ids:[8] +DEBUG 06-24 20:31:59 [manager.py:391] +ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:209.51128005981445ms total_cost_time:209.55514907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12705 prompt_cache_len:5151 prompt_cache_ratio:0.40543093270366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 +DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:59 [batch.py:51] router release req id 8 +INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s +INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11063861846923828 s +DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=80260118147687216734211625097801592920, time:1750768319.247347s req_ids:[8] +DEBUG 06-24 20:31:59 [manager.py:391] +ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:210.13259887695312ms total_cost_time:210.17742156982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12706 prompt_cache_len:5151 prompt_cache_ratio:0.40539902408311035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 +DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:59 [batch.py:51] router release req id 8 +INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10873723030090332 s +INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11071014404296875 s +DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=263420373889561741424069758297888689747, time:1750768319.4639723s req_ids:[8] +DEBUG 06-24 20:31:59 [manager.py:391] +ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:213.23752403259277ms total_cost_time:213.28282356262207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12707 prompt_cache_len:5151 prompt_cache_ratio:0.4053671204847722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 +DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:31:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:31:59 [batch.py:51] router release req id 8 +INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.31235194206237793 s +INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.3143332004547119 s +DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=102555876953742025316289845346229363665, time:1750768319.8943024s req_ids:[8] +DEBUG 06-24 20:31:59 [manager.py:391] +ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:421.8618869781494ms total_cost_time:421.9026565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:12708 prompt_cache_len:5151 prompt_cache_ratio:0.40533522190745985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 +DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:00 [batch.py:51] router release req id 8 +INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10731887817382812 s +INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.1092674732208252 s +DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=192315838924288419967727211220898720620, time:1750768320.1125433s req_ids:[8] +DEBUG 06-24 20:32:00 [manager.py:391] +ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:214.74194526672363ms total_cost_time:214.78533744812012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12709 prompt_cache_len:5151 prompt_cache_ratio:0.4053033283499882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 +DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:00 [batch.py:51] router release req id 8 +INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s +INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11160874366760254 s +DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=71582635394706380470695983551639098254, time:1750768320.334193s req_ids:[8] +DEBUG 06-24 20:32:00 [manager.py:391] +ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69385719299316ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12710 prompt_cache_len:5151 prompt_cache_ratio:0.4052714398111723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 +DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:00 [batch.py:51] router release req id 8 +INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s +INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.10987210273742676 s +DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=71884242513739346849616461547824643154, time:1750768320.5507705s req_ids:[8] +DEBUG 06-24 20:32:00 [manager.py:391] +ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:211.4260196685791ms total_cost_time:211.4696502685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12711 prompt_cache_len:5151 prompt_cache_ratio:0.4052395562898277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 +DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:00 [batch.py:51] router release req id 8 +INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s +INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11079859733581543 s +DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=106185683891478414243840823376584271104, time:1750768320.7696738s req_ids:[8] +DEBUG 06-24 20:32:00 [manager.py:391] +ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:212.91875839233398ms total_cost_time:212.9647731781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12712 prompt_cache_len:5151 prompt_cache_ratio:0.4052076777847703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 +DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:00 [batch.py:51] router release req id 8 +INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10969042778015137 s +INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11168146133422852 s +DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=237319618237049496061963558664816810396, time:1750768320.9891071s req_ids:[8] +DEBUG 06-24 20:32:00 [manager.py:391] +ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:211.87591552734375ms total_cost_time:211.92169189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12713 prompt_cache_len:5151 prompt_cache_ratio:0.4051758042948163 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 +DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:01 [batch.py:51] router release req id 8 +INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.3092350959777832 s +INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.3112211227416992 s +DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=80543863261359082067204434078819324570, time:1750768321.4195795s req_ids:[8] +DEBUG 06-24 20:32:01 [manager.py:391] +ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:432.58118629455566ms total_cost_time:432.62577056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12714 prompt_cache_len:5151 prompt_cache_ratio:0.40514393581878244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 +DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:01 [batch.py:51] router release req id 8 +INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s +INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.11081790924072266 s +DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=338817078839101290897536857631327242476, time:1750768321.6452143s req_ids:[8] +DEBUG 06-24 20:32:01 [manager.py:391] +ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:212.97121047973633ms total_cost_time:213.01698684692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12715 prompt_cache_len:5151 prompt_cache_ratio:0.40511207235548563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 +DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:01 [batch.py:51] router release req id 8 +INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.10873794555664062 s +INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.11081647872924805 s +DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=321556144501425804111108793326476154995, time:1750768321.863915s req_ids:[8] +DEBUG 06-24 20:32:01 [manager.py:391] +ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:214.5373821258545ms total_cost_time:214.58196640014648ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12716 prompt_cache_len:5151 prompt_cache_ratio:0.4050802139037433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 +DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:01 [batch.py:51] router release req id 8 +INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.1080923080444336 s +INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11017417907714844 s +DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=253853648231699859043871091763919089815, time:1750768322.0859714s req_ids:[8] +DEBUG 06-24 20:32:02 [manager.py:391] +ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:215.06118774414062ms total_cost_time:215.1041030883789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12717 prompt_cache_len:5151 prompt_cache_ratio:0.4050483604623732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 +DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:02 [batch.py:51] router release req id 8 +INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10805702209472656 s +INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11009955406188965 s +DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=63005054283085304125248241768603633889, time:1750768322.3060715s req_ids:[8] +DEBUG 06-24 20:32:02 [manager.py:391] +ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:211.11130714416504ms total_cost_time:211.15517616271973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12718 prompt_cache_len:5151 prompt_cache_ratio:0.40501651203019345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 +DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:02 [batch.py:51] router release req id 8 +INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10851812362670898 s +INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061525344848633 s +DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=164518756011714732286909320767630977826, time:1750768322.5252562s req_ids:[8] +DEBUG 06-24 20:32:02 [manager.py:391] +ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:367.5415515899658ms total_cost_time:367.5854206085205ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12719 prompt_cache_len:5151 prompt_cache_ratio:0.4049846686060225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 +DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:02 [batch.py:51] router release req id 8 +INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s +INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.10989499092102051 s +DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=24080165050801856306512161003319405163, time:1750768322.8972886s req_ids:[8] +DEBUG 06-24 20:32:02 [manager.py:391] +ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:205.6727409362793ms total_cost_time:205.71541786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12720 prompt_cache_len:5151 prompt_cache_ratio:0.4049528301886792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 +DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:03 [batch.py:51] router release req id 8 +INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10722994804382324 s +INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.10916376113891602 s +DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=96672564606290184475236175225142404779, time:1750768323.110316s req_ids:[8] +DEBUG 06-24 20:32:03 [manager.py:391] +ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:205.9018611907959ms total_cost_time:205.9457302093506ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12721 prompt_cache_len:5151 prompt_cache_ratio:0.40492099677698296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 +DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:03 [batch.py:51] router release req id 8 +INFO 06-24 20:32:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10873746871948242 s +INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s +DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=115159087678863655984229130903175440117, time:1750768323.3216183s req_ids:[8] +DEBUG 06-24 20:32:03 [manager.py:391] +ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:213.1955623626709ms total_cost_time:213.2406234741211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12722 prompt_cache_len:5151 prompt_cache_ratio:0.4048891683697532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 +DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:03 [batch.py:51] router release req id 8 +INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10675954818725586 s +INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.10866808891296387 s +DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=332868064180803166802322206321798698002, time:1750768323.566484s req_ids:[8] +DEBUG 06-24 20:32:03 [manager.py:391] +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:228.6677360534668ms total_cost_time:228.7125587463379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12723 prompt_cache_len:5151 prompt_cache_ratio:0.40485734496580994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 +DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:03 [batch.py:51] router release req id 8 +INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s +INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.1105351448059082 s +DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=39464432188160806102523052170722373423, time:1750768323.7764695s req_ids:[8] +DEBUG 06-24 20:32:03 [manager.py:391] +ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:205.09815216064453ms total_cost_time:205.12008666992188ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12724 prompt_cache_len:5151 prompt_cache_ratio:0.4048255265639736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 +DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:03 [batch.py:51] router release req id 8 +INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s +INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s +DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=107478278279364262931773934796417484953, time:1750768323.988935s req_ids:[8] +DEBUG 06-24 20:32:03 [manager.py:391] +ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:375.22149085998535ms total_cost_time:375.26750564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12725 prompt_cache_len:5151 prompt_cache_ratio:0.40479371316306484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 +DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:04 [batch.py:51] router release req id 8 +INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.1068720817565918 s +INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.1080625057220459 s +DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=308610629551917069323346340020657718255, time:1750768324.3689227s req_ids:[8] +DEBUG 06-24 20:32:04 [manager.py:391] +ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:213.67549896240234ms total_cost_time:213.6971950531006ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12726 prompt_cache_len:5151 prompt_cache_ratio:0.40476190476190477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 +DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:04 [batch.py:51] router release req id 8 +INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10534310340881348 s +DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=166114505208902735107805869866774731630, time:1750768324.5803697s req_ids:[8] +DEBUG 06-24 20:32:04 [manager.py:391] +INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10712122917175293 s +ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:180.96089363098145ms total_cost_time:181.00523948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12727 prompt_cache_len:5151 prompt_cache_ratio:0.40473010135931486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 +DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:04 [batch.py:51] router release req id 8 +INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10731935501098633 s +INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10818719863891602 s +DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=264095568520125204133397756748454241251, time:1750768324.7758415s req_ids:[8] +DEBUG 06-24 20:32:04 [manager.py:391] +ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:168.1971549987793ms total_cost_time:168.24102401733398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12728 prompt_cache_len:5151 prompt_cache_ratio:0.4046983029541169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 +DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:04 [batch.py:51] router release req id 8 +INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10728240013122559 s +INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10924959182739258 s +DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=123479965270926146145917621582577571309, time:1750768324.9508545s req_ids:[8] +DEBUG 06-24 20:32:04 [manager.py:391] +ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:200.3183364868164ms total_cost_time:200.3774642944336ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:12729 prompt_cache_len:5151 prompt_cache_ratio:0.40466650954513317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 +DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:05 [batch.py:51] router release req id 8 +INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10842347145080566 s +INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.1104273796081543 s +DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=84923760578756261262305992240040791713, time:1750768325.1592057s req_ids:[8] +DEBUG 06-24 20:32:05 [manager.py:391] +ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:211.22193336486816ms total_cost_time:211.2748622894287ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:12730 prompt_cache_len:5151 prompt_cache_ratio:0.40463472113118615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 +DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:05 [batch.py:51] router release req id 8 +INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10982513427734375 s +INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.11182022094726562 s +DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=98109035821706712392985663898202288790, time:1750768325.378078s req_ids:[8] +DEBUG 06-24 20:32:05 [manager.py:391] +ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:213.98472785949707ms total_cost_time:214.04719352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12731 prompt_cache_len:5151 prompt_cache_ratio:0.4046029377110989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 +DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:05 [batch.py:51] router release req id 8 +INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10725927352905273 s +INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.10911345481872559 s +DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=78719036996131217389117812523624455748, time:1750768325.6073685s req_ids:[8] +DEBUG 06-24 20:32:05 [manager.py:391] +ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:435.58406829833984ms total_cost_time:435.62793731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12732 prompt_cache_len:5151 prompt_cache_ratio:0.40457115928369464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 +DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:05 [batch.py:51] router release req id 8 +INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10787558555603027 s +INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11005568504333496 s +DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=101455811768662956194702191245060189141, time:1750768326.0381331s req_ids:[8] +DEBUG 06-24 20:32:06 [manager.py:391] +ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:213.8967514038086ms total_cost_time:213.94062042236328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12733 prompt_cache_len:5151 prompt_cache_ratio:0.4045393858477971 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 +DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:06 [batch.py:51] router release req id 8 +INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10832452774047852 s +INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11032295227050781 s +DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=162744945041232176945866669177217809449, time:1750768326.2605393s req_ids:[8] +DEBUG 06-24 20:32:06 [manager.py:391] +ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:216.63522720336914ms total_cost_time:216.67742729187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12734 prompt_cache_len:5151 prompt_cache_ratio:0.40450761740223024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 +DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:06 [batch.py:51] router release req id 8 +INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10705804824829102 s +INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.10892462730407715 s +DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=224677008175086016413559102617827072802, time:1750768326.485499s req_ids:[8] +DEBUG 06-24 20:32:06 [manager.py:391] +ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:216.64977073669434ms total_cost_time:216.71366691589355ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:12735 prompt_cache_len:5151 prompt_cache_ratio:0.4044758539458186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 +DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:06 [batch.py:51] router release req id 8 +INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10782551765441895 s +INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.10978841781616211 s +DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=284946616773434406099854083244093990399, time:1750768326.7225735s req_ids:[8] +DEBUG 06-24 20:32:06 [manager.py:391] +ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:214.8592472076416ms total_cost_time:214.91551399230957ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:12736 prompt_cache_len:5151 prompt_cache_ratio:0.4044440954773869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 +DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:06 [batch.py:51] router release req id 8 +INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.11059951782226562 s +INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11281824111938477 s +DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=279963320846153716470609459012527187729, time:1750768326.9312544s req_ids:[8] +DEBUG 06-24 20:32:06 [manager.py:391] +DEBUG 06-24 20:32:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 51860.005 tokens/s +DEBUG 06-24 20:32:06 [stats.py:37] Avg prompt tokens throughput: 51851.850 tokens/s +DEBUG 06-24 20:32:06 [stats.py:37] Avg generate tokens throughput: 8.155 tokens/s +ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:247.88904190063477ms total_cost_time:247.94912338256836ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12737 prompt_cache_len:5151 prompt_cache_ratio:0.40441234199576037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 +DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:07 [batch.py:51] router release req id 8 +INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10750126838684082 s +INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.10938739776611328 s +DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=319864604173598892257647413798674028279, time:1750768327.1891727s req_ids:[8] +DEBUG 06-24 20:32:07 [manager.py:391] +ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:203.57871055603027ms total_cost_time:203.62544059753418ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12738 prompt_cache_len:5151 prompt_cache_ratio:0.40438059349976446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 +DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:07 [batch.py:51] router release req id 8 +INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.11082100868225098 s +INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11287879943847656 s +DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=4303334175365590107116684646409786245, time:1750768327.3987477s req_ids:[8] +DEBUG 06-24 20:32:07 [manager.py:391] +ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:385.15186309814453ms total_cost_time:385.19906997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12739 prompt_cache_len:5151 prompt_cache_ratio:0.4043488499882251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 +DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:07 [batch.py:51] router release req id 8 +INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10887622833251953 s +INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11079549789428711 s +DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=96652630432232257193886835485865578059, time:1750768327.7845984s req_ids:[8] +DEBUG 06-24 20:32:07 [manager.py:391] +ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:195.98984718322754ms total_cost_time:196.03824615478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:12740 prompt_cache_len:5151 prompt_cache_ratio:0.4043171114599686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 +DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:07 [batch.py:51] router release req id 8 +INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10849428176879883 s +INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s +DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=313080579098090644851413948042528788976, time:1750768327.9903781s req_ids:[8] +DEBUG 06-24 20:32:07 [manager.py:391] +ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:209.18011665344238ms total_cost_time:209.22541618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12741 prompt_cache_len:5151 prompt_cache_ratio:0.4042853779138215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 +DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:08 [batch.py:51] router release req id 8 +INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10911297798156738 s +INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s +DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=190986703000232247063522985211421691896, time:1750768328.2039244s req_ids:[8] +DEBUG 06-24 20:32:08 [manager.py:391] +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:212.2969627380371ms total_cost_time:212.3415470123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12742 prompt_cache_len:5151 prompt_cache_ratio:0.4042536493486109 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 +DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:08 [batch.py:51] router release req id 8 +INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10777568817138672 s +INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.10969328880310059 s +DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=171747021582067039416066221697440191013, time:1750768328.428756s req_ids:[8] +DEBUG 06-24 20:32:08 [manager.py:391] +ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:218.7035083770752ms total_cost_time:218.7516689300537ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:12743 prompt_cache_len:5151 prompt_cache_ratio:0.40422192576316407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 +DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:08 [batch.py:51] router release req id 8 +INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.1089937686920166 s +INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.11097979545593262 s +DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=273161866324616153003195574455995375117, time:1750768328.6477537s req_ids:[8] +DEBUG 06-24 20:32:08 [manager.py:391] +ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.740327835083ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:12744 prompt_cache_len:5151 prompt_cache_ratio:0.4041902071563089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 +DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:08 [batch.py:51] router release req id 8 +INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10848879814147949 s +INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.11054849624633789 s +DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=234654434928044360988751263455960078164, time:1750768328.8750122s req_ids:[8] +DEBUG 06-24 20:32:08 [manager.py:391] +ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:395.49946784973145ms total_cost_time:395.54429054260254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12745 prompt_cache_len:5151 prompt_cache_ratio:0.4041584935268733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 +DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:09 [batch.py:51] router release req id 8 +INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10923242568969727 s +INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11075639724731445 s +DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=268571295668600539011956116668842712747, time:1750768329.2686017s req_ids:[8] +DEBUG 06-24 20:32:09 [manager.py:391] +ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:207.5967788696289ms total_cost_time:207.658052444458ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12746 prompt_cache_len:5151 prompt_cache_ratio:0.40412678487368586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 +DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:09 [batch.py:51] router release req id 8 +INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s +INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.10967588424682617 s +DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=136986149363190310578692878921761645357, time:1750768329.4836285s req_ids:[8] +DEBUG 06-24 20:32:09 [manager.py:391] +ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:216.12024307250977ms total_cost_time:216.16148948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12747 prompt_cache_len:5151 prompt_cache_ratio:0.4040950811955754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 +DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:09 [batch.py:51] router release req id 8 +INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10928726196289062 s +INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11055541038513184 s +DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=186251313048350501890985089223866424605, time:1750768329.7060323s req_ids:[8] +DEBUG 06-24 20:32:09 [manager.py:391] +ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:213.03701400756836ms total_cost_time:213.08064460754395ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12748 prompt_cache_len:5151 prompt_cache_ratio:0.4040633824913712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 +DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:09 [batch.py:51] router release req id 8 +INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s +INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s +DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=153094158433775038290841477991447811708, time:1750768329.9247167s req_ids:[8] +DEBUG 06-24 20:32:09 [manager.py:391] +ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:209.69510078430176ms total_cost_time:209.73777770996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12749 prompt_cache_len:5151 prompt_cache_ratio:0.40403168875990275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 +DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:10 [batch.py:51] router release req id 8 +INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10811781883239746 s +INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.10977005958557129 s +DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=87383392470522335468229076730113987959, time:1750768330.1408894s req_ids:[8] +DEBUG 06-24 20:32:10 [manager.py:391] +ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:210.76011657714844ms total_cost_time:210.80875396728516ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:12750 prompt_cache_len:5151 prompt_cache_ratio:0.404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 +DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:10 [batch.py:51] router release req id 8 +INFO 06-24 20:32:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10868525505065918 s +INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.11045336723327637 s +DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=279297398751838851318630800587533442882, time:1750768330.3651478s req_ids:[8] +DEBUG 06-24 20:32:10 [manager.py:391] +ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:399.8374938964844ms total_cost_time:399.88183975219727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12751 prompt_cache_len:5151 prompt_cache_ratio:0.4039683162104933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 +DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:10 [batch.py:51] router release req id 8 +INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s +INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.10932540893554688 s +DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=205671153875013041385491703969086570100, time:1750768330.7655103s req_ids:[8] +DEBUG 06-24 20:32:10 [manager.py:391] +ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:214.8911952972412ms total_cost_time:214.9360179901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12752 prompt_cache_len:5151 prompt_cache_ratio:0.4039366373902133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 +DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:10 [batch.py:51] router release req id 8 +INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10904574394226074 s +INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.11106419563293457 s +DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=185956170062228657751086264118787393422, time:1750768330.9861462s req_ids:[8] +DEBUG 06-24 20:32:10 [manager.py:391] +ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:213.5484218597412ms total_cost_time:213.5915756225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12753 prompt_cache_len:5151 prompt_cache_ratio:0.40390496353799105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 +DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:11 [batch.py:51] router release req id 8 +INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10767483711242676 s +INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.1094973087310791 s +DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=303464728190860782826067761737155165734, time:1750768331.207503s req_ids:[8] +DEBUG 06-24 20:32:11 [manager.py:391] +ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:210.45351028442383ms total_cost_time:210.49809455871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12754 prompt_cache_len:5151 prompt_cache_ratio:0.403873294652658 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 +DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:11 [batch.py:51] router release req id 8 +INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10775470733642578 s +INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.1096489429473877 s +DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=184202180498045085844294131931850558023, time:1750768331.4255946s req_ids:[8] +DEBUG 06-24 20:32:11 [manager.py:391] +ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:211.2879753112793ms total_cost_time:211.3323211669922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12755 prompt_cache_len:5151 prompt_cache_ratio:0.40384163073304585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 +DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:11 [batch.py:51] router release req id 8 +INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10888838768005371 s +INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.11101436614990234 s +DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=334686937240656905046644512752414658577, time:1750768331.6425962s req_ids:[8] +DEBUG 06-24 20:32:11 [manager.py:391] +ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:216.65406227111816ms total_cost_time:216.69745445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12756 prompt_cache_len:5151 prompt_cache_ratio:0.40380997177798683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 +DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:11 [batch.py:51] router release req id 8 +INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10783672332763672 s +INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.10915803909301758 s +DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=236715885815506573995845790939051637960, time:1750768331.8632057s req_ids:[8] +DEBUG 06-24 20:32:11 [manager.py:391] +ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:213.29331398010254ms total_cost_time:213.33813667297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12757 prompt_cache_len:5151 prompt_cache_ratio:0.4037783177863134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 +DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:11 [batch.py:51] router release req id 8 +INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10872220993041992 s +INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10979795455932617 s +DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=50163741418903514875663481839673748642, time:1750768332.0809324s req_ids:[8] +DEBUG 06-24 20:32:12 [manager.py:391] +ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:374.1724491119385ms total_cost_time:374.21679496765137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12758 prompt_cache_len:5151 prompt_cache_ratio:0.40374666875685844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 +DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:12 [batch.py:51] router release req id 8 +INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s +INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10983109474182129 s +DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=309549304259410002631185175114818762264, time:1750768332.463406s req_ids:[8] +DEBUG 06-24 20:32:12 [manager.py:391] +ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:216.37654304504395ms total_cost_time:216.42041206359863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12759 prompt_cache_len:5151 prompt_cache_ratio:0.4037150246884552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 +DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:12 [batch.py:51] router release req id 8 +INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10851716995239258 s +INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10958647727966309 s +DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=246596124787094043231685615205376036157, time:1750768332.6848547s req_ids:[8] +DEBUG 06-24 20:32:12 [manager.py:391] +ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:215.10624885559082ms total_cost_time:215.1501178741455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12760 prompt_cache_len:5151 prompt_cache_ratio:0.4036833855799373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 +DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:12 [batch.py:51] router release req id 8 +INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10752630233764648 s +INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10951828956604004 s +DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=264550105337034624954075775699484397767, time:1750768332.906285s req_ids:[8] +DEBUG 06-24 20:32:12 [manager.py:391] +ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:214.12420272827148ms total_cost_time:214.16997909545898ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12761 prompt_cache_len:5151 prompt_cache_ratio:0.4036517514301387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 +DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:13 [batch.py:51] router release req id 8 +INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.1092679500579834 s +INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.11122727394104004 s +DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=305178561174230089291908079720010112515, time:1750768333.1237361s req_ids:[8] +DEBUG 06-24 20:32:13 [manager.py:391] +ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:209.29908752441406ms total_cost_time:209.34462547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12762 prompt_cache_len:5151 prompt_cache_ratio:0.40362012223789373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 +DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:13 [batch.py:51] router release req id 8 +INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10820364952087402 s +INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.10936236381530762 s +DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=221555671578494233235708062908446067347, time:1750768333.3369102s req_ids:[8] +DEBUG 06-24 20:32:13 [manager.py:391] +ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:212.0053768157959ms total_cost_time:212.0497226715088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12763 prompt_cache_len:5151 prompt_cache_ratio:0.40358849800203717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 +DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:13 [batch.py:51] router release req id 8 +INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10876965522766113 s +INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.11084794998168945 s +DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=73355558970403681588473645571662650760, time:1750768333.554558s req_ids:[8] +DEBUG 06-24 20:32:13 [manager.py:391] +ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:391.24488830566406ms total_cost_time:391.29137992858887ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12764 prompt_cache_len:5151 prompt_cache_ratio:0.40355687872140394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 +DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:13 [batch.py:51] router release req id 8 +INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10910201072692871 s +INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112222671508789 s +DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=53943494029676393930777895924701649371, time:1750768333.959336s req_ids:[8] +DEBUG 06-24 20:32:13 [manager.py:391] +ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:222.37801551818848ms total_cost_time:222.44000434875488ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12765 prompt_cache_len:5151 prompt_cache_ratio:0.4035252643948296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 +DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:14 [batch.py:51] router release req id 8 +INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.1091158390045166 s +INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s +DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=328436197250720853730627193021126438096, time:1750768334.1792412s req_ids:[8] +DEBUG 06-24 20:32:14 [manager.py:391] +ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:211.88974380493164ms total_cost_time:211.93575859069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12766 prompt_cache_len:5151 prompt_cache_ratio:0.4034936550211499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 +DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:14 [batch.py:51] router release req id 8 +INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10912489891052246 s +INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s +DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=204183005986504478002741963122950267224, time:1750768334.4001515s req_ids:[8] +DEBUG 06-24 20:32:14 [manager.py:391] +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:214.22934532165527ms total_cost_time:214.27369117736816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12767 prompt_cache_len:5151 prompt_cache_ratio:0.40346205059920104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 +DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:14 [batch.py:51] router release req id 8 +INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10898709297180176 s +INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11110401153564453 s +DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=94384186471841813796427733896382812597, time:1750768334.6192143s req_ids:[8] +DEBUG 06-24 20:32:14 [manager.py:391] +ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:214.17617797851562ms total_cost_time:214.22076225280762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12768 prompt_cache_len:5151 prompt_cache_ratio:0.40343045112781956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 +DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:14 [batch.py:51] router release req id 8 +INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10762810707092285 s +INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.10870099067687988 s +DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=289125054046173823795334074225658672095, time:1750768334.8362412s req_ids:[8] +DEBUG 06-24 20:32:14 [manager.py:391] +ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:213.38987350463867ms total_cost_time:213.43469619750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12769 prompt_cache_len:5151 prompt_cache_ratio:0.4033988566058423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 +DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:14 [batch.py:51] router release req id 8 +INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.1092679500579834 s +INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.11080765724182129 s +DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=185777141390185758979124460531288140805, time:1750768335.0550463s req_ids:[8] +DEBUG 06-24 20:32:15 [manager.py:391] +ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:210.8299732208252ms total_cost_time:210.87193489074707ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12770 prompt_cache_len:5151 prompt_cache_ratio:0.4033672670321065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 +DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:15 [batch.py:51] router release req id 8 +INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10758709907531738 s +INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s +DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=289975140293902323568109889531367499597, time:1750768335.2733886s req_ids:[8] +DEBUG 06-24 20:32:15 [manager.py:391] +ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:211.7142677307129ms total_cost_time:211.75694465637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12771 prompt_cache_len:5151 prompt_cache_ratio:0.40333568240544987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 +DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:15 [batch.py:51] router release req id 8 +INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s +INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.1100623607635498 s +DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=182071911128253644127758490330249599080, time:1750768335.4944787s req_ids:[8] +DEBUG 06-24 20:32:15 [manager.py:391] +ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:400.71821212768555ms total_cost_time:400.74682235717773ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:12772 prompt_cache_len:5151 prompt_cache_ratio:0.4033041027247103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 +DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:15 [batch.py:51] router release req id 8 +INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10919356346130371 s +INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.11125349998474121 s +DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=95678919104808777774008045419438116004, time:1750768335.9027739s req_ids:[8] +DEBUG 06-24 20:32:15 [manager.py:391] +ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:198.6231803894043ms total_cost_time:198.6687183380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12773 prompt_cache_len:5151 prompt_cache_ratio:0.4032725279887262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 +DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:16 [batch.py:51] router release req id 8 +DEBUG 06-24 20:32:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:16 [manager.py:283] +DEBUG 06-24 20:32:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:16 [manager.py:284] +INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10937786102294922 s +INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11140584945678711 s +DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=250394397790085365401076077240485737978, time:1750768336.107398s req_ids:[8] +DEBUG 06-24 20:32:16 [manager.py:391] +ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:210.7548713684082ms total_cost_time:210.798978805542ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12774 prompt_cache_len:5151 prompt_cache_ratio:0.4032409581963363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 +DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:16 [batch.py:51] router release req id 8 +INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s +INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11078166961669922 s +DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=65166913776842913741257069643316408561, time:1750768336.3250961s req_ids:[8] +DEBUG 06-24 20:32:16 [manager.py:391] +ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:216.20678901672363ms total_cost_time:216.25041961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12775 prompt_cache_len:5151 prompt_cache_ratio:0.4032093933463797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 +DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:16 [batch.py:51] router release req id 8 +INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10881471633911133 s +INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.1108086109161377 s +DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=265039547204125063767751321686973985199, time:1750768336.5452223s req_ids:[8] +DEBUG 06-24 20:32:16 [manager.py:391] +ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:212.43524551391602ms total_cost_time:212.4783992767334ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12776 prompt_cache_len:5151 prompt_cache_ratio:0.4031778334376957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 +DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:16 [batch.py:51] router release req id 8 +INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10804462432861328 s +INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11000990867614746 s +DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=4931067181246065638869094515320556055, time:1750768336.7648764s req_ids:[8] +DEBUG 06-24 20:32:16 [manager.py:391] +ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:207.67903327941895ms total_cost_time:207.7198028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:12777 prompt_cache_len:5151 prompt_cache_ratio:0.4031462784691242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 +DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:16 [batch.py:51] router release req id 8 +INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s +INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11025357246398926 s +DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=92238918883755976467360895836525042129, time:1750768336.9895656s req_ids:[8] +DEBUG 06-24 20:32:16 [manager.py:391] +DEBUG 06-24 20:32:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 52013.707 tokens/s +DEBUG 06-24 20:32:16 [stats.py:37] Avg prompt tokens throughput: 52005.555 tokens/s +DEBUG 06-24 20:32:16 [stats.py:37] Avg generate tokens throughput: 8.153 tokens/s +ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:227.54955291748047ms total_cost_time:227.59366035461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12778 prompt_cache_len:5151 prompt_cache_ratio:0.4031147284395054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 +DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:17 [batch.py:51] router release req id 8 +INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10814809799194336 s +INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s +DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=70026461003083432815128745744006763962, time:1750768337.211016s req_ids:[8] +DEBUG 06-24 20:32:17 [manager.py:391] +ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:387.5298500061035ms total_cost_time:387.5718116760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12779 prompt_cache_len:5151 prompt_cache_ratio:0.4030831833476798 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 +DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:17 [batch.py:51] router release req id 8 +INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10745573043823242 s +INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.10946273803710938 s +DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=295443506750457855535545591393863372284, time:1750768337.6091065s req_ids:[8] +DEBUG 06-24 20:32:17 [manager.py:391] +ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:215.78550338745117ms total_cost_time:215.82794189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12780 prompt_cache_len:5151 prompt_cache_ratio:0.40305164319248826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 +DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:17 [batch.py:51] router release req id 8 +INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10867762565612793 s +INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.11076164245605469 s +DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=324511124189727302939766653618015324208, time:1750768337.836053s req_ids:[8] +DEBUG 06-24 20:32:17 [manager.py:391] +ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:219.62523460388184ms total_cost_time:219.6676731109619ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12781 prompt_cache_len:5151 prompt_cache_ratio:0.4030201079727721 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 +DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:17 [batch.py:51] router release req id 8 +INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.1081247329711914 s +INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.11008524894714355 s +DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=208218696670463765566349830066927277334, time:1750768338.065538s req_ids:[8] +DEBUG 06-24 20:32:18 [manager.py:391] +ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:222.43499755859375ms total_cost_time:222.47719764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12782 prompt_cache_len:5151 prompt_cache_ratio:0.4029885776873729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 +DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:18 [batch.py:51] router release req id 8 +INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s +INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.11070632934570312 s +DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=311486201189688680242702860039443242795, time:1750768338.2893257s req_ids:[8] +DEBUG 06-24 20:32:18 [manager.py:391] +ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:215.0571346282959ms total_cost_time:215.10052680969238ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12783 prompt_cache_len:5151 prompt_cache_ratio:0.40295705233513257 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 +DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:18 [batch.py:51] router release req id 8 +INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10738801956176758 s +INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.10925602912902832 s +DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=329499031219073375638342460364528371985, time:1750768338.5110435s req_ids:[8] +DEBUG 06-24 20:32:18 [manager.py:391] +ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:213.90271186828613ms total_cost_time:213.9451503753662ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12784 prompt_cache_len:5151 prompt_cache_ratio:0.4029255319148936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 +DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:18 [batch.py:51] router release req id 8 +INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s +INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.10981488227844238 s +DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=48705036867626733063625460727736137529, time:1750768338.72973s req_ids:[8] +DEBUG 06-24 20:32:18 [manager.py:391] +ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:211.55047416687012ms total_cost_time:211.57526969909668ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12785 prompt_cache_len:5151 prompt_cache_ratio:0.40289401642549866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 +DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:19 [batch.py:51] router release req id 8 +INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.31234264373779297 s +INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.3144843578338623 s +DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=159212714748014700149438768661185371868, time:1750768339.164549s req_ids:[8] +DEBUG 06-24 20:32:19 [manager.py:391] +ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:435.1987838745117ms total_cost_time:435.2433681488037ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12786 prompt_cache_len:5151 prompt_cache_ratio:0.4028625058657907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 +DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:19 [batch.py:51] router release req id 8 +INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.10950994491577148 s +INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.11173439025878906 s +DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=111431361868675696267553586846410028604, time:1750768339.3923173s req_ids:[8] +DEBUG 06-24 20:32:19 [manager.py:391] +ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:212.20660209655762ms total_cost_time:212.26239204406738ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:12787 prompt_cache_len:5151 prompt_cache_ratio:0.40283100023461327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 +DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:19 [batch.py:51] router release req id 8 +INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.10769772529602051 s +INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s +DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=194246409525248452922016314915737030391, time:1750768339.6082196s req_ids:[8] +DEBUG 06-24 20:32:19 [manager.py:391] +ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:211.80343627929688ms total_cost_time:211.84921264648438ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12788 prompt_cache_len:5151 prompt_cache_ratio:0.4027994995308101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 +DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:19 [batch.py:51] router release req id 8 +INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s +INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.11014986038208008 s +DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=284192503015660560278757885151341090060, time:1750768339.827261s req_ids:[8] +DEBUG 06-24 20:32:19 [manager.py:391] +ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:214.86377716064453ms total_cost_time:214.90812301635742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12789 prompt_cache_len:5151 prompt_cache_ratio:0.4027680037532254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 +DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:19 [batch.py:51] router release req id 8 +INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.10914897918701172 s +INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.11113166809082031 s +DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=127466260538790170118749258526872474943, time:1750768340.0629764s req_ids:[8] +DEBUG 06-24 20:32:20 [manager.py:391] +ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:228.4541130065918ms total_cost_time:228.4998893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12790 prompt_cache_len:5151 prompt_cache_ratio:0.40273651290070367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 +DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:20 [batch.py:51] router release req id 8 +INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.1089167594909668 s +INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.1110074520111084 s +DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=151832180931281131249937561775725160298, time:1750768340.284597s req_ids:[8] +DEBUG 06-24 20:32:20 [manager.py:391] +ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:207.4112892150879ms total_cost_time:207.4582576751709ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12791 prompt_cache_len:5151 prompt_cache_ratio:0.40270502697208976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 +DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:20 [batch.py:51] router release req id 8 +INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.3107945919036865 s +INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.3128983974456787 s +DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=108364897012548086077736354537211088456, time:1750768340.7140446s req_ids:[8] +DEBUG 06-24 20:32:20 [manager.py:391] +ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:431.35714530944824ms total_cost_time:431.40244483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12792 prompt_cache_len:5151 prompt_cache_ratio:0.4026735459662289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 +DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:20 [batch.py:51] router release req id 8 +INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.10651206970214844 s +INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.10758733749389648 s +DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=204333943733511921273916965767429681078, time:1750768340.9358287s req_ids:[8] +DEBUG 06-24 20:32:20 [manager.py:391] +ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:225.07429122924805ms total_cost_time:225.12006759643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12793 prompt_cache_len:5151 prompt_cache_ratio:0.4026420698819667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 +DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:21 [batch.py:51] router release req id 8 +INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10753965377807617 s +INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s +DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=42797764042740796117643801712542906408, time:1750768341.1766133s req_ids:[8] +DEBUG 06-24 20:32:21 [manager.py:391] +ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:219.31147575378418ms total_cost_time:219.35725212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12794 prompt_cache_len:5151 prompt_cache_ratio:0.40261059871814914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 +DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:21 [batch.py:51] router release req id 8 +INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10905098915100098 s +INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s +DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=328045701581968338129284982651127126829, time:1750768341.3924623s req_ids:[8] +DEBUG 06-24 20:32:21 [manager.py:391] +ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:210.91079711914062ms total_cost_time:210.9549045562744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12795 prompt_cache_len:5151 prompt_cache_ratio:0.40257913247362254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 +DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:21 [batch.py:51] router release req id 8 +INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10962700843811035 s +INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.11176085472106934 s +DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=61707017856669233914773224895676251842, time:1750768341.6102846s req_ids:[8] +DEBUG 06-24 20:32:21 [manager.py:391] +ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:213.9589786529541ms total_cost_time:214.0028476715088ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12796 prompt_cache_len:5151 prompt_cache_ratio:0.4025476711472335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 +DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:21 [batch.py:51] router release req id 8 +INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10746359825134277 s +INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.10942864418029785 s +DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=201835959866627544271851294979554920142, time:1750768341.8303468s req_ids:[8] +DEBUG 06-24 20:32:21 [manager.py:391] +ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:384.34576988220215ms total_cost_time:384.39106941223145ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12797 prompt_cache_len:5151 prompt_cache_ratio:0.40251621473782917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 +DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:22 [batch.py:51] router release req id 8 +INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10910773277282715 s +INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11111783981323242 s +DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=309439357140097915563450789873870541843, time:1750768342.219602s req_ids:[8] +DEBUG 06-24 20:32:22 [manager.py:391] +ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:217.35477447509766ms total_cost_time:217.39935874938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12798 prompt_cache_len:5151 prompt_cache_ratio:0.4024847632442569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 +DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:22 [batch.py:51] router release req id 8 +INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s +INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014509201049805 s +DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=258450698529744897491057281918362912498, time:1750768342.4471378s req_ids:[8] +DEBUG 06-24 20:32:22 [manager.py:391] +ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:219.39373016357422ms total_cost_time:219.4385528564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12799 prompt_cache_len:5151 prompt_cache_ratio:0.4024533166653645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 +DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:22 [batch.py:51] router release req id 8 +INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10830879211425781 s +INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11026620864868164 s +DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=120108118922872144417573194134995661286, time:1750768342.6682813s req_ids:[8] +DEBUG 06-24 20:32:22 [manager.py:391] +ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:217.16666221618652ms total_cost_time:217.2107696533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12800 prompt_cache_len:5151 prompt_cache_ratio:0.402421875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 +DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:22 [batch.py:51] router release req id 8 +INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10784721374511719 s +INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s +DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=233249292378408635499826681981010087644, time:1750768342.8900845s req_ids:[8] +DEBUG 06-24 20:32:22 [manager.py:391] +ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:214.83397483825684ms total_cost_time:214.87665176391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12801 prompt_cache_len:5151 prompt_cache_ratio:0.40239043824701193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 +DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:23 [batch.py:51] router release req id 8 +INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s +INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.10967040061950684 s +DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=261649399595077689397919047617684974238, time:1750768343.1113343s req_ids:[8] +DEBUG 06-24 20:32:23 [manager.py:391] +ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:214.76054191589355ms total_cost_time:214.81585502624512ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12802 prompt_cache_len:5151 prompt_cache_ratio:0.4023590064052492 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:32:23 [statics_utils.py:24] mean first cost: 229.96201950390488 ms +INFO 06-24 20:32:23 [statics_utils.py:24] mean per token cost: 0.06079721034521612 ms +INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 +DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:23 [batch.py:51] router release req id 8 +INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.1087183952331543 s +INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11055111885070801 s +DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=220762102063564505773762618658087183255, time:1750768343.3374286s req_ids:[8] +DEBUG 06-24 20:32:23 [manager.py:391] +ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:382.89499282836914ms total_cost_time:382.94100761413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12803 prompt_cache_len:5151 prompt_cache_ratio:0.4023275794735609 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 +DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:23 [batch.py:51] router release req id 8 +INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s +INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11112642288208008 s +DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=207213081614142952356581731114474849137, time:1750768343.7220483s req_ids:[8] +DEBUG 06-24 20:32:23 [manager.py:391] +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:215.79575538635254ms total_cost_time:215.83962440490723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12804 prompt_cache_len:5151 prompt_cache_ratio:0.4022961574507966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 +DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:23 [batch.py:51] router release req id 8 +INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.10904908180236816 s +INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11117434501647949 s +DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=232041908002191756669560038202451650967, time:1750768343.9456053s req_ids:[8] +DEBUG 06-24 20:32:23 [manager.py:391] +ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:213.547945022583ms total_cost_time:213.60445022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:12805 prompt_cache_len:5151 prompt_cache_ratio:0.40226474033580634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 +DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:24 [batch.py:51] router release req id 8 +INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10915279388427734 s +INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11107730865478516 s +DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=139253665954856950943252893366113079795, time:1750768344.1681037s req_ids:[8] +DEBUG 06-24 20:32:24 [manager.py:391] +ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:213.4392261505127ms total_cost_time:213.48261833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12806 prompt_cache_len:5151 prompt_cache_ratio:0.4022333281274403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 +DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:24 [batch.py:51] router release req id 8 +INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.1083381175994873 s +INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11036324501037598 s +DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=154131676121033394070039496937921853843, time:1750768344.3862667s req_ids:[8] +DEBUG 06-24 20:32:24 [manager.py:391] +ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:213.9742374420166ms total_cost_time:214.0340805053711ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12807 prompt_cache_len:5151 prompt_cache_ratio:0.4022019208245491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 +DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:24 [batch.py:51] router release req id 8 +INFO 06-24 20:32:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10805368423461914 s +INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s +DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=252543143952675174779650403032820989574, time:1750768344.6063507s req_ids:[8] +DEBUG 06-24 20:32:24 [manager.py:391] +ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:211.80486679077148ms total_cost_time:211.8527889251709ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:12808 prompt_cache_len:5151 prompt_cache_ratio:0.4021705184259838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 +DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:24 [batch.py:51] router release req id 8 +INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10799980163574219 s +INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.10990190505981445 s +DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=182665450364871729514813012209934213229, time:1750768344.8253489s req_ids:[8] +DEBUG 06-24 20:32:24 [manager.py:391] +ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:212.1446132659912ms total_cost_time:212.2058868408203ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12809 prompt_cache_len:5151 prompt_cache_ratio:0.4021391209305957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 +DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:24 [batch.py:51] router release req id 8 +INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10751056671142578 s +INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.10953545570373535 s +DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=22278380069274031271146944003692133475, time:1750768345.0440357s req_ids:[8] +DEBUG 06-24 20:32:25 [manager.py:391] +ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:384.6297264099121ms total_cost_time:384.6893310546875ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12810 prompt_cache_len:5151 prompt_cache_ratio:0.4021077283372365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 +DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:25 [batch.py:51] router release req id 8 +INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10908889770507812 s +INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111903190612793 s +DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=52413436420511972420132877844630001755, time:1750768345.4363904s req_ids:[8] +DEBUG 06-24 20:32:25 [manager.py:391] +ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:214.27512168884277ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.08487701416015625ms prompt_token_num:12811 prompt_cache_len:5151 prompt_cache_ratio:0.4020763406447584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 +DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:25 [batch.py:51] router release req id 8 +INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s +INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.11091399192810059 s +DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=185923119406753741125043494090825499997, time:1750768345.657525s req_ids:[8] +DEBUG 06-24 20:32:25 [manager.py:391] +ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:212.19229698181152ms total_cost_time:212.23855018615723ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12812 prompt_cache_len:5151 prompt_cache_ratio:0.4020449578520137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 +DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:25 [batch.py:51] router release req id 8 +INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10768270492553711 s +INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s +DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=51621591619419015320682010400385468415, time:1750768345.8824012s req_ids:[8] +DEBUG 06-24 20:32:25 [manager.py:391] +ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:219.10548210144043ms total_cost_time:219.16747093200684ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12813 prompt_cache_len:5151 prompt_cache_ratio:0.4020135799578553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 +DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:26 [batch.py:51] router release req id 8 +INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.1087028980255127 s +INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.11064839363098145 s +DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=116700071872371136966012630125162770949, time:1750768346.1244106s req_ids:[8] +DEBUG 06-24 20:32:26 [manager.py:391] +ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:218.65296363830566ms total_cost_time:218.71376037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12814 prompt_cache_len:5151 prompt_cache_ratio:0.40198220696113623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 +DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:26 [batch.py:51] router release req id 8 +INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10600066184997559 s +INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.10791897773742676 s +DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=323315318061765618275797393108528312537, time:1750768346.3353205s req_ids:[8] +DEBUG 06-24 20:32:26 [manager.py:391] +ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:212.77427673339844ms total_cost_time:212.7974033355713ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:12815 prompt_cache_len:5151 prompt_cache_ratio:0.4019508388607101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 +DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:26 [batch.py:51] router release req id 8 +INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10596847534179688 s +INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.10793757438659668 s +DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=85429584842177536384746230305496777384, time:1750768346.5514941s req_ids:[8] +DEBUG 06-24 20:32:26 [manager.py:391] +ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:209.65051651000977ms total_cost_time:209.67841148376465ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12816 prompt_cache_len:5151 prompt_cache_ratio:0.40191947565543074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 +DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:26 [batch.py:51] router release req id 8 +INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10503530502319336 s +INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.1069948673248291 s +DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=106829987430773303476404453014302677974, time:1750768346.7673137s req_ids:[8] +DEBUG 06-24 20:32:26 [manager.py:391] +ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:32:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 49701.467 tokens/s +DEBUG 06-24 20:32:27 [stats.py:37] Avg prompt tokens throughput: 49693.602 tokens/s +DEBUG 06-24 20:32:27 [stats.py:37] Avg generate tokens throughput: 7.865 tokens/s +INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:384.48143005371094ms total_cost_time:384.51075553894043ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12817 prompt_cache_len:5151 prompt_cache_ratio:0.4018881173441523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 +DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:27 [batch.py:51] router release req id 8 +INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.1062917709350586 s +INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10868597030639648 s +DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=60852611275995568764725270875111446399, time:1750768347.1574504s req_ids:[8] +DEBUG 06-24 20:32:27 [manager.py:391] +ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:209.46764945983887ms total_cost_time:209.49506759643555ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:12818 prompt_cache_len:5151 prompt_cache_ratio:0.40185676392572944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 +DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:27 [batch.py:51] router release req id 8 +INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.10590505599975586 s +INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10783791542053223 s +DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=325046754993295761874566672783804791916, time:1750768347.3719337s req_ids:[8] +DEBUG 06-24 20:32:27 [manager.py:391] +ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:214.76078033447266ms total_cost_time:214.78891372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:12819 prompt_cache_len:5151 prompt_cache_ratio:0.40182541539901706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 +DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:27 [batch.py:51] router release req id 8 +INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.1060018539428711 s +INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10794997215270996 s +DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=18280646961843405436071686861838592063, time:1750768347.5939212s req_ids:[8] +DEBUG 06-24 20:32:27 [manager.py:391] +ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:217.12923049926758ms total_cost_time:217.15712547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12820 prompt_cache_len:5151 prompt_cache_ratio:0.4017940717628705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 +DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:27 [batch.py:51] router release req id 8 +INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.10833215713500977 s +INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10991477966308594 s +DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=121048559797060459162136081350489732168, time:1750768347.8176973s req_ids:[8] +DEBUG 06-24 20:32:27 [manager.py:391] +ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:216.59088134765625ms total_cost_time:216.61925315856934ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:12821 prompt_cache_len:5151 prompt_cache_ratio:0.4017627330161454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 +DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:27 [batch.py:51] router release req id 8 +INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10641789436340332 s +INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.1089928150177002 s +DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=1855781636601015295194052444246726828, time:1750768348.0386903s req_ids:[8] +DEBUG 06-24 20:32:28 [manager.py:391] +ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:220.97086906433105ms total_cost_time:221.0216522216797ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12822 prompt_cache_len:5151 prompt_cache_ratio:0.40173139915769773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 +DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:28 [batch.py:51] router release req id 8 +INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.1059560775756836 s +INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10825109481811523 s +DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=205165376926511123211390081320287160873, time:1750768348.2758608s req_ids:[8] +DEBUG 06-24 20:32:28 [manager.py:391] +ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:392.5633430480957ms total_cost_time:392.5907611846924ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:12823 prompt_cache_len:5151 prompt_cache_ratio:0.4017000701863838 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 +DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:28 [batch.py:51] router release req id 8 +INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10495448112487793 s +INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10694241523742676 s +DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=229815914310115252756232731464624936503, time:1750768348.666822s req_ids:[8] +DEBUG 06-24 20:32:28 [manager.py:391] +ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:213.9737606048584ms total_cost_time:213.99998664855957ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12824 prompt_cache_len:5151 prompt_cache_ratio:0.4016687461010605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 +DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:28 [batch.py:51] router release req id 8 +INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10550355911254883 s +INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10754776000976562 s +DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=323249475443310318246273870519909685408, time:1750768348.8848746s req_ids:[8] +DEBUG 06-24 20:32:28 [manager.py:391] +ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:208.16707611083984ms total_cost_time:208.19497108459473ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12825 prompt_cache_len:5151 prompt_cache_ratio:0.4016374269005848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 +DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:29 [batch.py:51] router release req id 8 +INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10579061508178711 s +INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10776209831237793 s +DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=124279971834469214705527431671093702337, time:1750768349.1004765s req_ids:[8] +DEBUG 06-24 20:32:29 [manager.py:391] +ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:212.60809898376465ms total_cost_time:212.63408660888672ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12826 prompt_cache_len:5151 prompt_cache_ratio:0.4016061125838141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 +DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:29 [batch.py:51] router release req id 8 +INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10468149185180664 s +INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.1066446304321289 s +DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=158454302600614695516576208733900572756, time:1750768349.3182795s req_ids:[8] +DEBUG 06-24 20:32:29 [manager.py:391] +ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:210.19411087036133ms total_cost_time:210.21795272827148ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12827 prompt_cache_len:5151 prompt_cache_ratio:0.4015748031496063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 +DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:29 [batch.py:51] router release req id 8 +INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10579085350036621 s +INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10778498649597168 s +DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=69985633649925630898216712388672116838, time:1750768349.534239s req_ids:[8] +DEBUG 06-24 20:32:29 [manager.py:391] +ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:216.51768684387207ms total_cost_time:216.54367446899414ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12828 prompt_cache_len:5151 prompt_cache_ratio:0.4015434985968195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 +DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:29 [batch.py:51] router release req id 8 +INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10470151901245117 s +INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10597085952758789 s +DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=96722282633699076179335580399581259859, time:1750768349.7584927s req_ids:[8] +DEBUG 06-24 20:32:29 [manager.py:391] +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:392.1701908111572ms total_cost_time:392.22168922424316ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:12829 prompt_cache_len:5151 prompt_cache_ratio:0.4015121989243121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 +DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:30 [batch.py:51] router release req id 8 +INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10770988464355469 s +INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10968399047851562 s +DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=269867954577598169529203811237201654584, time:1750768350.1550257s req_ids:[8] +DEBUG 06-24 20:32:30 [manager.py:391] +ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:214.87879753112793ms total_cost_time:214.92385864257812ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12830 prompt_cache_len:5151 prompt_cache_ratio:0.4014809041309431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 +DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:30 [batch.py:51] router release req id 8 +INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10766744613647461 s +INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s +DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=51924593423859848575283771764438393108, time:1750768350.3820007s req_ids:[8] +DEBUG 06-24 20:32:30 [manager.py:391] +ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:215.59429168701172ms total_cost_time:215.6367301940918ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12831 prompt_cache_len:5151 prompt_cache_ratio:0.4014496142155717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 +DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:30 [batch.py:51] router release req id 8 +INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10666179656982422 s +INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10852217674255371 s +DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=225141957081174373561396471629176397583, time:1750768350.596503s req_ids:[8] +DEBUG 06-24 20:32:30 [manager.py:391] +ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:201.27415657043457ms total_cost_time:201.31754875183105ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12832 prompt_cache_len:5151 prompt_cache_ratio:0.40141832917705733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 +DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:30 [batch.py:51] router release req id 8 +INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10810518264770508 s +INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10918188095092773 s +DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=81172162782348690502033709073791759569, time:1750768350.8040118s req_ids:[8] +DEBUG 06-24 20:32:30 [manager.py:391] +ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:168.73526573181152ms total_cost_time:168.7788963317871ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12833 prompt_cache_len:5151 prompt_cache_ratio:0.4013870490142601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 +DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:30 [batch.py:51] router release req id 8 +INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10773658752441406 s +INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10981345176696777 s +DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=99970022450980879317253139983375404637, time:1750768350.9786732s req_ids:[8] +DEBUG 06-24 20:32:30 [manager.py:391] +ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:205.60121536254883ms total_cost_time:205.64675331115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12834 prompt_cache_len:5151 prompt_cache_ratio:0.4013557737260402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 +DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:31 [batch.py:51] router release req id 8 +INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.10929512977600098 s +INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.1114037036895752 s +DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=32004448131269710751797711300987736398, time:1750768351.190006s req_ids:[8] +DEBUG 06-24 20:32:31 [manager.py:391] +ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:213.83428573608398ms total_cost_time:213.87863159179688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12835 prompt_cache_len:5151 prompt_cache_ratio:0.4013245033112583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 +DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:31 [batch.py:51] router release req id 8 +INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.1070249080657959 s +INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.10907340049743652 s +DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=283151357426904627881108396021815816169, time:1750768351.4141943s req_ids:[8] +DEBUG 06-24 20:32:31 [manager.py:391] +ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:390.86103439331055ms total_cost_time:390.90538024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12836 prompt_cache_len:5151 prompt_cache_ratio:0.40129323776877535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 +DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:31 [batch.py:51] router release req id 8 +INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.1079108715057373 s +INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.11127901077270508 s +DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=300127389828582603377476130744930342262, time:1750768351.8080432s req_ids:[8] +DEBUG 06-24 20:32:31 [manager.py:391] +ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:206.12192153930664ms total_cost_time:206.16936683654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:12837 prompt_cache_len:5151 prompt_cache_ratio:0.40126197709745265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 +DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:31 [batch.py:51] router release req id 8 +INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.11073994636535645 s +INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11298894882202148 s +DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=148757289989635794743963032541845447157, time:1750768352.0207357s req_ids:[8] +DEBUG 06-24 20:32:32 [manager.py:391] +ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69624137878418ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12838 prompt_cache_len:5151 prompt_cache_ratio:0.40123072129615206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 +DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:32 [batch.py:51] router release req id 8 +INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10794591903686523 s +INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11052513122558594 s +DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=52466047218876425747650416572726382506, time:1750768352.2389243s req_ids:[8] +DEBUG 06-24 20:32:32 [manager.py:391] +ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:210.65068244934082ms total_cost_time:210.6943130493164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12839 prompt_cache_len:5151 prompt_cache_ratio:0.4011994703637355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 +DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:32 [batch.py:51] router release req id 8 +INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10871601104736328 s +INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11130928993225098 s +DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=179118223445953983735088294833345518804, time:1750768352.4682012s req_ids:[8] +DEBUG 06-24 20:32:32 [manager.py:391] +ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:224.31039810180664ms total_cost_time:224.35402870178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12840 prompt_cache_len:5151 prompt_cache_ratio:0.4011682242990654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 +DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:32 [batch.py:51] router release req id 8 +INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10741686820983887 s +INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.10947537422180176 s +DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=117929470809742580163682387501187836222, time:1750768352.6891825s req_ids:[8] +DEBUG 06-24 20:32:32 [manager.py:391] +ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:212.88490295410156ms total_cost_time:212.92829513549805ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12841 prompt_cache_len:5151 prompt_cache_ratio:0.4011369831010046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 +DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:32 [batch.py:51] router release req id 8 +INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10821056365966797 s +INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11046361923217773 s +DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=305517591725713251219293156373953035491, time:1750768352.9065707s req_ids:[8] +DEBUG 06-24 20:32:32 [manager.py:391] +ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:386.5363597869873ms total_cost_time:386.5811824798584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12842 prompt_cache_len:5151 prompt_cache_ratio:0.40110574676841615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 +DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:33 [batch.py:51] router release req id 8 +INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10909628868103027 s +INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11398792266845703 s +DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=21463708599813296329177016971466419749, time:1750768353.3013568s req_ids:[8] +DEBUG 06-24 20:32:33 [manager.py:391] +ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:214.71357345581055ms total_cost_time:214.75744247436523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12843 prompt_cache_len:5151 prompt_cache_ratio:0.4010745153001635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 +DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:33 [batch.py:51] router release req id 8 +INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10853862762451172 s +INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11077260971069336 s +DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=255980957766741912240983380346416192244, time:1750768353.521816s req_ids:[8] +DEBUG 06-24 20:32:33 [manager.py:391] +ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:209.71393585205078ms total_cost_time:209.75852012634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12844 prompt_cache_len:5151 prompt_cache_ratio:0.4010432886951106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 +DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:33 [batch.py:51] router release req id 8 +INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10802674293518066 s +INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.1127021312713623 s +DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=230539606338605201505586067668847838407, time:1750768353.7386837s req_ids:[8] +DEBUG 06-24 20:32:33 [manager.py:391] +ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:214.45488929748535ms total_cost_time:214.49637413024902ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12845 prompt_cache_len:5151 prompt_cache_ratio:0.40101206695212144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 +DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:33 [batch.py:51] router release req id 8 +INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s +INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11071419715881348 s +DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=297686217749322132140258850763062125910, time:1750768353.9591126s req_ids:[8] +DEBUG 06-24 20:32:33 [manager.py:391] +ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:215.41118621826172ms total_cost_time:215.4562473297119ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12846 prompt_cache_len:5151 prompt_cache_ratio:0.40098085007006073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 +DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:34 [batch.py:51] router release req id 8 +INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10817337036132812 s +INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.11029601097106934 s +DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=73071458184082456371998146513213245308, time:1750768354.180707s req_ids:[8] +DEBUG 06-24 20:32:34 [manager.py:391] +ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:206.48765563964844ms total_cost_time:206.53223991394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12847 prompt_cache_len:5151 prompt_cache_ratio:0.40094963804779327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 +DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:34 [batch.py:51] router release req id 8 +INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10749936103820801 s +INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.10975885391235352 s +DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=322238633501542235499050005604107635690, time:1750768354.412689s req_ids:[8] +DEBUG 06-24 20:32:34 [manager.py:391] +ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:231.28461837768555ms total_cost_time:231.33158683776855ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12848 prompt_cache_len:5151 prompt_cache_ratio:0.40091843088418433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 +DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:34 [batch.py:51] router release req id 8 +INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10832643508911133 s +INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.1104285717010498 s +DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=66707610567167297021214850297496139154, time:1750768354.632361s req_ids:[8] +DEBUG 06-24 20:32:34 [manager.py:391] +ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:213.12880516052246ms total_cost_time:213.17410469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12849 prompt_cache_len:5151 prompt_cache_ratio:0.40088722857809944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 +DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:34 [batch.py:51] router release req id 8 +INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10971808433532715 s +INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s +DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=172344340825777313959862988425652517718, time:1750768354.866721s req_ids:[8] +DEBUG 06-24 20:32:34 [manager.py:391] +ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:408.0221652984619ms total_cost_time:408.0672264099121ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12850 prompt_cache_len:5151 prompt_cache_ratio:0.4008560311284047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 +DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:35 [batch.py:51] router release req id 8 +INFO 06-24 20:32:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10753345489501953 s +INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s +DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=24114512684198741507322899376733584826, time:1750768355.2674847s req_ids:[8] +DEBUG 06-24 20:32:35 [manager.py:391] +ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:218.45555305480957ms total_cost_time:218.51134300231934ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:12851 prompt_cache_len:5151 prompt_cache_ratio:0.4008248385339662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 +DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:35 [batch.py:51] router release req id 8 +INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.1091165542602539 s +INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.1100466251373291 s +DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=20951157011009264319585329498179760093, time:1750768355.4906127s req_ids:[8] +DEBUG 06-24 20:32:35 [manager.py:391] +ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:206.68959617614746ms total_cost_time:206.73346519470215ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12852 prompt_cache_len:5151 prompt_cache_ratio:0.4007936507936508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 +DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:35 [batch.py:51] router release req id 8 +INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10787820816040039 s +INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10926938056945801 s +DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=281688663327349178006618024835234508891, time:1750768355.703515s req_ids:[8] +DEBUG 06-24 20:32:35 [manager.py:391] +ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:208.27817916870117ms total_cost_time:208.32085609436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12853 prompt_cache_len:5151 prompt_cache_ratio:0.40076246790632536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 +DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:35 [batch.py:51] router release req id 8 +INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10770440101623535 s +INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10861778259277344 s +DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=140180503040895202675983827454183508533, time:1750768355.918948s req_ids:[8] +DEBUG 06-24 20:32:35 [manager.py:391] +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:212.14795112609863ms total_cost_time:212.1899127960205ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12854 prompt_cache_len:5151 prompt_cache_ratio:0.4007312898708573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 +DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:36 [batch.py:51] router release req id 8 +INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.1090993881225586 s +INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.11048603057861328 s +DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=132364740035108421660585809094607396762, time:1750768356.137186s req_ids:[8] +DEBUG 06-24 20:32:36 [manager.py:391] +ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:213.1035327911377ms total_cost_time:213.1481170654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12855 prompt_cache_len:5151 prompt_cache_ratio:0.4007001166861143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 +DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:36 [batch.py:51] router release req id 8 +INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.10780978202819824 s +INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10890650749206543 s +DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=313254959861912950312924639355461554495, time:1750768356.3551052s req_ids:[8] +DEBUG 06-24 20:32:36 [manager.py:391] +ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:207.08274841308594ms total_cost_time:207.12685585021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12856 prompt_cache_len:5151 prompt_cache_ratio:0.4006689483509645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 +DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:36 [batch.py:51] router release req id 8 +INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.10749077796936035 s +INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10964822769165039 s +DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=201170592212575184104236318754027238756, time:1750768356.579609s req_ids:[8] +DEBUG 06-24 20:32:36 [manager.py:391] +ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:387.5553607940674ms total_cost_time:387.59875297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12857 prompt_cache_len:5151 prompt_cache_ratio:0.40063778486427626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 +DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:36 [batch.py:51] router release req id 8 +INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s +DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=267587396332165296556366436930486428690, time:1750768356.9590855s req_ids:[8] +DEBUG 06-24 20:32:36 [manager.py:391] +INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10959696769714355 s +ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:184.27681922912598ms total_cost_time:184.32164192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12858 prompt_cache_len:5151 prompt_cache_ratio:0.4006066262249183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 +DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:37 [batch.py:51] router release req id 8 +INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10768437385559082 s +INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11065435409545898 s +DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=317361708165447729237624378287546029893, time:1750768357.1546652s req_ids:[8] +DEBUG 06-24 20:32:37 [manager.py:391] +DEBUG 06-24 20:32:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 53285.280 tokens/s +DEBUG 06-24 20:32:37 [stats.py:37] Avg prompt tokens throughput: 53277.079 tokens/s +DEBUG 06-24 20:32:37 [stats.py:37] Avg generate tokens throughput: 8.201 tokens/s +ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:203.06015014648438ms total_cost_time:203.10401916503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12859 prompt_cache_len:5151 prompt_cache_ratio:0.40057547243175984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 +DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:37 [batch.py:51] router release req id 8 +INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10829830169677734 s +INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11042118072509766 s +DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=221135693849931653459307928210180347072, time:1750768357.3669522s req_ids:[8] +DEBUG 06-24 20:32:37 [manager.py:391] +ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:212.2952938079834ms total_cost_time:212.33892440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12860 prompt_cache_len:5151 prompt_cache_ratio:0.4005443234836703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 +DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:37 [batch.py:51] router release req id 8 +INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10875916481018066 s +INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s +DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=84627510649399654288109116419923012571, time:1750768357.5851233s req_ids:[8] +DEBUG 06-24 20:32:37 [manager.py:391] +ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:216.0179615020752ms total_cost_time:216.06087684631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12861 prompt_cache_len:5151 prompt_cache_ratio:0.40051317937951947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 +DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:37 [batch.py:51] router release req id 8 +INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10952162742614746 s +INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11201071739196777 s +DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=193739161753955824429109779655684097894, time:1750768357.8063345s req_ids:[8] +DEBUG 06-24 20:32:37 [manager.py:391] +ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:214.82563018798828ms total_cost_time:214.84637260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12862 prompt_cache_len:5151 prompt_cache_ratio:0.40048204011817756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 +DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:37 [batch.py:51] router release req id 8 +INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.1060488224029541 s +INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10817885398864746 s +DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=294507474154773209163826349722347690323, time:1750768358.0260487s req_ids:[8] +DEBUG 06-24 20:32:38 [manager.py:391] +ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:211.86518669128418ms total_cost_time:211.92026138305664ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:12863 prompt_cache_len:5151 prompt_cache_ratio:0.4004509056985151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 +DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:38 [batch.py:51] router release req id 8 +INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.31224679946899414 s +INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.31427979469299316 s +DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=69663845363994372534660450683127471677, time:1750768358.453449s req_ids:[8] +DEBUG 06-24 20:32:38 [manager.py:391] +ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:424.7438907623291ms total_cost_time:424.7884750366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12864 prompt_cache_len:5151 prompt_cache_ratio:0.400419776119403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 +DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:38 [batch.py:51] router release req id 8 +INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.1078343391418457 s +INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10979986190795898 s +DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=50468090068802323435457323556832460496, time:1750768358.67636s req_ids:[8] +DEBUG 06-24 20:32:38 [manager.py:391] +ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:240.97490310668945ms total_cost_time:241.01758003234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12865 prompt_cache_len:5151 prompt_cache_ratio:0.4003886513797124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 +DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:38 [batch.py:51] router release req id 8 +INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.10743904113769531 s +INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10946226119995117 s +DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=112603319654759421008126229734947131899, time:1750768358.9207656s req_ids:[8] +DEBUG 06-24 20:32:38 [manager.py:391] +ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:214.74933624267578ms total_cost_time:214.79392051696777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12866 prompt_cache_len:5151 prompt_cache_ratio:0.40035753147831493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 +DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:39 [batch.py:51] router release req id 8 +INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10841608047485352 s +INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11049556732177734 s +DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=136569176332481269141860780318482592512, time:1750768359.1404462s req_ids:[8] +DEBUG 06-24 20:32:39 [manager.py:391] +ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:200.50406455993652ms total_cost_time:200.5460262298584ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12867 prompt_cache_len:5151 prompt_cache_ratio:0.40032641641408256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 +DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:39 [batch.py:51] router release req id 8 +INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10698699951171875 s +INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.1091761589050293 s +DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=220524089975317659450012147859027616597, time:1750768359.3482542s req_ids:[8] +DEBUG 06-24 20:32:39 [manager.py:391] +ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:206.6338062286377ms total_cost_time:206.6800594329834ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12868 prompt_cache_len:5151 prompt_cache_ratio:0.4002953061858875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 +DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:39 [batch.py:51] router release req id 8 +INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10837817192077637 s +INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11041498184204102 s +DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=250980048724135682981607437037259836120, time:1750768359.559812s req_ids:[8] +DEBUG 06-24 20:32:39 [manager.py:391] +ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:214.1861915588379ms total_cost_time:214.2322063446045ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12869 prompt_cache_len:5151 prompt_cache_ratio:0.4002642007926024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 +DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:39 [batch.py:51] router release req id 8 +INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s +INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s +DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=113210083329043235087213525697258063742, time:1750768359.7780104s req_ids:[8] +DEBUG 06-24 20:32:39 [manager.py:391] +ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:380.8109760284424ms total_cost_time:380.85484504699707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12870 prompt_cache_len:5151 prompt_cache_ratio:0.4002331002331002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 +DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:40 [batch.py:51] router release req id 8 +INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10962200164794922 s +INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11172628402709961 s +DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=181123670803528098680390489926329309777, time:1750768360.166057s req_ids:[8] +DEBUG 06-24 20:32:40 [manager.py:391] +ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:218.46938133239746ms total_cost_time:218.51372718811035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12871 prompt_cache_len:5151 prompt_cache_ratio:0.40020200450625437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 +DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:40 [batch.py:51] router release req id 8 +INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10950589179992676 s +INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11142826080322266 s +DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=198681761813491091823117293309031244914, time:1750768360.3897166s req_ids:[8] +DEBUG 06-24 20:32:40 [manager.py:391] +ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:211.43293380737305ms total_cost_time:211.4572525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12872 prompt_cache_len:5151 prompt_cache_ratio:0.4001709136109385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 +DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:40 [batch.py:51] router release req id 8 +INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10689616203308105 s +INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s +DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=283541222669653329069910228622371455787, time:1750768360.6124766s req_ids:[8] +DEBUG 06-24 20:32:40 [manager.py:391] +ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:220.91364860534668ms total_cost_time:220.95870971679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12873 prompt_cache_len:5151 prompt_cache_ratio:0.40013982754602656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 +DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:40 [batch.py:51] router release req id 8 +INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10841035842895508 s +INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11122560501098633 s +DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=145071811298234991619129478083565720384, time:1750768360.8372235s req_ids:[8] +DEBUG 06-24 20:32:40 [manager.py:391] +ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:209.9156379699707ms total_cost_time:209.9611759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12874 prompt_cache_len:5151 prompt_cache_ratio:0.40010874631039306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 +DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:40 [batch.py:51] router release req id 8 +INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10780477523803711 s +INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.11000657081604004 s +DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=321941240107032266423440392027399899711, time:1750768361.0525863s req_ids:[8] +DEBUG 06-24 20:32:41 [manager.py:391] +ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:210.8466625213623ms total_cost_time:210.89744567871094ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12875 prompt_cache_len:5151 prompt_cache_ratio:0.4000776699029126 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 +DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:41 [batch.py:51] router release req id 8 +INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.3084535598754883 s +INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.310380220413208 s +DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=298019330997989248811193889006940246576, time:1750768361.4823272s req_ids:[8] +DEBUG 06-24 20:32:41 [manager.py:391] +ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:423.27046394348145ms total_cost_time:423.31480979919434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12876 prompt_cache_len:5151 prompt_cache_ratio:0.40004659832246037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 +DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:41 [batch.py:51] router release req id 8 +INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10851502418518066 s +INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.11046171188354492 s +DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=331478753374010836008816302858913734966, time:1750768361.698183s req_ids:[8] +DEBUG 06-24 20:32:41 [manager.py:391] +ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:206.9108486175537ms total_cost_time:206.9559097290039ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12877 prompt_cache_len:5151 prompt_cache_ratio:0.4000155315679118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 +DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:41 [batch.py:51] router release req id 8 +INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10992717742919922 s +INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.1118319034576416 s +DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=102542784665974863699446702836921896744, time:1750768361.9118366s req_ids:[8] +DEBUG 06-24 20:32:41 [manager.py:391] +ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:201.7052173614502ms total_cost_time:201.74884796142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12878 prompt_cache_len:5151 prompt_cache_ratio:0.39998446963814255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 +DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:42 [batch.py:51] router release req id 8 +INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10749602317810059 s +INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.10966920852661133 s +DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=282410450932448245320614620816078469630, time:1750768362.1199281s req_ids:[8] +DEBUG 06-24 20:32:42 [manager.py:391] +ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:213.4227752685547ms total_cost_time:213.46592903137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12879 prompt_cache_len:5151 prompt_cache_ratio:0.3999534125320289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 +DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:42 [batch.py:51] router release req id 8 +INFO 06-24 20:32:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10906648635864258 s +INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.11111640930175781 s +DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=199036199411911209867236474976652322521, time:1750768362.341671s req_ids:[8] +DEBUG 06-24 20:32:42 [manager.py:391] +ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:214.71428871154785ms total_cost_time:214.75672721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12880 prompt_cache_len:5151 prompt_cache_ratio:0.3999223602484472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 +DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:42 [batch.py:51] router release req id 8 +INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10857582092285156 s +INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.1105337142944336 s +DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=275838052459313704283131316678155164157, time:1750768362.5621865s req_ids:[8] +DEBUG 06-24 20:32:42 [manager.py:391] +ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:382.5969696044922ms total_cost_time:382.6415538787842ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12881 prompt_cache_len:5151 prompt_cache_ratio:0.3998913127862744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 +DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:42 [batch.py:51] router release req id 8 +INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.1079108715057373 s +INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s +DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=22256443322693418432066122735929835393, time:1750768362.9515493s req_ids:[8] +DEBUG 06-24 20:32:42 [manager.py:391] +ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:211.7021083831787ms total_cost_time:211.7481231689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12882 prompt_cache_len:5151 prompt_cache_ratio:0.3998602701443875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 +DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:43 [batch.py:51] router release req id 8 +INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10921835899353027 s +INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11117172241210938 s +DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=230719554820704628226513869340634270556, time:1750768363.1698744s req_ids:[8] +DEBUG 06-24 20:32:43 [manager.py:391] +ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:210.05868911743164ms total_cost_time:210.07966995239258ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12883 prompt_cache_len:5151 prompt_cache_ratio:0.39982923232166423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 +DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:43 [batch.py:51] router release req id 8 +INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10666298866271973 s +INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s +DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=90847437594544539414446558071070232989, time:1750768363.3871963s req_ids:[8] +DEBUG 06-24 20:32:43 [manager.py:391] +ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:210.8767032623291ms total_cost_time:210.921049118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12884 prompt_cache_len:5151 prompt_cache_ratio:0.3997981993169823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 +DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:43 [batch.py:51] router release req id 8 +INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.1105499267578125 s +INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11253046989440918 s +DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=32467274945931578438518827573826912993, time:1750768363.6036298s req_ids:[8] +DEBUG 06-24 20:32:43 [manager.py:391] +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:215.55471420288086ms total_cost_time:215.59834480285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12885 prompt_cache_len:5151 prompt_cache_ratio:0.39976717112922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 +DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:43 [batch.py:51] router release req id 8 +INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10849380493164062 s +INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11041927337646484 s +DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=308356996812741203143846913955190949688, time:1750768363.823867s req_ids:[8] +DEBUG 06-24 20:32:43 [manager.py:391] +ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:211.81368827819824ms total_cost_time:211.85803413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12886 prompt_cache_len:5151 prompt_cache_ratio:0.3997361477572559 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 +DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:43 [batch.py:51] router release req id 8 +INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10862445831298828 s +INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11229467391967773 s +DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=276163978058958580826991167271763283608, time:1750768364.0441546s req_ids:[8] +DEBUG 06-24 20:32:44 [manager.py:391] +ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:394.36936378479004ms total_cost_time:394.41370964050293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12887 prompt_cache_len:5151 prompt_cache_ratio:0.399705129199969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 +DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:44 [batch.py:51] router release req id 8 +INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10909414291381836 s +INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11101150512695312 s +DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=286248189132336773998456302015094183435, time:1750768364.4447024s req_ids:[8] +DEBUG 06-24 20:32:44 [manager.py:391] +ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:208.32109451293945ms total_cost_time:208.36734771728516ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12888 prompt_cache_len:5151 prompt_cache_ratio:0.3996741154562384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 +DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:44 [batch.py:51] router release req id 8 +INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s +INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11009478569030762 s +DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=120608706490100011489302634007617461074, time:1750768364.6587808s req_ids:[8] +DEBUG 06-24 20:32:44 [manager.py:391] +ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:208.88185501098633ms total_cost_time:208.92763137817383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12889 prompt_cache_len:5151 prompt_cache_ratio:0.3996431065249437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 +DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:44 [batch.py:51] router release req id 8 +INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10848712921142578 s +INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11197185516357422 s +DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=174368984048093765196484983066368814397, time:1750768364.873221s req_ids:[8] +DEBUG 06-24 20:32:44 [manager.py:391] +ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:211.7295265197754ms total_cost_time:211.7753028869629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12890 prompt_cache_len:5151 prompt_cache_ratio:0.3996121024049651 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 +DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:45 [batch.py:51] router release req id 8 +INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10772919654846191 s +INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.10962152481079102 s +DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=90987343539172249072712881182909984932, time:1750768365.0935338s req_ids:[8] +DEBUG 06-24 20:32:45 [manager.py:391] +ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:206.87270164489746ms total_cost_time:206.91609382629395ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12891 prompt_cache_len:5151 prompt_cache_ratio:0.3995811030951827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 +DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:45 [batch.py:51] router release req id 8 +INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10809063911437988 s +INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s +DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=65195424068177244853555115154158192858, time:1750768365.3068626s req_ids:[8] +DEBUG 06-24 20:32:45 [manager.py:391] +ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:213.34147453308105ms total_cost_time:213.38820457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12892 prompt_cache_len:5151 prompt_cache_ratio:0.3995501085944772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 +DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:45 [batch.py:51] router release req id 8 +INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10866522789001465 s +INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11052775382995605 s +DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=305497047182717265234489123309083668228, time:1750768365.526419s req_ids:[8] +DEBUG 06-24 20:32:45 [manager.py:391] +ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:207.24081993103027ms total_cost_time:207.28516578674316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12893 prompt_cache_len:5151 prompt_cache_ratio:0.39951911890172964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 +DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:45 [batch.py:51] router release req id 8 +INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s +INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11052131652832031 s +DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=4336780487474613396020340644803398432, time:1750768365.7402627s req_ids:[8] +DEBUG 06-24 20:32:45 [manager.py:391] +ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:395.54548263549805ms total_cost_time:395.59197425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12894 prompt_cache_len:5151 prompt_cache_ratio:0.39948813401582134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 +DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:46 [batch.py:51] router release req id 8 +INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10890579223632812 s +INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.11083579063415527 s +DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=318286494635991376652187361591447375189, time:1750768366.1429873s req_ids:[8] +DEBUG 06-24 20:32:46 [manager.py:391] +ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:207.26346969604492ms total_cost_time:207.30853080749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12895 prompt_cache_len:5151 prompt_cache_ratio:0.39945715393563397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 +DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:46 [batch.py:51] router release req id 8 +INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10693144798278809 s +INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.10884857177734375 s +DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=102168059512969490423572861380632814920, time:1750768366.3561654s req_ids:[8] +DEBUG 06-24 20:32:46 [manager.py:391] +ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:214.6005630493164ms total_cost_time:214.644193649292ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12896 prompt_cache_len:5151 prompt_cache_ratio:0.39942617866004965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 +DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:46 [batch.py:51] router release req id 8 +INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10921716690063477 s +DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=183830403990151759732814451883982339065, time:1750768366.5776844s req_ids:[8] +DEBUG 06-24 20:32:46 [manager.py:391] +INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.11645865440368652 s +ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:209.36131477355957ms total_cost_time:209.40709114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12897 prompt_cache_len:5151 prompt_cache_ratio:0.3993952081879507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 +DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:46 [batch.py:51] router release req id 8 +INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s +INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.10967278480529785 s +DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=270771977134166513888438164480173409807, time:1750768366.7956276s req_ids:[8] +DEBUG 06-24 20:32:46 [manager.py:391] +ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:209.1071605682373ms total_cost_time:209.1515064239502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12898 prompt_cache_len:5151 prompt_cache_ratio:0.3993642425182199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 +DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:46 [batch.py:51] router release req id 8 +INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s +INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.10983896255493164 s +DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=107155677279536174075203215241895240480, time:1750768367.010928s req_ids:[8] +DEBUG 06-24 20:32:47 [manager.py:391] +INFO 06-24 20:32:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:212.7225399017334ms total_cost_time:212.7671241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12899 prompt_cache_len:5151 prompt_cache_ratio:0.3993332816497403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 +DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:47 [batch.py:51] router release req id 8 +INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.3092012405395508 s +INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.3113434314727783 s +DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=4449735080615531952315116875151216084, time:1750768367.4375908s req_ids:[8] +DEBUG 06-24 20:32:47 [manager.py:391] +DEBUG 06-24 20:32:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 51362.115 tokens/s +DEBUG 06-24 20:32:47 [stats.py:37] Avg prompt tokens throughput: 51354.140 tokens/s +DEBUG 06-24 20:32:47 [stats.py:37] Avg generate tokens throughput: 7.974 tokens/s +ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:427.80590057373047ms total_cost_time:427.8266429901123ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12900 prompt_cache_len:5151 prompt_cache_ratio:0.39930232558139533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 +DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:47 [batch.py:51] router release req id 8 +INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.1089012622833252 s +INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.11086106300354004 s +DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=278809233310314894717978639873289973224, time:1750768367.6668477s req_ids:[8] +DEBUG 06-24 20:32:47 [manager.py:391] +ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:216.80474281311035ms total_cost_time:216.84861183166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12901 prompt_cache_len:5151 prompt_cache_ratio:0.3992713743120688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 +DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:47 [batch.py:51] router release req id 8 +INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.10848021507263184 s +INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s +DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=255022828655958176521772538921390937047, time:1750768367.8861938s req_ids:[8] +DEBUG 06-24 20:32:47 [manager.py:391] +ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:211.38501167297363ms total_cost_time:211.42840385437012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12902 prompt_cache_len:5151 prompt_cache_ratio:0.39924042784064484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 +DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:48 [batch.py:51] router release req id 8 +INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s +INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966610908508301 s +DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=138622804971475858270537276851374882567, time:1750768368.106162s req_ids:[8] +DEBUG 06-24 20:32:48 [manager.py:391] +ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:214.7347927093506ms total_cost_time:214.77842330932617ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12903 prompt_cache_len:5151 prompt_cache_ratio:0.39920948616600793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 +DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:48 [batch.py:51] router release req id 8 +INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.10920238494873047 s +INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11158370971679688 s +DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=265442365563723208441884661850743257614, time:1750768368.3259933s req_ids:[8] +DEBUG 06-24 20:32:48 [manager.py:391] +ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:212.66651153564453ms total_cost_time:212.71061897277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12904 prompt_cache_len:5151 prompt_cache_ratio:0.39917854928704277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 +DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:48 [batch.py:51] router release req id 8 +INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.1089177131652832 s +INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11079788208007812 s +DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=88527502631751861010253793164199997731, time:1750768368.5460155s req_ids:[8] +DEBUG 06-24 20:32:48 [manager.py:391] +ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:210.20126342773438ms total_cost_time:210.26039123535156ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:12905 prompt_cache_len:5151 prompt_cache_ratio:0.39914761720263464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 +DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:48 [batch.py:51] router release req id 8 +INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.10890316963195801 s +INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11097335815429688 s +DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=151918695996829108820912358381424799642, time:1750768368.765368s req_ids:[8] +DEBUG 06-24 20:32:48 [manager.py:391] +ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:376.9240379333496ms total_cost_time:376.9686222076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12906 prompt_cache_len:5151 prompt_cache_ratio:0.399116689911669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 +DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:49 [batch.py:51] router release req id 8 +INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10538077354431152 s +INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.10674118995666504 s +DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=240501756469114548608930196231858067196, time:1750768369.1495357s req_ids:[8] +DEBUG 06-24 20:32:49 [manager.py:391] +ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.25934410095215ms total_cost_time:208.30249786376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12907 prompt_cache_len:5151 prompt_cache_ratio:0.3990857674130317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 +DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:49 [batch.py:51] router release req id 8 +INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10882854461669922 s +INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s +DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=289259091463584744480427569167039779462, time:1750768369.3635895s req_ids:[8] +DEBUG 06-24 20:32:49 [manager.py:391] +ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:212.26787567138672ms total_cost_time:212.3126983642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12908 prompt_cache_len:5151 prompt_cache_ratio:0.39905484970560895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 +DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:49 [batch.py:51] router release req id 8 +INFO 06-24 20:32:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10879850387573242 s +INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.11063718795776367 s +DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=191263003833809082584938256112185884779, time:1750768369.581702s req_ids:[8] +DEBUG 06-24 20:32:49 [manager.py:391] +ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.62245559692383ms total_cost_time:208.66632461547852ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12909 prompt_cache_len:5151 prompt_cache_ratio:0.39902393678828724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 +DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:49 [batch.py:51] router release req id 8 +INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.1059110164642334 s +INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769486427307129 s +DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=81902795467379460400668537478844898701, time:1750768369.798326s req_ids:[8] +DEBUG 06-24 20:32:49 [manager.py:391] +ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.7695598602295ms total_cost_time:208.81390571594238ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12910 prompt_cache_len:5151 prompt_cache_ratio:0.3989930286599535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 +DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:49 [batch.py:51] router release req id 8 +INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10593724250793457 s +INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10789942741394043 s +DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=225605605094680520105577733466074518118, time:1750768370.0149605s req_ids:[8] +DEBUG 06-24 20:32:50 [manager.py:391] +ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:210.85476875305176ms total_cost_time:210.8769416809082ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12911 prompt_cache_len:5151 prompt_cache_ratio:0.398962125319495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 +DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:50 [batch.py:51] router release req id 8 +INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10284590721130371 s +INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10476994514465332 s +DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=336877155230819017036831715916542790463, time:1750768370.2333694s req_ids:[8] +DEBUG 06-24 20:32:50 [manager.py:391] +ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:388.5455131530762ms total_cost_time:388.5791301727295ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12912 prompt_cache_len:5151 prompt_cache_ratio:0.39893122676579923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 +DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:50 [batch.py:51] router release req id 8 +INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s +INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10938620567321777 s +DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=87600417805480212545294058328893829647, time:1750768370.6271718s req_ids:[8] +DEBUG 06-24 20:32:50 [manager.py:391] +ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:214.64014053344727ms total_cost_time:214.6751880645752ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:12913 prompt_cache_len:5151 prompt_cache_ratio:0.3989003329977542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 +DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:50 [batch.py:51] router release req id 8 +INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s +INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.11018824577331543 s +DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=277406554980552126789758474025967473542, time:1750768370.848689s req_ids:[8] +DEBUG 06-24 20:32:50 [manager.py:391] +ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:210.3433609008789ms total_cost_time:210.3874683380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12914 prompt_cache_len:5151 prompt_cache_ratio:0.3988694440142481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 +DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:50 [batch.py:51] router release req id 8 +INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10624051094055176 s +INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10821890830993652 s +DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=295752496235565729837873830849632338953, time:1750768371.0634687s req_ids:[8] +DEBUG 06-24 20:32:51 [manager.py:391] +ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:212.46886253356934ms total_cost_time:212.48745918273926ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:12915 prompt_cache_len:5151 prompt_cache_ratio:0.39883855981416955 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 +DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:51 [batch.py:51] router release req id 8 +INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10485172271728516 s +INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10684084892272949 s +DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=139122490742054125094407310767786719853, time:1750768371.2846644s req_ids:[8] +DEBUG 06-24 20:32:51 [manager.py:391] +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:212.16392517089844ms total_cost_time:212.18490600585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12916 prompt_cache_len:5151 prompt_cache_ratio:0.39880768039640757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 +DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:51 [batch.py:51] router release req id 8 +INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10355997085571289 s +INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10542845726013184 s +DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=211980806392362611982077090383202713887, time:1750768371.5021484s req_ids:[8] +DEBUG 06-24 20:32:51 [manager.py:391] +ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:215.34252166748047ms total_cost_time:215.3620719909668ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:12917 prompt_cache_len:5151 prompt_cache_ratio:0.39877680575985136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 +DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:51 [batch.py:51] router release req id 8 +INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10410189628601074 s +INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.1060483455657959 s +DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=250792530020623909775314750896499629115, time:1750768371.7227063s req_ids:[8] +DEBUG 06-24 20:32:51 [manager.py:391] +ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:214.48731422424316ms total_cost_time:214.5075798034668ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12918 prompt_cache_len:5151 prompt_cache_ratio:0.39874593590339064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 +DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:52 [batch.py:51] router release req id 8 +INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.3075847625732422 s +INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.30959534645080566 s +DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=334869094121541617796824846594791666891, time:1750768372.148581s req_ids:[8] +DEBUG 06-24 20:32:52 [manager.py:391] +ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:427.1965026855469ms total_cost_time:427.2170066833496ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12919 prompt_cache_len:5151 prompt_cache_ratio:0.39871507082591534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 +DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:52 [batch.py:51] router release req id 8 +INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.1054689884185791 s +INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10736441612243652 s +DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=111085422146742684600645107663045020726, time:1750768372.3748178s req_ids:[8] +DEBUG 06-24 20:32:52 [manager.py:391] +ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:173.3572483062744ms total_cost_time:173.37799072265625ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12920 prompt_cache_len:5151 prompt_cache_ratio:0.3986842105263158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 +DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:52 [batch.py:51] router release req id 8 +INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10698652267456055 s +INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10899686813354492 s +DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=152295261477633392570277294100556548031, time:1750768372.5535424s req_ids:[8] +DEBUG 06-24 20:32:52 [manager.py:391] +ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:173.3717918395996ms total_cost_time:173.39181900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12921 prompt_cache_len:5151 prompt_cache_ratio:0.3986533550034827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 +DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:52 [batch.py:51] router release req id 8 +INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10595130920410156 s +INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10768413543701172 s +DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=253917499531028528429339765760337125781, time:1750768372.7335112s req_ids:[8] +DEBUG 06-24 20:32:52 [manager.py:391] +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:174.57032203674316ms total_cost_time:174.5917797088623ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12922 prompt_cache_len:5151 prompt_cache_ratio:0.3986225042563071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 +DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:52 [batch.py:51] router release req id 8 +INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10709667205810547 s +INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10889983177185059 s +DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=209494378238083153793980436523385802040, time:1750768372.9131565s req_ids:[8] +DEBUG 06-24 20:32:52 [manager.py:391] +ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:206.57896995544434ms total_cost_time:206.60066604614258ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12923 prompt_cache_len:5151 prompt_cache_ratio:0.39859165828368026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 +DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:53 [batch.py:51] router release req id 8 +INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10350728034973145 s +INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.1045689582824707 s +DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=167142446393170371914534712628249459505, time:1750768373.125418s req_ids:[8] +DEBUG 06-24 20:32:53 [manager.py:391] +INFO 06-24 20:32:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:53 [statics_utils.py:24] mean first cost: 230.10195452533532 ms +INFO 06-24 20:32:53 [statics_utils.py:24] mean per token cost: 0.06051902061554882 ms +INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:210.7412815093994ms total_cost_time:210.76107025146484ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12924 prompt_cache_len:5151 prompt_cache_ratio:0.398560817084494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 +DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:53 [batch.py:51] router release req id 8 +INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10338568687438965 s +INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.10541033744812012 s +INFO 06-24 20:32:53 [manager.py:620] left req id 8can release False refcount 3 +DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=232313256038154860684353765798282930665, time:1750768373.3404565s req_ids:[8] +DEBUG 06-24 20:32:53 [manager.py:391] +ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:379.55498695373535ms total_cost_time:379.5773983001709ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:12925 prompt_cache_len:5151 prompt_cache_ratio:0.3985299806576402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 +DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:53 [batch.py:51] router release req id 8 +INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10870623588562012 s +INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.11070680618286133 s +DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=102023534388167542808838116809032689536, time:1750768373.7236493s req_ids:[8] +DEBUG 06-24 20:32:53 [manager.py:391] +ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:210.65926551818848ms total_cost_time:210.70384979248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12926 prompt_cache_len:5151 prompt_cache_ratio:0.39849914900201144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 +DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:53 [batch.py:51] router release req id 8 +INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10799312591552734 s +INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.1099390983581543 s +DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=198231021556372056825551028792748324331, time:1750768373.9406655s req_ids:[8] +DEBUG 06-24 20:32:53 [manager.py:391] +ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:209.95092391967773ms total_cost_time:209.99717712402344ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12927 prompt_cache_len:5151 prompt_cache_ratio:0.39846832211650035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 +DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:54 [batch.py:51] router release req id 8 +INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.1080012321472168 s +INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.1099543571472168 s +DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=252959330953307922622628793593577537586, time:1750768374.1579487s req_ids:[8] +DEBUG 06-24 20:32:54 [manager.py:391] +ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:206.2668800354004ms total_cost_time:206.3119411468506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12928 prompt_cache_len:5151 prompt_cache_ratio:0.3984375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 +DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:54 [batch.py:51] router release req id 8 +INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10745596885681152 s +INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10957574844360352 s +DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=215385111894681779173354941527320968885, time:1750768374.3754847s req_ids:[8] +DEBUG 06-24 20:32:54 [manager.py:391] +ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:216.2466049194336ms total_cost_time:216.3090705871582ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12929 prompt_cache_len:5151 prompt_cache_ratio:0.39840668265140383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 +DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:54 [batch.py:51] router release req id 8 +INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10756969451904297 s +INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10943865776062012 s +DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=137213551232169556753730797810202847305, time:1750768374.6011128s req_ids:[8] +DEBUG 06-24 20:32:54 [manager.py:391] +ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:220.14379501342773ms total_cost_time:220.18885612487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12930 prompt_cache_len:5151 prompt_cache_ratio:0.39837587006960556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 +DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:54 [batch.py:51] router release req id 8 +INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10790014266967773 s +INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10978245735168457 s +DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=131829167908095401455511671287105035918, time:1750768374.8198059s req_ids:[8] +DEBUG 06-24 20:32:54 [manager.py:391] +ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:385.7686519622803ms total_cost_time:385.831356048584ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12931 prompt_cache_len:5151 prompt_cache_ratio:0.39834506225349936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 +DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:55 [batch.py:51] router release req id 8 +INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s +INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.1098628044128418 s +DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=34983927202219311048215835846564490579, time:1750768375.2119613s req_ids:[8] +DEBUG 06-24 20:32:55 [manager.py:391] +ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:211.66467666625977ms total_cost_time:211.72595024108887ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12932 prompt_cache_len:5151 prompt_cache_ratio:0.3983142592019796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 +DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:55 [batch.py:51] router release req id 8 +INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10688376426696777 s +INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.10880279541015625 s +DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=266107985150428319908529106817245812229, time:1750768375.4302175s req_ids:[8] +DEBUG 06-24 20:32:55 [manager.py:391] +ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:209.76519584655762ms total_cost_time:209.81097221374512ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12933 prompt_cache_len:5151 prompt_cache_ratio:0.39828346091394107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 +DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:55 [batch.py:51] router release req id 8 +INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10880517959594727 s +INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.11077642440795898 s +DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=143878123286737151102680501531156791307, time:1750768375.6459522s req_ids:[8] +DEBUG 06-24 20:32:55 [manager.py:391] +ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:212.00275421142578ms total_cost_time:212.0652198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12934 prompt_cache_len:5151 prompt_cache_ratio:0.39825266738827897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 +DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:55 [batch.py:51] router release req id 8 +INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10914421081542969 s +INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.11117887496948242 s +DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=137753880152086523471022614387461806796, time:1750768375.865206s req_ids:[8] +DEBUG 06-24 20:32:55 [manager.py:391] +ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:212.6917839050293ms total_cost_time:212.73565292358398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12935 prompt_cache_len:5151 prompt_cache_ratio:0.3982218786238887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 +DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:55 [batch.py:51] router release req id 8 +INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.1081085205078125 s +INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11012768745422363 s +DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=244844937907978017677062739196223549863, time:1750768376.083391s req_ids:[8] +DEBUG 06-24 20:32:56 [manager.py:391] +ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:210.02650260925293ms total_cost_time:210.07966995239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12936 prompt_cache_len:5151 prompt_cache_ratio:0.39819109461966606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 +DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:56 [batch.py:51] router release req id 8 +INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.1087191104888916 s +INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.1106417179107666 s +DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=182307547716615467233687530380405350048, time:1750768376.2997656s req_ids:[8] +DEBUG 06-24 20:32:56 [manager.py:391] +ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:385.71834564208984ms total_cost_time:385.7598304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12937 prompt_cache_len:5151 prompt_cache_ratio:0.39816031537450725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 +DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:56 [batch.py:51] router release req id 8 +INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s +INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11004137992858887 s +DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=202315475991065934492193741994576217806, time:1750768376.6929696s req_ids:[8] +DEBUG 06-24 20:32:56 [manager.py:391] +ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:206.3913345336914ms total_cost_time:206.4356803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12938 prompt_cache_len:5151 prompt_cache_ratio:0.39812954088730873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 +DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:56 [batch.py:51] router release req id 8 +INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.10866022109985352 s +INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11064338684082031 s +DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=10972275312272804779166538342415308415, time:1750768376.9053369s req_ids:[8] +DEBUG 06-24 20:32:56 [manager.py:391] +ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:207.59153366088867ms total_cost_time:207.65304565429688ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:12939 prompt_cache_len:5151 prompt_cache_ratio:0.3980987711569673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 +DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:57 [batch.py:51] router release req id 8 +INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10864067077636719 s +INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063742637634277 s +DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=70589526748832140139605609883509320737, time:1750768377.119355s req_ids:[8] +DEBUG 06-24 20:32:57 [manager.py:391] +ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:206.29501342773438ms total_cost_time:206.35390281677246ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12940 prompt_cache_len:5151 prompt_cache_ratio:0.3980680061823802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 +DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:57 [batch.py:51] router release req id 8 +INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s +INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.1130063533782959 s +DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=12656200728463244490874199830562743953, time:1750768377.3338144s req_ids:[8] +DEBUG 06-24 20:32:57 [manager.py:391] +ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:214.27655220031738ms total_cost_time:214.38980102539062ms,out_token_counter:1 mean_per_token_cost_time: 0.11324882507324219ms prompt_token_num:12941 prompt_cache_len:5151 prompt_cache_ratio:0.39803724596244494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 +DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:57 [batch.py:51] router release req id 8 +INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10749530792236328 s +INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.10951662063598633 s +DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=91290342568348502067954873437937129737, time:1750768377.550625s req_ids:[8] +DEBUG 06-24 20:32:57 [manager.py:391] +DEBUG 06-24 20:32:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 53671.841 tokens/s +DEBUG 06-24 20:32:57 [stats.py:37] Avg prompt tokens throughput: 53663.535 tokens/s +DEBUG 06-24 20:32:57 [stats.py:37] Avg generate tokens throughput: 8.306 tokens/s +ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:207.47637748718262ms total_cost_time:207.49855041503906ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12942 prompt_cache_len:5151 prompt_cache_ratio:0.39800649049605935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 +DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:57 [batch.py:51] router release req id 8 +INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10503959655761719 s +INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.10695695877075195 s +DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=62880931933828217955427990930578919891, time:1750768377.7679124s req_ids:[8] +DEBUG 06-24 20:32:57 [manager.py:391] +ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:388.3941173553467ms total_cost_time:388.41915130615234ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12943 prompt_cache_len:5151 prompt_cache_ratio:0.3979757397821216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 +DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:58 [batch.py:51] router release req id 8 +INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10480260848999023 s +INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.10674834251403809 s +DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=315777156355411221314972682652609676588, time:1750768378.1624725s req_ids:[8] +DEBUG 06-24 20:32:58 [manager.py:391] +ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.65545082092285ms total_cost_time:210.68215370178223ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12944 prompt_cache_len:5151 prompt_cache_ratio:0.3979449938195303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 +DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:58 [batch.py:51] router release req id 8 +INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10645747184753418 s +INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.10840487480163574 s +DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=61429348597905880185834841302515845396, time:1750768378.3758924s req_ids:[8] +DEBUG 06-24 20:32:58 [manager.py:391] +ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:207.7200412750244ms total_cost_time:207.77034759521484ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:12945 prompt_cache_len:5151 prompt_cache_ratio:0.3979142526071842 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 +DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:58 [batch.py:51] router release req id 8 +INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10840916633605957 s +INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.11051416397094727 s +DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=185715099761161567009697246668351167183, time:1750768378.589347s req_ids:[8] +DEBUG 06-24 20:32:58 [manager.py:391] +ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.88767051696777ms total_cost_time:210.93201637268066ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12946 prompt_cache_len:5151 prompt_cache_ratio:0.3978835161439827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 +DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:58 [batch.py:51] router release req id 8 +INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10898160934448242 s +INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.11129450798034668 s +DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=69182248952262198800592161266003002461, time:1750768378.8052497s req_ids:[8] +DEBUG 06-24 20:32:58 [manager.py:391] +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.10804176330566ms total_cost_time:210.15214920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12947 prompt_cache_len:5151 prompt_cache_ratio:0.3978527844288252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 +DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:58 [batch.py:51] router release req id 8 +INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s +INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s +DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=80442181240173388234094348598265733285, time:1750768379.0210974s req_ids:[8] +DEBUG 06-24 20:32:59 [manager.py:391] +ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.21223068237305ms total_cost_time:210.27326583862305ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12948 prompt_cache_len:5151 prompt_cache_ratio:0.39782205746061167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 +DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:59 [batch.py:51] router release req id 8 +INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10777854919433594 s +INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.10961723327636719 s +DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=144308602032719838401247118046269452577, time:1750768379.237047s req_ids:[8] +DEBUG 06-24 20:32:59 [manager.py:391] +ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:207.59224891662598ms total_cost_time:207.6125144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12949 prompt_cache_len:5151 prompt_cache_ratio:0.39779133523824234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 +DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:59 [batch.py:51] router release req id 8 +INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10788488388061523 s +INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.11170196533203125 s +DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=174699695401120592218866722230309037262, time:1750768379.450935s req_ids:[8] +DEBUG 06-24 20:32:59 [manager.py:391] +ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:375.4732608795166ms total_cost_time:375.5173683166504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12950 prompt_cache_len:5151 prompt_cache_ratio:0.3977606177606178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 +DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:59 [batch.py:51] router release req id 8 +INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10757207870483398 s +INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.1094675064086914 s +DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=16840296587367438838642752002929973924, time:1750768379.8342154s req_ids:[8] +DEBUG 06-24 20:32:59 [manager.py:391] +ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:203.80735397338867ms total_cost_time:203.85026931762695ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12951 prompt_cache_len:5151 prompt_cache_ratio:0.39772990502663885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 +DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:32:59 [batch.py:51] router release req id 8 +INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10918211936950684 s +INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11115527153015137 s +DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=314596629040021081758430369974752890856, time:1750768380.0439076s req_ids:[8] +DEBUG 06-24 20:33:00 [manager.py:391] +ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:214.72716331481934ms total_cost_time:214.78986740112305ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12952 prompt_cache_len:5151 prompt_cache_ratio:0.3976991970352069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 +DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:00 [batch.py:51] router release req id 8 +INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10782289505004883 s +INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.1096794605255127 s +DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=314012744901665502538973224126457326120, time:1750768380.262842s req_ids:[8] +DEBUG 06-24 20:33:00 [manager.py:391] +ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:211.8971347808838ms total_cost_time:211.94028854370117ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12953 prompt_cache_len:5151 prompt_cache_ratio:0.3976684937852235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 +DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:00 [batch.py:51] router release req id 8 +INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10937285423278809 s +INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11121988296508789 s +DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=230647268047209660971270696898538504520, time:1750768380.4842677s req_ids:[8] +DEBUG 06-24 20:33:00 [manager.py:391] +ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:214.6434783935547ms total_cost_time:214.68639373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12954 prompt_cache_len:5151 prompt_cache_ratio:0.39763779527559057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 +DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:00 [batch.py:51] router release req id 8 +INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10916328430175781 s +INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11127138137817383 s +DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=44135810167417176616524004040240165577, time:1750768380.70423s req_ids:[8] +DEBUG 06-24 20:33:00 [manager.py:391] +ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:214.71524238586426ms total_cost_time:214.75934982299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12955 prompt_cache_len:5151 prompt_cache_ratio:0.39760710150521034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 +DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:00 [batch.py:51] router release req id 8 +INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10912561416625977 s +INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11117720603942871 s +DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=176221388503856740498777392223733409264, time:1750768380.9254398s req_ids:[8] +DEBUG 06-24 20:33:00 [manager.py:391] +ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:380.1608085632324ms total_cost_time:380.2063465118408ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12956 prompt_cache_len:5151 prompt_cache_ratio:0.3975764124729855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 +DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:01 [batch.py:51] router release req id 8 +INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s +INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973286628723145 s +DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=154708734335641159694782593130911081003, time:1750768381.3116999s req_ids:[8] +DEBUG 06-24 20:33:01 [manager.py:391] +ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:217.04697608947754ms total_cost_time:217.09012985229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12957 prompt_cache_len:5151 prompt_cache_ratio:0.3975457281778189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 +DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:01 [batch.py:51] router release req id 8 +INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.11043596267700195 s +INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11244845390319824 s +DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=140841579056273874413939935854201200022, time:1750768381.5357118s req_ids:[8] +DEBUG 06-24 20:33:01 [manager.py:391] +ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:216.68291091918945ms total_cost_time:216.72534942626953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12958 prompt_cache_len:5151 prompt_cache_ratio:0.397515048618614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 +DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:01 [batch.py:51] router release req id 8 +INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s +INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11083745956420898 s +DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=303539252059743278068382548839731862344, time:1750768381.7576578s req_ids:[8] +DEBUG 06-24 20:33:01 [manager.py:391] +ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:214.64776992797852ms total_cost_time:214.6909236907959ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12959 prompt_cache_len:5151 prompt_cache_ratio:0.39748437379427426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 +DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:01 [batch.py:51] router release req id 8 +INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10900187492370605 s +INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11129999160766602 s +DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=191500139603542081836032962661793526383, time:1750768381.9801161s req_ids:[8] +DEBUG 06-24 20:33:01 [manager.py:391] +ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:216.91155433654785ms total_cost_time:216.95661544799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12960 prompt_cache_len:5151 prompt_cache_ratio:0.3974537037037037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 +DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:02 [batch.py:51] router release req id 8 +INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.10811161994934082 s +INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11006903648376465 s +DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=265210333260612747589921927186731374153, time:1750768382.2021415s req_ids:[8] +DEBUG 06-24 20:33:02 [manager.py:391] +ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:210.53576469421387ms total_cost_time:210.58034896850586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12961 prompt_cache_len:5151 prompt_cache_ratio:0.39742303834580667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 +DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:02 [batch.py:51] router release req id 8 +INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.10897517204284668 s +INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s +DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=153207850059679740101457032700338277332, time:1750768382.4205258s req_ids:[8] +DEBUG 06-24 20:33:02 [manager.py:391] +ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:393.59450340270996ms total_cost_time:393.63789558410645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12962 prompt_cache_len:5151 prompt_cache_ratio:0.3973923777194877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 +DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:02 [batch.py:51] router release req id 8 +INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s +INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11023235321044922 s +DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=44355944374384397417392747194315554768, time:1750768382.8181016s req_ids:[8] +DEBUG 06-24 20:33:02 [manager.py:391] +ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:194.5204734802246ms total_cost_time:194.56219673156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12963 prompt_cache_len:5151 prompt_cache_ratio:0.3973617218236519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 +DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:02 [batch.py:51] router release req id 8 +INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10849785804748535 s +INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s +DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=103648177072542355216118391756931998147, time:1750768383.020583s req_ids:[8] +DEBUG 06-24 20:33:03 [manager.py:391] +ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:170.53484916687012ms total_cost_time:170.58658599853516ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:12964 prompt_cache_len:5151 prompt_cache_ratio:0.3973310706572046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 +DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:03 [batch.py:51] router release req id 8 +INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10825920104980469 s +INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.1104283332824707 s +DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=145114005142044120494274581924949891185, time:1750768383.1952667s req_ids:[8] +DEBUG 06-24 20:33:03 [manager.py:391] +ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:204.6065330505371ms total_cost_time:204.6518325805664ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12965 prompt_cache_len:5151 prompt_cache_ratio:0.3973004242190513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 +DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:03 [batch.py:51] router release req id 8 +INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.1091461181640625 s +INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s +DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=317807868880814323279862971764111061329, time:1750768383.4115329s req_ids:[8] +DEBUG 06-24 20:33:03 [manager.py:391] +ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:215.3148651123047ms total_cost_time:215.35801887512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12966 prompt_cache_len:5151 prompt_cache_ratio:0.3972697825080981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 +DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:03 [batch.py:51] router release req id 8 +INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10859966278076172 s +INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11061596870422363 s +DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=323492624076626596089652042139034234741, time:1750768383.631158s req_ids:[8] +DEBUG 06-24 20:33:03 [manager.py:391] +ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:208.98747444152832ms total_cost_time:209.03420448303223ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12967 prompt_cache_len:5151 prompt_cache_ratio:0.39723914552325135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 +DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:03 [batch.py:51] router release req id 8 +INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10753917694091797 s +INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094355583190918 s +DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=9524225021712841479825636577336600278, time:1750768383.8482509s req_ids:[8] +DEBUG 06-24 20:33:03 [manager.py:391] +ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:209.6095085144043ms total_cost_time:209.65337753295898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12968 prompt_cache_len:5151 prompt_cache_ratio:0.39720851326341766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 +DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:03 [batch.py:51] router release req id 8 +INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10851097106933594 s +INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s +DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=121116532401364437350295609915497151252, time:1750768384.0657642s req_ids:[8] +DEBUG 06-24 20:33:04 [manager.py:391] +ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:381.02197647094727ms total_cost_time:381.0689449310303ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12969 prompt_cache_len:5151 prompt_cache_ratio:0.39717788572750407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 +DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:04 [batch.py:51] router release req id 8 +INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10852742195129395 s +INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.11056995391845703 s +DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=279314551015373191406254706093521968169, time:1750768384.4541688s req_ids:[8] +DEBUG 06-24 20:33:04 [manager.py:391] +ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:212.64910697937012ms total_cost_time:212.693452835083ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12970 prompt_cache_len:5151 prompt_cache_ratio:0.3971472629144179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 +DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:04 [batch.py:51] router release req id 8 +INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s +INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098020076751709 s +DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=53658308208390112131091921790098171216, time:1750768384.6723447s req_ids:[8] +DEBUG 06-24 20:33:04 [manager.py:391] +ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:210.33191680908203ms total_cost_time:210.37578582763672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12971 prompt_cache_len:5151 prompt_cache_ratio:0.39711664482306686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 +DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:04 [batch.py:51] router release req id 8 +INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10858941078186035 s +INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047744750976562 s +DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=36014671537063516259814824075441248757, time:1750768384.889275s req_ids:[8] +DEBUG 06-24 20:33:04 [manager.py:391] +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:208.6927890777588ms total_cost_time:208.73618125915527ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12972 prompt_cache_len:5151 prompt_cache_ratio:0.39708603145235893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 +DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:05 [batch.py:51] router release req id 8 +INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s +INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11131548881530762 s +DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=145984696456415141792262212478587274440, time:1750768385.104374s req_ids:[8] +DEBUG 06-24 20:33:05 [manager.py:391] +ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:212.03351020812988ms total_cost_time:212.08715438842773ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12973 prompt_cache_len:5151 prompt_cache_ratio:0.3970554228012025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 +DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:05 [batch.py:51] router release req id 8 +INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10803413391113281 s +INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s +DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=96116512749070799789078866520441088937, time:1750768385.323129s req_ids:[8] +DEBUG 06-24 20:33:05 [manager.py:391] +ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:212.62216567993164ms total_cost_time:212.66722679138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12974 prompt_cache_len:5151 prompt_cache_ratio:0.39702481886850627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 +DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:05 [batch.py:51] router release req id 8 +INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10869860649108887 s +INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11066699028015137 s +DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=304746264579571821326646784990042852115, time:1750768385.5424397s req_ids:[8] +DEBUG 06-24 20:33:05 [manager.py:391] +ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:418.79820823669434ms total_cost_time:418.8408851623535ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12975 prompt_cache_len:5151 prompt_cache_ratio:0.3969942196531792 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 +DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:05 [batch.py:51] router release req id 8 +INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10917830467224121 s +INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11114621162414551 s +DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=202749743828523223759340098463980886992, time:1750768385.9674542s req_ids:[8] +DEBUG 06-24 20:33:05 [manager.py:391] +ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:213.84930610656738ms total_cost_time:213.89389038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12976 prompt_cache_len:5151 prompt_cache_ratio:0.3969636251541307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 +DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:06 [batch.py:51] router release req id 8 +INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10867810249328613 s +INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11060929298400879 s +DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=203750782032177864925710945880918880595, time:1750768386.187567s req_ids:[8] +DEBUG 06-24 20:33:06 [manager.py:391] +ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:210.0210189819336ms total_cost_time:210.0660800933838ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12977 prompt_cache_len:5151 prompt_cache_ratio:0.39693303537027047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 +DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:06 [batch.py:51] router release req id 8 +INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10893511772155762 s +INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11086058616638184 s +DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=284338736109028949842059807970424577789, time:1750768386.4104998s req_ids:[8] +DEBUG 06-24 20:33:06 [manager.py:391] +ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:221.04668617248535ms total_cost_time:221.09103202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12978 prompt_cache_len:5151 prompt_cache_ratio:0.3969024503005086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 +DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:06 [batch.py:51] router release req id 8 +INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10895824432373047 s +INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11128091812133789 s +DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=302926112085310552486029738966931259132, time:1750768386.631245s req_ids:[8] +DEBUG 06-24 20:33:06 [manager.py:391] +ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:210.8924388885498ms total_cost_time:210.94608306884766ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12979 prompt_cache_len:5151 prompt_cache_ratio:0.3968718699437553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 +DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:06 [batch.py:51] router release req id 8 +INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10830903053283691 s +INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11036419868469238 s +DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=3882841582426922245345230109047675805, time:1750768386.8477106s req_ids:[8] +DEBUG 06-24 20:33:06 [manager.py:391] +ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:209.74278450012207ms total_cost_time:209.7952365875244ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:12980 prompt_cache_len:5151 prompt_cache_ratio:0.3968412942989214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 +DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:06 [batch.py:51] router release req id 8 +INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10858368873596191 s +INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11061429977416992 s +DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=321633714057611307136311941571682094477, time:1750768387.0641763s req_ids:[8] +DEBUG 06-24 20:33:07 [manager.py:391] +ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:371.0591793060303ms total_cost_time:371.11783027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12981 prompt_cache_len:5151 prompt_cache_ratio:0.39681072336491796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 +DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:07 [batch.py:51] router release req id 8 +INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10902523994445801 s +INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11085271835327148 s +DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=65719791340063985072181319243609486103, time:1750768387.4745958s req_ids:[8] +DEBUG 06-24 20:33:07 [manager.py:391] +ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:33:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 51736.857 tokens/s +DEBUG 06-24 20:33:07 [stats.py:37] Avg prompt tokens throughput: 51728.775 tokens/s +DEBUG 06-24 20:33:07 [stats.py:37] Avg generate tokens throughput: 8.081 tokens/s +INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:245.00751495361328ms total_cost_time:245.05257606506348ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12982 prompt_cache_len:5151 prompt_cache_ratio:0.3967801571406563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 +DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:07 [batch.py:51] router release req id 8 +INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10871052742004395 s +INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11076879501342773 s +DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=43823997117528428909026528237440035579, time:1750768387.696905s req_ids:[8] +DEBUG 06-24 20:33:07 [manager.py:391] +ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:210.59417724609375ms total_cost_time:210.63876152038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12983 prompt_cache_len:5151 prompt_cache_ratio:0.39674959562504813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 +DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:07 [batch.py:51] router release req id 8 +INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10889101028442383 s +INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11087751388549805 s +DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=125219103746010714061231798261747723031, time:1750768387.9161804s req_ids:[8] +DEBUG 06-24 20:33:07 [manager.py:391] +ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:209.7926139831543ms total_cost_time:209.8369598388672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12984 prompt_cache_len:5151 prompt_cache_ratio:0.39671903881700554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 +DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:08 [batch.py:51] router release req id 8 +INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10722661018371582 s +INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s +DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=67356057720234071060778502468655225335, time:1750768388.132157s req_ids:[8] +DEBUG 06-24 20:33:08 [manager.py:391] +ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:205.43384552001953ms total_cost_time:205.47890663146973ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12985 prompt_cache_len:5151 prompt_cache_ratio:0.3966884867154409 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 +DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:08 [batch.py:51] router release req id 8 +INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10859847068786621 s +INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s +DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=19605371701955389848216558614619673007, time:1750768388.3439658s req_ids:[8] +DEBUG 06-24 20:33:08 [manager.py:391] +ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:207.8533172607422ms total_cost_time:207.9172134399414ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:12986 prompt_cache_len:5151 prompt_cache_ratio:0.3966579393192669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 +DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:08 [batch.py:51] router release req id 8 +INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s +INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005425453186035 s +DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=211161804440444231341572904848176394852, time:1750768388.5597873s req_ids:[8] +DEBUG 06-24 20:33:08 [manager.py:391] +ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:369.5685863494873ms total_cost_time:369.6136474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12987 prompt_cache_len:5151 prompt_cache_ratio:0.3966273966273966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 +DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:08 [batch.py:51] router release req id 8 +INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.1086115837097168 s +INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11060452461242676 s +DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=213727319290852998464014793929182631295, time:1750768388.9654217s req_ids:[8] +DEBUG 06-24 20:33:08 [manager.py:391] +ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:246.02746963500977ms total_cost_time:246.09637260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.06890296936035156ms prompt_token_num:12988 prompt_cache_len:5151 prompt_cache_ratio:0.39659685863874344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 +DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:09 [batch.py:51] router release req id 8 +INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s +INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11093425750732422 s +DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=325759992173675372040514483146532900854, time:1750768389.1898797s req_ids:[8] +DEBUG 06-24 20:33:09 [manager.py:391] +ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:212.33057975769043ms total_cost_time:212.3727798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12989 prompt_cache_len:5151 prompt_cache_ratio:0.3965663253522211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 +DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:09 [batch.py:51] router release req id 8 +INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s +INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11307287216186523 s +DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=7343753132360644490419188337953917768, time:1750768389.410068s req_ids:[8] +DEBUG 06-24 20:33:09 [manager.py:391] +ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:213.43183517456055ms total_cost_time:213.456392288208ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12990 prompt_cache_len:5151 prompt_cache_ratio:0.39653579676674366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 +DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:09 [batch.py:51] router release req id 8 +INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10809779167175293 s +INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.10999464988708496 s +DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=19323978305412206683783949933855329326, time:1750768389.6284378s req_ids:[8] +DEBUG 06-24 20:33:09 [manager.py:391] +ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:213.6096954345703ms total_cost_time:213.6528491973877ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12991 prompt_cache_len:5151 prompt_cache_ratio:0.39650527288122545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 +DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:09 [batch.py:51] router release req id 8 +INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10847115516662598 s +INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11066579818725586 s +DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=29608879867844085160926568459254698814, time:1750768389.8448298s req_ids:[8] +DEBUG 06-24 20:33:09 [manager.py:391] +ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:204.86688613891602ms total_cost_time:204.911470413208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12992 prompt_cache_len:5151 prompt_cache_ratio:0.3964747536945813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 +DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:09 [batch.py:51] router release req id 8 +INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.10941052436828613 s +DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=255809776595122129318151695821805266987, time:1750768390.055834s req_ids:[8] +DEBUG 06-24 20:33:10 [manager.py:391] +INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s +ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:207.31806755065918ms total_cost_time:207.36217498779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12993 prompt_cache_len:5151 prompt_cache_ratio:0.39644423920572613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 +DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:10 [batch.py:51] router release req id 8 +INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.1078498363494873 s +INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983657836914062 s +DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=203133595039585870140127080797949423059, time:1750768390.2710402s req_ids:[8] +DEBUG 06-24 20:33:10 [manager.py:391] +ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:365.77320098876953ms total_cost_time:365.833044052124ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12994 prompt_cache_len:5151 prompt_cache_ratio:0.3964137294135755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 +DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:10 [batch.py:51] router release req id 8 +INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.11135625839233398 s +INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.11327886581420898 s +DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=82601351511297803244904432598668661572, time:1750768390.6740448s req_ids:[8] +DEBUG 06-24 20:33:10 [manager.py:391] +ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:237.2872829437256ms total_cost_time:237.33210563659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12995 prompt_cache_len:5151 prompt_cache_ratio:0.39638322431704504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 +DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:10 [batch.py:51] router release req id 8 +INFO 06-24 20:33:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.1079106330871582 s +INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s +DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=12388578199880550068041307841045742933, time:1750768390.8889127s req_ids:[8] +DEBUG 06-24 20:33:10 [manager.py:391] +ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:204.51927185058594ms total_cost_time:204.56194877624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12996 prompt_cache_len:5151 prompt_cache_ratio:0.3963527239150508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 +DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:11 [batch.py:51] router release req id 8 +INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10859251022338867 s +INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.11054706573486328 s +DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=232140524233388498086685132587976166866, time:1750768391.1011176s req_ids:[8] +DEBUG 06-24 20:33:11 [manager.py:391] +ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:204.56647872924805ms total_cost_time:204.60963249206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12997 prompt_cache_len:5151 prompt_cache_ratio:0.3963222282065092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 +DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:11 [batch.py:51] router release req id 8 +INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s +INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s +DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=136740788480743349329431865310010200459, time:1750768391.3200264s req_ids:[8] +DEBUG 06-24 20:33:11 [manager.py:391] +ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:220.8249568939209ms total_cost_time:220.8690643310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12998 prompt_cache_len:5151 prompt_cache_ratio:0.396291737190337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 +DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:11 [batch.py:51] router release req id 8 +INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10783576965332031 s +INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.10975837707519531 s +DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=145019826704413001424884414227420395455, time:1750768391.5389254s req_ids:[8] +DEBUG 06-24 20:33:11 [manager.py:391] +ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:210.75129508972168ms total_cost_time:210.79707145690918ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12999 prompt_cache_len:5151 prompt_cache_ratio:0.39626125086545116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 +DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:11 [batch.py:51] router release req id 8 +INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.20865941047668457 s +INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.21034908294677734 s +DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=83883731568665235670011422660608221209, time:1750768391.887774s req_ids:[8] +DEBUG 06-24 20:33:11 [manager.py:391] +ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:309.0837001800537ms total_cost_time:309.1263771057129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13000 prompt_cache_len:5151 prompt_cache_ratio:0.3962307692307692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 +DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:11 [batch.py:51] router release req id 8 +INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s +INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s +DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=215055406353932646530078973889489330166, time:1750768392.0737674s req_ids:[8] +DEBUG 06-24 20:33:12 [manager.py:391] +ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:203.72724533081055ms total_cost_time:203.77135276794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13001 prompt_cache_len:5151 prompt_cache_ratio:0.3962002922852088 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 +DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:12 [batch.py:51] router release req id 8 +INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10879802703857422 s +INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11078071594238281 s +DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=176582405888708492615130772769103129630, time:1750768392.2813714s req_ids:[8] +DEBUG 06-24 20:33:12 [manager.py:391] +ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:211.89022064208984ms total_cost_time:211.93480491638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13002 prompt_cache_len:5151 prompt_cache_ratio:0.396169820027688 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 +DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:12 [batch.py:51] router release req id 8 +INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10724759101867676 s +INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.1092216968536377 s +DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=110439147036003554141511377132808103269, time:1750768392.5000012s req_ids:[8] +DEBUG 06-24 20:33:12 [manager.py:391] +ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:209.75899696350098ms total_cost_time:209.80310440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13003 prompt_cache_len:5151 prompt_cache_ratio:0.3961393524571253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 +DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:12 [batch.py:51] router release req id 8 +INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s +INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11141705513000488 s +DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=179407576329774911175486013129344941331, time:1750768392.715567s req_ids:[8] +DEBUG 06-24 20:33:12 [manager.py:391] +ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:210.1285457611084ms total_cost_time:210.1724147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13004 prompt_cache_len:5151 prompt_cache_ratio:0.3961088895724392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 +DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:12 [batch.py:51] router release req id 8 +INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s +INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959744453430176 s +DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=269693520548407484139985680254373418873, time:1750768392.9315057s req_ids:[8] +DEBUG 06-24 20:33:12 [manager.py:391] +ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:210.07275581359863ms total_cost_time:210.1156711578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13005 prompt_cache_len:5151 prompt_cache_ratio:0.396078431372549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 +DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:13 [batch.py:51] router release req id 8 +INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10886383056640625 s +INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080241203308105 s +DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=283203894597479597336575209078303905648, time:1750768393.1594434s req_ids:[8] +DEBUG 06-24 20:33:13 [manager.py:391] +ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:406.80789947509766ms total_cost_time:406.85057640075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13006 prompt_cache_len:5151 prompt_cache_ratio:0.39604797785637397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 +DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:13 [batch.py:51] router release req id 8 +INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10683894157409668 s +INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s +DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=288960593322333645723158748050169511986, time:1750768393.5628214s req_ids:[8] +DEBUG 06-24 20:33:13 [manager.py:391] +ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:190.9925937652588ms total_cost_time:191.03717803955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13007 prompt_cache_len:5151 prompt_cache_ratio:0.3960175290228339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 +DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:13 [batch.py:51] router release req id 8 +INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10837793350219727 s +INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11224126815795898 s +DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=139235851295403682620929158311826997763, time:1750768393.7612936s req_ids:[8] +DEBUG 06-24 20:33:13 [manager.py:391] +ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:214.2949104309082ms total_cost_time:214.3406867980957ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13008 prompt_cache_len:5151 prompt_cache_ratio:0.3959870848708487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 +DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:13 [batch.py:51] router release req id 8 +INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s +INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s +DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=33882121940901492234885943714915170780, time:1750768393.9819086s req_ids:[8] +DEBUG 06-24 20:33:13 [manager.py:391] +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:213.67621421813965ms total_cost_time:213.72056007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13009 prompt_cache_len:5151 prompt_cache_ratio:0.39595664539933895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 +DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:14 [batch.py:51] router release req id 8 +INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.10887956619262695 s +INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11123347282409668 s +DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=106956122257022341874462145542064325463, time:1750768394.206414s req_ids:[8] +DEBUG 06-24 20:33:14 [manager.py:391] +ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:214.0185832977295ms total_cost_time:214.06173706054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13010 prompt_cache_len:5151 prompt_cache_ratio:0.3959262106072252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 +DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:14 [batch.py:51] router release req id 8 +INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s +INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s +DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=113481414930846980617371265217507078981, time:1750768394.425001s req_ids:[8] +DEBUG 06-24 20:33:14 [manager.py:391] +ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:203.65166664123535ms total_cost_time:203.69362831115723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13011 prompt_cache_len:5151 prompt_cache_ratio:0.39589578049342866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 +DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:14 [batch.py:51] router release req id 8 +INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.1092529296875 s +INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11140155792236328 s +DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=100172217535368547881127714946098270333, time:1750768394.6349025s req_ids:[8] +DEBUG 06-24 20:33:14 [manager.py:391] +ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:384.43446159362793ms total_cost_time:384.4785690307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13012 prompt_cache_len:5151 prompt_cache_ratio:0.3958653550568706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 +DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:14 [batch.py:51] router release req id 8 +INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10850691795349121 s +INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11072230339050293 s +DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=2060959522821261298360040814644500043, time:1750768395.029122s req_ids:[8] +DEBUG 06-24 20:33:15 [manager.py:391] +ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:205.90710639953613ms total_cost_time:205.95240592956543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13013 prompt_cache_len:5151 prompt_cache_ratio:0.39583493429647276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 +DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:15 [batch.py:51] router release req id 8 +INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s +INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s +DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=321394634618967813413723870259622526503, time:1750768395.2428594s req_ids:[8] +DEBUG 06-24 20:33:15 [manager.py:391] +ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:212.49651908874512ms total_cost_time:212.5418186187744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13014 prompt_cache_len:5151 prompt_cache_ratio:0.3958045182111572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 +DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:15 [batch.py:51] router release req id 8 +INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10858011245727539 s +INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11070036888122559 s +DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=251329224160446448843773339255489627915, time:1750768395.4607623s req_ids:[8] +DEBUG 06-24 20:33:15 [manager.py:391] +ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:212.20755577087402ms total_cost_time:212.25237846374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13015 prompt_cache_len:5151 prompt_cache_ratio:0.3957741067998463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 +DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:15 [batch.py:51] router release req id 8 +INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10903525352478027 s +INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11156177520751953 s +DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=165628670760496530344775804626536398740, time:1750768395.679567s req_ids:[8] +DEBUG 06-24 20:33:15 [manager.py:391] +ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:209.78784561157227ms total_cost_time:209.83409881591797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13016 prompt_cache_len:5151 prompt_cache_ratio:0.3957437000614628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 +DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:15 [batch.py:51] router release req id 8 +INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10816383361816406 s +INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11064362525939941 s +DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=273689300891655250188172968463948757251, time:1750768395.8956273s req_ids:[8] +DEBUG 06-24 20:33:15 [manager.py:391] +ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:214.7808074951172ms total_cost_time:214.82467651367188ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13017 prompt_cache_len:5151 prompt_cache_ratio:0.3957132979949297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 +DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:16 [batch.py:51] router release req id 8 +INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.10780191421508789 s +INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s +DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=60191213572841664104369190635430141617, time:1750768396.1161702s req_ids:[8] +DEBUG 06-24 20:33:16 [manager.py:391] +ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:211.22431755065918ms total_cost_time:211.26937866210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13018 prompt_cache_len:5151 prompt_cache_ratio:0.3956829005991704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 +DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:16 [batch.py:51] router release req id 8 +INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.30959630012512207 s +INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.31170654296875 s +DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=288558832363669133573519403677313823705, time:1750768396.54294s req_ids:[8] +DEBUG 06-24 20:33:16 [manager.py:391] +ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:424.44348335266113ms total_cost_time:424.468994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13019 prompt_cache_len:5151 prompt_cache_ratio:0.3956525078731085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 +DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:16 [batch.py:51] router release req id 8 +INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.1059110164642334 s +INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.10865426063537598 s +DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=80865950845203259781725640475083037010, time:1750768396.7668662s req_ids:[8] +DEBUG 06-24 20:33:16 [manager.py:391] +ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:217.47994422912598ms total_cost_time:217.52381324768066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13020 prompt_cache_len:5151 prompt_cache_ratio:0.3956221198156682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 +DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:16 [batch.py:51] router release req id 8 +INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.10906815528869629 s +INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.11142921447753906 s +DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=236454992858104063711838886331965096370, time:1750768396.9879787s req_ids:[8] +DEBUG 06-24 20:33:16 [manager.py:391] +ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:211.68136596679688ms total_cost_time:211.72547340393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13021 prompt_cache_len:5151 prompt_cache_ratio:0.3955917364257738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 +INFO 06-24 20:33:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:17 [batch.py:51] router release req id 8 +INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10925769805908203 s +INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.1114048957824707 s +DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=242772258706677118605372391914688830697, time:1750768397.2049096s req_ids:[8] +DEBUG 06-24 20:33:17 [manager.py:391] +ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:213.37175369262695ms total_cost_time:213.47880363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.10704994201660156ms prompt_token_num:13022 prompt_cache_len:5151 prompt_cache_ratio:0.39556135770234985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 +DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:17 [batch.py:51] router release req id 8 +INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10862112045288086 s +INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11072087287902832 s +DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=189144238474181392682258897048843139324, time:1750768397.4240625s req_ids:[8] +DEBUG 06-24 20:33:17 [manager.py:391] +ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:212.19110488891602ms total_cost_time:212.2366428375244ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13023 prompt_cache_len:5151 prompt_cache_ratio:0.3955309836443216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 +DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:17 [batch.py:51] router release req id 8 +INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10843133926391602 s +INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11065554618835449 s +DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=241864019203936309384230963132145304357, time:1750768397.644812s req_ids:[8] +DEBUG 06-24 20:33:17 [manager.py:391] +DEBUG 06-24 20:33:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 54239.434 tokens/s +DEBUG 06-24 20:33:17 [stats.py:37] Avg prompt tokens throughput: 54231.193 tokens/s +DEBUG 06-24 20:33:17 [stats.py:37] Avg generate tokens throughput: 8.242 tokens/s +ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:214.19644355773926ms total_cost_time:214.24007415771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13024 prompt_cache_len:5151 prompt_cache_ratio:0.3955006142506142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 +DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:17 [batch.py:51] router release req id 8 +INFO 06-24 20:33:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10848712921142578 s +INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11061954498291016 s +DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=51610959531654263310370621062290214097, time:1750768397.8752317s req_ids:[8] +DEBUG 06-24 20:33:17 [manager.py:391] +ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:392.5333023071289ms total_cost_time:392.5776481628418ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13025 prompt_cache_len:5151 prompt_cache_ratio:0.3954702495201535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 +DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:18 [batch.py:51] router release req id 8 +INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10797357559204102 s +INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s +DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=321832753344048227117057271274189577709, time:1750768398.263832s req_ids:[8] +DEBUG 06-24 20:33:18 [manager.py:391] +ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:215.86298942565918ms total_cost_time:215.93403816223145ms,out_token_counter:1 mean_per_token_cost_time: 0.07104873657226562ms prompt_token_num:13026 prompt_cache_len:5151 prompt_cache_ratio:0.3954398894518655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 +DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:18 [batch.py:51] router release req id 8 +INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10845303535461426 s +INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.1106562614440918 s +DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=183254144663049346195902056583093989877, time:1750768398.484742s req_ids:[8] +DEBUG 06-24 20:33:18 [manager.py:391] +ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:213.6678695678711ms total_cost_time:213.71173858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13027 prompt_cache_len:5151 prompt_cache_ratio:0.39540953404467644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 +DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:18 [batch.py:51] router release req id 8 +INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10855722427368164 s +INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.11080574989318848 s +DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=223534537375056935434240791376150362314, time:1750768398.7047205s req_ids:[8] +DEBUG 06-24 20:33:18 [manager.py:391] +ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:208.8000774383545ms total_cost_time:208.8456153869629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13028 prompt_cache_len:5151 prompt_cache_ratio:0.39537918329751304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 +DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:18 [batch.py:51] router release req id 8 +INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10805392265319824 s +INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046004295349121 s +DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=49119758234652287963103086022590865724, time:1750768398.9181309s req_ids:[8] +DEBUG 06-24 20:33:18 [manager.py:391] +ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:205.4142951965332ms total_cost_time:205.4579257965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13029 prompt_cache_len:5151 prompt_cache_ratio:0.3953488372093023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 +DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:19 [batch.py:51] router release req id 8 +INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s +INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.1097114086151123 s +DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=120780039706098151474444993452657621884, time:1750768399.1342244s req_ids:[8] +DEBUG 06-24 20:33:19 [manager.py:391] +ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.64565467834473ms total_cost_time:207.688570022583ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13030 prompt_cache_len:5151 prompt_cache_ratio:0.3953184957789716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 +DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:19 [batch.py:51] router release req id 8 +INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10829043388366699 s +INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s +DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=305204269665171847869023064222758547886, time:1750768399.3461719s req_ids:[8] +DEBUG 06-24 20:33:19 [manager.py:391] +ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:380.69915771484375ms total_cost_time:380.74302673339844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13031 prompt_cache_len:5151 prompt_cache_ratio:0.39528815900544856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 +DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:19 [batch.py:51] router release req id 8 +INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10773968696594238 s +INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s +DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=310559538771044418268973412925133985581, time:1750768399.7326083s req_ids:[8] +DEBUG 06-24 20:33:19 [manager.py:391] +ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.13424682617188ms total_cost_time:207.17692375183105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13032 prompt_cache_len:5151 prompt_cache_ratio:0.39525782688766115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 +DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:19 [batch.py:51] router release req id 8 +INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10847306251525879 s +INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.11028790473937988 s +DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=57646268809455983474238958787676528418, time:1750768399.947802s req_ids:[8] +DEBUG 06-24 20:33:19 [manager.py:391] +ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.09848403930664ms total_cost_time:207.14259147644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13033 prompt_cache_len:5151 prompt_cache_ratio:0.3952274994245377 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 +DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:20 [batch.py:51] router release req id 8 +DEBUG 06-24 20:33:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:20 [manager.py:283] +DEBUG 06-24 20:33:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:20 [manager.py:284] +INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10805797576904297 s +INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.10999870300292969 s +DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=127013413293543619189723714944162444040, time:1750768400.1619778s req_ids:[8] +DEBUG 06-24 20:33:20 [manager.py:391] +ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:210.0226879119873ms total_cost_time:210.0667953491211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13034 prompt_cache_len:5151 prompt_cache_ratio:0.3951971766150069 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 +DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:20 [batch.py:51] router release req id 8 +INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10830330848693848 s +INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11024069786071777 s +DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=35212755727179303705420540809525123737, time:1750768400.3786235s req_ids:[8] +DEBUG 06-24 20:33:20 [manager.py:391] +ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:217.73767471313477ms total_cost_time:217.78106689453125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13035 prompt_cache_len:5151 prompt_cache_ratio:0.3951668584579977 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 +DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:20 [batch.py:51] router release req id 8 +INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10869216918945312 s +INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11049962043762207 s +DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=257496880091193917167104496177727080670, time:1750768400.6016483s req_ids:[8] +DEBUG 06-24 20:33:20 [manager.py:391] +ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:218.18828582763672ms total_cost_time:218.23358535766602ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13036 prompt_cache_len:5151 prompt_cache_ratio:0.3951365449524394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 +DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:20 [batch.py:51] router release req id 8 +INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10844206809997559 s +INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11025404930114746 s +DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=92550189039474824003753598735162176031, time:1750768400.8400164s req_ids:[8] +DEBUG 06-24 20:33:20 [manager.py:391] +ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:401.16381645202637ms total_cost_time:401.20601654052734ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13037 prompt_cache_len:5151 prompt_cache_ratio:0.39510623609726164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 +DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:21 [batch.py:51] router release req id 8 +INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10838699340820312 s +INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056280136108398 s +DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=212645412631158789383963648904152861664, time:1750768401.2348037s req_ids:[8] +DEBUG 06-24 20:33:21 [manager.py:391] +ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:210.6490135192871ms total_cost_time:210.6955051422119ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13038 prompt_cache_len:5151 prompt_cache_ratio:0.3950759318913944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 +DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:21 [batch.py:51] router release req id 8 +INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10770463943481445 s +INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.10976624488830566 s +DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=323231149444194796010373259210222854298, time:1750768401.4637384s req_ids:[8] +DEBUG 06-24 20:33:21 [manager.py:391] +ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:228.4567356109619ms total_cost_time:228.47604751586914ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13039 prompt_cache_len:5151 prompt_cache_ratio:0.39504563233376794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 +DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:21 [batch.py:51] router release req id 8 +INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10755109786987305 s +INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.10884499549865723 s +DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=152479872378295911987443659235620866276, time:1750768401.684788s req_ids:[8] +DEBUG 06-24 20:33:21 [manager.py:391] +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:216.1545753479004ms total_cost_time:216.19749069213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13040 prompt_cache_len:5151 prompt_cache_ratio:0.3950153374233129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 +DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:21 [batch.py:51] router release req id 8 +INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10868430137634277 s +INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s +DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=82032735784657062182763629681669643119, time:1750768401.9048376s req_ids:[8] +DEBUG 06-24 20:33:21 [manager.py:391] +ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:215.81792831420898ms total_cost_time:215.86227416992188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13041 prompt_cache_len:5151 prompt_cache_ratio:0.3949850471589602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 +DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:22 [batch.py:51] router release req id 8 +INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10809040069580078 s +INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.10930824279785156 s +DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=325696614994160511637001755606152937350, time:1750768402.1293988s req_ids:[8] +DEBUG 06-24 20:33:22 [manager.py:391] +ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:207.77297019958496ms total_cost_time:207.81707763671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13042 prompt_cache_len:5151 prompt_cache_ratio:0.39495476153964115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 +DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:22 [batch.py:51] router release req id 8 +INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s +INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.11064553260803223 s +DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=18671922292529703478902836156133317708, time:1750768402.343767s req_ids:[8] +DEBUG 06-24 20:33:22 [manager.py:391] +ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:392.5025463104248ms total_cost_time:392.5461769104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13043 prompt_cache_len:5151 prompt_cache_ratio:0.39492448056428736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 +DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:22 [batch.py:51] router release req id 8 +INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10869050025939941 s +INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.11088109016418457 s +DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=267427289948298006130642032101772236891, time:1750768402.7421942s req_ids:[8] +DEBUG 06-24 20:33:22 [manager.py:391] +ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:214.4625186920166ms total_cost_time:214.5061492919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13044 prompt_cache_len:5151 prompt_cache_ratio:0.39489420423183075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 +DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:22 [batch.py:51] router release req id 8 +INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.107147216796875 s +INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.10919761657714844 s +DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=289453781527812622904324730435070155657, time:1750768402.962048s req_ids:[8] +DEBUG 06-24 20:33:22 [manager.py:391] +ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:204.32162284851074ms total_cost_time:204.38694953918457ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:13045 prompt_cache_len:5151 prompt_cache_ratio:0.3948639325412035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 +DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:23 [batch.py:51] router release req id 8 +INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.1088097095489502 s +INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.11104583740234375 s +DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=285976768689138724221873490985057687624, time:1750768403.1770976s req_ids:[8] +DEBUG 06-24 20:33:23 [manager.py:391] +INFO 06-24 20:33:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:33:23 [statics_utils.py:24] mean first cost: 230.23923012657042 ms +INFO 06-24 20:33:23 [statics_utils.py:24] mean per token cost: 0.06031699176893638 ms +ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:213.38844299316406ms total_cost_time:213.43231201171875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13046 prompt_cache_len:5151 prompt_cache_ratio:0.39483366549133836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 +DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:23 [batch.py:51] router release req id 8 +INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10757565498352051 s +INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.1097574234008789 s +DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=146255065684401170380296216137236908087, time:1750768403.3927286s req_ids:[8] +DEBUG 06-24 20:33:23 [manager.py:391] +ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.53461456298828ms total_cost_time:216.57681465148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13047 prompt_cache_len:5151 prompt_cache_ratio:0.39480340308116807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 +DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:23 [batch.py:51] router release req id 8 +INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10814976692199707 s +INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970854759216309 s +DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=95088164406373763704197008641919658772, time:1750768403.618071s req_ids:[8] +DEBUG 06-24 20:33:23 [manager.py:391] +ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.46475791931152ms total_cost_time:216.508150100708ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13048 prompt_cache_len:5151 prompt_cache_ratio:0.394773145309626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 +DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:23 [batch.py:51] router release req id 8 +INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10933923721313477 s +INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.1114962100982666 s +DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=310444057817659679001697556403128126854, time:1750768403.8406415s req_ids:[8] +DEBUG 06-24 20:33:23 [manager.py:391] +ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.90082550048828ms total_cost_time:216.94493293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13049 prompt_cache_len:5151 prompt_cache_ratio:0.3947428921756456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 +DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:23 [batch.py:51] router release req id 8 +INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10874223709106445 s +INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11074686050415039 s +DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=80160907855930335262363657481232547904, time:1750768404.0644157s req_ids:[8] +DEBUG 06-24 20:33:24 [manager.py:391] +ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:384.00864601135254ms total_cost_time:384.0758800506592ms,out_token_counter:1 mean_per_token_cost_time: 0.06723403930664062ms prompt_token_num:13050 prompt_cache_len:5151 prompt_cache_ratio:0.39471264367816095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 +DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:24 [batch.py:51] router release req id 8 +INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s +INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11141395568847656 s +DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=178450927258888609194326570864147608221, time:1750768404.454751s req_ids:[8] +DEBUG 06-24 20:33:24 [manager.py:391] +ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:215.30914306640625ms total_cost_time:215.35277366638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13051 prompt_cache_len:5151 prompt_cache_ratio:0.39468239981610603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 +DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:24 [batch.py:51] router release req id 8 +INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10898709297180176 s +INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11089968681335449 s +DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=262730869306433685352316019965158512678, time:1750768404.676607s req_ids:[8] +DEBUG 06-24 20:33:24 [manager.py:391] +ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:216.37701988220215ms total_cost_time:216.42184257507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13052 prompt_cache_len:5151 prompt_cache_ratio:0.39465216058841557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 +DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:24 [batch.py:51] router release req id 8 +INFO 06-24 20:33:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.11011838912963867 s +INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11190032958984375 s +DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=293380961194454966484382402919463793318, time:1750768404.8990633s req_ids:[8] +DEBUG 06-24 20:33:24 [manager.py:391] +ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:217.38958358764648ms total_cost_time:217.43440628051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13053 prompt_cache_len:5151 prompt_cache_ratio:0.39462192599402435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 +DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:25 [batch.py:51] router release req id 8 +INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10863065719604492 s +INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11058640480041504 s +DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=172162000024469805260632617414626926177, time:1750768405.1250346s req_ids:[8] +DEBUG 06-24 20:33:25 [manager.py:391] +ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:219.48003768920898ms total_cost_time:219.53439712524414ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:13054 prompt_cache_len:5151 prompt_cache_ratio:0.3945916960318676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 +DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:25 [batch.py:51] router release req id 8 +INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10843682289123535 s +INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11027264595031738 s +DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=335457738988820561660540887827210159386, time:1750768405.3508813s req_ids:[8] +DEBUG 06-24 20:33:25 [manager.py:391] +ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:217.39459037780762ms total_cost_time:217.4396514892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13055 prompt_cache_len:5151 prompt_cache_ratio:0.3945614707008809 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 +DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:25 [batch.py:51] router release req id 8 +INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s +INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11033368110656738 s +DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=297157775440507761644900942844601606692, time:1750768405.5737498s req_ids:[8] +DEBUG 06-24 20:33:25 [manager.py:391] +ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:390.29717445373535ms total_cost_time:390.34175872802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13056 prompt_cache_len:5151 prompt_cache_ratio:0.39453125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 +DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:25 [batch.py:51] router release req id 8 +INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10927820205688477 s +INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11132001876831055 s +DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=204490816171788092866577784620946471430, time:1750768405.9714148s req_ids:[8] +DEBUG 06-24 20:33:25 [manager.py:391] +ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:216.9015407562256ms total_cost_time:216.94660186767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13057 prompt_cache_len:5151 prompt_cache_ratio:0.39450103392816116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 +DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:26 [batch.py:51] router release req id 8 +INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.1089162826538086 s +INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.11093854904174805 s +DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=324754593434674245585225043666751410798, time:1750768406.1946044s req_ids:[8] +DEBUG 06-24 20:33:26 [manager.py:391] +ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:211.62962913513184ms total_cost_time:211.67278289794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13058 prompt_cache_len:5151 prompt_cache_ratio:0.3944708224843008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 +DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:26 [batch.py:51] router release req id 8 +INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10851883888244629 s +INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.11043286323547363 s +DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=263762228062315232703049707204181391196, time:1750768406.412559s req_ids:[8] +DEBUG 06-24 20:33:26 [manager.py:391] +ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:221.35686874389648ms total_cost_time:221.40002250671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13059 prompt_cache_len:5151 prompt_cache_ratio:0.39444061566735583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 +DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:26 [batch.py:51] router release req id 8 +INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10802888870239258 s +INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10996127128601074 s +DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=128074973394445665107950459448644063050, time:1750768406.6378539s req_ids:[8] +DEBUG 06-24 20:33:26 [manager.py:391] +ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:174.6046543121338ms total_cost_time:174.64828491210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13060 prompt_cache_len:5151 prompt_cache_ratio:0.3944104134762634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 +DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:26 [batch.py:51] router release req id 8 +INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10753583908081055 s +INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10924720764160156 s +DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=19966390274702542212832716767348725165, time:1750768406.8176858s req_ids:[8] +DEBUG 06-24 20:33:26 [manager.py:391] +ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:172.3039150238037ms total_cost_time:172.346830368042ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13061 prompt_cache_len:5151 prompt_cache_ratio:0.394380215909961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 +DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:26 [batch.py:51] router release req id 8 +INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10685062408447266 s +INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10879898071289062 s +DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=192403254786260618278636066661915286743, time:1750768406.9985876s req_ids:[8] +DEBUG 06-24 20:33:26 [manager.py:391] +ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:379.9111843109131ms total_cost_time:379.955530166626ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13062 prompt_cache_len:5151 prompt_cache_ratio:0.3943500229673863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 +DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:27 [batch.py:51] router release req id 8 +INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.1076955795288086 s +INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964298248291016 s +DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=73520651513655519230449643679431415384, time:1750768407.3874955s req_ids:[8] +DEBUG 06-24 20:33:27 [manager.py:391] +ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:215.48175811767578ms total_cost_time:215.52610397338867ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13063 prompt_cache_len:5151 prompt_cache_ratio:0.3943198346474776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 +DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:27 [batch.py:51] router release req id 8 +INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s +INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.10960960388183594 s +DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=77968988360941299559263734337626291431, time:1750768407.6047072s req_ids:[8] +DEBUG 06-24 20:33:27 [manager.py:391] +ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:33:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 51923.297 tokens/s +DEBUG 06-24 20:33:27 [stats.py:37] Avg prompt tokens throughput: 51915.238 tokens/s +DEBUG 06-24 20:33:27 [stats.py:37] Avg generate tokens throughput: 8.059 tokens/s +INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:207.06796646118164ms total_cost_time:207.11207389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13064 prompt_cache_len:5151 prompt_cache_ratio:0.3942896509491733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 +DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:27 [batch.py:51] router release req id 8 +INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s +INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106867790222168 s +DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=47748010161756914801794253898632334892, time:1750768407.8171337s req_ids:[8] +DEBUG 06-24 20:33:27 [manager.py:391] +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:212.18252182006836ms total_cost_time:212.22662925720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13065 prompt_cache_len:5151 prompt_cache_ratio:0.3942594718714122 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 +DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:27 [batch.py:51] router release req id 8 +INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10752367973327637 s +INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.10946178436279297 s +DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=274212762438844544916661074767687728951, time:1750768408.0362065s req_ids:[8] +DEBUG 06-24 20:33:28 [manager.py:391] +ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:211.5764617919922ms total_cost_time:211.6219997406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13066 prompt_cache_len:5151 prompt_cache_ratio:0.39422929741313334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 +DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:28 [batch.py:51] router release req id 8 +INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s +INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11081123352050781 s +DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=91840340759639353029527694256418159864, time:1750768408.2540207s req_ids:[8] +DEBUG 06-24 20:33:28 [manager.py:391] +ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:214.26939964294434ms total_cost_time:214.31469917297363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13067 prompt_cache_len:5151 prompt_cache_ratio:0.3941991275732762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 +DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:28 [batch.py:51] router release req id 8 +INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10861587524414062 s +INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11065888404846191 s +DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=4371355966082282143182423232863614713, time:1750768408.4764512s req_ids:[8] +DEBUG 06-24 20:33:28 [manager.py:391] +ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:212.23688125610352ms total_cost_time:212.2817039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13068 prompt_cache_len:5151 prompt_cache_ratio:0.3941689623507805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 +DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:28 [batch.py:51] router release req id 8 +INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10888314247131348 s +INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11091828346252441 s +DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=258603725035576975470511139863019540671, time:1750768408.6978288s req_ids:[8] +DEBUG 06-24 20:33:28 [manager.py:391] +ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:377.22325325012207ms total_cost_time:377.2873878479004ms,out_token_counter:1 mean_per_token_cost_time: 0.06413459777832031ms prompt_token_num:13069 prompt_cache_len:5151 prompt_cache_ratio:0.39413880174458643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 +DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:28 [batch.py:51] router release req id 8 +INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10883617401123047 s +INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s +DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=14909405426319352279929306654377697217, time:1750768409.0785575s req_ids:[8] +DEBUG 06-24 20:33:29 [manager.py:391] +ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:208.92000198364258ms total_cost_time:208.96482467651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13070 prompt_cache_len:5151 prompt_cache_ratio:0.3941086457536343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 +DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:29 [batch.py:51] router release req id 8 +INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.1086723804473877 s +INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11072349548339844 s +DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=324858022517149890377092431931902192534, time:1750768409.2949598s req_ids:[8] +DEBUG 06-24 20:33:29 [manager.py:391] +ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:213.28020095825195ms total_cost_time:213.32478523254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13071 prompt_cache_len:5151 prompt_cache_ratio:0.3940784943768648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 +DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:29 [batch.py:51] router release req id 8 +INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.1094815731048584 s +INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.1114037036895752 s +DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=177732389578913794674087532208857300576, time:1750768409.5131643s req_ids:[8] +DEBUG 06-24 20:33:29 [manager.py:391] +ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:211.02190017700195ms total_cost_time:211.06743812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13072 prompt_cache_len:5151 prompt_cache_ratio:0.3940483476132191 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 +DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:29 [batch.py:51] router release req id 8 +INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10964226722717285 s +INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11157774925231934 s +DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=247330051776901343555527741987838935255, time:1750768409.7340493s req_ids:[8] +DEBUG 06-24 20:33:29 [manager.py:391] +ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:211.96651458740234ms total_cost_time:212.01062202453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13073 prompt_cache_len:5151 prompt_cache_ratio:0.3940182054616385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 +DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:29 [batch.py:51] router release req id 8 +INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s +INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.10971617698669434 s +DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=299483850754776183861567836494340845839, time:1750768409.9497957s req_ids:[8] +DEBUG 06-24 20:33:29 [manager.py:391] +ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:214.04743194580078ms total_cost_time:214.06817436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13074 prompt_cache_len:5151 prompt_cache_ratio:0.3939880679210647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 +DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:30 [batch.py:51] router release req id 8 +INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10837054252624512 s +INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.11034870147705078 s +DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=190350854120814385586720448853787480482, time:1750768410.1723084s req_ids:[8] +DEBUG 06-24 20:33:30 [manager.py:391] +ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:378.676176071167ms total_cost_time:378.7202835083008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13075 prompt_cache_len:5151 prompt_cache_ratio:0.3939579349904398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 +DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:30 [batch.py:51] router release req id 8 +INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10898184776306152 s +INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110377311706543 s +DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=208561265953640556266468329083053012801, time:1750768410.5562937s req_ids:[8] +DEBUG 06-24 20:33:30 [manager.py:391] +ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:213.93394470214844ms total_cost_time:213.97876739501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13076 prompt_cache_len:5151 prompt_cache_ratio:0.393927806668706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 +DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:30 [batch.py:51] router release req id 8 +INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10747337341308594 s +INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.1093130111694336 s +DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=91589200091973264243929523733434114937, time:1750768410.77666s req_ids:[8] +DEBUG 06-24 20:33:30 [manager.py:391] +ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:169.7235107421875ms total_cost_time:169.76547241210938ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13077 prompt_cache_len:5151 prompt_cache_ratio:0.39389768295480615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 +DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:30 [batch.py:51] router release req id 8 +INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10803914070129395 s +INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s +DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=328979993173353349826985374188766239306, time:1750768410.9519284s req_ids:[8] +DEBUG 06-24 20:33:30 [manager.py:391] +ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:203.05728912353516ms total_cost_time:203.10020446777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13078 prompt_cache_len:5151 prompt_cache_ratio:0.3938675638476831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 +DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:31 [batch.py:51] router release req id 8 +INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10774421691894531 s +INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11031270027160645 s +DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=194001125688334336216561683604167367071, time:1750768411.1621833s req_ids:[8] +DEBUG 06-24 20:33:31 [manager.py:391] +ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:211.74216270446777ms total_cost_time:211.78507804870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13079 prompt_cache_len:5151 prompt_cache_ratio:0.3938374493462803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 +DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:31 [batch.py:51] router release req id 8 +INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s +INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11115264892578125 s +DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=235061638432118752911673637810742068850, time:1750768411.3816094s req_ids:[8] +DEBUG 06-24 20:33:31 [manager.py:391] +ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:211.0297679901123ms total_cost_time:211.0748291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13080 prompt_cache_len:5151 prompt_cache_ratio:0.39380733944954127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 +DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:31 [batch.py:51] router release req id 8 +INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10811424255371094 s +INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11015176773071289 s +DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=296197620574072192617863866301584975451, time:1750768411.5984812s req_ids:[8] +DEBUG 06-24 20:33:31 [manager.py:391] +ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:374.65858459472656ms total_cost_time:374.70316886901855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13081 prompt_cache_len:5151 prompt_cache_ratio:0.3937772341564101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 +DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:31 [batch.py:51] router release req id 8 +INFO 06-24 20:33:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.1077883243560791 s +INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s +DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=300828300203310295866235988355376267906, time:1750768411.9791954s req_ids:[8] +DEBUG 06-24 20:33:31 [manager.py:391] +ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:213.23013305664062ms total_cost_time:213.27614784240723ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13082 prompt_cache_len:5151 prompt_cache_ratio:0.3937471334658309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 +DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:32 [batch.py:51] router release req id 8 +INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10784459114074707 s +INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s +DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=151349816138482728764406540327365648252, time:1750768412.198429s req_ids:[8] +DEBUG 06-24 20:33:32 [manager.py:391] +ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:213.63353729248047ms total_cost_time:213.67597579956055ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13083 prompt_cache_len:5151 prompt_cache_ratio:0.39371703737674846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 +DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:32 [batch.py:51] router release req id 8 +INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s +INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s +DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=110039468396583254205386416405185985016, time:1750768412.4185407s req_ids:[8] +DEBUG 06-24 20:33:32 [manager.py:391] +ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:220.35813331604004ms total_cost_time:220.40343284606934ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13084 prompt_cache_len:5151 prompt_cache_ratio:0.3936869458881076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 +DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:32 [batch.py:51] router release req id 8 +INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10770130157470703 s +INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.1098477840423584 s +DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=212898348829649094951748400105159793932, time:1750768412.6446152s req_ids:[8] +DEBUG 06-24 20:33:32 [manager.py:391] +ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:209.73968505859375ms total_cost_time:209.78331565856934ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13085 prompt_cache_len:5151 prompt_cache_ratio:0.39365685899885366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 +DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:32 [batch.py:51] router release req id 8 +INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10767054557800293 s +INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.10959339141845703 s +DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=65717240764965864713886705633775328624, time:1750768412.860603s req_ids:[8] +DEBUG 06-24 20:33:32 [manager.py:391] +ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:212.99171447753906ms total_cost_time:213.03677558898926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13086 prompt_cache_len:5151 prompt_cache_ratio:0.39362677670793217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 +DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:32 [batch.py:51] router release req id 8 +INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s +INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.11136960983276367 s +DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=113137000643014791351323938305164054379, time:1750768413.0806725s req_ids:[8] +DEBUG 06-24 20:33:33 [manager.py:391] +ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:389.1468048095703ms total_cost_time:389.1892433166504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13087 prompt_cache_len:5151 prompt_cache_ratio:0.393596699014289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 +DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:33 [batch.py:51] router release req id 8 +INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10860323905944824 s +INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.1110386848449707 s +DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=109286580887509699208716697494007363254, time:1750768413.4771266s req_ids:[8] +DEBUG 06-24 20:33:33 [manager.py:391] +ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:213.1786346435547ms total_cost_time:213.2241725921631ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13088 prompt_cache_len:5151 prompt_cache_ratio:0.3935666259168704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 +DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:33 [batch.py:51] router release req id 8 +INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.1079409122467041 s +INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.10992312431335449 s +DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=78486502387130529742790814976590815495, time:1750768413.703067s req_ids:[8] +DEBUG 06-24 20:33:33 [manager.py:391] +ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:217.95392036437988ms total_cost_time:217.99755096435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13089 prompt_cache_len:5151 prompt_cache_ratio:0.39353655741462296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 +DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:33 [batch.py:51] router release req id 8 +INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10827970504760742 s +INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.11010074615478516 s +DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=104689279490230866009323878877413431411, time:1750768413.9226935s req_ids:[8] +DEBUG 06-24 20:33:33 [manager.py:391] +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:174.29471015930176ms total_cost_time:174.33667182922363ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13090 prompt_cache_len:5151 prompt_cache_ratio:0.3935064935064935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 +DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:34 [batch.py:51] router release req id 8 +INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10940313339233398 s +INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s +DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=82212729822338202667919897891280590583, time:1750768414.103486s req_ids:[8] +DEBUG 06-24 20:33:34 [manager.py:391] +ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:215.040922164917ms total_cost_time:215.0869369506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13091 prompt_cache_len:5151 prompt_cache_ratio:0.39347643419142925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 +DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:34 [batch.py:51] router release req id 8 +INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10810041427612305 s +INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.11008143424987793 s +DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=63306849642453696178073286178811389022, time:1750768414.329854s req_ids:[8] +DEBUG 06-24 20:33:34 [manager.py:391] +ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:208.1611156463623ms total_cost_time:208.2054615020752ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13092 prompt_cache_len:5151 prompt_cache_ratio:0.3934463794683776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 +DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:34 [batch.py:51] router release req id 8 +INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.1077275276184082 s +INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.10987734794616699 s +DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=88397387735723525714735684310989757338, time:1750768414.5367687s req_ids:[8] +DEBUG 06-24 20:33:34 [manager.py:391] +ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:418.3306694030762ms total_cost_time:418.37501525878906ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13093 prompt_cache_len:5151 prompt_cache_ratio:0.39341632933628656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 +DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:34 [batch.py:51] router release req id 8 +INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s +INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061310768127441 s +DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=250402664702351225939690722453694530414, time:1750768414.9606493s req_ids:[8] +DEBUG 06-24 20:33:34 [manager.py:391] +ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:210.87408065795898ms total_cost_time:210.91866493225098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13094 prompt_cache_len:5151 prompt_cache_ratio:0.3933862837941042 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 +DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:35 [batch.py:51] router release req id 8 +INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10725760459899902 s +INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.10924601554870605 s +DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=217565244298140897213528248757437067249, time:1750768415.1773803s req_ids:[8] +DEBUG 06-24 20:33:35 [manager.py:391] +ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:211.3351821899414ms total_cost_time:211.378812789917ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13095 prompt_cache_len:5151 prompt_cache_ratio:0.3933562428407789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 +DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:35 [batch.py:51] router release req id 8 +INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10817217826843262 s +INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.11010169982910156 s +DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=102482833157275957222240261372878038624, time:1750768415.3973932s req_ids:[8] +DEBUG 06-24 20:33:35 [manager.py:391] +ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:210.5729579925537ms total_cost_time:210.6184959411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13096 prompt_cache_len:5151 prompt_cache_ratio:0.3933262064752596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 +DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:35 [batch.py:51] router release req id 8 +INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s +INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.10994172096252441 s +DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=235137378855093151425808332353154521239, time:1750768415.6256328s req_ids:[8] +DEBUG 06-24 20:33:35 [manager.py:391] +ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:228.58285903930664ms total_cost_time:228.62911224365234ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13097 prompt_cache_len:5151 prompt_cache_ratio:0.39329617469649536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 +DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:35 [batch.py:51] router release req id 8 +INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10837078094482422 s +INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103823184967041 s +DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=339120375414442526266781403005118283997, time:1750768415.85031s req_ids:[8] +DEBUG 06-24 20:33:35 [manager.py:391] +ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:216.19057655334473ms total_cost_time:216.2344455718994ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13098 prompt_cache_len:5151 prompt_cache_ratio:0.39326614750343564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 +DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:35 [batch.py:51] router release req id 8 +INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10846233367919922 s +INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11051607131958008 s +DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=102293266523695156172664124600170288198, time:1750768416.0744154s req_ids:[8] +DEBUG 06-24 20:33:36 [manager.py:391] +ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:392.697811126709ms total_cost_time:392.7428722381592ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13099 prompt_cache_len:5151 prompt_cache_ratio:0.3932361248950302 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 +DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:36 [batch.py:51] router release req id 8 +INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.1094205379486084 s +INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11142683029174805 s +DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=215805281728718706893291492953420040247, time:1750768416.4739976s req_ids:[8] +DEBUG 06-24 20:33:36 [manager.py:391] +ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:221.86589241027832ms total_cost_time:221.92049026489258ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:13100 prompt_cache_len:5151 prompt_cache_ratio:0.393206106870229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 +DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:36 [batch.py:51] router release req id 8 +INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10935616493225098 s +INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11133670806884766 s +DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=38749398393903366112788070344315264609, time:1750768416.7021005s req_ids:[8] +DEBUG 06-24 20:33:36 [manager.py:391] +ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:217.79489517211914ms total_cost_time:217.8475856781006ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:13101 prompt_cache_len:5151 prompt_cache_ratio:0.3931760934279826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 +DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:36 [batch.py:51] router release req id 8 +INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10857033729553223 s +INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11067843437194824 s +DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=246213383002050436690867801226480518367, time:1750768416.9258118s req_ids:[8] +DEBUG 06-24 20:33:36 [manager.py:391] +ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:216.71152114868164ms total_cost_time:216.76182746887207ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:13102 prompt_cache_len:5151 prompt_cache_ratio:0.39314608456724165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 +DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:37 [batch.py:51] router release req id 8 +INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s +INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s +DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=220151998242717692610290878725419624053, time:1750768417.1489882s req_ids:[8] +DEBUG 06-24 20:33:37 [manager.py:391] +ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:208.60791206359863ms total_cost_time:208.65273475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13103 prompt_cache_len:5151 prompt_cache_ratio:0.39311608028695716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 +DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:37 [batch.py:51] router release req id 8 +INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10764813423156738 s +INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.10953569412231445 s +DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=126603676067162142246248605668311683888, time:1750768417.3617554s req_ids:[8] +DEBUG 06-24 20:33:37 [manager.py:391] +ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:211.86184883117676ms total_cost_time:211.90524101257324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13104 prompt_cache_len:5151 prompt_cache_ratio:0.3930860805860806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 +DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:37 [batch.py:51] router release req id 8 +INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10855340957641602 s +INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.11056303977966309 s +DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=2348258786407126252649409273992505060, time:1750768417.582074s req_ids:[8] +DEBUG 06-24 20:33:37 [manager.py:391] +ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:33:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 52844.647 tokens/s +DEBUG 06-24 20:33:37 [stats.py:37] Avg prompt tokens throughput: 52836.571 tokens/s +DEBUG 06-24 20:33:37 [stats.py:37] Avg generate tokens throughput: 8.076 tokens/s +INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:384.33837890625ms total_cost_time:384.380578994751ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13105 prompt_cache_len:5151 prompt_cache_ratio:0.3930560854635635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 +DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:37 [batch.py:51] router release req id 8 +INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10895395278930664 s +INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.11098909378051758 s +DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=106287907465496946298735515512885366256, time:1750768417.9713328s req_ids:[8] +DEBUG 06-24 20:33:37 [manager.py:391] +ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:214.85495567321777ms total_cost_time:214.89620208740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13106 prompt_cache_len:5151 prompt_cache_ratio:0.393026094918358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 +DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:38 [batch.py:51] router release req id 8 +INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10806822776794434 s +INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.11009049415588379 s +DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=133715575939016155464983853188350111674, time:1750768418.2034302s req_ids:[8] +DEBUG 06-24 20:33:38 [manager.py:391] +ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:223.68454933166504ms total_cost_time:223.72865676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13107 prompt_cache_len:5151 prompt_cache_ratio:0.39299610894941633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 +DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:38 [batch.py:51] router release req id 8 +INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10760378837585449 s +INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.1094980239868164 s +DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=2942388643164960510834460932907944335, time:1750768418.4225442s req_ids:[8] +DEBUG 06-24 20:33:38 [manager.py:391] +ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:212.06188201904297ms total_cost_time:212.10765838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13108 prompt_cache_len:5151 prompt_cache_ratio:0.39296612755569116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 +DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:38 [batch.py:51] router release req id 8 +INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10879206657409668 s +INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.11085700988769531 s +DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=280073357710133315245537640815566938501, time:1750768418.6402066s req_ids:[8] +DEBUG 06-24 20:33:38 [manager.py:391] +ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:215.22808074951172ms total_cost_time:215.28148651123047ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13109 prompt_cache_len:5151 prompt_cache_ratio:0.39293615073613547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 +DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:38 [batch.py:51] router release req id 8 +INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10764575004577637 s +INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.10968923568725586 s +DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=129961371539636677502202494753988069393, time:1750768418.861381s req_ids:[8] +DEBUG 06-24 20:33:38 [manager.py:391] +ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:209.3198299407959ms total_cost_time:209.3653678894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13110 prompt_cache_len:5151 prompt_cache_ratio:0.3929061784897025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 +DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:38 [batch.py:51] router release req id 8 +INFO 06-24 20:33:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10762572288513184 s +INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.10957932472229004 s +DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=18225266863192871281478135166024760692, time:1750768419.0801625s req_ids:[8] +DEBUG 06-24 20:33:39 [manager.py:391] +ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:393.3110237121582ms total_cost_time:393.355131149292ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13111 prompt_cache_len:5151 prompt_cache_ratio:0.3928762108153459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 +DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:39 [batch.py:51] router release req id 8 +INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10829353332519531 s +INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s +DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=47614557519914516895111008322320324772, time:1750768419.4811232s req_ids:[8] +DEBUG 06-24 20:33:39 [manager.py:391] +ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:218.5828685760498ms total_cost_time:218.6267375946045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13112 prompt_cache_len:5151 prompt_cache_ratio:0.39284624771201954 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 +DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:39 [batch.py:51] router release req id 8 +INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10648703575134277 s +INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.1084439754486084 s +DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=241964492075794988128180418192240577742, time:1750768419.703423s req_ids:[8] +DEBUG 06-24 20:33:39 [manager.py:391] +ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:211.88020706176758ms total_cost_time:211.92526817321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13113 prompt_cache_len:5151 prompt_cache_ratio:0.39281628917867767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 +DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:39 [batch.py:51] router release req id 8 +INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.11024045944213867 s +INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.11221480369567871 s +DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=233876506240120997342439958071577531130, time:1750768419.9243166s req_ids:[8] +DEBUG 06-24 20:33:39 [manager.py:391] +ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:213.3033275604248ms total_cost_time:213.38510513305664ms,out_token_counter:1 mean_per_token_cost_time: 0.08177757263183594ms prompt_token_num:13114 prompt_cache_len:5151 prompt_cache_ratio:0.3927863352142748 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 +DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:40 [batch.py:51] router release req id 8 +INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10881781578063965 s +INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.1107022762298584 s +DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=269065007067966508838694076037293930478, time:1750768420.1432278s req_ids:[8] +DEBUG 06-24 20:33:40 [manager.py:391] +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:214.1869068145752ms total_cost_time:214.22958374023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13115 prompt_cache_len:5151 prompt_cache_ratio:0.3927563858177659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 +DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:40 [batch.py:51] router release req id 8 +INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s +INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.11020112037658691 s +DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=149331490007267979553380653723154887190, time:1750768420.3625698s req_ids:[8] +DEBUG 06-24 20:33:40 [manager.py:391] +ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:210.58988571166992ms total_cost_time:210.63661575317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13116 prompt_cache_len:5151 prompt_cache_ratio:0.3927264409881061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 +DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:40 [batch.py:51] router release req id 8 +INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.1081228256225586 s +INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s +DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=124022233531927309325753477405988086860, time:1750768420.5799353s req_ids:[8] +DEBUG 06-24 20:33:40 [manager.py:391] +ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:389.0419006347656ms total_cost_time:389.0864849090576ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13117 prompt_cache_len:5151 prompt_cache_ratio:0.392696500724251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 +DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:40 [batch.py:51] router release req id 8 +INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10799551010131836 s +INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s +DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=11895382066407503353753506483218357222, time:1750768420.977581s req_ids:[8] +DEBUG 06-24 20:33:40 [manager.py:391] +ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:213.64617347717285ms total_cost_time:213.69147300720215ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13118 prompt_cache_len:5151 prompt_cache_ratio:0.3926665650251563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 +DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:41 [batch.py:51] router release req id 8 +INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s +INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11107897758483887 s +DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=71959944843333010626824696801174723601, time:1750768421.1964347s req_ids:[8] +DEBUG 06-24 20:33:41 [manager.py:391] +ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.3793830871582ms total_cost_time:213.4237289428711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13119 prompt_cache_len:5151 prompt_cache_ratio:0.3926366338897782 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 +DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:41 [batch.py:51] router release req id 8 +INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s +INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11086249351501465 s +DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=322437738929331059494944927196827846527, time:1750768421.4147146s req_ids:[8] +DEBUG 06-24 20:33:41 [manager.py:391] +ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.9723300933838ms total_cost_time:214.01619911193848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13120 prompt_cache_len:5151 prompt_cache_ratio:0.39260670731707314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 +DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:41 [batch.py:51] router release req id 8 +INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10912251472473145 s +INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.1110992431640625 s +DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=249250700262609361583704173320654210803, time:1750768421.6370847s req_ids:[8] +DEBUG 06-24 20:33:41 [manager.py:391] +ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:217.5142765045166ms total_cost_time:217.55695343017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13121 prompt_cache_len:5151 prompt_cache_ratio:0.39257678530599804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 +DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:41 [batch.py:51] router release req id 8 +INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10900449752807617 s +INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11111021041870117 s +DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=44539688260308992956144120118911558673, time:1750768421.8614998s req_ids:[8] +DEBUG 06-24 20:33:41 [manager.py:391] +ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.820219039917ms total_cost_time:213.86384963989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13122 prompt_cache_len:5151 prompt_cache_ratio:0.39254686785550985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 +DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:41 [batch.py:51] router release req id 8 +INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10813665390014648 s +INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.11005973815917969 s +DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=74337347463814420679056434431801200866, time:1750768422.0833743s req_ids:[8] +DEBUG 06-24 20:33:42 [manager.py:391] +ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:391.7698860168457ms total_cost_time:391.8137550354004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13123 prompt_cache_len:5151 prompt_cache_ratio:0.392516954964566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 +DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:42 [batch.py:51] router release req id 8 +INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10879731178283691 s +INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.1107170581817627 s +DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=215611857696704533440411233849793136809, time:1750768422.4788332s req_ids:[8] +DEBUG 06-24 20:33:42 [manager.py:391] +ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:214.24508094787598ms total_cost_time:214.29204940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13124 prompt_cache_len:5151 prompt_cache_ratio:0.39248704663212436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 +DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:42 [batch.py:51] router release req id 8 +INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10798311233520508 s +INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.11002635955810547 s +DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=219856797294241191614183202647101047984, time:1750768422.7087345s req_ids:[8] +DEBUG 06-24 20:33:42 [manager.py:391] +ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:226.0420322418213ms total_cost_time:226.09400749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:13125 prompt_cache_len:5151 prompt_cache_ratio:0.39245714285714284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 +DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:42 [batch.py:51] router release req id 8 +INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10776710510253906 s +INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.10970044136047363 s +DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=26346819006962849345193834787108461012, time:1750768422.9301438s req_ids:[8] +DEBUG 06-24 20:33:42 [manager.py:391] +ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:211.2278938293457ms total_cost_time:211.25030517578125ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:13126 prompt_cache_len:5151 prompt_cache_ratio:0.3924272436385799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 +DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:43 [batch.py:51] router release req id 8 +INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10894560813903809 s +INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11091041564941406 s +DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=67189245249141275456612893374497925805, time:1750768423.1483104s req_ids:[8] +DEBUG 06-24 20:33:43 [manager.py:391] +ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:213.41490745544434ms total_cost_time:213.45949172973633ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13127 prompt_cache_len:5151 prompt_cache_ratio:0.39239734897539424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 +DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:43 [batch.py:51] router release req id 8 +INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10944318771362305 s +INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11142873764038086 s +DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=236680426705069418274609115393605560733, time:1750768423.3668118s req_ids:[8] +DEBUG 06-24 20:33:43 [manager.py:391] +ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:210.86454391479492ms total_cost_time:210.90936660766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13128 prompt_cache_len:5151 prompt_cache_ratio:0.3923674588665448 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 +DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:43 [batch.py:51] router release req id 8 +INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10833144187927246 s +INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11022210121154785 s +DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=143835750250091869931839308713673637258, time:1750768423.5842242s req_ids:[8] +DEBUG 06-24 20:33:43 [manager.py:391] +ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:387.21561431884766ms total_cost_time:387.26019859313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13129 prompt_cache_len:5151 prompt_cache_ratio:0.39233757331099095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 +DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:43 [batch.py:51] router release req id 8 +INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s +INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.1098012924194336 s +DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=109727877508427070285813245360448956709, time:1750768423.9771383s req_ids:[8] +DEBUG 06-24 20:33:43 [manager.py:391] +ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:207.5178623199463ms total_cost_time:207.56101608276367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13130 prompt_cache_len:5151 prompt_cache_ratio:0.3923076923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 +DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:44 [batch.py:51] router release req id 8 +INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10912346839904785 s +INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.11168265342712402 s +DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=257425908104607092469222988637047575322, time:1750768424.1915114s req_ids:[8] +DEBUG 06-24 20:33:44 [manager.py:391] +ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:209.30981636047363ms total_cost_time:209.3648910522461ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:13131 prompt_cache_len:5151 prompt_cache_ratio:0.3922778158556089 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 +DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:44 [batch.py:51] router release req id 8 +INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10640454292297363 s +INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.1082463264465332 s +DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=189436362451676668877945735613098318130, time:1750768424.406522s req_ids:[8] +DEBUG 06-24 20:33:44 [manager.py:391] +ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:209.72681045532227ms total_cost_time:209.77354049682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13132 prompt_cache_len:5151 prompt_cache_ratio:0.3922479439537009 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 +DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:44 [batch.py:51] router release req id 8 +INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10862493515014648 s +INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s +DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=50671273354646361973317766641972832274, time:1750768424.6221s req_ids:[8] +DEBUG 06-24 20:33:44 [manager.py:391] +ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:216.62616729736328ms total_cost_time:216.67146682739258ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13133 prompt_cache_len:5151 prompt_cache_ratio:0.39221807660092894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 +DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:44 [batch.py:51] router release req id 8 +INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s +INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.10969018936157227 s +DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=191823213820807892383117446565027326626, time:1750768424.8453646s req_ids:[8] +DEBUG 06-24 20:33:44 [manager.py:391] +ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:212.5685214996338ms total_cost_time:212.61334419250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13134 prompt_cache_len:5151 prompt_cache_ratio:0.392188213796254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 +DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:44 [batch.py:51] router release req id 8 +INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.10875558853149414 s +INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.11071085929870605 s +DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=144367958310322236713834943607594626128, time:1750768425.0632458s req_ids:[8] +DEBUG 06-24 20:33:45 [manager.py:391] +ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:388.60249519348145ms total_cost_time:388.64898681640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13135 prompt_cache_len:5151 prompt_cache_ratio:0.3921583555386372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 +DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:45 [batch.py:51] router release req id 8 +INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.1091303825378418 s +INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.11130261421203613 s +DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=227137784769106517502630691833657814471, time:1750768425.4593356s req_ids:[8] +DEBUG 06-24 20:33:45 [manager.py:391] +ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:211.56716346740723ms total_cost_time:211.6219997406006ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:13136 prompt_cache_len:5151 prompt_cache_ratio:0.3921285018270402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 +DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:45 [batch.py:51] router release req id 8 +INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.10862493515014648 s +INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s +DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=184659820770372805105040242201823080716, time:1750768425.6771235s req_ids:[8] +DEBUG 06-24 20:33:45 [manager.py:391] +ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:211.30108833312988ms total_cost_time:211.34686470031738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13137 prompt_cache_len:5151 prompt_cache_ratio:0.39209865266042476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 +DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:45 [batch.py:51] router release req id 8 +INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.1074066162109375 s +INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.1092691421508789 s +DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=108512955133402766333711173749043693003, time:1750768425.9038515s req_ids:[8] +DEBUG 06-24 20:33:45 [manager.py:391] +ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:215.0290012359619ms total_cost_time:215.0719165802002ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13138 prompt_cache_len:5151 prompt_cache_ratio:0.39206880803775307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 +DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:46 [batch.py:51] router release req id 8 +INFO 06-24 20:33:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.10793161392211914 s +INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.11023569107055664 s +DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=189364422543677207174063727502278588726, time:1750768426.11561s req_ids:[8] +DEBUG 06-24 20:33:46 [manager.py:391] +ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:210.29019355773926ms total_cost_time:210.33406257629395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13139 prompt_cache_len:5151 prompt_cache_ratio:0.39203896795798765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 +DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:46 [batch.py:51] router release req id 8 +INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.1081838607788086 s +INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s +DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=291036873403600536592124302130319640209, time:1750768426.334734s req_ids:[8] +DEBUG 06-24 20:33:46 [manager.py:391] +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:209.5813751220703ms total_cost_time:209.60450172424316ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13140 prompt_cache_len:5151 prompt_cache_ratio:0.39200913242009133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 +DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:46 [batch.py:51] router release req id 8 +INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s +INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.10956692695617676 s +DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=248235062703427400615717592393434904045, time:1750768426.551832s req_ids:[8] +DEBUG 06-24 20:33:46 [manager.py:391] +ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:215.67630767822266ms total_cost_time:215.72065353393555ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13141 prompt_cache_len:5151 prompt_cache_ratio:0.39197930142302717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 +DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:46 [batch.py:51] router release req id 8 +INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.30991625785827637 s +INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.31203675270080566 s +DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=323267255850530690238457700372727541278, time:1750768426.9807255s req_ids:[8] +DEBUG 06-24 20:33:46 [manager.py:391] +ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:429.6104907989502ms total_cost_time:429.6560287475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13142 prompt_cache_len:5151 prompt_cache_ratio:0.39194947496575866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 +INFO 06-24 20:33:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:47 [batch.py:51] router release req id 8 +INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10895371437072754 s +INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11103940010070801 s +DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=327626794702331120311314310022341603881, time:1750768427.213491s req_ids:[8] +DEBUG 06-24 20:33:47 [manager.py:391] +ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:220.11518478393555ms total_cost_time:220.17836570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:13143 prompt_cache_len:5151 prompt_cache_ratio:0.39191965304724946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 +DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:47 [batch.py:51] router release req id 8 +INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10791969299316406 s +INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11004877090454102 s +DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=215188344995670863875619394361210161345, time:1750768427.4352796s req_ids:[8] +DEBUG 06-24 20:33:47 [manager.py:391] +ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:214.42580223083496ms total_cost_time:214.48707580566406ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:13144 prompt_cache_len:5151 prompt_cache_ratio:0.3918898356664638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 +DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:47 [batch.py:51] router release req id 8 +INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10889172554016113 s +INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11067366600036621 s +DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=37653356391073104813257104893874062915, time:1750768427.6560447s req_ids:[8] +DEBUG 06-24 20:33:47 [manager.py:391] +ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:215.24643898010254ms total_cost_time:215.29102325439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13145 prompt_cache_len:5151 prompt_cache_ratio:0.3918600228223659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 +DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:47 [batch.py:51] router release req id 8 +INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.1082465648651123 s +INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11012649536132812 s +DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=216850800538592376932532414970587151077, time:1750768427.887058s req_ids:[8] +DEBUG 06-24 20:33:47 [manager.py:391] +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:33:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 53620.075 tokens/s +DEBUG 06-24 20:33:47 [stats.py:37] Avg prompt tokens throughput: 53612.006 tokens/s +DEBUG 06-24 20:33:47 [stats.py:37] Avg generate tokens throughput: 8.069 tokens/s +ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:226.46522521972656ms total_cost_time:226.4845371246338ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13146 prompt_cache_len:5151 prompt_cache_ratio:0.3918302145139206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 +DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:48 [batch.py:51] router release req id 8 +INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10869717597961426 s +INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.10994672775268555 s +DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=162447434756048611056141119332207634510, time:1750768428.1100569s req_ids:[8] +DEBUG 06-24 20:33:48 [manager.py:391] +ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:212.87965774536133ms total_cost_time:212.92519569396973ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13147 prompt_cache_len:5151 prompt_cache_ratio:0.3918004107400928 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 +DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:48 [batch.py:51] router release req id 8 +INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.3109896183013916 s +INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.31290268898010254 s +DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=145673279303995485037262736852125956825, time:1750768428.555787s req_ids:[8] +DEBUG 06-24 20:33:48 [manager.py:391] +ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:447.85165786743164ms total_cost_time:447.894811630249ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13148 prompt_cache_len:5151 prompt_cache_ratio:0.3917706114998479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 +DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:48 [batch.py:51] router release req id 8 +INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10823225975036621 s +INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.10957193374633789 s +DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=156959208281760943745266472530128954709, time:1750768428.7828045s req_ids:[8] +DEBUG 06-24 20:33:48 [manager.py:391] +ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:213.98615837097168ms total_cost_time:214.02859687805176ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13149 prompt_cache_len:5151 prompt_cache_ratio:0.3917408167921515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 +DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:48 [batch.py:51] router release req id 8 +INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10791349411010742 s +INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.11046266555786133 s +DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=4830914276340246469128561212195770224, time:1750768429.0026865s req_ids:[8] +DEBUG 06-24 20:33:49 [manager.py:391] +ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:211.5764617919922ms total_cost_time:211.66706085205078ms,out_token_counter:1 mean_per_token_cost_time: 0.09059906005859375ms prompt_token_num:13150 prompt_cache_len:5151 prompt_cache_ratio:0.39171102661596957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 +DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:49 [batch.py:51] router release req id 8 +INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10633349418640137 s +INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.10843229293823242 s +DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=246970390320979636042715647586880820197, time:1750768429.2208374s req_ids:[8] +DEBUG 06-24 20:33:49 [manager.py:391] +ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:216.65191650390625ms total_cost_time:216.69650077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13151 prompt_cache_len:5151 prompt_cache_ratio:0.3916812409702684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 +DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:49 [batch.py:51] router release req id 8 +INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10883235931396484 s +INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.11066174507141113 s +DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=63964791975183540221669979967688067887, time:1750768429.4426017s req_ids:[8] +DEBUG 06-24 20:33:49 [manager.py:391] +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:208.01448822021484ms total_cost_time:208.0678939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13152 prompt_cache_len:5151 prompt_cache_ratio:0.3916514598540146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 +DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:49 [batch.py:51] router release req id 8 +INFO 06-24 20:33:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s +INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.11060643196105957 s +DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=252334872313281557390114054154606555162, time:1750768429.6619606s req_ids:[8] +DEBUG 06-24 20:33:49 [manager.py:391] +ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:212.8884792327881ms total_cost_time:212.9347324371338ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13153 prompt_cache_len:5151 prompt_cache_ratio:0.391621683266175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 +DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:49 [batch.py:51] router release req id 8 +INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.3102736473083496 s +INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.3115420341491699 s +DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=333463761376951333227654609887741986073, time:1750768430.0866444s req_ids:[8] +DEBUG 06-24 20:33:50 [manager.py:391] +ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:428.8172721862793ms total_cost_time:428.8625717163086ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13154 prompt_cache_len:5151 prompt_cache_ratio:0.3915919112057169 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 +DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:50 [batch.py:51] router release req id 8 +INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10837650299072266 s +INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s +DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=78554040213818175861095603340165724099, time:1750768430.311668s req_ids:[8] +DEBUG 06-24 20:33:50 [manager.py:391] +ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:215.35730361938477ms total_cost_time:215.40260314941406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13155 prompt_cache_len:5151 prompt_cache_ratio:0.39156214367160774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 +DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:50 [batch.py:51] router release req id 8 +INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10838174819946289 s +INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.11032223701477051 s +DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=304116427796450187728293106887514233530, time:1750768430.530841s req_ids:[8] +DEBUG 06-24 20:33:50 [manager.py:391] +ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:206.26378059387207ms total_cost_time:206.31051063537598ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13156 prompt_cache_len:5151 prompt_cache_ratio:0.3915323806628154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 +DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:50 [batch.py:51] router release req id 8 +INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10890460014343262 s +INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.11081171035766602 s +DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=187756592006130963605720131132015763847, time:1750768430.747106s req_ids:[8] +DEBUG 06-24 20:33:50 [manager.py:391] +ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:208.99653434753418ms total_cost_time:209.04040336608887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13157 prompt_cache_len:5151 prompt_cache_ratio:0.3915026221783081 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 +DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:50 [batch.py:51] router release req id 8 +INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10941481590270996 s +INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s +DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=166132745062119871562775991070301520271, time:1750768430.9627264s req_ids:[8] +DEBUG 06-24 20:33:50 [manager.py:391] +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:212.65935897827148ms total_cost_time:212.70442008972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13158 prompt_cache_len:5151 prompt_cache_ratio:0.39147286821705424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 +DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:51 [batch.py:51] router release req id 8 +INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.1079254150390625 s +INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.10987186431884766 s +DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=27450959189400297889441126868468551917, time:1750768431.1807299s req_ids:[8] +DEBUG 06-24 20:33:51 [manager.py:391] +ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:212.49842643737793ms total_cost_time:212.54205703735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13159 prompt_cache_len:5151 prompt_cache_ratio:0.3914431187780226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 +DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:51 [batch.py:51] router release req id 8 +INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.3110380172729492 s +INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.31278109550476074 s +DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=312288499414900592727716544806364054101, time:1750768431.616931s req_ids:[8] +DEBUG 06-24 20:33:51 [manager.py:391] +ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:440.08421897888184ms total_cost_time:440.1280879974365ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13160 prompt_cache_len:5151 prompt_cache_ratio:0.3914133738601824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 +DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:51 [batch.py:51] router release req id 8 +INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.10962677001953125 s +INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s +DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=248303836053384664579844288582413782517, time:1750768431.846237s req_ids:[8] +DEBUG 06-24 20:33:51 [manager.py:391] +ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:211.81035041809082ms total_cost_time:211.8399143218994ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:13161 prompt_cache_len:5151 prompt_cache_ratio:0.3913836334625028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 +DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:51 [batch.py:51] router release req id 8 +INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10804629325866699 s +INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.10910272598266602 s +DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=152187104299542735941176055708392711906, time:1750768432.0657094s req_ids:[8] +DEBUG 06-24 20:33:52 [manager.py:391] +ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:214.1108512878418ms total_cost_time:214.18261528015137ms,out_token_counter:1 mean_per_token_cost_time: 0.07176399230957031ms prompt_token_num:13162 prompt_cache_len:5151 prompt_cache_ratio:0.3913538975839538 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 +DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:52 [batch.py:51] router release req id 8 +INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10903573036193848 s +INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.11102533340454102 s +DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=106151674992362422390779297338203392687, time:1750768432.2852895s req_ids:[8] +DEBUG 06-24 20:33:52 [manager.py:391] +ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:208.18662643432617ms total_cost_time:208.23287963867188ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13163 prompt_cache_len:5151 prompt_cache_ratio:0.3913241662235053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 +DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:52 [batch.py:51] router release req id 8 +INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10899734497070312 s +INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.1110086441040039 s +DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=98143130093823317670466518579096732868, time:1750768432.5020652s req_ids:[8] +DEBUG 06-24 20:33:52 [manager.py:391] +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:209.61785316467285ms total_cost_time:209.6400260925293ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:13164 prompt_cache_len:5151 prompt_cache_ratio:0.39129443938012765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 +DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:52 [batch.py:51] router release req id 8 +INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.1062767505645752 s +INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.10819888114929199 s +DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=133363582994201654239667870520594515129, time:1750768432.724929s req_ids:[8] +DEBUG 06-24 20:33:52 [manager.py:391] +ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:224.16257858276367ms total_cost_time:224.20787811279297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13165 prompt_cache_len:5151 prompt_cache_ratio:0.3912647170527915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 +DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:53 [batch.py:51] router release req id 8 +INFO 06-24 20:33:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.3104238510131836 s +INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.3118631839752197 s +DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=181217785764845566239292751026674827305, time:1750768433.162158s req_ids:[8] +DEBUG 06-24 20:33:53 [manager.py:391] +INFO 06-24 20:33:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:33:53 [statics_utils.py:24] mean first cost: 230.39619293099616 ms +INFO 06-24 20:33:53 [statics_utils.py:24] mean per token cost: 0.06011501844902964 ms +ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:441.92028045654297ms total_cost_time:441.96557998657227ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13166 prompt_cache_len:5151 prompt_cache_ratio:0.3912349992404679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 +DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:53 [batch.py:51] router release req id 8 +INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.1094059944152832 s +INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s +DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=247551491489587095815915939284519681815, time:1750768433.3963976s req_ids:[8] +DEBUG 06-24 20:33:53 [manager.py:391] +ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:218.0335521697998ms total_cost_time:218.0793285369873ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13167 prompt_cache_len:5151 prompt_cache_ratio:0.39120528594212806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 +DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:53 [batch.py:51] router release req id 8 +INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.10792970657348633 s +INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.10994148254394531 s +DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=117213379454645645519713659590928378133, time:1750768433.6180952s req_ids:[8] +DEBUG 06-24 20:33:53 [manager.py:391] +ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:205.2023410797119ms total_cost_time:205.2459716796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13168 prompt_cache_len:5151 prompt_cache_ratio:0.3911755771567436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 +DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:53 [batch.py:51] router release req id 8 +INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.10836362838745117 s +INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037015914916992 s +DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=324993064159456080911211463287209798369, time:1750768433.832221s req_ids:[8] +DEBUG 06-24 20:33:53 [manager.py:391] +ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:211.78817749023438ms total_cost_time:211.83276176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13169 prompt_cache_len:5151 prompt_cache_ratio:0.3911458728832865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 +DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:53 [batch.py:51] router release req id 8 +INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.10913968086242676 s +INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.11086893081665039 s +DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=82729415078469742769646503238360575526, time:1750768434.0481393s req_ids:[8] +DEBUG 06-24 20:33:54 [manager.py:391] +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:173.74920845031738ms total_cost_time:173.79450798034668ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13170 prompt_cache_len:5151 prompt_cache_ratio:0.39111617312072894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 +DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:54 [batch.py:51] router release req id 8 +INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.1078946590423584 s +INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974001884460449 s +DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=316400296747290277298267288365268640272, time:1750768434.229176s req_ids:[8] +DEBUG 06-24 20:33:54 [manager.py:391] +ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:200.11472702026367ms total_cost_time:200.15907287597656ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13171 prompt_cache_len:5151 prompt_cache_ratio:0.3910864778680434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 +DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:54 [batch.py:51] router release req id 8 +INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.31023216247558594 s +INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.3113899230957031 s +DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=118851558604832335514498913705495319104, time:1750768434.6477134s req_ids:[8] +DEBUG 06-24 20:33:54 [manager.py:391] +ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:433.779239654541ms total_cost_time:433.8250160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13172 prompt_cache_len:5151 prompt_cache_ratio:0.39105678712420283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 +DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:54 [batch.py:51] router release req id 8 +INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.10837388038635254 s +INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.11031603813171387 s +DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=24036374268205780726137780491025842491, time:1750768434.8822956s req_ids:[8] +DEBUG 06-24 20:33:54 [manager.py:391] +ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:221.31061553955078ms total_cost_time:221.35663032531738ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13173 prompt_cache_len:5151 prompt_cache_ratio:0.3910271008881804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 +DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:55 [batch.py:51] router release req id 8 +INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s +INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.1102755069732666 s +DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=70422377231592748046638769907519076678, time:1750768435.1057937s req_ids:[8] +DEBUG 06-24 20:33:55 [manager.py:391] +ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:213.30642700195312ms total_cost_time:213.35268020629883ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13174 prompt_cache_len:5151 prompt_cache_ratio:0.3909974191589494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 +DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:55 [batch.py:51] router release req id 8 +INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s +INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10976052284240723 s +DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=244128714594556216244954003238197054386, time:1750768435.3243606s req_ids:[8] +DEBUG 06-24 20:33:55 [manager.py:391] +ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:211.36069297790527ms total_cost_time:211.40575408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13175 prompt_cache_len:5151 prompt_cache_ratio:0.3909677419354839 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 +DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:55 [batch.py:51] router release req id 8 +INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10777139663696289 s +INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10982108116149902 s +DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=10263111673548856630867399498336728017, time:1750768435.540788s req_ids:[8] +DEBUG 06-24 20:33:55 [manager.py:391] +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:214.68853950500488ms total_cost_time:214.7359848022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13176 prompt_cache_len:5151 prompt_cache_ratio:0.3909380692167577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 +DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:55 [batch.py:51] router release req id 8 +INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10795855522155762 s +INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10988664627075195 s +DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=136075030187177610743829637756159298219, time:1750768435.7636127s req_ids:[8] +DEBUG 06-24 20:33:55 [manager.py:391] +ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:213.9413356781006ms total_cost_time:213.98377418518066ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13177 prompt_cache_len:5151 prompt_cache_ratio:0.39090840100174545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 +DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:56 [batch.py:51] router release req id 8 +INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.31020641326904297 s +INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.31221747398376465 s +DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=182577671402752600091128810617392061113, time:1750768436.1948225s req_ids:[8] +DEBUG 06-24 20:33:56 [manager.py:391] +ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:429.09860610961914ms total_cost_time:429.14271354675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13178 prompt_cache_len:5151 prompt_cache_ratio:0.39087873728942174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 +DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:56 [batch.py:51] router release req id 8 +INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10897088050842285 s +INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.11095118522644043 s +DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=115028498028757378640807879991877252180, time:1750768436.4182608s req_ids:[8] +DEBUG 06-24 20:33:56 [manager.py:391] +ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:214.48183059692383ms total_cost_time:214.5249843597412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13179 prompt_cache_len:5151 prompt_cache_ratio:0.3908490780787617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 +DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:56 [batch.py:51] router release req id 8 +INFO 06-24 20:33:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10796833038330078 s +INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.1090703010559082 s +DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=49563928374649874744593749415464691651, time:1750768436.6397333s req_ids:[8] +DEBUG 06-24 20:33:56 [manager.py:391] +ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:215.956449508667ms total_cost_time:216.00008010864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13180 prompt_cache_len:5151 prompt_cache_ratio:0.39081942336874054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 +DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:56 [batch.py:51] router release req id 8 +INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10817575454711914 s +INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.11021089553833008 s +DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=102613666747554359240784416686912000731, time:1750768436.8623939s req_ids:[8] +DEBUG 06-24 20:33:56 [manager.py:391] +ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:210.94775199890137ms total_cost_time:210.99185943603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13181 prompt_cache_len:5151 prompt_cache_ratio:0.39078977315833396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 +DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:56 [batch.py:51] router release req id 8 +INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10738205909729004 s +INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.10931086540222168 s +DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=12272588665411357596502634674284390345, time:1750768437.0808725s req_ids:[8] +DEBUG 06-24 20:33:57 [manager.py:391] +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:212.1579647064209ms total_cost_time:212.2032642364502ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13182 prompt_cache_len:5151 prompt_cache_ratio:0.390760127446518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 +DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:57 [batch.py:51] router release req id 8 +INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10815119743347168 s +INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s +DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=141629662422050505262190259199766242749, time:1750768437.3025272s req_ids:[8] +DEBUG 06-24 20:33:57 [manager.py:391] +ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:206.63046836853027ms total_cost_time:206.67195320129395ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13183 prompt_cache_len:5151 prompt_cache_ratio:0.39073048623226886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 +DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:57 [batch.py:51] router release req id 8 +INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10807013511657715 s +INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.10993599891662598 s +DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=315654540399207966221048014326005403941, time:1750768437.5229893s req_ids:[8] +DEBUG 06-24 20:33:57 [manager.py:391] +ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:383.67509841918945ms total_cost_time:383.72039794921875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13184 prompt_cache_len:5151 prompt_cache_ratio:0.39070084951456313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 +DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:57 [batch.py:51] router release req id 8 +INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10940718650817871 s +INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s +DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=120453181282946172119381735450404362012, time:1750768437.9037576s req_ids:[8] +DEBUG 06-24 20:33:57 [manager.py:391] +DEBUG 06-24 20:33:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 51270.269 tokens/s +DEBUG 06-24 20:33:57 [stats.py:37] Avg prompt tokens throughput: 51262.482 tokens/s +DEBUG 06-24 20:33:57 [stats.py:37] Avg generate tokens throughput: 7.787 tokens/s +ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:211.28034591674805ms total_cost_time:211.32349967956543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13185 prompt_cache_len:5151 prompt_cache_ratio:0.3906712172923777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 +DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:58 [batch.py:51] router release req id 8 +INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10783219337463379 s +INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.10933089256286621 s +DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=166843003475726399469422672578994055326, time:1750768438.1223602s req_ids:[8] +DEBUG 06-24 20:33:58 [manager.py:391] +ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:214.63990211486816ms total_cost_time:214.68472480773926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13186 prompt_cache_len:5151 prompt_cache_ratio:0.3906415895646898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 +DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:58 [batch.py:51] router release req id 8 +INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.1082601547241211 s +INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.11018204689025879 s +DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=21147390825331859137541169426695240334, time:1750768438.345973s req_ids:[8] +DEBUG 06-24 20:33:58 [manager.py:391] +ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:215.20161628723145ms total_cost_time:215.24786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13187 prompt_cache_len:5151 prompt_cache_ratio:0.390611966330477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 +DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:58 [batch.py:51] router release req id 8 +INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10885262489318848 s +INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s +DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=327533307895726290873018928449781516229, time:1750768438.56639s req_ids:[8] +DEBUG 06-24 20:33:58 [manager.py:391] +ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:215.1651382446289ms total_cost_time:215.2092456817627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13188 prompt_cache_len:5151 prompt_cache_ratio:0.390582347588717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 +DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:58 [batch.py:51] router release req id 8 +INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s +INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.10976243019104004 s +DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=18114816391813926391071723661622747849, time:1750768438.789082s req_ids:[8] +DEBUG 06-24 20:33:58 [manager.py:391] +ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:214.32852745056152ms total_cost_time:214.37525749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13189 prompt_cache_len:5151 prompt_cache_ratio:0.390552733338388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 +DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:58 [batch.py:51] router release req id 8 +INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10736680030822754 s +INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.1092836856842041 s +DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=8060910753896387065178709579600886727, time:1750768439.0197086s req_ids:[8] +DEBUG 06-24 20:33:59 [manager.py:391] +ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:392.8241729736328ms total_cost_time:392.8680419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13190 prompt_cache_len:5151 prompt_cache_ratio:0.3905231235784685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 +DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:59 [batch.py:51] router release req id 8 +INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.1102907657623291 s +INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.11209249496459961 s +DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=300929159348211488814591184282480055407, time:1750768439.4209375s req_ids:[8] +DEBUG 06-24 20:33:59 [manager.py:391] +ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:193.3727264404297ms total_cost_time:193.41707229614258ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13191 prompt_cache_len:5151 prompt_cache_ratio:0.3904935183079372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 +DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:59 [batch.py:51] router release req id 8 +INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.10821914672851562 s +INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.10936307907104492 s +DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=96769123055287817514068320932268282209, time:1750768439.6073055s req_ids:[8] +DEBUG 06-24 20:33:59 [manager.py:391] +ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:209.77067947387695ms total_cost_time:209.8228931427002ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13192 prompt_cache_len:5151 prompt_cache_ratio:0.3904639175257732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 +DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:59 [batch.py:51] router release req id 8 +INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s +INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.10982322692871094 s +DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=75383130942573820089785275737676791179, time:1750768439.8283038s req_ids:[8] +DEBUG 06-24 20:33:59 [manager.py:391] +ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:222.63717651367188ms total_cost_time:222.69535064697266ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:13193 prompt_cache_len:5151 prompt_cache_ratio:0.3904343212309558 mtp_avg_token_per_step:1.0 +INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 +DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:33:59 [batch.py:51] router release req id 8 +INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10787582397460938 s +INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10973429679870605 s +DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=310491619352652089904216655661690453672, time:1750768440.0572007s req_ids:[8] +DEBUG 06-24 20:34:00 [manager.py:391] +ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:184.42893028259277ms total_cost_time:184.47470664978027ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13194 prompt_cache_len:5151 prompt_cache_ratio:0.39040472942246474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 +DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:00 [batch.py:51] router release req id 8 +INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10796928405761719 s +INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10914921760559082 s +DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=163187536737577778575506724070493226392, time:1750768440.2468464s req_ids:[8] +DEBUG 06-24 20:34:00 [manager.py:391] +ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:212.16869354248047ms total_cost_time:212.21446990966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13195 prompt_cache_len:5151 prompt_cache_ratio:0.39037514209928004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 +DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:00 [batch.py:51] router release req id 8 +INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10725045204162598 s +INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10908842086791992 s +DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=314241326447672026146963457042004803187, time:1750768440.4628708s req_ids:[8] +DEBUG 06-24 20:34:00 [manager.py:391] +ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:389.7740840911865ms total_cost_time:389.8191452026367ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13196 prompt_cache_len:5151 prompt_cache_ratio:0.39034555926038195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 +DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:00 [batch.py:51] router release req id 8 +INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s +INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10971736907958984 s +DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=329298236837561679479934913208737346445, time:1750768440.859303s req_ids:[8] +DEBUG 06-24 20:34:00 [manager.py:391] +ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:217.97823905944824ms total_cost_time:218.0345058441162ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13197 prompt_cache_len:5151 prompt_cache_ratio:0.39031598090475106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 +DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:00 [batch.py:51] router release req id 8 +INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.1083669662475586 s +INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11031818389892578 s +DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=284155947081109980376749904396892847037, time:1750768441.083173s req_ids:[8] +DEBUG 06-24 20:34:01 [manager.py:391] +ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:216.01104736328125ms total_cost_time:216.05515480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13198 prompt_cache_len:5151 prompt_cache_ratio:0.3902864070313684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 +DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:01 [batch.py:51] router release req id 8 +INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s +INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s +DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=84283677724473920741460703560474537174, time:1750768441.3058605s req_ids:[8] +DEBUG 06-24 20:34:01 [manager.py:391] +ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:212.99481391906738ms total_cost_time:213.05012702941895ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:13199 prompt_cache_len:5151 prompt_cache_ratio:0.3902568376392151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 +DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:01 [batch.py:51] router release req id 8 +INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s +INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025238037109375 s +DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=225189486158240959279566138390004322837, time:1750768441.5250845s req_ids:[8] +DEBUG 06-24 20:34:01 [manager.py:391] +ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:208.2829475402832ms total_cost_time:208.34040641784668ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:13200 prompt_cache_len:5151 prompt_cache_ratio:0.3902272727272727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 +DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:01 [batch.py:51] router release req id 8 +INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10892295837402344 s +INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s +DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=235982416363979630286252794857947961221, time:1750768441.7404175s req_ids:[8] +DEBUG 06-24 20:34:01 [manager.py:391] +ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:214.15042877197266ms total_cost_time:214.19405937194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13201 prompt_cache_len:5151 prompt_cache_ratio:0.39019771229452316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 +DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:01 [batch.py:51] router release req id 8 +INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.1095423698425293 s +INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11172008514404297 s +DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=264451072055428580316343265900257533137, time:1750768441.9613812s req_ids:[8] +DEBUG 06-24 20:34:01 [manager.py:391] +ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:390.8357620239258ms total_cost_time:390.8801078796387ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13202 prompt_cache_len:5151 prompt_cache_ratio:0.3901681563399485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 +DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:02 [batch.py:51] router release req id 8 +INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.10834264755249023 s +INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036849021911621 s +DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=289139235998981889836922926926807451065, time:1750768442.3585443s req_ids:[8] +DEBUG 06-24 20:34:02 [manager.py:391] +ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:214.05982971191406ms total_cost_time:214.10250663757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13203 prompt_cache_len:5151 prompt_cache_ratio:0.39013860486253127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 +DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:02 [batch.py:51] router release req id 8 +INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.1079702377319336 s +INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.10997653007507324 s +DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=279489038695631452720626208866594998264, time:1750768442.5884283s req_ids:[8] +DEBUG 06-24 20:34:02 [manager.py:391] +ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:227.63800621032715ms total_cost_time:227.68568992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:13204 prompt_cache_len:5151 prompt_cache_ratio:0.39010905786125416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 +DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:02 [batch.py:51] router release req id 8 +INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.10740780830383301 s +INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.10931086540222168 s +DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=238772383955670554054154398247459430029, time:1750768442.814802s req_ids:[8] +DEBUG 06-24 20:34:02 [manager.py:391] +ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:210.97087860107422ms total_cost_time:211.01641654968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13205 prompt_cache_len:5151 prompt_cache_ratio:0.3900795153351003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 +DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:02 [batch.py:51] router release req id 8 +INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10850071907043457 s +INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.11095714569091797 s +DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=279611988909679756842796077914609622358, time:1750768443.030513s req_ids:[8] +DEBUG 06-24 20:34:03 [manager.py:391] +ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:214.59555625915527ms total_cost_time:214.64133262634277ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13206 prompt_cache_len:5151 prompt_cache_ratio:0.3900499772830532 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 +DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:03 [batch.py:51] router release req id 8 +INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10794186592102051 s +INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.10985183715820312 s +DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=139092035442193807401632348919139980115, time:1750768443.2515635s req_ids:[8] +DEBUG 06-24 20:34:03 [manager.py:391] +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:215.3952121734619ms total_cost_time:215.4395580291748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13207 prompt_cache_len:5151 prompt_cache_ratio:0.39002044370409633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 +DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:03 [batch.py:51] router release req id 8 +INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10782694816589355 s +INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.10992980003356934 s +DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=105024766987536328639105699692070010940, time:1750768443.4783742s req_ids:[8] +DEBUG 06-24 20:34:03 [manager.py:391] +ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:392.59958267211914ms total_cost_time:392.64392852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13208 prompt_cache_len:5151 prompt_cache_ratio:0.3899909145972138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 +DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:03 [batch.py:51] router release req id 8 +INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10864663124084473 s +INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s +DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=243630165342058371638470948210602517946, time:1750768443.8744683s req_ids:[8] +DEBUG 06-24 20:34:03 [manager.py:391] +ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69409561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13209 prompt_cache_len:5151 prompt_cache_ratio:0.38996138996138996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 +DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:04 [batch.py:51] router release req id 8 +INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10464191436767578 s +INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10653281211853027 s +DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=146833770924420724041854659593946355368, time:1750768444.0985916s req_ids:[8] +DEBUG 06-24 20:34:04 [manager.py:391] +ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:209.8829746246338ms total_cost_time:209.92636680603027ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13210 prompt_cache_len:5151 prompt_cache_ratio:0.3899318697956094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 +DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:04 [batch.py:51] router release req id 8 +INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10705900192260742 s +INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10898494720458984 s +DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=7676467345603072798185129080058320869, time:1750768444.313344s req_ids:[8] +DEBUG 06-24 20:34:04 [manager.py:391] +ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:211.08007431030273ms total_cost_time:211.13038063049316ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:13211 prompt_cache_len:5151 prompt_cache_ratio:0.38990235409885704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 +DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:04 [batch.py:51] router release req id 8 +INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10395121574401855 s +INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10638546943664551 s +DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=258328398136156433401548714798680116052, time:1750768444.5401285s req_ids:[8] +DEBUG 06-24 20:34:04 [manager.py:391] +ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:225.9054183959961ms total_cost_time:225.92759132385254ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:13212 prompt_cache_len:5151 prompt_cache_ratio:0.3898728428701181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 +DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:04 [batch.py:51] router release req id 8 +INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10408186912536621 s +INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10604500770568848 s +DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=233815980363126009679898083775430223385, time:1750768444.7615159s req_ids:[8] +DEBUG 06-24 20:34:04 [manager.py:391] +ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:217.13638305664062ms total_cost_time:217.18096733093262ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13213 prompt_cache_len:5151 prompt_cache_ratio:0.3898433361083781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 +DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:04 [batch.py:51] router release req id 8 +INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10816264152526855 s +INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10927248001098633 s +DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=195553233546468705211168149156281063761, time:1750768444.9853878s req_ids:[8] +DEBUG 06-24 20:34:04 [manager.py:391] +ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:388.9658451080322ms total_cost_time:389.0101909637451ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13214 prompt_cache_len:5151 prompt_cache_ratio:0.38981383381262297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 +DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:05 [batch.py:51] router release req id 8 +INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s +INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10923385620117188 s +DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=103574787503621450160557009011644233880, time:1750768445.3794458s req_ids:[8] +DEBUG 06-24 20:34:05 [manager.py:391] +ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:215.07644653320312ms total_cost_time:215.12079238891602ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13215 prompt_cache_len:5151 prompt_cache_ratio:0.3897843359818388 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 +DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:05 [batch.py:51] router release req id 8 +INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10803413391113281 s +INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10968923568725586 s +DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=274591624054461904947093176956923952, time:1750768445.6024773s req_ids:[8] +DEBUG 06-24 20:34:05 [manager.py:391] +ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:211.18497848510742ms total_cost_time:211.23003959655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13216 prompt_cache_len:5151 prompt_cache_ratio:0.3897548426150121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 +DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:05 [batch.py:51] router release req id 8 +INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s +INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10995292663574219 s +DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=249680009927053678702348997142134700571, time:1750768445.8179312s req_ids:[8] +DEBUG 06-24 20:34:05 [manager.py:391] +ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:210.98661422729492ms total_cost_time:211.03191375732422ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13217 prompt_cache_len:5151 prompt_cache_ratio:0.3897253537111296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 +DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:05 [batch.py:51] router release req id 8 +INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10788726806640625 s +INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.10989046096801758 s +DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=42725730848096509198490321041235344228, time:1750768446.033306s req_ids:[8] +DEBUG 06-24 20:34:06 [manager.py:391] +ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:210.16812324523926ms total_cost_time:210.21151542663574ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13218 prompt_cache_len:5151 prompt_cache_ratio:0.3896958692691784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 +DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:06 [batch.py:51] router release req id 8 +INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10779500007629395 s +INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s +DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=163720494046865216234550839551933833781, time:1750768446.2500188s req_ids:[8] +DEBUG 06-24 20:34:06 [manager.py:391] +ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:211.91143989562988ms total_cost_time:211.95721626281738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13219 prompt_cache_len:5151 prompt_cache_ratio:0.38966638928814584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 +DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:06 [batch.py:51] router release req id 8 +INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10906076431274414 s +INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.11099863052368164 s +DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=83785725144790305575756129569376089562, time:1750768446.4685452s req_ids:[8] +DEBUG 06-24 20:34:06 [manager.py:391] +ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:383.3932876586914ms total_cost_time:383.4385871887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13220 prompt_cache_len:5151 prompt_cache_ratio:0.38963691376701964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 +DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:06 [batch.py:51] router release req id 8 +INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s +INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.11056923866271973 s +DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=198883607274976995184759676193740069028, time:1750768446.8599784s req_ids:[8] +DEBUG 06-24 20:34:06 [manager.py:391] +ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:214.93005752563477ms total_cost_time:214.97559547424316ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13221 prompt_cache_len:5151 prompt_cache_ratio:0.38960744270478787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 +DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:06 [batch.py:51] router release req id 8 +INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1092367172241211 s +INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11119270324707031 s +DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=263454507220464974071885507224942903700, time:1750768447.0805004s req_ids:[8] +DEBUG 06-24 20:34:07 [manager.py:391] +ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:212.5990390777588ms total_cost_time:212.64410018920898ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13222 prompt_cache_len:5151 prompt_cache_ratio:0.38957797610043865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 +DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:07 [batch.py:51] router release req id 8 +INFO 06-24 20:34:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s +INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s +DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=219517760510085441115253027328715121342, time:1750768447.3005784s req_ids:[8] +DEBUG 06-24 20:34:07 [manager.py:391] +ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:207.72767066955566ms total_cost_time:207.77368545532227ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13223 prompt_cache_len:5151 prompt_cache_ratio:0.38954851395296075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 +DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:07 [batch.py:51] router release req id 8 +INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.10835933685302734 s +INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11026620864868164 s +DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=234789490875355752872122187491129214024, time:1750768447.514303s req_ids:[8] +DEBUG 06-24 20:34:07 [manager.py:391] +ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:208.2529067993164ms total_cost_time:208.2974910736084ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13224 prompt_cache_len:5151 prompt_cache_ratio:0.38951905626134303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 +DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:07 [batch.py:51] router release req id 8 +INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1082754135131836 s +INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023569107055664 s +DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=39555193040158159345082138047775101757, time:1750768447.729522s req_ids:[8] +DEBUG 06-24 20:34:07 [manager.py:391] +ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:212.8002643585205ms total_cost_time:212.8453254699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13225 prompt_cache_len:5151 prompt_cache_ratio:0.3894896030245747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 +DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:07 [batch.py:51] router release req id 8 +INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s +INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.10912585258483887 s +DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=184299323313781469384723318880190528684, time:1750768447.9479165s req_ids:[8] +DEBUG 06-24 20:34:07 [manager.py:391] +DEBUG 06-24 20:34:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 53915.036 tokens/s +DEBUG 06-24 20:34:07 [stats.py:37] Avg prompt tokens throughput: 53906.872 tokens/s +DEBUG 06-24 20:34:07 [stats.py:37] Avg generate tokens throughput: 8.164 tokens/s +ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:212.88323402404785ms total_cost_time:212.92901039123535ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13226 prompt_cache_len:5151 prompt_cache_ratio:0.38946015424164526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 +DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:08 [batch.py:51] router release req id 8 +INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.3091716766357422 s +INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.31128787994384766 s +DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=46373217870938923404630643517946251471, time:1750768448.3755465s req_ids:[8] +DEBUG 06-24 20:34:08 [manager.py:391] +ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:423.4440326690674ms total_cost_time:423.4635829925537ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13227 prompt_cache_len:5151 prompt_cache_ratio:0.38943070991154455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 +DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:08 [batch.py:51] router release req id 8 +INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.10635805130004883 s +INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.1082150936126709 s +DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=251140125680003083037019156456089144184, time:1750768448.5984719s req_ids:[8] +DEBUG 06-24 20:34:08 [manager.py:391] +ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:212.5387191772461ms total_cost_time:212.58258819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13228 prompt_cache_len:5151 prompt_cache_ratio:0.3894012700332628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 +DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:08 [batch.py:51] router release req id 8 +INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.10876727104187012 s +INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.11006855964660645 s +DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=161452614914814379958163345362000446931, time:1750768448.8171163s req_ids:[8] +DEBUG 06-24 20:34:08 [manager.py:391] +ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:207.51595497131348ms total_cost_time:207.535982131958ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:13229 prompt_cache_len:5151 prompt_cache_ratio:0.38937183460579033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 +DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:08 [batch.py:51] router release req id 8 +INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10635089874267578 s +INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10825347900390625 s +DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=118112901851609895715759704254974895705, time:1750768449.0313418s req_ids:[8] +DEBUG 06-24 20:34:09 [manager.py:391] +ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:212.2359275817871ms total_cost_time:212.2793197631836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13230 prompt_cache_len:5151 prompt_cache_ratio:0.3893424036281179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 +DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:09 [batch.py:51] router release req id 8 +INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10707664489746094 s +INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10914421081542969 s +DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=120468380498248346051657165712279680070, time:1750768449.2475252s req_ids:[8] +DEBUG 06-24 20:34:09 [manager.py:391] +ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:210.3590965270996ms total_cost_time:210.4053497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13231 prompt_cache_len:5151 prompt_cache_ratio:0.3893129770992366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 +DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:09 [batch.py:51] router release req id 8 +INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10642075538635254 s +INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10836935043334961 s +DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=200551638025485648893786877360629061903, time:1750768449.4646432s req_ids:[8] +DEBUG 06-24 20:34:09 [manager.py:391] +ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:215.8355712890625ms total_cost_time:215.85512161254883ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13232 prompt_cache_len:5151 prompt_cache_ratio:0.38928355501813783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 +DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:09 [batch.py:51] router release req id 8 +INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.3081932067871094 s +INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.31020140647888184 s +DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=318577815742856077299915704347339483655, time:1750768449.8925207s req_ids:[8] +DEBUG 06-24 20:34:09 [manager.py:391] +ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:429.92305755615234ms total_cost_time:429.97026443481445ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13233 prompt_cache_len:5151 prompt_cache_ratio:0.3892541373838132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 +DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:10 [batch.py:51] router release req id 8 +INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s +INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s +DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=243614932937912585918663049822957612082, time:1750768450.1205792s req_ids:[8] +DEBUG 06-24 20:34:10 [manager.py:391] +ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:215.10696411132812ms total_cost_time:215.1484489440918ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13234 prompt_cache_len:5151 prompt_cache_ratio:0.38922472419525467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 +DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:10 [batch.py:51] router release req id 8 +INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10962080955505371 s +INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11146950721740723 s +DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=198855448052453556419136530779982547548, time:1750768450.3423676s req_ids:[8] +DEBUG 06-24 20:34:10 [manager.py:391] +ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:185.88805198669434ms total_cost_time:185.9121322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:13235 prompt_cache_len:5151 prompt_cache_ratio:0.38919531545145447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 +DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:10 [batch.py:51] router release req id 8 +INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10886478424072266 s +INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11064338684082031 s +DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=36740801380968086541778432388451787066, time:1750768450.5375404s req_ids:[8] +DEBUG 06-24 20:34:10 [manager.py:391] +ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:212.13555335998535ms total_cost_time:212.18252182006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13236 prompt_cache_len:5151 prompt_cache_ratio:0.38916591115140525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 +DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:10 [batch.py:51] router release req id 8 +INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.1073455810546875 s +INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.10930347442626953 s +DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=108633661379584971730230317968196283231, time:1750768450.7551122s req_ids:[8] +DEBUG 06-24 20:34:10 [manager.py:391] +ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:186.920166015625ms total_cost_time:186.9645118713379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13237 prompt_cache_len:5151 prompt_cache_ratio:0.3891365112940999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 +DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:10 [batch.py:51] router release req id 8 +INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s +INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.1104733943939209 s +DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=22022077603118999719566782930974199673, time:1750768450.9455123s req_ids:[8] +DEBUG 06-24 20:34:10 [manager.py:391] +ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:204.82826232910156ms total_cost_time:204.87546920776367ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13238 prompt_cache_len:5151 prompt_cache_ratio:0.3891071158785315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 +DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:11 [batch.py:51] router release req id 8 +INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.3083055019378662 s +INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.3103344440460205 s +DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=211531202516853339139438487114156805731, time:1750768451.3624263s req_ids:[8] +DEBUG 06-24 20:34:11 [manager.py:391] +ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:425.4274368286133ms total_cost_time:425.4894256591797ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13239 prompt_cache_len:5151 prompt_cache_ratio:0.38907772490369363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 +DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:11 [batch.py:51] router release req id 8 +INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s +INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.11059737205505371 s +DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=241243108017874049579397564100711297476, time:1750768451.5885189s req_ids:[8] +DEBUG 06-24 20:34:11 [manager.py:391] +ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:211.10200881958008ms total_cost_time:211.12799644470215ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:13240 prompt_cache_len:5151 prompt_cache_ratio:0.38904833836858005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 +DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:11 [batch.py:51] router release req id 8 +INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.10715413093566895 s +INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.10952877998352051 s +DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=291226933442746978209893671675267383062, time:1750768451.8247874s req_ids:[8] +DEBUG 06-24 20:34:11 [manager.py:391] +ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:232.99169540405273ms total_cost_time:233.03699493408203ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13241 prompt_cache_len:5151 prompt_cache_ratio:0.38901895627218486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 +DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:11 [batch.py:51] router release req id 8 +INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.1090400218963623 s +INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.1109919548034668 s +DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=258512013668525142684105672972523398764, time:1750768452.0475252s req_ids:[8] +DEBUG 06-24 20:34:12 [manager.py:391] +ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:214.08438682556152ms total_cost_time:214.12897109985352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13242 prompt_cache_len:5151 prompt_cache_ratio:0.3889895786135025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 +DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:12 [batch.py:51] router release req id 8 +INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.10788512229919434 s +INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s +DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=331799649987951931148121756007328793416, time:1750768452.2686408s req_ids:[8] +DEBUG 06-24 20:34:12 [manager.py:391] +ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:214.22648429870605ms total_cost_time:214.28585052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:13243 prompt_cache_len:5151 prompt_cache_ratio:0.3889602053915276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 +DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:12 [batch.py:51] router release req id 8 +INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.10881447792053223 s +INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.11067748069763184 s +DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=186435294464627031251864794284638482634, time:1750768452.4872186s req_ids:[8] +DEBUG 06-24 20:34:12 [manager.py:391] +ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:207.7932357788086ms total_cost_time:207.84378051757812ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:13244 prompt_cache_len:5151 prompt_cache_ratio:0.38893083660525524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 +DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:12 [batch.py:51] router release req id 8 +INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.3101236820220947 s +INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.31209659576416016 s +DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=325116147418699506179523504556102996398, time:1750768452.907753s req_ids:[8] +DEBUG 06-24 20:34:12 [manager.py:391] +ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:424.59964752197266ms total_cost_time:424.66020584106445ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:13245 prompt_cache_len:5151 prompt_cache_ratio:0.38890147225368066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 +DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:13 [batch.py:51] router release req id 8 +INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10810995101928711 s +INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.1100761890411377 s +DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=102989688166565910432069108566755052127, time:1750768453.132869s req_ids:[8] +DEBUG 06-24 20:34:13 [manager.py:391] +ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:212.4650478363037ms total_cost_time:212.5098705291748ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13246 prompt_cache_len:5151 prompt_cache_ratio:0.3888721123357995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 +DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:13 [batch.py:51] router release req id 8 +INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10818338394165039 s +INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.11011409759521484 s +DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=87399614504325270049581010387201610128, time:1750768453.3537476s req_ids:[8] +DEBUG 06-24 20:34:13 [manager.py:391] +ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:211.0154628753662ms total_cost_time:211.0600471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13247 prompt_cache_len:5151 prompt_cache_ratio:0.3888427568506077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 +DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:13 [batch.py:51] router release req id 8 +INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10909247398376465 s +INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.111328125 s +DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=298000822806958595561918345595003366155, time:1750768453.5706632s req_ids:[8] +DEBUG 06-24 20:34:13 [manager.py:391] +ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:213.58299255371094ms total_cost_time:213.62924575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13248 prompt_cache_len:5151 prompt_cache_ratio:0.38881340579710144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 +DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:13 [batch.py:51] router release req id 8 +INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s +INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.11005043983459473 s +DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=188271118906736624936937767209271999810, time:1750768453.803829s req_ids:[8] +DEBUG 06-24 20:34:13 [manager.py:391] +ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:226.88913345336914ms total_cost_time:226.9432544708252ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:13249 prompt_cache_len:5151 prompt_cache_ratio:0.38878405917427733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 +DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:13 [batch.py:51] router release req id 8 +INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10753107070922852 s +INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10941123962402344 s +DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=38450605633933793286702506480046898816, time:1750768454.024239s req_ids:[8] +DEBUG 06-24 20:34:14 [manager.py:391] +ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:215.07978439331055ms total_cost_time:215.12246131896973ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13250 prompt_cache_len:5151 prompt_cache_ratio:0.38875471698113206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 +DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:14 [batch.py:51] router release req id 8 +INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.3100440502166748 s +INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.3120455741882324 s +DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=45791837934041344415675037762321999949, time:1750768454.459305s req_ids:[8] +DEBUG 06-24 20:34:14 [manager.py:391] +ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:433.8359832763672ms total_cost_time:433.8812828063965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13251 prompt_cache_len:5151 prompt_cache_ratio:0.3887253792166629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 +DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:14 [batch.py:51] router release req id 8 +INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s +INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10980510711669922 s +DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=46677370298677177457368165466659186438, time:1750768454.6845593s req_ids:[8] +DEBUG 06-24 20:34:14 [manager.py:391] +ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:207.14998245239258ms total_cost_time:207.19504356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13252 prompt_cache_len:5151 prompt_cache_ratio:0.38869604587986717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 +DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:14 [batch.py:51] router release req id 8 +INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10792875289916992 s +INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10988998413085938 s +DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=200130974983704445537610377830879420147, time:1750768454.8968728s req_ids:[8] +DEBUG 06-24 20:34:14 [manager.py:391] +ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:206.27522468566895ms total_cost_time:206.32171630859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13253 prompt_cache_len:5151 prompt_cache_ratio:0.3886667169697427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 +DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:15 [batch.py:51] router release req id 8 +INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10873818397521973 s +INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11063313484191895 s +DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=266975273867571730082195669270762003421, time:1750768455.1099677s req_ids:[8] +DEBUG 06-24 20:34:15 [manager.py:391] +ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:211.69185638427734ms total_cost_time:211.73548698425293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13254 prompt_cache_len:5151 prompt_cache_ratio:0.38863739248528745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 +DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:15 [batch.py:51] router release req id 8 +INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10834980010986328 s +INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11027836799621582 s +DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=53590973655636142279206130679094312674, time:1750768455.3293796s req_ids:[8] +DEBUG 06-24 20:34:15 [manager.py:391] +ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:210.13832092285156ms total_cost_time:210.18338203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13255 prompt_cache_len:5151 prompt_cache_ratio:0.38860807242549983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 +DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:15 [batch.py:51] router release req id 8 +INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10983800888061523 s +INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11179995536804199 s +DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=157358789157754720624812700704376536776, time:1750768455.544756s req_ids:[8] +DEBUG 06-24 20:34:15 [manager.py:391] +ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:211.56668663024902ms total_cost_time:211.61198616027832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13256 prompt_cache_len:5151 prompt_cache_ratio:0.3885787567893784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 +DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:15 [batch.py:51] router release req id 8 +INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.31084156036376953 s +INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.3128471374511719 s +DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=169510668736635198190751115919660442695, time:1750768455.9733105s req_ids:[8] +DEBUG 06-24 20:34:15 [manager.py:391] +ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:430.15313148498535ms total_cost_time:430.19819259643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13257 prompt_cache_len:5151 prompt_cache_ratio:0.38854944557592214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 +DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:16 [batch.py:51] router release req id 8 +INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.10911989212036133 s +INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11116790771484375 s +DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=175001329038555485081906985828512312770, time:1750768456.1995966s req_ids:[8] +DEBUG 06-24 20:34:16 [manager.py:391] +ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:210.80780029296875ms total_cost_time:210.85143089294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13258 prompt_cache_len:5151 prompt_cache_ratio:0.38852013878413033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 +DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:16 [batch.py:51] router release req id 8 +INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.1090095043182373 s +INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11104655265808105 s +DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=118485011655941444994415549347396537918, time:1750768456.4176998s req_ids:[8] +DEBUG 06-24 20:34:16 [manager.py:391] +ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:220.46613693237305ms total_cost_time:220.51072120666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13259 prompt_cache_len:5151 prompt_cache_ratio:0.38849083641300247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 +DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:16 [batch.py:51] router release req id 8 +INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.1087350845336914 s +INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11065125465393066 s +DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=242171465186733280877401133016360032388, time:1750768456.647364s req_ids:[8] +DEBUG 06-24 20:34:16 [manager.py:391] +ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:215.43049812316895ms total_cost_time:215.47508239746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13260 prompt_cache_len:5151 prompt_cache_ratio:0.38846153846153847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 +DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:16 [batch.py:51] router release req id 8 +INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.10754871368408203 s +INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.10941267013549805 s +DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=188760340566130875730933816874202791999, time:1750768456.8647878s req_ids:[8] +DEBUG 06-24 20:34:16 [manager.py:391] +ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:207.4289321899414ms total_cost_time:207.4735164642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13261 prompt_cache_len:5151 prompt_cache_ratio:0.3884322449287384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 +DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:16 [batch.py:51] router release req id 8 +INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10903120040893555 s +INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.11088013648986816 s +DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=145793925969835500185029508474103647973, time:1750768457.079805s req_ids:[8] +DEBUG 06-24 20:34:17 [manager.py:391] +INFO 06-24 20:34:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:211.95602416992188ms total_cost_time:211.99917793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13262 prompt_cache_len:5151 prompt_cache_ratio:0.3884029558136028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 +DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:17 [batch.py:51] router release req id 8 +INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.3107261657714844 s +INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.31269097328186035 s +DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=241579952253788886198529772088886353065, time:1750768457.5029957s req_ids:[8] +DEBUG 06-24 20:34:17 [manager.py:391] +ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:424.26514625549316ms total_cost_time:424.30996894836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13263 prompt_cache_len:5151 prompt_cache_ratio:0.3883736711151323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 +DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:17 [batch.py:51] router release req id 8 +INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10784769058227539 s +INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s +DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=166332961069997316229563406366437195131, time:1750768457.7280095s req_ids:[8] +DEBUG 06-24 20:34:17 [manager.py:391] +ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:210.9353542327881ms total_cost_time:210.9999656677246ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:13264 prompt_cache_len:5151 prompt_cache_ratio:0.3883443908323281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 +DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:17 [batch.py:51] router release req id 8 +INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10676145553588867 s +INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.1086883544921875 s +DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=102123420714460829303732529876843887940, time:1750768457.9457557s req_ids:[8] +DEBUG 06-24 20:34:17 [manager.py:391] +ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:34:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 51202.947 tokens/s +DEBUG 06-24 20:34:18 [stats.py:37] Avg prompt tokens throughput: 51195.118 tokens/s +DEBUG 06-24 20:34:18 [stats.py:37] Avg generate tokens throughput: 7.829 tokens/s +INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:210.2370262145996ms total_cost_time:210.28375625610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13265 prompt_cache_len:5151 prompt_cache_ratio:0.38831511496419147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 +DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:18 [batch.py:51] router release req id 8 +INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.11093950271606445 s +INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.11307239532470703 s +DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=120535169825872346994386659053694923947, time:1750768458.161289s req_ids:[8] +DEBUG 06-24 20:34:18 [manager.py:391] +ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:209.0628147125244ms total_cost_time:209.12456512451172ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:13266 prompt_cache_len:5151 prompt_cache_ratio:0.3882858435097241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 +DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:18 [batch.py:51] router release req id 8 +INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10839390754699707 s +INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.1104116439819336 s +DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=296371073254043934100068256935941573041, time:1750768458.3781745s req_ids:[8] +DEBUG 06-24 20:34:18 [manager.py:391] +ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:212.64886856079102ms total_cost_time:212.69464492797852ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13267 prompt_cache_len:5151 prompt_cache_ratio:0.38825657646792794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 +DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:18 [batch.py:51] router release req id 8 +INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s +INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.11086869239807129 s +DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=7239540229117023935426439683894355890, time:1750768458.608143s req_ids:[8] +DEBUG 06-24 20:34:18 [manager.py:391] +ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:223.78134727478027ms total_cost_time:223.82712364196777ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13268 prompt_cache_len:5151 prompt_cache_ratio:0.38822731383780523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 +DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:18 [batch.py:51] router release req id 8 +INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s +INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.10941386222839355 s +DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=125569581273605578876022099425595575554, time:1750768458.8286712s req_ids:[8] +DEBUG 06-24 20:34:18 [manager.py:391] +ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:372.3931312561035ms total_cost_time:372.4379539489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13269 prompt_cache_len:5151 prompt_cache_ratio:0.3881980556183586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 +DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:19 [batch.py:51] router release req id 8 +INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10891866683959961 s +INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s +DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=196853312733538741160878159512171671910, time:1750768459.2191699s req_ids:[8] +DEBUG 06-24 20:34:19 [manager.py:391] +ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:228.2869815826416ms total_cost_time:228.3318042755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13270 prompt_cache_len:5151 prompt_cache_ratio:0.3881688018085908 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 +DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:19 [batch.py:51] router release req id 8 +INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.1080317497253418 s +INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11072754859924316 s +DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=10713636262711775863304373661095809739, time:1750768459.4422355s req_ids:[8] +DEBUG 06-24 20:34:19 [manager.py:391] +ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.7243537902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13271 prompt_cache_len:5151 prompt_cache_ratio:0.3881395524075051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 +DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:19 [batch.py:51] router release req id 8 +INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10819315910339355 s +INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11048197746276855 s +DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=133526613819832065568593803641318707084, time:1750768459.6659849s req_ids:[8] +DEBUG 06-24 20:34:19 [manager.py:391] +ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:221.44055366516113ms total_cost_time:221.48418426513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13272 prompt_cache_len:5151 prompt_cache_ratio:0.3881103074141049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 +DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:19 [batch.py:51] router release req id 8 +INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10815167427062988 s +INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s +DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=205076182973560144562632162393950572772, time:1750768459.8869336s req_ids:[8] +DEBUG 06-24 20:34:19 [manager.py:391] +ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:212.7854824066162ms total_cost_time:212.8317356109619ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13273 prompt_cache_len:5151 prompt_cache_ratio:0.38808106682739396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 +DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:20 [batch.py:51] router release req id 8 +INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.1091468334197998 s +INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11108231544494629 s +DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=90624290049962525220087986788150239551, time:1750768460.1060183s req_ids:[8] +DEBUG 06-24 20:34:20 [manager.py:391] +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:212.65006065368652ms total_cost_time:212.69488334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13274 prompt_cache_len:5151 prompt_cache_ratio:0.3880518306463764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 +DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:20 [batch.py:51] router release req id 8 +INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10874724388122559 s +INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s +DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=254809822294636208917850740163936537849, time:1750768460.3248165s req_ids:[8] +DEBUG 06-24 20:34:20 [manager.py:391] +ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:366.92309379577637ms total_cost_time:366.9748306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:13275 prompt_cache_len:5151 prompt_cache_ratio:0.3880225988700565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 +DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:20 [batch.py:51] router release req id 8 +INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s +INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11012935638427734 s +DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=148740270807343105488793218786194300632, time:1750768460.6976948s req_ids:[8] +DEBUG 06-24 20:34:20 [manager.py:391] +ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:206.3126564025879ms total_cost_time:206.35628700256348ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13276 prompt_cache_len:5151 prompt_cache_ratio:0.387993371497439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 +DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:20 [batch.py:51] router release req id 8 +INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10894346237182617 s +INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11095261573791504 s +DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=8402934823824448396517828941532933856, time:1750768460.9115648s req_ids:[8] +DEBUG 06-24 20:34:20 [manager.py:391] +ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:210.70265769958496ms total_cost_time:210.74676513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13277 prompt_cache_len:5151 prompt_cache_ratio:0.3879641485275288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 +DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:21 [batch.py:51] router release req id 8 +INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10902857780456543 s +INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s +DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=273990623829264578791656879150909851621, time:1750768461.1263576s req_ids:[8] +DEBUG 06-24 20:34:21 [manager.py:391] +ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:205.3050994873047ms total_cost_time:205.34944534301758ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13278 prompt_cache_len:5151 prompt_cache_ratio:0.3879349299593312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 +DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:21 [batch.py:51] router release req id 8 +INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10933852195739746 s +INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11122727394104004 s +DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=65815710773719609410980035895899554937, time:1750768461.3397424s req_ids:[8] +DEBUG 06-24 20:34:21 [manager.py:391] +ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:212.06092834472656ms total_cost_time:212.10598945617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13279 prompt_cache_len:5151 prompt_cache_ratio:0.3879057157918518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 +DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:21 [batch.py:51] router release req id 8 +INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10789322853088379 s +INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.1092844009399414 s +DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=148550624891387866998124830538178345052, time:1750768461.5679934s req_ids:[8] +DEBUG 06-24 20:34:21 [manager.py:391] +ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:225.25668144226074ms total_cost_time:225.30078887939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13280 prompt_cache_len:5151 prompt_cache_ratio:0.3878765060240964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 +DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:21 [batch.py:51] router release req id 8 +INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.1089162826538086 s +INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11092138290405273 s +DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=16388523261150798952872751257320914812, time:1750768461.788956s req_ids:[8] +DEBUG 06-24 20:34:21 [manager.py:391] +ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:375.5645751953125ms total_cost_time:375.6096363067627ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13281 prompt_cache_len:5151 prompt_cache_ratio:0.38784730065507117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 +DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:22 [batch.py:51] router release req id 8 +INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10930538177490234 s +INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11125707626342773 s +DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=173021871920425771490938210620239080416, time:1750768462.1718419s req_ids:[8] +DEBUG 06-24 20:34:22 [manager.py:391] +ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.46602249145508ms total_cost_time:215.51966667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13282 prompt_cache_len:5151 prompt_cache_ratio:0.38781809968378256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 +DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:22 [batch.py:51] router release req id 8 +INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s +INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s +DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=156730573729433535155034663631565733010, time:1750768462.3928545s req_ids:[8] +DEBUG 06-24 20:34:22 [manager.py:391] +ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.64531326293945ms total_cost_time:215.68918228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13283 prompt_cache_len:5151 prompt_cache_ratio:0.38778890310923736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 +DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:22 [batch.py:51] router release req id 8 +INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.1085507869720459 s +INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11040997505187988 s +DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=292067452700095304807127191607224196640, time:1750768462.6133761s req_ids:[8] +DEBUG 06-24 20:34:22 [manager.py:391] +ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.00587463378906ms total_cost_time:215.05093574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13284 prompt_cache_len:5151 prompt_cache_ratio:0.38775971093044265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 +DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:22 [batch.py:51] router release req id 8 +INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s +INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s +DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=71670645124085006351587599963223941469, time:1750768462.8327165s req_ids:[8] +DEBUG 06-24 20:34:22 [manager.py:391] +ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:211.34257316589355ms total_cost_time:211.38644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13285 prompt_cache_len:5151 prompt_cache_ratio:0.3877305231464057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 +DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:22 [batch.py:51] router release req id 8 +INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.10962367057800293 s +INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11159825325012207 s +DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=72111479130366341188840038903881089170, time:1750768463.0500581s req_ids:[8] +DEBUG 06-24 20:34:23 [manager.py:391] +ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:212.61048316955566ms total_cost_time:212.65602111816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13286 prompt_cache_len:5151 prompt_cache_ratio:0.3877013397561343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 +DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:23 [batch.py:51] router release req id 8 +INFO 06-24 20:34:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:23 [statics_utils.py:24] mean first cost: 230.58410481556828 ms +INFO 06-24 20:34:23 [statics_utils.py:24] mean per token cost: 0.05991084329516538 ms +INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.1098489761352539 s +INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11172628402709961 s +DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=233545567565794336495777260557776101137, time:1750768463.2685475s req_ids:[8] +DEBUG 06-24 20:34:23 [manager.py:391] +ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:378.49950790405273ms total_cost_time:378.5429000854492ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13287 prompt_cache_len:5151 prompt_cache_ratio:0.38767216075863625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 +DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:23 [batch.py:51] router release req id 8 +INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.1085810661315918 s +INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11051774024963379 s +DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=31955860154495250317025861510384600977, time:1750768463.654397s req_ids:[8] +DEBUG 06-24 20:34:23 [manager.py:391] +ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:213.3502960205078ms total_cost_time:213.3934497833252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13288 prompt_cache_len:5151 prompt_cache_ratio:0.3876429861529199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 +DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:23 [batch.py:51] router release req id 8 +INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.10969710350036621 s +INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.1124885082244873 s +DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=312961115159564624565410046022961117838, time:1750768463.8757033s req_ids:[8] +DEBUG 06-24 20:34:23 [manager.py:391] +ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:177.00576782226562ms total_cost_time:177.04129219055176ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:13289 prompt_cache_len:5151 prompt_cache_ratio:0.3876138159379938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 +DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:23 [batch.py:51] router release req id 8 +INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10633683204650879 s +INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.10828614234924316 s +DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=169023053375662135927991861485762240341, time:1750768464.059542s req_ids:[8] +DEBUG 06-24 20:34:24 [manager.py:391] +ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:205.59954643249512ms total_cost_time:205.66177368164062ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13290 prompt_cache_len:5151 prompt_cache_ratio:0.3875846501128668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 +DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:24 [batch.py:51] router release req id 8 +INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10880851745605469 s +INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.11065125465393066 s +DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=250644729775404607827618610388061703915, time:1750768464.2764766s req_ids:[8] +DEBUG 06-24 20:34:24 [manager.py:391] +ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:217.14234352111816ms total_cost_time:217.20337867736816ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:13291 prompt_cache_len:5151 prompt_cache_ratio:0.38755548867654804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 +DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:24 [batch.py:51] router release req id 8 +INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10802507400512695 s +INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.11016964912414551 s +DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=84179603738064476711100331714633263803, time:1750768464.4989555s req_ids:[8] +DEBUG 06-24 20:34:24 [manager.py:391] +ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:228.07812690734863ms total_cost_time:228.12271118164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13292 prompt_cache_len:5151 prompt_cache_ratio:0.38752633162804695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 +DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:24 [batch.py:51] router release req id 8 +DEBUG 06-24 20:34:24 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:24 [manager.py:283] +DEBUG 06-24 20:34:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:24 [manager.py:284] +INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.11007046699523926 s +INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.1115565299987793 s +DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=307325463537186754980394037393650943084, time:1750768464.7592304s req_ids:[8] +DEBUG 06-24 20:34:24 [manager.py:391] +ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:466.34769439697266ms total_cost_time:466.4037227630615ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:13293 prompt_cache_len:5151 prompt_cache_ratio:0.38749717896637326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 +DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:25 [batch.py:51] router release req id 8 +INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.10772943496704102 s +INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.10861945152282715 s +DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=154819498831080952472993432894442586475, time:1750768465.2060678s req_ids:[8] +DEBUG 06-24 20:34:25 [manager.py:391] +ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:170.42112350463867ms total_cost_time:170.46427726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13294 prompt_cache_len:5151 prompt_cache_ratio:0.3874680306905371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 +DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:25 [batch.py:51] router release req id 8 +INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.1110997200012207 s +INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.11200904846191406 s +DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=18683441701285720761114593535369755846, time:1750768465.3891943s req_ids:[8] +DEBUG 06-24 20:34:25 [manager.py:391] +ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:202.44240760803223ms total_cost_time:202.48794555664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13295 prompt_cache_len:5151 prompt_cache_ratio:0.3874388867995487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 +DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:25 [batch.py:51] router release req id 8 +INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.11037397384643555 s +INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.11228799819946289 s +DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=210340228890201960354285036983104527716, time:1750768465.5955987s req_ids:[8] +DEBUG 06-24 20:34:25 [manager.py:391] +ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:203.40228080749512ms total_cost_time:203.44805717468262ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13296 prompt_cache_len:5151 prompt_cache_ratio:0.3874097472924188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 +DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:25 [batch.py:51] router release req id 8 +INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.10828280448913574 s +INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.10965561866760254 s +DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=88391913485953548656751923497091166671, time:1750768465.8004942s req_ids:[8] +DEBUG 06-24 20:34:25 [manager.py:391] +ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:214.75958824157715ms total_cost_time:214.80464935302734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13297 prompt_cache_len:5151 prompt_cache_ratio:0.3873806121681582 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 +DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:25 [batch.py:51] router release req id 8 +INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10849332809448242 s +INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s +DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=6474796628223588193311580772322194550, time:1750768466.0297804s req_ids:[8] +DEBUG 06-24 20:34:26 [manager.py:391] +ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:217.6821231842041ms total_cost_time:217.726469039917ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13298 prompt_cache_len:5151 prompt_cache_ratio:0.3873514814257783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 +DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:26 [batch.py:51] router release req id 8 +INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10782217979431152 s +INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.10870933532714844 s +DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=180004943271904922887973549141665397739, time:1750768466.2453258s req_ids:[8] +DEBUG 06-24 20:34:26 [manager.py:391] +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:392.26317405700684ms total_cost_time:392.3068046569824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13299 prompt_cache_len:5151 prompt_cache_ratio:0.3873223550642905 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 +DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:26 [batch.py:51] router release req id 8 +INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.11045694351196289 s +INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.11250662803649902 s +DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=167191504617354064838701441169576887811, time:1750768466.6451428s req_ids:[8] +DEBUG 06-24 20:34:26 [manager.py:391] +ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:170.2871322631836ms total_cost_time:170.32980918884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13300 prompt_cache_len:5151 prompt_cache_ratio:0.38729323308270674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 +DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:26 [batch.py:51] router release req id 8 +INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10656547546386719 s +INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.10752606391906738 s +DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=4150418313255882979906727803797572546, time:1750768466.822096s req_ids:[8] +DEBUG 06-24 20:34:26 [manager.py:391] +ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:190.5677318572998ms total_cost_time:190.6120777130127ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13301 prompt_cache_len:5151 prompt_cache_ratio:0.3872641154800391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 +DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:26 [batch.py:51] router release req id 8 +INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s +INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10955810546875 s +DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=25226106773223706676151593907397903370, time:1750768467.0179162s req_ids:[8] +DEBUG 06-24 20:34:27 [manager.py:391] +ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:206.2997817993164ms total_cost_time:206.3460350036621ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13302 prompt_cache_len:5151 prompt_cache_ratio:0.38723500225529994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 +DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:27 [batch.py:51] router release req id 8 +INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10795140266418457 s +INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10997819900512695 s +DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=139288533445653198763325090297469919582, time:1750768467.2319744s req_ids:[8] +DEBUG 06-24 20:34:27 [manager.py:391] +ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:208.1735134124756ms total_cost_time:208.2200050354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13303 prompt_cache_len:5151 prompt_cache_ratio:0.38720589340750206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 +DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:27 [batch.py:51] router release req id 8 +INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10751819610595703 s +INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10956001281738281 s +DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=207710418998591923813689091108603901098, time:1750768467.4570308s req_ids:[8] +DEBUG 06-24 20:34:27 [manager.py:391] +ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:221.21429443359375ms total_cost_time:221.25506401062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:13304 prompt_cache_len:5151 prompt_cache_ratio:0.38717678893565843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 +DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:27 [batch.py:51] router release req id 8 +INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.1084890365600586 s +INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s +DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=230406849562299603737191674267257501595, time:1750768467.6888814s req_ids:[8] +DEBUG 06-24 20:34:27 [manager.py:391] +ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:401.7808437347412ms total_cost_time:401.824951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13305 prompt_cache_len:5151 prompt_cache_ratio:0.3871476888387824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 +DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:27 [batch.py:51] router release req id 8 +INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s +INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.1098177433013916 s +DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=260875002897118381222453838399087032810, time:1750768468.086295s req_ids:[8] +DEBUG 06-24 20:34:28 [manager.py:391] +DEBUG 06-24 20:34:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 54222.163 tokens/s +DEBUG 06-24 20:34:28 [stats.py:37] Avg prompt tokens throughput: 54214.101 tokens/s +DEBUG 06-24 20:34:28 [stats.py:37] Avg generate tokens throughput: 8.062 tokens/s +ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:202.13603973388672ms total_cost_time:202.1796703338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13306 prompt_cache_len:5151 prompt_cache_ratio:0.3871185931158876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 +DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:28 [batch.py:51] router release req id 8 +INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.11082935333251953 s +INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.11268901824951172 s +DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=120413521308991042957430262681675150833, time:1750768468.296189s req_ids:[8] +DEBUG 06-24 20:34:28 [manager.py:391] +ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:209.54084396362305ms total_cost_time:209.58662033081055ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13307 prompt_cache_len:5151 prompt_cache_ratio:0.38708950176598783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 +DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:28 [batch.py:51] router release req id 8 +INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10729336738586426 s +INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.10915064811706543 s +DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=52546145651560352482289702033017539710, time:1750768468.5087833s req_ids:[8] +DEBUG 06-24 20:34:28 [manager.py:391] +ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:193.64643096923828ms total_cost_time:193.69244575500488ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13308 prompt_cache_len:5151 prompt_cache_ratio:0.3870604147880974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 +DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:28 [batch.py:51] router release req id 8 +INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.1075582504272461 s +INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.1094818115234375 s +DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=297250776709606182053706544093270445017, time:1750768468.7184796s req_ids:[8] +DEBUG 06-24 20:34:28 [manager.py:391] +ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:217.38553047180176ms total_cost_time:217.42868423461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13309 prompt_cache_len:5151 prompt_cache_ratio:0.38703133218123076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 +DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:28 [batch.py:51] router release req id 8 +INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10634875297546387 s +INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.10812902450561523 s +DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=76914420651271827177910290569348758417, time:1750768468.9326334s req_ids:[8] +DEBUG 06-24 20:34:28 [manager.py:391] +ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:206.09474182128906ms total_cost_time:206.13765716552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13310 prompt_cache_len:5151 prompt_cache_ratio:0.3870022539444027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 +DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:29 [batch.py:51] router release req id 8 +INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.10902857780456543 s +INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095213890075684 s +DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=132176110688790226427969603473242056863, time:1750768469.1445546s req_ids:[8] +DEBUG 06-24 20:34:29 [manager.py:391] +ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:214.30706977844238ms total_cost_time:214.3728733062744ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:13311 prompt_cache_len:5151 prompt_cache_ratio:0.38697318007662834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 +DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:29 [batch.py:51] router release req id 8 +INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.3097703456878662 s +INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.3117096424102783 s +DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=64468840685805664302130085826344435979, time:1750768469.5722933s req_ids:[8] +DEBUG 06-24 20:34:29 [manager.py:391] +ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:433.2125186920166ms total_cost_time:433.2578182220459ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13312 prompt_cache_len:5151 prompt_cache_ratio:0.3869441105769231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 +DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:29 [batch.py:51] router release req id 8 +INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.11083197593688965 s +INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.1127469539642334 s +DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=222115773302504691190469052684444292411, time:1750768469.8054507s req_ids:[8] +DEBUG 06-24 20:34:29 [manager.py:391] +ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:217.5467014312744ms total_cost_time:217.59033203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13313 prompt_cache_len:5151 prompt_cache_ratio:0.38691504544430255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 +DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:29 [batch.py:51] router release req id 8 +INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10777163505554199 s +INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.10969662666320801 s +DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=78762296779658526208716982026603417232, time:1750768470.0239766s req_ids:[8] +DEBUG 06-24 20:34:30 [manager.py:391] +ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:203.28879356384277ms total_cost_time:203.33218574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13314 prompt_cache_len:5151 prompt_cache_ratio:0.3868859846777828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 +DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:30 [batch.py:51] router release req id 8 +INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s +INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.1106569766998291 s +DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=233376247430462949683822568687145730807, time:1750768470.2322357s req_ids:[8] +DEBUG 06-24 20:34:30 [manager.py:391] +ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:208.40954780578613ms total_cost_time:208.45532417297363ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13315 prompt_cache_len:5151 prompt_cache_ratio:0.38685692827638 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 +DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:30 [batch.py:51] router release req id 8 +INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10815000534057617 s +INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.11006021499633789 s +DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=212818461445103077620678443288420431259, time:1750768470.4447634s req_ids:[8] +DEBUG 06-24 20:34:30 [manager.py:391] +ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:212.62216567993164ms total_cost_time:212.68439292907715ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13316 prompt_cache_len:5151 prompt_cache_ratio:0.38682787623911086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 +DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:30 [batch.py:51] router release req id 8 +INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.1094675064086914 s +INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.11139535903930664 s +DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=112955103886491966992945340336954814518, time:1750768470.6648803s req_ids:[8] +DEBUG 06-24 20:34:30 [manager.py:391] +ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:216.07041358947754ms total_cost_time:216.10426902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:13317 prompt_cache_len:5151 prompt_cache_ratio:0.3867988285649921 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 +DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:30 [batch.py:51] router release req id 8 +INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.3077504634857178 s +INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.3094522953033447 s +DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=177362951330501730493435488673352698706, time:1750768471.1005833s req_ids:[8] +DEBUG 06-24 20:34:31 [manager.py:391] +ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:431.23769760131836ms total_cost_time:431.38623237609863ms,out_token_counter:1 mean_per_token_cost_time: 0.14853477478027344ms prompt_token_num:13318 prompt_cache_len:5151 prompt_cache_ratio:0.386769785253041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 +DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:31 [batch.py:51] router release req id 8 +INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10986042022705078 s +INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11163091659545898 s +DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=186273207995361043360982349395967023736, time:1750768471.322489s req_ids:[8] +DEBUG 06-24 20:34:31 [manager.py:391] +ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:199.84865188598633ms total_cost_time:199.89347457885742ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13319 prompt_cache_len:5151 prompt_cache_ratio:0.38674074630227495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 +DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:31 [batch.py:51] router release req id 8 +INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10897588729858398 s +INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11088418960571289 s +DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=287692765397454993533507600837591810277, time:1750768471.530686s req_ids:[8] +DEBUG 06-24 20:34:31 [manager.py:391] +ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:210.85476875305176ms total_cost_time:210.90292930603027ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:13320 prompt_cache_len:5151 prompt_cache_ratio:0.3867117117117117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 +DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:31 [batch.py:51] router release req id 8 +INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10919833183288574 s +INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11117029190063477 s +DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=313841114746951404400302209799346825357, time:1750768471.7492657s req_ids:[8] +DEBUG 06-24 20:34:31 [manager.py:391] +ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:211.1496925354004ms total_cost_time:211.1952304840088ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13321 prompt_cache_len:5151 prompt_cache_ratio:0.38668268148036933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 +DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:31 [batch.py:51] router release req id 8 +INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10892057418823242 s +INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11085915565490723 s +DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=83191059799593439009547041928459918398, time:1750768471.9677963s req_ids:[8] +DEBUG 06-24 20:34:31 [manager.py:391] +ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:214.7378921508789ms total_cost_time:214.77007865905762ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:13322 prompt_cache_len:5151 prompt_cache_ratio:0.38665365560726617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 +DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:32 [batch.py:51] router release req id 8 +INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.10489034652709961 s +INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.10675954818725586 s +DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=38674720550244322052289308377233573602, time:1750768472.1901772s req_ids:[8] +DEBUG 06-24 20:34:32 [manager.py:391] +ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:173.48241806030273ms total_cost_time:173.50172996520996ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13323 prompt_cache_len:5151 prompt_cache_ratio:0.38662463409142084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 +DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:32 [batch.py:51] router release req id 8 +INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.30788373947143555 s +INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.30947184562683105 s +DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=283626753352866107258766744765601226469, time:1750768472.5827947s req_ids:[8] +DEBUG 06-24 20:34:32 [manager.py:391] +ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:438.14921379089355ms total_cost_time:438.19475173950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13324 prompt_cache_len:5151 prompt_cache_ratio:0.3865956169318523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 +DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:32 [batch.py:51] router release req id 8 +INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.10838723182678223 s +INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.11035299301147461 s +DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=31066098368209637903489980868916124349, time:1750768472.8097446s req_ids:[8] +DEBUG 06-24 20:34:32 [manager.py:391] +ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:215.24715423583984ms total_cost_time:215.2884006500244ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13325 prompt_cache_len:5151 prompt_cache_ratio:0.38656660412757976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 +DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:32 [batch.py:51] router release req id 8 +INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10785222053527832 s +INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s +DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=57963396626907316933452558786406332710, time:1750768473.031462s req_ids:[8] +DEBUG 06-24 20:34:33 [manager.py:391] +ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:213.93418312072754ms total_cost_time:213.97924423217773ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13326 prompt_cache_len:5151 prompt_cache_ratio:0.3865375956776227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 +DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:33 [batch.py:51] router release req id 8 +INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.1063077449798584 s +INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.1081855297088623 s +DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=311529501303489006848779245895283417599, time:1750768473.2546487s req_ids:[8] +DEBUG 06-24 20:34:33 [manager.py:391] +ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:214.2324447631836ms total_cost_time:214.25223350524902ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:13327 prompt_cache_len:5151 prompt_cache_ratio:0.386508591581001 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 +DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:33 [batch.py:51] router release req id 8 +INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10829329490661621 s +INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.11018824577331543 s +DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=1200261701411205359864179422412459161, time:1750768473.4720697s req_ids:[8] +DEBUG 06-24 20:34:33 [manager.py:391] +ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:215.39926528930664ms total_cost_time:215.44218063354492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13328 prompt_cache_len:5151 prompt_cache_ratio:0.3864795918367347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 +DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:33 [batch.py:51] router release req id 8 +INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10775971412658691 s +INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.10983586311340332 s +DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=4855611440105863291696627139317182026, time:1750768473.692509s req_ids:[8] +DEBUG 06-24 20:34:33 [manager.py:391] +ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:212.9683494567871ms total_cost_time:213.0146026611328ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13329 prompt_cache_len:5151 prompt_cache_ratio:0.3864505964438443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 +DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:33 [batch.py:51] router release req id 8 +INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.3080439567565918 s +INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.3098316192626953 s +DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=226370342169505831002512095249254170742, time:1750768474.1154816s req_ids:[8] +DEBUG 06-24 20:34:34 [manager.py:391] +ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:374.1021156311035ms total_cost_time:374.12476539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13330 prompt_cache_len:5151 prompt_cache_ratio:0.38642160540135034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 +DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:34 [batch.py:51] router release req id 8 +INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10896158218383789 s +INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11100649833679199 s +DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=85666528927549737683625190018385706141, time:1750768474.2920663s req_ids:[8] +DEBUG 06-24 20:34:34 [manager.py:391] +ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:205.4119110107422ms total_cost_time:205.45530319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13331 prompt_cache_len:5151 prompt_cache_ratio:0.38639261870827396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 +DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:34 [batch.py:51] router release req id 8 +INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10778999328613281 s +INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s +DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=245765613459432858090719253337145659381, time:1750768474.5030286s req_ids:[8] +DEBUG 06-24 20:34:34 [manager.py:391] +ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:208.76669883728027ms total_cost_time:208.81104469299316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13332 prompt_cache_len:5151 prompt_cache_ratio:0.38636363636363635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 +DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:34 [batch.py:51] router release req id 8 +INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10883831977844238 s +INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11078476905822754 s +DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=221270784773791838415818188738343882422, time:1750768474.716619s req_ids:[8] +DEBUG 06-24 20:34:34 [manager.py:391] +ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:211.15350723266602ms total_cost_time:211.20643615722656ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:13333 prompt_cache_len:5151 prompt_cache_ratio:0.38633465836645914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 +DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:34 [batch.py:51] router release req id 8 +INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.11041569709777832 s +INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11249208450317383 s +DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=108524506537588202442719109552500531368, time:1750768474.9363327s req_ids:[8] +DEBUG 06-24 20:34:34 [manager.py:391] +ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:209.17606353759766ms total_cost_time:209.22088623046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13334 prompt_cache_len:5151 prompt_cache_ratio:0.3863056847157642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 +DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:35 [batch.py:51] router release req id 8 +INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.10832047462463379 s +INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034154891967773 s +DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=253566143585684281961561025791925358961, time:1750768475.151598s req_ids:[8] +DEBUG 06-24 20:34:35 [manager.py:391] +ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:208.65988731384277ms total_cost_time:208.70494842529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13335 prompt_cache_len:5151 prompt_cache_ratio:0.3862767154105737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 +DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:35 [batch.py:51] router release req id 8 +INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.309093713760376 s +INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.3108856678009033 s +DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=217693437517916325722276134760620006766, time:1750768475.573553s req_ids:[8] +DEBUG 06-24 20:34:35 [manager.py:391] +ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:378.77559661865234ms total_cost_time:378.8173198699951ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13336 prompt_cache_len:5151 prompt_cache_ratio:0.38624775044991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 +DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:35 [batch.py:51] router release req id 8 +INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.11042451858520508 s +INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.11240100860595703 s +DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=189101816073897246250944532616209864485, time:1750768475.7570446s req_ids:[8] +DEBUG 06-24 20:34:35 [manager.py:391] +ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:215.70110321044922ms total_cost_time:215.7435417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13337 prompt_cache_len:5151 prompt_cache_ratio:0.386218789832796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 +DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:35 [batch.py:51] router release req id 8 +INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.10810685157775879 s +INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.1101071834564209 s +DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=115903230706259062438546230178326448578, time:1750768475.9755785s req_ids:[8] +DEBUG 06-24 20:34:35 [manager.py:391] +ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:215.70968627929688ms total_cost_time:215.75284004211426ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13338 prompt_cache_len:5151 prompt_cache_ratio:0.3861898335582546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 +DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:36 [batch.py:51] router release req id 8 +INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.10987639427185059 s +DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=209136065776312270129588530418305434222, time:1750768476.1979475s req_ids:[8] +DEBUG 06-24 20:34:36 [manager.py:391] +ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:218.97268295288086ms total_cost_time:219.01822090148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13339 prompt_cache_len:5151 prompt_cache_ratio:0.38616088162530926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 +DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:36 [batch.py:51] router release req id 8 +INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.10890865325927734 s +INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.1110539436340332 s +DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=313544624922425589341357260154309204102, time:1750768476.4214225s req_ids:[8] +DEBUG 06-24 20:34:36 [manager.py:391] +ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:221.4982509613037ms total_cost_time:221.56190872192383ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:13340 prompt_cache_len:5151 prompt_cache_ratio:0.3861319340329835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 +DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:36 [batch.py:51] router release req id 8 +INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.10880923271179199 s +INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s +DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=11420537257713714483734054495014403587, time:1750768476.663018s req_ids:[8] +DEBUG 06-24 20:34:36 [manager.py:391] +ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:186.39588356018066ms total_cost_time:186.42807006835938ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:13341 prompt_cache_len:5151 prompt_cache_ratio:0.3861029907803013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 +DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:36 [batch.py:51] router release req id 8 +INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.3093400001525879 s +INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.3114652633666992 s +DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=17140434945485784552932443049183987870, time:1750768477.0543027s req_ids:[8] +DEBUG 06-24 20:34:37 [manager.py:391] +ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:434.89909172058105ms total_cost_time:434.94391441345215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13342 prompt_cache_len:5151 prompt_cache_ratio:0.3860740518662869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 +DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:37 [batch.py:51] router release req id 8 +INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s +INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.11041617393493652 s +DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=106815531636650980314051729832312210157, time:1750768477.2853203s req_ids:[8] +DEBUG 06-24 20:34:37 [manager.py:391] +ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:212.8608226776123ms total_cost_time:212.9073143005371ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13343 prompt_cache_len:5151 prompt_cache_ratio:0.38604511728996477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 +DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:37 [batch.py:51] router release req id 8 +INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10707211494445801 s +INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.10850071907043457 s +DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=71106554987862022251150948379219116156, time:1750768477.5108976s req_ids:[8] +DEBUG 06-24 20:34:37 [manager.py:391] +ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:226.85694694519043ms total_cost_time:226.90391540527344ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13344 prompt_cache_len:5151 prompt_cache_ratio:0.3860161870503597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 +DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:37 [batch.py:51] router release req id 8 +INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s +INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.11031961441040039 s +DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=290831420833195811961788909608880577923, time:1750768477.735443s req_ids:[8] +DEBUG 06-24 20:34:37 [manager.py:391] +ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:212.4619483947754ms total_cost_time:212.50605583190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13345 prompt_cache_len:5151 prompt_cache_ratio:0.38598726114649684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 +DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:37 [batch.py:51] router release req id 8 +INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10752415657043457 s +INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.1088407039642334 s +DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=324239681030183248212379087361979416314, time:1750768477.9530537s req_ids:[8] +DEBUG 06-24 20:34:37 [manager.py:391] +ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:216.52770042419434ms total_cost_time:216.57085418701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13346 prompt_cache_len:5151 prompt_cache_ratio:0.38595833957740144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 +DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:38 [batch.py:51] router release req id 8 +INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.10787773132324219 s +INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.10951828956604004 s +DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=331332565354822669348409035704888346651, time:1750768478.1827722s req_ids:[8] +DEBUG 06-24 20:34:38 [manager.py:391] +DEBUG 06-24 20:34:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 54128.242 tokens/s +DEBUG 06-24 20:34:38 [stats.py:37] Avg prompt tokens throughput: 54120.120 tokens/s +DEBUG 06-24 20:34:38 [stats.py:37] Avg generate tokens throughput: 8.122 tokens/s +ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:190.01173973083496ms total_cost_time:190.05393981933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13347 prompt_cache_len:5151 prompt_cache_ratio:0.38592942234209937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 +DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:38 [batch.py:51] router release req id 8 +INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.3096923828125 s +INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.3115527629852295 s +DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=173281029204959241074708907753878159855, time:1750768478.5838084s req_ids:[8] +DEBUG 06-24 20:34:38 [manager.py:391] +ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:424.0241050720215ms total_cost_time:424.0686893463135ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13348 prompt_cache_len:5151 prompt_cache_ratio:0.3859005094396164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 +DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:38 [batch.py:51] router release req id 8 +INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.10899090766906738 s +INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.11115622520446777 s +DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=149758095706223953161820532491865044321, time:1750768478.8033757s req_ids:[8] +DEBUG 06-24 20:34:38 [manager.py:391] +ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:215.10982513427734ms total_cost_time:215.15583992004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13349 prompt_cache_len:5151 prompt_cache_ratio:0.3858716008689789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 +DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:38 [batch.py:51] router release req id 8 +INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10909533500671387 s +INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11125969886779785 s +DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=112642451137995098877388906949218499343, time:1750768479.0236616s req_ids:[8] +DEBUG 06-24 20:34:39 [manager.py:391] +ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:218.54114532470703ms total_cost_time:218.5835838317871ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13350 prompt_cache_len:5151 prompt_cache_ratio:0.3858426966292135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 +DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:39 [batch.py:51] router release req id 8 +INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10901069641113281 s +INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11102890968322754 s +DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=94553971889609887028796371126984861256, time:1750768479.248381s req_ids:[8] +DEBUG 06-24 20:34:39 [manager.py:391] +ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:194.2894458770752ms total_cost_time:194.3340301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13351 prompt_cache_len:5151 prompt_cache_ratio:0.38581379671934685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 +DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:39 [batch.py:51] router release req id 8 +INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s +INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024975776672363 s +DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=120699560193738076125010600539937657644, time:1750768479.4583073s req_ids:[8] +DEBUG 06-24 20:34:39 [manager.py:391] +ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:197.97897338867188ms total_cost_time:198.00949096679688ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:13352 prompt_cache_len:5151 prompt_cache_ratio:0.38578490113840624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 +DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:39 [batch.py:51] router release req id 8 +INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10759973526000977 s +INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s +DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=210132254249584995228058122760667126574, time:1750768479.651793s req_ids:[8] +DEBUG 06-24 20:34:39 [manager.py:391] +ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:207.16428756713867ms total_cost_time:207.20887184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13353 prompt_cache_len:5151 prompt_cache_ratio:0.385756009885419 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 +DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:39 [batch.py:51] router release req id 8 +INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10781717300415039 s +INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s +DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=25076598888827931772203380331695581203, time:1750768479.8647318s req_ids:[8] +DEBUG 06-24 20:34:39 [manager.py:391] +ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:398.18334579467773ms total_cost_time:398.2272148132324ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13354 prompt_cache_len:5151 prompt_cache_ratio:0.3857271229594129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 +DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:40 [batch.py:51] router release req id 8 +INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s +INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104881763458252 s +DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=234512898968712950825306820431799288774, time:1750768480.2711134s req_ids:[8] +DEBUG 06-24 20:34:40 [manager.py:391] +ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:214.39480781555176ms total_cost_time:214.43843841552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13355 prompt_cache_len:5151 prompt_cache_ratio:0.38569824035941597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 +DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:40 [batch.py:51] router release req id 8 +INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10929059982299805 s +INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.11115026473999023 s +DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=225930369365680368003515982506502272161, time:1750768480.4901028s req_ids:[8] +DEBUG 06-24 20:34:40 [manager.py:391] +ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:213.75346183776855ms total_cost_time:213.79661560058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13356 prompt_cache_len:5151 prompt_cache_ratio:0.3856693620844564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 +DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:40 [batch.py:51] router release req id 8 +INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10780143737792969 s +INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.10964512825012207 s +DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=19551937724137746025577459700348611, time:1750768480.7127438s req_ids:[8] +DEBUG 06-24 20:34:40 [manager.py:391] +ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:210.67428588867188ms total_cost_time:210.71767807006836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13357 prompt_cache_len:5151 prompt_cache_ratio:0.38564048813356294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 +DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:40 [batch.py:51] router release req id 8 +INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10523653030395508 s +INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.1064310073852539 s +DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=147660279458457677993649765644210526217, time:1750768480.9303458s req_ids:[8] +DEBUG 06-24 20:34:40 [manager.py:391] +ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:216.57347679138184ms total_cost_time:216.61853790283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13358 prompt_cache_len:5151 prompt_cache_ratio:0.3856116185057643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 +DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:41 [batch.py:51] router release req id 8 +INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10752081871032715 s +INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.10943460464477539 s +DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=113897170936640574098836156840897902110, time:1750768481.150117s req_ids:[8] +DEBUG 06-24 20:34:41 [manager.py:391] +ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:213.95564079284668ms total_cost_time:213.99950981140137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13359 prompt_cache_len:5151 prompt_cache_ratio:0.38558275320008983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 +DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:41 [batch.py:51] router release req id 8 +INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.1085362434387207 s +INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030220985412598 s +DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=214489236956257132774213700024781029756, time:1750768481.3715928s req_ids:[8] +DEBUG 06-24 20:34:41 [manager.py:391] +ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:376.41096115112305ms total_cost_time:376.45530700683594ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13360 prompt_cache_len:5151 prompt_cache_ratio:0.38555389221556885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 +DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:41 [batch.py:51] router release req id 8 +INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10881805419921875 s +INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.11065196990966797 s +DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=8835921047092611029480533265950375655, time:1750768481.7554536s req_ids:[8] +DEBUG 06-24 20:34:41 [manager.py:391] +ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:216.67027473449707ms total_cost_time:216.71509742736816ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13361 prompt_cache_len:5151 prompt_cache_ratio:0.3855250355512312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 +DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:41 [batch.py:51] router release req id 8 +INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10723614692687988 s +INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.10906767845153809 s +DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=239066132793640855462031916196968582913, time:1750768481.9779527s req_ids:[8] +DEBUG 06-24 20:34:41 [manager.py:391] +ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:216.0482406616211ms total_cost_time:216.09067916870117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13362 prompt_cache_len:5151 prompt_cache_ratio:0.38549618320610685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 +DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:42 [batch.py:51] router release req id 8 +INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s +INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.11053228378295898 s +DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=136659964606567164286207428289112721330, time:1750768482.1993475s req_ids:[8] +DEBUG 06-24 20:34:42 [manager.py:391] +ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:216.2790298461914ms total_cost_time:216.3228988647461ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13363 prompt_cache_len:5151 prompt_cache_ratio:0.3854673351792262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 +DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:42 [batch.py:51] router release req id 8 +INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10819625854492188 s +INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10997343063354492 s +DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=17822103476799280653539342169453412951, time:1750768482.4234436s req_ids:[8] +DEBUG 06-24 20:34:42 [manager.py:391] +ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:214.0371799468994ms total_cost_time:214.0810489654541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13364 prompt_cache_len:5151 prompt_cache_ratio:0.3854384914696199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 +DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:42 [batch.py:51] router release req id 8 +INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s +INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s +DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=294976084089765071861026175907309643892, time:1750768482.644041s req_ids:[8] +DEBUG 06-24 20:34:42 [manager.py:391] +ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:210.83664894104004ms total_cost_time:210.88075637817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13365 prompt_cache_len:5151 prompt_cache_ratio:0.3854096520763187 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 +DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:42 [batch.py:51] router release req id 8 +INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10732412338256836 s +INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10928559303283691 s +DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=54074300426069124929523614344130724807, time:1750768482.873249s req_ids:[8] +DEBUG 06-24 20:34:42 [manager.py:391] +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:392.10963249206543ms total_cost_time:392.1544551849365ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13366 prompt_cache_len:5151 prompt_cache_ratio:0.385380816998354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 +DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:43 [batch.py:51] router release req id 8 +INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10781383514404297 s +INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10903048515319824 s +DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=328480732124916339312086988816429595541, time:1750768483.2618701s req_ids:[8] +DEBUG 06-24 20:34:43 [manager.py:391] +ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:211.36140823364258ms total_cost_time:211.40360832214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13367 prompt_cache_len:5151 prompt_cache_ratio:0.38535198623475725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 +DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:43 [batch.py:51] router release req id 8 +INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10770845413208008 s +INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10949397087097168 s +DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=215467744301864372549316803299084111319, time:1750768483.481869s req_ids:[8] +DEBUG 06-24 20:34:43 [manager.py:391] +ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:206.77781105041504ms total_cost_time:206.82454109191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13368 prompt_cache_len:5151 prompt_cache_ratio:0.38532315978456017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 +DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:43 [batch.py:51] router release req id 8 +INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10712170600891113 s +INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10915732383728027 s +DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=301173154886609180516353902747315618699, time:1750768483.6956868s req_ids:[8] +DEBUG 06-24 20:34:43 [manager.py:391] +ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:207.41534233093262ms total_cost_time:207.45849609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13369 prompt_cache_len:5151 prompt_cache_ratio:0.3852943376467948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 +DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:43 [batch.py:51] router release req id 8 +INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.1078341007232666 s +INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10971403121948242 s +DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=131101112766855711354593546834470833590, time:1750768483.9097588s req_ids:[8] +DEBUG 06-24 20:34:43 [manager.py:391] +ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:220.719575881958ms total_cost_time:220.7643985748291ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13370 prompt_cache_len:5151 prompt_cache_ratio:0.3852655198204936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 +DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:44 [batch.py:51] router release req id 8 +INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10865592956542969 s +INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s +DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=178128336823981781214968123327841756949, time:1750768484.1565008s req_ids:[8] +DEBUG 06-24 20:34:44 [manager.py:391] +ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:219.451904296875ms total_cost_time:219.4962501525879ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13371 prompt_cache_len:5151 prompt_cache_ratio:0.38523670630468926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 +DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:44 [batch.py:51] router release req id 8 +INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10860204696655273 s +INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11060261726379395 s +DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=218203607816499016608929607723338341039, time:1750768484.3829703s req_ids:[8] +DEBUG 06-24 20:34:44 [manager.py:391] +ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:395.14827728271484ms total_cost_time:395.19262313842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13372 prompt_cache_len:5151 prompt_cache_ratio:0.3852078970984146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 +DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:44 [batch.py:51] router release req id 8 +INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10798263549804688 s +INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s +DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=10761395316124628856995118223544310802, time:1750768484.7705824s req_ids:[8] +DEBUG 06-24 20:34:44 [manager.py:391] +ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:212.3434543609619ms total_cost_time:212.3878002166748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13373 prompt_cache_len:5151 prompt_cache_ratio:0.3851790922007029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 +DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:44 [batch.py:51] router release req id 8 +INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10755348205566406 s +INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.10945248603820801 s +DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=181453551087880698759050289217072650718, time:1750768484.9993172s req_ids:[8] +DEBUG 06-24 20:34:44 [manager.py:391] +ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:222.18561172485352ms total_cost_time:222.2304344177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13374 prompt_cache_len:5151 prompt_cache_ratio:0.3851502916105877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 +DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:45 [batch.py:51] router release req id 8 +INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10785961151123047 s +INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.10985589027404785 s +DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=78871873914617280703390596729372428416, time:1750768485.2167943s req_ids:[8] +DEBUG 06-24 20:34:45 [manager.py:391] +ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:207.82041549682617ms total_cost_time:207.86595344543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13375 prompt_cache_len:5151 prompt_cache_ratio:0.3851214953271028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 +DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:45 [batch.py:51] router release req id 8 +INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10772871971130371 s +INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.10956645011901855 s +DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=80864216105776078228325315638327534410, time:1750768485.4317555s req_ids:[8] +DEBUG 06-24 20:34:45 [manager.py:391] +ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:202.42953300476074ms total_cost_time:202.47220993041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13376 prompt_cache_len:5151 prompt_cache_ratio:0.3850927033492823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 +DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:45 [batch.py:51] router release req id 8 +INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10761356353759766 s +INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.1093907356262207 s +DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=3196720390525090139398010844003986168, time:1750768485.640412s req_ids:[8] +DEBUG 06-24 20:34:45 [manager.py:391] +ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:211.1198902130127ms total_cost_time:211.1673355102539ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13377 prompt_cache_len:5151 prompt_cache_ratio:0.38506391567616055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 +DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:45 [batch.py:51] router release req id 8 +INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10875940322875977 s +INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.11066412925720215 s +DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=332383843549473594965860838077509987634, time:1750768485.8567662s req_ids:[8] +DEBUG 06-24 20:34:45 [manager.py:391] +ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:400.23040771484375ms total_cost_time:400.27499198913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13378 prompt_cache_len:5151 prompt_cache_ratio:0.38503513230677233 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 +DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:46 [batch.py:51] router release req id 8 +INFO 06-24 20:34:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s +INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s +DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=80612008967129460081318619814246405916, time:1750768486.262896s req_ids:[8] +DEBUG 06-24 20:34:46 [manager.py:391] +ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:219.34008598327637ms total_cost_time:219.39635276794434ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13379 prompt_cache_len:5151 prompt_cache_ratio:0.3850063532401525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 +DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:46 [batch.py:51] router release req id 8 +INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s +INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11058878898620605 s +DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=133285603690469072710712176549780434372, time:1750768486.4913287s req_ids:[8] +DEBUG 06-24 20:34:46 [manager.py:391] +ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:209.6383571624756ms total_cost_time:209.68151092529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13380 prompt_cache_len:5151 prompt_cache_ratio:0.3849775784753363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 +DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:46 [batch.py:51] router release req id 8 +INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10864520072937012 s +INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11070680618286133 s +DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=128977785193546127995412755900398145403, time:1750768486.7087202s req_ids:[8] +DEBUG 06-24 20:34:46 [manager.py:391] +ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:215.3773307800293ms total_cost_time:215.4226303100586ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13381 prompt_cache_len:5151 prompt_cache_ratio:0.3849488080113594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 +DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:46 [batch.py:51] router release req id 8 +INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s +INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.10982203483581543 s +DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=25838559442113498713591909260482907370, time:1750768486.9297886s req_ids:[8] +DEBUG 06-24 20:34:46 [manager.py:391] +ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:215.5134677886963ms total_cost_time:215.55614471435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13382 prompt_cache_len:5151 prompt_cache_ratio:0.38492004184725753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 +DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:47 [batch.py:51] router release req id 8 +INFO 06-24 20:34:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10802602767944336 s +INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.10979700088500977 s +DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=77967617222573395436171598603485860766, time:1750768487.153963s req_ids:[8] +DEBUG 06-24 20:34:47 [manager.py:391] +ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:217.43059158325195ms total_cost_time:217.47303009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13383 prompt_cache_len:5151 prompt_cache_ratio:0.3848912799820668 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 +DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:47 [batch.py:51] router release req id 8 +INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10860323905944824 s +INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049008369445801 s +DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=85386356311617993124900537375221467231, time:1750768487.3998854s req_ids:[8] +DEBUG 06-24 20:34:47 [manager.py:391] +ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:404.82449531555176ms total_cost_time:404.86764907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13384 prompt_cache_len:5151 prompt_cache_ratio:0.38486252241482366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 +DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:47 [batch.py:51] router release req id 8 +INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s +INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.11075901985168457 s +DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=224680239491257436096564331748170130145, time:1750768487.7882895s req_ids:[8] +DEBUG 06-24 20:34:47 [manager.py:391] +ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:216.7060375213623ms total_cost_time:216.7491912841797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13385 prompt_cache_len:5151 prompt_cache_ratio:0.3848337691445648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 +DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:47 [batch.py:51] router release req id 8 +INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10720586776733398 s +INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.10890579223632812 s +DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=331650535860728505633023387049439511543, time:1750768488.010263s req_ids:[8] +DEBUG 06-24 20:34:48 [manager.py:391] +ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:214.10346031188965ms total_cost_time:214.12372589111328ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:13386 prompt_cache_len:5151 prompt_cache_ratio:0.3848050201703272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 +DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:48 [batch.py:51] router release req id 8 +INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.1086578369140625 s +INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s +DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=73850442012899795058565290444905267128, time:1750768488.2313294s req_ids:[8] +DEBUG 06-24 20:34:48 [manager.py:391] +DEBUG 06-24 20:34:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 53217.585 tokens/s +DEBUG 06-24 20:34:48 [stats.py:37] Avg prompt tokens throughput: 53209.624 tokens/s +DEBUG 06-24 20:34:48 [stats.py:37] Avg generate tokens throughput: 7.961 tokens/s +ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:213.38272094726562ms total_cost_time:213.42802047729492ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13387 prompt_cache_len:5151 prompt_cache_ratio:0.3847762754911481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 +DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:48 [batch.py:51] router release req id 8 +INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.1076974868774414 s +INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.10947251319885254 s +DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=105684043258194818915788278373234206214, time:1750768488.4496644s req_ids:[8] +DEBUG 06-24 20:34:48 [manager.py:391] +ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:214.2932415008545ms total_cost_time:214.339017868042ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13388 prompt_cache_len:5151 prompt_cache_ratio:0.38474753510606513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 +DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:48 [batch.py:51] router release req id 8 +INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10918688774108887 s +INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.11088919639587402 s +DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=21249901609506640352796930378590481986, time:1750768488.66799s req_ids:[8] +DEBUG 06-24 20:34:48 [manager.py:391] +ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:212.54587173461914ms total_cost_time:212.58902549743652ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13389 prompt_cache_len:5151 prompt_cache_ratio:0.38471879901411604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 +DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:48 [batch.py:51] router release req id 8 +INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10800409317016602 s +INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s +DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=225064741826576108036823851514588155778, time:1750768488.8860953s req_ids:[8] +DEBUG 06-24 20:34:48 [manager.py:391] +ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:379.07910346984863ms total_cost_time:379.12583351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13390 prompt_cache_len:5151 prompt_cache_ratio:0.38469006721433907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 +DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:49 [batch.py:51] router release req id 8 +INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10940194129943848 s +INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.11123156547546387 s +DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=202425316892297402321035240591147188730, time:1750768489.2695765s req_ids:[8] +DEBUG 06-24 20:34:49 [manager.py:391] +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:216.01533889770508ms total_cost_time:216.05944633483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13391 prompt_cache_len:5151 prompt_cache_ratio:0.38466133970577254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 +DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:49 [batch.py:51] router release req id 8 +INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10958743095397949 s +INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.1113278865814209 s +DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=194155856581267084042990071958308719059, time:1750768489.491703s req_ids:[8] +DEBUG 06-24 20:34:49 [manager.py:391] +ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:212.62097358703613ms total_cost_time:212.66508102416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13392 prompt_cache_len:5151 prompt_cache_ratio:0.38463261648745517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 +DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:49 [batch.py:51] router release req id 8 +INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10944676399230957 s +INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.1112220287322998 s +DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=333880246236349465030280056446050387852, time:1750768489.7111957s req_ids:[8] +DEBUG 06-24 20:34:49 [manager.py:391] +ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:216.13097190856934ms total_cost_time:216.17531776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13393 prompt_cache_len:5151 prompt_cache_ratio:0.38460389755842606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 +DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:49 [batch.py:51] router release req id 8 +INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10924887657165527 s +INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.11105608940124512 s +DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=8272239126569953909751734562855877589, time:1750768489.9477632s req_ids:[8] +DEBUG 06-24 20:34:49 [manager.py:391] +ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:234.04765129089355ms total_cost_time:234.09128189086914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13394 prompt_cache_len:5151 prompt_cache_ratio:0.38457518291772436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 +DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:50 [batch.py:51] router release req id 8 +INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.10911870002746582 s +INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.11090564727783203 s +DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=216321904921803563920769676395248244351, time:1750768490.1733003s req_ids:[8] +DEBUG 06-24 20:34:50 [manager.py:391] +ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:217.057466506958ms total_cost_time:217.1003818511963ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13395 prompt_cache_len:5151 prompt_cache_ratio:0.3845464725643897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 +DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:50 [batch.py:51] router release req id 8 +INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.10827040672302246 s +INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.10999417304992676 s +DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=329403301763156059926592041944175393698, time:1750768490.3958876s req_ids:[8] +DEBUG 06-24 20:34:50 [manager.py:391] +ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:219.39778327941895ms total_cost_time:219.44308280944824ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13396 prompt_cache_len:5151 prompt_cache_ratio:0.38451776649746194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 +DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:50 [batch.py:51] router release req id 8 +INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.3101820945739746 s +INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.31214165687561035 s +DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=303469966273679999859715105729713351046, time:1750768490.8251936s req_ids:[8] +DEBUG 06-24 20:34:50 [manager.py:391] +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:428.3151626586914ms total_cost_time:428.3599853515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13397 prompt_cache_len:5151 prompt_cache_ratio:0.3844890647159812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 +DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:50 [batch.py:51] router release req id 8 +INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.1092386245727539 s +INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.11118435859680176 s +DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=256523803895843155251146324507910263315, time:1750768491.0546696s req_ids:[8] +DEBUG 06-24 20:34:51 [manager.py:391] +ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:215.45052528381348ms total_cost_time:215.49654006958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13398 prompt_cache_len:5151 prompt_cache_ratio:0.3844603672189879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 +DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:51 [batch.py:51] router release req id 8 +INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10552573204040527 s +INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.10750961303710938 s +DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=62742609223362523268549013286715924055, time:1750768491.27785s req_ids:[8] +DEBUG 06-24 20:34:51 [manager.py:391] +ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:213.63043785095215ms total_cost_time:213.67692947387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13399 prompt_cache_len:5151 prompt_cache_ratio:0.3844316740055228 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 +DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:51 [batch.py:51] router release req id 8 +INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10798287391662598 s +INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s +DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=314520380579866381465107525524539477302, time:1750768491.4938653s req_ids:[8] +DEBUG 06-24 20:34:51 [manager.py:391] +ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:209.71155166625977ms total_cost_time:209.75565910339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13400 prompt_cache_len:5151 prompt_cache_ratio:0.3844029850746269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 +DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:51 [batch.py:51] router release req id 8 +INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10843324661254883 s +INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.1104428768157959 s +DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=170242771642733229299839873032126322609, time:1750768491.7212722s req_ids:[8] +DEBUG 06-24 20:34:51 [manager.py:391] +ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:225.5077362060547ms total_cost_time:225.55232048034668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13401 prompt_cache_len:5151 prompt_cache_ratio:0.3843743004253414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 +DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:51 [batch.py:51] router release req id 8 +INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10933804512023926 s +INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.11116147041320801 s +DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=29562982289942915449525172571750263843, time:1750768491.9443426s req_ids:[8] +DEBUG 06-24 20:34:51 [manager.py:391] +ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:216.9663906097412ms total_cost_time:217.0100212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13402 prompt_cache_len:5151 prompt_cache_ratio:0.38434562005670797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 +DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:52 [batch.py:51] router release req id 8 +INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.3119361400604248 s +INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.31392407417297363 s +DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=118301264466934108930829167159194288439, time:1750768492.3778577s req_ids:[8] +DEBUG 06-24 20:34:52 [manager.py:391] +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:433.4700107574463ms total_cost_time:433.518648147583ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:13403 prompt_cache_len:5151 prompt_cache_ratio:0.3843169439677684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 +DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:52 [batch.py:51] router release req id 8 +INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s +INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.11108732223510742 s +DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=232593348754038125678991477812919696678, time:1750768492.6091928s req_ids:[8] +DEBUG 06-24 20:34:52 [manager.py:391] +ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:216.5539264678955ms total_cost_time:216.60137176513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13404 prompt_cache_len:5151 prompt_cache_ratio:0.3842882721575649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 +DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:52 [batch.py:51] router release req id 8 +INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.10927391052246094 s +INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125493049621582 s +DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=310829970307397273138357558489722157558, time:1750768492.8309891s req_ids:[8] +DEBUG 06-24 20:34:52 [manager.py:391] +ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:217.74888038635254ms total_cost_time:217.79441833496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13405 prompt_cache_len:5151 prompt_cache_ratio:0.38425960462513986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 +DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:52 [batch.py:51] router release req id 8 +INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10852718353271484 s +INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.1105642318725586 s +DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=324082595468039851526245122039124194777, time:1750768493.0545287s req_ids:[8] +DEBUG 06-24 20:34:53 [manager.py:391] +ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:217.65542030334473ms total_cost_time:217.6971435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13406 prompt_cache_len:5151 prompt_cache_ratio:0.384230941369536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 +DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:53 [batch.py:51] router release req id 8 +INFO 06-24 20:34:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:53 [statics_utils.py:24] mean first cost: 230.75918787199086 ms +INFO 06-24 20:34:53 [statics_utils.py:24] mean per token cost: 0.05971222435423423 ms +INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10805296897888184 s +INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942244529724121 s +DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=70643713434461362659426302924417986522, time:1750768493.2778869s req_ids:[8] +DEBUG 06-24 20:34:53 [manager.py:391] +INFO 06-24 20:34:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:208.7545394897461ms total_cost_time:208.82058143615723ms,out_token_counter:1 mean_per_token_cost_time: 0.06604194641113281ms prompt_token_num:13407 prompt_cache_len:5151 prompt_cache_ratio:0.38420228238979637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 +DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:53 [batch.py:51] router release req id 8 +INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10906124114990234 s +INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.1103515625 s +DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=78302082029276430240479513965582609628, time:1750768493.503303s req_ids:[8] +DEBUG 06-24 20:34:53 [manager.py:391] +ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:231.12940788269043ms total_cost_time:231.17494583129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13408 prompt_cache_len:5151 prompt_cache_ratio:0.3841736276849642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 +DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:53 [batch.py:51] router release req id 8 +INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.3097701072692871 s +INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.31184911727905273 s +DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=265045116596210824940948639581007715778, time:1750768493.9338503s req_ids:[8] +DEBUG 06-24 20:34:53 [manager.py:391] +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:426.76448822021484ms total_cost_time:426.80883407592773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13409 prompt_cache_len:5151 prompt_cache_ratio:0.3841449772540831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 +DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:54 [batch.py:51] router release req id 8 +INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10769271850585938 s +INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.10960650444030762 s +DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=43213915475392955010542532166198436558, time:1750768494.162273s req_ids:[8] +DEBUG 06-24 20:34:54 [manager.py:391] +ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:212.32056617736816ms total_cost_time:212.36348152160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13410 prompt_cache_len:5151 prompt_cache_ratio:0.38411633109619686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 +DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:54 [batch.py:51] router release req id 8 +INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s +INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974764823913574 s +DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=101328075228466610507661557384009922524, time:1750768494.381358s req_ids:[8] +DEBUG 06-24 20:34:54 [manager.py:391] +ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:216.35007858276367ms total_cost_time:216.39299392700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13411 prompt_cache_len:5151 prompt_cache_ratio:0.3840876892103497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 +DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:54 [batch.py:51] router release req id 8 +INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10893726348876953 s +INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.11087226867675781 s +DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=305276017090498716802864831210447967294, time:1750768494.6021066s req_ids:[8] +DEBUG 06-24 20:34:54 [manager.py:391] +ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:214.7376537322998ms total_cost_time:214.7815227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13412 prompt_cache_len:5151 prompt_cache_ratio:0.38405905159558607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 +DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:54 [batch.py:51] router release req id 8 +INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10830163955688477 s +INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.11034488677978516 s +DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=257846175379646357075680891322914062966, time:1750768494.8229942s req_ids:[8] +DEBUG 06-24 20:34:54 [manager.py:391] +ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:212.67008781433105ms total_cost_time:212.71443367004395ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13413 prompt_cache_len:5151 prompt_cache_ratio:0.3840304182509506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 +DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:54 [batch.py:51] router release req id 8 +INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10883665084838867 s +INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11075711250305176 s +DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=286006572879662344677523408165507611070, time:1750768495.0437355s req_ids:[8] +DEBUG 06-24 20:34:55 [manager.py:391] +ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:216.48263931274414ms total_cost_time:216.52650833129883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13414 prompt_cache_len:5151 prompt_cache_ratio:0.3840017891754883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 +DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:55 [batch.py:51] router release req id 8 +INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.31078624725341797 s +INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.3127915859222412 s +DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=307318325363404135345259776980010555108, time:1750768495.4690287s req_ids:[8] +DEBUG 06-24 20:34:55 [manager.py:391] +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:430.3431510925293ms total_cost_time:430.3896427154541ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13415 prompt_cache_len:5151 prompt_cache_ratio:0.3839731643682445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 +DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:55 [batch.py:51] router release req id 8 +INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10810446739196777 s +INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11008715629577637 s +DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=243371396566056312630200051065767580587, time:1750768495.6965475s req_ids:[8] +DEBUG 06-24 20:34:55 [manager.py:391] +ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:217.3330783843994ms total_cost_time:217.3748016357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13416 prompt_cache_len:5151 prompt_cache_ratio:0.38394454382826476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 +DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:55 [batch.py:51] router release req id 8 +INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10902738571166992 s +INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11096525192260742 s +DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=196484625563338102340119497964953752782, time:1750768495.9199955s req_ids:[8] +DEBUG 06-24 20:34:55 [manager.py:391] +ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:217.36764907836914ms total_cost_time:217.41056442260742ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13417 prompt_cache_len:5151 prompt_cache_ratio:0.3839159275545949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 +DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:56 [batch.py:51] router release req id 8 +INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10807919502258301 s +INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s +DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=85676724454533863835930567005094177020, time:1750768496.140428s req_ids:[8] +DEBUG 06-24 20:34:56 [manager.py:391] +ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:214.28561210632324ms total_cost_time:214.32852745056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13418 prompt_cache_len:5151 prompt_cache_ratio:0.38388731554628114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 +DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:56 [batch.py:51] router release req id 8 +INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10826516151428223 s +INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11038899421691895 s +DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=294160361309324828015034673130159350286, time:1750768496.3603203s req_ids:[8] +DEBUG 06-24 20:34:56 [manager.py:391] +ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:215.5923843383789ms total_cost_time:215.6364917755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13419 prompt_cache_len:5151 prompt_cache_ratio:0.38385870780236975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 +DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:56 [batch.py:51] router release req id 8 +INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10834217071533203 s +INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037302017211914 s +DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=220278863604129038782964679112139531918, time:1750768496.5925498s req_ids:[8] +DEBUG 06-24 20:34:56 [manager.py:391] +ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:217.94390678405762ms total_cost_time:217.9882526397705ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13420 prompt_cache_len:5151 prompt_cache_ratio:0.3838301043219076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 +DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:34:56 [batch.py:51] router release req id 8 +INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.310762882232666 s +INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.3129255771636963 s +DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=284840831908101358143673399245226758415, time:1750768497.007654s req_ids:[8] +DEBUG 06-24 20:34:57 [manager.py:391] +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:424.49069023132324ms total_cost_time:424.53455924987793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13421 prompt_cache_len:5151 prompt_cache_ratio:0.38380150510394156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 +DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:57 [batch.py:51] router release req id 8 +INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10778021812438965 s +INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.10952568054199219 s +DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=315167805053732025120146280821476307154, time:1750768497.2319005s req_ids:[8] +DEBUG 06-24 20:34:57 [manager.py:391] +ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:213.17410469055176ms total_cost_time:213.21821212768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13422 prompt_cache_len:5151 prompt_cache_ratio:0.383772910147519 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 +DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:57 [batch.py:51] router release req id 8 +INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10835647583007812 s +INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s +DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=274168280615113909501388640978301507603, time:1750768497.4512012s req_ids:[8] +DEBUG 06-24 20:34:57 [manager.py:391] +ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:168.14923286437988ms total_cost_time:168.19071769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13423 prompt_cache_len:5151 prompt_cache_ratio:0.3837443194516874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 +DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:57 [batch.py:51] router release req id 8 +INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s +INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072802543640137 s +DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=156084330953083719977132631775337902986, time:1750768497.627375s req_ids:[8] +DEBUG 06-24 20:34:57 [manager.py:391] +ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:201.23815536499023ms total_cost_time:201.28345489501953ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13424 prompt_cache_len:5151 prompt_cache_ratio:0.3837157330154946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 +DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:57 [batch.py:51] router release req id 8 +INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.1092526912689209 s +INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11127448081970215 s +DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=9060830014198150643881071561705305823, time:1750768497.835077s req_ids:[8] +DEBUG 06-24 20:34:57 [manager.py:391] +ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:206.21442794799805ms total_cost_time:206.25662803649902ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13425 prompt_cache_len:5151 prompt_cache_ratio:0.3836871508379888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 +DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:57 [batch.py:51] router release req id 8 +INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10768532752990723 s +INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.10967135429382324 s +DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=299185952340002568984787034579477555434, time:1750768498.0491061s req_ids:[8] +DEBUG 06-24 20:34:58 [manager.py:391] +ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:207.40079879760742ms total_cost_time:207.444429397583ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13426 prompt_cache_len:5151 prompt_cache_ratio:0.38365857291821837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 +DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:58 [batch.py:51] router release req id 8 +INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.31101441383361816 s +INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.31314945220947266 s +DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=234274541729162269386060914833890464287, time:1750768498.4661531s req_ids:[8] +DEBUG 06-24 20:34:58 [manager.py:391] +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +DEBUG 06-24 20:34:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 52406.910 tokens/s +DEBUG 06-24 20:34:58 [stats.py:37] Avg prompt tokens throughput: 52399.094 tokens/s +DEBUG 06-24 20:34:58 [stats.py:37] Avg generate tokens throughput: 7.816 tokens/s +ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:429.44931983947754ms total_cost_time:429.4934272766113ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13427 prompt_cache_len:5151 prompt_cache_ratio:0.383629999255232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 +DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:58 [batch.py:51] router release req id 8 +INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10844612121582031 s +INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.1105201244354248 s +DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=192097547738549999958632050227279657868, time:1750768498.6999648s req_ids:[8] +DEBUG 06-24 20:34:58 [manager.py:391] +ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:214.8280143737793ms total_cost_time:214.86949920654297ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13428 prompt_cache_len:5151 prompt_cache_ratio:0.38360142984807866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 +DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:58 [batch.py:51] router release req id 8 +INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10744524002075195 s +INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.1093900203704834 s +DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=267644012558346507721701051173758620729, time:1750768498.9189537s req_ids:[8] +DEBUG 06-24 20:34:58 [manager.py:391] +ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:210.71314811706543ms total_cost_time:210.75844764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13429 prompt_cache_len:5151 prompt_cache_ratio:0.3835728646958076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 +DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:59 [batch.py:51] router release req id 8 +INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s +INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.10995364189147949 s +DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=116650705123071985991808926348675312659, time:1750768499.1342878s req_ids:[8] +DEBUG 06-24 20:34:59 [manager.py:391] +ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:210.6776237487793ms total_cost_time:210.7224464416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13430 prompt_cache_len:5151 prompt_cache_ratio:0.38354430379746834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 +DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:59 [batch.py:51] router release req id 8 +INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s +INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.11165642738342285 s +DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=35020485022871867085432313658953893696, time:1750768499.354187s req_ids:[8] +DEBUG 06-24 20:34:59 [manager.py:391] +ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:219.2516326904297ms total_cost_time:219.29526329040527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13431 prompt_cache_len:5151 prompt_cache_ratio:0.3835157471521108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 +DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:59 [batch.py:51] router release req id 8 +INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10944247245788574 s +INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.11156868934631348 s +DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=123556701073859292645224853717933851179, time:1750768499.5765383s req_ids:[8] +DEBUG 06-24 20:34:59 [manager.py:391] +ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:211.69233322143555ms total_cost_time:211.73882484436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13432 prompt_cache_len:5151 prompt_cache_ratio:0.383487194758785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 +DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:59 [batch.py:51] router release req id 8 +INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.2081913948059082 s +INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.20993351936340332 s +DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=300489596054220902127722797455538215431, time:1750768499.8941624s req_ids:[8] +DEBUG 06-24 20:34:59 [manager.py:391] +ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:270.8098888397217ms total_cost_time:270.85256576538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13433 prompt_cache_len:5151 prompt_cache_ratio:0.38345864661654133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 +DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:34:59 [batch.py:51] router release req id 8 +INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.1079111099243164 s +INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.10972237586975098 s +DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=176665783026419884959558153202365616700, time:1750768500.070966s req_ids:[8] +DEBUG 06-24 20:35:00 [manager.py:391] +ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:170.60446739196777ms total_cost_time:170.64666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13434 prompt_cache_len:5151 prompt_cache_ratio:0.38343010272443057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 +DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:00 [batch.py:51] router release req id 8 +INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10859298706054688 s +INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11045551300048828 s +DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=78583443480668124340167812349999102338, time:1750768500.2467515s req_ids:[8] +DEBUG 06-24 20:35:00 [manager.py:391] +ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:202.5127410888672ms total_cost_time:202.622652053833ms,out_token_counter:1 mean_per_token_cost_time: 0.10991096496582031ms prompt_token_num:13435 prompt_cache_len:5151 prompt_cache_ratio:0.3834015630815035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 +DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:00 [batch.py:51] router release req id 8 +INFO 06-24 20:35:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s +INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s +DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=56593472697731161013391566063284249802, time:1750768500.4625084s req_ids:[8] +DEBUG 06-24 20:35:00 [manager.py:391] +ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:236.35292053222656ms total_cost_time:236.39631271362305ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13436 prompt_cache_len:5151 prompt_cache_ratio:0.38337302768681153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 +DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:00 [batch.py:51] router release req id 8 +INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.1081702709197998 s +INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s +DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=313940312766660450621545253566451170725, time:1750768500.698635s req_ids:[8] +DEBUG 06-24 20:35:00 [manager.py:391] +ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:208.64009857177734ms total_cost_time:208.68563652038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13437 prompt_cache_len:5151 prompt_cache_ratio:0.3833444965394061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 +DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:00 [batch.py:51] router release req id 8 +INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10886120796203613 s +INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s +DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=53207323025519798610899324938293730417, time:1750768500.913693s req_ids:[8] +DEBUG 06-24 20:35:00 [manager.py:391] +ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:208.40835571289062ms total_cost_time:208.45317840576172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13438 prompt_cache_len:5151 prompt_cache_ratio:0.383315969638339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 +DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:01 [batch.py:51] router release req id 8 +INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s +INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.11106610298156738 s +DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=145849162293287213710576854157590685492, time:1750768501.130194s req_ids:[8] +DEBUG 06-24 20:35:01 [manager.py:391] +ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:375.72169303894043ms total_cost_time:375.7658004760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13439 prompt_cache_len:5151 prompt_cache_ratio:0.3832874469826624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 +DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:01 [batch.py:51] router release req id 8 +INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10943031311035156 s +INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144328117370605 s +DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=210602672533625759662520388832070906193, time:1750768501.5094242s req_ids:[8] +DEBUG 06-24 20:35:01 [manager.py:391] +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:209.13958549499512ms total_cost_time:209.1834545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13440 prompt_cache_len:5151 prompt_cache_ratio:0.38325892857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 +DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:01 [batch.py:51] router release req id 8 +INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10877490043640137 s +INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.1106882095336914 s +DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=56280643907394361541879637541815678425, time:1750768501.7266338s req_ids:[8] +DEBUG 06-24 20:35:01 [manager.py:391] +ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:216.4902687072754ms total_cost_time:216.5534496307373ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:13441 prompt_cache_len:5151 prompt_cache_ratio:0.3832304144036902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 +DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:01 [batch.py:51] router release req id 8 +INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.1083378791809082 s +INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s +DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=173356985908822345956890136803104509534, time:1750768501.9494357s req_ids:[8] +DEBUG 06-24 20:35:01 [manager.py:391] +ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:210.95705032348633ms total_cost_time:211.00211143493652ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13442 prompt_cache_len:5151 prompt_cache_ratio:0.3832019044785002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 +DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:02 [batch.py:51] router release req id 8 +INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.1072847843170166 s +INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.10923171043395996 s +DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=30949156325615806408933136762611302858, time:1750768502.1749244s req_ids:[8] +DEBUG 06-24 20:35:02 [manager.py:391] +ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:226.34267807006836ms total_cost_time:226.38583183288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13443 prompt_cache_len:5151 prompt_cache_ratio:0.38317339879491186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 +DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:02 [batch.py:51] router release req id 8 +INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.10882067680358887 s +INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11086654663085938 s +DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=118068969668173087556917335370359657572, time:1750768502.3968365s req_ids:[8] +DEBUG 06-24 20:35:02 [manager.py:391] +ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:213.1974697113037ms total_cost_time:213.2420539855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13444 prompt_cache_len:5151 prompt_cache_ratio:0.3831448973519786 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 +DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:02 [batch.py:51] router release req id 8 +INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.10878324508666992 s +INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11081194877624512 s +DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=35482596050660843407285326871113351792, time:1750768502.6179655s req_ids:[8] +DEBUG 06-24 20:35:02 [manager.py:391] +ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:370.6042766571045ms total_cost_time:370.6481456756592ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13445 prompt_cache_len:5151 prompt_cache_ratio:0.3831164001487542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 +DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:02 [batch.py:51] router release req id 8 +INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.1086275577545166 s +INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s +DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=256755598860834163583559772172432591414, time:1750768502.9932675s req_ids:[8] +DEBUG 06-24 20:35:02 [manager.py:391] +ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:217.68760681152344ms total_cost_time:217.73266792297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13446 prompt_cache_len:5151 prompt_cache_ratio:0.38308790718429275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 +DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:03 [batch.py:51] router release req id 8 +INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10958504676818848 s +INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11213064193725586 s +DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=150124896642969235925143198282878571616, time:1750768503.227286s req_ids:[8] +DEBUG 06-24 20:35:03 [manager.py:391] +ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:227.29969024658203ms total_cost_time:227.34498977661133ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13447 prompt_cache_len:5151 prompt_cache_ratio:0.38305941845764857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 +DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:03 [batch.py:51] router release req id 8 +INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10739469528198242 s +INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.10929012298583984 s +DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=273459127074743190091901798057112518392, time:1750768503.4507692s req_ids:[8] +DEBUG 06-24 20:35:03 [manager.py:391] +ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:220.49379348754883ms total_cost_time:220.53980827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13448 prompt_cache_len:5151 prompt_cache_ratio:0.38303093396787624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 +DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:03 [batch.py:51] router release req id 8 +INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10867881774902344 s +INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s +DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=325624187278758528690055357092840855587, time:1750768503.6784394s req_ids:[8] +DEBUG 06-24 20:35:03 [manager.py:391] +ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:220.20983695983887ms total_cost_time:220.25465965270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13449 prompt_cache_len:5151 prompt_cache_ratio:0.3830024537140308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 +DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:03 [batch.py:51] router release req id 8 +INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s +INFO 06-24 20:35:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11026787757873535 s +DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=158480789438363950938705978413256453363, time:1750768503.9012341s req_ids:[8] +DEBUG 06-24 20:35:03 [manager.py:391] +ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:209.13290977478027ms total_cost_time:209.17725563049316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13450 prompt_cache_len:5151 prompt_cache_ratio:0.3829739776951673 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 +DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:04 [batch.py:51] router release req id 8 +INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10797834396362305 s +INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.10965585708618164 s +DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=126667913177590825633530372334513923414, time:1750768504.1198914s req_ids:[8] +DEBUG 06-24 20:35:04 [manager.py:391] +ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:387.30645179748535ms total_cost_time:387.35079765319824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13451 prompt_cache_len:5151 prompt_cache_ratio:0.38294550591034127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 +DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:04 [batch.py:51] router release req id 8 +INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10934972763061523 s +INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.1114346981048584 s +DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=298063075927167836364322972442794323201, time:1750768504.510801s req_ids:[8] +DEBUG 06-24 20:35:04 [manager.py:391] +ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:217.60916709899902ms total_cost_time:217.6523208618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13452 prompt_cache_len:5151 prompt_cache_ratio:0.3829170383586084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 +DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:04 [batch.py:51] router release req id 8 +INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10849642753601074 s +INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.11041498184204102 s +DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=267589180665451916903176809702269722576, time:1750768504.7463286s req_ids:[8] +DEBUG 06-24 20:35:04 [manager.py:391] +ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:223.3293056488037ms total_cost_time:223.3724594116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13453 prompt_cache_len:5151 prompt_cache_ratio:0.38288857503902474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 +DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:04 [batch.py:51] router release req id 8 +INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10998797416687012 s +INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.11193966865539551 s +DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=292660240533945290526278843991765375886, time:1750768504.966552s req_ids:[8] +DEBUG 06-24 20:35:04 [manager.py:391] +ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:218.17994117736816ms total_cost_time:218.22309494018555ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13454 prompt_cache_len:5151 prompt_cache_ratio:0.3828601159506467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 +DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:05 [batch.py:51] router release req id 8 +INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.10760927200317383 s +INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.10957646369934082 s +DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=144734362452285279752174046025416010819, time:1750768505.1862257s req_ids:[8] +DEBUG 06-24 20:35:05 [manager.py:391] +ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:206.84814453125ms total_cost_time:206.8924903869629ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13455 prompt_cache_len:5151 prompt_cache_ratio:0.38283166109253064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 +DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:05 [batch.py:51] router release req id 8 +INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.1088104248046875 s +INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.11083650588989258 s +DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=41958042831230348554039047818793133398, time:1750768505.399396s req_ids:[8] +DEBUG 06-24 20:35:05 [manager.py:391] +ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:208.35304260253906ms total_cost_time:208.39858055114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13456 prompt_cache_len:5151 prompt_cache_ratio:0.38280321046373367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 +DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:05 [batch.py:51] router release req id 8 +INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.1074380874633789 s +INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.10917353630065918 s +DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=102776368912420927022605240695449879433, time:1750768505.613654s req_ids:[8] +DEBUG 06-24 20:35:05 [manager.py:391] +ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:340.31128883361816ms total_cost_time:340.35491943359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13457 prompt_cache_len:5151 prompt_cache_ratio:0.3827747640633128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 +DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:05 [batch.py:51] router release req id 8 +INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.10868573188781738 s +INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s +DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=231649992310249458424839902391921210693, time:1750768505.9593399s req_ids:[8] +DEBUG 06-24 20:35:05 [manager.py:391] +ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:204.36811447143555ms total_cost_time:204.41269874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13458 prompt_cache_len:5151 prompt_cache_ratio:0.3827463218903255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 +DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:06 [batch.py:51] router release req id 8 +INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.10747742652893066 s +INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963749885559082 s +DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=188247043342106851397678993449346384502, time:1750768506.1713164s req_ids:[8] +DEBUG 06-24 20:35:06 [manager.py:391] +ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:208.96148681640625ms total_cost_time:209.00583267211914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13459 prompt_cache_len:5151 prompt_cache_ratio:0.3827178839438294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 +DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:06 [batch.py:51] router release req id 8 +INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.1085824966430664 s +INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s +DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=17922053966545217476063007868813471002, time:1750768506.3865669s req_ids:[8] +DEBUG 06-24 20:35:06 [manager.py:391] +ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:212.11743354797363ms total_cost_time:212.16130256652832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13460 prompt_cache_len:5151 prompt_cache_ratio:0.3826894502228826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 +DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:06 [batch.py:51] router release req id 8 +INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.10826301574707031 s +INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s +DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=252859183864775049065774706018928475597, time:1750768506.6014953s req_ids:[8] +DEBUG 06-24 20:35:06 [manager.py:391] +ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:218.54901313781738ms total_cost_time:218.59312057495117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13461 prompt_cache_len:5151 prompt_cache_ratio:0.38266102072654334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 +DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:06 [batch.py:51] router release req id 8 +INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.1081855297088623 s +INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s +DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=73934876882738438819546037479856896726, time:1750768506.8258233s req_ids:[8] +DEBUG 06-24 20:35:06 [manager.py:391] +ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:209.02371406555176ms total_cost_time:209.06758308410645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13462 prompt_cache_len:5151 prompt_cache_ratio:0.38263259545387013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 +DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:06 [batch.py:51] router release req id 8 +INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10769057273864746 s +INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.10958695411682129 s +DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=46275736988553236949085518678091005816, time:1750768507.041544s req_ids:[8] +DEBUG 06-24 20:35:07 [manager.py:391] +ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:381.8926811218262ms total_cost_time:381.93726539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13463 prompt_cache_len:5151 prompt_cache_ratio:0.38260417440392186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 +DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:07 [batch.py:51] router release req id 8 +INFO 06-24 20:35:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s +INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.11025834083557129 s +DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=78817578120655925193507374845731970857, time:1750768507.4288828s req_ids:[8] +DEBUG 06-24 20:35:07 [manager.py:391] +ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:169.04640197753906ms total_cost_time:169.08979415893555ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13464 prompt_cache_len:5151 prompt_cache_ratio:0.38257575757575757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 +DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:07 [batch.py:51] router release req id 8 +INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10770630836486816 s +INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.10959029197692871 s +DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=279799178577227839569383544316492396089, time:1750768507.6064525s req_ids:[8] +DEBUG 06-24 20:35:07 [manager.py:391] +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:209.9752426147461ms total_cost_time:210.01935005187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13465 prompt_cache_len:5151 prompt_cache_ratio:0.3825473449684367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 +DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:07 [batch.py:51] router release req id 8 +INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s +INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s +DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=67387231057965455116714085357510225203, time:1750768507.8233638s req_ids:[8] +DEBUG 06-24 20:35:07 [manager.py:391] +ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:212.19372749328613ms total_cost_time:212.23974227905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13466 prompt_cache_len:5151 prompt_cache_ratio:0.3825189365810189 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 +DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:07 [batch.py:51] router release req id 8 +INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.1080925464630127 s +INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.11015868186950684 s +DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=324570286705647453222583553156004154046, time:1750768508.0398533s req_ids:[8] +DEBUG 06-24 20:35:08 [manager.py:391] +ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:213.52767944335938ms total_cost_time:213.57202529907227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13467 prompt_cache_len:5151 prompt_cache_ratio:0.38249053241256403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 +DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:08 [batch.py:51] router release req id 8 +INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10873007774353027 s +INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s +DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=129895078955538587783916350484032180685, time:1750768508.260541s req_ids:[8] +DEBUG 06-24 20:35:08 [manager.py:391] +ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:208.13465118408203ms total_cost_time:208.17804336547852ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13468 prompt_cache_len:5151 prompt_cache_ratio:0.38246213246213245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 +DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:08 [batch.py:51] router release req id 8 +INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s +INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s +DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=168961748511971567245901841521340171912, time:1750768508.4750247s req_ids:[8] +DEBUG 06-24 20:35:08 [manager.py:391] +DEBUG 06-24 20:35:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 56442.429 tokens/s +DEBUG 06-24 20:35:08 [stats.py:37] Avg prompt tokens throughput: 56434.037 tokens/s +DEBUG 06-24 20:35:08 [stats.py:37] Avg generate tokens throughput: 8.393 tokens/s +ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:386.94047927856445ms total_cost_time:386.98720932006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13469 prompt_cache_len:5151 prompt_cache_ratio:0.3824337367287846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 +DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:08 [batch.py:51] router release req id 8 +INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10579824447631836 s +INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.10780930519104004 s +DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=301688297832701480998399461908854773798, time:1750768508.8660834s req_ids:[8] +DEBUG 06-24 20:35:08 [manager.py:391] +ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:207.97443389892578ms total_cost_time:207.99636840820312ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:13470 prompt_cache_len:5151 prompt_cache_ratio:0.3824053452115813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 +DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:08 [batch.py:51] router release req id 8 +INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10503554344177246 s +INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10688042640686035 s +DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=29949710202076507970261844286236806053, time:1750768509.0824265s req_ids:[8] +DEBUG 06-24 20:35:09 [manager.py:391] +ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:173.60854148864746ms total_cost_time:173.65360260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13471 prompt_cache_len:5151 prompt_cache_ratio:0.38237695790958354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 +DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:09 [batch.py:51] router release req id 8 +INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s +INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10998678207397461 s +DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=67301179099713795909297626148240816845, time:1750768509.259925s req_ids:[8] +DEBUG 06-24 20:35:09 [manager.py:391] +ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:201.9486427307129ms total_cost_time:202.00181007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:13472 prompt_cache_len:5151 prompt_cache_ratio:0.38234857482185275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 +DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:09 [batch.py:51] router release req id 8 +INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.1087331771850586 s +INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s +DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=285293231438855593380992693706625705921, time:1750768509.4683764s req_ids:[8] +DEBUG 06-24 20:35:09 [manager.py:391] +ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:209.94830131530762ms total_cost_time:210.00194549560547ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13473 prompt_cache_len:5151 prompt_cache_ratio:0.3823201959474505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 +DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:09 [batch.py:51] router release req id 8 +INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s +INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s +DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=60865504447285031860664593549026664004, time:1750768509.684147s req_ids:[8] +DEBUG 06-24 20:35:09 [manager.py:391] +ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:213.75179290771484ms total_cost_time:213.79542350769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13474 prompt_cache_len:5151 prompt_cache_ratio:0.38229182128543865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 +DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:09 [batch.py:51] router release req id 8 +INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10904669761657715 s +INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.11097168922424316 s +DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=65612441072643029611567753718523887471, time:1750768509.9057915s req_ids:[8] +DEBUG 06-24 20:35:09 [manager.py:391] +ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:383.1939697265625ms total_cost_time:383.23974609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13475 prompt_cache_len:5151 prompt_cache_ratio:0.3822634508348794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 +DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:10 [batch.py:51] router release req id 8 +INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.1090693473815918 s +INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.11105704307556152 s +DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=174964867633192113139435839378244985993, time:1750768510.2955701s req_ids:[8] +DEBUG 06-24 20:35:10 [manager.py:391] +ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:214.31446075439453ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13476 prompt_cache_len:5151 prompt_cache_ratio:0.38223508459483524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 +DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:10 [batch.py:51] router release req id 8 +INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s +INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10868096351623535 s +DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=308643828604693775118117023256396094570, time:1750768510.515572s req_ids:[8] +DEBUG 06-24 20:35:10 [manager.py:391] +ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:211.85612678527832ms total_cost_time:211.88068389892578ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:13477 prompt_cache_len:5151 prompt_cache_ratio:0.38220672256436894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 +DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:10 [batch.py:51] router release req id 8 +INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.10525703430175781 s +INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10717535018920898 s +DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=10101963248964058692171730701358249386, time:1750768510.7355995s req_ids:[8] +DEBUG 06-24 20:35:10 [manager.py:391] +ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:215.0118350982666ms total_cost_time:215.03448486328125ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13478 prompt_cache_len:5151 prompt_cache_ratio:0.3821783647425434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 +DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:10 [batch.py:51] router release req id 8 +INFO 06-24 20:35:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.10396385192871094 s +INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10570693016052246 s +DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=272397299100800314969838377656173381270, time:1750768510.954011s req_ids:[8] +DEBUG 06-24 20:35:10 [manager.py:391] +ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:210.04962921142578ms total_cost_time:210.07418632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:13479 prompt_cache_len:5151 prompt_cache_ratio:0.382150011128422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 +DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:11 [batch.py:51] router release req id 8 +INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.104888916015625 s +INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.10663533210754395 s +DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=185859036426625171754659723149511840483, time:1750768511.1679704s req_ids:[8] +DEBUG 06-24 20:35:11 [manager.py:391] +ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:211.2557888031006ms total_cost_time:211.28010749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:13480 prompt_cache_len:5151 prompt_cache_ratio:0.38212166172106826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 +DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:11 [batch.py:51] router release req id 8 +INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.10470366477966309 s +INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.10591650009155273 s +DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=192177260697797844930179787479032210963, time:1750768511.385097s req_ids:[8] +DEBUG 06-24 20:35:11 [manager.py:391] +ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:212.1894359588623ms total_cost_time:212.21446990966797ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13481 prompt_cache_len:5151 prompt_cache_ratio:0.38209331651954603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 +DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:11 [batch.py:51] router release req id 8 +INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.3054969310760498 s +INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.3075411319732666 s +DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=28488821842327157015131694738451324705, time:1750768511.8081417s req_ids:[8] +DEBUG 06-24 20:35:11 [manager.py:391] +ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:428.14111709594727ms total_cost_time:428.16615104675293ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13482 prompt_cache_len:5151 prompt_cache_ratio:0.38206497552291946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 +DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:11 [batch.py:51] router release req id 8 +INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10455584526062012 s +INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10701489448547363 s +DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=53140851527832865517067344748969020035, time:1750768512.034052s req_ids:[8] +DEBUG 06-24 20:35:12 [manager.py:391] +ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:215.39831161499023ms total_cost_time:215.4216766357422ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13483 prompt_cache_len:5151 prompt_cache_ratio:0.3820366387302529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 +DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:12 [batch.py:51] router release req id 8 +INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10480928421020508 s +INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10692930221557617 s +DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=36786443851075574777872871739557629258, time:1750768512.2549913s req_ids:[8] +DEBUG 06-24 20:35:12 [manager.py:391] +ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:212.87775039672852ms total_cost_time:212.90278434753418ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13484 prompt_cache_len:5151 prompt_cache_ratio:0.3820083061406111 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 +DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:12 [batch.py:51] router release req id 8 +INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10432171821594238 s +INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.1064155101776123 s +DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=109082966791858761244917806041188341923, time:1750768512.4747255s req_ids:[8] +DEBUG 06-24 20:35:12 [manager.py:391] +ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:211.20858192443848ms total_cost_time:211.23337745666504ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:13485 prompt_cache_len:5151 prompt_cache_ratio:0.381979977753059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 +DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:12 [batch.py:51] router release req id 8 +INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10672760009765625 s +INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10860180854797363 s +DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=136582768890012693046713006038130492799, time:1750768512.6885269s req_ids:[8] +DEBUG 06-24 20:35:12 [manager.py:391] +ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:223.05059432983398ms total_cost_time:223.0966091156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13486 prompt_cache_len:5151 prompt_cache_ratio:0.3819516535666617 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 +DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:12 [batch.py:51] router release req id 8 +INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.1079871654510498 s +INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.11034607887268066 s +DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=134467692001876750305319613243137394431, time:1750768512.917149s req_ids:[8] +DEBUG 06-24 20:35:12 [manager.py:391] +ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:207.72099494934082ms total_cost_time:207.7641487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13487 prompt_cache_len:5151 prompt_cache_ratio:0.3819233335804849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 +DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:13 [batch.py:51] router release req id 8 +INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.3104104995727539 s +INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.3124861717224121 s +DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=101329860609039574373457443582120150074, time:1750768513.3433776s req_ids:[8] +DEBUG 06-24 20:35:13 [manager.py:391] +ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:432.4986934661865ms total_cost_time:432.5425624847412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13488 prompt_cache_len:5151 prompt_cache_ratio:0.3818950177935943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 +DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:13 [batch.py:51] router release req id 8 +INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s +INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.10963201522827148 s +DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=192099759748764297886426198030423768572, time:1750768513.5710893s req_ids:[8] +DEBUG 06-24 20:35:13 [manager.py:391] +ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:216.3100242614746ms total_cost_time:216.36223793029785ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13489 prompt_cache_len:5151 prompt_cache_ratio:0.381866706205056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 +DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:13 [batch.py:51] router release req id 8 +INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s +INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s +DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=10656322050022907688425201649855050095, time:1750768513.7937691s req_ids:[8] +DEBUG 06-24 20:35:13 [manager.py:391] +ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:213.29307556152344ms total_cost_time:213.33670616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13490 prompt_cache_len:5151 prompt_cache_ratio:0.3818383988139363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 +DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:13 [batch.py:51] router release req id 8 +INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10935664176940918 s +INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11128091812133789 s +DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=305429235431176243755425194693126962958, time:1750768514.0141892s req_ids:[8] +DEBUG 06-24 20:35:14 [manager.py:391] +ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:213.8214111328125ms total_cost_time:213.8655185699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13491 prompt_cache_len:5151 prompt_cache_ratio:0.38181009561930174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 +DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:14 [batch.py:51] router release req id 8 +INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10888195037841797 s +INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=215283748554130037566963571315535696329, time:1750768514.2338738s req_ids:[8] +DEBUG 06-24 20:35:14 [manager.py:391] +ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:212.4791145324707ms total_cost_time:212.5244140625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13492 prompt_cache_len:5151 prompt_cache_ratio:0.3817817966202194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 +DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:14 [batch.py:51] router release req id 8 +INFO 06-24 20:35:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10846471786499023 s +INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11017727851867676 s +DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=311078691381642513147451874147232175094, time:1750768514.4634604s req_ids:[8] +DEBUG 06-24 20:35:14 [manager.py:391] +ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:226.15551948547363ms total_cost_time:226.20201110839844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13493 prompt_cache_len:5151 prompt_cache_ratio:0.3817535018157563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 +DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:14 [batch.py:51] router release req id 8 +INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.20916199684143066 s +INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.2108478546142578 s +DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=5263106177638102027304930267248553063, time:1750768514.8197272s req_ids:[8] +DEBUG 06-24 20:35:14 [manager.py:391] +ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:336.9336128234863ms total_cost_time:336.9793891906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13494 prompt_cache_len:5151 prompt_cache_ratio:0.38172521120498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 +DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:14 [batch.py:51] router release req id 8 +INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.10926103591918945 s +INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.11138486862182617 s +DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=94457410413412500682660574924056772373, time:1750768515.0310712s req_ids:[8] +DEBUG 06-24 20:35:15 [manager.py:391] +ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:210.67261695861816ms total_cost_time:210.71863174438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13495 prompt_cache_len:5151 prompt_cache_ratio:0.38169692478695816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 +DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:15 [batch.py:51] router release req id 8 +INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1072995662689209 s +INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949039459228516 s +DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=111262514160323482222028879809646545083, time:1750768515.2448292s req_ids:[8] +DEBUG 06-24 20:35:15 [manager.py:391] +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:212.43643760681152ms total_cost_time:212.45980262756348ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13496 prompt_cache_len:5151 prompt_cache_ratio:0.3816686425607587 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 +DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:15 [batch.py:51] router release req id 8 +INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1041557788848877 s +INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10609292984008789 s +DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=55499376893561196063863924588642457495, time:1750768515.464836s req_ids:[8] +DEBUG 06-24 20:35:15 [manager.py:391] +ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:171.7069149017334ms total_cost_time:171.73051834106445ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:13497 prompt_cache_len:5151 prompt_cache_ratio:0.3816403645254501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 +DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:15 [batch.py:51] router release req id 8 +INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1047370433807373 s +INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10630321502685547 s +DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=68452599615287034601777441131609894798, time:1750768515.6425755s req_ids:[8] +DEBUG 06-24 20:35:15 [manager.py:391] +ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:204.23555374145508ms total_cost_time:204.2684555053711ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:13498 prompt_cache_len:5151 prompt_cache_ratio:0.38161209068010077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 +DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:15 [batch.py:51] router release req id 8 +INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.10492634773254395 s +INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10689306259155273 s +DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=229293841028034699459007222321287600636, time:1750768515.851977s req_ids:[8] +DEBUG 06-24 20:35:15 [manager.py:391] +ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:212.33725547790527ms total_cost_time:212.36300468444824ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:13499 prompt_cache_len:5151 prompt_cache_ratio:0.3815838210237795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 +DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:16 [batch.py:51] router release req id 8 +INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.30637502670288086 s +INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.30829787254333496 s +DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=122438894719322987146298287444862272656, time:1750768516.2779472s req_ids:[8] +DEBUG 06-24 20:35:16 [manager.py:391] +ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:403.80334854125977ms total_cost_time:403.8267135620117ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13500 prompt_cache_len:5151 prompt_cache_ratio:0.38155555555555554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 +DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:16 [batch.py:51] router release req id 8 +INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10470104217529297 s +INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.1067807674407959 s +DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=31865694289182221214471122075911492342, time:1750768516.477213s req_ids:[8] +DEBUG 06-24 20:35:16 [manager.py:391] +ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:210.07061004638672ms total_cost_time:210.09492874145508ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:13501 prompt_cache_len:5151 prompt_cache_ratio:0.38152729427449816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 +DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:16 [batch.py:51] router release req id 8 +INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10453510284423828 s +INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.10644745826721191 s +DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=91364465325999722922810822441609455594, time:1750768516.6919203s req_ids:[8] +DEBUG 06-24 20:35:16 [manager.py:391] +ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:215.38352966308594ms total_cost_time:215.4066562652588ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13502 prompt_cache_len:5151 prompt_cache_ratio:0.3814990371796771 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 +DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:16 [batch.py:51] router release req id 8 +INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s +INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.10955977439880371 s +DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=251178217581235756545374165214175187558, time:1750768516.9240246s req_ids:[8] +DEBUG 06-24 20:35:16 [manager.py:391] +ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:225.08955001831055ms total_cost_time:225.1126766204834ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13503 prompt_cache_len:5151 prompt_cache_ratio:0.3814707842701622 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 +DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:17 [batch.py:51] router release req id 8 +INFO 06-24 20:35:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.10447573661804199 s +INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.10636377334594727 s +DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=241190968457501726389936801867976024410, time:1750768517.144868s req_ids:[8] +DEBUG 06-24 20:35:17 [manager.py:391] +ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:215.40117263793945ms total_cost_time:215.4247760772705ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:13504 prompt_cache_len:5151 prompt_cache_ratio:0.3814425355450237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 +DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:17 [batch.py:51] router release req id 8 +INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.10751724243164062 s +INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.10927295684814453 s +DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=338085777019073191096955392463724036378, time:1750768517.363466s req_ids:[8] +DEBUG 06-24 20:35:17 [manager.py:391] +ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:213.43278884887695ms total_cost_time:213.47808837890625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13505 prompt_cache_len:5151 prompt_cache_ratio:0.3814142910033321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 +DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:17 [batch.py:51] router release req id 8 +INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.310089111328125 s +INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.3121819496154785 s +DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=338667369625110186613379319189740373125, time:1750768517.7888982s req_ids:[8] +DEBUG 06-24 20:35:17 [manager.py:391] +ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:421.8635559082031ms total_cost_time:421.9067096710205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13506 prompt_cache_len:5151 prompt_cache_ratio:0.38138605064415815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 +DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:17 [batch.py:51] router release req id 8 +INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10622048377990723 s +INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.10828185081481934 s +DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=234075078935595581354742673354677048314, time:1750768518.012457s req_ids:[8] +DEBUG 06-24 20:35:18 [manager.py:391] +ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:214.90168571472168ms total_cost_time:214.94626998901367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13507 prompt_cache_len:5151 prompt_cache_ratio:0.3813578144665729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 +DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:18 [batch.py:51] router release req id 8 +INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10863614082336426 s +INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11063957214355469 s +DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=7134721490995961086479906423878473167, time:1750768518.2336931s req_ids:[8] +DEBUG 06-24 20:35:18 [manager.py:391] +ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:224.01976585388184ms total_cost_time:224.06411170959473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13508 prompt_cache_len:5151 prompt_cache_ratio:0.3813295824696476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 +DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:18 [batch.py:51] router release req id 8 +INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10918712615966797 s +INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.1113271713256836 s +DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=298738891442720836578092045218827285624, time:1750768518.4641619s req_ids:[8] +DEBUG 06-24 20:35:18 [manager.py:391] +ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:35:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 53552.384 tokens/s +DEBUG 06-24 20:35:18 [stats.py:37] Avg prompt tokens throughput: 53544.346 tokens/s +DEBUG 06-24 20:35:18 [stats.py:37] Avg generate tokens throughput: 8.038 tokens/s +INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:205.08933067321777ms total_cost_time:205.13319969177246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13509 prompt_cache_len:5151 prompt_cache_ratio:0.3813013546524539 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 +DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:18 [batch.py:51] router release req id 8 +INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10909700393676758 s +INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s +DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=338045735656072912198657337667476354446, time:1750768518.6789262s req_ids:[8] +DEBUG 06-24 20:35:18 [manager.py:391] +ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:215.49630165100098ms total_cost_time:215.54088592529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13510 prompt_cache_len:5151 prompt_cache_ratio:0.3812731310140637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 +DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:18 [batch.py:51] router release req id 8 +INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s +INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11027002334594727 s +DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=257559580897265676228074629595790247602, time:1750768518.9001205s req_ids:[8] +DEBUG 06-24 20:35:18 [manager.py:391] +ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:220.57652473449707ms total_cost_time:220.62158584594727ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13511 prompt_cache_len:5151 prompt_cache_ratio:0.38124491155354895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 +DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:19 [batch.py:51] router release req id 8 +INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.310413122177124 s +INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.3124217987060547 s +DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=321246369030131685161442878848601716948, time:1750768519.3571875s req_ids:[8] +DEBUG 06-24 20:35:19 [manager.py:391] +ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:452.2745609283447ms total_cost_time:452.3186683654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13512 prompt_cache_len:5151 prompt_cache_ratio:0.3812166962699822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 +DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:19 [batch.py:51] router release req id 8 +INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.10793399810791016 s +INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s +DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=294187099250190950767577007509435414236, time:1750768519.5856745s req_ids:[8] +DEBUG 06-24 20:35:19 [manager.py:391] +ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:211.19165420532227ms total_cost_time:211.23790740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13513 prompt_cache_len:5151 prompt_cache_ratio:0.3811884851624362 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 +DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:19 [batch.py:51] router release req id 8 +INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s +INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.11031436920166016 s +DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=212518209123676665769896616335160824133, time:1750768519.8030756s req_ids:[8] +DEBUG 06-24 20:35:19 [manager.py:391] +ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:214.5400047302246ms total_cost_time:214.5850658416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13514 prompt_cache_len:5151 prompt_cache_ratio:0.3811602782299837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 +DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:19 [batch.py:51] router release req id 8 +INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10755324363708496 s +INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.1094977855682373 s +DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=260319925486479045100453733391916411980, time:1750768520.0265894s req_ids:[8] +DEBUG 06-24 20:35:20 [manager.py:391] +ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:213.31048011779785ms total_cost_time:213.33551406860352ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13515 prompt_cache_len:5151 prompt_cache_ratio:0.38113207547169814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 +DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:20 [batch.py:51] router release req id 8 +INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10675406455993652 s +INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.10875439643859863 s +DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=166234919584143675417082794850606005682, time:1750768520.2577314s req_ids:[8] +DEBUG 06-24 20:35:20 [manager.py:391] +ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:230.20052909851074ms total_cost_time:230.24439811706543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13516 prompt_cache_len:5151 prompt_cache_ratio:0.38110387688665287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 +DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:20 [batch.py:51] router release req id 8 +INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10804557800292969 s +INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.11000347137451172 s +DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=76512940949376956645712486316017614531, time:1750768520.4810264s req_ids:[8] +DEBUG 06-24 20:35:20 [manager.py:391] +ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:220.73650360107422ms total_cost_time:220.7803726196289ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13517 prompt_cache_len:5151 prompt_cache_ratio:0.3810756824739217 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 +DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:20 [batch.py:51] router release req id 8 +INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.3115808963775635 s +INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.313647985458374 s +DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=214560136979281156566116959930718590886, time:1750768520.935313s req_ids:[8] +DEBUG 06-24 20:35:20 [manager.py:391] +ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:448.0314254760742ms total_cost_time:448.0750560760498ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13518 prompt_cache_len:5151 prompt_cache_ratio:0.3810474922325788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 +DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:21 [batch.py:51] router release req id 8 +INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10780739784240723 s +INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.10979294776916504 s +DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=118957170072288500085438178940283941586, time:1750768521.16327s req_ids:[8] +DEBUG 06-24 20:35:21 [manager.py:391] +ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:216.25971794128418ms total_cost_time:216.30167961120605ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13519 prompt_cache_len:5151 prompt_cache_ratio:0.3810193061616983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 +DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:21 [batch.py:51] router release req id 8 +INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10826706886291504 s +INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.11023283004760742 s +DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=74706492479499175073181673788189832839, time:1750768521.3899243s req_ids:[8] +DEBUG 06-24 20:35:21 [manager.py:391] +ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:214.29777145385742ms total_cost_time:214.3421173095703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13520 prompt_cache_len:5151 prompt_cache_ratio:0.38099112426035503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 +DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:21 [batch.py:51] router release req id 8 +INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10654997825622559 s +INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.10853838920593262 s +DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=61747741271542383717374210681947527137, time:1750768521.6200888s req_ids:[8] +DEBUG 06-24 20:35:21 [manager.py:391] +ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:230.21841049194336ms total_cost_time:230.26466369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13521 prompt_cache_len:5151 prompt_cache_ratio:0.3809629465276237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 +DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:21 [batch.py:51] router release req id 8 +INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10911226272583008 s +INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.11067700386047363 s +DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=44003045671199032746318593329220581350, time:1750768521.8419807s req_ids:[8] +DEBUG 06-24 20:35:21 [manager.py:391] +ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:206.4809799194336ms total_cost_time:206.5267562866211ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13522 prompt_cache_len:5151 prompt_cache_ratio:0.3809347729625795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 +DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:21 [batch.py:51] router release req id 8 +INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.11270380020141602 s +DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=265855933397719171773198952219928772313, time:1750768522.0562634s req_ids:[8] +DEBUG 06-24 20:35:22 [manager.py:391] +INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11477446556091309 s +ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:200.97017288208008ms total_cost_time:201.07793807983398ms,out_token_counter:1 mean_per_token_cost_time: 0.10776519775390625ms prompt_token_num:13523 prompt_cache_len:5151 prompt_cache_ratio:0.38090660356429784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 +DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:22 [batch.py:51] router release req id 8 +INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s +INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11004233360290527 s +DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=207068961858260333355372842801318544803, time:1750768522.2656786s req_ids:[8] +DEBUG 06-24 20:35:22 [manager.py:391] +ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:379.87208366394043ms total_cost_time:379.9169063568115ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13524 prompt_cache_len:5151 prompt_cache_ratio:0.3808784383318545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 +DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:22 [batch.py:51] router release req id 8 +INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10879230499267578 s +INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11063408851623535 s +DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=222863301228541998130265184312956947443, time:1750768522.6590815s req_ids:[8] +DEBUG 06-24 20:35:22 [manager.py:391] +ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:223.08802604675293ms total_cost_time:223.1314182281494ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13525 prompt_cache_len:5151 prompt_cache_ratio:0.3808502772643253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 +DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:22 [batch.py:51] router release req id 8 +INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10773324966430664 s +INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s +DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=62801144581053709785786445820751095853, time:1750768522.880199s req_ids:[8] +DEBUG 06-24 20:35:22 [manager.py:391] +ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:213.41896057128906ms total_cost_time:213.46402168273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13526 prompt_cache_len:5151 prompt_cache_ratio:0.3808221203607866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 +DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:23 [batch.py:51] router release req id 8 +INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10647249221801758 s +INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.10840892791748047 s +DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=282627781475866203830181504569334316234, time:1750768523.0997498s req_ids:[8] +DEBUG 06-24 20:35:23 [manager.py:391] +ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:213.3805751800537ms total_cost_time:213.4251594543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13527 prompt_cache_len:5151 prompt_cache_ratio:0.3807939676203149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 +INFO 06-24 20:35:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:35:23 [statics_utils.py:24] mean first cost: 230.9125709439558 ms +INFO 06-24 20:35:23 [statics_utils.py:24] mean per token cost: 0.05948571368764656 ms +DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:23 [batch.py:51] router release req id 8 +INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s +INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s +DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=101451309035023524541161163381679769151, time:1750768523.3198268s req_ids:[8] +DEBUG 06-24 20:35:23 [manager.py:391] +ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:211.17329597473145ms total_cost_time:211.21621131896973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13528 prompt_cache_len:5151 prompt_cache_ratio:0.380765819041987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 +DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:23 [batch.py:51] router release req id 8 +INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10868334770202637 s +INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s +DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=25223083144882995848980420397434503363, time:1750768523.5451324s req_ids:[8] +DEBUG 06-24 20:35:23 [manager.py:391] +ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:214.27321434020996ms total_cost_time:214.2939567565918ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13529 prompt_cache_len:5151 prompt_cache_ratio:0.3807376746248799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 +DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:23 [batch.py:51] router release req id 8 +INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10691666603088379 s +INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.10895442962646484 s +DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=236830920466563412432249674157409265386, time:1750768523.7594347s req_ids:[8] +DEBUG 06-24 20:35:23 [manager.py:391] +ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:373.1493949890137ms total_cost_time:373.19350242614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13530 prompt_cache_len:5151 prompt_cache_ratio:0.380709534368071 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 +DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:24 [batch.py:51] router release req id 8 +INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10770988464355469 s +INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.10996747016906738 s +DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=97326612597371904055037042127020402445, time:1750768524.139205s req_ids:[8] +DEBUG 06-24 20:35:24 [manager.py:391] +ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:206.79879188537598ms total_cost_time:206.84242248535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13531 prompt_cache_len:5151 prompt_cache_ratio:0.3806813982706378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 +DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:24 [batch.py:51] router release req id 8 +INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10660123825073242 s +INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.10860419273376465 s +DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=132762905801960123887224584138663015879, time:1750768524.3514616s req_ids:[8] +DEBUG 06-24 20:35:24 [manager.py:391] +ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:201.29680633544922ms total_cost_time:201.34305953979492ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13532 prompt_cache_len:5151 prompt_cache_ratio:0.3806532663316583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 +DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:24 [batch.py:51] router release req id 8 +INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10932731628417969 s +INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11131548881530762 s +DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=143822487837873180608873872689709161988, time:1750768524.5586855s req_ids:[8] +DEBUG 06-24 20:35:24 [manager.py:391] +ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:210.78944206237793ms total_cost_time:210.8321189880371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13533 prompt_cache_len:5151 prompt_cache_ratio:0.3806251385502106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 +DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:24 [batch.py:51] router release req id 8 +INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10851025581359863 s +INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11051106452941895 s +DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=62590573886996443778712731714568861239, time:1750768524.7763762s req_ids:[8] +DEBUG 06-24 20:35:24 [manager.py:391] +ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:214.646577835083ms total_cost_time:214.6890163421631ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13534 prompt_cache_len:5151 prompt_cache_ratio:0.3805970149253731 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 +DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:24 [batch.py:51] router release req id 8 +INFO 06-24 20:35:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10943722724914551 s +INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11133766174316406 s +DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=137146347829630407559372545946519609540, time:1750768524.99712s req_ids:[8] +DEBUG 06-24 20:35:24 [manager.py:391] +ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:209.3679904937744ms total_cost_time:209.41448211669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13535 prompt_cache_len:5151 prompt_cache_ratio:0.3805688954562246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 +DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:25 [batch.py:51] router release req id 8 +INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10835480690002441 s +INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.11026525497436523 s +DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=101646140415386300518398467143017679567, time:1750768525.213583s req_ids:[8] +DEBUG 06-24 20:35:25 [manager.py:391] +ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:386.6889476776123ms total_cost_time:386.7313861846924ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13536 prompt_cache_len:5151 prompt_cache_ratio:0.38054078014184395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 +DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:25 [batch.py:51] router release req id 8 +INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10848379135131836 s +INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.1106412410736084 s +DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=246155680397242381860894956418341823248, time:1750768525.6081538s req_ids:[8] +DEBUG 06-24 20:35:25 [manager.py:391] +ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:216.13430976867676ms total_cost_time:216.17889404296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13537 prompt_cache_len:5151 prompt_cache_ratio:0.3805126689813105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 +DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:25 [batch.py:51] router release req id 8 +INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s +INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.110748291015625 s +DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=24641326686816645603027094255189333353, time:1750768525.8324242s req_ids:[8] +DEBUG 06-24 20:35:25 [manager.py:391] +ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:211.9290828704834ms total_cost_time:211.9584083557129ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:13538 prompt_cache_len:5151 prompt_cache_ratio:0.38048456197370367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 +DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:25 [batch.py:51] router release req id 8 +INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10706329345703125 s +INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.10902142524719238 s +DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=311621180563016062063343573876797254757, time:1750768526.052534s req_ids:[8] +DEBUG 06-24 20:35:26 [manager.py:391] +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:216.37821197509766ms total_cost_time:216.42208099365234ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13539 prompt_cache_len:5151 prompt_cache_ratio:0.3804564591181033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 +DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:26 [batch.py:51] router release req id 8 +INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s +INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.11110687255859375 s +DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=164511702816413139899114360973358632465, time:1750768526.271763s req_ids:[8] +DEBUG 06-24 20:35:26 [manager.py:391] +ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:214.31350708007812ms total_cost_time:214.3564224243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13540 prompt_cache_len:5151 prompt_cache_ratio:0.38042836041358935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 +DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:26 [batch.py:51] router release req id 8 +INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10838937759399414 s +INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.11037111282348633 s +DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=144933136015706402702299558769619873126, time:1750768526.4923673s req_ids:[8] +DEBUG 06-24 20:35:26 [manager.py:391] +ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:213.72294425964355ms total_cost_time:213.76824378967285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13541 prompt_cache_len:5151 prompt_cache_ratio:0.3804002658592423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 +DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:26 [batch.py:51] router release req id 8 +INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10796999931335449 s +INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.10986566543579102 s +DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=204607957924619966199713946750856585955, time:1750768526.7118762s req_ids:[8] +DEBUG 06-24 20:35:26 [manager.py:391] +ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:387.1006965637207ms total_cost_time:387.1438503265381ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13542 prompt_cache_len:5151 prompt_cache_ratio:0.38037217545414265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 +DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:27 [batch.py:51] router release req id 8 +INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10872817039489746 s +INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.11121034622192383 s +DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=171605179324773507327757219479854522597, time:1750768527.1094468s req_ids:[8] +DEBUG 06-24 20:35:27 [manager.py:391] +ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:219.5589542388916ms total_cost_time:219.6042537689209ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13543 prompt_cache_len:5151 prompt_cache_ratio:0.38034408919737134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 +DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:27 [batch.py:51] router release req id 8 +INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10753583908081055 s +INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s +DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=106478689907539338942813326902768897035, time:1750768527.3417575s req_ids:[8] +DEBUG 06-24 20:35:27 [manager.py:391] +ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:224.3669033050537ms total_cost_time:224.3947982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:13544 prompt_cache_len:5151 prompt_cache_ratio:0.38031600708800944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 +DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:27 [batch.py:51] router release req id 8 +INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10717201232910156 s +INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.10908222198486328 s +DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=50535755730518306237220409847822448762, time:1750768527.5633478s req_ids:[8] +DEBUG 06-24 20:35:27 [manager.py:391] +ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:214.0491008758545ms total_cost_time:214.0958309173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13545 prompt_cache_len:5151 prompt_cache_ratio:0.3802879291251384 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 +DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:27 [batch.py:51] router release req id 8 +INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10894584655761719 s +INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.1108856201171875 s +DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=45002839426668605962024154616178814190, time:1750768527.7840836s req_ids:[8] +DEBUG 06-24 20:35:27 [manager.py:391] +ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:214.6153450012207ms total_cost_time:214.6611213684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13546 prompt_cache_len:5151 prompt_cache_ratio:0.38025985530783996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 +DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:27 [batch.py:51] router release req id 8 +INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10820555686950684 s +INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.11014294624328613 s +DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=279135822200767994996470094155979586962, time:1750768528.0038457s req_ids:[8] +DEBUG 06-24 20:35:28 [manager.py:391] +ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:215.17276763916016ms total_cost_time:215.21377563476562ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13547 prompt_cache_len:5151 prompt_cache_ratio:0.380231785635196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 +DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:28 [batch.py:51] router release req id 8 +INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10881352424621582 s +INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098384857177734 s +DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=303575767843888318578559565346184908747, time:1750768528.2237856s req_ids:[8] +DEBUG 06-24 20:35:28 [manager.py:391] +ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:386.5337371826172ms total_cost_time:386.5773677825928ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13548 prompt_cache_len:5151 prompt_cache_ratio:0.3802037201062888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 +DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:28 [batch.py:51] router release req id 8 +INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10892963409423828 s +INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098790168762207 s +DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=222646558114371765648188461988518780805, time:1750768528.6192265s req_ids:[8] +DEBUG 06-24 20:35:28 [manager.py:391] +DEBUG 06-24 20:35:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 53766.600 tokens/s +DEBUG 06-24 20:35:28 [stats.py:37] Avg prompt tokens throughput: 53758.752 tokens/s +DEBUG 06-24 20:35:28 [stats.py:37] Avg generate tokens throughput: 7.848 tokens/s +ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:220.68262100219727ms total_cost_time:220.72720527648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13549 prompt_cache_len:5151 prompt_cache_ratio:0.38017565872020076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 +DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:28 [batch.py:51] router release req id 8 +INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s +INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s +DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=45224402552076409646116309389511232286, time:1750768528.8452046s req_ids:[8] +DEBUG 06-24 20:35:28 [manager.py:391] +ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:216.29571914672852ms total_cost_time:216.3403034210205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13550 prompt_cache_len:5151 prompt_cache_ratio:0.3801476014760148 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 +DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:28 [batch.py:51] router release req id 8 +INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10804939270019531 s +INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.1097862720489502 s +DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=10632232300827147859452230043595652521, time:1750768529.069847s req_ids:[8] +DEBUG 06-24 20:35:29 [manager.py:391] +ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:175.82941055297852ms total_cost_time:175.85492134094238ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13551 prompt_cache_len:5151 prompt_cache_ratio:0.3801195483728138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 +DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:29 [batch.py:51] router release req id 8 +DEBUG 06-24 20:35:29 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:29 [manager.py:283] +DEBUG 06-24 20:35:29 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:29 [manager.py:284] +INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10695171356201172 s +INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10898971557617188 s +DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=290215412406720085126261590046712118234, time:1750768529.250307s req_ids:[8] +DEBUG 06-24 20:35:29 [manager.py:391] +ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:211.43817901611328ms total_cost_time:211.48180961608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13552 prompt_cache_len:5151 prompt_cache_ratio:0.38009149940968123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 +DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:29 [batch.py:51] router release req id 8 +INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s +INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10995721817016602 s +DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=101634299414418226106890870212665705106, time:1750768529.4663765s req_ids:[8] +DEBUG 06-24 20:35:29 [manager.py:391] +ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:215.21854400634766ms total_cost_time:215.25931358337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:13553 prompt_cache_len:5151 prompt_cache_ratio:0.38006345458570057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 +DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:29 [batch.py:51] router release req id 8 +INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10773062705993652 s +INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10974955558776855 s +DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=161549705809815091670116986995449395976, time:1750768529.6867502s req_ids:[8] +DEBUG 06-24 20:35:29 [manager.py:391] +ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:406.9249629974365ms total_cost_time:406.9702625274658ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13554 prompt_cache_len:5151 prompt_cache_ratio:0.3800354138999557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 +DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:30 [batch.py:51] router release req id 8 +INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s +INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s +DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=3363760501513089804308577449257133545, time:1750768530.1011932s req_ids:[8] +DEBUG 06-24 20:35:30 [manager.py:391] +ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:213.2577896118164ms total_cost_time:213.3030891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13555 prompt_cache_len:5151 prompt_cache_ratio:0.3800073773515308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 +DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:30 [batch.py:51] router release req id 8 +INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10790610313415527 s +INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10989618301391602 s +DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=288406126327584555997376147271561287245, time:1750768530.3250968s req_ids:[8] +DEBUG 06-24 20:35:30 [manager.py:391] +ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:221.4045524597168ms total_cost_time:221.44842147827148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13556 prompt_cache_len:5151 prompt_cache_ratio:0.3799793449395102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 +DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:30 [batch.py:51] router release req id 8 +INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.1083221435546875 s +INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.11029195785522461 s +DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=232861678327069312893139014494772282294, time:1750768530.5481477s req_ids:[8] +DEBUG 06-24 20:35:30 [manager.py:391] +ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:216.88342094421387ms total_cost_time:216.92657470703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13557 prompt_cache_len:5151 prompt_cache_ratio:0.37995131666297854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 +DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:30 [batch.py:51] router release req id 8 +INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10637950897216797 s +INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10842561721801758 s +DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=148102374207448866030297248807675922900, time:1750768530.774753s req_ids:[8] +DEBUG 06-24 20:35:30 [manager.py:391] +ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:216.64094924926758ms total_cost_time:216.68410301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13558 prompt_cache_len:5151 prompt_cache_ratio:0.3799232925210208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 +DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:30 [batch.py:51] router release req id 8 +INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s +INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s +DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=2958194460294366080596916224931478982, time:1750768530.994861s req_ids:[8] +DEBUG 06-24 20:35:30 [manager.py:391] +ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:214.7822380065918ms total_cost_time:214.82539176940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13559 prompt_cache_len:5151 prompt_cache_ratio:0.3798952725127222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 +DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:31 [batch.py:51] router release req id 8 +INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10851478576660156 s +INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11051130294799805 s +DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=14660690526824778629956306495919479849, time:1750768531.216488s req_ids:[8] +DEBUG 06-24 20:35:31 [manager.py:391] +ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:389.721155166626ms total_cost_time:389.7831439971924ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13560 prompt_cache_len:5151 prompt_cache_ratio:0.37986725663716814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 +DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:31 [batch.py:51] router release req id 8 +INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10866641998291016 s +INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s +DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=279983004152471551153886949839592746078, time:1750768531.613336s req_ids:[8] +DEBUG 06-24 20:35:31 [manager.py:391] +ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:210.86907386779785ms total_cost_time:210.91413497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13561 prompt_cache_len:5151 prompt_cache_ratio:0.37983924489344445 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 +DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:31 [batch.py:51] router release req id 8 +INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10903620719909668 s +INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11013436317443848 s +DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=310210767645060727808977156509735760854, time:1750768531.8301296s req_ids:[8] +DEBUG 06-24 20:35:31 [manager.py:391] +ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:213.10186386108398ms total_cost_time:213.14549446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13562 prompt_cache_len:5151 prompt_cache_ratio:0.3798112372806371 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 +DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:31 [batch.py:51] router release req id 8 +INFO 06-24 20:35:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10948824882507324 s +INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s +DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=191242360890962216877215381395845433125, time:1750768532.0513444s req_ids:[8] +DEBUG 06-24 20:35:32 [manager.py:391] +ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:213.2394313812256ms total_cost_time:213.3009433746338ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:13563 prompt_cache_len:5151 prompt_cache_ratio:0.37978323379783235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 +DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:32 [batch.py:51] router release req id 8 +INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10674452781677246 s +INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.10880661010742188 s +DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=329806366452338006307468627474264628358, time:1750768532.2776184s req_ids:[8] +DEBUG 06-24 20:35:32 [manager.py:391] +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:226.2887954711914ms total_cost_time:226.3333797454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13564 prompt_cache_len:5151 prompt_cache_ratio:0.3797552344441168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 +DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:32 [batch.py:51] router release req id 8 +INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.1084439754486084 s +INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s +DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=281336328116864151463279990971011717083, time:1750768532.5075297s req_ids:[8] +DEBUG 06-24 20:35:32 [manager.py:391] +ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:222.84340858459473ms total_cost_time:222.88751602172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13565 prompt_cache_len:5151 prompt_cache_ratio:0.3797272392185772 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 +DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:32 [batch.py:51] router release req id 8 +INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10831212997436523 s +INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.11032629013061523 s +DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=85873833822730675423254547922733870832, time:1750768532.731346s req_ids:[8] +DEBUG 06-24 20:35:32 [manager.py:391] +ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:216.45522117614746ms total_cost_time:216.51601791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:13566 prompt_cache_len:5151 prompt_cache_ratio:0.37969924812030076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 +DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:33 [batch.py:51] router release req id 8 +INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.31066012382507324 s +INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.3126206398010254 s +DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=5161970200727564149104243896581751760, time:1750768533.1619406s req_ids:[8] +DEBUG 06-24 20:35:33 [manager.py:391] +ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:427.783727645874ms total_cost_time:427.8290271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13567 prompt_cache_len:5151 prompt_cache_ratio:0.37967126114837474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 +DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:33 [batch.py:51] router release req id 8 +INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10874271392822266 s +INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11065173149108887 s +DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=120607128789180591877474473341246723163, time:1750768533.3892071s req_ids:[8] +DEBUG 06-24 20:35:33 [manager.py:391] +ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:216.2156105041504ms total_cost_time:216.25947952270508ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13568 prompt_cache_len:5151 prompt_cache_ratio:0.37964327830188677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 +DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:33 [batch.py:51] router release req id 8 +INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10796761512756348 s +INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11000585556030273 s +DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=299143253673056076435071339025638758269, time:1750768533.6104162s req_ids:[8] +DEBUG 06-24 20:35:33 [manager.py:391] +ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:217.33951568603516ms total_cost_time:217.40078926086426ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:13569 prompt_cache_len:5151 prompt_cache_ratio:0.37961529957992485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 +DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:33 [batch.py:51] router release req id 8 +INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10908389091491699 s +INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11095380783081055 s +DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=259792603472551837651152441632966603004, time:1750768533.8330035s req_ids:[8] +DEBUG 06-24 20:35:33 [manager.py:391] +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:212.92948722839355ms total_cost_time:212.97550201416016ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13570 prompt_cache_len:5151 prompt_cache_ratio:0.379587324981577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 +DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:33 [batch.py:51] router release req id 8 +INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10799932479858398 s +INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s +DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=78287704870024235580135876696246654243, time:1750768534.0542057s req_ids:[8] +DEBUG 06-24 20:35:34 [manager.py:391] +ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:215.74831008911133ms total_cost_time:215.81029891967773ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13571 prompt_cache_len:5151 prompt_cache_ratio:0.37955935450593176 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 +DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:34 [batch.py:51] router release req id 8 +INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10796928405761719 s +INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.1099543571472168 s +DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=20329249252715400333158940932378889567, time:1750768534.2768936s req_ids:[8] +DEBUG 06-24 20:35:34 [manager.py:391] +ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:391.1294937133789ms total_cost_time:391.19744300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.06794929504394531ms prompt_token_num:13572 prompt_cache_len:5151 prompt_cache_ratio:0.3795313881520778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 +DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:34 [batch.py:51] router release req id 8 +INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10879659652709961 s +INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11070537567138672 s +DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=24348689807481574669774629787261145293, time:1750768534.6733654s req_ids:[8] +DEBUG 06-24 20:35:34 [manager.py:391] +ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:210.22629737854004ms total_cost_time:210.27255058288574ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13573 prompt_cache_len:5151 prompt_cache_ratio:0.37950342591910413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 +DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:34 [batch.py:51] router release req id 8 +INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s +INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11059141159057617 s +DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=181691761535626656249215004079537671223, time:1750768534.8915102s req_ids:[8] +DEBUG 06-24 20:35:34 [manager.py:391] +ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:211.35449409484863ms total_cost_time:211.39931678771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13574 prompt_cache_len:5151 prompt_cache_ratio:0.3794754678060999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 +DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:35 [batch.py:51] router release req id 8 +INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s +INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.1105506420135498 s +DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=236300158308190338302602781827858920842, time:1750768535.108223s req_ids:[8] +DEBUG 06-24 20:35:35 [manager.py:391] +ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:210.22844314575195ms total_cost_time:210.27135848999023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13575 prompt_cache_len:5151 prompt_cache_ratio:0.3794475138121547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 +DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:35 [batch.py:51] router release req id 8 +INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10840010643005371 s +INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11025571823120117 s +DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=216655161829894292069005913172566541290, time:1750768535.3246982s req_ids:[8] +DEBUG 06-24 20:35:35 [manager.py:391] +ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:218.17612648010254ms total_cost_time:218.23692321777344ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:13576 prompt_cache_len:5151 prompt_cache_ratio:0.3794195639363583 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 +DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:35 [batch.py:51] router release req id 8 +INFO 06-24 20:35:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10879683494567871 s +INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11079716682434082 s +DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=302396177918448125234015697100354566302, time:1750768535.548697s req_ids:[8] +DEBUG 06-24 20:35:35 [manager.py:391] +ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:215.6054973602295ms total_cost_time:215.64817428588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13577 prompt_cache_len:5151 prompt_cache_ratio:0.3793916181778007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 +DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:35 [batch.py:51] router release req id 8 +INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10898280143737793 s +INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11007547378540039 s +DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=197906200729026109771295435179287237497, time:1750768535.770817s req_ids:[8] +DEBUG 06-24 20:35:35 [manager.py:391] +ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:214.51091766357422ms total_cost_time:214.55693244934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13578 prompt_cache_len:5151 prompt_cache_ratio:0.37936367653557224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 +DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:36 [batch.py:51] router release req id 8 +INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.30853819847106934 s +INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.3104114532470703 s +DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=14845535836316618575025318317005125427, time:1750768536.200914s req_ids:[8] +DEBUG 06-24 20:35:36 [manager.py:391] +ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:432.54923820495605ms total_cost_time:432.57904052734375ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:13579 prompt_cache_len:5151 prompt_cache_ratio:0.37933573900876355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 +DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:36 [batch.py:51] router release req id 8 +INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10693049430847168 s +INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.10878181457519531 s +DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=311261208011281589952512084143693185075, time:1750768536.4302967s req_ids:[8] +DEBUG 06-24 20:35:36 [manager.py:391] +ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:217.37432479858398ms total_cost_time:217.41867065429688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13580 prompt_cache_len:5151 prompt_cache_ratio:0.3793078055964654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 +DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:36 [batch.py:51] router release req id 8 +INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10851430892944336 s +INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s +DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=3641296087445680746202259588715548498, time:1750768536.6532393s req_ids:[8] +DEBUG 06-24 20:35:36 [manager.py:391] +ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:214.39743041992188ms total_cost_time:214.44439888000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13581 prompt_cache_len:5151 prompt_cache_ratio:0.37927987629776894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 +DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:36 [batch.py:51] router release req id 8 +INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10762953758239746 s +INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.10964560508728027 s +DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=278834966051042837863672751450034443664, time:1750768536.8762949s req_ids:[8] +DEBUG 06-24 20:35:36 [manager.py:391] +ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:214.1406536102295ms total_cost_time:214.1859531402588ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13582 prompt_cache_len:5151 prompt_cache_ratio:0.3792519511117656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 +DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:37 [batch.py:51] router release req id 8 +INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10733580589294434 s +INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092066764831543 s +DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=119256392807729663652892353861577670449, time:1750768537.0975304s req_ids:[8] +DEBUG 06-24 20:35:37 [manager.py:391] +ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:218.11389923095703ms total_cost_time:218.15729141235352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13583 prompt_cache_len:5151 prompt_cache_ratio:0.37922403003754696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 +DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:37 [batch.py:51] router release req id 8 +INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10800814628601074 s +INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.10920095443725586 s +DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=251511050988714103956253247271376093030, time:1750768537.3187537s req_ids:[8] +DEBUG 06-24 20:35:37 [manager.py:391] +ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:214.94030952453613ms total_cost_time:214.98727798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13584 prompt_cache_len:5151 prompt_cache_ratio:0.3791961130742049 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 +DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:37 [batch.py:51] router release req id 8 +INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.31099843978881836 s +INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.3129112720489502 s +DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=304757996826823222441906173507418880528, time:1750768537.7506688s req_ids:[8] +DEBUG 06-24 20:35:37 [manager.py:391] +ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:431.8265914916992ms total_cost_time:431.8854808807373ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:13585 prompt_cache_len:5151 prompt_cache_ratio:0.3791682002208318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 +DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:37 [batch.py:51] router release req id 8 +INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10791611671447754 s +INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.10918951034545898 s +DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=13146610025857157507931041877957509928, time:1750768537.980818s req_ids:[8] +DEBUG 06-24 20:35:37 [manager.py:391] +ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:208.8601589202881ms total_cost_time:208.90498161315918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13586 prompt_cache_len:5151 prompt_cache_ratio:0.37914029147651995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 +DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:38 [batch.py:51] router release req id 8 +INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s +INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10912847518920898 s +DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=252127636206557652567308260310565404815, time:1750768538.1977615s req_ids:[8] +DEBUG 06-24 20:35:38 [manager.py:391] +ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:209.63191986083984ms total_cost_time:209.67698097229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13587 prompt_cache_len:5151 prompt_cache_ratio:0.37911238684036214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 +DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:38 [batch.py:51] router release req id 8 +INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10969209671020508 s +INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.11172056198120117 s +DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=52875310274312571314831454095452601586, time:1750768538.4156318s req_ids:[8] +DEBUG 06-24 20:35:38 [manager.py:391] +ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:217.8635597229004ms total_cost_time:217.88311004638672ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13588 prompt_cache_len:5151 prompt_cache_ratio:0.3790844863114513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 +DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:38 [batch.py:51] router release req id 8 +INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10608911514282227 s +INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10798931121826172 s +DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=234359492558451081227452154693864692596, time:1750768538.6362839s req_ids:[8] +DEBUG 06-24 20:35:38 [manager.py:391] +DEBUG 06-24 20:35:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 54193.539 tokens/s +DEBUG 06-24 20:35:38 [stats.py:37] Avg prompt tokens throughput: 54185.553 tokens/s +DEBUG 06-24 20:35:38 [stats.py:37] Avg generate tokens throughput: 7.986 tokens/s +ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:217.10491180419922ms total_cost_time:217.1492576599121ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13589 prompt_cache_len:5151 prompt_cache_ratio:0.3790565898888807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 +DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:38 [batch.py:51] router release req id 8 +INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10803890228271484 s +INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10985374450683594 s +DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=89646764811124633090934752491578122788, time:1750768538.8570962s req_ids:[8] +DEBUG 06-24 20:35:38 [manager.py:391] +ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:389.6939754486084ms total_cost_time:389.7385597229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13590 prompt_cache_len:5151 prompt_cache_ratio:0.37902869757174396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 +DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:39 [batch.py:51] router release req id 8 +INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.1086728572845459 s +INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056756973266602 s +DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=82866626344295554103967183244468423070, time:1750768539.253616s req_ids:[8] +DEBUG 06-24 20:35:39 [manager.py:391] +ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:212.205171585083ms total_cost_time:212.2499942779541ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13591 prompt_cache_len:5151 prompt_cache_ratio:0.3790008093591347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 +DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:39 [batch.py:51] router release req id 8 +INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10852742195129395 s +INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11053180694580078 s +DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=312473198729401489207836045327266829095, time:1750768539.4690185s req_ids:[8] +DEBUG 06-24 20:35:39 [manager.py:391] +ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:211.83228492736816ms total_cost_time:211.87710762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13592 prompt_cache_len:5151 prompt_cache_ratio:0.37897292525014714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 +DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:39 [batch.py:51] router release req id 8 +INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s +INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s +DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=184515634672172484501593220330195933141, time:1750768539.6883461s req_ids:[8] +DEBUG 06-24 20:35:39 [manager.py:391] +ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:213.5021686553955ms total_cost_time:213.545560836792ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13593 prompt_cache_len:5151 prompt_cache_ratio:0.3789450452438755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 +DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:39 [batch.py:51] router release req id 8 +INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10934281349182129 s +INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.1114661693572998 s +DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=292717016435151522369893764715364919503, time:1750768539.9077733s req_ids:[8] +DEBUG 06-24 20:35:39 [manager.py:391] +ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:211.83085441589355ms total_cost_time:211.87424659729004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13594 prompt_cache_len:5151 prompt_cache_ratio:0.37891716933941444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 +DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:40 [batch.py:51] router release req id 8 +INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10908269882202148 s +INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.11111974716186523 s +DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=168519778785309824664876323874769970067, time:1750768540.1279783s req_ids:[8] +DEBUG 06-24 20:35:40 [manager.py:391] +ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:210.22486686706543ms total_cost_time:210.24727821350098ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:13595 prompt_cache_len:5151 prompt_cache_ratio:0.37888929753585876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 +DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:40 [batch.py:51] router release req id 8 +INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10319399833679199 s +INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.1050257682800293 s +DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=156977408477062322928380143385979748437, time:1750768540.3443096s req_ids:[8] +DEBUG 06-24 20:35:40 [manager.py:391] +ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:172.66464233398438ms total_cost_time:172.6844310760498ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:13596 prompt_cache_len:5151 prompt_cache_ratio:0.37886142983230364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 +DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:40 [batch.py:51] router release req id 8 +INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10681891441345215 s +INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.10802531242370605 s +DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=55371751508327389827965395931599578888, time:1750768540.5214486s req_ids:[8] +DEBUG 06-24 20:35:40 [manager.py:391] +ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:373.7204074859619ms total_cost_time:373.765230178833ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13597 prompt_cache_len:5151 prompt_cache_ratio:0.37883356622784436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 +DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:40 [batch.py:51] router release req id 8 +INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10801959037780762 s +INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s +DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=1967681378395399707303357263697826111, time:1750768540.8969193s req_ids:[8] +DEBUG 06-24 20:35:40 [manager.py:391] +ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:210.36148071289062ms total_cost_time:210.404634475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13598 prompt_cache_len:5151 prompt_cache_ratio:0.3788057067215767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 +DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:41 [batch.py:51] router release req id 8 +INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10888171195983887 s +INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11088204383850098 s +DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=149477419881146959458603593027434585439, time:1750768541.1171978s req_ids:[8] +DEBUG 06-24 20:35:41 [manager.py:391] +ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:217.09609031677246ms total_cost_time:217.14043617248535ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13599 prompt_cache_len:5151 prompt_cache_ratio:0.3787778513125965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 +DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:41 [batch.py:51] router release req id 8 +INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10810399055480957 s +INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.10946416854858398 s +DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=290396287117234931895476039953472333614, time:1750768541.3381763s req_ids:[8] +DEBUG 06-24 20:35:41 [manager.py:391] +ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:213.71865272521973ms total_cost_time:213.76299858093262ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13600 prompt_cache_len:5151 prompt_cache_ratio:0.37875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 +DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:41 [batch.py:51] router release req id 8 +INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10932135581970215 s +INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11054611206054688 s +DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=279481349294463796504529855668934353218, time:1750768541.5575454s req_ids:[8] +DEBUG 06-24 20:35:41 [manager.py:391] +ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:213.01555633544922ms total_cost_time:213.0589485168457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13601 prompt_cache_len:5151 prompt_cache_ratio:0.3787221527828836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 +DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:41 [batch.py:51] router release req id 8 +INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10843920707702637 s +INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11048507690429688 s +DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=192905104411030839179327211949817695604, time:1750768541.7775164s req_ids:[8] +DEBUG 06-24 20:35:41 [manager.py:391] +ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:391.2222385406494ms total_cost_time:391.2684917449951ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13602 prompt_cache_len:5151 prompt_cache_ratio:0.37869430966034406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 +DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:42 [batch.py:51] router release req id 8 +INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10888171195983887 s +INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017560958862305 s +DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=140789127812398456879938818544773926082, time:1750768542.1762826s req_ids:[8] +DEBUG 06-24 20:35:42 [manager.py:391] +ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:214.06149864196777ms total_cost_time:214.10655975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13603 prompt_cache_len:5151 prompt_cache_ratio:0.37866647063147835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 +DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:42 [batch.py:51] router release req id 8 +INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10792922973632812 s +INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.10974717140197754 s +DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=322251059869106923080699845792245571053, time:1750768542.3957598s req_ids:[8] +DEBUG 06-24 20:35:42 [manager.py:391] +ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:208.77480506896973ms total_cost_time:208.82034301757812ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13604 prompt_cache_len:5151 prompt_cache_ratio:0.3786386356953837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 +DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:42 [batch.py:51] router release req id 8 +INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.11018562316894531 s +INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11222362518310547 s +DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=8151488355836007724931859557944683671, time:1750768542.6109364s req_ids:[8] +DEBUG 06-24 20:35:42 [manager.py:391] +ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:212.1882438659668ms total_cost_time:212.20898628234863ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13605 prompt_cache_len:5151 prompt_cache_ratio:0.37861080485115767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 +DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:42 [batch.py:51] router release req id 8 +INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10851478576660156 s +INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11065006256103516 s +DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=297603167432330342416373557133570734676, time:1750768542.8426023s req_ids:[8] +DEBUG 06-24 20:35:42 [manager.py:391] +ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:229.41207885742188ms total_cost_time:229.45928573608398ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13606 prompt_cache_len:5151 prompt_cache_ratio:0.378582978097898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 +DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:42 [batch.py:51] router release req id 8 +INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10805392265319824 s +INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11006855964660645 s +DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=63523953996913537598672544553216320655, time:1750768543.06532s req_ids:[8] +DEBUG 06-24 20:35:43 [manager.py:391] +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:213.69004249572754ms total_cost_time:213.7320041656494ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13607 prompt_cache_len:5151 prompt_cache_ratio:0.37855515543470275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 +DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:43 [batch.py:51] router release req id 8 +INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s +INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11109399795532227 s +DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=108041713607132045387363621582680762016, time:1750768543.2978554s req_ids:[8] +DEBUG 06-24 20:35:43 [manager.py:391] +ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:395.83802223205566ms total_cost_time:395.88332176208496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13608 prompt_cache_len:5151 prompt_cache_ratio:0.3785273368606702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 +DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:43 [batch.py:51] router release req id 8 +INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s +INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11099600791931152 s +DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=249601185250909779010198803393374823622, time:1750768543.6880448s req_ids:[8] +DEBUG 06-24 20:35:43 [manager.py:391] +ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:213.0258083343506ms total_cost_time:213.07086944580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13609 prompt_cache_len:5151 prompt_cache_ratio:0.378499522374899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 +DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:43 [batch.py:51] router release req id 8 +INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10825610160827637 s +INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.1102898120880127 s +DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=226611217595596865429454475423742658034, time:1750768543.9064684s req_ids:[8] +DEBUG 06-24 20:35:43 [manager.py:391] +ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:213.41347694396973ms total_cost_time:213.45973014831543ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13610 prompt_cache_len:5151 prompt_cache_ratio:0.3784717119764879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 +DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:44 [batch.py:51] router release req id 8 +INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.1071770191192627 s +INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.10866880416870117 s +DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=130958493808599589907986504006660274852, time:1750768544.126208s req_ids:[8] +DEBUG 06-24 20:35:44 [manager.py:391] +ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:212.65554428100586ms total_cost_time:212.70084381103516ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13611 prompt_cache_len:5151 prompt_cache_ratio:0.37844390566453606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 +DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:44 [batch.py:51] router release req id 8 +INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.10922050476074219 s +INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.11112236976623535 s +DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=104436109456154896394733093667044818527, time:1750768544.3465219s req_ids:[8] +DEBUG 06-24 20:35:44 [manager.py:391] +ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:215.58308601379395ms total_cost_time:215.62480926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13612 prompt_cache_len:5151 prompt_cache_ratio:0.3784161034381428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 +DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:44 [batch.py:51] router release req id 8 +INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.11167693138122559 s +INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.1131439208984375 s +DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=44035926024371330174355240425430575612, time:1750768544.5706375s req_ids:[8] +DEBUG 06-24 20:35:44 [manager.py:391] +ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:216.5358066558838ms total_cost_time:216.57991409301758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13613 prompt_cache_len:5151 prompt_cache_ratio:0.3783883052964078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 +DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:44 [batch.py:51] router release req id 8 +INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.10886001586914062 s +INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029314994812012 s +DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=119430016434262367425001229304978266536, time:1750768544.7916996s req_ids:[8] +DEBUG 06-24 20:35:44 [manager.py:391] +ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:390.89274406433105ms total_cost_time:390.93661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13614 prompt_cache_len:5151 prompt_cache_ratio:0.378360511238431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 +DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:45 [batch.py:51] router release req id 8 +INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.1081838607788086 s +INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s +DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=273444994832047174098564456961035605906, time:1750768545.1903698s req_ids:[8] +DEBUG 06-24 20:35:45 [manager.py:391] +ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:213.3176326751709ms total_cost_time:213.3617401123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13615 prompt_cache_len:5151 prompt_cache_ratio:0.3783327212633125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 +DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:45 [batch.py:51] router release req id 8 +INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.10836386680603027 s +INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10988306999206543 s +DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=122998039719627719998469232993655549025, time:1750768545.4208715s req_ids:[8] +DEBUG 06-24 20:35:45 [manager.py:391] +ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:222.11742401123047ms total_cost_time:222.14531898498535ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:13616 prompt_cache_len:5151 prompt_cache_ratio:0.37830493537015275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 +DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:45 [batch.py:51] router release req id 8 +INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.10795927047729492 s +INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10997319221496582 s +DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=35282630186919789138957764892559378076, time:1750768545.642445s req_ids:[8] +DEBUG 06-24 20:35:45 [manager.py:391] +ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:212.35060691833496ms total_cost_time:212.39352226257324ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13617 prompt_cache_len:5151 prompt_cache_ratio:0.3782771535580524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 +DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:45 [batch.py:51] router release req id 8 +INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.1107480525970459 s +INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.1126852035522461 s +DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=47513810349256356895494341189302484139, time:1750768545.8599033s req_ids:[8] +DEBUG 06-24 20:35:45 [manager.py:391] +ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:213.01603317260742ms total_cost_time:213.057279586792ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13618 prompt_cache_len:5151 prompt_cache_ratio:0.3782493758261125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 +DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:45 [batch.py:51] router release req id 8 +INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10805535316467285 s +INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11014151573181152 s +DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=94356904830484959331389902357134745455, time:1750768546.0770204s req_ids:[8] +DEBUG 06-24 20:35:46 [manager.py:391] +ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:216.7515754699707ms total_cost_time:216.79377555847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13619 prompt_cache_len:5151 prompt_cache_ratio:0.37822160217343415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 +DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:46 [batch.py:51] router release req id 8 +INFO 06-24 20:35:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10963249206542969 s +INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11109375953674316 s +DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=320957219285217034357074047481551965828, time:1750768546.2989986s req_ids:[8] +DEBUG 06-24 20:35:46 [manager.py:391] +ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:209.7768783569336ms total_cost_time:209.82098579406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13620 prompt_cache_len:5151 prompt_cache_ratio:0.37819383259911893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 +DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:46 [batch.py:51] router release req id 8 +INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.3108041286468506 s +INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.31293606758117676 s +DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=292576040227077569662274650760834648801, time:1750768546.7263598s req_ids:[8] +DEBUG 06-24 20:35:46 [manager.py:391] +ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:427.9489517211914ms total_cost_time:427.9942512512207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13621 prompt_cache_len:5151 prompt_cache_ratio:0.3781660671022686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 +DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:46 [batch.py:51] router release req id 8 +INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s +INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11051058769226074 s +DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=309156379884607679918347025486197844202, time:1750768546.9509s req_ids:[8] +DEBUG 06-24 20:35:46 [manager.py:391] +ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:215.68536758422852ms total_cost_time:215.7270908355713ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13622 prompt_cache_len:5151 prompt_cache_ratio:0.37813830568198503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 +DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:47 [batch.py:51] router release req id 8 +INFO 06-24 20:35:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10817933082580566 s +INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.11001324653625488 s +DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=24302363388559923243400375049437580690, time:1750768547.1722627s req_ids:[8] +DEBUG 06-24 20:35:47 [manager.py:391] +ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:218.34325790405273ms total_cost_time:218.38688850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13623 prompt_cache_len:5151 prompt_cache_ratio:0.3781105483373706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 +DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:47 [batch.py:51] router release req id 8 +INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.1076209545135498 s +INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.10862064361572266 s +DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=320016684876712546197543222767112920097, time:1750768547.395169s req_ids:[8] +DEBUG 06-24 20:35:47 [manager.py:391] +ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:169.72804069519043ms total_cost_time:169.7702407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13624 prompt_cache_len:5151 prompt_cache_ratio:0.3780827950675279 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 +DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:47 [batch.py:51] router release req id 8 +INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10774993896484375 s +INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.10892629623413086 s +DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=277279140517817463526152217235581903280, time:1750768547.5735333s req_ids:[8] +DEBUG 06-24 20:35:47 [manager.py:391] +ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:214.3256664276123ms total_cost_time:214.3688201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13625 prompt_cache_len:5151 prompt_cache_ratio:0.37805504587155964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 +DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:47 [batch.py:51] router release req id 8 +INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10911369323730469 s +INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.11252140998840332 s +DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=35096955949092871950169631164319642318, time:1750768547.7932768s req_ids:[8] +DEBUG 06-24 20:35:47 [manager.py:391] +ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:387.31884956359863ms total_cost_time:387.3636722564697ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13626 prompt_cache_len:5151 prompt_cache_ratio:0.37802730074856894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 +DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:48 [batch.py:51] router release req id 8 +INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10870671272277832 s +INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.11010193824768066 s +DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=28758698059860884441342898673187097252, time:1750768548.1884575s req_ids:[8] +DEBUG 06-24 20:35:48 [manager.py:391] +ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:218.01137924194336ms total_cost_time:218.05262565612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13627 prompt_cache_len:5151 prompt_cache_ratio:0.3779995596976591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 +DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:48 [batch.py:51] router release req id 8 +INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.1060495376586914 s +INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10806083679199219 s +DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=281134776810478477271381555937032106350, time:1750768548.4107366s req_ids:[8] +DEBUG 06-24 20:35:48 [manager.py:391] +ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:210.6626033782959ms total_cost_time:210.707426071167ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13628 prompt_cache_len:5151 prompt_cache_ratio:0.37797182271793367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 +DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:48 [batch.py:51] router release req id 8 +INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10761213302612305 s +INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10959148406982422 s +DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=142285376794513837059663077645312865958, time:1750768548.6283486s req_ids:[8] +DEBUG 06-24 20:35:48 [manager.py:391] +ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:35:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 53975.273 tokens/s +DEBUG 06-24 20:35:48 [stats.py:37] Avg prompt tokens throughput: 53967.243 tokens/s +DEBUG 06-24 20:35:48 [stats.py:37] Avg generate tokens throughput: 8.030 tokens/s +INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:211.58289909362793ms total_cost_time:211.62700653076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13629 prompt_cache_len:5151 prompt_cache_ratio:0.3779440898084966 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 +DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:48 [batch.py:51] router release req id 8 +INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10781574249267578 s +INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10983729362487793 s +DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=300994028136680121246595563542210505910, time:1750768548.8493989s req_ids:[8] +DEBUG 06-24 20:35:48 [manager.py:391] +ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:209.86080169677734ms total_cost_time:209.90395545959473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13630 prompt_cache_len:5151 prompt_cache_ratio:0.37791636096845194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 +DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:48 [batch.py:51] router release req id 8 +INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10859370231628418 s +INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s +DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=99667879679816735471074901445044953506, time:1750768549.063888s req_ids:[8] +DEBUG 06-24 20:35:49 [manager.py:391] +ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:214.9038314819336ms total_cost_time:214.94579315185547ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13631 prompt_cache_len:5151 prompt_cache_ratio:0.3778886361969041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 +DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:49 [batch.py:51] router release req id 8 +INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10830473899841309 s +INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11048626899719238 s +DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=257917619119232590601852247808579288165, time:1750768549.2842946s req_ids:[8] +DEBUG 06-24 20:35:49 [manager.py:391] +ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:388.9954090118408ms total_cost_time:389.0419006347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13632 prompt_cache_len:5151 prompt_cache_ratio:0.37786091549295775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 +DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:49 [batch.py:51] router release req id 8 +INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10724401473999023 s +INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.1093604564666748 s +DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=31054056425887962216527733282506062550, time:1750768549.6814344s req_ids:[8] +DEBUG 06-24 20:35:49 [manager.py:391] +ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:218.8417911529541ms total_cost_time:218.88303756713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13633 prompt_cache_len:5151 prompt_cache_ratio:0.37783319885571776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 +DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:49 [batch.py:51] router release req id 8 +INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10855698585510254 s +INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065363883972168 s +DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=95585855388656804125224001217431018532, time:1750768549.9050138s req_ids:[8] +DEBUG 06-24 20:35:49 [manager.py:391] +ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:217.30995178222656ms total_cost_time:217.35334396362305ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13634 prompt_cache_len:5151 prompt_cache_ratio:0.3778054862842893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 +DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:50 [batch.py:51] router release req id 8 +INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10902070999145508 s +INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11095762252807617 s +DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=83178396355074804875819021404046644400, time:1750768550.1362364s req_ids:[8] +DEBUG 06-24 20:35:50 [manager.py:391] +ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:223.65427017211914ms total_cost_time:223.70076179504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13635 prompt_cache_len:5151 prompt_cache_ratio:0.37777777777777777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 +DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:50 [batch.py:51] router release req id 8 +INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s +INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11032295227050781 s +DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=75409116160910432666062985706831405471, time:1750768550.3613985s req_ids:[8] +DEBUG 06-24 20:35:50 [manager.py:391] +ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:219.35200691223145ms total_cost_time:219.39730644226074ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13636 prompt_cache_len:5151 prompt_cache_ratio:0.3777500733352889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 +DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:50 [batch.py:51] router release req id 8 +INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10905003547668457 s +INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s +DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=155862091041466772526169255249430827598, time:1750768550.5839698s req_ids:[8] +DEBUG 06-24 20:35:50 [manager.py:391] +ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:218.48559379577637ms total_cost_time:218.54782104492188ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13637 prompt_cache_len:5151 prompt_cache_ratio:0.3777223729559287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 +DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:50 [batch.py:51] router release req id 8 +INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.11061358451843262 s +INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11263084411621094 s +DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=133657886805782331993092778379460896084, time:1750768550.8176966s req_ids:[8] +DEBUG 06-24 20:35:50 [manager.py:391] +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:229.89225387573242ms total_cost_time:229.9368381500244ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13638 prompt_cache_len:5151 prompt_cache_ratio:0.37769467663880335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 +DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:51 [batch.py:51] router release req id 8 +INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.31002235412597656 s +INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.31211161613464355 s +DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=174555946281598786902466974557467235897, time:1750768551.2587047s req_ids:[8] +DEBUG 06-24 20:35:51 [manager.py:391] +ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:436.32960319519043ms total_cost_time:436.3729953765869ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13639 prompt_cache_len:5151 prompt_cache_ratio:0.37766698438301927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 +DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:51 [batch.py:51] router release req id 8 +INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.1083681583404541 s +INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.11024785041809082 s +DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=188286778155602474216403692061546385671, time:1750768551.4863336s req_ids:[8] +DEBUG 06-24 20:35:51 [manager.py:391] +ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:218.10245513916016ms total_cost_time:218.14537048339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13640 prompt_cache_len:5151 prompt_cache_ratio:0.37763929618768327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 +DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:51 [batch.py:51] router release req id 8 +INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.10745644569396973 s +INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.10949873924255371 s +DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=8390407928174607071693031107652752929, time:1750768551.7111785s req_ids:[8] +DEBUG 06-24 20:35:51 [manager.py:391] +ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:218.10030937194824ms total_cost_time:218.14322471618652ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13641 prompt_cache_len:5151 prompt_cache_ratio:0.37761161205190236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 +DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:51 [batch.py:51] router release req id 8 +INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.10839629173278809 s +INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s +DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=213190747695857363323377943642448862316, time:1750768551.9331715s req_ids:[8] +DEBUG 06-24 20:35:51 [manager.py:391] +ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:216.90034866333008ms total_cost_time:216.94564819335938ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13642 prompt_cache_len:5151 prompt_cache_ratio:0.37758393197478374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 +DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:52 [batch.py:51] router release req id 8 +INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10802865028381348 s +INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.10996532440185547 s +DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=159746189219849857865710849734312241439, time:1750768552.16121s req_ids:[8] +DEBUG 06-24 20:35:52 [manager.py:391] +ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:180.3135871887207ms total_cost_time:180.3581714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13643 prompt_cache_len:5151 prompt_cache_ratio:0.377556255955435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 +DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:52 [batch.py:51] router release req id 8 +INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10880303382873535 s +INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.11080074310302734 s +DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=337838145547603146132441832734438942966, time:1750768552.3402197s req_ids:[8] +DEBUG 06-24 20:35:52 [manager.py:391] +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:375.16236305236816ms total_cost_time:375.20790100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13644 prompt_cache_len:5151 prompt_cache_ratio:0.37752858399296396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 +DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:52 [batch.py:51] router release req id 8 +INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10774922370910645 s +INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.10974860191345215 s +DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=18407330026922278464330689317177495971, time:1750768552.7197297s req_ids:[8] +DEBUG 06-24 20:35:52 [manager.py:391] +ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:172.00756072998047ms total_cost_time:172.05071449279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13645 prompt_cache_len:5151 prompt_cache_ratio:0.37750091608647857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 +DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:52 [batch.py:51] router release req id 8 +INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s +INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.11090946197509766 s +DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=319335937521893534030063087331540655716, time:1750768552.898341s req_ids:[8] +DEBUG 06-24 20:35:52 [manager.py:391] +ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:203.75561714172363ms total_cost_time:203.80234718322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13646 prompt_cache_len:5151 prompt_cache_ratio:0.3774732522350872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 +DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:53 [batch.py:51] router release req id 8 +INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10809326171875 s +INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.11011099815368652 s +DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=229593009020485800866750310937352552068, time:1750768553.107266s req_ids:[8] +DEBUG 06-24 20:35:53 [manager.py:391] +ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:212.13769912719727ms total_cost_time:212.18204498291016ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13647 prompt_cache_len:5151 prompt_cache_ratio:0.37744559243789844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 +INFO 06-24 20:35:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:53 [batch.py:51] router release req id 8 +INFO 06-24 20:35:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10804295539855957 s +INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.1101541519165039 s +DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=126105128133144239977238626492060236269, time:1750768553.324351s req_ids:[8] +DEBUG 06-24 20:35:53 [manager.py:391] +ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:213.05418014526367ms total_cost_time:213.10067176818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13648 prompt_cache_len:5151 prompt_cache_ratio:0.37741793669402113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 +DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:53 [batch.py:51] router release req id 8 +INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.1073906421661377 s +INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.1092832088470459 s +DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=151925410819919333700135408243894110623, time:1750768553.5413709s req_ids:[8] +DEBUG 06-24 20:35:53 [manager.py:391] +ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:212.68725395202637ms total_cost_time:212.73136138916016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13649 prompt_cache_len:5151 prompt_cache_ratio:0.3773902850025643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 +DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:53 [batch.py:51] router release req id 8 +INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10857605934143066 s +INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.11062741279602051 s +DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=209190148504032345125124346296662538947, time:1750768553.7588232s req_ids:[8] +DEBUG 06-24 20:35:53 [manager.py:391] +ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:213.85693550109863ms total_cost_time:213.90032768249512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13650 prompt_cache_len:5151 prompt_cache_ratio:0.37736263736263737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 +DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:54 [batch.py:51] router release req id 8 +INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.3093385696411133 s +INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.31124448776245117 s +DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=257007811245343669500775947666003388801, time:1750768554.183651s req_ids:[8] +DEBUG 06-24 20:35:54 [manager.py:391] +ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:378.28707695007324ms total_cost_time:378.3295154571533ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13651 prompt_cache_len:5151 prompt_cache_ratio:0.37733499377334995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 +DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:54 [batch.py:51] router release req id 8 +INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10752582550048828 s +INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.10958600044250488 s +DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=235612014099335927989610269083737396820, time:1750768554.3614638s req_ids:[8] +DEBUG 06-24 20:35:54 [manager.py:391] +ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:215.03090858459473ms total_cost_time:215.0745391845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13652 prompt_cache_len:5151 prompt_cache_ratio:0.3773073542338119 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 +DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:54 [batch.py:51] router release req id 8 +INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10862326622009277 s +INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s +DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=49484158378608337013176031824749776154, time:1750768554.580949s req_ids:[8] +DEBUG 06-24 20:35:54 [manager.py:391] +ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:217.12398529052734ms total_cost_time:217.16761589050293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13653 prompt_cache_len:5151 prompt_cache_ratio:0.3772797187431334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 +DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:54 [batch.py:51] router release req id 8 +INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10905146598815918 s +INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.11112380027770996 s +DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=108956072929634339371195895641946405800, time:1750768554.8026688s req_ids:[8] +DEBUG 06-24 20:35:54 [manager.py:391] +ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:214.78915214538574ms total_cost_time:214.83564376831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13654 prompt_cache_len:5151 prompt_cache_ratio:0.3772520873004248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 +DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:54 [batch.py:51] router release req id 8 +INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10964035987854004 s +INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11162042617797852 s +DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=153382760579351380999977144223642908118, time:1750768555.0227501s req_ids:[8] +DEBUG 06-24 20:35:55 [manager.py:391] +ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:217.02861785888672ms total_cost_time:217.0724868774414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13655 prompt_cache_len:5151 prompt_cache_ratio:0.3772244599047968 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 +DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:55 [batch.py:51] router release req id 8 +INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10831761360168457 s +INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11089706420898438 s +DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=193805241350396400480045712576549230029, time:1750768555.248415s req_ids:[8] +DEBUG 06-24 20:35:55 [manager.py:391] +ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:222.9306697845459ms total_cost_time:222.975492477417ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13656 prompt_cache_len:5151 prompt_cache_ratio:0.3771968365553603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 +DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:55 [batch.py:51] router release req id 8 +INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.3101987838745117 s +INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.31226205825805664 s +DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=195118419152740454995841600054416543695, time:1750768555.6782992s req_ids:[8] +DEBUG 06-24 20:35:55 [manager.py:391] +ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:431.47969245910645ms total_cost_time:431.52523040771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13657 prompt_cache_len:5151 prompt_cache_ratio:0.3771692172512265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 +DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:55 [batch.py:51] router release req id 8 +INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10817694664001465 s +INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11034584045410156 s +DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=151631523667400502789879400052510589660, time:1750768555.908569s req_ids:[8] +DEBUG 06-24 20:35:55 [manager.py:391] +ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:218.30201148986816ms total_cost_time:218.34492683410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13658 prompt_cache_len:5151 prompt_cache_ratio:0.3771416019915068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 +DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:56 [batch.py:51] router release req id 8 +INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10804605484008789 s +INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11010241508483887 s +DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=257623674290752407476526956654920517947, time:1750768556.131419s req_ids:[8] +DEBUG 06-24 20:35:56 [manager.py:391] +ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:212.73112297058105ms total_cost_time:212.77570724487305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13659 prompt_cache_len:5151 prompt_cache_ratio:0.377113990775313 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 +DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:56 [batch.py:51] router release req id 8 +INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s +INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.10988402366638184 s +DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=190297581107265397179063143987158349761, time:1750768556.3476503s req_ids:[8] +DEBUG 06-24 20:35:56 [manager.py:391] +ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:211.83538436889648ms total_cost_time:211.87901496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13660 prompt_cache_len:5151 prompt_cache_ratio:0.37708638360175695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 +DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:56 [batch.py:51] router release req id 8 +INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10842680931091309 s +INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11051678657531738 s +DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=54857662492815382657442871395007006545, time:1750768556.5654368s req_ids:[8] +DEBUG 06-24 20:35:56 [manager.py:391] +ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:217.09418296813965ms total_cost_time:217.13733673095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13661 prompt_cache_len:5151 prompt_cache_ratio:0.37705878046995095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 +DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:56 [batch.py:51] router release req id 8 +INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10813522338867188 s +INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11013507843017578 s +DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=300364789289121113358902921117596870327, time:1750768556.787558s req_ids:[8] +DEBUG 06-24 20:35:56 [manager.py:391] +ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:386.5816593170166ms total_cost_time:386.6274356842041ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13662 prompt_cache_len:5151 prompt_cache_ratio:0.3770311813790075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 +DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:57 [batch.py:51] router release req id 8 +INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s +INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.1101236343383789 s +DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=187883675330075789851526300167506811549, time:1750768557.1788938s req_ids:[8] +DEBUG 06-24 20:35:57 [manager.py:391] +ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:217.81563758850098ms total_cost_time:217.85950660705566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13663 prompt_cache_len:5151 prompt_cache_ratio:0.3770035863280392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 +DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:57 [batch.py:51] router release req id 8 +INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1078939437866211 s +INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099846363067627 s +DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=293098158439175210611145627150530717569, time:1750768557.4022622s req_ids:[8] +DEBUG 06-24 20:35:57 [manager.py:391] +ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:217.12994575500488ms total_cost_time:217.17405319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13664 prompt_cache_len:5151 prompt_cache_ratio:0.3769759953161593 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 +DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:57 [batch.py:51] router release req id 8 +INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1082301139831543 s +INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027050018310547 s +DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=57238135226023662701856765536486369313, time:1750768557.6270173s req_ids:[8] +DEBUG 06-24 20:35:57 [manager.py:391] +ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:223.72865676879883ms total_cost_time:223.77347946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13665 prompt_cache_len:5151 prompt_cache_ratio:0.3769484083424808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 +DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:57 [batch.py:51] router release req id 8 +INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.10793495178222656 s +INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.10975837707519531 s +DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=164239055244202895134956885315973145799, time:1750768557.8561106s req_ids:[8] +DEBUG 06-24 20:35:57 [manager.py:391] +ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:218.9810276031494ms total_cost_time:219.0229892730713ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13666 prompt_cache_len:5151 prompt_cache_ratio:0.3769208254061174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 +DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:57 [batch.py:51] router release req id 8 +INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s +INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11054039001464844 s +DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=17491232647544406456690733448703084188, time:1750768558.0762622s req_ids:[8] +DEBUG 06-24 20:35:58 [manager.py:391] +ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:218.2791233062744ms total_cost_time:218.3229923248291ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13667 prompt_cache_len:5151 prompt_cache_ratio:0.3768932465061828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 +DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:58 [batch.py:51] router release req id 8 +INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10811400413513184 s +INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s +DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=260665062313428928668893639995864761558, time:1750768558.2983599s req_ids:[8] +DEBUG 06-24 20:35:58 [manager.py:391] +ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:209.7315788269043ms total_cost_time:209.7756862640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13668 prompt_cache_len:5151 prompt_cache_ratio:0.376865671641791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 +DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:58 [batch.py:51] router release req id 8 +INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10889554023742676 s +INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11087918281555176 s +DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=251049571318513807902057573612589340282, time:1750768558.512987s req_ids:[8] +DEBUG 06-24 20:35:58 [manager.py:391] +ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:35:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 54351.951 tokens/s +DEBUG 06-24 20:35:58 [stats.py:37] Avg prompt tokens throughput: 54343.988 tokens/s +DEBUG 06-24 20:35:58 [stats.py:37] Avg generate tokens throughput: 7.963 tokens/s +INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:375.83351135253906ms total_cost_time:375.87928771972656ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13669 prompt_cache_len:5151 prompt_cache_ratio:0.3768381008120565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 +DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:58 [batch.py:51] router release req id 8 +INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s +INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11055994033813477 s +DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=68813320741427866713348010278541618370, time:1750768558.893984s req_ids:[8] +DEBUG 06-24 20:35:58 [manager.py:391] +ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:212.9812240600586ms total_cost_time:213.03915977478027ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:13670 prompt_cache_len:5151 prompt_cache_ratio:0.37681053401609366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 +DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:59 [batch.py:51] router release req id 8 +INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10829043388366699 s +INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11027169227600098 s +DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=26892049964847638688863999236649037064, time:1750768559.1130607s req_ids:[8] +DEBUG 06-24 20:35:59 [manager.py:391] +ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:172.37091064453125ms total_cost_time:172.41573333740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13671 prompt_cache_len:5151 prompt_cache_ratio:0.37678297125301735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 +DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:59 [batch.py:51] router release req id 8 +INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.1075596809387207 s +INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.1095273494720459 s +DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=114731582348946010192197612861649036261, time:1750768559.293538s req_ids:[8] +DEBUG 06-24 20:35:59 [manager.py:391] +ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:204.06317710876465ms total_cost_time:204.10776138305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13672 prompt_cache_len:5151 prompt_cache_ratio:0.37675541252194267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 +DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:59 [batch.py:51] router release req id 8 +INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s +INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11062765121459961 s +DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=254332320868817064403540280358532400553, time:1750768559.5034993s req_ids:[8] +DEBUG 06-24 20:35:59 [manager.py:391] +ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:213.58585357666016ms total_cost_time:213.62876892089844ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13673 prompt_cache_len:5151 prompt_cache_ratio:0.3767278578219849 mtp_avg_token_per_step:1.0 +INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 +DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:35:59 [batch.py:51] router release req id 8 +INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10849142074584961 s +INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11051225662231445 s +DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=310163101815339106029202796761737132947, time:1750768559.7229934s req_ids:[8] +DEBUG 06-24 20:35:59 [manager.py:391] +ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:394.3326473236084ms total_cost_time:394.3769931793213ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13674 prompt_cache_len:5151 prompt_cache_ratio:0.3767003071522598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 +DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:00 [batch.py:51] router release req id 8 +INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.1081089973449707 s +INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.10997843742370605 s +DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=157916551437019257423777337339710269693, time:1750768560.1221435s req_ids:[8] +DEBUG 06-24 20:36:00 [manager.py:391] +ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:214.73312377929688ms total_cost_time:214.77746963500977ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13675 prompt_cache_len:5151 prompt_cache_ratio:0.376672760511883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 +DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:00 [batch.py:51] router release req id 8 +INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.10854291915893555 s +INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105046272277832 s +DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=236455581206585182835000858650814063414, time:1750768560.3426344s req_ids:[8] +DEBUG 06-24 20:36:00 [manager.py:391] +ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:210.79134941101074ms total_cost_time:210.84880828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:13676 prompt_cache_len:5151 prompt_cache_ratio:0.37664521789997074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 +DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:00 [batch.py:51] router release req id 8 +INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.10779070854187012 s +INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.11014008522033691 s +INFO 06-24 20:36:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=198450818040750336791910335678538978955, time:1750768560.5624502s req_ids:[8] +DEBUG 06-24 20:36:00 [manager.py:391] +ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:219.8197841644287ms total_cost_time:219.8634147644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13677 prompt_cache_len:5151 prompt_cache_ratio:0.3766176793156394 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 +DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:00 [batch.py:51] router release req id 8 +INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.11022591590881348 s +INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.11166834831237793 s +DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=163421767089692774712480714151470099233, time:1750768560.7843263s req_ids:[8] +DEBUG 06-24 20:36:00 [manager.py:391] +ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:223.0820655822754ms total_cost_time:223.12617301940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13678 prompt_cache_len:5151 prompt_cache_ratio:0.37659014475800556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 +DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:00 [batch.py:51] router release req id 8 +INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10873699188232422 s +INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11069846153259277 s +DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=316280845978368079284644500574320082575, time:1750768561.0155902s req_ids:[8] +DEBUG 06-24 20:36:01 [manager.py:391] +ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:213.88745307922363ms total_cost_time:213.9296531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13679 prompt_cache_len:5151 prompt_cache_ratio:0.37656261422618614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 +DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:01 [batch.py:51] router release req id 8 +INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10891079902648926 s +INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11090850830078125 s +DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=187473870858225605631348748138118144432, time:1750768561.235861s req_ids:[8] +DEBUG 06-24 20:36:01 [manager.py:391] +ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:395.28894424438477ms total_cost_time:395.33352851867676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13680 prompt_cache_len:5151 prompt_cache_ratio:0.3765350877192982 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 +DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:01 [batch.py:51] router release req id 8 +INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s +INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.1104590892791748 s +DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=327126906278624230821965108218765240291, time:1750768561.6380112s req_ids:[8] +DEBUG 06-24 20:36:01 [manager.py:391] +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:217.81206130981445ms total_cost_time:217.8642749786377ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13681 prompt_cache_len:5151 prompt_cache_ratio:0.37650756523645934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 +DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:01 [batch.py:51] router release req id 8 +INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10818719863891602 s +INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11016273498535156 s +DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=310661958017560045413286966247218173497, time:1750768561.8592618s req_ids:[8] +DEBUG 06-24 20:36:01 [manager.py:391] +ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:215.3182029724121ms total_cost_time:215.3618335723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13682 prompt_cache_len:5151 prompt_cache_ratio:0.37648004677678704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 +DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:01 [batch.py:51] router release req id 8 +INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10772252082824707 s +INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.10873174667358398 s +DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=294296304520917938906888436479656756963, time:1750768562.0810807s req_ids:[8] +DEBUG 06-24 20:36:02 [manager.py:391] +ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:174.0882396697998ms total_cost_time:174.1316318511963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13683 prompt_cache_len:5151 prompt_cache_ratio:0.37645253233939924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 +DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:02 [batch.py:51] router release req id 8 +INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s +INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036825180053711 s +DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=224751542837843329291389650593660496840, time:1750768562.2625144s req_ids:[8] +DEBUG 06-24 20:36:02 [manager.py:391] +ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:214.79129791259766ms total_cost_time:214.83540534973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13684 prompt_cache_len:5151 prompt_cache_ratio:0.3764250219234142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 +DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:02 [batch.py:51] router release req id 8 +INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10772991180419922 s +INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096792221069336 s +DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=2942148602482244426050882912649281021, time:1750768562.4801955s req_ids:[8] +DEBUG 06-24 20:36:02 [manager.py:391] +ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:212.46933937072754ms total_cost_time:212.51320838928223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13685 prompt_cache_len:5151 prompt_cache_ratio:0.3763975155279503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 +DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:02 [batch.py:51] router release req id 8 +INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.1087636947631836 s +INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.1106882095336914 s +DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=249628482781049278516473589102285439973, time:1750768562.6958754s req_ids:[8] +DEBUG 06-24 20:36:02 [manager.py:391] +ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:390.7287120819092ms total_cost_time:390.77281951904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13686 prompt_cache_len:5151 prompt_cache_ratio:0.37637001315212626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 +DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:03 [batch.py:51] router release req id 8 +INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10800409317016602 s +INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s +DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=20268920181325564169125179817406441366, time:1750768563.092134s req_ids:[8] +DEBUG 06-24 20:36:03 [manager.py:391] +ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:174.38602447509766ms total_cost_time:174.42870140075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13687 prompt_cache_len:5151 prompt_cache_ratio:0.37634251479506103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 +DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:03 [batch.py:51] router release req id 8 +INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s +INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11111617088317871 s +DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=153888401382603662020931468204088355073, time:1750768563.2735155s req_ids:[8] +DEBUG 06-24 20:36:03 [manager.py:391] +ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:207.27181434631348ms total_cost_time:207.31496810913086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13688 prompt_cache_len:5151 prompt_cache_ratio:0.3763150204558738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 +DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:03 [batch.py:51] router release req id 8 +INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10889148712158203 s +INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11088991165161133 s +DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=207905817326683949055798575734798492031, time:1750768563.48529s req_ids:[8] +DEBUG 06-24 20:36:03 [manager.py:391] +ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:218.5654640197754ms total_cost_time:218.60980987548828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13689 prompt_cache_len:5151 prompt_cache_ratio:0.376287530133684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 +DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:03 [batch.py:51] router release req id 8 +INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s +INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11102414131164551 s +DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=25200249908781500470442144280894297520, time:1750768563.7075348s req_ids:[8] +DEBUG 06-24 20:36:03 [manager.py:391] +ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:216.77517890930176ms total_cost_time:216.81785583496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13690 prompt_cache_len:5151 prompt_cache_ratio:0.3762600438276114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 +DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:03 [batch.py:51] router release req id 8 +INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10886573791503906 s +INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089396476745605 s +DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=327898304515158839605476026271680785561, time:1750768563.9306567s req_ids:[8] +DEBUG 06-24 20:36:03 [manager.py:391] +ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:216.02940559387207ms total_cost_time:216.07255935668945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13691 prompt_cache_len:5151 prompt_cache_ratio:0.376232561536776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 +DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:04 [batch.py:51] router release req id 8 +INFO 06-24 20:36:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s +INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.10953116416931152 s +DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=291085640019914894290205786406690412241, time:1750768564.1544101s req_ids:[8] +DEBUG 06-24 20:36:04 [manager.py:391] +ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:214.599609375ms total_cost_time:214.64228630065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13692 prompt_cache_len:5151 prompt_cache_ratio:0.376205083260298 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 +DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:04 [batch.py:51] router release req id 8 +INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.3113975524902344 s +INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.3134908676147461 s +DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=4575308800431602052722726970014790449, time:1750768564.5862854s req_ids:[8] +DEBUG 06-24 20:36:04 [manager.py:391] +ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:431.973934173584ms total_cost_time:432.0189952850342ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13693 prompt_cache_len:5151 prompt_cache_ratio:0.3761776089972979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 +DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:04 [batch.py:51] router release req id 8 +INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.1089322566986084 s +INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.11097884178161621 s +DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=244118052605711041029642501530467195852, time:1750768564.813111s req_ids:[8] +DEBUG 06-24 20:36:04 [manager.py:391] +ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:208.41288566589355ms total_cost_time:208.45651626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13694 prompt_cache_len:5151 prompt_cache_ratio:0.37615013874689646 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 +DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:04 [batch.py:51] router release req id 8 +INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10863614082336426 s +INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11067414283752441 s +DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=327556265672390547657536032781774180820, time:1750768565.0293002s req_ids:[8] +DEBUG 06-24 20:36:05 [manager.py:391] +ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:213.43302726745605ms total_cost_time:213.47570419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13695 prompt_cache_len:5151 prompt_cache_ratio:0.3761226725082147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 +DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:05 [batch.py:51] router release req id 8 +INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10923171043395996 s +INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11116337776184082 s +DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=125598632824474072101852168059193784718, time:1750768565.2476425s req_ids:[8] +DEBUG 06-24 20:36:05 [manager.py:391] +ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:214.69974517822266ms total_cost_time:214.74385261535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13696 prompt_cache_len:5151 prompt_cache_ratio:0.37609521028037385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 +DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:05 [batch.py:51] router release req id 8 +INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s +INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11077713966369629 s +DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=291118268160651536491040511684873440704, time:1750768565.4705367s req_ids:[8] +DEBUG 06-24 20:36:05 [manager.py:391] +ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:214.52713012695312ms total_cost_time:214.5712375640869ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13697 prompt_cache_len:5151 prompt_cache_ratio:0.37606775206249543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 +DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:05 [batch.py:51] router release req id 8 +INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10820627212524414 s +INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s +DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=155870302276030234557503265133845982448, time:1750768565.6892035s req_ids:[8] +DEBUG 06-24 20:36:05 [manager.py:391] +ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:427.30093002319336ms total_cost_time:427.34503746032715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13698 prompt_cache_len:5151 prompt_cache_ratio:0.37604029785370124 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 +DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:06 [batch.py:51] router release req id 8 +INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10854005813598633 s +INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11059403419494629 s +DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=20341615895473604732297947997636857953, time:1750768566.1242616s req_ids:[8] +DEBUG 06-24 20:36:06 [manager.py:391] +ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:220.11899948120117ms total_cost_time:220.16239166259766ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13699 prompt_cache_len:5151 prompt_cache_ratio:0.3760128476531134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 +DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:06 [batch.py:51] router release req id 8 +INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10880851745605469 s +INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11080598831176758 s +DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=303462921873735705454450473969115980931, time:1750768566.3484032s req_ids:[8] +DEBUG 06-24 20:36:06 [manager.py:391] +ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:219.62261199951172ms total_cost_time:219.6662425994873ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13700 prompt_cache_len:5151 prompt_cache_ratio:0.37598540145985404 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 +DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:06 [batch.py:51] router release req id 8 +INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10872888565063477 s +INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11072778701782227 s +DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=211209182355129658229141676311176809372, time:1750768566.5727615s req_ids:[8] +DEBUG 06-24 20:36:06 [manager.py:391] +ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:212.80407905578613ms total_cost_time:212.84818649291992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13701 prompt_cache_len:5151 prompt_cache_ratio:0.37595795927304576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 +DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:06 [batch.py:51] router release req id 8 +INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10840368270874023 s +INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11044859886169434 s +DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=55732032066999074511885898363908197006, time:1750768566.7902997s req_ids:[8] +DEBUG 06-24 20:36:06 [manager.py:391] +ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:211.80248260498047ms total_cost_time:211.84492111206055ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13702 prompt_cache_len:5151 prompt_cache_ratio:0.3759305210918114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 +DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:06 [batch.py:51] router release req id 8 +INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.11054682731628418 s +INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.11253142356872559 s +DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=165393028340050572542096960296010377604, time:1750768567.007564s req_ids:[8] +DEBUG 06-24 20:36:07 [manager.py:391] +ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:211.91000938415527ms total_cost_time:211.95530891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13703 prompt_cache_len:5151 prompt_cache_ratio:0.375903086915274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 +DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:07 [batch.py:51] router release req id 8 +INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.10819363594055176 s +INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s +DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=198553233134112528525840085456563704248, time:1750768567.2234504s req_ids:[8] +DEBUG 06-24 20:36:07 [manager.py:391] +ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:391.45469665527344ms total_cost_time:391.4988040924072ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13704 prompt_cache_len:5151 prompt_cache_ratio:0.3758756567425569 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 +DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:07 [batch.py:51] router release req id 8 +INFO 06-24 20:36:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.1075129508972168 s +INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.1093740463256836 s +DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=5084536497712749351864440785575720813, time:1750768567.619706s req_ids:[8] +DEBUG 06-24 20:36:07 [manager.py:391] +ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:216.5853977203369ms total_cost_time:216.6306972503662ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13705 prompt_cache_len:5151 prompt_cache_ratio:0.37584823057278366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 +DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:07 [batch.py:51] router release req id 8 +INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s +INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s +DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=287150963390130650151157043058514887856, time:1750768567.842655s req_ids:[8] +DEBUG 06-24 20:36:07 [manager.py:391] +ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:211.58337593078613ms total_cost_time:211.69018745422363ms,out_token_counter:1 mean_per_token_cost_time: 0.1068115234375ms prompt_token_num:13706 prompt_cache_len:5151 prompt_cache_ratio:0.3758208084050781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 +DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:07 [batch.py:51] router release req id 8 +INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10895180702209473 s +INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s +DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=311413432746304570105988497796260370383, time:1750768568.0574915s req_ids:[8] +DEBUG 06-24 20:36:08 [manager.py:391] +ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:214.80083465576172ms total_cost_time:214.84708786010742ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13707 prompt_cache_len:5151 prompt_cache_ratio:0.37579339023856423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 +DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:08 [batch.py:51] router release req id 8 +INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10837292671203613 s +INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11030364036560059 s +DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=274354502620989434857873279386533815404, time:1750768568.2807589s req_ids:[8] +DEBUG 06-24 20:36:08 [manager.py:391] +ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:218.80793571472168ms total_cost_time:218.84965896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13708 prompt_cache_len:5151 prompt_cache_ratio:0.3757659760723665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 +DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:08 [batch.py:51] router release req id 8 +INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.1089479923248291 s +INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11100387573242188 s +DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=281900105668401137203555562098484316284, time:1750768568.501424s req_ids:[8] +DEBUG 06-24 20:36:08 [manager.py:391] +ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:212.04900741577148ms total_cost_time:212.09263801574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13709 prompt_cache_len:5151 prompt_cache_ratio:0.37573856590560945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 +DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:08 [batch.py:51] router release req id 8 +INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10824275016784668 s +INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11033391952514648 s +DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=277653029160907696816703808996168123923, time:1750768568.7193387s req_ids:[8] +DEBUG 06-24 20:36:08 [manager.py:391] +ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:36:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 55894.979 tokens/s +DEBUG 06-24 20:36:08 [stats.py:37] Avg prompt tokens throughput: 55886.815 tokens/s +DEBUG 06-24 20:36:08 [stats.py:37] Avg generate tokens throughput: 8.165 tokens/s +INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:212.73541450500488ms total_cost_time:212.77785301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13710 prompt_cache_len:5151 prompt_cache_ratio:0.3757111597374179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 +DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:09 [batch.py:51] router release req id 8 +INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.3092312812805176 s +INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.3113980293273926 s +DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=217182270450432715496890801837983297037, time:1750768569.1412618s req_ids:[8] +DEBUG 06-24 20:36:09 [manager.py:391] +ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:419.0068244934082ms total_cost_time:419.0499782562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13711 prompt_cache_len:5151 prompt_cache_ratio:0.3756837575669171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 +DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:09 [batch.py:51] router release req id 8 +INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10861778259277344 s +INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.11081433296203613 s +DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=50937931999066462069969382017307552407, time:1750768569.361859s req_ids:[8] +DEBUG 06-24 20:36:09 [manager.py:391] +ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:213.87887001037598ms total_cost_time:213.92226219177246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13712 prompt_cache_len:5151 prompt_cache_ratio:0.3756563593932322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 +DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:09 [batch.py:51] router release req id 8 +INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10752296447753906 s +INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s +DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=116902822828770268152020166420798167763, time:1750768569.5820744s req_ids:[8] +DEBUG 06-24 20:36:09 [manager.py:391] +ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:208.21738243103027ms total_cost_time:208.26077461242676ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13713 prompt_cache_len:5151 prompt_cache_ratio:0.37562896521548894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 +DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:09 [batch.py:51] router release req id 8 +INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10857629776000977 s +INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.11072897911071777 s +DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=76345945191424376066825153864064996928, time:1750768569.7971785s req_ids:[8] +DEBUG 06-24 20:36:09 [manager.py:391] +ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:213.90986442565918ms total_cost_time:213.95421028137207ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13714 prompt_cache_len:5151 prompt_cache_ratio:0.37560157503281316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 +DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:09 [batch.py:51] router release req id 8 +INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10765409469604492 s +INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1098015308380127 s +DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=129952489704944830816134264131196334657, time:1750768570.016723s req_ids:[8] +DEBUG 06-24 20:36:10 [manager.py:391] +ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:208.65583419799805ms total_cost_time:208.69922637939453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13715 prompt_cache_len:5151 prompt_cache_ratio:0.375574188844331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 +DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:10 [batch.py:51] router release req id 8 +INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s +INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1103963851928711 s +DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=52105376917487015994568650314316473523, time:1750768570.2317321s req_ids:[8] +DEBUG 06-24 20:36:10 [manager.py:391] +ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:383.27765464782715ms total_cost_time:383.32056999206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13716 prompt_cache_len:5151 prompt_cache_ratio:0.37554680664916884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 +DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:10 [batch.py:51] router release req id 8 +INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10748505592346191 s +INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s +DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=143835826180420810441165626324535168630, time:1750768570.6220236s req_ids:[8] +DEBUG 06-24 20:36:10 [manager.py:391] +ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:211.62152290344238ms total_cost_time:211.66539192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13717 prompt_cache_len:5151 prompt_cache_ratio:0.3755194284464533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 +DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:10 [batch.py:51] router release req id 8 +INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10842370986938477 s +INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037635803222656 s +DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=286857817823025784117166942664340752810, time:1750768570.839051s req_ids:[8] +DEBUG 06-24 20:36:10 [manager.py:391] +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:208.04905891418457ms total_cost_time:208.09197425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13718 prompt_cache_len:5151 prompt_cache_ratio:0.3754920542353113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 +DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:10 [batch.py:51] router release req id 8 +INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10889554023742676 s +INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11092019081115723 s +DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=251781111053661081982515082912691665298, time:1750768571.054654s req_ids:[8] +DEBUG 06-24 20:36:11 [manager.py:391] +ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:212.5840187072754ms total_cost_time:212.62812614440918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13719 prompt_cache_len:5151 prompt_cache_ratio:0.3754646840148699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 +DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:11 [batch.py:51] router release req id 8 +INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10879778861999512 s +INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11066532135009766 s +DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=311429254149751824976687477479524689936, time:1750768571.2837882s req_ids:[8] +DEBUG 06-24 20:36:11 [manager.py:391] +ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:225.27265548706055ms total_cost_time:225.31557083129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13720 prompt_cache_len:5151 prompt_cache_ratio:0.37543731778425654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 +DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:11 [batch.py:51] router release req id 8 +INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10863995552062988 s +INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s +DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=306214874201312827610940028019843013697, time:1750768571.5061178s req_ids:[8] +DEBUG 06-24 20:36:11 [manager.py:391] +ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:215.70158004760742ms total_cost_time:215.74664115905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13721 prompt_cache_len:5151 prompt_cache_ratio:0.3754099555425989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 +DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:11 [batch.py:51] router release req id 8 +INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s +INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.10865426063537598 s +DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=76669233167020956013514188010362980024, time:1750768571.728009s req_ids:[8] +DEBUG 06-24 20:36:11 [manager.py:391] +ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:171.67377471923828ms total_cost_time:171.71764373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13722 prompt_cache_len:5151 prompt_cache_ratio:0.3753825972890249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 +DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:11 [batch.py:51] router release req id 8 +INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.3095850944519043 s +INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.3114607334136963 s +DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=8520307662988731886649237777785453732, time:1750768572.1143682s req_ids:[8] +DEBUG 06-24 20:36:12 [manager.py:391] +ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:417.80877113342285ms total_cost_time:417.85240173339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13723 prompt_cache_len:5151 prompt_cache_ratio:0.3753552430226627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 +DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:12 [batch.py:51] router release req id 8 +INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10873198509216309 s +INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.11078166961669922 s +DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=78201172574491985470099438901983503409, time:1750768572.330608s req_ids:[8] +DEBUG 06-24 20:36:12 [manager.py:391] +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:216.5513038635254ms total_cost_time:216.59588813781738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13724 prompt_cache_len:5151 prompt_cache_ratio:0.3753278927426406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 +DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:12 [batch.py:51] router release req id 8 +INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10858607292175293 s +INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.10992097854614258 s +DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=85109968803741861607164277595552530535, time:1750768572.5528843s req_ids:[8] +DEBUG 06-24 20:36:12 [manager.py:391] +ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:204.5912742614746ms total_cost_time:204.634428024292ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13725 prompt_cache_len:5151 prompt_cache_ratio:0.3753005464480874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 +DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:12 [batch.py:51] router release req id 8 +INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10866761207580566 s +INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.11001133918762207 s +DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=122635373191489327158361286025554112380, time:1750768572.7626429s req_ids:[8] +DEBUG 06-24 20:36:12 [manager.py:391] +ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:197.03173637390137ms total_cost_time:197.07441329956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13726 prompt_cache_len:5151 prompt_cache_ratio:0.37527320413813203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 +DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:12 [batch.py:51] router release req id 8 +INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s +INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950231552124023 s +DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=218985897903604413418233686166596847689, time:1750768572.9651563s req_ids:[8] +DEBUG 06-24 20:36:12 [manager.py:391] +ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:169.88301277160645ms total_cost_time:169.92568969726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13727 prompt_cache_len:5151 prompt_cache_ratio:0.37524586581190356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 +DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:13 [batch.py:51] router release req id 8 +INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10871171951293945 s +INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s +DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=304264074398272835467310876713460236983, time:1750768573.1419604s req_ids:[8] +DEBUG 06-24 20:36:13 [manager.py:391] +ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:196.00939750671387ms total_cost_time:196.04969024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:13728 prompt_cache_len:5151 prompt_cache_ratio:0.37521853146853146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 +DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:13 [batch.py:51] router release req id 8 +INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.3095541000366211 s +INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.3106839656829834 s +DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=148181551866251624372565508693686692470, time:1750768573.5564554s req_ids:[8] +DEBUG 06-24 20:36:13 [manager.py:391] +ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:383.4686279296875ms total_cost_time:383.5141658782959ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13729 prompt_cache_len:5151 prompt_cache_ratio:0.37519120110714543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 +DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:13 [batch.py:51] router release req id 8 +INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10752701759338379 s +INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.108734130859375 s +DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=58067154981188757237548235503975275558, time:1750768573.7340567s req_ids:[8] +DEBUG 06-24 20:36:13 [manager.py:391] +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:203.0770778656006ms total_cost_time:203.12857627868652ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:13730 prompt_cache_len:5151 prompt_cache_ratio:0.37516387472687546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 +DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:13 [batch.py:51] router release req id 8 +INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s +INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.10921669006347656 s +DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=310187810965830319063573596560372678272, time:1750768573.9454048s req_ids:[8] +DEBUG 06-24 20:36:13 [manager.py:391] +ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:210.59274673461914ms total_cost_time:210.63709259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13731 prompt_cache_len:5151 prompt_cache_ratio:0.37513655232685167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 +DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:14 [batch.py:51] router release req id 8 +INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.10861468315124512 s +INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10989570617675781 s +DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=110179998312177156412887966118324209880, time:1750768574.1735187s req_ids:[8] +DEBUG 06-24 20:36:14 [manager.py:391] +ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:227.11753845214844ms total_cost_time:227.16116905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13732 prompt_cache_len:5151 prompt_cache_ratio:0.3751092339062045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 +DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:14 [batch.py:51] router release req id 8 +INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.1074378490447998 s +INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10863041877746582 s +DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=251645619460604959595123328817082694449, time:1750768574.4097853s req_ids:[8] +DEBUG 06-24 20:36:14 [manager.py:391] +ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:226.64904594421387ms total_cost_time:226.69148445129395ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13733 prompt_cache_len:5151 prompt_cache_ratio:0.37508191946406466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 +DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:14 [batch.py:51] router release req id 8 +INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.1076805591583252 s +INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10887694358825684 s +DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=168130883046062280644738163940751575686, time:1750768574.643036s req_ids:[8] +DEBUG 06-24 20:36:14 [manager.py:391] +ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:397.9144096374512ms total_cost_time:397.95827865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13734 prompt_cache_len:5151 prompt_cache_ratio:0.3750546089995631 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 +DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:14 [batch.py:51] router release req id 8 +INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s +INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s +DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=164291309350944552483986552331379812860, time:1750768575.0363538s req_ids:[8] +DEBUG 06-24 20:36:15 [manager.py:391] +ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:211.3499641418457ms total_cost_time:211.3933563232422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13735 prompt_cache_len:5151 prompt_cache_ratio:0.3750273025118311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 +DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:15 [batch.py:51] router release req id 8 +INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10739946365356445 s +INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10860657691955566 s +DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=251492275325047294743604242113471704599, time:1750768575.2549188s req_ids:[8] +DEBUG 06-24 20:36:15 [manager.py:391] +ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:212.33701705932617ms total_cost_time:212.38183975219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13736 prompt_cache_len:5151 prompt_cache_ratio:0.375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 +DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:15 [batch.py:51] router release req id 8 +INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10849523544311523 s +INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s +DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=333242837490716914508993792113220247616, time:1750768575.4735458s req_ids:[8] +DEBUG 06-24 20:36:15 [manager.py:391] +ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:216.73250198364258ms total_cost_time:216.77517890930176ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13737 prompt_cache_len:5151 prompt_cache_ratio:0.37497270146320155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 +DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:15 [batch.py:51] router release req id 8 +INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10762286186218262 s +INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10866951942443848 s +DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=310474599905656710722408677666675840076, time:1750768575.6935468s req_ids:[8] +DEBUG 06-24 20:36:15 [manager.py:391] +ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:170.05419731140137ms total_cost_time:170.09520530700684ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13738 prompt_cache_len:5151 prompt_cache_ratio:0.37494540690056777 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 +DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:15 [batch.py:51] router release req id 8 +INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10859990119934082 s +INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10983157157897949 s +DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=102971313293502691554675741941244029528, time:1750768575.8722441s req_ids:[8] +DEBUG 06-24 20:36:15 [manager.py:391] +ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:201.41029357910156ms total_cost_time:201.45440101623535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13739 prompt_cache_len:5151 prompt_cache_ratio:0.3749181163112308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 +DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:15 [batch.py:51] router release req id 8 +INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10754895210266113 s +INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.10883474349975586 s +DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=146554155103969965583475384873459506210, time:1750768576.0809689s req_ids:[8] +DEBUG 06-24 20:36:16 [manager.py:391] +ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:213.52887153625488ms total_cost_time:213.57059478759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13740 prompt_cache_len:5151 prompt_cache_ratio:0.37489082969432314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 +DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:16 [batch.py:51] router release req id 8 +INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.1084742546081543 s +INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.10976958274841309 s +DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=64714086168324757179833858637691748722, time:1750768576.3008327s req_ids:[8] +DEBUG 06-24 20:36:16 [manager.py:391] +ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:376.6958713531494ms total_cost_time:376.7411708831787ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13741 prompt_cache_len:5151 prompt_cache_ratio:0.3748635470489775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 +DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:16 [batch.py:51] router release req id 8 +INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10883831977844238 s +INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.11000490188598633 s +DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=7388831944751906363281937040668829263, time:1750768576.6938064s req_ids:[8] +DEBUG 06-24 20:36:16 [manager.py:391] +ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:228.02734375ms total_cost_time:228.0709743499756ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13742 prompt_cache_len:5151 prompt_cache_ratio:0.3748362683743269 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 +DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:16 [batch.py:51] router release req id 8 +INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10866236686706543 s +INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.11003589630126953 s +DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=297394187883613128425808768086814605002, time:1750768576.9174302s req_ids:[8] +DEBUG 06-24 20:36:16 [manager.py:391] +ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:214.98870849609375ms total_cost_time:215.03257751464844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13743 prompt_cache_len:5151 prompt_cache_ratio:0.3748089936695045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 +DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:17 [batch.py:51] router release req id 8 +INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10866355895996094 s +INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.11087965965270996 s +INFO 06-24 20:36:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=205708736660443678644740561827493403005, time:1750768577.138753s req_ids:[8] +DEBUG 06-24 20:36:17 [manager.py:391] +ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:213.2277488708496ms total_cost_time:213.2711410522461ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13744 prompt_cache_len:5151 prompt_cache_ratio:0.37478172293364376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 +DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:17 [batch.py:51] router release req id 8 +INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s +INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.10991573333740234 s +DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=77735776757368270588351779871356942123, time:1750768577.3578508s req_ids:[8] +DEBUG 06-24 20:36:17 [manager.py:391] +ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:213.76514434814453ms total_cost_time:213.80925178527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13745 prompt_cache_len:5151 prompt_cache_ratio:0.3747544561658785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 +DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:17 [batch.py:51] router release req id 8 +INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10864710807800293 s +INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.10988211631774902 s +DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=194761394718417097361291852057914111150, time:1750768577.5788531s req_ids:[8] +DEBUG 06-24 20:36:17 [manager.py:391] +ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:385.45870780944824ms total_cost_time:385.5011463165283ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13746 prompt_cache_len:5151 prompt_cache_ratio:0.37472719336534266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 +DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:17 [batch.py:51] router release req id 8 +INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s +INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.11100196838378906 s +DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=189446459785670268147743911089100159048, time:1750768577.9713166s req_ids:[8] +DEBUG 06-24 20:36:17 [manager.py:391] +ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:216.72534942626953ms total_cost_time:216.77923202514648ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:13747 prompt_cache_len:5151 prompt_cache_ratio:0.37469993453117045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 +DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:18 [batch.py:51] router release req id 8 +INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.1089775562286377 s +INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.11025738716125488 s +DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=22632230072741360454213297010434378741, time:1750768578.2006752s req_ids:[8] +DEBUG 06-24 20:36:18 [manager.py:391] +ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:222.67675399780273ms total_cost_time:222.72062301635742ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13748 prompt_cache_len:5151 prompt_cache_ratio:0.37467267966249634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 +DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:18 [batch.py:51] router release req id 8 +INFO 06-24 20:36:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.1083521842956543 s +INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10957193374633789 s +DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=24110163655775653966543409758326346276, time:1750768578.4243484s req_ids:[8] +DEBUG 06-24 20:36:18 [manager.py:391] +ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:214.47134017944336ms total_cost_time:214.51449394226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13749 prompt_cache_len:5151 prompt_cache_ratio:0.37464542875845513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 +DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:18 [batch.py:51] router release req id 8 +INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s +INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10908722877502441 s +DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=242449534380405616665791501238707058657, time:1750768578.6463752s req_ids:[8] +DEBUG 06-24 20:36:18 [manager.py:391] +ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:171.2791919708252ms total_cost_time:171.32186889648438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13750 prompt_cache_len:5151 prompt_cache_ratio:0.3746181818181818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 +DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:18 [batch.py:51] router release req id 8 +INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.10811901092529297 s +INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10928773880004883 s +DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=170426951429281651797944932558011878185, time:1750768578.8241036s req_ids:[8] +DEBUG 06-24 20:36:18 [manager.py:391] +DEBUG 06-24 20:36:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 56246.032 tokens/s +DEBUG 06-24 20:36:18 [stats.py:37] Avg prompt tokens throughput: 56237.940 tokens/s +DEBUG 06-24 20:36:18 [stats.py:37] Avg generate tokens throughput: 8.091 tokens/s +ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:202.50225067138672ms total_cost_time:202.5444507598877ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13751 prompt_cache_len:5151 prompt_cache_ratio:0.37459093884081157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 +DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:18 [batch.py:51] router release req id 8 +INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s +INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11008787155151367 s +DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=204737245167080810236968494082647319323, time:1750768579.0348787s req_ids:[8] +DEBUG 06-24 20:36:19 [manager.py:391] +ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:384.1269016265869ms total_cost_time:384.1688632965088ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13752 prompt_cache_len:5151 prompt_cache_ratio:0.37456369982547993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 +DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:19 [batch.py:51] router release req id 8 +INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10860872268676758 s +INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.10989642143249512 s +DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=149301589123398562412915785147825403556, time:1750768579.4295444s req_ids:[8] +DEBUG 06-24 20:36:19 [manager.py:391] +ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:219.0382480621338ms total_cost_time:219.07973289489746ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13753 prompt_cache_len:5151 prompt_cache_ratio:0.37453646477132263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 +DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:19 [batch.py:51] router release req id 8 +INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.1084439754486084 s +INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11033821105957031 s +DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=114508494260319896129183344119655172946, time:1750768579.6619585s req_ids:[8] +DEBUG 06-24 20:36:19 [manager.py:391] +ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:225.89921951293945ms total_cost_time:225.94308853149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13754 prompt_cache_len:5151 prompt_cache_ratio:0.37450923367747563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 +DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:19 [batch.py:51] router release req id 8 +INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10907983779907227 s +INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11026191711425781 s +DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=34577129609621700435090606967669510169, time:1750768579.88677s req_ids:[8] +DEBUG 06-24 20:36:19 [manager.py:391] +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:218.05834770202637ms total_cost_time:218.10269355773926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13755 prompt_cache_len:5151 prompt_cache_ratio:0.37448200654307523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 +DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:20 [batch.py:51] router release req id 8 +INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s +INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10915660858154297 s +DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=306945843780238094556184032080721079437, time:1750768580.1156237s req_ids:[8] +DEBUG 06-24 20:36:20 [manager.py:391] +ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:180.2835464477539ms total_cost_time:180.32526969909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13756 prompt_cache_len:5151 prompt_cache_ratio:0.3744547833672579 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 +DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:20 [batch.py:51] router release req id 8 +INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10847806930541992 s +INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s +DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=335847601742950772061300927747988035827, time:1750768580.2994869s req_ids:[8] +DEBUG 06-24 20:36:20 [manager.py:391] +ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:205.60622215270996ms total_cost_time:205.65056800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13757 prompt_cache_len:5151 prompt_cache_ratio:0.37442756414916045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 +DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:20 [batch.py:51] router release req id 8 +INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10753703117370605 s +INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10869479179382324 s +DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=303953348583180577290123414643346505956, time:1750768580.5208037s req_ids:[8] +DEBUG 06-24 20:36:20 [manager.py:391] +ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:414.111852645874ms total_cost_time:414.1669273376465ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:13758 prompt_cache_len:5151 prompt_cache_ratio:0.37440034888791973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 +DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:20 [batch.py:51] router release req id 8 +INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10823631286621094 s +INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10945367813110352 s +DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=153624245308465715898687144558972627157, time:1750768580.9335506s req_ids:[8] +DEBUG 06-24 20:36:20 [manager.py:391] +ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:193.40848922729492ms total_cost_time:193.4645175933838ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:13759 prompt_cache_len:5151 prompt_cache_ratio:0.37437313758267315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 +DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:21 [batch.py:51] router release req id 8 +INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s +INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.11012101173400879 s +DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=264620539015251714852740412797499118791, time:1750768581.134663s req_ids:[8] +DEBUG 06-24 20:36:21 [manager.py:391] +ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:213.16766738891602ms total_cost_time:213.2127285003662ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13760 prompt_cache_len:5151 prompt_cache_ratio:0.37434593023255813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 +DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:21 [batch.py:51] router release req id 8 +INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.10775017738342285 s +INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10897207260131836 s +DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=37403959710864053333712758767351594528, time:1750768581.3554473s req_ids:[8] +DEBUG 06-24 20:36:21 [manager.py:391] +ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:215.06929397583008ms total_cost_time:215.11077880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13761 prompt_cache_len:5151 prompt_cache_ratio:0.37431872683671247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 +DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:21 [batch.py:51] router release req id 8 +INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s +INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10899710655212402 s +DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=192382941579589616308712672403676148631, time:1750768581.5785089s req_ids:[8] +DEBUG 06-24 20:36:21 [manager.py:391] +ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:214.90073204040527ms total_cost_time:214.95699882507324ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13762 prompt_cache_len:5151 prompt_cache_ratio:0.37429152739427407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 +DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:21 [batch.py:51] router release req id 8 +INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s +INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10997962951660156 s +DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=195730985255048076606269604083898053258, time:1750768581.811194s req_ids:[8] +DEBUG 06-24 20:36:21 [manager.py:391] +ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:230.3941249847412ms total_cost_time:230.4387092590332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13763 prompt_cache_len:5151 prompt_cache_ratio:0.3742643319043813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 +DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:21 [batch.py:51] router release req id 8 +INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10948801040649414 s +INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11152958869934082 s +DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=290998651103999768091603125511949066725, time:1750768582.0505743s req_ids:[8] +DEBUG 06-24 20:36:22 [manager.py:391] +ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:230.51881790161133ms total_cost_time:230.5765151977539ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:13764 prompt_cache_len:5151 prompt_cache_ratio:0.3742371403661726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 +DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:22 [batch.py:51] router release req id 8 +INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.3099632263183594 s +INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.31172823905944824 s +DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=268681031907932602303773090531249750544, time:1750768582.4800303s req_ids:[8] +DEBUG 06-24 20:36:22 [manager.py:391] +ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:375.23961067199707ms total_cost_time:375.28133392333984ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13765 prompt_cache_len:5151 prompt_cache_ratio:0.37420995277878677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 +DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:22 [batch.py:51] router release req id 8 +INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10889077186584473 s +INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11083793640136719 s +DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=252209634441203966976809808674503265691, time:1750768582.6581206s req_ids:[8] +DEBUG 06-24 20:36:22 [manager.py:391] +ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:209.381103515625ms total_cost_time:209.4249725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13766 prompt_cache_len:5151 prompt_cache_ratio:0.3741827691413628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 +DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:22 [batch.py:51] router release req id 8 +INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10870218276977539 s +INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s +DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=246158324598541104739289850875468705913, time:1750768582.8806024s req_ids:[8] +DEBUG 06-24 20:36:22 [manager.py:391] +ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:225.67319869995117ms total_cost_time:225.72708129882812ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:13767 prompt_cache_len:5151 prompt_cache_ratio:0.3741555894530399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 +DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:23 [batch.py:51] router release req id 8 +INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s +INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.1111600399017334 s +DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=276604199521003127469782703819175368475, time:1750768583.1171188s req_ids:[8] +DEBUG 06-24 20:36:23 [manager.py:391] +INFO 06-24 20:36:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:23 [statics_utils.py:24] mean first cost: 231.22188198589367 ms +INFO 06-24 20:36:23 [statics_utils.py:24] mean per token cost: 0.05910160754670098 ms +INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:229.04706001281738ms total_cost_time:229.08926010131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13768 prompt_cache_len:5151 prompt_cache_ratio:0.3741284137129576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 +DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:23 [batch.py:51] router release req id 8 +INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10813260078430176 s +INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s +DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=68805721369044593082329602212297729764, time:1750768583.3416624s req_ids:[8] +DEBUG 06-24 20:36:23 [manager.py:391] +ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:213.6833667755127ms total_cost_time:213.72652053833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13769 prompt_cache_len:5151 prompt_cache_ratio:0.3741012419202556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 +DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:23 [batch.py:51] router release req id 8 +INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s +INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.11135363578796387 s +DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=160558118431269740121913247666909574047, time:1750768583.5623748s req_ids:[8] +DEBUG 06-24 20:36:23 [manager.py:391] +ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:383.93568992614746ms total_cost_time:383.98003578186035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13770 prompt_cache_len:5151 prompt_cache_ratio:0.37407407407407406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 +DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:23 [batch.py:51] router release req id 8 +INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10796022415161133 s +INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.10974979400634766 s +DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=75070953023451659117361048065797602897, time:1750768583.963411s req_ids:[8] +DEBUG 06-24 20:36:23 [manager.py:391] +ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:189.45741653442383ms total_cost_time:189.5003318786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13771 prompt_cache_len:5151 prompt_cache_ratio:0.3740469101735531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 +DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:24 [batch.py:51] router release req id 8 +INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s +INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11094021797180176 s +DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=197120761459988196119057736039053517062, time:1750768584.1534846s req_ids:[8] +DEBUG 06-24 20:36:24 [manager.py:391] +ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:216.7949676513672ms total_cost_time:216.83931350708008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13772 prompt_cache_len:5151 prompt_cache_ratio:0.3740197502178333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 +DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:24 [batch.py:51] router release req id 8 +INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10871601104736328 s +INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s +DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=52302614451390822232508305827296413715, time:1750768584.3863025s req_ids:[8] +DEBUG 06-24 20:36:24 [manager.py:391] +ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:228.3482551574707ms total_cost_time:228.3935546875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13773 prompt_cache_len:5151 prompt_cache_ratio:0.37399259420605535 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 +DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:24 [batch.py:51] router release req id 8 +INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10942840576171875 s +INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11145234107971191 s +DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=18175100276141320125582900888383239504, time:1750768584.6112075s req_ids:[8] +DEBUG 06-24 20:36:24 [manager.py:391] +ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:216.31860733032227ms total_cost_time:216.36152267456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13774 prompt_cache_len:5151 prompt_cache_ratio:0.37396544213736027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 +DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:24 [batch.py:51] router release req id 8 +INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.1084294319152832 s +INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11046123504638672 s +DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=170748058374648964227533720605739829606, time:1750768584.8326836s req_ids:[8] +DEBUG 06-24 20:36:24 [manager.py:391] +ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:214.3397331237793ms total_cost_time:214.38288688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13775 prompt_cache_len:5151 prompt_cache_ratio:0.3739382940108893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 +DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:24 [batch.py:51] router release req id 8 +INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.1088404655456543 s +INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11081981658935547 s +DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=270521432285469796941491136416857042900, time:1750768585.0669403s req_ids:[8] +DEBUG 06-24 20:36:25 [manager.py:391] +ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:399.5068073272705ms total_cost_time:399.550199508667ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13776 prompt_cache_len:5151 prompt_cache_ratio:0.373911149825784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 +DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:25 [batch.py:51] router release req id 8 +INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.10886645317077637 s +INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11099576950073242 s +DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=311512152921332558149500291923138559401, time:1750768585.4620469s req_ids:[8] +DEBUG 06-24 20:36:25 [manager.py:391] +ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:215.93785285949707ms total_cost_time:215.98243713378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13777 prompt_cache_len:5151 prompt_cache_ratio:0.37388400958118606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 +DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:25 [batch.py:51] router release req id 8 +INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s +INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11018109321594238 s +DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=246547596626297198728442126592732247677, time:1750768585.6973033s req_ids:[8] +DEBUG 06-24 20:36:25 [manager.py:391] +ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:230.23724555969238ms total_cost_time:230.28206825256348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13778 prompt_cache_len:5151 prompt_cache_ratio:0.37385687327623746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 +DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:25 [batch.py:51] router release req id 8 +INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.1082007884979248 s +INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s +DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=28724763507144094436127486113110173813, time:1750768585.935976s req_ids:[8] +DEBUG 06-24 20:36:25 [manager.py:391] +ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:191.53738021850586ms total_cost_time:191.58935546875ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:13779 prompt_cache_len:5151 prompt_cache_ratio:0.37382974091008053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 +DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:26 [batch.py:51] router release req id 8 +INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10881257057189941 s +INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.11084151268005371 s +DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=34095584697262581598314064984480564613, time:1750768586.124112s req_ids:[8] +DEBUG 06-24 20:36:26 [manager.py:391] +ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:213.85598182678223ms total_cost_time:213.8984203338623ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13780 prompt_cache_len:5151 prompt_cache_ratio:0.37380261248185775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 +DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:26 [batch.py:51] router release req id 8 +INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.1081688404083252 s +INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.10993552207946777 s +DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=258795262993347749048811046311023198032, time:1750768586.343138s req_ids:[8] +DEBUG 06-24 20:36:26 [manager.py:391] +ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:216.58563613891602ms total_cost_time:216.6283130645752ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13781 prompt_cache_len:5151 prompt_cache_ratio:0.37377548799071186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 +DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:26 [batch.py:51] router release req id 8 +INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s +INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.10970640182495117 s +DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=47099747531194318677015863037323955656, time:1750768586.566929s req_ids:[8] +DEBUG 06-24 20:36:26 [manager.py:391] +ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:378.47304344177246ms total_cost_time:378.51595878601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13782 prompt_cache_len:5151 prompt_cache_ratio:0.3737483674357858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 +DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:26 [batch.py:51] router release req id 8 +INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10919713973999023 s +INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.11103153228759766 s +DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=310972547881158329060879802940590828289, time:1750768586.9526803s req_ids:[8] +DEBUG 06-24 20:36:26 [manager.py:391] +ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:210.8302116394043ms total_cost_time:210.87360382080078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13783 prompt_cache_len:5151 prompt_cache_ratio:0.37372125081622287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 +DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:27 [batch.py:51] router release req id 8 +INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10943746566772461 s +INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11133074760437012 s +DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=22602145400359889265682699674721264131, time:1750768587.172856s req_ids:[8] +DEBUG 06-24 20:36:27 [manager.py:391] +ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:215.26670455932617ms total_cost_time:215.31200408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13784 prompt_cache_len:5151 prompt_cache_ratio:0.3736941381311666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 +DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:27 [batch.py:51] router release req id 8 +INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10879182815551758 s +INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11059236526489258 s +DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=172126498529678486574065180407957794789, time:1750768587.3940177s req_ids:[8] +DEBUG 06-24 20:36:27 [manager.py:391] +ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:217.37098693847656ms total_cost_time:217.41461753845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13785 prompt_cache_len:5151 prompt_cache_ratio:0.3736670293797606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 +DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:27 [batch.py:51] router release req id 8 +INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10834312438964844 s +INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s +DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=31819115684325781723398951256790989209, time:1750768587.6156986s req_ids:[8] +DEBUG 06-24 20:36:27 [manager.py:391] +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:171.06389999389648ms total_cost_time:171.12421989440918ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:13786 prompt_cache_len:5151 prompt_cache_ratio:0.373639924561149 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 +DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:27 [batch.py:51] router release req id 8 +INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10892200469970703 s +INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11069703102111816 s +DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=141415734649546514267073167431458924933, time:1750768587.7975225s req_ids:[8] +DEBUG 06-24 20:36:27 [manager.py:391] +ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:198.61865043640137ms total_cost_time:198.66085052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13787 prompt_cache_len:5151 prompt_cache_ratio:0.37361282367447596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 +DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:27 [batch.py:51] router release req id 8 +INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10753750801086426 s +INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.10945820808410645 s +DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=191573883793787044665604234645023459800, time:1750768588.009976s req_ids:[8] +DEBUG 06-24 20:36:28 [manager.py:391] +ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:390.8510208129883ms total_cost_time:390.89488983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13788 prompt_cache_len:5151 prompt_cache_ratio:0.373585726718886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 +DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:28 [batch.py:51] router release req id 8 +INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10888314247131348 s +INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.11089181900024414 s +DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=222084563514065249465223596331072764812, time:1750768588.401354s req_ids:[8] +DEBUG 06-24 20:36:28 [manager.py:391] +ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:214.14542198181152ms total_cost_time:214.186429977417ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13789 prompt_cache_len:5151 prompt_cache_ratio:0.3735586336935238 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 +DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:28 [batch.py:51] router release req id 8 +INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10840177536010742 s +INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.1103050708770752 s +DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=258993850365837077497049684119097983955, time:1750768588.622603s req_ids:[8] +DEBUG 06-24 20:36:28 [manager.py:391] +ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:212.7218246459961ms total_cost_time:212.76497840881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13790 prompt_cache_len:5151 prompt_cache_ratio:0.37353154459753446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 +DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:28 [batch.py:51] router release req id 8 +INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10889840126037598 s +INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.11088085174560547 s +DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=164015436436284884420168237088550025243, time:1750768588.841591s req_ids:[8] +DEBUG 06-24 20:36:28 [manager.py:391] +DEBUG 06-24 20:36:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 54997.253 tokens/s +DEBUG 06-24 20:36:28 [stats.py:37] Avg prompt tokens throughput: 54989.267 tokens/s +DEBUG 06-24 20:36:28 [stats.py:37] Avg generate tokens throughput: 7.986 tokens/s +ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:213.1824493408203ms total_cost_time:213.2258415222168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13791 prompt_cache_len:5151 prompt_cache_ratio:0.3735044594300631 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 +DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:28 [batch.py:51] router release req id 8 +INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.1085362434387207 s +INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.11050105094909668 s +INFO 06-24 20:36:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=298784555303675484243373553362283608036, time:1750768589.0739431s req_ids:[8] +DEBUG 06-24 20:36:29 [manager.py:391] +ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:226.27806663513184ms total_cost_time:226.3202667236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13792 prompt_cache_len:5151 prompt_cache_ratio:0.3734773781902552 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 +DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:29 [batch.py:51] router release req id 8 +INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.10796117782592773 s +INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.1097409725189209 s +DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=12306011289213749835730560298403401302, time:1750768589.3072891s req_ids:[8] +DEBUG 06-24 20:36:29 [manager.py:391] +ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:225.8775234222412ms total_cost_time:225.9213924407959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13793 prompt_cache_len:5151 prompt_cache_ratio:0.37345030087725656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 +DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:29 [batch.py:51] router release req id 8 +INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s +INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.10989260673522949 s +DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=268402253598227928723197136817821402769, time:1750768589.5292885s req_ids:[8] +DEBUG 06-24 20:36:29 [manager.py:391] +ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:341.8159484863281ms total_cost_time:341.8605327606201ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13794 prompt_cache_len:5151 prompt_cache_ratio:0.3734232274902131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 +DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:29 [batch.py:51] router release req id 8 +INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s +INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s +DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=199074250767939585557250321712788844136, time:1750768589.877693s req_ids:[8] +DEBUG 06-24 20:36:29 [manager.py:391] +ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:202.47650146484375ms total_cost_time:202.52060890197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13795 prompt_cache_len:5151 prompt_cache_ratio:0.3733961580282711 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 +DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:30 [batch.py:51] router release req id 8 +INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s +INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.10984921455383301 s +DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=21662857430865574355142304665460947509, time:1750768590.0880036s req_ids:[8] +DEBUG 06-24 20:36:30 [manager.py:391] +ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:210.0837230682373ms total_cost_time:210.1278305053711ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13796 prompt_cache_len:5151 prompt_cache_ratio:0.373369092490577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 +DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:30 [batch.py:51] router release req id 8 +INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10906052589416504 s +INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11103940010070801 s +DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=229119526404368664339368612389792333815, time:1750768590.305363s req_ids:[8] +DEBUG 06-24 20:36:30 [manager.py:391] +ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:214.53380584716797ms total_cost_time:214.57505226135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13797 prompt_cache_len:5151 prompt_cache_ratio:0.37334203087627743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 +DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:30 [batch.py:51] router release req id 8 +INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s +INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031460762023926 s +DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=294187294687715179390350593562597093965, time:1750768590.5259128s req_ids:[8] +DEBUG 06-24 20:36:30 [manager.py:391] +ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:213.7007713317871ms total_cost_time:213.7444019317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13798 prompt_cache_len:5151 prompt_cache_ratio:0.3733149731845195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 +DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:30 [batch.py:51] router release req id 8 +INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10886597633361816 s +INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11066126823425293 s +DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=263157409327347663164578291585108979386, time:1750768590.7467878s req_ids:[8] +DEBUG 06-24 20:36:30 [manager.py:391] +ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:213.41514587402344ms total_cost_time:213.4568691253662ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13799 prompt_cache_len:5151 prompt_cache_ratio:0.37328791941445033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 +DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:30 [batch.py:51] router release req id 8 +INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10862874984741211 s +INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s +DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=84280328490890618386309806894197726230, time:1750768590.9657977s req_ids:[8] +DEBUG 06-24 20:36:30 [manager.py:391] +ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:376.59168243408203ms total_cost_time:376.6331672668457ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13800 prompt_cache_len:5151 prompt_cache_ratio:0.37326086956521737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 +DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:31 [batch.py:51] router release req id 8 +INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10898637771606445 s +INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.11097908020019531 s +DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=235533212991741272577394618294583172855, time:1750768591.3503742s req_ids:[8] +DEBUG 06-24 20:36:31 [manager.py:391] +ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:215.60120582580566ms total_cost_time:215.64412117004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13801 prompt_cache_len:5151 prompt_cache_ratio:0.3732338236359684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 +DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:31 [batch.py:51] router release req id 8 +INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s +INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.10966348648071289 s +DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=311589872824876410451743404516249018369, time:1750768591.5727096s req_ids:[8] +DEBUG 06-24 20:36:31 [manager.py:391] +ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:212.53657341003418ms total_cost_time:212.57829666137695ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13802 prompt_cache_len:5151 prompt_cache_ratio:0.3732067816258513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 +DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:31 [batch.py:51] router release req id 8 +INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10861492156982422 s +INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.11051177978515625 s +DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=180994089280192819091335178246480474487, time:1750768591.804878s req_ids:[8] +DEBUG 06-24 20:36:31 [manager.py:391] +ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:228.1324863433838ms total_cost_time:228.17659378051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13803 prompt_cache_len:5151 prompt_cache_ratio:0.37317974353401434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 +DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:31 [batch.py:51] router release req id 8 +INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10872769355773926 s +INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s +DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=112263642498292028161908322246309690685, time:1750768592.0293806s req_ids:[8] +DEBUG 06-24 20:36:32 [manager.py:391] +ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:213.52338790893555ms total_cost_time:213.56630325317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13804 prompt_cache_len:5151 prompt_cache_ratio:0.3731527093596059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 +DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:32 [batch.py:51] router release req id 8 +INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s +INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11066055297851562 s +DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=105635199432557649071407602140511473266, time:1750768592.2499352s req_ids:[8] +DEBUG 06-24 20:36:32 [manager.py:391] +ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:214.1869068145752ms total_cost_time:214.22958374023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13805 prompt_cache_len:5151 prompt_cache_ratio:0.37312567910177474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 +DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:32 [batch.py:51] router release req id 8 +INFO 06-24 20:36:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.31102728843688965 s +INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.31314516067504883 s +DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=183878873506865024951223248494418614161, time:1750768592.6740313s req_ids:[8] +DEBUG 06-24 20:36:32 [manager.py:391] +ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:424.18479919433594ms total_cost_time:424.2405891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:13806 prompt_cache_len:5151 prompt_cache_ratio:0.3730986527596697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 +DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:32 [batch.py:51] router release req id 8 +INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10816025733947754 s +INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11016988754272461 s +DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=29119128694980337095192697942810118837, time:1750768592.9186168s req_ids:[8] +DEBUG 06-24 20:36:32 [manager.py:391] +ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:233.24322700500488ms total_cost_time:233.28542709350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13807 prompt_cache_len:5151 prompt_cache_ratio:0.37307163033244006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 +DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:33 [batch.py:51] router release req id 8 +INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10830402374267578 s +INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11042428016662598 s +DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=41890065043515425951854453205069601072, time:1750768593.1425343s req_ids:[8] +DEBUG 06-24 20:36:33 [manager.py:391] +ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:215.71850776672363ms total_cost_time:215.7607078552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13808 prompt_cache_len:5151 prompt_cache_ratio:0.3730446118192352 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 +DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:33 [batch.py:51] router release req id 8 +INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10834813117980957 s +INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.1104891300201416 s +DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=67945285421627667327401289378955552362, time:1750768593.365414s req_ids:[8] +DEBUG 06-24 20:36:33 [manager.py:391] +ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:213.4726047515869ms total_cost_time:213.51861953735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13809 prompt_cache_len:5151 prompt_cache_ratio:0.37301759721920486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 +DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:33 [batch.py:51] router release req id 8 +INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10859990119934082 s +INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11068487167358398 s +DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=98712170673104117742996927683031159346, time:1750768593.5859854s req_ids:[8] +DEBUG 06-24 20:36:33 [manager.py:391] +ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:213.9601707458496ms total_cost_time:214.00165557861328ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13810 prompt_cache_len:5151 prompt_cache_ratio:0.3729905865314989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 +DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:33 [batch.py:51] router release req id 8 +DEBUG 06-24 20:36:33 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:33 [manager.py:283] +DEBUG 06-24 20:36:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:33 [manager.py:284] +INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10910415649414062 s +INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11101317405700684 s +DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=309805742864741012871573084296122899615, time:1750768593.8196313s req_ids:[8] +DEBUG 06-24 20:36:33 [manager.py:391] +ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:229.74324226379395ms total_cost_time:229.78639602661133ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13811 prompt_cache_len:5151 prompt_cache_ratio:0.37296357975526756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 +DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:33 [batch.py:51] router release req id 8 +INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.1085505485534668 s +INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11075186729431152 s +DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=115024517166425707963766146518802832767, time:1750768594.046607s req_ids:[8] +DEBUG 06-24 20:36:34 [manager.py:391] +ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:383.0831050872803ms total_cost_time:383.12697410583496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13812 prompt_cache_len:5151 prompt_cache_ratio:0.37293657688966114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 +DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:34 [batch.py:51] router release req id 8 +INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10866546630859375 s +INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s +DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=70541196149036493980534606155618423455, time:1750768594.4355533s req_ids:[8] +DEBUG 06-24 20:36:34 [manager.py:391] +ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:213.70959281921387ms total_cost_time:213.75131607055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13813 prompt_cache_len:5151 prompt_cache_ratio:0.37290957793383045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 +DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:34 [batch.py:51] router release req id 8 +INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s +INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s +DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=289269212022392496069488863591953522820, time:1750768594.666673s req_ids:[8] +DEBUG 06-24 20:36:34 [manager.py:391] +ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:226.78232192993164ms total_cost_time:226.82499885559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13814 prompt_cache_len:5151 prompt_cache_ratio:0.37288258288692633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 +DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:34 [batch.py:51] router release req id 8 +INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s +INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s +DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=147999321632835356861945877280988447644, time:1750768594.8904595s req_ids:[8] +DEBUG 06-24 20:36:34 [manager.py:391] +ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:217.1926498413086ms total_cost_time:217.23461151123047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13815 prompt_cache_len:5151 prompt_cache_ratio:0.3728555917480999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 +DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:35 [batch.py:51] router release req id 8 +INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10816526412963867 s +INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985660552978516 s +DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=233781318575570960723504139025265551648, time:1750768595.113944s req_ids:[8] +DEBUG 06-24 20:36:35 [manager.py:391] +ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:174.32403564453125ms total_cost_time:174.36575889587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13816 prompt_cache_len:5151 prompt_cache_ratio:0.3728286045165026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 +DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:35 [batch.py:51] router release req id 8 +INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10708975791931152 s +INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10892653465270996 s +DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=250347019763933290440288368824110902912, time:1750768595.2951615s req_ids:[8] +DEBUG 06-24 20:36:35 [manager.py:391] +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:205.47962188720703ms total_cost_time:205.52539825439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13817 prompt_cache_len:5151 prompt_cache_ratio:0.37280162119128607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 +DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:35 [batch.py:51] router release req id 8 +INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10813355445861816 s +INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.11006927490234375 s +DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=197984814368252103898274975904288632155, time:1750768595.5067356s req_ids:[8] +DEBUG 06-24 20:36:35 [manager.py:391] +ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:376.85513496398926ms total_cost_time:376.89900398254395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13818 prompt_cache_len:5151 prompt_cache_ratio:0.37277464177160224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 +DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:35 [batch.py:51] router release req id 8 +INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s +INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10974335670471191 s +DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=318426154573738710645627884375551040253, time:1750768595.8908987s req_ids:[8] +DEBUG 06-24 20:36:35 [manager.py:391] +ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:214.9968147277832ms total_cost_time:215.0406837463379ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13819 prompt_cache_len:5151 prompt_cache_ratio:0.3727476662566032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 +DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:36 [batch.py:51] router release req id 8 +INFO 06-24 20:36:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1091315746307373 s +INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.1110687255859375 s +DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=244079868142628014672219542702995731856, time:1750768596.1134791s req_ids:[8] +DEBUG 06-24 20:36:36 [manager.py:391] +ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:209.48410034179688ms total_cost_time:209.53011512756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13820 prompt_cache_len:5151 prompt_cache_ratio:0.3727206946454414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 +DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:36 [batch.py:51] router release req id 8 +INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.10849666595458984 s +INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.11051607131958008 s +DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=90662897453574747515343883787861614094, time:1750768596.330456s req_ids:[8] +DEBUG 06-24 20:36:36 [manager.py:391] +ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:209.34462547302246ms total_cost_time:209.39016342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13821 prompt_cache_len:5151 prompt_cache_ratio:0.3726937269372694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 +DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:36 [batch.py:51] router release req id 8 +INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.10779738426208496 s +INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.10990643501281738 s +DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=314333125187775010470799002796136271698, time:1750768596.5537639s req_ids:[8] +DEBUG 06-24 20:36:36 [manager.py:391] +ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:222.5472927093506ms total_cost_time:222.59163856506348ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13822 prompt_cache_len:5151 prompt_cache_ratio:0.37266676313124003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 +DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:36 [batch.py:51] router release req id 8 +INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s +INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.11033368110656738 s +DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=1105617874569855881666024151752766124, time:1750768596.779617s req_ids:[8] +DEBUG 06-24 20:36:36 [manager.py:391] +ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:216.29691123962402ms total_cost_time:216.3398265838623ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13823 prompt_cache_len:5151 prompt_cache_ratio:0.3726398032265065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 +DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:36 [batch.py:51] router release req id 8 +INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1076047420501709 s +INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s +DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=92651499534998079335782367191799202241, time:1750768596.9991887s req_ids:[8] +DEBUG 06-24 20:36:36 [manager.py:391] +ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:371.75822257995605ms total_cost_time:371.80423736572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13824 prompt_cache_len:5151 prompt_cache_ratio:0.3726128472222222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 +DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:37 [batch.py:51] router release req id 8 +INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10924839973449707 s +INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.11116671562194824 s +DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=160755598365206827216034568632498794718, time:1750768597.3777874s req_ids:[8] +DEBUG 06-24 20:36:37 [manager.py:391] +ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:215.9874439239502ms total_cost_time:216.0325050354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13825 prompt_cache_len:5151 prompt_cache_ratio:0.3725858951175407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 +DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:37 [batch.py:51] router release req id 8 +INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10918378829956055 s +INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.1112680435180664 s +DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=239155918142721663883829082087636575407, time:1750768597.5996947s req_ids:[8] +DEBUG 06-24 20:36:37 [manager.py:391] +ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:212.81790733337402ms total_cost_time:212.8608226776123ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13826 prompt_cache_len:5151 prompt_cache_ratio:0.3725589469116158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 +DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:37 [batch.py:51] router release req id 8 +INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10921144485473633 s +INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.11151456832885742 s +DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=324467070733286231624022932105240028778, time:1750768597.8255916s req_ids:[8] +DEBUG 06-24 20:36:37 [manager.py:391] +ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:219.73276138305664ms total_cost_time:219.77734565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13827 prompt_cache_len:5151 prompt_cache_ratio:0.37253200260360164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 +DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:37 [batch.py:51] router release req id 8 +INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10887455940246582 s +INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11089420318603516 s +DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=39050962449329032085341850144249384485, time:1750768598.0483422s req_ids:[8] +DEBUG 06-24 20:36:38 [manager.py:391] +ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:215.53349494934082ms total_cost_time:215.5766487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13828 prompt_cache_len:5151 prompt_cache_ratio:0.3725050621926526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 +DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:38 [batch.py:51] router release req id 8 +INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s +INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11104846000671387 s +DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=244719723756609903526087332932343750787, time:1750768598.269116s req_ids:[8] +DEBUG 06-24 20:36:38 [manager.py:391] +ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:372.01786041259766ms total_cost_time:372.06339836120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13829 prompt_cache_len:5151 prompt_cache_ratio:0.3724781256779232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 +DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:38 [batch.py:51] router release req id 8 +INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s +INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s +DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=230503635265953737339175586556390144311, time:1750768598.6486042s req_ids:[8] +DEBUG 06-24 20:36:38 [manager.py:391] +ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:214.09201622009277ms total_cost_time:214.13612365722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13830 prompt_cache_len:5151 prompt_cache_ratio:0.37245119305856833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 +DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:38 [batch.py:51] router release req id 8 +INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10853958129882812 s +INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s +DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=5213523359176640098225327946997009545, time:1750768598.87044s req_ids:[8] +DEBUG 06-24 20:36:38 [manager.py:391] +DEBUG 06-24 20:36:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 55095.016 tokens/s +DEBUG 06-24 20:36:38 [stats.py:37] Avg prompt tokens throughput: 55087.039 tokens/s +DEBUG 06-24 20:36:38 [stats.py:37] Avg generate tokens throughput: 7.977 tokens/s +ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:216.27163887023926ms total_cost_time:216.31717681884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13831 prompt_cache_len:5151 prompt_cache_ratio:0.37242426433374304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 +DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:39 [batch.py:51] router release req id 8 +INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10915207862854004 s +INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11117720603942871 s +DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=147498267109635348951056480653060794688, time:1750768599.0980783s req_ids:[8] +DEBUG 06-24 20:36:39 [manager.py:391] +ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:221.34017944335938ms total_cost_time:221.38333320617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13832 prompt_cache_len:5151 prompt_cache_ratio:0.3723973395026027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 +DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:39 [batch.py:51] router release req id 8 +INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s +INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11136937141418457 s +DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=199183532784738756898525049911494007056, time:1750768599.3201354s req_ids:[8] +DEBUG 06-24 20:36:39 [manager.py:391] +ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:213.8350009918213ms total_cost_time:213.87910842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13833 prompt_cache_len:5151 prompt_cache_ratio:0.37237041856430275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 +DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:39 [batch.py:51] router release req id 8 +INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s +INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11092090606689453 s +DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=160099388801816922247152124771278877809, time:1750768599.540271s req_ids:[8] +DEBUG 06-24 20:36:39 [manager.py:391] +ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:224.49755668640137ms total_cost_time:224.55739974975586ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:13834 prompt_cache_len:5151 prompt_cache_ratio:0.3723435015179991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 +DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:39 [batch.py:51] router release req id 8 +INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10746002197265625 s +INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.10963702201843262 s +DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=8477697362157091446705166925786408555, time:1750768599.7746449s req_ids:[8] +DEBUG 06-24 20:36:39 [manager.py:391] +ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:390.53845405578613ms total_cost_time:390.60425758361816ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:13835 prompt_cache_len:5151 prompt_cache_ratio:0.37231658836284787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 +DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:40 [batch.py:51] router release req id 8 +INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10922026634216309 s +INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11135053634643555 s +DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=296883022349961401182770048571305628401, time:1750768600.1689537s req_ids:[8] +DEBUG 06-24 20:36:40 [manager.py:391] +ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:214.67876434326172ms total_cost_time:214.7202491760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13836 prompt_cache_len:5151 prompt_cache_ratio:0.3722896790980052 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 +DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:40 [batch.py:51] router release req id 8 +INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s +INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014556884765625 s +DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=186959692393502773127962698170287084022, time:1750768600.3898444s req_ids:[8] +DEBUG 06-24 20:36:40 [manager.py:391] +ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:214.65086936950684ms total_cost_time:214.69521522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13837 prompt_cache_len:5151 prompt_cache_ratio:0.3722627737226277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 +DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:40 [batch.py:51] router release req id 8 +INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s +INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11001014709472656 s +DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=151593025992029793929327637998234458817, time:1750768600.6118808s req_ids:[8] +DEBUG 06-24 20:36:40 [manager.py:391] +ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:215.38972854614258ms total_cost_time:215.43407440185547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13838 prompt_cache_len:5151 prompt_cache_ratio:0.37223587223587223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 +DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:40 [batch.py:51] router release req id 8 +INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10880422592163086 s +INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11088061332702637 s +DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=284111348199580201809267296856892105182, time:1750768600.833257s req_ids:[8] +DEBUG 06-24 20:36:40 [manager.py:391] +ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:213.85598182678223ms total_cost_time:213.8974666595459ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13839 prompt_cache_len:5151 prompt_cache_ratio:0.37220897463689573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 +DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:40 [batch.py:51] router release req id 8 +INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.1089944839477539 s +INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.11111688613891602 s +DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=229560012411902646121446241335715972761, time:1750768601.0539868s req_ids:[8] +DEBUG 06-24 20:36:41 [manager.py:391] +ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:215.2853012084961ms total_cost_time:215.3306007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13840 prompt_cache_len:5151 prompt_cache_ratio:0.3721820809248555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 +DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:41 [batch.py:51] router release req id 8 +INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.1082298755645752 s +INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10961794853210449 s +DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=191000643797667570464130565803862420073, time:1750768601.275609s req_ids:[8] +DEBUG 06-24 20:36:41 [manager.py:391] +ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:387.82382011413574ms total_cost_time:387.8672122955322ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13841 prompt_cache_len:5151 prompt_cache_ratio:0.37215519109890904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 +DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:41 [batch.py:51] router release req id 8 +INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.10863351821899414 s +INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10999798774719238 s +DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=337285015877567928872858512179850117286, time:1750768601.6697965s req_ids:[8] +DEBUG 06-24 20:36:41 [manager.py:391] +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:215.64030647277832ms total_cost_time:215.6827449798584ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13842 prompt_cache_len:5151 prompt_cache_ratio:0.37212830515821416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 +DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:41 [batch.py:51] router release req id 8 +INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.10800719261169434 s +INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10941600799560547 s +DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=188499312184582458259122916032242095939, time:1750768601.9067285s req_ids:[8] +DEBUG 06-24 20:36:41 [manager.py:391] +ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:229.72512245178223ms total_cost_time:229.77089881896973ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13843 prompt_cache_len:5151 prompt_cache_ratio:0.3721014231019288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 +DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:42 [batch.py:51] router release req id 8 +INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10821342468261719 s +INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.1094977855682373 s +DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=200937286312493235099449575281552991846, time:1750768602.1284418s req_ids:[8] +DEBUG 06-24 20:36:42 [manager.py:391] +ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:206.97951316833496ms total_cost_time:207.02362060546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13844 prompt_cache_len:5151 prompt_cache_ratio:0.3720745449292112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 +DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:42 [batch.py:51] router release req id 8 +INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.1086421012878418 s +INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s +DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=168031610327138770871967588498501665204, time:1750768602.3416069s req_ids:[8] +DEBUG 06-24 20:36:42 [manager.py:391] +ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:213.4993076324463ms total_cost_time:213.54246139526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13845 prompt_cache_len:5151 prompt_cache_ratio:0.37204767063921995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 +DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:42 [batch.py:51] router release req id 8 +INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s +INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10977530479431152 s +DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=267902443673609443096683853817627647410, time:1750768602.5630279s req_ids:[8] +DEBUG 06-24 20:36:42 [manager.py:391] +ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:215.15250205993652ms total_cost_time:215.1954174041748ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13846 prompt_cache_len:5151 prompt_cache_ratio:0.37202080023111367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 +DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:42 [batch.py:51] router release req id 8 +INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10838627815246582 s +INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s +DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=49978266871827866954662014980714517645, time:1750768602.7835395s req_ids:[8] +DEBUG 06-24 20:36:42 [manager.py:391] +ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:379.227876663208ms total_cost_time:379.2712688446045ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13847 prompt_cache_len:5151 prompt_cache_ratio:0.3719939337040514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 +DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:43 [batch.py:51] router release req id 8 +INFO 06-24 20:36:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s +INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s +DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=289699605286671532784967002474739684144, time:1750768603.169512s req_ids:[8] +DEBUG 06-24 20:36:43 [manager.py:391] +ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:213.08422088623047ms total_cost_time:213.12761306762695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13848 prompt_cache_len:5151 prompt_cache_ratio:0.3719670710571924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 +DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:43 [batch.py:51] router release req id 8 +INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10862970352172852 s +INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11002016067504883 s +DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=142644558757226871491006825729974212818, time:1750768603.3885543s req_ids:[8] +DEBUG 06-24 20:36:43 [manager.py:391] +ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:209.75327491760254ms total_cost_time:209.79762077331543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13849 prompt_cache_len:5151 prompt_cache_ratio:0.371940212289696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 +DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:43 [batch.py:51] router release req id 8 +INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10832548141479492 s +INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11029887199401855 s +DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=3030303708089589036805158924491963015, time:1750768603.6026955s req_ids:[8] +DEBUG 06-24 20:36:43 [manager.py:391] +ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:210.84880828857422ms total_cost_time:210.8936309814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13850 prompt_cache_len:5151 prompt_cache_ratio:0.371913357400722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 +DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:43 [batch.py:51] router release req id 8 +INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10785269737243652 s +INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.10970473289489746 s +DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=122020811646544496219407959949211508329, time:1750768603.8215182s req_ids:[8] +DEBUG 06-24 20:36:43 [manager.py:391] +ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:209.90657806396484ms total_cost_time:209.95044708251953ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13851 prompt_cache_len:5151 prompt_cache_ratio:0.3718865063894304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 +DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:43 [batch.py:51] router release req id 8 +INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10866403579711914 s +INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.11066794395446777 s +DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=312639360194585937348427258759687999681, time:1750768604.0372434s req_ids:[8] +DEBUG 06-24 20:36:44 [manager.py:391] +ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.75439453125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13852 prompt_cache_len:5151 prompt_cache_ratio:0.3718596592549812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 +DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:44 [batch.py:51] router release req id 8 +INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.3080918788909912 s +INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.30947160720825195 s +DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=142262630836884404567964149602708608441, time:1750768604.4608305s req_ids:[8] +DEBUG 06-24 20:36:44 [manager.py:391] +ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:427.9770851135254ms total_cost_time:428.0211925506592ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13853 prompt_cache_len:5151 prompt_cache_ratio:0.37183281599653506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 +DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:44 [batch.py:51] router release req id 8 +INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10920572280883789 s +INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.11062884330749512 s +DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=103213169192415222501952943854519291786, time:1750768604.6888561s req_ids:[8] +DEBUG 06-24 20:36:44 [manager.py:391] +ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:213.28186988830566ms total_cost_time:213.32693099975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13854 prompt_cache_len:5151 prompt_cache_ratio:0.3718059766132525 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 +DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:44 [batch.py:51] router release req id 8 +INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s +INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.10966801643371582 s +DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=195920442552411858775117078734650647630, time:1750768604.9104292s req_ids:[8] +DEBUG 06-24 20:36:44 [manager.py:391] +ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:210.65402030944824ms total_cost_time:210.69741249084473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13855 prompt_cache_len:5151 prompt_cache_ratio:0.3717791411042945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 +DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:45 [batch.py:51] router release req id 8 +INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s +INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10830545425415039 s +DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=109166986856396400011509957237007307800, time:1750768605.126659s req_ids:[8] +DEBUG 06-24 20:36:45 [manager.py:391] +ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:211.00258827209473ms total_cost_time:211.0462188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13856 prompt_cache_len:5151 prompt_cache_ratio:0.37175230946882215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 +DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:45 [batch.py:51] router release req id 8 +INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s +INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10932469367980957 s +DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=57315085074251537041873232839668833102, time:1750768605.345094s req_ids:[8] +DEBUG 06-24 20:36:45 [manager.py:391] +ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:209.37705039978027ms total_cost_time:209.42258834838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13857 prompt_cache_len:5151 prompt_cache_ratio:0.37172548170599695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 +DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:45 [batch.py:51] router release req id 8 +INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.10824728012084961 s +INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10945773124694824 s +DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=35133569341582258199255030593270971933, time:1750768605.5611644s req_ids:[8] +DEBUG 06-24 20:36:45 [manager.py:391] +ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:212.16273307800293ms total_cost_time:212.2042179107666ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13858 prompt_cache_len:5151 prompt_cache_ratio:0.3716986578149805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 +DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:45 [batch.py:51] router release req id 8 +INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.1073606014251709 s +INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10857224464416504 s +DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=210010580968553470525848017144573051964, time:1750768605.779655s req_ids:[8] +DEBUG 06-24 20:36:45 [manager.py:391] +ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:382.59005546569824ms total_cost_time:382.6336860656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13859 prompt_cache_len:5151 prompt_cache_ratio:0.3716718377949347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 +DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:46 [batch.py:51] router release req id 8 +INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s +INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10991430282592773 s +DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=89827057632358311562881479544441812275, time:1750768606.1698174s req_ids:[8] +DEBUG 06-24 20:36:46 [manager.py:391] +ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:212.22734451293945ms total_cost_time:212.27073669433594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13860 prompt_cache_len:5151 prompt_cache_ratio:0.37164502164502167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 +DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:46 [batch.py:51] router release req id 8 +INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s +INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.1090841293334961 s +DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=188340965640538397083371885415216526966, time:1750768606.3860781s req_ids:[8] +DEBUG 06-24 20:36:46 [manager.py:391] +ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:168.92218589782715ms total_cost_time:168.96438598632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13861 prompt_cache_len:5151 prompt_cache_ratio:0.3716182093644037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 +DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:46 [batch.py:51] router release req id 8 +INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10789132118225098 s +INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10906100273132324 s +DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=339886357382940863562481625270787972175, time:1750768606.5642962s req_ids:[8] +DEBUG 06-24 20:36:46 [manager.py:391] +ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:212.13865280151367ms total_cost_time:212.16773986816406ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:13862 prompt_cache_len:5151 prompt_cache_ratio:0.3715914009522435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 +DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:46 [batch.py:51] router release req id 8 +INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s +INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10993623733520508 s +DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=132624696046900502541277819093076895869, time:1750768606.7819715s req_ids:[8] +DEBUG 06-24 20:36:46 [manager.py:391] +ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:217.6229953765869ms total_cost_time:217.6668643951416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13863 prompt_cache_len:5151 prompt_cache_ratio:0.37156459640770395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 +DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:46 [batch.py:51] router release req id 8 +INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s +INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10985779762268066 s +DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=186907816647178689546833043204723998337, time:1750768607.0049124s req_ids:[8] +DEBUG 06-24 20:36:47 [manager.py:391] +ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:207.49974250793457ms total_cost_time:207.5352668762207ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:13864 prompt_cache_len:5151 prompt_cache_ratio:0.37153779572994805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 +DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:47 [batch.py:51] router release req id 8 +INFO 06-24 20:36:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10771298408508301 s +INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10902643203735352 s +DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=200525133305156964602392608855734386426, time:1750768607.2232866s req_ids:[8] +DEBUG 06-24 20:36:47 [manager.py:391] +ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:384.4027519226074ms total_cost_time:384.4466209411621ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13865 prompt_cache_len:5151 prompt_cache_ratio:0.3715109989181392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 +DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:47 [batch.py:51] router release req id 8 +INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10760974884033203 s +INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10900616645812988 s +DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=268770721857622098822176136249418496627, time:1750768607.6141279s req_ids:[8] +DEBUG 06-24 20:36:47 [manager.py:391] +ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:216.7975902557373ms total_cost_time:216.8421745300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13866 prompt_cache_len:5151 prompt_cache_ratio:0.37148420597144094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 +DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:47 [batch.py:51] router release req id 8 +INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10858702659606934 s +INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098475456237793 s +DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=186302833766340318518789690420728412987, time:1750768607.8367853s req_ids:[8] +DEBUG 06-24 20:36:47 [manager.py:391] +ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:206.63022994995117ms total_cost_time:206.67338371276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13867 prompt_cache_len:5151 prompt_cache_ratio:0.37145741688901707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 +DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:47 [batch.py:51] router release req id 8 +INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10882186889648438 s +INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.11002111434936523 s +DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=265756992099880502930729079153832710322, time:1750768608.050007s req_ids:[8] +DEBUG 06-24 20:36:48 [manager.py:391] +ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:210.35456657409668ms total_cost_time:210.39581298828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13868 prompt_cache_len:5151 prompt_cache_ratio:0.3714306316700317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 +DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:48 [batch.py:51] router release req id 8 +INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10707640647888184 s +INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.1083369255065918 s +DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=231235172953752515194520881944545861718, time:1750768608.2670817s req_ids:[8] +DEBUG 06-24 20:36:48 [manager.py:391] +ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:214.75696563720703ms total_cost_time:214.7994041442871ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13869 prompt_cache_len:5151 prompt_cache_ratio:0.37140385031364914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 +DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:48 [batch.py:51] router release req id 8 +INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s +INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.10944437980651855 s +DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=161192682418519083200897260738698253611, time:1750768608.4875064s req_ids:[8] +DEBUG 06-24 20:36:48 [manager.py:391] +ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:215.72375297546387ms total_cost_time:215.76738357543945ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13870 prompt_cache_len:5151 prompt_cache_ratio:0.37137707281903387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 +DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:48 [batch.py:51] router release req id 8 +INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10857295989990234 s +INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.10999202728271484 s +DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=232016903002391139886058467049020045805, time:1750768608.7108488s req_ids:[8] +DEBUG 06-24 20:36:48 [manager.py:391] +ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:36:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 54887.019 tokens/s +DEBUG 06-24 20:36:48 [stats.py:37] Avg prompt tokens throughput: 54878.996 tokens/s +DEBUG 06-24 20:36:48 [stats.py:37] Avg generate tokens throughput: 8.023 tokens/s +INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:373.06737899780273ms total_cost_time:373.08692932128906ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13871 prompt_cache_len:5151 prompt_cache_ratio:0.37135029918535073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 +DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:49 [batch.py:51] router release req id 8 +INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10815143585205078 s +INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s +DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=37410786281659214582087252740634380895, time:1750768609.090279s req_ids:[8] +DEBUG 06-24 20:36:49 [manager.py:391] +ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:215.4695987701416ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13872 prompt_cache_len:5151 prompt_cache_ratio:0.3713235294117647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 +DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:49 [batch.py:51] router release req id 8 +INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10899043083190918 s +INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.11033916473388672 s +DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=159139081636568805356092117868464190700, time:1750768609.3111503s req_ids:[8] +DEBUG 06-24 20:36:49 [manager.py:391] +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.37446975708008ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:13873 prompt_cache_len:5151 prompt_cache_ratio:0.37129676349744106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 +DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:49 [batch.py:51] router release req id 8 +INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10642743110656738 s +INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10766005516052246 s +DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=199949266094131816080751630877071451636, time:1750768609.5347183s req_ids:[8] +DEBUG 06-24 20:36:49 [manager.py:391] +ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:217.93293952941895ms total_cost_time:217.97633171081543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13874 prompt_cache_len:5151 prompt_cache_ratio:0.3712700014415453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 +DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:49 [batch.py:51] router release req id 8 +INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10874176025390625 s +INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.1100764274597168 s +DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=111480326396203791283489163708786483399, time:1750768609.7561395s req_ids:[8] +DEBUG 06-24 20:36:49 [manager.py:391] +ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:209.78474617004395ms total_cost_time:209.82933044433594ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13875 prompt_cache_len:5151 prompt_cache_ratio:0.37124324324324326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 +DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:49 [batch.py:51] router release req id 8 +INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.1075584888458252 s +INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10899543762207031 s +DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=136101432281230046515112299127644339559, time:1750768609.9733312s req_ids:[8] +DEBUG 06-24 20:36:49 [manager.py:391] +ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:382.25364685058594ms total_cost_time:382.2791576385498ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13876 prompt_cache_len:5151 prompt_cache_ratio:0.3712164889017008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 +DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:50 [batch.py:51] router release req id 8 +INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s +INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.11017227172851562 s +DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=24144117691648633256556517425554290391, time:1750768610.3624375s req_ids:[8] +DEBUG 06-24 20:36:50 [manager.py:391] +ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:208.5421085357666ms total_cost_time:208.5871696472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13877 prompt_cache_len:5151 prompt_cache_ratio:0.37118973841608416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 +DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:50 [batch.py:51] router release req id 8 +INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s +INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s +DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=6685709028180030372752286962180717499, time:1750768610.5792215s req_ids:[8] +DEBUG 06-24 20:36:50 [manager.py:391] +ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:218.780517578125ms total_cost_time:218.82271766662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13878 prompt_cache_len:5151 prompt_cache_ratio:0.3711629917855599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 +DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:50 [batch.py:51] router release req id 8 +INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.10855388641357422 s +INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.10982894897460938 s +DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=187144056471015864740758622640006219685, time:1750768610.7995799s req_ids:[8] +DEBUG 06-24 20:36:50 [manager.py:391] +ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:211.32278442382812ms total_cost_time:211.3661766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13879 prompt_cache_len:5151 prompt_cache_ratio:0.3711362490092946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 +DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:50 [batch.py:51] router release req id 8 +INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10867500305175781 s +INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.11008906364440918 s +DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=294414627366519872799643503579475874248, time:1750768611.0180745s req_ids:[8] +DEBUG 06-24 20:36:51 [manager.py:391] +ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:217.03696250915527ms total_cost_time:217.0562744140625ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13880 prompt_cache_len:5151 prompt_cache_ratio:0.3711095100864553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 +DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:51 [batch.py:51] router release req id 8 +INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10822868347167969 s +INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10952901840209961 s +DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=219237195035548130982560565690740558908, time:1750768611.242653s req_ids:[8] +DEBUG 06-24 20:36:51 [manager.py:391] +ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:211.5941047668457ms total_cost_time:211.6408348083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13881 prompt_cache_len:5151 prompt_cache_ratio:0.37108277501620923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 +DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:51 [batch.py:51] router release req id 8 +INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10779047012329102 s +INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10905647277832031 s +DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=241865483528386857085454563276463318729, time:1750768611.4681647s req_ids:[8] +DEBUG 06-24 20:36:51 [manager.py:391] +ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:398.95081520080566ms total_cost_time:398.99611473083496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13882 prompt_cache_len:5151 prompt_cache_ratio:0.37105604379772367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 +DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:51 [batch.py:51] router release req id 8 +INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.1078193187713623 s +INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10904097557067871 s +DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=206987606130043560835733932791108416096, time:1750768611.8649607s req_ids:[8] +DEBUG 06-24 20:36:51 [manager.py:391] +ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:214.71190452575684ms total_cost_time:214.7543430328369ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13883 prompt_cache_len:5151 prompt_cache_ratio:0.3710293164301664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 +DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:52 [batch.py:51] router release req id 8 +INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10795164108276367 s +INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10919690132141113 s +DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=112064123180520990523614817275728448218, time:1750768612.0888593s req_ids:[8] +DEBUG 06-24 20:36:52 [manager.py:391] +ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:210.96420288085938ms total_cost_time:210.99615097045898ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:13884 prompt_cache_len:5151 prompt_cache_ratio:0.37100259291270526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 +DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:52 [batch.py:51] router release req id 8 +INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10783886909484863 s +INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s +DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=230343888146461684206695859909805039429, time:1750768612.3059282s req_ids:[8] +DEBUG 06-24 20:36:52 [manager.py:391] +ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:217.18120574951172ms total_cost_time:217.2250747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13885 prompt_cache_len:5151 prompt_cache_ratio:0.3709758732445085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 +DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:52 [batch.py:51] router release req id 8 +INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10831451416015625 s +INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10951972007751465 s +DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=114049861170827987490525037949853720883, time:1750768612.5294292s req_ids:[8] +DEBUG 06-24 20:36:52 [manager.py:391] +ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:211.51065826416016ms total_cost_time:211.55595779418945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13886 prompt_cache_len:5151 prompt_cache_ratio:0.3709491574247443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 +DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:52 [batch.py:51] router release req id 8 +INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10798811912536621 s +INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10921406745910645 s +DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=58079987568008194806202508436541762270, time:1750768612.7473814s req_ids:[8] +DEBUG 06-24 20:36:52 [manager.py:391] +ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:215.62433242797852ms total_cost_time:215.6665325164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13887 prompt_cache_len:5151 prompt_cache_ratio:0.3709224454525816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 +DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:52 [batch.py:51] router release req id 8 +INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10847687721252441 s +INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10969829559326172 s +DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=6530909416576735998434985069341649683, time:1750768612.969092s req_ids:[8] +DEBUG 06-24 20:36:52 [manager.py:391] +INFO 06-24 20:36:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:36:53 [statics_utils.py:24] mean first cost: 231.35835489509327 ms +INFO 06-24 20:36:53 [statics_utils.py:24] mean per token cost: 0.058900548764697525 ms +ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:386.46578788757324ms total_cost_time:386.4884376525879ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13888 prompt_cache_len:5151 prompt_cache_ratio:0.37089573732718895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 +DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:53 [batch.py:51] router release req id 8 +INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.10816502571105957 s +INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.1094818115234375 s +DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=58321234920487974876483567044486685029, time:1750768613.3610613s req_ids:[8] +DEBUG 06-24 20:36:53 [manager.py:391] +INFO 06-24 20:36:53 [manager.py:620] left req id 8can release False refcount 3 +ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:214.6594524383545ms total_cost_time:214.7047519683838ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13889 prompt_cache_len:5151 prompt_cache_ratio:0.3708690330477356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 +DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:53 [batch.py:51] router release req id 8 +INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.109405517578125 s +INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s +DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=204776427042440465875919434420798862312, time:1750768613.5832922s req_ids:[8] +DEBUG 06-24 20:36:53 [manager.py:391] +ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:214.6458625793457ms total_cost_time:214.6894931793213ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13890 prompt_cache_len:5151 prompt_cache_ratio:0.37084233261339095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 +DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:53 [batch.py:51] router release req id 8 +INFO 06-24 20:36:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.10839581489562988 s +INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.10965251922607422 s +DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=229446621718952886065594471646342434207, time:1750768613.8037827s req_ids:[8] +DEBUG 06-24 20:36:53 [manager.py:391] +ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:213.01984786987305ms total_cost_time:213.06324005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13891 prompt_cache_len:5151 prompt_cache_ratio:0.37081563602332446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 +DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:53 [batch.py:51] router release req id 8 +INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.10820651054382324 s +INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10950016975402832 s +DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=55510846210750215855701179922266833293, time:1750768614.0232441s req_ids:[8] +DEBUG 06-24 20:36:54 [manager.py:391] +ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:213.20819854736328ms total_cost_time:213.25016021728516ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13892 prompt_cache_len:5151 prompt_cache_ratio:0.37078894327670603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 +DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:54 [batch.py:51] router release req id 8 +INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.1090688705444336 s +INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.1103658676147461 s +DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=224121525366343885475807795940245334970, time:1750768614.2442517s req_ids:[8] +DEBUG 06-24 20:36:54 [manager.py:391] +ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:215.5001163482666ms total_cost_time:215.5439853668213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13893 prompt_cache_len:5151 prompt_cache_ratio:0.37076225437270566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 +DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:54 [batch.py:51] router release req id 8 +INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.10854649543762207 s +INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10979890823364258 s +DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=103245858121844457271003553170276015467, time:1750768614.465397s req_ids:[8] +DEBUG 06-24 20:36:54 [manager.py:391] +ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:375.079870223999ms total_cost_time:375.1246929168701ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13894 prompt_cache_len:5151 prompt_cache_ratio:0.37073556931049373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 +DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:54 [batch.py:51] router release req id 8 +INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s +INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10956525802612305 s +DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=270065926866823590835036049741054669318, time:1750768614.8458757s req_ids:[8] +DEBUG 06-24 20:36:54 [manager.py:391] +ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:214.2786979675293ms total_cost_time:214.324951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13895 prompt_cache_len:5151 prompt_cache_ratio:0.37070888808924074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 +DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:54 [batch.py:51] router release req id 8 +INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s +INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.11015510559082031 s +DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=107251159687813623318187323971743775426, time:1750768615.0668144s req_ids:[8] +DEBUG 06-24 20:36:55 [manager.py:391] +ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:215.04974365234375ms total_cost_time:215.08145332336426ms,out_token_counter:1 mean_per_token_cost_time: 0.03170967102050781ms prompt_token_num:13896 prompt_cache_len:5151 prompt_cache_ratio:0.37068221070811747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 +DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:55 [batch.py:51] router release req id 8 +INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10804367065429688 s +INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.10992789268493652 s +DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=127448916255251244232496196437365392613, time:1750768615.2896206s req_ids:[8] +DEBUG 06-24 20:36:55 [manager.py:391] +ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:214.72954750061035ms total_cost_time:214.77460861206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13897 prompt_cache_len:5151 prompt_cache_ratio:0.3706555371662949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 +DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:55 [batch.py:51] router release req id 8 +INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s +INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s +DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=290071268696187492290421224949492356894, time:1750768615.5092902s req_ids:[8] +DEBUG 06-24 20:36:55 [manager.py:391] +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:210.6313705444336ms total_cost_time:210.67380905151367ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13898 prompt_cache_len:5151 prompt_cache_ratio:0.3706288674629443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 +DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:55 [batch.py:51] router release req id 8 +INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10738468170166016 s +INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.10920095443725586 s +DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=252098724588724672500571257332623524243, time:1750768615.72714s req_ids:[8] +DEBUG 06-24 20:36:55 [manager.py:391] +ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:212.07666397094727ms total_cost_time:212.12005615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13899 prompt_cache_len:5151 prompt_cache_ratio:0.3706022015972372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 +DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:56 [batch.py:51] router release req id 8 +INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.3101315498352051 s +INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.31182432174682617 s +DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=172063619910785978849918027681875703976, time:1750768616.157644s req_ids:[8] +DEBUG 06-24 20:36:56 [manager.py:391] +ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:402.6494026184082ms total_cost_time:402.6925563812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13900 prompt_cache_len:5151 prompt_cache_ratio:0.3705755395683453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 +DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:56 [batch.py:51] router release req id 8 +INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10751199722290039 s +INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.10923504829406738 s +DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=198092726942894691672229761749615424879, time:1750768616.3572998s req_ids:[8] +DEBUG 06-24 20:36:56 [manager.py:391] +ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:207.09609985351562ms total_cost_time:207.1397304534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13901 prompt_cache_len:5151 prompt_cache_ratio:0.3705488813754406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 +DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:56 [batch.py:51] router release req id 8 +INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s +INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.11042499542236328 s +DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=225518704000031855525121671779690939624, time:1750768616.5707202s req_ids:[8] +DEBUG 06-24 20:36:56 [manager.py:391] +ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:204.12731170654297ms total_cost_time:204.17332649230957ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13902 prompt_cache_len:5151 prompt_cache_ratio:0.37052222701769527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 +DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:56 [batch.py:51] router release req id 8 +INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s +INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.10921597480773926 s +DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=288969984037072555622116783616017625840, time:1750768616.7826922s req_ids:[8] +DEBUG 06-24 20:36:56 [manager.py:391] +ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:206.526517868042ms total_cost_time:206.56943321228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13903 prompt_cache_len:5151 prompt_cache_ratio:0.37049557649428183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 +DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:56 [batch.py:51] router release req id 8 +INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s +INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.11012554168701172 s +DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=217585346015029125276120308083418588188, time:1750768616.9968886s req_ids:[8] +DEBUG 06-24 20:36:56 [manager.py:391] +ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:206.2528133392334ms total_cost_time:206.29620552062988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13904 prompt_cache_len:5151 prompt_cache_ratio:0.37046892980437285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 +DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:57 [batch.py:51] router release req id 8 +INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10814023017883301 s +INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.11016845703125 s +DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=200301460022562832120755463421612955775, time:1750768617.20847s req_ids:[8] +DEBUG 06-24 20:36:57 [manager.py:391] +ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:212.6023769378662ms total_cost_time:212.64886856079102ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13905 prompt_cache_len:5151 prompt_cache_ratio:0.3704422869471413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 +DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:57 [batch.py:51] router release req id 8 +INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10770320892333984 s +INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.10865592956542969 s +DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=23918242043753115527774402152552994034, time:1750768617.4421847s req_ids:[8] +DEBUG 06-24 20:36:57 [manager.py:391] +ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:398.3454704284668ms total_cost_time:398.3883857727051ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13906 prompt_cache_len:5151 prompt_cache_ratio:0.3704156479217604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 +DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:57 [batch.py:51] router release req id 8 +INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10739707946777344 s +INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.10922026634216309 s +DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=221870554354687592728649868737976966057, time:1750768617.8351018s req_ids:[8] +DEBUG 06-24 20:36:57 [manager.py:391] +ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:215.81506729125977ms total_cost_time:215.85893630981445ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13907 prompt_cache_len:5151 prompt_cache_ratio:0.3703890127274035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 +DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:57 [batch.py:51] router release req id 8 +INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10674214363098145 s +INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.10825014114379883 s +DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=332155539145952671911186223190387063528, time:1750768618.05685s req_ids:[8] +DEBUG 06-24 20:36:58 [manager.py:391] +ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:210.86907386779785ms total_cost_time:210.91461181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13908 prompt_cache_len:5151 prompt_cache_ratio:0.3703623813632442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 +DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:58 [batch.py:51] router release req id 8 +INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10915160179138184 s +INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.1105949878692627 s +DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=220393118989293678835595544890444441921, time:1750768618.2752411s req_ids:[8] +DEBUG 06-24 20:36:58 [manager.py:391] +ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:215.65675735473633ms total_cost_time:215.6991958618164ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13909 prompt_cache_len:5151 prompt_cache_ratio:0.3703357538284564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 +DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:58 [batch.py:51] router release req id 8 +INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10761451721191406 s +INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.10943722724914551 s +DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=226439665948844633691729562737217002831, time:1750768618.4960828s req_ids:[8] +DEBUG 06-24 20:36:58 [manager.py:391] +ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:170.90892791748047ms total_cost_time:170.95160484313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13910 prompt_cache_len:5151 prompt_cache_ratio:0.3703091301222142 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 +DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:58 [batch.py:51] router release req id 8 +INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10798883438110352 s +INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s +DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=309269484416052417248580670333986706377, time:1750768618.672763s req_ids:[8] +DEBUG 06-24 20:36:58 [manager.py:391] +ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:202.38327980041504ms total_cost_time:202.42595672607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13911 prompt_cache_len:5151 prompt_cache_ratio:0.370282510243692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 +DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:58 [batch.py:51] router release req id 8 +INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s +INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s +DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=71670868600021393519615150580683130143, time:1750768618.882891s req_ids:[8] +DEBUG 06-24 20:36:58 [manager.py:391] +ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:36:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 55926.074 tokens/s +DEBUG 06-24 20:36:59 [stats.py:37] Avg prompt tokens throughput: 55918.024 tokens/s +DEBUG 06-24 20:36:59 [stats.py:37] Avg generate tokens throughput: 8.050 tokens/s +INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:386.9960308074951ms total_cost_time:387.0401382446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13912 prompt_cache_len:5151 prompt_cache_ratio:0.3702558941920644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 +DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:59 [batch.py:51] router release req id 8 +INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10902547836303711 s +INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.11098289489746094 s +DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=133923718832203202596629738446689005717, time:1750768619.2771587s req_ids:[8] +DEBUG 06-24 20:36:59 [manager.py:391] +ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:211.18807792663574ms total_cost_time:211.23266220092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13913 prompt_cache_len:5151 prompt_cache_ratio:0.3702292819665061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 +DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:59 [batch.py:51] router release req id 8 +INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10885930061340332 s +INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.11080145835876465 s +DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=279304733665380026190025460390875255751, time:1750768619.4927404s req_ids:[8] +DEBUG 06-24 20:36:59 [manager.py:391] +ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:214.2024040222168ms total_cost_time:214.2484188079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13914 prompt_cache_len:5151 prompt_cache_ratio:0.3702026735661923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 +DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:59 [batch.py:51] router release req id 8 +INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10801243782043457 s +INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943126678466797 s +DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=254200334111609286439824705966843053686, time:1750768619.7145534s req_ids:[8] +DEBUG 06-24 20:36:59 [manager.py:391] +ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:215.36993980407715ms total_cost_time:215.41547775268555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13915 prompt_cache_len:5151 prompt_cache_ratio:0.37017606899029826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 +DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:36:59 [batch.py:51] router release req id 8 +INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10886955261230469 s +INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.1107642650604248 s +DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=75323565942071777366499456639871474257, time:1750768619.9378896s req_ids:[8] +DEBUG 06-24 20:36:59 [manager.py:391] +ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:210.3097438812256ms total_cost_time:210.3569507598877ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13916 prompt_cache_len:5151 prompt_cache_ratio:0.37014946823799943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 +DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:00 [batch.py:51] router release req id 8 +INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10915851593017578 s +INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.1110389232635498 s +DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=215771239181721537558085871223867551880, time:1750768620.1516893s req_ids:[8] +DEBUG 06-24 20:37:00 [manager.py:391] +ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:210.98756790161133ms total_cost_time:211.0304832458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13917 prompt_cache_len:5151 prompt_cache_ratio:0.37012287130847166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 +DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:00 [batch.py:51] router release req id 8 +INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10920119285583496 s +INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11122536659240723 s +DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=219970268443140959352921849423234117846, time:1750768620.3724582s req_ids:[8] +DEBUG 06-24 20:37:00 [manager.py:391] +ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:370.25952339172363ms total_cost_time:370.3038692474365ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13918 prompt_cache_len:5151 prompt_cache_ratio:0.3700962782008909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 +DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:00 [batch.py:51] router release req id 8 +INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10903358459472656 s +INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11103272438049316 s +DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=237538524942996614040975527562041831, time:1750768620.7496567s req_ids:[8] +DEBUG 06-24 20:37:00 [manager.py:391] +ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:215.6364917755127ms total_cost_time:215.68012237548828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13919 prompt_cache_len:5151 prompt_cache_ratio:0.3700696889144335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 +DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:00 [batch.py:51] router release req id 8 +INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10883903503417969 s +INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11096382141113281 s +DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=257236151209652087226595876016598175686, time:1750768620.9808483s req_ids:[8] +DEBUG 06-24 20:37:00 [manager.py:391] +ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:227.36787796020508ms total_cost_time:227.41150856018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13920 prompt_cache_len:5151 prompt_cache_ratio:0.37004310344827585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 +DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:01 [batch.py:51] router release req id 8 +INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s +INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11133360862731934 s +DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=232482605998982212544847478536891266789, time:1750768621.2050748s req_ids:[8] +DEBUG 06-24 20:37:01 [manager.py:391] +ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:214.0488624572754ms total_cost_time:214.094877243042ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13921 prompt_cache_len:5151 prompt_cache_ratio:0.3700165218015947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 +DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:01 [batch.py:51] router release req id 8 +INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.1089775562286377 s +INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11105465888977051 s +DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=19526503121744536597245145020809529250, time:1750768621.4273324s req_ids:[8] +DEBUG 06-24 20:37:01 [manager.py:391] +ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:210.9816074371338ms total_cost_time:211.0280990600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13922 prompt_cache_len:5151 prompt_cache_ratio:0.369989943973567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 +DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:01 [batch.py:51] router release req id 8 +INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.10915493965148926 s +INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11102700233459473 s +DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=128546288495840170102586428914613294600, time:1750768621.6451705s req_ids:[8] +DEBUG 06-24 20:37:01 [manager.py:391] +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:379.8229694366455ms total_cost_time:379.8825740814209ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:13923 prompt_cache_len:5151 prompt_cache_ratio:0.36996336996337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 +DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:01 [batch.py:51] router release req id 8 +INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10876846313476562 s +INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11078238487243652 s +DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=315587277261789559727399101910876680157, time:1750768622.0303411s req_ids:[8] +DEBUG 06-24 20:37:02 [manager.py:391] +ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:213.28973770141602ms total_cost_time:213.3336067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13924 prompt_cache_len:5151 prompt_cache_ratio:0.36993679977018096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 +DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:02 [batch.py:51] router release req id 8 +INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10879302024841309 s +INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.10997271537780762 s +DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=333732795332618121932777022375170066622, time:1750768622.252176s req_ids:[8] +DEBUG 06-24 20:37:02 [manager.py:391] +ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:212.98646926879883ms total_cost_time:213.03272247314453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13925 prompt_cache_len:5151 prompt_cache_ratio:0.36991023339317775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 +DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:02 [batch.py:51] router release req id 8 +INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10840725898742676 s +INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11034059524536133 s +DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=101945375544171551629348716303399661860, time:1750768622.4699144s req_ids:[8] +DEBUG 06-24 20:37:02 [manager.py:391] +ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:210.76154708862305ms total_cost_time:210.80708503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13926 prompt_cache_len:5151 prompt_cache_ratio:0.3698836708315381 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 +DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:02 [batch.py:51] router release req id 8 +INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10902595520019531 s +INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11107540130615234 s +DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=28957456801412538655885092959954950539, time:1750768622.6867278s req_ids:[8] +DEBUG 06-24 20:37:02 [manager.py:391] +ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:210.2987766265869ms total_cost_time:210.3433609008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13927 prompt_cache_len:5151 prompt_cache_ratio:0.3698571120844403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 +DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:02 [batch.py:51] router release req id 8 +INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.1097567081451416 s +INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11178207397460938 s +DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=192147062144882168338999978157611606711, time:1750768622.906113s req_ids:[8] +DEBUG 06-24 20:37:02 [manager.py:391] +ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:215.46316146850586ms total_cost_time:215.50703048706055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13928 prompt_cache_len:5151 prompt_cache_ratio:0.3698305571510626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 +DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:03 [batch.py:51] router release req id 8 +INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.11007857322692871 s +INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11199498176574707 s +DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=41895446920054152142373500974785246976, time:1750768623.1423213s req_ids:[8] +DEBUG 06-24 20:37:03 [manager.py:391] +ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:412.0802879333496ms total_cost_time:412.1253490447998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13929 prompt_cache_len:5151 prompt_cache_ratio:0.36980400603058367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 +DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:03 [batch.py:51] router release req id 8 +INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10997319221496582 s +INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11186957359313965 s +DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=210457571705370692116986256170210928112, time:1750768623.5457253s req_ids:[8] +DEBUG 06-24 20:37:03 [manager.py:391] +ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:209.3219757080078ms total_cost_time:209.367036819458ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13930 prompt_cache_len:5151 prompt_cache_ratio:0.3697774587221823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 +DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:03 [batch.py:51] router release req id 8 +INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s +INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100616455078125 s +DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=175212948296425593950185278029121148367, time:1750768623.7734683s req_ids:[8] +DEBUG 06-24 20:37:03 [manager.py:391] +ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:227.30445861816406ms total_cost_time:227.35071182250977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13931 prompt_cache_len:5151 prompt_cache_ratio:0.36975091522503767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 +DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:03 [batch.py:51] router release req id 8 +INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10892128944396973 s +INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11064863204956055 s +DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=260396556068167099650344386026952540436, time:1750768623.9977322s req_ids:[8] +DEBUG 06-24 20:37:03 [manager.py:391] +ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:215.5303955078125ms total_cost_time:215.5742645263672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13932 prompt_cache_len:5151 prompt_cache_ratio:0.36972437553832904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 +DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:04 [batch.py:51] router release req id 8 +INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10816359519958496 s +INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s +DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=288487669328809925050379244021909190040, time:1750768624.2183595s req_ids:[8] +DEBUG 06-24 20:37:04 [manager.py:391] +ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:213.19007873535156ms total_cost_time:213.23561668395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13933 prompt_cache_len:5151 prompt_cache_ratio:0.3696978396612359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 +DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:04 [batch.py:51] router release req id 8 +INFO 06-24 20:37:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10943841934204102 s +INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.11119484901428223 s +DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=246827518607713457017620749205921534259, time:1750768624.4377937s req_ids:[8] +DEBUG 06-24 20:37:04 [manager.py:391] +ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:210.10279655456543ms total_cost_time:210.14666557312012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13934 prompt_cache_len:5151 prompt_cache_ratio:0.36967130759293815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 +DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:04 [batch.py:51] router release req id 8 +INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10814499855041504 s +INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.11000776290893555 s +DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=256159453287421827255114855225021658443, time:1750768624.655235s req_ids:[8] +DEBUG 06-24 20:37:04 [manager.py:391] +ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:384.5798969268799ms total_cost_time:384.6251964569092ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13935 prompt_cache_len:5151 prompt_cache_ratio:0.3696447793326157 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 +DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:04 [batch.py:51] router release req id 8 +INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10903406143188477 s +INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.11103391647338867 s +DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=70224943397812551951389176340976439836, time:1750768625.046903s req_ids:[8] +DEBUG 06-24 20:37:05 [manager.py:391] +ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:211.10844612121582ms total_cost_time:211.1515998840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13936 prompt_cache_len:5151 prompt_cache_ratio:0.3696182548794489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 +DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:05 [batch.py:51] router release req id 8 +INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10857129096984863 s +INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s +DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=264541544625963132932474138182543066423, time:1750768625.267228s req_ids:[8] +DEBUG 06-24 20:37:05 [manager.py:391] +ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:212.6331329345703ms total_cost_time:212.67938613891602ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13937 prompt_cache_len:5151 prompt_cache_ratio:0.3695917342326182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 +DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:05 [batch.py:51] router release req id 8 +INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10804343223571777 s +INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s +DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=42814334160434545670834167288501314267, time:1750768625.4857428s req_ids:[8] +DEBUG 06-24 20:37:05 [manager.py:391] +ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:215.73901176452637ms total_cost_time:215.78264236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13938 prompt_cache_len:5151 prompt_cache_ratio:0.3695652173913043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 +DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:05 [batch.py:51] router release req id 8 +INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10842251777648926 s +INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.10965919494628906 s +DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=298567022201804756948667998125621692108, time:1750768625.706435s req_ids:[8] +DEBUG 06-24 20:37:05 [manager.py:391] +ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:208.6482048034668ms total_cost_time:208.69112014770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13939 prompt_cache_len:5151 prompt_cache_ratio:0.3695387043546883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 +DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:05 [batch.py:51] router release req id 8 +INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10918402671813965 s +INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.1112363338470459 s +DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=273683894373204317081350673527536600267, time:1750768625.9218066s req_ids:[8] +DEBUG 06-24 20:37:05 [manager.py:391] +ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:210.70480346679688ms total_cost_time:210.74986457824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13940 prompt_cache_len:5151 prompt_cache_ratio:0.3695121951219512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 +DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:06 [batch.py:51] router release req id 8 +INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10886454582214355 s +INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11039137840270996 s +DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=288771516278495236943905202434162662558, time:1750768626.1395354s req_ids:[8] +DEBUG 06-24 20:37:06 [manager.py:391] +ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:377.1843910217285ms total_cost_time:377.2294521331787ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13941 prompt_cache_len:5151 prompt_cache_ratio:0.3694856896922746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 +DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:06 [batch.py:51] router release req id 8 +INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10865187644958496 s +DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=10445945186429712395826115642952253211, time:1750768626.523873s req_ids:[8] +DEBUG 06-24 20:37:06 [manager.py:391] +INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11082983016967773 s +ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:211.09294891357422ms total_cost_time:211.1372947692871ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13942 prompt_cache_len:5151 prompt_cache_ratio:0.36945918806484007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 +DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:06 [batch.py:51] router release req id 8 +INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10920166969299316 s +INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.1111900806427002 s +DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=308169976463995912445581497449446571330, time:1750768626.7450776s req_ids:[8] +DEBUG 06-24 20:37:06 [manager.py:391] +ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:213.63449096679688ms total_cost_time:213.68026733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13943 prompt_cache_len:5151 prompt_cache_ratio:0.3694326902388295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 +DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:06 [batch.py:51] router release req id 8 +INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s +INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11060118675231934 s +DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=101063073474856270944811072156136174349, time:1750768626.9695165s req_ids:[8] +DEBUG 06-24 20:37:06 [manager.py:391] +ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:217.14258193969727ms total_cost_time:217.18955039978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13944 prompt_cache_len:5151 prompt_cache_ratio:0.36940619621342513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 +DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:07 [batch.py:51] router release req id 8 +INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s +INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.10957121849060059 s +DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=125099626285870584418590301827953894391, time:1750768627.189367s req_ids:[8] +DEBUG 06-24 20:37:07 [manager.py:391] +ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:212.9814624786377ms total_cost_time:213.02437782287598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13945 prompt_cache_len:5151 prompt_cache_ratio:0.3693797059878092 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 +DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:07 [batch.py:51] router release req id 8 +INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.10852408409118652 s +INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.1104736328125 s +DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=124045542390231541854400842103060234955, time:1750768627.4104726s req_ids:[8] +DEBUG 06-24 20:37:07 [manager.py:391] +ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:213.79780769348145ms total_cost_time:213.84143829345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13946 prompt_cache_len:5151 prompt_cache_ratio:0.3693532195611645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 +DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:07 [batch.py:51] router release req id 8 +INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.31005144119262695 s +INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.3113112449645996 s +DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=104308371043862017816374384439789414194, time:1750768627.839343s req_ids:[8] +DEBUG 06-24 20:37:07 [manager.py:391] +ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:381.5882205963135ms total_cost_time:381.6509246826172ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:13947 prompt_cache_len:5151 prompt_cache_ratio:0.3693267369326737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 +DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:07 [batch.py:51] router release req id 8 +INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10747265815734863 s +INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.10870933532714844 s +DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=83400216592221864004433764797439358087, time:1750768628.0191367s req_ids:[8] +DEBUG 06-24 20:37:08 [manager.py:391] +ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:206.29024505615234ms total_cost_time:206.33268356323242ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13948 prompt_cache_len:5151 prompt_cache_ratio:0.36930025810151995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 +DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:08 [batch.py:51] router release req id 8 +INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10938715934753418 s +INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s +DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=199110041395681224146989351509459105359, time:1750768628.239972s req_ids:[8] +DEBUG 06-24 20:37:08 [manager.py:391] +ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:222.53108024597168ms total_cost_time:222.57661819458008ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13949 prompt_cache_len:5151 prompt_cache_ratio:0.3692737830668865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 +DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:08 [batch.py:51] router release req id 8 +INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10981082916259766 s +INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.1120309829711914 s +DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=106238026222497706418331104991262201385, time:1750768628.478984s req_ids:[8] +DEBUG 06-24 20:37:08 [manager.py:391] +ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:232.09452629089355ms total_cost_time:232.1479320526123ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13950 prompt_cache_len:5151 prompt_cache_ratio:0.369247311827957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 +DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:08 [batch.py:51] router release req id 8 +INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10784029960632324 s +INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s +DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=55894738104048942861133656649418786502, time:1750768628.718009s req_ids:[8] +DEBUG 06-24 20:37:08 [manager.py:391] +ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:231.60791397094727ms total_cost_time:231.65273666381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13951 prompt_cache_len:5151 prompt_cache_ratio:0.3692208443839151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 +DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:08 [batch.py:51] router release req id 8 +INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s +INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.11016607284545898 s +DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=305235593761326651968822928120138002363, time:1750768628.9414387s req_ids:[8] +DEBUG 06-24 20:37:08 [manager.py:391] +ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:212.6622200012207ms total_cost_time:212.71681785583496ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:13952 prompt_cache_len:5151 prompt_cache_ratio:0.36919438073394495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 +DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:09 [batch.py:51] router release req id 8 +INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s +INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s +DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=34291958107616886458421667846565404186, time:1750768629.1611254s req_ids:[8] +DEBUG 06-24 20:37:09 [manager.py:391] +DEBUG 06-24 20:37:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57083.077 tokens/s +DEBUG 06-24 20:37:09 [stats.py:37] Avg prompt tokens throughput: 57074.984 tokens/s +DEBUG 06-24 20:37:09 [stats.py:37] Avg generate tokens throughput: 8.093 tokens/s +ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:378.8440227508545ms total_cost_time:378.89671325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:13953 prompt_cache_len:5151 prompt_cache_ratio:0.3691679208772307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 +DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:09 [batch.py:51] router release req id 8 +INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.10888433456420898 s +INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11104106903076172 s +DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=291333514263332086300485840075899258211, time:1750768629.5459352s req_ids:[8] +DEBUG 06-24 20:37:09 [manager.py:391] +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:213.08517456054688ms total_cost_time:213.13190460205078ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13954 prompt_cache_len:5151 prompt_cache_ratio:0.36914146481295684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 +DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:09 [batch.py:51] router release req id 8 +INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.1094048023223877 s +INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11150550842285156 s +DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=25593220145067302091142281344771496218, time:1750768629.7638197s req_ids:[8] +DEBUG 06-24 20:37:09 [manager.py:391] +ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:209.12885665893555ms total_cost_time:209.17201042175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13955 prompt_cache_len:5151 prompt_cache_ratio:0.36911501254030815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 +DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:09 [batch.py:51] router release req id 8 +INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.110321044921875 s +INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11234331130981445 s +DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=65798691069630346309810407627342742467, time:1750768629.9821873s req_ids:[8] +DEBUG 06-24 20:37:09 [manager.py:391] +ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:205.52539825439453ms total_cost_time:205.5685520172119ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13956 prompt_cache_len:5151 prompt_cache_ratio:0.36908856405846946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 +DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:10 [batch.py:51] router release req id 8 +INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.11044740676879883 s +INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.11247706413269043 s +DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=67396194817026667487177616517812211631, time:1750768630.196119s req_ids:[8] +DEBUG 06-24 20:37:10 [manager.py:391] +ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:215.56472778320312ms total_cost_time:215.63339233398438ms,out_token_counter:1 mean_per_token_cost_time: 0.06866455078125ms prompt_token_num:13957 prompt_cache_len:5151 prompt_cache_ratio:0.3690621193666261 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 +DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:10 [batch.py:51] router release req id 8 +INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.10779142379760742 s +INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.10990166664123535 s +DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=76015538003986216031098212277281892458, time:1750768630.4250095s req_ids:[8] +DEBUG 06-24 20:37:10 [manager.py:391] +ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:221.2224006652832ms total_cost_time:221.2655544281006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13958 prompt_cache_len:5151 prompt_cache_ratio:0.36903567846396335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 +DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:10 [batch.py:51] router release req id 8 +INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.1077268123626709 s +INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.10962629318237305 s +DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=212026111350736059461380495837494867637, time:1750768630.6504242s req_ids:[8] +DEBUG 06-24 20:37:10 [manager.py:391] +ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:408.02526473999023ms total_cost_time:408.0681800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13959 prompt_cache_len:5151 prompt_cache_ratio:0.3690092413496669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 +DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:10 [batch.py:51] router release req id 8 +INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10836243629455566 s +INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10998082160949707 s +DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=155066330884774564187075724439780488011, time:1750768631.0608914s req_ids:[8] +DEBUG 06-24 20:37:11 [manager.py:391] +ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:195.59979438781738ms total_cost_time:195.64294815063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13960 prompt_cache_len:5151 prompt_cache_ratio:0.36898280802292266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 +DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:11 [batch.py:51] router release req id 8 +INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10738396644592285 s +INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s +DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=289379815544848895498309203267217891342, time:1750768631.2617416s req_ids:[8] +DEBUG 06-24 20:37:11 [manager.py:391] +ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:209.60497856140137ms total_cost_time:209.64932441711426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13961 prompt_cache_len:5151 prompt_cache_ratio:0.3689563784829167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 +DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:11 [batch.py:51] router release req id 8 +INFO 06-24 20:37:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s +INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.1092996597290039 s +DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=103425550519706900866972878420908912444, time:1750768631.4790819s req_ids:[8] +DEBUG 06-24 20:37:11 [manager.py:391] +ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:211.1837863922119ms total_cost_time:211.2290859222412ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13962 prompt_cache_len:5151 prompt_cache_ratio:0.3689299527288354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 +DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:11 [batch.py:51] router release req id 8 +INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10830545425415039 s +INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.11000728607177734 s +DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=156579918448146882857788077408299654878, time:1750768631.6976492s req_ids:[8] +DEBUG 06-24 20:37:11 [manager.py:391] +ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:215.73352813720703ms total_cost_time:215.77930450439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13963 prompt_cache_len:5151 prompt_cache_ratio:0.3689035307598654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 +DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:11 [batch.py:51] router release req id 8 +INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10782098770141602 s +INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s +DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=55823393131493504206408715586916918802, time:1750768631.9187052s req_ids:[8] +DEBUG 06-24 20:37:11 [manager.py:391] +ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7067108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13964 prompt_cache_len:5151 prompt_cache_ratio:0.36887711257519334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 +DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:12 [batch.py:51] router release req id 8 +INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s +INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s +DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=113131398368371662771795845302399160720, time:1750768632.1367507s req_ids:[8] +DEBUG 06-24 20:37:12 [manager.py:391] +ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:374.8800754547119ms total_cost_time:374.9239444732666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13965 prompt_cache_len:5151 prompt_cache_ratio:0.36885069817400645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 +DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:12 [batch.py:51] router release req id 8 +INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s +INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s +DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=154201704682901204662442097155732473021, time:1750768632.5170584s req_ids:[8] +DEBUG 06-24 20:37:12 [manager.py:391] +ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:210.44445037841797ms total_cost_time:210.48879623413086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13966 prompt_cache_len:5151 prompt_cache_ratio:0.3688242875554919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 +DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:12 [batch.py:51] router release req id 8 +INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s +INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.11077618598937988 s +DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=101835523533941867640890436872225148613, time:1750768632.735914s req_ids:[8] +DEBUG 06-24 20:37:12 [manager.py:391] +ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:217.67568588256836ms total_cost_time:217.72098541259766ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13967 prompt_cache_len:5151 prompt_cache_ratio:0.36879788071883723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 +DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:12 [batch.py:51] router release req id 8 +INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s +INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1096041202545166 s +DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=285308498855591838674116357280038341175, time:1750768632.9582512s req_ids:[8] +DEBUG 06-24 20:37:12 [manager.py:391] +ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:213.91558647155762ms total_cost_time:213.96183967590332ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13968 prompt_cache_len:5151 prompt_cache_ratio:0.36877147766323026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 +DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:13 [batch.py:51] router release req id 8 +INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10932421684265137 s +INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.11110043525695801 s +DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=56960264878956924678480344503312042721, time:1750768633.179335s req_ids:[8] +DEBUG 06-24 20:37:13 [manager.py:391] +ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:211.31563186645508ms total_cost_time:211.36116981506348ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13969 prompt_cache_len:5151 prompt_cache_ratio:0.3687450783878588 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 +DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:13 [batch.py:51] router release req id 8 +INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10890960693359375 s +INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.11022567749023438 s +DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=162889442219590974850323458144476646776, time:1750768633.3956206s req_ids:[8] +DEBUG 06-24 20:37:13 [manager.py:391] +ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:370.96571922302246ms total_cost_time:371.01244926452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13970 prompt_cache_len:5151 prompt_cache_ratio:0.36871868289191123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 +DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:13 [batch.py:51] router release req id 8 +INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s +INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.10922646522521973 s +DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=124877348331976675224169518215073884119, time:1750768633.7745442s req_ids:[8] +DEBUG 06-24 20:37:13 [manager.py:391] +ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:218.28055381774902ms total_cost_time:218.3222770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13971 prompt_cache_len:5151 prompt_cache_ratio:0.3686922911745759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 +DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:13 [batch.py:51] router release req id 8 +INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s +INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s +DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=127575139360520556106602266782837481100, time:1750768633.9974482s req_ids:[8] +DEBUG 06-24 20:37:13 [manager.py:391] +ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:209.88225936889648ms total_cost_time:209.92684364318848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13972 prompt_cache_len:5151 prompt_cache_ratio:0.3686659032350415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 +DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:14 [batch.py:51] router release req id 8 +INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.10771965980529785 s +INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.10977697372436523 s +DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=332415336343340678535546209147675204268, time:1750768634.212489s req_ids:[8] +DEBUG 06-24 20:37:14 [manager.py:391] +ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:213.00220489501953ms total_cost_time:213.04702758789062ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13973 prompt_cache_len:5151 prompt_cache_ratio:0.368639519072497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 +DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:14 [batch.py:51] router release req id 8 +INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.10806417465209961 s +INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.11001920700073242 s +DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=36042452911271591799901419889865982062, time:1750768634.4320753s req_ids:[8] +DEBUG 06-24 20:37:14 [manager.py:391] +ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:209.09380912780762ms total_cost_time:209.13982391357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13974 prompt_cache_len:5151 prompt_cache_ratio:0.3686131386861314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 +DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:14 [batch.py:51] router release req id 8 +INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.11086773872375488 s +INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.1127617359161377 s +DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=68598854657716787776582902834585841259, time:1750768634.6516871s req_ids:[8] +DEBUG 06-24 20:37:14 [manager.py:391] +ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:213.9129638671875ms total_cost_time:213.95587921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13975 prompt_cache_len:5151 prompt_cache_ratio:0.3685867620751342 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 +DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:14 [batch.py:51] router release req id 8 +INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.1095132827758789 s +INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.11198019981384277 s +DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=6873755472947318731095892579841924578, time:1750768634.867758s req_ids:[8] +DEBUG 06-24 20:37:14 [manager.py:391] +ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:386.42024993896484ms total_cost_time:386.4631652832031ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13976 prompt_cache_len:5151 prompt_cache_ratio:0.3685603892386949 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 +DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:15 [batch.py:51] router release req id 8 +INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10797619819641113 s +INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.11002993583679199 s +DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=291625764657491914578755959906717208851, time:1750768635.2616925s req_ids:[8] +DEBUG 06-24 20:37:15 [manager.py:391] +ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:216.02678298950195ms total_cost_time:216.06993675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13977 prompt_cache_len:5151 prompt_cache_ratio:0.36853402017600345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 +DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:15 [batch.py:51] router release req id 8 +INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10770773887634277 s +INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.10898828506469727 s +DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=84317553708246640575189677048345470617, time:1750768635.4856856s req_ids:[8] +DEBUG 06-24 20:37:15 [manager.py:391] +ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:216.20965003967285ms total_cost_time:216.25328063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13978 prompt_cache_len:5151 prompt_cache_ratio:0.3685076548862498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 +DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:15 [batch.py:51] router release req id 8 +INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10849452018737793 s +INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097724437713623 s +DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=161836526280041976193795605993735592229, time:1750768635.7112758s req_ids:[8] +DEBUG 06-24 20:37:15 [manager.py:391] +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:212.92543411254883ms total_cost_time:212.97025680541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13979 prompt_cache_len:5151 prompt_cache_ratio:0.36848129336862434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 +DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:15 [batch.py:51] router release req id 8 +INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10744976997375488 s +INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.10945391654968262 s +DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=291166109562516408654192221174372814706, time:1750768635.9282699s req_ids:[8] +DEBUG 06-24 20:37:15 [manager.py:391] +ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:214.49780464172363ms total_cost_time:214.54143524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13980 prompt_cache_len:5151 prompt_cache_ratio:0.36845493562231757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 +DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:16 [batch.py:51] router release req id 8 +INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.11100411415100098 s +INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.11299681663513184 s +DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=43141611322986118203876092176844221883, time:1750768636.1505299s req_ids:[8] +DEBUG 06-24 20:37:16 [manager.py:391] +ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:215.79265594482422ms total_cost_time:215.83843231201172ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13981 prompt_cache_len:5151 prompt_cache_ratio:0.36842858164652026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 +DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:16 [batch.py:51] router release req id 8 +INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.10910511016845703 s +INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.11137080192565918 s +DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=85219022915661342565534414783274569354, time:1750768636.3727517s req_ids:[8] +DEBUG 06-24 20:37:16 [manager.py:391] +ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:386.0025405883789ms total_cost_time:386.044979095459ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13982 prompt_cache_len:5151 prompt_cache_ratio:0.3684022314404234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 +DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:16 [batch.py:51] router release req id 8 +INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s +INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.10968971252441406 s +DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=24553445778010541003338852883563129179, time:1750768636.765505s req_ids:[8] +DEBUG 06-24 20:37:16 [manager.py:391] +ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:221.5421199798584ms total_cost_time:221.5864658355713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13983 prompt_cache_len:5151 prompt_cache_ratio:0.3683758850032182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 +DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:16 [batch.py:51] router release req id 8 +INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.1080780029296875 s +INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s +DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=55878046498719415876969663998507318066, time:1750768637.002391s req_ids:[8] +DEBUG 06-24 20:37:17 [manager.py:391] +ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:221.53949737548828ms total_cost_time:221.58217430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13984 prompt_cache_len:5151 prompt_cache_ratio:0.3683495423340961 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 +DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:17 [batch.py:51] router release req id 8 +INFO 06-24 20:37:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.1083831787109375 s +INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10953021049499512 s +DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=152027078703207171616540982337375675148, time:1750768637.2221699s req_ids:[8] +DEBUG 06-24 20:37:17 [manager.py:391] +ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:215.27528762817383ms total_cost_time:215.32034873962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13985 prompt_cache_len:5151 prompt_cache_ratio:0.36832320343224884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 +DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:17 [batch.py:51] router release req id 8 +INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10882735252380371 s +INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10998678207397461 s +DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=196210163247209452331683163425857789231, time:1750768637.4443297s req_ids:[8] +DEBUG 06-24 20:37:17 [manager.py:391] +ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:206.42876625061035ms total_cost_time:206.47358894348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13986 prompt_cache_len:5151 prompt_cache_ratio:0.3682968682968683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 +DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:17 [batch.py:51] router release req id 8 +INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10783791542053223 s +INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.1089780330657959 s +DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=40178283905970698449013150410398637145, time:1750768637.6581635s req_ids:[8] +DEBUG 06-24 20:37:17 [manager.py:391] +ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:209.5041275024414ms total_cost_time:209.5482349395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13987 prompt_cache_len:5151 prompt_cache_ratio:0.3682705369271466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 +DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:17 [batch.py:51] router release req id 8 +INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s +INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997962951660156 s +DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=115689451089287418768844843500558673491, time:1750768637.8727357s req_ids:[8] +DEBUG 06-24 20:37:17 [manager.py:391] +ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:377.0411014556885ms total_cost_time:377.08544731140137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13988 prompt_cache_len:5151 prompt_cache_ratio:0.36824420932227625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 +DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:18 [batch.py:51] router release req id 8 +INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10900115966796875 s +INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.1101992130279541 s +DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=91612312282333967349316688010455937197, time:1750768638.2556121s req_ids:[8] +DEBUG 06-24 20:37:18 [manager.py:391] +ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:212.3241424560547ms total_cost_time:212.3699188232422ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13989 prompt_cache_len:5151 prompt_cache_ratio:0.36821788548144974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 +DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:18 [batch.py:51] router release req id 8 +INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10884428024291992 s +INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.11002969741821289 s +DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=51048576445876644730138261705641469896, time:1750768638.475584s req_ids:[8] +DEBUG 06-24 20:37:18 [manager.py:391] +ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:213.04893493652344ms total_cost_time:213.10806274414062ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:13990 prompt_cache_len:5151 prompt_cache_ratio:0.3681915654038599 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 +DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:18 [batch.py:51] router release req id 8 +INFO 06-24 20:37:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10880017280578613 s +INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s +DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=198420326774350614994535982538086570054, time:1750768638.6950521s req_ids:[8] +DEBUG 06-24 20:37:18 [manager.py:391] +ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:209.68294143676758ms total_cost_time:209.72681045532227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13991 prompt_cache_len:5151 prompt_cache_ratio:0.36816524908869985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 +DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:18 [batch.py:51] router release req id 8 +INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10863065719604492 s +INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.10991168022155762 s +DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=245696302282062438863597396419961229560, time:1750768638.9110687s req_ids:[8] +DEBUG 06-24 20:37:18 [manager.py:391] +ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:211.91692352294922ms total_cost_time:211.9605541229248ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13992 prompt_cache_len:5151 prompt_cache_ratio:0.36813893653516294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 +DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:19 [batch.py:51] router release req id 8 +INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.10809040069580078 s +INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10938763618469238 s +DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=190339614766738791423041981144301221890, time:1750768639.1300826s req_ids:[8] +DEBUG 06-24 20:37:19 [manager.py:391] +ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:37:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 55552.244 tokens/s +DEBUG 06-24 20:37:19 [stats.py:37] Avg prompt tokens throughput: 55544.195 tokens/s +DEBUG 06-24 20:37:19 [stats.py:37] Avg generate tokens throughput: 8.049 tokens/s +INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:210.73603630065918ms total_cost_time:210.77895164489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13993 prompt_cache_len:5151 prompt_cache_ratio:0.36811262774244263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 +DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:19 [batch.py:51] router release req id 8 +INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.3095400333404541 s +INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.31070590019226074 s +DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=128007873151124328360167139742395403484, time:1750768639.5573237s req_ids:[8] +DEBUG 06-24 20:37:19 [manager.py:391] +ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:381.23369216918945ms total_cost_time:381.27660751342773ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13994 prompt_cache_len:5151 prompt_cache_ratio:0.36808632270973274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 +DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:19 [batch.py:51] router release req id 8 +INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.1073904037475586 s +INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10873222351074219 s +DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=104141219626366574132826708325097048074, time:1750768639.7342165s req_ids:[8] +DEBUG 06-24 20:37:19 [manager.py:391] +ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:206.8171501159668ms total_cost_time:206.8619728088379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13995 prompt_cache_len:5151 prompt_cache_ratio:0.36806002143622724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 +DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:19 [batch.py:51] router release req id 8 +INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s +INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10919976234436035 s +DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=184156989492292847444923373823407138356, time:1750768639.9472475s req_ids:[8] +DEBUG 06-24 20:37:19 [manager.py:391] +ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:208.1460952758789ms total_cost_time:208.1894874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13996 prompt_cache_len:5151 prompt_cache_ratio:0.3680337239211203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 +DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:20 [batch.py:51] router release req id 8 +INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10809946060180664 s +INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.10917115211486816 s +DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=20875675385711834203355062944873096311, time:1750768640.1671433s req_ids:[8] +DEBUG 06-24 20:37:20 [manager.py:391] +ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:219.8014259338379ms total_cost_time:219.84505653381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13997 prompt_cache_len:5151 prompt_cache_ratio:0.3680074301636065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 +DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:20 [batch.py:51] router release req id 8 +INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s +INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11017203330993652 s +DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=246693757860352000744020247071501972420, time:1750768640.3940854s req_ids:[8] +DEBUG 06-24 20:37:20 [manager.py:391] +ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:219.30193901062012ms total_cost_time:219.34747695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13998 prompt_cache_len:5151 prompt_cache_ratio:0.3679811401628804 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 +DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:20 [batch.py:51] router release req id 8 +INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10942482948303223 s +INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11145472526550293 s +DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=298077991145517651159339065307827904531, time:1750768640.614352s req_ids:[8] +DEBUG 06-24 20:37:20 [manager.py:391] +ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:215.54064750671387ms total_cost_time:215.59429168701172ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13999 prompt_cache_len:5151 prompt_cache_ratio:0.36795485391813704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 +DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:20 [batch.py:51] router release req id 8 +INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.1089620590209961 s +INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11093401908874512 s +DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=70860833748614610439076641652526025638, time:1750768640.834475s req_ids:[8] +DEBUG 06-24 20:37:20 [manager.py:391] +ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:377.74133682250977ms total_cost_time:377.78687477111816ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14000 prompt_cache_len:5151 prompt_cache_ratio:0.36792857142857144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 +DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:21 [batch.py:51] router release req id 8 +INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10838842391967773 s +INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11078667640686035 s +DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=125952369016617074460335288017100477137, time:1750768641.2199621s req_ids:[8] +DEBUG 06-24 20:37:21 [manager.py:391] +ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:217.47469902038574ms total_cost_time:217.52023696899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14001 prompt_cache_len:5151 prompt_cache_ratio:0.36790229269337904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 +DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:21 [batch.py:51] router release req id 8 +INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10817503929138184 s +INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s +DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=51889092843763610132730061916938238547, time:1750768641.442208s req_ids:[8] +DEBUG 06-24 20:37:21 [manager.py:391] +ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:171.2355613708496ms total_cost_time:171.27633094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14002 prompt_cache_len:5151 prompt_cache_ratio:0.3678760177117555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 +DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:21 [batch.py:51] router release req id 8 +INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10850191116333008 s +INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s +DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=96623721583148842109143267334964102473, time:1750768641.6210558s req_ids:[8] +DEBUG 06-24 20:37:21 [manager.py:391] +ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:203.60231399536133ms total_cost_time:203.64689826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14003 prompt_cache_len:5151 prompt_cache_ratio:0.36784974648289653 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 +DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:21 [batch.py:51] router release req id 8 +INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10869669914245605 s +INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11070775985717773 s +DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=129109449520881917082790143751131030149, time:1750768641.8317325s req_ids:[8] +DEBUG 06-24 20:37:21 [manager.py:391] +ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:213.01817893981934ms total_cost_time:213.06419372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14004 prompt_cache_len:5151 prompt_cache_ratio:0.36782347900599827 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 +DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:21 [batch.py:51] router release req id 8 +INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10898327827453613 s +INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.11087775230407715 s +DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=102235248562697680764947522193850379919, time:1750768642.0505025s req_ids:[8] +DEBUG 06-24 20:37:22 [manager.py:391] +ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:214.00928497314453ms total_cost_time:214.0519618988037ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14005 prompt_cache_len:5151 prompt_cache_ratio:0.36779721528025705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 +DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:22 [batch.py:51] router release req id 8 +INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.1092216968536377 s +INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.11111879348754883 s +DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=26080150329967155670703753081688791730, time:1750768642.2714345s req_ids:[8] +DEBUG 06-24 20:37:22 [manager.py:391] +ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:382.2152614593506ms total_cost_time:382.2615146636963ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14006 prompt_cache_len:5151 prompt_cache_ratio:0.36777095530486936 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 +DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:22 [batch.py:51] router release req id 8 +INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10785627365112305 s +INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.1098482608795166 s +DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=245187693348831451453768940365758232951, time:1750768642.6757393s req_ids:[8] +DEBUG 06-24 20:37:22 [manager.py:391] +ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:236.71269416809082ms total_cost_time:236.7548942565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14007 prompt_cache_len:5151 prompt_cache_ratio:0.3677446990790319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 +DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:22 [batch.py:51] router release req id 8 +INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10636663436889648 s +INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.1083519458770752 s +DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=263388369887030462642510546699788716067, time:1750768642.9022193s req_ids:[8] +DEBUG 06-24 20:37:22 [manager.py:391] +ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:216.81475639343262ms total_cost_time:216.8595790863037ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14008 prompt_cache_len:5151 prompt_cache_ratio:0.36771844660194175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 +DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:23 [batch.py:51] router release req id 8 +INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10732889175415039 s +INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.1091916561126709 s +DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=38332137835792899029715002287708746678, time:1750768643.1451976s req_ids:[8] +DEBUG 06-24 20:37:23 [manager.py:391] +INFO 06-24 20:37:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:37:23 [statics_utils.py:24] mean first cost: 231.4989809468831 ms +INFO 06-24 20:37:23 [statics_utils.py:24] mean per token cost: 0.05872487896311082 ms +ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:230.74841499328613ms total_cost_time:230.79276084899902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14009 prompt_cache_len:5151 prompt_cache_ratio:0.36769219787279606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 +DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:23 [batch.py:51] router release req id 8 +INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s +INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970687866210938 s +DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=46426417571693717882570116888007841932, time:1750768643.3628306s req_ids:[8] +DEBUG 06-24 20:37:23 [manager.py:391] +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:213.61589431762695ms total_cost_time:213.66047859191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14010 prompt_cache_len:5151 prompt_cache_ratio:0.3676659528907923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 +DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:23 [batch.py:51] router release req id 8 +INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s +INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.11067509651184082 s +DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=217076218733205888835659167576230451014, time:1750768643.58324s req_ids:[8] +DEBUG 06-24 20:37:23 [manager.py:391] +ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:212.22424507141113ms total_cost_time:212.26906776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14011 prompt_cache_len:5151 prompt_cache_ratio:0.3676397116551281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 +DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:23 [batch.py:51] router release req id 8 +INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10829305648803711 s +INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.11030030250549316 s +DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=8799838567320948521137010541018961047, time:1750768643.8045652s req_ids:[8] +DEBUG 06-24 20:37:23 [manager.py:391] +ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:399.5318412780762ms total_cost_time:399.57571029663086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14012 prompt_cache_len:5151 prompt_cache_ratio:0.36761347416500145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 +DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:24 [batch.py:51] router release req id 8 +INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.1088721752166748 s +INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.11092114448547363 s +DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=76525814007984016949614803215842856784, time:1750768644.2097976s req_ids:[8] +DEBUG 06-24 20:37:24 [manager.py:391] +ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:216.7818546295166ms total_cost_time:216.82381629943848ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14013 prompt_cache_len:5151 prompt_cache_ratio:0.3675872404196104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 +DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:24 [batch.py:51] router release req id 8 +INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10843586921691895 s +INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.10975074768066406 s +DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=150189330082249843617744967146425009659, time:1750768644.4338036s req_ids:[8] +DEBUG 06-24 20:37:24 [manager.py:391] +ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:214.75529670715332ms total_cost_time:214.8001194000244ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14014 prompt_cache_len:5151 prompt_cache_ratio:0.36756101041815326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 +DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:24 [batch.py:51] router release req id 8 +INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10877275466918945 s +INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.11070990562438965 s +DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=84134536554018676178937675041633655580, time:1750768644.667008s req_ids:[8] +DEBUG 06-24 20:37:24 [manager.py:391] +ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:207.17859268188477ms total_cost_time:207.2288990020752ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:14015 prompt_cache_len:5151 prompt_cache_ratio:0.36753478415982876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 +DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:24 [batch.py:51] router release req id 8 +INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10899138450622559 s +INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.1110389232635498 s +DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=171055964866307339425703828936070774392, time:1750768644.869254s req_ids:[8] +DEBUG 06-24 20:37:24 [manager.py:391] +ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:209.00321006774902ms total_cost_time:209.04803276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14016 prompt_cache_len:5151 prompt_cache_ratio:0.3675085616438356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 +DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:24 [batch.py:51] router release req id 8 +INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.1088266372680664 s +INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11078381538391113 s +DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=167541586733189925553104417644809541069, time:1750768645.0848935s req_ids:[8] +DEBUG 06-24 20:37:25 [manager.py:391] +ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:373.7936019897461ms total_cost_time:373.8389015197754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14017 prompt_cache_len:5151 prompt_cache_ratio:0.3674823428693729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 +DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:25 [batch.py:51] router release req id 8 +INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10976409912109375 s +INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11188507080078125 s +DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=185905800330692748975643394503732300705, time:1750768645.4645739s req_ids:[8] +DEBUG 06-24 20:37:25 [manager.py:391] +ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:210.2224826812744ms total_cost_time:210.27326583862305ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:14018 prompt_cache_len:5151 prompt_cache_ratio:0.3674561278356399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 +DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:25 [batch.py:51] router release req id 8 +INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10868024826049805 s +INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11068344116210938 s +DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=61587575717285583211793449830634246327, time:1750768645.6829276s req_ids:[8] +DEBUG 06-24 20:37:25 [manager.py:391] +ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:207.49545097351074ms total_cost_time:207.53860473632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14019 prompt_cache_len:5151 prompt_cache_ratio:0.3674299165418361 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 +DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:25 [batch.py:51] router release req id 8 +INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10816431045532227 s +INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11012983322143555 s +DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=102084486086390195034546348605748854347, time:1750768645.9066472s req_ids:[8] +DEBUG 06-24 20:37:25 [manager.py:391] +ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:251.6634464263916ms total_cost_time:251.7070770263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14020 prompt_cache_len:5151 prompt_cache_ratio:0.3674037089871612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 +DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:26 [batch.py:51] router release req id 8 +INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s +INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s +DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=27229082176471292183420565726811461205, time:1750768646.165872s req_ids:[8] +DEBUG 06-24 20:37:26 [manager.py:391] +ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:221.36163711547852ms total_cost_time:221.4052677154541ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14021 prompt_cache_len:5151 prompt_cache_ratio:0.3673775051708152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 +DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:26 [batch.py:51] router release req id 8 +INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10975885391235352 s +INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11153221130371094 s +DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=250318443827409529391719268268035001032, time:1750768646.3833728s req_ids:[8] +DEBUG 06-24 20:37:26 [manager.py:391] +ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:212.71848678588867ms total_cost_time:212.76402473449707ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14022 prompt_cache_len:5151 prompt_cache_ratio:0.3673513050919983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 +DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:26 [batch.py:51] router release req id 8 +INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10895156860351562 s +INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s +DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=161666834512740079112209659924765595197, time:1750768646.6020172s req_ids:[8] +DEBUG 06-24 20:37:26 [manager.py:391] +ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:391.2022113800049ms total_cost_time:391.2475109100342ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14023 prompt_cache_len:5151 prompt_cache_ratio:0.36732510874991087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 +DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:26 [batch.py:51] router release req id 8 +INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10798048973083496 s +INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11001396179199219 s +DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=58010366134870118591704892388075613685, time:1750768647.000275s req_ids:[8] +DEBUG 06-24 20:37:27 [manager.py:391] +ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:205.08551597595215ms total_cost_time:205.13010025024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14024 prompt_cache_len:5151 prompt_cache_ratio:0.36729891614375354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 +DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:27 [batch.py:51] router release req id 8 +INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s +INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s +DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=312172612912482241668571861294427273828, time:1750768647.2121692s req_ids:[8] +DEBUG 06-24 20:37:27 [manager.py:391] +ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:211.378812789917ms total_cost_time:211.42292022705078ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14025 prompt_cache_len:5151 prompt_cache_ratio:0.36727272727272725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 +DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:27 [batch.py:51] router release req id 8 +INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10792350769042969 s +INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s +DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=30064478119911264962120305480799129363, time:1750768647.4301913s req_ids:[8] +DEBUG 06-24 20:37:27 [manager.py:391] +ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:213.2437229156494ms total_cost_time:213.2871150970459ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14026 prompt_cache_len:5151 prompt_cache_ratio:0.3672465421360331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 +DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:27 [batch.py:51] router release req id 8 +INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s +INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s +DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=188181456495465733160266496243954849185, time:1750768647.6494856s req_ids:[8] +DEBUG 06-24 20:37:27 [manager.py:391] +ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:170.5005168914795ms total_cost_time:170.54271697998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14027 prompt_cache_len:5151 prompt_cache_ratio:0.36722036073287234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 +DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:27 [batch.py:51] router release req id 8 +INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10907483100891113 s +INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.11092758178710938 s +DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=108924619117105180840169872495899346037, time:1750768647.8259497s req_ids:[8] +DEBUG 06-24 20:37:27 [manager.py:391] +ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:201.55668258666992ms total_cost_time:201.60150527954102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14028 prompt_cache_len:5151 prompt_cache_ratio:0.36719418306244656 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 +DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:27 [batch.py:51] router release req id 8 +INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10797715187072754 s +INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11006927490234375 s +DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=309045816129106047214984119830273207978, time:1750768648.034562s req_ids:[8] +DEBUG 06-24 20:37:28 [manager.py:391] +ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:380.5396556854248ms total_cost_time:380.584716796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14029 prompt_cache_len:5151 prompt_cache_ratio:0.36716800912395753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 +DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:28 [batch.py:51] router release req id 8 +INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10827922821044922 s +INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.10956501960754395 s +DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=152844601813806767524328005983230681988, time:1750768648.421952s req_ids:[8] +DEBUG 06-24 20:37:28 [manager.py:391] +ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:215.70277214050293ms total_cost_time:215.75617790222168ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:14030 prompt_cache_len:5151 prompt_cache_ratio:0.3671418389166073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 +DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:28 [batch.py:51] router release req id 8 +INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.1079721450805664 s +INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005568504333496 s +DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=20573223347345260654732874067021031531, time:1750768648.64376s req_ids:[8] +DEBUG 06-24 20:37:28 [manager.py:391] +ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.4315242767334ms total_cost_time:214.4758701324463ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14031 prompt_cache_len:5151 prompt_cache_ratio:0.36711567243959803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 +DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:28 [batch.py:51] router release req id 8 +INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10799527168273926 s +INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11003708839416504 s +DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=253000760416701909763620068022718186525, time:1750768648.865531s req_ids:[8] +DEBUG 06-24 20:37:28 [manager.py:391] +ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.7531509399414ms total_cost_time:214.79558944702148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14032 prompt_cache_len:5151 prompt_cache_ratio:0.3670895096921323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 +DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:28 [batch.py:51] router release req id 8 +INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.11120367050170898 s +INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11249446868896484 s +DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=15837043767542972863930064657531014182, time:1750768649.0867321s req_ids:[8] +DEBUG 06-24 20:37:29 [manager.py:391] +ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.17498588562012ms total_cost_time:214.25771713256836ms,out_token_counter:1 mean_per_token_cost_time: 0.08273124694824219ms prompt_token_num:14033 prompt_cache_len:5151 prompt_cache_ratio:0.36706335067341267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 +DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:29 [batch.py:51] router release req id 8 +INFO 06-24 20:37:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.10867667198181152 s +INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.10996866226196289 s +DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=165558327551126128419124680030533218708, time:1750768649.3105474s req_ids:[8] +DEBUG 06-24 20:37:29 [manager.py:391] +DEBUG 06-24 20:37:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 56973.601 tokens/s +DEBUG 06-24 20:37:29 [stats.py:37] Avg prompt tokens throughput: 56965.570 tokens/s +DEBUG 06-24 20:37:29 [stats.py:37] Avg generate tokens throughput: 8.031 tokens/s +ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:216.51458740234375ms total_cost_time:216.55774116516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14034 prompt_cache_len:5151 prompt_cache_ratio:0.3670371953826422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 +DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:29 [batch.py:51] router release req id 8 +INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.10903215408325195 s +INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11103463172912598 s +DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=263680788569450330300395281002809920399, time:1750768649.5419836s req_ids:[8] +DEBUG 06-24 20:37:29 [manager.py:391] +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:389.93096351623535ms total_cost_time:389.97602462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14035 prompt_cache_len:5151 prompt_cache_ratio:0.3670110438190239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 +DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:29 [batch.py:51] router release req id 8 +INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.1080942153930664 s +INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11007833480834961 s +DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=91585696867291754894202228285314586261, time:1750768649.9290872s req_ids:[8] +DEBUG 06-24 20:37:29 [manager.py:391] +ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:212.9838466644287ms total_cost_time:213.0293846130371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14036 prompt_cache_len:5151 prompt_cache_ratio:0.3669848959817612 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 +DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:30 [batch.py:51] router release req id 8 +INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s +INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.10978055000305176 s +DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=58840295766180191033116926610078332458, time:1750768650.1485004s req_ids:[8] +DEBUG 06-24 20:37:30 [manager.py:391] +ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:213.16957473754883ms total_cost_time:213.22989463806152ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14037 prompt_cache_len:5151 prompt_cache_ratio:0.3669587518700577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 +DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:30 [batch.py:51] router release req id 8 +INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s +INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.10994791984558105 s +DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=18073608586559937040394494628310988752, time:1750768650.3678918s req_ids:[8] +DEBUG 06-24 20:37:30 [manager.py:391] +ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:210.32214164733887ms total_cost_time:210.38246154785156ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14038 prompt_cache_len:5151 prompt_cache_ratio:0.36693261148311723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 +DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:30 [batch.py:51] router release req id 8 +INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10905623435974121 s +INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.11101031303405762 s +DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=265745384421021407746865981762234487880, time:1750768650.5854254s req_ids:[8] +DEBUG 06-24 20:37:30 [manager.py:391] +ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:215.23809432983398ms total_cost_time:215.28267860412598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14039 prompt_cache_len:5151 prompt_cache_ratio:0.3669064748201439 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 +DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:30 [batch.py:51] router release req id 8 +INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10807538032531738 s +INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.11011791229248047 s +DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=154384013037280503034654216437273156687, time:1750768650.80862s req_ids:[8] +DEBUG 06-24 20:37:30 [manager.py:391] +ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:215.4560089111328ms total_cost_time:215.5168056488037ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:14040 prompt_cache_len:5151 prompt_cache_ratio:0.36688034188034185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 +DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:31 [batch.py:51] router release req id 8 +INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.30987048149108887 s +INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.31208252906799316 s +DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=298891842423466771646813608539047303036, time:1750768651.2397535s req_ids:[8] +DEBUG 06-24 20:37:31 [manager.py:391] +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:430.0730228424072ms total_cost_time:430.1323890686035ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:14041 prompt_cache_len:5151 prompt_cache_ratio:0.36685421266291574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 +DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:31 [batch.py:51] router release req id 8 +INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.108184814453125 s +INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103062629699707 s +DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=212150281567210242513205876642373809904, time:1750768651.4682481s req_ids:[8] +DEBUG 06-24 20:37:31 [manager.py:391] +ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:217.03076362609863ms total_cost_time:217.09346771240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:14042 prompt_cache_len:5151 prompt_cache_ratio:0.3668280871670702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 +DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:31 [batch.py:51] router release req id 8 +INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.1091611385345459 s +INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.11118578910827637 s +DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=229182645959696303596835413221865792740, time:1750768651.693989s req_ids:[8] +DEBUG 06-24 20:37:31 [manager.py:391] +ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:217.2396183013916ms total_cost_time:217.3013687133789ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14043 prompt_cache_len:5151 prompt_cache_ratio:0.36680196539201027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 +DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:31 [batch.py:51] router release req id 8 +INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.10823512077331543 s +INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s +DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=167858335139420522594536818592125811707, time:1750768651.917418s req_ids:[8] +DEBUG 06-24 20:37:31 [manager.py:391] +ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:211.12561225891113ms total_cost_time:211.17258071899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14044 prompt_cache_len:5151 prompt_cache_ratio:0.366775847336941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 +DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:32 [batch.py:51] router release req id 8 +INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.10784339904785156 s +INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11023521423339844 s +DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=103721657734544847581579503931288767098, time:1750768652.136251s req_ids:[8] +DEBUG 06-24 20:37:32 [manager.py:391] +ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:217.1940803527832ms total_cost_time:217.2544002532959ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14045 prompt_cache_len:5151 prompt_cache_ratio:0.366749733001068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 +DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:32 [batch.py:51] router release req id 8 +INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s +INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11115145683288574 s +DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=219999877022901334713583608274836572005, time:1750768652.3599672s req_ids:[8] +DEBUG 06-24 20:37:32 [manager.py:391] +ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:216.01319313049316ms total_cost_time:216.07375144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:14046 prompt_cache_len:5151 prompt_cache_ratio:0.36672362238359674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 +DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:32 [batch.py:51] router release req id 8 +INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.1090090274810791 s +INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11092615127563477 s +DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=215174347424892181745493533918749329597, time:1750768652.58338s req_ids:[8] +DEBUG 06-24 20:37:32 [manager.py:391] +ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:390.06495475769043ms total_cost_time:390.12622833251953ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:14047 prompt_cache_len:5151 prompt_cache_ratio:0.3666975154837332 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 +DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:32 [batch.py:51] router release req id 8 +INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.1081700325012207 s +INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11018633842468262 s +DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=224820620053238446065934643661716282261, time:1750768652.9830985s req_ids:[8] +DEBUG 06-24 20:37:32 [manager.py:391] +ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:216.10307693481445ms total_cost_time:216.16482734680176ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14048 prompt_cache_len:5151 prompt_cache_ratio:0.3666714123006834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 +DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:33 [batch.py:51] router release req id 8 +INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.1086575984954834 s +INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11069941520690918 s +DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=299829430647162374295537733541323288139, time:1750768653.204201s req_ids:[8] +DEBUG 06-24 20:37:33 [manager.py:391] +ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:215.07549285888672ms total_cost_time:215.13700485229492ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14049 prompt_cache_len:5151 prompt_cache_ratio:0.36664531283365365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 +DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:33 [batch.py:51] router release req id 8 +INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10890817642211914 s +INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11088037490844727 s +DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=252984688575898064861044358589249924698, time:1750768653.4267576s req_ids:[8] +DEBUG 06-24 20:37:33 [manager.py:391] +ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:209.5162868499756ms total_cost_time:209.57612991333008ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:14050 prompt_cache_len:5151 prompt_cache_ratio:0.3666192170818505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 +DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:33 [batch.py:51] router release req id 8 +INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10853290557861328 s +INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11063408851623535 s +DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=211078981437536084643178642018715812977, time:1750768653.643601s req_ids:[8] +DEBUG 06-24 20:37:33 [manager.py:391] +ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:213.5615348815918ms total_cost_time:213.61923217773438ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14051 prompt_cache_len:5151 prompt_cache_ratio:0.3665931250444808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 +DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:33 [batch.py:51] router release req id 8 +INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s +INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.10901188850402832 s +DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=151261336880285299082661102908601011142, time:1750768653.8634965s req_ids:[8] +DEBUG 06-24 20:37:33 [manager.py:391] +ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:214.23864364624023ms total_cost_time:214.26010131835938ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:14052 prompt_cache_len:5151 prompt_cache_ratio:0.36656703672075147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 +DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:33 [batch.py:51] router release req id 8 +INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10709404945373535 s +INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.10899901390075684 s +DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=144398890077258877005639262792311657641, time:1750768654.0856285s req_ids:[8] +DEBUG 06-24 20:37:34 [manager.py:391] +ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:382.34472274780273ms total_cost_time:382.3890686035156ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14053 prompt_cache_len:5151 prompt_cache_ratio:0.36654095210986976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 +DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:34 [batch.py:51] router release req id 8 +INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10833501815795898 s +INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.11027979850769043 s +DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=189691696680779490671692199764230633667, time:1750768654.472982s req_ids:[8] +DEBUG 06-24 20:37:34 [manager.py:391] +ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:213.58370780944824ms total_cost_time:213.62662315368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14054 prompt_cache_len:5151 prompt_cache_ratio:0.3665148712110431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 +DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:34 [batch.py:51] router release req id 8 +INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10853743553161621 s +INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061787605285645 s +DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=242369822099912875063963204839230027108, time:1750768654.693744s req_ids:[8] +DEBUG 06-24 20:37:34 [manager.py:391] +ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:212.53228187561035ms total_cost_time:212.57638931274414ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14055 prompt_cache_len:5151 prompt_cache_ratio:0.36648879402347917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 +DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:34 [batch.py:51] router release req id 8 +INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.1078031063079834 s +INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s +DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=328967724320692869345656187846599931999, time:1750768654.9132788s req_ids:[8] +DEBUG 06-24 20:37:34 [manager.py:391] +ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:209.37848091125488ms total_cost_time:209.42282676696777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14056 prompt_cache_len:5151 prompt_cache_ratio:0.3664627205463859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 +DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:35 [batch.py:51] router release req id 8 +INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.1088411808013916 s +INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.11035561561584473 s +DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=283473526727648433820772444719559442807, time:1750768655.1295986s req_ids:[8] +DEBUG 06-24 20:37:35 [manager.py:391] +ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:210.61420440673828ms total_cost_time:210.65926551818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14057 prompt_cache_len:5151 prompt_cache_ratio:0.3664366507789713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 +DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:35 [batch.py:51] router release req id 8 +INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s +INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.11084675788879395 s +DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=113446702042286435123236231751379005422, time:1750768655.3452084s req_ids:[8] +DEBUG 06-24 20:37:35 [manager.py:391] +ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:213.75083923339844ms total_cost_time:213.79375457763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14058 prompt_cache_len:5151 prompt_cache_ratio:0.36641058472044385 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 +DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:35 [batch.py:51] router release req id 8 +INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.1091616153717041 s +INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.1111752986907959 s +DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=102445438939738045243492104913284013533, time:1750768655.5668592s req_ids:[8] +DEBUG 06-24 20:37:35 [manager.py:391] +ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:373.8517761230469ms total_cost_time:373.89612197875977ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14059 prompt_cache_len:5151 prompt_cache_ratio:0.36638452237001207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 +DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:35 [batch.py:51] router release req id 8 +INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.10816597938537598 s +INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.10990715026855469 s +DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=106982580544501394707297620072146017418, time:1750768655.9459054s req_ids:[8] +DEBUG 06-24 20:37:35 [manager.py:391] +ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:214.3685817718506ms total_cost_time:214.41245079040527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14060 prompt_cache_len:5151 prompt_cache_ratio:0.3663584637268848 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 +DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:36 [batch.py:51] router release req id 8 +INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10759758949279785 s +INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.1095268726348877 s +DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=218878316548341065638295672978092747566, time:1750768656.1767836s req_ids:[8] +DEBUG 06-24 20:37:36 [manager.py:391] +ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:225.73018074035645ms total_cost_time:225.77404975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14061 prompt_cache_len:5151 prompt_cache_ratio:0.36633240879027096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 +DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:36 [batch.py:51] router release req id 8 +INFO 06-24 20:37:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s +INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.11079835891723633 s +DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=330216702452067760132481773068916742131, time:1750768656.4000735s req_ids:[8] +DEBUG 06-24 20:37:36 [manager.py:391] +ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:213.00888061523438ms total_cost_time:213.05370330810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14062 prompt_cache_len:5151 prompt_cache_ratio:0.3663063575593799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 +DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:36 [batch.py:51] router release req id 8 +INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10791444778442383 s +INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.10991287231445312 s +DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=136219401290633969462016432329402592441, time:1750768656.6195328s req_ids:[8] +DEBUG 06-24 20:37:36 [manager.py:391] +ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:216.0501480102539ms total_cost_time:216.0937786102295ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14063 prompt_cache_len:5151 prompt_cache_ratio:0.36628031003342104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 +DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:36 [batch.py:51] router release req id 8 +INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s +INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.11060190200805664 s +DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=2998648059949904495729050569848859166, time:1750768656.8408601s req_ids:[8] +DEBUG 06-24 20:37:36 [manager.py:391] +ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:379.6370029449463ms total_cost_time:379.6834945678711ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14064 prompt_cache_len:5151 prompt_cache_ratio:0.3662542662116041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 +DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:37 [batch.py:51] router release req id 8 +INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10842728614807129 s +INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s +DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=141917982380791955525039580240268778492, time:1750768657.2277296s req_ids:[8] +DEBUG 06-24 20:37:37 [manager.py:391] +ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:211.87448501586914ms total_cost_time:211.91930770874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14065 prompt_cache_len:5151 prompt_cache_ratio:0.366228226093139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 +DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:37 [batch.py:51] router release req id 8 +INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.11095833778381348 s +INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11294221878051758 s +DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=182644050427468272389430063389032866885, time:1750768657.4472916s req_ids:[8] +DEBUG 06-24 20:37:37 [manager.py:391] +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:213.5324478149414ms total_cost_time:213.5758399963379ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14066 prompt_cache_len:5151 prompt_cache_ratio:0.3662021896772359 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 +DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:37 [batch.py:51] router release req id 8 +INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10748672485351562 s +INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.10900735855102539 s +DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=16817279111529589239685364579141613467, time:1750768657.6667724s req_ids:[8] +DEBUG 06-24 20:37:37 [manager.py:391] +ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:216.1417007446289ms total_cost_time:216.1862850189209ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14067 prompt_cache_len:5151 prompt_cache_ratio:0.3661761569631051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 +DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:37 [batch.py:51] router release req id 8 +INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10890316963195801 s +INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11095261573791504 s +DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=149614486773128525436912602895726722827, time:1750768657.889134s req_ids:[8] +DEBUG 06-24 20:37:37 [manager.py:391] +ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:213.84382247924805ms total_cost_time:213.88959884643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14068 prompt_cache_len:5151 prompt_cache_ratio:0.3661501279499573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 +DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:38 [batch.py:51] router release req id 8 +INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10725092887878418 s +INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.10913681983947754 s +DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=250392279850003829453981459188382351771, time:1750768658.1099925s req_ids:[8] +DEBUG 06-24 20:37:38 [manager.py:391] +ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:169.39735412597656ms total_cost_time:169.44050788879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14069 prompt_cache_len:5151 prompt_cache_ratio:0.36612410263700335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 +DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:38 [batch.py:51] router release req id 8 +INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10725688934326172 s +INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.1091775894165039 s +DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=244366808824328074400750273891712716274, time:1750768658.288065s req_ids:[8] +DEBUG 06-24 20:37:38 [manager.py:391] +ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:378.9544105529785ms total_cost_time:378.9987564086914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14070 prompt_cache_len:5151 prompt_cache_ratio:0.36609808102345415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 +DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:38 [batch.py:51] router release req id 8 +INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s +INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102907657623291 s +DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=113845917801791225192625135750618769570, time:1750768658.6735034s req_ids:[8] +DEBUG 06-24 20:37:38 [manager.py:391] +ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:214.98823165893555ms total_cost_time:215.03305435180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14071 prompt_cache_len:5151 prompt_cache_ratio:0.3660720631085211 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 +DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:38 [batch.py:51] router release req id 8 +INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s +INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.11012816429138184 s +DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=305497704499796240582864423919009478273, time:1750768658.8953543s req_ids:[8] +DEBUG 06-24 20:37:38 [manager.py:391] +ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:212.97550201416016ms total_cost_time:213.02199363708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14072 prompt_cache_len:5151 prompt_cache_ratio:0.36604604889141557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 +DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:39 [batch.py:51] router release req id 8 +DEBUG 06-24 20:37:39 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:39 [manager.py:283] +DEBUG 06-24 20:37:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:39 [manager.py:284] +INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10768890380859375 s +INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096353530883789 s +DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=162882017774484579468974464812263708926, time:1750768659.1150804s req_ids:[8] +DEBUG 06-24 20:37:39 [manager.py:391] +ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:214.5857810974121ms total_cost_time:214.6306037902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14073 prompt_cache_len:5151 prompt_cache_ratio:0.3660200383713494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 +DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:39 [batch.py:51] router release req id 8 +INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.1078329086303711 s +INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.10983800888061523 s +DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=68376915377731259713886549651838568804, time:1750768659.3354313s req_ids:[8] +DEBUG 06-24 20:37:39 [manager.py:391] +DEBUG 06-24 20:37:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 56086.355 tokens/s +DEBUG 06-24 20:37:39 [stats.py:37] Avg prompt tokens throughput: 56078.374 tokens/s +DEBUG 06-24 20:37:39 [stats.py:37] Avg generate tokens throughput: 7.980 tokens/s +ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:212.63837814331055ms total_cost_time:212.68296241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14074 prompt_cache_len:5151 prompt_cache_ratio:0.36599403154753446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 +DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:39 [batch.py:51] router release req id 8 +INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10843634605407715 s +INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103980541229248 s +DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=229911249074997693788055690420662283035, time:1750768659.556083s req_ids:[8] +DEBUG 06-24 20:37:39 [manager.py:391] +ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:209.94162559509277ms total_cost_time:209.98668670654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14075 prompt_cache_len:5151 prompt_cache_ratio:0.36596802841918297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 +DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:39 [batch.py:51] router release req id 8 +INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s +INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.11149263381958008 s +DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=113119797095664852152736398037907575897, time:1750768659.7720912s req_ids:[8] +DEBUG 06-24 20:37:39 [manager.py:391] +ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:392.55738258361816ms total_cost_time:392.60125160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14076 prompt_cache_len:5151 prompt_cache_ratio:0.36594202898550726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 +DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:40 [batch.py:51] router release req id 8 +INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s +INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.11096954345703125 s +DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=84227695559844687687337978093228077122, time:1750768660.1708016s req_ids:[8] +DEBUG 06-24 20:37:40 [manager.py:391] +ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:209.17534828186035ms total_cost_time:209.21850204467773ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14077 prompt_cache_len:5151 prompt_cache_ratio:0.36591603324572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 +DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:40 [batch.py:51] router release req id 8 +INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10776257514953613 s +INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.10989236831665039 s +DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=131552831575339104317559156929430238427, time:1750768660.3881104s req_ids:[8] +DEBUG 06-24 20:37:40 [manager.py:391] +ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:216.4437770843506ms total_cost_time:216.48883819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14078 prompt_cache_len:5151 prompt_cache_ratio:0.36589004119903396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 +DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:40 [batch.py:51] router release req id 8 +INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10836124420166016 s +INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.1098942756652832 s +DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=189420492797132345394187389989081752103, time:1750768660.6211793s req_ids:[8] +DEBUG 06-24 20:37:40 [manager.py:391] +ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:227.51235961914062ms total_cost_time:227.55742073059082ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14079 prompt_cache_len:5151 prompt_cache_ratio:0.3658640528446623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 +DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:40 [batch.py:51] router release req id 8 +INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10932660102844238 s +INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.11145472526550293 s +DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=59282430695376911964763600173467593028, time:1750768660.845334s req_ids:[8] +DEBUG 06-24 20:37:40 [manager.py:391] +ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:215.35348892211914ms total_cost_time:215.39688110351562ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14080 prompt_cache_len:5151 prompt_cache_ratio:0.3658380681818182 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 +DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:40 [batch.py:51] router release req id 8 +INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10935068130493164 s +INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11067461967468262 s +DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=124814697109664966724194642157042970269, time:1750768661.067379s req_ids:[8] +DEBUG 06-24 20:37:41 [manager.py:391] +ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:212.97883987426758ms total_cost_time:213.02485466003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14081 prompt_cache_len:5151 prompt_cache_ratio:0.3658120872097152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 +DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:41 [batch.py:51] router release req id 8 +INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.1087944507598877 s +INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s +DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=337760848104722275786988979788191222777, time:1750768661.3002846s req_ids:[8] +DEBUG 06-24 20:37:41 [manager.py:391] +ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:398.62847328186035ms total_cost_time:398.67210388183594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14082 prompt_cache_len:5151 prompt_cache_ratio:0.3657861099275671 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 +DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:41 [batch.py:51] router release req id 8 +INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10864114761352539 s +INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11074042320251465 s +DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=185418065051604430992802691647712516531, time:1750768661.692656s req_ids:[8] +DEBUG 06-24 20:37:41 [manager.py:391] +ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:213.44280242919922ms total_cost_time:213.4873867034912ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14083 prompt_cache_len:5151 prompt_cache_ratio:0.3657601363345878 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 +DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:41 [batch.py:51] router release req id 8 +INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10873532295227051 s +INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11081480979919434 s +DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=203452414626859599597906036884031210168, time:1750768661.9124374s req_ids:[8] +DEBUG 06-24 20:37:41 [manager.py:391] +ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:215.240478515625ms total_cost_time:215.285062789917ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14084 prompt_cache_len:5151 prompt_cache_ratio:0.36573416642999146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 +DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:42 [batch.py:51] router release req id 8 +INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10890555381774902 s +INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s +DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=246796842294271271208522994024872184536, time:1750768662.1342194s req_ids:[8] +DEBUG 06-24 20:37:42 [manager.py:391] +ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:216.66884422302246ms total_cost_time:216.71080589294434ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14085 prompt_cache_len:5151 prompt_cache_ratio:0.36570820021299255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 +DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:42 [batch.py:51] router release req id 8 +INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10833859443664551 s +INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.11036014556884766 s +DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=333429011087523918707569960256852928271, time:1750768662.3568814s req_ids:[8] +DEBUG 06-24 20:37:42 [manager.py:391] +ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:213.64736557006836ms total_cost_time:213.69123458862305ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14086 prompt_cache_len:5151 prompt_cache_ratio:0.3656822376828056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 +DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:42 [batch.py:51] router release req id 8 +INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10752439498901367 s +INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.10948514938354492 s +DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=247033001600489871964784834586265937817, time:1750768662.5796947s req_ids:[8] +DEBUG 06-24 20:37:42 [manager.py:391] +ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:212.9817008972168ms total_cost_time:213.02390098571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14087 prompt_cache_len:5151 prompt_cache_ratio:0.36565627883864554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 +DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:42 [batch.py:51] router release req id 8 +INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.309084415435791 s +INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.3113124370574951 s +DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=74811614427725936055679548229522603947, time:1750768663.0080094s req_ids:[8] +DEBUG 06-24 20:37:43 [manager.py:391] +ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:429.5165538787842ms total_cost_time:429.5620918273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14088 prompt_cache_len:5151 prompt_cache_ratio:0.3656303236797274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 +DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:43 [batch.py:51] router release req id 8 +INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10871028900146484 s +INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11075711250305176 s +DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=54192288800080613278339558260024413250, time:1750768663.2353637s req_ids:[8] +DEBUG 06-24 20:37:43 [manager.py:391] +ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:216.35937690734863ms total_cost_time:216.40348434448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14089 prompt_cache_len:5151 prompt_cache_ratio:0.36560437220526654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 +DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:43 [batch.py:51] router release req id 8 +INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s +INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.1105966567993164 s +DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=252810558220714260044052843348628905605, time:1750768663.458472s req_ids:[8] +DEBUG 06-24 20:37:43 [manager.py:391] +ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:215.8493995666504ms total_cost_time:215.89183807373047ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14090 prompt_cache_len:5151 prompt_cache_ratio:0.36557842441447835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 +DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:43 [batch.py:51] router release req id 8 +INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10908365249633789 s +INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11123323440551758 s +DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=248856373528264998929590753204885788461, time:1750768663.6908002s req_ids:[8] +DEBUG 06-24 20:37:43 [manager.py:391] +ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:225.22521018981934ms total_cost_time:225.26884078979492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14091 prompt_cache_len:5151 prompt_cache_ratio:0.36555248030657866 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 +DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:43 [batch.py:51] router release req id 8 +INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10860705375671387 s +INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11064815521240234 s +DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=143194941673592438793976024860222384492, time:1750768663.9129052s req_ids:[8] +DEBUG 06-24 20:37:43 [manager.py:391] +ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:213.33670616149902ms total_cost_time:213.3636474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14092 prompt_cache_len:5151 prompt_cache_ratio:0.36552653988078343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 +DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:44 [batch.py:51] router release req id 8 +INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s +INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.10995173454284668 s +DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=242211672724916949323982728451840504427, time:1750768664.1334672s req_ids:[8] +DEBUG 06-24 20:37:44 [manager.py:391] +ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:208.11152458190918ms total_cost_time:208.15467834472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14093 prompt_cache_len:5151 prompt_cache_ratio:0.36550060313630883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 +DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:44 [batch.py:51] router release req id 8 +INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10888028144836426 s +INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.11094880104064941 s +DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=203816478002049820646010088485853959594, time:1750768664.349486s req_ids:[8] +DEBUG 06-24 20:37:44 [manager.py:391] +ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:383.2268714904785ms total_cost_time:383.2731246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14094 prompt_cache_len:5151 prompt_cache_ratio:0.3654746700723712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 +DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:44 [batch.py:51] router release req id 8 +INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10781192779541016 s +INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.10979199409484863 s +DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=274512080196179113935073283673627139126, time:1750768664.7390273s req_ids:[8] +DEBUG 06-24 20:37:44 [manager.py:391] +ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:216.55750274658203ms total_cost_time:216.60113334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14095 prompt_cache_len:5151 prompt_cache_ratio:0.3654487406881873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 +DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:44 [batch.py:51] router release req id 8 +INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10825824737548828 s +INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s +DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=47039910278596978717417310695713582597, time:1750768664.961557s req_ids:[8] +DEBUG 06-24 20:37:44 [manager.py:391] +ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:210.9377384185791ms total_cost_time:210.9827995300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14096 prompt_cache_len:5151 prompt_cache_ratio:0.3654228149829739 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 +DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:45 [batch.py:51] router release req id 8 +INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10905861854553223 s +INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.11103057861328125 s +DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=53111157697098343827086054986034001277, time:1750768665.1790483s req_ids:[8] +DEBUG 06-24 20:37:45 [manager.py:391] +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:207.09538459777832ms total_cost_time:207.14092254638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14097 prompt_cache_len:5151 prompt_cache_ratio:0.36539689295594807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 +DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:45 [batch.py:51] router release req id 8 +INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10804438591003418 s +INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s +DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=139901179562605658579253878345892966092, time:1750768665.3925562s req_ids:[8] +DEBUG 06-24 20:37:45 [manager.py:391] +ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:209.33103561401367ms total_cost_time:209.37514305114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14098 prompt_cache_len:5151 prompt_cache_ratio:0.3653709746063271 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 +DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:45 [batch.py:51] router release req id 8 +INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.11006402969360352 s +INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.11215567588806152 s +DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=80359381600072279686203175309783508422, time:1750768665.6079237s req_ids:[8] +DEBUG 06-24 20:37:45 [manager.py:391] +ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:208.90021324157715ms total_cost_time:208.94336700439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14099 prompt_cache_len:5151 prompt_cache_ratio:0.3653450599333286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 +DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:45 [batch.py:51] router release req id 8 +INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10808706283569336 s +INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976219177246094 s +DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=230726844957747816816986112734416442735, time:1750768665.8232913s req_ids:[8] +DEBUG 06-24 20:37:45 [manager.py:391] +ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:373.60215187072754ms total_cost_time:373.645544052124ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14100 prompt_cache_len:5151 prompt_cache_ratio:0.36531914893617023 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 +DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:46 [batch.py:51] router release req id 8 +INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10828399658203125 s +INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.11005330085754395 s +DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=287769941603116957090667671183508706576, time:1750768666.2055066s req_ids:[8] +DEBUG 06-24 20:37:46 [manager.py:391] +ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:204.04505729675293ms total_cost_time:204.09154891967773ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14101 prompt_cache_len:5151 prompt_cache_ratio:0.3652932416140699 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 +DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:46 [batch.py:51] router release req id 8 +INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10772943496704102 s +INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.1095435619354248 s +DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=229381722940246876488527189699258179691, time:1750768666.4151423s req_ids:[8] +DEBUG 06-24 20:37:46 [manager.py:391] +ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:212.16654777526855ms total_cost_time:212.21065521240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14102 prompt_cache_len:5151 prompt_cache_ratio:0.36526733796624594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 +DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:46 [batch.py:51] router release req id 8 +INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s +INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.11043047904968262 s +DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=223863396912680091456873463470667158242, time:1750768666.6343105s req_ids:[8] +DEBUG 06-24 20:37:46 [manager.py:391] +ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:214.22672271728516ms total_cost_time:214.27106857299805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14103 prompt_cache_len:5151 prompt_cache_ratio:0.3652414379919166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 +DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:46 [batch.py:51] router release req id 8 +INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s +INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s +DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=117283384639664790999510404570282636648, time:1750768666.854794s req_ids:[8] +DEBUG 06-24 20:37:46 [manager.py:391] +ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:216.64118766784668ms total_cost_time:216.68386459350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14104 prompt_cache_len:5151 prompt_cache_ratio:0.3652155416903006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 +DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:47 [batch.py:51] router release req id 8 +INFO 06-24 20:37:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10886573791503906 s +INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11078763008117676 s +DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=130178343013043275426722332064575395975, time:1750768667.0905154s req_ids:[8] +DEBUG 06-24 20:37:47 [manager.py:391] +INFO 06-24 20:37:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:230.849027633667ms total_cost_time:230.8952808380127ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14105 prompt_cache_len:5151 prompt_cache_ratio:0.3651896490606168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 +DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:47 [batch.py:51] router release req id 8 +INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10493016242980957 s +INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.10600852966308594 s +DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=277470874748853763773436651372930561181, time:1750768667.3169117s req_ids:[8] +DEBUG 06-24 20:37:47 [manager.py:391] +ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:353.34086418151855ms total_cost_time:353.38521003723145ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14106 prompt_cache_len:5151 prompt_cache_ratio:0.36516376010208423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 +DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:47 [batch.py:51] router release req id 8 +INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10999488830566406 s +INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11194300651550293 s +DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=136567641727363853584757833867075372296, time:1750768667.6757884s req_ids:[8] +DEBUG 06-24 20:37:47 [manager.py:391] +ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:211.13324165344238ms total_cost_time:211.17639541625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14107 prompt_cache_len:5151 prompt_cache_ratio:0.3651378748139222 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 +DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:47 [batch.py:51] router release req id 8 +INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s +INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11001920700073242 s +DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=308179057211638020981496232372326875999, time:1750768667.8930788s req_ids:[8] +DEBUG 06-24 20:37:47 [manager.py:391] +ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:209.52415466308594ms total_cost_time:209.5663547515869ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14108 prompt_cache_len:5151 prompt_cache_ratio:0.36511199319535015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 +DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:48 [batch.py:51] router release req id 8 +INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10911083221435547 s +INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11103320121765137 s +DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=339295981134826438782733359420963148046, time:1750768668.109251s req_ids:[8] +DEBUG 06-24 20:37:48 [manager.py:391] +ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:214.43724632263184ms total_cost_time:214.48206901550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14109 prompt_cache_len:5151 prompt_cache_ratio:0.3650861152455879 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 +DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:48 [batch.py:51] router release req id 8 +INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10597705841064453 s +INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.10781741142272949 s +DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=292442417183616886443133011671262261823, time:1750768668.3297327s req_ids:[8] +DEBUG 06-24 20:37:48 [manager.py:391] +ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:208.33373069763184ms total_cost_time:208.37736129760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14110 prompt_cache_len:5151 prompt_cache_ratio:0.3650602409638554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 +DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:48 [batch.py:51] router release req id 8 +INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10862207412719727 s +INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s +DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=79735219659994314233715964630787200208, time:1750768668.5456984s req_ids:[8] +DEBUG 06-24 20:37:48 [manager.py:391] +ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:373.6429214477539ms total_cost_time:373.6860752105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14111 prompt_cache_len:5151 prompt_cache_ratio:0.3650343703493728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 +DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:48 [batch.py:51] router release req id 8 +INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10814213752746582 s +INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11006379127502441 s +DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=240213431420703554677367038724518438057, time:1750768668.9255707s req_ids:[8] +DEBUG 06-24 20:37:48 [manager.py:391] +ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:214.53428268432617ms total_cost_time:214.57815170288086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14112 prompt_cache_len:5151 prompt_cache_ratio:0.36500850340136054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 +DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:49 [batch.py:51] router release req id 8 +INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10921573638916016 s +INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.11102104187011719 s +DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=238398836821138050728425348935243620588, time:1750768669.1471784s req_ids:[8] +DEBUG 06-24 20:37:49 [manager.py:391] +ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:215.55304527282715ms total_cost_time:215.59739112854004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14113 prompt_cache_len:5151 prompt_cache_ratio:0.3649826401190392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 +DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:49 [batch.py:51] router release req id 8 +INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10865092277526855 s +INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987401008605957 s +DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=262862227789800032836750519787356309382, time:1750768669.3695567s req_ids:[8] +DEBUG 06-24 20:37:49 [manager.py:391] +DEBUG 06-24 20:37:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 56194.656 tokens/s +DEBUG 06-24 20:37:49 [stats.py:37] Avg prompt tokens throughput: 56186.683 tokens/s +DEBUG 06-24 20:37:49 [stats.py:37] Avg generate tokens throughput: 7.973 tokens/s +ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:216.19296073913574ms total_cost_time:216.23682975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14114 prompt_cache_len:5151 prompt_cache_ratio:0.3649567805016296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 +DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:49 [batch.py:51] router release req id 8 +INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10804533958435059 s +INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.11006283760070801 s +DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=94147963942124974241697432601992209333, time:1750768669.5920784s req_ids:[8] +DEBUG 06-24 20:37:49 [manager.py:391] +ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:215.67726135253906ms total_cost_time:215.7301902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:14115 prompt_cache_len:5151 prompt_cache_ratio:0.3649309245483528 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 +DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:49 [batch.py:51] router release req id 8 +INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.11000943183898926 s +INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.1119680404663086 s +DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=100797310594177193735169815687532466371, time:1750768669.8144739s req_ids:[8] +DEBUG 06-24 20:37:49 [manager.py:391] +ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:210.8018398284912ms total_cost_time:210.8445167541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14116 prompt_cache_len:5151 prompt_cache_ratio:0.36490507225843016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 +DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:49 [batch.py:51] router release req id 8 +INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s +INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.1100151538848877 s +DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=315316934739730447191890753273953614989, time:1750768670.032156s req_ids:[8] +DEBUG 06-24 20:37:50 [manager.py:391] +ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:391.50142669677734ms total_cost_time:391.54529571533203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14117 prompt_cache_len:5151 prompt_cache_ratio:0.3648792236310831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 +DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:50 [batch.py:51] router release req id 8 +INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.1084737777709961 s +INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.1102294921875 s +DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=295762711712756298798726415581105331406, time:1750768670.4293618s req_ids:[8] +DEBUG 06-24 20:37:50 [manager.py:391] +ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:215.26813507080078ms total_cost_time:215.31057357788086ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14118 prompt_cache_len:5151 prompt_cache_ratio:0.36485337866553336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 +DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:50 [batch.py:51] router release req id 8 +INFO 06-24 20:37:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.10895109176635742 s +INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.11087965965270996 s +DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=82564975426603147322057818733187811786, time:1750768670.654606s req_ids:[8] +DEBUG 06-24 20:37:50 [manager.py:391] +ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:215.01445770263672ms total_cost_time:215.0564193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14119 prompt_cache_len:5151 prompt_cache_ratio:0.36482753736100293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 +DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:50 [batch.py:51] router release req id 8 +INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.10777139663696289 s +INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972714424133301 s +DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=253013045506979420066627614032379650035, time:1750768670.8741245s req_ids:[8] +DEBUG 06-24 20:37:50 [manager.py:391] +ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:211.72380447387695ms total_cost_time:211.76719665527344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14120 prompt_cache_len:5151 prompt_cache_ratio:0.3648016997167139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 +DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:51 [batch.py:51] router release req id 8 +INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10767221450805664 s +INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.10954809188842773 s +DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=65528844113650527310209422258492233131, time:1750768671.091431s req_ids:[8] +DEBUG 06-24 20:37:51 [manager.py:391] +ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:213.63568305969238ms total_cost_time:213.67955207824707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14121 prompt_cache_len:5151 prompt_cache_ratio:0.3647758657318887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 +DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:51 [batch.py:51] router release req id 8 +INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10762619972229004 s +INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.10953760147094727 s +DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=320410845756723611926195434998457428380, time:1750768671.315157s req_ids:[8] +DEBUG 06-24 20:37:51 [manager.py:391] +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:210.32118797302246ms total_cost_time:210.36696434020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14122 prompt_cache_len:5151 prompt_cache_ratio:0.3647500354057499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 +DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:51 [batch.py:51] router release req id 8 +INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10852718353271484 s +INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.11044692993164062 s +DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=166308035229882004696403801283784553590, time:1750768671.530374s req_ids:[8] +DEBUG 06-24 20:37:51 [manager.py:391] +ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:388.4134292602539ms total_cost_time:388.4589672088623ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14123 prompt_cache_len:5151 prompt_cache_ratio:0.3647242087375204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 +DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:51 [batch.py:51] router release req id 8 +INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10909724235534668 s +INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.1110687255859375 s +DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=295544988925749502539793312492208181003, time:1750768671.9265978s req_ids:[8] +DEBUG 06-24 20:37:51 [manager.py:391] +ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:218.82200241088867ms total_cost_time:218.86610984802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14124 prompt_cache_len:5151 prompt_cache_ratio:0.3646983857264231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 +DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:52 [batch.py:51] router release req id 8 +INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10883641242980957 s +INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.1108245849609375 s +DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=229633522933585892364510757270102188696, time:1750768672.1537292s req_ids:[8] +DEBUG 06-24 20:37:52 [manager.py:391] +ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:214.04194831848145ms total_cost_time:214.10226821899414ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14125 prompt_cache_len:5151 prompt_cache_ratio:0.36467256637168144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 +DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:52 [batch.py:51] router release req id 8 +INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10899877548217773 s +INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.11101031303405762 s +DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=234953296761910322560994823450977767339, time:1750768672.3789554s req_ids:[8] +DEBUG 06-24 20:37:52 [manager.py:391] +ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:215.36636352539062ms total_cost_time:215.4078483581543ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14126 prompt_cache_len:5151 prompt_cache_ratio:0.3646467506725188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 +DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:52 [batch.py:51] router release req id 8 +INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s +INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918998718261719 s +DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=204274676720215189689253322568797504445, time:1750768672.5984855s req_ids:[8] +DEBUG 06-24 20:37:52 [manager.py:391] +ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:172.8661060333252ms total_cost_time:172.90830612182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14127 prompt_cache_len:5151 prompt_cache_ratio:0.36462093862815886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 +DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:52 [batch.py:51] router release req id 8 +INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.1073756217956543 s +INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.10922813415527344 s +DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=323899998843028512805501764946876016457, time:1750768672.781292s req_ids:[8] +DEBUG 06-24 20:37:52 [manager.py:391] +ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:214.2770290374756ms total_cost_time:214.32232856750488ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14128 prompt_cache_len:5151 prompt_cache_ratio:0.3645951302378256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 +DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:52 [batch.py:51] router release req id 8 +INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s +INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s +DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=285316706177696563373915860738778524347, time:1750768673.0007403s req_ids:[8] +DEBUG 06-24 20:37:53 [manager.py:391] +INFO 06-24 20:37:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:37:53 [statics_utils.py:24] mean first cost: 231.63000209449999 ms +INFO 06-24 20:37:53 [statics_utils.py:24] mean per token cost: 0.05857177495187085 ms +ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:389.6656036376953ms total_cost_time:389.7109031677246ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14129 prompt_cache_len:5151 prompt_cache_ratio:0.36456932550074317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 +DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:53 [batch.py:51] router release req id 8 +INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10849142074584961 s +INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s +DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=282114903336532640080952245410008783277, time:1750768673.3997617s req_ids:[8] +DEBUG 06-24 20:37:53 [manager.py:391] +ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:220.13068199157715ms total_cost_time:220.17478942871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14130 prompt_cache_len:5151 prompt_cache_ratio:0.36454352441613586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 +DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:53 [batch.py:51] router release req id 8 +INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10838794708251953 s +INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.11044049263000488 s +DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=313203907568116252158009232889714083127, time:1750768673.6249223s req_ids:[8] +DEBUG 06-24 20:37:53 [manager.py:391] +ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:214.68043327331543ms total_cost_time:214.72454071044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14131 prompt_cache_len:5151 prompt_cache_ratio:0.3645177269832284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 +DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:53 [batch.py:51] router release req id 8 +INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10790300369262695 s +INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.10987520217895508 s +DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=133577367349386424102381485838534862539, time:1750768673.8487456s req_ids:[8] +DEBUG 06-24 20:37:53 [manager.py:391] +ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:217.69285202026367ms total_cost_time:217.73552894592285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14132 prompt_cache_len:5151 prompt_cache_ratio:0.3644919332012454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 +DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:53 [batch.py:51] router release req id 8 +INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10848474502563477 s +INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11035490036010742 s +DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=209600652992552617055457788375899478500, time:1750768674.07167s req_ids:[8] +DEBUG 06-24 20:37:54 [manager.py:391] +ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:175.4894256591797ms total_cost_time:175.53043365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14133 prompt_cache_len:5151 prompt_cache_ratio:0.364466143069412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 +DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:54 [batch.py:51] router release req id 8 +INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s +INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11009979248046875 s +DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=55014997780429687694394698161236732803, time:1750768674.2547007s req_ids:[8] +DEBUG 06-24 20:37:54 [manager.py:391] +ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:203.57966423034668ms total_cost_time:203.62138748168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14134 prompt_cache_len:5151 prompt_cache_ratio:0.36444035658695345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 +DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:54 [batch.py:51] router release req id 8 +INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.31034207344055176 s +INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.3123507499694824 s +DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=174934511072892071890292050217957885831, time:1750768674.6721196s req_ids:[8] +DEBUG 06-24 20:37:54 [manager.py:391] +ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:431.8883419036865ms total_cost_time:431.9322109222412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14135 prompt_cache_len:5151 prompt_cache_ratio:0.36441457375309516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 +DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:54 [batch.py:51] router release req id 8 +INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10892534255981445 s +INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11080551147460938 s +DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=159096224103353012114937562279190845920, time:1750768674.902135s req_ids:[8] +DEBUG 06-24 20:37:54 [manager.py:391] +ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:219.37131881713867ms total_cost_time:219.41494941711426ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14136 prompt_cache_len:5151 prompt_cache_ratio:0.3643887945670628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 +DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:55 [batch.py:51] router release req id 8 +INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10866713523864746 s +INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11047124862670898 s +DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=118314997167954466591630726788466876467, time:1750768675.1285143s req_ids:[8] +DEBUG 06-24 20:37:55 [manager.py:391] +ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:213.6552333831787ms total_cost_time:213.6979103088379ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14137 prompt_cache_len:5151 prompt_cache_ratio:0.3643630190280823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 +DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:55 [batch.py:51] router release req id 8 +INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10895133018493652 s +INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081433296203613 s +DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=278139068034468975468719969973692574417, time:1750768675.3475513s req_ids:[8] +DEBUG 06-24 20:37:55 [manager.py:391] +ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:208.83941650390625ms total_cost_time:208.88543128967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14138 prompt_cache_len:5151 prompt_cache_ratio:0.36433724713537985 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 +DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:55 [batch.py:51] router release req id 8 +INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10850048065185547 s +INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11028766632080078 s +DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=294475022554992466051695135087563735652, time:1750768675.5638456s req_ids:[8] +DEBUG 06-24 20:37:55 [manager.py:391] +ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:215.84510803222656ms total_cost_time:215.88826179504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14139 prompt_cache_len:5151 prompt_cache_ratio:0.3643114788881816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 +DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:55 [batch.py:51] router release req id 8 +INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10891485214233398 s +INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11070728302001953 s +DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=224433974289225798482128434043267776354, time:1750768675.7860062s req_ids:[8] +DEBUG 06-24 20:37:55 [manager.py:391] +ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:214.88523483276367ms total_cost_time:214.92838859558105ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14140 prompt_cache_len:5151 prompt_cache_ratio:0.36428571428571427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 +DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:55 [batch.py:51] router release req id 8 +INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10841107368469238 s +INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11028051376342773 s +DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=261065878541184423760802932626762974602, time:1750768676.0076208s req_ids:[8] +DEBUG 06-24 20:37:56 [manager.py:391] +ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:343.7056541442871ms total_cost_time:343.7507152557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14141 prompt_cache_len:5151 prompt_cache_ratio:0.36425995332720457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 +DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:56 [batch.py:51] router release req id 8 +INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10814833641052246 s +INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11053681373596191 s +DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=188521611180382252637981197944550304484, time:1750768676.3590484s req_ids:[8] +DEBUG 06-24 20:37:56 [manager.py:391] +ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:214.54286575317383ms total_cost_time:214.5862579345703ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14142 prompt_cache_len:5151 prompt_cache_ratio:0.3642341960118795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 +DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:56 [batch.py:51] router release req id 8 +INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10810160636901855 s +INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11054468154907227 s +DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=32965608084625148339837596242671479271, time:1750768676.5847495s req_ids:[8] +DEBUG 06-24 20:37:56 [manager.py:391] +ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:217.38529205322266ms total_cost_time:217.42820739746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14143 prompt_cache_len:5151 prompt_cache_ratio:0.36420844233896627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 +DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:56 [batch.py:51] router release req id 8 +INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10854172706604004 s +INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.1103975772857666 s +DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=10338391269055599741511502752424696063, time:1750768676.8128757s req_ids:[8] +DEBUG 06-24 20:37:56 [manager.py:391] +ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:183.9284896850586ms total_cost_time:183.97140502929688ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14144 prompt_cache_len:5151 prompt_cache_ratio:0.3641826923076923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 +DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:56 [batch.py:51] router release req id 8 +INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10795783996582031 s +INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.10995101928710938 s +DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=282989488077063676068152277172435487762, time:1750768676.9927053s req_ids:[8] +DEBUG 06-24 20:37:56 [manager.py:391] +ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:204.8056125640869ms total_cost_time:204.8499584197998ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14145 prompt_cache_len:5151 prompt_cache_ratio:0.36415694591728526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 +DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:57 [batch.py:51] router release req id 8 +INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.10774970054626465 s +INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.10986089706420898 s +DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=90387729692475370474599392164878900987, time:1750768677.2045429s req_ids:[8] +DEBUG 06-24 20:37:57 [manager.py:391] +ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:205.98697662353516ms total_cost_time:206.03227615356445ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14146 prompt_cache_len:5151 prompt_cache_ratio:0.364131203166973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 +DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:57 [batch.py:51] router release req id 8 +INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s +INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s +DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=171647041838881632968734756933956976989, time:1750768677.4151835s req_ids:[8] +DEBUG 06-24 20:37:57 [manager.py:391] +ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:365.72933197021484ms total_cost_time:365.7724857330322ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14147 prompt_cache_len:5151 prompt_cache_ratio:0.3641054640559836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 +DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:57 [batch.py:51] router release req id 8 +INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.1072835922241211 s +INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.1085057258605957 s +DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=56110613564891605873997502883598589304, time:1750768677.788164s req_ids:[8] +DEBUG 06-24 20:37:57 [manager.py:391] +ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:161.74578666687012ms total_cost_time:161.7882251739502ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14148 prompt_cache_len:5151 prompt_cache_ratio:0.36407972858354537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 +DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:57 [batch.py:51] router release req id 8 +INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.11115646362304688 s +INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.11318206787109375 s +DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=117290592537830590460795816361961572936, time:1750768677.986542s req_ids:[8] +DEBUG 06-24 20:37:57 [manager.py:391] +ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:244.70210075378418ms total_cost_time:244.74596977233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14149 prompt_cache_len:5151 prompt_cache_ratio:0.36405399674888683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 +DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:58 [batch.py:51] router release req id 8 +INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.1094672679901123 s +INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11153507232666016 s +DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=289189447362010395611448044743870342188, time:1750768678.2216978s req_ids:[8] +DEBUG 06-24 20:37:58 [manager.py:391] +ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:229.1569709777832ms total_cost_time:229.2029857635498ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14150 prompt_cache_len:5151 prompt_cache_ratio:0.36402826855123677 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 +DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:58 [batch.py:51] router release req id 8 +INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10971188545227051 s +INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11224722862243652 s +DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=6152934389490409582688667191582163704, time:1750768678.444403s req_ids:[8] +DEBUG 06-24 20:37:58 [manager.py:391] +ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:214.13898468017578ms total_cost_time:214.18166160583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14151 prompt_cache_len:5151 prompt_cache_ratio:0.36400254398982407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 +DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:58 [batch.py:51] router release req id 8 +INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s +INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s +DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=125471856090445773112530068365497658847, time:1750768678.6636877s req_ids:[8] +DEBUG 06-24 20:37:58 [manager.py:391] +ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:167.4489974975586ms total_cost_time:167.4942970275879ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14152 prompt_cache_len:5151 prompt_cache_ratio:0.3639768230638779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 +DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:58 [batch.py:51] router release req id 8 +INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10904717445373535 s +INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11120223999023438 s +DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=140777075439202458692657638441102399614, time:1750768678.838702s req_ids:[8] +DEBUG 06-24 20:37:58 [manager.py:391] +ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:369.0049648284912ms total_cost_time:369.0469264984131ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14153 prompt_cache_len:5151 prompt_cache_ratio:0.36395110577262774 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 +DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:59 [batch.py:51] router release req id 8 +INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10903668403625488 s +INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11108112335205078 s +DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=284766646536800623161959687715104232641, time:1750768679.2187178s req_ids:[8] +DEBUG 06-24 20:37:59 [manager.py:391] +ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:222.09620475769043ms total_cost_time:222.1379280090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14154 prompt_cache_len:5151 prompt_cache_ratio:0.3639253921153031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 +DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:59 [batch.py:51] router release req id 8 +INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.1085653305053711 s +INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069679260253906 s +DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=201662764345637507177807429457763657828, time:1750768679.4478264s req_ids:[8] +DEBUG 06-24 20:37:59 [manager.py:391] +DEBUG 06-24 20:37:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 57510.157 tokens/s +DEBUG 06-24 20:37:59 [stats.py:37] Avg prompt tokens throughput: 57502.021 tokens/s +DEBUG 06-24 20:37:59 [stats.py:37] Avg generate tokens throughput: 8.136 tokens/s +ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:221.76742553710938ms total_cost_time:221.80891036987305ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14155 prompt_cache_len:5151 prompt_cache_ratio:0.3638996820911339 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 +DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:59 [batch.py:51] router release req id 8 +INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10825562477111816 s +INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101999282836914 s +DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=73777938578701911787607478562775718659, time:1750768679.674467s req_ids:[8] +DEBUG 06-24 20:37:59 [manager.py:391] +ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:217.54002571105957ms total_cost_time:217.58222579956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14156 prompt_cache_len:5151 prompt_cache_ratio:0.3638739756993501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 +DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:37:59 [batch.py:51] router release req id 8 +INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10834407806396484 s +INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11052274703979492 s +DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=44923281524831597534980173550914189146, time:1750768679.8969252s req_ids:[8] +DEBUG 06-24 20:37:59 [manager.py:391] +ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:211.72165870666504ms total_cost_time:211.76481246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14157 prompt_cache_len:5151 prompt_cache_ratio:0.36384827293918204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 +DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:00 [batch.py:51] router release req id 8 +INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10882925987243652 s +INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11090993881225586 s +DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=154083090001057811559594309426393665127, time:1750768680.1162448s req_ids:[8] +DEBUG 06-24 20:38:00 [manager.py:391] +ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:382.90929794311523ms total_cost_time:382.9517364501953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14158 prompt_cache_len:5151 prompt_cache_ratio:0.36382257380986016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 +DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:00 [batch.py:51] router release req id 8 +INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.1088705062866211 s +INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11087703704833984 s +DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=74563966340992247815546664859325984477, time:1750768680.5058646s req_ids:[8] +DEBUG 06-24 20:38:00 [manager.py:391] +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:216.97258949279785ms total_cost_time:217.01598167419434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14159 prompt_cache_len:5151 prompt_cache_ratio:0.36379687831061513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 +DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:00 [batch.py:51] router release req id 8 +INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10825824737548828 s +INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11020326614379883 s +DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=260905753706121859762174013585322481689, time:1750768680.728447s req_ids:[8] +DEBUG 06-24 20:38:00 [manager.py:391] +ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:214.85376358032227ms total_cost_time:214.89572525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14160 prompt_cache_len:5151 prompt_cache_ratio:0.36377118644067796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 +DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:00 [batch.py:51] router release req id 8 +INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10899949073791504 s +INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.1113128662109375 s +DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=326952334590303005111523589417872851480, time:1750768680.9521742s req_ids:[8] +DEBUG 06-24 20:38:00 [manager.py:391] +ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:217.00334548950195ms total_cost_time:217.04459190368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14161 prompt_cache_len:5151 prompt_cache_ratio:0.3637454981992797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 +DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:01 [batch.py:51] router release req id 8 +INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s +INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11098766326904297 s +DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=27005258496246823137157254342321436601, time:1750768681.1731443s req_ids:[8] +DEBUG 06-24 20:38:01 [manager.py:391] +INFO 06-24 20:38:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:218.16086769104004ms total_cost_time:218.2016372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14162 prompt_cache_len:5151 prompt_cache_ratio:0.36371981358565175 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 +DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:01 [batch.py:51] router release req id 8 +INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10854816436767578 s +INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s +DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=35458298005578539967198214179417879856, time:1750768681.399897s req_ids:[8] +DEBUG 06-24 20:38:01 [manager.py:391] +ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:173.31457138061523ms total_cost_time:173.3555793762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14163 prompt_cache_len:5151 prompt_cache_ratio:0.3636941325990256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 +DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:01 [batch.py:51] router release req id 8 +INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10837125778198242 s +INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11029553413391113 s +DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=211337410280169671186733326716796232792, time:1750768681.5804565s req_ids:[8] +DEBUG 06-24 20:38:01 [manager.py:391] +ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:400.799036026001ms total_cost_time:400.83980560302734ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14164 prompt_cache_len:5151 prompt_cache_ratio:0.36366845523863317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 +DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:01 [batch.py:51] router release req id 8 +INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10798430442810059 s +INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11058998107910156 s +DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=110995380351104975997078192507291917812, time:1750768681.9869306s req_ids:[8] +DEBUG 06-24 20:38:01 [manager.py:391] +ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:218.5213565826416ms total_cost_time:218.56355667114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14165 prompt_cache_len:5151 prompt_cache_ratio:0.3636427815037063 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 +DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:02 [batch.py:51] router release req id 8 +INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10770893096923828 s +INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10973548889160156 s +DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=242069490161497126536762562289618329982, time:1750768682.2093859s req_ids:[8] +DEBUG 06-24 20:38:02 [manager.py:391] +ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:213.31501007080078ms total_cost_time:213.35840225219727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14166 prompt_cache_len:5151 prompt_cache_ratio:0.36361711139347735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 +DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:02 [batch.py:51] router release req id 8 +INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s +INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.11080026626586914 s +DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=141027683372777210687452550197337608923, time:1750768682.4319959s req_ids:[8] +DEBUG 06-24 20:38:02 [manager.py:391] +ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:212.02802658081055ms total_cost_time:212.0687961578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14167 prompt_cache_len:5151 prompt_cache_ratio:0.36359144490717865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 +DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:02 [batch.py:51] router release req id 8 +INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10754776000976562 s +INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s +DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=51760239038390837105937099685880657354, time:1750768682.6513147s req_ids:[8] +DEBUG 06-24 20:38:02 [manager.py:391] +ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:213.74797821044922ms total_cost_time:213.7916088104248ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14168 prompt_cache_len:5151 prompt_cache_ratio:0.36356578204404294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 +DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:02 [batch.py:51] router release req id 8 +INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10795235633850098 s +INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10959386825561523 s +DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=10949780752641403882021201027722631303, time:1750768682.8713515s req_ids:[8] +DEBUG 06-24 20:38:02 [manager.py:391] +ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:174.6044158935547ms total_cost_time:174.64780807495117ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14169 prompt_cache_len:5151 prompt_cache_ratio:0.363540122803303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 +DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:02 [batch.py:51] router release req id 8 +INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.1086127758026123 s +INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.11049795150756836 s +DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=238779521307492400694428455414152362553, time:1750768683.0539546s req_ids:[8] +DEBUG 06-24 20:38:03 [manager.py:391] +ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:382.5962543487549ms total_cost_time:382.63845443725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14170 prompt_cache_len:5151 prompt_cache_ratio:0.36351446718419195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 +DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:03 [batch.py:51] router release req id 8 +INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.10868072509765625 s +INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.11085987091064453 s +DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=18529632351738291947530661825073091866, time:1750768683.4411855s req_ids:[8] +DEBUG 06-24 20:38:03 [manager.py:391] +ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:217.15950965881348ms total_cost_time:217.20266342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14171 prompt_cache_len:5151 prompt_cache_ratio:0.3634888151859431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 +DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:03 [batch.py:51] router release req id 8 +INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s +INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.10971617698669434 s +DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=284746067302748642760279084859379002655, time:1750768683.6650455s req_ids:[8] +DEBUG 06-24 20:38:03 [manager.py:391] +ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:213.98138999938965ms total_cost_time:214.02287483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14172 prompt_cache_len:5151 prompt_cache_ratio:0.36346316680779 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 +DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:03 [batch.py:51] router release req id 8 +INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s +INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s +DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=198514015385547453701586904024344144267, time:1750768683.8885264s req_ids:[8] +DEBUG 06-24 20:38:03 [manager.py:391] +ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:181.23912811279297ms total_cost_time:181.28037452697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14173 prompt_cache_len:5151 prompt_cache_ratio:0.36343752204896634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 +DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:03 [batch.py:51] router release req id 8 +INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10780978202819824 s +INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.10933518409729004 s +DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=35313010008105956344341483678136070125, time:1750768684.072916s req_ids:[8] +DEBUG 06-24 20:38:04 [manager.py:391] +ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:179.16345596313477ms total_cost_time:179.20804023742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14174 prompt_cache_len:5151 prompt_cache_ratio:0.3634118809087061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 +DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:04 [batch.py:51] router release req id 8 +INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10849118232727051 s +INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047577857971191 s +DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=253354572824708775024856970120162485393, time:1750768684.259436s req_ids:[8] +DEBUG 06-24 20:38:04 [manager.py:391] +ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:210.2193832397461ms total_cost_time:210.26277542114258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14175 prompt_cache_len:5151 prompt_cache_ratio:0.36338624338624337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 +DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:04 [batch.py:51] router release req id 8 +INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10636591911315918 s +INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.108245849609375 s +DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=95303560593855870200568265773093014672, time:1750768684.4755044s req_ids:[8] +DEBUG 06-24 20:38:04 [manager.py:391] +ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:378.7209987640381ms total_cost_time:378.76439094543457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14176 prompt_cache_len:5151 prompt_cache_ratio:0.36336060948081267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 +DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:04 [batch.py:51] router release req id 8 +INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s +INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s +DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=191597203096087243131727601509707401763, time:1750768684.8583694s req_ids:[8] +DEBUG 06-24 20:38:04 [manager.py:391] +ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:207.45849609375ms total_cost_time:207.50164985656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14177 prompt_cache_len:5151 prompt_cache_ratio:0.3633349791916484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 +DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:04 [batch.py:51] router release req id 8 +INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.10658931732177734 s +INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.10821914672851562 s +DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=90484294903063968174748456817660525703, time:1750768685.074727s req_ids:[8] +DEBUG 06-24 20:38:05 [manager.py:391] +ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:171.81086540222168ms total_cost_time:171.85187339782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14178 prompt_cache_len:5151 prompt_cache_ratio:0.36330935251798563 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 +DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:05 [batch.py:51] router release req id 8 +INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.1088409423828125 s +INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11090493202209473 s +DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=51381296324852724349323044681314292449, time:1750768685.251496s req_ids:[8] +DEBUG 06-24 20:38:05 [manager.py:391] +ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:208.24766159057617ms total_cost_time:208.29010009765625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14179 prompt_cache_len:5151 prompt_cache_ratio:0.3632837294590592 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 +DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:05 [batch.py:51] router release req id 8 +INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.10897231101989746 s +INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11093783378601074 s +DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=109009362378245755633944678269420645115, time:1750768685.4678187s req_ids:[8] +DEBUG 06-24 20:38:05 [manager.py:391] +ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:213.18435668945312ms total_cost_time:213.23060989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14180 prompt_cache_len:5151 prompt_cache_ratio:0.36325811001410435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 +DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:05 [batch.py:51] router release req id 8 +INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.1091620922088623 s +INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11108851432800293 s +DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=227513934784281168857900035045679383655, time:1750768685.6898587s req_ids:[8] +DEBUG 06-24 20:38:05 [manager.py:391] +ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:211.456298828125ms total_cost_time:211.5025520324707ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14181 prompt_cache_len:5151 prompt_cache_ratio:0.3632324941823567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 +DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:05 [batch.py:51] router release req id 8 +INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.3102278709411621 s +INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.312058687210083 s +DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=247525936455225559492817472834022954412, time:1750768686.1170447s req_ids:[8] +DEBUG 06-24 20:38:06 [manager.py:391] +ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:435.4870319366455ms total_cost_time:435.5306625366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14182 prompt_cache_len:5151 prompt_cache_ratio:0.36320688196305173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 +DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:06 [batch.py:51] router release req id 8 +INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10930037498474121 s +INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.11130404472351074 s +DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=185960505053212011677462128620847809349, time:1750768686.347963s req_ids:[8] +DEBUG 06-24 20:38:06 [manager.py:391] +ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:218.8713550567627ms total_cost_time:218.9171314239502ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14183 prompt_cache_len:5151 prompt_cache_ratio:0.3631812733554255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 +DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:06 [batch.py:51] router release req id 8 +INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10779023170471191 s +INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.10980868339538574 s +DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=129728826491704154499531793096668932516, time:1750768686.5732012s req_ids:[8] +DEBUG 06-24 20:38:06 [manager.py:391] +ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:225.14772415161133ms total_cost_time:225.19254684448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14184 prompt_cache_len:5151 prompt_cache_ratio:0.36315566835871405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 +DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:06 [batch.py:51] router release req id 8 +INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10847687721252441 s +INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.11047720909118652 s +DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=199278530453715152521667602023032361682, time:1750768686.8081086s req_ids:[8] +DEBUG 06-24 20:38:06 [manager.py:391] +ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:218.02496910095215ms total_cost_time:218.08648109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14185 prompt_cache_len:5151 prompt_cache_ratio:0.36313006697215366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 +DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:06 [batch.py:51] router release req id 8 +INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10957455635070801 s +INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11159825325012207 s +DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=197806944879467932754168708734517403834, time:1750768687.0399966s req_ids:[8] +DEBUG 06-24 20:38:07 [manager.py:391] +ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:227.036714553833ms total_cost_time:227.0808219909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14186 prompt_cache_len:5151 prompt_cache_ratio:0.36310446919498096 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 +DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:07 [batch.py:51] router release req id 8 +INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.1085350513458252 s +INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11054706573486328 s +DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=215321861954828250902320446125661295287, time:1750768687.265464s req_ids:[8] +DEBUG 06-24 20:38:07 [manager.py:391] +ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:176.91993713378906ms total_cost_time:176.96356773376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14187 prompt_cache_len:5151 prompt_cache_ratio:0.36307887502643266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 +DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:07 [batch.py:51] router release req id 8 +INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10918903350830078 s +INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11087489128112793 s +DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=194528928910483672786366669496067664425, time:1750768687.447619s req_ids:[8] +DEBUG 06-24 20:38:07 [manager.py:391] +ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:342.63134002685547ms total_cost_time:342.67687797546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14188 prompt_cache_len:5151 prompt_cache_ratio:0.3630532844657457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 +DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:07 [batch.py:51] router release req id 8 +INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s +INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11122608184814453 s +DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=122697144990007747778809686766591126935, time:1750768687.7963295s req_ids:[8] +DEBUG 06-24 20:38:07 [manager.py:391] +ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:210.60824394226074ms total_cost_time:210.65282821655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14189 prompt_cache_len:5151 prompt_cache_ratio:0.3630276975121573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 +DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:07 [batch.py:51] router release req id 8 +INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s +INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11101126670837402 s +DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=71558937162813044842976126977428581463, time:1750768688.0146272s req_ids:[8] +DEBUG 06-24 20:38:08 [manager.py:391] +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:217.00334548950195ms total_cost_time:217.04912185668945ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14190 prompt_cache_len:5151 prompt_cache_ratio:0.36300211416490485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 +DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:08 [batch.py:51] router release req id 8 +INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10921788215637207 s +INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11173605918884277 s +DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=209784717074598817632273593594639108435, time:1750768688.2403324s req_ids:[8] +DEBUG 06-24 20:38:08 [manager.py:391] +ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:219.44642066955566ms total_cost_time:219.49172019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14191 prompt_cache_len:5151 prompt_cache_ratio:0.36297653442322597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 +DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:08 [batch.py:51] router release req id 8 +INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.1088409423828125 s +INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11101007461547852 s +DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=33232350300964080644871214953428978630, time:1750768688.4764292s req_ids:[8] +DEBUG 06-24 20:38:08 [manager.py:391] +ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:228.99127006530762ms total_cost_time:229.0365695953369ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14192 prompt_cache_len:5151 prompt_cache_ratio:0.36295095828635854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 +DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:08 [batch.py:51] router release req id 8 +INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.1088857650756836 s +INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11118650436401367 s +DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=119629326759591610755897163418605866060, time:1750768688.7011082s req_ids:[8] +DEBUG 06-24 20:38:08 [manager.py:391] +ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:219.1751003265381ms total_cost_time:219.1946506500244ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:14193 prompt_cache_len:5151 prompt_cache_ratio:0.3629253857535405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 +DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:08 [batch.py:51] router release req id 8 +INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10694622993469238 s +INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.1090552806854248 s +DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=46972825149682052806645126772296467967, time:1750768688.9257085s req_ids:[8] +DEBUG 06-24 20:38:08 [manager.py:391] +ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:351.0925769805908ms total_cost_time:351.1366844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14194 prompt_cache_len:5151 prompt_cache_ratio:0.3628998168240101 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 +DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:09 [batch.py:51] router release req id 8 +INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.1088263988494873 s +INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11108541488647461 s +DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=205011454682053495445724172559671150852, time:1750768689.2818682s req_ids:[8] +DEBUG 06-24 20:38:09 [manager.py:391] +ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:211.46130561828613ms total_cost_time:211.50565147399902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14195 prompt_cache_len:5151 prompt_cache_ratio:0.362874251497006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 +DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:09 [batch.py:51] router release req id 8 +INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.10794830322265625 s +INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s +DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=273744508426812612274062055661189706544, time:1750768689.502176s req_ids:[8] +DEBUG 06-24 20:38:09 [manager.py:391] +DEBUG 06-24 20:38:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57815.680 tokens/s +DEBUG 06-24 20:38:09 [stats.py:37] Avg prompt tokens throughput: 57807.524 tokens/s +DEBUG 06-24 20:38:09 [stats.py:37] Avg generate tokens throughput: 8.156 tokens/s +ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:211.59934997558594ms total_cost_time:211.64417266845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14196 prompt_cache_len:5151 prompt_cache_ratio:0.3628486897717667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 +DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:09 [batch.py:51] router release req id 8 +INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.10812568664550781 s +INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11015868186950684 s +DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=270743976773976173999166765421058661034, time:1750768689.719876s req_ids:[8] +DEBUG 06-24 20:38:09 [manager.py:391] +ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:176.17368698120117ms total_cost_time:176.21827125549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14197 prompt_cache_len:5151 prompt_cache_ratio:0.36282313164753116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 +DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:09 [batch.py:51] router release req id 8 +INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s +INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11105799674987793 s +DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=247115560862649381763609393170566129041, time:1750768689.9018357s req_ids:[8] +DEBUG 06-24 20:38:09 [manager.py:391] +ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:205.84464073181152ms total_cost_time:205.8870792388916ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14198 prompt_cache_len:5151 prompt_cache_ratio:0.36279757712353855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 +DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:10 [batch.py:51] router release req id 8 +INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10808396339416504 s +INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.1102452278137207 s +DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=16984153363977296927577372231580779847, time:1750768690.114682s req_ids:[8] +DEBUG 06-24 20:38:10 [manager.py:391] +ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:215.52777290344238ms total_cost_time:215.57283401489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14199 prompt_cache_len:5151 prompt_cache_ratio:0.3627720261990281 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 +DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:10 [batch.py:51] router release req id 8 +INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10829281806945801 s +INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.11023068428039551 s +DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=17017772909587950787982895234348561299, time:1750768690.337348s req_ids:[8] +DEBUG 06-24 20:38:10 [manager.py:391] +ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:341.1886692047119ms total_cost_time:341.231107711792ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14200 prompt_cache_len:5151 prompt_cache_ratio:0.36274647887323946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 +DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:10 [batch.py:51] router release req id 8 +INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10882735252380371 s +INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.11094212532043457 s +DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=225981476866537441642836077793929861033, time:1750768690.6868007s req_ids:[8] +DEBUG 06-24 20:38:10 [manager.py:391] +ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:214.57624435424805ms total_cost_time:214.61749076843262ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14201 prompt_cache_len:5151 prompt_cache_ratio:0.3627209351454123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 +DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:10 [batch.py:51] router release req id 8 +INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10770130157470703 s +INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.10980796813964844 s +DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=42070377922869162624690638056074600699, time:1750768690.9040008s req_ids:[8] +DEBUG 06-24 20:38:10 [manager.py:391] +ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:211.29107475280762ms total_cost_time:211.3347053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14202 prompt_cache_len:5151 prompt_cache_ratio:0.36269539501478665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 +DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:11 [batch.py:51] router release req id 8 +INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10793948173522949 s +INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.1104288101196289 s +DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=115951837902654693427578850638982138422, time:1750768691.1255472s req_ids:[8] +DEBUG 06-24 20:38:11 [manager.py:391] +ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:214.65826034545898ms total_cost_time:214.70260620117188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14203 prompt_cache_len:5151 prompt_cache_ratio:0.3626698584806027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 +DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:11 [batch.py:51] router release req id 8 +INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10895729064941406 s +INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s +DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=206825416090870720441436930825699844359, time:1750768691.3447828s req_ids:[8] +DEBUG 06-24 20:38:11 [manager.py:391] +ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:217.3924446105957ms total_cost_time:217.45014190673828ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14204 prompt_cache_len:5151 prompt_cache_ratio:0.3626443255421008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 +DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:11 [batch.py:51] router release req id 8 +INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10773515701293945 s +INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.10961675643920898 s +DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=106472139598992744683958841479119667584, time:1750768691.5683894s req_ids:[8] +DEBUG 06-24 20:38:11 [manager.py:391] +ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:386.56067848205566ms total_cost_time:386.60597801208496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14205 prompt_cache_len:5151 prompt_cache_ratio:0.36261879619852166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 +DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:11 [batch.py:51] router release req id 8 +INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10941839218139648 s +INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.11181068420410156 s +DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=311271409168698693291115896412474455573, time:1750768691.9611778s req_ids:[8] +DEBUG 06-24 20:38:11 [manager.py:391] +ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:216.37749671936035ms total_cost_time:216.42112731933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14206 prompt_cache_len:5151 prompt_cache_ratio:0.362593270449106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 +DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:12 [batch.py:51] router release req id 8 +INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10801124572753906 s +INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.10973954200744629 s +DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=134637267747952409284968376204123426466, time:1750768692.18736s req_ids:[8] +DEBUG 06-24 20:38:12 [manager.py:391] +ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:176.65600776672363ms total_cost_time:176.6986846923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14207 prompt_cache_len:5151 prompt_cache_ratio:0.36256774829309496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 +DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:12 [batch.py:51] router release req id 8 +INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10842704772949219 s +INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s +DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=166439763074399175326444046426164749401, time:1750768692.3673694s req_ids:[8] +DEBUG 06-24 20:38:12 [manager.py:391] +ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:208.9369297027588ms total_cost_time:208.98056030273438ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14208 prompt_cache_len:5151 prompt_cache_ratio:0.3625422297297297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 +DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:12 [batch.py:51] router release req id 8 +INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s +INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11070489883422852 s +DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=312959480188493585876276955360396461605, time:1750768692.5856762s req_ids:[8] +DEBUG 06-24 20:38:12 [manager.py:391] +ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:218.9047336578369ms total_cost_time:218.949556350708ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14209 prompt_cache_len:5151 prompt_cache_ratio:0.3625167147582518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 +DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:12 [batch.py:51] router release req id 8 +INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10913968086242676 s +INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11130189895629883 s +DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=295446008450988072544038685101500335228, time:1750768692.810723s req_ids:[8] +DEBUG 06-24 20:38:12 [manager.py:391] +ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:213.58418464660645ms total_cost_time:213.62709999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14210 prompt_cache_len:5151 prompt_cache_ratio:0.36249120337790286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 +DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:12 [batch.py:51] router release req id 8 +INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10776090621948242 s +INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11002540588378906 s +DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=182936624155111678582189648609360928994, time:1750768693.0288866s req_ids:[8] +DEBUG 06-24 20:38:13 [manager.py:391] +ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:386.8868350982666ms total_cost_time:386.9304656982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14211 prompt_cache_len:5151 prompt_cache_ratio:0.36246569558792485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 +DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:13 [batch.py:51] router release req id 8 +INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10879874229431152 s +INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11119341850280762 s +DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=176480529561905008038249585531648775072, time:1750768693.421224s req_ids:[8] +DEBUG 06-24 20:38:13 [manager.py:391] +ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:216.6297435760498ms total_cost_time:216.6738510131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14212 prompt_cache_len:5151 prompt_cache_ratio:0.3624401913875598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 +DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:13 [batch.py:51] router release req id 8 +INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10827040672302246 s +INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11047816276550293 s +DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=261170535186346665435235803057198718350, time:1750768693.647196s req_ids:[8] +DEBUG 06-24 20:38:13 [manager.py:391] +ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:212.24474906921387ms total_cost_time:212.28981018066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14213 prompt_cache_len:5151 prompt_cache_ratio:0.3624146907760501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 +DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:13 [batch.py:51] router release req id 8 +INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.1090390682220459 s +INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11121153831481934 s +DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=320989574373195236641079262461239703866, time:1750768693.8656025s req_ids:[8] +DEBUG 06-24 20:38:13 [manager.py:391] +ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:210.35218238830566ms total_cost_time:210.4203701019287ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:14214 prompt_cache_len:5151 prompt_cache_ratio:0.36238919375263823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 +DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:13 [batch.py:51] router release req id 8 +INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.10898756980895996 s +INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.11090850830078125 s +DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=31018357144751687697047698270001692961, time:1750768694.082509s req_ids:[8] +DEBUG 06-24 20:38:14 [manager.py:391] +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:215.2872085571289ms total_cost_time:215.3308391571045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14215 prompt_cache_len:5151 prompt_cache_ratio:0.362363700316567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 +DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:14 [batch.py:51] router release req id 8 +INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.1078026294708252 s +INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.10988831520080566 s +DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=81276281701148278909942954766081135249, time:1750768694.3046231s req_ids:[8] +DEBUG 06-24 20:38:14 [manager.py:391] +ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:219.00486946105957ms total_cost_time:219.04921531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14216 prompt_cache_len:5151 prompt_cache_ratio:0.36233821046707937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 +DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:14 [batch.py:51] router release req id 8 +INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.1093909740447998 s +INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.1117711067199707 s +DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=321577718599418621138164629700082446788, time:1750768694.527994s req_ids:[8] +DEBUG 06-24 20:38:14 [manager.py:391] +ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:390.775203704834ms total_cost_time:390.8195495605469ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14217 prompt_cache_len:5151 prompt_cache_ratio:0.3623127242034184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 +DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:14 [batch.py:51] router release req id 8 +INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.10907316207885742 s +INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.11120891571044922 s +DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=327659208897464653068304672886944585881, time:1750768694.9248803s req_ids:[8] +DEBUG 06-24 20:38:14 [manager.py:391] +ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:216.83096885681152ms total_cost_time:216.8734073638916ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14218 prompt_cache_len:5151 prompt_cache_ratio:0.36228724152482766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 +DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:15 [batch.py:51] router release req id 8 +INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10895967483520508 s +INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102485656738281 s +DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=101692770434708124250708479874715588181, time:1750768695.1490266s req_ids:[8] +DEBUG 06-24 20:38:15 [manager.py:391] +ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:218.11318397521973ms total_cost_time:218.1565761566162ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14219 prompt_cache_len:5151 prompt_cache_ratio:0.3622617624305507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 +DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:15 [batch.py:51] router release req id 8 +INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10835528373718262 s +INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11044931411743164 s +DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=211485066387148536847904556744189065158, time:1750768695.3734095s req_ids:[8] +DEBUG 06-24 20:38:15 [manager.py:391] +ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:217.6377773284912ms total_cost_time:217.681884765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14220 prompt_cache_len:5151 prompt_cache_ratio:0.36223628691983123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 +DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:15 [batch.py:51] router release req id 8 +INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.11105036735534668 s +INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.1127777099609375 s +DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=233384436029030430711041168363354806871, time:1750768695.5982616s req_ids:[8] +DEBUG 06-24 20:38:15 [manager.py:391] +ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:173.6428737640381ms total_cost_time:173.73323440551758ms,out_token_counter:1 mean_per_token_cost_time: 0.09036064147949219ms prompt_token_num:14221 prompt_cache_len:5151 prompt_cache_ratio:0.3622108149919134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 +DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:15 [batch.py:51] router release req id 8 +INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10799264907836914 s +INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013436317443848 s +DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=42610771945848430321528990043884177795, time:1750768695.7791848s req_ids:[8] +DEBUG 06-24 20:38:15 [manager.py:391] +ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:205.2440643310547ms total_cost_time:205.28793334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14222 prompt_cache_len:5151 prompt_cache_ratio:0.36218534664604135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 +DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:15 [batch.py:51] router release req id 8 +INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10832738876342773 s +INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11050915718078613 s +DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=262404354696922514969577495565223239410, time:1750768696.0016062s req_ids:[8] +DEBUG 06-24 20:38:16 [manager.py:391] +ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:391.0329341888428ms total_cost_time:391.07775688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14223 prompt_cache_len:5151 prompt_cache_ratio:0.3621598818814596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 +DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:16 [batch.py:51] router release req id 8 +INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10826778411865234 s +INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s +DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=25340204156296689908723468427950447326, time:1750768696.388604s req_ids:[8] +DEBUG 06-24 20:38:16 [manager.py:391] +ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:220.21961212158203ms total_cost_time:220.26300430297852ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14224 prompt_cache_len:5151 prompt_cache_ratio:0.3621344206974128 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 +DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:16 [batch.py:51] router release req id 8 +INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10730528831481934 s +INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.10919857025146484 s +DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=109768081338025255061063554555060608606, time:1750768696.6133816s req_ids:[8] +DEBUG 06-24 20:38:16 [manager.py:391] +ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:175.02784729003906ms total_cost_time:175.07100105285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14225 prompt_cache_len:5151 prompt_cache_ratio:0.3621089630931459 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 +DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:16 [batch.py:51] router release req id 8 +INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.1083369255065918 s +INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s +DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=11523934699780223017156626184254741299, time:1750768696.7949543s req_ids:[8] +DEBUG 06-24 20:38:16 [manager.py:391] +ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:176.67150497436523ms total_cost_time:176.713228225708ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14226 prompt_cache_len:5151 prompt_cache_ratio:0.3620835090679038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 +DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:16 [batch.py:51] router release req id 8 +INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10875916481018066 s +INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.1110374927520752 s +DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=198349858160249422477745918314036845981, time:1750768696.9774497s req_ids:[8] +DEBUG 06-24 20:38:16 [manager.py:391] +ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:208.70423316955566ms total_cost_time:208.74977111816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14227 prompt_cache_len:5151 prompt_cache_ratio:0.36205805862093204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 +DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:17 [batch.py:51] router release req id 8 +INFO 06-24 20:38:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s +INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.11068344116210938 s +DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=235489047713352713784873971036072115283, time:1750768697.1912224s req_ids:[8] +DEBUG 06-24 20:38:17 [manager.py:391] +ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:211.58075332641602ms total_cost_time:211.6250991821289ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14228 prompt_cache_len:5151 prompt_cache_ratio:0.36203261175147594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 +DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:17 [batch.py:51] router release req id 8 +INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.3116908073425293 s +INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.3137836456298828 s +DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=215332593647059960225303148801074257548, time:1750768697.6199865s req_ids:[8] +DEBUG 06-24 20:38:17 [manager.py:391] +ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:431.67734146118164ms total_cost_time:431.72407150268555ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14229 prompt_cache_len:5151 prompt_cache_ratio:0.36200716845878134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 +DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:17 [batch.py:51] router release req id 8 +INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.1080782413482666 s +INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.11011672019958496 s +DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=109913952028972147365714172136110248492, time:1750768697.8476179s req_ids:[8] +DEBUG 06-24 20:38:17 [manager.py:391] +ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:218.13559532165527ms total_cost_time:218.18041801452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14230 prompt_cache_len:5151 prompt_cache_ratio:0.36198172874209417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 +DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:17 [batch.py:51] router release req id 8 +INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.1094510555267334 s +INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.11151289939880371 s +DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=271928610050274309815041404875463624975, time:1750768698.0724802s req_ids:[8] +DEBUG 06-24 20:38:18 [manager.py:391] +ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:215.30532836914062ms total_cost_time:215.3482437133789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14231 prompt_cache_len:5151 prompt_cache_ratio:0.36195629260066053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 +DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:18 [batch.py:51] router release req id 8 +INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10951423645019531 s +INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.11147141456604004 s +DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=335467360642515318383113717836041957658, time:1750768698.2939093s req_ids:[8] +DEBUG 06-24 20:38:18 [manager.py:391] +ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:217.9696559906006ms total_cost_time:218.01400184631348ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14232 prompt_cache_len:5151 prompt_cache_ratio:0.3619308600337268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 +DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:18 [batch.py:51] router release req id 8 +INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s +INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s +DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=296156113661958927242621097115098895607, time:1750768698.5184383s req_ids:[8] +DEBUG 06-24 20:38:18 [manager.py:391] +ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:216.52507781982422ms total_cost_time:216.5682315826416ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14233 prompt_cache_len:5151 prompt_cache_ratio:0.36190543104053957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 +DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:18 [batch.py:51] router release req id 8 +INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10742068290710449 s +INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.10921001434326172 s +DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=28699766889382186567249182223262387519, time:1750768698.7475092s req_ids:[8] +DEBUG 06-24 20:38:18 [manager.py:391] +ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:184.57818031311035ms total_cost_time:184.61847305297852ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:14234 prompt_cache_len:5151 prompt_cache_ratio:0.36188000562034567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 +DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:18 [batch.py:51] router release req id 8 +INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10851931571960449 s +INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.1104736328125 s +DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=245236834680968743544569505051020927769, time:1750768698.9323914s req_ids:[8] +DEBUG 06-24 20:38:18 [manager.py:391] +ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:381.11305236816406ms total_cost_time:381.15692138671875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14235 prompt_cache_len:5151 prompt_cache_ratio:0.361854583772392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 +DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:19 [batch.py:51] router release req id 8 +INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.1077873706817627 s +INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.10984659194946289 s +DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=339204945956879581360699990316365854712, time:1750768699.3236885s req_ids:[8] +DEBUG 06-24 20:38:19 [manager.py:391] +ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:213.58060836791992ms total_cost_time:213.62709999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14236 prompt_cache_len:5151 prompt_cache_ratio:0.3618291654959258 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 +DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:19 [batch.py:51] router release req id 8 +INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.10863161087036133 s +INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.11053919792175293 s +DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=38617402104192177323951391505928220693, time:1750768699.5411332s req_ids:[8] +DEBUG 06-24 20:38:19 [manager.py:391] +DEBUG 06-24 20:38:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 58073.104 tokens/s +DEBUG 06-24 20:38:19 [stats.py:37] Avg prompt tokens throughput: 58064.935 tokens/s +DEBUG 06-24 20:38:19 [stats.py:37] Avg generate tokens throughput: 8.168 tokens/s +ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:209.77187156677246ms total_cost_time:209.81621742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14237 prompt_cache_len:5151 prompt_cache_ratio:0.36180375079019456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 +DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:19 [batch.py:51] router release req id 8 +INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.1087031364440918 s +INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.1107625961303711 s +DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=256097474818184332035803194366206232581, time:1750768699.7577147s req_ids:[8] +DEBUG 06-24 20:38:19 [manager.py:391] +ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:212.24474906921387ms total_cost_time:212.28790283203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14238 prompt_cache_len:5151 prompt_cache_ratio:0.36177833965444584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 +DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:19 [batch.py:51] router release req id 8 +INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.10895824432373047 s +INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s +DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=264652790701732437901791516455834824907, time:1750768699.9771464s req_ids:[8] +DEBUG 06-24 20:38:19 [manager.py:391] +ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:211.19427680969238ms total_cost_time:211.23790740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14239 prompt_cache_len:5151 prompt_cache_ratio:0.3617529320879275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 +DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:20 [batch.py:51] router release req id 8 +INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s +INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103372573852539 s +DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=108529410489234853182309024563216782096, time:1750768700.195316s req_ids:[8] +DEBUG 06-24 20:38:20 [manager.py:391] +ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:214.508056640625ms total_cost_time:214.5516872406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14240 prompt_cache_len:5151 prompt_cache_ratio:0.3617275280898876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 +DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:20 [batch.py:51] router release req id 8 +INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10850095748901367 s +INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.11060714721679688 s +DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=326312563112837350401904037313090898498, time:1750768700.4149883s req_ids:[8] +DEBUG 06-24 20:38:20 [manager.py:391] +ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:384.77277755737305ms total_cost_time:384.81855392456055ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14241 prompt_cache_len:5151 prompt_cache_ratio:0.3617021276595745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 +DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:20 [batch.py:51] router release req id 8 +INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10824346542358398 s +INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.11034774780273438 s +DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=290064994207658388103121284618235156710, time:1750768700.807226s req_ids:[8] +DEBUG 06-24 20:38:20 [manager.py:391] +ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:210.73126792907715ms total_cost_time:210.77656745910645ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14242 prompt_cache_len:5151 prompt_cache_ratio:0.36167673079623647 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 +DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:20 [batch.py:51] router release req id 8 +INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10868382453918457 s +INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11076045036315918 s +DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=289781884027258690810513913263231970375, time:1750768701.0239558s req_ids:[8] +DEBUG 06-24 20:38:21 [manager.py:391] +ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:215.06834030151367ms total_cost_time:215.11292457580566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14243 prompt_cache_len:5151 prompt_cache_ratio:0.36165133749912237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 +DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:21 [batch.py:51] router release req id 8 +INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s +INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11085081100463867 s +DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=141337858218616608778451452470357542632, time:1750768701.245617s req_ids:[8] +DEBUG 06-24 20:38:21 [manager.py:391] +ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:213.77253532409668ms total_cost_time:213.81545066833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14244 prompt_cache_len:5151 prompt_cache_ratio:0.361625947767481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 +DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:21 [batch.py:51] router release req id 8 +INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10880327224731445 s +INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.1109156608581543 s +DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=66126172289353260289709221427546203582, time:1750768701.466767s req_ids:[8] +DEBUG 06-24 20:38:21 [manager.py:391] +ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:215.25883674621582ms total_cost_time:215.3007984161377ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14245 prompt_cache_len:5151 prompt_cache_ratio:0.3616005616005616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 +DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:21 [batch.py:51] router release req id 8 +INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10851764678955078 s +INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105194091796875 s +DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=277854481664723844360924752477950950529, time:1750768701.6866782s req_ids:[8] +DEBUG 06-24 20:38:21 [manager.py:391] +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:216.50314331054688ms total_cost_time:216.55559539794922ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:14246 prompt_cache_len:5151 prompt_cache_ratio:0.3615751789976134 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 +DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:21 [batch.py:51] router release req id 8 +INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10894155502319336 s +INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s +DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=126858852238684743733970450856982005865, time:1750768701.912173s req_ids:[8] +DEBUG 06-24 20:38:21 [manager.py:391] +ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:380.83696365356445ms total_cost_time:380.88011741638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14247 prompt_cache_len:5151 prompt_cache_ratio:0.3615497999578859 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 +DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:22 [batch.py:51] router release req id 8 +INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10944986343383789 s +INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11165785789489746 s +DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=230478253572078936420492255557112079809, time:1750768702.2975452s req_ids:[8] +DEBUG 06-24 20:38:22 [manager.py:391] +ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:172.98626899719238ms total_cost_time:173.03037643432617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14248 prompt_cache_len:5151 prompt_cache_ratio:0.3615244244806289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 +DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:22 [batch.py:51] router release req id 8 +INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10815834999084473 s +INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014127731323242 s +DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=26100783133078089268492048524836455998, time:1750768702.4785268s req_ids:[8] +DEBUG 06-24 20:38:22 [manager.py:391] +ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:173.16198348999023ms total_cost_time:173.20585250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14249 prompt_cache_len:5151 prompt_cache_ratio:0.3614990525650923 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 +DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:22 [batch.py:51] router release req id 8 +INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10874557495117188 s +INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089515686035156 s +DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=103445617092853238513290796476499250129, time:1750768702.6582174s req_ids:[8] +DEBUG 06-24 20:38:22 [manager.py:391] +ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:224.23148155212402ms total_cost_time:224.27773475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14250 prompt_cache_len:5151 prompt_cache_ratio:0.36147368421052634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 +DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:22 [batch.py:51] router release req id 8 +INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10981178283691406 s +INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11191320419311523 s +DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=4590846683549223026026924007407810283, time:1750768702.8945384s req_ids:[8] +DEBUG 06-24 20:38:22 [manager.py:391] +ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:221.9405174255371ms total_cost_time:221.9839096069336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14251 prompt_cache_len:5151 prompt_cache_ratio:0.36144831941618133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 +DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:23 [batch.py:51] router release req id 8 +INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10886716842651367 s +INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11086440086364746 s +DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=37607218430241042748927753190627397547, time:1750768703.1166275s req_ids:[8] +DEBUG 06-24 20:38:23 [manager.py:391] +INFO 06-24 20:38:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:38:23 [statics_utils.py:24] mean first cost: 231.71281376109815 ms +INFO 06-24 20:38:23 [statics_utils.py:24] mean per token cost: 0.058393696401846086 ms +ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:372.70545959472656ms total_cost_time:372.74742126464844ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14252 prompt_cache_len:5151 prompt_cache_ratio:0.3614229581813079 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 +DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:23 [batch.py:51] router release req id 8 +INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10784482955932617 s +INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.10985279083251953 s +DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=92669643871483030433133368835744357579, time:1750768703.4965448s req_ids:[8] +DEBUG 06-24 20:38:23 [manager.py:391] +ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:217.27514266967773ms total_cost_time:217.3178195953369ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14253 prompt_cache_len:5151 prompt_cache_ratio:0.3613976005051568 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 +DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:23 [batch.py:51] router release req id 8 +INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s +INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11027908325195312 s +DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=116044546114338132362674620565779797942, time:1750768703.7206903s req_ids:[8] +DEBUG 06-24 20:38:23 [manager.py:391] +ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:214.4005298614502ms total_cost_time:214.4463062286377ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14254 prompt_cache_len:5151 prompt_cache_ratio:0.36137224638697907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 +DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:23 [batch.py:51] router release req id 8 +INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10813665390014648 s +INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s +DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=269809177401614848570507801731414439343, time:1750768703.9397151s req_ids:[8] +DEBUG 06-24 20:38:23 [manager.py:391] +ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:214.97488021850586ms total_cost_time:215.01851081848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14255 prompt_cache_len:5151 prompt_cache_ratio:0.36134689582602597 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 +DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:24 [batch.py:51] router release req id 8 +INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s +INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.10942697525024414 s +DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=194555102607942816495725954600490454894, time:1750768704.1654363s req_ids:[8] +DEBUG 06-24 20:38:24 [manager.py:391] +ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:218.68610382080078ms total_cost_time:218.72949600219727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14256 prompt_cache_len:5151 prompt_cache_ratio:0.3613215488215488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 +DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:24 [batch.py:51] router release req id 8 +INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10905623435974121 s +INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.11095643043518066 s +DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=247994700793165251051711531568661037169, time:1750768704.387687s req_ids:[8] +DEBUG 06-24 20:38:24 [manager.py:391] +ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:210.8604907989502ms total_cost_time:210.9072208404541ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14257 prompt_cache_len:5151 prompt_cache_ratio:0.36129620537279933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 +DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:24 [batch.py:51] router release req id 8 +INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10790657997131348 s +INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.10964632034301758 s +DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=24603621795164591053890511868000210579, time:1750768704.6046593s req_ids:[8] +DEBUG 06-24 20:38:24 [manager.py:391] +ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:396.0280418395996ms total_cost_time:396.0742950439453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14258 prompt_cache_len:5151 prompt_cache_ratio:0.3612708654790293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 +DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:24 [batch.py:51] router release req id 8 +INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1086738109588623 s +INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11078739166259766 s +DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=155189836786531218277268209659706461832, time:1750768705.0076299s req_ids:[8] +DEBUG 06-24 20:38:25 [manager.py:391] +ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:219.1638946533203ms total_cost_time:219.2087173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14259 prompt_cache_len:5151 prompt_cache_ratio:0.3612455291394909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 +DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:25 [batch.py:51] router release req id 8 +INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s +INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s +DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=35466116700310860177728498719819713323, time:1750768705.2336066s req_ids:[8] +DEBUG 06-24 20:38:25 [manager.py:391] +ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:215.57283401489258ms total_cost_time:215.61717987060547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14260 prompt_cache_len:5151 prompt_cache_ratio:0.36122019635343616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 +DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:25 [batch.py:51] router release req id 8 +INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s +INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11022329330444336 s +DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=45711128342854309500103785881196096299, time:1750768705.4570441s req_ids:[8] +DEBUG 06-24 20:38:25 [manager.py:391] +ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:217.73505210876465ms total_cost_time:217.77820587158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14261 prompt_cache_len:5151 prompt_cache_ratio:0.3611948671201178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 +DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:25 [batch.py:51] router release req id 8 +INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1098330020904541 s +INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11208367347717285 s +DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=56983335388868975051469468574466608649, time:1750768705.680117s req_ids:[8] +DEBUG 06-24 20:38:25 [manager.py:391] +ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:213.70744705200195ms total_cost_time:213.75012397766113ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14262 prompt_cache_len:5151 prompt_cache_ratio:0.3611695414387884 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 +DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:25 [batch.py:51] router release req id 8 +INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s +INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.1101083755493164 s +DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=152675621127161317095742100715856796364, time:1750768705.9025884s req_ids:[8] +DEBUG 06-24 20:38:25 [manager.py:391] +ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:211.9448184967041ms total_cost_time:211.9908332824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14263 prompt_cache_len:5151 prompt_cache_ratio:0.36114421930870083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 +DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:26 [batch.py:51] router release req id 8 +INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10933208465576172 s +INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s +DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=112284622224081763664030157942405696724, time:1750768706.1272902s req_ids:[8] +DEBUG 06-24 20:38:26 [manager.py:391] +ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:403.594970703125ms total_cost_time:403.6386013031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14264 prompt_cache_len:5151 prompt_cache_ratio:0.36111890072910824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 +DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:26 [batch.py:51] router release req id 8 +INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10861468315124512 s +INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.10976266860961914 s +DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=216709563430167644707302198382835305831, time:1750768706.5282092s req_ids:[8] +DEBUG 06-24 20:38:26 [manager.py:391] +ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:212.1129035949707ms total_cost_time:212.1570110321045ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14265 prompt_cache_len:5151 prompt_cache_ratio:0.3610935856992639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 +DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:26 [batch.py:51] router release req id 8 +INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10823917388916016 s +INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.1094522476196289 s +DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=107251540734002584243757528676762937010, time:1750768706.746921s req_ids:[8] +DEBUG 06-24 20:38:26 [manager.py:391] +ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:214.02573585510254ms total_cost_time:214.06984329223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14266 prompt_cache_len:5151 prompt_cache_ratio:0.3610682742184214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 +DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:26 [batch.py:51] router release req id 8 +INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s +INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.10880208015441895 s +DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=161721564497271796386651570704644392668, time:1750768706.967214s req_ids:[8] +DEBUG 06-24 20:38:26 [manager.py:391] +ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:214.12110328674316ms total_cost_time:214.16568756103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14267 prompt_cache_len:5151 prompt_cache_ratio:0.36104296628583443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 +DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:27 [batch.py:51] router release req id 8 +INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10801482200622559 s +INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10915827751159668 s +DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=275579050197608394297299400123904209743, time:1750768707.1871688s req_ids:[8] +DEBUG 06-24 20:38:27 [manager.py:391] +ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:174.6532917022705ms total_cost_time:174.6971607208252ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14268 prompt_cache_len:5151 prompt_cache_ratio:0.36101766190075696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 +DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:27 [batch.py:51] router release req id 8 +INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s +INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10907626152038574 s +DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=322931956293872787454538929591404745921, time:1750768707.368545s req_ids:[8] +DEBUG 06-24 20:38:27 [manager.py:391] +ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:209.7318172454834ms total_cost_time:209.77401733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14269 prompt_cache_len:5151 prompt_cache_ratio:0.36099236106244303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 +DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:27 [batch.py:51] router release req id 8 +INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10804176330566406 s +INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10916543006896973 s +DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=87699627785215763541076044289396657441, time:1750768707.5842388s req_ids:[8] +DEBUG 06-24 20:38:27 [manager.py:391] +ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:380.3107738494873ms total_cost_time:380.3548812866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14270 prompt_cache_len:5151 prompt_cache_ratio:0.36096706377014715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 +DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:27 [batch.py:51] router release req id 8 +INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10809850692749023 s +INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10928463935852051 s +DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=157341962118228195590479086896174324799, time:1750768707.9725165s req_ids:[8] +DEBUG 06-24 20:38:27 [manager.py:391] +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:220.45302391052246ms total_cost_time:220.49808502197266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14271 prompt_cache_len:5151 prompt_cache_ratio:0.36094177002312383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 +DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:28 [batch.py:51] router release req id 8 +INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10949993133544922 s +INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11073851585388184 s +DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=324873762278114305504208102717214895293, time:1750768708.2003026s req_ids:[8] +DEBUG 06-24 20:38:28 [manager.py:391] +ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:217.65947341918945ms total_cost_time:217.70524978637695ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14272 prompt_cache_len:5151 prompt_cache_ratio:0.3609164798206278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 +DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:28 [batch.py:51] router release req id 8 +INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10936522483825684 s +INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s +DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=191727752977890756539975842098113679417, time:1750768708.425195s req_ids:[8] +DEBUG 06-24 20:38:28 [manager.py:391] +ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:214.97082710266113ms total_cost_time:215.01421928405762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14273 prompt_cache_len:5151 prompt_cache_ratio:0.3608911931619141 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 +DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:28 [batch.py:51] router release req id 8 +INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.1102759838104248 s +INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11140775680541992 s +DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=183910624031422051294651887010596331794, time:1750768708.646223s req_ids:[8] +DEBUG 06-24 20:38:28 [manager.py:391] +ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:218.31727027893066ms total_cost_time:218.36256980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14274 prompt_cache_len:5151 prompt_cache_ratio:0.3608659100462379 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 +DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:28 [batch.py:51] router release req id 8 +INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10822892189025879 s +INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s +DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=4988325219455916826580241057238871320, time:1750768708.8695588s req_ids:[8] +DEBUG 06-24 20:38:28 [manager.py:391] +ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:211.8515968322754ms total_cost_time:211.89475059509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14275 prompt_cache_len:5151 prompt_cache_ratio:0.36084063047285464 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 +DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:29 [batch.py:51] router release req id 8 +INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.3114337921142578 s +INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.31279802322387695 s +DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=67142791448226603804232932078153316326, time:1750768709.295927s req_ids:[8] +DEBUG 06-24 20:38:29 [manager.py:391] +ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:432.2316646575928ms total_cost_time:432.2776794433594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14276 prompt_cache_len:5151 prompt_cache_ratio:0.3608153544410199 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 +DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:29 [batch.py:51] router release req id 8 +INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s +INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10928034782409668 s +DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=175140417026085694421881953622238537700, time:1750768709.5287404s req_ids:[8] +DEBUG 06-24 20:38:29 [manager.py:391] +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:38:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 56591.656 tokens/s +DEBUG 06-24 20:38:29 [stats.py:37] Avg prompt tokens throughput: 56583.619 tokens/s +DEBUG 06-24 20:38:29 [stats.py:37] Avg generate tokens throughput: 8.037 tokens/s +INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:210.80541610717773ms total_cost_time:210.85214614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14277 prompt_cache_len:5151 prompt_cache_ratio:0.3607900819499895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 +DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:29 [batch.py:51] router release req id 8 +INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.108367919921875 s +INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10947775840759277 s +DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=34754242359685825961324259708736057417, time:1750768709.7451184s req_ids:[8] +DEBUG 06-24 20:38:29 [manager.py:391] +ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:216.62187576293945ms total_cost_time:216.66526794433594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14278 prompt_cache_len:5151 prompt_cache_ratio:0.36076481299901947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 +DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:29 [batch.py:51] router release req id 8 +INFO 06-24 20:38:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s +INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10940933227539062 s +DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=300448987994262465438565765828240742488, time:1750768709.9672012s req_ids:[8] +DEBUG 06-24 20:38:29 [manager.py:391] +ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:214.07604217529297ms total_cost_time:214.11991119384766ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14279 prompt_cache_len:5151 prompt_cache_ratio:0.36073954758736604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 +DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:30 [batch.py:51] router release req id 8 +INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s +INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.1102442741394043 s +DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=310330459520562265125238172196327934832, time:1750768710.1854455s req_ids:[8] +DEBUG 06-24 20:38:30 [manager.py:391] +ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:215.08336067199707ms total_cost_time:215.12675285339355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14280 prompt_cache_len:5151 prompt_cache_ratio:0.3607142857142857 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 +DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:30 [batch.py:51] router release req id 8 +INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.11122584342956543 s +INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.1124427318572998 s +DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=40110720079963559767079849327104904293, time:1750768710.4106061s req_ids:[8] +DEBUG 06-24 20:38:30 [manager.py:391] +ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:375.704288482666ms total_cost_time:375.7483959197998ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14281 prompt_cache_len:5151 prompt_cache_ratio:0.3606890273790351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 +DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:30 [batch.py:51] router release req id 8 +INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.1083669662475586 s +INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.10951066017150879 s +DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=253655184368831574883322889132720151089, time:1750768710.7917306s req_ids:[8] +DEBUG 06-24 20:38:30 [manager.py:391] +ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:218.72663497924805ms total_cost_time:218.78886222839355ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:14282 prompt_cache_len:5151 prompt_cache_ratio:0.360663772580871 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 +DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:30 [batch.py:51] router release req id 8 +INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10822248458862305 s +INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.1093752384185791 s +DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=300035503163366359119889520728700999557, time:1750768711.012669s req_ids:[8] +DEBUG 06-24 20:38:31 [manager.py:391] +ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:199.4013786315918ms total_cost_time:199.44429397583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14283 prompt_cache_len:5151 prompt_cache_ratio:0.3606385213190506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 +DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:31 [batch.py:51] router release req id 8 +INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10840773582458496 s +INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.10951876640319824 s +DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=324144855044313137112862026666618971329, time:1750768711.2216167s req_ids:[8] +DEBUG 06-24 20:38:31 [manager.py:391] +ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:209.6273899078369ms total_cost_time:209.6717357635498ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14284 prompt_cache_len:5151 prompt_cache_ratio:0.36061327359283113 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 +DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:31 [batch.py:51] router release req id 8 +INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.1079256534576416 s +INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.10900187492370605 s +DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=239041606207801968215534728356550466793, time:1750768711.437036s req_ids:[8] +DEBUG 06-24 20:38:31 [manager.py:391] +ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:214.6625518798828ms total_cost_time:214.705228805542ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14285 prompt_cache_len:5151 prompt_cache_ratio:0.36058802940147006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 +DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:31 [batch.py:51] router release req id 8 +INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10941576957702637 s +INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103825569152832 s +DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=181353613833834285558959543886686094636, time:1750768711.6585305s req_ids:[8] +DEBUG 06-24 20:38:31 [manager.py:391] +ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:389.76001739501953ms total_cost_time:389.8048400878906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14286 prompt_cache_len:5151 prompt_cache_ratio:0.3605627887442251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 +DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:31 [batch.py:51] router release req id 8 +INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s +INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11041641235351562 s +DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=309688211521869080631086722958418318453, time:1750768712.0569487s req_ids:[8] +DEBUG 06-24 20:38:32 [manager.py:391] +ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:211.6374969482422ms total_cost_time:211.70282363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:14287 prompt_cache_len:5151 prompt_cache_ratio:0.36053755162035417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 +DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:32 [batch.py:51] router release req id 8 +INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10897278785705566 s +INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.10994958877563477 s +DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=309401336277837939586333489106531148992, time:1750768712.2734628s req_ids:[8] +DEBUG 06-24 20:38:32 [manager.py:391] +ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:209.02371406555176ms total_cost_time:209.06734466552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14288 prompt_cache_len:5151 prompt_cache_ratio:0.3605123180291153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 +DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:32 [batch.py:51] router release req id 8 +INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10788369178771973 s +INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.10891580581665039 s +DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=31658981941704406071314646806836122281, time:1750768712.4873397s req_ids:[8] +DEBUG 06-24 20:38:32 [manager.py:391] +ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:208.4660530090332ms total_cost_time:208.50896835327148ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14289 prompt_cache_len:5151 prompt_cache_ratio:0.360487087969767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 +DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:32 [batch.py:51] router release req id 8 +INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s +INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11029791831970215 s +DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=219111016566369660719693670618227443196, time:1750768712.7037668s req_ids:[8] +DEBUG 06-24 20:38:32 [manager.py:391] +ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:211.66348457336426ms total_cost_time:211.70735359191895ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14290 prompt_cache_len:5151 prompt_cache_ratio:0.36046186144156755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 +DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:32 [batch.py:51] router release req id 8 +INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.1091306209564209 s +INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019062995910645 s +DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=49863428173590249347122873350681298774, time:1750768712.9218888s req_ids:[8] +DEBUG 06-24 20:38:32 [manager.py:391] +ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:214.41268920898438ms total_cost_time:214.45703506469727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14291 prompt_cache_len:5151 prompt_cache_ratio:0.3604366384437758 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 +DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:33 [batch.py:51] router release req id 8 +INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.10806941986083984 s +INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.10908055305480957 s +DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=230975982761298293206945226499347334751, time:1750768713.1429605s req_ids:[8] +DEBUG 06-24 20:38:33 [manager.py:391] +ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:214.41173553466797ms total_cost_time:214.45560455322266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14292 prompt_cache_len:5151 prompt_cache_ratio:0.3604114189756507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 +DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:33 [batch.py:51] router release req id 8 +INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.3129589557647705 s +INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.31403517723083496 s +DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=227229248147400051197510052849323221991, time:1750768713.570737s req_ids:[8] +DEBUG 06-24 20:38:33 [manager.py:391] +ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:420.8519458770752ms total_cost_time:420.8977222442627ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14293 prompt_cache_len:5151 prompt_cache_ratio:0.3603862030364514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 +DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:33 [batch.py:51] router release req id 8 +INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s +INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s +DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=198740624024640700304364276331600200328, time:1750768713.7922153s req_ids:[8] +DEBUG 06-24 20:38:33 [manager.py:391] +ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:217.24343299865723ms total_cost_time:217.28777885437012ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14294 prompt_cache_len:5151 prompt_cache_ratio:0.36036099062543725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 +DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:33 [batch.py:51] router release req id 8 +INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10907673835754395 s +INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11010622978210449 s +DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=308849611877324709313044845432754192573, time:1750768714.0164826s req_ids:[8] +DEBUG 06-24 20:38:34 [manager.py:391] +ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:216.5999412536621ms total_cost_time:216.66264533996582ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:14295 prompt_cache_len:5151 prompt_cache_ratio:0.3603357817418678 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 +DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:34 [batch.py:51] router release req id 8 +INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10914731025695801 s +INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11021780967712402 s +DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=7298113297532464827488932196760374987, time:1750768714.2403963s req_ids:[8] +DEBUG 06-24 20:38:34 [manager.py:391] +ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:215.7299518585205ms total_cost_time:215.7726287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14296 prompt_cache_len:5151 prompt_cache_ratio:0.3603105763850028 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 +DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:34 [batch.py:51] router release req id 8 +INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.1101541519165039 s +INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11122703552246094 s +DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=179995775319010562555444887567852153153, time:1750768714.4633112s req_ids:[8] +DEBUG 06-24 20:38:34 [manager.py:391] +ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:214.10846710205078ms total_cost_time:214.15162086486816ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14297 prompt_cache_len:5151 prompt_cache_ratio:0.36028537455410226 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 +DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:34 [batch.py:51] router release req id 8 +INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10875439643859863 s +INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.10984134674072266 s +DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=12361914905767672978511222016360947709, time:1750768714.6851068s req_ids:[8] +DEBUG 06-24 20:38:34 [manager.py:391] +ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:393.5692310333252ms total_cost_time:393.6131000518799ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14298 prompt_cache_len:5151 prompt_cache_ratio:0.3602601762484264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 +DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:34 [batch.py:51] router release req id 8 +INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10855221748352051 s +INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959458351135254 s +DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=36879388784814690014910353589929816195, time:1750768715.082922s req_ids:[8] +DEBUG 06-24 20:38:35 [manager.py:391] +ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:218.28532218933105ms total_cost_time:218.34087371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:14299 prompt_cache_len:5151 prompt_cache_ratio:0.3602349814672355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 +DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:35 [batch.py:51] router release req id 8 +INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10866689682006836 s +INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10971522331237793 s +DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=336465445534868957057072612396440288369, time:1750768715.3080647s req_ids:[8] +DEBUG 06-24 20:38:35 [manager.py:391] +ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:217.6058292388916ms total_cost_time:217.6499366760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14300 prompt_cache_len:5151 prompt_cache_ratio:0.3602097902097902 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 +DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:35 [batch.py:51] router release req id 8 +INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.1085505485534668 s +INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s +DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=35839447325684058841472605588877985357, time:1750768715.5309417s req_ids:[8] +DEBUG 06-24 20:38:35 [manager.py:391] +ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:218.49703788757324ms total_cost_time:218.54186058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14301 prompt_cache_len:5151 prompt_cache_ratio:0.3601846024753514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 +DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:35 [batch.py:51] router release req id 8 +INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10947799682617188 s +INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.11055231094360352 s +DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=39799943014911740192810342187867965428, time:1750768715.7557387s req_ids:[8] +DEBUG 06-24 20:38:35 [manager.py:391] +ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:213.4249210357666ms total_cost_time:213.4687900543213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14302 prompt_cache_len:5151 prompt_cache_ratio:0.36015941826318 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 +DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:35 [batch.py:51] router release req id 8 +INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10860371589660645 s +INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959219932556152 s +DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=27625006614332878305967706236117741305, time:1750768715.979675s req_ids:[8] +DEBUG 06-24 20:38:35 [manager.py:391] +ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:216.233491897583ms total_cost_time:216.2764072418213ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14303 prompt_cache_len:5151 prompt_cache_ratio:0.3601342375725372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 +DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:36 [batch.py:51] router release req id 8 +INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10930061340332031 s +INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s +DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=273084326860973483868345642432186432935, time:1750768716.1987016s req_ids:[8] +DEBUG 06-24 20:38:36 [manager.py:391] +ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:414.10279273986816ms total_cost_time:414.14809226989746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14304 prompt_cache_len:5151 prompt_cache_ratio:0.3601090604026846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 +DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:36 [batch.py:51] router release req id 8 +INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10906744003295898 s +INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.11020445823669434 s +DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=71303769000114458801445844147046321594, time:1750768716.62154s req_ids:[8] +DEBUG 06-24 20:38:36 [manager.py:391] +ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:214.60890769958496ms total_cost_time:214.65349197387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14305 prompt_cache_len:5151 prompt_cache_ratio:0.3600838867528836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 +DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:36 [batch.py:51] router release req id 8 +INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10835433006286621 s +INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.1094822883605957 s +DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=19304856825515355643406279628975286621, time:1750768716.841561s req_ids:[8] +DEBUG 06-24 20:38:36 [manager.py:391] +ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:215.19875526428223ms total_cost_time:215.2426242828369ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14306 prompt_cache_len:5151 prompt_cache_ratio:0.3600587166223962 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 +DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:36 [batch.py:51] router release req id 8 +INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10806012153625488 s +INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10912823677062988 s +DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=22267125484041152242923526200289025931, time:1750768717.0650477s req_ids:[8] +DEBUG 06-24 20:38:37 [manager.py:391] +ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:215.39068222045898ms total_cost_time:215.43431282043457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14307 prompt_cache_len:5151 prompt_cache_ratio:0.36003355001048437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 +DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:37 [batch.py:51] router release req id 8 +INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s +INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10873198509216309 s +DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=337614331400476825843764712804249121711, time:1750768717.286842s req_ids:[8] +DEBUG 06-24 20:38:37 [manager.py:391] +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:213.7451171875ms total_cost_time:213.78827095031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14308 prompt_cache_len:5151 prompt_cache_ratio:0.3600083869164104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 +DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:37 [batch.py:51] router release req id 8 +INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.1089785099029541 s +INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.11010479927062988 s +DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=238386048792173809608014555876239157672, time:1750768717.5062656s req_ids:[8] +DEBUG 06-24 20:38:37 [manager.py:391] +ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:214.6320343017578ms total_cost_time:214.6759033203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14309 prompt_cache_len:5151 prompt_cache_ratio:0.3599832273394367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 +DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:37 [batch.py:51] router release req id 8 +INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10711240768432617 s +INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10788607597351074 s +DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=187270018968756731327443780054617100492, time:1750768717.7275078s req_ids:[8] +DEBUG 06-24 20:38:37 [manager.py:391] +ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:358.7324619293213ms total_cost_time:358.7765693664551ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14310 prompt_cache_len:5151 prompt_cache_ratio:0.359958071278826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 +DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:38 [batch.py:51] router release req id 8 +INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1081550121307373 s +INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10900640487670898 s +DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=207658275690279995385187150600872365173, time:1750768718.0923839s req_ids:[8] +DEBUG 06-24 20:38:38 [manager.py:391] +ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:199.57256317138672ms total_cost_time:199.6157169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14311 prompt_cache_len:5151 prompt_cache_ratio:0.3599329187338411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 +DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:38 [batch.py:51] router release req id 8 +INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1082143783569336 s +INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10925483703613281 s +DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=203597121101678351338611651429530960445, time:1750768718.2991407s req_ids:[8] +DEBUG 06-24 20:38:38 [manager.py:391] +ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:212.03374862670898ms total_cost_time:212.07880973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14312 prompt_cache_len:5151 prompt_cache_ratio:0.3599077697037451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 +DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:38 [batch.py:51] router release req id 8 +INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1079714298248291 s +INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10901999473571777 s +DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=32048459086076373550834889312118255152, time:1750768718.5168228s req_ids:[8] +DEBUG 06-24 20:38:38 [manager.py:391] +ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:210.1573944091797ms total_cost_time:210.20150184631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14313 prompt_cache_len:5151 prompt_cache_ratio:0.3598826241878013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 +DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:38 [batch.py:51] router release req id 8 +INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.10927653312683105 s +INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102447509765625 s +DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=47952708225707439984615417012857531081, time:1750768718.7343922s req_ids:[8] +DEBUG 06-24 20:38:38 [manager.py:391] +ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:210.90412139892578ms total_cost_time:210.94512939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14314 prompt_cache_len:5151 prompt_cache_ratio:0.35985748218527314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 +DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:38 [batch.py:51] router release req id 8 +INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.10811734199523926 s +INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10915422439575195 s +DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=161412611603861747931595961641859918496, time:1750768718.961354s req_ids:[8] +DEBUG 06-24 20:38:38 [manager.py:391] +ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:225.05903244018555ms total_cost_time:225.10457038879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14315 prompt_cache_len:5151 prompt_cache_ratio:0.3598323436954244 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 +DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:39 [batch.py:51] router release req id 8 +INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.3109610080718994 s +INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.3121209144592285 s +DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=309286886493428932844529532691085835534, time:1750768719.388381s req_ids:[8] +DEBUG 06-24 20:38:39 [manager.py:391] +ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:425.7972240447998ms total_cost_time:425.8413314819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14316 prompt_cache_len:5151 prompt_cache_ratio:0.35980720871751887 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 +DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:39 [batch.py:51] router release req id 8 +INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.10911822319030762 s +INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.11029577255249023 s +DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=113495359027572542813921866568586584317, time:1750768719.6173482s req_ids:[8] +DEBUG 06-24 20:38:39 [manager.py:391] +ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:38:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 56654.062 tokens/s +DEBUG 06-24 20:38:39 [stats.py:37] Avg prompt tokens throughput: 56646.138 tokens/s +DEBUG 06-24 20:38:39 [stats.py:37] Avg generate tokens throughput: 7.924 tokens/s +INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:214.89906311035156ms total_cost_time:214.94364738464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14317 prompt_cache_len:5151 prompt_cache_ratio:0.3597820772508207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 +DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:39 [batch.py:51] router release req id 8 +INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.1083827018737793 s +INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.10938668251037598 s +DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=219431252192268158481018098232315991305, time:1750768719.8396358s req_ids:[8] +DEBUG 06-24 20:38:39 [manager.py:391] +ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:217.25726127624512ms total_cost_time:217.3008918762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14318 prompt_cache_len:5151 prompt_cache_ratio:0.35975694929459423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 +DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:39 [batch.py:51] router release req id 8 +INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s +INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s +DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=292733695753435359011656294375156431344, time:1750768720.062112s req_ids:[8] +DEBUG 06-24 20:38:40 [manager.py:391] +ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:214.77007865905762ms total_cost_time:214.8122787475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14319 prompt_cache_len:5151 prompt_cache_ratio:0.3597318248481039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 +DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:40 [batch.py:51] router release req id 8 +INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10806465148925781 s +INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10899233818054199 s +DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=118814785872162620705492512118275358047, time:1750768720.284169s req_ids:[8] +DEBUG 06-24 20:38:40 [manager.py:391] +ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:213.90652656555176ms total_cost_time:213.9265537261963ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:14320 prompt_cache_len:5151 prompt_cache_ratio:0.35970670391061454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 +DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:40 [batch.py:51] router release req id 8 +INFO 06-24 20:38:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s +INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s +DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=259563674301579742299194253949386000473, time:1750768720.5051982s req_ids:[8] +DEBUG 06-24 20:38:40 [manager.py:391] +ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:402.8322696685791ms total_cost_time:402.8756618499756ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14321 prompt_cache_len:5151 prompt_cache_ratio:0.359681586481391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 +DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:40 [batch.py:51] router release req id 8 +INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s +INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.11044192314147949 s +DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=144214591891764007995838349096065152453, time:1750768720.9140751s req_ids:[8] +DEBUG 06-24 20:38:40 [manager.py:391] +ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:216.8586254119873ms total_cost_time:216.9036865234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14322 prompt_cache_len:5151 prompt_cache_ratio:0.35965647255969835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 +DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:41 [batch.py:51] router release req id 8 +INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1103208065032959 s +INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.11131978034973145 s +DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=337344520444635864729608261909324467746, time:1750768721.1398907s req_ids:[8] +DEBUG 06-24 20:38:41 [manager.py:391] +ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:214.6289348602295ms total_cost_time:214.68758583068848ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:14323 prompt_cache_len:5151 prompt_cache_ratio:0.35963136214480207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 +DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:41 [batch.py:51] router release req id 8 +INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1081228256225586 s +INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10912561416625977 s +DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=65726297902167035493710652419074302266, time:1750768721.3595438s req_ids:[8] +DEBUG 06-24 20:38:41 [manager.py:391] +ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:215.2099609375ms total_cost_time:215.254545211792ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14324 prompt_cache_len:5151 prompt_cache_ratio:0.3596062552359676 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 +DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:41 [batch.py:51] router release req id 8 +INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1081395149230957 s +INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10915255546569824 s +DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=10392302593517550863032708739264419999, time:1750768721.5847633s req_ids:[8] +DEBUG 06-24 20:38:41 [manager.py:391] +ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:214.8585319519043ms total_cost_time:214.88118171691895ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:14325 prompt_cache_len:5151 prompt_cache_ratio:0.35958115183246075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 +DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:41 [batch.py:51] router release req id 8 +INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.10678553581237793 s +INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10784339904785156 s +DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=123777851548131639359384620110996058923, time:1750768721.802933s req_ids:[8] +DEBUG 06-24 20:38:41 [manager.py:391] +ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:216.5658473968506ms total_cost_time:216.61043167114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14326 prompt_cache_len:5151 prompt_cache_ratio:0.3595560519335474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 +DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:41 [batch.py:51] router release req id 8 +INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10779237747192383 s +INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.10865306854248047 s +DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=331469391169738333711562845954962360877, time:1750768722.0369036s req_ids:[8] +DEBUG 06-24 20:38:42 [manager.py:391] +ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:358.19363594055176ms total_cost_time:358.23774337768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14327 prompt_cache_len:5151 prompt_cache_ratio:0.35953095553849374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 +DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:42 [batch.py:51] router release req id 8 +INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.1087493896484375 s +INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s +DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=81601714097032354123917292213753152592, time:1750768722.3929098s req_ids:[8] +DEBUG 06-24 20:38:42 [manager.py:391] +ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.73178672790527ms total_cost_time:216.75372123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:14328 prompt_cache_len:5151 prompt_cache_ratio:0.35950586264656614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 +DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:42 [batch.py:51] router release req id 8 +INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10896158218383789 s +INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.11008810997009277 s +DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=218284273525638454670084978330504081928, time:1750768722.6141953s req_ids:[8] +DEBUG 06-24 20:38:42 [manager.py:391] +ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:212.10265159606934ms total_cost_time:212.14795112609863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14329 prompt_cache_len:5151 prompt_cache_ratio:0.3594807732570312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 +DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:42 [batch.py:51] router release req id 8 +INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10892558097839355 s +INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.11002683639526367 s +DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=157728436992794349873683003312163441299, time:1750768722.8347185s req_ids:[8] +DEBUG 06-24 20:38:42 [manager.py:391] +ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.9938087463379ms total_cost_time:217.03696250915527ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14330 prompt_cache_len:5151 prompt_cache_ratio:0.3594556873691556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 +DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:42 [batch.py:51] router release req id 8 +INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.1075901985168457 s +INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.10854625701904297 s +DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=252808046266179192708425490443751566868, time:1750768723.0545304s req_ids:[8] +DEBUG 06-24 20:38:43 [manager.py:391] +ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.59159660339355ms total_cost_time:216.63522720336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14331 prompt_cache_len:5151 prompt_cache_ratio:0.3594306049822064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 +DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:43 [batch.py:51] router release req id 8 +INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.10807180404663086 s +INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.11003780364990234 s +DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=66916177527109151275972490958749537861, time:1750768723.2817373s req_ids:[8] +DEBUG 06-24 20:38:43 [manager.py:391] +ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:393.784761428833ms total_cost_time:393.829345703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14332 prompt_cache_len:5151 prompt_cache_ratio:0.35940552609545073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 +DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:43 [batch.py:51] router release req id 8 +DEBUG 06-24 20:38:43 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:43 [manager.py:283] +DEBUG 06-24 20:38:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:43 [manager.py:284] +INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s +INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.11046767234802246 s +DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=31774115044789403844710917488938268970, time:1750768723.6784708s req_ids:[8] +DEBUG 06-24 20:38:43 [manager.py:391] +ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:217.55647659301758ms total_cost_time:217.60034561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14333 prompt_cache_len:5151 prompt_cache_ratio:0.359380450708156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 +DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:43 [batch.py:51] router release req id 8 +INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.1088707447052002 s +INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.1108546257019043 s +DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=164320431104385602624799453999687375411, time:1750768723.9022744s req_ids:[8] +DEBUG 06-24 20:38:43 [manager.py:391] +ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:222.16558456420898ms total_cost_time:222.21136093139648ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14334 prompt_cache_len:5151 prompt_cache_ratio:0.3593553788195898 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 +DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:44 [batch.py:51] router release req id 8 +INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10790753364562988 s +INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.10989141464233398 s +DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=181260337437464389101440027708675598326, time:1750768724.1426075s req_ids:[8] +DEBUG 06-24 20:38:44 [manager.py:391] +ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:231.49895668029785ms total_cost_time:231.54497146606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14335 prompt_cache_len:5151 prompt_cache_ratio:0.35933031042901986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 +DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:44 [batch.py:51] router release req id 8 +INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10909438133239746 s +INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11114954948425293 s +DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=190273325535456149013251387398130289637, time:1750768724.3705008s req_ids:[8] +DEBUG 06-24 20:38:44 [manager.py:391] +ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:214.81633186340332ms total_cost_time:214.8590087890625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14336 prompt_cache_len:5151 prompt_cache_ratio:0.3593052455357143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 +DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:44 [batch.py:51] router release req id 8 +INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10935759544372559 s +INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s +DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=208737211068103266627193242115968320951, time:1750768724.5907636s req_ids:[8] +DEBUG 06-24 20:38:44 [manager.py:391] +ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:391.3564682006836ms total_cost_time:391.4015293121338ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14337 prompt_cache_len:5151 prompt_cache_ratio:0.3592801841389412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 +DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:44 [batch.py:51] router release req id 8 +INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10973048210144043 s +INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11203813552856445 s +DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=219038643703275278225190310142744438, time:1750768724.9901671s req_ids:[8] +DEBUG 06-24 20:38:44 [manager.py:391] +ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:221.30727767944336ms total_cost_time:221.35186195373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14338 prompt_cache_len:5151 prompt_cache_ratio:0.359255126237969 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 +DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:45 [batch.py:51] router release req id 8 +INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.10908961296081543 s +INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11129379272460938 s +DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=108923366629408673944393699936749510904, time:1750768725.21533s req_ids:[8] +DEBUG 06-24 20:38:45 [manager.py:391] +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.20893478393555ms total_cost_time:216.25328063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14339 prompt_cache_len:5151 prompt_cache_ratio:0.3592300718320664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 +DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:45 [batch.py:51] router release req id 8 +INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s +INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11077666282653809 s +DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=308632465537945908575211534247480297205, time:1750768725.4390216s req_ids:[8] +DEBUG 06-24 20:38:45 [manager.py:391] +ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:214.71166610717773ms total_cost_time:214.75553512573242ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14340 prompt_cache_len:5151 prompt_cache_ratio:0.3592050209205021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 +DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:45 [batch.py:51] router release req id 8 +INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.10823202133178711 s +INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11020278930664062 s +DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=58550232841298009406527569958720847015, time:1750768725.6590996s req_ids:[8] +DEBUG 06-24 20:38:45 [manager.py:391] +ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.17555618286133ms total_cost_time:216.2187099456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14341 prompt_cache_len:5151 prompt_cache_ratio:0.35917997350254516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 +DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:45 [batch.py:51] router release req id 8 +INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.1077420711517334 s +INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.10983705520629883 s +DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=291740147306535194174220705649007674927, time:1750768725.8828664s req_ids:[8] +DEBUG 06-24 20:38:45 [manager.py:391] +ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.9952392578125ms total_cost_time:217.03863143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14342 prompt_cache_len:5151 prompt_cache_ratio:0.3591549295774648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 +DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:46 [batch.py:51] router release req id 8 +INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10864138603210449 s +INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s +DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=294307543644076669893561398143932687949, time:1750768726.1049736s req_ids:[8] +DEBUG 06-24 20:38:46 [manager.py:391] +ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:356.8878173828125ms total_cost_time:356.9328784942627ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14343 prompt_cache_len:5151 prompt_cache_ratio:0.3591298891445304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 +DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:46 [batch.py:51] router release req id 8 +INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10932731628417969 s +INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11153793334960938 s +DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=291812422708849606056610866637691662713, time:1750768726.4673378s req_ids:[8] +DEBUG 06-24 20:38:46 [manager.py:391] +ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:209.23542976379395ms total_cost_time:209.28144454956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14344 prompt_cache_len:5151 prompt_cache_ratio:0.3591048522030117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 +DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:46 [batch.py:51] router release req id 8 +INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.1083974838256836 s +INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11069989204406738 s +DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=38303038482268747395093378139617829839, time:1750768726.6878989s req_ids:[8] +DEBUG 06-24 20:38:46 [manager.py:391] +ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:177.03914642333984ms total_cost_time:177.08253860473633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14345 prompt_cache_len:5151 prompt_cache_ratio:0.3590798187521785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 +DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:46 [batch.py:51] router release req id 8 +INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10903286933898926 s +INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11106562614440918 s +DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=333635470932938951419752778690552129194, time:1750768726.8706748s req_ids:[8] +DEBUG 06-24 20:38:46 [manager.py:391] +ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:205.79886436462402ms total_cost_time:205.8432102203369ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14346 prompt_cache_len:5151 prompt_cache_ratio:0.3590547887913007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 +DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:46 [batch.py:51] router release req id 8 +INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10878229141235352 s +INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11083340644836426 s +DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=240321357164904059742514231423195958365, time:1750768727.0804899s req_ids:[8] +DEBUG 06-24 20:38:47 [manager.py:391] +INFO 06-24 20:38:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:209.63644981384277ms total_cost_time:209.68294143676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14347 prompt_cache_len:5151 prompt_cache_ratio:0.3590297623196487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 +DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:47 [batch.py:51] router release req id 8 +INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10815811157226562 s +INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.1102440357208252 s +DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=291782924917449856868981054640719534167, time:1750768727.2972672s req_ids:[8] +DEBUG 06-24 20:38:47 [manager.py:391] +ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:381.78181648254395ms total_cost_time:381.82711601257324ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14348 prompt_cache_len:5151 prompt_cache_ratio:0.3590047393364929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 +DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:47 [batch.py:51] router release req id 8 +INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10808205604553223 s +INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11005401611328125 s +DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=187372096983019345673797922481341311801, time:1750768727.6876829s req_ids:[8] +DEBUG 06-24 20:38:47 [manager.py:391] +ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:210.90936660766602ms total_cost_time:210.9541893005371ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14349 prompt_cache_len:5151 prompt_cache_ratio:0.3589797198411039 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 +DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:47 [batch.py:51] router release req id 8 +INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10875988006591797 s +INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11075282096862793 s +DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=206689769635735971066669334592737485356, time:1750768727.9021838s req_ids:[8] +DEBUG 06-24 20:38:47 [manager.py:391] +ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:212.2800350189209ms total_cost_time:212.324857711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14350 prompt_cache_len:5151 prompt_cache_ratio:0.35895470383275263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 +DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:48 [batch.py:51] router release req id 8 +INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10795807838439941 s +INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.10990285873413086 s +DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=183360311164322756586402179982383911460, time:1750768728.1225817s req_ids:[8] +DEBUG 06-24 20:38:48 [manager.py:391] +ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:217.41986274719238ms total_cost_time:217.46373176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14351 prompt_cache_len:5151 prompt_cache_ratio:0.35892969131071006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 +DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:48 [batch.py:51] router release req id 8 +INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s +INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11015558242797852 s +DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=159635365050810072896697432373743114288, time:1750768728.3529146s req_ids:[8] +DEBUG 06-24 20:38:48 [manager.py:391] +ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:225.53491592407227ms total_cost_time:225.57783126831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14352 prompt_cache_len:5151 prompt_cache_ratio:0.3589046822742475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 +DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:48 [batch.py:51] router release req id 8 +INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10923480987548828 s +INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11126923561096191 s +DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=204380967859539168744423039056410633516, time:1750768728.5794687s req_ids:[8] +DEBUG 06-24 20:38:48 [manager.py:391] +ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:218.0943489074707ms total_cost_time:218.1377410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14353 prompt_cache_len:5151 prompt_cache_ratio:0.3588796767226364 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 +DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:48 [batch.py:51] router release req id 8 +INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10816001892089844 s +INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s +DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=75900295801764187184313746588473686482, time:1750768728.8027496s req_ids:[8] +DEBUG 06-24 20:38:48 [manager.py:391] +ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:382.25293159484863ms total_cost_time:382.29846954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14354 prompt_cache_len:5151 prompt_cache_ratio:0.35885467465514836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 +DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:49 [batch.py:51] router release req id 8 +INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s +INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008167266845703 s +DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=219203778563677880315393675278291998858, time:1750768729.1941311s req_ids:[8] +DEBUG 06-24 20:38:49 [manager.py:391] +ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:219.7129726409912ms total_cost_time:219.7573184967041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14355 prompt_cache_len:5151 prompt_cache_ratio:0.35882967607105537 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 +DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:49 [batch.py:51] router release req id 8 +INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10832571983337402 s +INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11036443710327148 s +DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=164259472469265572822914066443300393774, time:1750768729.4173343s req_ids:[8] +DEBUG 06-24 20:38:49 [manager.py:391] +ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:215.66057205200195ms total_cost_time:215.70396423339844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14356 prompt_cache_len:5151 prompt_cache_ratio:0.3588046809696294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 +DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:49 [batch.py:51] router release req id 8 +INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10794687271118164 s +INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s +DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=230559937663556200687943893613543661208, time:1750768729.6427717s req_ids:[8] +DEBUG 06-24 20:38:49 [manager.py:391] +ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:38:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 57208.811 tokens/s +DEBUG 06-24 20:38:49 [stats.py:37] Avg prompt tokens throughput: 57200.832 tokens/s +DEBUG 06-24 20:38:49 [stats.py:37] Avg generate tokens throughput: 7.979 tokens/s +INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:217.7128791809082ms total_cost_time:217.7567481994629ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14357 prompt_cache_len:5151 prompt_cache_ratio:0.35877968935014276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 +DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:49 [batch.py:51] router release req id 8 +INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.1080632209777832 s +INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.10985755920410156 s +DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=42220058573074296046002228521921814187, time:1750768729.8654232s req_ids:[8] +DEBUG 06-24 20:38:49 [manager.py:391] +ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:218.34492683410645ms total_cost_time:218.39022636413574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14358 prompt_cache_len:5151 prompt_cache_ratio:0.3587547012118679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 +DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:50 [batch.py:51] router release req id 8 +INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10917210578918457 s +INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s +DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=87025171011789817108054396836622288477, time:1750768730.0888405s req_ids:[8] +DEBUG 06-24 20:38:50 [manager.py:391] +ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:214.59460258483887ms total_cost_time:214.63823318481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14359 prompt_cache_len:5151 prompt_cache_ratio:0.3587297165540776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 +DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:50 [batch.py:51] router release req id 8 +INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.310685396194458 s +INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.31278300285339355 s +DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=144207721673879602576625147573292808344, time:1750768730.5200946s req_ids:[8] +DEBUG 06-24 20:38:50 [manager.py:391] +ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:431.81920051574707ms total_cost_time:431.86378479003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14360 prompt_cache_len:5151 prompt_cache_ratio:0.3587047353760446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 +DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:50 [batch.py:51] router release req id 8 +INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10976958274841309 s +INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s +DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=136236594581020789496777564185792273578, time:1750768730.7491143s req_ids:[8] +DEBUG 06-24 20:38:50 [manager.py:391] +ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:215.64555168151855ms total_cost_time:215.69037437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14361 prompt_cache_len:5151 prompt_cache_ratio:0.35867975767704197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 +DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:50 [batch.py:51] router release req id 8 +INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s +INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11040616035461426 s +DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=100913643305791435007204448523901509860, time:1750768730.973198s req_ids:[8] +DEBUG 06-24 20:38:50 [manager.py:391] +ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:217.2102928161621ms total_cost_time:217.2539234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14362 prompt_cache_len:5151 prompt_cache_ratio:0.3586547834563431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 +DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:51 [batch.py:51] router release req id 8 +INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10825490951538086 s +INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.11021113395690918 s +DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=51427911362471854734295822673637634841, time:1750768731.208283s req_ids:[8] +DEBUG 06-24 20:38:51 [manager.py:391] +ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:232.28883743286133ms total_cost_time:232.33413696289062ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14363 prompt_cache_len:5151 prompt_cache_ratio:0.3586298127132215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 +DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:51 [batch.py:51] router release req id 8 +INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10936856269836426 s +INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.11143112182617188 s +DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=302438928585701905998019858229401354922, time:1750768731.434327s req_ids:[8] +DEBUG 06-24 20:38:51 [manager.py:391] +ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:214.42437171936035ms total_cost_time:214.46919441223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14364 prompt_cache_len:5151 prompt_cache_ratio:0.3586048454469507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 +DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:51 [batch.py:51] router release req id 8 +INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s +INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.1101076602935791 s +DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=334067944268963943074233574816122345491, time:1750768731.6567454s req_ids:[8] +DEBUG 06-24 20:38:51 [manager.py:391] +ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:398.45943450927734ms total_cost_time:398.50425720214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14365 prompt_cache_len:5151 prompt_cache_ratio:0.3585798816568047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 +DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:51 [batch.py:51] router release req id 8 +INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10930657386779785 s +INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11141562461853027 s +DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=128569987552541351663750449937573508348, time:1750768732.0627298s req_ids:[8] +DEBUG 06-24 20:38:52 [manager.py:391] +ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:218.7967300415039ms total_cost_time:218.8425064086914ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14366 prompt_cache_len:5151 prompt_cache_ratio:0.35855492134205763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 +DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:52 [batch.py:51] router release req id 8 +INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10902714729309082 s +INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11107635498046875 s +DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=185486653347844698767421619877408002861, time:1750768732.2890964s req_ids:[8] +DEBUG 06-24 20:38:52 [manager.py:391] +ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:218.80125999450684ms total_cost_time:218.84393692016602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14367 prompt_cache_len:5151 prompt_cache_ratio:0.3585299645019837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 +DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:52 [batch.py:51] router release req id 8 +INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.1077420711517334 s +INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.10975503921508789 s +DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=302612390582082725912719117851456903468, time:1750768732.5219085s req_ids:[8] +DEBUG 06-24 20:38:52 [manager.py:391] +ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:223.0243682861328ms total_cost_time:223.0682373046875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14368 prompt_cache_len:5151 prompt_cache_ratio:0.3585050111358575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 +DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:52 [batch.py:51] router release req id 8 +INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.1093752384185791 s +INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11134886741638184 s +DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=258722905979772086307792994837766222813, time:1750768732.7581656s req_ids:[8] +DEBUG 06-24 20:38:52 [manager.py:391] +ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:231.9178581237793ms total_cost_time:231.9622039794922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14369 prompt_cache_len:5151 prompt_cache_ratio:0.35848006124295356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 +DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:52 [batch.py:51] router release req id 8 +INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10820317268371582 s +INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s +DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=328147808206937819951451970975491078443, time:1750768732.9847677s req_ids:[8] +DEBUG 06-24 20:38:52 [manager.py:391] +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:218.23668479919434ms total_cost_time:218.27960014343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14370 prompt_cache_len:5151 prompt_cache_ratio:0.358455114822547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 +INFO 06-24 20:38:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:38:53 [statics_utils.py:24] mean first cost: 231.8866850800596 ms +INFO 06-24 20:38:53 [statics_utils.py:24] mean per token cost: 0.058225489621477514 ms +DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:53 [batch.py:51] router release req id 8 +INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.31124234199523926 s +INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.3131675720214844 s +DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=190620952825917569310048264353160084655, time:1750768733.41747s req_ids:[8] +DEBUG 06-24 20:38:53 [manager.py:391] +ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:424.60179328918457ms total_cost_time:424.64613914489746ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14371 prompt_cache_len:5151 prompt_cache_ratio:0.35843017187391274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 +DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:53 [batch.py:51] router release req id 8 +INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.10901212692260742 s +INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.11099767684936523 s +DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=308739650557922791188371753540802801577, time:1750768733.6526062s req_ids:[8] +DEBUG 06-24 20:38:53 [manager.py:391] +ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:227.43821144104004ms total_cost_time:227.48303413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14372 prompt_cache_len:5151 prompt_cache_ratio:0.3584052323963262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 +DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:53 [batch.py:51] router release req id 8 +INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s +INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.10992717742919922 s +DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=14946206708434810664476221206140397835, time:1750768733.8791194s req_ids:[8] +DEBUG 06-24 20:38:53 [manager.py:391] +ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:212.75925636291504ms total_cost_time:212.80384063720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14373 prompt_cache_len:5151 prompt_cache_ratio:0.3583802963890628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 +DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:54 [batch.py:51] router release req id 8 +INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10826492309570312 s +INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11031579971313477 s +DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=150461302604282984862274878916774915628, time:1750768734.0992568s req_ids:[8] +DEBUG 06-24 20:38:54 [manager.py:391] +ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:214.50495719909668ms total_cost_time:214.54858779907227ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14374 prompt_cache_len:5151 prompt_cache_ratio:0.3583553638513984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 +DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:54 [batch.py:51] router release req id 8 +INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10945749282836914 s +INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.1114351749420166 s +DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=66564869900728622193550877565057125872, time:1750768734.3318958s req_ids:[8] +DEBUG 06-24 20:38:54 [manager.py:391] +ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:229.88367080688477ms total_cost_time:229.92849349975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14375 prompt_cache_len:5151 prompt_cache_ratio:0.3583304347826087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 +DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:54 [batch.py:51] router release req id 8 +INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10933399200439453 s +INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11140799522399902 s +DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=326262474543408525129814083552658122830, time:1750768734.5575106s req_ids:[8] +DEBUG 06-24 20:38:54 [manager.py:391] +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:389.0037536621094ms total_cost_time:389.04762268066406ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14376 prompt_cache_len:5151 prompt_cache_ratio:0.35830550918196996 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 +DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:54 [batch.py:51] router release req id 8 +INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10873150825500488 s +INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11072945594787598 s +DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=93523537939754278613645912060833902661, time:1750768734.9666553s req_ids:[8] +DEBUG 06-24 20:38:54 [manager.py:391] +ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:230.16691207885742ms total_cost_time:230.2114963531494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14377 prompt_cache_len:5151 prompt_cache_ratio:0.3582805870487584 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 +DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:55 [batch.py:51] router release req id 8 +INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10906672477722168 s +INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s +DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=197226083631269664035832265751259241060, time:1750768735.2001183s req_ids:[8] +DEBUG 06-24 20:38:55 [manager.py:391] +ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:224.76983070373535ms total_cost_time:224.81369972229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14378 prompt_cache_len:5151 prompt_cache_ratio:0.35825566838225065 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 +DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:55 [batch.py:51] router release req id 8 +INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10769033432006836 s +INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s +DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=292313296466008008938383126847553270306, time:1750768735.4402578s req_ids:[8] +DEBUG 06-24 20:38:55 [manager.py:391] +ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:234.91454124450684ms total_cost_time:234.95888710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14379 prompt_cache_len:5151 prompt_cache_ratio:0.35823075318172337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 +DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:55 [batch.py:51] router release req id 8 +INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10794973373413086 s +INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.10992741584777832 s +DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=72923984217499151412962265561562571607, time:1750768735.6675034s req_ids:[8] +DEBUG 06-24 20:38:55 [manager.py:391] +ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:217.94581413269043ms total_cost_time:217.99087524414062ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14380 prompt_cache_len:5151 prompt_cache_ratio:0.3582058414464534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 +DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:55 [batch.py:51] router release req id 8 +INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10853099822998047 s +INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103672981262207 s +DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=300451890550869285553661382977985928981, time:1750768735.9052405s req_ids:[8] +DEBUG 06-24 20:38:55 [manager.py:391] +ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:230.4835319519043ms total_cost_time:230.52692413330078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14381 prompt_cache_len:5151 prompt_cache_ratio:0.358180933175718 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 +DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:56 [batch.py:51] router release req id 8 +INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10938644409179688 s +INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11136913299560547 s +DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=330870904745291035828018625600186885491, time:1750768736.143805s req_ids:[8] +DEBUG 06-24 20:38:56 [manager.py:391] +ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:415.82584381103516ms total_cost_time:415.87114334106445ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14382 prompt_cache_len:5151 prompt_cache_ratio:0.35815602836879434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 +DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:56 [batch.py:51] router release req id 8 +INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10914158821105957 s +INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11096072196960449 s +DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=273415856646088457513876601937729291533, time:1750768736.5768626s req_ids:[8] +DEBUG 06-24 20:38:56 [manager.py:391] +ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:199.27334785461426ms total_cost_time:199.31554794311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14383 prompt_cache_len:5151 prompt_cache_ratio:0.35813112702496 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 +DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:56 [batch.py:51] router release req id 8 +INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10852289199829102 s +INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11040258407592773 s +DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=154890581781123149465997366429916974874, time:1750768736.760692s req_ids:[8] +DEBUG 06-24 20:38:56 [manager.py:391] +ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:208.01401138305664ms total_cost_time:208.05859565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14384 prompt_cache_len:5151 prompt_cache_ratio:0.35810622914349277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 +DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:56 [batch.py:51] router release req id 8 +INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10660934448242188 s +INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.10835647583007812 s +DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=55497801027213253594998370699236783114, time:1750768736.977141s req_ids:[8] +DEBUG 06-24 20:38:56 [manager.py:391] +ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:214.25914764404297ms total_cost_time:214.30373191833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14385 prompt_cache_len:5151 prompt_cache_ratio:0.3580813347236705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 +DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:57 [batch.py:51] router release req id 8 +INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10747647285461426 s +INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.10928988456726074 s +DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=161466351885057138207589984674515676523, time:1750768737.1987567s req_ids:[8] +DEBUG 06-24 20:38:57 [manager.py:391] +ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:175.0340461730957ms total_cost_time:175.079345703125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14386 prompt_cache_len:5151 prompt_cache_ratio:0.3580564437647713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 +DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:57 [batch.py:51] router release req id 8 +INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10883688926696777 s +INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072778701782227 s +DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=1348720482247470096358465875504957843, time:1750768737.3804703s req_ids:[8] +DEBUG 06-24 20:38:57 [manager.py:391] +ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:210.63899993896484ms total_cost_time:210.70456504821777ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:14387 prompt_cache_len:5151 prompt_cache_ratio:0.3580315562660735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 +DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:57 [batch.py:51] router release req id 8 +INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10799670219421387 s +INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.11003375053405762 s +DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=54127338420909867432850575451597660016, time:1750768737.5996761s req_ids:[8] +DEBUG 06-24 20:38:57 [manager.py:391] +ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:388.319730758667ms total_cost_time:388.3640766143799ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14388 prompt_cache_len:5151 prompt_cache_ratio:0.3580066722268557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 +DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:57 [batch.py:51] router release req id 8 +INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s +INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.10942745208740234 s +DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=221223602299171783311719552443533064284, time:1750768737.9938319s req_ids:[8] +DEBUG 06-24 20:38:57 [manager.py:391] +ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:178.46941947937012ms total_cost_time:178.51519584655762ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14389 prompt_cache_len:5151 prompt_cache_ratio:0.35798179164639654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 +DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:58 [batch.py:51] router release req id 8 +INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10815215110778809 s +INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s +DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=219340245958995027270679775037366800788, time:1750768738.1792977s req_ids:[8] +DEBUG 06-24 20:38:58 [manager.py:391] +ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:205.09815216064453ms total_cost_time:205.14249801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14390 prompt_cache_len:5151 prompt_cache_ratio:0.357956914523975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 +DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:58 [batch.py:51] router release req id 8 +INFO 06-24 20:38:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.1079709529876709 s +INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998773574829102 s +DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=252590943439709788260819316631739246797, time:1750768738.390632s req_ids:[8] +DEBUG 06-24 20:38:58 [manager.py:391] +ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:211.53950691223145ms total_cost_time:211.58361434936523ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14391 prompt_cache_len:5151 prompt_cache_ratio:0.35793204085887015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 +DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:58 [batch.py:51] router release req id 8 +INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10957646369934082 s +INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.11167478561401367 s +DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=80720211012966524024828235284531471557, time:1750768738.6091018s req_ids:[8] +DEBUG 06-24 20:38:58 [manager.py:391] +ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:207.5796127319336ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14392 prompt_cache_len:5151 prompt_cache_ratio:0.3579071706503613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 +DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:58 [batch.py:51] router release req id 8 +INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10872101783752441 s +INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.11004400253295898 s +DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=75088281254800938641998824791851614713, time:1750768738.8223948s req_ids:[8] +DEBUG 06-24 20:38:58 [manager.py:391] +ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:215.1663303375244ms total_cost_time:215.2094841003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14393 prompt_cache_len:5151 prompt_cache_ratio:0.35788230389772807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 +DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:58 [batch.py:51] router release req id 8 +INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.10905337333679199 s +INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11034965515136719 s +DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=135978810009958514769907754484973747396, time:1750768739.0448203s req_ids:[8] +DEBUG 06-24 20:38:59 [manager.py:391] +ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:380.418062210083ms total_cost_time:380.4628849029541ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14394 prompt_cache_len:5151 prompt_cache_ratio:0.3578574406002501 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 +DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:59 [batch.py:51] router release req id 8 +INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.10909557342529297 s +INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s +DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=39040268564985338010407122996169265194, time:1750768739.4313838s req_ids:[8] +DEBUG 06-24 20:38:59 [manager.py:391] +ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:216.83216094970703ms total_cost_time:216.87555313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14395 prompt_cache_len:5151 prompt_cache_ratio:0.35783258075720736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 +DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:59 [batch.py:51] router release req id 8 +INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.1085209846496582 s +INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11037063598632812 s +DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=98044062420297885918263618598664678427, time:1750768739.6537817s req_ids:[8] +DEBUG 06-24 20:38:59 [manager.py:391] +ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:38:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 56019.435 tokens/s +DEBUG 06-24 20:38:59 [stats.py:37] Avg prompt tokens throughput: 56011.643 tokens/s +DEBUG 06-24 20:38:59 [stats.py:37] Avg generate tokens throughput: 7.792 tokens/s +INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:214.57576751708984ms total_cost_time:214.62082862854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14396 prompt_cache_len:5151 prompt_cache_ratio:0.35780772436787994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 +DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:38:59 [batch.py:51] router release req id 8 +INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.1094970703125 s +INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11141300201416016 s +DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=177876037772334646041545534003046497116, time:1750768739.8762355s req_ids:[8] +DEBUG 06-24 20:38:59 [manager.py:391] +ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:210.80851554870605ms total_cost_time:210.85095405578613ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14397 prompt_cache_len:5151 prompt_cache_ratio:0.35778287143154824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 +DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:00 [batch.py:51] router release req id 8 +INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10878515243530273 s +INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11072206497192383 s +DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=259672313330926242564452384740224089885, time:1750768740.0926967s req_ids:[8] +DEBUG 06-24 20:39:00 [manager.py:391] +ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:209.92541313171387ms total_cost_time:209.96952056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14398 prompt_cache_len:5151 prompt_cache_ratio:0.35775802194749273 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 +DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:00 [batch.py:51] router release req id 8 +INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10841870307922363 s +INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11034607887268066 s +DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=66094555359693542176500460260931110554, time:1750768740.3087192s req_ids:[8] +DEBUG 06-24 20:39:00 [manager.py:391] +ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:208.8925838470459ms total_cost_time:208.93621444702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14399 prompt_cache_len:5151 prompt_cache_ratio:0.3577331759149941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 +DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:00 [batch.py:51] router release req id 8 +INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s +INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11062312126159668 s +DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=89801279844420085305197463016979616238, time:1750768740.5253499s req_ids:[8] +DEBUG 06-24 20:39:00 [manager.py:391] +ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:374.97568130493164ms total_cost_time:375.02026557922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14400 prompt_cache_len:5151 prompt_cache_ratio:0.35770833333333335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 +DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:00 [batch.py:51] router release req id 8 +INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s +INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s +DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=101407775052477857075531335678002910047, time:1750768740.9061217s req_ids:[8] +DEBUG 06-24 20:39:00 [manager.py:391] +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:216.61758422851562ms total_cost_time:216.66240692138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14401 prompt_cache_len:5151 prompt_cache_ratio:0.3576834942017915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 +DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:01 [batch.py:51] router release req id 8 +INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.1093437671661377 s +INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11132502555847168 s +DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=232523285198008779268083075923057207564, time:1750768741.1300201s req_ids:[8] +DEBUG 06-24 20:39:01 [manager.py:391] +ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:215.57164192199707ms total_cost_time:215.61241149902344ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14402 prompt_cache_len:5151 prompt_cache_ratio:0.35765865851965006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 +DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:01 [batch.py:51] router release req id 8 +INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.10904860496520996 s +INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11113572120666504 s +DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=251055890578191398376743145081402192827, time:1750768741.3518686s req_ids:[8] +DEBUG 06-24 20:39:01 [manager.py:391] +ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:213.78469467163086ms total_cost_time:213.82904052734375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14403 prompt_cache_len:5151 prompt_cache_ratio:0.3576338262861904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 +DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:01 [batch.py:51] router release req id 8 +INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.10890364646911621 s +INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11105918884277344 s +DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=80809721121372523809235040015590200844, time:1750768741.57317s req_ids:[8] +DEBUG 06-24 20:39:01 [manager.py:391] +ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:209.88702774047852ms total_cost_time:209.9294662475586ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14404 prompt_cache_len:5151 prompt_cache_ratio:0.35760899750069425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 +DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:01 [batch.py:51] router release req id 8 +INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.1081538200378418 s +INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11001133918762207 s +DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=221295420808640508336952611338352499095, time:1750768741.7896383s req_ids:[8] +DEBUG 06-24 20:39:01 [manager.py:391] +ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:395.2326774597168ms total_cost_time:395.2784538269043ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14405 prompt_cache_len:5151 prompt_cache_ratio:0.3575841721624436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 +DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:02 [batch.py:51] router release req id 8 +INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10817766189575195 s +INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s +DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=207463655190164405078169938567625121943, time:1750768742.1952813s req_ids:[8] +DEBUG 06-24 20:39:02 [manager.py:391] +ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:221.3287353515625ms total_cost_time:221.3733196258545ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14406 prompt_cache_len:5151 prompt_cache_ratio:0.35755935027072056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 +DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:02 [batch.py:51] router release req id 8 +INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10891151428222656 s +INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093950271606445 s +DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=314058917107092437678228266287984203637, time:1750768742.4193077s req_ids:[8] +DEBUG 06-24 20:39:02 [manager.py:391] +ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:213.9892578125ms total_cost_time:214.03264999389648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14407 prompt_cache_len:5151 prompt_cache_ratio:0.3575345318248074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 +DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:02 [batch.py:51] router release req id 8 +INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10848164558410645 s +INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11054587364196777 s +DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=113362758880455398170138277649091583106, time:1750768742.639642s req_ids:[8] +DEBUG 06-24 20:39:02 [manager.py:391] +ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:214.68663215637207ms total_cost_time:214.73288536071777ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14408 prompt_cache_len:5151 prompt_cache_ratio:0.3575097168239867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 +DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:02 [batch.py:51] router release req id 8 +INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10929512977600098 s +INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118578910827637 s +DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=295771330359681548590829762405999873300, time:1750768742.8610358s req_ids:[8] +DEBUG 06-24 20:39:02 [manager.py:391] +ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:212.14056015014648ms total_cost_time:212.18442916870117ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14409 prompt_cache_len:5151 prompt_cache_ratio:0.3574849052675411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 +DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:03 [batch.py:51] router release req id 8 +INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.10904240608215332 s +INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11108708381652832 s +DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=288477962522164341205604601498197622156, time:1750768743.0917377s req_ids:[8] +DEBUG 06-24 20:39:03 [manager.py:391] +ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:230.57818412780762ms total_cost_time:230.6220531463623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14410 prompt_cache_len:5151 prompt_cache_ratio:0.35746009715475363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 +DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:03 [batch.py:51] router release req id 8 +INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.1087346076965332 s +INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11068272590637207 s +DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=91323997676189003678882725092398213662, time:1750768743.3321693s req_ids:[8] +DEBUG 06-24 20:39:03 [manager.py:391] +ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:233.4749698638916ms total_cost_time:233.5188388824463ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14411 prompt_cache_len:5151 prompt_cache_ratio:0.35743529248490735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 +DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:03 [batch.py:51] router release req id 8 +INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.3099710941314697 s +INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.312058687210083 s +DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=270966981986474575896899475949809773461, time:1750768743.7650669s req_ids:[8] +DEBUG 06-24 20:39:03 [manager.py:391] +ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:426.17154121398926ms total_cost_time:426.21564865112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14412 prompt_cache_len:5151 prompt_cache_ratio:0.3574104912572856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 +DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:03 [batch.py:51] router release req id 8 +INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.10918617248535156 s +INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11100912094116211 s +DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=65283269701227960303078114703000133608, time:1750768743.991991s req_ids:[8] +DEBUG 06-24 20:39:03 [manager.py:391] +ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:218.22047233581543ms total_cost_time:218.26410293579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14413 prompt_cache_len:5151 prompt_cache_ratio:0.3573856934711719 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 +DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:04 [batch.py:51] router release req id 8 +INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10826349258422852 s +INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.1101534366607666 s +DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=17757040287005866686907265450172287252, time:1750768744.2156723s req_ids:[8] +DEBUG 06-24 20:39:04 [manager.py:391] +ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:216.73154830932617ms total_cost_time:216.77541732788086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14414 prompt_cache_len:5151 prompt_cache_ratio:0.3573608991258499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 +DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:04 [batch.py:51] router release req id 8 +INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.1085977554321289 s +INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.11080336570739746 s +DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=258447223088035561513005383830075366042, time:1750768744.4377725s req_ids:[8] +DEBUG 06-24 20:39:04 [manager.py:391] +ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:213.0756378173828ms total_cost_time:213.1197452545166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14415 prompt_cache_len:5151 prompt_cache_ratio:0.3573361082206035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 +DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:04 [batch.py:51] router release req id 8 +INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10781407356262207 s +INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.10985946655273438 s +DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=231123326205140437401424042414156079237, time:1750768744.6564646s req_ids:[8] +DEBUG 06-24 20:39:04 [manager.py:391] +ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:213.28282356262207ms total_cost_time:213.32502365112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14416 prompt_cache_len:5151 prompt_cache_ratio:0.357311320754717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 +DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:04 [batch.py:51] router release req id 8 +INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10971307754516602 s +INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.11164617538452148 s +DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=269692927050720850323661326627435616653, time:1750768744.8774261s req_ids:[8] +DEBUG 06-24 20:39:04 [manager.py:391] +ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:389.61029052734375ms total_cost_time:389.65463638305664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14417 prompt_cache_len:5151 prompt_cache_ratio:0.3572865367274745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 +DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:05 [batch.py:51] router release req id 8 +INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10916352272033691 s +INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11112165451049805 s +DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=120645436605576550622915283860715834255, time:1750768745.272444s req_ids:[8] +DEBUG 06-24 20:39:05 [manager.py:391] +ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:213.67239952087402ms total_cost_time:213.71150016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:14418 prompt_cache_len:5151 prompt_cache_ratio:0.35726175613816064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 +DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:05 [batch.py:51] router release req id 8 +INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10825872421264648 s +INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.1103367805480957 s +DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=37160258634418580758908671547267347760, time:1750768745.4932957s req_ids:[8] +DEBUG 06-24 20:39:05 [manager.py:391] +ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:219.30885314941406ms total_cost_time:219.35224533081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14419 prompt_cache_len:5151 prompt_cache_ratio:0.35723697898606005 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 +DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:05 [batch.py:51] router release req id 8 +INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10815048217773438 s +INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012744903564453 s +DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=1025591099157772715304565047340465613, time:1750768745.7270288s req_ids:[8] +DEBUG 06-24 20:39:05 [manager.py:391] +ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:228.88994216918945ms total_cost_time:228.93500328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14420 prompt_cache_len:5151 prompt_cache_ratio:0.3572122052704577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 +DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:05 [batch.py:51] router release req id 8 +INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10862255096435547 s +INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11040735244750977 s +DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=230234104841611586230691630628620043949, time:1750768745.9624455s req_ids:[8] +DEBUG 06-24 20:39:05 [manager.py:391] +ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:188.91167640686035ms total_cost_time:188.95697593688965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14421 prompt_cache_len:5151 prompt_cache_ratio:0.35718743499063865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 +DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:06 [batch.py:51] router release req id 8 +INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.1081242561340332 s +INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.10961103439331055 s +DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=159104142879330775399206596724855937662, time:1750768746.1479666s req_ids:[8] +DEBUG 06-24 20:39:06 [manager.py:391] +ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:208.47797393798828ms total_cost_time:208.52375030517578ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14422 prompt_cache_len:5151 prompt_cache_ratio:0.3571626681458882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 +DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:06 [batch.py:51] router release req id 8 +INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10764670372009277 s +INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.10957932472229004 s +DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=151300754131816835222323648214254116460, time:1750768746.3631163s req_ids:[8] +DEBUG 06-24 20:39:06 [manager.py:391] +ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:379.1518211364746ms total_cost_time:379.194974899292ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14423 prompt_cache_len:5151 prompt_cache_ratio:0.3571379047354919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 +DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:06 [batch.py:51] router release req id 8 +INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10953140258789062 s +INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.11168289184570312 s +DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=326477692332041869549175581309848809659, time:1750768746.749041s req_ids:[8] +DEBUG 06-24 20:39:06 [manager.py:391] +ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:215.31915664672852ms total_cost_time:215.4250144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.10585784912109375ms prompt_token_num:14424 prompt_cache_len:5151 prompt_cache_ratio:0.35711314475873546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 +DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:06 [batch.py:51] router release req id 8 +INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10836052894592285 s +INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.11048030853271484 s +DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=340033724385825236818916253133711560018, time:1750768746.9826066s req_ids:[8] +DEBUG 06-24 20:39:06 [manager.py:391] +ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:228.03735733032227ms total_cost_time:228.08194160461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14425 prompt_cache_len:5151 prompt_cache_ratio:0.3570883882149047 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 +DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:07 [batch.py:51] router release req id 8 +INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10976767539978027 s +INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.1117706298828125 s +DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=221842589995435193335948153450176221740, time:1750768747.2080717s req_ids:[8] +DEBUG 06-24 20:39:07 [manager.py:391] +ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:215.90709686279297ms total_cost_time:215.95048904418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14426 prompt_cache_len:5151 prompt_cache_ratio:0.35706363510328576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 +DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:07 [batch.py:51] router release req id 8 +INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s +INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023116111755371 s +DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=154421752098875093416761266286156495260, time:1750768747.4419856s req_ids:[8] +DEBUG 06-24 20:39:07 [manager.py:391] +ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:227.41246223449707ms total_cost_time:227.45609283447266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14427 prompt_cache_len:5151 prompt_cache_ratio:0.3570388854231649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 +DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:07 [batch.py:51] router release req id 8 +INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10860276222229004 s +INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s +DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=253899780095196006271365982139554892785, time:1750768747.6680632s req_ids:[8] +DEBUG 06-24 20:39:07 [manager.py:391] +ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:214.39743041992188ms total_cost_time:214.44249153137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14428 prompt_cache_len:5151 prompt_cache_ratio:0.3570141391738287 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 +DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:07 [batch.py:51] router release req id 8 +INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.3111255168914795 s +INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.31325817108154297 s +DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=71611392071358443447537189801981580060, time:1750768748.097501s req_ids:[8] +DEBUG 06-24 20:39:08 [manager.py:391] +ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:422.5592613220215ms total_cost_time:422.6036071777344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14429 prompt_cache_len:5151 prompt_cache_ratio:0.3569893963545637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 +DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:08 [batch.py:51] router release req id 8 +INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10849213600158691 s +INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s +DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=230452138273452295752895771748414479189, time:1750768748.319106s req_ids:[8] +DEBUG 06-24 20:39:08 [manager.py:391] +ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:214.15066719055176ms total_cost_time:214.19286727905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14430 prompt_cache_len:5151 prompt_cache_ratio:0.35696465696465696 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 +DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:08 [batch.py:51] router release req id 8 +INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10764765739440918 s +INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.10969829559326172 s +DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=286586147293116425906714578717497710289, time:1750768748.5391588s req_ids:[8] +DEBUG 06-24 20:39:08 [manager.py:391] +ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:208.94622802734375ms total_cost_time:208.98890495300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14431 prompt_cache_len:5151 prompt_cache_ratio:0.35693992100339544 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 +DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:08 [batch.py:51] router release req id 8 +INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10782408714294434 s +INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.1096489429473877 s +DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=78376289869153816009944014406074588319, time:1750768748.7664132s req_ids:[8] +DEBUG 06-24 20:39:08 [manager.py:391] +ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:227.86402702331543ms total_cost_time:227.9069423675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14432 prompt_cache_len:5151 prompt_cache_ratio:0.35691518847006654 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 +DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:08 [batch.py:51] router release req id 8 +INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10904288291931152 s +INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.11111903190612793 s +DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=298660883630620630285118260099780099490, time:1750768748.9916048s req_ids:[8] +DEBUG 06-24 20:39:08 [manager.py:391] +ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:213.93799781799316ms total_cost_time:213.98210525512695ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14433 prompt_cache_len:5151 prompt_cache_ratio:0.3568904593639576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 +DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:09 [batch.py:51] router release req id 8 +INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.1086127758026123 s +INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.11135983467102051 s +DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=1443158031777165879979305883847184775, time:1750768749.2257204s req_ids:[8] +DEBUG 06-24 20:39:09 [manager.py:391] +ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:392.2431468963623ms total_cost_time:392.2848701477051ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14434 prompt_cache_len:5151 prompt_cache_ratio:0.35686573368435637 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 +DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:09 [batch.py:51] router release req id 8 +INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.10902667045593262 s +INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.11118674278259277 s +DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=190104925755065930834671279092262651122, time:1750768749.6123288s req_ids:[8] +DEBUG 06-24 20:39:09 [manager.py:391] +ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:211.1339569091797ms total_cost_time:211.17687225341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14435 prompt_cache_len:5151 prompt_cache_ratio:0.35684101143055075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 +DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:09 [batch.py:51] router release req id 8 +INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.10796809196472168 s +INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.10998272895812988 s +DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=244173667086951092690210849735299183393, time:1750768749.8296163s req_ids:[8] +DEBUG 06-24 20:39:09 [manager.py:391] +DEBUG 06-24 20:39:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57231.970 tokens/s +DEBUG 06-24 20:39:09 [stats.py:37] Avg prompt tokens throughput: 57224.130 tokens/s +DEBUG 06-24 20:39:09 [stats.py:37] Avg generate tokens throughput: 7.839 tokens/s +ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:214.62416648864746ms total_cost_time:214.66684341430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14436 prompt_cache_len:5151 prompt_cache_ratio:0.3568162926018288 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 +DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:09 [batch.py:51] router release req id 8 +INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10880231857299805 s +INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.11071205139160156 s +DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=260883252017914314194225568725314386477, time:1750768750.052993s req_ids:[8] +DEBUG 06-24 20:39:10 [manager.py:391] +ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:211.09414100646973ms total_cost_time:211.13824844360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14437 prompt_cache_len:5151 prompt_cache_ratio:0.3567915771974787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 +DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:10 [batch.py:51] router release req id 8 +INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.1077430248260498 s +INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.10978984832763672 s +DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=30057341735574622687923989713725793556, time:1750768750.2697062s req_ids:[8] +DEBUG 06-24 20:39:10 [manager.py:391] +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:214.2353057861328ms total_cost_time:214.277982711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14438 prompt_cache_len:5151 prompt_cache_ratio:0.356766865216789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 +DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:10 [batch.py:51] router release req id 8 +INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10763359069824219 s +INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.10947728157043457 s +DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=82482135982308747291512840619287918130, time:1750768750.500907s req_ids:[8] +DEBUG 06-24 20:39:10 [manager.py:391] +ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:361.2246513366699ms total_cost_time:361.2701892852783ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14439 prompt_cache_len:5151 prompt_cache_ratio:0.3567421566590484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 +DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:10 [batch.py:51] router release req id 8 +INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10886287689208984 s +INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.11101961135864258 s +DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=112693203331248789428240721837511549641, time:1750768750.8648474s req_ids:[8] +DEBUG 06-24 20:39:10 [manager.py:391] +ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:219.39373016357422ms total_cost_time:219.45548057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14440 prompt_cache_len:5151 prompt_cache_ratio:0.3567174515235457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 +DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:10 [batch.py:51] router release req id 8 +INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10860919952392578 s +INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s +DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=309855515936060889190726296737339254099, time:1750768751.0881581s req_ids:[8] +DEBUG 06-24 20:39:11 [manager.py:391] +ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:215.5773639678955ms total_cost_time:215.6224250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14441 prompt_cache_len:5151 prompt_cache_ratio:0.35669274980957 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 +DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:11 [batch.py:51] router release req id 8 +INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s +INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11014771461486816 s +DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=256156288801476146607702351379864855225, time:1750768751.3120918s req_ids:[8] +DEBUG 06-24 20:39:11 [manager.py:391] +ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:214.51401710510254ms total_cost_time:214.55979347229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14442 prompt_cache_len:5151 prompt_cache_ratio:0.35666805151641046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 +DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:11 [batch.py:51] router release req id 8 +INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s +INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s +DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=172673095492456084099490345031176837323, time:1750768751.5329416s req_ids:[8] +DEBUG 06-24 20:39:11 [manager.py:391] +ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:211.7176055908203ms total_cost_time:211.7598056793213ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14443 prompt_cache_len:5151 prompt_cache_ratio:0.35664335664335667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 +DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:11 [batch.py:51] router release req id 8 +INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10801577568054199 s +INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.1093745231628418 s +DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=266424898381614818363760754332839761071, time:1750768751.7517812s req_ids:[8] +DEBUG 06-24 20:39:11 [manager.py:391] +ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:215.79599380493164ms total_cost_time:215.83819389343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14444 prompt_cache_len:5151 prompt_cache_ratio:0.35661866518969815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 +DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:11 [batch.py:51] router release req id 8 +INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s +INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.1099846363067627 s +DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=151103851337256310303437343954511429710, time:1750768751.9747906s req_ids:[8] +DEBUG 06-24 20:39:11 [manager.py:391] +ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:385.0288391113281ms total_cost_time:385.0724697113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14445 prompt_cache_len:5151 prompt_cache_ratio:0.3565939771547248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 +DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:12 [batch.py:51] router release req id 8 +INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.10917997360229492 s +INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s +DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=274087782406724443260006128639365559476, time:1750768752.3672307s req_ids:[8] +DEBUG 06-24 20:39:12 [manager.py:391] +ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:214.29967880249023ms total_cost_time:214.34259414672852ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14446 prompt_cache_len:5151 prompt_cache_ratio:0.3565692925377267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 +DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:12 [batch.py:51] router release req id 8 +INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.1090247631072998 s +INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s +DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=109643730919017540088511146435171013957, time:1750768752.5883234s req_ids:[8] +DEBUG 06-24 20:39:12 [manager.py:391] +ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:224.89619255065918ms total_cost_time:224.95222091674805ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:14447 prompt_cache_len:5151 prompt_cache_ratio:0.35654461133799403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 +DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:12 [batch.py:51] router release req id 8 +INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.10911035537719727 s +INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.11115527153015137 s +DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=256413163123853200854585687665482682101, time:1750768752.828008s req_ids:[8] +DEBUG 06-24 20:39:12 [manager.py:391] +ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:224.54094886779785ms total_cost_time:224.58291053771973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14448 prompt_cache_len:5151 prompt_cache_ratio:0.35651993355481726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 +DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:12 [batch.py:51] router release req id 8 +INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10877442359924316 s +INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112830638885498 s +DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=291995886525481649736240854874971782809, time:1750768753.0512106s req_ids:[8] +DEBUG 06-24 20:39:13 [manager.py:391] +ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:212.82696723937988ms total_cost_time:212.86988258361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14449 prompt_cache_len:5151 prompt_cache_ratio:0.35649525918748703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 +DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:13 [batch.py:51] router release req id 8 +INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s +INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s +DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=66047282980062518053997735262163468593, time:1750768753.2698097s req_ids:[8] +DEBUG 06-24 20:39:13 [manager.py:391] +ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:376.97553634643555ms total_cost_time:377.01988220214844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14450 prompt_cache_len:5151 prompt_cache_ratio:0.3564705882352941 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 +DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:13 [batch.py:51] router release req id 8 +INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10900712013244629 s +INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.1111001968383789 s +DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=48994054095646772868058448545485534491, time:1750768753.652219s req_ids:[8] +DEBUG 06-24 20:39:13 [manager.py:391] +ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:214.30706977844238ms total_cost_time:214.36285972595215ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:14451 prompt_cache_len:5151 prompt_cache_ratio:0.35644592069752956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 +DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:13 [batch.py:51] router release req id 8 +INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10796451568603516 s +INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s +DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=323433360951768358497787823052859395054, time:1750768753.8873s req_ids:[8] +DEBUG 06-24 20:39:13 [manager.py:391] +ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:232.08975791931152ms total_cost_time:232.13434219360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14452 prompt_cache_len:5151 prompt_cache_ratio:0.3564212565734846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 +DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:14 [batch.py:51] router release req id 8 +INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10915827751159668 s +INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11118769645690918 s +DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=98565531160045885128495239375206938881, time:1750768754.1131608s req_ids:[8] +DEBUG 06-24 20:39:14 [manager.py:391] +ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:215.00802040100098ms total_cost_time:215.04926681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14453 prompt_cache_len:5151 prompt_cache_ratio:0.3563965958624507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 +DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:14 [batch.py:51] router release req id 8 +INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s +INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11086821556091309 s +DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=120106942341199154735193975849053555217, time:1750768754.336108s req_ids:[8] +DEBUG 06-24 20:39:14 [manager.py:391] +ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:216.68624877929688ms total_cost_time:216.72821044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14454 prompt_cache_len:5151 prompt_cache_ratio:0.3563719385637194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 +DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:14 [batch.py:51] router release req id 8 +INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10880637168884277 s +INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11074304580688477 s +DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=65038110514663976119154620606364292258, time:1750768754.5582387s req_ids:[8] +DEBUG 06-24 20:39:14 [manager.py:391] +ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:216.08209609985352ms total_cost_time:216.12548828125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14455 prompt_cache_len:5151 prompt_cache_ratio:0.3563472846765825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 +DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:14 [batch.py:51] router release req id 8 +INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10886025428771973 s +INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11099052429199219 s +DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=6407844347482827244912000717078211352, time:1750768754.7799933s req_ids:[8] +DEBUG 06-24 20:39:14 [manager.py:391] +ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:375.5013942718506ms total_cost_time:375.54430961608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14456 prompt_cache_len:5151 prompt_cache_ratio:0.35632263420033206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 +DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:15 [batch.py:51] router release req id 8 +INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.1091146469116211 s +INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s +DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=1017323158985812167480360359367676943, time:1750768755.1629627s req_ids:[8] +DEBUG 06-24 20:39:15 [manager.py:391] +ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:218.3675765991211ms total_cost_time:218.4135913848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14457 prompt_cache_len:5151 prompt_cache_ratio:0.3562979871342602 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 +DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:15 [batch.py:51] router release req id 8 +INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s +INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098940372467041 s +DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=263774758892995599267683342175634349986, time:1750768755.3871503s req_ids:[8] +DEBUG 06-24 20:39:15 [manager.py:391] +ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:215.73233604431152ms total_cost_time:215.77692031860352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14458 prompt_cache_len:5151 prompt_cache_ratio:0.35627334347765943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 +DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:15 [batch.py:51] router release req id 8 +INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10898470878601074 s +INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.11087298393249512 s +DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=308292539267410881415497870082185736748, time:1750768755.6106734s req_ids:[8] +DEBUG 06-24 20:39:15 [manager.py:391] +ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:216.0813808441162ms total_cost_time:216.1245346069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14459 prompt_cache_len:5151 prompt_cache_ratio:0.35624870322982227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 +DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:15 [batch.py:51] router release req id 8 +INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10798859596252441 s +INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.10982275009155273 s +DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=421920628723533212835412969761451501, time:1750768755.8362823s req_ids:[8] +DEBUG 06-24 20:39:15 [manager.py:391] +ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:223.01745414733887ms total_cost_time:223.06227684020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14460 prompt_cache_len:5151 prompt_cache_ratio:0.3562240663900415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 +DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:15 [batch.py:51] router release req id 8 +INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10882163047790527 s +INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11086869239807129 s +DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=264767345193189309436985202029236393217, time:1750768756.059983s req_ids:[8] +DEBUG 06-24 20:39:16 [manager.py:391] +ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:210.26921272277832ms total_cost_time:210.313081741333ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14461 prompt_cache_len:5151 prompt_cache_ratio:0.35619943295761014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 +DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:16 [batch.py:51] router release req id 8 +INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.3109748363494873 s +INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.31284093856811523 s +DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=13015321894155355799069032665733173622, time:1750768756.4824874s req_ids:[8] +DEBUG 06-24 20:39:16 [manager.py:391] +ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:426.21803283691406ms total_cost_time:426.26428604125977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14462 prompt_cache_len:5151 prompt_cache_ratio:0.3561748029318213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 +DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:16 [batch.py:51] router release req id 8 +INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10883188247680664 s +INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s +DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=100510524427873256812126511542235161365, time:1750768756.7100923s req_ids:[8] +DEBUG 06-24 20:39:16 [manager.py:391] +ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:210.86764335632324ms total_cost_time:210.91032028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14463 prompt_cache_len:5151 prompt_cache_ratio:0.3561501763119685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 +DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:16 [batch.py:51] router release req id 8 +INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10869169235229492 s +INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11073470115661621 s +DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=170230979480275616630987565844123840856, time:1750768756.9292464s req_ids:[8] +DEBUG 06-24 20:39:16 [manager.py:391] +ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:212.81957626342773ms total_cost_time:212.86392211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14464 prompt_cache_len:5151 prompt_cache_ratio:0.3561255530973451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 +DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:17 [batch.py:51] router release req id 8 +INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10896801948547363 s +INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11105155944824219 s +DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=222272595704911621297233237235553650850, time:1750768757.149159s req_ids:[8] +DEBUG 06-24 20:39:17 [manager.py:391] +INFO 06-24 20:39:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:214.89953994750977ms total_cost_time:214.94245529174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14465 prompt_cache_len:5151 prompt_cache_ratio:0.35610093328724507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 +DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:17 [batch.py:51] router release req id 8 +INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10922884941101074 s +INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11127138137817383 s +DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=238724501656088151327940160845152567660, time:1750768757.3707323s req_ids:[8] +DEBUG 06-24 20:39:17 [manager.py:391] +ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:209.83219146728516ms total_cost_time:209.87629890441895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14466 prompt_cache_len:5151 prompt_cache_ratio:0.35607631688096225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 +DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:17 [batch.py:51] router release req id 8 +INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.1083989143371582 s +INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11044740676879883 s +DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=136571648187754387877585146435720928608, time:1750768757.5987244s req_ids:[8] +DEBUG 06-24 20:39:17 [manager.py:391] +ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:393.1889533996582ms total_cost_time:393.2335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14467 prompt_cache_len:5151 prompt_cache_ratio:0.35605170387779084 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 +DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:17 [batch.py:51] router release req id 8 +INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10904955863952637 s +INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11109352111816406 s +DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=259898436402997137538497358270952818357, time:1750768757.9903495s req_ids:[8] +DEBUG 06-24 20:39:17 [manager.py:391] +ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:217.057466506958ms total_cost_time:217.1010971069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14468 prompt_cache_len:5151 prompt_cache_ratio:0.35602709427702517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 +DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:18 [batch.py:51] router release req id 8 +INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10904479026794434 s +INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11102581024169922 s +DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=337514285645286034376217235270328430748, time:1750768758.2151718s req_ids:[8] +DEBUG 06-24 20:39:18 [manager.py:391] +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:213.76276016235352ms total_cost_time:213.8059139251709ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14469 prompt_cache_len:5151 prompt_cache_ratio:0.35600248807795976 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 +DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:18 [batch.py:51] router release req id 8 +INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s +INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11061429977416992 s +DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=273196104514867727967211452289001512995, time:1750768758.4364254s req_ids:[8] +DEBUG 06-24 20:39:18 [manager.py:391] +ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:216.07637405395508ms total_cost_time:216.12095832824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14470 prompt_cache_len:5151 prompt_cache_ratio:0.35597788527988944 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 +DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:18 [batch.py:51] router release req id 8 +INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.1085045337677002 s +INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11057353019714355 s +DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=115436287213321250280222773846590966675, time:1750768758.6595058s req_ids:[8] +DEBUG 06-24 20:39:18 [manager.py:391] +ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:211.6549015045166ms total_cost_time:211.6987705230713ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14471 prompt_cache_len:5151 prompt_cache_ratio:0.35595328588210906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 +DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:18 [batch.py:51] router release req id 8 +INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10863327980041504 s +INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s +DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=178122349760245250984639058720720836890, time:1750768758.8835607s req_ids:[8] +DEBUG 06-24 20:39:18 [manager.py:391] +ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:219.22874450683594ms total_cost_time:219.3012237548828ms,out_token_counter:1 mean_per_token_cost_time: 0.072479248046875ms prompt_token_num:14472 prompt_cache_len:5151 prompt_cache_ratio:0.35592868988391374 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 +DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:19 [batch.py:51] router release req id 8 +INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.31195855140686035 s +INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.31337499618530273 s +DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=98014658054796709363782094895554895198, time:1750768759.3144581s req_ids:[8] +DEBUG 06-24 20:39:19 [manager.py:391] +ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:428.8802146911621ms total_cost_time:428.9236068725586ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14473 prompt_cache_len:5151 prompt_cache_ratio:0.35590409728459893 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 +DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:19 [batch.py:51] router release req id 8 +INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10820317268371582 s +INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.10939860343933105 s +DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=257719559702122527960078960437955864387, time:1750768759.5421965s req_ids:[8] +DEBUG 06-24 20:39:19 [manager.py:391] +ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:215.0125503540039ms total_cost_time:215.05475044250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14474 prompt_cache_len:5151 prompt_cache_ratio:0.35587950808345997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 +DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:19 [batch.py:51] router release req id 8 +INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10843110084533691 s +INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.11015105247497559 s +DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=60246118414337925335603607700257304160, time:1750768759.7666085s req_ids:[8] +DEBUG 06-24 20:39:19 [manager.py:391] +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:39:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 56167.408 tokens/s +DEBUG 06-24 20:39:19 [stats.py:37] Avg prompt tokens throughput: 56159.538 tokens/s +DEBUG 06-24 20:39:19 [stats.py:37] Avg generate tokens throughput: 7.869 tokens/s +INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:222.62287139892578ms total_cost_time:222.66745567321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14475 prompt_cache_len:5151 prompt_cache_ratio:0.35585492227979276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 +DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:19 [batch.py:51] router release req id 8 +INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10888385772705078 s +INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s +DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=179252565023677891973026276626664509295, time:1750768759.991026s req_ids:[8] +DEBUG 06-24 20:39:19 [manager.py:391] +ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:213.1519317626953ms total_cost_time:213.1948471069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14476 prompt_cache_len:5151 prompt_cache_ratio:0.3558303398728931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 +DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:20 [batch.py:51] router release req id 8 +INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.10851144790649414 s +INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s +DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=201257005861463015766489437267586091988, time:1750768760.21063s req_ids:[8] +DEBUG 06-24 20:39:20 [manager.py:391] +ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:212.12124824523926ms total_cost_time:212.16535568237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14477 prompt_cache_len:5151 prompt_cache_ratio:0.35580576086205706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 +DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:20 [batch.py:51] router release req id 8 +INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.1088254451751709 s +INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s +DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=25304054451728900996853120421279660634, time:1750768760.4298644s req_ids:[8] +DEBUG 06-24 20:39:20 [manager.py:391] +ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:387.5579833984375ms total_cost_time:387.6056671142578ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:14478 prompt_cache_len:5151 prompt_cache_ratio:0.35578118524658103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 +DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:20 [batch.py:51] router release req id 8 +INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.10886669158935547 s +INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11088371276855469 s +DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=169420038673990720697478235947123232945, time:1750768760.825021s req_ids:[8] +DEBUG 06-24 20:39:20 [manager.py:391] +ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:215.90566635131836ms total_cost_time:215.94905853271484ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14479 prompt_cache_len:5151 prompt_cache_ratio:0.35575661302576145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 +DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:20 [batch.py:51] router release req id 8 +INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10894370079040527 s +INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11017441749572754 s +DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=217155677914912462187866787741336137257, time:1750768761.046107s req_ids:[8] +DEBUG 06-24 20:39:21 [manager.py:391] +ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:214.79177474975586ms total_cost_time:214.83421325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14480 prompt_cache_len:5151 prompt_cache_ratio:0.35573204419889504 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 +DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:21 [batch.py:51] router release req id 8 +INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s +INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11001896858215332 s +DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=234360684666352354594063016406278605989, time:1750768761.268018s req_ids:[8] +DEBUG 06-24 20:39:21 [manager.py:391] +ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:216.61615371704102ms total_cost_time:216.6593074798584ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14481 prompt_cache_len:5151 prompt_cache_ratio:0.35570747876527864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 +DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:21 [batch.py:51] router release req id 8 +INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s +INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.1106865406036377 s +DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=296128557938243823528727482844788021495, time:1750768761.4909992s req_ids:[8] +DEBUG 06-24 20:39:21 [manager.py:391] +ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:214.49995040893555ms total_cost_time:214.54191207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14482 prompt_cache_len:5151 prompt_cache_ratio:0.3556829167242094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 +DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:21 [batch.py:51] router release req id 8 +INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10888814926147461 s +INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11092019081115723 s +DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=241497182451478511734090784161542539653, time:1750768761.7125306s req_ids:[8] +DEBUG 06-24 20:39:21 [manager.py:391] +ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:392.2848701477051ms total_cost_time:392.32754707336426ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14483 prompt_cache_len:5151 prompt_cache_ratio:0.35565835807498447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 +DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:22 [batch.py:51] router release req id 8 +INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.11084747314453125 s +INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.1131753921508789 s +DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=219084662789604161303982372905736323637, time:1750768762.1104553s req_ids:[8] +DEBUG 06-24 20:39:22 [manager.py:391] +ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:217.47732162475586ms total_cost_time:217.54217147827148ms,out_token_counter:1 mean_per_token_cost_time: 0.064849853515625ms prompt_token_num:14484 prompt_cache_len:5151 prompt_cache_ratio:0.35563380281690143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 +DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:22 [batch.py:51] router release req id 8 +INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s +INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11112284660339355 s +DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=308923397991048543146706306520712943503, time:1750768762.3344615s req_ids:[8] +DEBUG 06-24 20:39:22 [manager.py:391] +ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:212.48412132263184ms total_cost_time:212.54587173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14485 prompt_cache_len:5151 prompt_cache_ratio:0.3556092509492578 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 +DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:22 [batch.py:51] router release req id 8 +INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10924124717712402 s +INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11118888854980469 s +DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=285278350662507412520499487066676135052, time:1750768762.5550234s req_ids:[8] +DEBUG 06-24 20:39:22 [manager.py:391] +ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:212.61334419250488ms total_cost_time:212.65912055969238ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14486 prompt_cache_len:5151 prompt_cache_ratio:0.35558470247135165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 +DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:22 [batch.py:51] router release req id 8 +INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s +INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s +DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=131928666935576175587118790988534849238, time:1750768762.7738123s req_ids:[8] +DEBUG 06-24 20:39:22 [manager.py:391] +ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:216.1080837249756ms total_cost_time:216.15123748779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14487 prompt_cache_len:5151 prompt_cache_ratio:0.35556015738248087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 +DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:22 [batch.py:51] router release req id 8 +INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.1094210147857666 s +INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11148428916931152 s +DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=58444430159362729819293826139630683712, time:1750768762.9958334s req_ids:[8] +DEBUG 06-24 20:39:22 [manager.py:391] +ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:216.28332138061523ms total_cost_time:216.32623672485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14488 prompt_cache_len:5151 prompt_cache_ratio:0.3555356156819437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 +DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:23 [batch.py:51] router release req id 8 +INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.1095418930053711 s +INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11148405075073242 s +INFO 06-24 20:39:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=1877656917933041874672586830458644840, time:1750768763.2195485s req_ids:[8] +DEBUG 06-24 20:39:23 [manager.py:391] +ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:378.4351348876953ms total_cost_time:378.4806728363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14489 prompt_cache_len:5151 prompt_cache_ratio:0.3555110773690386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 +DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:23 [batch.py:51] router release req id 8 +INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s +INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11136984825134277 s +DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=248209539323442834005900165792536595107, time:1750768763.6035285s req_ids:[8] +DEBUG 06-24 20:39:23 [manager.py:391] +ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:204.44965362548828ms total_cost_time:204.49423789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14490 prompt_cache_len:5151 prompt_cache_ratio:0.35548654244306416 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 +DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:23 [batch.py:51] router release req id 8 +INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.10872411727905273 s +INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s +DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=21632774937063864744075929724236934948, time:1750768763.8149583s req_ids:[8] +DEBUG 06-24 20:39:23 [manager.py:391] +ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:210.1423740386963ms total_cost_time:210.188627243042ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14491 prompt_cache_len:5151 prompt_cache_ratio:0.3554620109033193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 +DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:23 [batch.py:51] router release req id 8 +INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10911226272583008 s +INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11114215850830078 s +DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=167152514488016462976516411848471273693, time:1750768764.030579s req_ids:[8] +DEBUG 06-24 20:39:24 [manager.py:391] +ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:207.9942226409912ms total_cost_time:208.0378532409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14492 prompt_cache_len:5151 prompt_cache_ratio:0.355437482749103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 +DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:24 [batch.py:51] router release req id 8 +INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10810327529907227 s +INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11009097099304199 s +DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=35036258650783333892583551499587202222, time:1750768764.2477124s req_ids:[8] +DEBUG 06-24 20:39:24 [manager.py:391] +ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:215.30795097351074ms total_cost_time:215.35086631774902ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14493 prompt_cache_len:5151 prompt_cache_ratio:0.35541295797971434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 +DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:24 [batch.py:51] router release req id 8 +INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10853815078735352 s +INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11057496070861816 s +DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=208865894759764555364853057868817563724, time:1750768764.4678087s req_ids:[8] +DEBUG 06-24 20:39:24 [manager.py:391] +ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:215.3451442718506ms total_cost_time:215.38734436035156ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14494 prompt_cache_len:5151 prompt_cache_ratio:0.35538843659445285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 +DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:24 [batch.py:51] router release req id 8 +INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s +INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019539833068848 s +DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=245343045783389094714580991855729431060, time:1750768764.7006857s req_ids:[8] +DEBUG 06-24 20:39:24 [manager.py:391] +ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:360.9781265258789ms total_cost_time:361.0224723815918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14495 prompt_cache_len:5151 prompt_cache_ratio:0.3553639185926181 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 +DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:24 [batch.py:51] router release req id 8 +INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s +INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11102867126464844 s +DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=277210873355568892756519530403589096244, time:1750768765.0587015s req_ids:[8] +DEBUG 06-24 20:39:25 [manager.py:391] +ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:213.07992935180664ms total_cost_time:213.12260627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14496 prompt_cache_len:5151 prompt_cache_ratio:0.35533940397350994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 +DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:25 [batch.py:51] router release req id 8 +INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10947728157043457 s +INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11156320571899414 s +DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=122297740152915074659302917997924185362, time:1750768765.2778533s req_ids:[8] +DEBUG 06-24 20:39:25 [manager.py:391] +ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:214.99395370483398ms total_cost_time:215.03615379333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14497 prompt_cache_len:5151 prompt_cache_ratio:0.35531489273642825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 +DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:25 [batch.py:51] router release req id 8 +INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10861420631408691 s +INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s +DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=87520481113193748356896574277585490099, time:1750768765.4995306s req_ids:[8] +DEBUG 06-24 20:39:25 [manager.py:391] +ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:216.05515480041504ms total_cost_time:216.09926223754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14498 prompt_cache_len:5151 prompt_cache_ratio:0.3552903848806732 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 +DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:25 [batch.py:51] router release req id 8 +INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10935544967651367 s +INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.1104888916015625 s +DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=312966144090525822192825665557438298362, time:1750768765.7217438s req_ids:[8] +DEBUG 06-24 20:39:25 [manager.py:391] +ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:213.80209922790527ms total_cost_time:213.84620666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14499 prompt_cache_len:5151 prompt_cache_ratio:0.3552658804055452 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 +DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:25 [batch.py:51] router release req id 8 +INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.11137986183166504 s +DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=58759272044990527095065356310375434187, time:1750768765.9398122s req_ids:[8] +DEBUG 06-24 20:39:25 [manager.py:391] +INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11414933204650879 s +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:419.07405853271484ms total_cost_time:419.1241264343262ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:14500 prompt_cache_len:5151 prompt_cache_ratio:0.35524137931034483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 +DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:26 [batch.py:51] router release req id 8 +INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.11020922660827637 s +INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.11215019226074219 s +DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=51893165943518206300431894792615276969, time:1750768766.367028s req_ids:[8] +DEBUG 06-24 20:39:26 [manager.py:391] +ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:219.75994110107422ms total_cost_time:219.8021411895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14501 prompt_cache_len:5151 prompt_cache_ratio:0.3552168815943728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 +DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:26 [batch.py:51] router release req id 8 +INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.1084597110748291 s +INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.1102910041809082 s +DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=99835580582123744120723674369900442802, time:1750768766.5912845s req_ids:[8] +DEBUG 06-24 20:39:26 [manager.py:391] +ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:217.0083522796631ms total_cost_time:217.06438064575195ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:14502 prompt_cache_len:5151 prompt_cache_ratio:0.3551923872569301 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 +DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:26 [batch.py:51] router release req id 8 +INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.10892081260681152 s +INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.11076188087463379 s +DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=222975783770439133021536474467904369785, time:1750768766.8129175s req_ids:[8] +DEBUG 06-24 20:39:26 [manager.py:391] +ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:214.0481472015381ms total_cost_time:214.09153938293457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14503 prompt_cache_len:5151 prompt_cache_ratio:0.3551678962973178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 +DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:26 [batch.py:51] router release req id 8 +INFO 06-24 20:39:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10914945602416992 s +INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.1109151840209961 s +DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=13356536801042388314544361589311080648, time:1750768767.035596s req_ids:[8] +DEBUG 06-24 20:39:27 [manager.py:391] +ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:215.27504920959473ms total_cost_time:215.32630920410156ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:14504 prompt_cache_len:5151 prompt_cache_ratio:0.3551434087148373 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 +DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:27 [batch.py:51] router release req id 8 +INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10932779312133789 s +INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11127853393554688 s +DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=121180605291686933521832715627954682820, time:1750768767.2556894s req_ids:[8] +DEBUG 06-24 20:39:27 [manager.py:391] +ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:214.13493156433105ms total_cost_time:214.17641639709473ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14505 prompt_cache_len:5151 prompt_cache_ratio:0.35511892450879007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 +DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:27 [batch.py:51] router release req id 8 +INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10916662216186523 s +INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11140942573547363 s +DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=152699025386797280072989503536975120357, time:1750768767.4784515s req_ids:[8] +DEBUG 06-24 20:39:27 [manager.py:391] +ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:397.97163009643555ms total_cost_time:398.01764488220215ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14506 prompt_cache_len:5151 prompt_cache_ratio:0.35509444367847787 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 +DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:27 [batch.py:51] router release req id 8 +INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10917139053344727 s +INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11104202270507812 s +DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=225517323539097838137148115057711582619, time:1750768767.8819711s req_ids:[8] +DEBUG 06-24 20:39:27 [manager.py:391] +ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:214.5991325378418ms total_cost_time:214.64180946350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14507 prompt_cache_len:5151 prompt_cache_ratio:0.3550699662232026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 +DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:28 [batch.py:51] router release req id 8 +INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10844016075134277 s +INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s +DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=6713878808218885103510731605048636514, time:1750768768.1038742s req_ids:[8] +DEBUG 06-24 20:39:28 [manager.py:391] +ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:212.92805671691895ms total_cost_time:212.97311782836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14508 prompt_cache_len:5151 prompt_cache_ratio:0.35504549214226633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 +DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:28 [batch.py:51] router release req id 8 +INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s +INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.1111457347869873 s +DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=42346344243147713077976346209372286639, time:1750768768.3278384s req_ids:[8] +DEBUG 06-24 20:39:28 [manager.py:391] +ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:221.2541103363037ms total_cost_time:221.2989330291748ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14509 prompt_cache_len:5151 prompt_cache_ratio:0.3550210214349714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 +DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:28 [batch.py:51] router release req id 8 +INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10893464088439941 s +INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11146807670593262 s +DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=220021960560683506433750817331502473825, time:1750768768.5506558s req_ids:[8] +DEBUG 06-24 20:39:28 [manager.py:391] +ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:227.0960807800293ms total_cost_time:227.1406650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14510 prompt_cache_len:5151 prompt_cache_ratio:0.35499655410062025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 +DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:28 [batch.py:51] router release req id 8 +INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10839080810546875 s +INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s +DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=229685299257154220715900711509445511589, time:1750768768.785899s req_ids:[8] +DEBUG 06-24 20:39:28 [manager.py:391] +ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:209.9781036376953ms total_cost_time:210.02483367919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14511 prompt_cache_len:5151 prompt_cache_ratio:0.3549720901385156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 +DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:28 [batch.py:51] router release req id 8 +INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10783004760742188 s +INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.10985660552978516 s +DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=116243069301083513492510742167312509184, time:1750768769.0023446s req_ids:[8] +DEBUG 06-24 20:39:29 [manager.py:391] +ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:381.3645839691162ms total_cost_time:381.4098834991455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14512 prompt_cache_len:5151 prompt_cache_ratio:0.35494762954796033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 +DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:29 [batch.py:51] router release req id 8 +INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s +INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.1107938289642334 s +DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=51957068422808433717595741949994564403, time:1750768769.3901327s req_ids:[8] +DEBUG 06-24 20:39:29 [manager.py:391] +ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:215.7609462738037ms total_cost_time:215.8055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14513 prompt_cache_len:5151 prompt_cache_ratio:0.35492317232825743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 +DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:29 [batch.py:51] router release req id 8 +INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10838770866394043 s +INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.11039233207702637 s +DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=92997065188900168108083870791664317073, time:1750768769.6129997s req_ids:[8] +DEBUG 06-24 20:39:29 [manager.py:391] +ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:220.14355659484863ms total_cost_time:220.18671035766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14514 prompt_cache_len:5151 prompt_cache_ratio:0.3548987184787102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 +DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:29 [batch.py:51] router release req id 8 +INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10933661460876465 s +INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.11144876480102539 s +DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=21022815083986832922779510185894412257, time:1750768769.8390672s req_ids:[8] +DEBUG 06-24 20:39:29 [manager.py:391] +ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:39:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 57593.660 tokens/s +DEBUG 06-24 20:39:29 [stats.py:37] Avg prompt tokens throughput: 57585.714 tokens/s +DEBUG 06-24 20:39:29 [stats.py:37] Avg generate tokens throughput: 7.945 tokens/s +INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:215.01922607421875ms total_cost_time:215.06333351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14515 prompt_cache_len:5151 prompt_cache_ratio:0.3548742679986221 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 +DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:29 [batch.py:51] router release req id 8 +INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s +INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.11097288131713867 s +DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=38548695778031382016589659988168318072, time:1750768770.0609934s req_ids:[8] +DEBUG 06-24 20:39:30 [manager.py:391] +ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:214.7083282470703ms total_cost_time:214.7517204284668ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14516 prompt_cache_len:5151 prompt_cache_ratio:0.3548498208872968 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 +DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:30 [batch.py:51] router release req id 8 +INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10804271697998047 s +INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s +DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=158478708906950481299914873774314052208, time:1750768770.2822213s req_ids:[8] +DEBUG 06-24 20:39:30 [manager.py:391] +ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:381.4666271209717ms total_cost_time:381.5126419067383ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14517 prompt_cache_len:5151 prompt_cache_ratio:0.354825377144038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 +DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:30 [batch.py:51] router release req id 8 +INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10880446434020996 s +INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.1097707748413086 s +DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=287697090486576627064814895502678023296, time:1750768770.672578s req_ids:[8] +DEBUG 06-24 20:39:30 [manager.py:391] +ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:174.54957962036133ms total_cost_time:174.59583282470703ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14518 prompt_cache_len:5151 prompt_cache_ratio:0.3548009367681499 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 +DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:30 [batch.py:51] router release req id 8 +INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s +INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.11119699478149414 s +DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=88368069443256321029080726696609605266, time:1750768770.8521051s req_ids:[8] +DEBUG 06-24 20:39:30 [manager.py:391] +ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:211.24625205993652ms total_cost_time:211.29131317138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14519 prompt_cache_len:5151 prompt_cache_ratio:0.35477649975893655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 +DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:30 [batch.py:51] router release req id 8 +INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10927438735961914 s +INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11032629013061523 s +DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=162736841805040154597337970261109821179, time:1750768771.0699902s req_ids:[8] +DEBUG 06-24 20:39:31 [manager.py:391] +ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:207.46660232543945ms total_cost_time:207.51237869262695ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14520 prompt_cache_len:5151 prompt_cache_ratio:0.3547520661157025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 +DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:31 [batch.py:51] router release req id 8 +INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10957741737365723 s +INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11060309410095215 s +DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=203851999625692996963080883550923803114, time:1750768771.2857325s req_ids:[8] +DEBUG 06-24 20:39:31 [manager.py:391] +ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:211.3487720489502ms total_cost_time:211.3945484161377ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14521 prompt_cache_len:5151 prompt_cache_ratio:0.35472763583775224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 +DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:31 [batch.py:51] router release req id 8 +INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10896444320678711 s +INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11089015007019043 s +DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=84784081357613295429460316189847027415, time:1750768771.505065s req_ids:[8] +DEBUG 06-24 20:39:31 [manager.py:391] +ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:215.24930000305176ms total_cost_time:215.29245376586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14522 prompt_cache_len:5151 prompt_cache_ratio:0.35470320892439056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 +DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:31 [batch.py:51] router release req id 8 +INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10847949981689453 s +INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s +DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=34840033883206875132247591791445968657, time:1750768771.7278214s req_ids:[8] +DEBUG 06-24 20:39:31 [manager.py:391] +ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:383.66174697875977ms total_cost_time:383.71944427490234ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14523 prompt_cache_len:5151 prompt_cache_ratio:0.35467878537492253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 +DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:32 [batch.py:51] router release req id 8 +INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10870480537414551 s +INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11017632484436035 s +DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=19578909454828492390384784907658982033, time:1750768772.120981s req_ids:[8] +DEBUG 06-24 20:39:32 [manager.py:391] +ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:206.33625984191895ms total_cost_time:206.38251304626465ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14524 prompt_cache_len:5151 prompt_cache_ratio:0.3546543651886533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 +DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:32 [batch.py:51] router release req id 8 +INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s +INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11028289794921875 s +DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=253050683166302558214573613985803362243, time:1750768772.3340127s req_ids:[8] +DEBUG 06-24 20:39:32 [manager.py:391] +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:216.36271476745605ms total_cost_time:216.40753746032715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14525 prompt_cache_len:5151 prompt_cache_ratio:0.35462994836488815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 +DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:32 [batch.py:51] router release req id 8 +INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10906076431274414 s +INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s +DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=172541633832869620480675002880845347885, time:1750768772.554955s req_ids:[8] +DEBUG 06-24 20:39:32 [manager.py:391] +ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:212.04805374145508ms total_cost_time:212.09239959716797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14526 prompt_cache_len:5151 prompt_cache_ratio:0.3546055349029327 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 +DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:32 [batch.py:51] router release req id 8 +INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10868120193481445 s +INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s +DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=116563986190267675180670257853089251592, time:1750768772.7719748s req_ids:[8] +DEBUG 06-24 20:39:32 [manager.py:391] +ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:214.12062644958496ms total_cost_time:214.16807174682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:14527 prompt_cache_len:5151 prompt_cache_ratio:0.35458112480209264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 +DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:32 [batch.py:51] router release req id 8 +INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10902786254882812 s +INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11092495918273926 s +DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=337779889701170464176396188361321404737, time:1750768772.9921832s req_ids:[8] +DEBUG 06-24 20:39:32 [manager.py:391] +ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:388.883113861084ms total_cost_time:388.92579078674316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14528 prompt_cache_len:5151 prompt_cache_ratio:0.354556718061674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 +DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:33 [batch.py:51] router release req id 8 +INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.1095120906829834 s +INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.11142444610595703 s +DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=115831165849966091983738549977859610938, time:1750768773.3863826s req_ids:[8] +DEBUG 06-24 20:39:33 [manager.py:391] +ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:214.63274955749512ms total_cost_time:214.677095413208ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14529 prompt_cache_len:5151 prompt_cache_ratio:0.35453231468098284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 +DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:33 [batch.py:51] router release req id 8 +INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.10982036590576172 s +INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.11160731315612793 s +DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=297552670641093857025616635569833848039, time:1750768773.6056292s req_ids:[8] +DEBUG 06-24 20:39:33 [manager.py:391] +ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:213.64545822143555ms total_cost_time:213.67383003234863ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14530 prompt_cache_len:5151 prompt_cache_ratio:0.3545079146593255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 +DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:33 [batch.py:51] router release req id 8 +INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.1051626205444336 s +INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.10715079307556152 s +DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=68897729774901835811780633384753556976, time:1750768773.8237085s req_ids:[8] +DEBUG 06-24 20:39:33 [manager.py:391] +ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:216.4022922515869ms total_cost_time:216.4478302001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14531 prompt_cache_len:5151 prompt_cache_ratio:0.3544835179960085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 +DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:33 [batch.py:51] router release req id 8 +INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.10673117637634277 s +INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10866165161132812 s +DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=292428663578407402613213095398339634301, time:1750768774.0457487s req_ids:[8] +DEBUG 06-24 20:39:34 [manager.py:391] +ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:212.34393119812012ms total_cost_time:212.36538887023926ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:14532 prompt_cache_len:5151 prompt_cache_ratio:0.3544591246903386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 +DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:34 [batch.py:51] router release req id 8 +INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.1070253849029541 s +INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10894083976745605 s +DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=85666927417501075917827027665996698415, time:1750768774.2727563s req_ids:[8] +DEBUG 06-24 20:39:34 [manager.py:391] +ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:224.9915599822998ms total_cost_time:225.0204086303711ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:14533 prompt_cache_len:5151 prompt_cache_ratio:0.3544347347416225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 +DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:34 [batch.py:51] router release req id 8 +INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.1066436767578125 s +INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10848617553710938 s +DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=48624980663240354968588472068637974651, time:1750768774.4927688s req_ids:[8] +DEBUG 06-24 20:39:34 [manager.py:391] +ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:394.64879035949707ms total_cost_time:394.67811584472656ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:14534 prompt_cache_len:5151 prompt_cache_ratio:0.35441034814916744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 +DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:34 [batch.py:51] router release req id 8 +INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.10545563697814941 s +INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10740542411804199 s +DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=253663197077762910574318102480921532126, time:1750768774.8926473s req_ids:[8] +DEBUG 06-24 20:39:34 [manager.py:391] +ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:212.15009689331055ms total_cost_time:212.17918395996094ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:14535 prompt_cache_len:5151 prompt_cache_ratio:0.3543859649122807 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 +DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:35 [batch.py:51] router release req id 8 +INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.1065216064453125 s +INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10839557647705078 s +DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=243621567440008859290270158440564756858, time:1750768775.1073027s req_ids:[8] +DEBUG 06-24 20:39:35 [manager.py:391] +ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:214.9794101715088ms total_cost_time:215.00802040100098ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14536 prompt_cache_len:5151 prompt_cache_ratio:0.35436158503026965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 +DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:35 [batch.py:51] router release req id 8 +INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10625600814819336 s +INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.1080775260925293 s +DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=265979074091991026862489475995578720631, time:1750768775.3319008s req_ids:[8] +DEBUG 06-24 20:39:35 [manager.py:391] +ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:226.24707221984863ms total_cost_time:226.2728214263916ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14537 prompt_cache_len:5151 prompt_cache_ratio:0.35433720850244205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 +DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:35 [batch.py:51] router release req id 8 +INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10551905632019043 s +INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10728931427001953 s +DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=79254854132675384673988451049041210039, time:1750768775.556698s req_ids:[8] +DEBUG 06-24 20:39:35 [manager.py:391] +ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:213.33575248718262ms total_cost_time:213.36030960083008ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14538 prompt_cache_len:5151 prompt_cache_ratio:0.35431283532810565 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 +DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:35 [batch.py:51] router release req id 8 +INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10634064674377441 s +INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10814833641052246 s +DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=93757842686643624768673998178436165449, time:1750768775.7731576s req_ids:[8] +DEBUG 06-24 20:39:35 [manager.py:391] +ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:214.75911140441895ms total_cost_time:214.78676795959473ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14539 prompt_cache_len:5151 prompt_cache_ratio:0.35428846550656856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 +DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:35 [batch.py:51] router release req id 8 +INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10568451881408691 s +INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.1065833568572998 s +DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=202427555565836042505631835410287323896, time:1750768775.9910913s req_ids:[8] +DEBUG 06-24 20:39:35 [manager.py:391] +ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:352.3087501525879ms total_cost_time:352.3366451263428ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14540 prompt_cache_len:5151 prompt_cache_ratio:0.3542640990371389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 +DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:36 [batch.py:51] router release req id 8 +INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10631704330444336 s +INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.10827064514160156 s +DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=171990501095187370170590592793478840113, time:1750768776.3483527s req_ids:[8] +DEBUG 06-24 20:39:36 [manager.py:391] +ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:212.1107578277588ms total_cost_time:212.13555335998535ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14541 prompt_cache_len:5151 prompt_cache_ratio:0.3542397359191252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 +DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:36 [batch.py:51] router release req id 8 +INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10549807548522949 s +INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.10733342170715332 s +DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=43865969775910525812742350957049867530, time:1750768776.5692482s req_ids:[8] +DEBUG 06-24 20:39:36 [manager.py:391] +ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:228.26337814331055ms total_cost_time:228.2886505126953ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14542 prompt_cache_len:5151 prompt_cache_ratio:0.35421537615183607 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 +DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:36 [batch.py:51] router release req id 8 +INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10509443283081055 s +INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.1069021224975586 s +DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=58206165729686710022622029361539552525, time:1750768776.799016s req_ids:[8] +DEBUG 06-24 20:39:36 [manager.py:391] +ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:227.1115779876709ms total_cost_time:227.13708877563477ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14543 prompt_cache_len:5151 prompt_cache_ratio:0.3541910197345802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 +DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:36 [batch.py:51] router release req id 8 +INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.1050422191619873 s +INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10706543922424316 s +DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=77033703735486625642662111757147313043, time:1750768777.0248942s req_ids:[8] +DEBUG 06-24 20:39:37 [manager.py:391] +ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:215.31176567077637ms total_cost_time:215.33703804016113ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14544 prompt_cache_len:5151 prompt_cache_ratio:0.3541666666666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 +DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:37 [batch.py:51] router release req id 8 +INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.1050117015838623 s +INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10679054260253906 s +DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=295670723805656863671268656658320243139, time:1750768777.244879s req_ids:[8] +DEBUG 06-24 20:39:37 [manager.py:391] +ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:216.92347526550293ms total_cost_time:216.9477939605713ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:14545 prompt_cache_len:5151 prompt_cache_ratio:0.3541423169474046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 +DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:37 [batch.py:51] router release req id 8 +INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.10505509376525879 s +INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10692381858825684 s +DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=308159446360874762038106091714660563488, time:1750768777.4642599s req_ids:[8] +DEBUG 06-24 20:39:37 [manager.py:391] +ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:376.6951560974121ms total_cost_time:376.72924995422363ms,out_token_counter:1 mean_per_token_cost_time: 0.03409385681152344ms prompt_token_num:14546 prompt_cache_len:5151 prompt_cache_ratio:0.3541179705761034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 +DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:37 [batch.py:51] router release req id 8 +INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.10520124435424805 s +INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.1069796085357666 s +DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=189630209730561884198165174909157859004, time:1750768777.843753s req_ids:[8] +DEBUG 06-24 20:39:37 [manager.py:391] +ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:215.43645858764648ms total_cost_time:215.46220779418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14547 prompt_cache_len:5151 prompt_cache_ratio:0.3540936275520726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 +DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:37 [batch.py:51] router release req id 8 +INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10455322265625 s +INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.10618925094604492 s +DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=325918525779496039842248217357204321678, time:1750768778.062834s req_ids:[8] +DEBUG 06-24 20:39:38 [manager.py:391] +ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:176.32341384887695ms total_cost_time:176.34892463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14548 prompt_cache_len:5151 prompt_cache_ratio:0.35406928787462194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 +DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:38 [batch.py:51] router release req id 8 +INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10448503494262695 s +INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.10615873336791992 s +DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=92758111282561674591850410791681163346, time:1750768778.2447345s req_ids:[8] +DEBUG 06-24 20:39:38 [manager.py:391] +ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:218.86348724365234ms total_cost_time:218.92595291137695ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:14549 prompt_cache_len:5151 prompt_cache_ratio:0.35404495154306137 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 +DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:38 [batch.py:51] router release req id 8 +INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10870552062988281 s +INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.11075520515441895 s +DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=322179404023758885546546502674286972741, time:1750768778.4655535s req_ids:[8] +DEBUG 06-24 20:39:38 [manager.py:391] +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:217.61059761047363ms total_cost_time:217.65470504760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14550 prompt_cache_len:5151 prompt_cache_ratio:0.354020618556701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 +DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:38 [batch.py:51] router release req id 8 +INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10891580581665039 s +INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s +DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=267868153342560201599840998098879924559, time:1750768778.688823s req_ids:[8] +DEBUG 06-24 20:39:38 [manager.py:391] +ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:389.43934440612793ms total_cost_time:389.4658088684082ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:14551 prompt_cache_len:5151 prompt_cache_ratio:0.3539962889148512 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 +DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:38 [batch.py:51] router release req id 8 +INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10507512092590332 s +INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.1069638729095459 s +DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=180161131653370043387807789626053008799, time:1750768779.0837405s req_ids:[8] +DEBUG 06-24 20:39:39 [manager.py:391] +ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:218.99151802062988ms total_cost_time:219.01702880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14552 prompt_cache_len:5151 prompt_cache_ratio:0.35397196261682246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 +DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:39 [batch.py:51] router release req id 8 +INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10482287406921387 s +INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.1066582202911377 s +DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=206215961922434598291205563438507583166, time:1750768779.306101s req_ids:[8] +DEBUG 06-24 20:39:39 [manager.py:391] +ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:216.71390533447266ms total_cost_time:216.73917770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14553 prompt_cache_len:5151 prompt_cache_ratio:0.3539476396619254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 +DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:39 [batch.py:51] router release req id 8 +INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10648703575134277 s +INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10832786560058594 s +DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=252652394669039347765058144388945946225, time:1750768779.52618s req_ids:[8] +DEBUG 06-24 20:39:39 [manager.py:391] +ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:217.89073944091797ms total_cost_time:217.91529655456543ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14554 prompt_cache_len:5151 prompt_cache_ratio:0.3539233200494709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 +DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:39 [batch.py:51] router release req id 8 +INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10501265525817871 s +INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10709428787231445 s +DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=287534783340934451281967748198843775954, time:1750768779.7469914s req_ids:[8] +DEBUG 06-24 20:39:39 [manager.py:391] +ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:217.52595901489258ms total_cost_time:217.54956245422363ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14555 prompt_cache_len:5151 prompt_cache_ratio:0.3538990037787702 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 +DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:39 [batch.py:51] router release req id 8 +INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10512948036193848 s +INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10709619522094727 s +DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=89778433006741374031564466399037010663, time:1750768779.9680543s req_ids:[8] +DEBUG 06-24 20:39:39 [manager.py:391] +DEBUG 06-24 20:39:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 59424.380 tokens/s +DEBUG 06-24 20:39:39 [stats.py:37] Avg prompt tokens throughput: 59416.304 tokens/s +DEBUG 06-24 20:39:39 [stats.py:37] Avg generate tokens throughput: 8.075 tokens/s +ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:386.9898319244385ms total_cost_time:387.01748847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14556 prompt_cache_len:5151 prompt_cache_ratio:0.35387469084913437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 +DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:40 [batch.py:51] router release req id 8 +INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10444808006286621 s +INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10617828369140625 s +DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=280520322171749294401558454858125420448, time:1750768780.358777s req_ids:[8] +DEBUG 06-24 20:39:40 [manager.py:391] +ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:222.35822677612305ms total_cost_time:222.3799228668213ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14557 prompt_cache_len:5151 prompt_cache_ratio:0.35385038125987495 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 +DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:40 [batch.py:51] router release req id 8 +INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10513019561767578 s +INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10718584060668945 s +DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=140410259684259293180875717506552278344, time:1750768780.585255s req_ids:[8] +DEBUG 06-24 20:39:40 [manager.py:391] +ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:220.48044204711914ms total_cost_time:220.52669525146484ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14558 prompt_cache_len:5151 prompt_cache_ratio:0.3538260750103036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 +DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:40 [batch.py:51] router release req id 8 +INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10622835159301758 s +INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10811924934387207 s +DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=1653032116612397461388754469803233048, time:1750768780.8078618s req_ids:[8] +DEBUG 06-24 20:39:40 [manager.py:391] +ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:212.60905265808105ms total_cost_time:212.6328945159912ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:14559 prompt_cache_len:5151 prompt_cache_ratio:0.3538017720997321 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 +DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:40 [batch.py:51] router release req id 8 +INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10672831535339355 s +INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.1080923080444336 s +DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=205397277942477784722705214871751204846, time:1750768781.0256853s req_ids:[8] +DEBUG 06-24 20:39:41 [manager.py:391] +ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:215.75617790222168ms total_cost_time:215.80052375793457ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14560 prompt_cache_len:5151 prompt_cache_ratio:0.35377747252747255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 +DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:41 [batch.py:51] router release req id 8 +INFO 06-24 20:39:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10914158821105957 s +INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.11106038093566895 s +DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=19059869327271715355176767311431732765, time:1750768781.2463067s req_ids:[8] +DEBUG 06-24 20:39:41 [manager.py:391] +ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:217.90361404418945ms total_cost_time:217.94795989990234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14561 prompt_cache_len:5151 prompt_cache_ratio:0.35375317629283703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 +DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:41 [batch.py:51] router release req id 8 +INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10848593711853027 s +INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.11055159568786621 s +DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=206879657124235317704285691125894303644, time:1750768781.468305s req_ids:[8] +DEBUG 06-24 20:39:41 [manager.py:391] +ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:406.7668914794922ms total_cost_time:406.79264068603516ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14562 prompt_cache_len:5151 prompt_cache_ratio:0.353728883395138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 +DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:41 [batch.py:51] router release req id 8 +INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10489439964294434 s +INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.10699582099914551 s +DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=309916577608175208869587583860240154216, time:1750768781.8821192s req_ids:[8] +DEBUG 06-24 20:39:41 [manager.py:391] +ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:223.8020896911621ms total_cost_time:223.82593154907227ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:14563 prompt_cache_len:5151 prompt_cache_ratio:0.35370459383368813 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 +DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:42 [batch.py:51] router release req id 8 +INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10525345802307129 s +INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.10729098320007324 s +DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=165710777552318443583147627246047820026, time:1750768782.10833s req_ids:[8] +DEBUG 06-24 20:39:42 [manager.py:391] +ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:222.11003303527832ms total_cost_time:222.1357822418213ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14564 prompt_cache_len:5151 prompt_cache_ratio:0.35368030760780006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 +DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:42 [batch.py:51] router release req id 8 +INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10807037353515625 s +INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11021828651428223 s +DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=215334853418445634214597553135006690364, time:1750768782.3342974s req_ids:[8] +DEBUG 06-24 20:39:42 [manager.py:391] +ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:216.04275703430176ms total_cost_time:216.06683731079102ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:14565 prompt_cache_len:5151 prompt_cache_ratio:0.3536560247167868 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 +DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:42 [batch.py:51] router release req id 8 +INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10508966445922852 s +INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.10776138305664062 s +DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=334573730246093725421829264613665050079, time:1750768782.5556443s req_ids:[8] +DEBUG 06-24 20:39:42 [manager.py:391] +ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:217.07630157470703ms total_cost_time:217.12088584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14566 prompt_cache_len:5151 prompt_cache_ratio:0.35363174515996154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 +DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:42 [batch.py:51] router release req id 8 +INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10887002944946289 s +INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11088919639587402 s +DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=215790762517037872206847644005661357873, time:1750768782.7753825s req_ids:[8] +DEBUG 06-24 20:39:42 [manager.py:391] +ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:213.5601043701172ms total_cost_time:213.60325813293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14567 prompt_cache_len:5151 prompt_cache_ratio:0.3536074689366376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 +DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:42 [batch.py:51] router release req id 8 +INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.1089472770690918 s +INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11099410057067871 s +DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=68790359951395144961366974045103045021, time:1750768782.9947865s req_ids:[8] +DEBUG 06-24 20:39:42 [manager.py:391] +ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:383.84270668029785ms total_cost_time:383.88776779174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14568 prompt_cache_len:5151 prompt_cache_ratio:0.3535831960461285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 +DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:43 [batch.py:51] router release req id 8 +INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10820460319519043 s +INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.11005616188049316 s +DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=297256081971149290798006059989384643590, time:1750768783.3827567s req_ids:[8] +DEBUG 06-24 20:39:43 [manager.py:391] +ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:175.62031745910645ms total_cost_time:175.6448745727539ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14569 prompt_cache_len:5151 prompt_cache_ratio:0.35355892648774795 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 +DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:43 [batch.py:51] router release req id 8 +INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10473084449768066 s +INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10885357856750488 s +DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=4973619096162654162692915543051061468, time:1750768783.564899s req_ids:[8] +DEBUG 06-24 20:39:43 [manager.py:391] +ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:180.81068992614746ms total_cost_time:180.83524703979492ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14570 prompt_cache_len:5151 prompt_cache_ratio:0.3535346602608099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 +DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:43 [batch.py:51] router release req id 8 +INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.1049654483795166 s +INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10670328140258789 s +DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=233917476471107642307969767589465740054, time:1750768783.7476735s req_ids:[8] +DEBUG 06-24 20:39:43 [manager.py:391] +ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:213.63592147827148ms total_cost_time:213.66190910339355ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14571 prompt_cache_len:5151 prompt_cache_ratio:0.35351039736462836 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 +DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:43 [batch.py:51] router release req id 8 +INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10660719871520996 s +INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10859799385070801 s +DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=17118165458813347663164568606460948944, time:1750768783.9665282s req_ids:[8] +DEBUG 06-24 20:39:43 [manager.py:391] +ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:220.07036209106445ms total_cost_time:220.1082706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:14572 prompt_cache_len:5151 prompt_cache_ratio:0.35348613779851773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 +DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:44 [batch.py:51] router release req id 8 +INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10977530479431152 s +INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.1117258071899414 s +DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=205642516942540850395020645980091111761, time:1750768784.189258s req_ids:[8] +DEBUG 06-24 20:39:44 [manager.py:391] +ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:213.36102485656738ms total_cost_time:213.42897415161133ms,out_token_counter:1 mean_per_token_cost_time: 0.06794929504394531ms prompt_token_num:14573 prompt_cache_len:5151 prompt_cache_ratio:0.35346188156179237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 +DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:44 [batch.py:51] router release req id 8 +INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10812807083129883 s +INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.11024689674377441 s +DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=329062553862864902825476588063199503672, time:1750768784.4081151s req_ids:[8] +DEBUG 06-24 20:39:44 [manager.py:391] +ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:379.43553924560547ms total_cost_time:379.4875144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:14574 prompt_cache_len:5151 prompt_cache_ratio:0.353437628653767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 +DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:44 [batch.py:51] router release req id 8 +INFO 06-24 20:39:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s +INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.11000704765319824 s +DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=302621952782947911018666686882542310677, time:1750768784.8011167s req_ids:[8] +DEBUG 06-24 20:39:44 [manager.py:391] +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:225.12412071228027ms total_cost_time:225.1608371734619ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:14575 prompt_cache_len:5151 prompt_cache_ratio:0.3534133790737564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 +DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:44 [batch.py:51] router release req id 8 +INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10988879203796387 s +INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11193156242370605 s +DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=265653627118302793511361999638043288970, time:1750768785.0297034s req_ids:[8] +DEBUG 06-24 20:39:45 [manager.py:391] +ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:224.74122047424316ms total_cost_time:224.77221488952637ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:14576 prompt_cache_len:5151 prompt_cache_ratio:0.35338913282107576 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 +DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:45 [batch.py:51] router release req id 8 +INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10801935195922852 s +INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.10956549644470215 s +DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=253164673824209732774513588948796858866, time:1750768785.2535675s req_ids:[8] +DEBUG 06-24 20:39:45 [manager.py:391] +ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:213.23347091674805ms total_cost_time:213.28210830688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:14577 prompt_cache_len:5151 prompt_cache_ratio:0.3533648898950401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 +DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:45 [batch.py:51] router release req id 8 +INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s +INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11107063293457031 s +DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=124452974190543933737609369919558691927, time:1750768785.4730792s req_ids:[8] +DEBUG 06-24 20:39:45 [manager.py:391] +ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:217.16594696044922ms total_cost_time:217.20266342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:14578 prompt_cache_len:5151 prompt_cache_ratio:0.35334065029496503 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 +DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:45 [batch.py:51] router release req id 8 +INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10924315452575684 s +INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11134648323059082 s +DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=149675380207269222589362834159669788817, time:1750768785.6982708s req_ids:[8] +DEBUG 06-24 20:39:45 [manager.py:391] +ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:216.72892570495605ms total_cost_time:216.77589416503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14579 prompt_cache_len:5151 prompt_cache_ratio:0.353316414020166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 +DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:46 [batch.py:51] router release req id 8 +INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.3128364086151123 s +INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.3141770362854004 s +DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=301335975606783357699383636055311218219, time:1750768786.127607s req_ids:[8] +DEBUG 06-24 20:39:46 [manager.py:391] +ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:431.80131912231445ms total_cost_time:431.83422088623047ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:14580 prompt_cache_len:5151 prompt_cache_ratio:0.35329218106995885 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 +DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:46 [batch.py:51] router release req id 8 +INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.10882687568664551 s +INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11079978942871094 s +DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=49483186681568051395164541004042744190, time:1750768786.3661072s req_ids:[8] +DEBUG 06-24 20:39:46 [manager.py:391] +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:227.33020782470703ms total_cost_time:227.3871898651123ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:14581 prompt_cache_len:5151 prompt_cache_ratio:0.3532679514436596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 +DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:46 [batch.py:51] router release req id 8 +INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.11306214332580566 s +INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11495161056518555 s +DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=311631563991357654020654465196010331414, time:1750768786.5928771s req_ids:[8] +DEBUG 06-24 20:39:46 [manager.py:391] +ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:217.6809310913086ms total_cost_time:217.7255153656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14582 prompt_cache_len:5151 prompt_cache_ratio:0.3532437251405843 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 +DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:46 [batch.py:51] router release req id 8 +INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.10955119132995605 s +INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11147761344909668 s +DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=213824827493101668332366572389671179453, time:1750768786.8165715s req_ids:[8] +DEBUG 06-24 20:39:46 [manager.py:391] +ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:215.96574783325195ms total_cost_time:216.01009368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14583 prompt_cache_len:5151 prompt_cache_ratio:0.3532195021600494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 +DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:46 [batch.py:51] router release req id 8 +INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.11057877540588379 s +INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11251163482666016 s +DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=154832086762027315607523519337500576396, time:1750768787.0391874s req_ids:[8] +DEBUG 06-24 20:39:47 [manager.py:391] +ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:216.10641479492188ms total_cost_time:216.14551544189453ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:14584 prompt_cache_len:5151 prompt_cache_ratio:0.3531952825013714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 +INFO 06-24 20:39:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:47 [batch.py:51] router release req id 8 +INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.21015024185180664 s +INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.2118375301361084 s +DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=269062854948957732017759949002905868460, time:1750768787.364056s req_ids:[8] +DEBUG 06-24 20:39:47 [manager.py:391] +ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:276.2646675109863ms total_cost_time:276.3078212738037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14585 prompt_cache_len:5151 prompt_cache_ratio:0.353171066163867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 +DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:47 [batch.py:51] router release req id 8 +INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.10839128494262695 s +INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11011576652526855 s +DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=3413577450046016845705075942239804203, time:1750768787.543885s req_ids:[8] +DEBUG 06-24 20:39:47 [manager.py:391] +ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:211.24768257141113ms total_cost_time:211.29274368286133ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14586 prompt_cache_len:5151 prompt_cache_ratio:0.3531468531468531 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 +DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:47 [batch.py:51] router release req id 8 +INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.10943436622619629 s +INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.1112203598022461 s +DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=216751042116308651620632627734768161934, time:1750768787.761565s req_ids:[8] +DEBUG 06-24 20:39:47 [manager.py:391] +ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:210.3433609008789ms total_cost_time:210.3707790374756ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14587 prompt_cache_len:5151 prompt_cache_ratio:0.35312264344964694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 +DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:47 [batch.py:51] router release req id 8 +INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.11063313484191895 s +INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11246490478515625 s +DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=251723794494026504238249637016749788398, time:1750768787.9782412s req_ids:[8] +DEBUG 06-24 20:39:47 [manager.py:391] +ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:213.8216495513916ms total_cost_time:213.87910842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14588 prompt_cache_len:5151 prompt_cache_ratio:0.3530984370715657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 +DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:48 [batch.py:51] router release req id 8 +INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.11202597618103027 s +INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.11405372619628906 s +DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=338120759683938438999969171600705157505, time:1750768788.2309172s req_ids:[8] +DEBUG 06-24 20:39:48 [manager.py:391] +ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:256.55293464660645ms total_cost_time:256.59871101379395ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14589 prompt_cache_len:5151 prompt_cache_ratio:0.3530742340119268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 +DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:48 [batch.py:51] router release req id 8 +INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.10900354385375977 s +INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.1109468936920166 s +DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=143278051472972091966221136975172002495, time:1750768788.4723651s req_ids:[8] +DEBUG 06-24 20:39:48 [manager.py:391] +ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:390.85888862609863ms total_cost_time:390.9034729003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14590 prompt_cache_len:5151 prompt_cache_ratio:0.353050034270048 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 +DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:48 [batch.py:51] router release req id 8 +INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.10914468765258789 s +INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.11065673828125 s +DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=171757553372950164812274981303353789178, time:1750768788.8605888s req_ids:[8] +DEBUG 06-24 20:39:48 [manager.py:391] +ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:217.6070213317871ms total_cost_time:217.6504135131836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14591 prompt_cache_len:5151 prompt_cache_ratio:0.35302583784524705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 +DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:48 [batch.py:51] router release req id 8 +INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10980415344238281 s +INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s +DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=147303899395693450157815051491875240325, time:1750768789.0860713s req_ids:[8] +DEBUG 06-24 20:39:49 [manager.py:391] +ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:220.31641006469727ms total_cost_time:220.36266326904297ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14592 prompt_cache_len:5151 prompt_cache_ratio:0.3530016447368421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 +DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:49 [batch.py:51] router release req id 8 +INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10911941528320312 s +INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11120724678039551 s +DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=103152850691057898636715907070306923752, time:1750768789.310929s req_ids:[8] +DEBUG 06-24 20:39:49 [manager.py:391] +ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:218.36280822753906ms total_cost_time:218.40691566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14593 prompt_cache_len:5151 prompt_cache_ratio:0.3529774549441513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 +DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:49 [batch.py:51] router release req id 8 +DEBUG 06-24 20:39:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:49 [manager.py:283] +DEBUG 06-24 20:39:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:49 [manager.py:284] +INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10927987098693848 s +INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11118841171264648 s +DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=35927082414003202068380400190757305817, time:1750768789.536831s req_ids:[8] +DEBUG 06-24 20:39:49 [manager.py:391] +ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:213.1800651550293ms total_cost_time:213.22226524353027ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14594 prompt_cache_len:5151 prompt_cache_ratio:0.35295326846649305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 +DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:49 [batch.py:51] router release req id 8 +INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10883259773254395 s +INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11100125312805176 s +DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=282044102051602559175650873271267029671, time:1750768789.7535982s req_ids:[8] +DEBUG 06-24 20:39:49 [manager.py:391] +ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:216.6440486907959ms total_cost_time:216.68505668640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14595 prompt_cache_len:5151 prompt_cache_ratio:0.352929085303186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 +DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:49 [batch.py:51] router release req id 8 +INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10870146751403809 s +INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11076021194458008 s +DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=218153792189841875753775991752451270359, time:1750768789.976416s req_ids:[8] +DEBUG 06-24 20:39:49 [manager.py:391] +DEBUG 06-24 20:39:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 58264.955 tokens/s +DEBUG 06-24 20:39:49 [stats.py:37] Avg prompt tokens throughput: 58256.962 tokens/s +DEBUG 06-24 20:39:49 [stats.py:37] Avg generate tokens throughput: 7.993 tokens/s +ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:368.6525821685791ms total_cost_time:368.6971664428711ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14596 prompt_cache_len:5151 prompt_cache_ratio:0.3529049054535489 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 +DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:50 [batch.py:51] router release req id 8 +INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10863208770751953 s +INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.11052775382995605 s +DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=18352547525446340012965043078790008012, time:1750768790.3512247s req_ids:[8] +DEBUG 06-24 20:39:50 [manager.py:391] +ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:216.1552906036377ms total_cost_time:216.19820594787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14597 prompt_cache_len:5151 prompt_cache_ratio:0.35288072891690075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 +DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:50 [batch.py:51] router release req id 8 +INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10856080055236816 s +INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.11051297187805176 s +DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=10695622368058238099645858113850432297, time:1750768790.5725007s req_ids:[8] +DEBUG 06-24 20:39:50 [manager.py:391] +ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:214.4770622253418ms total_cost_time:214.5214080810547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14598 prompt_cache_len:5151 prompt_cache_ratio:0.35285655569256064 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 +DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:50 [batch.py:51] router release req id 8 +INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10753059387207031 s +INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.10928916931152344 s +DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=218508103209010209395421857556803296197, time:1750768790.7908895s req_ids:[8] +DEBUG 06-24 20:39:50 [manager.py:391] +ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:207.0603370666504ms total_cost_time:207.10253715515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14599 prompt_cache_len:5151 prompt_cache_ratio:0.35283238577984793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 +DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:50 [batch.py:51] router release req id 8 +INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.11075448989868164 s +INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.1125950813293457 s +DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=32079312299270332517646655638123830093, time:1750768791.0045545s req_ids:[8] +DEBUG 06-24 20:39:51 [manager.py:391] +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:211.81249618530273ms total_cost_time:211.85803413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14600 prompt_cache_len:5151 prompt_cache_ratio:0.3528082191780822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 +DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:51 [batch.py:51] router release req id 8 +INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10862302780151367 s +INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.11037230491638184 s +DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=72107015934581796145915057026780154410, time:1750768791.2195976s req_ids:[8] +DEBUG 06-24 20:39:51 [manager.py:391] +ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:380.9068202972412ms total_cost_time:380.9514045715332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14601 prompt_cache_len:5151 prompt_cache_ratio:0.3527840558865831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 +DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:51 [batch.py:51] router release req id 8 +INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10866665840148926 s +INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000609397888184 s +DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=95970877254586506240287575455117919632, time:1750768791.6071267s req_ids:[8] +DEBUG 06-24 20:39:51 [manager.py:391] +ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:216.994047164917ms total_cost_time:217.02194213867188ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14602 prompt_cache_len:5151 prompt_cache_ratio:0.3527598959046706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 +DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:51 [batch.py:51] router release req id 8 +INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10620427131652832 s +INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.10823392868041992 s +DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=317265843642154283015348988840065746539, time:1750768791.833168s req_ids:[8] +DEBUG 06-24 20:39:51 [manager.py:391] +ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:222.67746925354004ms total_cost_time:222.7010726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14603 prompt_cache_len:5151 prompt_cache_ratio:0.35273573923166474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 +DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:51 [batch.py:51] router release req id 8 +INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10722732543945312 s +INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10929679870605469 s +DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=136128606129125989612163793498456469184, time:1750768792.0681968s req_ids:[8] +DEBUG 06-24 20:39:52 [manager.py:391] +ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:225.9359359741211ms total_cost_time:225.9809970855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14604 prompt_cache_len:5151 prompt_cache_ratio:0.3527115858668858 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 +DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:52 [batch.py:51] router release req id 8 +INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s +INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10958266258239746 s +DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=127430898602860975647392876129355729424, time:1750768792.2916193s req_ids:[8] +DEBUG 06-24 20:39:52 [manager.py:391] +ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:215.73829650878906ms total_cost_time:215.76619148254395ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14605 prompt_cache_len:5151 prompt_cache_ratio:0.3526874358096542 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 +DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:52 [batch.py:51] router release req id 8 +INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10626411437988281 s +INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10811591148376465 s +DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=340033590239168107458885665131937223048, time:1750768792.5128484s req_ids:[8] +DEBUG 06-24 20:39:52 [manager.py:391] +ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:218.78623962402344ms total_cost_time:218.8122272491455ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14606 prompt_cache_len:5151 prompt_cache_ratio:0.35266328905929073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 +DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:52 [batch.py:51] router release req id 8 +INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10608458518981934 s +INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10817193984985352 s +DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=200743274537124578205950152583811654672, time:1750768792.7434688s req_ids:[8] +DEBUG 06-24 20:39:52 [manager.py:391] +ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:396.587610244751ms total_cost_time:396.6174125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:14607 prompt_cache_len:5151 prompt_cache_ratio:0.35263914561511606 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 +DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:53 [batch.py:51] router release req id 8 +INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10672283172607422 s +INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10876917839050293 s +DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=301483937968471526072041871744558342363, time:1750768793.141431s req_ids:[8] +DEBUG 06-24 20:39:53 [manager.py:391] +INFO 06-24 20:39:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:39:53 [statics_utils.py:24] mean first cost: 232.2300645177751 ms +INFO 06-24 20:39:53 [statics_utils.py:24] mean per token cost: 0.057852640866821445 ms +ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:220.24774551391602ms total_cost_time:220.2754020690918ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14608 prompt_cache_len:5151 prompt_cache_ratio:0.35261500547645125 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 +DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:53 [batch.py:51] router release req id 8 +INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10716056823730469 s +INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.11010479927062988 s +DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=312784204664014756927534557653319251439, time:1750768793.366108s req_ids:[8] +DEBUG 06-24 20:39:53 [manager.py:391] +ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:217.6227569580078ms total_cost_time:217.6513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14609 prompt_cache_len:5151 prompt_cache_ratio:0.35259086864261757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 +DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:53 [batch.py:51] router release req id 8 +INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10644745826721191 s +INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10797619819641113 s +DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=128547841836206815438437644166158978975, time:1750768793.588976s req_ids:[8] +DEBUG 06-24 20:39:53 [manager.py:391] +ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:213.85979652404785ms total_cost_time:213.88864517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:14610 prompt_cache_len:5151 prompt_cache_ratio:0.35256673511293635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 +DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:53 [batch.py:51] router release req id 8 +INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10611128807067871 s +INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10823369026184082 s +DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=315830407126642505896334383469077725843, time:1750768793.8071468s req_ids:[8] +DEBUG 06-24 20:39:53 [manager.py:391] +ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:217.33975410461426ms total_cost_time:217.36812591552734ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14611 prompt_cache_len:5151 prompt_cache_ratio:0.35254260488672917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 +DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:53 [batch.py:51] router release req id 8 +INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10588216781616211 s +INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10793089866638184 s +DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=251621967783032822995151232132850052759, time:1750768794.0308383s req_ids:[8] +DEBUG 06-24 20:39:54 [manager.py:391] +ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:214.91217613220215ms total_cost_time:214.9367332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14612 prompt_cache_len:5151 prompt_cache_ratio:0.3525184779633178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 +DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:54 [batch.py:51] router release req id 8 +INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.1060643196105957 s +INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10789370536804199 s +DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=54683609956904701781389237146703411612, time:1750768794.2510002s req_ids:[8] +DEBUG 06-24 20:39:54 [manager.py:391] +ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:347.46575355529785ms total_cost_time:347.49484062194824ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:14613 prompt_cache_len:5151 prompt_cache_ratio:0.35249435434202425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 +DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:54 [batch.py:51] router release req id 8 +INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10524249076843262 s +INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10705113410949707 s +DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=6342112528025415781306804597996030509, time:1750768794.606074s req_ids:[8] +DEBUG 06-24 20:39:54 [manager.py:391] +ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:217.94795989990234ms total_cost_time:217.97537803649902ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14614 prompt_cache_len:5151 prompt_cache_ratio:0.35247023402217054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 +DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:54 [batch.py:51] router release req id 8 +INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10634374618530273 s +INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10817933082580566 s +DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=217464734558654374031099404067429390308, time:1750768794.8369815s req_ids:[8] +DEBUG 06-24 20:39:54 [manager.py:391] +ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:229.18248176574707ms total_cost_time:229.21013832092285ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14615 prompt_cache_len:5151 prompt_cache_ratio:0.35244611700307904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 +DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:54 [batch.py:51] router release req id 8 +INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10702347755432129 s +INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.1091005802154541 s +DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=223676207967032390637801055791797868217, time:1750768795.0742579s req_ids:[8] +DEBUG 06-24 20:39:55 [manager.py:391] +ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:231.34779930114746ms total_cost_time:231.37474060058594ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14616 prompt_cache_len:5151 prompt_cache_ratio:0.35242200328407225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 +DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:55 [batch.py:51] router release req id 8 +INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10688185691833496 s +INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s +DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=180004751886272089253343028418972066478, time:1750768795.3162808s req_ids:[8] +DEBUG 06-24 20:39:55 [manager.py:391] +ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:238.39902877807617ms total_cost_time:238.42740058898926ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14617 prompt_cache_len:5151 prompt_cache_ratio:0.3523978928644729 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 +DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:55 [batch.py:51] router release req id 8 +INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10786652565002441 s +INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.10989022254943848 s +DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=142714408361736700852275728377013525592, time:1750768795.5545967s req_ids:[8] +DEBUG 06-24 20:39:55 [manager.py:391] +ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:405.1780700683594ms total_cost_time:405.20501136779785ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14618 prompt_cache_len:5151 prompt_cache_ratio:0.3523737857436038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 +DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:55 [batch.py:51] router release req id 8 +INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10633182525634766 s +INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.1082770824432373 s +DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=182161739474111145085642832283242148312, time:1750768795.9641972s req_ids:[8] +DEBUG 06-24 20:39:55 [manager.py:391] +ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:232.13481903076172ms total_cost_time:232.1641445159912ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:14619 prompt_cache_len:5151 prompt_cache_ratio:0.352349681920788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 +DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:56 [batch.py:51] router release req id 8 +INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10519170761108398 s +INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10738849639892578 s +DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=285106244454682188540343192740695780505, time:1750768796.1957314s req_ids:[8] +DEBUG 06-24 20:39:56 [manager.py:391] +ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:223.36626052856445ms total_cost_time:223.39487075805664ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14620 prompt_cache_len:5151 prompt_cache_ratio:0.35232558139534886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 +DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:56 [batch.py:51] router release req id 8 +INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10544943809509277 s +INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10754656791687012 s +DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=176315020001638055913747257223346552792, time:1750768796.4222822s req_ids:[8] +DEBUG 06-24 20:39:56 [manager.py:391] +ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:218.75333786010742ms total_cost_time:218.77813339233398ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14621 prompt_cache_len:5151 prompt_cache_ratio:0.35230148416660967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 +DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:56 [batch.py:51] router release req id 8 +INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.1060781478881836 s +INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10825228691101074 s +DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=298162584713200607091534333393971425933, time:1750768796.6455512s req_ids:[8] +DEBUG 06-24 20:39:56 [manager.py:391] +ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:223.16384315490723ms total_cost_time:223.19436073303223ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:14622 prompt_cache_len:5151 prompt_cache_ratio:0.35227739023389415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 +DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:56 [batch.py:51] router release req id 8 +INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10529351234436035 s +INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10734963417053223 s +DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=91351218140577636045305557247925479091, time:1750768796.870546s req_ids:[8] +DEBUG 06-24 20:39:56 [manager.py:391] +ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:215.67440032958984ms total_cost_time:215.70181846618652ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14623 prompt_cache_len:5151 prompt_cache_ratio:0.352253299596526 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 +DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:57 [batch.py:51] router release req id 8 +INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10541558265686035 s +INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10755062103271484 s +DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=28915172066466887270575219324325629351, time:1750768797.0898035s req_ids:[8] +DEBUG 06-24 20:39:57 [manager.py:391] +ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:375.9632110595703ms total_cost_time:375.98514556884766ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:14624 prompt_cache_len:5151 prompt_cache_ratio:0.3522292122538293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 +DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:57 [batch.py:51] router release req id 8 +INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10532641410827637 s +INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.1074821949005127 s +DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=269008974523881689285635085617253543697, time:1750768797.476191s req_ids:[8] +DEBUG 06-24 20:39:57 [manager.py:391] +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:218.9481258392334ms total_cost_time:218.97315979003906ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:14625 prompt_cache_len:5151 prompt_cache_ratio:0.3522051282051282 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 +DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:57 [batch.py:51] router release req id 8 +INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10531425476074219 s +INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10724377632141113 s +DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=295450407924368829242366151270423258738, time:1750768797.6956959s req_ids:[8] +DEBUG 06-24 20:39:57 [manager.py:391] +ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:218.78290176391602ms total_cost_time:218.80769729614258ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14626 prompt_cache_len:5151 prompt_cache_ratio:0.35218104744974704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 +DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:57 [batch.py:51] router release req id 8 +INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10580563545227051 s +INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10789370536804199 s +DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=57912617319386578300860825420687098809, time:1750768797.92045s req_ids:[8] +DEBUG 06-24 20:39:57 [manager.py:391] +ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:215.4548168182373ms total_cost_time:215.47842025756836ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14627 prompt_cache_len:5151 prompt_cache_ratio:0.35215696998701035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 +DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:58 [batch.py:51] router release req id 8 +INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10486388206481934 s +INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.1068871021270752 s +DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=79918718710762034017296853980517708040, time:1750768798.1405146s req_ids:[8] +DEBUG 06-24 20:39:58 [manager.py:391] +ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:219.27380561828613ms total_cost_time:219.2990779876709ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14628 prompt_cache_len:5151 prompt_cache_ratio:0.3521328958162428 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 +DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:58 [batch.py:51] router release req id 8 +INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.1052238941192627 s +INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.10716032981872559 s +DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=296736462963503044587177980886388491731, time:1750768798.3650537s req_ids:[8] +DEBUG 06-24 20:39:58 [manager.py:391] +ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:385.53762435913086ms total_cost_time:385.56361198425293ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14629 prompt_cache_len:5151 prompt_cache_ratio:0.3521088249367694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 +DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:58 [batch.py:51] router release req id 8 +INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10832524299621582 s +INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101837158203125 s +DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=127522375407686513081918796752475705359, time:1750768798.7554176s req_ids:[8] +DEBUG 06-24 20:39:58 [manager.py:391] +ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:197.36433029174805ms total_cost_time:197.4325180053711ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:14630 prompt_cache_len:5151 prompt_cache_ratio:0.3520847573479152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 +DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:58 [batch.py:51] router release req id 8 +INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10802865028381348 s +INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.11001706123352051 s +DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=285093775969350845142784214862407191997, time:1750768798.957246s req_ids:[8] +DEBUG 06-24 20:39:58 [manager.py:391] +ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:215.71826934814453ms total_cost_time:215.76213836669922ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14631 prompt_cache_len:5151 prompt_cache_ratio:0.35206069304900556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 +DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:59 [batch.py:51] router release req id 8 +INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10884499549865723 s +INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.11090373992919922 s +DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=70506625319648077842224250376179252301, time:1750768799.1885874s req_ids:[8] +DEBUG 06-24 20:39:59 [manager.py:391] +ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:228.3337116241455ms total_cost_time:228.35540771484375ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14632 prompt_cache_len:5151 prompt_cache_ratio:0.35203663203936575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 +DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:59 [batch.py:51] router release req id 8 +INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10346317291259766 s +INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.10498213768005371 s +DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=188348915149361742991505328604343062615, time:1750768799.4199874s req_ids:[8] +DEBUG 06-24 20:39:59 [manager.py:391] +ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:187.75439262390137ms total_cost_time:187.81089782714844ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:14633 prompt_cache_len:5151 prompt_cache_ratio:0.3520125743183216 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 +DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:59 [batch.py:51] router release req id 8 +INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10652446746826172 s +INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.10769200325012207 s +DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=40039371805730158033001495857627358933, time:1750768799.6072655s req_ids:[8] +DEBUG 06-24 20:39:59 [manager.py:391] +ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:180.33838272094727ms total_cost_time:180.38320541381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14634 prompt_cache_len:5151 prompt_cache_ratio:0.35198851988519886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 +DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:39:59 [batch.py:51] router release req id 8 +INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.1085507869720459 s +INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.11055135726928711 s +DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=298133020760754316039800047710127863655, time:1750768799.7993019s req_ids:[8] +DEBUG 06-24 20:39:59 [manager.py:391] +ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 56632.024 tokens/s +DEBUG 06-24 20:40:00 [stats.py:37] Avg prompt tokens throughput: 56624.177 tokens/s +DEBUG 06-24 20:40:00 [stats.py:37] Avg generate tokens throughput: 7.848 tokens/s +INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:365.6184673309326ms total_cost_time:365.6766414642334ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14635 prompt_cache_len:5151 prompt_cache_ratio:0.3519644687393235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 +DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:00 [batch.py:51] router release req id 8 +INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.11114764213562012 s +INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11406183242797852 s +DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=283671386727694669175934384718867031338, time:1750768800.1677294s req_ids:[8] +DEBUG 06-24 20:40:00 [manager.py:391] +ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:241.8069839477539ms total_cost_time:241.8651580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14636 prompt_cache_len:5151 prompt_cache_ratio:0.35194042088002186 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 +DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:00 [batch.py:51] router release req id 8 +INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10748553276062012 s +INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104285717010498 s +DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=78423654741865273524398752190596282397, time:1750768800.4166522s req_ids:[8] +DEBUG 06-24 20:40:00 [manager.py:391] +ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:203.91416549682617ms total_cost_time:203.96065711975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14637 prompt_cache_len:5151 prompt_cache_ratio:0.3519163763066202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 +DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:00 [batch.py:51] router release req id 8 +INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10904598236083984 s +INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11102771759033203 s +DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=101497246974836572241333421686202404036, time:1750768800.6269672s req_ids:[8] +DEBUG 06-24 20:40:00 [manager.py:391] +ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:219.27118301391602ms total_cost_time:219.315767288208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14638 prompt_cache_len:5151 prompt_cache_ratio:0.35189233501844513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 +DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:00 [batch.py:51] router release req id 8 +INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10834193229675293 s +INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11030030250549316 s +DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=252977016086248619614814875287801501198, time:1750768800.850858s req_ids:[8] +DEBUG 06-24 20:40:00 [manager.py:391] +ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:210.9367847442627ms total_cost_time:210.98041534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14639 prompt_cache_len:5151 prompt_cache_ratio:0.3518682970148234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 +DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:00 [batch.py:51] router release req id 8 +INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10794258117675781 s +INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11029672622680664 s +DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=75145028806095838917535046627860976429, time:1750768801.0772765s req_ids:[8] +DEBUG 06-24 20:40:01 [manager.py:391] +ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:203.19271087646484ms total_cost_time:203.23705673217773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14640 prompt_cache_len:5151 prompt_cache_ratio:0.351844262295082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 +DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:01 [batch.py:51] router release req id 8 +INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s +INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11145734786987305 s +DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=29425647876760266472668248317845526836, time:1750768801.290563s req_ids:[8] +DEBUG 06-24 20:40:01 [manager.py:391] +ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:415.50731658935547ms total_cost_time:415.55190086364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14641 prompt_cache_len:5151 prompt_cache_ratio:0.3518202308585479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 +DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:01 [batch.py:51] router release req id 8 +INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10839319229125977 s +INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s +DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=31075885119344961701866495387885794661, time:1750768801.7093735s req_ids:[8] +DEBUG 06-24 20:40:01 [manager.py:391] +ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:191.0409927368164ms total_cost_time:191.0860538482666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14642 prompt_cache_len:5151 prompt_cache_ratio:0.35179620270454853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 +DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:01 [batch.py:51] router release req id 8 +INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s +INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.1100931167602539 s +DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=310405396197711528359347490688022475197, time:1750768801.899445s req_ids:[8] +DEBUG 06-24 20:40:01 [manager.py:391] +ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:179.06594276428223ms total_cost_time:179.11577224731445ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:14643 prompt_cache_len:5151 prompt_cache_ratio:0.3517721778324114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 +DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:01 [batch.py:51] router release req id 8 +INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10940814018249512 s +INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11149954795837402 s +DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=38190377739000897392707002250125989748, time:1750768802.0863044s req_ids:[8] +DEBUG 06-24 20:40:02 [manager.py:391] +ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:239.7899627685547ms total_cost_time:239.837646484375ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:14644 prompt_cache_len:5151 prompt_cache_ratio:0.3517481562414641 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 +DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:02 [batch.py:51] router release req id 8 +INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10821080207824707 s +INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11034059524536133 s +DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=235138471743341854433750522224591097920, time:1750768802.3327134s req_ids:[8] +DEBUG 06-24 20:40:02 [manager.py:391] +ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:206.82764053344727ms total_cost_time:206.87365531921387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14645 prompt_cache_len:5151 prompt_cache_ratio:0.35172413793103446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 +DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:02 [batch.py:51] router release req id 8 +INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10905051231384277 s +INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118292808532715 s +DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=252010119904605236361583805794168811429, time:1750768802.5591378s req_ids:[8] +DEBUG 06-24 20:40:02 [manager.py:391] +ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:228.54137420654297ms total_cost_time:228.60193252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:14646 prompt_cache_len:5151 prompt_cache_ratio:0.35170012290045066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 +DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:02 [batch.py:51] router release req id 8 +INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10776972770690918 s +INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976767539978027 s +DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=1614845614591362272259334149308349989, time:1750768802.7961314s req_ids:[8] +DEBUG 06-24 20:40:02 [manager.py:391] +ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:408.1401824951172ms total_cost_time:408.2012176513672ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:14647 prompt_cache_len:5151 prompt_cache_ratio:0.35167611114904074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 +DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:03 [batch.py:51] router release req id 8 +INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10899519920349121 s +INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11150813102722168 s +DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=32200158623744070366573186977118179276, time:1750768803.2030716s req_ids:[8] +DEBUG 06-24 20:40:03 [manager.py:391] +ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:220.25418281555176ms total_cost_time:220.29709815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14648 prompt_cache_len:5151 prompt_cache_ratio:0.3516521026761333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 +DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:03 [batch.py:51] router release req id 8 +INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10830974578857422 s +INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11023807525634766 s +DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=280894100610602898366798201723229739363, time:1750768803.4275584s req_ids:[8] +DEBUG 06-24 20:40:03 [manager.py:391] +ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:226.76801681518555ms total_cost_time:226.81260108947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14649 prompt_cache_len:5151 prompt_cache_ratio:0.3516280974810567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 +DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:03 [batch.py:51] router release req id 8 +INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10739803314208984 s +INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s +DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=1138200867971789197563616787512741240, time:1750768803.6810126s req_ids:[8] +DEBUG 06-24 20:40:03 [manager.py:391] +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:239.0146255493164ms total_cost_time:239.0587329864502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14650 prompt_cache_len:5151 prompt_cache_ratio:0.35160409556313993 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 +DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:03 [batch.py:51] router release req id 8 +INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s +INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11061835289001465 s +DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=233384928409575620349507820285901576477, time:1750768803.9078903s req_ids:[8] +DEBUG 06-24 20:40:03 [manager.py:391] +ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:207.12661743164062ms total_cost_time:207.17096328735352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14651 prompt_cache_len:5151 prompt_cache_ratio:0.3515800969217118 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 +DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:04 [batch.py:51] router release req id 8 +INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.11030745506286621 s +INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.11235880851745605 s +DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=208651620164177868847082966661120653643, time:1750768804.1326857s req_ids:[8] +DEBUG 06-24 20:40:04 [manager.py:391] +ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:416.97120666503906ms total_cost_time:417.01602935791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14652 prompt_cache_len:5151 prompt_cache_ratio:0.35155610155610156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 +DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:04 [batch.py:51] router release req id 8 +INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10958456993103027 s +INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.1123802661895752 s +DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=337668018329813714051921797595025312314, time:1750768804.553901s req_ids:[8] +DEBUG 06-24 20:40:04 [manager.py:391] +ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:217.19741821289062ms total_cost_time:217.30661392211914ms,out_token_counter:1 mean_per_token_cost_time: 0.10919570922851562ms prompt_token_num:14653 prompt_cache_len:5151 prompt_cache_ratio:0.35153210946563845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 +DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:04 [batch.py:51] router release req id 8 +INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10912942886352539 s +INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s +DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=248904565534071234620878684551747519468, time:1750768804.7700567s req_ids:[8] +DEBUG 06-24 20:40:04 [manager.py:391] +ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:221.48370742797852ms total_cost_time:221.51470184326172ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:14654 prompt_cache_len:5151 prompt_cache_ratio:0.351508120649652 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 +DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:04 [batch.py:51] router release req id 8 +INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10680437088012695 s +INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.10872602462768555 s +DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=330040149621897743690311209401296562889, time:1750768805.003832s req_ids:[8] +DEBUG 06-24 20:40:05 [manager.py:391] +ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:219.7716236114502ms total_cost_time:219.8176383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14655 prompt_cache_len:5151 prompt_cache_ratio:0.35148413510747184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 +DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:05 [batch.py:51] router release req id 8 +INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.10593700408935547 s +INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.10802555084228516 s +DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=152227902293252817254054893581143336864, time:1750768805.2319462s req_ids:[8] +DEBUG 06-24 20:40:05 [manager.py:391] +ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:203.94349098205566ms total_cost_time:203.9651870727539ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14656 prompt_cache_len:5151 prompt_cache_ratio:0.35146015283842796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 +DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:05 [batch.py:51] router release req id 8 +INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s +INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.10977768898010254 s +DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=160847208572750396787923596571863100231, time:1750768805.4464252s req_ids:[8] +DEBUG 06-24 20:40:05 [manager.py:391] +ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:381.3321590423584ms total_cost_time:381.3765048980713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14657 prompt_cache_len:5151 prompt_cache_ratio:0.35143617384185033 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 +DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:05 [batch.py:51] router release req id 8 +INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.10767531394958496 s +INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.1094510555267334 s +DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=267951776018354645296607241914416860378, time:1750768805.8219411s req_ids:[8] +DEBUG 06-24 20:40:05 [manager.py:391] +ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:195.69849967956543ms total_cost_time:195.7406997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14658 prompt_cache_len:5151 prompt_cache_ratio:0.3514121981170692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 +DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:05 [batch.py:51] router release req id 8 +INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10608243942260742 s +INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10776138305664062 s +DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=282227699810045277641654830340455488797, time:1750768806.0340028s req_ids:[8] +DEBUG 06-24 20:40:06 [manager.py:391] +ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:203.0620574951172ms total_cost_time:203.13501358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.07295608520507812ms prompt_token_num:14659 prompt_cache_len:5151 prompt_cache_ratio:0.351388225663415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 +DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:06 [batch.py:51] router release req id 8 +INFO 06-24 20:40:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10859799385070801 s +INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.11053943634033203 s +DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=186493777232001939700521816047499167378, time:1750768806.2328393s req_ids:[8] +DEBUG 06-24 20:40:06 [manager.py:391] +ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:214.54334259033203ms total_cost_time:214.5862579345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14660 prompt_cache_len:5151 prompt_cache_ratio:0.3513642564802183 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 +DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:06 [batch.py:51] router release req id 8 +INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s +INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s +DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=222756045335889731770057236036851888253, time:1750768806.4541633s req_ids:[8] +DEBUG 06-24 20:40:06 [manager.py:391] +ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:213.77086639404297ms total_cost_time:213.8228416442871ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:14661 prompt_cache_len:5151 prompt_cache_ratio:0.3513402905668099 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 +DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:06 [batch.py:51] router release req id 8 +INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10773491859436035 s +INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10969281196594238 s +DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=109573663443342411014985762888248359838, time:1750768806.6748793s req_ids:[8] +DEBUG 06-24 20:40:06 [manager.py:391] +ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:177.16646194458008ms total_cost_time:177.21056938171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14662 prompt_cache_len:5151 prompt_cache_ratio:0.3513163279225208 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 +DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:06 [batch.py:51] router release req id 8 +INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10931563377380371 s +INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.11115598678588867 s +DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=101063367256290899015021728701759335485, time:1750768806.8588803s req_ids:[8] +DEBUG 06-24 20:40:06 [manager.py:391] +ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:372.8907108306885ms total_cost_time:372.93338775634766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14663 prompt_cache_len:5151 prompt_cache_ratio:0.3512923685466821 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 +DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:07 [batch.py:51] router release req id 8 +INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.1101844310760498 s +INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.11182022094726562 s +DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=39883962420550959448599423589029536963, time:1750768807.2426622s req_ids:[8] +DEBUG 06-24 20:40:07 [manager.py:391] +ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:208.0819606781006ms total_cost_time:208.12606811523438ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14664 prompt_cache_len:5151 prompt_cache_ratio:0.3512684124386252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 +DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:07 [batch.py:51] router release req id 8 +INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10881614685058594 s +INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.11061692237854004 s +DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=57462205274071478657137618260770688304, time:1750768807.454332s req_ids:[8] +DEBUG 06-24 20:40:07 [manager.py:391] +ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:208.53209495544434ms total_cost_time:208.57524871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14665 prompt_cache_len:5151 prompt_cache_ratio:0.3512444595976816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 +DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:07 [batch.py:51] router release req id 8 +INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10800957679748535 s +INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.10934281349182129 s +DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=286672083995782378182249043300205071198, time:1750768807.6706464s req_ids:[8] +DEBUG 06-24 20:40:07 [manager.py:391] +ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:210.1461887359619ms total_cost_time:210.1905345916748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14666 prompt_cache_len:5151 prompt_cache_ratio:0.3512205100231829 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 +DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:07 [batch.py:51] router release req id 8 +INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10635113716125488 s +INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.10775232315063477 s +DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=132299397437960114637208644030322416134, time:1750768807.8862617s req_ids:[8] +DEBUG 06-24 20:40:07 [manager.py:391] +ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:199.13649559020996ms total_cost_time:199.17988777160645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14667 prompt_cache_len:5151 prompt_cache_ratio:0.351196563714461 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 +DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:08 [batch.py:51] router release req id 8 +INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10843658447265625 s +INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.10985541343688965 s +DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=134751804413329011592638054024290144512, time:1750768808.0973508s req_ids:[8] +DEBUG 06-24 20:40:08 [manager.py:391] +ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:206.1624526977539ms total_cost_time:206.2082290649414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14668 prompt_cache_len:5151 prompt_cache_ratio:0.3511726206708481 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 +DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:08 [batch.py:51] router release req id 8 +INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10817885398864746 s +INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.11066317558288574 s +DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=292250162422457546909000223461499183186, time:1750768808.3055372s req_ids:[8] +DEBUG 06-24 20:40:08 [manager.py:391] +ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:366.82963371276855ms total_cost_time:366.87493324279785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14669 prompt_cache_len:5151 prompt_cache_ratio:0.35114868089167633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 +DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:08 [batch.py:51] router release req id 8 +INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10732626914978027 s +INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.1089630126953125 s +DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=307146466464324948792893729798468659939, time:1750768808.6853666s req_ids:[8] +DEBUG 06-24 20:40:08 [manager.py:391] +ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:217.52119064331055ms total_cost_time:217.54193305969238ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:14670 prompt_cache_len:5151 prompt_cache_ratio:0.3511247443762781 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 +DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:08 [batch.py:51] router release req id 8 +INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10756731033325195 s +INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s +DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=73876365064544821356557035080322412181, time:1750768808.9131062s req_ids:[8] +DEBUG 06-24 20:40:08 [manager.py:391] +ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:228.98292541503906ms total_cost_time:229.02679443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14671 prompt_cache_len:5151 prompt_cache_ratio:0.3511008111239861 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 +DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:09 [batch.py:51] router release req id 8 +INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.10862016677856445 s +INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11059904098510742 s +DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=66176665053939551211755757697881003234, time:1750768809.1368496s req_ids:[8] +DEBUG 06-24 20:40:09 [manager.py:391] +ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:215.0115966796875ms total_cost_time:215.0554656982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14672 prompt_cache_len:5151 prompt_cache_ratio:0.35107688113413305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 +DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:09 [batch.py:51] router release req id 8 +INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.11381244659423828 s +INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11596465110778809 s +DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=246670812287341754160553141257638364089, time:1750768809.3659966s req_ids:[8] +DEBUG 06-24 20:40:09 [manager.py:391] +ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:217.75007247924805ms total_cost_time:217.79179573059082ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14673 prompt_cache_len:5151 prompt_cache_ratio:0.35105295440605194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 +DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:09 [batch.py:51] router release req id 8 +INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.1089484691619873 s +INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11101055145263672 s +DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=75296899705129925377362098132480849591, time:1750768809.5828452s req_ids:[8] +DEBUG 06-24 20:40:09 [manager.py:391] +ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:214.37907218933105ms total_cost_time:214.42365646362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14674 prompt_cache_len:5151 prompt_cache_ratio:0.3510290309390759 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 +DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:09 [batch.py:51] router release req id 8 +INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.10825109481811523 s +INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11038017272949219 s +DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=85232070758264560754967031954451671468, time:1750768809.818333s req_ids:[8] +DEBUG 06-24 20:40:09 [manager.py:391] +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 58414.144 tokens/s +DEBUG 06-24 20:40:10 [stats.py:37] Avg prompt tokens throughput: 58406.173 tokens/s +DEBUG 06-24 20:40:10 [stats.py:37] Avg generate tokens throughput: 7.971 tokens/s +INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:389.2490863800049ms total_cost_time:389.29224014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14675 prompt_cache_len:5151 prompt_cache_ratio:0.3510051107325383 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 +DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:10 [batch.py:51] router release req id 8 +INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.10872507095336914 s +INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.1110992431640625 s +DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=288617072645281090501793009085291308542, time:1750768810.2027705s req_ids:[8] +DEBUG 06-24 20:40:10 [manager.py:391] +ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:216.3245677947998ms total_cost_time:216.3674831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14676 prompt_cache_len:5151 prompt_cache_ratio:0.3509811937857727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 +DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:10 [batch.py:51] router release req id 8 +INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.10858917236328125 s +INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11062860488891602 s +DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=251449859872343744570645712716914805734, time:1750768810.4378803s req_ids:[8] +DEBUG 06-24 20:40:10 [manager.py:391] +ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:227.8614044189453ms total_cost_time:227.9031276702881ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14677 prompt_cache_len:5151 prompt_cache_ratio:0.35095728009811267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 +DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:10 [batch.py:51] router release req id 8 +INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s +INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11076140403747559 s +DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=296478288126915811178124684060817830986, time:1750768810.6640592s req_ids:[8] +DEBUG 06-24 20:40:10 [manager.py:391] +ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:215.3301239013672ms total_cost_time:215.37351608276367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14678 prompt_cache_len:5151 prompt_cache_ratio:0.3509333696688922 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 +DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:10 [batch.py:51] router release req id 8 +INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.11052227020263672 s +INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11247849464416504 s +DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=73120314939454522818614101276649400932, time:1750768810.88674s req_ids:[8] +DEBUG 06-24 20:40:10 [manager.py:391] +ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:228.46174240112305ms total_cost_time:228.50656509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14679 prompt_cache_len:5151 prompt_cache_ratio:0.3509094624974453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 +DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:11 [batch.py:51] router release req id 8 +INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10883855819702148 s +INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11079025268554688 s +DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=260313848381223299663677298441291830775, time:1750768811.120842s req_ids:[8] +DEBUG 06-24 20:40:11 [manager.py:391] +ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:209.08832550048828ms total_cost_time:209.13243293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14680 prompt_cache_len:5151 prompt_cache_ratio:0.35088555858310627 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 +DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:11 [batch.py:51] router release req id 8 +INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.20868897438049316 s +INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.21042728424072266 s +DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=104439952574871363373452588716405472152, time:1750768811.4391289s req_ids:[8] +DEBUG 06-24 20:40:11 [manager.py:391] +ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:275.32243728637695ms total_cost_time:275.36582946777344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14681 prompt_cache_len:5151 prompt_cache_ratio:0.35086165792520946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 +DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:11 [batch.py:51] router release req id 8 +INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10838603973388672 s +INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029481887817383 s +DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=163806316326058749563679437850570691710, time:1750768811.6180243s req_ids:[8] +DEBUG 06-24 20:40:11 [manager.py:391] +ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:223.51574897766113ms total_cost_time:223.56033325195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14682 prompt_cache_len:5151 prompt_cache_ratio:0.3508377605230895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 +DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:11 [batch.py:51] router release req id 8 +INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10888910293579102 s +INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11084342002868652 s +DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=259692710921977007290304756638727369546, time:1750768811.8481991s req_ids:[8] +DEBUG 06-24 20:40:11 [manager.py:391] +ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:208.74762535095215ms total_cost_time:208.79125595092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14683 prompt_cache_len:5151 prompt_cache_ratio:0.3508138663760812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 +DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:11 [batch.py:51] router release req id 8 +INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.10900163650512695 s +INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.11109209060668945 s +DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=106178268281230887263468439569640370188, time:1750768812.0623271s req_ids:[8] +DEBUG 06-24 20:40:12 [manager.py:391] +ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:212.0676040649414ms total_cost_time:212.1131420135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14684 prompt_cache_len:5151 prompt_cache_ratio:0.3507899754835195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 +DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:12 [batch.py:51] router release req id 8 +INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.10736584663391113 s +INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.10934853553771973 s +DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=229506463445512717252030612818934474975, time:1750768812.2813s req_ids:[8] +DEBUG 06-24 20:40:12 [manager.py:391] +ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:210.04223823547363ms total_cost_time:210.08706092834473ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14685 prompt_cache_len:5151 prompt_cache_ratio:0.3507660878447395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 +DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:12 [batch.py:51] router release req id 8 +INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.3103487491607666 s +INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.3125133514404297 s +DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=46646136924526523423223190120439972658, time:1750768812.7113745s req_ids:[8] +DEBUG 06-24 20:40:12 [manager.py:391] +ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:438.0228519439697ms total_cost_time:438.0664825439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14686 prompt_cache_len:5151 prompt_cache_ratio:0.3507422034590767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 +DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:12 [batch.py:51] router release req id 8 +INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.1081547737121582 s +INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.11033177375793457 s +DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=280227179896601486981952907103804898449, time:1750768812.941671s req_ids:[8] +DEBUG 06-24 20:40:12 [manager.py:391] +ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:215.75236320495605ms total_cost_time:215.8060073852539ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:14687 prompt_cache_len:5151 prompt_cache_ratio:0.3507183223258664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 +DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:13 [batch.py:51] router release req id 8 +INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.1079094409942627 s +INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s +DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=230946220934210716534521054901551339644, time:1750768813.1633117s req_ids:[8] +DEBUG 06-24 20:40:13 [manager.py:391] +ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:216.26901626586914ms total_cost_time:216.32790565490723ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:14688 prompt_cache_len:5151 prompt_cache_ratio:0.3506944444444444 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 +DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:13 [batch.py:51] router release req id 8 +INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s +INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10941672325134277 s +DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=15618498689938842387835959734554964984, time:1750768813.3916268s req_ids:[8] +DEBUG 06-24 20:40:13 [manager.py:391] +ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:220.8397388458252ms total_cost_time:220.8840847015381ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14689 prompt_cache_len:5151 prompt_cache_ratio:0.3506705698141466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 +DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:13 [batch.py:51] router release req id 8 +INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10974001884460449 s +INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.11181926727294922 s +DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=209437871507000890353336207164633925234, time:1750768813.6129513s req_ids:[8] +DEBUG 06-24 20:40:13 [manager.py:391] +ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:212.4958038330078ms total_cost_time:212.5394344329834ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14690 prompt_cache_len:5151 prompt_cache_ratio:0.35064669843430907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 +DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:13 [batch.py:51] router release req id 8 +INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10801553726196289 s +INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10989546775817871 s +DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=194283928511761194593323154536738791253, time:1750768813.8333905s req_ids:[8] +DEBUG 06-24 20:40:13 [manager.py:391] +ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:370.18895149230957ms total_cost_time:370.23234367370605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14691 prompt_cache_len:5151 prompt_cache_ratio:0.35062283030426794 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 +DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:14 [batch.py:51] router release req id 8 +INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10793113708496094 s +INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.1097562313079834 s +DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=85267071529423387605784326357880483224, time:1750768814.208885s req_ids:[8] +DEBUG 06-24 20:40:14 [manager.py:391] +ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:213.16957473754883ms total_cost_time:213.21463584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14692 prompt_cache_len:5151 prompt_cache_ratio:0.35059896542335967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 +DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:14 [batch.py:51] router release req id 8 +INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.11032605171203613 s +INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.1128394603729248 s +DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=256547681957251109996189322030825728333, time:1750768814.4288466s req_ids:[8] +DEBUG 06-24 20:40:14 [manager.py:391] +ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:216.47310256958008ms total_cost_time:216.51601791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14693 prompt_cache_len:5151 prompt_cache_ratio:0.35057510379092083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 +DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:14 [batch.py:51] router release req id 8 +INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10784578323364258 s +INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.10986518859863281 s +DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=149996451286698082104683088451529236869, time:1750768814.6504965s req_ids:[8] +DEBUG 06-24 20:40:14 [manager.py:391] +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:214.82348442077637ms total_cost_time:214.8764133453369ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:14694 prompt_cache_len:5151 prompt_cache_ratio:0.3505512454062883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 +DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:14 [batch.py:51] router release req id 8 +INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10989880561828613 s +INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.11208891868591309 s +DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=292610880929220073819236610874294689197, time:1750768814.873325s req_ids:[8] +DEBUG 06-24 20:40:14 [manager.py:391] +ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:211.86375617980957ms total_cost_time:211.90953254699707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14695 prompt_cache_len:5151 prompt_cache_ratio:0.3505273902687989 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 +DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:15 [batch.py:51] router release req id 8 +INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10903048515319824 s +INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11045002937316895 s +DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=83978598018006754585282364874160156125, time:1750768815.090116s req_ids:[8] +DEBUG 06-24 20:40:15 [manager.py:391] +ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:212.63599395751953ms total_cost_time:212.68177032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14696 prompt_cache_len:5151 prompt_cache_ratio:0.35050353837778986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 +DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:15 [batch.py:51] router release req id 8 +INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10903358459472656 s +INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11043787002563477 s +DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=202411820592296422443235918652591464510, time:1750768815.3099244s req_ids:[8] +DEBUG 06-24 20:40:15 [manager.py:391] +ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:372.90358543395996ms total_cost_time:372.94650077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14697 prompt_cache_len:5151 prompt_cache_ratio:0.35047968973259847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 +DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:15 [batch.py:51] router release req id 8 +INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s +INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949444770812988 s +DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=109145759774436825299845737751297545959, time:1750768815.689857s req_ids:[8] +DEBUG 06-24 20:40:15 [manager.py:391] +ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:220.91054916381836ms total_cost_time:220.95441818237305ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14698 prompt_cache_len:5151 prompt_cache_ratio:0.35045584433256227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 +DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:15 [batch.py:51] router release req id 8 +INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10871458053588867 s +INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11009097099304199 s +DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=71291847029573411913523924862250645828, time:1750768815.915585s req_ids:[8] +DEBUG 06-24 20:40:15 [manager.py:391] +ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:210.46900749206543ms total_cost_time:210.51430702209473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14699 prompt_cache_len:5151 prompt_cache_ratio:0.35043200217701886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 +DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:16 [batch.py:51] router release req id 8 +INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.1082468032836914 s +INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s +DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=330985283981790481572471721936287624934, time:1750768816.1315439s req_ids:[8] +DEBUG 06-24 20:40:16 [manager.py:391] +ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:210.41488647460938ms total_cost_time:210.46066284179688ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14700 prompt_cache_len:5151 prompt_cache_ratio:0.3504081632653061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 +DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:16 [batch.py:51] router release req id 8 +INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.1092681884765625 s +INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.11140871047973633 s +DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=71927880182853218777787044060417739260, time:1750768816.3487992s req_ids:[8] +DEBUG 06-24 20:40:16 [manager.py:391] +ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:212.13054656982422ms total_cost_time:212.1756076812744ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14701 prompt_cache_len:5151 prompt_cache_ratio:0.35038432759676214 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 +DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:16 [batch.py:51] router release req id 8 +INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.10783219337463379 s +INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.10994982719421387 s +DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=23532449572181476429637247826764038211, time:1750768816.5680692s req_ids:[8] +DEBUG 06-24 20:40:16 [manager.py:391] +ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:423.922061920166ms total_cost_time:423.9675998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14702 prompt_cache_len:5151 prompt_cache_ratio:0.3503604951707251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 +DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:16 [batch.py:51] router release req id 8 +INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.10913920402526855 s +INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.11120963096618652 s +DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=337716684614698894360310496654351709683, time:1750768816.9980087s req_ids:[8] +DEBUG 06-24 20:40:16 [manager.py:391] +ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:215.03663063049316ms total_cost_time:215.08193016052246ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14703 prompt_cache_len:5151 prompt_cache_ratio:0.35033666598653335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 +DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:17 [batch.py:51] router release req id 8 +INFO 06-24 20:40:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s +INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11098957061767578 s +DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=84344801542775111924716179508882138168, time:1750768817.2217424s req_ids:[8] +DEBUG 06-24 20:40:17 [manager.py:391] +ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:220.90721130371094ms total_cost_time:220.94988822937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14704 prompt_cache_len:5151 prompt_cache_ratio:0.3503128400435256 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 +DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:17 [batch.py:51] router release req id 8 +INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10928678512573242 s +INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11136794090270996 s +DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=278053924147190657618031647954905177305, time:1750768817.4453528s req_ids:[8] +DEBUG 06-24 20:40:17 [manager.py:391] +ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:212.11957931518555ms total_cost_time:212.16368675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14705 prompt_cache_len:5151 prompt_cache_ratio:0.35028901734104045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 +DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:17 [batch.py:51] router release req id 8 +INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10926270484924316 s +INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11130595207214355 s +DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=162949643589235368891536876983704878377, time:1750768817.6650093s req_ids:[8] +DEBUG 06-24 20:40:17 [manager.py:391] +ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:215.9428596496582ms total_cost_time:215.9872055053711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14706 prompt_cache_len:5151 prompt_cache_ratio:0.350265197878417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 +DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:17 [batch.py:51] router release req id 8 +INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10977816581726074 s +INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11183404922485352 s +DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=59771654104974208202503201462900064956, time:1750768817.8874245s req_ids:[8] +DEBUG 06-24 20:40:17 [manager.py:391] +ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:213.80138397216797ms total_cost_time:213.85765075683594ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:14707 prompt_cache_len:5151 prompt_cache_ratio:0.35024138165499424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 +DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:18 [batch.py:51] router release req id 8 +INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10902619361877441 s +INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11090493202209473 s +DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=163840034019968302116533132363777254139, time:1750768818.117605s req_ids:[8] +DEBUG 06-24 20:40:18 [manager.py:391] +ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:400.130033493042ms total_cost_time:400.1734256744385ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14708 prompt_cache_len:5151 prompt_cache_ratio:0.3502175686701115 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 +DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:18 [batch.py:51] router release req id 8 +INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10979270935058594 s +INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.1118624210357666 s +DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=139229620244462910979186042450618194967, time:1750768818.5142334s req_ids:[8] +DEBUG 06-24 20:40:18 [manager.py:391] +ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:215.50583839416504ms total_cost_time:215.54875373840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14709 prompt_cache_len:5151 prompt_cache_ratio:0.3501937589231083 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 +DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:18 [batch.py:51] router release req id 8 +INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10936713218688965 s +INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11137080192565918 s +DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=154067350897321068229419894495690673731, time:1750768818.735521s req_ids:[8] +DEBUG 06-24 20:40:18 [manager.py:391] +ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:216.4454460144043ms total_cost_time:216.4900302886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14710 prompt_cache_len:5151 prompt_cache_ratio:0.35016995241332427 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 +DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:18 [batch.py:51] router release req id 8 +INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s +INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11007475852966309 s +DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=92924276508592284431361526557100415736, time:1750768818.957816s req_ids:[8] +DEBUG 06-24 20:40:18 [manager.py:391] +ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:214.86616134643555ms total_cost_time:214.91003036499023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14711 prompt_cache_len:5151 prompt_cache_ratio:0.3501461491400992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 +DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:19 [batch.py:51] router release req id 8 +INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.10924339294433594 s +INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11134076118469238 s +DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=27474986404538544395457980219412924981, time:1750768819.1845744s req_ids:[8] +DEBUG 06-24 20:40:19 [manager.py:391] +ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:222.17130661010742ms total_cost_time:222.2137451171875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14712 prompt_cache_len:5151 prompt_cache_ratio:0.35012234910277323 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 +DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:19 [batch.py:51] router release req id 8 +INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.11055731773376465 s +INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11250495910644531 s +DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=278734477961778467504097557522278095925, time:1750768819.4089684s req_ids:[8] +DEBUG 06-24 20:40:19 [manager.py:391] +ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:216.01176261901855ms total_cost_time:216.05348587036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14713 prompt_cache_len:5151 prompt_cache_ratio:0.3500985523006865 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 +DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:19 [batch.py:51] router release req id 8 +INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.10862278938293457 s +INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11061406135559082 s +DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=195938774654422983008079039376137154699, time:1750768819.6332347s req_ids:[8] +DEBUG 06-24 20:40:19 [manager.py:391] +ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:380.9962272644043ms total_cost_time:381.0403347015381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14714 prompt_cache_len:5151 prompt_cache_ratio:0.3500747587331793 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 +DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:19 [batch.py:51] router release req id 8 +INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s +INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11131072044372559 s +DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=252146470875259449011412569199263454128, time:1750768820.0181272s req_ids:[8] +DEBUG 06-24 20:40:20 [manager.py:391] +ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 58592.326 tokens/s +DEBUG 06-24 20:40:20 [stats.py:37] Avg prompt tokens throughput: 58584.353 tokens/s +DEBUG 06-24 20:40:20 [stats.py:37] Avg generate tokens throughput: 7.973 tokens/s +INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:211.3654613494873ms total_cost_time:211.42005920410156ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:14715 prompt_cache_len:5151 prompt_cache_ratio:0.35005096839959227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 +DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:20 [batch.py:51] router release req id 8 +INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10906600952148438 s +INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11110997200012207 s +DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=95954748815909958735878640338729599983, time:1750768820.2440207s req_ids:[8] +DEBUG 06-24 20:40:20 [manager.py:391] +ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:224.13134574890137ms total_cost_time:224.17378425598145ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14716 prompt_cache_len:5151 prompt_cache_ratio:0.35002718129926613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 +DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:20 [batch.py:51] router release req id 8 +INFO 06-24 20:40:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.1079716682434082 s +INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.10998225212097168 s +DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=244795472970996849304813386913812005204, time:1750768820.468231s req_ids:[8] +DEBUG 06-24 20:40:20 [manager.py:391] +ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:228.98483276367188ms total_cost_time:229.02560234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14717 prompt_cache_len:5151 prompt_cache_ratio:0.35000339743154174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 +DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:20 [batch.py:51] router release req id 8 +INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s +INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11029648780822754 s +DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=102202093191719663057110273167712185687, time:1750768820.7090678s req_ids:[8] +DEBUG 06-24 20:40:20 [manager.py:391] +ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:215.67010879516602ms total_cost_time:215.71087837219238ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14718 prompt_cache_len:5151 prompt_cache_ratio:0.3499796167957603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 +DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:20 [batch.py:51] router release req id 8 +INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10885381698608398 s +INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11074352264404297 s +DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=218190155008774682513115986361626660133, time:1750768820.9254787s req_ids:[8] +DEBUG 06-24 20:40:20 [manager.py:391] +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:379.9419403076172ms total_cost_time:379.98366355895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14719 prompt_cache_len:5151 prompt_cache_ratio:0.349955839391263 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 +DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:21 [batch.py:51] router release req id 8 +INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10873579978942871 s +INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11084127426147461 s +DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=150353056383179411865339720371127552281, time:1750768821.3147857s req_ids:[8] +DEBUG 06-24 20:40:21 [manager.py:391] +ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:218.5831069946289ms total_cost_time:218.62459182739258ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14720 prompt_cache_len:5151 prompt_cache_ratio:0.3499320652173913 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 +DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:21 [batch.py:51] router release req id 8 +INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10844993591308594 s +INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11044096946716309 s +DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=240411819897803517244955389626172483581, time:1750768821.552177s req_ids:[8] +DEBUG 06-24 20:40:21 [manager.py:391] +ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:232.48791694641113ms total_cost_time:232.53226280212402ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14721 prompt_cache_len:5151 prompt_cache_ratio:0.34990829427348685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 +DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:21 [batch.py:51] router release req id 8 +INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s +INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11087560653686523 s +DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=241244702501274647831771362213054799806, time:1750768821.778202s req_ids:[8] +DEBUG 06-24 20:40:21 [manager.py:391] +ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:217.1335220336914ms total_cost_time:217.1761989593506ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14722 prompt_cache_len:5151 prompt_cache_ratio:0.34988452655889146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 +DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:21 [batch.py:51] router release req id 8 +INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10856294631958008 s +INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11064267158508301 s +DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=42028759426480868085254035638232032891, time:1750768821.9992962s req_ids:[8] +DEBUG 06-24 20:40:21 [manager.py:391] +ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:213.11140060424805ms total_cost_time:213.15336227416992ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14723 prompt_cache_len:5151 prompt_cache_ratio:0.3498607620729471 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 +DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:22 [batch.py:51] router release req id 8 +INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.10865616798400879 s +INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11079668998718262 s +DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=82729548392927690412541713795164685453, time:1750768822.2203574s req_ids:[8] +DEBUG 06-24 20:40:22 [manager.py:391] +ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:218.5375690460205ms total_cost_time:218.57905387878418ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14724 prompt_cache_len:5151 prompt_cache_ratio:0.3498370008149959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 +DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:22 [batch.py:51] router release req id 8 +INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.1083834171295166 s +INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11037731170654297 s +DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=216245359714521007635436767050189627188, time:1750768822.4430335s req_ids:[8] +DEBUG 06-24 20:40:22 [manager.py:391] +ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:373.4426498413086ms total_cost_time:373.48389625549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14725 prompt_cache_len:5151 prompt_cache_ratio:0.3498132427843803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 +DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:22 [batch.py:51] router release req id 8 +INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.10816335678100586 s +INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11008167266845703 s +DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=106960437875073430660383625589556351635, time:1750768822.8247607s req_ids:[8] +DEBUG 06-24 20:40:22 [manager.py:391] +ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:215.49201011657715ms total_cost_time:215.53468704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14726 prompt_cache_len:5151 prompt_cache_ratio:0.34978948798044274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 +DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:22 [batch.py:51] router release req id 8 +INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10790538787841797 s +INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.10997509956359863 s +DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=234752422244079015440265751350404841205, time:1750768823.0459228s req_ids:[8] +DEBUG 06-24 20:40:23 [manager.py:391] +ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:216.8595790863037ms total_cost_time:216.9017791748047ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14727 prompt_cache_len:5151 prompt_cache_ratio:0.34976573640252595 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 +DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:23 [batch.py:51] router release req id 8 +INFO 06-24 20:40:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s +INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11053991317749023 s +DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=205892908370596310986121267695549648449, time:1750768823.2683558s req_ids:[8] +DEBUG 06-24 20:40:23 [manager.py:391] +ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:209.45215225219727ms total_cost_time:209.49459075927734ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14728 prompt_cache_len:5151 prompt_cache_ratio:0.34974198804997286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 +DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:23 [batch.py:51] router release req id 8 +INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10879898071289062 s +INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11079049110412598 s +DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=146336133036422488894788449094167406076, time:1750768823.4838119s req_ids:[8] +DEBUG 06-24 20:40:23 [manager.py:391] +ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:214.46514129638672ms total_cost_time:214.508056640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14729 prompt_cache_len:5151 prompt_cache_ratio:0.34971824292212644 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 +DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:23 [batch.py:51] router release req id 8 +INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10812997817993164 s +INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11014509201049805 s +DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=225803028369704988127200924823203187100, time:1750768823.705018s req_ids:[8] +DEBUG 06-24 20:40:23 [manager.py:391] +ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:385.5254650115967ms total_cost_time:385.56861877441406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14730 prompt_cache_len:5151 prompt_cache_ratio:0.34969450101832994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 +DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:24 [batch.py:51] router release req id 8 +INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10860252380371094 s +INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11075830459594727 s +DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=329839708637922975065464774054145773582, time:1750768824.0985038s req_ids:[8] +DEBUG 06-24 20:40:24 [manager.py:391] +ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:216.28355979919434ms total_cost_time:216.325044631958ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14731 prompt_cache_len:5151 prompt_cache_ratio:0.3496707623379268 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 +DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:24 [batch.py:51] router release req id 8 +INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10773038864135742 s +INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.10971999168395996 s +DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=108882513264186924646401657806975031396, time:1750768824.3194456s req_ids:[8] +DEBUG 06-24 20:40:24 [manager.py:391] +ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:214.01596069335938ms total_cost_time:214.05887603759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14732 prompt_cache_len:5151 prompt_cache_ratio:0.34964702688026067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 +DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:24 [batch.py:51] router release req id 8 +INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s +INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s +DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=54398509598182422363793455840809106726, time:1750768824.5495422s req_ids:[8] +DEBUG 06-24 20:40:24 [manager.py:391] +ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:225.41236877441406ms total_cost_time:225.45361518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14733 prompt_cache_len:5151 prompt_cache_ratio:0.34962329464467523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 +DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:24 [batch.py:51] router release req id 8 +INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10812640190124512 s +INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11012649536132812 s +DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=99222919712112739614638693430011173829, time:1750768824.7716093s req_ids:[8] +DEBUG 06-24 20:40:24 [manager.py:391] +ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:213.7291431427002ms total_cost_time:213.77229690551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14734 prompt_cache_len:5151 prompt_cache_ratio:0.3495995656305145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 +DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:24 [batch.py:51] router release req id 8 +INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10829687118530273 s +INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.1103060245513916 s +DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=161136993519924356743507236976667227395, time:1750768824.9927127s req_ids:[8] +DEBUG 06-24 20:40:24 [manager.py:391] +ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:216.5241241455078ms total_cost_time:216.5672779083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14735 prompt_cache_len:5151 prompt_cache_ratio:0.3495758398371225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 +DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:25 [batch.py:51] router release req id 8 +INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10892605781555176 s +INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s +DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=215037999732090311585642783474842887600, time:1750768825.215685s req_ids:[8] +DEBUG 06-24 20:40:25 [manager.py:391] +ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:379.8520565032959ms total_cost_time:379.8937797546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14736 prompt_cache_len:5151 prompt_cache_ratio:0.34955211726384366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 +DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:25 [batch.py:51] router release req id 8 +INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10880565643310547 s +INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11093330383300781 s +DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=232023640339493069061001373657956031557, time:1750768825.6021s req_ids:[8] +DEBUG 06-24 20:40:25 [manager.py:391] +ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:217.83018112182617ms total_cost_time:217.87142753601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14737 prompt_cache_len:5151 prompt_cache_ratio:0.3495283979100224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 +DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:25 [batch.py:51] router release req id 8 +INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10885071754455566 s +INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11101722717285156 s +DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=171591947182874947658593488728448140988, time:1750768825.8262174s req_ids:[8] +DEBUG 06-24 20:40:25 [manager.py:391] +ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:217.36598014831543ms total_cost_time:217.4074649810791ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14738 prompt_cache_len:5151 prompt_cache_ratio:0.3495046817750034 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 +DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:25 [batch.py:51] router release req id 8 +INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10931706428527832 s +INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.11148858070373535 s +DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=129509340968800457324747685734214088552, time:1750768826.0505934s req_ids:[8] +DEBUG 06-24 20:40:26 [manager.py:391] +ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:213.6819362640381ms total_cost_time:213.72437477111816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14739 prompt_cache_len:5151 prompt_cache_ratio:0.3494809688581315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 +DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:26 [batch.py:51] router release req id 8 +INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s +INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s +DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=193781136826422544923064446438687067735, time:1750768826.2682414s req_ids:[8] +DEBUG 06-24 20:40:26 [manager.py:391] +ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:210.81304550170898ms total_cost_time:210.85667610168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14740 prompt_cache_len:5151 prompt_cache_ratio:0.3494572591587517 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 +DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:26 [batch.py:51] router release req id 8 +INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10837364196777344 s +INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030197143554688 s +DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=170674567482056146155922867433993650960, time:1750768826.4859996s req_ids:[8] +DEBUG 06-24 20:40:26 [manager.py:391] +ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:215.19112586975098ms total_cost_time:215.23213386535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14741 prompt_cache_len:5151 prompt_cache_ratio:0.34943355267620924 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 +DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:26 [batch.py:51] router release req id 8 +INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10756421089172363 s +INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s +DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=299320712009081363472658467266416720119, time:1750768826.70843s req_ids:[8] +DEBUG 06-24 20:40:26 [manager.py:391] +ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:344.0220355987549ms total_cost_time:344.07591819763184ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:14742 prompt_cache_len:5151 prompt_cache_ratio:0.3494098494098494 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 +DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:26 [batch.py:51] router release req id 8 +INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s +INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s +DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=280885242141554199862299742696802391378, time:1750768827.0586078s req_ids:[8] +DEBUG 06-24 20:40:27 [manager.py:391] +ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:219.9854850769043ms total_cost_time:220.0303077697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14743 prompt_cache_len:5151 prompt_cache_ratio:0.34938614935901785 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 +DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:27 [batch.py:51] router release req id 8 +INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10919308662414551 s +INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.1111447811126709 s +DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=25860185952810873825296780073958984946, time:1750768827.2886305s req_ids:[8] +DEBUG 06-24 20:40:27 [manager.py:391] +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:215.6996726989746ms total_cost_time:215.74163436889648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14744 prompt_cache_len:5151 prompt_cache_ratio:0.34936245252306025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 +DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:27 [batch.py:51] router release req id 8 +INFO 06-24 20:40:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10879969596862793 s +INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s +DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=337448448930846003328047093995079386642, time:1750768827.507925s req_ids:[8] +DEBUG 06-24 20:40:27 [manager.py:391] +ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:213.3791446685791ms total_cost_time:213.4225368499756ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14745 prompt_cache_len:5151 prompt_cache_ratio:0.3493387589013225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 +DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:27 [batch.py:51] router release req id 8 +INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10819721221923828 s +INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11011791229248047 s +DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=111885460944661861847382958704664014548, time:1750768827.7268631s req_ids:[8] +DEBUG 06-24 20:40:27 [manager.py:391] +ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:215.3646945953369ms total_cost_time:215.4068946838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14746 prompt_cache_len:5151 prompt_cache_ratio:0.3493150684931507 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 +DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:27 [batch.py:51] router release req id 8 +INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10886240005493164 s +INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s +DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=204066637168487904519354615423161611838, time:1750768827.948776s req_ids:[8] +DEBUG 06-24 20:40:27 [manager.py:391] +ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:210.40749549865723ms total_cost_time:210.45207977294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14747 prompt_cache_len:5151 prompt_cache_ratio:0.3492913812978911 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 +DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:28 [batch.py:51] router release req id 8 +INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10925960540771484 s +INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.11109066009521484 s +DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=301928371783730248639170560846551572358, time:1750768828.1656218s req_ids:[8] +DEBUG 06-24 20:40:28 [manager.py:391] +ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:375.3514289855957ms total_cost_time:375.3950595855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14748 prompt_cache_len:5151 prompt_cache_ratio:0.34926769731489016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 +DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:28 [batch.py:51] router release req id 8 +INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.1084585189819336 s +INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s +DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=85022395819210198641587113467841012162, time:1750768828.5467563s req_ids:[8] +DEBUG 06-24 20:40:28 [manager.py:391] +ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:215.6815528869629ms total_cost_time:215.7275676727295ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14749 prompt_cache_len:5151 prompt_cache_ratio:0.34924401654349446 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 +DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:28 [batch.py:51] router release req id 8 +INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s +INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.11013531684875488 s +DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=304717683666559356671544002785498700441, time:1750768828.7697897s req_ids:[8] +DEBUG 06-24 20:40:28 [manager.py:391] +ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:215.54255485534668ms total_cost_time:215.58523178100586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14750 prompt_cache_len:5151 prompt_cache_ratio:0.3492203389830508 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 +DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:28 [batch.py:51] router release req id 8 +INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s +INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.10983443260192871 s +DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=238948343146417012451175734216975354500, time:1750768828.9924626s req_ids:[8] +DEBUG 06-24 20:40:28 [manager.py:391] +ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:212.10694313049316ms total_cost_time:212.15057373046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14751 prompt_cache_len:5151 prompt_cache_ratio:0.3491966646329062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 +DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:29 [batch.py:51] router release req id 8 +INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.10842394828796387 s +INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11021709442138672 s +DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=261982068674132505410481916816620574882, time:1750768829.2109358s req_ids:[8] +DEBUG 06-24 20:40:29 [manager.py:391] +ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:213.44590187072754ms total_cost_time:213.49191665649414ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14752 prompt_cache_len:5151 prompt_cache_ratio:0.3491729934924078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 +DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:29 [batch.py:51] router release req id 8 +INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s +INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11051368713378906 s +DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=169220190852814432629664672288023179917, time:1750768829.4294777s req_ids:[8] +DEBUG 06-24 20:40:29 [manager.py:391] +ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:381.50978088378906ms total_cost_time:381.55388832092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14753 prompt_cache_len:5151 prompt_cache_ratio:0.3491493255609029 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 +DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:29 [batch.py:51] router release req id 8 +INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.1082465648651123 s +INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s +DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=136751505937359784113896126303637329662, time:1750768829.8186388s req_ids:[8] +DEBUG 06-24 20:40:29 [manager.py:391] +ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:215.44146537780762ms total_cost_time:215.4850959777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14754 prompt_cache_len:5151 prompt_cache_ratio:0.34912566083773894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 +DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:29 [batch.py:51] router release req id 8 +INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10795116424560547 s +INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.10994172096252441 s +DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=335897480439458947946066291621747337719, time:1750768830.0413418s req_ids:[8] +DEBUG 06-24 20:40:30 [manager.py:391] +ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 58824.353 tokens/s +DEBUG 06-24 20:40:30 [stats.py:37] Avg prompt tokens throughput: 58816.370 tokens/s +DEBUG 06-24 20:40:30 [stats.py:37] Avg generate tokens throughput: 7.983 tokens/s +INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:211.83013916015625ms total_cost_time:211.87281608581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14755 prompt_cache_len:5151 prompt_cache_ratio:0.34910199932226366 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 +DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:30 [batch.py:51] router release req id 8 +INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10884952545166016 s +INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.11098051071166992 s +DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=316787523390822672496591234072188396123, time:1750768830.257951s req_ids:[8] +DEBUG 06-24 20:40:30 [manager.py:391] +ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.9507999420166ms total_cost_time:214.9949073791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14756 prompt_cache_len:5151 prompt_cache_ratio:0.3490783410138249 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 +DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:30 [batch.py:51] router release req id 8 +INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.1086118221282959 s +INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s +DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=8379380313012295456977891024923444727, time:1750768830.4794853s req_ids:[8] +DEBUG 06-24 20:40:30 [manager.py:391] +ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.4148349761963ms total_cost_time:214.45846557617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14757 prompt_cache_len:5151 prompt_cache_ratio:0.3490546859117707 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 +DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:30 [batch.py:51] router release req id 8 +INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10900068283081055 s +INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.11089086532592773 s +DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=77357581678859967715883209571237919283, time:1750768830.699089s req_ids:[8] +DEBUG 06-24 20:40:30 [manager.py:391] +ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:402.65727043151855ms total_cost_time:402.69970893859863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14758 prompt_cache_len:5151 prompt_cache_ratio:0.34903103401544927 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 +DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:31 [batch.py:51] router release req id 8 +INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s +INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s +DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=89919806168213389146430452084712271202, time:1750768831.1073728s req_ids:[8] +DEBUG 06-24 20:40:31 [manager.py:391] +ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.13397789001465ms total_cost_time:214.17641639709473ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14759 prompt_cache_len:5151 prompt_cache_ratio:0.34900738532420894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 +DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:31 [batch.py:51] router release req id 8 +INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s +INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11135673522949219 s +DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=144128991160033358385336282731478328251, time:1750768831.3273485s req_ids:[8] +DEBUG 06-24 20:40:31 [manager.py:391] +ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:216.39680862426758ms total_cost_time:216.44115447998047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14760 prompt_cache_len:5151 prompt_cache_ratio:0.3489837398373984 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 +DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:31 [batch.py:51] router release req id 8 +INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s +INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11038541793823242 s +DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=162800491243798785339987305273618859483, time:1750768831.5511847s req_ids:[8] +DEBUG 06-24 20:40:31 [manager.py:391] +ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:216.28355979919434ms total_cost_time:216.34173393249512ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14761 prompt_cache_len:5151 prompt_cache_ratio:0.34896009755436624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 +DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:31 [batch.py:51] router release req id 8 +INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s +INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s +DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=14236172323634276415849350806864899728, time:1750768831.782177s req_ids:[8] +DEBUG 06-24 20:40:31 [manager.py:391] +ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:225.70037841796875ms total_cost_time:225.74448585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14762 prompt_cache_len:5151 prompt_cache_ratio:0.3489364584744615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 +DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:31 [batch.py:51] router release req id 8 +INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10927534103393555 s +INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11098504066467285 s +DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=90278934900427237829959953923954571040, time:1750768832.0072606s req_ids:[8] +DEBUG 06-24 20:40:32 [manager.py:391] +ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:214.4942283630371ms total_cost_time:214.5369052886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14763 prompt_cache_len:5151 prompt_cache_ratio:0.3489128225970331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 +DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:32 [batch.py:51] router release req id 8 +INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10785079002380371 s +INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.10979413986206055 s +DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=290108574513598769533109026722285547161, time:1750768832.226965s req_ids:[8] +DEBUG 06-24 20:40:32 [manager.py:391] +ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:380.3138732910156ms total_cost_time:380.3596496582031ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14764 prompt_cache_len:5151 prompt_cache_ratio:0.34888918992143053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 +DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:32 [batch.py:51] router release req id 8 +INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10946297645568848 s +INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11204075813293457 s +DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=308653551652250667168158287772320357287, time:1750768832.6149256s req_ids:[8] +DEBUG 06-24 20:40:32 [manager.py:391] +ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:214.951753616333ms total_cost_time:214.9946689605713ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14765 prompt_cache_len:5151 prompt_cache_ratio:0.34886556044700306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 +DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:32 [batch.py:51] router release req id 8 +INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s +INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s +DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=250059938936350750265845591852419488804, time:1750768832.837375s req_ids:[8] +DEBUG 06-24 20:40:32 [manager.py:391] +ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:211.75622940063477ms total_cost_time:211.80009841918945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14766 prompt_cache_len:5151 prompt_cache_ratio:0.3488419341731004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 +DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:32 [batch.py:51] router release req id 8 +INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10816597938537598 s +INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.11007332801818848 s +DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=86195731079465471946376652617093461032, time:1750768833.0554962s req_ids:[8] +DEBUG 06-24 20:40:33 [manager.py:391] +ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:214.0824794769287ms total_cost_time:214.1268253326416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14767 prompt_cache_len:5151 prompt_cache_ratio:0.34881831109907224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 +DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:33 [batch.py:51] router release req id 8 +INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10895419120788574 s +INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.11115264892578125 s +DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=319918078160362812947852012016022974385, time:1750768833.2873003s req_ids:[8] +DEBUG 06-24 20:40:33 [manager.py:391] +ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:228.98006439208984ms total_cost_time:229.02369499206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14768 prompt_cache_len:5151 prompt_cache_ratio:0.3487946912242687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 +DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:33 [batch.py:51] router release req id 8 +INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10770630836486816 s +INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.10962724685668945 s +DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=240273272608963791126271751781661022787, time:1750768833.5117347s req_ids:[8] +DEBUG 06-24 20:40:33 [manager.py:391] +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:215.93284606933594ms total_cost_time:215.97599983215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14769 prompt_cache_len:5151 prompt_cache_ratio:0.3487710745480398 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 +DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:33 [batch.py:51] router release req id 8 +INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10782051086425781 s +INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s +DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=213404560950185398293611639552792144541, time:1750768833.7347152s req_ids:[8] +DEBUG 06-24 20:40:33 [manager.py:391] +ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:377.90489196777344ms total_cost_time:377.95066833496094ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14770 prompt_cache_len:5151 prompt_cache_ratio:0.348747461069736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 +DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:34 [batch.py:51] router release req id 8 +INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10912370681762695 s +INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s +DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=5606609173108477116399842652558081711, time:1750768834.1190033s req_ids:[8] +DEBUG 06-24 20:40:34 [manager.py:391] +ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:215.09933471679688ms total_cost_time:215.14344215393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14771 prompt_cache_len:5151 prompt_cache_ratio:0.3487238507887076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 +DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:34 [batch.py:51] router release req id 8 +INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10826849937438965 s +INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11042642593383789 s +DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=296724273848153874182849290219519330917, time:1750768834.3406718s req_ids:[8] +DEBUG 06-24 20:40:34 [manager.py:391] +ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:214.75720405578613ms total_cost_time:214.8146629333496ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14772 prompt_cache_len:5151 prompt_cache_ratio:0.34870024370430547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 +DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:34 [batch.py:51] router release req id 8 +INFO 06-24 20:40:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.11103177070617676 s +INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11298656463623047 s +DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=149230804867059076690486805536121674095, time:1750768834.573161s req_ids:[8] +DEBUG 06-24 20:40:34 [manager.py:391] +ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:229.5377254486084ms total_cost_time:229.5827865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14773 prompt_cache_len:5151 prompt_cache_ratio:0.3486766398158803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 +DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:34 [batch.py:51] router release req id 8 +INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s +INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11079716682434082 s +DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=314677339942655718412197114918525105668, time:1750768834.7979693s req_ids:[8] +DEBUG 06-24 20:40:34 [manager.py:391] +ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:215.19780158996582ms total_cost_time:215.2414321899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14774 prompt_cache_len:5151 prompt_cache_ratio:0.3486530391227833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 +DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:34 [batch.py:51] router release req id 8 +INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s +INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.10965371131896973 s +DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=195598420901633717811654569604019799592, time:1750768835.020666s req_ids:[8] +DEBUG 06-24 20:40:35 [manager.py:391] +ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:209.25498008728027ms total_cost_time:209.30123329162598ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14775 prompt_cache_len:5151 prompt_cache_ratio:0.3486294416243655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 +DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:35 [batch.py:51] router release req id 8 +INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10860824584960938 s +INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.11043357849121094 s +DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=326425699761088970318213995691303837569, time:1750768835.236078s req_ids:[8] +DEBUG 06-24 20:40:35 [manager.py:391] +ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:377.02012062072754ms total_cost_time:377.06565856933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14776 prompt_cache_len:5151 prompt_cache_ratio:0.34860584731997835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 +DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:35 [batch.py:51] router release req id 8 +INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10793638229370117 s +INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.11000704765319824 s +DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=188097941491711298999829338146366058409, time:1750768835.620981s req_ids:[8] +DEBUG 06-24 20:40:35 [manager.py:391] +ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:217.18573570251465ms total_cost_time:217.22912788391113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14777 prompt_cache_len:5151 prompt_cache_ratio:0.34858225620897343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 +DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:35 [batch.py:51] router release req id 8 +INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s +INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.10982799530029297 s +DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=289448305566789996028884232127358435451, time:1750768835.8445811s req_ids:[8] +DEBUG 06-24 20:40:35 [manager.py:391] +ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:209.02204513549805ms total_cost_time:209.06567573547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14778 prompt_cache_len:5151 prompt_cache_ratio:0.3485586682907024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 +DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:35 [batch.py:51] router release req id 8 +INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10978341102600098 s +INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.11185026168823242 s +DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=203507385669399716196884834621496899202, time:1750768836.058192s req_ids:[8] +DEBUG 06-24 20:40:36 [manager.py:391] +ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:211.33089065551758ms total_cost_time:211.37332916259766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14779 prompt_cache_len:5151 prompt_cache_ratio:0.34853508356451723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 +DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:36 [batch.py:51] router release req id 8 +INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s +INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.11152410507202148 s +DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=174696255399302560808350780295131976025, time:1750768836.2775006s req_ids:[8] +DEBUG 06-24 20:40:36 [manager.py:391] +ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:215.76189994812012ms total_cost_time:215.82627296447754ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:14780 prompt_cache_len:5151 prompt_cache_ratio:0.34851150202976994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 +DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:36 [batch.py:51] router release req id 8 +INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10876631736755371 s +INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.1107320785522461 s +DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=128572679933745898463541105595346638377, time:1750768836.4973052s req_ids:[8] +DEBUG 06-24 20:40:36 [manager.py:391] +ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:207.84950256347656ms total_cost_time:207.89337158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14781 prompt_cache_len:5151 prompt_cache_ratio:0.3484879236858129 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 +DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:36 [batch.py:51] router release req id 8 +INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.31009888648986816 s +INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.3113257884979248 s +DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=322321286095607465575083003658026026616, time:1750768836.9207911s req_ids:[8] +DEBUG 06-24 20:40:36 [manager.py:391] +ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:428.9834499359131ms total_cost_time:429.0294647216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14782 prompt_cache_len:5151 prompt_cache_ratio:0.34846434853199837 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 +DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:37 [batch.py:51] router release req id 8 +INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10892033576965332 s +INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075758934020996 s +DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=274554121658867261844014937918426724658, time:1750768837.1492841s req_ids:[8] +DEBUG 06-24 20:40:37 [manager.py:391] +ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:215.35181999206543ms total_cost_time:215.39568901062012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14783 prompt_cache_len:5151 prompt_cache_ratio:0.3484407765676791 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 +DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:37 [batch.py:51] router release req id 8 +INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10909843444824219 s +INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s +DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=8613331649267303223324639721188083384, time:1750768837.3838494s req_ids:[8] +DEBUG 06-24 20:40:37 [manager.py:391] +ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:230.04770278930664ms total_cost_time:230.09181022644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14784 prompt_cache_len:5151 prompt_cache_ratio:0.3484172077922078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 +DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:37 [batch.py:51] router release req id 8 +INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s +INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11016702651977539 s +DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=249941453246777411140560058546242566782, time:1750768837.608913s req_ids:[8] +DEBUG 06-24 20:40:37 [manager.py:391] +ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:195.16873359680176ms total_cost_time:195.21164894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14785 prompt_cache_len:5151 prompt_cache_ratio:0.34839364220493746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 +DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:37 [batch.py:51] router release req id 8 +INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.1085968017578125 s +INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11036181449890137 s +DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=338885971832902405423569492111995136872, time:1750768837.823307s req_ids:[8] +DEBUG 06-24 20:40:37 [manager.py:391] +ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:224.26366806030273ms total_cost_time:224.30682182312012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14786 prompt_cache_len:5151 prompt_cache_ratio:0.34837007980522117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 +DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:38 [batch.py:51] router release req id 8 +INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.3097951412200928 s +INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.31174707412719727 s +DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=23925561919112979986364379510242258014, time:1750768838.246147s req_ids:[8] +DEBUG 06-24 20:40:38 [manager.py:391] +ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:426.2204170227051ms total_cost_time:426.26500129699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14787 prompt_cache_len:5151 prompt_cache_ratio:0.3483465205924123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 +DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:38 [batch.py:51] router release req id 8 +INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.10868334770202637 s +INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s +DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=140012892025942787003626803520117093065, time:1750768838.4734833s req_ids:[8] +DEBUG 06-24 20:40:38 [manager.py:391] +ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:193.4823989868164ms total_cost_time:193.5257911682129ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14788 prompt_cache_len:5151 prompt_cache_ratio:0.3483229645658642 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 +DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:38 [batch.py:51] router release req id 8 +INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.1091318130493164 s +INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.1110684871673584 s +DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=150865796908841619747971517517389321226, time:1750768838.673945s req_ids:[8] +DEBUG 06-24 20:40:38 [manager.py:391] +ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:206.91204071044922ms total_cost_time:206.9554328918457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14789 prompt_cache_len:5151 prompt_cache_ratio:0.3482994117249307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 +DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:38 [batch.py:51] router release req id 8 +INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.10880875587463379 s +INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.11069893836975098 s +DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=155370376659140290057391126074022939875, time:1750768838.8885734s req_ids:[8] +DEBUG 06-24 20:40:38 [manager.py:391] +ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:209.19156074523926ms total_cost_time:209.23328399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14790 prompt_cache_len:5151 prompt_cache_ratio:0.3482758620689655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 +DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:39 [batch.py:51] router release req id 8 +INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10833907127380371 s +INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11026334762573242 s +DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=55480398420202676776097449004736821523, time:1750768839.1034193s req_ids:[8] +DEBUG 06-24 20:40:39 [manager.py:391] +ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:211.74979209899902ms total_cost_time:211.79485321044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14791 prompt_cache_len:5151 prompt_cache_ratio:0.34825231559732267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 +DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:39 [batch.py:51] router release req id 8 +INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10880374908447266 s +INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11072587966918945 s +DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=43485270828814745586883311352624281336, time:1750768839.3224406s req_ids:[8] +DEBUG 06-24 20:40:39 [manager.py:391] +ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:344.2087173461914ms total_cost_time:344.2542552947998ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14792 prompt_cache_len:5151 prompt_cache_ratio:0.3482287723093564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 +DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:39 [batch.py:51] router release req id 8 +INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10952568054199219 s +INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11155033111572266 s +DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=190588304763817001540117053555682120257, time:1750768839.672383s req_ids:[8] +DEBUG 06-24 20:40:39 [manager.py:391] +ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:211.56954765319824ms total_cost_time:211.61460876464844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14793 prompt_cache_len:5151 prompt_cache_ratio:0.348205232204421 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 +DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:39 [batch.py:51] router release req id 8 +INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s +INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11140561103820801 s +DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=216017589300901786021104684395790007967, time:1750768839.8923526s req_ids:[8] +DEBUG 06-24 20:40:39 [manager.py:391] +ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:214.38193321228027ms total_cost_time:214.4031524658203ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:14794 prompt_cache_len:5151 prompt_cache_ratio:0.34818169528187104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 +DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:40 [batch.py:51] router release req id 8 +INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10372233390808105 s +INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.10554194450378418 s +DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=88771526094195011519561903158310682478, time:1750768840.1132681s req_ids:[8] +DEBUG 06-24 20:40:40 [manager.py:391] +ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 58900.734 tokens/s +DEBUG 06-24 20:40:40 [stats.py:37] Avg prompt tokens throughput: 58892.762 tokens/s +DEBUG 06-24 20:40:40 [stats.py:37] Avg generate tokens throughput: 7.972 tokens/s +INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:176.04732513427734ms total_cost_time:176.08952522277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14795 prompt_cache_len:5151 prompt_cache_ratio:0.34815816154106116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 +DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:40 [batch.py:51] router release req id 8 +INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10633277893066406 s +INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.10846066474914551 s +DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=38412919081624769680726841100780455480, time:1750768840.2944572s req_ids:[8] +DEBUG 06-24 20:40:40 [manager.py:391] +ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:207.96680450439453ms total_cost_time:208.0094814300537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14796 prompt_cache_len:5151 prompt_cache_ratio:0.3481346309813463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 +DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:40 [batch.py:51] router release req id 8 +INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10823559761047363 s +INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.11023259162902832 s +DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=317988550995046517931631244083331426994, time:1750768840.5065968s req_ids:[8] +DEBUG 06-24 20:40:40 [manager.py:391] +ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:215.66438674926758ms total_cost_time:215.70587158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14797 prompt_cache_len:5151 prompt_cache_ratio:0.34811110360208153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 +DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:40 [batch.py:51] router release req id 8 +INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s +INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s +DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=199367957817151179193796228365825485852, time:1750768840.7287345s req_ids:[8] +DEBUG 06-24 20:40:40 [manager.py:391] +ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:391.7500972747803ms total_cost_time:391.79515838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14798 prompt_cache_len:5151 prompt_cache_ratio:0.34808757940262197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 +DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:41 [batch.py:51] router release req id 8 +INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10953998565673828 s +INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11149477958679199 s +DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=21686356965695087694596218120765870718, time:1750768841.1277306s req_ids:[8] +DEBUG 06-24 20:40:41 [manager.py:391] +ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:218.4431552886963ms total_cost_time:218.48773956298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14799 prompt_cache_len:5151 prompt_cache_ratio:0.3480640583823231 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 +DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:41 [batch.py:51] router release req id 8 +INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10848569869995117 s +INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11051034927368164 s +DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=246630996474985688940491096054188049027, time:1750768841.3493364s req_ids:[8] +DEBUG 06-24 20:40:41 [manager.py:391] +ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:213.88721466064453ms total_cost_time:213.93203735351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14800 prompt_cache_len:5151 prompt_cache_ratio:0.34804054054054057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 +DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:41 [batch.py:51] router release req id 8 +INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10902690887451172 s +INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11108899116516113 s +DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=146121580510277964902055776495097165605, time:1750768841.5718713s req_ids:[8] +DEBUG 06-24 20:40:41 [manager.py:391] +ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:228.89161109924316ms total_cost_time:228.93786430358887ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14801 prompt_cache_len:5151 prompt_cache_ratio:0.34801702587662997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 +DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:41 [batch.py:51] router release req id 8 +INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10832476615905762 s +INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.1101219654083252 s +DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=75353447346946242889611135486140653627, time:1750768841.8321242s req_ids:[8] +DEBUG 06-24 20:40:41 [manager.py:391] +ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:228.77216339111328ms total_cost_time:228.81650924682617ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14802 prompt_cache_len:5151 prompt_cache_ratio:0.3479935143899473 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 +DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:41 [batch.py:51] router release req id 8 +INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.1090857982635498 s +INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11103224754333496 s +DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=98355756968641116070676484962887962975, time:1750768842.04269s req_ids:[8] +DEBUG 06-24 20:40:42 [manager.py:391] +ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:384.63521003723145ms total_cost_time:384.6783638000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14803 prompt_cache_len:5151 prompt_cache_ratio:0.3479700060798487 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 +DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:42 [batch.py:51] router release req id 8 +INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10943102836608887 s +INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11142444610595703 s +DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=176278200048239980606288599395600652085, time:1750768842.4341617s req_ids:[8] +DEBUG 06-24 20:40:42 [manager.py:391] +ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:228.87253761291504ms total_cost_time:228.91569137573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14804 prompt_cache_len:5151 prompt_cache_ratio:0.34794650094569035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 +DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:42 [batch.py:51] router release req id 8 +INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s +INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s +DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=182347878234621687476335759762714745994, time:1750768842.6866887s req_ids:[8] +DEBUG 06-24 20:40:42 [manager.py:391] +ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:224.33233261108398ms total_cost_time:224.37810897827148ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14805 prompt_cache_len:5151 prompt_cache_ratio:0.34792299898682877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 +DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:42 [batch.py:51] router release req id 8 +INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10746335983276367 s +INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.10937714576721191 s +DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=78715056893372488881069265307737301189, time:1750768842.9111028s req_ids:[8] +DEBUG 06-24 20:40:42 [manager.py:391] +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:227.83684730529785ms total_cost_time:227.88000106811523ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14806 prompt_cache_len:5151 prompt_cache_ratio:0.34789950020262056 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 +DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:43 [batch.py:51] router release req id 8 +INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s +INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11004066467285156 s +DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=42440626120652398773116384548159058234, time:1750768843.1347637s req_ids:[8] +DEBUG 06-24 20:40:43 [manager.py:391] +ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:217.7562713623047ms total_cost_time:217.79990196228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14807 prompt_cache_len:5151 prompt_cache_ratio:0.3478760045924225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 +DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:43 [batch.py:51] router release req id 8 +INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s +INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.1094658374786377 s +DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=218908050620441887875082233001866425646, time:1750768843.3588357s req_ids:[8] +DEBUG 06-24 20:40:43 [manager.py:391] +ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:207.1547508239746ms total_cost_time:207.1990966796875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14808 prompt_cache_len:5151 prompt_cache_ratio:0.3478525121555916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 +DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:43 [batch.py:51] router release req id 8 +INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10818624496459961 s +INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036348342895508 s +DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=61797890058957720954323536615300824332, time:1750768843.5725849s req_ids:[8] +DEBUG 06-24 20:40:43 [manager.py:391] +ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:384.16099548339844ms total_cost_time:384.2048645019531ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14809 prompt_cache_len:5151 prompt_cache_ratio:0.34782902289148493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 +DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:43 [batch.py:51] router release req id 8 +INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10889124870300293 s +INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11093664169311523 s +DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=248102945857509419014935369627352271495, time:1750768843.965249s req_ids:[8] +DEBUG 06-24 20:40:43 [manager.py:391] +ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:215.2543067932129ms total_cost_time:215.29650688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14810 prompt_cache_len:5151 prompt_cache_ratio:0.3478055367994598 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 +DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:44 [batch.py:51] router release req id 8 +INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10865592956542969 s +INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s +DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=132179859115854363224722763400381134218, time:1750768844.188374s req_ids:[8] +DEBUG 06-24 20:40:44 [manager.py:391] +ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:210.71434020996094ms total_cost_time:210.75844764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14811 prompt_cache_len:5151 prompt_cache_ratio:0.3477820538788738 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 +DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:44 [batch.py:51] router release req id 8 +INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10777425765991211 s +INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.10993742942810059 s +DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=181707344387863956869768278569590949484, time:1750768844.4064724s req_ids:[8] +DEBUG 06-24 20:40:44 [manager.py:391] +ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:211.44723892211914ms total_cost_time:211.49373054504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14812 prompt_cache_len:5151 prompt_cache_ratio:0.3477585741290845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 +DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:44 [batch.py:51] router release req id 8 +INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s +INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11139512062072754 s +DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=182705360682541200207469516667798243953, time:1750768844.6259582s req_ids:[8] +DEBUG 06-24 20:40:44 [manager.py:391] +ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:215.89064598083496ms total_cost_time:215.93403816223145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14813 prompt_cache_len:5151 prompt_cache_ratio:0.3477350975494498 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 +DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:44 [batch.py:51] router release req id 8 +INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.1096944808959961 s +INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11194062232971191 s +DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=65094034135427083880458228615148150183, time:1750768844.848637s req_ids:[8] +DEBUG 06-24 20:40:44 [manager.py:391] +ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:239.6676540374756ms total_cost_time:239.71319198608398ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14814 prompt_cache_len:5151 prompt_cache_ratio:0.34771162413932766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 +DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:45 [batch.py:51] router release req id 8 +INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10834097862243652 s +INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11023116111755371 s +DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=252061074605528275909546173656225196990, time:1750768845.1003077s req_ids:[8] +DEBUG 06-24 20:40:45 [manager.py:391] +ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:386.63387298583984ms total_cost_time:386.69538497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14815 prompt_cache_len:5151 prompt_cache_ratio:0.3476881538980763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 +DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:45 [batch.py:51] router release req id 8 +INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.1091454029083252 s +INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11174368858337402 s +DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=235027181488721524814439854195963616286, time:1750768845.4884155s req_ids:[8] +DEBUG 06-24 20:40:45 [manager.py:391] +ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:207.66639709472656ms total_cost_time:207.71026611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14816 prompt_cache_len:5151 prompt_cache_ratio:0.34766468682505397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 +DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:45 [batch.py:51] router release req id 8 +INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10788989067077637 s +INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034226417541504 s +DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=231828423082246704938994927192289814123, time:1750768845.7132616s req_ids:[8] +DEBUG 06-24 20:40:45 [manager.py:391] +ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:235.4729175567627ms total_cost_time:235.5179786682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14817 prompt_cache_len:5151 prompt_cache_ratio:0.34764122291961935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 +DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:45 [batch.py:51] router release req id 8 +INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10912156105041504 s +INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11171650886535645 s +DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=70839508973314836433232067581364468733, time:1750768845.9441757s req_ids:[8] +DEBUG 06-24 20:40:45 [manager.py:391] +ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:204.41842079162598ms total_cost_time:204.46252822875977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14818 prompt_cache_len:5151 prompt_cache_ratio:0.34761776218113105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 +DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:46 [batch.py:51] router release req id 8 +INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10917353630065918 s +INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.1114656925201416 s +DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=92029641910859212780503711229643831801, time:1750768846.1594272s req_ids:[8] +DEBUG 06-24 20:40:46 [manager.py:391] +ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:219.2704677581787ms total_cost_time:219.313383102417ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14819 prompt_cache_len:5151 prompt_cache_ratio:0.34759430460894797 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 +DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:46 [batch.py:51] router release req id 8 +INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10956382751464844 s +INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.11150240898132324 s +DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=248794822004897147880821612489529625877, time:1750768846.3853245s req_ids:[8] +DEBUG 06-24 20:40:46 [manager.py:391] +ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:387.1264457702637ms total_cost_time:387.17150688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14820 prompt_cache_len:5151 prompt_cache_ratio:0.34757085020242917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 +DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:46 [batch.py:51] router release req id 8 +INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10947775840759277 s +INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.11163878440856934 s +DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=337460981725000884871031918288729756582, time:1750768846.7813277s req_ids:[8] +DEBUG 06-24 20:40:46 [manager.py:391] +ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:222.1059799194336ms total_cost_time:222.14865684509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14821 prompt_cache_len:5151 prompt_cache_ratio:0.3475473989609338 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 +DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:46 [batch.py:51] router release req id 8 +INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10936737060546875 s +INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11185455322265625 s +DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=58911421354191091316940430440805258336, time:1750768847.007873s req_ids:[8] +DEBUG 06-24 20:40:47 [manager.py:391] +ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:214.7653102874756ms total_cost_time:214.80751037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14822 prompt_cache_len:5151 prompt_cache_ratio:0.3475239508838213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 +DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:47 [batch.py:51] router release req id 8 +INFO 06-24 20:40:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10927057266235352 s +INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11122250556945801 s +DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=286061251294349083806485342800136138260, time:1750768847.2354212s req_ids:[8] +DEBUG 06-24 20:40:47 [manager.py:391] +ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:224.26366806030273ms total_cost_time:224.30920600891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14823 prompt_cache_len:5151 prompt_cache_ratio:0.34750050597045135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 +DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:47 [batch.py:51] router release req id 8 +INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.11070775985717773 s +INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11274242401123047 s +DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=54203497360153627201432010978567480053, time:1750768847.4633265s req_ids:[8] +DEBUG 06-24 20:40:47 [manager.py:391] +ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:221.99225425720215ms total_cost_time:222.03564643859863ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14824 prompt_cache_len:5151 prompt_cache_ratio:0.3474770642201835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 +DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:47 [batch.py:51] router release req id 8 +INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10803079605102539 s +INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s +DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=191499043781114413400517843306229530258, time:1750768847.703136s req_ids:[8] +DEBUG 06-24 20:40:47 [manager.py:391] +ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:236.19437217712402ms total_cost_time:236.23919486999512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14825 prompt_cache_len:5151 prompt_cache_ratio:0.34745362563237775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 +DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:47 [batch.py:51] router release req id 8 +INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.1084141731262207 s +INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11034631729125977 s +DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=274023715708474064479896137189114040211, time:1750768847.9301534s req_ids:[8] +DEBUG 06-24 20:40:47 [manager.py:391] +ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:381.8204402923584ms total_cost_time:381.866455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14826 prompt_cache_len:5151 prompt_cache_ratio:0.34743019020639415 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 +DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:48 [batch.py:51] router release req id 8 +INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.11069416999816895 s +INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11327195167541504 s +DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=302140411455865068881109828679972372929, time:1750768848.3216832s req_ids:[8] +DEBUG 06-24 20:40:48 [manager.py:391] +ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:221.50349617004395ms total_cost_time:221.54712677001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14827 prompt_cache_len:5151 prompt_cache_ratio:0.34740675794159304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 +DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:48 [batch.py:51] router release req id 8 +INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.10906767845153809 s +INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11093854904174805 s +DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=127151430386715834625302850410601790372, time:1750768848.547938s req_ids:[8] +DEBUG 06-24 20:40:48 [manager.py:391] +ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:218.5075283050537ms total_cost_time:218.5509204864502ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14828 prompt_cache_len:5151 prompt_cache_ratio:0.34738332883733475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 +DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:48 [batch.py:51] router release req id 8 +INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.10842251777648926 s +INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s +DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=65346817275911521283094103287015042885, time:1750768848.7742355s req_ids:[8] +DEBUG 06-24 20:40:48 [manager.py:391] +ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:220.92795372009277ms total_cost_time:220.97182273864746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14829 prompt_cache_len:5151 prompt_cache_ratio:0.34735990289297997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 +DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:48 [batch.py:51] router release req id 8 +INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.1081078052520752 s +INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11030244827270508 s +DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=148721898668657336259600816473668756952, time:1750768849.012154s req_ids:[8] +DEBUG 06-24 20:40:49 [manager.py:391] +ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:232.29694366455078ms total_cost_time:232.34176635742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14830 prompt_cache_len:5151 prompt_cache_ratio:0.3473364801078894 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 +DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:49 [batch.py:51] router release req id 8 +INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10814929008483887 s +INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016297340393066 s +DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=36131333846343433078011965290492150550, time:1750768849.2393086s req_ids:[8] +DEBUG 06-24 20:40:49 [manager.py:391] +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:387.85552978515625ms total_cost_time:387.89844512939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14831 prompt_cache_len:5151 prompt_cache_ratio:0.347313060481424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 +DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:49 [batch.py:51] router release req id 8 +INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s +INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11047220230102539 s +DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=270885321025767923499650750163872627883, time:1750768849.6341548s req_ids:[8] +DEBUG 06-24 20:40:49 [manager.py:391] +ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:217.2551155090332ms total_cost_time:217.29707717895508ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14832 prompt_cache_len:5151 prompt_cache_ratio:0.347289644012945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 +DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:49 [batch.py:51] router release req id 8 +INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10888862609863281 s +INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s +DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=167722352798595551960208905556493702864, time:1750768849.8588948s req_ids:[8] +DEBUG 06-24 20:40:49 [manager.py:391] +ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:215.82388877868652ms total_cost_time:215.867280960083ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14833 prompt_cache_len:5151 prompt_cache_ratio:0.3472662307018135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 +DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:49 [batch.py:51] router release req id 8 +INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10925173759460449 s +INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11127257347106934 s +DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=138686576213767389852768482786332323152, time:1750768850.0802891s req_ids:[8] +DEBUG 06-24 20:40:50 [manager.py:391] +ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:40:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 57727.491 tokens/s +DEBUG 06-24 20:40:50 [stats.py:37] Avg prompt tokens throughput: 57719.699 tokens/s +DEBUG 06-24 20:40:50 [stats.py:37] Avg generate tokens throughput: 7.792 tokens/s +INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:218.55664253234863ms total_cost_time:218.5990810394287ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14834 prompt_cache_len:5151 prompt_cache_ratio:0.34724282054739114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 +DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:50 [batch.py:51] router release req id 8 +INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10952496528625488 s +INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11142468452453613 s +DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=268796056452635516818572284828377338424, time:1750768850.3064694s req_ids:[8] +DEBUG 06-24 20:40:50 [manager.py:391] +ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:217.21363067626953ms total_cost_time:217.25749969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14835 prompt_cache_len:5151 prompt_cache_ratio:0.34721941354903946 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 +DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:50 [batch.py:51] router release req id 8 +INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10938119888305664 s +INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140751838684082 s +DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=42937648339821814852118522490560197425, time:1750768850.5299842s req_ids:[8] +DEBUG 06-24 20:40:50 [manager.py:391] +ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:213.914155960083ms total_cost_time:213.9577865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14836 prompt_cache_len:5151 prompt_cache_ratio:0.34719600970612025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 +DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:50 [batch.py:51] router release req id 8 +INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10964083671569824 s +INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11079764366149902 s +DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=126637337826036183264262803857920339117, time:1750768850.7508225s req_ids:[8] +DEBUG 06-24 20:40:50 [manager.py:391] +ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:383.699893951416ms total_cost_time:383.7423324584961ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14837 prompt_cache_len:5151 prompt_cache_ratio:0.34717260901799557 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 +DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:51 [batch.py:51] router release req id 8 +INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10813140869140625 s +INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s +DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=61994194845261628209951332147028630730, time:1750768851.141053s req_ids:[8] +DEBUG 06-24 20:40:51 [manager.py:391] +ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:218.3704376220703ms total_cost_time:218.4126377105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14838 prompt_cache_len:5151 prompt_cache_ratio:0.3471492114840275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 +DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:51 [batch.py:51] router release req id 8 +INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10895276069641113 s +INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11089301109313965 s +DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=127323199384974285863607263965967918894, time:1750768851.3745673s req_ids:[8] +DEBUG 06-24 20:40:51 [manager.py:391] +ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:228.2085418701172ms total_cost_time:228.25288772583008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14839 prompt_cache_len:5151 prompt_cache_ratio:0.3471258171035784 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 +DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:51 [batch.py:51] router release req id 8 +INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s +INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11014747619628906 s +DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=303294219303015022340331324845255028826, time:1750768851.6000302s req_ids:[8] +DEBUG 06-24 20:40:51 [manager.py:391] +ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:217.31138229370117ms total_cost_time:217.35429763793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14840 prompt_cache_len:5151 prompt_cache_ratio:0.34710242587601076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 +DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:51 [batch.py:51] router release req id 8 +INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10945439338684082 s +INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11120843887329102 s +DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=48445642108979346594076659743089461924, time:1750768851.8232896s req_ids:[8] +DEBUG 06-24 20:40:51 [manager.py:391] +ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:214.20764923095703ms total_cost_time:214.2500877380371ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14841 prompt_cache_len:5151 prompt_cache_ratio:0.3470790378006873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 +DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:51 [batch.py:51] router release req id 8 +INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10936880111694336 s +INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104202270507812 s +DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=205344609554262640986074329798015933901, time:1750768852.0445037s req_ids:[8] +DEBUG 06-24 20:40:52 [manager.py:391] +ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:217.4081802368164ms total_cost_time:217.44990348815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14842 prompt_cache_len:5151 prompt_cache_ratio:0.34705565287697077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 +DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:52 [batch.py:51] router release req id 8 +INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10831856727600098 s +INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11016321182250977 s +DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=226388964299291264210012713446691124446, time:1750768852.2677495s req_ids:[8] +DEBUG 06-24 20:40:52 [manager.py:391] +ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:386.15965843200684ms total_cost_time:386.2185478210449ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:14843 prompt_cache_len:5151 prompt_cache_ratio:0.3470322711042242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 +DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:52 [batch.py:51] router release req id 8 +INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10890936851501465 s +INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11061549186706543 s +DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=39207660011688437413472176639765168578, time:1750768852.6611636s req_ids:[8] +DEBUG 06-24 20:40:52 [manager.py:391] +ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:213.16051483154297ms total_cost_time:213.20366859436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14844 prompt_cache_len:5151 prompt_cache_ratio:0.3470088924818108 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 +DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:52 [batch.py:51] router release req id 8 +INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.1093142032623291 s +INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104321479797363 s +DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=45419421682986767023647893075479562899, time:1750768852.879836s req_ids:[8] +DEBUG 06-24 20:40:52 [manager.py:391] +ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:215.76666831970215ms total_cost_time:215.82412719726562ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14845 prompt_cache_len:5151 prompt_cache_ratio:0.34698551700909397 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 +DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:53 [batch.py:51] router release req id 8 +INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10855889320373535 s +INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037707328796387 s +DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=168844597530939919492288970828221909974, time:1750768853.1053097s req_ids:[8] +DEBUG 06-24 20:40:53 [manager.py:391] +ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:213.56678009033203ms total_cost_time:213.608980178833ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14846 prompt_cache_len:5151 prompt_cache_ratio:0.3469621446854372 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 +INFO 06-24 20:40:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:53 [statics_utils.py:24] mean first cost: 232.5301736641789 ms +INFO 06-24 20:40:53 [statics_utils.py:24] mean per token cost: 0.05751931347417947 ms +DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:53 [batch.py:51] router release req id 8 +INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10929441452026367 s +INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11133098602294922 s +DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=21710506362242675556027847984882967294, time:1750768853.323157s req_ids:[8] +DEBUG 06-24 20:40:53 [manager.py:391] +INFO 06-24 20:40:53 [manager.py:620] left req id 8can release False refcount 4 +ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:214.5230770111084ms total_cost_time:214.5669460296631ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14847 prompt_cache_len:5151 prompt_cache_ratio:0.3469387755102041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 +DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:53 [batch.py:51] router release req id 8 +INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10895490646362305 s +INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11088037490844727 s +DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=163113732338514847852512582675087241753, time:1750768853.5442932s req_ids:[8] +DEBUG 06-24 20:40:53 [manager.py:391] +ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:214.05339241027832ms total_cost_time:214.0965461730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14848 prompt_cache_len:5151 prompt_cache_ratio:0.3469154094827586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 +DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:53 [batch.py:51] router release req id 8 +INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.1089470386505127 s +INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11101794242858887 s +DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=46519230388910376681851162275862435134, time:1750768853.7657351s req_ids:[8] +DEBUG 06-24 20:40:53 [manager.py:391] +ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:381.4725875854492ms total_cost_time:381.5171718597412ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14849 prompt_cache_len:5151 prompt_cache_ratio:0.3468920466024648 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 +DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:54 [batch.py:51] router release req id 8 +INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10893440246582031 s +INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11097884178161621 s +DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=308334606095917163127620037768145741653, time:1750768854.1530652s req_ids:[8] +DEBUG 06-24 20:40:54 [manager.py:391] +ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:220.64542770385742ms total_cost_time:220.6895351409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14850 prompt_cache_len:5151 prompt_cache_ratio:0.3468686868686869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 +DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:54 [batch.py:51] router release req id 8 +INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10790729522705078 s +INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.10994958877563477 s +DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=148226305758453917163101162164592496159, time:1750768854.379973s req_ids:[8] +DEBUG 06-24 20:40:54 [manager.py:391] +ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:218.13464164733887ms total_cost_time:218.17898750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14851 prompt_cache_len:5151 prompt_cache_ratio:0.3468453302807892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 +DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:54 [batch.py:51] router release req id 8 +INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s +INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s +DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=224344271085155326154806314905491487807, time:1750768854.6065054s req_ids:[8] +DEBUG 06-24 20:40:54 [manager.py:391] +ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:215.86012840270996ms total_cost_time:215.90447425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14852 prompt_cache_len:5151 prompt_cache_ratio:0.34682197683813626 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 +DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:54 [batch.py:51] router release req id 8 +INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10873794555664062 s +INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s +DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=129504238438278196502079877768591557443, time:1750768854.8268094s req_ids:[8] +DEBUG 06-24 20:40:54 [manager.py:391] +ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:213.02270889282227ms total_cost_time:213.06610107421875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14853 prompt_cache_len:5151 prompt_cache_ratio:0.3467986265400929 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 +DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:54 [batch.py:51] router release req id 8 +INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s +INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11013269424438477 s +DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=252743147959163400454552036055707437162, time:1750768855.058071s req_ids:[8] +DEBUG 06-24 20:40:55 [manager.py:391] +ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:392.67754554748535ms total_cost_time:392.72332191467285ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14854 prompt_cache_len:5151 prompt_cache_ratio:0.346775279386024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 +DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:55 [batch.py:51] router release req id 8 +INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.1092689037322998 s +INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11135411262512207 s +DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=29328081965286694053172317514460251644, time:1750768855.4476278s req_ids:[8] +DEBUG 06-24 20:40:55 [manager.py:391] +ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:218.02377700805664ms total_cost_time:218.06740760803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14855 prompt_cache_len:5151 prompt_cache_ratio:0.34675193537529453 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 +DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:55 [batch.py:51] router release req id 8 +INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10827946662902832 s +INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11037564277648926 s +DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=254371067539319987672283213949260502640, time:1750768855.67229s req_ids:[8] +DEBUG 06-24 20:40:55 [manager.py:391] +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:218.77336502075195ms total_cost_time:218.81580352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14856 prompt_cache_len:5151 prompt_cache_ratio:0.3467285945072698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 +DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:55 [batch.py:51] router release req id 8 +INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s +INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081576347351074 s +DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=208165317810573962612201287272215255784, time:1750768855.895097s req_ids:[8] +DEBUG 06-24 20:40:55 [manager.py:391] +ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:210.76273918151855ms total_cost_time:210.80660820007324ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14857 prompt_cache_len:5151 prompt_cache_ratio:0.3467052567813152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 +DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:56 [batch.py:51] router release req id 8 +INFO 06-24 20:40:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s +INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.11038661003112793 s +DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=186937536360240095596580753588514012725, time:1750768856.1157286s req_ids:[8] +DEBUG 06-24 20:40:56 [manager.py:391] +ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:217.02289581298828ms total_cost_time:217.06581115722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14858 prompt_cache_len:5151 prompt_cache_ratio:0.34668192219679633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 +DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:56 [batch.py:51] router release req id 8 +INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10913658142089844 s +INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.11117696762084961 s +DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=90944576786277947240352311636465545112, time:1750768856.3375335s req_ids:[8] +DEBUG 06-24 20:40:56 [manager.py:391] +ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:387.88771629333496ms total_cost_time:387.93373107910156ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14859 prompt_cache_len:5151 prompt_cache_ratio:0.3466585907530789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 +DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:56 [batch.py:51] router release req id 8 +INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10756540298461914 s +INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.10944414138793945 s +DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=130144033473181817960502432533194540488, time:1750768856.7327209s req_ids:[8] +DEBUG 06-24 20:40:56 [manager.py:391] +ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:216.43495559692383ms total_cost_time:216.4778709411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14860 prompt_cache_len:5151 prompt_cache_ratio:0.3466352624495289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 +DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:56 [batch.py:51] router release req id 8 +INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10744452476501465 s +INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.10934257507324219 s +DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=325705179179017476770047909346264951467, time:1750768856.9551404s req_ids:[8] +DEBUG 06-24 20:40:56 [manager.py:391] +ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:216.51363372802734ms total_cost_time:216.55678749084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14861 prompt_cache_len:5151 prompt_cache_ratio:0.34661193728551243 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 +DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:57 [batch.py:51] router release req id 8 +INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s +INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11096930503845215 s +DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=310223895000534510674887324135487974484, time:1750768857.1789212s req_ids:[8] +DEBUG 06-24 20:40:57 [manager.py:391] +ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:215.5017852783203ms total_cost_time:215.5442237854004ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14862 prompt_cache_len:5151 prompt_cache_ratio:0.34658861526039564 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 +DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:57 [batch.py:51] router release req id 8 +INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.1082003116607666 s +INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11026120185852051 s +DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=43106765343510030714209204094423593191, time:1750768857.3989134s req_ids:[8] +DEBUG 06-24 20:40:57 [manager.py:391] +ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:216.39633178710938ms total_cost_time:216.43972396850586ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14863 prompt_cache_len:5151 prompt_cache_ratio:0.34656529637354505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 +DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:57 [batch.py:51] router release req id 8 +INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10869336128234863 s +INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11070585250854492 s +DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=194386802533934419758830174697754777399, time:1750768857.6217022s req_ids:[8] +DEBUG 06-24 20:40:57 [manager.py:391] +ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:220.0336456298828ms total_cost_time:220.0767993927002ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14864 prompt_cache_len:5151 prompt_cache_ratio:0.34654198062432723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 +DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:57 [batch.py:51] router release req id 8 +INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10886096954345703 s +INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11061978340148926 s +DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=70435799213386712513996817956545656973, time:1750768857.848022s req_ids:[8] +DEBUG 06-24 20:40:57 [manager.py:391] +ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:382.7171325683594ms total_cost_time:382.7638626098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14865 prompt_cache_len:5151 prompt_cache_ratio:0.34651866801210895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 +DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:58 [batch.py:51] router release req id 8 +INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10949373245239258 s +INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11149787902832031 s +DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=51790543177241029601962955433113348307, time:1750768858.2367487s req_ids:[8] +DEBUG 06-24 20:40:58 [manager.py:391] +ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:216.0036563873291ms total_cost_time:216.04585647583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14866 prompt_cache_len:5151 prompt_cache_ratio:0.34649535853625724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 +DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:58 [batch.py:51] router release req id 8 +INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10840249061584473 s +INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s +DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=97050191559475073871642634152391601779, time:1750768858.459972s req_ids:[8] +DEBUG 06-24 20:40:58 [manager.py:391] +ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:216.7670726776123ms total_cost_time:216.80903434753418ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14867 prompt_cache_len:5151 prompt_cache_ratio:0.3464720521961391 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 +DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:58 [batch.py:51] router release req id 8 +INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10908746719360352 s +INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11095929145812988 s +DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=137586483109649023358936739097714199793, time:1750768858.691904s req_ids:[8] +DEBUG 06-24 20:40:58 [manager.py:391] +ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:225.36873817443848ms total_cost_time:225.41213035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14868 prompt_cache_len:5151 prompt_cache_ratio:0.3464487489911219 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 +DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:58 [batch.py:51] router release req id 8 +INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10814285278320312 s +INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s +DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=52654648932681713816636337981151815318, time:1750768858.9151597s req_ids:[8] +DEBUG 06-24 20:40:58 [manager.py:391] +ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:213.58013153076172ms total_cost_time:213.6240005493164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14869 prompt_cache_len:5151 prompt_cache_ratio:0.34642544892057303 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 +DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:59 [batch.py:51] router release req id 8 +INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s +INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101691722869873 s +DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=203150616436167594968624190923035975230, time:1750768859.1396341s req_ids:[8] +DEBUG 06-24 20:40:59 [manager.py:391] +ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:216.9501781463623ms total_cost_time:216.9930934906006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14870 prompt_cache_len:5151 prompt_cache_ratio:0.3464021519838601 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 +DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:59 [batch.py:51] router release req id 8 +INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10931873321533203 s +INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11124873161315918 s +DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=167031256084227916068579839790065652491, time:1750768859.358784s req_ids:[8] +DEBUG 06-24 20:40:59 [manager.py:391] +ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:375.6697177886963ms total_cost_time:375.713586807251ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14871 prompt_cache_len:5151 prompt_cache_ratio:0.346378858180351 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 +DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:59 [batch.py:51] router release req id 8 +INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10915827751159668 s +INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11115407943725586 s +DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=136760883050539528999813811172541162904, time:1750768859.7418325s req_ids:[8] +DEBUG 06-24 20:40:59 [manager.py:391] +ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:212.62288093566895ms total_cost_time:212.66746520996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14872 prompt_cache_len:5151 prompt_cache_ratio:0.34635556750941365 mtp_avg_token_per_step:1.0 +INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 +DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:40:59 [batch.py:51] router release req id 8 +INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10931992530822754 s +INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11138033866882324 s +DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=52999980440188132058692401983966633638, time:1750768859.9696s req_ids:[8] +DEBUG 06-24 20:40:59 [manager.py:391] +ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:226.6998291015625ms total_cost_time:226.74322128295898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14873 prompt_cache_len:5151 prompt_cache_ratio:0.3463322799704162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 +DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:00 [batch.py:51] router release req id 8 +INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10899138450622559 s +INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.11091923713684082 s +DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=30340022811735612688987866621317335719, time:1750768860.1935837s req_ids:[8] +DEBUG 06-24 20:41:00 [manager.py:391] +DEBUG 06-24 20:41:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 59351.138 tokens/s +DEBUG 06-24 20:41:00 [stats.py:37] Avg prompt tokens throughput: 59343.248 tokens/s +DEBUG 06-24 20:41:00 [stats.py:37] Avg generate tokens throughput: 7.890 tokens/s +ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:215.76189994812012ms total_cost_time:215.8055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14874 prompt_cache_len:5151 prompt_cache_ratio:0.34630899556272693 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 +DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:00 [batch.py:51] router release req id 8 +INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10855937004089355 s +INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.10974311828613281 s +DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=24440539916409399144162206739061709299, time:1750768860.4161408s req_ids:[8] +DEBUG 06-24 20:41:00 [manager.py:391] +ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:214.29109573364258ms total_cost_time:214.33401107788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14875 prompt_cache_len:5151 prompt_cache_ratio:0.3462857142857143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 +DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:00 [batch.py:51] router release req id 8 +INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10857391357421875 s +INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s +DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=162360816456284702805959460382544291702, time:1750768860.6371193s req_ids:[8] +DEBUG 06-24 20:41:00 [manager.py:391] +ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:214.96176719665527ms total_cost_time:215.00444412231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14876 prompt_cache_len:5151 prompt_cache_ratio:0.34626243613874697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 +DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:00 [batch.py:51] router release req id 8 +INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s +INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.10979032516479492 s +DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=109887392837987005072029733850974912145, time:1750768860.8588235s req_ids:[8] +DEBUG 06-24 20:41:00 [manager.py:391] +ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:382.4770450592041ms total_cost_time:382.5194835662842ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14877 prompt_cache_len:5151 prompt_cache_ratio:0.3462391611211938 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 +DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:01 [batch.py:51] router release req id 8 +INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10885167121887207 s +INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11104369163513184 s +DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=107355060012968455688088632834658166087, time:1750768861.2476895s req_ids:[8] +DEBUG 06-24 20:41:01 [manager.py:391] +ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:216.83907508850098ms total_cost_time:216.88246726989746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14878 prompt_cache_len:5151 prompt_cache_ratio:0.3462158892324237 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 +DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:01 [batch.py:51] router release req id 8 +INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10940670967102051 s +INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11199784278869629 s +DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=151973044893258421316056284232615194776, time:1750768861.4725146s req_ids:[8] +DEBUG 06-24 20:41:01 [manager.py:391] +ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:214.2469882965088ms total_cost_time:214.28942680358887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14879 prompt_cache_len:5151 prompt_cache_ratio:0.3461926204718059 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 +DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:01 [batch.py:51] router release req id 8 +INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.1088857650756836 s +INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11100530624389648 s +DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=265633169987935817030960517438594143582, time:1750768861.6918795s req_ids:[8] +DEBUG 06-24 20:41:01 [manager.py:391] +ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:212.5692367553711ms total_cost_time:212.61310577392578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14880 prompt_cache_len:5151 prompt_cache_ratio:0.3461693548387097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 +DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:01 [batch.py:51] router release req id 8 +INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10915040969848633 s +INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.1112222671508789 s +DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=66353668469187788581520762286825745195, time:1750768861.911734s req_ids:[8] +DEBUG 06-24 20:41:01 [manager.py:391] +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:215.39020538330078ms total_cost_time:215.43145179748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14881 prompt_cache_len:5151 prompt_cache_ratio:0.34614609233250454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 +DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:02 [batch.py:51] router release req id 8 +INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10875511169433594 s +INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.11082077026367188 s +DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=87726903341641546901883822299707514518, time:1750768862.1344776s req_ids:[8] +DEBUG 06-24 20:41:02 [manager.py:391] +ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:216.63475036621094ms total_cost_time:216.67766571044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14882 prompt_cache_len:5151 prompt_cache_ratio:0.34612283295256013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 +DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:02 [batch.py:51] router release req id 8 +INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.2092437744140625 s +INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.21091532707214355 s +DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=95319859116897128760894535842799485515, time:1750768862.490029s req_ids:[8] +DEBUG 06-24 20:41:02 [manager.py:391] +ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:329.2872905731201ms total_cost_time:329.3297290802002ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14883 prompt_cache_len:5151 prompt_cache_ratio:0.3460995766982463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 +DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:02 [batch.py:51] router release req id 8 +INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10882949829101562 s +INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.1108694076538086 s +DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=43123884381091590076306023372049116752, time:1750768862.6929505s req_ids:[8] +DEBUG 06-24 20:41:02 [manager.py:391] +ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:211.79747581481934ms total_cost_time:211.8396759033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14884 prompt_cache_len:5151 prompt_cache_ratio:0.3460763235689331 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 +DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:02 [batch.py:51] router release req id 8 +INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10748481750488281 s +INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.10918593406677246 s +DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=23118814973476388619837085834386101701, time:1750768862.9101026s req_ids:[8] +DEBUG 06-24 20:41:02 [manager.py:391] +ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:171.4460849761963ms total_cost_time:171.48756980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14885 prompt_cache_len:5151 prompt_cache_ratio:0.3460530735639906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 +DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:03 [batch.py:51] router release req id 8 +INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10857176780700684 s +INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.11064577102661133 s +DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=186301524104673297330674387919432549129, time:1750768863.0887148s req_ids:[8] +DEBUG 06-24 20:41:03 [manager.py:391] +ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:209.23161506652832ms total_cost_time:209.2759609222412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14886 prompt_cache_len:5151 prompt_cache_ratio:0.3460298266827892 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 +DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:03 [batch.py:51] router release req id 8 +INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10918450355529785 s +INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.1112356185913086 s +DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=242582471906023648059019302265076176598, time:1750768863.3153472s req_ids:[8] +DEBUG 06-24 20:41:03 [manager.py:391] +ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:231.90569877624512ms total_cost_time:231.9498062133789ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14887 prompt_cache_len:5151 prompt_cache_ratio:0.3460065829246994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 +DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:03 [batch.py:51] router release req id 8 +INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.3105282783508301 s +INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.31249284744262695 s +DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=111972613815247801845076089116162334219, time:1750768863.7491968s req_ids:[8] +DEBUG 06-24 20:41:03 [manager.py:391] +ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:432.4631690979004ms total_cost_time:432.5079917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14888 prompt_cache_len:5151 prompt_cache_ratio:0.3459833422890919 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 +DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:03 [batch.py:51] router release req id 8 +INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10958552360534668 s +INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.11178851127624512 s +DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=174520694503096606808550088945943102036, time:1750768863.98159s req_ids:[8] +DEBUG 06-24 20:41:03 [manager.py:391] +ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:219.44189071655273ms total_cost_time:219.4845676422119ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14889 prompt_cache_len:5151 prompt_cache_ratio:0.3459601047753375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 +DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:04 [batch.py:51] router release req id 8 +INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10886383056640625 s +INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s +DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=250893711619579312756076361067948681755, time:1750768864.2066488s req_ids:[8] +DEBUG 06-24 20:41:04 [manager.py:391] +ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:215.14010429382324ms total_cost_time:215.18373489379883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14890 prompt_cache_len:5151 prompt_cache_ratio:0.34593687038280724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 +DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:04 [batch.py:51] router release req id 8 +INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10869145393371582 s +INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11074542999267578 s +DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=276314252462958894868404663225309688840, time:1750768864.4302382s req_ids:[8] +DEBUG 06-24 20:41:04 [manager.py:391] +ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:211.64703369140625ms total_cost_time:211.69137954711914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14891 prompt_cache_len:5151 prompt_cache_ratio:0.34591363911087236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 +DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:04 [batch.py:51] router release req id 8 +INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10960793495178223 s +INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11159968376159668 s +DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=216745425805572535129557559109168650304, time:1750768864.649254s req_ids:[8] +DEBUG 06-24 20:41:04 [manager.py:391] +ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:212.6913070678711ms total_cost_time:212.7358913421631ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14892 prompt_cache_len:5151 prompt_cache_ratio:0.3458904109589041 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 +DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:04 [batch.py:51] router release req id 8 +INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10870218276977539 s +INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11073541641235352 s +DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=267529852153740872475190704003029324575, time:1750768864.8791468s req_ids:[8] +DEBUG 06-24 20:41:04 [manager.py:391] +ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:392.27843284606934ms total_cost_time:392.322301864624ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14893 prompt_cache_len:5151 prompt_cache_ratio:0.3458671859262741 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 +DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:05 [batch.py:51] router release req id 8 +DEBUG 06-24 20:41:05 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:05 [manager.py:283] +DEBUG 06-24 20:41:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:05 [manager.py:284] +INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10910820960998535 s +INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11134052276611328 s +DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=32897946877815333385498523328267399539, time:1750768865.2683203s req_ids:[8] +DEBUG 06-24 20:41:05 [manager.py:391] +ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:217.84400939941406ms total_cost_time:217.88668632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14894 prompt_cache_len:5151 prompt_cache_ratio:0.34584396401235395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 +DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:05 [batch.py:51] router release req id 8 +INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10877370834350586 s +INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.1107935905456543 s +DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=4280161830768621835617159168658818268, time:1750768865.493105s req_ids:[8] +DEBUG 06-24 20:41:05 [manager.py:391] +ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:209.0744972229004ms total_cost_time:209.120512008667ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14895 prompt_cache_len:5151 prompt_cache_ratio:0.3458207452165156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 +DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:05 [batch.py:51] router release req id 8 +INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10905838012695312 s +INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11100506782531738 s +DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=112935126093636079472208406474335395449, time:1750768865.7077105s req_ids:[8] +DEBUG 06-24 20:41:05 [manager.py:391] +ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:212.75758743286133ms total_cost_time:212.8005027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14896 prompt_cache_len:5151 prompt_cache_ratio:0.34579752953813103 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 +DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:05 [batch.py:51] router release req id 8 +INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10890340805053711 s +INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11079239845275879 s +DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=253698152045727825433128296209084539004, time:1750768865.927933s req_ids:[8] +DEBUG 06-24 20:41:05 [manager.py:391] +ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:221.4500904083252ms total_cost_time:221.49395942687988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14897 prompt_cache_len:5151 prompt_cache_ratio:0.34577431697657246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 +DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:06 [batch.py:51] router release req id 8 +INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10950064659118652 s +INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.11147761344909668 s +DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=145636469247426338582433858355420701122, time:1750768866.155177s req_ids:[8] +DEBUG 06-24 20:41:06 [manager.py:391] +ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:212.5387191772461ms total_cost_time:212.58234977722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14898 prompt_cache_len:5151 prompt_cache_ratio:0.34575110753121224 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 +DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:06 [batch.py:51] router release req id 8 +INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10746288299560547 s +INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s +DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=43069594315191217862609225695507154677, time:1750768866.3785434s req_ids:[8] +DEBUG 06-24 20:41:06 [manager.py:391] +ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:416.4903163909912ms total_cost_time:416.5332317352295ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14899 prompt_cache_len:5151 prompt_cache_ratio:0.34572790120142294 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 +DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:06 [batch.py:51] router release req id 8 +INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10793209075927734 s +INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.10999417304992676 s +DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=221285560943699036228107238866254889367, time:1750768866.8026779s req_ids:[8] +DEBUG 06-24 20:41:06 [manager.py:391] +ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:221.15278244018555ms total_cost_time:221.19665145874023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14900 prompt_cache_len:5151 prompt_cache_ratio:0.34570469798657716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 +DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:06 [batch.py:51] router release req id 8 +INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s +INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s +DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=36033659262829388529023374209089953215, time:1750768867.0295746s req_ids:[8] +DEBUG 06-24 20:41:07 [manager.py:391] +ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:216.06040000915527ms total_cost_time:216.10331535339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14901 prompt_cache_len:5151 prompt_cache_ratio:0.3456814978860479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 +DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:07 [batch.py:51] router release req id 8 +INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10922980308532715 s +INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11124420166015625 s +DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=270315735946108243168961941880375939391, time:1750768867.2514284s req_ids:[8] +DEBUG 06-24 20:41:07 [manager.py:391] +ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:214.87140655517578ms total_cost_time:214.89405632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:14902 prompt_cache_len:5151 prompt_cache_ratio:0.3456583008992082 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 +DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:07 [batch.py:51] router release req id 8 +INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10830330848693848 s +INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11036920547485352 s +DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=193771079240656229269882947576340212010, time:1750768867.4719985s req_ids:[8] +DEBUG 06-24 20:41:07 [manager.py:391] +ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:219.62523460388184ms total_cost_time:219.6676731109619ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14903 prompt_cache_len:5151 prompt_cache_ratio:0.3456351070254311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 +DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:07 [batch.py:51] router release req id 8 +INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.11028480529785156 s +INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.1122591495513916 s +DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=291142253853659683131321862920721975389, time:1750768867.6973171s req_ids:[8] +DEBUG 06-24 20:41:07 [manager.py:391] +ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:395.46918869018555ms total_cost_time:395.57647705078125ms,out_token_counter:1 mean_per_token_cost_time: 0.10728836059570312ms prompt_token_num:14904 prompt_cache_len:5151 prompt_cache_ratio:0.34561191626409016 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 +DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:08 [batch.py:51] router release req id 8 +INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.1080937385559082 s +INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.10995817184448242 s +DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=215810424577079131747000277978612690255, time:1750768868.0994499s req_ids:[8] +DEBUG 06-24 20:41:08 [manager.py:391] +ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:173.93827438354492ms total_cost_time:173.9802360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14905 prompt_cache_len:5151 prompt_cache_ratio:0.34558872861455886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 +DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:08 [batch.py:51] router release req id 8 +INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s +INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.10982394218444824 s +DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=118800833338764372539361865488163742873, time:1750768868.2800276s req_ids:[8] +DEBUG 06-24 20:41:08 [manager.py:391] +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:215.7881259918213ms total_cost_time:215.82984924316406ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14906 prompt_cache_len:5151 prompt_cache_ratio:0.34556554407621093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 +DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:08 [batch.py:51] router release req id 8 +INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s +INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.1098940372467041 s +DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=175348578824830694084324027796760366306, time:1750768868.5019732s req_ids:[8] +DEBUG 06-24 20:41:08 [manager.py:391] +ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:229.76255416870117ms total_cost_time:229.80570793151855ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14907 prompt_cache_len:5151 prompt_cache_ratio:0.3455423626484202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 +DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:08 [batch.py:51] router release req id 8 +INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10945582389831543 s +INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.11144566535949707 s +DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=61457639929847069873615895232070059774, time:1750768868.7436316s req_ids:[8] +DEBUG 06-24 20:41:08 [manager.py:391] +ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:219.81477737426758ms total_cost_time:219.85983848571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14908 prompt_cache_len:5151 prompt_cache_ratio:0.3455191843305608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 +DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:08 [batch.py:51] router release req id 8 +INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.1083064079284668 s +INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.1102449893951416 s +DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=293132026085252512129214121529498567958, time:1750768868.9670632s req_ids:[8] +DEBUG 06-24 20:41:08 [manager.py:391] +ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:218.78480911254883ms total_cost_time:218.827486038208ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14909 prompt_cache_len:5151 prompt_cache_ratio:0.34549600912200684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 +DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:09 [batch.py:51] router release req id 8 +INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.10799765586853027 s +INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.10994482040405273 s +DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=62410351373127482399835636552348370672, time:1750768869.1899393s req_ids:[8] +DEBUG 06-24 20:41:09 [manager.py:391] +ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:387.9415988922119ms total_cost_time:387.9833221435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14910 prompt_cache_len:5151 prompt_cache_ratio:0.3454728370221328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 +DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:09 [batch.py:51] router release req id 8 +INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.1089627742767334 s +INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092758178710938 s +DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=252868431862889257448383693773606786921, time:1750768869.5886948s req_ids:[8] +DEBUG 06-24 20:41:09 [manager.py:391] +ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:223.01387786865234ms total_cost_time:223.05750846862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14911 prompt_cache_len:5151 prompt_cache_ratio:0.3454496680303132 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 +DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:09 [batch.py:51] router release req id 8 +INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.10886788368225098 s +INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.11022806167602539 s +DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=81260551969142300189781994789688793109, time:1750768869.8172355s req_ids:[8] +DEBUG 06-24 20:41:09 [manager.py:391] +ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:211.57002449035645ms total_cost_time:211.61293983459473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14912 prompt_cache_len:5151 prompt_cache_ratio:0.34542650214592274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 +DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:09 [batch.py:51] router release req id 8 +INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.1091761589050293 s +INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11064815521240234 s +DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=159757178682864389425469575912161281887, time:1750768870.0343378s req_ids:[8] +DEBUG 06-24 20:41:10 [manager.py:391] +ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:214.37954902648926ms total_cost_time:214.42389488220215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14913 prompt_cache_len:5151 prompt_cache_ratio:0.34540333936833634 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 +DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:10 [batch.py:51] router release req id 8 +INFO 06-24 20:41:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10974812507629395 s +INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11115741729736328 s +DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=93185547198237627460301643082472967646, time:1750768870.255305s req_ids:[8] +DEBUG 06-24 20:41:10 [manager.py:391] +DEBUG 06-24 20:41:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 59220.603 tokens/s +DEBUG 06-24 20:41:10 [stats.py:37] Avg prompt tokens throughput: 59212.652 tokens/s +DEBUG 06-24 20:41:10 [stats.py:37] Avg generate tokens throughput: 7.951 tokens/s +ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:217.26322174072266ms total_cost_time:217.30971336364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14914 prompt_cache_len:5151 prompt_cache_ratio:0.3453801796969291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 +DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:10 [batch.py:51] router release req id 8 +INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10945010185241699 s +INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086177825927734 s +DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=52968974788723638813367291977635059153, time:1750768870.4805448s req_ids:[8] +DEBUG 06-24 20:41:10 [manager.py:391] +ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:220.19219398498535ms total_cost_time:220.23916244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14915 prompt_cache_len:5151 prompt_cache_ratio:0.3453570231310761 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 +DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:10 [batch.py:51] router release req id 8 +INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10928606986999512 s +INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s +DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=46282328124414315388060674623020798475, time:1750768870.7209675s req_ids:[8] +DEBUG 06-24 20:41:10 [manager.py:391] +ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:400.47430992126465ms total_cost_time:400.5169868469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14916 prompt_cache_len:5151 prompt_cache_ratio:0.34533386967015284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 +DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:11 [batch.py:51] router release req id 8 +INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10915231704711914 s +INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11057043075561523 s +DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=159259871218461477861587348394248744543, time:1750768871.1156638s req_ids:[8] +DEBUG 06-24 20:41:11 [manager.py:391] +ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:219.57087516784668ms total_cost_time:219.61426734924316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14917 prompt_cache_len:5151 prompt_cache_ratio:0.3453107193135349 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 +DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:11 [batch.py:51] router release req id 8 +INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s +INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11063146591186523 s +DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=34914395714947755613268331326873610894, time:1750768871.3389401s req_ids:[8] +DEBUG 06-24 20:41:11 [manager.py:391] +ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:216.02869033813477ms total_cost_time:216.07255935668945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14918 prompt_cache_len:5151 prompt_cache_ratio:0.34528757206059796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 +DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:11 [batch.py:51] router release req id 8 +INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10887789726257324 s +INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990095138549805 s +DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=109201385768949674141668179527031702705, time:1750768871.5622141s req_ids:[8] +DEBUG 06-24 20:41:11 [manager.py:391] +ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:218.50013732910156ms total_cost_time:218.54257583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14919 prompt_cache_len:5151 prompt_cache_ratio:0.3452644279107179 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 +DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:11 [batch.py:51] router release req id 8 +INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.11032557487487793 s +INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11227774620056152 s +DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=321789107968804759616621093226387698477, time:1750768871.786662s req_ids:[8] +DEBUG 06-24 20:41:11 [manager.py:391] +ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:211.0130786895752ms total_cost_time:211.05694770812988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14920 prompt_cache_len:5151 prompt_cache_ratio:0.34524128686327077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 +DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:11 [batch.py:51] router release req id 8 +INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10947847366333008 s +INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11155867576599121 s +DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=329240170780994265122548500954997707267, time:1750768872.0110292s req_ids:[8] +DEBUG 06-24 20:41:12 [manager.py:391] +ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:396.0099220275879ms total_cost_time:396.0533142089844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14921 prompt_cache_len:5151 prompt_cache_ratio:0.34521814891763286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 +DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:12 [batch.py:51] router release req id 8 +INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s +INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11045980453491211 s +DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=245209820497953646340639040012631700817, time:1750768872.4154806s req_ids:[8] +DEBUG 06-24 20:41:12 [manager.py:391] +ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:231.16087913513184ms total_cost_time:231.20498657226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14922 prompt_cache_len:5151 prompt_cache_ratio:0.34519501407318054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 +DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:12 [batch.py:51] router release req id 8 +INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10846066474914551 s +INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s +DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=147912887620968984763564838889853909967, time:1750768872.6462255s req_ids:[8] +DEBUG 06-24 20:41:12 [manager.py:391] +ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:219.98310089111328ms total_cost_time:220.02649307250977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14923 prompt_cache_len:5151 prompt_cache_ratio:0.34517188232929036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 +DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:12 [batch.py:51] router release req id 8 +INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10841870307922363 s +INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s +DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=175272184762289720530426725224908876527, time:1750768872.8709216s req_ids:[8] +DEBUG 06-24 20:41:12 [manager.py:391] +ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:216.45331382751465ms total_cost_time:216.49813652038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14924 prompt_cache_len:5151 prompt_cache_ratio:0.34514875368533904 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 +DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:13 [batch.py:51] router release req id 8 +INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10924768447875977 s +INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11127924919128418 s +DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=191603966747855760707361989097258415913, time:1750768873.0948272s req_ids:[8] +DEBUG 06-24 20:41:13 [manager.py:391] +ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:217.01836585998535ms total_cost_time:217.06295013427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14925 prompt_cache_len:5151 prompt_cache_ratio:0.3451256281407035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 +DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:13 [batch.py:51] router release req id 8 +INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10977792739868164 s +INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11204981803894043 s +DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=293782200822397432423555264145994325254, time:1750768873.3185103s req_ids:[8] +DEBUG 06-24 20:41:13 [manager.py:391] +ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:217.24915504455566ms total_cost_time:217.29230880737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14926 prompt_cache_len:5151 prompt_cache_ratio:0.3451025056947608 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 +DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:13 [batch.py:51] router release req id 8 +INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.1117391586303711 s +INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11395692825317383 s +DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=289601180895871672955601282572724058808, time:1750768873.5444384s req_ids:[8] +DEBUG 06-24 20:41:13 [manager.py:391] +ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:389.62244987487793ms total_cost_time:389.6653652191162ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14927 prompt_cache_len:5151 prompt_cache_ratio:0.3450793863468882 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 +DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:13 [batch.py:51] router release req id 8 +INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10827255249023438 s +INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11023592948913574 s +DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=99433586430800847066807114448760384876, time:1750768873.9387445s req_ids:[8] +DEBUG 06-24 20:41:13 [manager.py:391] +ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:208.73188972473145ms total_cost_time:208.77575874328613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14928 prompt_cache_len:5151 prompt_cache_ratio:0.345056270096463 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 +DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:14 [batch.py:51] router release req id 8 +INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10824823379516602 s +INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11034345626831055 s +DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=113566831266858445432531689496601523866, time:1750768874.1633537s req_ids:[8] +DEBUG 06-24 20:41:14 [manager.py:391] +ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:230.66306114196777ms total_cost_time:230.70883750915527ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14929 prompt_cache_len:5151 prompt_cache_ratio:0.3450331569428629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 +DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:14 [batch.py:51] router release req id 8 +INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s +INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11070632934570312 s +DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=305328272555057567545257923316995110337, time:1750768874.3937106s req_ids:[8] +DEBUG 06-24 20:41:14 [manager.py:391] +ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:215.8036231994629ms total_cost_time:215.84630012512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14930 prompt_cache_len:5151 prompt_cache_ratio:0.3450100468854655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 +DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:14 [batch.py:51] router release req id 8 +INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.1102752685546875 s +INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11266732215881348 s +DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=143955387927802354349233989108501751658, time:1750768874.618446s req_ids:[8] +DEBUG 06-24 20:41:14 [manager.py:391] +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:214.56146240234375ms total_cost_time:214.60437774658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14931 prompt_cache_len:5151 prompt_cache_ratio:0.34498693992364876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 +DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:14 [batch.py:51] router release req id 8 +INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10750436782836914 s +INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.10937261581420898 s +DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=45151492009601507274870022363887926434, time:1750768874.843152s req_ids:[8] +DEBUG 06-24 20:41:14 [manager.py:391] +ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:354.259729385376ms total_cost_time:354.3047904968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14932 prompt_cache_len:5151 prompt_cache_ratio:0.34496383605679076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 +DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:15 [batch.py:51] router release req id 8 +INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10905313491821289 s +INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11108660697937012 s +DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=154705034634744469700013866426514549219, time:1750768875.2007685s req_ids:[8] +DEBUG 06-24 20:41:15 [manager.py:391] +ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:215.19970893859863ms total_cost_time:215.24381637573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14933 prompt_cache_len:5151 prompt_cache_ratio:0.3449407352842697 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 +DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:15 [batch.py:51] router release req id 8 +INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.1080160140991211 s +INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11016631126403809 s +DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=113650760469740398504036855212413053725, time:1750768875.4239767s req_ids:[8] +DEBUG 06-24 20:41:15 [manager.py:391] +ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:215.61264991760254ms total_cost_time:215.6543731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14934 prompt_cache_len:5151 prompt_cache_ratio:0.34491763760546407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 +DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:15 [batch.py:51] router release req id 8 +INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s +INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s +DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=276417682175909890318940379818925710452, time:1750768875.6471417s req_ids:[8] +DEBUG 06-24 20:41:15 [manager.py:391] +ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:210.80255508422852ms total_cost_time:210.8464241027832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14935 prompt_cache_len:5151 prompt_cache_ratio:0.34489454301975225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 +DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:15 [batch.py:51] router release req id 8 +INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10915136337280273 s +INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11137652397155762 s +DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=104113656679349919669617940578133823850, time:1750768875.8652654s req_ids:[8] +DEBUG 06-24 20:41:15 [manager.py:391] +ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:221.62795066833496ms total_cost_time:221.67253494262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14936 prompt_cache_len:5151 prompt_cache_ratio:0.3448714515265131 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 +DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:16 [batch.py:51] router release req id 8 +INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10883808135986328 s +INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s +DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=324236625503766573972200914267355369876, time:1750768876.0969372s req_ids:[8] +DEBUG 06-24 20:41:16 [manager.py:391] +ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:223.53196144104004ms total_cost_time:223.57606887817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14937 prompt_cache_len:5151 prompt_cache_ratio:0.3448483631251255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 +DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:16 [batch.py:51] router release req id 8 +INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10943961143493652 s +INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11159706115722656 s +DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=301800610547718163294629232807234741408, time:1750768876.3237417s req_ids:[8] +DEBUG 06-24 20:41:16 [manager.py:391] +ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:382.4901580810547ms total_cost_time:382.5352191925049ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14938 prompt_cache_len:5151 prompt_cache_ratio:0.3448252778149685 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 +DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:16 [batch.py:51] router release req id 8 +INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10911917686462402 s +INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11112380027770996 s +DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=326650267207700980285373585686392839786, time:1750768876.715178s req_ids:[8] +DEBUG 06-24 20:41:16 [manager.py:391] +ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:219.47026252746582ms total_cost_time:219.51603889465332ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14939 prompt_cache_len:5151 prompt_cache_ratio:0.34480219559542136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 +DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:16 [batch.py:51] router release req id 8 +INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10836076736450195 s +INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11023139953613281 s +DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=101285894011348526691503753989736651421, time:1750768876.9425979s req_ids:[8] +DEBUG 06-24 20:41:16 [manager.py:391] +ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:175.26865005493164ms total_cost_time:175.3091812133789ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:14940 prompt_cache_len:5151 prompt_cache_ratio:0.34477911646586346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 +DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:17 [batch.py:51] router release req id 8 +INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10772228240966797 s +INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11010217666625977 s +DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=260650953263683490790027291494405948615, time:1750768877.125262s req_ids:[8] +DEBUG 06-24 20:41:17 [manager.py:391] +ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:177.09589004516602ms total_cost_time:177.140474319458ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14941 prompt_cache_len:5151 prompt_cache_ratio:0.3447560404256743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 +DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:17 [batch.py:51] router release req id 8 +INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10820508003234863 s +INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s +DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=250275585823277219198959375671879736677, time:1750768877.3044581s req_ids:[8] +DEBUG 06-24 20:41:17 [manager.py:391] +ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:209.34724807739258ms total_cost_time:209.39350128173828ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14942 prompt_cache_len:5151 prompt_cache_ratio:0.3447329674742337 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 +DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:17 [batch.py:51] router release req id 8 +INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10933732986450195 s +INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.1113741397857666 s +DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=91931578339703094490489354494758685840, time:1750768877.525258s req_ids:[8] +DEBUG 06-24 20:41:17 [manager.py:391] +ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:222.92160987854004ms total_cost_time:222.96571731567383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14943 prompt_cache_len:5151 prompt_cache_ratio:0.3447098976109215 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 +DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:17 [batch.py:51] router release req id 8 +INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10807657241821289 s +INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11012792587280273 s +DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=152723289749101446278691476070451344161, time:1750768877.7542567s req_ids:[8] +DEBUG 06-24 20:41:17 [manager.py:391] +ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:412.5781059265137ms total_cost_time:412.62340545654297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14944 prompt_cache_len:5151 prompt_cache_ratio:0.34468683083511775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 +DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:18 [batch.py:51] router release req id 8 +INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.10827350616455078 s +INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11020612716674805 s +DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=214815922351812969449846664858499244407, time:1750768878.1712513s req_ids:[8] +DEBUG 06-24 20:41:18 [manager.py:391] +ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:199.0034580230713ms total_cost_time:199.0506649017334ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:14945 prompt_cache_len:5151 prompt_cache_ratio:0.3446637671462027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 +DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:18 [batch.py:51] router release req id 8 +INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.10991024971008301 s +INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11245560646057129 s +DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=214237287897823853471084511843904846093, time:1750768878.3770719s req_ids:[8] +DEBUG 06-24 20:41:18 [manager.py:391] +ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:217.2262668609619ms total_cost_time:217.26751327514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14946 prompt_cache_len:5151 prompt_cache_ratio:0.34464070654355683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 +DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:18 [batch.py:51] router release req id 8 +INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.11017036437988281 s +INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11273384094238281 s +DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=177481519683233426514864260658378008748, time:1750768878.6017735s req_ids:[8] +DEBUG 06-24 20:41:18 [manager.py:391] +ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:218.2917594909668ms total_cost_time:218.336820602417ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14947 prompt_cache_len:5151 prompt_cache_ratio:0.3446176490265605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 +DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:18 [batch.py:51] router release req id 8 +INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.11173701286315918 s +INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11375045776367188 s +DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=156482145176183525931288434239349954042, time:1750768878.8257139s req_ids:[8] +DEBUG 06-24 20:41:18 [manager.py:391] +ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:211.55095100402832ms total_cost_time:211.5955352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14948 prompt_cache_len:5151 prompt_cache_ratio:0.34459459459459457 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 +DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:18 [batch.py:51] router release req id 8 +INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s +INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11104154586791992 s +DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=75378085655337040812567313058374724027, time:1750768879.0430663s req_ids:[8] +DEBUG 06-24 20:41:19 [manager.py:391] +ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:218.74380111694336ms total_cost_time:218.78767013549805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14949 prompt_cache_len:5151 prompt_cache_ratio:0.3445715432470399 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 +DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:19 [batch.py:51] router release req id 8 +INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s +INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11168599128723145 s +DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=334495049688107621405657876420559659118, time:1750768879.2787092s req_ids:[8] +DEBUG 06-24 20:41:19 [manager.py:391] +ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:393.19634437561035ms total_cost_time:393.24140548706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14950 prompt_cache_len:5151 prompt_cache_ratio:0.3445484949832776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 +DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:19 [batch.py:51] router release req id 8 +INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.1117246150970459 s +INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.1139681339263916 s +DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=236165610114740173589558707284836869659, time:1750768879.6683269s req_ids:[8] +DEBUG 06-24 20:41:19 [manager.py:391] +ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:218.66393089294434ms total_cost_time:218.70803833007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14951 prompt_cache_len:5151 prompt_cache_ratio:0.3445254498026888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 +DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:19 [batch.py:51] router release req id 8 +INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10839080810546875 s +INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11041259765625 s +DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=248171077159780907060682897082116600116, time:1750768879.8925724s req_ids:[8] +DEBUG 06-24 20:41:19 [manager.py:391] +ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:209.71345901489258ms total_cost_time:209.75756645202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14952 prompt_cache_len:5151 prompt_cache_ratio:0.3445024077046549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 +DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:20 [batch.py:51] router release req id 8 +INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.1094961166381836 s +INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.11140632629394531 s +DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=35841379271185118501953147625332758852, time:1750768880.111558s req_ids:[8] +DEBUG 06-24 20:41:20 [manager.py:391] +ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:220.4899787902832ms total_cost_time:220.5333709716797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14953 prompt_cache_len:5151 prompt_cache_ratio:0.3444793686885575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 +DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:20 [batch.py:51] router release req id 8 +INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s +INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.11154723167419434 s +DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=77495247073661216537624098622616427048, time:1750768880.3350513s req_ids:[8] +DEBUG 06-24 20:41:20 [manager.py:391] +DEBUG 06-24 20:41:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 59273.368 tokens/s +DEBUG 06-24 20:41:20 [stats.py:37] Avg prompt tokens throughput: 59265.431 tokens/s +DEBUG 06-24 20:41:20 [stats.py:37] Avg generate tokens throughput: 7.937 tokens/s +ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:217.73958206176758ms total_cost_time:217.78202056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14954 prompt_cache_len:5151 prompt_cache_ratio:0.34445633275377824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 +DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:20 [batch.py:51] router release req id 8 +INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.10730338096618652 s +INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.10904955863952637 s +DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=219417800556851274654125096325751401932, time:1750768880.5630672s req_ids:[8] +DEBUG 06-24 20:41:20 [manager.py:391] +ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:358.35933685302734ms total_cost_time:358.40439796447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14955 prompt_cache_len:5151 prompt_cache_ratio:0.3444332998996991 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 +DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:20 [batch.py:51] router release req id 8 +INFO 06-24 20:41:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.1063230037689209 s +INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.10821866989135742 s +DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=209915660412498998881438403447617463348, time:1750768880.9229097s req_ids:[8] +DEBUG 06-24 20:41:20 [manager.py:391] +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:172.7914810180664ms total_cost_time:172.81365394592285ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:14956 prompt_cache_len:5151 prompt_cache_ratio:0.3444102701257021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 +DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:21 [batch.py:51] router release req id 8 +INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10422301292419434 s +INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.10626578330993652 s +DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=183077454607186067817380255998330358491, time:1750768881.1055334s req_ids:[8] +DEBUG 06-24 20:41:21 [manager.py:391] +ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:216.7809009552002ms total_cost_time:216.8254852294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14957 prompt_cache_len:5151 prompt_cache_ratio:0.34438724343116933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 +DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:21 [batch.py:51] router release req id 8 +INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10835146903991699 s +INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11046004295349121 s +DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=226697835416790576099156633690799214932, time:1750768881.323791s req_ids:[8] +DEBUG 06-24 20:41:21 [manager.py:391] +ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:213.1643295288086ms total_cost_time:213.20796012878418ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14958 prompt_cache_len:5151 prompt_cache_ratio:0.3443642198154834 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 +DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:21 [batch.py:51] router release req id 8 +INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10941147804260254 s +INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11145329475402832 s +DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=212857202184910939121903020548283121422, time:1750768881.5450943s req_ids:[8] +DEBUG 06-24 20:41:21 [manager.py:391] +ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:217.17429161071777ms total_cost_time:217.21959114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14959 prompt_cache_len:5151 prompt_cache_ratio:0.3443411992780266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 +DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:21 [batch.py:51] router release req id 8 +INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10811233520507812 s +INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11004042625427246 s +DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=315897656889564681504374153397622782887, time:1750768881.7685308s req_ids:[8] +DEBUG 06-24 20:41:21 [manager.py:391] +ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:388.32569122314453ms total_cost_time:388.3693218231201ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14960 prompt_cache_len:5151 prompt_cache_ratio:0.3443181818181818 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 +DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:22 [batch.py:51] router release req id 8 +INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10830259323120117 s +INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11093449592590332 s +DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=26031495386456320580579251256594121373, time:1750768882.163792s req_ids:[8] +DEBUG 06-24 20:41:22 [manager.py:391] +ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:217.87619590759277ms total_cost_time:217.92078018188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14961 prompt_cache_len:5151 prompt_cache_ratio:0.34429516743533184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 +DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:22 [batch.py:51] router release req id 8 +INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s +INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.10976433753967285 s +DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=160961722459132032484570639098101158745, time:1750768882.3879912s req_ids:[8] +DEBUG 06-24 20:41:22 [manager.py:391] +ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:217.1177864074707ms total_cost_time:217.1630859375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14962 prompt_cache_len:5151 prompt_cache_ratio:0.34427215612885975 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 +DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:22 [batch.py:51] router release req id 8 +INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10861539840698242 s +INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11049628257751465 s +DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=88030114080803870590604836438848209330, time:1750768882.6137786s req_ids:[8] +DEBUG 06-24 20:41:22 [manager.py:391] +ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:216.92800521850586ms total_cost_time:216.97068214416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14963 prompt_cache_len:5151 prompt_cache_ratio:0.34424914789814876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 +DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:22 [batch.py:51] router release req id 8 +INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10946893692016602 s +INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s +DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=52552652545220923263933244925597941915, time:1750768882.834685s req_ids:[8] +DEBUG 06-24 20:41:22 [manager.py:391] +ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:219.9416160583496ms total_cost_time:219.9862003326416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14964 prompt_cache_len:5151 prompt_cache_ratio:0.3442261427425822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 +DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:22 [batch.py:51] router release req id 8 +INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.1091921329498291 s +INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.11132645606994629 s +DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=43007143102164667166881812987655176896, time:1750768883.0607471s req_ids:[8] +DEBUG 06-24 20:41:23 [manager.py:391] +ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:218.6431884765625ms total_cost_time:218.6868190765381ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14965 prompt_cache_len:5151 prompt_cache_ratio:0.3442031406615436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 +DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:23 [batch.py:51] router release req id 8 +INFO 06-24 20:41:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:41:23 [statics_utils.py:24] mean first cost: 232.67209177533982 ms +INFO 06-24 20:41:23 [statics_utils.py:24] mean per token cost: 0.057365905146663516 ms +INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.10883665084838867 s +INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.11078643798828125 s +DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=21710642144484523080270899330482751543, time:1750768883.2940938s req_ids:[8] +DEBUG 06-24 20:41:23 [manager.py:391] +ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:401.4556407928467ms total_cost_time:401.49807929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14966 prompt_cache_len:5151 prompt_cache_ratio:0.3441801416544167 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 +DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:23 [batch.py:51] router release req id 8 +INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.1083989143371582 s +INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.10968470573425293 s +DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=273942242254472272849742705831906454694, time:1750768883.6943724s req_ids:[8] +DEBUG 06-24 20:41:23 [manager.py:391] +ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:222.2139835357666ms total_cost_time:222.2576141357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14967 prompt_cache_len:5151 prompt_cache_ratio:0.3441571457205853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 +DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:23 [batch.py:51] router release req id 8 +INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.10795950889587402 s +INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.10975027084350586 s +DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=321004118212953356089864327371181726624, time:1750768883.9201534s req_ids:[8] +DEBUG 06-24 20:41:23 [manager.py:391] +ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:224.52855110168457ms total_cost_time:224.57385063171387ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14968 prompt_cache_len:5151 prompt_cache_ratio:0.34413415285943344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 +DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:24 [batch.py:51] router release req id 8 +INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.1088109016418457 s +INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11099386215209961 s +DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=79858301090016873276182837077549440530, time:1750768884.1503096s req_ids:[8] +DEBUG 06-24 20:41:24 [manager.py:391] +ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:215.6052589416504ms total_cost_time:215.66033363342285ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:14969 prompt_cache_len:5151 prompt_cache_ratio:0.3441111630703454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 +DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:24 [batch.py:51] router release req id 8 +INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10799074172973633 s +INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s +DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=214563019284504171825083332913887698654, time:1750768884.3888195s req_ids:[8] +DEBUG 06-24 20:41:24 [manager.py:391] +ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:236.95874214172363ms total_cost_time:237.0133399963379ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:14970 prompt_cache_len:5151 prompt_cache_ratio:0.3440881763527054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 +DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:24 [batch.py:51] router release req id 8 +INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10923886299133301 s +INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.1113734245300293 s +DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=201013074390565482375988652935808103009, time:1750768884.6317704s req_ids:[8] +DEBUG 06-24 20:41:24 [manager.py:391] +ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:234.67636108398438ms total_cost_time:234.73072052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:14971 prompt_cache_len:5151 prompt_cache_ratio:0.3440651927058981 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 +DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:24 [batch.py:51] router release req id 8 +INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10866522789001465 s +INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11061668395996094 s +DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=78670691076752652961665803065169371061, time:1750768884.8598359s req_ids:[8] +DEBUG 06-24 20:41:24 [manager.py:391] +ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:383.284330368042ms total_cost_time:383.3284378051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14972 prompt_cache_len:5151 prompt_cache_ratio:0.344042212129308 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 +DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:25 [batch.py:51] router release req id 8 +INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s +INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s +DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=46242747765838290547564530125389405441, time:1750768885.252965s req_ids:[8] +DEBUG 06-24 20:41:25 [manager.py:391] +ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:214.37692642211914ms total_cost_time:214.42103385925293ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14973 prompt_cache_len:5151 prompt_cache_ratio:0.3440192346223202 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 +DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:25 [batch.py:51] router release req id 8 +INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10986638069152832 s +INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11186480522155762 s +DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=82036150584605391026262524636438465327, time:1750768885.4717221s req_ids:[8] +DEBUG 06-24 20:41:25 [manager.py:391] +ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:212.7530574798584ms total_cost_time:212.7985954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14974 prompt_cache_len:5151 prompt_cache_ratio:0.3439962601843195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 +DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:25 [batch.py:51] router release req id 8 +INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10950326919555664 s +INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11161565780639648 s +DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=77009969539375109700450108827437171911, time:1750768885.6920593s req_ids:[8] +DEBUG 06-24 20:41:25 [manager.py:391] +ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:218.5971736907959ms total_cost_time:218.6422348022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14975 prompt_cache_len:5151 prompt_cache_ratio:0.34397328881469114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 +DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:25 [batch.py:51] router release req id 8 +INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10876822471618652 s +INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.1109457015991211 s +DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=239450163424048794128482734545059733318, time:1750768885.9330297s req_ids:[8] +DEBUG 06-24 20:41:25 [manager.py:391] +ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:243.74866485595703ms total_cost_time:243.80731582641602ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:14976 prompt_cache_len:5151 prompt_cache_ratio:0.34395032051282054 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 +DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:26 [batch.py:51] router release req id 8 +INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10805988311767578 s +INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11014962196350098 s +DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=327443374163179770836718083413923009411, time:1750768886.1661863s req_ids:[8] +DEBUG 06-24 20:41:26 [manager.py:391] +ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:218.66798400878906ms total_cost_time:218.71280670166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14977 prompt_cache_len:5151 prompt_cache_ratio:0.3439273552780931 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 +DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:26 [batch.py:51] router release req id 8 +INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10923552513122559 s +INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s +DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=322400434840132519631437592111349382350, time:1750768886.3931775s req_ids:[8] +DEBUG 06-24 20:41:26 [manager.py:391] +ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:383.2049369812012ms total_cost_time:383.2714557647705ms,out_token_counter:1 mean_per_token_cost_time: 0.06651878356933594ms prompt_token_num:14978 prompt_cache_len:5151 prompt_cache_ratio:0.3439043931098945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 +DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:26 [batch.py:51] router release req id 8 +INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10860943794250488 s +INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11075282096862793 s +DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=70689272754661912680935651976168503919, time:1750768886.7840316s req_ids:[8] +DEBUG 06-24 20:41:26 [manager.py:391] +ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:217.97752380371094ms total_cost_time:218.02258491516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14979 prompt_cache_len:5151 prompt_cache_ratio:0.3438814340076107 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 +DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:26 [batch.py:51] router release req id 8 +INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.1091768741607666 s +INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.1114339828491211 s +DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=140833368004190543248738872519941228568, time:1750768887.0089767s req_ids:[8] +DEBUG 06-24 20:41:27 [manager.py:391] +ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:219.49219703674316ms total_cost_time:219.53797340393066ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14980 prompt_cache_len:5151 prompt_cache_ratio:0.3438584779706275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 +DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:27 [batch.py:51] router release req id 8 +INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.10844993591308594 s +INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.11055397987365723 s +DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=149721921327696597458085646274579713828, time:1750768887.235228s req_ids:[8] +DEBUG 06-24 20:41:27 [manager.py:391] +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:215.77787399291992ms total_cost_time:215.8217430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14981 prompt_cache_len:5151 prompt_cache_ratio:0.3438355249983312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 +DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:27 [batch.py:51] router release req id 8 +INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.10875606536865234 s +INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.11074066162109375 s +DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=73542233364073794684397801339461334109, time:1750768887.4565916s req_ids:[8] +DEBUG 06-24 20:41:27 [manager.py:391] +ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:215.03233909606934ms total_cost_time:215.07525444030762ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14982 prompt_cache_len:5151 prompt_cache_ratio:0.34381257509010815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 +DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:27 [batch.py:51] router release req id 8 +INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.1084136962890625 s +INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964488983154297 s +DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=333631010107865183010568282629378819274, time:1750768887.6783066s req_ids:[8] +DEBUG 06-24 20:41:27 [manager.py:391] +ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:213.23299407958984ms total_cost_time:213.27614784240723ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14983 prompt_cache_len:5151 prompt_cache_ratio:0.3437896282453447 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 +DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:27 [batch.py:51] router release req id 8 +INFO 06-24 20:41:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.3110027313232422 s +INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.31238603591918945 s +DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=169611552380853464397458392309101915453, time:1750768888.1083164s req_ids:[8] +DEBUG 06-24 20:41:28 [manager.py:391] +ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:437.23607063293457ms total_cost_time:437.28113174438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14984 prompt_cache_len:5151 prompt_cache_ratio:0.3437666844634277 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 +DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:28 [batch.py:51] router release req id 8 +INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.1092691421508789 s +INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s +DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=57443635991513523322381091842748031222, time:1750768888.3408473s req_ids:[8] +DEBUG 06-24 20:41:28 [manager.py:391] +ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:216.8436050415039ms total_cost_time:216.8881893157959ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14985 prompt_cache_len:5151 prompt_cache_ratio:0.3437437437437437 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 +DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:28 [batch.py:51] router release req id 8 +INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10868358612060547 s +INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s +DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=231993416682468758284335136978555147630, time:1750768888.5643852s req_ids:[8] +DEBUG 06-24 20:41:28 [manager.py:391] +ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:212.48340606689453ms total_cost_time:212.5263214111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14986 prompt_cache_len:5151 prompt_cache_ratio:0.34372080608567995 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 +DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:28 [batch.py:51] router release req id 8 +INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10904598236083984 s +INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s +DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=102221070183489978111468385498913373065, time:1750768888.7840025s req_ids:[8] +DEBUG 06-24 20:41:28 [manager.py:391] +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:210.53147315979004ms total_cost_time:210.59346199035645ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:14987 prompt_cache_len:5151 prompt_cache_ratio:0.34369787148862346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 +DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:28 [batch.py:51] router release req id 8 +INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10878419876098633 s +INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s +DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=182975904839457891027082941293161034351, time:1750768889.0021937s req_ids:[8] +DEBUG 06-24 20:41:29 [manager.py:391] +ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:211.55357360839844ms total_cost_time:211.59672737121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14988 prompt_cache_len:5151 prompt_cache_ratio:0.3436749399519616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 +DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:29 [batch.py:51] router release req id 8 +INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s +INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.11034631729125977 s +DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=16232881263146766062903896870990119864, time:1750768889.2196805s req_ids:[8] +DEBUG 06-24 20:41:29 [manager.py:391] +ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:382.51614570617676ms total_cost_time:382.56311416625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14989 prompt_cache_len:5151 prompt_cache_ratio:0.34365201147508173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 +DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:29 [batch.py:51] router release req id 8 +INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s +INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.10986471176147461 s +DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=199101331252947403570158880205632921876, time:1750768889.6085775s req_ids:[8] +DEBUG 06-24 20:41:29 [manager.py:391] +ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:211.71975135803223ms total_cost_time:211.76433563232422ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14990 prompt_cache_len:5151 prompt_cache_ratio:0.3436290860573716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 +DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:29 [batch.py:51] router release req id 8 +INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s +INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.11035609245300293 s +DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=42899343357916828924906647653627556219, time:1750768889.8281124s req_ids:[8] +DEBUG 06-24 20:41:29 [manager.py:391] +ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:215.0402069091797ms total_cost_time:215.08359909057617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14991 prompt_cache_len:5151 prompt_cache_ratio:0.34360616369821895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 +DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:29 [batch.py:51] router release req id 8 +INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.10937738418579102 s +INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11071372032165527 s +DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=161943752785316588439001681693510506372, time:1750768890.0520632s req_ids:[8] +DEBUG 06-24 20:41:30 [manager.py:391] +ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:223.71983528137207ms total_cost_time:223.76418113708496ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14992 prompt_cache_len:5151 prompt_cache_ratio:0.34358324439701177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 +DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:30 [batch.py:51] router release req id 8 +INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.11098957061767578 s +INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11255455017089844 s +DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=40072304252502428723046251820511191123, time:1750768890.277095s req_ids:[8] +DEBUG 06-24 20:41:30 [manager.py:391] +ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:41:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 58158.496 tokens/s +DEBUG 06-24 20:41:30 [stats.py:37] Avg prompt tokens throughput: 58150.629 tokens/s +DEBUG 06-24 20:41:30 [stats.py:37] Avg generate tokens throughput: 7.866 tokens/s +INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:217.7412509918213ms total_cost_time:217.78440475463867ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14993 prompt_cache_len:5151 prompt_cache_ratio:0.34356032815313814 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 +DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:30 [batch.py:51] router release req id 8 +INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.11121320724487305 s +INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11330723762512207 s +DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=80934075272110578585893113064031280292, time:1750768890.4998765s req_ids:[8] +DEBUG 06-24 20:41:30 [manager.py:391] +ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:213.84692192077637ms total_cost_time:213.89150619506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14994 prompt_cache_len:5151 prompt_cache_ratio:0.3435374149659864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 +DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:30 [batch.py:51] router release req id 8 +INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.10911178588867188 s +INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s +DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=173173830171311416091205740646480584, time:1750768890.721452s req_ids:[8] +DEBUG 06-24 20:41:30 [manager.py:391] +ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:372.1592426300049ms total_cost_time:372.2035884857178ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14995 prompt_cache_len:5151 prompt_cache_ratio:0.343514504834945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 +DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:31 [batch.py:51] router release req id 8 +INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10910344123840332 s +INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s +DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=143480452642961181247150027882143914067, time:1750768891.1012316s req_ids:[8] +DEBUG 06-24 20:41:31 [manager.py:391] +ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:219.92778778076172ms total_cost_time:219.9723720550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14996 prompt_cache_len:5151 prompt_cache_ratio:0.3434915977594025 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 +DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:31 [batch.py:51] router release req id 8 +INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10905337333679199 s +INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s +DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=227640888425608508024319286040649382012, time:1750768891.3238666s req_ids:[8] +DEBUG 06-24 20:41:31 [manager.py:391] +ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:211.3053798675537ms total_cost_time:211.3499641418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14997 prompt_cache_len:5151 prompt_cache_ratio:0.3434686937387478 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 +DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:31 [batch.py:51] router release req id 8 +INFO 06-24 20:41:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10922765731811523 s +INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138463020324707 s +DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=335000376025672409847314505226831500100, time:1750768891.5423598s req_ids:[8] +DEBUG 06-24 20:41:31 [manager.py:391] +ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:215.87848663330078ms total_cost_time:215.92140197753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14998 prompt_cache_len:5151 prompt_cache_ratio:0.34344579277236964 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 +DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:31 [batch.py:51] router release req id 8 +INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10851383209228516 s +INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.10992789268493652 s +DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=92721587902157764722294624981176418827, time:1750768891.7660408s req_ids:[8] +DEBUG 06-24 20:41:31 [manager.py:391] +ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:214.89191055297852ms total_cost_time:214.9348258972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14999 prompt_cache_len:5151 prompt_cache_ratio:0.3434228948596573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 +DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:31 [batch.py:51] router release req id 8 +INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10943603515625 s +INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11137628555297852 s +DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=133805948068428467204686380321082387768, time:1750768891.9860535s req_ids:[8] +DEBUG 06-24 20:41:31 [manager.py:391] +ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:385.1132392883301ms total_cost_time:385.15734672546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15000 prompt_cache_len:5151 prompt_cache_ratio:0.3434 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 +DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:32 [batch.py:51] router release req id 8 +INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.10851502418518066 s +INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105794906616211 s +DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=56075173006742504993432706230173012587, time:1750768892.3758278s req_ids:[8] +DEBUG 06-24 20:41:32 [manager.py:391] +ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:215.35515785217285ms total_cost_time:215.39807319641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15001 prompt_cache_len:5151 prompt_cache_ratio:0.34337710819278716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 +DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:32 [batch.py:51] router release req id 8 +INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s +INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.1094973087310791 s +DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=59256218168929873761030392192045639108, time:1750768892.5983126s req_ids:[8] +DEBUG 06-24 20:41:32 [manager.py:391] +ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:211.52544021606445ms total_cost_time:211.56978607177734ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15002 prompt_cache_len:5151 prompt_cache_ratio:0.34335421943740835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 +DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:32 [batch.py:51] router release req id 8 +INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.10838723182678223 s +INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.11030411720275879 s +DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=169350086579639260450698616675564766541, time:1750768892.8152936s req_ids:[8] +DEBUG 06-24 20:41:32 [manager.py:391] +ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:215.00492095947266ms total_cost_time:215.04831314086914ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15003 prompt_cache_len:5151 prompt_cache_ratio:0.34333133373325336 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 +DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:32 [batch.py:51] router release req id 8 +INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.10988140106201172 s +INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.1118004322052002 s +DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=335165873826914932394773740613826496076, time:1750768893.0450776s req_ids:[8] +DEBUG 06-24 20:41:33 [manager.py:391] +ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:232.0852279663086ms total_cost_time:232.1302890777588ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15004 prompt_cache_len:5151 prompt_cache_ratio:0.3433084510797121 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 +DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:33 [batch.py:51] router release req id 8 +INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.11040139198303223 s +INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.11150336265563965 s +DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=78193806412122427668163002264328966167, time:1750768893.2716181s req_ids:[8] +DEBUG 06-24 20:41:33 [manager.py:391] +ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:216.96782112121582ms total_cost_time:217.01312065124512ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15005 prompt_cache_len:5151 prompt_cache_ratio:0.3432855714761746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 +DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:33 [batch.py:51] router release req id 8 +INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.11034941673278809 s +INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.11243271827697754 s +DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=73988265215392574866951698619167243100, time:1750768893.4943929s req_ids:[8] +DEBUG 06-24 20:41:33 [manager.py:391] +ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:376.37948989868164ms total_cost_time:376.42431259155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15006 prompt_cache_len:5151 prompt_cache_ratio:0.3432626949220312 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 +DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:33 [batch.py:51] router release req id 8 +INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.10841703414916992 s +INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.10952353477478027 s +DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=105773950202960426558586750136995196485, time:1750768893.8781543s req_ids:[8] +DEBUG 06-24 20:41:33 [manager.py:391] +ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:219.1150188446045ms total_cost_time:219.15721893310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15007 prompt_cache_len:5151 prompt_cache_ratio:0.3432398214166722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 +DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:34 [batch.py:51] router release req id 8 +INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s +INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.1096651554107666 s +DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=62566946012981551267172630404501431052, time:1750768894.1051495s req_ids:[8] +DEBUG 06-24 20:41:34 [manager.py:391] +ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:176.82409286499023ms total_cost_time:176.86820030212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15008 prompt_cache_len:5151 prompt_cache_ratio:0.3432169509594883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 +DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:34 [batch.py:51] router release req id 8 +INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.1076056957244873 s +INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.10947704315185547 s +DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=111003280065290505829597606741982113564, time:1750768894.2852952s req_ids:[8] +DEBUG 06-24 20:41:34 [manager.py:391] +ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:204.8358917236328ms total_cost_time:204.88476753234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:15009 prompt_cache_len:5151 prompt_cache_ratio:0.3431940835498701 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 +DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:34 [batch.py:51] router release req id 8 +INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10960888862609863 s +INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.11184549331665039 s +DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=192141180293667075722977335216106723674, time:1750768894.497001s req_ids:[8] +DEBUG 06-24 20:41:34 [manager.py:391] +ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:212.61096000671387ms total_cost_time:212.65482902526855ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15010 prompt_cache_len:5151 prompt_cache_ratio:0.34317121918720855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 +DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:34 [batch.py:51] router release req id 8 +INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10865545272827148 s +INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.11078691482543945 s +DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=140630677894783208402179990917328003007, time:1750768894.7168555s req_ids:[8] +DEBUG 06-24 20:41:34 [manager.py:391] +ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:381.20222091674805ms total_cost_time:381.24680519104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15011 prompt_cache_len:5151 prompt_cache_ratio:0.3431483578708947 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 +DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:35 [batch.py:51] router release req id 8 +INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10877084732055664 s +INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.11084628105163574 s +DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=275302806747942986523242196890100767947, time:1750768895.1048276s req_ids:[8] +DEBUG 06-24 20:41:35 [manager.py:391] +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:220.1235294342041ms total_cost_time:220.16668319702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15012 prompt_cache_len:5151 prompt_cache_ratio:0.34312549960031974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 +DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:35 [batch.py:51] router release req id 8 +INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s +INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.1115269660949707 s +DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=198240914322113728627115471572143307914, time:1750768895.3384814s req_ids:[8] +DEBUG 06-24 20:41:35 [manager.py:391] +ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:228.6968231201172ms total_cost_time:228.74021530151367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15013 prompt_cache_len:5151 prompt_cache_ratio:0.3431026443748751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 +DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:35 [batch.py:51] router release req id 8 +INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s +INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.11054205894470215 s +DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=159135697595853286631329192648537620795, time:1750768895.5652165s req_ids:[8] +DEBUG 06-24 20:41:35 [manager.py:391] +ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:212.51273155212402ms total_cost_time:212.53490447998047ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:15014 prompt_cache_len:5151 prompt_cache_ratio:0.3430797921939523 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 +DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:35 [batch.py:51] router release req id 8 +INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s +INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944986343383789 s +DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=106464030211109145481723477885784725373, time:1750768895.7843294s req_ids:[8] +DEBUG 06-24 20:41:35 [manager.py:391] +ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:220.23439407348633ms total_cost_time:220.2889919281006ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:15015 prompt_cache_len:5151 prompt_cache_ratio:0.34305694305694306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 +DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:35 [batch.py:51] router release req id 8 +INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10868167877197266 s +INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.10962700843811035 s +DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=175251293042478211154517803785102976318, time:1750768896.0106792s req_ids:[8] +DEBUG 06-24 20:41:36 [manager.py:391] +ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:346.91834449768066ms total_cost_time:346.96221351623535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15016 prompt_cache_len:5151 prompt_cache_ratio:0.3430340969632392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 +DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:36 [batch.py:51] router release req id 8 +INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10761761665344238 s +INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s +DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=89920203491889801626574200972905509593, time:1750768896.3622448s req_ids:[8] +DEBUG 06-24 20:41:36 [manager.py:391] +ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:204.9400806427002ms total_cost_time:204.98299598693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15017 prompt_cache_len:5151 prompt_cache_ratio:0.3430112539122328 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 +DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:36 [batch.py:51] router release req id 8 +INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10952949523925781 s +INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.11148929595947266 s +DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=297901532215773788290340818267662566942, time:1750768896.5762126s req_ids:[8] +DEBUG 06-24 20:41:36 [manager.py:391] +ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:212.6333713531494ms total_cost_time:212.6789093017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15018 prompt_cache_len:5151 prompt_cache_ratio:0.342988413903316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 +DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:36 [batch.py:51] router release req id 8 +INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.11171317100524902 s +INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.11306214332580566 s +DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=191501369723447700927485896521963568442, time:1750768896.7957127s req_ids:[8] +DEBUG 06-24 20:41:36 [manager.py:391] +ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:214.84923362731934ms total_cost_time:214.8916721343994ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15019 prompt_cache_len:5151 prompt_cache_ratio:0.3429655769358812 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 +DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:36 [batch.py:51] router release req id 8 +INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.1109771728515625 s +INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11289238929748535 s +DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=141770978761726578433607078369791848463, time:1750768897.0164444s req_ids:[8] +DEBUG 06-24 20:41:37 [manager.py:391] +ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:217.42534637451172ms total_cost_time:217.4696922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15020 prompt_cache_len:5151 prompt_cache_ratio:0.3429427430093209 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 +DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:37 [batch.py:51] router release req id 8 +INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.10837578773498535 s +INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11030149459838867 s +DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=314916997881633685913953111632454184799, time:1750768897.2404473s req_ids:[8] +DEBUG 06-24 20:41:37 [manager.py:391] +ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:217.0114517211914ms total_cost_time:217.0555591583252ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15021 prompt_cache_len:5151 prompt_cache_ratio:0.34291991212302775 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 +DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:37 [batch.py:51] router release req id 8 +INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.10789918899536133 s +INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.10979580879211426 s +DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=222304584216660401759664066359327857125, time:1750768897.4617805s req_ids:[8] +DEBUG 06-24 20:41:37 [manager.py:391] +ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:383.0609321594238ms total_cost_time:383.1052780151367ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15022 prompt_cache_len:5151 prompt_cache_ratio:0.34289708427639465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 +DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:37 [batch.py:51] router release req id 8 +INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.1085355281829834 s +INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s +DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=339356230939116679823397430755638084455, time:1750768897.8528774s req_ids:[8] +DEBUG 06-24 20:41:37 [manager.py:391] +ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:214.83087539672852ms total_cost_time:214.87784385681152ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15023 prompt_cache_len:5151 prompt_cache_ratio:0.3428742594688145 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 +DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:37 [batch.py:51] router release req id 8 +INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s +INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11125850677490234 s +DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=251607770807142919921521026223677639570, time:1750768898.074562s req_ids:[8] +DEBUG 06-24 20:41:38 [manager.py:391] +ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:206.5730094909668ms total_cost_time:206.61377906799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:15024 prompt_cache_len:5151 prompt_cache_ratio:0.3428514376996805 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 +DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:38 [batch.py:51] router release req id 8 +INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.10967540740966797 s +INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.1117238998413086 s +DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=266940633232077859721598610060156828293, time:1750768898.2888916s req_ids:[8] +DEBUG 06-24 20:41:38 [manager.py:391] +ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:218.05095672607422ms total_cost_time:218.0941104888916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15025 prompt_cache_len:5151 prompt_cache_ratio:0.342828618968386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 +DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:38 [batch.py:51] router release req id 8 +INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.11025452613830566 s +INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11179256439208984 s +DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=280325910839107438213898396030809525308, time:1750768898.5130851s req_ids:[8] +DEBUG 06-24 20:41:38 [manager.py:391] +ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:216.56203269958496ms total_cost_time:216.60661697387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15026 prompt_cache_len:5151 prompt_cache_ratio:0.3428058032743245 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 +DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:38 [batch.py:51] router release req id 8 +INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.11108803749084473 s +INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11269330978393555 s +DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=288021914962145183480358429272282614988, time:1750768898.7456963s req_ids:[8] +DEBUG 06-24 20:41:38 [manager.py:391] +ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:188.39240074157715ms total_cost_time:188.43436241149902ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15027 prompt_cache_len:5151 prompt_cache_ratio:0.3427829906168896 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 +DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:39 [batch.py:51] router release req id 8 +INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.3108804225921631 s +INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.3128793239593506 s +DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=107909350400421865603764395207111990568, time:1750768899.1388607s req_ids:[8] +DEBUG 06-24 20:41:39 [manager.py:391] +ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:427.5243282318115ms total_cost_time:427.5696277618408ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15028 prompt_cache_len:5151 prompt_cache_ratio:0.3427601809954751 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 +DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:39 [batch.py:51] router release req id 8 +INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.1105349063873291 s +INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.1125798225402832 s +DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=136919818469105812953397167437127364100, time:1750768899.365628s req_ids:[8] +DEBUG 06-24 20:41:39 [manager.py:391] +ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:205.66105842590332ms total_cost_time:205.70826530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15029 prompt_cache_len:5151 prompt_cache_ratio:0.342737374409475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 +DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:39 [batch.py:51] router release req id 8 +INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.11058807373046875 s +INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.11258339881896973 s +DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=29082367047007068620337796489358134645, time:1750768899.5769184s req_ids:[8] +DEBUG 06-24 20:41:39 [manager.py:391] +ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:216.81737899780273ms total_cost_time:216.86053276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15030 prompt_cache_len:5151 prompt_cache_ratio:0.34271457085828344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 +DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:39 [batch.py:51] router release req id 8 +INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s +INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.10949993133544922 s +DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=79282224109627704573341929260586742131, time:1750768899.8006852s req_ids:[8] +DEBUG 06-24 20:41:39 [manager.py:391] +ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:202.37183570861816ms total_cost_time:202.41665840148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15031 prompt_cache_len:5151 prompt_cache_ratio:0.34269177034129467 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 +DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:39 [batch.py:51] router release req id 8 +INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.10985803604125977 s +INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11177897453308105 s +DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=4914664528978754296559123802525416125, time:1750768900.00942s req_ids:[8] +DEBUG 06-24 20:41:40 [manager.py:391] +ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:214.22147750854492ms total_cost_time:214.2658233642578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15032 prompt_cache_len:5151 prompt_cache_ratio:0.34266897285790315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 +DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:40 [batch.py:51] router release req id 8 +INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.11078739166259766 s +INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11259961128234863 s +DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=128836002972202678290920100983281839749, time:1750768900.2300262s req_ids:[8] +DEBUG 06-24 20:41:40 [manager.py:391] +ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:41:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 59388.197 tokens/s +DEBUG 06-24 20:41:40 [stats.py:37] Avg prompt tokens throughput: 59380.286 tokens/s +DEBUG 06-24 20:41:40 [stats.py:37] Avg generate tokens throughput: 7.910 tokens/s +INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:376.9841194152832ms total_cost_time:377.0277500152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15033 prompt_cache_len:5151 prompt_cache_ratio:0.3426461784075035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 +DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:40 [batch.py:51] router release req id 8 +INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.10748910903930664 s +INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.10924458503723145 s +DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=245612086217130801440167574684926194508, time:1750768900.614228s req_ids:[8] +DEBUG 06-24 20:41:40 [manager.py:391] +ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:214.86496925354004ms total_cost_time:214.90955352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15034 prompt_cache_len:5151 prompt_cache_ratio:0.34262338698949046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 +DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:40 [batch.py:51] router release req id 8 +INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.1085965633392334 s +INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11049413681030273 s +DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=132434276140596343699172035391200432652, time:1750768900.8370717s req_ids:[8] +DEBUG 06-24 20:41:40 [manager.py:391] +ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:216.69816970825195ms total_cost_time:216.74299240112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15035 prompt_cache_len:5151 prompt_cache_ratio:0.3426005986032591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 +DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:40 [batch.py:51] router release req id 8 +INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.11188244819641113 s +INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11375212669372559 s +DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=303676494995468759222598213438839012224, time:1750768901.0596085s req_ids:[8] +DEBUG 06-24 20:41:41 [manager.py:391] +ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:215.8670425415039ms total_cost_time:215.9261703491211ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15036 prompt_cache_len:5151 prompt_cache_ratio:0.3425778132482043 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 +DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:41 [batch.py:51] router release req id 8 +INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10904645919799805 s +INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.1109151840209961 s +DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=242018460239485632958360086014286758761, time:1750768901.2834673s req_ids:[8] +DEBUG 06-24 20:41:41 [manager.py:391] +ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:215.66128730773926ms total_cost_time:215.70611000061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15037 prompt_cache_len:5151 prompt_cache_ratio:0.34255503092372147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 +DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:41 [batch.py:51] router release req id 8 +INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10912489891052246 s +INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11099553108215332 s +DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=147697877860422208081557916912506719336, time:1750768901.5062788s req_ids:[8] +DEBUG 06-24 20:41:41 [manager.py:391] +ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:216.57347679138184ms total_cost_time:216.63904190063477ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:15038 prompt_cache_len:5151 prompt_cache_ratio:0.34253225162920603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 +DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:41 [batch.py:51] router release req id 8 +INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10895037651062012 s +INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11089134216308594 s +DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=198936895926139838807469166597417840416, time:1750768901.728543s req_ids:[8] +DEBUG 06-24 20:41:41 [manager.py:391] +ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:374.1450309753418ms total_cost_time:374.1891384124756ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15039 prompt_cache_len:5151 prompt_cache_ratio:0.34250947536405346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 +DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:42 [batch.py:51] router release req id 8 +INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.10847711563110352 s +INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11039614677429199 s +DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=292552252816090919278202324868528294325, time:1750768902.1130116s req_ids:[8] +DEBUG 06-24 20:41:42 [manager.py:391] +ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:222.54085540771484ms total_cost_time:222.58639335632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15040 prompt_cache_len:5151 prompt_cache_ratio:0.3424867021276596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 +DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:42 [batch.py:51] router release req id 8 +INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.1109931468963623 s +INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11273956298828125 s +DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=155634396420410974871778212565666806270, time:1750768902.3386033s req_ids:[8] +DEBUG 06-24 20:41:42 [manager.py:391] +ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:215.12150764465332ms total_cost_time:215.1651382446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15041 prompt_cache_len:5151 prompt_cache_ratio:0.34246393191942026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 +DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:42 [batch.py:51] router release req id 8 +INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.11055493354797363 s +INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11223125457763672 s +DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=104076044532069330015692727114806687122, time:1750768902.561581s req_ids:[8] +DEBUG 06-24 20:41:42 [manager.py:391] +ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:212.03351020812988ms total_cost_time:212.0802402496338ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15042 prompt_cache_len:5151 prompt_cache_ratio:0.34244116473873154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 +DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:42 [batch.py:51] router release req id 8 +INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.11113142967224121 s +INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11294245719909668 s +DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=314942193439731529536272246433010048572, time:1750768902.7804158s req_ids:[8] +DEBUG 06-24 20:41:42 [manager.py:391] +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:216.78972244262695ms total_cost_time:216.83430671691895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15043 prompt_cache_len:5151 prompt_cache_ratio:0.3424184005849897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 +DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:42 [batch.py:51] router release req id 8 +INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.1097254753112793 s +INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.11172795295715332 s +DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=59749822216877583008861517630733104703, time:1750768903.0035286s req_ids:[8] +DEBUG 06-24 20:41:43 [manager.py:391] +ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:381.01911544799805ms total_cost_time:381.06513023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15044 prompt_cache_len:5151 prompt_cache_ratio:0.34239563945759105 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 +DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:43 [batch.py:51] router release req id 8 +INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.10781645774841309 s +INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.10959196090698242 s +DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=191050457956107257876441725566553381860, time:1750768903.3899517s req_ids:[8] +DEBUG 06-24 20:41:43 [manager.py:391] +ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:214.55931663513184ms total_cost_time:214.60270881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15045 prompt_cache_len:5151 prompt_cache_ratio:0.3423728813559322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 +DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:43 [batch.py:51] router release req id 8 +INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.11039328575134277 s +INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.11206436157226562 s +DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=265373963661181809296748559712683028642, time:1750768903.6144896s req_ids:[8] +DEBUG 06-24 20:41:43 [manager.py:391] +ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:218.22237968444824ms total_cost_time:218.26696395874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15046 prompt_cache_len:5151 prompt_cache_ratio:0.3423501262794098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 +DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:43 [batch.py:51] router release req id 8 +INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.10941934585571289 s +INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.1113288402557373 s +DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=18879904704026333109587426434251455500, time:1750768903.8368983s req_ids:[8] +DEBUG 06-24 20:41:43 [manager.py:391] +ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:215.09265899658203ms total_cost_time:215.13652801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15047 prompt_cache_len:5151 prompt_cache_ratio:0.34232737422742077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 +DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:43 [batch.py:51] router release req id 8 +INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s +INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s +DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=333871928953391483110826278126430207582, time:1750768904.0593557s req_ids:[8] +DEBUG 06-24 20:41:44 [manager.py:391] +ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:216.02439880371094ms total_cost_time:216.06945991516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15048 prompt_cache_len:5151 prompt_cache_ratio:0.34230462519936206 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 +DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:44 [batch.py:51] router release req id 8 +INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.10808587074279785 s +INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11002969741821289 s +DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=210932886025604261108962234408949213076, time:1750768904.2824395s req_ids:[8] +DEBUG 06-24 20:41:44 [manager.py:391] +ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:211.30990982055664ms total_cost_time:211.35377883911133ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15049 prompt_cache_len:5151 prompt_cache_ratio:0.3422818791946309 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 +DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:44 [batch.py:51] router release req id 8 +INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.1109771728515625 s +INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11298155784606934 s +DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=212810875700332531170383542092353110788, time:1750768904.4996517s req_ids:[8] +DEBUG 06-24 20:41:44 [manager.py:391] +ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:374.4809627532959ms total_cost_time:374.5253086090088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15050 prompt_cache_len:5151 prompt_cache_ratio:0.3422591362126246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 +DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:44 [batch.py:51] router release req id 8 +INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.1083230972290039 s +INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11037421226501465 s +DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=174453179966349153166111277100499363826, time:1750768904.8798547s req_ids:[8] +DEBUG 06-24 20:41:44 [manager.py:391] +ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:217.81682968139648ms total_cost_time:217.8630828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15051 prompt_cache_len:5151 prompt_cache_ratio:0.34223639625274066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 +DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:45 [batch.py:51] router release req id 8 +INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.10822176933288574 s +INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.10925722122192383 s +DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=284800008336978484726762450352224316927, time:1750768905.1058981s req_ids:[8] +DEBUG 06-24 20:41:45 [manager.py:391] +ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:210.59775352478027ms total_cost_time:210.64352989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15052 prompt_cache_len:5151 prompt_cache_ratio:0.3422136593143768 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 +DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:45 [batch.py:51] router release req id 8 +INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.1087188720703125 s +INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11047530174255371 s +DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=183265122316067514672789967252552499440, time:1750768905.3234782s req_ids:[8] +DEBUG 06-24 20:41:45 [manager.py:391] +ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:216.8886661529541ms total_cost_time:216.933012008667ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15053 prompt_cache_len:5151 prompt_cache_ratio:0.34219092539693086 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 +DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:45 [batch.py:51] router release req id 8 +INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.10807347297668457 s +INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11026334762573242 s +DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=212726617675304735376846608961245807459, time:1750768905.5461123s req_ids:[8] +DEBUG 06-24 20:41:45 [manager.py:391] +ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:215.94548225402832ms total_cost_time:215.9874439239502ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15054 prompt_cache_len:5151 prompt_cache_ratio:0.34216819449980074 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 +DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:45 [batch.py:51] router release req id 8 +INFO 06-24 20:41:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.1086583137512207 s +INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11058449745178223 s +DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=236064269211470818176107771091560229518, time:1750768905.7691543s req_ids:[8] +DEBUG 06-24 20:41:45 [manager.py:391] +ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:379.17256355285645ms total_cost_time:379.21881675720215ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15055 prompt_cache_len:5151 prompt_cache_ratio:0.3421454666223846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 +DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:46 [batch.py:51] router release req id 8 +INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.11116313934326172 s +DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=39233689289509105446680506279282266896, time:1750768906.1537817s req_ids:[8] +DEBUG 06-24 20:41:46 [manager.py:391] +INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11307764053344727 s +ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:210.62135696411133ms total_cost_time:210.66665649414062ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15056 prompt_cache_len:5151 prompt_cache_ratio:0.34212274176408075 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 +DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:46 [batch.py:51] router release req id 8 +INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10905909538269043 s +INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11101508140563965 s +DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=141647235897768435332735377732571189771, time:1750768906.371627s req_ids:[8] +DEBUG 06-24 20:41:46 [manager.py:391] +ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:213.29236030578613ms total_cost_time:213.33670616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15057 prompt_cache_len:5151 prompt_cache_ratio:0.3421000199242877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 +DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:46 [batch.py:51] router release req id 8 +INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10763406753540039 s +INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.10913538932800293 s +DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=220219238596842224809658054709473577718, time:1750768906.5937047s req_ids:[8] +DEBUG 06-24 20:41:46 [manager.py:391] +ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:217.4522876739502ms total_cost_time:217.49591827392578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15058 prompt_cache_len:5151 prompt_cache_ratio:0.34207730110240403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 +DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:46 [batch.py:51] router release req id 8 +INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s +INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s +DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=58573806701864939045054105707694057397, time:1750768906.817152s req_ids:[8] +DEBUG 06-24 20:41:46 [manager.py:391] +ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:216.37868881225586ms total_cost_time:216.42088890075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15059 prompt_cache_len:5151 prompt_cache_ratio:0.34205458529782856 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 +DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:46 [batch.py:51] router release req id 8 +INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10882949829101562 s +INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.11074972152709961 s +DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=161546693945666954248507016722334263114, time:1750768907.0414143s req_ids:[8] +DEBUG 06-24 20:41:47 [manager.py:391] +INFO 06-24 20:41:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:382.30180740356445ms total_cost_time:382.36260414123535ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15060 prompt_cache_len:5151 prompt_cache_ratio:0.34203187250996014 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 +DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:47 [batch.py:51] router release req id 8 +INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10748028755187988 s +INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s +DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=271932922816642209706776211752563226564, time:1750768907.430292s req_ids:[8] +DEBUG 06-24 20:41:47 [manager.py:391] +ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:212.47029304504395ms total_cost_time:212.48984336853027ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:15061 prompt_cache_len:5151 prompt_cache_ratio:0.342009162738198 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 +DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:47 [batch.py:51] router release req id 8 +INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.1084599494934082 s +INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.1104888916015625 s +DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=271995003663406157061856077165916883574, time:1750768907.6499057s req_ids:[8] +DEBUG 06-24 20:41:47 [manager.py:391] +ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:217.637300491333ms total_cost_time:217.695951461792ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15062 prompt_cache_len:5151 prompt_cache_ratio:0.34198645598194133 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 +DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:47 [batch.py:51] router release req id 8 +INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s +INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.10974693298339844 s +DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=334489070257517803658949640124315822150, time:1750768907.8741257s req_ids:[8] +DEBUG 06-24 20:41:47 [manager.py:391] +ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:216.61829948425293ms total_cost_time:216.68362617492676ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:15063 prompt_cache_len:5151 prompt_cache_ratio:0.3419637522405895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 +DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:48 [batch.py:51] router release req id 8 +INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10641193389892578 s +INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.10826396942138672 s +DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=255097552523517388024575289258992940419, time:1750768908.1018918s req_ids:[8] +DEBUG 06-24 20:41:48 [manager.py:391] +ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:222.92447090148926ms total_cost_time:222.98693656921387ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:15064 prompt_cache_len:5151 prompt_cache_ratio:0.3419410515135422 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 +DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:48 [batch.py:51] router release req id 8 +INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s +INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11105537414550781 s +DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=120568070232426805315451619226877690644, time:1750768908.3281517s req_ids:[8] +DEBUG 06-24 20:41:48 [manager.py:391] +ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:217.74792671203613ms total_cost_time:217.80681610107422ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:15065 prompt_cache_len:5151 prompt_cache_ratio:0.34191835380019914 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 +DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:48 [batch.py:51] router release req id 8 +INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10883069038391113 s +INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11052107810974121 s +DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=336638436744989719257877848521813461769, time:1750768908.5524032s req_ids:[8] +DEBUG 06-24 20:41:48 [manager.py:391] +ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:346.085786819458ms total_cost_time:346.1451530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15066 prompt_cache_len:5151 prompt_cache_ratio:0.34189565909996017 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 +DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:48 [batch.py:51] router release req id 8 +INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10966777801513672 s +INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s +DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=340026426714101034507917899392667794028, time:1750768908.9067547s req_ids:[8] +DEBUG 06-24 20:41:48 [manager.py:391] +ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:214.26057815551758ms total_cost_time:214.32256698608398ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15067 prompt_cache_len:5151 prompt_cache_ratio:0.3418729674122254 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 +DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:49 [batch.py:51] router release req id 8 +INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s +INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987520217895508 s +DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=112979490311968913964166121693198819558, time:1750768909.12435s req_ids:[8] +DEBUG 06-24 20:41:49 [manager.py:391] +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:209.46025848388672ms total_cost_time:209.5053195953369ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15068 prompt_cache_len:5151 prompt_cache_ratio:0.341850278736395 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 +DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:49 [batch.py:51] router release req id 8 +INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10927033424377441 s +INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s +DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=234682628043113877485682750706014912415, time:1750768909.3423376s req_ids:[8] +DEBUG 06-24 20:41:49 [manager.py:391] +ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:214.1251564025879ms total_cost_time:214.16854858398438ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15069 prompt_cache_len:5151 prompt_cache_ratio:0.3418275930718694 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 +DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:49 [batch.py:51] router release req id 8 +INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s +INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.11023998260498047 s +DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=33565681220975658098454018508018360983, time:1750768909.5616922s req_ids:[8] +DEBUG 06-24 20:41:49 [manager.py:391] +ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:171.84209823608398ms total_cost_time:171.88501358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15070 prompt_cache_len:5151 prompt_cache_ratio:0.3418049104180491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 +DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:49 [batch.py:51] router release req id 8 +INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10914039611816406 s +INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.1110079288482666 s +DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=203729187529340237229402757958075157743, time:1750768909.7414377s req_ids:[8] +DEBUG 06-24 20:41:49 [manager.py:391] +ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:208.42409133911133ms total_cost_time:208.46891403198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15071 prompt_cache_len:5151 prompt_cache_ratio:0.34178223077433484 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 +DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:50 [batch.py:51] router release req id 8 +INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.2095801830291748 s +INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.2112407684326172 s +DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=281645117756050246340461154399441733454, time:1750768910.0918024s req_ids:[8] +DEBUG 06-24 20:41:50 [manager.py:391] +ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:336.0590934753418ms total_cost_time:336.1053466796875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15072 prompt_cache_len:5151 prompt_cache_ratio:0.3417595541401274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 +DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:50 [batch.py:51] router release req id 8 +INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.11126232147216797 s +INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11313939094543457 s +DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=80926614092023239868298072321053156853, time:1750768910.2985556s req_ids:[8] +DEBUG 06-24 20:41:50 [manager.py:391] +ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:215.11578559875488ms total_cost_time:215.15893936157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15073 prompt_cache_len:5151 prompt_cache_ratio:0.3417368805148278 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 +DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:50 [batch.py:51] router release req id 8 +INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.10970163345336914 s +INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11186099052429199 s +DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=300835596679619547576940777854797814847, time:1750768910.5195727s req_ids:[8] +DEBUG 06-24 20:41:50 [manager.py:391] +DEBUG 06-24 20:41:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 61554.403 tokens/s +DEBUG 06-24 20:41:50 [stats.py:37] Avg prompt tokens throughput: 61546.326 tokens/s +DEBUG 06-24 20:41:50 [stats.py:37] Avg generate tokens throughput: 8.077 tokens/s +ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:217.8051471710205ms total_cost_time:217.8492546081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15074 prompt_cache_len:5151 prompt_cache_ratio:0.34171420989783735 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 +DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:50 [batch.py:51] router release req id 8 +INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.10820412635803223 s +INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s +DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=261094707866582202039746025720798255286, time:1750768910.7439256s req_ids:[8] +DEBUG 06-24 20:41:50 [manager.py:391] +ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:211.75765991210938ms total_cost_time:211.80248260498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15075 prompt_cache_len:5151 prompt_cache_ratio:0.3416915422885572 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 +DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:50 [batch.py:51] router release req id 8 +INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.1100773811340332 s +INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11213278770446777 s +DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=283066310180235221907807822913137620973, time:1750768910.9623802s req_ids:[8] +DEBUG 06-24 20:41:50 [manager.py:391] +ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:176.66149139404297ms total_cost_time:176.72300338745117ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15076 prompt_cache_len:5151 prompt_cache_ratio:0.341668877686389 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 +DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:51 [batch.py:51] router release req id 8 +INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10788965225219727 s +INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.10979986190795898 s +DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=249701800865116622744170380505431949963, time:1750768911.1459336s req_ids:[8] +DEBUG 06-24 20:41:51 [manager.py:391] +ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:383.7168216705322ms total_cost_time:383.76426696777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15077 prompt_cache_len:5151 prompt_cache_ratio:0.34164621609073426 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 +DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:51 [batch.py:51] router release req id 8 +INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10863828659057617 s +INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.11043524742126465 s +DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=258297022791512060755435841648333940404, time:1750768911.534111s req_ids:[8] +DEBUG 06-24 20:41:51 [manager.py:391] +ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:172.806978225708ms total_cost_time:172.84893989562988ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15078 prompt_cache_len:5151 prompt_cache_ratio:0.3416235575009948 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 +DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:51 [batch.py:51] router release req id 8 +INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.1094825267791748 s +INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.11121201515197754 s +DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=2183851551898486920158933134249960023, time:1750768911.7156236s req_ids:[8] +DEBUG 06-24 20:41:51 [manager.py:391] +ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:210.1306915283203ms total_cost_time:210.15644073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:15079 prompt_cache_len:5151 prompt_cache_ratio:0.3416009019165727 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 +DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:51 [batch.py:51] router release req id 8 +INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s +INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.10962581634521484 s +DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=69774270831748128100184585093067372450, time:1750768911.9326503s req_ids:[8] +DEBUG 06-24 20:41:51 [manager.py:391] +ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:217.0889377593994ms total_cost_time:217.1323299407959ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15080 prompt_cache_len:5151 prompt_cache_ratio:0.34157824933687003 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 +DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:52 [batch.py:51] router release req id 8 +INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10929417610168457 s +INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125898361206055 s +DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=215545012090180538169451771818967844747, time:1750768912.1555722s req_ids:[8] +DEBUG 06-24 20:41:52 [manager.py:391] +ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:219.51699256896973ms total_cost_time:219.5594310760498ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15081 prompt_cache_len:5151 prompt_cache_ratio:0.341555599761289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 +DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:52 [batch.py:51] router release req id 8 +INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10802912712097168 s +INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.1097099781036377 s +DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=28506148198625333011481927810087402592, time:1750768912.3796394s req_ids:[8] +DEBUG 06-24 20:41:52 [manager.py:391] +ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:172.89376258850098ms total_cost_time:172.93405532836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:15082 prompt_cache_len:5151 prompt_cache_ratio:0.3415329531892322 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 +DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:52 [batch.py:51] router release req id 8 +INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10737276077270508 s +INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.10890555381774902 s +DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=181571877202807345102380543676494716370, time:1750768912.559716s req_ids:[8] +DEBUG 06-24 20:41:52 [manager.py:391] +ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:343.7011241912842ms total_cost_time:343.74499320983887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15083 prompt_cache_len:5151 prompt_cache_ratio:0.3415103096201021 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 +DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:52 [batch.py:51] router release req id 8 +INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10851836204528809 s +INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s +DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=45970912719117339612302879272312477101, time:1750768912.9097073s req_ids:[8] +DEBUG 06-24 20:41:52 [manager.py:391] +ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:208.404541015625ms total_cost_time:208.44721794128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15084 prompt_cache_len:5151 prompt_cache_ratio:0.3414876690533015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 +DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:53 [batch.py:51] router release req id 8 +INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10874509811401367 s +INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.11069011688232422 s +DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=21841646188685403684557818106693023862, time:1750768913.1240482s req_ids:[8] +DEBUG 06-24 20:41:53 [manager.py:391] +ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:174.47376251220703ms total_cost_time:174.51715469360352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15085 prompt_cache_len:5151 prompt_cache_ratio:0.3414650314882333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 +DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:53 [batch.py:51] router release req id 8 +INFO 06-24 20:41:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:41:53 [statics_utils.py:24] mean first cost: 232.79526009011153 ms +INFO 06-24 20:41:53 [statics_utils.py:24] mean per token cost: 0.057237066090271876 ms +INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10676407814025879 s +INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.10798239707946777 s +DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=151189781837006036891553911545168133127, time:1750768913.304848s req_ids:[8] +DEBUG 06-24 20:41:53 [manager.py:391] +ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:212.13412284851074ms total_cost_time:212.15415000915527ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:15086 prompt_cache_len:5151 prompt_cache_ratio:0.3414423969243007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 +DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:53 [batch.py:51] router release req id 8 +INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.1082007884979248 s +INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s +DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=318779123571854947444627469951396773643, time:1750768913.522357s req_ids:[8] +DEBUG 06-24 20:41:53 [manager.py:391] +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:209.94091033935547ms total_cost_time:209.98358726501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15087 prompt_cache_len:5151 prompt_cache_ratio:0.34141976536090674 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 +DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:53 [batch.py:51] router release req id 8 +INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s +INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.110870361328125 s +DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=298482292756897104505684097350522172629, time:1750768913.74107s req_ids:[8] +DEBUG 06-24 20:41:53 [manager.py:391] +ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:379.79793548583984ms total_cost_time:379.84204292297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15088 prompt_cache_len:5151 prompt_cache_ratio:0.3413971367974549 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 +DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:54 [batch.py:51] router release req id 8 +INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s +INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.11033749580383301 s +DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=152348007431906108863543912305695915814, time:1750768914.127895s req_ids:[8] +DEBUG 06-24 20:41:54 [manager.py:391] +ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:204.23555374145508ms total_cost_time:204.27894592285156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15089 prompt_cache_len:5151 prompt_cache_ratio:0.3413745112333488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 +DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:54 [batch.py:51] router release req id 8 +INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10850930213928223 s +INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.1105494499206543 s +DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=72234143059623363816169244082614546560, time:1750768914.338672s req_ids:[8] +DEBUG 06-24 20:41:54 [manager.py:391] +ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:213.31381797790527ms total_cost_time:213.35721015930176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15090 prompt_cache_len:5151 prompt_cache_ratio:0.34135188866799204 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 +DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:54 [batch.py:51] router release req id 8 +INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10905218124389648 s +INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.1110537052154541 s +DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=41758195201830778076746444762616542678, time:1750768914.5580869s req_ids:[8] +DEBUG 06-24 20:41:54 [manager.py:391] +ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:213.81735801696777ms total_cost_time:213.86075019836426ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15091 prompt_cache_len:5151 prompt_cache_ratio:0.34132926910078853 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 +DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:54 [batch.py:51] router release req id 8 +INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10814189910888672 s +INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.10978412628173828 s +DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=106354437660802350200787674333870438646, time:1750768914.777274s req_ids:[8] +DEBUG 06-24 20:41:54 [manager.py:391] +ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:173.77638816833496ms total_cost_time:173.81882667541504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15092 prompt_cache_len:5151 prompt_cache_ratio:0.34130665253114234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 +DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:54 [batch.py:51] router release req id 8 +INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.1071019172668457 s +INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.10902285575866699 s +DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=163271021314456153848346391459503465796, time:1750768914.97318s req_ids:[8] +DEBUG 06-24 20:41:54 [manager.py:391] +ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:219.13743019104004ms total_cost_time:219.18082237243652ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15093 prompt_cache_len:5151 prompt_cache_ratio:0.34128403895845755 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 +DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:55 [batch.py:51] router release req id 8 +INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10882329940795898 s +INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.11159253120422363 s +DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=112914082212821095783330252662377630594, time:1750768915.1838322s req_ids:[8] +DEBUG 06-24 20:41:55 [manager.py:391] +ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:412.6605987548828ms total_cost_time:412.7049446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15094 prompt_cache_len:5151 prompt_cache_ratio:0.3412614283821386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 +DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:55 [batch.py:51] router release req id 8 +INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s +INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103217601776123 s +DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=289889264986668363313209391944932275600, time:1750768915.6031835s req_ids:[8] +DEBUG 06-24 20:41:55 [manager.py:391] +ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:175.10056495666504ms total_cost_time:175.12154579162598ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:15095 prompt_cache_len:5151 prompt_cache_ratio:0.3412388208015899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 +DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:55 [batch.py:51] router release req id 8 +INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s +INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.10913252830505371 s +DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=51156618889855294454517010463924585283, time:1750768915.7859282s req_ids:[8] +DEBUG 06-24 20:41:55 [manager.py:391] +ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:175.44960975646973ms total_cost_time:175.49419403076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15096 prompt_cache_len:5151 prompt_cache_ratio:0.34121621621621623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 +DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:55 [batch.py:51] router release req id 8 +INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10823392868041992 s +INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s +DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=59115779834676107309353225364594538019, time:1750768915.9669774s req_ids:[8] +DEBUG 06-24 20:41:55 [manager.py:391] +ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:208.8449001312256ms total_cost_time:208.8909149169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15097 prompt_cache_len:5151 prompt_cache_ratio:0.34119361462542225 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 +DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:56 [batch.py:51] router release req id 8 +INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.1086878776550293 s +INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.11002779006958008 s +DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=14953796354546814740006014941513496716, time:1750768916.1813307s req_ids:[8] +DEBUG 06-24 20:41:56 [manager.py:391] +ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:233.26992988586426ms total_cost_time:233.31403732299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15098 prompt_cache_len:5151 prompt_cache_ratio:0.34117101602861305 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 +DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:56 [batch.py:51] router release req id 8 +INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.10784149169921875 s +INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.10907888412475586 s +DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=260226142424154188946531448959269391513, time:1750768916.4308105s req_ids:[8] +DEBUG 06-24 20:41:56 [manager.py:391] +ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:384.4788074493408ms total_cost_time:384.5231533050537ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15099 prompt_cache_len:5151 prompt_cache_ratio:0.3411484204251937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 +DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:56 [batch.py:51] router release req id 8 +INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.10956287384033203 s +INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.11157536506652832 s +DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=256805047913416277488057881128160942168, time:1750768916.812071s req_ids:[8] +DEBUG 06-24 20:41:56 [manager.py:391] +ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:216.28880500793457ms total_cost_time:216.33291244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15100 prompt_cache_len:5151 prompt_cache_ratio:0.3411258278145695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 +DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:56 [batch.py:51] router release req id 8 +INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10841679573059082 s +INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11043667793273926 s +DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=184080922360411198335547994297255947972, time:1750768917.03691s req_ids:[8] +DEBUG 06-24 20:41:57 [manager.py:391] +ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:219.9394702911377ms total_cost_time:219.98310089111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15101 prompt_cache_len:5151 prompt_cache_ratio:0.34110323819614596 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 +DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:57 [batch.py:51] router release req id 8 +INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10941600799560547 s +INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11138153076171875 s +DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=134052431938879338581717528815211457945, time:1750768917.2632842s req_ids:[8] +DEBUG 06-24 20:41:57 [manager.py:391] +ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:215.58690071105957ms total_cost_time:215.63053131103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15102 prompt_cache_len:5151 prompt_cache_ratio:0.34108065156932854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 +DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:57 [batch.py:51] router release req id 8 +INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10909485816955566 s +INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11106586456298828 s +DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=182273116362900452627579989927537463669, time:1750768917.484107s req_ids:[8] +DEBUG 06-24 20:41:57 [manager.py:391] +ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:216.2339687347412ms total_cost_time:216.292142868042ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:15103 prompt_cache_len:5151 prompt_cache_ratio:0.34105806793352317 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 +DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:57 [batch.py:51] router release req id 8 +INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.1091310977935791 s +INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s +DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=144322164691599546479468913602393519184, time:1750768917.7046149s req_ids:[8] +DEBUG 06-24 20:41:57 [manager.py:391] +ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:383.1789493560791ms total_cost_time:383.2218647003174ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15104 prompt_cache_len:5151 prompt_cache_ratio:0.3410354872881356 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 +DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:58 [batch.py:51] router release req id 8 +INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s +INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.1112978458404541 s +DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=328759578369771033784862834271249146551, time:1750768918.0942457s req_ids:[8] +DEBUG 06-24 20:41:58 [manager.py:391] +ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:216.5665626525879ms total_cost_time:216.60852432250977ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15105 prompt_cache_len:5151 prompt_cache_ratio:0.34101290963257197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 +DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:58 [batch.py:51] router release req id 8 +INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10967087745666504 s +INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11164188385009766 s +DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=84646863349846286003089801263035732345, time:1750768918.3149915s req_ids:[8] +DEBUG 06-24 20:41:58 [manager.py:391] +ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:210.11018753051758ms total_cost_time:210.15477180480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15106 prompt_cache_len:5151 prompt_cache_ratio:0.3409903349662386 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 +DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:58 [batch.py:51] router release req id 8 +INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10940432548522949 s +INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.1114039421081543 s +DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=151880485984966726155694783717332178651, time:1750768918.5327442s req_ids:[8] +DEBUG 06-24 20:41:58 [manager.py:391] +ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:215.93093872070312ms total_cost_time:215.9733772277832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15107 prompt_cache_len:5151 prompt_cache_ratio:0.3409677632885417 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 +DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:58 [batch.py:51] router release req id 8 +INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10916733741760254 s +INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11111998558044434 s +DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=18710355243211274935617998411004518276, time:1750768918.754063s req_ids:[8] +DEBUG 06-24 20:41:58 [manager.py:391] +ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:217.9553508758545ms total_cost_time:217.99898147583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15108 prompt_cache_len:5151 prompt_cache_ratio:0.340945194598888 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 +DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:58 [batch.py:51] router release req id 8 +INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10952425003051758 s +INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11126160621643066 s +DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=41001186266519349659632480122461850623, time:1750768918.979691s req_ids:[8] +DEBUG 06-24 20:41:58 [manager.py:391] +ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:176.57470703125ms total_cost_time:176.6207218170166ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15109 prompt_cache_len:5151 prompt_cache_ratio:0.3409226288966841 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 +DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:59 [batch.py:51] router release req id 8 +INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10947895050048828 s +INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.1114969253540039 s +DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=135907808961695677569412970812330166470, time:1750768919.162218s req_ids:[8] +DEBUG 06-24 20:41:59 [manager.py:391] +ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:384.69672203063965ms total_cost_time:384.74154472351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15110 prompt_cache_len:5151 prompt_cache_ratio:0.34090006618133684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 +DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:59 [batch.py:51] router release req id 8 +INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10837173461914062 s +INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s +DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=61567501262750498214145369267629152541, time:1750768919.554969s req_ids:[8] +DEBUG 06-24 20:41:59 [manager.py:391] +ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:216.82500839233398ms total_cost_time:216.87626838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:15111 prompt_cache_len:5151 prompt_cache_ratio:0.34087750645225334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 +DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:59 [batch.py:51] router release req id 8 +INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s +INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.11183714866638184 s +DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=134668308301368890131021354029335366204, time:1750768919.7796903s req_ids:[8] +DEBUG 06-24 20:41:59 [manager.py:391] +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:41:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:234.53927040100098ms total_cost_time:234.58528518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15112 prompt_cache_len:5151 prompt_cache_ratio:0.34085494970884067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 +DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:41:59 [batch.py:51] router release req id 8 +INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10832834243774414 s +INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104893684387207 s +DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=47713204662063076854880932830691400815, time:1750768920.027444s req_ids:[8] +DEBUG 06-24 20:42:00 [manager.py:391] +ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:221.17090225219727ms total_cost_time:221.23980522155762ms,out_token_counter:1 mean_per_token_cost_time: 0.06890296936035156ms prompt_token_num:15113 prompt_cache_len:5151 prompt_cache_ratio:0.3408323959505062 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 +DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:00 [batch.py:51] router release req id 8 +INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10856366157531738 s +INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.1106421947479248 s +DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=320860260490044786099541792835853312356, time:1750768920.2503104s req_ids:[8] +DEBUG 06-24 20:42:00 [manager.py:391] +ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:249.50003623962402ms total_cost_time:249.54628944396973ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15114 prompt_cache_len:5151 prompt_cache_ratio:0.3408098451766574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 +DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:00 [batch.py:51] router release req id 8 +INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10890436172485352 s +INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s +DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=251201076994834275782970455603988051977, time:1750768920.52111s req_ids:[8] +DEBUG 06-24 20:42:00 [manager.py:391] +DEBUG 06-24 20:42:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 61890.440 tokens/s +DEBUG 06-24 20:42:00 [stats.py:37] Avg prompt tokens throughput: 61882.241 tokens/s +DEBUG 06-24 20:42:00 [stats.py:37] Avg generate tokens throughput: 8.199 tokens/s +ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:223.84309768676758ms total_cost_time:223.92702102661133ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:15115 prompt_cache_len:5151 prompt_cache_ratio:0.34078729738670194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 +DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:00 [batch.py:51] router release req id 8 +INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.3114936351776123 s +INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.31391263008117676 s +DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=142770800511558748084894835519988784642, time:1750768920.9400666s req_ids:[8] +DEBUG 06-24 20:42:00 [manager.py:391] +ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:446.5157985687256ms total_cost_time:446.57349586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15116 prompt_cache_len:5151 prompt_cache_ratio:0.3407647525800476 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 +DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:01 [batch.py:51] router release req id 8 +INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s +INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.10976910591125488 s +DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=292716616776109716335042459139377068333, time:1750768921.1976094s req_ids:[8] +DEBUG 06-24 20:42:01 [manager.py:391] +ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:237.7016544342041ms total_cost_time:237.7619743347168ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15117 prompt_cache_len:5151 prompt_cache_ratio:0.3407422107561024 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 +DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:01 [batch.py:51] router release req id 8 +INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10908913612365723 s +INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.11115217208862305 s +DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=292752961160642883329417984701403430296, time:1750768921.4409993s req_ids:[8] +DEBUG 06-24 20:42:01 [manager.py:391] +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:227.1733283996582ms total_cost_time:227.2329330444336ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:15118 prompt_cache_len:5151 prompt_cache_ratio:0.3407196719142744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 +DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:01 [batch.py:51] router release req id 8 +INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10768461227416992 s +INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s +DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=23728277255687012303600469779707287489, time:1750768921.6701703s req_ids:[8] +DEBUG 06-24 20:42:01 [manager.py:391] +ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:185.14776229858398ms total_cost_time:185.19306182861328ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15119 prompt_cache_len:5151 prompt_cache_ratio:0.34069713605397184 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 +DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:01 [batch.py:51] router release req id 8 +INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10853767395019531 s +INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.11011219024658203 s +DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=332528922802634058728495723105243719466, time:1750768921.8705637s req_ids:[8] +DEBUG 06-24 20:42:01 [manager.py:391] +ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:194.3376064300537ms total_cost_time:194.39959526062012ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15120 prompt_cache_len:5151 prompt_cache_ratio:0.3406746031746032 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 +DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:01 [batch.py:51] router release req id 8 +INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s +INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11117792129516602 s +DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=129464832289572141500767201707391086603, time:1750768922.0700247s req_ids:[8] +DEBUG 06-24 20:42:02 [manager.py:391] +ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:405.92408180236816ms total_cost_time:405.9772491455078ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15121 prompt_cache_len:5151 prompt_cache_ratio:0.340652073275577 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 +DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:02 [batch.py:51] router release req id 8 +INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s +INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11147713661193848 s +DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=191854365426463251610690205097331085982, time:1750768922.4774551s req_ids:[8] +DEBUG 06-24 20:42:02 [manager.py:391] +ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:232.93733596801758ms total_cost_time:232.99860954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15122 prompt_cache_len:5151 prompt_cache_ratio:0.34062954635630205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 +DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:02 [batch.py:51] router release req id 8 +INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10943412780761719 s +INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11169576644897461 s +DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=236025743619789428316483758715654471847, time:1750768922.722321s req_ids:[8] +DEBUG 06-24 20:42:02 [manager.py:391] +ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:234.53807830810547ms total_cost_time:234.59792137145996ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:15123 prompt_cache_len:5151 prompt_cache_ratio:0.3406070224161873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 +DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:02 [batch.py:51] router release req id 8 +INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s +INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11088109016418457 s +DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=158442976878761652122875386252503453636, time:1750768922.9648066s req_ids:[8] +DEBUG 06-24 20:42:02 [manager.py:391] +ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:236.03343963623047ms total_cost_time:236.09113693237305ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15124 prompt_cache_len:5151 prompt_cache_ratio:0.34058450145464164 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 +DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:03 [batch.py:51] router release req id 8 +INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10809063911437988 s +INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11016058921813965 s +DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=260975344026449079813912402695700404977, time:1750768923.2087054s req_ids:[8] +DEBUG 06-24 20:42:03 [manager.py:391] +ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:220.14141082763672ms total_cost_time:220.20196914672852ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15125 prompt_cache_len:5151 prompt_cache_ratio:0.34056198347107436 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 +DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:03 [batch.py:51] router release req id 8 +INFO 06-24 20:42:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10964751243591309 s +INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11169838905334473 s +DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=56328092031856350232001204283227619558, time:1750768923.4298987s req_ids:[8] +DEBUG 06-24 20:42:03 [manager.py:391] +ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:230.26490211486816ms total_cost_time:230.30996322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15126 prompt_cache_len:5151 prompt_cache_ratio:0.34053946846489486 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 +DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:03 [batch.py:51] router release req id 8 +INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10875535011291504 s +INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11017560958862305 s +DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=242819618091853126138592557824733027511, time:1750768923.6814861s req_ids:[8] +DEBUG 06-24 20:42:03 [manager.py:391] +ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:362.987756729126ms total_cost_time:363.0409240722656ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15127 prompt_cache_len:5151 prompt_cache_ratio:0.34051695643551266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 +DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:03 [batch.py:51] router release req id 8 +INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s +INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.10963201522827148 s +DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=119648962098155508205452085484944690819, time:1750768924.0371702s req_ids:[8] +DEBUG 06-24 20:42:04 [manager.py:391] +ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:209.49387550354004ms total_cost_time:209.53941345214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15128 prompt_cache_len:5151 prompt_cache_ratio:0.34049444738233736 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 +DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:04 [batch.py:51] router release req id 8 +INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10999608039855957 s +INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11194300651550293 s +DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=151203195932878952939661329562518074308, time:1750768924.2661686s req_ids:[8] +DEBUG 06-24 20:42:04 [manager.py:391] +ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:230.59725761413574ms total_cost_time:230.65757751464844ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15129 prompt_cache_len:5151 prompt_cache_ratio:0.3404719413047789 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 +DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:04 [batch.py:51] router release req id 8 +INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10804295539855957 s +INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.10997271537780762 s +DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=245520470666247478266902375212474005024, time:1750768924.4923224s req_ids:[8] +DEBUG 06-24 20:42:04 [manager.py:391] +ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:217.29779243469238ms total_cost_time:217.35882759094238ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:15130 prompt_cache_len:5151 prompt_cache_ratio:0.3404494382022472 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 +DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:04 [batch.py:51] router release req id 8 +INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10970568656921387 s +INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11175394058227539 s +DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=34621216213874200289464525700555307880, time:1750768924.7163055s req_ids:[8] +DEBUG 06-24 20:42:04 [manager.py:391] +ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:211.95578575134277ms total_cost_time:212.01491355895996ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15131 prompt_cache_len:5151 prompt_cache_ratio:0.3404269380741524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 +DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:04 [batch.py:51] router release req id 8 +INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s +INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11084413528442383 s +DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=174222114439524139736014604387299845718, time:1750768924.9352312s req_ids:[8] +DEBUG 06-24 20:42:04 [manager.py:391] +ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:383.0540180206299ms total_cost_time:383.1160068511963ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15132 prompt_cache_len:5151 prompt_cache_ratio:0.34040444091990485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 +DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:05 [batch.py:51] router release req id 8 +INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.1089780330657959 s +INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11071014404296875 s +DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=293943600145546403510529913689509359480, time:1750768925.3274732s req_ids:[8] +DEBUG 06-24 20:42:05 [manager.py:391] +ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:217.9248332977295ms total_cost_time:217.9715633392334ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15133 prompt_cache_len:5151 prompt_cache_ratio:0.340381946738915 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 +DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:05 [batch.py:51] router release req id 8 +INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s +INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s +DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=122567621281190014287128082149545896538, time:1750768925.5507078s req_ids:[8] +DEBUG 06-24 20:42:05 [manager.py:391] +ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:218.98531913757324ms total_cost_time:219.04659271240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15134 prompt_cache_len:5151 prompt_cache_ratio:0.3403594555305934 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 +DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:05 [batch.py:51] router release req id 8 +INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s +INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s +DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=181716967487952626663313275643190550642, time:1750768925.7759686s req_ids:[8] +DEBUG 06-24 20:42:05 [manager.py:391] +ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7076644897461ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15135 prompt_cache_len:5151 prompt_cache_ratio:0.34033696729435087 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 +DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:05 [batch.py:51] router release req id 8 +INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10898113250732422 s +INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s +DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=329770381711218239256854856042062636515, time:1750768925.9945657s req_ids:[8] +DEBUG 06-24 20:42:05 [manager.py:391] +ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:215.5628204345703ms total_cost_time:215.620756149292ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:15136 prompt_cache_len:5151 prompt_cache_ratio:0.3403144820295983 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 +DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:06 [batch.py:51] router release req id 8 +INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10923385620117188 s +INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11123871803283691 s +DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=105718388953191655217562251393093617301, time:1750768926.217449s req_ids:[8] +DEBUG 06-24 20:42:06 [manager.py:391] +ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:217.71669387817383ms total_cost_time:217.77629852294922ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:15137 prompt_cache_len:5151 prompt_cache_ratio:0.34029199973574686 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 +DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:06 [batch.py:51] router release req id 8 +INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10824847221374512 s +INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11018109321594238 s +DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=213705483799439245614474845534458892873, time:1750768926.439531s req_ids:[8] +DEBUG 06-24 20:42:06 [manager.py:391] +ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:371.18053436279297ms total_cost_time:371.2432384490967ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:15138 prompt_cache_len:5151 prompt_cache_ratio:0.3402695204122077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 +DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:06 [batch.py:51] router release req id 8 +INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s +INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023736000061035 s +DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=106371600486117104738433619347586511830, time:1750768926.8193789s req_ids:[8] +DEBUG 06-24 20:42:06 [manager.py:391] +ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:172.07813262939453ms total_cost_time:172.13034629821777ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:15139 prompt_cache_len:5151 prompt_cache_ratio:0.34024704405839223 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 +DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:06 [batch.py:51] router release req id 8 +INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s +INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11054086685180664 s +DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=253727504005699471469345535585371517078, time:1750768927.0016215s req_ids:[8] +DEBUG 06-24 20:42:07 [manager.py:391] +ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:174.9410629272461ms total_cost_time:174.98445510864258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15140 prompt_cache_len:5151 prompt_cache_ratio:0.34022457067371203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 +DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:07 [batch.py:51] router release req id 8 +INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10932111740112305 s +INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.11110258102416992 s +DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=90591396344182833445147627550415117224, time:1750768927.1830559s req_ids:[8] +DEBUG 06-24 20:42:07 [manager.py:391] +ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:213.3197784423828ms total_cost_time:213.364839553833ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15141 prompt_cache_len:5151 prompt_cache_ratio:0.34020210025757874 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 +DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:07 [batch.py:51] router release req id 8 +INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10909295082092285 s +INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.11114740371704102 s +DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=44516360052309234812207522500813250719, time:1750768927.4037309s req_ids:[8] +DEBUG 06-24 20:42:07 [manager.py:391] +ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:221.28891944885254ms total_cost_time:221.33231163024902ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15142 prompt_cache_len:5151 prompt_cache_ratio:0.34017963280940433 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 +DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:07 [batch.py:51] router release req id 8 +INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10893535614013672 s +INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.1106722354888916 s +DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=310803097578630834209985896133924027191, time:1750768927.6349337s req_ids:[8] +DEBUG 06-24 20:42:07 [manager.py:391] +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:380.4326057434082ms total_cost_time:380.479097366333ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15143 prompt_cache_len:5151 prompt_cache_ratio:0.34015716832860066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 +DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:07 [batch.py:51] router release req id 8 +INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.11096787452697754 s +INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11319112777709961 s +DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=16770693465956860871455644356012166182, time:1750768928.0183327s req_ids:[8] +DEBUG 06-24 20:42:08 [manager.py:391] +ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:212.88061141967773ms total_cost_time:212.92424201965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15144 prompt_cache_len:5151 prompt_cache_ratio:0.34013470681458 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 +DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:08 [batch.py:51] router release req id 8 +INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10921430587768555 s +INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.1111292839050293 s +DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=220242871540421039298134463637496962635, time:1750768928.2374442s req_ids:[8] +DEBUG 06-24 20:42:08 [manager.py:391] +ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:216.54343605041504ms total_cost_time:216.58802032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15145 prompt_cache_len:5151 prompt_cache_ratio:0.3401122482667547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 +DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:08 [batch.py:51] router release req id 8 +INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10704708099365234 s +INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.1088407039642334 s +DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=75320475940050182699501259913804923522, time:1750768928.4599936s req_ids:[8] +DEBUG 06-24 20:42:08 [manager.py:391] +ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:173.4154224395752ms total_cost_time:173.44927787780762ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:15146 prompt_cache_len:5151 prompt_cache_ratio:0.34008979268453715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 +DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:08 [batch.py:51] router release req id 8 +INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s +INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11166024208068848 s +DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=10151914379784797819032507642563774842, time:1750768928.641206s req_ids:[8] +DEBUG 06-24 20:42:08 [manager.py:391] +ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:211.59887313842773ms total_cost_time:211.64274215698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15147 prompt_cache_len:5151 prompt_cache_ratio:0.3400673400673401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 +DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:08 [batch.py:51] router release req id 8 +INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.11143040657043457 s +INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11342620849609375 s +DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=237509338301135155914547641553873250848, time:1750768928.8589869s req_ids:[8] +DEBUG 06-24 20:42:08 [manager.py:391] +ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:390.4128074645996ms total_cost_time:390.4588222503662ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15148 prompt_cache_len:5151 prompt_cache_ratio:0.3400448904145762 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 +DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:09 [batch.py:51] router release req id 8 +INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s +INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11124444007873535 s +DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=266986863866306553650511161103714025302, time:1750768929.2548144s req_ids:[8] +DEBUG 06-24 20:42:09 [manager.py:391] +ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:216.14670753479004ms total_cost_time:216.18938446044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15149 prompt_cache_len:5151 prompt_cache_ratio:0.3400224437256585 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 +DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:09 [batch.py:51] router release req id 8 +INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.11080431938171387 s +INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11290836334228516 s +DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=259719751702741891281946431056453846828, time:1750768929.4812276s req_ids:[8] +DEBUG 06-24 20:42:09 [manager.py:391] +ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:225.59309005737305ms total_cost_time:225.6314754486084ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:15150 prompt_cache_len:5151 prompt_cache_ratio:0.34 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 +DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:09 [batch.py:51] router release req id 8 +INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10841202735900879 s +INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11053323745727539 s +DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=162666230470223322202687180923312568516, time:1750768929.7132275s req_ids:[8] +DEBUG 06-24 20:42:09 [manager.py:391] +ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:210.80708503723145ms total_cost_time:210.85858345031738ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:15151 prompt_cache_len:5151 prompt_cache_ratio:0.33997755923701406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 +DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:09 [batch.py:51] router release req id 8 +INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10915303230285645 s +INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s +DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=204379589067036472048125837799511295054, time:1750768929.929747s req_ids:[8] +DEBUG 06-24 20:42:09 [manager.py:391] +ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:212.81671524047852ms total_cost_time:212.8775119781494ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15152 prompt_cache_len:5151 prompt_cache_ratio:0.33995512143611406 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 +DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:10 [batch.py:51] router release req id 8 +INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s +INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11068916320800781 s +DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=162472383881362703335362734243699832234, time:1750768930.1613822s req_ids:[8] +DEBUG 06-24 20:42:10 [manager.py:391] +ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:231.47225379943848ms total_cost_time:231.5351963043213ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:15153 prompt_cache_len:5151 prompt_cache_ratio:0.3399326865967135 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 +DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:10 [batch.py:51] router release req id 8 +INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s +INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11060166358947754 s +DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=36687424468851237904626865667461902308, time:1750768930.402315s req_ids:[8] +DEBUG 06-24 20:42:10 [manager.py:391] +ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:42:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 58244.419 tokens/s +DEBUG 06-24 20:42:10 [stats.py:37] Avg prompt tokens throughput: 58236.625 tokens/s +DEBUG 06-24 20:42:10 [stats.py:37] Avg generate tokens throughput: 7.794 tokens/s +INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:383.56494903564453ms total_cost_time:383.6092948913574ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15154 prompt_cache_len:5151 prompt_cache_ratio:0.3399102547182262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 +DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:10 [batch.py:51] router release req id 8 +INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10806870460510254 s +INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11001086235046387 s +DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=116954138388791682524458953858580656279, time:1750768930.7820656s req_ids:[8] +DEBUG 06-24 20:42:10 [manager.py:391] +ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:220.55411338806152ms total_cost_time:220.59869766235352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15155 prompt_cache_len:5151 prompt_cache_ratio:0.339887825800066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 +DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:10 [batch.py:51] router release req id 8 +DEBUG 06-24 20:42:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:10 [manager.py:283] +DEBUG 06-24 20:42:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:10 [manager.py:284] +INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.1089625358581543 s +INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11111330986022949 s +DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=217637866310348812204766851318435611166, time:1750768931.00737s req_ids:[8] +DEBUG 06-24 20:42:11 [manager.py:391] +ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:219.18988227844238ms total_cost_time:219.24901008605957ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15156 prompt_cache_len:5151 prompt_cache_ratio:0.3398653998416469 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 +DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:11 [batch.py:51] router release req id 8 +INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s +INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.1100471019744873 s +DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=88412097004777143828733987993335715672, time:1750768931.2318702s req_ids:[8] +DEBUG 06-24 20:42:11 [manager.py:391] +ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:215.5916690826416ms total_cost_time:215.63458442687988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15157 prompt_cache_len:5151 prompt_cache_ratio:0.33984297684238307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 +DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:11 [batch.py:51] router release req id 8 +INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10874032974243164 s +INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11077022552490234 s +DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=308396656811554247816575141514679258621, time:1750768931.4543045s req_ids:[8] +DEBUG 06-24 20:42:11 [manager.py:391] +ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:212.4795913696289ms total_cost_time:212.5232219696045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15158 prompt_cache_len:5151 prompt_cache_ratio:0.3398205568016889 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 +DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:11 [batch.py:51] router release req id 8 +INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10859870910644531 s +INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s +DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=163419811437959115575735290248869874884, time:1750768931.6740763s req_ids:[8] +DEBUG 06-24 20:42:11 [manager.py:391] +ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:216.54152870178223ms total_cost_time:216.60423278808594ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:15159 prompt_cache_len:5151 prompt_cache_ratio:0.3397981397189788 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 +DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:11 [batch.py:51] router release req id 8 +INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.3139023780822754 s +INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.31595540046691895 s +DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=298875651006723307089745361053395311799, time:1750768932.100516s req_ids:[8] +DEBUG 06-24 20:42:12 [manager.py:391] +ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:422.85943031311035ms total_cost_time:422.90329933166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15160 prompt_cache_len:5151 prompt_cache_ratio:0.33977572559366753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 +DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:12 [batch.py:51] router release req id 8 +INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10849785804748535 s +INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11026167869567871 s +DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=75458211344852979027204447494708317446, time:1750768932.3290565s req_ids:[8] +DEBUG 06-24 20:42:12 [manager.py:391] +ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:171.73409461975098ms total_cost_time:171.77605628967285ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15161 prompt_cache_len:5151 prompt_cache_ratio:0.3397533144251698 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 +DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:12 [batch.py:51] router release req id 8 +INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10846972465515137 s +INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11050009727478027 s +DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=314514047641390217513180188380208649819, time:1750768932.509847s req_ids:[8] +DEBUG 06-24 20:42:12 [manager.py:391] +ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:215.69108963012695ms total_cost_time:215.73495864868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15162 prompt_cache_len:5151 prompt_cache_ratio:0.3397309062129007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 +DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:12 [batch.py:51] router release req id 8 +INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10895943641662598 s +INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11084604263305664 s +DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=12017392277513101675152169549719479217, time:1750768932.7316117s req_ids:[8] +DEBUG 06-24 20:42:12 [manager.py:391] +ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:218.57333183288574ms total_cost_time:218.61934661865234ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15163 prompt_cache_len:5151 prompt_cache_ratio:0.33970850095627514 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 +DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:12 [batch.py:51] router release req id 8 +INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10940980911254883 s +INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11160588264465332 s +DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=160749533772920837796674148345896475644, time:1750768932.9571192s req_ids:[8] +DEBUG 06-24 20:42:12 [manager.py:391] +ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:212.74328231811523ms total_cost_time:212.79120445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:15164 prompt_cache_len:5151 prompt_cache_ratio:0.3396860986547085 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 +DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:13 [batch.py:51] router release req id 8 +INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.10826396942138672 s +INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s +DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=100905699756882877748345440464227284438, time:1750768933.1762204s req_ids:[8] +DEBUG 06-24 20:42:13 [manager.py:391] +ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:387.4964714050293ms total_cost_time:387.54963874816895ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15165 prompt_cache_len:5151 prompt_cache_ratio:0.3396636993076162 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 +DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:13 [batch.py:51] router release req id 8 +INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.1082296371459961 s +INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.11031103134155273 s +DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=130663392160568233189535034101535982721, time:1750768933.5693545s req_ids:[8] +DEBUG 06-24 20:42:13 [manager.py:391] +ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:219.81334686279297ms total_cost_time:219.87366676330566ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15166 prompt_cache_len:5151 prompt_cache_ratio:0.3396413029144138 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 +DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:13 [batch.py:51] router release req id 8 +INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.10912728309631348 s +INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112067699432373 s +DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=114687940023632532315405087634798030527, time:1750768933.8040345s req_ids:[8] +DEBUG 06-24 20:42:13 [manager.py:391] +ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:220.98731994628906ms total_cost_time:221.04740142822266ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:15167 prompt_cache_len:5151 prompt_cache_ratio:0.33961890947451706 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 +DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:13 [batch.py:51] router release req id 8 +INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10866451263427734 s +INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11071491241455078 s +DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=211290683999722074312007029203276003648, time:1750768934.0256963s req_ids:[8] +DEBUG 06-24 20:42:14 [manager.py:391] +ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:217.06151962280273ms total_cost_time:217.10896492004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15168 prompt_cache_len:5151 prompt_cache_ratio:0.3395965189873418 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 +DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:14 [batch.py:51] router release req id 8 +INFO 06-24 20:42:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.11349654197692871 s +DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=37553541000315564764521294644688198063, time:1750768934.2504926s req_ids:[8] +DEBUG 06-24 20:42:14 [manager.py:391] +INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11444735527038574 s +ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:174.85976219177246ms total_cost_time:174.98469352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.12493133544921875ms prompt_token_num:15169 prompt_cache_len:5151 prompt_cache_ratio:0.33957413145230403 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 +DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:14 [batch.py:51] router release req id 8 +INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10886502265930176 s +INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11073589324951172 s +DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=1096136736441189369241097787671684276, time:1750768934.4331121s req_ids:[8] +DEBUG 06-24 20:42:14 [manager.py:391] +ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:213.43302726745605ms total_cost_time:213.49525451660156ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:15170 prompt_cache_len:5151 prompt_cache_ratio:0.33955174686882006 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 +DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:14 [batch.py:51] router release req id 8 +INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10869097709655762 s +INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s +DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=10604411426724509131112122600169596658, time:1750768934.6683373s req_ids:[8] +DEBUG 06-24 20:42:14 [manager.py:391] +ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:396.2969779968262ms total_cost_time:396.35205268859863ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:15171 prompt_cache_len:5151 prompt_cache_ratio:0.3395293652363061 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 +DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:14 [batch.py:51] router release req id 8 +INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10937976837158203 s +INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11175155639648438 s +DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=308278672626882888112016134426806295630, time:1750768935.0715678s req_ids:[8] +DEBUG 06-24 20:42:15 [manager.py:391] +ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:231.5058708190918ms total_cost_time:231.5652370452881ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15172 prompt_cache_len:5151 prompt_cache_ratio:0.33950698655417877 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 +DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:15 [batch.py:51] router release req id 8 +INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.1088871955871582 s +INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11085391044616699 s +DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=154269202804267551771659652672756194674, time:1750768935.2985997s req_ids:[8] +DEBUG 06-24 20:42:15 [manager.py:391] +ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:217.27395057678223ms total_cost_time:217.32759475708008ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:15173 prompt_cache_len:5151 prompt_cache_ratio:0.3394846108218546 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 +DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:15 [batch.py:51] router release req id 8 +INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10907983779907227 s +INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11095786094665527 s +DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=211162656016160909205669852684200529289, time:1750768935.5333889s req_ids:[8] +DEBUG 06-24 20:42:15 [manager.py:391] +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:229.59232330322266ms total_cost_time:229.65168952941895ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15174 prompt_cache_len:5151 prompt_cache_ratio:0.3394622380387505 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 +DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:15 [batch.py:51] router release req id 8 +INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10830283164978027 s +INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949945449829102 s +DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=222070297526169833170158616475890934056, time:1750768935.7594836s req_ids:[8] +DEBUG 06-24 20:42:15 [manager.py:391] +ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:218.34182739257812ms total_cost_time:218.38879585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15175 prompt_cache_len:5151 prompt_cache_ratio:0.33943986820428335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 +DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:15 [batch.py:51] router release req id 8 +INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10854315757751465 s +INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11062383651733398 s +DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=37914800362907107182648095138160948131, time:1750768935.9894776s req_ids:[8] +DEBUG 06-24 20:42:15 [manager.py:391] +ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:397.7634906768799ms total_cost_time:397.8140354156494ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:15176 prompt_cache_len:5151 prompt_cache_ratio:0.3394175013178703 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 +DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:16 [batch.py:51] router release req id 8 +INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10888528823852539 s +INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11095213890075684 s +DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=65728021892721642036134691515062371818, time:1750768936.3902884s req_ids:[8] +DEBUG 06-24 20:42:16 [manager.py:391] +ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:218.76049041748047ms total_cost_time:218.82057189941406ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:15177 prompt_cache_len:5151 prompt_cache_ratio:0.3393951373789286 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 +DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:16 [batch.py:51] router release req id 8 +INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s +INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11114501953125 s +DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=99728906666724970926512405386079584250, time:1750768936.6199012s req_ids:[8] +DEBUG 06-24 20:42:16 [manager.py:391] +ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:223.0672836303711ms total_cost_time:223.1285572052002ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15178 prompt_cache_len:5151 prompt_cache_ratio:0.33937277638687574 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 +DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:16 [batch.py:51] router release req id 8 +INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10848760604858398 s +INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s +DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=313689337459227610288645289335041421092, time:1750768936.8465247s req_ids:[8] +DEBUG 06-24 20:42:16 [manager.py:391] +ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:217.30899810791016ms total_cost_time:217.36979484558105ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15179 prompt_cache_len:5151 prompt_cache_ratio:0.33935041834112917 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 +DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:16 [batch.py:51] router release req id 8 +INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10905289649963379 s +INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.1110689640045166 s +DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=122687061824358778447861747482944507803, time:1750768937.0705225s req_ids:[8] +DEBUG 06-24 20:42:17 [manager.py:391] +ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:218.20354461669922ms total_cost_time:218.2629108428955ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15180 prompt_cache_len:5151 prompt_cache_ratio:0.3393280632411067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 +INFO 06-24 20:42:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:17 [batch.py:51] router release req id 8 +INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10816097259521484 s +INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11010360717773438 s +DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=173800583072736181823245617748441481075, time:1750768937.29692s req_ids:[8] +DEBUG 06-24 20:42:17 [manager.py:391] +ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:394.1371440887451ms total_cost_time:394.18625831604004ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:15181 prompt_cache_len:5151 prompt_cache_ratio:0.3393057110862262 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 +DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:17 [batch.py:51] router release req id 8 +INFO 06-24 20:42:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10852384567260742 s +INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11071634292602539 s +DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=46527016274505654992188616480697623102, time:1750768937.696355s req_ids:[8] +DEBUG 06-24 20:42:17 [manager.py:391] +ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:214.1575813293457ms total_cost_time:214.22076225280762ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:15182 prompt_cache_len:5151 prompt_cache_ratio:0.3392833618759057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 +DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:17 [batch.py:51] router release req id 8 +INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10872054100036621 s +INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11062502861022949 s +DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=46924685682493559376080246293275289045, time:1750768937.9186237s req_ids:[8] +DEBUG 06-24 20:42:17 [manager.py:391] +ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:217.99039840698242ms total_cost_time:218.05286407470703ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:15183 prompt_cache_len:5151 prompt_cache_ratio:0.33926101560956334 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 +DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:18 [batch.py:51] router release req id 8 +INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10913634300231934 s +INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.11122941970825195 s +DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=133919047054363655536239907345678774093, time:1750768938.1442797s req_ids:[8] +DEBUG 06-24 20:42:18 [manager.py:391] +ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:219.04754638671875ms total_cost_time:219.10834312438965ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15184 prompt_cache_len:5151 prompt_cache_ratio:0.33923867228661747 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 +DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:18 [batch.py:51] router release req id 8 +INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10908746719360352 s +INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s +DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=138917395478911565847010630331177765062, time:1750768938.3713663s req_ids:[8] +DEBUG 06-24 20:42:18 [manager.py:391] +ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:215.58880805969238ms total_cost_time:215.65008163452148ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15185 prompt_cache_len:5151 prompt_cache_ratio:0.33921633190648665 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 +DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:18 [batch.py:51] router release req id 8 +INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10671019554138184 s +INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.1080636978149414 s +DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=133186548632280356345333128492140982055, time:1750768938.5926871s req_ids:[8] +DEBUG 06-24 20:42:18 [manager.py:391] +ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:209.4879150390625ms total_cost_time:209.5494270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15186 prompt_cache_len:5151 prompt_cache_ratio:0.3391939944685895 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 +DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:18 [batch.py:51] router release req id 8 +INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.1077888011932373 s +INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.10995030403137207 s +DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=47010660621880075201665836139575816647, time:1750768938.8089595s req_ids:[8] +DEBUG 06-24 20:42:18 [manager.py:391] +ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:385.48803329467773ms total_cost_time:385.5326175689697ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15187 prompt_cache_len:5151 prompt_cache_ratio:0.3391716599723448 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 +DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:19 [batch.py:51] router release req id 8 +INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10898828506469727 s +INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11094927787780762 s +DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=49322155431499347462737771567379886993, time:1750768939.1997495s req_ids:[8] +DEBUG 06-24 20:42:19 [manager.py:391] +ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:215.9411907196045ms total_cost_time:215.98458290100098ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15188 prompt_cache_len:5151 prompt_cache_ratio:0.33914932841717144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 +DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:19 [batch.py:51] router release req id 8 +INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10936641693115234 s +INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11133575439453125 s +DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=57046338280859119376316374736171206338, time:1750768939.4239733s req_ids:[8] +DEBUG 06-24 20:42:19 [manager.py:391] +ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:217.9713249206543ms total_cost_time:218.0333137512207ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15189 prompt_cache_len:5151 prompt_cache_ratio:0.3391269998024886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 +DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:19 [batch.py:51] router release req id 8 +INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10805773735046387 s +INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.10998106002807617 s +DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=300428107430158495157623225369137519176, time:1750768939.659912s req_ids:[8] +DEBUG 06-24 20:42:19 [manager.py:391] +ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:232.74469375610352ms total_cost_time:232.79213905334473ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15190 prompt_cache_len:5151 prompt_cache_ratio:0.3391046741277156 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 +DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:19 [batch.py:51] router release req id 8 +INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10874414443969727 s +INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11097455024719238 s +DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=147403776232530842411857526325865068907, time:1750768939.8987508s req_ids:[8] +DEBUG 06-24 20:42:19 [manager.py:391] +ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:229.19082641601562ms total_cost_time:229.23970222473145ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:15191 prompt_cache_len:5151 prompt_cache_ratio:0.3390823513922717 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 +DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:20 [batch.py:51] router release req id 8 +INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.10995626449584961 s +INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.11193084716796875 s +DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=254938150094575030094327282537852081225, time:1750768940.1244683s req_ids:[8] +DEBUG 06-24 20:42:20 [manager.py:391] +ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:213.0424976348877ms total_cost_time:213.10162544250488ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15192 prompt_cache_len:5151 prompt_cache_ratio:0.3390600315955766 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 +DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:20 [batch.py:51] router release req id 8 +INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.310835599899292 s +INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.31293177604675293 s +DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=246751136733251994068484139025190547656, time:1750768940.5590603s req_ids:[8] +DEBUG 06-24 20:42:20 [manager.py:391] +ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:42:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 59136.937 tokens/s +DEBUG 06-24 20:42:20 [stats.py:37] Avg prompt tokens throughput: 59129.143 tokens/s +DEBUG 06-24 20:42:20 [stats.py:37] Avg generate tokens throughput: 7.793 tokens/s +INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:434.2925548553467ms total_cost_time:434.35192108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15193 prompt_cache_len:5151 prompt_cache_ratio:0.33903771473704997 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 +DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:20 [batch.py:51] router release req id 8 +INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.10979533195495605 s +INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.11196565628051758 s +DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=251338580535259355565701219972431072810, time:1750768940.7882512s req_ids:[8] +DEBUG 06-24 20:42:20 [manager.py:391] +ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:217.49234199523926ms total_cost_time:217.55266189575195ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15194 prompt_cache_len:5151 prompt_cache_ratio:0.33901540081611165 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 +DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:20 [batch.py:51] router release req id 8 +INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.10877585411071777 s +INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11076641082763672 s +DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=57537521288962973122640586936020283672, time:1750768941.0136397s req_ids:[8] +DEBUG 06-24 20:42:21 [manager.py:391] +ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:211.56883239746094ms total_cost_time:211.61365509033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15195 prompt_cache_len:5151 prompt_cache_ratio:0.3389930898321816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 +DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:21 [batch.py:51] router release req id 8 +INFO 06-24 20:42:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.1091756820678711 s +INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11126041412353516 s +DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=58737911634750185734373420974689333322, time:1750768941.2315671s req_ids:[8] +DEBUG 06-24 20:42:21 [manager.py:391] +ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:211.62080764770508ms total_cost_time:211.66539192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15196 prompt_cache_len:5151 prompt_cache_ratio:0.33897078178468015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 +DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:21 [batch.py:51] router release req id 8 +INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.10792279243469238 s +INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11001396179199219 s +DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=121495887664646698917868106388007559133, time:1750768941.451874s req_ids:[8] +DEBUG 06-24 20:42:21 [manager.py:391] +ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:216.57466888427734ms total_cost_time:216.60280227661133ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:15197 prompt_cache_len:5151 prompt_cache_ratio:0.3389484766730276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 +DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:21 [batch.py:51] router release req id 8 +INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.1082913875579834 s +INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s +DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=303448412231199530525807995885070100682, time:1750768941.676679s req_ids:[8] +DEBUG 06-24 20:42:21 [manager.py:391] +ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:383.070707321167ms total_cost_time:383.1191062927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:15198 prompt_cache_len:5151 prompt_cache_ratio:0.3389261744966443 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 +DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:21 [batch.py:51] router release req id 8 +INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10771846771240234 s +INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.10977554321289062 s +DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=51877779178005503494266830329763452129, time:1750768942.0679781s req_ids:[8] +DEBUG 06-24 20:42:22 [manager.py:391] +ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:216.2184715270996ms total_cost_time:216.2797451019287ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15199 prompt_cache_len:5151 prompt_cache_ratio:0.33890387525495097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 +DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:22 [batch.py:51] router release req id 8 +INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10918045043945312 s +INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.1111598014831543 s +DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=146734159022037405147800068623686117245, time:1750768942.2900643s req_ids:[8] +DEBUG 06-24 20:42:22 [manager.py:391] +ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:216.01223945617676ms total_cost_time:216.07351303100586ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15200 prompt_cache_len:5151 prompt_cache_ratio:0.3388815789473684 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 +DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:22 [batch.py:51] router release req id 8 +INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10944771766662598 s +INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11131501197814941 s +DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=40530824994474852669787678309673075508, time:1750768942.5145206s req_ids:[8] +DEBUG 06-24 20:42:22 [manager.py:391] +ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:217.27252006530762ms total_cost_time:217.3311710357666ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15201 prompt_cache_len:5151 prompt_cache_ratio:0.33885928557331757 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 +DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:22 [batch.py:51] router release req id 8 +INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s +INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11101007461547852 s +DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=225259572782006256435400484393447163302, time:1750768942.7527535s req_ids:[8] +DEBUG 06-24 20:42:22 [manager.py:391] +ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:234.60936546325684ms total_cost_time:234.65991020202637ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:15202 prompt_cache_len:5151 prompt_cache_ratio:0.33883699513221943 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 +DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:22 [batch.py:51] router release req id 8 +INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10909843444824219 s +INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11119580268859863 s +DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=91389964008902718819311242030820172461, time:1750768942.9810998s req_ids:[8] +DEBUG 06-24 20:42:22 [manager.py:391] +INFO 06-24 20:42:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:42:23 [statics_utils.py:24] mean first cost: 232.944852018184 ms +INFO 06-24 20:42:23 [statics_utils.py:24] mean per token cost: 0.05717543137161622 ms +ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:386.6608142852783ms total_cost_time:386.7065906524658ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15203 prompt_cache_len:5151 prompt_cache_ratio:0.33881470762349536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 +DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:23 [batch.py:51] router release req id 8 +INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.10760307312011719 s +INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s +DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=285216507773551007437188874236833020041, time:1750768943.3764598s req_ids:[8] +DEBUG 06-24 20:42:23 [manager.py:391] +ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:231.25195503234863ms total_cost_time:231.31251335144043ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15204 prompt_cache_len:5151 prompt_cache_ratio:0.3387924230465667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 +DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:23 [batch.py:51] router release req id 8 +INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.11190986633300781 s +INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.11417865753173828 s +DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=333949612434111225613799545097591697778, time:1750768943.6196811s req_ids:[8] +DEBUG 06-24 20:42:23 [manager.py:391] +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:236.7570400238037ms total_cost_time:236.8011474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15205 prompt_cache_len:5151 prompt_cache_ratio:0.338770141400855 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 +DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:23 [batch.py:51] router release req id 8 +INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.10896468162536621 s +INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.11098527908325195 s +DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=151322254967275387401459255985038613734, time:1750768943.856885s req_ids:[8] +DEBUG 06-24 20:42:23 [manager.py:391] +ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:211.11083030700684ms total_cost_time:211.14325523376465ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:15206 prompt_cache_len:5151 prompt_cache_ratio:0.33874786268578194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 +DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:23 [batch.py:51] router release req id 8 +INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.11091995239257812 s +INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11284542083740234 s +DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=339655059029783761566215805014444899395, time:1750768944.077825s req_ids:[8] +DEBUG 06-24 20:42:24 [manager.py:391] +ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:220.93868255615234ms total_cost_time:220.98135948181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15207 prompt_cache_len:5151 prompt_cache_ratio:0.33872558690076937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 +DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:24 [batch.py:51] router release req id 8 +INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s +INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11097145080566406 s +DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=99907977983238074245606867818415707455, time:1750768944.3018675s req_ids:[8] +DEBUG 06-24 20:42:24 [manager.py:391] +ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:216.84026718139648ms total_cost_time:216.8877124786377ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15208 prompt_cache_len:5151 prompt_cache_ratio:0.33870331404523935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 +DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:24 [batch.py:51] router release req id 8 +INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.10959887504577637 s +INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11164021492004395 s +DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=156053127792389014832685293353448591151, time:1750768944.5375254s req_ids:[8] +DEBUG 06-24 20:42:24 [manager.py:391] +ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:397.0463275909424ms total_cost_time:397.0935344696045ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15209 prompt_cache_len:5151 prompt_cache_ratio:0.338681044118614 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 +DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:24 [batch.py:51] router release req id 8 +INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.11020135879516602 s +INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11220836639404297 s +DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=183865536449247141930842194879354619325, time:1750768944.929993s req_ids:[8] +DEBUG 06-24 20:42:24 [manager.py:391] +ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:208.4503173828125ms total_cost_time:208.4939479827881ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15210 prompt_cache_len:5151 prompt_cache_ratio:0.33865877712031556 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 +DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:25 [batch.py:51] router release req id 8 +INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10902929306030273 s +INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11122608184814453 s +DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=97291444785418183794932015250697884242, time:1750768945.145193s req_ids:[8] +DEBUG 06-24 20:42:25 [manager.py:391] +ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:218.15824508666992ms total_cost_time:218.2016372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15211 prompt_cache_len:5151 prompt_cache_ratio:0.3386365130497666 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 +DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:25 [batch.py:51] router release req id 8 +INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10958027839660645 s +INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11162376403808594 s +DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=152815036522922430042762051595382150210, time:1750768945.3811061s req_ids:[8] +DEBUG 06-24 20:42:25 [manager.py:391] +ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:231.35852813720703ms total_cost_time:231.40215873718262ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15212 prompt_cache_len:5151 prompt_cache_ratio:0.3386142519063897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 +DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:25 [batch.py:51] router release req id 8 +INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s +INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.1104574203491211 s +DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=28347471482245471441616016408783668581, time:1750768945.610829s req_ids:[8] +DEBUG 06-24 20:42:25 [manager.py:391] +ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:210.48855781555176ms total_cost_time:210.53457260131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15213 prompt_cache_len:5151 prompt_cache_ratio:0.3385919936896076 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 +DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:25 [batch.py:51] router release req id 8 +INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.1092538833618164 s +INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11125373840332031 s +DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=167309450980744844688063409561212324790, time:1750768945.8358808s req_ids:[8] +DEBUG 06-24 20:42:25 [manager.py:391] +ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:400.0062942504883ms total_cost_time:400.03323554992676ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:15214 prompt_cache_len:5151 prompt_cache_ratio:0.3385697383988432 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 +DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:26 [batch.py:51] router release req id 8 +INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10921430587768555 s +INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11124634742736816 s +DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=163013880815729421790907961308180951683, time:1750768946.2316225s req_ids:[8] +DEBUG 06-24 20:42:26 [manager.py:391] +ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:219.4373607635498ms total_cost_time:219.4831371307373ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15215 prompt_cache_len:5151 prompt_cache_ratio:0.33854748603351953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 +DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:26 [batch.py:51] router release req id 8 +INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10815072059631348 s +INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11019372940063477 s +DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=224759704197104578791684149686602452294, time:1750768946.4692094s req_ids:[8] +DEBUG 06-24 20:42:26 [manager.py:391] +ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:191.1611557006836ms total_cost_time:191.20502471923828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15216 prompt_cache_len:5151 prompt_cache_ratio:0.33852523659305994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 +DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:26 [batch.py:51] router release req id 8 +INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10878157615661621 s +INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11086726188659668 s +DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=42535516330004298637238848173277342090, time:1750768946.6555483s req_ids:[8] +DEBUG 06-24 20:42:26 [manager.py:391] +ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:208.96124839782715ms total_cost_time:209.00678634643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15217 prompt_cache_len:5151 prompt_cache_ratio:0.33850299007688767 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 +DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:26 [batch.py:51] router release req id 8 +INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10907435417175293 s +INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11107349395751953 s +DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=311406162475802883682689433313708292888, time:1750768946.8719618s req_ids:[8] +DEBUG 06-24 20:42:26 [manager.py:391] +ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:213.96803855895996ms total_cost_time:214.01357650756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15218 prompt_cache_len:5151 prompt_cache_ratio:0.33848074648442633 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 +DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:27 [batch.py:51] router release req id 8 +INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10867881774902344 s +INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.1105954647064209 s +DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=156952397275002005425798255186950474003, time:1750768947.092765s req_ids:[8] +DEBUG 06-24 20:42:27 [manager.py:391] +ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:347.123384475708ms total_cost_time:347.1667766571045ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15219 prompt_cache_len:5151 prompt_cache_ratio:0.33845850581509956 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 +DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:27 [batch.py:51] router release req id 8 +INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10931038856506348 s +INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11133122444152832 s +DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=248389825422883365581418157280440671625, time:1750768947.4479792s req_ids:[8] +DEBUG 06-24 20:42:27 [manager.py:391] +ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:209.9626064300537ms total_cost_time:210.0067138671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15220 prompt_cache_len:5151 prompt_cache_ratio:0.33843626806833116 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 +DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:27 [batch.py:51] router release req id 8 +INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10956573486328125 s +INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11157965660095215 s +DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=192128486609736799629233222319838368635, time:1750768947.664769s req_ids:[8] +DEBUG 06-24 20:42:27 [manager.py:391] +ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:213.23513984680176ms total_cost_time:213.27805519104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15221 prompt_cache_len:5151 prompt_cache_ratio:0.3384140332435451 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 +DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:27 [batch.py:51] router release req id 8 +INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10822319984436035 s +INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11033463478088379 s +DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=24751020865396336538753651257858856054, time:1750768947.8837166s req_ids:[8] +DEBUG 06-24 20:42:27 [manager.py:391] +ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:215.23308753967285ms total_cost_time:215.27481079101562ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15222 prompt_cache_len:5151 prompt_cache_ratio:0.33839180134016555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 +DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:28 [batch.py:51] router release req id 8 +INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10889863967895508 s +INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11082029342651367 s +DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=324145480685840641495072148659263265532, time:1750768948.1059062s req_ids:[8] +DEBUG 06-24 20:42:28 [manager.py:391] +ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:217.3595428466797ms total_cost_time:217.40198135375977ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15223 prompt_cache_len:5151 prompt_cache_ratio:0.3383695723576168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 +DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:28 [batch.py:51] router release req id 8 +INFO 06-24 20:42:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s +INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11097836494445801 s +DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=290819936024197858379648165854937545962, time:1750768948.3310833s req_ids:[8] +DEBUG 06-24 20:42:28 [manager.py:391] +ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:391.1268711090088ms total_cost_time:391.1712169647217ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15224 prompt_cache_len:5151 prompt_cache_ratio:0.33834734629532315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 +DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:28 [batch.py:51] router release req id 8 +INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10889196395874023 s +INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11055397987365723 s +DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=117876272925479813246889919741665322065, time:1750768948.729227s req_ids:[8] +DEBUG 06-24 20:42:28 [manager.py:391] +ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:216.34268760681152ms total_cost_time:216.38751029968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15225 prompt_cache_len:5151 prompt_cache_ratio:0.33832512315270935 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 +DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:28 [batch.py:51] router release req id 8 +INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s +INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11058282852172852 s +DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=25951506112116602865193496287732003066, time:1750768948.951071s req_ids:[8] +DEBUG 06-24 20:42:28 [manager.py:391] +ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:217.43345260620117ms total_cost_time:217.47922897338867ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15226 prompt_cache_len:5151 prompt_cache_ratio:0.33830290292920007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 +DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:29 [batch.py:51] router release req id 8 +INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11277055740356445 s +DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=275937255153162806138996847243901716838, time:1750768949.1742778s req_ids:[8] +DEBUG 06-24 20:42:29 [manager.py:391] +INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11487984657287598 s +ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:217.30661392211914ms total_cost_time:217.35000610351562ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15227 prompt_cache_len:5151 prompt_cache_ratio:0.33828068562422015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 +DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:29 [batch.py:51] router release req id 8 +INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11216044425964355 s +INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11431145668029785 s +DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=135179875387446902422752135353394010466, time:1750768949.4319224s req_ids:[8] +DEBUG 06-24 20:42:29 [manager.py:391] +ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:255.0947666168213ms total_cost_time:255.11574745178223ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:15228 prompt_cache_len:5151 prompt_cache_ratio:0.33825847123719466 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 +DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:29 [batch.py:51] router release req id 8 +INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s +INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.1106407642364502 s +DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=33298410059919722005003279901942847236, time:1750768949.6610684s req_ids:[8] +DEBUG 06-24 20:42:29 [manager.py:391] +ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:175.3823757171631ms total_cost_time:175.41170120239258ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:15229 prompt_cache_len:5151 prompt_cache_ratio:0.33823625976754873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 +DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:29 [batch.py:51] router release req id 8 +INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11077356338500977 s +INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11278080940246582 s +DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=54710067149018350118956873490146067376, time:1750768949.843968s req_ids:[8] +DEBUG 06-24 20:42:29 [manager.py:391] +ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:384.5257759094238ms total_cost_time:384.5703601837158ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15230 prompt_cache_len:5151 prompt_cache_ratio:0.3382140512147078 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 +DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:30 [batch.py:51] router release req id 8 +INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11068367958068848 s +INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11259150505065918 s +DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=154394495422275240855932498769734734563, time:1750768950.2337487s req_ids:[8] +DEBUG 06-24 20:42:30 [manager.py:391] +ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:216.55726432800293ms total_cost_time:216.6006565093994ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15231 prompt_cache_len:5151 prompt_cache_ratio:0.3381918455780973 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 +DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:30 [batch.py:51] router release req id 8 +INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11116528511047363 s +INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11313652992248535 s +DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=13144452047582576582857498679996981844, time:1750768950.4903393s req_ids:[8] +DEBUG 06-24 20:42:30 [manager.py:391] +ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:254.37426567077637ms total_cost_time:254.41837310791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15232 prompt_cache_len:5151 prompt_cache_ratio:0.33816964285714285 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 +DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:30 [batch.py:51] router release req id 8 +INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11394071578979492 s +INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11597943305969238 s +DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=207564985157224758571846504791144265699, time:1750768950.7233686s req_ids:[8] +DEBUG 06-24 20:42:30 [manager.py:391] +DEBUG 06-24 20:42:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 60506.157 tokens/s +DEBUG 06-24 20:42:30 [stats.py:37] Avg prompt tokens throughput: 60498.303 tokens/s +DEBUG 06-24 20:42:30 [stats.py:37] Avg generate tokens throughput: 7.854 tokens/s +ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:221.15778923034668ms total_cost_time:221.20213508605957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15233 prompt_cache_len:5151 prompt_cache_ratio:0.33814744305127026 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 +DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:30 [batch.py:51] router release req id 8 +INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.1099851131439209 s +INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11192083358764648 s +DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=38554427302605331109027670585372408576, time:1750768950.9511874s req_ids:[8] +DEBUG 06-24 20:42:30 [manager.py:391] +ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:220.1673984527588ms total_cost_time:220.21150588989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15234 prompt_cache_len:5151 prompt_cache_ratio:0.33812524615990547 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 +DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:31 [batch.py:51] router release req id 8 +INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.10855889320373535 s +INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s +DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=324580595878255722641191650772519065767, time:1750768951.1775544s req_ids:[8] +DEBUG 06-24 20:42:31 [manager.py:391] +ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:212.97574043273926ms total_cost_time:213.01984786987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15235 prompt_cache_len:5151 prompt_cache_ratio:0.33810305218247455 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 +DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:31 [batch.py:51] router release req id 8 +INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.31091952323913574 s +INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.31317663192749023 s +DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=27356283079339726189606001896392536810, time:1750768951.6045935s req_ids:[8] +DEBUG 06-24 20:42:31 [manager.py:391] +ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:431.8280220031738ms total_cost_time:431.8726062774658ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15236 prompt_cache_len:5151 prompt_cache_ratio:0.3380808611184038 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 +DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:31 [batch.py:51] router release req id 8 +INFO 06-24 20:42:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.10943365097045898 s +INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138010025024414 s +DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=67507952225215512254920186798505852976, time:1750768951.8471699s req_ids:[8] +DEBUG 06-24 20:42:31 [manager.py:391] +ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:230.96728324890137ms total_cost_time:230.9889793395996ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:15237 prompt_cache_len:5151 prompt_cache_ratio:0.3380586729671195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 +DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:31 [batch.py:51] router release req id 8 +INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s +INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11153769493103027 s +DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=210861354281717730838161154896459820329, time:1750768952.0734813s req_ids:[8] +DEBUG 06-24 20:42:32 [manager.py:391] +ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:220.64971923828125ms total_cost_time:220.69454193115234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15238 prompt_cache_len:5151 prompt_cache_ratio:0.3380364877280483 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 +DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:32 [batch.py:51] router release req id 8 +INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10902643203735352 s +INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11081838607788086 s +DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=29490741528308523511030928288014233792, time:1750768952.2985284s req_ids:[8] +DEBUG 06-24 20:42:32 [manager.py:391] +ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:176.00083351135254ms total_cost_time:176.0389804840088ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:15239 prompt_cache_len:5151 prompt_cache_ratio:0.33801430540061683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 +DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:32 [batch.py:51] router release req id 8 +INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10778403282165527 s +INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.10960555076599121 s +DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=125448507203496648137271374463379221196, time:1750768952.4797475s req_ids:[8] +DEBUG 06-24 20:42:32 [manager.py:391] +ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:205.78765869140625ms total_cost_time:205.83057403564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15240 prompt_cache_len:5151 prompt_cache_ratio:0.337992125984252 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 +DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:32 [batch.py:51] router release req id 8 +INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s +INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11007571220397949 s +DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=330574665252447096149541816101523739973, time:1750768952.691846s req_ids:[8] +DEBUG 06-24 20:42:32 [manager.py:391] +ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:378.2675266265869ms total_cost_time:378.3283233642578ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15241 prompt_cache_len:5151 prompt_cache_ratio:0.33796994947838066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 +DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:32 [batch.py:51] router release req id 8 +INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11238646507263184 s +DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=128517104914843866067287262035021677960, time:1750768953.077379s req_ids:[8] +DEBUG 06-24 20:42:33 [manager.py:391] +INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11432385444641113 s +ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:216.3839340209961ms total_cost_time:216.4287567138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15242 prompt_cache_len:5151 prompt_cache_ratio:0.33794777588243013 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 +DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:33 [batch.py:51] router release req id 8 +INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11250591278076172 s +DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=85046733415100996830233130992949663858, time:1750768953.301085s req_ids:[8] +DEBUG 06-24 20:42:33 [manager.py:391] +INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11464571952819824 s +ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:216.40253067016602ms total_cost_time:216.44902229309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15243 prompt_cache_len:5151 prompt_cache_ratio:0.3379256051958276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 +DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:33 [batch.py:51] router release req id 8 +INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11189079284667969 s +INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11378955841064453 s +DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=37397979130218230758955987671071890910, time:1750768953.5256927s req_ids:[8] +DEBUG 06-24 20:42:33 [manager.py:391] +ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:212.06283569335938ms total_cost_time:212.10908889770508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15244 prompt_cache_len:5151 prompt_cache_ratio:0.33790343741800055 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 +DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:33 [batch.py:51] router release req id 8 +INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.10970377922058105 s +INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11171817779541016 s +DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=221122639943293744259544217770029869344, time:1750768953.7444096s req_ids:[8] +DEBUG 06-24 20:42:33 [manager.py:391] +ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:215.61884880065918ms total_cost_time:215.66247940063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15245 prompt_cache_len:5151 prompt_cache_ratio:0.3378812725483765 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 +DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:33 [batch.py:51] router release req id 8 +INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.10953116416931152 s +INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11148524284362793 s +DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=148271004254697567596119525530489805705, time:1750768953.9660368s req_ids:[8] +DEBUG 06-24 20:42:33 [manager.py:391] +ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:381.4873695373535ms total_cost_time:381.5462589263916ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:15246 prompt_cache_len:5151 prompt_cache_ratio:0.3378591105863833 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 +DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:34 [batch.py:51] router release req id 8 +INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s +INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11106348037719727 s +DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=153653718605778331430612870875373893264, time:1750768954.355918s req_ids:[8] +DEBUG 06-24 20:42:34 [manager.py:391] +ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:218.86253356933594ms total_cost_time:218.90640258789062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15247 prompt_cache_len:5151 prompt_cache_ratio:0.3378369515314488 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 +DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:34 [batch.py:51] router release req id 8 +INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10845804214477539 s +INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.10936522483825684 s +DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=3045314096655996475288756209180429968, time:1750768954.581874s req_ids:[8] +DEBUG 06-24 20:42:34 [manager.py:391] +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:175.08554458618164ms total_cost_time:175.12941360473633ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15248 prompt_cache_len:5151 prompt_cache_ratio:0.3378147953830011 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 +DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:34 [batch.py:51] router release req id 8 +INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10876297950744629 s +INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11086845397949219 s +DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=131145743787009875682427713212871096648, time:1750768954.762036s req_ids:[8] +DEBUG 06-24 20:42:34 [manager.py:391] +ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:205.63840866088867ms total_cost_time:205.68275451660156ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15249 prompt_cache_len:5151 prompt_cache_ratio:0.3377926421404682 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 +DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:34 [batch.py:51] router release req id 8 +INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10895180702209473 s +INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11102509498596191 s +DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=190237609166860698510660642048507000917, time:1750768954.9716654s req_ids:[8] +DEBUG 06-24 20:42:34 [manager.py:391] +ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:213.5453224182129ms total_cost_time:213.58871459960938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15250 prompt_cache_len:5151 prompt_cache_ratio:0.33777049180327867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 +DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:35 [batch.py:51] router release req id 8 +INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.10855531692504883 s +INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.11056852340698242 s +DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=29226589432581069773330096974399410771, time:1750768955.1923192s req_ids:[8] +DEBUG 06-24 20:42:35 [manager.py:391] +ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:216.48788452148438ms total_cost_time:216.53056144714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15251 prompt_cache_len:5151 prompt_cache_ratio:0.33774834437086093 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 +DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:35 [batch.py:51] router release req id 8 +INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.11019277572631836 s +INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.1121358871459961 s +DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=197854366029171773451263009704251934502, time:1750768955.4172056s req_ids:[8] +DEBUG 06-24 20:42:35 [manager.py:391] +ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:374.58229064941406ms total_cost_time:374.62568283081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15252 prompt_cache_len:5151 prompt_cache_ratio:0.33772619984264357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 +DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:35 [batch.py:51] router release req id 8 +INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.10869193077087402 s +INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.11061859130859375 s +DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=4472235495899164523274913813495401011, time:1750768955.7989151s req_ids:[8] +DEBUG 06-24 20:42:35 [manager.py:391] +ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:216.09759330749512ms total_cost_time:216.1407470703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15253 prompt_cache_len:5151 prompt_cache_ratio:0.3377040582180555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 +DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:35 [batch.py:51] router release req id 8 +INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10822176933288574 s +INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s +DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=34239524635493832308781079472720458273, time:1750768956.0217457s req_ids:[8] +DEBUG 06-24 20:42:36 [manager.py:391] +ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:212.25237846374512ms total_cost_time:212.2964859008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15254 prompt_cache_len:5151 prompt_cache_ratio:0.3376819194965255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 +DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:36 [batch.py:51] router release req id 8 +INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10984563827514648 s +INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11197209358215332 s +DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=199908730056097122643285332484719901504, time:1750768956.240059s req_ids:[8] +DEBUG 06-24 20:42:36 [manager.py:391] +ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:217.25010871887207ms total_cost_time:217.29373931884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15255 prompt_cache_len:5151 prompt_cache_ratio:0.3376597836774828 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 +DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:36 [batch.py:51] router release req id 8 +INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10911297798156738 s +INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11044478416442871 s +DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=299275304387965678991653534924160291823, time:1750768956.465387s req_ids:[8] +DEBUG 06-24 20:42:36 [manager.py:391] +ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:216.3708209991455ms total_cost_time:216.4134979248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15256 prompt_cache_len:5151 prompt_cache_ratio:0.33763765076035657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 +DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:36 [batch.py:51] router release req id 8 +INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10865259170532227 s +INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s +DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=301251761444856908197318849533497271219, time:1750768956.6872032s req_ids:[8] +DEBUG 06-24 20:42:36 [manager.py:391] +ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:386.75689697265625ms total_cost_time:386.81840896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15257 prompt_cache_len:5151 prompt_cache_ratio:0.33761552074457624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 +DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:36 [batch.py:51] router release req id 8 +INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.11145901679992676 s +INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11346960067749023 s +DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=239549778611540566230187740711706982135, time:1750768957.0811865s req_ids:[8] +DEBUG 06-24 20:42:37 [manager.py:391] +ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:213.58156204223633ms total_cost_time:213.62543106079102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15258 prompt_cache_len:5151 prompt_cache_ratio:0.33759339362957136 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 +DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:37 [batch.py:51] router release req id 8 +INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.10931849479675293 s +INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11143732070922852 s +DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=143272613221294398071794191884084107276, time:1750768957.304773s req_ids:[8] +DEBUG 06-24 20:42:37 [manager.py:391] +ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:216.56489372253418ms total_cost_time:216.60804748535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15259 prompt_cache_len:5151 prompt_cache_ratio:0.3375712694147716 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 +DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:37 [batch.py:51] router release req id 8 +INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.11106324195861816 s +INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11294031143188477 s +DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=51785673864588079644180840029810005998, time:1750768957.5249245s req_ids:[8] +DEBUG 06-24 20:42:37 [manager.py:391] +ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:213.50860595703125ms total_cost_time:213.55342864990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15260 prompt_cache_len:5151 prompt_cache_ratio:0.3375491480996068 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 +DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:37 [batch.py:51] router release req id 8 +INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.10996389389038086 s +INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11209917068481445 s +DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=264176749272298275267341445895125438090, time:1750768957.7461736s req_ids:[8] +DEBUG 06-24 20:42:37 [manager.py:391] +ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:214.18166160583496ms total_cost_time:214.22505378723145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15261 prompt_cache_len:5151 prompt_cache_ratio:0.33752702968350695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 +DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:37 [batch.py:51] router release req id 8 +INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.1085672378540039 s +INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s +DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=236767826369951394620205539337971581064, time:1750768957.96786s req_ids:[8] +DEBUG 06-24 20:42:37 [manager.py:391] +ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:351.1476516723633ms total_cost_time:351.1929512023926ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15262 prompt_cache_len:5151 prompt_cache_ratio:0.3375049141659022 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 +DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:38 [batch.py:51] router release req id 8 +INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10834646224975586 s +INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102750301361084 s +DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=224205266336419571404865413319454941215, time:1750768958.3244746s req_ids:[8] +DEBUG 06-24 20:42:38 [manager.py:391] +ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:216.62545204162598ms total_cost_time:216.66884422302246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15263 prompt_cache_len:5151 prompt_cache_ratio:0.3374828015462229 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 +DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:38 [batch.py:51] router release req id 8 +INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10828590393066406 s +INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s +DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=233071193449393166954206902675184340935, time:1750768958.5495353s req_ids:[8] +DEBUG 06-24 20:42:38 [manager.py:391] +ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:218.40500831604004ms total_cost_time:218.44959259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15264 prompt_cache_len:5151 prompt_cache_ratio:0.3374606918238994 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 +DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:38 [batch.py:51] router release req id 8 +INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10899186134338379 s +INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.11118888854980469 s +DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=327205533889318930082625987062977388943, time:1750768958.7728577s req_ids:[8] +DEBUG 06-24 20:42:38 [manager.py:391] +ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:216.57180786132812ms total_cost_time:216.61686897277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15265 prompt_cache_len:5151 prompt_cache_ratio:0.33743858499836227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 +DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:38 [batch.py:51] router release req id 8 +INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10772418975830078 s +INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s +DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=92699773777352220089411950717588010729, time:1750768959.0058718s req_ids:[8] +DEBUG 06-24 20:42:39 [manager.py:391] +ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:190.78421592712402ms total_cost_time:190.83833694458008ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:15266 prompt_cache_len:5151 prompt_cache_ratio:0.3374164810690423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 +DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:39 [batch.py:51] router release req id 8 +INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10918068885803223 s +INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.1110680103302002 s +DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=25172233717920294897555473414303272221, time:1750768959.193971s req_ids:[8] +DEBUG 06-24 20:42:39 [manager.py:391] +ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:381.75106048583984ms total_cost_time:381.79636001586914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15267 prompt_cache_len:5151 prompt_cache_ratio:0.3373943800353704 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 +DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:39 [batch.py:51] router release req id 8 +INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s +INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s +DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=89577200359681050252138795102324039331, time:1750768959.5815437s req_ids:[8] +DEBUG 06-24 20:42:39 [manager.py:391] +ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:173.42853546142578ms total_cost_time:173.47192764282227ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15268 prompt_cache_len:5151 prompt_cache_ratio:0.3373722818967776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 +DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:39 [batch.py:51] router release req id 8 +INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s +INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952615737915039 s +DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=314245843455946025836402085714239572824, time:1750768959.761197s req_ids:[8] +DEBUG 06-24 20:42:39 [manager.py:391] +ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:173.3419895172119ms total_cost_time:173.3996868133545ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15269 prompt_cache_len:5151 prompt_cache_ratio:0.337350186652695 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 +DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:39 [batch.py:51] router release req id 8 +INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10966873168945312 s +DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=254714919351273262804701767517969830652, time:1750768959.9416606s req_ids:[8] +DEBUG 06-24 20:42:39 [manager.py:391] +INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.11176300048828125 s +ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:212.04471588134766ms total_cost_time:212.08763122558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15270 prompt_cache_len:5151 prompt_cache_ratio:0.337328094302554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 +DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:40 [batch.py:51] router release req id 8 +INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s +INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.11075401306152344 s +DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=19166768681254532233948511541079624824, time:1750768960.1618207s req_ids:[8] +DEBUG 06-24 20:42:40 [manager.py:391] +ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:213.84382247924805ms total_cost_time:213.88721466064453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15271 prompt_cache_len:5151 prompt_cache_ratio:0.33730600484578616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 +DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:40 [batch.py:51] router release req id 8 +INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.11330294609069824 s +INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.11533331871032715 s +DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=204692234715884295545748767734487068399, time:1750768960.419041s req_ids:[8] +DEBUG 06-24 20:42:40 [manager.py:391] +ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:256.53839111328125ms total_cost_time:256.5774917602539ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:15272 prompt_cache_len:5151 prompt_cache_ratio:0.33728391828182297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 +DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:40 [batch.py:51] router release req id 8 +INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.10823988914489746 s +INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s +DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=268795295248525539910702455514642451641, time:1750768960.6497185s req_ids:[8] +DEBUG 06-24 20:42:40 [manager.py:391] +ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:42:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 59835.101 tokens/s +DEBUG 06-24 20:42:40 [stats.py:37] Avg prompt tokens throughput: 59827.159 tokens/s +DEBUG 06-24 20:42:40 [stats.py:37] Avg generate tokens throughput: 7.942 tokens/s +INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:389.4011974334717ms total_cost_time:389.4460201263428ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15273 prompt_cache_len:5151 prompt_cache_ratio:0.33726183461009623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 +DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:40 [batch.py:51] router release req id 8 +INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s +INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030173301696777 s +DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=57573714678294995531376285014907843847, time:1750768961.0450807s req_ids:[8] +DEBUG 06-24 20:42:41 [manager.py:391] +ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:212.60547637939453ms total_cost_time:212.65244483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15274 prompt_cache_len:5151 prompt_cache_ratio:0.33723975383003796 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 +DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:41 [batch.py:51] router release req id 8 +INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10874795913696289 s +INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s +DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=333533988152382795405080613011908114404, time:1750768961.262557s req_ids:[8] +DEBUG 06-24 20:42:41 [manager.py:391] +ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:209.6564769744873ms total_cost_time:209.7012996673584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15275 prompt_cache_len:5151 prompt_cache_ratio:0.3372176759410802 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 +DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:41 [batch.py:51] router release req id 8 +INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s +INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11007881164550781 s +DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=73146709741062125027801460311818684545, time:1750768961.4802914s req_ids:[8] +DEBUG 06-24 20:42:41 [manager.py:391] +ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:212.40472793579102ms total_cost_time:212.42666244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:15276 prompt_cache_len:5151 prompt_cache_ratio:0.33719560094265516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 +DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:41 [batch.py:51] router release req id 8 +INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10896039009094238 s +INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s +DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=277453151690861920073459323343605934623, time:1750768961.699458s req_ids:[8] +DEBUG 06-24 20:42:41 [manager.py:391] +ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:218.67990493774414ms total_cost_time:218.72329711914062ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15277 prompt_cache_len:5151 prompt_cache_ratio:0.3371735288341952 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 +DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:41 [batch.py:51] router release req id 8 +INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s +INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.1104135513305664 s +DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=249761171618258891912573798028683362201, time:1750768961.939264s req_ids:[8] +DEBUG 06-24 20:42:41 [manager.py:391] +ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:218.72901916503906ms total_cost_time:218.77360343933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15278 prompt_cache_len:5151 prompt_cache_ratio:0.3371514596151329 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 +DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:42 [batch.py:51] router release req id 8 +INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.3140742778778076 s +INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.3162975311279297 s +DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=273266287757559814926770517223130730889, time:1750768962.3595288s req_ids:[8] +DEBUG 06-24 20:42:42 [manager.py:391] +ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:429.78358268737793ms total_cost_time:429.83102798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15279 prompt_cache_len:5151 prompt_cache_ratio:0.3371293932849008 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 +DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:42 [batch.py:51] router release req id 8 +INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.11202740669250488 s +INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.11411428451538086 s +DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=154082030348393430774514024572163345470, time:1750768962.5859945s req_ids:[8] +DEBUG 06-24 20:42:42 [manager.py:391] +ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:213.87410163879395ms total_cost_time:213.91940116882324ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15280 prompt_cache_len:5151 prompt_cache_ratio:0.33710732984293196 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 +DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:42 [batch.py:51] router release req id 8 +INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.11049938201904297 s +INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.11228060722351074 s +DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=86304403666834894973589082259328266084, time:1750768962.8055751s req_ids:[8] +DEBUG 06-24 20:42:42 [manager.py:391] +ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:173.98786544799805ms total_cost_time:174.03292655944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15281 prompt_cache_len:5151 prompt_cache_ratio:0.3370852692886591 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 +DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:42 [batch.py:51] router release req id 8 +INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.10813379287719727 s +INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.10999512672424316 s +DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=80001343279135406418980920042664219934, time:1750768962.9881215s req_ids:[8] +DEBUG 06-24 20:42:42 [manager.py:391] +ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:175.9941577911377ms total_cost_time:176.039457321167ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15282 prompt_cache_len:5151 prompt_cache_ratio:0.33706321162151553 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 +DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:43 [batch.py:51] router release req id 8 +INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10897111892700195 s +INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11089539527893066 s +DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=143991533333115594383488610385072590830, time:1750768963.1694014s req_ids:[8] +DEBUG 06-24 20:42:43 [manager.py:391] +ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:210.35432815551758ms total_cost_time:210.40058135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15283 prompt_cache_len:5151 prompt_cache_ratio:0.3370411568409344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 +DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:43 [batch.py:51] router release req id 8 +INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10897183418273926 s +INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.1109616756439209 s +DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=86503241519473131785912430707854228605, time:1750768963.387512s req_ids:[8] +DEBUG 06-24 20:42:43 [manager.py:391] +ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:381.89172744750977ms total_cost_time:381.9551467895508ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:15284 prompt_cache_len:5151 prompt_cache_ratio:0.3370191049463491 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 +DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:43 [batch.py:51] router release req id 8 +INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10857605934143066 s +INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11029553413391113 s +DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=302909532962369433250945153497286574004, time:1750768963.7756867s req_ids:[8] +DEBUG 06-24 20:42:43 [manager.py:391] +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:173.8271713256836ms total_cost_time:173.87008666992188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15285 prompt_cache_len:5151 prompt_cache_ratio:0.3369970559371933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 +DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:43 [batch.py:51] router release req id 8 +INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10913348197937012 s +INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11125445365905762 s +DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=133319371431059373422684897580878663486, time:1750768963.9567096s req_ids:[8] +DEBUG 06-24 20:42:43 [manager.py:391] +ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:211.12847328186035ms total_cost_time:211.17067337036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15286 prompt_cache_len:5151 prompt_cache_ratio:0.3369750098129007 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 +DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:44 [batch.py:51] router release req id 8 +INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10867500305175781 s +INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11057782173156738 s +DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=232786782293569675701267987969537747441, time:1750768964.175408s req_ids:[8] +DEBUG 06-24 20:42:44 [manager.py:391] +ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:216.5670394897461ms total_cost_time:216.61090850830078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15287 prompt_cache_len:5151 prompt_cache_ratio:0.3369529665729051 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 +DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:44 [batch.py:51] router release req id 8 +INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s +INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s +DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=205263109471168399304651825252523744422, time:1750768964.402068s req_ids:[8] +DEBUG 06-24 20:42:44 [manager.py:391] +ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:222.05400466918945ms total_cost_time:222.09811210632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15288 prompt_cache_len:5151 prompt_cache_ratio:0.3369309262166405 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 +DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:44 [batch.py:51] router release req id 8 +INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10824131965637207 s +INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11010980606079102 s +DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=189456826049611772785593103769999278470, time:1750768964.6263907s req_ids:[8] +DEBUG 06-24 20:42:44 [manager.py:391] +ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:347.3842144012451ms total_cost_time:347.4287986755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15289 prompt_cache_len:5151 prompt_cache_ratio:0.33690888874354114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 +DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:44 [batch.py:51] router release req id 8 +INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10918021202087402 s +INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11117053031921387 s +DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=31906429100339121136266182681113234525, time:1750768964.9802153s req_ids:[8] +DEBUG 06-24 20:42:44 [manager.py:391] +ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:216.04061126708984ms total_cost_time:216.08424186706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15290 prompt_cache_len:5151 prompt_cache_ratio:0.3368868541530412 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 +DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:45 [batch.py:51] router release req id 8 +INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.1100618839263916 s +INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11237573623657227 s +DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=72485119064450478069388092176801404097, time:1750768965.2021701s req_ids:[8] +DEBUG 06-24 20:42:45 [manager.py:391] +ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:214.07413482666016ms total_cost_time:214.11609649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15291 prompt_cache_len:5151 prompt_cache_ratio:0.3368648224445752 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 +DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:45 [batch.py:51] router release req id 8 +INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.1081535816192627 s +INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034321784973145 s +DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=120322700966790208587618739429897530080, time:1750768965.4229474s req_ids:[8] +DEBUG 06-24 20:42:45 [manager.py:391] +ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:218.1849479675293ms total_cost_time:218.2292938232422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15292 prompt_cache_len:5151 prompt_cache_ratio:0.3368427936175778 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 +DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:45 [batch.py:51] router release req id 8 +INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.10793709754943848 s +INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.10969018936157227 s +DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=239528137336332536001279364489197703124, time:1750768965.6485834s req_ids:[8] +DEBUG 06-24 20:42:45 [manager.py:391] +ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:216.71557426452637ms total_cost_time:216.75848960876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15293 prompt_cache_len:5151 prompt_cache_ratio:0.33682076767148367 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 +DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:45 [batch.py:51] router release req id 8 +INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.11090993881225586 s +INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11289620399475098 s +DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=124696280399343810459781301469479435008, time:1750768965.8719244s req_ids:[8] +DEBUG 06-24 20:42:45 [manager.py:391] +ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:218.9795970916748ms total_cost_time:219.0232276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15294 prompt_cache_len:5151 prompt_cache_ratio:0.33679874460572773 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 +DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:46 [batch.py:51] router release req id 8 +INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10926485061645508 s +INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11129879951477051 s +DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=46623969522368298408880099680608602768, time:1750768966.104454s req_ids:[8] +DEBUG 06-24 20:42:46 [manager.py:391] +ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:433.0558776855469ms total_cost_time:433.09998512268066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15295 prompt_cache_len:5151 prompt_cache_ratio:0.336776724419745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 +DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:46 [batch.py:51] router release req id 8 +INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.1069023609161377 s +INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.10792016983032227 s +DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=333163605100754676007736873758363233882, time:1750768966.5384912s req_ids:[8] +DEBUG 06-24 20:42:46 [manager.py:391] +ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:218.7819480895996ms total_cost_time:218.82939338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15296 prompt_cache_len:5151 prompt_cache_ratio:0.33675470711297073 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 +DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:46 [batch.py:51] router release req id 8 +INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10905599594116211 s +INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s +DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=127141239058018062596515531762985767424, time:1750768966.7632134s req_ids:[8] +DEBUG 06-24 20:42:46 [manager.py:391] +ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:211.95054054260254ms total_cost_time:211.99631690979004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15297 prompt_cache_len:5151 prompt_cache_ratio:0.3367326926848402 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 +DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:46 [batch.py:51] router release req id 8 +INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10945892333984375 s +INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11123824119567871 s +DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=201658133625886701345475311168542024000, time:1750768966.9832294s req_ids:[8] +DEBUG 06-24 20:42:46 [manager.py:391] +ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:214.39385414123535ms total_cost_time:214.43867683410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15298 prompt_cache_len:5151 prompt_cache_ratio:0.33671068113478886 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 +DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:47 [batch.py:51] router release req id 8 +INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s +INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s +DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=281181628271042014554416719716299624085, time:1750768967.2042766s req_ids:[8] +DEBUG 06-24 20:42:47 [manager.py:391] +INFO 06-24 20:42:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:217.24796295166016ms total_cost_time:217.29183197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15299 prompt_cache_len:5151 prompt_cache_ratio:0.3366886724622524 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 +DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:47 [batch.py:51] router release req id 8 +INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10974502563476562 s +INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11173295974731445 s +DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=207190394608493638163270070010247435715, time:1750768967.428706s req_ids:[8] +DEBUG 06-24 20:42:47 [manager.py:391] +ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:395.56384086608887ms total_cost_time:395.60890197753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15300 prompt_cache_len:5151 prompt_cache_ratio:0.33666666666666667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 +DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:47 [batch.py:51] router release req id 8 +INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10812163352966309 s +INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037182807922363 s +DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=143788279983788295104220454539294549530, time:1750768967.8298116s req_ids:[8] +DEBUG 06-24 20:42:47 [manager.py:391] +ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:221.05646133422852ms total_cost_time:221.11201286315918ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:15301 prompt_cache_len:5151 prompt_cache_ratio:0.3366446637474675 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 +DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:47 [batch.py:51] router release req id 8 +INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.10946321487426758 s +INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.11149191856384277 s +DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=118735260533829356698187123397974787914, time:1750768968.058415s req_ids:[8] +DEBUG 06-24 20:42:48 [manager.py:391] +ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:215.14058113098145ms total_cost_time:215.16752243041992ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:15302 prompt_cache_len:5151 prompt_cache_ratio:0.33662266370409094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 +DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:48 [batch.py:51] router release req id 8 +INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.10944604873657227 s +INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.11142230033874512 s +DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=218900032652891180590334931963816945698, time:1750768968.2891686s req_ids:[8] +DEBUG 06-24 20:42:48 [manager.py:391] +ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:228.58357429504395ms total_cost_time:228.62958908081055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15303 prompt_cache_len:5151 prompt_cache_ratio:0.3366006665359733 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 +DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:48 [batch.py:51] router release req id 8 +INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.11382603645324707 s +INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.1156766414642334 s +DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=106573955011478134305099838849853628949, time:1750768968.5479906s req_ids:[8] +DEBUG 06-24 20:42:48 [manager.py:391] +ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:255.3424835205078ms total_cost_time:255.3870677947998ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15304 prompt_cache_len:5151 prompt_cache_ratio:0.33657867224255095 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 +DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:48 [batch.py:51] router release req id 8 +INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.11331605911254883 s +DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=220570184046574146735611968749597580841, time:1750768968.7777324s req_ids:[8] +DEBUG 06-24 20:42:48 [manager.py:391] +INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.1154031753540039 s +ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:390.60139656066895ms total_cost_time:390.64502716064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15305 prompt_cache_len:5151 prompt_cache_ratio:0.33655668082326035 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 +DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:49 [batch.py:51] router release req id 8 +INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s +INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.1111762523651123 s +DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=213266858597367368396693061512444701271, time:1750768969.1739895s req_ids:[8] +DEBUG 06-24 20:42:49 [manager.py:391] +ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:218.27101707458496ms total_cost_time:218.31536293029785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15306 prompt_cache_len:5151 prompt_cache_ratio:0.33653469227753824 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 +DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:49 [batch.py:51] router release req id 8 +INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s +INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s +DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=207197339794377823852207332076431113080, time:1750768969.4005055s req_ids:[8] +DEBUG 06-24 20:42:49 [manager.py:391] +ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:218.2481288909912ms total_cost_time:218.27006340026855ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:15307 prompt_cache_len:5151 prompt_cache_ratio:0.3365127066048213 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 +DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:49 [batch.py:51] router release req id 8 +INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10643291473388672 s +INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.10820460319519043 s +DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=151404732567665939835927566587686360725, time:1750768969.6370568s req_ids:[8] +DEBUG 06-24 20:42:49 [manager.py:391] +ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:185.45937538146973ms total_cost_time:185.5015754699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15308 prompt_cache_len:5151 prompt_cache_ratio:0.33649072380454664 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 +DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:49 [batch.py:51] router release req id 8 +INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10833215713500977 s +INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s +DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=139395259600872835069267260525045237871, time:1750768969.8190825s req_ids:[8] +DEBUG 06-24 20:42:49 [manager.py:391] +ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:209.79952812194824ms total_cost_time:209.84554290771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15309 prompt_cache_len:5151 prompt_cache_ratio:0.3364687438761513 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 +DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:49 [batch.py:51] router release req id 8 +INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10900211334228516 s +INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11103439331054688 s +DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=136681978341145368486439710936022663088, time:1750768970.0322962s req_ids:[8] +DEBUG 06-24 20:42:50 [manager.py:391] +ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:389.39881324768066ms total_cost_time:389.44387435913086ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15310 prompt_cache_len:5151 prompt_cache_ratio:0.3364467668190725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 +DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:50 [batch.py:51] router release req id 8 +INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10852909088134766 s +INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11036944389343262 s +DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=327174258089267058152819911523288870919, time:1750768970.4308302s req_ids:[8] +DEBUG 06-24 20:42:50 [manager.py:391] +ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:218.72544288635254ms total_cost_time:218.76907348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15311 prompt_cache_len:5151 prompt_cache_ratio:0.3364247926327477 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 +DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:50 [batch.py:51] router release req id 8 +INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10945248603820801 s +INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11139345169067383 s +DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=296035011426413534414214817507477122017, time:1750768970.6616118s req_ids:[8] +DEBUG 06-24 20:42:50 [manager.py:391] +ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:226.46546363830566ms total_cost_time:226.51124000549316ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15312 prompt_cache_len:5151 prompt_cache_ratio:0.3364028213166144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 +DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:50 [batch.py:51] router release req id 8 +INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10907888412475586 s +INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11091971397399902 s +DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=139225536724817114983254184863201697265, time:1750768970.898214s req_ids:[8] +DEBUG 06-24 20:42:50 [manager.py:391] +ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:42:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 60926.492 tokens/s +DEBUG 06-24 20:42:50 [stats.py:37] Avg prompt tokens throughput: 60918.525 tokens/s +DEBUG 06-24 20:42:50 [stats.py:37] Avg generate tokens throughput: 7.967 tokens/s +INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:189.68796730041504ms total_cost_time:189.73159790039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15313 prompt_cache_len:5151 prompt_cache_ratio:0.3363808528701104 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 +DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:50 [batch.py:51] router release req id 8 +INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.1082766056060791 s +INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11003375053405762 s +DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=75273083978615056677417808843088518856, time:1750768971.0878198s req_ids:[8] +DEBUG 06-24 20:42:51 [manager.py:391] +ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:175.54640769958496ms total_cost_time:175.59003829956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15314 prompt_cache_len:5151 prompt_cache_ratio:0.3363588872926734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 +DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:51 [batch.py:51] router release req id 8 +INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10881686210632324 s +INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s +DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=205671522056542375350667359860331681758, time:1750768971.2701604s req_ids:[8] +DEBUG 06-24 20:42:51 [manager.py:391] +ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:205.4746150970459ms total_cost_time:205.51753044128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15315 prompt_cache_len:5151 prompt_cache_ratio:0.3363369245837414 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 +DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:51 [batch.py:51] router release req id 8 +INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10948944091796875 s +INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11149978637695312 s +DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=42625442722787368158268338541939090214, time:1750768971.4821432s req_ids:[8] +DEBUG 06-24 20:42:51 [manager.py:391] +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:388.7600898742676ms total_cost_time:388.80228996276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15316 prompt_cache_len:5151 prompt_cache_ratio:0.3363149647427527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 +DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:51 [batch.py:51] router release req id 8 +INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10973548889160156 s +INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11172127723693848 s +DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=80787102678951303253530927990046975483, time:1750768971.8776014s req_ids:[8] +DEBUG 06-24 20:42:51 [manager.py:391] +ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:215.8496379852295ms total_cost_time:215.90423583984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:15317 prompt_cache_len:5151 prompt_cache_ratio:0.3362930077691454 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 +DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:52 [batch.py:51] router release req id 8 +INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.1086113452911377 s +INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11049032211303711 s +DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=304653817357335848159481074334328882217, time:1750768972.101409s req_ids:[8] +DEBUG 06-24 20:42:52 [manager.py:391] +ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:219.18559074401855ms total_cost_time:219.23041343688965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15318 prompt_cache_len:5151 prompt_cache_ratio:0.336271053662358 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 +DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:52 [batch.py:51] router release req id 8 +INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10921835899353027 s +INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104011535644531 s +DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=286275820780609436128846902014346733486, time:1750768972.3287394s req_ids:[8] +DEBUG 06-24 20:42:52 [manager.py:391] +ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:225.19254684448242ms total_cost_time:225.2368927001953ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15319 prompt_cache_len:5151 prompt_cache_ratio:0.3362491024218291 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 +DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:52 [batch.py:51] router release req id 8 +INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10818934440612793 s +INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11007094383239746 s +DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=258991911638198069650165832121559093558, time:1750768972.583152s req_ids:[8] +DEBUG 06-24 20:42:52 [manager.py:391] +ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:245.7449436187744ms total_cost_time:245.78857421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15320 prompt_cache_len:5151 prompt_cache_ratio:0.3362271540469974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 +DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:52 [batch.py:51] router release req id 8 +INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s +INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s +DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=277590033815439948017843451332998192368, time:1750768972.8383968s req_ids:[8] +DEBUG 06-24 20:42:52 [manager.py:391] +ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:232.99694061279297ms total_cost_time:233.04414749145508ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15321 prompt_cache_len:5151 prompt_cache_ratio:0.33620520853730174 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 +DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:53 [batch.py:51] router release req id 8 +INFO 06-24 20:42:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:42:53 [statics_utils.py:24] mean first cost: 233.08571218308117 ms +INFO 06-24 20:42:53 [statics_utils.py:24] mean per token cost: 0.05703200167086259 ms +INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.3113415241241455 s +INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.31342530250549316 s +DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=92646671727510335327827355820773469934, time:1750768973.26546s req_ids:[8] +DEBUG 06-24 20:42:53 [manager.py:391] +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:440.8905506134033ms total_cost_time:440.91176986694336ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:15322 prompt_cache_len:5151 prompt_cache_ratio:0.33618326589218117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 +DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:53 [batch.py:51] router release req id 8 +INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.10953950881958008 s +INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s +DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=226969634425264763476423843801536342598, time:1750768973.495207s req_ids:[8] +DEBUG 06-24 20:42:53 [manager.py:391] +ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:216.477632522583ms total_cost_time:216.5207862854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15323 prompt_cache_len:5151 prompt_cache_ratio:0.33616132611107485 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 +DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:53 [batch.py:51] router release req id 8 +INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.1086881160736084 s +INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.11022067070007324 s +DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=18638250450885591266255624606402263586, time:1750768973.717723s req_ids:[8] +DEBUG 06-24 20:42:53 [manager.py:391] +ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:207.84378051757812ms total_cost_time:207.88908004760742ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15324 prompt_cache_len:5151 prompt_cache_ratio:0.33613938919342207 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 +DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:53 [batch.py:51] router release req id 8 +INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s +INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.10979795455932617 s +DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=306377985823902198740730944060201942532, time:1750768973.9348261s req_ids:[8] +DEBUG 06-24 20:42:53 [manager.py:391] +ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:214.09034729003906ms total_cost_time:214.13326263427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15325 prompt_cache_len:5151 prompt_cache_ratio:0.3361174551386623 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 +DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:54 [batch.py:51] router release req id 8 +INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.10876607894897461 s +INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s +DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=16362404637402728455321410143009721430, time:1750768974.1546347s req_ids:[8] +DEBUG 06-24 20:42:54 [manager.py:391] +ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:222.97406196594238ms total_cost_time:223.01888465881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15326 prompt_cache_len:5151 prompt_cache_ratio:0.33609552394623515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 +DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:54 [batch.py:51] router release req id 8 +INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.1080622673034668 s +INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.10990643501281738 s +DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=164732157111739476198136279750358592644, time:1750768974.4084578s req_ids:[8] +DEBUG 06-24 20:42:54 [manager.py:391] +ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:445.68586349487305ms total_cost_time:445.7285404205322ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15327 prompt_cache_len:5151 prompt_cache_ratio:0.33607359561558037 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 +DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:54 [batch.py:51] router release req id 8 +INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.10962843894958496 s +INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s +DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=326522886053188905775444211210609141097, time:1750768974.836165s req_ids:[8] +DEBUG 06-24 20:42:54 [manager.py:391] +ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:213.6058807373047ms total_cost_time:213.64951133728027ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15328 prompt_cache_len:5151 prompt_cache_ratio:0.3360516701461378 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 +DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:54 [batch.py:51] router release req id 8 +INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.1097555160522461 s +INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11185979843139648 s +DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=144344382519892597221764530356904377539, time:1750768975.056322s req_ids:[8] +DEBUG 06-24 20:42:55 [manager.py:391] +ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:215.55852890014648ms total_cost_time:215.60311317443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15329 prompt_cache_len:5151 prompt_cache_ratio:0.3360297475373475 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 +DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:55 [batch.py:51] router release req id 8 +INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10883140563964844 s +INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11094093322753906 s +DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=227988676369477360678641043278677978321, time:1750768975.2786465s req_ids:[8] +DEBUG 06-24 20:42:55 [manager.py:391] +ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:219.46263313293457ms total_cost_time:219.50721740722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15330 prompt_cache_len:5151 prompt_cache_ratio:0.3360078277886497 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 +DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:55 [batch.py:51] router release req id 8 +INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10972189903259277 s +INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11109375953674316 s +DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=282966017163980914720924374168613210294, time:1750768975.50362s req_ids:[8] +DEBUG 06-24 20:42:55 [manager.py:391] +ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:218.20974349975586ms total_cost_time:218.25456619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15331 prompt_cache_len:5151 prompt_cache_ratio:0.3359859108994847 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 +DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:55 [batch.py:51] router release req id 8 +INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10836362838745117 s +INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11007022857666016 s +DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=122723376335127200083220617165330103781, time:1750768975.7297702s req_ids:[8] +DEBUG 06-24 20:42:55 [manager.py:391] +ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:347.54300117492676ms total_cost_time:347.58687019348145ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15332 prompt_cache_len:5151 prompt_cache_ratio:0.33596399686929296 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 +DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:55 [batch.py:51] router release req id 8 +INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s +INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11065673828125 s +DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=274594342980269237021212139939735502037, time:1750768976.08399s req_ids:[8] +DEBUG 06-24 20:42:56 [manager.py:391] +ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:209.64765548706055ms total_cost_time:209.69319343566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15333 prompt_cache_len:5151 prompt_cache_ratio:0.33594208569751516 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 +DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:56 [batch.py:51] router release req id 8 +INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10797238349914551 s +INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s +DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=237610410454759499936795613696038081288, time:1750768976.2998629s req_ids:[8] +DEBUG 06-24 20:42:56 [manager.py:391] +ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:173.98357391357422ms total_cost_time:174.0267276763916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15334 prompt_cache_len:5151 prompt_cache_ratio:0.33592017738359203 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 +DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:56 [batch.py:51] router release req id 8 +INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10795402526855469 s +INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.10968017578125 s +DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=295983761400535653952052349898365469088, time:1750768976.4816022s req_ids:[8] +DEBUG 06-24 20:42:56 [manager.py:391] +ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:181.04147911071777ms total_cost_time:181.08773231506348ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15335 prompt_cache_len:5151 prompt_cache_ratio:0.3358982719269645 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 +DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:56 [batch.py:51] router release req id 8 +INFO 06-24 20:42:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10856056213378906 s +INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11034536361694336 s +DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=56407298152871248156032788240139055168, time:1750768976.6703231s req_ids:[8] +DEBUG 06-24 20:42:56 [manager.py:391] +ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:206.0222625732422ms total_cost_time:206.07805252075195ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:15336 prompt_cache_len:5151 prompt_cache_ratio:0.33587636932707354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 +DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:56 [batch.py:51] router release req id 8 +INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10825300216674805 s +INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11021971702575684 s +DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=158740947624756608791884466503603174073, time:1750768976.8820312s req_ids:[8] +DEBUG 06-24 20:42:56 [manager.py:391] +ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:212.88132667541504ms total_cost_time:212.92686462402344ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15337 prompt_cache_len:5151 prompt_cache_ratio:0.3358544695833605 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 +DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:57 [batch.py:51] router release req id 8 +INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s +INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s +DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=322517486264072634995032119120306214942, time:1750768977.1016862s req_ids:[8] +DEBUG 06-24 20:42:57 [manager.py:391] +ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:379.61459159851074ms total_cost_time:379.65965270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15338 prompt_cache_len:5151 prompt_cache_ratio:0.3358325726952667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 +DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:57 [batch.py:51] router release req id 8 +INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.11078834533691406 s +INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11257719993591309 s +DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=79567468288218621607874284058498190307, time:1750768977.4884176s req_ids:[8] +DEBUG 06-24 20:42:57 [manager.py:391] +ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.86117553710938ms total_cost_time:217.90599822998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15339 prompt_cache_len:5151 prompt_cache_ratio:0.3358106786622335 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 +DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:57 [batch.py:51] router release req id 8 +INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s +INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11094784736633301 s +DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=156001186831924051612221275691133984214, time:1750768977.712095s req_ids:[8] +DEBUG 06-24 20:42:57 [manager.py:391] +ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.9102897644043ms total_cost_time:217.9543972015381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15340 prompt_cache_len:5151 prompt_cache_ratio:0.3357887874837027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 +DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:57 [batch.py:51] router release req id 8 +INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s +INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11090946197509766 s +DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=51495630030534115698473573854292362327, time:1750768977.9371724s req_ids:[8] +DEBUG 06-24 20:42:57 [manager.py:391] +ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.44275093078613ms total_cost_time:217.48948097229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15341 prompt_cache_len:5151 prompt_cache_ratio:0.3357668991591161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 +DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:58 [batch.py:51] router release req id 8 +INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.10848426818847656 s +INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052250862121582 s +DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=212219375294411603175039535825477325631, time:1750768978.1626809s req_ids:[8] +DEBUG 06-24 20:42:58 [manager.py:391] +ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:223.7873077392578ms total_cost_time:223.8328456878662ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15342 prompt_cache_len:5151 prompt_cache_ratio:0.33574501368791554 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 +DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:58 [batch.py:51] router release req id 8 +INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.1091911792755127 s +INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11118364334106445 s +DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=155002304727997770478871482088627870759, time:1750768978.395067s req_ids:[8] +DEBUG 06-24 20:42:58 [manager.py:391] +ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:386.35754585266113ms total_cost_time:386.3997459411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15343 prompt_cache_len:5151 prompt_cache_ratio:0.3357231310695431 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 +DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:58 [batch.py:51] router release req id 8 +INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s +INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052203178405762 s +DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=200070183466653755022877277077968782016, time:1750768978.7852924s req_ids:[8] +DEBUG 06-24 20:42:58 [manager.py:391] +ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:217.12398529052734ms total_cost_time:217.16713905334473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15344 prompt_cache_len:5151 prompt_cache_ratio:0.3357012513034411 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 +DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:58 [batch.py:51] router release req id 8 +INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10964345932006836 s +INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11145305633544922 s +DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=283814069062744516629299119373090608740, time:1750768979.0077274s req_ids:[8] +DEBUG 06-24 20:42:59 [manager.py:391] +ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:213.52458000183105ms total_cost_time:213.56987953186035ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15345 prompt_cache_len:5151 prompt_cache_ratio:0.3356793743890518 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 +DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:59 [batch.py:51] router release req id 8 +INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10906672477722168 s +INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s +DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=323982062977136394240571413242254904374, time:1750768979.2279358s req_ids:[8] +DEBUG 06-24 20:42:59 [manager.py:391] +ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:212.01133728027344ms total_cost_time:212.05902099609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:15346 prompt_cache_len:5151 prompt_cache_ratio:0.3356575003258178 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 +DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:59 [batch.py:51] router release req id 8 +INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s +INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11102414131164551 s +DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=271456533260929858801787805114912979618, time:1750768979.447707s req_ids:[8] +DEBUG 06-24 20:42:59 [manager.py:391] +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:211.08031272888184ms total_cost_time:211.12465858459473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15347 prompt_cache_len:5151 prompt_cache_ratio:0.33563562911318173 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 +DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:59 [batch.py:51] router release req id 8 +INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.1087043285369873 s +INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11056780815124512 s +DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=153066080821979535270323887584686654548, time:1750768979.6774912s req_ids:[8] +DEBUG 06-24 20:42:59 [manager.py:391] +ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:399.2457389831543ms total_cost_time:399.289608001709ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15348 prompt_cache_len:5151 prompt_cache_ratio:0.3356137607505864 mtp_avg_token_per_step:1.0 +INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 +DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:42:59 [batch.py:51] router release req id 8 +INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10561442375183105 s +INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.10747265815734863 s +DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=270145558130665314116764292355998010863, time:1750768980.072468s req_ids:[8] +DEBUG 06-24 20:43:00 [manager.py:391] +ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:174.12495613098145ms total_cost_time:174.147367477417ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:15349 prompt_cache_len:5151 prompt_cache_ratio:0.33559189523747474 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 +DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:00 [batch.py:51] router release req id 8 +INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10703706741333008 s +INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.10889458656311035 s +DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=9078645035160183809545011812628570115, time:1750768980.2552655s req_ids:[8] +DEBUG 06-24 20:43:00 [manager.py:391] +ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:210.97302436828613ms total_cost_time:211.01784706115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15350 prompt_cache_len:5151 prompt_cache_ratio:0.3355700325732899 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 +DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:00 [batch.py:51] router release req id 8 +INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10949969291687012 s +INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.11165308952331543 s +DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=26151391008697655829321699638125795870, time:1750768980.4727352s req_ids:[8] +DEBUG 06-24 20:43:00 [manager.py:391] +ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:219.36917304992676ms total_cost_time:219.41494941711426ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15351 prompt_cache_len:5151 prompt_cache_ratio:0.33554817275747506 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 +DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:00 [batch.py:51] router release req id 8 +INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10872817039489746 s +INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s +DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=127727202330861232230604765399495255353, time:1750768980.6983454s req_ids:[8] +DEBUG 06-24 20:43:00 [manager.py:391] +ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:216.5842056274414ms total_cost_time:216.62664413452148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15352 prompt_cache_len:5151 prompt_cache_ratio:0.3355263157894737 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 +DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:00 [batch.py:51] router release req id 8 +INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10840582847595215 s +INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104581356048584 s +DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=258736601547835548276467609077952617779, time:1750768980.91914s req_ids:[8] +DEBUG 06-24 20:43:00 [manager.py:391] +ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:43:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 59981.522 tokens/s +DEBUG 06-24 20:43:01 [stats.py:37] Avg prompt tokens throughput: 59973.699 tokens/s +DEBUG 06-24 20:43:01 [stats.py:37] Avg generate tokens throughput: 7.823 tokens/s +INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:386.446475982666ms total_cost_time:386.4891529083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15353 prompt_cache_len:5151 prompt_cache_ratio:0.3355044616687292 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 +DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:01 [batch.py:51] router release req id 8 +INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10991120338439941 s +INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11188650131225586 s +DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=290799042573286687400949406338462368788, time:1750768981.3136764s req_ids:[8] +DEBUG 06-24 20:43:01 [manager.py:391] +ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:217.6980972290039ms total_cost_time:217.7414894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15354 prompt_cache_len:5151 prompt_cache_ratio:0.3354826103946854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 +DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:01 [batch.py:51] router release req id 8 +INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s +INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.1107947826385498 s +DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=158640378962868852408319225048174198547, time:1750768981.540713s req_ids:[8] +DEBUG 06-24 20:43:01 [manager.py:391] +ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:218.29915046691895ms total_cost_time:218.34349632263184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15355 prompt_cache_len:5151 prompt_cache_ratio:0.33546076196678604 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 +DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:01 [batch.py:51] router release req id 8 +INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10948944091796875 s +INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11159467697143555 s +DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=81025357482006603200975788762374016352, time:1750768981.7629461s req_ids:[8] +DEBUG 06-24 20:43:01 [manager.py:391] +ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:215.57283401489258ms total_cost_time:215.61527252197266ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15356 prompt_cache_len:5151 prompt_cache_ratio:0.33543891638447515 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 +DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:01 [batch.py:51] router release req id 8 +INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.11089658737182617 s +INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11288619041442871 s +DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=268268635266716478838450128042643936704, time:1750768981.9836023s req_ids:[8] +DEBUG 06-24 20:43:01 [manager.py:391] +ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:214.54143524169922ms total_cost_time:214.58864212036133ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15357 prompt_cache_len:5151 prompt_cache_ratio:0.3354170736471967 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 +DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:02 [batch.py:51] router release req id 8 +INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.109588623046875 s +INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.11157441139221191 s +DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=287625837531010990463567237447162271395, time:1750768982.205321s req_ids:[8] +DEBUG 06-24 20:43:02 [manager.py:391] +ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:216.05300903320312ms total_cost_time:216.09807014465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15358 prompt_cache_len:5151 prompt_cache_ratio:0.3353952337543951 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 +DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:02 [batch.py:51] router release req id 8 +INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.11161446571350098 s +INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.1137533187866211 s +DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=92855271251293218863292472613775499392, time:1750768982.4291716s req_ids:[8] +DEBUG 06-24 20:43:02 [manager.py:391] +ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:390.2320861816406ms total_cost_time:390.2771472930908ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15359 prompt_cache_len:5151 prompt_cache_ratio:0.3353733967055147 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 +DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:02 [batch.py:51] router release req id 8 +INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.10943078994750977 s +INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.11136627197265625 s +DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=38800973283737081648578372040491696929, time:1750768982.8256137s req_ids:[8] +DEBUG 06-24 20:43:02 [manager.py:391] +ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:215.7583236694336ms total_cost_time:215.8043384552002ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15360 prompt_cache_len:5151 prompt_cache_ratio:0.3353515625 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 +DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:02 [batch.py:51] router release req id 8 +INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10936498641967773 s +INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.1113579273223877 s +DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=177247339212693740176265226471529954408, time:1750768983.047578s req_ids:[8] +DEBUG 06-24 20:43:03 [manager.py:391] +ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:211.4555835723877ms total_cost_time:211.49873733520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15361 prompt_cache_len:5151 prompt_cache_ratio:0.33532973113729575 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 +DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:03 [batch.py:51] router release req id 8 +INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10856294631958008 s +INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11044812202453613 s +DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=143074062640900478189127587870579759452, time:1750768983.2670083s req_ids:[8] +DEBUG 06-24 20:43:03 [manager.py:391] +ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:212.54682540893555ms total_cost_time:212.59260177612305ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15362 prompt_cache_len:5151 prompt_cache_ratio:0.3353079026168468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 +DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:03 [batch.py:51] router release req id 8 +INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s +INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11034464836120605 s +DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=302818105532362846049912944537836351008, time:1750768983.4855945s req_ids:[8] +DEBUG 06-24 20:43:03 [manager.py:391] +ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:216.07589721679688ms total_cost_time:216.11857414245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15363 prompt_cache_len:5151 prompt_cache_ratio:0.33528607693809803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 +DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:03 [batch.py:51] router release req id 8 +INFO 06-24 20:43:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10929083824157715 s +INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11132240295410156 s +DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=74308727657514916718725508856092378739, time:1750768983.7086174s req_ids:[8] +DEBUG 06-24 20:43:03 [manager.py:391] +ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:214.54358100891113ms total_cost_time:214.58768844604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15364 prompt_cache_len:5151 prompt_cache_ratio:0.33526425410049465 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 +DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:04 [batch.py:51] router release req id 8 +INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.3108022212982178 s +INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.31284117698669434 s +DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=63064194624646275818112140183214060079, time:1750768984.137029s req_ids:[8] +DEBUG 06-24 20:43:04 [manager.py:391] +ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:428.3902645111084ms total_cost_time:428.4350872039795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15365 prompt_cache_len:5151 prompt_cache_ratio:0.3352424341034819 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 +DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:04 [batch.py:51] router release req id 8 +INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10988879203796387 s +INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11189603805541992 s +DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=311389230740309513958957683030451969073, time:1750768984.3652768s req_ids:[8] +DEBUG 06-24 20:43:04 [manager.py:391] +ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:211.7774486541748ms total_cost_time:211.81988716125488ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15366 prompt_cache_len:5151 prompt_cache_ratio:0.3352206169465053 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 +DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:04 [batch.py:51] router release req id 8 +INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s +INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11089324951171875 s +DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=172458119560786033644438559276628151925, time:1750768984.5838003s req_ids:[8] +DEBUG 06-24 20:43:04 [manager.py:391] +ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.31336212158203ms total_cost_time:216.3560390472412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15367 prompt_cache_len:5151 prompt_cache_ratio:0.3351988026290102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 +DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:04 [batch.py:51] router release req id 8 +INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10941243171691895 s +INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11143898963928223 s +DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=252111704490846481959061713900815059261, time:1750768984.806749s req_ids:[8] +DEBUG 06-24 20:43:04 [manager.py:391] +ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.92419052124023ms total_cost_time:216.96853637695312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15368 prompt_cache_len:5151 prompt_cache_ratio:0.33517699115044247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 +DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:04 [batch.py:51] router release req id 8 +INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10915112495422363 s +INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.11105155944824219 s +DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=229573794079567529141691555750646106229, time:1750768985.0310078s req_ids:[8] +DEBUG 06-24 20:43:05 [manager.py:391] +ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.85504913330078ms total_cost_time:216.89748764038086ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15369 prompt_cache_len:5151 prompt_cache_ratio:0.3351551825102479 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 +DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:05 [batch.py:51] router release req id 8 +INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10850024223327637 s +INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.10983395576477051 s +DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=53414658934787146395039420875538988430, time:1750768985.2553716s req_ids:[8] +DEBUG 06-24 20:43:05 [manager.py:391] +ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:387.6934051513672ms total_cost_time:387.7372741699219ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15370 prompt_cache_len:5151 prompt_cache_ratio:0.33513337670787247 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 +DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:05 [batch.py:51] router release req id 8 +INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10744404792785645 s +INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.1085209846496582 s +DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=325242214310208396462809028867472014592, time:1750768985.648186s req_ids:[8] +DEBUG 06-24 20:43:05 [manager.py:391] +ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:213.75560760498047ms total_cost_time:213.80066871643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15371 prompt_cache_len:5151 prompt_cache_ratio:0.33511157374276235 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 +DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:05 [batch.py:51] router release req id 8 +INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10821747779846191 s +INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.1102144718170166 s +DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=191902520460076828967114699486866690553, time:1750768985.8690102s req_ids:[8] +DEBUG 06-24 20:43:05 [manager.py:391] +ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:218.87826919555664ms total_cost_time:218.92023086547852ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15372 prompt_cache_len:5151 prompt_cache_ratio:0.33508977361436376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 +DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:06 [batch.py:51] router release req id 8 +INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.10853958129882812 s +INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s +DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=130298085718475251897893912261769743178, time:1750768986.0944624s req_ids:[8] +DEBUG 06-24 20:43:06 [manager.py:391] +ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:213.54961395263672ms total_cost_time:213.5944366455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15373 prompt_cache_len:5151 prompt_cache_ratio:0.3350679763221232 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 +DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:06 [batch.py:51] router release req id 8 +INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.1080322265625 s +INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.1095123291015625 s +DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=284784535097765960780685482871099146251, time:1750768986.3134863s req_ids:[8] +DEBUG 06-24 20:43:06 [manager.py:391] +ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:175.3544807434082ms total_cost_time:175.3995418548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15374 prompt_cache_len:5151 prompt_cache_ratio:0.3350461818654872 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 +DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:06 [batch.py:51] router release req id 8 +INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.10727405548095703 s +INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.10831141471862793 s +DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=203465573709955617309034880723087023812, time:1750768986.4958093s req_ids:[8] +DEBUG 06-24 20:43:06 [manager.py:391] +ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:381.0992240905762ms total_cost_time:381.1452388763428ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15375 prompt_cache_len:5151 prompt_cache_ratio:0.33502439024390246 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 +DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:06 [batch.py:51] router release req id 8 +INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.1085050106048584 s +INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.10973644256591797 s +DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=275363071337174741334805848211209491364, time:1750768986.8826015s req_ids:[8] +DEBUG 06-24 20:43:06 [manager.py:391] +ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:216.28427505493164ms total_cost_time:216.32933616638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15376 prompt_cache_len:5151 prompt_cache_ratio:0.3350026014568158 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 +DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:07 [batch.py:51] router release req id 8 +INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.11011767387390137 s +INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.11158275604248047 s +DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=28748341201241374132325111932275566416, time:1750768987.1066349s req_ids:[8] +DEBUG 06-24 20:43:07 [manager.py:391] +ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:216.2301540374756ms total_cost_time:216.25757217407227ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:15377 prompt_cache_len:5151 prompt_cache_ratio:0.3349808155036743 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 +DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:07 [batch.py:51] router release req id 8 +INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10867762565612793 s +INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971355438232422 s +DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=27307337222922660929956799907225599158, time:1750768987.329331s req_ids:[8] +DEBUG 06-24 20:43:07 [manager.py:391] +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:218.9939022064209ms total_cost_time:219.03157234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:15378 prompt_cache_len:5151 prompt_cache_ratio:0.3349590323839251 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 +DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:07 [batch.py:51] router release req id 8 +INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10798525810241699 s +INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971403121948242 s +DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=268847276328336262217239237926366796205, time:1750768987.5548983s req_ids:[8] +DEBUG 06-24 20:43:07 [manager.py:391] +ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:221.94337844848633ms total_cost_time:221.98772430419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15379 prompt_cache_len:5151 prompt_cache_ratio:0.3349372520970154 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 +DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:07 [batch.py:51] router release req id 8 +INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10790586471557617 s +INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10968899726867676 s +DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=115752829122166113927631094203261456617, time:1750768987.7880783s req_ids:[8] +DEBUG 06-24 20:43:07 [manager.py:391] +ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:183.96997451782227ms total_cost_time:184.01241302490234ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15380 prompt_cache_len:5151 prompt_cache_ratio:0.33491547464239274 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 +DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:07 [batch.py:51] router release req id 8 +INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.1085822582244873 s +INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s +DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=179713555479128556894913035793076482, time:1750768987.972466s req_ids:[8] +DEBUG 06-24 20:43:07 [manager.py:391] +ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:373.6555576324463ms total_cost_time:373.7020492553711ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15381 prompt_cache_len:5151 prompt_cache_ratio:0.3348937000195046 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 +DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:08 [batch.py:51] router release req id 8 +INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s +INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.11017012596130371 s +DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=222199049591143076391014291186387884592, time:1750768988.3543856s req_ids:[8] +DEBUG 06-24 20:43:08 [manager.py:391] +ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:222.46980667114258ms total_cost_time:222.51296043395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15382 prompt_cache_len:5151 prompt_cache_ratio:0.3348719282277987 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 +DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:08 [batch.py:51] router release req id 8 +INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.10981488227844238 s +INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.11178183555603027 s +DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=103545135952820151947916647480866047324, time:1750768988.5826573s req_ids:[8] +DEBUG 06-24 20:43:08 [manager.py:391] +ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:216.74752235412598ms total_cost_time:216.81451797485352ms,out_token_counter:1 mean_per_token_cost_time: 0.06699562072753906ms prompt_token_num:15383 prompt_cache_len:5151 prompt_cache_ratio:0.334850159266723 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 +DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:08 [batch.py:51] router release req id 8 +INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s +INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.10985088348388672 s +DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=180511660459799740200696957219688452439, time:1750768988.8052795s req_ids:[8] +DEBUG 06-24 20:43:08 [manager.py:391] +ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:213.9143943786621ms total_cost_time:213.95587921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15384 prompt_cache_len:5151 prompt_cache_ratio:0.33482839313572543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 +DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:08 [batch.py:51] router release req id 8 +INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.1089012622833252 s +INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11016225814819336 s +DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=5678973956362070982095814165889962956, time:1750768989.026877s req_ids:[8] +DEBUG 06-24 20:43:09 [manager.py:391] +ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:220.50833702087402ms total_cost_time:220.5519676208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15385 prompt_cache_len:5151 prompt_cache_ratio:0.33480662983425413 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 +DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:09 [batch.py:51] router release req id 8 +INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s +INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.10934162139892578 s +DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=306702398674857164511381156890790356101, time:1750768989.2533498s req_ids:[8] +DEBUG 06-24 20:43:09 [manager.py:391] +ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:389.7264003753662ms total_cost_time:389.7716999053955ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15386 prompt_cache_len:5151 prompt_cache_ratio:0.33478486936175744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 +DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:09 [batch.py:51] router release req id 8 +INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10943794250488281 s +INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11133217811584473 s +DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=132337494665832374764081061621139557188, time:1750768989.6514843s req_ids:[8] +DEBUG 06-24 20:43:09 [manager.py:391] +ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:218.25337409973145ms total_cost_time:218.29938888549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15387 prompt_cache_len:5151 prompt_cache_ratio:0.33476311171768375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 +DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:09 [batch.py:51] router release req id 8 +INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s +INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s +DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=129152334527789253160810628574957973227, time:1750768989.873794s req_ids:[8] +DEBUG 06-24 20:43:09 [manager.py:391] +ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:216.21990203857422ms total_cost_time:216.2625789642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15388 prompt_cache_len:5151 prompt_cache_ratio:0.33474135690148166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 +DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:10 [batch.py:51] router release req id 8 +INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10935521125793457 s +INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11126232147216797 s +DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=301707460772738326108228294881170992639, time:1750768990.0978022s req_ids:[8] +DEBUG 06-24 20:43:10 [manager.py:391] +ERROR 06-24 20:43:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:226.64141654968262ms total_cost_time:226.6843318939209ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15389 prompt_cache_len:5151 prompt_cache_ratio:0.3347196049125999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:10 [manager.py:106] timer detokenize batch cost time 317.1837329864502 ms +INFO 06-24 20:43:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 +DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:10 [batch.py:51] router release req id 8 +INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s +INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s +DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=81300660259002508943704420534911316703, time:1750768990.6677232s req_ids:[8] +DEBUG 06-24 20:43:10 [manager.py:391] +ERROR 06-24 20:43:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:10 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 first_token_cost:257.68017768859863ms total_cost_time:257.7247619628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15390 prompt_cache_len:5151 prompt_cache_ratio:0.3346978557504873 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 +DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:10 [batch.py:51] router release req id 8 +INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10962891578674316 s +INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11192893981933594 s +DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=180103438408433234449581578360276301228, time:1750768990.917566s req_ids:[8] +DEBUG 06-24 20:43:10 [manager.py:391] +ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:43:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 58406.070 tokens/s +DEBUG 06-24 20:43:11 [stats.py:37] Avg prompt tokens throughput: 58398.472 tokens/s +DEBUG 06-24 20:43:11 [stats.py:37] Avg generate tokens throughput: 7.598 tokens/s +INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 first_token_cost:398.74958992004395ms total_cost_time:398.79560470581055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15391 prompt_cache_len:5151 prompt_cache_ratio:0.33467610941459297 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 +DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:11 [batch.py:51] router release req id 8 +INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10924124717712402 s +INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1113588809967041 s +DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=85566923316330572428938097048648464572, time:1750768991.3164673s req_ids:[8] +DEBUG 06-24 20:43:11 [manager.py:391] +ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:211.38334274291992ms total_cost_time:211.4262580871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15392 prompt_cache_len:5151 prompt_cache_ratio:0.3346543659043659 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 +DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:11 [batch.py:51] router release req id 8 +INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10875225067138672 s +INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1106879711151123 s +DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=313972188896506657273327691386561294965, time:1750768991.535128s req_ids:[8] +DEBUG 06-24 20:43:11 [manager.py:391] +ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:213.61398696899414ms total_cost_time:213.65714073181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15393 prompt_cache_len:5151 prompt_cache_ratio:0.3346326252192555 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 +DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:11 [batch.py:51] router release req id 8 +INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10947370529174805 s +INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1113595962524414 s +DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=290822336709275417918323137163358897869, time:1750768991.7550218s req_ids:[8] +DEBUG 06-24 20:43:11 [manager.py:391] +ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:212.81194686889648ms total_cost_time:212.8584384918213ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15394 prompt_cache_len:5151 prompt_cache_ratio:0.3346108873587112 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 +DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:11 [batch.py:51] router release req id 8 +INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10914349555969238 s +INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.11104607582092285 s +DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=213481428035240435327713353303699695161, time:1750768991.974523s req_ids:[8] +DEBUG 06-24 20:43:11 [manager.py:391] +ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:213.1814956665039ms total_cost_time:213.2244110107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15395 prompt_cache_len:5151 prompt_cache_ratio:0.3345891523221825 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 +DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:12 [batch.py:51] router release req id 8 +INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s +INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11034584045410156 s +DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=96381301246285436145112724650056725543, time:1750768992.1939557s req_ids:[8] +DEBUG 06-24 20:43:12 [manager.py:391] +ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:384.7815990447998ms total_cost_time:384.8264217376709ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15396 prompt_cache_len:5151 prompt_cache_ratio:0.3345674201091193 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 +DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:12 [batch.py:51] router release req id 8 +INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10871767997741699 s +INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s +DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=57806719697594403274272096543536383004, time:1750768992.5862477s req_ids:[8] +DEBUG 06-24 20:43:12 [manager.py:391] +ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:211.87186241149902ms total_cost_time:211.91692352294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15397 prompt_cache_len:5151 prompt_cache_ratio:0.3345456907189712 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 +DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:12 [batch.py:51] router release req id 8 +INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10848116874694824 s +INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s +DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=219660776213359549977653419765497230576, time:1750768992.803929s req_ids:[8] +DEBUG 06-24 20:43:12 [manager.py:391] +ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:216.2792682647705ms total_cost_time:216.3236141204834ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15398 prompt_cache_len:5151 prompt_cache_ratio:0.33452396415118846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 +DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:12 [batch.py:51] router release req id 8 +INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s +INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11043334007263184 s +DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=124706528384044252875859282675346320134, time:1750768993.0296085s req_ids:[8] +DEBUG 06-24 20:43:13 [manager.py:391] +ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:218.68252754211426ms total_cost_time:218.72901916503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15399 prompt_cache_len:5151 prompt_cache_ratio:0.33450224040522114 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 +DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:13 [batch.py:51] router release req id 8 +INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s +INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s +DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=232024296140760365638428958816223515262, time:1750768993.2540321s req_ids:[8] +DEBUG 06-24 20:43:13 [manager.py:391] +ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:217.1928882598877ms total_cost_time:217.2374725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15400 prompt_cache_len:5151 prompt_cache_ratio:0.3344805194805195 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 +DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:13 [batch.py:51] router release req id 8 +INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s +INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11109161376953125 s +DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=58136029423578194316573177133878374348, time:1750768993.4777226s req_ids:[8] +DEBUG 06-24 20:43:13 [manager.py:391] +ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:217.01979637145996ms total_cost_time:217.06366539001465ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15401 prompt_cache_len:5151 prompt_cache_ratio:0.334458801376534 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 +DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:13 [batch.py:51] router release req id 8 +INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10912156105041504 s +INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11156368255615234 s +DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=239367475946870249514322205844673974904, time:1750768993.700736s req_ids:[8] +DEBUG 06-24 20:43:13 [manager.py:391] +ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:388.5383605957031ms total_cost_time:388.582706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15402 prompt_cache_len:5151 prompt_cache_ratio:0.3344370860927152 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 +DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:14 [batch.py:51] router release req id 8 +INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10821652412414551 s +INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11035895347595215 s +DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=151164019357857465822206331908738888446, time:1750768994.0968673s req_ids:[8] +DEBUG 06-24 20:43:14 [manager.py:391] +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:212.2197151184082ms total_cost_time:212.2633457183838ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15403 prompt_cache_len:5151 prompt_cache_ratio:0.3344153736285139 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 +DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:14 [batch.py:51] router release req id 8 +INFO 06-24 20:43:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10928821563720703 s +INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11142516136169434 s +DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=331395454763253572283641362036003778945, time:1750768994.316298s req_ids:[8] +DEBUG 06-24 20:43:14 [manager.py:391] +ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:213.24396133422852ms total_cost_time:213.2883071899414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15404 prompt_cache_len:5151 prompt_cache_ratio:0.33439366398338094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 +DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:14 [batch.py:51] router release req id 8 +INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10882997512817383 s +INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11083388328552246 s +DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=124013980840683801576894085890826652153, time:1750768994.5370975s req_ids:[8] +DEBUG 06-24 20:43:14 [manager.py:391] +ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:213.75608444213867ms total_cost_time:213.79947662353516ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15405 prompt_cache_len:5151 prompt_cache_ratio:0.33437195715676726 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 +DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:14 [batch.py:51] router release req id 8 +INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10836458206176758 s +INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11024045944213867 s +DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=302877489830753757722032464645096476937, time:1750768994.7566955s req_ids:[8] +DEBUG 06-24 20:43:14 [manager.py:391] +ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:217.756986618042ms total_cost_time:217.80085563659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15406 prompt_cache_len:5151 prompt_cache_ratio:0.3343502531481241 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 +DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:14 [batch.py:51] router release req id 8 +INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10835695266723633 s +INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11048412322998047 s +DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=24581509388758548075957499746046130690, time:1750768994.9801626s req_ids:[8] +DEBUG 06-24 20:43:14 [manager.py:391] +ERROR 06-24 20:43:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:216.02725982666016ms total_cost_time:216.08662605285645ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15407 prompt_cache_len:5151 prompt_cache_ratio:0.3343285519569027 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 +DEBUG 06-24 20:43:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:15 [batch.py:51] router release req id 8 +INFO 06-24 20:43:15 [manager.py:224] router recive req id 8 cost time 0.3141491413116455 s +INFO 06-24 20:43:15 [manager.py:68] detokenization recv req id 8 cost time 0.31620049476623535 s +DEBUG 06-24 20:43:15 [manager.py:391] Prefill Batch: batch_id=45656487084254762653541826288689922393, time:1750768995.4146621s req_ids:[8] +DEBUG 06-24 20:43:15 [manager.py:391] +ERROR 06-24 20:43:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:15 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 first_token_cost:436.81812286376953ms total_cost_time:436.8605613708496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15408 prompt_cache_len:5151 prompt_cache_ratio:0.3343068535825545 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 +DEBUG 06-24 20:43:15 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:15 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:15 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:15 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:15 [batch.py:51] router release req id 8 +INFO 06-24 20:43:15 [manager.py:224] router recive req id 8 cost time 0.11034822463989258 s +INFO 06-24 20:43:15 [manager.py:68] detokenization recv req id 8 cost time 0.4668753147125244 s +DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=179764857156876102112194523042355233840, time:1750768996.0324202s req_ids:[8] +DEBUG 06-24 20:43:16 [manager.py:391] +ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 first_token_cost:614.6852970123291ms total_cost_time:614.7286891937256ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15409 prompt_cache_len:5151 prompt_cache_ratio:0.3342851580245311 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 +DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:16 [batch.py:51] router release req id 8 +INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10853028297424316 s +INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s +DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=312962332055338874345956745086123473610, time:1750768996.2694905s req_ids:[8] +DEBUG 06-24 20:43:16 [manager.py:391] +ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:220.7329273223877ms total_cost_time:220.77584266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15410 prompt_cache_len:5151 prompt_cache_ratio:0.33426346528228423 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 +DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:16 [batch.py:51] router release req id 8 +INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10837054252624512 s +INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.10977959632873535 s +DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=184278810902614614469661160053231905718, time:1750768996.4954352s req_ids:[8] +DEBUG 06-24 20:43:16 [manager.py:391] +ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:216.17555618286133ms total_cost_time:216.2189483642578ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15411 prompt_cache_len:5151 prompt_cache_ratio:0.3342417753552657 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 +DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:16 [batch.py:51] router release req id 8 +INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s +INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047863960266113 s +DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=127334781582033291877612998857628704528, time:1750768996.730037s req_ids:[8] +DEBUG 06-24 20:43:16 [manager.py:391] +ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:230.37409782409668ms total_cost_time:230.41820526123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15412 prompt_cache_len:5151 prompt_cache_ratio:0.3342200882429276 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 +DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:16 [batch.py:51] router release req id 8 +INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10807085037231445 s +INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.1100459098815918 s +DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=206383878400547312186242012635412106189, time:1750768996.9565449s req_ids:[8] +DEBUG 06-24 20:43:16 [manager.py:391] +INFO 06-24 20:43:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:384.3717575073242ms total_cost_time:384.4037055969238ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:15413 prompt_cache_len:5151 prompt_cache_ratio:0.334198403944722 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 +DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:17 [batch.py:51] router release req id 8 +INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.10870766639709473 s +INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11110234260559082 s +DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=125220546088489368585121117453757955615, time:1750768997.3469355s req_ids:[8] +DEBUG 06-24 20:43:17 [manager.py:391] +ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:215.54183959960938ms total_cost_time:215.58451652526855ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15414 prompt_cache_len:5151 prompt_cache_ratio:0.3341767224601012 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 +DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:17 [batch.py:51] router release req id 8 +INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.10894107818603516 s +INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11038947105407715 s +DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=10271938347883595421600931731739267664, time:1750768997.569174s req_ids:[8] +DEBUG 06-24 20:43:17 [manager.py:391] +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:217.9241180419922ms total_cost_time:217.98467636108398ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15415 prompt_cache_len:5151 prompt_cache_ratio:0.3341550437885177 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 +DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:17 [batch.py:51] router release req id 8 +INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.1117558479309082 s +INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11353754997253418 s +DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=299861424305003713188570558129473653595, time:1750768997.7940192s req_ids:[8] +DEBUG 06-24 20:43:17 [manager.py:391] +ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:216.65525436401367ms total_cost_time:216.70007705688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15416 prompt_cache_len:5151 prompt_cache_ratio:0.334133367929424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 +DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:17 [batch.py:51] router release req id 8 +INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10855364799499512 s +INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046457290649414 s +DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=129765180899556088813968405685969006848, time:1750768998.0230882s req_ids:[8] +DEBUG 06-24 20:43:18 [manager.py:391] +ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:225.85725784301758ms total_cost_time:225.90255737304688ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15417 prompt_cache_len:5151 prompt_cache_ratio:0.3341116948822728 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 +DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:18 [batch.py:51] router release req id 8 +INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10993432998657227 s +INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11182641983032227 s +DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=189023724908253657618418827555334444259, time:1750768998.2482333s req_ids:[8] +DEBUG 06-24 20:43:18 [manager.py:391] +ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:387.33863830566406ms total_cost_time:387.38131523132324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15418 prompt_cache_len:5151 prompt_cache_ratio:0.33409002464651705 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 +DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:18 [batch.py:51] router release req id 8 +INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10862183570861816 s +INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11064600944519043 s +DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=325010068512206289651033121211828969125, time:1750768998.6425078s req_ids:[8] +DEBUG 06-24 20:43:18 [manager.py:391] +ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:216.26520156860352ms total_cost_time:216.30859375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15419 prompt_cache_len:5151 prompt_cache_ratio:0.3340683572216097 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 +DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:18 [batch.py:51] router release req id 8 +INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10863685607910156 s +INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s +DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=260660830272828802047573391917372451747, time:1750768998.8690102s req_ids:[8] +DEBUG 06-24 20:43:18 [manager.py:391] +ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:218.65081787109375ms total_cost_time:218.69373321533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15420 prompt_cache_len:5151 prompt_cache_ratio:0.33404669260700387 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 +DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:19 [batch.py:51] router release req id 8 +INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10846996307373047 s +INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s +DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=271177456271730926536685791785396149031, time:1750768999.0920513s req_ids:[8] +DEBUG 06-24 20:43:19 [manager.py:391] +ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:214.9369716644287ms total_cost_time:214.9813175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15421 prompt_cache_len:5151 prompt_cache_ratio:0.3340250308021529 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 +DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:19 [batch.py:51] router release req id 8 +INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s +INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s +DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=251787789265713170359376828944363800646, time:1750768999.3146327s req_ids:[8] +DEBUG 06-24 20:43:19 [manager.py:391] +ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:259.42230224609375ms total_cost_time:259.46640968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15422 prompt_cache_len:5151 prompt_cache_ratio:0.3340033718065102 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 +DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:19 [batch.py:51] router release req id 8 +INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10930490493774414 s +INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.11123132705688477 s +DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=280130801048184147228352792616082019649, time:1750768999.6012118s req_ids:[8] +DEBUG 06-24 20:43:19 [manager.py:391] +ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:221.84133529663086ms total_cost_time:221.89807891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:15423 prompt_cache_len:5151 prompt_cache_ratio:0.3339817156195293 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 +DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:19 [batch.py:51] router release req id 8 +INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s +INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.1102292537689209 s +DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=236997033003466197485780446928212902286, time:1750768999.826101s req_ids:[8] +DEBUG 06-24 20:43:19 [manager.py:391] +ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:344.8307514190674ms total_cost_time:344.8760509490967ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15424 prompt_cache_len:5151 prompt_cache_ratio:0.3339600622406639 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 +DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:20 [batch.py:51] router release req id 8 +INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.10805130004882812 s +INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11015486717224121 s +DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=57309986884507969375818372613517005954, time:1750769000.178851s req_ids:[8] +DEBUG 06-24 20:43:20 [manager.py:391] +ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:213.73748779296875ms total_cost_time:213.78302574157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15425 prompt_cache_len:5151 prompt_cache_ratio:0.3339384116693679 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 +DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:20 [batch.py:51] router release req id 8 +INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.10901594161987305 s +INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s +DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=120139984236611335976036335208686722033, time:1750769000.40728s req_ids:[8] +DEBUG 06-24 20:43:20 [manager.py:391] +ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:231.99105262756348ms total_cost_time:232.15413093566895ms,out_token_counter:1 mean_per_token_cost_time: 0.16307830810546875ms prompt_token_num:15426 prompt_cache_len:5151 prompt_cache_ratio:0.3339167639050953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 +DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:20 [batch.py:51] router release req id 8 +INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.11100220680236816 s +INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11310529708862305 s +DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=178239456482495432555398965045462208962, time:1750769000.6715631s req_ids:[8] +DEBUG 06-24 20:43:20 [manager.py:391] +ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:255.23805618286133ms total_cost_time:255.28311729431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15427 prompt_cache_len:5151 prompt_cache_ratio:0.3338951189473002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 +DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:20 [batch.py:51] router release req id 8 +INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.11085247993469238 s +INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11293339729309082 s +DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=241827451097481701816494216152067596274, time:1750769000.9060152s req_ids:[8] +DEBUG 06-24 20:43:20 [manager.py:391] +ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:214.76197242736816ms total_cost_time:214.80441093444824ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15428 prompt_cache_len:5151 prompt_cache_ratio:0.3338734767954369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 +DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:21 [batch.py:51] router release req id 8 +INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s +INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s +DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=267478173731040531160448048982231957021, time:1750769001.1244347s req_ids:[8] +DEBUG 06-24 20:43:21 [manager.py:391] +ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:43:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 57399.840 tokens/s +DEBUG 06-24 20:43:21 [stats.py:37] Avg prompt tokens throughput: 57392.392 tokens/s +DEBUG 06-24 20:43:21 [stats.py:37] Avg generate tokens throughput: 7.448 tokens/s +INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:389.1007900238037ms total_cost_time:389.14942741394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:15429 prompt_cache_len:5151 prompt_cache_ratio:0.33385183744895974 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 +DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:21 [batch.py:51] router release req id 8 +INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.1093900203704834 s +INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11147904396057129 s +DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=157527296228540410486096363381835443000, time:1750769001.5206366s req_ids:[8] +DEBUG 06-24 20:43:21 [manager.py:391] +ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:218.97482872009277ms total_cost_time:219.03038024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:15430 prompt_cache_len:5151 prompt_cache_ratio:0.3338302009073234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 +DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:21 [batch.py:51] router release req id 8 +INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.11124801635742188 s +DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=338832660266854091497349862000465714120, time:1750769001.7454457s req_ids:[8] +DEBUG 06-24 20:43:21 [manager.py:391] +INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11341428756713867 s +ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:210.35408973693848ms total_cost_time:210.39700508117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15431 prompt_cache_len:5151 prompt_cache_ratio:0.33380856716998253 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 +DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:21 [batch.py:51] router release req id 8 +INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s +INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.1099236011505127 s +DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=8811457152464451264968692518318777534, time:1750769001.9638553s req_ids:[8] +DEBUG 06-24 20:43:21 [manager.py:391] +ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:217.67520904541016ms total_cost_time:217.71883964538574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15432 prompt_cache_len:5151 prompt_cache_ratio:0.33378693623639194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 +DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:22 [batch.py:51] router release req id 8 +INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s +INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.10970568656921387 s +DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=53760691995464876594379747515243920548, time:1750769002.188636s req_ids:[8] +DEBUG 06-24 20:43:22 [manager.py:391] +ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:174.33476448059082ms total_cost_time:174.3781566619873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15433 prompt_cache_len:5151 prompt_cache_ratio:0.33376530810600663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 +DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:22 [batch.py:51] router release req id 8 +INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.10853791236877441 s +INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.1109762191772461 s +DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=38032441872155846565838070902272483626, time:1750769002.3701382s req_ids:[8] +DEBUG 06-24 20:43:22 [manager.py:391] +ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:344.6993827819824ms total_cost_time:344.7437286376953ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15434 prompt_cache_len:5151 prompt_cache_ratio:0.3337436827782817 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 +DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:22 [batch.py:51] router release req id 8 +INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.11163687705993652 s +DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=262153972283909031013284660982623243805, time:1750769002.7198937s req_ids:[8] +DEBUG 06-24 20:43:22 [manager.py:391] +INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.11339735984802246 s +ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:172.82676696777344ms total_cost_time:172.87015914916992ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15435 prompt_cache_len:5151 prompt_cache_ratio:0.3337220602526725 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 +DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:22 [batch.py:51] router release req id 8 +INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.11005949974060059 s +INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.1120445728302002 s +DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=270058030800655076917920579116954086705, time:1750769002.9012778s req_ids:[8] +DEBUG 06-24 20:43:22 [manager.py:391] +ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:205.0013542175293ms total_cost_time:205.0468921661377ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15436 prompt_cache_len:5151 prompt_cache_ratio:0.33370044052863435 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 +DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:23 [batch.py:51] router release req id 8 +INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10875415802001953 s +INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.1107327938079834 s +DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=65619476569552218348962656888527820624, time:1750769003.1157167s req_ids:[8] +DEBUG 06-24 20:43:23 [manager.py:391] +ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:214.39886093139648ms total_cost_time:214.44106101989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15437 prompt_cache_len:5151 prompt_cache_ratio:0.33367882360562284 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 +INFO 06-24 20:43:23 [statics_utils.py:24] mean first cost: 233.27891738854936 ms +INFO 06-24 20:43:23 [statics_utils.py:24] mean per token cost: 0.05690889423068175 ms +DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:23 [batch.py:51] router release req id 8 +INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10909080505371094 s +INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.11149430274963379 s +DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=59041762474116765485562589127778992167, time:1750769003.334885s req_ids:[8] +DEBUG 06-24 20:43:23 [manager.py:391] +ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:212.97001838684082ms total_cost_time:213.0138874053955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15438 prompt_cache_len:5151 prompt_cache_ratio:0.3336572094830937 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 +DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:23 [batch.py:51] router release req id 8 +INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10874819755554199 s +INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s +DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=163291143409158833628341115754739092869, time:1750769003.557256s req_ids:[8] +DEBUG 06-24 20:43:23 [manager.py:391] +ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:387.91823387145996ms total_cost_time:387.96329498291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15439 prompt_cache_len:5151 prompt_cache_ratio:0.33363559816050264 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 +DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:23 [batch.py:51] router release req id 8 +INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s +INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.10911989212036133 s +DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=229808869300752675759341439956250764466, time:1750769003.9604712s req_ids:[8] +DEBUG 06-24 20:43:23 [manager.py:391] +ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:227.89478302001953ms total_cost_time:227.9372215270996ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15440 prompt_cache_len:5151 prompt_cache_ratio:0.3336139896373057 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 +DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:24 [batch.py:51] router release req id 8 +INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.10939478874206543 s +INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.1115720272064209 s +DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=37132088448787553993466938728754968678, time:1750769004.1874907s req_ids:[8] +DEBUG 06-24 20:43:24 [manager.py:391] +ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:214.4761085510254ms total_cost_time:214.51759338378906ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15441 prompt_cache_len:5151 prompt_cache_ratio:0.333592383912959 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 +DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:24 [batch.py:51] router release req id 8 +INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.10942554473876953 s +INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11134600639343262 s +DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=314276141382556848510486457349007866900, time:1750769004.426429s req_ids:[8] +DEBUG 06-24 20:43:24 [manager.py:391] +ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:270.92695236206055ms total_cost_time:270.9698677062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15442 prompt_cache_len:5151 prompt_cache_ratio:0.3335707809869188 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 +DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:24 [batch.py:51] router release req id 8 +INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.11108183860778809 s +INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11325907707214355 s +DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=190968883731145992986182063731898355058, time:1750769004.7023172s req_ids:[8] +DEBUG 06-24 20:43:24 [manager.py:391] +ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:217.15092658996582ms total_cost_time:217.1950340270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15443 prompt_cache_len:5151 prompt_cache_ratio:0.33354918085864144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 +DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:24 [batch.py:51] router release req id 8 +INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.1082298755645752 s +INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11023688316345215 s +DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=196763527768202311873037372773450053327, time:1750769004.9253538s req_ids:[8] +DEBUG 06-24 20:43:24 [manager.py:391] +ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:219.42687034606934ms total_cost_time:219.47002410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15444 prompt_cache_len:5151 prompt_cache_ratio:0.33352758352758355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 +DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:25 [batch.py:51] router release req id 8 +INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10905122756958008 s +INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.11112761497497559 s +DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=58386318805146299272037676221056832045, time:1750769005.1626537s req_ids:[8] +DEBUG 06-24 20:43:25 [manager.py:391] +ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:399.8548984527588ms total_cost_time:399.8987674713135ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15445 prompt_cache_len:5151 prompt_cache_ratio:0.33350598899320166 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 +DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:25 [batch.py:51] router release req id 8 +INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10901379585266113 s +INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.11108016967773438 s +DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=236306962793082257345209269303516878151, time:1750769005.5599859s req_ids:[8] +DEBUG 06-24 20:43:25 [manager.py:391] +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:217.03815460205078ms total_cost_time:217.08154678344727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15446 prompt_cache_len:5151 prompt_cache_ratio:0.3334843972549527 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 +DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:25 [batch.py:51] router release req id 8 +INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10959148406982422 s +INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.1118319034576416 s +DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=145501889432507285585211031145695928294, time:1750769005.7931159s req_ids:[8] +DEBUG 06-24 20:43:25 [manager.py:391] +ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:223.7563133239746ms total_cost_time:223.8013744354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15447 prompt_cache_len:5151 prompt_cache_ratio:0.33346280831229363 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 +DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:25 [batch.py:51] router release req id 8 +INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.11140751838684082 s +INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11356925964355469 s +DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=339725403631621527710553801225795733746, time:1750769006.0332572s req_ids:[8] +DEBUG 06-24 20:43:26 [manager.py:391] +ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:235.92329025268555ms total_cost_time:235.96835136413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15448 prompt_cache_len:5151 prompt_cache_ratio:0.3334412221646815 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 +DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:26 [batch.py:51] router release req id 8 +INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.11177849769592285 s +INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11383533477783203 s +DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=169702736765862436870389350487073749295, time:1750769006.2760806s req_ids:[8] +DEBUG 06-24 20:43:26 [manager.py:391] +ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:234.91382598876953ms total_cost_time:234.95745658874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15449 prompt_cache_len:5151 prompt_cache_ratio:0.33341963881157355 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 +DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:26 [batch.py:51] router release req id 8 +INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.10898494720458984 s +INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11101436614990234 s +DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=236657486488954502226365137269568090389, time:1750769006.504517s req_ids:[8] +DEBUG 06-24 20:43:26 [manager.py:391] +ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:213.85979652404785ms total_cost_time:213.90295028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15450 prompt_cache_len:5151 prompt_cache_ratio:0.3333980582524272 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 +DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:26 [batch.py:51] router release req id 8 +INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.31144189834594727 s +INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.31356167793273926 s +DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=5557421891798285109525589155678990965, time:1750769006.934319s req_ids:[8] +DEBUG 06-24 20:43:26 [manager.py:391] +ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:431.17356300354004ms total_cost_time:431.229829788208ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:15451 prompt_cache_len:5151 prompt_cache_ratio:0.3333764804866999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 +DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:27 [batch.py:51] router release req id 8 +INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.11226201057434082 s +INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11411046981811523 s +DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=286829608933489572007865618870403139389, time:1750769007.1763513s req_ids:[8] +DEBUG 06-24 20:43:27 [manager.py:391] +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:233.34693908691406ms total_cost_time:233.38985443115234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15452 prompt_cache_len:5151 prompt_cache_ratio:0.33335490551384933 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 +DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:27 [batch.py:51] router release req id 8 +INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10942244529724121 s +INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11141633987426758 s +DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=224048784967695890038946223843890988933, time:1750769007.4058323s req_ids:[8] +DEBUG 06-24 20:43:27 [manager.py:391] +ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:213.1357192993164ms total_cost_time:213.1783962249756ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15453 prompt_cache_len:5151 prompt_cache_ratio:0.3333333333333333 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 +DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:27 [batch.py:51] router release req id 8 +INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10915660858154297 s +INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11101555824279785 s +DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=123668264819005789502262842458729418571, time:1750769007.626951s req_ids:[8] +DEBUG 06-24 20:43:27 [manager.py:391] +ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:214.92433547973633ms total_cost_time:214.9674892425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15454 prompt_cache_len:5151 prompt_cache_ratio:0.3333117639446098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 +DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:27 [batch.py:51] router release req id 8 +INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10917448997497559 s +INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11126160621643066 s +DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=99933064138580819749610954395884721208, time:1750769007.8481743s req_ids:[8] +DEBUG 06-24 20:43:27 [manager.py:391] +ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:217.77963638305664ms total_cost_time:217.82541275024414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15455 prompt_cache_len:5151 prompt_cache_ratio:0.33329019734713683 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 +DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:27 [batch.py:51] router release req id 8 +INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11059713363647461 s +INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.11274552345275879 s +DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=2285963685280466275844023781773615237, time:1750769008.0875654s req_ids:[8] +DEBUG 06-24 20:43:28 [manager.py:391] +ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:393.904447555542ms total_cost_time:393.963098526001ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15456 prompt_cache_len:5151 prompt_cache_ratio:0.33326863354037267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 +DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:28 [batch.py:51] router release req id 8 +INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11050629615783691 s +INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.1125028133392334 s +DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=131277016215271845423939224382614101736, time:1750769008.477619s req_ids:[8] +DEBUG 06-24 20:43:28 [manager.py:391] +ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:227.58078575134277ms total_cost_time:227.62632369995117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15457 prompt_cache_len:5151 prompt_cache_ratio:0.3332470725237756 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 +DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:28 [batch.py:51] router release req id 8 +INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.1099538803100586 s +INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.1120610237121582 s +DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=280283602041049408341498935033118877930, time:1750769008.7204874s req_ids:[8] +DEBUG 06-24 20:43:28 [manager.py:391] +ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:225.05831718444824ms total_cost_time:225.10361671447754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15458 prompt_cache_len:5151 prompt_cache_ratio:0.33322551429680425 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 +DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:28 [batch.py:51] router release req id 8 +INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11077666282653809 s +INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.11269736289978027 s +DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=324105153404654701639401294464583416091, time:1750769008.9559665s req_ids:[8] +DEBUG 06-24 20:43:28 [manager.py:391] +ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:227.64897346496582ms total_cost_time:227.69570350646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15459 prompt_cache_len:5151 prompt_cache_ratio:0.3332039588589171 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 +DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:29 [batch.py:51] router release req id 8 +INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.10947012901306152 s +INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.1116032600402832 s +DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=126954439773292175961168287538622488286, time:1750769009.182194s req_ids:[8] +DEBUG 06-24 20:43:29 [manager.py:391] +ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:218.9338207244873ms total_cost_time:218.99151802062988ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15460 prompt_cache_len:5151 prompt_cache_ratio:0.33318240620957307 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 +DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:29 [batch.py:51] router release req id 8 +INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.11010313034057617 s +DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=189565023201435435948852307635204997817, time:1750769009.4067895s req_ids:[8] +DEBUG 06-24 20:43:29 [manager.py:391] +INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.11206483840942383 s +ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:384.8536014556885ms total_cost_time:384.91201400756836ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:15461 prompt_cache_len:5151 prompt_cache_ratio:0.33316085634823106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 +DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:29 [batch.py:51] router release req id 8 +INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.1094961166381836 s +INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s +DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=239441346017877375543889587438427279271, time:1750769009.8019886s req_ids:[8] +DEBUG 06-24 20:43:29 [manager.py:391] +ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:219.3284034729004ms total_cost_time:219.37131881713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15462 prompt_cache_len:5151 prompt_cache_ratio:0.33313930927435004 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 +DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:29 [batch.py:51] router release req id 8 +INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10935711860656738 s +INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.1114358901977539 s +DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=221204952879915705818642985049516004425, time:1750769010.027834s req_ids:[8] +DEBUG 06-24 20:43:30 [manager.py:391] +ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:220.414400100708ms total_cost_time:220.46971321105957ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:15463 prompt_cache_len:5151 prompt_cache_ratio:0.33311776498738926 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 +DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:30 [batch.py:51] router release req id 8 +INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10989665985107422 s +INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11185979843139648 s +DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=104790985737874088387315238630219527358, time:1750769010.2546425s req_ids:[8] +DEBUG 06-24 20:43:30 [manager.py:391] +ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:215.179443359375ms total_cost_time:215.2235507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15464 prompt_cache_len:5151 prompt_cache_ratio:0.33309622348680806 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 +DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:30 [batch.py:51] router release req id 8 +INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10973358154296875 s +INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.1118617057800293 s +DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=302018470920699872595036178330893233693, time:1750769010.4780586s req_ids:[8] +DEBUG 06-24 20:43:30 [manager.py:391] +ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:217.82684326171875ms total_cost_time:217.88668632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:15465 prompt_cache_len:5151 prompt_cache_ratio:0.333074684772066 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 +DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:30 [batch.py:51] router release req id 8 +INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10962462425231934 s +INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11161327362060547 s +DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=210025485215097564114753896535914509387, time:1750769010.7010825s req_ids:[8] +DEBUG 06-24 20:43:30 [manager.py:391] +ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:211.1499309539795ms total_cost_time:211.1952304840088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15466 prompt_cache_len:5151 prompt_cache_ratio:0.33305314884262255 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 +DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:30 [batch.py:51] router release req id 8 +INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s +INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11078310012817383 s +DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=184426979977695687461517933117575577203, time:1750769010.9202573s req_ids:[8] +DEBUG 06-24 20:43:30 [manager.py:391] +ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:380.0070285797119ms total_cost_time:380.051851272583ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15467 prompt_cache_len:5151 prompt_cache_ratio:0.33303161569793754 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 +DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:31 [batch.py:51] router release req id 8 +INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10895299911499023 s +INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11087369918823242 s +DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=305570453939615981126622214639287995897, time:1750769011.3077374s req_ids:[8] +DEBUG 06-24 20:43:31 [manager.py:391] +ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:43:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 60186.263 tokens/s +DEBUG 06-24 20:43:31 [stats.py:37] Avg prompt tokens throughput: 60178.472 tokens/s +DEBUG 06-24 20:43:31 [stats.py:37] Avg generate tokens throughput: 7.791 tokens/s +INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:217.96512603759766ms total_cost_time:218.00994873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15468 prompt_cache_len:5151 prompt_cache_ratio:0.3330100853374709 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 +DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:31 [batch.py:51] router release req id 8 +INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.1089484691619873 s +INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.1110081672668457 s +DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=99060813480093921108669805488079059722, time:1750769011.5321562s req_ids:[8] +DEBUG 06-24 20:43:31 [manager.py:391] +ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:215.66486358642578ms total_cost_time:215.70873260498047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15469 prompt_cache_len:5151 prompt_cache_ratio:0.33298855776068265 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 +DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:31 [batch.py:51] router release req id 8 +INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10861921310424805 s +INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11071062088012695 s +DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=312295098165254681390643443013766764798, time:1750769011.7536666s req_ids:[8] +DEBUG 06-24 20:43:31 [manager.py:391] +ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:212.29004859924316ms total_cost_time:212.33463287353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15470 prompt_cache_len:5151 prompt_cache_ratio:0.33296703296703295 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 +DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:31 [batch.py:51] router release req id 8 +INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10924267768859863 s +INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11113262176513672 s +DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=72984598762621632635097769896538917076, time:1750769011.9749513s req_ids:[8] +DEBUG 06-24 20:43:31 [manager.py:391] +ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:219.02084350585938ms total_cost_time:219.06375885009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15471 prompt_cache_len:5151 prompt_cache_ratio:0.3329455109559822 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 +DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:32 [batch.py:51] router release req id 8 +INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.10824799537658691 s +INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019110679626465 s +DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=136026491060450398138517916074245982929, time:1750769012.1988928s req_ids:[8] +DEBUG 06-24 20:43:32 [manager.py:391] +ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:384.2761516571045ms total_cost_time:384.3202590942383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15472 prompt_cache_len:5151 prompt_cache_ratio:0.3329239917269907 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 +DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:32 [batch.py:51] router release req id 8 +INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.1090857982635498 s +INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.11110711097717285 s +DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=73868169285685713245807988527796424875, time:1750769012.590482s req_ids:[8] +DEBUG 06-24 20:43:32 [manager.py:391] +ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:218.6727523803711ms total_cost_time:218.71709823608398ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15473 prompt_cache_len:5151 prompt_cache_ratio:0.33290247527951916 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 +DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:32 [batch.py:51] router release req id 8 +INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.10828709602355957 s +INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s +DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=67274642922715787807076773425212407903, time:1750769012.8166347s req_ids:[8] +DEBUG 06-24 20:43:32 [manager.py:391] +ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:212.49032020568848ms total_cost_time:212.53299713134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15474 prompt_cache_len:5151 prompt_cache_ratio:0.3328809616130283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 +DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:32 [batch.py:51] router release req id 8 +INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10964417457580566 s +INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11161136627197266 s +DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=53667939613524688608361647364584573029, time:1750769013.0361764s req_ids:[8] +DEBUG 06-24 20:43:33 [manager.py:391] +ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:215.73996543884277ms total_cost_time:215.78407287597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15475 prompt_cache_len:5151 prompt_cache_ratio:0.332859450726979 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 +DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:33 [batch.py:51] router release req id 8 +INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.1082603931427002 s +INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.1109914779663086 s +DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=107275762603649880429416896818172619369, time:1750769013.2599587s req_ids:[8] +DEBUG 06-24 20:43:33 [manager.py:391] +ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:217.42010116577148ms total_cost_time:217.46420860290527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15476 prompt_cache_len:5151 prompt_cache_ratio:0.33283794262083227 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 +DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:33 [batch.py:51] router release req id 8 +INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10973119735717773 s +INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11173176765441895 s +DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=153036940948665084371104056569659416867, time:1750769013.487214s req_ids:[8] +DEBUG 06-24 20:43:33 [manager.py:391] +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:387.67504692077637ms total_cost_time:387.71867752075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15477 prompt_cache_len:5151 prompt_cache_ratio:0.33281643729404925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 +DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:33 [batch.py:51] router release req id 8 +INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10835552215576172 s +INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11038017272949219 s +DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=131429948924080757927873330310697027713, time:1750769013.8777986s req_ids:[8] +DEBUG 06-24 20:43:33 [manager.py:391] +ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:217.46373176574707ms total_cost_time:217.50950813293457ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15478 prompt_cache_len:5151 prompt_cache_ratio:0.33279493474609123 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 +DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:34 [batch.py:51] router release req id 8 +INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s +INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.1101231575012207 s +DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=240716361288372971613221214414598235470, time:1750769014.1030223s req_ids:[8] +DEBUG 06-24 20:43:34 [manager.py:391] +ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:210.06369590759277ms total_cost_time:210.10923385620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15479 prompt_cache_len:5151 prompt_cache_ratio:0.33277343497641965 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 +DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:34 [batch.py:51] router release req id 8 +INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s +INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s +DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=156938767727576743828337823644899685455, time:1750769014.3244421s req_ids:[8] +DEBUG 06-24 20:43:34 [manager.py:391] +ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:222.18036651611328ms total_cost_time:222.22590446472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15480 prompt_cache_len:5151 prompt_cache_ratio:0.33275193798449615 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 +DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:34 [batch.py:51] router release req id 8 +INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.10965895652770996 s +INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11166501045227051 s +DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=119835342933129630132236326942008096500, time:1750769014.5491033s req_ids:[8] +DEBUG 06-24 20:43:34 [manager.py:391] +ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:217.49615669250488ms total_cost_time:217.54002571105957ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15481 prompt_cache_len:5151 prompt_cache_ratio:0.33273044376978234 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 +DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:34 [batch.py:51] router release req id 8 +INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.10910940170288086 s +INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11089539527893066 s +DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=282507000742130658888453064996081948521, time:1750769014.77378s req_ids:[8] +DEBUG 06-24 20:43:34 [manager.py:391] +ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:390.78283309936523ms total_cost_time:390.82860946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15482 prompt_cache_len:5151 prompt_cache_ratio:0.3327089523317401 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 +DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:35 [batch.py:51] router release req id 8 +INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10924935340881348 s +INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11108207702636719 s +DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=251889490099763646579944707063002492875, time:1750769015.1706903s req_ids:[8] +DEBUG 06-24 20:43:35 [manager.py:391] +ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:216.18270874023438ms total_cost_time:216.22872352600098ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15483 prompt_cache_len:5151 prompt_cache_ratio:0.33268746366983143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 +DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:35 [batch.py:51] router release req id 8 +INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10914778709411621 s +DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=185327335961143310989529372401487027081, time:1750769015.3925962s req_ids:[8] +DEBUG 06-24 20:43:35 [manager.py:391] +INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11093425750732422 s +ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:212.7859592437744ms total_cost_time:212.8283977508545ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15484 prompt_cache_len:5151 prompt_cache_ratio:0.3326659777835185 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 +DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:35 [batch.py:51] router release req id 8 +INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10917401313781738 s +INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11100506782531738 s +INFO 06-24 20:43:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=156352584738741434355173611217471607218, time:1750769015.6198106s req_ids:[8] +DEBUG 06-24 20:43:35 [manager.py:391] +ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:216.35770797729492ms total_cost_time:216.4008617401123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15485 prompt_cache_len:5151 prompt_cache_ratio:0.3326444946722635 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 +DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:35 [batch.py:51] router release req id 8 +INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10853719711303711 s +INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11038994789123535 s +DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=90190926452675642297566359561849615636, time:1750769015.8393862s req_ids:[8] +DEBUG 06-24 20:43:35 [manager.py:391] +ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:215.46339988708496ms total_cost_time:215.50798416137695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15486 prompt_cache_len:5151 prompt_cache_ratio:0.3326230143355289 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 +DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:35 [batch.py:51] router release req id 8 +INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10935282707214355 s +INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11129403114318848 s +DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=62113987517455922359902916587392281832, time:1750769016.0620043s req_ids:[8] +DEBUG 06-24 20:43:36 [manager.py:391] +ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:220.16668319702148ms total_cost_time:220.21198272705078ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15487 prompt_cache_len:5151 prompt_cache_ratio:0.33260153677277715 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 +DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:36 [batch.py:51] router release req id 8 +INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s +INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025691032409668 s +DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=225312501493360974579127034886677379280, time:1750769016.2881653s req_ids:[8] +DEBUG 06-24 20:43:36 [manager.py:391] +ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:422.806978225708ms total_cost_time:422.8498935699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15488 prompt_cache_len:5151 prompt_cache_ratio:0.33258006198347106 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 +DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:36 [batch.py:51] router release req id 8 +INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s +INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066293716430664 s +DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=94719725824781149228032226210658312208, time:1750769016.7179585s req_ids:[8] +DEBUG 06-24 20:43:36 [manager.py:391] +ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:203.75299453735352ms total_cost_time:203.7985324859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15489 prompt_cache_len:5151 prompt_cache_ratio:0.3325585899670734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 +DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:36 [batch.py:51] router release req id 8 +DEBUG 06-24 20:43:36 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:36 [manager.py:283] +DEBUG 06-24 20:43:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:36 [manager.py:284] +INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10907626152038574 s +INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11105656623840332 s +DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=196026483954770485932375621299786134364, time:1750769016.9293842s req_ids:[8] +DEBUG 06-24 20:43:36 [manager.py:391] +ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:214.73979949951172ms total_cost_time:214.78271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15490 prompt_cache_len:5151 prompt_cache_ratio:0.33253712072304714 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 +DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:37 [batch.py:51] router release req id 8 +INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.10830116271972656 s +INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s +DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=332235020123944962942246435542303478277, time:1750769017.150864s req_ids:[8] +DEBUG 06-24 20:43:37 [manager.py:391] +ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:214.72620964050293ms total_cost_time:214.7691249847412ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15491 prompt_cache_len:5151 prompt_cache_ratio:0.33251565425085533 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 +DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:37 [batch.py:51] router release req id 8 +INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.10905289649963379 s +INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.11128711700439453 s +DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=169890904052221588498594811705163407816, time:1750769017.3724678s req_ids:[8] +DEBUG 06-24 20:43:37 [manager.py:391] +ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:213.59705924987793ms total_cost_time:213.6397361755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15492 prompt_cache_len:5151 prompt_cache_ratio:0.33249419054996127 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 +DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:37 [batch.py:51] router release req id 8 +INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.1094675064086914 s +INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.11138677597045898 s +DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=322676009228767463733138541071397237828, time:1750769017.5916646s req_ids:[8] +DEBUG 06-24 20:43:37 [manager.py:391] +ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:215.20471572875977ms total_cost_time:215.24786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15493 prompt_cache_len:5151 prompt_cache_ratio:0.3324727296198283 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 +DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:37 [batch.py:51] router release req id 8 +INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.31024932861328125 s +INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.31502318382263184 s +DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=154176038929949792403952196384885986923, time:1750769018.018621s req_ids:[8] +DEBUG 06-24 20:43:38 [manager.py:391] +ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:427.809476852417ms total_cost_time:427.8557300567627ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15494 prompt_cache_len:5151 prompt_cache_ratio:0.33245127145992 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 +DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:38 [batch.py:51] router release req id 8 +INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10812211036682129 s +INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11049509048461914 s +DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=176293531940193866655195328798697408596, time:1750769018.2486646s req_ids:[8] +DEBUG 06-24 20:43:38 [manager.py:391] +ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:214.2167091369629ms total_cost_time:214.26129341125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15495 prompt_cache_len:5151 prompt_cache_ratio:0.3324298160696999 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 +DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:38 [batch.py:51] router release req id 8 +INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10844874382019043 s +INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11204886436462402 s +DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=144846425567057627243962262319234119388, time:1750769018.473652s req_ids:[8] +DEBUG 06-24 20:43:38 [manager.py:391] +ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:201.89785957336426ms total_cost_time:201.94077491760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15496 prompt_cache_len:5151 prompt_cache_ratio:0.3324083634486319 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 +DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:38 [batch.py:51] router release req id 8 +INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10819530487060547 s +INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.10994768142700195 s +DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=90968109130511610281869163205376469076, time:1750769018.691005s req_ids:[8] +DEBUG 06-24 20:43:38 [manager.py:391] +ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:209.58638191223145ms total_cost_time:209.63025093078613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15497 prompt_cache_len:5151 prompt_cache_ratio:0.3323869135961799 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 +DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:38 [batch.py:51] router release req id 8 +INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s +INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s +DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=53953272495919721536225720168864675600, time:1750769018.9026737s req_ids:[8] +DEBUG 06-24 20:43:38 [manager.py:391] +ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:203.83739471435547ms total_cost_time:203.88293266296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15498 prompt_cache_len:5151 prompt_cache_ratio:0.332365466511808 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 +DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:39 [batch.py:51] router release req id 8 +INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.1075601577758789 s +INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s +DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=179672858928877123196054330801667626126, time:1750769019.119352s req_ids:[8] +DEBUG 06-24 20:43:39 [manager.py:391] +ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:373.5947608947754ms total_cost_time:373.6388683319092ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15499 prompt_cache_len:5151 prompt_cache_ratio:0.3323440221949803 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 +DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:39 [batch.py:51] router release req id 8 +INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10801005363464355 s +INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s +DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=326237757909633856153384566180308443622, time:1750769019.515276s req_ids:[8] +DEBUG 06-24 20:43:39 [manager.py:391] +ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:239.72749710083008ms total_cost_time:239.76993560791016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15500 prompt_cache_len:5151 prompt_cache_ratio:0.3323225806451613 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 +DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:39 [batch.py:51] router release req id 8 +INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10840535163879395 s +INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s +DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=98342287027970499537167805550769379722, time:1750769019.7317693s req_ids:[8] +DEBUG 06-24 20:43:39 [manager.py:391] +ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:207.47852325439453ms total_cost_time:207.5207233428955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15501 prompt_cache_len:5151 prompt_cache_ratio:0.33230114186181536 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 +DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:39 [batch.py:51] router release req id 8 +INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10872769355773926 s +INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060380935668945 s +DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=316351813828968168688975933987935746619, time:1750769019.960206s req_ids:[8] +DEBUG 06-24 20:43:39 [manager.py:391] +ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:229.83813285827637ms total_cost_time:229.88367080688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15502 prompt_cache_len:5151 prompt_cache_ratio:0.3322797058444072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 +DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:40 [batch.py:51] router release req id 8 +INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10823583602905273 s +INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s +DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=49120880856124430077460048045797745168, time:1750769020.184157s req_ids:[8] +DEBUG 06-24 20:43:40 [manager.py:391] +ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:213.62853050231934ms total_cost_time:213.67263793945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15503 prompt_cache_len:5151 prompt_cache_ratio:0.3322582725924015 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 +DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:40 [batch.py:51] router release req id 8 +INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10865330696105957 s +INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11075806617736816 s +DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=278881800412086961829088317152244328712, time:1750769020.404828s req_ids:[8] +DEBUG 06-24 20:43:40 [manager.py:391] +ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:362.5960350036621ms total_cost_time:362.6413345336914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15504 prompt_cache_len:5151 prompt_cache_ratio:0.33223684210526316 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 +DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:40 [batch.py:51] router release req id 8 +INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10818123817443848 s +INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11027264595031738 s +DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=139379794584315504223860311709632557893, time:1750769020.7747266s req_ids:[8] +DEBUG 06-24 20:43:40 [manager.py:391] +ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:211.0278606414795ms total_cost_time:211.07172966003418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15505 prompt_cache_len:5151 prompt_cache_ratio:0.3322154143824573 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 +DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:40 [batch.py:51] router release req id 8 +INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10772895812988281 s +INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.10985684394836426 s +DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=276999116135120299237225054605357287, time:1750769020.9926078s req_ids:[8] +DEBUG 06-24 20:43:40 [manager.py:391] +ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:211.36164665222168ms total_cost_time:211.40480041503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15506 prompt_cache_len:5151 prompt_cache_ratio:0.33219398942344897 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 +DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:41 [batch.py:51] router release req id 8 +INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.1097109317779541 s +INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11178469657897949 s +DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=323019034965407069123328577334358345369, time:1750769021.2118876s req_ids:[8] +DEBUG 06-24 20:43:41 [manager.py:391] +ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:216.28785133361816ms total_cost_time:216.33052825927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15507 prompt_cache_len:5151 prompt_cache_ratio:0.3321725672277036 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 +DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:41 [batch.py:51] router release req id 8 +INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10851693153381348 s +INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11034893989562988 s +DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=338686882482975875892332852139445727500, time:1750769021.434208s req_ids:[8] +DEBUG 06-24 20:43:41 [manager.py:391] +DEBUG 06-24 20:43:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 61806.758 tokens/s +DEBUG 06-24 20:43:41 [stats.py:37] Avg prompt tokens throughput: 61798.877 tokens/s +DEBUG 06-24 20:43:41 [stats.py:37] Avg generate tokens throughput: 7.880 tokens/s +ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:212.97788619995117ms total_cost_time:213.01889419555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:15508 prompt_cache_len:5151 prompt_cache_ratio:0.33215114779468663 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 +DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:41 [batch.py:51] router release req id 8 +INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10841083526611328 s +INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11022520065307617 s +DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=261928917451178508832671656259816829495, time:1750769021.6518214s req_ids:[8] +DEBUG 06-24 20:43:41 [manager.py:391] +ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:166.74184799194336ms total_cost_time:166.78404808044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15509 prompt_cache_len:5151 prompt_cache_ratio:0.33212973112386357 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 +DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:41 [batch.py:51] router release req id 8 +INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10819625854492188 s +INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.10960817337036133 s +DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=5273772548952963671445260707588477053, time:1750769021.8250737s req_ids:[8] +DEBUG 06-24 20:43:41 [manager.py:391] +ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:319.5171356201172ms total_cost_time:319.5605278015137ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15510 prompt_cache_len:5151 prompt_cache_ratio:0.3321083172147002 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 +DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:42 [batch.py:51] router release req id 8 +INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10884642601013184 s +INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066818237304688 s +DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=19619467589783348822045253082794053381, time:1750769022.1836612s req_ids:[8] +DEBUG 06-24 20:43:42 [manager.py:391] +ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:234.19904708862305ms total_cost_time:234.24148559570312ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15511 prompt_cache_len:5151 prompt_cache_ratio:0.33208690606666236 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 +DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:42 [batch.py:51] router release req id 8 +INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10812544822692871 s +DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=316269426407312303547313796351305791702, time:1750769022.3916593s req_ids:[8] +DEBUG 06-24 20:43:42 [manager.py:391] +INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.10962510108947754 s +ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:199.63908195495605ms total_cost_time:199.68461990356445ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15512 prompt_cache_len:5151 prompt_cache_ratio:0.3320654976792161 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 +DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:42 [batch.py:51] router release req id 8 +INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.1075887680053711 s +INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.10952448844909668 s +DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=329017144932209401201276784159304588777, time:1750769022.6012936s req_ids:[8] +DEBUG 06-24 20:43:42 [manager.py:391] +ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:212.99004554748535ms total_cost_time:213.03439140319824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15513 prompt_cache_len:5151 prompt_cache_ratio:0.3320440920518275 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 +DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:42 [batch.py:51] router release req id 8 +INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s +INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.1108400821685791 s +DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=303196132137554750414265443312942272038, time:1750769022.824088s req_ids:[8] +DEBUG 06-24 20:43:42 [manager.py:391] +ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:232.99384117126465ms total_cost_time:233.03937911987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15514 prompt_cache_len:5151 prompt_cache_ratio:0.3320226891839629 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 +DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:42 [batch.py:51] router release req id 8 +INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.10845279693603516 s +INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036467552185059 s +DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=307230671625728604567787563093723081941, time:1750769023.0726013s req_ids:[8] +DEBUG 06-24 20:43:43 [manager.py:391] +ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:396.67487144470215ms total_cost_time:396.72040939331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15515 prompt_cache_len:5151 prompt_cache_ratio:0.33200128907508863 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 +DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:43 [batch.py:51] router release req id 8 +INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.10805821418762207 s +INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.1099388599395752 s +DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=45754913226802713990027559320085193234, time:1750769023.467828s req_ids:[8] +DEBUG 06-24 20:43:43 [manager.py:391] +ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:220.01290321350098ms total_cost_time:220.05701065063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15516 prompt_cache_len:5151 prompt_cache_ratio:0.3319798917246713 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 +DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:43 [batch.py:51] router release req id 8 +INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.1091923713684082 s +INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11132264137268066 s +DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=318214888497941041975188300981612976238, time:1750769023.6943517s req_ids:[8] +DEBUG 06-24 20:43:43 [manager.py:391] +ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:218.61982345581055ms total_cost_time:218.66297721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15517 prompt_cache_len:5151 prompt_cache_ratio:0.3319584971321776 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 +DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:43 [batch.py:51] router release req id 8 +INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.1096038818359375 s +INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11199712753295898 s +DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=109436801085966099966744629989268400186, time:1750769023.918232s req_ids:[8] +DEBUG 06-24 20:43:43 [manager.py:391] +ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:213.72485160827637ms total_cost_time:213.76681327819824ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15518 prompt_cache_len:5151 prompt_cache_ratio:0.3319371052970744 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 +DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:44 [batch.py:51] router release req id 8 +INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10857915878295898 s +INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11065101623535156 s +DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=103802723867093625407415147389185553408, time:1750769024.1391547s req_ids:[8] +DEBUG 06-24 20:43:44 [manager.py:391] +ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:213.38510513305664ms total_cost_time:213.43016624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15519 prompt_cache_len:5151 prompt_cache_ratio:0.33191571621882854 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 +DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:44 [batch.py:51] router release req id 8 +INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.1097710132598877 s +INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11186552047729492 s +DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=242984402063095259225873652698373280291, time:1750769024.360155s req_ids:[8] +DEBUG 06-24 20:43:44 [manager.py:391] +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:385.99467277526855ms total_cost_time:386.03997230529785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15520 prompt_cache_len:5151 prompt_cache_ratio:0.3318943298969072 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 +DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:44 [batch.py:51] router release req id 8 +INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10904550552368164 s +INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11099576950073242 s +DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=102059917900976279482206318202318222254, time:1750769024.752716s req_ids:[8] +DEBUG 06-24 20:43:44 [manager.py:391] +ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:218.8570499420166ms total_cost_time:218.89996528625488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15521 prompt_cache_len:5151 prompt_cache_ratio:0.33187294633077763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 +DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:44 [batch.py:51] router release req id 8 +INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10829567909240723 s +INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s +DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=299729287154150161561299181764875804344, time:1750769024.9782145s req_ids:[8] +DEBUG 06-24 20:43:44 [manager.py:391] +ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:217.8504467010498ms total_cost_time:217.8940773010254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15522 prompt_cache_len:5151 prompt_cache_ratio:0.33185156551990724 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 +DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:45 [batch.py:51] router release req id 8 +INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.1095130443572998 s +INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11152219772338867 s +DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=207303940375172202598675770025417374171, time:1750769025.2024257s req_ids:[8] +DEBUG 06-24 20:43:45 [manager.py:391] +ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:211.37762069702148ms total_cost_time:211.42101287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15523 prompt_cache_len:5151 prompt_cache_ratio:0.33183018746376347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 +DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:45 [batch.py:51] router release req id 8 +INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.1092832088470459 s +INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11099910736083984 s +DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=142950815813708327940461603860703940202, time:1750769025.4204268s req_ids:[8] +DEBUG 06-24 20:43:45 [manager.py:391] +ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:211.83252334594727ms total_cost_time:211.87758445739746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15524 prompt_cache_len:5151 prompt_cache_ratio:0.33180881216181396 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 +DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:45 [batch.py:51] router release req id 8 +INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.10939145088195801 s +INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11132168769836426 s +DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=101950293631671734011448909659977498726, time:1750769025.6506877s req_ids:[8] +DEBUG 06-24 20:43:45 [manager.py:391] +ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:404.6914577484131ms total_cost_time:404.73484992980957ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15525 prompt_cache_len:5151 prompt_cache_ratio:0.3317874396135266 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 +DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:45 [batch.py:51] router release req id 8 +INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10904622077941895 s +INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.1109311580657959 s +DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=332371293557360053851842754272485921015, time:1750769026.0508275s req_ids:[8] +DEBUG 06-24 20:43:46 [manager.py:391] +ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:219.32601928710938ms total_cost_time:219.37060356140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15526 prompt_cache_len:5151 prompt_cache_ratio:0.3317660698183692 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 +DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:46 [batch.py:51] router release req id 8 +INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10840916633605957 s +INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11032652854919434 s +DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=30911605617061220223646506835168024326, time:1750769026.2763624s req_ids:[8] +DEBUG 06-24 20:43:46 [manager.py:391] +ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:214.61892127990723ms total_cost_time:214.66398239135742ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15527 prompt_cache_len:5151 prompt_cache_ratio:0.33174470277580986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 +DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:46 [batch.py:51] router release req id 8 +INFO 06-24 20:43:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10843610763549805 s +INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11064028739929199 s +DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=126905134549935820575579145840257770058, time:1750769026.4983764s req_ids:[8] +DEBUG 06-24 20:43:46 [manager.py:391] +ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:218.4758186340332ms total_cost_time:218.51778030395508ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15528 prompt_cache_len:5151 prompt_cache_ratio:0.3317233384853168 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 +DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:46 [batch.py:51] router release req id 8 +INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s +INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s +DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=102514456566867554588566489105507614995, time:1750769026.7221158s req_ids:[8] +DEBUG 06-24 20:43:46 [manager.py:391] +ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:214.47443962097168ms total_cost_time:214.5540714263916ms,out_token_counter:1 mean_per_token_cost_time: 0.07963180541992188ms prompt_token_num:15529 prompt_cache_len:5151 prompt_cache_ratio:0.33170197694635845 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 +DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:46 [batch.py:51] router release req id 8 +INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10939574241638184 s +INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11149168014526367 s +DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=204619035539067143220960315319791039486, time:1750769026.9447358s req_ids:[8] +DEBUG 06-24 20:43:46 [manager.py:391] +ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:217.1328067779541ms total_cost_time:217.1781063079834ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15530 prompt_cache_len:5151 prompt_cache_ratio:0.3316806181584031 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 +DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:47 [batch.py:51] router release req id 8 +INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.1081230640411377 s +INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s +DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=150118097590983921166223722144446790883, time:1750769027.1711185s req_ids:[8] +DEBUG 06-24 20:43:47 [manager.py:391] +INFO 06-24 20:43:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:389.64176177978516ms total_cost_time:389.68634605407715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15531 prompt_cache_len:5151 prompt_cache_ratio:0.33165926212091945 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 +DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:47 [batch.py:51] router release req id 8 +INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.10819673538208008 s +INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037588119506836 s +DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=242542600559515427446593851441123828876, time:1750769027.5656703s req_ids:[8] +DEBUG 06-24 20:43:47 [manager.py:391] +ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:209.35606956481934ms total_cost_time:209.39970016479492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15532 prompt_cache_len:5151 prompt_cache_ratio:0.33163790883337624 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 +DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:47 [batch.py:51] router release req id 8 +INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.10970306396484375 s +INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11189413070678711 s +DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=171464810881854301774516463915539359504, time:1750769027.7821412s req_ids:[8] +DEBUG 06-24 20:43:47 [manager.py:391] +ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:214.22362327575684ms total_cost_time:214.26820755004883ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15533 prompt_cache_len:5151 prompt_cache_ratio:0.3316165582952424 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 +DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:47 [batch.py:51] router release req id 8 +INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.1089773178100586 s +INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11106634140014648 s +DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=308284938768527598047522113813846306806, time:1750769028.003821s req_ids:[8] +DEBUG 06-24 20:43:48 [manager.py:391] +ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:218.7483310699463ms total_cost_time:218.79339218139648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15534 prompt_cache_len:5151 prompt_cache_ratio:0.3315952105059869 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 +DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:48 [batch.py:51] router release req id 8 +INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.1097109317779541 s +INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11153721809387207 s +DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=306627584847641138759538466045388416844, time:1750769028.2294636s req_ids:[8] +DEBUG 06-24 20:43:48 [manager.py:391] +ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:216.54891967773438ms total_cost_time:216.59088134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15535 prompt_cache_len:5151 prompt_cache_ratio:0.33157386546507883 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 +DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:48 [batch.py:51] router release req id 8 +INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.11142349243164062 s +INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11330986022949219 s +DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=249088319425165120783365185202831381600, time:1750769028.4527485s req_ids:[8] +DEBUG 06-24 20:43:48 [manager.py:391] +ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:217.0267105102539ms total_cost_time:217.0712947845459ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15536 prompt_cache_len:5151 prompt_cache_ratio:0.33155252317198763 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 +DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:48 [batch.py:51] router release req id 8 +INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.3107578754425049 s +INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.31271815299987793 s +DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=46338616987134804412961997750335130830, time:1750769028.8855793s req_ids:[8] +DEBUG 06-24 20:43:48 [manager.py:391] +ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:429.05402183532715ms total_cost_time:429.09908294677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15537 prompt_cache_len:5151 prompt_cache_ratio:0.33153118362618267 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 +DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:49 [batch.py:51] router release req id 8 +INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s +INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11074161529541016 s +DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=256304896901180767019829689296353044728, time:1750769029.111985s req_ids:[8] +DEBUG 06-24 20:43:49 [manager.py:391] +ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:216.85051918029785ms total_cost_time:216.89343452453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15538 prompt_cache_len:5151 prompt_cache_ratio:0.33150984682713347 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 +DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:49 [batch.py:51] router release req id 8 +INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10911345481872559 s +INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11107611656188965 s +DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=11914350465585293193335753998047384224, time:1750769029.3375275s req_ids:[8] +DEBUG 06-24 20:43:49 [manager.py:391] +ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:218.6570167541504ms total_cost_time:218.69945526123047ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15539 prompt_cache_len:5151 prompt_cache_ratio:0.3314885127743098 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 +DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:49 [batch.py:51] router release req id 8 +INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10962080955505371 s +INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11184453964233398 s +DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=92559025657682593139588003299918682449, time:1750769029.563439s req_ids:[8] +DEBUG 06-24 20:43:49 [manager.py:391] +ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:217.23246574401855ms total_cost_time:217.27585792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15540 prompt_cache_len:5151 prompt_cache_ratio:0.33146718146718146 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 +DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:49 [batch.py:51] router release req id 8 +INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s +INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11005711555480957 s +DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=138787423009346355910998276199849501270, time:1750769029.7868407s req_ids:[8] +DEBUG 06-24 20:43:49 [manager.py:391] +ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:219.39587593078613ms total_cost_time:219.4387912750244ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15541 prompt_cache_len:5151 prompt_cache_ratio:0.33144585290521844 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 +DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:49 [batch.py:51] router release req id 8 +INFO 06-24 20:43:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10800313949584961 s +INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.10933399200439453 s +DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=15917486968578099426193658055216877790, time:1750769030.0150003s req_ids:[8] +DEBUG 06-24 20:43:50 [manager.py:391] +ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:406.0220718383789ms total_cost_time:406.08739852905273ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:15542 prompt_cache_len:5151 prompt_cache_ratio:0.3314245270878909 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 +DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:50 [batch.py:51] router release req id 8 +INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10936927795410156 s +INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11150431632995605 s +DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=74724516942241345359130765108610129002, time:1750769030.4255998s req_ids:[8] +DEBUG 06-24 20:43:50 [manager.py:391] +ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:220.32666206359863ms total_cost_time:220.37053108215332ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15543 prompt_cache_len:5151 prompt_cache_ratio:0.331403204014669 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 +DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:50 [batch.py:51] router release req id 8 +INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s +INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s +DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=237161794849529447737333821955647323542, time:1750769030.6510277s req_ids:[8] +DEBUG 06-24 20:43:50 [manager.py:391] +ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:217.75317192077637ms total_cost_time:217.79704093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15544 prompt_cache_len:5151 prompt_cache_ratio:0.33138188368502314 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 +DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:50 [batch.py:51] router release req id 8 +INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10813164710998535 s +INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11019206047058105 s +DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=197860938431525613644076390093338610885, time:1750769030.8760686s req_ids:[8] +DEBUG 06-24 20:43:50 [manager.py:391] +ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:219.44522857666016ms total_cost_time:219.49052810668945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15545 prompt_cache_len:5151 prompt_cache_ratio:0.3313605660984239 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 +DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:51 [batch.py:51] router release req id 8 +INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10980010032653809 s +INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11174654960632324 s +DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=118969098824032969123341260923366932207, time:1750769031.105517s req_ids:[8] +DEBUG 06-24 20:43:51 [manager.py:391] +ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:221.2235927581787ms total_cost_time:221.266508102417ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15546 prompt_cache_len:5151 prompt_cache_ratio:0.33133925125434194 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 +DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:51 [batch.py:51] router release req id 8 +INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10801362991333008 s +INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11009836196899414 s +DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=297666862153480019537062366943576659205, time:1750769031.3304858s req_ids:[8] +DEBUG 06-24 20:43:51 [manager.py:391] +ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:43:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 59569.973 tokens/s +DEBUG 06-24 20:43:51 [stats.py:37] Avg prompt tokens throughput: 59562.203 tokens/s +DEBUG 06-24 20:43:51 [stats.py:37] Avg generate tokens throughput: 7.770 tokens/s +INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:387.17007637023926ms total_cost_time:387.21346855163574ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15547 prompt_cache_len:5151 prompt_cache_ratio:0.331317939152248 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 +DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:51 [batch.py:51] router release req id 8 +INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.1094655990600586 s +INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11140894889831543 s +DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=254781524972102490119496142615516110804, time:1750769031.7256694s req_ids:[8] +DEBUG 06-24 20:43:51 [manager.py:391] +ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:220.6132411956787ms total_cost_time:220.6583023071289ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15548 prompt_cache_len:5151 prompt_cache_ratio:0.33129662979161306 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 +DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:51 [batch.py:51] router release req id 8 +INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s +INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11049365997314453 s +DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=70308892265831410812602855225644730294, time:1750769031.9526315s req_ids:[8] +DEBUG 06-24 20:43:51 [manager.py:391] +ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:215.53516387939453ms total_cost_time:215.5773639678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15549 prompt_cache_len:5151 prompt_cache_ratio:0.33127532317190816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 +DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:52 [batch.py:51] router release req id 8 +INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10905170440673828 s +INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11087799072265625 s +DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=97539143484511959104897698645568178075, time:1750769032.174345s req_ids:[8] +DEBUG 06-24 20:43:52 [manager.py:391] +ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:214.7085666656494ms total_cost_time:214.7524356842041ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15550 prompt_cache_len:5151 prompt_cache_ratio:0.3312540192926045 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 +DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:52 [batch.py:51] router release req id 8 +INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10947918891906738 s +INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11160016059875488 s +DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=282804342949524630407896981436059801484, time:1750769032.3964672s req_ids:[8] +DEBUG 06-24 20:43:52 [manager.py:391] +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:215.5911922454834ms total_cost_time:215.63315391540527ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15551 prompt_cache_len:5151 prompt_cache_ratio:0.33123271815317346 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 +DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:52 [batch.py:51] router release req id 8 +INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10886693000793457 s +INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11066007614135742 s +DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=309164303217772779578861095620899456232, time:1750769032.6172438s req_ids:[8] +DEBUG 06-24 20:43:52 [manager.py:391] +ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:216.37916564941406ms total_cost_time:216.42065048217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15552 prompt_cache_len:5151 prompt_cache_ratio:0.33121141975308643 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 +DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:52 [batch.py:51] router release req id 8 +INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10906863212585449 s +INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s +DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=121579550962863252033105642091039241369, time:1750769032.840269s req_ids:[8] +DEBUG 06-24 20:43:52 [manager.py:391] +ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:375.5974769592285ms total_cost_time:375.6420612335205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15553 prompt_cache_len:5151 prompt_cache_ratio:0.3311901240918151 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 +DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:53 [batch.py:51] router release req id 8 +INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s +INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.10979294776916504 s +INFO 06-24 20:43:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=50347335171169126184684192751692342884, time:1750769033.223577s req_ids:[8] +DEBUG 06-24 20:43:53 [manager.py:391] +INFO 06-24 20:43:53 [statics_utils.py:24] mean first cost: 233.45740679691613 ms +INFO 06-24 20:43:53 [statics_utils.py:24] mean per token cost: 0.05678636001360444 ms +ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:220.28183937072754ms total_cost_time:220.32499313354492ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15554 prompt_cache_len:5151 prompt_cache_ratio:0.33116883116883117 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 +DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:53 [batch.py:51] router release req id 8 +INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s +INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.10987401008605957 s +DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=275208670064836212021675735656053088391, time:1750769033.4508846s req_ids:[8] +DEBUG 06-24 20:43:53 [manager.py:391] +ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:173.6316680908203ms total_cost_time:173.67267608642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:15555 prompt_cache_len:5151 prompt_cache_ratio:0.33114754098360655 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 +DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:53 [batch.py:51] router release req id 8 +INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.1094212532043457 s +INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.11130285263061523 s +DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=60087378125098382631853072830392824417, time:1750769033.6325872s req_ids:[8] +DEBUG 06-24 20:43:53 [manager.py:391] +ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:213.26160430908203ms total_cost_time:213.30571174621582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15556 prompt_cache_len:5151 prompt_cache_ratio:0.33112625353561326 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 +DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:53 [batch.py:51] router release req id 8 +INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s +INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.11155509948730469 s +DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=39011637767286973061479284200907232230, time:1750769033.8521712s req_ids:[8] +DEBUG 06-24 20:43:53 [manager.py:391] +ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:216.94326400756836ms total_cost_time:216.99070930480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15557 prompt_cache_len:5151 prompt_cache_ratio:0.33110496882432344 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 +DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:53 [batch.py:51] router release req id 8 +INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10886120796203613 s +INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11095285415649414 s +DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=26632357620585585361142215201953563942, time:1750769034.076187s req_ids:[8] +DEBUG 06-24 20:43:54 [manager.py:391] +ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:420.6821918487549ms total_cost_time:420.72558403015137ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15558 prompt_cache_len:5151 prompt_cache_ratio:0.3310836868492094 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 +DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:54 [batch.py:51] router release req id 8 +INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10837960243225098 s +INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11043381690979004 s +DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=277416376479513446008779012406765754350, time:1750769034.5074878s req_ids:[8] +DEBUG 06-24 20:43:54 [manager.py:391] +ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:226.792573928833ms total_cost_time:226.8376350402832ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15559 prompt_cache_len:5151 prompt_cache_ratio:0.33106240760974354 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 +DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:54 [batch.py:51] router release req id 8 +INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10803532600402832 s +INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012959480285645 s +DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=60558359434636768029288516204385319708, time:1750769034.7373013s req_ids:[8] +DEBUG 06-24 20:43:54 [manager.py:391] +ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:220.03936767578125ms total_cost_time:220.08371353149414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15560 prompt_cache_len:5151 prompt_cache_ratio:0.33104113110539846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 +DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:54 [batch.py:51] router release req id 8 +INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10810565948486328 s +INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11007189750671387 s +DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=34865227742933153772796010995088812573, time:1750769034.962294s req_ids:[8] +DEBUG 06-24 20:43:54 [manager.py:391] +ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:219.09403800964355ms total_cost_time:219.13719177246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15561 prompt_cache_len:5151 prompt_cache_ratio:0.3310198573356468 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 +DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:55 [batch.py:51] router release req id 8 +INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10815930366516113 s +INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.11016368865966797 s +DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=293460912257357475780100269515807154621, time:1750769035.1969485s req_ids:[8] +DEBUG 06-24 20:43:55 [manager.py:391] +ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:231.1840057373047ms total_cost_time:231.22859001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15562 prompt_cache_len:5151 prompt_cache_ratio:0.33099858629996143 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 +DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:55 [batch.py:51] router release req id 8 +INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10921025276184082 s +INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.11129212379455566 s +DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=106826062946423968061798768291017682418, time:1750769035.4250224s req_ids:[8] +DEBUG 06-24 20:43:55 [manager.py:391] +ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:386.80553436279297ms total_cost_time:386.84892654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15563 prompt_cache_len:5151 prompt_cache_ratio:0.3309773179978153 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 +DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:55 [batch.py:51] router release req id 8 +INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10807585716247559 s +INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s +DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=96053962834386165345156740521155962988, time:1750769035.81826s req_ids:[8] +DEBUG 06-24 20:43:55 [manager.py:391] +ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:242.84934997558594ms total_cost_time:242.8908348083496ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15564 prompt_cache_len:5151 prompt_cache_ratio:0.3309560524286816 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 +DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:55 [batch.py:51] router release req id 8 +INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10907101631164551 s +INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s +DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=267241612546131875695847738842228354737, time:1750769036.0807421s req_ids:[8] +DEBUG 06-24 20:43:56 [manager.py:391] +ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:227.97656059265137ms total_cost_time:228.02042961120605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15565 prompt_cache_len:5151 prompt_cache_ratio:0.33093478959203343 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 +DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:56 [batch.py:51] router release req id 8 +INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10927510261535645 s +INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11139130592346191 s +DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=216870776308263750617614993022373564315, time:1750769036.3034408s req_ids:[8] +DEBUG 06-24 20:43:56 [manager.py:391] +ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:218.06931495666504ms total_cost_time:218.11413764953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15566 prompt_cache_len:5151 prompt_cache_ratio:0.3309135294873442 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 +DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:56 [batch.py:51] router release req id 8 +INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s +INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11050820350646973 s +DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=27891753634078613310716833744905710286, time:1750769036.5279362s req_ids:[8] +DEBUG 06-24 20:43:56 [manager.py:391] +ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:218.16229820251465ms total_cost_time:218.21045875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:15567 prompt_cache_len:5151 prompt_cache_ratio:0.3308922721140875 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 +DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:56 [batch.py:51] router release req id 8 +INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10973596572875977 s +INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11185550689697266 s +DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=148498682648414606884499959145094835653, time:1750769036.7533002s req_ids:[8] +DEBUG 06-24 20:43:56 [manager.py:391] +ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:440.08874893188477ms total_cost_time:440.13404846191406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15568 prompt_cache_len:5151 prompt_cache_ratio:0.3308710174717369 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 +DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:57 [batch.py:51] router release req id 8 +INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s +INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11164569854736328 s +DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=18957748361223813692530092821044491426, time:1750769037.199362s req_ids:[8] +DEBUG 06-24 20:43:57 [manager.py:391] +ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:255.60450553894043ms total_cost_time:255.6462287902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15569 prompt_cache_len:5151 prompt_cache_ratio:0.3308497655597662 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 +DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:57 [batch.py:51] router release req id 8 +INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10805416107177734 s +INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.10989713668823242 s +DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=221903994885307164879450266370046169234, time:1750769037.472642s req_ids:[8] +DEBUG 06-24 20:43:57 [manager.py:391] +ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:228.04856300354004ms total_cost_time:228.09219360351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15570 prompt_cache_len:5151 prompt_cache_ratio:0.3308285163776493 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 +DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:57 [batch.py:51] router release req id 8 +INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.11089396476745605 s +INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11359977722167969 s +DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=144701399391632958195971870606180546345, time:1750769037.6988287s req_ids:[8] +DEBUG 06-24 20:43:57 [manager.py:391] +ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:221.29273414611816ms total_cost_time:221.33660316467285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15571 prompt_cache_len:5151 prompt_cache_ratio:0.3308072699248603 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 +DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:57 [batch.py:51] router release req id 8 +INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10839557647705078 s +INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s +DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=323883346104674957022216729842675927066, time:1750769037.9213834s req_ids:[8] +DEBUG 06-24 20:43:57 [manager.py:391] +ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:218.0006504058838ms total_cost_time:218.04547309875488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15572 prompt_cache_len:5151 prompt_cache_ratio:0.3307860262008734 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 +DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:58 [batch.py:51] router release req id 8 +INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10829472541809082 s +INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11043000221252441 s +DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=160500517572896204814024325261048404589, time:1750769038.1482825s req_ids:[8] +DEBUG 06-24 20:43:58 [manager.py:391] +ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.39068222045898ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15573 prompt_cache_len:5151 prompt_cache_ratio:0.3307647852051628 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 +DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:58 [batch.py:51] router release req id 8 +INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10851383209228516 s +INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11050248146057129 s +DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=242814369642300358416487634402168749632, time:1750769038.3708375s req_ids:[8] +DEBUG 06-24 20:43:58 [manager.py:391] +ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:383.8672637939453ms total_cost_time:383.9116096496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15574 prompt_cache_len:5151 prompt_cache_ratio:0.33074354693720304 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 +DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:58 [batch.py:51] router release req id 8 +INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10952639579772949 s +INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11153697967529297 s +DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=82362349742169250307623793880029417966, time:1750769038.7604861s req_ids:[8] +DEBUG 06-24 20:43:58 [manager.py:391] +ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:216.45045280456543ms total_cost_time:216.4938449859619ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15575 prompt_cache_len:5151 prompt_cache_ratio:0.3307223113964687 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 +DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:58 [batch.py:51] router release req id 8 +INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10793685913085938 s +INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11028456687927246 s +DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=274867563622895852186579429952359736612, time:1750769038.9884715s req_ids:[8] +DEBUG 06-24 20:43:58 [manager.py:391] +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:221.94623947143555ms total_cost_time:221.99106216430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15576 prompt_cache_len:5151 prompt_cache_ratio:0.3307010785824345 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 +DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:59 [batch.py:51] router release req id 8 +INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s +INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.11012792587280273 s +DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=57935775165940663640154449206823411741, time:1750769039.2132866s req_ids:[8] +DEBUG 06-24 20:43:59 [manager.py:391] +ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:217.34952926635742ms total_cost_time:217.4086570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15577 prompt_cache_len:5151 prompt_cache_ratio:0.3306798484945753 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 +DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:59 [batch.py:51] router release req id 8 +INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s +INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069869995117188 s +DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=245573929203060346338848107669351688401, time:1750769039.4372563s req_ids:[8] +DEBUG 06-24 20:43:59 [manager.py:391] +ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:217.03338623046875ms total_cost_time:217.07606315612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15578 prompt_cache_len:5151 prompt_cache_ratio:0.33065862113236616 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 +DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:59 [batch.py:51] router release req id 8 +INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10820817947387695 s +INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101982593536377 s +DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=199329132013138563119489454335687447116, time:1750769039.6621404s req_ids:[8] +DEBUG 06-24 20:43:59 [manager.py:391] +ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 +INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:214.20645713806152ms total_cost_time:214.2481803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15579 prompt_cache_len:5151 prompt_cache_ratio:0.3306373964952821 mtp_avg_token_per_step:1.0 +INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 +DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:43:59 [batch.py:51] router release req id 8 +INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.3109908103942871 s +INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.31294822692871094 s +DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=217158442847504299065006605246316436188, time:1750769040.0884008s req_ids:[8] +DEBUG 06-24 20:44:00 [manager.py:391] +ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:429.81815338134766ms total_cost_time:429.86297607421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15580 prompt_cache_len:5151 prompt_cache_ratio:0.33061617458279846 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 +DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:00 [batch.py:51] router release req id 8 +INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10846376419067383 s +INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s +DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=136596989182133262460614423106407248956, time:1750769040.3191776s req_ids:[8] +DEBUG 06-24 20:44:00 [manager.py:391] +ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:217.90814399719238ms total_cost_time:217.95201301574707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15581 prompt_cache_len:5151 prompt_cache_ratio:0.3305949553943906 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 +DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:00 [batch.py:51] router release req id 8 +INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10970783233642578 s +INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11163592338562012 s +DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=141374874344924916512631725113746745222, time:1750769040.5451803s req_ids:[8] +DEBUG 06-24 20:44:00 [manager.py:391] +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:221.36259078979492ms total_cost_time:221.4062213897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15582 prompt_cache_len:5151 prompt_cache_ratio:0.33057373892953407 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 +DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:00 [batch.py:51] router release req id 8 +INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.11149716377258301 s +INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11348748207092285 s +DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=223543650573796889152688291723017509287, time:1750769040.7816372s req_ids:[8] +DEBUG 06-24 20:44:00 [manager.py:391] +ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:224.3812084197998ms total_cost_time:224.4248390197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15583 prompt_cache_len:5151 prompt_cache_ratio:0.33055252518770456 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 +DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:00 [batch.py:51] router release req id 8 +INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10783004760742188 s +INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.10973930358886719 s +DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=119291079920543445537748036772034945876, time:1750769041.0060463s req_ids:[8] +DEBUG 06-24 20:44:01 [manager.py:391] +ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:176.26595497131348ms total_cost_time:176.30863189697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15584 prompt_cache_len:5151 prompt_cache_ratio:0.33053131416837783 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 +DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:01 [batch.py:51] router release req id 8 +INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10798859596252441 s +INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.10965895652770996 s +DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=230067573530730370665353556733192068988, time:1750769041.1878889s req_ids:[8] +DEBUG 06-24 20:44:01 [manager.py:391] +ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:372.8814125061035ms total_cost_time:372.9245662689209ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15585 prompt_cache_len:5151 prompt_cache_ratio:0.33051010587102986 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 +DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:01 [batch.py:51] router release req id 8 +INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10814404487609863 s +INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s +DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=79148854643542804781375309673619687319, time:1750769041.5670257s req_ids:[8] +DEBUG 06-24 20:44:01 [manager.py:391] +ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +DEBUG 06-24 20:44:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 60295.758 tokens/s +DEBUG 06-24 20:44:01 [stats.py:37] Avg prompt tokens throughput: 60288.012 tokens/s +DEBUG 06-24 20:44:01 [stats.py:37] Avg generate tokens throughput: 7.746 tokens/s +INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:221.7864990234375ms total_cost_time:221.82941436767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15586 prompt_cache_len:5151 prompt_cache_ratio:0.33048890029513667 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 +DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:01 [batch.py:51] router release req id 8 +INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s +INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.11073112487792969 s +DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=70481385614710390788127995766509348133, time:1750769041.7953067s req_ids:[8] +DEBUG 06-24 20:44:01 [manager.py:391] +ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:216.71438217163086ms total_cost_time:216.75682067871094ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15587 prompt_cache_len:5151 prompt_cache_ratio:0.3304676974401745 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 +DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:01 [batch.py:51] router release req id 8 +INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10994648933410645 s +INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11202478408813477 s +DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=71709303262601156528156648155815578320, time:1750769042.0212622s req_ids:[8] +DEBUG 06-24 20:44:02 [manager.py:391] +ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:217.73982048034668ms total_cost_time:217.78368949890137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15588 prompt_cache_len:5151 prompt_cache_ratio:0.3304464973056197 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 +DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:02 [batch.py:51] router release req id 8 +INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10897445678710938 s +INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11103248596191406 s +DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=338796347442452518000609775651903139234, time:1750769042.2442138s req_ids:[8] +DEBUG 06-24 20:44:02 [manager.py:391] +ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:214.60390090942383ms total_cost_time:214.6470546722412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15589 prompt_cache_len:5151 prompt_cache_ratio:0.33042529989094876 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 +DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:02 [batch.py:51] router release req id 8 +INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10928654670715332 s +INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11127018928527832 s +DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=162655247608858853905420879662288003624, time:1750769042.463818s req_ids:[8] +DEBUG 06-24 20:44:02 [manager.py:391] +ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:387.0246410369873ms total_cost_time:387.0689868927002ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15590 prompt_cache_len:5151 prompt_cache_ratio:0.33040410519563823 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 +DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:02 [batch.py:51] router release req id 8 +INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10901427268981934 s +INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11086583137512207 s +DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=32848749479221368454130089598432842669, time:1750769042.8598044s req_ids:[8] +DEBUG 06-24 20:44:02 [manager.py:391] +ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:219.8486328125ms total_cost_time:219.89178657531738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15591 prompt_cache_len:5151 prompt_cache_ratio:0.3303829132191649 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 +DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:02 [batch.py:51] router release req id 8 +INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10799121856689453 s +INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s +DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=205391140681309039459769396076018025086, time:1750769043.085207s req_ids:[8] +DEBUG 06-24 20:44:03 [manager.py:391] +ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:212.66961097717285ms total_cost_time:212.72635459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:15592 prompt_cache_len:5151 prompt_cache_ratio:0.33036172396100566 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 +DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:03 [batch.py:51] router release req id 8 +INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.11019492149353027 s +INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11204075813293457 s +DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=19380018098002271237111663721783244297, time:1750769043.3134947s req_ids:[8] +DEBUG 06-24 20:44:03 [manager.py:391] +ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:225.51798820495605ms total_cost_time:225.56114196777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15593 prompt_cache_len:5151 prompt_cache_ratio:0.33034053742063746 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 +DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:03 [batch.py:51] router release req id 8 +INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10837745666503906 s +INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11027693748474121 s +DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=279974663404867781579620751132709683047, time:1750769043.5385728s req_ids:[8] +DEBUG 06-24 20:44:03 [manager.py:391] +ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:217.72384643554688ms total_cost_time:217.76747703552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15594 prompt_cache_len:5151 prompt_cache_ratio:0.3303193535975375 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 +DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:03 [batch.py:51] router release req id 8 +INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s +INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.10996437072753906 s +DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=70156263583445701593495362126811458634, time:1750769043.7625334s req_ids:[8] +DEBUG 06-24 20:44:03 [manager.py:391] +ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:218.89591217041016ms total_cost_time:218.94025802612305ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15595 prompt_cache_len:5151 prompt_cache_ratio:0.3302981724911831 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 +DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:03 [batch.py:51] router release req id 8 +INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10865163803100586 s +INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11051082611083984 s +DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=329634273476295423486024366947665062063, time:1750769043.9869545s req_ids:[8] +DEBUG 06-24 20:44:03 [manager.py:391] +ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:377.44736671447754ms total_cost_time:377.49266624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15596 prompt_cache_len:5151 prompt_cache_ratio:0.33027699410105155 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 +DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:04 [batch.py:51] router release req id 8 +INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.10796856880187988 s +INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.10987067222595215 s +DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=322557533967463232520268152032680585279, time:1750769044.3729513s req_ids:[8] +DEBUG 06-24 20:44:04 [manager.py:391] +ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:212.50200271606445ms total_cost_time:212.55922317504883ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:15597 prompt_cache_len:5151 prompt_cache_ratio:0.3302558184266205 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 +DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:04 [batch.py:51] router release req id 8 +INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.10915160179138184 s +INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.11124968528747559 s +DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=312329460033288899770323793729315839120, time:1750769044.5944364s req_ids:[8] +DEBUG 06-24 20:44:04 [manager.py:391] +ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:213.4549617767334ms total_cost_time:213.4988307952881ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15598 prompt_cache_len:5151 prompt_cache_ratio:0.33023464546736764 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 +DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:04 [batch.py:51] router release req id 8 +INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.1076958179473877 s +INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s +DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=79843873979181730478863219092620128921, time:1750769044.8120863s req_ids:[8] +DEBUG 06-24 20:44:04 [manager.py:391] +ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:213.21964263916016ms total_cost_time:213.26470375061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15599 prompt_cache_len:5151 prompt_cache_ratio:0.33021347522277067 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 +DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:04 [batch.py:51] router release req id 8 +INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s +INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s +DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=139948103148018515538760352719222388026, time:1750769045.032056s req_ids:[8] +DEBUG 06-24 20:44:05 [manager.py:391] +ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:215.45767784118652ms total_cost_time:215.5015468597412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15600 prompt_cache_len:5151 prompt_cache_ratio:0.3301923076923077 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 +DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:05 [batch.py:51] router release req id 8 +INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.1096811294555664 s +INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11170363426208496 s +DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=330887729930812472779024537647204646611, time:1750769045.2546244s req_ids:[8] +DEBUG 06-24 20:44:05 [manager.py:391] +ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:381.21843338012695ms total_cost_time:381.26659393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:15601 prompt_cache_len:5151 prompt_cache_ratio:0.3301711428754567 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 +DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:05 [batch.py:51] router release req id 8 +INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s +INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11067390441894531 s +DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=333422561239658681084997008958819542978, time:1750769045.643798s req_ids:[8] +DEBUG 06-24 20:44:05 [manager.py:391] +ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:219.23518180847168ms total_cost_time:219.29216384887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:15602 prompt_cache_len:5151 prompt_cache_ratio:0.33014998077169594 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 +DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:05 [batch.py:51] router release req id 8 +INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.11032629013061523 s +INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11219000816345215 s +DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=325538447371627327855414416757787265963, time:1750769045.8725553s req_ids:[8] +DEBUG 06-24 20:44:05 [manager.py:391] +ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:216.50314331054688ms total_cost_time:216.56346321105957ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15603 prompt_cache_len:5151 prompt_cache_ratio:0.33012882138050376 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 +DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:06 [batch.py:51] router release req id 8 +INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s +INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.1116495132446289 s +DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=294196879378598490746288246174432098368, time:1750769046.0933654s req_ids:[8] +DEBUG 06-24 20:44:06 [manager.py:391] +ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:217.484712600708ms total_cost_time:217.5285816192627ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15604 prompt_cache_len:5151 prompt_cache_ratio:0.3301076647013586 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 +DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:06 [batch.py:51] router release req id 8 +INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10951852798461914 s +INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11154556274414062 s +DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=85075338928227049722017323278502291142, time:1750769046.3187554s req_ids:[8] +DEBUG 06-24 20:44:06 [manager.py:391] +ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:221.36449813842773ms total_cost_time:221.40789031982422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15605 prompt_cache_len:5151 prompt_cache_ratio:0.3300865107337392 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 +DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:06 [batch.py:51] router release req id 8 +INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.1085820198059082 s +INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11063885688781738 s +DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=285285321113052875781959915141359871321, time:1750769046.5466163s req_ids:[8] +DEBUG 06-24 20:44:06 [manager.py:391] +ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:390.86270332336426ms total_cost_time:390.90704917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15606 prompt_cache_len:5151 prompt_cache_ratio:0.3300653594771242 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 +DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:06 [batch.py:51] router release req id 8 +INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s +INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11003613471984863 s +DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=335833418022450136740304770396554491188, time:1750769046.9432716s req_ids:[8] +DEBUG 06-24 20:44:06 [manager.py:391] +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token +ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:215.1169776916504ms total_cost_time:215.16132354736328ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15607 prompt_cache_len:5151 prompt_cache_ratio:0.3300442109309925 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 +DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:07 [batch.py:51] router release req id 8 +INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s +INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11024904251098633 s +DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=113670200968217977586127072351317721349, time:1750769047.1649494s req_ids:[8] +DEBUG 06-24 20:44:07 [manager.py:391] +ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:217.74983406066895ms total_cost_time:217.79417991638184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15608 prompt_cache_len:5151 prompt_cache_ratio:0.33002306509482315 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 +DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:07 [batch.py:51] router release req id 8 +INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10779404640197754 s +INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.10955476760864258 s +DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=265541947445309822006754916157873628141, time:1750769047.3889048s req_ids:[8] +DEBUG 06-24 20:44:07 [manager.py:391] +ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:173.54249954223633ms total_cost_time:173.5851764678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15609 prompt_cache_len:5151 prompt_cache_ratio:0.3300019219680953 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 +DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:07 [batch.py:51] router release req id 8 +INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s +INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11059355735778809 s +DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=193005792664032784804455577603626392482, time:1750769047.570233s req_ids:[8] +DEBUG 06-24 20:44:07 [manager.py:391] +ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:174.96323585510254ms total_cost_time:175.00758171081543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15610 prompt_cache_len:5151 prompt_cache_ratio:0.32998078155028826 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 +DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:07 [batch.py:51] router release req id 8 +INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10877060890197754 s +INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11060094833374023 s +DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=321716917940814754644547379286405604032, time:1750769047.7514484s req_ids:[8] +DEBUG 06-24 20:44:07 [manager.py:391] +ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:379.25219535827637ms total_cost_time:379.2986869812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15611 prompt_cache_len:5151 prompt_cache_ratio:0.32995964384088144 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 +DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:08 [batch.py:51] router release req id 8 +INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10910964012145996 s +INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11100554466247559 s +DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=316127366364544474139136975786156098144, time:1750769048.1380901s req_ids:[8] +DEBUG 06-24 20:44:08 [manager.py:391] +ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:216.7067527770996ms total_cost_time:216.7503833770752ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15612 prompt_cache_len:5151 prompt_cache_ratio:0.3299385088393543 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 +DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:08 [batch.py:51] router release req id 8 +INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10815620422363281 s +INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s +DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=45763653949086645735861526297465872720, time:1750769048.3611634s req_ids:[8] +DEBUG 06-24 20:44:08 [manager.py:391] +ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:213.98210525512695ms total_cost_time:214.02406692504883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15613 prompt_cache_len:5151 prompt_cache_ratio:0.3299173765451867 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 +DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:08 [batch.py:51] router release req id 8 +INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10932779312133789 s +INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11132931709289551 s +DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=268533939464099257755510652993975488838, time:1750769048.5812194s req_ids:[8] +DEBUG 06-24 20:44:08 [manager.py:391] +ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:213.94586563110352ms total_cost_time:213.9897346496582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15614 prompt_cache_len:5151 prompt_cache_ratio:0.32989624695785835 mtp_avg_token_per_step:1.0 +INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 +DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:08 [batch.py:51] router release req id 8 +INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10983800888061523 s +INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.1118314266204834 s +DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=45696873797961603328040586443287464642, time:1750769048.802643s req_ids:[8] +DEBUG 06-24 20:44:08 [manager.py:391] +ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache +INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 +INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:211.9007110595703ms total_cost_time:211.9443416595459ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15615 prompt_cache_len:5151 prompt_cache_ratio:0.32987512007684916 mtp_avg_token_per_step:1.0 +DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 +DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 +DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 +DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:44:08 [batch.py:51] router release req id 8 +INFO 06-24 20:44:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:44:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:44:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:44:37 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:44:37 [manager.py:283] +DEBUG 06-24 20:44:37 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:44:37 [manager.py:284] +INFO 06-24 20:44:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:44:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:44:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:44:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:45:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:45:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:45:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:45:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:45:38 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:45:38 [manager.py:283] +DEBUG 06-24 20:45:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:45:38 [manager.py:284] +INFO 06-24 20:45:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:45:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:45:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:45:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:46:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:46:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:46:38 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:46:38 [manager.py:283] +DEBUG 06-24 20:46:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:46:38 [manager.py:284] +INFO 06-24 20:46:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:46:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:46:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:46:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:47:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:47:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:47:39 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:47:39 [manager.py:283] +DEBUG 06-24 20:47:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:47:39 [manager.py:284] +INFO 06-24 20:47:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:47:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:47:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:47:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:48:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:48:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:48:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:48:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:48:40 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:48:40 [manager.py:283] +DEBUG 06-24 20:48:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:48:40 [manager.py:284] +INFO 06-24 20:48:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:48:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:48:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:48:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:48:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:48:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:49:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:49:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:49:40 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:49:40 [manager.py:283] +DEBUG 06-24 20:49:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:49:40 [manager.py:284] +INFO 06-24 20:49:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:49:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:50:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:50:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:50:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:50:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:50:41 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:50:41 [manager.py:283] +DEBUG 06-24 20:50:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:50:41 [manager.py:284] +INFO 06-24 20:50:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:50:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:50:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:50:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:50:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:50:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:51:42 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:51:42 [manager.py:283] +DEBUG 06-24 20:51:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:51:42 [manager.py:284] +INFO 06-24 20:51:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:51:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:51:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:51:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:52:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:52:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:52:42 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:52:42 [manager.py:283] +DEBUG 06-24 20:52:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:52:42 [manager.py:284] +INFO 06-24 20:52:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:52:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:52:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:52:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:52:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:52:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:53:43 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:53:43 [manager.py:283] +DEBUG 06-24 20:53:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:53:43 [manager.py:284] +INFO 06-24 20:53:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:53:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:53:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:53:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:54:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:54:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:54:44 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:54:44 [manager.py:283] +DEBUG 06-24 20:54:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:54:44 [manager.py:284] +INFO 06-24 20:54:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:54:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:54:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:55:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:55:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:55:44 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:55:44 [manager.py:283] +DEBUG 06-24 20:55:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:55:44 [manager.py:284] +INFO 06-24 20:55:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:55:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:55:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:55:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:56:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:56:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:56:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:56:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:56:45 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:56:45 [manager.py:283] +DEBUG 06-24 20:56:45 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:56:45 [manager.py:284] +INFO 06-24 20:56:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:56:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:56:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:56:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:56:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:56:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:57:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:57:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:57:46 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:57:46 [manager.py:283] +DEBUG 06-24 20:57:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:57:46 [manager.py:284] +INFO 06-24 20:57:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:57:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:57:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:57:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:57:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:57:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:58:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:58:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 20:58:46 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:58:46 [manager.py:283] +DEBUG 06-24 20:58:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:58:46 [manager.py:284] +INFO 06-24 20:58:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:58:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:58:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:58:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:59:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:59:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 20:59:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 20:59:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:59:47 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:59:47 [manager.py:283] +DEBUG 06-24 20:59:47 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:59:47 [manager.py:284] +INFO 06-24 20:59:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:59:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 20:59:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:00:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:00:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:00:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:00:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:00:48 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:00:48 [manager.py:283] +DEBUG 06-24 21:00:48 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:00:48 [manager.py:284] +INFO 06-24 21:00:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:00:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:00:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:00:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:00:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:01:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:01:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:01:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:01:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:01:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:01:49 [manager.py:283] +DEBUG 06-24 21:01:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:01:49 [manager.py:284] +INFO 06-24 21:01:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:01:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:01:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:01:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:01:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:02:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:02:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:02:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:02:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:02:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:02:49 [manager.py:283] +DEBUG 06-24 21:02:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:02:49 [manager.py:284] +INFO 06-24 21:02:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:02:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:02:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:03:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:03:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:03:50 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:03:50 [manager.py:283] +DEBUG 06-24 21:03:50 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:03:50 [manager.py:284] +INFO 06-24 21:03:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:03:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:03:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:03:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:04:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:04:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:04:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:04:51 [manager.py:283] +DEBUG 06-24 21:04:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:04:51 [manager.py:284] +INFO 06-24 21:04:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:04:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:04:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:04:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:05:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:05:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:05:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:05:51 [manager.py:283] +DEBUG 06-24 21:05:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:05:51 [manager.py:284] +INFO 06-24 21:05:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:05:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:05:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:05:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:06:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:06:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:06:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:06:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:06:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:06:52 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:06:52 [manager.py:283] +DEBUG 06-24 21:06:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:06:52 [manager.py:284] +INFO 06-24 21:06:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:06:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:06:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:06:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:06:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:07:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:07:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:07:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:07:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:07:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:07:53 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:07:53 [manager.py:283] +DEBUG 06-24 21:07:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:07:53 [manager.py:284] +INFO 06-24 21:07:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:07:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:07:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:07:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:07:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:08:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:08:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:08:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +DEBUG 06-24 21:08:53 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:08:53 [manager.py:283] +DEBUG 06-24 21:08:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:08:53 [manager.py:284] +INFO 06-24 21:08:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:08:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:09:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:09:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:09:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:09:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:09:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:09:54 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:09:54 [manager.py:283] +DEBUG 06-24 21:09:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:09:54 [manager.py:284] +INFO 06-24 21:09:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:10:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:10:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:10:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:10:55 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:10:55 [manager.py:283] +DEBUG 06-24 21:10:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:10:55 [manager.py:284] +INFO 06-24 21:10:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:11:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:11:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:11:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:11:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:11:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:11:55 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:11:55 [manager.py:283] +DEBUG 06-24 21:11:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:11:55 [manager.py:284] +INFO 06-24 21:11:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:12:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:12:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:12:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:12:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +DEBUG 06-24 21:12:56 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:12:56 [manager.py:283] +DEBUG 06-24 21:12:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:12:56 [manager.py:284] +INFO 06-24 21:12:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:13:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:13:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:13:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:13:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:13:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:13:57 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:13:57 [manager.py:283] +DEBUG 06-24 21:13:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:13:57 [manager.py:284] +INFO 06-24 21:13:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:14:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:14:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:14:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:14:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:14:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:14:57 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:14:57 [manager.py:283] +DEBUG 06-24 21:14:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:14:57 [manager.py:284] +INFO 06-24 21:14:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:15:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:15:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:15:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:15:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:15:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:15:58 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:15:58 [manager.py:283] +DEBUG 06-24 21:15:58 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:15:58 [manager.py:284] +INFO 06-24 21:15:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:16:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:16:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:16:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:16:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:16:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:16:59 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:16:59 [manager.py:283] +DEBUG 06-24 21:16:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:16:59 [manager.py:284] +INFO 06-24 21:17:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:17:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:17:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:17:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:17:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:17:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:17:59 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:17:59 [manager.py:283] +DEBUG 06-24 21:17:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:17:59 [manager.py:284] +INFO 06-24 21:18:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:18:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:18:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:18:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:19:00 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:19:00 [manager.py:283] +DEBUG 06-24 21:19:00 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:19:00 [manager.py:284] +INFO 06-24 21:19:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:19:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:19:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:19:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:19:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:19:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:20:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:20:01 [manager.py:283] +DEBUG 06-24 21:20:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:20:01 [manager.py:284] +INFO 06-24 21:20:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:20:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:20:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:20:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:20:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:20:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:21:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:21:01 [manager.py:283] +DEBUG 06-24 21:21:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:21:01 [manager.py:284] +INFO 06-24 21:21:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:21:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:21:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:21:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:21:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:21:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:22:02 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:22:02 [manager.py:283] +DEBUG 06-24 21:22:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:22:02 [manager.py:284] +INFO 06-24 21:22:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:22:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:22:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:22:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:22:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:22:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:23:03 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:23:03 [manager.py:283] +DEBUG 06-24 21:23:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:23:03 [manager.py:284] +INFO 06-24 21:23:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:23:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:23:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:23:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:23:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:23:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:24:03 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:24:03 [manager.py:283] +DEBUG 06-24 21:24:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:24:03 [manager.py:284] +INFO 06-24 21:24:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:24:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:24:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:24:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:24:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:24:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:25:04 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:25:04 [manager.py:283] +DEBUG 06-24 21:25:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:25:04 [manager.py:284] +INFO 06-24 21:25:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:25:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:25:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:25:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:25:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:25:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:26:05 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:26:05 [manager.py:283] +DEBUG 06-24 21:26:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:26:05 [manager.py:284] +INFO 06-24 21:26:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:26:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:26:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:26:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:26:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:26:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:27:05 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:27:05 [manager.py:283] +DEBUG 06-24 21:27:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:27:05 [manager.py:284] +INFO 06-24 21:27:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:27:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:27:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:27:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:27:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:27:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:28:06 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:28:06 [manager.py:283] +DEBUG 06-24 21:28:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:28:06 [manager.py:284] +INFO 06-24 21:28:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:28:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:28:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:28:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:29:07 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:29:07 [manager.py:283] +DEBUG 06-24 21:29:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:29:07 [manager.py:284] +INFO 06-24 21:29:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:29:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:29:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:29:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:29:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:29:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:30:07 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:30:07 [manager.py:283] +DEBUG 06-24 21:30:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:30:07 [manager.py:284] +INFO 06-24 21:30:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:30:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:30:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:30:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:30:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:30:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:30:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:30:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:30:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:30:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:31:08 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:31:08 [manager.py:283] +DEBUG 06-24 21:31:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:31:08 [manager.py:284] +INFO 06-24 21:31:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:31:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:31:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:31:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:32:09 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:32:09 [manager.py:283] +DEBUG 06-24 21:32:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:32:09 [manager.py:284] +INFO 06-24 21:32:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:32:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:32:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:32:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:32:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:32:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:33:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:33:10 [manager.py:283] +DEBUG 06-24 21:33:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:33:10 [manager.py:284] +INFO 06-24 21:33:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:33:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:33:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:33:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:33:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:33:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:33:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:33:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:33:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:33:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:34:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:34:10 [manager.py:283] +DEBUG 06-24 21:34:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:34:10 [manager.py:284] +INFO 06-24 21:34:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:34:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:34:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:34:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:34:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:34:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:35:11 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:35:11 [manager.py:283] +DEBUG 06-24 21:35:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:35:11 [manager.py:284] +INFO 06-24 21:35:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:35:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:35:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:35:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:36:12 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:36:12 [manager.py:283] +DEBUG 06-24 21:36:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:36:12 [manager.py:284] +INFO 06-24 21:36:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:36:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:36:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:36:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:36:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:36:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:37:12 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:37:12 [manager.py:283] +DEBUG 06-24 21:37:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:37:12 [manager.py:284] +INFO 06-24 21:37:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:37:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:37:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:37:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:37:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:37:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:38:13 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:38:13 [manager.py:283] +DEBUG 06-24 21:38:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:38:13 [manager.py:284] +INFO 06-24 21:38:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:38:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:38:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:38:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:38:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:38:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:39:14 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:39:14 [manager.py:283] +DEBUG 06-24 21:39:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:39:14 [manager.py:284] +INFO 06-24 21:39:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:39:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:39:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:39:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:39:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:39:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:40:14 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:40:14 [manager.py:283] +DEBUG 06-24 21:40:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:40:14 [manager.py:284] +INFO 06-24 21:40:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:40:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:40:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:40:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:41:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:41:15 [manager.py:283] +DEBUG 06-24 21:41:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:41:15 [manager.py:284] +INFO 06-24 21:41:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:41:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:41:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:41:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:42:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:42:16 [manager.py:283] +DEBUG 06-24 21:42:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:42:16 [manager.py:284] +INFO 06-24 21:42:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:42:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:42:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:42:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:42:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:42:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:42:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:42:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:43:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:43:16 [manager.py:283] +DEBUG 06-24 21:43:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:43:16 [manager.py:284] +INFO 06-24 21:43:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:43:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:43:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:43:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:43:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:43:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:44:17 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:44:17 [manager.py:283] +DEBUG 06-24 21:44:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:44:17 [manager.py:284] +INFO 06-24 21:44:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:44:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:44:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:44:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:44:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:44:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:44:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:44:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:45:17 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:45:17 [manager.py:283] +DEBUG 06-24 21:45:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:45:17 [manager.py:284] +INFO 06-24 21:45:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:45:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:45:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:45:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:45:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:45:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:45:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:45:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:45:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:45:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:46:18 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:46:18 [manager.py:283] +DEBUG 06-24 21:46:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:46:18 [manager.py:284] +INFO 06-24 21:46:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:46:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:46:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:46:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:46:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:46:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:46:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:46:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:46:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:47:18 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:47:18 [manager.py:283] +DEBUG 06-24 21:47:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:47:18 [manager.py:284] +INFO 06-24 21:47:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:47:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:47:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:39 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:46 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:47:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:47:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:47:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:48:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:48:19 [manager.py:283] +DEBUG 06-24 21:48:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:48:19 [manager.py:284] +INFO 06-24 21:48:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:48:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:48:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:48:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:48:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:53 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:48:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:48:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:48:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:48:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:00 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:07 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:14 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:49:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:49:20 [manager.py:283] +DEBUG 06-24 21:49:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:49:20 [manager.py:284] +INFO 06-24 21:49:21 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:49:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:49:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:49:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:28 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:35 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:42 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:49:49 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:49:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:49:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:49:56 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:49:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:03 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:10 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:17 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:50:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:50:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:50:20 [manager.py:283] +DEBUG 06-24 21:50:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:50:20 [manager.py:284] +INFO 06-24 21:50:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:50:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:50:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:50:24 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:31 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:38 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:45 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:50:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:52 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:50:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:50:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:50:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:50:59 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:06 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:13 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:20 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:51:21 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:51:21 [manager.py:283] +DEBUG 06-24 21:51:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:51:21 [manager.py:284] +INFO 06-24 21:51:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:51:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:51:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:27 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:34 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:41 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:48 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:51:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:51:55 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:51:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:02 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:09 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:16 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:52:21 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:52:21 [manager.py:283] +DEBUG 06-24 21:52:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:52:21 [manager.py:284] +INFO 06-24 21:52:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:52:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:52:23 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:30 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:37 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:44 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:51 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:52:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:52:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:52:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:53:22 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:53:22 [manager.py:283] +DEBUG 06-24 21:53:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:53:22 [manager.py:284] +INFO 06-24 21:53:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:53:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:53:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:53:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:53:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:53:58 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:05 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:12 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:19 [decode_infer_rpyc.py:178] kv time out reqs: [] +ERROR 06-24 21:54:19 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:19 [pd_loop.py:121] no close frame received or sent +ERROR 06-24 21:54:19 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task +ERROR 06-24 21:54:19 [pd_loop.py:121] recv_bytes = await websocket.recv() +ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv +ERROR 06-24 21:54:19 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc +ERROR 06-24 21:54:19 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent +INFO 06-24 21:54:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +DEBUG 06-24 21:54:23 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:54:23 [manager.py:283] +DEBUG 06-24 21:54:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:54:23 [manager.py:284] +INFO 06-24 21:54:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:54:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:54:26 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:29 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:29 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:29 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:29 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:29 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:29 [pd_loop.py:121] return await self +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:29 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:33 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:39 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:39 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:39 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:39 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:39 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:39 [pd_loop.py:121] return await self +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:39 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:40 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:43 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:47 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:49 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:49 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:49 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:49 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:49 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:49 [pd_loop.py:121] return await self +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:49 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:50 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:54:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +INFO 06-24 21:54:54 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:57 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:54:59 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:59 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:59 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:59 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:59 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:59 [pd_loop.py:121] return await self +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:59 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:55:01 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:04 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:08 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:09 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:55:09 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:55:09 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:55:09 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:55:09 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:55:09 [pd_loop.py:121] return await self +ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:55:09 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:55:09 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:55:09 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:55:09 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:55:09 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:55:09 [decode_kv_move_manager.py:206] connect id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df disconnect +ERROR 06-24 21:55:09 [decode_trans_obj.py:180] put_to_radix_loop thread quit, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 +ERROR 06-24 21:55:09 [decode_trans_obj.py:136] kv_move_loop thread quit +ERROR 06-24 21:55:09 [decode_trans_obj.py:226] trans obj del start, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 +ERROR 06-24 21:55:09 [decode_trans_obj.py:249] trans obj deled, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 +INFO 06-24 21:55:09 [decode_trans_process.py:123] destory PDTransLeaveInfo(decode_id=147275795944234129756100418482494441380, prefill_id=163479035537597727162519172725806046247, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') nccl communicator. +INFO 06-24 21:55:11 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:15 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:55:18 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:19 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:55:19 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:55:19 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:55:19 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:55:19 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:55:19 [pd_loop.py:121] return await self +ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:55:19 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:55:19 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:55:19 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:55:19 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:55:19 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:55:22 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:55:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms +INFO 06-24 21:55:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms +DEBUG 06-24 21:55:23 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:55:23 [manager.py:283] +DEBUG 06-24 21:55:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:55:23 [manager.py:284] +INFO 06-24 21:55:25 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:29 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:29 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:55:29 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:55:29 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:55:29 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:55:29 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:55:29 [pd_loop.py:121] return await self +ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:55:29 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:55:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:55:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:55:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:55:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:55:32 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:36 [decode_infer_rpyc.py:178] kv time out reqs: [] +INFO 06-24 21:55:38 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... + +ERROR 06-24 21:55:38 [decode_kv_move_manager.py:301] +Traceback (most recent call last): + File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/decode_node_impl/decode_kv_move_manager.py", line 299, in timer_loop + time.sleep(3.5) +KeyboardInterrupt +INFO 06-24 21:55:38 [start_utils.py:106] Killing child process 1214345 +INFO 06-24 21:55:38 [start_utils.py:106] Killing child process 1214699 +INFO 06-24 21:55:38 [start_utils.py:108] Killing parent process 1214339 +INFO 06-24 21:55:38 [start_utils.py:53] Killing parent process 1212075 +INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213381 +INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213863 +INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213865 +INFO 06-24 21:55:38 [start_utils.py:53] Killing parent process 1212190 +INFO 06-24 21:55:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 21:55:39 [start_utils.py:108] Killing parent process 1214339 +INFO 06-24 21:55:39 [start_utils.py:53] Killing parent process 1212190 +INFO 06-24 21:55:39 [start_utils.py:53] Killing parent process 1212191 +INFO 06-24 21:55:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 21:55:39 [start_utils.py:108] Killing parent process 1214339 +WARNING 06-24 21:55:39 [start_utils.py:56] Process 1212191 does not exist. +INFO 06-24 21:55:39 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 21:55:39 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_master.log b/pd_master.log new file mode 100644 index 000000000..0dd78a2f7 --- /dev/null +++ b/pd_master.log @@ -0,0 +1,4529 @@ +INFO 06-24 21:55:19 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:20 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:21 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:55:23 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:23 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:23 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +[api_server] pd_master_start +[pd_master_start] pd_chunk_size: 0 +INFO 06-24 21:55:23 [api_start.py:344] use tgi api: False +INFO 06-24 21:55:23 [api_start.py:345] all start args:Namespace(run_mode='pd_master', host='127.0.1.1', port=60011, httpserver_workers=1, zmq_mode='ipc:///tmp/', pd_master_ip='0.0.0.0', pd_master_port=1212, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=None, mem_fraction=0.9, batch_max_tokens=None, eos_id=None, tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=28765, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type=None, return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=None, visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=0, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, pd_node_id=0) +INFO 06-24 21:55:25 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 21:55:25 [api_start.py:57] start process pid 1410832 +INFO 06-24 21:55:25 [api_start.py:58] http server pid 1411519 +INFO 06-24 21:55:28 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:29 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:30 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:55:32 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:32 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:32 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:55:32 [api_http.py:326] server start up +INFO 06-24 21:55:32 [atomic_lock.py:29] link lock shm 60011_pd_master_req_id_gen_lock +INFO 06-24 21:55:33 [api_http.py:330] server start up ok, loop use is +INFO 06-24 21:56:03 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 21:56:03 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:56:28 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 33900 +INFO 06-24 21:56:28 [api_http.py:268] recieved regist_json {'node_id': 287595743282619216970276961428881885738, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10216, 'detokenization_port': 10253, 'detokenization_pub_port': 10154, 'visual_port': 10095, 'audio_port': 10138, 'cache_port': 10064, 'metric_port': 10217, 'pd_node_infer_rpyc_ports': [10133], 'pd_node_id': 287595743282619216970276961428881885738, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 21:56:28 [manager.py:67] mode: prefill url: 127.0.1.1:8017 registed +INFO 06-24 21:56:36 [api_http.py:291] kv_move_status Client connected from IP: 127.0.0.1, Port: 39458 +INFO 06-24 21:56:44 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 48452 +INFO 06-24 21:56:44 [api_http.py:268] recieved regist_json {'node_id': 148730891575017957868136796871489876076, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10011, 'detokenization_port': 10239, 'detokenization_pub_port': 10144, 'visual_port': 10176, 'audio_port': 10271, 'cache_port': 10117, 'metric_port': 10125, 'pd_node_infer_rpyc_ports': [10126], 'pd_node_id': 148730891575017957868136796871489876076, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 21:56:44 [manager.py:67] mode: decode url: 127.0.1.1:8118 registed +INFO 06-24 21:57:03 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 21:57:03 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:72 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:80 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:88 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:96 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:104 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:112 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:120 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:128 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:136 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:144 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:152 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:160 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:168 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:176 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:184 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:192 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:200 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:208 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:216 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:224 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:232 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:240 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:248 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:256 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:264 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:272 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:280 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:288 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:296 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:304 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:312 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:320 +INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:328 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 +INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14560, pd_chunk_size: 0 +INFO 06-24 21:57:33 [statics_utils.py:24] mean first cost: 9286.72189950943 ms +INFO 06-24 21:57:33 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:58:03 [statics_utils.py:24] mean first cost: 9286.72189950943 ms +INFO 06-24 21:58:03 [statics_utils.py:24] mean per token cost: 0.0 ms +WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 8 kv move time out err, server is busy now. +ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 8 +ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:17 [api_http.py:183] await fut +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:17 [api_http.py:183] raise e +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46184 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 24 kv move time out err, server is busy now. +ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 24 +ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:17 [api_http.py:183] await fut +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:17 [api_http.py:183] raise e +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46214 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 32 kv move time out err, server is busy now. +ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 32 +ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:17 [api_http.py:183] await fut +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:17 [api_http.py:183] +ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:17 [api_http.py:183] raise e +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46216 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:18 [manager.py:236] group_request_id: 40 kv move time out err, server is busy now. +ERROR 06-24 21:58:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:18 [manager.py:349] aborted group_request_id 40 +ERROR 06-24 21:58:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:18 [api_http.py:183] await fut +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:18 [api_http.py:183] raise e +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46218 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:18 [manager.py:236] group_request_id: 48 kv move time out err, server is busy now. +ERROR 06-24 21:58:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:18 [manager.py:349] aborted group_request_id 48 +ERROR 06-24 21:58:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:18 [api_http.py:183] await fut +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:18 [api_http.py:183] +ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:18 [api_http.py:183] raise e +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46234 - "POST /generate HTTP/1.1" 503 +INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 +INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 +INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 +INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 +INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 56 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 56 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46238 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 64 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 64 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46252 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 72 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 72 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46254 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 80 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 80 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46262 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 88 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 88 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46272 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 96 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 96 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46282 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 104 kv move time out err, server is busy now. +ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 104 +ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:19 [api_http.py:183] await fut +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:19 [api_http.py:183] +ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:19 [api_http.py:183] raise e +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46294 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 112 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 112 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46300 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 120 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 120 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46310 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 128 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 128 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46324 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 136 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 136 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46340 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 144 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 144 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46348 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 152 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 152 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46362 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 160 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 160 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46376 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 168 kv move time out err, server is busy now. +ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 168 +ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:20 [api_http.py:183] await fut +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:20 [api_http.py:183] +ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:20 [api_http.py:183] raise e +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46386 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 176 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 176 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46402 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 184 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 184 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46414 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 192 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 192 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46430 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 200 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 200 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46436 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 208 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 208 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46448 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 216 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 216 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46456 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 224 kv move time out err, server is busy now. +ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 224 +ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:21 [api_http.py:183] await fut +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:21 [api_http.py:183] +ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:21 [api_http.py:183] raise e +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46472 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 232 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 232 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46482 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 240 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 240 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46494 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 248 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 248 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46500 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 256 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 256 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46506 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 264 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 264 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46520 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 272 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 272 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46532 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 280 kv move time out err, server is busy now. +ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 280 +ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:22 [api_http.py:183] await fut +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:22 [api_http.py:183] +ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:22 [api_http.py:183] raise e +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46542 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 288 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 288 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46560 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 296 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 296 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46558 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 304 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 304 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46570 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 312 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 312 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46586 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 320 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 320 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46594 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 328 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 328 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46608 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 336 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 336 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46612 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 344 kv move time out err, server is busy now. +ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 344 +ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:23 [api_http.py:183] await fut +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:23 [api_http.py:183] +ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:23 [api_http.py:183] raise e +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46620 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 352 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 352 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46622 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 360 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 360 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46636 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 368 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 368 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46638 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 376 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 376 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46652 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 384 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 384 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46660 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 392 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 392 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46670 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 400 kv move time out err, server is busy now. +ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 400 +ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:24 [api_http.py:183] await fut +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:24 [api_http.py:183] +ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:24 [api_http.py:183] raise e +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46678 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 408 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 408 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46690 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 416 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 416 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46706 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 424 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 424 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46718 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 432 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 432 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46734 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 440 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 440 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46748 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 448 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 448 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46762 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 456 kv move time out err, server is busy now. +ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 456 +ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:25 [api_http.py:183] await fut +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:25 [api_http.py:183] +ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:25 [api_http.py:183] raise e +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46768 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 464 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 464 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46780 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 472 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 472 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46792 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 480 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 480 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46802 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 488 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 488 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46814 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 496 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 496 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46830 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 504 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 504 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46846 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 512 kv move time out err, server is busy now. +ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 512 +ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:26 [api_http.py:183] await fut +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:26 [api_http.py:183] +ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:26 [api_http.py:183] raise e +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46856 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 520 kv move time out err, server is busy now. +ERROR 06-24 21:58:27 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:27 [manager.py:349] aborted group_request_id 520 +ERROR 06-24 21:58:27 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:27 [api_http.py:183] await fut +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:27 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:27 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:27 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:27 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:27 [api_http.py:183] raise e +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:27 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:27 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46858 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 528 kv move time out err, server is busy now. +ERROR 06-24 21:58:27 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:27 [manager.py:349] aborted group_request_id 528 +ERROR 06-24 21:58:27 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:27 [api_http.py:183] await fut +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:27 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:27 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:27 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:27 [api_http.py:183] +ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:27 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:27 [api_http.py:183] raise e +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:27 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:27 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46862 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 536 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 536 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46870 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 544 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 544 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46878 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 552 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 552 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46880 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 560 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 560 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46896 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 568 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 568 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46900 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 576 kv move time out err, server is busy now. +ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 576 +ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:28 [api_http.py:183] await fut +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:28 [api_http.py:183] +ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:28 [api_http.py:183] raise e +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46916 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 584 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 584 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46918 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 592 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 592 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46926 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 600 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 600 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46928 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 608 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 608 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46932 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 616 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 616 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46938 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 624 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 624 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46948 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 632 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 632 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46956 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 640 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 640 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46962 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 648 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 648 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46974 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 656 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 656 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46976 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 664 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 664 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46986 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 672 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 672 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:46990 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 680 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 680 +ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:29 [api_http.py:183] await fut +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:29 [api_http.py:183] +ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:29 [api_http.py:183] raise e +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47002 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 688 kv move time out err, server is busy now. +ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:30 [manager.py:349] aborted group_request_id 688 +ERROR 06-24 21:58:30 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:30 [api_http.py:183] await fut +ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:30 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:30 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:30 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:30 [api_http.py:183] +ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:30 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:30 [api_http.py:183] raise e +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:30 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:30 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47008 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 696 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 696 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47016 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 704 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 704 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47018 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 712 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 712 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47026 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 720 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 720 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47028 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 728 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 728 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47036 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 736 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 736 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47042 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 744 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 744 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47052 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 752 kv move time out err, server is busy now. +ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 752 +ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:31 [api_http.py:183] await fut +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:31 [api_http.py:183] +ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:31 [api_http.py:183] raise e +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47054 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 760 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 760 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47056 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 768 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 768 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47068 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 776 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 776 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47078 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 784 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 784 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47074 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 792 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 792 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47086 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 800 kv move time out err, server is busy now. +ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 800 +ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:58:32 [api_http.py:183] await fut +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:58:32 [api_http.py:183] +ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:58:32 [api_http.py:183] raise e +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:47094 - "POST /generate HTTP/1.1" 503 +INFO 06-24 21:58:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms +INFO 06-24 21:58:33 [statics_utils.py:24] mean per token cost: 0.0 ms +WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 808 kv move time out err, server is busy now. +ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 808 +ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:59:18 [api_http.py:183] await fut +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:59:18 [api_http.py:183] raise e +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:51404 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 816 kv move time out err, server is busy now. +ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 816 +ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:59:18 [api_http.py:183] await fut +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:59:18 [api_http.py:183] raise e +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:51416 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 824 kv move time out err, server is busy now. +ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 824 +ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:59:18 [api_http.py:183] await fut +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:59:18 [api_http.py:183] raise e +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:51424 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 832 kv move time out err, server is busy now. +ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 832 +ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:59:18 [api_http.py:183] await fut +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:59:18 [api_http.py:183] raise e +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:51432 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 840 kv move time out err, server is busy now. +ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 840 +ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 21:59:18 [api_http.py:183] await fut +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 21:59:18 [api_http.py:183] +ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 21:59:18 [api_http.py:183] raise e +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:51436 - "POST /generate HTTP/1.1" 503 +INFO 06-24 21:59:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms +INFO 06-24 21:59:33 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 22:00:03 [statics_utils.py:24] mean first cost: 8877.26598013015 ms +INFO 06-24 22:00:03 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 22:00:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms +INFO 06-24 22:00:33 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 22:01:03 [statics_utils.py:24] mean first cost: 8877.26598013015 ms +INFO 06-24 22:01:03 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 22:01:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... + +INFO 06-24 22:01:39 [start_utils.py:106] Killing child process 1412175 +INFO 06-24 22:01:39 [start_utils.py:106] Killing child process 1412182 +INFO 06-24 22:01:39 [start_utils.py:108] Killing parent process 1411519 +INFO 06-24 22:01:39 [start_utils.py:53] Killing parent process 1411353 +INFO 06-24 22:01:39 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 22:01:39 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_master_4096.log b/pd_master_4096.log new file mode 100644 index 000000000..ea00b53d9 --- /dev/null +++ b/pd_master_4096.log @@ -0,0 +1,15896 @@ +INFO 06-24 19:53:44 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:53:45 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:53:46 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:53:48 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:53:48 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:53:48 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +[api_server] pd_master_start +[pd_master_start] pd_chunk_size: 4096 +INFO 06-24 19:53:48 [api_start.py:344] use tgi api: False +INFO 06-24 19:53:48 [api_start.py:345] all start args:Namespace(run_mode='pd_master', host='127.0.1.1', port=60011, httpserver_workers=1, zmq_mode='ipc:///tmp/', pd_master_ip='0.0.0.0', pd_master_port=1212, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=None, mem_fraction=0.9, batch_max_tokens=None, eos_id=None, tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=28765, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type=None, return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=None, visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=0, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=4096, pd_node_id=0) +INFO 06-24 19:53:50 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 19:53:50 [api_start.py:57] start process pid 1210636 +INFO 06-24 19:53:50 [api_start.py:58] http server pid 1211009 +INFO 06-24 19:53:53 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:53:54 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:53:55 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:53:57 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:53:57 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:53:57 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:53:57 [api_http.py:326] server start up +INFO 06-24 19:53:57 [atomic_lock.py:29] link lock shm 60011_pd_master_req_id_gen_lock +INFO 06-24 19:53:57 [api_http.py:330] server start up ok, loop use is +INFO 06-24 19:54:27 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:54:27 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:54:43 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 36276 +INFO 06-24 19:54:43 [api_http.py:268] recieved regist_json {'node_id': 163479035537597727162519172725806046247, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10173, 'detokenization_port': 10076, 'detokenization_pub_port': 10098, 'visual_port': 10080, 'audio_port': 10160, 'cache_port': 10233, 'metric_port': 10089, 'pd_node_infer_rpyc_ports': [10247], 'pd_node_id': 163479035537597727162519172725806046247, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 19:54:43 [manager.py:67] mode: prefill url: 127.0.1.1:8017 registed +INFO 06-24 19:54:46 [api_http.py:291] kv_move_status Client connected from IP: 127.0.0.1, Port: 36288 +INFO 06-24 19:54:53 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 41776 +INFO 06-24 19:54:53 [api_http.py:268] recieved regist_json {'node_id': 147275795944234129756100418482494441380, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10135, 'detokenization_port': 10143, 'detokenization_pub_port': 10207, 'visual_port': 10165, 'audio_port': 10059, 'cache_port': 10114, 'metric_port': 10051, 'pd_node_infer_rpyc_ports': [10236], 'pd_node_id': 147275795944234129756100418482494441380, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 19:54:53 [manager.py:67] mode: decode url: 127.0.1.1:8118 registed +INFO 06-24 19:54:57 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:54:57 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:55:27 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:27 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:55:57 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:57 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:56:57 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:56:57 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:57:57 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:57:57 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:58:27 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:58:27 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:58:57 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:58:57 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:59:27 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:59:27 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 +INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:43 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 first_token_cost:3264.9035453796387ms total_cost_time:3589.8795127868652ms,out_token_counter:2 mean_per_token_cost_time: 162.48798370361328ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40222 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:43 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:44 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 first_token_cost:4415.666103363037ms total_cost_time:4719.113349914551ms,out_token_counter:2 mean_per_token_cost_time: 151.72362327575684ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40270 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:45 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 first_token_cost:5572.38507270813ms total_cost_time:5953.195095062256ms,out_token_counter:2 mean_per_token_cost_time: 190.405011177063ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40344 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 +INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 first_token_cost:5570.23286819458ms total_cost_time:5978.773593902588ms,out_token_counter:2 mean_per_token_cost_time: 204.2703628540039ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40366 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 +INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 first_token_cost:5569.038391113281ms total_cost_time:6003.514051437378ms,out_token_counter:2 mean_per_token_cost_time: 217.23783016204834ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40380 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:47 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 first_token_cost:6644.745111465454ms total_cost_time:6978.367805480957ms,out_token_counter:2 mean_per_token_cost_time: 166.81134700775146ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40416 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:47 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 +INFO 06-24 19:59:47 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 first_token_cost:6642.567157745361ms total_cost_time:7003.440856933594ms,out_token_counter:2 mean_per_token_cost_time: 180.4368495941162ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40438 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:47 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:50 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 first_token_cost:9978.71732711792ms total_cost_time:10221.765756607056ms,out_token_counter:2 mean_per_token_cost_time: 121.52421474456787ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40610 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:50 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:51 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 first_token_cost:11109.405755996704ms total_cost_time:11378.748655319214ms,out_token_counter:2 mean_per_token_cost_time: 134.67144966125488ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40710 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:51 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:53 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 first_token_cost:13133.885145187378ms total_cost_time:13412.688255310059ms,out_token_counter:2 mean_per_token_cost_time: 139.40155506134033ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40866 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:53 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:55 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 first_token_cost:15290.080785751343ms total_cost_time:15553.339958190918ms,out_token_counter:2 mean_per_token_cost_time: 131.6295862197876ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40976 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:55 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:57 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 first_token_cost:10523.409843444824ms total_cost_time:10836.05980873108ms,out_token_counter:2 mean_per_token_cost_time: 156.32498264312744ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:48768 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:57 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 19:59:57 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 first_token_cost:5573.999404907227ms total_cost_time:5863.377809524536ms,out_token_counter:2 mean_per_token_cost_time: 144.68920230865479ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:48836 - "POST /generate HTTP/1.1" 200 +INFO 06-24 19:59:57 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 +INFO 06-24 19:59:57 [statics_utils.py:24] mean first cost: 8480.433994105884 ms +INFO 06-24 19:59:57 [statics_utils.py:24] mean per token cost: 161.66261526254507 ms +INFO 06-24 20:00:27 [statics_utils.py:24] mean first cost: 8409.328593616992 ms +INFO 06-24 20:00:27 [statics_utils.py:24] mean per token cost: 161.66261526254507 ms +INFO 06-24 20:00:32 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 first_token_cost:6648.284196853638ms total_cost_time:52575.37841796875ms,out_token_counter:1379 mean_per_token_cost_time: 33.30463685360051ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40402 - "POST /generate HTTP/1.1" 200 +INFO 06-24 20:00:32 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 +WARNING 06-24 20:00:40 [manager.py:236] group_request_id: 16 kv move time out err, server is busy now. +ERROR 06-24 20:00:40 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:40 [manager.py:349] aborted group_request_id 16 +ERROR 06-24 20:00:40 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:40 [api_http.py:183] await fut +ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:40 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:40 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:40 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:40 [api_http.py:183] +ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:40 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:40 [api_http.py:183] raise e +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:40 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:40 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40082 - "POST /generate HTTP/1.1" 503 +INFO 06-24 20:00:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 24 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 24 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40102 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 32 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 32 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40118 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 40 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 40 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40124 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 48 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 48 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40136 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 56 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 56 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40148 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 64 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 64 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40156 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 72 kv move time out err, server is busy now. +ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 72 +ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:41 [api_http.py:183] await fut +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:41 [api_http.py:183] +ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:41 [api_http.py:183] raise e +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40160 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 80 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 80 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40162 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 88 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 88 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40178 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 96 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 96 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40194 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 104 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 104 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40204 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 112 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 112 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40214 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 136 kv move time out err, server is busy now. +ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 136 +ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:43 [api_http.py:183] await fut +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:43 [api_http.py:183] +ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:43 [api_http.py:183] raise e +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40224 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 144 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 144 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40240 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 152 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 152 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40244 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 160 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 160 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40258 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 168 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 168 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40268 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 184 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 184 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40282 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 192 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 192 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40298 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 200 kv move time out err, server is busy now. +ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 200 +ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:44 [api_http.py:183] await fut +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:44 [api_http.py:183] +ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:44 [api_http.py:183] raise e +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40308 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 208 kv move time out err, server is busy now. +ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 208 +ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:45 [api_http.py:183] await fut +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:45 [api_http.py:183] raise e +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40312 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 216 kv move time out err, server is busy now. +ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 216 +ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:45 [api_http.py:183] await fut +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:45 [api_http.py:183] raise e +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40336 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 224 kv move time out err, server is busy now. +ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 224 +ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:45 [api_http.py:183] await fut +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:45 [api_http.py:183] raise e +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40320 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 232 kv move time out err, server is busy now. +ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 232 +ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:45 [api_http.py:183] await fut +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:45 [api_http.py:183] raise e +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40338 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 248 kv move time out err, server is busy now. +ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 248 +ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:45 [api_http.py:183] await fut +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:45 [api_http.py:183] +ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:45 [api_http.py:183] raise e +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40360 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 272 kv move time out err, server is busy now. +ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 272 +ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:46 [api_http.py:183] await fut +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:46 [api_http.py:183] raise e +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40390 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 288 kv move time out err, server is busy now. +ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 288 +ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:46 [api_http.py:183] await fut +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:46 [api_http.py:183] raise e +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40404 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 296 kv move time out err, server is busy now. +ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 296 +ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:46 [api_http.py:183] await fut +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:46 [api_http.py:183] raise e +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40410 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 312 kv move time out err, server is busy now. +ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 312 +ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:46 [api_http.py:183] await fut +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:46 [api_http.py:183] +ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:46 [api_http.py:183] raise e +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40424 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 328 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 328 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40446 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 336 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 336 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40448 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 344 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 344 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40464 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 352 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 352 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40476 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 360 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 360 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40486 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 368 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 368 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40496 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 376 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 376 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40512 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 384 kv move time out err, server is busy now. +ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 384 +ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:48 [api_http.py:183] await fut +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:48 [api_http.py:183] +ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:48 [api_http.py:183] raise e +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40528 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 392 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 392 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40538 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 408 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 408 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40550 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 416 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 416 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40554 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 424 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 424 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40568 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 432 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 432 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40572 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 440 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 440 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40586 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 448 kv move time out err, server is busy now. +ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 448 +ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:49 [api_http.py:183] await fut +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:49 [api_http.py:183] +ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:49 [api_http.py:183] raise e +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40598 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 464 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 464 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40620 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 472 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 472 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40632 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 480 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 480 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40648 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 488 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 488 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40664 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 496 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 496 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40680 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 504 kv move time out err, server is busy now. +ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 504 +ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:50 [api_http.py:183] await fut +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:50 [api_http.py:183] +ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:50 [api_http.py:183] raise e +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40684 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 512 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 512 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40694 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 520 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 520 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40700 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 536 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 536 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40726 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 544 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 544 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40728 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 552 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 552 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40740 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 560 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 560 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40746 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 568 kv move time out err, server is busy now. +ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 568 +ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:51 [api_http.py:183] await fut +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:51 [api_http.py:183] +ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:51 [api_http.py:183] raise e +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40758 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 576 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 576 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40762 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 584 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 584 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40776 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 592 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 592 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40790 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 600 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 600 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40804 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 608 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 608 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40816 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 616 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 616 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40832 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 624 kv move time out err, server is busy now. +ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 624 +ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:52 [api_http.py:183] await fut +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:52 [api_http.py:183] +ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:52 [api_http.py:183] raise e +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40834 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 632 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 632 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40846 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 640 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 640 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40850 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 648 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 648 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40858 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 656 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 656 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40862 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 672 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 672 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40878 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 680 kv move time out err, server is busy now. +ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 680 +ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:53 [api_http.py:183] await fut +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:53 [api_http.py:183] +ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:53 [api_http.py:183] raise e +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40894 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 688 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 688 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40900 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 696 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 696 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40910 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 704 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 704 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40924 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 712 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 712 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40932 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 720 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 720 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40938 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 728 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 728 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40952 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 736 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 736 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40964 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 744 kv move time out err, server is busy now. +ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 744 +ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:54 [api_http.py:183] await fut +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:54 [api_http.py:183] +ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:54 [api_http.py:183] raise e +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40968 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:55 [manager.py:236] group_request_id: 760 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 760 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40980 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 768 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 768 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40992 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 776 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 776 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:40998 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 784 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 784 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:41014 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 792 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 792 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:41030 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 800 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 800 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:41040 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 808 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 808 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48728 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 816 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 816 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48740 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 824 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 824 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48752 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 840 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 840 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48784 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 848 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 848 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48796 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 856 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 856 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48804 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 864 kv move time out err, server is busy now. +ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 864 +ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:56 [api_http.py:183] await fut +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:56 [api_http.py:183] +ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:56 [api_http.py:183] raise e +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:48820 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 880 kv move time out err, server is busy now. +ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 880 +ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:57 [api_http.py:183] await fut +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:57 [api_http.py:183] raise e +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:59482 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 888 kv move time out err, server is busy now. +ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 888 +ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:57 [api_http.py:183] await fut +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:57 [api_http.py:183] raise e +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:59494 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 896 kv move time out err, server is busy now. +ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 896 +ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:57 [api_http.py:183] await fut +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:57 [api_http.py:183] +ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:57 [api_http.py:183] raise e +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:59496 - "POST /generate HTTP/1.1" 503 +INFO 06-24 20:00:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:00:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms +WARNING 06-24 20:00:58 [manager.py:236] group_request_id: 904 kv move time out err, server is busy now. +ERROR 06-24 20:00:58 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:00:58 [manager.py:349] aborted group_request_id 904 +ERROR 06-24 20:00:58 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:00:58 [api_http.py:183] await fut +ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:00:58 [api_http.py:183] return fut.result() +ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:00:58 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:00:58 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:00:58 [api_http.py:183] +ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:00:58 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:00:58 [api_http.py:183] raise e +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:00:58 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:00:58 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:59504 - "POST /generate HTTP/1.1" 503 +INFO 06-24 20:01:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:01:28 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms +WARNING 06-24 20:01:33 [manager.py:236] group_request_id: 912 kv move time out err, server is busy now. +ERROR 06-24 20:01:33 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:01:33 [manager.py:349] aborted group_request_id 912 +ERROR 06-24 20:01:33 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:01:33 [api_http.py:183] await fut +ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:01:33 [api_http.py:183] return fut.result() +ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:01:33 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:01:33 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:01:33 [api_http.py:183] +ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:01:33 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:01:33 [api_http.py:183] raise e +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:01:33 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:01:33 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:39294 - "POST /generate HTTP/1.1" 503 +WARNING 06-24 20:01:41 [manager.py:236] group_request_id: 920 kv move time out err, server is busy now. +ERROR 06-24 20:01:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) +WARNING 06-24 20:01:41 [manager.py:349] aborted group_request_id 920 +ERROR 06-24 20:01:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) +ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait +ERROR 06-24 20:01:41 [api_http.py:183] await fut +ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for +ERROR 06-24 20:01:41 [api_http.py:183] return fut.result() +ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.CancelledError +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] The above exception was the direct cause of the following exception: +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream +ERROR 06-24 20:01:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for +ERROR 06-24 20:01:41 [api_http.py:183] raise exceptions.TimeoutError() from exc +ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.TimeoutError +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] During handling of the above exception, another exception occurred: +ERROR 06-24 20:01:41 [api_http.py:183] +ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate +ERROR 06-24 20:01:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate +ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate +ERROR 06-24 20:01:41 [api_http.py:183] raise e +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate +ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package +ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( +ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream +ERROR 06-24 20:01:41 [api_http.py:183] raise ServerBusyError() +ERROR 06-24 20:01:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) +127.0.0.1:39304 - "POST /generate HTTP/1.1" 503 +INFO 06-24 20:01:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:01:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 +INFO 06-24 20:02:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:02:28 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 454, pd_chunk_size: 4096 +INFO 06-24 20:02:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:02:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10272, pd_chunk_size: 4096 +INFO 06-24 20:03:14 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 first_token_cost:3266.073703765869ms total_cost_time:214615.10252952576ms,out_token_counter:4288 mean_per_token_cost_time: 49.28848620003729ms prompt_token_num:5345 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40218 - "POST /generate HTTP/1.1" 200 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 301, pd_chunk_size: 4096 +INFO 06-24 20:03:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:03:28 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 +INFO 06-24 20:03:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:03:58 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 +INFO 06-24 20:04:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:04:28 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 20:04:55 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 first_token_cost:8981.547832489014ms total_cost_time:315349.3595123291ms,out_token_counter:4631 mean_per_token_cost_time: 66.15586518675018ms prompt_token_num:5679 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40544 - "POST /generate HTTP/1.1" 200 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9886, pd_chunk_size: 4096 +INFO 06-24 20:04:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:04:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9621, pd_chunk_size: 4096 +INFO 06-24 20:05:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:05:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9486, pd_chunk_size: 4096 +INFO 06-24 20:06:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:06:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9216, pd_chunk_size: 4096 +INFO 06-24 20:07:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:07:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9071, pd_chunk_size: 4096 +INFO 06-24 20:07:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:07:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8925, pd_chunk_size: 4096 +INFO 06-24 20:08:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:08:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8793, pd_chunk_size: 4096 +INFO 06-24 20:08:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:08:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8660, pd_chunk_size: 4096 +INFO 06-24 20:09:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:09:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8526, pd_chunk_size: 4096 +INFO 06-24 20:09:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:09:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8394, pd_chunk_size: 4096 +INFO 06-24 20:10:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:10:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8131, pd_chunk_size: 4096 +INFO 06-24 20:11:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:11:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7872, pd_chunk_size: 4096 +INFO 06-24 20:12:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:12:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7740, pd_chunk_size: 4096 +INFO 06-24 20:12:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:12:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7607, pd_chunk_size: 4096 +INFO 06-24 20:13:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:13:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7346, pd_chunk_size: 4096 +INFO 06-24 20:14:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:14:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7220, pd_chunk_size: 4096 +INFO 06-24 20:14:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:14:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7090, pd_chunk_size: 4096 +INFO 06-24 20:15:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:15:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6959, pd_chunk_size: 4096 +INFO 06-24 20:15:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:15:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6702, pd_chunk_size: 4096 +INFO 06-24 20:16:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:16:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6572, pd_chunk_size: 4096 +INFO 06-24 20:17:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:17:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6314, pd_chunk_size: 4096 +INFO 06-24 20:18:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:18:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6187, pd_chunk_size: 4096 +INFO 06-24 20:18:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:18:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5931, pd_chunk_size: 4096 +INFO 06-24 20:19:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:19:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5805, pd_chunk_size: 4096 +INFO 06-24 20:20:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:20:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5676, pd_chunk_size: 4096 +INFO 06-24 20:20:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:20:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5548, pd_chunk_size: 4096 +INFO 06-24 20:21:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:21:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5419, pd_chunk_size: 4096 +INFO 06-24 20:21:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:21:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5162, pd_chunk_size: 4096 +INFO 06-24 20:22:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:22:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5033, pd_chunk_size: 4096 +INFO 06-24 20:23:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:23:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4905, pd_chunk_size: 4096 +INFO 06-24 20:23:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:23:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4777, pd_chunk_size: 4096 +INFO 06-24 20:24:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:24:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4651, pd_chunk_size: 4096 +INFO 06-24 20:24:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:24:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4398, pd_chunk_size: 4096 +INFO 06-24 20:25:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:25:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4148, pd_chunk_size: 4096 +INFO 06-24 20:26:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:26:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4022, pd_chunk_size: 4096 +INFO 06-24 20:27:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:27:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3896, pd_chunk_size: 4096 +INFO 06-24 20:27:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:27:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3771, pd_chunk_size: 4096 +INFO 06-24 20:28:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:28:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3642, pd_chunk_size: 4096 +INFO 06-24 20:28:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:28:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3522, pd_chunk_size: 4096 +INFO 06-24 20:29:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:29:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3282, pd_chunk_size: 4096 +INFO 06-24 20:30:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:30:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3037, pd_chunk_size: 4096 +INFO 06-24 20:31:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:31:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2915, pd_chunk_size: 4096 +INFO 06-24 20:31:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:31:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2794, pd_chunk_size: 4096 +INFO 06-24 20:32:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:32:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2673, pd_chunk_size: 4096 +INFO 06-24 20:32:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:32:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2430, pd_chunk_size: 4096 +INFO 06-24 20:33:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:33:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2310, pd_chunk_size: 4096 +INFO 06-24 20:34:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:34:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2190, pd_chunk_size: 4096 +INFO 06-24 20:34:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:34:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2069, pd_chunk_size: 4096 +INFO 06-24 20:35:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:35:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1988, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1828, pd_chunk_size: 4096 +INFO 06-24 20:36:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:36:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1751, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1632, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1587, pd_chunk_size: 4096 +INFO 06-24 20:37:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:37:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1511, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1467, pd_chunk_size: 4096 +INFO 06-24 20:37:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:37:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1393, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1345, pd_chunk_size: 4096 +INFO 06-24 20:38:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:38:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1274, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1227, pd_chunk_size: 4096 +INFO 06-24 20:38:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:38:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1160, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1108, pd_chunk_size: 4096 +INFO 06-24 20:39:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:39:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1099, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1098, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1097, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1096, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1095, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1094, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1093, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1092, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1091, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1090, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1089, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1088, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1087, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1086, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1085, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1084, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1083, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1082, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1081, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1080, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1079, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1078, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1077, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1076, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1075, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1074, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1073, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1072, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1071, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1070, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1069, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1068, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1067, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1066, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1065, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1064, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1063, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1062, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1061, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1060, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1059, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1058, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1057, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1056, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1055, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1054, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1053, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1052, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1051, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1050, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1049, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1048, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1047, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1046, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1045, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1044, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1043, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1042, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1041, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1040, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1039, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1038, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1037, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1036, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1035, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1034, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1033, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1032, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1031, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1030, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1029, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1028, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1027, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1026, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1025, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1024, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1023, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1022, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1021, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1020, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1019, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1018, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1017, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1016, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1015, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1014, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1013, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1012, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1011, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1010, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1009, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1008, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1007, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1006, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1005, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1004, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1003, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1002, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1001, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1000, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 999, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 998, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 997, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 996, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 995, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 994, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 993, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 992, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 991, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 990, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 989, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 988, pd_chunk_size: 4096 +INFO 06-24 20:39:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:39:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 987, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 986, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 985, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 984, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 983, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 982, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 981, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 980, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 979, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 978, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 977, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 976, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 975, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 974, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 973, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 972, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 971, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 970, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 969, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 968, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 967, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 966, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 965, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 964, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 963, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 962, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 961, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 960, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 959, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 958, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 957, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 956, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 955, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 954, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 953, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 952, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 951, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 950, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 949, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 948, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 947, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 946, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 945, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 944, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 943, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 942, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 941, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 940, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 939, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 938, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 937, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 936, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 935, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 934, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 933, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 932, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 931, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 930, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 929, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 928, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 927, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 926, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 925, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 924, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 923, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 922, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 921, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 920, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 919, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 918, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 917, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 916, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 915, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 914, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 913, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 912, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 911, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 910, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 909, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 908, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 907, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 906, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 905, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 904, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 903, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 902, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 901, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 900, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 899, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 898, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 897, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 896, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 895, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 894, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 893, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 892, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 891, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 890, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 889, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 888, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 887, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 886, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 885, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 884, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 883, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 882, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 881, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 880, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 879, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 878, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 877, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 876, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 875, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 874, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 873, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 872, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 871, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 870, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 869, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 868, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 867, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 866, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 865, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 864, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 863, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 862, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 861, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 860, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 859, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 858, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 857, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 856, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 855, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 854, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 853, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 852, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 851, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 850, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 849, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 848, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 847, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 846, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 845, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 844, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 843, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 842, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 841, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 840, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 839, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 838, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 837, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 836, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 835, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 834, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 833, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 832, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 831, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 830, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 829, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 828, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 827, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 826, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 825, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 824, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 823, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 822, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 821, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 820, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 819, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 818, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 817, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 816, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 815, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 814, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 813, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 812, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 811, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 810, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 809, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 808, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 807, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 806, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 805, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 804, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 803, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 802, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 801, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 800, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 799, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 798, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 797, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 796, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 795, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 794, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 793, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 792, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 791, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 790, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 789, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 788, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 787, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 786, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 785, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 784, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 783, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 782, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 781, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 780, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 779, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 778, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 777, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 776, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 775, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 774, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 773, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 772, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 771, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 770, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 769, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 768, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 767, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 766, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 765, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 764, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 763, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 762, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 761, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 760, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 759, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 758, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 757, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 756, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 755, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 754, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 753, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 752, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 751, pd_chunk_size: 4096 +INFO 06-24 20:40:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:40:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 750, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 749, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 748, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 747, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 746, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 745, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 744, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 743, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 742, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 741, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 740, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 739, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 738, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 737, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 736, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 735, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 734, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 733, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 732, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 731, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 730, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 729, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 728, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 727, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 726, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 725, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 724, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 723, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 722, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 721, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 720, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 719, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 718, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 717, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 716, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 715, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 714, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 713, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 712, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 711, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 710, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 709, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 708, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 707, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 706, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 705, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 704, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 703, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 702, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 701, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 700, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 699, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 698, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 697, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 696, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 695, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 694, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 693, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 692, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 691, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 690, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 689, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 688, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 687, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 686, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 685, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 684, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 683, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 682, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 681, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 680, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 679, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 678, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 677, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 676, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 675, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 674, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 673, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 672, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 671, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 670, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 669, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 668, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 667, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 666, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 665, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 664, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 663, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 662, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 661, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 660, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 659, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 658, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 657, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 656, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 655, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 654, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 653, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 652, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 651, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 650, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 649, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 648, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 647, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 646, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 645, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 644, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 643, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 642, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 641, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 640, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 639, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 638, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 637, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 636, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 635, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 634, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 633, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 632, pd_chunk_size: 4096 +INFO 06-24 20:41:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:41:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 631, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 630, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 629, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 628, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 627, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 626, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 625, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 624, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 623, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 622, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 621, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 620, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 619, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 618, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 617, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 616, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 615, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 614, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 613, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 612, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 611, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 610, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 609, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 608, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 607, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 606, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 605, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 604, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 603, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 602, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 601, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 600, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 599, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 598, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 597, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 596, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 595, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 594, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 593, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 592, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 591, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 590, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 589, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 588, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 587, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 586, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 585, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 584, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 583, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 582, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 581, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 580, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 579, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 578, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 577, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 576, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 575, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 574, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 573, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 572, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 571, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 570, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 569, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 568, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 567, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 566, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 565, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 564, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 563, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 562, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 561, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 560, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 559, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 558, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 557, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 556, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 555, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 554, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 553, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 552, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 551, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 550, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 549, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 548, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 547, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 546, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 545, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 544, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 543, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 542, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 541, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 540, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 539, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 538, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 537, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 536, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 535, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 534, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 533, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 532, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 531, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 530, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 529, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 528, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 527, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 526, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 525, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 524, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 523, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 522, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 521, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 520, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 519, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 518, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 517, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 516, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 515, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 514, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 513, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 512, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 511, pd_chunk_size: 4096 +INFO 06-24 20:41:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:41:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 510, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 509, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 508, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 507, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 506, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 505, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 504, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 503, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 502, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 501, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 500, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 499, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 498, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 497, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 496, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 495, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 494, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 493, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 492, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 491, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 490, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 489, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 488, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 487, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 486, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 485, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 484, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 483, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 482, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 481, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 480, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 479, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 478, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 477, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 476, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 475, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 474, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 473, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 472, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 471, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 470, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 469, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 468, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 467, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 466, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 465, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 464, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 463, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 462, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 461, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 460, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 459, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 458, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 457, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 456, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 455, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 454, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 453, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 452, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 451, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 450, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 449, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 448, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 447, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 446, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 445, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 444, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 443, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 442, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 441, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 440, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 439, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 438, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 437, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 436, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 435, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 434, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 433, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 432, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 431, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 430, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 429, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 428, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 427, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 426, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 425, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 424, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 423, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 422, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 421, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 420, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 419, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 418, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 417, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 416, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 415, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 414, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 413, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 412, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 411, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 410, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 409, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 408, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 407, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 406, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 405, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 404, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 403, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 402, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 401, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 400, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 399, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 398, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 397, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 396, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 395, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 394, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 393, pd_chunk_size: 4096 +INFO 06-24 20:42:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:42:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 392, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 391, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 390, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 389, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 388, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 387, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 386, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 385, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 384, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 383, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 382, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 381, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 380, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 379, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 378, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 377, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 376, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 375, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 374, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 373, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 372, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 371, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 370, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 369, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 368, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 367, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 366, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 365, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 364, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 363, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 362, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 361, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 360, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 359, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 358, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 357, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 356, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 355, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 354, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 353, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 352, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 351, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 350, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 349, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 348, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 347, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 346, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 345, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 344, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 343, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 342, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 341, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 340, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 339, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 338, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 337, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 336, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 335, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 334, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 333, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 332, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 331, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 330, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 329, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 328, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 327, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 326, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 325, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 324, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 323, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 322, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 321, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 320, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 319, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 318, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 317, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 316, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 315, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 314, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 313, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 312, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 311, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 310, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 309, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 308, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 307, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 306, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 305, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 304, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 303, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 302, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 301, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 300, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 299, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 298, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 297, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 296, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 295, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 294, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 293, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 292, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 291, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 290, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 289, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 288, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 287, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 286, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 285, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 284, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 283, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 282, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 281, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 280, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 279, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 278, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 277, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 276, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 275, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 274, pd_chunk_size: 4096 +INFO 06-24 20:42:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:42:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 273, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 272, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 271, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 270, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 269, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 268, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 267, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 266, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 265, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 264, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 263, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 262, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 261, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 260, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 259, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 258, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 257, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 256, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 255, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 254, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 253, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 252, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 251, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 250, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 249, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 248, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 247, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 246, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 245, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 244, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 243, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 242, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 241, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 240, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 239, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 238, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 237, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 236, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 235, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 234, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 233, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 232, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 231, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 230, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 229, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 228, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 227, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 226, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 225, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 224, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 223, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 222, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 221, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 220, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 219, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 218, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 217, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 216, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 215, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 214, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 213, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 212, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 211, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 210, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 209, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 208, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 207, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 206, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 205, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 204, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 203, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 202, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 201, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 200, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 199, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 198, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 197, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 196, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 195, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 194, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 193, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 192, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 +INFO 06-24 20:43:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:43:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 +INFO 06-24 20:43:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:43:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 +[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 +INFO 06-24 20:44:08 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 first_token_cost:825.5589008331299ms total_cost_time:2668792.4263477325ms,out_token_counter:14561 mean_per_token_cost_time: 183.22689838932075ms prompt_token_num:15615 prompt_cache_len:0 prompt_cache_ratio:0.0 +127.0.0.1:40092 - "POST /generate HTTP/1.1" 200 +INFO 06-24 20:44:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:44:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:44:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:44:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:45:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:45:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:45:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:45:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:46:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:46:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:46:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:46:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:47:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:47:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:47:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:47:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:48:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:48:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:49:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:49:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:49:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:49:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:50:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:50:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:51:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:51:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:51:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:51:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:52:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:52:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:52:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:52:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:53:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:53:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:54:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:54:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:54:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:54:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:55:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:55:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:56:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:56:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:56:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:56:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:57:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:57:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:57:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:57:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:58:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:58:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:58:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:58:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:59:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:59:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 20:59:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 20:59:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:00:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:00:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:01:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:01:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:01:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:01:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:02:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:02:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:02:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:02:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:03:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:03:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:03:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:03:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:04:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:04:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:04:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:04:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:05:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:05:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:05:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:05:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:06:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:06:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:06:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:06:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:07:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:07:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:07:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:07:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:08:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:08:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:08:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:08:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:09:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:09:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:09:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:09:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:10:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:10:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:10:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:10:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:11:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:11:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:11:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:11:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:12:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:12:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:12:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:12:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:13:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:13:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:13:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:13:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:14:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:14:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:14:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:14:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:15:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:15:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:16:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:16:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:17:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:17:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:18:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:18:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:19:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:19:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:19:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:19:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:20:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:20:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:21:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:21:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:21:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:21:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:22:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:22:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:23:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:23:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:24:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:24:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:24:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:24:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:25:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:25:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:26:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:26:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:26:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:26:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:27:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:27:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:28:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:28:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:28:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:28:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:29:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:29:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:30:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:30:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:31:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:31:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:32:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:32:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:32:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:32:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:33:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:33:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:34:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:34:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:34:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:34:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:35:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:35:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:35:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:35:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:36:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:36:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:36:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:36:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:37:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:37:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:37:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:37:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:38:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:38:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:39:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:39:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:40:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:40:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:40:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:40:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:41:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:41:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:41:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:41:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:42:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:42:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:43:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:43:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:44:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:44:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:45:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:45:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:45:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:45:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:46:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:46:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:47:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:47:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:48:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:48:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:48:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:48:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:49:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:49:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:50:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:50:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:50:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:50:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:51:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:51:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:52:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:52:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:52:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:52:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:53:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:53:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:53:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms +INFO 06-24 21:53:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms +INFO 06-24 21:54:19 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... + +INFO 06-24 21:54:19 [start_utils.py:106] Killing child process 1211478 +INFO 06-24 21:54:19 [start_utils.py:106] Killing child process 1211483 +INFO 06-24 21:54:19 [start_utils.py:108] Killing parent process 1211009 +INFO 06-24 21:54:19 [start_utils.py:53] Killing parent process 1210907 +INFO 06-24 21:54:19 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 21:54:19 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_p.log b/pd_p.log new file mode 100644 index 000000000..da5287651 --- /dev/null +++ b/pd_p.log @@ -0,0 +1,1512 @@ +INFO 06-24 21:55:24 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:24 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:25 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:55:27 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:27 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:27 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:55:27 [api_start.py:79] zmq mode head: ipc:///tmp/_2732_0_ +INFO 06-24 21:55:27 [api_start.py:81] use tgi api: False +INFO 06-24 21:55:27 [api_start.py:192] alloced ports: [10216, 10253, 10154, 10095, 10138, 10064, 10217, 10074, 10133] +INFO 06-24 21:55:27 [api_start.py:233] all start args:Namespace(run_mode='prefill', host='127.0.1.1', port=8017, httpserver_workers=1, zmq_mode='ipc:///tmp/_2732_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=128, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16000, nccl_host='127.0.0.1', nccl_port=2732, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=True, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=16000, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10216, detokenization_port=10253, detokenization_pub_port=10154, visual_port=10095, audio_port=10138, cache_port=10064, metric_port=10217, pd_node_infer_rpyc_ports=[10133], pd_node_id=287595743282619216970276961428881885738, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) +INFO 06-24 21:55:29 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 21:55:31 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:32 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:32 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:55:33 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:55:33 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 21:55:34 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:55:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:55:35 [shm_req_manager.py:59] create lock shm 2732_0_req_shm_total +INFO 06-24 21:55:35 [atomic_array_lock.py:29] create lock shm 2732_0_array_reqs_lock +INFO 06-24 21:55:35 [atomic_lock.py:26] create lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_mem_manger_can_use_token_num_0 +INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_shared_token_load +INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_shared_token_load_ext_infos +INFO 06-24 21:55:36 [model_rpc.py:70] Initialized RPC server for rank 0. +INFO 06-24 21:55:36 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 21:55:36 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 21:55:36 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 21:55:36 [model_rpc.py:184] use ChunckedPrefillForPrefillNode +WARNING 06-24 21:55:36 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:55:36 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:55:36 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:55:36 [manager.py:41] pub_to_httpserver sendhwm 1000 +INFO 06-24 21:55:36 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 21:55:36 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 21:55:36 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_shared_token_load +INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos +INFO 06-24 21:55:38 [shared_arr.py:17] create shm 2732_0_dp_rank_0_lock_tp_infos +INFO 06-24 21:55:38 [basemodel.py:134] Initial quantization. The default quantization method is none +INFO 06-24 21:55:38 [mem_utils.py:11] mode setting params: [] +INFO 06-24 21:55:38 [mem_utils.py:25] Model kv cache using mode normal +INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_mem_manger_can_use_token_num_0 +INFO 06-24 21:56:04 [basemodel.py:652] begin check max_len infer +INFO 06-24 21:56:05 [basemodel.py:680] check max_len 8448 infer ok +INFO 06-24 21:56:05 [shared_arr.py:17] create shm 2732_0_refed_tokens_num_0 +INFO 06-24 21:56:05 [shared_arr.py:17] create shm 2732_0_tree_total_tokens_num_0 +INFO 06-24 21:56:05 [base_backend.py:135] loaded model class +INFO 06-24 21:56:05 [prefill_impl.py:36] lock_nccl_group ranks 0 +INFO 06-24 21:56:05 [shared_arr.py:20] link shm 2732_0_refed_tokens_num_0 +INFO 06-24 21:56:05 [shared_arr.py:20] link shm 2732_0_tree_total_tokens_num_0 +INFO 06-24 21:56:05 [manager.py:196] use req queue QueueForPDChunkedPrefill +INFO 06-24 21:56:08 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:09 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:10 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:13 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:13 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:13 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:13 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 21:56:13 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 21:56:13 [prefill_kv_move_manager.py:55] rpyc connect to infer rpyc port: 10133 ok +INFO 06-24 21:56:13 [net_utils.py:51] get hostname ip 127.0.1.1 +INFO 06-24 21:56:13 [prefill_trans_process.py:154] prefill trans kv process for device: 0 started! +INFO 06-24 21:56:15 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:16 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:18 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:20 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:20 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:20 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:20 [prefill_infer_rpyc.py:51] put mem manager to mem_queue ok +INFO 06-24 21:56:20 [prefill_kv_move_manager.py:246] prefill kv move manager process started +INFO 06-24 21:56:20 [start_utils.py:37] init func start_router_process : init ok +INFO 06-24 21:56:20 [start_utils.py:37] init func start_detokenization_process : init ok +INFO 06-24 21:56:20 [api_start.py:57] start process pid 1411123 +INFO 06-24 21:56:20 [api_start.py:58] http server pid 1414655 +INFO 06-24 21:56:24 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 21:56:24 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 21:56:25 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 21:56:27 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 21:56:27 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 21:56:27 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 21:56:27 [api_http.py:326] server start up +INFO 06-24 21:56:27 [atomic_array_lock.py:32] link lock shm 2732_0_lightllm_resource_lock +INFO 06-24 21:56:27 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 21:56:27 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 21:56:27 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 21:56:28 [atomic_lock.py:29] link lock shm 2732_0_req_id_gen_lock +INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_latest_success_infer_time_mark +INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_shared_token_load +INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos +INFO 06-24 21:56:28 [api_http.py:330] server start up ok, loop use is +INFO 06-24 21:56:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:56:28 [pd_loop.py:92] Sent registration JSON: {'node_id': 287595743282619216970276961428881885738, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10216, 'detokenization_port': 10253, 'detokenization_pub_port': 10154, 'visual_port': 10095, 'audio_port': 10138, 'cache_port': 10064, 'metric_port': 10217, 'pd_node_infer_rpyc_ports': [10133], 'pd_node_id': 287595743282619216970276961428881885738, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 21:56:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:56:58 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 21:56:58 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 +INFO 06-24 21:57:15 [manager.py:224] router recive req id 8 cost time 0.07434654235839844 s +INFO 06-24 21:57:15 [manager.py:224] router recive req id 16 cost time 0.025234222412109375 s +INFO 06-24 21:57:15 [manager.py:224] router recive req id 24 cost time 0.014957904815673828 s +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 +INFO 06-24 21:57:15 [manager.py:224] router recive req id 32 cost time 0.009561538696289062 s +INFO 06-24 21:57:15 [manager.py:224] router recive req id 40 cost time 0.010047435760498047 s +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 8 cost time 0.10156798362731934 s +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 16 cost time 0.052925825119018555 s +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 24 cost time 0.043247222900390625 s +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 32 cost time 0.03420853614807129 s +INFO 06-24 21:57:15 [manager.py:224] router recive req id 48 cost time 0.0063211917877197266 s +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 40 cost time 0.025928497314453125 s +INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 48 cost time 0.00784921646118164 s +INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 +DEBUG 06-24 21:57:15 [manager.py:391] Prefill Batch: batch_id=183192395226738958363005591887549515689, time:1750773435.9941852s req_ids:[8, 16, 24, 32, 40, 48] +DEBUG 06-24 21:57:15 [manager.py:391] +DEBUG 06-24 21:57:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 63.452 tokens/s +DEBUG 06-24 21:57:15 [stats.py:37] Avg prompt tokens throughput: 63.452 tokens/s +DEBUG 06-24 21:57:15 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:72 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:80 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:88 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:96 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:104 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:112 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:120 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:128 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:136 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:144 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:152 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:160 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:168 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:176 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:184 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:192 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:200 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:208 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:216 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:224 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:232 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:240 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:248 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:256 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:264 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:272 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:280 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:288 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:296 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:304 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:312 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:320 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:328 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 +INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 +INFO 06-24 21:57:17 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 8 +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 16 +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 24 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 first_token_cost:2066.8702125549316ms total_cost_time:2066.9257640838623ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 32 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 first_token_cost:2017.4884796142578ms total_cost_time:2017.5154209136963ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 first_token_cost:2007.4079036712646ms total_cost_time:2007.4307918548584ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 40 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 first_token_cost:1997.9596138000488ms total_cost_time:1997.981071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:17 [manager.py:162] detoken release req id 48 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 first_token_cost:1989.2513751983643ms total_cost_time:1989.2728328704834ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 first_token_cost:1971.0087776184082ms total_cost_time:1971.02952003479ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 21:57:18 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:18 [infer_batch.py:156] radix refed token num 6345 +DEBUG 06-24 21:57:18 [infer_batch.py:156] radix hold token num 6345 +DEBUG 06-24 21:57:18 [infer_batch.py:156] mem manager can alloc token num 10047 +DEBUG 06-24 21:57:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:18 [batch.py:51] router release req id 8 +INFO 06-24 21:57:18 [batch.py:51] router release req id 16 +INFO 06-24 21:57:18 [batch.py:51] router release req id 24 +INFO 06-24 21:57:18 [batch.py:51] router release req id 32 +INFO 06-24 21:57:18 [batch.py:51] router release req id 40 +INFO 06-24 21:57:18 [batch.py:51] router release req id 48 +INFO 06-24 21:57:18 [manager.py:224] router recive req id 56 cost time 2.0529732704162598 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 64 cost time 2.0450782775878906 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 56 cost time 2.054810047149658 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 72 cost time 2.036593198776245 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 80 cost time 2.0283164978027344 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 88 cost time 1.9868121147155762 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 64 cost time 2.047435998916626 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 96 cost time 1.981055736541748 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 104 cost time 1.9764375686645508 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 112 cost time 1.9717299938201904 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 72 cost time 2.039720058441162 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 120 cost time 1.9665844440460205 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 128 cost time 1.9616596698760986 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 80 cost time 2.032367706298828 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 136 cost time 1.953277349472046 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 144 cost time 1.9455409049987793 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 152 cost time 1.9381029605865479 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 88 cost time 1.9917616844177246 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 160 cost time 1.9307143688201904 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 168 cost time 1.9222908020019531 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 96 cost time 1.9869613647460938 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 176 cost time 1.9146487712860107 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 184 cost time 1.9070994853973389 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 192 cost time 1.8989145755767822 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 104 cost time 1.983229160308838 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 200 cost time 1.8905799388885498 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 112 cost time 1.9795911312103271 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 208 cost time 1.883430004119873 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 216 cost time 1.8658819198608398 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 224 cost time 1.8562607765197754 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 232 cost time 1.8481740951538086 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 120 cost time 1.976130723953247 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 240 cost time 1.8409795761108398 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 248 cost time 1.8338754177093506 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 256 cost time 1.8289406299591064 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 264 cost time 1.8243746757507324 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 272 cost time 1.8194279670715332 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 128 cost time 1.9731316566467285 s +INFO 06-24 21:57:18 [manager.py:224] router recive req id 280 cost time 1.814791202545166 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 136 cost time 1.9656925201416016 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 144 cost time 1.9588980674743652 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 152 cost time 1.9523286819458008 s +DEBUG 06-24 21:57:18 [manager.py:391] Prefill Batch: batch_id=183435466452217083455255300120862165865, time:1750773438.0651114s req_ids:[56, 64, 72, 80, 88, 96, 104] +DEBUG 06-24 21:57:18 [manager.py:391] +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 160 cost time 1.978437900543213 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 168 cost time 1.9713377952575684 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 176 cost time 1.964714765548706 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 184 cost time 1.958141565322876 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 192 cost time 1.951369047164917 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 200 cost time 1.943962574005127 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 208 cost time 1.9369051456451416 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 216 cost time 1.9202401638031006 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 224 cost time 1.9116225242614746 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 232 cost time 1.904437780380249 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 240 cost time 1.8981175422668457 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 248 cost time 1.8918724060058594 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 256 cost time 1.8877604007720947 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 264 cost time 1.8840248584747314 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 272 cost time 1.8799102306365967 s +INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 280 cost time 1.8761711120605469 s +INFO 06-24 21:57:18 [rpyc_fix_utils.py:36] change socket buffer from 2626560 131072 change to 4194304 +INFO 06-24 21:57:18 [prefill_trans_process.py:61] connect start PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=-1, prefill_id=287595743282619216970276961428881885738, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') +INFO 06-24 21:57:18 [prefill_trans_process.py:64] connect src_id 287595743282619216970276961428881885738 dest_id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:18 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 +INFO 06-24 21:57:18 [pynccl.py:180] LightLLM is using nccl==2.21.5 +INFO 06-24 21:57:19 [manager.py:224] router recive req id 288 cost time 2.843327045440674 s +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 56 +INFO 06-24 21:57:19 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 64 +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 72 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 first_token_cost:3107.999324798584ms total_cost_time:3108.043432235718ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 first_token_cost:3099.2417335510254ms total_cost_time:3099.2672443389893ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 80 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:72 first_token_cost:3090.4550552368164ms total_cost_time:3090.477228164673ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 88 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:80 first_token_cost:3082.355737686157ms total_cost_time:3082.376480102539ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:88 first_token_cost:3040.7509803771973ms total_cost_time:3040.7721996307373ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 96 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:96 first_token_cost:3034.8153114318848ms total_cost_time:3034.8355770111084ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:104 first_token_cost:3030.2133560180664ms total_cost_time:3030.233383178711ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:19 [manager.py:162] detoken release req id 104 +INFO 06-24 21:57:19 [prefill_trans_process.py:81] PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=-1, prefill_id=287595743282619216970276961428881885738, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') kv trans connected! +INFO 06-24 21:57:19 [prefill_trans_obj.py:104] create KVTransConnectObj success: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 288 cost time 2.852398157119751 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 8 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1529920101165771 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 16 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1361424922943115 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 24 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1195614337921143 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.10410737991333 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.090341329574585 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.0786843299865723 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008833646774291992 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0019044876098632812 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 8 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.013906478881835938 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 8 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 24 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 32 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 40 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 48 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 56 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 64 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009841203689575195 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 80 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0033121109008789062 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc to start kv movequeue time 1.1567583084106445 s +INFO 06-24 21:57:19 [prefill_trans_process.py:34] trans start: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007048368453979492 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 72 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 80 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 88 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0039234161376953125 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 88 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.004856109619140625 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 88 not send, decode is busy +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0009615421295166016 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.004712343215942383 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 96 not send, decode is busy +INFO 06-24 21:57:19 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 8 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 24 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 32 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 40 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 48 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 56 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 64 +DEBUG 06-24 21:57:19 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:19 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:19 [infer_batch.py:156] radix refed token num 6333 +DEBUG 06-24 21:57:19 [infer_batch.py:156] radix hold token num 13738 +DEBUG 06-24 21:57:19 [infer_batch.py:156] mem manager can alloc token num 2654 +DEBUG 06-24 21:57:19 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 72 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 80 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 88 +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 96 +INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00585484504699707 s +INFO 06-24 21:57:19 [batch.py:51] router release req id 56 +INFO 06-24 21:57:19 [batch.py:51] router release req id 64 +INFO 06-24 21:57:19 [batch.py:51] router release req id 72 +INFO 06-24 21:57:19 [batch.py:51] router release req id 80 +INFO 06-24 21:57:19 [batch.py:51] router release req id 88 +INFO 06-24 21:57:19 [batch.py:51] router release req id 96 +INFO 06-24 21:57:19 [batch.py:51] router release req id 104 +INFO 06-24 21:57:19 [manager.py:224] router recive req id 296 cost time 2.904078483581543 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 304 cost time 2.9004077911376953 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 296 cost time 2.9058501720428467 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 312 cost time 2.89624285697937 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.006551504135131836 s +INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 104 not send, decode is busy +INFO 06-24 21:57:19 [manager.py:224] router recive req id 320 cost time 2.8917062282562256 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 328 cost time 2.8868372440338135 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 304 cost time 2.9025967121124268 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 336 cost time 2.882467031478882 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 344 cost time 2.8421084880828857 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 352 cost time 2.8355062007904053 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 312 cost time 2.899076223373413 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 360 cost time 2.830348253250122 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 368 cost time 2.8254940509796143 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 376 cost time 2.820925235748291 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 320 cost time 2.8958187103271484 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 384 cost time 2.8162660598754883 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 392 cost time 2.8115522861480713 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 400 cost time 2.806875705718994 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 328 cost time 2.891998052597046 s +INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 104 +INFO 06-24 21:57:19 [manager.py:224] router recive req id 408 cost time 2.8029000759124756 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 336 cost time 2.888749361038208 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 416 cost time 2.798424005508423 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 424 cost time 2.7936484813690186 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 432 cost time 2.7890119552612305 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 344 cost time 2.849390983581543 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 440 cost time 2.7844791412353516 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 448 cost time 2.7797770500183105 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 352 cost time 2.843766689300537 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 456 cost time 2.7765212059020996 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 360 cost time 2.8396759033203125 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 464 cost time 2.7719948291778564 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 472 cost time 2.767430067062378 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 480 cost time 2.7624762058258057 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 368 cost time 2.8358266353607178 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 488 cost time 2.757695198059082 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 496 cost time 2.75347638130188 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 504 cost time 2.7491462230682373 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 376 cost time 2.832227945327759 s +INFO 06-24 21:57:19 [manager.py:224] router recive req id 512 cost time 2.744821310043335 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 384 cost time 2.8284521102905273 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 392 cost time 2.824519157409668 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 400 cost time 2.820510149002075 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 408 cost time 2.816723346710205 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 416 cost time 2.812837600708008 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 424 cost time 2.808816432952881 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 432 cost time 2.804975748062134 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 440 cost time 2.8012473583221436 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 448 cost time 2.797304391860962 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 456 cost time 2.7937395572662354 s +DEBUG 06-24 21:57:19 [manager.py:391] Prefill Batch: batch_id=92541181842414176735446229458234978367, time:1750773439.1856368s req_ids:[112, 120, 128, 136, 144, 152, 160, 168] +DEBUG 06-24 21:57:19 [manager.py:391] +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 464 cost time 2.7900025844573975 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 472 cost time 2.814335584640503 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 480 cost time 2.811356544494629 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 488 cost time 2.8079729080200195 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 496 cost time 2.804640293121338 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 504 cost time 2.801222324371338 s +INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 512 cost time 2.7986178398132324 s +INFO 06-24 21:57:19 [prefill_trans_process.py:42] trans finished: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc move len: 1055 +INFO 06-24 21:57:20 [manager.py:224] router recive req id 520 cost time 4.18721604347229 s +INFO 06-24 21:57:20 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 112 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 120 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 128 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:112 first_token_cost:4553.349018096924ms total_cost_time:4553.433895111084ms,out_token_counter:1 mean_per_token_cost_time: 0.08487701416015625ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 136 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:120 first_token_cost:4548.264026641846ms total_cost_time:4548.290967941284ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 144 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:128 first_token_cost:4543.472051620483ms total_cost_time:4543.494939804077ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 152 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:136 first_token_cost:4535.346984863281ms total_cost_time:4535.3686809539795ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 160 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:144 first_token_cost:4527.52161026001ms total_cost_time:4527.5421142578125ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:152 first_token_cost:4519.891738891602ms total_cost_time:4519.912958145142ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:162] detoken release req id 168 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:160 first_token_cost:4512.29453086853ms total_cost_time:4512.31575012207ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:168 first_token_cost:4503.759384155273ms total_cost_time:4503.7806034088135ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 520 cost time 4.196249008178711 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0020177364349365234 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008722305297851562 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 112 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048978328704833984 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00770878791809082 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 120 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007648468017578125 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0018413066864013672 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012303352355957031 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 128 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 136 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009046792984008789 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0028464794158935547 s +INFO 06-24 21:57:20 [prefill_trans_process.py:44] trans cost time: 1.5370166301727295,move_total_kv_len: 1055, id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc +INFO 06-24 21:57:20 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 16 cost total time: 2.695488214492798 s +INFO 06-24 21:57:20 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 112 +INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.01164865493774414 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 144 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 152 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009674549102783203 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00382232666015625 s +DEBUG 06-24 21:57:20 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:20 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:20 [infer_batch.py:156] radix refed token num 8408 +DEBUG 06-24 21:57:20 [infer_batch.py:156] radix hold token num 15808 +DEBUG 06-24 21:57:20 [infer_batch.py:156] mem manager can alloc token num 584 +DEBUG 06-24 21:57:20 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:20 [batch.py:51] router release req id 112 +INFO 06-24 21:57:20 [batch.py:51] router release req id 120 +INFO 06-24 21:57:20 [batch.py:51] router release req id 128 +INFO 06-24 21:57:20 [batch.py:51] router release req id 136 +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 120 +INFO 06-24 21:57:20 [batch.py:51] router release req id 144 +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 128 +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 136 +INFO 06-24 21:57:20 [batch.py:51] router release req id 152 +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 16 +INFO 06-24 21:57:20 [batch.py:51] router release req id 160 +INFO 06-24 21:57:20 [batch.py:51] router release req id 168 +INFO 06-24 21:57:20 [manager.py:224] router recive req id 528 cost time 4.2441792488098145 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 536 cost time 4.239896059036255 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 544 cost time 4.23553466796875 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 528 cost time 4.245772361755371 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 552 cost time 4.231782913208008 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 560 cost time 4.22771143913269 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 568 cost time 4.223820924758911 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 536 cost time 4.2426581382751465 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 576 cost time 4.219863414764404 s +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 144 +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 544 cost time 4.23926568031311 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 584 cost time 4.21586275100708 s +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 152 +INFO 06-24 21:57:20 [manager.py:224] router recive req id 592 cost time 4.212351322174072 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 552 cost time 4.235877513885498 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 600 cost time 4.207972526550293 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 608 cost time 4.203743934631348 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 560 cost time 4.23270320892334 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 616 cost time 4.199443817138672 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 624 cost time 4.19502067565918 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 632 cost time 4.190509080886841 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 640 cost time 4.186006546020508 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 568 cost time 4.22925877571106 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 648 cost time 4.181662082672119 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 656 cost time 4.177393674850464 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 664 cost time 4.172720193862915 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 576 cost time 4.2260565757751465 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 672 cost time 4.168277740478516 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 680 cost time 4.16365385055542 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 688 cost time 4.159157991409302 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 696 cost time 4.15469765663147 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 704 cost time 4.150240898132324 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 712 cost time 4.1457295417785645 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 584 cost time 4.223072528839111 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 720 cost time 4.14218282699585 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 728 cost time 4.13811993598938 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 736 cost time 4.133428573608398 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 592 cost time 4.220565557479858 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 744 cost time 4.128777742385864 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 752 cost time 4.12431788444519 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 760 cost time 4.119714975357056 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 768 cost time 4.114940881729126 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 776 cost time 4.110274791717529 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 784 cost time 4.105071544647217 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 600 cost time 4.217778921127319 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 792 cost time 4.100350618362427 s +INFO 06-24 21:57:20 [manager.py:224] router recive req id 800 cost time 4.095543622970581 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 608 cost time 4.214792966842651 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 616 cost time 4.211430549621582 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 624 cost time 4.208020210266113 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.02581334114074707 s +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 160 not send, decode is busy +INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 168 not send, decode is busy +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 632 cost time 4.204531669616699 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 640 cost time 4.2012012004852295 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 648 cost time 4.197704315185547 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 656 cost time 4.194422245025635 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 664 cost time 4.190845489501953 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 672 cost time 4.187482833862305 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 680 cost time 4.1839280128479 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 688 cost time 4.18047571182251 s +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 160 +DEBUG 06-24 21:57:20 [manager.py:391] Prefill Batch: batch_id=201455354752215698501524051058319870647, time:1750773440.7145548s req_ids:[176, 184, 192, 200, 208, 216, 224] +DEBUG 06-24 21:57:20 [manager.py:391] +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 696 cost time 4.177005767822266 s +INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 168 +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 704 cost time 4.173741340637207 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 712 cost time 4.170109510421753 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 720 cost time 4.16648006439209 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 728 cost time 4.163381576538086 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 736 cost time 4.159599781036377 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 744 cost time 4.155829429626465 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 752 cost time 4.152259588241577 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 760 cost time 4.14850926399231 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 768 cost time 4.144503593444824 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 776 cost time 4.1405723094940186 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 784 cost time 4.136146783828735 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 792 cost time 4.132099390029907 s +INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 800 cost time 4.127938747406006 s +INFO 06-24 21:57:21 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 176 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 184 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:176 first_token_cost:5522.639989852905ms total_cost_time:5522.682666778564ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:184 first_token_cost:5515.013217926025ms total_cost_time:5515.039682388306ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 192 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:192 first_token_cost:5506.82806968689ms total_cost_time:5506.8519115448ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 200 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:200 first_token_cost:5498.515605926514ms total_cost_time:5498.550653457642ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:208 first_token_cost:5490.27419090271ms total_cost_time:5490.3223514556885ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 208 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:216 first_token_cost:5472.749710083008ms total_cost_time:5472.784519195557ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 216 +INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:224 first_token_cost:5463.237047195435ms total_cost_time:5463.2627964019775ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:21 [manager.py:162] detoken release req id 224 +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008868217468261719 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002635955810546875 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012252092361450195 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 176 not send, decode is busy +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 184 not send, decode is busy +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009368896484375 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003036975860595703 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009622812271118164 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 192 not send, decode is busy +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 200 not send, decode is busy +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005233287811279297 s +INFO 06-24 21:57:21 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007521152496337891 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 208 not send, decode is busy +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 176 +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006842136383056641 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.000782012939453125 s +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 184 +DEBUG 06-24 21:57:21 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:21 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:21 [infer_batch.py:156] radix refed token num 5290 +DEBUG 06-24 21:57:21 [infer_batch.py:156] radix hold token num 15809 +DEBUG 06-24 21:57:21 [infer_batch.py:156] mem manager can alloc token num 583 +DEBUG 06-24 21:57:21 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 192 +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 200 +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 208 +INFO 06-24 21:57:21 [batch.py:51] router release req id 176 +INFO 06-24 21:57:21 [batch.py:51] router release req id 184 +INFO 06-24 21:57:21 [batch.py:51] router release req id 192 +INFO 06-24 21:57:21 [batch.py:51] router release req id 200 +INFO 06-24 21:57:21 [batch.py:51] router release req id 208 +INFO 06-24 21:57:21 [batch.py:51] router release req id 216 +INFO 06-24 21:57:21 [batch.py:51] router release req id 224 +INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010659933090209961 s +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 216 not send, decode is busy +INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 224 not send, decode is busy +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 216 +INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 224 +DEBUG 06-24 21:57:21 [manager.py:391] Prefill Batch: batch_id=128383818688016428732547559065532079804, time:1750773441.7364323s req_ids:[232, 240, 248, 256, 264, 272, 280] +DEBUG 06-24 21:57:21 [manager.py:391] +INFO 06-24 21:57:22 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 232 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 240 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 248 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 256 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 264 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:232 first_token_cost:6473.076820373535ms total_cost_time:6473.118305206299ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:240 first_token_cost:6465.898275375366ms total_cost_time:6465.925931930542ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 272 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:248 first_token_cost:6458.858489990234ms total_cost_time:6458.880662918091ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:256 first_token_cost:6453.812837600708ms total_cost_time:6453.834533691406ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:162] detoken release req id 280 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:264 first_token_cost:6449.409961700439ms total_cost_time:6449.431657791138ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:272 first_token_cost:6444.46873664856ms total_cost_time:6444.489240646362ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:280 first_token_cost:6439.7313594818115ms total_cost_time:6439.752101898193ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 232 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.001619100570678711 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 232 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007566690444946289 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 232 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 240 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004763364791870117 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 240 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007576942443847656 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 240 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0076656341552734375 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002526998519897461 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007674455642700195 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 248 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 256 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00623011589050293 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 272 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0016736984252929688 s +INFO 06-24 21:57:22 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 232 +DEBUG 06-24 21:57:22 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:22 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:22 [infer_batch.py:156] radix refed token num 6334 +DEBUG 06-24 21:57:22 [infer_batch.py:156] radix hold token num 15843 +DEBUG 06-24 21:57:22 [infer_batch.py:156] mem manager can alloc token num 549 +DEBUG 06-24 21:57:22 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 240 +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 248 +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 256 +INFO 06-24 21:57:22 [batch.py:51] router release req id 232 +INFO 06-24 21:57:22 [batch.py:51] router release req id 240 +INFO 06-24 21:57:22 [batch.py:51] router release req id 248 +INFO 06-24 21:57:22 [batch.py:51] router release req id 256 +INFO 06-24 21:57:22 [batch.py:51] router release req id 264 +INFO 06-24 21:57:22 [batch.py:51] router release req id 272 +INFO 06-24 21:57:22 [batch.py:51] router release req id 280 +INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009365320205688477 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 264 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 272 not send, decode is busy +INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 280 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0067102909088134766 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 280 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.005312204360961914 s +INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 280 not send, decode is busy +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 264 +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 272 +INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 280 +DEBUG 06-24 21:57:22 [manager.py:391] Prefill Batch: batch_id=105466661178272767147654849707186787718, time:1750773442.742199s req_ids:[288, 296, 304, 312, 320, 328, 336, 344] +DEBUG 06-24 21:57:22 [manager.py:391] +INFO 06-24 21:57:23 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 288 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 296 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:288 first_token_cost:7568.840265274048ms total_cost_time:7568.888187408447ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 304 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:296 first_token_cost:7564.2688274383545ms total_cost_time:7564.295768737793ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 312 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:304 first_token_cost:7559.7474575042725ms total_cost_time:7559.769868850708ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 320 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:312 first_token_cost:7555.351972579956ms total_cost_time:7555.373668670654ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:320 first_token_cost:7550.553798675537ms total_cost_time:7550.575494766235ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:328 first_token_cost:7545.566082000732ms total_cost_time:7545.602083206177ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 first_token_cost:7541.104316711426ms total_cost_time:7541.126489639282ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 first_token_cost:7500.508069992065ms total_cost_time:7500.529766082764ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 328 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 336 +INFO 06-24 21:57:23 [manager.py:162] detoken release req id 344 +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 288 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0034797191619873047 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 288 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010988235473632812 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 288 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 296 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009880542755126953 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 304 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005002260208129883 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 296 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009933948516845703 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 296 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 304 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010309934616088867 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 320 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0040853023529052734 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008486032485961914 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 312 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 320 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00830388069152832 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003463268280029297 s +INFO 06-24 21:57:23 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 288 +DEBUG 06-24 21:57:23 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:23 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:23 [infer_batch.py:156] radix refed token num 7383 +DEBUG 06-24 21:57:23 [infer_batch.py:156] radix hold token num 15833 +DEBUG 06-24 21:57:23 [infer_batch.py:156] mem manager can alloc token num 559 +DEBUG 06-24 21:57:23 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:23 [batch.py:51] router release req id 288 +INFO 06-24 21:57:23 [batch.py:51] router release req id 296 +INFO 06-24 21:57:23 [batch.py:51] router release req id 304 +INFO 06-24 21:57:23 [batch.py:51] router release req id 312 +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 296 +INFO 06-24 21:57:23 [batch.py:51] router release req id 320 +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 304 +INFO 06-24 21:57:23 [batch.py:51] router release req id 328 +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 312 +INFO 06-24 21:57:23 [batch.py:51] router release req id 336 +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 320 +INFO 06-24 21:57:23 [batch.py:51] router release req id 344 +INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009533166885375977 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 328 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 336 not send, decode is busy +INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009120464324951172 s +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 328 +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 336 +INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00877833366394043 s +INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 344 not send, decode is busy +INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 344 +DEBUG 06-24 21:57:23 [manager.py:391] Prefill Batch: batch_id=24528051848475343217428906132102045920, time:1750773443.890494s req_ids:[352, 360, 368, 376, 384, 392, 400] +DEBUG 06-24 21:57:23 [manager.py:391] +INFO 06-24 21:57:24 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 352 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 360 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 first_token_cost:8504.795789718628ms total_cost_time:8504.839897155762ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 368 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 first_token_cost:8499.641180038452ms total_cost_time:8499.666452407837ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 first_token_cost:8494.734048843384ms total_cost_time:8494.755506515503ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 376 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 first_token_cost:8490.172386169434ms total_cost_time:8490.193605422974ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 384 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 first_token_cost:8485.517740249634ms total_cost_time:8485.54015159607ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 392 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 first_token_cost:8480.722188949585ms total_cost_time:8480.743408203125ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 first_token_cost:8475.736141204834ms total_cost_time:8475.756406784058ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:24 [manager.py:162] detoken release req id 400 +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0008697509765625 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012676715850830078 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 352 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009419918060302734 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004806995391845703 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009534835815429688 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 360 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 368 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009966135025024414 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004705905914306641 s +INFO 06-24 21:57:24 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 352 +DEBUG 06-24 21:57:24 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:24 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:24 [infer_batch.py:156] radix refed token num 6332 +DEBUG 06-24 21:57:24 [infer_batch.py:156] radix hold token num 15824 +DEBUG 06-24 21:57:24 [infer_batch.py:156] mem manager can alloc token num 568 +DEBUG 06-24 21:57:24 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 360 +INFO 06-24 21:57:24 [batch.py:51] router release req id 352 +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 368 +INFO 06-24 21:57:24 [batch.py:51] router release req id 360 +INFO 06-24 21:57:24 [batch.py:51] router release req id 368 +INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010133743286132812 s +INFO 06-24 21:57:24 [batch.py:51] router release req id 376 +INFO 06-24 21:57:24 [batch.py:51] router release req id 384 +INFO 06-24 21:57:24 [batch.py:51] router release req id 392 +INFO 06-24 21:57:24 [batch.py:51] router release req id 400 +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 376 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 384 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011520147323608398 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 400 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007290840148925781 s +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 376 +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 384 +INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009365081787109375 s +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 392 not send, decode is busy +INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 400 not send, decode is busy +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 392 +INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 400 +DEBUG 06-24 21:57:24 [manager.py:391] Prefill Batch: batch_id=191998082800694326435444267982571911092, time:1750773444.8943431s req_ids:[408, 416, 424, 432, 440, 448, 456] +DEBUG 06-24 21:57:24 [manager.py:391] +INFO 06-24 21:57:25 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 408 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 416 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 first_token_cost:9469.240427017212ms total_cost_time:9469.283819198608ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 424 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 first_token_cost:9464.5094871521ms total_cost_time:9464.535474777222ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 432 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 first_token_cost:9459.718704223633ms total_cost_time:9459.740400314331ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 first_token_cost:9454.83422279358ms total_cost_time:9454.855680465698ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 440 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 first_token_cost:9450.320720672607ms total_cost_time:9450.342893600464ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 448 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 first_token_cost:9445.702314376831ms total_cost_time:9445.725440979004ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 first_token_cost:9441.13302230835ms total_cost_time:9441.15424156189ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:25 [manager.py:162] detoken release req id 456 +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 408 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004118204116821289 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 408 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009727954864501953 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 408 not send, decode is busy +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 416 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0096893310546875 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 424 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004786968231201172 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 416 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00877523422241211 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 416 not send, decode is busy +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 424 not send, decode is busy +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 432 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009337425231933594 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 440 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004602670669555664 s +INFO 06-24 21:57:25 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 408 +DEBUG 06-24 21:57:25 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:25 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:25 [infer_batch.py:156] radix refed token num 6334 +DEBUG 06-24 21:57:25 [infer_batch.py:156] radix hold token num 15836 +DEBUG 06-24 21:57:25 [infer_batch.py:156] mem manager can alloc token num 556 +DEBUG 06-24 21:57:25 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:25 [batch.py:51] router release req id 408 +INFO 06-24 21:57:25 [batch.py:51] router release req id 416 +INFO 06-24 21:57:25 [batch.py:51] router release req id 424 +INFO 06-24 21:57:25 [batch.py:51] router release req id 432 +INFO 06-24 21:57:25 [batch.py:51] router release req id 440 +INFO 06-24 21:57:25 [batch.py:51] router release req id 448 +INFO 06-24 21:57:25 [batch.py:51] router release req id 456 +INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 432 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011211633682250977 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 432 not send, decode is busy +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 440 not send, decode is busy +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 416 +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.01125788688659668 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00641179084777832 s +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 424 +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 432 +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 440 +INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011875152587890625 s +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 448 not send, decode is busy +INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 456 not send, decode is busy +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 448 +INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 456 +DEBUG 06-24 21:57:25 [manager.py:391] Prefill Batch: batch_id=218303380001311569318333645359995508613, time:1750773445.8957012s req_ids:[464, 472, 480, 488, 496, 504, 512] +DEBUG 06-24 21:57:25 [manager.py:391] +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 current batch size: 7 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 7405 +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.9660810151293314 contain prompt cache tree unrefed token +DEBUG 06-24 21:57:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 5661.956 tokens/s +DEBUG 06-24 21:57:26 [stats.py:37] Avg prompt tokens throughput: 5650.788 tokens/s +DEBUG 06-24 21:57:26 [stats.py:37] Avg generate tokens throughput: 11.169 tokens/s +INFO 06-24 21:57:26 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 464 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 472 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 480 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 first_token_cost:10434.598922729492ms total_cost_time:10434.705018997192ms,out_token_counter:1 mean_per_token_cost_time: 0.10609626770019531ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 first_token_cost:10430.545806884766ms total_cost_time:10430.574655532837ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 488 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 first_token_cost:10425.76789855957ms total_cost_time:10425.791263580322ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 496 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 first_token_cost:10420.909881591797ms total_cost_time:10420.931339263916ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 504 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 first_token_cost:10416.791915893555ms total_cost_time:10416.813850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:162] detoken release req id 512 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 first_token_cost:10412.447452545166ms total_cost_time:10412.468910217285ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 first_token_cost:10408.00404548645ms total_cost_time:10408.025979995728ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048732757568359375 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011396169662475586 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 464 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 472 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011648893356323242 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 480 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006359100341796875 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 488 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0016443729400634766 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 472 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010746479034423828 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 472 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 480 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 488 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008312463760375977 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 504 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0033416748046875 s +INFO 06-24 21:57:26 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 464 +DEBUG 06-24 21:57:26 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:26 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:26 [infer_batch.py:156] radix refed token num 6338 +DEBUG 06-24 21:57:26 [infer_batch.py:156] radix hold token num 15846 +DEBUG 06-24 21:57:26 [infer_batch.py:156] mem manager can alloc token num 546 +DEBUG 06-24 21:57:26 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 472 +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 480 +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 488 +INFO 06-24 21:57:26 [batch.py:51] router release req id 464 +INFO 06-24 21:57:26 [batch.py:51] router release req id 472 +INFO 06-24 21:57:26 [batch.py:51] router release req id 480 +INFO 06-24 21:57:26 [batch.py:51] router release req id 488 +INFO 06-24 21:57:26 [batch.py:51] router release req id 496 +INFO 06-24 21:57:26 [batch.py:51] router release req id 504 +INFO 06-24 21:57:26 [batch.py:51] router release req id 512 +INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010193109512329102 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 496 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 504 not send, decode is busy +INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 512 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00936269760131836 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 512 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008563041687011719 s +INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 512 not send, decode is busy +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 496 +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 504 +INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 512 +DEBUG 06-24 21:57:26 [manager.py:391] Prefill Batch: batch_id=161650577233271773526771599162437663158, time:1750773446.900781s req_ids:[520, 528, 536, 544, 552, 560, 568, 576] +DEBUG 06-24 21:57:26 [manager.py:391] +INFO 06-24 21:57:27 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 520 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 528 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 first_token_cost:11542.181491851807ms total_cost_time:11542.22321510315ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 first_token_cost:11537.727355957031ms total_cost_time:11537.752628326416ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 536 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 first_token_cost:11533.485889434814ms total_cost_time:11533.507585525513ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 544 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 first_token_cost:11529.413938522339ms total_cost_time:11529.436111450195ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 552 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 first_token_cost:11525.158405303955ms total_cost_time:11525.179624557495ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 first_token_cost:11521.02780342102ms total_cost_time:11521.04902267456ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 560 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 first_token_cost:11516.887664794922ms total_cost_time:11516.90936088562ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 568 +INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 first_token_cost:11512.858629226685ms total_cost_time:11512.879371643066ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:27 [manager.py:162] detoken release req id 576 +INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 520 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007232189178466797 s +INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 528 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0026750564575195312 s +INFO 06-24 21:57:27 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 520 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012886285781860352 s +INFO 06-24 21:57:27 [prefill_trans_obj.py:166] prefill node kv move task req_id: 520 not send, decode is busy +INFO 06-24 21:57:27 [prefill_trans_obj.py:166] prefill node kv move task req_id: 528 not send, decode is busy +INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011229753494262695 s +INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 544 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006566762924194336 s +INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0017480850219726562 s +INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009779691696166992 s +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 536 not send, decode is busy +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 544 not send, decode is busy +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 552 not send, decode is busy +INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007314920425415039 s +INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 568 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0025243759155273438 s +INFO 06-24 21:57:28 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 520 +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 528 +DEBUG 06-24 21:57:28 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:28 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:28 [infer_batch.py:156] radix refed token num 6346 +DEBUG 06-24 21:57:28 [infer_batch.py:156] radix hold token num 15840 +DEBUG 06-24 21:57:28 [infer_batch.py:156] mem manager can alloc token num 552 +DEBUG 06-24 21:57:28 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:28 [batch.py:51] router release req id 520 +INFO 06-24 21:57:28 [batch.py:51] router release req id 528 +INFO 06-24 21:57:28 [batch.py:51] router release req id 536 +INFO 06-24 21:57:28 [batch.py:51] router release req id 544 +INFO 06-24 21:57:28 [batch.py:51] router release req id 552 +INFO 06-24 21:57:28 [batch.py:51] router release req id 560 +INFO 06-24 21:57:28 [batch.py:51] router release req id 568 +INFO 06-24 21:57:28 [batch.py:51] router release req id 576 +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 536 +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 544 +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 552 +INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009374856948852539 s +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 560 not send, decode is busy +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 568 not send, decode is busy +INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0072596073150634766 s +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 560 +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 568 +INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010232925415039062 s +INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 576 not send, decode is busy +INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 576 +DEBUG 06-24 21:57:28 [manager.py:391] Prefill Batch: batch_id=67017997309100946241665083556177100204, time:1750773448.0456958s req_ids:[584, 592, 600, 608, 616, 624, 632] +DEBUG 06-24 21:57:28 [manager.py:391] +INFO 06-24 21:57:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:28 [statics_utils.py:24] mean first cost: 7008.399413691626 ms +INFO 06-24 21:57:28 [statics_utils.py:24] mean per token cost: 0.027954578399658203 ms +INFO 06-24 21:57:28 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 584 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 592 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 first_token_cost:12511.72685623169ms total_cost_time:12511.76929473877ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 first_token_cost:12507.427453994751ms total_cost_time:12507.45415687561ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 600 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 first_token_cost:12503.117799758911ms total_cost_time:12503.142595291138ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:1071 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 608 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 first_token_cost:12498.862743377686ms total_cost_time:12498.884439468384ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 first_token_cost:12494.251251220703ms total_cost_time:12494.272232055664ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 616 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 first_token_cost:12489.952325820923ms total_cost_time:12489.9742603302ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 first_token_cost:12485.601663589478ms total_cost_time:12485.623121261597ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 624 +INFO 06-24 21:57:28 [manager.py:162] detoken release req id 632 +INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002023935317993164 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011422157287597656 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 584 not send, decode is busy +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009478092193603516 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0046541690826416016 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008721351623535156 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 592 not send, decode is busy +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 600 not send, decode is busy +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009311199188232422 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 616 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004762172698974609 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.0055027008056640625 s +INFO 06-24 21:57:29 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 608 not send, decode is busy +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 616 not send, decode is busy +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 584 +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005312681198120117 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0006544589996337891 s +DEBUG 06-24 21:57:29 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:29 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:29 [infer_batch.py:156] radix refed token num 6347 +DEBUG 06-24 21:57:29 [infer_batch.py:156] radix hold token num 15857 +DEBUG 06-24 21:57:29 [infer_batch.py:156] mem manager can alloc token num 535 +DEBUG 06-24 21:57:29 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 592 +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 600 +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 608 +INFO 06-24 21:57:29 [batch.py:51] router release req id 584 +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 616 +INFO 06-24 21:57:29 [batch.py:51] router release req id 592 +INFO 06-24 21:57:29 [batch.py:51] router release req id 600 +INFO 06-24 21:57:29 [batch.py:51] router release req id 608 +INFO 06-24 21:57:29 [batch.py:51] router release req id 616 +INFO 06-24 21:57:29 [batch.py:51] router release req id 624 +INFO 06-24 21:57:29 [batch.py:51] router release req id 632 +INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007875204086303711 s +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 624 not send, decode is busy +INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 632 not send, decode is busy +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 624 +INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 632 +DEBUG 06-24 21:57:29 [manager.py:391] Prefill Batch: batch_id=47045407989267376152470951590131429777, time:1750773449.049589s req_ids:[640, 648, 656, 664, 672, 680, 688] +DEBUG 06-24 21:57:29 [manager.py:391] +INFO 06-24 21:57:29 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 640 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 648 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 first_token_cost:13478.892087936401ms total_cost_time:13478.918552398682ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 first_token_cost:13474.328994750977ms total_cost_time:13474.342823028564ms,out_token_counter:1 mean_per_token_cost_time: 0.013828277587890625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 656 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 first_token_cost:13469.989776611328ms total_cost_time:13470.000267028809ms,out_token_counter:1 mean_per_token_cost_time: 0.01049041748046875ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 first_token_cost:13465.176105499268ms total_cost_time:13465.185642242432ms,out_token_counter:1 mean_per_token_cost_time: 0.0095367431640625ms prompt_token_num:1043 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 664 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 first_token_cost:13460.595607757568ms total_cost_time:13460.603952407837ms,out_token_counter:1 mean_per_token_cost_time: 0.008344650268554688ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 first_token_cost:13455.767631530762ms total_cost_time:13455.775499343872ms,out_token_counter:1 mean_per_token_cost_time: 0.007867813110351562ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 672 +INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 first_token_cost:13451.16138458252ms total_cost_time:13451.169967651367ms,out_token_counter:1 mean_per_token_cost_time: 0.00858306884765625ms prompt_token_num:1069 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 680 +INFO 06-24 21:57:29 [manager.py:162] detoken release req id 688 +INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003117799758911133 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.006980419158935547 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 640 not send, decode is busy +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0006282329559326172 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003368854522705078 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 648 not send, decode is busy +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004578828811645508 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.0031185150146484375 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 656 not send, decode is busy +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 664 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008547067642211914 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 664 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003646373748779297 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 664 not send, decode is busy +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 672 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0026831626892089844 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 672 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.002583026885986328 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 672 not send, decode is busy +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005922794342041016 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003292560577392578 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 680 not send, decode is busy +INFO 06-24 21:57:30 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 640 +DEBUG 06-24 21:57:30 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:30 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:30 [infer_batch.py:156] radix refed token num 6342 +DEBUG 06-24 21:57:30 [infer_batch.py:156] radix hold token num 15883 +DEBUG 06-24 21:57:30 [infer_batch.py:156] mem manager can alloc token num 509 +DEBUG 06-24 21:57:30 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 648 +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 656 +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 664 +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 672 +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 680 +INFO 06-24 21:57:30 [batch.py:51] router release req id 640 +INFO 06-24 21:57:30 [batch.py:51] router release req id 648 +INFO 06-24 21:57:30 [batch.py:51] router release req id 656 +INFO 06-24 21:57:30 [batch.py:51] router release req id 664 +INFO 06-24 21:57:30 [batch.py:51] router release req id 672 +INFO 06-24 21:57:30 [batch.py:51] router release req id 680 +INFO 06-24 21:57:30 [batch.py:51] router release req id 688 +INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.012074470520019531 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010558128356933594 s +INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 688 not send, decode is busy +INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 688 +DEBUG 06-24 21:57:30 [manager.py:391] Prefill Batch: batch_id=248254461732847854709012582094359501133, time:1750773450.086136s req_ids:[696, 704, 712, 720, 728, 736, 744, 752] +DEBUG 06-24 21:57:30 [manager.py:391] +INFO 06-24 21:57:31 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 696 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 704 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 first_token_cost:14621.362209320068ms total_cost_time:14621.402740478516ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 first_token_cost:14617.124557495117ms total_cost_time:14617.151260375977ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 712 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 first_token_cost:14612.886428833008ms total_cost_time:14612.910747528076ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 720 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 first_token_cost:14608.647346496582ms total_cost_time:14608.66928100586ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 first_token_cost:14604.609727859497ms total_cost_time:14604.631900787354ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 728 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 first_token_cost:14600.08454322815ms total_cost_time:14600.10552406311ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 736 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 first_token_cost:14595.526695251465ms total_cost_time:14595.547914505005ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 first_token_cost:14591.10713005066ms total_cost_time:14591.128587722778ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 744 +INFO 06-24 21:57:31 [manager.py:162] detoken release req id 752 +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005808353424072266 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010880708694458008 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 696 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010863780975341797 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 712 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048520565032958984 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011660099029541016 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 704 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 712 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010832786560058594 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004339456558227539 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011260509490966797 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 720 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 728 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010160446166992188 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004326820373535156 s +INFO 06-24 21:57:31 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 696 +DEBUG 06-24 21:57:31 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:57:31 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:31 [infer_batch.py:156] radix refed token num 7375 +DEBUG 06-24 21:57:31 [infer_batch.py:156] radix hold token num 15828 +DEBUG 06-24 21:57:31 [infer_batch.py:156] mem manager can alloc token num 564 +DEBUG 06-24 21:57:31 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 704 +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 712 +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 720 +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 728 +INFO 06-24 21:57:31 [batch.py:51] router release req id 696 +INFO 06-24 21:57:31 [batch.py:51] router release req id 704 +INFO 06-24 21:57:31 [batch.py:51] router release req id 712 +INFO 06-24 21:57:31 [batch.py:51] router release req id 720 +INFO 06-24 21:57:31 [batch.py:51] router release req id 728 +INFO 06-24 21:57:31 [batch.py:51] router release req id 736 +INFO 06-24 21:57:31 [batch.py:51] router release req id 744 +INFO 06-24 21:57:31 [batch.py:51] router release req id 752 +INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011093854904174805 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 736 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 744 not send, decode is busy +INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010010242462158203 s +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 736 +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 744 +INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008131027221679688 s +INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 752 not send, decode is busy +INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 752 +DEBUG 06-24 21:57:31 [manager.py:391] Prefill Batch: batch_id=159894059491602842148363584452156276559, time:1750773451.243592s req_ids:[760, 768, 776, 784, 792, 800] +DEBUG 06-24 21:57:31 [manager.py:391] +INFO 06-24 21:57:32 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 760 +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 768 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 first_token_cost:15483.526229858398ms total_cost_time:15483.655452728271ms,out_token_counter:1 mean_per_token_cost_time: 0.12922286987304688ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 776 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 first_token_cost:15479.77352142334ms total_cost_time:15479.809999465942ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 first_token_cost:15475.495100021362ms total_cost_time:15475.528001785278ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 784 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 first_token_cost:15470.622777938843ms total_cost_time:15470.65258026123ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 first_token_cost:15466.00604057312ms total_cost_time:15466.034650802612ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 792 +INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 first_token_cost:15461.296558380127ms total_cost_time:15461.327075958252ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:57:32 [manager.py:162] detoken release req id 800 +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010465860366821289 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004469871520996094 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011321306228637695 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 760 not send, decode is busy +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 768 not send, decode is busy +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 776 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010179758071899414 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 784 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004068851470947266 s +INFO 06-24 21:57:32 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 21:57:32 [req_manager.py:78] freed all request size 136 +INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 776 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010899782180786133 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 776 not send, decode is busy +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 784 not send, decode is busy +DEBUG 06-24 21:57:32 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:57:32 [infer_batch.py:156] radix refed token num 6350 +DEBUG 06-24 21:57:32 [infer_batch.py:156] radix hold token num 15847 +DEBUG 06-24 21:57:32 [infer_batch.py:156] mem manager can alloc token num 545 +DEBUG 06-24 21:57:32 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00947117805480957 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 800 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003470897674560547 s +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 760 +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 768 +INFO 06-24 21:57:32 [batch.py:51] router release req id 760 +INFO 06-24 21:57:32 [batch.py:51] router release req id 768 +INFO 06-24 21:57:32 [batch.py:51] router release req id 776 +INFO 06-24 21:57:32 [batch.py:51] router release req id 784 +INFO 06-24 21:57:32 [batch.py:51] router release req id 792 +INFO 06-24 21:57:32 [batch.py:51] router release req id 800 +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 776 +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 784 +INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011971712112426758 s +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 792 not send, decode is busy +INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 800 not send, decode is busy +INFO 06-24 21:57:32 [shm_req_manager.py:119] all shm req has been release ok +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 792 +INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 800 +DEBUG 06-24 21:57:36 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:57:36 [manager.py:283] +DEBUG 06-24 21:57:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:57:36 [manager.py:284] +INFO 06-24 21:57:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:57:58 [statics_utils.py:24] mean first cost: 8960.396783351898 ms +INFO 06-24 21:57:58 [statics_utils.py:24] mean per token cost: 0.027647018432617188 ms +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_99 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_99 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_99 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_99 +INFO 06-24 21:58:18 [manager.py:224] router recive req id 808 cost time 0.03365135192871094 s +INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 +INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 808 cost time 0.035724639892578125 s +DEBUG 06-24 21:58:18 [manager.py:391] Prefill Batch: batch_id=116507823906490739156615915078303123438, time:1750773498.0802565s req_ids:[808] +DEBUG 06-24 21:58:18 [manager.py:391] +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 current batch size: 1 +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 1059 +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 token used ratio: 0.9667520741825281 contain prompt cache tree unrefed token +DEBUG 06-24 21:58:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 764.296 tokens/s +DEBUG 06-24 21:58:18 [stats.py:37] Avg prompt tokens throughput: 762.754 tokens/s +DEBUG 06-24 21:58:18 [stats.py:37] Avg generate tokens throughput: 1.541 tokens/s +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_98 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_98 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_98 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_98 +INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_97 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_97 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_97 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_97 +INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_96 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_96 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_96 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_96 +INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_95 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_95 +WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_95 and create again +INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_95 +INFO 06-24 21:58:18 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:58:18 [manager.py:162] detoken release req id 808 +INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 first_token_cost:207.29327201843262ms total_cost_time:207.34190940856934ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:58:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 21:58:18 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:58:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:58:18 [infer_batch.py:156] radix refed token num 1058 +DEBUG 06-24 21:58:18 [infer_batch.py:156] radix hold token num 15836 +DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager can alloc token num 556 +DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:58:18 [batch.py:51] router release req id 808 +INFO 06-24 21:58:18 [manager.py:224] router recive req id 816 cost time 0.17960238456726074 s +INFO 06-24 21:58:18 [manager.py:224] router recive req id 824 cost time 0.16028761863708496 s +INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 816 cost time 0.1804039478302002 s +INFO 06-24 21:58:18 [manager.py:224] router recive req id 832 cost time 0.14226198196411133 s +INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 824 cost time 0.1611027717590332 s +INFO 06-24 21:58:18 [manager.py:224] router recive req id 840 cost time 0.12623143196105957 s +INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 832 cost time 0.14295721054077148 s +INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 840 cost time 0.12695002555847168 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 808 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005694866180419922 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 808 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00798654556274414 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 808 not send, decode is busy +INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 808 +DEBUG 06-24 21:58:18 [manager.py:391] Prefill Batch: batch_id=180573588197561696527024042514760098171, time:1750773498.271277s req_ids:[816, 824, 832, 840] +DEBUG 06-24 21:58:18 [manager.py:391] +INFO 06-24 21:58:18 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 21:58:18 [manager.py:162] detoken release req id 816 +INFO 06-24 21:58:18 [manager.py:162] detoken release req id 824 +INFO 06-24 21:58:18 [manager.py:162] detoken release req id 832 +INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 first_token_cost:766.4666175842285ms total_cost_time:766.5369510650635ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:58:18 [manager.py:162] detoken release req id 840 +INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 first_token_cost:747.0569610595703ms total_cost_time:747.0846176147461ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 first_token_cost:728.865385055542ms total_cost_time:728.8897037506104ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 first_token_cost:712.9158973693848ms total_cost_time:712.9440307617188ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 816 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007913351058959961 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 824 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0014026165008544922 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 816 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007066965103149414 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 816 not send, decode is busy +INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 824 not send, decode is busy +INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0028319358825683594 s +INFO 06-24 21:58:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 816 +INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 824 +DEBUG 06-24 21:58:18 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 21:58:18 [infer_batch.py:156] free a batch state: +DEBUG 06-24 21:58:18 [infer_batch.py:156] radix refed token num 2115 +DEBUG 06-24 21:58:18 [infer_batch.py:156] radix hold token num 15834 +DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager can alloc token num 558 +DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 21:58:18 [batch.py:51] router release req id 816 +INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007506370544433594 s +INFO 06-24 21:58:18 [batch.py:51] router release req id 824 +INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 832 not send, decode is busy +INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003538370132446289 s +INFO 06-24 21:58:18 [batch.py:51] router release req id 832 +INFO 06-24 21:58:18 [batch.py:51] router release req id 840 +INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007813692092895508 s +INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 840 not send, decode is busy +INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 832 +INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 840 +INFO 06-24 21:58:18 [shm_req_manager.py:119] all shm req has been release ok +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:58:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:58:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 21:58:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 21:58:38 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:58:38 [manager.py:283] +DEBUG 06-24 21:58:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:58:38 [manager.py:284] +INFO 06-24 21:58:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:58:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 21:58:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist +INFO 06-24 21:59:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:59:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 21:59:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +DEBUG 06-24 21:59:38 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:59:38 [manager.py:283] +DEBUG 06-24 21:59:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:59:38 [manager.py:284] +INFO 06-24 21:59:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:59:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 21:59:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +INFO 06-24 22:00:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:00:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 22:00:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +DEBUG 06-24 22:00:39 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:00:39 [manager.py:283] +DEBUG 06-24 22:00:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 22:00:39 [manager.py:284] +INFO 06-24 22:00:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:00:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 22:00:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +INFO 06-24 22:01:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 22:01:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms +INFO 06-24 22:01:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms +ERROR 06-24 22:01:39 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 22:01:39 [pd_loop.py:121] no close frame received or sent +ERROR 06-24 22:01:39 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task +ERROR 06-24 22:01:39 [pd_loop.py:121] recv_bytes = await websocket.recv() +ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv +ERROR 06-24 22:01:39 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc +ERROR 06-24 22:01:39 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent +DEBUG 06-24 22:01:40 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 22:01:40 [manager.py:283] +DEBUG 06-24 22:01:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 22:01:40 [manager.py:284] +INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... + +ERROR 06-24 22:01:41 [prefill_kv_move_manager.py:96] +Traceback (most recent call last): + File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_kv_move_manager.py", line 85, in task_dispatcher_loop + move_task: KVMoveTask = self.info_queue.get() + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/queues.py", line 103, in get + res = self._recv_bytes() + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes + buf = self._recv_bytes(maxlength) + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes + buf = self._recv(4) + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 379, in _recv + chunk = read(handle, remaining) +KeyboardInterrupt +INFO 06-24 22:01:41 [start_utils.py:106] Killing child process 1414656 +INFO 06-24 22:01:41 [start_utils.py:106] Killing child process 1415230 +INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 +INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411713 +INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 +INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1413850 +INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1414262 +INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411872 +INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 +INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1413850 +INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411872 +INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... +INFO 06-24 22:01:42 [start_utils.py:108] Killing parent process 1414655 +INFO 06-24 22:01:42 [start_utils.py:53] Killing parent process 1411872 +INFO 06-24 22:01:42 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 22:01:42 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_p_4096.log b/pd_p_4096.log new file mode 100644 index 000000000..3c689fff0 --- /dev/null +++ b/pd_p_4096.log @@ -0,0 +1,2843 @@ +INFO 06-24 19:53:53 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:53:54 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:53:55 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:53:57 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:53:57 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:53:57 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:53:57 [api_start.py:79] zmq mode head: ipc:///tmp/_2732_0_ +INFO 06-24 19:53:57 [api_start.py:81] use tgi api: False +INFO 06-24 19:53:57 [api_start.py:192] alloced ports: [10173, 10076, 10098, 10080, 10160, 10233, 10089, 10220, 10247] +INFO 06-24 19:53:57 [api_start.py:233] all start args:Namespace(run_mode='prefill', host='127.0.1.1', port=8017, httpserver_workers=1, zmq_mode='ipc:///tmp/_2732_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=128, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16000, nccl_host='127.0.0.1', nccl_port=2732, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=True, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=16000, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10173, detokenization_port=10076, detokenization_pub_port=10098, visual_port=10080, audio_port=10160, cache_port=10233, metric_port=10089, pd_node_infer_rpyc_ports=[10247], pd_node_id=163479035537597727162519172725806046247, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) +INFO 06-24 19:53:58 [start_utils.py:37] init func start_metric_manager : init ok +INFO 06-24 19:54:00 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:01 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:02 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:03 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:03 [__init__.py:239] Automatically detected platform cuda. +INFO 06-24 19:54:04 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:05 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:05 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:05 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:05 [shm_req_manager.py:59] create lock shm 2732_0_req_shm_total +INFO 06-24 19:54:05 [atomic_array_lock.py:29] create lock shm 2732_0_array_reqs_lock +INFO 06-24 19:54:05 [atomic_lock.py:26] create lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_mem_manger_can_use_token_num_0 +INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_shared_token_load +INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_shared_token_load_ext_infos +INFO 06-24 19:54:05 [model_rpc.py:70] Initialized RPC server for rank 0. +INFO 06-24 19:54:05 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 19:54:05 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 19:54:05 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 19:54:05 [model_rpc.py:184] use ChunckedPrefillForPrefillNode +WARNING 06-24 19:54:05 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:05 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:05 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:06 [manager.py:41] pub_to_httpserver sendhwm 1000 +INFO 06-24 19:54:06 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 19:54:06 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 19:54:06 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_shared_token_load +INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos +INFO 06-24 19:54:07 [shared_arr.py:17] create shm 2732_0_dp_rank_0_lock_tp_infos +INFO 06-24 19:54:07 [basemodel.py:134] Initial quantization. The default quantization method is none +INFO 06-24 19:54:07 [mem_utils.py:11] mode setting params: [] +INFO 06-24 19:54:07 [mem_utils.py:25] Model kv cache using mode normal +INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_mem_manger_can_use_token_num_0 +INFO 06-24 19:54:20 [basemodel.py:652] begin check max_len infer +INFO 06-24 19:54:21 [basemodel.py:680] check max_len 8448 infer ok +INFO 06-24 19:54:21 [shared_arr.py:17] create shm 2732_0_refed_tokens_num_0 +INFO 06-24 19:54:21 [shared_arr.py:17] create shm 2732_0_tree_total_tokens_num_0 +INFO 06-24 19:54:21 [base_backend.py:135] loaded model class +INFO 06-24 19:54:21 [prefill_impl.py:36] lock_nccl_group ranks 0 +INFO 06-24 19:54:21 [shared_arr.py:20] link shm 2732_0_refed_tokens_num_0 +INFO 06-24 19:54:21 [shared_arr.py:20] link shm 2732_0_tree_total_tokens_num_0 +INFO 06-24 19:54:21 [manager.py:196] use req queue QueueForPDChunkedPrefill +INFO 06-24 19:54:23 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:24 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:26 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:28 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:28 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:28 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:28 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 19:54:28 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 +INFO 06-24 19:54:28 [prefill_kv_move_manager.py:55] rpyc connect to infer rpyc port: 10247 ok +INFO 06-24 19:54:28 [net_utils.py:51] get hostname ip 127.0.1.1 +INFO 06-24 19:54:28 [prefill_trans_process.py:154] prefill trans kv process for device: 0 started! +INFO 06-24 19:54:30 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:31 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:33 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:35 [prefill_infer_rpyc.py:51] put mem manager to mem_queue ok +INFO 06-24 19:54:35 [prefill_kv_move_manager.py:246] prefill kv move manager process started +INFO 06-24 19:54:35 [start_utils.py:37] init func start_router_process : init ok +INFO 06-24 19:54:35 [start_utils.py:37] init func start_detokenization_process : init ok +INFO 06-24 19:54:35 [api_start.py:57] start process pid 1211011 +INFO 06-24 19:54:35 [api_start.py:58] http server pid 1213612 +INFO 06-24 19:54:39 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On +INFO 06-24 19:54:40 [importing.py:53] Triton module has been replaced with a placeholder. +INFO 06-24 19:54:40 [__init__.py:239] Automatically detected platform cuda. +WARNING 06-24 19:54:42 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. +WARNING 06-24 19:54:42 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm +INFO 06-24 19:54:42 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. +INFO 06-24 19:54:42 [api_http.py:326] server start up +INFO 06-24 19:54:42 [atomic_array_lock.py:32] link lock shm 2732_0_lightllm_resource_lock +INFO 06-24 19:54:42 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total +INFO 06-24 19:54:42 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock +INFO 06-24 19:54:42 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock +INFO 06-24 19:54:43 [atomic_lock.py:29] link lock shm 2732_0_req_id_gen_lock +INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_latest_success_infer_time_mark +INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_shared_token_load +INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos +INFO 06-24 19:54:43 [api_http.py:330] server start up ok, loop use is +INFO 06-24 19:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:54:43 [pd_loop.py:92] Sent registration JSON: {'node_id': 163479035537597727162519172725806046247, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10173, 'detokenization_port': 10076, 'detokenization_pub_port': 10098, 'visual_port': 10080, 'audio_port': 10160, 'cache_port': 10233, 'metric_port': 10089, 'pd_node_infer_rpyc_ports': [10247], 'pd_node_id': 163479035537597727162519172725806046247, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} +INFO 06-24 19:55:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:13 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 19:55:41 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:55:41 [manager.py:283] +DEBUG 06-24 19:55:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:55:41 [manager.py:284] +INFO 06-24 19:55:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:55:43 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:55:43 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:56:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:56:13 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 19:56:42 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:56:42 [manager.py:283] +DEBUG 06-24 19:56:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:56:42 [manager.py:284] +INFO 06-24 19:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:56:43 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:56:43 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:57:13 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 19:57:43 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:57:43 [manager.py:283] +DEBUG 06-24 19:57:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:57:43 [manager.py:284] +INFO 06-24 19:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:57:43 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:57:43 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:58:13 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:58:43 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:58:43 [statics_utils.py:24] mean per token cost: 0.0 ms +DEBUG 06-24 19:58:43 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:58:43 [manager.py:283] +DEBUG 06-24 19:58:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 19:58:43 [manager.py:284] +INFO 06-24 19:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:13 [statics_utils.py:24] mean first cost: 0.0 ms +INFO 06-24 19:59:13 [statics_utils.py:24] mean per token cost: 0.0 ms +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 8 cost time 0.0771784782409668 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 16 cost time 0.01605367660522461 s +DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=5056365483121303145815933528891045344, time:1750766380.2746513s req_ids:[8] +DEBUG 06-24 19:59:40 [manager.py:391] +DEBUG 06-24 19:59:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 3.153 tokens/s +DEBUG 06-24 19:59:40 [stats.py:37] Avg prompt tokens throughput: 3.153 tokens/s +DEBUG 06-24 19:59:40 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 8 cost time 0.11862468719482422 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 16 cost time 0.023453235626220703 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 +INFO 06-24 19:59:40 [manager.py:162] detoken release req id 8 +DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=287395042470880448688684009838947316744, time:1750766380.5638816s req_ids:[16] +DEBUG 06-24 19:59:40 [manager.py:391] +INFO 06-24 19:59:40 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 +INFO 06-24 19:59:40 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 +INFO 06-24 19:59:40 [batch.py:51] router release req id 8 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 24 cost time 0.4605579376220703 s +INFO 06-24 19:59:40 [manager.py:162] detoken release req id 16 +INFO 06-24 19:59:40 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 24 cost time 0.46323204040527344 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 +INFO 06-24 19:59:40 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 19:59:40 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:40 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:40 [infer_batch.py:156] radix refed token num 2108 +DEBUG 06-24 19:59:40 [infer_batch.py:156] radix hold token num 2108 +DEBUG 06-24 19:59:40 [infer_batch.py:156] mem manager can alloc token num 14284 +DEBUG 06-24 19:59:40 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:40 [batch.py:51] router release req id 16 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 32 cost time 0.4595165252685547 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 40 cost time 0.4420509338378906 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 48 cost time 0.43045759201049805 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 32 cost time 0.46100616455078125 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 56 cost time 0.4211440086364746 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 64 cost time 0.4114036560058594 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 72 cost time 0.40201258659362793 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 80 cost time 0.39416027069091797 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 40 cost time 0.4444599151611328 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 88 cost time 0.3561263084411621 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 96 cost time 0.34813475608825684 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 104 cost time 0.3421444892883301 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 112 cost time 0.3359825611114502 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 120 cost time 0.32976698875427246 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 48 cost time 0.433704137802124 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 128 cost time 0.3224823474884033 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 136 cost time 0.3159162998199463 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 144 cost time 0.30929017066955566 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 152 cost time 0.30318641662597656 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 56 cost time 0.425382137298584 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 160 cost time 0.297224760055542 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 168 cost time 0.29160284996032715 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 176 cost time 0.2858412265777588 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 184 cost time 0.28053855895996094 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 64 cost time 0.416536808013916 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 192 cost time 0.2750709056854248 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 200 cost time 0.2698476314544678 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 208 cost time 0.2634468078613281 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 216 cost time 0.2484593391418457 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 72 cost time 0.4081766605377197 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 224 cost time 0.24333763122558594 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 232 cost time 0.23853564262390137 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 240 cost time 0.23358726501464844 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 248 cost time 0.22908997535705566 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 80 cost time 0.4013192653656006 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 256 cost time 0.22463440895080566 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 264 cost time 0.22019624710083008 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 272 cost time 0.2154369354248047 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 280 cost time 0.20968985557556152 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 88 cost time 0.3643667697906494 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 288 cost time 0.20473432540893555 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 296 cost time 0.2002854347229004 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 304 cost time 0.19563555717468262 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 96 cost time 0.35745882987976074 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 312 cost time 0.19149160385131836 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 320 cost time 0.18721485137939453 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 328 cost time 0.18253755569458008 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 104 cost time 0.3525278568267822 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 336 cost time 0.17823219299316406 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 344 cost time 0.1357724666595459 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 352 cost time 0.13075518608093262 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 360 cost time 0.12569165229797363 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 112 cost time 0.34745216369628906 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 368 cost time 0.12105774879455566 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 +INFO 06-24 19:59:40 [manager.py:224] router recive req id 376 cost time 0.1166226863861084 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 384 cost time 0.11048221588134766 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 392 cost time 0.10562920570373535 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 120 cost time 0.3422739505767822 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 400 cost time 0.10108423233032227 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 408 cost time 0.0954592227935791 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 416 cost time 0.09096550941467285 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 424 cost time 0.08623027801513672 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 432 cost time 0.08154058456420898 s +INFO 06-24 19:59:40 [manager.py:224] router recive req id 440 cost time 0.07657957077026367 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 +DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=267698894269622210945840868524439186934, time:1750766380.767022s req_ids:[24, 32, 40, 48, 56, 64, 72] +DEBUG 06-24 19:59:40 [manager.py:391] +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 128 cost time 0.3727076053619385 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 136 cost time 0.3678281307220459 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 144 cost time 0.3624389171600342 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 152 cost time 0.3575727939605713 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 160 cost time 0.35327982902526855 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 168 cost time 0.34880661964416504 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 176 cost time 0.34411168098449707 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 184 cost time 0.33986663818359375 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 192 cost time 0.33549952507019043 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 200 cost time 0.33135485649108887 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 208 cost time 0.3260159492492676 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 216 cost time 0.3121187686920166 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 224 cost time 0.3080432415008545 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 232 cost time 0.3041059970855713 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 240 cost time 0.3001420497894287 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 248 cost time 0.2966794967651367 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 256 cost time 0.2932896614074707 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 264 cost time 0.289884090423584 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 272 cost time 0.2861654758453369 s +INFO 06-24 19:59:40 [rpyc_fix_utils.py:36] change socket buffer from 2626560 131072 change to 4194304 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 280 cost time 0.28146958351135254 s +INFO 06-24 19:59:40 [prefill_trans_process.py:61] connect start PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=-1, prefill_id=163479035537597727162519172725806046247, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') +INFO 06-24 19:59:40 [prefill_trans_process.py:64] connect src_id 163479035537597727162519172725806046247 dest_id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:40 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 288 cost time 0.27756190299987793 s +INFO 06-24 19:59:40 [pynccl.py:180] LightLLM is using nccl==2.21.5 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 296 cost time 0.2741262912750244 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 304 cost time 0.27051210403442383 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 312 cost time 0.2665543556213379 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 320 cost time 0.26325225830078125 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 328 cost time 0.2595937252044678 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 336 cost time 0.2561604976654053 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 344 cost time 0.21455073356628418 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 352 cost time 0.21040844917297363 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 360 cost time 0.20623016357421875 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 368 cost time 0.20246386528015137 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 376 cost time 0.19890666007995605 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 384 cost time 0.22502851486206055 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 392 cost time 0.22162365913391113 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 400 cost time 0.2180180549621582 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 408 cost time 0.21332144737243652 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 416 cost time 0.20965194702148438 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 424 cost time 0.2062544822692871 s +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 432 cost time 0.20240402221679688 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 +INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 440 cost time 0.19823169708251953 s +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 +INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 +INFO 06-24 19:59:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 first_token_cost:764.474630355835ms total_cost_time:764.5032405853271ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 first_token_cost:668.4184074401855ms total_cost_time:668.4293746948242ms,out_token_counter:1 mean_per_token_cost_time: 0.010967254638671875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [prefill_trans_process.py:81] PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=-1, prefill_id=163479035537597727162519172725806046247, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') kv trans connected! +INFO 06-24 19:59:41 [prefill_trans_obj.py:104] create KVTransConnectObj success: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 8 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 1.196134328842163 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 16 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 1.0328316688537598 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 8 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009828329086303711 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 16 not send, decode is busy +INFO 06-24 19:59:41 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 1.206552505493164 s +INFO 06-24 19:59:41 [prefill_trans_process.py:34] trans start: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:41 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 16 +INFO 06-24 19:59:41 [manager.py:224] router recive req id 448 cost time 1.2438838481903076 s +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 24 +INFO 06-24 19:59:41 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 32 +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 40 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 first_token_cost:1665.039300918579ms total_cost_time:1665.0831699371338ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 first_token_cost:1653.5532474517822ms total_cost_time:1653.5794734954834ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 48 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 first_token_cost:1636.0180377960205ms total_cost_time:1636.0392570495605ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 56 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 first_token_cost:1624.2146492004395ms total_cost_time:1624.2358684539795ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 64 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 first_token_cost:1615.1435375213623ms total_cost_time:1615.1671409606934ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 first_token_cost:1605.3791046142578ms total_cost_time:1605.4003238677979ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:162] detoken release req id 72 +INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 first_token_cost:1596.0474014282227ms total_cost_time:1596.0681438446045ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:41 [manager.py:68] detokenization recv req id 448 cost time 1.2521979808807373 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 24 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007410764694213867 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 24 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009976387023925781 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 24 not send, decode is busy +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006712436676025391 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007524251937866211 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 32 not send, decode is busy +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0020258426666259766 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007321357727050781 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 40 not send, decode is busy +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009424448013305664 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008481264114379883 s +INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 48 not send, decode is busy +INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008547306060791016 s +INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008028745651245117 s +INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 56 not send, decode is busy +INFO 06-24 19:59:42 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007581472396850586 s +INFO 06-24 19:59:42 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 24 +DEBUG 06-24 19:59:42 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:42 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:42 [infer_batch.py:156] radix refed token num 7398 +DEBUG 06-24 19:59:42 [infer_batch.py:156] radix hold token num 9510 +DEBUG 06-24 19:59:42 [infer_batch.py:156] mem manager can alloc token num 6882 +DEBUG 06-24 19:59:42 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 32 +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 40 +INFO 06-24 19:59:42 [batch.py:51] router release req id 24 +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 48 +INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00882101058959961 s +INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 64 not send, decode is busy +INFO 06-24 19:59:42 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007585048675537109 s +INFO 06-24 19:59:42 [batch.py:51] router release req id 32 +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 56 +INFO 06-24 19:59:42 [batch.py:51] router release req id 40 +INFO 06-24 19:59:42 [batch.py:51] router release req id 48 +INFO 06-24 19:59:42 [batch.py:51] router release req id 56 +INFO 06-24 19:59:42 [batch.py:51] router release req id 64 +INFO 06-24 19:59:42 [batch.py:51] router release req id 72 +INFO 06-24 19:59:42 [manager.py:224] router recive req id 456 cost time 1.3277161121368408 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 464 cost time 1.3242998123168945 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 472 cost time 1.319894552230835 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 456 cost time 1.3300683498382568 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 480 cost time 1.3157711029052734 s +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 64 +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 464 cost time 1.326838493347168 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 488 cost time 1.3120489120483398 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 472 cost time 1.322997808456421 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 496 cost time 1.3070104122161865 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 504 cost time 1.3010101318359375 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 512 cost time 1.296341896057129 s +INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009949684143066406 s +INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 72 not send, decode is busy +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 480 cost time 1.3195230960845947 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 520 cost time 1.291534662246704 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 528 cost time 1.2869133949279785 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 536 cost time 1.2822480201721191 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 488 cost time 1.3156139850616455 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 544 cost time 1.2775328159332275 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 552 cost time 1.2729871273040771 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 496 cost time 1.3109245300292969 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 560 cost time 1.2685813903808594 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 568 cost time 1.2642817497253418 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 576 cost time 1.2597098350524902 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 504 cost time 1.305795431137085 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 584 cost time 1.2549967765808105 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 592 cost time 1.2503442764282227 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 600 cost time 1.2441017627716064 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 512 cost time 1.302056074142456 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 608 cost time 1.2394473552703857 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 616 cost time 1.235461950302124 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 520 cost time 1.2981574535369873 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 624 cost time 1.2309205532073975 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 632 cost time 1.2266817092895508 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 640 cost time 1.2221033573150635 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 528 cost time 1.294480323791504 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 648 cost time 1.2175545692443848 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 656 cost time 1.21309232711792 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 664 cost time 1.2083725929260254 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 536 cost time 1.2907042503356934 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 672 cost time 1.2038202285766602 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 680 cost time 1.1997058391571045 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 544 cost time 1.286849021911621 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 688 cost time 1.1952989101409912 s +INFO 06-24 19:59:42 [manager.py:224] router recive req id 696 cost time 1.1898579597473145 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 552 cost time 1.2832188606262207 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 560 cost time 1.2795476913452148 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 568 cost time 1.276412010192871 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 576 cost time 1.2727677822113037 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 584 cost time 1.268918514251709 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 592 cost time 1.2651641368865967 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 600 cost time 1.2598137855529785 s +DEBUG 06-24 19:59:42 [manager.py:391] Prefill Batch: batch_id=148194245146493766126448738228854701509, time:1750766382.0499694s req_ids:[80, 88, 96, 104, 112, 120, 128, 136] +DEBUG 06-24 19:59:42 [manager.py:391] +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 608 cost time 1.2560806274414062 s +INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 72 +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 616 cost time 1.2526047229766846 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 624 cost time 1.2489118576049805 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 632 cost time 1.2455463409423828 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 640 cost time 1.241865873336792 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 648 cost time 1.2382326126098633 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 656 cost time 1.2346761226654053 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 664 cost time 1.2308566570281982 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 672 cost time 1.2271971702575684 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 680 cost time 1.2235558032989502 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 688 cost time 1.2199194431304932 s +INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 696 cost time 1.2149651050567627 s +INFO 06-24 19:59:42 [prefill_trans_process.py:42] trans finished: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1055 +INFO 06-24 19:59:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 19:59:43 [statics_utils.py:24] mean first cost: 1425.3653685251873 ms +INFO 06-24 19:59:43 [statics_utils.py:24] mean per token cost: 0.02418624030219184 ms +INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 1.5516793727874756,move_total_kv_len: 1055, id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 8 cost total time: 2.7596399784088135 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 8 +INFO 06-24 19:59:43 [manager.py:224] router recive req id 704 cost time 2.6499106884002686 s +INFO 06-24 19:59:43 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 80 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 88 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 96 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 first_token_cost:3159.6274375915527ms total_cost_time:3159.6851348876953ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 first_token_cost:3121.9112873077393ms total_cost_time:3121.9427585601807ms,out_token_counter:1 mean_per_token_cost_time: 0.03147125244140625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 104 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 first_token_cost:3114.2942905426025ms total_cost_time:3114.32147026062ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 first_token_cost:3108.484983444214ms total_cost_time:3108.5076332092285ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 first_token_cost:3102.428913116455ms total_cost_time:3102.4506092071533ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 112 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 first_token_cost:3096.4436531066895ms total_cost_time:3096.466302871704ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 first_token_cost:3089.165449142456ms total_cost_time:3089.1857147216797ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 first_token_cost:3082.70263671875ms total_cost_time:3082.723379135132ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 120 +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 128 +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 80 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0018470287322998047 s +INFO 06-24 19:59:43 [manager.py:162] detoken release req id 136 +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 704 cost time 2.6649019718170166 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 80 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.018401384353637695 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 80 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 88 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.014518260955810547 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008265972137451172 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021898746490478516 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 88 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010574102401733398 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 88 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 96 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 104 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0076749324798583984 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0008678436279296875 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009312868118286133 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 112 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004452228546142578 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.012329578399658203 s +INFO 06-24 19:59:43 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 80 +INFO 06-24 19:59:43 [prefill_trans_process.py:34] trans start: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +DEBUG 06-24 19:59:43 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:43 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:43 [infer_batch.py:156] radix refed token num 7389 +DEBUG 06-24 19:59:43 [infer_batch.py:156] radix hold token num 15845 +DEBUG 06-24 19:59:43 [infer_batch.py:156] mem manager can alloc token num 547 +DEBUG 06-24 19:59:43 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:43 [batch.py:51] router release req id 80 +INFO 06-24 19:59:43 [batch.py:51] router release req id 88 +INFO 06-24 19:59:43 [batch.py:51] router release req id 96 +INFO 06-24 19:59:43 [batch.py:51] router release req id 104 +INFO 06-24 19:59:43 [batch.py:51] router release req id 112 +INFO 06-24 19:59:43 [batch.py:51] router release req id 120 +INFO 06-24 19:59:43 [batch.py:51] router release req id 128 +INFO 06-24 19:59:43 [batch.py:51] router release req id 136 +INFO 06-24 19:59:43 [manager.py:224] router recive req id 712 cost time 2.7033867835998535 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 720 cost time 2.6990702152252197 s +INFO 06-24 19:59:43 [prefill_trans_process.py:42] trans finished: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 +INFO 06-24 19:59:43 [manager.py:224] router recive req id 728 cost time 2.6946513652801514 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 736 cost time 2.6902432441711426 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 712 cost time 2.70529842376709 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 88 +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 96 +INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010231494903564453 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 744 cost time 2.6858882904052734 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 104 +INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009371519088745117 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 112 +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 720 cost time 2.701856851577759 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 752 cost time 2.68165922164917 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 760 cost time 2.67747163772583 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 768 cost time 2.6727747917175293 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 776 cost time 2.667965888977051 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 784 cost time 2.662921667098999 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 792 cost time 2.6576287746429443 s +INFO 06-24 19:59:43 [manager.py:224] router recive req id 800 cost time 2.652892827987671 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 728 cost time 2.6993229389190674 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 736 cost time 2.696143627166748 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 744 cost time 2.6924619674682617 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 752 cost time 2.6888301372528076 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 760 cost time 2.685490608215332 s +INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 0.017946243286132812,move_total_kv_len: 1058, id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 120 cost total time: 0.03179788589477539 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.025861501693725586 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 768 cost time 2.681823968887329 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011013269424438477 s +INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 136 not send, decode is busy +INFO 06-24 19:59:43 [prefill_trans_process.py:34] trans start: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 776 cost time 2.678018808364868 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 784 cost time 2.6740620136260986 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 120 +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 136 +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 792 cost time 2.6707096099853516 s +INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 800 cost time 2.6670334339141846 s +INFO 06-24 19:59:43 [prefill_trans_process.py:42] trans finished: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=294565578027902777965125904067623474870, time:1750766383.592034s req_ids:[144, 152, 160, 168, 176, 184, 192, 200] +DEBUG 06-24 19:59:43 [manager.py:391] +INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 0.020441293716430664,move_total_kv_len: 1049, id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 128 cost total time: 0.04755067825317383 s +INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 128 +INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 +WARNING 06-24 19:59:43 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_16 and create again +INFO 06-24 19:59:43 [shm_array.py:30] create shm 2732_0_shm_logprobs_16 +WARNING 06-24 19:59:43 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_16 and create again +INFO 06-24 19:59:43 [shm_array.py:30] create shm 2732_0_shm_prompts_16 +INFO 06-24 19:59:44 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 144 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 152 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 first_token_cost:4231.854200363159ms total_cost_time:4231.899261474609ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 first_token_cost:4225.930213928223ms total_cost_time:4225.956916809082ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 first_token_cost:4220.216751098633ms total_cost_time:4220.238924026489ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 160 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 first_token_cost:4214.667320251465ms total_cost_time:4214.688777923584ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 first_token_cost:4208.98175239563ms total_cost_time:4209.003448486328ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 first_token_cost:4203.855276107788ms total_cost_time:4203.876495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 168 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 first_token_cost:4198.5790729522705ms total_cost_time:4198.600053787231ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 first_token_cost:4193.4356689453125ms total_cost_time:4193.456411361694ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 176 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 184 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 192 +INFO 06-24 19:59:44 [manager.py:162] detoken release req id 200 +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0053310394287109375 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.01036977767944336 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 144 not send, decode is busy +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010135412216186523 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0039844512939453125 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010167598724365234 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 152 not send, decode is busy +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 160 not send, decode is busy +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009825944900512695 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004091024398803711 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010155677795410156 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 168 not send, decode is busy +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008224964141845703 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0022199153900146484 s +INFO 06-24 19:59:44 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 144 +DEBUG 06-24 19:59:44 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:44 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:44 [infer_batch.py:156] radix refed token num 7373 +DEBUG 06-24 19:59:44 [infer_batch.py:156] radix hold token num 15810 +DEBUG 06-24 19:59:44 [infer_batch.py:156] mem manager can alloc token num 582 +DEBUG 06-24 19:59:44 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:44 [batch.py:51] router release req id 144 +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 152 +INFO 06-24 19:59:44 [batch.py:51] router release req id 152 +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 160 +INFO 06-24 19:59:44 [batch.py:51] router release req id 160 +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 168 +INFO 06-24 19:59:44 [batch.py:51] router release req id 168 +INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009021282196044922 s +INFO 06-24 19:59:44 [batch.py:51] router release req id 176 +INFO 06-24 19:59:44 [batch.py:51] router release req id 184 +INFO 06-24 19:59:44 [batch.py:51] router release req id 192 +INFO 06-24 19:59:44 [batch.py:51] router release req id 200 +INFO 06-24 19:59:44 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.024348974227905273 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 184 not send, decode is busy +INFO 06-24 19:59:44 [manager.py:224] router recive req id 808 cost time 0.850881814956665 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 192 not send, decode is busy +INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007191896438598633 s +INFO 06-24 19:59:44 [prefill_trans_process.py:34] trans start: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [manager.py:68] detokenization recv req id 808 cost time 0.8527519702911377 s +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 184 +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 192 +INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008372306823730469 s +INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 200 not send, decode is busy +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 200 +INFO 06-24 19:59:44 [prefill_trans_process.py:42] trans finished: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 +DEBUG 06-24 19:59:44 [manager.py:391] Prefill Batch: batch_id=182561919414357009603534939896602708878, time:1750766384.7506483s req_ids:[208, 216, 224, 232, 240, 248, 256, 264] +DEBUG 06-24 19:59:44 [manager.py:391] +INFO 06-24 19:59:44 [prefill_trans_process.py:44] trans cost time: 0.0439300537109375,move_total_kv_len: 1056, id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:44 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 176 cost total time: 0.07047057151794434 s +INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 176 +INFO 06-24 19:59:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 +WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_24 and create again +INFO 06-24 19:59:45 [shm_array.py:30] create shm 2732_0_shm_logprobs_24 +WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_24 and create again +INFO 06-24 19:59:45 [shm_array.py:30] create shm 2732_0_shm_prompts_24 +INFO 06-24 19:59:45 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 208 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 216 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 224 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 first_token_cost:5349.753856658936ms total_cost_time:5349.806070327759ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 232 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 first_token_cost:5335.107803344727ms total_cost_time:5335.139274597168ms,out_token_counter:1 mean_per_token_cost_time: 0.03147125244140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 240 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 first_token_cost:5330.266237258911ms total_cost_time:5330.289125442505ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 first_token_cost:5325.3490924835205ms total_cost_time:5325.370073318481ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 248 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 first_token_cost:5320.688724517822ms total_cost_time:5320.711135864258ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 256 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 first_token_cost:5316.360712051392ms total_cost_time:5316.3816928863525ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 first_token_cost:5311.980485916138ms total_cost_time:5312.000751495361ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [manager.py:162] detoken release req id 264 +INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 first_token_cost:5307.828664779663ms total_cost_time:5307.848930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008646726608276367 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002033233642578125 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011253595352172852 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 208 not send, decode is busy +INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 216 not send, decode is busy +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007774829864501953 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 232 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0018138885498046875 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008698463439941406 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 224 not send, decode is busy +INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 232 not send, decode is busy +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 240 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006409168243408203 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0017321109771728516 s +INFO 06-24 19:59:45 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 208 +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 216 +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 224 +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 232 +INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 240 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010566473007202148 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 248 not send, decode is busy +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007519245147705078 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00293731689453125 s +DEBUG 06-24 19:59:45 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:45 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:45 [infer_batch.py:156] radix refed token num 4230 +DEBUG 06-24 19:59:45 [infer_batch.py:156] radix hold token num 15820 +DEBUG 06-24 19:59:45 [infer_batch.py:156] mem manager can alloc token num 572 +DEBUG 06-24 19:59:45 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:45 [batch.py:51] router release req id 208 +INFO 06-24 19:59:45 [batch.py:51] router release req id 216 +INFO 06-24 19:59:45 [batch.py:51] router release req id 224 +INFO 06-24 19:59:45 [batch.py:51] router release req id 232 +INFO 06-24 19:59:45 [batch.py:51] router release req id 240 +INFO 06-24 19:59:45 [batch.py:51] router release req id 248 +INFO 06-24 19:59:45 [batch.py:51] router release req id 256 +INFO 06-24 19:59:45 [batch.py:51] router release req id 264 +INFO 06-24 19:59:45 [manager.py:224] router recive req id 816 cost time 0.8788070678710938 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.021255970001220703 s +INFO 06-24 19:59:45 [manager.py:68] detokenization recv req id 816 cost time 0.8805432319641113 s +INFO 06-24 19:59:45 [prefill_trans_process.py:34] trans start: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 248 +INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011115550994873047 s +INFO 06-24 19:59:45 [prefill_trans_process.py:42] trans finished: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1063 +DEBUG 06-24 19:59:45 [manager.py:391] Prefill Batch: batch_id=190779151374460836427467471628907477635, time:1750766385.9125724s req_ids:[272, 280, 288, 296, 304, 312, 320] +DEBUG 06-24 19:59:45 [manager.py:391] +INFO 06-24 19:59:45 [prefill_trans_process.py:44] trans cost time: 0.0259249210357666,move_total_kv_len: 1063, id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 240 cost total time: 0.04833054542541504 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.03835797309875488 s +INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.033788442611694336 s +INFO 06-24 19:59:45 [prefill_trans_process.py:34] trans start: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 240 +INFO 06-24 19:59:45 [prefill_trans_process.py:42] trans finished: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 2108 +INFO 06-24 19:59:46 [prefill_trans_process.py:44] trans cost time: 0.14321660995483398,move_total_kv_len: 2108, id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 256 cost total time: 0.18323612213134766 s +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 256 +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 264 +INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 +WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_32 and create again +INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_logprobs_32 +WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_32 and create again +INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_prompts_32 +INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 +WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_31 and create again +INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_logprobs_31 +WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_31 and create again +INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_prompts_31 +INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 +INFO 06-24 19:59:46 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 272 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 280 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 first_token_cost:6383.890151977539ms total_cost_time:6383.934736251831ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 288 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 first_token_cost:6378.338098526001ms total_cost_time:6378.364562988281ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 296 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 first_token_cost:6373.648405075073ms total_cost_time:6373.671770095825ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 first_token_cost:6369.301319122314ms total_cost_time:6369.323492050171ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 304 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 first_token_cost:6365.0031089782715ms total_cost_time:6365.0267124176025ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 first_token_cost:6360.0428104400635ms total_cost_time:6360.0640296936035ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 312 +INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 first_token_cost:6355.86953163147ms total_cost_time:6355.890274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:46 [manager.py:162] detoken release req id 320 +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 272 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0007350444793701172 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 272 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010283470153808594 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 272 not send, decode is busy +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 280 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0067899227142333984 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 288 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002086639404296875 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 280 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010869979858398438 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 288 not send, decode is busy +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 296 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00860905647277832 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 304 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003374814987182617 s +INFO 06-24 19:59:46 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 272 +DEBUG 06-24 19:59:46 [req_manager.py:78] freed all request size 136 +INFO 06-24 19:59:46 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.027638673782348633 s +DEBUG 06-24 19:59:46 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:46 [infer_batch.py:156] radix refed token num 6334 +DEBUG 06-24 19:59:46 [infer_batch.py:156] radix hold token num 15842 +DEBUG 06-24 19:59:46 [infer_batch.py:156] mem manager can alloc token num 550 +DEBUG 06-24 19:59:46 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 288 +INFO 06-24 19:59:46 [batch.py:51] router release req id 272 +INFO 06-24 19:59:46 [prefill_trans_process.py:34] trans start: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [batch.py:51] router release req id 280 +INFO 06-24 19:59:46 [batch.py:51] router release req id 288 +INFO 06-24 19:59:46 [batch.py:51] router release req id 296 +INFO 06-24 19:59:46 [batch.py:51] router release req id 304 +INFO 06-24 19:59:46 [batch.py:51] router release req id 312 +INFO 06-24 19:59:46 [batch.py:51] router release req id 320 +INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 296 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010961771011352539 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 296 not send, decode is busy +INFO 06-24 19:59:46 [manager.py:224] router recive req id 824 cost time 0.7054061889648438 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010041952133178711 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 320 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0052471160888671875 s +INFO 06-24 19:59:46 [manager.py:224] router recive req id 832 cost time 0.6817638874053955 s +INFO 06-24 19:59:46 [manager.py:224] router recive req id 840 cost time 0.655919075012207 s +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 824 cost time 0.7071444988250732 s +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 832 cost time 0.6850030422210693 s +INFO 06-24 19:59:46 [prefill_trans_process.py:42] trans finished: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 +INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 840 cost time 0.6609628200531006 s +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 296 +INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011251211166381836 s +INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 312 not send, decode is busy +DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=70930678938869571698225208165763613302, time:1750766386.987919s req_ids:[328, 336, 344, 352, 360, 368, 376, 384] +DEBUG 06-24 19:59:46 [manager.py:391] +INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 312 +INFO 06-24 19:59:46 [prefill_trans_process.py:44] trans cost time: 0.035775184631347656,move_total_kv_len: 1058, id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:46 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 280 cost total time: 0.0642862319946289 s +INFO 06-24 19:59:46 [task_queue.py:39] queue ready_kv_trans_task_queue left size: 1 +INFO 06-24 19:59:46 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.049445390701293945 s +INFO 06-24 19:59:46 [prefill_trans_process.py:34] trans start: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 280 +INFO 06-24 19:59:47 [prefill_trans_process.py:42] trans finished: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1051 +INFO 06-24 19:59:47 [prefill_trans_process.py:44] trans cost time: 0.07567572593688965,move_total_kv_len: 1051, id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 304 cost total time: 0.12643718719482422 s +INFO 06-24 19:59:47 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.11698675155639648 s +INFO 06-24 19:59:47 [prefill_trans_process.py:34] trans start: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [prefill_trans_process.py:42] trans finished: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1057 +INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 304 +INFO 06-24 19:59:47 [prefill_trans_process.py:44] trans cost time: 0.04365849494934082,move_total_kv_len: 1057, id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:47 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 320 cost total time: 0.1618814468383789 s +INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 320 +INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_39 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_logprobs_39 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_39 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_prompts_39 +INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_38 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_logprobs_38 +WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_38 and create again +INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_prompts_38 +INFO 06-24 19:59:48 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 328 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 336 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 344 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 352 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 360 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 first_token_cost:7552.9944896698ms total_cost_time:7553.060293197632ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 first_token_cost:7548.905849456787ms total_cost_time:7548.933506011963ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 368 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 first_token_cost:7506.665468215942ms total_cost_time:7506.687164306641ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 376 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 first_token_cost:7502.08854675293ms total_cost_time:7502.110242843628ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:162] detoken release req id 384 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 first_token_cost:7497.266054153442ms total_cost_time:7497.288942337036ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 first_token_cost:7492.713928222656ms total_cost_time:7492.736101150513ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 first_token_cost:7488.4033203125ms total_cost_time:7488.430023193359ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 first_token_cost:7482.416868209839ms total_cost_time:7482.438087463379ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002575397491455078 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009096384048461914 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 328 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007551670074462891 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028002262115478516 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009668111801147461 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 336 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 344 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008342266082763672 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0036711692810058594 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008313894271850586 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 352 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 360 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007716655731201172 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028553009033203125 s +INFO 06-24 19:59:48 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 328 +DEBUG 06-24 19:59:48 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:48 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:48 [infer_batch.py:156] radix refed token num 7387 +DEBUG 06-24 19:59:48 [infer_batch.py:156] radix hold token num 15841 +DEBUG 06-24 19:59:48 [infer_batch.py:156] mem manager can alloc token num 551 +DEBUG 06-24 19:59:48 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:48 [batch.py:51] router release req id 328 +INFO 06-24 19:59:48 [batch.py:51] router release req id 336 +INFO 06-24 19:59:48 [batch.py:51] router release req id 344 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 336 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 344 +INFO 06-24 19:59:48 [batch.py:51] router release req id 352 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 352 +INFO 06-24 19:59:48 [batch.py:51] router release req id 360 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 360 +INFO 06-24 19:59:48 [batch.py:51] router release req id 368 +INFO 06-24 19:59:48 [batch.py:51] router release req id 376 +INFO 06-24 19:59:48 [batch.py:51] router release req id 384 +INFO 06-24 19:59:48 [manager.py:224] router recive req id 848 cost time 0.8817059993743896 s +INFO 06-24 19:59:48 [manager.py:224] router recive req id 856 cost time 0.8530170917510986 s +INFO 06-24 19:59:48 [manager.py:68] detokenization recv req id 848 cost time 0.8824746608734131 s +INFO 06-24 19:59:48 [manager.py:68] detokenization recv req id 856 cost time 0.8538038730621338 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011055707931518555 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 368 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 376 not send, decode is busy +INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009758234024047852 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.0060765743255615234 s +INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 384 not send, decode is busy +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 368 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 376 +INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 384 +DEBUG 06-24 19:59:48 [manager.py:391] Prefill Batch: batch_id=189601775423375408021768785101695884949, time:1750766388.1974154s req_ids:[392, 400, 408, 416, 424, 432, 440, 448] +DEBUG 06-24 19:59:48 [manager.py:391] +INFO 06-24 19:59:49 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 392 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 400 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 first_token_cost:8616.84775352478ms total_cost_time:8616.903305053711ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 408 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 first_token_cost:8612.566709518433ms total_cost_time:8612.594604492188ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 first_token_cost:8607.21755027771ms total_cost_time:8607.239723205566ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 416 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 first_token_cost:8602.810382843018ms total_cost_time:8602.831602096558ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 424 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 first_token_cost:8598.429679870605ms total_cost_time:8598.450899124146ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 432 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 first_token_cost:8593.9359664917ms total_cost_time:8593.95718574524ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 first_token_cost:8589.045763015747ms total_cost_time:8589.066743850708ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 440 +INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 first_token_cost:8584.46478843689ms total_cost_time:8584.48576927185ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:49 [manager.py:162] detoken release req id 448 +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005452871322631836 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 400 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0007920265197753906 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011265277862548828 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 392 not send, decode is busy +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 408 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007835626602172852 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 416 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00324249267578125 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.018041372299194336 s +INFO 06-24 19:59:49 [prefill_trans_process.py:34] trans start: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 408 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009248495101928711 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 408 not send, decode is busy +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 416 not send, decode is busy +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 424 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008563041687011719 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 432 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003607034683227539 s +INFO 06-24 19:59:49 [prefill_trans_process.py:42] trans finished: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +INFO 06-24 19:59:49 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 392 +DEBUG 06-24 19:59:49 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:49 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:49 [infer_batch.py:156] radix refed token num 7386 +DEBUG 06-24 19:59:49 [infer_batch.py:156] radix hold token num 15826 +DEBUG 06-24 19:59:49 [infer_batch.py:156] mem manager can alloc token num 566 +DEBUG 06-24 19:59:49 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:49 [batch.py:51] router release req id 392 +INFO 06-24 19:59:49 [batch.py:51] router release req id 400 +INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 424 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009217023849487305 s +INFO 06-24 19:59:49 [batch.py:51] router release req id 408 +INFO 06-24 19:59:49 [batch.py:51] router release req id 416 +INFO 06-24 19:59:49 [batch.py:51] router release req id 424 +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 424 not send, decode is busy +INFO 06-24 19:59:49 [batch.py:51] router release req id 432 +INFO 06-24 19:59:49 [batch.py:51] router release req id 440 +INFO 06-24 19:59:49 [batch.py:51] router release req id 448 +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 432 not send, decode is busy +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 440 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009834527969360352 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005139827728271484 s +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 408 +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 416 +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 424 +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 432 +INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 440 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008382081985473633 s +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 440 not send, decode is busy +INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 448 not send, decode is busy +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 440 +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 448 +INFO 06-24 19:59:49 [prefill_trans_process.py:44] trans cost time: 0.03542947769165039,move_total_kv_len: 1049, id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:49 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 400 cost total time: 0.054984331130981445 s +DEBUG 06-24 19:59:49 [manager.py:391] Prefill Batch: batch_id=326357583043305760939398804875066805400, time:1750766389.3391924s req_ids:[456, 464, 472, 480, 488, 496, 504] +DEBUG 06-24 19:59:49 [manager.py:391] +INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 400 +INFO 06-24 19:59:50 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 456 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 464 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 first_token_cost:9583.629608154297ms total_cost_time:9583.716869354248ms,out_token_counter:1 mean_per_token_cost_time: 0.08726119995117188ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 472 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 first_token_cost:9579.71978187561ms total_cost_time:9579.748630523682ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 480 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 first_token_cost:9575.04153251648ms total_cost_time:9575.064182281494ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 488 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 first_token_cost:9570.974111557007ms total_cost_time:9570.996284484863ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 first_token_cost:9566.270112991333ms total_cost_time:9566.292762756348ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 496 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 first_token_cost:9560.660123825073ms total_cost_time:9560.681343078613ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [manager.py:162] detoken release req id 504 +INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 first_token_cost:9554.535627365112ms total_cost_time:9554.557085037231ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007656574249267578 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028052330017089844 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010640859603881836 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 464 not send, decode is busy +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 472 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009193897247314453 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 480 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004563331604003906 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.020784616470336914 s +INFO 06-24 19:59:50 [prefill_trans_process.py:34] trans start: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [prefill_trans_process.py:42] trans finished: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 +INFO 06-24 19:59:50 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 464 +INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 472 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010582923889160156 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 472 not send, decode is busy +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 480 not send, decode is busy +DEBUG 06-24 19:59:50 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:50 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:50 [infer_batch.py:156] radix refed token num 6346 +DEBUG 06-24 19:59:50 [infer_batch.py:156] radix hold token num 15845 +DEBUG 06-24 19:59:50 [infer_batch.py:156] mem manager can alloc token num 547 +DEBUG 06-24 19:59:50 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 488 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.012292623519897461 s +INFO 06-24 19:59:50 [batch.py:51] router release req id 456 +INFO 06-24 19:59:50 [batch.py:51] router release req id 464 +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00767827033996582 s +INFO 06-24 19:59:50 [batch.py:51] router release req id 472 +INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 504 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003205537796020508 s +INFO 06-24 19:59:50 [batch.py:51] router release req id 480 +INFO 06-24 19:59:50 [batch.py:51] router release req id 488 +INFO 06-24 19:59:50 [batch.py:51] router release req id 496 +INFO 06-24 19:59:50 [batch.py:51] router release req id 504 +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 472 +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 480 +INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 488 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.012189865112304688 s +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 488 not send, decode is busy +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 496 not send, decode is busy +INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 504 not send, decode is busy +INFO 06-24 19:59:50 [prefill_trans_process.py:44] trans cost time: 0.03583383560180664,move_total_kv_len: 1062, id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:50 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 456 cost total time: 0.05907273292541504 s +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 488 +DEBUG 06-24 19:59:50 [manager.py:391] Prefill Batch: batch_id=332681513737580430563604603620588698251, time:1750766390.3448286s req_ids:[512, 520, 528, 536, 544, 552, 560, 568] +DEBUG 06-24 19:59:50 [manager.py:391] +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 496 +DEBUG 06-24 19:59:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 7349.145 tokens/s +DEBUG 06-24 19:59:50 [stats.py:37] Avg prompt tokens throughput: 7336.634 tokens/s +DEBUG 06-24 19:59:50 [stats.py:37] Avg generate tokens throughput: 12.512 tokens/s +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 504 +INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 456 +INFO 06-24 19:59:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 +WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again +INFO 06-24 19:59:50 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 +WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again +INFO 06-24 19:59:50 [shm_array.py:30] create shm 2732_0_shm_prompts_62 +INFO 06-24 19:59:51 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 512 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 520 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 first_token_cost:10687.07275390625ms total_cost_time:10687.12043762207ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 first_token_cost:10682.308197021484ms total_cost_time:10682.335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 528 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 first_token_cost:10677.851438522339ms total_cost_time:10677.873849868774ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 first_token_cost:10673.033714294434ms total_cost_time:10673.054695129395ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 first_token_cost:10668.126106262207ms total_cost_time:10668.146848678589ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 first_token_cost:10663.379192352295ms total_cost_time:10663.398504257202ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 536 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 first_token_cost:10658.718585968018ms total_cost_time:10658.739566802979ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 first_token_cost:10654.187440872192ms total_cost_time:10654.206991195679ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 544 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 552 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 560 +INFO 06-24 19:59:51 [manager.py:162] detoken release req id 568 +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 512 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004446506500244141 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 512 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007849693298339844 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 512 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 520 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007852554321289062 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 528 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0032262802124023438 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 520 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007605075836181641 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 520 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006117343902587891 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 544 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0013079643249511719 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006703376770019531 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 536 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 544 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003616809844970703 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.018597126007080078 s +INFO 06-24 19:59:51 [prefill_trans_process.py:34] trans start: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 512 +INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007677316665649414 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 552 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0069620609283447266 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 568 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021970272064208984 s +DEBUG 06-24 19:59:51 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:51 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:51 [infer_batch.py:156] radix refed token num 7377 +DEBUG 06-24 19:59:51 [infer_batch.py:156] radix hold token num 15834 +DEBUG 06-24 19:59:51 [infer_batch.py:156] mem manager can alloc token num 558 +DEBUG 06-24 19:59:51 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:51 [batch.py:51] router release req id 512 +INFO 06-24 19:59:51 [batch.py:51] router release req id 520 +INFO 06-24 19:59:51 [batch.py:51] router release req id 528 +INFO 06-24 19:59:51 [batch.py:51] router release req id 536 +INFO 06-24 19:59:51 [batch.py:51] router release req id 544 +INFO 06-24 19:59:51 [batch.py:51] router release req id 552 +INFO 06-24 19:59:51 [batch.py:51] router release req id 560 +INFO 06-24 19:59:51 [batch.py:51] router release req id 568 +INFO 06-24 19:59:51 [prefill_trans_process.py:42] trans finished: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1045 +INFO 06-24 19:59:51 [manager.py:224] router recive req id 864 cost time 0.907036304473877 s +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 520 +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 536 +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 544 +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 552 +INFO 06-24 19:59:51 [manager.py:68] detokenization recv req id 864 cost time 0.9086446762084961 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006991863250732422 s +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 560 not send, decode is busy +INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 568 not send, decode is busy +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 560 +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 568 +INFO 06-24 19:59:51 [prefill_trans_process.py:44] trans cost time: 0.024519681930541992,move_total_kv_len: 1045, id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:51 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 528 cost total time: 0.044325828552246094 s +DEBUG 06-24 19:59:51 [manager.py:391] Prefill Batch: batch_id=151190849069827501310392242175690041406, time:1750766391.4910963s req_ids:[576, 584, 592, 600, 608, 616, 624] +DEBUG 06-24 19:59:51 [manager.py:391] +INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 528 +INFO 06-24 19:59:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 +WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_70 and create again +INFO 06-24 19:59:51 [shm_array.py:30] create shm 2732_0_shm_logprobs_70 +WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_70 and create again +INFO 06-24 19:59:51 [shm_array.py:30] create shm 2732_0_shm_prompts_70 +INFO 06-24 19:59:52 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 576 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 584 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 first_token_cost:11679.4753074646ms total_cost_time:11679.52013015747ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 592 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 first_token_cost:11674.811840057373ms total_cost_time:11674.840211868286ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 first_token_cost:11670.287609100342ms total_cost_time:11670.311450958252ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 600 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 first_token_cost:11664.368629455566ms total_cost_time:11664.392709732056ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:1071 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 608 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 first_token_cost:11659.80839729309ms total_cost_time:11659.832239151001ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 616 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 first_token_cost:11655.45129776001ms total_cost_time:11655.477523803711ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 first_token_cost:11650.835990905762ms total_cost_time:11650.865077972412ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:52 [manager.py:162] detoken release req id 624 +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006216764450073242 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009234428405761719 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 576 not send, decode is busy +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009853839874267578 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003866434097290039 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008233308792114258 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 584 not send, decode is busy +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 592 not send, decode is busy +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006408214569091797 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007149934768676758 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 600 not send, decode is busy +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007205009460449219 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 616 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0011563301086425781 s +INFO 06-24 19:59:52 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 576 +DEBUG 06-24 19:59:52 [req_manager.py:78] freed all request size 136 +INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007665872573852539 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 608 not send, decode is busy +DEBUG 06-24 19:59:52 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:52 [infer_batch.py:156] radix refed token num 6360 +DEBUG 06-24 19:59:52 [infer_batch.py:156] radix hold token num 15856 +DEBUG 06-24 19:59:52 [infer_batch.py:156] mem manager can alloc token num 536 +DEBUG 06-24 19:59:52 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 616 not send, decode is busy +INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0030508041381835938 s +INFO 06-24 19:59:52 [batch.py:51] router release req id 576 +INFO 06-24 19:59:52 [batch.py:51] router release req id 584 +INFO 06-24 19:59:52 [batch.py:51] router release req id 592 +INFO 06-24 19:59:52 [batch.py:51] router release req id 600 +INFO 06-24 19:59:52 [batch.py:51] router release req id 608 +INFO 06-24 19:59:52 [batch.py:51] router release req id 616 +INFO 06-24 19:59:52 [batch.py:51] router release req id 624 +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 584 +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 592 +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 600 +INFO 06-24 19:59:52 [manager.py:224] router recive req id 872 cost time 0.7806687355041504 s +INFO 06-24 19:59:52 [manager.py:68] detokenization recv req id 872 cost time 0.7828049659729004 s +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 608 +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 616 +INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006276130676269531 s +INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 624 not send, decode is busy +INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 624 +DEBUG 06-24 19:59:52 [manager.py:391] Prefill Batch: batch_id=41061000598215678357118032570746485017, time:1750766392.5295331s req_ids:[632, 640, 648, 656, 664, 672, 680] +DEBUG 06-24 19:59:52 [manager.py:391] +INFO 06-24 19:59:53 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 632 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 640 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 first_token_cost:12656.394720077515ms total_cost_time:12656.439304351807ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 first_token_cost:12651.873588562012ms total_cost_time:12651.901721954346ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 648 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 first_token_cost:12647.441148757935ms total_cost_time:12647.463321685791ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 656 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 first_token_cost:12643.292665481567ms total_cost_time:12643.314838409424ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 664 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 first_token_cost:12638.623237609863ms total_cost_time:12638.644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 first_token_cost:12634.015321731567ms total_cost_time:12634.036540985107ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1043 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 672 +INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 first_token_cost:12629.651308059692ms total_cost_time:12629.672050476074ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:53 [manager.py:162] detoken release req id 680 +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006348371505737305 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0016994476318359375 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010278701782226562 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 632 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 640 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00801229476928711 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003419637680053711 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007851362228393555 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 648 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 656 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 664 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007013559341430664 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 672 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0025191307067871094 s +INFO 06-24 19:59:53 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 19:59:53 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:53 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:53 [infer_batch.py:156] radix refed token num 7386 +DEBUG 06-24 19:59:53 [infer_batch.py:156] radix hold token num 15874 +DEBUG 06-24 19:59:53 [infer_batch.py:156] mem manager can alloc token num 518 +DEBUG 06-24 19:59:53 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 632 +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 640 +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 648 +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 656 +INFO 06-24 19:59:53 [batch.py:51] router release req id 632 +INFO 06-24 19:59:53 [batch.py:51] router release req id 640 +INFO 06-24 19:59:53 [batch.py:51] router release req id 648 +INFO 06-24 19:59:53 [batch.py:51] router release req id 656 +INFO 06-24 19:59:53 [batch.py:51] router release req id 664 +INFO 06-24 19:59:53 [batch.py:51] router release req id 672 +INFO 06-24 19:59:53 [batch.py:51] router release req id 680 +INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 664 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010159492492675781 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 672 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008017778396606445 s +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 672 +INFO 06-24 19:59:53 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.026720762252807617 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00932931900024414 s +INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 680 not send, decode is busy +INFO 06-24 19:59:53 [prefill_trans_process.py:34] trans start: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 680 +INFO 06-24 19:59:53 [prefill_trans_process.py:42] trans finished: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 +DEBUG 06-24 19:59:53 [manager.py:391] Prefill Batch: batch_id=168428558191206249765469831658675004253, time:1750766393.5315804s req_ids:[688, 696, 704, 712, 720, 728, 736, 744] +DEBUG 06-24 19:59:53 [manager.py:391] +INFO 06-24 19:59:53 [prefill_trans_process.py:44] trans cost time: 0.03470134735107422,move_total_kv_len: 1062, id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:53 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 664 cost total time: 0.06349658966064453 s +INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 664 +INFO 06-24 19:59:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 +WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_84 and create again +INFO 06-24 19:59:53 [shm_array.py:30] create shm 2732_0_shm_logprobs_84 +WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_84 and create again +INFO 06-24 19:59:53 [shm_array.py:30] create shm 2732_0_shm_prompts_84 +INFO 06-24 19:59:54 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 688 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 696 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 first_token_cost:13776.899814605713ms total_cost_time:13776.966333389282ms,out_token_counter:1 mean_per_token_cost_time: 0.06651878356933594ms prompt_token_num:1069 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 704 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 first_token_cost:13771.39139175415ms total_cost_time:13771.419286727905ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 712 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 first_token_cost:13766.984701156616ms total_cost_time:13767.007827758789ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 720 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 first_token_cost:13762.664794921875ms total_cost_time:13762.687921524048ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 728 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 first_token_cost:13758.100509643555ms total_cost_time:13758.121967315674ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 first_token_cost:13753.724813461304ms total_cost_time:13753.75747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 736 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 first_token_cost:13749.449014663696ms total_cost_time:13749.486207962036ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [manager.py:162] detoken release req id 744 +INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 first_token_cost:13745.047092437744ms total_cost_time:13745.08261680603ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010353565216064453 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00538325309753418 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010210990905761719 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 688 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 696 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.011600494384765625 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 712 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007045745849609375 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0024051666259765625 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010615110397338867 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 704 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 712 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 720 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008868217468261719 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021393299102783203 s +INFO 06-24 19:59:54 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 688 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 696 +DEBUG 06-24 19:59:54 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:54 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:54 [infer_batch.py:156] radix refed token num 6322 +DEBUG 06-24 19:59:54 [infer_batch.py:156] radix hold token num 15830 +DEBUG 06-24 19:59:54 [infer_batch.py:156] mem manager can alloc token num 562 +DEBUG 06-24 19:59:54 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:54 [batch.py:51] router release req id 688 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 704 +INFO 06-24 19:59:54 [batch.py:51] router release req id 696 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 712 +INFO 06-24 19:59:54 [batch.py:51] router release req id 704 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 720 +INFO 06-24 19:59:54 [batch.py:51] router release req id 712 +INFO 06-24 19:59:54 [batch.py:51] router release req id 720 +INFO 06-24 19:59:54 [batch.py:51] router release req id 728 +INFO 06-24 19:59:54 [batch.py:51] router release req id 736 +INFO 06-24 19:59:54 [batch.py:51] router release req id 744 +INFO 06-24 19:59:54 [manager.py:224] router recive req id 880 cost time 0.8910543918609619 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009208917617797852 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 728 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 736 not send, decode is busy +INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007225751876831055 s +INFO 06-24 19:59:54 [manager.py:68] detokenization recv req id 880 cost time 0.8926985263824463 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006890773773193359 s +INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 744 not send, decode is busy +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 728 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 736 +INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 744 +DEBUG 06-24 19:59:54 [manager.py:391] Prefill Batch: batch_id=298089166584524292011397622308017931149, time:1750766394.6946175s req_ids:[752, 760, 768, 776, 784, 792, 800] +DEBUG 06-24 19:59:54 [manager.py:391] +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 current batch size: 7 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 paused req num: 0 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 frozen token num: 0 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 7410 +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token +DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.9657149829184968 contain prompt cache tree unrefed token +INFO 06-24 19:59:55 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 752 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 first_token_cost:14746.919870376587ms total_cost_time:14746.965169906616ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 760 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 first_token_cost:14742.695808410645ms total_cost_time:14742.723226547241ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 first_token_cost:14738.271713256836ms total_cost_time:14738.295555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 768 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 first_token_cost:14733.774900436401ms total_cost_time:14733.810663223267ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 776 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 first_token_cost:14728.860855102539ms total_cost_time:14728.896141052246ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 784 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 first_token_cost:14723.912000656128ms total_cost_time:14723.941802978516ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 first_token_cost:14719.4185256958ms total_cost_time:14719.455480575562ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 792 +INFO 06-24 19:59:55 [manager.py:162] detoken release req id 800 +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00774383544921875 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002983570098876953 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011865377426147461 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 760 not send, decode is busy +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010986804962158203 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 776 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0045206546783447266 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.02506709098815918 s +INFO 06-24 19:59:55 [prefill_trans_process.py:34] trans start: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 760 +INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.01067972183227539 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 768 not send, decode is busy +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 776 not send, decode is busy +INFO 06-24 19:59:55 [prefill_trans_process.py:42] trans finished: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1053 +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 784 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.012121438980102539 s +DEBUG 06-24 19:59:55 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:55 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:55 [infer_batch.py:156] radix refed token num 6339 +DEBUG 06-24 19:59:55 [infer_batch.py:156] radix hold token num 15847 +DEBUG 06-24 19:59:55 [infer_batch.py:156] mem manager can alloc token num 545 +DEBUG 06-24 19:59:55 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007155895233154297 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 800 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0024378299713134766 s +INFO 06-24 19:59:55 [batch.py:51] router release req id 752 +INFO 06-24 19:59:55 [batch.py:51] router release req id 760 +INFO 06-24 19:59:55 [batch.py:51] router release req id 768 +INFO 06-24 19:59:55 [batch.py:51] router release req id 776 +INFO 06-24 19:59:55 [batch.py:51] router release req id 784 +INFO 06-24 19:59:55 [batch.py:51] router release req id 792 +INFO 06-24 19:59:55 [batch.py:51] router release req id 800 +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 768 +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 776 +INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 784 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010683774948120117 s +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 784 not send, decode is busy +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 792 not send, decode is busy +INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 800 not send, decode is busy +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 784 +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 792 +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 800 +DEBUG 06-24 19:59:55 [manager.py:391] Prefill Batch: batch_id=190381160306182571724730900033463728660, time:1750766395.7016985s req_ids:[808, 816, 824, 832, 840, 848, 856, 864] +DEBUG 06-24 19:59:55 [manager.py:391] +INFO 06-24 19:59:55 [prefill_trans_process.py:44] trans cost time: 0.038568973541259766,move_total_kv_len: 1053, id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:55 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 752 cost total time: 0.06527590751647949 s +INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 752 +INFO 06-24 19:59:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 +WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_99 and create again +INFO 06-24 19:59:55 [shm_array.py:30] create shm 2732_0_shm_logprobs_99 +WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_99 and create again +INFO 06-24 19:59:55 [shm_array.py:30] create shm 2732_0_shm_prompts_99 +INFO 06-24 19:59:56 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 808 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 first_token_cost:12903.241634368896ms total_cost_time:12903.290271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 816 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 first_token_cost:11768.028497695923ms total_cost_time:11768.057584762573ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 824 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 first_token_cost:10520.995140075684ms total_cost_time:10521.018505096436ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 first_token_cost:10497.162580490112ms total_cost_time:10497.184753417969ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 832 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 first_token_cost:10471.440553665161ms total_cost_time:10471.463203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 first_token_cost:9490.027904510498ms total_cost_time:9490.050077438354ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 840 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 first_token_cost:9461.206197738647ms total_cost_time:9461.227893829346ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 848 +INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 first_token_cost:6223.236322402954ms total_cost_time:6223.257541656494ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 856 +INFO 06-24 19:59:56 [manager.py:162] detoken release req id 864 +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 808 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002652406692504883 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 808 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008903980255126953 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 808 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 816 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0076906681060791016 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 824 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0029401779174804688 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 816 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010135412216186523 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 816 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 824 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010025501251220703 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005650520324707031 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008464336395263672 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 840 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 848 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009446859359741211 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 856 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002905130386352539 s +INFO 06-24 19:59:56 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 808 +DEBUG 06-24 19:59:56 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:56 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:56 [infer_batch.py:156] radix refed token num 7400 +DEBUG 06-24 19:59:56 [infer_batch.py:156] radix hold token num 15849 +DEBUG 06-24 19:59:56 [infer_batch.py:156] mem manager can alloc token num 543 +DEBUG 06-24 19:59:56 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:56 [batch.py:51] router release req id 808 +INFO 06-24 19:59:56 [batch.py:51] router release req id 816 +INFO 06-24 19:59:56 [batch.py:51] router release req id 824 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 816 +INFO 06-24 19:59:56 [batch.py:51] router release req id 832 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 824 +INFO 06-24 19:59:56 [batch.py:51] router release req id 840 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 840 +INFO 06-24 19:59:56 [batch.py:51] router release req id 848 +INFO 06-24 19:59:56 [batch.py:51] router release req id 856 +INFO 06-24 19:59:56 [batch.py:51] router release req id 864 +INFO 06-24 19:59:56 [manager.py:224] router recive req id 888 cost time 0.8922204971313477 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 848 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00862264633178711 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 848 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 856 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 864 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007134914398193359 s +INFO 06-24 19:59:56 [manager.py:68] detokenization recv req id 888 cost time 0.894127368927002 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.02945232391357422 s +INFO 06-24 19:59:56 [prefill_trans_process.py:34] trans start: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 864 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008223772048950195 s +INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 864 not send, decode is busy +INFO 06-24 19:59:56 [prefill_trans_process.py:42] trans finished: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 848 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 856 +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 864 +DEBUG 06-24 19:59:56 [manager.py:391] Prefill Batch: batch_id=266402877888843830617698295371328410035, time:1750766396.8458936s req_ids:[872, 880, 888] +DEBUG 06-24 19:59:56 [manager.py:391] +INFO 06-24 19:59:56 [prefill_trans_process.py:44] trans cost time: 0.04648113250732422,move_total_kv_len: 1056, id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:56 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 832 cost total time: 0.07764625549316406 s +INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 832 +INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_prompts_62 +INFO 06-24 19:59:57 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 872 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 first_token_cost:5556.049346923828ms total_cost_time:5556.09393119812ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 880 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 first_token_cost:3500.9469985961914ms total_cost_time:3500.9727478027344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 888 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 first_token_cost:1350.1379489898682ms total_cost_time:1350.1601219177246ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 872 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00251007080078125 s +INFO 06-24 19:59:57 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 19:59:57 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:57 [infer_batch.py:156] radix refed token num 3166 +DEBUG 06-24 19:59:57 [infer_batch.py:156] radix hold token num 15846 +DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager can alloc token num 546 +DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 872 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010227203369140625 s +INFO 06-24 19:59:57 [batch.py:51] router release req id 872 +INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 880 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008426189422607422 s +INFO 06-24 19:59:57 [batch.py:51] router release req id 880 +INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 888 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0015916824340820312 s +INFO 06-24 19:59:57 [batch.py:51] router release req id 888 +INFO 06-24 19:59:57 [manager.py:224] router recive req id 896 cost time 0.17604541778564453 s +INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 896 cost time 0.17776012420654297 s +INFO 06-24 19:59:57 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.022619962692260742 s +INFO 06-24 19:59:57 [prefill_trans_process.py:34] trans start: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 880 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010534048080444336 s +INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 880 not send, decode is busy +INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 888 not send, decode is busy +INFO 06-24 19:59:57 [prefill_trans_process.py:42] trans finished: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 +INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 880 +INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 888 +DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=84970949193112820747633257429598105154, time:1750766397.3190756s req_ids:[896] +DEBUG 06-24 19:59:57 [manager.py:391] +INFO 06-24 19:59:57 [prefill_trans_process.py:44] trans cost time: 0.02783513069152832,move_total_kv_len: 1049, id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df +INFO 06-24 19:59:57 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 872 cost total time: 0.05187344551086426 s +INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 872 +INFO 06-24 19:59:57 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:57 [manager.py:162] detoken release req id 896 +INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 first_token_cost:368.4573173522949ms total_cost_time:368.544340133667ms,out_token_counter:1 mean_per_token_cost_time: 0.08702278137207031ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:57 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 19:59:57 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:57 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:57 [infer_batch.py:156] radix refed token num 1054 +DEBUG 06-24 19:59:57 [infer_batch.py:156] radix hold token num 15849 +DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager can alloc token num 543 +DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:57 [batch.py:51] router release req id 896 +INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 896 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0046384334564208984 s +INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 896 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008706331253051758 s +INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 896 not send, decode is busy +INFO 06-24 19:59:57 [shm_req_manager.py:119] all shm req has been release ok +INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 896 +INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 +WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again +INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_prompts_62 +INFO 06-24 19:59:57 [manager.py:224] router recive req id 904 cost time 0.023071765899658203 s +INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 904 cost time 0.025092363357543945 s +DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=265990561747531797382892875336867628745, time:1750766397.6218994s req_ids:[904] +DEBUG 06-24 19:59:57 [manager.py:391] +INFO 06-24 19:59:58 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 19:59:58 [manager.py:162] detoken release req id 904 +INFO 06-24 19:59:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 first_token_cost:436.77806854248047ms total_cost_time:436.82312965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 19:59:58 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 19:59:58 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 19:59:58 [infer_batch.py:156] free a batch state: +DEBUG 06-24 19:59:58 [infer_batch.py:156] radix refed token num 1056 +DEBUG 06-24 19:59:58 [infer_batch.py:156] radix hold token num 15848 +DEBUG 06-24 19:59:58 [infer_batch.py:156] mem manager can alloc token num 544 +DEBUG 06-24 19:59:58 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 19:59:58 [batch.py:51] router release req id 904 +INFO 06-24 19:59:58 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 904 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010128498077392578 s +INFO 06-24 19:59:58 [shm_req_manager.py:119] all shm req has been release ok +INFO 06-24 19:59:58 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 904 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009553194046020508 s +INFO 06-24 19:59:58 [prefill_trans_obj.py:166] prefill node kv move task req_id: 904 not send, decode is busy +INFO 06-24 19:59:58 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 904 +DEBUG 06-24 20:00:00 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:00:00 [manager.py:283] +DEBUG 06-24 20:00:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:00:01 [manager.py:284] +INFO 06-24 20:00:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:00:13 [statics_utils.py:24] mean first cost: 8103.882593391216 ms +INFO 06-24 20:00:13 [statics_utils.py:24] mean per token cost: 0.02849207515210177 ms +INFO 06-24 20:00:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 +WARNING 06-24 20:00:32 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again +INFO 06-24 20:00:32 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 +WARNING 06-24 20:00:32 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again +INFO 06-24 20:00:32 [shm_array.py:30] create shm 2732_0_shm_prompts_62 +INFO 06-24 20:00:32 [manager.py:224] router recive req id 912 cost time 0.011658906936645508 s +INFO 06-24 20:00:32 [manager.py:68] detokenization recv req id 912 cost time 0.013936042785644531 s +DEBUG 06-24 20:00:32 [manager.py:391] Prefill Batch: batch_id=242973563606902050196922819935859844168, time:1750766432.9127753s req_ids:[912] +DEBUG 06-24 20:00:32 [manager.py:391] +DEBUG 06-24 20:00:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 1069.843 tokens/s +DEBUG 06-24 20:00:32 [stats.py:37] Avg prompt tokens throughput: 1067.494 tokens/s +DEBUG 06-24 20:00:32 [stats.py:37] Avg generate tokens throughput: 2.349 tokens/s +INFO 06-24 20:00:33 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 20:00:33 [manager.py:162] detoken release req id 912 +INFO 06-24 20:00:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 first_token_cost:193.95685195922852ms total_cost_time:194.00525093078613ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 20:00:33 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 20:00:33 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 20:00:33 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:00:33 [infer_batch.py:156] radix refed token num 1058 +DEBUG 06-24 20:00:33 [infer_batch.py:156] radix hold token num 15845 +DEBUG 06-24 20:00:33 [infer_batch.py:156] mem manager can alloc token num 547 +DEBUG 06-24 20:00:33 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:00:33 [batch.py:51] router release req id 912 +INFO 06-24 20:00:33 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 912 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0037949085235595703 s +INFO 06-24 20:00:33 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 912 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009572982788085938 s +INFO 06-24 20:00:33 [prefill_trans_obj.py:166] prefill node kv move task req_id: 912 not send, decode is busy +INFO 06-24 20:00:33 [shm_req_manager.py:119] all shm req has been release ok +INFO 06-24 20:00:33 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 912 +WARNING 06-24 20:00:40 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 +WARNING 06-24 20:00:40 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again +INFO 06-24 20:00:40 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 +WARNING 06-24 20:00:40 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again +INFO 06-24 20:00:40 [shm_array.py:30] create shm 2732_0_shm_prompts_62 +INFO 06-24 20:00:40 [manager.py:224] router recive req id 920 cost time 0.0210115909576416 s +INFO 06-24 20:00:41 [manager.py:68] detokenization recv req id 920 cost time 0.023111581802368164 s +DEBUG 06-24 20:00:41 [manager.py:391] Prefill Batch: batch_id=259424221880659235483337860354254147774, time:1750766441.0110836s req_ids:[920] +DEBUG 06-24 20:00:41 [manager.py:391] +INFO 06-24 20:00:41 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens +INFO 06-24 20:00:41 [manager.py:162] detoken release req id 920 +INFO 06-24 20:00:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 first_token_cost:195.88994979858398ms total_cost_time:195.93286514282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 +INFO 06-24 20:00:41 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end +DEBUG 06-24 20:00:41 [req_manager.py:78] freed all request size 136 +DEBUG 06-24 20:00:41 [infer_batch.py:156] free a batch state: +DEBUG 06-24 20:00:41 [infer_batch.py:156] radix refed token num 1045 +DEBUG 06-24 20:00:41 [infer_batch.py:156] radix hold token num 15825 +DEBUG 06-24 20:00:41 [infer_batch.py:156] mem manager can alloc token num 567 +DEBUG 06-24 20:00:41 [infer_batch.py:156] mem manager total size 16392 +INFO 06-24 20:00:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 920 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0008890628814697266 s +INFO 06-24 20:00:41 [batch.py:51] router release req id 920 +INFO 06-24 20:00:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 920 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011546134948730469 s +INFO 06-24 20:00:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 920 not send, decode is busy +INFO 06-24 20:00:41 [shm_req_manager.py:119] all shm req has been release ok +INFO 06-24 20:00:41 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 920 +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:00:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:00:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:00:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:00:58 [manager.py:590] aborted group_request_id not exist +DEBUG 06-24 20:01:02 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:01:02 [manager.py:283] +DEBUG 06-24 20:01:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:01:02 [manager.py:284] +INFO 06-24 20:01:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:01:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:01:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +WARNING 06-24 20:01:33 [manager.py:590] aborted group_request_id not exist +WARNING 06-24 20:01:41 [manager.py:590] aborted group_request_id not exist +INFO 06-24 20:01:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:02:02 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:02:02 [manager.py:283] +DEBUG 06-24 20:02:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:02:02 [manager.py:284] +INFO 06-24 20:02:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:02:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:02:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:02:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:02:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:02:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:03:03 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:03:03 [manager.py:283] +DEBUG 06-24 20:03:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:03:03 [manager.py:284] +INFO 06-24 20:03:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:03:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:03:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:03:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:03:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:03:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:04:04 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:04:04 [manager.py:283] +DEBUG 06-24 20:04:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:04:04 [manager.py:284] +INFO 06-24 20:04:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:04:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:04:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:04:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:04:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:04:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:05:04 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:05:04 [manager.py:283] +DEBUG 06-24 20:05:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:05:04 [manager.py:284] +INFO 06-24 20:05:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:05:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:05:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:05:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:05:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:05:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:06:05 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:06:05 [manager.py:283] +DEBUG 06-24 20:06:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:06:05 [manager.py:284] +INFO 06-24 20:06:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:06:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:06:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:06:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:06:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:06:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:07:06 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:07:06 [manager.py:283] +DEBUG 06-24 20:07:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:07:06 [manager.py:284] +INFO 06-24 20:07:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:07:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:07:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:07:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:07:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:07:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:08:07 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:08:07 [manager.py:283] +DEBUG 06-24 20:08:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:08:07 [manager.py:284] +INFO 06-24 20:08:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:08:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:08:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:08:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:08:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:08:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:09:07 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:09:07 [manager.py:283] +DEBUG 06-24 20:09:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:09:07 [manager.py:284] +INFO 06-24 20:09:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:09:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:09:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:09:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:09:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:09:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:10:08 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:10:08 [manager.py:283] +DEBUG 06-24 20:10:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:10:08 [manager.py:284] +INFO 06-24 20:10:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:10:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:10:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:10:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:11:09 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:11:09 [manager.py:283] +DEBUG 06-24 20:11:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:11:09 [manager.py:284] +INFO 06-24 20:11:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:11:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:11:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:11:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:11:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:11:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:12:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:12:10 [manager.py:283] +DEBUG 06-24 20:12:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:12:10 [manager.py:284] +INFO 06-24 20:12:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:12:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:12:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:12:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:12:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:12:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:13:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:13:10 [manager.py:283] +DEBUG 06-24 20:13:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:13:10 [manager.py:284] +INFO 06-24 20:13:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:13:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:13:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:13:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:13:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:14:11 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:14:11 [manager.py:283] +DEBUG 06-24 20:14:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:14:11 [manager.py:284] +INFO 06-24 20:14:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:14:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:14:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:14:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:14:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:14:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:15:12 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:15:12 [manager.py:283] +DEBUG 06-24 20:15:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:15:12 [manager.py:284] +INFO 06-24 20:15:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:15:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:15:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:15:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:16:13 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:16:13 [manager.py:283] +DEBUG 06-24 20:16:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:16:13 [manager.py:284] +INFO 06-24 20:16:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:16:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:16:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:16:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:17:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:17:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:17:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:17:13 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:17:13 [manager.py:283] +DEBUG 06-24 20:17:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:17:13 [manager.py:284] +INFO 06-24 20:17:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:17:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:17:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:18:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:18:14 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:18:14 [manager.py:283] +DEBUG 06-24 20:18:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:18:14 [manager.py:284] +INFO 06-24 20:18:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:18:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:18:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:19:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:19:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:19:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:19:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:19:15 [manager.py:283] +DEBUG 06-24 20:19:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:19:15 [manager.py:284] +INFO 06-24 20:19:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:19:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:19:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:20:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:20:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:20:15 [manager.py:283] +DEBUG 06-24 20:20:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:20:15 [manager.py:284] +INFO 06-24 20:20:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:20:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:20:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:21:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:21:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:21:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:21:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:21:16 [manager.py:283] +DEBUG 06-24 20:21:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:21:16 [manager.py:284] +INFO 06-24 20:21:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:22:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:22:17 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:22:17 [manager.py:283] +DEBUG 06-24 20:22:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:22:17 [manager.py:284] +INFO 06-24 20:22:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:22:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:22:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:23:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:23:17 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:23:17 [manager.py:283] +DEBUG 06-24 20:23:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:23:17 [manager.py:284] +INFO 06-24 20:23:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:23:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:23:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:24:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:24:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:24:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:24:18 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:24:18 [manager.py:283] +DEBUG 06-24 20:24:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:24:18 [manager.py:284] +INFO 06-24 20:24:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:25:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:25:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:25:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:25:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:25:19 [manager.py:283] +DEBUG 06-24 20:25:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:25:19 [manager.py:284] +INFO 06-24 20:25:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:25:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:25:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:26:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:26:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:26:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:26:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:26:19 [manager.py:283] +DEBUG 06-24 20:26:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:26:19 [manager.py:284] +INFO 06-24 20:26:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:26:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:26:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:27:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:27:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:27:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:27:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:27:20 [manager.py:283] +DEBUG 06-24 20:27:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:27:20 [manager.py:284] +INFO 06-24 20:27:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:27:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:27:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:28:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:28:21 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:28:21 [manager.py:283] +DEBUG 06-24 20:28:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:28:21 [manager.py:284] +INFO 06-24 20:28:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:28:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:28:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:29:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:29:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:29:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:29:22 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:29:22 [manager.py:283] +DEBUG 06-24 20:29:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:29:22 [manager.py:284] +INFO 06-24 20:29:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:29:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:29:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:30:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:30:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:30:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:30:22 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:30:22 [manager.py:283] +DEBUG 06-24 20:30:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:30:22 [manager.py:284] +INFO 06-24 20:30:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:30:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:30:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:31:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:31:23 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:31:23 [manager.py:283] +DEBUG 06-24 20:31:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:31:23 [manager.py:284] +INFO 06-24 20:31:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:31:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:31:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:32:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:32:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:32:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:32:24 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:32:24 [manager.py:283] +DEBUG 06-24 20:32:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:32:24 [manager.py:284] +INFO 06-24 20:32:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:32:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:32:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:33:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:33:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:33:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:33:24 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:33:24 [manager.py:283] +DEBUG 06-24 20:33:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:33:24 [manager.py:284] +INFO 06-24 20:33:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:34:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:34:25 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:34:25 [manager.py:283] +DEBUG 06-24 20:34:25 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:34:25 [manager.py:284] +INFO 06-24 20:34:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:34:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:34:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:35:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:35:26 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:35:26 [manager.py:283] +DEBUG 06-24 20:35:26 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:35:26 [manager.py:284] +INFO 06-24 20:35:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:35:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:35:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:36:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:36:27 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:36:27 [manager.py:283] +DEBUG 06-24 20:36:27 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:36:27 [manager.py:284] +INFO 06-24 20:36:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:36:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:36:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:37:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:37:27 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:37:27 [manager.py:283] +DEBUG 06-24 20:37:27 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:37:27 [manager.py:284] +INFO 06-24 20:37:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:37:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:37:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:38:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:38:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:38:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:38:28 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:38:28 [manager.py:283] +DEBUG 06-24 20:38:28 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:38:28 [manager.py:284] +INFO 06-24 20:38:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:39:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:39:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:39:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:39:29 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:39:29 [manager.py:283] +DEBUG 06-24 20:39:29 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:39:29 [manager.py:284] +INFO 06-24 20:39:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:39:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:39:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:40:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:40:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:40:30 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:40:30 [manager.py:283] +DEBUG 06-24 20:40:30 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:40:30 [manager.py:284] +INFO 06-24 20:40:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:40:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:40:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:41:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:41:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:41:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:41:30 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:41:30 [manager.py:283] +DEBUG 06-24 20:41:30 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:41:30 [manager.py:284] +INFO 06-24 20:41:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:42:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:42:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:42:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:42:31 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:42:31 [manager.py:283] +DEBUG 06-24 20:42:31 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:42:31 [manager.py:284] +INFO 06-24 20:42:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:42:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:42:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:43:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:43:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:43:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:43:32 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:43:32 [manager.py:283] +DEBUG 06-24 20:43:32 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:43:32 [manager.py:284] +INFO 06-24 20:43:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:43:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:43:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:44:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:44:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:44:33 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:44:33 [manager.py:283] +DEBUG 06-24 20:44:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:44:33 [manager.py:284] +INFO 06-24 20:44:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:44:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:44:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:45:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:45:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:45:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:45:33 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:45:33 [manager.py:283] +DEBUG 06-24 20:45:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:45:33 [manager.py:284] +INFO 06-24 20:45:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:46:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:46:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:46:34 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:46:34 [manager.py:283] +DEBUG 06-24 20:46:34 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:46:34 [manager.py:284] +INFO 06-24 20:46:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:47:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:47:35 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:47:35 [manager.py:283] +DEBUG 06-24 20:47:35 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:47:35 [manager.py:284] +INFO 06-24 20:47:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:47:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:47:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:48:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:48:36 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:48:36 [manager.py:283] +DEBUG 06-24 20:48:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:48:36 [manager.py:284] +INFO 06-24 20:48:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:48:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:48:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:49:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:49:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:49:36 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:49:36 [manager.py:283] +DEBUG 06-24 20:49:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:49:36 [manager.py:284] +INFO 06-24 20:49:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:49:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:49:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:50:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:50:37 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:50:37 [manager.py:283] +DEBUG 06-24 20:50:37 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:50:37 [manager.py:284] +INFO 06-24 20:50:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:50:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:50:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:51:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:51:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:51:38 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:51:38 [manager.py:283] +DEBUG 06-24 20:51:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:51:38 [manager.py:284] +INFO 06-24 20:51:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:51:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:51:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:52:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:52:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:52:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:52:39 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:52:39 [manager.py:283] +DEBUG 06-24 20:52:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:52:39 [manager.py:284] +INFO 06-24 20:52:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:53:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:53:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:53:39 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:53:39 [manager.py:283] +DEBUG 06-24 20:53:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:53:39 [manager.py:284] +INFO 06-24 20:53:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:54:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:54:40 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:54:40 [manager.py:283] +DEBUG 06-24 20:54:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:54:40 [manager.py:284] +INFO 06-24 20:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:54:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:54:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:55:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:55:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:55:41 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:55:41 [manager.py:283] +DEBUG 06-24 20:55:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:55:41 [manager.py:284] +INFO 06-24 20:55:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:55:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:55:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:56:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 20:56:42 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:56:42 [manager.py:283] +DEBUG 06-24 20:56:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:56:42 [manager.py:284] +INFO 06-24 20:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:56:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:56:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:57:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:57:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:57:42 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:57:42 [manager.py:283] +DEBUG 06-24 20:57:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:57:42 [manager.py:284] +INFO 06-24 20:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:58:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:58:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:58:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:58:43 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:58:43 [manager.py:283] +DEBUG 06-24 20:58:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:58:43 [manager.py:284] +INFO 06-24 20:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:59:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:59:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 20:59:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 20:59:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 20:59:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 20:59:44 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 20:59:44 [manager.py:283] +DEBUG 06-24 20:59:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 20:59:44 [manager.py:284] +INFO 06-24 21:00:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:00:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:00:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:00:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:00:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:00:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:00:44 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:00:44 [manager.py:283] +DEBUG 06-24 21:00:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:00:44 [manager.py:284] +INFO 06-24 21:01:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:01:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:01:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:01:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:01:45 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:01:45 [manager.py:283] +DEBUG 06-24 21:01:45 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:01:45 [manager.py:284] +INFO 06-24 21:02:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:02:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:02:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:02:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:02:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:02:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:02:46 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:02:46 [manager.py:283] +DEBUG 06-24 21:02:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:02:46 [manager.py:284] +INFO 06-24 21:03:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:03:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:03:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:03:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:03:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:03:46 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:03:46 [manager.py:283] +DEBUG 06-24 21:03:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:03:46 [manager.py:284] +INFO 06-24 21:04:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:04:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:04:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:04:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:04:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:04:47 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:04:47 [manager.py:283] +DEBUG 06-24 21:04:47 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:04:47 [manager.py:284] +INFO 06-24 21:05:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:05:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:05:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:05:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:05:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:05:48 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:05:48 [manager.py:283] +DEBUG 06-24 21:05:48 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:05:48 [manager.py:284] +INFO 06-24 21:06:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:06:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:06:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:06:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:06:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:06:49 [manager.py:283] +DEBUG 06-24 21:06:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:06:49 [manager.py:284] +INFO 06-24 21:07:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:07:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:07:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:07:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:07:49 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:07:49 [manager.py:283] +DEBUG 06-24 21:07:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:07:49 [manager.py:284] +INFO 06-24 21:08:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:08:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:08:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:08:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:08:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:08:50 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:08:50 [manager.py:283] +DEBUG 06-24 21:08:50 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:08:50 [manager.py:284] +INFO 06-24 21:09:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:09:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:09:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:09:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:09:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:09:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:09:51 [manager.py:283] +DEBUG 06-24 21:09:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:09:51 [manager.py:284] +INFO 06-24 21:10:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:10:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:10:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:10:51 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:10:51 [manager.py:283] +DEBUG 06-24 21:10:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:10:51 [manager.py:284] +INFO 06-24 21:11:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:11:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:11:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:11:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:11:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:11:52 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:11:52 [manager.py:283] +DEBUG 06-24 21:11:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:11:52 [manager.py:284] +INFO 06-24 21:12:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:12:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:12:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:12:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:12:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:12:53 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:12:53 [manager.py:283] +DEBUG 06-24 21:12:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:12:53 [manager.py:284] +INFO 06-24 21:13:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:13:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:13:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:13:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:13:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:13:53 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:13:53 [manager.py:283] +DEBUG 06-24 21:13:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:13:53 [manager.py:284] +INFO 06-24 21:14:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:14:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:14:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:14:54 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:14:54 [manager.py:283] +DEBUG 06-24 21:14:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:14:54 [manager.py:284] +INFO 06-24 21:15:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:15:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:15:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:15:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:15:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:15:55 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:15:55 [manager.py:283] +DEBUG 06-24 21:15:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:15:55 [manager.py:284] +INFO 06-24 21:16:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:16:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:16:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:16:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:16:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:16:56 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:16:56 [manager.py:283] +DEBUG 06-24 21:16:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:16:56 [manager.py:284] +INFO 06-24 21:17:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:17:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:17:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:17:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:17:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:17:56 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:17:56 [manager.py:283] +DEBUG 06-24 21:17:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:17:56 [manager.py:284] +INFO 06-24 21:18:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:18:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:18:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:18:57 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:18:57 [manager.py:283] +DEBUG 06-24 21:18:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:18:57 [manager.py:284] +INFO 06-24 21:19:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:19:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:19:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:19:58 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:19:58 [manager.py:283] +DEBUG 06-24 21:19:58 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:19:58 [manager.py:284] +INFO 06-24 21:20:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:20:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:20:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:20:59 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:20:59 [manager.py:283] +DEBUG 06-24 21:20:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:20:59 [manager.py:284] +INFO 06-24 21:21:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:21:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:21:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:21:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:21:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:21:59 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:21:59 [manager.py:283] +DEBUG 06-24 21:21:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:21:59 [manager.py:284] +INFO 06-24 21:22:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:22:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:22:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:23:00 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:23:00 [manager.py:283] +DEBUG 06-24 21:23:00 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:23:00 [manager.py:284] +INFO 06-24 21:23:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:23:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:23:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:23:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:23:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:24:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:24:01 [manager.py:283] +DEBUG 06-24 21:24:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:24:01 [manager.py:284] +INFO 06-24 21:24:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:24:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:24:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:24:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:24:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:25:01 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:25:01 [manager.py:283] +DEBUG 06-24 21:25:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:25:01 [manager.py:284] +INFO 06-24 21:25:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:25:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:25:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:26:02 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:26:02 [manager.py:283] +DEBUG 06-24 21:26:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:26:02 [manager.py:284] +INFO 06-24 21:26:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:26:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:26:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:26:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:26:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:27:03 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:27:03 [manager.py:283] +DEBUG 06-24 21:27:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:27:03 [manager.py:284] +INFO 06-24 21:27:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:27:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:27:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:27:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:27:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:28:03 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:28:03 [manager.py:283] +DEBUG 06-24 21:28:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:28:03 [manager.py:284] +INFO 06-24 21:28:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:28:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:28:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:29:04 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:29:04 [manager.py:283] +DEBUG 06-24 21:29:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:29:04 [manager.py:284] +INFO 06-24 21:29:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:29:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:29:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:30:05 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:30:05 [manager.py:283] +DEBUG 06-24 21:30:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:30:05 [manager.py:284] +INFO 06-24 21:30:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:30:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:30:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:30:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:31:06 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:31:06 [manager.py:283] +DEBUG 06-24 21:31:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:31:06 [manager.py:284] +INFO 06-24 21:31:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:31:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:31:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:31:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:31:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:32:07 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:32:07 [manager.py:283] +DEBUG 06-24 21:32:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:32:07 [manager.py:284] +INFO 06-24 21:32:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:32:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:32:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:32:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:32:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:33:08 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:33:08 [manager.py:283] +DEBUG 06-24 21:33:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:33:08 [manager.py:284] +INFO 06-24 21:33:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:33:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:33:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:33:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:34:08 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:34:08 [manager.py:283] +DEBUG 06-24 21:34:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:34:08 [manager.py:284] +INFO 06-24 21:34:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:34:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:34:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:34:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:34:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:35:09 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:35:09 [manager.py:283] +DEBUG 06-24 21:35:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:35:09 [manager.py:284] +INFO 06-24 21:35:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:35:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:35:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:35:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:35:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:36:10 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:36:10 [manager.py:283] +DEBUG 06-24 21:36:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:36:10 [manager.py:284] +INFO 06-24 21:36:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:36:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:36:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:36:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:36:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:37:11 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:37:11 [manager.py:283] +DEBUG 06-24 21:37:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:37:11 [manager.py:284] +INFO 06-24 21:37:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:37:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:37:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:37:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:37:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:38:11 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:38:11 [manager.py:283] +DEBUG 06-24 21:38:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:38:11 [manager.py:284] +INFO 06-24 21:38:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:38:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:38:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:38:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:38:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:39:12 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:39:12 [manager.py:283] +DEBUG 06-24 21:39:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:39:12 [manager.py:284] +INFO 06-24 21:39:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:39:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:39:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:39:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:39:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:40:13 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:40:13 [manager.py:283] +DEBUG 06-24 21:40:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:40:13 [manager.py:284] +INFO 06-24 21:40:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:40:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:40:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:41:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:41:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:41:13 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:41:13 [manager.py:283] +DEBUG 06-24 21:41:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:41:13 [manager.py:284] +INFO 06-24 21:41:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:41:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:41:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:42:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:42:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:42:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:42:14 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:42:14 [manager.py:283] +DEBUG 06-24 21:42:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:42:14 [manager.py:284] +INFO 06-24 21:42:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:43:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:43:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:43:15 [manager.py:283] +DEBUG 06-24 21:43:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:43:15 [manager.py:284] +INFO 06-24 21:43:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:43:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:43:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:44:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:44:15 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:44:15 [manager.py:283] +DEBUG 06-24 21:44:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:44:15 [manager.py:284] +INFO 06-24 21:44:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:44:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:44:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:45:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +DEBUG 06-24 21:45:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:45:16 [manager.py:283] +DEBUG 06-24 21:45:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:45:16 [manager.py:284] +INFO 06-24 21:45:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:45:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:45:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:46:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:46:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:46:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:46:16 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:46:16 [manager.py:283] +DEBUG 06-24 21:46:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:46:16 [manager.py:284] +INFO 06-24 21:46:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:47:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:47:17 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:47:17 [manager.py:283] +DEBUG 06-24 21:47:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:47:17 [manager.py:284] +INFO 06-24 21:47:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:47:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:47:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:48:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:48:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:48:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:48:18 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:48:18 [manager.py:283] +DEBUG 06-24 21:48:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:48:18 [manager.py:284] +INFO 06-24 21:48:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:48:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:48:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:49:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:49:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:49:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:49:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:49:19 [manager.py:283] +DEBUG 06-24 21:49:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:49:19 [manager.py:284] +INFO 06-24 21:49:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:49:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:49:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:50:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:50:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:50:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:50:19 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:50:19 [manager.py:283] +DEBUG 06-24 21:50:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:50:19 [manager.py:284] +INFO 06-24 21:50:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:51:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:51:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:51:20 [manager.py:283] +DEBUG 06-24 21:51:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:51:20 [manager.py:284] +INFO 06-24 21:51:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:51:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:51:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:52:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:52:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:52:20 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:52:20 [manager.py:283] +DEBUG 06-24 21:52:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:52:20 [manager.py:284] +INFO 06-24 21:52:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:52:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:52:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:53:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:53:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +DEBUG 06-24 21:53:21 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:53:21 [manager.py:283] +DEBUG 06-24 21:53:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:53:21 [manager.py:284] +INFO 06-24 21:53:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:53:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:53:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:54:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:54:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +ERROR 06-24 21:54:19 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:19 [pd_loop.py:121] no close frame received or sent +ERROR 06-24 21:54:19 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task +ERROR 06-24 21:54:19 [pd_loop.py:121] recv_bytes = await websocket.recv() +ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv +ERROR 06-24 21:54:19 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc +ERROR 06-24 21:54:19 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent +DEBUG 06-24 21:54:22 [manager.py:283] dp_i 0 frozen token num: 0 +DEBUG 06-24 21:54:22 [manager.py:283] +DEBUG 06-24 21:54:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 +DEBUG 06-24 21:54:22 [manager.py:284] +INFO 06-24 21:54:29 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:29 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:29 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:29 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:29 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:29 [pd_loop.py:121] return await self +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:29 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:39 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:39 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:39 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:39 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:39 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:39 [pd_loop.py:121] return await self +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:39 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:39 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} +INFO 06-24 21:54:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms +INFO 06-24 21:54:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms +INFO 06-24 21:54:49 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:49 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:49 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:49 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:49 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:49 [pd_loop.py:121] return await self +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:49 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:49 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:54:59 [pd_loop.py:126] reconnection to pd_master +ERROR 06-24 21:54:59 [pd_loop.py:120] connetion to pd_master has error +ERROR 06-24 21:54:59 [pd_loop.py:121] [Errno 111] Connection refused +ERROR 06-24 21:54:59 [pd_loop.py:121] Traceback (most recent call last): +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task +ERROR 06-24 21:54:59 [pd_loop.py:121] async with websockets.connect( +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ +ERROR 06-24 21:54:59 [pd_loop.py:121] return await self +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ +ERROR 06-24 21:54:59 [pd_loop.py:121] self.connection = await self.create_connection() +ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) +ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection +ERROR 06-24 21:54:59 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused +INFO 06-24 21:55:09 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... + +ERROR 06-24 21:55:09 [prefill_kv_move_manager.py:96] +Traceback (most recent call last): + File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_kv_move_manager.py", line 85, in task_dispatcher_loop + move_task: KVMoveTask = self.info_queue.get() + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/queues.py", line 103, in get + res = self._recv_bytes() + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes + buf = self._recv_bytes(maxlength) + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes + buf = self._recv(4) + File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 379, in _recv + chunk = read(handle, remaining) +KeyboardInterrupt +INFO 06-24 21:55:09 [start_utils.py:106] Killing child process 1213613 +INFO 06-24 21:55:09 [start_utils.py:106] Killing child process 1214162 +INFO 06-24 21:55:09 [start_utils.py:108] Killing parent process 1213612 +INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211407 +INFO 06-24 21:55:09 [start_utils.py:51] Killing child process 1212910 +INFO 06-24 21:55:09 [start_utils.py:51] Killing child process 1213222 +INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211508 +INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211509 +INFO 06-24 21:55:09 [start_utils.py:69] All processes terminated gracefully. +INFO 06-24 21:55:09 [api_start.py:30] All processes have been forcefully terminated. diff --git a/server_d.sh b/server_d.sh deleted file mode 100644 index 9c408e104..000000000 --- a/server_d.sh +++ /dev/null @@ -1,13 +0,0 @@ -CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ - --run_mode "decode" \ - --host 127.0.1.1 \ - --port 8118 \ - --nccl_port 12322 \ - --tp 1 \ - --max_total_token_num 16392 \ - --graph_max_len_in_batch 2048 \ - --graph_max_batch_size 16 \ - --tokenizer_mode fast \ - --pd_master_ip 127.0.1.1 \ - --pd_master_port 60011 \ No newline at end of file diff --git a/server_master.sh b/server_master.sh deleted file mode 100644 index 1c2dacdea..000000000 --- a/server_master.sh +++ /dev/null @@ -1,6 +0,0 @@ -python3 -m lightllm.server.api_server \ - --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ - --run_mode "pd_master" \ - --host 127.0.1.1 \ - --port 60011 \ - --pd_chunk_size 4096 \ No newline at end of file diff --git a/server_p.sh b/server_p.sh deleted file mode 100644 index f10c06f52..000000000 --- a/server_p.sh +++ /dev/null @@ -1,14 +0,0 @@ -CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python3 -m lightllm.server.api_server \ - --model_dir /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ - --run_mode "prefill" \ - --host 127.0.1.1 \ - --port 8017 \ - --tp 1 \ - --nccl_port 2732 \ - --max_total_token_num 16392 \ - --tokenizer_mode fast \ - --pd_master_ip 127.0.1.1 \ - --pd_master_port 60011 \ - --max_req_total_len 16000 \ - --running_max_req_size 128 \ - --disable_cudagraph \ No newline at end of file diff --git a/test.sh b/test.sh deleted file mode 100644 index c16b3cc64..000000000 --- a/test.sh +++ /dev/null @@ -1,8 +0,0 @@ -python3 test/benchmark_client.py \ - --url http://127.0.1.1:60011/generate \ - --num_clients 1 \ - --tokenizer_path /mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B \ - --output_len 16384 \ - --server_api lightllm \ - --dump_file result.json \ - --seed 42 \ No newline at end of file From 2ee1c389ebc87319f65ace56616945dded694834 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:22:21 +0800 Subject: [PATCH 09/17] fix: remove log --- pd_d.log | 2282 - pd_d_4096.log | 196230 ------------------------------------------ pd_master.log | 4529 - pd_master_4096.log | 15896 ---- pd_p.log | 1512 - pd_p_4096.log | 2843 - 6 files changed, 223292 deletions(-) delete mode 100644 pd_d.log delete mode 100644 pd_d_4096.log delete mode 100644 pd_master.log delete mode 100644 pd_master_4096.log delete mode 100644 pd_p.log delete mode 100644 pd_p_4096.log diff --git a/pd_d.log b/pd_d.log deleted file mode 100644 index 2268f088d..000000000 --- a/pd_d.log +++ /dev/null @@ -1,2282 +0,0 @@ -INFO 06-24 21:55:51 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:52 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:53 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:55:55 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:55 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:55 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:55:55 [api_start.py:79] zmq mode head: ipc:///tmp/_12322_0_ -INFO 06-24 21:55:55 [api_start.py:81] use tgi api: False -INFO 06-24 21:55:55 [api_start.py:192] alloced ports: [10011, 10239, 10144, 10176, 10271, 10117, 10125, 10205, 10126] -INFO 06-24 21:55:55 [api_start.py:233] all start args:Namespace(run_mode='decode', host='127.0.1.1', port=8118, httpserver_workers=1, zmq_mode='ipc:///tmp/_12322_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=12322, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=0, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=16, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=2048, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10011, detokenization_port=10239, detokenization_pub_port=10144, visual_port=10176, audio_port=10271, cache_port=10117, metric_port=10125, pd_node_infer_rpyc_ports=[10126], pd_node_id=148730891575017957868136796871489876076, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) -INFO 06-24 21:55:57 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 21:55:59 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:00 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:00 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:00 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:01 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 21:56:02 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:03 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:03 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:03 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:03 [manager.py:41] pub_to_httpserver sendhwm 1000 -INFO 06-24 21:56:04 [shm_req_manager.py:59] create lock shm 12322_0_req_shm_total -INFO 06-24 21:56:04 [atomic_array_lock.py:29] create lock shm 12322_0_array_reqs_lock -INFO 06-24 21:56:04 [atomic_lock.py:26] create lock shm 12322_0_shm_reqs_manager_lock -WARNING 06-24 21:56:04 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:04 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:04 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:04 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 21:56:04 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 21:56:04 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_mem_manger_can_use_token_num_0 -INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_shared_token_load -INFO 06-24 21:56:04 [shared_arr.py:17] create shm 12322_0_shared_token_load_ext_infos -INFO 06-24 21:56:04 [model_rpc.py:70] Initialized RPC server for rank 0. -INFO 06-24 21:56:04 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 21:56:04 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 21:56:04 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 21:56:04 [model_rpc.py:184] use ContinuesBatchBackendForDecodeNode -INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_shared_token_load -INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos -INFO 06-24 21:56:06 [shared_arr.py:17] create shm 12322_0_dp_rank_0_lock_tp_infos -INFO 06-24 21:56:06 [basemodel.py:134] Initial quantization. The default quantization method is none -INFO 06-24 21:56:06 [mem_utils.py:11] mode setting params: [] -INFO 06-24 21:56:06 [mem_utils.py:25] Model kv cache using mode normal -INFO 06-24 21:56:06 [shared_arr.py:20] link shm 12322_0_mem_manger_can_use_token_num_0 -INFO 06-24 21:56:17 [cuda_graph.py:45] cuda graph batch_sizes: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] -INFO 06-24 21:56:17 [cuda_graph.py:187] Begin capture cudagraph, use the --disable_cudagraph to disable it. -INFO 06-24 21:56:17 [cache_tensor_manager.py:75] pid 1413335 cuda graph alloc graph out mem (16, 152064) torch.float32 2433024 2433024 -INFO 06-24 21:56:17 [cache_tensor_manager.py:77] cuda graph managed_total_tensor_bytes: 9732096 -INFO 06-24 21:56:22 [cuda_graph.py:232] Capture cudagraph success, batch_size <=16 and max_len_in_batch <= 2048 will infer with cudagraph. -INFO 06-24 21:56:22 [basemodel.py:652] begin check max_len infer -INFO 06-24 21:56:23 [basemodel.py:680] check max_len 8448 infer ok -INFO 06-24 21:56:23 [shared_arr.py:17] create shm 12322_0_refed_tokens_num_0 -INFO 06-24 21:56:23 [shared_arr.py:17] create shm 12322_0_tree_total_tokens_num_0 -INFO 06-24 21:56:23 [base_backend.py:135] loaded model class -INFO 06-24 21:56:23 [decode_impl.py:36] lock_nccl_group ranks 0 -INFO 06-24 21:56:23 [shared_arr.py:20] link shm 12322_0_refed_tokens_num_0 -INFO 06-24 21:56:23 [shared_arr.py:20] link shm 12322_0_tree_total_tokens_num_0 -INFO 06-24 21:56:23 [manager.py:196] use req queue QueueForPDDecode -INFO 06-24 21:56:25 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:26 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:27 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:29 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:29 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:29 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:30 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 21:56:30 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 21:56:30 [decode_kv_move_manager.py:59] rpyc connect to port: 10126 ok -INFO 06-24 21:56:30 [up_status.py:122] up_kv_status_process start -INFO 06-24 21:56:30 [decode_trans_process.py:145] decode trans kv process for device: 0 start! -INFO 06-24 21:56:31 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:32 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:32 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:32 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:34 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 21:56:34 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:36 [up_status.py:112] up kv manager start ok -INFO 06-24 21:56:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 21:56:36 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:36 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:36 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:36 [decode_infer_rpyc.py:171] put mem manager to info_queues ok -INFO 06-24 21:56:36 [decode_kv_move_manager.py:388] decode kv move manager process started -INFO 06-24 21:56:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:36 [start_utils.py:37] init func start_router_process : init ok -INFO 06-24 21:56:36 [start_utils.py:37] init func start_detokenization_process : init ok -INFO 06-24 21:56:36 [api_start.py:57] start process pid 1412821 -INFO 06-24 21:56:36 [api_start.py:58] http server pid 1415861 -INFO 06-24 21:56:39 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:40 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:41 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:43 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:43 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:43 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:43 [api_http.py:326] server start up -INFO 06-24 21:56:43 [atomic_array_lock.py:32] link lock shm 12322_0_lightllm_resource_lock -INFO 06-24 21:56:43 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 21:56:43 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 21:56:43 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 21:56:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:43 [atomic_lock.py:29] link lock shm 12322_0_req_id_gen_lock -INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_latest_success_infer_time_mark -INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_shared_token_load -INFO 06-24 21:56:43 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos -INFO 06-24 21:56:43 [api_http.py:330] server start up ok, loop use is -INFO 06-24 21:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:56:44 [pd_loop.py:92] Sent registration JSON: {'node_id': 148730891575017957868136796871489876076, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10011, 'detokenization_port': 10239, 'detokenization_pub_port': 10144, 'visual_port': 10176, 'audio_port': 10271, 'cache_port': 10117, 'metric_port': 10125, 'pd_node_infer_rpyc_ports': [10126], 'pd_node_id': 148730891575017957868136796871489876076, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 21:56:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:56:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 21:57:13 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:57:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:18 [rpyc_fix_utils.py:107] set nodelay mode -INFO 06-24 21:57:18 [rpyc_fix_utils.py:113] change socket buffer from 2626560 131072 change to 4194304 -INFO 06-24 21:57:18 [decode_kv_move_manager.py:225] build trans infos 287595743282619216970276961428881885738 127.0.1.1 20000 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:18 [decode_trans_process.py:57] connect start PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=0, prefill_id=287595743282619216970276961428881885738, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') -INFO 06-24 21:57:18 [decode_trans_process.py:67] connect src_id 287595743282619216970276961428881885738 dest_id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:18 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 -INFO 06-24 21:57:18 [pynccl.py:180] LightLLM is using nccl==2.21.5 -INFO 06-24 21:57:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:19 [decode_trans_process.py:85] PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=0, prefill_id=287595743282619216970276961428881885738, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') kv trans connected -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 8 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 16 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 24 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 8 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 16392 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 24 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 8 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 24 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 32 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 40 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 48 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 56 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 64 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 80 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 80 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -INFO 06-24 21:57:19 [decode_trans_obj.py:118] kv_move_loop get task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_trans_process.py:34] trans start: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 72 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 80 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 88 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 88 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 88 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 96 request_data_transfer fail, server is busy -INFO 06-24 21:57:19 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:19 [decode_infer_rpyc.py:75] req_id: id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:19 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:19 [decode_kv_move_manager.py:273] req id 104 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 112 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 120 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_trans_process.py:43] trans finished: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc move len: 1055 -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 128 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 136 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_trans_process.py:45] trans cost time: 1.5387728214263916, id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_trans_obj.py:95] _transfer_kv ok id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 144 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 152 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:20 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 16 cost_time 0.02942347526550293 s -INFO 06-24 21:57:20 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 16 finished -INFO 06-24 21:57:20 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=16, dp_index=0, pd_master_node_id=0) -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [decode_infer_rpyc.py:75] req_id: id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 21:57:20 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:20 lightllm_req_id:16 -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 160 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [decode_kv_move_manager.py:273] req id 168 request_data_transfer fail, server is busy -INFO 06-24 21:57:20 [manager.py:224] router recive req id 16 cost time 0.08874297142028809 s -DEBUG 06-24 21:57:20 [manager.py:391] Prefill Batch: batch_id=287305880176870206574426824661545321596, time:1750773440.8170793s req_ids:[16] -DEBUG 06-24 21:57:20 [manager.py:391] -DEBUG 06-24 21:57:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 13.839 tokens/s -DEBUG 06-24 21:57:20 [stats.py:37] Avg prompt tokens throughput: 13.839 tokens/s -DEBUG 06-24 21:57:20 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 16 cost time 0.14529085159301758 s -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15307 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15307 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 176 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 184 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 192 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 200 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15306 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 208 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15305 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_infer_rpyc.py:75] req_id: id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager can alloc token num 15305 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:21 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 216 request_data_transfer fail, server is busy -INFO 06-24 21:57:21 [decode_kv_move_manager.py:273] req id 224 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 token used ratio: 0.06710590531966813 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:22 [manager.py:248] dp_i 0 token used ratio: 0.06710590531966813 contain prompt cache tree unrefed token -INFO 06-24 21:57:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 232 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 232 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 232 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 240 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 240 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 240 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15264 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 248 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 256 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 272 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 272 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 264 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 272 request_data_transfer fail, server is busy -INFO 06-24 21:57:22 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 280 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:22 [decode_infer_rpyc.py:75] req_id: id: 280 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager can alloc token num 15263 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:22 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:22 [decode_kv_move_manager.py:273] req id 280 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 token used ratio: 0.0701561737432894 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:23 [manager.py:248] dp_i 0 token used ratio: 0.0701561737432894 contain prompt cache tree unrefed token -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 288 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 288 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 288 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 296 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 296 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 296 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 304 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15215 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 312 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 320 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 328 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 336 request_data_transfer fail, server is busy -INFO 06-24 21:57:23 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:23 [decode_infer_rpyc.py:75] req_id: id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager can alloc token num 15214 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:23 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:23 [decode_kv_move_manager.py:273] req id 344 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 token used ratio: 0.07320644216691069 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:24 [manager.py:248] dp_i 0 token used ratio: 0.07320644216691069 contain prompt cache tree unrefed token -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 352 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 360 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 368 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15173 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 376 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 384 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15172 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_infer_rpyc.py:75] req_id: id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager can alloc token num 15172 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:24 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 392 request_data_transfer fail, server is busy -INFO 06-24 21:57:24 [decode_kv_move_manager.py:273] req id 400 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.07625671059053196 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.07625671059053196 contain prompt cache tree unrefed token -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 408 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 408 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 408 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 416 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 424 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 416 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 424 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15131 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 416 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 424 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 432 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 440 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 432 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 440 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 432 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 440 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_infer_rpyc.py:75] req_id: id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager can alloc token num 15130 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:25 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 448 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_kv_move_manager.py:273] req id 456 request_data_transfer fail, server is busy -INFO 06-24 21:57:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 token used ratio: 0.07930697901415325 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:26 [manager.py:248] dp_i 0 token used ratio: 0.07930697901415325 contain prompt cache tree unrefed token -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 464 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 472 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 488 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 472 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 488 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15089 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 472 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 480 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 488 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 496 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 504 request_data_transfer fail, server is busy -INFO 06-24 21:57:26 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 512 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:26 [decode_infer_rpyc.py:75] req_id: id: 512 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager can alloc token num 15088 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:26 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:26 [decode_kv_move_manager.py:273] req id 512 request_data_transfer fail, server is busy -INFO 06-24 21:57:27 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 520 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:27 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 token used ratio: 0.08235724743777452 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:27 [manager.py:248] dp_i 0 token used ratio: 0.08235724743777452 contain prompt cache tree unrefed token -INFO 06-24 21:57:27 [decode_infer_rpyc.py:75] req_id: id: 520 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:27 [decode_infer_rpyc.py:75] req_id: id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:27 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:27 [decode_kv_move_manager.py:273] req id 520 request_data_transfer fail, server is busy -INFO 06-24 21:57:27 [decode_kv_move_manager.py:273] req id 528 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 536 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 544 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 552 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15041 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 560 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 568 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 15040 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 576 request_data_transfer fail, server is busy -INFO 06-24 21:57:28 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:28 [decode_infer_rpyc.py:75] req_id: id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:28 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:28 [decode_kv_move_manager.py:273] req id 584 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 592 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 600 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14999 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 608 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 616 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14998 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14998 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 624 request_data_transfer fail, server is busy -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 632 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 token used ratio: 0.0854075158613958 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:29 [manager.py:248] dp_i 0 token used ratio: 0.0854075158613958 contain prompt cache tree unrefed token -INFO 06-24 21:57:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:29 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:29 [decode_infer_rpyc.py:75] req_id: id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager can alloc token num 14958 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:29 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:29 [decode_kv_move_manager.py:273] req id 640 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14958 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 648 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14957 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 656 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 664 in_len:1043 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 664 in_len:1043 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 664 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 672 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 672 in_len:1062 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 672 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14956 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 680 request_data_transfer fail, server is busy -INFO 06-24 21:57:30 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:30 [decode_infer_rpyc.py:75] req_id: id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager can alloc token num 14955 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:30 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:30 [decode_kv_move_manager.py:273] req id 688 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 token used ratio: 0.08845778428501708 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:30 [manager.py:248] dp_i 0 token used ratio: 0.08845778428501708 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 41.301 tokens/s -DEBUG 06-24 21:57:30 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:57:30 [stats.py:37] Avg generate tokens throughput: 41.301 tokens/s -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14910 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 696 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 704 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 712 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14909 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 720 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 728 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 736 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 744 request_data_transfer fail, server is busy -INFO 06-24 21:57:31 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:31 [decode_infer_rpyc.py:75] req_id: id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager can alloc token num 14908 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:31 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:31 [decode_kv_move_manager.py:273] req id 752 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 token used ratio: 0.09150805270863836 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:31 [manager.py:248] dp_i 0 token used ratio: 0.09150805270863836 contain prompt cache tree unrefed token -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 760 request_data_transfer fail, server is busy -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 768 request_data_transfer fail, server is busy -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 776 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 784 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 776 in_len:1057 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 784 in_len:1051 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14872 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 776 request_data_transfer fail, server is busy -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 784 request_data_transfer fail, server is busy -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14871 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_infer_rpyc.py:75] req_id: id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager can alloc token num 14871 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:57:32 [decode_infer_rpyc.py:80] -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 792 request_data_transfer fail, server is busy -INFO 06-24 21:57:32 [decode_kv_move_manager.py:273] req id 800 request_data_transfer fail, server is busy -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 token used ratio: 0.09455832113225963 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:32 [manager.py:248] dp_i 0 token used ratio: 0.09455832113225963 contain prompt cache tree unrefed token -INFO 06-24 21:57:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 token used ratio: 0.09760858955588092 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:34 [manager.py:248] dp_i 0 token used ratio: 0.09760858955588092 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 token used ratio: 0.10065885797950219 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:35 [manager.py:248] dp_i 0 token used ratio: 0.10065885797950219 contain prompt cache tree unrefed token -INFO 06-24 21:57:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 token used ratio: 0.10370912640312348 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:36 [manager.py:248] dp_i 0 token used ratio: 0.10370912640312348 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 token used ratio: 0.10675939482674475 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:37 [manager.py:248] dp_i 0 token used ratio: 0.10675939482674475 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 token used ratio: 0.10980966325036604 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:38 [manager.py:248] dp_i 0 token used ratio: 0.10980966325036604 contain prompt cache tree unrefed token -INFO 06-24 21:57:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 token used ratio: 0.11285993167398731 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:40 [manager.py:248] dp_i 0 token used ratio: 0.11285993167398731 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 40.475 tokens/s -DEBUG 06-24 21:57:40 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:57:40 [stats.py:37] Avg generate tokens throughput: 40.475 tokens/s -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 token used ratio: 0.11591020009760859 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:41 [manager.py:248] dp_i 0 token used ratio: 0.11591020009760859 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 token used ratio: 0.11896046852122986 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:42 [manager.py:248] dp_i 0 token used ratio: 0.11896046852122986 contain prompt cache tree unrefed token -INFO 06-24 21:57:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:43 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 21:57:43 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 token used ratio: 0.12201073694485115 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:44 [manager.py:248] dp_i 0 token used ratio: 0.12201073694485115 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 token used ratio: 0.12506100536847242 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:45 [manager.py:248] dp_i 0 token used ratio: 0.12506100536847242 contain prompt cache tree unrefed token -INFO 06-24 21:57:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 token used ratio: 0.1281112737920937 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:47 [manager.py:248] dp_i 0 token used ratio: 0.1281112737920937 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 token used ratio: 0.131161542215715 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:50 [manager.py:248] dp_i 0 token used ratio: 0.131161542215715 contain prompt cache tree unrefed token -INFO 06-24 21:57:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 29.473 tokens/s -DEBUG 06-24 21:57:50 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:57:50 [stats.py:37] Avg generate tokens throughput: 29.473 tokens/s -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 token used ratio: 0.13421181063933627 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:52 [manager.py:248] dp_i 0 token used ratio: 0.13421181063933627 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 token used ratio: 0.13726207906295754 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:53 [manager.py:248] dp_i 0 token used ratio: 0.13726207906295754 contain prompt cache tree unrefed token -INFO 06-24 21:57:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 token used ratio: 0.1403123474865788 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:55 [manager.py:248] dp_i 0 token used ratio: 0.1403123474865788 contain prompt cache tree unrefed token -INFO 06-24 21:57:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 token used ratio: 0.1433626159102001 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:57 [manager.py:248] dp_i 0 token used ratio: 0.1433626159102001 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 token used ratio: 0.14641288433382138 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:59 [manager.py:248] dp_i 0 token used ratio: 0.14641288433382138 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.256 tokens/s -DEBUG 06-24 21:58:00 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:00 [stats.py:37] Avg generate tokens throughput: 26.256 tokens/s -INFO 06-24 21:58:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 token used ratio: 0.14946315275744265 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:01 [manager.py:248] dp_i 0 token used ratio: 0.14946315275744265 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 token used ratio: 0.15251342118106392 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:03 [manager.py:248] dp_i 0 token used ratio: 0.15251342118106392 contain prompt cache tree unrefed token -INFO 06-24 21:58:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 token used ratio: 0.15556368960468522 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:05 [manager.py:248] dp_i 0 token used ratio: 0.15556368960468522 contain prompt cache tree unrefed token -INFO 06-24 21:58:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 token used ratio: 0.1586139580283065 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:07 [manager.py:248] dp_i 0 token used ratio: 0.1586139580283065 contain prompt cache tree unrefed token -INFO 06-24 21:58:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 token used ratio: 0.16166422645192777 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:09 [manager.py:248] dp_i 0 token used ratio: 0.16166422645192777 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.558 tokens/s -DEBUG 06-24 21:58:10 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:10 [stats.py:37] Avg generate tokens throughput: 25.558 tokens/s -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 token used ratio: 0.16471449487554904 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:11 [manager.py:248] dp_i 0 token used ratio: 0.16471449487554904 contain prompt cache tree unrefed token -INFO 06-24 21:58:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 token used ratio: 0.16776476329917034 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:13 [manager.py:248] dp_i 0 token used ratio: 0.16776476329917034 contain prompt cache tree unrefed token -INFO 06-24 21:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:58:13 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 21:58:13 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:58:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 token used ratio: 0.1708150317227916 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:15 [manager.py:248] dp_i 0 token used ratio: 0.1708150317227916 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 token used ratio: 0.17386530014641288 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:17 [manager.py:248] dp_i 0 token used ratio: 0.17386530014641288 contain prompt cache tree unrefed token -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 808 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 808 in_len:1058 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13513 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] -INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 808 request_data_transfer fail, server is busy -INFO 06-24 21:58:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 816 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 816 in_len:1046 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] -INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] -INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 816 request_data_transfer fail, server is busy -INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 824 request_data_transfer fail, server is busy -INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13497 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] -INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 832 request_data_transfer fail, server is busy -INFO 06-24 21:58:18 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc, type -INFO 06-24 21:58:18 [decode_infer_rpyc.py:75] req_id: id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc alloc token failed -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager can alloc token num 13496 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] estimated peak token num 15623 -DEBUG 06-24 21:58:18 [decode_infer_rpyc.py:80] -INFO 06-24 21:58:18 [decode_kv_move_manager.py:273] req id 840 request_data_transfer fail, server is busy -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 token used ratio: 0.17691556857003415 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:19 [manager.py:248] dp_i 0 token used ratio: 0.17691556857003415 contain prompt cache tree unrefed token -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.113 tokens/s -DEBUG 06-24 21:58:20 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:20 [stats.py:37] Avg generate tokens throughput: 26.113 tokens/s -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 token used ratio: 0.17996583699365545 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:20 [manager.py:248] dp_i 0 token used ratio: 0.17996583699365545 contain prompt cache tree unrefed token -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 token used ratio: 0.18301610541727673 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:22 [manager.py:248] dp_i 0 token used ratio: 0.18301610541727673 contain prompt cache tree unrefed token -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 token used ratio: 0.186066373840898 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:24 [manager.py:248] dp_i 0 token used ratio: 0.186066373840898 contain prompt cache tree unrefed token -INFO 06-24 21:58:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 token used ratio: 0.18911664226451927 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:26 [manager.py:248] dp_i 0 token used ratio: 0.18911664226451927 contain prompt cache tree unrefed token -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 token used ratio: 0.19216691068814057 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:28 [manager.py:248] dp_i 0 token used ratio: 0.19216691068814057 contain prompt cache tree unrefed token -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 24.824 tokens/s -DEBUG 06-24 21:58:30 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:30 [stats.py:37] Avg generate tokens throughput: 24.824 tokens/s -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 token used ratio: 0.19521717911176184 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:31 [manager.py:248] dp_i 0 token used ratio: 0.19521717911176184 contain prompt cache tree unrefed token -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 token used ratio: 0.1982674475353831 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:33 [manager.py:248] dp_i 0 token used ratio: 0.1982674475353831 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 token used ratio: 0.20131771595900438 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:35 [manager.py:248] dp_i 0 token used ratio: 0.20131771595900438 contain prompt cache tree unrefed token -INFO 06-24 21:58:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:58:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 token used ratio: 0.20436798438262568 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:38 [manager.py:248] dp_i 0 token used ratio: 0.20436798438262568 contain prompt cache tree unrefed token -INFO 06-24 21:58:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 token used ratio: 0.20741825280624696 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:39 [manager.py:248] dp_i 0 token used ratio: 0.20741825280624696 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 22.839 tokens/s -DEBUG 06-24 21:58:40 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:40 [stats.py:37] Avg generate tokens throughput: 22.839 tokens/s -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 token used ratio: 0.21046852122986823 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:41 [manager.py:248] dp_i 0 token used ratio: 0.21046852122986823 contain prompt cache tree unrefed token -INFO 06-24 21:58:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 token used ratio: 0.2135187896534895 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:43 [manager.py:248] dp_i 0 token used ratio: 0.2135187896534895 contain prompt cache tree unrefed token -INFO 06-24 21:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:58:44 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 21:58:44 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:58:44 [manager.py:620] left req id 16can release False refcount 4 -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 token used ratio: 0.2165690580771108 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:45 [manager.py:248] dp_i 0 token used ratio: 0.2165690580771108 contain prompt cache tree unrefed token -INFO 06-24 21:58:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 token used ratio: 0.21961932650073207 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:47 [manager.py:248] dp_i 0 token used ratio: 0.21961932650073207 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 token used ratio: 0.22266959492435334 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:49 [manager.py:248] dp_i 0 token used ratio: 0.22266959492435334 contain prompt cache tree unrefed token -INFO 06-24 21:58:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.730 tokens/s -DEBUG 06-24 21:58:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:58:51 [stats.py:37] Avg generate tokens throughput: 26.730 tokens/s -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 token used ratio: 0.22571986334797461 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:51 [manager.py:248] dp_i 0 token used ratio: 0.22571986334797461 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 token used ratio: 0.2287701317715959 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:53 [manager.py:248] dp_i 0 token used ratio: 0.2287701317715959 contain prompt cache tree unrefed token -INFO 06-24 21:58:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 token used ratio: 0.23182040019521719 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:55 [manager.py:248] dp_i 0 token used ratio: 0.23182040019521719 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 token used ratio: 0.23487066861883846 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:56 [manager.py:248] dp_i 0 token used ratio: 0.23487066861883846 contain prompt cache tree unrefed token -INFO 06-24 21:58:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 token used ratio: 0.23792093704245973 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:58 [manager.py:248] dp_i 0 token used ratio: 0.23792093704245973 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 token used ratio: 0.24097120546608103 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:00 [manager.py:248] dp_i 0 token used ratio: 0.24097120546608103 contain prompt cache tree unrefed token -INFO 06-24 21:59:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.807 tokens/s -DEBUG 06-24 21:59:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:01 [stats.py:37] Avg generate tokens throughput: 26.807 tokens/s -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 token used ratio: 0.2440214738897023 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:02 [manager.py:248] dp_i 0 token used ratio: 0.2440214738897023 contain prompt cache tree unrefed token -INFO 06-24 21:59:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 token used ratio: 0.24707174231332357 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:04 [manager.py:248] dp_i 0 token used ratio: 0.24707174231332357 contain prompt cache tree unrefed token -INFO 06-24 21:59:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 token used ratio: 0.25012201073694484 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:06 [manager.py:248] dp_i 0 token used ratio: 0.25012201073694484 contain prompt cache tree unrefed token -INFO 06-24 21:59:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 token used ratio: 0.2531722791605661 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:08 [manager.py:248] dp_i 0 token used ratio: 0.2531722791605661 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 token used ratio: 0.2562225475841874 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:10 [manager.py:248] dp_i 0 token used ratio: 0.2562225475841874 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.359 tokens/s -DEBUG 06-24 21:59:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:11 [stats.py:37] Avg generate tokens throughput: 25.359 tokens/s -INFO 06-24 21:59:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 token used ratio: 0.25927281600780866 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:12 [manager.py:248] dp_i 0 token used ratio: 0.25927281600780866 contain prompt cache tree unrefed token -INFO 06-24 21:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:59:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 21:59:14 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 token used ratio: 0.26232308443143 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:14 [manager.py:248] dp_i 0 token used ratio: 0.26232308443143 contain prompt cache tree unrefed token -INFO 06-24 21:59:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 token used ratio: 0.26537335285505126 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:16 [manager.py:248] dp_i 0 token used ratio: 0.26537335285505126 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 token used ratio: 0.26842362127867253 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:18 [manager.py:248] dp_i 0 token used ratio: 0.26842362127867253 contain prompt cache tree unrefed token -INFO 06-24 21:59:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 token used ratio: 0.2714738897022938 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:19 [manager.py:248] dp_i 0 token used ratio: 0.2714738897022938 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.152 tokens/s -DEBUG 06-24 21:59:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:21 [stats.py:37] Avg generate tokens throughput: 26.152 tokens/s -INFO 06-24 21:59:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 token used ratio: 0.2745241581259151 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:21 [manager.py:248] dp_i 0 token used ratio: 0.2745241581259151 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 token used ratio: 0.27757442654953635 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:23 [manager.py:248] dp_i 0 token used ratio: 0.27757442654953635 contain prompt cache tree unrefed token -INFO 06-24 21:59:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 token used ratio: 0.2806246949731576 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:25 [manager.py:248] dp_i 0 token used ratio: 0.2806246949731576 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 token used ratio: 0.2836749633967789 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:27 [manager.py:248] dp_i 0 token used ratio: 0.2836749633967789 contain prompt cache tree unrefed token -INFO 06-24 21:59:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 token used ratio: 0.2867252318204002 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:29 [manager.py:248] dp_i 0 token used ratio: 0.2867252318204002 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.161 tokens/s -DEBUG 06-24 21:59:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:31 [stats.py:37] Avg generate tokens throughput: 25.161 tokens/s -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 token used ratio: 0.2897755002440215 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:31 [manager.py:248] dp_i 0 token used ratio: 0.2897755002440215 contain prompt cache tree unrefed token -INFO 06-24 21:59:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 token used ratio: 0.29282576866764276 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:33 [manager.py:248] dp_i 0 token used ratio: 0.29282576866764276 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 token used ratio: 0.29587603709126403 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:35 [manager.py:248] dp_i 0 token used ratio: 0.29587603709126403 contain prompt cache tree unrefed token -INFO 06-24 21:59:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:59:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 token used ratio: 0.2989263055148853 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:37 [manager.py:248] dp_i 0 token used ratio: 0.2989263055148853 contain prompt cache tree unrefed token -INFO 06-24 21:59:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 token used ratio: 0.3019765739385066 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:39 [manager.py:248] dp_i 0 token used ratio: 0.3019765739385066 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.809 tokens/s -DEBUG 06-24 21:59:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:41 [stats.py:37] Avg generate tokens throughput: 25.809 tokens/s -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 token used ratio: 0.30502684236212785 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:41 [manager.py:248] dp_i 0 token used ratio: 0.30502684236212785 contain prompt cache tree unrefed token -INFO 06-24 21:59:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 token used ratio: 0.3080771107857491 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:43 [manager.py:248] dp_i 0 token used ratio: 0.3080771107857491 contain prompt cache tree unrefed token -INFO 06-24 21:59:44 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:59:44 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 21:59:44 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 token used ratio: 0.31112737920937045 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:45 [manager.py:248] dp_i 0 token used ratio: 0.31112737920937045 contain prompt cache tree unrefed token -INFO 06-24 21:59:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 token used ratio: 0.3141776476329917 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:47 [manager.py:248] dp_i 0 token used ratio: 0.3141776476329917 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 token used ratio: 0.317227916056613 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:48 [manager.py:248] dp_i 0 token used ratio: 0.317227916056613 contain prompt cache tree unrefed token -INFO 06-24 21:59:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 token used ratio: 0.32027818448023426 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:50 [manager.py:248] dp_i 0 token used ratio: 0.32027818448023426 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.841 tokens/s -DEBUG 06-24 21:59:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 21:59:51 [stats.py:37] Avg generate tokens throughput: 26.841 tokens/s -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32332845290385553 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32332845290385553 contain prompt cache tree unrefed token -INFO 06-24 21:59:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3263787213274768 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3263787213274768 contain prompt cache tree unrefed token -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3294289897510981 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3294289897510981 contain prompt cache tree unrefed token -INFO 06-24 21:59:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 token used ratio: 0.33247925817471935 not contain prompt cache tree unrefed token -DEBUG 06-24 21:59:58 [manager.py:248] dp_i 0 token used ratio: 0.33247925817471935 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3355295265983407 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3355295265983407 contain prompt cache tree unrefed token -INFO 06-24 22:00:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 27.086 tokens/s -DEBUG 06-24 22:00:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:01 [stats.py:37] Avg generate tokens throughput: 27.086 tokens/s -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 token used ratio: 0.33857979502196195 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:01 [manager.py:248] dp_i 0 token used ratio: 0.33857979502196195 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 token used ratio: 0.3416300634455832 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:03 [manager.py:248] dp_i 0 token used ratio: 0.3416300634455832 contain prompt cache tree unrefed token -INFO 06-24 22:00:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 token used ratio: 0.3446803318692045 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:05 [manager.py:248] dp_i 0 token used ratio: 0.3446803318692045 contain prompt cache tree unrefed token -INFO 06-24 22:00:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:00:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 token used ratio: 0.34773060029282576 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:07 [manager.py:248] dp_i 0 token used ratio: 0.34773060029282576 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 token used ratio: 0.35078086871644704 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:09 [manager.py:248] dp_i 0 token used ratio: 0.35078086871644704 contain prompt cache tree unrefed token -INFO 06-24 22:00:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 token used ratio: 0.3538311371400683 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:11 [manager.py:248] dp_i 0 token used ratio: 0.3538311371400683 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 27.118 tokens/s -DEBUG 06-24 22:00:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:11 [stats.py:37] Avg generate tokens throughput: 27.118 tokens/s -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:12 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 contain prompt cache tree unrefed token -INFO 06-24 22:00:14 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:00:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 22:00:14 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 22:00:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 token used ratio: 0.3599316739873109 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:14 [manager.py:248] dp_i 0 token used ratio: 0.3599316739873109 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 token used ratio: 0.3629819424109322 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:16 [manager.py:248] dp_i 0 token used ratio: 0.3629819424109322 contain prompt cache tree unrefed token -INFO 06-24 22:00:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 token used ratio: 0.36603221083455345 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:18 [manager.py:248] dp_i 0 token used ratio: 0.36603221083455345 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 token used ratio: 0.3690824792581747 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:20 [manager.py:248] dp_i 0 token used ratio: 0.3690824792581747 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.744 tokens/s -DEBUG 06-24 22:00:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:21 [stats.py:37] Avg generate tokens throughput: 26.744 tokens/s -INFO 06-24 22:00:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 token used ratio: 0.372132747681796 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:22 [manager.py:248] dp_i 0 token used ratio: 0.372132747681796 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 token used ratio: 0.37518301610541727 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:24 [manager.py:248] dp_i 0 token used ratio: 0.37518301610541727 contain prompt cache tree unrefed token -INFO 06-24 22:00:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 token used ratio: 0.37823328452903854 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:26 [manager.py:248] dp_i 0 token used ratio: 0.37823328452903854 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 token used ratio: 0.3812835529526598 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:28 [manager.py:248] dp_i 0 token used ratio: 0.3812835529526598 contain prompt cache tree unrefed token -INFO 06-24 22:00:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 token used ratio: 0.38433382137628114 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:29 [manager.py:248] dp_i 0 token used ratio: 0.38433382137628114 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.338 tokens/s -DEBUG 06-24 22:00:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:31 [stats.py:37] Avg generate tokens throughput: 26.338 tokens/s -INFO 06-24 22:00:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 token used ratio: 0.3873840897999024 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:31 [manager.py:248] dp_i 0 token used ratio: 0.3873840897999024 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 token used ratio: 0.3904343582235237 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:33 [manager.py:248] dp_i 0 token used ratio: 0.3904343582235237 contain prompt cache tree unrefed token -INFO 06-24 22:00:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 token used ratio: 0.39348462664714495 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:35 [manager.py:248] dp_i 0 token used ratio: 0.39348462664714495 contain prompt cache tree unrefed token -INFO 06-24 22:00:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 token used ratio: 0.3965348950707662 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:37 [manager.py:248] dp_i 0 token used ratio: 0.3965348950707662 contain prompt cache tree unrefed token -INFO 06-24 22:00:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 token used ratio: 0.3995851634943875 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:39 [manager.py:248] dp_i 0 token used ratio: 0.3995851634943875 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.030 tokens/s -DEBUG 06-24 22:00:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:41 [stats.py:37] Avg generate tokens throughput: 26.030 tokens/s -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 token used ratio: 0.40263543191800877 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:41 [manager.py:248] dp_i 0 token used ratio: 0.40263543191800877 contain prompt cache tree unrefed token -INFO 06-24 22:00:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 token used ratio: 0.40568570034163004 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:43 [manager.py:248] dp_i 0 token used ratio: 0.40568570034163004 contain prompt cache tree unrefed token -INFO 06-24 22:00:44 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:00:44 [manager.py:620] left req id 16can release False refcount 4 -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 token used ratio: 0.40873596876525137 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:45 [manager.py:248] dp_i 0 token used ratio: 0.40873596876525137 contain prompt cache tree unrefed token -INFO 06-24 22:00:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 token used ratio: 0.41178623718887264 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:46 [manager.py:248] dp_i 0 token used ratio: 0.41178623718887264 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 token used ratio: 0.4148365056124939 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:48 [manager.py:248] dp_i 0 token used ratio: 0.4148365056124939 contain prompt cache tree unrefed token -INFO 06-24 22:00:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 token used ratio: 0.4178867740361152 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:50 [manager.py:248] dp_i 0 token used ratio: 0.4178867740361152 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.794 tokens/s -DEBUG 06-24 22:00:51 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:00:51 [stats.py:37] Avg generate tokens throughput: 26.794 tokens/s -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 token used ratio: 0.42093704245973645 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:52 [manager.py:248] dp_i 0 token used ratio: 0.42093704245973645 contain prompt cache tree unrefed token -INFO 06-24 22:00:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 token used ratio: 0.4239873108833577 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:54 [manager.py:248] dp_i 0 token used ratio: 0.4239873108833577 contain prompt cache tree unrefed token -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 token used ratio: 0.427037579306979 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:56 [manager.py:248] dp_i 0 token used ratio: 0.427037579306979 contain prompt cache tree unrefed token -INFO 06-24 22:00:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 token used ratio: 0.43008784773060027 not contain prompt cache tree unrefed token -DEBUG 06-24 22:00:58 [manager.py:248] dp_i 0 token used ratio: 0.43008784773060027 contain prompt cache tree unrefed token -INFO 06-24 22:00:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 token used ratio: 0.4331381161542216 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:00 [manager.py:248] dp_i 0 token used ratio: 0.4331381161542216 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.958 tokens/s -DEBUG 06-24 22:01:01 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:01:01 [stats.py:37] Avg generate tokens throughput: 26.958 tokens/s -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 token used ratio: 0.43618838457784287 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:01 [manager.py:248] dp_i 0 token used ratio: 0.43618838457784287 contain prompt cache tree unrefed token -INFO 06-24 22:01:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 token used ratio: 0.43923865300146414 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:03 [manager.py:248] dp_i 0 token used ratio: 0.43923865300146414 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 token used ratio: 0.4422889214250854 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:05 [manager.py:248] dp_i 0 token used ratio: 0.4422889214250854 contain prompt cache tree unrefed token -INFO 06-24 22:01:06 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:01:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 token used ratio: 0.4453391898487067 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:07 [manager.py:248] dp_i 0 token used ratio: 0.4453391898487067 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 token used ratio: 0.44838945827232796 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:09 [manager.py:248] dp_i 0 token used ratio: 0.44838945827232796 contain prompt cache tree unrefed token -INFO 06-24 22:01:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 token used ratio: 0.45143972669594923 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:11 [manager.py:248] dp_i 0 token used ratio: 0.45143972669594923 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.708 tokens/s -DEBUG 06-24 22:01:11 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:01:11 [stats.py:37] Avg generate tokens throughput: 26.708 tokens/s -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 token used ratio: 0.4544899951195705 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:13 [manager.py:248] dp_i 0 token used ratio: 0.4544899951195705 contain prompt cache tree unrefed token -INFO 06-24 22:01:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 22:01:14 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:01:14 [statics_utils.py:24] mean first cost: 290.2035713195801 ms -INFO 06-24 22:01:14 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 token used ratio: 0.4575402635431918 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:14 [manager.py:248] dp_i 0 token used ratio: 0.4575402635431918 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 token used ratio: 0.4605905319668131 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:16 [manager.py:248] dp_i 0 token used ratio: 0.4605905319668131 contain prompt cache tree unrefed token -INFO 06-24 22:01:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 token used ratio: 0.46364080039043437 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:18 [manager.py:248] dp_i 0 token used ratio: 0.46364080039043437 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 token used ratio: 0.46669106881405564 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:20 [manager.py:248] dp_i 0 token used ratio: 0.46669106881405564 contain prompt cache tree unrefed token -INFO 06-24 22:01:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.467 tokens/s -DEBUG 06-24 22:01:21 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:01:21 [stats.py:37] Avg generate tokens throughput: 26.467 tokens/s -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 token used ratio: 0.4697413372376769 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:22 [manager.py:248] dp_i 0 token used ratio: 0.4697413372376769 contain prompt cache tree unrefed token -INFO 06-24 22:01:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 token used ratio: 0.4727916056612982 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:24 [manager.py:248] dp_i 0 token used ratio: 0.4727916056612982 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 token used ratio: 0.47584187408491946 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:26 [manager.py:248] dp_i 0 token used ratio: 0.47584187408491946 contain prompt cache tree unrefed token -INFO 06-24 22:01:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 token used ratio: 0.47889214250854073 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:28 [manager.py:248] dp_i 0 token used ratio: 0.47889214250854073 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 token used ratio: 0.48194241093216206 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:30 [manager.py:248] dp_i 0 token used ratio: 0.48194241093216206 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 26.121 tokens/s -DEBUG 06-24 22:01:31 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:01:31 [stats.py:37] Avg generate tokens throughput: 26.121 tokens/s -INFO 06-24 22:01:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 token used ratio: 0.48499267935578333 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:32 [manager.py:248] dp_i 0 token used ratio: 0.48499267935578333 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 token used ratio: 0.4880429477794046 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:34 [manager.py:248] dp_i 0 token used ratio: 0.4880429477794046 contain prompt cache tree unrefed token -INFO 06-24 22:01:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 token used ratio: 0.4910932162030259 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:36 [manager.py:248] dp_i 0 token used ratio: 0.4910932162030259 contain prompt cache tree unrefed token -INFO 06-24 22:01:36 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 token used ratio: 0.49414348462664714 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:37 [manager.py:248] dp_i 0 token used ratio: 0.49414348462664714 contain prompt cache tree unrefed token -INFO 06-24 22:01:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -ERROR 06-24 22:01:39 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 22:01:39 [pd_loop.py:121] no close frame received or sent -ERROR 06-24 22:01:39 [pd_loop.py:121] ConnectionResetError: [Errno 104] Connection reset by peer -ERROR 06-24 22:01:39 [pd_loop.py:121] -ERROR 06-24 22:01:39 [pd_loop.py:121] The above exception was the direct cause of the following exception: -ERROR 06-24 22:01:39 [pd_loop.py:121] -ERROR 06-24 22:01:39 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task -ERROR 06-24 22:01:39 [pd_loop.py:121] recv_bytes = await websocket.recv() -ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv -ERROR 06-24 22:01:39 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc -ERROR 06-24 22:01:39 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 token used ratio: 0.4971937530502684 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:39 [manager.py:248] dp_i 0 token used ratio: 0.4971937530502684 contain prompt cache tree unrefed token -DEBUG 06-24 22:01:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 25.706 tokens/s -DEBUG 06-24 22:01:41 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 22:01:41 [stats.py:37] Avg generate tokens throughput: 25.706 tokens/s -INFO 06-24 22:01:41 [decode_kv_move_manager.py:206] connect id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc disconnect -ERROR 06-24 22:01:41 [decode_trans_obj.py:180] put_to_radix_loop thread quit, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 -ERROR 06-24 22:01:41 [decode_trans_obj.py:136] kv_move_loop thread quit -ERROR 06-24 22:01:41 [decode_trans_obj.py:226] trans obj del start, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 -ERROR 06-24 22:01:41 [decode_trans_obj.py:249] trans obj deled, info: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 -INFO 06-24 22:01:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15623 -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 token used ratio: 0.5002440214738897 not contain prompt cache tree unrefed token -DEBUG 06-24 22:01:41 [manager.py:248] dp_i 0 token used ratio: 0.5002440214738897 contain prompt cache tree unrefed token -INFO 06-24 22:01:42 [decode_trans_process.py:123] destory PDTransLeaveInfo(decode_id=148730891575017957868136796871489876076, prefill_id=287595743282619216970276961428881885738, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') nccl communicator. -INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -ERROR 06-24 22:01:43 [manager.py:487] Router Caught exception: {'message': 'Task exception was never retrieved', 'exception': KeyboardInterrupt(), 'future': exception=KeyboardInterrupt()>} -ERROR 06-24 22:01:43 [manager.py:487] NoneType: None -ERROR 06-24 22:01:43 [decode_kv_move_manager.py:301] -Traceback (most recent call last): - File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/decode_node_impl/decode_kv_move_manager.py", line 299, in timer_loop - time.sleep(3.5) -KeyboardInterrupt - -INFO 06-24 22:01:43 [start_utils.py:106] Killing child process 1415871 -INFO 06-24 22:01:43 [start_utils.py:106] Killing child process 1416220 -INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 -INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413171 -INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1414844 -INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1415305 -INFO 06-24 22:01:43 [start_utils.py:51] Killing child process 1415308 -INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 -INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 -INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 -INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 -INFO 06-24 22:01:43 [start_utils.py:53] Killing parent process 1413335 -INFO 06-24 22:01:43 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 22:01:43 [api_start.py:30] All processes have been forcefully terminated. -INFO 06-24 22:01:43 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:43 [start_utils.py:108] Killing parent process 1415861 -INFO 06-24 22:01:43 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 22:01:43 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_d_4096.log b/pd_d_4096.log deleted file mode 100644 index ea644a687..000000000 --- a/pd_d_4096.log +++ /dev/null @@ -1,196230 +0,0 @@ -INFO 06-24 19:54:01 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:02 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:03 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:04 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:04 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:04 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:05 [api_start.py:79] zmq mode head: ipc:///tmp/_12322_0_ -INFO 06-24 19:54:05 [api_start.py:81] use tgi api: False -INFO 06-24 19:54:05 [api_start.py:192] alloced ports: [10135, 10143, 10207, 10165, 10059, 10114, 10051, 10001, 10236] -INFO 06-24 19:54:05 [api_start.py:233] all start args:Namespace(run_mode='decode', host='127.0.1.1', port=8118, httpserver_workers=1, zmq_mode='ipc:///tmp/_12322_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=12322, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=0, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=16, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=2048, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10135, detokenization_port=10143, detokenization_pub_port=10207, visual_port=10165, audio_port=10059, cache_port=10114, metric_port=10051, pd_node_infer_rpyc_ports=[10236], pd_node_id=147275795944234129756100418482494441380, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) -INFO 06-24 19:54:06 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 19:54:08 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:09 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:09 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:10 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:11 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 19:54:11 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:12 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:12 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:12 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:13 [shm_req_manager.py:59] create lock shm 12322_0_req_shm_total -INFO 06-24 19:54:13 [atomic_array_lock.py:29] create lock shm 12322_0_array_reqs_lock -INFO 06-24 19:54:13 [atomic_lock.py:26] create lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_mem_manger_can_use_token_num_0 -INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_shared_token_load -INFO 06-24 19:54:13 [shared_arr.py:17] create shm 12322_0_shared_token_load_ext_infos -INFO 06-24 19:54:13 [model_rpc.py:70] Initialized RPC server for rank 0. -INFO 06-24 19:54:13 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 19:54:13 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 19:54:13 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 19:54:13 [model_rpc.py:184] use ContinuesBatchBackendForDecodeNode -WARNING 06-24 19:54:13 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:13 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:13 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:14 [manager.py:41] pub_to_httpserver sendhwm 1000 -INFO 06-24 19:54:14 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 19:54:14 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 19:54:14 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_shared_token_load -INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos -INFO 06-24 19:54:15 [shared_arr.py:17] create shm 12322_0_dp_rank_0_lock_tp_infos -INFO 06-24 19:54:15 [basemodel.py:134] Initial quantization. The default quantization method is none -INFO 06-24 19:54:15 [mem_utils.py:11] mode setting params: [] -INFO 06-24 19:54:15 [mem_utils.py:25] Model kv cache using mode normal -INFO 06-24 19:54:15 [shared_arr.py:20] link shm 12322_0_mem_manger_can_use_token_num_0 -INFO 06-24 19:54:26 [cuda_graph.py:45] cuda graph batch_sizes: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] -INFO 06-24 19:54:26 [cuda_graph.py:187] Begin capture cudagraph, use the --disable_cudagraph to disable it. -INFO 06-24 19:54:26 [cache_tensor_manager.py:75] pid 1212190 cuda graph alloc graph out mem (16, 152064) torch.float32 2433024 2433024 -INFO 06-24 19:54:26 [cache_tensor_manager.py:77] cuda graph managed_total_tensor_bytes: 9732096 -INFO 06-24 19:54:31 [cuda_graph.py:232] Capture cudagraph success, batch_size <=16 and max_len_in_batch <= 2048 will infer with cudagraph. -INFO 06-24 19:54:31 [basemodel.py:652] begin check max_len infer -INFO 06-24 19:54:32 [basemodel.py:680] check max_len 8448 infer ok -INFO 06-24 19:54:32 [shared_arr.py:17] create shm 12322_0_refed_tokens_num_0 -INFO 06-24 19:54:32 [shared_arr.py:17] create shm 12322_0_tree_total_tokens_num_0 -INFO 06-24 19:54:32 [base_backend.py:135] loaded model class -INFO 06-24 19:54:32 [decode_impl.py:36] lock_nccl_group ranks 0 -INFO 06-24 19:54:32 [shared_arr.py:20] link shm 12322_0_refed_tokens_num_0 -INFO 06-24 19:54:32 [shared_arr.py:20] link shm 12322_0_tree_total_tokens_num_0 -INFO 06-24 19:54:32 [manager.py:196] use req queue QueueForPDDecode -INFO 06-24 19:54:34 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:35 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:36 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:38 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:38 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:38 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:38 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 19:54:38 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 19:54:38 [decode_kv_move_manager.py:59] rpyc connect to port: 10236 ok -INFO 06-24 19:54:38 [up_status.py:122] up_kv_status_process start -INFO 06-24 19:54:38 [decode_trans_process.py:145] decode trans kv process for device: 0 start! -INFO 06-24 19:54:40 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:40 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:41 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:41 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:43 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 19:54:43 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:44 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:44 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:44 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:45 [decode_infer_rpyc.py:171] put mem manager to info_queues ok -INFO 06-24 19:54:45 [decode_kv_move_manager.py:388] decode kv move manager process started -INFO 06-24 19:54:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:54:45 [start_utils.py:37] init func start_router_process : init ok -INFO 06-24 19:54:45 [start_utils.py:37] init func start_detokenization_process : init ok -INFO 06-24 19:54:45 [api_start.py:57] start process pid 1211578 -INFO 06-24 19:54:45 [api_start.py:58] http server pid 1214339 -WARNING 06-24 19:54:45 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:45 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:45 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:46 [up_status.py:112] up kv manager start ok -INFO 06-24 19:54:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:54:48 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:54:49 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:50 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:52 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:52 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:52 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:54:52 [api_http.py:326] server start up -INFO 06-24 19:54:52 [atomic_array_lock.py:32] link lock shm 12322_0_lightllm_resource_lock -INFO 06-24 19:54:52 [shm_req_manager.py:62] link lock shm 12322_0_req_shm_total -INFO 06-24 19:54:52 [atomic_array_lock.py:32] link lock shm 12322_0_array_reqs_lock -INFO 06-24 19:54:52 [atomic_lock.py:29] link lock shm 12322_0_shm_reqs_manager_lock -INFO 06-24 19:54:53 [atomic_lock.py:29] link lock shm 12322_0_req_id_gen_lock -INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_latest_success_infer_time_mark -INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_shared_token_load -INFO 06-24 19:54:53 [shared_arr.py:20] link shm 12322_0_shared_token_load_ext_infos -INFO 06-24 19:54:53 [api_http.py:330] server start up ok, loop use is -INFO 06-24 19:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:54:53 [pd_loop.py:92] Sent registration JSON: {'node_id': 147275795944234129756100418482494441380, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10135, 'detokenization_port': 10143, 'detokenization_pub_port': 10207, 'visual_port': 10165, 'audio_port': 10059, 'cache_port': 10114, 'metric_port': 10051, 'pd_node_infer_rpyc_ports': [10236], 'pd_node_id': 147275795944234129756100418482494441380, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 19:54:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:54:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:23 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:23 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:55:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 19:55:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:55:51 [manager.py:283] -DEBUG 06-24 19:55:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:55:51 [manager.py:284] -INFO 06-24 19:55:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:53 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:53 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:55:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:55:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:23 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:56:23 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:56:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 19:56:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:56:51 [manager.py:283] -DEBUG 06-24 19:56:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:56:51 [manager.py:284] -INFO 06-24 19:56:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:53 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:56:53 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:56:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:56:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:23 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:57:23 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:57:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 19:57:52 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:57:52 [manager.py:283] -DEBUG 06-24 19:57:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:57:52 [manager.py:284] -INFO 06-24 19:57:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:53 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:57:53 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:57:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:57:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:53 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:58:53 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 19:58:53 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:58:53 [manager.py:283] -DEBUG 06-24 19:58:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:58:53 [manager.py:284] -INFO 06-24 19:58:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:58:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:40 [rpyc_fix_utils.py:107] set nodelay mode -INFO 06-24 19:59:40 [rpyc_fix_utils.py:113] change socket buffer from 2626560 131072 change to 4194304 -INFO 06-24 19:59:40 [decode_kv_move_manager.py:225] build trans infos 163479035537597727162519172725806046247 127.0.1.1 20000 f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:40 [decode_trans_process.py:57] connect start PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=0, prefill_id=163479035537597727162519172725806046247, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') -INFO 06-24 19:59:40 [decode_trans_process.py:67] connect src_id 163479035537597727162519172725806046247 dest_id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:40 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 -INFO 06-24 19:59:40 [pynccl.py:180] LightLLM is using nccl==2.21.5 -INFO 06-24 19:59:41 [decode_trans_process.py:85] PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=0, prefill_id=163479035537597727162519172725806046247, prefill_device_id=-1, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') kv trans connected -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 8 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 16 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 16 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 16 request_data_transfer fail, server is busy -INFO 06-24 19:59:41 [decode_trans_obj.py:118] kv_move_loop get task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:41 [decode_trans_process.py:34] trans start: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 24 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 24 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 24 request_data_transfer fail, server is busy -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 32 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 32 request_data_transfer fail, server is busy -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 40 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 40 request_data_transfer fail, server is busy -INFO 06-24 19:59:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:41 [decode_infer_rpyc.py:75] req_id: id: 48 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:41 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:41 [decode_kv_move_manager.py:273] req id 48 request_data_transfer fail, server is busy -INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 56 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 56 request_data_transfer fail, server is busy -INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 64 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 64 request_data_transfer fail, server is busy -INFO 06-24 19:59:42 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:42 [decode_infer_rpyc.py:75] req_id: id: 72 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix refed token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] radix hold token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager can alloc token num 15337 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 15615 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] estimated peak token num 0 -DEBUG 06-24 19:59:42 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:42 [decode_kv_move_manager.py:273] req id 72 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1055 -INFO 06-24 19:59:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 1.5428881645202637, id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 8 cost_time 0.027128219604492188 s -INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 8 finished -INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=8, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:8 -INFO 06-24 19:59:43 [manager.py:224] router recive req id 8 cost time 0.09608936309814453 s -DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=78859837134492230658999430496479756877, time:1750766383.4995642s req_ids:[8] -DEBUG 06-24 19:59:43 [manager.py:391] -DEBUG 06-24 19:59:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 3.198 tokens/s -DEBUG 06-24 19:59:43 [stats.py:37] Avg prompt tokens throughput: 3.198 tokens/s -DEBUG 06-24 19:59:43 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 80 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 80 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 8 cost time 0.15932035446166992 s -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 80 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 88 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 88 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 96 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 104 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 88 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 96 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 104 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 120 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 112 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 15336 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 112 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_trans_obj.py:118] kv_move_loop get task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_process.py:34] trans start: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 128 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 -INFO 06-24 19:59:43 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:43 [decode_infer_rpyc.py:75] req_id: id: 136 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix refed token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] radix hold token num 1055 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager can alloc token num 13229 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 6395 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] estimated peak token num 5159 -DEBUG 06-24 19:59:43 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:43 [decode_kv_move_manager.py:273] req id 136 request_data_transfer fail, server is busy -INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 0.018827438354492188, id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:118] kv_move_loop get task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_process.py:34] trans start: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_process.py:43] trans finished: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_process.py:45] trans cost time: 0.021219968795776367, id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:95] _transfer_kv ok id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 120 cost_time 0.030753135681152344 s -INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 120 finished -INFO 06-24 19:59:43 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=120, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:120 -INFO 06-24 19:59:43 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 128 cost_time 0.030910253524780273 s -INFO 06-24 19:59:43 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 128 finished -INFO 06-24 19:59:43 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=128, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:128 -INFO 06-24 19:59:43 [manager.py:224] router recive req id 120 cost time 0.10020303726196289 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 120 cost time 0.10201883316040039 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 128 cost time 0.12596750259399414 s -DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=197246024226914264121713941442776912679, time:1750766383.760377s req_ids:[120] -DEBUG 06-24 19:59:43 [manager.py:391] -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 128 cost time 0.1275322437286377 s -DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=83240924680699306059748328113417723632, time:1750766383.7850044s req_ids:[128] -DEBUG 06-24 19:59:43 [manager.py:391] -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 128 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:128 first_token_cost:223.16217422485352ms total_cost_time:223.18410873413086ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [batch.py:51] router release req id 128 -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 token used ratio: 0.12981942410932162 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:43 [manager.py:248] dp_i 0 token used ratio: 0.19387506100536847 contain prompt cache tree unrefed token -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 144 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 144 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 152 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 160 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 152 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 160 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 176 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 168 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 13148 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 168 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_trans_obj.py:118] kv_move_loop get task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [decode_trans_process.py:34] trans start: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 184 in_len:1050 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 192 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 184 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 192 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:44 [decode_infer_rpyc.py:75] req_id: id: 200 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] radix hold token num 3163 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager can alloc token num 12090 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:44 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:44 [decode_kv_move_manager.py:273] req id 200 request_data_transfer fail, server is busy -INFO 06-24 19:59:44 [decode_trans_process.py:43] trans finished: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 -INFO 06-24 19:59:44 [decode_trans_process.py:45] trans cost time: 0.0549924373626709, id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [decode_trans_obj.py:95] _transfer_kv ok id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 176 cost_time 0.029801130294799805 s -INFO 06-24 19:59:44 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 176 finished -INFO 06-24 19:59:44 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=176, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:44 lightllm_req_id:176 -WARNING 06-24 19:59:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again -INFO 06-24 19:59:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 -WARNING 06-24 19:59:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again -INFO 06-24 19:59:44 [shm_array.py:30] create shm 12322_0_shm_prompts_2 -INFO 06-24 19:59:44 [manager.py:224] router recive req id 176 cost time 0.09153246879577637 s -INFO 06-24 19:59:44 [manager.py:68] detokenization recv req id 176 cost time 0.09333586692810059 s -DEBUG 06-24 19:59:44 [manager.py:391] Prefill Batch: batch_id=144644668040586024174396603513996757795, time:1750766384.942718s req_ids:[176] -DEBUG 06-24 19:59:44 [manager.py:391] -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 176 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:44 lightllm_req_id:176 first_token_cost:193.2220458984375ms total_cost_time:193.24350357055664ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1057 prompt_cache_len:1056 prompt_cache_ratio:0.9990539262062441 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [batch.py:51] router release req id 176 -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 token used ratio: 0.13579795021961932 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:45 [manager.py:248] dp_i 0 token used ratio: 0.26433626159102 contain prompt cache tree unrefed token -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 208 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 216 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 208 request_data_transfer fail, server is busy -INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 216 request_data_transfer fail, server is busy -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 232 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 224 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 232 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 11999 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 224 request_data_transfer fail, server is busy -INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 232 request_data_transfer fail, server is busy -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 240 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_infer_rpyc.py:75] req_id: id: 248 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] radix hold token num 4220 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager can alloc token num 10934 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1064 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:45 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:45 [decode_kv_move_manager.py:273] req id 248 request_data_transfer fail, server is busy -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 256 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 264 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_process.py:34] trans start: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_process.py:43] trans finished: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1063 -INFO 06-24 19:59:45 [decode_trans_process.py:45] trans cost time: 0.024710416793823242, id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_obj.py:95] _transfer_kv ok id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_obj.py:118] kv_move_loop get task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_process.py:34] trans start: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [decode_trans_process.py:43] trans finished: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 2108 -INFO 06-24 19:59:45 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 240 cost_time 0.02665853500366211 s -INFO 06-24 19:59:45 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 240 finished -INFO 06-24 19:59:45 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=240, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:240 -WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again -INFO 06-24 19:59:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 -WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again -INFO 06-24 19:59:45 [shm_array.py:30] create shm 12322_0_shm_prompts_2 -INFO 06-24 19:59:46 [decode_trans_process.py:45] trans cost time: 0.14300155639648438, id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:95] _transfer_kv ok id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 256 cost_time 0.051763296127319336 s -INFO 06-24 19:59:46 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 256 finished -INFO 06-24 19:59:46 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 264 cost_time 0.05242276191711426 s -INFO 06-24 19:59:46 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 264 finished -INFO 06-24 19:59:46 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=256, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:46 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=264, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:256 -INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:264 -INFO 06-24 19:59:46 [manager.py:224] router recive req id 240 cost time 0.18394184112548828 s -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 240 cost time 0.18611979484558105 s -INFO 06-24 19:59:46 [manager.py:224] router recive req id 256 cost time 0.06905341148376465 s -DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=67281202978481964711024831614577858867, time:1750766386.1856062s req_ids:[240] -DEBUG 06-24 19:59:46 [manager.py:391] -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 256 cost time 0.07062745094299316 s -INFO 06-24 19:59:46 [manager.py:224] router recive req id 264 cost time 0.08840799331665039 s -DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=181293788626017492202555092884360541225, time:1750766386.2110493s req_ids:[256] -DEBUG 06-24 19:59:46 [manager.py:391] -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 264 cost time 0.09010958671569824 s -DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=217589871489938957811806292531150173471, time:1750766386.237144s req_ids:[264] -DEBUG 06-24 19:59:46 [manager.py:391] -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 240 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:240 first_token_cost:288.67244720458984ms total_cost_time:288.6958122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1064 prompt_cache_len:1063 prompt_cache_ratio:0.9990601503759399 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [batch.py:51] router release req id 240 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 256 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:256 first_token_cost:148.67806434631348ms total_cost_time:148.6983299255371ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1055 prompt_cache_len:1054 prompt_cache_ratio:0.9990521327014218 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:46 [batch.py:51] router release req id 256 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 264 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:264 first_token_cost:168.3180332183838ms total_cost_time:168.33782196044922ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:1055 prompt_cache_len:1054 prompt_cache_ratio:0.9990521327014218 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [batch.py:51] router release req id 264 -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 token used ratio: 0.14153245485602733 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:46 [manager.py:248] dp_i 0 token used ratio: 0.4637018057589068 contain prompt cache tree unrefed token -INFO 06-24 19:59:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 272 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 272 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 8749 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 272 request_data_transfer fail, server is busy -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 280 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 288 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 288 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 7691 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 2436 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 288 request_data_transfer fail, server is busy -INFO 06-24 19:59:46 [decode_trans_obj.py:118] kv_move_loop get task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_process.py:34] trans start: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 296 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 304 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 296 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 7689 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 2436 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:46 [decode_trans_process.py:43] trans finished: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 -INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 296 request_data_transfer fail, server is busy -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 320 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:46 [decode_infer_rpyc.py:75] req_id: id: 312 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix refed token num 2113 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] radix hold token num 7394 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager can alloc token num 6638 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 3488 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:46 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:46 [decode_kv_move_manager.py:273] req id 312 request_data_transfer fail, server is busy -INFO 06-24 19:59:46 [decode_trans_process.py:45] trans cost time: 0.04500317573547363, id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:95] _transfer_kv ok id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [task_queue.py:39] queue ready_to_move_queue left size: 1 -INFO 06-24 19:59:46 [decode_trans_obj.py:118] kv_move_loop get task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_process.py:34] trans start: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_process.py:43] trans finished: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1051 -INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 280 cost_time 0.016895771026611328 s -INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 280 finished -INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=280, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:280 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_4 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_4 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_4 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_4 -INFO 06-24 19:59:47 [decode_trans_process.py:45] trans cost time: 0.07761478424072266, id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:95] _transfer_kv ok id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:118] kv_move_loop get task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_process.py:34] trans start: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_process.py:43] trans finished: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1057 -INFO 06-24 19:59:47 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 304 cost_time 0.02335834503173828 s -INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 304 finished -INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=304, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:304 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:47 [decode_trans_process.py:45] trans cost time: 0.04166221618652344, id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:95] _transfer_kv ok id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 320 cost_time 0.015163183212280273 s -INFO 06-24 19:59:47 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 320 finished -INFO 06-24 19:59:47 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=320, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:320 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 12322_0_shm_prompts_2 -INFO 06-24 19:59:47 [manager.py:224] router recive req id 280 cost time 0.12640666961669922 s -INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 280 cost time 0.12806081771850586 s -INFO 06-24 19:59:47 [manager.py:224] router recive req id 304 cost time 0.09123945236206055 s -DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=277777573062047994740171680530455957075, time:1750766387.1931908s req_ids:[280] -DEBUG 06-24 19:59:47 [manager.py:391] -INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 304 cost time 0.09308290481567383 s -INFO 06-24 19:59:47 [manager.py:224] router recive req id 320 cost time 0.08214735984802246 s -DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=319286642773941052764187011823303848839, time:1750766387.2187362s req_ids:[304] -DEBUG 06-24 19:59:47 [manager.py:391] -INFO 06-24 19:59:47 [manager.py:68] detokenization recv req id 320 cost time 0.08399581909179688 s -DEBUG 06-24 19:59:47 [manager.py:391] Prefill Batch: batch_id=291105873674365910161113312262977364089, time:1750766387.2456753s req_ids:[320] -DEBUG 06-24 19:59:47 [manager.py:391] -INFO 06-24 19:59:47 [manager.py:162] detoken release req id 304 -INFO 06-24 19:59:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:304 first_token_cost:171.4761257171631ms total_cost_time:171.49710655212402ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1052 prompt_cache_len:1051 prompt_cache_ratio:0.9990494296577946 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:47 [batch.py:51] router release req id 304 -INFO 06-24 19:59:47 [manager.py:162] detoken release req id 320 -INFO 06-24 19:59:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:320 first_token_cost:161.9546413421631ms total_cost_time:161.96465492248535ms,out_token_counter:1 mean_per_token_cost_time: 0.010013580322265625ms prompt_token_num:1058 prompt_cache_len:1057 prompt_cache_ratio:0.999054820415879 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:47 [batch.py:51] router release req id 320 -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 token used ratio: 0.21296974133723767 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:47 [manager.py:248] dp_i 0 token used ratio: 0.6638604197169351 contain prompt cache tree unrefed token -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 328 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 328 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 336 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 344 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5459 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 336 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 344 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 352 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 360 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 352 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 360 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 368 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 376 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5456 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 368 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 376 request_data_transfer fail, server is busy -INFO 06-24 19:59:48 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:48 [decode_infer_rpyc.py:75] req_id: id: 384 in_len:1063 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager can alloc token num 5453 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:48 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:48 [decode_kv_move_manager.py:273] req id 384 request_data_transfer fail, server is busy -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 10314 -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 token used ratio: 0.22212054660810152 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:49 [manager.py:248] dp_i 0 token used ratio: 0.673011224987799 contain prompt cache tree unrefed token -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 400 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 392 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 5327 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 392 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 408 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 416 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_trans_obj.py:118] kv_move_loop get task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 408 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 416 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_trans_process.py:34] trans start: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 408 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 416 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_trans_process.py:43] trans finished: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 424 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 432 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 424 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 432 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 424 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 432 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 440 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 440 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_infer_rpyc.py:75] req_id: id: 448 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix refed token num 3171 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] radix hold token num 10562 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager can alloc token num 4275 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 5679 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] estimated peak token num 10314 -DEBUG 06-24 19:59:49 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 440 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_kv_move_manager.py:273] req id 448 request_data_transfer fail, server is busy -INFO 06-24 19:59:49 [decode_trans_process.py:45] trans cost time: 0.035636186599731445, id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [decode_trans_obj.py:95] _transfer_kv ok id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 400 cost_time 0.020232439041137695 s -INFO 06-24 19:59:49 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 400 finished -INFO 06-24 19:59:49 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=400, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:49 lightllm_req_id:400 -WARNING 06-24 19:59:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_2 and create again -INFO 06-24 19:59:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_2 -WARNING 06-24 19:59:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_2 and create again -INFO 06-24 19:59:49 [shm_array.py:30] create shm 12322_0_shm_prompts_2 -INFO 06-24 19:59:49 [manager.py:224] router recive req id 400 cost time 0.1079549789428711 s -INFO 06-24 19:59:49 [manager.py:68] detokenization recv req id 400 cost time 0.10963606834411621 s -DEBUG 06-24 19:59:49 [manager.py:391] Prefill Batch: batch_id=240562744484786649800446342328616789333, time:1750766389.5182493s req_ids:[400] -DEBUG 06-24 19:59:49 [manager.py:391] -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 token used ratio: 0.29685212298682284 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:50 [manager.py:248] dp_i 0 token used ratio: 0.7477428013665203 contain prompt cache tree unrefed token -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 456 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 464 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3069 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 464 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_trans_obj.py:118] kv_move_loop get task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [decode_trans_process.py:34] trans start: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 472 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 472 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 480 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_trans_process.py:43] trans finished: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 472 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 480 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 488 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 488 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 496 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_infer_rpyc.py:75] req_id: id: 504 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] radix hold token num 11611 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager can alloc token num 3065 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:50 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 488 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 496 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_kv_move_manager.py:273] req id 504 request_data_transfer fail, server is busy -INFO 06-24 19:59:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:50 [decode_trans_process.py:45] trans cost time: 0.03783893585205078, id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [decode_trans_obj.py:95] _transfer_kv ok id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 456 cost_time 0.01764392852783203 s -INFO 06-24 19:59:50 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 456 finished -INFO 06-24 19:59:50 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=456, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:456 -WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:50 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:50 [manager.py:224] router recive req id 456 cost time 0.07205057144165039 s -INFO 06-24 19:59:50 [manager.py:68] detokenization recv req id 456 cost time 0.07368993759155273 s -DEBUG 06-24 19:59:50 [manager.py:391] Prefill Batch: batch_id=336204780369009327632129103991276574369, time:1750766390.4856179s req_ids:[456] -DEBUG 06-24 19:59:50 [manager.py:391] -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 456 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:456 first_token_cost:176.88751220703125ms total_cost_time:176.9094467163086ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1063 prompt_cache_len:1062 prompt_cache_ratio:0.9990592662276576 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [batch.py:51] router release req id 456 -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 512 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 512 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 2896 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 512 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 520 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 528 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 520 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 2892 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 520 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 536 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 544 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 536 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 544 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_trans_obj.py:118] kv_move_loop get task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_trans_process.py:34] trans start: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 552 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 552 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_trans_process.py:43] trans finished: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1045 -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 560 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_infer_rpyc.py:75] req_id: id: 568 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] radix hold token num 12674 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager can alloc token num 1847 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1046 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:51 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 560 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_kv_move_manager.py:273] req id 568 request_data_transfer fail, server is busy -INFO 06-24 19:59:51 [decode_trans_process.py:45] trans cost time: 0.023418903350830078, id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [decode_trans_obj.py:95] _transfer_kv ok id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 528 cost_time 0.014069318771362305 s -INFO 06-24 19:59:51 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 528 finished -INFO 06-24 19:59:51 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=528, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:528 -WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:51 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 frozen token num: 1046 -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 token used ratio: 0.372559785261103 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:51 [manager.py:248] dp_i 0 token used ratio: 0.8882991703269888 contain prompt cache tree unrefed token -INFO 06-24 19:59:51 [manager.py:224] router recive req id 528 cost time 0.10735130310058594 s -INFO 06-24 19:59:51 [manager.py:68] detokenization recv req id 528 cost time 0.10909676551818848 s -DEBUG 06-24 19:59:51 [manager.py:391] Prefill Batch: batch_id=332065354239372522643271644973622948359, time:1750766391.653858s req_ids:[528] -DEBUG 06-24 19:59:51 [manager.py:391] -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 528 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:528 first_token_cost:212.17942237854004ms total_cost_time:212.20135688781738ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1046 prompt_cache_len:1045 prompt_cache_ratio:0.9990439770554493 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [batch.py:51] router release req id 528 -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 576 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 576 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 584 in_len:1068 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 592 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 584 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 592 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 600 in_len:1071 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1686 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 600 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 608 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 616 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 608 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 616 request_data_transfer fail, server is busy -INFO 06-24 19:59:52 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:52 [decode_infer_rpyc.py:75] req_id: id: 624 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager can alloc token num 1682 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:52 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:52 [decode_kv_move_manager.py:273] req id 624 request_data_transfer fail, server is busy -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 token used ratio: 0.32076622742801364 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:52 [manager.py:248] dp_i 0 token used ratio: 0.9003172279160566 contain prompt cache tree unrefed token -INFO 06-24 19:59:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:53 [statics_utils.py:24] mean first cost: 206.37761629544772 ms -INFO 06-24 19:59:53 [statics_utils.py:24] mean per token cost: 0.020186106363932293 ms -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 632 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 640 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 632 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 640 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 648 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 656 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 1530 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 648 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 656 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 664 in_len:1062 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 672 in_len:1043 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -DEBUG 06-24 19:59:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 1384.057 tokens/s -DEBUG 06-24 19:59:53 [stats.py:37] Avg prompt tokens throughput: 1266.761 tokens/s -DEBUG 06-24 19:59:53 [stats.py:37] Avg generate tokens throughput: 117.296 tokens/s -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 672 in_len:1043 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 464 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 672 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:53 [decode_infer_rpyc.py:75] req_id: id: 680 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] radix hold token num 13720 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager can alloc token num 464 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1063 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:53 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:53 [decode_kv_move_manager.py:273] req id 680 request_data_transfer fail, server is busy -INFO 06-24 19:59:53 [decode_trans_obj.py:118] kv_move_loop get task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [decode_trans_process.py:34] trans start: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [decode_trans_process.py:43] trans finished: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 -INFO 06-24 19:59:53 [decode_trans_process.py:45] trans cost time: 0.0374302864074707, id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [decode_trans_obj.py:95] _transfer_kv ok id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 664 cost_time 0.018375873565673828 s -INFO 06-24 19:59:53 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 664 finished -INFO 06-24 19:59:53 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=664, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:664 -WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:53 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:53 [manager.py:224] router recive req id 664 cost time 0.07961368560791016 s -INFO 06-24 19:59:53 [manager.py:68] detokenization recv req id 664 cost time 0.08190369606018066 s -DEBUG 06-24 19:59:53 [manager.py:391] Prefill Batch: batch_id=56316389325537960521534077905631517256, time:1750766393.707428s req_ids:[664] -DEBUG 06-24 19:59:53 [manager.py:391] -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 664 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:664 first_token_cost:186.48576736450195ms total_cost_time:186.5079402923584ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1063 prompt_cache_len:1062 prompt_cache_ratio:0.9990592662276576 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [batch.py:51] router release req id 664 -INFO 06-24 19:59:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.3327232796486091 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.9771229868228404 contain prompt cache tree unrefed token -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 688 in_len:1069 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 291 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 696 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 291 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 688 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 696 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 704 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 712 in_len:1048 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 720 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 704 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 712 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 720 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 728 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 736 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 728 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 736 request_data_transfer fail, server is busy -INFO 06-24 19:59:54 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:54 [decode_infer_rpyc.py:75] req_id: id: 744 in_len:1047 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] radix hold token num 14783 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager can alloc token num 287 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:54 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:54 [decode_kv_move_manager.py:273] req id 744 request_data_transfer fail, server is busy -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 token used ratio: 0.3449243533430942 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:55 [manager.py:248] dp_i 0 token used ratio: 0.9893240605173256 contain prompt cache tree unrefed token -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 752 in_len:1053 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 760 in_len:1064 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 132 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 760 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 776 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_trans_obj.py:118] kv_move_loop get task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [decode_trans_process.py:34] trans start: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 768 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 776 in_len:1051 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 768 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 776 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_trans_process.py:43] trans finished: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1053 -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 784 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 784 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 792 in_len:1061 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_infer_rpyc.py:75] req_id: id: 800 in_len:1065 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] radix hold token num 13733 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager can alloc token num 128 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1054 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:55 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 784 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 792 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_kv_move_manager.py:273] req id 800 request_data_transfer fail, server is busy -INFO 06-24 19:59:55 [decode_trans_process.py:45] trans cost time: 0.03745913505554199, id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [decode_trans_obj.py:95] _transfer_kv ok id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 752 cost_time 0.020582199096679688 s -INFO 06-24 19:59:55 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 752 finished -INFO 06-24 19:59:55 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=752, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:752 -WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:55 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:55 [manager.py:224] router recive req id 752 cost time 0.08124971389770508 s -INFO 06-24 19:59:55 [manager.py:68] detokenization recv req id 752 cost time 0.08315467834472656 s -DEBUG 06-24 19:59:55 [manager.py:391] Prefill Batch: batch_id=108854083105344818407372731453821830999, time:1750766395.8599865s req_ids:[752] -DEBUG 06-24 19:59:55 [manager.py:391] -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 752 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:752 first_token_cost:188.4760856628418ms total_cost_time:188.50088119506836ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:1054 prompt_cache_len:1053 prompt_cache_ratio:0.9990512333965844 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [batch.py:51] router release req id 752 -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 token used ratio: 0.3568814055636896 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:56 [manager.py:248] dp_i 0 token used ratio: 0.9370424597364568 contain prompt cache tree unrefed token -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 808 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 808 in_len:1046 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1016 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 808 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 816 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 816 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1012 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 824 in_len:1055 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 13730 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1012 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 816 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 824 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 832 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 840 in_len:1059 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 840 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 848 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 856 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 848 in_len:1052 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 856 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1020 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 848 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 856 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_trans_obj.py:118] kv_move_loop get task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [decode_trans_process.py:34] trans start: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 864 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:56 [decode_infer_rpyc.py:75] req_id: id: 864 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] radix hold token num 12666 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager can alloc token num 1016 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1057 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:56 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:56 [decode_kv_move_manager.py:273] req id 864 request_data_transfer fail, server is busy -INFO 06-24 19:59:56 [decode_trans_process.py:43] trans finished: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 -INFO 06-24 19:59:56 [decode_trans_process.py:45] trans cost time: 0.048836708068847656, id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [decode_trans_obj.py:95] _transfer_kv ok id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 832 cost_time 0.02505183219909668 s -INFO 06-24 19:59:56 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 832 finished -INFO 06-24 19:59:56 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=832, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:56 lightllm_req_id:832 -WARNING 06-24 19:59:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:56 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:57 [manager.py:224] router recive req id 832 cost time 0.09881830215454102 s -INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 832 cost time 0.10045814514160156 s -DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=6221752173663725315526152860676858504, time:1750766397.052792s req_ids:[832] -DEBUG 06-24 19:59:57 [manager.py:391] -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 832 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:56 lightllm_req_id:832 first_token_cost:204.74624633789062ms total_cost_time:204.76865768432617ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1057 prompt_cache_len:1056 prompt_cache_ratio:0.9990539262062441 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [batch.py:51] router release req id 832 -INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 872 in_len:1049 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:57 [decode_trans_obj.py:118] kv_move_loop get task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [decode_trans_process.py:34] trans start: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 880 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 888 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 880 in_len:1057 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 12668 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 953 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 888 in_len:1060 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 12668 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 953 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:57 [decode_trans_process.py:43] trans finished: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 880 request_data_transfer fail, server is busy -INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 888 request_data_transfer fail, server is busy -INFO 06-24 19:59:57 [decode_trans_process.py:45] trans cost time: 0.03789687156677246, id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [decode_trans_obj.py:95] _transfer_kv ok id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 19:59:57 [decode_trans_obj.py:152] put_to_radix_loop get put radix task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [decode_trans_obj.py:159] put_to_radix_loop put kv to radix cache ok, req_id: 872 cost_time 0.03162407875061035 s -INFO 06-24 19:59:57 [decode_trans_obj.py:169] put_to_radix_loop up kv status req_id: 872 finished -INFO 06-24 19:59:57 [up_status.py:92] up status: UpKVStatus(type='kv_move_status', group_request_id=872, dp_index=0, pd_master_node_id=0) -INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:872 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 19:59:57 [manager.py:224] router recive req id 872 cost time 0.09926819801330566 s -INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 872 cost time 0.10094022750854492 s -INFO 06-24 19:59:57 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 896 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:57 [decode_infer_rpyc.py:75] req_id: id: 896 in_len:1054 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix refed token num 5269 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] radix hold token num 13717 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager can alloc token num 925 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 1050 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:57 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:57 [decode_kv_move_manager.py:273] req id 896 request_data_transfer fail, server is busy -DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=88475449123448403812857265878733895974, time:1750766397.5221214s req_ids:[872] -DEBUG 06-24 19:59:57 [manager.py:391] -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 872 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:872 first_token_cost:205.86729049682617ms total_cost_time:205.8889865875244ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [batch.py:51] router release req id 872 -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 token used ratio: 0.3685944363103953 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:57 [manager.py:248] dp_i 0 token used ratio: 0.9480234260614934 contain prompt cache tree unrefed token -INFO 06-24 19:59:58 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 904 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 19:59:58 [decode_infer_rpyc.py:75] req_id: id: 904 in_len:1056 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] radix refed token num 4220 -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] radix hold token num 13718 -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] mem manager can alloc token num 840 -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 19:59:58 [decode_infer_rpyc.py:80] -INFO 06-24 19:59:58 [decode_kv_move_manager.py:273] req id 904 request_data_transfer fail, server is busy -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 token used ratio: 0.38079551000488043 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:59 [manager.py:248] dp_i 0 token used ratio: 0.9602244997559786 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 token used ratio: 0.3929965836993655 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:00 [manager.py:248] dp_i 0 token used ratio: 0.9724255734504637 contain prompt cache tree unrefed token -INFO 06-24 20:00:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 token used ratio: 0.40519765739385066 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:01 [manager.py:248] dp_i 0 token used ratio: 0.9846266471449487 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 token used ratio: 0.41739873108833575 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:03 [manager.py:248] dp_i 0 token used ratio: 0.9968277208394338 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 576.096 tokens/s -DEBUG 06-24 20:00:03 [stats.py:37] Avg prompt tokens throughput: 421.885 tokens/s -DEBUG 06-24 20:00:03 [stats.py:37] Avg generate tokens throughput: 154.212 tokens/s -INFO 06-24 20:00:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 token used ratio: 0.4295998047828209 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:04 [manager.py:248] dp_i 0 token used ratio: 0.94466813079551 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 token used ratio: 0.441800878477306 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:05 [manager.py:248] dp_i 0 token used ratio: 0.9568692044899951 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 token used ratio: 0.4540019521717911 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:07 [manager.py:248] dp_i 0 token used ratio: 0.9690702781844802 contain prompt cache tree unrefed token -INFO 06-24 20:00:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 token used ratio: 0.4662030258662762 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:08 [manager.py:248] dp_i 0 token used ratio: 0.9812713518789653 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 token used ratio: 0.47840409956076135 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:10 [manager.py:248] dp_i 0 token used ratio: 0.9934724255734505 contain prompt cache tree unrefed token -INFO 06-24 20:00:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 token used ratio: 0.49060517325524644 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:12 [manager.py:248] dp_i 0 token used ratio: 0.9414958516349439 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 122.406 tokens/s -DEBUG 06-24 20:00:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:00:13 [stats.py:37] Avg generate tokens throughput: 122.406 tokens/s -INFO 06-24 20:00:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 token used ratio: 0.5028062469497315 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:14 [manager.py:248] dp_i 0 token used ratio: 0.953696925329429 contain prompt cache tree unrefed token -INFO 06-24 20:00:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 token used ratio: 0.5150073206442167 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:16 [manager.py:248] dp_i 0 token used ratio: 0.9658979990239142 contain prompt cache tree unrefed token -INFO 06-24 20:00:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 token used ratio: 0.5272083943387018 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:19 [manager.py:248] dp_i 0 token used ratio: 0.9780990727183992 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 token used ratio: 0.5394094680331869 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:21 [manager.py:248] dp_i 0 token used ratio: 0.9903001464128843 contain prompt cache tree unrefed token -INFO 06-24 20:00:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:00:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:00:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms -INFO 06-24 20:00:23 [statics_utils.py:24] mean per token cost: 0.0209808349609375 ms -DEBUG 06-24 20:00:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 89.890 tokens/s -DEBUG 06-24 20:00:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:00:23 [stats.py:37] Avg generate tokens throughput: 89.890 tokens/s -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 token used ratio: 0.551610541727672 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:23 [manager.py:248] dp_i 0 token used ratio: 0.9379575402635432 contain prompt cache tree unrefed token -INFO 06-24 20:00:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 token used ratio: 0.5638116154221572 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:25 [manager.py:248] dp_i 0 token used ratio: 0.9501586139580283 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 token used ratio: 0.5760126891166423 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:28 [manager.py:248] dp_i 0 token used ratio: 0.9623596876525135 contain prompt cache tree unrefed token -INFO 06-24 20:00:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 token used ratio: 0.5882137628111274 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:30 [manager.py:248] dp_i 0 token used ratio: 0.9745607613469985 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 current batch size: 4 -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 token used ratio: 0.6004148365056124 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:32 [manager.py:248] dp_i 0 token used ratio: 0.9867618350414836 contain prompt cache tree unrefed token -INFO 06-24 20:00:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:00:32 [manager.py:162] detoken release req id 280 -INFO 06-24 20:00:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:280 first_token_cost:231.58621788024902ms total_cost_time:45849.95889663696ms,out_token_counter:1378 mean_per_token_cost_time: 33.104769723335785ms prompt_token_num:1059 prompt_cache_len:1058 prompt_cache_ratio:0.9990557129367328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:00:32 [batch.py:51] router release req id 280 -INFO 06-24 20:00:33 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 912 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 20:00:33 [decode_infer_rpyc.py:75] req_id: id: 912 in_len:1058 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] radix refed token num 3162 -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] radix hold token num 11931 -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] mem manager can alloc token num 138 -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 20:00:33 [decode_infer_rpyc.py:80] -INFO 06-24 20:00:33 [decode_kv_move_manager.py:273] req id 912 request_data_transfer fail, server is busy -DEBUG 06-24 20:00:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 92.605 tokens/s -DEBUG 06-24 20:00:33 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:00:33 [stats.py:37] Avg generate tokens throughput: 92.605 tokens/s -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 token used ratio: 0.46193265007320644 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:34 [manager.py:248] dp_i 0 token used ratio: 0.9968887262079063 contain prompt cache tree unrefed token -INFO 06-24 20:00:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 token used ratio: 0.47108345534407026 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:36 [manager.py:248] dp_i 0 token used ratio: 0.9411908247925818 contain prompt cache tree unrefed token -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 token used ratio: 0.4802342606149341 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:38 [manager.py:248] dp_i 0 token used ratio: 0.9503416300634456 contain prompt cache tree unrefed token -INFO 06-24 20:00:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 token used ratio: 0.48938506588579794 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:40 [manager.py:248] dp_i 0 token used ratio: 0.9594924353343094 contain prompt cache tree unrefed token -WARNING 06-24 20:00:40 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:41 [decode_kv_move_manager.py:241] exposed_request_data_transfer in id: 920 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df, type -INFO 06-24 20:00:41 [decode_infer_rpyc.py:75] req_id: id: 920 in_len:1045 v_len: None move_len: None dp_index:None connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df alloc token failed -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] radix refed token num 3162 -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] radix hold token num 10868 -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] mem manager can alloc token num 637 -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] mem manager total size 16392frozened token num 0 -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] estimated peak token num 15243 -DEBUG 06-24 20:00:41 [decode_infer_rpyc.py:80] -INFO 06-24 20:00:41 [decode_kv_move_manager.py:273] req id 920 request_data_transfer fail, server is busy -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 token used ratio: 0.4985358711566618 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:43 [manager.py:248] dp_i 0 token used ratio: 0.9686432406051733 contain prompt cache tree unrefed token -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 69.053 tokens/s -DEBUG 06-24 20:00:43 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:00:43 [stats.py:37] Avg generate tokens throughput: 69.053 tokens/s -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 token used ratio: 0.5076866764275256 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:45 [manager.py:248] dp_i 0 token used ratio: 0.9777940458760371 contain prompt cache tree unrefed token -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:00:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 token used ratio: 0.5168374816983895 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:48 [manager.py:248] dp_i 0 token used ratio: 0.9869448511469009 contain prompt cache tree unrefed token -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 token used ratio: 0.5259882869692533 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:50 [manager.py:248] dp_i 0 token used ratio: 0.9960956564177648 contain prompt cache tree unrefed token -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 token used ratio: 0.5351390922401171 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:53 [manager.py:248] dp_i 0 token used ratio: 0.9414348462664714 contain prompt cache tree unrefed token -INFO 06-24 20:00:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:00:53 [statics_utils.py:24] mean first cost: 204.02849421781653 ms -INFO 06-24 20:00:53 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms -INFO 06-24 20:00:53 [manager.py:620] left req id 8can release False refcount 4 -INFO 06-24 20:00:53 [manager.py:620] left req id 120can release False refcount 4 -INFO 06-24 20:00:53 [manager.py:620] left req id 400can release False refcount 4 -INFO 06-24 20:00:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 59.813 tokens/s -DEBUG 06-24 20:00:53 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:00:53 [stats.py:37] Avg generate tokens throughput: 59.813 tokens/s -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 token used ratio: 0.544289897510981 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:55 [manager.py:248] dp_i 0 token used ratio: 0.9505856515373353 contain prompt cache tree unrefed token -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 token used ratio: 0.5534407027818448 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:57 [manager.py:248] dp_i 0 token used ratio: 0.9597364568081991 contain prompt cache tree unrefed token -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:58 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 token used ratio: 0.5625915080527086 not contain prompt cache tree unrefed token -DEBUG 06-24 20:00:59 [manager.py:248] dp_i 0 token used ratio: 0.968887262079063 contain prompt cache tree unrefed token -INFO 06-24 20:01:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 token used ratio: 0.5717423133235725 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:01 [manager.py:248] dp_i 0 token used ratio: 0.9780380673499268 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 68.165 tokens/s -DEBUG 06-24 20:01:03 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:03 [stats.py:37] Avg generate tokens throughput: 68.165 tokens/s -INFO 06-24 20:01:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 token used ratio: 0.5808931185944363 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:04 [manager.py:248] dp_i 0 token used ratio: 0.9871888726207906 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 token used ratio: 0.5900439238653001 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:06 [manager.py:248] dp_i 0 token used ratio: 0.9963396778916545 contain prompt cache tree unrefed token -INFO 06-24 20:01:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 token used ratio: 0.599194729136164 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:08 [manager.py:248] dp_i 0 token used ratio: 0.94064177647633 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 token used ratio: 0.6083455344070278 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:10 [manager.py:248] dp_i 0 token used ratio: 0.9497925817471937 contain prompt cache tree unrefed token -INFO 06-24 20:01:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 token used ratio: 0.6174963396778916 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:12 [manager.py:248] dp_i 0 token used ratio: 0.9589433870180576 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 69.460 tokens/s -DEBUG 06-24 20:01:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:13 [stats.py:37] Avg generate tokens throughput: 69.460 tokens/s -INFO 06-24 20:01:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 token used ratio: 0.6266471449487555 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:14 [manager.py:248] dp_i 0 token used ratio: 0.9680941922889215 contain prompt cache tree unrefed token -INFO 06-24 20:01:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 token used ratio: 0.6357979502196194 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:16 [manager.py:248] dp_i 0 token used ratio: 0.9772449975597852 contain prompt cache tree unrefed token -INFO 06-24 20:01:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 token used ratio: 0.6449487554904831 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:18 [manager.py:248] dp_i 0 token used ratio: 0.9863958028306491 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 token used ratio: 0.654099560761347 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:21 [manager.py:248] dp_i 0 token used ratio: 0.995546608101513 contain prompt cache tree unrefed token -INFO 06-24 20:01:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 token used ratio: 0.6632503660322109 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:23 [manager.py:248] dp_i 0 token used ratio: 0.9403977550024402 contain prompt cache tree unrefed token -INFO 06-24 20:01:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:01:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms -INFO 06-24 20:01:23 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms -DEBUG 06-24 20:01:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 72.525 tokens/s -DEBUG 06-24 20:01:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:23 [stats.py:37] Avg generate tokens throughput: 72.525 tokens/s -INFO 06-24 20:01:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 token used ratio: 0.6724011713030746 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:25 [manager.py:248] dp_i 0 token used ratio: 0.949548560273304 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 token used ratio: 0.6815519765739385 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:27 [manager.py:248] dp_i 0 token used ratio: 0.9586993655441679 contain prompt cache tree unrefed token -INFO 06-24 20:01:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 token used ratio: 0.6907027818448024 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:29 [manager.py:248] dp_i 0 token used ratio: 0.9678501708150318 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 token used ratio: 0.6998535871156661 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:31 [manager.py:248] dp_i 0 token used ratio: 0.9770009760858955 contain prompt cache tree unrefed token -INFO 06-24 20:01:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:01:33 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 token used ratio: 0.70900439238653 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:33 [manager.py:248] dp_i 0 token used ratio: 0.9861517813567594 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.637 tokens/s -DEBUG 06-24 20:01:33 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:33 [stats.py:37] Avg generate tokens throughput: 73.637 tokens/s -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 token used ratio: 0.7181551976573939 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:35 [manager.py:248] dp_i 0 token used ratio: 0.9953025866276233 contain prompt cache tree unrefed token -INFO 06-24 20:01:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 token used ratio: 0.7273060029282576 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:37 [manager.py:248] dp_i 0 token used ratio: 0.9399707174231332 contain prompt cache tree unrefed token -INFO 06-24 20:01:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 token used ratio: 0.7364568081991215 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:39 [manager.py:248] dp_i 0 token used ratio: 0.9491215226939971 contain prompt cache tree unrefed token -WARNING 06-24 20:01:41 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 token used ratio: 0.7456076134699854 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:41 [manager.py:248] dp_i 0 token used ratio: 0.958272327964861 contain prompt cache tree unrefed token -INFO 06-24 20:01:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 token used ratio: 0.7547584187408491 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:43 [manager.py:248] dp_i 0 token used ratio: 0.9674231332357247 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.200 tokens/s -DEBUG 06-24 20:01:43 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:43 [stats.py:37] Avg generate tokens throughput: 73.200 tokens/s -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 token used ratio: 0.763909224011713 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:45 [manager.py:248] dp_i 0 token used ratio: 0.9765739385065886 contain prompt cache tree unrefed token -INFO 06-24 20:01:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:01:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 token used ratio: 0.7730600292825769 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:47 [manager.py:248] dp_i 0 token used ratio: 0.9857247437774525 contain prompt cache tree unrefed token -INFO 06-24 20:01:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 token used ratio: 0.7822108345534406 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:49 [manager.py:248] dp_i 0 token used ratio: 0.9948755490483162 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 token used ratio: 0.7913616398243045 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:51 [manager.py:248] dp_i 0 token used ratio: 0.9399707174231332 contain prompt cache tree unrefed token -INFO 06-24 20:01:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:01:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:01:53 [statics_utils.py:24] mean first cost: 204.02849421781653 ms -INFO 06-24 20:01:53 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms -DEBUG 06-24 20:01:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 73.341 tokens/s -DEBUG 06-24 20:01:53 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:01:53 [stats.py:37] Avg generate tokens throughput: 73.341 tokens/s -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 token used ratio: 0.8005124450951684 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:53 [manager.py:248] dp_i 0 token used ratio: 0.9491215226939971 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 token used ratio: 0.8096632503660323 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:55 [manager.py:248] dp_i 0 token used ratio: 0.958272327964861 contain prompt cache tree unrefed token -INFO 06-24 20:01:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 token used ratio: 0.818814055636896 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:57 [manager.py:248] dp_i 0 token used ratio: 0.9674231332357247 contain prompt cache tree unrefed token -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 token used ratio: 0.8279648609077599 not contain prompt cache tree unrefed token -DEBUG 06-24 20:01:59 [manager.py:248] dp_i 0 token used ratio: 0.9765739385065886 contain prompt cache tree unrefed token -INFO 06-24 20:02:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 token used ratio: 0.8371156661786238 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:02 [manager.py:248] dp_i 0 token used ratio: 0.9857247437774525 contain prompt cache tree unrefed token -INFO 06-24 20:02:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 70.168 tokens/s -DEBUG 06-24 20:02:03 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:02:03 [stats.py:37] Avg generate tokens throughput: 70.168 tokens/s -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 token used ratio: 0.8462664714494875 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:04 [manager.py:248] dp_i 0 token used ratio: 0.9948755490483162 contain prompt cache tree unrefed token -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 token used ratio: 0.8554172767203514 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:07 [manager.py:248] dp_i 0 token used ratio: 0.9199609565641776 contain prompt cache tree unrefed token -INFO 06-24 20:02:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 token used ratio: 0.8645680819912153 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:09 [manager.py:248] dp_i 0 token used ratio: 0.9291117618350415 contain prompt cache tree unrefed token -INFO 06-24 20:02:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 token used ratio: 0.873718887262079 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:11 [manager.py:248] dp_i 0 token used ratio: 0.9382625671059053 contain prompt cache tree unrefed token -DEBUG 06-24 20:02:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 64.428 tokens/s -DEBUG 06-24 20:02:13 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:02:13 [stats.py:37] Avg generate tokens throughput: 64.428 tokens/s -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 token used ratio: 0.8828696925329429 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:13 [manager.py:248] dp_i 0 token used ratio: 0.9474133723767691 contain prompt cache tree unrefed token -INFO 06-24 20:02:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 token used ratio: 0.8920204978038068 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:16 [manager.py:248] dp_i 0 token used ratio: 0.956564177647633 contain prompt cache tree unrefed token -INFO 06-24 20:02:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 token used ratio: 0.9011713030746705 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:19 [manager.py:248] dp_i 0 token used ratio: 0.9657149829184968 contain prompt cache tree unrefed token -INFO 06-24 20:02:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 token used ratio: 0.9103221083455344 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:21 [manager.py:248] dp_i 0 token used ratio: 0.9748657881893606 contain prompt cache tree unrefed token -INFO 06-24 20:02:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:02:23 [statics_utils.py:24] mean first cost: 204.02849421781653 ms -INFO 06-24 20:02:23 [statics_utils.py:24] mean per token cost: 2.384108612701998 ms -DEBUG 06-24 20:02:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 57.594 tokens/s -DEBUG 06-24 20:02:23 [stats.py:37] Avg prompt tokens throughput: 0.000 tokens/s -DEBUG 06-24 20:02:23 [stats.py:37] Avg generate tokens throughput: 57.594 tokens/s -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 token used ratio: 0.9194729136163983 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:24 [manager.py:248] dp_i 0 token used ratio: 0.9840165934602245 contain prompt cache tree unrefed token -INFO 06-24 20:02:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:8 first_token_cost:273.547887802124ms total_cost_time:163344.9625968933ms,out_token_counter:4096 mean_per_token_cost_time: 39.812357106711715ms prompt_token_num:1056 prompt_cache_len:1055 prompt_cache_ratio:0.9990530303030303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:26 lightllm_req_id:8 -WARNING 06-24 20:02:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_4 and create again -INFO 06-24 20:02:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_4 -WARNING 06-24 20:02:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_4 and create again -INFO 06-24 20:02:26 [shm_array.py:30] create shm 12322_0_shm_prompts_4 -INFO 06-24 20:02:26 [batch.py:51] router release req id 8 -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 15243 -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 token used ratio: 0.6143240605173256 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:26 [manager.py:248] dp_i 0 token used ratio: 0.9931063933626159 contain prompt cache tree unrefed token -INFO 06-24 20:02:26 [manager.py:224] router recive req id 8 cost time 0.15075016021728516 s -INFO 06-24 20:02:26 [manager.py:68] detokenization recv req id 8 cost time 0.15286540985107422 s -DEBUG 06-24 20:02:26 [manager.py:391] Prefill Batch: batch_id=58698570276155524760032194803622252344, time:1750766546.9490788s req_ids:[8] -DEBUG 06-24 20:02:26 [manager.py:391] -ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:26 lightllm_req_id:8 first_token_cost:295.5002784729004ms total_cost_time:295.53794860839844ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5152 prompt_cache_len:5151 prompt_cache_ratio:0.999805900621118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:120 first_token_cost:219.25711631774902ms total_cost_time:163406.2693119049ms,out_token_counter:4096 mean_per_token_cost_time: 39.84057914931327ms prompt_token_num:1059 prompt_cache_len:1058 prompt_cache_ratio:0.9990557129367328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 -WARNING 06-24 20:02:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_3 and create again -INFO 06-24 20:02:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_3 -WARNING 06-24 20:02:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_3 and create again -INFO 06-24 20:02:27 [shm_array.py:30] create shm 12322_0_shm_prompts_3 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 -INFO 06-24 20:02:27 [batch.py:51] router release req id 120 -INFO 06-24 20:02:27 [batch.py:51] router release req id 8 -INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18331217765808105 s -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.18489503860473633 s -INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.25066685676574707 s -DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=55151820991519834530335084348763236391, time:1750766547.2862108s req_ids:[8] -DEBUG 06-24 20:02:27 [manager.py:391] -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.2523775100708008 s -ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=324595234068718905327107771453927268720, time:1750766547.352423s req_ids:[120] -DEBUG 06-24 20:02:27 [manager.py:391] -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:327.23283767700195ms total_cost_time:327.27575302124023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5153 prompt_cache_len:5151 prompt_cache_ratio:0.9996118765767514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 -ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:27 [batch.py:51] router release req id 8 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:410.9804630279541ms total_cost_time:411.0243320465088ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5155 prompt_cache_len:5154 prompt_cache_ratio:0.9998060135790494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 -INFO 06-24 20:02:27 [batch.py:51] router release req id 120 -INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18894100189208984 s -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.19057464599609375 s -INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.17214608192443848 s -DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=177490213761783962427997777087416007060, time:1750766547.6244812s req_ids:[8] -DEBUG 06-24 20:02:27 [manager.py:391] -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.17384767532348633 s -ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=307868029576368645239940015534616110336, time:1750766547.6910472s req_ids:[120] -DEBUG 06-24 20:02:27 [manager.py:391] -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:332.98230171203613ms total_cost_time:333.0240249633789ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5154 prompt_cache_len:5151 prompt_cache_ratio:0.9994179278230501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 -ERROR 06-24 20:02:27 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:27 [batch.py:51] router release req id 8 -INFO 06-24 20:02:27 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:334.6529006958008ms total_cost_time:334.69367027282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5156 prompt_cache_len:5154 prompt_cache_ratio:0.9996121024049651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 -INFO 06-24 20:02:27 [batch.py:51] router release req id 120 -INFO 06-24 20:02:27 [manager.py:224] router recive req id 8 cost time 0.18998241424560547 s -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 8 cost time 0.1916346549987793 s -INFO 06-24 20:02:27 [manager.py:224] router recive req id 120 cost time 0.17139911651611328 s -DEBUG 06-24 20:02:27 [manager.py:391] Prefill Batch: batch_id=148107591610778313726679120311886011716, time:1750766547.96445s req_ids:[8] -DEBUG 06-24 20:02:27 [manager.py:391] -INFO 06-24 20:02:27 [manager.py:68] detokenization recv req id 120 cost time 0.17303133010864258 s -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=191872871025326045705711332854194569796, time:1750766548.0315557s req_ids:[120] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:8 first_token_cost:334.2723846435547ms total_cost_time:334.31410789489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5155 prompt_cache_len:5151 prompt_cache_ratio:0.9992240543161979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:28 [batch.py:51] router release req id 8 -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:27 lightllm_req_id:120 first_token_cost:334.1801166534424ms total_cost_time:334.22327041625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5157 prompt_cache_len:5154 prompt_cache_ratio:0.9994182664339732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 -INFO 06-24 20:02:28 [batch.py:51] router release req id 120 -INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.19146418571472168 s -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.19313263893127441 s -INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.1751861572265625 s -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=19951010786248747427721704446616503021, time:1750766548.3064108s req_ids:[8] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.17684268951416016 s -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=9544498945230240798969547345878200462, time:1750766548.3725064s req_ids:[120] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:335.15071868896484ms total_cost_time:335.1907730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5156 prompt_cache_len:5151 prompt_cache_ratio:0.9990302560124127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:28 [batch.py:51] router release req id 8 -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:337.04614639282227ms total_cost_time:337.10575103759766ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5158 prompt_cache_len:5154 prompt_cache_ratio:0.9992245056223342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 -INFO 06-24 20:02:28 [batch.py:51] router release req id 120 -INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.18924427032470703 s -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.19082164764404297 s -INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.16993260383605957 s -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=206301375493748779292062980281842267970, time:1750766548.645843s req_ids:[8] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.17154550552368164 s -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=258367600656908120183520187698952271172, time:1750766548.711979s req_ids:[120] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:333.9109420776367ms total_cost_time:333.9529037475586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5157 prompt_cache_len:5151 prompt_cache_ratio:0.9988365328679465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 -ERROR 06-24 20:02:28 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:28 [batch.py:51] router release req id 8 -INFO 06-24 20:02:28 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:330.7986259460449ms total_cost_time:330.8393955230713ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5159 prompt_cache_len:5154 prompt_cache_ratio:0.9990308199263424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 -INFO 06-24 20:02:28 [batch.py:51] router release req id 120 -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 10505 -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 token used ratio: 0.3026476329917033 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:28 [manager.py:248] dp_i 0 token used ratio: 0.9958516349438751 contain prompt cache tree unrefed token -INFO 06-24 20:02:28 [manager.py:224] router recive req id 8 cost time 0.19005727767944336 s -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 8 cost time 0.1916353702545166 s -INFO 06-24 20:02:28 [manager.py:224] router recive req id 120 cost time 0.17285466194152832 s -DEBUG 06-24 20:02:28 [manager.py:391] Prefill Batch: batch_id=130994044296742924544660401690901123193, time:1750766548.985329s req_ids:[8] -DEBUG 06-24 20:02:28 [manager.py:391] -INFO 06-24 20:02:28 [manager.py:68] detokenization recv req id 120 cost time 0.174422025680542 s -ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=256343030381444045174742585450945114861, time:1750766549.0520992s req_ids:[120] -DEBUG 06-24 20:02:29 [manager.py:391] -INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:8 first_token_cost:334.6133232116699ms total_cost_time:334.6571922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5158 prompt_cache_len:5151 prompt_cache_ratio:0.9986428848390849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 -ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:29 [batch.py:51] router release req id 8 -INFO 06-24 20:02:29 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:28 lightllm_req_id:120 first_token_cost:566.0884380340576ms total_cost_time:566.131591796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5160 prompt_cache_len:5154 prompt_cache_ratio:0.9988372093023256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 -INFO 06-24 20:02:29 [batch.py:51] router release req id 120 -INFO 06-24 20:02:29 [manager.py:224] router recive req id 8 cost time 0.42057371139526367 s -INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 8 cost time 0.42220330238342285 s -INFO 06-24 20:02:29 [manager.py:224] router recive req id 120 cost time 0.16965055465698242 s -DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=320710000408154836108755850928504862051, time:1750766549.5546157s req_ids:[8] -DEBUG 06-24 20:02:29 [manager.py:391] -INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 120 cost time 0.17153406143188477 s -ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=279463898450996377852457583534159633084, time:1750766549.6245277s req_ids:[120] -DEBUG 06-24 20:02:29 [manager.py:391] -INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:566.643476486206ms total_cost_time:566.6866302490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5159 prompt_cache_len:5151 prompt_cache_ratio:0.9984493118821477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 -ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:29 [batch.py:51] router release req id 8 -INFO 06-24 20:02:29 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 first_token_cost:334.5487117767334ms total_cost_time:334.60474014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5161 prompt_cache_len:5154 prompt_cache_ratio:0.998643673706646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 -INFO 06-24 20:02:29 [batch.py:51] router release req id 120 -INFO 06-24 20:02:29 [manager.py:224] router recive req id 8 cost time 0.19302701950073242 s -INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 8 cost time 0.19452691078186035 s -INFO 06-24 20:02:29 [manager.py:224] router recive req id 120 cost time 0.17338347434997559 s -DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=221046665309317463906105869468329545501, time:1750766549.8992717s req_ids:[8] -DEBUG 06-24 20:02:29 [manager.py:391] -INFO 06-24 20:02:29 [manager.py:68] detokenization recv req id 120 cost time 0.17486906051635742 s -ERROR 06-24 20:02:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:29 [manager.py:391] Prefill Batch: batch_id=43310256160888501956184753382885587604, time:1750766549.9690757s req_ids:[120] -DEBUG 06-24 20:02:29 [manager.py:391] -INFO 06-24 20:02:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:338.1624221801758ms total_cost_time:338.20295333862305ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5160 prompt_cache_len:5151 prompt_cache_ratio:0.9982558139534884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 -ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:30 [batch.py:51] router release req id 8 -INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:120 first_token_cost:342.17357635498047ms total_cost_time:342.21434593200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5162 prompt_cache_len:5154 prompt_cache_ratio:0.9984502130956994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 -INFO 06-24 20:02:30 [batch.py:51] router release req id 120 -INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.19750738143920898 s -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.19896626472473145 s -INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.18012619018554688 s -DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=233065189596150499436936420445503785771, time:1750766550.25376s req_ids:[8] -DEBUG 06-24 20:02:30 [manager.py:391] -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.18140125274658203 s -ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=57186440482347288099027679364484160941, time:1750766550.3238456s req_ids:[120] -DEBUG 06-24 20:02:30 [manager.py:391] -INFO 06-24 20:02:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:29 lightllm_req_id:8 first_token_cost:349.43580627441406ms total_cost_time:349.48039054870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5161 prompt_cache_len:5151 prompt_cache_ratio:0.9980623910094942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 -ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:30 [batch.py:51] router release req id 8 -INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:349.84445571899414ms total_cost_time:349.8857021331787ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5163 prompt_cache_len:5154 prompt_cache_ratio:0.9982568274259151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 -INFO 06-24 20:02:30 [batch.py:51] router release req id 120 -INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.19885563850402832 s -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.20014476776123047 s -INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.18237876892089844 s -DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=325166414549089171055933772292823555424, time:1750766550.6116781s req_ids:[8] -DEBUG 06-24 20:02:30 [manager.py:391] -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.1838054656982422 s -ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=182687286152867624407180658577576079028, time:1750766550.6806443s req_ids:[120] -DEBUG 06-24 20:02:30 [manager.py:391] -INFO 06-24 20:02:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 first_token_cost:349.63130950927734ms total_cost_time:349.6527671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:5162 prompt_cache_len:5151 prompt_cache_ratio:0.9978690430065866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 -ERROR 06-24 20:02:30 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:30 [batch.py:51] router release req id 8 -INFO 06-24 20:02:30 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:351.529598236084ms total_cost_time:351.58514976501465ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:5164 prompt_cache_len:5154 prompt_cache_ratio:0.9980635166537568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 -INFO 06-24 20:02:30 [batch.py:51] router release req id 120 -INFO 06-24 20:02:30 [manager.py:224] router recive req id 8 cost time 0.2012631893157959 s -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 8 cost time 0.2022998332977295 s -INFO 06-24 20:02:30 [manager.py:224] router recive req id 120 cost time 0.17996978759765625 s -DEBUG 06-24 20:02:30 [manager.py:391] Prefill Batch: batch_id=181692654978745119352071937201076676150, time:1750766550.966496s req_ids:[8] -DEBUG 06-24 20:02:30 [manager.py:391] -INFO 06-24 20:02:30 [manager.py:68] detokenization recv req id 120 cost time 0.18112778663635254 s -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=137537163284179342353635003783964228096, time:1750766551.0392952s req_ids:[120] -DEBUG 06-24 20:02:31 [manager.py:391] -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:8 first_token_cost:353.787899017334ms total_cost_time:353.82986068725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5163 prompt_cache_len:5151 prompt_cache_ratio:0.9976757699012202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:31 [batch.py:51] router release req id 8 -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:30 lightllm_req_id:120 first_token_cost:352.89525985717773ms total_cost_time:352.9384136199951ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5165 prompt_cache_len:5154 prompt_cache_ratio:0.9978702807357211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 -INFO 06-24 20:02:31 [batch.py:51] router release req id 120 -INFO 06-24 20:02:31 [manager.py:224] router recive req id 8 cost time 0.1950840950012207 s -INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 8 cost time 0.19604825973510742 s -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 5153 -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 token used ratio: 0.3049658369936554 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:31 [manager.py:248] dp_i 0 token used ratio: 0.9981698389458272 contain prompt cache tree unrefed token -INFO 06-24 20:02:31 [manager.py:224] router recive req id 120 cost time 0.17209434509277344 s -DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=197006888500978337676448670582637360287, time:1750766551.3175275s req_ids:[8] -DEBUG 06-24 20:02:31 [manager.py:391] -INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 120 cost time 0.1730031967163086 s -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=11670169896892292318513132832919621299, time:1750766551.3842564s req_ids:[120] -DEBUG 06-24 20:02:31 [manager.py:391] -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:338.67692947387695ms total_cost_time:338.7181758880615ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5164 prompt_cache_len:5151 prompt_cache_ratio:0.9974825716498839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:31 [batch.py:51] router release req id 8 -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:334.4714641571045ms total_cost_time:334.51390266418457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5166 prompt_cache_len:5154 prompt_cache_ratio:0.9976771196283392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 -INFO 06-24 20:02:31 [batch.py:51] router release req id 120 -INFO 06-24 20:02:31 [manager.py:224] router recive req id 8 cost time 0.19119596481323242 s -INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 8 cost time 0.19215726852416992 s -INFO 06-24 20:02:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:31 [manager.py:224] router recive req id 120 cost time 0.17340993881225586 s -DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=120668157758041636555107115534557259494, time:1750766551.658939s req_ids:[8] -DEBUG 06-24 20:02:31 [manager.py:391] -INFO 06-24 20:02:31 [manager.py:68] detokenization recv req id 120 cost time 0.17439556121826172 s -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:31 [manager.py:391] Prefill Batch: batch_id=14423423920378056212830746976957857580, time:1750766551.7268872s req_ids:[120] -DEBUG 06-24 20:02:31 [manager.py:391] -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:337.3236656188965ms total_cost_time:337.3675346374512ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5165 prompt_cache_len:5151 prompt_cache_ratio:0.9972894482090997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 -ERROR 06-24 20:02:31 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:31 [batch.py:51] router release req id 8 -INFO 06-24 20:02:31 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:500.0150203704834ms total_cost_time:500.0574588775635ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5167 prompt_cache_len:5154 prompt_cache_ratio:0.9974840332881749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 -INFO 06-24 20:02:32 [batch.py:51] router release req id 120 -INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.35269975662231445 s -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.35370349884033203 s -INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.17232894897460938 s -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=179823136526523305886515440313213155976, time:1750766552.163929s req_ids:[8] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.17338347434997559 s -ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=88046096357712232782403390132352545457, time:1750766552.2301753s req_ids:[120] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:8 first_token_cost:497.23196029663086ms total_cost_time:497.27463722229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5166 prompt_cache_len:5151 prompt_cache_ratio:0.997096399535424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 -ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:32 [batch.py:51] router release req id 8 -INFO 06-24 20:02:32 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:31 lightllm_req_id:120 first_token_cost:333.2059383392334ms total_cost_time:333.2488536834717ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5168 prompt_cache_len:5154 prompt_cache_ratio:0.9972910216718266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 -INFO 06-24 20:02:32 [batch.py:51] router release req id 120 -INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.18969106674194336 s -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.19072461128234863 s -INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.1734631061553955 s -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=304560050187529194552626759112820445802, time:1750766552.5041142s req_ids:[8] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.17458558082580566 s -ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=70854333583937847052961646541117977776, time:1750766552.5711844s req_ids:[120] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:335.3080749511719ms total_cost_time:335.35122871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5167 prompt_cache_len:5151 prompt_cache_ratio:0.9969034255854461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 -ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:32 [batch.py:51] router release req id 8 -INFO 06-24 20:02:32 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 first_token_cost:339.2155170440674ms total_cost_time:339.25747871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5169 prompt_cache_len:5154 prompt_cache_ratio:0.9970980847359258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 -INFO 06-24 20:02:32 [batch.py:51] router release req id 120 -INFO 06-24 20:02:32 [manager.py:224] router recive req id 8 cost time 0.19597721099853516 s -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 8 cost time 0.1969306468963623 s -INFO 06-24 20:02:32 [manager.py:224] router recive req id 120 cost time 0.17893099784851074 s -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=153692536721252447721302630558412660203, time:1750766552.8546917s req_ids:[8] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:68] detokenization recv req id 120 cost time 0.18005657196044922 s -ERROR 06-24 20:02:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:32 [manager.py:391] Prefill Batch: batch_id=325272125246867870141921304363947877800, time:1750766552.9278076s req_ids:[120] -DEBUG 06-24 20:02:32 [manager.py:391] -INFO 06-24 20:02:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:351.01842880249023ms total_cost_time:351.0599136352539ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5168 prompt_cache_len:5151 prompt_cache_ratio:0.9967105263157895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:33 [batch.py:51] router release req id 8 -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:120 first_token_cost:350.34632682800293ms total_cost_time:350.4002094268799ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:5170 prompt_cache_len:5154 prompt_cache_ratio:0.9969052224371373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 -INFO 06-24 20:02:33 [batch.py:51] router release req id 120 -INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.1944289207458496 s -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.1954362392425537 s -INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.17339372634887695 s -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=292150644875981341720552266159506310504, time:1750766553.2059548s req_ids:[8] -DEBUG 06-24 20:02:33 [manager.py:391] -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.17440032958984375 s -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=30636783448264997668087792726228861062, time:1750766553.2731233s req_ids:[120] -DEBUG 06-24 20:02:33 [manager.py:391] -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:32 lightllm_req_id:8 first_token_cost:338.33932876586914ms total_cost_time:338.38415145874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5169 prompt_cache_len:5151 prompt_cache_ratio:0.9965177016831108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:33 [batch.py:51] router release req id 8 -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:341.40872955322266ms total_cost_time:341.45116806030273ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5171 prompt_cache_len:5154 prompt_cache_ratio:0.9967124347321601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 -INFO 06-24 20:02:33 [batch.py:51] router release req id 120 -INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.19852948188781738 s -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.19960403442382812 s -INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.1758863925933838 s -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=284266565621990171334867632545315592012, time:1750766553.555611s req_ids:[8] -DEBUG 06-24 20:02:33 [manager.py:391] -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 5153 -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 token used ratio: 0.3072230356271352 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:33 [manager.py:248] dp_i 0 token used ratio: 0.9358833577354807 contain prompt cache tree unrefed token -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.17690825462341309 s -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=148365955361019506689171581896907311253, time:1750766553.6230311s req_ids:[120] -DEBUG 06-24 20:02:33 [manager.py:391] -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:344.1734313964844ms total_cost_time:344.21753883361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5170 prompt_cache_len:5151 prompt_cache_ratio:0.9963249516441006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:33 [batch.py:51] router release req id 8 -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:338.58776092529297ms total_cost_time:338.62948417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5172 prompt_cache_len:5154 prompt_cache_ratio:0.9965197215777262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 -INFO 06-24 20:02:33 [batch.py:51] router release req id 120 -DEBUG 06-24 20:02:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 19104.743 tokens/s -DEBUG 06-24 20:02:33 [stats.py:37] Avg prompt tokens throughput: 19066.909 tokens/s -DEBUG 06-24 20:02:33 [stats.py:37] Avg generate tokens throughput: 37.834 tokens/s -INFO 06-24 20:02:33 [manager.py:224] router recive req id 8 cost time 0.19037938117980957 s -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 8 cost time 0.19129514694213867 s -INFO 06-24 20:02:33 [manager.py:224] router recive req id 120 cost time 0.17192554473876953 s -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=106275082060199716910361946551511747546, time:1750766553.8956792s req_ids:[8] -DEBUG 06-24 20:02:33 [manager.py:391] -INFO 06-24 20:02:33 [manager.py:68] detokenization recv req id 120 cost time 0.1729111671447754 s -ERROR 06-24 20:02:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:33 [manager.py:391] Prefill Batch: batch_id=116522267955794727148873301070590193805, time:1750766553.9633794s req_ids:[120] -DEBUG 06-24 20:02:33 [manager.py:391] -INFO 06-24 20:02:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:334.67769622802734ms total_cost_time:334.72204208374023ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5171 prompt_cache_len:5151 prompt_cache_ratio:0.9961322761554825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 -ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:34 [batch.py:51] router release req id 8 -INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:120 first_token_cost:335.0250720977783ms total_cost_time:335.0677490234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5173 prompt_cache_len:5154 prompt_cache_ratio:0.9963270829306012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 -INFO 06-24 20:02:34 [batch.py:51] router release req id 120 -INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.190169095993042 s -INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.19116473197937012 s -INFO 06-24 20:02:34 [manager.py:224] router recive req id 120 cost time 0.17226386070251465 s -DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=216180060899162795446057214724923543950, time:1750766554.2366824s req_ids:[8] -DEBUG 06-24 20:02:34 [manager.py:391] -INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 120 cost time 0.17324280738830566 s -ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=133335284426750078538303677658081183383, time:1750766554.3066978s req_ids:[120] -DEBUG 06-24 20:02:34 [manager.py:391] -INFO 06-24 20:02:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:33 lightllm_req_id:8 first_token_cost:337.141752243042ms total_cost_time:337.18180656433105ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5172 prompt_cache_len:5151 prompt_cache_ratio:0.9959396751740139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 -ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:34 [batch.py:51] router release req id 8 -INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:499.93205070495605ms total_cost_time:499.97663497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5174 prompt_cache_len:5154 prompt_cache_ratio:0.9961345187475841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 -INFO 06-24 20:02:34 [batch.py:51] router release req id 120 -INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.3507866859436035 s -INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.35181355476379395 s -INFO 06-24 20:02:34 [manager.py:224] router recive req id 120 cost time 0.16745638847351074 s -DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=328487710547830764985725990226098219099, time:1750766554.7383745s req_ids:[8] -DEBUG 06-24 20:02:34 [manager.py:391] -INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 120 cost time 0.16847538948059082 s -ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:34 [manager.py:391] Prefill Batch: batch_id=171092730690731262668778781094899037403, time:1750766554.8028111s req_ids:[120] -DEBUG 06-24 20:02:34 [manager.py:391] -INFO 06-24 20:02:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 first_token_cost:490.24319648742676ms total_cost_time:490.28682708740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5173 prompt_cache_len:5151 prompt_cache_ratio:0.9957471486564856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 -ERROR 06-24 20:02:34 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:34 [batch.py:51] router release req id 8 -INFO 06-24 20:02:34 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:324.27501678466797ms total_cost_time:324.31697845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5175 prompt_cache_len:5154 prompt_cache_ratio:0.9959420289855072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 -INFO 06-24 20:02:34 [batch.py:51] router release req id 120 -INFO 06-24 20:02:34 [manager.py:224] router recive req id 8 cost time 0.18387365341186523 s -INFO 06-24 20:02:34 [manager.py:68] detokenization recv req id 8 cost time 0.18485713005065918 s -INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.16688251495361328 s -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=308381661668696947110093909189302682866, time:1750766555.0676937s req_ids:[8] -DEBUG 06-24 20:02:35 [manager.py:391] -INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.16782617568969727 s -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=269123448390170420007340370954879016595, time:1750766555.1320918s req_ids:[120] -DEBUG 06-24 20:02:35 [manager.py:391] -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:8 first_token_cost:322.0694065093994ms total_cost_time:322.1457004547119ms,out_token_counter:1 mean_per_token_cost_time: 0.0762939453125ms prompt_token_num:5174 prompt_cache_len:5151 prompt_cache_ratio:0.9955546965597217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:35 [batch.py:51] router release req id 8 -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:34 lightllm_req_id:120 first_token_cost:329.40220832824707ms total_cost_time:329.44512367248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5176 prompt_cache_len:5154 prompt_cache_ratio:0.9957496136012365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 -INFO 06-24 20:02:35 [batch.py:51] router release req id 120 -INFO 06-24 20:02:35 [manager.py:224] router recive req id 8 cost time 0.1957552433013916 s -INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 8 cost time 0.1977832317352295 s -INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.17625045776367188 s -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=94067862915937339081023395007342340164, time:1750766555.4121013s req_ids:[8] -DEBUG 06-24 20:02:35 [manager.py:391] -INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.1781754493713379 s -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=87206748564518505841282697682956678676, time:1750766555.48042s req_ids:[120] -DEBUG 06-24 20:02:35 [manager.py:391] -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:343.7843322753906ms total_cost_time:343.8246250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5175 prompt_cache_len:5151 prompt_cache_ratio:0.9953623188405797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:35 [batch.py:51] router release req id 8 -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:340.26598930358887ms total_cost_time:340.30938148498535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5177 prompt_cache_len:5154 prompt_cache_ratio:0.9955572725516708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 -INFO 06-24 20:02:35 [batch.py:51] router release req id 120 -INFO 06-24 20:02:35 [manager.py:224] router recive req id 8 cost time 0.19130301475524902 s -INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 8 cost time 0.19298934936523438 s -INFO 06-24 20:02:35 [manager.py:224] router recive req id 120 cost time 0.1761031150817871 s -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=147672100185494361982363591948111087835, time:1750766555.7579165s req_ids:[8] -DEBUG 06-24 20:02:35 [manager.py:391] -INFO 06-24 20:02:35 [manager.py:68] detokenization recv req id 120 cost time 0.17804574966430664 s -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:35 [manager.py:391] Prefill Batch: batch_id=143047659011745498613406751312335334599, time:1750766555.8263934s req_ids:[120] -DEBUG 06-24 20:02:35 [manager.py:391] -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 10409 -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 token used ratio: 0.623718887262079 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:35 [manager.py:248] dp_i 0 token used ratio: 0.9381405563689604 contain prompt cache tree unrefed token -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:340.7261371612549ms total_cost_time:340.76833724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5176 prompt_cache_len:5151 prompt_cache_ratio:0.9951700154559505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 -ERROR 06-24 20:02:35 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:35 [batch.py:51] router release req id 8 -INFO 06-24 20:02:35 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:344.4535732269287ms total_cost_time:344.4943428039551ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5178 prompt_cache_len:5154 prompt_cache_ratio:0.9953650057937428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 -INFO 06-24 20:02:35 [batch.py:51] router release req id 120 -INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.19268131256103516 s -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.19474244117736816 s -INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.1732466220855713 s -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=319358495761877469808091573891295074292, time:1750766556.1048965s req_ids:[8] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.1749589443206787 s -ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=88442397095455837595288464995755154619, time:1750766556.173116s req_ids:[120] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:8 first_token_cost:340.27099609375ms total_cost_time:340.31200408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5177 prompt_cache_len:5151 prompt_cache_ratio:0.9949777863627584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 -ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:36 [batch.py:51] router release req id 8 -INFO 06-24 20:02:36 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:35 lightllm_req_id:120 first_token_cost:337.16678619384766ms total_cost_time:337.2082710266113ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5179 prompt_cache_len:5154 prompt_cache_ratio:0.9951728132844179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 -INFO 06-24 20:02:36 [batch.py:51] router release req id 120 -INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.18986248970031738 s -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.19164085388183594 s -INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.17455458641052246 s -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=105715028260458016211972132122476947014, time:1750766556.4490292s req_ids:[8] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.17646193504333496 s -ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=42024943894738311677341859538473944952, time:1750766556.5168693s req_ids:[120] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:338.37127685546875ms total_cost_time:338.4137153625488ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5178 prompt_cache_len:5151 prompt_cache_ratio:0.9947856315179606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 -ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:36 [batch.py:51] router release req id 8 -INFO 06-24 20:02:36 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 first_token_cost:338.2561206817627ms total_cost_time:338.31024169921875ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5180 prompt_cache_len:5154 prompt_cache_ratio:0.994980694980695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 -INFO 06-24 20:02:36 [batch.py:51] router release req id 120 -INFO 06-24 20:02:36 [manager.py:224] router recive req id 8 cost time 0.18833446502685547 s -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 8 cost time 0.1901721954345703 s -INFO 06-24 20:02:36 [manager.py:224] router recive req id 120 cost time 0.17294073104858398 s -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=74474623728033823189024821135637606482, time:1750766556.7925127s req_ids:[8] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:68] detokenization recv req id 120 cost time 0.1747579574584961 s -ERROR 06-24 20:02:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:36 [manager.py:391] Prefill Batch: batch_id=238536329505933910770105629899399880214, time:1750766556.8604538s req_ids:[120] -DEBUG 06-24 20:02:36 [manager.py:391] -INFO 06-24 20:02:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:336.4260196685791ms total_cost_time:336.47966384887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5179 prompt_cache_len:5151 prompt_cache_ratio:0.994593550878548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 -ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:37 [batch.py:51] router release req id 8 -INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:120 first_token_cost:495.8524703979492ms total_cost_time:495.8951473236084ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5181 prompt_cache_len:5154 prompt_cache_ratio:0.9947886508396062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 -INFO 06-24 20:02:37 [batch.py:51] router release req id 120 -INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.3491194248199463 s -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.35091090202331543 s -INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.17320775985717773 s -DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=189323941677795171105667660791104535233, time:1750766557.2944403s req_ids:[8] -DEBUG 06-24 20:02:37 [manager.py:391] -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.17506647109985352 s -ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=66057912821395346582374237591150179711, time:1750766557.362545s req_ids:[120] -DEBUG 06-24 20:02:37 [manager.py:391] -INFO 06-24 20:02:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:36 lightllm_req_id:8 first_token_cost:495.4962730407715ms total_cost_time:495.5401420593262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5180 prompt_cache_len:5151 prompt_cache_ratio:0.9944015444015444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 -ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:37 [batch.py:51] router release req id 8 -INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:336.8966579437256ms total_cost_time:336.93814277648926ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5182 prompt_cache_len:5154 prompt_cache_ratio:0.994596680818217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 -INFO 06-24 20:02:37 [batch.py:51] router release req id 120 -INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.1900639533996582 s -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.19208860397338867 s -INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.17496514320373535 s -DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=50082484109554152682919667096071512388, time:1750766557.638698s req_ids:[8] -DEBUG 06-24 20:02:37 [manager.py:391] -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.17653203010559082 s -ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=322850610058007483413692199976770092967, time:1750766557.7064953s req_ids:[120] -DEBUG 06-24 20:02:37 [manager.py:391] -INFO 06-24 20:02:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 first_token_cost:338.0165100097656ms total_cost_time:338.0591869354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5181 prompt_cache_len:5151 prompt_cache_ratio:0.9942096120440069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 -ERROR 06-24 20:02:37 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:37 [batch.py:51] router release req id 8 -INFO 06-24 20:02:37 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:338.3982181549072ms total_cost_time:338.442325592041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5183 prompt_cache_len:5154 prompt_cache_ratio:0.9944047848736253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 -INFO 06-24 20:02:37 [batch.py:51] router release req id 120 -INFO 06-24 20:02:37 [manager.py:224] router recive req id 8 cost time 0.1906728744506836 s -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 8 cost time 0.19226837158203125 s -INFO 06-24 20:02:37 [manager.py:224] router recive req id 120 cost time 0.1736290454864502 s -DEBUG 06-24 20:02:37 [manager.py:391] Prefill Batch: batch_id=310037248984337995353741690915580331398, time:1750766557.9810693s req_ids:[8] -DEBUG 06-24 20:02:37 [manager.py:391] -INFO 06-24 20:02:37 [manager.py:68] detokenization recv req id 120 cost time 0.1753554344177246 s -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=89094203310283461523971066376083520727, time:1750766558.0492156s req_ids:[120] -DEBUG 06-24 20:02:38 [manager.py:391] -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:8 first_token_cost:337.5270366668701ms total_cost_time:337.5692367553711ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5182 prompt_cache_len:5151 prompt_cache_ratio:0.9940177537630258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:38 [batch.py:51] router release req id 8 -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:37 lightllm_req_id:120 first_token_cost:338.4726047515869ms total_cost_time:338.5307788848877ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5184 prompt_cache_len:5154 prompt_cache_ratio:0.9942129629629629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 -INFO 06-24 20:02:38 [batch.py:51] router release req id 120 -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 10380 -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 token used ratio: 0.3117984382625671 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:38 [manager.py:248] dp_i 0 token used ratio: 0.9404587603709127 contain prompt cache tree unrefed token -INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.1923377513885498 s -INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19388699531555176 s -INFO 06-24 20:02:38 [manager.py:224] router recive req id 120 cost time 0.17340946197509766 s -DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=238723149873922316462447295905857588050, time:1750766558.3256867s req_ids:[8] -DEBUG 06-24 20:02:38 [manager.py:391] -INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 120 cost time 0.17495369911193848 s -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=182779175972599225594068117589919037104, time:1750766558.3929396s req_ids:[120] -DEBUG 06-24 20:02:38 [manager.py:391] -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:338.0553722381592ms total_cost_time:338.09757232666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5183 prompt_cache_len:5151 prompt_cache_ratio:0.9938259695157244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:38 [batch.py:51] router release req id 8 -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:335.74557304382324ms total_cost_time:335.7884883880615ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5185 prompt_cache_len:5154 prompt_cache_ratio:0.9940212150433944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 -INFO 06-24 20:02:38 [batch.py:51] router release req id 120 -INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.18851876258850098 s -INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19037199020385742 s -INFO 06-24 20:02:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:38 [manager.py:224] router recive req id 120 cost time 0.17420148849487305 s -DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=292276559148319682824863746895715753657, time:1750766558.6680949s req_ids:[8] -DEBUG 06-24 20:02:38 [manager.py:391] -INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 120 cost time 0.1759662628173828 s -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:38 [manager.py:391] Prefill Batch: batch_id=277429260547364294425535208052553364166, time:1750766558.7361224s req_ids:[120] -DEBUG 06-24 20:02:38 [manager.py:391] -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:336.2894058227539ms total_cost_time:336.3327980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5184 prompt_cache_len:5151 prompt_cache_ratio:0.9936342592592593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 -ERROR 06-24 20:02:38 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:38 [batch.py:51] router release req id 8 -INFO 06-24 20:02:38 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:340.6651020050049ms total_cost_time:340.70611000061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5186 prompt_cache_len:5154 prompt_cache_ratio:0.9938295410721172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 -INFO 06-24 20:02:38 [batch.py:51] router release req id 120 -INFO 06-24 20:02:38 [manager.py:224] router recive req id 8 cost time 0.19263410568237305 s -INFO 06-24 20:02:38 [manager.py:68] detokenization recv req id 8 cost time 0.19434475898742676 s -INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17420172691345215 s -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=118327325064722403699989140212815933934, time:1750766559.0144398s req_ids:[8] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.1767127513885498 s -ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=62729845736357706451825428816338906382, time:1750766559.0825098s req_ids:[120] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:8 first_token_cost:339.8125171661377ms total_cost_time:339.84994888305664ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5185 prompt_cache_len:5151 prompt_cache_ratio:0.9934426229508196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 -ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:39 [batch.py:51] router release req id 8 -INFO 06-24 20:02:39 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:38 lightllm_req_id:120 first_token_cost:337.8791809082031ms total_cost_time:337.9225730895996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5187 prompt_cache_len:5154 prompt_cache_ratio:0.993637941006362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 -INFO 06-24 20:02:39 [batch.py:51] router release req id 120 -INFO 06-24 20:02:39 [manager.py:224] router recive req id 8 cost time 0.19096636772155762 s -INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 8 cost time 0.1929643154144287 s -INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17554926872253418 s -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=64437539872323005501070832531747967521, time:1750766559.3594937s req_ids:[8] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.1775045394897461 s -ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=195780276065028672541593939212991762257, time:1750766559.4271584s req_ids:[120] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:339.89930152893066ms total_cost_time:339.94197845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5186 prompt_cache_len:5151 prompt_cache_ratio:0.9932510605476282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 -ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:39 [batch.py:51] router release req id 8 -INFO 06-24 20:02:39 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 first_token_cost:501.38211250305176ms total_cost_time:501.42478942871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5188 prompt_cache_len:5154 prompt_cache_ratio:0.9934464148033925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 -INFO 06-24 20:02:39 [batch.py:51] router release req id 120 -INFO 06-24 20:02:39 [manager.py:224] router recive req id 8 cost time 0.3508286476135254 s -INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 8 cost time 0.35257458686828613 s -INFO 06-24 20:02:39 [manager.py:224] router recive req id 120 cost time 0.17219257354736328 s -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=62827745303995127719037238041028755552, time:1750766559.8635664s req_ids:[8] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:68] detokenization recv req id 120 cost time 0.17403888702392578 s -ERROR 06-24 20:02:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:39 [manager.py:391] Prefill Batch: batch_id=276211917171070532462303628244523093220, time:1750766559.931689s req_ids:[120] -DEBUG 06-24 20:02:39 [manager.py:391] -INFO 06-24 20:02:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:497.9369640350342ms total_cost_time:497.97916412353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5187 prompt_cache_len:5151 prompt_cache_ratio:0.9930595720069404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [batch.py:51] router release req id 8 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:120 first_token_cost:336.1246585845947ms total_cost_time:336.167573928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5189 prompt_cache_len:5154 prompt_cache_ratio:0.9932549624205049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 -INFO 06-24 20:02:40 [batch.py:51] router release req id 120 -INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.18917346000671387 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.19168543815612793 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.16874957084655762 s -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=24829423517738258191934446368084965230, time:1750766560.2027774s req_ids:[8] -DEBUG 06-24 20:02:40 [manager.py:391] -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.17043805122375488 s -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=47183152602107509258019726468215310231, time:1750766560.267163s req_ids:[120] -DEBUG 06-24 20:02:40 [manager.py:391] -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:39 lightllm_req_id:8 first_token_cost:329.64324951171875ms total_cost_time:329.6961784362793ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5188 prompt_cache_len:5151 prompt_cache_ratio:0.9928681572860447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:49 lightllm_req_id:400 first_token_cost:213.96732330322266ms total_cost_time:170911.7624759674ms,out_token_counter:4096 mean_per_token_cost_time: 41.674266394693404ms prompt_token_num:1050 prompt_cache_len:1049 prompt_cache_ratio:0.9990476190476191 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [batch.py:51] router release req id 400 -INFO 06-24 20:02:40 [batch.py:51] router release req id 8 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:296.2765693664551ms total_cost_time:296.31829261779785ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5190 prompt_cache_len:5154 prompt_cache_ratio:0.9930635838150289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 -DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:40 [batch.py:51] router release req id 120 -INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.07571887969970703 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.0699927806854248 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.021096467971801758 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.07718205451965332 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.07282781600952148 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.02525782585144043 s -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=294615103522950980572974644756615694644, time:1750766560.3752382s req_ids:[8, 400, 120] -DEBUG 06-24 20:02:40 [manager.py:391] -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:127.0895004272461ms total_cost_time:127.13336944580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5189 prompt_cache_len:5151 prompt_cache_ratio:0.9926768163422625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:120.69869041442871ms total_cost_time:120.73659896850586ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5191 prompt_cache_len:5154 prompt_cache_ratio:0.9928722789443267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:170.52340507507324ms total_cost_time:170.5484390258789ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5146 prompt_cache_len:5145 prompt_cache_ratio:0.9998056743101438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 -DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:40 [batch.py:51] router release req id 8 -INFO 06-24 20:02:40 [batch.py:51] router release req id 400 -INFO 06-24 20:02:40 [batch.py:51] router release req id 120 -INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.12004637718200684 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.06810331344604492 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.06298112869262695 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.12163305282592773 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.07121467590332031 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.06729984283447266 s -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=230329129274426114032723946096066484947, time:1750766560.5529442s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:40 [manager.py:391] -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:199.72586631774902ms total_cost_time:199.7685432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5190 prompt_cache_len:5151 prompt_cache_ratio:0.992485549132948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:147.9017734527588ms total_cost_time:147.92823791503906ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5192 prompt_cache_len:5154 prompt_cache_ratio:0.9926810477657936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:163.41471672058105ms total_cost_time:163.4519100189209ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5147 prompt_cache_len:5145 prompt_cache_ratio:0.9996114241305615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 -DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:40 [batch.py:51] router release req id 8 -INFO 06-24 20:02:40 [batch.py:51] router release req id 120 -INFO 06-24 20:02:40 [batch.py:51] router release req id 400 -INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.08940482139587402 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.08448123931884766 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.07314324378967285 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.09107470512390137 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.08757710456848145 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.07760453224182129 s -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=54783462695815102979971716264587004769, time:1750766560.7301013s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:40 [manager.py:391] -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:145.1871395111084ms total_cost_time:145.2312469482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5191 prompt_cache_len:5151 prompt_cache_ratio:0.9922943556154884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:183.8662624359131ms total_cost_time:183.90297889709473ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5193 prompt_cache_len:5154 prompt_cache_ratio:0.9924898902368573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:172.61433601379395ms total_cost_time:172.64080047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5148 prompt_cache_len:5145 prompt_cache_ratio:0.9994172494172494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 -DEBUG 06-24 20:02:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:40 [batch.py:51] router release req id 8 -INFO 06-24 20:02:40 [batch.py:51] router release req id 120 -INFO 06-24 20:02:40 [batch.py:51] router release req id 400 -INFO 06-24 20:02:40 [manager.py:224] router recive req id 8 cost time 0.1146082878112793 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 120 cost time 0.06705021858215332 s -INFO 06-24 20:02:40 [manager.py:224] router recive req id 400 cost time 0.061409950256347656 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 8 cost time 0.11623930931091309 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 120 cost time 0.07025885581970215 s -INFO 06-24 20:02:40 [manager.py:68] detokenization recv req id 400 cost time 0.0658421516418457 s -DEBUG 06-24 20:02:40 [manager.py:391] Prefill Batch: batch_id=212707212336415738926563630506934879922, time:1750766560.9067447s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:40 [manager.py:391] -ERROR 06-24 20:02:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:198.54259490966797ms total_cost_time:198.59981536865234ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5192 prompt_cache_len:5151 prompt_cache_ratio:0.9921032357473035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:120 first_token_cost:334.0299129486084ms total_cost_time:334.0728282928467ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5194 prompt_cache_len:5154 prompt_cache_ratio:0.9922988063149788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:400 first_token_cost:328.48310470581055ms total_cost_time:328.5074234008789ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5149 prompt_cache_len:5145 prompt_cache_ratio:0.9992231501262381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 -DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:41 [batch.py:51] router release req id 8 -INFO 06-24 20:02:41 [batch.py:51] router release req id 120 -INFO 06-24 20:02:41 [batch.py:51] router release req id 400 -INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.25322508811950684 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06767559051513672 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.06233024597167969 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.25475335121154785 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07062792778015137 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06661105155944824 s -DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=177120281547588888327394775277671623407, time:1750766561.2497702s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:41 [manager.py:391] -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:40 lightllm_req_id:8 first_token_cost:349.0462303161621ms total_cost_time:349.0874767303467ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5193 prompt_cache_len:5151 prompt_cache_ratio:0.9919121894858464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:163.50150108337402ms total_cost_time:163.5277271270752ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5195 prompt_cache_len:5154 prompt_cache_ratio:0.9921077959576516 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:163.64121437072754ms total_cost_time:163.67554664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.034332275390625ms prompt_token_num:5150 prompt_cache_len:5145 prompt_cache_ratio:0.9990291262135922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 -DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:41 [batch.py:51] router release req id 8 -INFO 06-24 20:02:41 [batch.py:51] router release req id 120 -INFO 06-24 20:02:41 [batch.py:51] router release req id 400 -INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.07371664047241211 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06903576850891113 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.05933523178100586 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.07530403137207031 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07195568084716797 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06350278854370117 s -DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=60040809617159010657647324259870243873, time:1750766561.4285986s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:41 [manager.py:391] -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:146.27623558044434ms total_cost_time:146.31986618041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5194 prompt_cache_len:5151 prompt_cache_ratio:0.9917212167886023 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:169.52157020568848ms total_cost_time:169.56043243408203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5196 prompt_cache_len:5154 prompt_cache_ratio:0.9919168591224018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:159.9252223968506ms total_cost_time:159.95025634765625ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5151 prompt_cache_len:5145 prompt_cache_ratio:0.9988351776354106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 -DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:41 [batch.py:51] router release req id 8 -INFO 06-24 20:02:41 [batch.py:51] router release req id 120 -INFO 06-24 20:02:41 [batch.py:51] router release req id 400 -INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.09943914413452148 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06870055198669434 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.06301021575927734 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.10176420211791992 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.07337760925292969 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.0698850154876709 s -DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=137366473691382659998425482788712593774, time:1750766561.606176s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:41 [manager.py:391] -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:199.38158988952637ms total_cost_time:199.42402839660645ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5195 prompt_cache_len:5151 prompt_cache_ratio:0.9915303176130895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:168.6270236968994ms total_cost_time:168.6534881591797ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5197 prompt_cache_len:5154 prompt_cache_ratio:0.9917259957667885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:165.20404815673828ms total_cost_time:165.24767875671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5152 prompt_cache_len:5145 prompt_cache_ratio:0.998641304347826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 -DEBUG 06-24 20:02:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:41 [batch.py:51] router release req id 8 -INFO 06-24 20:02:41 [batch.py:51] router release req id 120 -INFO 06-24 20:02:41 [batch.py:51] router release req id 400 -INFO 06-24 20:02:41 [manager.py:224] router recive req id 8 cost time 0.0691370964050293 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 120 cost time 0.06269288063049316 s -INFO 06-24 20:02:41 [manager.py:224] router recive req id 400 cost time 0.05591845512390137 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 8 cost time 0.07076549530029297 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 120 cost time 0.0657815933227539 s -INFO 06-24 20:02:41 [manager.py:68] detokenization recv req id 400 cost time 0.06028246879577637 s -DEBUG 06-24 20:02:41 [manager.py:391] Prefill Batch: batch_id=121350927446857115063884673502289609276, time:1750766561.7840889s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:41 [manager.py:391] -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:144.8225975036621ms total_cost_time:144.92011070251465ms,out_token_counter:1 mean_per_token_cost_time: 0.09751319885253906ms prompt_token_num:5196 prompt_cache_len:5151 prompt_cache_ratio:0.9913394919168591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:138.44585418701172ms total_cost_time:138.4735107421875ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5198 prompt_cache_len:5154 prompt_cache_ratio:0.9915352058484033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 -ERROR 06-24 20:02:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:155.85637092590332ms total_cost_time:155.89427947998047ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5153 prompt_cache_len:5145 prompt_cache_ratio:0.9984475063070056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 -DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:42 [batch.py:51] router release req id 8 -INFO 06-24 20:02:42 [batch.py:51] router release req id 120 -INFO 06-24 20:02:42 [batch.py:51] router release req id 400 -INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.25313591957092285 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.24799203872680664 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.23288559913635254 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.25470542907714844 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.25093746185302734 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.23709869384765625 s -DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=35260259955341358144012866797719747089, time:1750766562.122755s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:42 [manager.py:391] -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:8 first_token_cost:347.13053703308105ms total_cost_time:347.17535972595215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5197 prompt_cache_len:5151 prompt_cache_ratio:0.9911487396574947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:120 first_token_cost:341.9039249420166ms total_cost_time:341.9318199157715ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5199 prompt_cache_len:5154 prompt_cache_ratio:0.9913444893248702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:41 lightllm_req_id:400 first_token_cost:346.099853515625ms total_cost_time:346.14062309265137ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5154 prompt_cache_len:5145 prompt_cache_ratio:0.9982537834691502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 -DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:42 [batch.py:51] router release req id 8 -INFO 06-24 20:02:42 [batch.py:51] router release req id 120 -INFO 06-24 20:02:42 [batch.py:51] router release req id 400 -INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.07563138008117676 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.07058191299438477 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.059890031814575195 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.07723164558410645 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.0736396312713623 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.06416821479797363 s -DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=293881141402435172941675046230236215183, time:1750766562.3001063s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:42 [manager.py:391] -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:148.16975593566895ms total_cost_time:148.21267127990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5198 prompt_cache_len:5151 prompt_cache_ratio:0.9909580607926125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:170.34912109375ms total_cost_time:170.38989067077637ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5200 prompt_cache_len:5154 prompt_cache_ratio:0.9911538461538462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:159.7440242767334ms total_cost_time:159.76905822753906ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5155 prompt_cache_len:5145 prompt_cache_ratio:0.9980601357904947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 -DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:42 [batch.py:51] router release req id 8 -INFO 06-24 20:02:42 [batch.py:51] router release req id 120 -INFO 06-24 20:02:42 [batch.py:51] router release req id 400 -INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.09848308563232422 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.06808829307556152 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.06273770332336426 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.10004639625549316 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.07109308242797852 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.0669863224029541 s -DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=208206045579062200952449328727469213305, time:1750766562.4776666s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:42 [manager.py:391] -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:149.97053146362305ms total_cost_time:150.01511573791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5199 prompt_cache_len:5151 prompt_cache_ratio:0.9907674552798615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:169.48652267456055ms total_cost_time:169.525146484375ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5201 prompt_cache_len:5154 prompt_cache_ratio:0.9909632762930206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:164.20340538024902ms total_cost_time:164.229154586792ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5156 prompt_cache_len:5145 prompt_cache_ratio:0.997866563227308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 -DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:42 [batch.py:51] router release req id 8 -INFO 06-24 20:02:42 [batch.py:51] router release req id 120 -INFO 06-24 20:02:42 [batch.py:51] router release req id 400 -INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.12104439735412598 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.06738829612731934 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.06241774559020996 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.12282443046569824 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.07056784629821777 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.06677842140197754 s -DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=128806875567830028876767950250780831397, time:1750766562.6545167s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:42 [manager.py:391] -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:199.33533668518066ms total_cost_time:199.37610626220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5200 prompt_cache_len:5151 prompt_cache_ratio:0.990576923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:145.63512802124023ms total_cost_time:145.6613540649414ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5202 prompt_cache_len:5154 prompt_cache_ratio:0.9907727797001153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:161.62467002868652ms total_cost_time:161.66234016418457ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5157 prompt_cache_len:5145 prompt_cache_ratio:0.997673065735893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 -DEBUG 06-24 20:02:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:42 [batch.py:51] router release req id 8 -INFO 06-24 20:02:42 [batch.py:51] router release req id 120 -INFO 06-24 20:02:42 [batch.py:51] router release req id 400 -INFO 06-24 20:02:42 [manager.py:224] router recive req id 8 cost time 0.08949828147888184 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 120 cost time 0.08461380004882812 s -INFO 06-24 20:02:42 [manager.py:224] router recive req id 400 cost time 0.07227039337158203 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 8 cost time 0.09049105644226074 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 120 cost time 0.08624386787414551 s -INFO 06-24 20:02:42 [manager.py:68] detokenization recv req id 400 cost time 0.07428646087646484 s -DEBUG 06-24 20:02:42 [manager.py:391] Prefill Batch: batch_id=228776173924554834851270083177592946646, time:1750766562.8317842s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:42 [manager.py:391] -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:145.65062522888184ms total_cost_time:145.7064151763916ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5201 prompt_cache_len:5151 prompt_cache_ratio:0.9903864641415112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 -ERROR 06-24 20:02:42 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:42 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:191.08128547668457ms total_cost_time:191.12396240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5203 prompt_cache_len:5154 prompt_cache_ratio:0.9905823563328848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:400 first_token_cost:331.4690589904785ms total_cost_time:331.5122127532959ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5158 prompt_cache_len:5145 prompt_cache_ratio:0.9974796432725863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 -DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:43 [batch.py:51] router release req id 8 -INFO 06-24 20:02:43 [batch.py:51] router release req id 120 -INFO 06-24 20:02:43 [batch.py:51] router release req id 400 -INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.2753880023956299 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.22573065757751465 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.07296562194824219 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.27635765075683594 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.22722578048706055 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.07490873336791992 s -DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=148608290994619930076366228567200426137, time:1750766563.1680388s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:43 [manager.py:391] -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:8 first_token_cost:347.6216793060303ms total_cost_time:347.66626358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5202 prompt_cache_len:5151 prompt_cache_ratio:0.9901960784313726 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:42 lightllm_req_id:120 first_token_cost:325.7572650909424ms total_cost_time:325.7937431335449ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5204 prompt_cache_len:5154 prompt_cache_ratio:0.990392006149116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:173.065185546875ms total_cost_time:173.08950424194336ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5159 prompt_cache_len:5145 prompt_cache_ratio:0.9972862957937585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 -DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:43 [batch.py:51] router release req id 8 -INFO 06-24 20:02:43 [batch.py:51] router release req id 120 -INFO 06-24 20:02:43 [batch.py:51] router release req id 400 -INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.09999465942382812 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.06880879402160645 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.10091376304626465 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.06396603584289551 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07018303871154785 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06577944755554199 s -DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=317668582527231393955332500802508033814, time:1750766563.3453636s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:43 [manager.py:391] -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:148.03028106689453ms total_cost_time:148.0727195739746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5203 prompt_cache_len:5151 prompt_cache_ratio:0.990005765904286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:167.3107147216797ms total_cost_time:167.35053062438965ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5205 prompt_cache_len:5154 prompt_cache_ratio:0.9902017291066283 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:164.6416187286377ms total_cost_time:164.69359397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:5160 prompt_cache_len:5145 prompt_cache_ratio:0.997093023255814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 -DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:43 [batch.py:51] router release req id 8 -INFO 06-24 20:02:43 [batch.py:51] router release req id 120 -INFO 06-24 20:02:43 [batch.py:51] router release req id 400 -INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.12473940849304199 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.0742034912109375 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.1256732940673828 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.0655219554901123 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07572269439697266 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06733393669128418 s -DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=53283205650233727839010787215544686011, time:1750766563.524954s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:43 [manager.py:391] -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:197.34644889831543ms total_cost_time:197.3886489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5204 prompt_cache_len:5151 prompt_cache_ratio:0.989815526518063 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:175.4145622253418ms total_cost_time:175.45175552368164ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5206 prompt_cache_len:5154 prompt_cache_ratio:0.9900115251632732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:166.67866706848145ms total_cost_time:166.7032241821289ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5161 prompt_cache_len:5145 prompt_cache_ratio:0.9968998256151909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 -DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:43 [batch.py:51] router release req id 8 -INFO 06-24 20:02:43 [batch.py:51] router release req id 120 -INFO 06-24 20:02:43 [batch.py:51] router release req id 400 -INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.10004329681396484 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.06942963600158691 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.06429123878479004 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.1009972095489502 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.07090091705322266 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.06582856178283691 s -DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=315500482468433114172399458886555902873, time:1750766563.7022219s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:43 [manager.py:391] -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:197.69787788391113ms total_cost_time:197.7403163909912ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5205 prompt_cache_len:5151 prompt_cache_ratio:0.9896253602305476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:167.12594032287598ms total_cost_time:167.15407371520996ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5207 prompt_cache_len:5154 prompt_cache_ratio:0.9898213942769349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:187.4704360961914ms total_cost_time:187.50882148742676ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5162 prompt_cache_len:5145 prompt_cache_ratio:0.996706702828361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 -DEBUG 06-24 20:02:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:43 [batch.py:51] router release req id 8 -INFO 06-24 20:02:43 [batch.py:51] router release req id 120 -INFO 06-24 20:02:43 [batch.py:51] router release req id 400 -INFO 06-24 20:02:43 [manager.py:224] router recive req id 8 cost time 0.1163320541381836 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 120 cost time 0.11059951782226562 s -INFO 06-24 20:02:43 [manager.py:224] router recive req id 400 cost time 0.09522318840026855 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 8 cost time 0.11796021461486816 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 120 cost time 0.11366653442382812 s -INFO 06-24 20:02:43 [manager.py:68] detokenization recv req id 400 cost time 0.09942770004272461 s -DEBUG 06-24 20:02:43 [manager.py:391] Prefill Batch: batch_id=319947527375132010620479070993530412357, time:1750766563.9242387s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:43 [manager.py:391] -DEBUG 06-24 20:02:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 46186.739 tokens/s -DEBUG 06-24 20:02:43 [stats.py:37] Avg prompt tokens throughput: 46159.030 tokens/s -DEBUG 06-24 20:02:43 [stats.py:37] Avg generate tokens throughput: 27.710 tokens/s -ERROR 06-24 20:02:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:196.23327255249023ms total_cost_time:196.27642631530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5206 prompt_cache_len:5151 prompt_cache_ratio:0.9894352669996158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:120 first_token_cost:438.7969970703125ms total_cost_time:438.8401508331299ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5208 prompt_cache_len:5154 prompt_cache_ratio:0.9896313364055299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:400 first_token_cost:423.4936237335205ms total_cost_time:423.5203266143799ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5163 prompt_cache_len:5145 prompt_cache_ratio:0.9965136548518303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 -DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:44 [batch.py:51] router release req id 8 -INFO 06-24 20:02:44 [batch.py:51] router release req id 120 -INFO 06-24 20:02:44 [batch.py:51] router release req id 400 -INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.3417627811431885 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.09117412567138672 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.0856773853302002 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.34337449073791504 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.09405755996704102 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.08978867530822754 s -DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=230783454148511429882451945577098558046, time:1750766564.3544729s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:44 [manager.py:391] -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:43 lightllm_req_id:8 first_token_cost:401.20410919189453ms total_cost_time:401.247501373291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5207 prompt_cache_len:5151 prompt_cache_ratio:0.9892452467831765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:201.5233039855957ms total_cost_time:201.56216621398926ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5209 prompt_cache_len:5154 prompt_cache_ratio:0.9894413515070071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:209.26284790039062ms total_cost_time:209.30075645446777ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5164 prompt_cache_len:5145 prompt_cache_ratio:0.9963206816421378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 -DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:44 [batch.py:51] router release req id 8 -INFO 06-24 20:02:44 [batch.py:51] router release req id 120 -INFO 06-24 20:02:44 [batch.py:51] router release req id 400 -INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.15827465057373047 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.10768008232116699 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.09426021575927734 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.15978455543518066 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.11042118072509766 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.09823155403137207 s -DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=275655102703287675180080340832436846042, time:1750766564.5763066s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:44 [manager.py:391] -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:249.3112087249756ms total_cost_time:249.35460090637207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5208 prompt_cache_len:5151 prompt_cache_ratio:0.9890552995391705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:198.5642910003662ms total_cost_time:198.59051704406738ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5210 prompt_cache_len:5154 prompt_cache_ratio:0.9892514395393474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:215.7444953918457ms total_cost_time:215.78264236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5165 prompt_cache_len:5145 prompt_cache_ratio:0.9961277831558567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 -DEBUG 06-24 20:02:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:44 [batch.py:51] router release req id 8 -INFO 06-24 20:02:44 [batch.py:51] router release req id 120 -INFO 06-24 20:02:44 [batch.py:51] router release req id 400 -INFO 06-24 20:02:44 [manager.py:224] router recive req id 8 cost time 0.12160444259643555 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 120 cost time 0.11640071868896484 s -INFO 06-24 20:02:44 [manager.py:224] router recive req id 400 cost time 0.09479784965515137 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 8 cost time 0.12312984466552734 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 120 cost time 0.11922335624694824 s -INFO 06-24 20:02:44 [manager.py:68] detokenization recv req id 400 cost time 0.09863018989562988 s -DEBUG 06-24 20:02:44 [manager.py:391] Prefill Batch: batch_id=49281106245312341984666419430909755772, time:1750766564.7995226s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:44 [manager.py:391] -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:196.4707374572754ms total_cost_time:196.51460647583008ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5209 prompt_cache_len:5151 prompt_cache_ratio:0.9888654252255711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:240.75651168823242ms total_cost_time:240.79442024230957ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5211 prompt_cache_len:5154 prompt_cache_ratio:0.9890616004605642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:219.03252601623535ms total_cost_time:219.0573215484619ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5166 prompt_cache_len:5145 prompt_cache_ratio:0.9959349593495935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 -INFO 06-24 20:02:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 -DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:45 [batch.py:51] router release req id 8 -INFO 06-24 20:02:45 [batch.py:51] router release req id 120 -INFO 06-24 20:02:45 [batch.py:51] router release req id 400 -INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.1434493064880371 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.09111571311950684 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.08559679985046387 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.14496564865112305 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.09403395652770996 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.08971214294433594 s -DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=95099462047739173574208768820256951114, time:1750766565.0238433s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:45 [manager.py:391] -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:8 first_token_cost:249.44019317626953ms total_cost_time:249.4826316833496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5210 prompt_cache_len:5151 prompt_cache_ratio:0.9886756238003839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:120 first_token_cost:197.12543487548828ms total_cost_time:197.15142250061035ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5212 prompt_cache_len:5154 prompt_cache_ratio:0.988871834228703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:44 lightllm_req_id:400 first_token_cost:212.59140968322754ms total_cost_time:212.62860298156738ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5167 prompt_cache_len:5145 prompt_cache_ratio:0.9957422101799884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 -DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:45 [batch.py:51] router release req id 8 -INFO 06-24 20:02:45 [batch.py:51] router release req id 120 -INFO 06-24 20:02:45 [batch.py:51] router release req id 400 -INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.10842728614807129 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.10210609436035156 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09076905250549316 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.10999822616577148 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.10509061813354492 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.09489107131958008 s -DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=317818893624569935822494954495013423899, time:1750766565.2470171s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:45 [manager.py:391] -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:347.0473289489746ms total_cost_time:347.0919132232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5211 prompt_cache_len:5151 prompt_cache_ratio:0.9884858952216465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:391.62445068359375ms total_cost_time:391.6647434234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5213 prompt_cache_len:5154 prompt_cache_ratio:0.9886821408018416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:393.04447174072266ms total_cost_time:393.0854797363281ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5168 prompt_cache_len:5145 prompt_cache_ratio:0.9955495356037152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 -DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:45 [batch.py:51] router release req id 8 -INFO 06-24 20:02:45 [batch.py:51] router release req id 120 -INFO 06-24 20:02:45 [batch.py:51] router release req id 400 -INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.15468955039978027 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.10419964790344238 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09134817123413086 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.15636634826660156 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.10712957382202148 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.09556221961975098 s -DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=286179879701422339754763011790231859823, time:1750766565.644323s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:45 [manager.py:391] -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:249.9847412109375ms total_cost_time:250.02765655517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5212 prompt_cache_len:5151 prompt_cache_ratio:0.9882962394474291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:199.27287101745605ms total_cost_time:199.29957389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5214 prompt_cache_len:5154 prompt_cache_ratio:0.9884925201380897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:216.79091453552246ms total_cost_time:216.83382987976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5169 prompt_cache_len:5145 prompt_cache_ratio:0.9953569355774812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 -DEBUG 06-24 20:02:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:45 [batch.py:51] router release req id 8 -INFO 06-24 20:02:45 [batch.py:51] router release req id 120 -INFO 06-24 20:02:45 [batch.py:51] router release req id 400 -INFO 06-24 20:02:45 [manager.py:224] router recive req id 8 cost time 0.12239265441894531 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 120 cost time 0.1168978214263916 s -INFO 06-24 20:02:45 [manager.py:224] router recive req id 400 cost time 0.09710407257080078 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 8 cost time 0.1241767406463623 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 120 cost time 0.12004923820495605 s -INFO 06-24 20:02:45 [manager.py:68] detokenization recv req id 400 cost time 0.10146474838256836 s -DEBUG 06-24 20:02:45 [manager.py:391] Prefill Batch: batch_id=171311091287859552820779734640431589121, time:1750766565.8727746s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:45 [manager.py:391] -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:197.28970527648926ms total_cost_time:197.33381271362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5213 prompt_cache_len:5151 prompt_cache_ratio:0.9881066564358335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:240.74077606201172ms total_cost_time:240.7820224761963ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5215 prompt_cache_len:5154 prompt_cache_ratio:0.9883029721955896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:220.96943855285645ms total_cost_time:220.99590301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5170 prompt_cache_len:5145 prompt_cache_ratio:0.995164410058027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 -INFO 06-24 20:02:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 -DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:46 [batch.py:51] router release req id 8 -INFO 06-24 20:02:46 [batch.py:51] router release req id 120 -INFO 06-24 20:02:46 [batch.py:51] router release req id 400 -INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.14344072341918945 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.0900886058807373 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.08398175239562988 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.14513826370239258 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.09312033653259277 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.08826327323913574 s -DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=72655901201623586805876811164415711376, time:1750766566.0961924s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:46 [manager.py:391] -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:8 first_token_cost:202.3460865020752ms total_cost_time:202.38947868347168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5214 prompt_cache_len:5151 prompt_cache_ratio:0.9879171461449943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:120 first_token_cost:199.71895217895508ms total_cost_time:199.75709915161133ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5216 prompt_cache_len:5154 prompt_cache_ratio:0.9881134969325154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:45 lightllm_req_id:400 first_token_cost:209.1236114501953ms total_cost_time:209.16104316711426ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5171 prompt_cache_len:5145 prompt_cache_ratio:0.9949719590021272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 -DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:46 [batch.py:51] router release req id 8 -INFO 06-24 20:02:46 [batch.py:51] router release req id 120 -INFO 06-24 20:02:46 [batch.py:51] router release req id 400 -INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.16060757637023926 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.11036467552185059 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.09540677070617676 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.16232705116271973 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.11345648765563965 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.09962940216064453 s -INFO 06-24 20:02:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=40453273424850931302616342077142105795, time:1750766566.3215234s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:46 [manager.py:391] -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:249.01771545410156ms total_cost_time:249.0713596343994ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5215 prompt_cache_len:5151 prompt_cache_ratio:0.9877277085330777 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:233.56938362121582ms total_cost_time:233.6099147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5217 prompt_cache_len:5154 prompt_cache_ratio:0.9879240943070731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:218.57571601867676ms total_cost_time:218.60146522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5172 prompt_cache_len:5145 prompt_cache_ratio:0.9947795823665894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 -DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:46 [batch.py:51] router release req id 8 -INFO 06-24 20:02:46 [batch.py:51] router release req id 120 -INFO 06-24 20:02:46 [batch.py:51] router release req id 400 -INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.28092455863952637 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.24422788619995117 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.23957157135009766 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.28275585174560547 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.24747443199157715 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.24407458305358887 s -DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=172230966633847778216040757786092878543, time:1750766566.696777s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:46 [manager.py:391] -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:350.7411479949951ms total_cost_time:350.7864475250244ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5216 prompt_cache_len:5151 prompt_cache_ratio:0.9875383435582822 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:346.33398056030273ms total_cost_time:346.3706970214844ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5218 prompt_cache_len:5154 prompt_cache_ratio:0.987734764277501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:341.71533584594727ms total_cost_time:341.7401313781738ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5173 prompt_cache_len:5145 prompt_cache_ratio:0.9945872801082544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 -DEBUG 06-24 20:02:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:46 [batch.py:51] router release req id 8 -INFO 06-24 20:02:46 [batch.py:51] router release req id 120 -INFO 06-24 20:02:46 [batch.py:51] router release req id 400 -INFO 06-24 20:02:46 [manager.py:224] router recive req id 8 cost time 0.10393142700195312 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 120 cost time 0.06840944290161133 s -INFO 06-24 20:02:46 [manager.py:224] router recive req id 400 cost time 0.06266450881958008 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 8 cost time 0.10568881034851074 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 120 cost time 0.07154107093811035 s -INFO 06-24 20:02:46 [manager.py:68] detokenization recv req id 400 cost time 0.06706380844116211 s -DEBUG 06-24 20:02:46 [manager.py:391] Prefill Batch: batch_id=42485708086675373357309966435325513359, time:1750766566.8757374s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:46 [manager.py:391] -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:199.93972778320312ms total_cost_time:199.981689453125ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5217 prompt_cache_len:5151 prompt_cache_ratio:0.9873490511788384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:164.33358192443848ms total_cost_time:164.36004638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5219 prompt_cache_len:5154 prompt_cache_ratio:0.9875455068020693 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:162.92285919189453ms total_cost_time:162.95766830444336ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5174 prompt_cache_len:5145 prompt_cache_ratio:0.9943950521839969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 -INFO 06-24 20:02:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 -DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:47 [batch.py:51] router release req id 8 -INFO 06-24 20:02:47 [batch.py:51] router release req id 120 -INFO 06-24 20:02:47 [batch.py:51] router release req id 400 -INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.07356977462768555 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06813430786132812 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06139397621154785 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.0751190185546875 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07128763198852539 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06577181816101074 s -DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=141621909717983627052793268080161525040, time:1750766567.0530863s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:47 [manager.py:391] -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:8 first_token_cost:145.16496658325195ms total_cost_time:145.21026611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5218 prompt_cache_len:5151 prompt_cache_ratio:0.9871598313530088 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:120 first_token_cost:167.7708625793457ms total_cost_time:167.80710220336914ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5220 prompt_cache_len:5154 prompt_cache_ratio:0.9873563218390805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:46 lightllm_req_id:400 first_token_cost:161.07988357543945ms total_cost_time:161.10491752624512ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5175 prompt_cache_len:5145 prompt_cache_ratio:0.9942028985507246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 -DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:47 [batch.py:51] router release req id 8 -INFO 06-24 20:02:47 [batch.py:51] router release req id 120 -INFO 06-24 20:02:47 [batch.py:51] router release req id 400 -INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.09864521026611328 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06913280487060547 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06261277198791504 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.10019087791442871 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.0721583366394043 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06686139106750488 s -DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=65744191368026087255134059996568159161, time:1750766567.2308347s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:47 [manager.py:391] -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:148.27466011047363ms total_cost_time:148.31852912902832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5219 prompt_cache_len:5151 prompt_cache_ratio:0.9869706840390879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:169.89970207214355ms total_cost_time:169.94023323059082ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:5221 prompt_cache_len:5154 prompt_cache_ratio:0.9871672093468684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:163.47551345825195ms total_cost_time:163.50221633911133ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5176 prompt_cache_len:5145 prompt_cache_ratio:0.9940108191653787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 -DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:47 [batch.py:51] router release req id 8 -INFO 06-24 20:02:47 [batch.py:51] router release req id 120 -INFO 06-24 20:02:47 [batch.py:51] router release req id 400 -INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.12335062026977539 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.06944751739501953 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06419968605041504 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.12493491172790527 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07249283790588379 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.0684814453125 s -DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=1804965171305388270106208883991935717, time:1750766567.4084427s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:47 [manager.py:391] -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:199.97620582580566ms total_cost_time:200.01840591430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5220 prompt_cache_len:5151 prompt_cache_ratio:0.9867816091954023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:146.071195602417ms total_cost_time:146.09718322753906ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5222 prompt_cache_len:5154 prompt_cache_ratio:0.9869781692837993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:329.0572166442871ms total_cost_time:329.1027545928955ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5177 prompt_cache_len:5145 prompt_cache_ratio:0.9938188139849333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 -DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:47 [batch.py:51] router release req id 8 -INFO 06-24 20:02:47 [batch.py:51] router release req id 120 -INFO 06-24 20:02:47 [batch.py:51] router release req id 400 -INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.25643396377563477 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.2510795593261719 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.07161736488342285 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.2581908702850342 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.2542910575866699 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.07609987258911133 s -DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=67843138287693751065499900990329021479, time:1750766567.751175s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:47 [manager.py:391] -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:347.55587577819824ms total_cost_time:347.59998321533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5221 prompt_cache_len:5151 prompt_cache_ratio:0.9865926067803102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:342.17238426208496ms total_cost_time:342.1976566314697ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5223 prompt_cache_len:5154 prompt_cache_ratio:0.9867892016082711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:181.56719207763672ms total_cost_time:181.60605430603027ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5178 prompt_cache_len:5145 prompt_cache_ratio:0.9936268829663963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 -DEBUG 06-24 20:02:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:47 [batch.py:51] router release req id 8 -INFO 06-24 20:02:47 [batch.py:51] router release req id 120 -INFO 06-24 20:02:47 [batch.py:51] router release req id 400 -INFO 06-24 20:02:47 [manager.py:224] router recive req id 8 cost time 0.07826685905456543 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 120 cost time 0.07308268547058105 s -INFO 06-24 20:02:47 [manager.py:224] router recive req id 400 cost time 0.06275081634521484 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 8 cost time 0.0798642635345459 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 120 cost time 0.07611298561096191 s -INFO 06-24 20:02:47 [manager.py:68] detokenization recv req id 400 cost time 0.06707406044006348 s -DEBUG 06-24 20:02:47 [manager.py:391] Prefill Batch: batch_id=113979492298701802096809321035682046342, time:1750766567.928425s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:47 [manager.py:391] -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:147.2771167755127ms total_cost_time:147.40657806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.12946128845214844ms prompt_token_num:5222 prompt_cache_len:5151 prompt_cache_ratio:0.9864036767522022 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:47 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:120 first_token_cost:173.83193969726562ms total_cost_time:173.87104034423828ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5224 prompt_cache_len:5154 prompt_cache_ratio:0.9866003062787136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:400 first_token_cost:163.59853744506836ms total_cost_time:163.62428665161133ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5179 prompt_cache_len:5145 prompt_cache_ratio:0.9934350260668082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 -DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:48 [batch.py:51] router release req id 8 -INFO 06-24 20:02:48 [batch.py:51] router release req id 120 -INFO 06-24 20:02:48 [batch.py:51] router release req id 400 -INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.10093259811401367 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06888270378112793 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06319141387939453 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.10255908966064453 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07193541526794434 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06752872467041016 s -DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=171026834703893758007870875179720792469, time:1750766568.1073458s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:48 [manager.py:391] -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:47 lightllm_req_id:8 first_token_cost:197.1752643585205ms total_cost_time:197.21746444702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5223 prompt_cache_len:5151 prompt_cache_ratio:0.9862148190695003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:165.09127616882324ms total_cost_time:165.1175022125244ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5225 prompt_cache_len:5154 prompt_cache_ratio:0.9864114832535885 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:164.1671657562256ms total_cost_time:164.2019748687744ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5180 prompt_cache_len:5145 prompt_cache_ratio:0.9932432432432432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 -DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:48 [batch.py:51] router release req id 8 -INFO 06-24 20:02:48 [batch.py:51] router release req id 120 -INFO 06-24 20:02:48 [batch.py:51] router release req id 400 -INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.07435035705566406 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06873035430908203 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.062180280685424805 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.07669281959533691 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07339644432067871 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06882047653198242 s -DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=161512257408937614815745953922568981624, time:1750766568.2853134s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:48 [manager.py:391] -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:145.45559883117676ms total_cost_time:145.49803733825684ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5224 prompt_cache_len:5151 prompt_cache_ratio:0.9860260336906586 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:170.00246047973633ms total_cost_time:170.0572967529297ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5226 prompt_cache_len:5154 prompt_cache_ratio:0.9862227324913893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:163.47169876098633ms total_cost_time:163.4986400604248ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5181 prompt_cache_len:5145 prompt_cache_ratio:0.9930515344528084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 -DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:48 [batch.py:51] router release req id 8 -INFO 06-24 20:02:48 [batch.py:51] router release req id 120 -INFO 06-24 20:02:48 [batch.py:51] router release req id 400 -INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.10375332832336426 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06914138793945312 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06386280059814453 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.1048882007598877 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.0706186294555664 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06560993194580078 s -DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=220389546080767873193089226288918592327, time:1750766568.4676797s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:48 [manager.py:391] -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:349.9183654785156ms total_cost_time:349.963903427124ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5225 prompt_cache_len:5151 prompt_cache_ratio:0.9858373205741627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:315.2749538421631ms total_cost_time:315.30237197875977ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5227 prompt_cache_len:5154 prompt_cache_ratio:0.9860340539506409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:329.6546936035156ms total_cost_time:329.6947479248047ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5182 prompt_cache_len:5145 prompt_cache_ratio:0.9928598996526438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 -DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:48 [batch.py:51] router release req id 8 -INFO 06-24 20:02:48 [batch.py:51] router release req id 120 -INFO 06-24 20:02:48 [batch.py:51] router release req id 400 -INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.09048676490783691 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.08513689041137695 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.07510995864868164 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.09225940704345703 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.08833074569702148 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.07944226264953613 s -DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=183814381258494528671191550415711091738, time:1750766568.813486s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:48 [manager.py:391] -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:148.40197563171387ms total_cost_time:148.44512939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5226 prompt_cache_len:5151 prompt_cache_ratio:0.9856486796785304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:189.38350677490234ms total_cost_time:189.4216537475586ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5228 prompt_cache_len:5154 prompt_cache_ratio:0.9858454475899006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:179.2891025543213ms total_cost_time:179.31342124938965ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:5183 prompt_cache_len:5145 prompt_cache_ratio:0.9926683387999228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 -INFO 06-24 20:02:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 -DEBUG 06-24 20:02:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:48 [batch.py:51] router release req id 8 -INFO 06-24 20:02:48 [batch.py:51] router release req id 120 -INFO 06-24 20:02:48 [batch.py:51] router release req id 400 -INFO 06-24 20:02:48 [manager.py:224] router recive req id 8 cost time 0.11712503433227539 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 120 cost time 0.06935739517211914 s -INFO 06-24 20:02:48 [manager.py:224] router recive req id 400 cost time 0.06408143043518066 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 8 cost time 0.11897540092468262 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 120 cost time 0.07263517379760742 s -INFO 06-24 20:02:48 [manager.py:68] detokenization recv req id 400 cost time 0.06847476959228516 s -DEBUG 06-24 20:02:48 [manager.py:391] Prefill Batch: batch_id=193723613199783006716287912473031008588, time:1750766568.9948952s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:48 [manager.py:391] -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:8 first_token_cost:197.25370407104492ms total_cost_time:197.2956657409668ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5227 prompt_cache_len:5151 prompt_cache_ratio:0.9854601109623111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:120 first_token_cost:149.43504333496094ms total_cost_time:149.4605541229248ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5229 prompt_cache_len:5154 prompt_cache_ratio:0.9856569133677567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:48 lightllm_req_id:400 first_token_cost:167.88601875305176ms total_cost_time:167.9244041442871ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5184 prompt_cache_len:5145 prompt_cache_ratio:0.9924768518518519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 -DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:49 [batch.py:51] router release req id 8 -INFO 06-24 20:02:49 [batch.py:51] router release req id 120 -INFO 06-24 20:02:49 [batch.py:51] router release req id 400 -INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.09158658981323242 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.0866386890411377 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.0740041732788086 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.09335565567016602 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.0898129940032959 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.07839202880859375 s -DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=36108071487365200816681941474101197067, time:1750766569.1776986s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:49 [manager.py:391] -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:193.80664825439453ms total_cost_time:193.8493251800537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5228 prompt_cache_len:5151 prompt_cache_ratio:0.9852716143840857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:188.80295753479004ms total_cost_time:188.82989883422852ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5230 prompt_cache_len:5154 prompt_cache_ratio:0.9854684512428298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:178.21812629699707ms total_cost_time:178.2546043395996ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5185 prompt_cache_len:5145 prompt_cache_ratio:0.9922854387656702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 -DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:49 [batch.py:51] router release req id 8 -INFO 06-24 20:02:49 [batch.py:51] router release req id 120 -INFO 06-24 20:02:49 [batch.py:51] router release req id 400 -INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.07204031944274902 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.06410527229309082 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.055680274963378906 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.07362127304077148 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.06734776496887207 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.060156822204589844 s -DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=332687357680878472029442304223983177776, time:1750766569.3614087s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:49 [manager.py:391] -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:145.51758766174316ms total_cost_time:145.55978775024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5229 prompt_cache_len:5151 prompt_cache_ratio:0.985083189902467 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:168.54619979858398ms total_cost_time:168.58267784118652ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5231 prompt_cache_len:5154 prompt_cache_ratio:0.9852800611737718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:160.1412296295166ms total_cost_time:160.16697883605957ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5186 prompt_cache_len:5145 prompt_cache_ratio:0.9920940994986502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 -DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:49 [batch.py:51] router release req id 8 -INFO 06-24 20:02:49 [batch.py:51] router release req id 120 -INFO 06-24 20:02:49 [batch.py:51] router release req id 400 -INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.2644016742706299 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.23098254203796387 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.22561407089233398 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.2661874294281006 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.23415732383728027 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.22982549667358398 s -DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=54126910484437954321278348656260176441, time:1750766569.7047596s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:49 [manager.py:391] -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:350.5678176879883ms total_cost_time:350.61097145080566ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5230 prompt_cache_len:5151 prompt_cache_ratio:0.9848948374760994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:317.05760955810547ms total_cost_time:317.08431243896484ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5232 prompt_cache_len:5154 prompt_cache_ratio:0.9850917431192661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:331.2101364135742ms total_cost_time:331.24756813049316ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5187 prompt_cache_len:5145 prompt_cache_ratio:0.9919028340080972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 -DEBUG 06-24 20:02:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:49 [batch.py:51] router release req id 8 -INFO 06-24 20:02:49 [batch.py:51] router release req id 120 -INFO 06-24 20:02:49 [batch.py:51] router release req id 400 -INFO 06-24 20:02:49 [manager.py:224] router recive req id 8 cost time 0.0867466926574707 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 120 cost time 0.08163571357727051 s -INFO 06-24 20:02:49 [manager.py:224] router recive req id 400 cost time 0.06937360763549805 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 8 cost time 0.08866572380065918 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 120 cost time 0.08495783805847168 s -INFO 06-24 20:02:49 [manager.py:68] detokenization recv req id 400 cost time 0.0737156867980957 s -DEBUG 06-24 20:02:49 [manager.py:391] Prefill Batch: batch_id=284584860480274361578153255982552055212, time:1750766569.885161s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:49 [manager.py:391] -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:148.48971366882324ms total_cost_time:148.53262901306152ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5231 prompt_cache_len:5151 prompt_cache_ratio:0.984706557063659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:184.68618392944336ms total_cost_time:184.73505973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:5233 prompt_cache_len:5154 prompt_cache_ratio:0.9849034970380279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:172.41263389587402ms total_cost_time:172.44458198547363ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:5188 prompt_cache_len:5145 prompt_cache_ratio:0.9917116422513492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 -INFO 06-24 20:02:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 -DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:50 [batch.py:51] router release req id 8 -INFO 06-24 20:02:50 [batch.py:51] router release req id 120 -INFO 06-24 20:02:50 [batch.py:51] router release req id 400 -INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.11399674415588379 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.0692594051361084 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.06387782096862793 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.11558175086975098 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.07245659828186035 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.06834053993225098 s -DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=113887091680116134046690464418394073136, time:1750766570.0675366s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:50 [manager.py:391] -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:8 first_token_cost:198.96626472473145ms total_cost_time:199.01132583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5232 prompt_cache_len:5151 prompt_cache_ratio:0.9845183486238532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:120 first_token_cost:154.41489219665527ms total_cost_time:154.44087982177734ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5234 prompt_cache_len:5154 prompt_cache_ratio:0.9847153228888039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:49 lightllm_req_id:400 first_token_cost:168.75576972961426ms total_cost_time:168.7936782836914ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5189 prompt_cache_len:5145 prompt_cache_ratio:0.9915205241857776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 -DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:50 [batch.py:51] router release req id 8 -INFO 06-24 20:02:50 [batch.py:51] router release req id 120 -INFO 06-24 20:02:50 [batch.py:51] router release req id 400 -INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.08876347541809082 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.08342266082763672 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.07222270965576172 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.09048843383789062 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.08654928207397461 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.07658696174621582 s -DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=72154061987392291080148914565191156117, time:1750766570.250024s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:50 [manager.py:391] -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:146.91519737243652ms total_cost_time:146.93355560302734ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:5233 prompt_cache_len:5151 prompt_cache_ratio:0.9843302121154214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:187.8368854522705ms total_cost_time:187.87527084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5235 prompt_cache_len:5154 prompt_cache_ratio:0.9845272206303725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:176.70536041259766ms total_cost_time:176.73087120056152ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5190 prompt_cache_len:5145 prompt_cache_ratio:0.9913294797687862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 -DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:50 [batch.py:51] router release req id 8 -INFO 06-24 20:02:50 [batch.py:51] router release req id 120 -INFO 06-24 20:02:50 [batch.py:51] router release req id 400 -INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.11793732643127441 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.06876111030578613 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.06348848342895508 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.11950349807739258 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.07170939445495605 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.06775021553039551 s -DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=116340167260195613421598654793460460159, time:1750766570.4315321s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:50 [manager.py:391] -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:202.07953453063965ms total_cost_time:202.12221145629883ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5234 prompt_cache_len:5151 prompt_cache_ratio:0.9841421474971341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:152.82869338989258ms total_cost_time:152.85396575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5236 prompt_cache_len:5154 prompt_cache_ratio:0.9843391902215431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:169.85654830932617ms total_cost_time:169.9063777923584ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5191 prompt_cache_len:5145 prompt_cache_ratio:0.9911385089578116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 -DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:50 [batch.py:51] router release req id 8 -INFO 06-24 20:02:50 [batch.py:51] router release req id 120 -INFO 06-24 20:02:50 [batch.py:51] router release req id 400 -INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.2665128707885742 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.2609217166900635 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.24997472763061523 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.26821470260620117 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.2642223834991455 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.254561185836792 s -DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=114861829841792204182494582706844302284, time:1750766570.792475s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:50 [manager.py:391] -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:345.2742099761963ms total_cost_time:345.3185558319092ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5235 prompt_cache_len:5151 prompt_cache_ratio:0.9839541547277937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:339.80703353881836ms total_cost_time:339.832067489624ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5237 prompt_cache_len:5154 prompt_cache_ratio:0.9841512316211571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 -ERROR 06-24 20:02:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:352.7500629425049ms total_cost_time:352.78868675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5192 prompt_cache_len:5145 prompt_cache_ratio:0.9909476117103235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 -DEBUG 06-24 20:02:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:50 [batch.py:51] router release req id 8 -INFO 06-24 20:02:50 [batch.py:51] router release req id 120 -INFO 06-24 20:02:50 [batch.py:51] router release req id 400 -INFO 06-24 20:02:50 [manager.py:224] router recive req id 8 cost time 0.09287905693054199 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 120 cost time 0.08757781982421875 s -INFO 06-24 20:02:50 [manager.py:224] router recive req id 400 cost time 0.07364726066589355 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 8 cost time 0.09470415115356445 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 120 cost time 0.09082150459289551 s -INFO 06-24 20:02:50 [manager.py:68] detokenization recv req id 400 cost time 0.07813429832458496 s -DEBUG 06-24 20:02:50 [manager.py:391] Prefill Batch: batch_id=258074016904941905616835299421948813108, time:1750766570.9736176s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:50 [manager.py:391] -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:8 first_token_cost:194.53167915344238ms total_cost_time:194.57507133483887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5236 prompt_cache_len:5151 prompt_cache_ratio:0.9837662337662337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:120 first_token_cost:189.13841247558594ms total_cost_time:189.1646385192871ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5238 prompt_cache_len:5154 prompt_cache_ratio:0.983963344788087 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:50 lightllm_req_id:400 first_token_cost:178.93052101135254ms total_cost_time:178.96628379821777ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:5193 prompt_cache_len:5145 prompt_cache_ratio:0.9907567879838244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 -DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:51 [batch.py:51] router release req id 8 -INFO 06-24 20:02:51 [batch.py:51] router release req id 120 -INFO 06-24 20:02:51 [batch.py:51] router release req id 400 -INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.07263016700744629 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.0673072338104248 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.05789756774902344 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.07437014579772949 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.07047247886657715 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.06220197677612305 s -DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=5945537567179164147587546669967627772, time:1750766571.157539s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:51 [manager.py:391] -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:145.4331874847412ms total_cost_time:145.4770565032959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5237 prompt_cache_len:5151 prompt_cache_ratio:0.9835783845713194 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:171.53310775756836ms total_cost_time:171.5712547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5239 prompt_cache_len:5154 prompt_cache_ratio:0.9837755296812368 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:162.07170486450195ms total_cost_time:162.09721565246582ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5194 prompt_cache_len:5145 prompt_cache_ratio:0.9905660377358491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 -DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:51 [batch.py:51] router release req id 8 -INFO 06-24 20:02:51 [batch.py:51] router release req id 120 -INFO 06-24 20:02:51 [batch.py:51] router release req id 400 -INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.10373950004577637 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.06980681419372559 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.06439328193664551 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.10528230667114258 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.07274293899536133 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.06854772567749023 s -DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=119026861543401500830279452836118553216, time:1750766571.33999s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:51 [manager.py:391] -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:199.16558265686035ms total_cost_time:199.20778274536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5238 prompt_cache_len:5151 prompt_cache_ratio:0.9833906071019473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:165.21811485290527ms total_cost_time:165.24481773376465ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5240 prompt_cache_len:5154 prompt_cache_ratio:0.983587786259542 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:183.20798873901367ms total_cost_time:183.2449436187744ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5195 prompt_cache_len:5145 prompt_cache_ratio:0.9903753609239654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 -DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:51 [batch.py:51] router release req id 8 -INFO 06-24 20:02:51 [batch.py:51] router release req id 120 -INFO 06-24 20:02:51 [batch.py:51] router release req id 400 -INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.23725247383117676 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.23113727569580078 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.22025251388549805 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.23898601531982422 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.23438167572021484 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.22449541091918945 s -DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=131024800894177316653257120401693257, time:1750766571.6836846s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:51 [manager.py:391] -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:295.058012008667ms total_cost_time:295.1023578643799ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5239 prompt_cache_len:5151 prompt_cache_ratio:0.9832029013170452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:335.446834564209ms total_cost_time:335.48808097839355ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5241 prompt_cache_len:5154 prompt_cache_ratio:0.9834001144819691 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:324.4318962097168ms total_cost_time:324.45812225341797ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5196 prompt_cache_len:5145 prompt_cache_ratio:0.9901847575057737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 -DEBUG 06-24 20:02:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:51 [batch.py:51] router release req id 8 -INFO 06-24 20:02:51 [batch.py:51] router release req id 120 -INFO 06-24 20:02:51 [batch.py:51] router release req id 400 -INFO 06-24 20:02:51 [manager.py:224] router recive req id 8 cost time 0.11945700645446777 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 120 cost time 0.0689542293548584 s -INFO 06-24 20:02:51 [manager.py:224] router recive req id 400 cost time 0.06354188919067383 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 8 cost time 0.12111949920654297 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 120 cost time 0.0722959041595459 s -INFO 06-24 20:02:51 [manager.py:68] detokenization recv req id 400 cost time 0.0680394172668457 s -DEBUG 06-24 20:02:51 [manager.py:391] Prefill Batch: batch_id=9231044424691171558073278120236064217, time:1750766571.865661s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:51 [manager.py:391] -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:197.8168487548828ms total_cost_time:197.8588104248047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5240 prompt_cache_len:5151 prompt_cache_ratio:0.9830152671755725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:147.5076675415039ms total_cost_time:147.53413200378418ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5242 prompt_cache_len:5154 prompt_cache_ratio:0.9832125143075162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 -ERROR 06-24 20:02:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:166.87893867492676ms total_cost_time:166.9178009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5197 prompt_cache_len:5145 prompt_cache_ratio:0.9899942274389071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 -DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:52 [batch.py:51] router release req id 8 -INFO 06-24 20:02:52 [batch.py:51] router release req id 120 -INFO 06-24 20:02:52 [batch.py:51] router release req id 400 -INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.09177446365356445 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.08695030212402344 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.07159900665283203 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.09340262413024902 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.08996081352233887 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.0758049488067627 s -DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=3241771730246344814940865378067211563, time:1750766572.0470524s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:52 [manager.py:391] -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:8 first_token_cost:193.70460510253906ms total_cost_time:193.74585151672363ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5241 prompt_cache_len:5151 prompt_cache_ratio:0.9828277046365197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:120 first_token_cost:188.81797790527344ms total_cost_time:188.84515762329102ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5243 prompt_cache_len:5154 prompt_cache_ratio:0.9830249856952127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:51 lightllm_req_id:400 first_token_cost:175.2147674560547ms total_cost_time:175.2481460571289ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5198 prompt_cache_len:5145 prompt_cache_ratio:0.9898037706810312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 -DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:52 [batch.py:51] router release req id 8 -INFO 06-24 20:02:52 [batch.py:51] router release req id 120 -INFO 06-24 20:02:52 [batch.py:51] router release req id 400 -INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.07125329971313477 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.06663727760314941 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.0594332218170166 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.07299089431762695 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.06977391242980957 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06398510932922363 s -DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=83457029891860222385253277944389321619, time:1750766572.2273893s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:52 [manager.py:391] -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:146.76308631896973ms total_cost_time:146.80743217468262ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5242 prompt_cache_len:5151 prompt_cache_ratio:0.9826402136589089 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:169.48199272155762ms total_cost_time:169.53635215759277ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5244 prompt_cache_len:5154 prompt_cache_ratio:0.982837528604119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:162.68301010131836ms total_cost_time:162.7202033996582ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5199 prompt_cache_len:5145 prompt_cache_ratio:0.9896133871898442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 -DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:52 [batch.py:51] router release req id 8 -INFO 06-24 20:02:52 [batch.py:51] router release req id 120 -INFO 06-24 20:02:52 [batch.py:51] router release req id 400 -INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.09785985946655273 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.0672454833984375 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.059719085693359375 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.09942197799682617 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.07021665573120117 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06392478942871094 s -DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=161102350868659160106859563961468787287, time:1750766572.4088843s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:52 [manager.py:391] -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:197.93128967285156ms total_cost_time:197.98564910888672ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5243 prompt_cache_len:5151 prompt_cache_ratio:0.9824527942017929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:167.3884391784668ms total_cost_time:167.42205619812012ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:5245 prompt_cache_len:5154 prompt_cache_ratio:0.982650142993327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:326.54786109924316ms total_cost_time:326.59220695495605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5200 prompt_cache_len:5145 prompt_cache_ratio:0.989423076923077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 -DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:52 [batch.py:51] router release req id 8 -INFO 06-24 20:02:52 [batch.py:51] router release req id 120 -INFO 06-24 20:02:52 [batch.py:51] router release req id 400 -INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.2346498966217041 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.22957229614257812 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.07171273231506348 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.2364654541015625 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.23282957077026367 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.07621216773986816 s -DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=129884596516568863242925438522015958112, time:1750766572.7524042s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:52 [manager.py:391] -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:297.4991798400879ms total_cost_time:297.544002532959ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5244 prompt_cache_len:5151 prompt_cache_ratio:0.9822654462242563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:332.9939842224121ms total_cost_time:333.03308486938477ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5246 prompt_cache_len:5154 prompt_cache_ratio:0.9824628288219596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:175.19116401672363ms total_cost_time:175.2171516418457ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5201 prompt_cache_len:5145 prompt_cache_ratio:0.9892328398384926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 -INFO 06-24 20:02:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 -DEBUG 06-24 20:02:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:52 [batch.py:51] router release req id 8 -INFO 06-24 20:02:52 [batch.py:51] router release req id 120 -INFO 06-24 20:02:52 [batch.py:51] router release req id 400 -INFO 06-24 20:02:52 [manager.py:224] router recive req id 8 cost time 0.11379218101501465 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 120 cost time 0.06990361213684082 s -INFO 06-24 20:02:52 [manager.py:224] router recive req id 400 cost time 0.06427288055419922 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 8 cost time 0.1154327392578125 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 120 cost time 0.07317590713500977 s -INFO 06-24 20:02:52 [manager.py:68] detokenization recv req id 400 cost time 0.06866884231567383 s -DEBUG 06-24 20:02:52 [manager.py:391] Prefill Batch: batch_id=209942190913813938067526097973070525471, time:1750766572.9348528s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:52 [manager.py:391] -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:52 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:8 first_token_cost:200.65069198608398ms total_cost_time:200.69265365600586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5245 prompt_cache_len:5151 prompt_cache_ratio:0.9820781696854147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:120 first_token_cost:156.9201946258545ms total_cost_time:156.94689750671387ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5247 prompt_cache_len:5154 prompt_cache_ratio:0.982275586049171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:52 lightllm_req_id:400 first_token_cost:170.2725887298584ms total_cost_time:170.31145095825195ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5202 prompt_cache_len:5145 prompt_cache_ratio:0.989042675893887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 -DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:53 [batch.py:51] router release req id 8 -INFO 06-24 20:02:53 [batch.py:51] router release req id 120 -INFO 06-24 20:02:53 [batch.py:51] router release req id 400 -INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.08655643463134766 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.081329345703125 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0688636302947998 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.08811497688293457 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.08447098731994629 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.07328128814697266 s -DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=2705697327813994178765300784631705432, time:1750766573.1153185s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:53 [manager.py:391] -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:149.04284477233887ms total_cost_time:149.08766746520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5246 prompt_cache_len:5151 prompt_cache_ratio:0.9818909645444148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:02:53 [statics_utils.py:24] mean first cost: 253.81479736502843 ms -INFO 06-24 20:02:53 [statics_utils.py:24] mean per token cost: 0.6276211114843245 ms -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:53 [manager.py:620] left req id 8can release False refcount 1 -INFO 06-24 20:02:53 [manager.py:620] left req id 120can release True refcount 3 -INFO 06-24 20:02:53 [manager.py:620] left req id 400can release False refcount 4 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:184.0071678161621ms total_cost_time:184.04626846313477ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5248 prompt_cache_len:5154 prompt_cache_ratio:0.9820884146341463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:171.50282859802246ms total_cost_time:171.5259552001953ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:5203 prompt_cache_len:5145 prompt_cache_ratio:0.9888525850470882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 -DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:53 [batch.py:51] router release req id 8 -INFO 06-24 20:02:53 [batch.py:51] router release req id 120 -INFO 06-24 20:02:53 [batch.py:51] router release req id 400 -INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.11163592338562012 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.0683906078338623 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0630185604095459 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.11339902877807617 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.0717473030090332 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.06750822067260742 s -DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=92134936354665294992778054666637416072, time:1750766573.2956557s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:53 [manager.py:391] -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:199.86629486083984ms total_cost_time:199.91159439086914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5247 prompt_cache_len:5151 prompt_cache_ratio:0.9817038307604345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:156.80170059204102ms total_cost_time:156.8281650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5249 prompt_cache_len:5154 prompt_cache_ratio:0.9819013145361021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:170.56655883789062ms total_cost_time:170.60399055480957ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5204 prompt_cache_len:5145 prompt_cache_ratio:0.9886625672559569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 -DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:53 [batch.py:51] router release req id 8 -INFO 06-24 20:02:53 [batch.py:51] router release req id 120 -INFO 06-24 20:02:53 [batch.py:51] router release req id 400 -INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.0836787223815918 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.07861518859863281 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.06662321090698242 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.08552145957946777 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.08181571960449219 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.0711214542388916 s -DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=13151274556950410961679043541694526760, time:1750766573.4754348s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:53 [manager.py:391] -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:147.11427688598633ms total_cost_time:147.1574306488037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5248 prompt_cache_len:5151 prompt_cache_ratio:0.981516768292683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:182.3885440826416ms total_cost_time:182.43002891540527ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5250 prompt_cache_len:5154 prompt_cache_ratio:0.9817142857142858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:170.49837112426758ms total_cost_time:170.52388191223145ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5205 prompt_cache_len:5145 prompt_cache_ratio:0.9884726224783862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 -DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:53 [batch.py:51] router release req id 8 -INFO 06-24 20:02:53 [batch.py:51] router release req id 120 -INFO 06-24 20:02:53 [batch.py:51] router release req id 400 -INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.274993896484375 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.23135662078857422 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.27588629722595215 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.22590875625610352 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.23252582550048828 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.22729897499084473 s -DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=81634246692624740912354441538916512075, time:1750766573.8191593s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:53 [manager.py:391] -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:350.5971431732178ms total_cost_time:350.64005851745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5249 prompt_cache_len:5151 prompt_cache_ratio:0.9813297771004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 -ERROR 06-24 20:02:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:333.5714340209961ms total_cost_time:333.61124992370605ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5251 prompt_cache_len:5154 prompt_cache_ratio:0.9815273281279756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:328.1712532043457ms total_cost_time:328.1974792480469ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5206 prompt_cache_len:5145 prompt_cache_ratio:0.9882827506723012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 -INFO 06-24 20:02:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 -DEBUG 06-24 20:02:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:53 [batch.py:51] router release req id 8 -INFO 06-24 20:02:53 [batch.py:51] router release req id 120 -INFO 06-24 20:02:53 [batch.py:51] router release req id 400 -INFO 06-24 20:02:53 [manager.py:224] router recive req id 8 cost time 0.09893250465393066 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 120 cost time 0.06884479522705078 s -INFO 06-24 20:02:53 [manager.py:224] router recive req id 400 cost time 0.0640878677368164 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 8 cost time 0.10047078132629395 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 120 cost time 0.07195591926574707 s -INFO 06-24 20:02:53 [manager.py:68] detokenization recv req id 400 cost time 0.06844496726989746 s -DEBUG 06-24 20:02:53 [manager.py:391] Prefill Batch: batch_id=3265062220488357911602693387903255028, time:1750766573.9997535s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:53 [manager.py:391] -DEBUG 06-24 20:02:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 68344.947 tokens/s -DEBUG 06-24 20:02:54 [stats.py:37] Avg prompt tokens throughput: 68318.746 tokens/s -DEBUG 06-24 20:02:54 [stats.py:37] Avg generate tokens throughput: 26.202 tokens/s -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:8 first_token_cost:199.83887672424316ms total_cost_time:199.88250732421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5250 prompt_cache_len:5151 prompt_cache_ratio:0.9811428571428571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:120 first_token_cost:169.94500160217285ms total_cost_time:169.97194290161133ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5252 prompt_cache_len:5154 prompt_cache_ratio:0.9813404417364814 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:53 lightllm_req_id:400 first_token_cost:168.0431365966797ms total_cost_time:168.07866096496582ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5207 prompt_cache_len:5145 prompt_cache_ratio:0.9880929517956597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 -DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:54 [batch.py:51] router release req id 8 -INFO 06-24 20:02:54 [batch.py:51] router release req id 120 -INFO 06-24 20:02:54 [batch.py:51] router release req id 400 -INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.06940555572509766 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06410503387451172 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.0575556755065918 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.07092642784118652 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.06723570823669434 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06194353103637695 s -DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=206958994617528890900615359731581510637, time:1750766574.1810417s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:54 [manager.py:391] -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:143.72849464416504ms total_cost_time:143.77331733703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5251 prompt_cache_len:5151 prompt_cache_ratio:0.9809560083793563 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:166.96476936340332ms total_cost_time:167.0057773590088ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5253 prompt_cache_len:5154 prompt_cache_ratio:0.9811536264991434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:160.53104400634766ms total_cost_time:160.55798530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5208 prompt_cache_len:5145 prompt_cache_ratio:0.9879032258064516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 -DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:54 [batch.py:51] router release req id 8 -INFO 06-24 20:02:54 [batch.py:51] router release req id 120 -INFO 06-24 20:02:54 [batch.py:51] router release req id 400 -INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.0986175537109375 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06905198097229004 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06377983093261719 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.10032796859741211 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07214903831481934 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06863665580749512 s -DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=146170109155660599558531665483957543008, time:1750766574.3614078s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:54 [manager.py:391] -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:198.87113571166992ms total_cost_time:198.91619682312012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5252 prompt_cache_len:5151 prompt_cache_ratio:0.9807692307692307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:169.26050186157227ms total_cost_time:169.28625106811523ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5254 prompt_cache_len:5154 prompt_cache_ratio:0.980966882375333 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:166.87369346618652ms total_cost_time:166.90850257873535ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5209 prompt_cache_len:5145 prompt_cache_ratio:0.9877135726626992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 -DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:54 [batch.py:51] router release req id 8 -INFO 06-24 20:02:54 [batch.py:51] router release req id 120 -INFO 06-24 20:02:54 [batch.py:51] router release req id 400 -INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.07230687141418457 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.06747937202453613 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06181001663208008 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.07379937171936035 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07049369812011719 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06596875190734863 s -DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=136919901274101448323875996927526254737, time:1750766574.542649s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:54 [manager.py:391] -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:146.38113975524902ms total_cost_time:146.42596244812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5253 prompt_cache_len:5151 prompt_cache_ratio:0.9805825242718447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:333.7271213531494ms total_cost_time:333.7712287902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5255 prompt_cache_len:5154 prompt_cache_ratio:0.9807802093244529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:328.10258865356445ms total_cost_time:328.1288146972656ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5210 prompt_cache_len:5145 prompt_cache_ratio:0.9875239923224568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 -DEBUG 06-24 20:02:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:54 [batch.py:51] router release req id 8 -INFO 06-24 20:02:54 [batch.py:51] router release req id 120 -INFO 06-24 20:02:54 [batch.py:51] router release req id 400 -INFO 06-24 20:02:54 [manager.py:224] router recive req id 8 cost time 0.2641136646270752 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 120 cost time 0.0690619945526123 s -INFO 06-24 20:02:54 [manager.py:224] router recive req id 400 cost time 0.06465697288513184 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 8 cost time 0.2651369571685791 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 120 cost time 0.07157778739929199 s -INFO 06-24 20:02:54 [manager.py:68] detokenization recv req id 400 cost time 0.06843113899230957 s -DEBUG 06-24 20:02:54 [manager.py:391] Prefill Batch: batch_id=34011374031020010090033538090945874728, time:1750766574.8859375s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:54 [manager.py:391] -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:348.7358093261719ms total_cost_time:348.77943992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5254 prompt_cache_len:5151 prompt_cache_ratio:0.9803958888465931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:153.68938446044922ms total_cost_time:153.7158489227295ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5256 prompt_cache_len:5154 prompt_cache_ratio:0.980593607305936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 -ERROR 06-24 20:02:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:170.21942138671875ms total_cost_time:170.2558994293213ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5211 prompt_cache_len:5145 prompt_cache_ratio:0.9873344847438111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 -DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:55 [batch.py:51] router release req id 8 -INFO 06-24 20:02:55 [batch.py:51] router release req id 120 -INFO 06-24 20:02:55 [batch.py:51] router release req id 400 -INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.08820199966430664 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.08188533782958984 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.0710752010345459 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.08975553512573242 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.08503437042236328 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07547402381896973 s -DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=278367308038101676331255405218844979123, time:1750766575.067711s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:55 [manager.py:391] -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:8 first_token_cost:146.67940139770508ms total_cost_time:146.72422409057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5255 prompt_cache_len:5151 prompt_cache_ratio:0.980209324452902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:120 first_token_cost:185.6391429901123ms total_cost_time:185.68754196166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5257 prompt_cache_len:5154 prompt_cache_ratio:0.9804070762792467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:54 lightllm_req_id:400 first_token_cost:175.0478744506836ms total_cost_time:175.08578300476074ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5212 prompt_cache_len:5145 prompt_cache_ratio:0.987145049884881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 -DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:55 [batch.py:51] router release req id 8 -INFO 06-24 20:02:55 [batch.py:51] router release req id 120 -INFO 06-24 20:02:55 [batch.py:51] router release req id 400 -INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.11642789840698242 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.06826424598693848 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.06376218795776367 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.11816811561584473 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.07135510444641113 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.06809425354003906 s -DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=324459728662323944494060785249951884936, time:1750766575.248322s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:55 [manager.py:391] -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:199.0346908569336ms total_cost_time:199.07665252685547ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5256 prompt_cache_len:5151 prompt_cache_ratio:0.9800228310502284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:150.79760551452637ms total_cost_time:150.82454681396484ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5258 prompt_cache_len:5154 prompt_cache_ratio:0.9802206162038798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:167.222261428833ms total_cost_time:167.26016998291016ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5213 prompt_cache_len:5145 prompt_cache_ratio:0.9869556877038174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 -DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:55 [batch.py:51] router release req id 8 -INFO 06-24 20:02:55 [batch.py:51] router release req id 120 -INFO 06-24 20:02:55 [batch.py:51] router release req id 400 -INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.09134721755981445 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.0855112075805664 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.07272052764892578 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.09288477897644043 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.08856606483459473 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07706713676452637 s -DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=65400348464549236068612662973453324390, time:1750766575.4307563s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:55 [manager.py:391] -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:146.88754081726074ms total_cost_time:146.93140983581543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5257 prompt_cache_len:5151 prompt_cache_ratio:0.9798364085980598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:192.00444221496582ms total_cost_time:192.0452117919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5259 prompt_cache_len:5154 prompt_cache_ratio:0.980034227039361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:338.1540775299072ms total_cost_time:338.1989002227783ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5214 prompt_cache_len:5145 prompt_cache_ratio:0.9867663981588032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 -DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:55 [batch.py:51] router release req id 8 -INFO 06-24 20:02:55 [batch.py:51] router release req id 120 -INFO 06-24 20:02:55 [batch.py:51] router release req id 400 -INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.2814028263092041 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.2294013500213623 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.07239556312561035 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.2832174301147461 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.23264408111572266 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.07686209678649902 s -DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=284083830934789910735376228269160234085, time:1750766575.7746549s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:55 [manager.py:391] -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:349.6870994567871ms total_cost_time:349.7285842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5258 prompt_cache_len:5151 prompt_cache_ratio:0.9796500570559148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:332.72504806518555ms total_cost_time:332.7798843383789ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5260 prompt_cache_len:5154 prompt_cache_ratio:0.9798479087452472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:175.77242851257324ms total_cost_time:175.80032348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5215 prompt_cache_len:5145 prompt_cache_ratio:0.9865771812080537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 -INFO 06-24 20:02:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 -DEBUG 06-24 20:02:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:55 [batch.py:51] router release req id 8 -INFO 06-24 20:02:55 [batch.py:51] router release req id 120 -INFO 06-24 20:02:55 [batch.py:51] router release req id 400 -INFO 06-24 20:02:55 [manager.py:224] router recive req id 8 cost time 0.10731053352355957 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 120 cost time 0.06700968742370605 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 8 cost time 0.10824441909790039 s -INFO 06-24 20:02:55 [manager.py:224] router recive req id 400 cost time 0.06201624870300293 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 120 cost time 0.06818294525146484 s -INFO 06-24 20:02:55 [manager.py:68] detokenization recv req id 400 cost time 0.06318330764770508 s -DEBUG 06-24 20:02:55 [manager.py:391] Prefill Batch: batch_id=211766155860229120632619050207040578690, time:1750766575.955179s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:55 [manager.py:391] -ERROR 06-24 20:02:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:8 first_token_cost:196.63190841674805ms total_cost_time:196.67482376098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5259 prompt_cache_len:5151 prompt_cache_ratio:0.9794637763833428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:120 first_token_cost:156.2635898590088ms total_cost_time:156.29005432128906ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5261 prompt_cache_len:5154 prompt_cache_ratio:0.9796616612811253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:55 lightllm_req_id:400 first_token_cost:171.51188850402832ms total_cost_time:171.54979705810547ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5216 prompt_cache_len:5145 prompt_cache_ratio:0.9863880368098159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 -DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:56 [batch.py:51] router release req id 8 -INFO 06-24 20:02:56 [batch.py:51] router release req id 120 -INFO 06-24 20:02:56 [batch.py:51] router release req id 400 -INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.08247923851013184 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.07744526863098145 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06571388244628906 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.08420991897583008 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.08054614067077637 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.06988787651062012 s -DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=76294914724792995211805685286338746436, time:1750766576.136636s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:56 [manager.py:391] -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:148.37932586669922ms total_cost_time:148.423433303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5260 prompt_cache_len:5151 prompt_cache_ratio:0.9792775665399239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:181.94866180419922ms total_cost_time:181.9894313812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5262 prompt_cache_len:5154 prompt_cache_ratio:0.9794754846066135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:170.10998725891113ms total_cost_time:170.1350212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5217 prompt_cache_len:5145 prompt_cache_ratio:0.9861989649223691 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 -DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:56 [batch.py:51] router release req id 8 -INFO 06-24 20:02:56 [batch.py:51] router release req id 120 -INFO 06-24 20:02:56 [batch.py:51] router release req id 400 -INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.11077141761779785 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.06993722915649414 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06426572799682617 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.1122901439666748 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.07293963432312012 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.06844687461853027 s -DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=295722499348286923707146046955766765905, time:1750766576.3189332s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:56 [manager.py:391] -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:199.06997680664062ms total_cost_time:199.1140842437744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5261 prompt_cache_len:5151 prompt_cache_ratio:0.979091427485269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:158.27703475952148ms total_cost_time:158.30373764038086ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5263 prompt_cache_len:5154 prompt_cache_ratio:0.9792893786813605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:171.66519165039062ms total_cost_time:171.70405387878418ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5218 prompt_cache_len:5145 prompt_cache_ratio:0.9860099655040245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 -DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:56 [batch.py:51] router release req id 8 -INFO 06-24 20:02:56 [batch.py:51] router release req id 120 -INFO 06-24 20:02:56 [batch.py:51] router release req id 400 -INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.08559179306030273 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.0804300308227539 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.07039880752563477 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.08800148963928223 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.08546566963195801 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.07663488388061523 s -DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=67362860849785924080459254445982399049, time:1750766576.5014725s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:56 [manager.py:391] -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:147.3236083984375ms total_cost_time:147.36580848693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5262 prompt_cache_len:5151 prompt_cache_ratio:0.9789053591790193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:344.4664478302002ms total_cost_time:344.5086479187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5264 prompt_cache_len:5154 prompt_cache_ratio:0.9791033434650456 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:338.14406394958496ms total_cost_time:338.1788730621338ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5219 prompt_cache_len:5145 prompt_cache_ratio:0.9858210385131251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 -DEBUG 06-24 20:02:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:56 [batch.py:51] router release req id 8 -INFO 06-24 20:02:56 [batch.py:51] router release req id 120 -INFO 06-24 20:02:56 [batch.py:51] router release req id 400 -INFO 06-24 20:02:56 [manager.py:224] router recive req id 8 cost time 0.2792527675628662 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 120 cost time 0.07652473449707031 s -INFO 06-24 20:02:56 [manager.py:224] router recive req id 400 cost time 0.06608700752258301 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 8 cost time 0.280977725982666 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 120 cost time 0.07957220077514648 s -INFO 06-24 20:02:56 [manager.py:68] detokenization recv req id 400 cost time 0.07026386260986328 s -DEBUG 06-24 20:02:56 [manager.py:391] Prefill Batch: batch_id=149780053443978379004941183649294894592, time:1750766576.8479824s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:56 [manager.py:391] -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:351.7799377441406ms total_cost_time:351.8235683441162ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5263 prompt_cache_len:5151 prompt_cache_ratio:0.9787193615808474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:182.51872062683105ms total_cost_time:182.56020545959473ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5265 prompt_cache_len:5154 prompt_cache_ratio:0.978917378917379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:172.00994491577148ms total_cost_time:172.03593254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5220 prompt_cache_len:5145 prompt_cache_ratio:0.985632183908046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 -INFO 06-24 20:02:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 -DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:57 [batch.py:51] router release req id 8 -INFO 06-24 20:02:57 [batch.py:51] router release req id 120 -INFO 06-24 20:02:57 [batch.py:51] router release req id 400 -INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.10492515563964844 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06873679161071777 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06345820426940918 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.10660004615783691 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07173395156860352 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06770086288452148 s -DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=193658042960038636184952126573462673029, time:1750766577.0308678s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:57 [manager.py:391] -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:02:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:8 first_token_cost:199.90777969360352ms total_cost_time:199.9499797821045ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5264 prompt_cache_len:5151 prompt_cache_ratio:0.978533434650456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:120 first_token_cost:163.67030143737793ms total_cost_time:163.6974811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5266 prompt_cache_len:5154 prompt_cache_ratio:0.978731484998101 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:56 lightllm_req_id:400 first_token_cost:179.65221405029297ms total_cost_time:179.6896457672119ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5221 prompt_cache_len:5145 prompt_cache_ratio:0.985443401647194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 -DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:57 [batch.py:51] router release req id 8 -INFO 06-24 20:02:57 [batch.py:51] router release req id 120 -INFO 06-24 20:02:57 [batch.py:51] router release req id 400 -INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.07663941383361816 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.07186698913574219 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.060872793197631836 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.07818078994750977 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07466983795166016 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06488370895385742 s -DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=238477062562069395972721753778868477347, time:1750766577.2123666s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:57 [manager.py:391] -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:145.30348777770996ms total_cost_time:145.34711837768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5265 prompt_cache_len:5151 prompt_cache_ratio:0.9783475783475784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:174.33881759643555ms total_cost_time:174.3764877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5267 prompt_cache_len:5154 prompt_cache_ratio:0.9785456616669831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:163.41018676757812ms total_cost_time:163.4359359741211ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5222 prompt_cache_len:5145 prompt_cache_ratio:0.985254691689008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 -DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:57 [batch.py:51] router release req id 8 -INFO 06-24 20:02:57 [batch.py:51] router release req id 120 -INFO 06-24 20:02:57 [batch.py:51] router release req id 400 -INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.1061849594116211 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06943631172180176 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06514430046081543 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.10779094696044922 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.07232856750488281 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.06926202774047852 s -DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=259411934968734860524228707761348215427, time:1750766577.393905s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:57 [manager.py:391] -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:198.2131004333496ms total_cost_time:198.25983047485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5266 prompt_cache_len:5151 prompt_cache_ratio:0.9781617926319788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:161.42702102661133ms total_cost_time:161.454439163208ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5268 prompt_cache_len:5154 prompt_cache_ratio:0.9783599088838268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:177.5836944580078ms total_cost_time:177.62207984924316ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5223 prompt_cache_len:5145 prompt_cache_ratio:0.9850660539919587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 -DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:57 [batch.py:51] router release req id 8 -INFO 06-24 20:02:57 [batch.py:51] router release req id 120 -INFO 06-24 20:02:57 [batch.py:51] router release req id 400 -INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.24242353439331055 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.23661541938781738 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.22551822662353516 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.24406909942626953 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.2397611141204834 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.22988033294677734 s -DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=198269870995469517647432146918450003320, time:1750766577.7364511s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:57 [manager.py:391] -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:296.5719699859619ms total_cost_time:296.6158390045166ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5267 prompt_cache_len:5151 prompt_cache_ratio:0.9779760774634517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:339.9538993835449ms total_cost_time:340.00587463378906ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:5269 prompt_cache_len:5154 prompt_cache_ratio:0.9781742266084646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:329.15639877319336ms total_cost_time:329.18858528137207ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:5224 prompt_cache_len:5145 prompt_cache_ratio:0.9848774885145483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 -DEBUG 06-24 20:02:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:57 [batch.py:51] router release req id 8 -INFO 06-24 20:02:57 [batch.py:51] router release req id 120 -INFO 06-24 20:02:57 [batch.py:51] router release req id 400 -INFO 06-24 20:02:57 [manager.py:224] router recive req id 8 cost time 0.1193990707397461 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 120 cost time 0.06866121292114258 s -INFO 06-24 20:02:57 [manager.py:224] router recive req id 400 cost time 0.06359457969665527 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 8 cost time 0.12102293968200684 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 120 cost time 0.071563720703125 s -INFO 06-24 20:02:57 [manager.py:68] detokenization recv req id 400 cost time 0.0676732063293457 s -DEBUG 06-24 20:02:57 [manager.py:391] Prefill Batch: batch_id=57736344375771534419464930785140962856, time:1750766577.915871s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:57 [manager.py:391] -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:196.96831703186035ms total_cost_time:197.01051712036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5268 prompt_cache_len:5151 prompt_cache_ratio:0.9777904328018223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:146.16703987121582ms total_cost_time:146.1935043334961ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5270 prompt_cache_len:5154 prompt_cache_ratio:0.977988614800759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 -INFO 06-24 20:02:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 -ERROR 06-24 20:02:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:400 first_token_cost:165.84539413452148ms total_cost_time:165.88234901428223ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5225 prompt_cache_len:5145 prompt_cache_ratio:0.9846889952153111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 -DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:58 [batch.py:51] router release req id 8 -INFO 06-24 20:02:58 [batch.py:51] router release req id 120 -INFO 06-24 20:02:58 [batch.py:51] router release req id 400 -INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.09508013725280762 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.08972668647766113 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.07290792465209961 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.096832275390625 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.09283947944641113 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.07722759246826172 s -DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=163105448317614145423382377236093730641, time:1750766578.0977929s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:58 [manager.py:391] -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:8 first_token_cost:195.89662551879883ms total_cost_time:195.9395408630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5269 prompt_cache_len:5151 prompt_cache_ratio:0.9776048586069462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:57 lightllm_req_id:120 first_token_cost:190.48380851745605ms total_cost_time:190.50979614257812ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5271 prompt_cache_len:5154 prompt_cache_ratio:0.9778030734206034 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:176.18703842163086ms total_cost_time:176.22089385986328ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5226 prompt_cache_len:5145 prompt_cache_ratio:0.9845005740528129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 -DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:58 [batch.py:51] router release req id 8 -INFO 06-24 20:02:58 [batch.py:51] router release req id 120 -INFO 06-24 20:02:58 [batch.py:51] router release req id 400 -INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.07253289222717285 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.06770753860473633 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.062004804611206055 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.07416534423828125 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.07058405876159668 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.06607985496520996 s -DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=107990464613445968187221303691437965877, time:1750766578.279516s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:58 [manager.py:391] -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:145.0951099395752ms total_cost_time:145.13802528381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5270 prompt_cache_len:5151 prompt_cache_ratio:0.9774193548387097 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:171.39554023742676ms total_cost_time:171.4344024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5272 prompt_cache_len:5154 prompt_cache_ratio:0.9776176024279211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:165.76814651489258ms total_cost_time:165.79270362854004ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5227 prompt_cache_len:5145 prompt_cache_ratio:0.9843122249856514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 -DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:58 [batch.py:51] router release req id 8 -INFO 06-24 20:02:58 [batch.py:51] router release req id 120 -INFO 06-24 20:02:58 [batch.py:51] router release req id 400 -INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.10302448272705078 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.06861424446105957 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.06339859962463379 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.10484886169433594 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.07177543640136719 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.06783032417297363 s -DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=252717552862111207250401123124524785553, time:1750766578.460681s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:58 [manager.py:391] -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:199.79596138000488ms total_cost_time:199.83887672424316ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5271 prompt_cache_len:5151 prompt_cache_ratio:0.977233921457029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:165.30752182006836ms total_cost_time:165.33446311950684ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5273 prompt_cache_len:5154 prompt_cache_ratio:0.9774322017826664 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:180.7103157043457ms total_cost_time:180.7551383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5228 prompt_cache_len:5145 prompt_cache_ratio:0.9841239479724561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 -DEBUG 06-24 20:02:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:58 [batch.py:51] router release req id 8 -INFO 06-24 20:02:58 [batch.py:51] router release req id 120 -INFO 06-24 20:02:58 [batch.py:51] router release req id 400 -INFO 06-24 20:02:58 [manager.py:224] router recive req id 8 cost time 0.24073100090026855 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 120 cost time 0.23489117622375488 s -INFO 06-24 20:02:58 [manager.py:224] router recive req id 400 cost time 0.22488045692443848 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 8 cost time 0.2423253059387207 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 120 cost time 0.23803997039794922 s -INFO 06-24 20:02:58 [manager.py:68] detokenization recv req id 400 cost time 0.22934603691101074 s -DEBUG 06-24 20:02:58 [manager.py:391] Prefill Batch: batch_id=18173306447344486328654646520677566058, time:1750766578.8067722s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:58 [manager.py:391] -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:346.76575660705566ms total_cost_time:346.80962562561035ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5272 prompt_cache_len:5151 prompt_cache_ratio:0.9770485584218513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:340.9996032714844ms total_cost_time:341.02702140808105ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5274 prompt_cache_len:5154 prompt_cache_ratio:0.9772468714448237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 -ERROR 06-24 20:02:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:351.593017578125ms total_cost_time:351.62925720214844ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5229 prompt_cache_len:5145 prompt_cache_ratio:0.9839357429718876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 -DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:59 [batch.py:51] router release req id 8 -INFO 06-24 20:02:59 [batch.py:51] router release req id 120 -INFO 06-24 20:02:59 [batch.py:51] router release req id 400 -INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.11075305938720703 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.10550141334533691 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09362626075744629 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.11229586601257324 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.10835576057434082 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09774899482727051 s -DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=168181395587955718704358422693170453354, time:1750766579.0325258s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:59 [manager.py:391] -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:8 first_token_cost:196.78163528442383ms total_cost_time:196.8247890472412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5273 prompt_cache_len:5151 prompt_cache_ratio:0.9768632656931538 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:120 first_token_cost:230.74817657470703ms total_cost_time:230.7896614074707ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5275 prompt_cache_len:5154 prompt_cache_ratio:0.9770616113744076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:58 lightllm_req_id:400 first_token_cost:218.9624309539795ms total_cost_time:218.98818016052246ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5230 prompt_cache_len:5145 prompt_cache_ratio:0.9837476099426387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 -DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:59 [batch.py:51] router release req id 8 -INFO 06-24 20:02:59 [batch.py:51] router release req id 120 -INFO 06-24 20:02:59 [batch.py:51] router release req id 400 -INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.1333463191986084 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.0912010669708252 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.08359503746032715 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.134993314743042 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.09429264068603516 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.08779358863830566 s -DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=250623435912946141919493879886071853332, time:1750766579.2575517s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:59 [manager.py:391] -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:200.35767555236816ms total_cost_time:200.41942596435547ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5274 prompt_cache_len:5151 prompt_cache_ratio:0.9766780432309442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:209.25402641296387ms total_cost_time:209.29527282714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5276 prompt_cache_len:5154 prompt_cache_ratio:0.9768764215314633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:212.24594116210938ms total_cost_time:212.28361129760742ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5231 prompt_cache_len:5145 prompt_cache_ratio:0.9835595488434333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 -DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:59 [batch.py:51] router release req id 8 -INFO 06-24 20:02:59 [batch.py:51] router release req id 120 -INFO 06-24 20:02:59 [batch.py:51] router release req id 400 -INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.15273022651672363 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.10262823104858398 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09177160263061523 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.15452098846435547 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.10572266578674316 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09604763984680176 s -DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=60146283299106991422489108794745049196, time:1750766579.4845774s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:59 [manager.py:391] -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:249.2082118988037ms total_cost_time:249.25470352172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5275 prompt_cache_len:5151 prompt_cache_ratio:0.9764928909952607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:198.99535179138184ms total_cost_time:199.0222930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5277 prompt_cache_len:5154 prompt_cache_ratio:0.9766913018760659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:217.60129928588867ms total_cost_time:217.64135360717773ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5232 prompt_cache_len:5145 prompt_cache_ratio:0.9833715596330275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 -DEBUG 06-24 20:02:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:02:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:02:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:02:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:02:59 [batch.py:51] router release req id 8 -INFO 06-24 20:02:59 [batch.py:51] router release req id 120 -INFO 06-24 20:02:59 [batch.py:51] router release req id 400 -INFO 06-24 20:02:59 [manager.py:224] router recive req id 8 cost time 0.11905884742736816 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 120 cost time 0.11362624168395996 s -INFO 06-24 20:02:59 [manager.py:224] router recive req id 400 cost time 0.09503602981567383 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 8 cost time 0.12063336372375488 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 120 cost time 0.11663436889648438 s -INFO 06-24 20:02:59 [manager.py:68] detokenization recv req id 400 cost time 0.09913206100463867 s -DEBUG 06-24 20:02:59 [manager.py:391] Prefill Batch: batch_id=335252716620284208802720739114582642921, time:1750766579.710724s req_ids:[8, 120, 400] -DEBUG 06-24 20:02:59 [manager.py:391] -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:195.0395107269287ms total_cost_time:195.0817108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5276 prompt_cache_len:5151 prompt_cache_ratio:0.9763078089461713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 120 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:391.30496978759766ms total_cost_time:391.34716987609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5278 prompt_cache_len:5154 prompt_cache_ratio:0.9765062523683213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 -ERROR 06-24 20:02:59 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:02:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:02:59 [manager.py:162] detoken release req id 400 -INFO 06-24 20:02:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:391.216516494751ms total_cost_time:391.25680923461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5233 prompt_cache_len:5145 prompt_cache_ratio:0.9831836422702083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:02:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 -DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:00 [batch.py:51] router release req id 8 -INFO 06-24 20:03:00 [batch.py:51] router release req id 120 -INFO 06-24 20:03:00 [batch.py:51] router release req id 400 -INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.31546807289123535 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.11297273635864258 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09592032432556152 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.31705236434936523 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.11597013473510742 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.10002803802490234 s -DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=243493336054955487679182620365720616481, time:1750766580.1080182s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:00 [manager.py:391] -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:8 first_token_cost:400.73108673095703ms total_cost_time:400.7754325866699ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5277 prompt_cache_len:5151 prompt_cache_ratio:0.9761227970437749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:120 first_token_cost:239.70341682434082ms total_cost_time:239.7456169128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5279 prompt_cache_len:5154 prompt_cache_ratio:0.9763212729683652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:02:59 lightllm_req_id:400 first_token_cost:222.6274013519287ms total_cost_time:222.65338897705078ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5234 prompt_cache_len:5145 prompt_cache_ratio:0.9829957967137944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 -DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:00 [batch.py:51] router release req id 8 -INFO 06-24 20:03:00 [batch.py:51] router release req id 120 -INFO 06-24 20:03:00 [batch.py:51] router release req id 400 -INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.1340494155883789 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.08982586860656738 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.08382964134216309 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.13568496704101562 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.09305906295776367 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.08835601806640625 s -DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=263235028361106040947328215850264471338, time:1750766580.33224s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:00 [manager.py:391] -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:199.5065212249756ms total_cost_time:199.5527744293213ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5278 prompt_cache_len:5151 prompt_cache_ratio:0.9759378552482001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:206.71725273132324ms total_cost_time:206.7587375640869ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5280 prompt_cache_len:5154 prompt_cache_ratio:0.9761363636363637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:211.7602825164795ms total_cost_time:211.79676055908203ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5235 prompt_cache_len:5145 prompt_cache_ratio:0.9828080229226361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 -DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:00 [batch.py:51] router release req id 8 -INFO 06-24 20:03:00 [batch.py:51] router release req id 120 -INFO 06-24 20:03:00 [batch.py:51] router release req id 400 -INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.15256118774414062 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.10103631019592285 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09058427810668945 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.15433216094970703 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.10418081283569336 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.09496068954467773 s -DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=220412517274802927186084874659131687305, time:1750766580.5570736s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:00 [manager.py:391] -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:251.68132781982422ms total_cost_time:251.7232894897461ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5279 prompt_cache_len:5151 prompt_cache_ratio:0.9757529835196059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:200.01578330993652ms total_cost_time:200.0417709350586ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5281 prompt_cache_len:5154 prompt_cache_ratio:0.9759515243325128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:215.86322784423828ms total_cost_time:215.90542793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5236 prompt_cache_len:5145 prompt_cache_ratio:0.982620320855615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 -DEBUG 06-24 20:03:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:00 [batch.py:51] router release req id 8 -INFO 06-24 20:03:00 [batch.py:51] router release req id 120 -INFO 06-24 20:03:00 [batch.py:51] router release req id 400 -INFO 06-24 20:03:00 [manager.py:224] router recive req id 8 cost time 0.1159822940826416 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 120 cost time 0.11146903038024902 s -INFO 06-24 20:03:00 [manager.py:224] router recive req id 400 cost time 0.09499955177307129 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 8 cost time 0.11766767501831055 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 120 cost time 0.11440825462341309 s -INFO 06-24 20:03:00 [manager.py:68] detokenization recv req id 400 cost time 0.09891843795776367 s -DEBUG 06-24 20:03:00 [manager.py:391] Prefill Batch: batch_id=29122925912562986131668313164799944060, time:1750766580.7816646s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:00 [manager.py:391] -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:195.54853439331055ms total_cost_time:195.59311866760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5280 prompt_cache_len:5151 prompt_cache_ratio:0.9755681818181818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:236.15574836730957ms total_cost_time:236.19413375854492ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5282 prompt_cache_len:5154 prompt_cache_ratio:0.975766755017039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:219.5131778717041ms total_cost_time:219.53892707824707ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5237 prompt_cache_len:5145 prompt_cache_ratio:0.982432690471644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 -INFO 06-24 20:03:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 -DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:01 [batch.py:51] router release req id 8 -INFO 06-24 20:03:01 [batch.py:51] router release req id 120 -INFO 06-24 20:03:01 [batch.py:51] router release req id 400 -INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.3114128112792969 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.2635183334350586 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.25843238830566406 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.3139491081237793 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.2681081295013428 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.26425671577453613 s -DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=182335962842177281224558857469851022107, time:1750766581.1789913s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:01 [manager.py:391] -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:8 first_token_cost:399.6856212615967ms total_cost_time:399.72782135009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5281 prompt_cache_len:5151 prompt_cache_ratio:0.975383450104147 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:120 first_token_cost:390.51127433776855ms total_cost_time:390.55371284484863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5283 prompt_cache_len:5154 prompt_cache_ratio:0.9755820556501987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:00 lightllm_req_id:400 first_token_cost:385.41722297668457ms total_cost_time:385.44297218322754ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5238 prompt_cache_len:5145 prompt_cache_ratio:0.9822451317296678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 -DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:01 [batch.py:51] router release req id 8 -INFO 06-24 20:03:01 [batch.py:51] router release req id 120 -INFO 06-24 20:03:01 [batch.py:51] router release req id 400 -INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.13236498832702637 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.09107613563537598 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.08606386184692383 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.13392972946166992 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.09407854080200195 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.09026718139648438 s -DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=188293439928123939094173931805000715375, time:1750766581.4046385s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:01 [manager.py:391] -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:200.00052452087402ms total_cost_time:200.04606246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5282 prompt_cache_len:5151 prompt_cache_ratio:0.9751987883377509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:209.95163917541504ms total_cost_time:209.9928855895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5284 prompt_cache_len:5154 prompt_cache_ratio:0.9753974261922785 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:216.51196479797363ms total_cost_time:216.5522575378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5239 prompt_cache_len:5145 prompt_cache_ratio:0.982057644588662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 -DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:01 [batch.py:51] router release req id 8 -INFO 06-24 20:03:01 [batch.py:51] router release req id 120 -INFO 06-24 20:03:01 [batch.py:51] router release req id 400 -INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.15140771865844727 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.10064196586608887 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.08910465240478516 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.15315937995910645 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.10357975959777832 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.09314990043640137 s -DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=259194658151707794372523836442420707745, time:1750766581.630687s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:01 [manager.py:391] -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:250.9772777557373ms total_cost_time:251.02710723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5283 prompt_cache_len:5151 prompt_cache_ratio:0.9750141964792731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:200.1051902770996ms total_cost_time:200.14071464538574ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5285 prompt_cache_len:5154 prompt_cache_ratio:0.9752128666035951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:214.32018280029297ms total_cost_time:214.35904502868652ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5240 prompt_cache_len:5145 prompt_cache_ratio:0.9818702290076335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 -DEBUG 06-24 20:03:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:01 [batch.py:51] router release req id 8 -INFO 06-24 20:03:01 [batch.py:51] router release req id 120 -INFO 06-24 20:03:01 [batch.py:51] router release req id 400 -INFO 06-24 20:03:01 [manager.py:224] router recive req id 8 cost time 0.11613821983337402 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 120 cost time 0.11108255386352539 s -INFO 06-24 20:03:01 [manager.py:224] router recive req id 400 cost time 0.09606814384460449 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 8 cost time 0.1177058219909668 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 120 cost time 0.11397910118103027 s -INFO 06-24 20:03:01 [manager.py:68] detokenization recv req id 400 cost time 0.10022997856140137 s -DEBUG 06-24 20:03:01 [manager.py:391] Prefill Batch: batch_id=64197573786525407112719760275378176415, time:1750766581.855237s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:01 [manager.py:391] -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:193.89581680297852ms total_cost_time:193.9396858215332ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5284 prompt_cache_len:5151 prompt_cache_ratio:0.9748296744890235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:236.18841171264648ms total_cost_time:236.22775077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5286 prompt_cache_len:5154 prompt_cache_ratio:0.9750283768444948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:221.28844261169434ms total_cost_time:221.3146686553955ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5241 prompt_cache_len:5145 prompt_cache_ratio:0.9816828849456211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 -INFO 06-24 20:03:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 -DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:02 [batch.py:51] router release req id 8 -INFO 06-24 20:03:02 [batch.py:51] router release req id 120 -INFO 06-24 20:03:02 [batch.py:51] router release req id 400 -INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.14007067680358887 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.09129524230957031 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.08695363998413086 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.1417217254638672 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.09403705596923828 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.09047293663024902 s -DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=125094437991998793190289606147543117805, time:1750766582.0803537s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:02 [manager.py:391] -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:8 first_token_cost:199.34797286987305ms total_cost_time:199.39351081848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5285 prompt_cache_len:5151 prompt_cache_ratio:0.9746452223273415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:120 first_token_cost:201.54619216918945ms total_cost_time:201.62177085876465ms,out_token_counter:1 mean_per_token_cost_time: 0.07557868957519531ms prompt_token_num:5287 prompt_cache_len:5154 prompt_cache_ratio:0.9748439568753546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:01 lightllm_req_id:400 first_token_cost:210.06369590759277ms total_cost_time:210.10303497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5242 prompt_cache_len:5145 prompt_cache_ratio:0.981495612361694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 -DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:02 [batch.py:51] router release req id 8 -INFO 06-24 20:03:02 [batch.py:51] router release req id 120 -INFO 06-24 20:03:02 [batch.py:51] router release req id 400 -INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.33292460441589355 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.28167271614074707 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.26958537101745605 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.3346402645111084 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.2847588062286377 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.27356910705566406 s -DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=239136155835675422613035786088671133321, time:1750766582.4800446s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:02 [manager.py:391] -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:402.8663635253906ms total_cost_time:402.9114246368408ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5286 prompt_cache_len:5151 prompt_cache_ratio:0.9744608399545971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:402.71663665771484ms total_cost_time:402.7600288391113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5288 prompt_cache_len:5154 prompt_cache_ratio:0.9746596066565809 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:391.9949531555176ms total_cost_time:392.02880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5243 prompt_cache_len:5145 prompt_cache_ratio:0.9813084112149533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 -DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:02 [batch.py:51] router release req id 8 -INFO 06-24 20:03:02 [batch.py:51] router release req id 120 -INFO 06-24 20:03:02 [batch.py:51] router release req id 400 -INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.12467145919799805 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.07447981834411621 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.0682077407836914 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.1262655258178711 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.07736825942993164 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.07226347923278809 s -DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=281128153793894351413878658290320283819, time:1750766582.6805327s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:02 [manager.py:391] -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:199.27430152893066ms total_cost_time:199.31912422180176ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5287 prompt_cache_len:5151 prompt_cache_ratio:0.9742765273311897 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:177.9038906097412ms total_cost_time:177.94227600097656ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5289 prompt_cache_len:5154 prompt_cache_ratio:0.9744753261486103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:171.6759204864502ms total_cost_time:171.70143127441406ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5244 prompt_cache_len:5145 prompt_cache_ratio:0.9811212814645309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 -DEBUG 06-24 20:03:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:02 [batch.py:51] router release req id 8 -INFO 06-24 20:03:02 [batch.py:51] router release req id 120 -INFO 06-24 20:03:02 [batch.py:51] router release req id 400 -INFO 06-24 20:03:02 [manager.py:224] router recive req id 8 cost time 0.1013634204864502 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 120 cost time 0.06864643096923828 s -INFO 06-24 20:03:02 [manager.py:224] router recive req id 400 cost time 0.06300497055053711 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 8 cost time 0.10288476943969727 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 120 cost time 0.07166266441345215 s -INFO 06-24 20:03:02 [manager.py:68] detokenization recv req id 400 cost time 0.06723737716674805 s -DEBUG 06-24 20:03:02 [manager.py:391] Prefill Batch: batch_id=91523970985601352697070865277596008677, time:1750766582.8603294s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:02 [manager.py:391] -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:200.02460479736328ms total_cost_time:200.06918907165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5288 prompt_cache_len:5151 prompt_cache_ratio:0.9740922844175491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:167.4044132232666ms total_cost_time:167.43206977844238ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5290 prompt_cache_len:5154 prompt_cache_ratio:0.9742911153119093 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:164.3974781036377ms total_cost_time:164.4306182861328ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:5245 prompt_cache_len:5145 prompt_cache_ratio:0.9809342230695901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 -INFO 06-24 20:03:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 -DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:03 [batch.py:51] router release req id 8 -INFO 06-24 20:03:03 [batch.py:51] router release req id 120 -INFO 06-24 20:03:03 [batch.py:51] router release req id 400 -INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.07186365127563477 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06684017181396484 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06063389778137207 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.07349824905395508 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.06997442245483398 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06496000289916992 s -DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=288233766182506990015444085610493712215, time:1750766583.0410485s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:03 [manager.py:391] -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:8 first_token_cost:146.42024040222168ms total_cost_time:146.46410942077637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5289 prompt_cache_len:5151 prompt_cache_ratio:0.973908111174135 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:120 first_token_cost:171.48447036743164ms total_cost_time:171.5230941772461ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5291 prompt_cache_len:5154 prompt_cache_ratio:0.9741069741069741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:02 lightllm_req_id:400 first_token_cost:165.26389122009277ms total_cost_time:165.29035568237305ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5246 prompt_cache_len:5145 prompt_cache_ratio:0.9807472359893252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 -DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:03 [batch.py:51] router release req id 8 -INFO 06-24 20:03:03 [batch.py:51] router release req id 120 -INFO 06-24 20:03:03 [batch.py:51] router release req id 400 -INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.1012265682220459 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06892633438110352 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06318545341491699 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.10269975662231445 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.07202768325805664 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06740689277648926 s -DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=182149879844355413368528578342019487823, time:1750766583.2229466s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:03 [manager.py:391] -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:197.94797897338867ms total_cost_time:197.99208641052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5290 prompt_cache_len:5151 prompt_cache_ratio:0.9737240075614366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:317.48461723327637ms total_cost_time:317.53015518188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5292 prompt_cache_len:5154 prompt_cache_ratio:0.9739229024943311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:325.18744468688965ms total_cost_time:325.2289295196533ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5247 prompt_cache_len:5145 prompt_cache_ratio:0.9805603201829617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 -DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:03 [batch.py:51] router release req id 8 -INFO 06-24 20:03:03 [batch.py:51] router release req id 120 -INFO 06-24 20:03:03 [batch.py:51] router release req id 400 -INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.23842501640319824 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.08643913269042969 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.07294750213623047 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.24007129669189453 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.08942604064941406 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.07713079452514648 s -DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=217347339546027922626723107032738883650, time:1750766583.562186s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:03 [manager.py:391] -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:302.78849601745605ms total_cost_time:302.83212661743164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5291 prompt_cache_len:5151 prompt_cache_ratio:0.9735399735399736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:191.08033180236816ms total_cost_time:191.11943244934082ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5293 prompt_cache_len:5154 prompt_cache_ratio:0.9737389004345361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:177.67786979675293ms total_cost_time:177.7033805847168ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5248 prompt_cache_len:5145 prompt_cache_ratio:0.9803734756097561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 -DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:03 [batch.py:51] router release req id 8 -INFO 06-24 20:03:03 [batch.py:51] router release req id 120 -INFO 06-24 20:03:03 [batch.py:51] router release req id 400 -INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.11289215087890625 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.06951093673706055 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06439065933227539 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.11450624465942383 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.0725090503692627 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.0685873031616211 s -DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=227679676313343204764042523904389103901, time:1750766583.745937s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:03 [manager.py:391] -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:199.91564750671387ms total_cost_time:199.97620582580566ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5292 prompt_cache_len:5151 prompt_cache_ratio:0.9733560090702947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:156.70180320739746ms total_cost_time:156.7401885986328ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5294 prompt_cache_len:5154 prompt_cache_ratio:0.9735549678881753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:176.6190528869629ms total_cost_time:176.65529251098633ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5249 prompt_cache_len:5145 prompt_cache_ratio:0.980186702228996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 -DEBUG 06-24 20:03:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:03 [batch.py:51] router release req id 8 -INFO 06-24 20:03:03 [batch.py:51] router release req id 120 -INFO 06-24 20:03:03 [batch.py:51] router release req id 400 -INFO 06-24 20:03:03 [manager.py:224] router recive req id 8 cost time 0.08400130271911621 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 120 cost time 0.07600045204162598 s -INFO 06-24 20:03:03 [manager.py:224] router recive req id 400 cost time 0.06231570243835449 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 8 cost time 0.08558297157287598 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 120 cost time 0.07914495468139648 s -INFO 06-24 20:03:03 [manager.py:68] detokenization recv req id 400 cost time 0.06668877601623535 s -DEBUG 06-24 20:03:03 [manager.py:391] Prefill Batch: batch_id=275884247643486442865157346923580975197, time:1750766583.92665s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:03 [manager.py:391] -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:145.5838680267334ms total_cost_time:145.62582969665527ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5293 prompt_cache_len:5151 prompt_cache_ratio:0.9731721141129794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 -ERROR 06-24 20:03:03 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:03:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 67803.981 tokens/s -DEBUG 06-24 20:03:04 [stats.py:37] Avg prompt tokens throughput: 67777.902 tokens/s -DEBUG 06-24 20:03:04 [stats.py:37] Avg generate tokens throughput: 26.079 tokens/s -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:120 first_token_cost:179.52299118041992ms total_cost_time:179.56161499023438ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5295 prompt_cache_len:5154 prompt_cache_ratio:0.973371104815864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:400 first_token_cost:165.90023040771484ms total_cost_time:165.9252643585205ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5250 prompt_cache_len:5145 prompt_cache_ratio:0.98 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 -DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:04 [batch.py:51] router release req id 8 -INFO 06-24 20:03:04 [batch.py:51] router release req id 120 -INFO 06-24 20:03:04 [batch.py:51] router release req id 400 -INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.11393237113952637 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06885027885437012 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.06300020217895508 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.11544537544250488 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.0719749927520752 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.06738066673278809 s -DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=66708671877183112421689361876483998042, time:1750766584.1074975s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:04 [manager.py:391] -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:03 lightllm_req_id:8 first_token_cost:199.75829124450684ms total_cost_time:199.80168342590332ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5294 prompt_cache_len:5151 prompt_cache_ratio:0.9729882886286362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:154.83808517456055ms total_cost_time:154.8635959625244ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5296 prompt_cache_len:5154 prompt_cache_ratio:0.9731873111782477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:170.65811157226562ms total_cost_time:170.71175575256348ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5251 prompt_cache_len:5145 prompt_cache_ratio:0.9798133688821177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 -DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:04 [batch.py:51] router release req id 8 -INFO 06-24 20:03:04 [batch.py:51] router release req id 120 -INFO 06-24 20:03:04 [batch.py:51] router release req id 400 -INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.2448740005493164 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.23986124992370605 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.2283473014831543 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.24667954444885254 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.24306750297546387 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.23279142379760742 s -DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=149687600297356712041009137080545313648, time:1750766584.4473498s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:04 [manager.py:391] -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:345.71194648742676ms total_cost_time:345.75676918029785ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5295 prompt_cache_len:5151 prompt_cache_ratio:0.9728045325779037 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:340.63172340393066ms total_cost_time:340.65818786621094ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5297 prompt_cache_len:5154 prompt_cache_ratio:0.9730035869360015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:330.96957206726074ms total_cost_time:331.00438117980957ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5252 prompt_cache_len:5145 prompt_cache_ratio:0.9796268088347296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 -DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:04 [batch.py:51] router release req id 8 -INFO 06-24 20:03:04 [batch.py:51] router release req id 120 -INFO 06-24 20:03:04 [batch.py:51] router release req id 400 -INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.07063508033752441 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06555461883544922 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.05957508087158203 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.07228374481201172 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.06875753402709961 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.06395506858825684 s -DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=47316391409393921248033422328897065009, time:1750766584.6288254s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:04 [manager.py:391] -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:145.06840705871582ms total_cost_time:145.1106071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5296 prompt_cache_len:5151 prompt_cache_ratio:0.9726208459214502 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:168.3824062347412ms total_cost_time:168.42103004455566ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5298 prompt_cache_len:5154 prompt_cache_ratio:0.9728199320498301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:162.4002456665039ms total_cost_time:162.42527961730957ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5253 prompt_cache_len:5145 prompt_cache_ratio:0.9794403198172473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 -DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:04 [batch.py:51] router release req id 8 -INFO 06-24 20:03:04 [batch.py:51] router release req id 120 -INFO 06-24 20:03:04 [batch.py:51] router release req id 400 -INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.09999442100524902 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06928372383117676 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.06400632858276367 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.10173821449279785 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.07449841499328613 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.07265734672546387 s -DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=79892869252685447031901683572463103244, time:1750766584.8082302s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:04 [manager.py:391] -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:200.4532814025879ms total_cost_time:200.4988193511963ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5297 prompt_cache_len:5151 prompt_cache_ratio:0.9724372286199736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 120 -ERROR 06-24 20:03:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:169.69013214111328ms total_cost_time:169.71683502197266ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5299 prompt_cache_len:5154 prompt_cache_ratio:0.972636346480468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:166.36323928833008ms total_cost_time:166.4128303527832ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:5254 prompt_cache_len:5145 prompt_cache_ratio:0.9792539017891131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 -INFO 06-24 20:03:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 -DEBUG 06-24 20:03:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:04 [batch.py:51] router release req id 8 -INFO 06-24 20:03:04 [batch.py:51] router release req id 120 -INFO 06-24 20:03:04 [batch.py:51] router release req id 400 -INFO 06-24 20:03:04 [manager.py:224] router recive req id 8 cost time 0.06858706474304199 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 120 cost time 0.06291651725769043 s -INFO 06-24 20:03:04 [manager.py:224] router recive req id 400 cost time 0.05619978904724121 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 8 cost time 0.07008528709411621 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 120 cost time 0.06586384773254395 s -INFO 06-24 20:03:04 [manager.py:68] detokenization recv req id 400 cost time 0.060288190841674805 s -DEBUG 06-24 20:03:04 [manager.py:391] Prefill Batch: batch_id=164075764231021247856704771629386601870, time:1750766584.9884338s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:04 [manager.py:391] -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:8 first_token_cost:144.00506019592285ms total_cost_time:144.04654502868652ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5298 prompt_cache_len:5151 prompt_cache_ratio:0.9722536806342016 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:120 first_token_cost:166.64743423461914ms total_cost_time:166.7020320892334ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:5300 prompt_cache_len:5154 prompt_cache_ratio:0.9724528301886792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:04 lightllm_req_id:400 first_token_cost:159.93785858154297ms total_cost_time:159.96360778808594ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5255 prompt_cache_len:5145 prompt_cache_ratio:0.9790675547098002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 -DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:05 [batch.py:51] router release req id 8 -INFO 06-24 20:03:05 [batch.py:51] router release req id 120 -INFO 06-24 20:03:05 [batch.py:51] router release req id 400 -INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.10055685043334961 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.06757378578186035 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.06247830390930176 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.10227465629577637 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.07069587707519531 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06672811508178711 s -DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=259431682458955373295122064125422602180, time:1750766585.1691797s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:05 [manager.py:391] -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.46908950805664ms total_cost_time:199.51486587524414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5299 prompt_cache_len:5151 prompt_cache_ratio:0.9720702019248915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:318.6924457550049ms total_cost_time:318.73512268066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5301 prompt_cache_len:5154 prompt_cache_ratio:0.9722693831352575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:325.64830780029297ms total_cost_time:325.68836212158203ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5256 prompt_cache_len:5145 prompt_cache_ratio:0.9788812785388128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 -DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:05 [batch.py:51] router release req id 8 -INFO 06-24 20:03:05 [batch.py:51] router release req id 120 -INFO 06-24 20:03:05 [batch.py:51] router release req id 400 -INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.2349088191986084 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.08285832405090332 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.07140970230102539 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.2364649772644043 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.08598685264587402 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.07569289207458496 s -DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=310158004849090102742443202022598035781, time:1750766585.5102398s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:05 [manager.py:391] -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:301.8653392791748ms total_cost_time:301.9092082977295ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5300 prompt_cache_len:5151 prompt_cache_ratio:0.9718867924528302 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:186.90824508666992ms total_cost_time:186.94829940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5302 prompt_cache_len:5154 prompt_cache_ratio:0.9720860052810261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:175.431489944458ms total_cost_time:175.45676231384277ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5257 prompt_cache_len:5145 prompt_cache_ratio:0.9786950732356857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 -DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:05 [batch.py:51] router release req id 8 -INFO 06-24 20:03:05 [batch.py:51] router release req id 120 -INFO 06-24 20:03:05 [batch.py:51] router release req id 400 -INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.10951685905456543 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.06955742835998535 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.0648806095123291 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.0724940299987793 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06974482536315918 s -DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=84577353042093161251123392718419548024, time:1750766585.692095s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:05 [manager.py:391] -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.65267181396484ms total_cost_time:199.69701766967773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5301 prompt_cache_len:5151 prompt_cache_ratio:0.9717034521788341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:159.52801704406738ms total_cost_time:159.55376625061035ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5303 prompt_cache_len:5154 prompt_cache_ratio:0.9719026965868376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:174.37219619750977ms total_cost_time:174.41177368164062ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5258 prompt_cache_len:5145 prompt_cache_ratio:0.9785089387599848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 -DEBUG 06-24 20:03:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:05 [batch.py:51] router release req id 8 -INFO 06-24 20:03:05 [batch.py:51] router release req id 120 -INFO 06-24 20:03:05 [batch.py:51] router release req id 400 -INFO 06-24 20:03:05 [manager.py:224] router recive req id 8 cost time 0.08170604705810547 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 120 cost time 0.07679009437561035 s -INFO 06-24 20:03:05 [manager.py:224] router recive req id 400 cost time 0.06541228294372559 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 8 cost time 0.0833137035369873 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 120 cost time 0.07999515533447266 s -INFO 06-24 20:03:05 [manager.py:68] detokenization recv req id 400 cost time 0.06974577903747559 s -DEBUG 06-24 20:03:05 [manager.py:391] Prefill Batch: batch_id=231194745762646292343607873640996808426, time:1750766585.8702967s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:05 [manager.py:391] -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:147.35794067382812ms total_cost_time:147.40419387817383ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5302 prompt_cache_len:5151 prompt_cache_ratio:0.9715201810637495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:178.8315773010254ms total_cost_time:178.87401580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5304 prompt_cache_len:5154 prompt_cache_ratio:0.9717194570135747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:167.42491722106934ms total_cost_time:167.4516201019287ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5259 prompt_cache_len:5145 prompt_cache_ratio:0.9783228750713063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 -INFO 06-24 20:03:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 -DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:06 [batch.py:51] router release req id 8 -INFO 06-24 20:03:06 [batch.py:51] router release req id 120 -INFO 06-24 20:03:06 [batch.py:51] router release req id 400 -INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.0692598819732666 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.0637061595916748 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040830612182617 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07240819931030273 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06789445877075195 s -DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=160257433247289975480812048254156051702, time:1750766586.0519567s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:06 [manager.py:391] -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:8 first_token_cost:199.94592666625977ms total_cost_time:199.99003410339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5303 prompt_cache_len:5151 prompt_cache_ratio:0.9713369790684518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:120 first_token_cost:160.53271293640137ms total_cost_time:160.55870056152344ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5305 prompt_cache_len:5154 prompt_cache_ratio:0.971536286522149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:05 lightllm_req_id:400 first_token_cost:173.99215698242188ms total_cost_time:174.03030395507812ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5260 prompt_cache_len:5145 prompt_cache_ratio:0.9781368821292775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 -DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:06 [batch.py:51] router release req id 8 -INFO 06-24 20:03:06 [batch.py:51] router release req id 120 -INFO 06-24 20:03:06 [batch.py:51] router release req id 400 -INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.24316668510437012 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.23845696449279785 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.22804641723632812 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.24495697021484375 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.2417299747467041 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.23244285583496094 s -DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=190813674211183090339878898280079691919, time:1750766586.3956118s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:06 [manager.py:391] -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:298.02536964416504ms total_cost_time:298.07019233703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5304 prompt_cache_len:5151 prompt_cache_ratio:0.9711538461538461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:342.423677444458ms total_cost_time:342.4665927886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5306 prompt_cache_len:5154 prompt_cache_ratio:0.9713531850735017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:332.0116996765137ms total_cost_time:332.03911781311035ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5261 prompt_cache_len:5145 prompt_cache_ratio:0.9779509598935564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 -DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:06 [batch.py:51] router release req id 8 -INFO 06-24 20:03:06 [batch.py:51] router release req id 120 -INFO 06-24 20:03:06 [batch.py:51] router release req id 400 -INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.12110543251037598 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06941533088684082 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06402778625488281 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.12287569046020508 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07263469696044922 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06830286979675293 s -DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=88611357532451146026380542523366982005, time:1750766586.577605s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:06 [manager.py:391] -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:197.9985237121582ms total_cost_time:198.0421543121338ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5305 prompt_cache_len:5151 prompt_cache_ratio:0.9709707822808671 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:174.36647415161133ms total_cost_time:174.4062900543213ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5307 prompt_cache_len:5154 prompt_cache_ratio:0.9711701526286037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:168.88022422790527ms total_cost_time:168.90597343444824ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5262 prompt_cache_len:5145 prompt_cache_ratio:0.9777651083238312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 -DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:06 [batch.py:51] router release req id 8 -INFO 06-24 20:03:06 [batch.py:51] router release req id 120 -INFO 06-24 20:03:06 [batch.py:51] router release req id 400 -INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.0994877815246582 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06853818893432617 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06286787986755371 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.10113167762756348 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.07184529304504395 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06729817390441895 s -DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=335940104050689606158566240719424468424, time:1750766586.7589724s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:06 [manager.py:391] -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:198.90069961547852ms total_cost_time:198.944091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5306 prompt_cache_len:5151 prompt_cache_ratio:0.9707877874104787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:168.14136505126953ms total_cost_time:168.1685447692871ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5308 prompt_cache_len:5154 prompt_cache_ratio:0.9709871891484552 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:166.00513458251953ms total_cost_time:166.0468578338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5263 prompt_cache_len:5145 prompt_cache_ratio:0.9775793273798214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 -DEBUG 06-24 20:03:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:06 [batch.py:51] router release req id 8 -INFO 06-24 20:03:06 [batch.py:51] router release req id 120 -INFO 06-24 20:03:06 [batch.py:51] router release req id 400 -INFO 06-24 20:03:06 [manager.py:224] router recive req id 8 cost time 0.07269501686096191 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 120 cost time 0.06657648086547852 s -INFO 06-24 20:03:06 [manager.py:224] router recive req id 400 cost time 0.06083345413208008 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 8 cost time 0.07430529594421387 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 120 cost time 0.06975412368774414 s -INFO 06-24 20:03:06 [manager.py:68] detokenization recv req id 400 cost time 0.06520557403564453 s -DEBUG 06-24 20:03:06 [manager.py:391] Prefill Batch: batch_id=170816745227154724700370166280682941226, time:1750766586.941039s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:06 [manager.py:391] -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:145.7841396331787ms total_cost_time:145.83396911621094ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:5307 prompt_cache_len:5151 prompt_cache_ratio:0.9706048615036744 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:06 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:120 first_token_cost:171.1738109588623ms total_cost_time:171.21291160583496ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5309 prompt_cache_len:5154 prompt_cache_ratio:0.9708042945940856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:400 first_token_cost:165.4052734375ms total_cost_time:165.43173789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5264 prompt_cache_len:5145 prompt_cache_ratio:0.9773936170212766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 -DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:07 [batch.py:51] router release req id 8 -INFO 06-24 20:03:07 [batch.py:51] router release req id 120 -INFO 06-24 20:03:07 [batch.py:51] router release req id 400 -INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.10344791412353516 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06972265243530273 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.06460857391357422 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.10510468482971191 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.07271480560302734 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06862640380859375 s -DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=71340613666972578149316851485645079893, time:1750766587.1236358s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:07 [manager.py:391] -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:06 lightllm_req_id:8 first_token_cost:198.45008850097656ms total_cost_time:198.49443435668945ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5308 prompt_cache_len:5151 prompt_cache_ratio:0.970422004521477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:164.6888256072998ms total_cost_time:164.71505165100098ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5310 prompt_cache_len:5154 prompt_cache_ratio:0.9706214689265537 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:178.36523056030273ms total_cost_time:178.4040927886963ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5265 prompt_cache_len:5145 prompt_cache_ratio:0.9772079772079773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 -DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:07 [batch.py:51] router release req id 8 -INFO 06-24 20:03:07 [batch.py:51] router release req id 120 -INFO 06-24 20:03:07 [batch.py:51] router release req id 400 -INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.24206900596618652 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.2371683120727539 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.22643351554870605 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.2437143325805664 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.24038290977478027 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.230987548828125 s -DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=307441400527026226635470668807473442954, time:1750766587.4694526s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:07 [manager.py:391] -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:297.8386878967285ms total_cost_time:297.8825569152832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5309 prompt_cache_len:5151 prompt_cache_ratio:0.9702392164249388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:340.84439277648926ms total_cost_time:340.8844470977783ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5311 prompt_cache_len:5154 prompt_cache_ratio:0.9704387121069479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:330.20973205566406ms total_cost_time:330.2347660064697ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5266 prompt_cache_len:5145 prompt_cache_ratio:0.9770224078997342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 -DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:07 [batch.py:51] router release req id 8 -INFO 06-24 20:03:07 [batch.py:51] router release req id 120 -INFO 06-24 20:03:07 [batch.py:51] router release req id 400 -INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.11931276321411133 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06859779357910156 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.06405234336853027 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.12086820602416992 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.0715947151184082 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06826186180114746 s -DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=312672663945024470632483703707765056425, time:1750766587.6492965s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:07 [manager.py:391] -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:199.5561122894287ms total_cost_time:199.6002197265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5310 prompt_cache_len:5151 prompt_cache_ratio:0.9700564971751412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:148.8039493560791ms total_cost_time:148.83041381835938ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5312 prompt_cache_len:5154 prompt_cache_ratio:0.9702560240963856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:167.71340370178223ms total_cost_time:167.7529811859131ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5267 prompt_cache_len:5145 prompt_cache_ratio:0.9768369090563889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 -DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:07 [batch.py:51] router release req id 8 -INFO 06-24 20:03:07 [batch.py:51] router release req id 120 -INFO 06-24 20:03:07 [batch.py:51] router release req id 400 -INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.09345245361328125 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.08801460266113281 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.07297444343566895 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.09522056579589844 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.09141969680786133 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.07752132415771484 s -DEBUG 06-24 20:03:07 [manager.py:391] Prefill Batch: batch_id=193091722683291349466394464995447508586, time:1750766587.8326578s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:07 [manager.py:391] -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:03:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:195.34993171691895ms total_cost_time:195.39237022399902ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5311 prompt_cache_len:5151 prompt_cache_ratio:0.9698738467331952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:190.08588790893555ms total_cost_time:190.11354446411133ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5313 prompt_cache_len:5154 prompt_cache_ratio:0.9700734048560136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:176.88751220703125ms total_cost_time:176.92995071411133ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5268 prompt_cache_len:5145 prompt_cache_ratio:0.9766514806378133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 -INFO 06-24 20:03:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 -DEBUG 06-24 20:03:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:07 [batch.py:51] router release req id 8 -INFO 06-24 20:03:07 [batch.py:51] router release req id 120 -INFO 06-24 20:03:07 [batch.py:51] router release req id 400 -INFO 06-24 20:03:07 [manager.py:224] router recive req id 8 cost time 0.06983780860900879 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 120 cost time 0.06453132629394531 s -INFO 06-24 20:03:07 [manager.py:224] router recive req id 400 cost time 0.05739402770996094 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 8 cost time 0.07161664962768555 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 120 cost time 0.06784510612487793 s -INFO 06-24 20:03:07 [manager.py:68] detokenization recv req id 400 cost time 0.06203031539916992 s -DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=64303321026449279438288865713711737914, time:1750766588.012172s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:08 [manager.py:391] -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:8 first_token_cost:145.30682563781738ms total_cost_time:145.35093307495117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5312 prompt_cache_len:5151 prompt_cache_ratio:0.969691265060241 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:120 first_token_cost:168.3492660522461ms total_cost_time:168.38932037353516ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5314 prompt_cache_len:5154 prompt_cache_ratio:0.969890854347008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:07 lightllm_req_id:400 first_token_cost:161.27586364746094ms total_cost_time:161.3016128540039ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5269 prompt_cache_len:5145 prompt_cache_ratio:0.9764661226039096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 -DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:08 [batch.py:51] router release req id 8 -INFO 06-24 20:03:08 [batch.py:51] router release req id 120 -INFO 06-24 20:03:08 [batch.py:51] router release req id 400 -INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.10009527206420898 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.06900477409362793 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06406497955322266 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.10164761543273926 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.0722665786743164 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.06852245330810547 s -DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=327575761866047990773155610397381304783, time:1750766588.194093s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:08 [manager.py:391] -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:350.4462242126465ms total_cost_time:350.48985481262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5313 prompt_cache_len:5151 prompt_cache_ratio:0.9695087521174478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:319.5836544036865ms total_cost_time:319.6108341217041ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5315 prompt_cache_len:5154 prompt_cache_ratio:0.9697083725305738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:333.5709571838379ms total_cost_time:333.61077308654785ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5270 prompt_cache_len:5145 prompt_cache_ratio:0.976280834914611 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 -DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:08 [batch.py:51] router release req id 8 -INFO 06-24 20:03:08 [batch.py:51] router release req id 120 -INFO 06-24 20:03:08 [batch.py:51] router release req id 400 -INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.0843658447265625 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.07943940162658691 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06834888458251953 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.08600425720214844 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.08255338668823242 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.07269573211669922 s -DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=247661076971784525710063830289677839388, time:1750766588.5378354s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:08 [manager.py:391] -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:147.89772033691406ms total_cost_time:147.94111251831055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5314 prompt_cache_len:5151 prompt_cache_ratio:0.9693263078660143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:184.28349494934082ms total_cost_time:184.32331085205078ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5316 prompt_cache_len:5154 prompt_cache_ratio:0.9695259593679458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:173.2776165008545ms total_cost_time:173.30336570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5271 prompt_cache_len:5145 prompt_cache_ratio:0.9760956175298805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 -DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:08 [batch.py:51] router release req id 8 -INFO 06-24 20:03:08 [batch.py:51] router release req id 120 -INFO 06-24 20:03:08 [batch.py:51] router release req id 400 -INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.11314868927001953 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.06896233558654785 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.06373453140258789 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.11490797996520996 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.07211685180664062 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.06814074516296387 s -DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=140511977231311428800538261355338594472, time:1750766588.7198162s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:08 [manager.py:391] -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:198.1799602508545ms total_cost_time:198.22072982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5315 prompt_cache_len:5151 prompt_cache_ratio:0.9691439322671684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:153.9597511291504ms total_cost_time:153.98621559143066ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5317 prompt_cache_len:5154 prompt_cache_ratio:0.9693436148203874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:170.24636268615723ms total_cost_time:170.28427124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5272 prompt_cache_len:5145 prompt_cache_ratio:0.9759104704097117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 -DEBUG 06-24 20:03:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:08 [batch.py:51] router release req id 8 -INFO 06-24 20:03:08 [batch.py:51] router release req id 120 -INFO 06-24 20:03:08 [batch.py:51] router release req id 400 -INFO 06-24 20:03:08 [manager.py:224] router recive req id 8 cost time 0.0884101390838623 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 120 cost time 0.082305908203125 s -INFO 06-24 20:03:08 [manager.py:224] router recive req id 400 cost time 0.07048821449279785 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 8 cost time 0.09008431434631348 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 120 cost time 0.08532214164733887 s -INFO 06-24 20:03:08 [manager.py:68] detokenization recv req id 400 cost time 0.07485628128051758 s -DEBUG 06-24 20:03:08 [manager.py:391] Prefill Batch: batch_id=221469127664766532209913394809552239708, time:1750766588.901205s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:08 [manager.py:391] -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:147.83906936645508ms total_cost_time:147.88341522216797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5316 prompt_cache_len:5151 prompt_cache_ratio:0.968961625282167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:08 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:183.5784912109375ms total_cost_time:183.63213539123535ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:5318 prompt_cache_len:5154 prompt_cache_ratio:0.9691613388491914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:172.09267616271973ms total_cost_time:172.12605476379395ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5273 prompt_cache_len:5145 prompt_cache_ratio:0.9757253935141286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 -INFO 06-24 20:03:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 -DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:09 [batch.py:51] router release req id 8 -INFO 06-24 20:03:09 [batch.py:51] router release req id 120 -INFO 06-24 20:03:09 [batch.py:51] router release req id 400 -INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.11386823654174805 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.07115316390991211 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06558728218078613 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.11551022529602051 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07411003112792969 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06966829299926758 s -DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=20319516309224191970199725162782006250, time:1750766589.0809584s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:09 [manager.py:391] -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:8 first_token_cost:198.61435890197754ms total_cost_time:198.66013526916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5317 prompt_cache_len:5151 prompt_cache_ratio:0.9687793868722964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:120 first_token_cost:155.8363437652588ms total_cost_time:155.86304664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5319 prompt_cache_len:5154 prompt_cache_ratio:0.9689791314156796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:08 lightllm_req_id:400 first_token_cost:171.28586769104004ms total_cost_time:171.32306098937988ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5274 prompt_cache_len:5145 prompt_cache_ratio:0.9755403868031854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 -DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:09 [batch.py:51] router release req id 8 -INFO 06-24 20:03:09 [batch.py:51] router release req id 120 -INFO 06-24 20:03:09 [batch.py:51] router release req id 400 -INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.24782609939575195 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.24282050132751465 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.23194313049316406 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.24942564964294434 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.24591469764709473 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.23625755310058594 s -DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=235892825169247348311076354257188749409, time:1750766589.4237509s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:09 [manager.py:391] -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:345.47924995422363ms total_cost_time:345.5231189727783ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5318 prompt_cache_len:5151 prompt_cache_ratio:0.9685972169988718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:340.5113220214844ms total_cost_time:340.53707122802734ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5320 prompt_cache_len:5154 prompt_cache_ratio:0.968796992481203 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:334.89131927490234ms total_cost_time:334.92469787597656ms,out_token_counter:1 mean_per_token_cost_time: 0.03337860107421875ms prompt_token_num:5275 prompt_cache_len:5145 prompt_cache_ratio:0.9753554502369668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 -DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:09 [batch.py:51] router release req id 8 -INFO 06-24 20:03:09 [batch.py:51] router release req id 120 -INFO 06-24 20:03:09 [batch.py:51] router release req id 400 -INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.07388758659362793 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.06824827194213867 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06154179573059082 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.07535743713378906 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07131409645080566 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06591558456420898 s -DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=81903583301967354346757096417475418656, time:1750766589.6045299s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:09 [manager.py:391] -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:146.39520645141602ms total_cost_time:146.4381217956543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5319 prompt_cache_len:5151 prompt_cache_ratio:0.9684151156232375 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:173.91061782836914ms total_cost_time:173.9494800567627ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5321 prompt_cache_len:5154 prompt_cache_ratio:0.9686149220071415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:167.280912399292ms total_cost_time:167.30690002441406ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5276 prompt_cache_len:5145 prompt_cache_ratio:0.9751705837755875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 -DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:09 [batch.py:51] router release req id 8 -INFO 06-24 20:03:09 [batch.py:51] router release req id 120 -INFO 06-24 20:03:09 [batch.py:51] router release req id 400 -INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.10403776168823242 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.06827974319458008 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.06267166137695312 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.10580134391784668 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.07145380973815918 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06708765029907227 s -DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=338763151939561981775034878317933767378, time:1750766589.7862425s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:09 [manager.py:391] -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:198.16231727600098ms total_cost_time:198.20594787597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5320 prompt_cache_len:5151 prompt_cache_ratio:0.9682330827067669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:162.35804557800293ms total_cost_time:162.3842716217041ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5322 prompt_cache_len:5154 prompt_cache_ratio:0.9684329199549042 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 -INFO 06-24 20:03:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:178.1024932861328ms total_cost_time:178.14159393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5277 prompt_cache_len:5145 prompt_cache_ratio:0.9749857873791927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 -DEBUG 06-24 20:03:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:09 [batch.py:51] router release req id 8 -INFO 06-24 20:03:09 [batch.py:51] router release req id 120 -INFO 06-24 20:03:09 [batch.py:51] router release req id 400 -INFO 06-24 20:03:09 [manager.py:224] router recive req id 8 cost time 0.07581067085266113 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 120 cost time 0.07088828086853027 s -INFO 06-24 20:03:09 [manager.py:224] router recive req id 400 cost time 0.05869793891906738 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 8 cost time 0.07762432098388672 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 120 cost time 0.0740969181060791 s -INFO 06-24 20:03:09 [manager.py:68] detokenization recv req id 400 cost time 0.06306934356689453 s -DEBUG 06-24 20:03:09 [manager.py:391] Prefill Batch: batch_id=40901409512790474375518464984594005385, time:1750766589.9674773s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:09 [manager.py:391] -ERROR 06-24 20:03:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:8 first_token_cost:145.59650421142578ms total_cost_time:145.64037322998047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5321 prompt_cache_len:5151 prompt_cache_ratio:0.9680511182108626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:120 first_token_cost:175.22168159484863ms total_cost_time:175.26912689208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:5323 prompt_cache_len:5154 prompt_cache_ratio:0.968250986285929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:09 lightllm_req_id:400 first_token_cost:163.1004810333252ms total_cost_time:163.1300449371338ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:5278 prompt_cache_len:5145 prompt_cache_ratio:0.9748010610079576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 -DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:10 [batch.py:51] router release req id 8 -INFO 06-24 20:03:10 [batch.py:51] router release req id 120 -INFO 06-24 20:03:10 [batch.py:51] router release req id 400 -INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.10704231262207031 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.06838369369506836 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.06333398818969727 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.10885405540466309 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.07172966003417969 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.06785249710083008 s -DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=266292312149013768256993696514765246322, time:1750766590.1492386s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:10 [manager.py:391] -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:199.02324676513672ms total_cost_time:199.06854629516602ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5322 prompt_cache_len:5151 prompt_cache_ratio:0.967869222096956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:160.4321002960205ms total_cost_time:160.45761108398438ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5324 prompt_cache_len:5154 prompt_cache_ratio:0.968069120961683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:328.9916515350342ms total_cost_time:329.04577255249023ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5279 prompt_cache_len:5145 prompt_cache_ratio:0.9746164046220875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 -DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:10 [batch.py:51] router release req id 8 -INFO 06-24 20:03:10 [batch.py:51] router release req id 120 -INFO 06-24 20:03:10 [batch.py:51] router release req id 400 -INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.2432699203491211 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.23871374130249023 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.07324957847595215 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.24477672576904297 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.24170279502868652 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.07749390602111816 s -DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=322481225434558682014507328874036955519, time:1750766590.4936216s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:10 [manager.py:391] -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:297.9154586791992ms total_cost_time:297.9590892791748ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5323 prompt_cache_len:5151 prompt_cache_ratio:0.9676873943265076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:342.2665596008301ms total_cost_time:342.30685234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5325 prompt_cache_len:5154 prompt_cache_ratio:0.967887323943662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:176.88989639282227ms total_cost_time:176.91516876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5280 prompt_cache_len:5145 prompt_cache_ratio:0.9744318181818182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 -DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:10 [batch.py:51] router release req id 8 -INFO 06-24 20:03:10 [batch.py:51] router release req id 120 -INFO 06-24 20:03:10 [batch.py:51] router release req id 400 -INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.12222766876220703 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.06864333152770996 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.0626680850982666 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.12378525733947754 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.07193636894226074 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.06710553169250488 s -DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=220151859614946108368536776786202970805, time:1750766590.6743085s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:10 [manager.py:391] -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:200.00195503234863ms total_cost_time:200.04510879516602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5324 prompt_cache_len:5151 prompt_cache_ratio:0.9675056348610067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:146.6064453125ms total_cost_time:146.63290977478027ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5326 prompt_cache_len:5154 prompt_cache_ratio:0.9677055951933909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:165.28987884521484ms total_cost_time:165.3306484222412ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5281 prompt_cache_len:5145 prompt_cache_ratio:0.9742473016474152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 -DEBUG 06-24 20:03:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:10 [batch.py:51] router release req id 8 -INFO 06-24 20:03:10 [batch.py:51] router release req id 120 -INFO 06-24 20:03:10 [batch.py:51] router release req id 400 -INFO 06-24 20:03:10 [manager.py:224] router recive req id 8 cost time 0.09436726570129395 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 120 cost time 0.08864331245422363 s -INFO 06-24 20:03:10 [manager.py:224] router recive req id 400 cost time 0.0736687183380127 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 8 cost time 0.09592103958129883 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 120 cost time 0.09162449836730957 s -INFO 06-24 20:03:10 [manager.py:68] detokenization recv req id 400 cost time 0.07790493965148926 s -DEBUG 06-24 20:03:10 [manager.py:391] Prefill Batch: batch_id=128487460138582329053149305547734082855, time:1750766590.855353s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:10 [manager.py:391] -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:195.4941749572754ms total_cost_time:195.63674926757812ms,out_token_counter:1 mean_per_token_cost_time: 0.14257431030273438ms prompt_token_num:5325 prompt_cache_len:5151 prompt_cache_ratio:0.9673239436619718 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:190.49668312072754ms total_cost_time:190.52457809448242ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5327 prompt_cache_len:5154 prompt_cache_ratio:0.9675239346724235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:178.20215225219727ms total_cost_time:178.2360076904297ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:5282 prompt_cache_len:5145 prompt_cache_ratio:0.9740628549791746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 -INFO 06-24 20:03:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 -DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:11 [batch.py:51] router release req id 8 -INFO 06-24 20:03:11 [batch.py:51] router release req id 120 -INFO 06-24 20:03:11 [batch.py:51] router release req id 400 -INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.06902575492858887 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06267142295837402 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.0565953254699707 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.07071208953857422 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.06584715843200684 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.060863494873046875 s -DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=290035317868547564990657383240947801738, time:1750766591.0376327s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:11 [manager.py:391] -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:8 first_token_cost:142.7133083343506ms total_cost_time:142.75860786437988ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5326 prompt_cache_len:5151 prompt_cache_ratio:0.9671423206909501 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:120 first_token_cost:166.78762435913086ms total_cost_time:166.82910919189453ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5328 prompt_cache_len:5154 prompt_cache_ratio:0.9673423423423423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:10 lightllm_req_id:400 first_token_cost:160.65025329589844ms total_cost_time:160.67814826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5283 prompt_cache_len:5145 prompt_cache_ratio:0.9738784781374219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 -DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:11 [batch.py:51] router release req id 8 -INFO 06-24 20:03:11 [batch.py:51] router release req id 120 -INFO 06-24 20:03:11 [batch.py:51] router release req id 400 -INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.10183286666870117 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06792640686035156 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.0631246566772461 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.10357499122619629 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.07109856605529785 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.06754469871520996 s -DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=24695960871113498125025115045639151037, time:1750766591.2185159s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:11 [manager.py:391] -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:199.5987892150879ms total_cost_time:199.64218139648438ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5327 prompt_cache_len:5151 prompt_cache_ratio:0.9669607659095175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:317.3503875732422ms total_cost_time:317.3949718475342ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5329 prompt_cache_len:5154 prompt_cache_ratio:0.9671608181647589 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:329.5876979827881ms total_cost_time:329.62703704833984ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5284 prompt_cache_len:5145 prompt_cache_ratio:0.9736941710825132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 -DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:11 [batch.py:51] router release req id 8 -INFO 06-24 20:03:11 [batch.py:51] router release req id 120 -INFO 06-24 20:03:11 [batch.py:51] router release req id 400 -INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.24199199676513672 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.08997678756713867 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.07400131225585938 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.24371337890625 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.0930783748626709 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.07832789421081543 s -DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=57661434145422022087137357498451544339, time:1750766591.563655s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:11 [manager.py:391] -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:300.83250999450684ms total_cost_time:300.8759021759033ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5328 prompt_cache_len:5151 prompt_cache_ratio:0.9667792792792793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:194.06867027282715ms total_cost_time:194.1089630126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5330 prompt_cache_len:5154 prompt_cache_ratio:0.9669793621013133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:178.14064025878906ms total_cost_time:178.1916618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:5285 prompt_cache_len:5145 prompt_cache_ratio:0.9735099337748344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 -DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:11 [batch.py:51] router release req id 8 -INFO 06-24 20:03:11 [batch.py:51] router release req id 120 -INFO 06-24 20:03:11 [batch.py:51] router release req id 400 -INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.11713314056396484 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.06909584999084473 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.06452155113220215 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.1187899112701416 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.07206034660339355 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.06853246688842773 s -DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=317777703859871646095525156885254569624, time:1750766591.7457037s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:11 [manager.py:391] -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:199.34940338134766ms total_cost_time:199.39279556274414ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5329 prompt_cache_len:5151 prompt_cache_ratio:0.966597860761869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:151.2582302093506ms total_cost_time:151.28517150878906ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5331 prompt_cache_len:5154 prompt_cache_ratio:0.9667979741136747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:168.609619140625ms total_cost_time:168.64991188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5286 prompt_cache_len:5145 prompt_cache_ratio:0.9733257661748014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 -DEBUG 06-24 20:03:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:11 [batch.py:51] router release req id 8 -INFO 06-24 20:03:11 [batch.py:51] router release req id 120 -INFO 06-24 20:03:11 [batch.py:51] router release req id 400 -INFO 06-24 20:03:11 [manager.py:224] router recive req id 8 cost time 0.11312317848205566 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 120 cost time 0.10802197456359863 s -INFO 06-24 20:03:11 [manager.py:224] router recive req id 400 cost time 0.09498333930969238 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 8 cost time 0.11467885971069336 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 120 cost time 0.1109166145324707 s -INFO 06-24 20:03:11 [manager.py:68] detokenization recv req id 400 cost time 0.09885668754577637 s -DEBUG 06-24 20:03:11 [manager.py:391] Prefill Batch: batch_id=313643772326803029637075869083573974472, time:1750766591.949918s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:11 [manager.py:391] -ERROR 06-24 20:03:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:8 first_token_cost:195.23978233337402ms total_cost_time:195.2826976776123ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5330 prompt_cache_len:5151 prompt_cache_ratio:0.9664165103189494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:120 first_token_cost:223.4516143798828ms total_cost_time:223.48880767822266ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5332 prompt_cache_len:5154 prompt_cache_ratio:0.9666166541635409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:11 lightllm_req_id:400 first_token_cost:210.21485328674316ms total_cost_time:210.24012565612793ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5287 prompt_cache_len:5145 prompt_cache_ratio:0.9731416682428599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 -DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:12 [batch.py:51] router release req id 8 -INFO 06-24 20:03:12 [batch.py:51] router release req id 120 -INFO 06-24 20:03:12 [batch.py:51] router release req id 400 -INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.10523557662963867 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.06858348846435547 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.06328058242797852 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.10685968399047852 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.07165408134460449 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.06772708892822266 s -DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=18784754208781915160655151353157979525, time:1750766592.14187s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:12 [manager.py:391] -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:199.97930526733398ms total_cost_time:200.02365112304688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5331 prompt_cache_len:5151 prompt_cache_ratio:0.9662352279122116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:163.30265998840332ms total_cost_time:163.3291244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5333 prompt_cache_len:5154 prompt_cache_ratio:0.9664354022126382 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:178.04765701293945ms total_cost_time:178.0855655670166ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5288 prompt_cache_len:5145 prompt_cache_ratio:0.9729576399394856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 -DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:12 [batch.py:51] router release req id 8 -INFO 06-24 20:03:12 [batch.py:51] router release req id 120 -INFO 06-24 20:03:12 [batch.py:51] router release req id 400 -INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.24019265174865723 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.23466897010803223 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.22370553016662598 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.24183392524719238 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.2377324104309082 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.22808003425598145 s -DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=156391083831002154803495374581616708161, time:1750766592.4854987s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:12 [manager.py:391] -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:296.7512607574463ms total_cost_time:296.7960834503174ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5332 prompt_cache_len:5151 prompt_cache_ratio:0.9660540135033758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:338.7103080749512ms total_cost_time:338.75060081481934ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5334 prompt_cache_len:5154 prompt_cache_ratio:0.9662542182227222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:327.8787136077881ms total_cost_time:327.90493965148926ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5289 prompt_cache_len:5145 prompt_cache_ratio:0.9727736812251844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 -DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:12 [batch.py:51] router release req id 8 -INFO 06-24 20:03:12 [batch.py:51] router release req id 120 -INFO 06-24 20:03:12 [batch.py:51] router release req id 400 -INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.11927652359008789 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.06856822967529297 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.06243705749511719 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.1210019588470459 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.0716392993927002 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.06669402122497559 s -DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=62189537672751645756612474582055411552, time:1750766592.6662848s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:12 [manager.py:391] -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:198.29964637756348ms total_cost_time:198.34208488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5333 prompt_cache_len:5151 prompt_cache_ratio:0.9658728670541908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:147.54295349121094ms total_cost_time:147.57013320922852ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5335 prompt_cache_len:5154 prompt_cache_ratio:0.9660731021555764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:164.5376682281494ms total_cost_time:164.57509994506836ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5290 prompt_cache_len:5145 prompt_cache_ratio:0.9725897920604915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 -DEBUG 06-24 20:03:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:12 [batch.py:51] router release req id 8 -INFO 06-24 20:03:12 [batch.py:51] router release req id 120 -INFO 06-24 20:03:12 [batch.py:51] router release req id 400 -INFO 06-24 20:03:12 [manager.py:224] router recive req id 8 cost time 0.09337854385375977 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 120 cost time 0.08781576156616211 s -INFO 06-24 20:03:12 [manager.py:224] router recive req id 400 cost time 0.07291984558105469 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 8 cost time 0.09497404098510742 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 120 cost time 0.09160900115966797 s -INFO 06-24 20:03:12 [manager.py:68] detokenization recv req id 400 cost time 0.07830214500427246 s -DEBUG 06-24 20:03:12 [manager.py:391] Prefill Batch: batch_id=75055817195679535222948774519941722017, time:1750766592.8478618s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:12 [manager.py:391] -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:195.2962875366211ms total_cost_time:195.33967971801758ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5334 prompt_cache_len:5151 prompt_cache_ratio:0.9656917885264342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:189.76879119873047ms total_cost_time:189.79430198669434ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5336 prompt_cache_len:5154 prompt_cache_ratio:0.9658920539730135 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:176.99289321899414ms total_cost_time:177.04224586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:5291 prompt_cache_len:5145 prompt_cache_ratio:0.9724059724059724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 -INFO 06-24 20:03:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 -DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:13 [batch.py:51] router release req id 8 -INFO 06-24 20:03:13 [batch.py:51] router release req id 120 -INFO 06-24 20:03:13 [batch.py:51] router release req id 400 -INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.07023143768310547 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06479072570800781 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.057845115661621094 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.07196855545043945 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.06797099113464355 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06226181983947754 s -DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=157606791495605492007818834588376794682, time:1750766593.030438s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:13 [manager.py:391] -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:8 first_token_cost:144.97065544128418ms total_cost_time:145.01285552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5335 prompt_cache_len:5151 prompt_cache_ratio:0.9655107778819119 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:120 first_token_cost:168.69854927062988ms total_cost_time:168.73621940612793ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5337 prompt_cache_len:5154 prompt_cache_ratio:0.9657110736368747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:12 lightllm_req_id:400 first_token_cost:161.8201732635498ms total_cost_time:161.84568405151367ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5292 prompt_cache_len:5145 prompt_cache_ratio:0.9722222222222222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 -DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:13 [batch.py:51] router release req id 8 -INFO 06-24 20:03:13 [batch.py:51] router release req id 120 -INFO 06-24 20:03:13 [batch.py:51] router release req id 400 -INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.10184240341186523 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06973958015441895 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.06485867500305176 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.1033639907836914 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.0726320743560791 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06896209716796875 s -DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=188242593780906835263794326487222722732, time:1750766593.2112694s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:13 [manager.py:391] -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:199.65195655822754ms total_cost_time:199.69582557678223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5336 prompt_cache_len:5151 prompt_cache_ratio:0.9653298350824587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:319.2763328552246ms total_cost_time:319.3178176879883ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5338 prompt_cache_len:5154 prompt_cache_ratio:0.9655301611090296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:326.72619819641113ms total_cost_time:326.7652988433838ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5293 prompt_cache_len:5145 prompt_cache_ratio:0.9720385414698659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 -DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:13 [batch.py:51] router release req id 8 -INFO 06-24 20:03:13 [batch.py:51] router release req id 120 -INFO 06-24 20:03:13 [batch.py:51] router release req id 400 -INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.23711180686950684 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.08566713333129883 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.0733346939086914 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.23889541625976562 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.08885645866394043 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.0777883529663086 s -DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=131936270174258965455591418755039847242, time:1750766593.5528991s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:13 [manager.py:391] -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:302.609920501709ms total_cost_time:302.65355110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5337 prompt_cache_len:5151 prompt_cache_ratio:0.9651489600899381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:190.64664840698242ms total_cost_time:190.68622589111328ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5339 prompt_cache_len:5154 prompt_cache_ratio:0.9653493163513767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:178.36570739746094ms total_cost_time:178.3902645111084ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:5294 prompt_cache_len:5145 prompt_cache_ratio:0.971854930109558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 -DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:13 [batch.py:51] router release req id 8 -INFO 06-24 20:03:13 [batch.py:51] router release req id 120 -INFO 06-24 20:03:13 [batch.py:51] router release req id 400 -INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.11217737197875977 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.06891465187072754 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.0636281967163086 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.11380553245544434 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.07216238975524902 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.06802606582641602 s -DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=10450261760703601424695349161846293356, time:1750766593.7355967s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:13 [manager.py:391] -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:200.0892162322998ms total_cost_time:200.1335620880127ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5338 prompt_cache_len:5151 prompt_cache_ratio:0.964968152866242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:156.99243545532227ms total_cost_time:157.01842308044434ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5340 prompt_cache_len:5154 prompt_cache_ratio:0.9651685393258427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:172.64246940612793ms total_cost_time:172.68109321594238ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5295 prompt_cache_len:5145 prompt_cache_ratio:0.9716713881019831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 -DEBUG 06-24 20:03:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:13 [batch.py:51] router release req id 8 -INFO 06-24 20:03:13 [batch.py:51] router release req id 120 -INFO 06-24 20:03:13 [batch.py:51] router release req id 400 -INFO 06-24 20:03:13 [manager.py:224] router recive req id 8 cost time 0.085052490234375 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 120 cost time 0.07938790321350098 s -INFO 06-24 20:03:13 [manager.py:224] router recive req id 400 cost time 0.06761765480041504 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 8 cost time 0.08659005165100098 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 120 cost time 0.08255577087402344 s -INFO 06-24 20:03:13 [manager.py:68] detokenization recv req id 400 cost time 0.07191276550292969 s -DEBUG 06-24 20:03:13 [manager.py:391] Prefill Batch: batch_id=169342856567867684146074955245936758200, time:1750766593.9175556s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:13 [manager.py:391] -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 current batch size: 3 -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:148.78106117248535ms total_cost_time:148.82421493530273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5339 prompt_cache_len:5151 prompt_cache_ratio:0.9647874133732909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 -ERROR 06-24 20:03:13 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:120 first_token_cost:184.75866317749023ms total_cost_time:184.7987174987793ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5341 prompt_cache_len:5154 prompt_cache_ratio:0.964987829994383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:400 first_token_cost:172.96719551086426ms total_cost_time:172.99222946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5296 prompt_cache_len:5145 prompt_cache_ratio:0.971487915407855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 -DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:14 [batch.py:51] router release req id 8 -INFO 06-24 20:03:14 [batch.py:51] router release req id 120 -INFO 06-24 20:03:14 [batch.py:51] router release req id 400 -INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.11292767524719238 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06888198852539062 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06382298469543457 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.11452674865722656 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07187414169311523 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.0680234432220459 s -DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=259080315080310031435413927249989612548, time:1750766594.0998464s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:14 [manager.py:391] -DEBUG 06-24 20:03:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 74123.011 tokens/s -DEBUG 06-24 20:03:14 [stats.py:37] Avg prompt tokens throughput: 74095.365 tokens/s -DEBUG 06-24 20:03:14 [stats.py:37] Avg generate tokens throughput: 27.646 tokens/s -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:13 lightllm_req_id:8 first_token_cost:198.92311096191406ms total_cost_time:198.96793365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5340 prompt_cache_len:5151 prompt_cache_ratio:0.9646067415730337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:154.84309196472168ms total_cost_time:154.86812591552734ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5342 prompt_cache_len:5154 prompt_cache_ratio:0.9648071883189816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:170.48883438110352ms total_cost_time:170.52817344665527ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5297 prompt_cache_len:5145 prompt_cache_ratio:0.9713045119879177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 -DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:14 [batch.py:51] router release req id 8 -INFO 06-24 20:03:14 [batch.py:51] router release req id 120 -INFO 06-24 20:03:14 [batch.py:51] router release req id 400 -INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.24832463264465332 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.24385905265808105 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.23159050941467285 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.2501237392425537 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.24709606170654297 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.23588991165161133 s -DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=141971760801296374767195326882432588540, time:1750766594.4423323s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:14 [manager.py:391] -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:346.5564250946045ms total_cost_time:346.6014862060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5341 prompt_cache_len:5151 prompt_cache_ratio:0.9644261374274481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:342.0062065124512ms total_cost_time:342.03243255615234ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5343 prompt_cache_len:5154 prompt_cache_ratio:0.9646266142616507 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:334.43188667297363ms total_cost_time:334.48076248168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:5298 prompt_cache_len:5145 prompt_cache_ratio:0.9711211778029445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 -DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:14 [batch.py:51] router release req id 8 -INFO 06-24 20:03:14 [batch.py:51] router release req id 120 -INFO 06-24 20:03:14 [batch.py:51] router release req id 400 -INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.07294654846191406 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06713199615478516 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06101107597351074 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.07456398010253906 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07023024559020996 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.06514811515808105 s -DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=40778025634488550235479753694553256226, time:1750766594.6247032s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:14 [manager.py:391] -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:143.79525184631348ms total_cost_time:143.83721351623535ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5342 prompt_cache_len:5151 prompt_cache_ratio:0.9642456008985398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:170.94969749450684ms total_cost_time:170.9880828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5344 prompt_cache_len:5154 prompt_cache_ratio:0.9644461077844312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:164.6571159362793ms total_cost_time:164.68238830566406ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5299 prompt_cache_len:5145 prompt_cache_ratio:0.9709379128137384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 -DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:14 [batch.py:51] router release req id 8 -INFO 06-24 20:03:14 [batch.py:51] router release req id 120 -INFO 06-24 20:03:14 [batch.py:51] router release req id 400 -INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.1055765151977539 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 120 cost time 0.06861662864685059 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06290388107299805 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.1073000431060791 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 120 cost time 0.07172536849975586 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.06704378128051758 s -DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=20284907941312092189839901193717122740, time:1750766594.8065596s req_ids:[8, 120, 400] -DEBUG 06-24 20:03:14 [manager.py:391] -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 120 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:200.00481605529785ms total_cost_time:200.04820823669434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5343 prompt_cache_len:5151 prompt_cache_ratio:0.9640651319483436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 120 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:120 first_token_cost:162.99962997436523ms total_cost_time:163.0268096923828ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5345 prompt_cache_len:5154 prompt_cache_ratio:0.9642656688493919 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 -INFO 06-24 20:03:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:172.1630096435547ms total_cost_time:172.20282554626465ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5300 prompt_cache_len:5145 prompt_cache_ratio:0.970754716981132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 -DEBUG 06-24 20:03:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:14 [batch.py:51] router release req id 8 -INFO 06-24 20:03:14 [batch.py:51] router release req id 120 -INFO 06-24 20:03:14 [batch.py:51] router release req id 400 -INFO 06-24 20:03:14 [manager.py:224] router recive req id 8 cost time 0.07947611808776855 s -INFO 06-24 20:03:14 [manager.py:224] router recive req id 400 cost time 0.06709527969360352 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 8 cost time 0.08098983764648438 s -INFO 06-24 20:03:14 [manager.py:68] detokenization recv req id 400 cost time 0.07028484344482422 s -DEBUG 06-24 20:03:14 [manager.py:391] Prefill Batch: batch_id=327174093909908919411840986831610256029, time:1750766594.9872627s req_ids:[8, 400] -DEBUG 06-24 20:03:14 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:8 first_token_cost:147.60255813598633ms total_cost_time:147.6445198059082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5344 prompt_cache_len:5151 prompt_cache_ratio:0.9638847305389222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -WARNING 06-24 20:03:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_539 and create again -INFO 06-24 20:03:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_539 -WARNING 06-24 20:03:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_539 and create again -INFO 06-24 20:03:15 [shm_array.py:30] create shm 12322_0_shm_prompts_539 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:14 lightllm_req_id:400 first_token_cost:150.19869804382324ms total_cost_time:150.2377986907959ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5301 prompt_cache_len:5145 prompt_cache_ratio:0.9705715902659876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.05526137351989746 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.04001045227050781 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.05663442611694336 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04280972480773926 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=189951137416322866039773932100182786338, time:1750766595.1170263s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:132.490873336792ms total_cost_time:132.53450393676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5345 prompt_cache_len:5151 prompt_cache_ratio:0.9637043966323667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:117.30670928955078ms total_cost_time:117.33293533325195ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5302 prompt_cache_len:5145 prompt_cache_ratio:0.9703885326291966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.04217696189880371 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03687310218811035 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04360365867614746 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.03970789909362793 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=46692940531810719990292236079464638852, time:1750766595.2450464s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:259.655237197876ms total_cost_time:259.69982147216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5346 prompt_cache_len:5151 prompt_cache_ratio:0.9635241301907969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:272.1405029296875ms total_cost_time:272.18079566955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5303 prompt_cache_len:5145 prompt_cache_ratio:0.9702055440316801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.06503462791442871 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.04769539833068848 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.06670260429382324 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.051019906997680664 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=136432012742320644167027789481584047054, time:1750766595.5323694s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:142.6372528076172ms total_cost_time:142.68136024475098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5347 prompt_cache_len:5151 prompt_cache_ratio:0.9633439311763605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:125.47922134399414ms total_cost_time:125.50592422485352ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5304 prompt_cache_len:5145 prompt_cache_ratio:0.9700226244343891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.04376411437988281 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03836464881896973 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04517722129821777 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04124140739440918 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=40478381412344561412632383900827468522, time:1750766595.662321s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:102.98466682434082ms total_cost_time:103.02901268005371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5348 prompt_cache_len:5151 prompt_cache_ratio:0.9631637995512341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:113.42120170593262ms total_cost_time:113.45887184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5305 prompt_cache_len:5145 prompt_cache_ratio:0.9698397737983034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.06212925910949707 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.0465695858001709 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.06363844871520996 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.04974842071533203 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=247422156102940647801278631174207531889, time:1750766595.7900999s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:139.8324966430664ms total_cost_time:139.8763656616211ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5349 prompt_cache_len:5151 prompt_cache_ratio:0.9629837352776219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:124.47714805603027ms total_cost_time:124.50242042541504ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5306 prompt_cache_len:5145 prompt_cache_ratio:0.9696569920844327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:15 [batch.py:51] router release req id 8 -INFO 06-24 20:03:15 [batch.py:51] router release req id 400 -INFO 06-24 20:03:15 [manager.py:224] router recive req id 8 cost time 0.0438838005065918 s -INFO 06-24 20:03:15 [manager.py:224] router recive req id 400 cost time 0.03831219673156738 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 8 cost time 0.04529142379760742 s -INFO 06-24 20:03:15 [manager.py:68] detokenization recv req id 400 cost time 0.041277170181274414 s -DEBUG 06-24 20:03:15 [manager.py:391] Prefill Batch: batch_id=98433266959917497981121697478807169414, time:1750766595.9207468s req_ids:[8, 400] -DEBUG 06-24 20:03:15 [manager.py:391] -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:104.61807250976562ms total_cost_time:104.66361045837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5350 prompt_cache_len:5151 prompt_cache_ratio:0.962803738317757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 -ERROR 06-24 20:03:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:114.96949195861816ms total_cost_time:115.00811576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5307 prompt_cache_len:5145 prompt_cache_ratio:0.9694742792538157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.06327080726623535 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.04785466194152832 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.06497001647949219 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.05095171928405762 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=35894921562136922557296646936210573730, time:1750766596.0499103s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:8 first_token_cost:142.64822006225586ms total_cost_time:142.69018173217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5351 prompt_cache_len:5151 prompt_cache_ratio:0.9626238086339002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:15 lightllm_req_id:400 first_token_cost:127.12407112121582ms total_cost_time:127.14934349060059ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5308 prompt_cache_len:5145 prompt_cache_ratio:0.9692916352675207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.04305100440979004 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.037767648696899414 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.04459071159362793 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.04080605506896973 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=38888879662320504780947184851210148357, time:1750766596.1816561s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:101.00221633911133ms total_cost_time:101.04703903198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5352 prompt_cache_len:5151 prompt_cache_ratio:0.9624439461883408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:113.91162872314453ms total_cost_time:113.94977569580078ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5309 prompt_cache_len:5145 prompt_cache_ratio:0.9691090600866453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -INFO 06-24 20:03:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.227386474609375 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.2101593017578125 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.229170560836792 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.21335315704345703 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=270143158965125304432810081785770064385, time:1750766596.4736764s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:298.37608337402344ms total_cost_time:298.419713973999ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5353 prompt_cache_len:5151 prompt_cache_ratio:0.9622641509433962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:294.1858768463135ms total_cost_time:294.2242622375488ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5310 prompt_cache_len:5145 prompt_cache_ratio:0.9689265536723164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.05416274070739746 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.041588783264160156 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.05591702461242676 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.044759273529052734 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=311394143339911508401203537531421472913, time:1750766596.6023881s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:131.15596771240234ms total_cost_time:131.19935989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5354 prompt_cache_len:5151 prompt_cache_ratio:0.962084422861412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:118.56532096862793ms total_cost_time:118.59130859375ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5311 prompt_cache_len:5145 prompt_cache_ratio:0.9687441159856901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.04376959800720215 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.039160728454589844 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.045458316802978516 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.04223227500915527 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=21990912508586108754065992746704248702, time:1750766596.7321157s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:113.62671852111816ms total_cost_time:113.66939544677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5355 prompt_cache_len:5151 prompt_cache_ratio:0.9619047619047619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:121.90103530883789ms total_cost_time:121.94037437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5312 prompt_cache_len:5145 prompt_cache_ratio:0.9685617469879518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.05467033386230469 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.04224729537963867 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.05626034736633301 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.045326948165893555 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=229560545551786866641632425460018259376, time:1750766596.8612866s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:131.14666938781738ms total_cost_time:131.18934631347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5356 prompt_cache_len:5151 prompt_cache_ratio:0.9617251680358476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:118.79324913024902ms total_cost_time:118.8192367553711ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5313 prompt_cache_len:5145 prompt_cache_ratio:0.9683794466403162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 -INFO 06-24 20:03:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 -DEBUG 06-24 20:03:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:16 [batch.py:51] router release req id 8 -INFO 06-24 20:03:16 [batch.py:51] router release req id 400 -INFO 06-24 20:03:16 [manager.py:224] router recive req id 8 cost time 0.042740821838378906 s -INFO 06-24 20:03:16 [manager.py:224] router recive req id 400 cost time 0.03650379180908203 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 8 cost time 0.04443788528442383 s -INFO 06-24 20:03:16 [manager.py:68] detokenization recv req id 400 cost time 0.03966951370239258 s -DEBUG 06-24 20:03:16 [manager.py:391] Prefill Batch: batch_id=186243878365602043137147869268313065741, time:1750766596.989342s req_ids:[8, 400] -DEBUG 06-24 20:03:16 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:8 first_token_cost:112.67781257629395ms total_cost_time:112.72048950195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5357 prompt_cache_len:5151 prompt_cache_ratio:0.9615456412170991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:16 lightllm_req_id:400 first_token_cost:117.2645092010498ms total_cost_time:117.30194091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5314 prompt_cache_len:5145 prompt_cache_ratio:0.9681972149040271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.053771257400512695 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.042955636978149414 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.055507659912109375 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.04607725143432617 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=230636701291539464809625689598923462140, time:1750766597.1193354s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:131.666898727417ms total_cost_time:131.71029090881348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5358 prompt_cache_len:5151 prompt_cache_ratio:0.9613661814109743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:120.93472480773926ms total_cost_time:120.97048759460449ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:5315 prompt_cache_len:5145 prompt_cache_ratio:0.9680150517403575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04317283630371094 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.03769993782043457 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.04466533660888672 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.04063582420349121 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=120869968107718192522984805409364279403, time:1750766597.2487464s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:264.00065422058105ms total_cost_time:264.04285430908203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5359 prompt_cache_len:5151 prompt_cache_ratio:0.961186788579959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:279.6156406402588ms total_cost_time:279.65474128723145ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5316 prompt_cache_len:5145 prompt_cache_ratio:0.9678329571106095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.06868243217468262 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04792928695678711 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.07042360305786133 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.051039695739746094 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=185462889001181234029293729776561510086, time:1750766597.544036s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:146.11434936523438ms total_cost_time:146.15726470947266ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5360 prompt_cache_len:5151 prompt_cache_ratio:0.9610074626865671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:125.28634071350098ms total_cost_time:125.31399726867676ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5317 prompt_cache_len:5145 prompt_cache_ratio:0.9676509309761143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04425168037414551 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.03928947448730469 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.045926570892333984 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.042326927185058594 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=188593728979199905353980186123864104054, time:1750766597.6732187s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:96.36425971984863ms total_cost_time:96.4057445526123ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5361 prompt_cache_len:5151 prompt_cache_ratio:0.9608282036933408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:116.02663993835449ms total_cost_time:116.06526374816895ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5318 prompt_cache_len:5145 prompt_cache_ratio:0.9674689732982324 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.07289242744445801 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04809832572937012 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.07454705238342285 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.05107879638671875 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=280100085227197225193040152187720737292, time:1750766597.8044372s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:148.35596084594727ms total_cost_time:148.40197563171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5362 prompt_cache_len:5151 prompt_cache_ratio:0.9606490115628497 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:125.0462532043457ms total_cost_time:125.08416175842285ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5319 prompt_cache_len:5145 prompt_cache_ratio:0.9672870840383531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 -INFO 06-24 20:03:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 -DEBUG 06-24 20:03:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:17 [batch.py:51] router release req id 8 -INFO 06-24 20:03:17 [batch.py:51] router release req id 400 -INFO 06-24 20:03:17 [manager.py:224] router recive req id 8 cost time 0.04773211479187012 s -INFO 06-24 20:03:17 [manager.py:224] router recive req id 400 cost time 0.04088282585144043 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 8 cost time 0.04934430122375488 s -INFO 06-24 20:03:17 [manager.py:68] detokenization recv req id 400 cost time 0.044008493423461914 s -DEBUG 06-24 20:03:17 [manager.py:391] Prefill Batch: batch_id=156165060869680668527641726546416142808, time:1750766597.934053s req_ids:[8, 400] -DEBUG 06-24 20:03:17 [manager.py:391] -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:8 first_token_cost:125.30040740966797ms total_cost_time:125.34427642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5363 prompt_cache_len:5151 prompt_cache_ratio:0.9604698862576916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:17 lightllm_req_id:400 first_token_cost:118.55173110961914ms total_cost_time:118.57914924621582ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5320 prompt_cache_len:5145 prompt_cache_ratio:0.9671052631578947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.04442906379699707 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.03907513618469238 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.04649662971496582 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04331088066101074 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=273966127553962088038974822208457010276, time:1750766598.0649917s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:116.63603782653809ms total_cost_time:116.68062210083008ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5364 prompt_cache_len:5151 prompt_cache_ratio:0.9602908277404921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:122.51424789428711ms total_cost_time:122.55191802978516ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5321 prompt_cache_len:5145 prompt_cache_ratio:0.9669235106183048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.055776119232177734 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.04394197463989258 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.05722236633300781 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.046793222427368164 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=117949674048193035804295712480210034488, time:1750766598.1982203s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:134.31692123413086ms total_cost_time:134.35959815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5365 prompt_cache_len:5151 prompt_cache_ratio:0.9601118359739049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:122.57075309753418ms total_cost_time:122.59793281555176ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5322 prompt_cache_len:5145 prompt_cache_ratio:0.9667418263810598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.21005940437316895 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.20437359809875488 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.2119007110595703 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.2075343132019043 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=214011622827882394983153150710142588809, time:1750766598.4954398s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:288.6219024658203ms total_cost_time:288.6662483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5366 prompt_cache_len:5151 prompt_cache_ratio:0.9599329109206113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:282.7904224395752ms total_cost_time:282.81712532043457ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5323 prompt_cache_len:5145 prompt_cache_ratio:0.9665602104076648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.043175697326660156 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.03703474998474121 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.044617652893066406 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.03991079330444336 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=301697731384390924163457577554198952603, time:1750766598.6244838s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:114.58826065063477ms total_cost_time:114.63212966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5367 prompt_cache_len:5151 prompt_cache_ratio:0.9597540525433202 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:120.16773223876953ms total_cost_time:120.20540237426758ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5324 prompt_cache_len:5145 prompt_cache_ratio:0.9663786626596544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.052245378494262695 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.04084420204162598 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.053244590759277344 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04190850257873535 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=102727216634116764231115538267062852807, time:1750766598.7532914s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:129.8372745513916ms total_cost_time:129.87899780273438ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5368 prompt_cache_len:5151 prompt_cache_ratio:0.959575260804769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:118.35575103759766ms total_cost_time:118.38269233703613ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5325 prompt_cache_len:5145 prompt_cache_ratio:0.9661971830985916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.041342735290527344 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.036348581314086914 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.042851924896240234 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.039415836334228516 s -DEBUG 06-24 20:03:18 [manager.py:391] Prefill Batch: batch_id=98124483665698792647859000193566248540, time:1750766598.8847835s req_ids:[8, 400] -DEBUG 06-24 20:03:18 [manager.py:391] -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:110.37969589233398ms total_cost_time:110.43381690979004ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5369 prompt_cache_len:5151 prompt_cache_ratio:0.9593965356677221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 -ERROR 06-24 20:03:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:117.15388298034668ms total_cost_time:117.19298362731934ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5326 prompt_cache_len:5145 prompt_cache_ratio:0.9660157716860683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 -DEBUG 06-24 20:03:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:18 [batch.py:51] router release req id 8 -INFO 06-24 20:03:18 [batch.py:51] router release req id 400 -INFO 06-24 20:03:18 [manager.py:224] router recive req id 8 cost time 0.05618596076965332 s -INFO 06-24 20:03:18 [manager.py:224] router recive req id 400 cost time 0.045150041580200195 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 8 cost time 0.05710721015930176 s -INFO 06-24 20:03:18 [manager.py:68] detokenization recv req id 400 cost time 0.04642224311828613 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=147849587969594960881713478086376920888, time:1750766599.014787s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:8 first_token_cost:134.51647758483887ms total_cost_time:134.55796241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5370 prompt_cache_len:5151 prompt_cache_ratio:0.9592178770949721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:18 lightllm_req_id:400 first_token_cost:123.38685989379883ms total_cost_time:123.4128475189209ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5327 prompt_cache_len:5145 prompt_cache_ratio:0.9658344283837057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.04154849052429199 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.036346435546875 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.04313373565673828 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03940391540527344 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=255306634295527561674110196835588646071, time:1750766599.1450534s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:106.36138916015625ms total_cost_time:106.40597343444824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5371 prompt_cache_len:5151 prompt_cache_ratio:0.959039285049339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:113.07692527770996ms total_cost_time:113.11459541320801ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5328 prompt_cache_len:5145 prompt_cache_ratio:0.9656531531531531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.05828714370727539 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.04680824279785156 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.05968165397644043 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.04972648620605469 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=30574143479048206985942298018467704674, time:1750766599.273315s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:146.4998722076416ms total_cost_time:146.5449333190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5372 prompt_cache_len:5151 prompt_cache_ratio:0.9588607594936709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:285.0158214569092ms total_cost_time:285.05873680114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5329 prompt_cache_len:5145 prompt_cache_ratio:0.9654719459560893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.19778752326965332 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.04796576499938965 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.199296236038208 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.0510556697845459 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=270871851661331763059887179467384104673, time:1750766599.5660577s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:275.1026153564453ms total_cost_time:275.1462459564209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5373 prompt_cache_len:5151 prompt_cache_ratio:0.9586823003908431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:125.35643577575684ms total_cost_time:125.38266181945801ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5330 prompt_cache_len:5145 prompt_cache_ratio:0.9652908067542214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.04194974899291992 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.035865068435668945 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.043467044830322266 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03886151313781738 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=112294743771034080161203355033738056235, time:1750766599.69536s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:115.8151626586914ms total_cost_time:115.85712432861328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5374 prompt_cache_len:5151 prompt_cache_ratio:0.9585039077037588 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:111.65165901184082ms total_cost_time:111.68527603149414ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:5331 prompt_cache_len:5145 prompt_cache_ratio:0.9651097355092854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.05036592483520508 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.043828487396240234 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.05179238319396973 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.046666860580444336 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=273266674224674944785396183209738039119, time:1750766599.8254528s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:128.71074676513672ms total_cost_time:128.7548542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5375 prompt_cache_len:5151 prompt_cache_ratio:0.9583255813953488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:122.37954139709473ms total_cost_time:122.41649627685547ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5332 prompt_cache_len:5145 prompt_cache_ratio:0.9649287321830458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 -INFO 06-24 20:03:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 -DEBUG 06-24 20:03:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:19 [batch.py:51] router release req id 8 -INFO 06-24 20:03:19 [batch.py:51] router release req id 400 -INFO 06-24 20:03:19 [manager.py:224] router recive req id 8 cost time 0.041793107986450195 s -INFO 06-24 20:03:19 [manager.py:224] router recive req id 400 cost time 0.03604006767272949 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 8 cost time 0.04351019859313965 s -INFO 06-24 20:03:19 [manager.py:68] detokenization recv req id 400 cost time 0.03925728797912598 s -DEBUG 06-24 20:03:19 [manager.py:391] Prefill Batch: batch_id=297545294467207005180114693576846680623, time:1750766599.9568624s req_ids:[8, 400] -DEBUG 06-24 20:03:19 [manager.py:391] -ERROR 06-24 20:03:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:8 first_token_cost:113.5258674621582ms total_cost_time:113.56949806213379ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5376 prompt_cache_len:5151 prompt_cache_ratio:0.9581473214285714 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:19 lightllm_req_id:400 first_token_cost:120.72896957397461ms total_cost_time:120.76687812805176ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5333 prompt_cache_len:5145 prompt_cache_ratio:0.964747796737296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.052423954010009766 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.040143728256225586 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.05401468276977539 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.0430912971496582 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=217055771786444408326993347723112055811, time:1750766600.0868733s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:130.37395477294922ms total_cost_time:130.418062210083ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5377 prompt_cache_len:5151 prompt_cache_ratio:0.9579691277664125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:118.05057525634766ms total_cost_time:118.07703971862793ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5334 prompt_cache_len:5145 prompt_cache_ratio:0.9645669291338582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.04285025596618652 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.03669095039367676 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04431319236755371 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.03964710235595703 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=112477783830639367390967217710355093768, time:1750766600.2170322s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:111.85002326965332ms total_cost_time:111.8929386138916ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5378 prompt_cache_len:5151 prompt_cache_ratio:0.9577910003718855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:118.47233772277832ms total_cost_time:118.51167678833008ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5335 prompt_cache_len:5145 prompt_cache_ratio:0.9643861293345829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.054384469985961914 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.04178619384765625 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.05599856376647949 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.04481172561645508 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=111760969116218091044910498362681902475, time:1750766600.3461525s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:291.4910316467285ms total_cost_time:291.55564308166504ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:5379 prompt_cache_len:5151 prompt_cache_ratio:0.9576129392080313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:278.98287773132324ms total_cost_time:279.0100574493408ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5336 prompt_cache_len:5145 prompt_cache_ratio:0.9642053973013494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.040621042251586914 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.035002946853637695 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04219484329223633 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.038141489028930664 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=276025679932591819049439770061506921453, time:1750766600.6369236s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:99.53069686889648ms total_cost_time:99.57313537597656ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5380 prompt_cache_len:5151 prompt_cache_ratio:0.9574349442379182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:112.55383491516113ms total_cost_time:112.59222030639648ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5337 prompt_cache_len:5145 prompt_cache_ratio:0.9640247329960652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.06434202194213867 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.04566383361816406 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.06608295440673828 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.048783302307128906 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=286135391832825627885999024620823385726, time:1750766600.7672696s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:144.6220874786377ms total_cost_time:144.6688175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5381 prompt_cache_len:5151 prompt_cache_ratio:0.9572570154246423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:125.84424018859863ms total_cost_time:125.87094306945801ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5338 prompt_cache_len:5145 prompt_cache_ratio:0.9638441363806669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:20 [batch.py:51] router release req id 8 -INFO 06-24 20:03:20 [batch.py:51] router release req id 400 -INFO 06-24 20:03:20 [manager.py:224] router recive req id 8 cost time 0.04098677635192871 s -INFO 06-24 20:03:20 [manager.py:224] router recive req id 400 cost time 0.03562450408935547 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 8 cost time 0.04268312454223633 s -INFO 06-24 20:03:20 [manager.py:68] detokenization recv req id 400 cost time 0.038706064224243164 s -DEBUG 06-24 20:03:20 [manager.py:391] Prefill Batch: batch_id=78164605853841112862269592373220035213, time:1750766600.8976705s req_ids:[8, 400] -DEBUG 06-24 20:03:20 [manager.py:391] -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:96.87423706054688ms total_cost_time:96.91619873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5382 prompt_cache_len:5151 prompt_cache_ratio:0.9570791527313266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 -ERROR 06-24 20:03:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:113.4788990020752ms total_cost_time:113.51895332336426ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5339 prompt_cache_len:5145 prompt_cache_ratio:0.9636636074171193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.06828618049621582 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.04655122756958008 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.06995296478271484 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.04954338073730469 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=297213816336514699910842439804809839254, time:1750766601.0273569s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:8 first_token_cost:145.74956893920898ms total_cost_time:145.79272270202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5383 prompt_cache_len:5151 prompt_cache_ratio:0.956901356121122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:20 lightllm_req_id:400 first_token_cost:123.9314079284668ms total_cost_time:123.95691871643066ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5340 prompt_cache_len:5145 prompt_cache_ratio:0.9634831460674157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.04177379608154297 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.03552532196044922 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.043331146240234375 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.03861212730407715 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=135925319698489064531153805945555175362, time:1750766601.157251s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:93.95551681518555ms total_cost_time:94.0089225769043ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5384 prompt_cache_len:5151 prompt_cache_ratio:0.9567236255572066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:112.30230331420898ms total_cost_time:112.33925819396973ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5341 prompt_cache_len:5145 prompt_cache_ratio:0.963302752293578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.07148361206054688 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.04772377014160156 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.07359766960144043 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.05205821990966797 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=134779627233529057769425696002701442732, time:1750766601.2885098s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:146.78692817687988ms total_cost_time:146.83008193969727ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5385 prompt_cache_len:5151 prompt_cache_ratio:0.9565459610027855 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:124.59278106689453ms total_cost_time:124.62759017944336ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:5342 prompt_cache_len:5145 prompt_cache_ratio:0.9631224260576563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.20799803733825684 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.20207691192626953 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.2096102237701416 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.2052173614501953 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=186208313997580636810905768666178473861, time:1750766601.5780487s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:288.12170028686523ms total_cost_time:288.1660461425781ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5386 prompt_cache_len:5151 prompt_cache_ratio:0.9563683624210917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:282.26184844970703ms total_cost_time:282.2885513305664ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5343 prompt_cache_len:5145 prompt_cache_ratio:0.9629421673217293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.04269051551818848 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.0370328426361084 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.04415607452392578 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.0400547981262207 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=222480278121724777104444868214524344190, time:1750766601.7098036s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:106.95958137512207ms total_cost_time:107.00416564941406ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5387 prompt_cache_len:5151 prompt_cache_ratio:0.9561908297753852 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:129.85682487487793ms total_cost_time:129.8964023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5344 prompt_cache_len:5145 prompt_cache_ratio:0.9627619760479041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:21 [batch.py:51] router release req id 8 -INFO 06-24 20:03:21 [batch.py:51] router release req id 400 -INFO 06-24 20:03:21 [manager.py:224] router recive req id 8 cost time 0.0907132625579834 s -INFO 06-24 20:03:21 [manager.py:224] router recive req id 400 cost time 0.06250143051147461 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 8 cost time 0.09232044219970703 s -INFO 06-24 20:03:21 [manager.py:68] detokenization recv req id 400 cost time 0.0655050277709961 s -DEBUG 06-24 20:03:21 [manager.py:391] Prefill Batch: batch_id=124571461464393980073213979239828388332, time:1750766601.8685126s req_ids:[8, 400] -DEBUG 06-24 20:03:21 [manager.py:391] -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:150.7575511932373ms total_cost_time:150.79951286315918ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5388 prompt_cache_len:5151 prompt_cache_ratio:0.9560133630289532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 -ERROR 06-24 20:03:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:154.83736991882324ms total_cost_time:154.8764705657959ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5345 prompt_cache_len:5145 prompt_cache_ratio:0.9625818521983162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.09172987937927246 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.06131577491760254 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.09335494041442871 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.06451940536499023 s -DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=142457900377968623178329445699247551843, time:1750766602.0300777s req_ids:[8, 400] -DEBUG 06-24 20:03:22 [manager.py:391] -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:8 first_token_cost:186.33580207824707ms total_cost_time:186.37871742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5389 prompt_cache_len:5151 prompt_cache_ratio:0.9558359621451105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:21 lightllm_req_id:400 first_token_cost:155.86423873901367ms total_cost_time:155.89189529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5346 prompt_cache_len:5145 prompt_cache_ratio:0.9624017957351291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05829787254333496 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05179548263549805 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.0599365234375 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05483746528625488 s -DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=234893896941947979002553866056014758351, time:1750766602.1918833s req_ids:[8, 400] -DEBUG 06-24 20:03:22 [manager.py:391] -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:150.9406566619873ms total_cost_time:150.9835720062256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5390 prompt_cache_len:5151 prompt_cache_ratio:0.9556586270871985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:144.44446563720703ms total_cost_time:144.4699764251709ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5347 prompt_cache_len:5145 prompt_cache_ratio:0.9622218066205349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05821728706359863 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05285143852233887 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.05965733528137207 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.0556492805480957 s -DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=177368139352387036710850507953880825522, time:1750766602.3511534s req_ids:[8, 400] -DEBUG 06-24 20:03:22 [manager.py:391] -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:147.5977897644043ms total_cost_time:147.64094352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5391 prompt_cache_len:5151 prompt_cache_ratio:0.9554813578185866 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:154.71291542053223ms total_cost_time:154.75702285766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5348 prompt_cache_len:5145 prompt_cache_ratio:0.9620418848167539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.06644487380981445 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05464601516723633 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.06823897361755371 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05769062042236328 s -DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=261174539934488379927568871207490788573, time:1750766602.5114079s req_ids:[8, 400] -DEBUG 06-24 20:03:22 [manager.py:391] -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:149.19114112854004ms total_cost_time:149.24335479736328ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:5392 prompt_cache_len:5151 prompt_cache_ratio:0.9553041543026706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:322.4632740020752ms total_cost_time:322.5069046020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5349 prompt_cache_len:5145 prompt_cache_ratio:0.9618620302860348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.24780631065368652 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.06302690505981445 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.24949955940246582 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.0659639835357666 s -DEBUG 06-24 20:03:22 [manager.py:391] Prefill Batch: batch_id=317924181214390439860970743783356698597, time:1750766602.8485909s req_ids:[8, 400] -DEBUG 06-24 20:03:22 [manager.py:391] -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:341.80521965026855ms total_cost_time:341.8586254119873ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5393 prompt_cache_len:5151 prompt_cache_ratio:0.9551270165028741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:157.02319145202637ms total_cost_time:157.06157684326172ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5350 prompt_cache_len:5145 prompt_cache_ratio:0.9616822429906542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 -INFO 06-24 20:03:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 -DEBUG 06-24 20:03:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:22 [batch.py:51] router release req id 8 -INFO 06-24 20:03:22 [batch.py:51] router release req id 400 -INFO 06-24 20:03:22 [manager.py:224] router recive req id 8 cost time 0.05843234062194824 s -INFO 06-24 20:03:22 [manager.py:224] router recive req id 400 cost time 0.05263257026672363 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 8 cost time 0.0601048469543457 s -INFO 06-24 20:03:22 [manager.py:68] detokenization recv req id 400 cost time 0.05573582649230957 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=234783476287721002190218136072740691845, time:1750766603.0100996s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:8 first_token_cost:151.06511116027832ms total_cost_time:151.1077880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5394 prompt_cache_len:5151 prompt_cache_ratio:0.9549499443826474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:22 lightllm_req_id:400 first_token_cost:145.22838592529297ms total_cost_time:145.25485038757324ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5351 prompt_cache_len:5145 prompt_cache_ratio:0.9615025228929173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 -DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:23 [batch.py:51] router release req id 8 -INFO 06-24 20:03:23 [batch.py:51] router release req id 400 -INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.05952906608581543 s -INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.05414128303527832 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.06118273735046387 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.05701899528503418 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=145475987631001248074123468473000341903, time:1750766603.1710052s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -INFO 06-24 20:03:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:03:23 [statics_utils.py:24] mean first cost: 220.15237989672426 ms -INFO 06-24 20:03:23 [statics_utils.py:24] mean per token cost: 0.27344202866706124 ms -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:144.13022994995117ms total_cost_time:144.17123794555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5395 prompt_cache_len:5151 prompt_cache_ratio:0.954772937905468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:150.2683162689209ms total_cost_time:150.30765533447266ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5352 prompt_cache_len:5145 prompt_cache_ratio:0.9613228699551569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 -DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:23 [batch.py:51] router release req id 8 -INFO 06-24 20:03:23 [batch.py:51] router release req id 400 -INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.07183074951171875 s -INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.05985760688781738 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.07356953620910645 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.0631256103515625 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=98074405731391139573687344266870355138, time:1750766603.3330717s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:149.07264709472656ms total_cost_time:149.11675453186035ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5396 prompt_cache_len:5151 prompt_cache_ratio:0.9545959970348407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:153.89466285705566ms total_cost_time:153.94091606140137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5353 prompt_cache_len:5145 prompt_cache_ratio:0.9611432841397347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 -DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:23 [batch.py:51] router release req id 8 -INFO 06-24 20:03:23 [batch.py:51] router release req id 400 -INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.07914233207702637 s -INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.06242012977600098 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0806882381439209 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06532621383666992 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=177230458683442775947328395995968344935, time:1750766603.4940922s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:149.7194766998291ms total_cost_time:149.7645378112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5397 prompt_cache_len:5151 prompt_cache_ratio:0.9544191217342969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:155.12943267822266ms total_cost_time:155.1685333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5354 prompt_cache_len:5145 prompt_cache_ratio:0.9609637654090399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 -DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:23 [batch.py:51] router release req id 8 -INFO 06-24 20:03:23 [batch.py:51] router release req id 400 -INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.08433914184570312 s -INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.0628652572631836 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0858919620513916 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06565260887145996 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=258063346900042113233655153145958006950, time:1750766603.6545858s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:150.58422088623047ms total_cost_time:150.62880516052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5398 prompt_cache_len:5151 prompt_cache_ratio:0.9542423119673953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 -ERROR 06-24 20:03:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:155.68113327026367ms total_cost_time:155.72094917297363ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5355 prompt_cache_len:5145 prompt_cache_ratio:0.9607843137254902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 -DEBUG 06-24 20:03:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:23 [batch.py:51] router release req id 8 -INFO 06-24 20:03:23 [batch.py:51] router release req id 400 -INFO 06-24 20:03:23 [manager.py:224] router recive req id 8 cost time 0.09006857872009277 s -INFO 06-24 20:03:23 [manager.py:224] router recive req id 400 cost time 0.06382942199707031 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 8 cost time 0.0915532112121582 s -INFO 06-24 20:03:23 [manager.py:68] detokenization recv req id 400 cost time 0.06651949882507324 s -DEBUG 06-24 20:03:23 [manager.py:391] Prefill Batch: batch_id=249229290258155323088474967584757917971, time:1750766603.8153276s req_ids:[8, 400] -DEBUG 06-24 20:03:23 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:8 first_token_cost:351.37438774108887ms total_cost_time:351.41777992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5399 prompt_cache_len:5151 prompt_cache_ratio:0.9540655676977218 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:23 lightllm_req_id:400 first_token_cost:329.23269271850586ms total_cost_time:329.2655944824219ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:5356 prompt_cache_len:5145 prompt_cache_ratio:0.9606049290515309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.06512832641601562 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.0543978214263916 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.06679320335388184 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.0573732852935791 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=52597344188287752416133989793924407275, time:1750766604.1495602s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -DEBUG 06-24 20:03:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 65490.148 tokens/s -DEBUG 06-24 20:03:24 [stats.py:37] Avg prompt tokens throughput: 65465.471 tokens/s -DEBUG 06-24 20:03:24 [stats.py:37] Avg generate tokens throughput: 24.677 tokens/s -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:147.5844383239746ms total_cost_time:147.6287841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5400 prompt_cache_len:5151 prompt_cache_ratio:0.9538888888888889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:149.15847778320312ms total_cost_time:149.19757843017578ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5357 prompt_cache_len:5145 prompt_cache_ratio:0.960425611349636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.07307100296020508 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06075239181518555 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.0745384693145752 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06353449821472168 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=182291598458812760724701045789992271971, time:1750766604.3106132s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:146.57258987426758ms total_cost_time:146.61598205566406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5401 prompt_cache_len:5151 prompt_cache_ratio:0.9537122755045362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:152.2524356842041ms total_cost_time:152.2986888885498ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5358 prompt_cache_len:5145 prompt_cache_ratio:0.9602463605823068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.07987260818481445 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06191205978393555 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.08144092559814453 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06473970413208008 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=332941924801027823445769937160025542787, time:1750766604.4717515s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:149.01494979858398ms total_cost_time:149.05905723571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5402 prompt_cache_len:5151 prompt_cache_ratio:0.9535357275083303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:159.64317321777344ms total_cost_time:159.682035446167ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5359 prompt_cache_len:5145 prompt_cache_ratio:0.9600671767120732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.08970522880554199 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06165480613708496 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.0912163257598877 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.06441521644592285 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=305586345978787976424141272683256054825, time:1750766604.6364968s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:149.17564392089844ms total_cost_time:149.21951293945312ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5403 prompt_cache_len:5151 prompt_cache_ratio:0.9533592448639645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:154.50143814086914ms total_cost_time:154.5395851135254ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5360 prompt_cache_len:5145 prompt_cache_ratio:0.9598880597014925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.09705662727355957 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.06425786018371582 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.09850859642028809 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.0669853687286377 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=115845301462857095522409489583683656259, time:1750766604.7983978s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:185.91952323913574ms total_cost_time:185.96291542053223ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5404 prompt_cache_len:5151 prompt_cache_ratio:0.9531828275351591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:152.97341346740723ms total_cost_time:153.0003547668457ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5361 prompt_cache_len:5145 prompt_cache_ratio:0.9597090095131505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 -DEBUG 06-24 20:03:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:24 [batch.py:51] router release req id 8 -INFO 06-24 20:03:24 [batch.py:51] router release req id 400 -INFO 06-24 20:03:24 [manager.py:224] router recive req id 8 cost time 0.04229736328125 s -INFO 06-24 20:03:24 [manager.py:224] router recive req id 400 cost time 0.03718709945678711 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 8 cost time 0.043793678283691406 s -INFO 06-24 20:03:24 [manager.py:68] detokenization recv req id 400 cost time 0.040232181549072266 s -DEBUG 06-24 20:03:24 [manager.py:391] Prefill Batch: batch_id=188934003509489378439395106842846349720, time:1750766604.9398494s req_ids:[8, 400] -DEBUG 06-24 20:03:24 [manager.py:391] -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:106.69994354248047ms total_cost_time:106.74548149108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5405 prompt_cache_len:5151 prompt_cache_ratio:0.9530064754856614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 -ERROR 06-24 20:03:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:400 first_token_cost:113.97814750671387ms total_cost_time:114.01557922363281ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5362 prompt_cache_len:5145 prompt_cache_ratio:0.9595300261096605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.05905604362487793 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04689764976501465 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.06046724319458008 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04974985122680664 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=106941296513059239210358134024724807212, time:1750766605.0696821s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:24 lightllm_req_id:8 first_token_cost:299.2374897003174ms total_cost_time:299.2825508117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5406 prompt_cache_len:5151 prompt_cache_ratio:0.9528301886792453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:288.59829902648926ms total_cost_time:288.6345386505127ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5363 prompt_cache_len:5145 prompt_cache_ratio:0.959351109453664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04875373840332031 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04164385795593262 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.05025887489318848 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04451394081115723 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=109518551772225882203425461416569783940, time:1750766605.3635254s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:126.4505386352539ms total_cost_time:126.49273872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5407 prompt_cache_len:5151 prompt_cache_ratio:0.9526539670797115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:119.39740180969238ms total_cost_time:119.42362785339355ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5364 prompt_cache_len:5145 prompt_cache_ratio:0.95917225950783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04301190376281738 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03754377365112305 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04456186294555664 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04060029983520508 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=301397941774356863546476412261306772242, time:1750766605.492426s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:116.78671836853027ms total_cost_time:116.83058738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5408 prompt_cache_len:5151 prompt_cache_ratio:0.9524778106508875 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:123.25358390808105ms total_cost_time:123.29983711242676ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5365 prompt_cache_len:5145 prompt_cache_ratio:0.9589934762348555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.05025053024291992 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03904008865356445 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.051779985427856445 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.0417790412902832 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=105624872389767533399666806461708471141, time:1750766605.6227462s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:129.0872097015381ms total_cost_time:129.13131713867188ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5409 prompt_cache_len:5151 prompt_cache_ratio:0.9523017193566279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:117.69700050354004ms total_cost_time:117.7220344543457ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5366 prompt_cache_len:5145 prompt_cache_ratio:0.9588147595974655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.04254436492919922 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.0374608039855957 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04412531852722168 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04032325744628906 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=24229803649190843203552023773833686566, time:1750766605.7525249s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:113.68703842163086ms total_cost_time:113.72900009155273ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5410 prompt_cache_len:5151 prompt_cache_ratio:0.9521256931608133 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:119.02165412902832ms total_cost_time:119.05813217163086ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5367 prompt_cache_len:5145 prompt_cache_ratio:0.9586361095584125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.052778005599975586 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.04199981689453125 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.05484318733215332 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04638528823852539 s -DEBUG 06-24 20:03:25 [manager.py:391] Prefill Batch: batch_id=177010643694584652993485228236392188125, time:1750766605.881679s req_ids:[8, 400] -DEBUG 06-24 20:03:25 [manager.py:391] -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:131.6516399383545ms total_cost_time:131.69455528259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5411 prompt_cache_len:5151 prompt_cache_ratio:0.9519497320273517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:120.88847160339355ms total_cost_time:120.91374397277832ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5368 prompt_cache_len:5145 prompt_cache_ratio:0.9584575260804769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 -INFO 06-24 20:03:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 -DEBUG 06-24 20:03:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:25 [batch.py:51] router release req id 8 -INFO 06-24 20:03:25 [batch.py:51] router release req id 400 -INFO 06-24 20:03:25 [manager.py:224] router recive req id 8 cost time 0.0415797233581543 s -INFO 06-24 20:03:25 [manager.py:224] router recive req id 400 cost time 0.03612041473388672 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 8 cost time 0.04372453689575195 s -INFO 06-24 20:03:25 [manager.py:68] detokenization recv req id 400 cost time 0.04061317443847656 s -DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=9730814389562709508790817266911640232, time:1750766606.0124953s req_ids:[8, 400] -DEBUG 06-24 20:03:26 [manager.py:391] -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:8 first_token_cost:112.6091480255127ms total_cost_time:112.65206336975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5412 prompt_cache_len:5151 prompt_cache_ratio:0.9517738359201774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:25 lightllm_req_id:400 first_token_cost:117.65170097351074ms total_cost_time:117.68770217895508ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5369 prompt_cache_len:5145 prompt_cache_ratio:0.9582790091264668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 -DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:26 [batch.py:51] router release req id 8 -INFO 06-24 20:03:26 [batch.py:51] router release req id 400 -INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.2191617488861084 s -INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.20856332778930664 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.22147083282470703 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.21335339546203613 s -DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=304176749670807226224668701127724980507, time:1750766606.30706s req_ids:[8, 400] -DEBUG 06-24 20:03:26 [manager.py:391] -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:297.47796058654785ms total_cost_time:297.52254486083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5413 prompt_cache_len:5151 prompt_cache_ratio:0.9515980048032514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:286.9689464569092ms total_cost_time:286.99541091918945ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5370 prompt_cache_len:5145 prompt_cache_ratio:0.9581005586592178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 -DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:26 [batch.py:51] router release req id 8 -INFO 06-24 20:03:26 [batch.py:51] router release req id 400 -INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.04301762580871582 s -INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.03847908973693848 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.04520916938781738 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.04186749458312988 s -DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=155768307003440881433306197369814046090, time:1750766606.438347s req_ids:[8, 400] -DEBUG 06-24 20:03:26 [manager.py:391] -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:148.1490135192871ms total_cost_time:148.193359375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5414 prompt_cache_len:5151 prompt_cache_ratio:0.9514222386405615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:157.11665153503418ms total_cost_time:157.15384483337402ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5371 prompt_cache_len:5145 prompt_cache_ratio:0.9579221746415938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 -DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:26 [batch.py:51] router release req id 8 -INFO 06-24 20:03:26 [batch.py:51] router release req id 400 -INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.09540843963623047 s -INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.08272886276245117 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.09698629379272461 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.08544135093688965 s -DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=115379664095328341896948840713000160689, time:1750766606.6432939s req_ids:[8, 400] -DEBUG 06-24 20:03:26 [manager.py:391] -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:199.63550567626953ms total_cost_time:199.6903419494629ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5415 prompt_cache_len:5151 prompt_cache_ratio:0.9512465373961219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:202.64840126037598ms total_cost_time:202.72135734558105ms,out_token_counter:1 mean_per_token_cost_time: 0.07295608520507812ms prompt_token_num:5372 prompt_cache_len:5145 prompt_cache_ratio:0.9577438570364855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 -DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:26 [batch.py:51] router release req id 8 -INFO 06-24 20:03:26 [batch.py:51] router release req id 400 -INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.08435773849487305 s -INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.06706047058105469 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.08589911460876465 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.06977581977844238 s -DEBUG 06-24 20:03:26 [manager.py:391] Prefill Batch: batch_id=293968352126310077614312109756214923554, time:1750766606.8370242s req_ids:[8, 400] -DEBUG 06-24 20:03:26 [manager.py:391] -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:147.58944511413574ms total_cost_time:147.63593673706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5416 prompt_cache_len:5151 prompt_cache_ratio:0.9510709010339734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 -ERROR 06-24 20:03:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:160.14909744262695ms total_cost_time:160.1850986480713ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5373 prompt_cache_len:5145 prompt_cache_ratio:0.9575656058068118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 -DEBUG 06-24 20:03:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:26 [batch.py:51] router release req id 8 -INFO 06-24 20:03:26 [batch.py:51] router release req id 400 -INFO 06-24 20:03:26 [manager.py:224] router recive req id 8 cost time 0.09514594078063965 s -INFO 06-24 20:03:26 [manager.py:224] router recive req id 400 cost time 0.06626176834106445 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 8 cost time 0.0965878963470459 s -INFO 06-24 20:03:26 [manager.py:68] detokenization recv req id 400 cost time 0.06908774375915527 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=167325842124086770004192277942009369444, time:1750766607.0014303s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:8 first_token_cost:188.58742713928223ms total_cost_time:188.63177299499512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5417 prompt_cache_len:5151 prompt_cache_ratio:0.9508953295181835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:26 lightllm_req_id:400 first_token_cost:159.67702865600586ms total_cost_time:159.70277786254883ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5374 prompt_cache_len:5145 prompt_cache_ratio:0.9573874209155192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 -DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:27 [batch.py:51] router release req id 8 -INFO 06-24 20:03:27 [batch.py:51] router release req id 400 -INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06267595291137695 s -INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.0576627254486084 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06410479545593262 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.060454368591308594 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=303111087904259815833922205561312474417, time:1750766607.1631618s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:157.99832344055176ms total_cost_time:158.04672241210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5418 prompt_cache_len:5151 prompt_cache_ratio:0.9507198228128461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:325.01983642578125ms total_cost_time:325.06251335144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5375 prompt_cache_len:5145 prompt_cache_ratio:0.9572093023255814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 -DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:27 [batch.py:51] router release req id 8 -INFO 06-24 20:03:27 [batch.py:51] router release req id 400 -INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.23650550842285156 s -INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.06451678276062012 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.2381153106689453 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06746101379394531 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=23737735877736789029864311632157714978, time:1750766607.5006576s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:301.5153408050537ms total_cost_time:301.5611171722412ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5419 prompt_cache_len:5151 prompt_cache_ratio:0.9505443808820816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:158.22863578796387ms total_cost_time:158.28299522399902ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5376 prompt_cache_len:5145 prompt_cache_ratio:0.95703125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 -DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:27 [batch.py:51] router release req id 8 -INFO 06-24 20:03:27 [batch.py:51] router release req id 400 -INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.09329581260681152 s -INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.06400322914123535 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.0947713851928711 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06665468215942383 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=184186379465645799903256587193586697744, time:1750766607.6640804s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:187.39676475524902ms total_cost_time:187.4384880065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5420 prompt_cache_len:5151 prompt_cache_ratio:0.9503690036900369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:157.944917678833ms total_cost_time:157.9723358154297ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5377 prompt_cache_len:5145 prompt_cache_ratio:0.956853263901804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 -DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:27 [batch.py:51] router release req id 8 -INFO 06-24 20:03:27 [batch.py:51] router release req id 400 -INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06305503845214844 s -INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.057465553283691406 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06470632553100586 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06039118766784668 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=26852431945214795417678461753140578788, time:1750766607.8283565s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 8 -ERROR 06-24 20:03:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:159.64293479919434ms total_cost_time:159.68775749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5421 prompt_cache_len:5151 prompt_cache_ratio:0.9501936912008855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:155.32898902893066ms total_cost_time:155.36212921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:5378 prompt_cache_len:5145 prompt_cache_ratio:0.9566753439940499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 -INFO 06-24 20:03:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 -DEBUG 06-24 20:03:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:27 [batch.py:51] router release req id 8 -INFO 06-24 20:03:27 [batch.py:51] router release req id 400 -INFO 06-24 20:03:27 [manager.py:224] router recive req id 8 cost time 0.06591558456420898 s -INFO 06-24 20:03:27 [manager.py:224] router recive req id 400 cost time 0.058904409408569336 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 8 cost time 0.06739091873168945 s -INFO 06-24 20:03:27 [manager.py:68] detokenization recv req id 400 cost time 0.06159090995788574 s -DEBUG 06-24 20:03:27 [manager.py:391] Prefill Batch: batch_id=330791697677108148587356519040261344416, time:1750766607.9936192s req_ids:[8, 400] -DEBUG 06-24 20:03:27 [manager.py:391] -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:8 first_token_cost:149.15871620178223ms total_cost_time:149.20306205749512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5422 prompt_cache_len:5151 prompt_cache_ratio:0.950018443378827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:27 lightllm_req_id:400 first_token_cost:152.6808738708496ms total_cost_time:152.71830558776855ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5379 prompt_cache_len:5145 prompt_cache_ratio:0.9564974902398216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 -DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:28 [batch.py:51] router release req id 8 -INFO 06-24 20:03:28 [batch.py:51] router release req id 400 -INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.07291269302368164 s -INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.0631561279296875 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.07444310188293457 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.06596684455871582 s -DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=338901319347185641443872713378132059222, time:1750766608.1535382s req_ids:[8, 400] -DEBUG 06-24 20:03:28 [manager.py:391] -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:148.37312698364258ms total_cost_time:148.41628074645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5423 prompt_cache_len:5151 prompt_cache_ratio:0.9498432601880877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:155.99584579467773ms total_cost_time:156.0525894165039ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:5380 prompt_cache_len:5145 prompt_cache_ratio:0.9563197026022305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 -DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:28 [batch.py:51] router release req id 8 -INFO 06-24 20:03:28 [batch.py:51] router release req id 400 -INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.08449220657348633 s -INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.06730389595031738 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.08582663536071777 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.06997990608215332 s -DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=247586718328975964051524694451745231976, time:1750766608.318872s req_ids:[8, 400] -DEBUG 06-24 20:03:28 [manager.py:391] -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:149.57404136657715ms total_cost_time:149.61743354797363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5424 prompt_cache_len:5151 prompt_cache_ratio:0.9496681415929203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:159.9726676940918ms total_cost_time:160.02678871154785ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:5381 prompt_cache_len:5145 prompt_cache_ratio:0.9561419810444155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 -DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:28 [batch.py:51] router release req id 8 -INFO 06-24 20:03:28 [batch.py:51] router release req id 400 -INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.091552734375 s -INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.06432533264160156 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.0930032730102539 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.0669867992401123 s -DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=337814848559277299921959166467115357587, time:1750766608.4799948s req_ids:[8, 400] -DEBUG 06-24 20:03:28 [manager.py:391] -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:199.8891830444336ms total_cost_time:199.93185997009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5425 prompt_cache_len:5151 prompt_cache_ratio:0.9494930875576036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:326.31921768188477ms total_cost_time:326.37929916381836ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:5382 prompt_cache_len:5145 prompt_cache_ratio:0.9559643255295429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 -DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:28 [batch.py:51] router release req id 8 -INFO 06-24 20:03:28 [batch.py:51] router release req id 400 -INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.22453880310058594 s -INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.0705423355102539 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.22608232498168945 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.07349467277526855 s -DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=60154028013799029199347481649118869184, time:1750766608.818252s req_ids:[8, 400] -DEBUG 06-24 20:03:28 [manager.py:391] -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:300.0452518463135ms total_cost_time:300.08864402770996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5426 prompt_cache_len:5151 prompt_cache_ratio:0.9493180980464431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 -ERROR 06-24 20:03:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:165.14205932617188ms total_cost_time:165.19641876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5383 prompt_cache_len:5145 prompt_cache_ratio:0.9557867360208062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 -DEBUG 06-24 20:03:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:28 [batch.py:51] router release req id 8 -INFO 06-24 20:03:28 [batch.py:51] router release req id 400 -INFO 06-24 20:03:28 [manager.py:224] router recive req id 8 cost time 0.0726935863494873 s -INFO 06-24 20:03:28 [manager.py:224] router recive req id 400 cost time 0.05223393440246582 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 8 cost time 0.07442879676818848 s -INFO 06-24 20:03:28 [manager.py:68] detokenization recv req id 400 cost time 0.0556330680847168 s -DEBUG 06-24 20:03:28 [manager.py:391] Prefill Batch: batch_id=154028495638990239280161464070004995579, time:1750766608.971124s req_ids:[8, 400] -DEBUG 06-24 20:03:28 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:8 first_token_cost:149.59239959716797ms total_cost_time:149.63722229003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5427 prompt_cache_len:5151 prompt_cache_ratio:0.94914317302377 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:28 lightllm_req_id:400 first_token_cost:131.31308555603027ms total_cost_time:131.3624382019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:5384 prompt_cache_len:5145 prompt_cache_ratio:0.9556092124814265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.05073976516723633 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.04285001754760742 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.052385807037353516 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.04587292671203613 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=163135921896879067316561220007680792676, time:1750766609.1041005s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:131.39724731445312ms total_cost_time:131.45852088928223ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:5428 prompt_cache_len:5151 prompt_cache_ratio:0.9489683124539425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:123.66914749145508ms total_cost_time:123.70848655700684ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5385 prompt_cache_len:5145 prompt_cache_ratio:0.9554317548746518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.04875373840332031 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.042389631271362305 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.05027008056640625 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.04531431198120117 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=66245248201670615578028118172994514477, time:1750766609.2394724s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:115.2949333190918ms total_cost_time:115.33713340759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5429 prompt_cache_len:5151 prompt_cache_ratio:0.9487935163013447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:122.15805053710938ms total_cost_time:122.20287322998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5386 prompt_cache_len:5145 prompt_cache_ratio:0.9552543631637579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.061919212341308594 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.048467159271240234 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.0633692741394043 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.05130314826965332 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=20460130740402664492274137011520852336, time:1750766609.372251s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:140.0299072265625ms total_cost_time:140.0902271270752ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5430 prompt_cache_len:5151 prompt_cache_ratio:0.9486187845303867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:126.73234939575195ms total_cost_time:126.76453590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:5387 prompt_cache_len:5145 prompt_cache_ratio:0.9550770373120475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.042932748794555664 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.036199331283569336 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.04449129104614258 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.03898000717163086 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=183371005889916883990738994599749282125, time:1750766609.506414s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:104.80642318725586ms total_cost_time:104.85124588012695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5431 prompt_cache_len:5151 prompt_cache_ratio:0.9484441171055055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:118.35598945617676ms total_cost_time:118.4084415435791ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:5388 prompt_cache_len:5145 prompt_cache_ratio:0.9548997772828508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.06884407997131348 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.048369646072387695 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.07041501998901367 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.05144786834716797 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=45311421111920125492293462332825522287, time:1750766609.6423876s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:147.84669876098633ms total_cost_time:147.89390563964844ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:5432 prompt_cache_len:5151 prompt_cache_ratio:0.9482695139911634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:127.69746780395508ms total_cost_time:127.73752212524414ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5389 prompt_cache_len:5145 prompt_cache_ratio:0.954722583039525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 -INFO 06-24 20:03:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 -DEBUG 06-24 20:03:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:29 [batch.py:51] router release req id 8 -INFO 06-24 20:03:29 [batch.py:51] router release req id 400 -INFO 06-24 20:03:29 [manager.py:224] router recive req id 8 cost time 0.20403599739074707 s -INFO 06-24 20:03:29 [manager.py:224] router recive req id 400 cost time 0.1997208595275879 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 8 cost time 0.20572781562805176 s -INFO 06-24 20:03:29 [manager.py:68] detokenization recv req id 400 cost time 0.20275378227233887 s -DEBUG 06-24 20:03:29 [manager.py:391] Prefill Batch: batch_id=102080339307434120591735184785497978270, time:1750766609.9338508s req_ids:[8, 400] -DEBUG 06-24 20:03:29 [manager.py:391] -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:8 first_token_cost:284.75284576416016ms total_cost_time:284.79504585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5433 prompt_cache_len:5151 prompt_cache_ratio:0.9480949751518498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:29 lightllm_req_id:400 first_token_cost:280.3466320037842ms total_cost_time:280.37452697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5390 prompt_cache_len:5145 prompt_cache_ratio:0.9545454545454546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0449519157409668 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.03904891014099121 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.046533823013305664 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.04213857650756836 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=126334105448398784146160656944666796615, time:1750766610.0661843s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:104.65312004089355ms total_cost_time:104.68101501464844ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5434 prompt_cache_len:5151 prompt_cache_ratio:0.9479205005520795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:116.09530448913574ms total_cost_time:116.11747741699219ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5391 prompt_cache_len:5145 prompt_cache_ratio:0.9543683917640512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.06585979461669922 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04912996292114258 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.06747126579284668 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.05209231376647949 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=242107810493557566891979709824189032318, time:1750766610.197466s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:146.9252109527588ms total_cost_time:146.95191383361816ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5435 prompt_cache_len:5151 prompt_cache_ratio:0.9477460901563938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:129.90307807922363ms total_cost_time:129.91714477539062ms,out_token_counter:1 mean_per_token_cost_time: 0.014066696166992188ms prompt_token_num:5392 prompt_cache_len:5145 prompt_cache_ratio:0.9541913946587537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0483396053314209 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04661202430725098 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.049868106842041016 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.0494232177734375 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=67592681871443124720309873873702244054, time:1750766610.334043s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:126.35612487792969ms total_cost_time:126.39927864074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5436 prompt_cache_len:5151 prompt_cache_ratio:0.9475717439293598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:124.53842163085938ms total_cost_time:124.56369400024414ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5393 prompt_cache_len:5145 prompt_cache_ratio:0.954014463193028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.0446925163269043 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.04045701026916504 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.046218156814575195 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.043189048767089844 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=260155226503569480391415024607049133174, time:1750766610.4645112s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:116.78838729858398ms total_cost_time:116.83058738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5437 prompt_cache_len:5151 prompt_cache_ratio:0.947397461835571 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:123.0618953704834ms total_cost_time:123.09694290161133ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5394 prompt_cache_len:5145 prompt_cache_ratio:0.9538375973303671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.05352020263671875 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.043241024017333984 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.0549314022064209 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.04600262641906738 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=313234555886742557445646386306637271280, time:1750766610.5965967s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:132.52925872802734ms total_cost_time:132.57145881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5438 prompt_cache_len:5151 prompt_cache_ratio:0.947223243839647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:122.29228019714355ms total_cost_time:122.31922149658203ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5395 prompt_cache_len:5145 prompt_cache_ratio:0.953660797034291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:30 [batch.py:51] router release req id 8 -INFO 06-24 20:03:30 [batch.py:51] router release req id 400 -INFO 06-24 20:03:30 [manager.py:224] router recive req id 8 cost time 0.04629039764404297 s -INFO 06-24 20:03:30 [manager.py:224] router recive req id 400 cost time 0.03888344764709473 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 8 cost time 0.04778409004211426 s -INFO 06-24 20:03:30 [manager.py:68] detokenization recv req id 400 cost time 0.041953086853027344 s -DEBUG 06-24 20:03:30 [manager.py:391] Prefill Batch: batch_id=198433364997030551283595780849039249513, time:1750766610.732075s req_ids:[8, 400] -DEBUG 06-24 20:03:30 [manager.py:391] -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:107.01417922973633ms total_cost_time:107.05995559692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5439 prompt_cache_len:5151 prompt_cache_ratio:0.9470490899062327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 -ERROR 06-24 20:03:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:304.6762943267822ms total_cost_time:304.7182559967041ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5396 prompt_cache_len:5145 prompt_cache_ratio:0.9534840622683469 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.2552757263183594 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.050196170806884766 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.25688695907592773 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05312943458557129 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=8463104666033899046106122913629846796, time:1750766611.0526986s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:8 first_token_cost:333.13965797424316ms total_cost_time:333.18352699279785ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5440 prompt_cache_len:5151 prompt_cache_ratio:0.946875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:30 lightllm_req_id:400 first_token_cost:128.03030014038086ms total_cost_time:128.05986404418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:5397 prompt_cache_len:5145 prompt_cache_ratio:0.953307392996109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.04332733154296875 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.03737449645996094 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.044718265533447266 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04013657569885254 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=88066736761649289802742310716448559315, time:1750766611.1830666s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:110.96954345703125ms total_cost_time:110.99648475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5441 prompt_cache_len:5151 prompt_cache_ratio:0.946700974085646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:116.01614952087402ms total_cost_time:116.05310440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5398 prompt_cache_len:5145 prompt_cache_ratio:0.9531307891811782 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.0576629638671875 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.047483205795288086 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.05909895896911621 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05029010772705078 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=281492459761609073606103375947265986960, time:1750766611.3139722s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:140.0768756866455ms total_cost_time:140.12837409973145ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:5442 prompt_cache_len:5151 prompt_cache_ratio:0.9465270121278941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:130.11527061462402ms total_cost_time:130.15127182006836ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5399 prompt_cache_len:5145 prompt_cache_ratio:0.9529542507871828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.04406142234802246 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.03776431083679199 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.045690059661865234 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04068279266357422 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=762836077333495209866306699082543465, time:1750766611.449527s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:104.22706604003906ms total_cost_time:104.27021980285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5443 prompt_cache_len:5151 prompt_cache_ratio:0.9463531140914937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:115.49568176269531ms total_cost_time:115.53430557250977ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5400 prompt_cache_len:5145 prompt_cache_ratio:0.9527777777777777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.06519579887390137 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.04776334762573242 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.06674647331237793 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.05061507225036621 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=21300543656571417567429651290240614390, time:1750766611.5809498s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:145.11632919311523ms total_cost_time:145.16019821166992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5444 prompt_cache_len:5151 prompt_cache_ratio:0.9461792799412196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:127.64120101928711ms total_cost_time:127.66695022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5401 prompt_cache_len:5145 prompt_cache_ratio:0.952601370116645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.20452284812927246 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.20039844512939453 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.20624089241027832 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.20335030555725098 s -DEBUG 06-24 20:03:31 [manager.py:391] Prefill Batch: batch_id=20733769253323551879912948740146819659, time:1750766611.8724234s req_ids:[8, 400] -DEBUG 06-24 20:03:31 [manager.py:391] -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:282.98497200012207ms total_cost_time:283.02764892578125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5445 prompt_cache_len:5151 prompt_cache_ratio:0.9460055096418732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:278.6588668823242ms total_cost_time:278.6848545074463ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5402 prompt_cache_len:5145 prompt_cache_ratio:0.9524250277674935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 -INFO 06-24 20:03:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 -DEBUG 06-24 20:03:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:31 [batch.py:51] router release req id 8 -INFO 06-24 20:03:31 [batch.py:51] router release req id 400 -INFO 06-24 20:03:31 [manager.py:224] router recive req id 8 cost time 0.0439298152923584 s -INFO 06-24 20:03:31 [manager.py:224] router recive req id 400 cost time 0.038420677185058594 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 8 cost time 0.046257734298706055 s -INFO 06-24 20:03:31 [manager.py:68] detokenization recv req id 400 cost time 0.04876399040222168 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=337857081280152104816477595388679153727, time:1750766612.004838s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:8 first_token_cost:112.43987083435059ms total_cost_time:112.48183250427246ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5446 prompt_cache_len:5151 prompt_cache_ratio:0.9458318031582813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:31 lightllm_req_id:400 first_token_cost:119.09031867980957ms total_cost_time:119.12846565246582ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5403 prompt_cache_len:5145 prompt_cache_ratio:0.9522487506940589 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:32 [batch.py:51] router release req id 8 -INFO 06-24 20:03:32 [batch.py:51] router release req id 400 -INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.058426618576049805 s -INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.047699689865112305 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.059938669204711914 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.05072283744812012 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=228066695473364257078105924335940953061, time:1750766612.1377354s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:136.1987590789795ms total_cost_time:136.25669479370117ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5447 prompt_cache_len:5151 prompt_cache_ratio:0.9456581604552965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:125.73456764221191ms total_cost_time:125.77176094055176ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:5404 prompt_cache_len:5145 prompt_cache_ratio:0.9520725388601037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:32 [batch.py:51] router release req id 8 -INFO 06-24 20:03:32 [batch.py:51] router release req id 400 -INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.04080080986022949 s -INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.03333711624145508 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.04228067398071289 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.036275625228881836 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=247604284250055934613918963996553235052, time:1750766612.2668216s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:104.88533973693848ms total_cost_time:104.92920875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5448 prompt_cache_len:5151 prompt_cache_ratio:0.9454845814977973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:111.44757270812988ms total_cost_time:111.49168014526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5405 prompt_cache_len:5145 prompt_cache_ratio:0.9518963922294172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:32 [batch.py:51] router release req id 8 -INFO 06-24 20:03:32 [batch.py:51] router release req id 400 -INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.06156516075134277 s -INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.047151803970336914 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.06313347816467285 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.0499722957611084 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=174429969657558533135303064167703084055, time:1750766612.3980412s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:148.0262279510498ms total_cost_time:148.0691432952881ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5449 prompt_cache_len:5151 prompt_cache_ratio:0.9453110662506882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:145.70903778076172ms total_cost_time:145.75815200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:5406 prompt_cache_len:5145 prompt_cache_ratio:0.9517203107658158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:32 [batch.py:51] router release req id 8 -INFO 06-24 20:03:32 [batch.py:51] router release req id 400 -INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.07125639915466309 s -INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.05977034568786621 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.0728905200958252 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.06276249885559082 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=239972212520793441422113569732139637729, time:1750766612.5604982s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:149.22571182250977ms total_cost_time:149.26958084106445ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5450 prompt_cache_len:5151 prompt_cache_ratio:0.9451376146788991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:151.5524387359619ms total_cost_time:151.60584449768066ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:5407 prompt_cache_len:5145 prompt_cache_ratio:0.9515442944331423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:32 [batch.py:51] router release req id 8 -INFO 06-24 20:03:32 [batch.py:51] router release req id 400 -INFO 06-24 20:03:32 [manager.py:224] router recive req id 8 cost time 0.07585835456848145 s -INFO 06-24 20:03:32 [manager.py:224] router recive req id 400 cost time 0.06166410446166992 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 8 cost time 0.07739901542663574 s -INFO 06-24 20:03:32 [manager.py:68] detokenization recv req id 400 cost time 0.06461405754089355 s -DEBUG 06-24 20:03:32 [manager.py:391] Prefill Batch: batch_id=145266479497705978809128689037259388380, time:1750766612.7202573s req_ids:[8, 400] -DEBUG 06-24 20:03:32 [manager.py:391] -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:149.8281955718994ms total_cost_time:149.8720645904541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5451 prompt_cache_len:5151 prompt_cache_ratio:0.9449642267473858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 -ERROR 06-24 20:03:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:155.55930137634277ms total_cost_time:155.61223030090332ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5408 prompt_cache_len:5145 prompt_cache_ratio:0.9513683431952663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.24834465980529785 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.22836756706237793 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.2501695156097412 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.23155927658081055 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=231897547603849720267055709208481616009, time:1750766613.0471766s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:8 first_token_cost:341.75777435302734ms total_cost_time:341.80331230163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5452 prompt_cache_len:5151 prompt_cache_ratio:0.9447909024211298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:32 lightllm_req_id:400 first_token_cost:321.9008445739746ms total_cost_time:321.9411373138428ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5409 prompt_cache_len:5145 prompt_cache_ratio:0.9511924570160843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.05870652198791504 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.054108381271362305 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.06033134460449219 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.056955814361572266 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=228439960587663879943879096787604973611, time:1750766613.20849s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.4911651611328ms total_cost_time:151.5507698059082ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5453 prompt_cache_len:5151 prompt_cache_ratio:0.9446176416651385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:146.81506156921387ms total_cost_time:146.85988426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5410 prompt_cache_len:5145 prompt_cache_ratio:0.9510166358595195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.05869030952453613 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.05304574966430664 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.06087470054626465 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.057590484619140625 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=58569480998902752068275225342265434688, time:1750766613.3693695s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:144.19245719909668ms total_cost_time:144.23441886901855ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5454 prompt_cache_len:5151 prompt_cache_ratio:0.9444444444444444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:151.98588371276855ms total_cost_time:152.04191207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5411 prompt_cache_len:5145 prompt_cache_ratio:0.9508408796895214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.07071161270141602 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.057137489318847656 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.07224273681640625 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.05996823310852051 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=236890264408260102127111188309508912438, time:1750766613.5314384s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:146.73113822937012ms total_cost_time:146.7747688293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5455 prompt_cache_len:5151 prompt_cache_ratio:0.9442713107241063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:150.47836303710938ms total_cost_time:150.5270004272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:5412 prompt_cache_len:5145 prompt_cache_ratio:0.9506651884700665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.07972478866577148 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.062406301498413086 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08193325996398926 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06684494018554688 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=303902784344703244612003560601594996873, time:1750766613.6913793s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.5970230102539ms total_cost_time:151.6411304473877ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5456 prompt_cache_len:5151 prompt_cache_ratio:0.9440982404692082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:153.2003879547119ms total_cost_time:153.25331687927246ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:5413 prompt_cache_len:5145 prompt_cache_ratio:0.950489562165158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.08222222328186035 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.06279706954956055 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08446049690246582 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06739950180053711 s -DEBUG 06-24 20:03:33 [manager.py:391] Prefill Batch: batch_id=36138175624287798503941517702568473641, time:1750766613.8523045s req_ids:[8, 400] -DEBUG 06-24 20:03:33 [manager.py:391] -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:151.93939208984375ms total_cost_time:151.98349952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5457 prompt_cache_len:5151 prompt_cache_ratio:0.9439252336448598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 -ERROR 06-24 20:03:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:156.13746643066406ms total_cost_time:156.1887264251709ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:5414 prompt_cache_len:5145 prompt_cache_ratio:0.9503140007388252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 -DEBUG 06-24 20:03:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:33 [batch.py:51] router release req id 8 -INFO 06-24 20:03:33 [batch.py:51] router release req id 400 -INFO 06-24 20:03:33 [manager.py:224] router recive req id 8 cost time 0.08626484870910645 s -INFO 06-24 20:03:33 [manager.py:224] router recive req id 400 cost time 0.0625617504119873 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 8 cost time 0.08783483505249023 s -INFO 06-24 20:03:33 [manager.py:68] detokenization recv req id 400 cost time 0.06550312042236328 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=226418088539405689152697069365719665906, time:1750766614.0139081s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:8 first_token_cost:150.85697174072266ms total_cost_time:150.89941024780273ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5458 prompt_cache_len:5151 prompt_cache_ratio:0.9437522902161964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:33 lightllm_req_id:400 first_token_cost:155.23576736450195ms total_cost_time:155.27749061584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5415 prompt_cache_len:5145 prompt_cache_ratio:0.9501385041551247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 -DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:34 [batch.py:51] router release req id 8 -INFO 06-24 20:03:34 [batch.py:51] router release req id 400 -INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.263899564743042 s -INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.23550772666931152 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.2656059265136719 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.2386183738708496 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=329762443465025919669621080661930544679, time:1750766614.346624s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -DEBUG 06-24 20:03:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 62610.791 tokens/s -DEBUG 06-24 20:03:34 [stats.py:37] Avg prompt tokens throughput: 62587.647 tokens/s -DEBUG 06-24 20:03:34 [stats.py:37] Avg generate tokens throughput: 23.144 tokens/s -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:349.78556632995605ms total_cost_time:349.895715713501ms,out_token_counter:1 mean_per_token_cost_time: 0.11014938354492188ms prompt_token_num:5459 prompt_cache_len:5151 prompt_cache_ratio:0.9435794101483789 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:325.7250785827637ms total_cost_time:325.7758617401123ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:5416 prompt_cache_len:5145 prompt_cache_ratio:0.9499630723781388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 -DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:34 [batch.py:51] router release req id 8 -INFO 06-24 20:03:34 [batch.py:51] router release req id 400 -INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.06442093849182129 s -INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.05374312400817871 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.06600570678710938 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.05669116973876953 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=195730936274349755573355273465168684015, time:1750766614.5051832s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:144.65665817260742ms total_cost_time:144.6988582611084ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5460 prompt_cache_len:5151 prompt_cache_ratio:0.9434065934065934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:150.8946418762207ms total_cost_time:150.94923973083496ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:5417 prompt_cache_len:5145 prompt_cache_ratio:0.9497877053719771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 -DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:34 [batch.py:51] router release req id 8 -INFO 06-24 20:03:34 [batch.py:51] router release req id 400 -INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.0860910415649414 s -INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.06761026382446289 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.0875704288482666 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.07041740417480469 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=181565894098771917202476306658473994742, time:1750766614.6742842s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:150.1750946044922ms total_cost_time:150.22039413452148ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5461 prompt_cache_len:5151 prompt_cache_ratio:0.943233839956052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:158.97560119628906ms total_cost_time:159.0280532836914ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:5418 prompt_cache_len:5145 prompt_cache_ratio:0.9496124031007752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 -DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:34 [batch.py:51] router release req id 8 -INFO 06-24 20:03:34 [batch.py:51] router release req id 400 -INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.09009885787963867 s -INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.0627443790435791 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.0922689437866211 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.06710219383239746 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=120703852401807362131516768463402938812, time:1750766614.8360612s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:151.49259567260742ms total_cost_time:151.5340805053711ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5462 prompt_cache_len:5151 prompt_cache_ratio:0.943061149761992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 -ERROR 06-24 20:03:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:157.11140632629395ms total_cost_time:157.1497917175293ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5419 prompt_cache_len:5145 prompt_cache_ratio:0.9494371655286953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 -DEBUG 06-24 20:03:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:34 [batch.py:51] router release req id 8 -INFO 06-24 20:03:34 [batch.py:51] router release req id 400 -INFO 06-24 20:03:34 [manager.py:224] router recive req id 8 cost time 0.09524822235107422 s -INFO 06-24 20:03:34 [manager.py:224] router recive req id 400 cost time 0.06283330917358398 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 8 cost time 0.09691023826599121 s -INFO 06-24 20:03:34 [manager.py:68] detokenization recv req id 400 cost time 0.06598091125488281 s -DEBUG 06-24 20:03:34 [manager.py:391] Prefill Batch: batch_id=138612450913366828381385947570486151533, time:1750766614.9981973s req_ids:[8, 400] -DEBUG 06-24 20:03:34 [manager.py:391] -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:8 first_token_cost:190.29664993286133ms total_cost_time:190.338134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5463 prompt_cache_len:5151 prompt_cache_ratio:0.942888522789676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:34 lightllm_req_id:400 first_token_cost:157.81736373901367ms total_cost_time:157.84311294555664ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5420 prompt_cache_len:5145 prompt_cache_ratio:0.9492619926199262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:35 [batch.py:51] router release req id 8 -INFO 06-24 20:03:35 [batch.py:51] router release req id 400 -INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.05727028846740723 s -INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.051714420318603516 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.0588831901550293 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05479764938354492 s -DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=73263162190928750566617320477825427881, time:1750766615.1584284s req_ids:[8, 400] -DEBUG 06-24 20:03:35 [manager.py:391] -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:151.34024620056152ms total_cost_time:151.38506889343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5464 prompt_cache_len:5151 prompt_cache_ratio:0.9427159590043924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:145.8280086517334ms total_cost_time:145.8570957183838ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5421 prompt_cache_len:5145 prompt_cache_ratio:0.9490868843386829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:35 [batch.py:51] router release req id 8 -INFO 06-24 20:03:35 [batch.py:51] router release req id 400 -INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.05920004844665527 s -INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.054059743881225586 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.06071758270263672 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05686688423156738 s -DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=316947990396636921151122982380940675288, time:1750766615.3188865s req_ids:[8, 400] -DEBUG 06-24 20:03:35 [manager.py:391] -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:146.7444896697998ms total_cost_time:146.7874050140381ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5465 prompt_cache_len:5151 prompt_cache_ratio:0.9425434583714547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:311.0802173614502ms total_cost_time:311.1400604248047ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5422 prompt_cache_len:5145 prompt_cache_ratio:0.948911840649207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:35 [batch.py:51] router release req id 8 -INFO 06-24 20:03:35 [batch.py:51] router release req id 400 -INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.21719074249267578 s -INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.04756736755371094 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.2189335823059082 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.05070018768310547 s -DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=9189670993887045612737822298060446211, time:1750766615.6310532s req_ids:[8, 400] -DEBUG 06-24 20:03:35 [manager.py:391] -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:298.3372211456299ms total_cost_time:298.3822822570801ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5466 prompt_cache_len:5151 prompt_cache_ratio:0.942371020856202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:128.6458969116211ms total_cost_time:128.67283821105957ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5423 prompt_cache_len:5145 prompt_cache_ratio:0.9487368615157662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:35 [batch.py:51] router release req id 8 -INFO 06-24 20:03:35 [batch.py:51] router release req id 400 -INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.04225039482116699 s -INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.03630948066711426 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.04439592361450195 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.04091238975524902 s -DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=51538974472427304713807193592513952491, time:1750766615.7622607s req_ids:[8, 400] -DEBUG 06-24 20:03:35 [manager.py:391] -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:96.61436080932617ms total_cost_time:96.65513038635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5467 prompt_cache_len:5151 prompt_cache_ratio:0.9421986464239985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:114.78972434997559ms total_cost_time:114.82858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5424 prompt_cache_len:5145 prompt_cache_ratio:0.9485619469026548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:35 [batch.py:51] router release req id 8 -INFO 06-24 20:03:35 [batch.py:51] router release req id 400 -INFO 06-24 20:03:35 [manager.py:224] router recive req id 8 cost time 0.07087564468383789 s -INFO 06-24 20:03:35 [manager.py:224] router recive req id 400 cost time 0.047078847885131836 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 8 cost time 0.07310366630554199 s -INFO 06-24 20:03:35 [manager.py:68] detokenization recv req id 400 cost time 0.051564931869506836 s -DEBUG 06-24 20:03:35 [manager.py:391] Prefill Batch: batch_id=185429478749449503319306454850183956330, time:1750766615.891475s req_ids:[8, 400] -DEBUG 06-24 20:03:35 [manager.py:391] -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:148.2698917388916ms total_cost_time:148.31233024597168ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5468 prompt_cache_len:5151 prompt_cache_ratio:0.9420263350402341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:124.44233894348145ms total_cost_time:124.46975708007812ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5425 prompt_cache_len:5145 prompt_cache_ratio:0.9483870967741935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 -INFO 06-24 20:03:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.04348111152648926 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03818321228027344 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.0456089973449707 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04280877113342285 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=210000177241489038742211947974033471283, time:1750766616.0221558s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:8 first_token_cost:97.84054756164551ms total_cost_time:97.88155555725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5469 prompt_cache_len:5151 prompt_cache_ratio:0.9418540866703237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:35 lightllm_req_id:400 first_token_cost:117.5074577331543ms total_cost_time:117.54536628723145ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5426 prompt_cache_len:5145 prompt_cache_ratio:0.948212311094729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.07159090042114258 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.0473637580871582 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.07374429702758789 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.051725149154663086 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=162569698941648703211287072808277563915, time:1750766616.153649s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:148.31829071044922ms total_cost_time:148.3612060546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5470 prompt_cache_len:5151 prompt_cache_ratio:0.9416819012797075 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:126.03974342346191ms total_cost_time:126.08981132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:5427 prompt_cache_len:5145 prompt_cache_ratio:0.9480375898286346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.2066042423248291 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.1993255615234375 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.20892548561096191 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.20395541191101074 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=130010109753787456219633016630110609604, time:1750766616.4430048s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:286.9291305541992ms total_cost_time:286.9884967803955ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5471 prompt_cache_len:5151 prompt_cache_ratio:0.9415097788338512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:279.76536750793457ms total_cost_time:279.7966003417969ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:5428 prompt_cache_len:5145 prompt_cache_ratio:0.9478629329403095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.04249286651611328 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03691506385803223 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.04454183578491211 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04138541221618652 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=30932026737940502576981443611834848389, time:1750766616.5737557s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:109.66634750366211ms total_cost_time:109.7111701965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5472 prompt_cache_len:5151 prompt_cache_ratio:0.9413377192982456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:115.7221794128418ms total_cost_time:115.76342582702637ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5429 prompt_cache_len:5145 prompt_cache_ratio:0.9476883403941794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.05890941619873047 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.0468592643737793 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.06042885780334473 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04991865158081055 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=43002134030329428879573188245239996644, time:1750766616.7063823s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:140.33913612365723ms total_cost_time:140.40088653564453ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5473 prompt_cache_len:5151 prompt_cache_ratio:0.9411657226384067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:128.58343124389648ms total_cost_time:128.62610816955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5430 prompt_cache_len:5145 prompt_cache_ratio:0.9475138121546961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.043108463287353516 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.03757143020629883 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.04462027549743652 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.04056262969970703 s -DEBUG 06-24 20:03:36 [manager.py:391] Prefill Batch: batch_id=56348947202715350430636309754883468755, time:1750766616.8402448s req_ids:[8, 400] -DEBUG 06-24 20:03:36 [manager.py:391] -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:104.10261154174805ms total_cost_time:104.1266918182373ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:5474 prompt_cache_len:5151 prompt_cache_ratio:0.9409937888198758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 -ERROR 06-24 20:03:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:135.00475883483887ms total_cost_time:135.0269317626953ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5431 prompt_cache_len:5145 prompt_cache_ratio:0.9473393481863377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 -DEBUG 06-24 20:03:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:36 [batch.py:51] router release req id 8 -INFO 06-24 20:03:36 [batch.py:51] router release req id 400 -INFO 06-24 20:03:36 [manager.py:224] router recive req id 8 cost time 0.10050511360168457 s -INFO 06-24 20:03:36 [manager.py:224] router recive req id 400 cost time 0.06466460227966309 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 8 cost time 0.1022646427154541 s -INFO 06-24 20:03:36 [manager.py:68] detokenization recv req id 400 cost time 0.06761837005615234 s -DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=48142095797838133697149078602854040769, time:1750766617.006159s req_ids:[8, 400] -DEBUG 06-24 20:03:37 [manager.py:391] -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:8 first_token_cost:196.36249542236328ms total_cost_time:196.38848304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5475 prompt_cache_len:5151 prompt_cache_ratio:0.9408219178082192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:36 lightllm_req_id:400 first_token_cost:160.03799438476562ms total_cost_time:160.05229949951172ms,out_token_counter:1 mean_per_token_cost_time: 0.01430511474609375ms prompt_token_num:5432 prompt_cache_len:5145 prompt_cache_ratio:0.9471649484536082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 -DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:37 [batch.py:51] router release req id 8 -INFO 06-24 20:03:37 [batch.py:51] router release req id 400 -INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.06491494178771973 s -INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06207680702209473 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.0670013427734375 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06541562080383301 s -DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=49102353570290562279437135788005333577, time:1750766617.1713684s req_ids:[8, 400] -DEBUG 06-24 20:03:37 [manager.py:391] -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:150.54059028625488ms total_cost_time:150.56657791137695ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5476 prompt_cache_len:5151 prompt_cache_ratio:0.9406501095690285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:157.32622146606445ms total_cost_time:157.3467254638672ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:5433 prompt_cache_len:5145 prompt_cache_ratio:0.9469906129210381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 -DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:37 [batch.py:51] router release req id 8 -INFO 06-24 20:03:37 [batch.py:51] router release req id 400 -INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.07412052154541016 s -INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06517767906188965 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.07586669921875 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06809163093566895 s -DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=158460628801869444412491162713163251356, time:1750766617.3367357s req_ids:[8, 400] -DEBUG 06-24 20:03:37 [manager.py:391] -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:149.97220039367676ms total_cost_time:149.99794960021973ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5477 prompt_cache_len:5151 prompt_cache_ratio:0.9404783640679204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:161.37051582336426ms total_cost_time:161.3912582397461ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:5434 prompt_cache_len:5145 prompt_cache_ratio:0.9468163415531836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 -DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:37 [batch.py:51] router release req id 8 -INFO 06-24 20:03:37 [batch.py:51] router release req id 400 -INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.267611026763916 s -INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.24760913848876953 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.2696850299835205 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.2509434223175049 s -DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=322497241890493419874620467040701848401, time:1750766617.6848779s req_ids:[8, 400] -DEBUG 06-24 20:03:37 [manager.py:391] -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:350.9180545806885ms total_cost_time:350.9635925292969ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5478 prompt_cache_len:5151 prompt_cache_ratio:0.9403066812705367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:343.2471752166748ms total_cost_time:343.28627586364746ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5435 prompt_cache_len:5145 prompt_cache_ratio:0.9466421343146274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 -DEBUG 06-24 20:03:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:37 [batch.py:51] router release req id 8 -INFO 06-24 20:03:37 [batch.py:51] router release req id 400 -INFO 06-24 20:03:37 [manager.py:224] router recive req id 8 cost time 0.07410836219787598 s -INFO 06-24 20:03:37 [manager.py:224] router recive req id 400 cost time 0.06276607513427734 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 8 cost time 0.07597732543945312 s -INFO 06-24 20:03:37 [manager.py:68] detokenization recv req id 400 cost time 0.06632637977600098 s -DEBUG 06-24 20:03:37 [manager.py:391] Prefill Batch: batch_id=94719355295057164348625767212695700820, time:1750766617.8478243s req_ids:[8, 400] -DEBUG 06-24 20:03:37 [manager.py:391] -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:149.43575859069824ms total_cost_time:149.48034286499023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5479 prompt_cache_len:5151 prompt_cache_ratio:0.9401350611425443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 -ERROR 06-24 20:03:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:164.99781608581543ms total_cost_time:165.0540828704834ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:5436 prompt_cache_len:5145 prompt_cache_ratio:0.9464679911699779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 -DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:38 [batch.py:51] router release req id 8 -INFO 06-24 20:03:38 [batch.py:51] router release req id 400 -INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.10403227806091309 s -INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.0771474838256836 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.1065359115600586 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.08168745040893555 s -DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=169128198364472471035314465327019068800, time:1750766618.0340252s req_ids:[8, 400] -DEBUG 06-24 20:03:38 [manager.py:391] -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:8 first_token_cost:199.70464706420898ms total_cost_time:199.74923133850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5480 prompt_cache_len:5151 prompt_cache_ratio:0.939963503649635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:37 lightllm_req_id:400 first_token_cost:222.3520278930664ms total_cost_time:222.3968505859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5437 prompt_cache_len:5145 prompt_cache_ratio:0.9462939120838698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 -DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:38 [batch.py:51] router release req id 8 -INFO 06-24 20:03:38 [batch.py:51] router release req id 400 -INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.11401057243347168 s -INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06454849243164062 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.11652994155883789 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06919145584106445 s -DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=40486159350295905578338550203234388261, time:1750766618.2483828s req_ids:[8, 400] -DEBUG 06-24 20:03:38 [manager.py:391] -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:200.3791332244873ms total_cost_time:200.425386428833ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5481 prompt_cache_len:5151 prompt_cache_ratio:0.939792008757526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:163.77830505371094ms total_cost_time:163.8178825378418ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:5438 prompt_cache_len:5145 prompt_cache_ratio:0.9461198970209636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 -DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:38 [batch.py:51] router release req id 8 -INFO 06-24 20:03:38 [batch.py:51] router release req id 400 -INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.07290983200073242 s -INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06025242805480957 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.07513570785522461 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06480908393859863 s -DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=301123155516952396020871412387181398298, time:1750766618.414426s req_ids:[8, 400] -DEBUG 06-24 20:03:38 [manager.py:391] -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:149.09625053405762ms total_cost_time:149.1401195526123ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5482 prompt_cache_len:5151 prompt_cache_ratio:0.9396205764319592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:153.53655815124512ms total_cost_time:153.57685089111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5439 prompt_cache_len:5145 prompt_cache_ratio:0.9459459459459459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 -DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:38 [batch.py:51] router release req id 8 -INFO 06-24 20:03:38 [batch.py:51] router release req id 400 -INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.0793769359588623 s -INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06363534927368164 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.08170771598815918 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06795048713684082 s -DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=116750067881656625557120399952146984721, time:1750766618.5762854s req_ids:[8, 400] -DEBUG 06-24 20:03:38 [manager.py:391] -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:150.56800842285156ms total_cost_time:150.61283111572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5483 prompt_cache_len:5151 prompt_cache_ratio:0.9394492066387015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:159.74164009094238ms total_cost_time:159.78264808654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5440 prompt_cache_len:5145 prompt_cache_ratio:0.9457720588235294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 -DEBUG 06-24 20:03:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:38 [batch.py:51] router release req id 8 -INFO 06-24 20:03:38 [batch.py:51] router release req id 400 -INFO 06-24 20:03:38 [manager.py:224] router recive req id 8 cost time 0.08829593658447266 s -INFO 06-24 20:03:38 [manager.py:224] router recive req id 400 cost time 0.06318473815917969 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 8 cost time 0.09060263633728027 s -INFO 06-24 20:03:38 [manager.py:68] detokenization recv req id 400 cost time 0.06763386726379395 s -DEBUG 06-24 20:03:38 [manager.py:391] Prefill Batch: batch_id=278416267166014473420255664567043084141, time:1750766618.7411492s req_ids:[8, 400] -DEBUG 06-24 20:03:38 [manager.py:391] -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:190.9964084625244ms total_cost_time:191.0405158996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5484 prompt_cache_len:5151 prompt_cache_ratio:0.9392778993435449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:165.68994522094727ms total_cost_time:165.71617126464844ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5441 prompt_cache_len:5145 prompt_cache_ratio:0.9455982356184525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 -INFO 06-24 20:03:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.2627263069152832 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.25689268112182617 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.2648928165435791 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.26111268997192383 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=129706347939593724599986550451753117018, time:1750766619.1145344s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:8 first_token_cost:355.96179962158203ms total_cost_time:356.0066223144531ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5485 prompt_cache_len:5151 prompt_cache_ratio:0.9391066545123062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:38 lightllm_req_id:400 first_token_cost:349.85852241516113ms total_cost_time:349.8833179473877ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5442 prompt_cache_len:5145 prompt_cache_ratio:0.9454244762954797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.05801534652709961 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.052634239196777344 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.05999279022216797 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.05617570877075195 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=87146945186067301067864858556266369722, time:1750766619.275268s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:146.03495597839355ms total_cost_time:146.0568904876709ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:5486 prompt_cache_len:5151 prompt_cache_ratio:0.9389354721108275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:154.07395362854004ms total_cost_time:154.0968418121338ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:5443 prompt_cache_len:5145 prompt_cache_ratio:0.9452507808194011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.07618451118469238 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06422042846679688 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.07848453521728516 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06868958473205566 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=3134814338491348743743000294539162370, time:1750766619.4442506s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:150.85291862487793ms total_cost_time:150.895357131958ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5487 prompt_cache_len:5151 prompt_cache_ratio:0.9387643521049754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:166.31817817687988ms total_cost_time:166.36157035827637ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5444 prompt_cache_len:5145 prompt_cache_ratio:0.945077149155033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.08937716484069824 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06246542930603027 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.09199929237365723 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06730175018310547 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=6247217228465912397711997017917951727, time:1750766619.6143165s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:152.59122848510742ms total_cost_time:152.63652801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5488 prompt_cache_len:5151 prompt_cache_ratio:0.9385932944606414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:159.09409523010254ms total_cost_time:159.13748741149902ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5445 prompt_cache_len:5145 prompt_cache_ratio:0.9449035812672176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.09504103660583496 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.06266522407531738 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.09713959693908691 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.06722712516784668 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=310611555034683849966821181303323645239, time:1750766619.7790356s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:190.79136848449707ms total_cost_time:190.83523750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5489 prompt_cache_len:5151 prompt_cache_ratio:0.938422299143742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:158.48040580749512ms total_cost_time:158.5063934326172ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5446 prompt_cache_len:5145 prompt_cache_ratio:0.9447300771208226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 -INFO 06-24 20:03:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 -DEBUG 06-24 20:03:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:39 [batch.py:51] router release req id 8 -INFO 06-24 20:03:39 [batch.py:51] router release req id 400 -INFO 06-24 20:03:39 [manager.py:224] router recive req id 8 cost time 0.058948516845703125 s -INFO 06-24 20:03:39 [manager.py:224] router recive req id 400 cost time 0.053785085678100586 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 8 cost time 0.061005353927612305 s -INFO 06-24 20:03:39 [manager.py:68] detokenization recv req id 400 cost time 0.05808091163635254 s -DEBUG 06-24 20:03:39 [manager.py:391] Prefill Batch: batch_id=253867498428063382271975746685462740671, time:1750766619.9419792s req_ids:[8, 400] -DEBUG 06-24 20:03:39 [manager.py:391] -ERROR 06-24 20:03:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:8 first_token_cost:153.4113883972168ms total_cost_time:153.4566879272461ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5490 prompt_cache_len:5151 prompt_cache_ratio:0.9382513661202185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:39 lightllm_req_id:400 first_token_cost:148.25987815856934ms total_cost_time:148.2863426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5447 prompt_cache_len:5145 prompt_cache_ratio:0.9445566366807417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.058005571365356445 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.05278468132019043 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.05971884727478027 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.054704904556274414 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=115700462757155041086192297804250255747, time:1750766620.1042194s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:145.2500820159912ms total_cost_time:145.2944278717041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5491 prompt_cache_len:5151 prompt_cache_ratio:0.9380804953560371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:298.1858253479004ms total_cost_time:298.2308864593506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5448 prompt_cache_len:5145 prompt_cache_ratio:0.9443832599118943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.20426201820373535 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.04655003547668457 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.20604443550109863 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.0496368408203125 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=149437139165406844475054096712746950522, time:1750766620.3992245s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:281.9509506225586ms total_cost_time:281.994104385376ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5492 prompt_cache_len:5151 prompt_cache_ratio:0.9379096868171887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:124.15003776550293ms total_cost_time:124.1767406463623ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5449 prompt_cache_len:5145 prompt_cache_ratio:0.9442099467792255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.04370570182800293 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.03839445114135742 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.04548001289367676 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.04150676727294922 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=192771443649093074155482288364382980342, time:1750766620.5299993s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:112.8549575805664ms total_cost_time:112.90121078491211ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5493 prompt_cache_len:5151 prompt_cache_ratio:0.9377389404696886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:118.52025985717773ms total_cost_time:118.55673789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5450 prompt_cache_len:5145 prompt_cache_ratio:0.9440366972477064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.05673837661743164 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.046599388122558594 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.05822181701660156 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.0494847297668457 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=17387028427260555587337005555667821935, time:1750766620.6615283s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:134.05156135559082ms total_cost_time:134.0937614440918ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5494 prompt_cache_len:5151 prompt_cache_ratio:0.9375682562795777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:124.00150299072266ms total_cost_time:124.02820587158203ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5451 prompt_cache_len:5145 prompt_cache_ratio:0.9438635112823335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.04505300521850586 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.03983449935913086 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.04656481742858887 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.04274725914001465 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=279914340483530208022775552399288758313, time:1750766620.7912085s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:110.8083724975586ms total_cost_time:110.85247993469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5495 prompt_cache_len:5151 prompt_cache_ratio:0.9373976342129209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:116.74213409423828ms total_cost_time:116.78004264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5452 prompt_cache_len:5145 prompt_cache_ratio:0.9436903888481292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:40 [batch.py:51] router release req id 8 -INFO 06-24 20:03:40 [batch.py:51] router release req id 400 -INFO 06-24 20:03:40 [manager.py:224] router recive req id 8 cost time 0.05859804153442383 s -INFO 06-24 20:03:40 [manager.py:224] router recive req id 400 cost time 0.048033952713012695 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 8 cost time 0.06009221076965332 s -INFO 06-24 20:03:40 [manager.py:68] detokenization recv req id 400 cost time 0.05092620849609375 s -DEBUG 06-24 20:03:40 [manager.py:391] Prefill Batch: batch_id=317940517780281782685719143350097904372, time:1750766620.9211283s req_ids:[8, 400] -DEBUG 06-24 20:03:40 [manager.py:391] -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:135.80894470214844ms total_cost_time:135.85162162780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5496 prompt_cache_len:5151 prompt_cache_ratio:0.9372270742358079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:125.33068656921387ms total_cost_time:125.35691261291504ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5453 prompt_cache_len:5145 prompt_cache_ratio:0.9435173299101413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 -INFO 06-24 20:03:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.20488452911376953 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.19974970817565918 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.20643329620361328 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.2028522491455078 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=222248072915643865879193887607851236332, time:1750766621.2138107s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:8 first_token_cost:284.5888137817383ms total_cost_time:284.6338748931885ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5497 prompt_cache_len:5151 prompt_cache_ratio:0.9370565763143532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:40 lightllm_req_id:400 first_token_cost:279.52051162719727ms total_cost_time:279.54769134521484ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5454 prompt_cache_len:5145 prompt_cache_ratio:0.9433443344334433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04375576972961426 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.03897881507873535 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.04532217979431152 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.042089223861694336 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=319518911672266648223702621450027877125, time:1750766621.345943s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:115.67831039428711ms total_cost_time:115.72122573852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5498 prompt_cache_len:5151 prompt_cache_ratio:0.9368861404146962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:121.96588516235352ms total_cost_time:122.00260162353516ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5455 prompt_cache_len:5145 prompt_cache_ratio:0.9431714023831348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.056416988372802734 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04623913764953613 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.05796480178833008 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.049021005630493164 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=278679238097332180285458105917212097973, time:1750766621.4798312s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:135.40983200073242ms total_cost_time:135.4525089263916ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5499 prompt_cache_len:5151 prompt_cache_ratio:0.9367157665030006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:125.19407272338867ms total_cost_time:125.21886825561523ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:5456 prompt_cache_len:5145 prompt_cache_ratio:0.9429985337243402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04453921318054199 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.039920806884765625 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.046036720275878906 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.04279613494873047 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=56445385961245697677117280664771420070, time:1750766621.610605s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:107.49530792236328ms total_cost_time:107.54084587097168ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5500 prompt_cache_len:5151 prompt_cache_ratio:0.9365454545454546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:118.26539039611816ms total_cost_time:118.3018684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5457 prompt_cache_len:5145 prompt_cache_ratio:0.94282572842221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.06289219856262207 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04823446273803711 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.06427669525146484 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.05086827278137207 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=325098990388853035416101800897035623329, time:1750766621.7430327s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:143.16177368164062ms total_cost_time:143.2046890258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5501 prompt_cache_len:5151 prompt_cache_ratio:0.9363752045082713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:128.4658908843994ms total_cost_time:128.4935474395752ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:5458 prompt_cache_len:5145 prompt_cache_ratio:0.9426529864419201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.04438018798828125 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.03922462463378906 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.04580497741699219 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.04224562644958496 s -DEBUG 06-24 20:03:41 [manager.py:391] Prefill Batch: batch_id=14635539580292310731747326689292859442, time:1750766621.8758655s req_ids:[8, 400] -DEBUG 06-24 20:03:41 [manager.py:391] -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:97.8844165802002ms total_cost_time:97.92876243591309ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5502 prompt_cache_len:5151 prompt_cache_ratio:0.9362050163576882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 -ERROR 06-24 20:03:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:117.2182559967041ms total_cost_time:117.25473403930664ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5459 prompt_cache_len:5145 prompt_cache_ratio:0.9424803077486719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 -DEBUG 06-24 20:03:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:41 [batch.py:51] router release req id 8 -INFO 06-24 20:03:41 [batch.py:51] router release req id 400 -INFO 06-24 20:03:41 [manager.py:224] router recive req id 8 cost time 0.0717918872833252 s -INFO 06-24 20:03:41 [manager.py:224] router recive req id 400 cost time 0.04829549789428711 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 8 cost time 0.07334518432617188 s -INFO 06-24 20:03:41 [manager.py:68] detokenization recv req id 400 cost time 0.051096200942993164 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=1511357465828111383588353860684841422, time:1750766622.006007s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:8 first_token_cost:146.93307876586914ms total_cost_time:146.97694778442383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5503 prompt_cache_len:5151 prompt_cache_ratio:0.9360348900599673 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:41 lightllm_req_id:400 first_token_cost:125.76103210449219ms total_cost_time:125.79607963562012ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5460 prompt_cache_len:5145 prompt_cache_ratio:0.9423076923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.21104717254638672 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.20490598678588867 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.21272015571594238 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.20803308486938477 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=90783295793158214268778335865938606617, time:1750766622.298684s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:290.88377952575684ms total_cost_time:290.9262180328369ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5504 prompt_cache_len:5151 prompt_cache_ratio:0.9358648255813954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:284.6791744232178ms total_cost_time:284.70468521118164ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5461 prompt_cache_len:5145 prompt_cache_ratio:0.9421351400842336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.04487919807434082 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03967595100402832 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04643511772155762 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04262709617614746 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=187981577892784661875476715935144497840, time:1750766622.4318779s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:102.94938087463379ms total_cost_time:102.99420356750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5505 prompt_cache_len:5151 prompt_cache_ratio:0.9356948228882834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:117.93255805969238ms total_cost_time:117.96808242797852ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:5462 prompt_cache_len:5145 prompt_cache_ratio:0.9419626510435738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.06805729866027832 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.04794931411743164 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.06943297386169434 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.05077719688415527 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=67192859898432663458793980400878882239, time:1750766622.5619147s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:147.1996307373047ms total_cost_time:147.24349975585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5506 prompt_cache_len:5151 prompt_cache_ratio:0.9355248819469669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:127.28071212768555ms total_cost_time:127.30717658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5463 prompt_cache_len:5145 prompt_cache_ratio:0.941790225151016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.0445404052734375 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03985881805419922 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04602456092834473 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04290008544921875 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=119249942676460819860930712482683750717, time:1750766622.694668s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:123.16679954528809ms total_cost_time:123.21114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5507 prompt_cache_len:5151 prompt_cache_ratio:0.9353550027238061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:118.57032775878906ms total_cost_time:118.59679222106934ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5464 prompt_cache_len:5145 prompt_cache_ratio:0.9416178623718887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.04457402229309082 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.03928971290588379 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.04608941078186035 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.04224729537963867 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=86997408004472544127746862513653604578, time:1750766622.8263876s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:116.52898788452148ms total_cost_time:116.57238006591797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5508 prompt_cache_len:5151 prompt_cache_ratio:0.9351851851851852 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 -INFO 06-24 20:03:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:122.48420715332031ms total_cost_time:122.52163887023926ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5465 prompt_cache_len:5145 prompt_cache_ratio:0.9414455626715462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 -DEBUG 06-24 20:03:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:42 [batch.py:51] router release req id 8 -INFO 06-24 20:03:42 [batch.py:51] router release req id 400 -INFO 06-24 20:03:42 [manager.py:224] router recive req id 8 cost time 0.0525965690612793 s -INFO 06-24 20:03:42 [manager.py:224] router recive req id 400 cost time 0.04213285446166992 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 8 cost time 0.05419492721557617 s -INFO 06-24 20:03:42 [manager.py:68] detokenization recv req id 400 cost time 0.0451512336730957 s -DEBUG 06-24 20:03:42 [manager.py:391] Prefill Batch: batch_id=231356186703579210864944373469635620353, time:1750766622.9579062s req_ids:[8, 400] -DEBUG 06-24 20:03:42 [manager.py:391] -ERROR 06-24 20:03:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:8 first_token_cost:132.45058059692383ms total_cost_time:132.4927806854248ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5509 prompt_cache_len:5151 prompt_cache_ratio:0.9350154292975131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:42 lightllm_req_id:400 first_token_cost:121.94538116455078ms total_cost_time:121.97256088256836ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5466 prompt_cache_len:5145 prompt_cache_ratio:0.9412733260153677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.04428219795227051 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.03845548629760742 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.04574108123779297 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04154348373413086 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=186837258394108882879939874262131534433, time:1750766623.0881293s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:111.21487617492676ms total_cost_time:111.26089096069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5510 prompt_cache_len:5151 prompt_cache_ratio:0.9348457350272232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:117.1731948852539ms total_cost_time:117.21158027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5467 prompt_cache_len:5145 prompt_cache_ratio:0.941101152368758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.21978068351745605 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.20865702629089355 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.22155237197875977 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.21177148818969727 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=17604161524869871110427595730865289720, time:1750766623.3826926s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:297.1522808074951ms total_cost_time:297.1968650817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5511 prompt_cache_len:5151 prompt_cache_ratio:0.934676102340773 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:290.67516326904297ms total_cost_time:290.73309898376465ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5468 prompt_cache_len:5145 prompt_cache_ratio:0.9409290416971471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.05164051055908203 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.04220080375671387 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.05293536186218262 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04477095603942871 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=312604985944213681506895109499589382257, time:1750766623.516448s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:132.46941566467285ms total_cost_time:132.51352310180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5512 prompt_cache_len:5151 prompt_cache_ratio:0.9345065312046444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:122.98202514648438ms total_cost_time:123.00777435302734ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5469 prompt_cache_len:5145 prompt_cache_ratio:0.9407569939659901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.04400753974914551 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.0388491153717041 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.04533267021179199 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.041558027267456055 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=315082085694117032309544724066217879520, time:1750766623.6505597s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:111.83452606201172ms total_cost_time:111.87887191772461ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5513 prompt_cache_len:5151 prompt_cache_ratio:0.9343370215853437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:119.6587085723877ms total_cost_time:119.69518661499023ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5470 prompt_cache_len:5145 prompt_cache_ratio:0.9405850091407678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.06146049499511719 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.049262285232543945 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.06301474571228027 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.05216240882873535 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=78055535892047011246499798760042306458, time:1750766623.785757s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:144.48142051696777ms total_cost_time:144.52433586120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5514 prompt_cache_len:5151 prompt_cache_ratio:0.9341675734494015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:132.27033615112305ms total_cost_time:132.29703903198242ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5471 prompt_cache_len:5145 prompt_cache_ratio:0.9404130871869859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 -DEBUG 06-24 20:03:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:43 [batch.py:51] router release req id 8 -INFO 06-24 20:03:43 [batch.py:51] router release req id 400 -INFO 06-24 20:03:43 [manager.py:224] router recive req id 8 cost time 0.049451589584350586 s -INFO 06-24 20:03:43 [manager.py:224] router recive req id 400 cost time 0.043874263763427734 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 8 cost time 0.05081796646118164 s -INFO 06-24 20:03:43 [manager.py:68] detokenization recv req id 400 cost time 0.04680299758911133 s -DEBUG 06-24 20:03:43 [manager.py:391] Prefill Batch: batch_id=322251915806443079126708697199415980913, time:1750766623.9265716s req_ids:[8, 400] -DEBUG 06-24 20:03:43 [manager.py:391] -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:131.2546730041504ms total_cost_time:131.3004493713379ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5515 prompt_cache_len:5151 prompt_cache_ratio:0.9339981867633727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:400 first_token_cost:125.88262557983398ms total_cost_time:125.90956687927246ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:5472 prompt_cache_len:5145 prompt_cache_ratio:0.9402412280701754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.04454922676086426 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.03897547721862793 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.045972347259521484 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04199409484863281 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=299495826279835281760745670739021474310, time:1750766624.0616508s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:43 lightllm_req_id:8 first_token_cost:113.20281028747559ms total_cost_time:113.24453353881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5516 prompt_cache_len:5151 prompt_cache_ratio:0.9338288614938361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:119.94528770446777ms total_cost_time:119.98271942138672ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5473 prompt_cache_len:5145 prompt_cache_ratio:0.9400694317558925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.0609588623046875 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04912614822387695 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.06236624717712402 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.05178523063659668 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=116432272721049931657962064873459148633, time:1750766624.1966136s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:149.01041984558105ms total_cost_time:149.05261993408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5517 prompt_cache_len:5151 prompt_cache_ratio:0.9336595976073954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:03:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 63017.683 tokens/s -DEBUG 06-24 20:03:44 [stats.py:37] Avg prompt tokens throughput: 62994.439 tokens/s -DEBUG 06-24 20:03:44 [stats.py:37] Avg generate tokens throughput: 23.244 tokens/s -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:288.29431533813477ms total_cost_time:288.33794593811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5474 prompt_cache_len:5145 prompt_cache_ratio:0.9398976982097187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.1996145248413086 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04862332344055176 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.20099687576293945 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.05128026008605957 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=304880668639142176118440230530898120204, time:1750766624.4889433s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:297.84703254699707ms total_cost_time:297.89066314697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5518 prompt_cache_len:5151 prompt_cache_ratio:0.9334903950706778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:180.69219589233398ms total_cost_time:180.73153495788574ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5475 prompt_cache_len:5145 prompt_cache_ratio:0.9397260273972603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.045824527740478516 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04057025909423828 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.04743218421936035 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04332256317138672 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=83522112589896216874883248937495033633, time:1750766624.6760492s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:126.85799598693848ms total_cost_time:126.90234184265137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5519 prompt_cache_len:5151 prompt_cache_ratio:0.9333212538503352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:121.47402763366699ms total_cost_time:121.50073051452637ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5476 prompt_cache_len:5145 prompt_cache_ratio:0.9395544192841491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.043167829513549805 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.037203073501586914 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.04454374313354492 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04012250900268555 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=197995134768779456357079816216559976218, time:1750766624.8096952s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:114.8991584777832ms total_cost_time:114.9437427520752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5520 prompt_cache_len:5151 prompt_cache_ratio:0.9331521739130435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:120.85151672363281ms total_cost_time:120.88847160339355ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5477 prompt_cache_len:5145 prompt_cache_ratio:0.9393828738360416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 -DEBUG 06-24 20:03:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:44 [batch.py:51] router release req id 8 -INFO 06-24 20:03:44 [batch.py:51] router release req id 400 -INFO 06-24 20:03:44 [manager.py:224] router recive req id 8 cost time 0.05646634101867676 s -INFO 06-24 20:03:44 [manager.py:224] router recive req id 400 cost time 0.04578828811645508 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 8 cost time 0.057877540588378906 s -INFO 06-24 20:03:44 [manager.py:68] detokenization recv req id 400 cost time 0.04868459701538086 s -DEBUG 06-24 20:03:44 [manager.py:391] Prefill Batch: batch_id=331227107796104284259476061553842188105, time:1750766624.9439874s req_ids:[8, 400] -DEBUG 06-24 20:03:44 [manager.py:391] -ERROR 06-24 20:03:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:8 first_token_cost:138.11302185058594ms total_cost_time:138.15736770629883ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5521 prompt_cache_len:5151 prompt_cache_ratio:0.9329831552255027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:44 lightllm_req_id:400 first_token_cost:127.67314910888672ms total_cost_time:127.7010440826416ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:5478 prompt_cache_len:5145 prompt_cache_ratio:0.93921139101862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.0426783561706543 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.0374298095703125 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04419422149658203 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04030585289001465 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=247484327087147223817135377377494183167, time:1750766625.0761447s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:102.44369506835938ms total_cost_time:102.48780250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5522 prompt_cache_len:5151 prompt_cache_ratio:0.9328141977544367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:115.73505401611328ms total_cost_time:115.77391624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:5479 prompt_cache_len:5145 prompt_cache_ratio:0.9390399707975908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.22487568855285645 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.2073667049407959 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.22641658782958984 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.21023082733154297 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=153907764573784560787405508689444493967, time:1750766625.3675983s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:297.96576499938965ms total_cost_time:298.01130294799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5523 prompt_cache_len:5151 prompt_cache_ratio:0.9326453014665942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:292.02842712402344ms total_cost_time:292.0670509338379ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5480 prompt_cache_len:5145 prompt_cache_ratio:0.9388686131386861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.0554201602935791 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.0442962646484375 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.05676436424255371 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04711747169494629 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=187434277561789540605526981935759976782, time:1750766625.501658s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:137.85171508789062ms total_cost_time:137.8943920135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5524 prompt_cache_len:5151 prompt_cache_ratio:0.9324764663287473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:126.92117691040039ms total_cost_time:126.94644927978516ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5481 prompt_cache_len:5145 prompt_cache_ratio:0.9386973180076629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.04479622840881348 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.039304494857788086 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04622650146484375 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.0420534610748291 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=264308891410722713848022818762895973215, time:1750766625.6364613s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:107.63239860534668ms total_cost_time:107.67745971679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5525 prompt_cache_len:5151 prompt_cache_ratio:0.9323076923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:119.30274963378906ms total_cost_time:119.34113502502441ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:5482 prompt_cache_len:5145 prompt_cache_ratio:0.9385260853703028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.06586790084838867 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.04921102523803711 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.06728625297546387 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.05193519592285156 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=18691014356808507469132335014769692393, time:1750766625.771753s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:148.7126350402832ms total_cost_time:148.7562656402588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5526 prompt_cache_len:5151 prompt_cache_ratio:0.9321389793702497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:132.02452659606934ms total_cost_time:132.05265998840332ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5483 prompt_cache_len:5145 prompt_cache_ratio:0.9383549151924129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:45 [batch.py:51] router release req id 8 -INFO 06-24 20:03:45 [batch.py:51] router release req id 400 -INFO 06-24 20:03:45 [manager.py:224] router recive req id 8 cost time 0.04455828666687012 s -INFO 06-24 20:03:45 [manager.py:224] router recive req id 400 cost time 0.03950905799865723 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 8 cost time 0.04588031768798828 s -INFO 06-24 20:03:45 [manager.py:68] detokenization recv req id 400 cost time 0.04236721992492676 s -DEBUG 06-24 20:03:45 [manager.py:391] Prefill Batch: batch_id=12676110873694008020192149709738807323, time:1750766625.9051952s req_ids:[8, 400] -DEBUG 06-24 20:03:45 [manager.py:391] -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:125.11587142944336ms total_cost_time:125.15950202941895ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5527 prompt_cache_len:5151 prompt_cache_ratio:0.931970327483264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:120.25666236877441ms total_cost_time:120.28217315673828ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5484 prompt_cache_len:5145 prompt_cache_ratio:0.938183807439825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 -INFO 06-24 20:03:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04464387893676758 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.039578914642333984 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.046161651611328125 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.042708396911621094 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=201556259165941510903643945325035808616, time:1750766626.0400708s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:8 first_token_cost:115.11421203613281ms total_cost_time:115.1571273803711ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5528 prompt_cache_len:5151 prompt_cache_ratio:0.9318017366136034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:45 lightllm_req_id:400 first_token_cost:121.15693092346191ms total_cost_time:121.19293212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5485 prompt_cache_len:5145 prompt_cache_ratio:0.9380127620783957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.05908489227294922 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.04814624786376953 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.060518741607666016 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05104660987854004 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=253770962005398030300629816593420802149, time:1750766626.1750672s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:140.1996612548828ms total_cost_time:140.2437686920166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5529 prompt_cache_len:5151 prompt_cache_ratio:0.9316332067281606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:129.44293022155762ms total_cost_time:129.4689178466797ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5486 prompt_cache_len:5145 prompt_cache_ratio:0.9378417790740066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -INFO 06-24 20:03:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.20795464515686035 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.20274591445922852 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.20946216583251953 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.2056889533996582 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=156680837650655014671997852344688153877, time:1750766626.4721198s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:288.5935306549072ms total_cost_time:288.6366844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5530 prompt_cache_len:5151 prompt_cache_ratio:0.9314647377938517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:283.2956314086914ms total_cost_time:283.3213806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5487 prompt_cache_len:5145 prompt_cache_ratio:0.9376708583925643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04379010200500488 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.03882551193237305 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.04519939422607422 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.04165530204772949 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=133392077922077514522657980236303511853, time:1750766626.6053221s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:107.7430248260498ms total_cost_time:107.7883243560791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5531 prompt_cache_len:5151 prompt_cache_ratio:0.9312963297776171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:117.47455596923828ms total_cost_time:117.51222610473633ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:5488 prompt_cache_len:5145 prompt_cache_ratio:0.9375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.06258964538574219 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.048950910568237305 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.06399869918823242 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05165672302246094 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=101931795556715982969217131548027683321, time:1750766626.7395117s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:144.5009708404541ms total_cost_time:144.5446014404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5532 prompt_cache_len:5151 prompt_cache_ratio:0.9311279826464208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:130.81812858581543ms total_cost_time:130.8448314666748ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5489 prompt_cache_len:5145 prompt_cache_ratio:0.93732920386227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.04419112205505371 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.03883171081542969 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.04554915428161621 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.041629791259765625 s -DEBUG 06-24 20:03:46 [manager.py:391] Prefill Batch: batch_id=314138048499919673355402826676390677180, time:1750766626.8723164s req_ids:[8, 400] -DEBUG 06-24 20:03:46 [manager.py:391] -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:98.74296188354492ms total_cost_time:98.785400390625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5533 prompt_cache_len:5151 prompt_cache_ratio:0.9309596963672511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 -ERROR 06-24 20:03:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:118.4380054473877ms total_cost_time:118.47472190856934ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5490 prompt_cache_len:5145 prompt_cache_ratio:0.9371584699453552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 -DEBUG 06-24 20:03:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:46 [batch.py:51] router release req id 8 -INFO 06-24 20:03:46 [batch.py:51] router release req id 400 -INFO 06-24 20:03:46 [manager.py:224] router recive req id 8 cost time 0.07383012771606445 s -INFO 06-24 20:03:46 [manager.py:224] router recive req id 400 cost time 0.04933571815490723 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 8 cost time 0.07524251937866211 s -INFO 06-24 20:03:46 [manager.py:68] detokenization recv req id 400 cost time 0.05212998390197754 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=310806778698467022174868719174041794873, time:1750766627.0079288s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:8 first_token_cost:147.68552780151367ms total_cost_time:147.72582054138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5534 prompt_cache_len:5151 prompt_cache_ratio:0.9307914709071197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:46 lightllm_req_id:400 first_token_cost:133.26168060302734ms total_cost_time:133.2986354827881ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:5491 prompt_cache_len:5145 prompt_cache_ratio:0.9369877982152613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.05562281608581543 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04543948173522949 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.057027578353881836 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04819226264953613 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=334942273163944649720874485700909627920, time:1750766627.141425s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:136.88945770263672ms total_cost_time:136.9326114654541ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5535 prompt_cache_len:5151 prompt_cache_ratio:0.9306233062330623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:126.69157981872559ms total_cost_time:126.71780586242676ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:5492 prompt_cache_len:5145 prompt_cache_ratio:0.9368171886380189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.04271697998046875 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.03731036186218262 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.04421520233154297 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04021120071411133 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=53401666113286143618847854401605892820, time:1750766627.2767801s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:104.56609725952148ms total_cost_time:104.61068153381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5536 prompt_cache_len:5151 prompt_cache_ratio:0.9304552023121387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:118.47949028015137ms total_cost_time:118.51763725280762ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5493 prompt_cache_len:5145 prompt_cache_ratio:0.9366466411796832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.22628259658813477 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.20778775215148926 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.22774934768676758 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.21056890487670898 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=286394626559695203673277923630421906574, time:1750766627.5705795s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:299.0703582763672ms total_cost_time:299.11208152770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5537 prompt_cache_len:5151 prompt_cache_ratio:0.9302871591114322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:291.6295528411865ms total_cost_time:291.6688919067383ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5494 prompt_cache_len:5145 prompt_cache_ratio:0.9364761558063341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.05706977844238281 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04649829864501953 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.05867171287536621 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.049718379974365234 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=320127671036857911014605057862772085025, time:1750766627.7046661s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:138.3826732635498ms total_cost_time:138.4265422821045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5538 prompt_cache_len:5151 prompt_cache_ratio:0.9301191765980499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:127.88009643554688ms total_cost_time:127.90536880493164ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5495 prompt_cache_len:5145 prompt_cache_ratio:0.9363057324840764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.044013023376464844 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.03803229331970215 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.045525550842285156 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.04123520851135254 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=12086117763728895102413116460746267287, time:1750766627.8372653s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:105.90934753417969ms total_cost_time:105.95273971557617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5539 prompt_cache_len:5151 prompt_cache_ratio:0.9299512547391225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 -ERROR 06-24 20:03:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:118.24178695678711ms total_cost_time:118.27921867370605ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5496 prompt_cache_len:5145 prompt_cache_ratio:0.9361353711790393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 -DEBUG 06-24 20:03:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:47 [batch.py:51] router release req id 8 -INFO 06-24 20:03:47 [batch.py:51] router release req id 400 -INFO 06-24 20:03:47 [manager.py:224] router recive req id 8 cost time 0.06685781478881836 s -INFO 06-24 20:03:47 [manager.py:224] router recive req id 400 cost time 0.04898667335510254 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 8 cost time 0.06833100318908691 s -INFO 06-24 20:03:47 [manager.py:68] detokenization recv req id 400 cost time 0.051708221435546875 s -DEBUG 06-24 20:03:47 [manager.py:391] Prefill Batch: batch_id=120041866166639342335179457361542587693, time:1750766627.9714532s req_ids:[8, 400] -DEBUG 06-24 20:03:47 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:8 first_token_cost:147.21965789794922ms total_cost_time:147.2647190093994ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5540 prompt_cache_len:5151 prompt_cache_ratio:0.9297833935018051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:47 lightllm_req_id:400 first_token_cost:129.29749488830566ms total_cost_time:129.32467460632324ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:5497 prompt_cache_len:5145 prompt_cache_ratio:0.9359650718573768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.04334235191345215 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.03780770301818848 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.04509329795837402 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04091644287109375 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=221936066483979035956775190440768114010, time:1750766628.1051788s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:95.16119956970215ms total_cost_time:95.20530700683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5541 prompt_cache_len:5151 prompt_cache_ratio:0.9296155928532756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:115.97657203674316ms total_cost_time:116.01519584655762ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5498 prompt_cache_len:5145 prompt_cache_ratio:0.9357948344852673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.07483887672424316 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.048903703689575195 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.07623744010925293 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.051641225814819336 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=152322144279589248641743862674881001419, time:1750766628.2377915s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:148.7255096435547ms total_cost_time:148.76723289489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5542 prompt_cache_len:5151 prompt_cache_ratio:0.9294478527607362 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:134.01174545288086ms total_cost_time:134.0482234954834ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:5499 prompt_cache_len:5145 prompt_cache_ratio:0.9356246590289143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.05364990234375 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.042586326599121094 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.055147409439086914 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04562711715698242 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=215269824782322251363880896491424866789, time:1750766628.3708467s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:147.68671989440918ms total_cost_time:147.72748947143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5543 prompt_cache_len:5151 prompt_cache_ratio:0.9292801731914125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:283.672571182251ms total_cost_time:283.71644020080566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5500 prompt_cache_len:5145 prompt_cache_ratio:0.9354545454545454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.1965477466583252 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.04872417449951172 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.19802474975585938 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.05181384086608887 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=318372952371553185847151775354030162498, time:1750766628.6667292s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:279.89864349365234ms total_cost_time:279.94251251220703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5544 prompt_cache_len:5151 prompt_cache_ratio:0.9291125541125541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:132.14111328125ms total_cost_time:132.16710090637207ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5501 prompt_cache_len:5145 prompt_cache_ratio:0.935284493728413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.04356694221496582 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.03825020790100098 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.04512166976928711 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.04124569892883301 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=296582109010489214940315681468494418463, time:1750766628.802788s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:113.4040355682373ms total_cost_time:113.46554756164551ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:5545 prompt_cache_len:5151 prompt_cache_ratio:0.9289449954914337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:124.0847110748291ms total_cost_time:124.12452697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:5502 prompt_cache_len:5145 prompt_cache_ratio:0.9351145038167938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 -DEBUG 06-24 20:03:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:48 [batch.py:51] router release req id 8 -INFO 06-24 20:03:48 [batch.py:51] router release req id 400 -INFO 06-24 20:03:48 [manager.py:224] router recive req id 8 cost time 0.06295490264892578 s -INFO 06-24 20:03:48 [manager.py:224] router recive req id 400 cost time 0.04826617240905762 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 8 cost time 0.0644371509552002 s -INFO 06-24 20:03:48 [manager.py:68] detokenization recv req id 400 cost time 0.05129122734069824 s -DEBUG 06-24 20:03:48 [manager.py:391] Prefill Batch: batch_id=79803719322006594055946657073260988836, time:1750766628.9417806s req_ids:[8, 400] -DEBUG 06-24 20:03:48 [manager.py:391] -ERROR 06-24 20:03:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:8 first_token_cost:144.4876194000244ms total_cost_time:144.54078674316406ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:5546 prompt_cache_len:5151 prompt_cache_ratio:0.928777497295348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:48 lightllm_req_id:400 first_token_cost:130.08546829223633ms total_cost_time:130.12409210205078ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:5503 prompt_cache_len:5145 prompt_cache_ratio:0.9349445756859894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.04366755485534668 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.036544084548950195 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04512953758239746 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.03946065902709961 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=230287926030944861198511329496486400930, time:1750766629.0752885s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:124.55892562866211ms total_cost_time:124.59397315979004ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5547 prompt_cache_len:5151 prompt_cache_ratio:0.9286100594916171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:117.35129356384277ms total_cost_time:117.37418174743652ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:5504 prompt_cache_len:5145 prompt_cache_ratio:0.9347747093023255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.0453035831451416 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.04056715965270996 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04673361778259277 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04362845420837402 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=289907555184314936234175598367136515337, time:1750766629.208247s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:115.77248573303223ms total_cost_time:115.79751968383789ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:5548 prompt_cache_len:5151 prompt_cache_ratio:0.9284426820475847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:119.31490898132324ms total_cost_time:119.33517456054688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:5505 prompt_cache_len:5145 prompt_cache_ratio:0.9346049046321526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.21399998664855957 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.20673632621765137 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.21562457084655762 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.21011686325073242 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=313928895486517905047856224137802913716, time:1750766629.4994113s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:295.9709167480469ms total_cost_time:296.01454734802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5549 prompt_cache_len:5151 prompt_cache_ratio:0.9282753649306181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:288.91539573669434ms total_cost_time:288.9413833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:5506 prompt_cache_len:5145 prompt_cache_ratio:0.9344351616418453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.043306589126586914 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.03699469566345215 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.04485034942626953 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.03997516632080078 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=302165632055316284503673141368883412172, time:1750766629.634829s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:127.28691101074219ms total_cost_time:127.34794616699219ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5550 prompt_cache_len:5151 prompt_cache_ratio:0.9281081081081081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:121.10614776611328ms total_cost_time:121.1385726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:5507 prompt_cache_len:5145 prompt_cache_ratio:0.9342654802978028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.04432249069213867 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.03849601745605469 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.045792579650878906 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04160785675048828 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=179893346050329409575819012616851504298, time:1750766629.7717881s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:114.7909164428711ms total_cost_time:114.83335494995117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5551 prompt_cache_len:5151 prompt_cache_ratio:0.9279409115474689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:121.24443054199219ms total_cost_time:121.28448486328125ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:5508 prompt_cache_len:5145 prompt_cache_ratio:0.9340958605664488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:49 [batch.py:51] router release req id 8 -INFO 06-24 20:03:49 [batch.py:51] router release req id 400 -INFO 06-24 20:03:49 [manager.py:224] router recive req id 8 cost time 0.05814552307128906 s -INFO 06-24 20:03:49 [manager.py:224] router recive req id 400 cost time 0.046526193618774414 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 8 cost time 0.059670448303222656 s -INFO 06-24 20:03:49 [manager.py:68] detokenization recv req id 400 cost time 0.04970812797546387 s -DEBUG 06-24 20:03:49 [manager.py:391] Prefill Batch: batch_id=224037300127965857219291631110412050042, time:1750766629.904954s req_ids:[8, 400] -DEBUG 06-24 20:03:49 [manager.py:391] -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:138.37289810180664ms total_cost_time:138.41819763183594ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5552 prompt_cache_len:5151 prompt_cache_ratio:0.9277737752161384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:126.95980072021484ms total_cost_time:126.98554992675781ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5509 prompt_cache_len:5145 prompt_cache_ratio:0.9339263024142312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 -INFO 06-24 20:03:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.045163869857788086 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.0397946834564209 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.046715736389160156 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.04283475875854492 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=248576220608251832721359498514563264115, time:1750766630.0397003s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:8 first_token_cost:104.52818870544434ms total_cost_time:104.56991195678711ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5553 prompt_cache_len:5151 prompt_cache_ratio:0.9276066990815776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:49 lightllm_req_id:400 first_token_cost:122.15995788574219ms total_cost_time:122.20072746276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5510 prompt_cache_len:5145 prompt_cache_ratio:0.9337568058076225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.07105517387390137 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04810810089111328 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.07260012626647949 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.05129837989807129 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=287433237452974455422368717376177179954, time:1750766630.1743073s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:148.9541530609131ms total_cost_time:148.99659156799316ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5554 prompt_cache_len:5151 prompt_cache_ratio:0.9274396831112711 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:127.86078453063965ms total_cost_time:127.90322303771973ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5511 prompt_cache_len:5145 prompt_cache_ratio:0.9335873707131193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.04954338073730469 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04231762886047363 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.05103754997253418 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.0451962947845459 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=62352939666092407470671319705844274205, time:1750766630.306252s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:128.33070755004883ms total_cost_time:128.37553024291992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5555 prompt_cache_len:5151 prompt_cache_ratio:0.9272727272727272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:121.09827995300293ms total_cost_time:121.12569808959961ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:5512 prompt_cache_len:5145 prompt_cache_ratio:0.9334179970972424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.20601224899291992 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.2006831169128418 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.20753169059753418 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.20346760749816895 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=85347856848935128290599672774571096314, time:1750766630.600038s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:264.86754417419434ms total_cost_time:264.91284370422363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5556 prompt_cache_len:5151 prompt_cache_ratio:0.9271058315334774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:281.42714500427246ms total_cost_time:281.4662456512451ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5513 prompt_cache_len:5145 prompt_cache_ratio:0.9332486849265372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.07044792175292969 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.0492548942565918 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.0717921257019043 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.051905155181884766 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=278795412201298918084725537181851329702, time:1750766630.7359498s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:146.41332626342773ms total_cost_time:146.45671844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5557 prompt_cache_len:5151 prompt_cache_ratio:0.9269389958610761 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:127.98404693603516ms total_cost_time:128.0190944671631ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:5514 prompt_cache_len:5145 prompt_cache_ratio:0.9330794341675734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.05019736289978027 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.04398465156555176 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.051659584045410156 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.04692268371582031 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=217362292710259913346326911155204926233, time:1750766630.8664691s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:129.0762424468994ms total_cost_time:129.1358470916748ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5558 prompt_cache_len:5151 prompt_cache_ratio:0.9267722202231018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:123.16155433654785ms total_cost_time:123.20518493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5515 prompt_cache_len:5145 prompt_cache_ratio:0.9329102447869447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 -INFO 06-24 20:03:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 -DEBUG 06-24 20:03:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:50 [batch.py:51] router release req id 8 -INFO 06-24 20:03:50 [batch.py:51] router release req id 400 -INFO 06-24 20:03:50 [manager.py:224] router recive req id 8 cost time 0.04176926612854004 s -INFO 06-24 20:03:50 [manager.py:224] router recive req id 400 cost time 0.03438711166381836 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 8 cost time 0.043443918228149414 s -INFO 06-24 20:03:50 [manager.py:68] detokenization recv req id 400 cost time 0.037467241287231445 s -DEBUG 06-24 20:03:50 [manager.py:391] Prefill Batch: batch_id=76538349414580394956434546424765338338, time:1750766630.9962099s req_ids:[8, 400] -DEBUG 06-24 20:03:50 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:8 first_token_cost:112.7469539642334ms total_cost_time:112.78319358825684ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:5559 prompt_cache_len:5151 prompt_cache_ratio:0.926605504587156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:50 lightllm_req_id:400 first_token_cost:116.64700508117676ms total_cost_time:116.67704582214355ms,out_token_counter:1 mean_per_token_cost_time: 0.030040740966796875ms prompt_token_num:5516 prompt_cache_len:5145 prompt_cache_ratio:0.932741116751269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05428123474121094 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.04339885711669922 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.055805206298828125 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.046477317810058594 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=168779598982240737530705505383963460604, time:1750766631.1274736s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:134.31024551391602ms total_cost_time:134.3369483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:5560 prompt_cache_len:5151 prompt_cache_ratio:0.9264388489208633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:123.39329719543457ms total_cost_time:123.40927124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.015974044799804688ms prompt_token_num:5517 prompt_cache_len:5145 prompt_cache_ratio:0.9325720500271887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.04616212844848633 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.0428471565246582 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.04780006408691406 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04586291313171387 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=153338967591333737190287911065236901823, time:1750766631.2608087s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:112.12873458862305ms total_cost_time:112.17427253723145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5561 prompt_cache_len:5151 prompt_cache_ratio:0.926272253191872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:121.60754203796387ms total_cost_time:121.64568901062012ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:5518 prompt_cache_len:5145 prompt_cache_ratio:0.93240304458137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05998349189758301 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.046689748764038086 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.06171107292175293 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04974627494812012 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=224155254661675059795262565558056929326, time:1750766631.3913925s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:138.44037055969238ms total_cost_time:138.48328590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5562 prompt_cache_len:5151 prompt_cache_ratio:0.9261057173678533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:125.13875961303711ms total_cost_time:125.1668930053711ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:5519 prompt_cache_len:5145 prompt_cache_ratio:0.9322341003805037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.2050621509552002 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.19929218292236328 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.20676565170288086 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.202545166015625 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=189503719940156742628622832890297902010, time:1750766631.6838496s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:284.7471237182617ms total_cost_time:284.7902774810791ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5563 prompt_cache_len:5151 prompt_cache_ratio:0.9259392414165019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:279.0682315826416ms total_cost_time:279.09398078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:5520 prompt_cache_len:5145 prompt_cache_ratio:0.9320652173913043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.04391121864318848 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.03834700584411621 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.0454249382019043 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.04142618179321289 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=330447073011267437104863266520286198877, time:1750766631.8156574s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 current batch size: 2 -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:116.67895317077637ms total_cost_time:116.72306060791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5564 prompt_cache_len:5151 prompt_cache_ratio:0.9257728253055356 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 -INFO 06-24 20:03:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:123.31032752990723ms total_cost_time:123.34966659545898ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:5521 prompt_cache_len:5145 prompt_cache_ratio:0.9318963955805107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 -DEBUG 06-24 20:03:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:51 [batch.py:51] router release req id 8 -INFO 06-24 20:03:51 [batch.py:51] router release req id 400 -INFO 06-24 20:03:51 [manager.py:224] router recive req id 8 cost time 0.05378365516662598 s -INFO 06-24 20:03:51 [manager.py:224] router recive req id 400 cost time 0.04241490364074707 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 8 cost time 0.05529165267944336 s -INFO 06-24 20:03:51 [manager.py:68] detokenization recv req id 400 cost time 0.045420169830322266 s -DEBUG 06-24 20:03:51 [manager.py:391] Prefill Batch: batch_id=314057218962455482309286796550956194582, time:1750766631.946214s req_ids:[8, 400] -DEBUG 06-24 20:03:51 [manager.py:391] -ERROR 06-24 20:03:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:8 first_token_cost:132.40289688110352ms total_cost_time:132.4610710144043ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5565 prompt_cache_len:5151 prompt_cache_ratio:0.9256064690026954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:51 lightllm_req_id:400 first_token_cost:121.3080883026123ms total_cost_time:121.34933471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5522 prompt_cache_len:5145 prompt_cache_ratio:0.9317276349148859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.04295539855957031 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.03507637977600098 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.04437541961669922 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.038108110427856445 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=229062865899602578597511652248994862537, time:1750766632.0796459s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:108.74557495117188ms total_cost_time:108.79230499267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5566 prompt_cache_len:5151 prompt_cache_ratio:0.9254401724757456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:113.79718780517578ms total_cost_time:113.83748054504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5523 prompt_cache_len:5145 prompt_cache_ratio:0.9315589353612167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.05973100662231445 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04776358604431152 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.0613408088684082 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.05068635940551758 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=41358319528901141233982019330024275285, time:1750766632.2092133s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:138.08107376098633ms total_cost_time:138.1094455718994ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:5567 prompt_cache_len:5151 prompt_cache_ratio:0.9252739356924735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:125.89120864868164ms total_cost_time:125.90932846069336ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:5524 prompt_cache_len:5145 prompt_cache_ratio:0.9313902968863143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.04565072059631348 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.042510032653808594 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.04734373092651367 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.045532941818237305 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=221491810908273780899804798753281648965, time:1750766632.3402593s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:105.50785064697266ms total_cost_time:105.53336143493652ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:5568 prompt_cache_len:5151 prompt_cache_ratio:0.9251077586206896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:119.49658393859863ms total_cost_time:119.51732635498047ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:5525 prompt_cache_len:5145 prompt_cache_ratio:0.9312217194570136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.06690049171447754 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.050124168395996094 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.06833982467651367 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.053025007247924805 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=140035562548706990595932859983144222185, time:1750766632.4720235s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:149.66607093811035ms total_cost_time:149.68609809875488ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:5569 prompt_cache_len:5151 prompt_cache_ratio:0.9249416412282276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:306.80036544799805ms total_cost_time:306.84638023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5526 prompt_cache_len:5145 prompt_cache_ratio:0.9310532030401737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.2213153839111328 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04813241958618164 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.22272658348083496 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.051221370697021484 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=25093827216994407787298731174224802561, time:1750766632.7835267s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:298.2964515686035ms total_cost_time:298.3403205871582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5570 prompt_cache_len:5151 prompt_cache_ratio:0.9247755834829443 mtp_avg_token_per_step:1.0 -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:127.93445587158203ms total_cost_time:127.97045707702637ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:5527 prompt_cache_len:5145 prompt_cache_ratio:0.9308847476026778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:52 [batch.py:51] router release req id 8 -INFO 06-24 20:03:52 [batch.py:51] router release req id 400 -INFO 06-24 20:03:52 [manager.py:224] router recive req id 8 cost time 0.0499424934387207 s -INFO 06-24 20:03:52 [manager.py:224] router recive req id 400 cost time 0.04373764991760254 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 8 cost time 0.05132174491882324 s -INFO 06-24 20:03:52 [manager.py:68] detokenization recv req id 400 cost time 0.04667043685913086 s -DEBUG 06-24 20:03:52 [manager.py:391] Prefill Batch: batch_id=118070640230890672790058146712677872330, time:1750766632.915787s req_ids:[8, 400] -DEBUG 06-24 20:03:52 [manager.py:391] -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -ERROR 06-24 20:03:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:129.42767143249512ms total_cost_time:129.4727325439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5571 prompt_cache_len:5151 prompt_cache_ratio:0.9246095853527194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:123.40521812438965ms total_cost_time:123.43168258666992ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:5528 prompt_cache_len:5145 prompt_cache_ratio:0.9307163531114327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 -INFO 06-24 20:03:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -INFO 06-24 20:03:53 [batch.py:51] router release req id 400 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.04362940788269043 s -INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.03808116912841797 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.04511308670043945 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.040969133377075195 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=246547327416313757871574971943264278749, time:1750766633.0468106s req_ids:[8, 400] -DEBUG 06-24 20:03:53 [manager.py:391] -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:8 first_token_cost:113.75117301940918ms total_cost_time:113.79432678222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5572 prompt_cache_len:5151 prompt_cache_ratio:0.9244436468054559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:52 lightllm_req_id:400 first_token_cost:131.791353225708ms total_cost_time:131.82926177978516ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:5529 prompt_cache_len:5145 prompt_cache_ratio:0.9305480195333695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -INFO 06-24 20:03:53 [batch.py:51] router release req id 400 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.07991933822631836 s -INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.056919097900390625 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.08159661293029785 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.0599365234375 s -INFO 06-24 20:03:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:03:53 [statics_utils.py:24] mean first cost: 199.25973987296916 ms -INFO 06-24 20:03:53 [statics_utils.py:24] mean per token cost: 0.1913509480589316 ms -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=271533157199108687666199393799358983866, time:1750766633.2024448s req_ids:[8, 400] -DEBUG 06-24 20:03:53 [manager.py:391] -INFO 06-24 20:03:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:151.09562873840332ms total_cost_time:151.1397361755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5573 prompt_cache_len:5151 prompt_cache_ratio:0.9242777678090794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:137.0244026184082ms total_cost_time:137.06111907958984ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:5530 prompt_cache_len:5145 prompt_cache_ratio:0.930379746835443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -INFO 06-24 20:03:53 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_999 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_999 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_999 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_999 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.10678935050964355 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=118189855342107601929975265918989066578, time:1750766633.3677108s req_ids:[8] -DEBUG 06-24 20:03:53 [manager.py:391] -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.10851716995239258 s -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:152.89044380187988ms total_cost_time:152.93121337890625ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5574 prompt_cache_len:5151 prompt_cache_ratio:0.9241119483315393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.1755075454711914 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.17688369750976562 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=206890208886519794914466304678742276031, time:1750766633.4648275s req_ids:[400] -DEBUG 06-24 20:03:53 [manager.py:391] -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_999 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_999 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_999 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_999 -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:390.4855251312256ms total_cost_time:390.5303478240967ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5531 prompt_cache_len:5145 prompt_cache_ratio:0.930211534984632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 400 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.26761531829833984 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.26903581619262695 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=300086502488774677854934614702268158373, time:1750766633.7060769s req_ids:[8] -DEBUG 06-24 20:03:53 [manager.py:391] -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:319.516658782959ms total_cost_time:319.55838203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5575 prompt_cache_len:5151 prompt_cache_ratio:0.9239461883408072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 8 cost time 0.02121567726135254 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 8 cost time 0.02264857292175293 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=298668374296881365915358082417663999794, time:1750766633.784471s req_ids:[8] -DEBUG 06-24 20:03:53 [manager.py:391] -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:72.21007347106934ms total_cost_time:72.24917411804199ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:5576 prompt_cache_len:5151 prompt_cache_ratio:0.9237804878048781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:53 [manager.py:224] router recive req id 400 cost time 0.20575737953186035 s -INFO 06-24 20:03:53 [manager.py:68] detokenization recv req id 400 cost time 0.20719575881958008 s -DEBUG 06-24 20:03:53 [manager.py:391] Prefill Batch: batch_id=120711897993918734575775504122451669840, time:1750766633.8951025s req_ids:[400] -DEBUG 06-24 20:03:53 [manager.py:391] -ERROR 06-24 20:03:53 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:53 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:277.09007263183594ms total_cost_time:277.1332263946533ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5532 prompt_cache_len:5145 prompt_cache_ratio:0.9300433839479393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 -DEBUG 06-24 20:03:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:53 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20678138732910156 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.2082827091217041 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=175617659414096469247437226850579789817, time:1750766634.04318s req_ids:[8] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:8 first_token_cost:268.36490631103516ms total_cost_time:268.40901374816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5577 prompt_cache_len:5151 prompt_cache_ratio:0.9236148466917697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.2080368995666504 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.20966148376464844 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=191458082002302381861759455538494431665, time:1750766634.1755795s req_ids:[400] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:53 lightllm_req_id:400 first_token_cost:279.50215339660645ms total_cost_time:279.54721450805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5533 prompt_cache_len:5145 prompt_cache_ratio:0.9298752936923911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20798039436340332 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.20949530601501465 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=120231025276007316275672584283598805958, time:1750766634.3156736s req_ids:[8] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:284.0301990509033ms total_cost_time:284.0754985809326ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5578 prompt_cache_len:5151 prompt_cache_ratio:0.9234492649695232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.20765185356140137 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.2092914581298828 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=209445341372169626103509412408618463930, time:1750766634.467842s req_ids:[400] -DEBUG 06-24 20:03:54 [manager.py:391] -DEBUG 06-24 20:03:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 66533.460 tokens/s -DEBUG 06-24 20:03:54 [stats.py:37] Avg prompt tokens throughput: 66509.391 tokens/s -DEBUG 06-24 20:03:54 [stats.py:37] Avg generate tokens throughput: 24.070 tokens/s -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:292.45805740356445ms total_cost_time:292.50192642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5534 prompt_cache_len:5145 prompt_cache_ratio:0.9297072641850379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.2077946662902832 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.20938658714294434 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=260413419221066590120254872903618999432, time:1750766634.6162775s req_ids:[8] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:295.87769508361816ms total_cost_time:295.928955078125ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:5579 prompt_cache_len:5151 prompt_cache_ratio:0.9232837426062018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 400 cost time 0.20644187927246094 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 400 cost time 0.2079486846923828 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=231282726551859208564745742786319460151, time:1750766634.7701323s req_ids:[400] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:298.5856533050537ms total_cost_time:298.6283302307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5535 prompt_cache_len:5145 prompt_cache_ratio:0.9295392953929539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 -DEBUG 06-24 20:03:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:54 [batch.py:51] router release req id 400 -DEBUG 06-24 20:03:54 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:54 [manager.py:283] -DEBUG 06-24 20:03:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:54 [manager.py:284] -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:54 [manager.py:224] router recive req id 8 cost time 0.20926380157470703 s -INFO 06-24 20:03:54 [manager.py:68] detokenization recv req id 8 cost time 0.21129131317138672 s -DEBUG 06-24 20:03:54 [manager.py:391] Prefill Batch: batch_id=5916925452140600368580878633475366511, time:1750766634.921155s req_ids:[8] -DEBUG 06-24 20:03:54 [manager.py:391] -ERROR 06-24 20:03:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:296.6790199279785ms total_cost_time:296.7219352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5580 prompt_cache_len:5151 prompt_cache_ratio:0.9231182795698925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.20787477493286133 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20943260192871094 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=131405400482022463597985776673580920487, time:1750766635.073419s req_ids:[400] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:400 first_token_cost:294.4645881652832ms total_cost_time:294.5091724395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5536 prompt_cache_len:5145 prompt_cache_ratio:0.929371387283237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.2076876163482666 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20923233032226562 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=7300979067914406541592100823111090822, time:1750766635.2207983s req_ids:[8] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:54 lightllm_req_id:8 first_token_cost:290.7235622406006ms total_cost_time:290.7674312591553ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5581 prompt_cache_len:5151 prompt_cache_ratio:0.9229528758287046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.20677852630615234 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20830726623535156 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=152348736204183644698880921748109334448, time:1750766635.3683143s req_ids:[400] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:296.7367172241211ms total_cost_time:296.7798709869385ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5537 prompt_cache_len:5145 prompt_cache_ratio:0.9292035398230089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.20754265785217285 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20917081832885742 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=82231552322518582949077899934064058113, time:1750766635.5225294s req_ids:[8] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:299.1311550140381ms total_cost_time:299.1750240325928ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5582 prompt_cache_len:5151 prompt_cache_ratio:0.9227875313507703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 400 cost time 0.2080698013305664 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 400 cost time 0.20959138870239258 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=194532406525690553316439469078000011290, time:1750766635.6719136s req_ids:[400] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:293.64848136901855ms total_cost_time:293.69115829467773ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5538 prompt_cache_len:5145 prompt_cache_ratio:0.929035752979415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 -DEBUG 06-24 20:03:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:55 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:55 [manager.py:224] router recive req id 8 cost time 0.20764589309692383 s -INFO 06-24 20:03:55 [manager.py:68] detokenization recv req id 8 cost time 0.20922613143920898 s -DEBUG 06-24 20:03:55 [manager.py:391] Prefill Batch: batch_id=89056654487569766537445314901610860260, time:1750766635.8223937s req_ids:[8] -DEBUG 06-24 20:03:55 [manager.py:391] -ERROR 06-24 20:03:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:300.92930793762207ms total_cost_time:300.97246170043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5583 prompt_cache_len:5151 prompt_cache_ratio:0.922622246104245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 -DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.4081413745880127 s -INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.40973734855651855 s -DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=159253867299297248986598983857718176201, time:1750766636.1493287s req_ids:[400] -DEBUG 06-24 20:03:56 [manager.py:391] -ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:400 first_token_cost:470.92437744140625ms total_cost_time:470.966100692749ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5539 prompt_cache_len:5145 prompt_cache_ratio:0.9288680267196244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 -DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:56 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:56 [manager.py:224] router recive req id 8 cost time 0.4112677574157715 s -INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 8 cost time 0.4130704402923584 s -DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=290322282031980276679568861411399006067, time:1750766636.335271s req_ids:[8] -DEBUG 06-24 20:03:56 [manager.py:391] -ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:55 lightllm_req_id:8 first_token_cost:519.6452140808105ms total_cost_time:519.6881294250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5584 prompt_cache_len:5151 prompt_cache_ratio:0.9224570200573066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 -DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.30808353424072266 s -INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.30986714363098145 s -DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=213367066965451755239282467278423336233, time:1750766636.5411975s req_ids:[400] -DEBUG 06-24 20:03:56 [manager.py:391] -ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:56 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 first_token_cost:406.42809867858887ms total_cost_time:406.47006034851074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5540 prompt_cache_len:5145 prompt_cache_ratio:0.9287003610108303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 -DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:56 [batch.py:51] router release req id 400 -INFO 06-24 20:03:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:56 [manager.py:224] router recive req id 8 cost time 0.30820584297180176 s -INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 8 cost time 0.31000399589538574 s -DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=36784291884233071155657384924118802316, time:1750766636.748188s req_ids:[8] -DEBUG 06-24 20:03:56 [manager.py:391] -ERROR 06-24 20:03:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 first_token_cost:409.0914726257324ms total_cost_time:409.1348648071289ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5585 prompt_cache_len:5151 prompt_cache_ratio:0.9222918531781558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 -DEBUG 06-24 20:03:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:56 [manager.py:224] router recive req id 400 cost time 0.3094446659088135 s -INFO 06-24 20:03:56 [manager.py:68] detokenization recv req id 400 cost time 0.3112037181854248 s -DEBUG 06-24 20:03:56 [manager.py:391] Prefill Batch: batch_id=47933076243050267512741088183108664072, time:1750766636.95525s req_ids:[400] -DEBUG 06-24 20:03:56 [manager.py:391] -ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:400 first_token_cost:411.13829612731934ms total_cost_time:411.2122058868408ms,out_token_counter:1 mean_per_token_cost_time: 0.07390975952148438ms prompt_token_num:5541 prompt_cache_len:5145 prompt_cache_ratio:0.9285327558202491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 -DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:57 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.30771684646606445 s -INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.30957651138305664 s -DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=85893696854207356519531292520676573317, time:1750766637.164642s req_ids:[8] -DEBUG 06-24 20:03:57 [manager.py:391] -ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:56 lightllm_req_id:8 first_token_cost:412.52946853637695ms total_cost_time:412.57214546203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5586 prompt_cache_len:5151 prompt_cache_ratio:0.9221267454350162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 -DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:57 [manager.py:224] router recive req id 400 cost time 0.3083832263946533 s -INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 400 cost time 0.3101034164428711 s -DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=133049827962665090558556615733996681227, time:1750766637.3753276s req_ids:[400] -DEBUG 06-24 20:03:57 [manager.py:391] -ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:414.25585746765137ms total_cost_time:414.3080711364746ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:5542 prompt_cache_len:5145 prompt_cache_ratio:0.9283652111151209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 -DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:57 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.3082447052001953 s -INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.3100576400756836 s -DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=184752098914869427963870186203463903736, time:1750766637.5862875s req_ids:[8] -DEBUG 06-24 20:03:57 [manager.py:391] -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:03:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 first_token_cost:416.69726371765137ms total_cost_time:416.72635078430176ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5587 prompt_cache_len:5151 prompt_cache_ratio:0.9219616967961339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 -DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:57 [manager.py:224] router recive req id 400 cost time 0.30835843086242676 s -INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 400 cost time 0.31026625633239746 s -DEBUG 06-24 20:03:57 [manager.py:391] Prefill Batch: batch_id=337284436783423636467000215915826243165, time:1750766637.797384s req_ids:[400] -DEBUG 06-24 20:03:57 [manager.py:391] -ERROR 06-24 20:03:57 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:57 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:415.9364700317383ms total_cost_time:415.9808158874512ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5543 prompt_cache_len:5145 prompt_cache_ratio:0.9281977268627097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 -DEBUG 06-24 20:03:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:57 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:57 [manager.py:224] router recive req id 8 cost time 0.30741071701049805 s -INFO 06-24 20:03:57 [manager.py:68] detokenization recv req id 8 cost time 0.30914735794067383 s -DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=183381485783916380044657226258425736513, time:1750766638.0071425s req_ids:[8] -DEBUG 06-24 20:03:58 [manager.py:391] -ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:8 first_token_cost:416.25499725341797ms total_cost_time:416.29767417907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5588 prompt_cache_len:5151 prompt_cache_ratio:0.9217967072297781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 -DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:58 [manager.py:224] router recive req id 400 cost time 0.30820560455322266 s -INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 400 cost time 0.31025171279907227 s -DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=325009666800542668627770362727147024858, time:1750766638.217403s req_ids:[400] -DEBUG 06-24 20:03:58 [manager.py:391] -ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:57 lightllm_req_id:400 first_token_cost:416.4443016052246ms total_cost_time:416.4876937866211ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5544 prompt_cache_len:5145 prompt_cache_ratio:0.928030303030303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 -DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:58 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:58 [manager.py:224] router recive req id 8 cost time 0.3092689514160156 s -INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 8 cost time 0.311384916305542 s -DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=320695993755427275267903011700269169569, time:1750766638.4285886s req_ids:[8] -DEBUG 06-24 20:03:58 [manager.py:391] -ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 first_token_cost:416.8996810913086ms total_cost_time:416.9437885284424ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5589 prompt_cache_len:5151 prompt_cache_ratio:0.9216317767042405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 -DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:58 [manager.py:224] router recive req id 400 cost time 0.30898094177246094 s -INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 400 cost time 0.31093597412109375 s -DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=116259232714662220742437296011044703608, time:1750766638.6391995s req_ids:[400] -DEBUG 06-24 20:03:58 [manager.py:391] -ERROR 06-24 20:03:58 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:58 [manager.py:162] detoken release req id 400 -INFO 06-24 20:03:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 first_token_cost:415.13705253601074ms total_cost_time:415.1802062988281ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5545 prompt_cache_len:5145 prompt_cache_ratio:0.9278629395852119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 -DEBUG 06-24 20:03:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:58 [batch.py:51] router release req id 400 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:58 [manager.py:224] router recive req id 8 cost time 0.3078181743621826 s -INFO 06-24 20:03:58 [manager.py:68] detokenization recv req id 8 cost time 0.3097081184387207 s -DEBUG 06-24 20:03:58 [manager.py:391] Prefill Batch: batch_id=313887881215751464440488569991646795119, time:1750766638.8514457s req_ids:[8] -DEBUG 06-24 20:03:58 [manager.py:391] -ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:8 first_token_cost:584.6686363220215ms total_cost_time:584.7115516662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5590 prompt_cache_len:5151 prompt_cache_ratio:0.9214669051878355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 -DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10744333267211914 s -INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10941529273986816 s -DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=170229261247117821417554070380268408229, time:1750766639.2209778s req_ids:[8] -DEBUG 06-24 20:03:59 [manager.py:391] -ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:193.56203079223633ms total_cost_time:193.603515625ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5591 prompt_cache_len:5151 prompt_cache_ratio:0.9213020926489001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 -DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.1070556640625 s -INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10885930061340332 s -DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=256572012918454229085549620369003075712, time:1750766639.428526s req_ids:[8] -DEBUG 06-24 20:03:59 [manager.py:391] -ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.75411415100098ms total_cost_time:204.8037052154541ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:5592 prompt_cache_len:5151 prompt_cache_ratio:0.921137339055794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 -DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10832595825195312 s -INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.11004018783569336 s -DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=7017378000446258317538721738950539737, time:1750766639.6379635s req_ids:[8] -DEBUG 06-24 20:03:59 [manager.py:391] -ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.96273040771484ms total_cost_time:205.00564575195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5593 prompt_cache_len:5151 prompt_cache_ratio:0.9209726443768997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 -DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:03:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:03:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:03:59 [manager.py:224] router recive req id 8 cost time 0.10710859298706055 s -INFO 06-24 20:03:59 [manager.py:68] detokenization recv req id 8 cost time 0.10886669158935547 s -DEBUG 06-24 20:03:59 [manager.py:391] Prefill Batch: batch_id=207812318088513596096976772340931268235, time:1750766639.845761s req_ids:[8] -DEBUG 06-24 20:03:59 [manager.py:391] -ERROR 06-24 20:03:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:03:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:03:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:204.00428771972656ms total_cost_time:204.05030250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5594 prompt_cache_len:5151 prompt_cache_ratio:0.9208080085806221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:03:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 -DEBUG 06-24 20:03:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:03:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:03:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:03:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:03:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.10685467720031738 s -INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.1087028980255127 s -DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=119500951787489351729722916769637496863, time:1750766640.058325s req_ids:[8] -DEBUG 06-24 20:04:00 [manager.py:391] -ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:59 lightllm_req_id:8 first_token_cost:207.11684226989746ms total_cost_time:207.15928077697754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5595 prompt_cache_len:5151 prompt_cache_ratio:0.9206434316353888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 -DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:00 [batch.py:51] router release req id 8 -INFO 06-24 20:04:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:00 [manager.py:224] router recive req id 400 cost time 1.5198016166687012 s -INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 400 cost time 1.5215346813201904 s -DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=312625324778887380231774992193424212118, time:1750766640.267414s req_ids:[400] -DEBUG 06-24 20:04:00 [manager.py:391] -ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:03:58 lightllm_req_id:400 first_token_cost:1622.63822555542ms total_cost_time:1622.6818561553955ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5546 prompt_cache_len:5145 prompt_cache_ratio:0.927695636494771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 -DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:00 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.3097348213195801 s -INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.31153059005737305 s -DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=191448337359629793380209850348657511519, time:1750766640.4759295s req_ids:[8] -DEBUG 06-24 20:04:00 [manager.py:391] -ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:414.45374488830566ms total_cost_time:414.49880599975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5596 prompt_cache_len:5151 prompt_cache_ratio:0.9204789135096497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 -DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:00 [manager.py:224] router recive req id 400 cost time 0.3084886074066162 s -INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 400 cost time 0.3103458881378174 s -DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=167887182440130761946522220894601097220, time:1750766640.687438s req_ids:[400] -DEBUG 06-24 20:04:00 [manager.py:391] -ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:00 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 first_token_cost:412.19377517700195ms total_cost_time:412.23883628845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5547 prompt_cache_len:5145 prompt_cache_ratio:0.9275283937263386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 -DEBUG 06-24 20:04:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:00 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:00 [manager.py:224] router recive req id 8 cost time 0.3095569610595703 s -INFO 06-24 20:04:00 [manager.py:68] detokenization recv req id 8 cost time 0.3113560676574707 s -DEBUG 06-24 20:04:00 [manager.py:391] Prefill Batch: batch_id=5289778497843780574292314570987780027, time:1750766640.8948448s req_ids:[8] -DEBUG 06-24 20:04:00 [manager.py:391] -ERROR 06-24 20:04:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:413.35344314575195ms total_cost_time:413.39898109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5597 prompt_cache_len:5151 prompt_cache_ratio:0.9203144541718777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 -DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:01 [manager.py:224] router recive req id 400 cost time 0.30982255935668945 s -INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 400 cost time 0.3119990825653076 s -DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=207913426562109027215512195925360584916, time:1750766641.1051707s req_ids:[400] -DEBUG 06-24 20:04:01 [manager.py:391] -ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:400 first_token_cost:409.6224308013916ms total_cost_time:409.6653461456299ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5548 prompt_cache_len:5145 prompt_cache_ratio:0.9273612112472963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 -DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:01 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:01 [manager.py:224] router recive req id 8 cost time 0.3093705177307129 s -INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 8 cost time 0.31146836280822754 s -DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=51395948235105543153758312289523783491, time:1750766641.3151486s req_ids:[8] -DEBUG 06-24 20:04:01 [manager.py:391] -ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:00 lightllm_req_id:8 first_token_cost:412.16206550598145ms total_cost_time:412.2049808502197ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5598 prompt_cache_len:5151 prompt_cache_ratio:0.920150053590568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:8 -DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:01 [manager.py:224] router recive req id 400 cost time 0.3088245391845703 s -INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 400 cost time 0.3106422424316406 s -DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=181999333882499944085172552825451111517, time:1750766641.5259075s req_ids:[400] -DEBUG 06-24 20:04:01 [manager.py:391] -ERROR 06-24 20:04:01 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:01 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 first_token_cost:621.3717460632324ms total_cost_time:621.4141845703125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5549 prompt_cache_len:5145 prompt_cache_ratio:0.9271940890250495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 -DEBUG 06-24 20:04:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:01 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:01 [manager.py:224] router recive req id 8 cost time 0.5109429359436035 s -INFO 06-24 20:04:01 [manager.py:68] detokenization recv req id 8 cost time 0.5127873420715332 s -DEBUG 06-24 20:04:01 [manager.py:391] Prefill Batch: batch_id=34063390236824430431743071058540932817, time:1750766641.9308975s req_ids:[8] -DEBUG 06-24 20:04:01 [manager.py:391] -ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:8 first_token_cost:611.9697093963623ms total_cost_time:612.0131015777588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5599 prompt_cache_len:5151 prompt_cache_ratio:0.9199857117342383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 -DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.30853796005249023 s -INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.3106873035430908 s -DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=122162866983859921578777979631834896650, time:1750766642.1407092s req_ids:[400] -DEBUG 06-24 20:04:02 [manager.py:391] -ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:01 lightllm_req_id:400 first_token_cost:404.829740524292ms total_cost_time:404.8733711242676ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5550 prompt_cache_len:5145 prompt_cache_ratio:0.927027027027027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 -DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:02 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:02 [manager.py:224] router recive req id 8 cost time 0.3110980987548828 s -INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 8 cost time 0.31302738189697266 s -DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=178531426736335887918101514346955716625, time:1750766642.3484359s req_ids:[8] -DEBUG 06-24 20:04:02 [manager.py:391] -ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:413.24806213378906ms total_cost_time:413.29169273376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5600 prompt_cache_len:5151 prompt_cache_ratio:0.9198214285714286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 -DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.309192419052124 s -INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.31116652488708496 s -DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=9356760427016274025068612944708979669, time:1750766642.560211s req_ids:[400] -DEBUG 06-24 20:04:02 [manager.py:391] -ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:02 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 first_token_cost:414.3857955932617ms total_cost_time:414.4296646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5551 prompt_cache_len:5145 prompt_cache_ratio:0.926860025220681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 -DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:02 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:02 [manager.py:224] router recive req id 8 cost time 0.3094029426574707 s -INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 8 cost time 0.311328649520874 s -DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=150881320521901634364777904695554824780, time:1750766642.7679093s req_ids:[8] -DEBUG 06-24 20:04:02 [manager.py:391] -ERROR 06-24 20:04:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:409.25073623657227ms total_cost_time:409.2881679534912ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:5601 prompt_cache_len:5151 prompt_cache_ratio:0.9196572040707016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 -DEBUG 06-24 20:04:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:02 [manager.py:224] router recive req id 400 cost time 0.3088548183441162 s -INFO 06-24 20:04:02 [manager.py:68] detokenization recv req id 400 cost time 0.3109605312347412 s -DEBUG 06-24 20:04:02 [manager.py:391] Prefill Batch: batch_id=201395187720541241109434204510260875519, time:1750766642.9753573s req_ids:[400] -DEBUG 06-24 20:04:02 [manager.py:391] -ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:400 first_token_cost:409.70611572265625ms total_cost_time:409.74998474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5552 prompt_cache_len:5145 prompt_cache_ratio:0.926693083573487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 -DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:03 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.307326078414917 s -INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 8 cost time 0.30927395820617676 s -DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=207794210652918811786776671903815756052, time:1750766643.1843364s req_ids:[8] -DEBUG 06-24 20:04:03 [manager.py:391] -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:02 lightllm_req_id:8 first_token_cost:414.50023651123047ms total_cost_time:414.54410552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5602 prompt_cache_len:5151 prompt_cache_ratio:0.9194930382006427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 -DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:03 [manager.py:224] router recive req id 400 cost time 0.30852389335632324 s -INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 400 cost time 0.3105156421661377 s -DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=303746443963813672154060956251192604052, time:1750766643.3955302s req_ids:[400] -DEBUG 06-24 20:04:03 [manager.py:391] -ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:414.3822193145752ms total_cost_time:414.4270420074463ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5553 prompt_cache_len:5145 prompt_cache_ratio:0.9265262020529443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 -DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:03 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.3101041316986084 s -INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 8 cost time 0.31215476989746094 s -DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=143059255189025818730587806413496586422, time:1750766643.6039033s req_ids:[8] -DEBUG 06-24 20:04:03 [manager.py:391] -ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 first_token_cost:413.632869720459ms total_cost_time:413.67626190185547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5603 prompt_cache_len:5151 prompt_cache_ratio:0.919328930929859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 -DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:03 [manager.py:224] router recive req id 400 cost time 0.3083205223083496 s -INFO 06-24 20:04:03 [manager.py:68] detokenization recv req id 400 cost time 0.3108999729156494 s -DEBUG 06-24 20:04:03 [manager.py:391] Prefill Batch: batch_id=159119699053717267174256553391988467768, time:1750766643.8156955s req_ids:[400] -DEBUG 06-24 20:04:03 [manager.py:391] -ERROR 06-24 20:04:03 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:03 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:414.597749710083ms total_cost_time:414.6406650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5554 prompt_cache_len:5145 prompt_cache_ratio:0.9263593806265754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 -DEBUG 06-24 20:04:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:03 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:03 [manager.py:224] router recive req id 8 cost time 0.30870532989501953 s -INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 8 cost time 0.310497522354126 s -DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=137408571669613238069218075157386923670, time:1750766644.026565s req_ids:[8] -DEBUG 06-24 20:04:04 [manager.py:391] -ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:8 first_token_cost:416.58830642700195ms total_cost_time:416.6300296783447ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5604 prompt_cache_len:5151 prompt_cache_ratio:0.9191648822269807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 -DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:04 [manager.py:224] router recive req id 400 cost time 0.5117514133453369 s -INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 400 cost time 0.5139124393463135 s -DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=311570343597427374078576646439964112696, time:1750766644.4392066s req_ids:[400] -DEBUG 06-24 20:04:04 [manager.py:391] -ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:04:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 26054.055 tokens/s -DEBUG 06-24 20:04:04 [stats.py:37] Avg prompt tokens throughput: 26044.605 tokens/s -DEBUG 06-24 20:04:04 [stats.py:37] Avg generate tokens throughput: 9.450 tokens/s -INFO 06-24 20:04:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:03 lightllm_req_id:400 first_token_cost:620.6552982330322ms total_cost_time:620.6998825073242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5555 prompt_cache_len:5145 prompt_cache_ratio:0.9261926192619262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 -DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:04 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:04 [manager.py:224] router recive req id 8 cost time 0.5112729072570801 s -INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 8 cost time 0.5130696296691895 s -DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=258484979404999532749697903307901579785, time:1750766644.654779s req_ids:[8] -DEBUG 06-24 20:04:04 [manager.py:391] -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 first_token_cost:623.0220794677734ms total_cost_time:623.0645179748535ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5605 prompt_cache_len:5151 prompt_cache_ratio:0.9190008920606602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 -DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:04 [manager.py:224] router recive req id 400 cost time 0.30897068977355957 s -INFO 06-24 20:04:04 [manager.py:68] detokenization recv req id 400 cost time 0.31076908111572266 s -DEBUG 06-24 20:04:04 [manager.py:391] Prefill Batch: batch_id=202936217410018198300462329942163462523, time:1750766644.8643155s req_ids:[400] -DEBUG 06-24 20:04:04 [manager.py:391] -ERROR 06-24 20:04:04 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:04 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 first_token_cost:415.6372547149658ms total_cost_time:415.679931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5556 prompt_cache_len:5145 prompt_cache_ratio:0.9260259179265659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 -DEBUG 06-24 20:04:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:04 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3082394599914551 s -INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.3101339340209961 s -DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=333318630767069807526149463415231793460, time:1750766645.070772s req_ids:[8] -DEBUG 06-24 20:04:05 [manager.py:391] -ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:8 first_token_cost:410.3398323059082ms total_cost_time:410.3813171386719ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5606 prompt_cache_len:5151 prompt_cache_ratio:0.9188369603995719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 -DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:05 [manager.py:224] router recive req id 400 cost time 0.3093225955963135 s -INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 400 cost time 0.31135058403015137 s -DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=292298541697273391556195931370633541622, time:1750766645.280804s req_ids:[400] -DEBUG 06-24 20:04:05 [manager.py:391] -ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:04 lightllm_req_id:400 first_token_cost:409.59906578063965ms total_cost_time:409.64221954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5557 prompt_cache_len:5145 prompt_cache_ratio:0.9258592765880871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 -DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:05 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3061075210571289 s -INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.3079698085784912 s -DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=220642754003056481251368748772506181474, time:1750766645.4900327s req_ids:[8] -DEBUG 06-24 20:04:05 [manager.py:391] -ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 first_token_cost:414.9055480957031ms total_cost_time:414.9479866027832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5607 prompt_cache_len:5151 prompt_cache_ratio:0.918673087212413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 -DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:05 [manager.py:224] router recive req id 400 cost time 0.3074033260345459 s -INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 400 cost time 0.30938100814819336 s -DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=97411769377960097701004443046286862738, time:1750766645.7023854s req_ids:[400] -DEBUG 06-24 20:04:05 [manager.py:391] -ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:05 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 first_token_cost:416.32556915283203ms total_cost_time:416.3699150085449ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5558 prompt_cache_len:5145 prompt_cache_ratio:0.9256926952141058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 -DEBUG 06-24 20:04:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:05 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:05 [manager.py:224] router recive req id 8 cost time 0.3090088367462158 s -INFO 06-24 20:04:05 [manager.py:68] detokenization recv req id 8 cost time 0.31144118309020996 s -DEBUG 06-24 20:04:05 [manager.py:391] Prefill Batch: batch_id=127216266947550499551767509357218711452, time:1750766645.9182296s req_ids:[8] -DEBUG 06-24 20:04:05 [manager.py:391] -ERROR 06-24 20:04:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:8 first_token_cost:424.82495307922363ms total_cost_time:424.8678684234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5608 prompt_cache_len:5151 prompt_cache_ratio:0.918509272467903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:8 -DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.3076171875 s -INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.31009960174560547 s -DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=70710339150044694638913242786478349956, time:1750766646.0963833s req_ids:[400] -DEBUG 06-24 20:04:06 [manager.py:391] -ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:05 lightllm_req_id:400 first_token_cost:384.6170902252197ms total_cost_time:384.6616744995117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5559 prompt_cache_len:5145 prompt_cache_ratio:0.9255261737722612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 -DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:06 [batch.py:51] router release req id 400 -INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10553979873657227 s -INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10801196098327637 s -DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=184800006221488638324880188008560939079, time:1750766646.2977662s req_ids:[400] -DEBUG 06-24 20:04:06 [manager.py:391] -ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:204.96511459350586ms total_cost_time:205.00874519348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5560 prompt_cache_len:5145 prompt_cache_ratio:0.9253597122302158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 -DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:06 [batch.py:51] router release req id 400 -INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10636520385742188 s -INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10876321792602539 s -DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=554085911765599510962848408216386845, time:1750766646.5093455s req_ids:[400] -DEBUG 06-24 20:04:06 [manager.py:391] -ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:206.5277099609375ms total_cost_time:206.57110214233398ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5561 prompt_cache_len:5145 prompt_cache_ratio:0.9251933105556555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 -DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:06 [batch.py:51] router release req id 400 -INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10669255256652832 s -INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10873794555664062 s -DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=200924581100489386741797026421560403360, time:1750766646.7189913s req_ids:[400] -DEBUG 06-24 20:04:06 [manager.py:391] -ERROR 06-24 20:04:06 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:06 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:202.8677463531494ms total_cost_time:202.9109001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5562 prompt_cache_len:5145 prompt_cache_ratio:0.9250269687162891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 -DEBUG 06-24 20:04:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:06 [batch.py:51] router release req id 400 -INFO 06-24 20:04:06 [manager.py:224] router recive req id 400 cost time 0.10732865333557129 s -INFO 06-24 20:04:06 [manager.py:68] detokenization recv req id 400 cost time 0.10935163497924805 s -DEBUG 06-24 20:04:06 [manager.py:391] Prefill Batch: batch_id=200704122696106381218141459743677525434, time:1750766646.9346104s req_ids:[400] -DEBUG 06-24 20:04:06 [manager.py:391] -ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:400 first_token_cost:206.53438568115234ms total_cost_time:206.57777786254883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5563 prompt_cache_len:5145 prompt_cache_ratio:0.924860686679849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 -DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:07 [batch.py:51] router release req id 400 -INFO 06-24 20:04:07 [manager.py:224] router recive req id 400 cost time 0.10532832145690918 s -INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 400 cost time 0.10737133026123047 s -DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=83912536543841667595844539094836774719, time:1750766647.1395905s req_ids:[400] -DEBUG 06-24 20:04:07 [manager.py:391] -ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:206.1467170715332ms total_cost_time:206.1898708343506ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5564 prompt_cache_len:5145 prompt_cache_ratio:0.9246944644140905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 -DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:07 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 1.319875955581665 s -INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 1.3217723369598389 s -DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=48203246045638171774164962894794333738, time:1750766647.3597903s req_ids:[8] -DEBUG 06-24 20:04:07 [manager.py:391] -ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:06 lightllm_req_id:8 first_token_cost:1436.5203380584717ms total_cost_time:1436.56325340271ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5609 prompt_cache_len:5151 prompt_cache_ratio:0.9183455161347834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 -DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:07 [manager.py:224] router recive req id 400 cost time 0.30842065811157227 s -INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 400 cost time 0.31039977073669434 s -DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=171329892020683745980329694996334406256, time:1750766647.5371072s req_ids:[400] -DEBUG 06-24 20:04:07 [manager.py:391] -ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:07 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:378.12304496765137ms total_cost_time:378.16762924194336ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5565 prompt_cache_len:5145 prompt_cache_ratio:0.9245283018867925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 -DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:07 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 0.20805764198303223 s -INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 0.2098839282989502 s -DEBUG 06-24 20:04:07 [manager.py:391] Prefill Batch: batch_id=178477640574207755194116447892704888316, time:1750766647.6621356s req_ids:[8] -DEBUG 06-24 20:04:07 [manager.py:391] -ERROR 06-24 20:04:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 first_token_cost:431.0300350189209ms total_cost_time:431.0739040374756ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5610 prompt_cache_len:5151 prompt_cache_ratio:0.9181818181818182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 -DEBUG 06-24 20:04:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:07 [manager.py:224] router recive req id 8 cost time 0.10762858390808105 s -INFO 06-24 20:04:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s -DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=186946421580586713852412184587159165742, time:1750766648.0047326s req_ids:[8] -DEBUG 06-24 20:04:08 [manager.py:391] -ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:8 first_token_cost:193.50433349609375ms total_cost_time:193.54867935180664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5611 prompt_cache_len:5151 prompt_cache_ratio:0.9180181785777937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 -DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10751056671142578 s -INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.10948300361633301 s -DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=84464561349473505979403028501226303687, time:1750766648.2057552s req_ids:[8] -DEBUG 06-24 20:04:08 [manager.py:391] -ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:201.83396339416504ms total_cost_time:201.8759250640869ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5612 prompt_cache_len:5151 prompt_cache_ratio:0.9178545972915182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 -DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10818624496459961 s -INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.11023426055908203 s -DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=24021638303123893874954171386458452313, time:1750766648.4140673s req_ids:[8] -DEBUG 06-24 20:04:08 [manager.py:391] -ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:207.09514617919922ms total_cost_time:207.1397304534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5613 prompt_cache_len:5151 prompt_cache_ratio:0.9176910742918225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 -DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s -INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.11104893684387207 s -DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=270707997427265943533780191389721800738, time:1750766648.6249912s req_ids:[8] -DEBUG 06-24 20:04:08 [manager.py:391] -ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:203.89366149902344ms total_cost_time:203.94277572631836ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:5614 prompt_cache_len:5151 prompt_cache_ratio:0.9175276095475596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 -DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:08 [manager.py:224] router recive req id 8 cost time 0.10756587982177734 s -INFO 06-24 20:04:08 [manager.py:68] detokenization recv req id 8 cost time 0.1095733642578125 s -DEBUG 06-24 20:04:08 [manager.py:391] Prefill Batch: batch_id=164582218623876241791561632462622767483, time:1750766648.8353434s req_ids:[8] -DEBUG 06-24 20:04:08 [manager.py:391] -ERROR 06-24 20:04:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:205.1072120666504ms total_cost_time:205.13629913330078ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:5615 prompt_cache_len:5151 prompt_cache_ratio:0.9173642030276047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 -DEBUG 06-24 20:04:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.10462737083435059 s -INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.10654592514038086 s -DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=254881221985780640766236556553831137552, time:1750766649.0447145s req_ids:[8] -DEBUG 06-24 20:04:09 [manager.py:391] -ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:08 lightllm_req_id:8 first_token_cost:203.81903648376465ms total_cost_time:203.86290550231934ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5616 prompt_cache_len:5151 prompt_cache_ratio:0.9172008547008547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 -DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:09 [manager.py:224] router recive req id 400 cost time 1.621199607849121 s -INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 400 cost time 1.6231095790863037 s -DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=32125577664086894184896364509563615517, time:1750766649.2482486s req_ids:[400] -DEBUG 06-24 20:04:09 [manager.py:391] -ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:07 lightllm_req_id:400 first_token_cost:1720.1716899871826ms total_cost_time:1720.2157974243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5566 prompt_cache_len:5145 prompt_cache_ratio:0.9243621990657563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 -DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:09 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.3096799850463867 s -INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.3110342025756836 s -DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=46730805093707675538857214692509971952, time:1750766649.458498s req_ids:[8] -DEBUG 06-24 20:04:09 [manager.py:391] -ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:412.33348846435547ms total_cost_time:412.3809337615967ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:5617 prompt_cache_len:5151 prompt_cache_ratio:0.9170375645362293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 -DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:09 [manager.py:224] router recive req id 400 cost time 0.3077678680419922 s -INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 400 cost time 0.3090324401855469 s -DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=23474063618402929745215318299546271937, time:1750766649.6704204s req_ids:[400] -DEBUG 06-24 20:04:09 [manager.py:391] -ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:09 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 first_token_cost:417.39487648010254ms total_cost_time:417.4520969390869ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5567 prompt_cache_len:5145 prompt_cache_ratio:0.9241961559188072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 -DEBUG 06-24 20:04:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:09 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:09 [manager.py:224] router recive req id 8 cost time 0.3105649948120117 s -INFO 06-24 20:04:09 [manager.py:68] detokenization recv req id 8 cost time 0.31255459785461426 s -DEBUG 06-24 20:04:09 [manager.py:391] Prefill Batch: batch_id=133798484089321286677542925208158411055, time:1750766649.881909s req_ids:[8] -DEBUG 06-24 20:04:09 [manager.py:391] -ERROR 06-24 20:04:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:417.1261787414551ms total_cost_time:417.1566963195801ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:5618 prompt_cache_len:5151 prompt_cache_ratio:0.91687433250267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 -DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:10 [manager.py:224] router recive req id 400 cost time 0.31122732162475586 s -INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 400 cost time 0.3131442070007324 s -DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=172288862546244909271305936984538185972, time:1750766650.0923932s req_ids:[400] -DEBUG 06-24 20:04:10 [manager.py:391] -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:400 first_token_cost:416.95690155029297ms total_cost_time:417.00077056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5568 prompt_cache_len:5145 prompt_cache_ratio:0.9240301724137931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 -DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:10 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:10 [manager.py:224] router recive req id 8 cost time 0.3083064556121826 s -INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 8 cost time 0.3102741241455078 s -DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=107904784171822393551071728358014687071, time:1750766650.305406s req_ids:[8] -DEBUG 06-24 20:04:10 [manager.py:391] -ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:09 lightllm_req_id:8 first_token_cost:417.01221466064453ms total_cost_time:417.055606842041ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5619 prompt_cache_len:5151 prompt_cache_ratio:0.9167111585691404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 -DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:10 [manager.py:224] router recive req id 400 cost time 0.3084132671356201 s -INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 400 cost time 0.3103957176208496 s -DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=238665974378854712962824593539115985316, time:1750766650.5179589s req_ids:[400] -DEBUG 06-24 20:04:10 [manager.py:391] -ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:10 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 first_token_cost:418.26868057250977ms total_cost_time:418.31302642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5569 prompt_cache_len:5145 prompt_cache_ratio:0.9238642485185851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 -DEBUG 06-24 20:04:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:10 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:10 [manager.py:224] router recive req id 8 cost time 0.5093872547149658 s -INFO 06-24 20:04:10 [manager.py:68] detokenization recv req id 8 cost time 0.5114932060241699 s -DEBUG 06-24 20:04:10 [manager.py:391] Prefill Batch: batch_id=67063561849501242349282800334071899284, time:1750766650.9135842s req_ids:[8] -DEBUG 06-24 20:04:10 [manager.py:391] -ERROR 06-24 20:04:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 first_token_cost:599.778413772583ms total_cost_time:599.820613861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5620 prompt_cache_len:5151 prompt_cache_ratio:0.9165480427046263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 -DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:11 [manager.py:224] router recive req id 8 cost time 0.10658454895019531 s -INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 8 cost time 0.10840177536010742 s -DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=271885194208326370355595120234574521378, time:1750766651.1221745s req_ids:[8] -DEBUG 06-24 20:04:11 [manager.py:391] -ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:8 first_token_cost:202.90279388427734ms total_cost_time:202.94666290283203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5621 prompt_cache_len:5151 prompt_cache_ratio:0.9163849848781356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 -DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:11 [manager.py:224] router recive req id 8 cost time 0.10694456100463867 s -INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s -DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=85327986339808664354167931846470096604, time:1750766651.330387s req_ids:[8] -DEBUG 06-24 20:04:11 [manager.py:391] -ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 first_token_cost:210.88147163391113ms total_cost_time:210.93320846557617ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:5622 prompt_cache_len:5151 prompt_cache_ratio:0.916221985058698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 -DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.9161441326141357 s -DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=255114344367927461894644818240771348121, time:1750766651.5211604s req_ids:[400] -DEBUG 06-24 20:04:11 [manager.py:391] -INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.9184982776641846 s -ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:10 lightllm_req_id:400 first_token_cost:987.316370010376ms total_cost_time:987.3597621917725ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5570 prompt_cache_len:5145 prompt_cache_ratio:0.9236983842010772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 -DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:11 [batch.py:51] router release req id 400 -INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.10622859001159668 s -INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.1085507869720459 s -DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=221375098914235331110466844929578447172, time:1750766651.717867s req_ids:[400] -DEBUG 06-24 20:04:11 [manager.py:391] -ERROR 06-24 20:04:11 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:11 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 first_token_cost:200.5767822265625ms total_cost_time:200.62017440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5571 prompt_cache_len:5145 prompt_cache_ratio:0.9235325794291869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 -DEBUG 06-24 20:04:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:11 [batch.py:51] router release req id 400 -INFO 06-24 20:04:11 [manager.py:224] router recive req id 400 cost time 0.10710501670837402 s -INFO 06-24 20:04:11 [manager.py:68] detokenization recv req id 400 cost time 0.10856413841247559 s -DEBUG 06-24 20:04:11 [manager.py:391] Prefill Batch: batch_id=200016491595426344810975366264990745223, time:1750766651.9256s req_ids:[400] -DEBUG 06-24 20:04:11 [manager.py:391] -ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:400 first_token_cost:205.04140853881836ms total_cost_time:205.08432388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5572 prompt_cache_len:5145 prompt_cache_ratio:0.9233668341708543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 -DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:12 [batch.py:51] router release req id 400 -INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10688185691833496 s -INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.10837268829345703 s -DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=290968300566471388616889901941260062635, time:1750766652.1350586s req_ids:[400] -DEBUG 06-24 20:04:12 [manager.py:391] -ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:204.4215202331543ms total_cost_time:204.46443557739258ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5573 prompt_cache_len:5145 prompt_cache_ratio:0.9232011483940427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 -DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:12 [batch.py:51] router release req id 400 -INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10579776763916016 s -INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.1070852279663086 s -DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=87038016299531989214465367225567892343, time:1750766652.3449104s req_ids:[400] -DEBUG 06-24 20:04:12 [manager.py:391] -ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:202.08096504211426ms total_cost_time:202.12316513061523ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5574 prompt_cache_len:5145 prompt_cache_ratio:0.9230355220667384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 -DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:12 [batch.py:51] router release req id 400 -INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10552120208740234 s -INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.10744905471801758 s -DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=157257869562054399718573486539702768882, time:1750766652.5505366s req_ids:[400] -DEBUG 06-24 20:04:12 [manager.py:391] -ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:205.1711082458496ms total_cost_time:205.214262008667ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5575 prompt_cache_len:5145 prompt_cache_ratio:0.9228699551569507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 -DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:12 [batch.py:51] router release req id 400 -INFO 06-24 20:04:12 [manager.py:224] router recive req id 400 cost time 0.10639142990112305 s -INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 400 cost time 0.1083984375 s -DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=228977404470933678350538036202567535770, time:1750766652.774231s req_ids:[400] -DEBUG 06-24 20:04:12 [manager.py:391] -ERROR 06-24 20:04:12 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:12 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:217.78535842895508ms total_cost_time:217.82875061035156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5576 prompt_cache_len:5145 prompt_cache_ratio:0.9227044476327116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 -DEBUG 06-24 20:04:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:12 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:12 [manager.py:224] router recive req id 8 cost time 1.524458408355713 s -INFO 06-24 20:04:12 [manager.py:68] detokenization recv req id 8 cost time 1.5265793800354004 s -DEBUG 06-24 20:04:12 [manager.py:391] Prefill Batch: batch_id=205944119524255088767704860780994970925, time:1750766652.951166s req_ids:[8] -DEBUG 06-24 20:04:12 [manager.py:391] -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:11 lightllm_req_id:8 first_token_cost:1596.1339473724365ms total_cost_time:1596.1766242980957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5623 prompt_cache_len:5151 prompt_cache_ratio:0.9160590432153655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 -DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.20746803283691406 s -INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.20907902717590332 s -DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=31616259705159304457888948372497427736, time:1750766653.0783455s req_ids:[400] -DEBUG 06-24 20:04:13 [manager.py:391] -ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:12 lightllm_req_id:400 first_token_cost:261.32655143737793ms total_cost_time:261.3687515258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5577 prompt_cache_len:5145 prompt_cache_ratio:0.9225389994620764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 -DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:13 [batch.py:51] router release req id 400 -INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.3084428310394287 s -INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.31079840660095215 s -DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=190970186933295161960179467510202146044, time:1750766653.4490385s req_ids:[400] -DEBUG 06-24 20:04:13 [manager.py:391] -ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:405.5614471435547ms total_cost_time:405.6057929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5578 prompt_cache_len:5145 prompt_cache_ratio:0.922373610613123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 -DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:13 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:13 [manager.py:224] router recive req id 8 cost time 0.6158738136291504 s -INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 8 cost time 0.618344783782959 s -DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=274311673038307767228916345367006234921, time:1750766653.6625578s req_ids:[8] -DEBUG 06-24 20:04:13 [manager.py:391] -ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 first_token_cost:718.9376354217529ms total_cost_time:718.9795970916748ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5624 prompt_cache_len:5151 prompt_cache_ratio:0.9158961593172119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 -DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:13 [manager.py:224] router recive req id 400 cost time 0.30918264389038086 s -INFO 06-24 20:04:13 [manager.py:68] detokenization recv req id 400 cost time 0.3112337589263916 s -DEBUG 06-24 20:04:13 [manager.py:391] Prefill Batch: batch_id=48818713253908030137800363645514311753, time:1750766653.8722582s req_ids:[400] -DEBUG 06-24 20:04:13 [manager.py:391] -ERROR 06-24 20:04:13 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:13 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:417.39559173583984ms total_cost_time:417.4387454986572ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5579 prompt_cache_len:5145 prompt_cache_ratio:0.9222082810539524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 -DEBUG 06-24 20:04:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:13 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.3089447021484375 s -INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.3109443187713623 s -DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=329163786732395761775920919753492668433, time:1750766654.08399s req_ids:[8] -DEBUG 06-24 20:04:14 [manager.py:391] -ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:8 first_token_cost:414.6718978881836ms total_cost_time:414.7169589996338ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5625 prompt_cache_len:5151 prompt_cache_ratio:0.9157333333333333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 -DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:14 [manager.py:224] router recive req id 400 cost time 0.3087480068206787 s -INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 400 cost time 0.3111083507537842 s -DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=234297625513481218783368336363701821556, time:1750766654.2945468s req_ids:[400] -DEBUG 06-24 20:04:14 [manager.py:391] -ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:14 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:13 lightllm_req_id:400 first_token_cost:427.23655700683594ms total_cost_time:427.2916316986084ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:5580 prompt_cache_len:5145 prompt_cache_ratio:0.9220430107526881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:400 -DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:14 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.30880260467529297 s -INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.31030941009521484 s -DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=140714409445643900515517372517306199786, time:1750766654.4887865s req_ids:[8] -DEBUG 06-24 20:04:14 [manager.py:391] -ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:04:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 26195.741 tokens/s -DEBUG 06-24 20:04:14 [stats.py:37] Avg prompt tokens throughput: 26186.373 tokens/s -DEBUG 06-24 20:04:14 [stats.py:37] Avg generate tokens throughput: 9.369 tokens/s -INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:386.26694679260254ms total_cost_time:386.31248474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5626 prompt_cache_len:5151 prompt_cache_ratio:0.9155705652328475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 -DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.10758066177368164 s -INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.10879826545715332 s -DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=200228179790043914477172086933595455941, time:1750766654.6832614s req_ids:[8] -DEBUG 06-24 20:04:14 [manager.py:391] -ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:204.68807220458984ms total_cost_time:204.73217964172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5627 prompt_cache_len:5151 prompt_cache_ratio:0.9154078549848943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 -DEBUG 06-24 20:04:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:14 [manager.py:224] router recive req id 8 cost time 0.1076350212097168 s -INFO 06-24 20:04:14 [manager.py:68] detokenization recv req id 8 cost time 0.10956835746765137 s -DEBUG 06-24 20:04:14 [manager.py:391] Prefill Batch: batch_id=128691561035046703622596447858747696117, time:1750766654.898549s req_ids:[8] -DEBUG 06-24 20:04:14 [manager.py:391] -ERROR 06-24 20:04:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:206.11953735351562ms total_cost_time:206.16531372070312ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5628 prompt_cache_len:5151 prompt_cache_ratio:0.9152452025586354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 -DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10662460327148438 s -INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.10849905014038086 s -DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=106639017692861415357043238974669823809, time:1750766655.1052353s req_ids:[8] -DEBUG 06-24 20:04:15 [manager.py:391] -ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:8 first_token_cost:204.27489280700684ms total_cost_time:204.3170928955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5629 prompt_cache_len:5151 prompt_cache_ratio:0.9150826079232546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 -DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10686659812927246 s -INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.10877180099487305 s -DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=141085462522987770831758864760465568946, time:1750766655.3168218s req_ids:[8] -DEBUG 06-24 20:04:15 [manager.py:391] -ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:206.4509391784668ms total_cost_time:206.49433135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5630 prompt_cache_len:5151 prompt_cache_ratio:0.9149200710479574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 -DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:15 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s -INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 8 cost time 0.1096029281616211 s -DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=319052547375508828994819630930400019073, time:1750766655.5269716s req_ids:[8] -DEBUG 06-24 20:04:15 [manager.py:391] -ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:206.6199779510498ms total_cost_time:206.6650390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5631 prompt_cache_len:5151 prompt_cache_ratio:0.9147575919019713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 -DEBUG 06-24 20:04:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:15 [manager.py:224] router recive req id 400 cost time 1.3225617408752441 s -INFO 06-24 20:04:15 [manager.py:68] detokenization recv req id 400 cost time 1.3246512413024902 s -DEBUG 06-24 20:04:15 [manager.py:391] Prefill Batch: batch_id=101624204149623261217881075467097066163, time:1750766655.7365716s req_ids:[400] -DEBUG 06-24 20:04:15 [manager.py:391] -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:15 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:15 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:14 lightllm_req_id:400 first_token_cost:1586.2393379211426ms total_cost_time:1586.28511428833ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5581 prompt_cache_len:5145 prompt_cache_ratio:0.9218777996774772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:400 -DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:16 [batch.py:51] router release req id 400 -INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10750150680541992 s -INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.10942196846008301 s -DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=131142781887999190799469431461008130202, time:1750766656.1028008s req_ids:[400] -DEBUG 06-24 20:04:16 [manager.py:391] -ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:400 first_token_cost:198.8089084625244ms total_cost_time:198.85516166687012ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5582 prompt_cache_len:5145 prompt_cache_ratio:0.9217126477964888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 -DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:16 [batch.py:51] router release req id 400 -INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10784649848937988 s -INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.10967588424682617 s -DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=37783991900662089517054200824284359370, time:1750766656.3254364s req_ids:[400] -DEBUG 06-24 20:04:16 [manager.py:391] -INFO 06-24 20:04:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:219.4504737854004ms total_cost_time:219.49315071105957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5583 prompt_cache_len:5145 prompt_cache_ratio:0.9215475550779151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 -DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:16 [batch.py:51] router release req id 400 -INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.10908699035644531 s -INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.11089563369750977 s -DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=5067486847551785240389677237162414539, time:1750766656.538432s req_ids:[400] -DEBUG 06-24 20:04:16 [manager.py:391] -ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:16 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:205.13319969177246ms total_cost_time:205.1858901977539ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:5584 prompt_cache_len:5145 prompt_cache_ratio:0.9213825214899714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 -DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:16 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:16 [manager.py:224] router recive req id 8 cost time 1.1151704788208008 s -INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 8 cost time 1.1170532703399658 s -DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=306730665257022193479385530307676922530, time:1750766656.7497196s req_ids:[8] -DEBUG 06-24 20:04:16 [manager.py:391] -ERROR 06-24 20:04:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:15 lightllm_req_id:8 first_token_cost:1215.9790992736816ms total_cost_time:1216.0212993621826ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5632 prompt_cache_len:5151 prompt_cache_ratio:0.9145951704545454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:8 -DEBUG 06-24 20:04:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:16 [manager.py:224] router recive req id 400 cost time 0.30753374099731445 s -INFO 06-24 20:04:16 [manager.py:68] detokenization recv req id 400 cost time 0.30948758125305176 s -DEBUG 06-24 20:04:16 [manager.py:391] Prefill Batch: batch_id=71303722397184712584653631993487326899, time:1750766656.9596272s req_ids:[400] -DEBUG 06-24 20:04:16 [manager.py:391] -ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:400 first_token_cost:413.42759132385254ms total_cost_time:413.47193717956543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5585 prompt_cache_len:5145 prompt_cache_ratio:0.9212175470008953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 -DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:17 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.3075568675994873 s -INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.3095417022705078 s -DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=252064728799863974062995502885127462607, time:1750766657.170206s req_ids:[8] -DEBUG 06-24 20:04:17 [manager.py:391] -ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:16 lightllm_req_id:8 first_token_cost:415.8060550689697ms total_cost_time:415.85230827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5633 prompt_cache_len:5151 prompt_cache_ratio:0.9144328066749512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 -DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:17 [manager.py:224] router recive req id 400 cost time 0.30767273902893066 s -INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 400 cost time 0.3096649646759033 s -DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=66287034318572387062281965083639895851, time:1750766657.3795717s req_ids:[400] -DEBUG 06-24 20:04:17 [manager.py:391] -ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:414.63780403137207ms total_cost_time:414.68071937561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5586 prompt_cache_len:5145 prompt_cache_ratio:0.9210526315789473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 -DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:17 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.30847835540771484 s -INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.31049108505249023 s -DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=228938074943624491590312570889158603010, time:1750766657.5886242s req_ids:[8] -DEBUG 06-24 20:04:17 [manager.py:391] -ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 first_token_cost:410.4933738708496ms total_cost_time:410.536527633667ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5634 prompt_cache_len:5151 prompt_cache_ratio:0.9142705005324814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 -DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:17 [manager.py:224] router recive req id 400 cost time 0.3087329864501953 s -INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 400 cost time 0.3108234405517578 s -DEBUG 06-24 20:04:17 [manager.py:391] Prefill Batch: batch_id=60087535810734423348138057221743926182, time:1750766657.7949853s req_ids:[400] -DEBUG 06-24 20:04:17 [manager.py:391] -ERROR 06-24 20:04:17 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:17 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:408.66971015930176ms total_cost_time:408.71238708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5587 prompt_cache_len:5145 prompt_cache_ratio:0.9208877751924109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 -DEBUG 06-24 20:04:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:17 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:17 [manager.py:224] router recive req id 8 cost time 0.30881237983703613 s -INFO 06-24 20:04:17 [manager.py:68] detokenization recv req id 8 cost time 0.31071996688842773 s -DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=302156104039420587193820993304785054710, time:1750766658.002426s req_ids:[8] -DEBUG 06-24 20:04:18 [manager.py:391] -ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:8 first_token_cost:409.6033573150635ms total_cost_time:409.64531898498535ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5635 prompt_cache_len:5151 prompt_cache_ratio:0.9141082519964507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 -DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.3086738586425781 s -INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.31061649322509766 s -DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=240258550456378011144236932454084892034, time:1750766658.2128952s req_ids:[400] -DEBUG 06-24 20:04:18 [manager.py:391] -ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:17 lightllm_req_id:400 first_token_cost:414.517879486084ms total_cost_time:414.56127166748047ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5588 prompt_cache_len:5145 prompt_cache_ratio:0.920722977809592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 -DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:18 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:18 [manager.py:224] router recive req id 8 cost time 0.30826854705810547 s -INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 8 cost time 0.3102855682373047 s -DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=196887082411526954351833857334841843938, time:1750766658.4303896s req_ids:[8] -DEBUG 06-24 20:04:18 [manager.py:391] -ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 first_token_cost:424.88932609558105ms total_cost_time:424.93295669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5636 prompt_cache_len:5151 prompt_cache_ratio:0.9139460610361959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 -DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.30877089500427246 s -INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.3108034133911133 s -DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=45976309311112245365689531616693751266, time:1750766658.6139958s req_ids:[400] -DEBUG 06-24 20:04:18 [manager.py:391] -ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:385.8602046966553ms total_cost_time:385.90240478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5589 prompt_cache_len:5145 prompt_cache_ratio:0.9205582393988191 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 -DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:18 [batch.py:51] router release req id 400 -INFO 06-24 20:04:18 [manager.py:224] router recive req id 400 cost time 0.1065371036529541 s -INFO 06-24 20:04:18 [manager.py:68] detokenization recv req id 400 cost time 0.10845494270324707 s -DEBUG 06-24 20:04:18 [manager.py:391] Prefill Batch: batch_id=54542278724559732195745943898052701471, time:1750766658.8116658s req_ids:[400] -DEBUG 06-24 20:04:18 [manager.py:391] -ERROR 06-24 20:04:18 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:18 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:200.8652687072754ms total_cost_time:200.90913772583008ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5590 prompt_cache_len:5145 prompt_cache_ratio:0.9203935599284436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 -DEBUG 06-24 20:04:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:18 [batch.py:51] router release req id 400 -INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.10749650001525879 s -INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.1094827651977539 s -DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=270565812123462728368077659759711507818, time:1750766659.0198867s req_ids:[400] -DEBUG 06-24 20:04:19 [manager.py:391] -ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:400 first_token_cost:204.64134216308594ms total_cost_time:204.69999313354492ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:5591 prompt_cache_len:5145 prompt_cache_ratio:0.9202289393668396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 -DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:19 [batch.py:51] router release req id 400 -INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.10609102249145508 s -INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.10812973976135254 s -DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=153541719542779166919510374484718041959, time:1750766659.2338116s req_ids:[400] -DEBUG 06-24 20:04:19 [manager.py:391] -ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:207.10468292236328ms total_cost_time:207.12995529174805ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:5592 prompt_cache_len:5145 prompt_cache_ratio:0.9200643776824035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 -DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:19 [batch.py:51] router release req id 400 -INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.30823493003845215 s -INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.31044840812683105 s -DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=324154969492212450185372527199611848940, time:1750766659.653571s req_ids:[400] -DEBUG 06-24 20:04:19 [manager.py:391] -ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:19 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:424.0915775299072ms total_cost_time:424.135684967041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5593 prompt_cache_len:5145 prompt_cache_ratio:0.9198998748435544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 -DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:19 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:19 [manager.py:224] router recive req id 8 cost time 1.314558982849121 s -INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 8 cost time 1.3165841102600098 s -DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=167409177748517338554553146583799168964, time:1750766659.836794s req_ids:[8] -DEBUG 06-24 20:04:19 [manager.py:391] -ERROR 06-24 20:04:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:18 lightllm_req_id:8 first_token_cost:1386.7998123168945ms total_cost_time:1386.845350265503ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5637 prompt_cache_len:5151 prompt_cache_ratio:0.9137839276210751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:8 -DEBUG 06-24 20:04:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:19 [manager.py:224] router recive req id 400 cost time 0.20762252807617188 s -INFO 06-24 20:04:19 [manager.py:68] detokenization recv req id 400 cost time 0.20920252799987793 s -DEBUG 06-24 20:04:19 [manager.py:391] Prefill Batch: batch_id=38653714603830349367107816191720438860, time:1750766659.9654753s req_ids:[400] -DEBUG 06-24 20:04:19 [manager.py:391] -ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:400 first_token_cost:272.2601890563965ms total_cost_time:272.3045349121094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5594 prompt_cache_len:5145 prompt_cache_ratio:0.9197354308187343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 -DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:20 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.20933055877685547 s -INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.2111823558807373 s -DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=225095216543165772522562669608361056402, time:1750766660.1385944s req_ids:[8] -DEBUG 06-24 20:04:20 [manager.py:391] -ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:19 lightllm_req_id:8 first_token_cost:298.11859130859375ms total_cost_time:298.16436767578125ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5638 prompt_cache_len:5151 prompt_cache_ratio:0.9136218517204683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 -DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.10911870002746582 s -INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.11103701591491699 s -DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=145729852968060938998594991669210077344, time:1750766660.3380792s req_ids:[8] -DEBUG 06-24 20:04:20 [manager.py:391] -ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:203.887939453125ms total_cost_time:203.93133163452148ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5639 prompt_cache_len:5151 prompt_cache_ratio:0.9134598333037772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 -DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.10652518272399902 s -INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.10875916481018066 s -DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=157120653617087176788172413152632552767, time:1750766660.547775s req_ids:[8] -DEBUG 06-24 20:04:20 [manager.py:391] -ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:203.8261890411377ms total_cost_time:203.86981964111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5640 prompt_cache_len:5151 prompt_cache_ratio:0.9132978723404256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 -DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:20 [manager.py:224] router recive req id 400 cost time 0.7133662700653076 s -INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 400 cost time 0.7155048847198486 s -DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=61239665237718427724579099054391844062, time:1750766660.757504s req_ids:[400] -DEBUG 06-24 20:04:20 [manager.py:391] -ERROR 06-24 20:04:20 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:20 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 first_token_cost:813.7984275817871ms total_cost_time:813.8401508331299ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5595 prompt_cache_len:5145 prompt_cache_ratio:0.9195710455764075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 -DEBUG 06-24 20:04:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:20 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:20 [manager.py:224] router recive req id 8 cost time 0.3086857795715332 s -INFO 06-24 20:04:20 [manager.py:68] detokenization recv req id 8 cost time 0.3107452392578125 s -DEBUG 06-24 20:04:20 [manager.py:391] Prefill Batch: batch_id=103569202099643300315125404549459298966, time:1750766660.9634693s req_ids:[8] -DEBUG 06-24 20:04:20 [manager.py:391] -ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:8 first_token_cost:413.16795349121094ms total_cost_time:413.2249355316162ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:5641 prompt_cache_len:5151 prompt_cache_ratio:0.9131359687998581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 -DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.3083314895629883 s -INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.31038761138916016 s -DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=243116708294120437174279276546882770620, time:1750766661.1802902s req_ids:[400] -DEBUG 06-24 20:04:21 [manager.py:391] -ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:20 lightllm_req_id:400 first_token_cost:421.3275909423828ms total_cost_time:421.3700294494629ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5596 prompt_cache_len:5145 prompt_cache_ratio:0.9194067190850608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 -DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:21 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:21 [manager.py:224] router recive req id 8 cost time 0.30976271629333496 s -INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 8 cost time 0.3117525577545166 s -DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=159589017922822320368261474168818125164, time:1750766661.3951037s req_ids:[8] -DEBUG 06-24 20:04:21 [manager.py:391] -ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 first_token_cost:424.87502098083496ms total_cost_time:424.91841316223145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5642 prompt_cache_len:5151 prompt_cache_ratio:0.912974122651542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 -DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.30779099464416504 s -INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.3097355365753174 s -DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=204027139029410204625150196260025536466, time:1750766661.5850406s req_ids:[400] -DEBUG 06-24 20:04:21 [manager.py:391] -ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:391.99233055114746ms total_cost_time:392.03691482543945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5597 prompt_cache_len:5145 prompt_cache_ratio:0.9192424513132035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 -DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:21 [batch.py:51] router release req id 400 -INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.10668253898620605 s -INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.10908055305480957 s -DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=282459104284730930658656003323083253895, time:1750766661.7867818s req_ids:[400] -DEBUG 06-24 20:04:21 [manager.py:391] -ERROR 06-24 20:04:21 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:21 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:201.26986503601074ms total_cost_time:201.31325721740723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5598 prompt_cache_len:5145 prompt_cache_ratio:0.9190782422293676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 -DEBUG 06-24 20:04:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:21 [batch.py:51] router release req id 400 -INFO 06-24 20:04:21 [manager.py:224] router recive req id 400 cost time 0.10654735565185547 s -INFO 06-24 20:04:21 [manager.py:68] detokenization recv req id 400 cost time 0.10901069641113281 s -DEBUG 06-24 20:04:21 [manager.py:391] Prefill Batch: batch_id=307189154209660327102313172525819870474, time:1750766661.995562s req_ids:[400] -DEBUG 06-24 20:04:21 [manager.py:391] -ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:400 first_token_cost:206.86888694763184ms total_cost_time:206.91204071044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5599 prompt_cache_len:5145 prompt_cache_ratio:0.9189140918021075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 -DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:22 [batch.py:51] router release req id 400 -INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.10664033889770508 s -INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.10859560966491699 s -DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=13886497406636090368314140559902134472, time:1750766662.2057035s req_ids:[400] -DEBUG 06-24 20:04:22 [manager.py:391] -ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:368.3967590332031ms total_cost_time:368.4399127960205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5600 prompt_cache_len:5145 prompt_cache_ratio:0.91875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 -DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:22 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:22 [manager.py:224] router recive req id 8 cost time 1.01613187789917 s -INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 8 cost time 1.0179226398468018 s -DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=131848877663081915398085650813740170950, time:1750766662.5382745s req_ids:[8] -DEBUG 06-24 20:04:22 [manager.py:391] -ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:21 lightllm_req_id:8 first_token_cost:1121.265172958374ms total_cost_time:1121.309518814087ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5643 prompt_cache_len:5151 prompt_cache_ratio:0.9128123338649654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 -DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.20801854133605957 s -INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.21001982688903809 s -DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=58024617009333453881726821185111954685, time:1750766662.695757s req_ids:[400] -DEBUG 06-24 20:04:22 [manager.py:391] -ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:22 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:305.3700923919678ms total_cost_time:305.41157722473145ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5601 prompt_cache_len:5145 prompt_cache_ratio:0.9185859667916444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 -DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:22 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:22 [manager.py:224] router recive req id 8 cost time 0.2079474925994873 s -INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 8 cost time 0.20982789993286133 s -DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=267724025021584382246605540177094573446, time:1750766662.8270555s req_ids:[8] -DEBUG 06-24 20:04:22 [manager.py:391] -ERROR 06-24 20:04:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 first_token_cost:261.14869117736816ms total_cost_time:261.19232177734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5644 prompt_cache_len:5151 prompt_cache_ratio:0.9126506024096386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 -DEBUG 06-24 20:04:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:22 [manager.py:224] router recive req id 400 cost time 0.20712733268737793 s -INFO 06-24 20:04:22 [manager.py:68] detokenization recv req id 400 cost time 0.20850658416748047 s -DEBUG 06-24 20:04:22 [manager.py:391] Prefill Batch: batch_id=259379537031038628830206513313025065573, time:1750766662.992749s req_ids:[400] -DEBUG 06-24 20:04:22 [manager.py:391] -ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:400 first_token_cost:295.987606048584ms total_cost_time:296.0324287414551ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5602 prompt_cache_len:5145 prompt_cache_ratio:0.9184219921456622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 -DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:23 [batch.py:51] router release req id 400 -INFO 06-24 20:04:23 [manager.py:224] router recive req id 400 cost time 0.10604095458984375 s -INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 400 cost time 0.10753870010375977 s -INFO 06-24 20:04:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:04:23 [statics_utils.py:24] mean first cost: 225.2109487233796 ms -INFO 06-24 20:04:23 [statics_utils.py:24] mean per token cost: 0.172888190729092 ms -DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=222675924948564142134540015902361335341, time:1750766663.1987753s req_ids:[400] -DEBUG 06-24 20:04:23 [manager.py:391] -ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:212.1272087097168ms total_cost_time:212.17036247253418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5603 prompt_cache_len:5145 prompt_cache_ratio:0.9182580760306979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 -DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:23 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.5107765197753906 s -INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.5132813453674316 s -DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=289335265318048660066163943527052332403, time:1750766663.409441s req_ids:[8] -DEBUG 06-24 20:04:23 [manager.py:391] -ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:22 lightllm_req_id:8 first_token_cost:612.8072738647461ms total_cost_time:612.8509044647217ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5645 prompt_cache_len:5151 prompt_cache_ratio:0.912488928255093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 -DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:23 [manager.py:224] router recive req id 400 cost time 0.30922818183898926 s -INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 400 cost time 0.31137776374816895 s -DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=60569115480878674443553201192736048164, time:1750766663.615808s req_ids:[400] -DEBUG 06-24 20:04:23 [manager.py:391] -ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:23 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:408.801794052124ms total_cost_time:408.846378326416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5604 prompt_cache_len:5145 prompt_cache_ratio:0.9180942184154176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 -DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:23 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.3077116012573242 s -INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.3091452121734619 s -DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=4066682712827294027601391126372753086, time:1750766663.8217094s req_ids:[8] -DEBUG 06-24 20:04:23 [manager.py:391] -ERROR 06-24 20:04:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 first_token_cost:371.9310760498047ms total_cost_time:371.97422981262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5646 prompt_cache_len:5151 prompt_cache_ratio:0.9123273113708821 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 -DEBUG 06-24 20:04:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:23 [manager.py:224] router recive req id 8 cost time 0.10661888122558594 s -INFO 06-24 20:04:23 [manager.py:68] detokenization recv req id 8 cost time 0.10792207717895508 s -DEBUG 06-24 20:04:23 [manager.py:391] Prefill Batch: batch_id=96237076485747580019305067878142776834, time:1750766663.9900763s req_ids:[8] -DEBUG 06-24 20:04:23 [manager.py:391] -ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:8 first_token_cost:158.81991386413574ms total_cost_time:158.86187553405762ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5647 prompt_cache_len:5151 prompt_cache_ratio:0.9121657517265804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 -DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:24 [manager.py:224] router recive req id 400 cost time 0.41042613983154297 s -INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 400 cost time 0.4124715328216553 s -DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=289818615711437329685663686575810306959, time:1750766664.1209888s req_ids:[400] -DEBUG 06-24 20:04:24 [manager.py:391] -ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:23 lightllm_req_id:400 first_token_cost:479.60519790649414ms total_cost_time:479.65025901794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5605 prompt_cache_len:5145 prompt_cache_ratio:0.9179304192685103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 -DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:24 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.20775485038757324 s -INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.20942115783691406 s -DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=249438425325088372674096711372955382234, time:1750766664.273658s req_ids:[8] -DEBUG 06-24 20:04:24 [manager.py:391] -ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:294.5408821105957ms total_cost_time:294.583797454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5648 prompt_cache_len:5151 prompt_cache_ratio:0.9120042492917847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 -DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:24 [manager.py:224] router recive req id 400 cost time 0.20707249641418457 s -INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 400 cost time 0.20958900451660156 s -DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=333649231572607279468698784082731831287, time:1750766664.4237874s req_ids:[400] -DEBUG 06-24 20:04:24 [manager.py:391] -ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:04:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 26709.539 tokens/s -DEBUG 06-24 20:04:24 [stats.py:37] Avg prompt tokens throughput: 26700.027 tokens/s -DEBUG 06-24 20:04:24 [stats.py:37] Avg generate tokens throughput: 9.513 tokens/s -INFO 06-24 20:04:24 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 first_token_cost:463.6354446411133ms total_cost_time:463.6814594268799ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5606 prompt_cache_len:5145 prompt_cache_ratio:0.9177666785586871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 -DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:24 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.40807080268859863 s -INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.40987658500671387 s -DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=186044466313300840807587836412841909570, time:1750766664.7712786s req_ids:[8] -DEBUG 06-24 20:04:24 [manager.py:391] -ERROR 06-24 20:04:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:511.3251209259033ms total_cost_time:511.3685131072998ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5649 prompt_cache_len:5151 prompt_cache_ratio:0.9118428040361126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 -DEBUG 06-24 20:04:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:24 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s -INFO 06-24 20:04:24 [manager.py:68] detokenization recv req id 8 cost time 0.11043643951416016 s -DEBUG 06-24 20:04:24 [manager.py:391] Prefill Batch: batch_id=183803393045587895134373374550283597211, time:1750766664.9759927s req_ids:[8] -DEBUG 06-24 20:04:24 [manager.py:391] -ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:8 first_token_cost:201.8110752105713ms total_cost_time:201.85589790344238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5650 prompt_cache_len:5151 prompt_cache_ratio:0.9116814159292035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 -DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:25 [manager.py:224] router recive req id 8 cost time 0.10721635818481445 s -INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 8 cost time 0.10920286178588867 s -DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=37519645894131366662978579660104636506, time:1750766665.1972146s req_ids:[8] -DEBUG 06-24 20:04:25 [manager.py:391] -ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:219.00534629821777ms total_cost_time:219.04563903808594ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:5651 prompt_cache_len:5151 prompt_cache_ratio:0.9115200849407185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 -DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:25 [manager.py:224] router recive req id 400 cost time 0.7219085693359375 s -INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 400 cost time 0.7239117622375488 s -DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=171345870804642682737352943122718561806, time:1750766665.409969s req_ids:[400] -DEBUG 06-24 20:04:25 [manager.py:391] -ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:24 lightllm_req_id:400 first_token_cost:832.0600986480713ms total_cost_time:832.1030139923096ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5607 prompt_cache_len:5145 prompt_cache_ratio:0.9176029962546817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 -DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:25 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:25 [manager.py:224] router recive req id 8 cost time 0.30924534797668457 s -INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 8 cost time 0.3111088275909424 s -DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=193352093618118088583869029643011308453, time:1750766665.6208284s req_ids:[8] -DEBUG 06-24 20:04:25 [manager.py:391] -ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:418.0893898010254ms total_cost_time:418.14589500427246ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:5652 prompt_cache_len:5151 prompt_cache_ratio:0.9113588110403397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 -DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:25 [manager.py:224] router recive req id 400 cost time 0.3096654415130615 s -INFO 06-24 20:04:25 [manager.py:68] detokenization recv req id 400 cost time 0.31154608726501465 s -DEBUG 06-24 20:04:25 [manager.py:391] Prefill Batch: batch_id=274120091185543058063977585149068597229, time:1750766665.834958s req_ids:[400] -DEBUG 06-24 20:04:25 [manager.py:391] -ERROR 06-24 20:04:25 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:25 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 first_token_cost:426.99575424194336ms total_cost_time:427.04081535339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5608 prompt_cache_len:5145 prompt_cache_ratio:0.9174393723252496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 -DEBUG 06-24 20:04:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:25 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.3108954429626465 s -INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.31281447410583496 s -DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=69613368012098856667054515992332491011, time:1750766666.048118s req_ids:[8] -DEBUG 06-24 20:04:26 [manager.py:391] -ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:8 first_token_cost:419.79146003723145ms total_cost_time:419.8341369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5653 prompt_cache_len:5151 prompt_cache_ratio:0.9111975941977711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 -DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:26 [manager.py:224] router recive req id 400 cost time 0.30904102325439453 s -INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 400 cost time 0.31108784675598145 s -DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=102633915022225956462908525721125467616, time:1750766666.2586112s req_ids:[400] -DEBUG 06-24 20:04:26 [manager.py:391] -ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:25 lightllm_req_id:400 first_token_cost:416.0346984863281ms total_cost_time:416.0771369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5609 prompt_cache_len:5145 prompt_cache_ratio:0.9172758067391692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 -DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:26 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.30986738204956055 s -INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.3118159770965576 s -DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=265487944058360102931858283804504114088, time:1750766666.4691482s req_ids:[8] -DEBUG 06-24 20:04:26 [manager.py:391] -ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:418.54381561279297ms total_cost_time:418.58816146850586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5654 prompt_cache_len:5151 prompt_cache_ratio:0.9110364343827378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 -DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:26 [manager.py:224] router recive req id 400 cost time 0.30727362632751465 s -INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 400 cost time 0.3091881275177002 s -DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=75879430666684743418787605388217885521, time:1750766666.6813915s req_ids:[400] -DEBUG 06-24 20:04:26 [manager.py:391] -ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:26 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 first_token_cost:417.93131828308105ms total_cost_time:417.97423362731934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5610 prompt_cache_len:5145 prompt_cache_ratio:0.9171122994652406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 -DEBUG 06-24 20:04:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:26 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:26 [manager.py:224] router recive req id 8 cost time 0.30787110328674316 s -INFO 06-24 20:04:26 [manager.py:68] detokenization recv req id 8 cost time 0.3097517490386963 s -DEBUG 06-24 20:04:26 [manager.py:391] Prefill Batch: batch_id=66381436706951621725767110887667211672, time:1750766666.8923821s req_ids:[8] -DEBUG 06-24 20:04:26 [manager.py:391] -ERROR 06-24 20:04:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:417.3908233642578ms total_cost_time:417.4325466156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5655 prompt_cache_len:5151 prompt_cache_ratio:0.9108753315649868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 -DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:27 [manager.py:224] router recive req id 400 cost time 0.512087345123291 s -INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 400 cost time 0.5140905380249023 s -DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=313968677053965986341434509452258267899, time:1750766667.307603s req_ids:[400] -DEBUG 06-24 20:04:27 [manager.py:391] -ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:400 first_token_cost:623.9781379699707ms total_cost_time:624.0227222442627ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5611 prompt_cache_len:5145 prompt_cache_ratio:0.9169488504722866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 -DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:27 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:27 [manager.py:224] router recive req id 8 cost time 0.5114901065826416 s -INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 8 cost time 0.5134925842285156 s -DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=259692355608096860857597314863558876253, time:1750766667.522568s req_ids:[8] -DEBUG 06-24 20:04:27 [manager.py:391] -ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:26 lightllm_req_id:8 first_token_cost:622.0667362213135ms total_cost_time:622.1106052398682ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5656 prompt_cache_len:5151 prompt_cache_ratio:0.9107142857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:8 -DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:27 [manager.py:224] router recive req id 400 cost time 0.3098890781402588 s -INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 400 cost time 0.31192898750305176 s -DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=280832277086313392181954795847818229028, time:1750766667.729502s req_ids:[400] -DEBUG 06-24 20:04:27 [manager.py:391] -ERROR 06-24 20:04:27 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:27 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 first_token_cost:414.203405380249ms total_cost_time:414.2477512359619ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5612 prompt_cache_len:5145 prompt_cache_ratio:0.9167854597291518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 -DEBUG 06-24 20:04:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:27 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:27 [manager.py:224] router recive req id 8 cost time 0.3082160949707031 s -INFO 06-24 20:04:27 [manager.py:68] detokenization recv req id 8 cost time 0.31084251403808594 s -DEBUG 06-24 20:04:27 [manager.py:391] Prefill Batch: batch_id=25547495440991790670954121898604414010, time:1750766667.940811s req_ids:[8] -DEBUG 06-24 20:04:27 [manager.py:391] -ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:8 first_token_cost:414.9432182312012ms total_cost_time:414.98732566833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5657 prompt_cache_len:5151 prompt_cache_ratio:0.9105532968004243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 -DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.30869483947753906 s -INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.3102104663848877 s -DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=19325133157743702338532232414436161151, time:1750766668.1507266s req_ids:[400] -DEBUG 06-24 20:04:28 [manager.py:391] -ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:27 lightllm_req_id:400 first_token_cost:415.47417640686035ms total_cost_time:415.5158996582031ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5613 prompt_cache_len:5145 prompt_cache_ratio:0.9166221272047034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 -DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:28 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:28 [manager.py:224] router recive req id 8 cost time 0.3092689514160156 s -INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 8 cost time 0.31162071228027344 s -DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=274595003761940960646626444977832413736, time:1750766668.3598099s req_ids:[8] -DEBUG 06-24 20:04:28 [manager.py:391] -ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:414.22581672668457ms total_cost_time:414.26753997802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5658 prompt_cache_len:5151 prompt_cache_ratio:0.9103923647932132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 -DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.3091588020324707 s -INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.3114774227142334 s -INFO 06-24 20:04:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=17663044513857767370761652076563602295, time:1750766668.5701125s req_ids:[400] -DEBUG 06-24 20:04:28 [manager.py:391] -ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:28 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 first_token_cost:409.98125076293945ms total_cost_time:410.0227355957031ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5614 prompt_cache_len:5145 prompt_cache_ratio:0.9164588528678305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 -DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:28 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:28 [manager.py:224] router recive req id 8 cost time 0.3099038600921631 s -INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 8 cost time 0.3123795986175537 s -DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=50026042330341118691835468528673360440, time:1750766668.775991s req_ids:[8] -DEBUG 06-24 20:04:28 [manager.py:391] -ERROR 06-24 20:04:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:411.2386703491211ms total_cost_time:411.2820625305176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5659 prompt_cache_len:5151 prompt_cache_ratio:0.9102314896624846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 -DEBUG 06-24 20:04:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:28 [manager.py:224] router recive req id 400 cost time 0.3096802234649658 s -INFO 06-24 20:04:28 [manager.py:68] detokenization recv req id 400 cost time 0.31191039085388184 s -DEBUG 06-24 20:04:28 [manager.py:391] Prefill Batch: batch_id=152405585892290894040670415658955982272, time:1750766668.9882126s req_ids:[400] -DEBUG 06-24 20:04:28 [manager.py:391] -ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:400 first_token_cost:415.50207138061523ms total_cost_time:415.5445098876953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5615 prompt_cache_len:5145 prompt_cache_ratio:0.9162956366874443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 -DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:29 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:29 [manager.py:224] router recive req id 8 cost time 0.3096752166748047 s -INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 8 cost time 0.31159234046936035 s -DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=146997980909769386210843937019735865025, time:1750766669.1992402s req_ids:[8] -DEBUG 06-24 20:04:29 [manager.py:391] -ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:28 lightllm_req_id:8 first_token_cost:419.7840690612793ms total_cost_time:419.827938079834ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5660 prompt_cache_len:5151 prompt_cache_ratio:0.9100706713780918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 -DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:29 [manager.py:224] router recive req id 400 cost time 0.308474063873291 s -INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 400 cost time 0.310366153717041 s -DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=228195534521292472823714719262961439497, time:1750766669.4134781s req_ids:[400] -DEBUG 06-24 20:04:29 [manager.py:391] -ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:421.5538501739502ms total_cost_time:421.5981960296631ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5616 prompt_cache_len:5145 prompt_cache_ratio:0.9161324786324786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 -DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:29 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:29 [manager.py:224] router recive req id 8 cost time 0.30808281898498535 s -INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 8 cost time 0.31003451347351074 s -DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=42054421374652623759736304095637773418, time:1750766669.6245844s req_ids:[8] -DEBUG 06-24 20:04:29 [manager.py:391] -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 first_token_cost:419.32058334350586ms total_cost_time:419.3615913391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5661 prompt_cache_len:5151 prompt_cache_ratio:0.9099099099099099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 -DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:29 [manager.py:224] router recive req id 400 cost time 0.30889320373535156 s -INFO 06-24 20:04:29 [manager.py:68] detokenization recv req id 400 cost time 0.3107783794403076 s -DEBUG 06-24 20:04:29 [manager.py:391] Prefill Batch: batch_id=108717596022332119038349818396712496673, time:1750766669.836832s req_ids:[400] -DEBUG 06-24 20:04:29 [manager.py:391] -ERROR 06-24 20:04:29 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:29 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:417.1173572540283ms total_cost_time:417.1617031097412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5617 prompt_cache_len:5145 prompt_cache_ratio:0.9159693786718889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 -DEBUG 06-24 20:04:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:29 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:30 [manager.py:224] router recive req id 8 cost time 0.3091428279876709 s -INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 8 cost time 0.31108808517456055 s -DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=175723662860696045028588059268083472272, time:1750766670.048848s req_ids:[8] -DEBUG 06-24 20:04:30 [manager.py:391] -ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:8 first_token_cost:417.9821014404297ms total_cost_time:418.02525520324707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5662 prompt_cache_len:5151 prompt_cache_ratio:0.9097492052278346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 -DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:30 [manager.py:224] router recive req id 400 cost time 0.30869340896606445 s -INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 400 cost time 0.3109416961669922 s -DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=230019865189014978099274913506664142356, time:1750766670.260391s req_ids:[400] -DEBUG 06-24 20:04:30 [manager.py:391] -ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:29 lightllm_req_id:400 first_token_cost:420.01795768737793ms total_cost_time:420.0613498687744ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5618 prompt_cache_len:5145 prompt_cache_ratio:0.915806336774653 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 -DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:30 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:30 [manager.py:224] router recive req id 8 cost time 0.30882906913757324 s -INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 8 cost time 0.31121158599853516 s -DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=75358273206282626847728868763252732028, time:1750766670.4759884s req_ids:[8] -DEBUG 06-24 20:04:30 [manager.py:391] -ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 first_token_cost:425.137996673584ms total_cost_time:425.18115043640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5663 prompt_cache_len:5151 prompt_cache_ratio:0.9095885573017836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 -DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:30 [manager.py:224] router recive req id 400 cost time 0.30824971199035645 s -INFO 06-24 20:04:30 [manager.py:68] detokenization recv req id 400 cost time 0.31012630462646484 s -DEBUG 06-24 20:04:30 [manager.py:391] Prefill Batch: batch_id=275758295323467180306429924285007725737, time:1750766670.6900518s req_ids:[400] -DEBUG 06-24 20:04:30 [manager.py:391] -ERROR 06-24 20:04:30 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:30 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 first_token_cost:585.8213901519775ms total_cost_time:585.8657360076904ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5619 prompt_cache_len:5145 prompt_cache_ratio:0.9156433529097704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 -DEBUG 06-24 20:04:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:30 [batch.py:51] router release req id 400 -INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10662460327148438 s -INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.1090700626373291 s -DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=245775026616329675552314482109721014078, time:1750766671.0574462s req_ids:[400] -DEBUG 06-24 20:04:31 [manager.py:391] -ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:400 first_token_cost:197.7367401123047ms total_cost_time:197.78084754943848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5620 prompt_cache_len:5145 prompt_cache_ratio:0.9154804270462633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 -DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:31 [batch.py:51] router release req id 400 -INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10666823387145996 s -INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10853815078735352 s -DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=104430840096773392534867993462466302863, time:1750766671.2651033s req_ids:[400] -DEBUG 06-24 20:04:31 [manager.py:391] -ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:207.52763748168945ms total_cost_time:207.57246017456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5621 prompt_cache_len:5145 prompt_cache_ratio:0.9153175591531756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 -DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:31 [batch.py:51] router release req id 400 -INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10657572746276855 s -INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10851931571960449 s -DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=101274259892882747630248991049910571147, time:1750766671.4846306s req_ids:[400] -DEBUG 06-24 20:04:31 [manager.py:391] -ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:217.04864501953125ms total_cost_time:217.09179878234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5622 prompt_cache_len:5145 prompt_cache_ratio:0.9151547491995731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 -DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:31 [batch.py:51] router release req id 400 -INFO 06-24 20:04:31 [manager.py:224] router recive req id 400 cost time 0.10630679130554199 s -INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 400 cost time 0.10842442512512207 s -DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=27387733789106417195472477669736267062, time:1750766671.6976998s req_ids:[400] -DEBUG 06-24 20:04:31 [manager.py:391] -ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:31 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:207.4732780456543ms total_cost_time:207.51643180847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5623 prompt_cache_len:5145 prompt_cache_ratio:0.9149919971545438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 -DEBUG 06-24 20:04:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:31 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:31 [manager.py:224] router recive req id 8 cost time 1.317298412322998 s -INFO 06-24 20:04:31 [manager.py:68] detokenization recv req id 8 cost time 1.31976318359375 s -DEBUG 06-24 20:04:31 [manager.py:391] Prefill Batch: batch_id=88067096955312514899097457428015425714, time:1750766671.9103594s req_ids:[8] -DEBUG 06-24 20:04:31 [manager.py:391] -ERROR 06-24 20:04:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:30 lightllm_req_id:8 first_token_cost:1427.6151657104492ms total_cost_time:1427.657127380371ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5664 prompt_cache_len:5151 prompt_cache_ratio:0.909427966101695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:8 -DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:32 [batch.py:51] router release req id 8 -INFO 06-24 20:04:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.3099067211151123 s -INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.312009334564209 s -DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=210672878257417834276140693577676006786, time:1750766672.1201878s req_ids:[400] -DEBUG 06-24 20:04:32 [manager.py:391] -ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:400 first_token_cost:420.1347827911377ms total_cost_time:420.1769828796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5624 prompt_cache_len:5145 prompt_cache_ratio:0.9148293029871978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 -DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:32 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:32 [manager.py:224] router recive req id 8 cost time 0.3080563545227051 s -INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 8 cost time 0.31051015853881836 s -DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=34684555807152716900325497140577943430, time:1750766672.3331628s req_ids:[8] -DEBUG 06-24 20:04:32 [manager.py:391] -ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:31 lightllm_req_id:8 first_token_cost:418.5612201690674ms total_cost_time:418.60318183898926ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5665 prompt_cache_len:5151 prompt_cache_ratio:0.9092674315975287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:8 -DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.3078746795654297 s -INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.3094809055328369 s -DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=11875430463453379121647450097018435833, time:1750766672.5181153s req_ids:[400] -DEBUG 06-24 20:04:32 [manager.py:391] -ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:375.0605583190918ms total_cost_time:375.1039505004883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5625 prompt_cache_len:5145 prompt_cache_ratio:0.9146666666666666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 -DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:32 [batch.py:51] router release req id 400 -INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.10575532913208008 s -INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.1068563461303711 s -DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=330514215247724816831253146616404161418, time:1750766672.708729s req_ids:[400] -DEBUG 06-24 20:04:32 [manager.py:391] -ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:197.6935863494873ms total_cost_time:197.73626327514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5626 prompt_cache_len:5145 prompt_cache_ratio:0.9145040881621045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 -DEBUG 06-24 20:04:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:32 [batch.py:51] router release req id 400 -INFO 06-24 20:04:32 [manager.py:224] router recive req id 400 cost time 0.1054985523223877 s -INFO 06-24 20:04:32 [manager.py:68] detokenization recv req id 400 cost time 0.10739850997924805 s -DEBUG 06-24 20:04:32 [manager.py:391] Prefill Batch: batch_id=312758865148485819698060459175714972626, time:1750766672.9136446s req_ids:[400] -DEBUG 06-24 20:04:32 [manager.py:391] -ERROR 06-24 20:04:32 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:32 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:204.3013572692871ms total_cost_time:204.3447494506836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5627 prompt_cache_len:5145 prompt_cache_ratio:0.914341567442687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 -DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:33 [batch.py:51] router release req id 400 -INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.10535597801208496 s -INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.10726690292358398 s -DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=69962802871803665103388021206773155541, time:1750766673.125092s req_ids:[400] -DEBUG 06-24 20:04:33 [manager.py:391] -ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:400 first_token_cost:206.3610553741455ms total_cost_time:206.4042091369629ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5628 prompt_cache_len:5145 prompt_cache_ratio:0.914179104477612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 -DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:33 [batch.py:51] router release req id 400 -INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.1056208610534668 s -INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.10749316215515137 s -DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=313351747199595949105718220789064124413, time:1750766673.3359003s req_ids:[400] -DEBUG 06-24 20:04:33 [manager.py:391] -ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:224.17807579040527ms total_cost_time:224.22075271606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5629 prompt_cache_len:5145 prompt_cache_ratio:0.9140166992360987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 -DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:33 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:33 [manager.py:224] router recive req id 8 cost time 1.1144230365753174 s -INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 8 cost time 1.1163041591644287 s -DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=260619455572711705380796894834199775779, time:1750766673.5556192s req_ids:[8] -DEBUG 06-24 20:04:33 [manager.py:391] -ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:32 lightllm_req_id:8 first_token_cost:1202.5015354156494ms total_cost_time:1202.544927597046ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5666 prompt_cache_len:5151 prompt_cache_ratio:0.9091069537592658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:8 -DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:33 [manager.py:224] router recive req id 400 cost time 0.4094274044036865 s -INFO 06-24 20:04:33 [manager.py:68] detokenization recv req id 400 cost time 0.4111461639404297 s -DEBUG 06-24 20:04:33 [manager.py:391] Prefill Batch: batch_id=64030824649360195371290145436512644687, time:1750766673.880678s req_ids:[400] -DEBUG 06-24 20:04:33 [manager.py:391] -ERROR 06-24 20:04:33 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:33 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:509.01246070861816ms total_cost_time:509.05680656433105ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5630 prompt_cache_len:5145 prompt_cache_ratio:0.9138543516873889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 -DEBUG 06-24 20:04:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:33 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.40862011909484863 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.4106321334838867 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=325290205884475920655626869544547932609, time:1750766674.0413604s req_ids:[8] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:8 first_token_cost:482.53345489501953ms total_cost_time:482.5766086578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5667 prompt_cache_len:5151 prompt_cache_ratio:0.9089465325569084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.2065746784210205 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.20849061012268066 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=171744086798174465394960038580020109525, time:1750766674.1996436s req_ids:[400] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:33 lightllm_req_id:400 first_token_cost:308.95519256591797ms total_cost_time:308.99786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5631 prompt_cache_len:5145 prompt_cache_ratio:0.9136920618007459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.20766568183898926 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20949316024780273 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=89325531173735694157580037301506625719, time:1750766674.3571036s req_ids:[8] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:284.5907211303711ms total_cost_time:284.6336364746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5668 prompt_cache_len:5151 prompt_cache_ratio:0.9087861679604798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.20780515670776367 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.20971226692199707 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=49606304116919569563348578611173658801, time:1750766674.4865568s req_ids:[400] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:277.24432945251465ms total_cost_time:277.28843688964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5632 prompt_cache_len:5145 prompt_cache_ratio:0.9135298295454546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.2059001922607422 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20777082443237305 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=265866957393977817062778671378972033982, time:1750766674.6404688s req_ids:[8] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:04:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 26365.262 tokens/s -DEBUG 06-24 20:04:34 [stats.py:37] Avg prompt tokens throughput: 26355.911 tokens/s -DEBUG 06-24 20:04:34 [stats.py:37] Avg generate tokens throughput: 9.351 tokens/s -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:296.97728157043457ms total_cost_time:297.01948165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5669 prompt_cache_len:5151 prompt_cache_ratio:0.9086258599400246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 400 cost time 0.20730829238891602 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 400 cost time 0.2088913917541504 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=246618846592341577453872614353058456766, time:1750766674.7658603s req_ids:[400] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:34 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:34 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:255.13339042663574ms total_cost_time:255.17511367797852ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5633 prompt_cache_len:5145 prompt_cache_ratio:0.913367654890822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 -DEBUG 06-24 20:04:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:34 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:34 [manager.py:224] router recive req id 8 cost time 0.20806622505187988 s -INFO 06-24 20:04:34 [manager.py:68] detokenization recv req id 8 cost time 0.20998001098632812 s -DEBUG 06-24 20:04:34 [manager.py:391] Prefill Batch: batch_id=266033674225234743379494724366949788510, time:1750766674.928068s req_ids:[8] -DEBUG 06-24 20:04:34 [manager.py:391] -ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:8 first_token_cost:301.3298511505127ms total_cost_time:301.3739585876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5670 prompt_cache_len:5151 prompt_cache_ratio:0.9084656084656084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 -DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:35 [manager.py:224] router recive req id 8 cost time 0.10692334175109863 s -INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 8 cost time 0.10894179344177246 s -DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=127333895195900833682563255736194010035, time:1750766675.134944s req_ids:[8] -DEBUG 06-24 20:04:35 [manager.py:391] -ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:202.5163173675537ms total_cost_time:202.5587558746338ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5671 prompt_cache_len:5151 prompt_cache_ratio:0.908305413507318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 -DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:35 [manager.py:224] router recive req id 400 cost time 0.5143365859985352 s -INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 400 cost time 0.5163264274597168 s -DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=89562770169016691265677828974921242445, time:1750766675.344586s req_ids:[400] -DEBUG 06-24 20:04:35 [manager.py:391] -ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:34 lightllm_req_id:400 first_token_cost:615.959882736206ms total_cost_time:616.0030364990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5634 prompt_cache_len:5145 prompt_cache_ratio:0.9132055378061767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 -DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:35 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:35 [manager.py:224] router recive req id 8 cost time 0.31000399589538574 s -INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 8 cost time 0.31206798553466797 s -DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=164887647849996819659555358266871580144, time:1750766675.5574384s req_ids:[8] -DEBUG 06-24 20:04:35 [manager.py:391] -ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:417.73152351379395ms total_cost_time:417.77491569519043ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5672 prompt_cache_len:5151 prompt_cache_ratio:0.9081452750352609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 -DEBUG 06-24 20:04:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:35 [manager.py:224] router recive req id 400 cost time 0.3086886405944824 s -INFO 06-24 20:04:35 [manager.py:68] detokenization recv req id 400 cost time 0.3106675148010254 s -DEBUG 06-24 20:04:35 [manager.py:391] Prefill Batch: batch_id=79571776836947761431111974952328200580, time:1750766675.768938s req_ids:[400] -DEBUG 06-24 20:04:35 [manager.py:391] -ERROR 06-24 20:04:35 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:35 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 first_token_cost:418.09749603271484ms total_cost_time:418.14184188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5635 prompt_cache_len:5145 prompt_cache_ratio:0.9130434782608695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 -DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:36 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:36 [manager.py:224] router recive req id 8 cost time 0.511737585067749 s -INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 8 cost time 0.5138809680938721 s -DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=2162508810407588903125049595172361574, time:1750766676.1846812s req_ids:[8] -DEBUG 06-24 20:04:36 [manager.py:391] -ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:8 first_token_cost:627.1755695343018ms total_cost_time:627.2196769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5673 prompt_cache_len:5151 prompt_cache_ratio:0.9079851930195664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:8 -DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.510279655456543 s -INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.5121898651123047 s -DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=254655526889719640650951190293082509633, time:1750766676.3686175s req_ids:[400] -DEBUG 06-24 20:04:36 [manager.py:391] -ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:35 lightllm_req_id:400 first_token_cost:578.1567096710205ms total_cost_time:578.1996250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5636 prompt_cache_len:5145 prompt_cache_ratio:0.9128814762242725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 -DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:36 [batch.py:51] router release req id 400 -INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10657835006713867 s -INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.1085822582244873 s -DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=221232665752490468542733048316117506841, time:1750766676.5676017s req_ids:[400] -DEBUG 06-24 20:04:36 [manager.py:391] -ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:210.08896827697754ms total_cost_time:210.13188362121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5637 prompt_cache_len:5145 prompt_cache_ratio:0.9127195316657797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 -DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:36 [batch.py:51] router release req id 400 -INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10637378692626953 s -INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.10827088356018066 s -DEBUG 06-24 20:04:36 [manager.py:391] Prefill Batch: batch_id=286724357129523429998803644379594978912, time:1750766676.786888s req_ids:[400] -DEBUG 06-24 20:04:36 [manager.py:391] -ERROR 06-24 20:04:36 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:36 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:216.97258949279785ms total_cost_time:217.01717376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5638 prompt_cache_len:5145 prompt_cache_ratio:0.9125576445548067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 -DEBUG 06-24 20:04:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:36 [batch.py:51] router release req id 400 -INFO 06-24 20:04:36 [manager.py:224] router recive req id 400 cost time 0.10560345649719238 s -INFO 06-24 20:04:36 [manager.py:68] detokenization recv req id 400 cost time 0.10782718658447266 s -DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=191525821003922652859320717804982534719, time:1750766677.0100248s req_ids:[400] -DEBUG 06-24 20:04:37 [manager.py:391] -ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:400 first_token_cost:216.31097793579102ms total_cost_time:216.3550853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5639 prompt_cache_len:5145 prompt_cache_ratio:0.9123958148607909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 -DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:37 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:37 [manager.py:224] router recive req id 8 cost time 0.9140686988830566 s -INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 8 cost time 0.9160575866699219 s -DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=163907820373443062416644321794240740359, time:1750766677.2219796s req_ids:[8] -DEBUG 06-24 20:04:37 [manager.py:391] -ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:36 lightllm_req_id:8 first_token_cost:1024.9671936035156ms total_cost_time:1025.0091552734375ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5674 prompt_cache_len:5151 prompt_cache_ratio:0.9078251674303842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 -DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:37 [manager.py:224] router recive req id 400 cost time 0.30860328674316406 s -INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 400 cost time 0.31053948402404785 s -DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=319396226296364540450281355798139834019, time:1750766677.4376144s req_ids:[400] -DEBUG 06-24 20:04:37 [manager.py:391] -ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:421.6318130493164ms total_cost_time:421.6747283935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5640 prompt_cache_len:5145 prompt_cache_ratio:0.9122340425531915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 -DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:37 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:37 [manager.py:224] router recive req id 8 cost time 0.3094217777252197 s -INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 8 cost time 0.31142520904541016 s -DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=281282747343354252946288018410869502683, time:1750766677.6488605s req_ids:[8] -DEBUG 06-24 20:04:37 [manager.py:391] -ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 first_token_cost:424.4091510772705ms total_cost_time:424.4527816772461ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5675 prompt_cache_len:5151 prompt_cache_ratio:0.9076651982378855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 -DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:37 [manager.py:224] router recive req id 400 cost time 0.30932164192199707 s -INFO 06-24 20:04:37 [manager.py:68] detokenization recv req id 400 cost time 0.3112781047821045 s -DEBUG 06-24 20:04:37 [manager.py:391] Prefill Batch: batch_id=121468439610544082702813010234779980892, time:1750766677.8607125s req_ids:[400] -DEBUG 06-24 20:04:37 [manager.py:391] -ERROR 06-24 20:04:37 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:37 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:418.63322257995605ms total_cost_time:418.67709159851074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5641 prompt_cache_len:5145 prompt_cache_ratio:0.9120723276014892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 -DEBUG 06-24 20:04:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:37 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.30885982513427734 s -INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.31072378158569336 s -DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=161802575735781331894724396562522092801, time:1750766678.0751016s req_ids:[8] -DEBUG 06-24 20:04:38 [manager.py:391] -ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:8 first_token_cost:419.76022720336914ms total_cost_time:419.8031425476074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5676 prompt_cache_len:5151 prompt_cache_ratio:0.9075052854122622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 -DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:38 [manager.py:224] router recive req id 400 cost time 0.3077247142791748 s -INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 400 cost time 0.3096005916595459 s -DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=198951561416077516317407409602318125521, time:1750766678.2862713s req_ids:[400] -DEBUG 06-24 20:04:38 [manager.py:391] -ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:38 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:37 lightllm_req_id:400 first_token_cost:420.4702377319336ms total_cost_time:420.5131530761719ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5642 prompt_cache_len:5145 prompt_cache_ratio:0.9119106699751861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:400 -DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:38 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.30901241302490234 s -INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.3110020160675049 s -DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=229601218436893746002106774031571183260, time:1750766678.5000477s req_ids:[8] -DEBUG 06-24 20:04:38 [manager.py:391] -ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:586.0247611999512ms total_cost_time:586.068868637085ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5677 prompt_cache_len:5151 prompt_cache_ratio:0.9073454289237273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 -DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:38 [manager.py:224] router recive req id 8 cost time 0.10721111297607422 s -INFO 06-24 20:04:38 [manager.py:68] detokenization recv req id 8 cost time 0.10932254791259766 s -DEBUG 06-24 20:04:38 [manager.py:391] Prefill Batch: batch_id=163240519829322029400849542008585715736, time:1750766678.8683822s req_ids:[8] -DEBUG 06-24 20:04:38 [manager.py:391] -ERROR 06-24 20:04:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:195.4202651977539ms total_cost_time:195.46127319335938ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5678 prompt_cache_len:5151 prompt_cache_ratio:0.907185628742515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 -DEBUG 06-24 20:04:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.1069481372833252 s -INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.1097860336303711 s -DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=199709813388192927801921418909868392749, time:1750766679.0771613s req_ids:[8] -DEBUG 06-24 20:04:39 [manager.py:391] -ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:8 first_token_cost:206.33435249328613ms total_cost_time:206.38227462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:5679 prompt_cache_len:5151 prompt_cache_ratio:0.9070258848388801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 -DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.10710477828979492 s -INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.10860323905944824 s -DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=184115028979038260271768415064738473035, time:1750766679.2982056s req_ids:[8] -DEBUG 06-24 20:04:39 [manager.py:391] -ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:217.01693534851074ms total_cost_time:217.0734405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:5680 prompt_cache_len:5151 prompt_cache_ratio:0.9068661971830986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 -DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:39 [manager.py:224] router recive req id 400 cost time 1.1173467636108398 s -INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 400 cost time 1.119295597076416 s -DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=76890548786841819057424085917244917185, time:1750766679.5088637s req_ids:[400] -DEBUG 06-24 20:04:39 [manager.py:391] -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:39 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:38 lightllm_req_id:400 first_token_cost:1217.9288864135742ms total_cost_time:1217.9768085479736ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:5643 prompt_cache_len:5145 prompt_cache_ratio:0.9117490696438065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:400 -DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:39 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:39 [manager.py:224] router recive req id 8 cost time 0.3089413642883301 s -INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 8 cost time 0.3108973503112793 s -DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=238816029726489064559650873776002167569, time:1750766679.7193406s req_ids:[8] -DEBUG 06-24 20:04:39 [manager.py:391] -ERROR 06-24 20:04:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:411.9548797607422ms total_cost_time:412.01281547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5681 prompt_cache_len:5151 prompt_cache_ratio:0.9067065657454674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 -DEBUG 06-24 20:04:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:39 [manager.py:224] router recive req id 400 cost time 0.3084983825683594 s -INFO 06-24 20:04:39 [manager.py:68] detokenization recv req id 400 cost time 0.31043124198913574 s -DEBUG 06-24 20:04:39 [manager.py:391] Prefill Batch: batch_id=198439397561961686508740764038122291163, time:1750766679.9275932s req_ids:[400] -DEBUG 06-24 20:04:39 [manager.py:391] -ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:400 first_token_cost:413.56778144836426ms total_cost_time:413.6159420013428ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:5644 prompt_cache_len:5145 prompt_cache_ratio:0.9115875265768958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 -DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:40 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.30963945388793945 s -INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3118703365325928 s -DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=284451545658476670028795054388903790287, time:1750766680.1369154s req_ids:[8] -DEBUG 06-24 20:04:40 [manager.py:391] -ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:39 lightllm_req_id:8 first_token_cost:414.442777633667ms total_cost_time:414.5050048828125ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:5682 prompt_cache_len:5151 prompt_cache_ratio:0.9065469904963042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 -DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:40 [manager.py:224] router recive req id 400 cost time 0.3061957359313965 s -INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 400 cost time 0.3081645965576172 s -DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=49281792639441259715908606114685642403, time:1750766680.3554153s req_ids:[400] -DEBUG 06-24 20:04:40 [manager.py:391] -ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:420.5350875854492ms total_cost_time:420.5927848815918ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5645 prompt_cache_len:5145 prompt_cache_ratio:0.9114260407440212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 -DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:40 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.308119535446167 s -INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3102080821990967 s -DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=77537507892950804258424326965689820137, time:1750766680.5689583s req_ids:[8] -DEBUG 06-24 20:04:40 [manager.py:391] -ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 first_token_cost:423.74539375305176ms total_cost_time:423.80261421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5683 prompt_cache_len:5151 prompt_cache_ratio:0.9063874714059476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 -DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:40 [manager.py:224] router recive req id 400 cost time 0.3093605041503906 s -INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 400 cost time 0.3114025592803955 s -DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=231077214220768226158390882545871859983, time:1750766680.7764683s req_ids:[400] -DEBUG 06-24 20:04:40 [manager.py:391] -ERROR 06-24 20:04:40 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:40 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:412.51087188720703ms total_cost_time:412.57166862487793ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:5646 prompt_cache_len:5145 prompt_cache_ratio:0.9112646121147715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 -DEBUG 06-24 20:04:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:40 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:40 [manager.py:224] router recive req id 8 cost time 0.3090543746948242 s -INFO 06-24 20:04:40 [manager.py:68] detokenization recv req id 8 cost time 0.3110494613647461 s -DEBUG 06-24 20:04:40 [manager.py:391] Prefill Batch: batch_id=213257354288457420099931071678105665424, time:1750766680.984003s req_ids:[8] -DEBUG 06-24 20:04:40 [manager.py:391] -ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:8 first_token_cost:410.5720520019531ms total_cost_time:410.6316566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5684 prompt_cache_len:5151 prompt_cache_ratio:0.9062280084447573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 -DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:41 [manager.py:224] router recive req id 400 cost time 0.3093991279602051 s -INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 400 cost time 0.3113586902618408 s -DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=314352178399198090909360555160418580868, time:1750766681.1980062s req_ids:[400] -DEBUG 06-24 20:04:41 [manager.py:391] -ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:40 lightllm_req_id:400 first_token_cost:417.0494079589844ms total_cost_time:417.10925102233887ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5647 prompt_cache_len:5145 prompt_cache_ratio:0.9111032406587568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 -DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:41 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:41 [manager.py:224] router recive req id 8 cost time 0.30893898010253906 s -INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 8 cost time 0.3108949661254883 s -DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=65949083496933556795388735210093816087, time:1750766681.4106402s req_ids:[8] -DEBUG 06-24 20:04:41 [manager.py:391] -ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:418.95580291748047ms total_cost_time:419.01636123657227ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5685 prompt_cache_len:5151 prompt_cache_ratio:0.9060686015831134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 -DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:41 [manager.py:224] router recive req id 400 cost time 0.3096637725830078 s -INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 400 cost time 0.3117501735687256 s -DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=177520096979157414252634423009530821455, time:1750766681.621189s req_ids:[400] -DEBUG 06-24 20:04:41 [manager.py:391] -ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:41 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 first_token_cost:417.65785217285156ms total_cost_time:417.71674156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5648 prompt_cache_len:5145 prompt_cache_ratio:0.9109419263456091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 -DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:41 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:41 [manager.py:224] router recive req id 8 cost time 0.3078150749206543 s -INFO 06-24 20:04:41 [manager.py:68] detokenization recv req id 8 cost time 0.30978965759277344 s -DEBUG 06-24 20:04:41 [manager.py:391] Prefill Batch: batch_id=202632747836797405902521990662265850731, time:1750766681.832451s req_ids:[8] -DEBUG 06-24 20:04:41 [manager.py:391] -ERROR 06-24 20:04:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:416.51391983032227ms total_cost_time:416.55993461608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5686 prompt_cache_len:5151 prompt_cache_ratio:0.9059092507914175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 -DEBUG 06-24 20:04:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:42 [manager.py:224] router recive req id 400 cost time 0.30810976028442383 s -INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 400 cost time 0.3100616931915283 s -DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=70803459128417048406517131869305373762, time:1750766682.043486s req_ids:[400] -DEBUG 06-24 20:04:42 [manager.py:391] -ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:400 first_token_cost:415.08984565734863ms total_cost_time:415.1475429534912ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5649 prompt_cache_len:5145 prompt_cache_ratio:0.9107806691449815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 -DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:42 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:42 [manager.py:224] router recive req id 8 cost time 0.511451244354248 s -INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 8 cost time 0.5135776996612549 s -DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=338464011028521707890579194849210524668, time:1750766682.454394s req_ids:[8] -DEBUG 06-24 20:04:42 [manager.py:391] -ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:41 lightllm_req_id:8 first_token_cost:614.6280765533447ms total_cost_time:614.687442779541ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5687 prompt_cache_len:5151 prompt_cache_ratio:0.9057499560400915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 -DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:42 [manager.py:224] router recive req id 400 cost time 0.5124404430389404 s -INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 400 cost time 0.5148177146911621 s -DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=336682961697485079907097793222929059251, time:1750766682.6634202s req_ids:[400] -DEBUG 06-24 20:04:42 [manager.py:391] -ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:42 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 first_token_cost:615.0248050689697ms total_cost_time:615.0851249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5650 prompt_cache_len:5145 prompt_cache_ratio:0.9106194690265487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 -DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:42 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:42 [manager.py:224] router recive req id 8 cost time 0.30994224548339844 s -DEBUG 06-24 20:04:42 [manager.py:391] Prefill Batch: batch_id=121908194961370419794076150173976525776, time:1750766682.8500907s req_ids:[8] -DEBUG 06-24 20:04:42 [manager.py:391] -INFO 06-24 20:04:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122062683105469 s -ERROR 06-24 20:04:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 first_token_cost:382.2512626647949ms total_cost_time:382.2979927062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:5688 prompt_cache_len:5151 prompt_cache_ratio:0.9055907172995781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 -DEBUG 06-24 20:04:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10645484924316406 s -INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10883474349975586 s -DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=330111146981880258855633662505339619234, time:1750766683.0493422s req_ids:[8] -DEBUG 06-24 20:04:43 [manager.py:391] -ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:8 first_token_cost:200.64043998718262ms total_cost_time:200.6993293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5689 prompt_cache_len:5151 prompt_cache_ratio:0.905431534540341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 -DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10669779777526855 s -INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10876584053039551 s -DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=206154178136398720364593122611215617596, time:1750766683.2587278s req_ids:[8] -DEBUG 06-24 20:04:43 [manager.py:391] -ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:205.4452896118164ms total_cost_time:205.4886817932129ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5690 prompt_cache_len:5151 prompt_cache_ratio:0.9052724077328647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 -DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s -INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.10959935188293457 s -DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=207252447716160850565952498543545064374, time:1750766683.4787369s req_ids:[8] -DEBUG 06-24 20:04:43 [manager.py:391] -ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:219.04635429382324ms total_cost_time:219.09117698669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5691 prompt_cache_len:5151 prompt_cache_ratio:0.9051133368476542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 -DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:43 [manager.py:224] router recive req id 400 cost time 0.9144036769866943 s -INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 400 cost time 0.9162411689758301 s -DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=213672650856280428828660864356281812103, time:1750766683.6936774s req_ids:[400] -DEBUG 06-24 20:04:43 [manager.py:391] -ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:43 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:42 lightllm_req_id:400 first_token_cost:1021.101713180542ms total_cost_time:1021.1467742919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5651 prompt_cache_len:5145 prompt_cache_ratio:0.9104583259600071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:400 -DEBUG 06-24 20:04:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:43 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:43 [manager.py:224] router recive req id 8 cost time 0.30865025520324707 s -INFO 06-24 20:04:43 [manager.py:68] detokenization recv req id 8 cost time 0.31075358390808105 s -DEBUG 06-24 20:04:43 [manager.py:391] Prefill Batch: batch_id=163301353999026199169867054189724113463, time:1750766683.8990397s req_ids:[8] -DEBUG 06-24 20:04:43 [manager.py:391] -ERROR 06-24 20:04:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:414.55674171447754ms total_cost_time:414.5991802215576ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5692 prompt_cache_len:5151 prompt_cache_ratio:0.9049543218552354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 -DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:44 [manager.py:224] router recive req id 400 cost time 0.30908846855163574 s -INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 400 cost time 0.31117868423461914 s -DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=117612225119529810443846089762043338009, time:1750766684.113066s req_ids:[400] -DEBUG 06-24 20:04:44 [manager.py:391] -ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:44 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:400 first_token_cost:417.2031879425049ms total_cost_time:417.24681854248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5652 prompt_cache_len:5145 prompt_cache_ratio:0.9102972399150743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:400 -DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:44 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.3089888095855713 s -INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.31087708473205566 s -DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=218431612304564700257402266090414565888, time:1750766684.300993s req_ids:[8] -DEBUG 06-24 20:04:44 [manager.py:391] -ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:43 lightllm_req_id:8 first_token_cost:384.31239128112793ms total_cost_time:384.3567371368408ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5693 prompt_cache_len:5151 prompt_cache_ratio:0.9047953627261549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 -DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10764265060424805 s -INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10977363586425781 s -DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=110106381893062816804621591280419638100, time:1750766684.507227s req_ids:[8] -DEBUG 06-24 20:04:44 [manager.py:391] -ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:209.51199531555176ms total_cost_time:209.55514907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5694 prompt_cache_len:5151 prompt_cache_ratio:0.90463645943098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 -DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10616922378540039 s -INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10811662673950195 s -DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=326597459932964243164385754977380249797, time:1750766684.7166786s req_ids:[8] -DEBUG 06-24 20:04:44 [manager.py:391] -DEBUG 06-24 20:04:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 26021.774 tokens/s -DEBUG 06-24 20:04:44 [stats.py:37] Avg prompt tokens throughput: 26012.690 tokens/s -DEBUG 06-24 20:04:44 [stats.py:37] Avg generate tokens throughput: 9.084 tokens/s -ERROR 06-24 20:04:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:204.63800430297852ms total_cost_time:204.6821117401123ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5695 prompt_cache_len:5151 prompt_cache_ratio:0.9044776119402985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 -DEBUG 06-24 20:04:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:44 [manager.py:224] router recive req id 8 cost time 0.10760092735290527 s -INFO 06-24 20:04:44 [manager.py:68] detokenization recv req id 8 cost time 0.10941934585571289 s -DEBUG 06-24 20:04:44 [manager.py:391] Prefill Batch: batch_id=10078288841663145562204914900154846088, time:1750766684.9391642s req_ids:[8] -DEBUG 06-24 20:04:44 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:8 first_token_cost:220.09539604187012ms total_cost_time:220.1399803161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5696 prompt_cache_len:5151 prompt_cache_ratio:0.9043188202247191 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.9152262210845947 s -INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.9171676635742188 s -DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=163663124050652963259814320452496553198, time:1750766685.1184983s req_ids:[400] -DEBUG 06-24 20:04:45 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:44 lightllm_req_id:400 first_token_cost:1149.8684883117676ms total_cost_time:1149.9123573303223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5653 prompt_cache_len:5145 prompt_cache_ratio:0.9101362108614894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:45 [manager.py:224] router recive req id 8 cost time 0.40941333770751953 s -INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 8 cost time 0.4111747741699219 s -DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=175437420378587792969565869366659016119, time:1750766685.4380581s req_ids:[8] -DEBUG 06-24 20:04:45 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:471.8492031097412ms total_cost_time:471.8914031982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5697 prompt_cache_len:5151 prompt_cache_ratio:0.9041600842548709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.20727205276489258 s -INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.20953583717346191 s -DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=172368450072793462988911448962309576158, time:1750766685.5909636s req_ids:[400] -DEBUG 06-24 20:04:45 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:299.3342876434326ms total_cost_time:299.3783950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5654 prompt_cache_len:5145 prompt_cache_ratio:0.9099752387690131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:45 [manager.py:224] router recive req id 8 cost time 0.20816802978515625 s -INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 8 cost time 0.21016597747802734 s -DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=216661643526117133661916650847868086868, time:1750766685.7196925s req_ids:[8] -DEBUG 06-24 20:04:45 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:257.5211524963379ms total_cost_time:257.56263732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5698 prompt_cache_len:5151 prompt_cache_ratio:0.904001404001404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:45 [manager.py:224] router recive req id 400 cost time 0.20802879333496094 s -INFO 06-24 20:04:45 [manager.py:68] detokenization recv req id 400 cost time 0.21039342880249023 s -DEBUG 06-24 20:04:45 [manager.py:391] Prefill Batch: batch_id=246863256532932488438424654702580039988, time:1750766685.8817806s req_ids:[400] -DEBUG 06-24 20:04:45 [manager.py:391] -ERROR 06-24 20:04:45 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:45 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:289.54362869262695ms total_cost_time:289.58678245544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5655 prompt_cache_len:5145 prompt_cache_ratio:0.9098143236074271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 -DEBUG 06-24 20:04:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:45 [batch.py:51] router release req id 400 -INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10634613037109375 s -INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10883140563964844 s -DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=24651384845974106098525625217995412876, time:1750766686.0769413s req_ids:[400] -DEBUG 06-24 20:04:46 [manager.py:391] -ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:400 first_token_cost:199.64361190795898ms total_cost_time:199.68652725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5656 prompt_cache_len:5145 prompt_cache_ratio:0.9096534653465347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 -DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:46 [batch.py:51] router release req id 400 -INFO 06-24 20:04:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10564398765563965 s -INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10811328887939453 s -DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=144159961740778618288810797570851666246, time:1750766686.283774s req_ids:[400] -DEBUG 06-24 20:04:46 [manager.py:391] -INFO 06-24 20:04:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:201.37524604797363ms total_cost_time:201.4179229736328ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5657 prompt_cache_len:5145 prompt_cache_ratio:0.9094926639561605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 -DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:46 [batch.py:51] router release req id 400 -INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.10633087158203125 s -INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.10842418670654297 s -DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=17284165915086370021538909296821719237, time:1750766686.4925659s req_ids:[400] -DEBUG 06-24 20:04:46 [manager.py:391] -ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:204.15425300598145ms total_cost_time:204.19788360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5658 prompt_cache_len:5145 prompt_cache_ratio:0.9093319194061505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 -DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:46 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:46 [manager.py:224] router recive req id 8 cost time 0.9133801460266113 s -INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 8 cost time 0.9158332347869873 s -DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=122817067944151802617181614890454090499, time:1750766686.701242s req_ids:[8] -DEBUG 06-24 20:04:46 [manager.py:391] -ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:45 lightllm_req_id:8 first_token_cost:1015.7935619354248ms total_cost_time:1015.836238861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5699 prompt_cache_len:5151 prompt_cache_ratio:0.9038427794349886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:8 -DEBUG 06-24 20:04:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:46 [manager.py:224] router recive req id 400 cost time 0.3097188472747803 s -INFO 06-24 20:04:46 [manager.py:68] detokenization recv req id 400 cost time 0.3121352195739746 s -DEBUG 06-24 20:04:46 [manager.py:391] Prefill Batch: batch_id=329089550415523590411374788682737839562, time:1750766686.91025s req_ids:[400] -DEBUG 06-24 20:04:46 [manager.py:391] -ERROR 06-24 20:04:46 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:46 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:410.7358455657959ms total_cost_time:410.7794761657715ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5659 prompt_cache_len:5145 prompt_cache_ratio:0.9091712316663721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 -DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:47 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:47 [manager.py:224] router recive req id 8 cost time 0.30860233306884766 s -INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 8 cost time 0.31087613105773926 s -DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=232273989438799037360249230173836104102, time:1750766687.115208s req_ids:[8] -DEBUG 06-24 20:04:47 [manager.py:391] -ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:8 first_token_cost:407.0768356323242ms total_cost_time:407.1202278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5700 prompt_cache_len:5151 prompt_cache_ratio:0.9036842105263158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 -DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:47 [manager.py:224] router recive req id 400 cost time 0.3076043128967285 s -INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 400 cost time 0.3100552558898926 s -DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=265496327611751902510110317473881812156, time:1750766687.3257086s req_ids:[400] -DEBUG 06-24 20:04:47 [manager.py:391] -ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:47 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:46 lightllm_req_id:400 first_token_cost:411.834716796875ms total_cost_time:411.8795394897461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5660 prompt_cache_len:5145 prompt_cache_ratio:0.9090106007067138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:400 -DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:47 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:47 [manager.py:224] router recive req id 8 cost time 0.30820298194885254 s -INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 8 cost time 0.3107006549835205 s -DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=183049706644804370980583090330969853270, time:1750766687.5458608s req_ids:[8] -DEBUG 06-24 20:04:47 [manager.py:391] -ERROR 06-24 20:04:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 first_token_cost:626.4033317565918ms total_cost_time:626.446008682251ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5701 prompt_cache_len:5151 prompt_cache_ratio:0.9035256972460972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 -DEBUG 06-24 20:04:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:47 [manager.py:224] router recive req id 400 cost time 0.5088727474212646 s -INFO 06-24 20:04:47 [manager.py:68] detokenization recv req id 400 cost time 0.511070728302002 s -DEBUG 06-24 20:04:47 [manager.py:391] Prefill Batch: batch_id=98250522113964867449966060801872254665, time:1750766687.9506626s req_ids:[400] -DEBUG 06-24 20:04:47 [manager.py:391] -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:400 first_token_cost:602.9376983642578ms total_cost_time:602.9801368713379ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5661 prompt_cache_len:5145 prompt_cache_ratio:0.9088500264970854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 -DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:48 [batch.py:51] router release req id 400 -INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10689663887023926 s -INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10939717292785645 s -DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=182089974327337710747881807405631920038, time:1750766688.1398792s req_ids:[400] -DEBUG 06-24 20:04:48 [manager.py:391] -ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:196.55108451843262ms total_cost_time:196.5944766998291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5662 prompt_cache_len:5145 prompt_cache_ratio:0.9086895090074179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 -DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:48 [batch.py:51] router release req id 400 -INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10655665397644043 s -INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10905027389526367 s -DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=215010597675904505882197426979035769356, time:1750766688.346191s req_ids:[400] -DEBUG 06-24 20:04:48 [manager.py:391] -ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:204.91957664489746ms total_cost_time:204.96225357055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5663 prompt_cache_len:5145 prompt_cache_ratio:0.9085290482076638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 -DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:48 [batch.py:51] router release req id 400 -INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.10645341873168945 s -INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.10881233215332031 s -DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=205277262191933819866665744085576678260, time:1750766688.5641122s req_ids:[400] -DEBUG 06-24 20:04:48 [manager.py:391] -ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:48 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:216.02320671081543ms total_cost_time:216.06802940368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5664 prompt_cache_len:5145 prompt_cache_ratio:0.9083686440677966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 -DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:48 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:48 [manager.py:224] router recive req id 8 cost time 0.9128780364990234 s -INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 8 cost time 0.9151980876922607 s -DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=227812101927005582835809075482377489280, time:1750766688.7788894s req_ids:[8] -DEBUG 06-24 20:04:48 [manager.py:391] -ERROR 06-24 20:04:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:47 lightllm_req_id:8 first_token_cost:1024.5492458343506ms total_cost_time:1024.5928764343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5702 prompt_cache_len:5151 prompt_cache_ratio:0.9033672395650649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:8 -DEBUG 06-24 20:04:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:48 [manager.py:224] router recive req id 400 cost time 0.3083226680755615 s -INFO 06-24 20:04:48 [manager.py:68] detokenization recv req id 400 cost time 0.31069517135620117 s -DEBUG 06-24 20:04:48 [manager.py:391] Prefill Batch: batch_id=148516326123153400229336644511095299264, time:1750766688.9903343s req_ids:[400] -DEBUG 06-24 20:04:48 [manager.py:391] -ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:49 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:400 first_token_cost:420.60184478759766ms total_cost_time:420.64666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5665 prompt_cache_len:5145 prompt_cache_ratio:0.9082082965578111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:400 -DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:49 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.3084142208099365 s -INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.31072425842285156 s -DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=283983126973254979148276198952788970976, time:1750766689.1771278s req_ids:[8] -DEBUG 06-24 20:04:49 [manager.py:391] -ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:48 lightllm_req_id:8 first_token_cost:385.6847286224365ms total_cost_time:385.7271671295166ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5703 prompt_cache_len:5151 prompt_cache_ratio:0.9032088374539716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 -DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10664892196655273 s -INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.10912060737609863 s -DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=29921964045408058613861902179592031636, time:1750766689.3765888s req_ids:[8] -DEBUG 06-24 20:04:49 [manager.py:391] -ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:198.75311851501465ms total_cost_time:198.79531860351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5704 prompt_cache_len:5151 prompt_cache_ratio:0.9030504908835905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 -DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s -INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s -DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=322576437281389737012380474863019993050, time:1750766689.5845346s req_ids:[8] -DEBUG 06-24 20:04:49 [manager.py:391] -ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:203.94372940063477ms total_cost_time:203.98712158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5705 prompt_cache_len:5151 prompt_cache_ratio:0.9028921998247151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 -DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:49 [batch.py:51] router release req id 8 -INFO 06-24 20:04:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10781621932983398 s -INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11023902893066406 s -DEBUG 06-24 20:04:49 [manager.py:391] Prefill Batch: batch_id=323816389533592988068489069445212040170, time:1750766689.7937713s req_ids:[8] -DEBUG 06-24 20:04:49 [manager.py:391] -ERROR 06-24 20:04:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:202.4238109588623ms total_cost_time:202.46553421020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5706 prompt_cache_len:5151 prompt_cache_ratio:0.9027339642481599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 -DEBUG 06-24 20:04:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:49 [manager.py:224] router recive req id 8 cost time 0.10784506797790527 s -INFO 06-24 20:04:49 [manager.py:68] detokenization recv req id 8 cost time 0.11037206649780273 s -DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=22622370757039100382496238670709732894, time:1750766690.0020409s req_ids:[8] -DEBUG 06-24 20:04:50 [manager.py:391] -ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:8 first_token_cost:205.70755004882812ms total_cost_time:205.74951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5707 prompt_cache_len:5151 prompt_cache_ratio:0.902575784124759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 -DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:50 [manager.py:224] router recive req id 8 cost time 0.3096926212310791 s -INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 8 cost time 0.3116261959075928 s -DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=292587818131219856968208427087618715976, time:1750766690.413841s req_ids:[8] -DEBUG 06-24 20:04:50 [manager.py:391] -ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:412.26673126220703ms total_cost_time:412.3117923736572ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5708 prompt_cache_len:5151 prompt_cache_ratio:0.9024176594253679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 -DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:50 [manager.py:224] router recive req id 400 cost time 1.5242524147033691 s -INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 400 cost time 1.5255200862884521 s -DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=173907113493410583194997226579373946351, time:1750766690.6311872s req_ids:[400] -DEBUG 06-24 20:04:50 [manager.py:391] -ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:50 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:49 lightllm_req_id:400 first_token_cost:1634.0522766113281ms total_cost_time:1634.0985298156738ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5666 prompt_cache_len:5145 prompt_cache_ratio:0.9080480056477233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:400 -DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:50 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:50 [manager.py:224] router recive req id 8 cost time 0.3071877956390381 s -INFO 06-24 20:04:50 [manager.py:68] detokenization recv req id 8 cost time 0.3091301918029785 s -DEBUG 06-24 20:04:50 [manager.py:391] Prefill Batch: batch_id=33881538930883266712733723143691938371, time:1750766690.8412223s req_ids:[8] -DEBUG 06-24 20:04:50 [manager.py:391] -ERROR 06-24 20:04:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:417.85669326782227ms total_cost_time:417.89889335632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5709 prompt_cache_len:5151 prompt_cache_ratio:0.9022595901208618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 -DEBUG 06-24 20:04:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.3080763816833496 s -INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.30994510650634766 s -DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=16051916616745021094404272860513880049, time:1750766691.0497572s req_ids:[400] -DEBUG 06-24 20:04:51 [manager.py:391] -ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:400 first_token_cost:402.0876884460449ms total_cost_time:402.1298885345459ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5667 prompt_cache_len:5145 prompt_cache_ratio:0.9078877713075701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 -DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:51 [batch.py:51] router release req id 400 -INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.10568571090698242 s -INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.10769367218017578 s -DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=128854531344332501434149094872965343677, time:1750766691.2456982s req_ids:[400] -DEBUG 06-24 20:04:51 [manager.py:391] -ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:199.27549362182617ms total_cost_time:199.31840896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5668 prompt_cache_len:5145 prompt_cache_ratio:0.9077275935074101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 -DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:51 [batch.py:51] router release req id 400 -INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.1060330867767334 s -INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.10811138153076172 s -DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=253381936533107567974633391698156350016, time:1750766691.4527936s req_ids:[400] -DEBUG 06-24 20:04:51 [manager.py:391] -ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:203.26495170593262ms total_cost_time:203.3083438873291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5669 prompt_cache_len:5145 prompt_cache_ratio:0.9075674722173223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 -DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:51 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:51 [manager.py:224] router recive req id 8 cost time 0.7141966819763184 s -INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 8 cost time 0.7162535190582275 s -DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=304702094745712667880901013369344024730, time:1750766691.6647267s req_ids:[8] -DEBUG 06-24 20:04:51 [manager.py:391] -ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:50 lightllm_req_id:8 first_token_cost:815.3431415557861ms total_cost_time:815.3870105743408ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5710 prompt_cache_len:5151 prompt_cache_ratio:0.9021015761821366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:8 -DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:51 [manager.py:224] router recive req id 400 cost time 0.30938267707824707 s -INFO 06-24 20:04:51 [manager.py:68] detokenization recv req id 400 cost time 0.3113985061645508 s -DEBUG 06-24 20:04:51 [manager.py:391] Prefill Batch: batch_id=300487454556998765228661300091106802215, time:1750766691.8712232s req_ids:[400] -DEBUG 06-24 20:04:51 [manager.py:391] -ERROR 06-24 20:04:51 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:51 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:415.20166397094727ms total_cost_time:415.2636528015137ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5670 prompt_cache_len:5145 prompt_cache_ratio:0.9074074074074074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 -DEBUG 06-24 20:04:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:51 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.30792999267578125 s -INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.31049609184265137 s -DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=315081947474904341695195417872365038660, time:1750766692.0836608s req_ids:[8] -DEBUG 06-24 20:04:52 [manager.py:391] -ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:8 first_token_cost:413.3622646331787ms total_cost_time:413.4242534637451ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5711 prompt_cache_len:5151 prompt_cache_ratio:0.9019436175801085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 -DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:52 [manager.py:224] router recive req id 400 cost time 0.3092968463897705 s -INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 400 cost time 0.31174230575561523 s -DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=125930803334487563282495153173683742070, time:1750766692.2929304s req_ids:[400] -DEBUG 06-24 20:04:52 [manager.py:391] -ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:52 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:51 lightllm_req_id:400 first_token_cost:415.27557373046875ms total_cost_time:415.33803939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:5671 prompt_cache_len:5145 prompt_cache_ratio:0.907247399047787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:400 -DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:52 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.3092052936553955 s -INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.3114452362060547 s -DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=72472019645874224158999492481589443406, time:1750766692.502862s req_ids:[8] -DEBUG 06-24 20:04:52 [manager.py:391] -ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:375.37074089050293ms total_cost_time:375.4134178161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5712 prompt_cache_len:5151 prompt_cache_ratio:0.9017857142857143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 -DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.10750532150268555 s -INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s -DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=192894018930828817714540143392083110720, time:1750766692.6717808s req_ids:[8] -DEBUG 06-24 20:04:52 [manager.py:391] -ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:195.43814659118652ms total_cost_time:195.4793930053711ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5713 prompt_cache_len:5151 prompt_cache_ratio:0.9016278662699108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 -DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:52 [manager.py:224] router recive req id 8 cost time 0.1066884994506836 s -INFO 06-24 20:04:52 [manager.py:68] detokenization recv req id 8 cost time 0.10910654067993164 s -DEBUG 06-24 20:04:52 [manager.py:391] Prefill Batch: batch_id=223283102207699624486799217184405780381, time:1750766692.8780618s req_ids:[8] -DEBUG 06-24 20:04:52 [manager.py:391] -ERROR 06-24 20:04:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:203.70769500732422ms total_cost_time:203.75323295593262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5714 prompt_cache_len:5151 prompt_cache_ratio:0.9014700735036751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 -DEBUG 06-24 20:04:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10788607597351074 s -INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.11030745506286621 s -DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=205526755471922629579056083789637944359, time:1750766693.086588s req_ids:[8] -DEBUG 06-24 20:04:53 [manager.py:391] -ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:8 first_token_cost:204.00643348693848ms total_cost_time:204.05268669128418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5715 prompt_cache_len:5151 prompt_cache_ratio:0.9013123359580052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 -INFO 06-24 20:04:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:04:53 [statics_utils.py:24] mean first cost: 246.2682920538222 ms -INFO 06-24 20:04:53 [statics_utils.py:24] mean per token cost: 0.15915296835531711 ms -DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:53 [batch.py:51] router release req id 8 -INFO 06-24 20:04:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10720634460449219 s -INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.10953497886657715 s -DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=320357002696483650559604665863145892017, time:1750766693.2960813s req_ids:[8] -DEBUG 06-24 20:04:53 [manager.py:391] -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:202.37040519714355ms total_cost_time:202.41379737854004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5716 prompt_cache_len:5151 prompt_cache_ratio:0.9011546536039188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 -DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10757923126220703 s -INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.1099393367767334 s -DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=216161743593470336928733501896887501067, time:1750766693.504927s req_ids:[8] -DEBUG 06-24 20:04:53 [manager.py:391] -ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:198.20404052734375ms total_cost_time:198.24600219726562ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5717 prompt_cache_len:5151 prompt_cache_ratio:0.900997026412454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 -DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:53 [manager.py:224] router recive req id 8 cost time 0.10776972770690918 s -INFO 06-24 20:04:53 [manager.py:68] detokenization recv req id 8 cost time 0.10998654365539551 s -DEBUG 06-24 20:04:53 [manager.py:391] Prefill Batch: batch_id=189314636555872345955745140823273754267, time:1750766693.708809s req_ids:[8] -DEBUG 06-24 20:04:53 [manager.py:391] -ERROR 06-24 20:04:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:368.9858913421631ms total_cost_time:369.02809143066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5718 prompt_cache_len:5151 prompt_cache_ratio:0.9008394543546695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 -DEBUG 06-24 20:04:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 1.618631362915039 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 1.620178461074829 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=40009490660995977837610130489078534975, time:1750766694.0417767s req_ids:[400] -DEBUG 06-24 20:04:54 [manager.py:391] -ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:52 lightllm_req_id:400 first_token_cost:1714.5402431488037ms total_cost_time:1714.5841121673584ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5672 prompt_cache_len:5145 prompt_cache_ratio:0.9070874471086037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 -DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:54 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 8 cost time 0.20849370956420898 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 8 cost time 0.21015381813049316 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=253010800630327560619489577778847290846, time:1750766694.1877847s req_ids:[8] -DEBUG 06-24 20:04:54 [manager.py:391] -ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:53 lightllm_req_id:8 first_token_cost:286.01717948913574ms total_cost_time:286.0586643218994ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5719 prompt_cache_len:5151 prompt_cache_ratio:0.9006819374016436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:8 -DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.2096419334411621 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.21126556396484375 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=116271670008784988759190857839146343925, time:1750766694.3468943s req_ids:[400] -DEBUG 06-24 20:04:54 [manager.py:391] -ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:322.7963447570801ms total_cost_time:322.85380363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:5673 prompt_cache_len:5145 prompt_cache_ratio:0.9069275515600211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 -DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:54 [batch.py:51] router release req id 400 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10844874382019043 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.11029458045959473 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=126248704643683339107502075368214882280, time:1750766694.5469823s req_ids:[400] -DEBUG 06-24 20:04:54 [manager.py:391] -ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:183.93754959106445ms total_cost_time:183.98404121398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5674 prompt_cache_len:5145 prompt_cache_ratio:0.9067677123722242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 -DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:54 [batch.py:51] router release req id 400 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10668373107910156 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.10847616195678711 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=198171503447834658918958200783168086027, time:1750766694.7434638s req_ids:[400] -DEBUG 06-24 20:04:54 [manager.py:391] -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:04:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:04:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 26662.052 tokens/s -DEBUG 06-24 20:04:54 [stats.py:37] Avg prompt tokens throughput: 26652.677 tokens/s -DEBUG 06-24 20:04:54 [stats.py:37] Avg generate tokens throughput: 9.374 tokens/s -ERROR 06-24 20:04:54 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:54 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:202.50892639160156ms total_cost_time:202.55184173583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5675 prompt_cache_len:5145 prompt_cache_ratio:0.9066079295154185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 -DEBUG 06-24 20:04:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:54 [batch.py:51] router release req id 400 -INFO 06-24 20:04:54 [manager.py:224] router recive req id 400 cost time 0.10699200630187988 s -INFO 06-24 20:04:54 [manager.py:68] detokenization recv req id 400 cost time 0.10894918441772461 s -DEBUG 06-24 20:04:54 [manager.py:391] Prefill Batch: batch_id=161970145910271087535302473905871669583, time:1750766694.9530463s req_ids:[400] -DEBUG 06-24 20:04:54 [manager.py:391] -ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:400 first_token_cost:205.56020736694336ms total_cost_time:205.6286334991455ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:5676 prompt_cache_len:5145 prompt_cache_ratio:0.9064482029598309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 -DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:55 [batch.py:51] router release req id 400 -INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10602784156799316 s -INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10803985595703125 s -DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=65066789377104650516429213773322431548, time:1750766695.1625385s req_ids:[400] -DEBUG 06-24 20:04:55 [manager.py:391] -ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:202.80861854553223ms total_cost_time:202.8520107269287ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5677 prompt_cache_len:5145 prompt_cache_ratio:0.906288532675709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 -DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:55 [batch.py:51] router release req id 400 -INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10705208778381348 s -INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10904884338378906 s -DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=176586124287956918208555003790325823595, time:1750766695.3724778s req_ids:[400] -DEBUG 06-24 20:04:55 [manager.py:391] -ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:204.59651947021484ms total_cost_time:204.63919639587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5678 prompt_cache_len:5145 prompt_cache_ratio:0.9061289186333216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 -DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:55 [batch.py:51] router release req id 400 -INFO 06-24 20:04:55 [manager.py:224] router recive req id 400 cost time 0.10549163818359375 s -INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 400 cost time 0.10749077796936035 s -DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=136583679248283343175744675896981640847, time:1750766695.579152s req_ids:[400] -DEBUG 06-24 20:04:55 [manager.py:391] -ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 400 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:55 [manager.py:162] detoken release req id 400 -INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:400 first_token_cost:204.06723022460938ms total_cost_time:204.10966873168945ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5679 prompt_cache_len:5145 prompt_cache_ratio:0.9059693608029583 mtp_avg_token_per_step:1.0 -DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:55 [batch.py:51] router release req id 400 -WARNING 06-24 20:04:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:55 [manager.py:224] router recive req id 8 cost time 1.5194005966186523 s -INFO 06-24 20:04:55 [manager.py:68] detokenization recv req id 8 cost time 1.5213453769683838 s -DEBUG 06-24 20:04:55 [manager.py:391] Prefill Batch: batch_id=15041665363720838538079186944811534888, time:1750766695.7968402s req_ids:[8] -DEBUG 06-24 20:04:55 [manager.py:391] -ERROR 06-24 20:04:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:54 lightllm_req_id:8 first_token_cost:1643.8179016113281ms total_cost_time:1643.8605785369873ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5720 prompt_cache_len:5151 prompt_cache_ratio:0.9005244755244756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:8 -DEBUG 06-24 20:04:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.1068272590637207 s -INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.1088106632232666 s -DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=278347708599964483240077937720713792497, time:1750766696.0390255s req_ids:[8] -DEBUG 06-24 20:04:56 [manager.py:391] -ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:55 lightllm_req_id:8 first_token_cost:202.28290557861328ms total_cost_time:202.32605934143066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5721 prompt_cache_len:5151 prompt_cache_ratio:0.9003670686942842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 -DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.10782361030578613 s -INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.1096649169921875 s -DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=153324905297797732512861729865462903889, time:1750766696.2310464s req_ids:[8] -DEBUG 06-24 20:04:56 [manager.py:391] -ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:199.63812828063965ms total_cost_time:199.68104362487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5722 prompt_cache_len:5151 prompt_cache_ratio:0.900209716882209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 -DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.10724329948425293 s -INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.10904407501220703 s -DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=305473172064898620442050375000787693269, time:1750766696.429837s req_ids:[8] -DEBUG 06-24 20:04:56 [manager.py:391] -ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:197.07036018371582ms total_cost_time:197.1139907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5723 prompt_cache_len:5151 prompt_cache_ratio:0.9000524200594094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 -DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:56 [batch.py:51] router release req id 8 -INFO 06-24 20:04:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:56 [manager.py:224] router recive req id 8 cost time 0.3091866970062256 s -INFO 06-24 20:04:56 [manager.py:68] detokenization recv req id 8 cost time 0.3110661506652832 s -DEBUG 06-24 20:04:56 [manager.py:391] Prefill Batch: batch_id=20657536978945875873694093644438157362, time:1750766696.841732s req_ids:[8] -DEBUG 06-24 20:04:56 [manager.py:391] -ERROR 06-24 20:04:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:416.89538955688477ms total_cost_time:416.93973541259766ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5724 prompt_cache_len:5151 prompt_cache_ratio:0.899895178197065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 -DEBUG 06-24 20:04:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10754704475402832 s -INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10974979400634766 s -DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=85430479859416578367773771191346731104, time:1750766697.0585027s req_ids:[8] -DEBUG 06-24 20:04:57 [manager.py:391] -ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:56 lightllm_req_id:8 first_token_cost:199.20921325683594ms total_cost_time:199.25260543823242ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5725 prompt_cache_len:5151 prompt_cache_ratio:0.8997379912663755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 -DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s -INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.11005949974060059 s -DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=239271924188976622532618717111387774876, time:1750766697.2601724s req_ids:[8] -DEBUG 06-24 20:04:57 [manager.py:391] -ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:206.35318756103516ms total_cost_time:206.39514923095703ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5726 prompt_cache_len:5151 prompt_cache_ratio:0.8995808592385609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 -DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10774111747741699 s -INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10969734191894531 s -DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=255138084813131291229253304246351800296, time:1750766697.47109s req_ids:[8] -DEBUG 06-24 20:04:57 [manager.py:391] -ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:203.95541191101074ms total_cost_time:203.9968967437744ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5727 prompt_cache_len:5151 prompt_cache_ratio:0.8994237820848612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 -DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.1070702075958252 s -INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10900163650512695 s -DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=202071991438630213149720896562394242777, time:1750766697.6807415s req_ids:[8] -DEBUG 06-24 20:04:57 [manager.py:391] -ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:207.43083953857422ms total_cost_time:207.4739933013916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5728 prompt_cache_len:5151 prompt_cache_ratio:0.8992667597765364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 -DEBUG 06-24 20:04:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:57 [manager.py:224] router recive req id 8 cost time 0.10714483261108398 s -INFO 06-24 20:04:57 [manager.py:68] detokenization recv req id 8 cost time 0.10911250114440918 s -DEBUG 06-24 20:04:57 [manager.py:391] Prefill Batch: batch_id=108591845078999475025865364389844744327, time:1750766697.8918238s req_ids:[8] -DEBUG 06-24 20:04:57 [manager.py:391] -ERROR 06-24 20:04:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:205.24311065673828ms total_cost_time:205.28578758239746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5729 prompt_cache_len:5151 prompt_cache_ratio:0.8991097922848664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 -DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10677218437194824 s -INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10875654220581055 s -DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=118012874673255617870726242782240703785, time:1750766698.1028883s req_ids:[8] -DEBUG 06-24 20:04:58 [manager.py:391] -ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:57 lightllm_req_id:8 first_token_cost:207.6582908630371ms total_cost_time:207.70263671875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5730 prompt_cache_len:5151 prompt_cache_ratio:0.8989528795811519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 -DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10684442520141602 s -INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10895943641662598 s -DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=61087500756971532013078352537064784191, time:1750766698.3204014s req_ids:[8] -DEBUG 06-24 20:04:58 [manager.py:391] -ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:215.37494659423828ms total_cost_time:215.41833877563477ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5731 prompt_cache_len:5151 prompt_cache_ratio:0.8987960216367126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 -DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.1074361801147461 s -INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10943603515625 s -DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=235024219290302201653255911622681890535, time:1750766698.5344603s req_ids:[8] -DEBUG 06-24 20:04:58 [manager.py:391] -ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:205.9457302093506ms total_cost_time:205.98840713500977ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5732 prompt_cache_len:5151 prompt_cache_ratio:0.8986392184228891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 -DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10622668266296387 s -INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10880160331726074 s -DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=104347069001241844731537851767817136887, time:1750766698.7452984s req_ids:[8] -DEBUG 06-24 20:04:58 [manager.py:391] -ERROR 06-24 20:04:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:205.88135719299316ms total_cost_time:205.92474937438965ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5733 prompt_cache_len:5151 prompt_cache_ratio:0.8984824699110413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 -DEBUG 06-24 20:04:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:58 [manager.py:224] router recive req id 8 cost time 0.10685157775878906 s -INFO 06-24 20:04:58 [manager.py:68] detokenization recv req id 8 cost time 0.10872435569763184 s -DEBUG 06-24 20:04:58 [manager.py:391] Prefill Batch: batch_id=250335721755451031387941079474067836139, time:1750766698.9557574s req_ids:[8] -DEBUG 06-24 20:04:58 [manager.py:391] -ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:58 lightllm_req_id:8 first_token_cost:209.93471145629883ms total_cost_time:209.99670028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5734 prompt_cache_len:5151 prompt_cache_ratio:0.8983257760725497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 -DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10800528526306152 s -INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10999011993408203 s -DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=239711596511076242932222209802381732628, time:1750766699.170205s req_ids:[8] -DEBUG 06-24 20:04:59 [manager.py:391] -ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:207.79752731323242ms total_cost_time:207.8413963317871ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5735 prompt_cache_len:5151 prompt_cache_ratio:0.8981691368788143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 -DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.30968570709228516 s -INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.31178712844848633 s -DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=300218878288816692014235150037783802335, time:1750766699.581199s req_ids:[8] -DEBUG 06-24 20:04:59 [manager.py:391] -ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:410.930871963501ms total_cost_time:410.9766483306885ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5736 prompt_cache_len:5151 prompt_cache_ratio:0.8980125523012552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 -DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10744333267211914 s -INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s -DEBUG 06-24 20:04:59 [manager.py:391] Prefill Batch: batch_id=1919938180372308027723679799623066717, time:1750766699.800035s req_ids:[8] -DEBUG 06-24 20:04:59 [manager.py:391] -ERROR 06-24 20:04:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:04:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:04:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:211.6997241973877ms total_cost_time:211.74287796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5737 prompt_cache_len:5151 prompt_cache_ratio:0.8978560223113126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:04:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 -DEBUG 06-24 20:04:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:04:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:04:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:04:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:04:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:04:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:04:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:04:59 [manager.py:224] router recive req id 8 cost time 0.10733413696289062 s -INFO 06-24 20:04:59 [manager.py:68] detokenization recv req id 8 cost time 0.10940718650817871 s -DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=203434089938694816062575501607869245256, time:1750766700.0145855s req_ids:[8] -DEBUG 06-24 20:05:00 [manager.py:391] -ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:04:59 lightllm_req_id:8 first_token_cost:208.50896835327148ms total_cost_time:208.55379104614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5738 prompt_cache_len:5151 prompt_cache_ratio:0.8976995468804462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 -DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s -INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.10906696319580078 s -DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=302872426694256758003294603731148177150, time:1750766700.2258925s req_ids:[8] -DEBUG 06-24 20:05:00 [manager.py:391] -ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:205.53922653198242ms total_cost_time:205.5821418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5739 prompt_cache_len:5151 prompt_cache_ratio:0.8975431259801359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 -DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s -INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.11002612113952637 s -DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=303808542095675626587744342309024702372, time:1750766700.4371383s req_ids:[8] -DEBUG 06-24 20:05:00 [manager.py:391] -ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:206.72321319580078ms total_cost_time:206.76708221435547ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5740 prompt_cache_len:5151 prompt_cache_ratio:0.8973867595818815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 -DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.1069943904876709 s -INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900378227233887 s -DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=111504520963685480952633465283710070091, time:1750766700.6589499s req_ids:[8] -DEBUG 06-24 20:05:00 [manager.py:391] -ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:217.74578094482422ms total_cost_time:217.789888381958ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5741 prompt_cache_len:5151 prompt_cache_ratio:0.8972304476572026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 -DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:00 [manager.py:224] router recive req id 8 cost time 0.10703730583190918 s -INFO 06-24 20:05:00 [manager.py:68] detokenization recv req id 8 cost time 0.1090235710144043 s -DEBUG 06-24 20:05:00 [manager.py:391] Prefill Batch: batch_id=127996145972491537755493280075179957429, time:1750766700.8699112s req_ids:[8] -DEBUG 06-24 20:05:00 [manager.py:391] -ERROR 06-24 20:05:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:204.6818733215332ms total_cost_time:204.7252655029297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5742 prompt_cache_len:5151 prompt_cache_ratio:0.8970741901776385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 -DEBUG 06-24 20:05:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.1079246997833252 s -INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.11011481285095215 s -DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=45895676423769608889288885297305455429, time:1750766701.080419s req_ids:[8] -DEBUG 06-24 20:05:01 [manager.py:391] -ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:00 lightllm_req_id:8 first_token_cost:207.67498016357422ms total_cost_time:207.719087600708ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5743 prompt_cache_len:5151 prompt_cache_ratio:0.8969179871147483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 -DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10787606239318848 s -INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10987472534179688 s -DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=278972452376312214600403569731339308678, time:1750766701.292256s req_ids:[8] -DEBUG 06-24 20:05:01 [manager.py:391] -ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:205.75547218322754ms total_cost_time:205.80005645751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5744 prompt_cache_len:5151 prompt_cache_ratio:0.8967618384401114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 -DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.1069791316986084 s -INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10891246795654297 s -DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=53723302638781883671031746079039398741, time:1750766701.5030367s req_ids:[8] -DEBUG 06-24 20:05:01 [manager.py:391] -ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:207.33094215393066ms total_cost_time:207.37433433532715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5745 prompt_cache_len:5151 prompt_cache_ratio:0.8966057441253263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 -DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:01 [batch.py:51] router release req id 8 -DEBUG 06-24 20:05:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:01 [manager.py:283] -DEBUG 06-24 20:05:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:01 [manager.py:284] -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10800743103027344 s -INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.10996198654174805 s -DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=6583665372527310875395246951436451323, time:1750766701.7173142s req_ids:[8] -DEBUG 06-24 20:05:01 [manager.py:391] -ERROR 06-24 20:05:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:211.02190017700195ms total_cost_time:211.06505393981934ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5746 prompt_cache_len:5151 prompt_cache_ratio:0.8964497041420119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 -DEBUG 06-24 20:05:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:01 [manager.py:224] router recive req id 8 cost time 0.10840344429016113 s -INFO 06-24 20:05:01 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s -DEBUG 06-24 20:05:01 [manager.py:391] Prefill Batch: batch_id=227961160379845112214730476415351999420, time:1750766701.9299242s req_ids:[8] -DEBUG 06-24 20:05:01 [manager.py:391] -ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:01 lightllm_req_id:8 first_token_cost:391.42560958862305ms total_cost_time:391.46971702575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5747 prompt_cache_len:5151 prompt_cache_ratio:0.8962937184618062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 -DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.1082773208618164 s -INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.11025810241699219 s -DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=63826387628659194673666553928481840964, time:1750766702.3187146s req_ids:[8] -DEBUG 06-24 20:05:02 [manager.py:391] -ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:192.4724578857422ms total_cost_time:192.51513481140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5748 prompt_cache_len:5151 prompt_cache_ratio:0.8961377870563675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 -DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.10725855827331543 s -INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10930609703063965 s -DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=242626655092276276549633438089888098613, time:1750766702.5230777s req_ids:[8] -DEBUG 06-24 20:05:02 [manager.py:391] -ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:204.559326171875ms total_cost_time:204.60176467895508ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5749 prompt_cache_len:5151 prompt_cache_ratio:0.8959819098973735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 -DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.10707783699035645 s -INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10912775993347168 s -DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=327704206048615379506805875646450782640, time:1750766702.7327387s req_ids:[8] -DEBUG 06-24 20:05:02 [manager.py:391] -ERROR 06-24 20:05:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:206.73584938049316ms total_cost_time:206.77924156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5750 prompt_cache_len:5151 prompt_cache_ratio:0.8958260869565218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 -DEBUG 06-24 20:05:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:02 [manager.py:224] router recive req id 8 cost time 0.1070711612701416 s -INFO 06-24 20:05:02 [manager.py:68] detokenization recv req id 8 cost time 0.10914158821105957 s -DEBUG 06-24 20:05:02 [manager.py:391] Prefill Batch: batch_id=274543162032134645142562845588714487038, time:1750766702.9418006s req_ids:[8] -DEBUG 06-24 20:05:02 [manager.py:391] -ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:02 lightllm_req_id:8 first_token_cost:201.80368423461914ms total_cost_time:201.84826850891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5751 prompt_cache_len:5151 prompt_cache_ratio:0.8956703182055294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 -DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10801315307617188 s -INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.11002492904663086 s -DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=293893490561595804141949037555889786040, time:1750766703.1522796s req_ids:[8] -DEBUG 06-24 20:05:03 [manager.py:391] -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:208.30321311950684ms total_cost_time:208.3451747894287ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5752 prompt_cache_len:5151 prompt_cache_ratio:0.8955146036161336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 -DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s -INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10963129997253418 s -DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=4964440078896330546164792007037856921, time:1750766703.3647814s req_ids:[8] -DEBUG 06-24 20:05:03 [manager.py:391] -ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:208.1897258758545ms total_cost_time:208.23240280151367ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5753 prompt_cache_len:5151 prompt_cache_ratio:0.8953589431600903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 -DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10685038566589355 s -INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10886311531066895 s -DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=323472937902062206953977693610557973810, time:1750766703.5760076s req_ids:[8] -DEBUG 06-24 20:05:03 [manager.py:391] -ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:206.634521484375ms total_cost_time:206.681489944458ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5754 prompt_cache_len:5151 prompt_cache_ratio:0.8952033368091762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 -DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10692191123962402 s -INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10872364044189453 s -DEBUG 06-24 20:05:03 [manager.py:391] Prefill Batch: batch_id=234837898369592640570817486594358598183, time:1750766703.7892866s req_ids:[8] -DEBUG 06-24 20:05:03 [manager.py:391] -ERROR 06-24 20:05:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:207.46755599975586ms total_cost_time:207.51142501831055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5755 prompt_cache_len:5151 prompt_cache_ratio:0.8950477845351867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 -DEBUG 06-24 20:05:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:03 [manager.py:224] router recive req id 8 cost time 0.10761642456054688 s -INFO 06-24 20:05:03 [manager.py:68] detokenization recv req id 8 cost time 0.10946798324584961 s -DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=108919957625773694461047180063897201550, time:1750766704.0124083s req_ids:[8] -DEBUG 06-24 20:05:04 [manager.py:391] -ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:03 lightllm_req_id:8 first_token_cost:221.9388484954834ms total_cost_time:221.98200225830078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5756 prompt_cache_len:5151 prompt_cache_ratio:0.8948922863099374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 -DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10738134384155273 s -INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10931730270385742 s -DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=18799080369994022439404773831492439134, time:1750766704.2259586s req_ids:[8] -DEBUG 06-24 20:05:04 [manager.py:391] -ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:203.6902904510498ms total_cost_time:203.7355899810791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5757 prompt_cache_len:5151 prompt_cache_ratio:0.8947368421052632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 -DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10828614234924316 s -INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.11040067672729492 s -DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=218174013308053724178459402773829623128, time:1750766704.4359329s req_ids:[8] -DEBUG 06-24 20:05:04 [manager.py:391] -ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:205.54804801940918ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5758 prompt_cache_len:5151 prompt_cache_ratio:0.8945814518930184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 -DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s -INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s -DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=66466915453822609485099053518473648142, time:1750766704.6457856s req_ids:[8] -DEBUG 06-24 20:05:04 [manager.py:391] -ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:206.24732971191406ms total_cost_time:206.29167556762695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5759 prompt_cache_len:5151 prompt_cache_ratio:0.8944261156450772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 -DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:04 [manager.py:224] router recive req id 8 cost time 0.10659122467041016 s -INFO 06-24 20:05:04 [manager.py:68] detokenization recv req id 8 cost time 0.10861349105834961 s -DEBUG 06-24 20:05:04 [manager.py:391] Prefill Batch: batch_id=218936826633139380110908798735265870496, time:1750766704.8566241s req_ids:[8] -DEBUG 06-24 20:05:04 [manager.py:391] -DEBUG 06-24 20:05:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 25526.128 tokens/s -DEBUG 06-24 20:05:04 [stats.py:37] Avg prompt tokens throughput: 25517.229 tokens/s -DEBUG 06-24 20:05:04 [stats.py:37] Avg generate tokens throughput: 8.900 tokens/s -ERROR 06-24 20:05:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:206.22873306274414ms total_cost_time:206.27188682556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5760 prompt_cache_len:5151 prompt_cache_ratio:0.8942708333333333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 -DEBUG 06-24 20:05:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10706615447998047 s -INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s -DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=177559919126004287006541243321830193542, time:1750766705.0709262s req_ids:[8] -DEBUG 06-24 20:05:05 [manager.py:391] -ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:04 lightllm_req_id:8 first_token_cost:208.28509330749512ms total_cost_time:208.3296775817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5761 prompt_cache_len:5151 prompt_cache_ratio:0.8941156049296997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 -DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10860252380371094 s -INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.11062359809875488 s -DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=260605159470986714372952081425709552275, time:1750766705.2805793s req_ids:[8] -DEBUG 06-24 20:05:05 [manager.py:391] -ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:206.01963996887207ms total_cost_time:206.06279373168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5762 prompt_cache_len:5151 prompt_cache_ratio:0.893960430406109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 -DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.10709214210510254 s -INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.10913920402526855 s -DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=231842679769872168355842081128557101267, time:1750766705.4970627s req_ids:[8] -DEBUG 06-24 20:05:05 [manager.py:391] -ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:211.26532554626465ms total_cost_time:211.32373809814453ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:5763 prompt_cache_len:5151 prompt_cache_ratio:0.8938053097345132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 -DEBUG 06-24 20:05:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:05 [manager.py:224] router recive req id 8 cost time 0.30991148948669434 s -INFO 06-24 20:05:05 [manager.py:68] detokenization recv req id 8 cost time 0.3120236396789551 s -DEBUG 06-24 20:05:05 [manager.py:391] Prefill Batch: batch_id=87612591875752402812856526846736536191, time:1750766705.9108155s req_ids:[8] -DEBUG 06-24 20:05:05 [manager.py:391] -ERROR 06-24 20:05:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:408.60533714294434ms total_cost_time:408.65063667297363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5764 prompt_cache_len:5151 prompt_cache_ratio:0.8936502428868841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 -DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10705685615539551 s -INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10927033424377441 s -DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=14730066086647046556245556632612497778, time:1750766706.1245298s req_ids:[8] -DEBUG 06-24 20:05:06 [manager.py:391] -ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:05 lightllm_req_id:8 first_token_cost:207.45110511779785ms total_cost_time:207.49568939208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5765 prompt_cache_len:5151 prompt_cache_ratio:0.8934952298352125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 -DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10709977149963379 s -INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10918474197387695 s -DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=98560683644153745234278928176951897804, time:1750766706.3351285s req_ids:[8] -DEBUG 06-24 20:05:06 [manager.py:391] -ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:206.7577838897705ms total_cost_time:206.8023681640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5766 prompt_cache_len:5151 prompt_cache_ratio:0.8933402705515089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 -DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10695767402648926 s -INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.1088707447052002 s -DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=91904649221834591631630286063090173675, time:1750766706.5488129s req_ids:[8] -DEBUG 06-24 20:05:06 [manager.py:391] -ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:206.6361904144287ms total_cost_time:206.6802978515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5767 prompt_cache_len:5151 prompt_cache_ratio:0.893185365007803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 -DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.1076207160949707 s -INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10965418815612793 s -DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=124920877431328838951323347641989458266, time:1750766706.760562s req_ids:[8] -DEBUG 06-24 20:05:06 [manager.py:391] -ERROR 06-24 20:05:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:208.3134651184082ms total_cost_time:208.3566188812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5768 prompt_cache_len:5151 prompt_cache_ratio:0.8930305131761442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 -DEBUG 06-24 20:05:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:06 [manager.py:224] router recive req id 8 cost time 0.10801982879638672 s -INFO 06-24 20:05:06 [manager.py:68] detokenization recv req id 8 cost time 0.10996198654174805 s -DEBUG 06-24 20:05:06 [manager.py:391] Prefill Batch: batch_id=77603677738351411987356897578994262550, time:1750766706.9751596s req_ids:[8] -DEBUG 06-24 20:05:06 [manager.py:391] -ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:06 lightllm_req_id:8 first_token_cost:207.5350284576416ms total_cost_time:207.5800895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5769 prompt_cache_len:5151 prompt_cache_ratio:0.8928757150286012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 -DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.10751748085021973 s -INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10949850082397461 s -DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=79615452045447308888397770338320692495, time:1750766707.1871088s req_ids:[8] -DEBUG 06-24 20:05:07 [manager.py:391] -ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:204.21147346496582ms total_cost_time:204.2555809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5770 prompt_cache_len:5151 prompt_cache_ratio:0.8927209705372617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 -DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:07 [batch.py:51] router release req id 8 -INFO 06-24 20:05:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.1066596508026123 s -INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10865187644958496 s -DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=144468863874210372699079954971167395869, time:1750766707.396074s req_ids:[8] -DEBUG 06-24 20:05:07 [manager.py:391] -ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:206.22777938842773ms total_cost_time:206.2704563140869ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5771 prompt_cache_len:5151 prompt_cache_ratio:0.8925662796742332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 -DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.10692596435546875 s -INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10894227027893066 s -DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=25761989621393823015193604318921973648, time:1750766707.606788s req_ids:[8] -DEBUG 06-24 20:05:07 [manager.py:391] -ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:206.3465118408203ms total_cost_time:206.390380859375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5772 prompt_cache_len:5151 prompt_cache_ratio:0.8924116424116424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 -DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:07 [manager.py:224] router recive req id 8 cost time 0.1063992977142334 s -INFO 06-24 20:05:07 [manager.py:68] detokenization recv req id 8 cost time 0.10853219032287598 s -DEBUG 06-24 20:05:07 [manager.py:391] Prefill Batch: batch_id=162610706470779863226333979744849588775, time:1750766707.8215358s req_ids:[8] -DEBUG 06-24 20:05:07 [manager.py:391] -ERROR 06-24 20:05:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:207.2300910949707ms total_cost_time:207.2734832763672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5773 prompt_cache_len:5151 prompt_cache_ratio:0.8922570587216352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 -DEBUG 06-24 20:05:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10752677917480469 s -INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10966229438781738 s -DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=291940951167392547587030341205023199549, time:1750766708.0333269s req_ids:[8] -DEBUG 06-24 20:05:08 [manager.py:391] -ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:07 lightllm_req_id:8 first_token_cost:207.59034156799316ms total_cost_time:207.63325691223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5774 prompt_cache_len:5151 prompt_cache_ratio:0.8921025285763768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 -DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10712432861328125 s -INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10905694961547852 s -DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=275879283798856027872695398187194918483, time:1750766708.2449744s req_ids:[8] -DEBUG 06-24 20:05:08 [manager.py:391] -ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:203.65285873413086ms total_cost_time:203.69815826416016ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5775 prompt_cache_len:5151 prompt_cache_ratio:0.8919480519480519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 -DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10818028450012207 s -INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.11021614074707031 s -DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=28390632015667287064039146458720193527, time:1750766708.4557414s req_ids:[8] -DEBUG 06-24 20:05:08 [manager.py:391] -ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:205.6260108947754ms total_cost_time:205.67035675048828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5776 prompt_cache_len:5151 prompt_cache_ratio:0.8917936288088643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 -DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:08 [manager.py:224] router recive req id 8 cost time 0.10655045509338379 s -INFO 06-24 20:05:08 [manager.py:68] detokenization recv req id 8 cost time 0.10858750343322754 s -DEBUG 06-24 20:05:08 [manager.py:391] Prefill Batch: batch_id=146096954067290975391965013368975270309, time:1750766708.667068s req_ids:[8] -DEBUG 06-24 20:05:08 [manager.py:391] -ERROR 06-24 20:05:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:374.57799911499023ms total_cost_time:374.62329864501953ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5777 prompt_cache_len:5151 prompt_cache_ratio:0.8916392591310369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 -DEBUG 06-24 20:05:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10678386688232422 s -INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10870599746704102 s -DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=36524539953869462874242074860281253601, time:1750766709.0393448s req_ids:[8] -DEBUG 06-24 20:05:09 [manager.py:391] -ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:08 lightllm_req_id:8 first_token_cost:196.34199142456055ms total_cost_time:196.38538360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5778 prompt_cache_len:5151 prompt_cache_ratio:0.891484942886812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 -DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10776853561401367 s -INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10972189903259277 s -DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=276618074281118259245716822964162521808, time:1750766709.2469084s req_ids:[8] -DEBUG 06-24 20:05:09 [manager.py:391] -ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:206.80522918701172ms total_cost_time:206.8490982055664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5779 prompt_cache_len:5151 prompt_cache_ratio:0.8913306800484513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 -DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10642623901367188 s -INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10843157768249512 s -DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=318736392974999057298735277582301699681, time:1750766709.4591906s req_ids:[8] -DEBUG 06-24 20:05:09 [manager.py:391] -ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:210.28375625610352ms total_cost_time:210.3271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5780 prompt_cache_len:5151 prompt_cache_ratio:0.8911764705882353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 -DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10671567916870117 s -INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10876822471618652 s -DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=132488056754414973470364588351876737565, time:1750766709.673921s req_ids:[8] -DEBUG 06-24 20:05:09 [manager.py:391] -ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:208.88400077819824ms total_cost_time:208.92906188964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5781 prompt_cache_len:5151 prompt_cache_ratio:0.8910223144784639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 -DEBUG 06-24 20:05:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:09 [manager.py:224] router recive req id 8 cost time 0.10674715042114258 s -INFO 06-24 20:05:09 [manager.py:68] detokenization recv req id 8 cost time 0.10876774787902832 s -DEBUG 06-24 20:05:09 [manager.py:391] Prefill Batch: batch_id=90094379394264099241369854396910634783, time:1750766709.8875275s req_ids:[8] -DEBUG 06-24 20:05:09 [manager.py:391] -ERROR 06-24 20:05:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:209.21039581298828ms total_cost_time:209.25331115722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5782 prompt_cache_len:5151 prompt_cache_ratio:0.8908682116914562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 -DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10627365112304688 s -INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10871076583862305 s -DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=21057350097725668172911839561212442127, time:1750766710.1010895s req_ids:[8] -DEBUG 06-24 20:05:10 [manager.py:391] -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:09 lightllm_req_id:8 first_token_cost:206.038236618042ms total_cost_time:206.08234405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5783 prompt_cache_len:5151 prompt_cache_ratio:0.8907141621995504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 -DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10687136650085449 s -INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.1093435287475586 s -DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=35628506560609197654704605018266396970, time:1750766710.3096273s req_ids:[8] -DEBUG 06-24 20:05:10 [manager.py:391] -ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:206.1631679534912ms total_cost_time:206.207275390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5784 prompt_cache_len:5151 prompt_cache_ratio:0.8905601659751037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 -DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10755681991577148 s -INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10986495018005371 s -DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=152162783008818919106651546405641910318, time:1750766710.5231626s req_ids:[8] -DEBUG 06-24 20:05:10 [manager.py:391] -ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:208.27150344848633ms total_cost_time:208.31584930419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5785 prompt_cache_len:5151 prompt_cache_ratio:0.8904062229904927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 -DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10642790794372559 s -INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.1088871955871582 s -DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=216347873005092744382780977226077477487, time:1750766710.7363703s req_ids:[8] -DEBUG 06-24 20:05:10 [manager.py:391] -ERROR 06-24 20:05:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:208.91189575195312ms total_cost_time:208.95671844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5786 prompt_cache_len:5151 prompt_cache_ratio:0.8902523332181127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 -DEBUG 06-24 20:05:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:10 [batch.py:51] router release req id 8 -INFO 06-24 20:05:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:10 [manager.py:224] router recive req id 8 cost time 0.10697364807128906 s -INFO 06-24 20:05:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940957069396973 s -DEBUG 06-24 20:05:10 [manager.py:391] Prefill Batch: batch_id=43943551713752290202805634881428227714, time:1750766710.9501908s req_ids:[8] -DEBUG 06-24 20:05:10 [manager.py:391] -ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:10 lightllm_req_id:8 first_token_cost:209.27953720092773ms total_cost_time:209.32316780090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5787 prompt_cache_len:5151 prompt_cache_ratio:0.8900984966303784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 -DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.1080322265625 s -INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s -DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=240832909227799961582119376593104367910, time:1750766711.1658766s req_ids:[8] -DEBUG 06-24 20:05:11 [manager.py:391] -ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:209.4900608062744ms total_cost_time:209.5344066619873ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5788 prompt_cache_len:5151 prompt_cache_ratio:0.8899447131997236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 -DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.10696601867675781 s -INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.10886812210083008 s -DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=289519504952717583805549600934727055522, time:1750766711.3785887s req_ids:[8] -DEBUG 06-24 20:05:11 [manager.py:391] -ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:362.9326820373535ms total_cost_time:362.9908561706543ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5789 prompt_cache_len:5151 prompt_cache_ratio:0.8897909828986008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 -DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.1066598892211914 s -INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.1087496280670166 s -DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=134766458977726517470176220755305125321, time:1750766711.7367752s req_ids:[8] -DEBUG 06-24 20:05:11 [manager.py:391] -ERROR 06-24 20:05:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:196.29645347595215ms total_cost_time:196.33817672729492ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5790 prompt_cache_len:5151 prompt_cache_ratio:0.8896373056994819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 -DEBUG 06-24 20:05:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:11 [manager.py:224] router recive req id 8 cost time 0.10658574104309082 s -INFO 06-24 20:05:11 [manager.py:68] detokenization recv req id 8 cost time 0.10868191719055176 s -DEBUG 06-24 20:05:11 [manager.py:391] Prefill Batch: batch_id=47189270108033356106181443410887077415, time:1750766711.9459033s req_ids:[8] -DEBUG 06-24 20:05:11 [manager.py:391] -ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:11 lightllm_req_id:8 first_token_cost:205.51705360412598ms total_cost_time:205.56020736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5791 prompt_cache_len:5151 prompt_cache_ratio:0.8894836815748576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 -DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10634469985961914 s -INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10836625099182129 s -DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=189471481732654903033303870766754283024, time:1750766712.1566558s req_ids:[8] -DEBUG 06-24 20:05:12 [manager.py:391] -ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.34071731567383ms total_cost_time:207.38554000854492ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5792 prompt_cache_len:5151 prompt_cache_ratio:0.8893301104972375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 -DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.1066131591796875 s -INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.1086428165435791 s -DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=79423937492376416983331505730315262902, time:1750766712.367584s req_ids:[8] -DEBUG 06-24 20:05:12 [manager.py:391] -ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.19432830810547ms total_cost_time:207.23867416381836ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5793 prompt_cache_len:5151 prompt_cache_ratio:0.8891765924391507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 -DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10646390914916992 s -INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10858941078186035 s -DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=60991017761758826280836123093395979176, time:1750766712.5842266s req_ids:[8] -DEBUG 06-24 20:05:12 [manager.py:391] -ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:207.9479694366455ms total_cost_time:207.9923152923584ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5794 prompt_cache_len:5151 prompt_cache_ratio:0.8890231273731446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 -DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.1067509651184082 s -INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10883498191833496 s -DEBUG 06-24 20:05:12 [manager.py:391] Prefill Batch: batch_id=54328422634416349732020305227558359940, time:1750766712.7942815s req_ids:[8] -DEBUG 06-24 20:05:12 [manager.py:391] -ERROR 06-24 20:05:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:208.12439918518066ms total_cost_time:208.17017555236816ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5795 prompt_cache_len:5151 prompt_cache_ratio:0.888869715271786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 -DEBUG 06-24 20:05:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:12 [manager.py:224] router recive req id 8 cost time 0.10720133781433105 s -INFO 06-24 20:05:12 [manager.py:68] detokenization recv req id 8 cost time 0.10956335067749023 s -DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=236316453303555827082710330540676193913, time:1750766713.0068474s req_ids:[8] -DEBUG 06-24 20:05:13 [manager.py:391] -ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:12 lightllm_req_id:8 first_token_cost:204.8788070678711ms total_cost_time:204.92291450500488ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5796 prompt_cache_len:5151 prompt_cache_ratio:0.8887163561076604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 -DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10703420639038086 s -INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.10920500755310059 s -DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=81941996111944769696434313259102623752, time:1750766713.2170575s req_ids:[8] -DEBUG 06-24 20:05:13 [manager.py:391] -ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:205.84654808044434ms total_cost_time:205.87682723999023ms,out_token_counter:1 mean_per_token_cost_time: 0.030279159545898438ms prompt_token_num:5797 prompt_cache_len:5151 prompt_cache_ratio:0.8885630498533724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 -DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10696887969970703 s -INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.1090691089630127 s -DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=254257039385188194563945046877701641528, time:1750766713.437088s req_ids:[8] -DEBUG 06-24 20:05:13 [manager.py:391] -ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:217.15712547302246ms total_cost_time:217.20290184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5798 prompt_cache_len:5151 prompt_cache_ratio:0.8884097964815454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 -DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10662555694580078 s -INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.10878634452819824 s -DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=55647634337077060957137484828783468822, time:1750766713.6504781s req_ids:[8] -DEBUG 06-24 20:05:13 [manager.py:391] -ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:206.96115493774414ms total_cost_time:207.02171325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5799 prompt_cache_len:5151 prompt_cache_ratio:0.8882565959648215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 -DEBUG 06-24 20:05:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:13 [manager.py:224] router recive req id 8 cost time 0.10782122611999512 s -INFO 06-24 20:05:13 [manager.py:68] detokenization recv req id 8 cost time 0.11004376411437988 s -DEBUG 06-24 20:05:13 [manager.py:391] Prefill Batch: batch_id=9610420739971923566883961934099402716, time:1750766713.8669808s req_ids:[8] -DEBUG 06-24 20:05:13 [manager.py:391] -ERROR 06-24 20:05:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:212.2344970703125ms total_cost_time:212.294340133667ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5800 prompt_cache_len:5151 prompt_cache_ratio:0.8881034482758621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 -DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.30803561210632324 s -INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.3106107711791992 s -DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=13436109040149684680829302530524818223, time:1750766714.282535s req_ids:[8] -DEBUG 06-24 20:05:14 [manager.py:391] -ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:13 lightllm_req_id:8 first_token_cost:413.24472427368164ms total_cost_time:413.29002380371094ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5801 prompt_cache_len:5151 prompt_cache_ratio:0.887950353387347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 -DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.10530900955200195 s -INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.10771965980529785 s -DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=329279760679769720433216955031007764877, time:1750766714.500566s req_ids:[8] -DEBUG 06-24 20:05:14 [manager.py:391] -ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:209.1238498687744ms total_cost_time:209.1691493988037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5802 prompt_cache_len:5151 prompt_cache_ratio:0.8877973112719751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 -DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.1066434383392334 s -INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.10902261734008789 s -DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=211524719874424797528463845671927998653, time:1750766714.7134264s req_ids:[8] -DEBUG 06-24 20:05:14 [manager.py:391] -ERROR 06-24 20:05:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:202.74686813354492ms total_cost_time:202.7912139892578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5803 prompt_cache_len:5151 prompt_cache_ratio:0.8876443219024642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 -DEBUG 06-24 20:05:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:14 [manager.py:224] router recive req id 8 cost time 0.10783743858337402 s -INFO 06-24 20:05:14 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s -DEBUG 06-24 20:05:14 [manager.py:391] Prefill Batch: batch_id=257126260772189521574354081748128420223, time:1750766714.9216049s req_ids:[8] -DEBUG 06-24 20:05:14 [manager.py:391] -DEBUG 06-24 20:05:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 25287.556 tokens/s -DEBUG 06-24 20:05:14 [stats.py:37] Avg prompt tokens throughput: 25278.813 tokens/s -DEBUG 06-24 20:05:14 [stats.py:37] Avg generate tokens throughput: 8.743 tokens/s -ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:14 lightllm_req_id:8 first_token_cost:205.67703247070312ms total_cost_time:205.7204246520996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5804 prompt_cache_len:5151 prompt_cache_ratio:0.8874913852515507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 -DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.1078336238861084 s -INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.11023712158203125 s -DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=13156001173514735912048462017979583811, time:1750766715.1319818s req_ids:[8] -DEBUG 06-24 20:05:15 [manager.py:391] -ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.44178581237793ms total_cost_time:204.49328422546387ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:5805 prompt_cache_len:5151 prompt_cache_ratio:0.8873385012919897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 -DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10854482650756836 s -INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.11105656623840332 s -DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=41588808320688186178318159976668018710, time:1750766715.3420794s req_ids:[8] -DEBUG 06-24 20:05:15 [manager.py:391] -ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.90074157714844ms total_cost_time:204.92291450500488ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:5806 prompt_cache_len:5151 prompt_cache_ratio:0.8871856699965552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 -DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10726284980773926 s -INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.10971283912658691 s -DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=208163391892305609467255759563020615290, time:1750766715.554053s req_ids:[8] -DEBUG 06-24 20:05:15 [manager.py:391] -ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:204.96678352355957ms total_cost_time:205.02948760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:5807 prompt_cache_len:5151 prompt_cache_ratio:0.8870328913380403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 -DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10689473152160645 s -INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.10939645767211914 s -DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=217544170007707265945958517846435127239, time:1750766715.7624652s req_ids:[8] -DEBUG 06-24 20:05:15 [manager.py:391] -ERROR 06-24 20:05:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:206.8009376525879ms total_cost_time:206.8798542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.07891654968261719ms prompt_token_num:5808 prompt_cache_len:5151 prompt_cache_ratio:0.8868801652892562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 -DEBUG 06-24 20:05:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:15 [manager.py:224] router recive req id 8 cost time 0.10953927040100098 s -INFO 06-24 20:05:15 [manager.py:68] detokenization recv req id 8 cost time 0.1119527816772461 s -DEBUG 06-24 20:05:15 [manager.py:391] Prefill Batch: batch_id=205441924898933949804459501007173506549, time:1750766715.9742575s req_ids:[8] -DEBUG 06-24 20:05:15 [manager.py:391] -ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:15 lightllm_req_id:8 first_token_cost:205.03687858581543ms total_cost_time:205.0802707672119ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5809 prompt_cache_len:5151 prompt_cache_ratio:0.8867274918230332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 -DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10679507255554199 s -INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s -DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=300752411717777120746350395164648101424, time:1750766716.1856523s req_ids:[8] -DEBUG 06-24 20:05:16 [manager.py:391] -ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.4128646850586ms total_cost_time:205.4586410522461ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5810 prompt_cache_len:5151 prompt_cache_ratio:0.8865748709122203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 -DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:16 [batch.py:51] router release req id 8 -INFO 06-24 20:05:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10884547233581543 s -INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11116385459899902 s -DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=215626646824194162052921541144742565453, time:1750766716.3953905s req_ids:[8] -DEBUG 06-24 20:05:16 [manager.py:391] -ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.29699325561523ms total_cost_time:205.35516738891602ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5811 prompt_cache_len:5151 prompt_cache_ratio:0.8864223025296851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 -DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10906529426574707 s -INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11155939102172852 s -DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=3446350724196355247226250001055512086, time:1750766716.6058247s req_ids:[8] -DEBUG 06-24 20:05:16 [manager.py:391] -ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:205.75928688049316ms total_cost_time:205.81555366516113ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:5812 prompt_cache_len:5151 prompt_cache_ratio:0.8862697866483138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 -DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:16 [manager.py:224] router recive req id 8 cost time 0.10892486572265625 s -INFO 06-24 20:05:16 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s -DEBUG 06-24 20:05:16 [manager.py:391] Prefill Batch: batch_id=210129042323211614437430385285636176916, time:1750766716.816875s req_ids:[8] -DEBUG 06-24 20:05:16 [manager.py:391] -ERROR 06-24 20:05:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:203.16314697265625ms total_cost_time:203.2148838043213ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:5813 prompt_cache_len:5151 prompt_cache_ratio:0.8861173232410116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 -DEBUG 06-24 20:05:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.11028552055358887 s -INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11284804344177246 s -DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=180970217023457905140559726872068014940, time:1750766717.0265222s req_ids:[8] -DEBUG 06-24 20:05:17 [manager.py:391] -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:16 lightllm_req_id:8 first_token_cost:220.8724021911621ms total_cost_time:220.92342376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:5814 prompt_cache_len:5151 prompt_cache_ratio:0.8859649122807017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 -DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.10945701599121094 s -INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11162209510803223 s -DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=193554039648567789626035390633491588137, time:1750766717.2510054s req_ids:[8] -DEBUG 06-24 20:05:17 [manager.py:391] -ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:200.3152370452881ms total_cost_time:200.36005973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5815 prompt_cache_len:5151 prompt_cache_ratio:0.8858125537403267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 -DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.11030697822570801 s -INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.11234593391418457 s -DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=55238196607474429053847315643017236839, time:1750766717.4584095s req_ids:[8] -DEBUG 06-24 20:05:17 [manager.py:391] -ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:205.81603050231934ms total_cost_time:205.86013793945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5816 prompt_cache_len:5151 prompt_cache_ratio:0.8856602475928473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 -DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:17 [manager.py:224] router recive req id 8 cost time 0.10747265815734863 s -INFO 06-24 20:05:17 [manager.py:68] detokenization recv req id 8 cost time 0.10960149765014648 s -DEBUG 06-24 20:05:17 [manager.py:391] Prefill Batch: batch_id=292379887231049043727563229835918325595, time:1750766717.674671s req_ids:[8] -DEBUG 06-24 20:05:17 [manager.py:391] -ERROR 06-24 20:05:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:376.6369819641113ms total_cost_time:376.6825199127197ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5817 prompt_cache_len:5151 prompt_cache_ratio:0.8855079938112429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 -DEBUG 06-24 20:05:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1075439453125 s -INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s -DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=334019733525901534901339728637892430523, time:1750766718.042526s req_ids:[8] -DEBUG 06-24 20:05:18 [manager.py:391] -ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:17 lightllm_req_id:8 first_token_cost:196.17557525634766ms total_cost_time:196.23541831970215ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5818 prompt_cache_len:5151 prompt_cache_ratio:0.8853557923685115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 -DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.10746884346008301 s -INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10949230194091797 s -DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=305836860098545249966328480463826565677, time:1750766718.2515116s req_ids:[8] -DEBUG 06-24 20:05:18 [manager.py:391] -ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:205.7344913482666ms total_cost_time:205.7955265045166ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5819 prompt_cache_len:5151 prompt_cache_ratio:0.8852036432376698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 -DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.10719537734985352 s -INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10932517051696777 s -DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=332796991343996048987693077930960817217, time:1750766718.4628248s req_ids:[8] -DEBUG 06-24 20:05:18 [manager.py:391] -ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:207.78203010559082ms total_cost_time:207.84306526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5820 prompt_cache_len:5151 prompt_cache_ratio:0.8850515463917525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 -DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s -INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.11083102226257324 s -DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=7520305571926688053102247409640375342, time:1750766718.6747494s req_ids:[8] -DEBUG 06-24 20:05:18 [manager.py:391] -ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:218.3535099029541ms total_cost_time:218.41096878051758ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:5821 prompt_cache_len:5151 prompt_cache_ratio:0.8848995018038138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 -DEBUG 06-24 20:05:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:18 [manager.py:224] router recive req id 8 cost time 0.1075887680053711 s -INFO 06-24 20:05:18 [manager.py:68] detokenization recv req id 8 cost time 0.10993719100952148 s -DEBUG 06-24 20:05:18 [manager.py:391] Prefill Batch: batch_id=36497736523476442285915324166227208847, time:1750766718.8964424s req_ids:[8] -DEBUG 06-24 20:05:18 [manager.py:391] -ERROR 06-24 20:05:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:199.76496696472168ms total_cost_time:199.80835914611816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5822 prompt_cache_len:5151 prompt_cache_ratio:0.8847475094469255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 -DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10756778717041016 s -INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.10985994338989258 s -DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=284586521460332609945855395351023572554, time:1750766719.109991s req_ids:[8] -DEBUG 06-24 20:05:19 [manager.py:391] -ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:18 lightllm_req_id:8 first_token_cost:226.40442848205566ms total_cost_time:226.46617889404297ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5823 prompt_cache_len:5151 prompt_cache_ratio:0.8845955692941783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 -DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10872483253479004 s -INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.11112427711486816 s -DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=286819375068313634828754931559565596181, time:1750766719.3325064s req_ids:[8] -DEBUG 06-24 20:05:19 [manager.py:391] -ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:214.3564224243164ms total_cost_time:214.4174575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5824 prompt_cache_len:5151 prompt_cache_ratio:0.8844436813186813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 -DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10754227638244629 s -INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.10963273048400879 s -DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=82125310477825531762142640063632383831, time:1750766719.5512064s req_ids:[8] -DEBUG 06-24 20:05:19 [manager.py:391] -ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:214.08319473266602ms total_cost_time:214.1427993774414ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5825 prompt_cache_len:5151 prompt_cache_ratio:0.8842918454935622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 -DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10732102394104004 s -INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.1091463565826416 s -DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=189606707303042446458717177826965963969, time:1750766719.7695777s req_ids:[8] -DEBUG 06-24 20:05:19 [manager.py:391] -ERROR 06-24 20:05:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:217.70262718200684ms total_cost_time:217.74744987487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5826 prompt_cache_len:5151 prompt_cache_ratio:0.884140061791967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 -DEBUG 06-24 20:05:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:19 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s -INFO 06-24 20:05:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094660758972168 s -DEBUG 06-24 20:05:19 [manager.py:391] Prefill Batch: batch_id=55657284113546525446108847269412476574, time:1750766719.9930034s req_ids:[8] -DEBUG 06-24 20:05:19 [manager.py:391] -ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:19 lightllm_req_id:8 first_token_cost:212.53108978271484ms total_cost_time:212.59450912475586ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:5827 prompt_cache_len:5151 prompt_cache_ratio:0.8839883301870602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 -DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10719966888427734 s -INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10927724838256836 s -DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=249207288594956523524363858007463676643, time:1750766720.2126641s req_ids:[8] -DEBUG 06-24 20:05:20 [manager.py:391] -ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:213.87243270874023ms total_cost_time:213.93108367919922ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:5828 prompt_cache_len:5151 prompt_cache_ratio:0.8838366506520247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 -DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10686016082763672 s -INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10880136489868164 s -DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=50211751967394745518179093856042512969, time:1750766720.424666s req_ids:[8] -DEBUG 06-24 20:05:20 [manager.py:391] -ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:191.2245750427246ms total_cost_time:191.2863254547119ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:5829 prompt_cache_len:5151 prompt_cache_ratio:0.8836850231600618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 -DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:20 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s -INFO 06-24 20:05:20 [manager.py:68] detokenization recv req id 8 cost time 0.10937118530273438 s -DEBUG 06-24 20:05:20 [manager.py:391] Prefill Batch: batch_id=105172989624798812679579100617991249290, time:1750766720.6247327s req_ids:[8] -DEBUG 06-24 20:05:20 [manager.py:391] -ERROR 06-24 20:05:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:201.30491256713867ms total_cost_time:201.36547088623047ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5830 prompt_cache_len:5151 prompt_cache_ratio:0.8835334476843911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 -DEBUG 06-24 20:05:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.31052231788635254 s -INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.3116328716278076 s -DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=29634646512671778184617612723094905530, time:1750766721.0317552s req_ids:[8] -DEBUG 06-24 20:05:21 [manager.py:391] -ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:20 lightllm_req_id:8 first_token_cost:407.0782661437988ms total_cost_time:407.14073181152344ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:5831 prompt_cache_len:5151 prompt_cache_ratio:0.8833819241982507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 -DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10670280456542969 s -INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.1086571216583252 s -DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=273613196677162498547765352273728690154, time:1750766721.2475622s req_ids:[8] -DEBUG 06-24 20:05:21 [manager.py:391] -ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:209.00964736938477ms total_cost_time:209.05542373657227ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5832 prompt_cache_len:5151 prompt_cache_ratio:0.8832304526748971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 -DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:21 [batch.py:51] router release req id 8 -INFO 06-24 20:05:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.1073005199432373 s -INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.10923051834106445 s -DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=186528521138944257094372448521661314318, time:1750766721.4587982s req_ids:[8] -DEBUG 06-24 20:05:21 [manager.py:391] -ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:203.7353515625ms total_cost_time:203.7956714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:5833 prompt_cache_len:5151 prompt_cache_ratio:0.883079033087605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 -DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10814571380615234 s -INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.11010575294494629 s -DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=287868392629287258833983322258470314665, time:1750766721.6688561s req_ids:[8] -DEBUG 06-24 20:05:21 [manager.py:391] -ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.49583435058594ms total_cost_time:205.55520057678223ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5834 prompt_cache_len:5151 prompt_cache_ratio:0.8829276654096675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 -DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:21 [manager.py:224] router recive req id 8 cost time 0.10844874382019043 s -INFO 06-24 20:05:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s -DEBUG 06-24 20:05:21 [manager.py:391] Prefill Batch: batch_id=183426558836825497026989485321622697486, time:1750766721.8795202s req_ids:[8] -DEBUG 06-24 20:05:21 [manager.py:391] -ERROR 06-24 20:05:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.31606674194336ms total_cost_time:205.37209510803223ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:5835 prompt_cache_len:5151 prompt_cache_ratio:0.8827763496143959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 -DEBUG 06-24 20:05:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10737228393554688 s -INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10944461822509766 s -DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=323924746044384931008380151683726151909, time:1750766722.0887597s req_ids:[8] -DEBUG 06-24 20:05:22 [manager.py:391] -ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:21 lightllm_req_id:8 first_token_cost:205.5795192718506ms total_cost_time:205.63888549804688ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5836 prompt_cache_len:5151 prompt_cache_ratio:0.88262508567512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 -DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10657358169555664 s -INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10860443115234375 s -DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=4314745179259761099362783005863162849, time:1750766722.3078513s req_ids:[8] -DEBUG 06-24 20:05:22 [manager.py:391] -ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:221.96006774902344ms total_cost_time:222.0172882080078ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5837 prompt_cache_len:5151 prompt_cache_ratio:0.8824738735651876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 -DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10709643363952637 s -INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10907626152038574 s -DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=320889084037266710720023836220273636333, time:1750766722.5297847s req_ids:[8] -DEBUG 06-24 20:05:22 [manager.py:391] -ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:208.1303596496582ms total_cost_time:208.1761360168457ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5838 prompt_cache_len:5151 prompt_cache_ratio:0.8823227132579651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 -DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10703325271606445 s -INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.10903215408325195 s -DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=318193857822376333687922538946690967845, time:1750766722.739481s req_ids:[8] -DEBUG 06-24 20:05:22 [manager.py:391] -ERROR 06-24 20:05:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:203.9961814880371ms total_cost_time:204.05817031860352ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:5839 prompt_cache_len:5151 prompt_cache_ratio:0.8821716047268368 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 -DEBUG 06-24 20:05:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:22 [manager.py:224] router recive req id 8 cost time 0.10788345336914062 s -INFO 06-24 20:05:22 [manager.py:68] detokenization recv req id 8 cost time 0.11006331443786621 s -DEBUG 06-24 20:05:22 [manager.py:391] Prefill Batch: batch_id=285871223811438429528001651557791563795, time:1750766722.9635055s req_ids:[8] -DEBUG 06-24 20:05:22 [manager.py:391] -ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:22 lightllm_req_id:8 first_token_cost:220.4742431640625ms total_cost_time:220.5188274383545ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5840 prompt_cache_len:5151 prompt_cache_ratio:0.8820205479452055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 -DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.10673141479492188 s -INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.10866951942443848 s -DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=61199987539522993132786479677822146577, time:1750766723.17567s req_ids:[8] -DEBUG 06-24 20:05:23 [manager.py:391] -INFO 06-24 20:05:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:05:23 [statics_utils.py:24] mean first cost: 246.08994080085674 ms -INFO 06-24 20:05:23 [statics_utils.py:24] mean per token cost: 0.1487752992162806 ms -ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:206.679105758667ms total_cost_time:206.73727989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5841 prompt_cache_len:5151 prompt_cache_ratio:0.881869542886492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 -DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.10946226119995117 s -INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.11142492294311523 s -DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=302376348211471132063346124126741473894, time:1750766723.3861055s req_ids:[8] -DEBUG 06-24 20:05:23 [manager.py:391] -ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:203.66311073303223ms total_cost_time:203.71007919311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5842 prompt_cache_len:5151 prompt_cache_ratio:0.8817185895241356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 -DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.3092224597930908 s -INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.31124067306518555 s -DEBUG 06-24 20:05:23 [manager.py:391] Prefill Batch: batch_id=325893024976816570180956671979932518530, time:1750766723.7928898s req_ids:[8] -DEBUG 06-24 20:05:23 [manager.py:391] -ERROR 06-24 20:05:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:404.9248695373535ms total_cost_time:404.9839973449707ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5843 prompt_cache_len:5151 prompt_cache_ratio:0.8815676878315933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 -DEBUG 06-24 20:05:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:23 [manager.py:224] router recive req id 8 cost time 0.1081240177154541 s -INFO 06-24 20:05:23 [manager.py:68] detokenization recv req id 8 cost time 0.11003232002258301 s -DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=140514976188979608081788129592750819575, time:1750766724.007185s req_ids:[8] -DEBUG 06-24 20:05:24 [manager.py:391] -ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:23 lightllm_req_id:8 first_token_cost:206.3581943511963ms total_cost_time:206.40230178833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5844 prompt_cache_len:5151 prompt_cache_ratio:0.8814168377823408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 -DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10740828514099121 s -INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.1094675064086914 s -DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=299218053319811646133950392668267074423, time:1750766724.2164278s req_ids:[8] -DEBUG 06-24 20:05:24 [manager.py:391] -ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:205.11126518249512ms total_cost_time:205.1548957824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5845 prompt_cache_len:5151 prompt_cache_ratio:0.8812660393498717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 -DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10695695877075195 s -INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.1090247631072998 s -DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=26565515681475904456185421761033144494, time:1750766724.4277542s req_ids:[8] -DEBUG 06-24 20:05:24 [manager.py:391] -ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:207.41558074951172ms total_cost_time:207.4582576751709ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5846 prompt_cache_len:5151 prompt_cache_ratio:0.8811152925076976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 -DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.11001420021057129 s -INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.11198973655700684 s -DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=137461144901055138580842537814338046640, time:1750766724.645591s req_ids:[8] -DEBUG 06-24 20:05:24 [manager.py:391] -ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:212.88204193115234ms total_cost_time:212.92638778686523ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5847 prompt_cache_len:5151 prompt_cache_ratio:0.8809645972293484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 -DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:24 [manager.py:224] router recive req id 8 cost time 0.10644078254699707 s -INFO 06-24 20:05:24 [manager.py:68] detokenization recv req id 8 cost time 0.10825562477111816 s -DEBUG 06-24 20:05:24 [manager.py:391] Prefill Batch: batch_id=166883432195156598061953158148513638313, time:1750766724.85619s req_ids:[8] -DEBUG 06-24 20:05:24 [manager.py:391] -ERROR 06-24 20:05:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:168.95627975463867ms total_cost_time:168.99776458740234ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5848 prompt_cache_len:5151 prompt_cache_ratio:0.8808139534883721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 -DEBUG 06-24 20:05:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:24 [batch.py:51] router release req id 8 -INFO 06-24 20:05:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10547590255737305 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10696935653686523 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=56530541912003010724294856250128985736, time:1750766725.0274258s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -DEBUG 06-24 20:05:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 25956.663 tokens/s -DEBUG 06-24 20:05:25 [stats.py:37] Avg prompt tokens throughput: 25947.757 tokens/s -DEBUG 06-24 20:05:25 [stats.py:37] Avg generate tokens throughput: 8.906 tokens/s -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:24 lightllm_req_id:8 first_token_cost:162.4436378479004ms total_cost_time:162.5041961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5849 prompt_cache_len:5151 prompt_cache_ratio:0.8806633612583348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 -DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10846948623657227 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.11008334159851074 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=87267703143384155545502246740985402850, time:1750766725.189954s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:153.9146900177002ms total_cost_time:153.95545959472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:5850 prompt_cache_len:5151 prompt_cache_ratio:0.8805128205128205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 -DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10838460922241211 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10997509956359863 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=260575524142660679089000493264261182054, time:1750766725.349024s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:153.51176261901855ms total_cost_time:153.55634689331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5851 prompt_cache_len:5151 prompt_cache_ratio:0.8803623312254315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 -DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10769081115722656 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10959649085998535 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=64777232367542088665923281608286836412, time:1750766725.508749s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:189.50343132019043ms total_cost_time:189.5453929901123ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5852 prompt_cache_len:5151 prompt_cache_ratio:0.8802118933697881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 -DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10701632499694824 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10892820358276367 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=56673744498726400636949890867722889474, time:1750766725.7076936s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:201.39050483703613ms total_cost_time:201.432466506958ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5853 prompt_cache_len:5151 prompt_cache_ratio:0.8800615069195284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 -DEBUG 06-24 20:05:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:25 [manager.py:224] router recive req id 8 cost time 0.10679388046264648 s -INFO 06-24 20:05:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886192321777344 s -DEBUG 06-24 20:05:25 [manager.py:391] Prefill Batch: batch_id=188389654727391739722603997664810042262, time:1750766725.9165316s req_ids:[8] -DEBUG 06-24 20:05:25 [manager.py:391] -ERROR 06-24 20:05:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:25 lightllm_req_id:8 first_token_cost:203.20391654968262ms total_cost_time:203.2461166381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5854 prompt_cache_len:5151 prompt_cache_ratio:0.8799111718483088 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 -DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.31009674072265625 s -INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.31215357780456543 s -DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=244491530443690671120648079858459053813, time:1750766726.319744s req_ids:[8] -DEBUG 06-24 20:05:26 [manager.py:391] -ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:404.25658226013184ms total_cost_time:404.30235862731934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5855 prompt_cache_len:5151 prompt_cache_ratio:0.8797608881298036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 -DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.1066434383392334 s -INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.1085808277130127 s -DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=216128223810542315090735730623472779013, time:1750766726.5354388s req_ids:[8] -DEBUG 06-24 20:05:26 [manager.py:391] -ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:209.34033393859863ms total_cost_time:209.38444137573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5856 prompt_cache_len:5151 prompt_cache_ratio:0.8796106557377049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 -DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.10633611679077148 s -INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.10825395584106445 s -DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=266760414679073014297838827265769629821, time:1750766726.7458067s req_ids:[8] -DEBUG 06-24 20:05:26 [manager.py:391] -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:202.99577713012695ms total_cost_time:203.05156707763672ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5857 prompt_cache_len:5151 prompt_cache_ratio:0.8794604746457231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 -DEBUG 06-24 20:05:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:26 [manager.py:224] router recive req id 8 cost time 0.10837936401367188 s -INFO 06-24 20:05:26 [manager.py:68] detokenization recv req id 8 cost time 0.11048579216003418 s -DEBUG 06-24 20:05:26 [manager.py:391] Prefill Batch: batch_id=220466120611512142612463095873976691933, time:1750766726.9550085s req_ids:[8] -DEBUG 06-24 20:05:26 [manager.py:391] -ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:26 lightllm_req_id:8 first_token_cost:203.57227325439453ms total_cost_time:203.61733436584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5858 prompt_cache_len:5151 prompt_cache_ratio:0.8793103448275862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 -DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10771727561950684 s -INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097266674041748 s -DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=134821454466315863154539601422746433918, time:1750766727.1628861s req_ids:[8] -DEBUG 06-24 20:05:27 [manager.py:391] -ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:204.0553092956543ms total_cost_time:204.09631729125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5859 prompt_cache_len:5151 prompt_cache_ratio:0.8791602662570405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 -DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10698294639587402 s -INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s -DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=132862812230334863190403326526797720794, time:1750766727.3710592s req_ids:[8] -DEBUG 06-24 20:05:27 [manager.py:391] -ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:205.60050010681152ms total_cost_time:205.65533638000488ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:5860 prompt_cache_len:5151 prompt_cache_ratio:0.8790102389078498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 -DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10823917388916016 s -INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s -DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=276831155914955110812739613017683809734, time:1750766727.5828426s req_ids:[8] -DEBUG 06-24 20:05:27 [manager.py:391] -ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:203.90558242797852ms total_cost_time:203.9499282836914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5861 prompt_cache_len:5151 prompt_cache_ratio:0.8788602627537963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 -DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10755062103271484 s -INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s -DEBUG 06-24 20:05:27 [manager.py:391] Prefill Batch: batch_id=158837560188965962129419693574975003791, time:1750766727.7916977s req_ids:[8] -DEBUG 06-24 20:05:27 [manager.py:391] -ERROR 06-24 20:05:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:205.20853996276855ms total_cost_time:205.25169372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5862 prompt_cache_len:5151 prompt_cache_ratio:0.8787103377686797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 -DEBUG 06-24 20:05:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:27 [manager.py:224] router recive req id 8 cost time 0.10861587524414062 s -INFO 06-24 20:05:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106574535369873 s -DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=267365800964887204896803713617676680851, time:1750766728.006392s req_ids:[8] -DEBUG 06-24 20:05:28 [manager.py:391] -ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:27 lightllm_req_id:8 first_token_cost:207.3071002960205ms total_cost_time:207.3521614074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5863 prompt_cache_len:5151 prompt_cache_ratio:0.8785604639263176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 -DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10674691200256348 s -INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s -DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=99786576739328307645200362814535502059, time:1750766728.214684s req_ids:[8] -DEBUG 06-24 20:05:28 [manager.py:391] -ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:206.00223541259766ms total_cost_time:206.04515075683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5864 prompt_cache_len:5151 prompt_cache_ratio:0.8784106412005457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 -DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10743165016174316 s -INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10934901237487793 s -DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=164259632419712909047324340898853753917, time:1750766728.424846s req_ids:[8] -DEBUG 06-24 20:05:28 [manager.py:391] -ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:205.06787300109863ms total_cost_time:205.1100730895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5865 prompt_cache_len:5151 prompt_cache_ratio:0.8782608695652174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 -DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:28 [manager.py:224] router recive req id 8 cost time 0.10778355598449707 s -INFO 06-24 20:05:28 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s -DEBUG 06-24 20:05:28 [manager.py:391] Prefill Batch: batch_id=302518441080223523771180885206035461137, time:1750766728.6457758s req_ids:[8] -DEBUG 06-24 20:05:28 [manager.py:391] -ERROR 06-24 20:05:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:218.1985378265381ms total_cost_time:218.24097633361816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5866 prompt_cache_len:5151 prompt_cache_ratio:0.8781111489942038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 -DEBUG 06-24 20:05:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.3093092441558838 s -INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.3112492561340332 s -DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=53001004075857478438966631503466376375, time:1750766729.055668s req_ids:[8] -DEBUG 06-24 20:05:29 [manager.py:391] -ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:28 lightllm_req_id:8 first_token_cost:405.87592124938965ms total_cost_time:405.92002868652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5867 prompt_cache_len:5151 prompt_cache_ratio:0.8779614794613942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 -DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10670900344848633 s -INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10856771469116211 s -DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=164708127857232841700698221882702931243, time:1750766729.268062s req_ids:[8] -DEBUG 06-24 20:05:29 [manager.py:391] -ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:207.7329158782959ms total_cost_time:207.77440071105957ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5868 prompt_cache_len:5151 prompt_cache_ratio:0.8778118609406953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 -DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10664701461791992 s -INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10817813873291016 s -DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=180964126688548559453482646540539033502, time:1750766729.4795063s req_ids:[8] -DEBUG 06-24 20:05:29 [manager.py:391] -ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:170.82476615905762ms total_cost_time:170.8657741546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5869 prompt_cache_len:5151 prompt_cache_ratio:0.8776622934060317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 -DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10749125480651855 s -INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10942888259887695 s -DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=81118989916174723421674683087830855928, time:1750766729.6521387s req_ids:[8] -DEBUG 06-24 20:05:29 [manager.py:391] -ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:195.43838500976562ms total_cost_time:195.48320770263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5870 prompt_cache_len:5151 prompt_cache_ratio:0.8775127768313459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 -DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:29 [manager.py:224] router recive req id 8 cost time 0.10671496391296387 s -INFO 06-24 20:05:29 [manager.py:68] detokenization recv req id 8 cost time 0.10876822471618652 s -DEBUG 06-24 20:05:29 [manager.py:391] Prefill Batch: batch_id=181945187667740630881186361410800597670, time:1750766729.8545318s req_ids:[8] -DEBUG 06-24 20:05:29 [manager.py:391] -ERROR 06-24 20:05:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:205.5642604827881ms total_cost_time:205.60908317565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5871 prompt_cache_len:5151 prompt_cache_ratio:0.8773633111905978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 -DEBUG 06-24 20:05:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10664796829223633 s -INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10855674743652344 s -DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=29715160147586941010783745802076900099, time:1750766730.0643985s req_ids:[8] -DEBUG 06-24 20:05:30 [manager.py:391] -ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:29 lightllm_req_id:8 first_token_cost:205.42550086975098ms total_cost_time:205.47175407409668ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5872 prompt_cache_len:5151 prompt_cache_ratio:0.8772138964577657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 -DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s -INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10993838310241699 s -DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=124273127636215433551459457371888764151, time:1750766730.2756112s req_ids:[8] -DEBUG 06-24 20:05:30 [manager.py:391] -ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:206.1138153076172ms total_cost_time:206.15911483764648ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5873 prompt_cache_len:5151 prompt_cache_ratio:0.8770645326068449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 -DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.1075901985168457 s -INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10959625244140625 s -DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=134650803688637093581155315749523218553, time:1750766730.4856517s req_ids:[8] -DEBUG 06-24 20:05:30 [manager.py:391] -ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:206.09211921691895ms total_cost_time:206.13551139831543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5874 prompt_cache_len:5151 prompt_cache_ratio:0.8769152196118488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 -DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.1062462329864502 s -INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10814857482910156 s -DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=222140940433738275078013409828372511769, time:1750766730.695645s req_ids:[8] -DEBUG 06-24 20:05:30 [manager.py:391] -ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:204.1454315185547ms total_cost_time:204.18810844421387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5875 prompt_cache_len:5151 prompt_cache_ratio:0.8767659574468085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 -DEBUG 06-24 20:05:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:30 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s -INFO 06-24 20:05:30 [manager.py:68] detokenization recv req id 8 cost time 0.10866093635559082 s -DEBUG 06-24 20:05:30 [manager.py:391] Prefill Batch: batch_id=265221968152092291397755966274140640296, time:1750766730.9055886s req_ids:[8] -DEBUG 06-24 20:05:30 [manager.py:391] -ERROR 06-24 20:05:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:207.7949047088623ms total_cost_time:207.8378200531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5876 prompt_cache_len:5151 prompt_cache_ratio:0.8766167460857727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 -DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10646438598632812 s -INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.10852289199829102 s -DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=236270317846096033880888357995200803581, time:1750766731.114795s req_ids:[8] -DEBUG 06-24 20:05:31 [manager.py:391] -ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:30 lightllm_req_id:8 first_token_cost:202.65817642211914ms total_cost_time:202.70037651062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5877 prompt_cache_len:5151 prompt_cache_ratio:0.8764675855028076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 -DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10719609260559082 s -INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s -DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=164403856939229580166367368514261368258, time:1750766731.3288934s req_ids:[8] -DEBUG 06-24 20:05:31 [manager.py:391] -ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:211.03167533874512ms total_cost_time:211.0755443572998ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5878 prompt_cache_len:5151 prompt_cache_ratio:0.8763184756719973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 -DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.1076197624206543 s -INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.1094961166381836 s -DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=269635757327963367040826164988685440570, time:1750766731.538054s req_ids:[8] -DEBUG 06-24 20:05:31 [manager.py:391] -ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:367.10381507873535ms total_cost_time:367.14720726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5879 prompt_cache_len:5151 prompt_cache_ratio:0.8761694165674434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 -DEBUG 06-24 20:05:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:31 [manager.py:224] router recive req id 8 cost time 0.10816717147827148 s -INFO 06-24 20:05:31 [manager.py:68] detokenization recv req id 8 cost time 0.11020207405090332 s -DEBUG 06-24 20:05:31 [manager.py:391] Prefill Batch: batch_id=214866989383641299867387913467734368570, time:1750766731.9029057s req_ids:[8] -DEBUG 06-24 20:05:31 [manager.py:391] -ERROR 06-24 20:05:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:195.87206840515137ms total_cost_time:195.91856002807617ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:5880 prompt_cache_len:5151 prompt_cache_ratio:0.8760204081632653 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 -DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s -INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.1100149154663086 s -DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=106725213212099907266868693060755539329, time:1750766732.1144238s req_ids:[8] -DEBUG 06-24 20:05:32 [manager.py:391] -ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:31 lightllm_req_id:8 first_token_cost:210.91699600219727ms total_cost_time:210.97517013549805ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:5881 prompt_cache_len:5151 prompt_cache_ratio:0.8758714504335997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 -DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10773730278015137 s -INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10972929000854492 s -DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=101947368284228535189896198802988323202, time:1750766732.3278925s req_ids:[8] -DEBUG 06-24 20:05:32 [manager.py:391] -ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:208.09173583984375ms total_cost_time:208.14967155456543ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:5882 prompt_cache_len:5151 prompt_cache_ratio:0.8757225433526011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 -DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10857439041137695 s -INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.11048221588134766 s -DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=97679567535992670821866787776492695721, time:1750766732.5382626s req_ids:[8] -DEBUG 06-24 20:05:32 [manager.py:391] -ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:205.1219940185547ms total_cost_time:205.18183708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5883 prompt_cache_len:5151 prompt_cache_ratio:0.8755736868944416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 -DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s -INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s -DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=178435416047852832337546773932555505404, time:1750766732.7475448s req_ids:[8] -DEBUG 06-24 20:05:32 [manager.py:391] -ERROR 06-24 20:05:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:204.9119472503662ms total_cost_time:204.9720287322998ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:5884 prompt_cache_len:5151 prompt_cache_ratio:0.8754248810333106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 -DEBUG 06-24 20:05:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:32 [manager.py:224] router recive req id 8 cost time 0.10698151588439941 s -INFO 06-24 20:05:32 [manager.py:68] detokenization recv req id 8 cost time 0.10898375511169434 s -DEBUG 06-24 20:05:32 [manager.py:391] Prefill Batch: batch_id=123666270782056740528551928461279383569, time:1750766732.955824s req_ids:[8] -DEBUG 06-24 20:05:32 [manager.py:391] -ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:32 lightllm_req_id:8 first_token_cost:204.07986640930176ms total_cost_time:204.12826538085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:5885 prompt_cache_len:5151 prompt_cache_ratio:0.8752761257434155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 -DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10792684555053711 s -INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10981273651123047 s -DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=23923425125933659962189477342226765404, time:1750766733.1656048s req_ids:[8] -DEBUG 06-24 20:05:33 [manager.py:391] -ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:205.38711547851562ms total_cost_time:205.4462432861328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5886 prompt_cache_len:5151 prompt_cache_ratio:0.8751274209989807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 -DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10747241973876953 s -INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10938882827758789 s -DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=21342355386349848596045215303548627103, time:1750766733.3752594s req_ids:[8] -DEBUG 06-24 20:05:33 [manager.py:391] -ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:203.85456085205078ms total_cost_time:203.91440391540527ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5887 prompt_cache_len:5151 prompt_cache_ratio:0.8749787667742484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 -DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.1076822280883789 s -INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10958719253540039 s -DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=109405726276601417605796498330202614, time:1750766733.5846536s req_ids:[8] -DEBUG 06-24 20:05:33 [manager.py:391] -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:204.209566116333ms total_cost_time:204.29134368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.08177757263183594ms prompt_token_num:5888 prompt_cache_len:5151 prompt_cache_ratio:0.8748301630434783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 -DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10813713073730469 s -INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.11012673377990723 s -DEBUG 06-24 20:05:33 [manager.py:391] Prefill Batch: batch_id=265887485452802363983375518624640218209, time:1750766733.793306s req_ids:[8] -DEBUG 06-24 20:05:33 [manager.py:391] -ERROR 06-24 20:05:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:206.98094367980957ms total_cost_time:207.03983306884766ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5889 prompt_cache_len:5151 prompt_cache_ratio:0.8746816097809476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 -DEBUG 06-24 20:05:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:33 [manager.py:224] router recive req id 8 cost time 0.10696673393249512 s -INFO 06-24 20:05:33 [manager.py:68] detokenization recv req id 8 cost time 0.10896015167236328 s -DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=183284529631983018413602053033903009801, time:1750766734.0030892s req_ids:[8] -DEBUG 06-24 20:05:34 [manager.py:391] -ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:33 lightllm_req_id:8 first_token_cost:203.200101852417ms total_cost_time:203.2606601715088ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5890 prompt_cache_len:5151 prompt_cache_ratio:0.8745331069609508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 -DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.10656476020812988 s -INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10844945907592773 s -DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=329287718120830507429386941046441970615, time:1750766734.2107918s req_ids:[8] -DEBUG 06-24 20:05:34 [manager.py:391] -ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:204.87046241760254ms total_cost_time:204.91671562194824ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:5891 prompt_cache_len:5151 prompt_cache_ratio:0.8743846545578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 -DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.1074211597442627 s -INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10935449600219727 s -DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=118783105674865520560722692819544422296, time:1750766734.420839s req_ids:[8] -DEBUG 06-24 20:05:34 [manager.py:391] -ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:405.9295654296875ms total_cost_time:405.9736728668213ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5892 prompt_cache_len:5151 prompt_cache_ratio:0.8742362525458248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 -DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:34 [manager.py:224] router recive req id 8 cost time 0.10655832290649414 s -INFO 06-24 20:05:34 [manager.py:68] detokenization recv req id 8 cost time 0.10847234725952148 s -DEBUG 06-24 20:05:34 [manager.py:391] Prefill Batch: batch_id=42792763810505906401079886865951985128, time:1750766734.823878s req_ids:[8] -DEBUG 06-24 20:05:34 [manager.py:391] -ERROR 06-24 20:05:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:197.0210075378418ms total_cost_time:197.0658302307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5893 prompt_cache_len:5151 prompt_cache_ratio:0.8740879008993722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 -DEBUG 06-24 20:05:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10727286338806152 s -INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10917186737060547 s -DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=321263043437776973982265304590274819667, time:1750766735.0438063s req_ids:[8] -DEBUG 06-24 20:05:35 [manager.py:391] -DEBUG 06-24 20:05:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 26388.493 tokens/s -DEBUG 06-24 20:05:35 [stats.py:37] Avg prompt tokens throughput: 26379.508 tokens/s -DEBUG 06-24 20:05:35 [stats.py:37] Avg generate tokens throughput: 8.985 tokens/s -ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:34 lightllm_req_id:8 first_token_cost:218.8894748687744ms total_cost_time:218.9333438873291ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5894 prompt_cache_len:5151 prompt_cache_ratio:0.8739395995928062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 -DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.1068718433380127 s -INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10873031616210938 s -DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=144628765856183523328501397702438528985, time:1750766735.2574515s req_ids:[8] -DEBUG 06-24 20:05:35 [manager.py:391] -ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:206.05969429016113ms total_cost_time:206.1018943786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5895 prompt_cache_len:5151 prompt_cache_ratio:0.8737913486005089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 -DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10648083686828613 s -INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10841155052185059 s -DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=259031312636184950882605828011309746693, time:1750766735.4679787s req_ids:[8] -DEBUG 06-24 20:05:35 [manager.py:391] -ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:211.1058235168457ms total_cost_time:211.1494541168213ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5896 prompt_cache_len:5151 prompt_cache_ratio:0.8736431478968792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 -DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10660958290100098 s -INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10854458808898926 s -DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=265383747771238565895225696460475206297, time:1750766735.694489s req_ids:[8] -DEBUG 06-24 20:05:35 [manager.py:391] -ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:219.5894718170166ms total_cost_time:219.6335792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5897 prompt_cache_len:5151 prompt_cache_ratio:0.8734949974563337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 -DEBUG 06-24 20:05:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:35 [manager.py:224] router recive req id 8 cost time 0.10738754272460938 s -INFO 06-24 20:05:35 [manager.py:68] detokenization recv req id 8 cost time 0.10929608345031738 s -DEBUG 06-24 20:05:35 [manager.py:391] Prefill Batch: batch_id=292285804024102488685631352250269746294, time:1750766735.9105105s req_ids:[8] -DEBUG 06-24 20:05:35 [manager.py:391] -ERROR 06-24 20:05:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:209.5177173614502ms total_cost_time:209.55872535705566ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:5898 prompt_cache_len:5151 prompt_cache_ratio:0.8733468972533062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 -DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10753750801086426 s -INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10939884185791016 s -DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=245816984363902994808846964949334554694, time:1750766736.1198466s req_ids:[8] -DEBUG 06-24 20:05:36 [manager.py:391] -ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:35 lightllm_req_id:8 first_token_cost:203.1242847442627ms total_cost_time:203.16720008850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5899 prompt_cache_len:5151 prompt_cache_ratio:0.8731988472622478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 -DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10772562026977539 s -INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10960125923156738 s -DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=168634482844164825401986559154295363480, time:1750766736.3298683s req_ids:[8] -DEBUG 06-24 20:05:36 [manager.py:391] -ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:205.79171180725098ms total_cost_time:205.83415031433105ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5900 prompt_cache_len:5151 prompt_cache_ratio:0.8730508474576271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 -DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10741281509399414 s -INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10930275917053223 s -DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=171193577296389203140549453621901726523, time:1750766736.5403395s req_ids:[8] -DEBUG 06-24 20:05:36 [manager.py:391] -ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:207.75318145751953ms total_cost_time:207.7951431274414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5901 prompt_cache_len:5151 prompt_cache_ratio:0.8729028978139298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 -DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10689401626586914 s -INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10878610610961914 s -DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=148517907394385037002585101021750028516, time:1750766736.7500126s req_ids:[8] -DEBUG 06-24 20:05:36 [manager.py:391] -ERROR 06-24 20:05:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:204.00190353393555ms total_cost_time:204.04481887817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5902 prompt_cache_len:5151 prompt_cache_ratio:0.8727549983056591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 -DEBUG 06-24 20:05:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:36 [manager.py:224] router recive req id 8 cost time 0.10775041580200195 s -INFO 06-24 20:05:36 [manager.py:68] detokenization recv req id 8 cost time 0.10962390899658203 s -DEBUG 06-24 20:05:36 [manager.py:391] Prefill Batch: batch_id=178241337152403988583453577654269440205, time:1750766736.9606419s req_ids:[8] -DEBUG 06-24 20:05:36 [manager.py:391] -ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:36 lightllm_req_id:8 first_token_cost:206.09188079833984ms total_cost_time:206.13527297973633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5903 prompt_cache_len:5151 prompt_cache_ratio:0.8726071489073353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 -DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.10789990425109863 s -INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.10987186431884766 s -DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=217514073107859485860528390999687253932, time:1750766737.1712465s req_ids:[8] -DEBUG 06-24 20:05:37 [manager.py:391] -ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:204.92243766784668ms total_cost_time:204.98156547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5904 prompt_cache_len:5151 prompt_cache_ratio:0.8724593495934959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 -DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.30886101722717285 s -INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.31081414222717285 s -DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=151456831129273920842199557142009969426, time:1750766737.597954s req_ids:[8] -DEBUG 06-24 20:05:37 [manager.py:391] -ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:422.0590591430664ms total_cost_time:422.105073928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5905 prompt_cache_len:5151 prompt_cache_ratio:0.872311600338696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 -DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:37 [manager.py:224] router recive req id 8 cost time 0.10759091377258301 s -INFO 06-24 20:05:37 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s -DEBUG 06-24 20:05:37 [manager.py:391] Prefill Batch: batch_id=38094264507912673617324947294864808263, time:1750766737.8069682s req_ids:[8] -DEBUG 06-24 20:05:37 [manager.py:391] -ERROR 06-24 20:05:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:204.30684089660645ms total_cost_time:204.36787605285645ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:5906 prompt_cache_len:5151 prompt_cache_ratio:0.8721639011175076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 -DEBUG 06-24 20:05:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10756897926330566 s -INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.10954737663269043 s -DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=163010358919744624737680257640292067652, time:1750766738.0158596s req_ids:[8] -DEBUG 06-24 20:05:38 [manager.py:391] -ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:37 lightllm_req_id:8 first_token_cost:220.81398963928223ms total_cost_time:220.87335586547852ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:5907 prompt_cache_len:5151 prompt_cache_ratio:0.87201625190452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 -DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.11127209663391113 s -INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1132051944732666 s -DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=111244164488570237428228703712044603737, time:1750766738.234603s req_ids:[8] -DEBUG 06-24 20:05:38 [manager.py:391] -ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:195.7724094390869ms total_cost_time:195.93048095703125ms,out_token_counter:1 mean_per_token_cost_time: 0.15807151794433594ms prompt_token_num:5908 prompt_cache_len:5151 prompt_cache_ratio:0.8718686526743399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 -DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10906171798706055 s -INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1109914779663086 s -DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=257019394870488663321458114262251973497, time:1750766738.4406104s req_ids:[8] -DEBUG 06-24 20:05:38 [manager.py:391] -ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:195.86491584777832ms total_cost_time:195.9061622619629ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:5909 prompt_cache_len:5151 prompt_cache_ratio:0.8717211034015908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 -DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10768747329711914 s -INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.1095888614654541 s -DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=316151401935455185858536369403285693098, time:1750766738.645771s req_ids:[8] -DEBUG 06-24 20:05:38 [manager.py:391] -ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:214.85352516174316ms total_cost_time:214.89834785461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5910 prompt_cache_len:5151 prompt_cache_ratio:0.8715736040609137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 -DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:38 [manager.py:224] router recive req id 8 cost time 0.10767364501953125 s -INFO 06-24 20:05:38 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s -DEBUG 06-24 20:05:38 [manager.py:391] Prefill Batch: batch_id=322094231416616230583717242209418326981, time:1750766738.8593504s req_ids:[8] -DEBUG 06-24 20:05:38 [manager.py:391] -ERROR 06-24 20:05:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:198.1973648071289ms total_cost_time:198.25315475463867ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:5911 prompt_cache_len:5151 prompt_cache_ratio:0.8714261546269667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 -DEBUG 06-24 20:05:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10902285575866699 s -INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11087703704833984 s -INFO 06-24 20:05:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=114660758787471047487035244980950098389, time:1750766739.0680265s req_ids:[8] -DEBUG 06-24 20:05:39 [manager.py:391] -ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:38 lightllm_req_id:8 first_token_cost:204.21338081359863ms total_cost_time:204.25891876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5912 prompt_cache_len:5151 prompt_cache_ratio:0.8712787550744249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 -DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10720062255859375 s -INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.10924005508422852 s -DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=249176044338494554850261883115622713228, time:1750766739.2778916s req_ids:[8] -DEBUG 06-24 20:05:39 [manager.py:391] -ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:214.21146392822266ms total_cost_time:214.25628662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:5913 prompt_cache_len:5151 prompt_cache_ratio:0.8711314053779807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 -DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10844969749450684 s -INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11038780212402344 s -DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=157419726146854861868801583056416366792, time:1750766739.4927173s req_ids:[8] -DEBUG 06-24 20:05:39 [manager.py:391] -ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:199.97692108154297ms total_cost_time:200.03247261047363ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:5914 prompt_cache_len:5151 prompt_cache_ratio:0.8709841055123436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 -DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10835099220275879 s -INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.11035275459289551 s -DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=77273508830135499011469575554018049537, time:1750766739.7001789s req_ids:[8] -DEBUG 06-24 20:05:39 [manager.py:391] -ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:203.25064659118652ms total_cost_time:203.2949924468994ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5915 prompt_cache_len:5151 prompt_cache_ratio:0.8708368554522401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 -DEBUG 06-24 20:05:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:39 [manager.py:224] router recive req id 8 cost time 0.10684943199157715 s -INFO 06-24 20:05:39 [manager.py:68] detokenization recv req id 8 cost time 0.10877513885498047 s -DEBUG 06-24 20:05:39 [manager.py:391] Prefill Batch: batch_id=133179247841005326712862507837341167875, time:1750766739.9093688s req_ids:[8] -DEBUG 06-24 20:05:39 [manager.py:391] -ERROR 06-24 20:05:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:207.11803436279297ms total_cost_time:207.16142654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5916 prompt_cache_len:5151 prompt_cache_ratio:0.8706896551724138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 -DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10852932929992676 s -INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104896068572998 s -DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=181626636729052983992307442542036322159, time:1750766740.122249s req_ids:[8] -DEBUG 06-24 20:05:40 [manager.py:391] -ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:39 lightllm_req_id:8 first_token_cost:367.49744415283203ms total_cost_time:367.5415515899658ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5917 prompt_cache_len:5151 prompt_cache_ratio:0.8705425046476255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 -DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10830473899841309 s -INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.11044979095458984 s -DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=152001827886474994107589433056427983166, time:1750766740.4863083s req_ids:[8] -DEBUG 06-24 20:05:40 [manager.py:391] -ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:204.80799674987793ms total_cost_time:204.85353469848633ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5918 prompt_cache_len:5151 prompt_cache_ratio:0.8703954038526529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 -DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10707783699035645 s -INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.10896968841552734 s -DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=300633274443823352844279249176860097927, time:1750766740.698046s req_ids:[8] -DEBUG 06-24 20:05:40 [manager.py:391] -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:197.4780559539795ms total_cost_time:197.52001762390137ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5919 prompt_cache_len:5151 prompt_cache_ratio:0.870248352762291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 -DEBUG 06-24 20:05:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:40 [manager.py:224] router recive req id 8 cost time 0.10670804977416992 s -INFO 06-24 20:05:40 [manager.py:68] detokenization recv req id 8 cost time 0.10865283012390137 s -DEBUG 06-24 20:05:40 [manager.py:391] Prefill Batch: batch_id=108102634263331283575687312091738207278, time:1750766740.9043477s req_ids:[8] -DEBUG 06-24 20:05:40 [manager.py:391] -ERROR 06-24 20:05:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:204.82683181762695ms total_cost_time:204.87046241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5920 prompt_cache_len:5151 prompt_cache_ratio:0.8701013513513514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 -DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10911393165588379 s -INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s -DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=235916167943205028888782710612671262144, time:1750766741.1142182s req_ids:[8] -DEBUG 06-24 20:05:41 [manager.py:391] -ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:40 lightllm_req_id:8 first_token_cost:205.95335960388184ms total_cost_time:205.99651336669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5921 prompt_cache_len:5151 prompt_cache_ratio:0.869954399594663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 -DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.1074364185333252 s -INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10950422286987305 s -DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=301608564280635476313578938345141703166, time:1750766741.3290462s req_ids:[8] -DEBUG 06-24 20:05:41 [manager.py:391] -ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:211.7331027984619ms total_cost_time:211.7929458618164ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:5922 prompt_cache_len:5151 prompt_cache_ratio:0.869807497467072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 -DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.1075282096862793 s -INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10944795608520508 s -DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=171429433126248174320469178699743283025, time:1750766741.5392385s req_ids:[8] -DEBUG 06-24 20:05:41 [manager.py:391] -ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:204.1032314300537ms total_cost_time:204.1473388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5923 prompt_cache_len:5151 prompt_cache_ratio:0.8696606449434409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 -DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10632443428039551 s -INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10831475257873535 s -DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=275252570970759150657999435858015477785, time:1750766741.7484965s req_ids:[8] -DEBUG 06-24 20:05:41 [manager.py:391] -ERROR 06-24 20:05:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:204.60772514343262ms total_cost_time:204.6511173248291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5924 prompt_cache_len:5151 prompt_cache_ratio:0.8695138419986496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 -DEBUG 06-24 20:05:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:41 [manager.py:224] router recive req id 8 cost time 0.10732293128967285 s -INFO 06-24 20:05:41 [manager.py:68] detokenization recv req id 8 cost time 0.10935759544372559 s -DEBUG 06-24 20:05:41 [manager.py:391] Prefill Batch: batch_id=44404975909782431791128604979760313821, time:1750766741.9576838s req_ids:[8] -DEBUG 06-24 20:05:41 [manager.py:391] -ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:41 lightllm_req_id:8 first_token_cost:206.30502700805664ms total_cost_time:206.34913444519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5925 prompt_cache_len:5151 prompt_cache_ratio:0.8693670886075949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 -DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10872936248779297 s -INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s -DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=102912430039983633823064929478532407910, time:1750766742.1720471s req_ids:[8] -DEBUG 06-24 20:05:42 [manager.py:391] -ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:210.62040328979492ms total_cost_time:210.6640338897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5926 prompt_cache_len:5151 prompt_cache_ratio:0.8692203847451907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 -DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10639762878417969 s -INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.10830879211425781 s -DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=277821636332524685911340699948649125369, time:1750766742.3843956s req_ids:[8] -DEBUG 06-24 20:05:42 [manager.py:391] -ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:225.9845733642578ms total_cost_time:226.0286808013916ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5927 prompt_cache_len:5151 prompt_cache_ratio:0.8690737303863675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 -DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:42 [batch.py:51] router release req id 8 -INFO 06-24 20:05:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.1065208911895752 s -INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.10833454132080078 s -DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=35995670673423249789911032409996801814, time:1750766742.6158297s req_ids:[8] -DEBUG 06-24 20:05:42 [manager.py:391] -ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:201.30038261413574ms total_cost_time:201.34258270263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5928 prompt_cache_len:5151 prompt_cache_ratio:0.8689271255060729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 -DEBUG 06-24 20:05:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:42 [manager.py:224] router recive req id 8 cost time 0.10826253890991211 s -INFO 06-24 20:05:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031079292297363 s -DEBUG 06-24 20:05:42 [manager.py:391] Prefill Batch: batch_id=16379239979929699217054309005453892902, time:1750766742.8247406s req_ids:[8] -DEBUG 06-24 20:05:42 [manager.py:391] -ERROR 06-24 20:05:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:206.3913345336914ms total_cost_time:206.4359188079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5929 prompt_cache_len:5151 prompt_cache_ratio:0.8687805700792713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 -DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.3093404769897461 s -INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.3112220764160156 s -DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=129439380047136193216685825403293327015, time:1750766743.251915s req_ids:[8] -DEBUG 06-24 20:05:43 [manager.py:391] -ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:42 lightllm_req_id:8 first_token_cost:427.47044563293457ms total_cost_time:427.52552032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:5930 prompt_cache_len:5151 prompt_cache_ratio:0.8686340640809443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 -DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s -INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.110137939453125 s -DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=127426075875698976452704002990004581271, time:1750766743.4636261s req_ids:[8] -DEBUG 06-24 20:05:43 [manager.py:391] -ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:205.5501937866211ms total_cost_time:205.59358596801758ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5931 prompt_cache_len:5151 prompt_cache_ratio:0.86848760748609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 -DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.10851263999938965 s -INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.1105799674987793 s -DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=201901603690389837099490448017098832166, time:1750766743.6756063s req_ids:[8] -DEBUG 06-24 20:05:43 [manager.py:391] -ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.36611366271973ms total_cost_time:202.4099826812744ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5932 prompt_cache_len:5151 prompt_cache_ratio:0.8683412002697235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 -DEBUG 06-24 20:05:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:43 [manager.py:224] router recive req id 8 cost time 0.1070563793182373 s -INFO 06-24 20:05:43 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s -DEBUG 06-24 20:05:43 [manager.py:391] Prefill Batch: batch_id=135160899731608524104247853024960416166, time:1750766743.8817742s req_ids:[8] -DEBUG 06-24 20:05:43 [manager.py:391] -ERROR 06-24 20:05:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.99744606018066ms total_cost_time:203.03964614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:5933 prompt_cache_len:5151 prompt_cache_ratio:0.8681948424068768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 -DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10766291618347168 s -INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s -DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=163682811720249964848261335850125851149, time:1750766744.0892234s req_ids:[8] -DEBUG 06-24 20:05:44 [manager.py:391] -ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:43 lightllm_req_id:8 first_token_cost:202.41141319274902ms total_cost_time:202.45671272277832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5934 prompt_cache_len:5151 prompt_cache_ratio:0.8680485338725986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 -DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s -INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10920333862304688 s -DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=214140426092099590280341965821289001537, time:1750766744.302957s req_ids:[8] -DEBUG 06-24 20:05:44 [manager.py:391] -ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:212.5225067138672ms total_cost_time:212.56661415100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:5935 prompt_cache_len:5151 prompt_cache_ratio:0.8679022746419545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 -DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10871243476867676 s -INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.11063480377197266 s -DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=124423844859483329659866094771703773225, time:1750766744.5143557s req_ids:[8] -DEBUG 06-24 20:05:44 [manager.py:391] -ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:205.07287979125977ms total_cost_time:205.11460304260254ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5936 prompt_cache_len:5151 prompt_cache_ratio:0.867756064690027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 -DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.1070559024810791 s -INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10898566246032715 s -DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=12073605763005068082579359030135786602, time:1750766744.7248528s req_ids:[8] -DEBUG 06-24 20:05:44 [manager.py:391] -ERROR 06-24 20:05:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:217.36812591552734ms total_cost_time:217.41366386413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5937 prompt_cache_len:5151 prompt_cache_ratio:0.8676099039919151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 -DEBUG 06-24 20:05:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:44 [manager.py:224] router recive req id 8 cost time 0.10693073272705078 s -INFO 06-24 20:05:44 [manager.py:68] detokenization recv req id 8 cost time 0.10883164405822754 s -DEBUG 06-24 20:05:44 [manager.py:391] Prefill Batch: batch_id=90805269083624779918806064048377080440, time:1750766744.9490588s req_ids:[8] -DEBUG 06-24 20:05:44 [manager.py:391] -ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:44 lightllm_req_id:8 first_token_cost:207.7796459197998ms total_cost_time:207.83400535583496ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:5938 prompt_cache_len:5151 prompt_cache_ratio:0.867463792522735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 -DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10896849632263184 s -INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s -DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=54128234162562651870199740421830130095, time:1750766745.1586773s req_ids:[8] -DEBUG 06-24 20:05:45 [manager.py:391] -DEBUG 06-24 20:05:45 [stats.py:37] Avg tokens(prompt+generate) throughput: 26333.194 tokens/s -DEBUG 06-24 20:05:45 [stats.py:37] Avg prompt tokens throughput: 26324.296 tokens/s -DEBUG 06-24 20:05:45 [stats.py:37] Avg generate tokens throughput: 8.898 tokens/s -ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:204.71572875976562ms total_cost_time:204.7584056854248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5939 prompt_cache_len:5151 prompt_cache_ratio:0.8673177302576192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 -DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s -INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.10935401916503906 s -DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=112842464064063731892480378855172805627, time:1750766745.368699s req_ids:[8] -DEBUG 06-24 20:05:45 [manager.py:391] -ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:208.58120918273926ms total_cost_time:208.62269401550293ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5940 prompt_cache_len:5151 prompt_cache_ratio:0.8671717171717171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 -DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.10722136497497559 s -INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.1091463565826416 s -DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=160339710711583950389343699079330280289, time:1750766745.5796046s req_ids:[8] -DEBUG 06-24 20:05:45 [manager.py:391] -ERROR 06-24 20:05:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:209.84625816345215ms total_cost_time:209.88821983337402ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5941 prompt_cache_len:5151 prompt_cache_ratio:0.8670257532401953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 -DEBUG 06-24 20:05:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:45 [manager.py:224] router recive req id 8 cost time 0.30837249755859375 s -INFO 06-24 20:05:45 [manager.py:68] detokenization recv req id 8 cost time 0.3104386329650879 s -DEBUG 06-24 20:05:45 [manager.py:391] Prefill Batch: batch_id=24260915096826942641782324216921801579, time:1750766745.9963193s req_ids:[8] -DEBUG 06-24 20:05:45 [manager.py:391] -ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:45 lightllm_req_id:8 first_token_cost:418.51139068603516ms total_cost_time:418.55502128601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5942 prompt_cache_len:5151 prompt_cache_ratio:0.8668798384382362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 -DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10840630531311035 s -INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.11040091514587402 s -DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=270284450233264779789483556926320525598, time:1750766746.230256s req_ids:[8] -DEBUG 06-24 20:05:46 [manager.py:391] -ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:217.2107696533203ms total_cost_time:217.2558307647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:5943 prompt_cache_len:5151 prompt_cache_ratio:0.8667339727410399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 -INFO 06-24 20:05:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10868406295776367 s -INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.11052703857421875 s -DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=160130006833086861845403311815971520988, time:1750766746.4473963s req_ids:[8] -DEBUG 06-24 20:05:46 [manager.py:391] -ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:209.99526977539062ms total_cost_time:210.03961563110352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5944 prompt_cache_len:5151 prompt_cache_ratio:0.8665881561238223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 -DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10767602920532227 s -INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s -DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=141508397205182845527168418005562325703, time:1750766746.6605678s req_ids:[8] -DEBUG 06-24 20:05:46 [manager.py:391] -ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:212.493896484375ms total_cost_time:212.5382423400879ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5945 prompt_cache_len:5151 prompt_cache_ratio:0.8664423885618167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 -DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:46 [manager.py:224] router recive req id 8 cost time 0.10787725448608398 s -INFO 06-24 20:05:46 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s -DEBUG 06-24 20:05:46 [manager.py:391] Prefill Batch: batch_id=328975330132373566534277658549044379774, time:1750766746.8724654s req_ids:[8] -DEBUG 06-24 20:05:46 [manager.py:391] -ERROR 06-24 20:05:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:206.94327354431152ms total_cost_time:207.0004940032959ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:5946 prompt_cache_len:5151 prompt_cache_ratio:0.8662966700302724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 -DEBUG 06-24 20:05:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10690045356750488 s -INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.10879755020141602 s -DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=97554934084245309769905067245421374941, time:1750766747.0847585s req_ids:[8] -DEBUG 06-24 20:05:47 [manager.py:391] -ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:46 lightllm_req_id:8 first_token_cost:212.7671241760254ms total_cost_time:212.81170845031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5947 prompt_cache_len:5151 prompt_cache_ratio:0.866151000504456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 -DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10864901542663574 s -INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11059999465942383 s -DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=201741406272564656110662075331555046199, time:1750766747.297187s req_ids:[8] -DEBUG 06-24 20:05:47 [manager.py:391] -ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:195.6155300140381ms total_cost_time:195.67322731018066ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:5948 prompt_cache_len:5151 prompt_cache_ratio:0.8660053799596503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 -DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10891103744506836 s -INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11089944839477539 s -DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=89413130611419990146106968658329412848, time:1750766747.5209644s req_ids:[8] -DEBUG 06-24 20:05:47 [manager.py:391] -ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:216.02249145507812ms total_cost_time:216.0813808441162ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:5949 prompt_cache_len:5151 prompt_cache_ratio:0.8658598083711548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 -DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10759353637695312 s -INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.10949969291687012 s -DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=36496211028081786553224994863031676824, time:1750766747.7373848s req_ids:[8] -DEBUG 06-24 20:05:47 [manager.py:391] -ERROR 06-24 20:05:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:207.36384391784668ms total_cost_time:207.40675926208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5950 prompt_cache_len:5151 prompt_cache_ratio:0.8657142857142858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 -DEBUG 06-24 20:05:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:47 [manager.py:224] router recive req id 8 cost time 0.10863590240478516 s -INFO 06-24 20:05:47 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s -DEBUG 06-24 20:05:47 [manager.py:391] Prefill Batch: batch_id=128278051825058905235174300962432237617, time:1750766747.9420974s req_ids:[8] -DEBUG 06-24 20:05:47 [manager.py:391] -ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:47 lightllm_req_id:8 first_token_cost:202.33607292175293ms total_cost_time:202.3794651031494ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5951 prompt_cache_len:5151 prompt_cache_ratio:0.8655688119643757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 -DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10887002944946289 s -INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11083364486694336 s -DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=99076477718657234264185631570161401892, time:1750766748.1506126s req_ids:[8] -DEBUG 06-24 20:05:48 [manager.py:391] -ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:213.29450607299805ms total_cost_time:213.34004402160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:5952 prompt_cache_len:5151 prompt_cache_ratio:0.8654233870967742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 -DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.1094810962677002 s -INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11152768135070801 s -DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=41483792469898467468946724507271740079, time:1750766748.3643184s req_ids:[8] -DEBUG 06-24 20:05:48 [manager.py:391] -ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:198.3785629272461ms total_cost_time:198.43769073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5953 prompt_cache_len:5151 prompt_cache_ratio:0.865278011086847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 -DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10754990577697754 s -INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966134071350098 s -DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=125902274602996686839827660586087944252, time:1750766748.57264s req_ids:[8] -DEBUG 06-24 20:05:48 [manager.py:391] -ERROR 06-24 20:05:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:389.24503326416016ms total_cost_time:389.30416107177734ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:5954 prompt_cache_len:5151 prompt_cache_ratio:0.8651326839099764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 -DEBUG 06-24 20:05:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:48 [manager.py:224] router recive req id 8 cost time 0.10943293571472168 s -INFO 06-24 20:05:48 [manager.py:68] detokenization recv req id 8 cost time 0.11158585548400879 s -DEBUG 06-24 20:05:48 [manager.py:391] Prefill Batch: batch_id=93825880385553514424003276212867291768, time:1750766748.9620855s req_ids:[8] -DEBUG 06-24 20:05:48 [manager.py:391] -ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:48 lightllm_req_id:8 first_token_cost:200.7579803466797ms total_cost_time:200.80041885375977ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5955 prompt_cache_len:5151 prompt_cache_ratio:0.8649874055415617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 -DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.10632729530334473 s -INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.10819625854492188 s -DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=232116031451780088576439673604026543872, time:1750766749.1782868s req_ids:[8] -DEBUG 06-24 20:05:49 [manager.py:391] -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:212.9521369934082ms total_cost_time:213.0117416381836ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:5956 prompt_cache_len:5151 prompt_cache_ratio:0.8648421759570182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 -DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.1081385612487793 s -INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.11017322540283203 s -DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=213150724174912118763928887621427491964, time:1750766749.392174s req_ids:[8] -DEBUG 06-24 20:05:49 [manager.py:391] -ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:214.97178077697754ms total_cost_time:215.0135040283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5957 prompt_cache_len:5151 prompt_cache_ratio:0.8646969951317778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 -DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.1094357967376709 s -INFO 06-24 20:05:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.11170125007629395 s -DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=290957100067134969791149684646401836800, time:1750766749.603784s req_ids:[8] -DEBUG 06-24 20:05:49 [manager.py:391] -ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:197.1290111541748ms total_cost_time:197.1719264984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5958 prompt_cache_len:5151 prompt_cache_ratio:0.8645518630412891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 -DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:49 [manager.py:224] router recive req id 8 cost time 0.10773658752441406 s -INFO 06-24 20:05:49 [manager.py:68] detokenization recv req id 8 cost time 0.1096949577331543 s -DEBUG 06-24 20:05:49 [manager.py:391] Prefill Batch: batch_id=265698227134966737820480521263344306228, time:1750766749.8126616s req_ids:[8] -DEBUG 06-24 20:05:49 [manager.py:391] -ERROR 06-24 20:05:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:206.65812492370605ms total_cost_time:206.70127868652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5959 prompt_cache_len:5151 prompt_cache_ratio:0.864406779661017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 -DEBUG 06-24 20:05:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10831665992736816 s -INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.11037087440490723 s -DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=66509292563218109666350743280734973981, time:1750766750.0224252s req_ids:[8] -DEBUG 06-24 20:05:50 [manager.py:391] -ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:49 lightllm_req_id:8 first_token_cost:212.57567405700684ms total_cost_time:212.61930465698242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5960 prompt_cache_len:5151 prompt_cache_ratio:0.8642617449664429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 -DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.1101219654083252 s -INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.11220383644104004 s -DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=68311589154665885032269309568024454609, time:1750766750.2369223s req_ids:[8] -DEBUG 06-24 20:05:50 [manager.py:391] -ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:198.98462295532227ms total_cost_time:199.02634620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:5961 prompt_cache_len:5151 prompt_cache_ratio:0.8641167589330649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 -DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10746598243713379 s -INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10944890975952148 s -DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=187089546669287271830158747387289628042, time:1750766750.4448397s req_ids:[8] -DEBUG 06-24 20:05:50 [manager.py:391] -ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:206.61354064941406ms total_cost_time:206.65717124938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5962 prompt_cache_len:5151 prompt_cache_ratio:0.8639718215363972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 -DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10704159736633301 s -INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10902833938598633 s -DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=336832597321368935844338338553773306677, time:1750766750.6548817s req_ids:[8] -DEBUG 06-24 20:05:50 [manager.py:391] -ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:203.58777046203613ms total_cost_time:203.63163948059082ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5963 prompt_cache_len:5151 prompt_cache_ratio:0.8638269327519705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 -DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:50 [manager.py:224] router recive req id 8 cost time 0.10625648498535156 s -INFO 06-24 20:05:50 [manager.py:68] detokenization recv req id 8 cost time 0.10817217826843262 s -DEBUG 06-24 20:05:50 [manager.py:391] Prefill Batch: batch_id=88711568420442278984566495200523889085, time:1750766750.8724267s req_ids:[8] -DEBUG 06-24 20:05:50 [manager.py:391] -ERROR 06-24 20:05:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:215.52801132202148ms total_cost_time:215.57188034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5964 prompt_cache_len:5151 prompt_cache_ratio:0.863682092555332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 -DEBUG 06-24 20:05:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10653257369995117 s -INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.10875248908996582 s -DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=282462538562656376028030544333096649558, time:1750766751.0828886s req_ids:[8] -DEBUG 06-24 20:05:51 [manager.py:391] -ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:50 lightllm_req_id:8 first_token_cost:212.22829818725586ms total_cost_time:212.27264404296875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5965 prompt_cache_len:5151 prompt_cache_ratio:0.8635373009220453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 -DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s -INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.11126470565795898 s -DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=233528624480736105939881513180315876313, time:1750766751.2968867s req_ids:[8] -DEBUG 06-24 20:05:51 [manager.py:391] -ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:198.78625869750977ms total_cost_time:198.82917404174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5966 prompt_cache_len:5151 prompt_cache_ratio:0.8633925578276902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 -DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.3094794750213623 s -INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.3116569519042969 s -DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=204490699424441161818721750353064685123, time:1750766751.7089698s req_ids:[8] -DEBUG 06-24 20:05:51 [manager.py:391] -ERROR 06-24 20:05:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:413.13838958740234ms total_cost_time:413.18440437316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:5967 prompt_cache_len:5151 prompt_cache_ratio:0.8632478632478633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 -DEBUG 06-24 20:05:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:51 [manager.py:224] router recive req id 8 cost time 0.10658478736877441 s -INFO 06-24 20:05:51 [manager.py:68] detokenization recv req id 8 cost time 0.1085357666015625 s -DEBUG 06-24 20:05:51 [manager.py:391] Prefill Batch: batch_id=310507340447272352776526822154154186446, time:1750766751.9393249s req_ids:[8] -DEBUG 06-24 20:05:51 [manager.py:391] -ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:51 lightllm_req_id:8 first_token_cost:222.6102352142334ms total_cost_time:222.65267372131348ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:5968 prompt_cache_len:5151 prompt_cache_ratio:0.863103217158177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 -DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10782361030578613 s -INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.1099996566772461 s -DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=233544677597134714539503231736653218209, time:1750766752.1509347s req_ids:[8] -DEBUG 06-24 20:05:52 [manager.py:391] -ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:215.27099609375ms total_cost_time:215.3148651123047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5969 prompt_cache_len:5151 prompt_cache_ratio:0.8629586195342603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 -DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10892891883850098 s -INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.11084771156311035 s -DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=174444203205848138744276360695841723479, time:1750766752.37145s req_ids:[8] -DEBUG 06-24 20:05:52 [manager.py:391] -ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:205.51061630249023ms total_cost_time:205.5532932281494ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5970 prompt_cache_len:5151 prompt_cache_ratio:0.8628140703517588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 -DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10739898681640625 s -INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.10953855514526367 s -DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=112352746556105372410892225798683454658, time:1750766752.5811937s req_ids:[8] -DEBUG 06-24 20:05:52 [manager.py:391] -ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:207.19170570373535ms total_cost_time:207.23485946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5971 prompt_cache_len:5151 prompt_cache_ratio:0.8626695695863339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 -DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10709023475646973 s -INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.10908055305480957 s -DEBUG 06-24 20:05:52 [manager.py:391] Prefill Batch: batch_id=81601539618143760172780318311462308195, time:1750766752.7926688s req_ids:[8] -DEBUG 06-24 20:05:52 [manager.py:391] -ERROR 06-24 20:05:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:205.88088035583496ms total_cost_time:205.92427253723145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5972 prompt_cache_len:5151 prompt_cache_ratio:0.8625251172136638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 -DEBUG 06-24 20:05:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:52 [manager.py:224] router recive req id 8 cost time 0.10826730728149414 s -INFO 06-24 20:05:52 [manager.py:68] detokenization recv req id 8 cost time 0.11013412475585938 s -DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=87497033073507314089656603941486919313, time:1750766753.0036s req_ids:[8] -DEBUG 06-24 20:05:53 [manager.py:391] -ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:52 lightllm_req_id:8 first_token_cost:206.53915405273438ms total_cost_time:206.58254623413086ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5973 prompt_cache_len:5151 prompt_cache_ratio:0.8623807132094425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 -DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:53 [batch.py:51] router release req id 8 -INFO 06-24 20:05:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:05:53 [statics_utils.py:24] mean first cost: 243.96374326227877 ms -INFO 06-24 20:05:53 [statics_utils.py:24] mean per token cost: 0.14018381944647604 ms -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10816001892089844 s -INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.11017537117004395 s -DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=160541479694583813617532479330037742474, time:1750766753.2145755s req_ids:[8] -DEBUG 06-24 20:05:53 [manager.py:391] -ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:205.8546543121338ms total_cost_time:205.89852333068848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5974 prompt_cache_len:5151 prompt_cache_ratio:0.8622363575493807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 -DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.1065666675567627 s -INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.10859274864196777 s -DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=136995348244606884830980312973085187167, time:1750766753.4409115s req_ids:[8] -DEBUG 06-24 20:05:53 [manager.py:391] -ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:222.83363342285156ms total_cost_time:222.87821769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5975 prompt_cache_len:5151 prompt_cache_ratio:0.862092050209205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 -DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s -INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.10944604873657227 s -DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=50777332959909825465698093432459104499, time:1750766753.6536458s req_ids:[8] -DEBUG 06-24 20:05:53 [manager.py:391] -ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:219.46430206298828ms total_cost_time:219.50793266296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5976 prompt_cache_len:5151 prompt_cache_ratio:0.8619477911646586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 -DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:53 [manager.py:224] router recive req id 8 cost time 0.10897660255432129 s -INFO 06-24 20:05:53 [manager.py:68] detokenization recv req id 8 cost time 0.11095976829528809 s -DEBUG 06-24 20:05:53 [manager.py:391] Prefill Batch: batch_id=232954780820797137435187514800106915712, time:1750766753.8694084s req_ids:[8] -DEBUG 06-24 20:05:53 [manager.py:391] -ERROR 06-24 20:05:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:195.71852684020996ms total_cost_time:195.76191902160645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5977 prompt_cache_len:5151 prompt_cache_ratio:0.8618035803915007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 -DEBUG 06-24 20:05:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10754776000976562 s -INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.10950040817260742 s -DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=18527719879534534413897382772101630377, time:1750766754.0758874s req_ids:[8] -DEBUG 06-24 20:05:54 [manager.py:391] -ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:53 lightllm_req_id:8 first_token_cost:209.98454093933105ms total_cost_time:210.02650260925293ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5978 prompt_cache_len:5151 prompt_cache_ratio:0.8616594178655068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 -DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10907244682312012 s -INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.11116528511047363 s -DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=112919749214389839482693669424151537171, time:1750766754.287938s req_ids:[8] -DEBUG 06-24 20:05:54 [manager.py:391] -ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:358.3719730377197ms total_cost_time:358.414888381958ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:5979 prompt_cache_len:5151 prompt_cache_ratio:0.8615153035624686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 -DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10930490493774414 s -INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.11124348640441895 s -DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=144461363616351480408530902084407063625, time:1750766754.6468432s req_ids:[8] -DEBUG 06-24 20:05:54 [manager.py:391] -ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:196.75254821777344ms total_cost_time:196.7945098876953ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5980 prompt_cache_len:5151 prompt_cache_ratio:0.861371237458194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 -DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:54 [manager.py:224] router recive req id 8 cost time 0.10689306259155273 s -INFO 06-24 20:05:54 [manager.py:68] detokenization recv req id 8 cost time 0.10881638526916504 s -DEBUG 06-24 20:05:54 [manager.py:391] Prefill Batch: batch_id=154027440084857640457042252118385257471, time:1750766754.8560915s req_ids:[8] -DEBUG 06-24 20:05:54 [manager.py:391] -ERROR 06-24 20:05:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:214.1861915588379ms total_cost_time:214.23053741455078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:5981 prompt_cache_len:5151 prompt_cache_ratio:0.8612272195285069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 -DEBUG 06-24 20:05:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10979127883911133 s -INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.11171960830688477 s -DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=147797385138120558451791632165306027119, time:1750766755.0709698s req_ids:[8] -DEBUG 06-24 20:05:55 [manager.py:391] -ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:54 lightllm_req_id:8 first_token_cost:201.19714736938477ms total_cost_time:201.24053955078125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5982 prompt_cache_len:5151 prompt_cache_ratio:0.8610832497492478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 -DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10650348663330078 s -INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10848164558410645 s -DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=16381280202610690378562921273254971086, time:1750766755.281764s req_ids:[8] -DEBUG 06-24 20:05:55 [manager.py:391] -DEBUG 06-24 20:05:55 [stats.py:37] Avg tokens(prompt+generate) throughput: 25920.329 tokens/s -DEBUG 06-24 20:05:55 [stats.py:37] Avg prompt tokens throughput: 25911.636 tokens/s -DEBUG 06-24 20:05:55 [stats.py:37] Avg generate tokens throughput: 8.693 tokens/s -ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:205.52515983581543ms total_cost_time:205.5678367614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:5983 prompt_cache_len:5151 prompt_cache_ratio:0.8609393280962728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 -DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10726261138916016 s -INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s -DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=274677425798838795707847528826137074219, time:1750766755.4967396s req_ids:[8] -DEBUG 06-24 20:05:55 [manager.py:391] -ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:208.60934257507324ms total_cost_time:208.65249633789062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5984 prompt_cache_len:5151 prompt_cache_ratio:0.8607954545454546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 -DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10795474052429199 s -INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10983943939208984 s -DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=137642812051166655080394664561366117838, time:1750766755.7114305s req_ids:[8] -DEBUG 06-24 20:05:55 [manager.py:391] -ERROR 06-24 20:05:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:211.4851474761963ms total_cost_time:211.5304470062256ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5985 prompt_cache_len:5151 prompt_cache_ratio:0.8606516290726817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 -DEBUG 06-24 20:05:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:55 [manager.py:224] router recive req id 8 cost time 0.10726571083068848 s -INFO 06-24 20:05:55 [manager.py:68] detokenization recv req id 8 cost time 0.10927128791809082 s -DEBUG 06-24 20:05:55 [manager.py:391] Prefill Batch: batch_id=107752442356689597624157973513950714135, time:1750766755.920558s req_ids:[8] -DEBUG 06-24 20:05:55 [manager.py:391] -ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:55 lightllm_req_id:8 first_token_cost:206.92682266235352ms total_cost_time:206.9873809814453ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:5986 prompt_cache_len:5151 prompt_cache_ratio:0.860507851653859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 -DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.1083228588104248 s -INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s -DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=205542709139994294980388602615776041534, time:1750766756.132257s req_ids:[8] -DEBUG 06-24 20:05:56 [manager.py:391] -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:05:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:210.50405502319336ms total_cost_time:210.54720878601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:5987 prompt_cache_len:5151 prompt_cache_ratio:0.8603641222649073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 -DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10917329788208008 s -INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.11127543449401855 s -DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=246875837048353226761367920699651202360, time:1750766756.344702s req_ids:[8] -DEBUG 06-24 20:05:56 [manager.py:391] -ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:196.08736038208008ms total_cost_time:196.13122940063477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5988 prompt_cache_len:5151 prompt_cache_ratio:0.8602204408817635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 -DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s -INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.1097874641418457 s -DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=124258850992971139955494198883481897181, time:1750766756.548638s req_ids:[8] -DEBUG 06-24 20:05:56 [manager.py:391] -ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:201.03883743286133ms total_cost_time:201.0822296142578ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5989 prompt_cache_len:5151 prompt_cache_ratio:0.8600768074803807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 -DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:05:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10655832290649414 s -INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.10842108726501465 s -DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=212001235388484545402876139944954118167, time:1750766756.7544267s req_ids:[8] -DEBUG 06-24 20:05:56 [manager.py:391] -ERROR 06-24 20:05:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:210.4170322418213ms total_cost_time:210.46066284179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:5990 prompt_cache_len:5151 prompt_cache_ratio:0.8599332220367278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 -DEBUG 06-24 20:05:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:56 [manager.py:224] router recive req id 8 cost time 0.10787153244018555 s -INFO 06-24 20:05:56 [manager.py:68] detokenization recv req id 8 cost time 0.10989618301391602 s -DEBUG 06-24 20:05:56 [manager.py:391] Prefill Batch: batch_id=195118263744690451613304927831181151582, time:1750766756.9678667s req_ids:[8] -DEBUG 06-24 20:05:56 [manager.py:391] -ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:56 lightllm_req_id:8 first_token_cost:366.00184440612793ms total_cost_time:366.04881286621094ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:5991 prompt_cache_len:5151 prompt_cache_ratio:0.8597896845267902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 -DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10695695877075195 s -INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.1089930534362793 s -DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=121972919079684487616597755571680397213, time:1750766757.332667s req_ids:[8] -DEBUG 06-24 20:05:57 [manager.py:391] -ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:194.9460506439209ms total_cost_time:194.98944282531738ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5992 prompt_cache_len:5151 prompt_cache_ratio:0.8596461949265688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 -DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.1072087287902832 s -INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.10913610458374023 s -DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=8547435340866420949849757099965804813, time:1750766757.542335s req_ids:[8] -DEBUG 06-24 20:05:57 [manager.py:391] -ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:207.12828636169434ms total_cost_time:207.17406272888184ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:5993 prompt_cache_len:5151 prompt_cache_ratio:0.8595027532120808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 -DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10664653778076172 s -INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.10862231254577637 s -DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=271941149118837530208832393733773513095, time:1750766757.7521827s req_ids:[8] -DEBUG 06-24 20:05:57 [manager.py:391] -ERROR 06-24 20:05:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:215.93117713928223ms total_cost_time:215.97647666931152ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:5994 prompt_cache_len:5151 prompt_cache_ratio:0.8593593593593594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 -DEBUG 06-24 20:05:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:57 [manager.py:224] router recive req id 8 cost time 0.10994219779968262 s -INFO 06-24 20:05:57 [manager.py:68] detokenization recv req id 8 cost time 0.11186718940734863 s -DEBUG 06-24 20:05:57 [manager.py:391] Prefill Batch: batch_id=205601139107771026948904433732531077996, time:1750766757.9676685s req_ids:[8] -DEBUG 06-24 20:05:57 [manager.py:391] -ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:57 lightllm_req_id:8 first_token_cost:159.2123508453369ms total_cost_time:159.2557430267334ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:5995 prompt_cache_len:5151 prompt_cache_ratio:0.8592160133444537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 -DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10760664939880371 s -INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.1096200942993164 s -DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=73719984015007487088031442765655411250, time:1750766758.1318653s req_ids:[8] -DEBUG 06-24 20:05:58 [manager.py:391] -ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:195.2533721923828ms total_cost_time:195.2979564666748ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:5996 prompt_cache_len:5151 prompt_cache_ratio:0.859072715143429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 -DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10694169998168945 s -INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.1089327335357666 s -DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=159971970776071997799528475970192885580, time:1750766758.3341148s req_ids:[8] -DEBUG 06-24 20:05:58 [manager.py:391] -ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:200.5155086517334ms total_cost_time:200.55699348449707ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:5997 prompt_cache_len:5151 prompt_cache_ratio:0.8589294647323662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 -DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10676407814025879 s -INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10884356498718262 s -DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=157241660816996135308371881641383297949, time:1750766758.5422552s req_ids:[8] -DEBUG 06-24 20:05:58 [manager.py:391] -ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:207.43274688720703ms total_cost_time:207.4747085571289ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:5998 prompt_cache_len:5151 prompt_cache_ratio:0.8587862620873624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 -DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10662698745727539 s -INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10860681533813477 s -DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=111218329920431504922144730136015821807, time:1750766758.754807s req_ids:[8] -DEBUG 06-24 20:05:58 [manager.py:391] -ERROR 06-24 20:05:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:207.19122886657715ms total_cost_time:207.23509788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:5999 prompt_cache_len:5151 prompt_cache_ratio:0.8586431071845307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 -DEBUG 06-24 20:05:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:58 [manager.py:224] router recive req id 8 cost time 0.10687136650085449 s -INFO 06-24 20:05:58 [manager.py:68] detokenization recv req id 8 cost time 0.10888338088989258 s -DEBUG 06-24 20:05:58 [manager.py:391] Prefill Batch: batch_id=259461990585894679082593061496660544785, time:1750766758.963067s req_ids:[8] -DEBUG 06-24 20:05:58 [manager.py:391] -ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:58 lightllm_req_id:8 first_token_cost:209.4886302947998ms total_cost_time:209.53059196472168ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6000 prompt_cache_len:5151 prompt_cache_ratio:0.8585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 -DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10695028305053711 s -INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.10888791084289551 s -DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=163430304304878730975312654902442757575, time:1750766759.1738033s req_ids:[8] -DEBUG 06-24 20:05:59 [manager.py:391] -ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:202.6839256286621ms total_cost_time:202.7263641357422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6001 prompt_cache_len:5151 prompt_cache_ratio:0.8583569405099151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 -DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10641145706176758 s -INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s -DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=161240589611204228590144522119641037923, time:1750766759.3914907s req_ids:[8] -DEBUG 06-24 20:05:59 [manager.py:391] -ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:215.20304679870605ms total_cost_time:215.24453163146973ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6002 prompt_cache_len:5151 prompt_cache_ratio:0.8582139286904366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 -DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10641741752624512 s -INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.10846757888793945 s -DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=107875550869559230957324294041768019506, time:1750766759.6005511s req_ids:[8] -DEBUG 06-24 20:05:59 [manager.py:391] -ERROR 06-24 20:05:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:05:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:05:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:212.98813819885254ms total_cost_time:213.0300998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6003 prompt_cache_len:5151 prompt_cache_ratio:0.8580709645177411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:05:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 -DEBUG 06-24 20:05:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:05:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:05:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:05:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:05:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:05:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:05:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:05:59 [manager.py:224] router recive req id 8 cost time 0.10885977745056152 s -INFO 06-24 20:05:59 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s -DEBUG 06-24 20:05:59 [manager.py:391] Prefill Batch: batch_id=110438577607883684908074342491993246647, time:1750766759.813092s req_ids:[8] -DEBUG 06-24 20:05:59 [manager.py:391] -ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:05:59 lightllm_req_id:8 first_token_cost:354.4738292694092ms total_cost_time:354.51793670654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6004 prompt_cache_len:5151 prompt_cache_ratio:0.8579280479680214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 -DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10744810104370117 s -INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10941052436828613 s -DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=123685105457773945532704575093532578410, time:1750766760.1686082s req_ids:[8] -DEBUG 06-24 20:06:00 [manager.py:391] -ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:195.87326049804688ms total_cost_time:195.91808319091797ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6005 prompt_cache_len:5151 prompt_cache_ratio:0.8577851790174854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 -DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10641646385192871 s -INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10831856727600098 s -DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=151111491359179037882126005651959245572, time:1750766760.3833294s req_ids:[8] -DEBUG 06-24 20:06:00 [manager.py:391] -ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:207.7465057373047ms total_cost_time:207.80706405639648ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6006 prompt_cache_len:5151 prompt_cache_ratio:0.8576423576423576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 -DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10820984840393066 s -INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.11015892028808594 s -DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=87715402846227758269307481141806007782, time:1750766760.5877888s req_ids:[8] -DEBUG 06-24 20:06:00 [manager.py:391] -ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:210.9675407409668ms total_cost_time:211.01045608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6007 prompt_cache_len:5151 prompt_cache_ratio:0.857499583818878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 -DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10796141624450684 s -INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.1098470687866211 s -DEBUG 06-24 20:06:00 [manager.py:391] Prefill Batch: batch_id=227833650595297036567008934024828968605, time:1750766760.8022716s req_ids:[8] -DEBUG 06-24 20:06:00 [manager.py:391] -ERROR 06-24 20:06:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:199.4950771331787ms total_cost_time:199.5375156402588ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6008 prompt_cache_len:5151 prompt_cache_ratio:0.8573568575233023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 -DEBUG 06-24 20:06:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:00 [manager.py:224] router recive req id 8 cost time 0.10747599601745605 s -INFO 06-24 20:06:00 [manager.py:68] detokenization recv req id 8 cost time 0.10933423042297363 s -DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=336515560310489788587484226026842813644, time:1750766761.0088184s req_ids:[8] -DEBUG 06-24 20:06:01 [manager.py:391] -ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:00 lightllm_req_id:8 first_token_cost:208.65273475646973ms total_cost_time:208.6946964263916ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6009 prompt_cache_len:5151 prompt_cache_ratio:0.8572141787319022 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 -DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s -INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10909914970397949 s -DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=319563846693610181713917807249832365667, time:1750766761.219058s req_ids:[8] -DEBUG 06-24 20:06:01 [manager.py:391] -ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:197.25298881530762ms total_cost_time:197.2970962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6010 prompt_cache_len:5151 prompt_cache_ratio:0.857071547420965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 -DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10690975189208984 s -INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10890483856201172 s -DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=100321666177804041049961602043741686447, time:1750766761.4301834s req_ids:[8] -DEBUG 06-24 20:06:01 [manager.py:391] -ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:209.97977256774902ms total_cost_time:210.0234031677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6011 prompt_cache_len:5151 prompt_cache_ratio:0.8569289635667942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 -DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10692715644836426 s -INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.10898065567016602 s -DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=332169097897630086266387728060200201779, time:1750766761.640435s req_ids:[8] -DEBUG 06-24 20:06:01 [manager.py:391] -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:205.73091506958008ms total_cost_time:205.77430725097656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6012 prompt_cache_len:5151 prompt_cache_ratio:0.8567864271457086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 -DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:01 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s -INFO 06-24 20:06:01 [manager.py:68] detokenization recv req id 8 cost time 0.11019253730773926 s -DEBUG 06-24 20:06:01 [manager.py:391] Prefill Batch: batch_id=41775081533758781953175881899275237685, time:1750766761.851978s req_ids:[8] -DEBUG 06-24 20:06:01 [manager.py:391] -ERROR 06-24 20:06:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:209.75136756896973ms total_cost_time:209.8104953765869ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6013 prompt_cache_len:5151 prompt_cache_ratio:0.8566439381340429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 -DEBUG 06-24 20:06:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10854363441467285 s -INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.11044883728027344 s -DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=75410760041881952069521960672429947763, time:1750766762.0625553s req_ids:[8] -DEBUG 06-24 20:06:02 [manager.py:391] -ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:01 lightllm_req_id:8 first_token_cost:166.3646697998047ms total_cost_time:166.40353202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.03886222839355469ms prompt_token_num:6014 prompt_cache_len:5151 prompt_cache_ratio:0.8565014965081477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 -DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10667276382446289 s -INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10857677459716797 s -DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=30432970171464819289030910924875046901, time:1750766762.2331657s req_ids:[8] -DEBUG 06-24 20:06:02 [manager.py:391] -ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:162.2323989868164ms total_cost_time:162.27412223815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6015 prompt_cache_len:5151 prompt_cache_ratio:0.856359102244389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 -DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10741424560546875 s -INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.1093289852142334 s -DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=191506844905751641003751721247594467984, time:1750766762.3952725s req_ids:[8] -DEBUG 06-24 20:06:02 [manager.py:391] -ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:353.7178039550781ms total_cost_time:353.762149810791ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6016 prompt_cache_len:5151 prompt_cache_ratio:0.856216755319149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 -DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10684728622436523 s -INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10874176025390625 s -DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=270946205760246357945941888305565268850, time:1750766762.7534137s req_ids:[8] -DEBUG 06-24 20:06:02 [manager.py:391] -ERROR 06-24 20:06:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:195.12009620666504ms total_cost_time:195.16396522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6017 prompt_cache_len:5151 prompt_cache_ratio:0.856074455708825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 -DEBUG 06-24 20:06:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:02 [manager.py:224] router recive req id 8 cost time 0.10697412490844727 s -INFO 06-24 20:06:02 [manager.py:68] detokenization recv req id 8 cost time 0.10887312889099121 s -DEBUG 06-24 20:06:02 [manager.py:391] Prefill Batch: batch_id=21704738408738167715889986739234348234, time:1750766762.9611528s req_ids:[8] -DEBUG 06-24 20:06:02 [manager.py:391] -ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:02 lightllm_req_id:8 first_token_cost:205.857515335083ms total_cost_time:205.9018611907959ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6018 prompt_cache_len:5151 prompt_cache_ratio:0.8559322033898306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 -DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10663151741027832 s -INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.10866212844848633 s -DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=311611103771604976056265245455257470882, time:1750766763.1723723s req_ids:[8] -DEBUG 06-24 20:06:03 [manager.py:391] -ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:214.49923515319824ms total_cost_time:214.54238891601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6019 prompt_cache_len:5151 prompt_cache_ratio:0.8557899983385945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 -DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10869503021240234 s -INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.11067461967468262 s -DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=94220829450184913536506285841266441674, time:1750766763.3867571s req_ids:[8] -DEBUG 06-24 20:06:03 [manager.py:391] -ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:200.69003105163574ms total_cost_time:200.73294639587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6020 prompt_cache_len:5151 prompt_cache_ratio:0.8556478405315615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 -DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.10764884948730469 s -INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s -DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=56415503249021951910052789260636417351, time:1750766763.5967462s req_ids:[8] -DEBUG 06-24 20:06:03 [manager.py:391] -ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:209.8684310913086ms total_cost_time:209.91110801696777ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6021 prompt_cache_len:5151 prompt_cache_ratio:0.8555057299451918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 -DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:03 [batch.py:51] router release req id 8 -INFO 06-24 20:06:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:03 [manager.py:224] router recive req id 8 cost time 0.1061089038848877 s -INFO 06-24 20:06:03 [manager.py:68] detokenization recv req id 8 cost time 0.1080174446105957 s -DEBUG 06-24 20:06:03 [manager.py:391] Prefill Batch: batch_id=134213832146758924644754119611002437326, time:1750766763.8197763s req_ids:[8] -DEBUG 06-24 20:06:03 [manager.py:391] -ERROR 06-24 20:06:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:211.5938663482666ms total_cost_time:211.63558959960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6022 prompt_cache_len:5151 prompt_cache_ratio:0.8553636665559615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 -DEBUG 06-24 20:06:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10748958587646484 s -INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10955405235290527 s -DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=102901567638538750537742707993187222280, time:1750766764.027554s req_ids:[8] -DEBUG 06-24 20:06:04 [manager.py:391] -ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:03 lightllm_req_id:8 first_token_cost:208.54949951171875ms total_cost_time:208.59384536743164ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6023 prompt_cache_len:5151 prompt_cache_ratio:0.8552216503403619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 -DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10725641250610352 s -INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10916709899902344 s -DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=91782245481498522849448084337209493358, time:1750766764.2506578s req_ids:[8] -DEBUG 06-24 20:06:04 [manager.py:391] -ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:215.19994735717773ms total_cost_time:215.26074409484863ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6024 prompt_cache_len:5151 prompt_cache_ratio:0.8550796812749004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 -DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.1073763370513916 s -INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10931038856506348 s -DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=266852163226625083481240219310839221658, time:1750766764.459633s req_ids:[8] -DEBUG 06-24 20:06:04 [manager.py:391] -ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:203.10592651367188ms total_cost_time:203.14908027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6025 prompt_cache_len:5151 prompt_cache_ratio:0.8549377593360996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 -DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10775899887084961 s -INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.10973167419433594 s -DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=340080733285546621874399297211970847306, time:1750766764.6679301s req_ids:[8] -DEBUG 06-24 20:06:04 [manager.py:391] -ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:204.55098152160645ms total_cost_time:204.59389686584473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6026 prompt_cache_len:5151 prompt_cache_ratio:0.8547958845004978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 -DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:04 [manager.py:224] router recive req id 8 cost time 0.10724592208862305 s -INFO 06-24 20:06:04 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s -DEBUG 06-24 20:06:04 [manager.py:391] Prefill Batch: batch_id=245105947362803867143756495800275066361, time:1750766764.881278s req_ids:[8] -DEBUG 06-24 20:06:04 [manager.py:391] -ERROR 06-24 20:06:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:204.9539089202881ms total_cost_time:205.0156593322754ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6027 prompt_cache_len:5151 prompt_cache_ratio:0.8546540567446491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 -DEBUG 06-24 20:06:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s -INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s -DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=306419502479036330333451320681627632543, time:1750766765.0877676s req_ids:[8] -DEBUG 06-24 20:06:05 [manager.py:391] -ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:04 lightllm_req_id:8 first_token_cost:216.9969081878662ms total_cost_time:217.0393466949463ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6028 prompt_cache_len:5151 prompt_cache_ratio:0.8545122760451228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 -DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.20849990844726562 s -INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.2102954387664795 s -DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=99326755756285572003130029737357720463, time:1750766765.43657s req_ids:[8] -DEBUG 06-24 20:06:05 [manager.py:391] -DEBUG 06-24 20:06:05 [stats.py:37] Avg tokens(prompt+generate) throughput: 27218.602 tokens/s -DEBUG 06-24 20:06:05 [stats.py:37] Avg prompt tokens throughput: 27209.542 tokens/s -DEBUG 06-24 20:06:05 [stats.py:37] Avg generate tokens throughput: 9.060 tokens/s -ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:306.0033321380615ms total_cost_time:306.0462474822998ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6029 prompt_cache_len:5151 prompt_cache_ratio:0.8543705423785038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 -DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.1068732738494873 s -INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.10877823829650879 s -DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=75675991891991260095550043185155196406, time:1750766765.623562s req_ids:[8] -DEBUG 06-24 20:06:05 [manager.py:391] -ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:217.3304557800293ms total_cost_time:217.37337112426758ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6030 prompt_cache_len:5151 prompt_cache_ratio:0.8542288557213931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 -DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:05 [manager.py:224] router recive req id 8 cost time 0.10902070999145508 s -INFO 06-24 20:06:05 [manager.py:68] detokenization recv req id 8 cost time 0.11091446876525879 s -DEBUG 06-24 20:06:05 [manager.py:391] Prefill Batch: batch_id=154242974668682939103279576651745810799, time:1750766765.8366601s req_ids:[8] -DEBUG 06-24 20:06:05 [manager.py:391] -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:197.47400283813477ms total_cost_time:197.5235939025879ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:6031 prompt_cache_len:5151 prompt_cache_ratio:0.8540872160504063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 -DEBUG 06-24 20:06:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10708832740783691 s -INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10910630226135254 s -DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=6655774158398050954417595977197143387, time:1750766766.0454202s req_ids:[8] -DEBUG 06-24 20:06:06 [manager.py:391] -ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:05 lightllm_req_id:8 first_token_cost:205.43169975280762ms total_cost_time:205.4755687713623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6032 prompt_cache_len:5151 prompt_cache_ratio:0.853945623342175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 -DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10643863677978516 s -INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10824823379516602 s -DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=151743558694802791331520334670078416812, time:1750766766.2707515s req_ids:[8] -DEBUG 06-24 20:06:06 [manager.py:391] -ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:220.88360786437988ms total_cost_time:220.9632396697998ms,out_token_counter:1 mean_per_token_cost_time: 0.07963180541992188ms prompt_token_num:6033 prompt_cache_len:5151 prompt_cache_ratio:0.8538040775733466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 -DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10901618003845215 s -INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.11097264289855957 s -DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=279400124865038890338536200820076921188, time:1750766766.4800735s req_ids:[8] -DEBUG 06-24 20:06:06 [manager.py:391] -ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:202.6515007019043ms total_cost_time:202.7144432067871ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:6034 prompt_cache_len:5151 prompt_cache_ratio:0.8536625787205834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 -DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10762691497802734 s -INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10955572128295898 s -DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=199150676529489813273115984516576434350, time:1750766766.6890802s req_ids:[8] -DEBUG 06-24 20:06:06 [manager.py:391] -ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:218.58716011047363ms total_cost_time:218.63222122192383ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6035 prompt_cache_len:5151 prompt_cache_ratio:0.8535211267605634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 -DEBUG 06-24 20:06:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:06 [manager.py:224] router recive req id 8 cost time 0.10685229301452637 s -INFO 06-24 20:06:06 [manager.py:68] detokenization recv req id 8 cost time 0.10884475708007812 s -DEBUG 06-24 20:06:06 [manager.py:391] Prefill Batch: batch_id=243928687079722007172797608296389297938, time:1750766766.906342s req_ids:[8] -DEBUG 06-24 20:06:06 [manager.py:391] -ERROR 06-24 20:06:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:194.69833374023438ms total_cost_time:194.75555419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6036 prompt_cache_len:5151 prompt_cache_ratio:0.8533797216699801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 -DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10849905014038086 s -INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.11041975021362305 s -DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=159630121017897343577225404866710007114, time:1750766767.1100843s req_ids:[8] -DEBUG 06-24 20:06:07 [manager.py:391] -ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:06 lightllm_req_id:8 first_token_cost:200.87647438049316ms total_cost_time:200.91843605041504ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6037 prompt_cache_len:5151 prompt_cache_ratio:0.8532383634255425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 -DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:07 [batch.py:51] router release req id 8 -INFO 06-24 20:06:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10791015625 s -INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s -DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=89359874644234034230952322667151343683, time:1750766767.317074s req_ids:[8] -DEBUG 06-24 20:06:07 [manager.py:391] -ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:211.639404296875ms total_cost_time:211.68088912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6038 prompt_cache_len:5151 prompt_cache_ratio:0.8530970520039748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 -DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10653114318847656 s -INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10852456092834473 s -DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=275890797649249121971842471571247033934, time:1750766767.5315483s req_ids:[8] -DEBUG 06-24 20:06:07 [manager.py:391] -ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:201.78961753845215ms total_cost_time:201.83229446411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6039 prompt_cache_len:5151 prompt_cache_ratio:0.8529557873820169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 -DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10765457153320312 s -INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s -DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=208362326449834795111760892590877417449, time:1750766767.7452273s req_ids:[8] -DEBUG 06-24 20:06:07 [manager.py:391] -ERROR 06-24 20:06:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:210.93225479125977ms total_cost_time:210.97373962402344ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6040 prompt_cache_len:5151 prompt_cache_ratio:0.8528145695364239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 -DEBUG 06-24 20:06:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:07 [manager.py:224] router recive req id 8 cost time 0.10795807838439941 s -INFO 06-24 20:06:07 [manager.py:68] detokenization recv req id 8 cost time 0.10999369621276855 s -DEBUG 06-24 20:06:07 [manager.py:391] Prefill Batch: batch_id=58035316657469180835239157941735372030, time:1750766767.955685s req_ids:[8] -DEBUG 06-24 20:06:07 [manager.py:391] -ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:07 lightllm_req_id:8 first_token_cost:362.180233001709ms total_cost_time:362.2410297393799ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6041 prompt_cache_len:5151 prompt_cache_ratio:0.8526733984439663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 -DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10658884048461914 s -INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.1086421012878418 s -DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=157975994899605062849202051970836880543, time:1750766768.316626s req_ids:[8] -DEBUG 06-24 20:06:08 [manager.py:391] -ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:198.49276542663574ms total_cost_time:198.53591918945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6042 prompt_cache_len:5151 prompt_cache_ratio:0.85253227408143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 -DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s -INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.11021566390991211 s -DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=179660772761188551798476275861152715091, time:1750766768.5318959s req_ids:[8] -DEBUG 06-24 20:06:08 [manager.py:391] -ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:212.25500106811523ms total_cost_time:212.30077743530273ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6043 prompt_cache_len:5151 prompt_cache_ratio:0.8523911964256164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 -DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:08 [batch.py:51] router release req id 8 -DEBUG 06-24 20:06:08 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:08 [manager.py:283] -DEBUG 06-24 20:06:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:08 [manager.py:284] -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10654163360595703 s -INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.10849785804748535 s -DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=12692235407278998001087392039979179807, time:1750766768.7447062s req_ids:[8] -DEBUG 06-24 20:06:08 [manager.py:391] -ERROR 06-24 20:06:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:206.26139640808105ms total_cost_time:206.30598068237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6044 prompt_cache_len:5151 prompt_cache_ratio:0.8522501654533422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 -DEBUG 06-24 20:06:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:08 [manager.py:224] router recive req id 8 cost time 0.10837578773498535 s -INFO 06-24 20:06:08 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s -DEBUG 06-24 20:06:08 [manager.py:391] Prefill Batch: batch_id=162360349079261907905771395898871572899, time:1750766768.9540884s req_ids:[8] -DEBUG 06-24 20:06:08 [manager.py:391] -ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:08 lightllm_req_id:8 first_token_cost:205.55973052978516ms total_cost_time:205.60216903686523ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6045 prompt_cache_len:5151 prompt_cache_ratio:0.8521091811414392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 -DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.11013984680175781 s -INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11215329170227051 s -DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=131047694827510380239155065517779337875, time:1750766769.1639116s req_ids:[8] -DEBUG 06-24 20:06:09 [manager.py:391] -ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:203.13048362731934ms total_cost_time:203.1722068786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6046 prompt_cache_len:5151 prompt_cache_ratio:0.8519682434667549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 -DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.1082005500793457 s -INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s -DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=218160160220883725165295105687964819184, time:1750766769.3737366s req_ids:[8] -DEBUG 06-24 20:06:09 [manager.py:391] -ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:208.40001106262207ms total_cost_time:208.44173431396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6047 prompt_cache_len:5151 prompt_cache_ratio:0.8518273524061518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 -DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10958194732666016 s -INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.1115567684173584 s -DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=53057828346809511827489385185871316848, time:1750766769.5858638s req_ids:[8] -DEBUG 06-24 20:06:09 [manager.py:391] -ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:205.28101921081543ms total_cost_time:205.3239345550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6048 prompt_cache_len:5151 prompt_cache_ratio:0.8516865079365079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 -DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s -INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.11006307601928711 s -DEBUG 06-24 20:06:09 [manager.py:391] Prefill Batch: batch_id=138425581106410269064914626258390239917, time:1750766769.7964027s req_ids:[8] -DEBUG 06-24 20:06:09 [manager.py:391] -ERROR 06-24 20:06:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:208.55164527893066ms total_cost_time:208.59479904174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6049 prompt_cache_len:5151 prompt_cache_ratio:0.8515457100347165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 -DEBUG 06-24 20:06:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:09 [manager.py:224] router recive req id 8 cost time 0.10650086402893066 s -INFO 06-24 20:06:09 [manager.py:68] detokenization recv req id 8 cost time 0.1083528995513916 s -DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=224298259361201211348792717512227495518, time:1750766770.0200956s req_ids:[8] -DEBUG 06-24 20:06:10 [manager.py:391] -ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:09 lightllm_req_id:8 first_token_cost:216.31431579589844ms total_cost_time:216.37248992919922ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:6050 prompt_cache_len:5151 prompt_cache_ratio:0.851404958677686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 -DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.1081082820892334 s -INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.1101071834564209 s -DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=238904984276494675904643293176135096303, time:1750766770.2286634s req_ids:[8] -DEBUG 06-24 20:06:10 [manager.py:391] -ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:203.7792205810547ms total_cost_time:203.82142066955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6051 prompt_cache_len:5151 prompt_cache_ratio:0.8512642538423401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 -DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.10772180557250977 s -INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.10970711708068848 s -DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=221655274772514624022504259940694232320, time:1750766770.4377048s req_ids:[8] -DEBUG 06-24 20:06:10 [manager.py:391] -ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:217.21744537353516ms total_cost_time:217.25916862487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6052 prompt_cache_len:5151 prompt_cache_ratio:0.851123595505618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 -DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:10 [manager.py:224] router recive req id 8 cost time 0.10675215721130371 s -INFO 06-24 20:06:10 [manager.py:68] detokenization recv req id 8 cost time 0.10876965522766113 s -DEBUG 06-24 20:06:10 [manager.py:391] Prefill Batch: batch_id=316861390726326815761557355728947650701, time:1750766770.6543322s req_ids:[8] -DEBUG 06-24 20:06:10 [manager.py:391] -ERROR 06-24 20:06:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:198.29273223876953ms total_cost_time:198.33827018737793ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6053 prompt_cache_len:5151 prompt_cache_ratio:0.8509829836444738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 -DEBUG 06-24 20:06:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.30963826179504395 s -INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.3117208480834961 s -DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=151781003075594576265789595161954593258, time:1750766771.0629683s req_ids:[8] -DEBUG 06-24 20:06:11 [manager.py:391] -ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:10 lightllm_req_id:8 first_token_cost:406.2960147857666ms total_cost_time:406.3389301300049ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6054 prompt_cache_len:5151 prompt_cache_ratio:0.8508424182358771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 -DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10912775993347168 s -INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.11102628707885742 s -DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=321827521104182704746025711132435272079, time:1750766771.2758784s req_ids:[8] -DEBUG 06-24 20:06:11 [manager.py:391] -ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:206.94208145141602ms total_cost_time:206.9854736328125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6055 prompt_cache_len:5151 prompt_cache_ratio:0.8507018992568125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 -DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10611176490783691 s -INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.10805320739746094 s -DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=68861608703555874757328734142258805857, time:1750766771.4874628s req_ids:[8] -DEBUG 06-24 20:06:11 [manager.py:391] -ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:206.6643238067627ms total_cost_time:206.70723915100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6056 prompt_cache_len:5151 prompt_cache_ratio:0.85056142668428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 -DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10673284530639648 s -INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.10860633850097656 s -DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=169491117484089444130241157449145170975, time:1750766771.6982973s req_ids:[8] -DEBUG 06-24 20:06:11 [manager.py:391] -ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:222.20373153686523ms total_cost_time:222.24712371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6057 prompt_cache_len:5151 prompt_cache_ratio:0.8504210004952947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 -DEBUG 06-24 20:06:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:11 [manager.py:224] router recive req id 8 cost time 0.10990595817565918 s -INFO 06-24 20:06:11 [manager.py:68] detokenization recv req id 8 cost time 0.1118631362915039 s -DEBUG 06-24 20:06:11 [manager.py:391] Prefill Batch: batch_id=117162993049022160015391316251353143723, time:1750766771.9168928s req_ids:[8] -DEBUG 06-24 20:06:11 [manager.py:391] -ERROR 06-24 20:06:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:193.58587265014648ms total_cost_time:193.62926483154297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6058 prompt_cache_len:5151 prompt_cache_ratio:0.8502806206668868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 -DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10622549057006836 s -INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10801482200622559 s -DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=301093093537012768122185984318227343637, time:1750766772.1185257s req_ids:[8] -DEBUG 06-24 20:06:12 [manager.py:391] -ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:11 lightllm_req_id:8 first_token_cost:191.68853759765625ms total_cost_time:191.73240661621094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6059 prompt_cache_len:5151 prompt_cache_ratio:0.8501402871761017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 -DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10796427726745605 s -INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s -DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=325631143478273732789486490704266853008, time:1750766772.3168254s req_ids:[8] -DEBUG 06-24 20:06:12 [manager.py:391] -ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:200.03080368041992ms total_cost_time:200.0730037689209ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6060 prompt_cache_len:5151 prompt_cache_ratio:0.85 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 -DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s -INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10931134223937988 s -DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=124308350601463742845531964351284427057, time:1750766772.524571s req_ids:[8] -DEBUG 06-24 20:06:12 [manager.py:391] -ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:203.98378372192383ms total_cost_time:204.02765274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6061 prompt_cache_len:5151 prompt_cache_ratio:0.8498597591156575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 -DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s -INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10946822166442871 s -DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=23353637144100624278590679040910324258, time:1750766772.7329254s req_ids:[8] -DEBUG 06-24 20:06:12 [manager.py:391] -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:204.91456985473633ms total_cost_time:204.9577236175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6062 prompt_cache_len:5151 prompt_cache_ratio:0.849719564500165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 -DEBUG 06-24 20:06:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:12 [manager.py:224] router recive req id 8 cost time 0.1079249382019043 s -INFO 06-24 20:06:12 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s -DEBUG 06-24 20:06:12 [manager.py:391] Prefill Batch: batch_id=127819628385223202540910316486211405321, time:1750766772.943142s req_ids:[8] -DEBUG 06-24 20:06:12 [manager.py:391] -ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:12 lightllm_req_id:8 first_token_cost:206.18557929992676ms total_cost_time:206.2389850616455ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:6063 prompt_cache_len:5151 prompt_cache_ratio:0.8495794161306284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 -DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10665512084960938 s -INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10851263999938965 s -DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=171567262922505299513823119486732596219, time:1750766773.1544828s req_ids:[8] -DEBUG 06-24 20:06:13 [manager.py:391] -ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:205.98769187927246ms total_cost_time:206.03084564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6064 prompt_cache_len:5151 prompt_cache_ratio:0.8494393139841688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 -DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10744571685791016 s -INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10932493209838867 s -DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=260528880721881099362340415622763548570, time:1750766773.3639982s req_ids:[8] -DEBUG 06-24 20:06:13 [manager.py:391] -ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:205.40261268615723ms total_cost_time:205.4445743560791ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6065 prompt_cache_len:5151 prompt_cache_ratio:0.8492992580379225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 -DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10728693008422852 s -INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10917234420776367 s -DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=300043569360611283178988110519808921296, time:1750766773.5755944s req_ids:[8] -DEBUG 06-24 20:06:13 [manager.py:391] -ERROR 06-24 20:06:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:366.35351181030273ms total_cost_time:366.39881134033203ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6066 prompt_cache_len:5151 prompt_cache_ratio:0.8491592482690405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 -DEBUG 06-24 20:06:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:13 [manager.py:224] router recive req id 8 cost time 0.10702824592590332 s -INFO 06-24 20:06:13 [manager.py:68] detokenization recv req id 8 cost time 0.10894966125488281 s -DEBUG 06-24 20:06:13 [manager.py:391] Prefill Batch: batch_id=323105297088449021781211148333292932588, time:1750766773.9380095s req_ids:[8] -DEBUG 06-24 20:06:13 [manager.py:391] -ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:13 lightllm_req_id:8 first_token_cost:199.0494728088379ms total_cost_time:199.10216331481934ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:6067 prompt_cache_len:5151 prompt_cache_ratio:0.8490192846546893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 -DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10783886909484863 s -INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1096656322479248 s -DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=31083084694115692945721492883722296898, time:1750766774.1551318s req_ids:[8] -DEBUG 06-24 20:06:14 [manager.py:391] -ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:209.68914031982422ms total_cost_time:209.7313404083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6068 prompt_cache_len:5151 prompt_cache_ratio:0.8488793671720501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 -DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10698962211608887 s -INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.10889911651611328 s -DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=291955693929551057049281708590529636693, time:1750766774.3644059s req_ids:[8] -DEBUG 06-24 20:06:14 [manager.py:391] -ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:206.25996589660645ms total_cost_time:206.30168914794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6069 prompt_cache_len:5151 prompt_cache_ratio:0.8487394957983193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 -DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:14 [batch.py:51] router release req id 8 -INFO 06-24 20:06:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10714554786682129 s -INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1091616153717041 s -DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=210093120209305620961475658908497460456, time:1750766774.5746593s req_ids:[8] -DEBUG 06-24 20:06:14 [manager.py:391] -ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:204.98418807983398ms total_cost_time:205.02686500549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6070 prompt_cache_len:5151 prompt_cache_ratio:0.8485996705107084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 -DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10711860656738281 s -INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1091606616973877 s -DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=68058616912046301648369426858999133725, time:1750766774.7847652s req_ids:[8] -DEBUG 06-24 20:06:14 [manager.py:391] -ERROR 06-24 20:06:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:205.4755687713623ms total_cost_time:205.5196762084961ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6071 prompt_cache_len:5151 prompt_cache_ratio:0.8484598912864437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 -DEBUG 06-24 20:06:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:14 [manager.py:224] router recive req id 8 cost time 0.10659432411193848 s -INFO 06-24 20:06:14 [manager.py:68] detokenization recv req id 8 cost time 0.1085045337677002 s -DEBUG 06-24 20:06:14 [manager.py:391] Prefill Batch: batch_id=266869136503257706735618173599352139853, time:1750766774.9953895s req_ids:[8] -DEBUG 06-24 20:06:14 [manager.py:391] -ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:14 lightllm_req_id:8 first_token_cost:205.93500137329102ms total_cost_time:205.9950828552246ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6072 prompt_cache_len:5151 prompt_cache_ratio:0.8483201581027668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 -DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10963273048400879 s -INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s -DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=19534602718431580561814923327965498632, time:1750766775.2048912s req_ids:[8] -DEBUG 06-24 20:06:15 [manager.py:391] -ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:204.5152187347412ms total_cost_time:204.5602798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6073 prompt_cache_len:5151 prompt_cache_ratio:0.8481804709369339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 -DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10764241218566895 s -INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.10958504676818848 s -DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=228492006378238062042066841337975883367, time:1750766775.416704s req_ids:[8] -DEBUG 06-24 20:06:15 [manager.py:391] -ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:06:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 27075.462 tokens/s -DEBUG 06-24 20:06:15 [stats.py:37] Avg prompt tokens throughput: 27066.418 tokens/s -DEBUG 06-24 20:06:15 [stats.py:37] Avg generate tokens throughput: 9.044 tokens/s -INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:208.6033821105957ms total_cost_time:208.6479663848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6074 prompt_cache_len:5151 prompt_cache_ratio:0.8480408297662166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 -DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10997176170349121 s -INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.11201000213623047 s -DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=51643496317541814573320543684449009021, time:1750766775.6281173s req_ids:[8] -DEBUG 06-24 20:06:15 [manager.py:391] -ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:203.54938507080078ms total_cost_time:203.59253883361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6075 prompt_cache_len:5151 prompt_cache_ratio:0.8479012345679012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 -DEBUG 06-24 20:06:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:15 [manager.py:224] router recive req id 8 cost time 0.10733246803283691 s -INFO 06-24 20:06:15 [manager.py:68] detokenization recv req id 8 cost time 0.1094510555267334 s -DEBUG 06-24 20:06:15 [manager.py:391] Prefill Batch: batch_id=172076749074984437575466145834307755679, time:1750766775.8367238s req_ids:[8] -DEBUG 06-24 20:06:15 [manager.py:391] -ERROR 06-24 20:06:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:205.7514190673828ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6076 prompt_cache_len:5151 prompt_cache_ratio:0.847761685319289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 -DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.30954909324645996 s -INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.3115055561065674 s -DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=193559354369953011294342665744597919469, time:1750766776.2482228s req_ids:[8] -DEBUG 06-24 20:06:16 [manager.py:391] -ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:15 lightllm_req_id:8 first_token_cost:411.9577407836914ms total_cost_time:412.0030403137207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6077 prompt_cache_len:5151 prompt_cache_ratio:0.8476221819976962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 -INFO 06-24 20:06:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.10954904556274414 s -INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.1114511489868164 s -DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=230695407670521310928532476937615309847, time:1750766776.4668732s req_ids:[8] -DEBUG 06-24 20:06:16 [manager.py:391] -ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:206.78019523620605ms total_cost_time:206.81428909301758ms,out_token_counter:1 mean_per_token_cost_time: 0.03409385681152344ms prompt_token_num:6078 prompt_cache_len:5151 prompt_cache_ratio:0.8474827245804541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 -DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.1049497127532959 s -INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.10688304901123047 s -DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=271268013338566478107730195079943376472, time:1750766776.6821978s req_ids:[8] -DEBUG 06-24 20:06:16 [manager.py:391] -ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:214.33568000793457ms total_cost_time:214.3561840057373ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6079 prompt_cache_len:5151 prompt_cache_ratio:0.8473433130449087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 -DEBUG 06-24 20:06:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:16 [manager.py:224] router recive req id 8 cost time 0.10604643821716309 s -INFO 06-24 20:06:16 [manager.py:68] detokenization recv req id 8 cost time 0.10781574249267578 s -DEBUG 06-24 20:06:16 [manager.py:391] Prefill Batch: batch_id=332304996389003127346489551757001399497, time:1750766776.894991s req_ids:[8] -DEBUG 06-24 20:06:16 [manager.py:391] -ERROR 06-24 20:06:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:205.8694362640381ms total_cost_time:205.916166305542ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6080 prompt_cache_len:5151 prompt_cache_ratio:0.8472039473684211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 -DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.1078634262084961 s -INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10965752601623535 s -DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=238444553988125691603373839760759709187, time:1750766777.1175091s req_ids:[8] -DEBUG 06-24 20:06:17 [manager.py:391] -ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:16 lightllm_req_id:8 first_token_cost:225.15010833740234ms total_cost_time:225.19397735595703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6081 prompt_cache_len:5151 prompt_cache_ratio:0.8470646275283671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 -DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10692858695983887 s -INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10859870910644531 s -DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=136989867071156774195720343540565107209, time:1750766777.344901s req_ids:[8] -DEBUG 06-24 20:06:17 [manager.py:391] -ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:206.60972595214844ms total_cost_time:206.65216445922852ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6082 prompt_cache_len:5151 prompt_cache_ratio:0.8469253535021375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 -DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10762357711791992 s -INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10931563377380371 s -DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=307858555659764366283867965664957438427, time:1750766777.5510867s req_ids:[8] -DEBUG 06-24 20:06:17 [manager.py:391] -ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:203.7034034729004ms total_cost_time:203.7487030029297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6083 prompt_cache_len:5151 prompt_cache_ratio:0.8467861252671379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 -DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10495376586914062 s -INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10667181015014648 s -DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=255122935323283699467144391108828202401, time:1750766777.7637253s req_ids:[8] -DEBUG 06-24 20:06:17 [manager.py:391] -ERROR 06-24 20:06:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:202.62598991394043ms total_cost_time:202.6498317718506ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6084 prompt_cache_len:5151 prompt_cache_ratio:0.846646942800789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 -DEBUG 06-24 20:06:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:17 [manager.py:224] router recive req id 8 cost time 0.10427260398864746 s -INFO 06-24 20:06:17 [manager.py:68] detokenization recv req id 8 cost time 0.10612249374389648 s -DEBUG 06-24 20:06:17 [manager.py:391] Prefill Batch: batch_id=125637894730720629841251063555081598942, time:1750766777.9607599s req_ids:[8] -DEBUG 06-24 20:06:17 [manager.py:391] -ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:17 lightllm_req_id:8 first_token_cost:212.07904815673828ms total_cost_time:212.10193634033203ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6085 prompt_cache_len:5151 prompt_cache_ratio:0.8465078060805259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 -DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10420560836791992 s -INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10605216026306152 s -DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=164090879465811865734923686937803075974, time:1750766778.1825109s req_ids:[8] -DEBUG 06-24 20:06:18 [manager.py:391] -ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:208.40144157409668ms total_cost_time:208.42504501342773ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6086 prompt_cache_len:5151 prompt_cache_ratio:0.8463687150837989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 -DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10448503494262695 s -INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10638952255249023 s -DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=311849000398133340649487743385729532947, time:1750766778.3891091s req_ids:[8] -DEBUG 06-24 20:06:18 [manager.py:391] -ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:204.4525146484375ms total_cost_time:204.47468757629395ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6087 prompt_cache_len:5151 prompt_cache_ratio:0.8462296697880729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 -DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.10472893714904785 s -INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.10666227340698242 s -DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=92187267207809233539537458920588809941, time:1750766778.597368s req_ids:[8] -DEBUG 06-24 20:06:18 [manager.py:391] -ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:203.83715629577637ms total_cost_time:203.8590908050537ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6088 prompt_cache_len:5151 prompt_cache_ratio:0.8460906701708278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 -DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:18 [manager.py:224] router recive req id 8 cost time 0.20444226264953613 s -INFO 06-24 20:06:18 [manager.py:68] detokenization recv req id 8 cost time 0.2062511444091797 s -DEBUG 06-24 20:06:18 [manager.py:391] Prefill Batch: batch_id=206062541825738528709371198982046326987, time:1750766778.894758s req_ids:[8] -DEBUG 06-24 20:06:18 [manager.py:391] -ERROR 06-24 20:06:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:246.4456558227539ms total_cost_time:246.46902084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6089 prompt_cache_len:5151 prompt_cache_ratio:0.8459517162095582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 -DEBUG 06-24 20:06:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10510683059692383 s -INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10703277587890625 s -DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=230953239405980865784183960803700014176, time:1750766779.040687s req_ids:[8] -DEBUG 06-24 20:06:19 [manager.py:391] -ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:18 lightllm_req_id:8 first_token_cost:177.89173126220703ms total_cost_time:177.91414260864258ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6090 prompt_cache_len:5151 prompt_cache_ratio:0.8458128078817734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 -DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10443329811096191 s -INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10633063316345215 s -DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=84761899105140769856861837934947310840, time:1750766779.2340703s req_ids:[8] -DEBUG 06-24 20:06:19 [manager.py:391] -ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:201.36070251464844ms total_cost_time:201.3843059539795ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6091 prompt_cache_len:5151 prompt_cache_ratio:0.8456739451649975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 -DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10441803932189941 s -INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.1063077449798584 s -DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=125005615929491505393152090411030572673, time:1750766779.441638s req_ids:[8] -DEBUG 06-24 20:06:19 [manager.py:391] -ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:200.89960098266602ms total_cost_time:200.92320442199707ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6092 prompt_cache_len:5151 prompt_cache_ratio:0.8455351280367696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 -DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10431599617004395 s -INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10630369186401367 s -DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=197508703828335778843817948177504437509, time:1750766779.6448328s req_ids:[8] -DEBUG 06-24 20:06:19 [manager.py:391] -ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:193.07923316955566ms total_cost_time:193.1009292602539ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6093 prompt_cache_len:5151 prompt_cache_ratio:0.8453963564746431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 -DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:19 [manager.py:224] router recive req id 8 cost time 0.10539793968200684 s -INFO 06-24 20:06:19 [manager.py:68] detokenization recv req id 8 cost time 0.10735702514648438 s -DEBUG 06-24 20:06:19 [manager.py:391] Prefill Batch: batch_id=124580408642116343837523537077441585861, time:1750766779.8427448s req_ids:[8] -DEBUG 06-24 20:06:19 [manager.py:391] -ERROR 06-24 20:06:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:205.38067817687988ms total_cost_time:205.40213584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6094 prompt_cache_len:5151 prompt_cache_ratio:0.8452576304561864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 -DEBUG 06-24 20:06:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10436224937438965 s -INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10629153251647949 s -DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=10934076778701682580893587398551280918, time:1750766780.0554173s req_ids:[8] -DEBUG 06-24 20:06:20 [manager.py:391] -ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:19 lightllm_req_id:8 first_token_cost:210.27636528015137ms total_cost_time:210.2982997894287ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6095 prompt_cache_len:5151 prompt_cache_ratio:0.8451189499589827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 -DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10620927810668945 s -INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10808396339416504 s -DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=308841724527493195675627558880119212621, time:1750766780.2664921s req_ids:[8] -DEBUG 06-24 20:06:20 [manager.py:391] -ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:211.1341953277588ms total_cost_time:211.16042137145996ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6096 prompt_cache_len:5151 prompt_cache_ratio:0.84498031496063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 -DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10492682456970215 s -INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.1071019172668457 s -DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=188825993691424026567293187609409176411, time:1750766780.480337s req_ids:[8] -DEBUG 06-24 20:06:20 [manager.py:391] -ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:209.34438705444336ms total_cost_time:209.3672752380371ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6097 prompt_cache_len:5151 prompt_cache_ratio:0.8448417254387404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 -DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10441708564758301 s -INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10637331008911133 s -DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=334849299458908717533360891303073156047, time:1750766780.705667s req_ids:[8] -DEBUG 06-24 20:06:20 [manager.py:391] -ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:219.49410438537598ms total_cost_time:219.51580047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6098 prompt_cache_len:5151 prompt_cache_ratio:0.8447031813709412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 -DEBUG 06-24 20:06:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:20 [manager.py:224] router recive req id 8 cost time 0.10492897033691406 s -INFO 06-24 20:06:20 [manager.py:68] detokenization recv req id 8 cost time 0.10697770118713379 s -DEBUG 06-24 20:06:20 [manager.py:391] Prefill Batch: batch_id=247251171324266444486086501888682516170, time:1750766780.9174411s req_ids:[8] -DEBUG 06-24 20:06:20 [manager.py:391] -ERROR 06-24 20:06:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:20 lightllm_req_id:8 first_token_cost:209.33914184570312ms total_cost_time:209.36155319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6099 prompt_cache_len:5151 prompt_cache_ratio:0.8445646827348746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 -DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10485672950744629 s -INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10689377784729004 s -DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=233826107457444844923875639856547905262, time:1750766781.1366055s req_ids:[8] -DEBUG 06-24 20:06:21 [manager.py:391] -ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:227.76436805725098ms total_cost_time:227.78725624084473ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6100 prompt_cache_len:5151 prompt_cache_ratio:0.8444262295081967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 -DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10472440719604492 s -INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.1067342758178711 s -DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=121978652246319463831904810709488873129, time:1750766781.3581998s req_ids:[8] -DEBUG 06-24 20:06:21 [manager.py:391] -ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:200.26731491088867ms total_cost_time:200.29044151306152ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6101 prompt_cache_len:5151 prompt_cache_ratio:0.844287821668579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 -DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10430002212524414 s -INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10643291473388672 s -INFO 06-24 20:06:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=148022403606126598876806728390213116473, time:1750766781.5776339s req_ids:[8] -DEBUG 06-24 20:06:21 [manager.py:391] -ERROR 06-24 20:06:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:383.4555149078369ms total_cost_time:383.47935676574707ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6102 prompt_cache_len:5151 prompt_cache_ratio:0.844149459193707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 -DEBUG 06-24 20:06:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:21 [manager.py:224] router recive req id 8 cost time 0.10547161102294922 s -INFO 06-24 20:06:21 [manager.py:68] detokenization recv req id 8 cost time 0.10766482353210449 s -DEBUG 06-24 20:06:21 [manager.py:391] Prefill Batch: batch_id=167325784571661921149648466926566223250, time:1750766781.948074s req_ids:[8] -DEBUG 06-24 20:06:21 [manager.py:391] -ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:21 lightllm_req_id:8 first_token_cost:203.66406440734863ms total_cost_time:203.6874294281006ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6103 prompt_cache_len:5151 prompt_cache_ratio:0.8440111420612814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 -DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10441040992736816 s -INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10657358169555664 s -DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=278309699379054471619229642513523834756, time:1750766782.1691618s req_ids:[8] -DEBUG 06-24 20:06:22 [manager.py:391] -ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:218.82963180541992ms total_cost_time:218.86062622070312ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:6104 prompt_cache_len:5151 prompt_cache_ratio:0.843872870249017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 -DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.1043233871459961 s -INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10654139518737793 s -DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=34458947381605882051351193243915910112, time:1750766782.3847382s req_ids:[8] -DEBUG 06-24 20:06:22 [manager.py:391] -ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:210.9212875366211ms total_cost_time:210.94441413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6105 prompt_cache_len:5151 prompt_cache_ratio:0.8437346437346437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 -DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10333704948425293 s -INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10522675514221191 s -DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=7272999439659390420071915596219741207, time:1750766782.593363s req_ids:[8] -DEBUG 06-24 20:06:22 [manager.py:391] -ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:188.57383728027344ms total_cost_time:188.59338760375977ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6106 prompt_cache_len:5151 prompt_cache_ratio:0.8435964624959057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 -DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10317826271057129 s -INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10506844520568848 s -DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=141317760587143255624025853604179120910, time:1750766782.7940385s req_ids:[8] -DEBUG 06-24 20:06:22 [manager.py:391] -ERROR 06-24 20:06:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:208.00113677978516ms total_cost_time:208.02044868469238ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6107 prompt_cache_len:5151 prompt_cache_ratio:0.8434583265105616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 -DEBUG 06-24 20:06:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:22 [manager.py:224] router recive req id 8 cost time 0.10430788993835449 s -INFO 06-24 20:06:22 [manager.py:68] detokenization recv req id 8 cost time 0.10638236999511719 s -DEBUG 06-24 20:06:22 [manager.py:391] Prefill Batch: batch_id=272977373493258621916293948208431436365, time:1750766782.9938126s req_ids:[8] -DEBUG 06-24 20:06:22 [manager.py:391] -ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:22 lightllm_req_id:8 first_token_cost:203.53412628173828ms total_cost_time:203.5536766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6108 prompt_cache_len:5151 prompt_cache_ratio:0.843320235756385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 -DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10313224792480469 s -INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10507392883300781 s -INFO 06-24 20:06:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=25300919702447270741370121062232038812, time:1750766783.2045593s req_ids:[8] -DEBUG 06-24 20:06:23 [manager.py:391] -ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:209.61236953735352ms total_cost_time:209.63215827941895ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6109 prompt_cache_len:5151 prompt_cache_ratio:0.8431821902111638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 -DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10340404510498047 s -INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10528254508972168 s -DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=305353340372624027929144639176119755243, time:1750766783.4176474s req_ids:[8] -DEBUG 06-24 20:06:23 [manager.py:391] -ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:204.96726036071777ms total_cost_time:204.9880027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6110 prompt_cache_len:5151 prompt_cache_ratio:0.8430441898527005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 -DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10335230827331543 s -INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10536670684814453 s -DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=290960491537300519251285696251829625472, time:1750766783.6261122s req_ids:[8] -DEBUG 06-24 20:06:23 [manager.py:391] -ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:208.93383026123047ms total_cost_time:208.9533805847168ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6111 prompt_cache_len:5151 prompt_cache_ratio:0.842906234658812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 -DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:23 [manager.py:224] router recive req id 8 cost time 0.10455441474914551 s -INFO 06-24 20:06:23 [manager.py:68] detokenization recv req id 8 cost time 0.10655093193054199 s -DEBUG 06-24 20:06:23 [manager.py:391] Prefill Batch: batch_id=218080960870294761331534899352510751129, time:1750766783.8440077s req_ids:[8] -DEBUG 06-24 20:06:23 [manager.py:391] -ERROR 06-24 20:06:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:214.10655975341797ms total_cost_time:214.1263484954834ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6112 prompt_cache_len:5151 prompt_cache_ratio:0.8427683246073299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 -DEBUG 06-24 20:06:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10477519035339355 s -INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.1069033145904541 s -DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=224552355293082718388846205899768770206, time:1750766784.061049s req_ids:[8] -DEBUG 06-24 20:06:24 [manager.py:391] -ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:23 lightllm_req_id:8 first_token_cost:205.98769187927246ms total_cost_time:206.0093879699707ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6113 prompt_cache_len:5151 prompt_cache_ratio:0.8426304596761001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 -DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10348773002624512 s -INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.10551333427429199 s -DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=246947666861936024699748553683267397229, time:1750766784.2651703s req_ids:[8] -DEBUG 06-24 20:06:24 [manager.py:391] -ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:206.99667930603027ms total_cost_time:207.0169448852539ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6114 prompt_cache_len:5151 prompt_cache_ratio:0.8424926398429833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 -DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10422301292419434 s -INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.10622429847717285 s -DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=299756380200373611391072335718948034271, time:1750766784.481824s req_ids:[8] -DEBUG 06-24 20:06:24 [manager.py:391] -ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:382.73024559020996ms total_cost_time:382.7517032623291ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6115 prompt_cache_len:5151 prompt_cache_ratio:0.8423548650858544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 -DEBUG 06-24 20:06:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:24 [manager.py:224] router recive req id 8 cost time 0.10453963279724121 s -INFO 06-24 20:06:24 [manager.py:68] detokenization recv req id 8 cost time 0.1064455509185791 s -DEBUG 06-24 20:06:24 [manager.py:391] Prefill Batch: batch_id=309494970834871294122755161822399773162, time:1750766784.854899s req_ids:[8] -DEBUG 06-24 20:06:24 [manager.py:391] -ERROR 06-24 20:06:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:207.58342742919922ms total_cost_time:207.60369300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6116 prompt_cache_len:5151 prompt_cache_ratio:0.842217135382603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 -DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10311412811279297 s -INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10492730140686035 s -DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=135146763130120856991333150772475227718, time:1750766785.0594633s req_ids:[8] -DEBUG 06-24 20:06:25 [manager.py:391] -ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:24 lightllm_req_id:8 first_token_cost:176.2712001800537ms total_cost_time:176.29051208496094ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6117 prompt_cache_len:5151 prompt_cache_ratio:0.8420794507111329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 -DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.1046912670135498 s -INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10671329498291016 s -DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=194703729639997601733597106153453669572, time:1750766785.2571788s req_ids:[8] -DEBUG 06-24 20:06:25 [manager.py:391] -ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:213.38415145874023ms total_cost_time:213.40346336364746ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6118 prompt_cache_len:5151 prompt_cache_ratio:0.8419418110493625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 -DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10375404357910156 s -INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10602784156799316 s -DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=141128383125760913879594072874103801806, time:1750766785.4771478s req_ids:[8] -DEBUG 06-24 20:06:25 [manager.py:391] -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:06:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 27280.706 tokens/s -DEBUG 06-24 20:06:25 [stats.py:37] Avg prompt tokens throughput: 27271.760 tokens/s -DEBUG 06-24 20:06:25 [stats.py:37] Avg generate tokens throughput: 8.946 tokens/s -INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:215.24596214294434ms total_cost_time:215.26670455932617ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6119 prompt_cache_len:5151 prompt_cache_ratio:0.8418042163752247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 -DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.10345172882080078 s -INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.10543012619018555 s -DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=38810360127257287293362483462333117489, time:1750766785.6892889s req_ids:[8] -DEBUG 06-24 20:06:25 [manager.py:391] -ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:206.25734329223633ms total_cost_time:206.27713203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6120 prompt_cache_len:5151 prompt_cache_ratio:0.8416666666666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 -DEBUG 06-24 20:06:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:25 [manager.py:224] router recive req id 8 cost time 0.1035909652709961 s -INFO 06-24 20:06:25 [manager.py:68] detokenization recv req id 8 cost time 0.1055753231048584 s -DEBUG 06-24 20:06:25 [manager.py:391] Prefill Batch: batch_id=226052987839706201670884770574816148378, time:1750766785.9013026s req_ids:[8] -DEBUG 06-24 20:06:25 [manager.py:391] -ERROR 06-24 20:06:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:209.53035354614258ms total_cost_time:209.5503807067871ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6121 prompt_cache_len:5151 prompt_cache_ratio:0.8415291619016501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 -DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10443997383117676 s -INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.1064460277557373 s -DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=22096028405950588065764586516796394258, time:1750766786.1141126s req_ids:[8] -DEBUG 06-24 20:06:26 [manager.py:391] -ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:25 lightllm_req_id:8 first_token_cost:206.12549781799316ms total_cost_time:206.1457633972168ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6122 prompt_cache_len:5151 prompt_cache_ratio:0.841391702058151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 -DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10439729690551758 s -INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10635900497436523 s -DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=325243897191519754945790619986661389796, time:1750766786.3255363s req_ids:[8] -DEBUG 06-24 20:06:26 [manager.py:391] -ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:207.55910873413086ms total_cost_time:207.5810432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6123 prompt_cache_len:5151 prompt_cache_ratio:0.8412542871141597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 -DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10320019721984863 s -INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10521268844604492 s -DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=84765910776844190678842431717733350882, time:1750766786.5344055s req_ids:[8] -DEBUG 06-24 20:06:26 [manager.py:391] -ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:201.52926445007324ms total_cost_time:201.55024528503418ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6124 prompt_cache_len:5151 prompt_cache_ratio:0.8411169170476812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 -DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.10446548461914062 s -INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10644865036010742 s -DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=143383876673861167419868309980376649490, time:1750766786.740108s req_ids:[8] -DEBUG 06-24 20:06:26 [manager.py:391] -ERROR 06-24 20:06:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:207.60035514831543ms total_cost_time:207.62109756469727ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6125 prompt_cache_len:5151 prompt_cache_ratio:0.8409795918367347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 -DEBUG 06-24 20:06:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:26 [manager.py:224] router recive req id 8 cost time 0.1042790412902832 s -INFO 06-24 20:06:26 [manager.py:68] detokenization recv req id 8 cost time 0.10624027252197266 s -DEBUG 06-24 20:06:26 [manager.py:391] Prefill Batch: batch_id=125871245928418826983408541299915505592, time:1750766786.952758s req_ids:[8] -DEBUG 06-24 20:06:26 [manager.py:391] -ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:26 lightllm_req_id:8 first_token_cost:202.84652709960938ms total_cost_time:202.8663158416748ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6126 prompt_cache_len:5151 prompt_cache_ratio:0.8408423114593536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 -DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10442042350769043 s -INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10631179809570312 s -DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=331288092322122495617245829926784338048, time:1750766787.1602623s req_ids:[8] -DEBUG 06-24 20:06:27 [manager.py:391] -ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:409.299373626709ms total_cost_time:409.3189239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6127 prompt_cache_len:5151 prompt_cache_ratio:0.8407050758935858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 -DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10421133041381836 s -INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10604667663574219 s -DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=260568356287072644763237119143286650608, time:1750766787.5677457s req_ids:[8] -DEBUG 06-24 20:06:27 [manager.py:391] -ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:201.1582851409912ms total_cost_time:201.17807388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6128 prompt_cache_len:5151 prompt_cache_ratio:0.8405678851174935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 -DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10314106941223145 s -INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10496234893798828 s -DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=267458317937483801225361854082613453127, time:1750766787.7785275s req_ids:[8] -DEBUG 06-24 20:06:27 [manager.py:391] -ERROR 06-24 20:06:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:208.12082290649414ms total_cost_time:208.14132690429688ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6129 prompt_cache_len:5151 prompt_cache_ratio:0.8404307391091532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 -DEBUG 06-24 20:06:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:27 [manager.py:224] router recive req id 8 cost time 0.10388016700744629 s -INFO 06-24 20:06:27 [manager.py:68] detokenization recv req id 8 cost time 0.10584092140197754 s -DEBUG 06-24 20:06:27 [manager.py:391] Prefill Batch: batch_id=331763519628841190231568367292577767933, time:1750766787.9901304s req_ids:[8] -DEBUG 06-24 20:06:27 [manager.py:391] -ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:27 lightllm_req_id:8 first_token_cost:207.94296264648438ms total_cost_time:207.9622745513916ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6130 prompt_cache_len:5151 prompt_cache_ratio:0.8402936378466558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 -DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10319399833679199 s -INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10501551628112793 s -DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=305752710936130591689075335204265638774, time:1750766788.2016904s req_ids:[8] -DEBUG 06-24 20:06:28 [manager.py:391] -ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:209.5024585723877ms total_cost_time:209.52272415161133ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6131 prompt_cache_len:5151 prompt_cache_ratio:0.8401565813081063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 -DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10292696952819824 s -INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10473394393920898 s -DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=145967706689425026602361702885872410523, time:1750766788.4141562s req_ids:[8] -DEBUG 06-24 20:06:28 [manager.py:391] -ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:206.97331428527832ms total_cost_time:206.99238777160645ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6132 prompt_cache_len:5151 prompt_cache_ratio:0.8400195694716243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 -DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.1044154167175293 s -INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10626769065856934 s -DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=191264099948528474948886321304434144103, time:1750766788.6259215s req_ids:[8] -DEBUG 06-24 20:06:28 [manager.py:391] -ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:210.4043960571289ms total_cost_time:210.42346954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6133 prompt_cache_len:5151 prompt_cache_ratio:0.8398826023153432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 -DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:28 [manager.py:224] router recive req id 8 cost time 0.10449385643005371 s -INFO 06-24 20:06:28 [manager.py:68] detokenization recv req id 8 cost time 0.10637187957763672 s -DEBUG 06-24 20:06:28 [manager.py:391] Prefill Batch: batch_id=242090911718453434807775366384303598317, time:1750766788.8500962s req_ids:[8] -DEBUG 06-24 20:06:28 [manager.py:391] -ERROR 06-24 20:06:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:219.9575901031494ms total_cost_time:219.97594833374023ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6134 prompt_cache_len:5151 prompt_cache_ratio:0.8397456798174111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 -DEBUG 06-24 20:06:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.1030726432800293 s -INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.1049191951751709 s -DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=157431062221938925865839713271753476351, time:1750766789.0636861s req_ids:[8] -DEBUG 06-24 20:06:29 [manager.py:391] -ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:28 lightllm_req_id:8 first_token_cost:207.50665664672852ms total_cost_time:207.52644538879395ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6135 prompt_cache_len:5151 prompt_cache_ratio:0.8396088019559902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 -DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10311484336853027 s -INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.1049809455871582 s -DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=134929130697788601482045786854694649371, time:1750766789.2742567s req_ids:[8] -DEBUG 06-24 20:06:29 [manager.py:391] -ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:206.66742324829102ms total_cost_time:206.68601989746094ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6136 prompt_cache_len:5151 prompt_cache_ratio:0.8394719687092569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 -DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10301494598388672 s -INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.10478711128234863 s -DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=15777616477381629866767359523101059399, time:1750766789.4847536s req_ids:[8] -DEBUG 06-24 20:06:29 [manager.py:391] -ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:370.1438903808594ms total_cost_time:370.1643943786621ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6137 prompt_cache_len:5151 prompt_cache_ratio:0.8393351800554016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 -DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:29 [manager.py:224] router recive req id 8 cost time 0.10407805442810059 s -INFO 06-24 20:06:29 [manager.py:68] detokenization recv req id 8 cost time 0.10587096214294434 s -DEBUG 06-24 20:06:29 [manager.py:391] Prefill Batch: batch_id=253517709683889396140375011504581507997, time:1750766789.8540235s req_ids:[8] -DEBUG 06-24 20:06:29 [manager.py:391] -ERROR 06-24 20:06:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:196.46215438842773ms total_cost_time:196.48003578186035ms,out_token_counter:1 mean_per_token_cost_time: 0.017881393432617188ms prompt_token_num:6138 prompt_cache_len:5151 prompt_cache_ratio:0.8391984359726296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 -DEBUG 06-24 20:06:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10304665565490723 s -INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10489773750305176 s -DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=184257812429556298306099130896696400107, time:1750766790.0608459s req_ids:[8] -DEBUG 06-24 20:06:30 [manager.py:391] -ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:29 lightllm_req_id:8 first_token_cost:205.90758323669434ms total_cost_time:205.92665672302246ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6139 prompt_cache_len:5151 prompt_cache_ratio:0.8390617364391595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 -DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10294175148010254 s -INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.1047518253326416 s -DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=164203541249587692058122716266663712570, time:1750766790.269834s req_ids:[8] -DEBUG 06-24 20:06:30 [manager.py:391] -ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:205.7480812072754ms total_cost_time:205.76763153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6140 prompt_cache_len:5151 prompt_cache_ratio:0.8389250814332248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 -DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10336852073669434 s -INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10518383979797363 s -DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=231616877556518156345629522787451039788, time:1750766790.4834332s req_ids:[8] -DEBUG 06-24 20:06:30 [manager.py:391] -ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:208.9226245880127ms total_cost_time:208.94122123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6141 prompt_cache_len:5151 prompt_cache_ratio:0.8387884709330727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 -DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10297250747680664 s -INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10527801513671875 s -DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=238878848851664897259707789097795683145, time:1750766790.694265s req_ids:[8] -DEBUG 06-24 20:06:30 [manager.py:391] -ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:207.36098289489746ms total_cost_time:207.3800563812256ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6142 prompt_cache_len:5151 prompt_cache_ratio:0.8386519049169652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 -DEBUG 06-24 20:06:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:30 [manager.py:224] router recive req id 8 cost time 0.10459351539611816 s -INFO 06-24 20:06:30 [manager.py:68] detokenization recv req id 8 cost time 0.10649776458740234 s -DEBUG 06-24 20:06:30 [manager.py:391] Prefill Batch: batch_id=70952653644051467278305454036591872139, time:1750766790.9066322s req_ids:[8] -DEBUG 06-24 20:06:30 [manager.py:391] -ERROR 06-24 20:06:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:215.03210067749023ms total_cost_time:215.05093574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6143 prompt_cache_len:5151 prompt_cache_ratio:0.8385153833631775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 -DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10431385040283203 s -INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10614824295043945 s -DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=295336196461553647570053591516784724948, time:1750766791.1214995s req_ids:[8] -DEBUG 06-24 20:06:31 [manager.py:391] -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:30 lightllm_req_id:8 first_token_cost:205.16204833984375ms total_cost_time:205.18183708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6144 prompt_cache_len:5151 prompt_cache_ratio:0.83837890625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 -DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10414886474609375 s -INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10599756240844727 s -DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=61852508994208769950654920921410801492, time:1750766791.3335516s req_ids:[8] -DEBUG 06-24 20:06:31 [manager.py:391] -ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:205.33370971679688ms total_cost_time:205.3532600402832ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6145 prompt_cache_len:5151 prompt_cache_ratio:0.8382424735557363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 -DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.1040341854095459 s -INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10590124130249023 s -DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=188559802601152452400496849733930747664, time:1750766791.5422516s req_ids:[8] -DEBUG 06-24 20:06:31 [manager.py:391] -ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:206.26163482666016ms total_cost_time:206.28023147583008ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6146 prompt_cache_len:5151 prompt_cache_ratio:0.8381060852587049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 -DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10401034355163574 s -INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10580015182495117 s -DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=1619228682943050669769246352976244528, time:1750766791.7528481s req_ids:[8] -DEBUG 06-24 20:06:31 [manager.py:391] -ERROR 06-24 20:06:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:206.04252815246582ms total_cost_time:206.06112480163574ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6147 prompt_cache_len:5151 prompt_cache_ratio:0.8379697413372377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 -DEBUG 06-24 20:06:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:31 [manager.py:224] router recive req id 8 cost time 0.10291433334350586 s -INFO 06-24 20:06:31 [manager.py:68] detokenization recv req id 8 cost time 0.10476374626159668 s -DEBUG 06-24 20:06:31 [manager.py:391] Prefill Batch: batch_id=287959402040441899584032853745045000267, time:1750766791.9619412s req_ids:[8] -DEBUG 06-24 20:06:31 [manager.py:391] -ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:31 lightllm_req_id:8 first_token_cost:203.6137580871582ms total_cost_time:203.63378524780273ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6148 prompt_cache_len:5151 prompt_cache_ratio:0.8378334417696812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 -DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.3045976161956787 s -INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.30650949478149414 s -DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=316620463359896037430047501284133251457, time:1750766792.3726034s req_ids:[8] -DEBUG 06-24 20:06:32 [manager.py:391] -ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:412.89305686950684ms total_cost_time:412.91236877441406ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6149 prompt_cache_len:5151 prompt_cache_ratio:0.8376971865343958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 -DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10439634323120117 s -INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10628390312194824 s -DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=87037759884092886547309206577495653779, time:1750766792.5882473s req_ids:[8] -DEBUG 06-24 20:06:32 [manager.py:391] -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:209.57469940185547ms total_cost_time:209.5937728881836ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6150 prompt_cache_len:5151 prompt_cache_ratio:0.8375609756097561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 -DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10426449775695801 s -INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10615897178649902 s -DEBUG 06-24 20:06:32 [manager.py:391] Prefill Batch: batch_id=146792570732096537179118145993766077747, time:1750766792.8015797s req_ids:[8] -DEBUG 06-24 20:06:32 [manager.py:391] -ERROR 06-24 20:06:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:206.88343048095703ms total_cost_time:206.90202713012695ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6151 prompt_cache_len:5151 prompt_cache_ratio:0.8374248089741505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 -DEBUG 06-24 20:06:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:32 [manager.py:224] router recive req id 8 cost time 0.10303759574890137 s -INFO 06-24 20:06:32 [manager.py:68] detokenization recv req id 8 cost time 0.10489201545715332 s -DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=59318418209261092103843418343049738002, time:1750766793.0117295s req_ids:[8] -DEBUG 06-24 20:06:33 [manager.py:391] -ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:32 lightllm_req_id:8 first_token_cost:209.16748046875ms total_cost_time:209.18822288513184ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6152 prompt_cache_len:5151 prompt_cache_ratio:0.8372886866059818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 -DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10385942459106445 s -INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10579681396484375 s -DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=114481994863591629468448857816390295088, time:1750766793.2241678s req_ids:[8] -DEBUG 06-24 20:06:33 [manager.py:391] -ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:206.43258094787598ms total_cost_time:206.4530849456787ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6153 prompt_cache_len:5151 prompt_cache_ratio:0.8371526084836665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 -DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10329389572143555 s -INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10515832901000977 s -DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=9572132528447915679900250264896881297, time:1750766793.4365184s req_ids:[8] -DEBUG 06-24 20:06:33 [manager.py:391] -ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:208.44197273254395ms total_cost_time:208.46056938171387ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6154 prompt_cache_len:5151 prompt_cache_ratio:0.8370165745856354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 -DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10425424575805664 s -INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10609817504882812 s -DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=90337888777422112167426687610347983383, time:1750766793.6470857s req_ids:[8] -DEBUG 06-24 20:06:33 [manager.py:391] -ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:207.63397216796875ms total_cost_time:207.65376091003418ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6155 prompt_cache_len:5151 prompt_cache_ratio:0.836880584890333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 -DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:33 [manager.py:224] router recive req id 8 cost time 0.10345053672790527 s -INFO 06-24 20:06:33 [manager.py:68] detokenization recv req id 8 cost time 0.10531258583068848 s -DEBUG 06-24 20:06:33 [manager.py:391] Prefill Batch: batch_id=252429212484329802484051078304187855971, time:1750766793.856358s req_ids:[8] -DEBUG 06-24 20:06:33 [manager.py:391] -ERROR 06-24 20:06:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:208.61434936523438ms total_cost_time:208.6338996887207ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6156 prompt_cache_len:5151 prompt_cache_ratio:0.8367446393762183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 -DEBUG 06-24 20:06:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10409355163574219 s -INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.1061556339263916 s -DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=6472940603598857380552641733867814902, time:1750766794.0691113s req_ids:[8] -DEBUG 06-24 20:06:34 [manager.py:391] -ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:33 lightllm_req_id:8 first_token_cost:207.3190212249756ms total_cost_time:207.3376178741455ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6157 prompt_cache_len:5151 prompt_cache_ratio:0.8366087380217638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 -DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10341620445251465 s -INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10528111457824707 s -DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=4476805069943366652938146203316118081, time:1750766794.2795162s req_ids:[8] -DEBUG 06-24 20:06:34 [manager.py:391] -ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:207.2463035583496ms total_cost_time:207.26680755615234ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6158 prompt_cache_len:5151 prompt_cache_ratio:0.8364728808054563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 -DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10430383682250977 s -INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10614371299743652 s -DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=302005619389447012991952819556627360605, time:1750766794.4938734s req_ids:[8] -DEBUG 06-24 20:06:34 [manager.py:391] -ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:206.79545402526855ms total_cost_time:206.81452751159668ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6159 prompt_cache_len:5151 prompt_cache_ratio:0.8363370677057964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 -DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10430335998535156 s -INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10613608360290527 s -DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=332032272582930476422586949342274249412, time:1750766794.701615s req_ids:[8] -DEBUG 06-24 20:06:34 [manager.py:391] -ERROR 06-24 20:06:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:205.30390739440918ms total_cost_time:205.3244113922119ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6160 prompt_cache_len:5151 prompt_cache_ratio:0.8362012987012987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 -DEBUG 06-24 20:06:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:34 [manager.py:224] router recive req id 8 cost time 0.10301518440246582 s -INFO 06-24 20:06:34 [manager.py:68] detokenization recv req id 8 cost time 0.10484004020690918 s -DEBUG 06-24 20:06:34 [manager.py:391] Prefill Batch: batch_id=333851615067819169426055893232590404915, time:1750766794.910514s req_ids:[8] -DEBUG 06-24 20:06:34 [manager.py:391] -ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:34 lightllm_req_id:8 first_token_cost:349.86233711242676ms total_cost_time:349.8823642730713ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6161 prompt_cache_len:5151 prompt_cache_ratio:0.8360655737704918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 -DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s -INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10494661331176758 s -DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=291336576570570190445050849118329221926, time:1750766795.2574146s req_ids:[8] -DEBUG 06-24 20:06:35 [manager.py:391] -ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:199.2471218109131ms total_cost_time:199.2659568786621ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6162 prompt_cache_len:5151 prompt_cache_ratio:0.8359298928919182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 -DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10412168502807617 s -INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10608029365539551 s -DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=156860295167591743292688129874894849012, time:1750766795.467346s req_ids:[8] -DEBUG 06-24 20:06:35 [manager.py:391] -ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:207.63754844665527ms total_cost_time:207.6573371887207ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6163 prompt_cache_len:5151 prompt_cache_ratio:0.8357942560441344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 -DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10349726676940918 s -INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10544228553771973 s -DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=334727225578316448577823985276164232942, time:1750766795.6793237s req_ids:[8] -DEBUG 06-24 20:06:35 [manager.py:391] -DEBUG 06-24 20:06:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 27318.235 tokens/s -DEBUG 06-24 20:06:35 [stats.py:37] Avg prompt tokens throughput: 27309.441 tokens/s -DEBUG 06-24 20:06:35 [stats.py:37] Avg generate tokens throughput: 8.794 tokens/s -ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:210.0365161895752ms total_cost_time:210.05535125732422ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6164 prompt_cache_len:5151 prompt_cache_ratio:0.8356586632057106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 -DEBUG 06-24 20:06:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:35 [manager.py:224] router recive req id 8 cost time 0.10286688804626465 s -INFO 06-24 20:06:35 [manager.py:68] detokenization recv req id 8 cost time 0.10472989082336426 s -DEBUG 06-24 20:06:35 [manager.py:391] Prefill Batch: batch_id=143005380746227313454870115002179804716, time:1750766795.9022946s req_ids:[8] -DEBUG 06-24 20:06:35 [manager.py:391] -ERROR 06-24 20:06:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:214.3383026123047ms total_cost_time:214.35785293579102ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6165 prompt_cache_len:5151 prompt_cache_ratio:0.8355231143552312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 -DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10303282737731934 s -INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10496664047241211 s -DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=236052058903476792443385631655913286730, time:1750766796.1101658s req_ids:[8] -DEBUG 06-24 20:06:36 [manager.py:391] -ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:35 lightllm_req_id:8 first_token_cost:204.14185523986816ms total_cost_time:204.16021347045898ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6166 prompt_cache_len:5151 prompt_cache_ratio:0.8353876094712942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 -DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10411882400512695 s -INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10662460327148438 s -DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=242333186663282558248076690973249687968, time:1750766796.3190448s req_ids:[8] -DEBUG 06-24 20:06:36 [manager.py:391] -ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:207.48114585876465ms total_cost_time:207.50021934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6167 prompt_cache_len:5151 prompt_cache_ratio:0.8352521485325117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 -DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10408401489257812 s -INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.1060020923614502 s -DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=127334321396271924451989612847392834481, time:1750766796.5293477s req_ids:[8] -DEBUG 06-24 20:06:36 [manager.py:391] -ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:206.2242031097412ms total_cost_time:206.24518394470215ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6168 prompt_cache_len:5151 prompt_cache_ratio:0.8351167315175098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 -DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.10327529907226562 s -INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10522007942199707 s -DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=299900336200277287405435521368851611491, time:1750766796.740181s req_ids:[8] -DEBUG 06-24 20:06:36 [manager.py:391] -ERROR 06-24 20:06:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:206.7258358001709ms total_cost_time:206.74586296081543ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6169 prompt_cache_len:5151 prompt_cache_ratio:0.8349813584049278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 -DEBUG 06-24 20:06:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:36 [manager.py:224] router recive req id 8 cost time 0.1044614315032959 s -INFO 06-24 20:06:36 [manager.py:68] detokenization recv req id 8 cost time 0.10641098022460938 s -DEBUG 06-24 20:06:36 [manager.py:391] Prefill Batch: batch_id=262983140690848707483768694406291156567, time:1750766796.9512544s req_ids:[8] -DEBUG 06-24 20:06:36 [manager.py:391] -ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:36 lightllm_req_id:8 first_token_cost:211.61174774169922ms total_cost_time:211.63177490234375ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6170 prompt_cache_len:5151 prompt_cache_ratio:0.8348460291734198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 -DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10322833061218262 s -INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10515427589416504 s -DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=184326619287352883060671791297834522162, time:1750766797.164035s req_ids:[8] -DEBUG 06-24 20:06:37 [manager.py:391] -ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:205.49535751342773ms total_cost_time:205.51490783691406ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6171 prompt_cache_len:5151 prompt_cache_ratio:0.8347107438016529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 -DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10317730903625488 s -INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10503506660461426 s -DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=110844453664365176085731817877030912463, time:1750766797.3733673s req_ids:[8] -DEBUG 06-24 20:06:37 [manager.py:391] -ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:208.7550163269043ms total_cost_time:208.77361297607422ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6172 prompt_cache_len:5151 prompt_cache_ratio:0.8345755022683085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 -DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10453629493713379 s -INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10655426979064941 s -DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=143021464239665314309801794821729374873, time:1750766797.585675s req_ids:[8] -DEBUG 06-24 20:06:37 [manager.py:391] -ERROR 06-24 20:06:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:203.4311294555664ms total_cost_time:203.45067977905273ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6173 prompt_cache_len:5151 prompt_cache_ratio:0.8344403045520816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 -DEBUG 06-24 20:06:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:37 [manager.py:224] router recive req id 8 cost time 0.10401487350463867 s -INFO 06-24 20:06:37 [manager.py:68] detokenization recv req id 8 cost time 0.10593509674072266 s -DEBUG 06-24 20:06:37 [manager.py:391] Prefill Batch: batch_id=95783369034580814269639877441897335712, time:1750766797.7920573s req_ids:[8] -DEBUG 06-24 20:06:37 [manager.py:391] -ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:37 lightllm_req_id:8 first_token_cost:367.5997257232666ms total_cost_time:367.61927604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6174 prompt_cache_len:5151 prompt_cache_ratio:0.8343051506316812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 -DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10430145263671875 s -INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10635948181152344 s -DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=182465649665468414357396296172358654706, time:1750766798.1579986s req_ids:[8] -DEBUG 06-24 20:06:38 [manager.py:391] -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:197.03984260559082ms total_cost_time:197.06273078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6175 prompt_cache_len:5151 prompt_cache_ratio:0.83417004048583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 -DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10313987731933594 s -INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10503530502319336 s -DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=67433550744957107559143879422456748340, time:1750766798.3666937s req_ids:[8] -DEBUG 06-24 20:06:38 [manager.py:391] -ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:208.3439826965332ms total_cost_time:208.36353302001953ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6176 prompt_cache_len:5151 prompt_cache_ratio:0.8340349740932642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 -DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10311341285705566 s -INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10551595687866211 s -DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=274340827700731012035498431325864876539, time:1750766798.5774026s req_ids:[8] -DEBUG 06-24 20:06:38 [manager.py:391] -ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:206.76374435424805ms total_cost_time:206.78257942199707ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6177 prompt_cache_len:5151 prompt_cache_ratio:0.8338999514327343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 -DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10402250289916992 s -INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10584306716918945 s -DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=213846850821126457013818831797157702175, time:1750766798.78863s req_ids:[8] -DEBUG 06-24 20:06:38 [manager.py:391] -ERROR 06-24 20:06:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:203.45091819763184ms total_cost_time:203.46927642822266ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6178 prompt_cache_len:5151 prompt_cache_ratio:0.8337649724830042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 -DEBUG 06-24 20:06:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:38 [manager.py:224] router recive req id 8 cost time 0.10430240631103516 s -INFO 06-24 20:06:38 [manager.py:68] detokenization recv req id 8 cost time 0.10632991790771484 s -DEBUG 06-24 20:06:38 [manager.py:391] Prefill Batch: batch_id=68544995257468624353025646154177443132, time:1750766798.992736s req_ids:[8] -DEBUG 06-24 20:06:38 [manager.py:391] -ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:38 lightllm_req_id:8 first_token_cost:201.82037353515625ms total_cost_time:201.84063911437988ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6179 prompt_cache_len:5151 prompt_cache_ratio:0.8336300372228516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 -DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10308575630187988 s -INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10507011413574219 s -DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=252022908310939251057633468845722367738, time:1750766799.201477s req_ids:[8] -DEBUG 06-24 20:06:39 [manager.py:391] -ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:209.2607021331787ms total_cost_time:209.28001403808594ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6180 prompt_cache_len:5151 prompt_cache_ratio:0.833495145631068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 -DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10311412811279297 s -INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10517549514770508 s -DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=25219512254762808842410937951525288431, time:1750766799.4265058s req_ids:[8] -DEBUG 06-24 20:06:39 [manager.py:391] -ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:217.18883514404297ms total_cost_time:217.2069549560547ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6181 prompt_cache_len:5151 prompt_cache_ratio:0.8333602976864585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 -DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10399603843688965 s -INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10604739189147949 s -DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=271346729542096737699955016952924407771, time:1750766799.6428423s req_ids:[8] -DEBUG 06-24 20:06:39 [manager.py:391] -ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:208.94336700439453ms total_cost_time:208.96315574645996ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6182 prompt_cache_len:5151 prompt_cache_ratio:0.8332254933678421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 -DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:39 [manager.py:224] router recive req id 8 cost time 0.10331344604492188 s -INFO 06-24 20:06:39 [manager.py:68] detokenization recv req id 8 cost time 0.10523438453674316 s -DEBUG 06-24 20:06:39 [manager.py:391] Prefill Batch: batch_id=182671081899220230471401592469597276619, time:1750766799.8473756s req_ids:[8] -DEBUG 06-24 20:06:39 [manager.py:391] -ERROR 06-24 20:06:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:201.94315910339355ms total_cost_time:201.96294784545898ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6183 prompt_cache_len:5151 prompt_cache_ratio:0.8330907326540514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 -DEBUG 06-24 20:06:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10319900512695312 s -INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10564565658569336 s -DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=277174947382323183378112580624573963225, time:1750766800.0534825s req_ids:[8] -DEBUG 06-24 20:06:40 [manager.py:391] -ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:39 lightllm_req_id:8 first_token_cost:208.09149742126465ms total_cost_time:208.11080932617188ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6184 prompt_cache_len:5151 prompt_cache_ratio:0.8329560155239327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 -DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10322165489196777 s -INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.1051323413848877 s -DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=220627652321598522111115160464918740206, time:1750766800.2663488s req_ids:[8] -DEBUG 06-24 20:06:40 [manager.py:391] -ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:206.50863647460938ms total_cost_time:206.5277099609375ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6185 prompt_cache_len:5151 prompt_cache_ratio:0.832821341956346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 -DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10337185859680176 s -INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10518670082092285 s -DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=78318963350391885158059624854948249041, time:1750766800.4801636s req_ids:[8] -DEBUG 06-24 20:06:40 [manager.py:391] -ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:214.39647674560547ms total_cost_time:214.4150733947754ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6186 prompt_cache_len:5151 prompt_cache_ratio:0.8326867119301649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 -DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10411834716796875 s -INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10598087310791016 s -DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=169647311881210589823113231037780987484, time:1750766800.6932726s req_ids:[8] -DEBUG 06-24 20:06:40 [manager.py:391] -ERROR 06-24 20:06:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:205.49821853637695ms total_cost_time:205.51729202270508ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6187 prompt_cache_len:5151 prompt_cache_ratio:0.8325521254242767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 -DEBUG 06-24 20:06:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:40 [manager.py:224] router recive req id 8 cost time 0.10323953628540039 s -INFO 06-24 20:06:40 [manager.py:68] detokenization recv req id 8 cost time 0.10518479347229004 s -DEBUG 06-24 20:06:40 [manager.py:391] Prefill Batch: batch_id=254358326656507889614853692253052500646, time:1750766800.9027264s req_ids:[8] -DEBUG 06-24 20:06:40 [manager.py:391] -ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:40 lightllm_req_id:8 first_token_cost:395.1089382171631ms total_cost_time:395.1294422149658ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6188 prompt_cache_len:5151 prompt_cache_ratio:0.8324175824175825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 -DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10402226448059082 s -INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.1060795783996582 s -DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=230859904132794620911184000909303711837, time:1750766801.299617s req_ids:[8] -DEBUG 06-24 20:06:41 [manager.py:391] -ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:201.52044296264648ms total_cost_time:201.5397548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6189 prompt_cache_len:5151 prompt_cache_ratio:0.8322830828889967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 -DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10439944267272949 s -INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10636425018310547 s -DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=119316076468777548694444272363925472556, time:1750766801.5083835s req_ids:[8] -DEBUG 06-24 20:06:41 [manager.py:391] -ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:223.29211235046387ms total_cost_time:223.3123779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6190 prompt_cache_len:5151 prompt_cache_ratio:0.8321486268174475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 -DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10424399375915527 s -INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10613846778869629 s -DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=185920297840509855886648159365815234920, time:1750766801.7281063s req_ids:[8] -DEBUG 06-24 20:06:41 [manager.py:391] -ERROR 06-24 20:06:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:208.9974880218506ms total_cost_time:209.0170383453369ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6191 prompt_cache_len:5151 prompt_cache_ratio:0.8320142141818769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 -DEBUG 06-24 20:06:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:41 [manager.py:224] router recive req id 8 cost time 0.10422992706298828 s -INFO 06-24 20:06:41 [manager.py:68] detokenization recv req id 8 cost time 0.10590147972106934 s -DEBUG 06-24 20:06:41 [manager.py:391] Prefill Batch: batch_id=145809805742492902515718657724567250407, time:1750766801.947688s req_ids:[8] -DEBUG 06-24 20:06:41 [manager.py:391] -ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:41 lightllm_req_id:8 first_token_cost:208.20069313049316ms total_cost_time:208.221435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6192 prompt_cache_len:5151 prompt_cache_ratio:0.8318798449612403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 -DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10312366485595703 s -INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10494232177734375 s -DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=50987435135096950734258525826769416672, time:1750766802.158477s req_ids:[8] -DEBUG 06-24 20:06:42 [manager.py:391] -ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:217.50664710998535ms total_cost_time:217.52595901489258ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6193 prompt_cache_len:5151 prompt_cache_ratio:0.8317455191345067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 -DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10332465171813965 s -INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10443711280822754 s -DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=334874776106186390169014285899188065114, time:1750766802.3751802s req_ids:[8] -DEBUG 06-24 20:06:42 [manager.py:391] -ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:214.39099311828613ms total_cost_time:214.41030502319336ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6194 prompt_cache_len:5151 prompt_cache_ratio:0.8316112366806587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 -DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10326886177062988 s -INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10509824752807617 s -DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=260422862321947458316288145973784263899, time:1750766802.5897987s req_ids:[8] -DEBUG 06-24 20:06:42 [manager.py:391] -ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:194.32640075683594ms total_cost_time:194.34523582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6195 prompt_cache_len:5151 prompt_cache_ratio:0.8314769975786925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 -DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10461735725402832 s -INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.1063685417175293 s -INFO 06-24 20:06:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:06:42 [manager.py:391] Prefill Batch: batch_id=249308235477996095651910616905792056065, time:1750766802.7936606s req_ids:[8] -DEBUG 06-24 20:06:42 [manager.py:391] -ERROR 06-24 20:06:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:206.70318603515625ms total_cost_time:206.72249794006348ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6196 prompt_cache_len:5151 prompt_cache_ratio:0.8313428018076178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 -DEBUG 06-24 20:06:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:42 [manager.py:224] router recive req id 8 cost time 0.10488033294677734 s -INFO 06-24 20:06:42 [manager.py:68] detokenization recv req id 8 cost time 0.10678529739379883 s -DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=181533786784591662249938083573719314708, time:1750766803.0051754s req_ids:[8] -DEBUG 06-24 20:06:43 [manager.py:391] -ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:42 lightllm_req_id:8 first_token_cost:207.15045928955078ms total_cost_time:207.1692943572998ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6197 prompt_cache_len:5151 prompt_cache_ratio:0.831208649346458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 -DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10412096977233887 s -INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10596418380737305 s -DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=109538553350496572726145691345586193581, time:1750766803.2160833s req_ids:[8] -DEBUG 06-24 20:06:43 [manager.py:391] -ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:207.48090744018555ms total_cost_time:207.50021934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6198 prompt_cache_len:5151 prompt_cache_ratio:0.8310745401742498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 -DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10294246673583984 s -INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10469818115234375 s -DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=238703736818791049971439601774946443404, time:1750766803.4310713s req_ids:[8] -DEBUG 06-24 20:06:43 [manager.py:391] -ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:210.35265922546387ms total_cost_time:210.3719711303711ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6199 prompt_cache_len:5151 prompt_cache_ratio:0.8309404742700436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 -DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:43 [manager.py:224] router recive req id 8 cost time 0.10540556907653809 s -INFO 06-24 20:06:43 [manager.py:68] detokenization recv req id 8 cost time 0.10732626914978027 s -DEBUG 06-24 20:06:43 [manager.py:391] Prefill Batch: batch_id=223678429410222826247402301657597914347, time:1750766803.6550295s req_ids:[8] -DEBUG 06-24 20:06:43 [manager.py:391] -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:404.99401092529297ms total_cost_time:405.03716468811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6200 prompt_cache_len:5151 prompt_cache_ratio:0.8308064516129032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 -DEBUG 06-24 20:06:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10730195045471191 s -INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s -DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=129089206010392321674126505504979631485, time:1750766804.0544941s req_ids:[8] -DEBUG 06-24 20:06:44 [manager.py:391] -ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:43 lightllm_req_id:8 first_token_cost:209.48481559753418ms total_cost_time:209.52987670898438ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6201 prompt_cache_len:5151 prompt_cache_ratio:0.8306724721819061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 -DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10790276527404785 s -INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.11055445671081543 s -DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=274580558183632382412876717582796393967, time:1750766804.2763412s req_ids:[8] -DEBUG 06-24 20:06:44 [manager.py:391] -ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:213.32550048828125ms total_cost_time:213.38391304016113ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6202 prompt_cache_len:5151 prompt_cache_ratio:0.8305385359561431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 -DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10883355140686035 s -INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.1107327938079834 s -DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=29222527585448756392550174064691259774, time:1750766804.4776585s req_ids:[8] -DEBUG 06-24 20:06:44 [manager.py:391] -ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:210.4020118713379ms total_cost_time:210.4473114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6203 prompt_cache_len:5151 prompt_cache_ratio:0.8304046429147187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 -DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s -INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.11061882972717285 s -DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=311283468831619481868701538590607731076, time:1750766804.700409s req_ids:[8] -DEBUG 06-24 20:06:44 [manager.py:391] -ERROR 06-24 20:06:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:212.74065971374512ms total_cost_time:212.78786659240723ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6204 prompt_cache_len:5151 prompt_cache_ratio:0.8302707930367504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 -DEBUG 06-24 20:06:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:44 [manager.py:224] router recive req id 8 cost time 0.1071622371673584 s -INFO 06-24 20:06:44 [manager.py:68] detokenization recv req id 8 cost time 0.10907649993896484 s -DEBUG 06-24 20:06:44 [manager.py:391] Prefill Batch: batch_id=11467304696616277608568626861512787192, time:1750766804.9270504s req_ids:[8] -DEBUG 06-24 20:06:44 [manager.py:391] -ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:44 lightllm_req_id:8 first_token_cost:221.24814987182617ms total_cost_time:221.29487991333008ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6205 prompt_cache_len:5151 prompt_cache_ratio:0.8301369863013699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 -DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10755300521850586 s -INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10967040061950684 s -DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=121899999699021260392889196242249340422, time:1750766805.1445925s req_ids:[8] -DEBUG 06-24 20:06:45 [manager.py:391] -ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:203.92584800720215ms total_cost_time:203.97329330444336ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6206 prompt_cache_len:5151 prompt_cache_ratio:0.8300032226877215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 -DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s -INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.11011457443237305 s -DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=62139265564817838173973606682531378410, time:1750766805.3625162s req_ids:[8] -DEBUG 06-24 20:06:45 [manager.py:391] -ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:213.14048767089844ms total_cost_time:213.18411827087402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6207 prompt_cache_len:5151 prompt_cache_ratio:0.8298695021749638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 -DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10736441612243652 s -INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10924363136291504 s -DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=171636578239590667322213797124729344498, time:1750766805.5734103s req_ids:[8] -DEBUG 06-24 20:06:45 [manager.py:391] -ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:204.57720756530762ms total_cost_time:204.6198844909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6208 prompt_cache_len:5151 prompt_cache_ratio:0.829735824742268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 -DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.10900235176086426 s -INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.11114168167114258 s -DEBUG 06-24 20:06:45 [manager.py:391] Prefill Batch: batch_id=224668051744289019973723205584528606375, time:1750766805.7891595s req_ids:[8] -DEBUG 06-24 20:06:45 [manager.py:391] -DEBUG 06-24 20:06:45 [stats.py:37] Avg tokens(prompt+generate) throughput: 27548.044 tokens/s -DEBUG 06-24 20:06:45 [stats.py:37] Avg prompt tokens throughput: 27539.142 tokens/s -DEBUG 06-24 20:06:45 [stats.py:37] Avg generate tokens throughput: 8.902 tokens/s -ERROR 06-24 20:06:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:213.3009433746338ms total_cost_time:213.3462429046631ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6209 prompt_cache_len:5151 prompt_cache_ratio:0.8296021903688194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 -DEBUG 06-24 20:06:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:45 [manager.py:224] router recive req id 8 cost time 0.1069183349609375 s -INFO 06-24 20:06:45 [manager.py:68] detokenization recv req id 8 cost time 0.10888552665710449 s -DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=142026440483083702224366022291479131288, time:1750766806.000784s req_ids:[8] -DEBUG 06-24 20:06:46 [manager.py:391] -ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:45 lightllm_req_id:8 first_token_cost:366.5480613708496ms total_cost_time:366.5933609008789ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6210 prompt_cache_len:5151 prompt_cache_ratio:0.8294685990338164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 -DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:46 [batch.py:51] router release req id 8 -INFO 06-24 20:06:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10657691955566406 s -INFO 06-24 20:06:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10846877098083496 s -DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=273392915733129403376475945402901694394, time:1750766806.3655977s req_ids:[8] -DEBUG 06-24 20:06:46 [manager.py:391] -ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:196.54202461242676ms total_cost_time:196.58303260803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6211 prompt_cache_len:5151 prompt_cache_ratio:0.8293350507164707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 -DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10917329788208008 s -INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s -DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=305699887275326704054820597455406318012, time:1750766806.5731392s req_ids:[8] -DEBUG 06-24 20:06:46 [manager.py:391] -ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:198.47774505615234ms total_cost_time:198.52066040039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6212 prompt_cache_len:5151 prompt_cache_ratio:0.8292015453960078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 -DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10764551162719727 s -INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10951662063598633 s -DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=266878638444826133262585614104181434482, time:1750766806.7766533s req_ids:[8] -DEBUG 06-24 20:06:46 [manager.py:391] -ERROR 06-24 20:06:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:199.54538345336914ms total_cost_time:199.58806037902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6213 prompt_cache_len:5151 prompt_cache_ratio:0.8290680830516659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 -DEBUG 06-24 20:06:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:46 [manager.py:224] router recive req id 8 cost time 0.10656952857971191 s -INFO 06-24 20:06:46 [manager.py:68] detokenization recv req id 8 cost time 0.10836505889892578 s -DEBUG 06-24 20:06:46 [manager.py:391] Prefill Batch: batch_id=246813748868512437775352192216081579281, time:1750766806.9820025s req_ids:[8] -DEBUG 06-24 20:06:46 [manager.py:391] -ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:46 lightllm_req_id:8 first_token_cost:200.48856735229492ms total_cost_time:200.5324363708496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6214 prompt_cache_len:5151 prompt_cache_ratio:0.8289346636626972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 -DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.1078941822052002 s -INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10967326164245605 s -DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=134260068625676113925637427492131411216, time:1750766807.1867747s req_ids:[8] -DEBUG 06-24 20:06:47 [manager.py:391] -ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:200.62756538391113ms total_cost_time:200.6702423095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6215 prompt_cache_len:5151 prompt_cache_ratio:0.8288012872083669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 -DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s -INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.11028814315795898 s -DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=308530330604852826819614108825896848946, time:1750766807.3954499s req_ids:[8] -DEBUG 06-24 20:06:47 [manager.py:391] -ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:205.66153526306152ms total_cost_time:205.7032585144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6216 prompt_cache_len:5151 prompt_cache_ratio:0.8286679536679536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 -DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.10776114463806152 s -INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10967135429382324 s -DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=85663535150515529166413223636137479560, time:1750766807.6048172s req_ids:[8] -DEBUG 06-24 20:06:47 [manager.py:391] -ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:202.73160934448242ms total_cost_time:202.77714729309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6217 prompt_cache_len:5151 prompt_cache_ratio:0.8285346630207495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 -DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:47 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s -INFO 06-24 20:06:47 [manager.py:68] detokenization recv req id 8 cost time 0.10862588882446289 s -DEBUG 06-24 20:06:47 [manager.py:391] Prefill Batch: batch_id=109039265709184128189277658901280970833, time:1750766807.8154387s req_ids:[8] -DEBUG 06-24 20:06:47 [manager.py:391] -ERROR 06-24 20:06:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:205.98244667053223ms total_cost_time:206.02869987487793ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6218 prompt_cache_len:5151 prompt_cache_ratio:0.8284014152460598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 -DEBUG 06-24 20:06:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10805654525756836 s -INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.11022210121154785 s -DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=221001382116346561839749205486953955098, time:1750766808.0252206s req_ids:[8] -DEBUG 06-24 20:06:48 [manager.py:391] -ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:47 lightllm_req_id:8 first_token_cost:202.30531692504883ms total_cost_time:202.347993850708ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6219 prompt_cache_len:5151 prompt_cache_ratio:0.8282682103232031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 -DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10684490203857422 s -INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.10876941680908203 s -DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=302157331811672016174837861696821348702, time:1750766808.2332687s req_ids:[8] -DEBUG 06-24 20:06:48 [manager.py:391] -ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:204.38170433044434ms total_cost_time:204.42438125610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6220 prompt_cache_len:5151 prompt_cache_ratio:0.8281350482315113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 -DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10763430595397949 s -INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.11004114151000977 s -DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=204819967202021584442150035400988437551, time:1750766808.4419105s req_ids:[8] -DEBUG 06-24 20:06:48 [manager.py:391] -ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:205.69634437561035ms total_cost_time:205.73997497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6221 prompt_cache_len:5151 prompt_cache_ratio:0.8280019289503295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 -DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.20728421211242676 s -INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.20887303352355957 s -DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=253577637024759995790024656554170786603, time:1750766808.7463598s req_ids:[8] -DEBUG 06-24 20:06:48 [manager.py:391] -ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:254.15349006652832ms total_cost_time:254.1940212249756ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:6222 prompt_cache_len:5151 prompt_cache_ratio:0.8278688524590164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 -DEBUG 06-24 20:06:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:48 [manager.py:224] router recive req id 8 cost time 0.10745429992675781 s -INFO 06-24 20:06:48 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s -DEBUG 06-24 20:06:48 [manager.py:391] Prefill Batch: batch_id=148159794852021763307896848468820039632, time:1750766808.9071126s req_ids:[8] -DEBUG 06-24 20:06:48 [manager.py:391] -ERROR 06-24 20:06:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:193.20249557495117ms total_cost_time:193.24660301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6223 prompt_cache_len:5151 prompt_cache_ratio:0.8277358187369436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 -DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10662388801574707 s -INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10866904258728027 s -DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=209085834863442157280202752124128141146, time:1750766809.110937s req_ids:[8] -DEBUG 06-24 20:06:49 [manager.py:391] -ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:48 lightllm_req_id:8 first_token_cost:203.71294021606445ms total_cost_time:203.75609397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6224 prompt_cache_len:5151 prompt_cache_ratio:0.8276028277634961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 -DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10829401016235352 s -INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.11018681526184082 s -DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=335171324055220064096823808819897812607, time:1750766809.3196244s req_ids:[8] -DEBUG 06-24 20:06:49 [manager.py:391] -ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:202.64410972595215ms total_cost_time:202.74066925048828ms,out_token_counter:1 mean_per_token_cost_time: 0.09655952453613281ms prompt_token_num:6225 prompt_cache_len:5151 prompt_cache_ratio:0.8274698795180723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 -DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.1082005500793457 s -INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s -DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=164680843762860334860357277778222115978, time:1750766809.5286891s req_ids:[8] -DEBUG 06-24 20:06:49 [manager.py:391] -ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:202.66151428222656ms total_cost_time:202.70323753356934ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6226 prompt_cache_len:5151 prompt_cache_ratio:0.8273369739800835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 -DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10777735710144043 s -INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s -DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=87545499633224233074531612892889704748, time:1750766809.738937s req_ids:[8] -DEBUG 06-24 20:06:49 [manager.py:391] -ERROR 06-24 20:06:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:06:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:208.34994316101074ms total_cost_time:208.39238166809082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6227 prompt_cache_len:5151 prompt_cache_ratio:0.8272041111289545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 -DEBUG 06-24 20:06:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:49 [manager.py:224] router recive req id 8 cost time 0.10716462135314941 s -INFO 06-24 20:06:49 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s -DEBUG 06-24 20:06:49 [manager.py:391] Prefill Batch: batch_id=296706002826935978710635342014476682588, time:1750766809.9653463s req_ids:[8] -DEBUG 06-24 20:06:49 [manager.py:391] -ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:49 lightllm_req_id:8 first_token_cost:219.23303604125977ms total_cost_time:219.27595138549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6228 prompt_cache_len:5151 prompt_cache_ratio:0.8270712909441233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 -DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.1068580150604248 s -INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10877799987792969 s -DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=300437126235879746737221230586241630333, time:1750766810.1783924s req_ids:[8] -DEBUG 06-24 20:06:50 [manager.py:391] -ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:207.77654647827148ms total_cost_time:207.81946182250977ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6229 prompt_cache_len:5151 prompt_cache_ratio:0.826938513405041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 -DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10719418525695801 s -INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10918426513671875 s -DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=246924350557468428398411045117208200383, time:1750766810.390594s req_ids:[8] -DEBUG 06-24 20:06:50 [manager.py:391] -ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:205.86395263671875ms total_cost_time:205.90758323669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6230 prompt_cache_len:5151 prompt_cache_ratio:0.8268057784911718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 -DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10747575759887695 s -INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10946869850158691 s -DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=291651901307532973010767736686075293355, time:1750766810.601247s req_ids:[8] -DEBUG 06-24 20:06:50 [manager.py:391] -ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:205.4286003112793ms total_cost_time:205.47151565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6231 prompt_cache_len:5151 prompt_cache_ratio:0.8266730861819933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 -DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:50 [manager.py:224] router recive req id 8 cost time 0.10773587226867676 s -INFO 06-24 20:06:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972976684570312 s -DEBUG 06-24 20:06:50 [manager.py:391] Prefill Batch: batch_id=96089753093821298213595850066179703477, time:1750766810.8131654s req_ids:[8] -DEBUG 06-24 20:06:50 [manager.py:391] -ERROR 06-24 20:06:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:206.81262016296387ms total_cost_time:206.85553550720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6232 prompt_cache_len:5151 prompt_cache_ratio:0.8265404364569962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 -DEBUG 06-24 20:06:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10669112205505371 s -INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.10853266716003418 s -DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=40826812959059000403406219261630428467, time:1750766811.0239308s req_ids:[8] -DEBUG 06-24 20:06:51 [manager.py:391] -ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:50 lightllm_req_id:8 first_token_cost:215.85488319396973ms total_cost_time:215.8958911895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6233 prompt_cache_len:5151 prompt_cache_ratio:0.8264078292956842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 -DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10732531547546387 s -INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s -DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=164039272474673313599094395242539904108, time:1750766811.242305s req_ids:[8] -DEBUG 06-24 20:06:51 [manager.py:391] -ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:346.759557723999ms total_cost_time:346.8043804168701ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6234 prompt_cache_len:5151 prompt_cache_ratio:0.8262752646775746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 -DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10660624504089355 s -INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.1085355281829834 s -DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=300933581240107300780060504450370359901, time:1750766811.5903049s req_ids:[8] -DEBUG 06-24 20:06:51 [manager.py:391] -ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:195.3415870666504ms total_cost_time:195.38354873657227ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6235 prompt_cache_len:5151 prompt_cache_ratio:0.8261427425821972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 -DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s -INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=16910913011531094755825903362567027860, time:1750766811.796252s req_ids:[8] -DEBUG 06-24 20:06:51 [manager.py:391] -ERROR 06-24 20:06:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:199.65267181396484ms total_cost_time:199.69487190246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6236 prompt_cache_len:5151 prompt_cache_ratio:0.8260102629890955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 -DEBUG 06-24 20:06:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:51 [manager.py:224] router recive req id 8 cost time 0.1065359115600586 s -INFO 06-24 20:06:51 [manager.py:68] detokenization recv req id 8 cost time 0.10842132568359375 s -DEBUG 06-24 20:06:51 [manager.py:391] Prefill Batch: batch_id=78818862649933872031442039225507011228, time:1750766811.9922535s req_ids:[8] -DEBUG 06-24 20:06:51 [manager.py:391] -ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:51 lightllm_req_id:8 first_token_cost:182.1267604827881ms total_cost_time:182.16991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6237 prompt_cache_len:5151 prompt_cache_ratio:0.8258778258778259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 -DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10797667503356934 s -INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986804962158203 s -DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=85419056267207516503026081972988258446, time:1750766812.1870546s req_ids:[8] -DEBUG 06-24 20:06:52 [manager.py:391] -ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:199.815034866333ms total_cost_time:199.85675811767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6238 prompt_cache_len:5151 prompt_cache_ratio:0.8257454312279576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 -DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.1072838306427002 s -INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918402671813965 s -DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=114045466805254336772843708094003856883, time:1750766812.4078274s req_ids:[8] -DEBUG 06-24 20:06:52 [manager.py:391] -ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:218.57929229736328ms total_cost_time:218.62196922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6239 prompt_cache_len:5151 prompt_cache_ratio:0.8256130790190735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 -DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10686635971069336 s -INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.10872793197631836 s -DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=53933632271953530123873540843571492602, time:1750766812.619721s req_ids:[8] -DEBUG 06-24 20:06:52 [manager.py:391] -ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:202.91924476623535ms total_cost_time:202.97908782958984ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6240 prompt_cache_len:5151 prompt_cache_ratio:0.8254807692307692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 -DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:52 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s -INFO 06-24 20:06:52 [manager.py:68] detokenization recv req id 8 cost time 0.11093783378601074 s -DEBUG 06-24 20:06:52 [manager.py:391] Prefill Batch: batch_id=160183513984481951008537484254434947779, time:1750766812.823167s req_ids:[8] -DEBUG 06-24 20:06:52 [manager.py:391] -ERROR 06-24 20:06:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:195.20235061645508ms total_cost_time:195.25718688964844ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:6241 prompt_cache_len:5151 prompt_cache_ratio:0.8253485018426534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 -DEBUG 06-24 20:06:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s -INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.11055445671081543 s -DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=166742508698764139519972053962035536941, time:1750766813.0258133s req_ids:[8] -DEBUG 06-24 20:06:53 [manager.py:391] -ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:52 lightllm_req_id:8 first_token_cost:197.1435546875ms total_cost_time:197.18647003173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6242 prompt_cache_len:5151 prompt_cache_ratio:0.825216276834348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 -DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:53 [batch.py:51] router release req id 8 -INFO 06-24 20:06:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:06:53 [statics_utils.py:24] mean first cost: 240.26094247783655 ms -INFO 06-24 20:06:53 [statics_utils.py:24] mean per token cost: 0.12467752169683273 ms -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10694599151611328 s -INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.10882019996643066 s -INFO 06-24 20:06:53 [manager.py:620] left req id 8can release False refcount 3 -DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=218407537650659866215547164604819415142, time:1750766813.2296014s req_ids:[8] -DEBUG 06-24 20:06:53 [manager.py:391] -ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:201.85470581054688ms total_cost_time:201.89666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6243 prompt_cache_len:5151 prompt_cache_ratio:0.8250840941854878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 -DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:53 [batch.py:51] router release req id 8 -INFO 06-24 20:06:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10700011253356934 s -INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.10888886451721191 s -DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=332238364146753330790334037284437236205, time:1750766813.4536078s req_ids:[8] -DEBUG 06-24 20:06:53 [manager.py:391] -ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:218.49966049194336ms total_cost_time:218.55688095092773ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6244 prompt_cache_len:5151 prompt_cache_ratio:0.8249519538757207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 -DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10814261436462402 s -INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.11003708839416504 s -DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=182149710840249271173379627299389147373, time:1750766813.6589174s req_ids:[8] -DEBUG 06-24 20:06:53 [manager.py:391] -ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:200.4873752593994ms total_cost_time:200.52862167358398ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6245 prompt_cache_len:5151 prompt_cache_ratio:0.8248198558847077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 -DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:53 [manager.py:224] router recive req id 8 cost time 0.10765600204467773 s -INFO 06-24 20:06:53 [manager.py:68] detokenization recv req id 8 cost time 0.1095285415649414 s -DEBUG 06-24 20:06:53 [manager.py:391] Prefill Batch: batch_id=45256018481458876725907852320652525514, time:1750766813.865812s req_ids:[8] -DEBUG 06-24 20:06:53 [manager.py:391] -ERROR 06-24 20:06:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:201.10702514648438ms total_cost_time:201.14874839782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6246 prompt_cache_len:5151 prompt_cache_ratio:0.824687800192123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 -DEBUG 06-24 20:06:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10970616340637207 s -INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.1114950180053711 s -DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=65634498828482663384644033816235928042, time:1750766814.0750844s req_ids:[8] -DEBUG 06-24 20:06:54 [manager.py:391] -ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:53 lightllm_req_id:8 first_token_cost:331.79616928100586ms total_cost_time:331.84051513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6247 prompt_cache_len:5151 prompt_cache_ratio:0.8245557867776533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 -DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10823464393615723 s -INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s -DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=332597275972844949813723299032572109235, time:1750766814.4025528s req_ids:[8] -DEBUG 06-24 20:06:54 [manager.py:391] -ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:186.49959564208984ms total_cost_time:186.54298782348633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6248 prompt_cache_len:5151 prompt_cache_ratio:0.8244238156209988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 -DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10712909698486328 s -INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.10910415649414062 s -DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=130040751941261851027312512392497380335, time:1750766814.6009617s req_ids:[8] -DEBUG 06-24 20:06:54 [manager.py:391] -ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:199.02896881103516ms total_cost_time:199.07259941101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6249 prompt_cache_len:5151 prompt_cache_ratio:0.8242918867018723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 -DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:54 [manager.py:224] router recive req id 8 cost time 0.10797381401062012 s -INFO 06-24 20:06:54 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s -DEBUG 06-24 20:06:54 [manager.py:391] Prefill Batch: batch_id=77434561325384223784516655122473469878, time:1750766814.8076317s req_ids:[8] -DEBUG 06-24 20:06:54 [manager.py:391] -ERROR 06-24 20:06:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:204.2853832244873ms total_cost_time:204.33878898620605ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:6250 prompt_cache_len:5151 prompt_cache_ratio:0.82416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 -DEBUG 06-24 20:06:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s -INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11082124710083008 s -DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=105930493638842463816699007718508160007, time:1750766815.017161s req_ids:[8] -DEBUG 06-24 20:06:55 [manager.py:391] -ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:54 lightllm_req_id:8 first_token_cost:204.80632781982422ms total_cost_time:204.8487663269043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6251 prompt_cache_len:5151 prompt_cache_ratio:0.8240281554951208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 -DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10810351371765137 s -INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11004281044006348 s -DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=34186843696439327860141355779217284187, time:1750766815.2269428s req_ids:[8] -DEBUG 06-24 20:06:55 [manager.py:391] -ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:202.59785652160645ms total_cost_time:202.64434814453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:6252 prompt_cache_len:5151 prompt_cache_ratio:0.8238963531669866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 -DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.1072838306427002 s -INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.10922074317932129 s -DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=223347132229164407715252728174003840411, time:1750766815.4361951s req_ids:[8] -DEBUG 06-24 20:06:55 [manager.py:391] -ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:213.54317665100098ms total_cost_time:213.58561515808105ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6253 prompt_cache_len:5151 prompt_cache_ratio:0.8237645929953622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 -DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10922884941101074 s -INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.1112070083618164 s -DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=216880376403258175119033497322489105606, time:1750766815.6509798s req_ids:[8] -DEBUG 06-24 20:06:55 [manager.py:391] -ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:196.22468948364258ms total_cost_time:196.26808166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6254 prompt_cache_len:5151 prompt_cache_ratio:0.8236328749600256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 -DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:55 [manager.py:224] router recive req id 8 cost time 0.10836195945739746 s -INFO 06-24 20:06:55 [manager.py:68] detokenization recv req id 8 cost time 0.11027145385742188 s -DEBUG 06-24 20:06:55 [manager.py:391] Prefill Batch: batch_id=210224789270281068689775519564836292732, time:1750766815.856839s req_ids:[8] -DEBUG 06-24 20:06:55 [manager.py:391] -DEBUG 06-24 20:06:55 [stats.py:37] Avg tokens(prompt+generate) throughput: 28485.836 tokens/s -DEBUG 06-24 20:06:55 [stats.py:37] Avg prompt tokens throughput: 28476.698 tokens/s -DEBUG 06-24 20:06:55 [stats.py:37] Avg generate tokens throughput: 9.138 tokens/s -ERROR 06-24 20:06:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:202.15725898742676ms total_cost_time:202.20160484313965ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6255 prompt_cache_len:5151 prompt_cache_ratio:0.8235011990407674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 -DEBUG 06-24 20:06:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.1064293384552002 s -INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10833048820495605 s -DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=10059680914770981924600929935817399605, time:1750766816.0652816s req_ids:[8] -DEBUG 06-24 20:06:56 [manager.py:391] -ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:55 lightllm_req_id:8 first_token_cost:211.09771728515625ms total_cost_time:211.14110946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6256 prompt_cache_len:5151 prompt_cache_ratio:0.8233695652173914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 -DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10664820671081543 s -INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10855364799499512 s -DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=250605462008026985249952580069170181870, time:1750766816.2881517s req_ids:[8] -DEBUG 06-24 20:06:56 [manager.py:391] -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:06:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:210.81256866455078ms total_cost_time:210.85691452026367ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6257 prompt_cache_len:5151 prompt_cache_ratio:0.823237973469714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 -DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10656118392944336 s -INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.1084437370300293 s -DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=266275702577405762715278288984290961619, time:1750766816.496682s req_ids:[8] -DEBUG 06-24 20:06:56 [manager.py:391] -ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:202.66222953796387ms total_cost_time:202.70442962646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6258 prompt_cache_len:5151 prompt_cache_ratio:0.8231064237775647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 -DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10759592056274414 s -INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s -DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=52680681458027716104963416745762809161, time:1750766816.7081451s req_ids:[8] -DEBUG 06-24 20:06:56 [manager.py:391] -ERROR 06-24 20:06:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:207.322359085083ms total_cost_time:207.3662281036377ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6259 prompt_cache_len:5151 prompt_cache_ratio:0.8229749161207861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 -DEBUG 06-24 20:06:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:56 [batch.py:51] router release req id 8 -INFO 06-24 20:06:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:56 [manager.py:224] router recive req id 8 cost time 0.10671734809875488 s -INFO 06-24 20:06:56 [manager.py:68] detokenization recv req id 8 cost time 0.10856771469116211 s -DEBUG 06-24 20:06:56 [manager.py:391] Prefill Batch: batch_id=161459789615312660008044195983531698770, time:1750766816.9282625s req_ids:[8] -DEBUG 06-24 20:06:56 [manager.py:391] -ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:56 lightllm_req_id:8 first_token_cost:213.0436897277832ms total_cost_time:213.0882740020752ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6260 prompt_cache_len:5151 prompt_cache_ratio:0.8228434504792332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 -DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10694146156311035 s -INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10882925987243652 s -DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=176693256279375892864322822773122197327, time:1750766817.137513s req_ids:[8] -DEBUG 06-24 20:06:57 [manager.py:391] -ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:366.4078712463379ms total_cost_time:366.45030975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6261 prompt_cache_len:5151 prompt_cache_ratio:0.8227120268327743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 -DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10706925392150879 s -INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10900592803955078 s -DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=87983332168444301666264180499709868541, time:1750766817.5006573s req_ids:[8] -DEBUG 06-24 20:06:57 [manager.py:391] -ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:196.25568389892578ms total_cost_time:196.29836082458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6262 prompt_cache_len:5151 prompt_cache_ratio:0.8225806451612904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 -DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10747027397155762 s -INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.10936450958251953 s -DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=167020859558833719829316254360198560208, time:1750766817.7020197s req_ids:[8] -DEBUG 06-24 20:06:57 [manager.py:391] -ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:193.99094581604004ms total_cost_time:194.03457641601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6263 prompt_cache_len:5151 prompt_cache_ratio:0.8224493054446751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 -DEBUG 06-24 20:06:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:57 [manager.py:224] router recive req id 8 cost time 0.10661840438842773 s -INFO 06-24 20:06:57 [manager.py:68] detokenization recv req id 8 cost time 0.1085209846496582 s -DEBUG 06-24 20:06:57 [manager.py:391] Prefill Batch: batch_id=324110138131552846090648391225548136608, time:1750766817.9055629s req_ids:[8] -DEBUG 06-24 20:06:57 [manager.py:391] -ERROR 06-24 20:06:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:202.94427871704102ms total_cost_time:202.9893398284912ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6264 prompt_cache_len:5151 prompt_cache_ratio:0.8223180076628352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 -DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10723733901977539 s -INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.1091461181640625 s -DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=38251015215320874434302751048638888587, time:1750766818.1147664s req_ids:[8] -DEBUG 06-24 20:06:58 [manager.py:391] -ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:57 lightllm_req_id:8 first_token_cost:196.10095024108887ms total_cost_time:196.14434242248535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6265 prompt_cache_len:5151 prompt_cache_ratio:0.8221867517956903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 -DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s -INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.11023616790771484 s -DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=9660085581430418594847180062588788319, time:1750766818.3151138s req_ids:[8] -DEBUG 06-24 20:06:58 [manager.py:391] -ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:200.51956176757812ms total_cost_time:200.5615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6266 prompt_cache_len:5151 prompt_cache_ratio:0.8220555378231726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 -DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10734724998474121 s -INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.10916566848754883 s -DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=16772820196595019676616217413099751234, time:1750766818.5233934s req_ids:[8] -DEBUG 06-24 20:06:58 [manager.py:391] -ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:205.16347885131836ms total_cost_time:205.20544052124023ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6267 prompt_cache_len:5151 prompt_cache_ratio:0.8219243657252274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 -DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.1067202091217041 s -INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.1086726188659668 s -DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=271289577921490993594641244974333103262, time:1750766818.7322161s req_ids:[8] -DEBUG 06-24 20:06:58 [manager.py:391] -ERROR 06-24 20:06:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:221.5423583984375ms total_cost_time:221.58527374267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6268 prompt_cache_len:5151 prompt_cache_ratio:0.8217932354818124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 -DEBUG 06-24 20:06:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:58 [manager.py:224] router recive req id 8 cost time 0.10800552368164062 s -INFO 06-24 20:06:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998249053955078 s -DEBUG 06-24 20:06:58 [manager.py:391] Prefill Batch: batch_id=124357690349298273630683956741641857356, time:1750766818.9531121s req_ids:[8] -DEBUG 06-24 20:06:58 [manager.py:391] -ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:58 lightllm_req_id:8 first_token_cost:194.4746971130371ms total_cost_time:194.5192813873291ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6269 prompt_cache_len:5151 prompt_cache_ratio:0.8216621470728984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 -DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10786676406860352 s -INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s -DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=132784692881031485489833489246918940542, time:1750766819.1633801s req_ids:[8] -DEBUG 06-24 20:06:59 [manager.py:391] -ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:209.5623016357422ms total_cost_time:209.60474014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6270 prompt_cache_len:5151 prompt_cache_ratio:0.8215311004784689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 -DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10734248161315918 s -INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10926032066345215 s -DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=123968070347101568990641047573881143169, time:1750766819.3750129s req_ids:[8] -DEBUG 06-24 20:06:59 [manager.py:391] -ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:212.02874183654785ms total_cost_time:212.07141876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6271 prompt_cache_len:5151 prompt_cache_ratio:0.8214000956785201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 -DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10727787017822266 s -INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10912442207336426 s -DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=307802305702148274201862609672065561028, time:1750766819.600406s req_ids:[8] -DEBUG 06-24 20:06:59 [manager.py:391] -ERROR 06-24 20:06:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:06:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:06:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:209.84244346618652ms total_cost_time:209.8853588104248ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6272 prompt_cache_len:5151 prompt_cache_ratio:0.8212691326530612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:06:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 -DEBUG 06-24 20:06:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:06:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:06:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:06:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:06:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:06:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:06:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:06:59 [manager.py:224] router recive req id 8 cost time 0.10697102546691895 s -INFO 06-24 20:06:59 [manager.py:68] detokenization recv req id 8 cost time 0.10884475708007812 s -DEBUG 06-24 20:06:59 [manager.py:391] Prefill Batch: batch_id=302351527808587643478256935575585367699, time:1750766819.8060408s req_ids:[8] -DEBUG 06-24 20:06:59 [manager.py:391] -ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:06:59 lightllm_req_id:8 first_token_cost:366.01781845092773ms total_cost_time:366.0621643066406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6273 prompt_cache_len:5151 prompt_cache_ratio:0.8211382113821138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 -DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s -INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10961246490478516 s -DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=222700841902183682758413966446678588761, time:1750766820.1714184s req_ids:[8] -DEBUG 06-24 20:07:00 [manager.py:391] -ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:199.37920570373535ms total_cost_time:199.42307472229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6274 prompt_cache_len:5151 prompt_cache_ratio:0.8210073318457125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 -DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10677456855773926 s -INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10863423347473145 s -INFO 06-24 20:07:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=197730867870280356015220970242127861243, time:1750766820.381235s req_ids:[8] -DEBUG 06-24 20:07:00 [manager.py:391] -ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.55910873413086ms total_cost_time:207.60202407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6275 prompt_cache_len:5151 prompt_cache_ratio:0.8208764940239044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 -DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10723495483398438 s -INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10901212692260742 s -DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=233151851286911992311521907088453205328, time:1750766820.5982914s req_ids:[8] -DEBUG 06-24 20:07:00 [manager.py:391] -ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:211.95673942565918ms total_cost_time:211.99917793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6276 prompt_cache_len:5151 prompt_cache_ratio:0.8207456978967496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 -DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:00 [manager.py:224] router recive req id 8 cost time 0.10640764236450195 s -INFO 06-24 20:07:00 [manager.py:68] detokenization recv req id 8 cost time 0.10834455490112305 s -DEBUG 06-24 20:07:00 [manager.py:391] Prefill Batch: batch_id=285505395774361927297530496356465379467, time:1750766820.8113303s req_ids:[8] -DEBUG 06-24 20:07:00 [manager.py:391] -ERROR 06-24 20:07:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.43894577026367ms total_cost_time:207.48066902160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6277 prompt_cache_len:5151 prompt_cache_ratio:0.8206149434443205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 -DEBUG 06-24 20:07:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.1066133975982666 s -INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10844779014587402 s -DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=335461953799354196498880748715811272009, time:1750766821.0242538s req_ids:[8] -DEBUG 06-24 20:07:01 [manager.py:391] -ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:00 lightllm_req_id:8 first_token_cost:207.3826789855957ms total_cost_time:207.4275016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6278 prompt_cache_len:5151 prompt_cache_ratio:0.8204842306467027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 -DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10675048828125 s -INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10863375663757324 s -DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=73641268550376281433009704859033172918, time:1750766821.233939s req_ids:[8] -DEBUG 06-24 20:07:01 [manager.py:391] -ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.97426795959473ms total_cost_time:207.01980590820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6279 prompt_cache_len:5151 prompt_cache_ratio:0.8203535594839942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 -DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10720539093017578 s -INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10905885696411133 s -DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=171288799635770241690022970561266420750, time:1750766821.4456205s req_ids:[8] -DEBUG 06-24 20:07:01 [manager.py:391] -ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.223726272583ms total_cost_time:206.2814235687256ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:6280 prompt_cache_len:5151 prompt_cache_ratio:0.8202229299363057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 -DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10727858543395996 s -INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.1091909408569336 s -DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=42353602582363933174833826509349783660, time:1750766821.6573157s req_ids:[8] -DEBUG 06-24 20:07:01 [manager.py:391] -ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:207.64470100402832ms total_cost_time:207.6873779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6281 prompt_cache_len:5151 prompt_cache_ratio:0.8200923419837606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 -DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:01 [manager.py:224] router recive req id 8 cost time 0.10759615898132324 s -INFO 06-24 20:07:01 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s -DEBUG 06-24 20:07:01 [manager.py:391] Prefill Batch: batch_id=25270987587876083140233900462483169127, time:1750766821.8686085s req_ids:[8] -DEBUG 06-24 20:07:01 [manager.py:391] -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:206.4368724822998ms total_cost_time:206.4800262451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6282 prompt_cache_len:5151 prompt_cache_ratio:0.8199617956064947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 -DEBUG 06-24 20:07:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10627222061157227 s -INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10811400413513184 s -DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=79964922483371076737724541484025367847, time:1750766822.0804527s req_ids:[8] -DEBUG 06-24 20:07:02 [manager.py:391] -ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:01 lightllm_req_id:8 first_token_cost:370.09429931640625ms total_cost_time:370.13864517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6283 prompt_cache_len:5151 prompt_cache_ratio:0.819831290784657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 -DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s -INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10991287231445312 s -DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=267673378373259684656720675566839986487, time:1750766822.4501612s req_ids:[8] -DEBUG 06-24 20:07:02 [manager.py:391] -ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:199.59378242492676ms total_cost_time:199.63645935058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6284 prompt_cache_len:5151 prompt_cache_ratio:0.8197008274984087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 -DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10738730430603027 s -INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.1092677116394043 s -DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=85288884119915974765861794932711927272, time:1750766822.6613731s req_ids:[8] -DEBUG 06-24 20:07:02 [manager.py:391] -ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:206.69126510620117ms total_cost_time:206.71892166137695ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:6285 prompt_cache_len:5151 prompt_cache_ratio:0.8195704057279236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 -DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:02 [manager.py:224] router recive req id 8 cost time 0.10660934448242188 s -INFO 06-24 20:07:02 [manager.py:68] detokenization recv req id 8 cost time 0.10843873023986816 s -DEBUG 06-24 20:07:02 [manager.py:391] Prefill Batch: batch_id=250322631767396809232977903781263862389, time:1750766822.8744023s req_ids:[8] -DEBUG 06-24 20:07:02 [manager.py:391] -ERROR 06-24 20:07:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:209.98477935791016ms total_cost_time:210.01172065734863ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6286 prompt_cache_len:5151 prompt_cache_ratio:0.8194400254533885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 -DEBUG 06-24 20:07:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10694265365600586 s -INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10878276824951172 s -DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=177574486411429186707302830692711678300, time:1750766823.086578s req_ids:[8] -DEBUG 06-24 20:07:03 [manager.py:391] -ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:02 lightllm_req_id:8 first_token_cost:207.69429206848145ms total_cost_time:207.72242546081543ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6287 prompt_cache_len:5151 prompt_cache_ratio:0.8193096866550024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 -DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10679078102111816 s -INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10862374305725098 s -DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=51277520975690333125523558163218814845, time:1750766823.3048594s req_ids:[8] -DEBUG 06-24 20:07:03 [manager.py:391] -ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:214.9331569671631ms total_cost_time:214.96081352233887ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:6288 prompt_cache_len:5151 prompt_cache_ratio:0.8191793893129771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 -DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10646820068359375 s -INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10824942588806152 s -DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=5766265000539410180438159610318032100, time:1750766823.5238473s req_ids:[8] -DEBUG 06-24 20:07:03 [manager.py:391] -ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:175.83990097045898ms total_cost_time:175.86827278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6289 prompt_cache_len:5151 prompt_cache_ratio:0.8190491334075369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 -DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10739779472351074 s -INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.10916614532470703 s -DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=16684104428779221276763783555591099387, time:1750766823.692883s req_ids:[8] -DEBUG 06-24 20:07:03 [manager.py:391] -ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:216.68529510498047ms total_cost_time:216.71175956726074ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6290 prompt_cache_len:5151 prompt_cache_ratio:0.8189189189189189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 -DEBUG 06-24 20:07:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:03 [batch.py:51] router release req id 8 -INFO 06-24 20:07:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:03 [manager.py:224] router recive req id 8 cost time 0.10528826713562012 s -INFO 06-24 20:07:03 [manager.py:68] detokenization recv req id 8 cost time 0.1070716381072998 s -DEBUG 06-24 20:07:03 [manager.py:391] Prefill Batch: batch_id=24762815047925415713503284958301536277, time:1750766823.911648s req_ids:[8] -DEBUG 06-24 20:07:03 [manager.py:391] -ERROR 06-24 20:07:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:197.3257064819336ms total_cost_time:197.35407829284668ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6291 prompt_cache_len:5151 prompt_cache_ratio:0.8187887458273725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 -DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10687541961669922 s -INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10869812965393066 s -DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=255011625029333719041046047767468977013, time:1750766824.1245944s req_ids:[8] -DEBUG 06-24 20:07:04 [manager.py:391] -ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:03 lightllm_req_id:8 first_token_cost:214.02645111083984ms total_cost_time:214.05529975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:6292 prompt_cache_len:5151 prompt_cache_ratio:0.8186586141131595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 -DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s -INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10945653915405273 s -DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=266104762228652156360118773751536407822, time:1750766824.3392954s req_ids:[8] -DEBUG 06-24 20:07:04 [manager.py:391] -ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:209.02752876281738ms total_cost_time:209.05494689941406ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:6293 prompt_cache_len:5151 prompt_cache_ratio:0.8185285237565549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 -DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.10691380500793457 s -INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.10870242118835449 s -DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=97126666895751136968492784880361397351, time:1750766824.5633032s req_ids:[8] -DEBUG 06-24 20:07:04 [manager.py:391] -ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:219.9697494506836ms total_cost_time:220.01242637634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6294 prompt_cache_len:5151 prompt_cache_ratio:0.8183984747378455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 -DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:04 [manager.py:224] router recive req id 8 cost time 0.2067258358001709 s -INFO 06-24 20:07:04 [manager.py:68] detokenization recv req id 8 cost time 0.20825886726379395 s -DEBUG 06-24 20:07:04 [manager.py:391] Prefill Batch: batch_id=51649544520622392456444384656326100225, time:1750766824.8679576s req_ids:[8] -DEBUG 06-24 20:07:04 [manager.py:391] -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:257.5376033782959ms total_cost_time:257.56239891052246ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6295 prompt_cache_len:5151 prompt_cache_ratio:0.8182684670373312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 -DEBUG 06-24 20:07:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10592269897460938 s -INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10780072212219238 s -DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=96468283147581372195952446556949833115, time:1750766825.028596s req_ids:[8] -DEBUG 06-24 20:07:05 [manager.py:391] -ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:04 lightllm_req_id:8 first_token_cost:193.63164901733398ms total_cost_time:193.65978240966797ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6296 prompt_cache_len:5151 prompt_cache_ratio:0.818138500635324 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 -DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10592317581176758 s -INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.1077120304107666 s -DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=120227863708073623699945365080452543162, time:1750766825.2335815s req_ids:[8] -DEBUG 06-24 20:07:05 [manager.py:391] -ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:203.60136032104492ms total_cost_time:203.64618301391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6297 prompt_cache_len:5151 prompt_cache_ratio:0.8180085755121487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 -DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.1078500747680664 s -INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10978126525878906 s -DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=276007112557467347048263257914014445458, time:1750766825.4390893s req_ids:[8] -DEBUG 06-24 20:07:05 [manager.py:391] -ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:205.16705513000488ms total_cost_time:205.20925521850586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6298 prompt_cache_len:5151 prompt_cache_ratio:0.8178786916481423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 -DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10771870613098145 s -INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10953354835510254 s -DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=298385317018520693020262406703948482418, time:1750766825.6500566s req_ids:[8] -DEBUG 06-24 20:07:05 [manager.py:391] -ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:205.94000816345215ms total_cost_time:205.98602294921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6299 prompt_cache_len:5151 prompt_cache_ratio:0.8177488490236545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 -DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:05 [manager.py:224] router recive req id 8 cost time 0.10706138610839844 s -INFO 06-24 20:07:05 [manager.py:68] detokenization recv req id 8 cost time 0.10888218879699707 s -DEBUG 06-24 20:07:05 [manager.py:391] Prefill Batch: batch_id=103505822365411880605617934658433225513, time:1750766825.8600256s req_ids:[8] -DEBUG 06-24 20:07:05 [manager.py:391] -DEBUG 06-24 20:07:05 [stats.py:37] Avg tokens(prompt+generate) throughput: 28250.717 tokens/s -DEBUG 06-24 20:07:05 [stats.py:37] Avg prompt tokens throughput: 28241.720 tokens/s -DEBUG 06-24 20:07:05 [stats.py:37] Avg generate tokens throughput: 8.997 tokens/s -ERROR 06-24 20:07:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:206.23183250427246ms total_cost_time:206.27522468566895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6300 prompt_cache_len:5151 prompt_cache_ratio:0.8176190476190476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 -DEBUG 06-24 20:07:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10689926147460938 s -INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10864400863647461 s -DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=230687595483476645233685325472748887999, time:1750766826.0723994s req_ids:[8] -DEBUG 06-24 20:07:06 [manager.py:391] -ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:05 lightllm_req_id:8 first_token_cost:204.40101623535156ms total_cost_time:204.44464683532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6301 prompt_cache_len:5151 prompt_cache_ratio:0.8174892874146961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 -DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10797691345214844 s -INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10982036590576172 s -DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=182785419055422512682711868491062587720, time:1750766826.2811992s req_ids:[8] -DEBUG 06-24 20:07:06 [manager.py:391] -ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:206.9571018218994ms total_cost_time:207.01074600219727ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:6302 prompt_cache_len:5151 prompt_cache_ratio:0.817359568390987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 -DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.1086881160736084 s -INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.1105799674987793 s -DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=297151291847982058467551477555438132616, time:1750766826.4940038s req_ids:[8] -DEBUG 06-24 20:07:06 [manager.py:391] -ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:205.19685745239258ms total_cost_time:205.23977279663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6303 prompt_cache_len:5151 prompt_cache_ratio:0.8172298905283198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 -DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.10636687278747559 s -INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10826826095581055 s -DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=37990712776260456808002344871438098923, time:1750766826.7081707s req_ids:[8] -DEBUG 06-24 20:07:06 [manager.py:391] -ERROR 06-24 20:07:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:210.42275428771973ms total_cost_time:210.4642391204834ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6304 prompt_cache_len:5151 prompt_cache_ratio:0.8171002538071066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 -DEBUG 06-24 20:07:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:06 [manager.py:224] router recive req id 8 cost time 0.1074073314666748 s -INFO 06-24 20:07:06 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s -DEBUG 06-24 20:07:06 [manager.py:391] Prefill Batch: batch_id=274444876454159010134984836494717448998, time:1750766826.920513s req_ids:[8] -DEBUG 06-24 20:07:06 [manager.py:391] -ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:06 lightllm_req_id:8 first_token_cost:205.49678802490234ms total_cost_time:205.54018020629883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6305 prompt_cache_len:5151 prompt_cache_ratio:0.8169706582077716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 -DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s -INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s -DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=178641240275241983487802954710813495574, time:1750766827.1304538s req_ids:[8] -DEBUG 06-24 20:07:07 [manager.py:391] -ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:205.20257949829102ms total_cost_time:205.24930953979492ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:6306 prompt_cache_len:5151 prompt_cache_ratio:0.8168411037107517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 -DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10707879066467285 s -INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10895323753356934 s -DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=17589881590357641309354845225860252106, time:1750766827.3413534s req_ids:[8] -DEBUG 06-24 20:07:07 [manager.py:391] -ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:356.14490509033203ms total_cost_time:356.189489364624ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6307 prompt_cache_len:5151 prompt_cache_ratio:0.816711590296496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 -DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.1071619987487793 s -INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.10892271995544434 s -DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=163581982390625610938982657172341055317, time:1750766827.6944473s req_ids:[8] -DEBUG 06-24 20:07:07 [manager.py:391] -ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:154.15120124816895ms total_cost_time:154.19244766235352ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6308 prompt_cache_len:5151 prompt_cache_ratio:0.8165821179454661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 -DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:07 [manager.py:224] router recive req id 8 cost time 0.10689616203308105 s -INFO 06-24 20:07:07 [manager.py:68] detokenization recv req id 8 cost time 0.1084136962890625 s -DEBUG 06-24 20:07:07 [manager.py:391] Prefill Batch: batch_id=191572045227264855488903072312529771122, time:1750766827.855373s req_ids:[8] -DEBUG 06-24 20:07:07 [manager.py:391] -ERROR 06-24 20:07:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:154.72054481506348ms total_cost_time:154.76369857788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6309 prompt_cache_len:5151 prompt_cache_ratio:0.816452686638136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 -DEBUG 06-24 20:07:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10703372955322266 s -INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10900568962097168 s -DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=132789265975471341735305218110494931049, time:1750766828.015344s req_ids:[8] -DEBUG 06-24 20:07:08 [manager.py:391] -ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:07 lightllm_req_id:8 first_token_cost:191.03670120239258ms total_cost_time:191.08033180236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6310 prompt_cache_len:5151 prompt_cache_ratio:0.8163232963549921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 -DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s -INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10879850387573242 s -DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=192255500328984601628079668141315857591, time:1750766828.2172751s req_ids:[8] -DEBUG 06-24 20:07:08 [manager.py:391] -ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:202.1007537841797ms total_cost_time:202.14414596557617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6311 prompt_cache_len:5151 prompt_cache_ratio:0.816193947076533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 -DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10811519622802734 s -INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.1100454330444336 s -DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=18908211197279030093139072172310386004, time:1750766828.4267254s req_ids:[8] -DEBUG 06-24 20:07:08 [manager.py:391] -ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:205.25383949279785ms total_cost_time:205.29580116271973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6312 prompt_cache_len:5151 prompt_cache_ratio:0.81606463878327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 -DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s -INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s -DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=304734313958465676574906816411856615702, time:1750766828.6378846s req_ids:[8] -DEBUG 06-24 20:07:08 [manager.py:391] -ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:203.75776290893555ms total_cost_time:203.80187034606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6313 prompt_cache_len:5151 prompt_cache_ratio:0.8159353714557263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 -DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:08 [manager.py:224] router recive req id 8 cost time 0.10929536819458008 s -INFO 06-24 20:07:08 [manager.py:68] detokenization recv req id 8 cost time 0.11126470565795898 s -DEBUG 06-24 20:07:08 [manager.py:391] Prefill Batch: batch_id=302046486410946255995924752177643996239, time:1750766828.846932s req_ids:[8] -DEBUG 06-24 20:07:08 [manager.py:391] -ERROR 06-24 20:07:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:206.14910125732422ms total_cost_time:206.19463920593262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6314 prompt_cache_len:5151 prompt_cache_ratio:0.8158061450744377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 -DEBUG 06-24 20:07:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.1069483757019043 s -INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.108917236328125 s -DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=162904604133774740698790562668641650740, time:1750766829.0587995s req_ids:[8] -DEBUG 06-24 20:07:09 [manager.py:391] -ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:08 lightllm_req_id:8 first_token_cost:204.95963096618652ms total_cost_time:205.0023078918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6315 prompt_cache_len:5151 prompt_cache_ratio:0.8156769596199525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 -DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s -INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10947585105895996 s -DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=25582089516851104834398826522381157047, time:1750766829.2689612s req_ids:[8] -DEBUG 06-24 20:07:09 [manager.py:391] -ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:205.61909675598145ms total_cost_time:205.66201210021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6316 prompt_cache_len:5151 prompt_cache_ratio:0.8155478150728309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 -DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10694742202758789 s -INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10881257057189941 s -DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=287963106526761434012182075965223740303, time:1750766829.4804862s req_ids:[8] -DEBUG 06-24 20:07:09 [manager.py:391] -ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:203.62019538879395ms total_cost_time:203.66215705871582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6317 prompt_cache_len:5151 prompt_cache_ratio:0.8154187114136457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 -DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10693550109863281 s -INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10880184173583984 s -DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=210133155114166104210233651792608782672, time:1750766829.6901078s req_ids:[8] -DEBUG 06-24 20:07:09 [manager.py:391] -ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:207.33046531677246ms total_cost_time:207.37385749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6318 prompt_cache_len:5151 prompt_cache_ratio:0.8152896486229819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 -DEBUG 06-24 20:07:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:09 [manager.py:224] router recive req id 8 cost time 0.10802721977233887 s -INFO 06-24 20:07:09 [manager.py:68] detokenization recv req id 8 cost time 0.10995125770568848 s -DEBUG 06-24 20:07:09 [manager.py:391] Prefill Batch: batch_id=189851099262274948310761481398596293227, time:1750766829.9012098s req_ids:[8] -DEBUG 06-24 20:07:09 [manager.py:391] -ERROR 06-24 20:07:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:205.64889907836914ms total_cost_time:205.69419860839844ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6319 prompt_cache_len:5151 prompt_cache_ratio:0.815160626681437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 -DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10736727714538574 s -INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10868597030639648 s -DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=312173652376760028409097280408613978126, time:1750766830.1109817s req_ids:[8] -DEBUG 06-24 20:07:10 [manager.py:391] -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:09 lightllm_req_id:8 first_token_cost:368.61443519592285ms total_cost_time:368.6566352844238ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6320 prompt_cache_len:5151 prompt_cache_ratio:0.8150316455696203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 -DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.1066122055053711 s -INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.1091306209564209 s -DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=88423840053187159557205621470393843093, time:1750766830.4774318s req_ids:[8] -DEBUG 06-24 20:07:10 [manager.py:391] -ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:196.0165500640869ms total_cost_time:196.0604190826416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6321 prompt_cache_len:5151 prompt_cache_ratio:0.8149027052681538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 -DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10685968399047852 s -INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10883116722106934 s -DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=333099592941879735870532094106130606005, time:1750766830.6868808s req_ids:[8] -DEBUG 06-24 20:07:10 [manager.py:391] -ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:209.0911865234375ms total_cost_time:209.13386344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6322 prompt_cache_len:5151 prompt_cache_ratio:0.8147738057576717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 -DEBUG 06-24 20:07:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:10 [manager.py:224] router recive req id 8 cost time 0.10683274269104004 s -INFO 06-24 20:07:10 [manager.py:68] detokenization recv req id 8 cost time 0.10876750946044922 s -DEBUG 06-24 20:07:10 [manager.py:391] Prefill Batch: batch_id=163465259530753467948429459369948400073, time:1750766830.899803s req_ids:[8] -DEBUG 06-24 20:07:10 [manager.py:391] -ERROR 06-24 20:07:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:205.69252967834473ms total_cost_time:205.73878288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6323 prompt_cache_len:5151 prompt_cache_ratio:0.8146449470188202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 -DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:11 [batch.py:51] router release req id 8 -INFO 06-24 20:07:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10798096656799316 s -INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.11010384559631348 s -DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=202894628804384893588529496768088320187, time:1750766831.1116807s req_ids:[8] -DEBUG 06-24 20:07:11 [manager.py:391] -ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:10 lightllm_req_id:8 first_token_cost:207.88121223449707ms total_cost_time:207.92341232299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6324 prompt_cache_len:5151 prompt_cache_ratio:0.8145161290322581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 -DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10676240921020508 s -INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10882115364074707 s -DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=177097290730940882150571417584864333509, time:1750766831.3237572s req_ids:[8] -DEBUG 06-24 20:07:11 [manager.py:391] -ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:205.39617538452148ms total_cost_time:205.43885231018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6325 prompt_cache_len:5151 prompt_cache_ratio:0.8143873517786562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 -DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10675740242004395 s -INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10855460166931152 s -DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=110014469419275104128822460684444345126, time:1750766831.5341318s req_ids:[8] -DEBUG 06-24 20:07:11 [manager.py:391] -ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:206.7577838897705ms total_cost_time:206.8004608154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6326 prompt_cache_len:5151 prompt_cache_ratio:0.8142586152386975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 -DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10686016082763672 s -INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10878109931945801 s -DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=257364788617188246510131075627668714136, time:1750766831.7453012s req_ids:[8] -DEBUG 06-24 20:07:11 [manager.py:391] -ERROR 06-24 20:07:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:205.37066459655762ms total_cost_time:205.4131031036377ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6327 prompt_cache_len:5151 prompt_cache_ratio:0.8141299193930773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 -DEBUG 06-24 20:07:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:11 [manager.py:224] router recive req id 8 cost time 0.10708928108215332 s -INFO 06-24 20:07:11 [manager.py:68] detokenization recv req id 8 cost time 0.10901355743408203 s -DEBUG 06-24 20:07:11 [manager.py:391] Prefill Batch: batch_id=325648488257159057133851401469301939451, time:1750766831.9559932s req_ids:[8] -DEBUG 06-24 20:07:11 [manager.py:391] -ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:11 lightllm_req_id:8 first_token_cost:207.09633827209473ms total_cost_time:207.1387767791748ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6328 prompt_cache_len:5151 prompt_cache_ratio:0.8140012642225032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 -DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10672450065612793 s -INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10849714279174805 s -DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=132453781875957037588690724459947097817, time:1750766832.1788855s req_ids:[8] -DEBUG 06-24 20:07:12 [manager.py:391] -ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:214.79010581970215ms total_cost_time:214.83278274536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6329 prompt_cache_len:5151 prompt_cache_ratio:0.8138726497076947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 -DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10647225379943848 s -INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10846161842346191 s -DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=323409448048934599318098732198484163370, time:1750766832.3898735s req_ids:[8] -DEBUG 06-24 20:07:12 [manager.py:391] -ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:207.00860023498535ms total_cost_time:207.05223083496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6330 prompt_cache_len:5151 prompt_cache_ratio:0.8137440758293839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 -DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10744881629943848 s -INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10941863059997559 s -DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=322698242599262235056343798566421410464, time:1750766832.6010017s req_ids:[8] -DEBUG 06-24 20:07:12 [manager.py:391] -ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:201.85160636901855ms total_cost_time:201.89380645751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6331 prompt_cache_len:5151 prompt_cache_ratio:0.8136155425683146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 -DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:12 [manager.py:224] router recive req id 8 cost time 0.10683369636535645 s -INFO 06-24 20:07:12 [manager.py:68] detokenization recv req id 8 cost time 0.10871744155883789 s -DEBUG 06-24 20:07:12 [manager.py:391] Prefill Batch: batch_id=262486461884198547315762310011626530634, time:1750766832.8103404s req_ids:[8] -DEBUG 06-24 20:07:12 [manager.py:391] -ERROR 06-24 20:07:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:207.00478553771973ms total_cost_time:207.0457935333252ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6332 prompt_cache_len:5151 prompt_cache_ratio:0.8134870499052432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 -DEBUG 06-24 20:07:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s -INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994625091552734 s -DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=163989098569810015552727274174789614259, time:1750766833.0214827s req_ids:[8] -DEBUG 06-24 20:07:13 [manager.py:391] -ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:12 lightllm_req_id:8 first_token_cost:206.1619758605957ms total_cost_time:206.2058448791504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6333 prompt_cache_len:5151 prompt_cache_ratio:0.8133585978209379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 -DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10739779472351074 s -INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10926175117492676 s -DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=209797224307846317215786823261265272329, time:1750766833.2323744s req_ids:[8] -DEBUG 06-24 20:07:13 [manager.py:391] -ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:407.4885845184326ms total_cost_time:407.53173828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6334 prompt_cache_len:5151 prompt_cache_ratio:0.8132301862961794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 -DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10700106620788574 s -INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10889363288879395 s -DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=60083835334385541856362906243124156144, time:1750766833.6380823s req_ids:[8] -DEBUG 06-24 20:07:13 [manager.py:391] -ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:198.8966464996338ms total_cost_time:198.93908500671387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6335 prompt_cache_len:5151 prompt_cache_ratio:0.8131018153117601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 -DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:13 [manager.py:224] router recive req id 8 cost time 0.10657310485839844 s -INFO 06-24 20:07:13 [manager.py:68] detokenization recv req id 8 cost time 0.10898518562316895 s -DEBUG 06-24 20:07:13 [manager.py:391] Prefill Batch: batch_id=293504249439672959360506913917727065091, time:1750766833.8493621s req_ids:[8] -DEBUG 06-24 20:07:13 [manager.py:391] -ERROR 06-24 20:07:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:207.66305923461914ms total_cost_time:207.70668983459473ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6336 prompt_cache_len:5151 prompt_cache_ratio:0.8129734848484849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 -DEBUG 06-24 20:07:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10710978507995605 s -INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10901498794555664 s -DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=87974495286386069027186687485462158597, time:1750766834.0613477s req_ids:[8] -DEBUG 06-24 20:07:14 [manager.py:391] -ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:13 lightllm_req_id:8 first_token_cost:205.99126815795898ms total_cost_time:206.0372829437256ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6337 prompt_cache_len:5151 prompt_cache_ratio:0.8128451948871706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 -DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.1072545051574707 s -INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10909748077392578 s -DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=143219557366296097436126848172060224175, time:1750766834.2725668s req_ids:[8] -DEBUG 06-24 20:07:14 [manager.py:391] -ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:206.35056495666504ms total_cost_time:206.39371871948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6338 prompt_cache_len:5151 prompt_cache_ratio:0.8127169454086463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 -DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s -INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.11014318466186523 s -DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=173361394412458330951528551107039917015, time:1750766834.4885712s req_ids:[8] -DEBUG 06-24 20:07:14 [manager.py:391] -ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:208.27817916870117ms total_cost_time:208.32443237304688ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6339 prompt_cache_len:5151 prompt_cache_ratio:0.812588736393753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 -DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10757327079772949 s -INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.10956501960754395 s -DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=292497859625989229548108690944400072367, time:1750766834.695327s req_ids:[8] -DEBUG 06-24 20:07:14 [manager.py:391] -ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:205.98602294921875ms total_cost_time:206.02655410766602ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:6340 prompt_cache_len:5151 prompt_cache_ratio:0.8124605678233439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 -DEBUG 06-24 20:07:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:14 [manager.py:224] router recive req id 8 cost time 0.10669374465942383 s -INFO 06-24 20:07:14 [manager.py:68] detokenization recv req id 8 cost time 0.1086270809173584 s -DEBUG 06-24 20:07:14 [manager.py:391] Prefill Batch: batch_id=35339774385412500604545992424730141804, time:1750766834.904591s req_ids:[8] -DEBUG 06-24 20:07:14 [manager.py:391] -ERROR 06-24 20:07:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:200.30927658081055ms total_cost_time:200.3500461578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:6341 prompt_cache_len:5151 prompt_cache_ratio:0.8123324396782842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 -DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10723614692687988 s -INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10921382904052734 s -DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=202134237878158183597786875001077339014, time:1750766835.110387s req_ids:[8] -DEBUG 06-24 20:07:15 [manager.py:391] -ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:14 lightllm_req_id:8 first_token_cost:205.54113388061523ms total_cost_time:205.5821418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6342 prompt_cache_len:5151 prompt_cache_ratio:0.8122043519394513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 -DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s -INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s -DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=88428806570050185851345105856444148325, time:1750766835.321182s req_ids:[8] -DEBUG 06-24 20:07:15 [manager.py:391] -ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:204.392671585083ms total_cost_time:204.4353485107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6343 prompt_cache_len:5151 prompt_cache_ratio:0.8120763045877345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 -DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10690093040466309 s -INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10879397392272949 s -DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=218191751874392554043143463791311183068, time:1750766835.529388s req_ids:[8] -DEBUG 06-24 20:07:15 [manager.py:391] -ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:203.19056510925293ms total_cost_time:203.2334804534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6344 prompt_cache_len:5151 prompt_cache_ratio:0.8119482976040353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 -DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10767173767089844 s -INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s -DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=309177496473773233353882741587049419028, time:1750766835.738482s req_ids:[8] -DEBUG 06-24 20:07:15 [manager.py:391] -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:205.7363986968994ms total_cost_time:205.7802677154541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6345 prompt_cache_len:5151 prompt_cache_ratio:0.8118203309692671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 -DEBUG 06-24 20:07:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:15 [manager.py:224] router recive req id 8 cost time 0.10633349418640137 s -INFO 06-24 20:07:15 [manager.py:68] detokenization recv req id 8 cost time 0.10826826095581055 s -DEBUG 06-24 20:07:15 [manager.py:391] Prefill Batch: batch_id=294852681012369532813217791210735018956, time:1750766835.9492528s req_ids:[8] -DEBUG 06-24 20:07:15 [manager.py:391] -DEBUG 06-24 20:07:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 28840.002 tokens/s -DEBUG 06-24 20:07:15 [stats.py:37] Avg prompt tokens throughput: 28830.884 tokens/s -DEBUG 06-24 20:07:15 [stats.py:37] Avg generate tokens throughput: 9.119 tokens/s -ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:15 lightllm_req_id:8 first_token_cost:367.66576766967773ms total_cost_time:367.71154403686523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6346 prompt_cache_len:5151 prompt_cache_ratio:0.8116924046643555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 -DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.1073160171508789 s -INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.10920143127441406 s -DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=120921729330711434707927188879017047718, time:1750766836.3149257s req_ids:[8] -DEBUG 06-24 20:07:16 [manager.py:391] -INFO 06-24 20:07:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:198.17256927490234ms total_cost_time:198.21524620056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6347 prompt_cache_len:5151 prompt_cache_ratio:0.8115645186702379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 -DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10835099220275879 s -INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.11026167869567871 s -DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=141646032115225800210923034905016704361, time:1750766836.529948s req_ids:[8] -DEBUG 06-24 20:07:16 [manager.py:391] -ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:212.98837661743164ms total_cost_time:213.0296230316162ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6348 prompt_cache_len:5151 prompt_cache_ratio:0.8114366729678639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 -DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10669851303100586 s -INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.10860323905944824 s -DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=166448275712807393193990223908205624051, time:1750766836.7471905s req_ids:[8] -DEBUG 06-24 20:07:16 [manager.py:391] -ERROR 06-24 20:07:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:212.4464511871338ms total_cost_time:212.48817443847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6349 prompt_cache_len:5151 prompt_cache_ratio:0.811308867538195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 -DEBUG 06-24 20:07:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:16 [manager.py:224] router recive req id 8 cost time 0.10805416107177734 s -INFO 06-24 20:07:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099860668182373 s -DEBUG 06-24 20:07:16 [manager.py:391] Prefill Batch: batch_id=139501197248710750614944743135828879782, time:1750766836.9598958s req_ids:[8] -DEBUG 06-24 20:07:16 [manager.py:391] -ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:16 lightllm_req_id:8 first_token_cost:209.63597297668457ms total_cost_time:209.67817306518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6350 prompt_cache_len:5151 prompt_cache_ratio:0.8111811023622048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 -DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10668635368347168 s -INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10855841636657715 s -DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=112490514022899890125824570279857099747, time:1750766837.1794329s req_ids:[8] -DEBUG 06-24 20:07:17 [manager.py:391] -ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:213.2546901702881ms total_cost_time:213.29665184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6351 prompt_cache_len:5151 prompt_cache_ratio:0.8110533774208786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 -DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10648345947265625 s -INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10846996307373047 s -DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=220229294329623826670807193264817125641, time:1750766837.391519s req_ids:[8] -DEBUG 06-24 20:07:17 [manager.py:391] -ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:205.63578605651855ms total_cost_time:205.67798614501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6352 prompt_cache_len:5151 prompt_cache_ratio:0.8109256926952141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 -DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s -INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10911345481872559 s -DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=16023825690020712549425117917313100034, time:1750766837.602196s req_ids:[8] -DEBUG 06-24 20:07:17 [manager.py:391] -ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:204.77032661437988ms total_cost_time:204.833984375ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:6353 prompt_cache_len:5151 prompt_cache_ratio:0.8107980481662207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 -DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:17 [manager.py:224] router recive req id 8 cost time 0.10665130615234375 s -INFO 06-24 20:07:17 [manager.py:68] detokenization recv req id 8 cost time 0.10867047309875488 s -DEBUG 06-24 20:07:17 [manager.py:391] Prefill Batch: batch_id=95677870683987626635732540697801840946, time:1750766837.8132424s req_ids:[8] -DEBUG 06-24 20:07:17 [manager.py:391] -ERROR 06-24 20:07:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:208.05835723876953ms total_cost_time:208.1000804901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6354 prompt_cache_len:5151 prompt_cache_ratio:0.8106704438149197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 -DEBUG 06-24 20:07:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.1080622673034668 s -INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10997867584228516 s -DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=123627088178139103949308910765401113629, time:1750766838.024191s req_ids:[8] -DEBUG 06-24 20:07:18 [manager.py:391] -ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:17 lightllm_req_id:8 first_token_cost:202.74758338928223ms total_cost_time:202.7890682220459ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6355 prompt_cache_len:5151 prompt_cache_ratio:0.8105428796223446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 -DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.1071023941040039 s -INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10908746719360352 s -DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=123847528957609546147921281445750786101, time:1750766838.2357662s req_ids:[8] -DEBUG 06-24 20:07:18 [manager.py:391] -ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:382.43579864501953ms total_cost_time:382.4808597564697ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6356 prompt_cache_len:5151 prompt_cache_ratio:0.8104153555695406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 -DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.10815548896789551 s -INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.11003828048706055 s -DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=62133768182538830403883456505768189413, time:1750766838.6142645s req_ids:[8] -DEBUG 06-24 20:07:18 [manager.py:391] -ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:193.8459873199463ms total_cost_time:193.90177726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:6357 prompt_cache_len:5151 prompt_cache_ratio:0.8102878716375649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 -DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:18 [manager.py:224] router recive req id 8 cost time 0.10763406753540039 s -INFO 06-24 20:07:18 [manager.py:68] detokenization recv req id 8 cost time 0.10969257354736328 s -DEBUG 06-24 20:07:18 [manager.py:391] Prefill Batch: batch_id=179135501778210700476988432898635020351, time:1750766838.819131s req_ids:[8] -DEBUG 06-24 20:07:18 [manager.py:391] -ERROR 06-24 20:07:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:204.12755012512207ms total_cost_time:204.17094230651855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6358 prompt_cache_len:5151 prompt_cache_ratio:0.8101604278074866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 -DEBUG 06-24 20:07:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10785388946533203 s -INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10979819297790527 s -DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=65984342547863205415590248220489148013, time:1750766839.0274625s req_ids:[8] -DEBUG 06-24 20:07:19 [manager.py:391] -ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:18 lightllm_req_id:8 first_token_cost:199.05638694763184ms total_cost_time:199.09906387329102ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6359 prompt_cache_len:5151 prompt_cache_ratio:0.8100330240603869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 -DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10671067237854004 s -INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10861539840698242 s -DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=47975082915100702174733019989173075689, time:1750766839.2327614s req_ids:[8] -DEBUG 06-24 20:07:19 [manager.py:391] -ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:201.033353805542ms total_cost_time:201.07531547546387ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6360 prompt_cache_len:5151 prompt_cache_ratio:0.8099056603773584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 -DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10798478126525879 s -INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s -DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=243077904758446502764099184827245061155, time:1750766839.4368393s req_ids:[8] -DEBUG 06-24 20:07:19 [manager.py:391] -ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:201.42841339111328ms total_cost_time:201.47037506103516ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6361 prompt_cache_len:5151 prompt_cache_ratio:0.8097783367395064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 -DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10754156112670898 s -INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10959219932556152 s -DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=205123648775903889972532964438178937365, time:1750766839.6459172s req_ids:[8] -DEBUG 06-24 20:07:19 [manager.py:391] -ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:202.6822566986084ms total_cost_time:202.72493362426758ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6362 prompt_cache_len:5151 prompt_cache_ratio:0.8096510531279472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 -DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:19 [manager.py:224] router recive req id 8 cost time 0.10790205001831055 s -INFO 06-24 20:07:19 [manager.py:68] detokenization recv req id 8 cost time 0.10994124412536621 s -DEBUG 06-24 20:07:19 [manager.py:391] Prefill Batch: batch_id=134284420754432855497992682180999440343, time:1750766839.8544524s req_ids:[8] -DEBUG 06-24 20:07:19 [manager.py:391] -ERROR 06-24 20:07:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:205.1706314086914ms total_cost_time:205.2140235900879ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6363 prompt_cache_len:5151 prompt_cache_ratio:0.8095238095238095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 -DEBUG 06-24 20:07:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10717129707336426 s -INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10926532745361328 s -DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=184744521303190872415718353400539099429, time:1750766840.0665824s req_ids:[8] -DEBUG 06-24 20:07:20 [manager.py:391] -ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:19 lightllm_req_id:8 first_token_cost:207.67641067504883ms total_cost_time:207.7198028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6364 prompt_cache_len:5151 prompt_cache_ratio:0.8093966059082338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 -DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10633111000061035 s -INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10866570472717285 s -DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=101552565110780088886243180887610041320, time:1750766840.2785263s req_ids:[8] -DEBUG 06-24 20:07:20 [manager.py:391] -ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:182.85560607910156ms total_cost_time:182.89899826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6365 prompt_cache_len:5151 prompt_cache_ratio:0.8092694422623723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 -DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10821270942687988 s -INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.1102297306060791 s -DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=90239129122698199280613024996017023570, time:1750766840.4611707s req_ids:[8] -DEBUG 06-24 20:07:20 [manager.py:391] -ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:194.061279296875ms total_cost_time:194.10347938537598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6366 prompt_cache_len:5151 prompt_cache_ratio:0.8091423185673893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 -DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.10751891136169434 s -INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.10947084426879883 s -DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=194902336225767236775902628182845033582, time:1750766840.666747s req_ids:[8] -DEBUG 06-24 20:07:20 [manager.py:391] -ERROR 06-24 20:07:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:200.1965045928955ms total_cost_time:200.2410888671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6367 prompt_cache_len:5151 prompt_cache_ratio:0.8090152348044605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 -DEBUG 06-24 20:07:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:20 [manager.py:224] router recive req id 8 cost time 0.20749855041503906 s -INFO 06-24 20:07:20 [manager.py:68] detokenization recv req id 8 cost time 0.20907974243164062 s -DEBUG 06-24 20:07:20 [manager.py:391] Prefill Batch: batch_id=26124297775799202445823680602694593347, time:1750766840.9656844s req_ids:[8] -DEBUG 06-24 20:07:20 [manager.py:391] -ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:20 lightllm_req_id:8 first_token_cost:255.76138496398926ms total_cost_time:255.80668449401855ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6368 prompt_cache_len:5151 prompt_cache_ratio:0.8088881909547738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 -DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10706520080566406 s -INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.10909414291381836 s -DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=337776873639168811494052602187561451099, time:1750766841.1237805s req_ids:[8] -DEBUG 06-24 20:07:21 [manager.py:391] -ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:184.64112281799316ms total_cost_time:184.68546867370605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6369 prompt_cache_len:5151 prompt_cache_ratio:0.8087611869995289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 -DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10754966735839844 s -INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.10940194129943848 s -DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=14911192895663488767947941062116202134, time:1750766841.3194096s req_ids:[8] -DEBUG 06-24 20:07:21 [manager.py:391] -ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:200.02412796020508ms total_cost_time:200.06823539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6370 prompt_cache_len:5151 prompt_cache_ratio:0.8086342229199373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 -DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10806417465209961 s -INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.11005210876464844 s -DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=122924971126051225713531411324070708049, time:1750766841.526016s req_ids:[8] -DEBUG 06-24 20:07:21 [manager.py:391] -ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:203.76920700073242ms total_cost_time:203.8123607635498ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6371 prompt_cache_len:5151 prompt_cache_ratio:0.8085072986972218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 -DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:21 [batch.py:51] router release req id 8 -INFO 06-24 20:07:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10654067993164062 s -INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.1084127426147461 s -DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=295376191670073971033604862462797949927, time:1750766841.7344024s req_ids:[8] -DEBUG 06-24 20:07:21 [manager.py:391] -ERROR 06-24 20:07:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:205.49321174621582ms total_cost_time:205.53898811340332ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6372 prompt_cache_len:5151 prompt_cache_ratio:0.8083804143126178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 -DEBUG 06-24 20:07:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:21 [manager.py:224] router recive req id 8 cost time 0.10669946670532227 s -INFO 06-24 20:07:21 [manager.py:68] detokenization recv req id 8 cost time 0.1087040901184082 s -DEBUG 06-24 20:07:21 [manager.py:391] Prefill Batch: batch_id=37940593202108407917900423591364897712, time:1750766841.9439292s req_ids:[8] -DEBUG 06-24 20:07:21 [manager.py:391] -ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:21 lightllm_req_id:8 first_token_cost:204.69951629638672ms total_cost_time:204.7433853149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6373 prompt_cache_len:5151 prompt_cache_ratio:0.8082535697473717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 -DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10732197761535645 s -INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10929989814758301 s -DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=316165221962432977187434463312088224185, time:1750766842.1528869s req_ids:[8] -DEBUG 06-24 20:07:22 [manager.py:391] -ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:203.0313014984131ms total_cost_time:203.07517051696777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6374 prompt_cache_len:5151 prompt_cache_ratio:0.8081267649827424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 -DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.1074533462524414 s -INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s -DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=67211706646516948832983196714321265062, time:1750766842.3607624s req_ids:[8] -DEBUG 06-24 20:07:22 [manager.py:391] -ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:206.24423027038574ms total_cost_time:206.28690719604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6375 prompt_cache_len:5151 prompt_cache_ratio:0.808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 -DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10659098625183105 s -INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10848021507263184 s -DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=258443828891221266878168040515043055088, time:1750766842.5725026s req_ids:[8] -DEBUG 06-24 20:07:22 [manager.py:391] -ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:202.25882530212402ms total_cost_time:202.301025390625ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6376 prompt_cache_len:5151 prompt_cache_ratio:0.8078732747804266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 -DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s -INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.11019158363342285 s -DEBUG 06-24 20:07:22 [manager.py:391] Prefill Batch: batch_id=87868904835949324238163358204285998800, time:1750766842.7833178s req_ids:[8] -DEBUG 06-24 20:07:22 [manager.py:391] -ERROR 06-24 20:07:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:211.85803413391113ms total_cost_time:211.9009494781494ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6377 prompt_cache_len:5151 prompt_cache_ratio:0.807746589305316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 -DEBUG 06-24 20:07:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:22 [manager.py:224] router recive req id 8 cost time 0.10758566856384277 s -INFO 06-24 20:07:22 [manager.py:68] detokenization recv req id 8 cost time 0.10953187942504883 s -DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=254954695257033260540786436652530404236, time:1750766843.0026777s req_ids:[8] -DEBUG 06-24 20:07:23 [manager.py:391] -ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:22 lightllm_req_id:8 first_token_cost:214.25652503967285ms total_cost_time:214.29991722106934ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6378 prompt_cache_len:5151 prompt_cache_ratio:0.8076199435559737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 -DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:23 [batch.py:51] router release req id 8 -INFO 06-24 20:07:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:07:23 [statics_utils.py:24] mean first cost: 238.5498575294847 ms -INFO 06-24 20:07:23 [statics_utils.py:24] mean per token cost: 0.11900594229094989 ms -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.1067807674407959 s -INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10863876342773438 s -DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=295321640378937250801195798856079967885, time:1750766843.223434s req_ids:[8] -DEBUG 06-24 20:07:23 [manager.py:391] -ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:214.50185775756836ms total_cost_time:214.54715728759766ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6379 prompt_cache_len:5151 prompt_cache_ratio:0.8074933375137169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 -DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.1067051887512207 s -INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10854434967041016 s -DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=207689085523368735196677137944278563454, time:1750766843.434768s req_ids:[8] -DEBUG 06-24 20:07:23 [manager.py:391] -ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:357.27787017822266ms total_cost_time:357.32269287109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6380 prompt_cache_len:5151 prompt_cache_ratio:0.8073667711598747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 -DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.10730171203613281 s -INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10913777351379395 s -DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=101658396661300611045323289221455467678, time:1750766843.7879584s req_ids:[8] -DEBUG 06-24 20:07:23 [manager.py:391] -ERROR 06-24 20:07:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:195.60575485229492ms total_cost_time:195.62721252441406ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6381 prompt_cache_len:5151 prompt_cache_ratio:0.8072402444757875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 -DEBUG 06-24 20:07:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:23 [manager.py:224] router recive req id 8 cost time 0.10783743858337402 s -INFO 06-24 20:07:23 [manager.py:68] detokenization recv req id 8 cost time 0.10978078842163086 s -DEBUG 06-24 20:07:23 [manager.py:391] Prefill Batch: batch_id=338813048448525875163439705584924304477, time:1750766843.9942977s req_ids:[8] -DEBUG 06-24 20:07:23 [manager.py:391] -ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:23 lightllm_req_id:8 first_token_cost:205.6710720062256ms total_cost_time:205.71541786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6382 prompt_cache_len:5151 prompt_cache_ratio:0.8071137574428079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 -DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10778260231018066 s -INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s -DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=310836859909545211380722812137295434747, time:1750766844.2100923s req_ids:[8] -DEBUG 06-24 20:07:24 [manager.py:391] -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:24 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:209.33103561401367ms total_cost_time:209.37466621398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6383 prompt_cache_len:5151 prompt_cache_ratio:0.8069873100422998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 -DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10744976997375488 s -INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10937643051147461 s -DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=191853468497508748025345220208125563413, time:1750766844.41712s req_ids:[8] -DEBUG 06-24 20:07:24 [manager.py:391] -ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:199.13792610168457ms total_cost_time:199.17941093444824ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6384 prompt_cache_len:5151 prompt_cache_ratio:0.8068609022556391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 -DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.10740399360656738 s -INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10941457748413086 s -DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=276586219381523023915976329825600895267, time:1750766844.6209106s req_ids:[8] -DEBUG 06-24 20:07:24 [manager.py:391] -ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:202.90207862854004ms total_cost_time:202.9428482055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:6385 prompt_cache_len:5151 prompt_cache_ratio:0.806734534064213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 -DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:24 [manager.py:224] router recive req id 8 cost time 0.1068572998046875 s -INFO 06-24 20:07:24 [manager.py:68] detokenization recv req id 8 cost time 0.10864877700805664 s -DEBUG 06-24 20:07:24 [manager.py:391] Prefill Batch: batch_id=69964543967062815121492064754984586966, time:1750766844.8288844s req_ids:[8] -DEBUG 06-24 20:07:24 [manager.py:391] -ERROR 06-24 20:07:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:201.0507583618164ms total_cost_time:201.0955810546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6386 prompt_cache_len:5151 prompt_cache_ratio:0.8066082054494206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 -DEBUG 06-24 20:07:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.1066596508026123 s -INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.10859131813049316 s -DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=126492723920003215010828325983851292807, time:1750766845.0362968s req_ids:[8] -DEBUG 06-24 20:07:25 [manager.py:391] -ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:24 lightllm_req_id:8 first_token_cost:204.41222190856934ms total_cost_time:204.4541835784912ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6387 prompt_cache_len:5151 prompt_cache_ratio:0.8064819163926726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 -DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10781979560852051 s -INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.11107778549194336 s -DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=7238148324664032186187916021557349381, time:1750766845.2590988s req_ids:[8] -DEBUG 06-24 20:07:25 [manager.py:391] -ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:218.22834014892578ms total_cost_time:218.27149391174316ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6388 prompt_cache_len:5151 prompt_cache_ratio:0.8063556668753914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 -DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s -INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.10998725891113281 s -DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=188447203118245659279288565994770417111, time:1750766845.4696946s req_ids:[8] -DEBUG 06-24 20:07:25 [manager.py:391] -ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:207.3822021484375ms total_cost_time:207.4282169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6389 prompt_cache_len:5151 prompt_cache_ratio:0.8062294568790108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 -DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.1071019172668457 s -INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.1089470386505127 s -DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=280726450229926050658059604256089491298, time:1750766845.6876192s req_ids:[8] -DEBUG 06-24 20:07:25 [manager.py:391] -ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:215.08216857910156ms total_cost_time:215.12794494628906ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6390 prompt_cache_len:5151 prompt_cache_ratio:0.8061032863849765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 -DEBUG 06-24 20:07:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:25 [manager.py:224] router recive req id 8 cost time 0.10803675651550293 s -INFO 06-24 20:07:25 [manager.py:68] detokenization recv req id 8 cost time 0.11008882522583008 s -DEBUG 06-24 20:07:25 [manager.py:391] Prefill Batch: batch_id=160188690540386492952828426374536732900, time:1750766845.9009745s req_ids:[8] -DEBUG 06-24 20:07:25 [manager.py:391] -ERROR 06-24 20:07:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:07:25 [stats.py:37] Avg tokens(prompt+generate) throughput: 28570.013 tokens/s -DEBUG 06-24 20:07:25 [stats.py:37] Avg prompt tokens throughput: 28560.945 tokens/s -DEBUG 06-24 20:07:25 [stats.py:37] Avg generate tokens throughput: 9.068 tokens/s -INFO 06-24 20:07:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:208.44101905822754ms total_cost_time:208.48464965820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6391 prompt_cache_len:5151 prompt_cache_ratio:0.8059771553747458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 -DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s -INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.10932230949401855 s -DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=228604054740154981689772448206816494487, time:1750766846.1310523s req_ids:[8] -DEBUG 06-24 20:07:26 [manager.py:391] -ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:25 lightllm_req_id:8 first_token_cost:222.02277183532715ms total_cost_time:222.06592559814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6392 prompt_cache_len:5151 prompt_cache_ratio:0.8058510638297872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 -DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.1095120906829834 s -INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11154937744140625 s -DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=312915713167388912111118957156371639118, time:1750766846.3414807s req_ids:[8] -DEBUG 06-24 20:07:26 [manager.py:391] -ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:391.4530277252197ms total_cost_time:391.4990425109863ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6393 prompt_cache_len:5151 prompt_cache_ratio:0.8057250117315814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 -DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10832071304321289 s -INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s -DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=102406642911270979826871072940478293446, time:1750766846.7300222s req_ids:[8] -DEBUG 06-24 20:07:26 [manager.py:391] -ERROR 06-24 20:07:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:196.09403610229492ms total_cost_time:196.1379051208496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6394 prompt_cache_len:5151 prompt_cache_ratio:0.8055989990616202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 -DEBUG 06-24 20:07:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:26 [manager.py:224] router recive req id 8 cost time 0.10853123664855957 s -INFO 06-24 20:07:26 [manager.py:68] detokenization recv req id 8 cost time 0.11041951179504395 s -DEBUG 06-24 20:07:26 [manager.py:391] Prefill Batch: batch_id=196786156479095613644525651729462782558, time:1750766846.9373221s req_ids:[8] -DEBUG 06-24 20:07:26 [manager.py:391] -ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:26 lightllm_req_id:8 first_token_cost:207.87644386291504ms total_cost_time:207.92222023010254ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6395 prompt_cache_len:5151 prompt_cache_ratio:0.8054730258014073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 -DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10716009140014648 s -INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10893034934997559 s -DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=45001006765452936920707263297335184944, time:1750766847.1490228s req_ids:[8] -DEBUG 06-24 20:07:27 [manager.py:391] -ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:201.88379287719727ms total_cost_time:201.92790031433105ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6396 prompt_cache_len:5151 prompt_cache_ratio:0.8053470919324578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 -DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10765314102172852 s -INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10933279991149902 s -DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=94268636562644782262260521302255298770, time:1750766847.3564608s req_ids:[8] -DEBUG 06-24 20:07:27 [manager.py:391] -ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:203.31048965454102ms total_cost_time:203.3538818359375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6397 prompt_cache_len:5151 prompt_cache_ratio:0.8052211974362983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 -DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10724377632141113 s -INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10928487777709961 s -DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=43899599509596085957061115679107144678, time:1750766847.5634959s req_ids:[8] -DEBUG 06-24 20:07:27 [manager.py:391] -ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:202.90303230285645ms total_cost_time:202.94475555419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6398 prompt_cache_len:5151 prompt_cache_ratio:0.805095342294467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 -DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10718941688537598 s -INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10891366004943848 s -DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=83915149644297881688912840550406248485, time:1750766847.780428s req_ids:[8] -DEBUG 06-24 20:07:27 [manager.py:391] -ERROR 06-24 20:07:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:212.28647232055664ms total_cost_time:212.33105659484863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6399 prompt_cache_len:5151 prompt_cache_ratio:0.8049695264885138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 -DEBUG 06-24 20:07:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:27 [manager.py:224] router recive req id 8 cost time 0.10703325271606445 s -INFO 06-24 20:07:27 [manager.py:68] detokenization recv req id 8 cost time 0.10871243476867676 s -DEBUG 06-24 20:07:27 [manager.py:391] Prefill Batch: batch_id=209476011459872347304076654898771444962, time:1750766847.9947937s req_ids:[8] -DEBUG 06-24 20:07:27 [manager.py:391] -ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:27 lightllm_req_id:8 first_token_cost:210.66999435424805ms total_cost_time:210.71338653564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6400 prompt_cache_len:5151 prompt_cache_ratio:0.80484375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 -DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10742998123168945 s -INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10912156105041504 s -DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=2262121424716448946689578322702580357, time:1750766848.2041864s req_ids:[8] -DEBUG 06-24 20:07:28 [manager.py:391] -ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.71532440185547ms total_cost_time:203.75919342041016ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6401 prompt_cache_len:5151 prompt_cache_ratio:0.8047180128104984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 -DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10802078247070312 s -INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005854606628418 s -DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=22364813314466253442878272072547929050, time:1750766848.4126894s req_ids:[8] -DEBUG 06-24 20:07:28 [manager.py:391] -ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.51910591125488ms total_cost_time:203.56345176696777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6402 prompt_cache_len:5151 prompt_cache_ratio:0.8045923149015932 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 -DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s -INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10951399803161621 s -DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=250155622624881252824915617003879091350, time:1750766848.6211555s req_ids:[8] -DEBUG 06-24 20:07:28 [manager.py:391] -ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:207.7162265777588ms total_cost_time:207.76009559631348ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6403 prompt_cache_len:5151 prompt_cache_ratio:0.8044666562548806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 -DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:28 [manager.py:224] router recive req id 8 cost time 0.10750627517700195 s -INFO 06-24 20:07:28 [manager.py:68] detokenization recv req id 8 cost time 0.10954046249389648 s -DEBUG 06-24 20:07:28 [manager.py:391] Prefill Batch: batch_id=38931049553816605501730715291273131191, time:1750766848.8326027s req_ids:[8] -DEBUG 06-24 20:07:28 [manager.py:391] -ERROR 06-24 20:07:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:198.07910919189453ms total_cost_time:198.1217861175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6404 prompt_cache_len:5151 prompt_cache_ratio:0.8043410368519676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 -DEBUG 06-24 20:07:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10748672485351562 s -INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.10951066017150879 s -DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=161586170111866255721537388258675518262, time:1750766849.0367486s req_ids:[8] -DEBUG 06-24 20:07:29 [manager.py:391] -ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:28 lightllm_req_id:8 first_token_cost:203.16839218139648ms total_cost_time:203.2146453857422ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6405 prompt_cache_len:5151 prompt_cache_ratio:0.804215456674473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 -DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10824990272521973 s -INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s -DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=17251701210669346271855920599126243357, time:1750766849.2449346s req_ids:[8] -DEBUG 06-24 20:07:29 [manager.py:391] -ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.87203788757324ms total_cost_time:202.91590690612793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6406 prompt_cache_len:5151 prompt_cache_ratio:0.8040899157040274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 -DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s -INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.11011862754821777 s -DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=230030077836748532013066974298752324777, time:1750766849.45452s req_ids:[8] -DEBUG 06-24 20:07:29 [manager.py:391] -ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:372.114896774292ms total_cost_time:372.1587657928467ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6407 prompt_cache_len:5151 prompt_cache_ratio:0.8039644139222725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 -DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:29 [manager.py:224] router recive req id 8 cost time 0.10744667053222656 s -INFO 06-24 20:07:29 [manager.py:68] detokenization recv req id 8 cost time 0.10946440696716309 s -DEBUG 06-24 20:07:29 [manager.py:391] Prefill Batch: batch_id=19327026418283520824285305625103672068, time:1750766849.8295848s req_ids:[8] -DEBUG 06-24 20:07:29 [manager.py:391] -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.25071907043457ms total_cost_time:202.29458808898926ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6408 prompt_cache_len:5151 prompt_cache_ratio:0.8038389513108615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 -DEBUG 06-24 20:07:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.109527587890625 s -INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.11152815818786621 s -DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=155311556029215410727470590641399905063, time:1750766850.037623s req_ids:[8] -DEBUG 06-24 20:07:30 [manager.py:391] -ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:29 lightllm_req_id:8 first_token_cost:202.6810646057129ms total_cost_time:202.72421836853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6409 prompt_cache_len:5151 prompt_cache_ratio:0.8037135278514589 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 -DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10698485374450684 s -INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.1090095043182373 s -DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=243042772619747918448172936446622181532, time:1750766850.2441173s req_ids:[8] -DEBUG 06-24 20:07:30 [manager.py:391] -ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:202.8818130493164ms total_cost_time:202.92377471923828ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6410 prompt_cache_len:5151 prompt_cache_ratio:0.803588143525741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 -DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10778427124023438 s -INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.10981512069702148 s -DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=61099337780632933381892147634630145822, time:1750766850.453742s req_ids:[8] -DEBUG 06-24 20:07:30 [manager.py:391] -ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:204.4198513031006ms total_cost_time:204.46467399597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6411 prompt_cache_len:5151 prompt_cache_ratio:0.8034627983153955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 -DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10766291618347168 s -INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.1096959114074707 s -DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=18289494735402077466102731521670451904, time:1750766850.6624231s req_ids:[8] -DEBUG 06-24 20:07:30 [manager.py:391] -ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:204.5886516571045ms total_cost_time:204.63228225708008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6412 prompt_cache_len:5151 prompt_cache_ratio:0.803337492202121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 -DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:30 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s -INFO 06-24 20:07:30 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s -DEBUG 06-24 20:07:30 [manager.py:391] Prefill Batch: batch_id=162766293716190275354455887563044039384, time:1750766850.8714647s req_ids:[8] -DEBUG 06-24 20:07:30 [manager.py:391] -ERROR 06-24 20:07:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:200.96921920776367ms total_cost_time:201.01213455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6413 prompt_cache_len:5151 prompt_cache_ratio:0.8032122251676282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 -DEBUG 06-24 20:07:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10924148559570312 s -INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.11180567741394043 s -DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=174748775501565243194153653442195068240, time:1750766851.0758317s req_ids:[8] -DEBUG 06-24 20:07:31 [manager.py:391] -ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:30 lightllm_req_id:8 first_token_cost:202.7149200439453ms total_cost_time:202.7592658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6414 prompt_cache_len:5151 prompt_cache_ratio:0.803086997193639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 -DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10758638381958008 s -INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s -DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=219999819138247977150489355215866715369, time:1750766851.2862265s req_ids:[8] -DEBUG 06-24 20:07:31 [manager.py:391] -ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:209.64956283569336ms total_cost_time:209.71059799194336ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:6415 prompt_cache_len:5151 prompt_cache_ratio:0.8029618082618862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 -DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.11081480979919434 s -INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.11283278465270996 s -DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=46746523174745507555522880867120847269, time:1750766851.5102866s req_ids:[8] -DEBUG 06-24 20:07:31 [manager.py:391] -ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:226.93657875061035ms total_cost_time:226.98044776916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6416 prompt_cache_len:5151 prompt_cache_ratio:0.8028366583541147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 -DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10687565803527832 s -INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10877633094787598 s -DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=281149605712715575170090412663862597737, time:1750766851.7211468s req_ids:[8] -DEBUG 06-24 20:07:31 [manager.py:391] -ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:184.7665309906006ms total_cost_time:184.82661247253418ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6417 prompt_cache_len:5151 prompt_cache_ratio:0.8027115474520804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 -DEBUG 06-24 20:07:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:31 [manager.py:224] router recive req id 8 cost time 0.10501790046691895 s -INFO 06-24 20:07:31 [manager.py:68] detokenization recv req id 8 cost time 0.10707521438598633 s -DEBUG 06-24 20:07:31 [manager.py:391] Prefill Batch: batch_id=320220794002356667151525472959349061768, time:1750766851.9367573s req_ids:[8] -DEBUG 06-24 20:07:31 [manager.py:391] -ERROR 06-24 20:07:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:31 lightllm_req_id:8 first_token_cost:201.99012756347656ms total_cost_time:202.03733444213867ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6418 prompt_cache_len:5151 prompt_cache_ratio:0.8025864755375507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 -DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s -INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.1108243465423584 s -DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=789083248672545067439897866642060409, time:1750766852.129937s req_ids:[8] -DEBUG 06-24 20:07:32 [manager.py:391] -ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:369.2138195037842ms total_cost_time:369.2584037780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6419 prompt_cache_len:5151 prompt_cache_ratio:0.8024614425923041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 -DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s -INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.10991716384887695 s -DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=58100478191498191975058167756618835645, time:1750766852.49919s req_ids:[8] -DEBUG 06-24 20:07:32 [manager.py:391] -ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:189.3751621246338ms total_cost_time:189.41903114318848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6420 prompt_cache_len:5151 prompt_cache_ratio:0.8023364485981308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 -DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:32 [batch.py:51] router release req id 8 -DEBUG 06-24 20:07:32 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:32 [manager.py:283] -DEBUG 06-24 20:07:32 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:32 [manager.py:284] -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.11073827743530273 s -INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.11358118057250977 s -DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=319430017784338069508014450912251849191, time:1750766852.6971173s req_ids:[8] -DEBUG 06-24 20:07:32 [manager.py:391] -ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:181.44679069519043ms total_cost_time:181.49375915527344ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6421 prompt_cache_len:5151 prompt_cache_ratio:0.8022114935368323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 -DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:32 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s -INFO 06-24 20:07:32 [manager.py:68] detokenization recv req id 8 cost time 0.10864424705505371 s -DEBUG 06-24 20:07:32 [manager.py:391] Prefill Batch: batch_id=88577200680938638912766003500699994522, time:1750766852.885413s req_ids:[8] -DEBUG 06-24 20:07:32 [manager.py:391] -ERROR 06-24 20:07:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:174.9260425567627ms total_cost_time:174.9706268310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6422 prompt_cache_len:5151 prompt_cache_ratio:0.8020865773902212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 -DEBUG 06-24 20:07:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10654830932617188 s -INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.10848212242126465 s -DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=290329378628107991180201689580422536189, time:1750766853.0610497s req_ids:[8] -DEBUG 06-24 20:07:33 [manager.py:391] -ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:32 lightllm_req_id:8 first_token_cost:197.31974601745605ms total_cost_time:197.36170768737793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6423 prompt_cache_len:5151 prompt_cache_ratio:0.8019617001401215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 -DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s -INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s -DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=235416984454171649416937346371902768846, time:1750766853.2752743s req_ids:[8] -DEBUG 06-24 20:07:33 [manager.py:391] -ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:213.42229843139648ms total_cost_time:213.44232559204102ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6424 prompt_cache_len:5151 prompt_cache_ratio:0.8018368617683687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 -DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10640430450439453 s -INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1081845760345459 s -DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=181722341802678490307138560262165851373, time:1750766853.4837644s req_ids:[8] -DEBUG 06-24 20:07:33 [manager.py:391] -ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:213.19150924682617ms total_cost_time:213.23704719543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6425 prompt_cache_len:5151 prompt_cache_ratio:0.8017120622568094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 -DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s -INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.1102755069732666 s -DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=312925680023984549522281534196669479487, time:1750766853.698264s req_ids:[8] -DEBUG 06-24 20:07:33 [manager.py:391] -ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:193.90606880187988ms total_cost_time:193.92871856689453ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6426 prompt_cache_len:5151 prompt_cache_ratio:0.8015873015873016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 -DEBUG 06-24 20:07:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:33 [manager.py:224] router recive req id 8 cost time 0.10847854614257812 s -INFO 06-24 20:07:33 [manager.py:68] detokenization recv req id 8 cost time 0.10965394973754883 s -DEBUG 06-24 20:07:33 [manager.py:391] Prefill Batch: batch_id=229135695201755924284186637707151974331, time:1750766853.8980339s req_ids:[8] -DEBUG 06-24 20:07:33 [manager.py:391] -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:194.48232650756836ms total_cost_time:194.50974464416504ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:6427 prompt_cache_len:5151 prompt_cache_ratio:0.8014625797417146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 -DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10703754425048828 s -INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.1089315414428711 s -DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=242998436925304072190618214014751954340, time:1750766854.0953164s req_ids:[8] -DEBUG 06-24 20:07:34 [manager.py:391] -ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:33 lightllm_req_id:8 first_token_cost:198.71759414672852ms total_cost_time:198.744535446167ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6428 prompt_cache_len:5151 prompt_cache_ratio:0.801337896701929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 -DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.1057596206665039 s -INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10777163505554199 s -DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=303348420371880266158676659486538769898, time:1750766854.2984066s req_ids:[8] -DEBUG 06-24 20:07:34 [manager.py:391] -ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:391.40796661376953ms total_cost_time:391.42823219299316ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6429 prompt_cache_len:5151 prompt_cache_ratio:0.8012132524498367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 -DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10339975357055664 s -INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10541796684265137 s -DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=154021285788498723307609437758763867769, time:1750766854.6887193s req_ids:[8] -DEBUG 06-24 20:07:34 [manager.py:391] -ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:209.01846885681152ms total_cost_time:209.04159545898438ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6430 prompt_cache_len:5151 prompt_cache_ratio:0.8010886469673406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 -DEBUG 06-24 20:07:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:34 [manager.py:224] router recive req id 8 cost time 0.10462045669555664 s -INFO 06-24 20:07:34 [manager.py:68] detokenization recv req id 8 cost time 0.10613250732421875 s -DEBUG 06-24 20:07:34 [manager.py:391] Prefill Batch: batch_id=78589999635016606555057666509739248537, time:1750766854.9032788s req_ids:[8] -DEBUG 06-24 20:07:34 [manager.py:391] -ERROR 06-24 20:07:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:193.39346885681152ms total_cost_time:193.42637062072754ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:6431 prompt_cache_len:5151 prompt_cache_ratio:0.8009640802363551 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.1039276123046875 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10494732856750488 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=277651636101555860355521136388073706659, time:1750766855.1197765s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:34 lightllm_req_id:8 first_token_cost:174.38149452209473ms total_cost_time:174.40223693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6432 prompt_cache_len:5151 prompt_cache_ratio:0.800839552238806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10335040092468262 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10427260398864746 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=127003829063534249482766862879952724486, time:1750766855.282155s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:163.2828712463379ms total_cost_time:163.33436965942383ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6433 prompt_cache_len:5151 prompt_cache_ratio:0.8007150629566299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10391640663146973 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10492277145385742 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=152945352410245173994213632169770991675, time:1750766855.445187s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:179.7318458557129ms total_cost_time:179.75568771362305ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6434 prompt_cache_len:5151 prompt_cache_ratio:0.800590612371775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10452556610107422 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10578370094299316 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=184524012450491782012372307907848467272, time:1750766855.6294634s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:184.51476097106934ms total_cost_time:184.54599380493164ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:6435 prompt_cache_len:5151 prompt_cache_ratio:0.8004662004662004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10402798652648926 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10498666763305664 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=70282385645472796513511399873863791238, time:1750766855.8179278s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -ERROR 06-24 20:07:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:165.47727584838867ms total_cost_time:165.4980182647705ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6436 prompt_cache_len:5151 prompt_cache_ratio:0.800341827221877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 -DEBUG 06-24 20:07:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:35 [manager.py:224] router recive req id 8 cost time 0.10463714599609375 s -INFO 06-24 20:07:35 [manager.py:68] detokenization recv req id 8 cost time 0.10551190376281738 s -DEBUG 06-24 20:07:35 [manager.py:391] Prefill Batch: batch_id=42542426947358313940718517522071905179, time:1750766855.992846s req_ids:[8] -DEBUG 06-24 20:07:35 [manager.py:391] -DEBUG 06-24 20:07:35 [stats.py:37] Avg tokens(prompt+generate) throughput: 29490.393 tokens/s -DEBUG 06-24 20:07:35 [stats.py:37] Avg prompt tokens throughput: 29481.300 tokens/s -DEBUG 06-24 20:07:35 [stats.py:37] Avg generate tokens throughput: 9.092 tokens/s -ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:35 lightllm_req_id:8 first_token_cost:188.60864639282227ms total_cost_time:188.6749267578125ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:6437 prompt_cache_len:5151 prompt_cache_ratio:0.8002174926207861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 -DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10462069511413574 s -INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10640382766723633 s -DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=272904143396359929035142605338255024995, time:1750766856.1916833s req_ids:[8] -DEBUG 06-24 20:07:36 [manager.py:391] -ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:193.71747970581055ms total_cost_time:193.75348091125488ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:6438 prompt_cache_len:5151 prompt_cache_ratio:0.8000931966449207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 -DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10660505294799805 s -INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10811328887939453 s -DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=29542705308469339236021555113426164790, time:1750766856.3891482s req_ids:[8] -DEBUG 06-24 20:07:36 [manager.py:391] -ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:190.80066680908203ms total_cost_time:190.83285331726074ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:6439 prompt_cache_len:5151 prompt_cache_ratio:0.7999689392762851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 -DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.10590386390686035 s -INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.10761308670043945 s -DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=183847986241234897755007620097506754741, time:1750766856.58589s req_ids:[8] -DEBUG 06-24 20:07:36 [manager.py:391] -ERROR 06-24 20:07:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:194.08059120178223ms total_cost_time:194.1068172454834ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6440 prompt_cache_len:5151 prompt_cache_ratio:0.7998447204968944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 -DEBUG 06-24 20:07:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:36 [manager.py:224] router recive req id 8 cost time 0.30413079261779785 s -INFO 06-24 20:07:36 [manager.py:68] detokenization recv req id 8 cost time 0.30516481399536133 s -DEBUG 06-24 20:07:36 [manager.py:391] Prefill Batch: batch_id=243528582557082727624923433659228573093, time:1750766856.9848533s req_ids:[8] -DEBUG 06-24 20:07:36 [manager.py:391] -ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:36 lightllm_req_id:8 first_token_cost:396.390438079834ms total_cost_time:396.41523361206055ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6441 prompt_cache_len:5151 prompt_cache_ratio:0.799720540288775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 -DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10393023490905762 s -INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10497665405273438 s -DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=194638898884734219030451220826304650726, time:1750766857.182197s req_ids:[8] -DEBUG 06-24 20:07:37 [manager.py:391] -ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:195.8754062652588ms total_cost_time:195.9228515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6442 prompt_cache_len:5151 prompt_cache_ratio:0.7995963986339646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 -DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10329461097717285 s -INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10413932800292969 s -DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=340134401958962425068140661363234134989, time:1750766857.3802767s req_ids:[8] -DEBUG 06-24 20:07:37 [manager.py:391] -ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:194.23937797546387ms total_cost_time:194.26345825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6443 prompt_cache_len:5151 prompt_cache_ratio:0.7994722955145118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 -DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10335493087768555 s -INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10423803329467773 s -DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=146273797675852474904534626028972506895, time:1750766857.5782025s req_ids:[8] -DEBUG 06-24 20:07:37 [manager.py:391] -ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:196.50530815124512ms total_cost_time:196.52938842773438ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6444 prompt_cache_len:5151 prompt_cache_ratio:0.7993482309124768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 -DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10355377197265625 s -INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10455203056335449 s -DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=302558271918179564273532316603594788709, time:1750766857.7772455s req_ids:[8] -DEBUG 06-24 20:07:37 [manager.py:391] -ERROR 06-24 20:07:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:195.89781761169434ms total_cost_time:195.91999053955078ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6445 prompt_cache_len:5151 prompt_cache_ratio:0.7992242048099302 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 -DEBUG 06-24 20:07:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:37 [manager.py:224] router recive req id 8 cost time 0.10390257835388184 s -INFO 06-24 20:07:37 [manager.py:68] detokenization recv req id 8 cost time 0.10486221313476562 s -DEBUG 06-24 20:07:37 [manager.py:391] Prefill Batch: batch_id=270572504772751118693218582419165926723, time:1750766857.9739037s req_ids:[8] -DEBUG 06-24 20:07:37 [manager.py:391] -ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:37 lightllm_req_id:8 first_token_cost:192.3084259033203ms total_cost_time:192.3379898071289ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:6446 prompt_cache_len:5151 prompt_cache_ratio:0.7991002171889544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 -DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10349249839782715 s -INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10440373420715332 s -DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=116178694139485277705486577136024336778, time:1750766858.173152s req_ids:[8] -DEBUG 06-24 20:07:38 [manager.py:391] -ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.9012279510498ms total_cost_time:194.92363929748535ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6447 prompt_cache_len:5151 prompt_cache_ratio:0.7989762680316427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 -DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10451436042785645 s -INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10538744926452637 s -DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=194128265109180259560578825727958352792, time:1750766858.3682892s req_ids:[8] -DEBUG 06-24 20:07:38 [manager.py:391] -ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:193.57848167419434ms total_cost_time:193.60041618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6448 prompt_cache_len:5151 prompt_cache_ratio:0.7988523573200993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 -DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10422682762145996 s -INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10503220558166504 s -DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=51658120734796558279444531196498436430, time:1750766858.5658338s req_ids:[8] -DEBUG 06-24 20:07:38 [manager.py:391] -ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:193.02678108215332ms total_cost_time:193.06540489196777ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:6449 prompt_cache_len:5151 prompt_cache_ratio:0.7987284850364398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 -DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10414910316467285 s -INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10494208335876465 s -DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=315696345731366669095935905320573377262, time:1750766858.7622507s req_ids:[8] -DEBUG 06-24 20:07:38 [manager.py:391] -ERROR 06-24 20:07:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.54312324523926ms total_cost_time:194.5657730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6450 prompt_cache_len:5151 prompt_cache_ratio:0.7986046511627907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 -DEBUG 06-24 20:07:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:38 [manager.py:224] router recive req id 8 cost time 0.10428786277770996 s -INFO 06-24 20:07:38 [manager.py:68] detokenization recv req id 8 cost time 0.10522341728210449 s -DEBUG 06-24 20:07:38 [manager.py:391] Prefill Batch: batch_id=206908801593370767481508339372952323565, time:1750766858.9602518s req_ids:[8] -DEBUG 06-24 20:07:38 [manager.py:391] -ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:38 lightllm_req_id:8 first_token_cost:194.69785690307617ms total_cost_time:194.72074508666992ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6451 prompt_cache_len:5151 prompt_cache_ratio:0.7984808556812897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 -DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10310029983520508 s -INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10400819778442383 s -DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=132553718270098213108977223783422420076, time:1750766859.1570463s req_ids:[8] -DEBUG 06-24 20:07:39 [manager.py:391] -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:195.36590576171875ms total_cost_time:195.3887939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6452 prompt_cache_len:5151 prompt_cache_ratio:0.7983570985740855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 -DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10332798957824707 s -INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10425090789794922 s -DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=58385016245443314743285601264492238964, time:1750766859.35493s req_ids:[8] -DEBUG 06-24 20:07:39 [manager.py:391] -ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:393.7079906463623ms total_cost_time:393.73016357421875ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6453 prompt_cache_len:5151 prompt_cache_ratio:0.798233379823338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 -DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10347723960876465 s -INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10437154769897461 s -DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=298196186531360028699079477309244576656, time:1750766859.75108s req_ids:[8] -DEBUG 06-24 20:07:39 [manager.py:391] -ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:195.9521770477295ms total_cost_time:195.97291946411133ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6454 prompt_cache_len:5151 prompt_cache_ratio:0.7981096994112179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 -DEBUG 06-24 20:07:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:39 [manager.py:224] router recive req id 8 cost time 0.10316324234008789 s -INFO 06-24 20:07:39 [manager.py:68] detokenization recv req id 8 cost time 0.10407018661499023 s -DEBUG 06-24 20:07:39 [manager.py:391] Prefill Batch: batch_id=116113632928286299315228017006581560825, time:1750766859.948663s req_ids:[8] -DEBUG 06-24 20:07:39 [manager.py:391] -ERROR 06-24 20:07:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:185.45985221862793ms total_cost_time:185.4841709136963ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6455 prompt_cache_len:5151 prompt_cache_ratio:0.797986057319907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10398268699645996 s -INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.1051173210144043 s -DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=223798825897450242420293689125319637877, time:1750766860.12405s req_ids:[8] -DEBUG 06-24 20:07:40 [manager.py:391] -ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:39 lightllm_req_id:8 first_token_cost:194.43273544311523ms total_cost_time:194.45490837097168ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6456 prompt_cache_len:5151 prompt_cache_ratio:0.7978624535315985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10289788246154785 s -INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10374879837036133 s -DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=245930913687896846212758576001524123335, time:1750766860.3314426s req_ids:[8] -DEBUG 06-24 20:07:40 [manager.py:391] -ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:181.4565658569336ms total_cost_time:181.47540092468262ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6457 prompt_cache_len:5151 prompt_cache_ratio:0.7977388880284962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10250973701477051 s -INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10334658622741699 s -DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=300814501955891480708230787553195268902, time:1750766860.5038974s req_ids:[8] -DEBUG 06-24 20:07:40 [manager.py:391] -ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:171.5857982635498ms total_cost_time:171.60415649414062ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6458 prompt_cache_len:5151 prompt_cache_ratio:0.7976153607928151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10261392593383789 s -INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10353565216064453 s -DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=328856043597080550726110006887545014462, time:1750766860.677881s req_ids:[8] -DEBUG 06-24 20:07:40 [manager.py:391] -ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:172.2245216369629ms total_cost_time:172.24502563476562ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6459 prompt_cache_len:5151 prompt_cache_ratio:0.7974918718067813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:40 [manager.py:224] router recive req id 8 cost time 0.10254526138305664 s -INFO 06-24 20:07:40 [manager.py:68] detokenization recv req id 8 cost time 0.10358953475952148 s -DEBUG 06-24 20:07:40 [manager.py:391] Prefill Batch: batch_id=26918721920605767040580489940094063371, time:1750766860.8488936s req_ids:[8] -DEBUG 06-24 20:07:40 [manager.py:391] -ERROR 06-24 20:07:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:187.08419799804688ms total_cost_time:187.1044635772705ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6460 prompt_cache_len:5151 prompt_cache_ratio:0.7973684210526316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 -DEBUG 06-24 20:07:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10273194313049316 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10462546348571777 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=57578275140190219088614946745857418471, time:1750766861.0205421s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:40 lightllm_req_id:8 first_token_cost:147.7639675140381ms total_cost_time:147.78423309326172ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6461 prompt_cache_len:5151 prompt_cache_ratio:0.7972450085126142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 -DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.1022794246673584 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.1030263900756836 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=86719047522043758040005091706188635523, time:1750766861.1807175s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:160.45236587524414ms total_cost_time:160.47120094299316ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6462 prompt_cache_len:5151 prompt_cache_ratio:0.797121634168988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 -DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10258722305297852 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10351133346557617 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=181444785753583738642322945006367839524, time:1750766861.356838s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:173.39158058166504ms total_cost_time:173.41089248657227ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6463 prompt_cache_len:5151 prompt_cache_ratio:0.7969982980040229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 -DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10244941711425781 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.1033635139465332 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=17893851396138027308882101329374453316, time:1750766861.5269644s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:170.6695556640625ms total_cost_time:170.68934440612793ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6464 prompt_cache_len:5151 prompt_cache_ratio:0.796875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 -DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10333967208862305 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10422086715698242 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=135337036080186965582565406197571437930, time:1750766861.7009006s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:169.06380653381348ms total_cost_time:169.0833568572998ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6465 prompt_cache_len:5151 prompt_cache_ratio:0.7967517401392111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 -DEBUG 06-24 20:07:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:41 [manager.py:224] router recive req id 8 cost time 0.10337305068969727 s -INFO 06-24 20:07:41 [manager.py:68] detokenization recv req id 8 cost time 0.10422825813293457 s -DEBUG 06-24 20:07:41 [manager.py:391] Prefill Batch: batch_id=45995322978030122009249596550023693257, time:1750766861.8719761s req_ids:[8] -DEBUG 06-24 20:07:41 [manager.py:391] -ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:41 lightllm_req_id:8 first_token_cost:424.64423179626465ms total_cost_time:424.663782119751ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6466 prompt_cache_len:5151 prompt_cache_ratio:0.7966285184039592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 -DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10356831550598145 s -INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.1044759750366211 s -DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=172093534301486729678058609197864293171, time:1750766862.2985606s req_ids:[8] -DEBUG 06-24 20:07:42 [manager.py:391] -ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:170.24946212768555ms total_cost_time:170.26972770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6467 prompt_cache_len:5151 prompt_cache_ratio:0.7965053347765579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 -DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.1034841537475586 s -INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.1043236255645752 s -DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=242272056309317997160533368002445237385, time:1750766862.4699495s req_ids:[8] -DEBUG 06-24 20:07:42 [manager.py:391] -ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:169.60573196411133ms total_cost_time:169.62647438049316ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6468 prompt_cache_len:5151 prompt_cache_ratio:0.7963821892393321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 -DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10242438316345215 s -INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10326004028320312 s -DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=301129148018629460522289228551652108236, time:1750766862.6481237s req_ids:[8] -DEBUG 06-24 20:07:42 [manager.py:391] -ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:175.79221725463867ms total_cost_time:175.8124828338623ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6469 prompt_cache_len:5151 prompt_cache_ratio:0.7962590817746175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 -DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.10325741767883301 s -INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10408353805541992 s -DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=124810405975926903198085887044105251657, time:1750766862.820404s req_ids:[8] -DEBUG 06-24 20:07:42 [manager.py:391] -ERROR 06-24 20:07:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:170.84908485412598ms total_cost_time:170.8686351776123ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6470 prompt_cache_len:5151 prompt_cache_ratio:0.7961360123647604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 -DEBUG 06-24 20:07:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:42 [manager.py:224] router recive req id 8 cost time 0.1024470329284668 s -INFO 06-24 20:07:42 [manager.py:68] detokenization recv req id 8 cost time 0.10325360298156738 s -DEBUG 06-24 20:07:42 [manager.py:391] Prefill Batch: batch_id=173101263434430583469388973091177795010, time:1750766862.9929597s req_ids:[8] -DEBUG 06-24 20:07:42 [manager.py:391] -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:42 lightllm_req_id:8 first_token_cost:188.1082057952881ms total_cost_time:188.1265640258789ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6471 prompt_cache_len:5151 prompt_cache_ratio:0.7960129809921187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 -DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:43 [batch.py:51] router release req id 8 -INFO 06-24 20:07:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10419631004333496 s -INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10508179664611816 s -DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=21036456429622598966336122260429662214, time:1750766863.1802194s req_ids:[8] -DEBUG 06-24 20:07:43 [manager.py:391] -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:157.57250785827637ms total_cost_time:157.61661529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6472 prompt_cache_len:5151 prompt_cache_ratio:0.7958899876390606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 -DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10724711418151855 s -INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10822868347167969 s -DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=330701111064747031129313013078277290890, time:1750766863.333606s req_ids:[8] -DEBUG 06-24 20:07:43 [manager.py:391] -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:182.23023414611816ms total_cost_time:182.2524070739746ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6473 prompt_cache_len:5151 prompt_cache_ratio:0.7957670322879654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 -DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10329079627990723 s -INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10423421859741211 s -DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=206348574152710904833298126315754254957, time:1750766863.5302026s req_ids:[8] -DEBUG 06-24 20:07:43 [manager.py:391] -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:198.17018508911133ms total_cost_time:198.18902015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6474 prompt_cache_len:5151 prompt_cache_ratio:0.7956441149212233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 -DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10338377952575684 s -INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.1044011116027832 s -DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=14628350517036034029041546758611589131, time:1750766863.7297199s req_ids:[8] -DEBUG 06-24 20:07:43 [manager.py:391] -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:201.98464393615723ms total_cost_time:202.00490951538086ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6475 prompt_cache_len:5151 prompt_cache_ratio:0.7955212355212355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 -DEBUG 06-24 20:07:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:43 [manager.py:224] router recive req id 8 cost time 0.10248184204101562 s -INFO 06-24 20:07:43 [manager.py:68] detokenization recv req id 8 cost time 0.10329771041870117 s -DEBUG 06-24 20:07:43 [manager.py:391] Prefill Batch: batch_id=116438294479522528102917667725403806416, time:1750766863.9369898s req_ids:[8] -DEBUG 06-24 20:07:43 [manager.py:391] -ERROR 06-24 20:07:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:43 lightllm_req_id:8 first_token_cost:190.69361686706543ms total_cost_time:190.7174587249756ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6476 prompt_cache_len:5151 prompt_cache_ratio:0.7953983940704138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 -DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10366106033325195 s -INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10451054573059082 s -DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=257773982139388132358875865290343669063, time:1750766864.1263807s req_ids:[8] -DEBUG 06-24 20:07:44 [manager.py:391] -ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:201.57980918884277ms total_cost_time:201.5979290008545ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6477 prompt_cache_len:5151 prompt_cache_ratio:0.7952755905511811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 -DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10302853584289551 s -INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10385489463806152 s -DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=126242766787683742505483276964342121391, time:1750766864.333875s req_ids:[8] -DEBUG 06-24 20:07:44 [manager.py:391] -ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:202.41689682006836ms total_cost_time:202.44598388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:6478 prompt_cache_len:5151 prompt_cache_ratio:0.795152824945971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 -DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10467195510864258 s -INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.1056675910949707 s -DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=184181375275239015472833738009614651858, time:1750766864.5355773s req_ids:[8] -DEBUG 06-24 20:07:44 [manager.py:391] -ERROR 06-24 20:07:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:180.2206039428711ms total_cost_time:180.24349212646484ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6479 prompt_cache_len:5151 prompt_cache_ratio:0.795030097237228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 -DEBUG 06-24 20:07:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:44 [manager.py:224] router recive req id 8 cost time 0.10301971435546875 s -INFO 06-24 20:07:44 [manager.py:68] detokenization recv req id 8 cost time 0.10394024848937988 s -DEBUG 06-24 20:07:44 [manager.py:391] Prefill Batch: batch_id=224880872042154363729060713509458329734, time:1750766864.7275956s req_ids:[8] -DEBUG 06-24 20:07:44 [manager.py:391] -ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:44 lightllm_req_id:8 first_token_cost:474.4093418121338ms total_cost_time:474.4288921356201ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6480 prompt_cache_len:5151 prompt_cache_ratio:0.7949074074074074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 -DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10244035720825195 s -INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10339188575744629 s -DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=238614409872273377792341325968228639345, time:1750766865.1740808s req_ids:[8] -DEBUG 06-24 20:07:45 [manager.py:391] -ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:162.42098808288574ms total_cost_time:162.44029998779297ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6481 prompt_cache_len:5151 prompt_cache_ratio:0.7947847554389754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 -DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10355401039123535 s -INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10449981689453125 s -DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=193675608813001665297894471724137490471, time:1750766865.371104s req_ids:[8] -DEBUG 06-24 20:07:45 [manager.py:391] -ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:208.56308937072754ms total_cost_time:208.58311653137207ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6482 prompt_cache_len:5151 prompt_cache_ratio:0.7946621413144092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 -DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10247588157653809 s -INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.1033773422241211 s -DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=157937386824530053381217531711538744977, time:1750766865.5688033s req_ids:[8] -DEBUG 06-24 20:07:45 [manager.py:391] -ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:182.85226821899414ms total_cost_time:182.87229537963867ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6483 prompt_cache_len:5151 prompt_cache_ratio:0.7945395650161962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 -DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.10266375541687012 s -INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10357666015625 s -DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=12752785488541668364279761820450439520, time:1750766865.765305s req_ids:[8] -DEBUG 06-24 20:07:45 [manager.py:391] -ERROR 06-24 20:07:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:194.76771354675293ms total_cost_time:194.78702545166016ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6484 prompt_cache_len:5151 prompt_cache_ratio:0.7944170265268353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 -DEBUG 06-24 20:07:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:45 [manager.py:224] router recive req id 8 cost time 0.1042332649230957 s -INFO 06-24 20:07:45 [manager.py:68] detokenization recv req id 8 cost time 0.10509419441223145 s -DEBUG 06-24 20:07:45 [manager.py:391] Prefill Batch: batch_id=325010508588490039617232469909551234557, time:1750766865.961748s req_ids:[8] -DEBUG 06-24 20:07:45 [manager.py:391] -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:07:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 30945.143 tokens/s -DEBUG 06-24 20:07:46 [stats.py:37] Avg prompt tokens throughput: 30935.468 tokens/s -DEBUG 06-24 20:07:46 [stats.py:37] Avg generate tokens throughput: 9.675 tokens/s -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:45 lightllm_req_id:8 first_token_cost:194.16165351867676ms total_cost_time:194.18621063232422ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6485 prompt_cache_len:5151 prompt_cache_ratio:0.7942945258288358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10251975059509277 s -INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10334253311157227 s -DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=210536128736576338302969145410798114958, time:1750766866.1284277s req_ids:[8] -DEBUG 06-24 20:07:46 [manager.py:391] -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:166.29838943481445ms total_cost_time:166.31793975830078ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6486 prompt_cache_len:5151 prompt_cache_ratio:0.7941720629047179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10255765914916992 s -INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.1034550666809082 s -DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=84915782470430408052689888535711959985, time:1750766866.3256173s req_ids:[8] -DEBUG 06-24 20:07:46 [manager.py:391] -INFO 06-24 20:07:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:182.97195434570312ms total_cost_time:182.99293518066406ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6487 prompt_cache_len:5151 prompt_cache_ratio:0.7940496377370125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10295248031616211 s -INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10389399528503418 s -DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=267643638216845622132249038847759953193, time:1750766866.5030222s req_ids:[8] -DEBUG 06-24 20:07:46 [manager.py:391] -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:186.02776527404785ms total_cost_time:186.05828285217285ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:6488 prompt_cache_len:5151 prompt_cache_ratio:0.7939272503082614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -INFO 06-24 20:07:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.10259699821472168 s -INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10336923599243164 s -DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=239583653720130785327050135800804767963, time:1750766866.6853502s req_ids:[8] -DEBUG 06-24 20:07:46 [manager.py:391] -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:217.8034782409668ms total_cost_time:217.82255172729492ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6489 prompt_cache_len:5151 prompt_cache_ratio:0.7938049006010172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:46 [manager.py:224] router recive req id 8 cost time 0.1036078929901123 s -INFO 06-24 20:07:46 [manager.py:68] detokenization recv req id 8 cost time 0.10441040992736816 s -DEBUG 06-24 20:07:46 [manager.py:391] Prefill Batch: batch_id=198961092386080139032010520120304111370, time:1750766866.903222s req_ids:[8] -DEBUG 06-24 20:07:46 [manager.py:391] -ERROR 06-24 20:07:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:161.41867637634277ms total_cost_time:161.4377498626709ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6490 prompt_cache_len:5151 prompt_cache_ratio:0.7936825885978428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 -DEBUG 06-24 20:07:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10264968872070312 s -INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10354804992675781 s -DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=239179229814974888201377999842684913324, time:1750766867.0657022s req_ids:[8] -DEBUG 06-24 20:07:47 [manager.py:391] -ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:46 lightllm_req_id:8 first_token_cost:187.73150444030762ms total_cost_time:187.75033950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6491 prompt_cache_len:5151 prompt_cache_ratio:0.7935603142813126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 -DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10272336006164551 s -INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.1035912036895752 s -DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=103078364069653881977574875583836388135, time:1750766867.2584398s req_ids:[8] -DEBUG 06-24 20:07:47 [manager.py:391] -ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:438.89904022216797ms total_cost_time:438.9188289642334ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6492 prompt_cache_len:5151 prompt_cache_ratio:0.7934380776340111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 -DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10373973846435547 s -INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10455632209777832 s -DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=109159410370116480327486610713283133136, time:1750766867.6966515s req_ids:[8] -DEBUG 06-24 20:07:47 [manager.py:391] -ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:190.22774696350098ms total_cost_time:190.2477741241455ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6493 prompt_cache_len:5151 prompt_cache_ratio:0.7933158786385338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 -DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:47 [manager.py:224] router recive req id 8 cost time 0.10334134101867676 s -INFO 06-24 20:07:47 [manager.py:68] detokenization recv req id 8 cost time 0.10419917106628418 s -DEBUG 06-24 20:07:47 [manager.py:391] Prefill Batch: batch_id=140166208106447383022192479348029819820, time:1750766867.8912528s req_ids:[8] -DEBUG 06-24 20:07:47 [manager.py:391] -ERROR 06-24 20:07:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:191.88570976257324ms total_cost_time:191.90478324890137ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6494 prompt_cache_len:5151 prompt_cache_ratio:0.7931937172774869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 -DEBUG 06-24 20:07:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10387253761291504 s -INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10473036766052246 s -DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=132906829058250632681023425364877577196, time:1750766868.0871184s req_ids:[8] -DEBUG 06-24 20:07:48 [manager.py:391] -ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:47 lightllm_req_id:8 first_token_cost:206.07376098632812ms total_cost_time:206.09450340270996ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6495 prompt_cache_len:5151 prompt_cache_ratio:0.7930715935334873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 -DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10361838340759277 s -INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10440587997436523 s -DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=326812407101579893880070637079805300251, time:1750766868.2971044s req_ids:[8] -DEBUG 06-24 20:07:48 [manager.py:391] -ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:185.5919361114502ms total_cost_time:185.61148643493652ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6496 prompt_cache_len:5151 prompt_cache_ratio:0.7929495073891626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 -DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10248780250549316 s -INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10331225395202637 s -DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=270063229367083878588308833398558635598, time:1750766868.494132s req_ids:[8] -DEBUG 06-24 20:07:48 [manager.py:391] -ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:193.8483715057373ms total_cost_time:193.86863708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6497 prompt_cache_len:5151 prompt_cache_ratio:0.792827458827151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 -DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10350632667541504 s -INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.10425710678100586 s -DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=87368749600254114626615310403483453831, time:1750766868.6902633s req_ids:[8] -DEBUG 06-24 20:07:48 [manager.py:391] -ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:209.93399620056152ms total_cost_time:209.95378494262695ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6498 prompt_cache_len:5151 prompt_cache_ratio:0.7927054478301016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 -DEBUG 06-24 20:07:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:48 [manager.py:224] router recive req id 8 cost time 0.10268306732177734 s -INFO 06-24 20:07:48 [manager.py:68] detokenization recv req id 8 cost time 0.1034245491027832 s -DEBUG 06-24 20:07:48 [manager.py:391] Prefill Batch: batch_id=331275246263277867899907139286774510423, time:1750766868.8917224s req_ids:[8] -DEBUG 06-24 20:07:48 [manager.py:391] -ERROR 06-24 20:07:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:207.2732448577881ms total_cost_time:207.29398727416992ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6499 prompt_cache_len:5151 prompt_cache_ratio:0.792583474380674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 -DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10332179069519043 s -INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10424184799194336 s -DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=147686522597013606763777386451352467469, time:1750766869.1008904s req_ids:[8] -DEBUG 06-24 20:07:49 [manager.py:391] -ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:48 lightllm_req_id:8 first_token_cost:174.53360557556152ms total_cost_time:174.55410957336426ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6500 prompt_cache_len:5151 prompt_cache_ratio:0.7924615384615384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 -DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10369658470153809 s -INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10461044311523438 s -DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=197541847940923658611654503838558822155, time:1750766869.278513s req_ids:[8] -DEBUG 06-24 20:07:49 [manager.py:391] -ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:175.35758018493652ms total_cost_time:175.37760734558105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6501 prompt_cache_len:5151 prompt_cache_ratio:0.792339640055376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 -DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.10256409645080566 s -INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10338973999023438 s -DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=132702706947485900854455702773939660389, time:1750766869.4572473s req_ids:[8] -DEBUG 06-24 20:07:49 [manager.py:391] -ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:433.2864284515381ms total_cost_time:433.31122398376465ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:6502 prompt_cache_len:5151 prompt_cache_ratio:0.7922177791448785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 -DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:49 [manager.py:224] router recive req id 8 cost time 0.1038060188293457 s -INFO 06-24 20:07:49 [manager.py:68] detokenization recv req id 8 cost time 0.10461235046386719 s -DEBUG 06-24 20:07:49 [manager.py:391] Prefill Batch: batch_id=171165086247102797236517380674753479330, time:1750766869.8937516s req_ids:[8] -DEBUG 06-24 20:07:49 [manager.py:391] -ERROR 06-24 20:07:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:177.9007911682129ms total_cost_time:177.92105674743652ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6503 prompt_cache_len:5151 prompt_cache_ratio:0.7920959557127479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 -DEBUG 06-24 20:07:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.1025228500366211 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10327839851379395 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=196327177555609758556694261281867455577, time:1750766870.0731492s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:49 lightllm_req_id:8 first_token_cost:174.6354103088379ms total_cost_time:174.6542453765869ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6504 prompt_cache_len:5151 prompt_cache_ratio:0.7919741697416974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10346078872680664 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10420608520507812 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=207416091501988028743947181853378527167, time:1750766870.2453249s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:170.43542861938477ms total_cost_time:170.4549789428711ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6505 prompt_cache_len:5151 prompt_cache_ratio:0.7918524212144504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10283708572387695 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.1035912036895752 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=323945026505037003124992966061812271389, time:1750766870.4202967s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:173.45452308654785ms total_cost_time:173.475980758667ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6506 prompt_cache_len:5151 prompt_cache_ratio:0.7917307101137412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10260987281799316 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10342669486999512 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=113942899061612323935628313099834531792, time:1750766870.5894356s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:166.90754890441895ms total_cost_time:166.9290065765381ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6507 prompt_cache_len:5151 prompt_cache_ratio:0.7916090364223144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10246610641479492 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10323548316955566 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=274947130823457823363986664492025835745, time:1750766870.7652779s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:176.1176586151123ms total_cost_time:176.13720893859863ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6508 prompt_cache_len:5151 prompt_cache_ratio:0.7914874001229256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:50 [manager.py:224] router recive req id 8 cost time 0.10254955291748047 s -INFO 06-24 20:07:50 [manager.py:68] detokenization recv req id 8 cost time 0.10338950157165527 s -DEBUG 06-24 20:07:50 [manager.py:391] Prefill Batch: batch_id=130058276645781014263339357133166153295, time:1750766870.9402163s req_ids:[8] -DEBUG 06-24 20:07:50 [manager.py:391] -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:173.39444160461426ms total_cost_time:173.4147071838379ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6509 prompt_cache_len:5151 prompt_cache_ratio:0.7913658011983408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 -DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10358500480651855 s -INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10450530052185059 s -DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=5812738722704364118282413355817042452, time:1750766871.1172047s req_ids:[8] -DEBUG 06-24 20:07:51 [manager.py:391] -ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:50 lightllm_req_id:8 first_token_cost:175.1117706298828ms total_cost_time:175.13084411621094ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6510 prompt_cache_len:5151 prompt_cache_ratio:0.7912442396313364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 -DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10371518135070801 s -INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10462570190429688 s -DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=247592817930205042458930687386827020192, time:1750766871.2958677s req_ids:[8] -DEBUG 06-24 20:07:51 [manager.py:391] -ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:175.83870887756348ms total_cost_time:175.8594512939453ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6511 prompt_cache_len:5151 prompt_cache_ratio:0.7911227154046997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 -DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10241174697875977 s -INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10323238372802734 s -DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=304053334779035361387276040703562775692, time:1750766871.4723759s req_ids:[8] -DEBUG 06-24 20:07:51 [manager.py:391] -ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:174.79944229125977ms total_cost_time:174.8180389404297ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:6512 prompt_cache_len:5151 prompt_cache_ratio:0.7910012285012284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 -DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:51 [manager.py:224] router recive req id 8 cost time 0.10261321067810059 s -INFO 06-24 20:07:51 [manager.py:68] detokenization recv req id 8 cost time 0.10345220565795898 s -DEBUG 06-24 20:07:51 [manager.py:391] Prefill Batch: batch_id=98817249459650791093243683381362720667, time:1750766871.6492512s req_ids:[8] -DEBUG 06-24 20:07:51 [manager.py:391] -ERROR 06-24 20:07:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:174.96109008789062ms total_cost_time:174.98064041137695ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6513 prompt_cache_len:5151 prompt_cache_ratio:0.790879778903731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 -DEBUG 06-24 20:07:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.3042147159576416 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.3050215244293213 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=47834146034141635528381887216696462926, time:1750766872.0170374s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:51 lightllm_req_id:8 first_token_cost:365.4131889343262ms total_cost_time:365.433931350708ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6514 prompt_cache_len:5151 prompt_cache_ratio:0.7907583665950261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10378766059875488 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10460710525512695 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=21575093312374063629381745541516707452, time:1750766872.192353s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:174.21817779541016ms total_cost_time:174.23772811889648ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6515 prompt_cache_len:5151 prompt_cache_ratio:0.7906369915579432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10374283790588379 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10457229614257812 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=295222997128615938268548297679684524458, time:1750766872.371079s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.92453956604004ms total_cost_time:175.94432830810547ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6516 prompt_cache_len:5151 prompt_cache_ratio:0.7905156537753223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10378265380859375 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10456061363220215 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=65618126885616188691528652477715923791, time:1750766872.5473342s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:174.70502853393555ms total_cost_time:174.72434043884277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6517 prompt_cache_len:5151 prompt_cache_ratio:0.7903943532300138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10382294654846191 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10475444793701172 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=252896642010716564800578357476524136997, time:1750766872.7297418s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:179.66580390930176ms total_cost_time:179.68416213989258ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6518 prompt_cache_len:5151 prompt_cache_ratio:0.7902730899048788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:52 [manager.py:224] router recive req id 8 cost time 0.10277605056762695 s -INFO 06-24 20:07:52 [manager.py:68] detokenization recv req id 8 cost time 0.10357379913330078 s -DEBUG 06-24 20:07:52 [manager.py:391] Prefill Batch: batch_id=28642480268450966155805030762440984506, time:1750766872.9061286s req_ids:[8] -DEBUG 06-24 20:07:52 [manager.py:391] -ERROR 06-24 20:07:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.29749870300293ms total_cost_time:175.31728744506836ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6519 prompt_cache_len:5151 prompt_cache_ratio:0.7901518637827888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 -DEBUG 06-24 20:07:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10249972343444824 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10328888893127441 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=304737385422966190443938002033288797537, time:1750766873.084035s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:52 lightllm_req_id:8 first_token_cost:175.37569999694824ms total_cost_time:175.39548873901367ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6520 prompt_cache_len:5151 prompt_cache_ratio:0.7900306748466258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 -DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:53 [batch.py:51] router release req id 8 -INFO 06-24 20:07:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:07:53 [statics_utils.py:24] mean first cost: 236.52147444883605 ms -INFO 06-24 20:07:53 [statics_utils.py:24] mean per token cost: 0.11301781988396738 ms -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10278987884521484 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10362768173217773 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=158842327934775242871236920039881476087, time:1750766873.262754s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:178.36284637451172ms total_cost_time:178.38263511657715ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6521 prompt_cache_len:5151 prompt_cache_ratio:0.7899095230792823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 -DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10351037979125977 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.1043095588684082 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=10357838863325675974813224689161317079, time:1750766873.440043s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:173.9809513092041ms total_cost_time:174.00097846984863ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6522 prompt_cache_len:5151 prompt_cache_ratio:0.7897884084636615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 -DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10332727432250977 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10410523414611816 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=289848153505505827798820199662533696319, time:1750766873.6173978s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:177.02484130859375ms total_cost_time:177.04439163208008ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6523 prompt_cache_len:5151 prompt_cache_ratio:0.7896673309826767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 -DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.1026606559753418 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10347604751586914 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=125892725516290441340947676297137038584, time:1750766873.7966766s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:177.98089981079102ms total_cost_time:178.00235748291016ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6524 prompt_cache_len:5151 prompt_cache_ratio:0.789546290619252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 -DEBUG 06-24 20:07:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:53 [manager.py:224] router recive req id 8 cost time 0.10268115997314453 s -INFO 06-24 20:07:53 [manager.py:68] detokenization recv req id 8 cost time 0.10345125198364258 s -DEBUG 06-24 20:07:53 [manager.py:391] Prefill Batch: batch_id=254529937322885649592467878415780614853, time:1750766873.9737563s req_ids:[8] -DEBUG 06-24 20:07:53 [manager.py:391] -ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:53 lightllm_req_id:8 first_token_cost:174.01123046875ms total_cost_time:174.03101921081543ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6525 prompt_cache_len:5151 prompt_cache_ratio:0.7894252873563219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 -DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10265874862670898 s -INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10345745086669922 s -DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=51015691918098510435944764786793492115, time:1750766874.1501138s req_ids:[8] -DEBUG 06-24 20:07:54 [manager.py:391] -ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:426.3758659362793ms total_cost_time:426.39780044555664ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6526 prompt_cache_len:5151 prompt_cache_ratio:0.7893043211768311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 -DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10339879989624023 s -INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10421299934387207 s -DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=141315925142610384200663510540177604937, time:1750766874.573598s req_ids:[8] -DEBUG 06-24 20:07:54 [manager.py:391] -ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:175.7214069366455ms total_cost_time:175.73952674865723ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6527 prompt_cache_len:5151 prompt_cache_ratio:0.7891833920637352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 -DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10258793830871582 s -INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10336422920227051 s -DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=100355875104822616395829803571393076726, time:1750766874.7503808s req_ids:[8] -DEBUG 06-24 20:07:54 [manager.py:391] -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:155.55596351623535ms total_cost_time:155.57551383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6528 prompt_cache_len:5151 prompt_cache_ratio:0.7890625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 -DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:54 [manager.py:224] router recive req id 8 cost time 0.10266566276550293 s -INFO 06-24 20:07:54 [manager.py:68] detokenization recv req id 8 cost time 0.10358262062072754 s -DEBUG 06-24 20:07:54 [manager.py:391] Prefill Batch: batch_id=285742318986483779281097104917762138299, time:1750766874.9060862s req_ids:[8] -DEBUG 06-24 20:07:54 [manager.py:391] -ERROR 06-24 20:07:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:164.43634033203125ms total_cost_time:164.46232795715332ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:6529 prompt_cache_len:5151 prompt_cache_ratio:0.7889416449686016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 -DEBUG 06-24 20:07:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10251736640930176 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10344433784484863 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=43538634372651627202407946678715001201, time:1750766875.078549s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:54 lightllm_req_id:8 first_token_cost:170.0155735015869ms total_cost_time:170.03583908081055ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6530 prompt_cache_len:5151 prompt_cache_ratio:0.7888208269525268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10277628898620605 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10362124443054199 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=271618805711854084481240478047996891232, time:1750766875.2505336s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:169.84820365905762ms total_cost_time:169.86894607543945ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6531 prompt_cache_len:5151 prompt_cache_ratio:0.7887000459347726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10404205322265625 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10500597953796387 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=231315357401635360308444963447657535406, time:1750766875.4223404s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.74847221374512ms total_cost_time:170.76849937438965ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6532 prompt_cache_len:5151 prompt_cache_ratio:0.7885793018983466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10335278511047363 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10417366027832031 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=147128586873853265288275953916815218265, time:1750766875.5946658s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.28355598449707ms total_cost_time:170.3052520751953ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6533 prompt_cache_len:5151 prompt_cache_ratio:0.7884585948262667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.10343480110168457 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10420489311218262 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=284428589149872889558063286235950643025, time:1750766875.7661471s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:168.17426681518555ms total_cost_time:168.19357872009277ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6534 prompt_cache_len:5151 prompt_cache_ratio:0.788337924701561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:55 [manager.py:224] router recive req id 8 cost time 0.1026608943939209 s -INFO 06-24 20:07:55 [manager.py:68] detokenization recv req id 8 cost time 0.10346555709838867 s -DEBUG 06-24 20:07:55 [manager.py:391] Prefill Batch: batch_id=63933040122762846626464688629048596383, time:1750766875.9372768s req_ids:[8] -DEBUG 06-24 20:07:55 [manager.py:391] -ERROR 06-24 20:07:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:169.93021965026855ms total_cost_time:169.94953155517578ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6535 prompt_cache_len:5151 prompt_cache_ratio:0.7882172915072686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 -DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10241246223449707 s -INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10318970680236816 s -DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=138547024478328743551876514470640210897, time:1750766876.110058s req_ids:[8] -DEBUG 06-24 20:07:56 [manager.py:391] -DEBUG 06-24 20:07:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 32916.094 tokens/s -DEBUG 06-24 20:07:56 [stats.py:37] Avg prompt tokens throughput: 32906.085 tokens/s -DEBUG 06-24 20:07:56 [stats.py:37] Avg generate tokens throughput: 10.009 tokens/s -ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:55 lightllm_req_id:8 first_token_cost:170.49741744995117ms total_cost_time:170.5174446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6536 prompt_cache_len:5151 prompt_cache_ratio:0.7880966952264382 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 -DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10358166694641113 s -INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10453343391418457 s -DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=159648989565732161263113577589633625872, time:1750766876.2815194s req_ids:[8] -DEBUG 06-24 20:07:56 [manager.py:391] -ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:169.93021965026855ms total_cost_time:169.94857788085938ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:6537 prompt_cache_len:5151 prompt_cache_ratio:0.7879761358421294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 -DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10334897041320801 s -INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10424160957336426 s -DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=289769732960280758261643545077930612867, time:1750766876.4532168s req_ids:[8] -DEBUG 06-24 20:07:56 [manager.py:391] -ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:169.04211044311523ms total_cost_time:169.06023025512695ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:6538 prompt_cache_len:5151 prompt_cache_ratio:0.787855613337412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 -DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:56 [manager.py:224] router recive req id 8 cost time 0.10336875915527344 s -INFO 06-24 20:07:56 [manager.py:68] detokenization recv req id 8 cost time 0.10414791107177734 s -DEBUG 06-24 20:07:56 [manager.py:391] Prefill Batch: batch_id=211647016632928102464812513062562131563, time:1750766876.62437s req_ids:[8] -DEBUG 06-24 20:07:56 [manager.py:391] -ERROR 06-24 20:07:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:400.82526206970215ms total_cost_time:400.8443355560303ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6539 prompt_cache_len:5151 prompt_cache_ratio:0.7877351276953662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 -DEBUG 06-24 20:07:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10285353660583496 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10371613502502441 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=288441894007253336103581840206777446839, time:1750766877.0275624s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:56 lightllm_req_id:8 first_token_cost:172.44482040405273ms total_cost_time:172.46675491333008ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6540 prompt_cache_len:5151 prompt_cache_ratio:0.7876146788990825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 -DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:57 [batch.py:51] router release req id 8 -INFO 06-24 20:07:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10265493392944336 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10341644287109375 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=194596496276751477478281780720891319031, time:1750766877.202853s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:171.68617248535156ms total_cost_time:171.7054843902588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6541 prompt_cache_len:5151 prompt_cache_ratio:0.7874942669316618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 -DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10521960258483887 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10711956024169922 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=164828262874223036048735912254273320823, time:1750766877.3745232s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:184.9040985107422ms total_cost_time:184.92412567138672ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6542 prompt_cache_len:5151 prompt_cache_ratio:0.7873738917762152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 -DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.104736328125 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10657811164855957 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=137674331790217986982697619566593818697, time:1750766877.561502s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:191.82157516479492ms total_cost_time:191.8647289276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6543 prompt_cache_len:5151 prompt_cache_ratio:0.7872535534158643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 -DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.10742020606994629 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.10930013656616211 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=293662292481314682657824029098564819856, time:1750766877.7555928s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:194.95129585266113ms total_cost_time:194.99611854553223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6544 prompt_cache_len:5151 prompt_cache_ratio:0.7871332518337408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 -DEBUG 06-24 20:07:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:57 [manager.py:224] router recive req id 8 cost time 0.107940673828125 s -INFO 06-24 20:07:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098635196685791 s -DEBUG 06-24 20:07:57 [manager.py:391] Prefill Batch: batch_id=114934815720830893613429674677034081803, time:1750766877.958956s req_ids:[8] -DEBUG 06-24 20:07:57 [manager.py:391] -ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:57 lightllm_req_id:8 first_token_cost:204.4389247894287ms total_cost_time:204.4832706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6545 prompt_cache_len:5151 prompt_cache_ratio:0.787012987012987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 -DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.1081538200378418 s -INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11033940315246582 s -DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=158581515700955884791283099870840440588, time:1750766878.1624103s req_ids:[8] -DEBUG 06-24 20:07:58 [manager.py:391] -ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:203.92942428588867ms total_cost_time:203.97210121154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6546 prompt_cache_len:5151 prompt_cache_ratio:0.7868927589367553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 -DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.1072695255279541 s -INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s -DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=151508599920298688002298122847873345598, time:1750766878.377107s req_ids:[8] -DEBUG 06-24 20:07:58 [manager.py:391] -ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:211.0598087310791ms total_cost_time:211.11059188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:6547 prompt_cache_len:5151 prompt_cache_ratio:0.7867725675882083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 -DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.10934662818908691 s -INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s -DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=166146776672678505851302311666809791106, time:1750766878.5869389s req_ids:[8] -DEBUG 06-24 20:07:58 [manager.py:391] -ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:203.4778594970703ms total_cost_time:203.521728515625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6548 prompt_cache_len:5151 prompt_cache_ratio:0.7866524129505192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 -DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:58 [manager.py:224] router recive req id 8 cost time 0.10992240905761719 s -INFO 06-24 20:07:58 [manager.py:68] detokenization recv req id 8 cost time 0.11178469657897949 s -DEBUG 06-24 20:07:58 [manager.py:391] Prefill Batch: batch_id=134472441908852779603807620094580836635, time:1750766878.803043s req_ids:[8] -DEBUG 06-24 20:07:58 [manager.py:391] -ERROR 06-24 20:07:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:217.35215187072754ms total_cost_time:217.40388870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:6549 prompt_cache_len:5151 prompt_cache_ratio:0.7865322950068713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 -DEBUG 06-24 20:07:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10872578620910645 s -INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s -DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=302152688899111280828711351932827939935, time:1750766879.0242462s req_ids:[8] -DEBUG 06-24 20:07:59 [manager.py:391] -ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:58 lightllm_req_id:8 first_token_cost:205.2772045135498ms total_cost_time:205.3220272064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6550 prompt_cache_len:5151 prompt_cache_ratio:0.786412213740458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 -DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10761308670043945 s -INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s -DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=157115331287803616465708494985069641858, time:1750766879.2332423s req_ids:[8] -DEBUG 06-24 20:07:59 [manager.py:391] -ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:206.0844898223877ms total_cost_time:206.11119270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:6551 prompt_cache_len:5151 prompt_cache_ratio:0.7862921691344833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 -DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.1066441535949707 s -INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.1088716983795166 s -DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=284172145492665634682701765775469285327, time:1750766879.4446352s req_ids:[8] -DEBUG 06-24 20:07:59 [manager.py:391] -ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:217.85783767700195ms total_cost_time:217.90146827697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6552 prompt_cache_len:5151 prompt_cache_ratio:0.7861721611721612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 -DEBUG 06-24 20:07:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:07:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:07:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:07:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:07:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:07:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:07:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:07:59 [manager.py:224] router recive req id 8 cost time 0.10570859909057617 s -INFO 06-24 20:07:59 [manager.py:68] detokenization recv req id 8 cost time 0.10671186447143555 s -DEBUG 06-24 20:07:59 [manager.py:391] Prefill Batch: batch_id=90993685465906767626893706212207034830, time:1750766879.6824048s req_ids:[8] -DEBUG 06-24 20:07:59 [manager.py:391] -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:07:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:07:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:07:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:07:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:433.7937831878662ms total_cost_time:433.81333351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6553 prompt_cache_len:5151 prompt_cache_ratio:0.786052189836716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:07:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 -DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10309886932373047 s -INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10398721694946289 s -DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=149886257626241376539418311650901892603, time:1750766880.1080983s req_ids:[8] -DEBUG 06-24 20:08:00 [manager.py:391] -ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:07:59 lightllm_req_id:8 first_token_cost:212.97597885131836ms total_cost_time:212.9983901977539ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6554 prompt_cache_len:5151 prompt_cache_ratio:0.7859322551113823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 -DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10407137870788574 s -INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10485458374023438 s -DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=256024165726407114040412341183948101373, time:1750766880.3303475s req_ids:[8] -DEBUG 06-24 20:08:00 [manager.py:391] -ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:219.4368839263916ms total_cost_time:219.45929527282715ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6555 prompt_cache_len:5151 prompt_cache_ratio:0.785812356979405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 -DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.1039581298828125 s -INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10474157333374023 s -DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=300762525322920996386191485192252530492, time:1750766880.5457523s req_ids:[8] -DEBUG 06-24 20:08:00 [manager.py:391] -ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:217.1928882598877ms total_cost_time:217.21434593200684ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6556 prompt_cache_len:5151 prompt_cache_ratio:0.7856924954240391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 -DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10271978378295898 s -INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10364174842834473 s -DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=227318149890494559415593650287784392178, time:1750766880.7664857s req_ids:[8] -DEBUG 06-24 20:08:00 [manager.py:391] -ERROR 06-24 20:08:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:214.768648147583ms total_cost_time:214.80846405029297ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:6557 prompt_cache_len:5151 prompt_cache_ratio:0.7855726704285496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 -DEBUG 06-24 20:08:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:00 [manager.py:224] router recive req id 8 cost time 0.10338187217712402 s -INFO 06-24 20:08:00 [manager.py:68] detokenization recv req id 8 cost time 0.10436272621154785 s -DEBUG 06-24 20:08:00 [manager.py:391] Prefill Batch: batch_id=244868837653894440989355634197952921510, time:1750766880.9743795s req_ids:[8] -DEBUG 06-24 20:08:00 [manager.py:391] -ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:00 lightllm_req_id:8 first_token_cost:181.32376670837402ms total_cost_time:181.3504695892334ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:6558 prompt_cache_len:5151 prompt_cache_ratio:0.7854528819762122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 -DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10641312599182129 s -INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10723352432250977 s -DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=32215411473256686390517951858213228889, time:1750766881.1715913s req_ids:[8] -DEBUG 06-24 20:08:01 [manager.py:391] -ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:195.09363174438477ms total_cost_time:195.1162815093994ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6559 prompt_cache_len:5151 prompt_cache_ratio:0.7853331300503126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 -DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10326266288757324 s -INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10404109954833984 s -DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=103309180369724906960659546231745570996, time:1750766881.3687525s req_ids:[8] -DEBUG 06-24 20:08:01 [manager.py:391] -ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:195.32155990600586ms total_cost_time:195.3451633453369ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6560 prompt_cache_len:5151 prompt_cache_ratio:0.7852134146341463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 -DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10395693778991699 s -INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10489392280578613 s -DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=93991114498658739753755441224830960712, time:1750766881.5680447s req_ids:[8] -DEBUG 06-24 20:08:01 [manager.py:391] -ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:197.13950157165527ms total_cost_time:197.16644287109375ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6561 prompt_cache_len:5151 prompt_cache_ratio:0.7850937357110197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 -DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10436201095581055 s -INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10525846481323242 s -DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=269220682656883608215008651585342217569, time:1750766881.766165s req_ids:[8] -DEBUG 06-24 20:08:01 [manager.py:391] -ERROR 06-24 20:08:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:196.3801383972168ms total_cost_time:196.40064239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6562 prompt_cache_len:5151 prompt_cache_ratio:0.7849740932642487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 -DEBUG 06-24 20:08:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:01 [manager.py:224] router recive req id 8 cost time 0.10332465171813965 s -INFO 06-24 20:08:01 [manager.py:68] detokenization recv req id 8 cost time 0.10425758361816406 s -DEBUG 06-24 20:08:01 [manager.py:391] Prefill Batch: batch_id=27057745429025615999948500940191507240, time:1750766881.9659078s req_ids:[8] -DEBUG 06-24 20:08:01 [manager.py:391] -ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:01 lightllm_req_id:8 first_token_cost:196.31361961364746ms total_cost_time:196.3369846343994ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6563 prompt_cache_len:5151 prompt_cache_ratio:0.7848544872771598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 -DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10347414016723633 s -INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.1043548583984375 s -DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=296527693582730257078243109686320094362, time:1750766882.162575s req_ids:[8] -DEBUG 06-24 20:08:02 [manager.py:391] -ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:195.57642936706543ms total_cost_time:195.59955596923828ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6564 prompt_cache_len:5151 prompt_cache_ratio:0.7847349177330896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 -DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10339570045471191 s -INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.1040952205657959 s -DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=33483788748351140146589963042480571007, time:1750766882.3294718s req_ids:[8] -DEBUG 06-24 20:08:02 [manager.py:391] -ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:447.11947441101074ms total_cost_time:447.1430778503418ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:6565 prompt_cache_len:5151 prompt_cache_ratio:0.7846153846153846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 -DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.10449981689453125 s -INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.10548734664916992 s -DEBUG 06-24 20:08:02 [manager.py:391] Prefill Batch: batch_id=304989367871509899525266204744141931004, time:1750766882.813156s req_ids:[8] -DEBUG 06-24 20:08:02 [manager.py:391] -ERROR 06-24 20:08:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:197.88312911987305ms total_cost_time:197.9050636291504ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6566 prompt_cache_len:5151 prompt_cache_ratio:0.7844958879074018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 -DEBUG 06-24 20:08:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:02 [manager.py:224] router recive req id 8 cost time 0.1044623851776123 s -INFO 06-24 20:08:02 [manager.py:68] detokenization recv req id 8 cost time 0.10537910461425781 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=115118470218929170677551198842567753359, time:1750766883.0132272s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:02 lightllm_req_id:8 first_token_cost:200.0129222869873ms total_cost_time:200.03437995910645ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6567 prompt_cache_len:5151 prompt_cache_ratio:0.784376427592508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 -DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10448288917541504 s -INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10543084144592285 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=230181831405509754905413801276211858985, time:1750766883.2155313s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:198.44794273376465ms total_cost_time:198.4696388244629ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6568 prompt_cache_len:5151 prompt_cache_ratio:0.7842570036540804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 -DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.1034080982208252 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=33134982648399779774370338875152717013, time:1750766883.3820357s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10437512397766113 s -ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:164.88099098205566ms total_cost_time:164.90530967712402ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6569 prompt_cache_len:5151 prompt_cache_ratio:0.7841376160755061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 -DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.1043388843536377 s -INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10526895523071289 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=167901434297497913687850021988397450348, time:1750766883.5831628s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:198.5776424407959ms total_cost_time:198.60005378723145ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6570 prompt_cache_len:5151 prompt_cache_ratio:0.7840182648401827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 -DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10458755493164062 s -INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10553836822509766 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=94136648362729192975334448357501526764, time:1750766883.7845218s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -ERROR 06-24 20:08:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:200.33025741577148ms total_cost_time:200.35243034362793ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:6571 prompt_cache_len:5151 prompt_cache_ratio:0.7838989499315173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 -DEBUG 06-24 20:08:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:03 [manager.py:224] router recive req id 8 cost time 0.10350799560546875 s -INFO 06-24 20:08:03 [manager.py:68] detokenization recv req id 8 cost time 0.10448670387268066 s -DEBUG 06-24 20:08:03 [manager.py:391] Prefill Batch: batch_id=289307394101204197849394223303276118495, time:1750766883.9560091s req_ids:[8] -DEBUG 06-24 20:08:03 [manager.py:391] -ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:03 lightllm_req_id:8 first_token_cost:167.5553321838379ms total_cost_time:167.586088180542ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:6572 prompt_cache_len:5151 prompt_cache_ratio:0.7837796713329276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 -DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.1033778190612793 s -INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10430407524108887 s -DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=201323526094499140131704555544006882246, time:1750766884.1225228s req_ids:[8] -DEBUG 06-24 20:08:04 [manager.py:391] -ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:165.7862663269043ms total_cost_time:165.80939292907715ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6573 prompt_cache_len:5151 prompt_cache_ratio:0.7836604290278412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 -DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.1036076545715332 s -DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=98671405032938867657604737324888287387, time:1750766884.2903838s req_ids:[8] -DEBUG 06-24 20:08:04 [manager.py:391] -INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10445094108581543 s -ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:164.2436981201172ms total_cost_time:164.26610946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6574 prompt_cache_len:5151 prompt_cache_ratio:0.7835412229996958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 -DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.10348010063171387 s -INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10436701774597168 s -DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=119329707580809012539896616823420033557, time:1750766884.4906654s req_ids:[8] -DEBUG 06-24 20:08:04 [manager.py:391] -ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:198.41957092285156ms total_cost_time:198.4424591064453ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6575 prompt_cache_len:5151 prompt_cache_ratio:0.7834220532319391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 -DEBUG 06-24 20:08:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:04 [manager.py:224] router recive req id 8 cost time 0.10427093505859375 s -INFO 06-24 20:08:04 [manager.py:68] detokenization recv req id 8 cost time 0.10523080825805664 s -DEBUG 06-24 20:08:04 [manager.py:391] Prefill Batch: batch_id=11017830086969121781467561119364908084, time:1750766884.6910179s req_ids:[8] -DEBUG 06-24 20:08:04 [manager.py:391] -ERROR 06-24 20:08:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:402.57811546325684ms total_cost_time:402.6014804840088ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6576 prompt_cache_len:5151 prompt_cache_ratio:0.7833029197080292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10341358184814453 s -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10424661636352539 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=150539983563989196837051547827601227710, time:1750766885.063207s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:04 lightllm_req_id:8 first_token_cost:164.44969177246094ms total_cost_time:164.47162628173828ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6577 prompt_cache_len:5151 prompt_cache_ratio:0.7831838224114338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10350227355957031 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=167570025489540151050218267461662009768, time:1750766885.229989s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10447192192077637 s -ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:163.86079788208008ms total_cost_time:163.88320922851562ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6578 prompt_cache_len:5151 prompt_cache_ratio:0.7830647613256309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10347270965576172 s -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10427165031433105 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=317389812520166892422619678154494894804, time:1750766885.4291837s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:198.08626174926758ms total_cost_time:198.10891151428223ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6579 prompt_cache_len:5151 prompt_cache_ratio:0.7829457364341085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10291624069213867 s -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.10373187065124512 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=296462087924179389608161661769965199444, time:1750766885.6216543s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:178.99513244628906ms total_cost_time:179.0158748626709ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6580 prompt_cache_len:5151 prompt_cache_ratio:0.7828267477203648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.1031029224395752 s -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.1039421558380127 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=60413574130035240158253699094597767795, time:1750766885.8008034s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -ERROR 06-24 20:08:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:177.2780418395996ms total_cost_time:177.30069160461426ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6581 prompt_cache_len:5151 prompt_cache_ratio:0.7827077951679076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 -DEBUG 06-24 20:08:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:05 [manager.py:224] router recive req id 8 cost time 0.10329365730285645 s -INFO 06-24 20:08:05 [manager.py:68] detokenization recv req id 8 cost time 0.1040806770324707 s -DEBUG 06-24 20:08:05 [manager.py:391] Prefill Batch: batch_id=66276631188760833674115637294585181631, time:1750766885.9826605s req_ids:[8] -DEBUG 06-24 20:08:05 [manager.py:391] -ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:05 lightllm_req_id:8 first_token_cost:180.9689998626709ms total_cost_time:180.99284172058105ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6582 prompt_cache_len:5151 prompt_cache_ratio:0.7825888787602553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 -DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10315942764282227 s -INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10401463508605957 s -DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=139215653085053419297325284996226614444, time:1750766886.1635964s req_ids:[8] -DEBUG 06-24 20:08:06 [manager.py:391] -DEBUG 06-24 20:08:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 30677.094 tokens/s -DEBUG 06-24 20:08:06 [stats.py:37] Avg prompt tokens throughput: 30667.744 tokens/s -DEBUG 06-24 20:08:06 [stats.py:37] Avg generate tokens throughput: 9.350 tokens/s -ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:176.93519592285156ms total_cost_time:176.9545078277588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6583 prompt_cache_len:5151 prompt_cache_ratio:0.7824699984809358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 -DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10346245765686035 s -INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10430788993835449 s -DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=297115931424411766673074528789680894868, time:1750766886.342778s req_ids:[8] -DEBUG 06-24 20:08:06 [manager.py:391] -ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:178.06482315063477ms total_cost_time:178.08938026428223ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6584 prompt_cache_len:5151 prompt_cache_ratio:0.7823511543134872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 -DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.10317134857177734 s -INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10408687591552734 s -DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=209692352950660815403228627761921158643, time:1750766886.5237727s req_ids:[8] -DEBUG 06-24 20:08:06 [manager.py:391] -ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:177.7477264404297ms total_cost_time:177.77037620544434ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6585 prompt_cache_len:5151 prompt_cache_ratio:0.7822323462414579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 -DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.1035304069519043 s -INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10434651374816895 s -DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=289609579578588072205425068740855085820, time:1750766886.704047s req_ids:[8] -DEBUG 06-24 20:08:06 [manager.py:391] -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:179.27956581115723ms total_cost_time:179.29983139038086ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6586 prompt_cache_len:5151 prompt_cache_ratio:0.7821135742484057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 -DEBUG 06-24 20:08:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:06 [manager.py:224] router recive req id 8 cost time 0.1035468578338623 s -INFO 06-24 20:08:06 [manager.py:68] detokenization recv req id 8 cost time 0.10448288917541504 s -DEBUG 06-24 20:08:06 [manager.py:391] Prefill Batch: batch_id=293129712444990751025023324333042776492, time:1750766886.8852599s req_ids:[8] -DEBUG 06-24 20:08:06 [manager.py:391] -ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:06 lightllm_req_id:8 first_token_cost:445.36304473876953ms total_cost_time:445.3871250152588ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6587 prompt_cache_len:5151 prompt_cache_ratio:0.7819948383178988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 -DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10301995277404785 s -INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.1038365364074707 s -DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=4495600393670880256734311935803767945, time:1750766887.3331206s req_ids:[8] -DEBUG 06-24 20:08:07 [manager.py:391] -ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:179.77285385131836ms total_cost_time:179.793119430542ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6588 prompt_cache_len:5151 prompt_cache_ratio:0.7818761384335154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 -DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10347890853881836 s -INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10431361198425293 s -DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=166688360723241871799307283720319327218, time:1750766887.5145986s req_ids:[8] -DEBUG 06-24 20:08:07 [manager.py:391] -ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:180.05776405334473ms total_cost_time:180.07683753967285ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6589 prompt_cache_len:5151 prompt_cache_ratio:0.7817574745788435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 -DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.1026756763458252 s -INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10354232788085938 s -INFO 06-24 20:08:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=246253059507484621452892384083505044897, time:1750766887.696044s req_ids:[8] -DEBUG 06-24 20:08:07 [manager.py:391] -ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:177.9465675354004ms total_cost_time:177.96611785888672ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6590 prompt_cache_len:5151 prompt_cache_ratio:0.7816388467374811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 -DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:07 [manager.py:224] router recive req id 8 cost time 0.10301518440246582 s -INFO 06-24 20:08:07 [manager.py:68] detokenization recv req id 8 cost time 0.10375452041625977 s -DEBUG 06-24 20:08:07 [manager.py:391] Prefill Batch: batch_id=111977189218298297154917991367338786883, time:1750766887.875277s req_ids:[8] -DEBUG 06-24 20:08:07 [manager.py:391] -ERROR 06-24 20:08:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:177.95920372009277ms total_cost_time:177.9789924621582ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6591 prompt_cache_len:5151 prompt_cache_ratio:0.781520254893036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 -DEBUG 06-24 20:08:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10365486145019531 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10451006889343262 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=255903059030314638768994547491062777866, time:1750766888.0569398s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:07 lightllm_req_id:8 first_token_cost:178.3130168914795ms total_cost_time:178.33304405212402ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6592 prompt_cache_len:5151 prompt_cache_ratio:0.7814016990291263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 -DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10294103622436523 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10374617576599121 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=196964873592574270824987986178047185210, time:1750766888.2355106s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:177.72698402404785ms total_cost_time:177.7479648590088ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6593 prompt_cache_len:5151 prompt_cache_ratio:0.7812831791293796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 -DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10266518592834473 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10341143608093262 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=241875484116784925527753277529395867868, time:1750766888.4160109s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:178.78985404968262ms total_cost_time:178.80964279174805ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6594 prompt_cache_len:5151 prompt_cache_ratio:0.781164695177434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 -DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10264062881469727 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.1034703254699707 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=118916997724574188583551769676313403706, time:1750766888.5966334s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:178.06482315063477ms total_cost_time:178.08914184570312ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6595 prompt_cache_len:5151 prompt_cache_ratio:0.781046247156937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 -DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10382437705993652 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10496211051940918 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=258364177596127837824218742533220279316, time:1750766888.7769299s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:179.78310585021973ms total_cost_time:179.80504035949707ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6596 prompt_cache_len:5151 prompt_cache_ratio:0.7809278350515464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 -DEBUG 06-24 20:08:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:08 [manager.py:224] router recive req id 8 cost time 0.10369372367858887 s -INFO 06-24 20:08:08 [manager.py:68] detokenization recv req id 8 cost time 0.10471844673156738 s -DEBUG 06-24 20:08:08 [manager.py:391] Prefill Batch: batch_id=151555169824861727898014115286026299808, time:1750766888.9572575s req_ids:[8] -DEBUG 06-24 20:08:08 [manager.py:391] -ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:08 lightllm_req_id:8 first_token_cost:176.50532722473145ms total_cost_time:176.52416229248047ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6597 prompt_cache_len:5151 prompt_cache_ratio:0.7808094588449295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 -DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10276484489440918 s -INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10384726524353027 s -DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=162944470859520650962697595420898744012, time:1750766889.1364498s req_ids:[8] -DEBUG 06-24 20:08:09 [manager.py:391] -ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:177.76846885681152ms total_cost_time:177.78825759887695ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6598 prompt_cache_len:5151 prompt_cache_ratio:0.7806911185207639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 -DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10268974304199219 s -INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10345578193664551 s -DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=74048051138518053639834027369433209462, time:1750766889.3190787s req_ids:[8] -DEBUG 06-24 20:08:09 [manager.py:391] -ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:376.4479160308838ms total_cost_time:376.4667510986328ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:6599 prompt_cache_len:5151 prompt_cache_ratio:0.7805728140627368 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 -DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10375857353210449 s -INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10451221466064453 s -DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=14188385992404870168531881843669727848, time:1750766889.6986046s req_ids:[8] -DEBUG 06-24 20:08:09 [manager.py:391] -ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:181.2765598297119ms total_cost_time:181.29587173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6600 prompt_cache_len:5151 prompt_cache_ratio:0.7804545454545454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 -DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:09 [manager.py:224] router recive req id 8 cost time 0.10363626480102539 s -INFO 06-24 20:08:09 [manager.py:68] detokenization recv req id 8 cost time 0.10441303253173828 s -DEBUG 06-24 20:08:09 [manager.py:391] Prefill Batch: batch_id=67879041528280267818434537724890625298, time:1750766889.8798192s req_ids:[8] -DEBUG 06-24 20:08:09 [manager.py:391] -ERROR 06-24 20:08:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:180.03535270690918ms total_cost_time:180.05776405334473ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6601 prompt_cache_len:5151 prompt_cache_ratio:0.780336312679897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 -DEBUG 06-24 20:08:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10311365127563477 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.1040492057800293 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=131685317018766945159821954781761140121, time:1750766890.0603228s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:09 lightllm_req_id:8 first_token_cost:177.68502235412598ms total_cost_time:177.70767211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6602 prompt_cache_len:5151 prompt_cache_ratio:0.7802181157225083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10274410247802734 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10351276397705078 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=256644791612131770199390578521525171997, time:1750766890.2396007s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:177.46424674987793ms total_cost_time:177.48689651489258ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6603 prompt_cache_len:5151 prompt_cache_ratio:0.7800999545661064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10282659530639648 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10361433029174805 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=216266420918941880875924200015291850098, time:1750766890.4202123s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:177.40201950073242ms total_cost_time:177.42204666137695ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6604 prompt_cache_len:5151 prompt_cache_ratio:0.7799818291944276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10255980491638184 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10336518287658691 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=153538677785110043256186593347632316745, time:1750766890.6002853s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:176.58686637878418ms total_cost_time:176.6073703765869ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6605 prompt_cache_len:5151 prompt_cache_ratio:0.7798637395912188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10318136215209961 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10396027565002441 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=280835678200776518496273460347817462003, time:1750766890.7757876s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:173.13742637634277ms total_cost_time:173.1586456298828ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6606 prompt_cache_len:5151 prompt_cache_ratio:0.7797456857402362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:10 [manager.py:224] router recive req id 8 cost time 0.10281610488891602 s -INFO 06-24 20:08:10 [manager.py:68] detokenization recv req id 8 cost time 0.10387015342712402 s -DEBUG 06-24 20:08:10 [manager.py:391] Prefill Batch: batch_id=21220642554357495732906291539070260379, time:1750766890.949562s req_ids:[8] -DEBUG 06-24 20:08:10 [manager.py:391] -ERROR 06-24 20:08:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:171.5095043182373ms total_cost_time:171.52953147888184ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6607 prompt_cache_len:5151 prompt_cache_ratio:0.7796276676252459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 -DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10353755950927734 s -INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10467290878295898 s -DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=36827867700163695493528678262358920762, time:1750766891.1236122s req_ids:[8] -DEBUG 06-24 20:08:11 [manager.py:391] -ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:10 lightllm_req_id:8 first_token_cost:173.53153228759766ms total_cost_time:173.5513210296631ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6608 prompt_cache_len:5151 prompt_cache_ratio:0.7795096852300242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 -DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10258340835571289 s -INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10365843772888184 s -DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=124606570890185929232611631494251867940, time:1750766891.298665s req_ids:[8] -DEBUG 06-24 20:08:11 [manager.py:391] -ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.70190811157227ms total_cost_time:171.7216968536377ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6609 prompt_cache_len:5151 prompt_cache_ratio:0.7793917385383567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 -DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10259151458740234 s -INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10367035865783691 s -DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=232945961897203821972240630802946431598, time:1750766891.4733114s req_ids:[8] -DEBUG 06-24 20:08:11 [manager.py:391] -ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:395.4133987426758ms total_cost_time:395.4334259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6610 prompt_cache_len:5151 prompt_cache_ratio:0.7792738275340393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 -DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:11 [manager.py:224] router recive req id 8 cost time 0.10354828834533691 s -INFO 06-24 20:08:11 [manager.py:68] detokenization recv req id 8 cost time 0.10467338562011719 s -DEBUG 06-24 20:08:11 [manager.py:391] Prefill Batch: batch_id=53101368892789926988651189164262826618, time:1750766891.8700006s req_ids:[8] -DEBUG 06-24 20:08:11 [manager.py:391] -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.41366004943848ms total_cost_time:171.4344024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6611 prompt_cache_len:5151 prompt_cache_ratio:0.7791559522008773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 -DEBUG 06-24 20:08:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10256123542785645 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10346746444702148 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=302174623266224902665345525997973858971, time:1750766892.0436416s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:11 lightllm_req_id:8 first_token_cost:171.86212539672852ms total_cost_time:171.88453674316406ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6612 prompt_cache_len:5151 prompt_cache_ratio:0.7790381125226861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10287785530090332 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10374259948730469 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=3032045629400578820823193840322397544, time:1750766892.2170122s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.67360877990723ms total_cost_time:170.69530487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:6613 prompt_cache_len:5151 prompt_cache_ratio:0.7789203084832905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.1036539077758789 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10462379455566406 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=279082285981614683865750171668569640259, time:1750766892.3901708s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.93181610107422ms total_cost_time:170.9582805633545ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6614 prompt_cache_len:5151 prompt_cache_ratio:0.7788025400665256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.1042630672454834 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10511112213134766 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=261742954679300659917031500397173604981, time:1750766892.5636303s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:174.6382713317871ms total_cost_time:174.66282844543457ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6615 prompt_cache_len:5151 prompt_cache_ratio:0.7786848072562358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10440301895141602 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.10520243644714355 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=52609022134127389406442247711739875126, time:1750766892.7406607s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:171.431303024292ms total_cost_time:171.45705223083496ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:6616 prompt_cache_len:5151 prompt_cache_ratio:0.7785671100362757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:12 [manager.py:224] router recive req id 8 cost time 0.10311579704284668 s -INFO 06-24 20:08:12 [manager.py:68] detokenization recv req id 8 cost time 0.1040494441986084 s -DEBUG 06-24 20:08:12 [manager.py:391] Prefill Batch: batch_id=235041608358570639903522591436355314691, time:1750766892.9145389s req_ids:[8] -DEBUG 06-24 20:08:12 [manager.py:391] -ERROR 06-24 20:08:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:172.09434509277344ms total_cost_time:172.1212863922119ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:6617 prompt_cache_len:5151 prompt_cache_ratio:0.7784494483905093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 -DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10426878929138184 s -INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10519886016845703 s -DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=194773498160783962394009866550121854296, time:1750766893.0884979s req_ids:[8] -DEBUG 06-24 20:08:13 [manager.py:391] -ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:12 lightllm_req_id:8 first_token_cost:170.78471183776855ms total_cost_time:170.8052158355713ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6618 prompt_cache_len:5151 prompt_cache_ratio:0.7783318223028105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 -DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10252165794372559 s -INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10339879989624023 s -DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=68391446853682902637113617250917353863, time:1750766893.261682s req_ids:[8] -DEBUG 06-24 20:08:13 [manager.py:391] -ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:171.83279991149902ms total_cost_time:171.85688018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:6619 prompt_cache_len:5151 prompt_cache_ratio:0.778214231757063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 -DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10317540168762207 s -INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10394597053527832 s -DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=232134761998851497740550972545339718604, time:1750766893.4364195s req_ids:[8] -DEBUG 06-24 20:08:13 [manager.py:391] -ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:174.53837394714355ms total_cost_time:174.5603084564209ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6620 prompt_cache_len:5151 prompt_cache_ratio:0.7780966767371601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 -DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.10300922393798828 s -INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.10384058952331543 s -DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=300258016195312242781607881392428318636, time:1750766893.612446s req_ids:[8] -DEBUG 06-24 20:08:13 [manager.py:391] -ERROR 06-24 20:08:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:171.44203186035156ms total_cost_time:171.4613437652588ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6621 prompt_cache_len:5151 prompt_cache_ratio:0.777979157227005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 -DEBUG 06-24 20:08:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:13 [manager.py:224] router recive req id 8 cost time 0.30655336380004883 s -INFO 06-24 20:08:13 [manager.py:68] detokenization recv req id 8 cost time 0.30755615234375 s -DEBUG 06-24 20:08:13 [manager.py:391] Prefill Batch: batch_id=76616850857632319776638515296303990014, time:1750766893.9724815s req_ids:[8] -DEBUG 06-24 20:08:13 [manager.py:391] -ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:13 lightllm_req_id:8 first_token_cost:374.4938373565674ms total_cost_time:374.5379447937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6622 prompt_cache_len:5151 prompt_cache_ratio:0.7778616732105105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 -DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10610008239746094 s -INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10734772682189941 s -DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=108516167701310695909846229215343784401, time:1750766894.1566823s req_ids:[8] -DEBUG 06-24 20:08:14 [manager.py:391] -ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:197.31950759887695ms total_cost_time:197.36480712890625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6623 prompt_cache_len:5151 prompt_cache_ratio:0.777744224671599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 -DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.1066889762878418 s -INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.1084146499633789 s -DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=99576434557634153304010861976032655950, time:1750766894.3741653s req_ids:[8] -DEBUG 06-24 20:08:14 [manager.py:391] -ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:213.9589786529541ms total_cost_time:214.00094032287598ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6624 prompt_cache_len:5151 prompt_cache_ratio:0.7776268115942029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 -DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10662484169006348 s -INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10846686363220215 s -DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=81390603297347369586015502132699054087, time:1750766894.5825822s req_ids:[8] -DEBUG 06-24 20:08:14 [manager.py:391] -ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:206.7403793334961ms total_cost_time:206.78400993347168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6625 prompt_cache_len:5151 prompt_cache_ratio:0.7775094339622641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 -DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:14 [batch.py:51] router release req id 8 -INFO 06-24 20:08:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10774016380310059 s -INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.1095573902130127 s -DEBUG 06-24 20:08:14 [manager.py:391] Prefill Batch: batch_id=238190361934916103154338218314280947890, time:1750766894.793305s req_ids:[8] -DEBUG 06-24 20:08:14 [manager.py:391] -ERROR 06-24 20:08:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:204.2081356048584ms total_cost_time:204.25057411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6626 prompt_cache_len:5151 prompt_cache_ratio:0.7773920917597343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 -DEBUG 06-24 20:08:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:14 [manager.py:224] router recive req id 8 cost time 0.10751938819885254 s -INFO 06-24 20:08:14 [manager.py:68] detokenization recv req id 8 cost time 0.10940122604370117 s -DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=304291945790671430844889712059379354506, time:1750766895.002599s req_ids:[8] -DEBUG 06-24 20:08:15 [manager.py:391] -ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:14 lightllm_req_id:8 first_token_cost:217.2844409942627ms total_cost_time:217.33903884887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:6627 prompt_cache_len:5151 prompt_cache_ratio:0.7772747849705749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 -DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10763382911682129 s -INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s -DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=179281977347027537178778990850163758956, time:1750766895.2221758s req_ids:[8] -DEBUG 06-24 20:08:15 [manager.py:391] -ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:207.3519229888916ms total_cost_time:207.40222930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:6628 prompt_cache_len:5151 prompt_cache_ratio:0.7771575135787568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 -DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10854339599609375 s -INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.11043167114257812 s -DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=116141932841926246748307144103376131, time:1750766895.436326s req_ids:[8] -DEBUG 06-24 20:08:15 [manager.py:391] -ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:210.1418972015381ms total_cost_time:210.1907730102539ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:6629 prompt_cache_len:5151 prompt_cache_ratio:0.7770402775682607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 -DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s -INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.11076021194458008 s -DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=313644453672583407323065939380400725469, time:1750766895.6425743s req_ids:[8] -DEBUG 06-24 20:08:15 [manager.py:391] -ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:202.17633247375488ms total_cost_time:202.22187042236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6630 prompt_cache_len:5151 prompt_cache_ratio:0.7769230769230769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 -DEBUG 06-24 20:08:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:15 [manager.py:224] router recive req id 8 cost time 0.10620403289794922 s -INFO 06-24 20:08:15 [manager.py:68] detokenization recv req id 8 cost time 0.10825181007385254 s -DEBUG 06-24 20:08:15 [manager.py:391] Prefill Batch: batch_id=178386253344971705894471970057605489820, time:1750766895.8554792s req_ids:[8] -DEBUG 06-24 20:08:15 [manager.py:391] -ERROR 06-24 20:08:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:222.16510772705078ms total_cost_time:222.1968173980713ms,out_token_counter:1 mean_per_token_cost_time: 0.03170967102050781ms prompt_token_num:6631 prompt_cache_len:5151 prompt_cache_ratio:0.7768059116272056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 -DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10491085052490234 s -INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10579061508178711 s -DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=100654872267871011793600096768215123932, time:1750766896.093473s req_ids:[8] -DEBUG 06-24 20:08:16 [manager.py:391] -ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:15 lightllm_req_id:8 first_token_cost:188.3232593536377ms total_cost_time:188.35163116455078ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:6632 prompt_cache_len:5151 prompt_cache_ratio:0.7766887816646562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 -DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10452127456665039 s -INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10539364814758301 s -DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=331272067694196189234109459581233641012, time:1750766896.2733557s req_ids:[8] -DEBUG 06-24 20:08:16 [manager.py:391] -DEBUG 06-24 20:08:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 32693.724 tokens/s -DEBUG 06-24 20:08:16 [stats.py:37] Avg prompt tokens throughput: 32683.833 tokens/s -DEBUG 06-24 20:08:16 [stats.py:37] Avg generate tokens throughput: 9.891 tokens/s -INFO 06-24 20:08:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:366.90473556518555ms total_cost_time:366.9276237487793ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:6633 prompt_cache_len:5151 prompt_cache_ratio:0.7765716870194482 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 -DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10358071327209473 s -INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10437870025634766 s -DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=314050034208254503942930170399987118245, time:1750766896.6405125s req_ids:[8] -DEBUG 06-24 20:08:16 [manager.py:391] -ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:173.50506782531738ms total_cost_time:173.53129386901855ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:6634 prompt_cache_len:5151 prompt_cache_ratio:0.7764546276756105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 -DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10352206230163574 s -INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.1042184829711914 s -DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=339445298279271882749399439891659184736, time:1750766896.814091s req_ids:[8] -DEBUG 06-24 20:08:16 [manager.py:391] -ERROR 06-24 20:08:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:170.58706283569336ms total_cost_time:170.6104278564453ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6635 prompt_cache_len:5151 prompt_cache_ratio:0.7763376036171816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 -DEBUG 06-24 20:08:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:16 [manager.py:224] router recive req id 8 cost time 0.10346174240112305 s -INFO 06-24 20:08:16 [manager.py:68] detokenization recv req id 8 cost time 0.10422515869140625 s -DEBUG 06-24 20:08:16 [manager.py:391] Prefill Batch: batch_id=121562355206119065467765885132694553295, time:1750766896.9900672s req_ids:[8] -DEBUG 06-24 20:08:16 [manager.py:391] -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:16 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:16 lightllm_req_id:8 first_token_cost:175.92597007751465ms total_cost_time:175.94647407531738ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6636 prompt_cache_len:5151 prompt_cache_ratio:0.7762206148282098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10375618934631348 s -INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10448598861694336 s -DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=41966788393262626009712097227105373913, time:1750766897.166263s req_ids:[8] -DEBUG 06-24 20:08:17 [manager.py:391] -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:172.20640182495117ms total_cost_time:172.23238945007324ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:6637 prompt_cache_len:5151 prompt_cache_ratio:0.7761036612927528 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10448217391967773 s -INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10520744323730469 s -DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=218651858331743370911797444053562495163, time:1750766897.3408914s req_ids:[8] -DEBUG 06-24 20:08:17 [manager.py:391] -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:172.6679801940918ms total_cost_time:172.69325256347656ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:6638 prompt_cache_len:5151 prompt_cache_ratio:0.775986742994878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10456490516662598 s -INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10544252395629883 s -DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=301851994734178787066283953352489382906, time:1750766897.5125735s req_ids:[8] -DEBUG 06-24 20:08:17 [manager.py:391] -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:169.2962646484375ms total_cost_time:169.32082176208496ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:6639 prompt_cache_len:5151 prompt_cache_ratio:0.7758698599186624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10445165634155273 s -INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10521769523620605 s -DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=214612147480416088725447606320552949597, time:1750766897.6860292s req_ids:[8] -DEBUG 06-24 20:08:17 [manager.py:391] -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:170.99809646606445ms total_cost_time:171.02456092834473ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:6640 prompt_cache_len:5151 prompt_cache_ratio:0.7757530120481928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:17 [manager.py:224] router recive req id 8 cost time 0.10480189323425293 s -INFO 06-24 20:08:17 [manager.py:68] detokenization recv req id 8 cost time 0.10550880432128906 s -DEBUG 06-24 20:08:17 [manager.py:391] Prefill Batch: batch_id=288837377376845741450783638332174716442, time:1750766897.8611555s req_ids:[8] -DEBUG 06-24 20:08:17 [manager.py:391] -ERROR 06-24 20:08:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:173.22945594787598ms total_cost_time:173.2501983642578ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6641 prompt_cache_len:5151 prompt_cache_ratio:0.7756361993675651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 -DEBUG 06-24 20:08:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10285186767578125 s -INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10360407829284668 s -DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=328686461131071067099056829337821508011, time:1750766898.036024s req_ids:[8] -DEBUG 06-24 20:08:18 [manager.py:391] -ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:17 lightllm_req_id:8 first_token_cost:171.71931266784668ms total_cost_time:171.7393398284912ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6642 prompt_cache_len:5151 prompt_cache_ratio:0.7755194218608853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 -DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10268735885620117 s -INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10346508026123047 s -DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=81491088297838954735987998757817457308, time:1750766898.210867s req_ids:[8] -DEBUG 06-24 20:08:18 [manager.py:391] -ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:174.09157752990723ms total_cost_time:174.11160469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6643 prompt_cache_len:5151 prompt_cache_ratio:0.7754026795122686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 -DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10339236259460449 s -INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10413932800292969 s -DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=17660781774325945835768061267815100187, time:1750766898.3865294s req_ids:[8] -DEBUG 06-24 20:08:18 [manager.py:391] -ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:171.9064712524414ms total_cost_time:171.92769050598145ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6644 prompt_cache_len:5151 prompt_cache_ratio:0.7752859723058398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 -DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.30435895919799805 s -INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.3051755428314209 s -DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=210535194157748794671121346160281012819, time:1750766898.752301s req_ids:[8] -DEBUG 06-24 20:08:18 [manager.py:391] -ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:364.168643951416ms total_cost_time:364.18938636779785ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6645 prompt_cache_len:5151 prompt_cache_ratio:0.7751693002257336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 -DEBUG 06-24 20:08:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:18 [manager.py:224] router recive req id 8 cost time 0.10298967361450195 s -INFO 06-24 20:08:18 [manager.py:68] detokenization recv req id 8 cost time 0.10371994972229004 s -DEBUG 06-24 20:08:18 [manager.py:391] Prefill Batch: batch_id=20501690227470305272477878612189158988, time:1750766898.9278543s req_ids:[8] -DEBUG 06-24 20:08:18 [manager.py:391] -ERROR 06-24 20:08:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:173.384428024292ms total_cost_time:173.40683937072754ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6646 prompt_cache_len:5151 prompt_cache_ratio:0.7750526632560939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10364818572998047 s -INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10438823699951172 s -DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=328369143440921323501441110847433197116, time:1750766899.1038973s req_ids:[8] -DEBUG 06-24 20:08:19 [manager.py:391] -ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:18 lightllm_req_id:8 first_token_cost:173.97117614746094ms total_cost_time:173.99215698242188ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6647 prompt_cache_len:5151 prompt_cache_ratio:0.7749360613810742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10349678993225098 s -INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.1042640209197998 s -DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=292253474812179226473531301739829822962, time:1750766899.2789533s req_ids:[8] -DEBUG 06-24 20:08:19 [manager.py:391] -ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:176.0251522064209ms total_cost_time:176.04565620422363ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6648 prompt_cache_len:5151 prompt_cache_ratio:0.7748194945848376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10339236259460449 s -INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10417342185974121 s -DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=270958768949171847383913317475610678651, time:1750766899.4568348s req_ids:[8] -DEBUG 06-24 20:08:19 [manager.py:391] -ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:183.93754959106445ms total_cost_time:183.9590072631836ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6649 prompt_cache_len:5151 prompt_cache_ratio:0.7747029628515566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10332274436950684 s -INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10415863990783691 s -DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=177971793414114685818885274700060432217, time:1750766899.654955s req_ids:[8] -DEBUG 06-24 20:08:19 [manager.py:391] -ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:196.45428657531738ms total_cost_time:196.47765159606934ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:6650 prompt_cache_len:5151 prompt_cache_ratio:0.7745864661654135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:19 [manager.py:224] router recive req id 8 cost time 0.10332489013671875 s -INFO 06-24 20:08:19 [manager.py:68] detokenization recv req id 8 cost time 0.10413789749145508 s -DEBUG 06-24 20:08:19 [manager.py:391] Prefill Batch: batch_id=11842557855751636329965958002161259417, time:1750766899.8524349s req_ids:[8] -DEBUG 06-24 20:08:19 [manager.py:391] -ERROR 06-24 20:08:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:194.01240348815918ms total_cost_time:194.03553009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6651 prompt_cache_len:5151 prompt_cache_ratio:0.7744700045105999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 -DEBUG 06-24 20:08:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10362505912780762 s -INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10444235801696777 s -DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=235311089144352083349883720089446389135, time:1750766900.0458918s req_ids:[8] -DEBUG 06-24 20:08:20 [manager.py:391] -ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:19 lightllm_req_id:8 first_token_cost:192.31939315795898ms total_cost_time:192.34371185302734ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:6652 prompt_cache_len:5151 prompt_cache_ratio:0.7743535778713169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 -DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10342693328857422 s -INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10423541069030762 s -DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=291984512690000998264627330919083872621, time:1750766900.2438397s req_ids:[8] -DEBUG 06-24 20:08:20 [manager.py:391] -ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:194.66876983642578ms total_cost_time:194.69189643859863ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:6653 prompt_cache_len:5151 prompt_cache_ratio:0.7742371862317752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 -DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.1035163402557373 s -INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10433268547058105 s -DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=28245207229027621910766393667237963024, time:1750766900.4409645s req_ids:[8] -DEBUG 06-24 20:08:20 [manager.py:391] -ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:193.53079795837402ms total_cost_time:193.55463981628418ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6654 prompt_cache_len:5151 prompt_cache_ratio:0.7741208295761948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 -DEBUG 06-24 20:08:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:20 [manager.py:224] router recive req id 8 cost time 0.10385274887084961 s -INFO 06-24 20:08:20 [manager.py:68] detokenization recv req id 8 cost time 0.10463690757751465 s -DEBUG 06-24 20:08:20 [manager.py:391] Prefill Batch: batch_id=317514030739637553419146091810570466023, time:1750766900.638091s req_ids:[8] -DEBUG 06-24 20:08:20 [manager.py:391] -ERROR 06-24 20:08:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:471.22955322265625ms total_cost_time:471.2533950805664ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:6655 prompt_cache_len:5151 prompt_cache_ratio:0.7740045078888054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10342526435852051 s -INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10432195663452148 s -DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=30980645598591650455106540333409595111, time:1750766901.1110702s req_ids:[8] -DEBUG 06-24 20:08:21 [manager.py:391] -ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:20 lightllm_req_id:8 first_token_cost:194.4868564605713ms total_cost_time:194.50926780700684ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6656 prompt_cache_len:5151 prompt_cache_ratio:0.7738882211538461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10336017608642578 s -INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.1042332649230957 s -DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=34121926877449444250388161550306996865, time:1750766901.3078477s req_ids:[8] -DEBUG 06-24 20:08:21 [manager.py:391] -ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:195.55211067199707ms total_cost_time:195.5733299255371ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:6657 prompt_cache_len:5151 prompt_cache_ratio:0.7737719693555656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10371208190917969 s -INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10457515716552734 s -DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=287432616830226294511326950660990517217, time:1750766901.5050535s req_ids:[8] -DEBUG 06-24 20:08:21 [manager.py:391] -ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:193.43280792236328ms total_cost_time:193.45808029174805ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:6658 prompt_cache_len:5151 prompt_cache_ratio:0.7736557524782217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.1029806137084961 s -INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.10371804237365723 s -DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=175664264706789410897304020809037391109, time:1750766901.701097s req_ids:[8] -DEBUG 06-24 20:08:21 [manager.py:391] -ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:194.43726539611816ms total_cost_time:194.4587230682373ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6659 prompt_cache_len:5151 prompt_cache_ratio:0.773539570506082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:21 [manager.py:224] router recive req id 8 cost time 0.10389161109924316 s -INFO 06-24 20:08:21 [manager.py:68] detokenization recv req id 8 cost time 0.104644775390625 s -DEBUG 06-24 20:08:21 [manager.py:391] Prefill Batch: batch_id=10630165977194723510697770059608746840, time:1750766901.8937702s req_ids:[8] -DEBUG 06-24 20:08:21 [manager.py:391] -ERROR 06-24 20:08:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:180.11474609375ms total_cost_time:180.13548851013184ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6660 prompt_cache_len:5151 prompt_cache_ratio:0.7734234234234234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 -DEBUG 06-24 20:08:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10329294204711914 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10410022735595703 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=128063307483509719508947582742323474444, time:1750766902.069227s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:21 lightllm_req_id:8 first_token_cost:171.63372039794922ms total_cost_time:171.6594696044922ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:6661 prompt_cache_len:5151 prompt_cache_ratio:0.7733073112145323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10325241088867188 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10403227806091309 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=46927959820916164977552300981163483105, time:1750766902.2432866s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:173.1710433959961ms total_cost_time:173.19369316101074ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:6662 prompt_cache_len:5151 prompt_cache_ratio:0.7731912338637046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10293006896972656 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10372734069824219 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=62987125412093965661292374868193298594, time:1750766902.4182777s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.2493896484375ms total_cost_time:171.26846313476562ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6663 prompt_cache_len:5151 prompt_cache_ratio:0.7730751913552454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10373187065124512 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10460710525512695 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=302858764404270177606600070937502381217, time:1750766902.5922801s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.84019088745117ms total_cost_time:171.8597412109375ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6664 prompt_cache_len:5151 prompt_cache_ratio:0.7729591836734694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10385394096374512 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10466408729553223 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=259728389754750789146250968403973284523, time:1750766902.766782s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:172.30916023254395ms total_cost_time:172.32966423034668ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6665 prompt_cache_len:5151 prompt_cache_ratio:0.7728432108027007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:22 [manager.py:224] router recive req id 8 cost time 0.10370969772338867 s -INFO 06-24 20:08:22 [manager.py:68] detokenization recv req id 8 cost time 0.10449647903442383 s -DEBUG 06-24 20:08:22 [manager.py:391] Prefill Batch: batch_id=2136601516786641120303337454849602120, time:1750766902.9407043s req_ids:[8] -DEBUG 06-24 20:08:22 [manager.py:391] -ERROR 06-24 20:08:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:171.33569717407227ms total_cost_time:171.3576316833496ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6666 prompt_cache_len:5151 prompt_cache_ratio:0.7727272727272727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 -DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.1028757095336914 s -INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10366630554199219 s -DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=122267241236350591908030647070893183518, time:1750766903.115273s req_ids:[8] -DEBUG 06-24 20:08:23 [manager.py:391] -INFO 06-24 20:08:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:22 lightllm_req_id:8 first_token_cost:364.73870277404785ms total_cost_time:364.78447914123535ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6667 prompt_cache_len:5151 prompt_cache_ratio:0.7726113694315284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 -DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10740351676940918 s -INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10833358764648438 s -DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=164102660930050860680511214386531734555, time:1750766903.4801629s req_ids:[8] -DEBUG 06-24 20:08:23 [manager.py:391] -ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:169.39020156860352ms total_cost_time:169.40927505493164ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6668 prompt_cache_len:5151 prompt_cache_ratio:0.77249550089982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 -DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10348629951477051 s -INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10426950454711914 s -DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=17869486175194536211254003213995010229, time:1750766903.6546123s req_ids:[8] -DEBUG 06-24 20:08:23 [manager.py:391] -ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:171.9977855682373ms total_cost_time:172.01972007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:6669 prompt_cache_len:5151 prompt_cache_ratio:0.7723796671165092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 -DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10376715660095215 s -INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10466957092285156 s -DEBUG 06-24 20:08:23 [manager.py:391] Prefill Batch: batch_id=10259493781062500999804112004293445854, time:1750766903.8270166s req_ids:[8] -DEBUG 06-24 20:08:23 [manager.py:391] -ERROR 06-24 20:08:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:169.92759704589844ms total_cost_time:169.94810104370117ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6670 prompt_cache_len:5151 prompt_cache_ratio:0.772263868065967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 -DEBUG 06-24 20:08:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:23 [manager.py:224] router recive req id 8 cost time 0.10370993614196777 s -INFO 06-24 20:08:23 [manager.py:68] detokenization recv req id 8 cost time 0.10449790954589844 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=220635371705624340100337866565929489887, time:1750766904.0005183s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:23 lightllm_req_id:8 first_token_cost:172.16253280639648ms total_cost_time:172.18351364135742ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:6671 prompt_cache_len:5151 prompt_cache_ratio:0.7721481037325738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10272789001464844 s -INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10355091094970703 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=246183150744101542631592476689679010154, time:1750766904.1753678s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.90814018249512ms total_cost_time:171.92888259887695ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6672 prompt_cache_len:5151 prompt_cache_ratio:0.7720323741007195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10359883308410645 s -INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10427284240722656 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=323063720290417171130286498329359322088, time:1750766904.3478436s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.66972160339355ms total_cost_time:171.6899871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:6673 prompt_cache_len:5151 prompt_cache_ratio:0.7719166791548029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10263442993164062 s -INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10336804389953613 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=234848190280298446323037109848195276285, time:1750766904.5229247s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:173.44999313354492ms total_cost_time:173.47049713134766ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6674 prompt_cache_len:5151 prompt_cache_ratio:0.7718010188792328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.10278844833374023 s -INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.10344481468200684 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=21845366977061307162928420200092222127, time:1750766904.6973526s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:174.4372844696045ms total_cost_time:174.45778846740723ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6675 prompt_cache_len:5151 prompt_cache_ratio:0.771685393258427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:24 [manager.py:224] router recive req id 8 cost time 0.1026163101196289 s -INFO 06-24 20:08:24 [manager.py:68] detokenization recv req id 8 cost time 0.1034235954284668 s -DEBUG 06-24 20:08:24 [manager.py:391] Prefill Batch: batch_id=222536560041210690881446825363828074025, time:1750766904.8749282s req_ids:[8] -DEBUG 06-24 20:08:24 [manager.py:391] -ERROR 06-24 20:08:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:173.46549034118652ms total_cost_time:173.48551750183105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6676 prompt_cache_len:5151 prompt_cache_ratio:0.7715698022768125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 -DEBUG 06-24 20:08:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10376620292663574 s -INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10460615158081055 s -DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=76850591441180677852678766913997708516, time:1750766905.0495467s req_ids:[8] -DEBUG 06-24 20:08:25 [manager.py:391] -ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:24 lightllm_req_id:8 first_token_cost:171.45204544067383ms total_cost_time:171.47278785705566ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6677 prompt_cache_len:5151 prompt_cache_ratio:0.7714542459188258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 -DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10357403755187988 s -INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10443258285522461 s -DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=158138099099929713803848566779531729145, time:1750766905.222758s req_ids:[8] -DEBUG 06-24 20:08:25 [manager.py:391] -ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.52810096740723ms total_cost_time:171.54812812805176ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:6678 prompt_cache_len:5151 prompt_cache_ratio:0.7713387241689128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 -DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.3035445213317871 s -INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.3043382167816162 s -DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=213742922549194738191601459810872188563, time:1750766905.589661s req_ids:[8] -DEBUG 06-24 20:08:25 [manager.py:391] -ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:364.54296112060547ms total_cost_time:364.5627498626709ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:6679 prompt_cache_len:5151 prompt_cache_ratio:0.7712232370115286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 -DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10274505615234375 s -INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10355663299560547 s -DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=268013514714247676222980776834001766051, time:1750766905.7636595s req_ids:[8] -DEBUG 06-24 20:08:25 [manager.py:391] -ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.14734649658203ms total_cost_time:171.16880416870117ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6680 prompt_cache_len:5151 prompt_cache_ratio:0.7711077844311377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 -DEBUG 06-24 20:08:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:25 [manager.py:224] router recive req id 8 cost time 0.10281825065612793 s -INFO 06-24 20:08:25 [manager.py:68] detokenization recv req id 8 cost time 0.10369467735290527 s -DEBUG 06-24 20:08:25 [manager.py:391] Prefill Batch: batch_id=4544020287834425213191641493530896407, time:1750766905.9374914s req_ids:[8] -DEBUG 06-24 20:08:25 [manager.py:391] -ERROR 06-24 20:08:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:172.18422889709473ms total_cost_time:172.20377922058105ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:6681 prompt_cache_len:5151 prompt_cache_ratio:0.7709923664122137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10272216796875 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10355377197265625 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=302792190368058105885812858556684838605, time:1750766906.1119337s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:25 lightllm_req_id:8 first_token_cost:171.39577865600586ms total_cost_time:171.4150905609131ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6682 prompt_cache_len:5151 prompt_cache_ratio:0.7708769829392398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.1025991439819336 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10343623161315918 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=99641745374432970864789277615389258348, time:1750766906.2853358s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -DEBUG 06-24 20:08:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 33262.512 tokens/s -DEBUG 06-24 20:08:26 [stats.py:37] Avg prompt tokens throughput: 33252.524 tokens/s -DEBUG 06-24 20:08:26 [stats.py:37] Avg generate tokens throughput: 9.988 tokens/s -ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:172.50490188598633ms total_cost_time:172.52421379089355ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:6683 prompt_cache_len:5151 prompt_cache_ratio:0.770761633996708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10355591773986816 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.1043710708618164 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=255537719101623284236138329010103564888, time:1750766906.4597776s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:170.81952095031738ms total_cost_time:170.8385944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:6684 prompt_cache_len:5151 prompt_cache_ratio:0.7706463195691203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10346436500549316 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10428833961486816 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=255963418735040898194772303425898154574, time:1750766906.6313071s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:170.0420379638672ms total_cost_time:170.08519172668457ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6685 prompt_cache_len:5151 prompt_cache_ratio:0.7705310396409872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10663580894470215 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.10767817497253418 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=67055651969537020383802311921946454342, time:1750766906.800675s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:166.78261756896973ms total_cost_time:166.8260097503662ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6686 prompt_cache_len:5151 prompt_cache_ratio:0.7704157941968292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 -DEBUG 06-24 20:08:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:26 [manager.py:224] router recive req id 8 cost time 0.10634708404541016 s -INFO 06-24 20:08:26 [manager.py:68] detokenization recv req id 8 cost time 0.1082160472869873 s -DEBUG 06-24 20:08:26 [manager.py:391] Prefill Batch: batch_id=21935383184900354097340083238716987528, time:1750766906.9691164s req_ids:[8] -DEBUG 06-24 20:08:26 [manager.py:391] -ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:26 lightllm_req_id:8 first_token_cost:181.05196952819824ms total_cost_time:181.09560012817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6687 prompt_cache_len:5151 prompt_cache_ratio:0.7703005832211754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 -DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.10605335235595703 s -INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.10798907279968262 s -DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=213359968021568292966472708527835167370, time:1750766907.1608782s req_ids:[8] -DEBUG 06-24 20:08:27 [manager.py:391] -ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:198.18115234375ms total_cost_time:198.2254981994629ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6688 prompt_cache_len:5151 prompt_cache_ratio:0.7701854066985646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 -DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.10673999786376953 s -INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.1088719367980957 s -DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=281303154695637182775781952614451809064, time:1750766907.37003s req_ids:[8] -DEBUG 06-24 20:08:27 [manager.py:391] -ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:211.62176132202148ms total_cost_time:211.66658401489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6689 prompt_cache_len:5151 prompt_cache_ratio:0.7700702646135447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 -DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.11572265625 s -INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.1182248592376709 s -DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=239751704778660776030788681335740157755, time:1750766907.5761216s req_ids:[8] -DEBUG 06-24 20:08:27 [manager.py:391] -ERROR 06-24 20:08:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:203.93681526184082ms total_cost_time:203.9813995361328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6690 prompt_cache_len:5151 prompt_cache_ratio:0.7699551569506726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 -DEBUG 06-24 20:08:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:27 [manager.py:224] router recive req id 8 cost time 0.3085286617279053 s -INFO 06-24 20:08:27 [manager.py:68] detokenization recv req id 8 cost time 0.31034135818481445 s -DEBUG 06-24 20:08:27 [manager.py:391] Prefill Batch: batch_id=246395412899516577919414705014750691435, time:1750766907.9752414s req_ids:[8] -DEBUG 06-24 20:08:27 [manager.py:391] -ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:27 lightllm_req_id:8 first_token_cost:370.9242343902588ms total_cost_time:370.9678649902344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6691 prompt_cache_len:5151 prompt_cache_ratio:0.7698400836945151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 -DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10677194595336914 s -INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.10872364044189453 s -DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=282620410023735375457222546593774511403, time:1750766908.1566496s req_ids:[8] -DEBUG 06-24 20:08:28 [manager.py:391] -ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:196.72751426696777ms total_cost_time:196.77209854125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6692 prompt_cache_len:5151 prompt_cache_ratio:0.7697250448296473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 -DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10802602767944336 s -INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.11000442504882812 s -DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=252990733704484395681354265889919238969, time:1750766908.3592584s req_ids:[8] -DEBUG 06-24 20:08:28 [manager.py:391] -ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:210.81829071044922ms total_cost_time:210.86812019348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:6693 prompt_cache_len:5151 prompt_cache_ratio:0.7696100403406544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 -DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s -INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092526912689209 s -DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=42176902470794745955215985218822114854, time:1750766908.5746381s req_ids:[8] -DEBUG 06-24 20:08:28 [manager.py:391] -ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:205.11317253112793ms total_cost_time:205.15727996826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6694 prompt_cache_len:5151 prompt_cache_ratio:0.7694950702121303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 -DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10800719261169434 s -INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.11029195785522461 s -DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=24783726129431110386393331515845013217, time:1750766908.786741s req_ids:[8] -DEBUG 06-24 20:08:28 [manager.py:391] -ERROR 06-24 20:08:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:199.57685470581055ms total_cost_time:199.61881637573242ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6695 prompt_cache_len:5151 prompt_cache_ratio:0.7693801344286781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 -DEBUG 06-24 20:08:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:28 [manager.py:224] router recive req id 8 cost time 0.10670304298400879 s -INFO 06-24 20:08:28 [manager.py:68] detokenization recv req id 8 cost time 0.10862040519714355 s -DEBUG 06-24 20:08:28 [manager.py:391] Prefill Batch: batch_id=59928214765592580147659844039457142889, time:1750766908.9908593s req_ids:[8] -DEBUG 06-24 20:08:28 [manager.py:391] -ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:28 lightllm_req_id:8 first_token_cost:201.70235633850098ms total_cost_time:201.7230987548828ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:6696 prompt_cache_len:5151 prompt_cache_ratio:0.7692652329749103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 -DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10657286643981934 s -INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.10802721977233887 s -DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=49264600708782802041134166380472424368, time:1750766909.1990623s req_ids:[8] -DEBUG 06-24 20:08:29 [manager.py:391] -ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:204.6678066253662ms total_cost_time:204.73599433898926ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:6697 prompt_cache_len:5151 prompt_cache_ratio:0.7691503658354487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 -DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10910367965698242 s -INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.11104536056518555 s -DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=292686549458837563439949141375681683571, time:1750766909.4069147s req_ids:[8] -DEBUG 06-24 20:08:29 [manager.py:391] -ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:204.19001579284668ms total_cost_time:204.23388481140137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6698 prompt_cache_len:5151 prompt_cache_ratio:0.7690355329949239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 -DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10703229904174805 s -INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.10890078544616699 s -DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=196819749059306390656920415950124368006, time:1750766909.6300597s req_ids:[8] -DEBUG 06-24 20:08:29 [manager.py:391] -ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:224.19118881225586ms total_cost_time:224.23672676086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6699 prompt_cache_len:5151 prompt_cache_ratio:0.7689207344379758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 -DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:29 [manager.py:224] router recive req id 8 cost time 0.10824179649353027 s -INFO 06-24 20:08:29 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s -DEBUG 06-24 20:08:29 [manager.py:391] Prefill Batch: batch_id=168406442583207974979581954703224952412, time:1750766909.8480458s req_ids:[8] -DEBUG 06-24 20:08:29 [manager.py:391] -ERROR 06-24 20:08:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:211.39764785766602ms total_cost_time:211.44366264343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6700 prompt_cache_len:5151 prompt_cache_ratio:0.7688059701492538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 -DEBUG 06-24 20:08:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s -INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003565788269043 s -DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=147332535392471569285841464770054614332, time:1750766910.0680475s req_ids:[8] -DEBUG 06-24 20:08:30 [manager.py:391] -ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:29 lightllm_req_id:8 first_token_cost:213.8960361480713ms total_cost_time:213.94085884094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6701 prompt_cache_len:5151 prompt_cache_ratio:0.7686912401134159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 -DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10788178443908691 s -INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11001801490783691 s -DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=191959397599350931026694696406300295960, time:1750766910.2828557s req_ids:[8] -DEBUG 06-24 20:08:30 [manager.py:391] -ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:391.9696807861328ms total_cost_time:392.0145034790039ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6702 prompt_cache_len:5151 prompt_cache_ratio:0.7685765443151298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 -DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10800313949584961 s -INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11012554168701172 s -DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=318456931529128675692668461057963277990, time:1750766910.6727326s req_ids:[8] -DEBUG 06-24 20:08:30 [manager.py:391] -ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:197.2205638885498ms total_cost_time:197.2658634185791ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6703 prompt_cache_len:5151 prompt_cache_ratio:0.768461882739072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 -DEBUG 06-24 20:08:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:30 [manager.py:224] router recive req id 8 cost time 0.10857272148132324 s -INFO 06-24 20:08:30 [manager.py:68] detokenization recv req id 8 cost time 0.11085963249206543 s -DEBUG 06-24 20:08:30 [manager.py:391] Prefill Batch: batch_id=12813427073584617697323634515723354772, time:1750766910.8819442s req_ids:[8] -DEBUG 06-24 20:08:30 [manager.py:391] -ERROR 06-24 20:08:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:208.4197998046875ms total_cost_time:208.4660530090332ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6704 prompt_cache_len:5151 prompt_cache_ratio:0.7683472553699284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 -DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s -INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.10899066925048828 s -DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=330135876875456796282989728479313720206, time:1750766911.0954363s req_ids:[8] -DEBUG 06-24 20:08:31 [manager.py:391] -ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:30 lightllm_req_id:8 first_token_cost:207.30304718017578ms total_cost_time:207.34691619873047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6705 prompt_cache_len:5151 prompt_cache_ratio:0.7682326621923937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 -DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10723423957824707 s -INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.10912322998046875 s -DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=105608192307819772141236887839164647781, time:1750766911.317005s req_ids:[8] -DEBUG 06-24 20:08:31 [manager.py:391] -ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:214.94388580322266ms total_cost_time:214.98918533325195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6706 prompt_cache_len:5151 prompt_cache_ratio:0.7681181031911721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 -DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s -INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s -DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=295008873756857632056297240885271838370, time:1750766911.5304515s req_ids:[8] -DEBUG 06-24 20:08:31 [manager.py:391] -ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:206.12430572509766ms total_cost_time:206.16650581359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6707 prompt_cache_len:5151 prompt_cache_ratio:0.7680035783509765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 -DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10923147201538086 s -INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.11121416091918945 s -DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=170280736880172826854863411924707197143, time:1750766911.7496917s req_ids:[8] -DEBUG 06-24 20:08:31 [manager.py:391] -ERROR 06-24 20:08:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:216.45402908325195ms total_cost_time:216.49765968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6708 prompt_cache_len:5151 prompt_cache_ratio:0.7678890876565295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 -DEBUG 06-24 20:08:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:31 [manager.py:224] router recive req id 8 cost time 0.10793232917785645 s -INFO 06-24 20:08:31 [manager.py:68] detokenization recv req id 8 cost time 0.1100013256072998 s -DEBUG 06-24 20:08:31 [manager.py:391] Prefill Batch: batch_id=284962741641141091681157017492724357699, time:1750766911.9628575s req_ids:[8] -DEBUG 06-24 20:08:31 [manager.py:391] -ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:31 lightllm_req_id:8 first_token_cost:205.674409866333ms total_cost_time:205.7197093963623ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6709 prompt_cache_len:5151 prompt_cache_ratio:0.7677746310925623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 -DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10763907432556152 s -INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s -DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=197591840544237168808461760452749615432, time:1750766912.18445s req_ids:[8] -DEBUG 06-24 20:08:32 [manager.py:391] -ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:217.30542182922363ms total_cost_time:217.3483371734619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6710 prompt_cache_len:5151 prompt_cache_ratio:0.7676602086438152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 -DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s -INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s -DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=294090999140326597692134080247387816444, time:1750766912.3987265s req_ids:[8] -DEBUG 06-24 20:08:32 [manager.py:391] -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:207.57770538330078ms total_cost_time:207.62062072753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6711 prompt_cache_len:5151 prompt_cache_ratio:0.767545820295038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 -DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:32 [batch.py:51] router release req id 8 -INFO 06-24 20:08:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10788297653198242 s -INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.1097724437713623 s -DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=154088919688947973616282415618627949520, time:1750766912.6118033s req_ids:[8] -DEBUG 06-24 20:08:32 [manager.py:391] -ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:208.5883617401123ms total_cost_time:208.6503505706787ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:6712 prompt_cache_len:5151 prompt_cache_ratio:0.7674314660309892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 -DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:32 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s -INFO 06-24 20:08:32 [manager.py:68] detokenization recv req id 8 cost time 0.10904908180236816 s -DEBUG 06-24 20:08:32 [manager.py:391] Prefill Batch: batch_id=106320657917346147897819065032855247952, time:1750766912.8234613s req_ids:[8] -DEBUG 06-24 20:08:32 [manager.py:391] -ERROR 06-24 20:08:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:205.4438591003418ms total_cost_time:205.48772811889648ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6713 prompt_cache_len:5151 prompt_cache_ratio:0.7673171458364367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 -DEBUG 06-24 20:08:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10835957527160645 s -INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.11027336120605469 s -DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=161230471123807431630621270657695533810, time:1750766913.03567s req_ids:[8] -DEBUG 06-24 20:08:33 [manager.py:391] -ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:32 lightllm_req_id:8 first_token_cost:378.5703182220459ms total_cost_time:378.6156177520752ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6714 prompt_cache_len:5151 prompt_cache_ratio:0.7672028596961573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 -DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10730147361755371 s -INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.10979223251342773 s -DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=241516068784982826095322496324239754400, time:1750766913.4131095s req_ids:[8] -DEBUG 06-24 20:08:33 [manager.py:391] -ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:200.15525817871094ms total_cost_time:200.19841194152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6715 prompt_cache_len:5151 prompt_cache_ratio:0.7670886075949367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 -DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10809659957885742 s -INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.11013984680175781 s -DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=19411609795473690828923505217126781678, time:1750766913.6256084s req_ids:[8] -DEBUG 06-24 20:08:33 [manager.py:391] -ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:199.5542049407959ms total_cost_time:199.5992660522461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6716 prompt_cache_len:5151 prompt_cache_ratio:0.76697438951757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 -DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:33 [manager.py:224] router recive req id 8 cost time 0.10680651664733887 s -INFO 06-24 20:08:33 [manager.py:68] detokenization recv req id 8 cost time 0.10880661010742188 s -DEBUG 06-24 20:08:33 [manager.py:391] Prefill Batch: batch_id=51453387134912828855516948669800398278, time:1750766913.8259282s req_ids:[8] -DEBUG 06-24 20:08:33 [manager.py:391] -ERROR 06-24 20:08:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:202.8799057006836ms total_cost_time:202.92282104492188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6717 prompt_cache_len:5151 prompt_cache_ratio:0.7668602054488611 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 -DEBUG 06-24 20:08:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10848355293273926 s -INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.11042618751525879 s -DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=148889840732107309444267874568511472426, time:1750766914.0353277s req_ids:[8] -DEBUG 06-24 20:08:34 [manager.py:391] -ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:33 lightllm_req_id:8 first_token_cost:205.82103729248047ms total_cost_time:205.86681365966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6718 prompt_cache_len:5151 prompt_cache_ratio:0.7667460553736231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 -DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10651183128356934 s -INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.1084744930267334 s -DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=334973309206528870297413195019794328832, time:1750766914.2570422s req_ids:[8] -DEBUG 06-24 20:08:34 [manager.py:391] -ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:220.3505039215088ms total_cost_time:220.3960418701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6719 prompt_cache_len:5151 prompt_cache_ratio:0.766631939276678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 -DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s -INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994410514831543 s -DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=29452326635476435385003753671757923992, time:1750766914.4723237s req_ids:[8] -DEBUG 06-24 20:08:34 [manager.py:391] -ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:205.72686195373535ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6720 prompt_cache_len:5151 prompt_cache_ratio:0.7665178571428571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 -DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10782670974731445 s -INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10998344421386719 s -DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=294678871133913544834755032474214803707, time:1750766914.6806602s req_ids:[8] -DEBUG 06-24 20:08:34 [manager.py:391] -ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:208.30631256103516ms total_cost_time:208.35232734680176ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6721 prompt_cache_len:5151 prompt_cache_ratio:0.7664038089570004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 -DEBUG 06-24 20:08:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:34 [manager.py:224] router recive req id 8 cost time 0.10719728469848633 s -INFO 06-24 20:08:34 [manager.py:68] detokenization recv req id 8 cost time 0.10911440849304199 s -DEBUG 06-24 20:08:34 [manager.py:391] Prefill Batch: batch_id=201595841732882854870594208350390536789, time:1750766914.8939748s req_ids:[8] -DEBUG 06-24 20:08:34 [manager.py:391] -ERROR 06-24 20:08:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:207.25607872009277ms total_cost_time:207.29994773864746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6722 prompt_cache_len:5151 prompt_cache_ratio:0.7662897947039572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 -DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10716581344604492 s -INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.10923075675964355 s -DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=8416288677439228005391345051958034976, time:1750766915.1061032s req_ids:[8] -DEBUG 06-24 20:08:35 [manager.py:391] -ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:34 lightllm_req_id:8 first_token_cost:205.89399337768555ms total_cost_time:205.93905448913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6723 prompt_cache_len:5151 prompt_cache_ratio:0.7661758143685855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 -DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:35 [batch.py:51] router release req id 8 -DEBUG 06-24 20:08:35 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:35 [manager.py:283] -DEBUG 06-24 20:08:35 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:35 [manager.py:284] -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s -INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.11012840270996094 s -DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=88507336782021023440795641149498961248, time:1750766915.3204677s req_ids:[8] -DEBUG 06-24 20:08:35 [manager.py:391] -ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:209.8546028137207ms total_cost_time:209.89990234375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6724 prompt_cache_len:5151 prompt_cache_ratio:0.7660618679357525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 -DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.10892963409423828 s -INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.11083745956420898 s -DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=296357628105699983245747034866890123566, time:1750766915.533806s req_ids:[8] -DEBUG 06-24 20:08:35 [manager.py:391] -ERROR 06-24 20:08:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:207.54241943359375ms total_cost_time:207.60369300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:6725 prompt_cache_len:5151 prompt_cache_ratio:0.7659479553903346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 -DEBUG 06-24 20:08:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:35 [manager.py:224] router recive req id 8 cost time 0.3115348815917969 s -INFO 06-24 20:08:35 [manager.py:68] detokenization recv req id 8 cost time 0.3135504722595215 s -DEBUG 06-24 20:08:35 [manager.py:391] Prefill Batch: batch_id=13678000044143905445337379650049336645, time:1750766915.942661s req_ids:[8] -DEBUG 06-24 20:08:35 [manager.py:391] -ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:35 lightllm_req_id:8 first_token_cost:404.27327156066895ms total_cost_time:404.31880950927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6726 prompt_cache_len:5151 prompt_cache_ratio:0.7658340767172168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 -DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:36 [batch.py:51] router release req id 8 -INFO 06-24 20:08:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10723447799682617 s -INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10917496681213379 s -DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=193495345133577661924044501876538101680, time:1750766916.157448s req_ids:[8] -DEBUG 06-24 20:08:36 [manager.py:391] -ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:206.6938877105713ms total_cost_time:206.73751831054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6727 prompt_cache_len:5151 prompt_cache_ratio:0.7657202319012933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 -DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s -INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10993790626525879 s -DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=502238838404425100428816804310129816, time:1750766916.3701265s req_ids:[8] -DEBUG 06-24 20:08:36 [manager.py:391] -DEBUG 06-24 20:08:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 29931.537 tokens/s -DEBUG 06-24 20:08:36 [stats.py:37] Avg prompt tokens throughput: 29922.613 tokens/s -DEBUG 06-24 20:08:36 [stats.py:37] Avg generate tokens throughput: 8.924 tokens/s -ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:208.4496021270752ms total_cost_time:208.4941864013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6728 prompt_cache_len:5151 prompt_cache_ratio:0.7656064209274673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 -DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.10714316368103027 s -INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s -DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=19189512817642001562274960438499800950, time:1750766916.5954013s req_ids:[8] -DEBUG 06-24 20:08:36 [manager.py:391] -ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:221.49205207824707ms total_cost_time:221.55404090881348ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:6729 prompt_cache_len:5151 prompt_cache_ratio:0.765492643780651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 -DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:36 [manager.py:224] router recive req id 8 cost time 0.1089019775390625 s -INFO 06-24 20:08:36 [manager.py:68] detokenization recv req id 8 cost time 0.1108555793762207 s -DEBUG 06-24 20:08:36 [manager.py:391] Prefill Batch: batch_id=165444330422026395286628862689801865748, time:1750766916.8115408s req_ids:[8] -DEBUG 06-24 20:08:36 [manager.py:391] -ERROR 06-24 20:08:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:208.5425853729248ms total_cost_time:208.5862159729004ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6730 prompt_cache_len:5151 prompt_cache_ratio:0.7653789004457652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 -DEBUG 06-24 20:08:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.1072244644165039 s -INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.10916733741760254 s -DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=253491670847908351536112789244110220432, time:1750766917.0247726s req_ids:[8] -DEBUG 06-24 20:08:37 [manager.py:391] -ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:36 lightllm_req_id:8 first_token_cost:209.02490615844727ms total_cost_time:209.06805992126465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6731 prompt_cache_len:5151 prompt_cache_ratio:0.7652651909077403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 -DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10931062698364258 s -INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.111572265625 s -DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=331747267810991488939375663638036191785, time:1750766917.2388408s req_ids:[8] -DEBUG 06-24 20:08:37 [manager.py:391] -ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:209.73849296569824ms total_cost_time:209.78403091430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6732 prompt_cache_len:5151 prompt_cache_ratio:0.7651515151515151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 -DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10712218284606934 s -INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914182662963867 s -DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=292301759242789518715217514065416443314, time:1750766917.4506118s req_ids:[8] -DEBUG 06-24 20:08:37 [manager.py:391] -ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:208.24551582336426ms total_cost_time:208.29057693481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6733 prompt_cache_len:5151 prompt_cache_ratio:0.7650378731620378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 -DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.11014699935913086 s -INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.11208868026733398 s -DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=240059531966130898876800830055992379952, time:1750766917.6628726s req_ids:[8] -DEBUG 06-24 20:08:37 [manager.py:391] -ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:203.30238342285156ms total_cost_time:203.34649085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6734 prompt_cache_len:5151 prompt_cache_ratio:0.7649242649242649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 -DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:37 [manager.py:224] router recive req id 8 cost time 0.10933279991149902 s -INFO 06-24 20:08:37 [manager.py:68] detokenization recv req id 8 cost time 0.11142420768737793 s -DEBUG 06-24 20:08:37 [manager.py:391] Prefill Batch: batch_id=183685201305384668864030912205276770777, time:1750766917.8723195s req_ids:[8] -DEBUG 06-24 20:08:37 [manager.py:391] -ERROR 06-24 20:08:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:193.62521171569824ms total_cost_time:193.70460510253906ms,out_token_counter:1 mean_per_token_cost_time: 0.07939338684082031ms prompt_token_num:6735 prompt_cache_len:5151 prompt_cache_ratio:0.7648106904231626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 -DEBUG 06-24 20:08:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10838985443115234 s -INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102895736694336 s -DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=246287987853511476426529927330699365189, time:1750766918.075125s req_ids:[8] -DEBUG 06-24 20:08:38 [manager.py:391] -ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:37 lightllm_req_id:8 first_token_cost:206.25877380371094ms total_cost_time:206.3007354736328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6736 prompt_cache_len:5151 prompt_cache_ratio:0.7646971496437055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 -DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s -INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.10979485511779785 s -DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=127372801947996544036491227655613577675, time:1750766918.2873216s req_ids:[8] -DEBUG 06-24 20:08:38 [manager.py:391] -ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:420.26615142822266ms total_cost_time:420.32384872436523ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:6737 prompt_cache_len:5151 prompt_cache_ratio:0.7645836425708773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 -DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s -INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s -DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=60047819696147762557686670744336811587, time:1750766918.7065413s req_ids:[8] -DEBUG 06-24 20:08:38 [manager.py:391] -ERROR 06-24 20:08:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:200.78182220458984ms total_cost_time:200.82640647888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6738 prompt_cache_len:5151 prompt_cache_ratio:0.7644701691896705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 -DEBUG 06-24 20:08:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:38 [manager.py:224] router recive req id 8 cost time 0.10904502868652344 s -INFO 06-24 20:08:38 [manager.py:68] detokenization recv req id 8 cost time 0.11105728149414062 s -DEBUG 06-24 20:08:38 [manager.py:391] Prefill Batch: batch_id=252497352531398885708197813635021907175, time:1750766918.9194736s req_ids:[8] -DEBUG 06-24 20:08:38 [manager.py:391] -ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:38 lightllm_req_id:8 first_token_cost:208.50443840026855ms total_cost_time:208.54997634887695ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6739 prompt_cache_len:5151 prompt_cache_ratio:0.7643567294850868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 -DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s -INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s -DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=115610693477718233507326140136722902166, time:1750766919.131967s req_ids:[8] -DEBUG 06-24 20:08:39 [manager.py:391] -ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:206.129789352417ms total_cost_time:206.17318153381348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6740 prompt_cache_len:5151 prompt_cache_ratio:0.7642433234421365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 -DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s -INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10969066619873047 s -DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=29717720000699866761248176799642494381, time:1750766919.3449202s req_ids:[8] -DEBUG 06-24 20:08:39 [manager.py:391] -ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:211.65013313293457ms total_cost_time:211.69567108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6741 prompt_cache_len:5151 prompt_cache_ratio:0.7641299510458389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 -DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10995030403137207 s -INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.11197638511657715 s -DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=322503750348632202975705303397126319806, time:1750766919.5671275s req_ids:[8] -DEBUG 06-24 20:08:39 [manager.py:391] -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:212.76497840881348ms total_cost_time:212.82505989074707ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6742 prompt_cache_len:5151 prompt_cache_ratio:0.7640166122812222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 -DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10605931282043457 s -INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.10794281959533691 s -DEBUG 06-24 20:08:39 [manager.py:391] Prefill Batch: batch_id=10230627022835069444484710851566607728, time:1750766919.787143s req_ids:[8] -DEBUG 06-24 20:08:39 [manager.py:391] -ERROR 06-24 20:08:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:218.9924716949463ms total_cost_time:219.03491020202637ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6743 prompt_cache_len:5151 prompt_cache_ratio:0.7639033071333234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 -DEBUG 06-24 20:08:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:39 [manager.py:224] router recive req id 8 cost time 0.10812187194824219 s -INFO 06-24 20:08:39 [manager.py:68] detokenization recv req id 8 cost time 0.11025619506835938 s -DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=219568035245375300369524968770702965573, time:1750766920.0023599s req_ids:[8] -DEBUG 06-24 20:08:40 [manager.py:391] -ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:39 lightllm_req_id:8 first_token_cost:209.49721336364746ms total_cost_time:209.54251289367676ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6744 prompt_cache_len:5151 prompt_cache_ratio:0.7637900355871886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 -DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10781288146972656 s -INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.10973000526428223 s -DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=228273723028112542180981318754322040018, time:1750766920.2161644s req_ids:[8] -DEBUG 06-24 20:08:40 [manager.py:391] -ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:209.34104919433594ms total_cost_time:209.38682556152344ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6745 prompt_cache_len:5151 prompt_cache_ratio:0.7636767976278726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 -DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.1090383529663086 s -INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s -DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=104230606977252100823412701979251462906, time:1750766920.429848s req_ids:[8] -DEBUG 06-24 20:08:40 [manager.py:391] -ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:207.19408988952637ms total_cost_time:207.23819732666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6746 prompt_cache_len:5151 prompt_cache_ratio:0.7635635932404388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 -DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10814166069030762 s -INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.1100924015045166 s -DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=282316973358304690643278264402494620452, time:1750766920.642904s req_ids:[8] -DEBUG 06-24 20:08:40 [manager.py:391] -ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:208.74834060668945ms total_cost_time:208.79340171813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6747 prompt_cache_len:5151 prompt_cache_ratio:0.76345042240996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 -DEBUG 06-24 20:08:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:40 [manager.py:224] router recive req id 8 cost time 0.10831809043884277 s -INFO 06-24 20:08:40 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s -DEBUG 06-24 20:08:40 [manager.py:391] Prefill Batch: batch_id=169237986028943535165137430126255829517, time:1750766920.8552425s req_ids:[8] -DEBUG 06-24 20:08:40 [manager.py:391] -ERROR 06-24 20:08:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:201.8909454345703ms total_cost_time:201.9338607788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6748 prompt_cache_len:5151 prompt_cache_ratio:0.7633372851215174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 -DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.309434175491333 s -INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.31143736839294434 s -DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=78320379318328277967915325647279185675, time:1750766921.2802517s req_ids:[8] -DEBUG 06-24 20:08:41 [manager.py:391] -ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:40 lightllm_req_id:8 first_token_cost:430.3269386291504ms total_cost_time:430.3874969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6749 prompt_cache_len:5151 prompt_cache_ratio:0.7632241813602015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 -DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.10868668556213379 s -INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.11049103736877441 s -DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=91094922499811045905987609088869949117, time:1750766921.5126514s req_ids:[8] -DEBUG 06-24 20:08:41 [manager.py:391] -ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:222.55468368530273ms total_cost_time:222.5971221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6750 prompt_cache_len:5151 prompt_cache_ratio:0.7631111111111111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 -DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.10689067840576172 s -INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s -DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=209984400732997473891358820165404926895, time:1750766921.7268076s req_ids:[8] -DEBUG 06-24 20:08:41 [manager.py:391] -ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:205.5513858795166ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6751 prompt_cache_len:5151 prompt_cache_ratio:0.7629980743593542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 -DEBUG 06-24 20:08:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:41 [manager.py:224] router recive req id 8 cost time 0.1072847843170166 s -INFO 06-24 20:08:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091163158416748 s -DEBUG 06-24 20:08:41 [manager.py:391] Prefill Batch: batch_id=313109080307095368606341924757917796701, time:1750766921.9455562s req_ids:[8] -DEBUG 06-24 20:08:41 [manager.py:391] -ERROR 06-24 20:08:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:173.61974716186523ms total_cost_time:173.66480827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6752 prompt_cache_len:5151 prompt_cache_ratio:0.7628850710900474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 -DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.1069488525390625 s -INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10880041122436523 s -DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=178909068906954253669261535474164004569, time:1750766922.1149204s req_ids:[8] -DEBUG 06-24 20:08:42 [manager.py:391] -ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:41 lightllm_req_id:8 first_token_cost:193.3763027191162ms total_cost_time:193.4225559234619ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6753 prompt_cache_len:5151 prompt_cache_ratio:0.7627721012883163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 -DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10831952095031738 s -INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.11026406288146973 s -DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=176572999536272374346531094946946535012, time:1750766922.3159695s req_ids:[8] -DEBUG 06-24 20:08:42 [manager.py:391] -ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.01422882080078ms total_cost_time:205.07216453552246ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:6754 prompt_cache_len:5151 prompt_cache_ratio:0.7626591649392952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 -DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.11023354530334473 s -INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.11216330528259277 s -DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=62672343667016140725304176744281383838, time:1750766922.524904s req_ids:[8] -DEBUG 06-24 20:08:42 [manager.py:391] -ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:203.3534049987793ms total_cost_time:203.3989429473877ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6755 prompt_cache_len:5151 prompt_cache_ratio:0.7625462620281274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 -DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10689973831176758 s -INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10880351066589355 s -DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=31613983164895435762734545647385562653, time:1750766922.735242s req_ids:[8] -DEBUG 06-24 20:08:42 [manager.py:391] -ERROR 06-24 20:08:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.57260513305664ms total_cost_time:205.6252956390381ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:6756 prompt_cache_len:5151 prompt_cache_ratio:0.7624333925399644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 -DEBUG 06-24 20:08:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:42 [manager.py:224] router recive req id 8 cost time 0.10717105865478516 s -INFO 06-24 20:08:42 [manager.py:68] detokenization recv req id 8 cost time 0.10948967933654785 s -DEBUG 06-24 20:08:42 [manager.py:391] Prefill Batch: batch_id=119888786870526259577977256775036426299, time:1750766922.946584s req_ids:[8] -DEBUG 06-24 20:08:42 [manager.py:391] -ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:42 lightllm_req_id:8 first_token_cost:205.11388778686523ms total_cost_time:205.17230033874512ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6757 prompt_cache_len:5151 prompt_cache_ratio:0.7623205564599674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 -DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.10773324966430664 s -INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.10978198051452637 s -INFO 06-24 20:08:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=184512487882974925633390378294419813939, time:1750766923.1572506s req_ids:[8] -DEBUG 06-24 20:08:43 [manager.py:391] -ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:206.5298557281494ms total_cost_time:206.59136772155762ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:6758 prompt_cache_len:5151 prompt_cache_ratio:0.7622077537733057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 -DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.107391357421875 s -INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.10934066772460938 s -DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=177411912099606378266306546535816316849, time:1750766923.369534s req_ids:[8] -DEBUG 06-24 20:08:43 [manager.py:391] -ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:208.49084854125977ms total_cost_time:208.54997634887695ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6759 prompt_cache_len:5151 prompt_cache_ratio:0.7620949844651576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 -DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.3097422122955322 s -DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=61198221408760454322586257974543910418, time:1750766923.772991s req_ids:[8] -DEBUG 06-24 20:08:43 [manager.py:391] -INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.3116874694824219 s -ERROR 06-24 20:08:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:395.9174156188965ms total_cost_time:395.9622383117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6760 prompt_cache_len:5151 prompt_cache_ratio:0.7619822485207101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 -DEBUG 06-24 20:08:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:43 [manager.py:224] router recive req id 8 cost time 0.10772514343261719 s -INFO 06-24 20:08:43 [manager.py:68] detokenization recv req id 8 cost time 0.1097402572631836 s -DEBUG 06-24 20:08:43 [manager.py:391] Prefill Batch: batch_id=335057589859209807641592147543714777312, time:1750766923.9870877s req_ids:[8] -DEBUG 06-24 20:08:43 [manager.py:391] -ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:43 lightllm_req_id:8 first_token_cost:208.72139930725098ms total_cost_time:208.76836776733398ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6761 prompt_cache_len:5151 prompt_cache_ratio:0.761869545925159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 -DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10708761215209961 s -INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10911822319030762 s -DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=35779362752252582917354885307538951500, time:1750766924.198649s req_ids:[8] -DEBUG 06-24 20:08:44 [manager.py:391] -ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:205.27076721191406ms total_cost_time:205.31392097473145ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6762 prompt_cache_len:5151 prompt_cache_ratio:0.761756876663709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 -DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10691070556640625 s -INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.1089942455291748 s -DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=63260897216070476343805153363406224280, time:1750766924.4098787s req_ids:[8] -DEBUG 06-24 20:08:44 [manager.py:391] -ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:204.15019989013672ms total_cost_time:204.19740676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6763 prompt_cache_len:5151 prompt_cache_ratio:0.7616442407215732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 -DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10646581649780273 s -INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10845637321472168 s -DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=187291577263790937726844947324173144257, time:1750766924.6195474s req_ids:[8] -DEBUG 06-24 20:08:44 [manager.py:391] -ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:207.55791664123535ms total_cost_time:207.60393142700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6764 prompt_cache_len:5151 prompt_cache_ratio:0.761531638083974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 -DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:44 [manager.py:224] router recive req id 8 cost time 0.10716724395751953 s -INFO 06-24 20:08:44 [manager.py:68] detokenization recv req id 8 cost time 0.10917997360229492 s -DEBUG 06-24 20:08:44 [manager.py:391] Prefill Batch: batch_id=274430659751551982977824336866464089830, time:1750766924.82964s req_ids:[8] -DEBUG 06-24 20:08:44 [manager.py:391] -ERROR 06-24 20:08:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:205.75523376464844ms total_cost_time:205.7967185974121ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6765 prompt_cache_len:5151 prompt_cache_ratio:0.761419068736142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 -DEBUG 06-24 20:08:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10792946815490723 s -INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.11000204086303711 s -DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=195274094301156479846658081054958228848, time:1750766925.0416074s req_ids:[8] -DEBUG 06-24 20:08:45 [manager.py:391] -ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:44 lightllm_req_id:8 first_token_cost:206.80546760559082ms total_cost_time:206.8495750427246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6766 prompt_cache_len:5151 prompt_cache_ratio:0.7613065326633166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 -DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10720324516296387 s -INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10921907424926758 s -DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=194890738647437413925465202597311011598, time:1750766925.2535632s req_ids:[8] -DEBUG 06-24 20:08:45 [manager.py:391] -ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:205.7485580444336ms total_cost_time:205.79266548156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6767 prompt_cache_len:5151 prompt_cache_ratio:0.7611940298507462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 -DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10851359367370605 s -INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s -DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=143949160973121822218989297233902846223, time:1750766925.4638603s req_ids:[8] -DEBUG 06-24 20:08:45 [manager.py:391] -ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:203.8130760192871ms total_cost_time:203.8578987121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6768 prompt_cache_len:5151 prompt_cache_ratio:0.7610815602836879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 -DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10704565048217773 s -INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10903477668762207 s -DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=237750400561157892323276976140745843408, time:1750766925.6737409s req_ids:[8] -DEBUG 06-24 20:08:45 [manager.py:391] -ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:205.5661678314209ms total_cost_time:205.60932159423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6769 prompt_cache_len:5151 prompt_cache_ratio:0.7609691239474073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 -DEBUG 06-24 20:08:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:45 [manager.py:224] router recive req id 8 cost time 0.10779547691345215 s -INFO 06-24 20:08:45 [manager.py:68] detokenization recv req id 8 cost time 0.10983681678771973 s -DEBUG 06-24 20:08:45 [manager.py:391] Prefill Batch: batch_id=189529664323192768928888967928959667068, time:1750766925.8851635s req_ids:[8] -DEBUG 06-24 20:08:45 [manager.py:391] -ERROR 06-24 20:08:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:206.1140537261963ms total_cost_time:206.15768432617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6770 prompt_cache_len:5151 prompt_cache_ratio:0.7608567208271787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 -DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10626983642578125 s -INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10800480842590332 s -DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=224287249814167857040856566119867663100, time:1750766926.0976348s req_ids:[8] -DEBUG 06-24 20:08:46 [manager.py:391] -ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:45 lightllm_req_id:8 first_token_cost:168.68257522583008ms total_cost_time:168.72501373291016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6771 prompt_cache_len:5151 prompt_cache_ratio:0.7607443509082853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 -DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:46 [batch.py:51] router release req id 8 -INFO 06-24 20:08:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.31071925163269043 s -INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.3133690357208252 s -DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=82035410088159140682469824966854678640, time:1750766926.4846282s req_ids:[8] -DEBUG 06-24 20:08:46 [manager.py:391] -DEBUG 06-24 20:08:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 29374.630 tokens/s -DEBUG 06-24 20:08:46 [stats.py:37] Avg prompt tokens throughput: 29365.930 tokens/s -DEBUG 06-24 20:08:46 [stats.py:37] Avg generate tokens throughput: 8.700 tokens/s -ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:421.76318168640137ms total_cost_time:421.8168258666992ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:6772 prompt_cache_len:5151 prompt_cache_ratio:0.7606320141760189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 -DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:46 [batch.py:51] router release req id 8 -INFO 06-24 20:08:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10804224014282227 s -INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s -DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=313234008715192439293167780811178395306, time:1750766926.7005136s req_ids:[8] -DEBUG 06-24 20:08:46 [manager.py:391] -ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:207.5216770172119ms total_cost_time:207.5662612915039ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6773 prompt_cache_len:5151 prompt_cache_ratio:0.7605197106156799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 -DEBUG 06-24 20:08:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:46 [manager.py:224] router recive req id 8 cost time 0.10767555236816406 s -INFO 06-24 20:08:46 [manager.py:68] detokenization recv req id 8 cost time 0.10966253280639648 s -DEBUG 06-24 20:08:46 [manager.py:391] Prefill Batch: batch_id=92477025772287950603912353368303292975, time:1750766926.912773s req_ids:[8] -DEBUG 06-24 20:08:46 [manager.py:391] -ERROR 06-24 20:08:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:46 lightllm_req_id:8 first_token_cost:204.00500297546387ms total_cost_time:204.05006408691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6774 prompt_cache_len:5151 prompt_cache_ratio:0.7604074402125776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 -DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.1072385311126709 s -INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.10925507545471191 s -DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=322910606595456292834510955635338698519, time:1750766927.122272s req_ids:[8] -DEBUG 06-24 20:08:47 [manager.py:391] -ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:207.2908878326416ms total_cost_time:207.33380317687988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6775 prompt_cache_len:5151 prompt_cache_ratio:0.7602952029520296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 -DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.1081242561340332 s -INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11072325706481934 s -DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=94265110629409966291883574646290877440, time:1750766927.334459s req_ids:[8] -DEBUG 06-24 20:08:47 [manager.py:391] -ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:204.44893836975098ms total_cost_time:204.49471473693848ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6776 prompt_cache_len:5151 prompt_cache_ratio:0.7601829988193625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 -DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s -INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.10980916023254395 s -DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=283530304557294388573507479508404600870, time:1750766927.5450237s req_ids:[8] -DEBUG 06-24 20:08:47 [manager.py:391] -ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:205.18112182617188ms total_cost_time:205.22618293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6777 prompt_cache_len:5151 prompt_cache_ratio:0.7600708277999114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 -DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10916948318481445 s -INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11110472679138184 s -DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=68868785949950384980426062600360074558, time:1750766927.7571042s req_ids:[8] -DEBUG 06-24 20:08:47 [manager.py:391] -ERROR 06-24 20:08:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:207.5815200805664ms total_cost_time:207.6256275177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6778 prompt_cache_len:5151 prompt_cache_ratio:0.7599586898790204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 -DEBUG 06-24 20:08:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:47 [manager.py:224] router recive req id 8 cost time 0.10992765426635742 s -INFO 06-24 20:08:47 [manager.py:68] detokenization recv req id 8 cost time 0.11233949661254883 s -DEBUG 06-24 20:08:47 [manager.py:391] Prefill Batch: batch_id=152892528172023292954586396489725154654, time:1750766927.967879s req_ids:[8] -DEBUG 06-24 20:08:47 [manager.py:391] -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:47 lightllm_req_id:8 first_token_cost:205.15727996826172ms total_cost_time:205.2011489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6779 prompt_cache_len:5151 prompt_cache_ratio:0.7598465850420416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 -DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10751724243164062 s -INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s -DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=165488218452614913446654519369134634504, time:1750766928.1790571s req_ids:[8] -DEBUG 06-24 20:08:48 [manager.py:391] -ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:206.11000061035156ms total_cost_time:206.15386962890625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6780 prompt_cache_len:5151 prompt_cache_ratio:0.7597345132743363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 -DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10778355598449707 s -INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10981631278991699 s -DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=252450605717652009987969899972673146229, time:1750766928.3899784s req_ids:[8] -DEBUG 06-24 20:08:48 [manager.py:391] -ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:375.4286766052246ms total_cost_time:375.4911422729492ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:6781 prompt_cache_len:5151 prompt_cache_ratio:0.7596224745612742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 -DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10796713829040527 s -INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10994935035705566 s -DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=23516597177518177035405939292839748097, time:1750766928.7651787s req_ids:[8] -DEBUG 06-24 20:08:48 [manager.py:391] -ERROR 06-24 20:08:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:198.49681854248047ms total_cost_time:198.54092597961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6782 prompt_cache_len:5151 prompt_cache_ratio:0.7595104688882336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 -DEBUG 06-24 20:08:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:48 [manager.py:224] router recive req id 8 cost time 0.10778665542602539 s -INFO 06-24 20:08:48 [manager.py:68] detokenization recv req id 8 cost time 0.10985922813415527 s -DEBUG 06-24 20:08:48 [manager.py:391] Prefill Batch: batch_id=203485221896755871233354274138310620708, time:1750766928.9750814s req_ids:[8] -DEBUG 06-24 20:08:48 [manager.py:391] -ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:48 lightllm_req_id:8 first_token_cost:206.74967765808105ms total_cost_time:206.79211616516113ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6783 prompt_cache_len:5151 prompt_cache_ratio:0.7593984962406015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 -DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10686278343200684 s -INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s -DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=254655720894725969309801202973118646342, time:1750766929.1878257s req_ids:[8] -DEBUG 06-24 20:08:49 [manager.py:391] -ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:208.44173431396484ms total_cost_time:208.48870277404785ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:6784 prompt_cache_len:5151 prompt_cache_ratio:0.7592865566037735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 -DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10705327987670898 s -INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.10907459259033203 s -DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=96219160658219615205499946749251361804, time:1750766929.4013824s req_ids:[8] -DEBUG 06-24 20:08:49 [manager.py:391] -ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:206.74586296081543ms total_cost_time:206.80665969848633ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6785 prompt_cache_len:5151 prompt_cache_ratio:0.759174649963154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 -DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10830354690551758 s -INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.11021828651428223 s -DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=94380587574532285646200314595935828628, time:1750766929.6126935s req_ids:[8] -DEBUG 06-24 20:08:49 [manager.py:391] -ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:206.5408229827881ms total_cost_time:206.5868377685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6786 prompt_cache_len:5151 prompt_cache_ratio:0.7590627763041556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 -DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:49 [manager.py:224] router recive req id 8 cost time 0.10819768905639648 s -INFO 06-24 20:08:49 [manager.py:68] detokenization recv req id 8 cost time 0.11020970344543457 s -DEBUG 06-24 20:08:49 [manager.py:391] Prefill Batch: batch_id=78197480041733542648039021190915062801, time:1750766929.8380508s req_ids:[8] -DEBUG 06-24 20:08:49 [manager.py:391] -ERROR 06-24 20:08:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:221.7421531677246ms total_cost_time:221.8027114868164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6787 prompt_cache_len:5151 prompt_cache_ratio:0.7589509356121998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 -DEBUG 06-24 20:08:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s -INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11014485359191895 s -DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=147765756231835770340044809408454696949, time:1750766930.0530963s req_ids:[8] -DEBUG 06-24 20:08:50 [manager.py:391] -ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:49 lightllm_req_id:8 first_token_cost:205.6901454925537ms total_cost_time:205.7344913482666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6788 prompt_cache_len:5151 prompt_cache_ratio:0.7588391278727166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 -DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10959291458129883 s -INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11153912544250488 s -DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=190585900172372510502676911678700753440, time:1750766930.2658298s req_ids:[8] -DEBUG 06-24 20:08:50 [manager.py:391] -ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:208.2803249359131ms total_cost_time:208.32562446594238ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6789 prompt_cache_len:5151 prompt_cache_ratio:0.7587273530711445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 -DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10746288299560547 s -INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.10941505432128906 s -DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=5147996313211811105363772403616630933, time:1750766930.4791634s req_ids:[8] -DEBUG 06-24 20:08:50 [manager.py:391] -ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:207.23247528076172ms total_cost_time:207.2770595550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6790 prompt_cache_len:5151 prompt_cache_ratio:0.7586156111929307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 -DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.10779356956481934 s -INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.10982370376586914 s -DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=286606952964628576938799592731346262469, time:1750766930.6908398s req_ids:[8] -DEBUG 06-24 20:08:50 [manager.py:391] -ERROR 06-24 20:08:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:206.76517486572266ms total_cost_time:206.81047439575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6791 prompt_cache_len:5151 prompt_cache_ratio:0.7585039022235311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 -DEBUG 06-24 20:08:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:50 [manager.py:224] router recive req id 8 cost time 0.1078798770904541 s -INFO 06-24 20:08:50 [manager.py:68] detokenization recv req id 8 cost time 0.11066484451293945 s -DEBUG 06-24 20:08:50 [manager.py:391] Prefill Batch: batch_id=230765914308757202153944771379198524618, time:1750766930.9033625s req_ids:[8] -DEBUG 06-24 20:08:50 [manager.py:391] -ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:50 lightllm_req_id:8 first_token_cost:379.7633647918701ms total_cost_time:379.8089027404785ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6792 prompt_cache_len:5151 prompt_cache_ratio:0.7583922261484098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 -DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10911059379577637 s -INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11124348640441895 s -DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=119612395175745845849597073514319604438, time:1750766931.2842295s req_ids:[8] -DEBUG 06-24 20:08:51 [manager.py:391] -ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:217.79155731201172ms total_cost_time:217.8349494934082ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6793 prompt_cache_len:5151 prompt_cache_ratio:0.7582805829530399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 -DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10807228088378906 s -INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11004281044006348 s -DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=286494089978722960493267422863171282923, time:1750766931.5057797s req_ids:[8] -DEBUG 06-24 20:08:51 [manager.py:391] -ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:195.30606269836426ms total_cost_time:195.35183906555176ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6794 prompt_cache_len:5151 prompt_cache_ratio:0.7581689726229026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 -DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s -INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.11052322387695312 s -DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=260873694769622915261915499260171345399, time:1750766931.7134244s req_ids:[8] -DEBUG 06-24 20:08:51 [manager.py:391] -ERROR 06-24 20:08:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:205.3229808807373ms total_cost_time:205.3675651550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6795 prompt_cache_len:5151 prompt_cache_ratio:0.7580573951434879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 -DEBUG 06-24 20:08:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:51 [manager.py:224] router recive req id 8 cost time 0.10749626159667969 s -INFO 06-24 20:08:51 [manager.py:68] detokenization recv req id 8 cost time 0.10945987701416016 s -DEBUG 06-24 20:08:51 [manager.py:391] Prefill Batch: batch_id=133640124527346400198273736458967993871, time:1750766931.921991s req_ids:[8] -DEBUG 06-24 20:08:51 [manager.py:391] -ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:51 lightllm_req_id:8 first_token_cost:205.23810386657715ms total_cost_time:205.28268814086914ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6796 prompt_cache_len:5151 prompt_cache_ratio:0.7579458505002943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 -DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10841917991638184 s -INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s -DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=21778620527042892497097852364577625813, time:1750766932.1333263s req_ids:[8] -DEBUG 06-24 20:08:52 [manager.py:391] -ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:207.06605911254883ms total_cost_time:207.11064338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6797 prompt_cache_len:5151 prompt_cache_ratio:0.7578343386788289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 -DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10849881172180176 s -INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.1110239028930664 s -DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=125288009436500817618780378276220361105, time:1750766932.3468506s req_ids:[8] -DEBUG 06-24 20:08:52 [manager.py:391] -ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:207.18073844909668ms total_cost_time:207.22413063049316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6798 prompt_cache_len:5151 prompt_cache_ratio:0.7577228596646073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 -DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10761857032775879 s -INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.10953736305236816 s -DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=98304187566559320348146116816158238604, time:1750766932.558749s req_ids:[8] -DEBUG 06-24 20:08:52 [manager.py:391] -ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:205.87730407714844ms total_cost_time:205.92093467712402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6799 prompt_cache_len:5151 prompt_cache_ratio:0.7576114134431534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 -DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10853362083435059 s -INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.11053848266601562 s -DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=217080632940604623842237636984809670372, time:1750766932.7734692s req_ids:[8] -DEBUG 06-24 20:08:52 [manager.py:391] -ERROR 06-24 20:08:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:206.88724517822266ms total_cost_time:206.94565773010254ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6800 prompt_cache_len:5151 prompt_cache_ratio:0.7575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 -DEBUG 06-24 20:08:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:52 [manager.py:224] router recive req id 8 cost time 0.10735058784484863 s -INFO 06-24 20:08:52 [manager.py:68] detokenization recv req id 8 cost time 0.10947871208190918 s -DEBUG 06-24 20:08:52 [manager.py:391] Prefill Batch: batch_id=296738469189727214031625300847602126259, time:1750766932.9834988s req_ids:[8] -DEBUG 06-24 20:08:52 [manager.py:391] -ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:52 lightllm_req_id:8 first_token_cost:204.9269676208496ms total_cost_time:204.9720287322998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6801 prompt_cache_len:5151 prompt_cache_ratio:0.7573886193206881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 -DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10728693008422852 s -INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942673683166504 s -INFO 06-24 20:08:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:08:53 [statics_utils.py:24] mean first cost: 233.3704283006215 ms -INFO 06-24 20:08:53 [statics_utils.py:24] mean per token cost: 0.1037253025240309 ms -DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=309421786694701307951979883044879572168, time:1750766933.1935613s req_ids:[8] -DEBUG 06-24 20:08:53 [manager.py:391] -INFO 06-24 20:08:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:204.47659492492676ms total_cost_time:204.51998710632324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6802 prompt_cache_len:5151 prompt_cache_ratio:0.7572772713907674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 -DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.1067037582397461 s -INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s -DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=214407128989713558225604226204020395788, time:1750766933.4035594s req_ids:[8] -DEBUG 06-24 20:08:53 [manager.py:391] -ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:369.9917793273926ms total_cost_time:370.03564834594727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6803 prompt_cache_len:5151 prompt_cache_ratio:0.757165956195796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 -DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:08:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10904097557067871 s -INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.11112403869628906 s -DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=152945604529670885507645194524750962048, time:1750766933.7736366s req_ids:[8] -DEBUG 06-24 20:08:53 [manager.py:391] -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:08:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:08:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:199.55158233642578ms total_cost_time:199.59521293640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6804 prompt_cache_len:5151 prompt_cache_ratio:0.7570546737213404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 -DEBUG 06-24 20:08:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:53 [manager.py:224] router recive req id 8 cost time 0.10995936393737793 s -INFO 06-24 20:08:53 [manager.py:68] detokenization recv req id 8 cost time 0.11216163635253906 s -DEBUG 06-24 20:08:53 [manager.py:391] Prefill Batch: batch_id=37265643018336761248807333963735391818, time:1750766933.986626s req_ids:[8] -DEBUG 06-24 20:08:53 [manager.py:391] -ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:53 lightllm_req_id:8 first_token_cost:212.0048999786377ms total_cost_time:212.0521068572998ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:6805 prompt_cache_len:5151 prompt_cache_ratio:0.7569434239529758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 -DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10766005516052246 s -INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s -DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=298547032732709273951645993427227226019, time:1750766934.2168617s req_ids:[8] -DEBUG 06-24 20:08:54 [manager.py:391] -ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:216.31860733032227ms total_cost_time:216.36056900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6806 prompt_cache_len:5151 prompt_cache_ratio:0.7568322068762856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 -DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s -INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.11019778251647949 s -DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=340277087876868199815569875476864188595, time:1750766934.4249306s req_ids:[8] -DEBUG 06-24 20:08:54 [manager.py:391] -ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:204.54812049865723ms total_cost_time:204.5919895172119ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6807 prompt_cache_len:5151 prompt_cache_ratio:0.756721022476862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 -DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10575222969055176 s -INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.10774803161621094 s -DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=75556858598528452147306081279350670220, time:1750766934.6351163s req_ids:[8] -DEBUG 06-24 20:08:54 [manager.py:391] -ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:205.95622062683105ms total_cost_time:206.00032806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6808 prompt_cache_len:5151 prompt_cache_ratio:0.7566098707403055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 -DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:54 [manager.py:224] router recive req id 8 cost time 0.10661792755126953 s -INFO 06-24 20:08:54 [manager.py:68] detokenization recv req id 8 cost time 0.10867190361022949 s -DEBUG 06-24 20:08:54 [manager.py:391] Prefill Batch: batch_id=153441860747713807877371441881734185883, time:1750766934.8477068s req_ids:[8] -DEBUG 06-24 20:08:54 [manager.py:391] -ERROR 06-24 20:08:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:204.98895645141602ms total_cost_time:205.0337791442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6809 prompt_cache_len:5151 prompt_cache_ratio:0.756498751652225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 -DEBUG 06-24 20:08:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s -INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11023402214050293 s -DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=117284022306644619603333435732420065603, time:1750766935.0608492s req_ids:[8] -DEBUG 06-24 20:08:55 [manager.py:391] -ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:54 lightllm_req_id:8 first_token_cost:209.8078727722168ms total_cost_time:209.85865592956543ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:6810 prompt_cache_len:5151 prompt_cache_ratio:0.7563876651982379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 -DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10847997665405273 s -INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081933975219727 s -DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=124068384156100191802701432997152032523, time:1750766935.275859s req_ids:[8] -DEBUG 06-24 20:08:55 [manager.py:391] -ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:226.94778442382812ms total_cost_time:226.9916534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6811 prompt_cache_len:5151 prompt_cache_ratio:0.7562766113639701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 -DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10743856430053711 s -INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.10943031311035156 s -DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=245254695863644878168693848881388718397, time:1750766935.5013602s req_ids:[8] -DEBUG 06-24 20:08:55 [manager.py:391] -ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:194.793701171875ms total_cost_time:194.85020637512207ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:6812 prompt_cache_len:5151 prompt_cache_ratio:0.7561655901350558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 -DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10984206199645996 s -INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.11184835433959961 s -DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=149609619247020072101385771269713651258, time:1750766935.7130337s req_ids:[8] -DEBUG 06-24 20:08:55 [manager.py:391] -ERROR 06-24 20:08:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:213.55295181274414ms total_cost_time:213.59014511108398ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:6813 prompt_cache_len:5151 prompt_cache_ratio:0.7560546014971379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 -DEBUG 06-24 20:08:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:55 [manager.py:224] router recive req id 8 cost time 0.10763239860534668 s -INFO 06-24 20:08:55 [manager.py:68] detokenization recv req id 8 cost time 0.10960817337036133 s -DEBUG 06-24 20:08:55 [manager.py:391] Prefill Batch: batch_id=304787358479949976232689580980107115929, time:1750766935.92544s req_ids:[8] -DEBUG 06-24 20:08:55 [manager.py:391] -ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:55 lightllm_req_id:8 first_token_cost:201.9345760345459ms total_cost_time:201.98512077331543ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:6814 prompt_cache_len:5151 prompt_cache_ratio:0.7559436454358673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 -DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10815787315368652 s -INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.11001777648925781 s -DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=149849551543287818636116492078764238697, time:1750766936.137794s req_ids:[8] -DEBUG 06-24 20:08:56 [manager.py:391] -ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:208.57787132263184ms total_cost_time:208.62936973571777ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6815 prompt_cache_len:5151 prompt_cache_ratio:0.7558327219369039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 -DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.31054115295410156 s -INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.3125176429748535 s -DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=103416269575912463498794671518578235352, time:1750766936.5595899s req_ids:[8] -DEBUG 06-24 20:08:56 [manager.py:391] -DEBUG 06-24 20:08:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 29682.081 tokens/s -DEBUG 06-24 20:08:56 [stats.py:37] Avg prompt tokens throughput: 29673.347 tokens/s -DEBUG 06-24 20:08:56 [stats.py:37] Avg generate tokens throughput: 8.735 tokens/s -ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:421.1719036102295ms total_cost_time:421.2336540222168ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6816 prompt_cache_len:5151 prompt_cache_ratio:0.7557218309859155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 -DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10702180862426758 s -INFO 06-24 20:08:56 [manager.py:68] detokenization recv req id 8 cost time 0.10955953598022461 s -DEBUG 06-24 20:08:56 [manager.py:391] Prefill Batch: batch_id=43174193868915020687055380947455832254, time:1750766936.7763374s req_ids:[8] -DEBUG 06-24 20:08:56 [manager.py:391] -ERROR 06-24 20:08:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:224.73406791687012ms total_cost_time:224.78818893432617ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:6817 prompt_cache_len:5151 prompt_cache_ratio:0.7556109725685786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 -DEBUG 06-24 20:08:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:56 [manager.py:224] router recive req id 8 cost time 0.10881280899047852 s -INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.11111950874328613 s -DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=340106471239495112144221860027334925483, time:1750766937.006206s req_ids:[8] -DEBUG 06-24 20:08:57 [manager.py:391] -ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:56 lightllm_req_id:8 first_token_cost:196.56896591186523ms total_cost_time:196.6257095336914ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:6818 prompt_cache_len:5151 prompt_cache_ratio:0.7555001466705779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 -DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:57 [batch.py:51] router release req id 8 -INFO 06-24 20:08:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10696625709533691 s -INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.1088876724243164 s -DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=98128387816006441505230501916506021110, time:1750766937.2127125s req_ids:[8] -DEBUG 06-24 20:08:57 [manager.py:391] -ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:203.72653007507324ms total_cost_time:203.7796974182129ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:6819 prompt_cache_len:5151 prompt_cache_ratio:0.7553893532776067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 -DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s -INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.11032414436340332 s -DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=97578720243199485041838651943656236325, time:1750766937.4236917s req_ids:[8] -DEBUG 06-24 20:08:57 [manager.py:391] -ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:204.48660850524902ms total_cost_time:204.5307159423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6820 prompt_cache_len:5151 prompt_cache_ratio:0.7552785923753665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 -DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10671806335449219 s -INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.10860967636108398 s -DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=100091910470335941765493008615469347300, time:1750766937.6339097s req_ids:[8] -DEBUG 06-24 20:08:57 [manager.py:391] -ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:203.54819297790527ms total_cost_time:203.59277725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6821 prompt_cache_len:5151 prompt_cache_ratio:0.7551678639495675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 -DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:57 [manager.py:224] router recive req id 8 cost time 0.10645532608032227 s -INFO 06-24 20:08:57 [manager.py:68] detokenization recv req id 8 cost time 0.10857105255126953 s -DEBUG 06-24 20:08:57 [manager.py:391] Prefill Batch: batch_id=224474604729714835671989505433088524141, time:1750766937.8435917s req_ids:[8] -DEBUG 06-24 20:08:57 [manager.py:391] -ERROR 06-24 20:08:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42685890197754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6822 prompt_cache_len:5151 prompt_cache_ratio:0.7550571679859279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 -DEBUG 06-24 20:08:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10757827758789062 s -INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s -DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=199609835461489872909080927121389934457, time:1750766938.054261s req_ids:[8] -DEBUG 06-24 20:08:58 [manager.py:391] -ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:57 lightllm_req_id:8 first_token_cost:204.34188842773438ms total_cost_time:204.38623428344727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6823 prompt_cache_len:5151 prompt_cache_ratio:0.7549465044701744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 -DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10746264457702637 s -INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10984015464782715 s -DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=288733824546877206061754072661364099862, time:1750766938.26539s req_ids:[8] -DEBUG 06-24 20:08:58 [manager.py:391] -ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:205.04307746887207ms total_cost_time:205.08646965026855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6824 prompt_cache_len:5151 prompt_cache_ratio:0.7548358733880423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 -DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.10722804069519043 s -INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.10912728309631348 s -DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=178520428975840801954627838230231340903, time:1750766938.482138s req_ids:[8] -DEBUG 06-24 20:08:58 [manager.py:391] -ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:379.61316108703613ms total_cost_time:379.6572685241699ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6825 prompt_cache_len:5151 prompt_cache_ratio:0.7547252747252747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 -DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:58 [manager.py:224] router recive req id 8 cost time 0.1092538833618164 s -INFO 06-24 20:08:58 [manager.py:68] detokenization recv req id 8 cost time 0.11130690574645996 s -DEBUG 06-24 20:08:58 [manager.py:391] Prefill Batch: batch_id=73373417575017730238666576942647667645, time:1750766938.8548827s req_ids:[8] -DEBUG 06-24 20:08:58 [manager.py:391] -ERROR 06-24 20:08:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:198.24528694152832ms total_cost_time:198.29130172729492ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6826 prompt_cache_len:5151 prompt_cache_ratio:0.7546147084676238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 -DEBUG 06-24 20:08:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10714983940124512 s -INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.1089472770690918 s -DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=59873730228461247030151046067252736987, time:1750766939.066585s req_ids:[8] -DEBUG 06-24 20:08:59 [manager.py:391] -ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:58 lightllm_req_id:8 first_token_cost:208.70399475097656ms total_cost_time:208.74881744384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6827 prompt_cache_len:5151 prompt_cache_ratio:0.7545041746008496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 -DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10799932479858398 s -INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.10986971855163574 s -DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=323417206050919408324145488795259418716, time:1750766939.2786152s req_ids:[8] -DEBUG 06-24 20:08:59 [manager.py:391] -ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:203.67813110351562ms total_cost_time:203.72271537780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6828 prompt_cache_len:5151 prompt_cache_ratio:0.7543936731107206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 -DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.1080775260925293 s -INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.11007094383239746 s -DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=147344583457436821196947579985878308915, time:1750766939.4896247s req_ids:[8] -DEBUG 06-24 20:08:59 [manager.py:391] -ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:207.95893669128418ms total_cost_time:208.00161361694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6829 prompt_cache_len:5151 prompt_cache_ratio:0.7542832039830136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 -DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10713648796081543 s -INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.1090245246887207 s -DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=23660667302062305760683070084689719247, time:1750766939.7025998s req_ids:[8] -DEBUG 06-24 20:08:59 [manager.py:391] -ERROR 06-24 20:08:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:08:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:08:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:207.43608474731445ms total_cost_time:207.4875831604004ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:6830 prompt_cache_len:5151 prompt_cache_ratio:0.7541727672035139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:08:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 -DEBUG 06-24 20:08:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:08:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:08:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:08:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:08:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:08:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:08:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:08:59 [manager.py:224] router recive req id 8 cost time 0.10644268989562988 s -INFO 06-24 20:08:59 [manager.py:68] detokenization recv req id 8 cost time 0.10818147659301758 s -DEBUG 06-24 20:08:59 [manager.py:391] Prefill Batch: batch_id=183799344215477268207882276421314014863, time:1750766939.915935s req_ids:[8] -DEBUG 06-24 20:08:59 [manager.py:391] -ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:08:59 lightllm_req_id:8 first_token_cost:230.1347255706787ms total_cost_time:230.1807403564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6831 prompt_cache_len:5151 prompt_cache_ratio:0.754062362758015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 -DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10837030410766602 s -INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.11026239395141602 s -DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=119383577268180528697244155241791447470, time:1750766940.143927s req_ids:[8] -DEBUG 06-24 20:09:00 [manager.py:391] -ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:196.0914134979248ms total_cost_time:196.13361358642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6832 prompt_cache_len:5151 prompt_cache_ratio:0.7539519906323185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 -DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10720062255859375 s -INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900545120239258 s -DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=163951889323953808350912125679091832088, time:1750766940.352612s req_ids:[8] -DEBUG 06-24 20:09:00 [manager.py:391] -ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:205.93023300170898ms total_cost_time:205.98530769348145ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:6833 prompt_cache_len:5151 prompt_cache_ratio:0.7538416508122348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 -DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10826969146728516 s -INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100001335144043 s -DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=79849963693416527876517226336232406480, time:1750766940.5618372s req_ids:[8] -DEBUG 06-24 20:09:00 [manager.py:391] -ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:164.49689865112305ms total_cost_time:164.53814506530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6834 prompt_cache_len:5151 prompt_cache_ratio:0.753731343283582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 -DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:00 [batch.py:51] router release req id 8 -INFO 06-24 20:09:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10645294189453125 s -INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10864400863647461 s -DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=85094641878749162022666796669973486460, time:1750766940.7325542s req_ids:[8] -DEBUG 06-24 20:09:00 [manager.py:391] -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:194.08249855041504ms total_cost_time:194.12636756896973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6835 prompt_cache_len:5151 prompt_cache_ratio:0.7536210680321873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 -DEBUG 06-24 20:09:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:00 [manager.py:224] router recive req id 8 cost time 0.10729455947875977 s -INFO 06-24 20:09:00 [manager.py:68] detokenization recv req id 8 cost time 0.10937047004699707 s -DEBUG 06-24 20:09:00 [manager.py:391] Prefill Batch: batch_id=76860467498452797639012823698468457235, time:1750766940.9344769s req_ids:[8] -DEBUG 06-24 20:09:00 [manager.py:391] -ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:00 lightllm_req_id:8 first_token_cost:419.10624504089355ms total_cost_time:419.15035247802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6836 prompt_cache_len:5151 prompt_cache_ratio:0.7535108250438853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 -DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.10732245445251465 s -INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s -DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=274781979090449504542227148969917865417, time:1750766941.3535109s req_ids:[8] -DEBUG 06-24 20:09:01 [manager.py:391] -ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:199.13363456726074ms total_cost_time:199.17798042297363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6837 prompt_cache_len:5151 prompt_cache_ratio:0.7534006143045195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 -DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.10813426971435547 s -INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10989689826965332 s -DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=62969786028292721098604257036862143940, time:1750766941.5628517s req_ids:[8] -DEBUG 06-24 20:09:01 [manager.py:391] -ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:225.75807571411133ms total_cost_time:225.80242156982422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6838 prompt_cache_len:5151 prompt_cache_ratio:0.7532904357999415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 -DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.1071007251739502 s -INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.10882806777954102 s -DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=322686030477036829512376333636657150402, time:1750766941.7895255s req_ids:[8] -DEBUG 06-24 20:09:01 [manager.py:391] -ERROR 06-24 20:09:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:191.88761711120605ms total_cost_time:191.93220138549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6839 prompt_cache_len:5151 prompt_cache_ratio:0.7531802895160111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 -DEBUG 06-24 20:09:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:01 [manager.py:224] router recive req id 8 cost time 0.1100006103515625 s -INFO 06-24 20:09:01 [manager.py:68] detokenization recv req id 8 cost time 0.11208724975585938 s -DEBUG 06-24 20:09:01 [manager.py:391] Prefill Batch: batch_id=250424276227907648336379964360058923222, time:1750766941.9914446s req_ids:[8] -DEBUG 06-24 20:09:01 [manager.py:391] -ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:01 lightllm_req_id:8 first_token_cost:201.51662826538086ms total_cost_time:201.56407356262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:6840 prompt_cache_len:5151 prompt_cache_ratio:0.7530701754385964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 -DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10894370079040527 s -INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s -DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=236305860541665600868445785598499599432, time:1750766942.200533s req_ids:[8] -DEBUG 06-24 20:09:02 [manager.py:391] -ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:206.65335655212402ms total_cost_time:206.6974639892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6841 prompt_cache_len:5151 prompt_cache_ratio:0.7529600935535741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 -DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10677123069763184 s -INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s -DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=321468663311855045553034986311707378586, time:1750766942.4127455s req_ids:[8] -DEBUG 06-24 20:09:02 [manager.py:391] -ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:208.5282802581787ms total_cost_time:208.5719108581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6842 prompt_cache_len:5151 prompt_cache_ratio:0.7528500438468284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 -DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10765385627746582 s -INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s -DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=32482110370897785384533908274468476902, time:1750766942.626984s req_ids:[8] -DEBUG 06-24 20:09:02 [manager.py:391] -ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:207.15904235839844ms total_cost_time:207.2007656097412ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6843 prompt_cache_len:5151 prompt_cache_ratio:0.7527400263042525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 -DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:02 [manager.py:224] router recive req id 8 cost time 0.10643839836120605 s -INFO 06-24 20:09:02 [manager.py:68] detokenization recv req id 8 cost time 0.10841703414916992 s -DEBUG 06-24 20:09:02 [manager.py:391] Prefill Batch: batch_id=309063169396142430458222121257222346538, time:1750766942.8380742s req_ids:[8] -DEBUG 06-24 20:09:02 [manager.py:391] -ERROR 06-24 20:09:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:203.65452766418457ms total_cost_time:203.69744300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6844 prompt_cache_len:5151 prompt_cache_ratio:0.7526300409117476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 -DEBUG 06-24 20:09:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10966181755065918 s -INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.11162686347961426 s -DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=116825656213073181334035216133489873534, time:1750766943.0487237s req_ids:[8] -DEBUG 06-24 20:09:03 [manager.py:391] -ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:02 lightllm_req_id:8 first_token_cost:204.65707778930664ms total_cost_time:204.71549034118652ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6845 prompt_cache_len:5151 prompt_cache_ratio:0.7525200876552228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 -DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10768723487854004 s -INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s -DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=259937918151389654459772075964894621910, time:1750766943.2730591s req_ids:[8] -DEBUG 06-24 20:09:03 [manager.py:391] -ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:218.2483673095703ms total_cost_time:218.2931900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6846 prompt_cache_len:5151 prompt_cache_ratio:0.752410166520596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 -DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10729837417602539 s -INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10911059379577637 s -DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=70849868679831145275039319567461479376, time:1750766943.4835455s req_ids:[8] -DEBUG 06-24 20:09:03 [manager.py:391] -ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:393.38088035583496ms total_cost_time:393.44048500061035ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:6847 prompt_cache_len:5151 prompt_cache_ratio:0.7523002774937929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 -DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:03 [manager.py:224] router recive req id 8 cost time 0.10747075080871582 s -INFO 06-24 20:09:03 [manager.py:68] detokenization recv req id 8 cost time 0.10949158668518066 s -DEBUG 06-24 20:09:03 [manager.py:391] Prefill Batch: batch_id=121929378039989573052113394819959477599, time:1750766943.8775961s req_ids:[8] -DEBUG 06-24 20:09:03 [manager.py:391] -ERROR 06-24 20:09:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:195.3420639038086ms total_cost_time:195.4021453857422ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6848 prompt_cache_len:5151 prompt_cache_ratio:0.7521904205607477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 -DEBUG 06-24 20:09:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s -INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10939478874206543 s -DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=328724829950183079474138097475080964284, time:1750766944.0826952s req_ids:[8] -DEBUG 06-24 20:09:04 [manager.py:391] -ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:03 lightllm_req_id:8 first_token_cost:201.9493579864502ms total_cost_time:202.00800895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:6849 prompt_cache_len:5151 prompt_cache_ratio:0.7520805957074025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 -DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10774731636047363 s -INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s -DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=232347241197283823669963992043093891493, time:1750766944.3003318s req_ids:[8] -DEBUG 06-24 20:09:04 [manager.py:391] -ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:211.39812469482422ms total_cost_time:211.4424705505371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6850 prompt_cache_len:5151 prompt_cache_ratio:0.7519708029197081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 -DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10698461532592773 s -INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.1089639663696289 s -DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=65750638268671543037787516011075628749, time:1750766944.510203s req_ids:[8] -DEBUG 06-24 20:09:04 [manager.py:391] -ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:206.71987533569336ms total_cost_time:206.77924156188965ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6851 prompt_cache_len:5151 prompt_cache_ratio:0.7518610421836228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 -DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s -INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.11028242111206055 s -DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=234222845476793630706353687329533810956, time:1750766944.7211623s req_ids:[8] -DEBUG 06-24 20:09:04 [manager.py:391] -ERROR 06-24 20:09:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:227.32257843017578ms total_cost_time:227.37431526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:6852 prompt_cache_len:5151 prompt_cache_ratio:0.7517513134851138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 -DEBUG 06-24 20:09:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:04 [manager.py:224] router recive req id 8 cost time 0.10767650604248047 s -INFO 06-24 20:09:04 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s -DEBUG 06-24 20:09:04 [manager.py:391] Prefill Batch: batch_id=1170669171345206435821314363264910166, time:1750766944.9487925s req_ids:[8] -DEBUG 06-24 20:09:04 [manager.py:391] -ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:04 lightllm_req_id:8 first_token_cost:192.64888763427734ms total_cost_time:192.69442558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6853 prompt_cache_len:5151 prompt_cache_ratio:0.7516416168101562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 -DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10715842247009277 s -INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.10909366607666016 s -DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=263287990501476734327041944388234170440, time:1750766945.1522245s req_ids:[8] -DEBUG 06-24 20:09:05 [manager.py:391] -ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:201.75457000732422ms total_cost_time:201.80058479309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6854 prompt_cache_len:5151 prompt_cache_ratio:0.751531952144733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 -DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10840868949890137 s -INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11042022705078125 s -DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=257665570324946241474742966192998510411, time:1750766945.3609002s req_ids:[8] -DEBUG 06-24 20:09:05 [manager.py:391] -ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:203.0351161956787ms total_cost_time:203.07660102844238ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6855 prompt_cache_len:5151 prompt_cache_ratio:0.7514223194748358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 -DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s -INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11008810997009277 s -DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=286546985851962698788457962709141533939, time:1750766945.5706441s req_ids:[8] -DEBUG 06-24 20:09:05 [manager.py:391] -ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:202.17323303222656ms total_cost_time:202.21877098083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6856 prompt_cache_len:5151 prompt_cache_ratio:0.7513127187864644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 -DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.10901427268981934 s -INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11105036735534668 s -DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=155611884862911419224665382034283406138, time:1750766945.7794344s req_ids:[8] -DEBUG 06-24 20:09:05 [manager.py:391] -ERROR 06-24 20:09:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:206.5272331237793ms total_cost_time:206.5727710723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6857 prompt_cache_len:5151 prompt_cache_ratio:0.7512031500656263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 -DEBUG 06-24 20:09:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:05 [manager.py:224] router recive req id 8 cost time 0.11002707481384277 s -INFO 06-24 20:09:05 [manager.py:68] detokenization recv req id 8 cost time 0.11203193664550781 s -DEBUG 06-24 20:09:05 [manager.py:391] Prefill Batch: batch_id=49175244881618462133460933587070937997, time:1750766945.9898036s req_ids:[8] -DEBUG 06-24 20:09:05 [manager.py:391] -ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:05 lightllm_req_id:8 first_token_cost:199.1562843322754ms total_cost_time:199.17678833007812ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:6858 prompt_cache_len:5151 prompt_cache_ratio:0.7510936132983377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 -DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.1050572395324707 s -INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.10691261291503906 s -DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=69714902999983563607694563702086794760, time:1750766946.1942093s req_ids:[8] -DEBUG 06-24 20:09:06 [manager.py:391] -ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:204.8354148864746ms total_cost_time:204.8799991607666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6859 prompt_cache_len:5151 prompt_cache_ratio:0.7509841084706226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 -DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.3096740245819092 s -INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.3117990493774414 s -DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=37281641384380666703021203269817209625, time:1750766946.5993214s req_ids:[8] -DEBUG 06-24 20:09:06 [manager.py:391] -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:09:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 29978.461 tokens/s -DEBUG 06-24 20:09:06 [stats.py:37] Avg prompt tokens throughput: 29969.696 tokens/s -DEBUG 06-24 20:09:06 [stats.py:37] Avg generate tokens throughput: 8.765 tokens/s -ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:397.98450469970703ms total_cost_time:398.0603218078613ms,out_token_counter:1 mean_per_token_cost_time: 0.07581710815429688ms prompt_token_num:6860 prompt_cache_len:5151 prompt_cache_ratio:0.7508746355685131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 -DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.10731649398803711 s -INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.10928988456726074 s -DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=33120558522242884783691575350515062608, time:1750766946.8051884s req_ids:[8] -DEBUG 06-24 20:09:06 [manager.py:391] -ERROR 06-24 20:09:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:161.95297241210938ms total_cost_time:161.99660301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6861 prompt_cache_len:5151 prompt_cache_ratio:0.7507651945780498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 -DEBUG 06-24 20:09:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:06 [manager.py:224] router recive req id 8 cost time 0.10732150077819824 s -INFO 06-24 20:09:06 [manager.py:68] detokenization recv req id 8 cost time 0.1093292236328125 s -DEBUG 06-24 20:09:06 [manager.py:391] Prefill Batch: batch_id=155198956505296983996858079202329459734, time:1750766946.9746046s req_ids:[8] -DEBUG 06-24 20:09:06 [manager.py:391] -ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:06 lightllm_req_id:8 first_token_cost:193.58587265014648ms total_cost_time:193.62878799438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6862 prompt_cache_len:5151 prompt_cache_ratio:0.7506557854852812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 -DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.1067512035369873 s -INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10921096801757812 s -DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=207074435996427102295964350657990814924, time:1750766947.174065s req_ids:[8] -DEBUG 06-24 20:09:07 [manager.py:391] -ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:203.57465744018555ms total_cost_time:203.61900329589844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6863 prompt_cache_len:5151 prompt_cache_ratio:0.7505464082762641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 -DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10803747177124023 s -INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.1099693775177002 s -DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=20402048368169934665425366240280627766, time:1750766947.3892162s req_ids:[8] -DEBUG 06-24 20:09:07 [manager.py:391] -ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:209.9456787109375ms total_cost_time:209.98859405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6864 prompt_cache_len:5151 prompt_cache_ratio:0.7504370629370629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 -DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10746312141418457 s -INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918307304382324 s -DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=197319660250053152617949901274091911221, time:1750766947.60713s req_ids:[8] -DEBUG 06-24 20:09:07 [manager.py:391] -ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:177.66165733337402ms total_cost_time:177.70719528198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6865 prompt_cache_len:5151 prompt_cache_ratio:0.7503277494537509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 -DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:07 [batch.py:51] router release req id 8 -INFO 06-24 20:09:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.1069784164428711 s -INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.10892438888549805 s -DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=267338712490869647730092118815062869706, time:1750766947.7823315s req_ids:[8] -DEBUG 06-24 20:09:07 [manager.py:391] -ERROR 06-24 20:09:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:193.45998764038086ms total_cost_time:193.50361824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6866 prompt_cache_len:5151 prompt_cache_ratio:0.750218467812409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 -DEBUG 06-24 20:09:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:07 [manager.py:224] router recive req id 8 cost time 0.10831260681152344 s -INFO 06-24 20:09:07 [manager.py:68] detokenization recv req id 8 cost time 0.11018776893615723 s -DEBUG 06-24 20:09:07 [manager.py:391] Prefill Batch: batch_id=204035463261510792662876690861850653045, time:1750766947.9989896s req_ids:[8] -DEBUG 06-24 20:09:07 [manager.py:391] -ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:07 lightllm_req_id:8 first_token_cost:218.31727027893066ms total_cost_time:218.36137771606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6867 prompt_cache_len:5151 prompt_cache_ratio:0.7501092179991262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 -DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10693669319152832 s -INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10891032218933105 s -DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=230147653845344523764166485638477591089, time:1750766948.2102313s req_ids:[8] -DEBUG 06-24 20:09:08 [manager.py:391] -ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:207.7343463897705ms total_cost_time:207.7770233154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6868 prompt_cache_len:5151 prompt_cache_ratio:0.75 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 -DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10773301124572754 s -INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10959887504577637 s -DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=234047829248130842097119857624736495919, time:1750766948.4233541s req_ids:[8] -DEBUG 06-24 20:09:08 [manager.py:391] -ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:379.7280788421631ms total_cost_time:379.7736167907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6869 prompt_cache_len:5151 prompt_cache_ratio:0.7498908138011355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 -DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10706400871276855 s -INFO 06-24 20:09:08 [manager.py:68] detokenization recv req id 8 cost time 0.10911965370178223 s -DEBUG 06-24 20:09:08 [manager.py:391] Prefill Batch: batch_id=222085876255501036351347160999741977617, time:1750766948.8017652s req_ids:[8] -DEBUG 06-24 20:09:08 [manager.py:391] -ERROR 06-24 20:09:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:200.37460327148438ms total_cost_time:200.42061805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6870 prompt_cache_len:5151 prompt_cache_ratio:0.7497816593886463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 -DEBUG 06-24 20:09:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:08 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s -INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.10992431640625 s -DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=319863202815236015216974838829498862323, time:1750766949.0147793s req_ids:[8] -DEBUG 06-24 20:09:09 [manager.py:391] -ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:08 lightllm_req_id:8 first_token_cost:208.35518836975098ms total_cost_time:208.40048789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6871 prompt_cache_len:5151 prompt_cache_ratio:0.7496725367486538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 -DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.1088404655456543 s -INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.11135482788085938 s -DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=70540158241112889960598274416747593011, time:1750766949.2287843s req_ids:[8] -DEBUG 06-24 20:09:09 [manager.py:391] -ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:205.87897300720215ms total_cost_time:205.92260360717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6872 prompt_cache_len:5151 prompt_cache_ratio:0.7495634458672875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 -DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10782003402709961 s -INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.10984349250793457 s -DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=201733612295772734934790242833815407511, time:1750766949.4391215s req_ids:[8] -DEBUG 06-24 20:09:09 [manager.py:391] -ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:206.9566249847412ms total_cost_time:207.0000171661377ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6873 prompt_cache_len:5151 prompt_cache_ratio:0.7494543867306853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 -DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10796689987182617 s -INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.1098933219909668 s -DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=226601812317982880887238551623539738984, time:1750766949.651246s req_ids:[8] -DEBUG 06-24 20:09:09 [manager.py:391] -ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:214.9984836578369ms total_cost_time:215.043306350708ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6874 prompt_cache_len:5151 prompt_cache_ratio:0.7493453593249927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 -DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:09 [manager.py:224] router recive req id 8 cost time 0.10877227783203125 s -INFO 06-24 20:09:09 [manager.py:68] detokenization recv req id 8 cost time 0.11069965362548828 s -DEBUG 06-24 20:09:09 [manager.py:391] Prefill Batch: batch_id=9104256372056234133142155551511902485, time:1750766949.8706276s req_ids:[8] -DEBUG 06-24 20:09:09 [manager.py:391] -ERROR 06-24 20:09:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:195.04141807556152ms total_cost_time:195.0831413269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6875 prompt_cache_len:5151 prompt_cache_ratio:0.7492363636363636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 -DEBUG 06-24 20:09:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10768628120422363 s -INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.10963058471679688 s -DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=143529874133760861206366770873756909096, time:1750766950.0731673s req_ids:[8] -DEBUG 06-24 20:09:10 [manager.py:391] -ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:09 lightllm_req_id:8 first_token_cost:203.66859436035156ms total_cost_time:203.71174812316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6876 prompt_cache_len:5151 prompt_cache_ratio:0.7491273996509599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 -DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10804533958435059 s -INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.1102914810180664 s -DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=327287713082070026360185408397921363772, time:1750766950.2827823s req_ids:[8] -DEBUG 06-24 20:09:10 [manager.py:391] -ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:202.87299156188965ms total_cost_time:202.91757583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6877 prompt_cache_len:5151 prompt_cache_ratio:0.7490184673549513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 -DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10824990272521973 s -INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.11025214195251465 s -DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=290356987819548676593119064791683122350, time:1750766950.4911025s req_ids:[8] -DEBUG 06-24 20:09:10 [manager.py:391] -ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:200.45924186706543ms total_cost_time:200.5026340484619ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6878 prompt_cache_len:5151 prompt_cache_ratio:0.7489095667345158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 -DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10813188552856445 s -INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.11025047302246094 s -DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=273200868083007745610716192236653973761, time:1750766950.6973672s req_ids:[8] -DEBUG 06-24 20:09:10 [manager.py:391] -ERROR 06-24 20:09:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:205.77383041381836ms total_cost_time:205.81698417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6879 prompt_cache_len:5151 prompt_cache_ratio:0.7488006977758395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 -DEBUG 06-24 20:09:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:10 [manager.py:224] router recive req id 8 cost time 0.10717916488647461 s -INFO 06-24 20:09:10 [manager.py:68] detokenization recv req id 8 cost time 0.10932278633117676 s -DEBUG 06-24 20:09:10 [manager.py:391] Prefill Batch: batch_id=17284495574199326630179094213809081405, time:1750766950.908958s req_ids:[8] -DEBUG 06-24 20:09:10 [manager.py:391] -ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:10 lightllm_req_id:8 first_token_cost:373.74067306518555ms total_cost_time:373.7826347351074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6880 prompt_cache_len:5151 prompt_cache_ratio:0.7486918604651163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 -DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:11 [batch.py:51] router release req id 8 -INFO 06-24 20:09:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s -INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.11054682731628418 s -DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=8620235176817787976818710038643389818, time:1750766951.2825701s req_ids:[8] -DEBUG 06-24 20:09:11 [manager.py:391] -ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:219.10667419433594ms total_cost_time:219.15245056152344ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6881 prompt_cache_len:5151 prompt_cache_ratio:0.7485830547885481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 -DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10707664489746094 s -INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.1088857650756836 s -DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=252907209971013680774389553894057493820, time:1750766951.5209494s req_ids:[8] -DEBUG 06-24 20:09:11 [manager.py:391] -ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:211.4250659942627ms total_cost_time:211.4694118499756ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6882 prompt_cache_len:5151 prompt_cache_ratio:0.7484742807323452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 -DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.1071321964263916 s -INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.10899853706359863 s -DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=247052002028324460539124605807992495673, time:1750766951.7299132s req_ids:[8] -DEBUG 06-24 20:09:11 [manager.py:391] -ERROR 06-24 20:09:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:206.12645149230957ms total_cost_time:206.16984367370605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6883 prompt_cache_len:5151 prompt_cache_ratio:0.7483655382827256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 -DEBUG 06-24 20:09:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:11 [manager.py:224] router recive req id 8 cost time 0.10673737525939941 s -INFO 06-24 20:09:11 [manager.py:68] detokenization recv req id 8 cost time 0.10851359367370605 s -DEBUG 06-24 20:09:11 [manager.py:391] Prefill Batch: batch_id=137057678445348523698642094917231283017, time:1750766951.940449s req_ids:[8] -DEBUG 06-24 20:09:11 [manager.py:391] -ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:11 lightllm_req_id:8 first_token_cost:200.85430145263672ms total_cost_time:200.90031623840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6884 prompt_cache_len:5151 prompt_cache_ratio:0.7482568274259151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 -DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10753655433654785 s -INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.10952997207641602 s -DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=1659404641005524033557153981725153282, time:1750766952.1494915s req_ids:[8] -DEBUG 06-24 20:09:12 [manager.py:391] -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:206.44354820251465ms total_cost_time:206.48932456970215ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6885 prompt_cache_len:5151 prompt_cache_ratio:0.7481481481481481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 -DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10764527320861816 s -INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s -DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=216572230065717302837359203033343949958, time:1750766952.360585s req_ids:[8] -DEBUG 06-24 20:09:12 [manager.py:391] -ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:209.58614349365234ms total_cost_time:209.63096618652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6886 prompt_cache_len:5151 prompt_cache_ratio:0.7480395004356666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 -DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10689711570739746 s -INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1086876392364502 s -DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=246623927898465359324282066273072788907, time:1750766952.574779s req_ids:[8] -DEBUG 06-24 20:09:12 [manager.py:391] -ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:169.9545383453369ms total_cost_time:169.9965000152588ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6887 prompt_cache_len:5151 prompt_cache_ratio:0.7479308842747205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 -DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10834097862243652 s -INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1103358268737793 s -DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=243956624227053944202027504946768286228, time:1750766952.749194s req_ids:[8] -DEBUG 06-24 20:09:12 [manager.py:391] -ERROR 06-24 20:09:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:210.08849143981934ms total_cost_time:210.13259887695312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6888 prompt_cache_len:5151 prompt_cache_ratio:0.747822299651568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 -DEBUG 06-24 20:09:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:12 [manager.py:224] router recive req id 8 cost time 0.10877776145935059 s -INFO 06-24 20:09:12 [manager.py:68] detokenization recv req id 8 cost time 0.1108391284942627 s -DEBUG 06-24 20:09:12 [manager.py:391] Prefill Batch: batch_id=289805519157454348748486061440403572758, time:1750766952.9717371s req_ids:[8] -DEBUG 06-24 20:09:12 [manager.py:391] -ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:12 lightllm_req_id:8 first_token_cost:212.11719512939453ms total_cost_time:212.16082572937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6889 prompt_cache_len:5151 prompt_cache_ratio:0.7477137465524749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 -DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10876154899597168 s -INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.11076807975769043 s -DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=257192159113495849581322003778988938223, time:1750766953.18366s req_ids:[8] -DEBUG 06-24 20:09:13 [manager.py:391] -ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:205.7807445526123ms total_cost_time:205.8238983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6890 prompt_cache_len:5151 prompt_cache_ratio:0.7476052249637155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 -DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.1078345775604248 s -INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.10988545417785645 s -DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=290147897035526126563697176154353902618, time:1750766953.3955362s req_ids:[8] -DEBUG 06-24 20:09:13 [manager.py:391] -ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:380.28979301452637ms total_cost_time:380.33318519592285ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6891 prompt_cache_len:5151 prompt_cache_ratio:0.7474967348715716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 -DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10729479789733887 s -INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.10923552513122559 s -DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=323716766473355109703222426048066109140, time:1750766953.7767453s req_ids:[8] -DEBUG 06-24 20:09:13 [manager.py:391] -ERROR 06-24 20:09:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:177.16336250305176ms total_cost_time:177.20532417297363ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:6892 prompt_cache_len:5151 prompt_cache_ratio:0.7473882762623332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 -DEBUG 06-24 20:09:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:13 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s -INFO 06-24 20:09:13 [manager.py:68] detokenization recv req id 8 cost time 0.1093900203704834 s -DEBUG 06-24 20:09:13 [manager.py:391] Prefill Batch: batch_id=43378016928329572813687070252260745346, time:1750766953.9596915s req_ids:[8] -DEBUG 06-24 20:09:13 [manager.py:391] -ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:13 lightllm_req_id:8 first_token_cost:191.34759902954102ms total_cost_time:191.392183303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6893 prompt_cache_len:5151 prompt_cache_ratio:0.747279849122298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 -DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10767960548400879 s -INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.10964226722717285 s -DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=289560743015190712296058488262278038096, time:1750766954.1729689s req_ids:[8] -DEBUG 06-24 20:09:14 [manager.py:391] -ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:224.24602508544922ms total_cost_time:224.2898941040039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6894 prompt_cache_len:5151 prompt_cache_ratio:0.747171453437772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 -DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10858798027038574 s -INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.1107337474822998 s -DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=338255403435816586150679977298720005636, time:1750766954.3899124s req_ids:[8] -DEBUG 06-24 20:09:14 [manager.py:391] -ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:208.0247402191162ms total_cost_time:208.0695629119873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6895 prompt_cache_len:5151 prompt_cache_ratio:0.7470630891950689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 -DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10733175277709961 s -INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.10930442810058594 s -DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=44909381193533248699894426576118612518, time:1750766954.6031084s req_ids:[8] -DEBUG 06-24 20:09:14 [manager.py:391] -ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:204.1170597076416ms total_cost_time:204.17404174804688ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6896 prompt_cache_len:5151 prompt_cache_ratio:0.7469547563805105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 -DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:14 [batch.py:51] router release req id 8 -INFO 06-24 20:09:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:14 [manager.py:224] router recive req id 8 cost time 0.10976910591125488 s -INFO 06-24 20:09:14 [manager.py:68] detokenization recv req id 8 cost time 0.11162853240966797 s -DEBUG 06-24 20:09:14 [manager.py:391] Prefill Batch: batch_id=318899340255647978147881978148277003537, time:1750766954.8116658s req_ids:[8] -DEBUG 06-24 20:09:14 [manager.py:391] -ERROR 06-24 20:09:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:200.8841037750244ms total_cost_time:200.9267807006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6897 prompt_cache_len:5151 prompt_cache_ratio:0.7468464549804262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 -DEBUG 06-24 20:09:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10575222969055176 s -INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.10673165321350098 s -DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=205081967549116427551053729990620260228, time:1750766955.0334196s req_ids:[8] -DEBUG 06-24 20:09:15 [manager.py:391] -ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:14 lightllm_req_id:8 first_token_cost:219.7573184967041ms total_cost_time:219.8007106781006ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6898 prompt_cache_len:5151 prompt_cache_ratio:0.746738184981154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 -DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10760235786437988 s -INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.10871553421020508 s -DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=51606372709773453561888983896277976951, time:1750766955.251612s req_ids:[8] -DEBUG 06-24 20:09:15 [manager.py:391] -ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:210.8633518218994ms total_cost_time:210.91556549072266ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:6899 prompt_cache_len:5151 prompt_cache_ratio:0.746629946369039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 -DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10944318771362305 s -INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11136126518249512 s -DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=248416926676330097048324237649015957789, time:1750766955.4652324s req_ids:[8] -DEBUG 06-24 20:09:15 [manager.py:391] -ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:208.16826820373535ms total_cost_time:208.21070671081543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6900 prompt_cache_len:5151 prompt_cache_ratio:0.7465217391304347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 -DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10890722274780273 s -INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11077380180358887 s -DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=190700846429975878505122255953311583677, time:1750766955.68941s req_ids:[8] -DEBUG 06-24 20:09:15 [manager.py:391] -ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:222.35941886901855ms total_cost_time:222.40543365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6901 prompt_cache_len:5151 prompt_cache_ratio:0.7464135632517026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 -DEBUG 06-24 20:09:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:15 [manager.py:224] router recive req id 8 cost time 0.10781979560852051 s -INFO 06-24 20:09:15 [manager.py:68] detokenization recv req id 8 cost time 0.11097288131713867 s -DEBUG 06-24 20:09:15 [manager.py:391] Prefill Batch: batch_id=238411128429305238840896180064888003698, time:1750766955.9050894s req_ids:[8] -DEBUG 06-24 20:09:15 [manager.py:391] -ERROR 06-24 20:09:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:208.07218551635742ms total_cost_time:208.1167697906494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6902 prompt_cache_len:5151 prompt_cache_ratio:0.7463054187192119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 -DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10788130760192871 s -INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099236011505127 s -DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=138922657926341970864806457679063138580, time:1750766956.1171858s req_ids:[8] -DEBUG 06-24 20:09:16 [manager.py:391] -ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:15 lightllm_req_id:8 first_token_cost:195.09601593017578ms total_cost_time:195.13988494873047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6903 prompt_cache_len:5151 prompt_cache_ratio:0.7461973055193394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 -INFO 06-24 20:09:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.30887508392333984 s -INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.3106410503387451 s -DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=326096745887338067339182938990879355421, time:1750766956.5299559s req_ids:[8] -DEBUG 06-24 20:09:16 [manager.py:391] -ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:09:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 30259.254 tokens/s -DEBUG 06-24 20:09:16 [stats.py:37] Avg prompt tokens throughput: 30250.363 tokens/s -DEBUG 06-24 20:09:16 [stats.py:37] Avg generate tokens throughput: 8.890 tokens/s -INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:415.1158332824707ms total_cost_time:415.1618480682373ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:6904 prompt_cache_len:5151 prompt_cache_ratio:0.7460892236384704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 -DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10692238807678223 s -INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.10878539085388184 s -DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=78713534651843993670620097241845470399, time:1750766956.738417s req_ids:[8] -DEBUG 06-24 20:09:16 [manager.py:391] -ERROR 06-24 20:09:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:206.86793327331543ms total_cost_time:206.91323280334473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6905 prompt_cache_len:5151 prompt_cache_ratio:0.7459811730629978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 -DEBUG 06-24 20:09:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:16 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s -INFO 06-24 20:09:16 [manager.py:68] detokenization recv req id 8 cost time 0.11014580726623535 s -DEBUG 06-24 20:09:16 [manager.py:391] Prefill Batch: batch_id=207125816614046794154027677691358135447, time:1750766956.9508383s req_ids:[8] -DEBUG 06-24 20:09:16 [manager.py:391] -ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:16 lightllm_req_id:8 first_token_cost:206.72345161437988ms total_cost_time:206.76898956298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6906 prompt_cache_len:5151 prompt_cache_ratio:0.7458731537793223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 -DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10745477676391602 s -INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.1093134880065918 s -DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=166948275256378102374637279380540858950, time:1750766957.1645246s req_ids:[8] -DEBUG 06-24 20:09:17 [manager.py:391] -ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:170.87578773498535ms total_cost_time:170.91870307922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6907 prompt_cache_len:5151 prompt_cache_ratio:0.7457651657738527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 -DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10688591003417969 s -INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10886406898498535 s -DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=140786328696448609608918440601761507838, time:1750766957.3361902s req_ids:[8] -DEBUG 06-24 20:09:17 [manager.py:391] -ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:196.46358489990234ms total_cost_time:196.52152061462402ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:6908 prompt_cache_len:5151 prompt_cache_ratio:0.7456572090330053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 -DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10738682746887207 s -INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10918283462524414 s -DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=140511340718790453745836007395595072154, time:1750766957.5425222s req_ids:[8] -DEBUG 06-24 20:09:17 [manager.py:391] -ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:206.28714561462402ms total_cost_time:206.33172988891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6909 prompt_cache_len:5151 prompt_cache_ratio:0.7455492835432045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 -DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.11002516746520996 s -INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.1120290756225586 s -DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=248743935404132737802664156037262138498, time:1750766957.7560642s req_ids:[8] -DEBUG 06-24 20:09:17 [manager.py:391] -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:208.27746391296387ms total_cost_time:208.32276344299316ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6910 prompt_cache_len:5151 prompt_cache_ratio:0.7454413892908828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 -DEBUG 06-24 20:09:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:17 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s -INFO 06-24 20:09:17 [manager.py:68] detokenization recv req id 8 cost time 0.10982656478881836 s -DEBUG 06-24 20:09:17 [manager.py:391] Prefill Batch: batch_id=17057963950935521477427715846137666943, time:1750766957.9690385s req_ids:[8] -DEBUG 06-24 20:09:17 [manager.py:391] -ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:17 lightllm_req_id:8 first_token_cost:169.33107376098633ms total_cost_time:169.3727970123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:6911 prompt_cache_len:5151 prompt_cache_ratio:0.7453335262624801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 -DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.10722470283508301 s -INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10918045043945312 s -DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=90082410373012013453206017776557508317, time:1750766958.142211s req_ids:[8] -DEBUG 06-24 20:09:18 [manager.py:391] -ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:203.44161987304688ms total_cost_time:203.48477363586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6912 prompt_cache_len:5151 prompt_cache_ratio:0.7452256944444444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 -DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.10779023170471191 s -INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s -DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=291103716472239899615552377186162032266, time:1750766958.3554807s req_ids:[8] -DEBUG 06-24 20:09:18 [manager.py:391] -ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:393.6293125152588ms total_cost_time:393.6734199523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6913 prompt_cache_len:5151 prompt_cache_ratio:0.7451178938232316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 -DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.1068875789642334 s -INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.10873818397521973 s -DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=92612920590475344131013022557893615747, time:1750766958.749083s req_ids:[8] -DEBUG 06-24 20:09:18 [manager.py:391] -ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:166.01967811584473ms total_cost_time:166.0597324371338ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:6914 prompt_cache_len:5151 prompt_cache_ratio:0.7450101243853052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 -DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:18 [manager.py:224] router recive req id 8 cost time 0.1089925765991211 s -INFO 06-24 20:09:18 [manager.py:68] detokenization recv req id 8 cost time 0.11065363883972168 s -DEBUG 06-24 20:09:18 [manager.py:391] Prefill Batch: batch_id=175184431586980232786083489790913563920, time:1750766958.9189248s req_ids:[8] -DEBUG 06-24 20:09:18 [manager.py:391] -ERROR 06-24 20:09:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:163.5587215423584ms total_cost_time:163.61570358276367ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6915 prompt_cache_len:5151 prompt_cache_ratio:0.7449023861171367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 -DEBUG 06-24 20:09:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10628199577331543 s -INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10824418067932129 s -DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=144038533184179620941465623302262351484, time:1750766959.0870543s req_ids:[8] -DEBUG 06-24 20:09:19 [manager.py:391] -ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:18 lightllm_req_id:8 first_token_cost:201.00879669189453ms total_cost_time:201.0519504547119ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6916 prompt_cache_len:5151 prompt_cache_ratio:0.7447946790052054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 -DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.1060936450958252 s -INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10773110389709473 s -DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=61763980811306878369260783804852400501, time:1750766959.2990544s req_ids:[8] -DEBUG 06-24 20:09:19 [manager.py:391] -ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:169.10481452941895ms total_cost_time:169.14701461791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6917 prompt_cache_len:5151 prompt_cache_ratio:0.7446870030359982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 -DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10651516914367676 s -INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10838627815246582 s -DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=47429139345182858383808607468568021253, time:1750766959.470113s req_ids:[8] -DEBUG 06-24 20:09:19 [manager.py:391] -ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:196.86484336853027ms total_cost_time:196.91014289855957ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6918 prompt_cache_len:5151 prompt_cache_ratio:0.7445793581960104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 -DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10749602317810059 s -INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.10922670364379883 s -DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=100565686933187798535807967242389494034, time:1750766959.6809075s req_ids:[8] -DEBUG 06-24 20:09:19 [manager.py:391] -ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:209.6548080444336ms total_cost_time:209.69772338867188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6919 prompt_cache_len:5151 prompt_cache_ratio:0.7444717444717445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 -DEBUG 06-24 20:09:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:19 [manager.py:224] router recive req id 8 cost time 0.10738682746887207 s -INFO 06-24 20:09:19 [manager.py:68] detokenization recv req id 8 cost time 0.1093289852142334 s -DEBUG 06-24 20:09:19 [manager.py:391] Prefill Batch: batch_id=95968143407433037559571242837024856065, time:1750766959.8890162s req_ids:[8] -DEBUG 06-24 20:09:19 [manager.py:391] -ERROR 06-24 20:09:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:204.93412017822266ms total_cost_time:204.99134063720703ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6920 prompt_cache_len:5151 prompt_cache_ratio:0.744364161849711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 -DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.11037707328796387 s -INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.11228799819946289 s -DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=60634089658338092162473874348933465268, time:1750766960.1070666s req_ids:[8] -DEBUG 06-24 20:09:20 [manager.py:391] -ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:19 lightllm_req_id:8 first_token_cost:216.01319313049316ms total_cost_time:216.05777740478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6921 prompt_cache_len:5151 prompt_cache_ratio:0.7442566103164283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 -DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10679411888122559 s -INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.10862588882446289 s -DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=89951810445199885337027229627688685946, time:1750766960.3245802s req_ids:[8] -DEBUG 06-24 20:09:20 [manager.py:391] -ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:211.25197410583496ms total_cost_time:211.30895614624023ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:6922 prompt_cache_len:5151 prompt_cache_ratio:0.7441490898584224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 -DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10959982872009277 s -INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.11153149604797363 s -DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=157279279165038591863805754921717179559, time:1750766960.5393896s req_ids:[8] -DEBUG 06-24 20:09:20 [manager.py:391] -ERROR 06-24 20:09:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:206.33244514465332ms total_cost_time:206.3765525817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6923 prompt_cache_len:5151 prompt_cache_ratio:0.7440416004622273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 -DEBUG 06-24 20:09:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:20 [manager.py:224] router recive req id 8 cost time 0.10747456550598145 s -INFO 06-24 20:09:20 [manager.py:68] detokenization recv req id 8 cost time 0.10942387580871582 s -DEBUG 06-24 20:09:20 [manager.py:391] Prefill Batch: batch_id=174758288416002072189864863234840683521, time:1750766960.7512164s req_ids:[8] -DEBUG 06-24 20:09:20 [manager.py:391] -ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:20 lightllm_req_id:8 first_token_cost:400.0568389892578ms total_cost_time:400.1152515411377ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6924 prompt_cache_len:5151 prompt_cache_ratio:0.7439341421143848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 -DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s -INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11032986640930176 s -DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=320888267519610555281189143097010194747, time:1750766961.151767s req_ids:[8] -DEBUG 06-24 20:09:21 [manager.py:391] -ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:201.6298770904541ms total_cost_time:201.6735076904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6925 prompt_cache_len:5151 prompt_cache_ratio:0.743826714801444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 -DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10711407661437988 s -INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.10901141166687012 s -DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=207287127815546269654234663340172237141, time:1750766961.3650198s req_ids:[8] -DEBUG 06-24 20:09:21 [manager.py:391] -ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:209.4118595123291ms total_cost_time:209.4554901123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:6926 prompt_cache_len:5151 prompt_cache_ratio:0.7437193185099624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 -DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.10863828659057617 s -INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040234565734863 s -DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=35929562461058030591916718192874871548, time:1750766961.5888963s req_ids:[8] -DEBUG 06-24 20:09:21 [manager.py:391] -ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:177.36005783081055ms total_cost_time:177.40631103515625ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:6927 prompt_cache_len:5151 prompt_cache_ratio:0.743611953226505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 -DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.11058378219604492 s -INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.11247730255126953 s -DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=135423499751192587588180300544723488528, time:1750766961.7581558s req_ids:[8] -DEBUG 06-24 20:09:21 [manager.py:391] -ERROR 06-24 20:09:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:195.2371597290039ms total_cost_time:195.29318809509277ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:6928 prompt_cache_len:5151 prompt_cache_ratio:0.7435046189376443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 -DEBUG 06-24 20:09:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:21 [manager.py:224] router recive req id 8 cost time 0.11017489433288574 s -INFO 06-24 20:09:21 [manager.py:68] detokenization recv req id 8 cost time 0.1122288703918457 s -DEBUG 06-24 20:09:21 [manager.py:391] Prefill Batch: batch_id=17749891700881748912510547420416998015, time:1750766961.9704053s req_ids:[8] -DEBUG 06-24 20:09:21 [manager.py:391] -ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:21 lightllm_req_id:8 first_token_cost:216.73274040222168ms total_cost_time:216.79139137268066ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:6929 prompt_cache_len:5151 prompt_cache_ratio:0.743397315629961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 -DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.11032485961914062 s -INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.1122133731842041 s -DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=96170561581559741611959441122627898410, time:1750766962.186606s req_ids:[8] -DEBUG 06-24 20:09:22 [manager.py:391] -ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.76710319519043ms total_cost_time:209.8236083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:6930 prompt_cache_len:5151 prompt_cache_ratio:0.7432900432900433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 -DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.10929703712463379 s -INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.11138916015625 s -DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=292199263672672958146266758669996439660, time:1750766962.4028592s req_ids:[8] -DEBUG 06-24 20:09:22 [manager.py:391] -ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.5053195953369ms total_cost_time:209.55920219421387ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:6931 prompt_cache_len:5151 prompt_cache_ratio:0.743182801904487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 -DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.10923552513122559 s -INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.11129999160766602 s -DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=286061526647172331166079189505142377498, time:1750766962.6165218s req_ids:[8] -DEBUG 06-24 20:09:22 [manager.py:391] -ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:207.411527633667ms total_cost_time:207.47089385986328ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6932 prompt_cache_len:5151 prompt_cache_ratio:0.7430755914598961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 -DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:22 [manager.py:224] router recive req id 8 cost time 0.11026859283447266 s -INFO 06-24 20:09:22 [manager.py:68] detokenization recv req id 8 cost time 0.1121664047241211 s -DEBUG 06-24 20:09:22 [manager.py:391] Prefill Batch: batch_id=192833902552775011150571354853801803225, time:1750766962.8279874s req_ids:[8] -DEBUG 06-24 20:09:22 [manager.py:391] -ERROR 06-24 20:09:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:204.43248748779297ms total_cost_time:204.49161529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:6933 prompt_cache_len:5151 prompt_cache_ratio:0.7429684119428819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 -DEBUG 06-24 20:09:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.11007881164550781 s -INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11205935478210449 s -DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=271720725053971868565770338556972290025, time:1750766963.0397692s req_ids:[8] -DEBUG 06-24 20:09:23 [manager.py:391] -ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:22 lightllm_req_id:8 first_token_cost:209.5029354095459ms total_cost_time:209.5623016357422ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:6934 prompt_cache_len:5151 prompt_cache_ratio:0.7428612633400634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 -DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:23 [batch.py:51] router release req id 8 -INFO 06-24 20:09:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:09:23 [statics_utils.py:24] mean first cost: 232.68165588378906 ms -INFO 06-24 20:09:23 [statics_utils.py:24] mean per token cost: 0.10072723610154566 ms -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.10782480239868164 s -INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11026597023010254 s -DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=195648951257120370286849110014651569748, time:1750766963.2543972s req_ids:[8] -DEBUG 06-24 20:09:23 [manager.py:391] -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:377.0885467529297ms total_cost_time:377.1328926086426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6935 prompt_cache_len:5151 prompt_cache_ratio:0.7427541456380677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 -DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.1070699691772461 s -INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.10921168327331543 s -DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=335158384099918207981021655922702029707, time:1750766963.6349595s req_ids:[8] -DEBUG 06-24 20:09:23 [manager.py:391] -ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:202.63290405273438ms total_cost_time:202.68726348876953ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:6936 prompt_cache_len:5151 prompt_cache_ratio:0.7426470588235294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 -DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:23 [manager.py:224] router recive req id 8 cost time 0.10824394226074219 s -INFO 06-24 20:09:23 [manager.py:68] detokenization recv req id 8 cost time 0.11061930656433105 s -DEBUG 06-24 20:09:23 [manager.py:391] Prefill Batch: batch_id=294327538711928867078826592577168536712, time:1750766963.8495407s req_ids:[8] -DEBUG 06-24 20:09:23 [manager.py:391] -ERROR 06-24 20:09:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:170.34530639648438ms total_cost_time:170.38655281066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:6937 prompt_cache_len:5151 prompt_cache_ratio:0.7425400028830906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 -DEBUG 06-24 20:09:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.10738801956176758 s -INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s -DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=233634497428596586444807743092893542204, time:1750766964.0211234s req_ids:[8] -DEBUG 06-24 20:09:24 [manager.py:391] -ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:23 lightllm_req_id:8 first_token_cost:200.27852058410645ms total_cost_time:200.32191276550293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6938 prompt_cache_len:5151 prompt_cache_ratio:0.7424329778034016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 -DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.10926032066345215 s -INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11113333702087402 s -DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=274999008431250849009176309288609288878, time:1750766964.230775s req_ids:[8] -DEBUG 06-24 20:09:24 [manager.py:391] -ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:207.3037624359131ms total_cost_time:207.35812187194824ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:6939 prompt_cache_len:5151 prompt_cache_ratio:0.7423259835711198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 -DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11025500297546387 s -INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11198592185974121 s -DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=70549287475986639571941520420270440627, time:1750766964.453925s req_ids:[8] -DEBUG 06-24 20:09:24 [manager.py:391] -ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:219.37227249145508ms total_cost_time:219.42949295043945ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:6940 prompt_cache_len:5151 prompt_cache_ratio:0.7422190201729106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 -DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11045241355895996 s -INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11247825622558594 s -DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=221345667760810605225683432600496978715, time:1750766964.66762s req_ids:[8] -DEBUG 06-24 20:09:24 [manager.py:391] -ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:205.7323455810547ms total_cost_time:205.79123497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:6941 prompt_cache_len:5151 prompt_cache_ratio:0.7421120875954473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 -DEBUG 06-24 20:09:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:24 [manager.py:224] router recive req id 8 cost time 0.11027932167053223 s -INFO 06-24 20:09:24 [manager.py:68] detokenization recv req id 8 cost time 0.11237263679504395 s -DEBUG 06-24 20:09:24 [manager.py:391] Prefill Batch: batch_id=110506748024976645147247124505962285937, time:1750766964.8813798s req_ids:[8] -DEBUG 06-24 20:09:24 [manager.py:391] -ERROR 06-24 20:09:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:209.05017852783203ms total_cost_time:209.10906791687012ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:6942 prompt_cache_len:5151 prompt_cache_ratio:0.7420051858254105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 -DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s -INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.11039590835571289 s -DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=166740636188599530597183013295445101056, time:1750766965.0980039s req_ids:[8] -DEBUG 06-24 20:09:25 [manager.py:391] -ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:24 lightllm_req_id:8 first_token_cost:164.45207595825195ms total_cost_time:164.49308395385742ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6943 prompt_cache_len:5151 prompt_cache_ratio:0.7418983148494886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 -DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.11044907569885254 s -INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.1124732494354248 s -DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=153134180068656956769970409914680309911, time:1750766965.2644672s req_ids:[8] -DEBUG 06-24 20:09:25 [manager.py:391] -ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:202.70729064941406ms total_cost_time:202.7297019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:6944 prompt_cache_len:5151 prompt_cache_ratio:0.7417914746543779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 -DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10792016983032227 s -INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.10978984832763672 s -DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=323837272937981191487168154030710788429, time:1750766965.4868433s req_ids:[8] -DEBUG 06-24 20:09:25 [manager.py:391] -ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:220.73698043823242ms total_cost_time:220.78180313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6945 prompt_cache_len:5151 prompt_cache_ratio:0.7416846652267819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 -DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10756206512451172 s -INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s -DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=222476395595585701786148228484142619695, time:1750766965.7046075s req_ids:[8] -DEBUG 06-24 20:09:25 [manager.py:391] -ERROR 06-24 20:09:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:208.1773281097412ms total_cost_time:208.21881294250488ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:6946 prompt_cache_len:5151 prompt_cache_ratio:0.7415778865534121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 -DEBUG 06-24 20:09:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:25 [manager.py:224] router recive req id 8 cost time 0.10687923431396484 s -INFO 06-24 20:09:25 [manager.py:68] detokenization recv req id 8 cost time 0.10938239097595215 s -DEBUG 06-24 20:09:25 [manager.py:391] Prefill Batch: batch_id=109069371626195611967487238876797404441, time:1750766965.9163272s req_ids:[8] -DEBUG 06-24 20:09:25 [manager.py:391] -ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:25 lightllm_req_id:8 first_token_cost:206.72059059143066ms total_cost_time:206.76374435424805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6947 prompt_cache_len:5151 prompt_cache_ratio:0.7414711386209875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 -DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.3108196258544922 s -INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.3127915859222412 s -DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=147953532134185098139541498918780709382, time:1750766966.3391767s req_ids:[8] -DEBUG 06-24 20:09:26 [manager.py:391] -ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:418.80011558532715ms total_cost_time:418.84350776672363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6948 prompt_cache_len:5151 prompt_cache_ratio:0.7413644214162349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 -DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10737156867980957 s -INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.10929346084594727 s -DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=72148535345002952708956666187890394553, time:1750766966.5562384s req_ids:[8] -DEBUG 06-24 20:09:26 [manager.py:391] -ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:09:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 31081.671 tokens/s -DEBUG 06-24 20:09:26 [stats.py:37] Avg prompt tokens throughput: 31072.700 tokens/s -DEBUG 06-24 20:09:26 [stats.py:37] Avg generate tokens throughput: 8.971 tokens/s -INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:208.6639404296875ms total_cost_time:208.723783493042ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6949 prompt_cache_len:5151 prompt_cache_ratio:0.7412577349258886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 -DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10642313957214355 s -INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.1087346076965332 s -DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=254639638272510080486951607467912890903, time:1750766966.7685235s req_ids:[8] -DEBUG 06-24 20:09:26 [manager.py:391] -ERROR 06-24 20:09:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:198.2276439666748ms total_cost_time:198.2710361480713ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6950 prompt_cache_len:5151 prompt_cache_ratio:0.7411510791366906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 -DEBUG 06-24 20:09:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:26 [manager.py:224] router recive req id 8 cost time 0.10822582244873047 s -INFO 06-24 20:09:26 [manager.py:68] detokenization recv req id 8 cost time 0.11042547225952148 s -DEBUG 06-24 20:09:26 [manager.py:391] Prefill Batch: batch_id=179204686512772318719832661932022784952, time:1750766966.9787612s req_ids:[8] -DEBUG 06-24 20:09:26 [manager.py:391] -ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:26 lightllm_req_id:8 first_token_cost:228.39069366455078ms total_cost_time:228.45101356506348ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:6951 prompt_cache_len:5151 prompt_cache_ratio:0.7410444540353905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 -DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.1079556941986084 s -INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097869873046875 s -DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=140654937359732499131976907812900226960, time:1750766967.213281s req_ids:[8] -DEBUG 06-24 20:09:27 [manager.py:391] -ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:214.77055549621582ms total_cost_time:214.85400199890137ms,out_token_counter:1 mean_per_token_cost_time: 0.08344650268554688ms prompt_token_num:6952 prompt_cache_len:5151 prompt_cache_ratio:0.7409378596087457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 -DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10795712471008301 s -INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.11027359962463379 s -DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=250633257242078789352267710236054104706, time:1750766967.4331238s req_ids:[8] -DEBUG 06-24 20:09:27 [manager.py:391] -ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:174.80087280273438ms total_cost_time:174.84545707702637ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:6953 prompt_cache_len:5151 prompt_cache_ratio:0.7408312958435208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 -DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s -INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.11091375350952148 s -DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=199177758026373373726423367977729069901, time:1750766967.614291s req_ids:[8] -DEBUG 06-24 20:09:27 [manager.py:391] -ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:210.71982383728027ms total_cost_time:210.76536178588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:6954 prompt_cache_len:5151 prompt_cache_ratio:0.7407247627264884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 -DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:27 [manager.py:224] router recive req id 8 cost time 0.10720205307006836 s -INFO 06-24 20:09:27 [manager.py:68] detokenization recv req id 8 cost time 0.1088874340057373 s -DEBUG 06-24 20:09:27 [manager.py:391] Prefill Batch: batch_id=8676309028398779381981767618697758672, time:1750766967.8321767s req_ids:[8] -DEBUG 06-24 20:09:27 [manager.py:391] -ERROR 06-24 20:09:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:214.22624588012695ms total_cost_time:214.27273750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:6955 prompt_cache_len:5151 prompt_cache_ratio:0.7406182602444285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 -DEBUG 06-24 20:09:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s -INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.11023426055908203 s -DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=111332636945963924268903145981125856586, time:1750766968.0520105s req_ids:[8] -DEBUG 06-24 20:09:28 [manager.py:391] -ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:27 lightllm_req_id:8 first_token_cost:211.38596534729004ms total_cost_time:211.44676208496094ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:6956 prompt_cache_len:5151 prompt_cache_ratio:0.7405117883841288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 -DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10715413093566895 s -INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.1090705394744873 s -DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=216271417722196592755908961979558241576, time:1750766968.2621663s req_ids:[8] -DEBUG 06-24 20:09:28 [manager.py:391] -ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:368.70741844177246ms total_cost_time:368.75247955322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6957 prompt_cache_len:5151 prompt_cache_ratio:0.7404053471323846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 -DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10851812362670898 s -INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.11017322540283203 s -DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=337211624246682531128525082782226120046, time:1750766968.6433308s req_ids:[8] -DEBUG 06-24 20:09:28 [manager.py:391] -ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:170.34554481506348ms total_cost_time:170.38655281066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:6958 prompt_cache_len:5151 prompt_cache_ratio:0.7402989364759989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 -DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:28 [manager.py:224] router recive req id 8 cost time 0.10801959037780762 s -INFO 06-24 20:09:28 [manager.py:68] detokenization recv req id 8 cost time 0.10977411270141602 s -DEBUG 06-24 20:09:28 [manager.py:391] Prefill Batch: batch_id=38476791923426436566418821364010559105, time:1750766968.8154457s req_ids:[8] -DEBUG 06-24 20:09:28 [manager.py:391] -ERROR 06-24 20:09:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:211.60292625427246ms total_cost_time:211.64870262145996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:6959 prompt_cache_len:5151 prompt_cache_ratio:0.7401925564017818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 -DEBUG 06-24 20:09:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10740494728088379 s -INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s -DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=198360749475603404131342523960186004621, time:1750766969.0389304s req_ids:[8] -DEBUG 06-24 20:09:29 [manager.py:391] -ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:28 lightllm_req_id:8 first_token_cost:210.36219596862793ms total_cost_time:210.41417121887207ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:6960 prompt_cache_len:5151 prompt_cache_ratio:0.7400862068965517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 -DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10890364646911621 s -INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11096334457397461 s -DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=39025271007477647371353754296646693976, time:1750766969.250993s req_ids:[8] -DEBUG 06-24 20:09:29 [manager.py:391] -ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:205.16109466552734ms total_cost_time:205.20615577697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6961 prompt_cache_len:5151 prompt_cache_ratio:0.739979887947134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 -DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10674333572387695 s -INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.10856485366821289 s -DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=95735099957216022983105811825329965616, time:1750766969.4630466s req_ids:[8] -DEBUG 06-24 20:09:29 [manager.py:391] -ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.06732368469238ms total_cost_time:206.11119270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6962 prompt_cache_len:5151 prompt_cache_ratio:0.7398735995403619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 -DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10825181007385254 s -INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s -DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=211863718066400655253570485250189148065, time:1750766969.6753507s req_ids:[8] -DEBUG 06-24 20:09:29 [manager.py:391] -ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.60948753356934ms total_cost_time:206.65383338928223ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6963 prompt_cache_len:5151 prompt_cache_ratio:0.7397673416630762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 -DEBUG 06-24 20:09:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:29 [manager.py:224] router recive req id 8 cost time 0.10793614387512207 s -INFO 06-24 20:09:29 [manager.py:68] detokenization recv req id 8 cost time 0.11043882369995117 s -DEBUG 06-24 20:09:29 [manager.py:391] Prefill Batch: batch_id=12422594749623131341584589390751468642, time:1750766969.8867943s req_ids:[8] -DEBUG 06-24 20:09:29 [manager.py:391] -ERROR 06-24 20:09:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:206.15077018737793ms total_cost_time:206.193208694458ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6964 prompt_cache_len:5151 prompt_cache_ratio:0.7396611143021252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 -DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10778665542602539 s -INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s -DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=145069796834992381023761704493784268065, time:1750766970.0988657s req_ids:[8] -DEBUG 06-24 20:09:30 [manager.py:391] -ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:29 lightllm_req_id:8 first_token_cost:207.14783668518066ms total_cost_time:207.17597007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:6965 prompt_cache_len:5151 prompt_cache_ratio:0.7395549174443646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 -DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10721230506896973 s -INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.1096944808959961 s -DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=103159980895018830692316190209700517025, time:1750766970.3109448s req_ids:[8] -DEBUG 06-24 20:09:30 [manager.py:391] -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:207.4904441833496ms total_cost_time:207.5343132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:6966 prompt_cache_len:5151 prompt_cache_ratio:0.7394487510766581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 -DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10827398300170898 s -INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.11072301864624023 s -DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=175392749261144683863549251990985085587, time:1750766970.5217838s req_ids:[8] -DEBUG 06-24 20:09:30 [manager.py:391] -ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:205.74474334716797ms total_cost_time:205.78885078430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:6967 prompt_cache_len:5151 prompt_cache_ratio:0.7393426151858763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 -DEBUG 06-24 20:09:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:30 [manager.py:224] router recive req id 8 cost time 0.10711097717285156 s -INFO 06-24 20:09:30 [manager.py:68] detokenization recv req id 8 cost time 0.10912895202636719 s -DEBUG 06-24 20:09:30 [manager.py:391] Prefill Batch: batch_id=119111949642591540425370972446168910753, time:1750766970.7333875s req_ids:[8] -DEBUG 06-24 20:09:30 [manager.py:391] -ERROR 06-24 20:09:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:375.12660026550293ms total_cost_time:375.1804828643799ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:6968 prompt_cache_len:5151 prompt_cache_ratio:0.7392365097588978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 -DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10806131362915039 s -INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010003089904785 s -DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=123228940517763598775949017972227999338, time:1750766971.1088696s req_ids:[8] -DEBUG 06-24 20:09:31 [manager.py:391] -ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:30 lightllm_req_id:8 first_token_cost:198.81916046142578ms total_cost_time:198.87924194335938ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6969 prompt_cache_len:5151 prompt_cache_ratio:0.7391304347826086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 -DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10688018798828125 s -INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.10885357856750488 s -DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=178153890777087341587262821169997035394, time:1750766971.3192205s req_ids:[8] -DEBUG 06-24 20:09:31 [manager.py:391] -ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:204.13875579833984ms total_cost_time:204.19931411743164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:6970 prompt_cache_len:5151 prompt_cache_ratio:0.7390243902439024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 -DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s -INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11038827896118164 s -DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=259928739140403687249978736846855060101, time:1750766971.5372643s req_ids:[8] -DEBUG 06-24 20:09:31 [manager.py:391] -ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:213.39678764343262ms total_cost_time:213.43994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:6971 prompt_cache_len:5151 prompt_cache_ratio:0.7389183761296801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 -DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10725569725036621 s -INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.1096963882446289 s -DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=10565937004940407226703449215169879609, time:1750766971.7503073s req_ids:[8] -DEBUG 06-24 20:09:31 [manager.py:391] -ERROR 06-24 20:09:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:205.56044578552246ms total_cost_time:205.60312271118164ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:6972 prompt_cache_len:5151 prompt_cache_ratio:0.7388123924268503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 -DEBUG 06-24 20:09:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:31 [manager.py:224] router recive req id 8 cost time 0.10792183876037598 s -INFO 06-24 20:09:31 [manager.py:68] detokenization recv req id 8 cost time 0.11002397537231445 s -DEBUG 06-24 20:09:31 [manager.py:391] Prefill Batch: batch_id=168194516618368278289754010730757320479, time:1750766971.9616761s req_ids:[8] -DEBUG 06-24 20:09:31 [manager.py:391] -ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:31 lightllm_req_id:8 first_token_cost:201.60889625549316ms total_cost_time:201.65181159973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6973 prompt_cache_len:5151 prompt_cache_ratio:0.738706439122329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 -DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10809206962585449 s -INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s -DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=157016632265416001099357344839958277751, time:1750766972.1703467s req_ids:[8] -DEBUG 06-24 20:09:32 [manager.py:391] -ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:205.24859428405762ms total_cost_time:205.3084373474121ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:6974 prompt_cache_len:5151 prompt_cache_ratio:0.7386005162030399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 -DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.11090588569641113 s -INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11348652839660645 s -DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=258701100296728703800063755114376830065, time:1750766972.3801236s req_ids:[8] -DEBUG 06-24 20:09:32 [manager.py:391] -ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:213.46569061279297ms total_cost_time:213.52744102478027ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:6975 prompt_cache_len:5151 prompt_cache_ratio:0.738494623655914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 -DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10808801651000977 s -INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s -DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=304169721027801386697519607523599451519, time:1750766972.6044662s req_ids:[8] -DEBUG 06-24 20:09:32 [manager.py:391] -ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:208.53328704833984ms total_cost_time:208.57858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6976 prompt_cache_len:5151 prompt_cache_ratio:0.7383887614678899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 -DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:32 [manager.py:224] router recive req id 8 cost time 0.10920381546020508 s -INFO 06-24 20:09:32 [manager.py:68] detokenization recv req id 8 cost time 0.11121153831481934 s -DEBUG 06-24 20:09:32 [manager.py:391] Prefill Batch: batch_id=286196213032217293205577654344663361900, time:1750766972.817637s req_ids:[8] -DEBUG 06-24 20:09:32 [manager.py:391] -ERROR 06-24 20:09:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:203.1397819519043ms total_cost_time:203.1846046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6977 prompt_cache_len:5151 prompt_cache_ratio:0.7382829296259137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 -DEBUG 06-24 20:09:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10750555992126465 s -INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.10956645011901855 s -DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=261152835254390078385340898973950328560, time:1750766973.0260477s req_ids:[8] -DEBUG 06-24 20:09:33 [manager.py:391] -ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:32 lightllm_req_id:8 first_token_cost:205.7485580444336ms total_cost_time:205.79195022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6978 prompt_cache_len:5151 prompt_cache_ratio:0.7381771281169389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 -DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10771703720092773 s -INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.10969138145446777 s -DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=250056607442531606481754718267049377893, time:1750766973.236964s req_ids:[8] -DEBUG 06-24 20:09:33 [manager.py:391] -ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:409.4517230987549ms total_cost_time:409.4970226287842ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6979 prompt_cache_len:5151 prompt_cache_ratio:0.7380713569279267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 -DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10772275924682617 s -INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.11020708084106445 s -DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=177041933191104831327555416569420636289, time:1750766973.6506557s req_ids:[8] -DEBUG 06-24 20:09:33 [manager.py:391] -ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:196.88010215759277ms total_cost_time:196.92039489746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:6980 prompt_cache_len:5151 prompt_cache_ratio:0.7379656160458453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 -DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:33 [manager.py:224] router recive req id 8 cost time 0.10817742347717285 s -INFO 06-24 20:09:33 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s -DEBUG 06-24 20:09:33 [manager.py:391] Prefill Batch: batch_id=218745766024046803353739353880985413365, time:1750766973.8579645s req_ids:[8] -DEBUG 06-24 20:09:33 [manager.py:391] -ERROR 06-24 20:09:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:205.5497169494629ms total_cost_time:205.59310913085938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:6981 prompt_cache_len:5151 prompt_cache_ratio:0.7378599054576708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 -DEBUG 06-24 20:09:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1093759536743164 s -INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11125946044921875 s -DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=599292272217602083766337463703539381, time:1750766974.0714693s req_ids:[8] -DEBUG 06-24 20:09:34 [manager.py:391] -ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:33 lightllm_req_id:8 first_token_cost:208.65797996520996ms total_cost_time:208.71806144714355ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:6982 prompt_cache_len:5151 prompt_cache_ratio:0.7377542251503867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 -DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.10860562324523926 s -INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11049032211303711 s -DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=203636070319301594395778703950407125825, time:1750766974.2837803s req_ids:[8] -DEBUG 06-24 20:09:34 [manager.py:391] -ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:202.4235725402832ms total_cost_time:202.4819850921631ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6983 prompt_cache_len:5151 prompt_cache_ratio:0.7376485751109838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 -DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1108095645904541 s -INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11267828941345215 s -DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=192737950361043851321723575816291530039, time:1750766974.4973862s req_ids:[8] -DEBUG 06-24 20:09:34 [manager.py:391] -ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:210.54363250732422ms total_cost_time:210.6020450592041ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:6984 prompt_cache_len:5151 prompt_cache_ratio:0.7375429553264605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 -DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.1110999584197998 s -INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11306285858154297 s -DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=201176628500201158513346679242497888119, time:1750766974.7085865s req_ids:[8] -DEBUG 06-24 20:09:34 [manager.py:391] -ERROR 06-24 20:09:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:206.49313926696777ms total_cost_time:206.55465126037598ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:6985 prompt_cache_len:5151 prompt_cache_ratio:0.7374373657838225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 -DEBUG 06-24 20:09:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:34 [manager.py:224] router recive req id 8 cost time 0.10787487030029297 s -INFO 06-24 20:09:34 [manager.py:68] detokenization recv req id 8 cost time 0.11038708686828613 s -DEBUG 06-24 20:09:34 [manager.py:391] Prefill Batch: batch_id=240948202692280021182180199113987502852, time:1750766974.924493s req_ids:[8] -DEBUG 06-24 20:09:34 [manager.py:391] -ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:34 lightllm_req_id:8 first_token_cost:202.4669647216797ms total_cost_time:202.5282382965088ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:6986 prompt_cache_len:5151 prompt_cache_ratio:0.737331806470083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 -DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10996460914611816 s -INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11197376251220703 s -DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=198135432441563189570564884917085807517, time:1750766975.1314347s req_ids:[8] -DEBUG 06-24 20:09:35 [manager.py:391] -ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:199.89752769470215ms total_cost_time:199.95379447937012ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:6987 prompt_cache_len:5151 prompt_cache_ratio:0.7372262773722628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 -DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.11095595359802246 s -INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11195731163024902 s -DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=102432406846420757366985783377264954113, time:1750766975.3409455s req_ids:[8] -DEBUG 06-24 20:09:35 [manager.py:391] -ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:195.44053077697754ms total_cost_time:195.51897048950195ms,out_token_counter:1 mean_per_token_cost_time: 0.07843971252441406ms prompt_token_num:6988 prompt_cache_len:5151 prompt_cache_ratio:0.7371207784773898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 -DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10727858543395996 s -INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s -DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=54035056628067357207579998115660089949, time:1750766975.5463512s req_ids:[8] -DEBUG 06-24 20:09:35 [manager.py:391] -ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:205.84988594055176ms total_cost_time:205.89208602905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:6989 prompt_cache_len:5151 prompt_cache_ratio:0.7370153097724996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 -DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10758256912231445 s -INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.10947656631469727 s -DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=288648455980673662204845089783337947518, time:1750766975.7587504s req_ids:[8] -DEBUG 06-24 20:09:35 [manager.py:391] -ERROR 06-24 20:09:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:206.2537670135498ms total_cost_time:206.298828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:6990 prompt_cache_len:5151 prompt_cache_ratio:0.7369098712446351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 -DEBUG 06-24 20:09:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:35 [manager.py:224] router recive req id 8 cost time 0.10798335075378418 s -INFO 06-24 20:09:35 [manager.py:68] detokenization recv req id 8 cost time 0.11006593704223633 s -DEBUG 06-24 20:09:35 [manager.py:391] Prefill Batch: batch_id=248423882294222575476569912793031191179, time:1750766975.9713943s req_ids:[8] -DEBUG 06-24 20:09:35 [manager.py:391] -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:35 lightllm_req_id:8 first_token_cost:206.98213577270508ms total_cost_time:207.02505111694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6991 prompt_cache_len:5151 prompt_cache_ratio:0.7368044628808468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 -DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.3092825412750244 s -INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.3112952709197998 s -DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=203956867761942255643498051693877618256, time:1750766976.402257s req_ids:[8] -DEBUG 06-24 20:09:36 [manager.py:391] -ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:431.8583011627197ms total_cost_time:431.903600692749ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:6992 prompt_cache_len:5151 prompt_cache_ratio:0.7366990846681922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 -DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10582518577575684 s -INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10760116577148438 s -DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=212014047260529389858333058757487263518, time:1750766976.6253958s req_ids:[8] -DEBUG 06-24 20:09:36 [manager.py:391] -ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:09:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 30592.531 tokens/s -DEBUG 06-24 20:09:36 [stats.py:37] Avg prompt tokens throughput: 30583.757 tokens/s -DEBUG 06-24 20:09:36 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s -INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:174.41368103027344ms total_cost_time:174.43513870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:6993 prompt_cache_len:5151 prompt_cache_ratio:0.7365937365937366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 -DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10584735870361328 s -INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10781383514404297 s -DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=317990431194270743129708758431052549586, time:1750766976.7967572s req_ids:[8] -DEBUG 06-24 20:09:36 [manager.py:391] -ERROR 06-24 20:09:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:195.44291496276855ms total_cost_time:195.48535346984863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:6994 prompt_cache_len:5151 prompt_cache_ratio:0.7364884186445525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 -DEBUG 06-24 20:09:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:36 [manager.py:224] router recive req id 8 cost time 0.10747766494750977 s -INFO 06-24 20:09:36 [manager.py:68] detokenization recv req id 8 cost time 0.10955286026000977 s -DEBUG 06-24 20:09:36 [manager.py:391] Prefill Batch: batch_id=85245416132971713248762513453855472676, time:1750766976.9970174s req_ids:[8] -DEBUG 06-24 20:09:36 [manager.py:391] -ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:36 lightllm_req_id:8 first_token_cost:198.5175609588623ms total_cost_time:198.5604763031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6995 prompt_cache_len:5151 prompt_cache_ratio:0.7363831308077198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 -DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10686349868774414 s -INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10886216163635254 s -DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=310669307802095323216903216716115554466, time:1750766977.205106s req_ids:[8] -DEBUG 06-24 20:09:37 [manager.py:391] -ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:203.68123054504395ms total_cost_time:203.72414588928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:6996 prompt_cache_len:5151 prompt_cache_ratio:0.7362778730703259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 -DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s -INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10897111892700195 s -DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=76207439430784735306067978913635058296, time:1750766977.416222s req_ids:[8] -DEBUG 06-24 20:09:37 [manager.py:391] -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:205.85227012634277ms total_cost_time:205.89709281921387ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6997 prompt_cache_len:5151 prompt_cache_ratio:0.7361726454194655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 -DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10820794105529785 s -INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102745532989502 s -DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=170546230080354542849015320167025605022, time:1750766977.630516s req_ids:[8] -DEBUG 06-24 20:09:37 [manager.py:391] -ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:209.5344066619873ms total_cost_time:209.5792293548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:6998 prompt_cache_len:5151 prompt_cache_ratio:0.7360674478422407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 -DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:37 [manager.py:224] router recive req id 8 cost time 0.10785794258117676 s -INFO 06-24 20:09:37 [manager.py:68] detokenization recv req id 8 cost time 0.10991692543029785 s -DEBUG 06-24 20:09:37 [manager.py:391] Prefill Batch: batch_id=275940969347150611953668944432863123474, time:1750766977.8439746s req_ids:[8] -DEBUG 06-24 20:09:37 [manager.py:391] -ERROR 06-24 20:09:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:209.5324993133545ms total_cost_time:209.57684516906738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:6999 prompt_cache_len:5151 prompt_cache_ratio:0.7359622803257608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 -DEBUG 06-24 20:09:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s -INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.11139082908630371 s -DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=109969116172603545355387431760986067321, time:1750766978.069327s req_ids:[8] -DEBUG 06-24 20:09:38 [manager.py:391] -ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:37 lightllm_req_id:8 first_token_cost:233.43849182128906ms total_cost_time:233.48236083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7000 prompt_cache_len:5151 prompt_cache_ratio:0.7358571428571429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 -DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s -INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s -DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=233533177281800167923189218572262983328, time:1750766978.3195708s req_ids:[8] -DEBUG 06-24 20:09:38 [manager.py:391] -ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:445.5878734588623ms total_cost_time:445.6343650817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7001 prompt_cache_len:5151 prompt_cache_ratio:0.735752035423511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 -DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.1077260971069336 s -INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.1103205680847168 s -DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=29852215741968064702695415886492974499, time:1750766978.7431128s req_ids:[8] -DEBUG 06-24 20:09:38 [manager.py:391] -ERROR 06-24 20:09:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:198.1651782989502ms total_cost_time:198.2095241546631ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7002 prompt_cache_len:5151 prompt_cache_ratio:0.7356469580119965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 -DEBUG 06-24 20:09:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:38 [manager.py:224] router recive req id 8 cost time 0.1074211597442627 s -INFO 06-24 20:09:38 [manager.py:68] detokenization recv req id 8 cost time 0.1096947193145752 s -DEBUG 06-24 20:09:38 [manager.py:391] Prefill Batch: batch_id=62184181445982319536237629206583589940, time:1750766978.9545083s req_ids:[8] -DEBUG 06-24 20:09:38 [manager.py:391] -ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:38 lightllm_req_id:8 first_token_cost:206.89630508422852ms total_cost_time:206.9411277770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7003 prompt_cache_len:5151 prompt_cache_ratio:0.7355419106097387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 -DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s -INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s -DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=221300506093925473206125020576025282748, time:1750766979.17306s req_ids:[8] -DEBUG 06-24 20:09:39 [manager.py:391] -ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:216.92585945129395ms total_cost_time:216.96853637695312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7004 prompt_cache_len:5151 prompt_cache_ratio:0.7354368932038835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 -DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10778546333312988 s -INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10986018180847168 s -DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=9565274677745250305264589072776311165, time:1750766979.3985596s req_ids:[8] -DEBUG 06-24 20:09:39 [manager.py:391] -ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:220.4720973968506ms total_cost_time:220.52836418151855ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7005 prompt_cache_len:5151 prompt_cache_ratio:0.7353319057815846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 -DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:39 [batch.py:51] router release req id 8 -INFO 06-24 20:09:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s -INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10980582237243652 s -DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=145742353410665672479627251815149880301, time:1750766979.615521s req_ids:[8] -DEBUG 06-24 20:09:39 [manager.py:391] -ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:210.34812927246094ms total_cost_time:210.39199829101562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7006 prompt_cache_len:5151 prompt_cache_ratio:0.7352269483300029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 -DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:39 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s -INFO 06-24 20:09:39 [manager.py:68] detokenization recv req id 8 cost time 0.10942506790161133 s -DEBUG 06-24 20:09:39 [manager.py:391] Prefill Batch: batch_id=295453608528418028322228214570836746228, time:1750766979.8308558s req_ids:[8] -DEBUG 06-24 20:09:39 [manager.py:391] -ERROR 06-24 20:09:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:203.84669303894043ms total_cost_time:203.89175415039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7007 prompt_cache_len:5151 prompt_cache_ratio:0.7351220208363065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 -DEBUG 06-24 20:09:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10746884346008301 s -INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.10948443412780762 s -DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=169824647646680485101925820862177075342, time:1750766980.041262s req_ids:[8] -DEBUG 06-24 20:09:40 [manager.py:391] -ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:39 lightllm_req_id:8 first_token_cost:203.04083824157715ms total_cost_time:203.08470726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7008 prompt_cache_len:5151 prompt_cache_ratio:0.7350171232876712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 -DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10655736923217773 s -INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.1083834171295166 s -DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=273997678334228137087699773507148443513, time:1750766980.2555933s req_ids:[8] -DEBUG 06-24 20:09:40 [manager.py:391] -ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:210.90102195739746ms total_cost_time:210.94369888305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7009 prompt_cache_len:5151 prompt_cache_ratio:0.7349122556712798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 -DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10744285583496094 s -INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.1092984676361084 s -DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=10498821165853357417995097129855956008, time:1750766980.467268s req_ids:[8] -DEBUG 06-24 20:09:40 [manager.py:391] -ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:204.82563972473145ms total_cost_time:204.86831665039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7010 prompt_cache_len:5151 prompt_cache_ratio:0.7348074179743224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 -DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10811519622802734 s -INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s -DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=2875633161752592044663104080438821551, time:1750766980.6768613s req_ids:[8] -DEBUG 06-24 20:09:40 [manager.py:391] -ERROR 06-24 20:09:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:203.0632495880127ms total_cost_time:203.10688018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7011 prompt_cache_len:5151 prompt_cache_ratio:0.7347026101839966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 -DEBUG 06-24 20:09:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:40 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s -INFO 06-24 20:09:40 [manager.py:68] detokenization recv req id 8 cost time 0.10880517959594727 s -DEBUG 06-24 20:09:40 [manager.py:391] Prefill Batch: batch_id=227277140553399177127145677353846599201, time:1750766980.886514s req_ids:[8] -DEBUG 06-24 20:09:40 [manager.py:391] -ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:40 lightllm_req_id:8 first_token_cost:340.3136730194092ms total_cost_time:340.3587341308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7012 prompt_cache_len:5151 prompt_cache_ratio:0.7345978322875071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 -DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10779619216918945 s -INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s -DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=20408072357736825724115787358674884017, time:1750766981.2277904s req_ids:[8] -DEBUG 06-24 20:09:41 [manager.py:391] -ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:190.0317668914795ms total_cost_time:190.08302688598633ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:7013 prompt_cache_len:5151 prompt_cache_ratio:0.7344930842720662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 -DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10681271553039551 s -INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10930013656616211 s -DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=270841038525781732133971214150114997634, time:1750766981.4292374s req_ids:[8] -DEBUG 06-24 20:09:41 [manager.py:391] -ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:216.75634384155273ms total_cost_time:216.7990207672119ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7014 prompt_cache_len:5151 prompt_cache_ratio:0.7343883661248931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 -DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10716390609741211 s -INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091916561126709 s -DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=158424158616074215731579477275511466768, time:1750766981.6577358s req_ids:[8] -DEBUG 06-24 20:09:41 [manager.py:391] -ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:205.45434951782227ms total_cost_time:205.49750328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7015 prompt_cache_len:5151 prompt_cache_ratio:0.7342836778332146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 -DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:41 [manager.py:224] router recive req id 8 cost time 0.10782527923583984 s -INFO 06-24 20:09:41 [manager.py:68] detokenization recv req id 8 cost time 0.10983753204345703 s -DEBUG 06-24 20:09:41 [manager.py:391] Prefill Batch: batch_id=247131592517961113993600040711359329630, time:1750766981.8631s req_ids:[8] -DEBUG 06-24 20:09:41 [manager.py:391] -ERROR 06-24 20:09:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:201.61080360412598ms total_cost_time:201.66683197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7016 prompt_cache_len:5151 prompt_cache_ratio:0.7341790193842646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 -DEBUG 06-24 20:09:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.11096954345703125 s -INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.1128995418548584 s -DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=216585471176658254948146543493173651465, time:1750766982.0709512s req_ids:[8] -DEBUG 06-24 20:09:42 [manager.py:391] -ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:41 lightllm_req_id:8 first_token_cost:204.76603507995605ms total_cost_time:204.82301712036133ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7017 prompt_cache_len:5151 prompt_cache_ratio:0.7340743907652844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 -DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10823297500610352 s -INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s -DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=44664904328901707916365973852833571992, time:1750766982.2825196s req_ids:[8] -DEBUG 06-24 20:09:42 [manager.py:391] -ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:206.12478256225586ms total_cost_time:206.18462562561035ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:7018 prompt_cache_len:5151 prompt_cache_ratio:0.7339697919635224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 -DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10662221908569336 s -INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.10856461524963379 s -DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=86823121673107716449771199287487425682, time:1750766982.506394s req_ids:[8] -DEBUG 06-24 20:09:42 [manager.py:391] -ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:216.47000312805176ms total_cost_time:216.51101112365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:7019 prompt_cache_len:5151 prompt_cache_ratio:0.7338652229662345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 -DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.1081686019897461 s -INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s -DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=72938464951548285394554736563170334019, time:1750766982.715524s req_ids:[8] -DEBUG 06-24 20:09:42 [manager.py:391] -ERROR 06-24 20:09:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:202.7730941772461ms total_cost_time:202.82673835754395ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:7020 prompt_cache_len:5151 prompt_cache_ratio:0.7337606837606837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 -DEBUG 06-24 20:09:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:42 [manager.py:224] router recive req id 8 cost time 0.10881757736206055 s -INFO 06-24 20:09:42 [manager.py:68] detokenization recv req id 8 cost time 0.11084365844726562 s -DEBUG 06-24 20:09:42 [manager.py:391] Prefill Batch: batch_id=321433588193429024606349864537648851852, time:1750766982.925698s req_ids:[8] -DEBUG 06-24 20:09:42 [manager.py:391] -ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:42 lightllm_req_id:8 first_token_cost:207.11588859558105ms total_cost_time:207.16190338134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7021 prompt_cache_len:5151 prompt_cache_ratio:0.7336561743341404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 -DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:43 [batch.py:51] router release req id 8 -INFO 06-24 20:09:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s -INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100766658782959 s -DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=129285581661108605582588076291320214339, time:1750766983.1382234s req_ids:[8] -DEBUG 06-24 20:09:43 [manager.py:391] -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:206.5272331237793ms total_cost_time:206.58588409423828ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7022 prompt_cache_len:5151 prompt_cache_ratio:0.733551694673882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 -DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10907912254333496 s -INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11107110977172852 s -DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=223976299216702650389736003719716837816, time:1750766983.3560214s req_ids:[8] -DEBUG 06-24 20:09:43 [manager.py:391] -ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:379.31084632873535ms total_cost_time:379.35709953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7023 prompt_cache_len:5151 prompt_cache_ratio:0.7334472447671935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 -DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.11153697967529297 s -INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11349892616271973 s -DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=212574178696194885909043746347840892454, time:1750766983.7301126s req_ids:[8] -DEBUG 06-24 20:09:43 [manager.py:391] -ERROR 06-24 20:09:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:200.22106170654297ms total_cost_time:200.27899742126465ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:7024 prompt_cache_len:5151 prompt_cache_ratio:0.7333428246013668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 -DEBUG 06-24 20:09:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:43 [manager.py:224] router recive req id 8 cost time 0.10841131210327148 s -INFO 06-24 20:09:43 [manager.py:68] detokenization recv req id 8 cost time 0.11044144630432129 s -DEBUG 06-24 20:09:43 [manager.py:391] Prefill Batch: batch_id=133893781925173173090075751705681009770, time:1750766983.9429348s req_ids:[8] -DEBUG 06-24 20:09:43 [manager.py:391] -ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:43 lightllm_req_id:8 first_token_cost:203.2606601715088ms total_cost_time:203.30548286437988ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7025 prompt_cache_len:5151 prompt_cache_ratio:0.733238434163701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 -DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s -INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s -DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=297123514128696453537523559760719515628, time:1750766984.1515064s req_ids:[8] -DEBUG 06-24 20:09:44 [manager.py:391] -ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:203.7038803100586ms total_cost_time:203.74727249145508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7026 prompt_cache_len:5151 prompt_cache_ratio:0.7331340734415029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 -DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.11017799377441406 s -INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.11210918426513672 s -DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=42637584963984286553057089134083885588, time:1750766984.3652472s req_ids:[8] -DEBUG 06-24 20:09:44 [manager.py:391] -ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:210.03389358520508ms total_cost_time:210.1118564605713ms,out_token_counter:1 mean_per_token_cost_time: 0.07796287536621094ms prompt_token_num:7027 prompt_cache_len:5151 prompt_cache_ratio:0.7330297424220862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 -DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10775613784790039 s -INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10984587669372559 s -DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=38565836076627402788130896525905266864, time:1750766984.5799851s req_ids:[8] -DEBUG 06-24 20:09:44 [manager.py:391] -ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:206.90298080444336ms total_cost_time:206.96067810058594ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7028 prompt_cache_len:5151 prompt_cache_ratio:0.7329254410927718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 -DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10861492156982422 s -INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.11050271987915039 s -DEBUG 06-24 20:09:44 [manager.py:391] Prefill Batch: batch_id=63290235803964911040438474994995393305, time:1750766984.7926052s req_ids:[8] -DEBUG 06-24 20:09:44 [manager.py:391] -ERROR 06-24 20:09:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:206.46405220031738ms total_cost_time:206.50768280029297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7029 prompt_cache_len:5151 prompt_cache_ratio:0.7328211694408877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 -DEBUG 06-24 20:09:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:44 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s -INFO 06-24 20:09:44 [manager.py:68] detokenization recv req id 8 cost time 0.10860586166381836 s -DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=249275468540962447202843775934637463001, time:1750766985.0068457s req_ids:[8] -DEBUG 06-24 20:09:45 [manager.py:391] -ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:44 lightllm_req_id:8 first_token_cost:208.5249423980713ms total_cost_time:208.58097076416016ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7030 prompt_cache_len:5151 prompt_cache_ratio:0.7327169274537696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 -DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s -INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10975384712219238 s -DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=59074255187604148962979577516464474336, time:1750766985.2205453s req_ids:[8] -DEBUG 06-24 20:09:45 [manager.py:391] -ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:205.6748867034912ms total_cost_time:205.718994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7031 prompt_cache_len:5151 prompt_cache_ratio:0.7326127151187598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 -DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.1074984073638916 s -INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10940432548522949 s -DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=115766482473309226226165846090986474542, time:1750766985.431283s req_ids:[8] -DEBUG 06-24 20:09:45 [manager.py:391] -ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:204.93555068969727ms total_cost_time:204.98037338256836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7032 prompt_cache_len:5151 prompt_cache_ratio:0.7325085324232082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 -DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s -INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.11043286323547363 s -DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=251860991257689450838886816552349828798, time:1750766985.6457691s req_ids:[8] -DEBUG 06-24 20:09:45 [manager.py:391] -ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:210.0510597229004ms total_cost_time:210.10780334472656ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7033 prompt_cache_len:5151 prompt_cache_ratio:0.7324043793544718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 -DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:45 [manager.py:224] router recive req id 8 cost time 0.10745859146118164 s -INFO 06-24 20:09:45 [manager.py:68] detokenization recv req id 8 cost time 0.10940384864807129 s -DEBUG 06-24 20:09:45 [manager.py:391] Prefill Batch: batch_id=321394784721419565652845163418924300160, time:1750766985.8593209s req_ids:[8] -DEBUG 06-24 20:09:45 [manager.py:391] -ERROR 06-24 20:09:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:206.94947242736816ms total_cost_time:206.99405670166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7034 prompt_cache_len:5151 prompt_cache_ratio:0.7323002558999147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 -DEBUG 06-24 20:09:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.1090095043182373 s -INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s -DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=8019153211479629106314897447107924278, time:1750766986.0705528s req_ids:[8] -DEBUG 06-24 20:09:46 [manager.py:391] -ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:45 lightllm_req_id:8 first_token_cost:205.13606071472168ms total_cost_time:205.17992973327637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7035 prompt_cache_len:5151 prompt_cache_ratio:0.7321961620469083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 -DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:46 [batch.py:51] router release req id 8 -INFO 06-24 20:09:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.3093726634979248 s -INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.3113830089569092 s -DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=254185250865029745042799285138379544140, time:1750766986.4877672s req_ids:[8] -DEBUG 06-24 20:09:46 [manager.py:391] -ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:415.8670902252197ms total_cost_time:415.912389755249ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7036 prompt_cache_len:5151 prompt_cache_ratio:0.7320920977828311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 -DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.10840535163879395 s -INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.11080646514892578 s -DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=224861289895817570093112946243918685720, time:1750766986.7065287s req_ids:[8] -DEBUG 06-24 20:09:46 [manager.py:391] -DEBUG 06-24 20:09:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 30770.076 tokens/s -DEBUG 06-24 20:09:46 [stats.py:37] Avg prompt tokens throughput: 30761.406 tokens/s -DEBUG 06-24 20:09:46 [stats.py:37] Avg generate tokens throughput: 8.670 tokens/s -ERROR 06-24 20:09:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:208.8794708251953ms total_cost_time:208.9235782623291ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7037 prompt_cache_len:5151 prompt_cache_ratio:0.7319880630950689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 -DEBUG 06-24 20:09:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:46 [manager.py:224] router recive req id 8 cost time 0.10781168937683105 s -INFO 06-24 20:09:46 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s -DEBUG 06-24 20:09:46 [manager.py:391] Prefill Batch: batch_id=39086507492685723244897939402818620777, time:1750766986.919084s req_ids:[8] -DEBUG 06-24 20:09:46 [manager.py:391] -ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:46 lightllm_req_id:8 first_token_cost:208.25505256652832ms total_cost_time:208.298921585083ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7038 prompt_cache_len:5151 prompt_cache_ratio:0.7318840579710145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 -DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10778284072875977 s -INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.10979104042053223 s -DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=144112067349485135343119971217517788557, time:1750766987.1344683s req_ids:[8] -DEBUG 06-24 20:09:47 [manager.py:391] -ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:213.1030559539795ms total_cost_time:213.14764022827148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7039 prompt_cache_len:5151 prompt_cache_ratio:0.7317800823980679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 -DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.1077113151550293 s -INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.1103219985961914 s -DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=34153906626530375061910247054427422010, time:1750766987.347248s req_ids:[8] -DEBUG 06-24 20:09:47 [manager.py:391] -ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:201.32780075073242ms total_cost_time:201.3850212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7040 prompt_cache_len:5151 prompt_cache_ratio:0.7316761363636364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 -DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10864877700805664 s -INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s -DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=193271645915833834442346805406463976612, time:1750766987.5595512s req_ids:[8] -DEBUG 06-24 20:09:47 [manager.py:391] -ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:205.78956604003906ms total_cost_time:205.83367347717285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7041 prompt_cache_len:5151 prompt_cache_ratio:0.7315722198551342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 -DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.1080617904663086 s -INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.1099705696105957 s -DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=149474528920331779775155659984858842044, time:1750766987.7699776s req_ids:[8] -DEBUG 06-24 20:09:47 [manager.py:391] -ERROR 06-24 20:09:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:206.26091957092285ms total_cost_time:206.30240440368652ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7042 prompt_cache_len:5151 prompt_cache_ratio:0.7314683328599829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 -DEBUG 06-24 20:09:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:47 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s -INFO 06-24 20:09:47 [manager.py:68] detokenization recv req id 8 cost time 0.11040520668029785 s -DEBUG 06-24 20:09:47 [manager.py:391] Prefill Batch: batch_id=71217097253853702939773252014690635086, time:1750766987.9828205s req_ids:[8] -DEBUG 06-24 20:09:47 [manager.py:391] -ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:47 lightllm_req_id:8 first_token_cost:202.76308059692383ms total_cost_time:202.80814170837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7043 prompt_cache_len:5151 prompt_cache_ratio:0.7313644753656112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 -DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10756468772888184 s -INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10949110984802246 s -DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=68165960913190061882280910302921811922, time:1750766988.1957924s req_ids:[8] -DEBUG 06-24 20:09:48 [manager.py:391] -ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:214.6279811859131ms total_cost_time:214.674711227417ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7044 prompt_cache_len:5151 prompt_cache_ratio:0.7312606473594548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 -DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10690045356750488 s -INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10863351821899414 s -DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=188178136399052333078211054209845536012, time:1750766988.4247224s req_ids:[8] -DEBUG 06-24 20:09:48 [manager.py:391] -ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:356.88304901123047ms total_cost_time:356.92739486694336ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7045 prompt_cache_len:5151 prompt_cache_ratio:0.7311568488289567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 -DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s -INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.10933208465576172 s -DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=288692115567613850583141710337677405389, time:1750766988.7676241s req_ids:[8] -DEBUG 06-24 20:09:48 [manager.py:391] -ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:158.58793258666992ms total_cost_time:158.6306095123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7046 prompt_cache_len:5151 prompt_cache_ratio:0.7310530797615669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 -DEBUG 06-24 20:09:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:48 [manager.py:224] router recive req id 8 cost time 0.10745406150817871 s -INFO 06-24 20:09:48 [manager.py:68] detokenization recv req id 8 cost time 0.1090250015258789 s -DEBUG 06-24 20:09:48 [manager.py:391] Prefill Batch: batch_id=331250265777993948946108102614150056771, time:1750766988.9314344s req_ids:[8] -DEBUG 06-24 20:09:48 [manager.py:391] -ERROR 06-24 20:09:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:156.31985664367676ms total_cost_time:156.36277198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7047 prompt_cache_len:5151 prompt_cache_ratio:0.7309493401447424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10769033432006836 s -INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10982561111450195 s -DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=32205043930665808063507706587165837699, time:1750766989.0947518s req_ids:[8] -DEBUG 06-24 20:09:49 [manager.py:391] -ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:48 lightllm_req_id:8 first_token_cost:197.20053672790527ms total_cost_time:197.24297523498535ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7048 prompt_cache_len:5151 prompt_cache_ratio:0.7308456299659478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10632896423339844 s -INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10814166069030762 s -DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=62252368727601393800237313363757503139, time:1750766989.2996423s req_ids:[8] -DEBUG 06-24 20:09:49 [manager.py:391] -ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:165.00401496887207ms total_cost_time:165.04645347595215ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7049 prompt_cache_len:5151 prompt_cache_ratio:0.7307419492126542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10790109634399414 s -INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10969328880310059 s -DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=285115388595355004479184503824237321996, time:1750766989.4693289s req_ids:[8] -DEBUG 06-24 20:09:49 [manager.py:391] -ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:178.96056175231934ms total_cost_time:179.0030002593994ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7050 prompt_cache_len:5151 prompt_cache_ratio:0.7306382978723405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10732197761535645 s -INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10933041572570801 s -DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=99618431375576687749249584378704478292, time:1750766989.6511416s req_ids:[8] -DEBUG 06-24 20:09:49 [manager.py:391] -ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:187.02173233032227ms total_cost_time:187.06560134887695ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7051 prompt_cache_len:5151 prompt_cache_ratio:0.7305346759324919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:49 [manager.py:224] router recive req id 8 cost time 0.10711240768432617 s -INFO 06-24 20:09:49 [manager.py:68] detokenization recv req id 8 cost time 0.10888886451721191 s -DEBUG 06-24 20:09:49 [manager.py:391] Prefill Batch: batch_id=186464892543180224506767692052298905781, time:1750766989.8468451s req_ids:[8] -DEBUG 06-24 20:09:49 [manager.py:391] -ERROR 06-24 20:09:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:162.51230239868164ms total_cost_time:162.55474090576172ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7052 prompt_cache_len:5151 prompt_cache_ratio:0.7304310833806013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 -DEBUG 06-24 20:09:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10682177543640137 s -INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10859322547912598 s -DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=331081502006136302528339927990028052165, time:1750766990.012144s req_ids:[8] -DEBUG 06-24 20:09:50 [manager.py:391] -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:49 lightllm_req_id:8 first_token_cost:158.58888626098633ms total_cost_time:158.6308479309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7053 prompt_cache_len:5151 prompt_cache_ratio:0.7303275202041685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 -DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.1069798469543457 s -INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10868024826049805 s -DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=241564626071293052156886647871064829037, time:1750766990.1771505s req_ids:[8] -DEBUG 06-24 20:09:50 [manager.py:391] -ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:158.4620475769043ms total_cost_time:158.50543975830078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7054 prompt_cache_len:5151 prompt_cache_ratio:0.7302239863907003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 -DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10781049728393555 s -INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10969066619873047 s -DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=250381936632162221198521456350413183183, time:1750766990.339713s req_ids:[8] -DEBUG 06-24 20:09:50 [manager.py:391] -ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:156.9061279296875ms total_cost_time:156.9499969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7055 prompt_cache_len:5151 prompt_cache_ratio:0.7301204819277108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 -DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10957837104797363 s -INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.1116032600402832 s -DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=289450376407739396891543488800770439243, time:1750766990.5032516s req_ids:[8] -DEBUG 06-24 20:09:50 [manager.py:391] -ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:373.4128475189209ms total_cost_time:373.457670211792ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7056 prompt_cache_len:5151 prompt_cache_ratio:0.7300170068027211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 -DEBUG 06-24 20:09:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:50 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s -INFO 06-24 20:09:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972261428833008 s -DEBUG 06-24 20:09:50 [manager.py:391] Prefill Batch: batch_id=166926929915001501458874246876964424846, time:1750766990.8827965s req_ids:[8] -DEBUG 06-24 20:09:50 [manager.py:391] -ERROR 06-24 20:09:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:199.41186904907227ms total_cost_time:199.45478439331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7057 prompt_cache_len:5151 prompt_cache_ratio:0.7299135610032592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10807490348815918 s -INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10982346534729004 s -DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=243798304501683709424806666007937849613, time:1750766991.0885587s req_ids:[8] -DEBUG 06-24 20:09:51 [manager.py:391] -ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:50 lightllm_req_id:8 first_token_cost:198.50850105285645ms total_cost_time:198.55356216430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7058 prompt_cache_len:5151 prompt_cache_ratio:0.7298101445168603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10636711120605469 s -INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10811972618103027 s -DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=206753105262050173169055719858943823414, time:1750766991.2953157s req_ids:[8] -DEBUG 06-24 20:09:51 [manager.py:391] -ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:167.222261428833ms total_cost_time:167.2656536102295ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7059 prompt_cache_len:5151 prompt_cache_ratio:0.7297067573310667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -DEBUG 06-24 20:09:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:51 [manager.py:283] -DEBUG 06-24 20:09:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:51 [manager.py:284] -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10506558418273926 s -INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.10691666603088379 s -DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=313898699638901334526512905155452506375, time:1750766991.4720092s req_ids:[8] -DEBUG 06-24 20:09:51 [manager.py:391] -ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:205.81865310668945ms total_cost_time:205.84511756896973ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7060 prompt_cache_len:5151 prompt_cache_ratio:0.7296033994334278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10541939735412598 s -INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.1073312759399414 s -DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=182807026759947702512988392440017852853, time:1750766991.6790426s req_ids:[8] -DEBUG 06-24 20:09:51 [manager.py:391] -ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:171.7660427093506ms total_cost_time:171.80991172790527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7061 prompt_cache_len:5151 prompt_cache_ratio:0.7295000708114998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:51 [manager.py:224] router recive req id 8 cost time 0.10789656639099121 s -INFO 06-24 20:09:51 [manager.py:68] detokenization recv req id 8 cost time 0.11008071899414062 s -DEBUG 06-24 20:09:51 [manager.py:391] Prefill Batch: batch_id=196182240311157225562385165972302170902, time:1750766991.8514597s req_ids:[8] -DEBUG 06-24 20:09:51 [manager.py:391] -ERROR 06-24 20:09:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:159.41119194030762ms total_cost_time:159.4550609588623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7062 prompt_cache_len:5151 prompt_cache_ratio:0.7293967714528462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 -DEBUG 06-24 20:09:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10716867446899414 s -INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10949945449829102 s -DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=302703372957099633730743925568492783910, time:1750766992.0162678s req_ids:[8] -DEBUG 06-24 20:09:52 [manager.py:391] -ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:51 lightllm_req_id:8 first_token_cost:160.28547286987305ms total_cost_time:160.33077239990234ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7063 prompt_cache_len:5151 prompt_cache_ratio:0.7292935013450376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 -DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10603976249694824 s -INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10834789276123047 s -DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=253418889061719988393636136768012854595, time:1750766992.1826224s req_ids:[8] -DEBUG 06-24 20:09:52 [manager.py:391] -ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:157.32598304748535ms total_cost_time:157.35340118408203ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7064 prompt_cache_len:5151 prompt_cache_ratio:0.7291902604756512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 -DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10661768913269043 s -INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.1088569164276123 s -DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=3443360195119048305859640309605250915, time:1750766992.3617861s req_ids:[8] -DEBUG 06-24 20:09:52 [manager.py:391] -ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:175.18091201782227ms total_cost_time:175.22454261779785ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7065 prompt_cache_len:5151 prompt_cache_ratio:0.7290870488322717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 -DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10587453842163086 s -INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10802173614501953 s -DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=177798015500150624239925639168977529816, time:1750766992.5247648s req_ids:[8] -DEBUG 06-24 20:09:52 [manager.py:391] -ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:156.72612190246582ms total_cost_time:156.7530632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7066 prompt_cache_len:5151 prompt_cache_ratio:0.7289838664024908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 -DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:52 [manager.py:224] router recive req id 8 cost time 0.10684490203857422 s -INFO 06-24 20:09:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918951034545898 s -DEBUG 06-24 20:09:52 [manager.py:391] Prefill Batch: batch_id=301204671251040806212397617958475393477, time:1750766992.6902378s req_ids:[8] -DEBUG 06-24 20:09:52 [manager.py:391] -ERROR 06-24 20:09:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:346.18401527404785ms total_cost_time:346.2209701538086ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:7067 prompt_cache_len:5151 prompt_cache_ratio:0.7288807131739069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 -DEBUG 06-24 20:09:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10605931282043457 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10823893547058105 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=91275080856245484722246561375573889001, time:1750766993.0414362s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:52 lightllm_req_id:8 first_token_cost:162.37735748291016ms total_cost_time:162.40310668945312ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:7068 prompt_cache_len:5151 prompt_cache_ratio:0.7287775891341256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 -DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:53 [batch.py:51] router release req id 8 -INFO 06-24 20:09:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:09:53 [statics_utils.py:24] mean first cost: 231.91998568904629 ms -INFO 06-24 20:09:53 [statics_utils.py:24] mean per token cost: 0.09801591679639993 ms -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10585689544677734 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10819530487060547 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=30424601777397132778491633213816572716, time:1750766993.2079294s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:156.9344997406006ms total_cost_time:156.96215629577637ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7069 prompt_cache_len:5151 prompt_cache_ratio:0.7286744942707597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 -DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10516571998596191 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10756969451904297 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=261790003214515784803493285501489270068, time:1750766993.374977s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:162.30535507202148ms total_cost_time:162.33181953430176ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7070 prompt_cache_len:5151 prompt_cache_ratio:0.7285714285714285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 -DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10466432571411133 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.10694384574890137 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=126479945513051239574125682163455071749, time:1750766993.5413985s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:162.40787506103516ms total_cost_time:162.43481636047363ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7071 prompt_cache_len:5151 prompt_cache_ratio:0.728468392023759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 -DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10760045051574707 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100015640258789 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=64884687019624248573500707260782935831, time:1750766993.708866s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:211.73810958862305ms total_cost_time:211.78436279296875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7072 prompt_cache_len:5151 prompt_cache_ratio:0.7283653846153846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 -DEBUG 06-24 20:09:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:53 [manager.py:224] router recive req id 8 cost time 0.10813117027282715 s -INFO 06-24 20:09:53 [manager.py:68] detokenization recv req id 8 cost time 0.11033320426940918 s -DEBUG 06-24 20:09:53 [manager.py:391] Prefill Batch: batch_id=305228987426012316669365352995151230602, time:1750766993.9291317s req_ids:[8] -DEBUG 06-24 20:09:53 [manager.py:391] -ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:53 lightllm_req_id:8 first_token_cost:213.82975578308105ms total_cost_time:213.87434005737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7073 prompt_cache_len:5151 prompt_cache_ratio:0.728262406333946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 -DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10740447044372559 s -INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10982728004455566 s -DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=122004746788792347499561384702186841266, time:1750766994.1441474s req_ids:[8] -DEBUG 06-24 20:09:54 [manager.py:391] -ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:213.1040096282959ms total_cost_time:213.148832321167ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7074 prompt_cache_len:5151 prompt_cache_ratio:0.7281594571670907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 -DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10576248168945312 s -INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10813736915588379 s -DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=333308016617898820066244908809605307267, time:1750766994.364741s req_ids:[8] -DEBUG 06-24 20:09:54 [manager.py:391] -ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:188.12847137451172ms total_cost_time:188.1735324859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7075 prompt_cache_len:5151 prompt_cache_ratio:0.7280565371024735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 -DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s -INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s -DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=30434778279694482488887304214310495836, time:1750766994.5583978s req_ids:[8] -DEBUG 06-24 20:09:54 [manager.py:391] -ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:203.8884162902832ms total_cost_time:203.9315700531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7076 prompt_cache_len:5151 prompt_cache_ratio:0.7279536461277558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 -DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10727119445800781 s -INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.10957622528076172 s -DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=242682439793286847111665614236775714354, time:1750766994.769921s req_ids:[8] -DEBUG 06-24 20:09:54 [manager.py:391] -ERROR 06-24 20:09:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:207.05914497375488ms total_cost_time:207.10325241088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7077 prompt_cache_len:5151 prompt_cache_ratio:0.7278507842306062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 -DEBUG 06-24 20:09:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:54 [manager.py:224] router recive req id 8 cost time 0.10812258720397949 s -INFO 06-24 20:09:54 [manager.py:68] detokenization recv req id 8 cost time 0.11047148704528809 s -DEBUG 06-24 20:09:54 [manager.py:391] Prefill Batch: batch_id=132979071581710553935111917518909292964, time:1750766994.9811187s req_ids:[8] -DEBUG 06-24 20:09:54 [manager.py:391] -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:09:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:54 lightllm_req_id:8 first_token_cost:206.13479614257812ms total_cost_time:206.18057250976562ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7078 prompt_cache_len:5151 prompt_cache_ratio:0.7277479513987002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 -DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.10833382606506348 s -INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.1106715202331543 s -DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=12379410039002637848782271546084553361, time:1750766995.205292s req_ids:[8] -DEBUG 06-24 20:09:55 [manager.py:391] -ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:221.91691398620605ms total_cost_time:221.96054458618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7079 prompt_cache_len:5151 prompt_cache_ratio:0.7276451476197203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 -DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.30980658531188965 s -DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=284150519502720374912508937930736402279, time:1750766995.6104312s req_ids:[8] -DEBUG 06-24 20:09:55 [manager.py:391] -INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.3123300075531006 s -ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:394.1361904144287ms total_cost_time:394.1817283630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7080 prompt_cache_len:5151 prompt_cache_ratio:0.7275423728813559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 -DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:55 [manager.py:224] router recive req id 8 cost time 0.10777783393859863 s -INFO 06-24 20:09:55 [manager.py:68] detokenization recv req id 8 cost time 0.10963153839111328 s -DEBUG 06-24 20:09:55 [manager.py:391] Prefill Batch: batch_id=230937608692170841542470437433688085535, time:1750766995.8246305s req_ids:[8] -DEBUG 06-24 20:09:55 [manager.py:391] -ERROR 06-24 20:09:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:210.43109893798828ms total_cost_time:210.47496795654297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7081 prompt_cache_len:5151 prompt_cache_ratio:0.7274396271713035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 -DEBUG 06-24 20:09:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10862255096435547 s -INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11070847511291504 s -DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=90349251318207458683030239345966701490, time:1750766996.0496924s req_ids:[8] -DEBUG 06-24 20:09:56 [manager.py:391] -ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:55 lightllm_req_id:8 first_token_cost:223.42419624328613ms total_cost_time:223.46735000610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7082 prompt_cache_len:5151 prompt_cache_ratio:0.7273369104772663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 -DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10870242118835449 s -INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11126542091369629 s -DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=296523933662993490342538824803118486461, time:1750766996.2671366s req_ids:[8] -DEBUG 06-24 20:09:56 [manager.py:391] -ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:208.74381065368652ms total_cost_time:208.78863334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7083 prompt_cache_len:5151 prompt_cache_ratio:0.7272342227869547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 -DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10796070098876953 s -INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.11027121543884277 s -DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=143133874909342421042249310486773561035, time:1750766996.4773555s req_ids:[8] -DEBUG 06-24 20:09:56 [manager.py:391] -ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:207.7808380126953ms total_cost_time:207.8239917755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7084 prompt_cache_len:5151 prompt_cache_ratio:0.7271315640880859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 -DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.10755062103271484 s -INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.10998749732971191 s -DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=159127175986164890766557908686690992255, time:1750766996.691371s req_ids:[8] -DEBUG 06-24 20:09:56 [manager.py:391] -ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:09:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 33653.398 tokens/s -DEBUG 06-24 20:09:56 [stats.py:37] Avg prompt tokens throughput: 33643.770 tokens/s -DEBUG 06-24 20:09:56 [stats.py:37] Avg generate tokens throughput: 9.628 tokens/s -INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:211.92049980163574ms total_cost_time:211.96460723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7085 prompt_cache_len:5151 prompt_cache_ratio:0.7270289343683839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 -DEBUG 06-24 20:09:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:56 [manager.py:224] router recive req id 8 cost time 0.1064913272857666 s -INFO 06-24 20:09:56 [manager.py:68] detokenization recv req id 8 cost time 0.10892057418823242 s -DEBUG 06-24 20:09:56 [manager.py:391] Prefill Batch: batch_id=116660142203876957422519773013296766280, time:1750766996.9073567s req_ids:[8] -DEBUG 06-24 20:09:56 [manager.py:391] -ERROR 06-24 20:09:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:202.3153305053711ms total_cost_time:202.3599147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7086 prompt_cache_len:5151 prompt_cache_ratio:0.72692633361558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 -DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10732316970825195 s -INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.10963249206542969 s -DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=175233227699193740443038301441889862525, time:1750766997.1230116s req_ids:[8] -DEBUG 06-24 20:09:57 [manager.py:391] -ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:56 lightllm_req_id:8 first_token_cost:211.8685245513916ms total_cost_time:211.91167831420898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7087 prompt_cache_len:5151 prompt_cache_ratio:0.7268237618174122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 -DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10704779624938965 s -INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.10946440696716309 s -DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=98909302763353059390833891356971687981, time:1750766997.338112s req_ids:[8] -DEBUG 06-24 20:09:57 [manager.py:391] -ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:208.1737518310547ms total_cost_time:208.21785926818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7088 prompt_cache_len:5151 prompt_cache_ratio:0.7267212189616253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 -DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10718512535095215 s -INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s -DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=314700914326133862294647399136865429122, time:1750766997.5496855s req_ids:[8] -DEBUG 06-24 20:09:57 [manager.py:391] -ERROR 06-24 20:09:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:406.96001052856445ms total_cost_time:406.9812297821045ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:7089 prompt_cache_len:5151 prompt_cache_ratio:0.7266187050359713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 -DEBUG 06-24 20:09:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:57 [manager.py:224] router recive req id 8 cost time 0.10350728034973145 s -INFO 06-24 20:09:57 [manager.py:68] detokenization recv req id 8 cost time 0.1057441234588623 s -DEBUG 06-24 20:09:57 [manager.py:391] Prefill Batch: batch_id=178255191269902766868388880060371012633, time:1750766997.95519s req_ids:[8] -DEBUG 06-24 20:09:57 [manager.py:391] -ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:57 lightllm_req_id:8 first_token_cost:201.54666900634766ms total_cost_time:201.59053802490234ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7090 prompt_cache_len:5151 prompt_cache_ratio:0.7265162200282087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 -DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10675621032714844 s -INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.10917186737060547 s -DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=132847899587549461555309619505983659335, time:1750766998.1771355s req_ids:[8] -DEBUG 06-24 20:09:58 [manager.py:391] -ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:238.2211685180664ms total_cost_time:238.2643222808838ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7091 prompt_cache_len:5151 prompt_cache_ratio:0.7264137639261035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 -DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10737037658691406 s -INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.1097867488861084 s -DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=315418887624735474318815576230088215970, time:1750766998.408618s req_ids:[8] -DEBUG 06-24 20:09:58 [manager.py:391] -ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:201.14493370056152ms total_cost_time:201.1876106262207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7092 prompt_cache_len:5151 prompt_cache_ratio:0.7263113367174281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 -DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10841560363769531 s -INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.11083102226257324 s -DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=8258850778964970381986830952183533027, time:1750766998.6193967s req_ids:[8] -DEBUG 06-24 20:09:58 [manager.py:391] -ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:206.50362968444824ms total_cost_time:206.56371116638184ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7093 prompt_cache_len:5151 prompt_cache_ratio:0.7262089383899619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 -DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:58 [manager.py:224] router recive req id 8 cost time 0.10534429550170898 s -INFO 06-24 20:09:58 [manager.py:68] detokenization recv req id 8 cost time 0.10771918296813965 s -DEBUG 06-24 20:09:58 [manager.py:391] Prefill Batch: batch_id=249823406403785494193284548172758073531, time:1750766998.830035s req_ids:[8] -DEBUG 06-24 20:09:58 [manager.py:391] -ERROR 06-24 20:09:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:205.2445411682129ms total_cost_time:205.28888702392578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7094 prompt_cache_len:5151 prompt_cache_ratio:0.7261065689314914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 -DEBUG 06-24 20:09:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10730648040771484 s -INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.10958099365234375 s -DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=273608528422316564533548418979934368023, time:1750766999.048555s req_ids:[8] -DEBUG 06-24 20:09:59 [manager.py:391] -ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:58 lightllm_req_id:8 first_token_cost:214.07842636108398ms total_cost_time:214.12181854248047ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7095 prompt_cache_len:5151 prompt_cache_ratio:0.7260042283298097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 -DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10831522941589355 s -INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11031603813171387 s -DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=230914194810430733131020604214937675244, time:1750766999.2714353s req_ids:[8] -DEBUG 06-24 20:09:59 [manager.py:391] -ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:216.9184684753418ms total_cost_time:216.96114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7096 prompt_cache_len:5151 prompt_cache_ratio:0.7259019165727171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 -DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s -INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11054420471191406 s -DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=53720769683128440599646698110962512146, time:1750766999.4839795s req_ids:[8] -DEBUG 06-24 20:09:59 [manager.py:391] -ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:205.34467697143555ms total_cost_time:205.40189743041992ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7097 prompt_cache_len:5151 prompt_cache_ratio:0.7257996336480202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 -DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10876607894897461 s -INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.11078786849975586 s -DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=322139315507372282227487931761803289286, time:1750766999.696448s req_ids:[8] -DEBUG 06-24 20:09:59 [manager.py:391] -ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:218.41192245483398ms total_cost_time:218.45602989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7098 prompt_cache_len:5151 prompt_cache_ratio:0.7256973795435334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 -DEBUG 06-24 20:09:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:09:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:09:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:09:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:09:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:09:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:09:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:09:59 [manager.py:224] router recive req id 8 cost time 0.10675263404846191 s -INFO 06-24 20:09:59 [manager.py:68] detokenization recv req id 8 cost time 0.10858440399169922 s -DEBUG 06-24 20:09:59 [manager.py:391] Prefill Batch: batch_id=197836052983697661160900405722389322420, time:1750766999.914774s req_ids:[8] -DEBUG 06-24 20:09:59 [manager.py:391] -ERROR 06-24 20:09:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:09:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:09:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:193.80593299865723ms total_cost_time:193.85147094726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7099 prompt_cache_len:5151 prompt_cache_ratio:0.7255951542470771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:09:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 -DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10581588745117188 s -INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.10826253890991211 s -DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=148742198960908114037739102838572146263, time:1750767000.118381s req_ids:[8] -DEBUG 06-24 20:10:00 [manager.py:391] -ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:09:59 lightllm_req_id:8 first_token_cost:377.7203559875488ms total_cost_time:377.7649402618408ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7100 prompt_cache_len:5151 prompt_cache_ratio:0.7254929577464789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 -DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.1080925464630127 s -INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.11050152778625488 s -DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=37550767906915893554883909801970245636, time:1750767000.4976037s req_ids:[8] -DEBUG 06-24 20:10:00 [manager.py:391] -ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:196.30789756774902ms total_cost_time:196.3496208190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7101 prompt_cache_len:5151 prompt_cache_ratio:0.7253907900295733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 -DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10767555236816406 s -INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.11003637313842773 s -DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=101428883375530902768809643651798357481, time:1750767000.703656s req_ids:[8] -DEBUG 06-24 20:10:00 [manager.py:391] -ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:202.32129096984863ms total_cost_time:202.3639678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7102 prompt_cache_len:5151 prompt_cache_ratio:0.7252886510842016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 -DEBUG 06-24 20:10:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:00 [manager.py:224] router recive req id 8 cost time 0.10772466659545898 s -INFO 06-24 20:10:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100306510925293 s -DEBUG 06-24 20:10:00 [manager.py:391] Prefill Batch: batch_id=18934513631104061511468611443629907904, time:1750767000.9131138s req_ids:[8] -DEBUG 06-24 20:10:00 [manager.py:391] -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:00 lightllm_req_id:8 first_token_cost:208.4505558013916ms total_cost_time:208.49204063415527ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7103 prompt_cache_len:5151 prompt_cache_ratio:0.725186540898212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 -DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10950255393981934 s -INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.11156868934631348 s -DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=259610609344834905459112606133938913722, time:1750767001.1250837s req_ids:[8] -DEBUG 06-24 20:10:01 [manager.py:391] -ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:165.7874584197998ms total_cost_time:165.82751274108887ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:7104 prompt_cache_len:5151 prompt_cache_ratio:0.7250844594594594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 -DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10723376274108887 s -INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.10957098007202148 s -DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=66882572977318960984681088177092046486, time:1750767001.2957704s req_ids:[8] -DEBUG 06-24 20:10:01 [manager.py:391] -ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:197.05724716186523ms total_cost_time:197.10016250610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7105 prompt_cache_len:5151 prompt_cache_ratio:0.7249824067558057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 -DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10780215263366699 s -INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101067066192627 s -DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=133520236464024991491466422264837499737, time:1750767001.4988065s req_ids:[8] -DEBUG 06-24 20:10:01 [manager.py:391] -ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:202.22854614257812ms total_cost_time:202.2714614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7106 prompt_cache_len:5151 prompt_cache_ratio:0.7248803827751196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 -DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10849189758300781 s -INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.11085939407348633 s -DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=336014274809402884963973388019554214518, time:1750767001.7067683s req_ids:[8] -DEBUG 06-24 20:10:01 [manager.py:391] -ERROR 06-24 20:10:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:203.66573333740234ms total_cost_time:203.70888710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7107 prompt_cache_len:5151 prompt_cache_ratio:0.7247783875052765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 -DEBUG 06-24 20:10:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:01 [manager.py:224] router recive req id 8 cost time 0.10725140571594238 s -INFO 06-24 20:10:01 [manager.py:68] detokenization recv req id 8 cost time 0.10976409912109375 s -DEBUG 06-24 20:10:01 [manager.py:391] Prefill Batch: batch_id=178841979544913129152945944260351364455, time:1750767001.917101s req_ids:[8] -DEBUG 06-24 20:10:01 [manager.py:391] -ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:01 lightllm_req_id:8 first_token_cost:205.0316333770752ms total_cost_time:205.0759792327881ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7108 prompt_cache_len:5151 prompt_cache_ratio:0.7246764209341587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 -DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.1075131893157959 s -INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10952949523925781 s -DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=266439027130059369469566099634846372756, time:1750767002.1301055s req_ids:[8] -DEBUG 06-24 20:10:02 [manager.py:391] -ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:206.67266845703125ms total_cost_time:206.71796798706055ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7109 prompt_cache_len:5151 prompt_cache_ratio:0.7245744830496553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 -DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.10746502876281738 s -INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10948848724365234 s -DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=330550870813570894071692884842101098320, time:1750767002.342767s req_ids:[8] -DEBUG 06-24 20:10:02 [manager.py:391] -ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:207.36098289489746ms total_cost_time:207.40842819213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7110 prompt_cache_len:5151 prompt_cache_ratio:0.7244725738396625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 -DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.1098031997680664 s -INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.11171603202819824 s -DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=105342325395231877605480585374180517953, time:1750767002.5550268s req_ids:[8] -DEBUG 06-24 20:10:02 [manager.py:391] -ERROR 06-24 20:10:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:377.02322006225586ms total_cost_time:377.06851959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7111 prompt_cache_len:5151 prompt_cache_ratio:0.7243706932920827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 -DEBUG 06-24 20:10:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:02 [manager.py:224] router recive req id 8 cost time 0.10738873481750488 s -INFO 06-24 20:10:02 [manager.py:68] detokenization recv req id 8 cost time 0.10934972763061523 s -DEBUG 06-24 20:10:02 [manager.py:391] Prefill Batch: batch_id=70920666620170024826776621011409511558, time:1750767002.942237s req_ids:[8] -DEBUG 06-24 20:10:02 [manager.py:391] -ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:02 lightllm_req_id:8 first_token_cost:210.5538845062256ms total_cost_time:210.59846878051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7112 prompt_cache_len:5151 prompt_cache_ratio:0.7242688413948256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 -DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10734081268310547 s -INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10942983627319336 s -DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=150809633389647638864255792000559816358, time:1750767003.1551838s req_ids:[8] -DEBUG 06-24 20:10:03 [manager.py:391] -ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.46755599975586ms total_cost_time:207.51023292541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7113 prompt_cache_len:5151 prompt_cache_ratio:0.7241670181358076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 -DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10794305801391602 s -INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10991120338439941 s -DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=194140702077090127405579358861391543500, time:1750767003.366889s req_ids:[8] -DEBUG 06-24 20:10:03 [manager.py:391] -ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:201.18188858032227ms total_cost_time:201.22504234313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7114 prompt_cache_len:5151 prompt_cache_ratio:0.7240652235029519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 -DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.10780477523803711 s -INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10969281196594238 s -DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=17381470317216304067070525988180645294, time:1750767003.5744162s req_ids:[8] -DEBUG 06-24 20:10:03 [manager.py:391] -ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.31377601623535ms total_cost_time:207.35692977905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7115 prompt_cache_len:5151 prompt_cache_ratio:0.7239634574841883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 -DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.1068735122680664 s -INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s -DEBUG 06-24 20:10:03 [manager.py:391] Prefill Batch: batch_id=314688588496185346754580480667134413986, time:1750767003.7954772s req_ids:[8] -DEBUG 06-24 20:10:03 [manager.py:391] -ERROR 06-24 20:10:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:212.61930465698242ms total_cost_time:212.6638889312744ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7116 prompt_cache_len:5151 prompt_cache_ratio:0.7238617200674536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 -DEBUG 06-24 20:10:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:03 [manager.py:224] router recive req id 8 cost time 0.1070547103881836 s -INFO 06-24 20:10:03 [manager.py:68] detokenization recv req id 8 cost time 0.10911083221435547 s -DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=279707048686434325747606007748573292003, time:1750767004.0062456s req_ids:[8] -DEBUG 06-24 20:10:04 [manager.py:391] -ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:03 lightllm_req_id:8 first_token_cost:207.0310115814209ms total_cost_time:207.08703994750977ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:7117 prompt_cache_len:5151 prompt_cache_ratio:0.7237600112406913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 -DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10927510261535645 s -INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.1113123893737793 s -DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=280897291166937183142754931010607511219, time:1750767004.2183535s req_ids:[8] -DEBUG 06-24 20:10:04 [manager.py:391] -ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:224.03979301452637ms total_cost_time:224.08437728881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7118 prompt_cache_len:5151 prompt_cache_ratio:0.7236583309918516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 -DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10692644119262695 s -INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.10895466804504395 s -DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=150305683124795077441087728834588842407, time:1750767004.442925s req_ids:[8] -DEBUG 06-24 20:10:04 [manager.py:391] -ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:197.77822494506836ms total_cost_time:197.82066345214844ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7119 prompt_cache_len:5151 prompt_cache_ratio:0.7235566793088917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 -DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s -INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s -DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=58687478010630007145795110407163309477, time:1750767004.6539078s req_ids:[8] -DEBUG 06-24 20:10:04 [manager.py:391] -ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:205.03592491149902ms total_cost_time:205.0788402557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7120 prompt_cache_len:5151 prompt_cache_ratio:0.7234550561797752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 -DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:04 [manager.py:224] router recive req id 8 cost time 0.10757994651794434 s -INFO 06-24 20:10:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946941375732422 s -DEBUG 06-24 20:10:04 [manager.py:391] Prefill Batch: batch_id=110949885598389622226292911100575103825, time:1750767004.8633108s req_ids:[8] -DEBUG 06-24 20:10:04 [manager.py:391] -ERROR 06-24 20:10:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:192.09647178649902ms total_cost_time:192.1408176422119ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7121 prompt_cache_len:5151 prompt_cache_ratio:0.7233534615924729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 -DEBUG 06-24 20:10:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.1072380542755127 s -INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.10909438133239746 s -DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=74167005708575374579923380212747340759, time:1750767005.0570939s req_ids:[8] -DEBUG 06-24 20:10:05 [manager.py:391] -ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:04 lightllm_req_id:8 first_token_cost:194.33093070983887ms total_cost_time:194.37599182128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7122 prompt_cache_len:5151 prompt_cache_ratio:0.723251895534962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 -DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.10817623138427734 s -INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.11003684997558594 s -DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=269285268965367244088909877791527324841, time:1750767005.2695458s req_ids:[8] -DEBUG 06-24 20:10:05 [manager.py:391] -ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:215.32654762268066ms total_cost_time:215.36517143249512ms,out_token_counter:1 mean_per_token_cost_time: 0.038623809814453125ms prompt_token_num:7123 prompt_cache_len:5151 prompt_cache_ratio:0.7231503579952268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 -DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.3100724220275879 s -INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.31199026107788086 s -DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=232982395339919514132300219642630077310, time:1750767005.6938589s req_ids:[8] -DEBUG 06-24 20:10:05 [manager.py:391] -ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:421.9181537628174ms total_cost_time:421.9627380371094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7124 prompt_cache_len:5151 prompt_cache_ratio:0.7230488489612578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 -DEBUG 06-24 20:10:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:05 [manager.py:224] router recive req id 8 cost time 0.10701680183410645 s -INFO 06-24 20:10:05 [manager.py:68] detokenization recv req id 8 cost time 0.10880899429321289 s -DEBUG 06-24 20:10:05 [manager.py:391] Prefill Batch: batch_id=326622681737859601353889855588158573919, time:1750767005.8993278s req_ids:[8] -DEBUG 06-24 20:10:05 [manager.py:391] -ERROR 06-24 20:10:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:182.3709011077881ms total_cost_time:182.4171543121338ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7125 prompt_cache_len:5151 prompt_cache_ratio:0.7229473684210527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 -DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10747170448303223 s -INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10933375358581543 s -DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=129036071752079148896783508566670962042, time:1750767006.0958092s req_ids:[8] -DEBUG 06-24 20:10:06 [manager.py:391] -ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:05 lightllm_req_id:8 first_token_cost:208.4064483642578ms total_cost_time:208.451509475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7126 prompt_cache_len:5151 prompt_cache_ratio:0.7228459163626157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 -DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.1079704761505127 s -INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s -DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=290488082846029475565293271506841012760, time:1750767006.3095121s req_ids:[8] -DEBUG 06-24 20:10:06 [manager.py:391] -ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:210.30616760253906ms total_cost_time:210.35289764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7127 prompt_cache_len:5151 prompt_cache_ratio:0.7227444927739581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 -DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10725808143615723 s -INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10919976234436035 s -DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=293347923792413291535025684709885828334, time:1750767006.523225s req_ids:[8] -DEBUG 06-24 20:10:06 [manager.py:391] -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:201.59220695495605ms total_cost_time:201.63679122924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7128 prompt_cache_len:5151 prompt_cache_ratio:0.7226430976430976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 -DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10776448249816895 s -INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10968828201293945 s -DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=305020993537177111931357738681263566223, time:1750767006.7439685s req_ids:[8] -DEBUG 06-24 20:10:06 [manager.py:391] -ERROR 06-24 20:10:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:10:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 31137.346 tokens/s -DEBUG 06-24 20:10:06 [stats.py:37] Avg prompt tokens throughput: 31128.587 tokens/s -DEBUG 06-24 20:10:06 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s -INFO 06-24 20:10:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:217.0412540435791ms total_cost_time:217.0851230621338ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7129 prompt_cache_len:5151 prompt_cache_ratio:0.7225417309580586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 -DEBUG 06-24 20:10:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:06 [manager.py:224] router recive req id 8 cost time 0.10730504989624023 s -INFO 06-24 20:10:06 [manager.py:68] detokenization recv req id 8 cost time 0.10923457145690918 s -DEBUG 06-24 20:10:06 [manager.py:391] Prefill Batch: batch_id=238388200100258151270442128106986777960, time:1750767006.9574132s req_ids:[8] -DEBUG 06-24 20:10:06 [manager.py:391] -ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:06 lightllm_req_id:8 first_token_cost:209.3968391418457ms total_cost_time:209.4414234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7130 prompt_cache_len:5151 prompt_cache_ratio:0.7224403927068723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 -DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10738587379455566 s -INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.10946536064147949 s -DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=23181921022295476850797447240014940962, time:1750767007.1693873s req_ids:[8] -DEBUG 06-24 20:10:07 [manager.py:391] -ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:197.71742820739746ms total_cost_time:197.75962829589844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7131 prompt_cache_len:5151 prompt_cache_ratio:0.7223390828775768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 -DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10732722282409668 s -INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.10924172401428223 s -DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=315806402130743580461888335256824080900, time:1750767007.3734066s req_ids:[8] -DEBUG 06-24 20:10:07 [manager.py:391] -ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:223.82807731628418ms total_cost_time:223.87266159057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7132 prompt_cache_len:5151 prompt_cache_ratio:0.7222378014582165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 -DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:07 [manager.py:224] router recive req id 8 cost time 0.10881352424621582 s -INFO 06-24 20:10:07 [manager.py:68] detokenization recv req id 8 cost time 0.11068058013916016 s -DEBUG 06-24 20:10:07 [manager.py:391] Prefill Batch: batch_id=176546428790214740621802520055242272636, time:1750767007.6101258s req_ids:[8] -DEBUG 06-24 20:10:07 [manager.py:391] -ERROR 06-24 20:10:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:409.56640243530273ms total_cost_time:409.61265563964844ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7133 prompt_cache_len:5151 prompt_cache_ratio:0.7221365484368428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 -DEBUG 06-24 20:10:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10712885856628418 s -INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s -DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=318306890930790770495520493631439597028, time:1750767008.0142307s req_ids:[8] -DEBUG 06-24 20:10:08 [manager.py:391] -ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:07 lightllm_req_id:8 first_token_cost:191.51616096496582ms total_cost_time:191.5607452392578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7134 prompt_cache_len:5151 prompt_cache_ratio:0.7220353238015139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 -DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10833883285522461 s -INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=191904024574924427026689802931918122501, time:1750767008.2202315s req_ids:[8] -DEBUG 06-24 20:10:08 [manager.py:391] -ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:209.55371856689453ms total_cost_time:209.60164070129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:7135 prompt_cache_len:5151 prompt_cache_ratio:0.7219341275402943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 -DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10799002647399902 s -INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.1098778247833252 s -DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=257084366272053646165539103018325668475, time:1750767008.4312084s req_ids:[8] -DEBUG 06-24 20:10:08 [manager.py:391] -ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:208.12034606933594ms total_cost_time:208.16421508789062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7136 prompt_cache_len:5151 prompt_cache_ratio:0.7218329596412556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 -DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10794901847839355 s -INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.10990619659423828 s -DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=73995569084425848046208426251939111890, time:1750767008.644523s req_ids:[8] -DEBUG 06-24 20:10:08 [manager.py:391] -ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:223.36864471435547ms total_cost_time:223.41346740722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7137 prompt_cache_len:5151 prompt_cache_ratio:0.7217318200924758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 -DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:08 [manager.py:224] router recive req id 8 cost time 0.10720109939575195 s -INFO 06-24 20:10:08 [manager.py:68] detokenization recv req id 8 cost time 0.1092977523803711 s -DEBUG 06-24 20:10:08 [manager.py:391] Prefill Batch: batch_id=276137271877318547874868435815613362142, time:1750767008.8668494s req_ids:[8] -DEBUG 06-24 20:10:08 [manager.py:391] -ERROR 06-24 20:10:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:191.58148765563965ms total_cost_time:191.62583351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7138 prompt_cache_len:5151 prompt_cache_ratio:0.7216307088820398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 -DEBUG 06-24 20:10:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10808753967285156 s -INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.11002016067504883 s -DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=117382491611288558201896011651250459515, time:1750767009.0679352s req_ids:[8] -DEBUG 06-24 20:10:09 [manager.py:391] -ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:08 lightllm_req_id:8 first_token_cost:205.92713356018066ms total_cost_time:205.98149299621582ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7139 prompt_cache_len:5151 prompt_cache_ratio:0.7215296259980389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 -DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10872864723205566 s -INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.11072444915771484 s -DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=153017171538353874900998120190634285094, time:1750767009.2767518s req_ids:[8] -DEBUG 06-24 20:10:09 [manager.py:391] -ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:195.9857940673828ms total_cost_time:196.03252410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7140 prompt_cache_len:5151 prompt_cache_ratio:0.7214285714285714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 -DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10745024681091309 s -INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979080200195312 s -DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=42047959268200999976377909954876377860, time:1750767009.4818919s req_ids:[8] -DEBUG 06-24 20:10:09 [manager.py:391] -ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:200.54006576538086ms total_cost_time:200.58536529541016ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7141 prompt_cache_len:5151 prompt_cache_ratio:0.721327545161742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 -DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.1068730354309082 s -INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.10879397392272949 s -DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=330096932053594308892812119960859029107, time:1750767009.688927s req_ids:[8] -DEBUG 06-24 20:10:09 [manager.py:391] -ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:202.5928497314453ms total_cost_time:202.6371955871582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7142 prompt_cache_len:5151 prompt_cache_ratio:0.7212265471856623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 -DEBUG 06-24 20:10:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:09 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s -INFO 06-24 20:10:09 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s -DEBUG 06-24 20:10:09 [manager.py:391] Prefill Batch: batch_id=305608827174941284113587090752990707681, time:1750767009.9055269s req_ids:[8] -DEBUG 06-24 20:10:09 [manager.py:391] -ERROR 06-24 20:10:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:214.7996425628662ms total_cost_time:214.84613418579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7143 prompt_cache_len:5151 prompt_cache_ratio:0.7211255774884502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 -DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10856389999389648 s -INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.11052584648132324 s -DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=24602101484355933588773515392067287557, time:1750767010.1177237s req_ids:[8] -DEBUG 06-24 20:10:10 [manager.py:391] -ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:09 lightllm_req_id:8 first_token_cost:366.4212226867676ms total_cost_time:366.46509170532227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7144 prompt_cache_len:5151 prompt_cache_ratio:0.7210246360582306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 -DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10800027847290039 s -INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.10982823371887207 s -DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=21201348492042893980037036695048958244, time:1750767010.4847126s req_ids:[8] -DEBUG 06-24 20:10:10 [manager.py:391] -ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:199.1877555847168ms total_cost_time:199.2504596710205ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:7145 prompt_cache_len:5151 prompt_cache_ratio:0.7209237228831351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 -DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10828638076782227 s -INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.11027240753173828 s -DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=160426337095385273733612065057675098173, time:1750767010.696382s req_ids:[8] -DEBUG 06-24 20:10:10 [manager.py:391] -ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:210.1461887359619ms total_cost_time:210.1917266845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7146 prompt_cache_len:5151 prompt_cache_ratio:0.7208228379513014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 -DEBUG 06-24 20:10:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:10 [manager.py:224] router recive req id 8 cost time 0.10698652267456055 s -INFO 06-24 20:10:10 [manager.py:68] detokenization recv req id 8 cost time 0.10810542106628418 s -DEBUG 06-24 20:10:10 [manager.py:391] Prefill Batch: batch_id=116900175550160533350180533268538788657, time:1750767010.9129367s req_ids:[8] -DEBUG 06-24 20:10:10 [manager.py:391] -ERROR 06-24 20:10:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:10 lightllm_req_id:8 first_token_cost:209.29360389709473ms total_cost_time:209.35416221618652ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:7147 prompt_cache_len:5151 prompt_cache_ratio:0.7207219812508745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 -DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10809493064880371 s -INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s -DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=248482763243138562391745014721123308021, time:1750767011.1320522s req_ids:[8] -DEBUG 06-24 20:10:11 [manager.py:391] -ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:211.66706085205078ms total_cost_time:211.70663833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:7148 prompt_cache_len:5151 prompt_cache_ratio:0.7206211527700056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 -DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.1065671443939209 s -INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10860967636108398 s -DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=190870487122387489309474453686741319734, time:1750767011.3455086s req_ids:[8] -DEBUG 06-24 20:10:11 [manager.py:391] -ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:208.98842811584473ms total_cost_time:209.03611183166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7149 prompt_cache_len:5151 prompt_cache_ratio:0.7205203524968528 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 -DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10767006874084473 s -INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10955929756164551 s -DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=225427075520997457843880120865797903053, time:1750767011.5601594s req_ids:[8] -DEBUG 06-24 20:10:11 [manager.py:391] -ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:206.09402656555176ms total_cost_time:206.13884925842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7150 prompt_cache_len:5151 prompt_cache_ratio:0.7204195804195804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 -DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s -INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.11046719551086426 s -DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=243342279008509018952944565838802223707, time:1750767011.7762108s req_ids:[8] -DEBUG 06-24 20:10:11 [manager.py:391] -ERROR 06-24 20:10:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:215.00778198242188ms total_cost_time:215.04998207092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7151 prompt_cache_len:5151 prompt_cache_ratio:0.72031883652636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 -DEBUG 06-24 20:10:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:11 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s -INFO 06-24 20:10:11 [manager.py:68] detokenization recv req id 8 cost time 0.10987544059753418 s -DEBUG 06-24 20:10:11 [manager.py:391] Prefill Batch: batch_id=96835342792509319395585885408570039088, time:1750767011.9984121s req_ids:[8] -DEBUG 06-24 20:10:11 [manager.py:391] -ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:11 lightllm_req_id:8 first_token_cost:213.20199966430664ms total_cost_time:213.2434844970703ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7152 prompt_cache_len:5151 prompt_cache_ratio:0.7202181208053692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 -DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10709333419799805 s -INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.10905122756958008 s -DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=224574353546801253410498667577537715500, time:1750767012.2069333s req_ids:[8] -DEBUG 06-24 20:10:12 [manager.py:391] -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:205.89542388916016ms total_cost_time:205.93857765197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7153 prompt_cache_len:5151 prompt_cache_ratio:0.7201174332447924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 -DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10823273658752441 s -INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.11028242111206055 s -DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=292732686581944479537784308821433568301, time:1750767012.4196084s req_ids:[8] -DEBUG 06-24 20:10:12 [manager.py:391] -ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:208.48369598388672ms total_cost_time:208.54616165161133ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:7154 prompt_cache_len:5151 prompt_cache_ratio:0.7200167738328208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 -DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s -INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.10975408554077148 s -DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=28317246882918998221432272335114133467, time:1750767012.643038s req_ids:[8] -DEBUG 06-24 20:10:12 [manager.py:391] -ERROR 06-24 20:10:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:362.424373626709ms total_cost_time:362.4711036682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7155 prompt_cache_len:5151 prompt_cache_ratio:0.719916142557652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 -DEBUG 06-24 20:10:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:12 [manager.py:224] router recive req id 8 cost time 0.10862851142883301 s -INFO 06-24 20:10:12 [manager.py:68] detokenization recv req id 8 cost time 0.11054372787475586 s -DEBUG 06-24 20:10:12 [manager.py:391] Prefill Batch: batch_id=209049702126121689527432925685733818285, time:1750767012.9997888s req_ids:[8] -DEBUG 06-24 20:10:12 [manager.py:391] -ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:12 lightllm_req_id:8 first_token_cost:196.6550350189209ms total_cost_time:196.6991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7156 prompt_cache_len:5151 prompt_cache_ratio:0.7198155394074902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 -DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10712027549743652 s -INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.10897350311279297 s -DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=181401506268972188852523975402527821510, time:1750767013.2124963s req_ids:[8] -DEBUG 06-24 20:10:13 [manager.py:391] -ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:212.84914016723633ms total_cost_time:212.89372444152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7157 prompt_cache_len:5151 prompt_cache_ratio:0.7197149643705463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 -DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s -INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.11030888557434082 s -DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=175813898411919452132436765316407837202, time:1750767013.4260056s req_ids:[8] -DEBUG 06-24 20:10:13 [manager.py:391] -ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:209.7475528717041ms total_cost_time:209.7926139831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7158 prompt_cache_len:5151 prompt_cache_ratio:0.7196144174350377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 -DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s -INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s -DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=211385711464919028396476986522095651354, time:1750767013.6496053s req_ids:[8] -DEBUG 06-24 20:10:13 [manager.py:391] -ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:217.09370613098145ms total_cost_time:217.13805198669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7159 prompt_cache_len:5151 prompt_cache_ratio:0.7195138985891885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 -DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:13 [manager.py:224] router recive req id 8 cost time 0.10737347602844238 s -INFO 06-24 20:10:13 [manager.py:68] detokenization recv req id 8 cost time 0.10931396484375 s -DEBUG 06-24 20:10:13 [manager.py:391] Prefill Batch: batch_id=221518014329207604782594996932027391297, time:1750767013.8646321s req_ids:[8] -DEBUG 06-24 20:10:13 [manager.py:391] -ERROR 06-24 20:10:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:206.88343048095703ms total_cost_time:206.92873001098633ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7160 prompt_cache_len:5151 prompt_cache_ratio:0.7194134078212291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 -DEBUG 06-24 20:10:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.1075444221496582 s -INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10938000679016113 s -DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=280595608036306510486759159359317774786, time:1750767014.078916s req_ids:[8] -DEBUG 06-24 20:10:14 [manager.py:391] -ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:13 lightllm_req_id:8 first_token_cost:207.7345848083496ms total_cost_time:207.7775001525879ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7161 prompt_cache_len:5151 prompt_cache_ratio:0.7193129451193967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 -DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10759282112121582 s -INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10939288139343262 s -DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=67938495537675149659507722131604408284, time:1750767014.2943754s req_ids:[8] -DEBUG 06-24 20:10:14 [manager.py:391] -ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:210.54983139038086ms total_cost_time:210.59632301330566ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7162 prompt_cache_len:5151 prompt_cache_ratio:0.7192125104719352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 -DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.1074514389038086 s -INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10936117172241211 s -DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=306563091185353362046616685526690681352, time:1750767014.508425s req_ids:[8] -DEBUG 06-24 20:10:14 [manager.py:391] -ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:208.1432342529297ms total_cost_time:208.1892490386963ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7163 prompt_cache_len:5151 prompt_cache_ratio:0.7191121038670948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 -DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s -INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10946917533874512 s -DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=43022290005250700893678364406478221217, time:1750767014.7211523s req_ids:[8] -DEBUG 06-24 20:10:14 [manager.py:391] -ERROR 06-24 20:10:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:207.8566551208496ms total_cost_time:207.9014778137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7164 prompt_cache_len:5151 prompt_cache_ratio:0.7190117252931323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 -DEBUG 06-24 20:10:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:14 [manager.py:224] router recive req id 8 cost time 0.10719156265258789 s -INFO 06-24 20:10:14 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s -DEBUG 06-24 20:10:14 [manager.py:391] Prefill Batch: batch_id=277291343155307770143807634968099963263, time:1750767014.9347205s req_ids:[8] -DEBUG 06-24 20:10:14 [manager.py:391] -ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:14 lightllm_req_id:8 first_token_cost:213.40203285217285ms total_cost_time:213.42825889587402ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7165 prompt_cache_len:5151 prompt_cache_ratio:0.7189113747383112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 -DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.10596847534179688 s -INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.10802054405212402 s -DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=289661613349018392918383164196431019396, time:1750767015.1655276s req_ids:[8] -DEBUG 06-24 20:10:15 [manager.py:391] -ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:224.54261779785156ms total_cost_time:224.58744049072266ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7166 prompt_cache_len:5151 prompt_cache_ratio:0.7188110521909015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 -DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s -INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s -DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=271730576213155666345063098772542448954, time:1750767015.3820226s req_ids:[8] -DEBUG 06-24 20:10:15 [manager.py:391] -ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:208.78148078918457ms total_cost_time:208.82582664489746ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7167 prompt_cache_len:5151 prompt_cache_ratio:0.7187107576391796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 -DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:15 [manager.py:224] router recive req id 8 cost time 0.30916380882263184 s -INFO 06-24 20:10:15 [manager.py:68] detokenization recv req id 8 cost time 0.3112506866455078 s -DEBUG 06-24 20:10:15 [manager.py:391] Prefill Batch: batch_id=247636245169448681274982394630374559129, time:1750767015.8055675s req_ids:[8] -DEBUG 06-24 20:10:15 [manager.py:391] -ERROR 06-24 20:10:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:417.7429676055908ms total_cost_time:417.7863597869873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7168 prompt_cache_len:5151 prompt_cache_ratio:0.7186104910714286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 -DEBUG 06-24 20:10:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s -DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=80719615642951113721846341072722340409, time:1750767016.0286608s req_ids:[8] -DEBUG 06-24 20:10:16 [manager.py:391] -ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:15 lightllm_req_id:8 first_token_cost:214.16473388671875ms total_cost_time:214.21098709106445ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7169 prompt_cache_len:5151 prompt_cache_ratio:0.718510252475938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 -DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s -INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11031055450439453 s -DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=180987930712356888811838622202771241091, time:1750767016.2424474s req_ids:[8] -DEBUG 06-24 20:10:16 [manager.py:391] -ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:206.91609382629395ms total_cost_time:206.95972442626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7170 prompt_cache_len:5151 prompt_cache_ratio:0.7184100418410042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 -DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:16 [batch.py:51] router release req id 8 -INFO 06-24 20:10:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10810708999633789 s -INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.11025428771972656 s -DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=214080404080471687872836387010066170044, time:1750767016.4537523s req_ids:[8] -DEBUG 06-24 20:10:16 [manager.py:391] -ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:204.1475772857666ms total_cost_time:204.1914463043213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7171 prompt_cache_len:5151 prompt_cache_ratio:0.7183098591549296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 -DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.10870623588562012 s -INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s -DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=332392529189229823549960462193953147603, time:1750767016.6638665s req_ids:[8] -DEBUG 06-24 20:10:16 [manager.py:391] -ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:207.13400840759277ms total_cost_time:207.17954635620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7172 prompt_cache_len:5151 prompt_cache_ratio:0.7182097044060234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 -DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:16 [manager.py:224] router recive req id 8 cost time 0.1072385311126709 s -INFO 06-24 20:10:16 [manager.py:68] detokenization recv req id 8 cost time 0.10926318168640137 s -DEBUG 06-24 20:10:16 [manager.py:391] Prefill Batch: batch_id=64018560222206016299779069699227649297, time:1750767016.8747811s req_ids:[8] -DEBUG 06-24 20:10:16 [manager.py:391] -DEBUG 06-24 20:10:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 31327.678 tokens/s -DEBUG 06-24 20:10:16 [stats.py:37] Avg prompt tokens throughput: 31319.019 tokens/s -DEBUG 06-24 20:10:16 [stats.py:37] Avg generate tokens throughput: 8.659 tokens/s -ERROR 06-24 20:10:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:206.85267448425293ms total_cost_time:206.89725875854492ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7173 prompt_cache_len:5151 prompt_cache_ratio:0.7181095775826014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 -DEBUG 06-24 20:10:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10745930671691895 s -INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.1094667911529541 s -DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=288548564847830269233578903021808522298, time:1750767017.088555s req_ids:[8] -DEBUG 06-24 20:10:17 [manager.py:391] -ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:16 lightllm_req_id:8 first_token_cost:204.81491088867188ms total_cost_time:204.85806465148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7174 prompt_cache_len:5151 prompt_cache_ratio:0.7180094786729858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 -DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s -INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.10980701446533203 s -DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=334186496730858969894756895045469871523, time:1750767017.2976313s req_ids:[8] -DEBUG 06-24 20:10:17 [manager.py:391] -ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:199.68175888061523ms total_cost_time:199.7241973876953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7175 prompt_cache_len:5151 prompt_cache_ratio:0.7179094076655053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 -DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s -INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.10931992530822754 s -DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=18306681105736782257776387587061967468, time:1750767017.5098698s req_ids:[8] -DEBUG 06-24 20:10:17 [manager.py:391] -ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:209.5024585723877ms total_cost_time:209.5472812652588ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7176 prompt_cache_len:5151 prompt_cache_ratio:0.717809364548495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 -DEBUG 06-24 20:10:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:17 [manager.py:224] router recive req id 8 cost time 0.10798025131225586 s -INFO 06-24 20:10:17 [manager.py:68] detokenization recv req id 8 cost time 0.11020469665527344 s -DEBUG 06-24 20:10:17 [manager.py:391] Prefill Batch: batch_id=120362025528813721755658995554779320411, time:1750767017.7219095s req_ids:[8] -DEBUG 06-24 20:10:17 [manager.py:391] -ERROR 06-24 20:10:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:384.80663299560547ms total_cost_time:384.85193252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7177 prompt_cache_len:5151 prompt_cache_ratio:0.7177093493102967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 -DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10746073722839355 s -INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s -DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=305892352742916224250096128670539733067, time:1750767018.1035385s req_ids:[8] -DEBUG 06-24 20:10:18 [manager.py:391] -ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:17 lightllm_req_id:8 first_token_cost:201.826810836792ms total_cost_time:201.86924934387207ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7178 prompt_cache_len:5151 prompt_cache_ratio:0.7176093619392588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 -DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10703825950622559 s -INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10912251472473145 s -DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=120065790336193888102739497046430547834, time:1750767018.315625s req_ids:[8] -DEBUG 06-24 20:10:18 [manager.py:391] -ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:209.22350883483887ms total_cost_time:209.26713943481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7179 prompt_cache_len:5151 prompt_cache_ratio:0.7175094024237358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 -DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s -INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s -INFO 06-24 20:10:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=339490786432690786413431494579132048795, time:1750767018.5306323s req_ids:[8] -DEBUG 06-24 20:10:18 [manager.py:391] -ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:203.32026481628418ms total_cost_time:203.37986946105957ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7180 prompt_cache_len:5151 prompt_cache_ratio:0.7174094707520892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 -DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s -INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.11054754257202148 s -DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=232498509910486336261472994260498195806, time:1750767018.7451828s req_ids:[8] -DEBUG 06-24 20:10:18 [manager.py:391] -ERROR 06-24 20:10:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:208.2045078277588ms total_cost_time:208.266019821167ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:7181 prompt_cache_len:5151 prompt_cache_ratio:0.7173095669126862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 -DEBUG 06-24 20:10:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:18 [manager.py:224] router recive req id 8 cost time 0.10731768608093262 s -INFO 06-24 20:10:18 [manager.py:68] detokenization recv req id 8 cost time 0.10918283462524414 s -DEBUG 06-24 20:10:18 [manager.py:391] Prefill Batch: batch_id=256977974851781653653669263487424330118, time:1750767018.9734213s req_ids:[8] -DEBUG 06-24 20:10:18 [manager.py:391] -ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:18 lightllm_req_id:8 first_token_cost:222.95141220092773ms total_cost_time:223.01340103149414ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:7182 prompt_cache_len:5151 prompt_cache_ratio:0.7172096908939014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 -DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10789346694946289 s -INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.10995078086853027 s -DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=332521767171950538506777204357045243607, time:1750767019.1863844s req_ids:[8] -DEBUG 06-24 20:10:19 [manager.py:391] -ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:197.11709022521973ms total_cost_time:197.16858863830566ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:7183 prompt_cache_len:5151 prompt_cache_ratio:0.7171098426841153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 -DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10996317863464355 s -INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.11208271980285645 s -DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=297669703632240953516048109199720389073, time:1750767019.3891451s req_ids:[8] -DEBUG 06-24 20:10:19 [manager.py:391] -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:200.2410888671875ms total_cost_time:200.3006935119629ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7184 prompt_cache_len:5151 prompt_cache_ratio:0.717010022271715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 -DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.11086797714233398 s -INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.1128854751586914 s -DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=14371472575789124717291915108220881311, time:1750767019.5936017s req_ids:[8] -DEBUG 06-24 20:10:19 [manager.py:391] -ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:200.9263038635254ms total_cost_time:200.98090171813965ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:7185 prompt_cache_len:5151 prompt_cache_ratio:0.716910229645094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 -DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:19 [manager.py:224] router recive req id 8 cost time 0.10885262489318848 s -INFO 06-24 20:10:19 [manager.py:68] detokenization recv req id 8 cost time 0.11088871955871582 s -DEBUG 06-24 20:10:19 [manager.py:391] Prefill Batch: batch_id=303526261337772790197775478694906210796, time:1750767019.8088892s req_ids:[8] -DEBUG 06-24 20:10:19 [manager.py:391] -ERROR 06-24 20:10:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:212.41164207458496ms total_cost_time:212.45932579040527ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7186 prompt_cache_len:5151 prompt_cache_ratio:0.7168104647926524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 -DEBUG 06-24 20:10:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10983777046203613 s -INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.11191916465759277 s -DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=56333294574531672692607443881386467465, time:1750767020.0238993s req_ids:[8] -DEBUG 06-24 20:10:20 [manager.py:391] -ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:19 lightllm_req_id:8 first_token_cost:205.84583282470703ms total_cost_time:205.90591430664062ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7187 prompt_cache_len:5151 prompt_cache_ratio:0.7167107277027968 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 -DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10804891586303711 s -INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.1105196475982666 s -DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=275587993470186061924883128807917108381, time:1750767020.24008s req_ids:[8] -DEBUG 06-24 20:10:20 [manager.py:391] -ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:386.81697845458984ms total_cost_time:386.859655380249ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7188 prompt_cache_len:5151 prompt_cache_ratio:0.7166110183639399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 -DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.10769462585449219 s -INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s -DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=14457955551232871508301530811737527859, time:1750767020.6219192s req_ids:[8] -DEBUG 06-24 20:10:20 [manager.py:391] -ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:200.3650665283203ms total_cost_time:200.41871070861816ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:7189 prompt_cache_len:5151 prompt_cache_ratio:0.7165113367645013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 -DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:20 [manager.py:224] router recive req id 8 cost time 0.11113858222961426 s -INFO 06-24 20:10:20 [manager.py:68] detokenization recv req id 8 cost time 0.11323070526123047 s -DEBUG 06-24 20:10:20 [manager.py:391] Prefill Batch: batch_id=3215234047010236800195495082582627068, time:1750767020.8365908s req_ids:[8] -DEBUG 06-24 20:10:20 [manager.py:391] -ERROR 06-24 20:10:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:210.16693115234375ms total_cost_time:210.22391319274902ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7190 prompt_cache_len:5151 prompt_cache_ratio:0.7164116828929068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 -DEBUG 06-24 20:10:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10957598686218262 s -INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.11176133155822754 s -DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=128746066182812263521487385908887434637, time:1750767021.049714s req_ids:[8] -DEBUG 06-24 20:10:21 [manager.py:391] -ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:20 lightllm_req_id:8 first_token_cost:204.41865921020508ms total_cost_time:204.46181297302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7191 prompt_cache_len:5151 prompt_cache_ratio:0.7163120567375887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 -DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10727429389953613 s -INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10939335823059082 s -DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=326285463919855974268593810677104993759, time:1750767021.261176s req_ids:[8] -DEBUG 06-24 20:10:21 [manager.py:391] -ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:203.6299705505371ms total_cost_time:203.6759853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7192 prompt_cache_len:5151 prompt_cache_ratio:0.7162124582869855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 -DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10591602325439453 s -INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10784745216369629 s -DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=43057916862155693040484303281464402648, time:1750767021.4726374s req_ids:[8] -DEBUG 06-24 20:10:21 [manager.py:391] -ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:208.34732055664062ms total_cost_time:208.39214324951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7193 prompt_cache_len:5151 prompt_cache_ratio:0.7161128875295426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 -DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.1085667610168457 s -INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.11104249954223633 s -DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=29042436613385024766462786427722377952, time:1750767021.6895385s req_ids:[8] -DEBUG 06-24 20:10:21 [manager.py:391] -ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:212.1596336364746ms total_cost_time:212.2032642364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7194 prompt_cache_len:5151 prompt_cache_ratio:0.7160133444537115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 -DEBUG 06-24 20:10:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:21 [manager.py:224] router recive req id 8 cost time 0.10733461380004883 s -INFO 06-24 20:10:21 [manager.py:68] detokenization recv req id 8 cost time 0.10953998565673828 s -DEBUG 06-24 20:10:21 [manager.py:391] Prefill Batch: batch_id=312441112655158214139687751498353124083, time:1750767021.904089s req_ids:[8] -DEBUG 06-24 20:10:21 [manager.py:391] -ERROR 06-24 20:10:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:208.3151340484619ms total_cost_time:208.37879180908203ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:7195 prompt_cache_len:5151 prompt_cache_ratio:0.71591382904795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 -DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10904717445373535 s -INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11102581024169922 s -DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=638280569979463467694848177759158162, time:1750767022.1253746s req_ids:[8] -DEBUG 06-24 20:10:22 [manager.py:391] -ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:21 lightllm_req_id:8 first_token_cost:211.6076946258545ms total_cost_time:211.65013313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7196 prompt_cache_len:5151 prompt_cache_ratio:0.7158143413007226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 -DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10704517364501953 s -INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.1090700626373291 s -DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=168891913480738567038559936616958775430, time:1750767022.3402126s req_ids:[8] -DEBUG 06-24 20:10:22 [manager.py:391] -ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:212.97025680541992ms total_cost_time:213.0138874053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7197 prompt_cache_len:5151 prompt_cache_ratio:0.7157148812005002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 -DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10906386375427246 s -INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s -DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=267764532393415287241209309393756621666, time:1750767022.5575345s req_ids:[8] -DEBUG 06-24 20:10:22 [manager.py:391] -ERROR 06-24 20:10:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:210.10589599609375ms total_cost_time:210.14928817749023ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7198 prompt_cache_len:5151 prompt_cache_ratio:0.7156154487357599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 -DEBUG 06-24 20:10:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:22 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s -INFO 06-24 20:10:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s -DEBUG 06-24 20:10:22 [manager.py:391] Prefill Batch: batch_id=324710147842344023793192062781897973722, time:1750767022.7708187s req_ids:[8] -DEBUG 06-24 20:10:22 [manager.py:391] -ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:22 lightllm_req_id:8 first_token_cost:375.8392333984375ms total_cost_time:375.8819103240967ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7199 prompt_cache_len:5151 prompt_cache_ratio:0.7155160438949855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 -DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10745596885681152 s -INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.1094050407409668 s -DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=302984740435381875287107172358051071348, time:1750767023.148048s req_ids:[8] -DEBUG 06-24 20:10:23 [manager.py:391] -INFO 06-24 20:10:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:10:23 [statics_utils.py:24] mean first cost: 231.49240948393353 ms -INFO 06-24 20:10:23 [statics_utils.py:24] mean per token cost: 0.09554309566870256 ms -ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:199.30553436279297ms total_cost_time:199.34821128845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7200 prompt_cache_len:5151 prompt_cache_ratio:0.7154166666666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 -DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10734796524047852 s -INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.10925817489624023 s -DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=221237606668244531027029436085037952968, time:1750767023.3595424s req_ids:[8] -DEBUG 06-24 20:10:23 [manager.py:391] -ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:205.47747611999512ms total_cost_time:205.5225372314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7201 prompt_cache_len:5151 prompt_cache_ratio:0.7153173170393001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 -DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10808229446411133 s -INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.11000752449035645 s -DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=308580203769821789153304500767938642136, time:1750767023.5758736s req_ids:[8] -DEBUG 06-24 20:10:23 [manager.py:391] -ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:213.00649642944336ms total_cost_time:213.05036544799805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7202 prompt_cache_len:5151 prompt_cache_ratio:0.7152179950013885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 -DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s -INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.1087338924407959 s -DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=180349067545523260537495373194874626496, time:1750767023.78955s req_ids:[8] -DEBUG 06-24 20:10:23 [manager.py:391] -ERROR 06-24 20:10:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:205.09982109069824ms total_cost_time:205.14249801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7203 prompt_cache_len:5151 prompt_cache_ratio:0.7151187005414411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 -DEBUG 06-24 20:10:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:23 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s -INFO 06-24 20:10:23 [manager.py:68] detokenization recv req id 8 cost time 0.11002349853515625 s -DEBUG 06-24 20:10:23 [manager.py:391] Prefill Batch: batch_id=161657790310231586776724987055712014107, time:1750767023.9990604s req_ids:[8] -DEBUG 06-24 20:10:23 [manager.py:391] -ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:23 lightllm_req_id:8 first_token_cost:201.89762115478516ms total_cost_time:201.94196701049805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7204 prompt_cache_len:5151 prompt_cache_ratio:0.7150194336479734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 -DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.1097569465637207 s -INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11174345016479492 s -DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=162933438657109837295909713186048139132, time:1750767024.212169s req_ids:[8] -DEBUG 06-24 20:10:24 [manager.py:391] -ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:212.28289604187012ms total_cost_time:212.33057975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7205 prompt_cache_len:5151 prompt_cache_ratio:0.7149201943095073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 -DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10838007926940918 s -INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s -DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=118792546758758780161445794108205256710, time:1750767024.4280128s req_ids:[8] -DEBUG 06-24 20:10:24 [manager.py:391] -ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:207.60178565979004ms total_cost_time:207.62872695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7206 prompt_cache_len:5151 prompt_cache_ratio:0.7148209825145712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 -DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10358405113220215 s -INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.10547900199890137 s -DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=92216390046390601306457832625247722278, time:1750767024.6502898s req_ids:[8] -DEBUG 06-24 20:10:24 [manager.py:391] -ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:215.93689918518066ms total_cost_time:215.98315238952637ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7207 prompt_cache_len:5151 prompt_cache_ratio:0.7147217982516998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 -DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:24 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s -INFO 06-24 20:10:24 [manager.py:68] detokenization recv req id 8 cost time 0.11014246940612793 s -DEBUG 06-24 20:10:24 [manager.py:391] Prefill Batch: batch_id=328501875706381556995345619339009139193, time:1750767024.8711586s req_ids:[8] -DEBUG 06-24 20:10:24 [manager.py:391] -ERROR 06-24 20:10:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:216.06063842773438ms total_cost_time:216.1116600036621ms,out_token_counter:1 mean_per_token_cost_time: 0.051021575927734375ms prompt_token_num:7208 prompt_cache_len:5151 prompt_cache_ratio:0.714622641509434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 -DEBUG 06-24 20:10:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.1073007583618164 s -INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.109222412109375 s -DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=234656382681164417105014294113369149439, time:1750767025.0868006s req_ids:[8] -DEBUG 06-24 20:10:25 [manager.py:391] -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:24 lightllm_req_id:8 first_token_cost:202.55351066589355ms total_cost_time:202.61669158935547ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:7209 prompt_cache_len:5151 prompt_cache_ratio:0.7145235122763213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 -DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.1071627140045166 s -INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.10919332504272461 s -DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=276193358371705551075212688797899138558, time:1750767025.2967112s req_ids:[8] -DEBUG 06-24 20:10:25 [manager.py:391] -ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:204.76531982421875ms total_cost_time:204.80799674987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7210 prompt_cache_len:5151 prompt_cache_ratio:0.7144244105409154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 -DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.10821151733398438 s -INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.11015629768371582 s -DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=232772307891548812685691808767956124537, time:1750767025.5086195s req_ids:[8] -DEBUG 06-24 20:10:25 [manager.py:391] -ERROR 06-24 20:10:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:209.78498458862305ms total_cost_time:209.82956886291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7211 prompt_cache_len:5151 prompt_cache_ratio:0.7143253362917764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 -DEBUG 06-24 20:10:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:25 [manager.py:224] router recive req id 8 cost time 0.3096437454223633 s -INFO 06-24 20:10:25 [manager.py:68] detokenization recv req id 8 cost time 0.3116188049316406 s -DEBUG 06-24 20:10:25 [manager.py:391] Prefill Batch: batch_id=251799269907912582352124314833577177188, time:1750767025.9346156s req_ids:[8] -DEBUG 06-24 20:10:25 [manager.py:391] -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:25 lightllm_req_id:8 first_token_cost:420.5431938171387ms total_cost_time:420.58730125427246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7212 prompt_cache_len:5151 prompt_cache_ratio:0.7142262895174709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10733985900878906 s -INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10922908782958984 s -DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=130494591751210110622407879704260863672, time:1750767026.1518312s req_ids:[8] -DEBUG 06-24 20:10:26 [manager.py:391] -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:205.8122158050537ms total_cost_time:205.85155487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:7213 prompt_cache_len:5151 prompt_cache_ratio:0.7141272702065715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10892915725708008 s -INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.1107180118560791 s -DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=38793771621875564070348385916962534616, time:1750767026.362907s req_ids:[8] -DEBUG 06-24 20:10:26 [manager.py:391] -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:169.37780380249023ms total_cost_time:169.4192886352539ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7214 prompt_cache_len:5151 prompt_cache_ratio:0.7140282783476574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10682797431945801 s -INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10856986045837402 s -DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=340226059752779660504320568643710369387, time:1750767026.5366027s req_ids:[8] -DEBUG 06-24 20:10:26 [manager.py:391] -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:162.08958625793457ms total_cost_time:162.13417053222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7215 prompt_cache_len:5151 prompt_cache_ratio:0.7139293139293139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s -INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.11039376258850098 s -DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=212537708616635262251282615363484367122, time:1750767026.7013197s req_ids:[8] -DEBUG 06-24 20:10:26 [manager.py:391] -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:192.80719757080078ms total_cost_time:192.85321235656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7216 prompt_cache_len:5151 prompt_cache_ratio:0.7138303769401331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:26 [manager.py:224] router recive req id 8 cost time 0.10718154907226562 s -INFO 06-24 20:10:26 [manager.py:68] detokenization recv req id 8 cost time 0.10921978950500488 s -DEBUG 06-24 20:10:26 [manager.py:391] Prefill Batch: batch_id=218654729717630473533709349939016141917, time:1750767026.9042196s req_ids:[8] -DEBUG 06-24 20:10:26 [manager.py:391] -DEBUG 06-24 20:10:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 31576.075 tokens/s -DEBUG 06-24 20:10:26 [stats.py:37] Avg prompt tokens throughput: 31567.301 tokens/s -DEBUG 06-24 20:10:26 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s -ERROR 06-24 20:10:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:202.26526260375977ms total_cost_time:202.31008529663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7217 prompt_cache_len:5151 prompt_cache_ratio:0.7137314673687127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 -DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.10702991485595703 s -INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.10892105102539062 s -DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=211973182059193687553247121749063789130, time:1750767027.1158285s req_ids:[8] -DEBUG 06-24 20:10:27 [manager.py:391] -ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:26 lightllm_req_id:8 first_token_cost:204.62870597839355ms total_cost_time:204.67233657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7218 prompt_cache_len:5151 prompt_cache_ratio:0.7136325852036576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 -DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s -INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.10929393768310547 s -DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=298911771670311815287180430790603675105, time:1750767027.324702s req_ids:[8] -DEBUG 06-24 20:10:27 [manager.py:391] -ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:202.7270793914795ms total_cost_time:202.76975631713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7219 prompt_cache_len:5151 prompt_cache_ratio:0.713533730433578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 -DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.1071012020111084 s -INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091609001159668 s -DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=23628964565569154186533181962007594750, time:1750767027.53783s req_ids:[8] -DEBUG 06-24 20:10:27 [manager.py:391] -ERROR 06-24 20:10:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:207.31067657470703ms total_cost_time:207.35406875610352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7220 prompt_cache_len:5151 prompt_cache_ratio:0.7134349030470915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 -DEBUG 06-24 20:10:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:27 [manager.py:224] router recive req id 8 cost time 0.1074671745300293 s -INFO 06-24 20:10:27 [manager.py:68] detokenization recv req id 8 cost time 0.1095113754272461 s -DEBUG 06-24 20:10:27 [manager.py:391] Prefill Batch: batch_id=119896601454468249564775799677033631108, time:1750767027.750543s req_ids:[8] -DEBUG 06-24 20:10:27 [manager.py:391] -ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:27 lightllm_req_id:8 first_token_cost:380.95760345458984ms total_cost_time:381.00266456604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7221 prompt_cache_len:5151 prompt_cache_ratio:0.7133361030328209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 -DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10892295837402344 s -INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s -DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=93410540193450095456408940389700922066, time:1750767028.130421s req_ids:[8] -DEBUG 06-24 20:10:28 [manager.py:391] -ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:202.62813568115234ms total_cost_time:202.67271995544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7222 prompt_cache_len:5151 prompt_cache_ratio:0.7132373303793963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 -DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10788846015930176 s -INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988640785217285 s -DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=39464193718203905937358236735442189758, time:1750767028.3447165s req_ids:[8] -DEBUG 06-24 20:10:28 [manager.py:391] -ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:201.65252685546875ms total_cost_time:201.69615745544434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7223 prompt_cache_len:5151 prompt_cache_ratio:0.7131385850754534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 -DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10877466201782227 s -INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s -DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=122662691721303781997121285494684971650, time:1750767028.5568492s req_ids:[8] -DEBUG 06-24 20:10:28 [manager.py:391] -ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:210.05582809448242ms total_cost_time:210.0989818572998ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7224 prompt_cache_len:5151 prompt_cache_ratio:0.7130398671096345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 -DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s -INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.11002564430236816 s -DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=15729667685554205224443925442438866255, time:1750767028.7681086s req_ids:[8] -DEBUG 06-24 20:10:28 [manager.py:391] -ERROR 06-24 20:10:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:201.97248458862305ms total_cost_time:202.01945304870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7225 prompt_cache_len:5151 prompt_cache_ratio:0.7129411764705882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 -DEBUG 06-24 20:10:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:28 [manager.py:224] router recive req id 8 cost time 0.1078035831451416 s -INFO 06-24 20:10:28 [manager.py:68] detokenization recv req id 8 cost time 0.10998249053955078 s -DEBUG 06-24 20:10:28 [manager.py:391] Prefill Batch: batch_id=157369194917989399585955947836580024713, time:1750767028.975092s req_ids:[8] -DEBUG 06-24 20:10:28 [manager.py:391] -ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:28 lightllm_req_id:8 first_token_cost:205.43384552001953ms total_cost_time:205.4765224456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7226 prompt_cache_len:5151 prompt_cache_ratio:0.7128425131469692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 -DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:29 [batch.py:51] router release req id 8 -INFO 06-24 20:10:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s -INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.1091303825378418 s -DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=124855075529052620064855014365017169230, time:1750767029.1848948s req_ids:[8] -DEBUG 06-24 20:10:29 [manager.py:391] -ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42638206481934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7227 prompt_cache_len:5151 prompt_cache_ratio:0.7127438771274388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 -DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10744881629943848 s -INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.1095266342163086 s -DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=324192866249057640123247515126778826469, time:1750767029.3982937s req_ids:[8] -DEBUG 06-24 20:10:29 [manager.py:391] -ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:200.80208778381348ms total_cost_time:200.8492946624756ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7228 prompt_cache_len:5151 prompt_cache_ratio:0.7126452684006641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 -DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.10861611366271973 s -INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.11068964004516602 s -DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=180048252551783266138792039764171005669, time:1750767029.6094892s req_ids:[8] -DEBUG 06-24 20:10:29 [manager.py:391] -ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:211.83037757873535ms total_cost_time:211.87639236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7229 prompt_cache_len:5151 prompt_cache_ratio:0.7125466869553189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 -DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:29 [manager.py:224] router recive req id 8 cost time 0.1069951057434082 s -INFO 06-24 20:10:29 [manager.py:68] detokenization recv req id 8 cost time 0.10897970199584961 s -DEBUG 06-24 20:10:29 [manager.py:391] Prefill Batch: batch_id=318325686242876287528242902171023528252, time:1750767029.8241057s req_ids:[8] -DEBUG 06-24 20:10:29 [manager.py:391] -ERROR 06-24 20:10:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:204.84423637390137ms total_cost_time:204.89001274108887ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7230 prompt_cache_len:5151 prompt_cache_ratio:0.712448132780083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 -DEBUG 06-24 20:10:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10920882225036621 s -INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.11115670204162598 s -DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=324494310953011021023746819849373659221, time:1750767030.0353692s req_ids:[8] -DEBUG 06-24 20:10:30 [manager.py:391] -ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:29 lightllm_req_id:8 first_token_cost:205.01279830932617ms total_cost_time:205.05881309509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7231 prompt_cache_len:5151 prompt_cache_ratio:0.7123496058636426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 -DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10704183578491211 s -INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10891246795654297 s -DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=267027640250509903135018762451620827533, time:1750767030.2468457s req_ids:[8] -DEBUG 06-24 20:10:30 [manager.py:391] -ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:383.5582733154297ms total_cost_time:383.6038112640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7232 prompt_cache_len:5151 prompt_cache_ratio:0.7122511061946902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 -DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s -INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.1103353500366211 s -DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=162924482506875424748913019064898020679, time:1750767030.633132s req_ids:[8] -DEBUG 06-24 20:10:30 [manager.py:391] -ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:159.61050987243652ms total_cost_time:159.65652465820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7233 prompt_cache_len:5151 prompt_cache_ratio:0.7121526337619245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 -DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.10731077194213867 s -INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10909295082092285 s -DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=186526876941831326948447572594480015686, time:1750767030.796385s req_ids:[8] -DEBUG 06-24 20:10:30 [manager.py:391] -ERROR 06-24 20:10:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:190.2024745941162ms total_cost_time:190.2477741241455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7234 prompt_cache_len:5151 prompt_cache_ratio:0.7120541885540503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 -DEBUG 06-24 20:10:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:30 [manager.py:224] router recive req id 8 cost time 0.1077108383178711 s -INFO 06-24 20:10:30 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s -DEBUG 06-24 20:10:30 [manager.py:391] Prefill Batch: batch_id=274481688101174119330160124376803015988, time:1750767030.9959865s req_ids:[8] -DEBUG 06-24 20:10:30 [manager.py:391] -ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:30 lightllm_req_id:8 first_token_cost:204.15091514587402ms total_cost_time:204.19812202453613ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7235 prompt_cache_len:5151 prompt_cache_ratio:0.7119557705597789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 -DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10736680030822754 s -INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s -DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=62316219555418782600933360323131736727, time:1750767031.208444s req_ids:[8] -DEBUG 06-24 20:10:31 [manager.py:391] -ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:201.07650756835938ms total_cost_time:201.12109184265137ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7236 prompt_cache_len:5151 prompt_cache_ratio:0.7118573797678275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 -DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s -INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10956740379333496 s -DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=74697170739964952193704113619547784909, time:1750767031.4148755s req_ids:[8] -DEBUG 06-24 20:10:31 [manager.py:391] -ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.80036735534668ms total_cost_time:204.84375953674316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7237 prompt_cache_len:5151 prompt_cache_ratio:0.7117590161669199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 -DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10768771171569824 s -INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s -DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=56741706128974825936677918356888018615, time:1750767031.6266813s req_ids:[8] -DEBUG 06-24 20:10:31 [manager.py:391] -ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.3769359588623ms total_cost_time:204.41746711730957ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:7238 prompt_cache_len:5151 prompt_cache_ratio:0.7116606797457862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 -DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:31 [manager.py:224] router recive req id 8 cost time 0.10805654525756836 s -INFO 06-24 20:10:31 [manager.py:68] detokenization recv req id 8 cost time 0.10999107360839844 s -DEBUG 06-24 20:10:31 [manager.py:391] Prefill Batch: batch_id=252434110650053846965505190618454983385, time:1750767031.8353941s req_ids:[8] -DEBUG 06-24 20:10:31 [manager.py:391] -ERROR 06-24 20:10:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.10537719726562ms total_cost_time:204.14996147155762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7239 prompt_cache_len:5151 prompt_cache_ratio:0.7115623704931621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 -DEBUG 06-24 20:10:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10898637771606445 s -INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.1111764907836914 s -DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=22607813476252651674226409249720354387, time:1750767032.0439928s req_ids:[8] -DEBUG 06-24 20:10:32 [manager.py:391] -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:31 lightllm_req_id:8 first_token_cost:204.19740676879883ms total_cost_time:204.2393684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7240 prompt_cache_len:5151 prompt_cache_ratio:0.7114640883977901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 -DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10725021362304688 s -INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s -DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=314179247102126788612725897522878929399, time:1750767032.2612433s req_ids:[8] -DEBUG 06-24 20:10:32 [manager.py:391] -ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:173.16007614135742ms total_cost_time:173.2020378112793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7241 prompt_cache_len:5151 prompt_cache_ratio:0.7113658334484187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 -DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.10866951942443848 s -INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.11076235771179199 s -DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=87865614705104011906865004172318973259, time:1750767032.4333034s req_ids:[8] -DEBUG 06-24 20:10:32 [manager.py:391] -ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:198.68040084838867ms total_cost_time:198.72570037841797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7242 prompt_cache_len:5151 prompt_cache_ratio:0.7112676056338029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 -DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:32 [batch.py:51] router release req id 8 -INFO 06-24 20:10:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:32 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s -INFO 06-24 20:10:32 [manager.py:68] detokenization recv req id 8 cost time 0.10953211784362793 s -DEBUG 06-24 20:10:32 [manager.py:391] Prefill Batch: batch_id=155436571193538276875067680947187297986, time:1750767032.6370661s req_ids:[8] -DEBUG 06-24 20:10:32 [manager.py:391] -ERROR 06-24 20:10:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:369.52781677246094ms total_cost_time:369.57406997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7243 prompt_cache_len:5151 prompt_cache_ratio:0.7111694049427033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 -DEBUG 06-24 20:10:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10856461524963379 s -INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s -DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=336935784673416017800023648627878177954, time:1750767033.0074594s req_ids:[8] -DEBUG 06-24 20:10:33 [manager.py:391] -ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:32 lightllm_req_id:8 first_token_cost:197.30734825134277ms total_cost_time:197.35121726989746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7244 prompt_cache_len:5151 prompt_cache_ratio:0.7110712313638874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 -DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10713624954223633 s -INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10920381546020508 s -DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=209369300805153890953581975562276003969, time:1750767033.2155s req_ids:[8] -DEBUG 06-24 20:10:33 [manager.py:391] -ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:200.37603378295898ms total_cost_time:200.42061805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7245 prompt_cache_len:5151 prompt_cache_ratio:0.7109730848861283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 -DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10648012161254883 s -INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10804009437561035 s -DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=266898182224695576309616086475220382267, time:1750767033.4206557s req_ids:[8] -DEBUG 06-24 20:10:33 [manager.py:391] -ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:203.70888710021973ms total_cost_time:203.75418663024902ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7246 prompt_cache_len:5151 prompt_cache_ratio:0.710874965498206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 -DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10617375373840332 s -INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.10814499855041504 s -DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=33333936293761564256867121031755046839, time:1750767033.6349773s req_ids:[8] -DEBUG 06-24 20:10:33 [manager.py:391] -ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:211.8818759918213ms total_cost_time:211.9009494781494ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:7247 prompt_cache_len:5151 prompt_cache_ratio:0.7107768731889057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 -DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:33 [manager.py:224] router recive req id 8 cost time 0.10917878150939941 s -INFO 06-24 20:10:33 [manager.py:68] detokenization recv req id 8 cost time 0.11110424995422363 s -DEBUG 06-24 20:10:33 [manager.py:391] Prefill Batch: batch_id=2733833664530300635195244076976798363, time:1750767033.8469546s req_ids:[8] -DEBUG 06-24 20:10:33 [manager.py:391] -ERROR 06-24 20:10:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:202.20375061035156ms total_cost_time:202.24666595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7248 prompt_cache_len:5151 prompt_cache_ratio:0.7106788079470199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 -DEBUG 06-24 20:10:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s -INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s -DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=325979888953816957162591866982931370752, time:1750767034.0554545s req_ids:[8] -DEBUG 06-24 20:10:34 [manager.py:391] -ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:33 lightllm_req_id:8 first_token_cost:168.75863075256348ms total_cost_time:168.80178451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7249 prompt_cache_len:5151 prompt_cache_ratio:0.7105807697613464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 -DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10898685455322266 s -INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.11130332946777344 s -DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=321049004300940147340612428139819631718, time:1750767034.2264402s req_ids:[8] -DEBUG 06-24 20:10:34 [manager.py:391] -ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:203.00769805908203ms total_cost_time:203.05132865905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7250 prompt_cache_len:5151 prompt_cache_ratio:0.7104827586206897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 -DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.1091756820678711 s -INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.11124229431152344 s -DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=233057664364048161071206213507931870307, time:1750767034.4363675s req_ids:[8] -DEBUG 06-24 20:10:34 [manager.py:391] -ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:201.68375968933105ms total_cost_time:201.72667503356934ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7251 prompt_cache_len:5151 prompt_cache_ratio:0.7103847745138602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 -DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.10716652870178223 s -INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.10918164253234863 s -DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=164413056384362301709939298631924542218, time:1750767034.645918s req_ids:[8] -DEBUG 06-24 20:10:34 [manager.py:391] -ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:211.56811714172363ms total_cost_time:211.61341667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7252 prompt_cache_len:5151 prompt_cache_ratio:0.7102868174296746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 -DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:34 [manager.py:224] router recive req id 8 cost time 0.1080026626586914 s -INFO 06-24 20:10:34 [manager.py:68] detokenization recv req id 8 cost time 0.1099698543548584 s -DEBUG 06-24 20:10:34 [manager.py:391] Prefill Batch: batch_id=328328421806607295482736433101757707892, time:1750767034.8594172s req_ids:[8] -DEBUG 06-24 20:10:34 [manager.py:391] -ERROR 06-24 20:10:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:204.2546272277832ms total_cost_time:204.2982578277588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7253 prompt_cache_len:5151 prompt_cache_ratio:0.7101888873569557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 -DEBUG 06-24 20:10:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s -INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.11028552055358887 s -DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=69668054540230122915841868564786102137, time:1750767035.069043s req_ids:[8] -DEBUG 06-24 20:10:35 [manager.py:391] -ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:34 lightllm_req_id:8 first_token_cost:198.03547859191895ms total_cost_time:198.08053970336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7254 prompt_cache_len:5151 prompt_cache_ratio:0.7100909842845327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 -DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.1076955795288086 s -INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.1094350814819336 s -DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=48444522966012316390466421214308243207, time:1750767035.2744632s req_ids:[8] -DEBUG 06-24 20:10:35 [manager.py:391] -ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:166.80407524108887ms total_cost_time:166.83125495910645ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:7255 prompt_cache_len:5151 prompt_cache_ratio:0.7099931082012405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 -DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.30983519554138184 s -INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.3120441436767578 s -DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=19523284449129905030255925222745505602, time:1750767035.653979s req_ids:[8] -DEBUG 06-24 20:10:35 [manager.py:391] -ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:416.34416580200195ms total_cost_time:416.38827323913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7256 prompt_cache_len:5151 prompt_cache_ratio:0.7098952590959207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 -DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:35 [manager.py:224] router recive req id 8 cost time 0.10999369621276855 s -INFO 06-24 20:10:35 [manager.py:68] detokenization recv req id 8 cost time 0.11206793785095215 s -DEBUG 06-24 20:10:35 [manager.py:391] Prefill Batch: batch_id=197646652824363134957097674590760498934, time:1750767035.8692129s req_ids:[8] -DEBUG 06-24 20:10:35 [manager.py:391] -ERROR 06-24 20:10:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:200.73699951171875ms total_cost_time:200.77943801879883ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7257 prompt_cache_len:5151 prompt_cache_ratio:0.7097974369574204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 -DEBUG 06-24 20:10:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10861921310424805 s -INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10996055603027344 s -DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=301940679035798947702126647785207321382, time:1750767036.0899715s req_ids:[8] -DEBUG 06-24 20:10:36 [manager.py:391] -ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:35 lightllm_req_id:8 first_token_cost:219.66290473937988ms total_cost_time:219.72179412841797ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7258 prompt_cache_len:5151 prompt_cache_ratio:0.7096996417745935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 -DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10868430137634277 s -INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.11057138442993164 s -DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=226595325471622424024653024689584587024, time:1750767036.3136182s req_ids:[8] -DEBUG 06-24 20:10:36 [manager.py:391] -ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:222.46861457824707ms total_cost_time:222.51224517822266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7259 prompt_cache_len:5151 prompt_cache_ratio:0.7096018735362998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 -DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10711359977722168 s -INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10921120643615723 s -DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=298886262915320744112737120774802138228, time:1750767036.5290208s req_ids:[8] -DEBUG 06-24 20:10:36 [manager.py:391] -ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:204.3755054473877ms total_cost_time:204.43344116210938ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:7260 prompt_cache_len:5151 prompt_cache_ratio:0.709504132231405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 -DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10895490646362305 s -INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.11085987091064453 s -DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=235150593230651662889508222522870167543, time:1750767036.7382686s req_ids:[8] -DEBUG 06-24 20:10:36 [manager.py:391] -ERROR 06-24 20:10:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:204.79750633239746ms total_cost_time:204.84328269958496ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7261 prompt_cache_len:5151 prompt_cache_ratio:0.7094064178487811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 -DEBUG 06-24 20:10:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:36 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s -INFO 06-24 20:10:36 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s -DEBUG 06-24 20:10:36 [manager.py:391] Prefill Batch: batch_id=220528497006738283949075094653301708227, time:1750767036.9499292s req_ids:[8] -DEBUG 06-24 20:10:36 [manager.py:391] -DEBUG 06-24 20:10:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 32440.702 tokens/s -DEBUG 06-24 20:10:36 [stats.py:37] Avg prompt tokens throughput: 32431.743 tokens/s -DEBUG 06-24 20:10:36 [stats.py:37] Avg generate tokens throughput: 8.959 tokens/s -ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:36 lightllm_req_id:8 first_token_cost:209.8388671875ms total_cost_time:209.883451461792ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7262 prompt_cache_len:5151 prompt_cache_ratio:0.7093087303773066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 -DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.10781431198120117 s -INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.10967803001403809 s -DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=90077169022495300155589441148934207215, time:1750767037.1622255s req_ids:[8] -DEBUG 06-24 20:10:37 [manager.py:391] -ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:209.15555953979492ms total_cost_time:209.1994285583496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7263 prompt_cache_len:5151 prompt_cache_ratio:0.7092110698058653 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 -DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.1071176528930664 s -INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.10887813568115234 s -DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=243431306123721172412479796478299610021, time:1750767037.3761737s req_ids:[8] -DEBUG 06-24 20:10:37 [manager.py:391] -ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:205.63054084777832ms total_cost_time:205.67631721496582ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7264 prompt_cache_len:5151 prompt_cache_ratio:0.709113436123348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 -DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.10825252532958984 s -INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.11010313034057617 s -DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=235663764225709031458581537271945563588, time:1750767037.5893896s req_ids:[8] -DEBUG 06-24 20:10:37 [manager.py:391] -ERROR 06-24 20:10:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:379.90880012512207ms total_cost_time:379.9548149108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7265 prompt_cache_len:5151 prompt_cache_ratio:0.709015829318651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 -DEBUG 06-24 20:10:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:37 [manager.py:224] router recive req id 8 cost time 0.1084291934967041 s -INFO 06-24 20:10:37 [manager.py:68] detokenization recv req id 8 cost time 0.11028742790222168 s -DEBUG 06-24 20:10:37 [manager.py:391] Prefill Batch: batch_id=18099044438268323268202813297715585331, time:1750767037.9660473s req_ids:[8] -DEBUG 06-24 20:10:37 [manager.py:391] -ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:37 lightllm_req_id:8 first_token_cost:195.08767127990723ms total_cost_time:195.1448917388916ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7266 prompt_cache_len:5151 prompt_cache_ratio:0.7089182493806772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 -DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10971903800964355 s -INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11169052124023438 s -DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=112801311629044844999252805501410288395, time:1750767038.170881s req_ids:[8] -DEBUG 06-24 20:10:38 [manager.py:391] -ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:204.33807373046875ms total_cost_time:204.38551902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7267 prompt_cache_len:5151 prompt_cache_ratio:0.7088206962983349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 -DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10705018043518066 s -INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s -DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=209686261940783936477428539149758498168, time:1750767038.3808196s req_ids:[8] -DEBUG 06-24 20:10:38 [manager.py:391] -ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:208.73022079467773ms total_cost_time:208.77432823181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7268 prompt_cache_len:5151 prompt_cache_ratio:0.7087231700605393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 -DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s -INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s -DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=167877870579220179736183427106828961482, time:1750767038.593764s req_ids:[8] -DEBUG 06-24 20:10:38 [manager.py:391] -ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:207.3662281036377ms total_cost_time:207.41009712219238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7269 prompt_cache_len:5151 prompt_cache_ratio:0.7086256706562113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 -DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10982775688171387 s -INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.11152338981628418 s -DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=72788187165892404612792987463776655359, time:1750767038.805863s req_ids:[8] -DEBUG 06-24 20:10:38 [manager.py:391] -ERROR 06-24 20:10:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:169.755220413208ms total_cost_time:169.7995662689209ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7270 prompt_cache_len:5151 prompt_cache_ratio:0.7085281980742778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 -DEBUG 06-24 20:10:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:38 [manager.py:224] router recive req id 8 cost time 0.10771036148071289 s -INFO 06-24 20:10:38 [manager.py:68] detokenization recv req id 8 cost time 0.10951542854309082 s -DEBUG 06-24 20:10:38 [manager.py:391] Prefill Batch: batch_id=237640782563767769112376076906352866175, time:1750767038.9784048s req_ids:[8] -DEBUG 06-24 20:10:38 [manager.py:391] -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:38 lightllm_req_id:8 first_token_cost:196.21515274047852ms total_cost_time:196.2599754333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7271 prompt_cache_len:5151 prompt_cache_ratio:0.7084307523036721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 -DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10825443267822266 s -INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s -DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=192314997140673783629424668319017003407, time:1750767039.1811612s req_ids:[8] -DEBUG 06-24 20:10:39 [manager.py:391] -ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:206.78997039794922ms total_cost_time:206.8338394165039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7272 prompt_cache_len:5151 prompt_cache_ratio:0.7083333333333334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 -DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10756731033325195 s -INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.10937666893005371 s -DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=48575477382155707358820825832496407423, time:1750767039.3924413s req_ids:[8] -DEBUG 06-24 20:10:39 [manager.py:391] -ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:209.03682708740234ms total_cost_time:209.08021926879883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7273 prompt_cache_len:5151 prompt_cache_ratio:0.7082359411522068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 -DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10834050178527832 s -INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.11032438278198242 s -DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=168370239596028939439786990877196622028, time:1750767039.6107357s req_ids:[8] -DEBUG 06-24 20:10:39 [manager.py:391] -ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:219.4812297821045ms total_cost_time:219.52486038208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7274 prompt_cache_len:5151 prompt_cache_ratio:0.7081385757492439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 -DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:39 [manager.py:224] router recive req id 8 cost time 0.10720968246459961 s -INFO 06-24 20:10:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s -DEBUG 06-24 20:10:39 [manager.py:391] Prefill Batch: batch_id=35173719214567630523763336769299016594, time:1750767039.8446443s req_ids:[8] -DEBUG 06-24 20:10:39 [manager.py:391] -ERROR 06-24 20:10:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:222.79119491577148ms total_cost_time:222.8376865386963ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7275 prompt_cache_len:5151 prompt_cache_ratio:0.708041237113402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 -DEBUG 06-24 20:10:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.10679936408996582 s -INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10865998268127441 s -DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=186037920559638541463209925243644068529, time:1750767040.0587113s req_ids:[8] -DEBUG 06-24 20:10:40 [manager.py:391] -ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:39 lightllm_req_id:8 first_token_cost:373.95310401916504ms total_cost_time:373.9800453186035ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7276 prompt_cache_len:5151 prompt_cache_ratio:0.7079439252336449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 -DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.105194091796875 s -INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10732150077819824 s -DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=160500783290146718397027439758564782031, time:1750767040.4340706s req_ids:[8] -DEBUG 06-24 20:10:40 [manager.py:391] -ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:201.94363594055176ms total_cost_time:201.96866989135742ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7277 prompt_cache_len:5151 prompt_cache_ratio:0.7078466400989418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 -DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.10588622093200684 s -INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10793495178222656 s -DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=26450578057135793677165423474092906501, time:1750767040.6545265s req_ids:[8] -DEBUG 06-24 20:10:40 [manager.py:391] -ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:217.17405319213867ms total_cost_time:217.19813346862793ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7278 prompt_cache_len:5151 prompt_cache_ratio:0.7077493816982687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 -DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:40 [manager.py:224] router recive req id 8 cost time 0.1053471565246582 s -INFO 06-24 20:10:40 [manager.py:68] detokenization recv req id 8 cost time 0.10740137100219727 s -DEBUG 06-24 20:10:40 [manager.py:391] Prefill Batch: batch_id=65111602603070663856775687233915174605, time:1750767040.8696058s req_ids:[8] -DEBUG 06-24 20:10:40 [manager.py:391] -ERROR 06-24 20:10:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:209.3832492828369ms total_cost_time:209.4438076019287ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:7279 prompt_cache_len:5151 prompt_cache_ratio:0.7076521500206072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 -DEBUG 06-24 20:10:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10835027694702148 s -INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.1102135181427002 s -DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=340212487959947910105271440381458304022, time:1750767041.083428s req_ids:[8] -DEBUG 06-24 20:10:41 [manager.py:391] -ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:40 lightllm_req_id:8 first_token_cost:165.66967964172363ms total_cost_time:165.7125949859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7280 prompt_cache_len:5151 prompt_cache_ratio:0.7075549450549451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 -DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10791921615600586 s -INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.1098477840423584 s -DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=120705658204575691334420220369801096005, time:1750767041.2537646s req_ids:[8] -DEBUG 06-24 20:10:41 [manager.py:391] -ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:199.62644577026367ms total_cost_time:199.66959953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7281 prompt_cache_len:5151 prompt_cache_ratio:0.707457766790276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 -DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10724782943725586 s -INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.10930800437927246 s -DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=282924825497786259832217238419173368504, time:1750767041.460665s req_ids:[8] -DEBUG 06-24 20:10:41 [manager.py:391] -ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:205.11531829833984ms total_cost_time:205.16014099121094ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7282 prompt_cache_len:5151 prompt_cache_ratio:0.7073606152156001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 -DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10693979263305664 s -INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.10883164405822754 s -DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=103283245792934247857275274985882658373, time:1750767041.6732085s req_ids:[8] -DEBUG 06-24 20:10:41 [manager.py:391] -ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:205.39188385009766ms total_cost_time:205.45077323913574ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7283 prompt_cache_len:5151 prompt_cache_ratio:0.7072634903199231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 -DEBUG 06-24 20:10:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:41 [manager.py:224] router recive req id 8 cost time 0.10941433906555176 s -INFO 06-24 20:10:41 [manager.py:68] detokenization recv req id 8 cost time 0.11139178276062012 s -DEBUG 06-24 20:10:41 [manager.py:391] Prefill Batch: batch_id=80593836945201073127840683486257672087, time:1750767041.8846195s req_ids:[8] -DEBUG 06-24 20:10:41 [manager.py:391] -ERROR 06-24 20:10:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:200.61898231506348ms total_cost_time:200.65879821777344ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:7284 prompt_cache_len:5151 prompt_cache_ratio:0.707166392092257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 -DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10816264152526855 s -INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11006665229797363 s -DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=204473922872318872093972750179917253080, time:1750767042.0916972s req_ids:[8] -DEBUG 06-24 20:10:42 [manager.py:391] -ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:41 lightllm_req_id:8 first_token_cost:201.3876438140869ms total_cost_time:201.4334201812744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7285 prompt_cache_len:5151 prompt_cache_ratio:0.7070693205216197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 -DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10764122009277344 s -INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s -DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=128124623813229323274220139740691275408, time:1750767042.2988038s req_ids:[8] -DEBUG 06-24 20:10:42 [manager.py:391] -ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:361.8454933166504ms total_cost_time:361.9041442871094ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7286 prompt_cache_len:5151 prompt_cache_ratio:0.7069722755970355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 -DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.10896444320678711 s -INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11142396926879883 s -DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=97926578070434944574522350007247309946, time:1750767042.664236s req_ids:[8] -DEBUG 06-24 20:10:42 [manager.py:391] -ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:195.53494453430176ms total_cost_time:195.57905197143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7287 prompt_cache_len:5151 prompt_cache_ratio:0.706875257307534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 -DEBUG 06-24 20:10:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:42 [manager.py:224] router recive req id 8 cost time 0.1096341609954834 s -INFO 06-24 20:10:42 [manager.py:68] detokenization recv req id 8 cost time 0.11160492897033691 s -DEBUG 06-24 20:10:42 [manager.py:391] Prefill Batch: batch_id=132158968149909586994938019318998920611, time:1750767042.876787s req_ids:[8] -DEBUG 06-24 20:10:42 [manager.py:391] -ERROR 06-24 20:10:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:211.32564544677734ms total_cost_time:211.37070655822754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7288 prompt_cache_len:5151 prompt_cache_ratio:0.7067782656421515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 -DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s -INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091609001159668 s -DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=138349920203990430016601246316735737527, time:1750767043.0899665s req_ids:[8] -DEBUG 06-24 20:10:43 [manager.py:391] -ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:42 lightllm_req_id:8 first_token_cost:201.6618251800537ms total_cost_time:201.7068862915039ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7289 prompt_cache_len:5151 prompt_cache_ratio:0.7066813005899301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 -DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.1072683334350586 s -INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091618537902832 s -DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=124741380771814824092382270157001046431, time:1750767043.298819s req_ids:[8] -DEBUG 06-24 20:10:43 [manager.py:391] -ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:205.76190948486328ms total_cost_time:205.80530166625977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7290 prompt_cache_len:5151 prompt_cache_ratio:0.7065843621399177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 -DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10787701606750488 s -INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.10998868942260742 s -DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=25476171138796315862199414570633814024, time:1750767043.5151877s req_ids:[8] -DEBUG 06-24 20:10:43 [manager.py:391] -ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:212.10646629333496ms total_cost_time:212.15319633483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7291 prompt_cache_len:5151 prompt_cache_ratio:0.7064874502811685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 -DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10770273208618164 s -INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.11013102531433105 s -DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=103668737836625702171761116550150663454, time:1750767043.7285688s req_ids:[8] -DEBUG 06-24 20:10:43 [manager.py:391] -ERROR 06-24 20:10:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:208.526611328125ms total_cost_time:208.5702419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7292 prompt_cache_len:5151 prompt_cache_ratio:0.7063905650027428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 -DEBUG 06-24 20:10:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:43 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s -INFO 06-24 20:10:43 [manager.py:68] detokenization recv req id 8 cost time 0.11005091667175293 s -DEBUG 06-24 20:10:43 [manager.py:391] Prefill Batch: batch_id=85909545883973092041015216591173295903, time:1750767043.9410982s req_ids:[8] -DEBUG 06-24 20:10:43 [manager.py:391] -ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:43 lightllm_req_id:8 first_token_cost:206.86864852905273ms total_cost_time:206.91180229187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7293 prompt_cache_len:5151 prompt_cache_ratio:0.7062937062937062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 -DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.11012911796569824 s -INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.11256933212280273 s -DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=142901869390163619075359529293124477504, time:1750767044.1521049s req_ids:[8] -DEBUG 06-24 20:10:44 [manager.py:391] -ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:204.4088840484619ms total_cost_time:204.46443557739258ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:7294 prompt_cache_len:5151 prompt_cache_ratio:0.7061968741431314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 -DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.1071171760559082 s -INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s -DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=281543898630429601221932504852734354682, time:1750767044.3683124s req_ids:[8] -DEBUG 06-24 20:10:44 [manager.py:391] -ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:222.33104705810547ms total_cost_time:222.39017486572266ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7295 prompt_cache_len:5151 prompt_cache_ratio:0.706100068540096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 -DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.10967874526977539 s -INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.11212897300720215 s -DEBUG 06-24 20:10:44 [manager.py:391] Prefill Batch: batch_id=119883666103027908705969322485426229156, time:1750767044.5854666s req_ids:[8] -DEBUG 06-24 20:10:44 [manager.py:391] -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:201.3082504272461ms total_cost_time:201.35188102722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7296 prompt_cache_len:5151 prompt_cache_ratio:0.7060032894736842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 -DEBUG 06-24 20:10:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:44 [manager.py:224] router recive req id 8 cost time 0.3094611167907715 s -INFO 06-24 20:10:44 [manager.py:68] detokenization recv req id 8 cost time 0.3120291233062744 s -DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=194823158417543623527934530994831563619, time:1750767045.0140455s req_ids:[8] -DEBUG 06-24 20:10:45 [manager.py:391] -ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:44 lightllm_req_id:8 first_token_cost:429.87537384033203ms total_cost_time:429.9194812774658ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7297 prompt_cache_len:5151 prompt_cache_ratio:0.7059065369329861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 -DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10968494415283203 s -INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.11201214790344238 s -DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=233477900293633246251300850828977870970, time:1750767045.2294223s req_ids:[8] -DEBUG 06-24 20:10:45 [manager.py:391] -ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:208.8766098022461ms total_cost_time:208.92047882080078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7298 prompt_cache_len:5151 prompt_cache_ratio:0.7058098109070978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 -DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s -INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s -DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=62271928968247115429868126046498150319, time:1750767045.441696s req_ids:[8] -DEBUG 06-24 20:10:45 [manager.py:391] -ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:209.68341827392578ms total_cost_time:209.74206924438477ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7299 prompt_cache_len:5151 prompt_cache_ratio:0.7057131113851213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 -DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10932350158691406 s -INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.11162996292114258 s -DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=283890927453082896090528697948699020209, time:1750767045.654972s req_ids:[8] -DEBUG 06-24 20:10:45 [manager.py:391] -ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:205.7938575744629ms total_cost_time:205.83701133728027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7300 prompt_cache_len:5151 prompt_cache_ratio:0.7056164383561644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 -DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:45 [manager.py:224] router recive req id 8 cost time 0.10733652114868164 s -INFO 06-24 20:10:45 [manager.py:68] detokenization recv req id 8 cost time 0.10981154441833496 s -DEBUG 06-24 20:10:45 [manager.py:391] Prefill Batch: batch_id=37450076678544449488050399548789079055, time:1750767045.8679988s req_ids:[8] -DEBUG 06-24 20:10:45 [manager.py:391] -ERROR 06-24 20:10:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:208.04476737976074ms total_cost_time:208.08911323547363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7301 prompt_cache_len:5151 prompt_cache_ratio:0.7055197918093412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 -DEBUG 06-24 20:10:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.1076059341430664 s -INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10993838310241699 s -DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=214245342262997392794930319917498776295, time:1750767046.080606s req_ids:[8] -DEBUG 06-24 20:10:46 [manager.py:391] -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:45 lightllm_req_id:8 first_token_cost:206.91800117492676ms total_cost_time:206.9406509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7302 prompt_cache_len:5151 prompt_cache_ratio:0.7054231717337716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 -DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10524439811706543 s -INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10753488540649414 s -DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=101718473352239467030873801629077513741, time:1750767046.2934191s req_ids:[8] -DEBUG 06-24 20:10:46 [manager.py:391] -ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:211.2421989440918ms total_cost_time:211.28582954406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7303 prompt_cache_len:5151 prompt_cache_ratio:0.7053265781185815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 -INFO 06-24 20:10:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10908842086791992 s -INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.11162447929382324 s -DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=203044898641526475654016607069188912737, time:1750767046.5072513s req_ids:[8] -DEBUG 06-24 20:10:46 [manager.py:391] -ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:209.32650566101074ms total_cost_time:209.3665599822998ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:7304 prompt_cache_len:5151 prompt_cache_ratio:0.7052300109529025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 -DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10796213150024414 s -INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999488830566406 s -INFO 06-24 20:10:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=32340213592411030954585997555498175887, time:1750767046.7198253s req_ids:[8] -DEBUG 06-24 20:10:46 [manager.py:391] -ERROR 06-24 20:10:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:203.83477210998535ms total_cost_time:203.89318466186523ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:7305 prompt_cache_len:5151 prompt_cache_ratio:0.7051334702258727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 -DEBUG 06-24 20:10:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:46 [manager.py:224] router recive req id 8 cost time 0.10955238342285156 s -INFO 06-24 20:10:46 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s -DEBUG 06-24 20:10:46 [manager.py:391] Prefill Batch: batch_id=83335548004852674011446397348800612464, time:1750767046.925753s req_ids:[8] -DEBUG 06-24 20:10:46 [manager.py:391] -ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:10:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 31888.636 tokens/s -DEBUG 06-24 20:10:47 [stats.py:37] Avg prompt tokens throughput: 31879.784 tokens/s -DEBUG 06-24 20:10:47 [stats.py:37] Avg generate tokens throughput: 8.852 tokens/s -INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:46 lightllm_req_id:8 first_token_cost:199.49865341186523ms total_cost_time:199.540376663208ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7306 prompt_cache_len:5151 prompt_cache_ratio:0.7050369559266356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 -DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10735225677490234 s -INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.10929346084594727 s -DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=162247465693118716121961573930951560220, time:1750767047.1321516s req_ids:[8] -DEBUG 06-24 20:10:47 [manager.py:391] -ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:383.2435607910156ms total_cost_time:383.2898139953613ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7307 prompt_cache_len:5151 prompt_cache_ratio:0.7049404680443411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 -DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s -INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.1101689338684082 s -DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=222498418444786361982687246301777696453, time:1750767047.5158505s req_ids:[8] -DEBUG 06-24 20:10:47 [manager.py:391] -ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:195.90425491333008ms total_cost_time:195.95003128051758ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7308 prompt_cache_len:5151 prompt_cache_ratio:0.7048440065681445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 -DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.10785698890686035 s -INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.10981583595275879 s -DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=131810273439665120696535019073103969246, time:1750767047.7264242s req_ids:[8] -DEBUG 06-24 20:10:47 [manager.py:391] -ERROR 06-24 20:10:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:208.65941047668457ms total_cost_time:208.70256423950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7309 prompt_cache_len:5151 prompt_cache_ratio:0.7047475714872076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 -DEBUG 06-24 20:10:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:47 [manager.py:224] router recive req id 8 cost time 0.11016201972961426 s -INFO 06-24 20:10:47 [manager.py:68] detokenization recv req id 8 cost time 0.11211538314819336 s -DEBUG 06-24 20:10:47 [manager.py:391] Prefill Batch: batch_id=242558619468284472902352478849998760525, time:1750767047.9373255s req_ids:[8] -DEBUG 06-24 20:10:47 [manager.py:391] -ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:47 lightllm_req_id:8 first_token_cost:207.34763145446777ms total_cost_time:207.39245414733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7310 prompt_cache_len:5151 prompt_cache_ratio:0.7046511627906977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 -DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s -INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11032509803771973 s -DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=12884510638409178235254292396741050371, time:1750767048.1501095s req_ids:[8] -DEBUG 06-24 20:10:48 [manager.py:391] -ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:204.1158676147461ms total_cost_time:204.15997505187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7311 prompt_cache_len:5151 prompt_cache_ratio:0.7045547804677883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 -DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10796117782592773 s -INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s -DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=148057922452117575294072474300219806118, time:1750767048.3592095s req_ids:[8] -DEBUG 06-24 20:10:48 [manager.py:391] -ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:203.84740829467773ms total_cost_time:203.89032363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7312 prompt_cache_len:5151 prompt_cache_ratio:0.7044584245076586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 -DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10847043991088867 s -INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.11024236679077148 s -DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=172975489887196558188711160745698945670, time:1750767048.5673492s req_ids:[8] -DEBUG 06-24 20:10:48 [manager.py:391] -ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:203.25112342834473ms total_cost_time:203.2938003540039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7313 prompt_cache_len:5151 prompt_cache_ratio:0.7043620948994941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 -DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s -INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.1095724105834961 s -DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=131807397670034056073256056305535833953, time:1750767048.7788599s req_ids:[8] -DEBUG 06-24 20:10:48 [manager.py:391] -ERROR 06-24 20:10:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:200.07824897766113ms total_cost_time:200.1323699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:7314 prompt_cache_len:5151 prompt_cache_ratio:0.7042657916324856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 -DEBUG 06-24 20:10:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:48 [manager.py:224] router recive req id 8 cost time 0.10791158676147461 s -INFO 06-24 20:10:48 [manager.py:68] detokenization recv req id 8 cost time 0.10936141014099121 s -DEBUG 06-24 20:10:48 [manager.py:391] Prefill Batch: batch_id=155511238821116433683993091218704466907, time:1750767048.9847224s req_ids:[8] -DEBUG 06-24 20:10:48 [manager.py:391] -ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:48 lightllm_req_id:8 first_token_cost:206.18653297424316ms total_cost_time:206.22920989990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7315 prompt_cache_len:5151 prompt_cache_ratio:0.7041695146958304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 -DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.11001396179199219 s -INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s -DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=214104080320373597442255604445414321819, time:1750767049.1997285s req_ids:[8] -DEBUG 06-24 20:10:49 [manager.py:391] -ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:205.89780807495117ms total_cost_time:205.93762397766113ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:7316 prompt_cache_len:5151 prompt_cache_ratio:0.7040732640787315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 -DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.10821032524108887 s -INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.10951757431030273 s -DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=216691462765695122594664936852641311230, time:1750767049.4068573s req_ids:[8] -DEBUG 06-24 20:10:49 [manager.py:391] -ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:409.5494747161865ms total_cost_time:409.5945358276367ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7317 prompt_cache_len:5151 prompt_cache_ratio:0.7039770397703977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 -DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:49 [manager.py:224] router recive req id 8 cost time 0.10844063758850098 s -INFO 06-24 20:10:49 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s -DEBUG 06-24 20:10:49 [manager.py:391] Prefill Batch: batch_id=31662051254250471892806448142275701230, time:1750767049.8192294s req_ids:[8] -DEBUG 06-24 20:10:49 [manager.py:391] -ERROR 06-24 20:10:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:199.5689868927002ms total_cost_time:199.62096214294434ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:7318 prompt_cache_len:5151 prompt_cache_ratio:0.7038808417600437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 -DEBUG 06-24 20:10:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.1084280014038086 s -INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.10986733436584473 s -DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=335360585898198737944900427118214954570, time:1750767050.0268033s req_ids:[8] -DEBUG 06-24 20:10:50 [manager.py:391] -ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:49 lightllm_req_id:8 first_token_cost:203.83977890014648ms total_cost_time:203.88412475585938ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7319 prompt_cache_len:5151 prompt_cache_ratio:0.7037846700368903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 -DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:50 [batch.py:51] router release req id 8 -INFO 06-24 20:10:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10906481742858887 s -INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.11031866073608398 s -DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=284093857157982465514860989567850654524, time:1750767050.2383218s req_ids:[8] -DEBUG 06-24 20:10:50 [manager.py:391] -ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:205.91068267822266ms total_cost_time:205.95550537109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7320 prompt_cache_len:5151 prompt_cache_ratio:0.703688524590164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 -DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s -INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.10928559303283691 s -DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=159647160188703735805016841660777369951, time:1750767050.4501858s req_ids:[8] -DEBUG 06-24 20:10:50 [manager.py:391] -ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:205.05237579345703ms total_cost_time:205.09672164916992ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7321 prompt_cache_len:5151 prompt_cache_ratio:0.7035924054090971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 -DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10866475105285645 s -INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.1103360652923584 s -DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=65021463197538769737667566934514872154, time:1750767050.6760638s req_ids:[8] -DEBUG 06-24 20:10:50 [manager.py:391] -ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:237.54262924194336ms total_cost_time:237.58649826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7322 prompt_cache_len:5151 prompt_cache_ratio:0.7034963124829282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 -DEBUG 06-24 20:10:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:50 [manager.py:224] router recive req id 8 cost time 0.10817289352416992 s -INFO 06-24 20:10:50 [manager.py:68] detokenization recv req id 8 cost time 0.11016368865966797 s -DEBUG 06-24 20:10:50 [manager.py:391] Prefill Batch: batch_id=204447481982750042091179497161814546802, time:1750767050.8989658s req_ids:[8] -DEBUG 06-24 20:10:50 [manager.py:391] -ERROR 06-24 20:10:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:200.20365715026855ms total_cost_time:200.2699375152588ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:7323 prompt_cache_len:5151 prompt_cache_ratio:0.7034002458009013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 -DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.11021018028259277 s -INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.1122748851776123 s -DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=279205993072441188444220509166110482086, time:1750767051.1119697s req_ids:[8] -DEBUG 06-24 20:10:51 [manager.py:391] -ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:50 lightllm_req_id:8 first_token_cost:206.6948413848877ms total_cost_time:206.73680305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7324 prompt_cache_len:5151 prompt_cache_ratio:0.7033042053522666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 -DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10750222206115723 s -INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.10950231552124023 s -DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=22380373094516319786997426597829126667, time:1750767051.3232763s req_ids:[8] -DEBUG 06-24 20:10:51 [manager.py:391] -ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:205.83486557006836ms total_cost_time:205.87944984436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7325 prompt_cache_len:5151 prompt_cache_ratio:0.7032081911262799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 -DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10957860946655273 s -INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.11160802841186523 s -DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=159896924075375755684379307928335316542, time:1750767051.5456898s req_ids:[8] -DEBUG 06-24 20:10:51 [manager.py:391] -ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:215.43574333190918ms total_cost_time:215.47484397888184ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:7326 prompt_cache_len:5151 prompt_cache_ratio:0.7031122031122031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 -DEBUG 06-24 20:10:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:51 [manager.py:224] router recive req id 8 cost time 0.10700154304504395 s -INFO 06-24 20:10:51 [manager.py:68] detokenization recv req id 8 cost time 0.10908865928649902 s -DEBUG 06-24 20:10:51 [manager.py:391] Prefill Batch: batch_id=270898054671327502493741152687266374370, time:1750767051.754808s req_ids:[8] -DEBUG 06-24 20:10:51 [manager.py:391] -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:202.91876792907715ms total_cost_time:202.96406745910645ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7327 prompt_cache_len:5151 prompt_cache_ratio:0.703016241299304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 -DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.3110976219177246 s -INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.3131113052368164 s -DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=22735911283396578895209749089605901233, time:1750767052.17783s req_ids:[8] -DEBUG 06-24 20:10:52 [manager.py:391] -ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:51 lightllm_req_id:8 first_token_cost:426.2092113494873ms total_cost_time:426.2659549713135ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7328 prompt_cache_len:5151 prompt_cache_ratio:0.7029203056768559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 -DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10930109024047852 s -INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11132264137268066 s -DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=323241569393630522845609790233088084645, time:1750767052.3975043s req_ids:[8] -DEBUG 06-24 20:10:52 [manager.py:391] -ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.52444458007812ms total_cost_time:205.5683135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7329 prompt_cache_len:5151 prompt_cache_ratio:0.7028243962341384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 -DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10826683044433594 s -INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s -DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=173376044225472621086365261218701864554, time:1750767052.6120102s req_ids:[8] -DEBUG 06-24 20:10:52 [manager.py:391] -ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.8084011077881ms total_cost_time:205.8548927307129ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7330 prompt_cache_len:5151 prompt_cache_ratio:0.7027285129604366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 -DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:52 [manager.py:224] router recive req id 8 cost time 0.10857319831848145 s -INFO 06-24 20:10:52 [manager.py:68] detokenization recv req id 8 cost time 0.11063051223754883 s -DEBUG 06-24 20:10:52 [manager.py:391] Prefill Batch: batch_id=247706796193667989818515651279577926631, time:1750767052.8245676s req_ids:[8] -DEBUG 06-24 20:10:52 [manager.py:391] -ERROR 06-24 20:10:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:205.21283149719238ms total_cost_time:205.25646209716797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7331 prompt_cache_len:5151 prompt_cache_ratio:0.7026326558450416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 -DEBUG 06-24 20:10:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s -INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s -DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=303874733997143244583882110787305815590, time:1750767053.0342126s req_ids:[8] -DEBUG 06-24 20:10:53 [manager.py:391] -ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:52 lightllm_req_id:8 first_token_cost:206.7413330078125ms total_cost_time:206.7859172821045ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7332 prompt_cache_len:5151 prompt_cache_ratio:0.7025368248772504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 -DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:53 [batch.py:51] router release req id 8 -INFO 06-24 20:10:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:10:53 [statics_utils.py:24] mean first cost: 230.9962014731095 ms -INFO 06-24 20:10:53 [statics_utils.py:24] mean per token cost: 0.09323150455453197 ms -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10877418518066406 s -INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.11076951026916504 s -INFO 06-24 20:10:53 [manager.py:620] left req id 8can release False refcount 3 -DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=80566298018534670368675466917469600892, time:1750767053.2463005s req_ids:[8] -DEBUG 06-24 20:10:53 [manager.py:391] -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:205.5346965789795ms total_cost_time:205.59954643249512ms,out_token_counter:1 mean_per_token_cost_time: 0.064849853515625ms prompt_token_num:7333 prompt_cache_len:5151 prompt_cache_ratio:0.7024410200463658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 -DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10708403587341309 s -INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.1089775562286377 s -DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=317839866243944364860789863817722667801, time:1750767053.4598207s req_ids:[8] -DEBUG 06-24 20:10:53 [manager.py:391] -ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:208.9989185333252ms total_cost_time:209.04207229614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7334 prompt_cache_len:5151 prompt_cache_ratio:0.7023452413416962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 -DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.10699081420898438 s -INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.10904383659362793 s -DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=151879409406905662584552354619321757026, time:1750767053.6736982s req_ids:[8] -DEBUG 06-24 20:10:53 [manager.py:391] -ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:221.76122665405273ms total_cost_time:221.81987762451172ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7335 prompt_cache_len:5151 prompt_cache_ratio:0.7022494887525562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 -DEBUG 06-24 20:10:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:53 [manager.py:224] router recive req id 8 cost time 0.1093897819519043 s -INFO 06-24 20:10:53 [manager.py:68] detokenization recv req id 8 cost time 0.11072707176208496 s -DEBUG 06-24 20:10:53 [manager.py:391] Prefill Batch: batch_id=57716537177508755309603139784448212464, time:1750767053.9091198s req_ids:[8] -DEBUG 06-24 20:10:53 [manager.py:391] -ERROR 06-24 20:10:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:53 lightllm_req_id:8 first_token_cost:218.83416175842285ms total_cost_time:218.87993812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7336 prompt_cache_len:5151 prompt_cache_ratio:0.7021537622682661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 -DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.11016392707824707 s -DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=131112938989285941164537059457064845759, time:1750767054.1251578s req_ids:[8] -DEBUG 06-24 20:10:54 [manager.py:391] -ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:209.9928855895996ms total_cost_time:210.0374698638916ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7337 prompt_cache_len:5151 prompt_cache_ratio:0.7020580618781518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 -DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10692691802978516 s -INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.1094052791595459 s -DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=212613693472918364232639730004469037145, time:1750767054.3402786s req_ids:[8] -DEBUG 06-24 20:10:54 [manager.py:391] -ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:378.12018394470215ms total_cost_time:378.16357612609863ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7338 prompt_cache_len:5151 prompt_cache_ratio:0.7019623875715454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 -DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10837459564208984 s -INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.11049795150756836 s -DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=1179205756127142338216323444751613379, time:1750767054.7193651s req_ids:[8] -DEBUG 06-24 20:10:54 [manager.py:391] -ERROR 06-24 20:10:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:202.06236839294434ms total_cost_time:202.10623741149902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7339 prompt_cache_len:5151 prompt_cache_ratio:0.7018667393377844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 -DEBUG 06-24 20:10:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:54 [manager.py:224] router recive req id 8 cost time 0.10769081115722656 s -INFO 06-24 20:10:54 [manager.py:68] detokenization recv req id 8 cost time 0.10984110832214355 s -DEBUG 06-24 20:10:54 [manager.py:391] Prefill Batch: batch_id=71641422852253229981606777297116572436, time:1750767054.9300795s req_ids:[8] -DEBUG 06-24 20:10:54 [manager.py:391] -ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:54 lightllm_req_id:8 first_token_cost:205.78813552856445ms total_cost_time:205.84583282470703ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7340 prompt_cache_len:5151 prompt_cache_ratio:0.7017711171662125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 -DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10889339447021484 s -INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11090278625488281 s -DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=201054428637749086976384757490202543929, time:1750767055.1429398s req_ids:[8] -DEBUG 06-24 20:10:55 [manager.py:391] -ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.05213737487793ms total_cost_time:205.0952911376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7341 prompt_cache_len:5151 prompt_cache_ratio:0.701675521046179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 -DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10735297203063965 s -INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.10945558547973633 s -DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=136333656841176072696256853514154589337, time:1750767055.3535612s req_ids:[8] -DEBUG 06-24 20:10:55 [manager.py:391] -ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.2309513092041ms total_cost_time:205.28507232666016ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:7342 prompt_cache_len:5151 prompt_cache_ratio:0.701579950967039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 -DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s -INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11039948463439941 s -DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=158370428512695886902722263078567266670, time:1750767055.5645573s req_ids:[8] -DEBUG 06-24 20:10:55 [manager.py:391] -ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.6751251220703ms total_cost_time:205.71613311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:7343 prompt_cache_len:5151 prompt_cache_ratio:0.7014844069181534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 -DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.10776782035827637 s -INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11047887802124023 s -DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=202473020901166999540914998573824548304, time:1750767055.776688s req_ids:[8] -DEBUG 06-24 20:10:55 [manager.py:391] -ERROR 06-24 20:10:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:208.9860439300537ms total_cost_time:209.03873443603516ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7344 prompt_cache_len:5151 prompt_cache_ratio:0.7013888888888888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 -DEBUG 06-24 20:10:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:55 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s -INFO 06-24 20:10:55 [manager.py:68] detokenization recv req id 8 cost time 0.11045527458190918 s -DEBUG 06-24 20:10:55 [manager.py:391] Prefill Batch: batch_id=210499403461884205756609329565684921909, time:1750767055.9909363s req_ids:[8] -DEBUG 06-24 20:10:55 [manager.py:391] -ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:55 lightllm_req_id:8 first_token_cost:205.78861236572266ms total_cost_time:205.83295822143555ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7345 prompt_cache_len:5151 prompt_cache_ratio:0.7012933968686181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 -DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.10727882385253906 s -INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.10935759544372559 s -DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=214992066548604897783592603758629816394, time:1750767056.2052305s req_ids:[8] -DEBUG 06-24 20:10:56 [manager.py:391] -ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:209.97166633605957ms total_cost_time:210.01577377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7346 prompt_cache_len:5151 prompt_cache_ratio:0.7011979308467193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 -DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.10955357551574707 s -INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.11095857620239258 s -DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=252496551072470857929874306331235812320, time:1750767056.4200165s req_ids:[8] -DEBUG 06-24 20:10:56 [manager.py:391] -ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:207.57436752319336ms total_cost_time:207.61775970458984ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7347 prompt_cache_len:5151 prompt_cache_ratio:0.7011024908125766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 -DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:56 [manager.py:224] router recive req id 8 cost time 0.1080775260925293 s -INFO 06-24 20:10:56 [manager.py:68] detokenization recv req id 8 cost time 0.11006593704223633 s -DEBUG 06-24 20:10:56 [manager.py:391] Prefill Batch: batch_id=83842210521168743064324428769028079487, time:1750767056.631914s req_ids:[8] -DEBUG 06-24 20:10:56 [manager.py:391] -ERROR 06-24 20:10:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:382.60769844055176ms total_cost_time:382.65275955200195ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7348 prompt_cache_len:5151 prompt_cache_ratio:0.7010070767555797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 -DEBUG 06-24 20:10:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s -INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10955929756164551 s -DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=128429492537027966226609494702577465483, time:1750767057.0157988s req_ids:[8] -DEBUG 06-24 20:10:57 [manager.py:391] -DEBUG 06-24 20:10:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 31481.431 tokens/s -DEBUG 06-24 20:10:57 [stats.py:37] Avg prompt tokens throughput: 31472.941 tokens/s -DEBUG 06-24 20:10:57 [stats.py:37] Avg generate tokens throughput: 8.490 tokens/s -ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:56 lightllm_req_id:8 first_token_cost:200.7002830505371ms total_cost_time:200.7436752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7349 prompt_cache_len:5151 prompt_cache_ratio:0.7009116886651245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 -DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10685420036315918 s -INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10940146446228027 s -DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=288418038020713596785352609430658875254, time:1750767057.2284973s req_ids:[8] -DEBUG 06-24 20:10:57 [manager.py:391] -ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:208.94718170166016ms total_cost_time:208.99200439453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7350 prompt_cache_len:5151 prompt_cache_ratio:0.7008163265306122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 -DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10704827308654785 s -INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10893797874450684 s -DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=148478776457784561871096967559142469882, time:1750767057.4491668s req_ids:[8] -DEBUG 06-24 20:10:57 [manager.py:391] -ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:216.23945236206055ms total_cost_time:216.28308296203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7351 prompt_cache_len:5151 prompt_cache_ratio:0.7007209903414502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 -DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10738039016723633 s -INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10949850082397461 s -DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=194783267836401790684942073979424805220, time:1750767057.6631773s req_ids:[8] -DEBUG 06-24 20:10:57 [manager.py:391] -ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:201.64895057678223ms total_cost_time:201.6913890838623ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7352 prompt_cache_len:5151 prompt_cache_ratio:0.7006256800870512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 -DEBUG 06-24 20:10:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:57 [manager.py:224] router recive req id 8 cost time 0.10724782943725586 s -INFO 06-24 20:10:57 [manager.py:68] detokenization recv req id 8 cost time 0.10930109024047852 s -DEBUG 06-24 20:10:57 [manager.py:391] Prefill Batch: batch_id=321620402646718828637577748429759378866, time:1750767057.87115s req_ids:[8] -DEBUG 06-24 20:10:57 [manager.py:391] -ERROR 06-24 20:10:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:205.9316635131836ms total_cost_time:205.9946060180664ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7353 prompt_cache_len:5151 prompt_cache_ratio:0.700530395756834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 -DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s -INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s -DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=1813075882591651542096087047651490230, time:1750767058.0976398s req_ids:[8] -DEBUG 06-24 20:10:58 [manager.py:391] -ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:57 lightllm_req_id:8 first_token_cost:221.12226486206055ms total_cost_time:221.16613388061523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7354 prompt_cache_len:5151 prompt_cache_ratio:0.700435137340223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 -DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.11002707481384277 s -INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11221837997436523 s -DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=331777170448266398232102119832378698544, time:1750767058.311498s req_ids:[8] -DEBUG 06-24 20:10:58 [manager.py:391] -ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:205.93929290771484ms total_cost_time:205.98554611206055ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7355 prompt_cache_len:5151 prompt_cache_ratio:0.7003399048266485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 -DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s -INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s -DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=103223605056089905235631302160333488042, time:1750767058.5231457s req_ids:[8] -DEBUG 06-24 20:10:58 [manager.py:391] -ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:203.97138595581055ms total_cost_time:204.02765274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7356 prompt_cache_len:5151 prompt_cache_ratio:0.7002446982055465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 -DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.1081852912902832 s -INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012864112854004 s -DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=182632211483088519975840098218018721385, time:1750767058.7347765s req_ids:[8] -DEBUG 06-24 20:10:58 [manager.py:391] -ERROR 06-24 20:10:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:204.68759536743164ms total_cost_time:204.73241806030273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7357 prompt_cache_len:5151 prompt_cache_ratio:0.7001495174663586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 -DEBUG 06-24 20:10:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:58 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s -INFO 06-24 20:10:58 [manager.py:68] detokenization recv req id 8 cost time 0.10902738571166992 s -DEBUG 06-24 20:10:58 [manager.py:391] Prefill Batch: batch_id=216356244524370284264384879171389254477, time:1750767058.9457476s req_ids:[8] -DEBUG 06-24 20:10:58 [manager.py:391] -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:10:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:58 lightllm_req_id:8 first_token_cost:219.1781997680664ms total_cost_time:219.2220687866211ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7358 prompt_cache_len:5151 prompt_cache_ratio:0.7000543625985323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 -DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.30957484245300293 s -INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.3116602897644043 s -DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=2264642607349805404401190042475125373, time:1750767059.3760958s req_ids:[8] -DEBUG 06-24 20:10:59 [manager.py:391] -ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:417.2382354736328ms total_cost_time:417.283296585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7359 prompt_cache_len:5151 prompt_cache_ratio:0.6999592335915206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 -DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s -INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.11086463928222656 s -DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=66519183249440458341463583743828010204, time:1750767059.592086s req_ids:[8] -DEBUG 06-24 20:10:59 [manager.py:391] -ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:206.94756507873535ms total_cost_time:206.99429512023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7360 prompt_cache_len:5151 prompt_cache_ratio:0.6998641304347826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 -DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:10:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:10:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:10:59 [manager.py:224] router recive req id 8 cost time 0.10973572731018066 s -INFO 06-24 20:10:59 [manager.py:68] detokenization recv req id 8 cost time 0.11189842224121094 s -DEBUG 06-24 20:10:59 [manager.py:391] Prefill Batch: batch_id=128010648796410410069244495630260560290, time:1750767059.8066742s req_ids:[8] -DEBUG 06-24 20:10:59 [manager.py:391] -ERROR 06-24 20:10:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:10:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:10:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:206.68530464172363ms total_cost_time:206.73155784606934ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7361 prompt_cache_len:5151 prompt_cache_ratio:0.6997690531177829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:10:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 -DEBUG 06-24 20:10:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:10:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:10:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:10:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:10:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.1090555191040039 s -INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s -DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=73649524278685635958343730194231164109, time:1750767060.0386567s req_ids:[8] -DEBUG 06-24 20:11:00 [manager.py:391] -ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:10:59 lightllm_req_id:8 first_token_cost:225.81052780151367ms total_cost_time:225.86822509765625ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:7362 prompt_cache_len:5151 prompt_cache_ratio:0.6996740016299918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 -DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.11015439033508301 s -INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11232709884643555 s -DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=83706735499495914136268620612917560818, time:1750767060.2527764s req_ids:[8] -DEBUG 06-24 20:11:00 [manager.py:391] -ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:213.3166790008545ms total_cost_time:213.3643627166748ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7363 prompt_cache_len:5151 prompt_cache_ratio:0.6995789759608855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 -DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10936665534973145 s -INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.11142969131469727 s -DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=284050316983779356658319656710424368611, time:1750767060.4682941s req_ids:[8] -DEBUG 06-24 20:11:00 [manager.py:391] -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:200.6206512451172ms total_cost_time:200.66237449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7364 prompt_cache_len:5151 prompt_cache_ratio:0.6994839760999457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 -DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10686254501342773 s -INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.10883450508117676 s -DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=19753685728978327083420076091198538540, time:1750767060.679862s req_ids:[8] -DEBUG 06-24 20:11:00 [manager.py:391] -ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:201.65491104125977ms total_cost_time:201.69782638549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7365 prompt_cache_len:5151 prompt_cache_ratio:0.6993890020366599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 -DEBUG 06-24 20:11:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:00 [batch.py:51] router release req id 8 -INFO 06-24 20:11:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:00 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s -INFO 06-24 20:11:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s -DEBUG 06-24 20:11:00 [manager.py:391] Prefill Batch: batch_id=49545670795472131664474338655634924967, time:1750767060.8987358s req_ids:[8] -DEBUG 06-24 20:11:00 [manager.py:391] -ERROR 06-24 20:11:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:220.58963775634766ms total_cost_time:220.63231468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7366 prompt_cache_len:5151 prompt_cache_ratio:0.6992940537605213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 -DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10546112060546875 s -INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10737824440002441 s -DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=227299348526851670516411576296245009630, time:1750767061.1127481s req_ids:[8] -DEBUG 06-24 20:11:01 [manager.py:391] -ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:00 lightllm_req_id:8 first_token_cost:204.18882369995117ms total_cost_time:204.23364639282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7367 prompt_cache_len:5151 prompt_cache_ratio:0.699199131261029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 -DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10712885856628418 s -INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s -DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=294904393198683879510940222567556944702, time:1750767061.3243184s req_ids:[8] -DEBUG 06-24 20:11:01 [manager.py:391] -ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:203.16743850708008ms total_cost_time:203.21011543273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7368 prompt_cache_len:5151 prompt_cache_ratio:0.6991042345276873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 -DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s -INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.11115813255310059 s -DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=329111139101046428596288774370877026745, time:1750767061.5334024s req_ids:[8] -DEBUG 06-24 20:11:01 [manager.py:391] -ERROR 06-24 20:11:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:383.50439071655273ms total_cost_time:383.54945182800293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7369 prompt_cache_len:5151 prompt_cache_ratio:0.6990093635500068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 -DEBUG 06-24 20:11:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:01 [manager.py:224] router recive req id 8 cost time 0.10748457908630371 s -INFO 06-24 20:11:01 [manager.py:68] detokenization recv req id 8 cost time 0.10957789421081543 s -DEBUG 06-24 20:11:01 [manager.py:391] Prefill Batch: batch_id=55895185674284968159937508345713491013, time:1750767061.918857s req_ids:[8] -DEBUG 06-24 20:11:01 [manager.py:391] -ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:01 lightllm_req_id:8 first_token_cost:201.11513137817383ms total_cost_time:201.16376876831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:7370 prompt_cache_len:5151 prompt_cache_ratio:0.6989145183175034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 -DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.11032509803771973 s -INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11241269111633301 s -DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=82191193730356992513365928255088525865, time:1750767062.130072s req_ids:[8] -DEBUG 06-24 20:11:02 [manager.py:391] -ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:201.37691497802734ms total_cost_time:201.4174461364746ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:7371 prompt_cache_len:5151 prompt_cache_ratio:0.6988196988196989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 -DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10887813568115234 s -INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11119318008422852 s -DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=138765018916252575226916705138083812411, time:1750767062.349686s req_ids:[8] -DEBUG 06-24 20:11:02 [manager.py:391] -ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:218.44983100891113ms total_cost_time:218.49322319030762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7372 prompt_cache_len:5151 prompt_cache_ratio:0.6987249050461205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 -DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s -INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093473434448242 s -DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=19661280138114560118248711856672004970, time:1750767062.5639071s req_ids:[8] -DEBUG 06-24 20:11:02 [manager.py:391] -ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:226.96638107299805ms total_cost_time:227.01311111450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7373 prompt_cache_len:5151 prompt_cache_ratio:0.6986301369863014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 -DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10653948783874512 s -INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.10851645469665527 s -DEBUG 06-24 20:11:02 [manager.py:391] Prefill Batch: batch_id=152400839019831143599401829167042997652, time:1750767062.790551s req_ids:[8] -DEBUG 06-24 20:11:02 [manager.py:391] -ERROR 06-24 20:11:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:202.80098915100098ms total_cost_time:202.84438133239746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7374 prompt_cache_len:5151 prompt_cache_ratio:0.6985353946297803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 -DEBUG 06-24 20:11:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:02 [manager.py:224] router recive req id 8 cost time 0.10629916191101074 s -INFO 06-24 20:11:02 [manager.py:68] detokenization recv req id 8 cost time 0.10836982727050781 s -DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=111804252221161114773714917822451462088, time:1750767063.014768s req_ids:[8] -DEBUG 06-24 20:11:03 [manager.py:391] -ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:02 lightllm_req_id:8 first_token_cost:210.43944358825684ms total_cost_time:210.45780181884766ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:7375 prompt_cache_len:5151 prompt_cache_ratio:0.6984406779661017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 -DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10505843162536621 s -INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.10683917999267578 s -DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=4638162854259765252694369501378054258, time:1750767063.2233026s req_ids:[8] -DEBUG 06-24 20:11:03 [manager.py:391] -ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:191.82348251342773ms total_cost_time:191.87045097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7376 prompt_cache_len:5151 prompt_cache_ratio:0.6983459869848156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 -DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s -INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.10959005355834961 s -DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=317523030494298247547354973342494453228, time:1750767063.4166574s req_ids:[8] -DEBUG 06-24 20:11:03 [manager.py:391] -ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:205.20472526550293ms total_cost_time:205.26385307312012ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7377 prompt_cache_len:5151 prompt_cache_ratio:0.6982513216754779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 -DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10801386833190918 s -INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s -DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=324472046932402268415492465854863902769, time:1750767063.6267533s req_ids:[8] -DEBUG 06-24 20:11:03 [manager.py:391] -ERROR 06-24 20:11:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:196.43759727478027ms total_cost_time:196.47932052612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7378 prompt_cache_len:5151 prompt_cache_ratio:0.6981566820276498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 -DEBUG 06-24 20:11:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:03 [manager.py:224] router recive req id 8 cost time 0.10714912414550781 s -INFO 06-24 20:11:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s -DEBUG 06-24 20:11:03 [manager.py:391] Prefill Batch: batch_id=93925838868649264423400233645302644116, time:1750767063.8278341s req_ids:[8] -DEBUG 06-24 20:11:03 [manager.py:391] -ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:03 lightllm_req_id:8 first_token_cost:359.33876037597656ms total_cost_time:359.38334465026855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7379 prompt_cache_len:5151 prompt_cache_ratio:0.6980620680308985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 -DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10827159881591797 s -INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.11037421226501465 s -DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=291168360001639087648506427455969920477, time:1750767064.1908667s req_ids:[8] -DEBUG 06-24 20:11:04 [manager.py:391] -ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:202.8830051422119ms total_cost_time:202.927827835083ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7380 prompt_cache_len:5151 prompt_cache_ratio:0.6979674796747968 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 -DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:04 [batch.py:51] router release req id 8 -INFO 06-24 20:11:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10750436782836914 s -INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.10943198204040527 s -DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=87886597218279036644459730700986809497, time:1750767064.410748s req_ids:[8] -DEBUG 06-24 20:11:04 [manager.py:391] -ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:214.57195281982422ms total_cost_time:214.6139144897461ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7381 prompt_cache_len:5151 prompt_cache_ratio:0.697872916948923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 -DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10829567909240723 s -INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.1107473373413086 s -DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=3799367668225916929373851165032803228, time:1750767064.6259s req_ids:[8] -DEBUG 06-24 20:11:04 [manager.py:391] -ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:206.11572265625ms total_cost_time:206.1593532562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7382 prompt_cache_len:5151 prompt_cache_ratio:0.6977783798428611 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 -DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:04 [manager.py:224] router recive req id 8 cost time 0.10827946662902832 s -INFO 06-24 20:11:04 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s -DEBUG 06-24 20:11:04 [manager.py:391] Prefill Batch: batch_id=111627068176744698271439517244740906258, time:1750767064.8386378s req_ids:[8] -DEBUG 06-24 20:11:04 [manager.py:391] -ERROR 06-24 20:11:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:203.77326011657715ms total_cost_time:203.81593704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7383 prompt_cache_len:5151 prompt_cache_ratio:0.6976838683462008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 -DEBUG 06-24 20:11:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.1067655086517334 s -INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.10935497283935547 s -DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=254904684821976854590610823142432295856, time:1750767065.047482s req_ids:[8] -DEBUG 06-24 20:11:05 [manager.py:391] -ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:04 lightllm_req_id:8 first_token_cost:204.5764923095703ms total_cost_time:204.6217918395996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7384 prompt_cache_len:5151 prompt_cache_ratio:0.6975893824485374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 -DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10688376426696777 s -INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.1089639663696289 s -DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=59771303662292638734836263390182195397, time:1750767065.2616735s req_ids:[8] -DEBUG 06-24 20:11:05 [manager.py:391] -ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:208.573579788208ms total_cost_time:208.6317539215088ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:7385 prompt_cache_len:5151 prompt_cache_ratio:0.697494922139472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 -DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.1101675033569336 s -INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11226344108581543 s -DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=16595835252883497813491293212124451774, time:1750767065.4747312s req_ids:[8] -DEBUG 06-24 20:11:05 [manager.py:391] -ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:207.62133598327637ms total_cost_time:207.66568183898926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7386 prompt_cache_len:5151 prompt_cache_ratio:0.6974004874086109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 -DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10902690887451172 s -INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11122322082519531 s -DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=157185814312803514271869536270425242219, time:1750767065.6896033s req_ids:[8] -DEBUG 06-24 20:11:05 [manager.py:391] -ERROR 06-24 20:11:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:220.05009651184082ms total_cost_time:220.0925350189209ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7387 prompt_cache_len:5151 prompt_cache_ratio:0.6973060782455666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 -DEBUG 06-24 20:11:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:05 [manager.py:224] router recive req id 8 cost time 0.10790824890136719 s -INFO 06-24 20:11:05 [manager.py:68] detokenization recv req id 8 cost time 0.11027979850769043 s -DEBUG 06-24 20:11:05 [manager.py:391] Prefill Batch: batch_id=249767058022291023164933967361379440630, time:1750767065.9313347s req_ids:[8] -DEBUG 06-24 20:11:05 [manager.py:391] -ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:05 lightllm_req_id:8 first_token_cost:224.26986694335938ms total_cost_time:224.33018684387207ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7388 prompt_cache_len:5151 prompt_cache_ratio:0.6972116946399567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 -DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s -INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s -DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=72988453671020820833492621132462256626, time:1750767066.151361s req_ids:[8] -DEBUG 06-24 20:11:06 [manager.py:391] -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:221.15564346313477ms total_cost_time:221.20070457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7389 prompt_cache_len:5151 prompt_cache_ratio:0.6971173365814048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 -DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.30930161476135254 s -INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.31130504608154297 s -DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=68609562879362198066827347485825263785, time:1750767066.5766842s req_ids:[8] -DEBUG 06-24 20:11:06 [manager.py:391] -ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:414.3202304840088ms total_cost_time:414.3640995025635ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7390 prompt_cache_len:5151 prompt_cache_ratio:0.6970230040595399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 -DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s -INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.10947847366333008 s -DEBUG 06-24 20:11:06 [manager.py:391] Prefill Batch: batch_id=333201663625202988012110722551252224850, time:1750767066.7924585s req_ids:[8] -DEBUG 06-24 20:11:06 [manager.py:391] -ERROR 06-24 20:11:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:204.40268516540527ms total_cost_time:204.44655418395996ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7391 prompt_cache_len:5151 prompt_cache_ratio:0.6969286970639967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 -DEBUG 06-24 20:11:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:06 [manager.py:224] router recive req id 8 cost time 0.10867071151733398 s -INFO 06-24 20:11:06 [manager.py:68] detokenization recv req id 8 cost time 0.11066508293151855 s -DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=214718624767574882782097065110452616956, time:1750767067.003334s req_ids:[8] -DEBUG 06-24 20:11:07 [manager.py:391] -ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:11:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 31479.013 tokens/s -DEBUG 06-24 20:11:07 [stats.py:37] Avg prompt tokens throughput: 31470.374 tokens/s -DEBUG 06-24 20:11:07 [stats.py:37] Avg generate tokens throughput: 8.638 tokens/s -INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:06 lightllm_req_id:8 first_token_cost:205.96885681152344ms total_cost_time:206.02750778198242ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:7392 prompt_cache_len:5151 prompt_cache_ratio:0.6968344155844156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 -DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.11130332946777344 s -INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11334538459777832 s -DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=181972095624183535281932648711790474471, time:1750767067.216142s req_ids:[8] -DEBUG 06-24 20:11:07 [manager.py:391] -ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:215.81101417541504ms total_cost_time:215.8682346343994ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:7393 prompt_cache_len:5151 prompt_cache_ratio:0.6967401596104423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 -DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.11033225059509277 s -INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11220502853393555 s -DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=199431724135214611544607853810347254679, time:1750767067.4325259s req_ids:[8] -DEBUG 06-24 20:11:07 [manager.py:391] -ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:156.98814392089844ms total_cost_time:157.0439338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:7394 prompt_cache_len:5151 prompt_cache_ratio:0.6966459291317284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 -DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.10992789268493652 s -INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11201977729797363 s -DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=209834024425992269867616880824656120165, time:1750767067.5966449s req_ids:[8] -DEBUG 06-24 20:11:07 [manager.py:391] -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:218.57547760009766ms total_cost_time:218.6279296875ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7395 prompt_cache_len:5151 prompt_cache_ratio:0.696551724137931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 -DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:07 [manager.py:224] router recive req id 8 cost time 0.10839295387268066 s -INFO 06-24 20:11:07 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s -DEBUG 06-24 20:11:07 [manager.py:391] Prefill Batch: batch_id=180742489393682375246702233078365593616, time:1750767067.8252866s req_ids:[8] -DEBUG 06-24 20:11:07 [manager.py:391] -ERROR 06-24 20:11:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:198.90308380126953ms total_cost_time:198.94671440124512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7396 prompt_cache_len:5151 prompt_cache_ratio:0.6964575446187128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 -DEBUG 06-24 20:11:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10776162147521973 s -INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.10971236228942871 s -DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=124965555470040271358904910163908282243, time:1750767068.0312061s req_ids:[8] -DEBUG 06-24 20:11:08 [manager.py:391] -ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:07 lightllm_req_id:8 first_token_cost:204.24413681030273ms total_cost_time:204.29682731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7397 prompt_cache_len:5151 prompt_cache_ratio:0.6963633905637421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 -DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s -INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.11103367805480957 s -DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=93154533368300523820641178779356754311, time:1750767068.2396228s req_ids:[8] -DEBUG 06-24 20:11:08 [manager.py:391] -ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:201.99966430664062ms total_cost_time:202.0413875579834ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7398 prompt_cache_len:5151 prompt_cache_ratio:0.6962692619626926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 -DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.10927486419677734 s -INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.11123466491699219 s -DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=129381620587932354756920402419366880146, time:1750767068.4518588s req_ids:[8] -DEBUG 06-24 20:11:08 [manager.py:391] -ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:218.37139129638672ms total_cost_time:218.4302806854248ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7399 prompt_cache_len:5151 prompt_cache_ratio:0.6961751588052439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 -DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:08 [manager.py:224] router recive req id 8 cost time 0.11020779609680176 s -INFO 06-24 20:11:08 [manager.py:68] detokenization recv req id 8 cost time 0.1122291088104248 s -DEBUG 06-24 20:11:08 [manager.py:391] Prefill Batch: batch_id=236683490441226378863854356949677232959, time:1750767068.6766586s req_ids:[8] -DEBUG 06-24 20:11:08 [manager.py:391] -ERROR 06-24 20:11:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:378.2684803009033ms total_cost_time:378.3297538757324ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:7400 prompt_cache_len:5151 prompt_cache_ratio:0.6960810810810811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 -DEBUG 06-24 20:11:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10901474952697754 s -INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.11107969284057617 s -DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=267939142040932546910369753345547863350, time:1750767069.056185s req_ids:[8] -DEBUG 06-24 20:11:09 [manager.py:391] -ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:08 lightllm_req_id:8 first_token_cost:206.70175552368164ms total_cost_time:206.74514770507812ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7401 prompt_cache_len:5151 prompt_cache_ratio:0.6959870287798946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 -DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10707855224609375 s -INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10902571678161621 s -DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=20764349457028876236182632729912366716, time:1750767069.2684903s req_ids:[8] -DEBUG 06-24 20:11:09 [manager.py:391] -ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:196.47812843322754ms total_cost_time:196.52104377746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7402 prompt_cache_len:5151 prompt_cache_ratio:0.6958930018913807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 -DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10776662826538086 s -INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979747772216797 s -DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=171483059124038557330035222879340141391, time:1750767069.4743934s req_ids:[8] -DEBUG 06-24 20:11:09 [manager.py:391] -ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:203.54223251342773ms total_cost_time:203.58538627624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7403 prompt_cache_len:5151 prompt_cache_ratio:0.6957990004052411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 -DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10772895812988281 s -INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s -DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=101431306433446687075158170020865578733, time:1750767069.6884556s req_ids:[8] -DEBUG 06-24 20:11:09 [manager.py:391] -ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:209.35964584350586ms total_cost_time:209.40232276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7404 prompt_cache_len:5151 prompt_cache_ratio:0.6957050243111832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 -DEBUG 06-24 20:11:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:09 [manager.py:224] router recive req id 8 cost time 0.10604548454284668 s -INFO 06-24 20:11:09 [manager.py:68] detokenization recv req id 8 cost time 0.10797524452209473 s -DEBUG 06-24 20:11:09 [manager.py:391] Prefill Batch: batch_id=152705021199227741693674680543932485769, time:1750767069.90363s req_ids:[8] -DEBUG 06-24 20:11:09 [manager.py:391] -ERROR 06-24 20:11:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:09 lightllm_req_id:8 first_token_cost:212.05759048461914ms total_cost_time:212.10241317749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7405 prompt_cache_len:5151 prompt_cache_ratio:0.6956110735989196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 -DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10819077491760254 s -INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11017942428588867 s -DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=214049058359539040459960797503223730991, time:1750767070.1380618s req_ids:[8] -DEBUG 06-24 20:11:10 [manager.py:391] -ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:215.23141860961914ms total_cost_time:215.27624130249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7406 prompt_cache_len:5151 prompt_cache_ratio:0.695517148258169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 -DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10821723937988281 s -INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037039756774902 s -DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=292120518234624111530100389802582502124, time:1750767070.3432667s req_ids:[8] -DEBUG 06-24 20:11:10 [manager.py:391] -ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:211.05694770812988ms total_cost_time:211.09962463378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7407 prompt_cache_len:5151 prompt_cache_ratio:0.6954232482786553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 -DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.11038351058959961 s -INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.11192965507507324 s -DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=174630318551087950671781791321461696608, time:1750767070.5567055s req_ids:[8] -DEBUG 06-24 20:11:10 [manager.py:391] -ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:197.75724411010742ms total_cost_time:197.84116744995117ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:7408 prompt_cache_len:5151 prompt_cache_ratio:0.6953293736501079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 -DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10664963722229004 s -INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.1079397201538086 s -DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=114849242951004337205286534813722412650, time:1750767070.776647s req_ids:[8] -DEBUG 06-24 20:11:10 [manager.py:391] -ERROR 06-24 20:11:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:220.4306125640869ms total_cost_time:220.47686576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7409 prompt_cache_len:5151 prompt_cache_ratio:0.6952355243622621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 -DEBUG 06-24 20:11:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:10 [manager.py:224] router recive req id 8 cost time 0.10818648338317871 s -INFO 06-24 20:11:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940980911254883 s -DEBUG 06-24 20:11:10 [manager.py:391] Prefill Batch: batch_id=26131201015284449413325560935713121364, time:1750767070.9910412s req_ids:[8] -DEBUG 06-24 20:11:10 [manager.py:391] -ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:10 lightllm_req_id:8 first_token_cost:372.9691505432129ms total_cost_time:372.9894161224365ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:7410 prompt_cache_len:5151 prompt_cache_ratio:0.6951417004048583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 -DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.10274195671081543 s -INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.10380315780639648 s -DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=97925376814572714015306892286008678909, time:1750767071.3679955s req_ids:[8] -DEBUG 06-24 20:11:11 [manager.py:391] -ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:162.0182991027832ms total_cost_time:162.03832626342773ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:7411 prompt_cache_len:5151 prompt_cache_ratio:0.6950479017676426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 -DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.10619020462036133 s -INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.10744142532348633 s -DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=184130130615286016847116781770405684457, time:1750767071.5327094s req_ids:[8] -DEBUG 06-24 20:11:11 [manager.py:391] -ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:192.09837913513184ms total_cost_time:192.14224815368652ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7412 prompt_cache_len:5151 prompt_cache_ratio:0.694954128440367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 -DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.11013078689575195 s -INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.11205625534057617 s -DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=52809970427344754986774336602757269588, time:1750767071.7294004s req_ids:[8] -DEBUG 06-24 20:11:11 [manager.py:391] -ERROR 06-24 20:11:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:208.51850509643555ms total_cost_time:208.56499671936035ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7413 prompt_cache_len:5151 prompt_cache_ratio:0.6948603804127883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 -DEBUG 06-24 20:11:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:11 [manager.py:224] router recive req id 8 cost time 0.1053769588470459 s -INFO 06-24 20:11:11 [manager.py:68] detokenization recv req id 8 cost time 0.1073904037475586 s -DEBUG 06-24 20:11:11 [manager.py:391] Prefill Batch: batch_id=34179166253060384155157933142749296764, time:1750767071.9433348s req_ids:[8] -DEBUG 06-24 20:11:11 [manager.py:391] -ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:11 lightllm_req_id:8 first_token_cost:201.90191268920898ms total_cost_time:201.94625854492188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7414 prompt_cache_len:5151 prompt_cache_ratio:0.6947666576746695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 -DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10776591300964355 s -INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10968875885009766 s -DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=157556507505558106713233822930765687305, time:1750767072.1567602s req_ids:[8] -DEBUG 06-24 20:11:12 [manager.py:391] -ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:207.85236358642578ms total_cost_time:207.89670944213867ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7415 prompt_cache_len:5151 prompt_cache_ratio:0.6946729602157788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 -DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s -INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s -DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=226310388708464242968343142667638750119, time:1750767072.3669736s req_ids:[8] -DEBUG 06-24 20:11:12 [manager.py:391] -ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:226.70555114746094ms total_cost_time:226.74989700317383ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7416 prompt_cache_len:5151 prompt_cache_ratio:0.69457928802589 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 -DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10812592506408691 s -INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.1101694107055664 s -DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=226088692224405038323400085663291837562, time:1750767072.593881s req_ids:[8] -DEBUG 06-24 20:11:12 [manager.py:391] -ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:196.3212490081787ms total_cost_time:196.36249542236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:7417 prompt_cache_len:5151 prompt_cache_ratio:0.6944856410947823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 -DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:12 [manager.py:224] router recive req id 8 cost time 0.10728883743286133 s -INFO 06-24 20:11:12 [manager.py:68] detokenization recv req id 8 cost time 0.10930466651916504 s -DEBUG 06-24 20:11:12 [manager.py:391] Prefill Batch: batch_id=286384624214673506357911175950543719865, time:1750767072.7984557s req_ids:[8] -DEBUG 06-24 20:11:12 [manager.py:391] -ERROR 06-24 20:11:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:207.95679092407227ms total_cost_time:207.99970626831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7418 prompt_cache_len:5151 prompt_cache_ratio:0.6943920194122405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 -DEBUG 06-24 20:11:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10766029357910156 s -INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.10934829711914062 s -DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=84916617351802788885427210951537737186, time:1750767073.0118542s req_ids:[8] -DEBUG 06-24 20:11:13 [manager.py:391] -ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:12 lightllm_req_id:8 first_token_cost:162.6584529876709ms total_cost_time:162.7035140991211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7419 prompt_cache_len:5151 prompt_cache_ratio:0.694298422968055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 -DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10761117935180664 s -INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.10947656631469727 s -DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=87141610058439750630199920144190091425, time:1750767073.183684s req_ids:[8] -DEBUG 06-24 20:11:13 [manager.py:391] -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:200.14405250549316ms total_cost_time:200.18863677978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7420 prompt_cache_len:5151 prompt_cache_ratio:0.6942048517520215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 -DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.30913734436035156 s -INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.3111867904663086 s -DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=326211178319112308269114931908100826260, time:1750767073.596389s req_ids:[8] -DEBUG 06-24 20:11:13 [manager.py:391] -ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:438.3435249328613ms total_cost_time:438.3866786956787ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7421 prompt_cache_len:5151 prompt_cache_ratio:0.6941113057539415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 -DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:13 [manager.py:224] router recive req id 8 cost time 0.10975146293640137 s -INFO 06-24 20:11:13 [manager.py:68] detokenization recv req id 8 cost time 0.11178946495056152 s -DEBUG 06-24 20:11:13 [manager.py:391] Prefill Batch: batch_id=51661140725856637525718398489677062871, time:1750767073.830754s req_ids:[8] -DEBUG 06-24 20:11:13 [manager.py:391] -ERROR 06-24 20:11:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:198.30679893493652ms total_cost_time:198.3499526977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7422 prompt_cache_len:5151 prompt_cache_ratio:0.6940177849636217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 -DEBUG 06-24 20:11:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10762214660644531 s -INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s -DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=337922778637490160285430918550413108406, time:1750767074.0506709s req_ids:[8] -DEBUG 06-24 20:11:14 [manager.py:391] -ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:13 lightllm_req_id:8 first_token_cost:222.5050926208496ms total_cost_time:222.5501537322998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7423 prompt_cache_len:5151 prompt_cache_ratio:0.6939242893708744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 -DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s -INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10961508750915527 s -DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=76539053179047477132070559233635810338, time:1750767074.2663903s req_ids:[8] -DEBUG 06-24 20:11:14 [manager.py:391] -ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:214.41149711608887ms total_cost_time:214.45417404174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7424 prompt_cache_len:5151 prompt_cache_ratio:0.6938308189655172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 -DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10786223411560059 s -INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10962533950805664 s -DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=180680170160272296930782298953702396896, time:1750767074.4827452s req_ids:[8] -DEBUG 06-24 20:11:14 [manager.py:391] -ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:199.28836822509766ms total_cost_time:199.33056831359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7425 prompt_cache_len:5151 prompt_cache_ratio:0.6937373737373738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 -DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10782170295715332 s -INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10989975929260254 s -DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=245509376800608299763815822673360042683, time:1750767074.6889277s req_ids:[8] -DEBUG 06-24 20:11:14 [manager.py:391] -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:198.66704940795898ms total_cost_time:198.68803024291992ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7426 prompt_cache_len:5151 prompt_cache_ratio:0.6936439536762725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 -DEBUG 06-24 20:11:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:14 [manager.py:224] router recive req id 8 cost time 0.10653877258300781 s -INFO 06-24 20:11:14 [manager.py:68] detokenization recv req id 8 cost time 0.10848116874694824 s -DEBUG 06-24 20:11:14 [manager.py:391] Prefill Batch: batch_id=277915457632603868987146929440189743098, time:1750767074.8918285s req_ids:[8] -DEBUG 06-24 20:11:14 [manager.py:391] -ERROR 06-24 20:11:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:14 lightllm_req_id:8 first_token_cost:225.98743438720703ms total_cost_time:226.043701171875ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7427 prompt_cache_len:5151 prompt_cache_ratio:0.693550558772048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 -DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.1076657772064209 s -INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10950064659118652 s -DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=25926950429729095360007155456661765313, time:1750767075.1248958s req_ids:[8] -DEBUG 06-24 20:11:15 [manager.py:391] -ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:200.58202743530273ms total_cost_time:200.63090324401855ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:7428 prompt_cache_len:5151 prompt_cache_ratio:0.6934571890145396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 -DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.1073753833770752 s -INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10968637466430664 s -DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=67999856864234004939624225716167560111, time:1750767075.3329852s req_ids:[8] -DEBUG 06-24 20:11:15 [manager.py:391] -ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:206.77566528320312ms total_cost_time:206.82048797607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7429 prompt_cache_len:5151 prompt_cache_ratio:0.6933638443935927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 -DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.10675168037414551 s -INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.10875368118286133 s -DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=79460822365509253196476621537295212197, time:1750767075.5446875s req_ids:[8] -DEBUG 06-24 20:11:15 [manager.py:391] -ERROR 06-24 20:11:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:202.8963565826416ms total_cost_time:202.9573917388916ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7430 prompt_cache_len:5151 prompt_cache_ratio:0.6932705248990578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 -DEBUG 06-24 20:11:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:15 [manager.py:224] router recive req id 8 cost time 0.10844230651855469 s -INFO 06-24 20:11:15 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s -DEBUG 06-24 20:11:15 [manager.py:391] Prefill Batch: batch_id=4503896045434052233063513679634790994, time:1750767075.7627897s req_ids:[8] -DEBUG 06-24 20:11:15 [manager.py:391] -ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:15 lightllm_req_id:8 first_token_cost:381.8814754486084ms total_cost_time:381.9265365600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7431 prompt_cache_len:5151 prompt_cache_ratio:0.6931772305207913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 -DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.1080014705657959 s -INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10982012748718262 s -DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=124455734227133753084207299854245542880, time:1750767076.1367433s req_ids:[8] -DEBUG 06-24 20:11:16 [manager.py:391] -ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:158.19287300109863ms total_cost_time:158.2345962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:7432 prompt_cache_len:5151 prompt_cache_ratio:0.6930839612486545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 -DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:16 [batch.py:51] router release req id 8 -DEBUG 06-24 20:11:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:16 [manager.py:283] -DEBUG 06-24 20:11:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:16 [manager.py:284] -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10798978805541992 s -INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099090576171875 s -DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=181111378110961650159993324375680167398, time:1750767076.298811s req_ids:[8] -DEBUG 06-24 20:11:16 [manager.py:391] -ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:194.3991184234619ms total_cost_time:194.4422721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7433 prompt_cache_len:5151 prompt_cache_ratio:0.6929907170725145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 -DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:16 [batch.py:51] router release req id 8 -INFO 06-24 20:11:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10695552825927734 s -INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10882115364074707 s -DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=232659404386592625834311807803533803212, time:1750767076.503519s req_ids:[8] -DEBUG 06-24 20:11:16 [manager.py:391] -ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:202.82483100891113ms total_cost_time:202.86893844604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7434 prompt_cache_len:5151 prompt_cache_ratio:0.6928974979822438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 -DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10710835456848145 s -INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10903692245483398 s -DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=329355241449413619017569938229929147761, time:1750767076.7153666s req_ids:[8] -DEBUG 06-24 20:11:16 [manager.py:391] -ERROR 06-24 20:11:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:206.4192295074463ms total_cost_time:206.46429061889648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7435 prompt_cache_len:5151 prompt_cache_ratio:0.6928043039677202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 -DEBUG 06-24 20:11:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:16 [manager.py:224] router recive req id 8 cost time 0.10724806785583496 s -INFO 06-24 20:11:16 [manager.py:68] detokenization recv req id 8 cost time 0.10914921760559082 s -DEBUG 06-24 20:11:16 [manager.py:391] Prefill Batch: batch_id=256184503709959002366324047395618549361, time:1750767076.9335728s req_ids:[8] -DEBUG 06-24 20:11:16 [manager.py:391] -ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:16 lightllm_req_id:8 first_token_cost:216.7816162109375ms total_cost_time:216.8259620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7436 prompt_cache_len:5151 prompt_cache_ratio:0.6927111350188273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 -DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s -INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s -DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=4548978999315013399410810697814234927, time:1750767077.1575794s req_ids:[8] -DEBUG 06-24 20:11:17 [manager.py:391] -DEBUG 06-24 20:11:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 33143.256 tokens/s -DEBUG 06-24 20:11:17 [stats.py:37] Avg prompt tokens throughput: 33134.419 tokens/s -DEBUG 06-24 20:11:17 [stats.py:37] Avg generate tokens throughput: 8.838 tokens/s -ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:218.3389663696289ms total_cost_time:218.4011936187744ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:7437 prompt_cache_len:5151 prompt_cache_ratio:0.6926179911254539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 -DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.1065680980682373 s -INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.10840845108032227 s -DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=73690730217841824382647300993480774425, time:1750767077.3747609s req_ids:[8] -DEBUG 06-24 20:11:17 [manager.py:391] -ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:169.7535514831543ms total_cost_time:169.7983741760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7438 prompt_cache_len:5151 prompt_cache_ratio:0.6925248722774939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 -DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s -INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s -DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=55373161161021486853917574618699261695, time:1750767077.5481749s req_ids:[8] -DEBUG 06-24 20:11:17 [manager.py:391] -ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:201.6761302947998ms total_cost_time:201.7202377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7439 prompt_cache_len:5151 prompt_cache_ratio:0.6924317784648474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 -DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.1088259220123291 s -INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.1107625961303711 s -DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=8834535322681766073089467998353454417, time:1750767077.755431s req_ids:[8] -DEBUG 06-24 20:11:17 [manager.py:391] -ERROR 06-24 20:11:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:205.50155639648438ms total_cost_time:205.56068420410156ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7440 prompt_cache_len:5151 prompt_cache_ratio:0.6923387096774194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 -DEBUG 06-24 20:11:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:17 [manager.py:224] router recive req id 8 cost time 0.10922956466674805 s -INFO 06-24 20:11:17 [manager.py:68] detokenization recv req id 8 cost time 0.11108207702636719 s -DEBUG 06-24 20:11:17 [manager.py:391] Prefill Batch: batch_id=4084781113246519992814150865807414548, time:1750767077.9766252s req_ids:[8] -DEBUG 06-24 20:11:17 [manager.py:391] -ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:17 lightllm_req_id:8 first_token_cost:378.11851501464844ms total_cost_time:378.16357612609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7441 prompt_cache_len:5151 prompt_cache_ratio:0.6922456659051203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 -DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10913872718811035 s -INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.11162185668945312 s -DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=108525833346023201694568122436972465913, time:1750767078.344695s req_ids:[8] -DEBUG 06-24 20:11:18 [manager.py:391] -ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:192.08145141601562ms total_cost_time:192.1248435974121ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7442 prompt_cache_len:5151 prompt_cache_ratio:0.6921526471378662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 -DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:18 [batch.py:51] router release req id 8 -INFO 06-24 20:11:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10696196556091309 s -INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10875058174133301 s -DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=99470848060880521869391212208684377943, time:1750767078.5464842s req_ids:[8] -DEBUG 06-24 20:11:18 [manager.py:391] -ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:166.85724258422852ms total_cost_time:166.8999195098877ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7443 prompt_cache_len:5151 prompt_cache_ratio:0.6920596533655784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 -DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10774803161621094 s -INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s -DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=130461255637545816791186424850565626532, time:1750767078.7157888s req_ids:[8] -DEBUG 06-24 20:11:18 [manager.py:391] -ERROR 06-24 20:11:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:198.8506317138672ms total_cost_time:198.89330863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7444 prompt_cache_len:5151 prompt_cache_ratio:0.6919666845781838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 -DEBUG 06-24 20:11:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:18 [manager.py:224] router recive req id 8 cost time 0.10785436630249023 s -INFO 06-24 20:11:18 [manager.py:68] detokenization recv req id 8 cost time 0.10980916023254395 s -DEBUG 06-24 20:11:18 [manager.py:391] Prefill Batch: batch_id=162081000095136418946882237973577674770, time:1750767078.91929s req_ids:[8] -DEBUG 06-24 20:11:18 [manager.py:391] -ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:18 lightllm_req_id:8 first_token_cost:200.19936561584473ms total_cost_time:200.24418830871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7445 prompt_cache_len:5151 prompt_cache_ratio:0.6918737407656145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 -DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s -INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10953211784362793 s -DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=162595257166887762818683052027363676628, time:1750767079.131464s req_ids:[8] -DEBUG 06-24 20:11:19 [manager.py:391] -ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:208.85205268859863ms total_cost_time:208.89616012573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7446 prompt_cache_len:5151 prompt_cache_ratio:0.6917808219178082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 -DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10780525207519531 s -INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10961461067199707 s -DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=253277512845888381906924330504823816849, time:1750767079.3446136s req_ids:[8] -DEBUG 06-24 20:11:19 [manager.py:391] -ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:205.20377159118652ms total_cost_time:205.2462100982666ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7447 prompt_cache_len:5151 prompt_cache_ratio:0.6916879280247079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 -DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s -INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.10999917984008789 s -DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=82380109372754230062951009038544942766, time:1750767079.5548475s req_ids:[8] -DEBUG 06-24 20:11:19 [manager.py:391] -ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:203.39035987854004ms total_cost_time:203.43470573425293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7448 prompt_cache_len:5151 prompt_cache_ratio:0.6915950590762621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 -DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.11012721061706543 s -INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.11200881004333496 s -DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=103645447359087133093166208600145588326, time:1750767079.7633677s req_ids:[8] -DEBUG 06-24 20:11:19 [manager.py:391] -ERROR 06-24 20:11:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:202.2995948791504ms total_cost_time:202.35228538513184ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7449 prompt_cache_len:5151 prompt_cache_ratio:0.6915022150624245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 -DEBUG 06-24 20:11:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:19 [manager.py:224] router recive req id 8 cost time 0.11132621765136719 s -INFO 06-24 20:11:19 [manager.py:68] detokenization recv req id 8 cost time 0.11318039894104004 s -DEBUG 06-24 20:11:19 [manager.py:391] Prefill Batch: batch_id=243298994352140601102470509383565096354, time:1750767079.984716s req_ids:[8] -DEBUG 06-24 20:11:19 [manager.py:391] -ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:19 lightllm_req_id:8 first_token_cost:218.98913383483887ms total_cost_time:219.04754638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:7450 prompt_cache_len:5151 prompt_cache_ratio:0.6914093959731543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 -DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.10958313941955566 s -INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.11211585998535156 s -DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=263186011479994774986520148202932534887, time:1750767080.1980124s req_ids:[8] -DEBUG 06-24 20:11:20 [manager.py:391] -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:204.32257652282715ms total_cost_time:204.3769359588623ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7451 prompt_cache_len:5151 prompt_cache_ratio:0.6913166017984164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 -DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.31166982650756836 s -INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.31423354148864746 s -DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=269404262892281471926627286984742575383, time:1750767080.6199296s req_ids:[8] -DEBUG 06-24 20:11:20 [manager.py:391] -ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:437.56890296936035ms total_cost_time:437.61181831359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7452 prompt_cache_len:5151 prompt_cache_ratio:0.6912238325281803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 -DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:20 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s -INFO 06-24 20:11:20 [manager.py:68] detokenization recv req id 8 cost time 0.11075544357299805 s -DEBUG 06-24 20:11:20 [manager.py:391] Prefill Batch: batch_id=326802644971231985589783645817220190968, time:1750767080.8522918s req_ids:[8] -DEBUG 06-24 20:11:20 [manager.py:391] -ERROR 06-24 20:11:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:202.67319679260254ms total_cost_time:202.71849632263184ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7453 prompt_cache_len:5151 prompt_cache_ratio:0.6911310881524219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 -DEBUG 06-24 20:11:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.1074366569519043 s -INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s -DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=305387787074210830426694086908688281117, time:1750767081.062045s req_ids:[8] -DEBUG 06-24 20:11:21 [manager.py:391] -ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:20 lightllm_req_id:8 first_token_cost:206.90321922302246ms total_cost_time:206.94637298583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7454 prompt_cache_len:5151 prompt_cache_ratio:0.6910383686611216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 -DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10714578628540039 s -INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s -DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=335538953564640558639313983719977406366, time:1750767081.274611s req_ids:[8] -DEBUG 06-24 20:11:21 [manager.py:391] -ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:216.41969680786133ms total_cost_time:216.46404266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7455 prompt_cache_len:5151 prompt_cache_ratio:0.6909456740442655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 -DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10731291770935059 s -INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965490341186523 s -DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=245071617843898293241549653545049277766, time:1750767081.4994216s req_ids:[8] -DEBUG 06-24 20:11:21 [manager.py:391] -ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:212.62454986572266ms total_cost_time:212.68010139465332ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:7456 prompt_cache_len:5151 prompt_cache_ratio:0.6908530042918455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 -DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10844779014587402 s -INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.1104135513305664 s -DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=124356320736153591286829794059769685363, time:1750767081.7180753s req_ids:[8] -DEBUG 06-24 20:11:21 [manager.py:391] -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:212.80932426452637ms total_cost_time:212.85247802734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7457 prompt_cache_len:5151 prompt_cache_ratio:0.6907603593938582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 -DEBUG 06-24 20:11:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:21 [manager.py:224] router recive req id 8 cost time 0.10788798332214355 s -INFO 06-24 20:11:21 [manager.py:68] detokenization recv req id 8 cost time 0.1099703311920166 s -DEBUG 06-24 20:11:21 [manager.py:391] Prefill Batch: batch_id=144240155567352064500614094323357681162, time:1750767081.9312556s req_ids:[8] -DEBUG 06-24 20:11:21 [manager.py:391] -ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:21 lightllm_req_id:8 first_token_cost:208.04643630981445ms total_cost_time:208.09102058410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7458 prompt_cache_len:5151 prompt_cache_ratio:0.6906677393403057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 -DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s -INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.11120104789733887 s -DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=232573094133731883191370021607807576027, time:1750767082.1584105s req_ids:[8] -DEBUG 06-24 20:11:22 [manager.py:391] -ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:221.5559482574463ms total_cost_time:221.60100936889648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7459 prompt_cache_len:5151 prompt_cache_ratio:0.6905751441211959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 -DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10775876045227051 s -INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s -DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=184912065706389910249888098198937428946, time:1750767082.3729267s req_ids:[8] -DEBUG 06-24 20:11:22 [manager.py:391] -ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:168.52879524230957ms total_cost_time:168.57028007507324ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7460 prompt_cache_len:5151 prompt_cache_ratio:0.6904825737265415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 -DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10914802551269531 s -INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.1110544204711914 s -DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=310569826816793906961358973129667899148, time:1750767082.5438545s req_ids:[8] -DEBUG 06-24 20:11:22 [manager.py:391] -ERROR 06-24 20:11:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:198.85492324829102ms total_cost_time:198.897123336792ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7461 prompt_cache_len:5151 prompt_cache_ratio:0.6903900281463611 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 -DEBUG 06-24 20:11:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:22 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s -INFO 06-24 20:11:22 [manager.py:68] detokenization recv req id 8 cost time 0.11043095588684082 s -DEBUG 06-24 20:11:22 [manager.py:391] Prefill Batch: batch_id=8757093422609606970826310645553977256, time:1750767082.7522476s req_ids:[8] -DEBUG 06-24 20:11:22 [manager.py:391] -ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:22 lightllm_req_id:8 first_token_cost:373.24976921081543ms total_cost_time:373.2926845550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7462 prompt_cache_len:5151 prompt_cache_ratio:0.6902975073706781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 -DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s -INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.11038589477539062 s -DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=327421759691147094336038244002227703615, time:1750767083.1257067s req_ids:[8] -DEBUG 06-24 20:11:23 [manager.py:391] -INFO 06-24 20:11:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:11:23 [statics_utils.py:24] mean first cost: 230.6960552901547 ms -INFO 06-24 20:11:23 [statics_utils.py:24] mean per token cost: 0.09123260213915535 ms -ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:198.78888130187988ms total_cost_time:198.8520622253418ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:7463 prompt_cache_len:5151 prompt_cache_ratio:0.6902050113895216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 -DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10706543922424316 s -INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10910439491271973 s -DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=257869851516972095649333889571118661386, time:1750767083.338935s req_ids:[8] -DEBUG 06-24 20:11:23 [manager.py:391] -ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:209.35893058776855ms total_cost_time:209.40327644348145ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7464 prompt_cache_len:5151 prompt_cache_ratio:0.690112540192926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 -DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10680937767028809 s -INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10877466201782227 s -DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=210604851289719266499750429799966188411, time:1750767083.5520976s req_ids:[8] -DEBUG 06-24 20:11:23 [manager.py:391] -ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:206.77614212036133ms total_cost_time:206.817626953125ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:7465 prompt_cache_len:5151 prompt_cache_ratio:0.690020093770931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 -DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.10725927352905273 s -INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.1095116138458252 s -DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=137598415086722974131867197063478172252, time:1750767083.7657492s req_ids:[8] -DEBUG 06-24 20:11:23 [manager.py:391] -ERROR 06-24 20:11:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:211.5950584411621ms total_cost_time:211.6398811340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7466 prompt_cache_len:5151 prompt_cache_ratio:0.6899276721135815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 -DEBUG 06-24 20:11:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:23 [manager.py:224] router recive req id 8 cost time 0.1072840690612793 s -INFO 06-24 20:11:23 [manager.py:68] detokenization recv req id 8 cost time 0.10971522331237793 s -DEBUG 06-24 20:11:23 [manager.py:391] Prefill Batch: batch_id=308689576169670154108324346939187040182, time:1750767083.9808764s req_ids:[8] -DEBUG 06-24 20:11:23 [manager.py:391] -ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:23 lightllm_req_id:8 first_token_cost:207.10325241088867ms total_cost_time:207.14783668518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7467 prompt_cache_len:5151 prompt_cache_ratio:0.6898352752109281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 -DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10664486885070801 s -INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.10896682739257812 s -DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=84787565096822400543265844336594142702, time:1750767084.1936512s req_ids:[8] -DEBUG 06-24 20:11:24 [manager.py:391] -ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:207.25226402282715ms total_cost_time:207.29732513427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7468 prompt_cache_len:5151 prompt_cache_ratio:0.6897429030530262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 -DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s -INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.11062479019165039 s -DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=175643542312453904322308021175820148236, time:1750767084.4237344s req_ids:[8] -DEBUG 06-24 20:11:24 [manager.py:391] -ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:228.55281829833984ms total_cost_time:228.59668731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7469 prompt_cache_len:5151 prompt_cache_ratio:0.689650555629937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 -DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10729050636291504 s -INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.10970401763916016 s -DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=208989913259873464523209405562986098916, time:1750767084.641867s req_ids:[8] -DEBUG 06-24 20:11:24 [manager.py:391] -ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:207.2129249572754ms total_cost_time:207.2584629058838ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7470 prompt_cache_len:5151 prompt_cache_ratio:0.6895582329317269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 -DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:24 [manager.py:224] router recive req id 8 cost time 0.10843729972839355 s -INFO 06-24 20:11:24 [manager.py:68] detokenization recv req id 8 cost time 0.1105043888092041 s -DEBUG 06-24 20:11:24 [manager.py:391] Prefill Batch: batch_id=188445056309105053618701648374319636945, time:1750767084.8526504s req_ids:[8] -DEBUG 06-24 20:11:24 [manager.py:391] -ERROR 06-24 20:11:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:201.68685913085938ms total_cost_time:201.73120498657227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7471 prompt_cache_len:5151 prompt_cache_ratio:0.6894659349484674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 -DEBUG 06-24 20:11:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10793828964233398 s -INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10985016822814941 s -DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=99413025731883509549028864441292816538, time:1750767085.067927s req_ids:[8] -DEBUG 06-24 20:11:25 [manager.py:391] -ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:24 lightllm_req_id:8 first_token_cost:387.1722221374512ms total_cost_time:387.24756240844727ms,out_token_counter:1 mean_per_token_cost_time: 0.07534027099609375ms prompt_token_num:7472 prompt_cache_len:5151 prompt_cache_ratio:0.6893736616702355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 -DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s -INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s -DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=324676176774033674555688877787615452218, time:1750767085.452931s req_ids:[8] -DEBUG 06-24 20:11:25 [manager.py:391] -ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:203.4132480621338ms total_cost_time:203.45711708068848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7473 prompt_cache_len:5151 prompt_cache_ratio:0.6892814130871137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 -DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10664939880371094 s -INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10846543312072754 s -DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=334730216528391277831921803284621599088, time:1750767085.676555s req_ids:[8] -DEBUG 06-24 20:11:25 [manager.py:391] -ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:220.7179069519043ms total_cost_time:220.76177597045898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7474 prompt_cache_len:5151 prompt_cache_ratio:0.6891891891891891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 -DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:25 [manager.py:224] router recive req id 8 cost time 0.10716867446899414 s -INFO 06-24 20:11:25 [manager.py:68] detokenization recv req id 8 cost time 0.10887670516967773 s -DEBUG 06-24 20:11:25 [manager.py:391] Prefill Batch: batch_id=131079256920421875665600843456104989984, time:1750767085.8906987s req_ids:[8] -DEBUG 06-24 20:11:25 [manager.py:391] -ERROR 06-24 20:11:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:167.35219955444336ms total_cost_time:167.39654541015625ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7475 prompt_cache_len:5151 prompt_cache_ratio:0.6890969899665552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 -DEBUG 06-24 20:11:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10800027847290039 s -INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1100006103515625 s -DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=114162242014997088566202584070837452204, time:1750767086.061665s req_ids:[8] -DEBUG 06-24 20:11:26 [manager.py:391] -ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:25 lightllm_req_id:8 first_token_cost:194.67544555664062ms total_cost_time:194.71096992492676ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:7476 prompt_cache_len:5151 prompt_cache_ratio:0.6890048154093098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 -DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10736870765686035 s -INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1092071533203125 s -DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=11936535065731273734170187497077689518, time:1750767086.261259s req_ids:[8] -DEBUG 06-24 20:11:26 [manager.py:391] -ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:167.16933250427246ms total_cost_time:167.21510887145996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7477 prompt_cache_len:5151 prompt_cache_ratio:0.6889126655075565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 -DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10825896263122559 s -INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.11023664474487305 s -DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=229544341479842231107019660074869882799, time:1750767086.4390116s req_ids:[8] -DEBUG 06-24 20:11:26 [manager.py:391] -ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:203.45664024353027ms total_cost_time:203.50027084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7478 prompt_cache_len:5151 prompt_cache_ratio:0.6888205402514042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 -DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10705971717834473 s -INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.1089634895324707 s -DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=123198301956496034737157668314164997966, time:1750767086.6444924s req_ids:[8] -DEBUG 06-24 20:11:26 [manager.py:391] -ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:206.09760284423828ms total_cost_time:206.14123344421387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7479 prompt_cache_len:5151 prompt_cache_ratio:0.6887284396309667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 -DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:26 [manager.py:224] router recive req id 8 cost time 0.10989570617675781 s -INFO 06-24 20:11:26 [manager.py:68] detokenization recv req id 8 cost time 0.11193203926086426 s -DEBUG 06-24 20:11:26 [manager.py:391] Prefill Batch: batch_id=64542103636832748140191840346770710671, time:1750767086.8549757s req_ids:[8] -DEBUG 06-24 20:11:26 [manager.py:391] -ERROR 06-24 20:11:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:211.1523151397705ms total_cost_time:211.198091506958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7480 prompt_cache_len:5151 prompt_cache_ratio:0.6886363636363636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 -DEBUG 06-24 20:11:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10768365859985352 s -INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.10941195487976074 s -DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=82058919095171448801317716285573376702, time:1750767087.0699363s req_ids:[8] -DEBUG 06-24 20:11:27 [manager.py:391] -ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:26 lightllm_req_id:8 first_token_cost:162.54186630249023ms total_cost_time:162.58573532104492ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7481 prompt_cache_len:5151 prompt_cache_ratio:0.6885443122577195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 -DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10779142379760742 s -INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s -DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=118189453328384174911612043717663907831, time:1750767087.2380433s req_ids:[8] -DEBUG 06-24 20:11:27 [manager.py:391] -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:11:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 33309.372 tokens/s -DEBUG 06-24 20:11:27 [stats.py:37] Avg prompt tokens throughput: 33300.445 tokens/s -DEBUG 06-24 20:11:27 [stats.py:37] Avg generate tokens throughput: 8.928 tokens/s -ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:198.31442832946777ms total_cost_time:198.35853576660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7482 prompt_cache_len:5151 prompt_cache_ratio:0.6884522854851644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 -DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10681772232055664 s -INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.1086418628692627 s -DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=295166475996341443049158862173550238938, time:1750767087.4505174s req_ids:[8] -DEBUG 06-24 20:11:27 [manager.py:391] -ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:372.89929389953613ms total_cost_time:372.94459342956543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7483 prompt_cache_len:5151 prompt_cache_ratio:0.6883602833088334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 -DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:27 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s -INFO 06-24 20:11:27 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s -DEBUG 06-24 20:11:27 [manager.py:391] Prefill Batch: batch_id=214533361182107069727169985622831811017, time:1750767087.8181543s req_ids:[8] -DEBUG 06-24 20:11:27 [manager.py:391] -ERROR 06-24 20:11:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:204.63275909423828ms total_cost_time:204.67782020568848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7484 prompt_cache_len:5151 prompt_cache_ratio:0.6882683057188669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 -DEBUG 06-24 20:11:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10764098167419434 s -INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s -DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=12773058526286456511775650609802154919, time:1750767088.040381s req_ids:[8] -DEBUG 06-24 20:11:28 [manager.py:391] -ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:27 lightllm_req_id:8 first_token_cost:217.33999252319336ms total_cost_time:217.38290786743164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7485 prompt_cache_len:5151 prompt_cache_ratio:0.6881763527054108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 -DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s -INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.11039233207702637 s -DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=254281285714112820972224071806047020584, time:1750767088.2532353s req_ids:[8] -DEBUG 06-24 20:11:28 [manager.py:391] -ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:212.95976638793945ms total_cost_time:213.02056312561035ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:7486 prompt_cache_len:5151 prompt_cache_ratio:0.688084424258616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 -DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10738515853881836 s -INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10944533348083496 s -DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=40799480694245296062838350882707044361, time:1750767088.4840286s req_ids:[8] -DEBUG 06-24 20:11:28 [manager.py:391] -ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:216.27068519592285ms total_cost_time:216.31360054016113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7487 prompt_cache_len:5151 prompt_cache_ratio:0.687992520368639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 -DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.10977029800415039 s -DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=129384408894430325270825048918733684249, time:1750767088.6984239s req_ids:[8] -DEBUG 06-24 20:11:28 [manager.py:391] -ERROR 06-24 20:11:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:207.7648639678955ms total_cost_time:207.8094482421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7488 prompt_cache_len:5151 prompt_cache_ratio:0.6879006410256411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 -DEBUG 06-24 20:11:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:28 [manager.py:224] router recive req id 8 cost time 0.10870885848999023 s -INFO 06-24 20:11:28 [manager.py:68] detokenization recv req id 8 cost time 0.11074256896972656 s -DEBUG 06-24 20:11:28 [manager.py:391] Prefill Batch: batch_id=29604001605695794923323631120808993684, time:1750767088.932468s req_ids:[8] -DEBUG 06-24 20:11:28 [manager.py:391] -ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:28 lightllm_req_id:8 first_token_cost:232.32483863830566ms total_cost_time:232.36799240112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7489 prompt_cache_len:5151 prompt_cache_ratio:0.687808786219789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 -DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10772967338562012 s -INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.10995650291442871 s -DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=182831017166580633193818875579910703007, time:1750767089.1505642s req_ids:[8] -DEBUG 06-24 20:11:29 [manager.py:391] -ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:210.16335487365723ms total_cost_time:210.2067470550537ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7490 prompt_cache_len:5151 prompt_cache_ratio:0.687716955941255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 -DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s -INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.11062192916870117 s -DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=15615732650749503656476020085573723261, time:1750767089.367472s req_ids:[8] -DEBUG 06-24 20:11:29 [manager.py:391] -ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:207.64398574829102ms total_cost_time:207.6883316040039ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7491 prompt_cache_len:5151 prompt_cache_ratio:0.6876251501802163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 -DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10740160942077637 s -INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.10868215560913086 s -DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=318122953185650221205474782875383564674, time:1750767089.577351s req_ids:[8] -DEBUG 06-24 20:11:29 [manager.py:391] -ERROR 06-24 20:11:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:206.93302154541016ms total_cost_time:206.97712898254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7492 prompt_cache_len:5151 prompt_cache_ratio:0.6875333689268553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 -DEBUG 06-24 20:11:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:29 [manager.py:224] router recive req id 8 cost time 0.10736703872680664 s -INFO 06-24 20:11:29 [manager.py:68] detokenization recv req id 8 cost time 0.1088874340057373 s -DEBUG 06-24 20:11:29 [manager.py:391] Prefill Batch: batch_id=187166761824039047294628319431594801377, time:1750767089.7890775s req_ids:[8] -DEBUG 06-24 20:11:29 [manager.py:391] -ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:29 lightllm_req_id:8 first_token_cost:378.8154125213623ms total_cost_time:378.8430690765381ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7493 prompt_cache_len:5151 prompt_cache_ratio:0.6874416121713599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 -DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.1045064926147461 s -INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.10646700859069824 s -DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=332405169542008112923217216009417027023, time:1750767090.170222s req_ids:[8] -DEBUG 06-24 20:11:30 [manager.py:391] -ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:203.68099212646484ms total_cost_time:203.7062644958496ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:7494 prompt_cache_len:5151 prompt_cache_ratio:0.6873498799039232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 -DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.1045072078704834 s -INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.1064760684967041 s -DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=303114048412922466176841591166455847461, time:1750767090.3816154s req_ids:[8] -DEBUG 06-24 20:11:30 [manager.py:391] -ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:206.03251457214355ms total_cost_time:206.05707168579102ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:7495 prompt_cache_len:5151 prompt_cache_ratio:0.6872581721147432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 -DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.10559296607971191 s -INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.1081080436706543 s -DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=26337156931803281927853578809734759500, time:1750767090.5929859s req_ids:[8] -DEBUG 06-24 20:11:30 [manager.py:391] -ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:210.1123332977295ms total_cost_time:210.13736724853516ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7496 prompt_cache_len:5151 prompt_cache_ratio:0.6871664887940235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 -DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:30 [manager.py:224] router recive req id 8 cost time 0.10436773300170898 s -INFO 06-24 20:11:30 [manager.py:68] detokenization recv req id 8 cost time 0.10628604888916016 s -DEBUG 06-24 20:11:30 [manager.py:391] Prefill Batch: batch_id=288449249694049706828524221040679568578, time:1750767090.8113515s req_ids:[8] -DEBUG 06-24 20:11:30 [manager.py:391] -ERROR 06-24 20:11:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:213.66143226623535ms total_cost_time:213.68646621704102ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:7497 prompt_cache_len:5151 prompt_cache_ratio:0.6870748299319728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 -DEBUG 06-24 20:11:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.1052398681640625 s -INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10728645324707031 s -DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=148742170778064327355130550573034594329, time:1750767091.0237875s req_ids:[8] -DEBUG 06-24 20:11:31 [manager.py:391] -ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:30 lightllm_req_id:8 first_token_cost:201.77960395812988ms total_cost_time:201.80583000183105ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7498 prompt_cache_len:5151 prompt_cache_ratio:0.686983195518805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 -DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10531330108642578 s -INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.1078486442565918 s -DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=106859183797288018885583155067256657616, time:1750767091.2316837s req_ids:[8] -DEBUG 06-24 20:11:31 [manager.py:391] -ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:210.30235290527344ms total_cost_time:210.33334732055664ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:7499 prompt_cache_len:5151 prompt_cache_ratio:0.6868915855447393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 -DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10526418685913086 s -INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10725760459899902 s -DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=49529748292441829881381191867094834059, time:1750767091.4458873s req_ids:[8] -DEBUG 06-24 20:11:31 [manager.py:391] -ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:204.73289489746094ms total_cost_time:204.7586441040039ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:7500 prompt_cache_len:5151 prompt_cache_ratio:0.6868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 -DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10531926155090332 s -INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.10719704627990723 s -DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=115221810126877614795910595506306583049, time:1750767091.655888s req_ids:[8] -DEBUG 06-24 20:11:31 [manager.py:391] -ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:207.32903480529785ms total_cost_time:207.35549926757812ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7501 prompt_cache_len:5151 prompt_cache_ratio:0.6867084388748167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 -DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:31 [manager.py:224] router recive req id 8 cost time 0.10444140434265137 s -INFO 06-24 20:11:31 [manager.py:68] detokenization recv req id 8 cost time 0.1063833236694336 s -DEBUG 06-24 20:11:31 [manager.py:391] Prefill Batch: batch_id=260062700437060553398605326927876665386, time:1750767091.8686435s req_ids:[8] -DEBUG 06-24 20:11:31 [manager.py:391] -ERROR 06-24 20:11:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:174.882173538208ms total_cost_time:174.9119758605957ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:7502 prompt_cache_len:5151 prompt_cache_ratio:0.6866169021594242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 -DEBUG 06-24 20:11:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10431885719299316 s -INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10614132881164551 s -DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=211957748278428885069148269148538428227, time:1750767092.048504s req_ids:[8] -DEBUG 06-24 20:11:32 [manager.py:391] -ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:31 lightllm_req_id:8 first_token_cost:382.4605941772461ms total_cost_time:382.48753547668457ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7503 prompt_cache_len:5151 prompt_cache_ratio:0.6865253898440624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 -DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10559415817260742 s -INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10757589340209961 s -DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=295971484134711392985058761331930024081, time:1750767092.4298196s req_ids:[8] -DEBUG 06-24 20:11:32 [manager.py:391] -ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:204.78582382202148ms total_cost_time:204.80895042419434ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7504 prompt_cache_len:5151 prompt_cache_ratio:0.6864339019189766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 -DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:32 [batch.py:51] router release req id 8 -INFO 06-24 20:11:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10549354553222656 s -INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10752511024475098 s -DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=195268353624139903076023955487563032689, time:1750767092.6445408s req_ids:[8] -DEBUG 06-24 20:11:32 [manager.py:391] -ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:208.94694328308105ms total_cost_time:208.97364616394043ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:7505 prompt_cache_len:5151 prompt_cache_ratio:0.6863424383744171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 -DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:32 [manager.py:224] router recive req id 8 cost time 0.10540103912353516 s -INFO 06-24 20:11:32 [manager.py:68] detokenization recv req id 8 cost time 0.10739970207214355 s -DEBUG 06-24 20:11:32 [manager.py:391] Prefill Batch: batch_id=112160947376472336544252143786192419154, time:1750767092.862713s req_ids:[8] -DEBUG 06-24 20:11:32 [manager.py:391] -ERROR 06-24 20:11:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:209.6409797668457ms total_cost_time:209.66553688049316ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:7506 prompt_cache_len:5151 prompt_cache_ratio:0.6862509992006395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 -DEBUG 06-24 20:11:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10442042350769043 s -INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.1063838005065918 s -DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=258279668907049344798782978390288345325, time:1750767093.0751154s req_ids:[8] -DEBUG 06-24 20:11:33 [manager.py:391] -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:32 lightllm_req_id:8 first_token_cost:212.00823783874512ms total_cost_time:212.0344638824463ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7507 prompt_cache_len:5151 prompt_cache_ratio:0.6861595843879046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 -DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10449528694152832 s -INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10650968551635742 s -DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=81699811517547993716394177816612957527, time:1750767093.2871106s req_ids:[8] -DEBUG 06-24 20:11:33 [manager.py:391] -ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:205.6715488433838ms total_cost_time:205.69467544555664ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7508 prompt_cache_len:5151 prompt_cache_ratio:0.6860681939264784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 -DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10442471504211426 s -INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10634374618530273 s -DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=190290976791288828654431990360719391443, time:1750767093.4977794s req_ids:[8] -DEBUG 06-24 20:11:33 [manager.py:391] -ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:205.75618743896484ms total_cost_time:205.7805061340332ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:7509 prompt_cache_len:5151 prompt_cache_ratio:0.685976827806632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 -DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.1049196720123291 s -INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.1069495677947998 s -DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=11793138929972073944316525303950363704, time:1750767093.7122924s req_ids:[8] -DEBUG 06-24 20:11:33 [manager.py:391] -ERROR 06-24 20:11:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:209.7482681274414ms total_cost_time:209.77020263671875ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7510 prompt_cache_len:5151 prompt_cache_ratio:0.6858854860186419 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 -DEBUG 06-24 20:11:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:33 [manager.py:224] router recive req id 8 cost time 0.10476851463317871 s -INFO 06-24 20:11:33 [manager.py:68] detokenization recv req id 8 cost time 0.10694265365600586 s -DEBUG 06-24 20:11:33 [manager.py:391] Prefill Batch: batch_id=307708771651216880233589300116879201600, time:1750767093.9284694s req_ids:[8] -DEBUG 06-24 20:11:33 [manager.py:391] -ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:33 lightllm_req_id:8 first_token_cost:214.34330940246582ms total_cost_time:214.36572074890137ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:7511 prompt_cache_len:5151 prompt_cache_ratio:0.6857941685527893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 -DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10448598861694336 s -INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10652923583984375 s -DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=30600763680293083363578683379073791214, time:1750767094.1431246s req_ids:[8] -DEBUG 06-24 20:11:34 [manager.py:391] -ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:212.8288745880127ms total_cost_time:212.85533905029297ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7512 prompt_cache_len:5151 prompt_cache_ratio:0.685702875399361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 -DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10454583168029785 s -INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10652470588684082 s -DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=265964139982383590784195405489511878612, time:1750767094.3621747s req_ids:[8] -DEBUG 06-24 20:11:34 [manager.py:391] -ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:370.0528144836426ms total_cost_time:370.0745105743408ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:7513 prompt_cache_len:5151 prompt_cache_ratio:0.685611606548649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 -DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.1051325798034668 s -INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10709118843078613 s -DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=7063782699805092857792049105384080889, time:1750767094.726963s req_ids:[8] -DEBUG 06-24 20:11:34 [manager.py:391] -ERROR 06-24 20:11:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:201.98750495910645ms total_cost_time:202.0111083984375ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7514 prompt_cache_len:5151 prompt_cache_ratio:0.6855203619909502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 -DEBUG 06-24 20:11:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:34 [manager.py:224] router recive req id 8 cost time 0.10419821739196777 s -INFO 06-24 20:11:34 [manager.py:68] detokenization recv req id 8 cost time 0.10611510276794434 s -DEBUG 06-24 20:11:34 [manager.py:391] Prefill Batch: batch_id=311779049452557566036070398878018274550, time:1750767094.9387593s req_ids:[8] -DEBUG 06-24 20:11:34 [manager.py:391] -ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:34 lightllm_req_id:8 first_token_cost:206.85863494873047ms total_cost_time:206.8796157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7515 prompt_cache_len:5151 prompt_cache_ratio:0.6854291417165669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 -DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10629749298095703 s -INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.10837244987487793 s -DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=299071190233896129594866488433455035880, time:1750767095.151677s req_ids:[8] -DEBUG 06-24 20:11:35 [manager.py:391] -ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:212.843656539917ms total_cost_time:212.88728713989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7516 prompt_cache_len:5151 prompt_cache_ratio:0.6853379457158063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 -DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10713768005371094 s -INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.1090383529663086 s -DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=287004762905690154214341945911835012485, time:1750767095.3674567s req_ids:[8] -DEBUG 06-24 20:11:35 [manager.py:391] -ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:201.07221603393555ms total_cost_time:201.11727714538574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7517 prompt_cache_len:5151 prompt_cache_ratio:0.6852467739789809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 -DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10812616348266602 s -INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.11048054695129395 s -DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=117829934997772956437382426471130174788, time:1750767095.5762248s req_ids:[8] -DEBUG 06-24 20:11:35 [manager.py:391] -ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:206.65311813354492ms total_cost_time:206.70580863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:7518 prompt_cache_len:5151 prompt_cache_ratio:0.6851556264964086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 -DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.1086435317993164 s -INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.11118030548095703 s -DEBUG 06-24 20:11:35 [manager.py:391] Prefill Batch: batch_id=231881408363574442044207623135395878581, time:1750767095.7877162s req_ids:[8] -DEBUG 06-24 20:11:35 [manager.py:391] -ERROR 06-24 20:11:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:206.8009376525879ms total_cost_time:206.8462371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7519 prompt_cache_len:5151 prompt_cache_ratio:0.6850645032584121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 -DEBUG 06-24 20:11:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:35 [manager.py:224] router recive req id 8 cost time 0.10734796524047852 s -INFO 06-24 20:11:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944104194641113 s -DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=206436451016202294829374883891855345838, time:1750767096.0001028s req_ids:[8] -DEBUG 06-24 20:11:36 [manager.py:391] -ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:35 lightllm_req_id:8 first_token_cost:208.50849151611328ms total_cost_time:208.55116844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7520 prompt_cache_len:5151 prompt_cache_ratio:0.6849734042553192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 -DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s -INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10971713066101074 s -DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=36419375788548306324888363376126952575, time:1750767096.224387s req_ids:[8] -DEBUG 06-24 20:11:36 [manager.py:391] -ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:218.96052360534668ms total_cost_time:219.02036666870117ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:7521 prompt_cache_len:5151 prompt_cache_ratio:0.6848823294774631 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 -DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10792851448059082 s -INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10989141464233398 s -DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=65494866243394028663424782378883036630, time:1750767096.4399028s req_ids:[8] -DEBUG 06-24 20:11:36 [manager.py:391] -ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:208.50777626037598ms total_cost_time:208.55236053466797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7522 prompt_cache_len:5151 prompt_cache_ratio:0.6847912789151821 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 -DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:36 [manager.py:224] router recive req id 8 cost time 0.10727715492248535 s -INFO 06-24 20:11:36 [manager.py:68] detokenization recv req id 8 cost time 0.10921669006347656 s -DEBUG 06-24 20:11:36 [manager.py:391] Prefill Batch: batch_id=113294654407010233336789794421959043848, time:1750767096.6536722s req_ids:[8] -DEBUG 06-24 20:11:36 [manager.py:391] -ERROR 06-24 20:11:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:380.4318904876709ms total_cost_time:380.4774284362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7523 prompt_cache_len:5151 prompt_cache_ratio:0.6847002525588196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 -DEBUG 06-24 20:11:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10796737670898438 s -INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.10996437072753906 s -DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=185489745184099290425601377004170766334, time:1750767097.0340314s req_ids:[8] -DEBUG 06-24 20:11:37 [manager.py:391] -ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:36 lightllm_req_id:8 first_token_cost:198.9598274230957ms total_cost_time:199.00226593017578ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7524 prompt_cache_len:5151 prompt_cache_ratio:0.6846092503987241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 -DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10871219635009766 s -INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11089444160461426 s -DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=285073187843206962474167598254521177838, time:1750767097.2414315s req_ids:[8] -DEBUG 06-24 20:11:37 [manager.py:391] -DEBUG 06-24 20:11:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 32266.478 tokens/s -DEBUG 06-24 20:11:37 [stats.py:37] Avg prompt tokens throughput: 32257.881 tokens/s -DEBUG 06-24 20:11:37 [stats.py:37] Avg generate tokens throughput: 8.598 tokens/s -ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:203.9337158203125ms total_cost_time:203.97686958312988ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7525 prompt_cache_len:5151 prompt_cache_ratio:0.6845182724252492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 -DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s -INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11011815071105957 s -DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=42078760260490209465379607529668271940, time:1750767097.451762s req_ids:[8] -DEBUG 06-24 20:11:37 [manager.py:391] -ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:205.580472946167ms total_cost_time:205.6272029876709ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7526 prompt_cache_len:5151 prompt_cache_ratio:0.6844273186287536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 -DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s -INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099848747253418 s -DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=160840032468572075283996328480700834242, time:1750767097.6638014s req_ids:[8] -DEBUG 06-24 20:11:37 [manager.py:391] -ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:206.96544647216797ms total_cost_time:207.00883865356445ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7527 prompt_cache_len:5151 prompt_cache_ratio:0.6843363889996015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 -DEBUG 06-24 20:11:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:37 [manager.py:224] router recive req id 8 cost time 0.10991859436035156 s -INFO 06-24 20:11:37 [manager.py:68] detokenization recv req id 8 cost time 0.11197042465209961 s -DEBUG 06-24 20:11:37 [manager.py:391] Prefill Batch: batch_id=301518563105008016666437752492078553157, time:1750767097.8765974s req_ids:[8] -DEBUG 06-24 20:11:37 [manager.py:391] -ERROR 06-24 20:11:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:207.67712593078613ms total_cost_time:207.72218704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7528 prompt_cache_len:5151 prompt_cache_ratio:0.6842454835281615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 -DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s -INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.11092162132263184 s -DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=68758664152356652860138478944933548676, time:1750767098.0902777s req_ids:[8] -DEBUG 06-24 20:11:38 [manager.py:391] -ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:37 lightllm_req_id:8 first_token_cost:209.43331718444824ms total_cost_time:209.4864845275879ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:7529 prompt_cache_len:5151 prompt_cache_ratio:0.6841546022048081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 -DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.11102914810180664 s -INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.11359858512878418 s -DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=218956699181932895097716661028125152524, time:1750767098.3051922s req_ids:[8] -DEBUG 06-24 20:11:38 [manager.py:391] -ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:209.76567268371582ms total_cost_time:209.8097801208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7530 prompt_cache_len:5151 prompt_cache_ratio:0.6840637450199203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 -DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10788798332214355 s -INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.1101222038269043 s -DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=61625995151615594766650670104290739021, time:1750767098.519472s req_ids:[8] -DEBUG 06-24 20:11:38 [manager.py:391] -ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:207.37743377685547ms total_cost_time:207.4110507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:7531 prompt_cache_len:5151 prompt_cache_ratio:0.6839729119638827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 -DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10763835906982422 s -INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.10962843894958496 s -DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=13973968831911278706293974065734950827, time:1750767098.733651s req_ids:[8] -DEBUG 06-24 20:11:38 [manager.py:391] -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:207.87906646728516ms total_cost_time:207.92555809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7532 prompt_cache_len:5151 prompt_cache_ratio:0.6838821030270844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 -DEBUG 06-24 20:11:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:38 [manager.py:224] router recive req id 8 cost time 0.10722851753234863 s -INFO 06-24 20:11:38 [manager.py:68] detokenization recv req id 8 cost time 0.10943794250488281 s -DEBUG 06-24 20:11:38 [manager.py:391] Prefill Batch: batch_id=113025326024336420487456254158916326313, time:1750767098.9467137s req_ids:[8] -DEBUG 06-24 20:11:38 [manager.py:391] -ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:38 lightllm_req_id:8 first_token_cost:204.82373237609863ms total_cost_time:204.86879348754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7533 prompt_cache_len:5151 prompt_cache_ratio:0.6837913181999203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 -DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.30832552909851074 s -INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.31042003631591797 s -DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=216369464472762570067837417006755498285, time:1750767099.3705409s req_ids:[8] -DEBUG 06-24 20:11:39 [manager.py:391] -ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:427.26826667785645ms total_cost_time:427.31356620788574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7534 prompt_cache_len:5151 prompt_cache_ratio:0.68370055747279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 -DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.10771679878234863 s -INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.10969042778015137 s -INFO 06-24 20:11:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=238311092774122782516572350187902785928, time:1750767099.590941s req_ids:[8] -DEBUG 06-24 20:11:39 [manager.py:391] -ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:209.47551727294922ms total_cost_time:209.5189094543457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7535 prompt_cache_len:5151 prompt_cache_ratio:0.6836098208360982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 -DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:39 [manager.py:224] router recive req id 8 cost time 0.10835886001586914 s -INFO 06-24 20:11:39 [manager.py:68] detokenization recv req id 8 cost time 0.11038613319396973 s -DEBUG 06-24 20:11:39 [manager.py:391] Prefill Batch: batch_id=152539768811612996889041455011969975972, time:1750767099.8051589s req_ids:[8] -DEBUG 06-24 20:11:39 [manager.py:391] -ERROR 06-24 20:11:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:207.5784206390381ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7536 prompt_cache_len:5151 prompt_cache_ratio:0.6835191082802548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 -DEBUG 06-24 20:11:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s -INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11115241050720215 s -DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=338177010296469615718055883587469689926, time:1750767100.0188751s req_ids:[8] -DEBUG 06-24 20:11:40 [manager.py:391] -ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:39 lightllm_req_id:8 first_token_cost:208.77718925476074ms total_cost_time:208.82010459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7537 prompt_cache_len:5151 prompt_cache_ratio:0.6834284197956747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 -DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10960793495178223 s -INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11184453964233398 s -DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=152018828105213930908298841951801194192, time:1750767100.2322216s req_ids:[8] -DEBUG 06-24 20:11:40 [manager.py:391] -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:208.33849906921387ms total_cost_time:208.38475227355957ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7538 prompt_cache_len:5151 prompt_cache_ratio:0.683337755372778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 -DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10802984237670898 s -INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s -DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=282522593512654099620333723473342585014, time:1750767100.453024s req_ids:[8] -DEBUG 06-24 20:11:40 [manager.py:391] -ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:211.56644821166992ms total_cost_time:211.6100788116455ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7539 prompt_cache_len:5151 prompt_cache_ratio:0.6832471150019896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 -DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.10759425163269043 s -INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.10956716537475586 s -DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=47214779218334967578483089504079496138, time:1750767100.6636777s req_ids:[8] -DEBUG 06-24 20:11:40 [manager.py:391] -ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:206.36272430419922ms total_cost_time:206.4075469970703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7540 prompt_cache_len:5151 prompt_cache_ratio:0.6831564986737401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 -DEBUG 06-24 20:11:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:40 [manager.py:224] router recive req id 8 cost time 0.1087038516998291 s -INFO 06-24 20:11:40 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s -DEBUG 06-24 20:11:40 [manager.py:391] Prefill Batch: batch_id=30757906366604137415553569177955159724, time:1750767100.876402s req_ids:[8] -DEBUG 06-24 20:11:40 [manager.py:391] -ERROR 06-24 20:11:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:207.70788192749023ms total_cost_time:207.75222778320312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7541 prompt_cache_len:5151 prompt_cache_ratio:0.6830659063784644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 -DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.11059093475341797 s -INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.1126549243927002 s -DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=127068417032066405177607604255771580738, time:1750767101.0890172s req_ids:[8] -DEBUG 06-24 20:11:41 [manager.py:391] -ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:40 lightllm_req_id:8 first_token_cost:205.88278770446777ms total_cost_time:205.9159278869629ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:7542 prompt_cache_len:5151 prompt_cache_ratio:0.682975338106603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 -DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.10812687873840332 s -INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s -DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=245845754647715021886777661706995994807, time:1750767101.2992468s req_ids:[8] -DEBUG 06-24 20:11:41 [manager.py:391] -ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:207.32426643371582ms total_cost_time:207.38530158996582ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7543 prompt_cache_len:5151 prompt_cache_ratio:0.6828847938486013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 -DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.3088817596435547 s -DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=80510929785446294805860615412067534417, time:1750767101.7020075s req_ids:[8] -DEBUG 06-24 20:11:41 [manager.py:391] -INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.31073546409606934 s -ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:391.14999771118164ms total_cost_time:391.19410514831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7544 prompt_cache_len:5151 prompt_cache_ratio:0.6827942735949099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 -DEBUG 06-24 20:11:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:41 [manager.py:224] router recive req id 8 cost time 0.10804080963134766 s -INFO 06-24 20:11:41 [manager.py:68] detokenization recv req id 8 cost time 0.10985875129699707 s -DEBUG 06-24 20:11:41 [manager.py:391] Prefill Batch: batch_id=281879656881785090116938225451205953164, time:1750767101.910918s req_ids:[8] -DEBUG 06-24 20:11:41 [manager.py:391] -ERROR 06-24 20:11:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:204.27465438842773ms total_cost_time:204.32066917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7545 prompt_cache_len:5151 prompt_cache_ratio:0.6827037773359841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 -DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10866284370422363 s -INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.1106119155883789 s -DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=42259830633954016871273066399335502987, time:1750767102.1210551s req_ids:[8] -DEBUG 06-24 20:11:42 [manager.py:391] -ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:41 lightllm_req_id:8 first_token_cost:207.2775363922119ms total_cost_time:207.3209285736084ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7546 prompt_cache_len:5151 prompt_cache_ratio:0.6826133050622847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 -DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10869193077087402 s -INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11075258255004883 s -DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=215726875174416369074625016089047465238, time:1750767102.3349273s req_ids:[8] -DEBUG 06-24 20:11:42 [manager.py:391] -ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:204.12588119506836ms total_cost_time:204.17094230651855ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7547 prompt_cache_len:5151 prompt_cache_ratio:0.6825228567642772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 -DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10876870155334473 s -INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11080527305603027 s -DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=320313802556848223172971554747640775661, time:1750767102.550362s req_ids:[8] -DEBUG 06-24 20:11:42 [manager.py:391] -ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:215.01898765563965ms total_cost_time:215.06404876708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7548 prompt_cache_len:5151 prompt_cache_ratio:0.6824324324324325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 -DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10975289344787598 s -INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.11162018775939941 s -DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=313359995978491942668677638544510187266, time:1750767102.7674503s req_ids:[8] -DEBUG 06-24 20:11:42 [manager.py:391] -ERROR 06-24 20:11:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:205.8999538421631ms total_cost_time:205.9464454650879ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7549 prompt_cache_len:5151 prompt_cache_ratio:0.6823420320572261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 -DEBUG 06-24 20:11:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:42 [manager.py:224] router recive req id 8 cost time 0.10744285583496094 s -INFO 06-24 20:11:42 [manager.py:68] detokenization recv req id 8 cost time 0.10928511619567871 s -DEBUG 06-24 20:11:42 [manager.py:391] Prefill Batch: batch_id=236825976613469005315546055762565439020, time:1750767102.976845s req_ids:[8] -DEBUG 06-24 20:11:42 [manager.py:391] -ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:42 lightllm_req_id:8 first_token_cost:205.885648727417ms total_cost_time:205.92951774597168ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7550 prompt_cache_len:5151 prompt_cache_ratio:0.682251655629139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 -DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.10826611518859863 s -INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.11011958122253418 s -DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=88132883812326784471063410761757039423, time:1750767103.191181s req_ids:[8] -DEBUG 06-24 20:11:43 [manager.py:391] -ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:209.75089073181152ms total_cost_time:209.794282913208ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7551 prompt_cache_len:5151 prompt_cache_ratio:0.6821613031386571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 -DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.10725665092468262 s -INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.10895681381225586 s -DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=66356796860217165281483759098578758348, time:1750767103.4195976s req_ids:[8] -DEBUG 06-24 20:11:43 [manager.py:391] -ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:227.25939750671387ms total_cost_time:227.30422019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7552 prompt_cache_len:5151 prompt_cache_ratio:0.6820709745762712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 -DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:43 [manager.py:224] router recive req id 8 cost time 0.1074371337890625 s -INFO 06-24 20:11:43 [manager.py:68] detokenization recv req id 8 cost time 0.10940742492675781 s -DEBUG 06-24 20:11:43 [manager.py:391] Prefill Batch: batch_id=126705362625043862283877706389894545841, time:1750767103.6429734s req_ids:[8] -DEBUG 06-24 20:11:43 [manager.py:391] -ERROR 06-24 20:11:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:374.84216690063477ms total_cost_time:374.88842010498047ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7553 prompt_cache_len:5151 prompt_cache_ratio:0.6819806699324772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 -DEBUG 06-24 20:11:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10911941528320312 s -INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.11129951477050781 s -DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=186556218471417168246214132513990802721, time:1750767104.0130157s req_ids:[8] -DEBUG 06-24 20:11:44 [manager.py:391] -ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:43 lightllm_req_id:8 first_token_cost:200.9432315826416ms total_cost_time:200.9880542755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7554 prompt_cache_len:5151 prompt_cache_ratio:0.681890389197776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 -DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s -INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.1102285385131836 s -DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=169988591339832919397211719879761852865, time:1750767104.2266111s req_ids:[8] -DEBUG 06-24 20:11:44 [manager.py:391] -ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:215.70873260498047ms total_cost_time:215.7454490661621ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:7555 prompt_cache_len:5151 prompt_cache_ratio:0.6818001323626737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 -DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.1071772575378418 s -INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.10895824432373047 s -DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=76418448813543687857113272721814682787, time:1750767104.459956s req_ids:[8] -DEBUG 06-24 20:11:44 [manager.py:391] -ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:179.49700355529785ms total_cost_time:179.52322959899902ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7556 prompt_cache_len:5151 prompt_cache_ratio:0.6817098994176813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 -DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10622930526733398 s -INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.1081995964050293 s -DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=121887375925958240352771498361143910745, time:1750767104.629618s req_ids:[8] -DEBUG 06-24 20:11:44 [manager.py:391] -ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:198.26602935791016ms total_cost_time:198.29368591308594ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:7557 prompt_cache_len:5151 prompt_cache_ratio:0.6816196903533148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 -DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:44 [manager.py:224] router recive req id 8 cost time 0.10699081420898438 s -INFO 06-24 20:11:44 [manager.py:68] detokenization recv req id 8 cost time 0.10898280143737793 s -DEBUG 06-24 20:11:44 [manager.py:391] Prefill Batch: batch_id=294791716470291628319199793820009573133, time:1750767104.8342476s req_ids:[8] -DEBUG 06-24 20:11:44 [manager.py:391] -ERROR 06-24 20:11:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:209.25021171569824ms total_cost_time:209.27762985229492ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7558 prompt_cache_len:5151 prompt_cache_ratio:0.6815295051600953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 -DEBUG 06-24 20:11:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10629391670227051 s -INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10812830924987793 s -DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=133862789411179326842288275223241386351, time:1750767105.0505066s req_ids:[8] -DEBUG 06-24 20:11:45 [manager.py:391] -ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:44 lightllm_req_id:8 first_token_cost:209.53369140625ms total_cost_time:209.5620632171631ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:7559 prompt_cache_len:5151 prompt_cache_ratio:0.6814393438285488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 -DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10702371597290039 s -INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10891437530517578 s -DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=179617423170358196531557431207277395617, time:1750767105.2633886s req_ids:[8] -DEBUG 06-24 20:11:45 [manager.py:391] -ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:209.18798446655273ms total_cost_time:209.21587944030762ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:7560 prompt_cache_len:5151 prompt_cache_ratio:0.6813492063492064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 -DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10806560516357422 s -INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.10988736152648926 s -DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=145768680628490351418632385925288715384, time:1750767105.4791107s req_ids:[8] -DEBUG 06-24 20:11:45 [manager.py:391] -ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:212.5697135925293ms total_cost_time:212.6142978668213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7561 prompt_cache_len:5151 prompt_cache_ratio:0.6812590927126041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 -DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.10834622383117676 s -INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034440994262695 s -DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=138648251566974517984286031088452072751, time:1750767105.6970541s req_ids:[8] -DEBUG 06-24 20:11:45 [manager.py:391] -ERROR 06-24 20:11:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:211.6379737854004ms total_cost_time:211.68208122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7562 prompt_cache_len:5151 prompt_cache_ratio:0.6811690029092833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 -DEBUG 06-24 20:11:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:45 [manager.py:224] router recive req id 8 cost time 0.1078345775604248 s -INFO 06-24 20:11:45 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s -DEBUG 06-24 20:11:45 [manager.py:391] Prefill Batch: batch_id=134283487158023206086155534826987988275, time:1750767105.9123216s req_ids:[8] -DEBUG 06-24 20:11:45 [manager.py:391] -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:45 lightllm_req_id:8 first_token_cost:404.76417541503906ms total_cost_time:404.80685234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7563 prompt_cache_len:5151 prompt_cache_ratio:0.6810789369297897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 -DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10766768455505371 s -INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.10963678359985352 s -DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=325059121281060223623189607702544014239, time:1750767106.3145409s req_ids:[8] -DEBUG 06-24 20:11:46 [manager.py:391] -ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:205.06739616394043ms total_cost_time:205.11388778686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7564 prompt_cache_len:5151 prompt_cache_ratio:0.6809888947646747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 -INFO 06-24 20:11:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10802888870239258 s -INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11025404930114746 s -DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=186911982696731476522228026045104293876, time:1750767106.5277236s req_ids:[8] -DEBUG 06-24 20:11:46 [manager.py:391] -ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:204.0121555328369ms total_cost_time:204.0560245513916ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7565 prompt_cache_len:5151 prompt_cache_ratio:0.6808988764044944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 -DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.1082913875579834 s -INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s -DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=91165102410507712713925566915790908705, time:1750767106.7484918s req_ids:[8] -DEBUG 06-24 20:11:46 [manager.py:391] -ERROR 06-24 20:11:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:219.69151496887207ms total_cost_time:219.73562240600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7566 prompt_cache_len:5151 prompt_cache_ratio:0.6808088818398097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 -DEBUG 06-24 20:11:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:46 [manager.py:224] router recive req id 8 cost time 0.10805869102478027 s -INFO 06-24 20:11:46 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s -DEBUG 06-24 20:11:46 [manager.py:391] Prefill Batch: batch_id=5552189506530361009785273682136623347, time:1750767106.9661233s req_ids:[8] -DEBUG 06-24 20:11:46 [manager.py:391] -ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:46 lightllm_req_id:8 first_token_cost:208.53281021118164ms total_cost_time:208.57596397399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7567 prompt_cache_len:5151 prompt_cache_ratio:0.6807189110611868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 -DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1072683334350586 s -INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.1093595027923584 s -DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=261840028535865296972554656400637385727, time:1750767107.178723s req_ids:[8] -DEBUG 06-24 20:11:47 [manager.py:391] -ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:11:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 32398.280 tokens/s -DEBUG 06-24 20:11:47 [stats.py:37] Avg prompt tokens throughput: 32389.597 tokens/s -DEBUG 06-24 20:11:47 [stats.py:37] Avg generate tokens throughput: 8.683 tokens/s -INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:206.29239082336426ms total_cost_time:206.33578300476074ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7568 prompt_cache_len:5151 prompt_cache_ratio:0.6806289640591966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 -DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1073460578918457 s -INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10930418968200684 s -DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=334830395248494753189233186727094003830, time:1750767107.3902807s req_ids:[8] -DEBUG 06-24 20:11:47 [manager.py:391] -ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:206.892728805542ms total_cost_time:206.93469047546387ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7569 prompt_cache_len:5151 prompt_cache_ratio:0.6805390408244154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 -DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.1075127124786377 s -INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10863971710205078 s -DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=3640695315095079824981478921322872342, time:1750767107.6019921s req_ids:[8] -DEBUG 06-24 20:11:47 [manager.py:391] -ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:205.45649528503418ms total_cost_time:205.49964904785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7570 prompt_cache_len:5151 prompt_cache_ratio:0.6804491413474241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 -DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:47 [manager.py:224] router recive req id 8 cost time 0.10765957832336426 s -INFO 06-24 20:11:47 [manager.py:68] detokenization recv req id 8 cost time 0.10934925079345703 s -DEBUG 06-24 20:11:47 [manager.py:391] Prefill Batch: batch_id=259176706692511777023216105829224292808, time:1750767107.8130593s req_ids:[8] -DEBUG 06-24 20:11:47 [manager.py:391] -ERROR 06-24 20:11:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:208.45770835876465ms total_cost_time:208.50253105163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7571 prompt_cache_len:5151 prompt_cache_ratio:0.6803592656188087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 -DEBUG 06-24 20:11:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s -INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.11025476455688477 s -DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=269065755103071112415442395774572655020, time:1750767108.0271504s req_ids:[8] -DEBUG 06-24 20:11:48 [manager.py:391] -ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:47 lightllm_req_id:8 first_token_cost:203.34267616271973ms total_cost_time:203.3863067626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7572 prompt_cache_len:5151 prompt_cache_ratio:0.68026941362916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 -DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.1074068546295166 s -INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.10947895050048828 s -DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=292513892436549303524914500434221788481, time:1750767108.2419765s req_ids:[8] -DEBUG 06-24 20:11:48 [manager.py:391] -ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:212.76211738586426ms total_cost_time:212.80455589294434ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7573 prompt_cache_len:5151 prompt_cache_ratio:0.6801795853690743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 -DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.30934977531433105 s -INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.3115513324737549 s -DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=55627887512008716128218348700463282596, time:1750767108.6658175s req_ids:[8] -DEBUG 06-24 20:11:48 [manager.py:391] -ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:425.94265937805176ms total_cost_time:425.98652839660645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7574 prompt_cache_len:5151 prompt_cache_ratio:0.6800897808291524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 -DEBUG 06-24 20:11:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:48 [manager.py:224] router recive req id 8 cost time 0.10815787315368652 s -INFO 06-24 20:11:48 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s -DEBUG 06-24 20:11:48 [manager.py:391] Prefill Batch: batch_id=159034666683320048925259149267094192233, time:1750767108.8889012s req_ids:[8] -DEBUG 06-24 20:11:48 [manager.py:391] -ERROR 06-24 20:11:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:212.13340759277344ms total_cost_time:212.17823028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7575 prompt_cache_len:5151 prompt_cache_ratio:0.68 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 -DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10863566398620605 s -INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065936088562012 s -DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=105745694030715500471268079531589838319, time:1750767109.1039073s req_ids:[8] -DEBUG 06-24 20:11:49 [manager.py:391] -ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:48 lightllm_req_id:8 first_token_cost:207.5655460357666ms total_cost_time:207.60846138000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7576 prompt_cache_len:5151 prompt_cache_ratio:0.6799102428722281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 -DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10725212097167969 s -INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s -DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=282135909232614285931765542060023114506, time:1750767109.316409s req_ids:[8] -DEBUG 06-24 20:11:49 [manager.py:391] -ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:205.9769630432129ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7577 prompt_cache_len:5151 prompt_cache_ratio:0.6798205094364524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 -DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10726594924926758 s -INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s -DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=188216836749803026845342195855905657739, time:1750767109.5292065s req_ids:[8] -DEBUG 06-24 20:11:49 [manager.py:391] -ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:205.60789108276367ms total_cost_time:205.65080642700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7578 prompt_cache_len:5151 prompt_cache_ratio:0.6797307996832938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 -DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10712385177612305 s -INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.1095280647277832 s -DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=336313110730404584116123823137357753181, time:1750767109.7404168s req_ids:[8] -DEBUG 06-24 20:11:49 [manager.py:391] -ERROR 06-24 20:11:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:204.86688613891602ms total_cost_time:204.9100399017334ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7579 prompt_cache_len:5151 prompt_cache_ratio:0.6796411136033778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 -DEBUG 06-24 20:11:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:49 [manager.py:224] router recive req id 8 cost time 0.10727143287658691 s -INFO 06-24 20:11:49 [manager.py:68] detokenization recv req id 8 cost time 0.10964322090148926 s -DEBUG 06-24 20:11:49 [manager.py:391] Prefill Batch: batch_id=206080538548647739792168901607128742659, time:1750767109.9512205s req_ids:[8] -DEBUG 06-24 20:11:49 [manager.py:391] -ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:49 lightllm_req_id:8 first_token_cost:207.3988914489746ms total_cost_time:207.4434757232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7580 prompt_cache_len:5151 prompt_cache_ratio:0.6795514511873351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 -DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:50 [batch.py:51] router release req id 8 -INFO 06-24 20:11:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10822415351867676 s -INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.11013269424438477 s -DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=80145215812688536994433277270156737848, time:1750767110.1638384s req_ids:[8] -DEBUG 06-24 20:11:50 [manager.py:391] -ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:208.0380916595459ms total_cost_time:208.0826759338379ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7581 prompt_cache_len:5151 prompt_cache_ratio:0.6794618124258014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 -DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s -INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.10914778709411621 s -DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=99644866612821332998155598073138688888, time:1750767110.3906262s req_ids:[8] -DEBUG 06-24 20:11:50 [manager.py:391] -ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:222.015380859375ms total_cost_time:222.05758094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7582 prompt_cache_len:5151 prompt_cache_ratio:0.679372197309417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 -DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:50 [manager.py:224] router recive req id 8 cost time 0.10823392868041992 s -INFO 06-24 20:11:50 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s -DEBUG 06-24 20:11:50 [manager.py:391] Prefill Batch: batch_id=176151888092917954387613094052147616261, time:1750767110.6053104s req_ids:[8] -DEBUG 06-24 20:11:50 [manager.py:391] -ERROR 06-24 20:11:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:206.31098747253418ms total_cost_time:206.35604858398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7583 prompt_cache_len:5151 prompt_cache_ratio:0.6792826058288276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 -DEBUG 06-24 20:11:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.31011343002319336 s -INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.3121047019958496 s -DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=307182656201380843456086862858552060262, time:1750767111.0246234s req_ids:[8] -DEBUG 06-24 20:11:51 [manager.py:391] -ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:50 lightllm_req_id:8 first_token_cost:419.76261138916016ms total_cost_time:419.80624198913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7584 prompt_cache_len:5151 prompt_cache_ratio:0.6791930379746836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 -DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s -INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.10997939109802246 s -DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=306002531093132578532058517711718487539, time:1750767111.2456334s req_ids:[8] -DEBUG 06-24 20:11:51 [manager.py:391] -ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:210.26873588562012ms total_cost_time:210.3121280670166ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7585 prompt_cache_len:5151 prompt_cache_ratio:0.6791034937376401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 -DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10836124420166016 s -INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.11037492752075195 s -DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=47301069928570188080534922027948646235, time:1750767111.4572446s req_ids:[8] -DEBUG 06-24 20:11:51 [manager.py:391] -ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:200.98257064819336ms total_cost_time:201.02572441101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7586 prompt_cache_len:5151 prompt_cache_ratio:0.6790139731083575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 -DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10720705986022949 s -INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.10921883583068848 s -DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=20063513931273837474188628653112903437, time:1750767111.6647258s req_ids:[8] -DEBUG 06-24 20:11:51 [manager.py:391] -ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:206.5417766571045ms total_cost_time:206.58588409423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7587 prompt_cache_len:5151 prompt_cache_ratio:0.678924476077501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 -DEBUG 06-24 20:11:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:51 [manager.py:224] router recive req id 8 cost time 0.10907506942749023 s -INFO 06-24 20:11:51 [manager.py:68] detokenization recv req id 8 cost time 0.1110982894897461 s -DEBUG 06-24 20:11:51 [manager.py:391] Prefill Batch: batch_id=331378806526777745438802288831548262516, time:1750767111.8865666s req_ids:[8] -DEBUG 06-24 20:11:51 [manager.py:391] -ERROR 06-24 20:11:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:215.82365036010742ms total_cost_time:215.867280960083ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7588 prompt_cache_len:5151 prompt_cache_ratio:0.6788350026357406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 -DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s -INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10974454879760742 s -DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=134589987200880221805656027682236387595, time:1750767112.0985985s req_ids:[8] -DEBUG 06-24 20:11:52 [manager.py:391] -ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:51 lightllm_req_id:8 first_token_cost:201.0822296142578ms total_cost_time:201.12919807434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7589 prompt_cache_len:5151 prompt_cache_ratio:0.6787455527737515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 -DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10866641998291016 s -INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.11063885688781738 s -DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=265795104378361882361495694846097974291, time:1750767112.304473s req_ids:[8] -DEBUG 06-24 20:11:52 [manager.py:391] -ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:204.71692085266113ms total_cost_time:204.76412773132324ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7590 prompt_cache_len:5151 prompt_cache_ratio:0.6786561264822134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 -DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.10732603073120117 s -INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10934734344482422 s -DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=326500589668166019911679618073762534047, time:1750767112.5148087s req_ids:[8] -DEBUG 06-24 20:11:52 [manager.py:391] -ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:205.76024055480957ms total_cost_time:205.80363273620605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7591 prompt_cache_len:5151 prompt_cache_ratio:0.6785667237518114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 -DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.1073613166809082 s -INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.10937070846557617 s -DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=225464502858199488910453500859387784207, time:1750767112.7271273s req_ids:[8] -DEBUG 06-24 20:11:52 [manager.py:391] -ERROR 06-24 20:11:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:191.90573692321777ms total_cost_time:191.95127487182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7592 prompt_cache_len:5151 prompt_cache_ratio:0.6784773445732349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 -DEBUG 06-24 20:11:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:52 [manager.py:224] router recive req id 8 cost time 0.1114962100982666 s -INFO 06-24 20:11:52 [manager.py:68] detokenization recv req id 8 cost time 0.11359786987304688 s -DEBUG 06-24 20:11:52 [manager.py:391] Prefill Batch: batch_id=177648526989387100364248466222953646722, time:1750767112.9252858s req_ids:[8] -DEBUG 06-24 20:11:52 [manager.py:391] -ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:52 lightllm_req_id:8 first_token_cost:359.00402069091797ms total_cost_time:359.12060737609863ms,out_token_counter:1 mean_per_token_cost_time: 0.11658668518066406ms prompt_token_num:7593 prompt_cache_len:5151 prompt_cache_ratio:0.678387988937179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 -INFO 06-24 20:11:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:11:53 [statics_utils.py:24] mean first cost: 230.45424360725747 ms -INFO 06-24 20:11:53 [statics_utils.py:24] mean per token cost: 0.08918244930490664 ms -DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s -INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.11029934883117676 s -DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=247120571047092908805065118308696284587, time:1750767113.2921638s req_ids:[8] -DEBUG 06-24 20:11:53 [manager.py:391] -ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:201.8113136291504ms total_cost_time:201.8566131591797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7594 prompt_cache_len:5151 prompt_cache_ratio:0.6782986568343429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 -DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10799646377563477 s -INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10996532440185547 s -DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=156741713883222348929408060229348054386, time:1750767113.5038817s req_ids:[8] -DEBUG 06-24 20:11:53 [manager.py:391] -ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:207.9160213470459ms total_cost_time:207.98015594482422ms,out_token_counter:1 mean_per_token_cost_time: 0.06413459777832031ms prompt_token_num:7595 prompt_cache_len:5151 prompt_cache_ratio:0.6782093482554312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 -DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:11:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10706853866577148 s -INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10963296890258789 s -DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=22160039114344207846908912752363658009, time:1750767113.7199109s req_ids:[8] -DEBUG 06-24 20:11:53 [manager.py:391] -ERROR 06-24 20:11:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:194.5345401763916ms total_cost_time:194.5805549621582ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7596 prompt_cache_len:5151 prompt_cache_ratio:0.6781200631911533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 -DEBUG 06-24 20:11:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:53 [manager.py:224] router recive req id 8 cost time 0.10690903663635254 s -INFO 06-24 20:11:53 [manager.py:68] detokenization recv req id 8 cost time 0.10880517959594727 s -DEBUG 06-24 20:11:53 [manager.py:391] Prefill Batch: batch_id=318069371706745337923303718372962645116, time:1750767113.918359s req_ids:[8] -DEBUG 06-24 20:11:53 [manager.py:391] -ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:53 lightllm_req_id:8 first_token_cost:205.275297164917ms total_cost_time:205.3208351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7597 prompt_cache_len:5151 prompt_cache_ratio:0.6780308016322233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 -DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10772275924682617 s -INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974574089050293 s -DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=294179280277235238044681467199038308348, time:1750767114.129605s req_ids:[8] -DEBUG 06-24 20:11:54 [manager.py:391] -ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:207.5364589691162ms total_cost_time:207.5817584991455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7598 prompt_cache_len:5151 prompt_cache_ratio:0.6779415635693603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 -DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10748744010925293 s -INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10944557189941406 s -DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=191603652701972640168689369227027687579, time:1750767114.3409472s req_ids:[8] -DEBUG 06-24 20:11:54 [manager.py:391] -ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:208.08172225952148ms total_cost_time:208.12726020812988ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7599 prompt_cache_len:5151 prompt_cache_ratio:0.6778523489932886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 -DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10667276382446289 s -INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10857391357421875 s -DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=54371763715272126649577982863107339549, time:1750767114.5551345s req_ids:[8] -DEBUG 06-24 20:11:54 [manager.py:391] -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:11:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:204.96869087219238ms total_cost_time:205.01303672790527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7600 prompt_cache_len:5151 prompt_cache_ratio:0.6777631578947368 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 -DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10717558860778809 s -INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10929083824157715 s -DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=262799399220093104716059103035021439543, time:1750767114.7641528s req_ids:[8] -DEBUG 06-24 20:11:54 [manager.py:391] -ERROR 06-24 20:11:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:197.54457473754883ms total_cost_time:197.59011268615723ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7601 prompt_cache_len:5151 prompt_cache_ratio:0.6776739902644389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 -DEBUG 06-24 20:11:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:54 [manager.py:224] router recive req id 8 cost time 0.10757279396057129 s -INFO 06-24 20:11:54 [manager.py:68] detokenization recv req id 8 cost time 0.10966157913208008 s -DEBUG 06-24 20:11:54 [manager.py:391] Prefill Batch: batch_id=110870420036702772096070893083722228545, time:1750767114.969113s req_ids:[8] -DEBUG 06-24 20:11:54 [manager.py:391] -ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:54 lightllm_req_id:8 first_token_cost:205.0192356109619ms total_cost_time:205.0638198852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7602 prompt_cache_len:5151 prompt_cache_ratio:0.6775848460931334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 -DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10756349563598633 s -INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10939645767211914 s -DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=210220237649531196875526394833870996893, time:1750767115.182449s req_ids:[8] -DEBUG 06-24 20:11:55 [manager.py:391] -ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:361.6604804992676ms total_cost_time:361.7062568664551ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7603 prompt_cache_len:5151 prompt_cache_ratio:0.6774957253715639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 -DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10712647438049316 s -INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10901856422424316 s -DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=175504085763459210803417320798174422131, time:1750767115.545004s req_ids:[8] -DEBUG 06-24 20:11:55 [manager.py:391] -ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:201.48563385009766ms total_cost_time:201.53021812438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7604 prompt_cache_len:5151 prompt_cache_ratio:0.6774066280904787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 -DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s -INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10972809791564941 s -DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=246909247659818984089727209313713175225, time:1750767115.7549067s req_ids:[8] -DEBUG 06-24 20:11:55 [manager.py:391] -ERROR 06-24 20:11:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:167.2191619873047ms total_cost_time:167.26303100585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7605 prompt_cache_len:5151 prompt_cache_ratio:0.6773175542406311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 -DEBUG 06-24 20:11:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:55 [manager.py:224] router recive req id 8 cost time 0.10723996162414551 s -INFO 06-24 20:11:55 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s -DEBUG 06-24 20:11:55 [manager.py:391] Prefill Batch: batch_id=157032801151998973443450838199777836797, time:1750767115.926761s req_ids:[8] -DEBUG 06-24 20:11:55 [manager.py:391] -ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:55 lightllm_req_id:8 first_token_cost:202.64434814453125ms total_cost_time:202.68917083740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7606 prompt_cache_len:5151 prompt_cache_ratio:0.6772285038127794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 -DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10741066932678223 s -INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.10977816581726074 s -DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=240035920349394977824314060253533495167, time:1750767116.1468995s req_ids:[8] -DEBUG 06-24 20:11:56 [manager.py:391] -ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:220.50857543945312ms total_cost_time:220.55315971374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7607 prompt_cache_len:5151 prompt_cache_ratio:0.6771394767976864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 -DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.1080935001373291 s -INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11004972457885742 s -DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=69803246132653740458522977531718677978, time:1750767116.3658953s req_ids:[8] -DEBUG 06-24 20:11:56 [manager.py:391] -ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:207.82232284545898ms total_cost_time:207.86786079406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7608 prompt_cache_len:5151 prompt_cache_ratio:0.6770504731861199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 -DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s -INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11011123657226562 s -DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=161361274899183701647745323484916032548, time:1750767116.584003s req_ids:[8] -DEBUG 06-24 20:11:56 [manager.py:391] -ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:216.62139892578125ms total_cost_time:216.66669845581055ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7609 prompt_cache_len:5151 prompt_cache_ratio:0.6769614929688527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 -DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10826539993286133 s -INFO 06-24 20:11:56 [manager.py:68] detokenization recv req id 8 cost time 0.11025524139404297 s -DEBUG 06-24 20:11:56 [manager.py:391] Prefill Batch: batch_id=224161456428403044134976587197314518946, time:1750767116.7989717s req_ids:[8] -DEBUG 06-24 20:11:56 [manager.py:391] -ERROR 06-24 20:11:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:205.9805393218994ms total_cost_time:206.0248851776123ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7610 prompt_cache_len:5151 prompt_cache_ratio:0.6768725361366623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 -DEBUG 06-24 20:11:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:56 [manager.py:224] router recive req id 8 cost time 0.10729455947875977 s -INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s -DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=230468343094795497387444710483104175921, time:1750767117.0107594s req_ids:[8] -DEBUG 06-24 20:11:57 [manager.py:391] -ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:56 lightllm_req_id:8 first_token_cost:206.28714561462402ms total_cost_time:206.33244514465332ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7611 prompt_cache_len:5151 prompt_cache_ratio:0.6767836026803311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 -DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:57 [batch.py:51] router release req id 8 -INFO 06-24 20:11:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.11011266708374023 s -INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.11212778091430664 s -DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=138092859862136241658117489900640823354, time:1750767117.2220254s req_ids:[8] -DEBUG 06-24 20:11:57 [manager.py:391] -ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:11:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 33253.528 tokens/s -DEBUG 06-24 20:11:57 [stats.py:37] Avg prompt tokens throughput: 33244.768 tokens/s -DEBUG 06-24 20:11:57 [stats.py:37] Avg generate tokens throughput: 8.760 tokens/s -INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:206.5145969390869ms total_cost_time:206.5596580505371ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7612 prompt_cache_len:5151 prompt_cache_ratio:0.6766946925906463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 -DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.1081092357635498 s -INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.11008954048156738 s -DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=307895635031848941299399201531484764646, time:1750767117.4424143s req_ids:[8] -DEBUG 06-24 20:11:57 [manager.py:391] -ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:214.8268222808838ms total_cost_time:214.86926078796387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7613 prompt_cache_len:5151 prompt_cache_ratio:0.6766058058584001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 -DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:57 [manager.py:224] router recive req id 8 cost time 0.308910608291626 s -INFO 06-24 20:11:57 [manager.py:68] detokenization recv req id 8 cost time 0.3109011650085449 s -DEBUG 06-24 20:11:57 [manager.py:391] Prefill Batch: batch_id=136917777098879793241144358799949189195, time:1750767117.8635302s req_ids:[8] -DEBUG 06-24 20:11:57 [manager.py:391] -ERROR 06-24 20:11:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:420.8080768585205ms total_cost_time:420.8526611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7614 prompt_cache_len:5151 prompt_cache_ratio:0.6765169424743893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 -DEBUG 06-24 20:11:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10798335075378418 s -INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s -DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=180865609731338338075095484017174829893, time:1750767118.0840385s req_ids:[8] -DEBUG 06-24 20:11:58 [manager.py:391] -ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:57 lightllm_req_id:8 first_token_cost:210.04533767700195ms total_cost_time:210.08920669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7615 prompt_cache_len:5151 prompt_cache_ratio:0.6764281024294156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 -DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10816621780395508 s -INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s -DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=145160727108707129997868071611482595468, time:1750767118.2986574s req_ids:[8] -DEBUG 06-24 20:11:58 [manager.py:391] -ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:204.01453971862793ms total_cost_time:204.05888557434082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7616 prompt_cache_len:5151 prompt_cache_ratio:0.6763392857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 -DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10793232917785645 s -INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.1100320816040039 s -DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=319578231464490195713350762204816044405, time:1750767118.5067341s req_ids:[8] -DEBUG 06-24 20:11:58 [manager.py:391] -ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:204.132080078125ms total_cost_time:204.1773796081543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7617 prompt_cache_len:5151 prompt_cache_ratio:0.6762504923198109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 -DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10730981826782227 s -INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.10927486419677734 s -DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=206517665322153722171346162811302919141, time:1750767118.7168157s req_ids:[8] -DEBUG 06-24 20:11:58 [manager.py:391] -ERROR 06-24 20:11:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:205.3208351135254ms total_cost_time:205.36565780639648ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7618 prompt_cache_len:5151 prompt_cache_ratio:0.6761617222368076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 -DEBUG 06-24 20:11:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:58 [manager.py:224] router recive req id 8 cost time 0.10852384567260742 s -INFO 06-24 20:11:58 [manager.py:68] detokenization recv req id 8 cost time 0.11053061485290527 s -DEBUG 06-24 20:11:58 [manager.py:391] Prefill Batch: batch_id=182806468058710080011925634816711119674, time:1750767118.9331715s req_ids:[8] -DEBUG 06-24 20:11:58 [manager.py:391] -ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:58 lightllm_req_id:8 first_token_cost:213.98615837097168ms total_cost_time:214.03098106384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7619 prompt_cache_len:5151 prompt_cache_ratio:0.6760729754560966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 -DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s -INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947751998901367 s -DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=125058214398531642428446553485042514064, time:1750767119.1481361s req_ids:[8] -DEBUG 06-24 20:11:59 [manager.py:391] -ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:205.12819290161133ms total_cost_time:205.17396926879883ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7620 prompt_cache_len:5151 prompt_cache_ratio:0.675984251968504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 -DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s -INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.11013245582580566 s -DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=266503259897760547314518672065873128037, time:1750767119.3667228s req_ids:[8] -DEBUG 06-24 20:11:59 [manager.py:391] -ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:217.32163429260254ms total_cost_time:217.38266944885254ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:7621 prompt_cache_len:5151 prompt_cache_ratio:0.6758955517648603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 -DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.10742998123168945 s -INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10950732231140137 s -DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=96431240528635776001762764699951333799, time:1750767119.5825422s req_ids:[8] -DEBUG 06-24 20:11:59 [manager.py:391] -ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:202.8813362121582ms total_cost_time:202.9252052307129ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7622 prompt_cache_len:5151 prompt_cache_ratio:0.6758068748360011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 -DEBUG 06-24 20:11:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:11:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:11:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:11:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:11:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:11:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:11:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:11:59 [manager.py:224] router recive req id 8 cost time 0.1074988842010498 s -INFO 06-24 20:11:59 [manager.py:68] detokenization recv req id 8 cost time 0.10958600044250488 s -DEBUG 06-24 20:11:59 [manager.py:391] Prefill Batch: batch_id=65827784738082953039640762525229544695, time:1750767119.7984495s req_ids:[8] -DEBUG 06-24 20:11:59 [manager.py:391] -ERROR 06-24 20:11:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:11:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:11:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:210.25753021240234ms total_cost_time:210.31785011291504ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7623 prompt_cache_len:5151 prompt_cache_ratio:0.6757182211727666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:11:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 -DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.3103930950164795 s -INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.31258440017700195 s -DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=20141742625225259944710266829088301339, time:1750767120.2114785s req_ids:[8] -DEBUG 06-24 20:12:00 [manager.py:391] -ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:11:59 lightllm_req_id:8 first_token_cost:410.63976287841797ms total_cost_time:410.68577766418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7624 prompt_cache_len:5151 prompt_cache_ratio:0.6756295907660022 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 -DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.10883092880249023 s -INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085295677185059 s -DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=90148409981598706303780972151094956415, time:1750767120.4277575s req_ids:[8] -DEBUG 06-24 20:12:00 [manager.py:391] -ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:210.47687530517578ms total_cost_time:210.52145957946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7625 prompt_cache_len:5151 prompt_cache_ratio:0.6755409836065573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 -DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.10720205307006836 s -INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.10912775993347168 s -INFO 06-24 20:12:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=84636828358444117249686718153847102598, time:1750767120.6417718s req_ids:[8] -DEBUG 06-24 20:12:00 [manager.py:391] -ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:209.49268341064453ms total_cost_time:209.53989028930664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7626 prompt_cache_len:5151 prompt_cache_ratio:0.6754523996852871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 -DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:00 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s -INFO 06-24 20:12:00 [manager.py:68] detokenization recv req id 8 cost time 0.10981082916259766 s -DEBUG 06-24 20:12:00 [manager.py:391] Prefill Batch: batch_id=315821611015599099231123208784346763090, time:1750767120.8559885s req_ids:[8] -DEBUG 06-24 20:12:00 [manager.py:391] -ERROR 06-24 20:12:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:208.13274383544922ms total_cost_time:208.17828178405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7627 prompt_cache_len:5151 prompt_cache_ratio:0.675363838993051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 -DEBUG 06-24 20:12:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10771894454956055 s -INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s -DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=202394250765275040888192629281793333950, time:1750767121.069141s req_ids:[8] -DEBUG 06-24 20:12:01 [manager.py:391] -ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:00 lightllm_req_id:8 first_token_cost:208.41169357299805ms total_cost_time:208.45651626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7628 prompt_cache_len:5151 prompt_cache_ratio:0.6752753015207131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 -DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10761618614196777 s -INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.10958719253540039 s -DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=154807317957381621669301476022717652428, time:1750767121.2832618s req_ids:[8] -DEBUG 06-24 20:12:01 [manager.py:391] -ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:204.00357246398926ms total_cost_time:204.04767990112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7629 prompt_cache_len:5151 prompt_cache_ratio:0.6751867872591427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 -DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10805916786193848 s -INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.11003541946411133 s -DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=29302947999594689954497796196213616164, time:1750767121.4928668s req_ids:[8] -DEBUG 06-24 20:12:01 [manager.py:391] -ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:208.71925354003906ms total_cost_time:208.76407623291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7630 prompt_cache_len:5151 prompt_cache_ratio:0.6750982961992136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 -DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10754013061523438 s -INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.1095438003540039 s -DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=138418184835968109005212377708670425308, time:1750767121.7076094s req_ids:[8] -DEBUG 06-24 20:12:01 [manager.py:391] -ERROR 06-24 20:12:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:204.85424995422363ms total_cost_time:204.90074157714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7631 prompt_cache_len:5151 prompt_cache_ratio:0.6750098283318045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 -DEBUG 06-24 20:12:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:01 [manager.py:224] router recive req id 8 cost time 0.10854339599609375 s -INFO 06-24 20:12:01 [manager.py:68] detokenization recv req id 8 cost time 0.11053705215454102 s -DEBUG 06-24 20:12:01 [manager.py:391] Prefill Batch: batch_id=251343756333749997203949398879495622773, time:1750767121.9175932s req_ids:[8] -DEBUG 06-24 20:12:01 [manager.py:391] -ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:01 lightllm_req_id:8 first_token_cost:207.7937126159668ms total_cost_time:207.83662796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7632 prompt_cache_len:5151 prompt_cache_ratio:0.6749213836477987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 -DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.1076517105102539 s -INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s -DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=105915941757655465419408989286278896191, time:1750767122.1317947s req_ids:[8] -DEBUG 06-24 20:12:02 [manager.py:391] -ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:359.1322898864746ms total_cost_time:359.1773509979248ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7633 prompt_cache_len:5151 prompt_cache_ratio:0.6748329621380846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 -DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s -INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096043586730957 s -DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=292466292324528418532920161671775069269, time:1750767122.4912443s req_ids:[8] -DEBUG 06-24 20:12:02 [manager.py:391] -ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:198.82726669311523ms total_cost_time:198.8697052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7634 prompt_cache_len:5151 prompt_cache_ratio:0.6747445637935552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 -DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s -INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s -DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=251356633320645433807908352290572514401, time:1750767122.7008545s req_ids:[8] -DEBUG 06-24 20:12:02 [manager.py:391] -ERROR 06-24 20:12:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:206.88843727111816ms total_cost_time:206.94279670715332ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:7635 prompt_cache_len:5151 prompt_cache_ratio:0.674656188605108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 -DEBUG 06-24 20:12:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:02 [manager.py:224] router recive req id 8 cost time 0.10819005966186523 s -INFO 06-24 20:12:02 [manager.py:68] detokenization recv req id 8 cost time 0.11022734642028809 s -DEBUG 06-24 20:12:02 [manager.py:391] Prefill Batch: batch_id=228999123688250112746799481675882906663, time:1750767122.915243s req_ids:[8] -DEBUG 06-24 20:12:02 [manager.py:391] -ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:02 lightllm_req_id:8 first_token_cost:213.81688117980957ms total_cost_time:213.86098861694336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7636 prompt_cache_len:5151 prompt_cache_ratio:0.6745678365636459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 -DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10842037200927734 s -INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.11054277420043945 s -DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=222487139147151296085300585172215025662, time:1750767123.129836s req_ids:[8] -DEBUG 06-24 20:12:03 [manager.py:391] -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:197.14689254760742ms total_cost_time:197.1902847290039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7637 prompt_cache_len:5151 prompt_cache_ratio:0.6744795076600759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 -DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10741138458251953 s -INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955047607421875 s -DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=201567458647512280476042599797373910715, time:1750767123.3382328s req_ids:[8] -DEBUG 06-24 20:12:03 [manager.py:391] -ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:206.2857151031494ms total_cost_time:206.3305377960205ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7638 prompt_cache_len:5151 prompt_cache_ratio:0.6743912018853103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 -DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10785889625549316 s -INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10998892784118652 s -DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=59434207318227779999270853848336715770, time:1750767123.5517983s req_ids:[8] -DEBUG 06-24 20:12:03 [manager.py:391] -ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:205.5060863494873ms total_cost_time:205.5490016937256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7639 prompt_cache_len:5151 prompt_cache_ratio:0.6743029192302658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 -DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s -INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s -DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=17700833921834285883276378318189027449, time:1750767123.7606602s req_ids:[8] -DEBUG 06-24 20:12:03 [manager.py:391] -ERROR 06-24 20:12:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:203.60040664672852ms total_cost_time:203.6449909210205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7640 prompt_cache_len:5151 prompt_cache_ratio:0.6742146596858639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 -DEBUG 06-24 20:12:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:03 [manager.py:224] router recive req id 8 cost time 0.1075904369354248 s -INFO 06-24 20:12:03 [manager.py:68] detokenization recv req id 8 cost time 0.11019110679626465 s -DEBUG 06-24 20:12:03 [manager.py:391] Prefill Batch: batch_id=286149354074145959431110702610805139441, time:1750767123.9697864s req_ids:[8] -DEBUG 06-24 20:12:03 [manager.py:391] -ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:03 lightllm_req_id:8 first_token_cost:202.79431343078613ms total_cost_time:202.8367519378662ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7641 prompt_cache_len:5151 prompt_cache_ratio:0.6741264232430311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 -DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:04 [batch.py:51] router release req id 8 -INFO 06-24 20:12:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10877656936645508 s -INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.11089229583740234 s -DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=272417992743359392322715543009164785209, time:1750767124.1806426s req_ids:[8] -DEBUG 06-24 20:12:04 [manager.py:391] -ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:206.83598518371582ms total_cost_time:206.88199996948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7642 prompt_cache_len:5151 prompt_cache_ratio:0.6740382098926982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 -DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10679244995117188 s -INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.1085963249206543 s -DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=45954975210836917177091388612510632464, time:1750767124.3916602s req_ids:[8] -DEBUG 06-24 20:12:04 [manager.py:391] -ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:343.69969367980957ms total_cost_time:343.72615814208984ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:7643 prompt_cache_len:5151 prompt_cache_ratio:0.6739500196258014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 -DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10479950904846191 s -INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.10668444633483887 s -DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=293091237295508245519746781460967692957, time:1750767124.737938s req_ids:[8] -DEBUG 06-24 20:12:04 [manager.py:391] -ERROR 06-24 20:12:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:186.66505813598633ms total_cost_time:186.692476272583ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7644 prompt_cache_len:5151 prompt_cache_ratio:0.673861852433281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 -DEBUG 06-24 20:12:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:04 [manager.py:224] router recive req id 8 cost time 0.10714316368103027 s -INFO 06-24 20:12:04 [manager.py:68] detokenization recv req id 8 cost time 0.10845398902893066 s -DEBUG 06-24 20:12:04 [manager.py:391] Prefill Batch: batch_id=337865258369175204784794031953478057126, time:1750767124.9302933s req_ids:[8] -DEBUG 06-24 20:12:04 [manager.py:391] -ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:04 lightllm_req_id:8 first_token_cost:203.68027687072754ms total_cost_time:203.72509956359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7645 prompt_cache_len:5151 prompt_cache_ratio:0.6737737083060824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 -DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10900211334228516 s -INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.1111598014831543 s -DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=194631978790029223599206045056590355021, time:1750767125.1390193s req_ids:[8] -DEBUG 06-24 20:12:05 [manager.py:391] -ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:205.50990104675293ms total_cost_time:205.55567741394043ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7646 prompt_cache_len:5151 prompt_cache_ratio:0.6736855872351556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 -DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s -INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063909530639648 s -DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=308001038756916865905015348005747319476, time:1750767125.350848s req_ids:[8] -DEBUG 06-24 20:12:05 [manager.py:391] -ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:210.052490234375ms total_cost_time:210.0963592529297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7647 prompt_cache_len:5151 prompt_cache_ratio:0.6735974892114555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 -DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10803365707397461 s -INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10923099517822266 s -DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=127060175205574686085944506796575274085, time:1750767125.5634408s req_ids:[8] -DEBUG 06-24 20:12:05 [manager.py:391] -ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:207.98707008361816ms total_cost_time:208.03260803222656ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7648 prompt_cache_len:5151 prompt_cache_ratio:0.6735094142259415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 -DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10728073120117188 s -INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10932040214538574 s -DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=213957160664273498099397797333643204139, time:1750767125.7777202s req_ids:[8] -DEBUG 06-24 20:12:05 [manager.py:391] -ERROR 06-24 20:12:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:207.83090591430664ms total_cost_time:207.87644386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7649 prompt_cache_len:5151 prompt_cache_ratio:0.6734213622695777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 -DEBUG 06-24 20:12:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:05 [manager.py:224] router recive req id 8 cost time 0.10753250122070312 s -INFO 06-24 20:12:05 [manager.py:68] detokenization recv req id 8 cost time 0.10967326164245605 s -DEBUG 06-24 20:12:05 [manager.py:391] Prefill Batch: batch_id=13061883082707811418455032452779136763, time:1750767125.9915092s req_ids:[8] -DEBUG 06-24 20:12:05 [manager.py:391] -ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:05 lightllm_req_id:8 first_token_cost:204.47278022766113ms total_cost_time:204.51736450195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7650 prompt_cache_len:5151 prompt_cache_ratio:0.6733333333333333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 -DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.10706090927124023 s -INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10899138450622559 s -DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=256030095120020264533364286603217459869, time:1750767126.206879s req_ids:[8] -DEBUG 06-24 20:12:06 [manager.py:391] -ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:209.35964584350586ms total_cost_time:209.40446853637695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7651 prompt_cache_len:5151 prompt_cache_ratio:0.6732453274081819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 -DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.10760188102722168 s -INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s -DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=77668998125334631537902299014074144355, time:1750767126.41798s req_ids:[8] -DEBUG 06-24 20:12:06 [manager.py:391] -ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:206.41303062438965ms total_cost_time:206.43925666809082ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7652 prompt_cache_len:5151 prompt_cache_ratio:0.6731573444851019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 -DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:06 [manager.py:224] router recive req id 8 cost time 0.1050422191619873 s -INFO 06-24 20:12:06 [manager.py:68] detokenization recv req id 8 cost time 0.10705232620239258 s -DEBUG 06-24 20:12:06 [manager.py:391] Prefill Batch: batch_id=99039694711886695764364046387431626529, time:1750767126.634283s req_ids:[8] -DEBUG 06-24 20:12:06 [manager.py:391] -ERROR 06-24 20:12:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:210.53385734558105ms total_cost_time:210.5538845062256ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:7653 prompt_cache_len:5151 prompt_cache_ratio:0.6730693845550765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 -DEBUG 06-24 20:12:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.31047582626342773 s -INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.3126389980316162 s -DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=157716120506166927391665716434283762813, time:1750767127.058635s req_ids:[8] -DEBUG 06-24 20:12:07 [manager.py:391] -ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:06 lightllm_req_id:8 first_token_cost:425.78625679016113ms total_cost_time:425.83179473876953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7654 prompt_cache_len:5151 prompt_cache_ratio:0.6729814476090933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 -DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10737419128417969 s -INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s -DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=107131856995001008665159195517854166514, time:1750767127.2778528s req_ids:[8] -DEBUG 06-24 20:12:07 [manager.py:391] -ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:12:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 32650.535 tokens/s -DEBUG 06-24 20:12:07 [stats.py:37] Avg prompt tokens throughput: 32641.983 tokens/s -DEBUG 06-24 20:12:07 [stats.py:37] Avg generate tokens throughput: 8.552 tokens/s -INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:209.21754837036133ms total_cost_time:209.26427841186523ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7655 prompt_cache_len:5151 prompt_cache_ratio:0.672893533638145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 -DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10753488540649414 s -INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10886025428771973 s -DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=64058647012786924515775172497937836678, time:1750767127.4913738s req_ids:[8] -DEBUG 06-24 20:12:07 [manager.py:391] -ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:203.23824882507324ms total_cost_time:203.28211784362793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7656 prompt_cache_len:5151 prompt_cache_ratio:0.6728056426332288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 -DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:07 [batch.py:51] router release req id 8 -INFO 06-24 20:12:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10768985748291016 s -INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s -DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=310553461627661597263289366800552946098, time:1750767127.7002566s req_ids:[8] -DEBUG 06-24 20:12:07 [manager.py:391] -ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:204.05912399291992ms total_cost_time:204.08129692077637ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7657 prompt_cache_len:5151 prompt_cache_ratio:0.6727177745853468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 -DEBUG 06-24 20:12:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:07 [manager.py:224] router recive req id 8 cost time 0.10602450370788574 s -INFO 06-24 20:12:07 [manager.py:68] detokenization recv req id 8 cost time 0.10721468925476074 s -DEBUG 06-24 20:12:07 [manager.py:391] Prefill Batch: batch_id=17602799060686228765768248359399999234, time:1750767127.9095516s req_ids:[8] -DEBUG 06-24 20:12:07 [manager.py:391] -ERROR 06-24 20:12:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:07 lightllm_req_id:8 first_token_cost:212.22567558288574ms total_cost_time:212.26882934570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7658 prompt_cache_len:5151 prompt_cache_ratio:0.6726299294855054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 -DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.1080634593963623 s -INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.11032533645629883 s -DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=168358859685662161197510960453472682439, time:1750767128.1249661s req_ids:[8] -DEBUG 06-24 20:12:08 [manager.py:391] -ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:212.3239040374756ms total_cost_time:212.3696804046631ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7659 prompt_cache_len:5151 prompt_cache_ratio:0.672542107324716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 -DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10715460777282715 s -INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10905909538269043 s -DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=177548496048096521164403243664799188756, time:1750767128.3417008s req_ids:[8] -DEBUG 06-24 20:12:08 [manager.py:391] -ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:164.5498275756836ms total_cost_time:164.59298133850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7660 prompt_cache_len:5151 prompt_cache_ratio:0.6724543080939948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 -DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10836148262023926 s -INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s -DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=99453293519797623160995451792571262350, time:1750767128.5119169s req_ids:[8] -DEBUG 06-24 20:12:08 [manager.py:391] -ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:204.50091361999512ms total_cost_time:204.5290470123291ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:7661 prompt_cache_len:5151 prompt_cache_ratio:0.6723665317843623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 -DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10555791854858398 s -INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10746908187866211 s -DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=277848746396038846409600628438658162249, time:1750767128.7217531s req_ids:[8] -DEBUG 06-24 20:12:08 [manager.py:391] -ERROR 06-24 20:12:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:204.00071144104004ms total_cost_time:204.0231227874756ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:7662 prompt_cache_len:5151 prompt_cache_ratio:0.6722787783868441 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 -DEBUG 06-24 20:12:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:08 [manager.py:224] router recive req id 8 cost time 0.10603570938110352 s -INFO 06-24 20:12:08 [manager.py:68] detokenization recv req id 8 cost time 0.10817313194274902 s -DEBUG 06-24 20:12:08 [manager.py:391] Prefill Batch: batch_id=14829012066021962959277559814804674120, time:1750767128.931706s req_ids:[8] -DEBUG 06-24 20:12:08 [manager.py:391] -ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:08 lightllm_req_id:8 first_token_cost:211.59863471984863ms total_cost_time:211.64178848266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7663 prompt_cache_len:5151 prompt_cache_ratio:0.6721910478924703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 -DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.30983853340148926 s -INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.3120276927947998 s -DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=148716461557774016630600037119486825080, time:1750767129.3526757s req_ids:[8] -DEBUG 06-24 20:12:09 [manager.py:391] -ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:414.55531120300293ms total_cost_time:414.60251808166504ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7664 prompt_cache_len:5151 prompt_cache_ratio:0.6721033402922756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 -DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10713529586791992 s -INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s -DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=102939958713478243095372399820662756318, time:1750767129.5714526s req_ids:[8] -DEBUG 06-24 20:12:09 [manager.py:391] -ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:182.1737289428711ms total_cost_time:182.21640586853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7665 prompt_cache_len:5151 prompt_cache_ratio:0.6720156555772994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 -DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10753655433654785 s -INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.10950660705566406 s -DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=267231234815814363712815229519286381196, time:1750767129.75272s req_ids:[8] -DEBUG 06-24 20:12:09 [manager.py:391] -ERROR 06-24 20:12:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:195.14942169189453ms total_cost_time:195.19519805908203ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7666 prompt_cache_len:5151 prompt_cache_ratio:0.6719279937385859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 -DEBUG 06-24 20:12:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:09 [manager.py:224] router recive req id 8 cost time 0.10741758346557617 s -INFO 06-24 20:12:09 [manager.py:68] detokenization recv req id 8 cost time 0.10999536514282227 s -DEBUG 06-24 20:12:09 [manager.py:391] Prefill Batch: batch_id=170566789586579577405896794004604895525, time:1750767129.9568775s req_ids:[8] -DEBUG 06-24 20:12:09 [manager.py:391] -ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:09 lightllm_req_id:8 first_token_cost:206.69150352478027ms total_cost_time:206.73775672912598ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7667 prompt_cache_len:5151 prompt_cache_ratio:0.6718403547671841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 -DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10818648338317871 s -INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11029410362243652 s -DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=4218543985224994588653419520821831190, time:1750767130.1686184s req_ids:[8] -DEBUG 06-24 20:12:10 [manager.py:391] -ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:211.56597137451172ms total_cost_time:211.61365509033203ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7668 prompt_cache_len:5151 prompt_cache_ratio:0.6717527386541471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 -DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.1087486743927002 s -INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s -DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=133130163678548114148860185451658996365, time:1750767130.384927s req_ids:[8] -DEBUG 06-24 20:12:10 [manager.py:391] -ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:164.26324844360352ms total_cost_time:164.3667221069336ms,out_token_counter:1 mean_per_token_cost_time: 0.10347366333007812ms prompt_token_num:7669 prompt_cache_len:5151 prompt_cache_ratio:0.6716651453905333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 -DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.1072995662689209 s -INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.10864686965942383 s -DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=158684119483686090009265898146213015132, time:1750767130.557638s req_ids:[8] -DEBUG 06-24 20:12:10 [manager.py:391] -ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:200.8810043334961ms total_cost_time:200.9272575378418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7670 prompt_cache_len:5151 prompt_cache_ratio:0.6715775749674054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 -DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10821962356567383 s -INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956811904907227 s -DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=235949549936487960068486758082145082128, time:1750767130.7630873s req_ids:[8] -DEBUG 06-24 20:12:10 [manager.py:391] -ERROR 06-24 20:12:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:206.0713768005371ms total_cost_time:206.1171531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7671 prompt_cache_len:5151 prompt_cache_ratio:0.6714900273758311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 -DEBUG 06-24 20:12:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:10 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s -INFO 06-24 20:12:10 [manager.py:68] detokenization recv req id 8 cost time 0.11005616188049316 s -DEBUG 06-24 20:12:10 [manager.py:391] Prefill Batch: batch_id=184004980944507935669551138043796048011, time:1750767130.9766197s req_ids:[8] -DEBUG 06-24 20:12:10 [manager.py:391] -ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:10 lightllm_req_id:8 first_token_cost:208.72735977172852ms total_cost_time:208.7728977203369ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7672 prompt_cache_len:5151 prompt_cache_ratio:0.6714025026068822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 -DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:11 [batch.py:51] router release req id 8 -INFO 06-24 20:12:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10737156867980957 s -INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.1095740795135498 s -DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=229408986791501043074889080270158216643, time:1750767131.18818s req_ids:[8] -DEBUG 06-24 20:12:11 [manager.py:391] -ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:376.5888214111328ms total_cost_time:376.6167163848877ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:7673 prompt_cache_len:5151 prompt_cache_ratio:0.6713150006516356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 -DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10551071166992188 s -INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.10756969451904297 s -DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=206072783669655577257534812506327375500, time:1750767131.5705001s req_ids:[8] -DEBUG 06-24 20:12:11 [manager.py:391] -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:204.9424648284912ms total_cost_time:204.9863338470459ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7674 prompt_cache_len:5151 prompt_cache_ratio:0.6712275215011728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 -DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10942339897155762 s -INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.11137795448303223 s -DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=130202095981086571737380495270041754654, time:1750767131.7792754s req_ids:[8] -DEBUG 06-24 20:12:11 [manager.py:391] -ERROR 06-24 20:12:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:206.22587203979492ms total_cost_time:206.2702178955078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7675 prompt_cache_len:5151 prompt_cache_ratio:0.6711400651465798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 -DEBUG 06-24 20:12:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:11 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s -INFO 06-24 20:12:11 [manager.py:68] detokenization recv req id 8 cost time 0.11037397384643555 s -DEBUG 06-24 20:12:11 [manager.py:391] Prefill Batch: batch_id=121103546113517516161015130683696696512, time:1750767131.9910636s req_ids:[8] -DEBUG 06-24 20:12:11 [manager.py:391] -ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:11 lightllm_req_id:8 first_token_cost:206.82287216186523ms total_cost_time:206.86769485473633ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7676 prompt_cache_len:5151 prompt_cache_ratio:0.6710526315789473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 -DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10876703262329102 s -INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11081337928771973 s -DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=232835002047103208372584284681637773844, time:1750767132.2038646s req_ids:[8] -DEBUG 06-24 20:12:12 [manager.py:391] -ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:208.64462852478027ms total_cost_time:208.69064331054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7677 prompt_cache_len:5151 prompt_cache_ratio:0.6709652207893708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 -DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10805964469909668 s -INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11000943183898926 s -DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=98536507745594849840746674159457798075, time:1750767132.4180126s req_ids:[8] -DEBUG 06-24 20:12:12 [manager.py:391] -ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:210.20746231079102ms total_cost_time:210.25419235229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7678 prompt_cache_len:5151 prompt_cache_ratio:0.6708778327689503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 -DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10870909690856934 s -INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.1106724739074707 s -DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=46269012733314900325424536999501968109, time:1750767132.63283s req_ids:[8] -DEBUG 06-24 20:12:12 [manager.py:391] -ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:201.16472244262695ms total_cost_time:201.20811462402344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7679 prompt_cache_len:5151 prompt_cache_ratio:0.6707904675087902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 -DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:12 [manager.py:224] router recive req id 8 cost time 0.10878801345825195 s -INFO 06-24 20:12:12 [manager.py:68] detokenization recv req id 8 cost time 0.11066699028015137 s -DEBUG 06-24 20:12:12 [manager.py:391] Prefill Batch: batch_id=21370404631229234843900594685176242183, time:1750767132.8399675s req_ids:[8] -DEBUG 06-24 20:12:12 [manager.py:391] -ERROR 06-24 20:12:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:205.3818702697754ms total_cost_time:205.4276466369629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7680 prompt_cache_len:5151 prompt_cache_ratio:0.670703125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 -DEBUG 06-24 20:12:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10796570777893066 s -INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11004972457885742 s -DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=7960748760137472986901542241846735523, time:1750767133.0535378s req_ids:[8] -DEBUG 06-24 20:12:13 [manager.py:391] -ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:12 lightllm_req_id:8 first_token_cost:212.77689933776855ms total_cost_time:212.82172203063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7681 prompt_cache_len:5151 prompt_cache_ratio:0.6706158052336936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 -DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.1076669692993164 s -INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.1096959114074707 s -DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=253761194687533787300615710681600011697, time:1750767133.267918s req_ids:[8] -DEBUG 06-24 20:12:13 [manager.py:391] -ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:200.47783851623535ms total_cost_time:200.52266120910645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7682 prompt_cache_len:5151 prompt_cache_ratio:0.6705285082009893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 -DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10928630828857422 s -INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11135578155517578 s -DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=320017400431626443251218785640864818234, time:1750767133.4771929s req_ids:[8] -DEBUG 06-24 20:12:13 [manager.py:391] -ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:380.59544563293457ms total_cost_time:380.64050674438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7683 prompt_cache_len:5151 prompt_cache_ratio:0.6704412338930106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 -DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:13 [manager.py:224] router recive req id 8 cost time 0.10880637168884277 s -INFO 06-24 20:12:13 [manager.py:68] detokenization recv req id 8 cost time 0.11016345024108887 s -DEBUG 06-24 20:12:13 [manager.py:391] Prefill Batch: batch_id=39548840540517503657260333740346636623, time:1750767133.860254s req_ids:[8] -DEBUG 06-24 20:12:13 [manager.py:391] -ERROR 06-24 20:12:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:202.54278182983398ms total_cost_time:202.58784294128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7684 prompt_cache_len:5151 prompt_cache_ratio:0.6703539823008849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 -DEBUG 06-24 20:12:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10897946357727051 s -INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.11058378219604492 s -DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=218850475855003369037865176542166040708, time:1750767134.0789475s req_ids:[8] -DEBUG 06-24 20:12:14 [manager.py:391] -ERROR 06-24 20:12:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:13 lightllm_req_id:8 first_token_cost:213.79446983337402ms total_cost_time:213.8388156890869ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7685 prompt_cache_len:5151 prompt_cache_ratio:0.6702667534157449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:14 [manager.py:106] timer detokenize batch cost time 405.87592124938965 ms -INFO 06-24 20:12:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 -DEBUG 06-24 20:12:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:14 [batch.py:51] router release req id 8 -INFO 06-24 20:12:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10557174682617188 s -INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.10760688781738281 s -DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=310767629775045048878038319543665394526, time:1750767134.699976s req_ids:[8] -DEBUG 06-24 20:12:14 [manager.py:391] -ERROR 06-24 20:12:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 first_token_cost:213.48023414611816ms total_cost_time:213.50765228271484ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7686 prompt_cache_len:5151 prompt_cache_ratio:0.6701795472287275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 -DEBUG 06-24 20:12:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:14 [manager.py:224] router recive req id 8 cost time 0.10688400268554688 s -INFO 06-24 20:12:14 [manager.py:68] detokenization recv req id 8 cost time 0.10906672477722168 s -DEBUG 06-24 20:12:14 [manager.py:391] Prefill Batch: batch_id=180654914250473417241057671004655026311, time:1750767134.9177299s req_ids:[8] -DEBUG 06-24 20:12:14 [manager.py:391] -ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:14 lightllm_req_id:8 first_token_cost:211.99798583984375ms total_cost_time:212.04400062561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7687 prompt_cache_len:5151 prompt_cache_ratio:0.6700923637309744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 -DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10992050170898438 s -INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11266183853149414 s -DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=133785205864378210191389362876875571011, time:1750767135.1301816s req_ids:[8] -DEBUG 06-24 20:12:15 [manager.py:391] -ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:206.6192626953125ms total_cost_time:206.6652774810791ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7688 prompt_cache_len:5151 prompt_cache_ratio:0.6700052029136316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 -DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10812544822692871 s -INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s -DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=261013022283967441306782793176483601438, time:1750767135.3546646s req_ids:[8] -DEBUG 06-24 20:12:15 [manager.py:391] -ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:216.59374237060547ms total_cost_time:216.63856506347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7689 prompt_cache_len:5151 prompt_cache_ratio:0.6699180647678502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 -DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10773611068725586 s -INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.1096184253692627 s -DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=246975148987580736987947544367019044143, time:1750767135.565465s req_ids:[8] -DEBUG 06-24 20:12:15 [manager.py:391] -ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:207.74054527282715ms total_cost_time:207.78703689575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7690 prompt_cache_len:5151 prompt_cache_ratio:0.6698309492847855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 -DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10917162895202637 s -INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11115694046020508 s -DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=230622756890826987753124950719996624279, time:1750767135.7781389s req_ids:[8] -DEBUG 06-24 20:12:15 [manager.py:391] -ERROR 06-24 20:12:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:206.06660842895508ms total_cost_time:206.11214637756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7691 prompt_cache_len:5151 prompt_cache_ratio:0.6697438564555974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 -DEBUG 06-24 20:12:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:15 [manager.py:224] router recive req id 8 cost time 0.10837030410766602 s -INFO 06-24 20:12:15 [manager.py:68] detokenization recv req id 8 cost time 0.11024665832519531 s -DEBUG 06-24 20:12:15 [manager.py:391] Prefill Batch: batch_id=234589154091863519003143456498446367496, time:1750767135.9904277s req_ids:[8] -DEBUG 06-24 20:12:15 [manager.py:391] -ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:15 lightllm_req_id:8 first_token_cost:200.2401351928711ms total_cost_time:200.2854347229004ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7692 prompt_cache_len:5151 prompt_cache_ratio:0.6696567862714509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 -DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.1098325252532959 s -INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.11195087432861328 s -DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=192520768049687979528146644289515731775, time:1750767136.2001874s req_ids:[8] -DEBUG 06-24 20:12:16 [manager.py:391] -ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:209.7034454345703ms total_cost_time:209.75041389465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7693 prompt_cache_len:5151 prompt_cache_ratio:0.6695697387235149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 -INFO 06-24 20:12:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.31085777282714844 s -INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.3128995895385742 s -DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=308741460652673925749285390957746583388, time:1750767136.6203775s req_ids:[8] -DEBUG 06-24 20:12:16 [manager.py:391] -ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:419.5363521575928ms total_cost_time:419.58045959472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7694 prompt_cache_len:5151 prompt_cache_ratio:0.6694827138029633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 -DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:16 [manager.py:224] router recive req id 8 cost time 0.10721468925476074 s -INFO 06-24 20:12:16 [manager.py:68] detokenization recv req id 8 cost time 0.10908961296081543 s -DEBUG 06-24 20:12:16 [manager.py:391] Prefill Batch: batch_id=264058196261211212583347829409656135085, time:1750767136.839376s req_ids:[8] -DEBUG 06-24 20:12:16 [manager.py:391] -ERROR 06-24 20:12:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:209.17272567749023ms total_cost_time:209.21850204467773ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7695 prompt_cache_len:5151 prompt_cache_ratio:0.6693957115009747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 -DEBUG 06-24 20:12:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10735630989074707 s -INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10927581787109375 s -DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=150606508140671605389677804750096182026, time:1750767137.0517712s req_ids:[8] -DEBUG 06-24 20:12:17 [manager.py:391] -ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:16 lightllm_req_id:8 first_token_cost:203.34935188293457ms total_cost_time:203.39250564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7696 prompt_cache_len:5151 prompt_cache_ratio:0.6693087318087318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 -DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10749530792236328 s -INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10958266258239746 s -DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=268460407826695096637248993225957928846, time:1750767137.2606175s req_ids:[8] -DEBUG 06-24 20:12:17 [manager.py:391] -ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:205.9767246246338ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7697 prompt_cache_len:5151 prompt_cache_ratio:0.6692217747174224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 -DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10762858390808105 s -INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.10958552360534668 s -DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=296825565490880410815270795500349032426, time:1750767137.4707358s req_ids:[8] -DEBUG 06-24 20:12:17 [manager.py:391] -DEBUG 06-24 20:12:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 32668.766 tokens/s -DEBUG 06-24 20:12:17 [stats.py:37] Avg prompt tokens throughput: 32660.357 tokens/s -DEBUG 06-24 20:12:17 [stats.py:37] Avg generate tokens throughput: 8.410 tokens/s -ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:205.07574081420898ms total_cost_time:205.11817932128906ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7698 prompt_cache_len:5151 prompt_cache_ratio:0.6691348402182385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 -DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10821342468261719 s -INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.11021924018859863 s -DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=252351336283897341091789067156162653979, time:1750767137.6836154s req_ids:[8] -DEBUG 06-24 20:12:17 [manager.py:391] -ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:204.44965362548828ms total_cost_time:204.49519157409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7699 prompt_cache_len:5151 prompt_cache_ratio:0.6690479283023769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 -DEBUG 06-24 20:12:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:17 [manager.py:224] router recive req id 8 cost time 0.10996174812316895 s -INFO 06-24 20:12:17 [manager.py:68] detokenization recv req id 8 cost time 0.11191773414611816 s -DEBUG 06-24 20:12:17 [manager.py:391] Prefill Batch: batch_id=119558839409258222953942602133175770821, time:1750767137.8948174s req_ids:[8] -DEBUG 06-24 20:12:17 [manager.py:391] -ERROR 06-24 20:12:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:208.46891403198242ms total_cost_time:208.5118293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7700 prompt_cache_len:5151 prompt_cache_ratio:0.668961038961039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 -DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10960507392883301 s -INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.11094236373901367 s -DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=261640368785005968656837102395284740508, time:1750767138.1096537s req_ids:[8] -DEBUG 06-24 20:12:18 [manager.py:391] -ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:17 lightllm_req_id:8 first_token_cost:210.07251739501953ms total_cost_time:210.11829376220703ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7701 prompt_cache_len:5151 prompt_cache_ratio:0.6688741721854304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 -DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10744738578796387 s -INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.10941767692565918 s -DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=205171123453491851650481666414018619079, time:1750767138.3314912s req_ids:[8] -DEBUG 06-24 20:12:18 [manager.py:391] -ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:215.61193466186523ms total_cost_time:215.65699577331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7702 prompt_cache_len:5151 prompt_cache_ratio:0.6687873279667619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 -DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.10752558708190918 s -INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.1094503402709961 s -DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=132006334873363944205563863252642714645, time:1750767138.5454113s req_ids:[8] -DEBUG 06-24 20:12:18 [manager.py:391] -ERROR 06-24 20:12:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:205.25312423706055ms total_cost_time:205.29913902282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7703 prompt_cache_len:5151 prompt_cache_ratio:0.6687005062962482 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 -DEBUG 06-24 20:12:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:18 [manager.py:224] router recive req id 8 cost time 0.30987095832824707 s -INFO 06-24 20:12:18 [manager.py:68] detokenization recv req id 8 cost time 0.31196093559265137 s -DEBUG 06-24 20:12:18 [manager.py:391] Prefill Batch: batch_id=240661992237263133656300588294017518136, time:1750767138.9639163s req_ids:[8] -DEBUG 06-24 20:12:18 [manager.py:391] -ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:18 lightllm_req_id:8 first_token_cost:420.123815536499ms total_cost_time:420.17626762390137ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7704 prompt_cache_len:5151 prompt_cache_ratio:0.668613707165109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 -DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10863184928894043 s -INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.11055755615234375 s -DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=302495773197347062738671692802458817917, time:1750767139.179904s req_ids:[8] -DEBUG 06-24 20:12:19 [manager.py:391] -ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:205.29627799987793ms total_cost_time:205.34133911132812ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7705 prompt_cache_len:5151 prompt_cache_ratio:0.6685269305645685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 -DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s -INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.10954713821411133 s -DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=12897941176936508498152477794098447621, time:1750767139.3916538s req_ids:[8] -DEBUG 06-24 20:12:19 [manager.py:391] -ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:168.67685317993164ms total_cost_time:168.71976852416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7706 prompt_cache_len:5151 prompt_cache_ratio:0.6684401764858552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 -DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.1074838638305664 s -INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.10933780670166016 s -DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=180589104954624473766497886043115660844, time:1750767139.5653625s req_ids:[8] -DEBUG 06-24 20:12:19 [manager.py:391] -ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:198.52781295776367ms total_cost_time:198.56977462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7707 prompt_cache_len:5151 prompt_cache_ratio:0.6683534449202024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 -DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10831499099731445 s -INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.1103978157043457 s -DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=230211948437807052932271922465470976024, time:1750767139.7691188s req_ids:[8] -DEBUG 06-24 20:12:19 [manager.py:391] -ERROR 06-24 20:12:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:201.15923881530762ms total_cost_time:201.20620727539062ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7708 prompt_cache_len:5151 prompt_cache_ratio:0.668266735858848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 -DEBUG 06-24 20:12:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:19 [manager.py:224] router recive req id 8 cost time 0.10763978958129883 s -INFO 06-24 20:12:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094048023223877 s -DEBUG 06-24 20:12:19 [manager.py:391] Prefill Batch: batch_id=332211914892233867929005211017714547721, time:1750767139.9764647s req_ids:[8] -DEBUG 06-24 20:12:19 [manager.py:391] -ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:19 lightllm_req_id:8 first_token_cost:204.09536361694336ms total_cost_time:204.14090156555176ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7709 prompt_cache_len:5151 prompt_cache_ratio:0.6681800492930341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 -DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10725879669189453 s -INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.10940909385681152 s -DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=149564056068108416224796820993696048419, time:1750767140.1865942s req_ids:[8] -DEBUG 06-24 20:12:20 [manager.py:391] -ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:207.3678970336914ms total_cost_time:207.4124813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7710 prompt_cache_len:5151 prompt_cache_ratio:0.6680933852140077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 -DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10846519470214844 s -INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.11063575744628906 s -DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=229798759804972537941191959631736045427, time:1750767140.3997397s req_ids:[8] -DEBUG 06-24 20:12:20 [manager.py:391] -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:207.30018615722656ms total_cost_time:207.35979080200195ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7711 prompt_cache_len:5151 prompt_cache_ratio:0.6680067436130204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 -DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.1108713150024414 s -INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.11307740211486816 s -DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=241340085920983260504230117320264374974, time:1750767140.613256s req_ids:[8] -DEBUG 06-24 20:12:20 [manager.py:391] -ERROR 06-24 20:12:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:204.65588569641113ms total_cost_time:204.70094680786133ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7712 prompt_cache_len:5151 prompt_cache_ratio:0.6679201244813278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 -DEBUG 06-24 20:12:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:20 [manager.py:224] router recive req id 8 cost time 0.10714578628540039 s -INFO 06-24 20:12:20 [manager.py:68] detokenization recv req id 8 cost time 0.10882282257080078 s -DEBUG 06-24 20:12:20 [manager.py:391] Prefill Batch: batch_id=248639011001771450836073963359765139428, time:1750767140.8201125s req_ids:[8] -DEBUG 06-24 20:12:20 [manager.py:391] -ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:20 lightllm_req_id:8 first_token_cost:338.34385871887207ms total_cost_time:338.38868141174316ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7713 prompt_cache_len:5151 prompt_cache_ratio:0.6678335278101906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 -DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10871028900146484 s -INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11068439483642578 s -DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=144120124511206351885462145130861118921, time:1750767141.1623044s req_ids:[8] -DEBUG 06-24 20:12:21 [manager.py:391] -ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:195.56093215942383ms total_cost_time:195.59407234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:7714 prompt_cache_len:5151 prompt_cache_ratio:0.6677469535908738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 -DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10694384574890137 s -INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.10892462730407715 s -DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=208840935093514335426674163763014891756, time:1750767141.366234s req_ids:[8] -DEBUG 06-24 20:12:21 [manager.py:391] -ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:201.84326171875ms total_cost_time:201.887845993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7715 prompt_cache_len:5151 prompt_cache_ratio:0.6676604018146468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 -DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s -INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11117959022521973 s -DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=285504034967834411243699953463678513157, time:1750767141.575837s req_ids:[8] -DEBUG 06-24 20:12:21 [manager.py:391] -ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:205.338716506958ms total_cost_time:205.3830623626709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7716 prompt_cache_len:5151 prompt_cache_ratio:0.6675738724727839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 -DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:21 [batch.py:51] router release req id 8 -INFO 06-24 20:12:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s -INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042308807373047 s -DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=174464156689809822244051483718707031920, time:1750767141.7871873s req_ids:[8] -DEBUG 06-24 20:12:21 [manager.py:391] -ERROR 06-24 20:12:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:206.1479091644287ms total_cost_time:206.1920166015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7717 prompt_cache_len:5151 prompt_cache_ratio:0.6674873655565634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 -DEBUG 06-24 20:12:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:21 [manager.py:224] router recive req id 8 cost time 0.1105356216430664 s -INFO 06-24 20:12:21 [manager.py:68] detokenization recv req id 8 cost time 0.1125020980834961 s -DEBUG 06-24 20:12:21 [manager.py:391] Prefill Batch: batch_id=26686273271799114541198605393210678670, time:1750767141.9996161s req_ids:[8] -DEBUG 06-24 20:12:21 [manager.py:391] -ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:21 lightllm_req_id:8 first_token_cost:206.5446376800537ms total_cost_time:206.5901756286621ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7718 prompt_cache_len:5151 prompt_cache_ratio:0.6674008810572687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 -DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s -INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.10995125770568848 s -DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=148557605442988544374942217686259573352, time:1750767142.2116847s req_ids:[8] -DEBUG 06-24 20:12:22 [manager.py:391] -ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:203.79018783569336ms total_cost_time:203.83405685424805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7719 prompt_cache_len:5151 prompt_cache_ratio:0.6673144189661874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 -DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10939669609069824 s -INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.11067676544189453 s -DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=126733585052604101675858451730984682268, time:1750767142.4187284s req_ids:[8] -DEBUG 06-24 20:12:22 [manager.py:391] -ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:204.7417163848877ms total_cost_time:204.7865390777588ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7720 prompt_cache_len:5151 prompt_cache_ratio:0.6672279792746114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 -DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10797595977783203 s -INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.10955095291137695 s -DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=86298599241888146922444121195539820547, time:1750767142.630999s req_ids:[8] -DEBUG 06-24 20:12:22 [manager.py:391] -ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:205.76953887939453ms total_cost_time:205.7936191558838ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7721 prompt_cache_len:5151 prompt_cache_ratio:0.6671415619738376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 -DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:22 [manager.py:224] router recive req id 8 cost time 0.10894560813903809 s -INFO 06-24 20:12:22 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s -DEBUG 06-24 20:12:22 [manager.py:391] Prefill Batch: batch_id=9505275690836169138130013725301276836, time:1750767142.8449075s req_ids:[8] -DEBUG 06-24 20:12:22 [manager.py:391] -ERROR 06-24 20:12:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:206.2525749206543ms total_cost_time:206.2971591949463ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7722 prompt_cache_len:5151 prompt_cache_ratio:0.6670551670551671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 -DEBUG 06-24 20:12:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.1074216365814209 s -INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.10885047912597656 s -DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=259952194685564632073835239786928160486, time:1750767143.0609267s req_ids:[8] -DEBUG 06-24 20:12:23 [manager.py:391] -INFO 06-24 20:12:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:12:23 [statics_utils.py:24] mean first cost: 230.12276527711066 ms -INFO 06-24 20:12:23 [statics_utils.py:24] mean per token cost: 0.08743647966695665 ms -ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:22 lightllm_req_id:8 first_token_cost:385.7707977294922ms total_cost_time:385.8165740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7723 prompt_cache_len:5151 prompt_cache_ratio:0.6669687945099054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 -DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10878491401672363 s -INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s -DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=169273566874592448716949563324576011950, time:1750767143.4428437s req_ids:[8] -DEBUG 06-24 20:12:23 [manager.py:391] -ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:201.88164710998535ms total_cost_time:201.92527770996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7724 prompt_cache_len:5151 prompt_cache_ratio:0.666882444329363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 -DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10835146903991699 s -INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.10967659950256348 s -DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=245600736928763126660960009319826389092, time:1750767143.652081s req_ids:[8] -DEBUG 06-24 20:12:23 [manager.py:391] -ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:204.73551750183105ms total_cost_time:204.78391647338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:7725 prompt_cache_len:5151 prompt_cache_ratio:0.6667961165048544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 -DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:23 [manager.py:224] router recive req id 8 cost time 0.10957837104797363 s -INFO 06-24 20:12:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088252067565918 s -DEBUG 06-24 20:12:23 [manager.py:391] Prefill Batch: batch_id=79715703878086391527046643944288800918, time:1750767143.864175s req_ids:[8] -DEBUG 06-24 20:12:23 [manager.py:391] -ERROR 06-24 20:12:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:206.32624626159668ms total_cost_time:206.39467239379883ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:7726 prompt_cache_len:5151 prompt_cache_ratio:0.6667098110276987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 -DEBUG 06-24 20:12:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10866737365722656 s -INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005306243896484 s -DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=176965807144545710478674132240663513675, time:1750767144.076287s req_ids:[8] -DEBUG 06-24 20:12:24 [manager.py:391] -ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:23 lightllm_req_id:8 first_token_cost:205.83248138427734ms total_cost_time:205.87825775146484ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7727 prompt_cache_len:5151 prompt_cache_ratio:0.6666235278892196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 -DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s -INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s -DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=279049199565125134186941059577293749666, time:1750767144.287476s req_ids:[8] -DEBUG 06-24 20:12:24 [manager.py:391] -ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:204.16998863220215ms total_cost_time:204.21481132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7728 prompt_cache_len:5151 prompt_cache_ratio:0.6665372670807453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 -DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s -INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019587516784668 s -DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=143448438893372300916084685539371458777, time:1750767144.4993286s req_ids:[8] -DEBUG 06-24 20:12:24 [manager.py:391] -ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:206.5730094909668ms total_cost_time:206.6185474395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7729 prompt_cache_len:5151 prompt_cache_ratio:0.6664510285936085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 -DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:24 [batch.py:51] router release req id 8 -DEBUG 06-24 20:12:24 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:24 [manager.py:283] -DEBUG 06-24 20:12:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:24 [manager.py:284] -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.11134552955627441 s -INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11343932151794434 s -DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=88999850516548248954728590606272073656, time:1750767144.7137043s req_ids:[8] -DEBUG 06-24 20:12:24 [manager.py:391] -ERROR 06-24 20:12:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:209.00869369506836ms total_cost_time:209.05804634094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:7730 prompt_cache_len:5151 prompt_cache_ratio:0.6663648124191461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 -DEBUG 06-24 20:12:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:24 [manager.py:224] router recive req id 8 cost time 0.10801482200622559 s -INFO 06-24 20:12:24 [manager.py:68] detokenization recv req id 8 cost time 0.11011385917663574 s -DEBUG 06-24 20:12:24 [manager.py:391] Prefill Batch: batch_id=4682599189205451547800957209215186178, time:1750767144.9265842s req_ids:[8] -DEBUG 06-24 20:12:24 [manager.py:391] -ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:24 lightllm_req_id:8 first_token_cost:205.91330528259277ms total_cost_time:205.97243309020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:7731 prompt_cache_len:5151 prompt_cache_ratio:0.6662786185487001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 -DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.10938358306884766 s -INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.11140847206115723 s -DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=335529262386809074759956794830698865310, time:1750767145.1380873s req_ids:[8] -DEBUG 06-24 20:12:25 [manager.py:391] -ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:206.95090293884277ms total_cost_time:206.99620246887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7732 prompt_cache_len:5151 prompt_cache_ratio:0.6661924469736161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 -DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.10702657699584961 s -INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.1083521842956543 s -DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=224084969392382344109697068137149680109, time:1750767145.364397s req_ids:[8] -DEBUG 06-24 20:12:25 [manager.py:391] -ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:220.31092643737793ms total_cost_time:220.3371524810791ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7733 prompt_cache_len:5151 prompt_cache_ratio:0.6661062976852451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 -DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.3071630001068115 s -INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.3090696334838867 s -DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=313963277010549572786154352066598814281, time:1750767145.7864816s req_ids:[8] -DEBUG 06-24 20:12:25 [manager.py:391] -ERROR 06-24 20:12:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:373.1262683868408ms total_cost_time:373.1725215911865ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7734 prompt_cache_len:5151 prompt_cache_ratio:0.6660201706749418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 -DEBUG 06-24 20:12:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:25 [manager.py:224] router recive req id 8 cost time 0.1074821949005127 s -INFO 06-24 20:12:25 [manager.py:68] detokenization recv req id 8 cost time 0.1094813346862793 s -DEBUG 06-24 20:12:25 [manager.py:391] Prefill Batch: batch_id=215035803195593453922325933986124224781, time:1750767145.9510503s req_ids:[8] -DEBUG 06-24 20:12:25 [manager.py:391] -ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:25 lightllm_req_id:8 first_token_cost:192.97003746032715ms total_cost_time:193.01652908325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7735 prompt_cache_len:5151 prompt_cache_ratio:0.6659340659340659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 -DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10812568664550781 s -INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.11020541191101074 s -DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=224545698279231772613087379879108321542, time:1750767146.154076s req_ids:[8] -DEBUG 06-24 20:12:26 [manager.py:391] -ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:203.97043228149414ms total_cost_time:204.01644706726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7736 prompt_cache_len:5151 prompt_cache_ratio:0.6658479834539814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 -DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10738110542297363 s -INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s -DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=11158265035190960162037378457957103384, time:1750767146.3628578s req_ids:[8] -DEBUG 06-24 20:12:26 [manager.py:391] -ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:204.79083061218262ms total_cost_time:204.83636856079102ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7737 prompt_cache_len:5151 prompt_cache_ratio:0.6657619232260567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 -DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s -INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10927271842956543 s -DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=7269796909582166636603784686954752189, time:1750767146.575159s req_ids:[8] -DEBUG 06-24 20:12:26 [manager.py:391] -ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:204.76150512695312ms total_cost_time:204.80680465698242ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7738 prompt_cache_len:5151 prompt_cache_ratio:0.6656758852416645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 -DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10741353034973145 s -INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s -DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=316266665537376092956464913607339418477, time:1750767146.7871783s req_ids:[8] -DEBUG 06-24 20:12:26 [manager.py:391] -ERROR 06-24 20:12:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:203.2008171081543ms total_cost_time:203.2465934753418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7739 prompt_cache_len:5151 prompt_cache_ratio:0.6655898694921825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 -DEBUG 06-24 20:12:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:26 [manager.py:224] router recive req id 8 cost time 0.10771036148071289 s -INFO 06-24 20:12:26 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s -DEBUG 06-24 20:12:26 [manager.py:391] Prefill Batch: batch_id=126411687016370857589073125108218174947, time:1750767146.9985607s req_ids:[8] -DEBUG 06-24 20:12:26 [manager.py:391] -ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:26 lightllm_req_id:8 first_token_cost:208.42885971069336ms total_cost_time:208.47511291503906ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7740 prompt_cache_len:5151 prompt_cache_ratio:0.6655038759689923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 -DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.1076359748840332 s -INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.10892248153686523 s -DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=17402659547059232416830831147864305262, time:1750767147.2127025s req_ids:[8] -DEBUG 06-24 20:12:27 [manager.py:391] -ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:210.89673042297363ms total_cost_time:210.94083786010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7741 prompt_cache_len:5151 prompt_cache_ratio:0.6654179046634802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 -DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s -INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.10982751846313477 s -DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=22603639528273711943121190585010557166, time:1750767147.4257321s req_ids:[8] -DEBUG 06-24 20:12:27 [manager.py:391] -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:12:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 33851.921 tokens/s -DEBUG 06-24 20:12:27 [stats.py:37] Avg prompt tokens throughput: 33843.054 tokens/s -DEBUG 06-24 20:12:27 [stats.py:37] Avg generate tokens throughput: 8.867 tokens/s -INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:203.8273811340332ms total_cost_time:203.8719654083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7742 prompt_cache_len:5151 prompt_cache_ratio:0.665331955567037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 -DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:27 [manager.py:224] router recive req id 8 cost time 0.10746526718139648 s -INFO 06-24 20:12:27 [manager.py:68] detokenization recv req id 8 cost time 0.1094365119934082 s -DEBUG 06-24 20:12:27 [manager.py:391] Prefill Batch: batch_id=133782690042444740467600107048354996762, time:1750767147.635496s req_ids:[8] -DEBUG 06-24 20:12:27 [manager.py:391] -ERROR 06-24 20:12:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:204.47468757629395ms total_cost_time:204.51903343200684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7743 prompt_cache_len:5151 prompt_cache_ratio:0.6652460286710578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 -DEBUG 06-24 20:12:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.3091590404510498 s -INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.31121110916137695 s -DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=130245217265561447649736767689436308715, time:1750767148.0482833s req_ids:[8] -DEBUG 06-24 20:12:28 [manager.py:391] -ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:27 lightllm_req_id:8 first_token_cost:412.13274002075195ms total_cost_time:412.17708587646484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7744 prompt_cache_len:5151 prompt_cache_ratio:0.6651601239669421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 -DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10729217529296875 s -INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10968756675720215 s -DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=157193302478056662613369267669173880795, time:1750767148.2702112s req_ids:[8] -DEBUG 06-24 20:12:28 [manager.py:391] -ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:214.6470546722412ms total_cost_time:214.6921157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7745 prompt_cache_len:5151 prompt_cache_ratio:0.6650742414460943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 -DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10764551162719727 s -INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10962462425231934 s -DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=1704467605426256655837637487786126806, time:1750767148.4832547s req_ids:[8] -DEBUG 06-24 20:12:28 [manager.py:391] -ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:205.3995132446289ms total_cost_time:205.4445743560791ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7746 prompt_cache_len:5151 prompt_cache_ratio:0.6649883810999225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 -DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.1076817512512207 s -INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s -DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=6434365157893586288999586432007946816, time:1750767148.6931903s req_ids:[8] -DEBUG 06-24 20:12:28 [manager.py:391] -ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:216.2320613861084ms total_cost_time:216.2761688232422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7747 prompt_cache_len:5151 prompt_cache_ratio:0.66490254291984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 -DEBUG 06-24 20:12:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:28 [manager.py:224] router recive req id 8 cost time 0.10848093032836914 s -INFO 06-24 20:12:28 [manager.py:68] detokenization recv req id 8 cost time 0.11082887649536133 s -DEBUG 06-24 20:12:28 [manager.py:391] Prefill Batch: batch_id=253371907630901544703125539572568389707, time:1750767148.921384s req_ids:[8] -DEBUG 06-24 20:12:28 [manager.py:391] -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:28 lightllm_req_id:8 first_token_cost:200.87790489196777ms total_cost_time:200.92153549194336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7748 prompt_cache_len:5151 prompt_cache_ratio:0.6648167268972638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 -DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s -INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.1104283332824707 s -DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=294760328714458150904326144520642097709, time:1750767149.1180046s req_ids:[8] -DEBUG 06-24 20:12:29 [manager.py:391] -ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:200.00171661376953ms total_cost_time:200.02079010009766ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:7749 prompt_cache_len:5151 prompt_cache_ratio:0.664730933023616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 -DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10668683052062988 s -INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.10857534408569336 s -DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=182197754750730165326018836625523681415, time:1750767149.3273027s req_ids:[8] -DEBUG 06-24 20:12:29 [manager.py:391] -ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:207.200288772583ms total_cost_time:207.26323127746582ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7750 prompt_cache_len:5151 prompt_cache_ratio:0.6646451612903226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 -DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10785531997680664 s -INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.1098642349243164 s -DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=325301622988792319447016016894741091713, time:1750767149.5395696s req_ids:[8] -DEBUG 06-24 20:12:29 [manager.py:391] -ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:206.7737579345703ms total_cost_time:206.8178653717041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7751 prompt_cache_len:5151 prompt_cache_ratio:0.6645594116888144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 -DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10776567459106445 s -INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.10900306701660156 s -DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=90143514973807723855892648219674968306, time:1750767149.7529294s req_ids:[8] -DEBUG 06-24 20:12:29 [manager.py:391] -ERROR 06-24 20:12:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:201.24292373657227ms total_cost_time:201.28726959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7752 prompt_cache_len:5151 prompt_cache_ratio:0.6644736842105263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 -DEBUG 06-24 20:12:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:29 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s -INFO 06-24 20:12:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005449295043945 s -DEBUG 06-24 20:12:29 [manager.py:391] Prefill Batch: batch_id=27666137300216555166214798127999450547, time:1750767149.9574769s req_ids:[8] -DEBUG 06-24 20:12:29 [manager.py:391] -ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:29 lightllm_req_id:8 first_token_cost:366.65892601013184ms total_cost_time:366.7027950286865ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7753 prompt_cache_len:5151 prompt_cache_ratio:0.6643879788468979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 -DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10871458053588867 s -INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11086153984069824 s -DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=240265696609253545021720102462037834059, time:1750767150.325432s req_ids:[8] -DEBUG 06-24 20:12:30 [manager.py:391] -ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:195.845365524292ms total_cost_time:195.88875770568848ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7754 prompt_cache_len:5151 prompt_cache_ratio:0.6643022955893733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 -DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s -INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.10926270484924316 s -DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=72977870460239377973795779508969227193, time:1750767150.5287335s req_ids:[8] -DEBUG 06-24 20:12:30 [manager.py:391] -ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:201.33256912231445ms total_cost_time:201.37739181518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7755 prompt_cache_len:5151 prompt_cache_ratio:0.6642166344294004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 -DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10946059226989746 s -INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11147427558898926 s -DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=199341405449302213243985967462397805978, time:1750767150.7394466s req_ids:[8] -DEBUG 06-24 20:12:30 [manager.py:391] -ERROR 06-24 20:12:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:208.88400077819824ms total_cost_time:208.92786979675293ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7756 prompt_cache_len:5151 prompt_cache_ratio:0.6641309953584322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 -DEBUG 06-24 20:12:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:30 [manager.py:224] router recive req id 8 cost time 0.10812044143676758 s -INFO 06-24 20:12:30 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s -DEBUG 06-24 20:12:30 [manager.py:391] Prefill Batch: batch_id=187695372359525935045798531110759600929, time:1750767150.9533205s req_ids:[8] -DEBUG 06-24 20:12:30 [manager.py:391] -ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:30 lightllm_req_id:8 first_token_cost:204.31184768676758ms total_cost_time:204.36429977416992ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:7757 prompt_cache_len:5151 prompt_cache_ratio:0.6640453783679258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 -DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10888242721557617 s -INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s -DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=135950551838856282707149986398196395175, time:1750767151.1627944s req_ids:[8] -DEBUG 06-24 20:12:31 [manager.py:391] -ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.44195175170898ms total_cost_time:205.48725128173828ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7758 prompt_cache_len:5151 prompt_cache_ratio:0.6639597834493426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 -DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.1094508171081543 s -INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11143136024475098 s -DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=181811441888191325594103853691638711493, time:1750767151.3747268s req_ids:[8] -DEBUG 06-24 20:12:31 [manager.py:391] -ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.73997497558594ms total_cost_time:205.78527450561523ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7759 prompt_cache_len:5151 prompt_cache_ratio:0.6638742105941488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 -DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10869550704956055 s -INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s -DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=53156924217010073971313872542626871691, time:1750767151.5875807s req_ids:[8] -DEBUG 06-24 20:12:31 [manager.py:391] -ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.97505569458008ms total_cost_time:206.01940155029297ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7760 prompt_cache_len:5151 prompt_cache_ratio:0.6637886597938144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 -DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10748052597045898 s -INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.10946774482727051 s -DEBUG 06-24 20:12:31 [manager.py:391] Prefill Batch: batch_id=22357898136628181695422432697323554644, time:1750767151.7974072s req_ids:[8] -DEBUG 06-24 20:12:31 [manager.py:391] -ERROR 06-24 20:12:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:202.7263641357422ms total_cost_time:202.76999473571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7761 prompt_cache_len:5151 prompt_cache_ratio:0.6637031310398145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 -DEBUG 06-24 20:12:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:31 [manager.py:224] router recive req id 8 cost time 0.10904574394226074 s -INFO 06-24 20:12:31 [manager.py:68] detokenization recv req id 8 cost time 0.11095952987670898 s -DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=257084032899589677067335827102224164149, time:1750767152.0070477s req_ids:[8] -DEBUG 06-24 20:12:32 [manager.py:391] -ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:31 lightllm_req_id:8 first_token_cost:205.49678802490234ms total_cost_time:205.53922653198242ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7762 prompt_cache_len:5151 prompt_cache_ratio:0.6636176243236279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 -DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.1072990894317627 s -INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.10857272148132324 s -DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=180878602208322106040086715830079363267, time:1750767152.219672s req_ids:[8] -DEBUG 06-24 20:12:32 [manager.py:391] -ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:378.05914878845215ms total_cost_time:378.10468673706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7763 prompt_cache_len:5151 prompt_cache_ratio:0.6635321396367384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 -DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.10700464248657227 s -INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.1089162826538086 s -DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=210890120535656640545125953461980127135, time:1750767152.6039314s req_ids:[8] -DEBUG 06-24 20:12:32 [manager.py:391] -ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:205.43551445007324ms total_cost_time:205.47962188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7764 prompt_cache_len:5151 prompt_cache_ratio:0.6634466769706336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 -DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:32 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s -INFO 06-24 20:12:32 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s -DEBUG 06-24 20:12:32 [manager.py:391] Prefill Batch: batch_id=80634286891850296675645363892271664036, time:1750767152.8122385s req_ids:[8] -DEBUG 06-24 20:12:32 [manager.py:391] -ERROR 06-24 20:12:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:203.65071296691895ms total_cost_time:203.69505882263184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7765 prompt_cache_len:5151 prompt_cache_ratio:0.6633612363168062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 -DEBUG 06-24 20:12:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10745573043823242 s -INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10939455032348633 s -DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=153560186904683195565756766824542719587, time:1750767153.0251248s req_ids:[8] -DEBUG 06-24 20:12:33 [manager.py:391] -ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:32 lightllm_req_id:8 first_token_cost:207.66735076904297ms total_cost_time:207.71193504333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7766 prompt_cache_len:5151 prompt_cache_ratio:0.6632758176667525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 -DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s -INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.110076904296875 s -DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=270881466284648982921449910782393816994, time:1750767153.2375104s req_ids:[8] -DEBUG 06-24 20:12:33 [manager.py:391] -ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:206.99238777160645ms total_cost_time:207.01289176940918ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:7767 prompt_cache_len:5151 prompt_cache_ratio:0.6631904210119738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 -DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10776233673095703 s -INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10962939262390137 s -DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=193418318937954715154867499288586150488, time:1750767153.451543s req_ids:[8] -DEBUG 06-24 20:12:33 [manager.py:391] -ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:207.96895027160645ms total_cost_time:208.01305770874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7768 prompt_cache_len:5151 prompt_cache_ratio:0.6631050463439753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 -DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10694694519042969 s -INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.10904693603515625 s -DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=117157342473209478339769115234884940205, time:1750767153.6646492s req_ids:[8] -DEBUG 06-24 20:12:33 [manager.py:391] -ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:205.11174201965332ms total_cost_time:205.15775680541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7769 prompt_cache_len:5151 prompt_cache_ratio:0.6630196936542669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 -DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:33 [manager.py:224] router recive req id 8 cost time 0.10735154151916504 s -INFO 06-24 20:12:33 [manager.py:68] detokenization recv req id 8 cost time 0.1093301773071289 s -DEBUG 06-24 20:12:33 [manager.py:391] Prefill Batch: batch_id=77828536841264767715912798409615444473, time:1750767153.8733015s req_ids:[8] -DEBUG 06-24 20:12:33 [manager.py:391] -ERROR 06-24 20:12:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:203.67980003356934ms total_cost_time:203.72366905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7770 prompt_cache_len:5151 prompt_cache_ratio:0.6629343629343629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 -DEBUG 06-24 20:12:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.10811924934387207 s -INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s -DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=201837767333182358225249047165835573782, time:1750767154.0818121s req_ids:[8] -DEBUG 06-24 20:12:34 [manager.py:391] -ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:33 lightllm_req_id:8 first_token_cost:200.04606246948242ms total_cost_time:200.09112358093262ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7771 prompt_cache_len:5151 prompt_cache_ratio:0.6628490541757818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 -DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.10912919044494629 s -INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.11119580268859863 s -DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=281489527916733601197115499436543554149, time:1750767154.2872586s req_ids:[8] -DEBUG 06-24 20:12:34 [manager.py:391] -ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:211.0421657562256ms total_cost_time:211.08770370483398ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7772 prompt_cache_len:5151 prompt_cache_ratio:0.6627637673700463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 -DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.1073155403137207 s -INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.1092538833618164 s -DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=176443349013428303898407700054557740915, time:1750767154.5122185s req_ids:[8] -DEBUG 06-24 20:12:34 [manager.py:391] -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:214.52617645263672ms total_cost_time:214.57195281982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7773 prompt_cache_len:5151 prompt_cache_ratio:0.6626785025086839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 -DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:34 [manager.py:224] router recive req id 8 cost time 0.3106222152709961 s -INFO 06-24 20:12:34 [manager.py:68] detokenization recv req id 8 cost time 0.3123915195465088 s -DEBUG 06-24 20:12:34 [manager.py:391] Prefill Batch: batch_id=129921337774109002810425281359679226152, time:1750767154.9278512s req_ids:[8] -DEBUG 06-24 20:12:34 [manager.py:391] -ERROR 06-24 20:12:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:357.1920394897461ms total_cost_time:357.23328590393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:7774 prompt_cache_len:5151 prompt_cache_ratio:0.6625932595832261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 -DEBUG 06-24 20:12:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10799431800842285 s -DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=260307923561819129592192910237041806741, time:1750767155.07599s req_ids:[8] -DEBUG 06-24 20:12:35 [manager.py:391] -INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10993480682373047 s -ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:34 lightllm_req_id:8 first_token_cost:180.46259880065918ms total_cost_time:180.50670623779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7775 prompt_cache_len:5151 prompt_cache_ratio:0.662508038585209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 -DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.1080474853515625 s -INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.11003637313842773 s -DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=275895040678148417252126426520469427253, time:1750767155.2777753s req_ids:[8] -DEBUG 06-24 20:12:35 [manager.py:391] -ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:205.92951774597168ms total_cost_time:205.97410202026367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7776 prompt_cache_len:5151 prompt_cache_ratio:0.6624228395061729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 -DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.1070864200592041 s -INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10905838012695312 s -DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=303300826189958568204005249402960769560, time:1750767155.4842093s req_ids:[8] -DEBUG 06-24 20:12:35 [manager.py:391] -ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:206.96210861206055ms total_cost_time:207.01909065246582ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:7777 prompt_cache_len:5151 prompt_cache_ratio:0.6623376623376623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 -DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10723567008972168 s -INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10923385620117188 s -DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=241088646162826871141598090224176662276, time:1750767155.698701s req_ids:[8] -DEBUG 06-24 20:12:35 [manager.py:391] -ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:206.26235008239746ms total_cost_time:206.30502700805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7778 prompt_cache_len:5151 prompt_cache_ratio:0.6622525070712265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 -DEBUG 06-24 20:12:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:35 [manager.py:224] router recive req id 8 cost time 0.10809946060180664 s -INFO 06-24 20:12:35 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s -DEBUG 06-24 20:12:35 [manager.py:391] Prefill Batch: batch_id=220943618113280144075856033904954626563, time:1750767155.9082694s req_ids:[8] -DEBUG 06-24 20:12:35 [manager.py:391] -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:204.92887496948242ms total_cost_time:204.95152473449707ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7779 prompt_cache_len:5151 prompt_cache_ratio:0.6621673736984188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 -DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10387206077575684 s -INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10563850402832031 s -DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=153072631190134629818925698619498487620, time:1750767156.1211104s req_ids:[8] -DEBUG 06-24 20:12:36 [manager.py:391] -ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:35 lightllm_req_id:8 first_token_cost:210.0512981414795ms total_cost_time:210.09588241577148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7780 prompt_cache_len:5151 prompt_cache_ratio:0.6620822622107969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 -DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10711550712585449 s -INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10898876190185547 s -DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=56054287968830783638118173843652018438, time:1750767156.3335786s req_ids:[8] -DEBUG 06-24 20:12:36 [manager.py:391] -ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:208.11080932617188ms total_cost_time:208.15443992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7781 prompt_cache_len:5151 prompt_cache_ratio:0.6619971725999229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 -DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10720109939575195 s -INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10901713371276855 s -DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=28647881830278230452660578503466640222, time:1750767156.546369s req_ids:[8] -DEBUG 06-24 20:12:36 [manager.py:391] -ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:202.35037803649902ms total_cost_time:202.3932933807373ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7782 prompt_cache_len:5151 prompt_cache_ratio:0.6619121048573632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 -DEBUG 06-24 20:12:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:36 [manager.py:224] router recive req id 8 cost time 0.10718464851379395 s -INFO 06-24 20:12:36 [manager.py:68] detokenization recv req id 8 cost time 0.10903191566467285 s -DEBUG 06-24 20:12:36 [manager.py:391] Prefill Batch: batch_id=276846434074140928183005385687770531311, time:1750767156.754601s req_ids:[8] -DEBUG 06-24 20:12:36 [manager.py:391] -ERROR 06-24 20:12:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:206.92873001098633ms total_cost_time:206.9721221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7783 prompt_cache_len:5151 prompt_cache_ratio:0.6618270589746884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.20819950103759766 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.21006560325622559 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=254734296519192551992241417985805698524, time:1750767157.06225s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:36 lightllm_req_id:8 first_token_cost:257.28774070739746ms total_cost_time:257.34663009643555ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:7784 prompt_cache_len:5151 prompt_cache_ratio:0.6617420349434738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10843873023986816 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11045074462890625 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=97058257592607103553118273396713083541, time:1750767157.224329s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:188.84706497192383ms total_cost_time:188.89260292053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7785 prompt_cache_len:5151 prompt_cache_ratio:0.6616570327552986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11054277420043945 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=256131329317586280008538418931330994118, time:1750767157.4238315s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:169.87323760986328ms total_cost_time:169.91615295410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7786 prompt_cache_len:5151 prompt_cache_ratio:0.6615720524017468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10767412185668945 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.10871481895446777 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=65455097099429867480787763502281785780, time:1750767157.5987215s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -DEBUG 06-24 20:12:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 34639.451 tokens/s -DEBUG 06-24 20:12:37 [stats.py:37] Avg prompt tokens throughput: 34630.631 tokens/s -DEBUG 06-24 20:12:37 [stats.py:37] Avg generate tokens throughput: 8.821 tokens/s -ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:162.6131534576416ms total_cost_time:162.65606880187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7787 prompt_cache_len:5151 prompt_cache_ratio:0.6614870938744061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.10680270195007324 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.10870742797851562 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=238117005491855037305178138852115182568, time:1750767157.7652826s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -ERROR 06-24 20:12:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:190.52934646606445ms total_cost_time:190.57440757751465ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7788 prompt_cache_len:5151 prompt_cache_ratio:0.661402157164869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 -DEBUG 06-24 20:12:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:37 [manager.py:224] router recive req id 8 cost time 0.11024713516235352 s -INFO 06-24 20:12:37 [manager.py:68] detokenization recv req id 8 cost time 0.11217045783996582 s -DEBUG 06-24 20:12:37 [manager.py:391] Prefill Batch: batch_id=121074299000091681149389648773347520305, time:1750767157.961781s req_ids:[8] -DEBUG 06-24 20:12:37 [manager.py:391] -ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:37 lightllm_req_id:8 first_token_cost:199.86653327941895ms total_cost_time:199.9490261077881ms,out_token_counter:1 mean_per_token_cost_time: 0.08249282836914062ms prompt_token_num:7789 prompt_cache_len:5151 prompt_cache_ratio:0.6613172422647323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 -DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.10735106468200684 s -INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.10967612266540527 s -DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=226875499815262637808329613381709186994, time:1750767158.171505s req_ids:[8] -DEBUG 06-24 20:12:38 [manager.py:391] -ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:204.27656173706055ms total_cost_time:204.32209968566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7790 prompt_cache_len:5151 prompt_cache_ratio:0.6612323491655969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 -DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.1064910888671875 s -INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.10843586921691895 s -DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=248913798969780591702474178697412042669, time:1750767158.3914535s req_ids:[8] -DEBUG 06-24 20:12:38 [manager.py:391] -ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:208.2197666168213ms total_cost_time:208.27984809875488ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7791 prompt_cache_len:5151 prompt_cache_ratio:0.6611474778590681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 -DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.11180996894836426 s -INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.11304974555969238 s -DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=151660816767079489641917233573865364065, time:1750767158.608534s req_ids:[8] -DEBUG 06-24 20:12:38 [manager.py:391] -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:210.5121612548828ms total_cost_time:210.69598197937012ms,out_token_counter:1 mean_per_token_cost_time: 0.1838207244873047ms prompt_token_num:7792 prompt_cache_len:5151 prompt_cache_ratio:0.6610626283367557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 -DEBUG 06-24 20:12:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:38 [manager.py:224] router recive req id 8 cost time 0.10995340347290039 s -INFO 06-24 20:12:38 [manager.py:68] detokenization recv req id 8 cost time 0.11208152770996094 s -DEBUG 06-24 20:12:38 [manager.py:391] Prefill Batch: batch_id=140696447984941706432247929926933686536, time:1750767158.8200881s req_ids:[8] -DEBUG 06-24 20:12:38 [manager.py:391] -ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:38 lightllm_req_id:8 first_token_cost:358.59203338623047ms total_cost_time:358.6394786834717ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:7793 prompt_cache_len:5151 prompt_cache_ratio:0.6609778005902733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 -DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.11174178123474121 s -INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11391520500183105 s -DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=290166508850084323246739154706895201051, time:1750767159.1830564s req_ids:[8] -DEBUG 06-24 20:12:39 [manager.py:391] -ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:199.75876808166504ms total_cost_time:199.8155117034912ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7794 prompt_cache_len:5151 prompt_cache_ratio:0.6608929946112394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 -DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.10836052894592285 s -INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11015701293945312 s -DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=310206031373922942876128471285687560017, time:1750767159.3933094s req_ids:[8] -DEBUG 06-24 20:12:39 [manager.py:391] -ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:204.6835422515869ms total_cost_time:204.72955703735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7795 prompt_cache_len:5151 prompt_cache_ratio:0.6608082103912765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 -DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s -INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11055874824523926 s -DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=284318134349137126464202645957340367228, time:1750767159.6054559s req_ids:[8] -DEBUG 06-24 20:12:39 [manager.py:391] -ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:208.99081230163574ms total_cost_time:209.03682708740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7796 prompt_cache_len:5151 prompt_cache_ratio:0.6607234479220113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 -DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:39 [manager.py:224] router recive req id 8 cost time 0.11321520805358887 s -INFO 06-24 20:12:39 [manager.py:68] detokenization recv req id 8 cost time 0.11528158187866211 s -DEBUG 06-24 20:12:39 [manager.py:391] Prefill Batch: batch_id=163468458279842602933693085200637565145, time:1750767159.8204372s req_ids:[8] -DEBUG 06-24 20:12:39 [manager.py:391] -ERROR 06-24 20:12:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:209.7485065460205ms total_cost_time:209.85150337219238ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:7797 prompt_cache_len:5151 prompt_cache_ratio:0.660638707195075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 -DEBUG 06-24 20:12:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s -INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.1100921630859375 s -DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=329266234108695424827554144819913739540, time:1750767160.0452194s req_ids:[8] -DEBUG 06-24 20:12:40 [manager.py:391] -ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:39 lightllm_req_id:8 first_token_cost:216.89844131469727ms total_cost_time:216.94111824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7798 prompt_cache_len:5151 prompt_cache_ratio:0.6605539882021031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 -DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s -INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.11010146141052246 s -DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=277837382740778638043234555736824370474, time:1750767160.2572002s req_ids:[8] -DEBUG 06-24 20:12:40 [manager.py:391] -ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:204.56409454345703ms total_cost_time:204.6067714691162ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7799 prompt_cache_len:5151 prompt_cache_ratio:0.6604692909347353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 -DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10883879661560059 s -INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.11078977584838867 s -DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=288971633494187793332790798573218423484, time:1750767160.4674702s req_ids:[8] -DEBUG 06-24 20:12:40 [manager.py:391] -ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:206.72345161437988ms total_cost_time:206.76827430725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7800 prompt_cache_len:5151 prompt_cache_ratio:0.6603846153846153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 -DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10765361785888672 s -INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959291458129883 s -DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=59754223598116099589598867257513819109, time:1750767160.6790953s req_ids:[8] -DEBUG 06-24 20:12:40 [manager.py:391] -ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:205.5494785308838ms total_cost_time:205.59310913085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7801 prompt_cache_len:5151 prompt_cache_ratio:0.6602999615433919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 -DEBUG 06-24 20:12:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:40 [manager.py:224] router recive req id 8 cost time 0.10789132118225098 s -INFO 06-24 20:12:40 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s -DEBUG 06-24 20:12:40 [manager.py:391] Prefill Batch: batch_id=313314583547577911770182655310939576242, time:1750767160.8909373s req_ids:[8] -DEBUG 06-24 20:12:40 [manager.py:391] -ERROR 06-24 20:12:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:206.07304573059082ms total_cost_time:206.1166763305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7802 prompt_cache_len:5151 prompt_cache_ratio:0.6602153294027172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 -DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10886621475219727 s -INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.1108088493347168 s -DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=287262326593647541029907940415650754223, time:1750767161.103584s req_ids:[8] -DEBUG 06-24 20:12:41 [manager.py:391] -ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:40 lightllm_req_id:8 first_token_cost:374.6631145477295ms total_cost_time:374.7081756591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7803 prompt_cache_len:5151 prompt_cache_ratio:0.6601307189542484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 -DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10940361022949219 s -INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.11131906509399414 s -DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=8444240540389227913382922186749930798, time:1750767161.4795408s req_ids:[8] -DEBUG 06-24 20:12:41 [manager.py:391] -ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.0924301147461ms total_cost_time:205.1386833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7804 prompt_cache_len:5151 prompt_cache_ratio:0.6600461301896463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 -DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10970306396484375 s -INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s -DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=322950922185280246242499609836644840532, time:1750767161.692342s req_ids:[8] -DEBUG 06-24 20:12:41 [manager.py:391] -ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.8258056640625ms total_cost_time:205.8694362640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7805 prompt_cache_len:5151 prompt_cache_ratio:0.6599615631005765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 -DEBUG 06-24 20:12:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:41 [manager.py:224] router recive req id 8 cost time 0.10847926139831543 s -INFO 06-24 20:12:41 [manager.py:68] detokenization recv req id 8 cost time 0.1104745864868164 s -DEBUG 06-24 20:12:41 [manager.py:391] Prefill Batch: batch_id=166950637527419435414020971497767797773, time:1750767161.9040914s req_ids:[8] -DEBUG 06-24 20:12:41 [manager.py:391] -ERROR 06-24 20:12:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.75571060180664ms total_cost_time:205.79910278320312ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7806 prompt_cache_len:5151 prompt_cache_ratio:0.6598770176787087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 -DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s -INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11052465438842773 s -DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=87715912668220206645377143317363891508, time:1750767162.117333s req_ids:[8] -DEBUG 06-24 20:12:42 [manager.py:391] -ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:41 lightllm_req_id:8 first_token_cost:205.91998100280762ms total_cost_time:205.963134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7807 prompt_cache_len:5151 prompt_cache_ratio:0.6597924939157167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 -DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10818910598754883 s -INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11009931564331055 s -DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=206859655588565935639958019697132375743, time:1750767162.3277104s req_ids:[8] -DEBUG 06-24 20:12:42 [manager.py:391] -ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:203.4900188446045ms total_cost_time:203.53388786315918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7808 prompt_cache_len:5151 prompt_cache_ratio:0.6597079918032787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 -DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10830044746398926 s -INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s -DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=105027466097580820553409443519945041224, time:1750767162.5361772s req_ids:[8] -DEBUG 06-24 20:12:42 [manager.py:391] -ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:206.60758018493652ms total_cost_time:206.65264129638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7809 prompt_cache_len:5151 prompt_cache_ratio:0.6596235113330772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 -DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10750699043273926 s -INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.10945916175842285 s -DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=338869147424300712385214900071881679254, time:1750767162.7489462s req_ids:[8] -DEBUG 06-24 20:12:42 [manager.py:391] -ERROR 06-24 20:12:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:208.63914489746094ms total_cost_time:208.665132522583ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:7810 prompt_cache_len:5151 prompt_cache_ratio:0.659539052496799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 -DEBUG 06-24 20:12:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:42 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s -INFO 06-24 20:12:42 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s -DEBUG 06-24 20:12:42 [manager.py:391] Prefill Batch: batch_id=273643189204603867902547357672135946779, time:1750767162.9625251s req_ids:[8] -DEBUG 06-24 20:12:42 [manager.py:391] -ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:42 lightllm_req_id:8 first_token_cost:209.04016494750977ms total_cost_time:209.08522605895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7811 prompt_cache_len:5151 prompt_cache_ratio:0.6594546152861349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 -DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.10741043090820312 s -INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.1094810962677002 s -DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=209662032974470144078574535756495997810, time:1750767163.184626s req_ids:[8] -DEBUG 06-24 20:12:43 [manager.py:391] -ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:216.8440818786621ms total_cost_time:216.8865203857422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7812 prompt_cache_len:5151 prompt_cache_ratio:0.6593701996927803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 -DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.10881900787353516 s -INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.11075806617736816 s -DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=66348501409928737007563581828109310192, time:1750767163.3994532s req_ids:[8] -DEBUG 06-24 20:12:43 [manager.py:391] -ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:204.49209213256836ms total_cost_time:204.53453063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7813 prompt_cache_len:5151 prompt_cache_ratio:0.6592858057084346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 -DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:43 [manager.py:224] router recive req id 8 cost time 0.31018972396850586 s -INFO 06-24 20:12:43 [manager.py:68] detokenization recv req id 8 cost time 0.3122715950012207 s -DEBUG 06-24 20:12:43 [manager.py:391] Prefill Batch: batch_id=282802671165673485803220431081463154781, time:1750767163.8082213s req_ids:[8] -DEBUG 06-24 20:12:43 [manager.py:391] -ERROR 06-24 20:12:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:410.7203483581543ms total_cost_time:410.7632637023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7814 prompt_cache_len:5151 prompt_cache_ratio:0.6592014333248016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 -DEBUG 06-24 20:12:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10756707191467285 s -INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.11001849174499512 s -DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=307054873825381039363732680820361925079, time:1750767164.026393s req_ids:[8] -DEBUG 06-24 20:12:44 [manager.py:391] -ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:43 lightllm_req_id:8 first_token_cost:208.6312770843506ms total_cost_time:208.67443084716797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7815 prompt_cache_len:5151 prompt_cache_ratio:0.6591170825335892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 -DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10797810554504395 s -INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.10997533798217773 s -DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=112390849237437208587327163039240949921, time:1750767164.239071s req_ids:[8] -DEBUG 06-24 20:12:44 [manager.py:391] -ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:207.7314853668213ms total_cost_time:207.77535438537598ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7816 prompt_cache_len:5151 prompt_cache_ratio:0.6590327533265097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 -DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10856461524963379 s -INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s -DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=113759178430491661709473551108915496947, time:1750767164.4567106s req_ids:[8] -DEBUG 06-24 20:12:44 [manager.py:391] -ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:211.82727813720703ms total_cost_time:211.87210083007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7817 prompt_cache_len:5151 prompt_cache_ratio:0.6589484456952795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 -DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10897397994995117 s -INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.1109311580657959 s -DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=57724224046778320720279463542096146490, time:1750767164.6691864s req_ids:[8] -DEBUG 06-24 20:12:44 [manager.py:391] -ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:205.4297924041748ms total_cost_time:205.4731845855713ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7818 prompt_cache_len:5151 prompt_cache_ratio:0.6588641596316194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 -DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:44 [manager.py:224] router recive req id 8 cost time 0.10769510269165039 s -INFO 06-24 20:12:44 [manager.py:68] detokenization recv req id 8 cost time 0.10958504676818848 s -DEBUG 06-24 20:12:44 [manager.py:391] Prefill Batch: batch_id=292588218726670146515795331089167792488, time:1750767164.8796887s req_ids:[8] -DEBUG 06-24 20:12:44 [manager.py:391] -ERROR 06-24 20:12:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:201.68447494506836ms total_cost_time:201.72882080078125ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7819 prompt_cache_len:5151 prompt_cache_ratio:0.6587798951272541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 -DEBUG 06-24 20:12:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10802650451660156 s -INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s -DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=286169459701384622266023073823860286096, time:1750767165.0856876s req_ids:[8] -DEBUG 06-24 20:12:45 [manager.py:391] -ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:44 lightllm_req_id:8 first_token_cost:201.53212547302246ms total_cost_time:201.57551765441895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7820 prompt_cache_len:5151 prompt_cache_ratio:0.658695652173913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 -DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s -INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.11058354377746582 s -DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=235034714627007687674623075359729493511, time:1750767165.2935016s req_ids:[8] -DEBUG 06-24 20:12:45 [manager.py:391] -ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:204.60987091064453ms total_cost_time:204.65373992919922ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7821 prompt_cache_len:5151 prompt_cache_ratio:0.6586114307633295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 -DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10772347450256348 s -INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10961556434631348 s -DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=82528607743229659664222798962670511548, time:1750767165.5035062s req_ids:[8] -DEBUG 06-24 20:12:45 [manager.py:391] -ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:204.85591888427734ms total_cost_time:204.8795223236084ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7822 prompt_cache_len:5151 prompt_cache_ratio:0.6585272308872411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 -DEBUG 06-24 20:12:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:45 [manager.py:224] router recive req id 8 cost time 0.10699963569641113 s -INFO 06-24 20:12:45 [manager.py:68] detokenization recv req id 8 cost time 0.10897159576416016 s -DEBUG 06-24 20:12:45 [manager.py:391] Prefill Batch: batch_id=244572937814260211114096187091869256825, time:1750767165.7132423s req_ids:[8] -DEBUG 06-24 20:12:45 [manager.py:391] -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:207.88908004760742ms total_cost_time:207.9336643218994ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7823 prompt_cache_len:5151 prompt_cache_ratio:0.6584430525373898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 -DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.31109118461608887 s -INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.3131904602050781 s -DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=124631973272479657648092345425973111333, time:1750767166.1239002s req_ids:[8] -DEBUG 06-24 20:12:46 [manager.py:391] -ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:45 lightllm_req_id:8 first_token_cost:406.83746337890625ms total_cost_time:406.9020748138428ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:7824 prompt_cache_len:5151 prompt_cache_ratio:0.6583588957055214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 -DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10747361183166504 s -INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10948896408081055 s -DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=229035754067486776153710022692966777221, time:1750767166.3412209s req_ids:[8] -DEBUG 06-24 20:12:46 [manager.py:391] -ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.0229778289795ms total_cost_time:206.0678005218506ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7825 prompt_cache_len:5151 prompt_cache_ratio:0.6582747603833866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 -DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.1079871654510498 s -INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999488830566406 s -DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=75116731492428493444384759793893627504, time:1750767166.55168s req_ids:[8] -DEBUG 06-24 20:12:46 [manager.py:391] -ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.61234855651855ms total_cost_time:206.65574073791504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7826 prompt_cache_len:5151 prompt_cache_ratio:0.6581906465627396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 -DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10775208473205566 s -INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10971760749816895 s -DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=24430523302828009429234383318060190626, time:1750767166.7773473s req_ids:[8] -DEBUG 06-24 20:12:46 [manager.py:391] -ERROR 06-24 20:12:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:223.97589683532715ms total_cost_time:224.02167320251465ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7827 prompt_cache_len:5151 prompt_cache_ratio:0.6581065542353393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 -DEBUG 06-24 20:12:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:46 [manager.py:224] router recive req id 8 cost time 0.10783267021179199 s -INFO 06-24 20:12:46 [manager.py:68] detokenization recv req id 8 cost time 0.10982656478881836 s -DEBUG 06-24 20:12:46 [manager.py:391] Prefill Batch: batch_id=202067431516456296234557740695017272739, time:1750767166.9950624s req_ids:[8] -DEBUG 06-24 20:12:46 [manager.py:391] -ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:46 lightllm_req_id:8 first_token_cost:206.45928382873535ms total_cost_time:206.50124549865723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:7828 prompt_cache_len:5151 prompt_cache_ratio:0.6580224833929484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 -DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10797858238220215 s -INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s -DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=173437858391750328019739884941327596300, time:1750767167.2053876s req_ids:[8] -DEBUG 06-24 20:12:47 [manager.py:391] -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:205.63745498657227ms total_cost_time:205.68156242370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7829 prompt_cache_len:5151 prompt_cache_ratio:0.6579384340273343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 -DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s -INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s -DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=74453388191925478653278384864177402429, time:1750767167.4185927s req_ids:[8] -DEBUG 06-24 20:12:47 [manager.py:391] -ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:204.40340042114258ms total_cost_time:204.44846153259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7830 prompt_cache_len:5151 prompt_cache_ratio:0.6578544061302682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 -DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10787391662597656 s -INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.1099708080291748 s -DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=282155727067769280782993866955231938843, time:1750767167.628151s req_ids:[8] -DEBUG 06-24 20:12:47 [manager.py:391] -DEBUG 06-24 20:12:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 34268.414 tokens/s -DEBUG 06-24 20:12:47 [stats.py:37] Avg prompt tokens throughput: 34259.640 tokens/s -DEBUG 06-24 20:12:47 [stats.py:37] Avg generate tokens throughput: 8.774 tokens/s -ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:206.35747909545898ms total_cost_time:206.40087127685547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7831 prompt_cache_len:5151 prompt_cache_ratio:0.6577703996935257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 -DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:47 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s -INFO 06-24 20:12:47 [manager.py:68] detokenization recv req id 8 cost time 0.10948538780212402 s -DEBUG 06-24 20:12:47 [manager.py:391] Prefill Batch: batch_id=108920066960515860144407616421605717055, time:1750767167.8410118s req_ids:[8] -DEBUG 06-24 20:12:47 [manager.py:391] -ERROR 06-24 20:12:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:207.8542709350586ms total_cost_time:207.89813995361328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7832 prompt_cache_len:5151 prompt_cache_ratio:0.6576864147088867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 -DEBUG 06-24 20:12:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s -INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.1107170581817627 s -DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=236193475925226025747992817951678770063, time:1750767168.0548775s req_ids:[8] -DEBUG 06-24 20:12:48 [manager.py:391] -ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:47 lightllm_req_id:8 first_token_cost:358.74032974243164ms total_cost_time:358.7837219238281ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7833 prompt_cache_len:5151 prompt_cache_ratio:0.6576024511681348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 -DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10766434669494629 s -INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966897010803223 s -DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=6837067223728868161548059738235814118, time:1750767168.4143562s req_ids:[8] -DEBUG 06-24 20:12:48 [manager.py:391] -ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:202.64267921447754ms total_cost_time:202.68607139587402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7834 prompt_cache_len:5151 prompt_cache_ratio:0.6575185090630584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 -DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.10733699798583984 s -INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10928058624267578 s -DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=173510149407034626369929831193475074888, time:1750767168.6263971s req_ids:[8] -DEBUG 06-24 20:12:48 [manager.py:391] -ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:205.04474639892578ms total_cost_time:205.06572723388672ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:7835 prompt_cache_len:5151 prompt_cache_ratio:0.6574345883854499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 -DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:48 [manager.py:224] router recive req id 8 cost time 0.106842041015625 s -INFO 06-24 20:12:48 [manager.py:68] detokenization recv req id 8 cost time 0.10872197151184082 s -DEBUG 06-24 20:12:48 [manager.py:391] Prefill Batch: batch_id=221223644396528069925247365640292506895, time:1750767168.8385878s req_ids:[8] -DEBUG 06-24 20:12:48 [manager.py:391] -ERROR 06-24 20:12:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:207.32474327087402ms total_cost_time:207.3671817779541ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7836 prompt_cache_len:5151 prompt_cache_ratio:0.6573506891271057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 -DEBUG 06-24 20:12:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s -INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10968565940856934 s -DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=281517890659331691620816104545894738764, time:1750767169.0551896s req_ids:[8] -DEBUG 06-24 20:12:49 [manager.py:391] -ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:48 lightllm_req_id:8 first_token_cost:212.385892868042ms total_cost_time:212.43023872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7837 prompt_cache_len:5151 prompt_cache_ratio:0.6572668112798264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 -DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.1076512336730957 s -INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10953330993652344 s -DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=268326555053036279353775213955998023840, time:1750767169.270256s req_ids:[8] -DEBUG 06-24 20:12:49 [manager.py:391] -ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:209.21063423156738ms total_cost_time:209.25474166870117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7838 prompt_cache_len:5151 prompt_cache_ratio:0.6571829548354172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 -DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10802793502807617 s -INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10988783836364746 s -DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=307489675214198564511685809218087725720, time:1750767169.4838462s req_ids:[8] -DEBUG 06-24 20:12:49 [manager.py:391] -ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:207.52382278442383ms total_cost_time:207.5479030609131ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:7839 prompt_cache_len:5151 prompt_cache_ratio:0.6570991197856869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 -DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10446023941040039 s -INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10647201538085938 s -DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=231708289772467728640701324307162813532, time:1750767169.6993258s req_ids:[8] -DEBUG 06-24 20:12:49 [manager.py:391] -ERROR 06-24 20:12:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:210.81280708312988ms total_cost_time:210.83426475524902ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:7840 prompt_cache_len:5151 prompt_cache_ratio:0.657015306122449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 -DEBUG 06-24 20:12:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:49 [manager.py:224] router recive req id 8 cost time 0.10384535789489746 s -INFO 06-24 20:12:49 [manager.py:68] detokenization recv req id 8 cost time 0.10575532913208008 s -DEBUG 06-24 20:12:49 [manager.py:391] Prefill Batch: batch_id=141116724313608378497178101585232724247, time:1750767169.923279s req_ids:[8] -DEBUG 06-24 20:12:49 [manager.py:391] -ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:49 lightllm_req_id:8 first_token_cost:220.87478637695312ms total_cost_time:220.89695930480957ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7841 prompt_cache_len:5151 prompt_cache_ratio:0.6569315138375207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 -DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.1054677963256836 s -INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10742735862731934 s -DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=303587160006366534194467963279072552093, time:1750767170.1389878s req_ids:[8] -DEBUG 06-24 20:12:50 [manager.py:391] -ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:209.57040786743164ms total_cost_time:209.5935344696045ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:7842 prompt_cache_len:5151 prompt_cache_ratio:0.6568477429227237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 -DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10389113426208496 s -INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10590410232543945 s -DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=24512486689610865578662831877832408314, time:1750767170.3536935s req_ids:[8] -DEBUG 06-24 20:12:50 [manager.py:391] -ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:419.89946365356445ms total_cost_time:420.0477600097656ms,out_token_counter:1 mean_per_token_cost_time: 0.14829635620117188ms prompt_token_num:7843 prompt_cache_len:5151 prompt_cache_ratio:0.656763993369884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 -DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10954093933105469 s -INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.11141633987426758 s -DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=40115888345967496188149387683151479576, time:1750767170.7707872s req_ids:[8] -DEBUG 06-24 20:12:50 [manager.py:391] -ERROR 06-24 20:12:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:185.6365203857422ms total_cost_time:185.6846809387207ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:7844 prompt_cache_len:5151 prompt_cache_ratio:0.6566802651708312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 -DEBUG 06-24 20:12:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:50 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s -INFO 06-24 20:12:50 [manager.py:68] detokenization recv req id 8 cost time 0.10990571975708008 s -DEBUG 06-24 20:12:50 [manager.py:391] Prefill Batch: batch_id=197401546882747500383506495433550272837, time:1750767170.9653738s req_ids:[8] -DEBUG 06-24 20:12:50 [manager.py:391] -ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:50 lightllm_req_id:8 first_token_cost:199.86772537231445ms total_cost_time:199.89323616027832ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:7845 prompt_cache_len:5151 prompt_cache_ratio:0.6565965583173996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 -DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10799360275268555 s -INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.10997724533081055 s -DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=307534822013909809222983270193302572045, time:1750767171.1752682s req_ids:[8] -DEBUG 06-24 20:12:51 [manager.py:391] -ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:205.25574684143066ms total_cost_time:205.29890060424805ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7846 prompt_cache_len:5151 prompt_cache_ratio:0.6565128728014274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 -DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10863518714904785 s -INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.11071467399597168 s -DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=5329549067393894531597531817611664621, time:1750767171.3861141s req_ids:[8] -DEBUG 06-24 20:12:51 [manager.py:391] -ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:206.06040954589844ms total_cost_time:206.10332489013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7847 prompt_cache_len:5151 prompt_cache_ratio:0.6564292086147573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 -DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.11042332649230957 s -INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.11253833770751953 s -DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=307273961558421150940248929403324173681, time:1750767171.6003299s req_ids:[8] -DEBUG 06-24 20:12:51 [manager.py:391] -ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:207.83734321594238ms total_cost_time:207.88121223449707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7848 prompt_cache_len:5151 prompt_cache_ratio:0.6563455657492355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 -DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:51 [manager.py:224] router recive req id 8 cost time 0.10757637023925781 s -INFO 06-24 20:12:51 [manager.py:68] detokenization recv req id 8 cost time 0.10957503318786621 s -DEBUG 06-24 20:12:51 [manager.py:391] Prefill Batch: batch_id=237644735317629934630430100903887691055, time:1750767171.816964s req_ids:[8] -DEBUG 06-24 20:12:51 [manager.py:391] -ERROR 06-24 20:12:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:212.48126029968262ms total_cost_time:212.5253677368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7849 prompt_cache_len:5151 prompt_cache_ratio:0.6562619441967129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 -DEBUG 06-24 20:12:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.10747790336608887 s -INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s -DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=315041943837192311843458098095749415180, time:1750767172.0288498s req_ids:[8] -DEBUG 06-24 20:12:52 [manager.py:391] -ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:51 lightllm_req_id:8 first_token_cost:200.76680183410645ms total_cost_time:200.78420639038086ms,out_token_counter:1 mean_per_token_cost_time: 0.017404556274414062ms prompt_token_num:7850 prompt_cache_len:5151 prompt_cache_ratio:0.6561783439490446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 -DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.1056528091430664 s -INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.10761499404907227 s -DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=253688449457827194157943166102316284723, time:1750767172.2339876s req_ids:[8] -DEBUG 06-24 20:12:52 [manager.py:391] -ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:197.22747802734375ms total_cost_time:197.27182388305664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7851 prompt_cache_len:5151 prompt_cache_ratio:0.6560947649980894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 -DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.11025881767272949 s -INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.11274528503417969 s -DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=12431557209936470488714697809467793092, time:1750767172.43621s req_ids:[8] -DEBUG 06-24 20:12:52 [manager.py:391] -ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:193.8316822052002ms total_cost_time:193.8765048980713ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7852 prompt_cache_len:5151 prompt_cache_ratio:0.6560112073357106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 -DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:52 [manager.py:224] router recive req id 8 cost time 0.11105155944824219 s -INFO 06-24 20:12:52 [manager.py:68] detokenization recv req id 8 cost time 0.1139066219329834 s -DEBUG 06-24 20:12:52 [manager.py:391] Prefill Batch: batch_id=163246012947028245846152416365491824755, time:1750767172.635632s req_ids:[8] -DEBUG 06-24 20:12:52 [manager.py:391] -ERROR 06-24 20:12:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:194.3669319152832ms total_cost_time:194.4105625152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7853 prompt_cache_len:5151 prompt_cache_ratio:0.6559276709537756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 -DEBUG 06-24 20:12:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.3107764720916748 s -INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.31225156784057617 s -DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=154965861668476664084580937826507589001, time:1750767173.0613384s req_ids:[8] -DEBUG 06-24 20:12:53 [manager.py:391] -ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:52 lightllm_req_id:8 first_token_cost:430.57847023010254ms total_cost_time:430.6020736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7854 prompt_cache_len:5151 prompt_cache_ratio:0.6558441558441559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 -DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:53 [batch.py:51] router release req id 8 -INFO 06-24 20:12:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:12:53 [statics_utils.py:24] mean first cost: 229.8583200950235 ms -INFO 06-24 20:12:53 [statics_utils.py:24] mean per token cost: 0.08583945283048107 ms -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.1072380542755127 s -INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10808849334716797 s -DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=296597469887509482248910859973923037228, time:1750767173.2782252s req_ids:[8] -DEBUG 06-24 20:12:53 [manager.py:391] -ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:172.61576652526855ms total_cost_time:172.65987396240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7855 prompt_cache_len:5151 prompt_cache_ratio:0.655760661998727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 -DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.11081051826477051 s -INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.1128232479095459 s -DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=268361294092545847191864485264169355821, time:1750767173.4565158s req_ids:[8] -DEBUG 06-24 20:12:53 [manager.py:391] -ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:188.3225440979004ms total_cost_time:188.36593627929688ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7856 prompt_cache_len:5151 prompt_cache_ratio:0.6556771894093686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 -DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.10772824287414551 s -INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10971498489379883 s -DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=251681310256606377179874524905202762770, time:1750767173.6421583s req_ids:[8] -DEBUG 06-24 20:12:53 [manager.py:391] -ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:204.22792434692383ms total_cost_time:204.2853832244873ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:7857 prompt_cache_len:5151 prompt_cache_ratio:0.6555937380679648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 -DEBUG 06-24 20:12:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:53 [manager.py:224] router recive req id 8 cost time 0.1049950122833252 s -INFO 06-24 20:12:53 [manager.py:68] detokenization recv req id 8 cost time 0.10697484016418457 s -DEBUG 06-24 20:12:53 [manager.py:391] Prefill Batch: batch_id=124473054105582977757357013000252202510, time:1750767173.8729613s req_ids:[8] -DEBUG 06-24 20:12:53 [manager.py:391] -ERROR 06-24 20:12:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:230.40080070495605ms total_cost_time:230.44800758361816ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7858 prompt_cache_len:5151 prompt_cache_ratio:0.6555103079664036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 -DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10741066932678223 s -INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s -DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=126360516433953115765849857296199212361, time:1750767174.0914202s req_ids:[8] -DEBUG 06-24 20:12:54 [manager.py:391] -ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:53 lightllm_req_id:8 first_token_cost:216.02845191955566ms total_cost_time:216.050386428833ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7859 prompt_cache_len:5151 prompt_cache_ratio:0.6554268990965771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 -DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10408782958984375 s -INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10613632202148438 s -DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=22547531148127120559754087384139046939, time:1750767174.310107s req_ids:[8] -DEBUG 06-24 20:12:54 [manager.py:391] -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:207.38506317138672ms total_cost_time:207.40723609924316ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7860 prompt_cache_len:5151 prompt_cache_ratio:0.6553435114503817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 -DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.10633254051208496 s -INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10843443870544434 s -DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=263023415094047271043780797761014411231, time:1750767174.5220256s req_ids:[8] -DEBUG 06-24 20:12:54 [manager.py:391] -ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:206.82358741760254ms total_cost_time:206.84552192687988ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:7861 prompt_cache_len:5151 prompt_cache_ratio:0.6552601450197176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 -DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.1042168140411377 s -INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10651326179504395 s -DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=300604934885119637634836726330644569553, time:1750767174.7398539s req_ids:[8] -DEBUG 06-24 20:12:54 [manager.py:391] -ERROR 06-24 20:12:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:217.3776626586914ms total_cost_time:217.39912033081055ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:7862 prompt_cache_len:5151 prompt_cache_ratio:0.6551767997964895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 -DEBUG 06-24 20:12:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:54 [manager.py:224] router recive req id 8 cost time 0.1043400764465332 s -INFO 06-24 20:12:54 [manager.py:68] detokenization recv req id 8 cost time 0.10589241981506348 s -DEBUG 06-24 20:12:54 [manager.py:391] Prefill Batch: batch_id=190357288578506274695988281917095770923, time:1750767174.9545193s req_ids:[8] -DEBUG 06-24 20:12:54 [manager.py:391] -ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:54 lightllm_req_id:8 first_token_cost:204.93769645690918ms total_cost_time:204.9582004547119ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:7863 prompt_cache_len:5151 prompt_cache_ratio:0.6550934757726059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 -DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.3059842586517334 s -INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.30852556228637695 s -DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=230141403210811406881505948612778582404, time:1750767175.3794127s req_ids:[8] -DEBUG 06-24 20:12:55 [manager.py:391] -ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:428.67207527160645ms total_cost_time:428.6983013153076ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:7864 prompt_cache_len:5151 prompt_cache_ratio:0.6550101729399797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 -DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.10584163665771484 s -INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.10785794258117676 s -DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=264321377950579036539958183155249432759, time:1750767175.5973854s req_ids:[8] -DEBUG 06-24 20:12:55 [manager.py:391] -ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:192.63839721679688ms total_cost_time:192.69180297851562ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:7865 prompt_cache_len:5151 prompt_cache_ratio:0.6549268912905276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 -DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:55 [manager.py:224] router recive req id 8 cost time 0.10584592819213867 s -INFO 06-24 20:12:55 [manager.py:68] detokenization recv req id 8 cost time 0.10764598846435547 s -DEBUG 06-24 20:12:55 [manager.py:391] Prefill Batch: batch_id=217045488101720811049419811924290649885, time:1750767175.816658s req_ids:[8] -DEBUG 06-24 20:12:55 [manager.py:391] -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:12:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:12:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:205.68275451660156ms total_cost_time:205.74569702148438ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:7866 prompt_cache_len:5151 prompt_cache_ratio:0.6548436308161708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 -DEBUG 06-24 20:12:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.11109757423400879 s -INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.11252498626708984 s -DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=298134573649110912622343440084621034180, time:1750767176.0103226s req_ids:[8] -DEBUG 06-24 20:12:56 [manager.py:391] -ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:55 lightllm_req_id:8 first_token_cost:202.03065872192383ms total_cost_time:202.07595825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7867 prompt_cache_len:5151 prompt_cache_ratio:0.6547603915088344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 -DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.1062161922454834 s -INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10811352729797363 s -DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=184728238813309456036900917964433343305, time:1750767176.219719s req_ids:[8] -DEBUG 06-24 20:12:56 [manager.py:391] -ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:201.86352729797363ms total_cost_time:201.91001892089844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7868 prompt_cache_len:5151 prompt_cache_ratio:0.6546771733604474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 -DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10471677780151367 s -INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10607218742370605 s -DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=111950901616908714130900100285260309411, time:1750767176.4368408s req_ids:[8] -DEBUG 06-24 20:12:56 [manager.py:391] -ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:170.7768440246582ms total_cost_time:170.79639434814453ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:7869 prompt_cache_len:5151 prompt_cache_ratio:0.6545939763629431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 -DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10854768753051758 s -INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.11049771308898926 s -DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=78415624879051548732735224168072897535, time:1750767176.6045241s req_ids:[8] -DEBUG 06-24 20:12:56 [manager.py:391] -ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:199.60999488830566ms total_cost_time:199.65553283691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7870 prompt_cache_len:5151 prompt_cache_ratio:0.6545108005082592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 -DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:56 [manager.py:224] router recive req id 8 cost time 0.10727787017822266 s -INFO 06-24 20:12:56 [manager.py:68] detokenization recv req id 8 cost time 0.10885024070739746 s -DEBUG 06-24 20:12:56 [manager.py:391] Prefill Batch: batch_id=233414516269008725324805499745081472802, time:1750767176.8186522s req_ids:[8] -DEBUG 06-24 20:12:56 [manager.py:391] -ERROR 06-24 20:12:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:205.0018310546875ms total_cost_time:205.02448081970215ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:7871 prompt_cache_len:5151 prompt_cache_ratio:0.6544276457883369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 -DEBUG 06-24 20:12:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10424470901489258 s -INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10634493827819824 s -DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=247953068239166334887973339496606245823, time:1750767177.025059s req_ids:[8] -DEBUG 06-24 20:12:57 [manager.py:391] -ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:56 lightllm_req_id:8 first_token_cost:212.68701553344727ms total_cost_time:212.7082347869873ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:7872 prompt_cache_len:5151 prompt_cache_ratio:0.6543445121951219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 -DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10459303855895996 s -INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10650992393493652 s -DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=302238872280038165254578060229914404602, time:1750767177.2418559s req_ids:[8] -DEBUG 06-24 20:12:57 [manager.py:391] -ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:394.3946361541748ms total_cost_time:394.41680908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:7873 prompt_cache_len:5151 prompt_cache_ratio:0.654261399720564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 -DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10344314575195312 s -INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10525107383728027 s -DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=286503473111675095607754861358278990518, time:1750767177.635418s req_ids:[8] -DEBUG 06-24 20:12:57 [manager.py:391] -DEBUG 06-24 20:12:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 33752.300 tokens/s -DEBUG 06-24 20:12:57 [stats.py:37] Avg prompt tokens throughput: 33743.706 tokens/s -DEBUG 06-24 20:12:57 [stats.py:37] Avg generate tokens throughput: 8.594 tokens/s -ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:199.71442222595215ms total_cost_time:199.7373104095459ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:7874 prompt_cache_len:5151 prompt_cache_ratio:0.6541783083566167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 -DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:57 [manager.py:224] router recive req id 8 cost time 0.10605216026306152 s -INFO 06-24 20:12:57 [manager.py:68] detokenization recv req id 8 cost time 0.10798931121826172 s -DEBUG 06-24 20:12:57 [manager.py:391] Prefill Batch: batch_id=127078506917606954820349593645339128213, time:1750767177.8405166s req_ids:[8] -DEBUG 06-24 20:12:57 [manager.py:391] -ERROR 06-24 20:12:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:205.74307441711426ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:7875 prompt_cache_len:5151 prompt_cache_ratio:0.6540952380952381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 -DEBUG 06-24 20:12:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10694503784179688 s -INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10920977592468262 s -DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=233992111268628424688566563356844023088, time:1750767178.052152s req_ids:[8] -DEBUG 06-24 20:12:58 [manager.py:391] -ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:57 lightllm_req_id:8 first_token_cost:217.02051162719727ms total_cost_time:217.04649925231934ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:7876 prompt_cache_len:5151 prompt_cache_ratio:0.6540121889283901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 -DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10575175285339355 s -INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10756134986877441 s -DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=251061990466641925608856123154577490800, time:1750767178.271426s req_ids:[8] -DEBUG 06-24 20:12:58 [manager.py:391] -ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:205.11794090270996ms total_cost_time:205.14535903930664ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:7877 prompt_cache_len:5151 prompt_cache_ratio:0.6539291608480386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 -DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10613703727722168 s -DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=51097521555061455467387534951474416660, time:1750767178.46793s req_ids:[8] -DEBUG 06-24 20:12:58 [manager.py:391] -INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10809063911437988 s -ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:181.502103805542ms total_cost_time:181.54668807983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7878 prompt_cache_len:5151 prompt_cache_ratio:0.6538461538461539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 -DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10791659355163574 s -INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10947346687316895 s -DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=146551847265213755202561051179619212524, time:1750767178.6773195s req_ids:[8] -DEBUG 06-24 20:12:58 [manager.py:391] -ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:197.2217559814453ms total_cost_time:197.2670555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7879 prompt_cache_len:5151 prompt_cache_ratio:0.65376316791471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 -DEBUG 06-24 20:12:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:58 [manager.py:224] router recive req id 8 cost time 0.10718035697937012 s -INFO 06-24 20:12:58 [manager.py:68] detokenization recv req id 8 cost time 0.10902762413024902 s -DEBUG 06-24 20:12:58 [manager.py:391] Prefill Batch: batch_id=89115862987678142568656268587083587720, time:1750767178.8684704s req_ids:[8] -DEBUG 06-24 20:12:58 [manager.py:391] -ERROR 06-24 20:12:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:201.29919052124023ms total_cost_time:201.34282112121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7880 prompt_cache_len:5151 prompt_cache_ratio:0.6536802030456853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 -DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.1101839542388916 s -INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.11231207847595215 s -DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=162986498573728941537294242118658574543, time:1750767179.0723193s req_ids:[8] -DEBUG 06-24 20:12:59 [manager.py:391] -ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:58 lightllm_req_id:8 first_token_cost:158.28657150268555ms total_cost_time:158.32996368408203ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7881 prompt_cache_len:5151 prompt_cache_ratio:0.653597259231062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 -DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.1068410873413086 s -INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.10806107521057129 s -DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=185412020946522520068467839363895827249, time:1750767179.2357886s req_ids:[8] -DEBUG 06-24 20:12:59 [manager.py:391] -ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:189.55397605895996ms total_cost_time:189.57757949829102ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:7882 prompt_cache_len:5151 prompt_cache_ratio:0.6535143364628266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 -DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10394978523254395 s -INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.1055293083190918 s -DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=142059734378155772776807005061767237422, time:1750767179.432318s req_ids:[8] -DEBUG 06-24 20:12:59 [manager.py:391] -ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:374.5393753051758ms total_cost_time:374.58276748657227ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7883 prompt_cache_len:5151 prompt_cache_ratio:0.6534314347329697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 -DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10754704475402832 s -INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.10938000679016113 s -DEBUG 06-24 20:12:59 [manager.py:391] Prefill Batch: batch_id=123713240672608311838517953530146017920, time:1750767179.8118942s req_ids:[8] -DEBUG 06-24 20:12:59 [manager.py:391] -ERROR 06-24 20:12:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:12:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:12:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:188.19832801818848ms total_cost_time:188.24338912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7884 prompt_cache_len:5151 prompt_cache_ratio:0.6533485540334856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:12:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 -DEBUG 06-24 20:12:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:12:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:12:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:12:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:12:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:12:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:12:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:12:59 [manager.py:224] router recive req id 8 cost time 0.10844039916992188 s -INFO 06-24 20:12:59 [manager.py:68] detokenization recv req id 8 cost time 0.11043953895568848 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=67427140879333357418330329786273903796, time:1750767180.00242s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:12:59 lightllm_req_id:8 first_token_cost:188.52758407592773ms total_cost_time:188.57288360595703ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7885 prompt_cache_len:5151 prompt_cache_ratio:0.6532656943563728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 -DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10796689987182617 s -INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.10975480079650879 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=2900130359649208526240365784995170837, time:1750767180.209241s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:200.38962364196777ms total_cost_time:200.43253898620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7886 prompt_cache_len:5151 prompt_cache_ratio:0.6531828556936343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 -DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10701131820678711 s -INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.10850691795349121 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=46424425140334738714158305292346791267, time:1750767180.402475s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:186.9971752166748ms total_cost_time:187.0410442352295ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7887 prompt_cache_len:5151 prompt_cache_ratio:0.6531000380372766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 -DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.10892844200134277 s -INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.1109616756439209 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=302313526594270025396732375261683113736, time:1750767180.594776s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:185.89496612548828ms total_cost_time:185.9586238861084ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:7888 prompt_cache_len:5151 prompt_cache_ratio:0.6530172413793104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 -DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.1083219051361084 s -INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.11023306846618652 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=152033509839383854772700794251605781894, time:1750767180.790537s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:198.99439811706543ms total_cost_time:199.03993606567383ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:7889 prompt_cache_len:5151 prompt_cache_ratio:0.6529344657117505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 -DEBUG 06-24 20:13:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:00 [manager.py:224] router recive req id 8 cost time 0.1084446907043457 s -INFO 06-24 20:13:00 [manager.py:68] detokenization recv req id 8 cost time 0.11056041717529297 s -DEBUG 06-24 20:13:00 [manager.py:391] Prefill Batch: batch_id=253908231683318308956088896497234494859, time:1750767180.997681s req_ids:[8] -DEBUG 06-24 20:13:00 [manager.py:391] -ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:00 lightllm_req_id:8 first_token_cost:214.94698524475098ms total_cost_time:215.00849723815918ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:7890 prompt_cache_len:5151 prompt_cache_ratio:0.652851711026616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 -DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s -INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.11059403419494629 s -DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=182488562083900766787788617840230840474, time:1750767181.212677s req_ids:[8] -DEBUG 06-24 20:13:01 [manager.py:391] -ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:196.36821746826172ms total_cost_time:196.4733600616455ms,out_token_counter:1 mean_per_token_cost_time: 0.10514259338378906ms prompt_token_num:7891 prompt_cache_len:5151 prompt_cache_ratio:0.6527689773159295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 -DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10830569267272949 s -INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s -DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=80303069115567512710135163929348897385, time:1750767181.4186814s req_ids:[8] -DEBUG 06-24 20:13:01 [manager.py:391] -ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:197.70026206970215ms total_cost_time:197.74293899536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:7892 prompt_cache_len:5151 prompt_cache_ratio:0.6526862645717182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 -DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:01 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s -INFO 06-24 20:13:01 [manager.py:68] detokenization recv req id 8 cost time 0.10953450202941895 s -DEBUG 06-24 20:13:01 [manager.py:391] Prefill Batch: batch_id=90066832745162117730290751837561427420, time:1750767181.62345s req_ids:[8] -DEBUG 06-24 20:13:01 [manager.py:391] -ERROR 06-24 20:13:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:209.19036865234375ms total_cost_time:209.23566818237305ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7893 prompt_cache_len:5151 prompt_cache_ratio:0.6526035727860129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 -DEBUG 06-24 20:13:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.31040382385253906 s -INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.3123970031738281 s -DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=39778133448029726069810616260304593093, time:1750767182.0531948s req_ids:[8] -DEBUG 06-24 20:13:02 [manager.py:391] -ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:01 lightllm_req_id:8 first_token_cost:422.976016998291ms total_cost_time:423.0198860168457ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7894 prompt_cache_len:5151 prompt_cache_ratio:0.6525209019508488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 -DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10775566101074219 s -INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s -DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=44698441075349810791249508441735903748, time:1750767182.2686117s req_ids:[8] -DEBUG 06-24 20:13:02 [manager.py:391] -ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:204.42795753479004ms total_cost_time:204.47421073913574ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7895 prompt_cache_len:5151 prompt_cache_ratio:0.6524382520582648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 -DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10716891288757324 s -INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.1090552806854248 s -DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=177655514827792966030129165712151640473, time:1750767182.4973726s req_ids:[8] -DEBUG 06-24 20:13:02 [manager.py:391] -ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:208.7094783782959ms total_cost_time:208.7538242340088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7896 prompt_cache_len:5151 prompt_cache_ratio:0.6523556231003039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 -DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10757899284362793 s -INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.10967493057250977 s -DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=62159712570324668989569367859642289889, time:1750767182.6907918s req_ids:[8] -DEBUG 06-24 20:13:02 [manager.py:391] -ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:200.35243034362793ms total_cost_time:200.3955841064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7897 prompt_cache_len:5151 prompt_cache_ratio:0.6522730150690136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 -DEBUG 06-24 20:13:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:02 [manager.py:224] router recive req id 8 cost time 0.10933208465576172 s -INFO 06-24 20:13:02 [manager.py:68] detokenization recv req id 8 cost time 0.11121678352355957 s -DEBUG 06-24 20:13:02 [manager.py:391] Prefill Batch: batch_id=108348845364740487634179052781422269795, time:1750767182.8985214s req_ids:[8] -DEBUG 06-24 20:13:02 [manager.py:391] -ERROR 06-24 20:13:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:203.49597930908203ms total_cost_time:203.54056358337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7898 prompt_cache_len:5151 prompt_cache_ratio:0.6521904279564447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 -DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s -INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.11079144477844238 s -DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=6605860236587641384885165777443432748, time:1750767183.1096194s req_ids:[8] -DEBUG 06-24 20:13:03 [manager.py:391] -ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:02 lightllm_req_id:8 first_token_cost:201.32732391357422ms total_cost_time:201.3697624206543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7899 prompt_cache_len:5151 prompt_cache_ratio:0.6521078617546525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 -DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10791945457458496 s -INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.10991621017456055 s -DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=289821486839042290796382636304548662413, time:1750767183.3158336s req_ids:[8] -DEBUG 06-24 20:13:03 [manager.py:391] -ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:206.1479091644287ms total_cost_time:206.1939239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7900 prompt_cache_len:5151 prompt_cache_ratio:0.6520253164556962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 -DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10901093482971191 s -INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.11100316047668457 s -DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=275711844514365880620415851361824461795, time:1750767183.5266416s req_ids:[8] -DEBUG 06-24 20:13:03 [manager.py:391] -ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:204.11133766174316ms total_cost_time:204.15568351745605ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7901 prompt_cache_len:5151 prompt_cache_ratio:0.6519427920516391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 -DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10836672782897949 s -INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102902889251709 s -DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=123694756116836612678339531542087918998, time:1750767183.7368822s req_ids:[8] -DEBUG 06-24 20:13:03 [manager.py:391] -ERROR 06-24 20:13:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:197.0236301422119ms total_cost_time:197.0670223236084ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7902 prompt_cache_len:5151 prompt_cache_ratio:0.6518602885345482 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 -DEBUG 06-24 20:13:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:03 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s -INFO 06-24 20:13:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s -DEBUG 06-24 20:13:03 [manager.py:391] Prefill Batch: batch_id=123775485109388652976857619359068068796, time:1750767183.9393003s req_ids:[8] -DEBUG 06-24 20:13:03 [manager.py:391] -ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:03 lightllm_req_id:8 first_token_cost:203.72915267944336ms total_cost_time:203.78541946411133ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:7903 prompt_cache_len:5151 prompt_cache_ratio:0.651777805896495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 -DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.3103783130645752 s -INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.3123209476470947 s -DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=335894166381298754951731075756251215310, time:1750767184.3479855s req_ids:[8] -DEBUG 06-24 20:13:04 [manager.py:391] -ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:374.713659286499ms total_cost_time:374.7575283050537ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7904 prompt_cache_len:5151 prompt_cache_ratio:0.6516953441295547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 -DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10678696632385254 s -INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.1085667610168457 s -DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=237800872036648745128847456497235724796, time:1750767184.5361698s req_ids:[8] -DEBUG 06-24 20:13:04 [manager.py:391] -ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:173.8452911376953ms total_cost_time:173.89750480651855ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:7905 prompt_cache_len:5151 prompt_cache_ratio:0.6516129032258065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 -DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10713076591491699 s -INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.10918450355529785 s -DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=39255547682258642672138592418649323969, time:1750767184.7130153s req_ids:[8] -DEBUG 06-24 20:13:04 [manager.py:391] -ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:179.38971519470215ms total_cost_time:179.43620681762695ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7906 prompt_cache_len:5151 prompt_cache_ratio:0.6515304831773336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 -DEBUG 06-24 20:13:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:04 [manager.py:224] router recive req id 8 cost time 0.10713577270507812 s -INFO 06-24 20:13:04 [manager.py:68] detokenization recv req id 8 cost time 0.1089634895324707 s -DEBUG 06-24 20:13:04 [manager.py:391] Prefill Batch: batch_id=173481927397527039614188545363467387475, time:1750767184.902614s req_ids:[8] -DEBUG 06-24 20:13:04 [manager.py:391] -ERROR 06-24 20:13:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:185.92357635498047ms total_cost_time:185.96696853637695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7907 prompt_cache_len:5151 prompt_cache_ratio:0.6514480839762236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 -DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10714173316955566 s -INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10898113250732422 s -DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=190578080940050250896555868563495249006, time:1750767185.0922885s req_ids:[8] -DEBUG 06-24 20:13:05 [manager.py:391] -ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:04 lightllm_req_id:8 first_token_cost:182.1463108062744ms total_cost_time:182.1916103363037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7908 prompt_cache_len:5151 prompt_cache_ratio:0.6513657056145675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 -DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s -INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10920333862304688 s -DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=266639939143359683303859665175877895688, time:1750767185.2803514s req_ids:[8] -DEBUG 06-24 20:13:05 [manager.py:391] -ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:184.91744995117188ms total_cost_time:184.96417999267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7909 prompt_cache_len:5151 prompt_cache_ratio:0.6512833480844608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 -DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10914444923400879 s -INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.11112594604492188 s -DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=340250830668207411367088568228091772786, time:1750767185.465908s req_ids:[8] -DEBUG 06-24 20:13:05 [manager.py:391] -ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:204.15782928466797ms total_cost_time:204.20360565185547ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7910 prompt_cache_len:5151 prompt_cache_ratio:0.6512010113780026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 -DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10770893096923828 s -INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.10984253883361816 s -DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=152551029912292411840390979526489326153, time:1750767185.6765435s req_ids:[8] -DEBUG 06-24 20:13:05 [manager.py:391] -ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:202.4819850921631ms total_cost_time:202.52680778503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7911 prompt_cache_len:5151 prompt_cache_ratio:0.6511186954872962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 -DEBUG 06-24 20:13:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:05 [manager.py:224] router recive req id 8 cost time 0.10759139060974121 s -INFO 06-24 20:13:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s -DEBUG 06-24 20:13:05 [manager.py:391] Prefill Batch: batch_id=121832564358685040198901278251330890652, time:1750767185.8840709s req_ids:[8] -DEBUG 06-24 20:13:05 [manager.py:391] -ERROR 06-24 20:13:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:206.82978630065918ms total_cost_time:206.90298080444336ms,out_token_counter:1 mean_per_token_cost_time: 0.07319450378417969ms prompt_token_num:7912 prompt_cache_len:5151 prompt_cache_ratio:0.651036400404449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 -DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10760879516601562 s -INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.10966038703918457 s -DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=13153126015800782008200206057395262524, time:1750767186.096664s req_ids:[8] -DEBUG 06-24 20:13:06 [manager.py:391] -ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:05 lightllm_req_id:8 first_token_cost:359.50756072998047ms total_cost_time:359.5540523529053ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:7913 prompt_cache_len:5151 prompt_cache_ratio:0.6509541261215721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 -DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.1082000732421875 s -INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023831367492676 s -DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=226346503673033865273859597184193016819, time:1750767186.4575088s req_ids:[8] -DEBUG 06-24 20:13:06 [manager.py:391] -ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:197.99566268920898ms total_cost_time:198.03905487060547ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7914 prompt_cache_len:5151 prompt_cache_ratio:0.650871872630781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 -DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s -INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.11020445823669434 s -DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=209418053219207086454191845195469947598, time:1750767186.6653643s req_ids:[8] -DEBUG 06-24 20:13:06 [manager.py:391] -ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:201.39050483703613ms total_cost_time:201.43532752990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7915 prompt_cache_len:5151 prompt_cache_ratio:0.6507896399241946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 -DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:06 [manager.py:224] router recive req id 8 cost time 0.10780835151672363 s -INFO 06-24 20:13:06 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s -DEBUG 06-24 20:13:06 [manager.py:391] Prefill Batch: batch_id=172098795589316693942510050371878336921, time:1750767186.873043s req_ids:[8] -DEBUG 06-24 20:13:06 [manager.py:391] -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:205.95598220825195ms total_cost_time:206.00104331970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7916 prompt_cache_len:5151 prompt_cache_ratio:0.6507074279939363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 -DEBUG 06-24 20:13:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10806965827941895 s -INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10998010635375977 s -DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=65473686777600051045740449582883380537, time:1750767187.0843024s req_ids:[8] -DEBUG 06-24 20:13:07 [manager.py:391] -ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:06 lightllm_req_id:8 first_token_cost:205.81698417663574ms total_cost_time:205.86085319519043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7917 prompt_cache_len:5151 prompt_cache_ratio:0.6506252368321334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 -DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.1078944206237793 s -INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.1099245548248291 s -DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=171341821113669339273368265654742421590, time:1750767187.2954495s req_ids:[8] -DEBUG 06-24 20:13:07 [manager.py:391] -ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:205.60932159423828ms total_cost_time:205.65390586853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7918 prompt_cache_len:5151 prompt_cache_ratio:0.6505430664309169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 -DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10785794258117676 s -INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10975098609924316 s -DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=272346332865439823118449374424592895924, time:1750767187.5076268s req_ids:[8] -DEBUG 06-24 20:13:07 [manager.py:391] -ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:202.61573791503906ms total_cost_time:202.66008377075195ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7919 prompt_cache_len:5151 prompt_cache_ratio:0.650460916782422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 -DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10987687110900879 s -INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.11219382286071777 s -DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=81938992990779597663101351136034808955, time:1750767187.714619s req_ids:[8] -DEBUG 06-24 20:13:07 [manager.py:391] -DEBUG 06-24 20:13:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 36051.913 tokens/s -DEBUG 06-24 20:13:07 [stats.py:37] Avg prompt tokens throughput: 36042.786 tokens/s -DEBUG 06-24 20:13:07 [stats.py:37] Avg generate tokens throughput: 9.128 tokens/s -ERROR 06-24 20:13:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:204.15234565734863ms total_cost_time:204.19836044311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7920 prompt_cache_len:5151 prompt_cache_ratio:0.6503787878787879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 -DEBUG 06-24 20:13:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:07 [manager.py:224] router recive req id 8 cost time 0.10734820365905762 s -INFO 06-24 20:13:07 [manager.py:68] detokenization recv req id 8 cost time 0.10941362380981445 s -DEBUG 06-24 20:13:07 [manager.py:391] Prefill Batch: batch_id=230638070399174263728168433577006515524, time:1750767187.935181s req_ids:[8] -DEBUG 06-24 20:13:07 [manager.py:391] -ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:07 lightllm_req_id:8 first_token_cost:213.0739688873291ms total_cost_time:213.1185531616211ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7921 prompt_cache_len:5151 prompt_cache_ratio:0.6502966797121575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 -DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.1088099479675293 s -INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.11080026626586914 s -DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=236806128764871171734798511644940751512, time:1750767188.1417224s req_ids:[8] -DEBUG 06-24 20:13:08 [manager.py:391] -ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:203.0937671661377ms total_cost_time:203.13763618469238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7922 prompt_cache_len:5151 prompt_cache_ratio:0.6502145922746781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 -DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10749149322509766 s -INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.10953092575073242 s -DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=240696199067258421943873084107753893066, time:1750767188.3506975s req_ids:[8] -DEBUG 06-24 20:13:08 [manager.py:391] -ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:367.6939010620117ms total_cost_time:367.74158477783203ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:7923 prompt_cache_len:5151 prompt_cache_ratio:0.6501325255585005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 -DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10718536376953125 s -INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.10913324356079102 s -DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=46218260897524850275658883935598911374, time:1750767188.72318s req_ids:[8] -DEBUG 06-24 20:13:08 [manager.py:391] -ERROR 06-24 20:13:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:203.67884635925293ms total_cost_time:203.72247695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7924 prompt_cache_len:5151 prompt_cache_ratio:0.6500504795557799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 -DEBUG 06-24 20:13:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:08 [manager.py:224] router recive req id 8 cost time 0.10895037651062012 s -INFO 06-24 20:13:08 [manager.py:68] detokenization recv req id 8 cost time 0.11003494262695312 s -DEBUG 06-24 20:13:08 [manager.py:391] Prefill Batch: batch_id=87202976777824432516454560716265878975, time:1750767188.9416347s req_ids:[8] -DEBUG 06-24 20:13:08 [manager.py:391] -ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:08 lightllm_req_id:8 first_token_cost:214.57767486572266ms total_cost_time:214.62297439575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7925 prompt_cache_len:5151 prompt_cache_ratio:0.6499684542586751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 -DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10826468467712402 s -INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.11015796661376953 s -DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=235785113419115753343401632089706793226, time:1750767189.158465s req_ids:[8] -DEBUG 06-24 20:13:09 [manager.py:391] -ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:209.26785469055176ms total_cost_time:209.31458473205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7926 prompt_cache_len:5151 prompt_cache_ratio:0.6498864496593489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 -DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10793662071228027 s -INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.10944557189941406 s -DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=246578625919351810075038972413302716947, time:1750767189.3711963s req_ids:[8] -DEBUG 06-24 20:13:09 [manager.py:391] -ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:205.60860633850098ms total_cost_time:205.65247535705566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7927 prompt_cache_len:5151 prompt_cache_ratio:0.6498044657499684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 -DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10806536674499512 s -INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.10982680320739746 s -DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=169929738287265903565011866029648777119, time:1750767189.5820098s req_ids:[8] -DEBUG 06-24 20:13:09 [manager.py:391] -ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:206.95090293884277ms total_cost_time:206.99787139892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7928 prompt_cache_len:5151 prompt_cache_ratio:0.6497225025227044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 -DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:09 [manager.py:224] router recive req id 8 cost time 0.10829949378967285 s -INFO 06-24 20:13:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024141311645508 s -DEBUG 06-24 20:13:09 [manager.py:391] Prefill Batch: batch_id=12643162153161367092062928905874082559, time:1750767189.8050375s req_ids:[8] -DEBUG 06-24 20:13:09 [manager.py:391] -ERROR 06-24 20:13:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:218.69254112243652ms total_cost_time:218.75262260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:7929 prompt_cache_len:5151 prompt_cache_ratio:0.6496405599697314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 -DEBUG 06-24 20:13:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10791230201721191 s -INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.10975050926208496 s -DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=172499203099025787549400814559674835046, time:1750767190.0256903s req_ids:[8] -DEBUG 06-24 20:13:10 [manager.py:391] -ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:09 lightllm_req_id:8 first_token_cost:214.1721248626709ms total_cost_time:214.2188549041748ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7930 prompt_cache_len:5151 prompt_cache_ratio:0.6495586380832282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 -DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10918116569519043 s -INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.11094164848327637 s -DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=82108129192379092560528808504768085724, time:1750767190.2386286s req_ids:[8] -DEBUG 06-24 20:13:10 [manager.py:391] -ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:170.58277130126953ms total_cost_time:170.6857681274414ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:7931 prompt_cache_len:5151 prompt_cache_ratio:0.6494767368553777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 -DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10782790184020996 s -INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.10999035835266113 s -DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=190329048025523621231116025914761801299, time:1750767190.413228s req_ids:[8] -DEBUG 06-24 20:13:10 [manager.py:391] -ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:196.61879539489746ms total_cost_time:196.66337966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7932 prompt_cache_len:5151 prompt_cache_ratio:0.6493948562783661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 -DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:10 [manager.py:224] router recive req id 8 cost time 0.10846233367919922 s -INFO 06-24 20:13:10 [manager.py:68] detokenization recv req id 8 cost time 0.11048293113708496 s -DEBUG 06-24 20:13:10 [manager.py:391] Prefill Batch: batch_id=3605447209226160400306186525110061324, time:1750767190.616704s req_ids:[8] -DEBUG 06-24 20:13:10 [manager.py:391] -ERROR 06-24 20:13:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:203.51839065551758ms total_cost_time:203.56440544128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7933 prompt_cache_len:5151 prompt_cache_ratio:0.6493129963443842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 -DEBUG 06-24 20:13:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.3096907138824463 s -INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.3118479251861572 s -DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=50382169885984366261941334393795361844, time:1750767191.0408227s req_ids:[8] -DEBUG 06-24 20:13:11 [manager.py:391] -ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:10 lightllm_req_id:8 first_token_cost:420.7601547241211ms total_cost_time:420.8204746246338ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:7934 prompt_cache_len:5151 prompt_cache_ratio:0.6492311570456264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 -DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.10867834091186523 s -INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11068081855773926 s -DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=45859994211609285005085830790199660753, time:1750767191.2545052s req_ids:[8] -DEBUG 06-24 20:13:11 [manager.py:391] -ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:206.18057250976562ms total_cost_time:206.21871948242188ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:7935 prompt_cache_len:5151 prompt_cache_ratio:0.6491493383742911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 -DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.11047768592834473 s -INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11294674873352051 s -DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=189644505788787314905127435530256993724, time:1750767191.4665513s req_ids:[8] -DEBUG 06-24 20:13:11 [manager.py:391] -ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:204.70213890075684ms total_cost_time:204.74672317504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7936 prompt_cache_len:5151 prompt_cache_ratio:0.6490675403225806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 -DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:11 [batch.py:51] router release req id 8 -INFO 06-24 20:13:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s -INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.11016082763671875 s -DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=219821321002343569697730238889961466858, time:1750767191.6753488s req_ids:[8] -DEBUG 06-24 20:13:11 [manager.py:391] -ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:204.57887649536133ms total_cost_time:204.60963249206543ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:7937 prompt_cache_len:5151 prompt_cache_ratio:0.6489857628827013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 -DEBUG 06-24 20:13:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:11 [manager.py:224] router recive req id 8 cost time 0.1078028678894043 s -INFO 06-24 20:13:11 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s -DEBUG 06-24 20:13:11 [manager.py:391] Prefill Batch: batch_id=298207810121136073618616016893981759736, time:1750767191.8858616s req_ids:[8] -DEBUG 06-24 20:13:11 [manager.py:391] -ERROR 06-24 20:13:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:202.73661613464355ms total_cost_time:202.78334617614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:7938 prompt_cache_len:5151 prompt_cache_ratio:0.6489040060468632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 -DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1087496280670166 s -INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.11087179183959961 s -DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=251706297057370749193957462732433719124, time:1750767192.095412s req_ids:[8] -DEBUG 06-24 20:13:12 [manager.py:391] -ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:11 lightllm_req_id:8 first_token_cost:205.4281234741211ms total_cost_time:205.4729461669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7939 prompt_cache_len:5151 prompt_cache_ratio:0.6488222698072805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 -DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1075582504272461 s -INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.10955047607421875 s -DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=261201989736215395061009724072290674442, time:1750767192.306056s req_ids:[8] -DEBUG 06-24 20:13:12 [manager.py:391] -ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:204.02836799621582ms total_cost_time:204.07366752624512ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7940 prompt_cache_len:5151 prompt_cache_ratio:0.6487405541561713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 -DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s -INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.11104106903076172 s -DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=157037935830945172001923291864364440951, time:1750767192.5170796s req_ids:[8] -DEBUG 06-24 20:13:12 [manager.py:391] -ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:203.6001682281494ms total_cost_time:203.6597728729248ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:7941 prompt_cache_len:5151 prompt_cache_ratio:0.6486588590857575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 -DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.10979104042053223 s -INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.1118476390838623 s -DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=100849140383447369085005502856360103607, time:1750767192.7250402s req_ids:[8] -DEBUG 06-24 20:13:12 [manager.py:391] -ERROR 06-24 20:13:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:198.75741004943848ms total_cost_time:198.80247116088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7942 prompt_cache_len:5151 prompt_cache_ratio:0.648577184588265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 -DEBUG 06-24 20:13:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:12 [manager.py:224] router recive req id 8 cost time 0.1079094409942627 s -INFO 06-24 20:13:12 [manager.py:68] detokenization recv req id 8 cost time 0.10998392105102539 s -DEBUG 06-24 20:13:12 [manager.py:391] Prefill Batch: batch_id=188509554225064819795100941825890153293, time:1750767192.9326692s req_ids:[8] -DEBUG 06-24 20:13:12 [manager.py:391] -ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:12 lightllm_req_id:8 first_token_cost:205.7960033416748ms total_cost_time:205.8405876159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7943 prompt_cache_len:5151 prompt_cache_ratio:0.6484955306559235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 -DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.3096151351928711 s -INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.311603307723999 s -DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=165924940871535103958487311751476384802, time:1750767193.3451605s req_ids:[8] -DEBUG 06-24 20:13:13 [manager.py:391] -ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:411.43321990966797ms total_cost_time:411.47923469543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7944 prompt_cache_len:5151 prompt_cache_ratio:0.6484138972809668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 -DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.10728573799133301 s -INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.10907793045043945 s -DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=72000306954325854122893826628957377457, time:1750767193.5611944s req_ids:[8] -DEBUG 06-24 20:13:13 [manager.py:391] -ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:176.60236358642578ms total_cost_time:176.64480209350586ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:7945 prompt_cache_len:5151 prompt_cache_ratio:0.6483322844556325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 -DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.11027312278747559 s -INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.11226415634155273 s -DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=26976301927374775297573615978889401296, time:1750767193.7499294s req_ids:[8] -DEBUG 06-24 20:13:13 [manager.py:391] -ERROR 06-24 20:13:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:196.70963287353516ms total_cost_time:196.75374031066895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7946 prompt_cache_len:5151 prompt_cache_ratio:0.6482506921721621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 -DEBUG 06-24 20:13:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:13 [manager.py:224] router recive req id 8 cost time 0.10876059532165527 s -INFO 06-24 20:13:13 [manager.py:68] detokenization recv req id 8 cost time 0.1107943058013916 s -DEBUG 06-24 20:13:13 [manager.py:391] Prefill Batch: batch_id=17795288803662155671368493058997001428, time:1750767193.9432232s req_ids:[8] -DEBUG 06-24 20:13:13 [manager.py:391] -ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:13 lightllm_req_id:8 first_token_cost:194.3838596343994ms total_cost_time:194.42105293273926ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:7947 prompt_cache_len:5151 prompt_cache_ratio:0.6481691204228011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 -DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s -INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.10978436470031738 s -DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=102365839971098244946502808781547108031, time:1750767194.149403s req_ids:[8] -DEBUG 06-24 20:13:14 [manager.py:391] -ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:205.3685188293457ms total_cost_time:205.4131031036377ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7948 prompt_cache_len:5151 prompt_cache_ratio:0.6480875691997987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 -DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10853886604309082 s -INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.1105196475982666 s -DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=108852590672583081808096388461669505108, time:1750767194.3614717s req_ids:[8] -DEBUG 06-24 20:13:14 [manager.py:391] -ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:205.60407638549805ms total_cost_time:205.64842224121094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7949 prompt_cache_len:5151 prompt_cache_ratio:0.6480060384954083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 -DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s -INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.11044073104858398 s -DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=193820767272036912060725222124523004983, time:1750767194.5727732s req_ids:[8] -DEBUG 06-24 20:13:14 [manager.py:391] -ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:204.72121238708496ms total_cost_time:204.76484298706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7950 prompt_cache_len:5151 prompt_cache_ratio:0.6479245283018867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 -DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.10715079307556152 s -INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.10897445678710938 s -DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=100201376188519901765503312076035811969, time:1750767194.7837389s req_ids:[8] -DEBUG 06-24 20:13:14 [manager.py:391] -ERROR 06-24 20:13:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:168.532133102417ms total_cost_time:168.5769557952881ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7951 prompt_cache_len:5151 prompt_cache_ratio:0.6478430386114954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 -DEBUG 06-24 20:13:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:14 [manager.py:224] router recive req id 8 cost time 0.1085958480834961 s -INFO 06-24 20:13:14 [manager.py:68] detokenization recv req id 8 cost time 0.1105966567993164 s -DEBUG 06-24 20:13:14 [manager.py:391] Prefill Batch: batch_id=80175787666720453699911710298814146851, time:1750767194.9575622s req_ids:[8] -DEBUG 06-24 20:13:14 [manager.py:391] -ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:14 lightllm_req_id:8 first_token_cost:195.47295570373535ms total_cost_time:195.51801681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7952 prompt_cache_len:5151 prompt_cache_ratio:0.647761569416499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 -DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:15 [batch.py:51] router release req id 8 -INFO 06-24 20:13:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s -INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.1103367805480957 s -DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=291063739834872920566854934898680732024, time:1750767195.1586053s req_ids:[8] -DEBUG 06-24 20:13:15 [manager.py:391] -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:362.4105453491211ms total_cost_time:362.4553680419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7953 prompt_cache_len:5151 prompt_cache_ratio:0.6476801207091664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 -DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.1086263656616211 s -INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.11065411567687988 s -DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=241528825606693664583590914976001702960, time:1750767195.5245929s req_ids:[8] -DEBUG 06-24 20:13:15 [manager.py:391] -ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:203.05347442626953ms total_cost_time:203.0966281890869ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7954 prompt_cache_len:5151 prompt_cache_ratio:0.6475986924817702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 -DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s -INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s -DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=19452618694001067806174437045959048889, time:1750767195.7363954s req_ids:[8] -DEBUG 06-24 20:13:15 [manager.py:391] -ERROR 06-24 20:13:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:205.1067352294922ms total_cost_time:205.15131950378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7955 prompt_cache_len:5151 prompt_cache_ratio:0.6475172847265871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 -DEBUG 06-24 20:13:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:15 [manager.py:224] router recive req id 8 cost time 0.10867929458618164 s -INFO 06-24 20:13:15 [manager.py:68] detokenization recv req id 8 cost time 0.11081171035766602 s -DEBUG 06-24 20:13:15 [manager.py:391] Prefill Batch: batch_id=311462142156239411046205829092249669040, time:1750767195.948825s req_ids:[8] -DEBUG 06-24 20:13:15 [manager.py:391] -ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:15 lightllm_req_id:8 first_token_cost:207.8399658203125ms total_cost_time:207.8852653503418ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7956 prompt_cache_len:5151 prompt_cache_ratio:0.6474358974358975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 -DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10894131660461426 s -INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11071896553039551 s -DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=177499050341133244490451836185537531683, time:1750767196.1605725s req_ids:[8] -DEBUG 06-24 20:13:16 [manager.py:391] -ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:167.80614852905273ms total_cost_time:167.8483486175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:7957 prompt_cache_len:5151 prompt_cache_ratio:0.6473545306019857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 -DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10842585563659668 s -INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s -DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=178673114598216798993756536284522377100, time:1750767196.3331385s req_ids:[8] -DEBUG 06-24 20:13:16 [manager.py:391] -ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:199.65696334838867ms total_cost_time:199.70202445983887ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7958 prompt_cache_len:5151 prompt_cache_ratio:0.64727318421714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 -INFO 06-24 20:13:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.1084892749786377 s -INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11057305335998535 s -DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=323102430961154705342918125938112755382, time:1750767196.5380726s req_ids:[8] -DEBUG 06-24 20:13:16 [manager.py:391] -ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:204.66208457946777ms total_cost_time:204.70690727233887ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7959 prompt_cache_len:5151 prompt_cache_ratio:0.6471918582736524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 -DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10920429229736328 s -INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.11141395568847656 s -DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=84207559269411626679544389255632117579, time:1750767196.7503664s req_ids:[8] -DEBUG 06-24 20:13:16 [manager.py:391] -ERROR 06-24 20:13:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:204.3752670288086ms total_cost_time:204.41865921020508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7960 prompt_cache_len:5151 prompt_cache_ratio:0.6471105527638191 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 -DEBUG 06-24 20:13:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:16 [manager.py:224] router recive req id 8 cost time 0.10753083229064941 s -INFO 06-24 20:13:16 [manager.py:68] detokenization recv req id 8 cost time 0.10955405235290527 s -DEBUG 06-24 20:13:16 [manager.py:391] Prefill Batch: batch_id=10991299536171667354359562329823648315, time:1750767196.9578123s req_ids:[8] -DEBUG 06-24 20:13:16 [manager.py:391] -ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:16 lightllm_req_id:8 first_token_cost:205.98673820495605ms total_cost_time:206.03084564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7961 prompt_cache_len:5151 prompt_cache_ratio:0.6470292676799397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 -DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10987234115600586 s -INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.11175227165222168 s -DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=76897185658495584524723295597724586468, time:1750767197.170621s req_ids:[8] -DEBUG 06-24 20:13:17 [manager.py:391] -ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:209.35392379760742ms total_cost_time:209.4588279724121ms,out_token_counter:1 mean_per_token_cost_time: 0.1049041748046875ms prompt_token_num:7962 prompt_cache_len:5151 prompt_cache_ratio:0.646948003014318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 -DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10850858688354492 s -INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.11055994033813477 s -DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=162023126535662793328347509828152377396, time:1750767197.3830712s req_ids:[8] -DEBUG 06-24 20:13:17 [manager.py:391] -ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:373.0955123901367ms total_cost_time:373.1415271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7963 prompt_cache_len:5151 prompt_cache_ratio:0.6468667587592616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 -DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10791873931884766 s -INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.10986161231994629 s -DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=324684405530242042566443508761391729032, time:1750767197.761375s req_ids:[8] -DEBUG 06-24 20:13:17 [manager.py:391] -DEBUG 06-24 20:13:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 34793.244 tokens/s -DEBUG 06-24 20:13:17 [stats.py:37] Avg prompt tokens throughput: 34784.485 tokens/s -DEBUG 06-24 20:13:17 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s -ERROR 06-24 20:13:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:199.9685764312744ms total_cost_time:200.0138759613037ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7964 prompt_cache_len:5151 prompt_cache_ratio:0.6467855349070819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 -DEBUG 06-24 20:13:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:17 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s -INFO 06-24 20:13:17 [manager.py:68] detokenization recv req id 8 cost time 0.1101539134979248 s -DEBUG 06-24 20:13:17 [manager.py:391] Prefill Batch: batch_id=162766083698922772247394849587732474131, time:1750767197.972497s req_ids:[8] -DEBUG 06-24 20:13:17 [manager.py:391] -ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:17 lightllm_req_id:8 first_token_cost:204.81300354003906ms total_cost_time:204.85591888427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7965 prompt_cache_len:5151 prompt_cache_ratio:0.6467043314500942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 -DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s -INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s -DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=33312534504588364210253046676642202074, time:1750767198.1811037s req_ids:[8] -DEBUG 06-24 20:13:18 [manager.py:391] -ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:206.2242031097412ms total_cost_time:206.2702178955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:7966 prompt_cache_len:5151 prompt_cache_ratio:0.6466231483806176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 -DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s -INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.1118166446685791 s -DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=136079266570731363741327080388681070218, time:1750767198.3936095s req_ids:[8] -DEBUG 06-24 20:13:18 [manager.py:391] -ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:207.21983909606934ms total_cost_time:207.26370811462402ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7967 prompt_cache_len:5151 prompt_cache_ratio:0.6465419856909753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 -DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10869240760803223 s -INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11053967475891113 s -DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=185906662755667593526781665256658650992, time:1750767198.6071663s req_ids:[8] -DEBUG 06-24 20:13:18 [manager.py:391] -ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:219.13981437683105ms total_cost_time:219.18702125549316ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:7968 prompt_cache_len:5151 prompt_cache_ratio:0.646460843373494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 -DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:18 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s -INFO 06-24 20:13:18 [manager.py:68] detokenization recv req id 8 cost time 0.11064028739929199 s -DEBUG 06-24 20:13:18 [manager.py:391] Prefill Batch: batch_id=132403851612439993135478933216699860822, time:1750767198.8411586s req_ids:[8] -DEBUG 06-24 20:13:18 [manager.py:391] -ERROR 06-24 20:13:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:217.43011474609375ms total_cost_time:217.47398376464844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7969 prompt_cache_len:5151 prompt_cache_ratio:0.6463797214205045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 -DEBUG 06-24 20:13:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10910558700561523 s -INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.11117362976074219 s -DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=252219083471049875361079567845645639394, time:1750767199.0516593s req_ids:[8] -DEBUG 06-24 20:13:19 [manager.py:391] -ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:18 lightllm_req_id:8 first_token_cost:203.60255241394043ms total_cost_time:203.64665985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7970 prompt_cache_len:5151 prompt_cache_ratio:0.6462986198243412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 -DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10843896865844727 s -INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.11037373542785645 s -DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=144389132066047642571520266776842730411, time:1750767199.264224s req_ids:[8] -DEBUG 06-24 20:13:19 [manager.py:391] -ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:206.02703094482422ms total_cost_time:206.0716152191162ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:7971 prompt_cache_len:5151 prompt_cache_ratio:0.6462175385773429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 -DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.1077725887298584 s -INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.1098170280456543 s -DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=273780341120515948085496226309683162227, time:1750767199.4756942s req_ids:[8] -DEBUG 06-24 20:13:19 [manager.py:391] -ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:210.5717658996582ms total_cost_time:210.61468124389648ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:7972 prompt_cache_len:5151 prompt_cache_ratio:0.6461364776718515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 -DEBUG 06-24 20:13:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:19 [manager.py:224] router recive req id 8 cost time 0.10817337036132812 s -INFO 06-24 20:13:19 [manager.py:68] detokenization recv req id 8 cost time 0.1101233959197998 s -DEBUG 06-24 20:13:19 [manager.py:391] Prefill Batch: batch_id=220345837017607607103329194752039998753, time:1750767199.7025595s req_ids:[8] -DEBUG 06-24 20:13:19 [manager.py:391] -ERROR 06-24 20:13:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:219.36297416687012ms total_cost_time:219.40875053405762ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7973 prompt_cache_len:5151 prompt_cache_ratio:0.6460554371002132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 -DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.3097507953643799 s -INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.3119313716888428 s -DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=253200887723432504106460219835936193951, time:1750767200.1237783s req_ids:[8] -DEBUG 06-24 20:13:20 [manager.py:391] -ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:19 lightllm_req_id:8 first_token_cost:421.9388961791992ms total_cost_time:421.9832420349121ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7974 prompt_cache_len:5151 prompt_cache_ratio:0.6459744168547781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 -DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10828685760498047 s -INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.11050748825073242 s -DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=318400449752076669691656270146283111966, time:1750767200.3446972s req_ids:[8] -DEBUG 06-24 20:13:20 [manager.py:391] -ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:204.50353622436523ms total_cost_time:204.54859733581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7975 prompt_cache_len:5151 prompt_cache_ratio:0.6458934169278997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 -DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10860872268676758 s -INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.11049008369445801 s -DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=311269633079447175632913857033269319962, time:1750767200.5615158s req_ids:[8] -DEBUG 06-24 20:13:20 [manager.py:391] -ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:213.07730674743652ms total_cost_time:213.13953399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:7976 prompt_cache_len:5151 prompt_cache_ratio:0.6458124373119358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 -DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10779881477355957 s -INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.10982728004455566 s -DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=191985133987512044440306912255200162810, time:1750767200.7734928s req_ids:[8] -DEBUG 06-24 20:13:20 [manager.py:391] -ERROR 06-24 20:13:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:206.60042762756348ms total_cost_time:206.64429664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7977 prompt_cache_len:5151 prompt_cache_ratio:0.6457314779992478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 -DEBUG 06-24 20:13:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:20 [manager.py:224] router recive req id 8 cost time 0.10785651206970215 s -INFO 06-24 20:13:20 [manager.py:68] detokenization recv req id 8 cost time 0.10977029800415039 s -DEBUG 06-24 20:13:20 [manager.py:391] Prefill Batch: batch_id=57672486614255653457036316272276538107, time:1750767200.9856186s req_ids:[8] -DEBUG 06-24 20:13:20 [manager.py:391] -ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:20 lightllm_req_id:8 first_token_cost:206.9869041442871ms total_cost_time:207.0310115814209ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7978 prompt_cache_len:5151 prompt_cache_ratio:0.645650538982201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 -DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.11068034172058105 s -INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.1126868724822998 s -DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=225618012719256400572124766861540711218, time:1750767201.1983144s req_ids:[8] -DEBUG 06-24 20:13:21 [manager.py:391] -ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:206.6514492034912ms total_cost_time:206.6962718963623ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7979 prompt_cache_len:5151 prompt_cache_ratio:0.6455696202531646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 -DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s -INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.10996317863464355 s -DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=258356704852361665615646251548933254746, time:1750767201.4103487s req_ids:[8] -DEBUG 06-24 20:13:21 [manager.py:391] -ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:208.5404396057129ms total_cost_time:208.58359336853027ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:7980 prompt_cache_len:5151 prompt_cache_ratio:0.6454887218045112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 -DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10837483406066895 s -INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s -DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=108367910749049095762624023086147352603, time:1750767201.623241s req_ids:[8] -DEBUG 06-24 20:13:21 [manager.py:391] -ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:206.39729499816895ms total_cost_time:206.44235610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:7981 prompt_cache_len:5151 prompt_cache_ratio:0.6454078436286179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 -DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10700106620788574 s -INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.10878109931945801 s -DEBUG 06-24 20:13:21 [manager.py:391] Prefill Batch: batch_id=316821462377410352800186330713157359991, time:1750767201.8359358s req_ids:[8] -DEBUG 06-24 20:13:21 [manager.py:391] -ERROR 06-24 20:13:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:165.3287410736084ms total_cost_time:165.37213325500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:7982 prompt_cache_len:5151 prompt_cache_ratio:0.6453269857178652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 -DEBUG 06-24 20:13:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:21 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s -INFO 06-24 20:13:21 [manager.py:68] detokenization recv req id 8 cost time 0.11040139198303223 s -DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=16792091570059411058082003683852118587, time:1750767202.0052469s req_ids:[8] -DEBUG 06-24 20:13:22 [manager.py:391] -ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:21 lightllm_req_id:8 first_token_cost:198.2433795928955ms total_cost_time:198.2886791229248ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7983 prompt_cache_len:5151 prompt_cache_ratio:0.6452461480646373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 -DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.3109469413757324 s -INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.31298017501831055 s -DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=183208197559468521810446693559550644944, time:1750767202.419358s req_ids:[8] -DEBUG 06-24 20:13:22 [manager.py:391] -ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:420.7897186279297ms total_cost_time:420.8345413208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7984 prompt_cache_len:5151 prompt_cache_ratio:0.6451653306613226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 -DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.10910367965698242 s -INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.11121320724487305 s -DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=233162659706203182095491710370881821821, time:1750767202.6382375s req_ids:[8] -DEBUG 06-24 20:13:22 [manager.py:391] -ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:210.29925346374512ms total_cost_time:210.3445529937744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7985 prompt_cache_len:5151 prompt_cache_ratio:0.6450845335003131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 -DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:22 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s -INFO 06-24 20:13:22 [manager.py:68] detokenization recv req id 8 cost time 0.10964298248291016 s -DEBUG 06-24 20:13:22 [manager.py:391] Prefill Batch: batch_id=175047108610562315219631427288663882275, time:1750767202.8521001s req_ids:[8] -DEBUG 06-24 20:13:22 [manager.py:391] -ERROR 06-24 20:13:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:208.36496353149414ms total_cost_time:208.40930938720703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7986 prompt_cache_len:5151 prompt_cache_ratio:0.6450037565740045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 -DEBUG 06-24 20:13:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10775923728942871 s -INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10974526405334473 s -DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=89107591834513802921518294138822072337, time:1750767203.0652199s req_ids:[8] -DEBUG 06-24 20:13:23 [manager.py:391] -ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:22 lightllm_req_id:8 first_token_cost:203.17339897155762ms total_cost_time:203.2170295715332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:7987 prompt_cache_len:5151 prompt_cache_ratio:0.6449229998747965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 -DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:23 [batch.py:51] router release req id 8 -INFO 06-24 20:13:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:13:23 [statics_utils.py:24] mean first cost: 229.48906573588252 ms -INFO 06-24 20:13:23 [statics_utils.py:24] mean per token cost: 0.08431392781199956 ms -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10917448997497559 s -INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.11129927635192871 s -DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=71308346544288660219036677005860516361, time:1750767203.2821803s req_ids:[8] -DEBUG 06-24 20:13:23 [manager.py:391] -ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:206.68816566467285ms total_cost_time:206.74896240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:7988 prompt_cache_len:5151 prompt_cache_ratio:0.6448422633950927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 -DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10909271240234375 s -INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s -DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=230830846054799028040890274283817565569, time:1750767203.4892309s req_ids:[8] -DEBUG 06-24 20:13:23 [manager.py:391] -ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:204.8323154449463ms total_cost_time:204.87618446350098ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7989 prompt_cache_len:5151 prompt_cache_ratio:0.6447615471273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 -DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10789275169372559 s -INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10983514785766602 s -DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=191704778279780603684049451138835019044, time:1750767203.6991696s req_ids:[8] -DEBUG 06-24 20:13:23 [manager.py:391] -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:206.88652992248535ms total_cost_time:206.93039894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:7990 prompt_cache_len:5151 prompt_cache_ratio:0.6446808510638298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 -DEBUG 06-24 20:13:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:23 [manager.py:224] router recive req id 8 cost time 0.10717439651489258 s -INFO 06-24 20:13:23 [manager.py:68] detokenization recv req id 8 cost time 0.10902690887451172 s -DEBUG 06-24 20:13:23 [manager.py:391] Prefill Batch: batch_id=43003146286534761905898694334188232933, time:1750767203.914639s req_ids:[8] -DEBUG 06-24 20:13:23 [manager.py:391] -ERROR 06-24 20:13:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:23 lightllm_req_id:8 first_token_cost:200.76966285705566ms total_cost_time:200.81496238708496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:7991 prompt_cache_len:5151 prompt_cache_ratio:0.6446001751970968 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 -DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10965108871459961 s -INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11164569854736328 s -DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=18367124191187797340708065209219967734, time:1750767204.1204052s req_ids:[8] -DEBUG 06-24 20:13:24 [manager.py:391] -ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:204.31923866271973ms total_cost_time:204.36406135559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7992 prompt_cache_len:5151 prompt_cache_ratio:0.6445195195195195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 -DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10828542709350586 s -INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11034154891967773 s -DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=44964723072825769166666211452241269907, time:1750767204.329934s req_ids:[8] -DEBUG 06-24 20:13:24 [manager.py:391] -ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:211.6379737854004ms total_cost_time:211.6849422454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:7993 prompt_cache_len:5151 prompt_cache_ratio:0.6444388840235206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 -DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.31157779693603516 s -INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.3135087490081787 s -DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=9328866232456109829399433226299765447, time:1750767204.7530556s req_ids:[8] -DEBUG 06-24 20:13:24 [manager.py:391] -ERROR 06-24 20:13:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:418.00379753112793ms total_cost_time:418.0605411529541ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:7994 prompt_cache_len:5151 prompt_cache_ratio:0.6443582687015261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 -DEBUG 06-24 20:13:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:24 [manager.py:224] router recive req id 8 cost time 0.10833311080932617 s -INFO 06-24 20:13:24 [manager.py:68] detokenization recv req id 8 cost time 0.11092782020568848 s -DEBUG 06-24 20:13:24 [manager.py:391] Prefill Batch: batch_id=154038376663072427616142803706380359878, time:1750767204.971421s req_ids:[8] -DEBUG 06-24 20:13:24 [manager.py:391] -ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:24 lightllm_req_id:8 first_token_cost:207.00407028198242ms total_cost_time:207.05032348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:7995 prompt_cache_len:5151 prompt_cache_ratio:0.6442776735459662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 -DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s -INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.11118459701538086 s -DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=7534830560099383221859710888313675221, time:1750767205.184723s req_ids:[8] -DEBUG 06-24 20:13:25 [manager.py:391] -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:208.06527137756348ms total_cost_time:208.11104774475098ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:7996 prompt_cache_len:5151 prompt_cache_ratio:0.6441970985492746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 -DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10689687728881836 s -INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886645317077637 s -DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=276594682596052906015886712976346641591, time:1750767205.4019175s req_ids:[8] -DEBUG 06-24 20:13:25 [manager.py:391] -ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:214.57529067993164ms total_cost_time:214.62011337280273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:7997 prompt_cache_len:5151 prompt_cache_ratio:0.6441165437038889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 -DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10746264457702637 s -INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.1093595027923584 s -DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=337967489359525602128814517579388664542, time:1750767205.6165328s req_ids:[8] -DEBUG 06-24 20:13:25 [manager.py:391] -ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:208.77599716186523ms total_cost_time:208.82010459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:7998 prompt_cache_len:5151 prompt_cache_ratio:0.6440360090022506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 -DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:25 [batch.py:51] router release req id 8 -INFO 06-24 20:13:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:25 [manager.py:224] router recive req id 8 cost time 0.10781717300415039 s -INFO 06-24 20:13:25 [manager.py:68] detokenization recv req id 8 cost time 0.1098320484161377 s -DEBUG 06-24 20:13:25 [manager.py:391] Prefill Batch: batch_id=68138939121714499093178933998493010849, time:1750767205.829303s req_ids:[8] -DEBUG 06-24 20:13:25 [manager.py:391] -ERROR 06-24 20:13:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:199.751615524292ms total_cost_time:199.79596138000488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:7999 prompt_cache_len:5151 prompt_cache_ratio:0.6439554944368046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 -DEBUG 06-24 20:13:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.1088552474975586 s -INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s -DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=337770350031183940183620976415810615760, time:1750767206.034402s req_ids:[8] -DEBUG 06-24 20:13:26 [manager.py:391] -ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:25 lightllm_req_id:8 first_token_cost:205.71184158325195ms total_cost_time:205.77120780944824ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:8000 prompt_cache_len:5151 prompt_cache_ratio:0.643875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 -DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10732841491699219 s -INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.10937738418579102 s -DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=205096531034771445278059301635308550610, time:1750767206.2481425s req_ids:[8] -DEBUG 06-24 20:13:26 [manager.py:391] -ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:201.73382759094238ms total_cost_time:201.77817344665527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8001 prompt_cache_len:5151 prompt_cache_ratio:0.6437945256842894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 -DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10805034637451172 s -INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s -DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=301276533795778854086758299293579705845, time:1750767206.4560614s req_ids:[8] -DEBUG 06-24 20:13:26 [manager.py:391] -ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:207.29708671569824ms total_cost_time:207.34238624572754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8002 prompt_cache_len:5151 prompt_cache_ratio:0.6437140714821294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 -DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:26 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s -INFO 06-24 20:13:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055207252502441 s -DEBUG 06-24 20:13:26 [manager.py:391] Prefill Batch: batch_id=324273851657995431666401824684728740649, time:1750767206.6699553s req_ids:[8] -DEBUG 06-24 20:13:26 [manager.py:391] -ERROR 06-24 20:13:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:364.02392387390137ms total_cost_time:364.13049697875977ms,out_token_counter:1 mean_per_token_cost_time: 0.10657310485839844ms prompt_token_num:8003 prompt_cache_len:5151 prompt_cache_ratio:0.6436336373859802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 -DEBUG 06-24 20:13:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.1087644100189209 s -INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s -DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=189669802997694798807429187492652562204, time:1750767207.0344183s req_ids:[8] -DEBUG 06-24 20:13:27 [manager.py:391] -ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:26 lightllm_req_id:8 first_token_cost:201.77054405212402ms total_cost_time:201.81632041931152ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8004 prompt_cache_len:5151 prompt_cache_ratio:0.6435532233883059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 -DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10774064064025879 s -INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s -DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=85299969823290596692718017623202377358, time:1750767207.249235s req_ids:[8] -DEBUG 06-24 20:13:27 [manager.py:391] -ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:211.09342575073242ms total_cost_time:211.13920211791992ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8005 prompt_cache_len:5151 prompt_cache_ratio:0.643472829481574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 -DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10740804672241211 s -INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091604232788086 s -DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=189792808382240699496813175578660920062, time:1750767207.4628148s req_ids:[8] -DEBUG 06-24 20:13:27 [manager.py:391] -ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:166.86320304870605ms total_cost_time:166.90635681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8006 prompt_cache_len:5151 prompt_cache_ratio:0.6433924556582563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 -DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10839605331420898 s -INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.11035776138305664 s -DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=170986225010251020910561319639399839689, time:1750767207.636874s req_ids:[8] -DEBUG 06-24 20:13:27 [manager.py:391] -ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:199.1877555847168ms total_cost_time:199.23138618469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8007 prompt_cache_len:5151 prompt_cache_ratio:0.643312101910828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 -DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:27 [manager.py:224] router recive req id 8 cost time 0.10945844650268555 s -INFO 06-24 20:13:27 [manager.py:68] detokenization recv req id 8 cost time 0.1114344596862793 s -DEBUG 06-24 20:13:27 [manager.py:391] Prefill Batch: batch_id=75593178513738109247761338142377745273, time:1750767207.845318s req_ids:[8] -DEBUG 06-24 20:13:27 [manager.py:391] -DEBUG 06-24 20:13:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 34856.687 tokens/s -DEBUG 06-24 20:13:27 [stats.py:37] Avg prompt tokens throughput: 34847.961 tokens/s -DEBUG 06-24 20:13:27 [stats.py:37] Avg generate tokens throughput: 8.727 tokens/s -ERROR 06-24 20:13:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:205.20973205566406ms total_cost_time:205.25383949279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8008 prompt_cache_len:5151 prompt_cache_ratio:0.6432317682317682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 -DEBUG 06-24 20:13:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10815954208374023 s -INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.11006617546081543 s -DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=28755440544609540965911731894135949409, time:1750767208.051244s req_ids:[8] -DEBUG 06-24 20:13:28 [manager.py:391] -ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:27 lightllm_req_id:8 first_token_cost:203.7208080291748ms total_cost_time:203.7651538848877ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8009 prompt_cache_len:5151 prompt_cache_ratio:0.6431514546135597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 -DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s -INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.11016321182250977 s -DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=311237116191207968242434546976502323983, time:1750767208.2658818s req_ids:[8] -DEBUG 06-24 20:13:28 [manager.py:391] -ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:214.29872512817383ms total_cost_time:214.3573760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:8010 prompt_cache_len:5151 prompt_cache_ratio:0.6430711610486891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 -DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10785222053527832 s -INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.10986137390136719 s -DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=298065199028497056066814213255053417322, time:1750767208.4818952s req_ids:[8] -DEBUG 06-24 20:13:28 [manager.py:391] -ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:205.31201362609863ms total_cost_time:205.35564422607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8011 prompt_cache_len:5151 prompt_cache_ratio:0.6429908875296467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 -DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:28 [manager.py:224] router recive req id 8 cost time 0.10765743255615234 s -INFO 06-24 20:13:28 [manager.py:68] detokenization recv req id 8 cost time 0.10885310173034668 s -DEBUG 06-24 20:13:28 [manager.py:391] Prefill Batch: batch_id=248753396601127990204836315934974615862, time:1750767208.704955s req_ids:[8] -DEBUG 06-24 20:13:28 [manager.py:391] -ERROR 06-24 20:13:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:372.20048904418945ms total_cost_time:372.24602699279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8012 prompt_cache_len:5151 prompt_cache_ratio:0.6429106340489266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 -DEBUG 06-24 20:13:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10778951644897461 s -INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.10983562469482422 s -DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=62356335541022719459942403410366310392, time:1750767209.0688243s req_ids:[8] -DEBUG 06-24 20:13:29 [manager.py:391] -ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:28 lightllm_req_id:8 first_token_cost:203.68480682373047ms total_cost_time:203.72939109802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8013 prompt_cache_len:5151 prompt_cache_ratio:0.6428304005990266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 -DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10899496078491211 s -INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11028265953063965 s -DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=132568857199084135860643036116585255987, time:1750767209.2864473s req_ids:[8] -DEBUG 06-24 20:13:29 [manager.py:391] -ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:225.76236724853516ms total_cost_time:225.82364082336426ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8014 prompt_cache_len:5151 prompt_cache_ratio:0.6427501871724483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 -DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10742568969726562 s -INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s -DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=75760665572769204223699169450466371408, time:1750767209.5185494s req_ids:[8] -DEBUG 06-24 20:13:29 [manager.py:391] -ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:213.62018585205078ms total_cost_time:213.66524696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8015 prompt_cache_len:5151 prompt_cache_ratio:0.6426699937616969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 -DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s -INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11053895950317383 s -DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=295603085150416418505031132224439432380, time:1750767209.7340767s req_ids:[8] -DEBUG 06-24 20:13:29 [manager.py:391] -ERROR 06-24 20:13:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:213.37461471557617ms total_cost_time:213.41991424560547ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8016 prompt_cache_len:5151 prompt_cache_ratio:0.6425898203592815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 -DEBUG 06-24 20:13:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:29 [manager.py:224] router recive req id 8 cost time 0.10859918594360352 s -INFO 06-24 20:13:29 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s -DEBUG 06-24 20:13:29 [manager.py:391] Prefill Batch: batch_id=96874062735073194553146675415417018416, time:1750767209.9483294s req_ids:[8] -DEBUG 06-24 20:13:29 [manager.py:391] -ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:29 lightllm_req_id:8 first_token_cost:205.9154510498047ms total_cost_time:205.96051216125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8017 prompt_cache_len:5151 prompt_cache_ratio:0.6425096669577148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 -DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s -INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.11094260215759277 s -DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=241011568176580033652275330164393240203, time:1750767210.160697s req_ids:[8] -DEBUG 06-24 20:13:30 [manager.py:391] -ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:206.85863494873047ms total_cost_time:206.92038536071777ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:8018 prompt_cache_len:5151 prompt_cache_ratio:0.6424295335495136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 -DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10845661163330078 s -INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s -DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=91908757559130525624874254999915434982, time:1750767210.3803387s req_ids:[8] -DEBUG 06-24 20:13:30 [manager.py:391] -ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:210.2665901184082ms total_cost_time:210.3114128112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8019 prompt_cache_len:5151 prompt_cache_ratio:0.6423494201271979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 -DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.10772395133972168 s -INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.1098027229309082 s -DEBUG 06-24 20:13:30 [manager.py:391] Prefill Batch: batch_id=82546041890165863939652097839496998136, time:1750767210.593952s req_ids:[8] -DEBUG 06-24 20:13:30 [manager.py:391] -ERROR 06-24 20:13:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:206.68268203735352ms total_cost_time:206.72893524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8020 prompt_cache_len:5151 prompt_cache_ratio:0.6422693266832917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 -DEBUG 06-24 20:13:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:30 [manager.py:224] router recive req id 8 cost time 0.3095567226409912 s -INFO 06-24 20:13:30 [manager.py:68] detokenization recv req id 8 cost time 0.31077003479003906 s -DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=92071940266793824855499522807865262678, time:1750767211.0057893s req_ids:[8] -DEBUG 06-24 20:13:31 [manager.py:391] -ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:30 lightllm_req_id:8 first_token_cost:413.41614723205566ms total_cost_time:413.46120834350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8021 prompt_cache_len:5151 prompt_cache_ratio:0.6421892532103229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 -DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s -INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.11078023910522461 s -DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=232439080478786691291367156715416963605, time:1750767211.223302s req_ids:[8] -DEBUG 06-24 20:13:31 [manager.py:391] -ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:205.5830955505371ms total_cost_time:205.6264877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8022 prompt_cache_len:5151 prompt_cache_ratio:0.6421091997008227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 -DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10797405242919922 s -INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.10987138748168945 s -DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=80115564579417612783810801856783968278, time:1750767211.4359064s req_ids:[8] -DEBUG 06-24 20:13:31 [manager.py:391] -ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:212.51821517944336ms total_cost_time:212.56422996520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8023 prompt_cache_len:5151 prompt_cache_ratio:0.6420291661473264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 -DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10848045349121094 s -INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.11026144027709961 s -DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=293943441949701320686664402720610770772, time:1750767211.6528914s req_ids:[8] -DEBUG 06-24 20:13:31 [manager.py:391] -ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:203.3693790435791ms total_cost_time:203.4139633178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8024 prompt_cache_len:5151 prompt_cache_ratio:0.6419491525423728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 -DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:31 [manager.py:224] router recive req id 8 cost time 0.10758447647094727 s -INFO 06-24 20:13:31 [manager.py:68] detokenization recv req id 8 cost time 0.10954022407531738 s -DEBUG 06-24 20:13:31 [manager.py:391] Prefill Batch: batch_id=171561965923437039977024084455466477433, time:1750767211.8630927s req_ids:[8] -DEBUG 06-24 20:13:31 [manager.py:391] -ERROR 06-24 20:13:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:204.21791076660156ms total_cost_time:204.26201820373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8025 prompt_cache_len:5151 prompt_cache_ratio:0.6418691588785047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 -DEBUG 06-24 20:13:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10892868041992188 s -INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.11079859733581543 s -DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=163393512212347573480059928391264765222, time:1750767212.0808082s req_ids:[8] -DEBUG 06-24 20:13:32 [manager.py:391] -ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:31 lightllm_req_id:8 first_token_cost:216.19558334350586ms total_cost_time:216.24374389648438ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:8026 prompt_cache_len:5151 prompt_cache_ratio:0.6417891851482681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 -DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10766339302062988 s -INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s -DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=27567450746955744522232242561639321977, time:1750767212.3012102s req_ids:[8] -DEBUG 06-24 20:13:32 [manager.py:391] -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:220.05391120910645ms total_cost_time:220.09801864624023ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8027 prompt_cache_len:5151 prompt_cache_ratio:0.6417092313442133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 -DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10823369026184082 s -INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.11010909080505371 s -DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=171102489936591826138659442117401955408, time:1750767212.5201666s req_ids:[8] -DEBUG 06-24 20:13:32 [manager.py:391] -ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:169.95000839233398ms total_cost_time:169.99316215515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8028 prompt_cache_len:5151 prompt_cache_ratio:0.6416292974588939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 -DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:32 [manager.py:224] router recive req id 8 cost time 0.10801458358764648 s -INFO 06-24 20:13:32 [manager.py:68] detokenization recv req id 8 cost time 0.10981464385986328 s -DEBUG 06-24 20:13:32 [manager.py:391] Prefill Batch: batch_id=169796326162823785676721683044717755850, time:1750767212.697953s req_ids:[8] -DEBUG 06-24 20:13:32 [manager.py:391] -ERROR 06-24 20:13:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:208.99462699890137ms total_cost_time:209.04111862182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8029 prompt_cache_len:5151 prompt_cache_ratio:0.6415493834848673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 -DEBUG 06-24 20:13:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.3095996379852295 s -INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.31188511848449707 s -DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=330063706882244893526928047670001724203, time:1750767213.1190164s req_ids:[8] -DEBUG 06-24 20:13:33 [manager.py:391] -ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:32 lightllm_req_id:8 first_token_cost:417.2329902648926ms total_cost_time:417.2811508178711ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:8030 prompt_cache_len:5151 prompt_cache_ratio:0.641469489414695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 -DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10786080360412598 s -INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10964775085449219 s -DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=24472232322048738001534544070683166294, time:1750767213.339208s req_ids:[8] -DEBUG 06-24 20:13:33 [manager.py:391] -ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:213.5946750640869ms total_cost_time:213.6402130126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8031 prompt_cache_len:5151 prompt_cache_ratio:0.6413896152409414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 -DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10765647888183594 s -INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10948395729064941 s -DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=271701817042320459685940580244195258829, time:1750767213.5552058s req_ids:[8] -DEBUG 06-24 20:13:33 [manager.py:391] -ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:206.8326473236084ms total_cost_time:206.8774700164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8032 prompt_cache_len:5151 prompt_cache_ratio:0.6413097609561753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 -DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10721659660339355 s -INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10896587371826172 s -DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=259117447860807868689616613469691422374, time:1750767213.7701948s req_ids:[8] -DEBUG 06-24 20:13:33 [manager.py:391] -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:218.2292938232422ms total_cost_time:218.27459335327148ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8033 prompt_cache_len:5151 prompt_cache_ratio:0.641229926552969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 -DEBUG 06-24 20:13:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:33 [manager.py:224] router recive req id 8 cost time 0.10794472694396973 s -INFO 06-24 20:13:33 [manager.py:68] detokenization recv req id 8 cost time 0.10982871055603027 s -DEBUG 06-24 20:13:33 [manager.py:391] Prefill Batch: batch_id=37808013535902847515610826973021025300, time:1750767213.9884288s req_ids:[8] -DEBUG 06-24 20:13:33 [manager.py:391] -ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:33 lightllm_req_id:8 first_token_cost:205.36017417907715ms total_cost_time:205.40738105773926ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8034 prompt_cache_len:5151 prompt_cache_ratio:0.6411501120238984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 -DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10774040222167969 s -INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.10932111740112305 s -DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=315084858904030512185046253995798691755, time:1750767214.2176788s req_ids:[8] -DEBUG 06-24 20:13:34 [manager.py:391] -ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:182.18350410461426ms total_cost_time:182.22904205322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8035 prompt_cache_len:5151 prompt_cache_ratio:0.6410703173615433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 -DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10826969146728516 s -INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.11013627052307129 s -DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=19148182157460675066394449996674157111, time:1750767214.3901174s req_ids:[8] -DEBUG 06-24 20:13:34 [manager.py:391] -ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:200.08158683776855ms total_cost_time:200.12640953063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8036 prompt_cache_len:5151 prompt_cache_ratio:0.6409905425584869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 -DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.11209464073181152 s -INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.1137993335723877 s -DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=119397928418846814921679876938158418701, time:1750767214.6058753s req_ids:[8] -DEBUG 06-24 20:13:34 [manager.py:391] -ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:208.71448516845703ms total_cost_time:208.79602432250977ms,out_token_counter:1 mean_per_token_cost_time: 0.08153915405273438ms prompt_token_num:8037 prompt_cache_len:5151 prompt_cache_ratio:0.6409107876073161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 -DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:34 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s -INFO 06-24 20:13:34 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s -DEBUG 06-24 20:13:34 [manager.py:391] Prefill Batch: batch_id=212586833083281771472720913898747214808, time:1750767214.8126934s req_ids:[8] -DEBUG 06-24 20:13:34 [manager.py:391] -ERROR 06-24 20:13:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:204.67448234558105ms total_cost_time:204.71858978271484ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8038 prompt_cache_len:5151 prompt_cache_ratio:0.640831052500622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 -DEBUG 06-24 20:13:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10861563682556152 s -INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.11058545112609863 s -DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=126600861762066455238703936800154693957, time:1750767215.0257301s req_ids:[8] -DEBUG 06-24 20:13:35 [manager.py:391] -ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:34 lightllm_req_id:8 first_token_cost:209.99598503112793ms total_cost_time:210.04199981689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8039 prompt_cache_len:5151 prompt_cache_ratio:0.6407513372309989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 -DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s -INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.11072826385498047 s -DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=178861688980500179736056911155426334307, time:1750767215.2415404s req_ids:[8] -DEBUG 06-24 20:13:35 [manager.py:391] -ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:415.13991355895996ms total_cost_time:415.18688201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8040 prompt_cache_len:5151 prompt_cache_ratio:0.6406716417910447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 -DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10831832885742188 s -INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103360652923584 s -DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=313483101149062552106507321958323623725, time:1750767215.6610317s req_ids:[8] -DEBUG 06-24 20:13:35 [manager.py:391] -ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:199.04637336730957ms total_cost_time:199.1097927093506ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:8041 prompt_cache_len:5151 prompt_cache_ratio:0.6405919661733616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 -DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:35 [manager.py:224] router recive req id 8 cost time 0.10947322845458984 s -INFO 06-24 20:13:35 [manager.py:68] detokenization recv req id 8 cost time 0.1114356517791748 s -DEBUG 06-24 20:13:35 [manager.py:391] Prefill Batch: batch_id=160943622196415814569415706327143919507, time:1750767215.8656864s req_ids:[8] -DEBUG 06-24 20:13:35 [manager.py:391] -ERROR 06-24 20:13:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:202.47626304626465ms total_cost_time:202.52108573913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8042 prompt_cache_len:5151 prompt_cache_ratio:0.6405123103705546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 -DEBUG 06-24 20:13:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.1073598861694336 s -INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.10905051231384277 s -DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=215495799654406256577686628097255323083, time:1750767216.0800626s req_ids:[8] -DEBUG 06-24 20:13:36 [manager.py:391] -ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:35 lightllm_req_id:8 first_token_cost:207.04150199890137ms total_cost_time:207.08537101745605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8043 prompt_cache_len:5151 prompt_cache_ratio:0.6404326743752331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 -DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10731029510498047 s -INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.1090700626373291 s -DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=135729905471102727047599571379728491656, time:1750767216.298246s req_ids:[8] -DEBUG 06-24 20:13:36 [manager.py:391] -ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:214.8873805999756ms total_cost_time:214.93077278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8044 prompt_cache_len:5151 prompt_cache_ratio:0.64035305818001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 -DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10834431648254395 s -INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s -INFO 06-24 20:13:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=218653994900652475462081452584280600779, time:1750767216.5263934s req_ids:[8] -DEBUG 06-24 20:13:36 [manager.py:391] -ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:225.31366348266602ms total_cost_time:225.35991668701172ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8045 prompt_cache_len:5151 prompt_cache_ratio:0.6402734617775016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 -DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10830545425415039 s -INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s -DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=329454382465862086743430419182510531469, time:1750767216.741093s req_ids:[8] -DEBUG 06-24 20:13:36 [manager.py:391] -ERROR 06-24 20:13:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:206.83765411376953ms total_cost_time:206.88128471374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8046 prompt_cache_len:5151 prompt_cache_ratio:0.6401938851603282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 -DEBUG 06-24 20:13:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:36 [manager.py:224] router recive req id 8 cost time 0.10912752151489258 s -INFO 06-24 20:13:36 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s -DEBUG 06-24 20:13:36 [manager.py:391] Prefill Batch: batch_id=138989923197628097627799582094864550929, time:1750767216.956738s req_ids:[8] -DEBUG 06-24 20:13:36 [manager.py:391] -ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:36 lightllm_req_id:8 first_token_cost:212.17751502990723ms total_cost_time:212.22209930419922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8047 prompt_cache_len:5151 prompt_cache_ratio:0.6401143283211135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 -DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10907220840454102 s -INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.11097025871276855 s -DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=29119291134425823645822612133371008602, time:1750767217.1700191s req_ids:[8] -DEBUG 06-24 20:13:37 [manager.py:391] -ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:205.000638961792ms total_cost_time:205.0485610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:8048 prompt_cache_len:5151 prompt_cache_ratio:0.6400347912524851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 -DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s -INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s -DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=109790732004628960669754922390521204587, time:1750767217.385679s req_ids:[8] -DEBUG 06-24 20:13:37 [manager.py:391] -ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:214.17713165283203ms total_cost_time:214.22243118286133ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8049 prompt_cache_len:5151 prompt_cache_ratio:0.6399552739470742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 -DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10813307762145996 s -INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.10985231399536133 s -DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=322464502042012871840696675926425672648, time:1750767217.604803s req_ids:[8] -DEBUG 06-24 20:13:37 [manager.py:391] -ERROR 06-24 20:13:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:13:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 33704.165 tokens/s -DEBUG 06-24 20:13:37 [stats.py:37] Avg prompt tokens throughput: 33695.672 tokens/s -DEBUG 06-24 20:13:37 [stats.py:37] Avg generate tokens throughput: 8.493 tokens/s -INFO 06-24 20:13:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:372.5893497467041ms total_cost_time:372.6356029510498ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8050 prompt_cache_len:5151 prompt_cache_ratio:0.6398757763975156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 -DEBUG 06-24 20:13:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:37 [manager.py:224] router recive req id 8 cost time 0.10793542861938477 s -INFO 06-24 20:13:37 [manager.py:68] detokenization recv req id 8 cost time 0.10982799530029297 s -DEBUG 06-24 20:13:37 [manager.py:391] Prefill Batch: batch_id=231065977064941673351029818506398440574, time:1750767217.977116s req_ids:[8] -DEBUG 06-24 20:13:37 [manager.py:391] -ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:37 lightllm_req_id:8 first_token_cost:208.2195281982422ms total_cost_time:208.2662582397461ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:8051 prompt_cache_len:5151 prompt_cache_ratio:0.6397962985964476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 -DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s -INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10966300964355469 s -DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=147389770831574650088893201530362838930, time:1750767218.1944437s req_ids:[8] -DEBUG 06-24 20:13:38 [manager.py:391] -ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:208.22572708129883ms total_cost_time:208.28580856323242ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8052 prompt_cache_len:5151 prompt_cache_ratio:0.6397168405365127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 -DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10769414901733398 s -INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10968661308288574 s -DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=305374590684094392150729281460347867612, time:1750767218.4148922s req_ids:[8] -DEBUG 06-24 20:13:38 [manager.py:391] -ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:211.7440700531006ms total_cost_time:211.76695823669434ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:8053 prompt_cache_len:5151 prompt_cache_ratio:0.6396374022103564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 -DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10475301742553711 s -INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10663676261901855 s -DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=133189338529028411515339446870221423781, time:1750767218.626101s req_ids:[8] -DEBUG 06-24 20:13:38 [manager.py:391] -ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:210.89887619018555ms total_cost_time:210.94632148742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8054 prompt_cache_len:5151 prompt_cache_ratio:0.6395579836106282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 -DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:38 [manager.py:224] router recive req id 8 cost time 0.10790419578552246 s -INFO 06-24 20:13:38 [manager.py:68] detokenization recv req id 8 cost time 0.10973048210144043 s -DEBUG 06-24 20:13:38 [manager.py:391] Prefill Batch: batch_id=235679107474053359259283436901151122929, time:1750767218.8414223s req_ids:[8] -DEBUG 06-24 20:13:38 [manager.py:391] -ERROR 06-24 20:13:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:204.91909980773926ms total_cost_time:204.96559143066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8055 prompt_cache_len:5151 prompt_cache_ratio:0.6394785847299814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 -DEBUG 06-24 20:13:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10909795761108398 s -INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.11096954345703125 s -DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=305589090865375666920949736731106693642, time:1750767219.0565562s req_ids:[8] -DEBUG 06-24 20:13:39 [manager.py:391] -ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:38 lightllm_req_id:8 first_token_cost:205.93881607055664ms total_cost_time:205.98411560058594ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8056 prompt_cache_len:5151 prompt_cache_ratio:0.6393992055610725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 -DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.1076662540435791 s -INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096951961517334 s -DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=151767450900899089826474746433593334684, time:1750767219.2669268s req_ids:[8] -DEBUG 06-24 20:13:39 [manager.py:391] -ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:212.1875286102295ms total_cost_time:212.233304977417ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8057 prompt_cache_len:5151 prompt_cache_ratio:0.639319846096562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 -DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10685420036315918 s -INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.10872578620910645 s -DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=211842810678018342263127347609701582751, time:1750767219.4814346s req_ids:[8] -DEBUG 06-24 20:13:39 [manager.py:391] -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:196.80261611938477ms total_cost_time:196.8250274658203ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:8058 prompt_cache_len:5151 prompt_cache_ratio:0.6392405063291139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 -DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.10632634162902832 s -INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1081697940826416 s -DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=88591534724784794718488785318291708654, time:1750767219.6838667s req_ids:[8] -DEBUG 06-24 20:13:39 [manager.py:391] -ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:199.39875602722168ms total_cost_time:199.44334030151367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8059 prompt_cache_len:5151 prompt_cache_ratio:0.639161186251396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 -DEBUG 06-24 20:13:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:39 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s -INFO 06-24 20:13:39 [manager.py:68] detokenization recv req id 8 cost time 0.1100766658782959 s -DEBUG 06-24 20:13:39 [manager.py:391] Prefill Batch: batch_id=261998647676513450391335054519485690806, time:1750767219.8888779s req_ids:[8] -DEBUG 06-24 20:13:39 [manager.py:391] -ERROR 06-24 20:13:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:205.4159641265869ms total_cost_time:205.460786819458ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8060 prompt_cache_len:5151 prompt_cache_ratio:0.6390818858560794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 -DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.3099782466888428 s -INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.31195569038391113 s -DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=254125505485077604956992921997470737741, time:1750767220.3018322s req_ids:[8] -DEBUG 06-24 20:13:40 [manager.py:391] -ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:39 lightllm_req_id:8 first_token_cost:409.5466136932373ms total_cost_time:409.5914363861084ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8061 prompt_cache_len:5151 prompt_cache_ratio:0.6390026051358392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 -DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.1072845458984375 s -INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s -DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=295609575480806864960928682887784431310, time:1750767220.5287113s req_ids:[8] -DEBUG 06-24 20:13:40 [manager.py:391] -ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:181.4250946044922ms total_cost_time:181.46872520446777ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8062 prompt_cache_len:5151 prompt_cache_ratio:0.638923344083354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 -DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s -INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959029197692871 s -DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=198778040995821253941882945232673870698, time:1750767220.6995816s req_ids:[8] -DEBUG 06-24 20:13:40 [manager.py:391] -ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:199.5084285736084ms total_cost_time:199.55182075500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8063 prompt_cache_len:5151 prompt_cache_ratio:0.638844102691306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 -DEBUG 06-24 20:13:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:40 [manager.py:224] router recive req id 8 cost time 0.10851788520812988 s -INFO 06-24 20:13:40 [manager.py:68] detokenization recv req id 8 cost time 0.11049413681030273 s -DEBUG 06-24 20:13:40 [manager.py:391] Prefill Batch: batch_id=49064737822486721512052770965989559594, time:1750767220.9076686s req_ids:[8] -DEBUG 06-24 20:13:40 [manager.py:391] -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:206.45952224731445ms total_cost_time:206.50506019592285ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8064 prompt_cache_len:5151 prompt_cache_ratio:0.6387648809523809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 -DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10778975486755371 s -INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.10974502563476562 s -DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=301305406224313279995046004031161261926, time:1750767221.119459s req_ids:[8] -DEBUG 06-24 20:13:41 [manager.py:391] -ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:40 lightllm_req_id:8 first_token_cost:206.51817321777344ms total_cost_time:206.56418800354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8065 prompt_cache_len:5151 prompt_cache_ratio:0.6386856788592684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 -DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10805702209472656 s -INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s -DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=71840767398812457825887583429247173729, time:1750767221.336955s req_ids:[8] -DEBUG 06-24 20:13:41 [manager.py:391] -ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:212.37921714782715ms total_cost_time:212.42260932922363ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8066 prompt_cache_len:5151 prompt_cache_ratio:0.6386064964046615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 -DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10795164108276367 s -INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.10991454124450684 s -DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=262640897278135896921666262285222479589, time:1750767221.549863s req_ids:[8] -DEBUG 06-24 20:13:41 [manager.py:391] -ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:203.74345779418945ms total_cost_time:203.78780364990234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8067 prompt_cache_len:5151 prompt_cache_ratio:0.638527333581257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 -DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10877060890197754 s -INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11073660850524902 s -DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=320349095467898117374916659957531924132, time:1750767221.7579339s req_ids:[8] -DEBUG 06-24 20:13:41 [manager.py:391] -ERROR 06-24 20:13:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:205.0940990447998ms total_cost_time:205.16443252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8068 prompt_cache_len:5151 prompt_cache_ratio:0.638448190381755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 -DEBUG 06-24 20:13:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:41 [manager.py:224] router recive req id 8 cost time 0.10901927947998047 s -INFO 06-24 20:13:41 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s -DEBUG 06-24 20:13:41 [manager.py:391] Prefill Batch: batch_id=203255366901912356807155428081710494454, time:1750767221.9690363s req_ids:[8] -DEBUG 06-24 20:13:41 [manager.py:391] -ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:41 lightllm_req_id:8 first_token_cost:203.22060585021973ms total_cost_time:203.263521194458ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8069 prompt_cache_len:5151 prompt_cache_ratio:0.6383690667988599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 -DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.10870885848999023 s -INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066079139709473 s -DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=330590240013976199529664246209082262821, time:1750767222.1791081s req_ids:[8] -DEBUG 06-24 20:13:42 [manager.py:391] -ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:205.42049407958984ms total_cost_time:205.46674728393555ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8070 prompt_cache_len:5151 prompt_cache_ratio:0.6382899628252788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 -DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.31090641021728516 s -INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122224807739258 s -DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=161336232136907122838365072471345207341, time:1750767222.60948s req_ids:[8] -DEBUG 06-24 20:13:42 [manager.py:391] -ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:431.6565990447998ms total_cost_time:431.7011833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8071 prompt_cache_len:5151 prompt_cache_ratio:0.6382108784537232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 -DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:42 [manager.py:224] router recive req id 8 cost time 0.1084742546081543 s -INFO 06-24 20:13:42 [manager.py:68] detokenization recv req id 8 cost time 0.11041069030761719 s -DEBUG 06-24 20:13:42 [manager.py:391] Prefill Batch: batch_id=194174635208848866564785900138590427762, time:1750767222.8305907s req_ids:[8] -DEBUG 06-24 20:13:42 [manager.py:391] -ERROR 06-24 20:13:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:210.26968955993652ms total_cost_time:210.3135585784912ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8072 prompt_cache_len:5151 prompt_cache_ratio:0.6381318136769079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 -DEBUG 06-24 20:13:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10826420783996582 s -INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.10992193222045898 s -DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=70019449877000478808206091789589732597, time:1750767223.0435102s req_ids:[8] -DEBUG 06-24 20:13:43 [manager.py:391] -ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:42 lightllm_req_id:8 first_token_cost:171.6468334197998ms total_cost_time:171.6938018798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8073 prompt_cache_len:5151 prompt_cache_ratio:0.6380527684875511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 -DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s -INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.10986661911010742 s -DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=159349141074113812258836080997744185009, time:1750767223.2213142s req_ids:[8] -DEBUG 06-24 20:13:43 [manager.py:391] -ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:202.85677909851074ms total_cost_time:202.90160179138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8074 prompt_cache_len:5151 prompt_cache_ratio:0.637973742878375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 -DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10805773735046387 s -INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100778579711914 s -DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=308266019018937835188037493226891837083, time:1750767223.4299216s req_ids:[8] -DEBUG 06-24 20:13:43 [manager.py:391] -ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:206.21776580810547ms total_cost_time:206.26401901245117ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8075 prompt_cache_len:5151 prompt_cache_ratio:0.6378947368421053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 -DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10716605186462402 s -INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1090848445892334 s -DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=209930011095205863501781125335445617491, time:1750767223.6416006s req_ids:[8] -DEBUG 06-24 20:13:43 [manager.py:391] -ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:204.3149471282959ms total_cost_time:204.34260368347168ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8076 prompt_cache_len:5151 prompt_cache_ratio:0.637815750371471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 -DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:43 [manager.py:224] router recive req id 8 cost time 0.10679864883422852 s -INFO 06-24 20:13:43 [manager.py:68] detokenization recv req id 8 cost time 0.1087641716003418 s -DEBUG 06-24 20:13:43 [manager.py:391] Prefill Batch: batch_id=190721890390853134489228581767649819552, time:1750767223.8610218s req_ids:[8] -DEBUG 06-24 20:13:43 [manager.py:391] -ERROR 06-24 20:13:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:213.7596607208252ms total_cost_time:213.78779411315918ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8077 prompt_cache_len:5151 prompt_cache_ratio:0.6377367834592051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 -DEBUG 06-24 20:13:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.1047365665435791 s -INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.10672664642333984 s -DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=107489743280337487220905519960400172082, time:1750767224.0701993s req_ids:[8] -DEBUG 06-24 20:13:44 [manager.py:391] -ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:43 lightllm_req_id:8 first_token_cost:202.6221752166748ms total_cost_time:202.6503086090088ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8078 prompt_cache_len:5151 prompt_cache_ratio:0.637657836098044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 -DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.10600566864013672 s -INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.10810065269470215 s -DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=202015832633553004289211074090195720630, time:1750767224.2802904s req_ids:[8] -DEBUG 06-24 20:13:44 [manager.py:391] -ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:201.59626007080078ms total_cost_time:201.63202285766602ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:8079 prompt_cache_len:5151 prompt_cache_ratio:0.6375789082807278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 -DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.10818314552307129 s -INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.11006522178649902 s -DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=203334258871617260662525445579672495712, time:1750767224.49161s req_ids:[8] -DEBUG 06-24 20:13:44 [manager.py:391] -ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:203.7971019744873ms total_cost_time:203.84454727172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8080 prompt_cache_len:5151 prompt_cache_ratio:0.6375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 -DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:44 [manager.py:224] router recive req id 8 cost time 0.1088247299194336 s -INFO 06-24 20:13:44 [manager.py:68] detokenization recv req id 8 cost time 0.11080384254455566 s -DEBUG 06-24 20:13:44 [manager.py:391] Prefill Batch: batch_id=294482469373479857291287404792816699961, time:1750767224.7010427s req_ids:[8] -DEBUG 06-24 20:13:44 [manager.py:391] -ERROR 06-24 20:13:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:363.36755752563477ms total_cost_time:363.4159564971924ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:8081 prompt_cache_len:5151 prompt_cache_ratio:0.6374211112486079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 -DEBUG 06-24 20:13:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10929203033447266 s -INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.1113898754119873 s -DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=241194734210511093161432585006183178886, time:1750767225.0655677s req_ids:[8] -DEBUG 06-24 20:13:45 [manager.py:391] -ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:44 lightllm_req_id:8 first_token_cost:197.91603088378906ms total_cost_time:197.96085357666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8082 prompt_cache_len:5151 prompt_cache_ratio:0.6373422420193021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 -DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10849666595458984 s -INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s -DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=146978215860167253039851290843201513281, time:1750767225.2720957s req_ids:[8] -DEBUG 06-24 20:13:45 [manager.py:391] -ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.4464817047119ms total_cost_time:205.4903507232666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8083 prompt_cache_len:5151 prompt_cache_ratio:0.6372633923048373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 -DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10818219184875488 s -INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.11010217666625977 s -DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=218889070414234497520232911618951723960, time:1750767225.484616s req_ids:[8] -DEBUG 06-24 20:13:45 [manager.py:391] -ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.99651336669922ms total_cost_time:206.04300498962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8084 prompt_cache_len:5151 prompt_cache_ratio:0.6371845620979713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 -DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10773944854736328 s -INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.10968947410583496 s -DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=225968309768180993327228550401013305867, time:1750767225.695383s req_ids:[8] -DEBUG 06-24 20:13:45 [manager.py:391] -ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.31821250915527ms total_cost_time:205.36398887634277ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8085 prompt_cache_len:5151 prompt_cache_ratio:0.6371057513914656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 -DEBUG 06-24 20:13:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:45 [manager.py:224] router recive req id 8 cost time 0.10732555389404297 s -INFO 06-24 20:13:45 [manager.py:68] detokenization recv req id 8 cost time 0.10925912857055664 s -DEBUG 06-24 20:13:45 [manager.py:391] Prefill Batch: batch_id=160033239014355013892670493839584912528, time:1750767225.9065716s req_ids:[8] -DEBUG 06-24 20:13:45 [manager.py:391] -ERROR 06-24 20:13:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:205.84416389465332ms total_cost_time:205.8887481689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8086 prompt_cache_len:5151 prompt_cache_ratio:0.6370269601780856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 -DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.1104738712310791 s -INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11252474784851074 s -DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=31386857032168329666902550638757640541, time:1750767226.118384s req_ids:[8] -DEBUG 06-24 20:13:46 [manager.py:391] -ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:45 lightllm_req_id:8 first_token_cost:203.90558242797852ms total_cost_time:203.963041305542ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:8087 prompt_cache_len:5151 prompt_cache_ratio:0.6369481884505998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 -DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s -INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11064386367797852 s -DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=133551726946520198733980140423400851976, time:1750767226.328581s req_ids:[8] -DEBUG 06-24 20:13:46 [manager.py:391] -ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:206.42876625061035ms total_cost_time:206.4962387084961ms,out_token_counter:1 mean_per_token_cost_time: 0.06747245788574219ms prompt_token_num:8088 prompt_cache_len:5151 prompt_cache_ratio:0.6368694362017804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 -INFO 06-24 20:13:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s -INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s -DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=279137965828223822875134489357048231187, time:1750767226.5410378s req_ids:[8] -DEBUG 06-24 20:13:46 [manager.py:391] -ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:201.5066146850586ms total_cost_time:201.54976844787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8089 prompt_cache_len:5151 prompt_cache_ratio:0.6367907034244035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 -DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10738563537597656 s -INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.10937952995300293 s -DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=133140476523604600798955754364762278591, time:1750767226.7490964s req_ids:[8] -DEBUG 06-24 20:13:46 [manager.py:391] -ERROR 06-24 20:13:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:203.57346534729004ms total_cost_time:203.61828804016113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8090 prompt_cache_len:5151 prompt_cache_ratio:0.6367119901112485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 -DEBUG 06-24 20:13:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:46 [manager.py:224] router recive req id 8 cost time 0.10778141021728516 s -INFO 06-24 20:13:46 [manager.py:68] detokenization recv req id 8 cost time 0.10970473289489746 s -DEBUG 06-24 20:13:46 [manager.py:391] Prefill Batch: batch_id=199175326906523073053260394173069683128, time:1750767226.9585898s req_ids:[8] -DEBUG 06-24 20:13:46 [manager.py:391] -ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:46 lightllm_req_id:8 first_token_cost:363.095760345459ms total_cost_time:363.1410598754883ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8091 prompt_cache_len:5151 prompt_cache_ratio:0.6366332962550982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 -DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s -INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11034512519836426 s -DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=96058079967577283384805348767671051698, time:1750767227.326059s req_ids:[8] -DEBUG 06-24 20:13:47 [manager.py:391] -ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:204.69236373901367ms total_cost_time:204.73647117614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8092 prompt_cache_len:5151 prompt_cache_ratio:0.6365546218487395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 -DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s -INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11073541641235352 s -DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=105523112436045670858046030849275993520, time:1750767227.539137s req_ids:[8] -DEBUG 06-24 20:13:47 [manager.py:391] -ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:208.0233097076416ms total_cost_time:208.0678939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8093 prompt_cache_len:5151 prompt_cache_ratio:0.6364759668849623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 -DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10761046409606934 s -INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.10950446128845215 s -DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=73850314913349288611568059276265842885, time:1750767227.7534378s req_ids:[8] -DEBUG 06-24 20:13:47 [manager.py:391] -ERROR 06-24 20:13:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:209.72013473510742ms total_cost_time:209.7647190093994ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8094 prompt_cache_len:5151 prompt_cache_ratio:0.6363973313565604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 -DEBUG 06-24 20:13:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:47 [manager.py:224] router recive req id 8 cost time 0.10832905769348145 s -INFO 06-24 20:13:47 [manager.py:68] detokenization recv req id 8 cost time 0.11021876335144043 s -DEBUG 06-24 20:13:47 [manager.py:391] Prefill Batch: batch_id=121527046461158447765531230086802271831, time:1750767227.9661796s req_ids:[8] -DEBUG 06-24 20:13:47 [manager.py:391] -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:13:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 35932.030 tokens/s -DEBUG 06-24 20:13:47 [stats.py:37] Avg prompt tokens throughput: 35923.229 tokens/s -DEBUG 06-24 20:13:47 [stats.py:37] Avg generate tokens throughput: 8.801 tokens/s -ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:47 lightllm_req_id:8 first_token_cost:205.40499687194824ms total_cost_time:205.45053482055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8095 prompt_cache_len:5151 prompt_cache_ratio:0.6363187152563311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 -DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s -INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.10999464988708496 s -DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=314332061752910045469982093564041865488, time:1750767228.1750445s req_ids:[8] -DEBUG 06-24 20:13:48 [manager.py:391] -ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:203.97496223449707ms total_cost_time:204.02026176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8096 prompt_cache_len:5151 prompt_cache_ratio:0.6362401185770751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 -DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.1085348129272461 s -INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058545112609863 s -DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=270553068105683448834996038342538367326, time:1750767228.3865838s req_ids:[8] -DEBUG 06-24 20:13:48 [manager.py:391] -ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:205.7032585144043ms total_cost_time:205.7483196258545ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8097 prompt_cache_len:5151 prompt_cache_ratio:0.6361615413115969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 -DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10804080963134766 s -INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.11022686958312988 s -DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=211954486614562433502001795866789504837, time:1750767228.5986605s req_ids:[8] -DEBUG 06-24 20:13:48 [manager.py:391] -ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.30311965942383ms total_cost_time:206.345796585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8098 prompt_cache_len:5151 prompt_cache_ratio:0.6360829834527044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 -DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:48 [manager.py:224] router recive req id 8 cost time 0.10732579231262207 s -INFO 06-24 20:13:48 [manager.py:68] detokenization recv req id 8 cost time 0.1092672348022461 s -DEBUG 06-24 20:13:48 [manager.py:391] Prefill Batch: batch_id=155124168504351182725194535457896336114, time:1750767228.810134s req_ids:[8] -DEBUG 06-24 20:13:48 [manager.py:391] -ERROR 06-24 20:13:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.02941513061523ms total_cost_time:206.07328414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8099 prompt_cache_len:5151 prompt_cache_ratio:0.6360044449932091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 -DEBUG 06-24 20:13:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10775995254516602 s -INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.10976719856262207 s -DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=106071030128258246122994942756063897024, time:1750767229.0220828s req_ids:[8] -DEBUG 06-24 20:13:49 [manager.py:391] -ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:48 lightllm_req_id:8 first_token_cost:206.12025260925293ms total_cost_time:206.16459846496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8100 prompt_cache_len:5151 prompt_cache_ratio:0.6359259259259259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 -DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.31028151512145996 s -INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.3122560977935791 s -DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=98627209788490222932552879441707546660, time:1750767229.4442008s req_ids:[8] -DEBUG 06-24 20:13:49 [manager.py:391] -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:424.4811534881592ms total_cost_time:424.52526092529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8101 prompt_cache_len:5151 prompt_cache_ratio:0.6358474262436736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 -DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10773062705993652 s -INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.10921597480773926 s -DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=121194993821246751232135909639346608298, time:1750767229.6659307s req_ids:[8] -DEBUG 06-24 20:13:49 [manager.py:391] -ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:208.4791660308838ms total_cost_time:208.52398872375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8102 prompt_cache_len:5151 prompt_cache_ratio:0.6357689459392742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 -DEBUG 06-24 20:13:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:49 [manager.py:224] router recive req id 8 cost time 0.10701775550842285 s -INFO 06-24 20:13:49 [manager.py:68] detokenization recv req id 8 cost time 0.108978271484375 s -DEBUG 06-24 20:13:49 [manager.py:391] Prefill Batch: batch_id=221921391047084398129342456341527162847, time:1750767229.8775554s req_ids:[8] -DEBUG 06-24 20:13:49 [manager.py:391] -ERROR 06-24 20:13:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:208.86492729187012ms total_cost_time:208.9085578918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8103 prompt_cache_len:5151 prompt_cache_ratio:0.6356904850055535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 -DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.11203742027282715 s -INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.1140599250793457 s -DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=252668172921633708187507389841897805641, time:1750767230.0908892s req_ids:[8] -DEBUG 06-24 20:13:50 [manager.py:391] -ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:49 lightllm_req_id:8 first_token_cost:209.12837982177734ms total_cost_time:209.26737785339355ms,out_token_counter:1 mean_per_token_cost_time: 0.13899803161621094ms prompt_token_num:8104 prompt_cache_len:5151 prompt_cache_ratio:0.6356120434353406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 -DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.10787224769592285 s -INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.10981941223144531 s -DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=37321015057118796252739577025384193267, time:1750767230.3028307s req_ids:[8] -DEBUG 06-24 20:13:50 [manager.py:391] -ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:209.88988876342773ms total_cost_time:209.93447303771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8105 prompt_cache_len:5151 prompt_cache_ratio:0.6355336212214683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 -DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.1075437068939209 s -INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.10955810546875 s -DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=222030678851909785274861156665840841904, time:1750767230.51725s req_ids:[8] -DEBUG 06-24 20:13:50 [manager.py:391] -ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:203.48095893859863ms total_cost_time:203.52435111999512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8106 prompt_cache_len:5151 prompt_cache_ratio:0.6354552183567728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 -DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.1087188720703125 s -INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.11078500747680664 s -DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=312983583374346170053176241200150171805, time:1750767230.7239282s req_ids:[8] -DEBUG 06-24 20:13:50 [manager.py:391] -ERROR 06-24 20:13:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:204.79297637939453ms total_cost_time:204.83732223510742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8107 prompt_cache_len:5151 prompt_cache_ratio:0.635376834834094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 -DEBUG 06-24 20:13:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:50 [manager.py:224] router recive req id 8 cost time 0.10879945755004883 s -INFO 06-24 20:13:50 [manager.py:68] detokenization recv req id 8 cost time 0.11074399948120117 s -DEBUG 06-24 20:13:50 [manager.py:391] Prefill Batch: batch_id=170544941664411486915413319735891134310, time:1750767230.9484057s req_ids:[8] -DEBUG 06-24 20:13:50 [manager.py:391] -ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:50 lightllm_req_id:8 first_token_cost:222.89609909057617ms total_cost_time:222.94092178344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8108 prompt_cache_len:5151 prompt_cache_ratio:0.6352984706462753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 -DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10770821571350098 s -INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.10974907875061035 s -DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=93902888379961682109737453866265062270, time:1750767231.1609998s req_ids:[8] -DEBUG 06-24 20:13:51 [manager.py:391] -ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:208.5719108581543ms total_cost_time:208.61577987670898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8109 prompt_cache_len:5151 prompt_cache_ratio:0.6352201257861635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 -DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.3106982707977295 s -INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.31279563903808594 s -DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=170289657816857018488338971290654525701, time:1750767231.5765553s req_ids:[8] -DEBUG 06-24 20:13:51 [manager.py:391] -ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:413.257360458374ms total_cost_time:413.30456733703613ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8110 prompt_cache_len:5151 prompt_cache_ratio:0.6351418002466092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 -DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s -INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.1107168197631836 s -DEBUG 06-24 20:13:51 [manager.py:391] Prefill Batch: batch_id=323111179435295905993405715949752953950, time:1750767231.7944765s req_ids:[8] -DEBUG 06-24 20:13:51 [manager.py:391] -ERROR 06-24 20:13:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:209.97953414916992ms total_cost_time:210.0236415863037ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8111 prompt_cache_len:5151 prompt_cache_ratio:0.635063494020466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 -DEBUG 06-24 20:13:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:51 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s -INFO 06-24 20:13:51 [manager.py:68] detokenization recv req id 8 cost time 0.10992670059204102 s -DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=337363253957841892024073588369729281564, time:1750767232.0061998s req_ids:[8] -DEBUG 06-24 20:13:52 [manager.py:391] -ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:51 lightllm_req_id:8 first_token_cost:202.73876190185547ms total_cost_time:202.79908180236816ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8112 prompt_cache_len:5151 prompt_cache_ratio:0.6349852071005917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 -DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.11054372787475586 s -INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11249423027038574 s -DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=315769267226245320941435438690373019161, time:1750767232.215018s req_ids:[8] -DEBUG 06-24 20:13:52 [manager.py:391] -ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:202.7900218963623ms total_cost_time:202.8346061706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8113 prompt_cache_len:5151 prompt_cache_ratio:0.6349069394798471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 -DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10931086540222168 s -INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11178398132324219 s -DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=249247419710835236530314416190627642121, time:1750767232.4252582s req_ids:[8] -DEBUG 06-24 20:13:52 [manager.py:391] -ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:205.76143264770508ms total_cost_time:205.80625534057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8114 prompt_cache_len:5151 prompt_cache_ratio:0.6348286911510969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 -DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10795187950134277 s -INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11035752296447754 s -DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=78808598712691966219372036419946189344, time:1750767232.6374888s req_ids:[8] -DEBUG 06-24 20:13:52 [manager.py:391] -ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:207.48329162597656ms total_cost_time:207.52811431884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8115 prompt_cache_len:5151 prompt_cache_ratio:0.6347504621072089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 -DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:52 [manager.py:224] router recive req id 8 cost time 0.10850644111633301 s -INFO 06-24 20:13:52 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s -DEBUG 06-24 20:13:52 [manager.py:391] Prefill Batch: batch_id=273276107363366275342978176810852430166, time:1750767232.8499973s req_ids:[8] -DEBUG 06-24 20:13:52 [manager.py:391] -ERROR 06-24 20:13:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:203.35817337036133ms total_cost_time:203.40228080749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8116 prompt_cache_len:5151 prompt_cache_ratio:0.6346722523410547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 -DEBUG 06-24 20:13:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10728931427001953 s -INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10935616493225098 s -DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=83269596180312643652643871002693855222, time:1750767233.0581539s req_ids:[8] -DEBUG 06-24 20:13:53 [manager.py:391] -ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:52 lightllm_req_id:8 first_token_cost:207.4739933013916ms total_cost_time:207.5178623199463ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8117 prompt_cache_len:5151 prompt_cache_ratio:0.6345940618455094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 -DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:53 [batch.py:51] router release req id 8 -INFO 06-24 20:13:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10751700401306152 s -INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10982465744018555 s -DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=152022429020625600733726434338023991900, time:1750767233.2724051s req_ids:[8] -DEBUG 06-24 20:13:53 [manager.py:391] -ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:374.78041648864746ms total_cost_time:374.82500076293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8118 prompt_cache_len:5151 prompt_cache_ratio:0.6345158906134516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 -DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10859441757202148 s -INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.11058688163757324 s -DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=139669373905826357507293369115477898259, time:1750767233.6479955s req_ids:[8] -DEBUG 06-24 20:13:53 [manager.py:391] -ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:204.6663761138916ms total_cost_time:204.7119140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8119 prompt_cache_len:5151 prompt_cache_ratio:0.6344377386377633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 -DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:53 [manager.py:224] router recive req id 8 cost time 0.10773491859436035 s -INFO 06-24 20:13:53 [manager.py:68] detokenization recv req id 8 cost time 0.10980701446533203 s -DEBUG 06-24 20:13:53 [manager.py:391] Prefill Batch: batch_id=192703656837568399344039916382832874153, time:1750767233.8604848s req_ids:[8] -DEBUG 06-24 20:13:53 [manager.py:391] -ERROR 06-24 20:13:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:201.69878005981445ms total_cost_time:201.74288749694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8120 prompt_cache_len:5151 prompt_cache_ratio:0.63435960591133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 -DEBUG 06-24 20:13:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s -INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.10977697372436523 s -DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=93883353495894323270944665940305027394, time:1750767234.0687232s req_ids:[8] -DEBUG 06-24 20:13:54 [manager.py:391] -ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:53 lightllm_req_id:8 first_token_cost:212.7540111541748ms total_cost_time:212.81051635742188ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:8121 prompt_cache_len:5151 prompt_cache_ratio:0.634281492427041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 -DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10798382759094238 s -INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998058319091797 s -DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=120185731613970969993399398035329968520, time:1750767234.286131s req_ids:[8] -DEBUG 06-24 20:13:54 [manager.py:391] -ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:201.2033462524414ms total_cost_time:201.246976852417ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8122 prompt_cache_len:5151 prompt_cache_ratio:0.6342033981777887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 -DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10770392417907715 s -INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s -DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=35888724780471275313928195310894683225, time:1750767234.511684s req_ids:[8] -DEBUG 06-24 20:13:54 [manager.py:391] -ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:222.6095199584961ms total_cost_time:222.6581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:8123 prompt_cache_len:5151 prompt_cache_ratio:0.6341253231564693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 -DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.11200356483459473 s -INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.11416745185852051 s -DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=323760679570478905223602029173197638473, time:1750767234.7270212s req_ids:[8] -DEBUG 06-24 20:13:54 [manager.py:391] -ERROR 06-24 20:13:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:207.5190544128418ms total_cost_time:207.61752128601074ms,out_token_counter:1 mean_per_token_cost_time: 0.09846687316894531ms prompt_token_num:8124 prompt_cache_len:5151 prompt_cache_ratio:0.6340472673559823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 -DEBUG 06-24 20:13:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:54 [manager.py:224] router recive req id 8 cost time 0.10851311683654785 s -INFO 06-24 20:13:54 [manager.py:68] detokenization recv req id 8 cost time 0.11048150062561035 s -DEBUG 06-24 20:13:54 [manager.py:391] Prefill Batch: batch_id=144232891784506052291040991711523305370, time:1750767234.9395273s req_ids:[8] -DEBUG 06-24 20:13:54 [manager.py:391] -ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:54 lightllm_req_id:8 first_token_cost:207.14902877807617ms total_cost_time:207.19504356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8125 prompt_cache_len:5151 prompt_cache_ratio:0.6339692307692307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 -DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.10739564895629883 s -INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s -DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=213472795848757023692836494534585206047, time:1750767235.159325s req_ids:[8] -DEBUG 06-24 20:13:55 [manager.py:391] -ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:217.1652317047119ms total_cost_time:217.21172332763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8126 prompt_cache_len:5151 prompt_cache_ratio:0.6338912133891214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 -DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.10814499855041504 s -INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.1100912094116211 s -DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=184152968178471537931364003203748091832, time:1750767235.3752327s req_ids:[8] -DEBUG 06-24 20:13:55 [manager.py:391] -ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:209.3331813812256ms total_cost_time:209.37728881835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8127 prompt_cache_len:5151 prompt_cache_ratio:0.633813215208564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 -DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:55 [manager.py:224] router recive req id 8 cost time 0.3099689483642578 s -INFO 06-24 20:13:55 [manager.py:68] detokenization recv req id 8 cost time 0.3119676113128662 s -DEBUG 06-24 20:13:55 [manager.py:391] Prefill Batch: batch_id=107749667326992076180421441000421056615, time:1750767235.8063462s req_ids:[8] -DEBUG 06-24 20:13:55 [manager.py:391] -ERROR 06-24 20:13:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:429.027795791626ms total_cost_time:429.07238006591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8128 prompt_cache_len:5151 prompt_cache_ratio:0.6337352362204725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 -DEBUG 06-24 20:13:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10877585411071777 s -INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.11067652702331543 s -DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=306315429860793680452079003117736245369, time:1750767236.0260406s req_ids:[8] -DEBUG 06-24 20:13:56 [manager.py:391] -ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:55 lightllm_req_id:8 first_token_cost:207.19337463378906ms total_cost_time:207.23676681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8129 prompt_cache_len:5151 prompt_cache_ratio:0.6336572764177636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 -DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10743141174316406 s -INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10941052436828613 s -DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=194206650407103124662620301303233951784, time:1750767236.2360225s req_ids:[8] -DEBUG 06-24 20:13:56 [manager.py:391] -ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:204.8027515411377ms total_cost_time:204.8473358154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8130 prompt_cache_len:5151 prompt_cache_ratio:0.6335793357933579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 -DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s -INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10985898971557617 s -DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=171769448427063724711245677916851727497, time:1750767236.4470484s req_ids:[8] -DEBUG 06-24 20:13:56 [manager.py:391] -ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:206.75039291381836ms total_cost_time:206.79497718811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8131 prompt_cache_len:5151 prompt_cache_ratio:0.6335014143401796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 -DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10873651504516602 s -INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.11079764366149902 s -DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=168692241335269363455072678160687170632, time:1750767236.6600668s req_ids:[8] -DEBUG 06-24 20:13:56 [manager.py:391] -ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:209.05470848083496ms total_cost_time:209.09881591796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8132 prompt_cache_len:5151 prompt_cache_ratio:0.6334235120511559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 -DEBUG 06-24 20:13:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:56 [manager.py:224] router recive req id 8 cost time 0.10744166374206543 s -INFO 06-24 20:13:56 [manager.py:68] detokenization recv req id 8 cost time 0.10937261581420898 s -DEBUG 06-24 20:13:56 [manager.py:391] Prefill Batch: batch_id=56391575287625588199529243349556454808, time:1750767236.879535s req_ids:[8] -DEBUG 06-24 20:13:56 [manager.py:391] -ERROR 06-24 20:13:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:211.15970611572266ms total_cost_time:211.20500564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8133 prompt_cache_len:5151 prompt_cache_ratio:0.633345628919218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 -DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s -INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s -DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=236141038221640238371131846152720113750, time:1750767237.0933435s req_ids:[8] -DEBUG 06-24 20:13:57 [manager.py:391] -ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:56 lightllm_req_id:8 first_token_cost:211.93361282348633ms total_cost_time:211.96651458740234ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:8134 prompt_cache_len:5151 prompt_cache_ratio:0.6332677649373002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 -DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10651326179504395 s -INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.10872602462768555 s -DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=280094735408299385741857150805681554139, time:1750767237.3088446s req_ids:[8] -DEBUG 06-24 20:13:57 [manager.py:391] -ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:205.17706871032715ms total_cost_time:205.21950721740723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8135 prompt_cache_len:5151 prompt_cache_ratio:0.6331899200983405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 -DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10834670066833496 s -INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11030459403991699 s -DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=247827023491710030646849402339736665414, time:1750767237.5185273s req_ids:[8] -DEBUG 06-24 20:13:57 [manager.py:391] -ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:210.6621265411377ms total_cost_time:210.7079029083252ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8136 prompt_cache_len:5151 prompt_cache_ratio:0.6331120943952803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 -DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10869622230529785 s -INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.11071562767028809 s -DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=171020838325629176785303915474818867972, time:1750767237.735349s req_ids:[8] -DEBUG 06-24 20:13:57 [manager.py:391] -ERROR 06-24 20:13:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:209.05518531799316ms total_cost_time:209.10120010375977ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8137 prompt_cache_len:5151 prompt_cache_ratio:0.6330342878210643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 -DEBUG 06-24 20:13:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:57 [batch.py:51] router release req id 8 -INFO 06-24 20:13:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:57 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s -INFO 06-24 20:13:57 [manager.py:68] detokenization recv req id 8 cost time 0.10966658592224121 s -DEBUG 06-24 20:13:57 [manager.py:391] Prefill Batch: batch_id=247857284863722506367431758765285983902, time:1750767237.9537423s req_ids:[8] -DEBUG 06-24 20:13:57 [manager.py:391] -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:13:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:13:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 34143.317 tokens/s -DEBUG 06-24 20:13:58 [stats.py:37] Avg prompt tokens throughput: 34134.808 tokens/s -DEBUG 06-24 20:13:58 [stats.py:37] Avg generate tokens throughput: 8.508 tokens/s -INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:57 lightllm_req_id:8 first_token_cost:363.0220890045166ms total_cost_time:363.0683422088623ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8138 prompt_cache_len:5151 prompt_cache_ratio:0.6329565003686409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 -DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s -INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.11066389083862305 s -DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=187487224563553522143730522357142036644, time:1750767238.314153s req_ids:[8] -DEBUG 06-24 20:13:58 [manager.py:391] -ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:204.07390594482422ms total_cost_time:204.1182518005371ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8139 prompt_cache_len:5151 prompt_cache_ratio:0.632878732030962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 -DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10882019996643066 s -INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s -DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=314945835761892214818346075508883585304, time:1750767238.5274227s req_ids:[8] -DEBUG 06-24 20:13:58 [manager.py:391] -ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:206.7878246307373ms total_cost_time:206.8307399749756ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8140 prompt_cache_len:5151 prompt_cache_ratio:0.6328009828009828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 -DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10771703720092773 s -INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.10958027839660645 s -DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=263417574246561362819796126301078586497, time:1750767238.7400277s req_ids:[8] -DEBUG 06-24 20:13:58 [manager.py:391] -ERROR 06-24 20:13:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:204.33926582336426ms total_cost_time:204.38385009765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8141 prompt_cache_len:5151 prompt_cache_ratio:0.632723252671662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 -DEBUG 06-24 20:13:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:58 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s -INFO 06-24 20:13:58 [manager.py:68] detokenization recv req id 8 cost time 0.10968232154846191 s -DEBUG 06-24 20:13:58 [manager.py:391] Prefill Batch: batch_id=215016467225992275090012779667877494795, time:1750767238.9563518s req_ids:[8] -DEBUG 06-24 20:13:58 [manager.py:391] -ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:58 lightllm_req_id:8 first_token_cost:210.02674102783203ms total_cost_time:210.07108688354492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8142 prompt_cache_len:5151 prompt_cache_ratio:0.6326455416359617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 -DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.10844302177429199 s -INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11056399345397949 s -DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=36770850189304221059211412149047832042, time:1750767239.177923s req_ids:[8] -DEBUG 06-24 20:13:59 [manager.py:391] -ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:218.5971736907959ms total_cost_time:218.6427116394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8143 prompt_cache_len:5151 prompt_cache_ratio:0.6325678496868476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 -DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.10827469825744629 s -INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s -DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=299121397611924274749621096919289546738, time:1750767239.3912458s req_ids:[8] -DEBUG 06-24 20:13:59 [manager.py:391] -ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:206.024169921875ms total_cost_time:206.0678005218506ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8144 prompt_cache_len:5151 prompt_cache_ratio:0.6324901768172888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 -DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.1089179515838623 s -INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s -DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=109876234039614677024550444379341383400, time:1750767239.6031022s req_ids:[8] -DEBUG 06-24 20:13:59 [manager.py:391] -ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:203.59015464782715ms total_cost_time:203.63521575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8145 prompt_cache_len:5151 prompt_cache_ratio:0.6324125230202579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 -DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:13:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:13:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:13:59 [manager.py:224] router recive req id 8 cost time 0.1079263687133789 s -INFO 06-24 20:13:59 [manager.py:68] detokenization recv req id 8 cost time 0.10975527763366699 s -DEBUG 06-24 20:13:59 [manager.py:391] Prefill Batch: batch_id=108296032935899573809964476709793020728, time:1750767239.8128839s req_ids:[8] -DEBUG 06-24 20:13:59 [manager.py:391] -ERROR 06-24 20:13:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:13:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:13:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:200.6673812866211ms total_cost_time:200.71005821228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8146 prompt_cache_len:5151 prompt_cache_ratio:0.6323348882887306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:13:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 -DEBUG 06-24 20:13:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:13:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:13:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:13:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:13:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10916948318481445 s -INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.11116528511047363 s -DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=30166649213215935523335351540027366351, time:1750767240.0184507s req_ids:[8] -DEBUG 06-24 20:14:00 [manager.py:391] -ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:13:59 lightllm_req_id:8 first_token_cost:207.71121978759766ms total_cost_time:207.75556564331055ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8147 prompt_cache_len:5151 prompt_cache_ratio:0.6322572726156868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 -DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10744524002075195 s -INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.1093745231628418 s -DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=66687902070621836592232725297456087870, time:1750767240.231232s req_ids:[8] -DEBUG 06-24 20:14:00 [manager.py:391] -ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:365.54718017578125ms total_cost_time:365.59271812438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8148 prompt_cache_len:5151 prompt_cache_ratio:0.632179675994109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 -DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10818886756896973 s -INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.1101377010345459 s -DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=124206705536624127938749662369392206157, time:1750767240.5986862s req_ids:[8] -DEBUG 06-24 20:14:00 [manager.py:391] -ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:199.2659568786621ms total_cost_time:199.32317733764648ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:8149 prompt_cache_len:5151 prompt_cache_ratio:0.6321020984169837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 -DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:00 [manager.py:224] router recive req id 8 cost time 0.10762882232666016 s -INFO 06-24 20:14:00 [manager.py:68] detokenization recv req id 8 cost time 0.10964536666870117 s -DEBUG 06-24 20:14:00 [manager.py:391] Prefill Batch: batch_id=232137978353780083554307627247549918360, time:1750767240.8090641s req_ids:[8] -DEBUG 06-24 20:14:00 [manager.py:391] -ERROR 06-24 20:14:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:204.75339889526367ms total_cost_time:204.79869842529297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8150 prompt_cache_len:5151 prompt_cache_ratio:0.6320245398773006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 -DEBUG 06-24 20:14:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s -INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.1100625991821289 s -DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=82097101414835021965599505539140264462, time:1750767241.0209415s req_ids:[8] -DEBUG 06-24 20:14:01 [manager.py:391] -ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:00 lightllm_req_id:8 first_token_cost:205.99961280822754ms total_cost_time:206.04491233825684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8151 prompt_cache_len:5151 prompt_cache_ratio:0.631947000368053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 -DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s -INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s -DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=119557458893656592067856531594900213445, time:1750767241.23264s req_ids:[8] -DEBUG 06-24 20:14:01 [manager.py:391] -ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:211.6868495941162ms total_cost_time:211.73405647277832ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:8152 prompt_cache_len:5151 prompt_cache_ratio:0.6318694798822375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 -DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:01 [batch.py:51] router release req id 8 -INFO 06-24 20:14:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10933756828308105 s -INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.11141347885131836 s -DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=210002580486205887339835189436195781795, time:1750767241.4465356s req_ids:[8] -DEBUG 06-24 20:14:01 [manager.py:391] -ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:203.62281799316406ms total_cost_time:203.66597175598145ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8153 prompt_cache_len:5151 prompt_cache_ratio:0.6317919784128542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 -DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s -INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.11004638671875 s -DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=136195498455880647709803462812207535773, time:1750767241.656808s req_ids:[8] -DEBUG 06-24 20:14:01 [manager.py:391] -ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:203.446626663208ms total_cost_time:203.4902572631836ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8154 prompt_cache_len:5151 prompt_cache_ratio:0.6317144959529065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 -DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:01 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s -INFO 06-24 20:14:01 [manager.py:68] detokenization recv req id 8 cost time 0.10999011993408203 s -DEBUG 06-24 20:14:01 [manager.py:391] Prefill Batch: batch_id=222816441817315718288396958081052877764, time:1750767241.8676915s req_ids:[8] -DEBUG 06-24 20:14:01 [manager.py:391] -ERROR 06-24 20:14:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:193.29547882080078ms total_cost_time:193.34006309509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8155 prompt_cache_len:5151 prompt_cache_ratio:0.6316370324954016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 -DEBUG 06-24 20:14:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10818672180175781 s -INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11019086837768555 s -DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=204118815246304273799472226266466998678, time:1750767242.0661912s req_ids:[8] -DEBUG 06-24 20:14:02 [manager.py:391] -ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:01 lightllm_req_id:8 first_token_cost:205.72471618652344ms total_cost_time:205.7664394378662ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8156 prompt_cache_len:5151 prompt_cache_ratio:0.6315595880333497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 -DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10853719711303711 s -INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11060047149658203 s -DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=85815348899662107165800156844841642807, time:1750767242.278215s req_ids:[8] -DEBUG 06-24 20:14:02 [manager.py:391] -ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:209.50555801391602ms total_cost_time:209.55252647399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8157 prompt_cache_len:5151 prompt_cache_ratio:0.6314821625597646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 -DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s -INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.11053848266601562 s -DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=265564970458232278766696871673682265576, time:1750767242.5081575s req_ids:[8] -DEBUG 06-24 20:14:02 [manager.py:391] -ERROR 06-24 20:14:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:220.38626670837402ms total_cost_time:220.43085098266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8158 prompt_cache_len:5151 prompt_cache_ratio:0.6314047560676637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 -DEBUG 06-24 20:14:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:02 [manager.py:224] router recive req id 8 cost time 0.31031346321105957 s -INFO 06-24 20:14:02 [manager.py:68] detokenization recv req id 8 cost time 0.3123600482940674 s -DEBUG 06-24 20:14:02 [manager.py:391] Prefill Batch: batch_id=291321288946418798067932882494203163434, time:1750767242.9429576s req_ids:[8] -DEBUG 06-24 20:14:02 [manager.py:391] -ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:02 lightllm_req_id:8 first_token_cost:431.03551864624023ms total_cost_time:431.1201572418213ms,out_token_counter:1 mean_per_token_cost_time: 0.08463859558105469ms prompt_token_num:8159 prompt_cache_len:5151 prompt_cache_ratio:0.6313273685500674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 -DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10814476013183594 s -INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102902889251709 s -DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=123380920499488466507541365330463193228, time:1750767243.1581955s req_ids:[8] -DEBUG 06-24 20:14:03 [manager.py:391] -ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:195.48416137695312ms total_cost_time:195.5277919769287ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8160 prompt_cache_len:5151 prompt_cache_ratio:0.63125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 -DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10854482650756836 s -INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.11057138442993164 s -DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=295859059032607079717317744804826931656, time:1750767243.361531s req_ids:[8] -DEBUG 06-24 20:14:03 [manager.py:391] -ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:197.2215175628662ms total_cost_time:197.2653865814209ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8161 prompt_cache_len:5151 prompt_cache_ratio:0.6311726504104889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 -DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10789775848388672 s -INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s -DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=158060897042739489710531752338729260904, time:1750767243.5683115s req_ids:[8] -DEBUG 06-24 20:14:03 [manager.py:391] -ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:204.3445110321045ms total_cost_time:204.38814163208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8162 prompt_cache_len:5151 prompt_cache_ratio:0.631095319774565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 -DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10597372055053711 s -INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.1080019474029541 s -DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=235162392724253059263539254686898747124, time:1750767243.7760034s req_ids:[8] -DEBUG 06-24 20:14:03 [manager.py:391] -ERROR 06-24 20:14:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:203.77159118652344ms total_cost_time:203.8135528564453ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8163 prompt_cache_len:5151 prompt_cache_ratio:0.6310180080852628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 -DEBUG 06-24 20:14:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:03 [manager.py:224] router recive req id 8 cost time 0.10777449607849121 s -INFO 06-24 20:14:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962677001953125 s -DEBUG 06-24 20:14:03 [manager.py:391] Prefill Batch: batch_id=292629814067217144237502741386069374171, time:1750767243.9983857s req_ids:[8] -DEBUG 06-24 20:14:03 [manager.py:391] -ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:03 lightllm_req_id:8 first_token_cost:190.66977500915527ms total_cost_time:190.71412086486816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8164 prompt_cache_len:5151 prompt_cache_ratio:0.6309407153356198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 -DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10720038414001465 s -INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10909485816955566 s -DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=191433332946761968458717048529716102269, time:1750767244.1889997s req_ids:[8] -DEBUG 06-24 20:14:04 [manager.py:391] -ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:184.7519874572754ms total_cost_time:184.79537963867188ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8165 prompt_cache_len:5151 prompt_cache_ratio:0.6308634415186772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 -DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10744166374206543 s -INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10933661460876465 s -DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=258997889155956506282448517417427670301, time:1750767244.3711169s req_ids:[8] -DEBUG 06-24 20:14:04 [manager.py:391] -ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:172.27745056152344ms total_cost_time:172.32203483581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8166 prompt_cache_len:5151 prompt_cache_ratio:0.6307861866274798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 -DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10735678672790527 s -INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.1094062328338623 s -DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=37727982923305833901555603148115169449, time:1750767244.547978s req_ids:[8] -DEBUG 06-24 20:14:04 [manager.py:391] -ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:178.0240535736084ms total_cost_time:178.06744575500488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8167 prompt_cache_len:5151 prompt_cache_ratio:0.6307089506550753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 -DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10743284225463867 s -INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10945582389831543 s -DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=249115819185616173968662537037994923968, time:1750767244.7388084s req_ids:[8] -DEBUG 06-24 20:14:04 [manager.py:391] -ERROR 06-24 20:14:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:212.67271041870117ms total_cost_time:212.71681785583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8168 prompt_cache_len:5151 prompt_cache_ratio:0.6306317335945152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 -DEBUG 06-24 20:14:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:04 [batch.py:51] router release req id 8 -INFO 06-24 20:14:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:04 [manager.py:224] router recive req id 8 cost time 0.10772371292114258 s -INFO 06-24 20:14:04 [manager.py:68] detokenization recv req id 8 cost time 0.10971260070800781 s -DEBUG 06-24 20:14:04 [manager.py:391] Prefill Batch: batch_id=201972237846244439557616264509139902619, time:1750767244.9525092s req_ids:[8] -DEBUG 06-24 20:14:04 [manager.py:391] -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:04 lightllm_req_id:8 first_token_cost:363.07334899902344ms total_cost_time:363.11912536621094ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8169 prompt_cache_len:5151 prompt_cache_ratio:0.6305545354388542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 -DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s -INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021280288696289 s -DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=7119149069212245141719419761246496855, time:1750767245.3185558s req_ids:[8] -DEBUG 06-24 20:14:05 [manager.py:391] -ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:201.9331455230713ms total_cost_time:201.97463035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8170 prompt_cache_len:5151 prompt_cache_ratio:0.6304773561811505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 -DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10893368721008301 s -INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11098885536193848 s -DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=173053887485502014893886567756538319646, time:1750767245.5305269s req_ids:[8] -DEBUG 06-24 20:14:05 [manager.py:391] -ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:207.66472816467285ms total_cost_time:207.70788192749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8171 prompt_cache_len:5151 prompt_cache_ratio:0.6304001958144658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 -DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s -INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.11046099662780762 s -DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=130770835150352112341883891056301955236, time:1750767245.7455034s req_ids:[8] -DEBUG 06-24 20:14:05 [manager.py:391] -ERROR 06-24 20:14:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:202.70323753356934ms total_cost_time:202.7449607849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8172 prompt_cache_len:5151 prompt_cache_ratio:0.6303230543318649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 -DEBUG 06-24 20:14:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:05 [manager.py:224] router recive req id 8 cost time 0.1082160472869873 s -INFO 06-24 20:14:05 [manager.py:68] detokenization recv req id 8 cost time 0.1101384162902832 s -DEBUG 06-24 20:14:05 [manager.py:391] Prefill Batch: batch_id=251779298724697805889993182398210140621, time:1750767245.9533567s req_ids:[8] -DEBUG 06-24 20:14:05 [manager.py:391] -ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:05 lightllm_req_id:8 first_token_cost:208.909273147583ms total_cost_time:208.953857421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8173 prompt_cache_len:5151 prompt_cache_ratio:0.6302459317264163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 -DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10827398300170898 s -INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014747619628906 s -DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=31221589617152319978797119831902836282, time:1750767246.1648116s req_ids:[8] -DEBUG 06-24 20:14:06 [manager.py:391] -ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:203.13787460327148ms total_cost_time:203.18102836608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8174 prompt_cache_len:5151 prompt_cache_ratio:0.6301688279911916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 -DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10723757743835449 s -INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s -DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=35085441372502870374992534548054757330, time:1750767246.37842s req_ids:[8] -DEBUG 06-24 20:14:06 [manager.py:391] -ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:210.3259563446045ms total_cost_time:210.37030220031738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8175 prompt_cache_len:5151 prompt_cache_ratio:0.6300917431192661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 -DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:06 [batch.py:51] router release req id 8 -DEBUG 06-24 20:14:06 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:06 [manager.py:283] -DEBUG 06-24 20:14:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:06 [manager.py:284] -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.10761165618896484 s -INFO 06-24 20:14:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963797569274902 s -DEBUG 06-24 20:14:06 [manager.py:391] Prefill Batch: batch_id=239538971643298834180927939180398719612, time:1750767246.5926855s req_ids:[8] -DEBUG 06-24 20:14:06 [manager.py:391] -ERROR 06-24 20:14:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:212.39948272705078ms total_cost_time:212.44525909423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8176 prompt_cache_len:5151 prompt_cache_ratio:0.6300146771037182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 -DEBUG 06-24 20:14:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:06 [manager.py:224] router recive req id 8 cost time 0.30980801582336426 s -INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.3118412494659424 s -DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=195563440773596450115787238403469999120, time:1750767247.0107722s req_ids:[8] -DEBUG 06-24 20:14:07 [manager.py:391] -ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:06 lightllm_req_id:8 first_token_cost:410.7322692871094ms total_cost_time:410.7778072357178ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8177 prompt_cache_len:5151 prompt_cache_ratio:0.6299376299376299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 -DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10856127738952637 s -INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11006402969360352 s -DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=323956641456327166886418403578493900429, time:1750767247.2368467s req_ids:[8] -DEBUG 06-24 20:14:07 [manager.py:391] -ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:220.0922966003418ms total_cost_time:220.15666961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:8178 prompt_cache_len:5151 prompt_cache_ratio:0.6298606016140865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 -DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10838127136230469 s -INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s -DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=238740210514643135315666247753766729533, time:1750767247.451941s req_ids:[8] -DEBUG 06-24 20:14:07 [manager.py:391] -ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:205.2443027496338ms total_cost_time:205.28674125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8179 prompt_cache_len:5151 prompt_cache_ratio:0.6297835921261767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 -DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10870504379272461 s -INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11093282699584961 s -DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=161814794533151180275238584937388708918, time:1750767247.6649897s req_ids:[8] -DEBUG 06-24 20:14:07 [manager.py:391] -ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:207.36026763916016ms total_cost_time:207.42034912109375ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8180 prompt_cache_len:5151 prompt_cache_ratio:0.6297066014669926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 -DEBUG 06-24 20:14:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:07 [manager.py:224] router recive req id 8 cost time 0.10823798179626465 s -INFO 06-24 20:14:07 [manager.py:68] detokenization recv req id 8 cost time 0.11041474342346191 s -DEBUG 06-24 20:14:07 [manager.py:391] Prefill Batch: batch_id=31551757430701560686430636243854652421, time:1750767247.8768175s req_ids:[8] -DEBUG 06-24 20:14:07 [manager.py:391] -ERROR 06-24 20:14:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:207.08847045898438ms total_cost_time:207.13424682617188ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8181 prompt_cache_len:5151 prompt_cache_ratio:0.6296296296296297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 -DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.10760188102722168 s -INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.10944724082946777 s -DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=88992985543877452403508850705920334389, time:1750767248.0906332s req_ids:[8] -DEBUG 06-24 20:14:08 [manager.py:391] -ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:07 lightllm_req_id:8 first_token_cost:206.04658126831055ms total_cost_time:206.08949661254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8182 prompt_cache_len:5151 prompt_cache_ratio:0.6295526766071865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 -DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.10908031463623047 s -INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11165714263916016 s -DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=292683208528598043607404119324130709373, time:1750767248.3036668s req_ids:[8] -DEBUG 06-24 20:14:08 [manager.py:391] -DEBUG 06-24 20:14:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 36326.220 tokens/s -DEBUG 06-24 20:14:08 [stats.py:37] Avg prompt tokens throughput: 36317.419 tokens/s -DEBUG 06-24 20:14:08 [stats.py:37] Avg generate tokens throughput: 8.801 tokens/s -ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:365.9791946411133ms total_cost_time:366.0237789154053ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8183 prompt_cache_len:5151 prompt_cache_ratio:0.6294757423927655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 -DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.1081998348236084 s -INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s -DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=8271224489075767715714486620883965716, time:1750767248.6767516s req_ids:[8] -DEBUG 06-24 20:14:08 [manager.py:391] -ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:201.08389854431152ms total_cost_time:201.1275291442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8184 prompt_cache_len:5151 prompt_cache_ratio:0.6293988269794721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 -DEBUG 06-24 20:14:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:08 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s -INFO 06-24 20:14:08 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s -DEBUG 06-24 20:14:08 [manager.py:391] Prefill Batch: batch_id=60147356553226541158890653771445803652, time:1750767248.8808453s req_ids:[8] -DEBUG 06-24 20:14:08 [manager.py:391] -ERROR 06-24 20:14:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:208.05859565734863ms total_cost_time:208.10365676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8185 prompt_cache_len:5151 prompt_cache_ratio:0.6293219303604154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 -DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s -INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.11067771911621094 s -DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=87896529675835842931984396752353875373, time:1750767249.0933948s req_ids:[8] -DEBUG 06-24 20:14:09 [manager.py:391] -ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:08 lightllm_req_id:8 first_token_cost:201.76315307617188ms total_cost_time:201.80797576904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8186 prompt_cache_len:5151 prompt_cache_ratio:0.6292450525287075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 -DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10827112197875977 s -INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s -DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=11852556071448998926506365861048590445, time:1750767249.304143s req_ids:[8] -DEBUG 06-24 20:14:09 [manager.py:391] -ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:207.88121223449707ms total_cost_time:207.92651176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8187 prompt_cache_len:5151 prompt_cache_ratio:0.6291681934774642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 -DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10780072212219238 s -INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.10976672172546387 s -DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=287418870297530242715208631967386355799, time:1750767249.5179174s req_ids:[8] -DEBUG 06-24 20:14:09 [manager.py:391] -ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:206.83956146240234ms total_cost_time:206.88152313232422ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8188 prompt_cache_len:5151 prompt_cache_ratio:0.6290913531998046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 -DEBUG 06-24 20:14:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:09 [manager.py:224] router recive req id 8 cost time 0.10835814476013184 s -INFO 06-24 20:14:09 [manager.py:68] detokenization recv req id 8 cost time 0.11041402816772461 s -DEBUG 06-24 20:14:09 [manager.py:391] Prefill Batch: batch_id=338202895807794559099934398269674607235, time:1750767249.7306821s req_ids:[8] -DEBUG 06-24 20:14:09 [manager.py:391] -ERROR 06-24 20:14:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:371.8433380126953ms total_cost_time:371.8881607055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8189 prompt_cache_len:5151 prompt_cache_ratio:0.6290145316888509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 -DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s -INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983800888061523 s -DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=94871757423233156945319531700478610751, time:1750767250.1038203s req_ids:[8] -DEBUG 06-24 20:14:10 [manager.py:391] -ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:09 lightllm_req_id:8 first_token_cost:181.35309219360352ms total_cost_time:181.3967227935791ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8190 prompt_cache_len:5151 prompt_cache_ratio:0.6289377289377289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 -DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10856246948242188 s -INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s -DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=93978342822466537734810043780781309390, time:1750767250.2922082s req_ids:[8] -DEBUG 06-24 20:14:10 [manager.py:391] -ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:201.52783393859863ms total_cost_time:201.5702724456787ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8191 prompt_cache_len:5151 prompt_cache_ratio:0.6288609449395678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 -DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10747694969177246 s -INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11003804206848145 s -DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=163152984329460008773210291647198671893, time:1750767250.5013144s req_ids:[8] -DEBUG 06-24 20:14:10 [manager.py:391] -ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:207.15975761413574ms total_cost_time:207.21793174743652ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8192 prompt_cache_len:5151 prompt_cache_ratio:0.6287841796875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 -DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.11188268661499023 s -INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.11404538154602051 s -DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=216061748797111718729639123577209416088, time:1750767250.7148685s req_ids:[8] -DEBUG 06-24 20:14:10 [manager.py:391] -ERROR 06-24 20:14:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:207.92245864868164ms total_cost_time:208.0237865447998ms,out_token_counter:1 mean_per_token_cost_time: 0.10132789611816406ms prompt_token_num:8193 prompt_cache_len:5151 prompt_cache_ratio:0.6287074331746613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 -DEBUG 06-24 20:14:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:10 [manager.py:224] router recive req id 8 cost time 0.10775589942932129 s -INFO 06-24 20:14:10 [manager.py:68] detokenization recv req id 8 cost time 0.10970067977905273 s -DEBUG 06-24 20:14:10 [manager.py:391] Prefill Batch: batch_id=278247609722941237089342914051659026378, time:1750767250.9275854s req_ids:[8] -DEBUG 06-24 20:14:10 [manager.py:391] -ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:10 lightllm_req_id:8 first_token_cost:206.9568634033203ms total_cost_time:207.0009708404541ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8194 prompt_cache_len:5151 prompt_cache_ratio:0.6286307053941909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 -DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.10873222351074219 s -INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.1106879711151123 s -DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=9589251780774518106927254503588327133, time:1750767251.1398485s req_ids:[8] -DEBUG 06-24 20:14:11 [manager.py:391] -ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:204.86974716186523ms total_cost_time:204.91361618041992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8195 prompt_cache_len:5151 prompt_cache_ratio:0.6285539963392313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 -DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.10881590843200684 s -INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.11082577705383301 s -DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=139860195202762718650448344765985069076, time:1750767251.3538775s req_ids:[8] -DEBUG 06-24 20:14:11 [manager.py:391] -ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:208.88328552246094ms total_cost_time:208.92667770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8196 prompt_cache_len:5151 prompt_cache_ratio:0.6284773060029283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 -DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.31079816818237305 s -INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.31270885467529297 s -DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=283716800539442594557754057455017025459, time:1750767251.7616658s req_ids:[8] -DEBUG 06-24 20:14:11 [manager.py:391] -ERROR 06-24 20:14:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:376.6818046569824ms total_cost_time:376.7259120941162ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8197 prompt_cache_len:5151 prompt_cache_ratio:0.6284006343784312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 -DEBUG 06-24 20:14:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:11 [manager.py:224] router recive req id 8 cost time 0.1074686050415039 s -INFO 06-24 20:14:11 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s -DEBUG 06-24 20:14:11 [manager.py:391] Prefill Batch: batch_id=309199642275444589512129230919881262491, time:1750767251.9454248s req_ids:[8] -DEBUG 06-24 20:14:11 [manager.py:391] -ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:11 lightllm_req_id:8 first_token_cost:190.57011604309082ms total_cost_time:190.6137466430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8198 prompt_cache_len:5151 prompt_cache_ratio:0.6283239814588925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 -DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10809516906738281 s -INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s -DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=41261312792576203565358700047348206384, time:1750767252.1430855s req_ids:[8] -DEBUG 06-24 20:14:12 [manager.py:391] -ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:203.53436470031738ms total_cost_time:203.57584953308105ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8199 prompt_cache_len:5151 prompt_cache_ratio:0.628247347237468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 -DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10726428031921387 s -INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.10926389694213867 s -DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=11101219059342487458976537520204190429, time:1750767252.3562844s req_ids:[8] -DEBUG 06-24 20:14:12 [manager.py:391] -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:208.5745334625244ms total_cost_time:208.6169719696045ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8200 prompt_cache_len:5151 prompt_cache_ratio:0.6281707317073171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 -DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.10827302932739258 s -INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11030721664428711 s -DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=257465763184886632177255511979938622553, time:1750767252.5679746s req_ids:[8] -DEBUG 06-24 20:14:12 [manager.py:391] -ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:204.91409301757812ms total_cost_time:204.9562931060791ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8201 prompt_cache_len:5151 prompt_cache_ratio:0.6280941348616023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 -DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.1081991195678711 s -INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.11079120635986328 s -DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=125565558197597165677612537791340702063, time:1750767252.7779922s req_ids:[8] -DEBUG 06-24 20:14:12 [manager.py:391] -ERROR 06-24 20:14:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:206.22706413269043ms total_cost_time:206.2692642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8202 prompt_cache_len:5151 prompt_cache_ratio:0.6280175566934894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 -DEBUG 06-24 20:14:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:12 [manager.py:224] router recive req id 8 cost time 0.1077721118927002 s -INFO 06-24 20:14:12 [manager.py:68] detokenization recv req id 8 cost time 0.10992860794067383 s -DEBUG 06-24 20:14:12 [manager.py:391] Prefill Batch: batch_id=287135127654882255258564632646566994497, time:1750767252.9914038s req_ids:[8] -DEBUG 06-24 20:14:12 [manager.py:391] -ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:12 lightllm_req_id:8 first_token_cost:206.94351196289062ms total_cost_time:206.9845199584961ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8203 prompt_cache_len:5151 prompt_cache_ratio:0.6279409971961477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 -DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.1081533432006836 s -INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.11029958724975586 s -DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=115936952661952553947224920661484405447, time:1750767253.2038233s req_ids:[8] -DEBUG 06-24 20:14:13 [manager.py:391] -ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:206.64596557617188ms total_cost_time:206.68816566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8204 prompt_cache_len:5151 prompt_cache_ratio:0.6278644563627499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 -DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10834622383117676 s -INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.11039519309997559 s -DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=190919211405241075570177676424753789204, time:1750767253.416654s req_ids:[8] -DEBUG 06-24 20:14:13 [manager.py:391] -ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:207.26680755615234ms total_cost_time:207.32998847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:8205 prompt_cache_len:5151 prompt_cache_ratio:0.6277879341864717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 -DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10870003700256348 s -INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.1107950210571289 s -DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=322303341283369671817303233250120732131, time:1750767253.628697s req_ids:[8] -DEBUG 06-24 20:14:13 [manager.py:391] -ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:209.8846435546875ms total_cost_time:209.92779731750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8206 prompt_cache_len:5151 prompt_cache_ratio:0.6277114306604923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 -DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:13 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s -INFO 06-24 20:14:13 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s -DEBUG 06-24 20:14:13 [manager.py:391] Prefill Batch: batch_id=80904245895879391559551529713843570840, time:1750767253.8510287s req_ids:[8] -DEBUG 06-24 20:14:13 [manager.py:391] -ERROR 06-24 20:14:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:215.81792831420898ms total_cost_time:215.86084365844727ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8207 prompt_cache_len:5151 prompt_cache_ratio:0.6276349457779944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 -DEBUG 06-24 20:14:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10705065727233887 s -INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10907602310180664 s -DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=253125965026509362139665168163568890401, time:1750767254.0669687s req_ids:[8] -DEBUG 06-24 20:14:14 [manager.py:391] -ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:13 lightllm_req_id:8 first_token_cost:205.08289337158203ms total_cost_time:205.1255702972412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8208 prompt_cache_len:5151 prompt_cache_ratio:0.6275584795321637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 -DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.1074686050415039 s -INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.11006450653076172 s -DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=160537196442207926638327708239825641714, time:1750767254.277483s req_ids:[8] -DEBUG 06-24 20:14:14 [manager.py:391] -ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:208.68253707885742ms total_cost_time:208.7252140045166ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8209 prompt_cache_len:5151 prompt_cache_ratio:0.6274820319161896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 -DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10750818252563477 s -INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10958695411682129 s -DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=15534810910617778980985037523048271680, time:1750767254.492441s req_ids:[8] -DEBUG 06-24 20:14:14 [manager.py:391] -ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:207.11731910705566ms total_cost_time:207.16142654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8210 prompt_cache_len:5151 prompt_cache_ratio:0.6274056029232643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 -DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:14 [manager.py:224] router recive req id 8 cost time 0.10750031471252441 s -INFO 06-24 20:14:14 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s -DEBUG 06-24 20:14:14 [manager.py:391] Prefill Batch: batch_id=178290631810559916833148922978289016388, time:1750767254.7044828s req_ids:[8] -DEBUG 06-24 20:14:14 [manager.py:391] -ERROR 06-24 20:14:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:377.6285648345947ms total_cost_time:377.6721954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8211 prompt_cache_len:5151 prompt_cache_ratio:0.6273291925465838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 -DEBUG 06-24 20:14:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10750293731689453 s -INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.10943055152893066 s -DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=255349555432442777664593291734636731032, time:1750767255.0836663s req_ids:[8] -DEBUG 06-24 20:14:15 [manager.py:391] -ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:14 lightllm_req_id:8 first_token_cost:202.55470275878906ms total_cost_time:202.59857177734375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8212 prompt_cache_len:5151 prompt_cache_ratio:0.6272528007793473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 -DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10868954658508301 s -INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s -DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=167519846264033505778519093198355888213, time:1750767255.296174s req_ids:[8] -DEBUG 06-24 20:14:15 [manager.py:391] -ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:206.71939849853516ms total_cost_time:206.76350593566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8213 prompt_cache_len:5151 prompt_cache_ratio:0.6271764276147571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 -DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10826683044433594 s -INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.11033415794372559 s -DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=295995382628478015370261587843171751805, time:1750767255.5086071s req_ids:[8] -DEBUG 06-24 20:14:15 [manager.py:391] -ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:211.47632598876953ms total_cost_time:211.5466594696045ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8214 prompt_cache_len:5151 prompt_cache_ratio:0.627100073046019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 -DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s -INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013031005859375 s -DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=155453941317106121589221654295195309468, time:1750767255.731334s req_ids:[8] -DEBUG 06-24 20:14:15 [manager.py:391] -ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:179.002046585083ms total_cost_time:179.0473461151123ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8215 prompt_cache_len:5151 prompt_cache_ratio:0.6270237370663421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 -DEBUG 06-24 20:14:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:15 [manager.py:224] router recive req id 8 cost time 0.10717415809631348 s -INFO 06-24 20:14:15 [manager.py:68] detokenization recv req id 8 cost time 0.10920119285583496 s -DEBUG 06-24 20:14:15 [manager.py:391] Prefill Batch: batch_id=6533685983715925204155624854801809804, time:1750767255.9090567s req_ids:[8] -DEBUG 06-24 20:14:15 [manager.py:391] -ERROR 06-24 20:14:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:194.00715827941895ms total_cost_time:194.04983520507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8216 prompt_cache_len:5151 prompt_cache_ratio:0.6269474196689386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 -DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10762786865234375 s -INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1095588207244873 s -DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=300283714207460228834679828023080769626, time:1750767256.1206934s req_ids:[8] -DEBUG 06-24 20:14:16 [manager.py:391] -ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:15 lightllm_req_id:8 first_token_cost:211.24649047851562ms total_cost_time:211.29202842712402ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8217 prompt_cache_len:5151 prompt_cache_ratio:0.6268711208470245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 -DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10800671577453613 s -INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1099848747253418 s -DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=109778357837254034357768967643383568761, time:1750767256.3332043s req_ids:[8] -DEBUG 06-24 20:14:16 [manager.py:391] -ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:168.15757751464844ms total_cost_time:168.1997776031494ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8218 prompt_cache_len:5151 prompt_cache_ratio:0.6267948405938184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 -DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:16 [batch.py:51] router release req id 8 -INFO 06-24 20:14:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10783529281616211 s -INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.10969233512878418 s -DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=5055397033538650112742120216689907639, time:1750767256.5079765s req_ids:[8] -DEBUG 06-24 20:14:16 [manager.py:391] -ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:199.01061058044434ms total_cost_time:199.05376434326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8219 prompt_cache_len:5151 prompt_cache_ratio:0.6267185789025429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 -DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.10752010345458984 s -INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.10950398445129395 s -DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=9947145703033310605896879809801799068, time:1750767256.7120574s req_ids:[8] -DEBUG 06-24 20:14:16 [manager.py:391] -ERROR 06-24 20:14:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:205.28173446655273ms total_cost_time:205.32608032226562ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8220 prompt_cache_len:5151 prompt_cache_ratio:0.6266423357664234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 -DEBUG 06-24 20:14:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:16 [manager.py:224] router recive req id 8 cost time 0.1080164909362793 s -INFO 06-24 20:14:16 [manager.py:68] detokenization recv req id 8 cost time 0.1101224422454834 s -DEBUG 06-24 20:14:16 [manager.py:391] Prefill Batch: batch_id=226676471529500660787201094188944670688, time:1750767256.9246058s req_ids:[8] -DEBUG 06-24 20:14:16 [manager.py:391] -ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:16 lightllm_req_id:8 first_token_cost:208.1894874572754ms total_cost_time:208.23168754577637ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8221 prompt_cache_len:5151 prompt_cache_ratio:0.6265661111786888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 -DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10817980766296387 s -INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.11015081405639648 s -DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=101035877380059583711512576753120789996, time:1750767257.1380749s req_ids:[8] -DEBUG 06-24 20:14:17 [manager.py:391] -ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:204.0235996246338ms total_cost_time:204.06651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8222 prompt_cache_len:5151 prompt_cache_ratio:0.6264899051325712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 -DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10762524604797363 s -INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.1097564697265625 s -DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=97230976004576439437522054495135724801, time:1750767257.3581214s req_ids:[8] -DEBUG 06-24 20:14:17 [manager.py:391] -ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:218.46604347229004ms total_cost_time:218.50895881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8223 prompt_cache_len:5151 prompt_cache_ratio:0.626413717621306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 -DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10696935653686523 s -INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.10883212089538574 s -DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=321673893865463017831728517079690072985, time:1750767257.5723488s req_ids:[8] -DEBUG 06-24 20:14:17 [manager.py:391] -ERROR 06-24 20:14:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:389.79458808898926ms total_cost_time:389.83964920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8224 prompt_cache_len:5151 prompt_cache_ratio:0.6263375486381323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 -DEBUG 06-24 20:14:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:17 [manager.py:224] router recive req id 8 cost time 0.10845732688903809 s -INFO 06-24 20:14:17 [manager.py:68] detokenization recv req id 8 cost time 0.11050844192504883 s -DEBUG 06-24 20:14:17 [manager.py:391] Prefill Batch: batch_id=149695382554569083848457790860284428069, time:1750767257.9630108s req_ids:[8] -DEBUG 06-24 20:14:17 [manager.py:391] -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:17 lightllm_req_id:8 first_token_cost:199.80239868164062ms total_cost_time:199.84745979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8225 prompt_cache_len:5151 prompt_cache_ratio:0.6262613981762918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 -DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s -INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11095333099365234 s -DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=221254139614361055759412105128705395804, time:1750767258.1729584s req_ids:[8] -DEBUG 06-24 20:14:18 [manager.py:391] -ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:200.2253532409668ms total_cost_time:200.26922225952148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8226 prompt_cache_len:5151 prompt_cache_ratio:0.6261852662290299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 -DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.1085975170135498 s -INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046504974365234 s -DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=65235300840212817652322914723492353706, time:1750767258.3791482s req_ids:[8] -DEBUG 06-24 20:14:18 [manager.py:391] -DEBUG 06-24 20:14:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 35843.772 tokens/s -DEBUG 06-24 20:14:18 [stats.py:37] Avg prompt tokens throughput: 35835.037 tokens/s -DEBUG 06-24 20:14:18 [stats.py:37] Avg generate tokens throughput: 8.734 tokens/s -ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:166.45073890686035ms total_cost_time:166.49436950683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8227 prompt_cache_len:5151 prompt_cache_ratio:0.6261091527895952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 -DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10725903511047363 s -INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923290252685547 s -DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=315172784806527849080130116182337377788, time:1750767258.548727s req_ids:[8] -DEBUG 06-24 20:14:18 [manager.py:391] -ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:195.41430473327637ms total_cost_time:195.45722007751465ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8228 prompt_cache_len:5151 prompt_cache_ratio:0.6260330578512396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 -DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.10753631591796875 s -INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s -DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=140352905812694760434308362871764846989, time:1750767258.7517388s req_ids:[8] -DEBUG 06-24 20:14:18 [manager.py:391] -ERROR 06-24 20:14:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:206.31742477416992ms total_cost_time:206.3617706298828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8229 prompt_cache_len:5151 prompt_cache_ratio:0.6259569814072183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 -DEBUG 06-24 20:14:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:18 [manager.py:224] router recive req id 8 cost time 0.1087944507598877 s -INFO 06-24 20:14:18 [manager.py:68] detokenization recv req id 8 cost time 0.11092829704284668 s -DEBUG 06-24 20:14:18 [manager.py:391] Prefill Batch: batch_id=290956557832134340733766902606977100570, time:1750767258.9648273s req_ids:[8] -DEBUG 06-24 20:14:18 [manager.py:391] -ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:18 lightllm_req_id:8 first_token_cost:206.50458335876465ms total_cost_time:206.54964447021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8230 prompt_cache_len:5151 prompt_cache_ratio:0.6258809234507898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 -DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:19 [batch.py:51] router release req id 8 -INFO 06-24 20:14:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10880088806152344 s -INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11085772514343262 s -DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=447598318239783903004791516437181127, time:1750767259.176396s req_ids:[8] -DEBUG 06-24 20:14:19 [manager.py:391] -ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:206.10618591308594ms total_cost_time:206.14886283874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8231 prompt_cache_len:5151 prompt_cache_ratio:0.6258048839752156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 -DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10781145095825195 s -INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.10961008071899414 s -DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=187418386908774338035271253996348562646, time:1750767259.3909872s req_ids:[8] -DEBUG 06-24 20:14:19 [manager.py:391] -ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:170.2268123626709ms total_cost_time:170.26925086975098ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8232 prompt_cache_len:5151 prompt_cache_ratio:0.625728862973761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 -DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10814571380615234 s -INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11033248901367188 s -DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=253447295995371973390112169892863222402, time:1750767259.5643253s req_ids:[8] -DEBUG 06-24 20:14:19 [manager.py:391] -ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:199.36609268188477ms total_cost_time:199.46885108947754ms,out_token_counter:1 mean_per_token_cost_time: 0.10275840759277344ms prompt_token_num:8233 prompt_cache_len:5151 prompt_cache_ratio:0.6256528604396939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 -DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10892462730407715 s -INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11092472076416016 s -DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=82258284478348215431311361104312834180, time:1750767259.768058s req_ids:[8] -DEBUG 06-24 20:14:19 [manager.py:391] -ERROR 06-24 20:14:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:203.81760597229004ms total_cost_time:203.86052131652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8234 prompt_cache_len:5151 prompt_cache_ratio:0.6255768763662861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 -DEBUG 06-24 20:14:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:19 [manager.py:224] router recive req id 8 cost time 0.10844206809997559 s -INFO 06-24 20:14:19 [manager.py:68] detokenization recv req id 8 cost time 0.11023783683776855 s -DEBUG 06-24 20:14:19 [manager.py:391] Prefill Batch: batch_id=92954427773002363299141466529320844484, time:1750767259.9854786s req_ids:[8] -DEBUG 06-24 20:14:19 [manager.py:391] -ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:19 lightllm_req_id:8 first_token_cost:214.7824764251709ms total_cost_time:214.8275375366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8235 prompt_cache_len:5151 prompt_cache_ratio:0.6255009107468124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 -DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.31051158905029297 s -INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.31244874000549316 s -DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=259760011847428101482912530310620493266, time:1750767260.4085298s req_ids:[8] -DEBUG 06-24 20:14:20 [manager.py:391] -ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:427.0789623260498ms total_cost_time:427.1233081817627ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8236 prompt_cache_len:5151 prompt_cache_ratio:0.6254249635745508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 -DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.1078040599822998 s -INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.10976982116699219 s -DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=71313572895144100455817948563024647518, time:1750767260.6392076s req_ids:[8] -DEBUG 06-24 20:14:20 [manager.py:391] -ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:177.9806613922119ms total_cost_time:178.02166938781738ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8237 prompt_cache_len:5151 prompt_cache_ratio:0.6253490348427826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 -DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.10767316818237305 s -INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.10929441452026367 s -DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=2887190597889122890984753067612664319, time:1750767260.8201468s req_ids:[8] -DEBUG 06-24 20:14:20 [manager.py:391] -ERROR 06-24 20:14:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:167.10972785949707ms total_cost_time:167.15264320373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8238 prompt_cache_len:5151 prompt_cache_ratio:0.6252731245447924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 -DEBUG 06-24 20:14:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:20 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s -INFO 06-24 20:14:20 [manager.py:68] detokenization recv req id 8 cost time 0.11052846908569336 s -DEBUG 06-24 20:14:20 [manager.py:391] Prefill Batch: batch_id=312197214977796842315867089994688204412, time:1750767260.985843s req_ids:[8] -DEBUG 06-24 20:14:20 [manager.py:391] -ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:20 lightllm_req_id:8 first_token_cost:195.16587257385254ms total_cost_time:195.20974159240723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8239 prompt_cache_len:5151 prompt_cache_ratio:0.6251972326738682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 -DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10851025581359863 s -INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11055898666381836 s -DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=229704925054678512378765839987309525486, time:1750767261.1883245s req_ids:[8] -DEBUG 06-24 20:14:21 [manager.py:391] -ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:209.89227294921875ms total_cost_time:209.93804931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8240 prompt_cache_len:5151 prompt_cache_ratio:0.6251213592233009 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 -DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.1075289249420166 s -INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.10940814018249512 s -DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=180794770876922782244201066993298748507, time:1750767261.4023495s req_ids:[8] -DEBUG 06-24 20:14:21 [manager.py:391] -ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:205.17849922180176ms total_cost_time:205.22165298461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8241 prompt_cache_len:5151 prompt_cache_ratio:0.6250455041863852 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 -DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s -INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=188577339144872038960020900946029490097, time:1750767261.613874s req_ids:[8] -DEBUG 06-24 20:14:21 [manager.py:391] -ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:204.39672470092773ms total_cost_time:204.43964004516602ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8242 prompt_cache_len:5151 prompt_cache_ratio:0.6249696675564184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 -DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:21 [manager.py:224] router recive req id 8 cost time 0.10811972618103027 s -INFO 06-24 20:14:21 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s -DEBUG 06-24 20:14:21 [manager.py:391] Prefill Batch: batch_id=263832933928543762401941698108738763233, time:1750767261.834627s req_ids:[8] -DEBUG 06-24 20:14:21 [manager.py:391] -ERROR 06-24 20:14:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:215.71826934814453ms total_cost_time:215.7604694366455ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8243 prompt_cache_len:5151 prompt_cache_ratio:0.6248938493267014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 -DEBUG 06-24 20:14:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10656547546386719 s -INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.10846757888793945 s -DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=131235268036437667941543454761610152737, time:1750767262.0531976s req_ids:[8] -DEBUG 06-24 20:14:22 [manager.py:391] -ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:21 lightllm_req_id:8 first_token_cost:206.99620246887207ms total_cost_time:207.03959465026855ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8244 prompt_cache_len:5151 prompt_cache_ratio:0.6248180494905385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 -DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10750865936279297 s -INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s -DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=180550156393750232738339251266751671679, time:1750767262.2627933s req_ids:[8] -DEBUG 06-24 20:14:22 [manager.py:391] -ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:207.43441581726074ms total_cost_time:207.47780799865723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8245 prompt_cache_len:5151 prompt_cache_ratio:0.6247422680412371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 -DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:22 [batch.py:51] router release req id 8 -INFO 06-24 20:14:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.3101494312286377 s -INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.31208348274230957 s -DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=309096607483377877924253773921989934079, time:1750767262.6878161s req_ids:[8] -DEBUG 06-24 20:14:22 [manager.py:391] -ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:425.5216121673584ms total_cost_time:425.5659580230713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8246 prompt_cache_len:5151 prompt_cache_ratio:0.6246665049721077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 -DEBUG 06-24 20:14:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:22 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s -INFO 06-24 20:14:22 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s -DEBUG 06-24 20:14:22 [manager.py:391] Prefill Batch: batch_id=214336013238935208013151337043050903740, time:1750767262.9071925s req_ids:[8] -DEBUG 06-24 20:14:22 [manager.py:391] -ERROR 06-24 20:14:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:22 lightllm_req_id:8 first_token_cost:211.6241455078125ms total_cost_time:211.66729927062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8247 prompt_cache_len:5151 prompt_cache_ratio:0.6245907602764642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 -DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s -INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.11038875579833984 s -DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=71926704239353949756396946235142243440, time:1750767263.1236458s req_ids:[8] -DEBUG 06-24 20:14:23 [manager.py:391] -INFO 06-24 20:14:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:14:23 [statics_utils.py:24] mean first cost: 229.1424531623558 ms -INFO 06-24 20:14:23 [statics_utils.py:24] mean per token cost: 0.08177307755679901 ms -ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:213.36603164672852ms total_cost_time:213.4072780609131ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8248 prompt_cache_len:5151 prompt_cache_ratio:0.6245150339476236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 -DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10755252838134766 s -INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.1094655990600586 s -DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=129629145510785701664431823672365451520, time:1750767263.3415504s req_ids:[8] -DEBUG 06-24 20:14:23 [manager.py:391] -ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:203.57966423034668ms total_cost_time:203.62472534179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8249 prompt_cache_len:5151 prompt_cache_ratio:0.6244393259789065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 -DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s -INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.10915112495422363 s -DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=299453358395684065531327476054548970987, time:1750767263.5596383s req_ids:[8] -DEBUG 06-24 20:14:23 [manager.py:391] -ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:214.2322063446045ms total_cost_time:214.27440643310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8250 prompt_cache_len:5151 prompt_cache_ratio:0.6243636363636363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 -DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10797619819641113 s -INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.10981130599975586 s -DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=225000609200119526222412495525724153030, time:1750767263.7734196s req_ids:[8] -DEBUG 06-24 20:14:23 [manager.py:391] -ERROR 06-24 20:14:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:208.51373672485352ms total_cost_time:208.5561752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8251 prompt_cache_len:5151 prompt_cache_ratio:0.62428796509514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 -DEBUG 06-24 20:14:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:23 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s -INFO 06-24 20:14:23 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s -DEBUG 06-24 20:14:23 [manager.py:391] Prefill Batch: batch_id=211503971166047731307337885990702781615, time:1750767263.9868767s req_ids:[8] -DEBUG 06-24 20:14:23 [manager.py:391] -ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:23 lightllm_req_id:8 first_token_cost:207.11946487426758ms total_cost_time:207.16118812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8252 prompt_cache_len:5151 prompt_cache_ratio:0.6242123121667474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 -DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10796880722045898 s -INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.10969901084899902 s -DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=67402228077757464222528501877411027566, time:1750767264.2000492s req_ids:[8] -DEBUG 06-24 20:14:24 [manager.py:391] -ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:170.99261283874512ms total_cost_time:171.0355281829834ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8253 prompt_cache_len:5151 prompt_cache_ratio:0.624136677571792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 -DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.1082315444946289 s -INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.11031961441040039 s -DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=93038811055957424303285982173718902292, time:1750767264.3747873s req_ids:[8] -DEBUG 06-24 20:14:24 [manager.py:391] -ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:215.3012752532959ms total_cost_time:215.34466743469238ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8254 prompt_cache_len:5151 prompt_cache_ratio:0.6240610613036104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 -DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s -INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.10955214500427246 s -DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=43529484863156197402552797155113384608, time:1750767264.6065595s req_ids:[8] -DEBUG 06-24 20:14:24 [manager.py:391] -ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:224.7765064239502ms total_cost_time:224.8239517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:8255 prompt_cache_len:5151 prompt_cache_ratio:0.6239854633555421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 -DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:24 [manager.py:224] router recive req id 8 cost time 0.10848069190979004 s -INFO 06-24 20:14:24 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s -DEBUG 06-24 20:14:24 [manager.py:391] Prefill Batch: batch_id=214465736342507255828468403906776192055, time:1750767264.8246212s req_ids:[8] -DEBUG 06-24 20:14:24 [manager.py:391] -ERROR 06-24 20:14:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:206.44545555114746ms total_cost_time:206.48670196533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8256 prompt_cache_len:5151 prompt_cache_ratio:0.6239098837209303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 -DEBUG 06-24 20:14:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10762310028076172 s -INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s -DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=307488381447178661372840019066980107206, time:1750767265.0386279s req_ids:[8] -DEBUG 06-24 20:14:25 [manager.py:391] -ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:24 lightllm_req_id:8 first_token_cost:210.3099822998047ms total_cost_time:210.35170555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8257 prompt_cache_len:5151 prompt_cache_ratio:0.623834322393121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 -DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10818266868591309 s -INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.11029767990112305 s -DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=263465528909816318169942742778347583501, time:1750767265.258069s req_ids:[8] -DEBUG 06-24 20:14:25 [manager.py:391] -ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.7885398864746ms total_cost_time:206.8500518798828ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8258 prompt_cache_len:5151 prompt_cache_ratio:0.6237587793654638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 -DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.10738492012023926 s -INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.108734130859375 s -DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=106745785907896658005997409885388092530, time:1750767265.4700048s req_ids:[8] -DEBUG 06-24 20:14:25 [manager.py:391] -ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.7568302154541ms total_cost_time:206.79926872253418ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8259 prompt_cache_len:5151 prompt_cache_ratio:0.6236832546313112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 -DEBUG 06-24 20:14:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:25 [manager.py:224] router recive req id 8 cost time 0.3093998432159424 s -INFO 06-24 20:14:25 [manager.py:68] detokenization recv req id 8 cost time 0.3109891414642334 s -DEBUG 06-24 20:14:25 [manager.py:391] Prefill Batch: batch_id=234763077732606254503033857667197305946, time:1750767265.8925054s req_ids:[8] -DEBUG 06-24 20:14:25 [manager.py:391] -ERROR 06-24 20:14:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:422.38450050354004ms total_cost_time:422.4283695220947ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8260 prompt_cache_len:5151 prompt_cache_ratio:0.6236077481840193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 -DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s -INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030840873718262 s -DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=17240633472292672143665764368059250320, time:1750767266.1111033s req_ids:[8] -DEBUG 06-24 20:14:26 [manager.py:391] -ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:25 lightllm_req_id:8 first_token_cost:206.04395866394043ms total_cost_time:206.08830451965332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8261 prompt_cache_len:5151 prompt_cache_ratio:0.6235322600169471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 -DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s -INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.10979866981506348 s -DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=86927474474676127669923044268970747394, time:1750767266.3223767s req_ids:[8] -DEBUG 06-24 20:14:26 [manager.py:391] -ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:209.90419387817383ms total_cost_time:209.94853973388672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8262 prompt_cache_len:5151 prompt_cache_ratio:0.6234567901234568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 -DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10804343223571777 s -INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.1101233959197998 s -DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=3150598490457491812277847727764372341, time:1750767266.5382953s req_ids:[8] -DEBUG 06-24 20:14:26 [manager.py:391] -ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:210.58058738708496ms total_cost_time:210.62564849853516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8263 prompt_cache_len:5151 prompt_cache_ratio:0.6233813384969139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 -DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10896849632263184 s -INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11104869842529297 s -DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=239566653616966397240727414952784915557, time:1750767266.7536767s req_ids:[8] -DEBUG 06-24 20:14:26 [manager.py:391] -ERROR 06-24 20:14:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:208.1770896911621ms total_cost_time:208.21785926818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8264 prompt_cache_len:5151 prompt_cache_ratio:0.6233059051306873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 -DEBUG 06-24 20:14:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:26 [manager.py:224] router recive req id 8 cost time 0.10832047462463379 s -INFO 06-24 20:14:26 [manager.py:68] detokenization recv req id 8 cost time 0.11035537719726562 s -DEBUG 06-24 20:14:26 [manager.py:391] Prefill Batch: batch_id=100180753851236980139415075938875614750, time:1750767266.979056s req_ids:[8] -DEBUG 06-24 20:14:26 [manager.py:391] -ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:26 lightllm_req_id:8 first_token_cost:224.39312934875488ms total_cost_time:224.43890571594238ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8265 prompt_cache_len:5151 prompt_cache_ratio:0.6232304900181488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 -DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s -INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11065864562988281 s -DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=124623409531242877131000662623302193333, time:1750767267.1968863s req_ids:[8] -DEBUG 06-24 20:14:27 [manager.py:391] -ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:202.48937606811523ms total_cost_time:202.5320529937744ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8266 prompt_cache_len:5151 prompt_cache_ratio:0.6231550931526736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 -DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10868096351623535 s -INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11013412475585938 s -DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=327817020758628935242704148188284735278, time:1750767267.4100006s req_ids:[8] -DEBUG 06-24 20:14:27 [manager.py:391] -ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:210.97707748413086ms total_cost_time:211.01975440979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8267 prompt_cache_len:5151 prompt_cache_ratio:0.62307971452764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 -DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.1084139347076416 s -INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s -DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=16901268603879458421438969290504287105, time:1750767267.6184614s req_ids:[8] -DEBUG 06-24 20:14:27 [manager.py:391] -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:193.68624687194824ms total_cost_time:193.72940063476562ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8268 prompt_cache_len:5151 prompt_cache_ratio:0.6230043541364296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 -DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:27 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s -INFO 06-24 20:14:27 [manager.py:68] detokenization recv req id 8 cost time 0.11147642135620117 s -DEBUG 06-24 20:14:27 [manager.py:391] Prefill Batch: batch_id=211159885629803706771634935212835108818, time:1750767267.8217897s req_ids:[8] -DEBUG 06-24 20:14:27 [manager.py:391] -ERROR 06-24 20:14:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:207.36169815063477ms total_cost_time:207.40342140197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8269 prompt_cache_len:5151 prompt_cache_ratio:0.6229290119724271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 -DEBUG 06-24 20:14:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10817670822143555 s -INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.11025643348693848 s -DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=170304497000061674757970776327095639772, time:1750767268.0360324s req_ids:[8] -DEBUG 06-24 20:14:28 [manager.py:391] -ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:27 lightllm_req_id:8 first_token_cost:212.2206687927246ms total_cost_time:212.2635841369629ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8270 prompt_cache_len:5151 prompt_cache_ratio:0.6228536880290205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 -DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10811829566955566 s -INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101527214050293 s -DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=261242068489394431808768014005859942328, time:1750767268.2529294s req_ids:[8] -DEBUG 06-24 20:14:28 [manager.py:391] -ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:14:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 35757.238 tokens/s -DEBUG 06-24 20:14:28 [stats.py:37] Avg prompt tokens throughput: 35748.473 tokens/s -DEBUG 06-24 20:14:28 [stats.py:37] Avg generate tokens throughput: 8.765 tokens/s -INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:400.2413749694824ms total_cost_time:400.2852439880371ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8271 prompt_cache_len:5151 prompt_cache_ratio:0.622778382299601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 -DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10861802101135254 s -INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.11068129539489746 s -DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=212190245655084559465324612830320502117, time:1750767268.6567762s req_ids:[8] -DEBUG 06-24 20:14:28 [manager.py:391] -ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:206.07495307922363ms total_cost_time:206.1178684234619ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8272 prompt_cache_len:5151 prompt_cache_ratio:0.6227030947775629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 -DEBUG 06-24 20:14:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:28 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s -INFO 06-24 20:14:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s -DEBUG 06-24 20:14:28 [manager.py:391] Prefill Batch: batch_id=255036863182439795815741971756166669901, time:1750767268.8774004s req_ids:[8] -DEBUG 06-24 20:14:28 [manager.py:391] -ERROR 06-24 20:14:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:211.33184432983398ms total_cost_time:211.37475967407227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8273 prompt_cache_len:5151 prompt_cache_ratio:0.6226278254563037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 -DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10758471488952637 s -INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.10974645614624023 s -DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=24171152060961190799496513964774866282, time:1750767269.0941477s req_ids:[8] -DEBUG 06-24 20:14:29 [manager.py:391] -ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:28 lightllm_req_id:8 first_token_cost:211.34328842163086ms total_cost_time:211.38763427734375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8274 prompt_cache_len:5151 prompt_cache_ratio:0.6225525743292241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 -DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10880017280578613 s -INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.1108100414276123 s -DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=208588402509499513113172047790400436350, time:1750767269.3137026s req_ids:[8] -DEBUG 06-24 20:14:29 [manager.py:391] -ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:215.6527042388916ms total_cost_time:215.75617790222168ms,out_token_counter:1 mean_per_token_cost_time: 0.10347366333007812ms prompt_token_num:8275 prompt_cache_len:5151 prompt_cache_ratio:0.6224773413897281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 -DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10863494873046875 s -INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11060500144958496 s -DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=107522958879956309643900032138484948880, time:1750767269.535218s req_ids:[8] -DEBUG 06-24 20:14:29 [manager.py:391] -ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:215.7437801361084ms total_cost_time:215.7883644104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8276 prompt_cache_len:5151 prompt_cache_ratio:0.6224021266312229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 -DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:29 [batch.py:51] router release req id 8 -INFO 06-24 20:14:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10823273658752441 s -INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11080193519592285 s -DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=331023510023733657338219015572884901752, time:1750767269.7505565s req_ids:[8] -DEBUG 06-24 20:14:29 [manager.py:391] -ERROR 06-24 20:14:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:206.24923706054688ms total_cost_time:206.29215240478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8277 prompt_cache_len:5151 prompt_cache_ratio:0.6223269300471185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 -DEBUG 06-24 20:14:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:29 [manager.py:224] router recive req id 8 cost time 0.10905647277832031 s -INFO 06-24 20:14:29 [manager.py:68] detokenization recv req id 8 cost time 0.11113715171813965 s -DEBUG 06-24 20:14:29 [manager.py:391] Prefill Batch: batch_id=111459193273702210075804418297171762800, time:1750767269.9626977s req_ids:[8] -DEBUG 06-24 20:14:29 [manager.py:391] -ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:29 lightllm_req_id:8 first_token_cost:205.3811550140381ms total_cost_time:205.42287826538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8278 prompt_cache_len:5151 prompt_cache_ratio:0.6222517516308287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 -DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10811614990234375 s -INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.1100625991821289 s -DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=235318352632475267082330510763280703969, time:1750767270.1743755s req_ids:[8] -DEBUG 06-24 20:14:30 [manager.py:391] -ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:210.7243537902832ms total_cost_time:210.7696533203125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8279 prompt_cache_len:5151 prompt_cache_ratio:0.62217659137577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 -DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10775947570800781 s -INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s -DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=268666975616545456718146529699510418, time:1750767270.391569s req_ids:[8] -DEBUG 06-24 20:14:30 [manager.py:391] -ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:208.10723304748535ms total_cost_time:208.15086364746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8280 prompt_cache_len:5151 prompt_cache_ratio:0.6221014492753624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 -DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10767984390258789 s -INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s -DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=49819110030416092095459864328612333161, time:1750767270.6047833s req_ids:[8] -DEBUG 06-24 20:14:30 [manager.py:391] -ERROR 06-24 20:14:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:369.74620819091797ms total_cost_time:369.78793144226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8281 prompt_cache_len:5151 prompt_cache_ratio:0.6220263253230286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 -DEBUG 06-24 20:14:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:30 [manager.py:224] router recive req id 8 cost time 0.10841679573059082 s -INFO 06-24 20:14:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110222339630127 s -DEBUG 06-24 20:14:30 [manager.py:391] Prefill Batch: batch_id=298677913122716067951241139475505847516, time:1750767270.976117s req_ids:[8] -DEBUG 06-24 20:14:30 [manager.py:391] -ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:30 lightllm_req_id:8 first_token_cost:204.4961452484131ms total_cost_time:204.53667640686035ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:8282 prompt_cache_len:5151 prompt_cache_ratio:0.6219512195121951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 -DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s -INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010146141052246 s -DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=131337185881214976532187266676560497071, time:1750767271.189098s req_ids:[8] -DEBUG 06-24 20:14:31 [manager.py:391] -ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:206.48550987243652ms total_cost_time:206.55584335327148ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:8283 prompt_cache_len:5151 prompt_cache_ratio:0.6218761318362912 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 -DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10838603973388672 s -INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11054754257202148 s -DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=206879154648522365262448133744336640793, time:1750767271.4012792s req_ids:[8] -DEBUG 06-24 20:14:31 [manager.py:391] -ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:203.54676246643066ms total_cost_time:203.58920097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8284 prompt_cache_len:5151 prompt_cache_ratio:0.6218010622887494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 -DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10805845260620117 s -INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11010932922363281 s -DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=194677959630076490721400359373378351942, time:1750767271.6098862s req_ids:[8] -DEBUG 06-24 20:14:31 [manager.py:391] -ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:204.98371124267578ms total_cost_time:205.02638816833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8285 prompt_cache_len:5151 prompt_cache_ratio:0.6217260108630054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 -DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:31 [manager.py:224] router recive req id 8 cost time 0.10866212844848633 s -INFO 06-24 20:14:31 [manager.py:68] detokenization recv req id 8 cost time 0.11059236526489258 s -DEBUG 06-24 20:14:31 [manager.py:391] Prefill Batch: batch_id=144117828241798914200188985456885003077, time:1750767271.8229425s req_ids:[8] -DEBUG 06-24 20:14:31 [manager.py:391] -ERROR 06-24 20:14:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:207.51690864562988ms total_cost_time:207.55934715270996ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8286 prompt_cache_len:5151 prompt_cache_ratio:0.6216509775524982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 -DEBUG 06-24 20:14:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.10919046401977539 s -INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.11148500442504883 s -DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=210026803768110602240005798992642147381, time:1750767272.0332983s req_ids:[8] -DEBUG 06-24 20:14:32 [manager.py:391] -ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:31 lightllm_req_id:8 first_token_cost:205.06572723388672ms total_cost_time:205.1072120666504ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8287 prompt_cache_len:5151 prompt_cache_ratio:0.6215759623506697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 -DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.1069631576538086 s -INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.10886931419372559 s -DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=269520222326580752155272498254908090933, time:1750767272.2449923s req_ids:[8] -DEBUG 06-24 20:14:32 [manager.py:391] -ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:207.23557472229004ms total_cost_time:207.27825164794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8288 prompt_cache_len:5151 prompt_cache_ratio:0.6215009652509652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 -DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s -INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.10958743095397949 s -DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=68457551406381783675109409507429387027, time:1750767272.458577s req_ids:[8] -DEBUG 06-24 20:14:32 [manager.py:391] -ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:203.41253280639648ms total_cost_time:203.45497131347656ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8289 prompt_cache_len:5151 prompt_cache_ratio:0.6214259862468332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 -DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:32 [manager.py:224] router recive req id 8 cost time 0.3110806941986084 s -INFO 06-24 20:14:32 [manager.py:68] detokenization recv req id 8 cost time 0.3133068084716797 s -DEBUG 06-24 20:14:32 [manager.py:391] Prefill Batch: batch_id=311861998419128760240120615040794478039, time:1750767272.873438s req_ids:[8] -DEBUG 06-24 20:14:32 [manager.py:391] -ERROR 06-24 20:14:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:415.0123596191406ms total_cost_time:415.0543212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8290 prompt_cache_len:5151 prompt_cache_ratio:0.6213510253317249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 -DEBUG 06-24 20:14:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s -INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11092305183410645 s -DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=304775374273399976944644037986543762210, time:1750767273.0888023s req_ids:[8] -DEBUG 06-24 20:14:33 [manager.py:391] -ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:32 lightllm_req_id:8 first_token_cost:207.91912078857422ms total_cost_time:207.9794406890869ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8291 prompt_cache_len:5151 prompt_cache_ratio:0.6212760824990954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 -DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10836505889892578 s -INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11060380935668945 s -DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=109481890043208347135331468406609282952, time:1750767273.3025763s req_ids:[8] -DEBUG 06-24 20:14:33 [manager.py:391] -ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:206.13765716552734ms total_cost_time:206.19750022888184ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8292 prompt_cache_len:5151 prompt_cache_ratio:0.6212011577424024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 -DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10810208320617676 s -INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.11006021499633789 s -DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=160556306680002546400351079511857133113, time:1750767273.512699s req_ids:[8] -DEBUG 06-24 20:14:33 [manager.py:391] -ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:202.91876792907715ms total_cost_time:202.96120643615723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8293 prompt_cache_len:5151 prompt_cache_ratio:0.6211262510551068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 -DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.1075899600982666 s -INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.10979104042053223 s -DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=32074529192952349618100839597978615920, time:1750767273.7267318s req_ids:[8] -DEBUG 06-24 20:14:33 [manager.py:391] -ERROR 06-24 20:14:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:211.94052696228027ms total_cost_time:211.98534965515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8294 prompt_cache_len:5151 prompt_cache_ratio:0.6210513624306728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 -DEBUG 06-24 20:14:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:33 [manager.py:224] router recive req id 8 cost time 0.10739302635192871 s -INFO 06-24 20:14:33 [manager.py:68] detokenization recv req id 8 cost time 0.10944819450378418 s -DEBUG 06-24 20:14:33 [manager.py:391] Prefill Batch: batch_id=243760956130616791617009914156589848019, time:1750767273.9389782s req_ids:[8] -DEBUG 06-24 20:14:33 [manager.py:391] -ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:33 lightllm_req_id:8 first_token_cost:208.13679695129395ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8295 prompt_cache_len:5151 prompt_cache_ratio:0.6209764918625678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 -DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10746598243713379 s -INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.10936212539672852 s -DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=189770559000292328682272700480913325791, time:1750767274.1518123s req_ids:[8] -DEBUG 06-24 20:14:34 [manager.py:391] -ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:211.21573448181152ms total_cost_time:211.2584114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8296 prompt_cache_len:5151 prompt_cache_ratio:0.6209016393442623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 -DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s -INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.11012148857116699 s -DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=169396735050538373371631332208990012691, time:1750767274.36697s req_ids:[8] -DEBUG 06-24 20:14:34 [manager.py:391] -ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:368.1051731109619ms total_cost_time:368.1497573852539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8297 prompt_cache_len:5151 prompt_cache_ratio:0.6208268048692298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 -DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.10783123970031738 s -INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.10971808433532715 s -DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=167956627076954069194956913411280167363, time:1750767274.7405546s req_ids:[8] -DEBUG 06-24 20:14:34 [manager.py:391] -ERROR 06-24 20:14:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:200.6845474243164ms total_cost_time:200.7300853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8298 prompt_cache_len:5151 prompt_cache_ratio:0.6207519884309473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 -DEBUG 06-24 20:14:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:34 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s -INFO 06-24 20:14:34 [manager.py:68] detokenization recv req id 8 cost time 0.1105194091796875 s -DEBUG 06-24 20:14:34 [manager.py:391] Prefill Batch: batch_id=301337830344655309401875836347015862071, time:1750767274.954106s req_ids:[8] -DEBUG 06-24 20:14:34 [manager.py:391] -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:34 lightllm_req_id:8 first_token_cost:212.02421188354492ms total_cost_time:212.0678424835205ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8299 prompt_cache_len:5151 prompt_cache_ratio:0.6206771900228943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 -DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10830998420715332 s -INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.11098384857177734 s -DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=65587622665194329579089570487194253022, time:1750767275.1678586s req_ids:[8] -DEBUG 06-24 20:14:35 [manager.py:391] -ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:219.67768669128418ms total_cost_time:219.71988677978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8300 prompt_cache_len:5151 prompt_cache_ratio:0.6206024096385542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 -DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10743093490600586 s -INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.10927939414978027 s -DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=63493732624778298924648741448306459558, time:1750767275.4087806s req_ids:[8] -DEBUG 06-24 20:14:35 [manager.py:391] -ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:220.22652626037598ms total_cost_time:220.27063369750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8301 prompt_cache_len:5151 prompt_cache_ratio:0.6205276472714131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 -DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10848736763000488 s -INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.11052966117858887 s -DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=211688468286845774453822823612595637726, time:1750767275.6236768s req_ids:[8] -DEBUG 06-24 20:14:35 [manager.py:391] -ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:211.63105964660645ms total_cost_time:211.67373657226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8302 prompt_cache_len:5151 prompt_cache_ratio:0.6204529029149602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 -DEBUG 06-24 20:14:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:35 [manager.py:224] router recive req id 8 cost time 0.10779428482055664 s -INFO 06-24 20:14:35 [manager.py:68] detokenization recv req id 8 cost time 0.10966897010803223 s -DEBUG 06-24 20:14:35 [manager.py:391] Prefill Batch: batch_id=6904799698057618520689627287651683187, time:1750767275.836534s req_ids:[8] -DEBUG 06-24 20:14:35 [manager.py:391] -ERROR 06-24 20:14:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:169.9512004852295ms total_cost_time:169.99411582946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8303 prompt_cache_len:5151 prompt_cache_ratio:0.6203781765626882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 -DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.31055521965026855 s -INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.3125629425048828 s -DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=277802937496219477041554422591933751385, time:1750767276.2197196s req_ids:[8] -DEBUG 06-24 20:14:36 [manager.py:391] -ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:35 lightllm_req_id:8 first_token_cost:421.2357997894287ms total_cost_time:421.2782382965088ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8304 prompt_cache_len:5151 prompt_cache_ratio:0.6203034682080925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 -DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10714888572692871 s -INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.10903310775756836 s -DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=255252996458118514134069515587603915157, time:1750767276.4395554s req_ids:[8] -DEBUG 06-24 20:14:36 [manager.py:391] -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:36 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:209.80405807495117ms total_cost_time:209.8684310913086ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:8305 prompt_cache_len:5151 prompt_cache_ratio:0.6202287778446719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 -DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10686445236206055 s -INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.10863447189331055 s -DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=315718203251225857716152392641209894182, time:1750767276.652343s req_ids:[8] -DEBUG 06-24 20:14:36 [manager.py:391] -ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:167.88148880004883ms total_cost_time:167.9251194000244ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8306 prompt_cache_len:5151 prompt_cache_ratio:0.6201541054659282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 -DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:36 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s -INFO 06-24 20:14:36 [manager.py:68] detokenization recv req id 8 cost time 0.11016082763671875 s -DEBUG 06-24 20:14:36 [manager.py:391] Prefill Batch: batch_id=35520727223488568443893203475961670141, time:1750767276.8285089s req_ids:[8] -DEBUG 06-24 20:14:36 [manager.py:391] -ERROR 06-24 20:14:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:197.03364372253418ms total_cost_time:197.07679748535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8307 prompt_cache_len:5151 prompt_cache_ratio:0.6200794510653665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 -DEBUG 06-24 20:14:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10570096969604492 s -INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.1066579818725586 s -DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=274696903048460602471430352125434336653, time:1750767277.027263s req_ids:[8] -DEBUG 06-24 20:14:37 [manager.py:391] -ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:36 lightllm_req_id:8 first_token_cost:162.8549098968506ms total_cost_time:162.87612915039062ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8308 prompt_cache_len:5151 prompt_cache_ratio:0.6200048146364949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 -DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10597825050354004 s -INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.10806965827941895 s -DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=66087891383537191982797430514580655308, time:1750767277.194863s req_ids:[8] -DEBUG 06-24 20:14:37 [manager.py:391] -ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:194.40293312072754ms total_cost_time:194.44775581359863ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8309 prompt_cache_len:5151 prompt_cache_ratio:0.6199301961728246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 -DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s -INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.11049866676330566 s -DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=251522397432514335940512181293752170135, time:1750767277.3953865s req_ids:[8] -DEBUG 06-24 20:14:37 [manager.py:391] -ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:365.2081489562988ms total_cost_time:365.2515411376953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8310 prompt_cache_len:5151 prompt_cache_ratio:0.6198555956678701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 -DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.1099708080291748 s -INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.11186432838439941 s -DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=161021902991964345720263375301572541431, time:1750767277.7721167s req_ids:[8] -DEBUG 06-24 20:14:37 [manager.py:391] -ERROR 06-24 20:14:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:213.8359546661377ms total_cost_time:213.87124061584473ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:8311 prompt_cache_len:5151 prompt_cache_ratio:0.6197810131151485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 -DEBUG 06-24 20:14:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:37 [manager.py:224] router recive req id 8 cost time 0.10755538940429688 s -INFO 06-24 20:14:37 [manager.py:68] detokenization recv req id 8 cost time 0.10964846611022949 s -DEBUG 06-24 20:14:37 [manager.py:391] Prefill Batch: batch_id=295648583996366004321371936179937087096, time:1750767277.9884977s req_ids:[8] -DEBUG 06-24 20:14:37 [manager.py:391] -ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:37 lightllm_req_id:8 first_token_cost:224.6232032775879ms total_cost_time:224.66421127319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:8312 prompt_cache_len:5151 prompt_cache_ratio:0.6197064485081809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 -DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s -INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s -DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=298479220233737810524018769446339314478, time:1750767278.2154572s req_ids:[8] -DEBUG 06-24 20:14:38 [manager.py:391] -ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:202.3754119873047ms total_cost_time:202.41928100585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8313 prompt_cache_len:5151 prompt_cache_ratio:0.6196319018404908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 -DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.1082923412322998 s -INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s -DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=252102363990941462669573429678951581757, time:1750767278.4254768s req_ids:[8] -DEBUG 06-24 20:14:38 [manager.py:391] -ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:218.89925003051758ms total_cost_time:218.94168853759766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8314 prompt_cache_len:5151 prompt_cache_ratio:0.619557373105605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 -DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10834932327270508 s -INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11026597023010254 s -DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=312271822081104715641743960189888542274, time:1750767278.6513693s req_ids:[8] -DEBUG 06-24 20:14:38 [manager.py:391] -DEBUG 06-24 20:14:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 36072.292 tokens/s -DEBUG 06-24 20:14:38 [stats.py:37] Avg prompt tokens throughput: 36063.694 tokens/s -DEBUG 06-24 20:14:38 [stats.py:37] Avg generate tokens throughput: 8.598 tokens/s -ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:202.10552215576172ms total_cost_time:202.1486759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8315 prompt_cache_len:5151 prompt_cache_ratio:0.6194828622970535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 -DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:38 [manager.py:224] router recive req id 8 cost time 0.10877037048339844 s -INFO 06-24 20:14:38 [manager.py:68] detokenization recv req id 8 cost time 0.11081075668334961 s -DEBUG 06-24 20:14:38 [manager.py:391] Prefill Batch: batch_id=138931135602462018121515202530146619532, time:1750767278.8584652s req_ids:[8] -DEBUG 06-24 20:14:38 [manager.py:391] -ERROR 06-24 20:14:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:207.3657512664795ms total_cost_time:207.40818977355957ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8316 prompt_cache_len:5151 prompt_cache_ratio:0.6194083694083694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 -DEBUG 06-24 20:14:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1085202693939209 s -INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.11047053337097168 s -DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=218982852697502198158719175564218522053, time:1750767279.072043s req_ids:[8] -DEBUG 06-24 20:14:39 [manager.py:391] -ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:38 lightllm_req_id:8 first_token_cost:374.18341636657715ms total_cost_time:374.22704696655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8317 prompt_cache_len:5151 prompt_cache_ratio:0.6193338944330888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 -DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1084434986114502 s -INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103827953338623 s -DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=300991428693439536423626809739855937382, time:1750767279.4470572s req_ids:[8] -DEBUG 06-24 20:14:39 [manager.py:391] -ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:202.6839256286621ms total_cost_time:202.7280330657959ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8318 prompt_cache_len:5151 prompt_cache_ratio:0.6192594373647512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 -DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.1073768138885498 s -INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.1094367504119873 s -DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=163824231564315183873231817994491432707, time:1750767279.6601083s req_ids:[8] -DEBUG 06-24 20:14:39 [manager.py:391] -ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:205.12080192565918ms total_cost_time:205.16395568847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8319 prompt_cache_len:5151 prompt_cache_ratio:0.6191849981968987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 -DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:39 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s -INFO 06-24 20:14:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952043533325195 s -DEBUG 06-24 20:14:39 [manager.py:391] Prefill Batch: batch_id=258402454133813239926229406276900726653, time:1750767279.870637s req_ids:[8] -DEBUG 06-24 20:14:39 [manager.py:391] -ERROR 06-24 20:14:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:205.98125457763672ms total_cost_time:206.0253620147705ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8320 prompt_cache_len:5151 prompt_cache_ratio:0.6191105769230769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 -DEBUG 06-24 20:14:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10741162300109863 s -INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940837860107422 s -DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=15768748149000020772775328294597903454, time:1750767280.0876532s req_ids:[8] -DEBUG 06-24 20:14:40 [manager.py:391] -ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:39 lightllm_req_id:8 first_token_cost:213.46163749694824ms total_cost_time:213.50502967834473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8321 prompt_cache_len:5151 prompt_cache_ratio:0.6190361735368345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 -DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:40 [batch.py:51] router release req id 8 -INFO 06-24 20:14:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10871005058288574 s -INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106112003326416 s -DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=35748051260586873718890216062716901531, time:1750767280.30079s req_ids:[8] -DEBUG 06-24 20:14:40 [manager.py:391] -ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:206.08973503112793ms total_cost_time:206.13336563110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8322 prompt_cache_len:5151 prompt_cache_ratio:0.6189617880317232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 -DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s -INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s -DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=203062104970345147628927787627688872702, time:1750767280.513943s req_ids:[8] -DEBUG 06-24 20:14:40 [manager.py:391] -ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:205.20377159118652ms total_cost_time:205.24907112121582ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8323 prompt_cache_len:5151 prompt_cache_ratio:0.6188874204012976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 -DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10747337341308594 s -INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.10849165916442871 s -DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=92721954162718563247843241532194790696, time:1750767280.7225084s req_ids:[8] -DEBUG 06-24 20:14:40 [manager.py:391] -ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:185.90521812438965ms total_cost_time:185.94908714294434ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8324 prompt_cache_len:5151 prompt_cache_ratio:0.6188130706391158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 -DEBUG 06-24 20:14:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:40 [manager.py:224] router recive req id 8 cost time 0.10887598991394043 s -INFO 06-24 20:14:40 [manager.py:68] detokenization recv req id 8 cost time 0.1108696460723877 s -DEBUG 06-24 20:14:40 [manager.py:391] Prefill Batch: batch_id=27910674266167320054595452940333283810, time:1750767280.9121764s req_ids:[8] -DEBUG 06-24 20:14:40 [manager.py:391] -ERROR 06-24 20:14:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:40 lightllm_req_id:8 first_token_cost:201.6618251800537ms total_cost_time:201.76339149475098ms,out_token_counter:1 mean_per_token_cost_time: 0.10156631469726562ms prompt_token_num:8325 prompt_cache_len:5151 prompt_cache_ratio:0.6187387387387387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 -DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10860204696655273 s -INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11075520515441895 s -DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=39104579483530487045217095644296521828, time:1750767281.119557s req_ids:[8] -DEBUG 06-24 20:14:41 [manager.py:391] -ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:206.68601989746094ms total_cost_time:206.72941207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8326 prompt_cache_len:5151 prompt_cache_ratio:0.6186644246937305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 -DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10857677459716797 s -INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11064720153808594 s -DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=127764584215628286168403979539007149501, time:1750767281.333407s req_ids:[8] -DEBUG 06-24 20:14:41 [manager.py:391] -ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.10112762451172ms total_cost_time:210.1449966430664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8327 prompt_cache_len:5151 prompt_cache_ratio:0.6185901284976583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 -DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10875415802001953 s -INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.11067628860473633 s -DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=4958469037022221137828440278288455800, time:1750767281.5466924s req_ids:[8] -DEBUG 06-24 20:14:41 [manager.py:391] -ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.4480266571045ms total_cost_time:210.4935646057129ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8328 prompt_cache_len:5151 prompt_cache_ratio:0.6185158501440923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 -DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10725069046020508 s -INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.10926938056945801 s -DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=272250437461255759268203854122825154056, time:1750767281.7616775s req_ids:[8] -DEBUG 06-24 20:14:41 [manager.py:391] -ERROR 06-24 20:14:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:210.14738082885742ms total_cost_time:210.1914882659912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8329 prompt_cache_len:5151 prompt_cache_ratio:0.6184415896266058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 -DEBUG 06-24 20:14:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:41 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s -INFO 06-24 20:14:41 [manager.py:68] detokenization recv req id 8 cost time 0.10949468612670898 s -DEBUG 06-24 20:14:41 [manager.py:391] Prefill Batch: batch_id=189423115506842996907537481977005635238, time:1750767281.9772346s req_ids:[8] -DEBUG 06-24 20:14:41 [manager.py:391] -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:41 lightllm_req_id:8 first_token_cost:203.86242866516113ms total_cost_time:203.90605926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8330 prompt_cache_len:5151 prompt_cache_ratio:0.6183673469387755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 -DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.1076972484588623 s -INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.10976147651672363 s -DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=231737037246298845956164207363075937542, time:1750767282.189191s req_ids:[8] -DEBUG 06-24 20:14:42 [manager.py:391] -ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:205.69658279418945ms total_cost_time:205.74069023132324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8331 prompt_cache_len:5151 prompt_cache_ratio:0.6182931220741807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 -DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10817527770996094 s -INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.11019253730773926 s -DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=204833891265901063353923436250621708968, time:1750767282.3999195s req_ids:[8] -DEBUG 06-24 20:14:42 [manager.py:391] -ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:364.6695613861084ms total_cost_time:364.7119998931885ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8332 prompt_cache_len:5151 prompt_cache_ratio:0.6182189150264042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 -DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10700535774230957 s -INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.10903620719909668 s -DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=153021718533794694911774247721165952692, time:1750767282.766928s req_ids:[8] -DEBUG 06-24 20:14:42 [manager.py:391] -ERROR 06-24 20:14:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:202.74639129638672ms total_cost_time:202.7895450592041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8333 prompt_cache_len:5151 prompt_cache_ratio:0.6181447257890316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 -DEBUG 06-24 20:14:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:42 [manager.py:224] router recive req id 8 cost time 0.10812854766845703 s -INFO 06-24 20:14:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017179489135742 s -DEBUG 06-24 20:14:42 [manager.py:391] Prefill Batch: batch_id=307875200559235023189034847413241857077, time:1750767282.977633s req_ids:[8] -DEBUG 06-24 20:14:42 [manager.py:391] -ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:42 lightllm_req_id:8 first_token_cost:211.81178092956543ms total_cost_time:211.85302734375ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8334 prompt_cache_len:5151 prompt_cache_ratio:0.6180705543556515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 -DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.1084592342376709 s -INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s -DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=244938656511849479856530537959277250774, time:1750767283.1920686s req_ids:[8] -DEBUG 06-24 20:14:43 [manager.py:391] -ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:202.30746269226074ms total_cost_time:202.35013961791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8335 prompt_cache_len:5151 prompt_cache_ratio:0.6179964007198561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 -DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10822558403015137 s -INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.1102132797241211 s -DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=30774481689037904860931704222377865461, time:1750767283.4027514s req_ids:[8] -DEBUG 06-24 20:14:43 [manager.py:391] -ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:205.64770698547363ms total_cost_time:205.6906223297119ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8336 prompt_cache_len:5151 prompt_cache_ratio:0.6179222648752399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 -DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10813593864440918 s -INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s -DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=339001691673391085264550844361019495214, time:1750767283.6140687s req_ids:[8] -DEBUG 06-24 20:14:43 [manager.py:391] -ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:212.65101432800293ms total_cost_time:212.69536018371582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8337 prompt_cache_len:5151 prompt_cache_ratio:0.6178481468154012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 -DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:43 [batch.py:51] router release req id 8 -INFO 06-24 20:14:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:43 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s -INFO 06-24 20:14:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100313663482666 s -DEBUG 06-24 20:14:43 [manager.py:391] Prefill Batch: batch_id=261940475427338622658296194332473660030, time:1750767283.8327336s req_ids:[8] -DEBUG 06-24 20:14:43 [manager.py:391] -ERROR 06-24 20:14:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:200.1497745513916ms total_cost_time:200.19268989562988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8338 prompt_cache_len:5151 prompt_cache_ratio:0.617774046533941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 -DEBUG 06-24 20:14:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10875749588012695 s -INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.1107792854309082 s -DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=337781020124583152190153779169000766806, time:1750767284.0380256s req_ids:[8] -DEBUG 06-24 20:14:44 [manager.py:391] -ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:43 lightllm_req_id:8 first_token_cost:203.39345932006836ms total_cost_time:203.43661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8339 prompt_cache_len:5151 prompt_cache_ratio:0.6176999640244634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 -DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10731673240661621 s -INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.1092982292175293 s -DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=273895288870707467973267963694377667971, time:1750767284.2479763s req_ids:[8] -DEBUG 06-24 20:14:44 [manager.py:391] -ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:209.8081111907959ms total_cost_time:209.85102653503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8340 prompt_cache_len:5151 prompt_cache_ratio:0.6176258992805755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 -DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10702204704284668 s -INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.10901165008544922 s -DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=73239091614846805757568216182305887005, time:1750767284.4646623s req_ids:[8] -DEBUG 06-24 20:14:44 [manager.py:391] -ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:207.46827125549316ms total_cost_time:207.51142501831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8341 prompt_cache_len:5151 prompt_cache_ratio:0.6175518522958878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 -DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10699844360351562 s -INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.10890674591064453 s -DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=276057004754196455625317364162476827661, time:1750767284.676558s req_ids:[8] -DEBUG 06-24 20:14:44 [manager.py:391] -ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:208.73117446899414ms total_cost_time:208.7724208831787ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8342 prompt_cache_len:5151 prompt_cache_ratio:0.6174778230640134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 -DEBUG 06-24 20:14:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:44 [manager.py:224] router recive req id 8 cost time 0.10847115516662598 s -INFO 06-24 20:14:44 [manager.py:68] detokenization recv req id 8 cost time 0.11041617393493652 s -DEBUG 06-24 20:14:44 [manager.py:391] Prefill Batch: batch_id=167386728822424099788423938465730215994, time:1750767284.8904045s req_ids:[8] -DEBUG 06-24 20:14:44 [manager.py:391] -ERROR 06-24 20:14:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:211.11011505126953ms total_cost_time:211.1525535583496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8343 prompt_cache_len:5151 prompt_cache_ratio:0.6174038115785688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 -DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10886025428771973 s -INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11068582534790039 s -DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=248199366923918170706963929973859510082, time:1750767285.1048195s req_ids:[8] -DEBUG 06-24 20:14:45 [manager.py:391] -ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:44 lightllm_req_id:8 first_token_cost:367.0048713684082ms total_cost_time:367.0461177825928ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8344 prompt_cache_len:5151 prompt_cache_ratio:0.6173298178331735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 -DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10794305801391602 s -INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11001968383789062 s -DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=337267696374520296586378518285888365504, time:1750767285.47608s req_ids:[8] -DEBUG 06-24 20:14:45 [manager.py:391] -ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 first_token_cost:201.66969299316406ms total_cost_time:201.71093940734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8345 prompt_cache_len:5151 prompt_cache_ratio:0.61725584182145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 -DEBUG 06-24 20:14:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:45 [manager.py:224] router recive req id 8 cost time 0.10873889923095703 s -INFO 06-24 20:14:45 [manager.py:68] detokenization recv req id 8 cost time 0.11086297035217285 s -DEBUG 06-24 20:14:45 [manager.py:391] Prefill Batch: batch_id=99011530542658766002368443279198558949, time:1750767285.6876237s req_ids:[8] -DEBUG 06-24 20:14:45 [manager.py:391] -ERROR 06-24 20:14:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:45 lightllm_req_id:8 first_token_cost:224.52926635742188ms total_cost_time:224.57051277160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8346 prompt_cache_len:5151 prompt_cache_ratio:0.6171818835370237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:14:46 [manager.py:106] timer detokenize batch cost time 1095.607042312622 ms -INFO 06-24 20:14:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:46 lightllm_req_id:8 -DEBUG 06-24 20:14:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s -INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s -DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=160364814654248776765176563932240351650, time:1750767287.0144155s req_ids:[8] -DEBUG 06-24 20:14:47 [manager.py:391] -ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:46 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.3935432434082ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8347 prompt_cache_len:5151 prompt_cache_ratio:0.6171079429735234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 -DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10779523849487305 s -INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s -DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=144620008047979263520750210776378677178, time:1750767287.2318034s req_ids:[8] -DEBUG 06-24 20:14:47 [manager.py:391] -ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.30209350585938ms total_cost_time:207.35979080200195ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:8348 prompt_cache_len:5151 prompt_cache_ratio:0.6170340201245808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 -DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s -INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.11042547225952148 s -DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=160944829997916299009439741916119532297, time:1750767287.453559s req_ids:[8] -DEBUG 06-24 20:14:47 [manager.py:391] -ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:216.45808219909668ms total_cost_time:216.49885177612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8349 prompt_cache_len:5151 prompt_cache_ratio:0.6169601149838304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 -DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10754942893981934 s -INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.10944414138793945 s -DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=75719440186970097660606079336839045869, time:1750767287.6681027s req_ids:[8] -DEBUG 06-24 20:14:47 [manager.py:391] -ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.98587799072266ms total_cost_time:208.03141593933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8350 prompt_cache_len:5151 prompt_cache_ratio:0.6168862275449102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 -DEBUG 06-24 20:14:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:47 [manager.py:224] router recive req id 8 cost time 0.10752749443054199 s -INFO 06-24 20:14:47 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s -DEBUG 06-24 20:14:47 [manager.py:391] Prefill Batch: batch_id=218792159429198702307235322663060631927, time:1750767287.8833733s req_ids:[8] -DEBUG 06-24 20:14:47 [manager.py:391] -ERROR 06-24 20:14:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:207.15999603271484ms total_cost_time:207.20434188842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8351 prompt_cache_len:5151 prompt_cache_ratio:0.6168123578014609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 -DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10816121101379395 s -INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s -DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=254979686932485161415815347422465485103, time:1750767288.1009836s req_ids:[8] -DEBUG 06-24 20:14:48 [manager.py:391] -ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:47 lightllm_req_id:8 first_token_cost:211.17210388183594ms total_cost_time:211.21525764465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8352 prompt_cache_len:5151 prompt_cache_ratio:0.6167385057471264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 -DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10988593101501465 s -INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.11238360404968262 s -DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=297813054370892844616543908556902429450, time:1750767288.3134503s req_ids:[8] -DEBUG 06-24 20:14:48 [manager.py:391] -ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:206.6657543182373ms total_cost_time:206.7110538482666ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8353 prompt_cache_len:5151 prompt_cache_ratio:0.6166646713755537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 -DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10724020004272461 s -INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.10899758338928223 s -DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=185173315891996855591033788824963142451, time:1750767288.5246487s req_ids:[8] -DEBUG 06-24 20:14:48 [manager.py:391] -ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:172.7902889251709ms total_cost_time:172.8341579437256ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8354 prompt_cache_len:5151 prompt_cache_ratio:0.6165908546803927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 -DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:48 [manager.py:224] router recive req id 8 cost time 0.10793781280517578 s -INFO 06-24 20:14:48 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s -DEBUG 06-24 20:14:48 [manager.py:391] Prefill Batch: batch_id=120665254578290436136896004776420250263, time:1750767288.7016625s req_ids:[8] -DEBUG 06-24 20:14:48 [manager.py:391] -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:14:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 33182.379 tokens/s -DEBUG 06-24 20:14:48 [stats.py:37] Avg prompt tokens throughput: 33174.420 tokens/s -DEBUG 06-24 20:14:48 [stats.py:37] Avg generate tokens throughput: 7.960 tokens/s -ERROR 06-24 20:14:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:160.81666946411133ms total_cost_time:160.8600616455078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8355 prompt_cache_len:5151 prompt_cache_ratio:0.6165170556552962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 -DEBUG 06-24 20:14:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.3103959560394287 s -INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.3124523162841797 s -DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=297253794906935847718989086604809386366, time:1750767289.072066s req_ids:[8] -DEBUG 06-24 20:14:49 [manager.py:391] -ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:48 lightllm_req_id:8 first_token_cost:414.48330879211426ms total_cost_time:414.52527046203613ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8356 prompt_cache_len:5151 prompt_cache_ratio:0.6164432742939205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 -DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10755109786987305 s -INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.10960149765014648 s -DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=96331022194745966552273779111210363613, time:1750767289.290974s req_ids:[8] -DEBUG 06-24 20:14:49 [manager.py:391] -ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:204.2560577392578ms total_cost_time:204.2980194091797ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8357 prompt_cache_len:5151 prompt_cache_ratio:0.6163695105899246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 -DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s -INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016678810119629 s -DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=168997122465318584218215688668621920189, time:1750767289.5015683s req_ids:[8] -DEBUG 06-24 20:14:49 [manager.py:391] -ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2007656097412ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8358 prompt_cache_len:5151 prompt_cache_ratio:0.6162957645369705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 -DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s -INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.11050152778625488 s -DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=296702129191292968734477058715179826979, time:1750767289.7148378s req_ids:[8] -DEBUG 06-24 20:14:49 [manager.py:391] -ERROR 06-24 20:14:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:210.57605743408203ms total_cost_time:210.63685417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8359 prompt_cache_len:5151 prompt_cache_ratio:0.6162220361287235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 -DEBUG 06-24 20:14:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:49 [manager.py:224] router recive req id 8 cost time 0.10753488540649414 s -INFO 06-24 20:14:49 [manager.py:68] detokenization recv req id 8 cost time 0.10964584350585938 s -DEBUG 06-24 20:14:49 [manager.py:391] Prefill Batch: batch_id=115688155636789223614638512260721104005, time:1750767289.926333s req_ids:[8] -DEBUG 06-24 20:14:49 [manager.py:391] -ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:49 lightllm_req_id:8 first_token_cost:204.26225662231445ms total_cost_time:204.30755615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8360 prompt_cache_len:5151 prompt_cache_ratio:0.6161483253588517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 -DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s -INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.10969114303588867 s -DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=264133553713560860929982643855878300920, time:1750767290.1420069s req_ids:[8] -DEBUG 06-24 20:14:50 [manager.py:391] -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:167.55223274230957ms total_cost_time:167.59395599365234ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8361 prompt_cache_len:5151 prompt_cache_ratio:0.6160746322210262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 -DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10890054702758789 s -INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11092042922973633 s -DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=57929572887054814088646815737964715958, time:1750767290.311229s req_ids:[8] -DEBUG 06-24 20:14:50 [manager.py:391] -ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:200.65665245056152ms total_cost_time:200.700044631958ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8362 prompt_cache_len:5151 prompt_cache_ratio:0.6160009567089213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 -DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10888075828552246 s -INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140680313110352 s -DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=168627028583777841942655497440907742276, time:1750767290.5188572s req_ids:[8] -DEBUG 06-24 20:14:50 [manager.py:391] -ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:200.39033889770508ms total_cost_time:200.4373073577881ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8363 prompt_cache_len:5151 prompt_cache_ratio:0.6159272988162143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 -DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10813260078430176 s -INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s -DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=325934028108119775500361158950074120832, time:1750767290.7251697s req_ids:[8] -DEBUG 06-24 20:14:50 [manager.py:391] -ERROR 06-24 20:14:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:206.4075469970703ms total_cost_time:206.4497470855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8364 prompt_cache_len:5151 prompt_cache_ratio:0.6158536585365854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 -DEBUG 06-24 20:14:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:50 [manager.py:224] router recive req id 8 cost time 0.10829019546508789 s -INFO 06-24 20:14:50 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s -DEBUG 06-24 20:14:50 [manager.py:391] Prefill Batch: batch_id=308675889738169970898209975872523305190, time:1750767290.9383552s req_ids:[8] -DEBUG 06-24 20:14:50 [manager.py:391] -ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:50 lightllm_req_id:8 first_token_cost:207.61847496032715ms total_cost_time:207.65995979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8365 prompt_cache_len:5151 prompt_cache_ratio:0.6157800358637179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 -DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.3103630542755127 s -INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.31244659423828125 s -DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=246077629767251498303869218290539575363, time:1750767291.350653s req_ids:[8] -DEBUG 06-24 20:14:51 [manager.py:391] -ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:406.74543380737305ms total_cost_time:406.7883491516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8366 prompt_cache_len:5151 prompt_cache_ratio:0.6157064307912982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 -DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.10696649551391602 s -INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10817837715148926 s -DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=21434543799022693081551161487831978689, time:1750767291.5654447s req_ids:[8] -DEBUG 06-24 20:14:51 [manager.py:391] -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:209.69462394714355ms total_cost_time:209.73873138427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8367 prompt_cache_len:5151 prompt_cache_ratio:0.6156328433130154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 -DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.10743880271911621 s -INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10864853858947754 s -DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=151796774140644941281476469251469813652, time:1750767291.7821524s req_ids:[8] -DEBUG 06-24 20:14:51 [manager.py:391] -ERROR 06-24 20:14:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:211.25221252441406ms total_cost_time:211.29512786865234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8368 prompt_cache_len:5151 prompt_cache_ratio:0.6155592734225621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 -DEBUG 06-24 20:14:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:51 [manager.py:224] router recive req id 8 cost time 0.1068124771118164 s -INFO 06-24 20:14:51 [manager.py:68] detokenization recv req id 8 cost time 0.10881900787353516 s -DEBUG 06-24 20:14:51 [manager.py:391] Prefill Batch: batch_id=261262458726762182673500422414928363311, time:1750767291.9968534s req_ids:[8] -DEBUG 06-24 20:14:51 [manager.py:391] -ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:51 lightllm_req_id:8 first_token_cost:209.4264030456543ms total_cost_time:209.46907997131348ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8369 prompt_cache_len:5151 prompt_cache_ratio:0.6154857211136336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 -DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.1084146499633789 s -INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10968708992004395 s -DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=169319382204054847427064048251158443429, time:1750767292.2101078s req_ids:[8] -DEBUG 06-24 20:14:52 [manager.py:391] -ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:202.52084732055664ms total_cost_time:202.56423950195312ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8370 prompt_cache_len:5151 prompt_cache_ratio:0.6154121863799283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 -DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.1072540283203125 s -INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10987067222595215 s -DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=231120673683649118079756514706535394070, time:1750767292.4177868s req_ids:[8] -DEBUG 06-24 20:14:52 [manager.py:391] -ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:207.91387557983398ms total_cost_time:207.95845985412598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8371 prompt_cache_len:5151 prompt_cache_ratio:0.6153386692151476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 -DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.10770034790039062 s -INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.10880279541015625 s -DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=328689474952543886466471767609274016914, time:1750767292.6460326s req_ids:[8] -DEBUG 06-24 20:14:52 [manager.py:391] -ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:225.88253021240234ms total_cost_time:225.92902183532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8372 prompt_cache_len:5151 prompt_cache_ratio:0.6152651696129957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 -DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:52 [manager.py:224] router recive req id 8 cost time 0.11045432090759277 s -INFO 06-24 20:14:52 [manager.py:68] detokenization recv req id 8 cost time 0.11163544654846191 s -DEBUG 06-24 20:14:52 [manager.py:391] Prefill Batch: batch_id=267770518497033857782039773375141812507, time:1750767292.8618057s req_ids:[8] -DEBUG 06-24 20:14:52 [manager.py:391] -ERROR 06-24 20:14:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:204.7557830810547ms total_cost_time:204.79965209960938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8373 prompt_cache_len:5151 prompt_cache_ratio:0.6151916875671802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 -DEBUG 06-24 20:14:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10892891883850098 s -INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.11017274856567383 s -DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=227773135032657690402595550675335516755, time:1750767293.0695074s req_ids:[8] -DEBUG 06-24 20:14:53 [manager.py:391] -ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:52 lightllm_req_id:8 first_token_cost:200.50430297851562ms total_cost_time:200.54960250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8374 prompt_cache_len:5151 prompt_cache_ratio:0.6151182230714115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 -INFO 06-24 20:14:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:14:53 [statics_utils.py:24] mean first cost: 228.9509126247103 ms -INFO 06-24 20:14:53 [statics_utils.py:24] mean per token cost: 0.0805954998983492 ms -DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10893130302429199 s -INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.11007571220397949 s -DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=229087766411493737261611588252799228858, time:1750767293.2928994s req_ids:[8] -DEBUG 06-24 20:14:53 [manager.py:391] -ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:216.30406379699707ms total_cost_time:216.35890007019043ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:8375 prompt_cache_len:5151 prompt_cache_ratio:0.615044776119403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 -DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10812067985534668 s -INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10922050476074219 s -DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=129537030089034207998032222786913640221, time:1750767293.5096447s req_ids:[8] -DEBUG 06-24 20:14:53 [manager.py:391] -ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:210.94822883605957ms total_cost_time:210.99448204040527ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8376 prompt_cache_len:5151 prompt_cache_ratio:0.6149713467048711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 -DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10728216171264648 s -INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10836052894592285 s -DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=165266274327625847730226731789770395319, time:1750767293.7259486s req_ids:[8] -DEBUG 06-24 20:14:53 [manager.py:391] -ERROR 06-24 20:14:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:211.5194797515869ms total_cost_time:211.5652561187744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8377 prompt_cache_len:5151 prompt_cache_ratio:0.6148979348215352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 -DEBUG 06-24 20:14:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:53 [manager.py:224] router recive req id 8 cost time 0.10747385025024414 s -INFO 06-24 20:14:53 [manager.py:68] detokenization recv req id 8 cost time 0.10867190361022949 s -DEBUG 06-24 20:14:53 [manager.py:391] Prefill Batch: batch_id=258691949810301513525959011385237201284, time:1750767293.9384341s req_ids:[8] -DEBUG 06-24 20:14:53 [manager.py:391] -ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:53 lightllm_req_id:8 first_token_cost:208.18161964416504ms total_cost_time:208.22715759277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8378 prompt_cache_len:5151 prompt_cache_ratio:0.6148245404631177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 -DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.10882234573364258 s -INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.11013221740722656 s -DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=191262296906833126698612474128247323370, time:1750767294.1518295s req_ids:[8] -DEBUG 06-24 20:14:54 [manager.py:391] -ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:206.1898708343506ms total_cost_time:206.23350143432617ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8379 prompt_cache_len:5151 prompt_cache_ratio:0.614751163623344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 -DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.31197428703308105 s -INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.3131895065307617 s -DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=240628571668215059209245563739069729640, time:1750767294.568028s req_ids:[8] -DEBUG 06-24 20:14:54 [manager.py:391] -ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:427.8912544250488ms total_cost_time:427.9353618621826ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8380 prompt_cache_len:5151 prompt_cache_ratio:0.6146778042959428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 -DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:54 [manager.py:224] router recive req id 8 cost time 0.10899066925048828 s -INFO 06-24 20:14:54 [manager.py:68] detokenization recv req id 8 cost time 0.11098504066467285 s -DEBUG 06-24 20:14:54 [manager.py:391] Prefill Batch: batch_id=54355864517937947992768217391971135850, time:1750767294.8007379s req_ids:[8] -DEBUG 06-24 20:14:54 [manager.py:391] -ERROR 06-24 20:14:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:210.82782745361328ms total_cost_time:210.87241172790527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8381 prompt_cache_len:5151 prompt_cache_ratio:0.6146044624746451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 -DEBUG 06-24 20:14:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10777497291564941 s -INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.10876846313476562 s -DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=263781650599547323521369778180731406609, time:1750767295.009364s req_ids:[8] -DEBUG 06-24 20:14:55 [manager.py:391] -ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:54 lightllm_req_id:8 first_token_cost:198.96340370178223ms total_cost_time:199.0053653717041ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8382 prompt_cache_len:5151 prompt_cache_ratio:0.6145311381531854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 -DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10738754272460938 s -INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.10932254791259766 s -DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=124992048622619558696193766618707086575, time:1750767295.2172015s req_ids:[8] -DEBUG 06-24 20:14:55 [manager.py:391] -ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:199.78928565979004ms total_cost_time:199.83267784118652ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8383 prompt_cache_len:5151 prompt_cache_ratio:0.6144578313253012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 -DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10921335220336914 s -INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11159729957580566 s -DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=193523000075637311573989991248064101422, time:1750767295.4257834s req_ids:[8] -DEBUG 06-24 20:14:55 [manager.py:391] -ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:202.20661163330078ms total_cost_time:202.24928855895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8384 prompt_cache_len:5151 prompt_cache_ratio:0.6143845419847328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 -DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10804438591003418 s -INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11002802848815918 s -DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=8907332747248072743063263310547633692, time:1750767295.62989s req_ids:[8] -DEBUG 06-24 20:14:55 [manager.py:391] -ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:203.9949893951416ms total_cost_time:204.03814315795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8385 prompt_cache_len:5151 prompt_cache_ratio:0.6143112701252236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 -DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:55 [manager.py:224] router recive req id 8 cost time 0.10847043991088867 s -INFO 06-24 20:14:55 [manager.py:68] detokenization recv req id 8 cost time 0.11044430732727051 s -DEBUG 06-24 20:14:55 [manager.py:391] Prefill Batch: batch_id=240996194893828963635763653932288036861, time:1750767295.838813s req_ids:[8] -DEBUG 06-24 20:14:55 [manager.py:391] -ERROR 06-24 20:14:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:205.45530319213867ms total_cost_time:205.49726486206055ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8386 prompt_cache_len:5151 prompt_cache_ratio:0.61423801574052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 -DEBUG 06-24 20:14:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s -INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10971331596374512 s -DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=37632074984236548834336501665566198293, time:1750767296.0514276s req_ids:[8] -DEBUG 06-24 20:14:56 [manager.py:391] -ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:55 lightllm_req_id:8 first_token_cost:208.6179256439209ms total_cost_time:208.6637020111084ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8387 prompt_cache_len:5151 prompt_cache_ratio:0.614164778824371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 -DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s -INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.11086487770080566 s -DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=248851822877703466818619519155165118854, time:1750767296.261816s req_ids:[8] -DEBUG 06-24 20:14:56 [manager.py:391] -ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:203.5691738128662ms total_cost_time:203.6142349243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8388 prompt_cache_len:5151 prompt_cache_ratio:0.6140915593705293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 -DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10698580741882324 s -INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10884881019592285 s -DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=56177992392048865470351637431435911958, time:1750767296.4714868s req_ids:[8] -DEBUG 06-24 20:14:56 [manager.py:391] -ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:197.18146324157715ms total_cost_time:197.22390174865723ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8389 prompt_cache_len:5151 prompt_cache_ratio:0.61401835737275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 -DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.10841798782348633 s -INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037135124206543 s -DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=232692198900104402470385540106632706254, time:1750767296.6851459s req_ids:[8] -DEBUG 06-24 20:14:56 [manager.py:391] -ERROR 06-24 20:14:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:208.77933502197266ms total_cost_time:208.82296562194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8390 prompt_cache_len:5151 prompt_cache_ratio:0.6139451728247914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 -DEBUG 06-24 20:14:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:56 [manager.py:224] router recive req id 8 cost time 0.1071012020111084 s -INFO 06-24 20:14:56 [manager.py:68] detokenization recv req id 8 cost time 0.10942292213439941 s -DEBUG 06-24 20:14:56 [manager.py:391] Prefill Batch: batch_id=290898094398610724015060743423334700994, time:1750767296.897478s req_ids:[8] -DEBUG 06-24 20:14:56 [manager.py:391] -ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:56 lightllm_req_id:8 first_token_cost:370.6681728363037ms total_cost_time:370.7118034362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8391 prompt_cache_len:5151 prompt_cache_ratio:0.6138720057204148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 -DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10814356803894043 s -INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11028432846069336 s -DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=118080281514754145619810589840061686976, time:1750767297.266583s req_ids:[8] -DEBUG 06-24 20:14:57 [manager.py:391] -ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:205.23667335510254ms total_cost_time:205.27911186218262ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8392 prompt_cache_len:5151 prompt_cache_ratio:0.6137988560533841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 -DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.1081233024597168 s -INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s -DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=270615625680643213940768743007461210296, time:1750767297.4789965s req_ids:[8] -DEBUG 06-24 20:14:57 [manager.py:391] -ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:204.4045925140381ms total_cost_time:204.44869995117188ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8393 prompt_cache_len:5151 prompt_cache_ratio:0.6137257238174669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 -DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10827851295471191 s -INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.1103219985961914 s -DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=208165520351236135149491979903072636517, time:1750767297.6904929s req_ids:[8] -DEBUG 06-24 20:14:57 [manager.py:391] -ERROR 06-24 20:14:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:203.9508819580078ms total_cost_time:203.9966583251953ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8394 prompt_cache_len:5151 prompt_cache_ratio:0.6136526090064331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 -DEBUG 06-24 20:14:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:57 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s -INFO 06-24 20:14:57 [manager.py:68] detokenization recv req id 8 cost time 0.11022233963012695 s -DEBUG 06-24 20:14:57 [manager.py:391] Prefill Batch: batch_id=325989771635030704425533231108756568641, time:1750767297.907327s req_ids:[8] -DEBUG 06-24 20:14:57 [manager.py:391] -ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:57 lightllm_req_id:8 first_token_cost:221.46868705749512ms total_cost_time:221.53377532958984ms,out_token_counter:1 mean_per_token_cost_time: 0.06508827209472656ms prompt_token_num:8395 prompt_cache_len:5151 prompt_cache_ratio:0.613579511614056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 -DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10776233673095703 s -INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.10970234870910645 s -DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=61790169856867526185735406743537073462, time:1750767298.1492138s req_ids:[8] -DEBUG 06-24 20:14:58 [manager.py:391] -ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:228.38616371154785ms total_cost_time:228.43122482299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8396 prompt_cache_len:5151 prompt_cache_ratio:0.6135064316341114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 -DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10759520530700684 s -INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.10950136184692383 s -DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=337543175081435050361682484932943899218, time:1750767298.3638692s req_ids:[8] -DEBUG 06-24 20:14:58 [manager.py:391] -ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:206.9075107574463ms total_cost_time:206.9711685180664ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:8397 prompt_cache_len:5151 prompt_cache_ratio:0.6134333690603787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 -DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10845017433166504 s -INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.11011910438537598 s -DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=17590771758064458560663809220335429279, time:1750767298.5764377s req_ids:[8] -DEBUG 06-24 20:14:58 [manager.py:391] -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:14:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:168.1840419769287ms total_cost_time:168.2283878326416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8398 prompt_cache_len:5151 prompt_cache_ratio:0.6133603238866396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 -DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10730242729187012 s -INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.1091454029083252 s -DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=55275128846947129734103868336165707921, time:1750767298.7486298s req_ids:[8] -DEBUG 06-24 20:14:58 [manager.py:391] -DEBUG 06-24 20:14:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 36697.345 tokens/s -DEBUG 06-24 20:14:58 [stats.py:37] Avg prompt tokens throughput: 36688.587 tokens/s -DEBUG 06-24 20:14:58 [stats.py:37] Avg generate tokens throughput: 8.759 tokens/s -ERROR 06-24 20:14:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:200.87027549743652ms total_cost_time:200.913667678833ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8399 prompt_cache_len:5151 prompt_cache_ratio:0.6132872961066793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 -DEBUG 06-24 20:14:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:58 [manager.py:224] router recive req id 8 cost time 0.10864400863647461 s -INFO 06-24 20:14:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052227020263672 s -DEBUG 06-24 20:14:58 [manager.py:391] Prefill Batch: batch_id=54161961121955347321221076462912968396, time:1750767298.953305s req_ids:[8] -DEBUG 06-24 20:14:58 [manager.py:391] -ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:58 lightllm_req_id:8 first_token_cost:201.9021511077881ms total_cost_time:201.94649696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8400 prompt_cache_len:5151 prompt_cache_ratio:0.6132142857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 -DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10792207717895508 s -INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.11030006408691406 s -DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=59594178784249676758607097844937174089, time:1750767299.1693125s req_ids:[8] -DEBUG 06-24 20:14:59 [manager.py:391] -ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:373.40712547302246ms total_cost_time:373.45194816589355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8401 prompt_cache_len:5151 prompt_cache_ratio:0.6131412927032496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 -DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s -INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060738563537598 s -DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=10241095929983728542422852066010047376, time:1750767299.5401566s req_ids:[8] -DEBUG 06-24 20:14:59 [manager.py:391] -ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:204.0271759033203ms total_cost_time:204.0688991546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8402 prompt_cache_len:5151 prompt_cache_ratio:0.6130683170673649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 -DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10727477073669434 s -INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.10913205146789551 s -DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=224034263545368386471211661971263470861, time:1750767299.7539341s req_ids:[8] -DEBUG 06-24 20:14:59 [manager.py:391] -ERROR 06-24 20:14:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:14:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:14:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:177.87623405456543ms total_cost_time:177.93011665344238ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:8403 prompt_cache_len:5151 prompt_cache_ratio:0.6129953588004284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:14:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 -DEBUG 06-24 20:14:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:14:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:14:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:14:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:14:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:14:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:14:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:14:59 [manager.py:224] router recive req id 8 cost time 0.10712265968322754 s -INFO 06-24 20:14:59 [manager.py:68] detokenization recv req id 8 cost time 0.10819649696350098 s -DEBUG 06-24 20:14:59 [manager.py:391] Prefill Batch: batch_id=237413407909576479746994851068780530194, time:1750767299.934086s req_ids:[8] -DEBUG 06-24 20:14:59 [manager.py:391] -ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:14:59 lightllm_req_id:8 first_token_cost:196.30885124206543ms total_cost_time:196.35343551635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8404 prompt_cache_len:5151 prompt_cache_ratio:0.6129224178962399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 -DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.1076807975769043 s -INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10956597328186035 s -DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=308123753582056048531065578681317644641, time:1750767300.1434653s req_ids:[8] -DEBUG 06-24 20:15:00 [manager.py:391] -ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:207.48376846313477ms total_cost_time:207.52739906311035ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8405 prompt_cache_len:5151 prompt_cache_ratio:0.612849494348602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 -DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10744905471801758 s -INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10932421684265137 s -DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=137197264246329344014321356755399205236, time:1750767300.3545356s req_ids:[8] -DEBUG 06-24 20:15:00 [manager.py:391] -ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:209.8677158355713ms total_cost_time:209.91086959838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8406 prompt_cache_len:5151 prompt_cache_ratio:0.6127765881513205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 -DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.1072547435760498 s -INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.10924887657165527 s -DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=101885760199815517281230032851123741432, time:1750767300.5690255s req_ids:[8] -DEBUG 06-24 20:15:00 [manager.py:391] -ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:203.643798828125ms total_cost_time:203.6881446838379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8407 prompt_cache_len:5151 prompt_cache_ratio:0.6127036992982039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 -DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s -INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.11147570610046387 s -DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=14885947683403805306594203694629827631, time:1750767300.7812178s req_ids:[8] -DEBUG 06-24 20:15:00 [manager.py:391] -ERROR 06-24 20:15:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:209.0001106262207ms total_cost_time:209.04278755187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8408 prompt_cache_len:5151 prompt_cache_ratio:0.6126308277830638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 -DEBUG 06-24 20:15:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:00 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s -INFO 06-24 20:15:00 [manager.py:68] detokenization recv req id 8 cost time 0.11019206047058105 s -DEBUG 06-24 20:15:00 [manager.py:391] Prefill Batch: batch_id=3223486102555821366067162042301857276, time:1750767300.9932365s req_ids:[8] -DEBUG 06-24 20:15:00 [manager.py:391] -ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:00 lightllm_req_id:8 first_token_cost:208.50658416748047ms total_cost_time:208.54926109313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8409 prompt_cache_len:5151 prompt_cache_ratio:0.6125579735997146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 -DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.3110232353210449 s -INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.312283992767334 s -DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=129538661814799410571869211096606593282, time:1750767301.4131074s req_ids:[8] -DEBUG 06-24 20:15:01 [manager.py:391] -ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:418.21837425231934ms total_cost_time:418.2605743408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8410 prompt_cache_len:5151 prompt_cache_ratio:0.6124851367419738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 -DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s -INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.11006569862365723 s -DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=244235888560123911845585581849849240793, time:1750767301.630859s req_ids:[8] -DEBUG 06-24 20:15:01 [manager.py:391] -ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:207.02648162841797ms total_cost_time:207.06939697265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8411 prompt_cache_len:5151 prompt_cache_ratio:0.6124123172036618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 -DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:01 [manager.py:224] router recive req id 8 cost time 0.10769319534301758 s -INFO 06-24 20:15:01 [manager.py:68] detokenization recv req id 8 cost time 0.1089482307434082 s -DEBUG 06-24 20:15:01 [manager.py:391] Prefill Batch: batch_id=54519829939308747405056562360300828622, time:1750767301.8409214s req_ids:[8] -DEBUG 06-24 20:15:01 [manager.py:391] -ERROR 06-24 20:15:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:208.1136703491211ms total_cost_time:208.15682411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8412 prompt_cache_len:5151 prompt_cache_ratio:0.612339514978602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 -DEBUG 06-24 20:15:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10814070701599121 s -INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.11016035079956055 s -DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=158095796602175375334813036413737706951, time:1750767302.0515876s req_ids:[8] -DEBUG 06-24 20:15:02 [manager.py:391] -ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:01 lightllm_req_id:8 first_token_cost:200.77753067016602ms total_cost_time:200.8211612701416ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8413 prompt_cache_len:5151 prompt_cache_ratio:0.6122667300606205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 -DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10800385475158691 s -INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10998320579528809 s -DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=221934834960834592626627247088426473337, time:1750767302.2627792s req_ids:[8] -DEBUG 06-24 20:15:02 [manager.py:391] -ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:208.3725929260254ms total_cost_time:208.4181308746338ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8414 prompt_cache_len:5151 prompt_cache_ratio:0.6121939624435465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 -DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.1079258918762207 s -INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10961413383483887 s -DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=295339679434532293143780347051093598663, time:1750767302.476235s req_ids:[8] -DEBUG 06-24 20:15:02 [manager.py:391] -ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:175.39525032043457ms total_cost_time:175.43697357177734ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8415 prompt_cache_len:5151 prompt_cache_ratio:0.6121212121212121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 -DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.1074972152709961 s -INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.10934257507324219 s -DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=310665530674894052116640070306649479440, time:1750767302.6542237s req_ids:[8] -DEBUG 06-24 20:15:02 [manager.py:391] -ERROR 06-24 20:15:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:193.10379028320312ms total_cost_time:193.1476593017578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8416 prompt_cache_len:5151 prompt_cache_ratio:0.6120484790874525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 -DEBUG 06-24 20:15:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:02 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s -INFO 06-24 20:15:02 [manager.py:68] detokenization recv req id 8 cost time 0.1107492446899414 s -DEBUG 06-24 20:15:02 [manager.py:391] Prefill Batch: batch_id=86426648157262359196472262747095022900, time:1750767302.8553464s req_ids:[8] -DEBUG 06-24 20:15:02 [manager.py:391] -ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:02 lightllm_req_id:8 first_token_cost:368.99447441101074ms total_cost_time:369.03834342956543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8417 prompt_cache_len:5151 prompt_cache_ratio:0.6119757633361055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 -DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s -INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11009645462036133 s -DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=116645101844515112408615210018666121642, time:1750767303.2273679s req_ids:[8] -DEBUG 06-24 20:15:03 [manager.py:391] -ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:203.43852043151855ms total_cost_time:203.48000526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8418 prompt_cache_len:5151 prompt_cache_ratio:0.6119030648610121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 -DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10825324058532715 s -INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s -DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=290852715701895200239015688235404418295, time:1750767303.4411628s req_ids:[8] -DEBUG 06-24 20:15:03 [manager.py:391] -ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:203.83024215698242ms total_cost_time:203.8707733154297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:8419 prompt_cache_len:5151 prompt_cache_ratio:0.6118303836560162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 -DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10701465606689453 s -INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.1088418960571289 s -DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=157217911038384463259229455355640947255, time:1750767303.6496248s req_ids:[8] -DEBUG 06-24 20:15:03 [manager.py:391] -ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:204.90765571594238ms total_cost_time:204.95033264160156ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8420 prompt_cache_len:5151 prompt_cache_ratio:0.6117577197149644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 -DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:03 [manager.py:224] router recive req id 8 cost time 0.10895085334777832 s -INFO 06-24 20:15:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089587211608887 s -DEBUG 06-24 20:15:03 [manager.py:391] Prefill Batch: batch_id=319585823200911135751075547187108423502, time:1750767303.860282s req_ids:[8] -DEBUG 06-24 20:15:03 [manager.py:391] -ERROR 06-24 20:15:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:206.90417289733887ms total_cost_time:206.94947242736816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8421 prompt_cache_len:5151 prompt_cache_ratio:0.6116850730317065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 -DEBUG 06-24 20:15:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10723400115966797 s -INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.10927343368530273 s -DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=297867023668335867617050734588457361992, time:1750767304.0747416s req_ids:[8] -DEBUG 06-24 20:15:04 [manager.py:391] -ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:03 lightllm_req_id:8 first_token_cost:205.90710639953613ms total_cost_time:205.9495449066162ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8422 prompt_cache_len:5151 prompt_cache_ratio:0.611612443600095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 -DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10744118690490723 s -INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.10948848724365234 s -DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=224037054886901283131742567933337772403, time:1750767304.284884s req_ids:[8] -DEBUG 06-24 20:15:04 [manager.py:391] -ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:206.47096633911133ms total_cost_time:206.5141201019287ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8423 prompt_cache_len:5151 prompt_cache_ratio:0.6115398314139855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 -DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.31050705909729004 s -INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.31285834312438965 s -DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=250127663779496055721299875716886669029, time:1750767304.707225s req_ids:[8] -DEBUG 06-24 20:15:04 [manager.py:391] -ERROR 06-24 20:15:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:423.7239360809326ms total_cost_time:423.7651824951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8424 prompt_cache_len:5151 prompt_cache_ratio:0.6114672364672364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 -DEBUG 06-24 20:15:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:04 [manager.py:224] router recive req id 8 cost time 0.10828733444213867 s -INFO 06-24 20:15:04 [manager.py:68] detokenization recv req id 8 cost time 0.1102590560913086 s -DEBUG 06-24 20:15:04 [manager.py:391] Prefill Batch: batch_id=328532782729164800199216219570759092521, time:1750767304.9299095s req_ids:[8] -DEBUG 06-24 20:15:04 [manager.py:391] -ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:04 lightllm_req_id:8 first_token_cost:211.38882637023926ms total_cost_time:211.43341064453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8425 prompt_cache_len:5151 prompt_cache_ratio:0.6113946587537092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 -DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10788679122924805 s -INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.11036539077758789 s -DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=317426667700253739143720540712034569793, time:1750767305.1479073s req_ids:[8] -DEBUG 06-24 20:15:05 [manager.py:391] -ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:211.44533157348633ms total_cost_time:211.4884853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8426 prompt_cache_len:5151 prompt_cache_ratio:0.611322098267268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 -DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10812115669250488 s -INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.11005473136901855 s -DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=208698387415990703524925066827822658945, time:1750767305.360969s req_ids:[8] -DEBUG 06-24 20:15:05 [manager.py:391] -ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:206.81405067443848ms total_cost_time:206.85625076293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8427 prompt_cache_len:5151 prompt_cache_ratio:0.61124955500178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 -DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10798907279968262 s -INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.1099851131439209 s -DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=299413205335424934533331883544922609080, time:1750767305.5721238s req_ids:[8] -DEBUG 06-24 20:15:05 [manager.py:391] -ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:200.47807693481445ms total_cost_time:200.52433013916016ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8428 prompt_cache_len:5151 prompt_cache_ratio:0.6111770289511154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 -DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.11088323593139648 s -INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.1128392219543457 s -DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=213006831136104534703128364769423283324, time:1750767305.780454s req_ids:[8] -DEBUG 06-24 20:15:05 [manager.py:391] -ERROR 06-24 20:15:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:202.5439739227295ms total_cost_time:202.62718200683594ms,out_token_counter:1 mean_per_token_cost_time: 0.08320808410644531ms prompt_token_num:8429 prompt_cache_len:5151 prompt_cache_ratio:0.611104520109147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 -DEBUG 06-24 20:15:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:05 [manager.py:224] router recive req id 8 cost time 0.10747718811035156 s -INFO 06-24 20:15:05 [manager.py:68] detokenization recv req id 8 cost time 0.10953688621520996 s -DEBUG 06-24 20:15:05 [manager.py:391] Prefill Batch: batch_id=150703326799795933127368813083694868111, time:1750767305.9936242s req_ids:[8] -DEBUG 06-24 20:15:05 [manager.py:391] -ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:05 lightllm_req_id:8 first_token_cost:362.5922203063965ms total_cost_time:362.6365661621094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8430 prompt_cache_len:5151 prompt_cache_ratio:0.6110320284697509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 -DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10839605331420898 s -INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11036109924316406 s -DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=295351742298487985870258412058856895801, time:1750767306.3593252s req_ids:[8] -DEBUG 06-24 20:15:06 [manager.py:391] -ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:205.20973205566406ms total_cost_time:205.25431632995605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8431 prompt_cache_len:5151 prompt_cache_ratio:0.6109595540268058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 -DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.1082000732421875 s -INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11009383201599121 s -DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=187028261677400707019438634439829499872, time:1750767306.5734897s req_ids:[8] -DEBUG 06-24 20:15:06 [manager.py:391] -ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:203.03702354431152ms total_cost_time:203.08160781860352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8432 prompt_cache_len:5151 prompt_cache_ratio:0.6108870967741935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 -DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10884976387023926 s -INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075949668884277 s -DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=16977108093388773543187093875157760379, time:1750767306.7825294s req_ids:[8] -DEBUG 06-24 20:15:06 [manager.py:391] -ERROR 06-24 20:15:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:205.9328556060791ms total_cost_time:205.97553253173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8433 prompt_cache_len:5151 prompt_cache_ratio:0.6108146567057986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 -DEBUG 06-24 20:15:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:06 [manager.py:224] router recive req id 8 cost time 0.10910415649414062 s -INFO 06-24 20:15:06 [manager.py:68] detokenization recv req id 8 cost time 0.11111116409301758 s -DEBUG 06-24 20:15:06 [manager.py:391] Prefill Batch: batch_id=35186303771664444843385352262013707111, time:1750767306.9942148s req_ids:[8] -DEBUG 06-24 20:15:06 [manager.py:391] -ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:06 lightllm_req_id:8 first_token_cost:209.19322967529297ms total_cost_time:209.23781394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8434 prompt_cache_len:5151 prompt_cache_ratio:0.6107422338155086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 -DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s -INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.11007118225097656 s -DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=212458558143022334187553734603168329978, time:1750767307.208388s req_ids:[8] -DEBUG 06-24 20:15:07 [manager.py:391] -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:206.6512107849121ms total_cost_time:206.6943645477295ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8435 prompt_cache_len:5151 prompt_cache_ratio:0.610669828097214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 -DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10773158073425293 s -INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.10949325561523438 s -DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=311047565757583755063146628048837157573, time:1750767307.4208276s req_ids:[8] -DEBUG 06-24 20:15:07 [manager.py:391] -ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:202.99220085144043ms total_cost_time:203.0353546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8436 prompt_cache_len:5151 prompt_cache_ratio:0.6105974395448079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 -DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s -INFO 06-24 20:15:07 [manager.py:68] detokenization recv req id 8 cost time 0.11033344268798828 s -DEBUG 06-24 20:15:07 [manager.py:391] Prefill Batch: batch_id=70525317052197544717407791725934589966, time:1750767307.6295514s req_ids:[8] -DEBUG 06-24 20:15:07 [manager.py:391] -ERROR 06-24 20:15:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:373.9476203918457ms total_cost_time:373.9917278289795ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8437 prompt_cache_len:5151 prompt_cache_ratio:0.6105250681521868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 -DEBUG 06-24 20:15:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:07 [manager.py:224] router recive req id 8 cost time 0.10785627365112305 s -INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.109893798828125 s -DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=295426365423609987860349739567250163495, time:1750767308.0050988s req_ids:[8] -DEBUG 06-24 20:15:08 [manager.py:391] -ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:07 lightllm_req_id:8 first_token_cost:196.03323936462402ms total_cost_time:196.07806205749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8438 prompt_cache_len:5151 prompt_cache_ratio:0.6104527139132496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 -DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10577726364135742 s -INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.10772323608398438 s -DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=130595018932769216662651260710651922661, time:1750767308.2106023s req_ids:[8] -DEBUG 06-24 20:15:08 [manager.py:391] -ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:205.98983764648438ms total_cost_time:206.03370666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8439 prompt_cache_len:5151 prompt_cache_ratio:0.6103803768218983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 -DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10849189758300781 s -INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.11099791526794434 s -DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=160734116978551564000830878190382787404, time:1750767308.423383s req_ids:[8] -DEBUG 06-24 20:15:08 [manager.py:391] -ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:207.83042907714844ms total_cost_time:207.87334442138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8440 prompt_cache_len:5151 prompt_cache_ratio:0.6103080568720379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 -DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:08 [batch.py:51] router release req id 8 -INFO 06-24 20:15:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10791397094726562 s -INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s -DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=316873265787230008035939602988657140647, time:1750767308.6370327s req_ids:[8] -DEBUG 06-24 20:15:08 [manager.py:391] -ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:210.27660369873047ms total_cost_time:210.31975746154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8441 prompt_cache_len:5151 prompt_cache_ratio:0.6102357540575761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 -DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:08 [manager.py:224] router recive req id 8 cost time 0.10810351371765137 s -INFO 06-24 20:15:08 [manager.py:68] detokenization recv req id 8 cost time 0.1099402904510498 s -DEBUG 06-24 20:15:08 [manager.py:391] Prefill Batch: batch_id=257433987272126904175471229630861215682, time:1750767308.8514311s req_ids:[8] -DEBUG 06-24 20:15:08 [manager.py:391] -DEBUG 06-24 20:15:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 35849.960 tokens/s -DEBUG 06-24 20:15:08 [stats.py:37] Avg prompt tokens throughput: 35841.448 tokens/s -DEBUG 06-24 20:15:08 [stats.py:37] Avg generate tokens throughput: 8.512 tokens/s -ERROR 06-24 20:15:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:203.66501808166504ms total_cost_time:203.70817184448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8442 prompt_cache_len:5151 prompt_cache_ratio:0.6101634683724236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 -DEBUG 06-24 20:15:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s -INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.10981631278991699 s -DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=170949480989550182365094535809482750049, time:1750767309.0619617s req_ids:[8] -DEBUG 06-24 20:15:09 [manager.py:391] -ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:08 lightllm_req_id:8 first_token_cost:206.3138484954834ms total_cost_time:206.3581943511963ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8443 prompt_cache_len:5151 prompt_cache_ratio:0.6100911998104939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 -DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.1084451675415039 s -INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11036300659179688 s -DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=195792750900178838713643202427079486527, time:1750767309.2735946s req_ids:[8] -DEBUG 06-24 20:15:09 [manager.py:391] -ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:211.24911308288574ms total_cost_time:211.29560470581055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8444 prompt_cache_len:5151 prompt_cache_ratio:0.6100189483657035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 -DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10817909240722656 s -INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11081838607788086 s -DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=15353624148820505063541424233944416766, time:1750767309.4869719s req_ids:[8] -DEBUG 06-24 20:15:09 [manager.py:391] -ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:200.29950141906738ms total_cost_time:200.34408569335938ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8445 prompt_cache_len:5151 prompt_cache_ratio:0.6099467140319715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 -DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10745525360107422 s -INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.10932540893554688 s -DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=320950916089381884813448321355557982584, time:1750767309.6973982s req_ids:[8] -DEBUG 06-24 20:15:09 [manager.py:391] -ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:203.2158374786377ms total_cost_time:203.25851440429688ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8446 prompt_cache_len:5151 prompt_cache_ratio:0.6098744968032205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 -DEBUG 06-24 20:15:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:09 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s -INFO 06-24 20:15:09 [manager.py:68] detokenization recv req id 8 cost time 0.11045408248901367 s -DEBUG 06-24 20:15:09 [manager.py:391] Prefill Batch: batch_id=96690897178817779332938651548598764994, time:1750767309.9054024s req_ids:[8] -DEBUG 06-24 20:15:09 [manager.py:391] -ERROR 06-24 20:15:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:202.5461196899414ms total_cost_time:202.5899887084961ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8447 prompt_cache_len:5151 prompt_cache_ratio:0.6098022966733752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 -DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.11004924774169922 s -INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11198139190673828 s -DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=302327857834520467008844304676849488523, time:1750767310.1111403s req_ids:[8] -DEBUG 06-24 20:15:10 [manager.py:391] -ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:09 lightllm_req_id:8 first_token_cost:203.76110076904297ms total_cost_time:203.8249969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:8448 prompt_cache_len:5151 prompt_cache_ratio:0.6097301136363636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 -DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10928583145141602 s -INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11123204231262207 s -DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=143226974034097371617636099111396372073, time:1750767310.3229887s req_ids:[8] -DEBUG 06-24 20:15:10 [manager.py:391] -ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:206.27689361572266ms total_cost_time:206.32219314575195ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8449 prompt_cache_len:5151 prompt_cache_ratio:0.6096579476861167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 -DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s -INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11018133163452148 s -DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=271057018768547075267753270214297248275, time:1750767310.537294s req_ids:[8] -DEBUG 06-24 20:15:10 [manager.py:391] -ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:209.75494384765625ms total_cost_time:209.79952812194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8450 prompt_cache_len:5151 prompt_cache_ratio:0.609585798816568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 -DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.10807132720947266 s -INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.11001229286193848 s -DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=152788734019961385649920779463865574522, time:1750767310.749922s req_ids:[8] -DEBUG 06-24 20:15:10 [manager.py:391] -ERROR 06-24 20:15:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:202.94928550720215ms total_cost_time:202.99458503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8451 prompt_cache_len:5151 prompt_cache_ratio:0.6095136670216542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 -DEBUG 06-24 20:15:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:10 [manager.py:224] router recive req id 8 cost time 0.1072092056274414 s -INFO 06-24 20:15:10 [manager.py:68] detokenization recv req id 8 cost time 0.10906267166137695 s -DEBUG 06-24 20:15:10 [manager.py:391] Prefill Batch: batch_id=54521800750868192945465877837284109421, time:1750767310.9676654s req_ids:[8] -DEBUG 06-24 20:15:10 [manager.py:391] -ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:10 lightllm_req_id:8 first_token_cost:374.6373653411865ms total_cost_time:374.6829032897949ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8452 prompt_cache_len:5151 prompt_cache_ratio:0.6094415522953147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 -DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10817241668701172 s -INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.11021685600280762 s -DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=155400084622596006515751292754945294661, time:1750767311.3379538s req_ids:[8] -DEBUG 06-24 20:15:11 [manager.py:391] -ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:203.78875732421875ms total_cost_time:203.83262634277344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8453 prompt_cache_len:5151 prompt_cache_ratio:0.6093694546314917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 -DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10749006271362305 s -INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.10959362983703613 s -DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=246866300748825310386851339264167772556, time:1750767311.5514486s req_ids:[8] -DEBUG 06-24 20:15:11 [manager.py:391] -ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:207.40628242492676ms total_cost_time:207.45015144348145ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8454 prompt_cache_len:5151 prompt_cache_ratio:0.6092973740241305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 -DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10779953002929688 s -INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.10988879203796387 s -DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=90405470861237417620041826766799821619, time:1750767311.7645261s req_ids:[8] -DEBUG 06-24 20:15:11 [manager.py:391] -ERROR 06-24 20:15:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:206.4671516418457ms total_cost_time:206.50982856750488ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8455 prompt_cache_len:5151 prompt_cache_ratio:0.6092253104671792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 -DEBUG 06-24 20:15:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:11 [manager.py:224] router recive req id 8 cost time 0.10887336730957031 s -INFO 06-24 20:15:11 [manager.py:68] detokenization recv req id 8 cost time 0.1108405590057373 s -DEBUG 06-24 20:15:11 [manager.py:391] Prefill Batch: batch_id=92521648342276699864805984054588147693, time:1750767311.978629s req_ids:[8] -DEBUG 06-24 20:15:11 [manager.py:391] -ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:11 lightllm_req_id:8 first_token_cost:209.03944969177246ms total_cost_time:209.08427238464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8456 prompt_cache_len:5151 prompt_cache_ratio:0.6091532639545885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 -DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10832571983337402 s -INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11016154289245605 s -DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=306821650775601618321656819602050241520, time:1750767312.1932003s req_ids:[8] -DEBUG 06-24 20:15:12 [manager.py:391] -ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:206.77709579467773ms total_cost_time:206.82311058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8457 prompt_cache_len:5151 prompt_cache_ratio:0.6090812344803122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 -DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10801172256469727 s -INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.10984253883361816 s -DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=58619328649905711436624774804867760944, time:1750767312.4106028s req_ids:[8] -DEBUG 06-24 20:15:12 [manager.py:391] -ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:213.0730152130127ms total_cost_time:213.11664581298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8458 prompt_cache_len:5151 prompt_cache_ratio:0.6090092220383069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 -DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.1085515022277832 s -INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11048746109008789 s -DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=165015383856099417288269464491488748107, time:1750767312.6243236s req_ids:[8] -DEBUG 06-24 20:15:12 [manager.py:391] -ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:207.442045211792ms total_cost_time:207.48639106750488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8459 prompt_cache_len:5151 prompt_cache_ratio:0.6089372266225322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 -DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:12 [manager.py:224] router recive req id 8 cost time 0.10841798782348633 s -INFO 06-24 20:15:12 [manager.py:68] detokenization recv req id 8 cost time 0.11050105094909668 s -DEBUG 06-24 20:15:12 [manager.py:391] Prefill Batch: batch_id=159767453362005887747708929850226556457, time:1750767312.8363004s req_ids:[8] -DEBUG 06-24 20:15:12 [manager.py:391] -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:206.62951469421387ms total_cost_time:206.67433738708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8460 prompt_cache_len:5151 prompt_cache_ratio:0.6088652482269503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 -DEBUG 06-24 20:15:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10735774040222168 s -INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.10933923721313477 s -DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=115532773430623435681575884018880805220, time:1750767313.04956s req_ids:[8] -DEBUG 06-24 20:15:13 [manager.py:391] -ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:12 lightllm_req_id:8 first_token_cost:207.60750770568848ms total_cost_time:207.65161514282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8461 prompt_cache_len:5151 prompt_cache_ratio:0.6087932868455266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 -DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10860180854797363 s -INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s -DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=298894202637373361076223219613686520612, time:1750767313.2677681s req_ids:[8] -DEBUG 06-24 20:15:13 [manager.py:391] -ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:214.24007415771484ms total_cost_time:214.28322792053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8462 prompt_cache_len:5151 prompt_cache_ratio:0.6087213424722288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 -DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10805249214172363 s -INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s -DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=58954914252949819266978352605403739981, time:1750767313.4887788s req_ids:[8] -DEBUG 06-24 20:15:13 [manager.py:391] -ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:212.87846565246582ms total_cost_time:212.9230499267578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8463 prompt_cache_len:5151 prompt_cache_ratio:0.608649415101028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 -DEBUG 06-24 20:15:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:13 [manager.py:224] router recive req id 8 cost time 0.10867047309875488 s -INFO 06-24 20:15:13 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s -DEBUG 06-24 20:15:13 [manager.py:391] Prefill Batch: batch_id=60888327154630375669830410076507793305, time:1750767313.7013774s req_ids:[8] -DEBUG 06-24 20:15:13 [manager.py:391] -ERROR 06-24 20:15:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:387.2199058532715ms total_cost_time:387.2649669647217ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8464 prompt_cache_len:5151 prompt_cache_ratio:0.6085775047258979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 -DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10889792442321777 s -INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11089253425598145 s -DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=283953288730117449369057787490348485431, time:1750767314.090492s req_ids:[8] -DEBUG 06-24 20:15:14 [manager.py:391] -ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:13 lightllm_req_id:8 first_token_cost:208.47678184509277ms total_cost_time:208.5251808166504ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:8465 prompt_cache_len:5151 prompt_cache_ratio:0.6085056113408152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 -DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10806775093078613 s -INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.10997605323791504 s -DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=22071053498242958339452132597611902174, time:1750767314.3107696s req_ids:[8] -DEBUG 06-24 20:15:14 [manager.py:391] -ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:200.0107765197754ms total_cost_time:200.0570297241211ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8466 prompt_cache_len:5151 prompt_cache_ratio:0.608433734939759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 -DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10996365547180176 s -INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11196446418762207 s -DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=173114695993577092812998530470225851035, time:1750767314.5127943s req_ids:[8] -DEBUG 06-24 20:15:14 [manager.py:391] -ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:206.21180534362793ms total_cost_time:206.25567436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8467 prompt_cache_len:5151 prompt_cache_ratio:0.608361875516712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 -DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10863852500915527 s -INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.11069989204406738 s -DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=280071428544312896133544943344611301108, time:1750767314.725252s req_ids:[8] -DEBUG 06-24 20:15:14 [manager.py:391] -ERROR 06-24 20:15:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:204.35142517089844ms total_cost_time:204.39529418945312ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8468 prompt_cache_len:5151 prompt_cache_ratio:0.608290033065659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 -DEBUG 06-24 20:15:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:14 [manager.py:224] router recive req id 8 cost time 0.10763955116271973 s -INFO 06-24 20:15:14 [manager.py:68] detokenization recv req id 8 cost time 0.10954809188842773 s -DEBUG 06-24 20:15:14 [manager.py:391] Prefill Batch: batch_id=86440286479720367570019129694858265234, time:1750767314.9408438s req_ids:[8] -DEBUG 06-24 20:15:14 [manager.py:391] -ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:14 lightllm_req_id:8 first_token_cost:207.33022689819336ms total_cost_time:207.37171173095703ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8469 prompt_cache_len:5151 prompt_cache_ratio:0.608218207580588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 -DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s -INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.1102285385131836 s -DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=52800548585344657027239445122208056954, time:1750767315.1515646s req_ids:[8] -DEBUG 06-24 20:15:15 [manager.py:391] -ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:169.42548751831055ms total_cost_time:169.46840286254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8470 prompt_cache_len:5151 prompt_cache_ratio:0.60814639905549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 -DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10683631896972656 s -INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10877180099487305 s -DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=261284322877345006314508110249412414775, time:1750767315.324305s req_ids:[8] -DEBUG 06-24 20:15:15 [manager.py:391] -ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:196.08306884765625ms total_cost_time:196.12622261047363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8471 prompt_cache_len:5151 prompt_cache_ratio:0.6080746074843584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 -DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10768699645996094 s -INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s -DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=12269258894682624816136188255232647632, time:1750767315.5255272s req_ids:[8] -DEBUG 06-24 20:15:15 [manager.py:391] -ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:202.6808261871338ms total_cost_time:202.7263641357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8472 prompt_cache_len:5151 prompt_cache_ratio:0.6080028328611898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 -DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:15 [batch.py:51] router release req id 8 -DEBUG 06-24 20:15:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:15 [manager.py:283] -DEBUG 06-24 20:15:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:15 [manager.py:284] -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10805964469909668 s -INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.11002826690673828 s -DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=165807093125889596731618697130676111141, time:1750767315.7361126s req_ids:[8] -DEBUG 06-24 20:15:15 [manager.py:391] -ERROR 06-24 20:15:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:207.31806755065918ms total_cost_time:207.36241340637207ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8473 prompt_cache_len:5151 prompt_cache_ratio:0.6079310751799835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 -DEBUG 06-24 20:15:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:15 [manager.py:224] router recive req id 8 cost time 0.10732078552246094 s -INFO 06-24 20:15:15 [manager.py:68] detokenization recv req id 8 cost time 0.10940051078796387 s -DEBUG 06-24 20:15:15 [manager.py:391] Prefill Batch: batch_id=2803494314530524709596668655958977477, time:1750767315.9491577s req_ids:[8] -DEBUG 06-24 20:15:15 [manager.py:391] -ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:15 lightllm_req_id:8 first_token_cost:203.8130760192871ms total_cost_time:203.8586139678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8474 prompt_cache_len:5151 prompt_cache_ratio:0.6078593344347416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 -DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10854220390319824 s -INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11040973663330078 s -DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=91036598762403695309762399747701284438, time:1750767316.1576025s req_ids:[8] -DEBUG 06-24 20:15:16 [manager.py:391] -ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:220.9019660949707ms total_cost_time:220.94488143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8475 prompt_cache_len:5151 prompt_cache_ratio:0.6077876106194691 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 -DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.3099963665008545 s -INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.31188297271728516 s -DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=210730471232504555605085140701265999772, time:1750767316.5884387s req_ids:[8] -DEBUG 06-24 20:15:16 [manager.py:391] -ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:386.704683303833ms total_cost_time:386.7485523223877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8476 prompt_cache_len:5151 prompt_cache_ratio:0.6077159037281736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 -DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10864734649658203 s -INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s -DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=321442465610725303638229306882563553601, time:1750767316.773562s req_ids:[8] -DEBUG 06-24 20:15:16 [manager.py:391] -ERROR 06-24 20:15:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:194.67926025390625ms total_cost_time:194.72503662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8477 prompt_cache_len:5151 prompt_cache_ratio:0.6076442137548661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 -DEBUG 06-24 20:15:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:16 [batch.py:51] router release req id 8 -INFO 06-24 20:15:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:16 [manager.py:224] router recive req id 8 cost time 0.10824704170227051 s -INFO 06-24 20:15:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017656326293945 s -DEBUG 06-24 20:15:16 [manager.py:391] Prefill Batch: batch_id=236176162028475234781851806680295783278, time:1750767316.9777544s req_ids:[8] -DEBUG 06-24 20:15:16 [manager.py:391] -ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:16 lightllm_req_id:8 first_token_cost:204.88929748535156ms total_cost_time:204.93340492248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8478 prompt_cache_len:5151 prompt_cache_ratio:0.6075725406935598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 -DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s -INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11129307746887207 s -DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=1125608822110276401943498766983581029, time:1750767317.1898224s req_ids:[8] -DEBUG 06-24 20:15:17 [manager.py:391] -ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:208.64415168762207ms total_cost_time:208.68802070617676ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8479 prompt_cache_len:5151 prompt_cache_ratio:0.607500884538271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 -DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10887384414672852 s -INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11079645156860352 s -DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=303643803246148690888786967502782939238, time:1750767317.4039333s req_ids:[8] -DEBUG 06-24 20:15:17 [manager.py:391] -ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:205.76047897338867ms total_cost_time:205.86299896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.10251998901367188ms prompt_token_num:8480 prompt_cache_len:5151 prompt_cache_ratio:0.6074292452830189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 -DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.10893440246582031 s -INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s -DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=157943569294492984502555634239809805970, time:1750767317.6145113s req_ids:[8] -DEBUG 06-24 20:15:17 [manager.py:391] -ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:203.6266326904297ms total_cost_time:203.67074012756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8481 prompt_cache_len:5151 prompt_cache_ratio:0.6073576229218253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 -DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:17 [manager.py:224] router recive req id 8 cost time 0.1080772876739502 s -INFO 06-24 20:15:17 [manager.py:68] detokenization recv req id 8 cost time 0.11020040512084961 s -DEBUG 06-24 20:15:17 [manager.py:391] Prefill Batch: batch_id=173741878135272598941561213663270583986, time:1750767317.8238358s req_ids:[8] -DEBUG 06-24 20:15:17 [manager.py:391] -ERROR 06-24 20:15:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:205.07192611694336ms total_cost_time:205.13176918029785ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8482 prompt_cache_len:5151 prompt_cache_ratio:0.6072860174487149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 -DEBUG 06-24 20:15:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10827064514160156 s -INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.11030912399291992 s -DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=195946146804776235230595986216628919743, time:1750767318.037506s req_ids:[8] -DEBUG 06-24 20:15:18 [manager.py:391] -ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:17 lightllm_req_id:8 first_token_cost:204.3924331665039ms total_cost_time:204.4353485107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8483 prompt_cache_len:5151 prompt_cache_ratio:0.6072144288577155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 -DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10776805877685547 s -INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.10968446731567383 s -DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=154750650718031942259477710082548771845, time:1750767318.2587488s req_ids:[8] -DEBUG 06-24 20:15:18 [manager.py:391] -ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:220.81708908081055ms total_cost_time:220.86000442504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8484 prompt_cache_len:5151 prompt_cache_ratio:0.6071428571428571 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 -DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.10806560516357422 s -INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.10996723175048828 s -DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=77800032504309576078524225678141175589, time:1750767318.47809s req_ids:[8] -DEBUG 06-24 20:15:18 [manager.py:391] -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:211.91668510437012ms total_cost_time:211.9598388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8485 prompt_cache_len:5151 prompt_cache_ratio:0.6070713022981733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 -DEBUG 06-24 20:15:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:18 [manager.py:224] router recive req id 8 cost time 0.3107593059539795 s -INFO 06-24 20:15:18 [manager.py:68] detokenization recv req id 8 cost time 0.3128194808959961 s -DEBUG 06-24 20:15:18 [manager.py:391] Prefill Batch: batch_id=151485347633670953105748647304515732091, time:1750767318.8961577s req_ids:[8] -DEBUG 06-24 20:15:18 [manager.py:391] -DEBUG 06-24 20:15:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 37086.855 tokens/s -DEBUG 06-24 20:15:18 [stats.py:37] Avg prompt tokens throughput: 37078.094 tokens/s -DEBUG 06-24 20:15:18 [stats.py:37] Avg generate tokens throughput: 8.761 tokens/s -ERROR 06-24 20:15:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:412.8682613372803ms total_cost_time:412.91356086730957ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8486 prompt_cache_len:5151 prompt_cache_ratio:0.6069997643176998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 -DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.11022090911865234 s -INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.11217188835144043 s -DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=239152782978784526261356649094679474939, time:1750767319.1107416s req_ids:[8] -DEBUG 06-24 20:15:19 [manager.py:391] -ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:18 lightllm_req_id:8 first_token_cost:216.81690216064453ms total_cost_time:216.8593406677246ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8487 prompt_cache_len:5151 prompt_cache_ratio:0.6069282431954754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 -DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10955166816711426 s -INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.1115579605102539 s -DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=208035959298632149078947995208968925946, time:1750767319.337627s req_ids:[8] -DEBUG 06-24 20:15:19 [manager.py:391] -ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:212.4018669128418ms total_cost_time:212.4459743499756ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8488 prompt_cache_len:5151 prompt_cache_ratio:0.6068567389255419 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 -DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10800457000732422 s -INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993337631225586 s -DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=128612469570573097013692548871673272703, time:1750767319.5492456s req_ids:[8] -DEBUG 06-24 20:15:19 [manager.py:391] -ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:204.49042320251465ms total_cost_time:204.53405380249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8489 prompt_cache_len:5151 prompt_cache_ratio:0.6067852515019437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 -DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10846066474914551 s -INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.11035776138305664 s -DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=294771058514122612498391887698161269515, time:1750767319.758234s req_ids:[8] -DEBUG 06-24 20:15:19 [manager.py:391] -ERROR 06-24 20:15:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:207.2005271911621ms total_cost_time:207.2453498840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8490 prompt_cache_len:5151 prompt_cache_ratio:0.606713780918728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 -DEBUG 06-24 20:15:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:19 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s -INFO 06-24 20:15:19 [manager.py:68] detokenization recv req id 8 cost time 0.1096642017364502 s -DEBUG 06-24 20:15:19 [manager.py:391] Prefill Batch: batch_id=5619122663642315377369401010211490363, time:1750767319.971431s req_ids:[8] -DEBUG 06-24 20:15:19 [manager.py:391] -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:19 lightllm_req_id:8 first_token_cost:201.06220245361328ms total_cost_time:201.10535621643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8491 prompt_cache_len:5151 prompt_cache_ratio:0.6066423271699446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 -DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10716485977172852 s -INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10951375961303711 s -DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=139793673587263089691533404429428714574, time:1750767320.1853154s req_ids:[8] -DEBUG 06-24 20:15:20 [manager.py:391] -ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:209.6257209777832ms total_cost_time:209.6688747406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8492 prompt_cache_len:5151 prompt_cache_ratio:0.6065708902496467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 -DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10730266571044922 s -INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10915446281433105 s -DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=70718289190808696025515795977135806550, time:1750767320.3997424s req_ids:[8] -DEBUG 06-24 20:15:20 [manager.py:391] -ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:209.64860916137695ms total_cost_time:209.69176292419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8493 prompt_cache_len:5151 prompt_cache_ratio:0.6064994701518898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 -DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10724711418151855 s -INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10913610458374023 s -DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=89085277759505408142025224176861574031, time:1750767320.6124532s req_ids:[8] -DEBUG 06-24 20:15:20 [manager.py:391] -ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:205.41763305664062ms total_cost_time:205.4603099822998ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8494 prompt_cache_len:5151 prompt_cache_ratio:0.6064280668707323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 -DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:20 [manager.py:224] router recive req id 8 cost time 0.10781502723693848 s -INFO 06-24 20:15:20 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s -DEBUG 06-24 20:15:20 [manager.py:391] Prefill Batch: batch_id=92916704179941237488806598321225664813, time:1750767320.829385s req_ids:[8] -DEBUG 06-24 20:15:20 [manager.py:391] -ERROR 06-24 20:15:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:212.51416206359863ms total_cost_time:212.5563621520996ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8495 prompt_cache_len:5151 prompt_cache_ratio:0.6063566804002354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 -DEBUG 06-24 20:15:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10965752601623535 s -INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.11220622062683105 s -DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=140906510723481573319937088547607829840, time:1750767321.0414336s req_ids:[8] -DEBUG 06-24 20:15:21 [manager.py:391] -ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:20 lightllm_req_id:8 first_token_cost:207.91363716125488ms total_cost_time:207.95679092407227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8496 prompt_cache_len:5151 prompt_cache_ratio:0.6062853107344632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 -DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.1080319881439209 s -INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10970449447631836 s -DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=323068116461531204609375460164869341927, time:1750767321.2607837s req_ids:[8] -DEBUG 06-24 20:15:21 [manager.py:391] -ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:177.63423919677734ms total_cost_time:177.67596244812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8497 prompt_cache_len:5151 prompt_cache_ratio:0.6062139578674827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 -DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10791468620300293 s -INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10965108871459961 s -DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=119454969091182412363521423605531501278, time:1750767321.4376922s req_ids:[8] -DEBUG 06-24 20:15:21 [manager.py:391] -ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:162.48345375061035ms total_cost_time:162.52732276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8498 prompt_cache_len:5151 prompt_cache_ratio:0.6061426217933631 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 -DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.10746407508850098 s -INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.10956668853759766 s -DEBUG 06-24 20:15:21 [manager.py:391] Prefill Batch: batch_id=118945864247282320913323320385437838865, time:1750767321.6031396s req_ids:[8] -DEBUG 06-24 20:15:21 [manager.py:391] -ERROR 06-24 20:15:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:191.15352630615234ms total_cost_time:191.19763374328613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8499 prompt_cache_len:5151 prompt_cache_ratio:0.6060713025061772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 -DEBUG 06-24 20:15:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:21 [manager.py:224] router recive req id 8 cost time 0.31008005142211914 s -INFO 06-24 20:15:21 [manager.py:68] detokenization recv req id 8 cost time 0.3126652240753174 s -DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=236196416598195270259149948309937087996, time:1750767322.0249398s req_ids:[8] -DEBUG 06-24 20:15:22 [manager.py:391] -ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:21 lightllm_req_id:8 first_token_cost:432.2967529296875ms total_cost_time:432.342529296875ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8500 prompt_cache_len:5151 prompt_cache_ratio:0.606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 -DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10975432395935059 s -INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.11237764358520508 s -DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=29553248500851598115511775970752351938, time:1750767322.2451496s req_ids:[8] -DEBUG 06-24 20:15:22 [manager.py:391] -ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:206.1011791229248ms total_cost_time:206.1452865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8501 prompt_cache_len:5151 prompt_cache_ratio:0.6059287142689095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 -DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10796380043029785 s -INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10999441146850586 s -DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=331758296688782613664854051868210306741, time:1750767322.454536s req_ids:[8] -DEBUG 06-24 20:15:22 [manager.py:391] -ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:206.16436004638672ms total_cost_time:206.2070369720459ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8502 prompt_cache_len:5151 prompt_cache_ratio:0.6058574453069866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 -DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10791468620300293 s -INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10997939109802246 s -DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=53853493672397416991690197629247613602, time:1750767322.6689768s req_ids:[8] -DEBUG 06-24 20:15:22 [manager.py:391] -ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:212.0647430419922ms total_cost_time:212.10932731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8503 prompt_cache_len:5151 prompt_cache_ratio:0.6057861931083147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 -DEBUG 06-24 20:15:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:22 [manager.py:224] router recive req id 8 cost time 0.10798788070678711 s -INFO 06-24 20:15:22 [manager.py:68] detokenization recv req id 8 cost time 0.10991144180297852 s -DEBUG 06-24 20:15:22 [manager.py:391] Prefill Batch: batch_id=312577998458600255496097158259796688924, time:1750767322.8920121s req_ids:[8] -DEBUG 06-24 20:15:22 [manager.py:391] -ERROR 06-24 20:15:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:212.1286392211914ms total_cost_time:212.1717929840088ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8504 prompt_cache_len:5151 prompt_cache_ratio:0.6057149576669802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 -DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s -INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040163040161133 s -DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=217993710780494814174624110127196104236, time:1750767323.1065829s req_ids:[8] -DEBUG 06-24 20:15:23 [manager.py:391] -ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:22 lightllm_req_id:8 first_token_cost:207.81421661376953ms total_cost_time:207.85903930664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8505 prompt_cache_len:5151 prompt_cache_ratio:0.6056437389770724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:15:23 [statics_utils.py:24] mean first cost: 228.77406631596386 ms -INFO 06-24 20:15:23 [statics_utils.py:24] mean per token cost: 0.0794685119446591 ms -INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 -DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10848784446716309 s -INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s -DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=129247782875002465410019254433696615145, time:1750767323.3214767s req_ids:[8] -DEBUG 06-24 20:15:23 [manager.py:391] -ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:207.02171325683594ms total_cost_time:207.06629753112793ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8506 prompt_cache_len:5151 prompt_cache_ratio:0.6055725370326828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 -DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10709810256958008 s -INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10911154747009277 s -DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=132656349958959940570247664895628701529, time:1750767323.5356083s req_ids:[8] -DEBUG 06-24 20:15:23 [manager.py:391] -ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:206.62689208984375ms total_cost_time:206.66956901550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8507 prompt_cache_len:5151 prompt_cache_ratio:0.6055013518279064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 -DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s -INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10977053642272949 s -DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=209991409122001065423055592853578051718, time:1750767323.7486072s req_ids:[8] -DEBUG 06-24 20:15:23 [manager.py:391] -ERROR 06-24 20:15:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:205.74355125427246ms total_cost_time:205.78646659851074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8508 prompt_cache_len:5151 prompt_cache_ratio:0.6054301833568406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 -DEBUG 06-24 20:15:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:23 [manager.py:224] router recive req id 8 cost time 0.10754060745239258 s -INFO 06-24 20:15:23 [manager.py:68] detokenization recv req id 8 cost time 0.10933113098144531 s -DEBUG 06-24 20:15:23 [manager.py:391] Prefill Batch: batch_id=545429987252481740797419247378719823, time:1750767323.9614074s req_ids:[8] -DEBUG 06-24 20:15:23 [manager.py:391] -ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:23 lightllm_req_id:8 first_token_cost:204.6341896057129ms total_cost_time:204.67758178710938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8509 prompt_cache_len:5151 prompt_cache_ratio:0.6053590316135856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 -DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10900712013244629 s -INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s -DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=16392692472748186276922570269306858353, time:1750767324.1703444s req_ids:[8] -DEBUG 06-24 20:15:24 [manager.py:391] -ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:206.49361610412598ms total_cost_time:206.53581619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8510 prompt_cache_len:5151 prompt_cache_ratio:0.6052878965922445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 -DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10696744918823242 s -INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.10900473594665527 s -DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=264358132955926801498855851406260134625, time:1750767324.383713s req_ids:[8] -DEBUG 06-24 20:15:24 [manager.py:391] -ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:379.11510467529297ms total_cost_time:379.17256355285645ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:8511 prompt_cache_len:5151 prompt_cache_ratio:0.6052167782869228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 -DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10718250274658203 s -INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.10905051231384277 s -DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=307384835284001986686130560529391821686, time:1750767324.7678869s req_ids:[8] -DEBUG 06-24 20:15:24 [manager.py:391] -ERROR 06-24 20:15:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:201.60412788391113ms total_cost_time:201.64871215820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8512 prompt_cache_len:5151 prompt_cache_ratio:0.6051456766917294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 -DEBUG 06-24 20:15:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:24 [manager.py:224] router recive req id 8 cost time 0.10816192626953125 s -INFO 06-24 20:15:24 [manager.py:68] detokenization recv req id 8 cost time 0.11013555526733398 s -DEBUG 06-24 20:15:24 [manager.py:391] Prefill Batch: batch_id=157596596356067655189486930346008664597, time:1750767324.976057s req_ids:[8] -DEBUG 06-24 20:15:24 [manager.py:391] -ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:24 lightllm_req_id:8 first_token_cost:208.2669734954834ms total_cost_time:208.30869674682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8513 prompt_cache_len:5151 prompt_cache_ratio:0.6050745918007753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 -DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10870790481567383 s -INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s -DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=136807545754151353094143520203897769620, time:1750767325.1892252s req_ids:[8] -DEBUG 06-24 20:15:25 [manager.py:391] -ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:208.48727226257324ms total_cost_time:208.52947235107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8514 prompt_cache_len:5151 prompt_cache_ratio:0.6050035236081748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 -DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10793948173522949 s -INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10996174812316895 s -DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=190668272506028533420422347869940039755, time:1750767325.403062s req_ids:[8] -DEBUG 06-24 20:15:25 [manager.py:391] -ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:208.68444442749023ms total_cost_time:208.72902870178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8515 prompt_cache_len:5151 prompt_cache_ratio:0.6049324721080446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 -DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10694670677185059 s -INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10886240005493164 s -DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=295735398739757235695089759779826457435, time:1750767325.6214058s req_ids:[8] -DEBUG 06-24 20:15:25 [manager.py:391] -ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:209.94949340820312ms total_cost_time:209.9921703338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8516 prompt_cache_len:5151 prompt_cache_ratio:0.6048614372945045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 -DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:25 [manager.py:224] router recive req id 8 cost time 0.10792183876037598 s -INFO 06-24 20:15:25 [manager.py:68] detokenization recv req id 8 cost time 0.10997772216796875 s -DEBUG 06-24 20:15:25 [manager.py:391] Prefill Batch: batch_id=159856648931109268603093480541227155024, time:1750767325.8343954s req_ids:[8] -DEBUG 06-24 20:15:25 [manager.py:391] -ERROR 06-24 20:15:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:207.08608627319336ms total_cost_time:207.12947845458984ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8517 prompt_cache_len:5151 prompt_cache_ratio:0.6047904191616766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 -DEBUG 06-24 20:15:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.11120939254760742 s -DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=153381652037389632006705761380732530862, time:1750767326.0464973s req_ids:[8] -DEBUG 06-24 20:15:26 [manager.py:391] -ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:25 lightllm_req_id:8 first_token_cost:211.93742752075195ms total_cost_time:211.98177337646484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8518 prompt_cache_len:5151 prompt_cache_ratio:0.6047194177036863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 -DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s -INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.11084365844726562 s -DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=52974734607087009590033185457552262859, time:1750767326.2623494s req_ids:[8] -DEBUG 06-24 20:15:26 [manager.py:391] -ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:210.05773544311523ms total_cost_time:210.10184288024902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8519 prompt_cache_len:5151 prompt_cache_ratio:0.6046484329146613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 -DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.10731816291809082 s -INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.10934042930603027 s -DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=186729565521199244258777162153374024553, time:1750767326.4896312s req_ids:[8] -DEBUG 06-24 20:15:26 [manager.py:391] -ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:220.2146053314209ms total_cost_time:220.27254104614258ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:8520 prompt_cache_len:5151 prompt_cache_ratio:0.6045774647887324 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 -DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:26 [manager.py:224] router recive req id 8 cost time 0.1069338321685791 s -INFO 06-24 20:15:26 [manager.py:68] detokenization recv req id 8 cost time 0.10888218879699707 s -DEBUG 06-24 20:15:26 [manager.py:391] Prefill Batch: batch_id=230520383117236471779716580503064090288, time:1750767326.7091699s req_ids:[8] -DEBUG 06-24 20:15:26 [manager.py:391] -ERROR 06-24 20:15:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:370.4872131347656ms total_cost_time:370.530366897583ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8521 prompt_cache_len:5151 prompt_cache_ratio:0.6045065133200329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 -DEBUG 06-24 20:15:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10833072662353516 s -INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11028170585632324 s -DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=188583129675801176551374106725754953476, time:1750767327.0792832s req_ids:[8] -DEBUG 06-24 20:15:27 [manager.py:391] -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:26 lightllm_req_id:8 first_token_cost:204.50687408447266ms total_cost_time:204.54764366149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:8522 prompt_cache_len:5151 prompt_cache_ratio:0.6044355785026989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 -DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10864448547363281 s -INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11068391799926758 s -DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=21417598207230193205827627196295039304, time:1750767327.2929027s req_ids:[8] -DEBUG 06-24 20:15:27 [manager.py:391] -ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:208.86778831481934ms total_cost_time:208.91141891479492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8523 prompt_cache_len:5151 prompt_cache_ratio:0.6043646603308694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 -DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10826826095581055 s -INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.11036849021911621 s -DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=204157378981048470601218725884134895874, time:1750767327.5084636s req_ids:[8] -DEBUG 06-24 20:15:27 [manager.py:391] -ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:209.5775604248047ms total_cost_time:209.62262153625488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8524 prompt_cache_len:5151 prompt_cache_ratio:0.6042937587986861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 -DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10720229148864746 s -INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.10923910140991211 s -DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=196925882664769043045292876473712605708, time:1750767327.7219563s req_ids:[8] -DEBUG 06-24 20:15:27 [manager.py:391] -ERROR 06-24 20:15:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:212.20731735229492ms total_cost_time:212.2495174407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8525 prompt_cache_len:5151 prompt_cache_ratio:0.6042228739002933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 -DEBUG 06-24 20:15:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:27 [manager.py:224] router recive req id 8 cost time 0.10759973526000977 s -INFO 06-24 20:15:27 [manager.py:68] detokenization recv req id 8 cost time 0.10956144332885742 s -DEBUG 06-24 20:15:27 [manager.py:391] Prefill Batch: batch_id=333347743545925540421858354844586208734, time:1750767327.9414845s req_ids:[8] -DEBUG 06-24 20:15:27 [manager.py:391] -ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:27 lightllm_req_id:8 first_token_cost:209.57088470458984ms total_cost_time:209.61499214172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8526 prompt_cache_len:5151 prompt_cache_ratio:0.6041520056298382 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 -DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.10722780227661133 s -INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.10918569564819336 s -DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=173394053266153671737294083980024516414, time:1750767328.1564298s req_ids:[8] -DEBUG 06-24 20:15:28 [manager.py:391] -ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:206.79044723510742ms total_cost_time:206.8338394165039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8527 prompt_cache_len:5151 prompt_cache_ratio:0.6040811539814707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 -DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.10726308822631836 s -INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092684268951416 s -DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=30303021994612302613101917295478603769, time:1750767328.369641s req_ids:[8] -DEBUG 06-24 20:15:28 [manager.py:391] -ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:207.13019371032715ms total_cost_time:207.17334747314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8528 prompt_cache_len:5151 prompt_cache_ratio:0.6040103189493433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 -DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.1081399917602539 s -INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.1101694107055664 s -DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=87859193108282997470079537069547101951, time:1750767328.5805612s req_ids:[8] -DEBUG 06-24 20:15:28 [manager.py:391] -ERROR 06-24 20:15:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:204.9391269683838ms total_cost_time:204.98085021972656ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:8529 prompt_cache_len:5151 prompt_cache_ratio:0.6039395005276117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 -DEBUG 06-24 20:15:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:28 [manager.py:224] router recive req id 8 cost time 0.3103761672973633 s -INFO 06-24 20:15:28 [manager.py:68] detokenization recv req id 8 cost time 0.31238365173339844 s -DEBUG 06-24 20:15:28 [manager.py:391] Prefill Batch: batch_id=192378186404137852813846886238068411766, time:1750767328.9998293s req_ids:[8] -DEBUG 06-24 20:15:28 [manager.py:391] -DEBUG 06-24 20:15:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 37060.934 tokens/s -DEBUG 06-24 20:15:29 [stats.py:37] Avg prompt tokens throughput: 37052.224 tokens/s -DEBUG 06-24 20:15:29 [stats.py:37] Avg generate tokens throughput: 8.709 tokens/s -ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:28 lightllm_req_id:8 first_token_cost:420.90392112731934ms total_cost_time:420.9468364715576ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8530 prompt_cache_len:5151 prompt_cache_ratio:0.6038686987104338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 -DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10927295684814453 s -INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s -DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=13630894034014213155189222552455811800, time:1750767329.2193623s req_ids:[8] -DEBUG 06-24 20:15:29 [manager.py:391] -ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:215.56591987609863ms total_cost_time:215.6083583831787ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8531 prompt_cache_len:5151 prompt_cache_ratio:0.6037979134919704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 -DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10687112808227539 s -INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.1089940071105957 s -DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=123939434798998087351274025107418030535, time:1750767329.4379287s req_ids:[8] -DEBUG 06-24 20:15:29 [manager.py:391] -ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:206.0568332672119ms total_cost_time:206.0995101928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8532 prompt_cache_len:5151 prompt_cache_ratio:0.6037271448663853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 -DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10761880874633789 s -INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.10955572128295898 s -DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=243310875721114746564180993787040486386, time:1750767329.6489604s req_ids:[8] -DEBUG 06-24 20:15:29 [manager.py:391] -ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:204.34260368347168ms total_cost_time:204.38599586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8533 prompt_cache_len:5151 prompt_cache_ratio:0.6036563928278449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 -DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:29 [manager.py:224] router recive req id 8 cost time 0.10699176788330078 s -INFO 06-24 20:15:29 [manager.py:68] detokenization recv req id 8 cost time 0.1089179515838623 s -INFO 06-24 20:15:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:15:29 [manager.py:391] Prefill Batch: batch_id=332983164824958923548316388721037697989, time:1750767329.867805s req_ids:[8] -DEBUG 06-24 20:15:29 [manager.py:391] -ERROR 06-24 20:15:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:212.85510063171387ms total_cost_time:212.89634704589844ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:8534 prompt_cache_len:5151 prompt_cache_ratio:0.603585657370518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 -DEBUG 06-24 20:15:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10807514190673828 s -INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.1099996566772461 s -DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=204690613152714575734141582294821453951, time:1750767330.0830824s req_ids:[8] -DEBUG 06-24 20:15:30 [manager.py:391] -ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:29 lightllm_req_id:8 first_token_cost:210.8631134033203ms total_cost_time:210.9203338623047ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:8535 prompt_cache_len:5151 prompt_cache_ratio:0.6035149384885764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 -DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s -INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s -DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=264596307124406072009738350548654803927, time:1750767330.2997074s req_ids:[8] -DEBUG 06-24 20:15:30 [manager.py:391] -ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:205.33299446105957ms total_cost_time:205.37614822387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8536 prompt_cache_len:5151 prompt_cache_ratio:0.6034442361761949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 -DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10949897766113281 s -INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.11140990257263184 s -DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=7656796507038843357386589394699898810, time:1750767330.5143342s req_ids:[8] -DEBUG 06-24 20:15:30 [manager.py:391] -ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:377.9163360595703ms total_cost_time:378.05795669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.14162063598632812ms prompt_token_num:8537 prompt_cache_len:5151 prompt_cache_ratio:0.6033735504275507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 -DEBUG 06-24 20:15:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:30 [manager.py:224] router recive req id 8 cost time 0.10921645164489746 s -INFO 06-24 20:15:30 [manager.py:68] detokenization recv req id 8 cost time 0.11147499084472656 s -DEBUG 06-24 20:15:30 [manager.py:391] Prefill Batch: batch_id=72463650261458093323445190087648720601, time:1750767330.8881588s req_ids:[8] -DEBUG 06-24 20:15:30 [manager.py:391] -ERROR 06-24 20:15:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:203.48834991455078ms total_cost_time:203.53174209594727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8538 prompt_cache_len:5151 prompt_cache_ratio:0.6033028812368236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 -DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10732603073120117 s -INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.10932707786560059 s -DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=294024890204422956578584808167364229279, time:1750767331.0994558s req_ids:[8] -DEBUG 06-24 20:15:31 [manager.py:391] -ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:30 lightllm_req_id:8 first_token_cost:202.6362419128418ms total_cost_time:202.67939567565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8539 prompt_cache_len:5151 prompt_cache_ratio:0.6032322285981965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 -DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.1081993579864502 s -INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11025023460388184 s -DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=280495079524397457292074127742404385637, time:1750767331.3198676s req_ids:[8] -DEBUG 06-24 20:15:31 [manager.py:391] -ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:217.99111366271973ms total_cost_time:218.03593635559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8540 prompt_cache_len:5151 prompt_cache_ratio:0.6031615925058548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 -DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s -INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.10984945297241211 s -DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=96600855143595073290606236396454472021, time:1750767331.5370731s req_ids:[8] -DEBUG 06-24 20:15:31 [manager.py:391] -ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:209.59162712097168ms total_cost_time:209.63549613952637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8541 prompt_cache_len:5151 prompt_cache_ratio:0.6030909729539866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 -DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10858273506164551 s -INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11062145233154297 s -DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=42693446784899497132833166372419739678, time:1750767331.7514043s req_ids:[8] -DEBUG 06-24 20:15:31 [manager.py:391] -ERROR 06-24 20:15:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:209.0129852294922ms total_cost_time:209.05709266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8542 prompt_cache_len:5151 prompt_cache_ratio:0.603020369936783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 -DEBUG 06-24 20:15:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:31 [manager.py:224] router recive req id 8 cost time 0.10814070701599121 s -INFO 06-24 20:15:31 [manager.py:68] detokenization recv req id 8 cost time 0.11011576652526855 s -DEBUG 06-24 20:15:31 [manager.py:391] Prefill Batch: batch_id=215624332510421590446244100583823725723, time:1750767331.9646316s req_ids:[8] -DEBUG 06-24 20:15:31 [manager.py:391] -ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:31 lightllm_req_id:8 first_token_cost:210.8144760131836ms total_cost_time:210.85739135742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8543 prompt_cache_len:5151 prompt_cache_ratio:0.6029497834484373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 -DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.3100240230560303 s -INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.3120403289794922 s -DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=46321486772111671361927148212392424674, time:1750767332.3850904s req_ids:[8] -DEBUG 06-24 20:15:32 [manager.py:391] -ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:417.1028137207031ms total_cost_time:417.1490669250488ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8544 prompt_cache_len:5151 prompt_cache_ratio:0.6028792134831461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 -DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.1090548038482666 s -INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.11089825630187988 s -DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=306033276156440412874305306476695504459, time:1750767332.6066794s req_ids:[8] -DEBUG 06-24 20:15:32 [manager.py:391] -ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:207.89813995361328ms total_cost_time:207.94153213500977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8545 prompt_cache_len:5151 prompt_cache_ratio:0.6028086600351082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 -DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:32 [manager.py:224] router recive req id 8 cost time 0.1074361801147461 s -INFO 06-24 20:15:32 [manager.py:68] detokenization recv req id 8 cost time 0.10928082466125488 s -DEBUG 06-24 20:15:32 [manager.py:391] Prefill Batch: batch_id=20162927309447926211624322733699390612, time:1750767332.818703s req_ids:[8] -DEBUG 06-24 20:15:32 [manager.py:391] -ERROR 06-24 20:15:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:208.5130214691162ms total_cost_time:208.5549831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8546 prompt_cache_len:5151 prompt_cache_ratio:0.6027381230985256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 -DEBUG 06-24 20:15:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10717391967773438 s -INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10886669158935547 s -DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=249745067147078360916428441156672066786, time:1750767333.030117s req_ids:[8] -DEBUG 06-24 20:15:33 [manager.py:391] -ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:32 lightllm_req_id:8 first_token_cost:206.0108184814453ms total_cost_time:206.0544490814209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8547 prompt_cache_len:5151 prompt_cache_ratio:0.6026676026676027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 -DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10731363296508789 s -INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10934734344482422 s -DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=11482566865862535043272814229549764421, time:1750767333.2413735s req_ids:[8] -DEBUG 06-24 20:15:33 [manager.py:391] -ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:201.12967491149902ms total_cost_time:201.1730670928955ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8548 prompt_cache_len:5151 prompt_cache_ratio:0.6025970987365465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 -DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:33 [batch.py:51] router release req id 8 -INFO 06-24 20:15:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10761499404907227 s -INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.10944437980651855 s -DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=132949898404242696109846845492448455548, time:1750767333.4491472s req_ids:[8] -DEBUG 06-24 20:15:33 [manager.py:391] -ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:205.80339431762695ms total_cost_time:205.84821701049805ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8549 prompt_cache_len:5151 prompt_cache_ratio:0.6025266112995672 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 -DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:33 [manager.py:224] router recive req id 8 cost time 0.10796189308166504 s -INFO 06-24 20:15:33 [manager.py:68] detokenization recv req id 8 cost time 0.1101076602935791 s -DEBUG 06-24 20:15:33 [manager.py:391] Prefill Batch: batch_id=114858188229823419875342144611938573990, time:1750767333.6585143s req_ids:[8] -DEBUG 06-24 20:15:33 [manager.py:391] -ERROR 06-24 20:15:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:356.0502529144287ms total_cost_time:356.0957908630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8550 prompt_cache_len:5151 prompt_cache_ratio:0.6024561403508772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 -DEBUG 06-24 20:15:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10864830017089844 s -INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11056327819824219 s -DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=190316705489918916240635013301405709730, time:1750767334.0184188s req_ids:[8] -DEBUG 06-24 20:15:34 [manager.py:391] -ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:33 lightllm_req_id:8 first_token_cost:204.8182487487793ms total_cost_time:204.89215850830078ms,out_token_counter:1 mean_per_token_cost_time: 0.07390975952148438ms prompt_token_num:8551 prompt_cache_len:5151 prompt_cache_ratio:0.6023856858846919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 -DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10746121406555176 s -INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.10939455032348633 s -DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=44094763010231987726551674294648442065, time:1750767334.2314978s req_ids:[8] -DEBUG 06-24 20:15:34 [manager.py:391] -ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:206.2854766845703ms total_cost_time:206.3298225402832ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8552 prompt_cache_len:5151 prompt_cache_ratio:0.6023152478952292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 -DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10814142227172852 s -INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s -DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=238787319704647890257745665277201401324, time:1750767334.445922s req_ids:[8] -DEBUG 06-24 20:15:34 [manager.py:391] -ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:207.10134506225586ms total_cost_time:207.14521408081055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8553 prompt_cache_len:5151 prompt_cache_ratio:0.60224482637671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 -DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10856938362121582 s -INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s -DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=230193925894755696956863277133683705673, time:1750767334.656881s req_ids:[8] -DEBUG 06-24 20:15:34 [manager.py:391] -ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:203.9942741394043ms total_cost_time:204.0390968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8554 prompt_cache_len:5151 prompt_cache_ratio:0.6021744213233575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 -DEBUG 06-24 20:15:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:34 [manager.py:224] router recive req id 8 cost time 0.10873913764953613 s -INFO 06-24 20:15:34 [manager.py:68] detokenization recv req id 8 cost time 0.1106414794921875 s -DEBUG 06-24 20:15:34 [manager.py:391] Prefill Batch: batch_id=75850696276421864765907797025711940246, time:1750767334.8785403s req_ids:[8] -DEBUG 06-24 20:15:34 [manager.py:391] -ERROR 06-24 20:15:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:220.08633613586426ms total_cost_time:220.13044357299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8555 prompt_cache_len:5151 prompt_cache_ratio:0.602104032729398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 -DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10839223861694336 s -INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s -DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=249491654645543725449353112108620191805, time:1750767335.0942698s req_ids:[8] -DEBUG 06-24 20:15:35 [manager.py:391] -ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:34 lightllm_req_id:8 first_token_cost:221.77767753601074ms total_cost_time:221.82106971740723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8556 prompt_cache_len:5151 prompt_cache_ratio:0.6020336605890603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 -DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10730099678039551 s -INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10916018486022949 s -DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=143019255305219443752567633993119414304, time:1750767335.3205013s req_ids:[8] -DEBUG 06-24 20:15:35 [manager.py:391] -ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:369.60816383361816ms total_cost_time:369.65084075927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8557 prompt_cache_len:5151 prompt_cache_ratio:0.6019633048965759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 -DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10713911056518555 s -INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10909199714660645 s -DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=242926426628296345919738222321851993142, time:1750767335.692064s req_ids:[8] -DEBUG 06-24 20:15:35 [manager.py:391] -ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:200.57272911071777ms total_cost_time:200.636625289917ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:8558 prompt_cache_len:5151 prompt_cache_ratio:0.601892965646179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 -DEBUG 06-24 20:15:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:35 [manager.py:224] router recive req id 8 cost time 0.10735130310058594 s -INFO 06-24 20:15:35 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s -DEBUG 06-24 20:15:35 [manager.py:391] Prefill Batch: batch_id=297944609435722229786627409721087980954, time:1750767335.9061604s req_ids:[8] -DEBUG 06-24 20:15:35 [manager.py:391] -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:189.9120807647705ms total_cost_time:189.9728775024414ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8559 prompt_cache_len:5151 prompt_cache_ratio:0.6018226428321065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 -DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s -INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11106276512145996 s -DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=115144012956924170296528295967195715759, time:1750767336.0984185s req_ids:[8] -DEBUG 06-24 20:15:36 [manager.py:391] -ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:35 lightllm_req_id:8 first_token_cost:200.5321979522705ms total_cost_time:200.5774974822998ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8560 prompt_cache_len:5151 prompt_cache_ratio:0.6017523364485982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 -DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10886454582214355 s -INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092972755432129 s -DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=320430095561094426093602546940470333338, time:1750767336.3137348s req_ids:[8] -DEBUG 06-24 20:15:36 [manager.py:391] -ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:214.25318717956543ms total_cost_time:214.2956256866455ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8561 prompt_cache_len:5151 prompt_cache_ratio:0.6016820464898961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 -DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10876703262329102 s -INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s -DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=159549008973744229682345434415274231323, time:1750767336.5287848s req_ids:[8] -DEBUG 06-24 20:15:36 [manager.py:391] -ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:204.0703296661377ms total_cost_time:204.12921905517578ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8562 prompt_cache_len:5151 prompt_cache_ratio:0.6016117729502453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 -DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10885500907897949 s -INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11072564125061035 s -DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=246482161281524109034759150350025120444, time:1750767336.7375507s req_ids:[8] -DEBUG 06-24 20:15:36 [manager.py:391] -ERROR 06-24 20:15:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:207.37338066101074ms total_cost_time:207.43584632873535ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8563 prompt_cache_len:5151 prompt_cache_ratio:0.6015415158238935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 -DEBUG 06-24 20:15:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:36 [batch.py:51] router release req id 8 -INFO 06-24 20:15:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:36 [manager.py:224] router recive req id 8 cost time 0.10849761962890625 s -INFO 06-24 20:15:36 [manager.py:68] detokenization recv req id 8 cost time 0.11033082008361816 s -DEBUG 06-24 20:15:36 [manager.py:391] Prefill Batch: batch_id=281939244502849994131118807085798143521, time:1750767336.952223s req_ids:[8] -DEBUG 06-24 20:15:36 [manager.py:391] -ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:36 lightllm_req_id:8 first_token_cost:207.00788497924805ms total_cost_time:207.05389976501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8564 prompt_cache_len:5151 prompt_cache_ratio:0.6014712751050911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 -DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10722565650939941 s -INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.10927534103393555 s -DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=321476571598713713911984069092578972050, time:1750767337.1646285s req_ids:[8] -DEBUG 06-24 20:15:37 [manager.py:391] -ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:207.30900764465332ms total_cost_time:207.3690891265869ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8565 prompt_cache_len:5151 prompt_cache_ratio:0.601401050788091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 -DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10891103744506836 s -INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.11089110374450684 s -DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=301093304575456325421500402413931644072, time:1750767337.3827157s req_ids:[8] -DEBUG 06-24 20:15:37 [manager.py:391] -ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:212.432861328125ms total_cost_time:212.4931812286377ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8566 prompt_cache_len:5151 prompt_cache_ratio:0.6013308428671492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 -DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.10717988014221191 s -INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914850234985352 s -DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=199186451515677796896414973713591242148, time:1750767337.5962982s req_ids:[8] -DEBUG 06-24 20:15:37 [manager.py:391] -ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:209.48457717895508ms total_cost_time:209.54275131225586ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8567 prompt_cache_len:5151 prompt_cache_ratio:0.6012606513365238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 -DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:37 [manager.py:224] router recive req id 8 cost time 0.11002993583679199 s -INFO 06-24 20:15:37 [manager.py:68] detokenization recv req id 8 cost time 0.11197090148925781 s -DEBUG 06-24 20:15:37 [manager.py:391] Prefill Batch: batch_id=243929967683984173325726418544622365672, time:1750767337.8114219s req_ids:[8] -DEBUG 06-24 20:15:37 [manager.py:391] -ERROR 06-24 20:15:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:207.54337310791016ms total_cost_time:207.63206481933594ms,out_token_counter:1 mean_per_token_cost_time: 0.08869171142578125ms prompt_token_num:8568 prompt_cache_len:5151 prompt_cache_ratio:0.6011904761904762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 -DEBUG 06-24 20:15:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10728788375854492 s -INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10916757583618164 s -DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=53004489249112506426435280025071636276, time:1750767338.0247083s req_ids:[8] -DEBUG 06-24 20:15:38 [manager.py:391] -ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:37 lightllm_req_id:8 first_token_cost:206.0554027557373ms total_cost_time:206.14099502563477ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:8569 prompt_cache_len:5151 prompt_cache_ratio:0.6011203174232699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 -DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10764575004577637 s -INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10944104194641113 s -DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=311776384963781421621041571655868517066, time:1750767338.2370148s req_ids:[8] -DEBUG 06-24 20:15:38 [manager.py:391] -ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:168.93625259399414ms total_cost_time:168.99585723876953ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8570 prompt_cache_len:5151 prompt_cache_ratio:0.6010501750291716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 -DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s -INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.11005473136901855 s -DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=218580787118050884884352546886344104858, time:1750767338.4094918s req_ids:[8] -DEBUG 06-24 20:15:38 [manager.py:391] -ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:193.14956665039062ms total_cost_time:193.21012496948242ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8571 prompt_cache_len:5151 prompt_cache_ratio:0.6009800490024502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 -DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s -INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.11043143272399902 s -DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=170231516057042826970898834271448194058, time:1750767338.6086905s req_ids:[8] -DEBUG 06-24 20:15:38 [manager.py:391] -ERROR 06-24 20:15:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:355.67188262939453ms total_cost_time:355.7169437408447ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8572 prompt_cache_len:5151 prompt_cache_ratio:0.6009099393373775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 -DEBUG 06-24 20:15:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:38 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s -INFO 06-24 20:15:38 [manager.py:68] detokenization recv req id 8 cost time 0.10950732231140137 s -DEBUG 06-24 20:15:38 [manager.py:391] Prefill Batch: batch_id=210511229865047557556495184117328315576, time:1750767338.9716892s req_ids:[8] -DEBUG 06-24 20:15:38 [manager.py:391] -ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:15:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 36572.764 tokens/s -DEBUG 06-24 20:15:39 [stats.py:37] Avg prompt tokens throughput: 36564.113 tokens/s -DEBUG 06-24 20:15:39 [stats.py:37] Avg generate tokens throughput: 8.650 tokens/s -INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:38 lightllm_req_id:8 first_token_cost:203.41086387634277ms total_cost_time:203.45425605773926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8573 prompt_cache_len:5151 prompt_cache_ratio:0.6008398460282282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 -DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10854673385620117 s -INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056852340698242 s -DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=75056801216580952783662098251538391641, time:1750767339.1838658s req_ids:[8] -DEBUG 06-24 20:15:39 [manager.py:391] -ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:204.8492431640625ms total_cost_time:204.8947811126709ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8574 prompt_cache_len:5151 prompt_cache_ratio:0.6007697690692793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 -DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10810518264770508 s -INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s -DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=163924599713165710140080591958966886862, time:1750767339.393993s req_ids:[8] -DEBUG 06-24 20:15:39 [manager.py:391] -ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:173.9063262939453ms total_cost_time:173.96783828735352ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8575 prompt_cache_len:5151 prompt_cache_ratio:0.6006997084548105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 -DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10794901847839355 s -INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.10953044891357422 s -DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=89551969239288181716671943866777561815, time:1750767339.575326s req_ids:[8] -DEBUG 06-24 20:15:39 [manager.py:391] -ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:202.82888412475586ms total_cost_time:202.87275314331055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8576 prompt_cache_len:5151 prompt_cache_ratio:0.6006296641791045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 -DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10716629028320312 s -INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.10916686058044434 s -DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=28138723432161160509557309431187563911, time:1750767339.7832804s req_ids:[8] -DEBUG 06-24 20:15:39 [manager.py:391] -ERROR 06-24 20:15:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:204.3466567993164ms total_cost_time:204.3898105621338ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8577 prompt_cache_len:5151 prompt_cache_ratio:0.6005596362364463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 -DEBUG 06-24 20:15:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:39 [manager.py:224] router recive req id 8 cost time 0.10756325721740723 s -INFO 06-24 20:15:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096506118774414 s -DEBUG 06-24 20:15:39 [manager.py:391] Prefill Batch: batch_id=104645560876462385456351091684670193820, time:1750767339.9949799s req_ids:[8] -DEBUG 06-24 20:15:39 [manager.py:391] -ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:39 lightllm_req_id:8 first_token_cost:206.44450187683105ms total_cost_time:206.48789405822754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8578 prompt_cache_len:5151 prompt_cache_ratio:0.6004896246211238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 -DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10828137397766113 s -INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11016154289245605 s -DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=40205074096559871779339078023710601417, time:1750767340.2076926s req_ids:[8] -DEBUG 06-24 20:15:40 [manager.py:391] -ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:208.19854736328125ms total_cost_time:208.25743675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8579 prompt_cache_len:5151 prompt_cache_ratio:0.6004196293274274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 -DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:40 [batch.py:51] router release req id 8 -INFO 06-24 20:15:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10867428779602051 s -INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11059784889221191 s -DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=299444953691497921732522428330361829093, time:1750767340.421328s req_ids:[8] -DEBUG 06-24 20:15:40 [manager.py:391] -ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:205.75284957885742ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8580 prompt_cache_len:5151 prompt_cache_ratio:0.6003496503496504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 -DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10962677001953125 s -INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11159539222717285 s -DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=298113432649806000816871885751269445462, time:1750767340.6340387s req_ids:[8] -DEBUG 06-24 20:15:40 [manager.py:391] -ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:208.50086212158203ms total_cost_time:208.54473114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8581 prompt_cache_len:5151 prompt_cache_ratio:0.6002796876820883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 -DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:40 [manager.py:224] router recive req id 8 cost time 0.10837984085083008 s -INFO 06-24 20:15:40 [manager.py:68] detokenization recv req id 8 cost time 0.11042141914367676 s -DEBUG 06-24 20:15:40 [manager.py:391] Prefill Batch: batch_id=67606458072795178270782355267272470688, time:1750767340.8462453s req_ids:[8] -DEBUG 06-24 20:15:40 [manager.py:391] -ERROR 06-24 20:15:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:206.89797401428223ms total_cost_time:206.96020126342773ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8582 prompt_cache_len:5151 prompt_cache_ratio:0.6002097413190398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 -DEBUG 06-24 20:15:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s -INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.11038827896118164 s -DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=59716419677110937178594468400947965854, time:1750767341.058243s req_ids:[8] -DEBUG 06-24 20:15:41 [manager.py:391] -ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:40 lightllm_req_id:8 first_token_cost:204.41794395446777ms total_cost_time:204.46348190307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8583 prompt_cache_len:5151 prompt_cache_ratio:0.600139811254806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 -DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s -DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=219318013247674326003980485475133496555, time:1750767341.2689927s req_ids:[8] -DEBUG 06-24 20:15:41 [manager.py:391] -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:371.25253677368164ms total_cost_time:371.29664421081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8584 prompt_cache_len:5151 prompt_cache_ratio:0.6000698974836906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 -DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.10729312896728516 s -INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10914826393127441 s -DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=20858375911226063548689145554477107170, time:1750767341.6436179s req_ids:[8] -DEBUG 06-24 20:15:41 [manager.py:391] -ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:202.15272903442383ms total_cost_time:202.21424102783203ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8585 prompt_cache_len:5151 prompt_cache_ratio:0.6 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 -DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:41 [manager.py:224] router recive req id 8 cost time 0.10736250877380371 s -INFO 06-24 20:15:41 [manager.py:68] detokenization recv req id 8 cost time 0.10925126075744629 s -DEBUG 06-24 20:15:41 [manager.py:391] Prefill Batch: batch_id=200731905809083425961225210310501200279, time:1750767341.8565917s req_ids:[8] -DEBUG 06-24 20:15:41 [manager.py:391] -ERROR 06-24 20:15:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:209.62905883789062ms total_cost_time:209.67388153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8586 prompt_cache_len:5151 prompt_cache_ratio:0.5999301187980434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 -DEBUG 06-24 20:15:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10831928253173828 s -INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11024999618530273 s -DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=27311638682989539438306748475180700865, time:1750767342.0713277s req_ids:[8] -DEBUG 06-24 20:15:42 [manager.py:391] -ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:41 lightllm_req_id:8 first_token_cost:204.44130897521973ms total_cost_time:204.4839859008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8587 prompt_cache_len:5151 prompt_cache_ratio:0.5998602538721323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 -DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.11011385917663574 s -INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11210441589355469 s -DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=329406751854207152957844446401031356306, time:1750767342.2797034s req_ids:[8] -DEBUG 06-24 20:15:42 [manager.py:391] -ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:204.72025871276855ms total_cost_time:204.76222038269043ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8588 prompt_cache_len:5151 prompt_cache_ratio:0.5997904052165812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 -DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s -INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s -DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=103453187026908672081498915049816639244, time:1750767342.4918904s req_ids:[8] -DEBUG 06-24 20:15:42 [manager.py:391] -ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:206.23493194580078ms total_cost_time:206.27903938293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8589 prompt_cache_len:5151 prompt_cache_ratio:0.5997205728257073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 -DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10736870765686035 s -INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.10936188697814941 s -DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=222679028378662759729419216246031457564, time:1750767342.704289s req_ids:[8] -DEBUG 06-24 20:15:42 [manager.py:391] -ERROR 06-24 20:15:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:209.7301483154297ms total_cost_time:209.77306365966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8590 prompt_cache_len:5151 prompt_cache_ratio:0.5996507566938301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 -DEBUG 06-24 20:15:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:42 [manager.py:224] router recive req id 8 cost time 0.10755157470703125 s -INFO 06-24 20:15:42 [manager.py:68] detokenization recv req id 8 cost time 0.11011815071105957 s -DEBUG 06-24 20:15:42 [manager.py:391] Prefill Batch: batch_id=252183364096878671407651159214985288889, time:1750767342.9198287s req_ids:[8] -DEBUG 06-24 20:15:42 [manager.py:391] -ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:42 lightllm_req_id:8 first_token_cost:207.7035903930664ms total_cost_time:207.7462673187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8591 prompt_cache_len:5151 prompt_cache_ratio:0.5995809568152718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 -DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10781407356262207 s -INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.1098785400390625 s -DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=237381923391058047345873965357739994921, time:1750767343.135878s req_ids:[8] -DEBUG 06-24 20:15:43 [manager.py:391] -ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:209.63120460510254ms total_cost_time:209.67507362365723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8592 prompt_cache_len:5151 prompt_cache_ratio:0.5995111731843575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 -DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10786104202270508 s -INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.10969352722167969 s -DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=95811665106767276770797142424348274625, time:1750767343.3543055s req_ids:[8] -DEBUG 06-24 20:15:43 [manager.py:391] -ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:214.27607536315918ms total_cost_time:214.32065963745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8593 prompt_cache_len:5151 prompt_cache_ratio:0.5994414057954148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 -DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10824370384216309 s -INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s -DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=164390461516707769514663475345394406834, time:1750767343.5694385s req_ids:[8] -DEBUG 06-24 20:15:43 [manager.py:391] -ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:206.56585693359375ms total_cost_time:206.61067962646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8594 prompt_cache_len:5151 prompt_cache_ratio:0.599371654642774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 -DEBUG 06-24 20:15:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:43 [manager.py:224] router recive req id 8 cost time 0.10872650146484375 s -INFO 06-24 20:15:43 [manager.py:68] detokenization recv req id 8 cost time 0.1109933853149414 s -DEBUG 06-24 20:15:43 [manager.py:391] Prefill Batch: batch_id=222259092305952009232631941802004750841, time:1750767343.7823122s req_ids:[8] -DEBUG 06-24 20:15:43 [manager.py:391] -ERROR 06-24 20:15:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:207.03792572021484ms total_cost_time:207.08227157592773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8595 prompt_cache_len:5151 prompt_cache_ratio:0.5993019197207679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 -DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.3097259998321533 s -INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.31168603897094727 s -DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=188805496688941522141198763281802615338, time:1750767344.1944442s req_ids:[8] -DEBUG 06-24 20:15:44 [manager.py:391] -ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:43 lightllm_req_id:8 first_token_cost:411.13734245300293ms total_cost_time:411.18359565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:8596 prompt_cache_len:5151 prompt_cache_ratio:0.599232201023732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 -DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.1083517074584961 s -INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11099696159362793 s -DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=225682591460667085788851878430834158903, time:1750767344.4108748s req_ids:[8] -DEBUG 06-24 20:15:44 [manager.py:391] -ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:206.96783065795898ms total_cost_time:207.0138454437256ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8597 prompt_cache_len:5151 prompt_cache_ratio:0.5991624985460045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 -DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.10881209373474121 s -INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11085844039916992 s -DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=196682584126845284413674532490968553575, time:1750767344.6248553s req_ids:[8] -DEBUG 06-24 20:15:44 [manager.py:391] -ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:207.03721046447754ms total_cost_time:207.08250999450684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8598 prompt_cache_len:5151 prompt_cache_ratio:0.599092812281926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 -DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:44 [manager.py:224] router recive req id 8 cost time 0.1081552505493164 s -INFO 06-24 20:15:44 [manager.py:68] detokenization recv req id 8 cost time 0.11017847061157227 s -DEBUG 06-24 20:15:44 [manager.py:391] Prefill Batch: batch_id=223858395401425842925905640274644645428, time:1750767344.8375568s req_ids:[8] -DEBUG 06-24 20:15:44 [manager.py:391] -ERROR 06-24 20:15:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:204.51116561889648ms total_cost_time:204.55336570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8599 prompt_cache_len:5151 prompt_cache_ratio:0.5990231422258402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 -DEBUG 06-24 20:15:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s -INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1106114387512207 s -DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=20850219675375486767706871389917731487, time:1750767345.0451617s req_ids:[8] -DEBUG 06-24 20:15:45 [manager.py:391] -ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:44 lightllm_req_id:8 first_token_cost:202.56972312927246ms total_cost_time:202.61216163635254ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8600 prompt_cache_len:5151 prompt_cache_ratio:0.5989534883720931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 -DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10725831985473633 s -INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1091611385345459 s -DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=293515760268833438742091879849543317162, time:1750767345.2556036s req_ids:[8] -DEBUG 06-24 20:15:45 [manager.py:391] -ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:204.83994483947754ms total_cost_time:204.88524436950684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8601 prompt_cache_len:5151 prompt_cache_ratio:0.5988838507150331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 -DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10742855072021484 s -INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.1093144416809082 s -DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=84380401665391759316985189299352539421, time:1750767345.4661472s req_ids:[8] -DEBUG 06-24 20:15:45 [manager.py:391] -ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:204.4520378112793ms total_cost_time:204.4963836669922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8602 prompt_cache_len:5151 prompt_cache_ratio:0.5988142292490118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 -DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10878443717956543 s -INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s -DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=79237542835125964022601807237201019938, time:1750767345.6788263s req_ids:[8] -DEBUG 06-24 20:15:45 [manager.py:391] -ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:205.54804801940918ms total_cost_time:205.59382438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8603 prompt_cache_len:5151 prompt_cache_ratio:0.5987446239683831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 -DEBUG 06-24 20:15:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:45 [manager.py:224] router recive req id 8 cost time 0.10827112197875977 s -INFO 06-24 20:15:45 [manager.py:68] detokenization recv req id 8 cost time 0.11027789115905762 s -DEBUG 06-24 20:15:45 [manager.py:391] Prefill Batch: batch_id=100013118033941836532123354560368255615, time:1750767345.8921201s req_ids:[8] -DEBUG 06-24 20:15:45 [manager.py:391] -ERROR 06-24 20:15:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:207.0302963256836ms total_cost_time:207.07416534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8604 prompt_cache_len:5151 prompt_cache_ratio:0.5986750348675035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 -DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10726284980773926 s -INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.10929536819458008 s -DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=325703733726895029902083958698920612783, time:1750767346.1054127s req_ids:[8] -DEBUG 06-24 20:15:46 [manager.py:391] -ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:45 lightllm_req_id:8 first_token_cost:202.17108726501465ms total_cost_time:202.193021774292ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:8605 prompt_cache_len:5151 prompt_cache_ratio:0.5986054619407322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 -DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.3094618320465088 s -INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.3114497661590576 s -DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=3890015597716843859054928432341133673, time:1750767346.5108368s req_ids:[8] -DEBUG 06-24 20:15:46 [manager.py:391] -ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:406.827449798584ms total_cost_time:406.87060356140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8606 prompt_cache_len:5151 prompt_cache_ratio:0.5985359051824308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 -DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10828638076782227 s -INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.11067986488342285 s -DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=192015898212516578439792107589247682627, time:1750767346.7272089s req_ids:[8] -DEBUG 06-24 20:15:46 [manager.py:391] -ERROR 06-24 20:15:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:207.12661743164062ms total_cost_time:207.1692943572998ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8607 prompt_cache_len:5151 prompt_cache_ratio:0.5984663645869641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 -DEBUG 06-24 20:15:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:46 [batch.py:51] router release req id 8 -INFO 06-24 20:15:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:46 [manager.py:224] router recive req id 8 cost time 0.10808467864990234 s -INFO 06-24 20:15:46 [manager.py:68] detokenization recv req id 8 cost time 0.10943388938903809 s -DEBUG 06-24 20:15:46 [manager.py:391] Prefill Batch: batch_id=56221773896007466417818778916375672063, time:1750767346.941499s req_ids:[8] -DEBUG 06-24 20:15:46 [manager.py:391] -ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:46 lightllm_req_id:8 first_token_cost:208.40144157409668ms total_cost_time:208.44459533691406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8608 prompt_cache_len:5151 prompt_cache_ratio:0.5983968401486989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 -DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10941267013549805 s -INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.11085391044616699 s -DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=145506447045775515652484870123089572861, time:1750767347.1563153s req_ids:[8] -DEBUG 06-24 20:15:47 [manager.py:391] -ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:206.8309783935547ms total_cost_time:206.88486099243164ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:8609 prompt_cache_len:5151 prompt_cache_ratio:0.5983273318620049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 -DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10898399353027344 s -INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.11042118072509766 s -DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=147354888719752725993282252457687051201, time:1750767347.370712s req_ids:[8] -DEBUG 06-24 20:15:47 [manager.py:391] -ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:208.44554901123047ms total_cost_time:208.47272872924805ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8610 prompt_cache_len:5151 prompt_cache_ratio:0.5982578397212543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 -DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10700583457946777 s -INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.10828399658203125 s -INFO 06-24 20:15:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=98893103449333088770465065003625350011, time:1750767347.5875823s req_ids:[8] -DEBUG 06-24 20:15:47 [manager.py:391] -ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:209.64741706848145ms total_cost_time:209.67507362365723ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8611 prompt_cache_len:5151 prompt_cache_ratio:0.5981883637208222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 -DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:47 [manager.py:224] router recive req id 8 cost time 0.10707592964172363 s -INFO 06-24 20:15:47 [manager.py:68] detokenization recv req id 8 cost time 0.10837316513061523 s -DEBUG 06-24 20:15:47 [manager.py:391] Prefill Batch: batch_id=134816064425607312544796702415860207762, time:1750767347.8028426s req_ids:[8] -DEBUG 06-24 20:15:47 [manager.py:391] -ERROR 06-24 20:15:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:209.52367782592773ms total_cost_time:209.55181121826172ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8612 prompt_cache_len:5151 prompt_cache_ratio:0.5981189038550859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 -DEBUG 06-24 20:15:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.1069183349609375 s -INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10822081565856934 s -DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=127545821474840010215304932526415060315, time:1750767348.016962s req_ids:[8] -DEBUG 06-24 20:15:48 [manager.py:391] -ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:47 lightllm_req_id:8 first_token_cost:210.07442474365234ms total_cost_time:210.10351181030273ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:8613 prompt_cache_len:5151 prompt_cache_ratio:0.5980494601184256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 -DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.1067662239074707 s -INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10810685157775879 s -DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=76707537922744350734486545071765588201, time:1750767348.2327988s req_ids:[8] -DEBUG 06-24 20:15:48 [manager.py:391] -ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:209.5181941986084ms total_cost_time:209.54489707946777ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8614 prompt_cache_len:5151 prompt_cache_ratio:0.597980032505224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 -DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10725879669189453 s -INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10860252380371094 s -DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=132889472464794341510608824653082723641, time:1750767348.4482346s req_ids:[8] -DEBUG 06-24 20:15:48 [manager.py:391] -ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:213.3018970489502ms total_cost_time:213.32907676696777ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8615 prompt_cache_len:5151 prompt_cache_ratio:0.5979106210098665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 -DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10726690292358398 s -INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10848641395568848 s -DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=27353820628027354836925368631840326932, time:1750767348.664918s req_ids:[8] -DEBUG 06-24 20:15:48 [manager.py:391] -ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:214.43581581115723ms total_cost_time:214.49732780456543ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8616 prompt_cache_len:5151 prompt_cache_ratio:0.5978412256267409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 -DEBUG 06-24 20:15:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:48 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s -INFO 06-24 20:15:48 [manager.py:68] detokenization recv req id 8 cost time 0.10919308662414551 s -DEBUG 06-24 20:15:48 [manager.py:391] Prefill Batch: batch_id=80834130963619343026104698173639495931, time:1750767348.8791428s req_ids:[8] -DEBUG 06-24 20:15:48 [manager.py:391] -ERROR 06-24 20:15:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:203.58729362487793ms total_cost_time:203.6144733428955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8617 prompt_cache_len:5151 prompt_cache_ratio:0.597771846350238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 -DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s -INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10976290702819824 s -DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=327947773210663783207989291382117020807, time:1750767349.091376s req_ids:[8] -DEBUG 06-24 20:15:49 [manager.py:391] -DEBUG 06-24 20:15:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 38559.720 tokens/s -DEBUG 06-24 20:15:49 [stats.py:37] Avg prompt tokens throughput: 38550.850 tokens/s -DEBUG 06-24 20:15:49 [stats.py:37] Avg generate tokens throughput: 8.870 tokens/s -ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:48 lightllm_req_id:8 first_token_cost:212.01086044311523ms total_cost_time:212.0378017425537ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8618 prompt_cache_len:5151 prompt_cache_ratio:0.5977024831747505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 -DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s -INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10959124565124512 s -DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=195110582542508504166370787372578000150, time:1750767349.3075764s req_ids:[8] -DEBUG 06-24 20:15:49 [manager.py:391] -ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:211.98606491088867ms total_cost_time:212.04590797424316ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8619 prompt_cache_len:5151 prompt_cache_ratio:0.5976331360946746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 -DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.3115818500518799 s -INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.31351208686828613 s -DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=4675993680884649036898655705398425481, time:1750767349.7262797s req_ids:[8] -DEBUG 06-24 20:15:49 [manager.py:391] -ERROR 06-24 20:15:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:415.8141613006592ms total_cost_time:415.84014892578125ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:8620 prompt_cache_len:5151 prompt_cache_ratio:0.5975638051044083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 -DEBUG 06-24 20:15:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:49 [manager.py:224] router recive req id 8 cost time 0.1069791316986084 s -INFO 06-24 20:15:49 [manager.py:68] detokenization recv req id 8 cost time 0.10884737968444824 s -DEBUG 06-24 20:15:49 [manager.py:391] Prefill Batch: batch_id=336463105600932342479240478027388247716, time:1750767349.9503822s req_ids:[8] -DEBUG 06-24 20:15:49 [manager.py:391] -ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:49 lightllm_req_id:8 first_token_cost:213.47546577453613ms total_cost_time:213.5019302368164ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8621 prompt_cache_len:5151 prompt_cache_ratio:0.5974944901983529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 -DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10730576515197754 s -INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.10967278480529785 s -DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=177511773517448391718539318962146872220, time:1750767350.1670706s req_ids:[8] -DEBUG 06-24 20:15:50 [manager.py:391] -ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:210.69049835205078ms total_cost_time:210.71720123291016ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8622 prompt_cache_len:5151 prompt_cache_ratio:0.5974251913709117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 -DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10719943046569824 s -INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.109100341796875 s -DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=210839971689128467243315078449690056794, time:1750767350.3833394s req_ids:[8] -DEBUG 06-24 20:15:50 [manager.py:391] -ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:211.01021766662598ms total_cost_time:211.03644371032715ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8623 prompt_cache_len:5151 prompt_cache_ratio:0.5973559086164908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 -DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.1074674129486084 s -INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.10946178436279297 s -DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=35467724990460301039167846945245487855, time:1750767350.597482s req_ids:[8] -DEBUG 06-24 20:15:50 [manager.py:391] -ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:211.30824089050293ms total_cost_time:211.3358974456787ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8624 prompt_cache_len:5151 prompt_cache_ratio:0.5972866419294991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 -DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10677528381347656 s -INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.1085672378540039 s -DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=187635642155877287331304612064724676632, time:1750767350.8128176s req_ids:[8] -DEBUG 06-24 20:15:50 [manager.py:391] -ERROR 06-24 20:15:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:166.40019416809082ms total_cost_time:166.4261817932129ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:8625 prompt_cache_len:5151 prompt_cache_ratio:0.5972173913043478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 -DEBUG 06-24 20:15:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:50 [manager.py:224] router recive req id 8 cost time 0.10712671279907227 s -INFO 06-24 20:15:50 [manager.py:68] detokenization recv req id 8 cost time 0.1091756820678711 s -DEBUG 06-24 20:15:50 [manager.py:391] Prefill Batch: batch_id=145838419759000090318337819669025115810, time:1750767350.9809206s req_ids:[8] -DEBUG 06-24 20:15:50 [manager.py:391] -ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:50 lightllm_req_id:8 first_token_cost:204.6494483947754ms total_cost_time:204.67591285705566ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8626 prompt_cache_len:5151 prompt_cache_ratio:0.597148156735451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 -DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10735964775085449 s -INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.1092691421508789 s -DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=305902874484456029029440808145185695119, time:1750767351.1936886s req_ids:[8] -DEBUG 06-24 20:15:51 [manager.py:391] -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.80319786071777ms total_cost_time:211.83109283447266ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8627 prompt_cache_len:5151 prompt_cache_ratio:0.597078938217225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 -DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10698461532592773 s -INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10890388488769531 s -DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=80570532653570611367847552903431975869, time:1750767351.4103572s req_ids:[8] -DEBUG 06-24 20:15:51 [manager.py:391] -ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.49182319641113ms total_cost_time:211.51995658874512ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8628 prompt_cache_len:5151 prompt_cache_ratio:0.597009735744089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 -DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10724449157714844 s -INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s -DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=10089758097853426141610710946775075217, time:1750767351.6261542s req_ids:[8] -DEBUG 06-24 20:15:51 [manager.py:391] -ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:211.49396896362305ms total_cost_time:211.52114868164062ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8629 prompt_cache_len:5151 prompt_cache_ratio:0.5969405493104647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 -DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:51 [manager.py:224] router recive req id 8 cost time 0.10691308975219727 s -INFO 06-24 20:15:51 [manager.py:68] detokenization recv req id 8 cost time 0.10881423950195312 s -DEBUG 06-24 20:15:51 [manager.py:391] Prefill Batch: batch_id=285404613080003683239576750062296706496, time:1750767351.8433146s req_ids:[8] -DEBUG 06-24 20:15:51 [manager.py:391] -ERROR 06-24 20:15:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:214.09273147583008ms total_cost_time:214.12229537963867ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:8630 prompt_cache_len:5151 prompt_cache_ratio:0.5968713789107764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 -DEBUG 06-24 20:15:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10705399513244629 s -INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1089177131652832 s -DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=61198116048990528694271310173769103177, time:1750767352.0603604s req_ids:[8] -DEBUG 06-24 20:15:52 [manager.py:391] -ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:51 lightllm_req_id:8 first_token_cost:377.07042694091797ms total_cost_time:377.09665298461914ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8631 prompt_cache_len:5151 prompt_cache_ratio:0.5968022245394509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 -DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10684728622436523 s -INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s -DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=89910616842636114760477870414949685048, time:1750767352.4374418s req_ids:[8] -DEBUG 06-24 20:15:52 [manager.py:391] -ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:205.5208683013916ms total_cost_time:205.54852485656738ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8632 prompt_cache_len:5151 prompt_cache_ratio:0.5967330861909175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 -DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10734868049621582 s -INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1092081069946289 s -DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=266617003662529885924990680195403131116, time:1750767352.6528072s req_ids:[8] -DEBUG 06-24 20:15:52 [manager.py:391] -ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:212.4636173248291ms total_cost_time:212.49079704284668ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8633 prompt_cache_len:5151 prompt_cache_ratio:0.5966639638596085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 -DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:52 [manager.py:224] router recive req id 8 cost time 0.10766434669494629 s -INFO 06-24 20:15:52 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s -DEBUG 06-24 20:15:52 [manager.py:391] Prefill Batch: batch_id=236414599979207992362211421766389776763, time:1750767352.8662076s req_ids:[8] -DEBUG 06-24 20:15:52 [manager.py:391] -ERROR 06-24 20:15:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:211.75885200500488ms total_cost_time:211.78698539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8634 prompt_cache_len:5151 prompt_cache_ratio:0.5965948575399583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 -DEBUG 06-24 20:15:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10729670524597168 s -INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s -DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=155870433669323759513484788182705282360, time:1750767353.0824475s req_ids:[8] -DEBUG 06-24 20:15:53 [manager.py:391] -ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:52 lightllm_req_id:8 first_token_cost:209.57326889038086ms total_cost_time:209.59877967834473ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8635 prompt_cache_len:5151 prompt_cache_ratio:0.5965257672264042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 -INFO 06-24 20:15:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:15:53 [statics_utils.py:24] mean first cost: 228.65261757368168 ms -INFO 06-24 20:15:53 [statics_utils.py:24] mean per token cost: 0.07839269675614903 ms -DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10674834251403809 s -INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10862064361572266 s -DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=304191109677564794397697662014726846233, time:1750767353.2970173s req_ids:[8] -DEBUG 06-24 20:15:53 [manager.py:391] -ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:211.03453636169434ms total_cost_time:211.06410026550293ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:8636 prompt_cache_len:5151 prompt_cache_ratio:0.5964566929133859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 -DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s -INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.1096646785736084 s -DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=310087298978583217011911034363535990094, time:1750767353.5119967s req_ids:[8] -DEBUG 06-24 20:15:53 [manager.py:391] -ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:210.88242530822754ms total_cost_time:210.91032028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8637 prompt_cache_len:5151 prompt_cache_ratio:0.5963876345953456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 -DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10732030868530273 s -INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10922122001647949 s -DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=155887026690505235392210963667584678908, time:1750767353.7277205s req_ids:[8] -DEBUG 06-24 20:15:53 [manager.py:391] -ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:211.92693710327148ms total_cost_time:211.95435523986816ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8638 prompt_cache_len:5151 prompt_cache_ratio:0.5963185922667285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 -DEBUG 06-24 20:15:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:53 [manager.py:224] router recive req id 8 cost time 0.10707974433898926 s -INFO 06-24 20:15:53 [manager.py:68] detokenization recv req id 8 cost time 0.10883784294128418 s -DEBUG 06-24 20:15:53 [manager.py:391] Prefill Batch: batch_id=141046997045262487931318167717964286773, time:1750767353.943042s req_ids:[8] -DEBUG 06-24 20:15:53 [manager.py:391] -ERROR 06-24 20:15:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:172.56975173950195ms total_cost_time:172.59693145751953ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8639 prompt_cache_len:5151 prompt_cache_ratio:0.5962495659219818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 -DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10758209228515625 s -INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10924577713012695 s -DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=126523020401569277934202558477030988759, time:1750767354.1188173s req_ids:[8] -DEBUG 06-24 20:15:54 [manager.py:391] -ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:53 lightllm_req_id:8 first_token_cost:168.1206226348877ms total_cost_time:168.14708709716797ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8640 prompt_cache_len:5151 prompt_cache_ratio:0.5961805555555556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 -DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.1068108081817627 s -INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10861063003540039 s -DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=325895951143817156589602412438011297519, time:1750767354.2898452s req_ids:[8] -DEBUG 06-24 20:15:54 [manager.py:391] -ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:354.04253005981445ms total_cost_time:354.0680408477783ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8641 prompt_cache_len:5151 prompt_cache_ratio:0.5961115611619026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 -DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:54 [batch.py:51] router release req id 8 -INFO 06-24 20:15:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10723423957824707 s -INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.1091611385345459 s -DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=335950042744354890384831743976553383240, time:1750767354.647487s req_ids:[8] -DEBUG 06-24 20:15:54 [manager.py:391] -ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:203.71031761169434ms total_cost_time:203.7370204925537ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8642 prompt_cache_len:5151 prompt_cache_ratio:0.5960425827354779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 -DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:54 [manager.py:224] router recive req id 8 cost time 0.10797977447509766 s -INFO 06-24 20:15:54 [manager.py:68] detokenization recv req id 8 cost time 0.10989880561828613 s -DEBUG 06-24 20:15:54 [manager.py:391] Prefill Batch: batch_id=75479750132709722285116054173934217801, time:1750767354.85957s req_ids:[8] -DEBUG 06-24 20:15:54 [manager.py:391] -ERROR 06-24 20:15:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:209.81478691101074ms total_cost_time:209.8410129547119ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:8643 prompt_cache_len:5151 prompt_cache_ratio:0.5959736202707393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 -DEBUG 06-24 20:15:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10711431503295898 s -INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10898470878601074 s -DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=296958158523771365378641166377561885391, time:1750767355.0735836s req_ids:[8] -DEBUG 06-24 20:15:55 [manager.py:391] -ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:54 lightllm_req_id:8 first_token_cost:210.34884452819824ms total_cost_time:210.37626266479492ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8644 prompt_cache_len:5151 prompt_cache_ratio:0.5959046737621472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 -DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10794830322265625 s -INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10994148254394531 s -DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=102357261601462566273949875584226125655, time:1750767355.3002286s req_ids:[8] -DEBUG 06-24 20:15:55 [manager.py:391] -ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:223.7377166748047ms total_cost_time:223.76012802124023ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:8645 prompt_cache_len:5151 prompt_cache_ratio:0.5958357432041642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 -DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10712385177612305 s -INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s -DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=211027751720498640499274939534724818704, time:1750767355.519101s req_ids:[8] -DEBUG 06-24 20:15:55 [manager.py:391] -ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:212.7857208251953ms total_cost_time:212.8145694732666ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:8646 prompt_cache_len:5151 prompt_cache_ratio:0.5957668285912561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 -DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10724210739135742 s -INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10933160781860352 s -DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=168972539811369224694438541514754513152, time:1750767355.734947s req_ids:[8] -DEBUG 06-24 20:15:55 [manager.py:391] -ERROR 06-24 20:15:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:211.46059036254883ms total_cost_time:211.4884853363037ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8647 prompt_cache_len:5151 prompt_cache_ratio:0.5956979299178906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 -DEBUG 06-24 20:15:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:55 [manager.py:224] router recive req id 8 cost time 0.10657978057861328 s -INFO 06-24 20:15:55 [manager.py:68] detokenization recv req id 8 cost time 0.10850024223327637 s -DEBUG 06-24 20:15:55 [manager.py:391] Prefill Batch: batch_id=102156733295605558217938263674384558340, time:1750767355.956836s req_ids:[8] -DEBUG 06-24 20:15:55 [manager.py:391] -ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:55 lightllm_req_id:8 first_token_cost:217.99802780151367ms total_cost_time:218.02377700805664ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8648 prompt_cache_len:5151 prompt_cache_ratio:0.5956290471785384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 -DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.10700392723083496 s -INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.10892295837402344 s -DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=45415003266625811108577610172499233463, time:1750767356.1747308s req_ids:[8] -DEBUG 06-24 20:15:56 [manager.py:391] -ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:210.5996608734131ms total_cost_time:210.62707901000977ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8649 prompt_cache_len:5151 prompt_cache_ratio:0.5955601803676726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 -DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.30879640579223633 s -INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.31078219413757324 s -DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=238811191861258323942676912626546299049, time:1750767356.5888302s req_ids:[8] -DEBUG 06-24 20:15:56 [manager.py:391] -ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:415.82345962524414ms total_cost_time:415.90118408203125ms,out_token_counter:1 mean_per_token_cost_time: 0.07772445678710938ms prompt_token_num:8650 prompt_cache_len:5151 prompt_cache_ratio:0.5954913294797688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 -DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:56 [manager.py:224] router recive req id 8 cost time 0.10813140869140625 s -INFO 06-24 20:15:56 [manager.py:68] detokenization recv req id 8 cost time 0.11083292961120605 s -DEBUG 06-24 20:15:56 [manager.py:391] Prefill Batch: batch_id=265007676071601060982558305109637889232, time:1750767356.8096282s req_ids:[8] -DEBUG 06-24 20:15:56 [manager.py:391] -ERROR 06-24 20:15:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:207.02075958251953ms total_cost_time:207.0484161376953ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8651 prompt_cache_len:5151 prompt_cache_ratio:0.5954224945093053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 -DEBUG 06-24 20:15:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10718894004821777 s -INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10924386978149414 s -DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=322469694216646632210091238024816269063, time:1750767357.0232863s req_ids:[8] -DEBUG 06-24 20:15:57 [manager.py:391] -ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:56 lightllm_req_id:8 first_token_cost:211.81774139404297ms total_cost_time:211.86208724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8652 prompt_cache_len:5151 prompt_cache_ratio:0.5953536754507628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 -DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s -INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.11073803901672363 s -DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=56816474326029314301617780521708812376, time:1750767357.237419s req_ids:[8] -DEBUG 06-24 20:15:57 [manager.py:391] -ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:204.32496070861816ms total_cost_time:204.36906814575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8653 prompt_cache_len:5151 prompt_cache_ratio:0.5952848722986247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 -DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s -INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.11053133010864258 s -DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=25346880510727909014291123664824867652, time:1750767357.4468966s req_ids:[8] -DEBUG 06-24 20:15:57 [manager.py:391] -ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:204.2715549468994ms total_cost_time:204.3159008026123ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8654 prompt_cache_len:5151 prompt_cache_ratio:0.5952160850473769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 -DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10801410675048828 s -INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10997891426086426 s -DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=281232410824694217759969696144858928011, time:1750767357.6576092s req_ids:[8] -DEBUG 06-24 20:15:57 [manager.py:391] -ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:206.39872550964355ms total_cost_time:206.44235610961914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8655 prompt_cache_len:5151 prompt_cache_ratio:0.5951473136915078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 -DEBUG 06-24 20:15:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:57 [manager.py:224] router recive req id 8 cost time 0.10784459114074707 s -INFO 06-24 20:15:57 [manager.py:68] detokenization recv req id 8 cost time 0.10973072052001953 s -DEBUG 06-24 20:15:57 [manager.py:391] Prefill Batch: batch_id=240856594904333817151454836605617472116, time:1750767357.8764405s req_ids:[8] -DEBUG 06-24 20:15:57 [manager.py:391] -ERROR 06-24 20:15:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:215.72566032409668ms total_cost_time:215.76905250549316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8656 prompt_cache_len:5151 prompt_cache_ratio:0.5950785582255084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 -DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.109344482421875 s -INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.11129450798034668 s -DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=299768019684239505048008319246407127342, time:1750767358.0925808s req_ids:[8] -DEBUG 06-24 20:15:58 [manager.py:391] -ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:57 lightllm_req_id:8 first_token_cost:376.6744136810303ms total_cost_time:376.71828269958496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8657 prompt_cache_len:5151 prompt_cache_ratio:0.595009818643872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 -DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10904812812805176 s -INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.1110067367553711 s -DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=264986679659096398218824789331350931220, time:1750767358.4745119s req_ids:[8] -DEBUG 06-24 20:15:58 [manager.py:391] -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:199.97930526733398ms total_cost_time:200.02412796020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8658 prompt_cache_len:5151 prompt_cache_ratio:0.5949410949410949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 -DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10773396492004395 s -INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.10977816581726074 s -DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=162980380089519838335840101450101071635, time:1750767358.6924164s req_ids:[8] -DEBUG 06-24 20:15:58 [manager.py:391] -ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:217.44441986083984ms total_cost_time:217.48900413513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8659 prompt_cache_len:5151 prompt_cache_ratio:0.5948723871116757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 -DEBUG 06-24 20:15:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:58 [manager.py:224] router recive req id 8 cost time 0.10896730422973633 s -INFO 06-24 20:15:58 [manager.py:68] detokenization recv req id 8 cost time 0.11114668846130371 s -DEBUG 06-24 20:15:58 [manager.py:391] Prefill Batch: batch_id=63886982681376039620085478176367845467, time:1750767358.9095316s req_ids:[8] -DEBUG 06-24 20:15:58 [manager.py:391] -ERROR 06-24 20:15:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:58 lightllm_req_id:8 first_token_cost:208.07814598083496ms total_cost_time:208.12058448791504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8660 prompt_cache_len:5151 prompt_cache_ratio:0.5948036951501154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 -DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10745525360107422 s -INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s -DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=281904837146979810894191269082872227960, time:1750767359.1242416s req_ids:[8] -DEBUG 06-24 20:15:59 [manager.py:391] -DEBUG 06-24 20:15:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 37039.050 tokens/s -DEBUG 06-24 20:15:59 [stats.py:37] Avg prompt tokens throughput: 37030.478 tokens/s -DEBUG 06-24 20:15:59 [stats.py:37] Avg generate tokens throughput: 8.572 tokens/s -ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:208.8186740875244ms total_cost_time:208.8615894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8661 prompt_cache_len:5151 prompt_cache_ratio:0.5947350190509179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 -DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10762786865234375 s -INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.10952901840209961 s -DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=262071714936887769086296028453352968731, time:1750767359.3404036s req_ids:[8] -DEBUG 06-24 20:15:59 [manager.py:391] -ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:209.1360092163086ms total_cost_time:209.1810703277588ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8662 prompt_cache_len:5151 prompt_cache_ratio:0.5946663588085892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 -DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.10784721374511719 s -INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s -DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=184024350559996861939459891170327758071, time:1750767359.5564532s req_ids:[8] -DEBUG 06-24 20:15:59 [manager.py:391] -ERROR 06-24 20:15:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:15:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:15:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:205.8548927307129ms total_cost_time:205.89900016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8663 prompt_cache_len:5151 prompt_cache_ratio:0.5945977144176382 mtp_avg_token_per_step:1.0 -INFO 06-24 20:15:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 -DEBUG 06-24 20:15:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:15:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:15:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:15:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:15:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:15:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:15:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:15:59 [manager.py:224] router recive req id 8 cost time 0.310746431350708 s -INFO 06-24 20:15:59 [manager.py:68] detokenization recv req id 8 cost time 0.31266188621520996 s -DEBUG 06-24 20:15:59 [manager.py:391] Prefill Batch: batch_id=215917235149705184108547697784927502634, time:1750767359.9797094s req_ids:[8] -DEBUG 06-24 20:15:59 [manager.py:391] -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:15:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:15:59 lightllm_req_id:8 first_token_cost:425.08935928344727ms total_cost_time:425.13227462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8664 prompt_cache_len:5151 prompt_cache_ratio:0.5945290858725761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 -DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10819792747497559 s -INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11010050773620605 s -DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=136147127035164528318686398312869560033, time:1750767360.1970098s req_ids:[8] -DEBUG 06-24 20:16:00 [manager.py:391] -ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:208.2345485687256ms total_cost_time:208.27960968017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8665 prompt_cache_len:5151 prompt_cache_ratio:0.594460473167917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 -DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10801577568054199 s -INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s -DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=180758557791862045687178387349191666464, time:1750767360.410471s req_ids:[8] -DEBUG 06-24 20:16:00 [manager.py:391] -ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:209.89727973937988ms total_cost_time:209.94138717651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8666 prompt_cache_len:5151 prompt_cache_ratio:0.5943918762981768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 -DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10722923278808594 s -INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.10900115966796875 s -DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=264424419732114430214278448015456873755, time:1750767360.6251872s req_ids:[8] -DEBUG 06-24 20:16:00 [manager.py:391] -ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:207.0600986480713ms total_cost_time:207.10420608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8667 prompt_cache_len:5151 prompt_cache_ratio:0.5943232952578748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 -DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:00 [manager.py:224] router recive req id 8 cost time 0.10905838012695312 s -INFO 06-24 20:16:00 [manager.py:68] detokenization recv req id 8 cost time 0.11100101470947266 s -DEBUG 06-24 20:16:00 [manager.py:391] Prefill Batch: batch_id=318207036420058163763350942030782280447, time:1750767360.837229s req_ids:[8] -DEBUG 06-24 20:16:00 [manager.py:391] -ERROR 06-24 20:16:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:207.85975456237793ms total_cost_time:207.8876495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8668 prompt_cache_len:5151 prompt_cache_ratio:0.5942547300415321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 -DEBUG 06-24 20:16:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10723304748535156 s -INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10925006866455078 s -DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=127163040213379171266489653912401960636, time:1750767361.0534785s req_ids:[8] -DEBUG 06-24 20:16:01 [manager.py:391] -ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:00 lightllm_req_id:8 first_token_cost:209.38897132873535ms total_cost_time:209.41734313964844ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8669 prompt_cache_len:5151 prompt_cache_ratio:0.5941861806436729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 -DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10711216926574707 s -INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10894656181335449 s -DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=255821967515769890219147620404690002674, time:1750767361.2678485s req_ids:[8] -DEBUG 06-24 20:16:01 [manager.py:391] -ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:379.76789474487305ms total_cost_time:379.7943592071533ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:8670 prompt_cache_len:5151 prompt_cache_ratio:0.5941176470588235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 -DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10705971717834473 s -INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.10810732841491699 s -DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=149495825451803480385567228029064539290, time:1750767361.6488042s req_ids:[8] -DEBUG 06-24 20:16:01 [manager.py:391] -ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:209.14316177368164ms total_cost_time:209.17105674743652ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8671 prompt_cache_len:5151 prompt_cache_ratio:0.5940491292815131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 -DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:01 [manager.py:224] router recive req id 8 cost time 0.10714292526245117 s -INFO 06-24 20:16:01 [manager.py:68] detokenization recv req id 8 cost time 0.1089487075805664 s -INFO 06-24 20:16:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:16:01 [manager.py:391] Prefill Batch: batch_id=189302794967851091212629870885199358219, time:1750767361.8647485s req_ids:[8] -DEBUG 06-24 20:16:01 [manager.py:391] -ERROR 06-24 20:16:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:210.1898193359375ms total_cost_time:210.21056175231934ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8672 prompt_cache_len:5151 prompt_cache_ratio:0.5939806273062731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 -DEBUG 06-24 20:16:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10576415061950684 s -INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.10767340660095215 s -DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=181424321466035071202026380552985488323, time:1750767362.0779436s req_ids:[8] -DEBUG 06-24 20:16:02 [manager.py:391] -ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:01 lightllm_req_id:8 first_token_cost:209.28287506103516ms total_cost_time:209.32698249816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8673 prompt_cache_len:5151 prompt_cache_ratio:0.5939121411276375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 -DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10823345184326172 s -INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.11013150215148926 s -DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=113611843110585115971311669170005158087, time:1750767362.2917545s req_ids:[8] -DEBUG 06-24 20:16:02 [manager.py:391] -ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:210.61372756958008ms total_cost_time:210.65950393676758ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8674 prompt_cache_len:5151 prompt_cache_ratio:0.593843670740143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 -DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.11094880104064941 s -INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.11320042610168457 s -DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=4001469884290988289042580270407328078, time:1750767362.507861s req_ids:[8] -DEBUG 06-24 20:16:02 [manager.py:391] -ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:209.55348014831543ms total_cost_time:209.5966339111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8675 prompt_cache_len:5151 prompt_cache_ratio:0.5937752161383285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 -DEBUG 06-24 20:16:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:02 [manager.py:224] router recive req id 8 cost time 0.10718703269958496 s -INFO 06-24 20:16:02 [manager.py:68] detokenization recv req id 8 cost time 0.10923242568969727 s -DEBUG 06-24 20:16:02 [manager.py:391] Prefill Batch: batch_id=330765037508280068294476077613368300209, time:1750767362.723427s req_ids:[8] -DEBUG 06-24 20:16:02 [manager.py:391] -ERROR 06-24 20:16:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:383.8794231414795ms total_cost_time:383.9235305786133ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8676 prompt_cache_len:5151 prompt_cache_ratio:0.5937067773167358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 -DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10804200172424316 s -INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11053204536437988 s -DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=177213500750246411712767087063684572205, time:1750767363.1107733s req_ids:[8] -DEBUG 06-24 20:16:03 [manager.py:391] -ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:02 lightllm_req_id:8 first_token_cost:200.9139060974121ms total_cost_time:200.9563446044922ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8677 prompt_cache_len:5151 prompt_cache_ratio:0.593638354269909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 -DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s -INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11030101776123047 s -DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=203163806683013812110058414340153594354, time:1750767363.3197696s req_ids:[8] -DEBUG 06-24 20:16:03 [manager.py:391] -ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:209.35416221618652ms total_cost_time:209.39898490905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8678 prompt_cache_len:5151 prompt_cache_ratio:0.5935699469923945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 -DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.1096808910369873 s -INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.111602783203125 s -DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=5067769829712468248354023280183657293, time:1750767363.5347543s req_ids:[8] -DEBUG 06-24 20:16:03 [manager.py:391] -ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:201.6754150390625ms total_cost_time:201.7204761505127ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8679 prompt_cache_len:5151 prompt_cache_ratio:0.5935015554787418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 -DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10851836204528809 s -INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.11040997505187988 s -DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=258089597404816309814586125524604010746, time:1750767363.7430842s req_ids:[8] -DEBUG 06-24 20:16:03 [manager.py:391] -ERROR 06-24 20:16:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:208.34922790527344ms total_cost_time:208.39619636535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:8680 prompt_cache_len:5151 prompt_cache_ratio:0.5934331797235023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 -DEBUG 06-24 20:16:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:03 [manager.py:224] router recive req id 8 cost time 0.10818028450012207 s -INFO 06-24 20:16:03 [manager.py:68] detokenization recv req id 8 cost time 0.1101083755493164 s -DEBUG 06-24 20:16:03 [manager.py:391] Prefill Batch: batch_id=204218946128987072182407181653151540657, time:1750767363.9575243s req_ids:[8] -DEBUG 06-24 20:16:03 [manager.py:391] -ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:03 lightllm_req_id:8 first_token_cost:207.94034004211426ms total_cost_time:207.98420906066895ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8681 prompt_cache_len:5151 prompt_cache_ratio:0.5933648197212302 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 -DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10734105110168457 s -INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s -DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=236517805150392480682572797221682854369, time:1750767364.1699874s req_ids:[8] -DEBUG 06-24 20:16:04 [manager.py:391] -ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:206.07757568359375ms total_cost_time:206.12168312072754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8682 prompt_cache_len:5151 prompt_cache_ratio:0.5932964754664823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 -DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10804462432861328 s -INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.10991764068603516 s -DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=51431258918605310321494633830180937774, time:1750767364.3896003s req_ids:[8] -DEBUG 06-24 20:16:04 [manager.py:391] -ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:212.7225399017334ms total_cost_time:212.7666473388672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8683 prompt_cache_len:5151 prompt_cache_ratio:0.5932281469538178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 -DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:04 [manager.py:224] router recive req id 8 cost time 0.10983657836914062 s -INFO 06-24 20:16:04 [manager.py:68] detokenization recv req id 8 cost time 0.11180520057678223 s -DEBUG 06-24 20:16:04 [manager.py:391] Prefill Batch: batch_id=170874834403489074314604561210507021655, time:1750767364.6024904s req_ids:[8] -DEBUG 06-24 20:16:04 [manager.py:391] -ERROR 06-24 20:16:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:209.40160751342773ms total_cost_time:209.44571495056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8684 prompt_cache_len:5151 prompt_cache_ratio:0.5931598341777983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 -DEBUG 06-24 20:16:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.30855894088745117 s -INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.3106086254119873 s -DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=310247974285068922102516162120012122234, time:1750767365.0220683s req_ids:[8] -DEBUG 06-24 20:16:05 [manager.py:391] -ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:04 lightllm_req_id:8 first_token_cost:419.85535621643066ms total_cost_time:419.88277435302734ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:8685 prompt_cache_len:5151 prompt_cache_ratio:0.5930915371329879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 -DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10738539695739746 s -INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10921978950500488 s -DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=207372117307222087664790296364762701120, time:1750767365.2452528s req_ids:[8] -DEBUG 06-24 20:16:05 [manager.py:391] -ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:213.49501609802246ms total_cost_time:213.52052688598633ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:8686 prompt_cache_len:5151 prompt_cache_ratio:0.5930232558139535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 -DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.1069488525390625 s -INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s -DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=169609836848961795597468318677719612174, time:1750767365.4632072s req_ids:[8] -DEBUG 06-24 20:16:05 [manager.py:391] -ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:211.52901649475098ms total_cost_time:211.55667304992676ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8687 prompt_cache_len:5151 prompt_cache_ratio:0.5929549902152642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 -DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10489583015441895 s -INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10698080062866211 s -DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=202239432366594529834013516959295320576, time:1750767365.6761682s req_ids:[8] -DEBUG 06-24 20:16:05 [manager.py:391] -ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:207.41510391235352ms total_cost_time:207.4434757232666ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8688 prompt_cache_len:5151 prompt_cache_ratio:0.5928867403314917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 -DEBUG 06-24 20:16:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:05 [manager.py:224] router recive req id 8 cost time 0.10578298568725586 s -INFO 06-24 20:16:05 [manager.py:68] detokenization recv req id 8 cost time 0.10781383514404297 s -DEBUG 06-24 20:16:05 [manager.py:391] Prefill Batch: batch_id=288751712017526861861479764673698867775, time:1750767365.8899243s req_ids:[8] -DEBUG 06-24 20:16:05 [manager.py:391] -ERROR 06-24 20:16:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:208.9378833770752ms total_cost_time:208.96553993225098ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8689 prompt_cache_len:5151 prompt_cache_ratio:0.5928185061572102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 -DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10756659507751465 s -INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s -DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=208920756717776334734611819740183739442, time:1750767366.1011784s req_ids:[8] -DEBUG 06-24 20:16:06 [manager.py:391] -ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:05 lightllm_req_id:8 first_token_cost:203.86075973510742ms total_cost_time:203.8886547088623ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8690 prompt_cache_len:5151 prompt_cache_ratio:0.5927502876869966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 -DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10629487037658691 s -INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10841989517211914 s -DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=259455445063889171371942406201898698242, time:1750767366.3110995s req_ids:[8] -DEBUG 06-24 20:16:06 [manager.py:391] -ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:210.07609367370605ms total_cost_time:210.10375022888184ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8691 prompt_cache_len:5151 prompt_cache_ratio:0.5926820849154297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 -DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10675621032714844 s -INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10857462882995605 s -DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=32362421047188359885756907501820259508, time:1750767366.5277376s req_ids:[8] -DEBUG 06-24 20:16:06 [manager.py:391] -ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:209.73801612854004ms total_cost_time:209.77544784545898ms,out_token_counter:1 mean_per_token_cost_time: 0.03743171691894531ms prompt_token_num:8692 prompt_cache_len:5151 prompt_cache_ratio:0.5926138978370916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 -DEBUG 06-24 20:16:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:06 [manager.py:224] router recive req id 8 cost time 0.10706329345703125 s -INFO 06-24 20:16:06 [manager.py:68] detokenization recv req id 8 cost time 0.10959291458129883 s -DEBUG 06-24 20:16:06 [manager.py:391] Prefill Batch: batch_id=311100661761634716503769196727668022713, time:1750767366.742244s req_ids:[8] -DEBUG 06-24 20:16:06 [manager.py:391] -ERROR 06-24 20:16:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:370.6338405609131ms total_cost_time:370.65863609313965ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:8693 prompt_cache_len:5151 prompt_cache_ratio:0.5925457264465662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 -DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10666203498840332 s -INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10868978500366211 s -DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=79784031661188739511220751542289004333, time:1750767367.114693s req_ids:[8] -DEBUG 06-24 20:16:07 [manager.py:391] -ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:06 lightllm_req_id:8 first_token_cost:205.0039768218994ms total_cost_time:205.0473690032959ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8694 prompt_cache_len:5151 prompt_cache_ratio:0.5924775707384403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 -DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10744500160217285 s -INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10947108268737793 s -DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=257767445152528213482133776975009908613, time:1750767367.3265533s req_ids:[8] -DEBUG 06-24 20:16:07 [manager.py:391] -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:205.64031600952148ms total_cost_time:205.68490028381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8695 prompt_cache_len:5151 prompt_cache_ratio:0.592409430707303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 -DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10780167579650879 s -INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10973429679870605 s -DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=195625561088872498217914411626738598735, time:1750767367.5370066s req_ids:[8] -DEBUG 06-24 20:16:07 [manager.py:391] -ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:202.6519775390625ms total_cost_time:202.6960849761963ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8696 prompt_cache_len:5151 prompt_cache_ratio:0.5923413063477461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 -DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s -INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.11024069786071777 s -DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=19283396263920187241399721999610692776, time:1750767367.755712s req_ids:[8] -DEBUG 06-24 20:16:07 [manager.py:391] -ERROR 06-24 20:16:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:217.79799461364746ms total_cost_time:217.84019470214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8697 prompt_cache_len:5151 prompt_cache_ratio:0.5922731976543636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 -DEBUG 06-24 20:16:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:07 [manager.py:224] router recive req id 8 cost time 0.10722088813781738 s -INFO 06-24 20:16:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918879508972168 s -DEBUG 06-24 20:16:07 [manager.py:391] Prefill Batch: batch_id=271790284896158510476850575334084442174, time:1750767367.9705822s req_ids:[8] -DEBUG 06-24 20:16:07 [manager.py:391] -ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:07 lightllm_req_id:8 first_token_cost:206.75325393676758ms total_cost_time:206.79831504821777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8698 prompt_cache_len:5151 prompt_cache_ratio:0.5922051046217521 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 -DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10728669166564941 s -INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.10923266410827637 s -DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=47382662291830107211040676635290520277, time:1750767368.1816206s req_ids:[8] -DEBUG 06-24 20:16:08 [manager.py:391] -ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:204.76174354553223ms total_cost_time:204.80585098266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8699 prompt_cache_len:5151 prompt_cache_ratio:0.5921370272445109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 -DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10866141319274902 s -INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11075639724731445 s -DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=302629048100371886877782436519795339402, time:1750767368.3906238s req_ids:[8] -DEBUG 06-24 20:16:08 [manager.py:391] -ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:202.11386680603027ms total_cost_time:202.15988159179688ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8700 prompt_cache_len:5151 prompt_cache_ratio:0.5920689655172414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 -DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10893964767456055 s -INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11090612411499023 s -DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=303501947060614020763465538572715567884, time:1750767368.5989063s req_ids:[8] -DEBUG 06-24 20:16:08 [manager.py:391] -ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:204.0235996246338ms total_cost_time:204.06651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8701 prompt_cache_len:5151 prompt_cache_ratio:0.5920009194345478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 -DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:08 [manager.py:224] router recive req id 8 cost time 0.10833120346069336 s -INFO 06-24 20:16:08 [manager.py:68] detokenization recv req id 8 cost time 0.11029338836669922 s -DEBUG 06-24 20:16:08 [manager.py:391] Prefill Batch: batch_id=315477166867868578492856780249458172545, time:1750767368.8109736s req_ids:[8] -DEBUG 06-24 20:16:08 [manager.py:391] -ERROR 06-24 20:16:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:210.93106269836426ms total_cost_time:210.97636222839355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8702 prompt_cache_len:5151 prompt_cache_ratio:0.5919328889910366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 -DEBUG 06-24 20:16:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s -INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.10979247093200684 s -DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=16482676512270489144640571868669405872, time:1750767369.038684s req_ids:[8] -DEBUG 06-24 20:16:09 [manager.py:391] -ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:16:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 35897.968 tokens/s -DEBUG 06-24 20:16:09 [stats.py:37] Avg prompt tokens throughput: 35889.602 tokens/s -DEBUG 06-24 20:16:09 [stats.py:37] Avg generate tokens throughput: 8.366 tokens/s -INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:08 lightllm_req_id:8 first_token_cost:377.9129981994629ms total_cost_time:377.96616554260254ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:8703 prompt_cache_len:5151 prompt_cache_ratio:0.5918648741813168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 -DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s -INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.11006641387939453 s -DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=131724145680361051239260274794207620583, time:1750767369.4084675s req_ids:[8] -DEBUG 06-24 20:16:09 [manager.py:391] -ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:203.66716384887695ms total_cost_time:203.70984077453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8704 prompt_cache_len:5151 prompt_cache_ratio:0.591796875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 -DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.10864090919494629 s -INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.11057400703430176 s -DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=103997066129633895867027092551755400213, time:1750767369.6216373s req_ids:[8] -DEBUG 06-24 20:16:09 [manager.py:391] -ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:206.18152618408203ms total_cost_time:206.2244415283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8705 prompt_cache_len:5151 prompt_cache_ratio:0.5917288914417002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 -DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:09 [manager.py:224] router recive req id 8 cost time 0.1069650650024414 s -INFO 06-24 20:16:09 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s -DEBUG 06-24 20:16:09 [manager.py:391] Prefill Batch: batch_id=194549958552717845118952086581843546921, time:1750767369.8354504s req_ids:[8] -DEBUG 06-24 20:16:09 [manager.py:391] -ERROR 06-24 20:16:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:212.04662322998047ms total_cost_time:212.07237243652344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8706 prompt_cache_len:5151 prompt_cache_ratio:0.5916609235010337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 -DEBUG 06-24 20:16:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10604190826416016 s -INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10821986198425293 s -DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=46121748165978154648408797380860218320, time:1750767370.052102s req_ids:[8] -DEBUG 06-24 20:16:10 [manager.py:391] -ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:09 lightllm_req_id:8 first_token_cost:210.37936210632324ms total_cost_time:210.40773391723633ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:8707 prompt_cache_len:5151 prompt_cache_ratio:0.5915929711726198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 -DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10545992851257324 s -INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10731244087219238 s -DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=329168963753702867377021078792865332110, time:1750767370.2668214s req_ids:[8] -DEBUG 06-24 20:16:10 [manager.py:391] -ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:205.35802841186523ms total_cost_time:205.3837776184082ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:8708 prompt_cache_len:5151 prompt_cache_ratio:0.5915250344510795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 -DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10583877563476562 s -INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10796618461608887 s -DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=233384485912848265270322361903112607844, time:1750767370.4785018s req_ids:[8] -DEBUG 06-24 20:16:10 [manager.py:391] -ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:211.05480194091797ms total_cost_time:211.08341217041016ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:8709 prompt_cache_len:5151 prompt_cache_ratio:0.5914571133310369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 -DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10612893104553223 s -INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10837912559509277 s -DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=7420920920516343220003201347939572463, time:1750767370.6944659s req_ids:[8] -DEBUG 06-24 20:16:10 [manager.py:391] -ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:209.88798141479492ms total_cost_time:209.9156379699707ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8710 prompt_cache_len:5151 prompt_cache_ratio:0.5913892078071182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 -DEBUG 06-24 20:16:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:10 [manager.py:224] router recive req id 8 cost time 0.10608386993408203 s -INFO 06-24 20:16:10 [manager.py:68] detokenization recv req id 8 cost time 0.10830497741699219 s -DEBUG 06-24 20:16:10 [manager.py:391] Prefill Batch: batch_id=243535940201473379212920217087159912199, time:1750767370.910673s req_ids:[8] -DEBUG 06-24 20:16:10 [manager.py:391] -ERROR 06-24 20:16:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:10 lightllm_req_id:8 first_token_cost:211.90643310546875ms total_cost_time:211.93337440490723ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8711 prompt_cache_len:5151 prompt_cache_ratio:0.5913213178739525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 -DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.3087458610534668 s -INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.31102561950683594 s -DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=144063290390221686119286330991390126674, time:1750767371.3406572s req_ids:[8] -DEBUG 06-24 20:16:11 [manager.py:391] -ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:428.07936668395996ms total_cost_time:428.10654640197754ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8712 prompt_cache_len:5151 prompt_cache_ratio:0.5912534435261708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 -DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10510563850402832 s -INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10730385780334473 s -DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=19249680141597300110959663883363574542, time:1750767371.5593305s req_ids:[8] -DEBUG 06-24 20:16:11 [manager.py:391] -ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:212.04686164855957ms total_cost_time:212.07404136657715ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:8713 prompt_cache_len:5151 prompt_cache_ratio:0.591185584758407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 -DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10614824295043945 s -INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10812520980834961 s -DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=172177723434462251239210485406265618413, time:1750767371.7754364s req_ids:[8] -DEBUG 06-24 20:16:11 [manager.py:391] -ERROR 06-24 20:16:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:209.10215377807617ms total_cost_time:209.13004875183105ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:8714 prompt_cache_len:5151 prompt_cache_ratio:0.5911177415652972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 -DEBUG 06-24 20:16:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:11 [manager.py:224] router recive req id 8 cost time 0.10571885108947754 s -INFO 06-24 20:16:11 [manager.py:68] detokenization recv req id 8 cost time 0.10768508911132812 s -DEBUG 06-24 20:16:11 [manager.py:391] Prefill Batch: batch_id=223909422759655011471442408260330310183, time:1750767371.9887822s req_ids:[8] -DEBUG 06-24 20:16:11 [manager.py:391] -ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:11 lightllm_req_id:8 first_token_cost:207.62324333190918ms total_cost_time:207.65018463134766ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8715 prompt_cache_len:5151 prompt_cache_ratio:0.5910499139414802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 -DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10627436637878418 s -INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10730338096618652 s -DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=271852047108543187908820001499860862915, time:1750767372.2010179s req_ids:[8] -DEBUG 06-24 20:16:12 [manager.py:391] -ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.5425853729248ms total_cost_time:208.5702419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8716 prompt_cache_len:5151 prompt_cache_ratio:0.590982101881597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 -DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10498785972595215 s -INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10691118240356445 s -INFO 06-24 20:16:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=187256827203914294803876434169418556722, time:1750767372.4131129s req_ids:[8] -DEBUG 06-24 20:16:12 [manager.py:391] -ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.62054824829102ms total_cost_time:208.6472511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8717 prompt_cache_len:5151 prompt_cache_ratio:0.5909143053802914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 -DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s -INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10729742050170898 s -DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=13884611575273157329040532088828212280, time:1750767372.6283948s req_ids:[8] -DEBUG 06-24 20:16:12 [manager.py:391] -ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:208.63986015319824ms total_cost_time:208.66751670837402ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:8718 prompt_cache_len:5151 prompt_cache_ratio:0.5908465244322092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 -DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:12 [manager.py:224] router recive req id 8 cost time 0.10469317436218262 s -INFO 06-24 20:16:12 [manager.py:68] detokenization recv req id 8 cost time 0.10655355453491211 s -DEBUG 06-24 20:16:12 [manager.py:391] Prefill Batch: batch_id=140476871476550883239163342624988036280, time:1750767372.8416803s req_ids:[8] -DEBUG 06-24 20:16:12 [manager.py:391] -ERROR 06-24 20:16:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:210.04438400268555ms total_cost_time:210.07108688354492ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:8719 prompt_cache_len:5151 prompt_cache_ratio:0.5907787590319991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 -DEBUG 06-24 20:16:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.1052546501159668 s -INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10753989219665527 s -DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=332791027690318538794781537968684689773, time:1750767373.0555527s req_ids:[8] -DEBUG 06-24 20:16:13 [manager.py:391] -ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:12 lightllm_req_id:8 first_token_cost:364.9423122406006ms total_cost_time:364.96543884277344ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:8720 prompt_cache_len:5151 prompt_cache_ratio:0.590711009174312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 -DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10478782653808594 s -INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10681843757629395 s -DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=4631933807010807125183525636181255813, time:1750767373.4213347s req_ids:[8] -DEBUG 06-24 20:16:13 [manager.py:391] -ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:204.09393310546875ms total_cost_time:204.1168212890625ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:8721 prompt_cache_len:5151 prompt_cache_ratio:0.5906432748538012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 -DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10667252540588379 s -INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10883951187133789 s -DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=34043117042336419745321865639889501708, time:1750767373.6342566s req_ids:[8] -DEBUG 06-24 20:16:13 [manager.py:391] -ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:211.26866340637207ms total_cost_time:211.31420135498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8722 prompt_cache_len:5151 prompt_cache_ratio:0.5905755560651227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 -DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:13 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s -INFO 06-24 20:16:13 [manager.py:68] detokenization recv req id 8 cost time 0.10941195487976074 s -DEBUG 06-24 20:16:13 [manager.py:391] Prefill Batch: batch_id=9249543655199973640489540039419633258, time:1750767373.848966s req_ids:[8] -DEBUG 06-24 20:16:13 [manager.py:391] -ERROR 06-24 20:16:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:207.1220874786377ms total_cost_time:207.1816921234131ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8723 prompt_cache_len:5151 prompt_cache_ratio:0.5905078528029347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 -DEBUG 06-24 20:16:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10972309112548828 s -INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11177182197570801 s -DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=326921199938883247148799735788973904828, time:1750767374.0674932s req_ids:[8] -DEBUG 06-24 20:16:14 [manager.py:391] -ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:13 lightllm_req_id:8 first_token_cost:212.1729850769043ms total_cost_time:212.2180461883545ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8724 prompt_cache_len:5151 prompt_cache_ratio:0.5904401650618982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 -DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s -INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11042642593383789 s -DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=200133532509875701156239908622072334871, time:1750767374.2803633s req_ids:[8] -DEBUG 06-24 20:16:14 [manager.py:391] -ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:207.45086669921875ms total_cost_time:207.49545097351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8725 prompt_cache_len:5151 prompt_cache_ratio:0.5903724928366763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 -DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10899949073791504 s -INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.11115360260009766 s -DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=135315976197620084233604134598492109448, time:1750767374.4936907s req_ids:[8] -DEBUG 06-24 20:16:14 [manager.py:391] -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:208.2517147064209ms total_cost_time:208.2970142364502ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8726 prompt_cache_len:5151 prompt_cache_ratio:0.5903048361219344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 -DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.1071934700012207 s -INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.10894465446472168 s -DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=286862521401608182539655590773229034014, time:1750767374.7074695s req_ids:[8] -DEBUG 06-24 20:16:14 [manager.py:391] -ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:168.77341270446777ms total_cost_time:168.81537437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:8727 prompt_cache_len:5151 prompt_cache_ratio:0.5902371949123411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 -DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:14 [manager.py:224] router recive req id 8 cost time 0.10729241371154785 s -INFO 06-24 20:16:14 [manager.py:68] detokenization recv req id 8 cost time 0.1093740463256836 s -DEBUG 06-24 20:16:14 [manager.py:391] Prefill Batch: batch_id=158992273611537624406086570137397483174, time:1750767374.8799243s req_ids:[8] -DEBUG 06-24 20:16:14 [manager.py:391] -ERROR 06-24 20:16:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:196.63381576538086ms total_cost_time:196.67530059814453ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8728 prompt_cache_len:5151 prompt_cache_ratio:0.5901695692025665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 -DEBUG 06-24 20:16:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10527539253234863 s -INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10721206665039062 s -DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=168880936568733871627491218112729785383, time:1750767375.0829747s req_ids:[8] -DEBUG 06-24 20:16:15 [manager.py:391] -ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:14 lightllm_req_id:8 first_token_cost:206.35724067687988ms total_cost_time:206.3758373260498ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8729 prompt_cache_len:5151 prompt_cache_ratio:0.5901019589872838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 -DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10484600067138672 s -INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10686612129211426 s -DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=302227700900147570840535272781260276347, time:1750767375.3026495s req_ids:[8] -DEBUG 06-24 20:16:15 [manager.py:391] -ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:421.6330051422119ms total_cost_time:421.65374755859375ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8730 prompt_cache_len:5151 prompt_cache_ratio:0.5900343642611684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 -DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10430073738098145 s -INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10616922378540039 s -DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=63482562015916414113306188278525560586, time:1750767375.7192562s req_ids:[8] -DEBUG 06-24 20:16:15 [manager.py:391] -ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:162.85014152526855ms total_cost_time:162.86969184875488ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8731 prompt_cache_len:5151 prompt_cache_ratio:0.5899667850188982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 -DEBUG 06-24 20:16:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:15 [manager.py:224] router recive req id 8 cost time 0.10403323173522949 s -INFO 06-24 20:16:15 [manager.py:68] detokenization recv req id 8 cost time 0.10598111152648926 s -DEBUG 06-24 20:16:15 [manager.py:391] Prefill Batch: batch_id=336997077797936274652031650712566758865, time:1750767375.8871655s req_ids:[8] -DEBUG 06-24 20:16:15 [manager.py:391] -ERROR 06-24 20:16:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:202.77094841003418ms total_cost_time:202.803373336792ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:8732 prompt_cache_len:5151 prompt_cache_ratio:0.5898992212551535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 -DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:16 [batch.py:51] router release req id 8 -DEBUG 06-24 20:16:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:16 [manager.py:283] -DEBUG 06-24 20:16:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:16 [manager.py:284] -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10474252700805664 s -INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10690736770629883 s -DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=170774377588143166325422058446289834445, time:1750767376.1103315s req_ids:[8] -DEBUG 06-24 20:16:16 [manager.py:391] -ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:15 lightllm_req_id:8 first_token_cost:224.67279434204102ms total_cost_time:224.69282150268555ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8733 prompt_cache_len:5151 prompt_cache_ratio:0.5898316729646169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 -DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10485458374023438 s -INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10703325271606445 s -DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=332294470804965214014087820768410649528, time:1750767376.3275404s req_ids:[8] -DEBUG 06-24 20:16:16 [manager.py:391] -ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.55221557617188ms total_cost_time:210.5715274810791ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8734 prompt_cache_len:5151 prompt_cache_ratio:0.5897641401419739 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 -DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10464143753051758 s -INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10667777061462402 s -DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=101295379088705130650927365669849669152, time:1750767376.5415006s req_ids:[8] -DEBUG 06-24 20:16:16 [manager.py:391] -ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.46781539916992ms total_cost_time:210.48665046691895ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8735 prompt_cache_len:5151 prompt_cache_ratio:0.5896966227819118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 -DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10372543334960938 s -INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.1056509017944336 s -DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=295518405310055073146589171151809110219, time:1750767376.756528s req_ids:[8] -DEBUG 06-24 20:16:16 [manager.py:391] -ERROR 06-24 20:16:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:210.55841445922852ms total_cost_time:210.57939529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8736 prompt_cache_len:5151 prompt_cache_ratio:0.5896291208791209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 -DEBUG 06-24 20:16:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:16 [batch.py:51] router release req id 8 -INFO 06-24 20:16:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:16 [manager.py:224] router recive req id 8 cost time 0.10360527038574219 s -INFO 06-24 20:16:16 [manager.py:68] detokenization recv req id 8 cost time 0.10576343536376953 s -DEBUG 06-24 20:16:16 [manager.py:391] Prefill Batch: batch_id=94503661745512680274114572090088447911, time:1750767376.9700577s req_ids:[8] -DEBUG 06-24 20:16:16 [manager.py:391] -ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:16 lightllm_req_id:8 first_token_cost:209.49935913085938ms total_cost_time:209.52630043029785ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:8737 prompt_cache_len:5151 prompt_cache_ratio:0.5895616344282935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 -DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10488033294677734 s -INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.1069498062133789 s -DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=171763503198930521907460316055785763951, time:1750767377.1816206s req_ids:[8] -DEBUG 06-24 20:16:17 [manager.py:391] -ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:211.1220359802246ms total_cost_time:211.14230155944824ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8738 prompt_cache_len:5151 prompt_cache_ratio:0.5894941634241245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 -DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.30524754524230957 s -INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.3073999881744385 s -DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=215837103245348277297928525518434552758, time:1750767377.595684s req_ids:[8] -DEBUG 06-24 20:16:17 [manager.py:391] -ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:416.69487953186035ms total_cost_time:416.715145111084ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8739 prompt_cache_len:5151 prompt_cache_ratio:0.5894267078613113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 -DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10466599464416504 s -INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.10650253295898438 s -DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=162089664782168773349651029167792305258, time:1750767377.8154814s req_ids:[8] -DEBUG 06-24 20:16:17 [manager.py:391] -ERROR 06-24 20:16:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:175.7352352142334ms total_cost_time:175.75407028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8740 prompt_cache_len:5151 prompt_cache_ratio:0.5893592677345538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 -DEBUG 06-24 20:16:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:17 [manager.py:224] router recive req id 8 cost time 0.10465288162231445 s -INFO 06-24 20:16:17 [manager.py:68] detokenization recv req id 8 cost time 0.10663151741027832 s -DEBUG 06-24 20:16:17 [manager.py:391] Prefill Batch: batch_id=1599071863362262453522374957829331511, time:1750767377.9907365s req_ids:[8] -DEBUG 06-24 20:16:17 [manager.py:391] -ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:17 lightllm_req_id:8 first_token_cost:200.20794868469238ms total_cost_time:200.22916793823242ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8741 prompt_cache_len:5151 prompt_cache_ratio:0.589291843038554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 -DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.1046442985534668 s -INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.1066122055053711 s -DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=294996034983037430652853083310942822325, time:1750767378.1944015s req_ids:[8] -DEBUG 06-24 20:16:18 [manager.py:391] -ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:208.21833610534668ms total_cost_time:208.2374095916748ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8742 prompt_cache_len:5151 prompt_cache_ratio:0.5892244337680165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 -DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10406684875488281 s -INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10614824295043945 s -DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=280572983269317351810717964606967105386, time:1750767378.406361s req_ids:[8] -DEBUG 06-24 20:16:18 [manager.py:391] -ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:212.39995956420898ms total_cost_time:212.41998672485352ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8743 prompt_cache_len:5151 prompt_cache_ratio:0.5891570399176485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 -DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10390901565551758 s -INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10599231719970703 s -DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=276191162601313425422058820333468261833, time:1750767378.6203165s req_ids:[8] -DEBUG 06-24 20:16:18 [manager.py:391] -ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:210.53004264831543ms total_cost_time:210.55054664611816ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8744 prompt_cache_len:5151 prompt_cache_ratio:0.5890896614821592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 -DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:18 [manager.py:224] router recive req id 8 cost time 0.10494637489318848 s -INFO 06-24 20:16:18 [manager.py:68] detokenization recv req id 8 cost time 0.10599446296691895 s -DEBUG 06-24 20:16:18 [manager.py:391] Prefill Batch: batch_id=247941048554051494990060859181010391591, time:1750767378.8325007s req_ids:[8] -DEBUG 06-24 20:16:18 [manager.py:391] -ERROR 06-24 20:16:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:208.67228507995605ms total_cost_time:208.69112014770508ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8745 prompt_cache_len:5151 prompt_cache_ratio:0.5890222984562608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 -DEBUG 06-24 20:16:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10383343696594238 s -INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10591936111450195 s -DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=273348610996156805508150395931525832654, time:1750767379.0467112s req_ids:[8] -DEBUG 06-24 20:16:19 [manager.py:391] -ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:18 lightllm_req_id:8 first_token_cost:210.52861213684082ms total_cost_time:210.54863929748535ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8746 prompt_cache_len:5151 prompt_cache_ratio:0.5889549508346673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 -DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10515952110290527 s -INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10713791847229004 s -DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=252981414446781748076929619620919768907, time:1750767379.2579706s req_ids:[8] -DEBUG 06-24 20:16:19 [manager.py:391] -ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:16:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 37591.918 tokens/s -DEBUG 06-24 20:16:19 [stats.py:37] Avg prompt tokens throughput: 37583.304 tokens/s -DEBUG 06-24 20:16:19 [stats.py:37] Avg generate tokens throughput: 8.615 tokens/s -INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:365.71407318115234ms total_cost_time:365.7352924346924ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8747 prompt_cache_len:5151 prompt_cache_ratio:0.5888876186120956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 -DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.10472226142883301 s -INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10680270195007324 s -DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=317641688166258195193937745914572189230, time:1750767379.6245546s req_ids:[8] -DEBUG 06-24 20:16:19 [manager.py:391] -ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:208.3423137664795ms total_cost_time:208.36210250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8748 prompt_cache_len:5151 prompt_cache_ratio:0.5888203017832647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 -DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:19 [manager.py:224] router recive req id 8 cost time 0.1046137809753418 s -INFO 06-24 20:16:19 [manager.py:68] detokenization recv req id 8 cost time 0.10658407211303711 s -DEBUG 06-24 20:16:19 [manager.py:391] Prefill Batch: batch_id=144329472682892245180671667966026191821, time:1750767379.8385735s req_ids:[8] -DEBUG 06-24 20:16:19 [manager.py:391] -ERROR 06-24 20:16:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:213.11020851135254ms total_cost_time:213.12999725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8749 prompt_cache_len:5151 prompt_cache_ratio:0.5887530003428963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 -DEBUG 06-24 20:16:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10480546951293945 s -INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10673904418945312 s -DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=235626832170164669715842314633802476442, time:1750767380.0527763s req_ids:[8] -DEBUG 06-24 20:16:20 [manager.py:391] -ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:19 lightllm_req_id:8 first_token_cost:211.15970611572266ms total_cost_time:211.17877960205078ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8750 prompt_cache_len:5151 prompt_cache_ratio:0.5886857142857143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 -DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10426712036132812 s -INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10603213310241699 s -DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=167413973569693302042290786962719356422, time:1750767380.268253s req_ids:[8] -DEBUG 06-24 20:16:20 [manager.py:391] -ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:210.94965934753418ms total_cost_time:210.9694480895996ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8751 prompt_cache_len:5151 prompt_cache_ratio:0.588618443606445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 -DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10378193855285645 s -INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10575699806213379 s -DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=286362606315066561801943840118291955688, time:1750767380.4810402s req_ids:[8] -DEBUG 06-24 20:16:20 [manager.py:391] -ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:212.21256256103516ms total_cost_time:212.23139762878418ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8752 prompt_cache_len:5151 prompt_cache_ratio:0.5885511882998172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 -DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.1040952205657959 s -INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10635948181152344 s -DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=191432946535640696478067250426176412232, time:1750767380.696372s req_ids:[8] -DEBUG 06-24 20:16:20 [manager.py:391] -ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:215.06834030151367ms total_cost_time:215.0874137878418ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8753 prompt_cache_len:5151 prompt_cache_ratio:0.5884839483605621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 -DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:20 [manager.py:224] router recive req id 8 cost time 0.10411977767944336 s -INFO 06-24 20:16:20 [manager.py:68] detokenization recv req id 8 cost time 0.10587573051452637 s -DEBUG 06-24 20:16:20 [manager.py:391] Prefill Batch: batch_id=88092127503740288236844686559287892016, time:1750767380.9120975s req_ids:[8] -DEBUG 06-24 20:16:20 [manager.py:391] -ERROR 06-24 20:16:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:169.76022720336914ms total_cost_time:169.77882385253906ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8754 prompt_cache_len:5151 prompt_cache_ratio:0.5884167237834133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 -DEBUG 06-24 20:16:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10489106178283691 s -INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10696220397949219 s -DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=294203860385911019585072928586074079954, time:1750767381.083713s req_ids:[8] -DEBUG 06-24 20:16:21 [manager.py:391] -ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:20 lightllm_req_id:8 first_token_cost:201.54690742492676ms total_cost_time:201.5671730041504ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8755 prompt_cache_len:5151 prompt_cache_ratio:0.5883495145631068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 -DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.1046302318572998 s -INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10649275779724121 s -DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=131168778024007096473233232306347925352, time:1750767381.289694s req_ids:[8] -DEBUG 06-24 20:16:21 [manager.py:391] -ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:208.86874198913574ms total_cost_time:208.88757705688477ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8756 prompt_cache_len:5151 prompt_cache_ratio:0.588282320694381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 -DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10311722755432129 s -INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10496735572814941 s -DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=66519058456742941817911103978958069628, time:1750767381.513503s req_ids:[8] -DEBUG 06-24 20:16:21 [manager.py:391] -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:388.045072555542ms total_cost_time:388.06605339050293ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8757 prompt_cache_len:5151 prompt_cache_ratio:0.5882151421719767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 -DEBUG 06-24 20:16:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:21 [manager.py:224] router recive req id 8 cost time 0.10496950149536133 s -INFO 06-24 20:16:21 [manager.py:68] detokenization recv req id 8 cost time 0.10691165924072266 s -DEBUG 06-24 20:16:21 [manager.py:391] Prefill Batch: batch_id=204434587636336291910313496141720463984, time:1750767381.8909197s req_ids:[8] -DEBUG 06-24 20:16:21 [manager.py:391] -ERROR 06-24 20:16:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:207.48043060302734ms total_cost_time:207.49974250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8758 prompt_cache_len:5151 prompt_cache_ratio:0.5881479789906371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 -DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s -INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10591554641723633 s -DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=165244173061017888312944627697171620074, time:1750767382.113133s req_ids:[8] -DEBUG 06-24 20:16:22 [manager.py:391] -ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:21 lightllm_req_id:8 first_token_cost:215.50369262695312ms total_cost_time:215.52371978759766ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8759 prompt_cache_len:5151 prompt_cache_ratio:0.5880808311451079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 -DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10477709770202637 s -INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10653543472290039 s -DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=319181208121993822473453360483614274991, time:1750767382.326287s req_ids:[8] -DEBUG 06-24 20:16:22 [manager.py:391] -ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:199.33414459228516ms total_cost_time:199.3541717529297ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8760 prompt_cache_len:5151 prompt_cache_ratio:0.588013698630137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 -DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10375332832336426 s -INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10541892051696777 s -DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=94071744550325598922276257492388202909, time:1750767382.530367s req_ids:[8] -DEBUG 06-24 20:16:22 [manager.py:391] -ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:205.36136627197266ms total_cost_time:205.3813934326172ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8761 prompt_cache_len:5151 prompt_cache_ratio:0.5879465814404748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 -DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10384249687194824 s -INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10559821128845215 s -DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=53098447361049124127382189744973606075, time:1750767382.7405095s req_ids:[8] -DEBUG 06-24 20:16:22 [manager.py:391] -ERROR 06-24 20:16:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:210.73079109191895ms total_cost_time:210.75129508972168ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8762 prompt_cache_len:5151 prompt_cache_ratio:0.5878794795708743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 -DEBUG 06-24 20:16:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:22 [manager.py:224] router recive req id 8 cost time 0.10338616371154785 s -INFO 06-24 20:16:22 [manager.py:68] detokenization recv req id 8 cost time 0.10505104064941406 s -DEBUG 06-24 20:16:22 [manager.py:391] Prefill Batch: batch_id=141680682515886791874495618927637929146, time:1750767382.9531124s req_ids:[8] -DEBUG 06-24 20:16:22 [manager.py:391] -ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:22 lightllm_req_id:8 first_token_cost:206.35151863098145ms total_cost_time:206.37130737304688ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8763 prompt_cache_len:5151 prompt_cache_ratio:0.5878123930160903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 -DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.10464811325073242 s -INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.10631752014160156 s -DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=249004431784392409067435279394965296702, time:1750767383.1654112s req_ids:[8] -DEBUG 06-24 20:16:23 [manager.py:391] -INFO 06-24 20:16:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:209.4593048095703ms total_cost_time:209.47813987731934ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8764 prompt_cache_len:5151 prompt_cache_ratio:0.5877453217708809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 -DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.1036221981048584 s -INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.10536456108093262 s -DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=11609722059271124313419090558757325430, time:1750767383.3790019s req_ids:[8] -DEBUG 06-24 20:16:23 [manager.py:391] -ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:209.98597145080566ms total_cost_time:210.0064754486084ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8765 prompt_cache_len:5151 prompt_cache_ratio:0.5876782658300057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 -DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.30646324157714844 s -INFO 06-24 20:16:23 [manager.py:68] detokenization recv req id 8 cost time 0.30840301513671875 s -DEBUG 06-24 20:16:23 [manager.py:391] Prefill Batch: batch_id=109716450048481134079151200332423457731, time:1750767383.7983263s req_ids:[8] -DEBUG 06-24 20:16:23 [manager.py:391] -ERROR 06-24 20:16:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:418.1685447692871ms total_cost_time:418.18857192993164ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8766 prompt_cache_len:5151 prompt_cache_ratio:0.5876112251882273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 -DEBUG 06-24 20:16:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:23 [manager.py:224] router recive req id 8 cost time 0.10481548309326172 s -INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10664510726928711 s -DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=204791949817607283898888766678076877504, time:1750767384.0150526s req_ids:[8] -DEBUG 06-24 20:16:24 [manager.py:391] -ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:23 lightllm_req_id:8 first_token_cost:208.93049240112305ms total_cost_time:208.94980430603027ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8767 prompt_cache_len:5151 prompt_cache_ratio:0.5875441998403103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 -DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10427236557006836 s -INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10607147216796875 s -DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=152195190818401465144187580916051028767, time:1750767384.2286193s req_ids:[8] -DEBUG 06-24 20:16:24 [manager.py:391] -ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:169.4321632385254ms total_cost_time:169.45219039916992ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8768 prompt_cache_len:5151 prompt_cache_ratio:0.5874771897810219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 -DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10375404357910156 s -INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10554265975952148 s -DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=304787887041618853777309058110288350723, time:1750767384.39944s req_ids:[8] -DEBUG 06-24 20:16:24 [manager.py:391] -ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:197.28851318359375ms total_cost_time:197.30782508850098ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8769 prompt_cache_len:5151 prompt_cache_ratio:0.5874101950051317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 -DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10514974594116211 s -INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10708189010620117 s -DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=249345828863053844020423460869549491031, time:1750767384.6020055s req_ids:[8] -DEBUG 06-24 20:16:24 [manager.py:391] -ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:210.89482307434082ms total_cost_time:210.91413497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8770 prompt_cache_len:5151 prompt_cache_ratio:0.5873432155074116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 -DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:24 [manager.py:224] router recive req id 8 cost time 0.10459423065185547 s -INFO 06-24 20:16:24 [manager.py:68] detokenization recv req id 8 cost time 0.10632681846618652 s -DEBUG 06-24 20:16:24 [manager.py:391] Prefill Batch: batch_id=56133271329187069498374586045577948728, time:1750767384.8311439s req_ids:[8] -DEBUG 06-24 20:16:24 [manager.py:391] -ERROR 06-24 20:16:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:224.5187759399414ms total_cost_time:224.53784942626953ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8771 prompt_cache_len:5151 prompt_cache_ratio:0.5872762512826359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 -DEBUG 06-24 20:16:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10343217849731445 s -INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10505175590515137 s -DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=89660768129950293395287248560988318949, time:1750767385.0484066s req_ids:[8] -DEBUG 06-24 20:16:25 [manager.py:391] -ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:24 lightllm_req_id:8 first_token_cost:211.33136749267578ms total_cost_time:211.35234832763672ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8772 prompt_cache_len:5151 prompt_cache_ratio:0.5872093023255814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 -DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10459017753601074 s -INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10635209083557129 s -DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=47849658358945076852346753505784044149, time:1750767385.264066s req_ids:[8] -DEBUG 06-24 20:16:25 [manager.py:391] -ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:210.1120948791504ms total_cost_time:210.13140678405762ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8773 prompt_cache_len:5151 prompt_cache_ratio:0.587142368631027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 -DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.1034085750579834 s -INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10529303550720215 s -DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=74183999344847385141801403082104224026, time:1750767385.478397s req_ids:[8] -DEBUG 06-24 20:16:25 [manager.py:391] -ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:370.042085647583ms total_cost_time:370.06235122680664ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8774 prompt_cache_len:5151 prompt_cache_ratio:0.5870754501937543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 -DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:25 [manager.py:224] router recive req id 8 cost time 0.10485267639160156 s -INFO 06-24 20:16:25 [manager.py:68] detokenization recv req id 8 cost time 0.10669231414794922 s -DEBUG 06-24 20:16:25 [manager.py:391] Prefill Batch: batch_id=272544114785116394160225213551452294788, time:1750767385.8482215s req_ids:[8] -DEBUG 06-24 20:16:25 [manager.py:391] -ERROR 06-24 20:16:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:203.66835594177246ms total_cost_time:203.6879062652588ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8775 prompt_cache_len:5151 prompt_cache_ratio:0.587008547008547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 -DEBUG 06-24 20:16:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10463356971740723 s -INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10641670227050781 s -DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=99249653751804177961496670406427403703, time:1750767386.0595105s req_ids:[8] -DEBUG 06-24 20:16:26 [manager.py:391] -ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:25 lightllm_req_id:8 first_token_cost:206.9103717803955ms total_cost_time:206.94351196289062ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:8776 prompt_cache_len:5151 prompt_cache_ratio:0.5869416590701915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 -DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10458111763000488 s -INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10641193389892578 s -DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=141260591270106173224662149232128788010, time:1750767386.2735436s req_ids:[8] -DEBUG 06-24 20:16:26 [manager.py:391] -ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:211.89141273498535ms total_cost_time:211.9121551513672ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8777 prompt_cache_len:5151 prompt_cache_ratio:0.5868747863734761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 -DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10479331016540527 s -INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.1066582202911377 s -DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=220727153197882344242724035165312102565, time:1750767386.4888551s req_ids:[8] -DEBUG 06-24 20:16:26 [manager.py:391] -ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:209.1062068939209ms total_cost_time:209.12528038024902ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8778 prompt_cache_len:5151 prompt_cache_ratio:0.5868079289131921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 -DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10424017906188965 s -INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10614705085754395 s -DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=92669549218114962188388017205752186781, time:1750767386.7029374s req_ids:[8] -DEBUG 06-24 20:16:26 [manager.py:391] -ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:173.0809211730957ms total_cost_time:173.09975624084473ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8779 prompt_cache_len:5151 prompt_cache_ratio:0.5867410866841326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 -DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:26 [manager.py:224] router recive req id 8 cost time 0.10455751419067383 s -INFO 06-24 20:16:26 [manager.py:68] detokenization recv req id 8 cost time 0.10622549057006836 s -DEBUG 06-24 20:16:26 [manager.py:391] Prefill Batch: batch_id=116808246936285906623955355588607232853, time:1750767386.8763094s req_ids:[8] -DEBUG 06-24 20:16:26 [manager.py:391] -ERROR 06-24 20:16:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:165.27581214904785ms total_cost_time:165.29583930969238ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8780 prompt_cache_len:5151 prompt_cache_ratio:0.5866742596810934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 -DEBUG 06-24 20:16:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10466766357421875 s -INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10653090476989746 s -DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=308526404622330065319904016450984911265, time:1750767387.047423s req_ids:[8] -DEBUG 06-24 20:16:27 [manager.py:391] -ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:26 lightllm_req_id:8 first_token_cost:200.73390007019043ms total_cost_time:200.75535774230957ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:8781 prompt_cache_len:5151 prompt_cache_ratio:0.5866074478988725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 -DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10506296157836914 s -INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10688519477844238 s -DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=315016866289753239878057919839284217177, time:1750767387.25336s req_ids:[8] -DEBUG 06-24 20:16:27 [manager.py:391] -ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:208.9407444000244ms total_cost_time:208.96005630493164ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8782 prompt_cache_len:5151 prompt_cache_ratio:0.5865406513322705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 -DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.10479354858398438 s -INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10657191276550293 s -DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=40077149989252318110167162392132544835, time:1750767387.479015s req_ids:[8] -DEBUG 06-24 20:16:27 [manager.py:391] -ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:225.73494911193848ms total_cost_time:225.7537841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8783 prompt_cache_len:5151 prompt_cache_ratio:0.5864738699760902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 -DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:27 [manager.py:224] router recive req id 8 cost time 0.1041257381439209 s -INFO 06-24 20:16:27 [manager.py:68] detokenization recv req id 8 cost time 0.10512471199035645 s -DEBUG 06-24 20:16:27 [manager.py:391] Prefill Batch: batch_id=5773166514640876433548213832257652289, time:1750767387.694719s req_ids:[8] -DEBUG 06-24 20:16:27 [manager.py:391] -ERROR 06-24 20:16:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:339.4918441772461ms total_cost_time:339.5123481750488ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8784 prompt_cache_len:5151 prompt_cache_ratio:0.5864071038251366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 -DEBUG 06-24 20:16:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1046438217163086 s -INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10630655288696289 s -DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=208704371763789621224403990918087754640, time:1750767388.0358486s req_ids:[8] -DEBUG 06-24 20:16:28 [manager.py:391] -ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:27 lightllm_req_id:8 first_token_cost:193.4802532196045ms total_cost_time:193.50194931030273ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8785 prompt_cache_len:5151 prompt_cache_ratio:0.5863403528742174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 -DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1048130989074707 s -INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.1067664623260498 s -DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=312136464773319852682973084497703641753, time:1750767388.2409534s req_ids:[8] -DEBUG 06-24 20:16:28 [manager.py:391] -ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:208.30273628234863ms total_cost_time:208.32228660583496ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8786 prompt_cache_len:5151 prompt_cache_ratio:0.5862736171181425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 -DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.10377693176269531 s -INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10563921928405762 s -DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=337601474522438756842260111276025163081, time:1750767388.453865s req_ids:[8] -DEBUG 06-24 20:16:28 [manager.py:391] -ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:209.76758003234863ms total_cost_time:209.78665351867676ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8787 prompt_cache_len:5151 prompt_cache_ratio:0.5862068965517241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 -DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.10399413108825684 s -INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10561823844909668 s -DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=223429798951520209433591624666148734166, time:1750767388.6852233s req_ids:[8] -DEBUG 06-24 20:16:28 [manager.py:391] -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:229.295015335083ms total_cost_time:229.31528091430664ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8788 prompt_cache_len:5151 prompt_cache_ratio:0.586140191169777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 -DEBUG 06-24 20:16:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:28 [manager.py:224] router recive req id 8 cost time 0.1045689582824707 s -INFO 06-24 20:16:28 [manager.py:68] detokenization recv req id 8 cost time 0.10635972023010254 s -DEBUG 06-24 20:16:28 [manager.py:391] Prefill Batch: batch_id=207396175817713021326841319391094247496, time:1750767388.902168s req_ids:[8] -DEBUG 06-24 20:16:28 [manager.py:391] -ERROR 06-24 20:16:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:211.2863063812256ms total_cost_time:211.30609512329102ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8789 prompt_cache_len:5151 prompt_cache_ratio:0.586073500967118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 -DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10355138778686523 s -INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10531854629516602 s -DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=60575587449976754069523328598613846306, time:1750767389.117616s req_ids:[8] -DEBUG 06-24 20:16:29 [manager.py:391] -ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:28 lightllm_req_id:8 first_token_cost:210.97040176391602ms total_cost_time:210.98947525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8790 prompt_cache_len:5151 prompt_cache_ratio:0.5860068259385666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 -DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10452079772949219 s -INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10623693466186523 s -DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=312540868062217735163241866263049271418, time:1750767389.3335767s req_ids:[8] -DEBUG 06-24 20:16:29 [manager.py:391] -ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:210.8142375946045ms total_cost_time:210.83378791809082ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8791 prompt_cache_len:5151 prompt_cache_ratio:0.5859401660789444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 -DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.10476541519165039 s -INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.10655045509338379 s -DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=259723317891891195970569752834540119650, time:1750767389.5493307s req_ids:[8] -DEBUG 06-24 20:16:29 [manager.py:391] -DEBUG 06-24 20:16:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 39280.059 tokens/s -DEBUG 06-24 20:16:29 [stats.py:37] Avg prompt tokens throughput: 39271.203 tokens/s -DEBUG 06-24 20:16:29 [stats.py:37] Avg generate tokens throughput: 8.856 tokens/s -ERROR 06-24 20:16:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:212.15486526489258ms total_cost_time:212.174654006958ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8792 prompt_cache_len:5151 prompt_cache_ratio:0.5858735213830755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 -DEBUG 06-24 20:16:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:29 [manager.py:224] router recive req id 8 cost time 0.305072546005249 s -INFO 06-24 20:16:29 [manager.py:68] detokenization recv req id 8 cost time 0.30690622329711914 s -DEBUG 06-24 20:16:29 [manager.py:391] Prefill Batch: batch_id=40578652665155967973819427498442073667, time:1750767389.9553926s req_ids:[8] -DEBUG 06-24 20:16:29 [manager.py:391] -ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:29 lightllm_req_id:8 first_token_cost:394.4094181060791ms total_cost_time:394.4284915924072ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8793 prompt_cache_len:5151 prompt_cache_ratio:0.5858068918457864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 -DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:30 [batch.py:51] router release req id 8 -INFO 06-24 20:16:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10373520851135254 s -INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10554218292236328 s -DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=339822657400394861750711768947973783395, time:1750767390.1610081s req_ids:[8] -DEBUG 06-24 20:16:30 [manager.py:391] -ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:207.21077919006348ms total_cost_time:207.2300910949707ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8794 prompt_cache_len:5151 prompt_cache_ratio:0.5857402774619058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 -DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10360860824584961 s -INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10528182983398438 s -DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=226272608757914475370867309104144677031, time:1750767390.372933s req_ids:[8] -DEBUG 06-24 20:16:30 [manager.py:391] -ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:209.2416286468506ms total_cost_time:209.26165580749512ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8795 prompt_cache_len:5151 prompt_cache_ratio:0.5856736782262649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 -DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10392498970031738 s -INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10568690299987793 s -DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=28835755361875341592108171298952713476, time:1750767390.5859728s req_ids:[8] -DEBUG 06-24 20:16:30 [manager.py:391] -ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:207.54051208496094ms total_cost_time:207.56006240844727ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8796 prompt_cache_len:5151 prompt_cache_ratio:0.5856070941336972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 -DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10471987724304199 s -INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10640454292297363 s -DEBUG 06-24 20:16:30 [manager.py:391] Prefill Batch: batch_id=307747629390181273451922907191906202392, time:1750767390.798153s req_ids:[8] -DEBUG 06-24 20:16:30 [manager.py:391] -ERROR 06-24 20:16:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:209.53011512756348ms total_cost_time:209.5503807067871ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8797 prompt_cache_len:5151 prompt_cache_ratio:0.5855405251790383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 -DEBUG 06-24 20:16:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:30 [manager.py:224] router recive req id 8 cost time 0.10442852973937988 s -INFO 06-24 20:16:30 [manager.py:68] detokenization recv req id 8 cost time 0.10609745979309082 s -DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=304944213721777500798656876618708118014, time:1750767391.0128114s req_ids:[8] -DEBUG 06-24 20:16:31 [manager.py:391] -ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:30 lightllm_req_id:8 first_token_cost:208.85515213012695ms total_cost_time:208.87517929077148ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8798 prompt_cache_len:5151 prompt_cache_ratio:0.5854739713571266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 -DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10422801971435547 s -INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10547852516174316 s -DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=103180211320769869563425088920794833794, time:1750767391.2275288s req_ids:[8] -DEBUG 06-24 20:16:31 [manager.py:391] -ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:170.21822929382324ms total_cost_time:170.23849487304688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8799 prompt_cache_len:5151 prompt_cache_ratio:0.5854074326628026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 -DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.1033775806427002 s -INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10492801666259766 s -DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=193255226241230815017993224650949095534, time:1750767391.4004557s req_ids:[8] -DEBUG 06-24 20:16:31 [manager.py:391] -ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:206.32290840148926ms total_cost_time:206.3431739807129ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8800 prompt_cache_len:5151 prompt_cache_ratio:0.5853409090909091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 -DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10471296310424805 s -INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10641598701477051 s -DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=255434347819822951052762368778676545409, time:1750767391.6112704s req_ids:[8] -DEBUG 06-24 20:16:31 [manager.py:391] -ERROR 06-24 20:16:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:364.3062114715576ms total_cost_time:364.32623863220215ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8801 prompt_cache_len:5151 prompt_cache_ratio:0.5852744006362913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 -DEBUG 06-24 20:16:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:31 [manager.py:224] router recive req id 8 cost time 0.10463857650756836 s -INFO 06-24 20:16:31 [manager.py:68] detokenization recv req id 8 cost time 0.10628962516784668 s -DEBUG 06-24 20:16:31 [manager.py:391] Prefill Batch: batch_id=297786129285146124572162186225087022901, time:1750767391.9783964s req_ids:[8] -DEBUG 06-24 20:16:31 [manager.py:391] -ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:31 lightllm_req_id:8 first_token_cost:205.72447776794434ms total_cost_time:205.74450492858887ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8802 prompt_cache_len:5151 prompt_cache_ratio:0.5852079072937969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 -DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10479140281677246 s -INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10651254653930664 s -DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=58166223878735851830397291476238206219, time:1750767392.1898077s req_ids:[8] -DEBUG 06-24 20:16:32 [manager.py:391] -ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:207.86762237548828ms total_cost_time:207.8862190246582ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8803 prompt_cache_len:5151 prompt_cache_ratio:0.5851414290582756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 -DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10389447212219238 s -INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10556435585021973 s -DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=187962914697178080270694169678475419136, time:1750767392.4015887s req_ids:[8] -DEBUG 06-24 20:16:32 [manager.py:391] -ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:209.34534072875977ms total_cost_time:209.3651294708252ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8804 prompt_cache_len:5151 prompt_cache_ratio:0.5850749659245797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 -DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10369753837585449 s -INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10545635223388672 s -DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=236678181613034067494605680322394829588, time:1750767392.6157513s req_ids:[8] -DEBUG 06-24 20:16:32 [manager.py:391] -ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:211.19236946105957ms total_cost_time:211.2114429473877ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8805 prompt_cache_len:5151 prompt_cache_ratio:0.5850085178875639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 -DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:32 [manager.py:224] router recive req id 8 cost time 0.10386109352111816 s -INFO 06-24 20:16:32 [manager.py:68] detokenization recv req id 8 cost time 0.10544943809509277 s -DEBUG 06-24 20:16:32 [manager.py:391] Prefill Batch: batch_id=210340224440324938728608345569770828259, time:1750767392.829011s req_ids:[8] -DEBUG 06-24 20:16:32 [manager.py:391] -ERROR 06-24 20:16:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:191.4525032043457ms total_cost_time:191.47157669067383ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8806 prompt_cache_len:5151 prompt_cache_ratio:0.584942084942085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 -DEBUG 06-24 20:16:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10380196571350098 s -INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10571694374084473 s -DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=308353486822531169101062417156737327353, time:1750767393.024687s req_ids:[8] -DEBUG 06-24 20:16:33 [manager.py:391] -ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:32 lightllm_req_id:8 first_token_cost:212.22686767578125ms total_cost_time:212.24617958068848ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8807 prompt_cache_len:5151 prompt_cache_ratio:0.5848756670830022 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 -DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.1044614315032959 s -INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10650300979614258 s -DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=84630901579156035836851274580048383936, time:1750767393.2395406s req_ids:[8] -DEBUG 06-24 20:16:33 [manager.py:391] -ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:205.82818984985352ms total_cost_time:205.84678649902344ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8808 prompt_cache_len:5151 prompt_cache_ratio:0.5848092643051771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 -DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10492753982543945 s -INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10693907737731934 s -DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=276147475871266061044697032850987544470, time:1750767393.4508243s req_ids:[8] -DEBUG 06-24 20:16:33 [manager.py:391] -ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:208.91737937927246ms total_cost_time:208.93573760986328ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8809 prompt_cache_len:5151 prompt_cache_ratio:0.5847428766034737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 -DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:33 [batch.py:51] router release req id 8 -INFO 06-24 20:16:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10495209693908691 s -INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10706472396850586 s -DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=137073501901715090507185354789866282187, time:1750767393.6641302s req_ids:[8] -DEBUG 06-24 20:16:33 [manager.py:391] -ERROR 06-24 20:16:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:213.59801292419434ms total_cost_time:213.61637115478516ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8810 prompt_cache_len:5151 prompt_cache_ratio:0.5846765039727583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 -DEBUG 06-24 20:16:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:33 [manager.py:224] router recive req id 8 cost time 0.10477375984191895 s -INFO 06-24 20:16:33 [manager.py:68] detokenization recv req id 8 cost time 0.10671091079711914 s -DEBUG 06-24 20:16:33 [manager.py:391] Prefill Batch: batch_id=19143163195591878355582258715949879318, time:1750767393.8798652s req_ids:[8] -DEBUG 06-24 20:16:33 [manager.py:391] -ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:33 lightllm_req_id:8 first_token_cost:365.9818172454834ms total_cost_time:366.0016059875488ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8811 prompt_cache_len:5151 prompt_cache_ratio:0.5846101464078992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 -DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10363912582397461 s -INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10549044609069824 s -DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=127666171004250215499789485444500759210, time:1750767394.2478504s req_ids:[8] -DEBUG 06-24 20:16:34 [manager.py:391] -ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:206.13598823547363ms total_cost_time:206.15601539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8812 prompt_cache_len:5151 prompt_cache_ratio:0.5845438039037676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 -DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10422730445861816 s -INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10622835159301758 s -DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=282014173667933612999771397970384953737, time:1750767394.4615579s req_ids:[8] -DEBUG 06-24 20:16:34 [manager.py:391] -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:208.03332328796387ms total_cost_time:208.05740356445312ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:8813 prompt_cache_len:5151 prompt_cache_ratio:0.5844774764552366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 -DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.1037750244140625 s -INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10574221611022949 s -DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=70106468550658532288802452627722681366, time:1750767394.6753662s req_ids:[8] -DEBUG 06-24 20:16:34 [manager.py:391] -ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:209.07902717590332ms total_cost_time:209.09953117370605ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8814 prompt_cache_len:5151 prompt_cache_ratio:0.5844111640571817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 -DEBUG 06-24 20:16:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:34 [manager.py:224] router recive req id 8 cost time 0.10462236404418945 s -INFO 06-24 20:16:34 [manager.py:68] detokenization recv req id 8 cost time 0.10663843154907227 s -DEBUG 06-24 20:16:34 [manager.py:391] Prefill Batch: batch_id=204993705148776029419516887194964731953, time:1750767394.8905427s req_ids:[8] -DEBUG 06-24 20:16:34 [manager.py:391] -ERROR 06-24 20:16:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:215.15488624572754ms total_cost_time:215.17395973205566ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8815 prompt_cache_len:5151 prompt_cache_ratio:0.584344866704481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 -DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10475730895996094 s -INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.1067037582397461 s -DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=299625396579342395573599578292802477759, time:1750767395.104231s req_ids:[8] -DEBUG 06-24 20:16:35 [manager.py:391] -ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:34 lightllm_req_id:8 first_token_cost:205.27315139770508ms total_cost_time:205.2912712097168ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:8816 prompt_cache_len:5151 prompt_cache_ratio:0.5842785843920145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 -DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10374069213867188 s -INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10559892654418945 s -DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=283674224862485783665140796794994506073, time:1750767395.3205547s req_ids:[8] -DEBUG 06-24 20:16:35 [manager.py:391] -ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:212.33010292053223ms total_cost_time:212.34989166259766ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8817 prompt_cache_len:5151 prompt_cache_ratio:0.5842123171146648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 -DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10499453544616699 s -INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10706472396850586 s -DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=170725035517218595382162291972276622406, time:1750767395.5360687s req_ids:[8] -DEBUG 06-24 20:16:35 [manager.py:391] -ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:214.17593955993652ms total_cost_time:214.19429779052734ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:8818 prompt_cache_len:5151 prompt_cache_ratio:0.5841460648673169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 -DEBUG 06-24 20:16:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:35 [manager.py:224] router recive req id 8 cost time 0.10378313064575195 s -INFO 06-24 20:16:35 [manager.py:68] detokenization recv req id 8 cost time 0.10566186904907227 s -DEBUG 06-24 20:16:35 [manager.py:391] Prefill Batch: batch_id=131608194390991472688069427284636497936, time:1750767395.7624066s req_ids:[8] -DEBUG 06-24 20:16:35 [manager.py:391] -ERROR 06-24 20:16:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:228.1937599182129ms total_cost_time:228.21545600891113ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8819 prompt_cache_len:5151 prompt_cache_ratio:0.5840798276448577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 -DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.3063206672668457 s -INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.3084678649902344 s -DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=259674633504515716307373476339123297260, time:1750767396.1812396s req_ids:[8] -DEBUG 06-24 20:16:36 [manager.py:391] -ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:35 lightllm_req_id:8 first_token_cost:409.5134735107422ms total_cost_time:409.5327854156494ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8820 prompt_cache_len:5151 prompt_cache_ratio:0.5840136054421768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 -DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10466265678405762 s -INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10669922828674316 s -DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=245515372743785362468523164696002866630, time:1750767396.399474s req_ids:[8] -DEBUG 06-24 20:16:36 [manager.py:391] -ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:214.45226669311523ms total_cost_time:214.47110176086426ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8821 prompt_cache_len:5151 prompt_cache_ratio:0.5839473982541662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 -DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10449433326721191 s -INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10635828971862793 s -DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=283473949696293549861105790889982474077, time:1750767396.6155508s req_ids:[8] -DEBUG 06-24 20:16:36 [manager.py:391] -ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:212.27097511291504ms total_cost_time:212.29004859924316ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8822 prompt_cache_len:5151 prompt_cache_ratio:0.5838812060757198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 -DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:36 [manager.py:224] router recive req id 8 cost time 0.10393285751342773 s -INFO 06-24 20:16:36 [manager.py:68] detokenization recv req id 8 cost time 0.10576295852661133 s -DEBUG 06-24 20:16:36 [manager.py:391] Prefill Batch: batch_id=162770612379230473001523855757170336381, time:1750767396.8295727s req_ids:[8] -DEBUG 06-24 20:16:36 [manager.py:391] -ERROR 06-24 20:16:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:206.30908012390137ms total_cost_time:206.3279151916504ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8823 prompt_cache_len:5151 prompt_cache_ratio:0.5838150289017341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 -DEBUG 06-24 20:16:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10371184349060059 s -INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10552716255187988 s -DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=109590026684623195051473853864703762715, time:1750767397.0416324s req_ids:[8] -DEBUG 06-24 20:16:37 [manager.py:391] -ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:36 lightllm_req_id:8 first_token_cost:207.34333992004395ms total_cost_time:207.36265182495117ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8824 prompt_cache_len:5151 prompt_cache_ratio:0.5837488667271079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 -DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10460257530212402 s -INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10644841194152832 s -DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=114394081305104645478759791613039329994, time:1750767397.2720864s req_ids:[8] -DEBUG 06-24 20:16:37 [manager.py:391] -ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:229.15387153625488ms total_cost_time:229.17413711547852ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8825 prompt_cache_len:5151 prompt_cache_ratio:0.5836827195467422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 -DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10352158546447754 s -INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10539937019348145 s -DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=143263901701319551427814554020459952277, time:1750767397.4907007s req_ids:[8] -DEBUG 06-24 20:16:37 [manager.py:391] -ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:209.64646339416504ms total_cost_time:209.66577529907227ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8826 prompt_cache_len:5151 prompt_cache_ratio:0.5836165873555405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 -DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10483193397521973 s -INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10669445991516113 s -DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=136181734587534091121086275678260560741, time:1750767397.7017734s req_ids:[8] -DEBUG 06-24 20:16:37 [manager.py:391] -ERROR 06-24 20:16:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:208.2996368408203ms total_cost_time:208.31823348999023ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8827 prompt_cache_len:5151 prompt_cache_ratio:0.5835504701484083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 -DEBUG 06-24 20:16:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:37 [manager.py:224] router recive req id 8 cost time 0.10407733917236328 s -INFO 06-24 20:16:37 [manager.py:68] detokenization recv req id 8 cost time 0.10521602630615234 s -DEBUG 06-24 20:16:37 [manager.py:391] Prefill Batch: batch_id=27048329408268098867669136642947898749, time:1750767397.9158554s req_ids:[8] -DEBUG 06-24 20:16:37 [manager.py:391] -ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:37 lightllm_req_id:8 first_token_cost:365.94414710998535ms total_cost_time:365.9639358520508ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8828 prompt_cache_len:5151 prompt_cache_ratio:0.5834843679202537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 -DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10360574722290039 s -INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10550236701965332 s -DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=15049161101907821679235322609179138380, time:1750767398.2821703s req_ids:[8] -DEBUG 06-24 20:16:38 [manager.py:391] -ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:198.23265075683594ms total_cost_time:198.25148582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8829 prompt_cache_len:5151 prompt_cache_ratio:0.5834182806659871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 -DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10488224029541016 s -INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.1067347526550293 s -DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=35169564321805584275233690160675784002, time:1750767398.4927359s req_ids:[8] -DEBUG 06-24 20:16:38 [manager.py:391] -ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:211.27963066101074ms total_cost_time:211.29894256591797ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8830 prompt_cache_len:5151 prompt_cache_ratio:0.5833522083805209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 -DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10374879837036133 s -INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10560226440429688 s -DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=210542581009229946575912674407740530007, time:1750767398.7038364s req_ids:[8] -DEBUG 06-24 20:16:38 [manager.py:391] -ERROR 06-24 20:16:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:207.5819969177246ms total_cost_time:207.60154724121094ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8831 prompt_cache_len:5151 prompt_cache_ratio:0.5832861510587702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 -DEBUG 06-24 20:16:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:38 [manager.py:224] router recive req id 8 cost time 0.10487771034240723 s -INFO 06-24 20:16:38 [manager.py:68] detokenization recv req id 8 cost time 0.10689735412597656 s -DEBUG 06-24 20:16:38 [manager.py:391] Prefill Batch: batch_id=131167741982988970437691125072495916103, time:1750767398.91603s req_ids:[8] -DEBUG 06-24 20:16:38 [manager.py:391] -ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:38 lightllm_req_id:8 first_token_cost:208.58240127563477ms total_cost_time:208.6031436920166ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8832 prompt_cache_len:5151 prompt_cache_ratio:0.5832201086956522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 -DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10381484031677246 s -INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10565853118896484 s -DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=223353594043057109881424729889617159111, time:1750767399.1293898s req_ids:[8] -DEBUG 06-24 20:16:39 [manager.py:391] -ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:209.05065536499023ms total_cost_time:209.06925201416016ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8833 prompt_cache_len:5151 prompt_cache_ratio:0.5831540812860863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 -DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10398173332214355 s -INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10587263107299805 s -DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=264577059700914300962971012242922819357, time:1750767399.3437293s req_ids:[8] -DEBUG 06-24 20:16:39 [manager.py:391] -ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:209.2435359954834ms total_cost_time:209.26427841186523ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8834 prompt_cache_len:5151 prompt_cache_ratio:0.5830880688249943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 -DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10385608673095703 s -INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10574460029602051 s -DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=28654973021796738562290714145069808892, time:1750767399.5623739s req_ids:[8] -DEBUG 06-24 20:16:39 [manager.py:391] -DEBUG 06-24 20:16:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 37859.789 tokens/s -DEBUG 06-24 20:16:39 [stats.py:37] Avg prompt tokens throughput: 37851.200 tokens/s -DEBUG 06-24 20:16:39 [stats.py:37] Avg generate tokens throughput: 8.589 tokens/s -ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:217.34976768493652ms total_cost_time:217.37146377563477ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8835 prompt_cache_len:5151 prompt_cache_ratio:0.5830220713073005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 -DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10383081436157227 s -INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10582327842712402 s -DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=246337646210155313187594556677981933975, time:1750767399.7782652s req_ids:[8] -DEBUG 06-24 20:16:39 [manager.py:391] -ERROR 06-24 20:16:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:212.9356861114502ms total_cost_time:212.95642852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8836 prompt_cache_len:5151 prompt_cache_ratio:0.5829560887279311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 -DEBUG 06-24 20:16:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:39 [manager.py:224] router recive req id 8 cost time 0.10350203514099121 s -INFO 06-24 20:16:39 [manager.py:68] detokenization recv req id 8 cost time 0.10544085502624512 s -DEBUG 06-24 20:16:39 [manager.py:391] Prefill Batch: batch_id=125029425744185041451188223092067272870, time:1750767399.9932396s req_ids:[8] -DEBUG 06-24 20:16:39 [manager.py:391] -ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:39 lightllm_req_id:8 first_token_cost:211.06958389282227ms total_cost_time:211.0888957977295ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8837 prompt_cache_len:5151 prompt_cache_ratio:0.5828901210818151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 -DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10472512245178223 s -INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10666632652282715 s -DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=108739890810296384291883431736135630121, time:1750767400.2146661s req_ids:[8] -DEBUG 06-24 20:16:40 [manager.py:391] -ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:372.6067543029785ms total_cost_time:372.62678146362305ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8838 prompt_cache_len:5151 prompt_cache_ratio:0.5828241683638832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 -DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10480642318725586 s -INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10683774948120117 s -DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=137182879272665246365222353747197067933, time:1750767400.5910578s req_ids:[8] -DEBUG 06-24 20:16:40 [manager.py:391] -ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:223.97947311401367ms total_cost_time:223.9995002746582ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8839 prompt_cache_len:5151 prompt_cache_ratio:0.582758230569069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 -DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:40 [manager.py:224] router recive req id 8 cost time 0.10363221168518066 s -INFO 06-24 20:16:40 [manager.py:68] detokenization recv req id 8 cost time 0.10560035705566406 s -DEBUG 06-24 20:16:40 [manager.py:391] Prefill Batch: batch_id=43623040478605321900639345151403157762, time:1750767400.8228626s req_ids:[8] -DEBUG 06-24 20:16:40 [manager.py:391] -ERROR 06-24 20:16:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:215.90185165405273ms total_cost_time:215.92307090759277ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8840 prompt_cache_len:5151 prompt_cache_ratio:0.5826923076923077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 -DEBUG 06-24 20:16:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.10401391983032227 s -INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10594487190246582 s -DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=162256754651009415115314769247972693438, time:1750767401.0461504s req_ids:[8] -DEBUG 06-24 20:16:41 [manager.py:391] -ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:40 lightllm_req_id:8 first_token_cost:219.75326538085938ms total_cost_time:219.7725772857666ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8841 prompt_cache_len:5151 prompt_cache_ratio:0.5826263997285375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 -DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1053001880645752 s -INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10736322402954102 s -DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=254538388498492174788304656452027119200, time:1750767401.2610793s req_ids:[8] -DEBUG 06-24 20:16:41 [manager.py:391] -ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:208.22930335998535ms total_cost_time:208.2505226135254ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8842 prompt_cache_len:5151 prompt_cache_ratio:0.5825605066726984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 -DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1048133373260498 s -INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10672450065612793 s -DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=7716106725998897597704434543669567369, time:1750767401.4738746s req_ids:[8] -DEBUG 06-24 20:16:41 [manager.py:391] -ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:212.8303050994873ms total_cost_time:212.84937858581543ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8843 prompt_cache_len:5151 prompt_cache_ratio:0.5824946285197331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 -DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.10459351539611816 s -INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10648560523986816 s -DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=294638686245784400283276032346098376840, time:1750767401.6894193s req_ids:[8] -DEBUG 06-24 20:16:41 [manager.py:391] -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:210.49261093139648ms total_cost_time:210.51359176635742ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8844 prompt_cache_len:5151 prompt_cache_ratio:0.5824287652645862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 -DEBUG 06-24 20:16:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:41 [manager.py:224] router recive req id 8 cost time 0.1035163402557373 s -INFO 06-24 20:16:41 [manager.py:68] detokenization recv req id 8 cost time 0.10536384582519531 s -DEBUG 06-24 20:16:41 [manager.py:391] Prefill Batch: batch_id=334334543676120450436359452421978621661, time:1750767401.9036028s req_ids:[8] -DEBUG 06-24 20:16:41 [manager.py:391] -ERROR 06-24 20:16:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:41 lightllm_req_id:8 first_token_cost:212.61024475097656ms total_cost_time:212.62884140014648ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8845 prompt_cache_len:5151 prompt_cache_ratio:0.5823629169022047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 -DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10465669631958008 s -INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.10654997825622559 s -DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=201531809068318213141755797680642456468, time:1750767402.1179054s req_ids:[8] -DEBUG 06-24 20:16:42 [manager.py:391] -ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:205.92212677001953ms total_cost_time:205.94191551208496ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8846 prompt_cache_len:5151 prompt_cache_ratio:0.5822970834275378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 -DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.3053457736968994 s -INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.3073160648345947 s -DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=15051369468374919355692436498628890880, time:1750767402.5313802s req_ids:[8] -DEBUG 06-24 20:16:42 [manager.py:391] -ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:411.0136032104492ms total_cost_time:411.03267669677734ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8847 prompt_cache_len:5151 prompt_cache_ratio:0.5822312648355374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 -DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10471534729003906 s -INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.1066884994506836 s -DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=63887917890302927817416729281654573746, time:1750767402.747182s req_ids:[8] -DEBUG 06-24 20:16:42 [manager.py:391] -ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:208.4176540374756ms total_cost_time:208.4369659423828ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8848 prompt_cache_len:5151 prompt_cache_ratio:0.5821654611211573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 -DEBUG 06-24 20:16:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:42 [manager.py:224] router recive req id 8 cost time 0.10439324378967285 s -INFO 06-24 20:16:42 [manager.py:68] detokenization recv req id 8 cost time 0.10666942596435547 s -DEBUG 06-24 20:16:42 [manager.py:391] Prefill Batch: batch_id=216554087575526819063725601505553162961, time:1750767402.9593923s req_ids:[8] -DEBUG 06-24 20:16:42 [manager.py:391] -ERROR 06-24 20:16:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:155.1969051361084ms total_cost_time:155.23838996887207ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:8849 prompt_cache_len:5151 prompt_cache_ratio:0.5820996722793536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 -DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10394620895385742 s -INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10590600967407227 s -DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=76743676554788380930598546864352706572, time:1750767403.107903s req_ids:[8] -DEBUG 06-24 20:16:43 [manager.py:391] -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:42 lightllm_req_id:8 first_token_cost:180.8948516845703ms total_cost_time:180.91440200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8850 prompt_cache_len:5151 prompt_cache_ratio:0.5820338983050848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 -DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10469651222229004 s -INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.1066141128540039 s -DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=152393125699855840421739180105974931880, time:1750767403.3042023s req_ids:[8] -DEBUG 06-24 20:16:43 [manager.py:391] -ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:203.460693359375ms total_cost_time:203.48191261291504ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8851 prompt_cache_len:5151 prompt_cache_ratio:0.5819681391933115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 -DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.1046745777130127 s -INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10648918151855469 s -DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=94896021534387880590814201143537496803, time:1750767403.5137482s req_ids:[8] -DEBUG 06-24 20:16:43 [manager.py:391] -ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:207.89837837219238ms total_cost_time:207.9179286956787ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8852 prompt_cache_len:5151 prompt_cache_ratio:0.5819023949389969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 -DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.10469985008239746 s -INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.1065220832824707 s -DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=280050071233375162656695273907039330622, time:1750767403.726173s req_ids:[8] -DEBUG 06-24 20:16:43 [manager.py:391] -ERROR 06-24 20:16:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:210.62850952148438ms total_cost_time:210.6485366821289ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8853 prompt_cache_len:5151 prompt_cache_ratio:0.5818366655371061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 -DEBUG 06-24 20:16:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:43 [manager.py:224] router recive req id 8 cost time 0.1048436164855957 s -INFO 06-24 20:16:43 [manager.py:68] detokenization recv req id 8 cost time 0.10674571990966797 s -DEBUG 06-24 20:16:43 [manager.py:391] Prefill Batch: batch_id=302825814543166830955819650169748163577, time:1750767403.9407737s req_ids:[8] -DEBUG 06-24 20:16:43 [manager.py:391] -ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:43 lightllm_req_id:8 first_token_cost:209.80215072631836ms total_cost_time:209.8228931427002ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8854 prompt_cache_len:5151 prompt_cache_ratio:0.5817709509826068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 -DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.1039879322052002 s -INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10601520538330078 s -DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=30968407067180982044031480715023409910, time:1750767404.155743s req_ids:[8] -DEBUG 06-24 20:16:44 [manager.py:391] -ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:372.23243713378906ms total_cost_time:372.2541332244873ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8855 prompt_cache_len:5151 prompt_cache_ratio:0.5817052512704687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 -DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10376596450805664 s -INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.1055753231048584 s -DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=13431603512594475036307027071388676733, time:1750767404.5293074s req_ids:[8] -DEBUG 06-24 20:16:44 [manager.py:391] -ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:195.4789161682129ms total_cost_time:195.4977512359619ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8856 prompt_cache_len:5151 prompt_cache_ratio:0.5816395663956639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 -DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10516762733459473 s -INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10706400871276855 s -DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=194646685453142923901148799521286369227, time:1750767404.729637s req_ids:[8] -DEBUG 06-24 20:16:44 [manager.py:391] -ERROR 06-24 20:16:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:205.2299976348877ms total_cost_time:205.24859428405762ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:8857 prompt_cache_len:5151 prompt_cache_ratio:0.581573896353167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 -DEBUG 06-24 20:16:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:44 [manager.py:224] router recive req id 8 cost time 0.10390806198120117 s -INFO 06-24 20:16:44 [manager.py:68] detokenization recv req id 8 cost time 0.10580658912658691 s -DEBUG 06-24 20:16:44 [manager.py:391] Prefill Batch: batch_id=265369481400982825314228131935786264454, time:1750767404.9390323s req_ids:[8] -DEBUG 06-24 20:16:44 [manager.py:391] -ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:44 lightllm_req_id:8 first_token_cost:211.0896110534668ms total_cost_time:211.11011505126953ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8858 prompt_cache_len:5151 prompt_cache_ratio:0.5815082411379544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 -DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10492920875549316 s -INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10681581497192383 s -DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=140419740045284432005756807358752268198, time:1750767405.153852s req_ids:[8] -DEBUG 06-24 20:16:45 [manager.py:391] -ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:207.29613304138184ms total_cost_time:207.31592178344727ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8859 prompt_cache_len:5151 prompt_cache_ratio:0.5814426007450051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 -DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10389375686645508 s -INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.1057901382446289 s -DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=241322000246597304240476955131738362531, time:1750767405.371206s req_ids:[8] -DEBUG 06-24 20:16:45 [manager.py:391] -ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:215.00182151794434ms total_cost_time:215.02137184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8860 prompt_cache_len:5151 prompt_cache_ratio:0.5813769751693002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 -DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10375308990478516 s -INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10569286346435547 s -DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=194202088129139056798334056657092975400, time:1750767405.5869563s req_ids:[8] -DEBUG 06-24 20:16:45 [manager.py:391] -ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:214.33568000793457ms total_cost_time:214.35546875ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8861 prompt_cache_len:5151 prompt_cache_ratio:0.5813113644058233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 -DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10451149940490723 s -INFO 06-24 20:16:45 [manager.py:68] detokenization recv req id 8 cost time 0.10636067390441895 s -DEBUG 06-24 20:16:45 [manager.py:391] Prefill Batch: batch_id=284682553372056024422139272034783985704, time:1750767405.8031723s req_ids:[8] -DEBUG 06-24 20:16:45 [manager.py:391] -ERROR 06-24 20:16:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:206.69317245483398ms total_cost_time:206.71319961547852ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8862 prompt_cache_len:5151 prompt_cache_ratio:0.5812457684495599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 -DEBUG 06-24 20:16:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:45 [manager.py:224] router recive req id 8 cost time 0.10395431518554688 s -INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.10574555397033691 s -DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=268323881653810775414952847802068330695, time:1750767406.0159044s req_ids:[8] -DEBUG 06-24 20:16:46 [manager.py:391] -ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:45 lightllm_req_id:8 first_token_cost:209.05113220214844ms total_cost_time:209.07211303710938ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8863 prompt_cache_len:5151 prompt_cache_ratio:0.5811801872954981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 -DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10363435745239258 s -INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.1055450439453125 s -DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=93669106560416661597049720443583595761, time:1750767406.228901s req_ids:[8] -DEBUG 06-24 20:16:46 [manager.py:391] -ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:211.69805526733398ms total_cost_time:211.72618865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:8864 prompt_cache_len:5151 prompt_cache_ratio:0.5811146209386282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 -DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10480451583862305 s -INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.1066749095916748 s -DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=79452523022682796777905236322953823783, time:1750767406.4440258s req_ids:[8] -DEBUG 06-24 20:16:46 [manager.py:391] -ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:370.0978755950928ms total_cost_time:370.1174259185791ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8865 prompt_cache_len:5151 prompt_cache_ratio:0.5810490693739425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 -DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:46 [manager.py:224] router recive req id 8 cost time 0.10348701477050781 s -INFO 06-24 20:16:46 [manager.py:68] detokenization recv req id 8 cost time 0.10527491569519043 s -DEBUG 06-24 20:16:46 [manager.py:391] Prefill Batch: batch_id=103297020694432281771462674145471856787, time:1750767406.8150291s req_ids:[8] -DEBUG 06-24 20:16:46 [manager.py:391] -ERROR 06-24 20:16:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:198.87638092041016ms total_cost_time:198.89521598815918ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:8866 prompt_cache_len:5151 prompt_cache_ratio:0.5809835325964359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 -INFO 06-24 20:16:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:16:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10459518432617188 s -INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.1064763069152832 s -DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=59511156612033797311193696909033272538, time:1750767407.027178s req_ids:[8] -DEBUG 06-24 20:16:47 [manager.py:391] -ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:46 lightllm_req_id:8 first_token_cost:211.17281913757324ms total_cost_time:211.19308471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8867 prompt_cache_len:5151 prompt_cache_ratio:0.5809180106011053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 -DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s -INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10575056076049805 s -DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=65216870026988162575463225909234083661, time:1750767407.2381s req_ids:[8] -DEBUG 06-24 20:16:47 [manager.py:391] -ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:211.26937866210938ms total_cost_time:211.289644241333ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8868 prompt_cache_len:5151 prompt_cache_ratio:0.5808525033829499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 -DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10445523262023926 s -INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.1061551570892334 s -DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=101550760431193320321205862337742963104, time:1750767407.4532542s req_ids:[8] -DEBUG 06-24 20:16:47 [manager.py:391] -ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:209.3801498413086ms total_cost_time:209.39970016479492ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8869 prompt_cache_len:5151 prompt_cache_ratio:0.5807870109369715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 -DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10376787185668945 s -INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10548567771911621 s -DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=134766213092950017905986676544824762022, time:1750767407.670551s req_ids:[8] -DEBUG 06-24 20:16:47 [manager.py:391] -ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:206.47668838500977ms total_cost_time:206.4957618713379ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8870 prompt_cache_len:5151 prompt_cache_ratio:0.5807215332581737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 -DEBUG 06-24 20:16:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:47 [manager.py:224] router recive req id 8 cost time 0.10480737686157227 s -INFO 06-24 20:16:47 [manager.py:68] detokenization recv req id 8 cost time 0.10650205612182617 s -DEBUG 06-24 20:16:47 [manager.py:391] Prefill Batch: batch_id=120576872507559406119096675302525861864, time:1750767407.8770566s req_ids:[8] -DEBUG 06-24 20:16:47 [manager.py:391] -ERROR 06-24 20:16:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:205.39379119873047ms total_cost_time:205.4128646850586ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8871 prompt_cache_len:5151 prompt_cache_ratio:0.5806560703415624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 -DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10469222068786621 s -INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10646343231201172 s -DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=177647161323810744699679365650847905483, time:1750767408.0991855s req_ids:[8] -DEBUG 06-24 20:16:48 [manager.py:391] -ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:47 lightllm_req_id:8 first_token_cost:226.17149353027344ms total_cost_time:226.19128227233887ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8872 prompt_cache_len:5151 prompt_cache_ratio:0.5805906221821461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 -DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.1045684814453125 s -INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10635733604431152 s -DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=209578003027673564330038902945213060539, time:1750767408.3167858s req_ids:[8] -DEBUG 06-24 20:16:48 [manager.py:391] -ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:376.16491317749023ms total_cost_time:376.1858940124512ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:8873 prompt_cache_len:5151 prompt_cache_ratio:0.5805251887749352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 -DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10478472709655762 s -INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.10660290718078613 s -DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=230569964652894574798238065813458826228, time:1750767408.6950932s req_ids:[8] -DEBUG 06-24 20:16:48 [manager.py:391] -ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:202.5277614593506ms total_cost_time:202.54778861999512ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8874 prompt_cache_len:5151 prompt_cache_ratio:0.5804597701149425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 -DEBUG 06-24 20:16:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:48 [manager.py:224] router recive req id 8 cost time 0.10381793975830078 s -INFO 06-24 20:16:48 [manager.py:68] detokenization recv req id 8 cost time 0.1056816577911377 s -DEBUG 06-24 20:16:48 [manager.py:391] Prefill Batch: batch_id=238393472863888964353804205166056023551, time:1750767408.9107988s req_ids:[8] -DEBUG 06-24 20:16:48 [manager.py:391] -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:48 lightllm_req_id:8 first_token_cost:214.1425609588623ms total_cost_time:214.202880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8875 prompt_cache_len:5151 prompt_cache_ratio:0.5803943661971831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 -DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10945320129394531 s -INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.11137104034423828 s -DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=65750811711056253505751207091851935002, time:1750767409.1231465s req_ids:[8] -DEBUG 06-24 20:16:49 [manager.py:391] -ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:203.7336826324463ms total_cost_time:203.7956714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8876 prompt_cache_len:5151 prompt_cache_ratio:0.5803289770166742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 -DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.1078953742980957 s -INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10978126525878906 s -DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=23083063524322175496351897776305135026, time:1750767409.332368s req_ids:[8] -DEBUG 06-24 20:16:49 [manager.py:391] -ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:209.57422256469727ms total_cost_time:209.63406562805176ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8877 prompt_cache_len:5151 prompt_cache_ratio:0.5802636025684352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 -DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10747480392456055 s -INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10945940017700195 s -DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=254825678638041634889819029363112995762, time:1750767409.547216s req_ids:[8] -DEBUG 06-24 20:16:49 [manager.py:391] -ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:16:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 37830.726 tokens/s -DEBUG 06-24 20:16:49 [stats.py:37] Avg prompt tokens throughput: 37822.086 tokens/s -DEBUG 06-24 20:16:49 [stats.py:37] Avg generate tokens throughput: 8.640 tokens/s -INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:202.91900634765625ms total_cost_time:202.98004150390625ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8878 prompt_cache_len:5151 prompt_cache_ratio:0.5801982428474882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 -DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10749506950378418 s -INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.10941147804260254 s -DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=220977894733042176549695653389239948043, time:1750767409.757163s req_ids:[8] -DEBUG 06-24 20:16:49 [manager.py:391] -ERROR 06-24 20:16:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:204.9703598022461ms total_cost_time:205.0302028656006ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8879 prompt_cache_len:5151 prompt_cache_ratio:0.5801328978488568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 -DEBUG 06-24 20:16:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:49 [manager.py:224] router recive req id 8 cost time 0.10914397239685059 s -INFO 06-24 20:16:49 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s -DEBUG 06-24 20:16:49 [manager.py:391] Prefill Batch: batch_id=102342554391041541289847739531866024393, time:1750767409.9684293s req_ids:[8] -DEBUG 06-24 20:16:49 [manager.py:391] -ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:49 lightllm_req_id:8 first_token_cost:211.97891235351562ms total_cost_time:212.04090118408203ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8880 prompt_cache_len:5151 prompt_cache_ratio:0.5800675675675676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 -DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.10838770866394043 s -INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s -DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=77379161617885543387170311483269266961, time:1750767410.1845493s req_ids:[8] -DEBUG 06-24 20:16:50 [manager.py:391] -ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:198.43196868896484ms total_cost_time:198.49181175231934ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8881 prompt_cache_len:5151 prompt_cache_ratio:0.5800022519986489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 -DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.31032609939575195 s -INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.31241750717163086 s -DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=161146434150085977531548893645227060842, time:1750767410.5975466s req_ids:[8] -DEBUG 06-24 20:16:50 [manager.py:391] -ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:419.28863525390625ms total_cost_time:419.34871673583984ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8882 prompt_cache_len:5151 prompt_cache_ratio:0.5799369511371313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 -DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:50 [manager.py:224] router recive req id 8 cost time 0.10771846771240234 s -INFO 06-24 20:16:50 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s -DEBUG 06-24 20:16:50 [manager.py:391] Prefill Batch: batch_id=223533037483694452704536624846233538098, time:1750767410.8171737s req_ids:[8] -DEBUG 06-24 20:16:50 [manager.py:391] -ERROR 06-24 20:16:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:205.69992065429688ms total_cost_time:205.76190948486328ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8883 prompt_cache_len:5151 prompt_cache_ratio:0.579871664978048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 -DEBUG 06-24 20:16:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10814189910888672 s -INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.1100759506225586 s -DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=325960077848963605311963975656039667236, time:1750767411.0276473s req_ids:[8] -DEBUG 06-24 20:16:51 [manager.py:391] -ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:50 lightllm_req_id:8 first_token_cost:204.23030853271484ms total_cost_time:204.29039001464844ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8884 prompt_cache_len:5151 prompt_cache_ratio:0.579806393516434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 -DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10739517211914062 s -INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.10924100875854492 s -DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=72846018538006314650065173458323404836, time:1750767411.2403064s req_ids:[8] -DEBUG 06-24 20:16:51 [manager.py:391] -ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:208.9991569519043ms total_cost_time:209.0590000152588ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8885 prompt_cache_len:5151 prompt_cache_ratio:0.579741136747327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 -DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.1084601879119873 s -INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s -DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=254973895708894659982170889728110681850, time:1750767411.4542308s req_ids:[8] -DEBUG 06-24 20:16:51 [manager.py:391] -ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:208.8949680328369ms total_cost_time:208.9557647705078ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8886 prompt_cache_len:5151 prompt_cache_ratio:0.5796758946657664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 -DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s -INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11073422431945801 s -DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=65266010499210479839951921874118006657, time:1750767411.688298s req_ids:[8] -DEBUG 06-24 20:16:51 [manager.py:391] -ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:230.24630546569824ms total_cost_time:230.30710220336914ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8887 prompt_cache_len:5151 prompt_cache_ratio:0.5796106672667942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 -DEBUG 06-24 20:16:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:51 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s -INFO 06-24 20:16:51 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s -DEBUG 06-24 20:16:51 [manager.py:391] Prefill Batch: batch_id=26305488056991098575544296758668740659, time:1750767411.9046948s req_ids:[8] -DEBUG 06-24 20:16:51 [manager.py:391] -ERROR 06-24 20:16:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:206.12740516662598ms total_cost_time:206.2089443206787ms,out_token_counter:1 mean_per_token_cost_time: 0.08153915405273438ms prompt_token_num:8888 prompt_cache_len:5151 prompt_cache_ratio:0.5795454545454546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 -DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10830187797546387 s -INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s -DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=198116328097361737523860085723017705778, time:1750767412.1289146s req_ids:[8] -DEBUG 06-24 20:16:52 [manager.py:391] -ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:51 lightllm_req_id:8 first_token_cost:218.91450881958008ms total_cost_time:218.97339820861816ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:8889 prompt_cache_len:5151 prompt_cache_ratio:0.5794802564967938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 -DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.1072239875793457 s -INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10906600952148438 s -DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=198520961871868292016833177284348416371, time:1750767412.3547006s req_ids:[8] -DEBUG 06-24 20:16:52 [manager.py:391] -ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:431.73837661743164ms total_cost_time:431.7595958709717ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8890 prompt_cache_len:5151 prompt_cache_ratio:0.5794150731158605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 -DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10363602638244629 s -INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10537362098693848 s -DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=71144932878219058634035291584882570372, time:1750767412.7787616s req_ids:[8] -DEBUG 06-24 20:16:52 [manager.py:391] -ERROR 06-24 20:16:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:184.03267860412598ms total_cost_time:184.0522289276123ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8891 prompt_cache_len:5151 prompt_cache_ratio:0.5793499043977055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 -DEBUG 06-24 20:16:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:52 [manager.py:224] router recive req id 8 cost time 0.10348629951477051 s -INFO 06-24 20:16:52 [manager.py:68] detokenization recv req id 8 cost time 0.10547852516174316 s -DEBUG 06-24 20:16:52 [manager.py:391] Prefill Batch: batch_id=153600737672794742458455272146577399276, time:1750767412.9681041s req_ids:[8] -DEBUG 06-24 20:16:52 [manager.py:391] -ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:52 lightllm_req_id:8 first_token_cost:201.71713829040527ms total_cost_time:201.7369270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8892 prompt_cache_len:5151 prompt_cache_ratio:0.5792847503373819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 -DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10386228561401367 s -INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10591769218444824 s -DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=327409902056847920467439628655131409168, time:1750767413.1867535s req_ids:[8] -DEBUG 06-24 20:16:53 [manager.py:391] -INFO 06-24 20:16:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:16:53 [statics_utils.py:24] mean first cost: 228.63510679831708 ms -INFO 06-24 20:16:53 [statics_utils.py:24] mean per token cost: 0.07553394364940436 ms -INFO 06-24 20:16:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:220.98207473754883ms total_cost_time:221.00257873535156ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8893 prompt_cache_len:5151 prompt_cache_ratio:0.5792196109299449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 -DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10386157035827637 s -INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10567188262939453 s -DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=126333554365285730588813938880852083247, time:1750767413.4021406s req_ids:[8] -DEBUG 06-24 20:16:53 [manager.py:391] -ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:211.01617813110352ms total_cost_time:211.03739738464355ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:8894 prompt_cache_len:5151 prompt_cache_ratio:0.579154486170452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 -DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10481858253479004 s -INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10680890083312988 s -DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=141236306381527821731829417056961246032, time:1750767413.61614s req_ids:[8] -DEBUG 06-24 20:16:53 [manager.py:391] -ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:208.6324691772461ms total_cost_time:208.65154266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8895 prompt_cache_len:5151 prompt_cache_ratio:0.5790893760539629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 -DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:53 [manager.py:224] router recive req id 8 cost time 0.10359716415405273 s -INFO 06-24 20:16:53 [manager.py:68] detokenization recv req id 8 cost time 0.10563015937805176 s -DEBUG 06-24 20:16:53 [manager.py:391] Prefill Batch: batch_id=283248732016914005400940595114035059812, time:1750767413.830578s req_ids:[8] -DEBUG 06-24 20:16:53 [manager.py:391] -ERROR 06-24 20:16:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:212.10741996765137ms total_cost_time:212.127685546875ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8896 prompt_cache_len:5151 prompt_cache_ratio:0.5790242805755396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 -DEBUG 06-24 20:16:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10489249229431152 s -INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10672855377197266 s -DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=218385129957549424183202662383248988496, time:1750767414.0442324s req_ids:[8] -DEBUG 06-24 20:16:54 [manager.py:391] -ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:53 lightllm_req_id:8 first_token_cost:206.4380645751953ms total_cost_time:206.45713806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8897 prompt_cache_len:5151 prompt_cache_ratio:0.5789591997302461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 -DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10338759422302246 s -INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10506391525268555 s -DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=191428750527290991069837316898020542357, time:1750767414.2572935s req_ids:[8] -DEBUG 06-24 20:16:54 [manager.py:391] -ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:205.6427001953125ms total_cost_time:205.66320419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:8898 prompt_cache_len:5151 prompt_cache_ratio:0.578894133513149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 -DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10454893112182617 s -INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.10633492469787598 s -DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=118029275559865095275624757838938465751, time:1750767414.4665225s req_ids:[8] -DEBUG 06-24 20:16:54 [manager.py:391] -ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:376.0397434234619ms total_cost_time:376.06143951416016ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:8899 prompt_cache_len:5151 prompt_cache_ratio:0.5788290819193168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 -DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:54 [manager.py:224] router recive req id 8 cost time 0.10453557968139648 s -INFO 06-24 20:16:54 [manager.py:68] detokenization recv req id 8 cost time 0.1064293384552002 s -DEBUG 06-24 20:16:54 [manager.py:391] Prefill Batch: batch_id=52128857476596176353068296351094791082, time:1750767414.8445861s req_ids:[8] -DEBUG 06-24 20:16:54 [manager.py:391] -ERROR 06-24 20:16:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:199.36490058898926ms total_cost_time:199.3856430053711ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8900 prompt_cache_len:5151 prompt_cache_ratio:0.5787640449438203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 -DEBUG 06-24 20:16:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.10363078117370605 s -INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10544633865356445 s -INFO 06-24 20:16:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=33406292991481172648410339890085150551, time:1750767415.0504093s req_ids:[8] -DEBUG 06-24 20:16:55 [manager.py:391] -ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:54 lightllm_req_id:8 first_token_cost:208.21762084960938ms total_cost_time:208.2383632659912ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8901 prompt_cache_len:5151 prompt_cache_ratio:0.5786990225817323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 -DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1047217845916748 s -INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.1064906120300293 s -DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=93803866099495995578927176410997910072, time:1750767415.264898s req_ids:[8] -DEBUG 06-24 20:16:55 [manager.py:391] -ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:208.8930606842041ms total_cost_time:208.91451835632324ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:8902 prompt_cache_len:5151 prompt_cache_ratio:0.5786340148281285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 -DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1039731502532959 s -INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.1059565544128418 s -DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=132243958957585845351225120375591536808, time:1750767415.4772036s req_ids:[8] -DEBUG 06-24 20:16:55 [manager.py:391] -ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:210.22891998291016ms total_cost_time:210.24799346923828ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8903 prompt_cache_len:5151 prompt_cache_ratio:0.578569021678086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 -DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.10372495651245117 s -INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10572385787963867 s -DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=306141301356205742773285364268045159607, time:1750767415.6886394s req_ids:[8] -DEBUG 06-24 20:16:55 [manager.py:391] -ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:202.93045043945312ms total_cost_time:202.95119285583496ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:8904 prompt_cache_len:5151 prompt_cache_ratio:0.5785040431266847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 -DEBUG 06-24 20:16:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:55 [manager.py:224] router recive req id 8 cost time 0.1049342155456543 s -INFO 06-24 20:16:55 [manager.py:68] detokenization recv req id 8 cost time 0.10688138008117676 s -DEBUG 06-24 20:16:55 [manager.py:391] Prefill Batch: batch_id=128943705185753112795225162597262618703, time:1750767415.9034827s req_ids:[8] -DEBUG 06-24 20:16:55 [manager.py:391] -ERROR 06-24 20:16:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:217.24390983581543ms total_cost_time:217.26369857788086ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8905 prompt_cache_len:5151 prompt_cache_ratio:0.5784390791690062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 -DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.1043250560760498 s -INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10641336441040039 s -DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=269739212138248251170632620236071610367, time:1750767416.1211972s req_ids:[8] -DEBUG 06-24 20:16:56 [manager.py:391] -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:16:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:55 lightllm_req_id:8 first_token_cost:211.72833442687988ms total_cost_time:211.74860000610352ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8906 prompt_cache_len:5151 prompt_cache_ratio:0.5783741298001347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 -DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.10384654998779297 s -INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10519909858703613 s -DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=258508451126726824213222623803758099614, time:1750767416.3383834s req_ids:[8] -DEBUG 06-24 20:16:56 [manager.py:391] -ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:395.71166038513184ms total_cost_time:395.73121070861816ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:8907 prompt_cache_len:5151 prompt_cache_ratio:0.5783091950151567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 -DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.10404109954833984 s -INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10604572296142578 s -DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=92245419213367687007050010692471114560, time:1750767416.7341688s req_ids:[8] -DEBUG 06-24 20:16:56 [manager.py:391] -ERROR 06-24 20:16:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:205.18064498901367ms total_cost_time:205.1997184753418ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8908 prompt_cache_len:5151 prompt_cache_ratio:0.5782442748091603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 -DEBUG 06-24 20:16:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:56 [manager.py:224] router recive req id 8 cost time 0.1052248477935791 s -INFO 06-24 20:16:56 [manager.py:68] detokenization recv req id 8 cost time 0.10723423957824707 s -DEBUG 06-24 20:16:56 [manager.py:391] Prefill Batch: batch_id=261456838854199833172417584909902952380, time:1750767416.951939s req_ids:[8] -DEBUG 06-24 20:16:56 [manager.py:391] -ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:56 lightllm_req_id:8 first_token_cost:216.08448028564453ms total_cost_time:216.10355377197266ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8909 prompt_cache_len:5151 prompt_cache_ratio:0.5781793691772366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 -DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10483694076538086 s -INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10669279098510742 s -DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=5678412599399719150484060546967361395, time:1750767417.1648765s req_ids:[8] -DEBUG 06-24 20:16:57 [manager.py:391] -ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:209.63215827941895ms total_cost_time:209.65147018432617ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:8910 prompt_cache_len:5151 prompt_cache_ratio:0.5781144781144781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 -DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10432052612304688 s -INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.1064443588256836 s -DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=274094934530483401352735122898718705961, time:1750767417.3805385s req_ids:[8] -DEBUG 06-24 20:16:57 [manager.py:391] -ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:210.17074584960938ms total_cost_time:210.1898193359375ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:8911 prompt_cache_len:5151 prompt_cache_ratio:0.5780496016159803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 -DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.1039268970489502 s -INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10576772689819336 s -DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=226172706906735974955229582283256859477, time:1750767417.5931454s req_ids:[8] -DEBUG 06-24 20:16:57 [manager.py:391] -ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:209.90419387817383ms total_cost_time:209.92422103881836ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:8912 prompt_cache_len:5151 prompt_cache_ratio:0.5779847396768402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 -DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:57 [manager.py:224] router recive req id 8 cost time 0.10412359237670898 s -INFO 06-24 20:16:57 [manager.py:68] detokenization recv req id 8 cost time 0.10606908798217773 s -DEBUG 06-24 20:16:57 [manager.py:391] Prefill Batch: batch_id=293381907861407826719254971643435041269, time:1750767417.8057432s req_ids:[8] -DEBUG 06-24 20:16:57 [manager.py:391] -ERROR 06-24 20:16:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:210.16407012939453ms total_cost_time:210.18433570861816ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:8913 prompt_cache_len:5151 prompt_cache_ratio:0.5779198922921576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 -DEBUG 06-24 20:16:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10502433776855469 s -INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.10705208778381348 s -DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=310946579013909743027242563710376532228, time:1750767418.0210376s req_ids:[8] -DEBUG 06-24 20:16:58 [manager.py:391] -ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:57 lightllm_req_id:8 first_token_cost:207.51953125ms total_cost_time:207.53931999206543ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:8914 prompt_cache_len:5151 prompt_cache_ratio:0.5778550594570339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 -DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10654377937316895 s -INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.10862350463867188 s -DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=77772935397058980906716150862557068636, time:1750767418.2367756s req_ids:[8] -DEBUG 06-24 20:16:58 [manager.py:391] -ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:211.46583557128906ms total_cost_time:211.50875091552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8915 prompt_cache_len:5151 prompt_cache_ratio:0.5777902411665732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 -DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10906577110290527 s -INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.11095929145812988 s -DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=229380487342592059473794077439139877362, time:1750767418.448589s req_ids:[8] -DEBUG 06-24 20:16:58 [manager.py:391] -ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:372.59364128112793ms total_cost_time:372.6377487182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8916 prompt_cache_len:5151 prompt_cache_ratio:0.5777254374158816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 -DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:58 [manager.py:224] router recive req id 8 cost time 0.10831546783447266 s -INFO 06-24 20:16:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101830005645752 s -DEBUG 06-24 20:16:58 [manager.py:391] Prefill Batch: batch_id=233375322972774472048694040053618011096, time:1750767418.8250642s req_ids:[8] -DEBUG 06-24 20:16:58 [manager.py:391] -ERROR 06-24 20:16:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:193.53485107421875ms total_cost_time:193.57943534851074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8917 prompt_cache_len:5151 prompt_cache_ratio:0.5776606482000672 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 -DEBUG 06-24 20:16:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10781192779541016 s -INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s -DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=307005661884289463220043092330619282146, time:1750767419.0303292s req_ids:[8] -DEBUG 06-24 20:16:59 [manager.py:391] -ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:58 lightllm_req_id:8 first_token_cost:211.28559112548828ms total_cost_time:211.34686470031738ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8918 prompt_cache_len:5151 prompt_cache_ratio:0.5775958735142409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 -DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10736584663391113 s -INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10921335220336914 s -DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=30755010886150805956282936688883985615, time:1750767419.2480152s req_ids:[8] -DEBUG 06-24 20:16:59 [manager.py:391] -ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:218.52564811706543ms total_cost_time:218.58620643615723ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8919 prompt_cache_len:5151 prompt_cache_ratio:0.5775311133535149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 -DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10774779319763184 s -INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.10946917533874512 s -DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=160303467202031100191546329919921789217, time:1750767419.4659355s req_ids:[8] -DEBUG 06-24 20:16:59 [manager.py:391] -ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:166.61620140075684ms total_cost_time:166.67771339416504ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8920 prompt_cache_len:5151 prompt_cache_ratio:0.5774663677130045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 -DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.10836958885192871 s -INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.1100759506225586 s -DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=19315419347180410015663360530496610774, time:1750767419.6416104s req_ids:[8] -DEBUG 06-24 20:16:59 [manager.py:391] -DEBUG 06-24 20:16:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 38243.306 tokens/s -DEBUG 06-24 20:16:59 [stats.py:37] Avg prompt tokens throughput: 38234.814 tokens/s -DEBUG 06-24 20:16:59 [stats.py:37] Avg generate tokens throughput: 8.492 tokens/s -ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:170.0572967529297ms total_cost_time:170.1183319091797ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8921 prompt_cache_len:5151 prompt_cache_ratio:0.5774016365878265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 -DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:16:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:16:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:16:59 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s -INFO 06-24 20:16:59 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s -DEBUG 06-24 20:16:59 [manager.py:391] Prefill Batch: batch_id=319220720399587162835807293767505459398, time:1750767419.813506s req_ids:[8] -DEBUG 06-24 20:16:59 [manager.py:391] -ERROR 06-24 20:16:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:16:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:16:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:201.8752098083496ms total_cost_time:201.93743705749512ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8922 prompt_cache_len:5151 prompt_cache_ratio:0.5773369199731002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:16:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 -DEBUG 06-24 20:16:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:16:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:16:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:16:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:16:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10869240760803223 s -INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s -DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=317787984170760226683943948325725029628, time:1750767420.0185385s req_ids:[8] -DEBUG 06-24 20:17:00 [manager.py:391] -ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:16:59 lightllm_req_id:8 first_token_cost:211.63344383239746ms total_cost_time:211.69304847717285ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:8923 prompt_cache_len:5151 prompt_cache_ratio:0.5772722178639471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 -DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s -INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11091446876525879 s -DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=42331215773084319277829802411603563280, time:1750767420.233102s req_ids:[8] -DEBUG 06-24 20:17:00 [manager.py:391] -ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:205.23357391357422ms total_cost_time:205.291748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:8924 prompt_cache_len:5151 prompt_cache_ratio:0.5772075302554908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 -DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10750651359558105 s -INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.10943222045898438 s -DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=282766992145340919586161690440461338436, time:1750767420.442701s req_ids:[8] -DEBUG 06-24 20:17:00 [manager.py:391] -ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:359.5857620239258ms total_cost_time:359.6305847167969ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8925 prompt_cache_len:5151 prompt_cache_ratio:0.5771428571428572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 -DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:00 [manager.py:224] router recive req id 8 cost time 0.10858154296875 s -INFO 06-24 20:17:00 [manager.py:68] detokenization recv req id 8 cost time 0.11070418357849121 s -DEBUG 06-24 20:17:00 [manager.py:391] Prefill Batch: batch_id=88681147693703614893608177227914178313, time:1750767420.8084419s req_ids:[8] -DEBUG 06-24 20:17:00 [manager.py:391] -ERROR 06-24 20:17:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:204.5130729675293ms total_cost_time:204.5576572418213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8926 prompt_cache_len:5151 prompt_cache_ratio:0.5770781985211741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 -DEBUG 06-24 20:17:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10773921012878418 s -INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.10979866981506348 s -DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=124778285220701490301268569749554298348, time:1750767421.0209086s req_ids:[8] -DEBUG 06-24 20:17:01 [manager.py:391] -ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:00 lightllm_req_id:8 first_token_cost:202.59881019592285ms total_cost_time:202.64196395874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8927 prompt_cache_len:5151 prompt_cache_ratio:0.5770135543855719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 -DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10986852645874023 s -INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.11184334754943848 s -DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=271529714286811458371990989738542139090, time:1750767421.228776s req_ids:[8] -DEBUG 06-24 20:17:01 [manager.py:391] -ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:204.12468910217285ms total_cost_time:204.20455932617188ms,out_token_counter:1 mean_per_token_cost_time: 0.07987022399902344ms prompt_token_num:8928 prompt_cache_len:5151 prompt_cache_ratio:0.5769489247311828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 -DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10862350463867188 s -INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.11059761047363281 s -DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=94065213730399421592732381490742650244, time:1750767421.4409108s req_ids:[8] -DEBUG 06-24 20:17:01 [manager.py:391] -ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:205.72733879089355ms total_cost_time:205.77001571655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8929 prompt_cache_len:5151 prompt_cache_ratio:0.5768843095531414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 -DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10886764526367188 s -INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.1108255386352539 s -DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=176143583590473473350966258398461638983, time:1750767421.6582363s req_ids:[8] -DEBUG 06-24 20:17:01 [manager.py:391] -ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:213.24896812438965ms total_cost_time:213.29474449157715ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8930 prompt_cache_len:5151 prompt_cache_ratio:0.5768197088465845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 -DEBUG 06-24 20:17:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:01 [manager.py:224] router recive req id 8 cost time 0.10754132270812988 s -INFO 06-24 20:17:01 [manager.py:68] detokenization recv req id 8 cost time 0.1097116470336914 s -DEBUG 06-24 20:17:01 [manager.py:391] Prefill Batch: batch_id=254146057358101458130434997105210517413, time:1750767421.8734066s req_ids:[8] -DEBUG 06-24 20:17:01 [manager.py:391] -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:209.87319946289062ms total_cost_time:209.93995666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0667572021484375ms prompt_token_num:8931 prompt_cache_len:5151 prompt_cache_ratio:0.576755122606651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 -DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10810542106628418 s -INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005926132202148 s -DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=87262195068118814638037801191140550472, time:1750767422.0885563s req_ids:[8] -DEBUG 06-24 20:17:02 [manager.py:391] -ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:01 lightllm_req_id:8 first_token_cost:207.55743980407715ms total_cost_time:207.60226249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8932 prompt_cache_len:5151 prompt_cache_ratio:0.5766905508284819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 -DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.30936336517333984 s -INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.31136536598205566 s -DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=330993943936161122929188293806647668386, time:1750767422.5048394s req_ids:[8] -DEBUG 06-24 20:17:02 [manager.py:391] -ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:413.3129119873047ms total_cost_time:413.3586883544922ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8933 prompt_cache_len:5151 prompt_cache_ratio:0.5766259935072204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 -DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10788488388061523 s -INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.10979819297790527 s -DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=267373276406362962481736055622660704725, time:1750767422.7207217s req_ids:[8] -DEBUG 06-24 20:17:02 [manager.py:391] -ERROR 06-24 20:17:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:206.31957054138184ms total_cost_time:206.36534690856934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8934 prompt_cache_len:5151 prompt_cache_ratio:0.5765614506380121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 -DEBUG 06-24 20:17:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:02 [manager.py:224] router recive req id 8 cost time 0.10776710510253906 s -INFO 06-24 20:17:02 [manager.py:68] detokenization recv req id 8 cost time 0.10980963706970215 s -DEBUG 06-24 20:17:02 [manager.py:391] Prefill Batch: batch_id=215351417917954018970827477105021411992, time:1750767422.9345574s req_ids:[8] -DEBUG 06-24 20:17:02 [manager.py:391] -ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:02 lightllm_req_id:8 first_token_cost:204.1923999786377ms total_cost_time:204.2388916015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:8935 prompt_cache_len:5151 prompt_cache_ratio:0.5764969222160045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 -DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.1069483757019043 s -INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.10881519317626953 s -DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=11619329866567844193776717127063708485, time:1750767423.141983s req_ids:[8] -DEBUG 06-24 20:17:03 [manager.py:391] -ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:204.48040962219238ms total_cost_time:204.52380180358887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8936 prompt_cache_len:5151 prompt_cache_ratio:0.5764324082363473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 -DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.1084129810333252 s -INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11026692390441895 s -DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=122322695847459535119690543336852838010, time:1750767423.3523335s req_ids:[8] -DEBUG 06-24 20:17:03 [manager.py:391] -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:207.12780952453613ms total_cost_time:207.17144012451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8937 prompt_cache_len:5151 prompt_cache_ratio:0.5763679086941926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 -DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10873985290527344 s -INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11067819595336914 s -DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=32850394343742274739563683495382795667, time:1750767423.566508s req_ids:[8] -DEBUG 06-24 20:17:03 [manager.py:391] -ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:208.59885215759277ms total_cost_time:208.64272117614746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8938 prompt_cache_len:5151 prompt_cache_ratio:0.5763034235846946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 -DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10757231712341309 s -INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.10936284065246582 s -DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=161904701201333922480716624142633716983, time:1750767423.781403s req_ids:[8] -DEBUG 06-24 20:17:03 [manager.py:391] -ERROR 06-24 20:17:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:206.68506622314453ms total_cost_time:206.72917366027832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8939 prompt_cache_len:5151 prompt_cache_ratio:0.5762389529030093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 -DEBUG 06-24 20:17:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:03 [manager.py:224] router recive req id 8 cost time 0.10871124267578125 s -INFO 06-24 20:17:03 [manager.py:68] detokenization recv req id 8 cost time 0.11063432693481445 s -DEBUG 06-24 20:17:03 [manager.py:391] Prefill Batch: batch_id=264584927080220929741692407812855701077, time:1750767423.9925942s req_ids:[8] -DEBUG 06-24 20:17:03 [manager.py:391] -ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:03 lightllm_req_id:8 first_token_cost:205.46841621398926ms total_cost_time:205.51204681396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8940 prompt_cache_len:5151 prompt_cache_ratio:0.5761744966442953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 -DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10752177238464355 s -INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.10940361022949219 s -DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=130158855593065881367684946236579652488, time:1750767424.2064216s req_ids:[8] -DEBUG 06-24 20:17:04 [manager.py:391] -ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:379.6501159667969ms total_cost_time:379.6954154968262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:8941 prompt_cache_len:5151 prompt_cache_ratio:0.5761100548037132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 -DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10875964164733887 s -INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s -DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=318680504543133720992798802114768743113, time:1750767424.584957s req_ids:[8] -DEBUG 06-24 20:17:04 [manager.py:391] -ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:204.85901832580566ms total_cost_time:204.90336418151855ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8942 prompt_cache_len:5151 prompt_cache_ratio:0.5760456273764258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 -DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:04 [manager.py:224] router recive req id 8 cost time 0.10762333869934082 s -INFO 06-24 20:17:04 [manager.py:68] detokenization recv req id 8 cost time 0.10960125923156738 s -DEBUG 06-24 20:17:04 [manager.py:391] Prefill Batch: batch_id=337005028610997234267978534964101998304, time:1750767424.7983696s req_ids:[8] -DEBUG 06-24 20:17:04 [manager.py:391] -ERROR 06-24 20:17:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:206.82716369628906ms total_cost_time:206.87174797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8943 prompt_cache_len:5151 prompt_cache_ratio:0.5759812143575981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 -DEBUG 06-24 20:17:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10777044296264648 s -INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s -DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=239378083890809986108219071636611794478, time:1750767425.0132086s req_ids:[8] -DEBUG 06-24 20:17:05 [manager.py:391] -ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:04 lightllm_req_id:8 first_token_cost:205.37233352661133ms total_cost_time:205.4152488708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8944 prompt_cache_len:5151 prompt_cache_ratio:0.5759168157423972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 -DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10763835906982422 s -INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.10951018333435059 s -DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=273087776931210503476202860879448796078, time:1750767425.2226858s req_ids:[8] -DEBUG 06-24 20:17:05 [manager.py:391] -ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:207.43417739868164ms total_cost_time:207.47876167297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8945 prompt_cache_len:5151 prompt_cache_ratio:0.5758524315259922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 -DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10783839225769043 s -INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.10973763465881348 s -DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=161288152311888900138671598490094586865, time:1750767425.4357536s req_ids:[8] -DEBUG 06-24 20:17:05 [manager.py:391] -ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:206.34698867797852ms total_cost_time:206.3906192779541ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8946 prompt_cache_len:5151 prompt_cache_ratio:0.5757880617035547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 -DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10864925384521484 s -INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063289642333984 s -DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=150764785052589225061479550015254395552, time:1750767425.6488895s req_ids:[8] -DEBUG 06-24 20:17:05 [manager.py:391] -ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:204.04696464538574ms total_cost_time:204.09131050109863ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:8947 prompt_cache_len:5151 prompt_cache_ratio:0.5757237062702582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 -DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:05 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s -INFO 06-24 20:17:05 [manager.py:68] detokenization recv req id 8 cost time 0.11030292510986328 s -DEBUG 06-24 20:17:05 [manager.py:391] Prefill Batch: batch_id=128240808934864838081752262430029952182, time:1750767425.8701012s req_ids:[8] -DEBUG 06-24 20:17:05 [manager.py:391] -ERROR 06-24 20:17:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:220.184326171875ms total_cost_time:220.2298641204834ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8948 prompt_cache_len:5151 prompt_cache_ratio:0.5756593652212785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 -DEBUG 06-24 20:17:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10833930969238281 s -INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.1101994514465332 s -DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=147854794251447613253983833023172051708, time:1750767426.0857434s req_ids:[8] -DEBUG 06-24 20:17:06 [manager.py:391] -ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:05 lightllm_req_id:8 first_token_cost:210.01505851745605ms total_cost_time:210.07537841796875ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8949 prompt_cache_len:5151 prompt_cache_ratio:0.5755950385517935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 -DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10926938056945801 s -INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.11120438575744629 s -DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=4639161228803901275697230139548544674, time:1750767426.3014596s req_ids:[8] -DEBUG 06-24 20:17:06 [manager.py:391] -ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:381.9706439971924ms total_cost_time:382.016658782959ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8950 prompt_cache_len:5151 prompt_cache_ratio:0.5755307262569832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 -DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s -INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.11018729209899902 s -DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=336611141573958973034701297104283935118, time:1750767426.6861308s req_ids:[8] -DEBUG 06-24 20:17:06 [manager.py:391] -ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:211.24577522277832ms total_cost_time:211.29250526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:8951 prompt_cache_len:5151 prompt_cache_ratio:0.57546642833203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 -DEBUG 06-24 20:17:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:06 [manager.py:224] router recive req id 8 cost time 0.10782551765441895 s -INFO 06-24 20:17:06 [manager.py:68] detokenization recv req id 8 cost time 0.10980486869812012 s -DEBUG 06-24 20:17:06 [manager.py:391] Prefill Batch: batch_id=162029279459218751810730563047351020255, time:1750767426.9059296s req_ids:[8] -DEBUG 06-24 20:17:06 [manager.py:391] -ERROR 06-24 20:17:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:206.8805694580078ms total_cost_time:206.9237232208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8952 prompt_cache_len:5151 prompt_cache_ratio:0.5754021447721179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 -DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1091313362121582 s -INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023974418640137 s -DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=252357408677479524072174775130571251815, time:1750767427.1188245s req_ids:[8] -DEBUG 06-24 20:17:07 [manager.py:391] -ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:06 lightllm_req_id:8 first_token_cost:208.77861976623535ms total_cost_time:208.84132385253906ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:8953 prompt_cache_len:5151 prompt_cache_ratio:0.5753378755724338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 -DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1088566780090332 s -INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11098074913024902 s -DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=304378158466525731755261021306525801988, time:1750767427.3383908s req_ids:[8] -DEBUG 06-24 20:17:07 [manager.py:391] -ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:212.94045448303223ms total_cost_time:212.9843235015869ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8954 prompt_cache_len:5151 prompt_cache_ratio:0.5752736207281662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 -DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.10798907279968262 s -INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.10992693901062012 s -DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=220194227559716799541579462165990975640, time:1750767427.5532007s req_ids:[8] -DEBUG 06-24 20:17:07 [manager.py:391] -ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:206.1326503753662ms total_cost_time:206.15458488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:8955 prompt_cache_len:5151 prompt_cache_ratio:0.5752093802345059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 -DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1077566146850586 s -INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.10979771614074707 s -DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=105720818530875730109728932282672909083, time:1750767427.7648869s req_ids:[8] -DEBUG 06-24 20:17:07 [manager.py:391] -ERROR 06-24 20:17:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:206.9849967956543ms total_cost_time:207.02815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8956 prompt_cache_len:5151 prompt_cache_ratio:0.5751451540866458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 -DEBUG 06-24 20:17:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:07 [manager.py:224] router recive req id 8 cost time 0.1081993579864502 s -INFO 06-24 20:17:07 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s -DEBUG 06-24 20:17:07 [manager.py:391] Prefill Batch: batch_id=242058706025797307114532665330738583874, time:1750767427.9790566s req_ids:[8] -DEBUG 06-24 20:17:07 [manager.py:391] -ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:07 lightllm_req_id:8 first_token_cost:207.98277854919434ms total_cost_time:208.04309844970703ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8957 prompt_cache_len:5151 prompt_cache_ratio:0.5750809422797811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 -DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10831499099731445 s -INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s -DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=257195158114178892403542433549024805195, time:1750767428.1923118s req_ids:[8] -DEBUG 06-24 20:17:08 [manager.py:391] -ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:375.61917304992676ms total_cost_time:375.68116188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:8958 prompt_cache_len:5151 prompt_cache_ratio:0.5750167448091091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 -DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10812783241271973 s -INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11004018783569336 s -DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=170627348129163633396542544179569303694, time:1750767428.5711615s req_ids:[8] -DEBUG 06-24 20:17:08 [manager.py:391] -ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:202.03518867492676ms total_cost_time:202.09527015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8959 prompt_cache_len:5151 prompt_cache_ratio:0.5749525616698292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 -DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s -INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.11054563522338867 s -DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=180196441877832190385726831487869867362, time:1750767428.781529s req_ids:[8] -DEBUG 06-24 20:17:08 [manager.py:391] -ERROR 06-24 20:17:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:207.5796127319336ms total_cost_time:207.6406478881836ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8960 prompt_cache_len:5151 prompt_cache_ratio:0.5748883928571429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 -DEBUG 06-24 20:17:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:08 [manager.py:224] router recive req id 8 cost time 0.10787105560302734 s -INFO 06-24 20:17:08 [manager.py:68] detokenization recv req id 8 cost time 0.10983920097351074 s -DEBUG 06-24 20:17:08 [manager.py:391] Prefill Batch: batch_id=151547087549632477647200108727397404230, time:1750767428.9915733s req_ids:[8] -DEBUG 06-24 20:17:08 [manager.py:391] -ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:08 lightllm_req_id:8 first_token_cost:203.5653591156006ms total_cost_time:203.62591743469238ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:8961 prompt_cache_len:5151 prompt_cache_ratio:0.5748242383662537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 -DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s -INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s -DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=143536032062813711634125537969059759941, time:1750767429.1996453s req_ids:[8] -DEBUG 06-24 20:17:09 [manager.py:391] -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:203.8888931274414ms total_cost_time:203.9487361907959ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:8962 prompt_cache_len:5151 prompt_cache_ratio:0.5747600981923677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 -DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s -INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.10955548286437988 s -DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=327357447050371120880584020934600147968, time:1750767429.4100945s req_ids:[8] -DEBUG 06-24 20:17:09 [manager.py:391] -ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:207.26871490478516ms total_cost_time:207.33118057250977ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8963 prompt_cache_len:5151 prompt_cache_ratio:0.5746959723306928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 -DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10740828514099121 s -INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.10932064056396484 s -DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=317751659001078979533008830335795931463, time:1750767429.622811s req_ids:[8] -DEBUG 06-24 20:17:09 [manager.py:391] -ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:17:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 38204.903 tokens/s -DEBUG 06-24 20:17:09 [stats.py:37] Avg prompt tokens throughput: 38196.262 tokens/s -DEBUG 06-24 20:17:09 [stats.py:37] Avg generate tokens throughput: 8.641 tokens/s -INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:208.01329612731934ms total_cost_time:208.07361602783203ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:8964 prompt_cache_len:5151 prompt_cache_ratio:0.5746318607764391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 -DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:09 [manager.py:224] router recive req id 8 cost time 0.10842132568359375 s -INFO 06-24 20:17:09 [manager.py:68] detokenization recv req id 8 cost time 0.11028861999511719 s -DEBUG 06-24 20:17:09 [manager.py:391] Prefill Batch: batch_id=170109001635047167598307515247845342046, time:1750767429.834858s req_ids:[8] -DEBUG 06-24 20:17:09 [manager.py:391] -ERROR 06-24 20:17:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:207.65280723571777ms total_cost_time:207.71288871765137ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:8965 prompt_cache_len:5151 prompt_cache_ratio:0.5745677635248188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 -DEBUG 06-24 20:17:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10710358619689941 s -INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s -DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=94941777096042333939239926973690463079, time:1750767430.0464866s req_ids:[8] -DEBUG 06-24 20:17:10 [manager.py:391] -ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:09 lightllm_req_id:8 first_token_cost:205.82008361816406ms total_cost_time:205.86299896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8966 prompt_cache_len:5151 prompt_cache_ratio:0.5745036805710462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 -DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s -INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100919246673584 s -DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=20948888814035641340630326814713687000, time:1750767430.2565775s req_ids:[8] -DEBUG 06-24 20:17:10 [manager.py:391] -ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:321.32434844970703ms total_cost_time:321.37036323547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:8967 prompt_cache_len:5151 prompt_cache_ratio:0.5744396119103379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 -DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.10753393173217773 s -DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=319780607465907028802153902613469241116, time:1750767430.571691s req_ids:[8] -DEBUG 06-24 20:17:10 [manager.py:391] -INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940790176391602 s -ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:178.66015434265137ms total_cost_time:178.70283126831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8968 prompt_cache_len:5151 prompt_cache_ratio:0.5743755575379126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 -DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.11011791229248047 s -INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.11207294464111328 s -DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=95237488466607486815177656623541483809, time:1750767430.7627072s req_ids:[8] -DEBUG 06-24 20:17:10 [manager.py:391] -ERROR 06-24 20:17:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:204.81610298156738ms total_cost_time:204.85877990722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8969 prompt_cache_len:5151 prompt_cache_ratio:0.574311517448991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 -DEBUG 06-24 20:17:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:10 [manager.py:224] router recive req id 8 cost time 0.1054530143737793 s -INFO 06-24 20:17:10 [manager.py:68] detokenization recv req id 8 cost time 0.10735964775085449 s -DEBUG 06-24 20:17:10 [manager.py:391] Prefill Batch: batch_id=194471783958277788296526476928783097899, time:1750767430.973444s req_ids:[8] -DEBUG 06-24 20:17:10 [manager.py:391] -ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:10 lightllm_req_id:8 first_token_cost:206.85553550720215ms total_cost_time:206.89916610717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:8970 prompt_cache_len:5151 prompt_cache_ratio:0.574247491638796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 -DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10900115966796875 s -INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11103010177612305 s -DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=56372432435871421302477774095305757951, time:1750767431.1847541s req_ids:[8] -DEBUG 06-24 20:17:11 [manager.py:391] -ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:205.72185516357422ms total_cost_time:205.76763153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:8971 prompt_cache_len:5151 prompt_cache_ratio:0.5741834801025527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 -DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10796046257019043 s -INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.10991525650024414 s -DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=245126801952321825184825781904625560542, time:1750767431.3977723s req_ids:[8] -DEBUG 06-24 20:17:11 [manager.py:391] -ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:207.17167854309082ms total_cost_time:207.23247528076172ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:8972 prompt_cache_len:5151 prompt_cache_ratio:0.5741194828354882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 -DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10748577117919922 s -INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.10933446884155273 s -DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=320381136194951906015885340203962231488, time:1750767431.6124935s req_ids:[8] -DEBUG 06-24 20:17:11 [manager.py:391] -ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:167.47331619262695ms total_cost_time:167.51742362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8973 prompt_cache_len:5151 prompt_cache_ratio:0.5740554998328319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 -DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s -INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022782325744629 s -DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=326111066798307479613743618530441851660, time:1750767431.7862663s req_ids:[8] -DEBUG 06-24 20:17:11 [manager.py:391] -ERROR 06-24 20:17:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:203.48882675170898ms total_cost_time:203.5503387451172ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:8974 prompt_cache_len:5151 prompt_cache_ratio:0.573991531089815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 -DEBUG 06-24 20:17:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:11 [manager.py:224] router recive req id 8 cost time 0.10819005966186523 s -INFO 06-24 20:17:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022138595581055 s -DEBUG 06-24 20:17:11 [manager.py:391] Prefill Batch: batch_id=25370381696809871478274298444054125041, time:1750767431.9942765s req_ids:[8] -DEBUG 06-24 20:17:11 [manager.py:391] -ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:11 lightllm_req_id:8 first_token_cost:207.35478401184082ms total_cost_time:207.3993682861328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:8975 prompt_cache_len:5151 prompt_cache_ratio:0.5739275766016713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 -DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10856842994689941 s -INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.1110682487487793 s -DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=5168625437787586946947508355080883226, time:1750767432.208292s req_ids:[8] -DEBUG 06-24 20:17:12 [manager.py:391] -ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:362.560510635376ms total_cost_time:362.6229763031006ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:8976 prompt_cache_len:5151 prompt_cache_ratio:0.5738636363636364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 -DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10741543769836426 s -INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.10935306549072266 s -DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=167974589616316682929243987443939473914, time:1750767432.573994s req_ids:[8] -DEBUG 06-24 20:17:12 [manager.py:391] -ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:201.59125328063965ms total_cost_time:201.65300369262695ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:8977 prompt_cache_len:5151 prompt_cache_ratio:0.573799710370948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 -DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.1091909408569336 s -INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.11119747161865234 s -DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=28282993139065527106023617924931212631, time:1750767432.7834418s req_ids:[8] -DEBUG 06-24 20:17:12 [manager.py:391] -ERROR 06-24 20:17:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:204.73766326904297ms total_cost_time:204.78034019470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:8978 prompt_cache_len:5151 prompt_cache_ratio:0.573735798618846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 -DEBUG 06-24 20:17:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:12 [manager.py:224] router recive req id 8 cost time 0.10888886451721191 s -INFO 06-24 20:17:12 [manager.py:68] detokenization recv req id 8 cost time 0.11105895042419434 s -DEBUG 06-24 20:17:12 [manager.py:391] Prefill Batch: batch_id=39209003267617238746486925946491960509, time:1750767432.9954407s req_ids:[8] -DEBUG 06-24 20:17:12 [manager.py:391] -ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:12 lightllm_req_id:8 first_token_cost:209.303617477417ms total_cost_time:209.34772491455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8979 prompt_cache_len:5151 prompt_cache_ratio:0.5736719011025727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 -DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.11058306694030762 s -INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.11206984519958496 s -DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=147854703863899957613959827721668984010, time:1750767433.2199225s req_ids:[8] -DEBUG 06-24 20:17:13 [manager.py:391] -ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:221.86756134033203ms total_cost_time:221.9560146331787ms,out_token_counter:1 mean_per_token_cost_time: 0.08845329284667969ms prompt_token_num:8980 prompt_cache_len:5151 prompt_cache_ratio:0.5736080178173719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 -DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.10795736312866211 s -INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.10993409156799316 s -DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=30966462781944354513156019909001183947, time:1750767433.436091s req_ids:[8] -DEBUG 06-24 20:17:13 [manager.py:391] -ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:207.66115188598633ms total_cost_time:207.70502090454102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:8981 prompt_cache_len:5151 prompt_cache_ratio:0.5735441487584901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 -DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.1087651252746582 s -INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.11070942878723145 s -DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=336216294804511645343909080101336282157, time:1750767433.6485589s req_ids:[8] -DEBUG 06-24 20:17:13 [manager.py:391] -ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:211.25388145446777ms total_cost_time:211.29679679870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8982 prompt_cache_len:5151 prompt_cache_ratio:0.5734802939211757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 -DEBUG 06-24 20:17:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:13 [manager.py:224] router recive req id 8 cost time 0.10705280303955078 s -INFO 06-24 20:17:13 [manager.py:68] detokenization recv req id 8 cost time 0.10893011093139648 s -DEBUG 06-24 20:17:13 [manager.py:391] Prefill Batch: batch_id=58390068932930900808980097322551633293, time:1750767433.8705475s req_ids:[8] -DEBUG 06-24 20:17:13 [manager.py:391] -ERROR 06-24 20:17:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:172.346830368042ms total_cost_time:172.38974571228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8983 prompt_cache_len:5151 prompt_cache_ratio:0.573416453300679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 -DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.31026673316955566 s -INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.3122293949127197 s -DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=236177969380402165124684040469374380689, time:1750767434.2561026s req_ids:[8] -DEBUG 06-24 20:17:14 [manager.py:391] -ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:13 lightllm_req_id:8 first_token_cost:422.3446846008301ms total_cost_time:422.38879203796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8984 prompt_cache_len:5151 prompt_cache_ratio:0.5733526268922529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 -DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10797691345214844 s -INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s -DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=237143991917641885445852323576828894334, time:1750767434.474272s req_ids:[8] -DEBUG 06-24 20:17:14 [manager.py:391] -ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:208.67109298706055ms total_cost_time:208.71400833129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8985 prompt_cache_len:5151 prompt_cache_ratio:0.5732888146911519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 -DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10833191871643066 s -INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.11031770706176758 s -DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=138984329150426342176995524047403727780, time:1750767434.6878843s req_ids:[8] -DEBUG 06-24 20:17:14 [manager.py:391] -ERROR 06-24 20:17:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:220.61920166015625ms total_cost_time:220.66259384155273ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:8986 prompt_cache_len:5151 prompt_cache_ratio:0.573225016692633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 -DEBUG 06-24 20:17:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:14 [manager.py:224] router recive req id 8 cost time 0.10757899284362793 s -INFO 06-24 20:17:14 [manager.py:68] detokenization recv req id 8 cost time 0.10960054397583008 s -DEBUG 06-24 20:17:14 [manager.py:391] Prefill Batch: batch_id=269644690554574174219149255460480978028, time:1750767434.917639s req_ids:[8] -DEBUG 06-24 20:17:14 [manager.py:391] -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:14 lightllm_req_id:8 first_token_cost:218.49632263183594ms total_cost_time:218.53876113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:8987 prompt_cache_len:5151 prompt_cache_ratio:0.5731612328919551 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 -DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10743427276611328 s -INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10957050323486328 s -DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=102922138129445785441771554311907151348, time:1750767435.1353962s req_ids:[8] -DEBUG 06-24 20:17:15 [manager.py:391] -ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:208.7390422821045ms total_cost_time:208.7841033935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:8988 prompt_cache_len:5151 prompt_cache_ratio:0.5730974632843792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 -DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10801506042480469 s -INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10991835594177246 s -DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=291958425610691376673525108379897308714, time:1750767435.3505938s req_ids:[8] -DEBUG 06-24 20:17:15 [manager.py:391] -ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:202.94618606567383ms total_cost_time:203.0038833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:8989 prompt_cache_len:5151 prompt_cache_ratio:0.5730337078651685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 -DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10979413986206055 s -INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.11176419258117676 s -DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=320846410685894208290744524425102739160, time:1750767435.561704s req_ids:[8] -DEBUG 06-24 20:17:15 [manager.py:391] -ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:206.6805362701416ms total_cost_time:206.7253589630127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:8990 prompt_cache_len:5151 prompt_cache_ratio:0.5729699666295884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 -DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10731959342956543 s -INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10929369926452637 s -DEBUG 06-24 20:17:15 [manager.py:391] Prefill Batch: batch_id=319386890125927505938048275963745384882, time:1750767435.7882884s req_ids:[8] -DEBUG 06-24 20:17:15 [manager.py:391] -ERROR 06-24 20:17:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:222.58663177490234ms total_cost_time:222.62954711914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8991 prompt_cache_len:5151 prompt_cache_ratio:0.5729062395729062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 -DEBUG 06-24 20:17:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:15 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s -INFO 06-24 20:17:15 [manager.py:68] detokenization recv req id 8 cost time 0.10964441299438477 s -DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=195956202944059190015444435539391641456, time:1750767436.0037181s req_ids:[8] -DEBUG 06-24 20:17:16 [manager.py:391] -ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:15 lightllm_req_id:8 first_token_cost:382.1301460266113ms total_cost_time:382.1756839752197ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:8992 prompt_cache_len:5151 prompt_cache_ratio:0.5728425266903915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 -DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10847806930541992 s -INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.1104423999786377 s -DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=49357955995182287461900278190609818441, time:1750767436.3856146s req_ids:[8] -DEBUG 06-24 20:17:16 [manager.py:391] -ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:213.00888061523438ms total_cost_time:213.05298805236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:8993 prompt_cache_len:5151 prompt_cache_ratio:0.5727788279773157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 -DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:16 [batch.py:51] router release req id 8 -DEBUG 06-24 20:17:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:16 [manager.py:283] -DEBUG 06-24 20:17:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:16 [manager.py:284] -INFO 06-24 20:17:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10714864730834961 s -INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.10892081260681152 s -DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=84574821323557679604401101532113065562, time:1750767436.6248374s req_ids:[8] -DEBUG 06-24 20:17:16 [manager.py:391] -ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:185.96982955932617ms total_cost_time:186.03110313415527ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:8994 prompt_cache_len:5151 prompt_cache_ratio:0.5727151434289527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 -DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.1082773208618164 s -INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.11006617546081543 s -DEBUG 06-24 20:17:16 [manager.py:391] Prefill Batch: batch_id=140785608006063803498228717442618505207, time:1750767436.800333s req_ids:[8] -DEBUG 06-24 20:17:16 [manager.py:391] -ERROR 06-24 20:17:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:197.953462600708ms total_cost_time:198.014497756958ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:8995 prompt_cache_len:5151 prompt_cache_ratio:0.5726514730405781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 -INFO 06-24 20:17:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:17:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:16 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s -INFO 06-24 20:17:16 [manager.py:68] detokenization recv req id 8 cost time 0.11030411720275879 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=39291744285264861009561210828910134939, time:1750767437.0102444s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:16 lightllm_req_id:8 first_token_cost:211.2751007080078ms total_cost_time:211.33732795715332ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:8996 prompt_cache_len:5151 prompt_cache_ratio:0.57258781680747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 -DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.1082315444946289 s -INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10998821258544922 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=216708081498569652965055329754715418135, time:1750767437.2232928s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:179.1555881500244ms total_cost_time:179.1985034942627ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:8997 prompt_cache_len:5151 prompt_cache_ratio:0.5725241747249084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 -DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10799098014831543 s -INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10960793495178223 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=304732051042790933190058072696307991216, time:1750767437.4154117s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:175.51207542419434ms total_cost_time:175.55522918701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:8998 prompt_cache_len:5151 prompt_cache_ratio:0.5724605467881752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 -DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.1068735122680664 s -INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10867500305175781 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=257843741810405947796855847109888483563, time:1750767437.597309s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:187.91961669921875ms total_cost_time:187.96181678771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:8999 prompt_cache_len:5151 prompt_cache_ratio:0.5723969329925547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 -DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10790395736694336 s -INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=15663230959827011995206672374632577466, time:1750767437.7875779s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:184.26966667175293ms total_cost_time:184.3111515045166ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9000 prompt_cache_len:5151 prompt_cache_ratio:0.5723333333333334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 -DEBUG 06-24 20:17:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:17 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s -INFO 06-24 20:17:17 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s -DEBUG 06-24 20:17:17 [manager.py:391] Prefill Batch: batch_id=62629272663282118140314877088396506222, time:1750767437.9801936s req_ids:[8] -DEBUG 06-24 20:17:17 [manager.py:391] -ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:17 lightllm_req_id:8 first_token_cost:365.93103408813477ms total_cost_time:365.97442626953125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9001 prompt_cache_len:5151 prompt_cache_ratio:0.5722697478057993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 -DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s -INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.10983872413635254 s -DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=85980072883571051902589798313651817770, time:1750767438.3426404s req_ids:[8] -DEBUG 06-24 20:17:18 [manager.py:391] -ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:184.10015106201172ms total_cost_time:184.1440200805664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9002 prompt_cache_len:5151 prompt_cache_ratio:0.5722061764052433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 -DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s -INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.11060881614685059 s -DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=152755467072835928158139155363844326983, time:1750767438.5326786s req_ids:[8] -DEBUG 06-24 20:17:18 [manager.py:391] -ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:196.08235359191895ms total_cost_time:196.14124298095703ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9003 prompt_cache_len:5151 prompt_cache_ratio:0.5721426191269576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 -DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10917854309082031 s -INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.11104226112365723 s -DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=241118503997975820102745188354966991075, time:1750767438.7390392s req_ids:[8] -DEBUG 06-24 20:17:18 [manager.py:391] -ERROR 06-24 20:17:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:200.46114921569824ms total_cost_time:200.52146911621094ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9004 prompt_cache_len:5151 prompt_cache_ratio:0.5720790759662372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 -DEBUG 06-24 20:17:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:18 [manager.py:224] router recive req id 8 cost time 0.10772991180419922 s -INFO 06-24 20:17:18 [manager.py:68] detokenization recv req id 8 cost time 0.10966253280639648 s -DEBUG 06-24 20:17:18 [manager.py:391] Prefill Batch: batch_id=33147665566299173786912972765177007072, time:1750767438.949131s req_ids:[8] -DEBUG 06-24 20:17:18 [manager.py:391] -ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:18 lightllm_req_id:8 first_token_cost:205.10411262512207ms total_cost_time:205.16633987426758ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:9005 prompt_cache_len:5151 prompt_cache_ratio:0.5720155469183786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 -DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.1074063777923584 s -INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.10925436019897461 s -DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=128409507262889350094567875217795717011, time:1750767439.1589413s req_ids:[8] -DEBUG 06-24 20:17:19 [manager.py:391] -ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:207.12590217590332ms total_cost_time:207.1704864501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9006 prompt_cache_len:5151 prompt_cache_ratio:0.5719520319786808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 -DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.10898900032043457 s -INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.11092376708984375 s -DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=136934274041532793254251756157230925124, time:1750767439.3780718s req_ids:[8] -DEBUG 06-24 20:17:19 [manager.py:391] -ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:213.0272388458252ms total_cost_time:213.0897045135498ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:9007 prompt_cache_len:5151 prompt_cache_ratio:0.5718885311424448 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 -DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.10892391204833984 s -INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.11095309257507324 s -DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=249250275298257318043637550316592501130, time:1750767439.5923343s req_ids:[8] -DEBUG 06-24 20:17:19 [manager.py:391] -ERROR 06-24 20:17:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:210.15286445617676ms total_cost_time:210.19577980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9008 prompt_cache_len:5151 prompt_cache_ratio:0.5718250444049734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 -DEBUG 06-24 20:17:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:19 [manager.py:224] router recive req id 8 cost time 0.1071922779083252 s -INFO 06-24 20:17:19 [manager.py:68] detokenization recv req id 8 cost time 0.10924553871154785 s -DEBUG 06-24 20:17:19 [manager.py:391] Prefill Batch: batch_id=198666066097588211875632792731312869956, time:1750767439.8079598s req_ids:[8] -DEBUG 06-24 20:17:19 [manager.py:391] -DEBUG 06-24 20:17:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 40054.030 tokens/s -DEBUG 06-24 20:17:19 [stats.py:37] Avg prompt tokens throughput: 40045.218 tokens/s -DEBUG 06-24 20:17:19 [stats.py:37] Avg generate tokens throughput: 8.813 tokens/s -ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:19 lightllm_req_id:8 first_token_cost:381.58631324768066ms total_cost_time:381.63161277770996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9009 prompt_cache_len:5151 prompt_cache_ratio:0.5717615717615717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 -DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10896515846252441 s -INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.11098122596740723 s -DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=198733991319755332806791202154025012942, time:1750767440.201033s req_ids:[8] -DEBUG 06-24 20:17:20 [manager.py:391] -ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:210.06011962890625ms total_cost_time:210.10541915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9010 prompt_cache_len:5151 prompt_cache_ratio:0.5716981132075472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 -DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10785126686096191 s -INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.10985112190246582 s -DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=133655503512312833281338068472638587025, time:1750767440.41831s req_ids:[8] -DEBUG 06-24 20:17:20 [manager.py:391] -ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:208.64486694335938ms total_cost_time:208.68897438049316ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9011 prompt_cache_len:5151 prompt_cache_ratio:0.5716346687382089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 -DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10863208770751953 s -INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.11052298545837402 s -DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=140633077150729094966817065045580390780, time:1750767440.6324441s req_ids:[8] -DEBUG 06-24 20:17:20 [manager.py:391] -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:207.54528045654297ms total_cost_time:207.58962631225586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9012 prompt_cache_len:5151 prompt_cache_ratio:0.5715712383488681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 -DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:20 [manager.py:224] router recive req id 8 cost time 0.10838818550109863 s -INFO 06-24 20:17:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103830337524414 s -DEBUG 06-24 20:17:20 [manager.py:391] Prefill Batch: batch_id=151872465838131387182803064336637216896, time:1750767440.8476827s req_ids:[8] -DEBUG 06-24 20:17:20 [manager.py:391] -ERROR 06-24 20:17:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:206.51817321777344ms total_cost_time:206.56371116638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9013 prompt_cache_len:5151 prompt_cache_ratio:0.5715078220348385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 -DEBUG 06-24 20:17:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10859489440917969 s -INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11052942276000977 s -DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=1215350245682702007953321924069838026, time:1750767441.0620859s req_ids:[8] -DEBUG 06-24 20:17:21 [manager.py:391] -ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:20 lightllm_req_id:8 first_token_cost:208.1902027130127ms total_cost_time:208.235502243042ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9014 prompt_cache_len:5151 prompt_cache_ratio:0.5714444197914356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 -DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s -INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.1098184585571289 s -DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=200057129046233163701636388622520631242, time:1750767441.2764268s req_ids:[8] -DEBUG 06-24 20:17:21 [manager.py:391] -ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:209.41996574401855ms total_cost_time:209.46598052978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9015 prompt_cache_len:5151 prompt_cache_ratio:0.5713810316139767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 -DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10886144638061523 s -INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093521118164062 s -DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=19280522459072846009628019658793759557, time:1750767441.49288s req_ids:[8] -DEBUG 06-24 20:17:21 [manager.py:391] -ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:204.79059219360352ms total_cost_time:204.833984375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9016 prompt_cache_len:5151 prompt_cache_ratio:0.5713176574977817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 -DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10880160331726074 s -INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.11057925224304199 s -DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=281370543332253051238967638396506410158, time:1750767441.7012935s req_ids:[8] -DEBUG 06-24 20:17:21 [manager.py:391] -ERROR 06-24 20:17:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:205.53207397460938ms total_cost_time:205.57570457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9017 prompt_cache_len:5151 prompt_cache_ratio:0.5712542974381724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 -DEBUG 06-24 20:17:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:21 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s -INFO 06-24 20:17:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s -DEBUG 06-24 20:17:21 [manager.py:391] Prefill Batch: batch_id=112737093018953084253985439686167327786, time:1750767441.9127367s req_ids:[8] -DEBUG 06-24 20:17:21 [manager.py:391] -ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:21 lightllm_req_id:8 first_token_cost:380.78808784484863ms total_cost_time:380.83386421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9018 prompt_cache_len:5151 prompt_cache_ratio:0.5711909514304724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 -DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.1091303825378418 s -INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11105918884277344 s -DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=156059677279708698171622819195425621317, time:1750767442.298076s req_ids:[8] -DEBUG 06-24 20:17:22 [manager.py:391] -ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:206.1631679534912ms total_cost_time:206.2084674835205ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9019 prompt_cache_len:5151 prompt_cache_ratio:0.5711276194700078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 -DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.1071929931640625 s -INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.1091768741607666 s -DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=120376976019836288580240986842697140808, time:1750767442.511536s req_ids:[8] -DEBUG 06-24 20:17:22 [manager.py:391] -ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:211.49635314941406ms total_cost_time:211.54260635375977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9020 prompt_cache_len:5151 prompt_cache_ratio:0.5710643015521064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 -DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.10979819297790527 s -INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11175346374511719 s -DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=183790117633128580297676593823407639860, time:1750767442.7292178s req_ids:[8] -DEBUG 06-24 20:17:22 [manager.py:391] -ERROR 06-24 20:17:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:205.31415939331055ms total_cost_time:205.3697109222412ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:9021 prompt_cache_len:5151 prompt_cache_ratio:0.5710009976720984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 -DEBUG 06-24 20:17:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:22 [manager.py:224] router recive req id 8 cost time 0.10855817794799805 s -INFO 06-24 20:17:22 [manager.py:68] detokenization recv req id 8 cost time 0.11048388481140137 s -DEBUG 06-24 20:17:22 [manager.py:391] Prefill Batch: batch_id=195418052297677259974204386944888487026, time:1750767442.940089s req_ids:[8] -DEBUG 06-24 20:17:22 [manager.py:391] -ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:22 lightllm_req_id:8 first_token_cost:204.77294921875ms total_cost_time:204.8165798187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9022 prompt_cache_len:5151 prompt_cache_ratio:0.5709377078253159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 -DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10881948471069336 s -INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.11065077781677246 s -DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=1757326764188527763622896894796764475, time:1750767443.154273s req_ids:[8] -DEBUG 06-24 20:17:23 [manager.py:391] -INFO 06-24 20:17:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:213.08016777038574ms total_cost_time:213.12355995178223ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9023 prompt_cache_len:5151 prompt_cache_ratio:0.570874432007093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 -DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10898113250732422 s -INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.11083245277404785 s -DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=246196932323326183899368089162140857770, time:1750767443.3705597s req_ids:[8] -DEBUG 06-24 20:17:23 [manager.py:391] -ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:206.06732368469238ms total_cost_time:206.11166954040527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9024 prompt_cache_len:5151 prompt_cache_ratio:0.570811170212766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 -DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10822367668151855 s -INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.1103522777557373 s -DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=195846588865691015174616264513569790305, time:1750767443.5838308s req_ids:[8] -DEBUG 06-24 20:17:23 [manager.py:391] -ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:214.4794464111328ms total_cost_time:214.5226001739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9025 prompt_cache_len:5151 prompt_cache_ratio:0.5707479224376731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 -DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:23 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s -INFO 06-24 20:17:23 [manager.py:68] detokenization recv req id 8 cost time 0.10938382148742676 s -DEBUG 06-24 20:17:23 [manager.py:391] Prefill Batch: batch_id=21168327506130646957229349341382917958, time:1750767443.8168314s req_ids:[8] -DEBUG 06-24 20:17:23 [manager.py:391] -ERROR 06-24 20:17:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:222.15700149536133ms total_cost_time:222.1992015838623ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9026 prompt_cache_len:5151 prompt_cache_ratio:0.5706846886771549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 -DEBUG 06-24 20:17:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.1083230972290039 s -INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s -DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=255905150786887199098845183028234185825, time:1750767444.0340273s req_ids:[8] -DEBUG 06-24 20:17:24 [manager.py:391] -ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:23 lightllm_req_id:8 first_token_cost:363.8937473297119ms total_cost_time:363.95716667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:9027 prompt_cache_len:5151 prompt_cache_ratio:0.5706214689265536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 -DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10850405693054199 s -INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.11040639877319336 s -DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=182285771540374548568624860982144453646, time:1750767444.4017656s req_ids:[8] -DEBUG 06-24 20:17:24 [manager.py:391] -ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:206.1941623687744ms total_cost_time:206.2368392944336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9028 prompt_cache_len:5151 prompt_cache_ratio:0.570558263181214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 -DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10766720771789551 s -INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s -DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=24169608865987072195238911802103291561, time:1750767444.6247532s req_ids:[8] -DEBUG 06-24 20:17:24 [manager.py:391] -ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:212.71681785583496ms total_cost_time:212.75925636291504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9029 prompt_cache_len:5151 prompt_cache_ratio:0.5704950714364825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 -DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:24 [manager.py:224] router recive req id 8 cost time 0.10879993438720703 s -INFO 06-24 20:17:24 [manager.py:68] detokenization recv req id 8 cost time 0.11068439483642578 s -DEBUG 06-24 20:17:24 [manager.py:391] Prefill Batch: batch_id=291012521357822056089159012780735255280, time:1750767444.846235s req_ids:[8] -DEBUG 06-24 20:17:24 [manager.py:391] -ERROR 06-24 20:17:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:214.83206748962402ms total_cost_time:214.87712860107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9030 prompt_cache_len:5151 prompt_cache_ratio:0.5704318936877076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 -DEBUG 06-24 20:17:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10991501808166504 s -INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s -DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=106195945024620536298013656357598419643, time:1750767445.057377s req_ids:[8] -DEBUG 06-24 20:17:25 [manager.py:391] -ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:24 lightllm_req_id:8 first_token_cost:203.89604568481445ms total_cost_time:203.94086837768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9031 prompt_cache_len:5151 prompt_cache_ratio:0.5703687299302402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 -DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10819649696350098 s -INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11005353927612305 s -DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=143383355291374166620225071413529534936, time:1750767445.2665021s req_ids:[8] -DEBUG 06-24 20:17:25 [manager.py:391] -ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:205.02924919128418ms total_cost_time:205.0769329071045ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:9032 prompt_cache_len:5151 prompt_cache_ratio:0.5703055801594331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 -DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10988163948059082 s -INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.11184239387512207 s -DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=212993884098863265608577302874415928432, time:1750767445.4804173s req_ids:[8] -DEBUG 06-24 20:17:25 [manager.py:391] -ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:205.04188537597656ms total_cost_time:205.05952835083008ms,out_token_counter:1 mean_per_token_cost_time: 0.017642974853515625ms prompt_token_num:9033 prompt_cache_len:5151 prompt_cache_ratio:0.570242444370641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 -DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:25 [manager.py:224] router recive req id 8 cost time 0.10689139366149902 s -INFO 06-24 20:17:25 [manager.py:68] detokenization recv req id 8 cost time 0.10867929458618164 s -DEBUG 06-24 20:17:25 [manager.py:391] Prefill Batch: batch_id=125589872394213683625029000943628122230, time:1750767445.6948245s req_ids:[8] -DEBUG 06-24 20:17:25 [manager.py:391] -ERROR 06-24 20:17:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:216.00866317749023ms total_cost_time:216.05229377746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9034 prompt_cache_len:5151 prompt_cache_ratio:0.5701793225592208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 -DEBUG 06-24 20:17:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.3091757297515869 s -INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.3111307621002197 s -DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=108651578238852569605351403456609193733, time:1750767446.1142063s req_ids:[8] -DEBUG 06-24 20:17:26 [manager.py:391] -ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:25 lightllm_req_id:8 first_token_cost:418.5481071472168ms total_cost_time:418.5929298400879ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9035 prompt_cache_len:5151 prompt_cache_ratio:0.5701162147205313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 -DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s -INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.10962319374084473 s -DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=62901160930639870454073417731847839307, time:1750767446.3348627s req_ids:[8] -DEBUG 06-24 20:17:26 [manager.py:391] -ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:202.03042030334473ms total_cost_time:202.09312438964844ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:9036 prompt_cache_len:5151 prompt_cache_ratio:0.5700531208499336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 -DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.10986542701721191 s -INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.11174917221069336 s -DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=86593469892047858363957617796270065255, time:1750767446.54246s req_ids:[8] -DEBUG 06-24 20:17:26 [manager.py:391] -ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:208.26077461242676ms total_cost_time:208.30416679382324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9037 prompt_cache_len:5151 prompt_cache_ratio:0.5699900409427907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 -DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.1077268123626709 s -INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s -DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=254274687396927669781078363049304102166, time:1750767446.7533488s req_ids:[8] -DEBUG 06-24 20:17:26 [manager.py:391] -ERROR 06-24 20:17:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:165.28844833374023ms total_cost_time:165.33517837524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9038 prompt_cache_len:5151 prompt_cache_ratio:0.5699269749944678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 -DEBUG 06-24 20:17:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:26 [manager.py:224] router recive req id 8 cost time 0.109405517578125 s -INFO 06-24 20:17:26 [manager.py:68] detokenization recv req id 8 cost time 0.1112985610961914 s -DEBUG 06-24 20:17:26 [manager.py:391] Prefill Batch: batch_id=172830358942321650316837731707075342368, time:1750767446.9250538s req_ids:[8] -DEBUG 06-24 20:17:26 [manager.py:391] -ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:26 lightllm_req_id:8 first_token_cost:195.87278366088867ms total_cost_time:195.91736793518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9039 prompt_cache_len:5151 prompt_cache_ratio:0.5698639230003318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 -DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.1081991195678711 s -INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.11019468307495117 s -DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=184066609769409526265405192645799088222, time:1750767447.126606s req_ids:[8] -DEBUG 06-24 20:17:27 [manager.py:391] -ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:200.03056526184082ms total_cost_time:200.0751495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9040 prompt_cache_len:5151 prompt_cache_ratio:0.5698008849557522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 -DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10763955116271973 s -INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964417457580566 s -DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=309811716278438147874745734196503976459, time:1750767447.3448002s req_ids:[8] -DEBUG 06-24 20:17:27 [manager.py:391] -ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:214.28823471069336ms total_cost_time:214.33234214782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9041 prompt_cache_len:5151 prompt_cache_ratio:0.5697378608561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 -DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10972857475280762 s -INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.11169075965881348 s -DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=171341820387311616198659689439275757973, time:1750767447.5529075s req_ids:[8] -DEBUG 06-24 20:17:27 [manager.py:391] -ERROR 06-24 20:17:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:206.89082145690918ms total_cost_time:206.93612098693848ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9042 prompt_cache_len:5151 prompt_cache_ratio:0.5696748506967485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 -DEBUG 06-24 20:17:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:27 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s -INFO 06-24 20:17:27 [manager.py:68] detokenization recv req id 8 cost time 0.10989713668823242 s -DEBUG 06-24 20:17:27 [manager.py:391] Prefill Batch: batch_id=204175858112969646678721911364068548984, time:1750767447.765457s req_ids:[8] -DEBUG 06-24 20:17:27 [manager.py:391] -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:27 lightllm_req_id:8 first_token_cost:372.9212284088135ms total_cost_time:372.9667663574219ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9043 prompt_cache_len:5151 prompt_cache_ratio:0.569611854473073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 -DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10844302177429199 s -INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11043357849121094 s -DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=102148160034655764285792572463043678115, time:1750767448.1383677s req_ids:[8] -DEBUG 06-24 20:17:28 [manager.py:391] -ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:207.23581314086914ms total_cost_time:207.27968215942383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9044 prompt_cache_len:5151 prompt_cache_ratio:0.5695488721804511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 -DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s -INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11036992073059082 s -DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=294823730050369668290038911640181721084, time:1750767448.3540196s req_ids:[8] -DEBUG 06-24 20:17:28 [manager.py:391] -ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:207.2749137878418ms total_cost_time:207.3192596435547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9045 prompt_cache_len:5151 prompt_cache_ratio:0.569485903814262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 -DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10800004005432129 s -INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s -DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=121740804808151401357241240308012416184, time:1750767448.5651655s req_ids:[8] -DEBUG 06-24 20:17:28 [manager.py:391] -ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:205.33251762390137ms total_cost_time:205.37686347961426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9046 prompt_cache_len:5151 prompt_cache_ratio:0.5694229493698872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 -DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.1090395450592041 s -INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11165952682495117 s -DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=22273556943361234544611201048355700871, time:1750767448.777193s req_ids:[8] -DEBUG 06-24 20:17:28 [manager.py:391] -ERROR 06-24 20:17:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:203.34672927856445ms total_cost_time:203.39059829711914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9047 prompt_cache_len:5151 prompt_cache_ratio:0.5693600088427103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 -DEBUG 06-24 20:17:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:28 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s -INFO 06-24 20:17:28 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s -DEBUG 06-24 20:17:28 [manager.py:391] Prefill Batch: batch_id=176365281971759274054800114141409402130, time:1750767448.9841177s req_ids:[8] -DEBUG 06-24 20:17:28 [manager.py:391] -ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:28 lightllm_req_id:8 first_token_cost:205.93738555908203ms total_cost_time:205.98173141479492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9048 prompt_cache_len:5151 prompt_cache_ratio:0.5692970822281167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 -DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.106719970703125 s -INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.10845565795898438 s -DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=69539051245927013469841173789718370101, time:1750767449.1968307s req_ids:[8] -DEBUG 06-24 20:17:29 [manager.py:391] -ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:166.42332077026367ms total_cost_time:166.46432876586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9049 prompt_cache_len:5151 prompt_cache_ratio:0.5692341695214941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 -DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10751485824584961 s -INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.10939741134643555 s -DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=248696540663083424477640112466331200452, time:1750767449.3686295s req_ids:[8] -DEBUG 06-24 20:17:29 [manager.py:391] -ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:202.26216316223145ms total_cost_time:202.30555534362793ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9050 prompt_cache_len:5151 prompt_cache_ratio:0.569171270718232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 -DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10850191116333008 s -INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.11038804054260254 s -DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=90237852150894263542821758350988211241, time:1750767449.5745094s req_ids:[8] -DEBUG 06-24 20:17:29 [manager.py:391] -ERROR 06-24 20:17:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:205.17325401306152ms total_cost_time:205.2159309387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9051 prompt_cache_len:5151 prompt_cache_ratio:0.5691083858137223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 -DEBUG 06-24 20:17:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:29 [manager.py:224] router recive req id 8 cost time 0.10884952545166016 s -INFO 06-24 20:17:29 [manager.py:68] detokenization recv req id 8 cost time 0.11072039604187012 s -DEBUG 06-24 20:17:29 [manager.py:391] Prefill Batch: batch_id=112798261252937456210403558858631971237, time:1750767449.784551s req_ids:[8] -DEBUG 06-24 20:17:29 [manager.py:391] -ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:17:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 37959.698 tokens/s -DEBUG 06-24 20:17:30 [stats.py:37] Avg prompt tokens throughput: 37951.196 tokens/s -DEBUG 06-24 20:17:30 [stats.py:37] Avg generate tokens throughput: 8.502 tokens/s -INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:29 lightllm_req_id:8 first_token_cost:374.25947189331055ms total_cost_time:374.30334091186523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9052 prompt_cache_len:5151 prompt_cache_ratio:0.5690455148033584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 -DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10845637321472168 s -INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.11043190956115723 s -DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=46172499855835102535638605376491679381, time:1750767450.1641493s req_ids:[8] -DEBUG 06-24 20:17:30 [manager.py:391] -ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:207.9174518585205ms total_cost_time:207.95965194702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9053 prompt_cache_len:5151 prompt_cache_ratio:0.5689826576825362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 -DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10796356201171875 s -INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s -DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=150432740938159030885871041910383665118, time:1750767450.3774915s req_ids:[8] -DEBUG 06-24 20:17:30 [manager.py:391] -ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:201.35855674743652ms total_cost_time:201.4012336730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9054 prompt_cache_len:5151 prompt_cache_ratio:0.5689198144466534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 -DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10838580131530762 s -INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031365394592285 s -DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=263058377155870401741826142622502305159, time:1750767450.5855207s req_ids:[8] -DEBUG 06-24 20:17:30 [manager.py:391] -ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:205.00802993774414ms total_cost_time:205.05023002624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9055 prompt_cache_len:5151 prompt_cache_ratio:0.5688569850911099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 -DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:30 [batch.py:51] router release req id 8 -INFO 06-24 20:17:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:30 [manager.py:224] router recive req id 8 cost time 0.10722827911376953 s -INFO 06-24 20:17:30 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s -DEBUG 06-24 20:17:30 [manager.py:391] Prefill Batch: batch_id=230652721732071936982121543856970753777, time:1750767450.7956138s req_ids:[8] -DEBUG 06-24 20:17:30 [manager.py:391] -ERROR 06-24 20:17:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:210.04891395568848ms total_cost_time:210.09159088134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9056 prompt_cache_len:5151 prompt_cache_ratio:0.5687941696113075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 -DEBUG 06-24 20:17:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10843086242675781 s -INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.11037874221801758 s -DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=278207321136303310530199541732912331009, time:1750767451.0104725s req_ids:[8] -DEBUG 06-24 20:17:31 [manager.py:391] -ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:30 lightllm_req_id:8 first_token_cost:203.37390899658203ms total_cost_time:203.4165859222412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9057 prompt_cache_len:5151 prompt_cache_ratio:0.5687313680026499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 -DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10772299766540527 s -INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s -DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=155752698134876873499115911755912711820, time:1750767451.2209694s req_ids:[8] -DEBUG 06-24 20:17:31 [manager.py:391] -ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:206.41136169433594ms total_cost_time:206.45427703857422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9058 prompt_cache_len:5151 prompt_cache_ratio:0.5686685802605431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 -DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10894918441772461 s -INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.11092782020568848 s -DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=143443123846673624039053526833371009288, time:1750767451.4346554s req_ids:[8] -DEBUG 06-24 20:17:31 [manager.py:391] -ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:207.75938034057617ms total_cost_time:207.80134201049805ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9059 prompt_cache_len:5151 prompt_cache_ratio:0.5686058063803952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 -DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:31 [manager.py:224] router recive req id 8 cost time 0.10797429084777832 s -INFO 06-24 20:17:31 [manager.py:68] detokenization recv req id 8 cost time 0.10985302925109863 s -DEBUG 06-24 20:17:31 [manager.py:391] Prefill Batch: batch_id=282167227268283711297495119859275404138, time:1750767451.6453416s req_ids:[8] -DEBUG 06-24 20:17:31 [manager.py:391] -ERROR 06-24 20:17:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:366.6999340057373ms total_cost_time:366.741418838501ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9060 prompt_cache_len:5151 prompt_cache_ratio:0.5685430463576159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 -DEBUG 06-24 20:17:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.1090693473815918 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.11113476753234863 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=240630527962494075863602263035231183536, time:1750767452.0170205s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:31 lightllm_req_id:8 first_token_cost:197.45516777038574ms total_cost_time:197.49760627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9061 prompt_cache_len:5151 prompt_cache_ratio:0.5684803001876173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 -DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10878276824951172 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.11053228378295898 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=79683932674048073758332838157254158076, time:1750767452.229957s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:176.06449127197266ms total_cost_time:176.10621452331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9062 prompt_cache_len:5151 prompt_cache_ratio:0.5684175678658133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 -DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10784244537353516 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10953879356384277 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=130564116145833584033082582601019075580, time:1750767452.4035175s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:163.3918285369873ms total_cost_time:163.4347438812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9063 prompt_cache_len:5151 prompt_cache_ratio:0.56835484938762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 -DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10952544212341309 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=234526844808907882324166229756105218512, time:1750767452.5742404s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:194.0765380859375ms total_cost_time:194.1208839416504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9064 prompt_cache_len:5151 prompt_cache_ratio:0.5682921447484555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 -DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10748624801635742 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10972380638122559 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=119075549861745558479836765483475474716, time:1750767452.773023s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:200.042724609375ms total_cost_time:200.08468627929688ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9065 prompt_cache_len:5151 prompt_cache_ratio:0.5682294539437397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 -DEBUG 06-24 20:17:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:32 [manager.py:224] router recive req id 8 cost time 0.10761618614196777 s -INFO 06-24 20:17:32 [manager.py:68] detokenization recv req id 8 cost time 0.10954093933105469 s -DEBUG 06-24 20:17:32 [manager.py:391] Prefill Batch: batch_id=162918449311404895902465367164190345699, time:1750767452.9807253s req_ids:[8] -DEBUG 06-24 20:17:32 [manager.py:391] -ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:32 lightllm_req_id:8 first_token_cost:203.20534706115723ms total_cost_time:203.24969291687012ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9066 prompt_cache_len:5151 prompt_cache_ratio:0.5681667769688947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 -DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10988783836364746 s -INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.11181783676147461 s -DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=146179967116824440059055427832698202671, time:1750767453.1911597s req_ids:[8] -DEBUG 06-24 20:17:33 [manager.py:391] -ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:205.30033111572266ms total_cost_time:205.38902282714844ms,out_token_counter:1 mean_per_token_cost_time: 0.08869171142578125ms prompt_token_num:9067 prompt_cache_len:5151 prompt_cache_ratio:0.5681041138193449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 -DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.1073455810546875 s -INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.10901021957397461 s -DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=144658769682933511972550424461314205895, time:1750767453.4035618s req_ids:[8] -DEBUG 06-24 20:17:33 [manager.py:391] -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:205.8427333831787ms total_cost_time:205.8861255645752ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9068 prompt_cache_len:5151 prompt_cache_ratio:0.5680414644905161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 -DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10862994194030762 s -INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.11048126220703125 s -DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=211262395551471538955593498382367406063, time:1750767453.6156483s req_ids:[8] -DEBUG 06-24 20:17:33 [manager.py:391] -ERROR 06-24 20:17:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:367.1834468841553ms total_cost_time:367.22660064697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9069 prompt_cache_len:5151 prompt_cache_ratio:0.5679788289778366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 -DEBUG 06-24 20:17:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:33 [manager.py:224] router recive req id 8 cost time 0.10819196701049805 s -INFO 06-24 20:17:33 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s -DEBUG 06-24 20:17:33 [manager.py:391] Prefill Batch: batch_id=107820975240906965561470660412339631837, time:1750767453.9860559s req_ids:[8] -DEBUG 06-24 20:17:33 [manager.py:391] -ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:33 lightllm_req_id:8 first_token_cost:200.2890110015869ms total_cost_time:200.3335952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9070 prompt_cache_len:5151 prompt_cache_ratio:0.5679162072767365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 -DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10507702827453613 s -INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10679769515991211 s -DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=205225209936931002587392856928062240427, time:1750767454.1950545s req_ids:[8] -DEBUG 06-24 20:17:34 [manager.py:391] -ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:205.68013191223145ms total_cost_time:205.7020664215088ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:9071 prompt_cache_len:5151 prompt_cache_ratio:0.567853599382648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 -DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10712933540344238 s -INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10892057418823242 s -DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=158194142036279451302455411303820262690, time:1750767454.4101603s req_ids:[8] -DEBUG 06-24 20:17:34 [manager.py:391] -ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:207.57675170898438ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9072 prompt_cache_len:5151 prompt_cache_ratio:0.5677910052910053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 -DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10757827758789062 s -INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.10940098762512207 s -DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=44347171820135762684373692154377166590, time:1750767454.6227002s req_ids:[8] -DEBUG 06-24 20:17:34 [manager.py:391] -ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:210.30497550964355ms total_cost_time:210.34812927246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9073 prompt_cache_len:5151 prompt_cache_ratio:0.5677284249972445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 -DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:34 [manager.py:224] router recive req id 8 cost time 0.10779452323913574 s -INFO 06-24 20:17:34 [manager.py:68] detokenization recv req id 8 cost time 0.1096189022064209 s -DEBUG 06-24 20:17:34 [manager.py:391] Prefill Batch: batch_id=46577318760181191084934310514382168482, time:1750767454.8394735s req_ids:[8] -DEBUG 06-24 20:17:34 [manager.py:391] -ERROR 06-24 20:17:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:206.35986328125ms total_cost_time:206.4046859741211ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9074 prompt_cache_len:5151 prompt_cache_ratio:0.5676658584968041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 -DEBUG 06-24 20:17:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10792279243469238 s -INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.10965871810913086 s -DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=303056134939306169761253453337698456772, time:1750767455.0517132s req_ids:[8] -DEBUG 06-24 20:17:35 [manager.py:391] -ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:34 lightllm_req_id:8 first_token_cost:204.6523094177246ms total_cost_time:204.6959400177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9075 prompt_cache_len:5151 prompt_cache_ratio:0.567603305785124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 -DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.1083219051361084 s -INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.11013603210449219 s -DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=297931845880827676857265525178087800212, time:1750767455.2621672s req_ids:[8] -DEBUG 06-24 20:17:35 [manager.py:391] -ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:206.160306930542ms total_cost_time:206.20274543762207ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9076 prompt_cache_len:5151 prompt_cache_ratio:0.5675407668576465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 -DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10890460014343262 s -INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s -DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=204897955006555471481438489697139057746, time:1750767455.4739044s req_ids:[8] -DEBUG 06-24 20:17:35 [manager.py:391] -ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:208.65845680236816ms total_cost_time:208.70208740234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9077 prompt_cache_len:5151 prompt_cache_ratio:0.567478241709816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 -DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:35 [manager.py:224] router recive req id 8 cost time 0.10822010040283203 s -INFO 06-24 20:17:35 [manager.py:68] detokenization recv req id 8 cost time 0.10999894142150879 s -DEBUG 06-24 20:17:35 [manager.py:391] Prefill Batch: batch_id=336487824731695015526660155724652317351, time:1750767455.6898077s req_ids:[8] -DEBUG 06-24 20:17:35 [manager.py:391] -ERROR 06-24 20:17:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:359.75050926208496ms total_cost_time:359.79557037353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9078 prompt_cache_len:5151 prompt_cache_ratio:0.5674157303370787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 -DEBUG 06-24 20:17:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s -INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s -DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=25067442058673772039438090688048044975, time:1750767456.0530531s req_ids:[8] -DEBUG 06-24 20:17:36 [manager.py:391] -ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:35 lightllm_req_id:8 first_token_cost:205.09004592895508ms total_cost_time:205.13510704040527ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9079 prompt_cache_len:5151 prompt_cache_ratio:0.5673532327348827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 -DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10714077949523926 s -INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10876750946044922 s -DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=172061770431736595010632408677400606391, time:1750767456.2652411s req_ids:[8] -DEBUG 06-24 20:17:36 [manager.py:391] -ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:165.95458984375ms total_cost_time:165.99559783935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9080 prompt_cache_len:5151 prompt_cache_ratio:0.5672907488986784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 -DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s -INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10992598533630371 s -DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=208117479228312208466612396778261736848, time:1750767456.4358087s req_ids:[8] -DEBUG 06-24 20:17:36 [manager.py:391] -ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:196.24924659729004ms total_cost_time:196.29359245300293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9081 prompt_cache_len:5151 prompt_cache_ratio:0.567228278823918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 -DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s -INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.10978412628173828 s -DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=127525080092998326386041601173677015839, time:1750767456.6366534s req_ids:[8] -DEBUG 06-24 20:17:36 [manager.py:391] -ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:200.66285133361816ms total_cost_time:200.70672035217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9082 prompt_cache_len:5151 prompt_cache_ratio:0.567165822506056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 -DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:36 [manager.py:224] router recive req id 8 cost time 0.10831427574157715 s -INFO 06-24 20:17:36 [manager.py:68] detokenization recv req id 8 cost time 0.11000680923461914 s -DEBUG 06-24 20:17:36 [manager.py:391] Prefill Batch: batch_id=206351178039516672771856355312574216531, time:1750767456.8456137s req_ids:[8] -DEBUG 06-24 20:17:36 [manager.py:391] -ERROR 06-24 20:17:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:205.61456680297852ms total_cost_time:205.6746482849121ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9083 prompt_cache_len:5151 prompt_cache_ratio:0.5671033799405483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 -DEBUG 06-24 20:17:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10999631881713867 s -INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.11179184913635254 s -DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=128704728056128551549436374752652465424, time:1750767457.0579705s req_ids:[8] -DEBUG 06-24 20:17:37 [manager.py:391] -ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:36 lightllm_req_id:8 first_token_cost:202.2109031677246ms total_cost_time:202.2531032562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9084 prompt_cache_len:5151 prompt_cache_ratio:0.5670409511228534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 -DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10807204246520996 s -INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.10982942581176758 s -DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=170928825256746817523069897019407902532, time:1750767457.2780888s req_ids:[8] -DEBUG 06-24 20:17:37 [manager.py:391] -ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:219.6662425994873ms total_cost_time:219.710111618042ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9085 prompt_cache_len:5151 prompt_cache_ratio:0.5669785360484315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 -DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.3105344772338867 s -INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.31235814094543457 s -DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=115373669173046901443542345329097316294, time:1750767457.696327s req_ids:[8] -DEBUG 06-24 20:17:37 [manager.py:391] -ERROR 06-24 20:17:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:415.76671600341797ms total_cost_time:415.81130027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9086 prompt_cache_len:5151 prompt_cache_ratio:0.5669161347127449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 -DEBUG 06-24 20:17:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:37 [manager.py:224] router recive req id 8 cost time 0.10874485969543457 s -INFO 06-24 20:17:37 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s -DEBUG 06-24 20:17:37 [manager.py:391] Prefill Batch: batch_id=176432542799910577045655493018298909826, time:1750767457.9167128s req_ids:[8] -DEBUG 06-24 20:17:37 [manager.py:391] -ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:37 lightllm_req_id:8 first_token_cost:211.62009239196777ms total_cost_time:211.66467666625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9087 prompt_cache_len:5151 prompt_cache_ratio:0.5668537471112578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 -DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10857939720153809 s -INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11076688766479492 s -DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=182146823058740913217817137935532365988, time:1750767458.1316423s req_ids:[8] -DEBUG 06-24 20:17:38 [manager.py:391] -ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:207.46111869812012ms total_cost_time:207.5057029724121ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9088 prompt_cache_len:5151 prompt_cache_ratio:0.5667913732394366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 -DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s -INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.10953974723815918 s -DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=89878620528512706800593880300899142713, time:1750767458.3464398s req_ids:[8] -DEBUG 06-24 20:17:38 [manager.py:391] -ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:207.1681022644043ms total_cost_time:207.18860626220703ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9089 prompt_cache_len:5151 prompt_cache_ratio:0.5667290130927495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 -DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10614609718322754 s -INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.10801076889038086 s -DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=142634653769112240282900307423188055642, time:1750767458.5582223s req_ids:[8] -DEBUG 06-24 20:17:38 [manager.py:391] -ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:209.13219451904297ms total_cost_time:209.17582511901855ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9090 prompt_cache_len:5151 prompt_cache_ratio:0.5666666666666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 -DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.11035585403442383 s -INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11220788955688477 s -DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=138316223372688956716404561153924373845, time:1750767458.7721753s req_ids:[8] -DEBUG 06-24 20:17:38 [manager.py:391] -ERROR 06-24 20:17:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:208.3299160003662ms total_cost_time:208.3740234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9091 prompt_cache_len:5151 prompt_cache_ratio:0.5666043339566604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 -DEBUG 06-24 20:17:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:38 [manager.py:224] router recive req id 8 cost time 0.10854911804199219 s -INFO 06-24 20:17:38 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s -DEBUG 06-24 20:17:38 [manager.py:391] Prefill Batch: batch_id=263362447210243111690473103560753751188, time:1750767458.986477s req_ids:[8] -DEBUG 06-24 20:17:38 [manager.py:391] -ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:38 lightllm_req_id:8 first_token_cost:208.37640762329102ms total_cost_time:208.4212303161621ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9092 prompt_cache_len:5151 prompt_cache_ratio:0.566542014958205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 -DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10800814628601074 s -INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10989236831665039 s -DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=76227912907781212266002389760860021171, time:1750767459.200261s req_ids:[8] -DEBUG 06-24 20:17:39 [manager.py:391] -ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:208.53757858276367ms total_cost_time:208.58025550842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9093 prompt_cache_len:5151 prompt_cache_ratio:0.5664797096667766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 -DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s -INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10979557037353516 s -DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=242238091212291235386295463381145533090, time:1750767459.415203s req_ids:[8] -DEBUG 06-24 20:17:39 [manager.py:391] -ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:380.14984130859375ms total_cost_time:380.19442558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9094 prompt_cache_len:5151 prompt_cache_ratio:0.5664174180778535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 -DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10776996612548828 s -INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10909104347229004 s -DEBUG 06-24 20:17:39 [manager.py:391] Prefill Batch: batch_id=302640422053831647328045485026408915332, time:1750767459.7989516s req_ids:[8] -DEBUG 06-24 20:17:39 [manager.py:391] -ERROR 06-24 20:17:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:197.05891609191895ms total_cost_time:197.10230827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9095 prompt_cache_len:5151 prompt_cache_ratio:0.5663551401869159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 -DEBUG 06-24 20:17:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:39 [manager.py:224] router recive req id 8 cost time 0.10747122764587402 s -INFO 06-24 20:17:39 [manager.py:68] detokenization recv req id 8 cost time 0.10871553421020508 s -DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=263564323815847502650207188799560671159, time:1750767460.010209s req_ids:[8] -DEBUG 06-24 20:17:40 [manager.py:391] -ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:17:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 39715.214 tokens/s -DEBUG 06-24 20:17:40 [stats.py:37] Avg prompt tokens throughput: 39706.462 tokens/s -DEBUG 06-24 20:17:40 [stats.py:37] Avg generate tokens throughput: 8.751 tokens/s -INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:39 lightllm_req_id:8 first_token_cost:210.89959144592285ms total_cost_time:210.94393730163574ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9096 prompt_cache_len:5151 prompt_cache_ratio:0.5662928759894459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 -DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10834217071533203 s -INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10960221290588379 s -DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=70963243783554287864233748988416880656, time:1750767460.2310975s req_ids:[8] -DEBUG 06-24 20:17:40 [manager.py:391] -ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:212.6145362854004ms total_cost_time:212.6600742340088ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9097 prompt_cache_len:5151 prompt_cache_ratio:0.5662306254809277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 -DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10781097412109375 s -INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10892009735107422 s -DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=51976469388299548267846981846796998420, time:1750767460.45682s req_ids:[8] -DEBUG 06-24 20:17:40 [manager.py:391] -ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:223.53863716125488ms total_cost_time:223.58274459838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9098 prompt_cache_len:5151 prompt_cache_ratio:0.5661683886568477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 -DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10772061347961426 s -INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.10902929306030273 s -DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=223775799852905756568018494896860267300, time:1750767460.6696396s req_ids:[8] -DEBUG 06-24 20:17:40 [manager.py:391] -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:204.43201065063477ms total_cost_time:204.47659492492676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9099 prompt_cache_len:5151 prompt_cache_ratio:0.5661061655126937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 -DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:40 [manager.py:224] router recive req id 8 cost time 0.10688972473144531 s -INFO 06-24 20:17:40 [manager.py:68] detokenization recv req id 8 cost time 0.108123779296875 s -DEBUG 06-24 20:17:40 [manager.py:391] Prefill Batch: batch_id=58518247834017251204217895661955953350, time:1750767460.8881118s req_ids:[8] -DEBUG 06-24 20:17:40 [manager.py:391] -ERROR 06-24 20:17:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:195.45221328735352ms total_cost_time:195.4967975616455ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9100 prompt_cache_len:5151 prompt_cache_ratio:0.5660439560439561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 -DEBUG 06-24 20:17:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s -INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.1091775894165039 s -DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=150296117049281581506110310316176293477, time:1750767461.0849931s req_ids:[8] -DEBUG 06-24 20:17:41 [manager.py:391] -ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:40 lightllm_req_id:8 first_token_cost:208.18161964416504ms total_cost_time:208.22620391845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9101 prompt_cache_len:5151 prompt_cache_ratio:0.5659817602461268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 -DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.10841155052185059 s -INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.10972237586975098 s -DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=283807308141732662757355879473149139330, time:1750767461.2936542s req_ids:[8] -DEBUG 06-24 20:17:41 [manager.py:391] -ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:211.56024932861328ms total_cost_time:211.61866188049316ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9102 prompt_cache_len:5151 prompt_cache_ratio:0.5659195781147001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 -DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.1072230339050293 s -INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.1084749698638916 s -DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=260761783970519086594320656913962518965, time:1750767461.522846s req_ids:[8] -DEBUG 06-24 20:17:41 [manager.py:391] -ERROR 06-24 20:17:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:445.6913471221924ms total_cost_time:445.7359313964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9103 prompt_cache_len:5151 prompt_cache_ratio:0.565857409645172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 -DEBUG 06-24 20:17:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:41 [manager.py:224] router recive req id 8 cost time 0.1087038516998291 s -INFO 06-24 20:17:41 [manager.py:68] detokenization recv req id 8 cost time 0.10989832878112793 s -DEBUG 06-24 20:17:41 [manager.py:391] Prefill Batch: batch_id=228751951960390180023537572176241025601, time:1750767461.9604895s req_ids:[8] -DEBUG 06-24 20:17:41 [manager.py:391] -ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:41 lightllm_req_id:8 first_token_cost:204.85234260559082ms total_cost_time:204.8962116241455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9104 prompt_cache_len:5151 prompt_cache_ratio:0.5657952548330404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 -DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10744142532348633 s -INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.10933899879455566 s -DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=29451791423767860307010991991050173493, time:1750767462.1734905s req_ids:[8] -DEBUG 06-24 20:17:42 [manager.py:391] -ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:204.1487693786621ms total_cost_time:204.1916847229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9105 prompt_cache_len:5151 prompt_cache_ratio:0.5657331136738056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 -DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s -INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11037683486938477 s -DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=272304765429070754483715665818444208693, time:1750767462.3926804s req_ids:[8] -DEBUG 06-24 20:17:42 [manager.py:391] -ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:210.64472198486328ms total_cost_time:210.68763732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9106 prompt_cache_len:5151 prompt_cache_ratio:0.5656709861629695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 -DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10882401466369629 s -INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11069965362548828 s -DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=200383950542084731665368227552392277527, time:1750767462.59986s req_ids:[8] -DEBUG 06-24 20:17:42 [manager.py:391] -ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:216.48216247558594ms total_cost_time:216.52722358703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9107 prompt_cache_len:5151 prompt_cache_ratio:0.5656088722960361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 -DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:42 [manager.py:224] router recive req id 8 cost time 0.10847091674804688 s -INFO 06-24 20:17:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031556129455566 s -DEBUG 06-24 20:17:42 [manager.py:391] Prefill Batch: batch_id=44745802921034371653264177543152351047, time:1750767462.8273537s req_ids:[8] -DEBUG 06-24 20:17:42 [manager.py:391] -ERROR 06-24 20:17:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:208.91714096069336ms total_cost_time:208.96124839782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9108 prompt_cache_len:5151 prompt_cache_ratio:0.5655467720685112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 -DEBUG 06-24 20:17:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10837960243225098 s -INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.10955262184143066 s -DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=263217397880628502499983181186969279215, time:1750767463.0443947s req_ids:[8] -DEBUG 06-24 20:17:43 [manager.py:391] -ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:42 lightllm_req_id:8 first_token_cost:213.53483200073242ms total_cost_time:213.58013153076172ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9109 prompt_cache_len:5151 prompt_cache_ratio:0.5654846854759029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 -DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10771608352661133 s -INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.10882735252380371 s -DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=193410434992252893072806644306609567462, time:1750767463.2641819s req_ids:[8] -DEBUG 06-24 20:17:43 [manager.py:391] -ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:216.5396213531494ms total_cost_time:216.60518646240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:9110 prompt_cache_len:5151 prompt_cache_ratio:0.5654226125137212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 -DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10950517654418945 s -INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.1106252670288086 s -DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=237669666995255187717755512307023890439, time:1750767463.477472s req_ids:[8] -DEBUG 06-24 20:17:43 [manager.py:391] -ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:375.3175735473633ms total_cost_time:375.37360191345215ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9111 prompt_cache_len:5151 prompt_cache_ratio:0.5653605531774778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 -DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:43 [manager.py:224] router recive req id 8 cost time 0.10981297492980957 s -INFO 06-24 20:17:43 [manager.py:68] detokenization recv req id 8 cost time 0.11100244522094727 s -DEBUG 06-24 20:17:43 [manager.py:391] Prefill Batch: batch_id=63843366113483220398848954350055386240, time:1750767463.8617978s req_ids:[8] -DEBUG 06-24 20:17:43 [manager.py:391] -ERROR 06-24 20:17:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:202.47125625610352ms total_cost_time:202.5151252746582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9112 prompt_cache_len:5151 prompt_cache_ratio:0.5652985074626866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 -DEBUG 06-24 20:17:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s -INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10955262184143066 s -DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=89653658001103304936097936224465830486, time:1750767464.0794227s req_ids:[8] -DEBUG 06-24 20:17:44 [manager.py:391] -ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:43 lightllm_req_id:8 first_token_cost:215.33608436584473ms total_cost_time:215.378999710083ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9113 prompt_cache_len:5151 prompt_cache_ratio:0.5652364753648634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 -DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s -INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10882139205932617 s -DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=128607251516059381620689092756968946354, time:1750767464.2904258s req_ids:[8] -DEBUG 06-24 20:17:44 [manager.py:391] -ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:173.07281494140625ms total_cost_time:173.11477661132812ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9114 prompt_cache_len:5151 prompt_cache_ratio:0.565174456879526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 -DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.1074223518371582 s -INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10846853256225586 s -DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=31223166606576611599743235604451514682, time:1750767464.4679406s req_ids:[8] -DEBUG 06-24 20:17:44 [manager.py:391] -ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:195.7530975341797ms total_cost_time:195.79744338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9115 prompt_cache_len:5151 prompt_cache_ratio:0.5651124520021942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 -DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10872054100036621 s -INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.10981893539428711 s -DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=336084515208951996784736391515857103132, time:1750767464.680144s req_ids:[8] -DEBUG 06-24 20:17:44 [manager.py:391] -ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:213.5787010192871ms total_cost_time:213.63162994384766ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:9116 prompt_cache_len:5151 prompt_cache_ratio:0.5650504607283896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 -DEBUG 06-24 20:17:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:44 [manager.py:224] router recive req id 8 cost time 0.10743880271911621 s -INFO 06-24 20:17:44 [manager.py:68] detokenization recv req id 8 cost time 0.1084895133972168 s -INFO 06-24 20:17:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:17:44 [manager.py:391] Prefill Batch: batch_id=249687192207439366523226706485167352983, time:1750767464.9037137s req_ids:[8] -DEBUG 06-24 20:17:44 [manager.py:391] -ERROR 06-24 20:17:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:212.3849391937256ms total_cost_time:212.42761611938477ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9117 prompt_cache_len:5151 prompt_cache_ratio:0.564988483053636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 -DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s -INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s -DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=137303332310323471266158825118051753771, time:1750767465.1168795s req_ids:[8] -DEBUG 06-24 20:17:45 [manager.py:391] -ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:44 lightllm_req_id:8 first_token_cost:207.43131637573242ms total_cost_time:207.4742317199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9118 prompt_cache_len:5151 prompt_cache_ratio:0.5649265189734591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 -DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.1082618236541748 s -INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10927772521972656 s -DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=248893422368750251349530465773989141894, time:1750767465.3226542s req_ids:[8] -DEBUG 06-24 20:17:45 [manager.py:391] -ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:221.28534317016602ms total_cost_time:221.32635116577148ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9119 prompt_cache_len:5151 prompt_cache_ratio:0.5648645684833863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 -DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s -INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10936641693115234 s -DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=175471572388505947408846434978229123936, time:1750767465.5620322s req_ids:[8] -DEBUG 06-24 20:17:45 [manager.py:391] -ERROR 06-24 20:17:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:385.8215808868408ms total_cost_time:385.8823776245117ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:9120 prompt_cache_len:5151 prompt_cache_ratio:0.5648026315789474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 -DEBUG 06-24 20:17:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:45 [manager.py:224] router recive req id 8 cost time 0.10846948623657227 s -INFO 06-24 20:17:45 [manager.py:68] detokenization recv req id 8 cost time 0.10959267616271973 s -DEBUG 06-24 20:17:45 [manager.py:391] Prefill Batch: batch_id=229000686109359084199154972758708999837, time:1750767465.9417825s req_ids:[8] -DEBUG 06-24 20:17:45 [manager.py:391] -ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:45 lightllm_req_id:8 first_token_cost:200.00505447387695ms total_cost_time:200.04844665527344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9121 prompt_cache_len:5151 prompt_cache_ratio:0.5647407082556737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 -DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s -INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s -DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=321196718147821671106974363826428519690, time:1750767466.1471102s req_ids:[8] -DEBUG 06-24 20:17:46 [manager.py:391] -ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:217.70811080932617ms total_cost_time:217.75126457214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9122 prompt_cache_len:5151 prompt_cache_ratio:0.5646787985090989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 -DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10815286636352539 s -INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10911417007446289 s -DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=26260114944191526090112052080083379405, time:1750767466.3696215s req_ids:[8] -DEBUG 06-24 20:17:46 [manager.py:391] -ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:212.6443386077881ms total_cost_time:212.68582344055176ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:9123 prompt_cache_len:5151 prompt_cache_ratio:0.5646169023347583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 -DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10736417770385742 s -INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10934853553771973 s -DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=130571995628882504245055712227674726714, time:1750767466.5869458s req_ids:[8] -DEBUG 06-24 20:17:46 [manager.py:391] -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:195.23859024047852ms total_cost_time:195.2815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9124 prompt_cache_len:5151 prompt_cache_ratio:0.5645550197281894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 -DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10753417015075684 s -INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938525199890137 s -DEBUG 06-24 20:17:46 [manager.py:391] Prefill Batch: batch_id=48138046015850802785012282112927102814, time:1750767466.7869596s req_ids:[8] -DEBUG 06-24 20:17:46 [manager.py:391] -ERROR 06-24 20:17:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:200.8800506591797ms total_cost_time:200.92177391052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9125 prompt_cache_len:5151 prompt_cache_ratio:0.5644931506849316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 -DEBUG 06-24 20:17:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:46 [batch.py:51] router release req id 8 -INFO 06-24 20:17:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:46 [manager.py:224] router recive req id 8 cost time 0.10873937606811523 s -INFO 06-24 20:17:46 [manager.py:68] detokenization recv req id 8 cost time 0.11086440086364746 s -DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=159136781388559923265721056429741570705, time:1750767467.0088549s req_ids:[8] -DEBUG 06-24 20:17:47 [manager.py:391] -ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:46 lightllm_req_id:8 first_token_cost:216.9356346130371ms total_cost_time:216.97640419006348ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9126 prompt_cache_len:5151 prompt_cache_ratio:0.5644312952005259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 -DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10831403732299805 s -INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s -DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=89080046073269820237940655444263389399, time:1750767467.217674s req_ids:[8] -DEBUG 06-24 20:17:47 [manager.py:391] -ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:204.59890365600586ms total_cost_time:204.63895797729492ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9127 prompt_cache_len:5151 prompt_cache_ratio:0.564369453270516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 -DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10799431800842285 s -INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994219779968262 s -DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=314897341560074333632355274740919162265, time:1750767467.4281774s req_ids:[8] -DEBUG 06-24 20:17:47 [manager.py:391] -ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:206.31742477416992ms total_cost_time:206.3593864440918ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9128 prompt_cache_len:5151 prompt_cache_ratio:0.564307624890447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 -DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:47 [manager.py:224] router recive req id 8 cost time 0.10794615745544434 s -INFO 06-24 20:17:47 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s -DEBUG 06-24 20:17:47 [manager.py:391] Prefill Batch: batch_id=179681104245930564168593553247355939673, time:1750767467.6568062s req_ids:[8] -DEBUG 06-24 20:17:47 [manager.py:391] -ERROR 06-24 20:17:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:401.17835998535156ms total_cost_time:401.22103691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9129 prompt_cache_len:5151 prompt_cache_ratio:0.5642458100558659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 -DEBUG 06-24 20:17:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s -INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.10995364189147949 s -DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=163339092325223953140991650558096356050, time:1750767468.0450165s req_ids:[8] -DEBUG 06-24 20:17:48 [manager.py:391] -ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:47 lightllm_req_id:8 first_token_cost:201.14469528198242ms total_cost_time:201.1857032775879ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9130 prompt_cache_len:5151 prompt_cache_ratio:0.564184008762322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 -DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10825681686401367 s -INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11035370826721191 s -DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=264392873418988787551537759616035734224, time:1750767468.2613184s req_ids:[8] -DEBUG 06-24 20:17:48 [manager.py:391] -ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:212.92400360107422ms total_cost_time:212.9662036895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9131 prompt_cache_len:5151 prompt_cache_ratio:0.5641222210053664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 -DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:48 [batch.py:51] router release req id 8 -INFO 06-24 20:17:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s -INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11011695861816406 s -DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=151232507364924187237706629753431039614, time:1750767468.4736757s req_ids:[8] -DEBUG 06-24 20:17:48 [manager.py:391] -ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.97879791259766ms total_cost_time:207.01956748962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9132 prompt_cache_len:5151 prompt_cache_ratio:0.5640604467805519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 -DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10829973220825195 s -INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11043930053710938 s -DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=79540876517241164630107096143610535091, time:1750767468.687593s req_ids:[8] -DEBUG 06-24 20:17:48 [manager.py:391] -ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:204.93698120117188ms total_cost_time:204.97870445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9133 prompt_cache_len:5151 prompt_cache_ratio:0.5639986860834337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 -DEBUG 06-24 20:17:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:48 [manager.py:224] router recive req id 8 cost time 0.10817217826843262 s -INFO 06-24 20:17:48 [manager.py:68] detokenization recv req id 8 cost time 0.11029314994812012 s -DEBUG 06-24 20:17:48 [manager.py:391] Prefill Batch: batch_id=116460816963910177617645820307223448997, time:1750767468.8991795s req_ids:[8] -DEBUG 06-24 20:17:48 [manager.py:391] -ERROR 06-24 20:17:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.1934471130371ms total_cost_time:206.2373161315918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9134 prompt_cache_len:5151 prompt_cache_ratio:0.5639369389095686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 -DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10797429084777832 s -INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.1099083423614502 s -DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=169626425627265771429907825004631984783, time:1750767469.1138191s req_ids:[8] -DEBUG 06-24 20:17:49 [manager.py:391] -ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:48 lightllm_req_id:8 first_token_cost:206.15673065185547ms total_cost_time:206.19678497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9135 prompt_cache_len:5151 prompt_cache_ratio:0.5638752052545156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 -DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10873222351074219 s -INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.11085367202758789 s -DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=268093803980831210387764013211877021655, time:1750767469.3255413s req_ids:[8] -DEBUG 06-24 20:17:49 [manager.py:391] -ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:203.57894897460938ms total_cost_time:203.62091064453125ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9136 prompt_cache_len:5151 prompt_cache_ratio:0.5638134851138353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 -DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.3112161159515381 s -INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.3132617473602295 s -DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=40780508849527415984221058469774820982, time:1750767469.7391853s req_ids:[8] -DEBUG 06-24 20:17:49 [manager.py:391] -ERROR 06-24 20:17:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:417.52171516418457ms total_cost_time:417.5453186035156ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:9137 prompt_cache_len:5151 prompt_cache_ratio:0.5637517784830908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 -DEBUG 06-24 20:17:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:49 [manager.py:224] router recive req id 8 cost time 0.10578680038452148 s -INFO 06-24 20:17:49 [manager.py:68] detokenization recv req id 8 cost time 0.10777139663696289 s -DEBUG 06-24 20:17:49 [manager.py:391] Prefill Batch: batch_id=31742633132422482604816317503867970792, time:1750767469.959137s req_ids:[8] -DEBUG 06-24 20:17:49 [manager.py:391] -ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:49 lightllm_req_id:8 first_token_cost:209.54442024230957ms total_cost_time:209.56945419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:9138 prompt_cache_len:5151 prompt_cache_ratio:0.5636900853578464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 -DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.10494494438171387 s -INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.10709381103515625 s -DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=224426812676846252755451243756798516722, time:1750767470.189711s req_ids:[8] -DEBUG 06-24 20:17:50 [manager.py:391] -DEBUG 06-24 20:17:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 38852.019 tokens/s -DEBUG 06-24 20:17:50 [stats.py:37] Avg prompt tokens throughput: 38843.598 tokens/s -DEBUG 06-24 20:17:50 [stats.py:37] Avg generate tokens throughput: 8.421 tokens/s -ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:225.65460205078125ms total_cost_time:225.68106651306152ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:9139 prompt_cache_len:5151 prompt_cache_ratio:0.5636284057336689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 -DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.1061105728149414 s -INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.10815787315368652 s -DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=151839675575688813695308174843151410742, time:1750767470.4111671s req_ids:[8] -DEBUG 06-24 20:17:50 [manager.py:391] -ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:216.06135368347168ms total_cost_time:216.10474586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9140 prompt_cache_len:5151 prompt_cache_ratio:0.5635667396061269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 -DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s -INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.11080288887023926 s -DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=267268160903167560583673742182398741362, time:1750767470.6247582s req_ids:[8] -DEBUG 06-24 20:17:50 [manager.py:391] -ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:203.64689826965332ms total_cost_time:203.69315147399902ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9141 prompt_cache_len:5151 prompt_cache_ratio:0.5635050869707909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 -DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:50 [manager.py:224] router recive req id 8 cost time 0.10885405540466309 s -INFO 06-24 20:17:50 [manager.py:68] detokenization recv req id 8 cost time 0.11076545715332031 s -DEBUG 06-24 20:17:50 [manager.py:391] Prefill Batch: batch_id=318366147512324683774626486903332458750, time:1750767470.8458142s req_ids:[8] -DEBUG 06-24 20:17:50 [manager.py:391] -ERROR 06-24 20:17:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:218.42074394226074ms total_cost_time:218.46485137939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9142 prompt_cache_len:5151 prompt_cache_ratio:0.5634434478232334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 -DEBUG 06-24 20:17:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10855865478515625 s -INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.110595703125 s -DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=310012664102105101403075701128513841867, time:1750767471.0596693s req_ids:[8] -DEBUG 06-24 20:17:51 [manager.py:391] -ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:50 lightllm_req_id:8 first_token_cost:211.90905570983887ms total_cost_time:211.95340156555176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9143 prompt_cache_len:5151 prompt_cache_ratio:0.5633818221590288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 -DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s -INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.10993790626525879 s -DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=318784245780716834934045844335098572798, time:1750767471.2862723s req_ids:[8] -DEBUG 06-24 20:17:51 [manager.py:391] -ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:215.69252014160156ms total_cost_time:215.73805809020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9144 prompt_cache_len:5151 prompt_cache_ratio:0.5633202099737533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 -DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10901212692260742 s -INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.11103296279907227 s -DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=200541937897549555525460644966036483649, time:1750767471.513238s req_ids:[8] -DEBUG 06-24 20:17:51 [manager.py:391] -ERROR 06-24 20:17:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:411.73243522644043ms total_cost_time:411.7765426635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9145 prompt_cache_len:5151 prompt_cache_ratio:0.5632586112629853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 -DEBUG 06-24 20:17:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -INFO 06-24 20:17:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:17:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:51 [manager.py:224] router recive req id 8 cost time 0.10890722274780273 s -INFO 06-24 20:17:51 [manager.py:68] detokenization recv req id 8 cost time 0.1104881763458252 s -DEBUG 06-24 20:17:51 [manager.py:391] Prefill Batch: batch_id=51817998049738479400287841593904923331, time:1750767471.9167204s req_ids:[8] -DEBUG 06-24 20:17:51 [manager.py:391] -ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:51 lightllm_req_id:8 first_token_cost:202.6212215423584ms total_cost_time:202.6650905609131ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9146 prompt_cache_len:5151 prompt_cache_ratio:0.5631970260223048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 -DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s -INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.10972261428833008 s -DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=125243099393884674255731487202836969405, time:1750767472.1270342s req_ids:[8] -DEBUG 06-24 20:17:52 [manager.py:391] -ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:207.3037624359131ms total_cost_time:207.3497772216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9147 prompt_cache_len:5151 prompt_cache_ratio:0.5631354542472942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 -DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.107696533203125 s -INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.10968756675720215 s -DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=30639912477286841283430553634645288861, time:1750767472.3412244s req_ids:[8] -DEBUG 06-24 20:17:52 [manager.py:391] -ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:203.216552734375ms total_cost_time:203.2601833343506ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9148 prompt_cache_len:5151 prompt_cache_ratio:0.5630738959335374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 -DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s -INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.11066746711730957 s -DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=266707414069044576990849343319810443864, time:1750767472.5515623s req_ids:[8] -DEBUG 06-24 20:17:52 [manager.py:391] -ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:206.63094520568848ms total_cost_time:206.67386054992676ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9149 prompt_cache_len:5151 prompt_cache_ratio:0.5630123510766204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 -DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10834503173828125 s -INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.1103827953338623 s -DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=94734751028820478190516509802155873537, time:1750767472.7643964s req_ids:[8] -DEBUG 06-24 20:17:52 [manager.py:391] -ERROR 06-24 20:17:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:206.132173538208ms total_cost_time:206.1758041381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9150 prompt_cache_len:5151 prompt_cache_ratio:0.5629508196721311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 -DEBUG 06-24 20:17:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:52 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s -INFO 06-24 20:17:52 [manager.py:68] detokenization recv req id 8 cost time 0.1107335090637207 s -DEBUG 06-24 20:17:52 [manager.py:391] Prefill Batch: batch_id=9552556883086670841568969081011581313, time:1750767472.9903944s req_ids:[8] -DEBUG 06-24 20:17:52 [manager.py:391] -ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:52 lightllm_req_id:8 first_token_cost:221.23169898986816ms total_cost_time:221.27366065979004ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9151 prompt_cache_len:5151 prompt_cache_ratio:0.5628893017156595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 -DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s -INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.11030721664428711 s -INFO 06-24 20:17:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:17:53 [statics_utils.py:24] mean first cost: 228.4931887659228 ms -INFO 06-24 20:17:53 [statics_utils.py:24] mean per token cost: 0.07382688314471175 ms -DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=324711151569472514536706116248160378323, time:1750767473.203997s req_ids:[8] -DEBUG 06-24 20:17:53 [manager.py:391] -ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:209.30719375610352ms total_cost_time:209.35988426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:9152 prompt_cache_len:5151 prompt_cache_ratio:0.5628277972027972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 -DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10935235023498535 s -INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.11139464378356934 s -DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=214006993374620486464232341518666973826, time:1750767473.417888s req_ids:[8] -DEBUG 06-24 20:17:53 [manager.py:391] -ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:209.87915992736816ms total_cost_time:209.92398262023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9153 prompt_cache_len:5151 prompt_cache_ratio:0.562766306129138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 -DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:53 [manager.py:224] router recive req id 8 cost time 0.10793447494506836 s -INFO 06-24 20:17:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100761890411377 s -DEBUG 06-24 20:17:53 [manager.py:391] Prefill Batch: batch_id=253767855611546421588786042887253644750, time:1750767473.6321132s req_ids:[8] -DEBUG 06-24 20:17:53 [manager.py:391] -ERROR 06-24 20:17:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:372.35212326049805ms total_cost_time:372.3728656768799ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:9154 prompt_cache_len:5151 prompt_cache_ratio:0.5627048284902775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 -DEBUG 06-24 20:17:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10731840133666992 s -INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10938501358032227 s -DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=331909048137418625658946409003102388320, time:1750767474.0119374s req_ids:[8] -DEBUG 06-24 20:17:54 [manager.py:391] -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:53 lightllm_req_id:8 first_token_cost:207.20505714416504ms total_cost_time:207.24964141845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9155 prompt_cache_len:5151 prompt_cache_ratio:0.5626433642818132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 -DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10929989814758301 s -INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.11120271682739258 s -DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=287390887733117223773472737635453980604, time:1750767474.2265604s req_ids:[8] -DEBUG 06-24 20:17:54 [manager.py:391] -ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:207.94296264648438ms total_cost_time:207.98683166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9156 prompt_cache_len:5151 prompt_cache_ratio:0.5625819134993447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 -DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10787606239318848 s -INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.11002779006958008 s -DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=283487759017220183620895517911980688769, time:1750767474.441972s req_ids:[8] -DEBUG 06-24 20:17:54 [manager.py:391] -ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:209.20085906982422ms total_cost_time:209.2432975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9157 prompt_cache_len:5151 prompt_cache_ratio:0.5625204761384733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 -DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10753440856933594 s -INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10953354835510254 s -DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=277191049498297397188555628965409990833, time:1750767474.6522064s req_ids:[8] -DEBUG 06-24 20:17:54 [manager.py:391] -ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:200.75011253356934ms total_cost_time:200.79421997070312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9158 prompt_cache_len:5151 prompt_cache_ratio:0.5624590521948024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 -DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:54 [manager.py:224] router recive req id 8 cost time 0.10764384269714355 s -INFO 06-24 20:17:54 [manager.py:68] detokenization recv req id 8 cost time 0.10975313186645508 s -DEBUG 06-24 20:17:54 [manager.py:391] Prefill Batch: batch_id=303703254543840744832812142557198748455, time:1750767474.8732293s req_ids:[8] -DEBUG 06-24 20:17:54 [manager.py:391] -ERROR 06-24 20:17:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:218.07098388671875ms total_cost_time:218.11461448669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9159 prompt_cache_len:5151 prompt_cache_ratio:0.5623976416639371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 -DEBUG 06-24 20:17:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10857176780700684 s -INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.110565185546875 s -DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=240360103760360458085029011617726902207, time:1750767475.0846636s req_ids:[8] -DEBUG 06-24 20:17:55 [manager.py:391] -ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:54 lightllm_req_id:8 first_token_cost:202.8360366821289ms total_cost_time:202.8806209564209ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9160 prompt_cache_len:5151 prompt_cache_ratio:0.5623362445414847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 -DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s -INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s -DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=58917766878084295456849795659540730102, time:1750767475.2919686s req_ids:[8] -DEBUG 06-24 20:17:55 [manager.py:391] -ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:202.9721736907959ms total_cost_time:203.0172348022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9161 prompt_cache_len:5151 prompt_cache_ratio:0.5622748608230542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 -DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s -INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.11113643646240234 s -DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=305639944509063170281314874698033301728, time:1750767475.5022302s req_ids:[8] -DEBUG 06-24 20:17:55 [manager.py:391] -ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:384.38940048217773ms total_cost_time:384.43517684936523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9162 prompt_cache_len:5151 prompt_cache_ratio:0.5622134905042567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 -DEBUG 06-24 20:17:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:55 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s -INFO 06-24 20:17:55 [manager.py:68] detokenization recv req id 8 cost time 0.10993170738220215 s -DEBUG 06-24 20:17:55 [manager.py:391] Prefill Batch: batch_id=29690471294648345669967766610822702366, time:1750767475.8896215s req_ids:[8] -DEBUG 06-24 20:17:55 [manager.py:391] -ERROR 06-24 20:17:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:204.56910133361816ms total_cost_time:204.61440086364746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9163 prompt_cache_len:5151 prompt_cache_ratio:0.562152133580705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 -DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s -INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.1098015308380127 s -DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=220611319126008586915767920987530128428, time:1750767476.1014805s req_ids:[8] -DEBUG 06-24 20:17:56 [manager.py:391] -ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:55 lightllm_req_id:8 first_token_cost:207.08250999450684ms total_cost_time:207.1242332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9164 prompt_cache_len:5151 prompt_cache_ratio:0.562090790048014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 -DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10835552215576172 s -INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s -DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=236789039643149174638226162559960928145, time:1750767476.3129618s req_ids:[8] -DEBUG 06-24 20:17:56 [manager.py:391] -ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:207.52644538879395ms total_cost_time:207.57079124450684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9165 prompt_cache_len:5151 prompt_cache_ratio:0.5620294599018003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 -DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10904622077941895 s -INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.11103224754333496 s -DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=308589171101176568547402010041205559053, time:1750767476.527668s req_ids:[8] -DEBUG 06-24 20:17:56 [manager.py:391] -ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:206.63166046142578ms total_cost_time:206.6953182220459ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:9166 prompt_cache_len:5151 prompt_cache_ratio:0.5619681431376827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 -DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10864520072937012 s -INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.11070895195007324 s -DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=330792659487416461321722315400079216502, time:1750767476.7409718s req_ids:[8] -DEBUG 06-24 20:17:56 [manager.py:391] -ERROR 06-24 20:17:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:203.98283004760742ms total_cost_time:204.0269374847412ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9167 prompt_cache_len:5151 prompt_cache_ratio:0.5619068397512818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 -DEBUG 06-24 20:17:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:56 [manager.py:224] router recive req id 8 cost time 0.10741710662841797 s -INFO 06-24 20:17:56 [manager.py:68] detokenization recv req id 8 cost time 0.10938501358032227 s -DEBUG 06-24 20:17:56 [manager.py:391] Prefill Batch: batch_id=47901682726416836059836200692660336761, time:1750767476.9498265s req_ids:[8] -DEBUG 06-24 20:17:56 [manager.py:391] -ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:56 lightllm_req_id:8 first_token_cost:202.3603916168213ms total_cost_time:202.409029006958ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:9168 prompt_cache_len:5151 prompt_cache_ratio:0.5618455497382199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 -DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10722565650939941 s -INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.10907697677612305 s -DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=2751666521234312501967093984730327320, time:1750767477.170453s req_ids:[8] -DEBUG 06-24 20:17:57 [manager.py:391] -ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:220.99018096923828ms total_cost_time:221.03643417358398ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9169 prompt_cache_len:5151 prompt_cache_ratio:0.5617842730941215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 -DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10926604270935059 s -INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.11126208305358887 s -DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=120130646417052014959436074914889918533, time:1750767477.3853238s req_ids:[8] -DEBUG 06-24 20:17:57 [manager.py:391] -ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:205.7507038116455ms total_cost_time:205.7960033416748ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9170 prompt_cache_len:5151 prompt_cache_ratio:0.5617230098146129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 -DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s -INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096956729888916 s -DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=187012901259584873121280241010995897899, time:1750767477.5939145s req_ids:[8] -DEBUG 06-24 20:17:57 [manager.py:391] -ERROR 06-24 20:17:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:366.91737174987793ms total_cost_time:366.9595718383789ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9171 prompt_cache_len:5151 prompt_cache_ratio:0.5616617598953222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 -DEBUG 06-24 20:17:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:57 [manager.py:224] router recive req id 8 cost time 0.10728216171264648 s -INFO 06-24 20:17:57 [manager.py:68] detokenization recv req id 8 cost time 0.10853028297424316 s -DEBUG 06-24 20:17:57 [manager.py:391] Prefill Batch: batch_id=145257320452728365833048809989216592140, time:1750767477.9662383s req_ids:[8] -DEBUG 06-24 20:17:57 [manager.py:391] -ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:57 lightllm_req_id:8 first_token_cost:204.3168544769287ms total_cost_time:204.3609619140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9172 prompt_cache_len:5151 prompt_cache_ratio:0.5616005233318796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 -DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10955238342285156 s -INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11149144172668457 s -DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=95427247315366658458260292105757213725, time:1750767478.1785378s req_ids:[8] -DEBUG 06-24 20:17:58 [manager.py:391] -ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:205.17849922180176ms total_cost_time:205.26409149169922ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:9173 prompt_cache_len:5151 prompt_cache_ratio:0.5615393001199172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 -DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s -INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11025571823120117 s -DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=100975865134194915189570895870596366508, time:1750767478.4067347s req_ids:[8] -DEBUG 06-24 20:17:58 [manager.py:391] -ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:225.87919235229492ms total_cost_time:225.9237766265869ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9174 prompt_cache_len:5151 prompt_cache_ratio:0.5614780902550687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 -DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10902166366577148 s -INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11071252822875977 s -DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=96322102084514881656518143232030772973, time:1750767478.6241958s req_ids:[8] -DEBUG 06-24 20:17:58 [manager.py:391] -ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:207.53240585327148ms total_cost_time:207.5819969177246ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:9175 prompt_cache_len:5151 prompt_cache_ratio:0.56141689373297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 -DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:58 [manager.py:224] router recive req id 8 cost time 0.10872745513916016 s -INFO 06-24 20:17:58 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s -DEBUG 06-24 20:17:58 [manager.py:391] Prefill Batch: batch_id=318394982054173243714165842305838472535, time:1750767478.8371236s req_ids:[8] -DEBUG 06-24 20:17:58 [manager.py:391] -ERROR 06-24 20:17:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:17:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:204.70714569091797ms total_cost_time:204.75006103515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9176 prompt_cache_len:5151 prompt_cache_ratio:0.5613557105492589 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 -DEBUG 06-24 20:17:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s -INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.10947918891906738 s -DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=137287051620844474141744064446558746008, time:1750767479.0458934s req_ids:[8] -DEBUG 06-24 20:17:59 [manager.py:391] -ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:58 lightllm_req_id:8 first_token_cost:205.45434951782227ms total_cost_time:205.49750328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9177 prompt_cache_len:5151 prompt_cache_ratio:0.5612945406995751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 -DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10834145545959473 s -INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103677749633789 s -DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=303656569711345954085768466428423768664, time:1750767479.2584782s req_ids:[8] -DEBUG 06-24 20:17:59 [manager.py:391] -ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:208.5881233215332ms total_cost_time:208.6319923400879ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9178 prompt_cache_len:5151 prompt_cache_ratio:0.5612333841795598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 -DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10766220092773438 s -INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.11025714874267578 s -DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=121588650703532696032183635134485630204, time:1750767479.4725976s req_ids:[8] -DEBUG 06-24 20:17:59 [manager.py:391] -ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:210.56652069091797ms total_cost_time:210.61134338378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9179 prompt_cache_len:5151 prompt_cache_ratio:0.5611722409848567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 -DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:17:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:17:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:17:59 [manager.py:224] router recive req id 8 cost time 0.10809016227722168 s -INFO 06-24 20:17:59 [manager.py:68] detokenization recv req id 8 cost time 0.11002111434936523 s -DEBUG 06-24 20:17:59 [manager.py:391] Prefill Batch: batch_id=78649853741524488062543666123288661923, time:1750767479.6935399s req_ids:[8] -DEBUG 06-24 20:17:59 [manager.py:391] -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:17:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:17:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:17:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:17:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:374.2671012878418ms total_cost_time:374.3126392364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9180 prompt_cache_len:5151 prompt_cache_ratio:0.5611111111111111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:17:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 -DEBUG 06-24 20:17:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:17:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:17:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:17:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:17:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10827326774597168 s -INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.11011338233947754 s -DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=334951981988306762992992621850953188183, time:1750767480.0709212s req_ids:[8] -DEBUG 06-24 20:18:00 [manager.py:391] -ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:17:59 lightllm_req_id:8 first_token_cost:212.01562881469727ms total_cost_time:212.06068992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9181 prompt_cache_len:5151 prompt_cache_ratio:0.5610499945539702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 -DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10805463790893555 s -INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006879806518555 s -DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=141463028044222706786599367214542703915, time:1750767480.2843537s req_ids:[8] -DEBUG 06-24 20:18:00 [manager.py:391] -DEBUG 06-24 20:18:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 39031.682 tokens/s -DEBUG 06-24 20:18:00 [stats.py:37] Avg prompt tokens throughput: 39023.162 tokens/s -DEBUG 06-24 20:18:00 [stats.py:37] Avg generate tokens throughput: 8.519 tokens/s -ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:210.74295043945312ms total_cost_time:210.7858657836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9182 prompt_cache_len:5151 prompt_cache_ratio:0.560988891309083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 -DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.1053628921508789 s -INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10784459114074707 s -DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=123848837855498994080969459403205099170, time:1750767480.502519s req_ids:[8] -DEBUG 06-24 20:18:00 [manager.py:391] -ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:202.67486572265625ms total_cost_time:202.71944999694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9183 prompt_cache_len:5151 prompt_cache_ratio:0.5609278013721006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 -DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.1080477237701416 s -INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10992217063903809 s -DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=146032783821635420479968329603284437169, time:1750767480.7128665s req_ids:[8] -DEBUG 06-24 20:18:00 [manager.py:391] -ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:170.00269889831543ms total_cost_time:170.0448989868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9184 prompt_cache_len:5151 prompt_cache_ratio:0.5608667247386759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 -DEBUG 06-24 20:18:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:00 [manager.py:224] router recive req id 8 cost time 0.10802102088928223 s -INFO 06-24 20:18:00 [manager.py:68] detokenization recv req id 8 cost time 0.10985898971557617 s -DEBUG 06-24 20:18:00 [manager.py:391] Prefill Batch: batch_id=310822315137735517657485655362343319827, time:1750767480.8869317s req_ids:[8] -DEBUG 06-24 20:18:00 [manager.py:391] -ERROR 06-24 20:18:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:198.7283229827881ms total_cost_time:198.81248474121094ms,out_token_counter:1 mean_per_token_cost_time: 0.08416175842285156ms prompt_token_num:9185 prompt_cache_len:5151 prompt_cache_ratio:0.5608056614044638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 -DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s -INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.11035466194152832 s -DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=216477383643683980862385346214404857276, time:1750767481.0965877s req_ids:[8] -DEBUG 06-24 20:18:01 [manager.py:391] -ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:00 lightllm_req_id:8 first_token_cost:210.2828025817871ms total_cost_time:210.3273868560791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9186 prompt_cache_len:5151 prompt_cache_ratio:0.5607446113651209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 -DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.10789704322814941 s -INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.10989069938659668 s -DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=157400207346203648751317866692448464223, time:1750767481.3097582s req_ids:[8] -DEBUG 06-24 20:18:01 [manager.py:391] -ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:206.2993049621582ms total_cost_time:206.345796585083ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9187 prompt_cache_len:5151 prompt_cache_ratio:0.5606835746163057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 -DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.3104102611541748 s -INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.31241559982299805 s -DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=337259564766646680565441084991899011358, time:1750767481.7277277s req_ids:[8] -DEBUG 06-24 20:18:01 [manager.py:391] -ERROR 06-24 20:18:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:419.4505214691162ms total_cost_time:419.4951057434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9188 prompt_cache_len:5151 prompt_cache_ratio:0.5606225511536788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 -DEBUG 06-24 20:18:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:01 [manager.py:224] router recive req id 8 cost time 0.1077888011932373 s -INFO 06-24 20:18:01 [manager.py:68] detokenization recv req id 8 cost time 0.10938882827758789 s -DEBUG 06-24 20:18:01 [manager.py:391] Prefill Batch: batch_id=245808586545818793940237183825811874109, time:1750767481.9486392s req_ids:[8] -DEBUG 06-24 20:18:01 [manager.py:391] -ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:01 lightllm_req_id:8 first_token_cost:209.63335037231445ms total_cost_time:209.67721939086914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9189 prompt_cache_len:5151 prompt_cache_ratio:0.5605615409729023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 -DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10892820358276367 s -INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s -DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=9127106633765892834038504792990398407, time:1750767482.1621401s req_ids:[8] -DEBUG 06-24 20:18:02 [manager.py:391] -ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:208.92596244812012ms total_cost_time:208.9710235595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9190 prompt_cache_len:5151 prompt_cache_ratio:0.5605005440696409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 -DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s -INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11028337478637695 s -DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=253111041487356381843795679361273236578, time:1750767482.375281s req_ids:[8] -DEBUG 06-24 20:18:02 [manager.py:391] -ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:205.02424240112305ms total_cost_time:205.06811141967773ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9191 prompt_cache_len:5151 prompt_cache_ratio:0.5604395604395604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 -DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10712623596191406 s -INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.10904693603515625 s -DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=295304094055455162681260328310239442188, time:1750767482.5975747s req_ids:[8] -DEBUG 06-24 20:18:02 [manager.py:391] -ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:222.4125862121582ms total_cost_time:222.4557399749756ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9192 prompt_cache_len:5151 prompt_cache_ratio:0.560378590078329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 -DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:02 [manager.py:224] router recive req id 8 cost time 0.10883975028991699 s -INFO 06-24 20:18:02 [manager.py:68] detokenization recv req id 8 cost time 0.11096668243408203 s -DEBUG 06-24 20:18:02 [manager.py:391] Prefill Batch: batch_id=214888043985544017507894540235459245545, time:1750767482.8149986s req_ids:[8] -DEBUG 06-24 20:18:02 [manager.py:391] -ERROR 06-24 20:18:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:209.46741104125977ms total_cost_time:209.51223373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9193 prompt_cache_len:5151 prompt_cache_ratio:0.5603176329816164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 -DEBUG 06-24 20:18:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10790586471557617 s -INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100149154663086 s -DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=83934191685152002874158912726003421552, time:1750767483.0288465s req_ids:[8] -DEBUG 06-24 20:18:03 [manager.py:391] -ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:02 lightllm_req_id:8 first_token_cost:206.99691772460938ms total_cost_time:207.04317092895508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9194 prompt_cache_len:5151 prompt_cache_ratio:0.5602566891450946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 -DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s -INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s -DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=196632631739059630910480213559346553875, time:1750767483.243221s req_ids:[8] -DEBUG 06-24 20:18:03 [manager.py:391] -ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:207.5943946838379ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9195 prompt_cache_len:5151 prompt_cache_ratio:0.5601957585644372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 -DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10853862762451172 s -INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s -DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=263542503111660210103797759936809571995, time:1750767483.4565628s req_ids:[8] -DEBUG 06-24 20:18:03 [manager.py:391] -ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:379.7135353088379ms total_cost_time:379.7571659088135ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9196 prompt_cache_len:5151 prompt_cache_ratio:0.5601348412353198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 -DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:03 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s -INFO 06-24 20:18:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100318431854248 s -DEBUG 06-24 20:18:03 [manager.py:391] Prefill Batch: batch_id=10866354140679370042787661828376891234, time:1750767483.8377013s req_ids:[8] -DEBUG 06-24 20:18:03 [manager.py:391] -ERROR 06-24 20:18:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:186.171293258667ms total_cost_time:186.19036674499512ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:9197 prompt_cache_len:5151 prompt_cache_ratio:0.5600739371534196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 -DEBUG 06-24 20:18:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10355091094970703 s -INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10531949996948242 s -DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=116403008540925730974961226649763478141, time:1750767484.0284977s req_ids:[8] -DEBUG 06-24 20:18:04 [manager.py:391] -ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:03 lightllm_req_id:8 first_token_cost:161.64445877075195ms total_cost_time:161.66973114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:9198 prompt_cache_len:5151 prompt_cache_ratio:0.5600130463144162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 -DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10378670692443848 s -INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10484933853149414 s -DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=288617315235715295294410479010533969353, time:1750767484.1955159s req_ids:[8] -DEBUG 06-24 20:18:04 [manager.py:391] -ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:193.2976245880127ms total_cost_time:193.31908226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:9199 prompt_cache_len:5151 prompt_cache_ratio:0.5599521687139907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 -DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.1043548583984375 s -INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10623621940612793 s -DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=109473945284411648595648756599354109041, time:1750767484.3974924s req_ids:[8] -DEBUG 06-24 20:18:04 [manager.py:391] -ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:195.57499885559082ms total_cost_time:195.59597969055176ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9200 prompt_cache_len:5151 prompt_cache_ratio:0.5598913043478261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 -DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10421562194824219 s -INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10519051551818848 s -DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=253448908328166926368113286808427068371, time:1750767484.5957174s req_ids:[8] -DEBUG 06-24 20:18:04 [manager.py:391] -ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:209.14649963378906ms total_cost_time:209.17057991027832ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:9201 prompt_cache_len:5151 prompt_cache_ratio:0.5598304532116074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 -DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:04 [manager.py:224] router recive req id 8 cost time 0.10439085960388184 s -INFO 06-24 20:18:04 [manager.py:68] detokenization recv req id 8 cost time 0.10626935958862305 s -DEBUG 06-24 20:18:04 [manager.py:391] Prefill Batch: batch_id=209749523721773349881434321096166843830, time:1750767484.8089638s req_ids:[8] -DEBUG 06-24 20:18:04 [manager.py:391] -ERROR 06-24 20:18:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:209.5654010772705ms total_cost_time:209.58590507507324ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9202 prompt_cache_len:5151 prompt_cache_ratio:0.5597696153010215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 -DEBUG 06-24 20:18:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10353398323059082 s -INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10541343688964844 s -DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=276266234381847925214626001471024049600, time:1750767485.0224288s req_ids:[8] -DEBUG 06-24 20:18:05 [manager.py:391] -ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:04 lightllm_req_id:8 first_token_cost:168.57004165649414ms total_cost_time:168.59102249145508ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9203 prompt_cache_len:5151 prompt_cache_ratio:0.559708790611757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 -DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10354781150817871 s -INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10534286499023438 s -DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=286509355390923617819774299002137245791, time:1750767485.1954172s req_ids:[8] -DEBUG 06-24 20:18:05 [manager.py:391] -ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:169.17133331298828ms total_cost_time:169.19326782226562ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:9204 prompt_cache_len:5151 prompt_cache_ratio:0.5596479791395046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 -DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10430097579956055 s -INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10606932640075684 s -DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=301363027013557716718759043022598039087, time:1750767485.368912s req_ids:[8] -DEBUG 06-24 20:18:05 [manager.py:391] -ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:373.7328052520752ms total_cost_time:373.75593185424805ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:9205 prompt_cache_len:5151 prompt_cache_ratio:0.5595871808799565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 -DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10645341873168945 s -INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.10819268226623535 s -DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=243044375339155104131630395535847049845, time:1750767485.7443063s req_ids:[8] -DEBUG 06-24 20:18:05 [manager.py:391] -ERROR 06-24 20:18:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:179.96478080749512ms total_cost_time:179.99267578125ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9206 prompt_cache_len:5151 prompt_cache_ratio:0.5595263958288073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 -DEBUG 06-24 20:18:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:05 [manager.py:224] router recive req id 8 cost time 0.10833430290222168 s -INFO 06-24 20:18:05 [manager.py:68] detokenization recv req id 8 cost time 0.11029529571533203 s -DEBUG 06-24 20:18:05 [manager.py:391] Prefill Batch: batch_id=291116813354111419392828936215741224674, time:1750767485.9328382s req_ids:[8] -DEBUG 06-24 20:18:05 [manager.py:391] -ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:05 lightllm_req_id:8 first_token_cost:210.22534370422363ms total_cost_time:210.28447151184082ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:9207 prompt_cache_len:5151 prompt_cache_ratio:0.559465623981753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 -DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.1106710433959961 s -INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11278367042541504 s -DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=157300921725614050422063393098954047532, time:1750767486.147935s req_ids:[8] -DEBUG 06-24 20:18:06 [manager.py:391] -ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:208.96291732788086ms total_cost_time:209.00893211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9208 prompt_cache_len:5151 prompt_cache_ratio:0.5594048653344917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 -DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10781669616699219 s -INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.10979843139648438 s -DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=136540907133243327421181140505822317347, time:1750767486.3594477s req_ids:[8] -DEBUG 06-24 20:18:06 [manager.py:391] -ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:204.2832374572754ms total_cost_time:204.32686805725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9209 prompt_cache_len:5151 prompt_cache_ratio:0.5593441198827234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 -DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10934066772460938 s -INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11043119430541992 s -DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=38847635736323849600539643608520480090, time:1750767486.572793s req_ids:[8] -DEBUG 06-24 20:18:06 [manager.py:391] -ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:203.63831520080566ms total_cost_time:203.68146896362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9210 prompt_cache_len:5151 prompt_cache_ratio:0.5592833876221498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 -DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.10918641090393066 s -INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11118936538696289 s -DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=278353172333314132252100784832366968289, time:1750767486.7797415s req_ids:[8] -DEBUG 06-24 20:18:06 [manager.py:391] -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:207.59153366088867ms total_cost_time:207.63373374938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9211 prompt_cache_len:5151 prompt_cache_ratio:0.5592226685484747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 -DEBUG 06-24 20:18:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:06 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s -INFO 06-24 20:18:06 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s -DEBUG 06-24 20:18:06 [manager.py:391] Prefill Batch: batch_id=291041939818782909066901000207242699234, time:1750767486.9952748s req_ids:[8] -DEBUG 06-24 20:18:06 [manager.py:391] -ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:06 lightllm_req_id:8 first_token_cost:208.02879333496094ms total_cost_time:208.07147026062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9212 prompt_cache_len:5151 prompt_cache_ratio:0.5591619626574034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 -DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10822534561157227 s -INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.11018252372741699 s -DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=54775796579205562981517156095891831232, time:1750767487.2134013s req_ids:[8] -DEBUG 06-24 20:18:07 [manager.py:391] -ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:381.70766830444336ms total_cost_time:381.75082206726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9213 prompt_cache_len:5151 prompt_cache_ratio:0.5591012699446435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 -DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10726642608642578 s -INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.1090385913848877 s -DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=291840191099463998705193355317684656286, time:1750767487.5977561s req_ids:[8] -DEBUG 06-24 20:18:07 [manager.py:391] -ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:171.67425155639648ms total_cost_time:171.71549797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9214 prompt_cache_len:5151 prompt_cache_ratio:0.559040590405904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 -DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10785436630249023 s -INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.10976839065551758 s -DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=313584767816956809456261967902595767443, time:1750767487.7722263s req_ids:[8] -DEBUG 06-24 20:18:07 [manager.py:391] -ERROR 06-24 20:18:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:197.4942684173584ms total_cost_time:197.53766059875488ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9215 prompt_cache_len:5151 prompt_cache_ratio:0.5589799240368963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 -DEBUG 06-24 20:18:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:07 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s -INFO 06-24 20:18:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052632331848145 s -DEBUG 06-24 20:18:07 [manager.py:391] Prefill Batch: batch_id=294917274875824626067374536945453251015, time:1750767487.9754372s req_ids:[8] -DEBUG 06-24 20:18:07 [manager.py:391] -ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:07 lightllm_req_id:8 first_token_cost:203.4780979156494ms total_cost_time:203.5212516784668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9216 prompt_cache_len:5151 prompt_cache_ratio:0.5589192708333334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 -DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10718655586242676 s -INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.10896897315979004 s -DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=249188649074010266052419688548737651785, time:1750767488.1889348s req_ids:[8] -DEBUG 06-24 20:18:08 [manager.py:391] -ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:169.996976852417ms total_cost_time:170.04132270812988ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9217 prompt_cache_len:5151 prompt_cache_ratio:0.5588586307909298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 -DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10850834846496582 s -INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11036348342895508 s -DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=113807042986531951054382870618652886722, time:1750767488.3604047s req_ids:[8] -DEBUG 06-24 20:18:08 [manager.py:391] -ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:200.98400115966797ms total_cost_time:201.02810859680176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9218 prompt_cache_len:5151 prompt_cache_ratio:0.5587980039054025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 -DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10933995246887207 s -INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11136507987976074 s -DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=245462981656364611925886645624053020232, time:1750767488.569732s req_ids:[8] -DEBUG 06-24 20:18:08 [manager.py:391] -ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:210.60657501220703ms total_cost_time:210.65402030944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9219 prompt_cache_len:5151 prompt_cache_ratio:0.5587373901724699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 -DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10848426818847656 s -INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11044025421142578 s -DEBUG 06-24 20:18:08 [manager.py:391] Prefill Batch: batch_id=142689946323887981608749569061928626350, time:1750767488.7905803s req_ids:[8] -DEBUG 06-24 20:18:08 [manager.py:391] -ERROR 06-24 20:18:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:209.57398414611816ms total_cost_time:209.61856842041016ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9220 prompt_cache_len:5151 prompt_cache_ratio:0.5586767895878525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 -DEBUG 06-24 20:18:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:08 [manager.py:224] router recive req id 8 cost time 0.10766053199768066 s -INFO 06-24 20:18:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s -DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=155902034642491250222671982729313440036, time:1750767489.0026062s req_ids:[8] -DEBUG 06-24 20:18:09 [manager.py:391] -ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:08 lightllm_req_id:8 first_token_cost:199.93853569030762ms total_cost_time:199.98455047607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9221 prompt_cache_len:5151 prompt_cache_ratio:0.5586162021472725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 -DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10877132415771484 s -INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.11083006858825684 s -DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=318814423123739856498830528638324771759, time:1750767489.208464s req_ids:[8] -DEBUG 06-24 20:18:09 [manager.py:391] -ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:373.1358051300049ms total_cost_time:373.1799125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9222 prompt_cache_len:5151 prompt_cache_ratio:0.5585556278464542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 -DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:09 [batch.py:51] router release req id 8 -INFO 06-24 20:18:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10887670516967773 s -INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s -DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=68798332249564284410481520464894479180, time:1750767489.5835423s req_ids:[8] -DEBUG 06-24 20:18:09 [manager.py:391] -ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:199.30291175842285ms total_cost_time:199.34487342834473ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9223 prompt_cache_len:5151 prompt_cache_ratio:0.5584950666811233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 -DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10753130912780762 s -INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.10953783988952637 s -DEBUG 06-24 20:18:09 [manager.py:391] Prefill Batch: batch_id=187937522190474941985785309124092816112, time:1750767489.791203s req_ids:[8] -DEBUG 06-24 20:18:09 [manager.py:391] -ERROR 06-24 20:18:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:208.6939811706543ms total_cost_time:208.75144004821777ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9224 prompt_cache_len:5151 prompt_cache_ratio:0.5584345186470078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 -DEBUG 06-24 20:18:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:09 [manager.py:224] router recive req id 8 cost time 0.10855317115783691 s -INFO 06-24 20:18:09 [manager.py:68] detokenization recv req id 8 cost time 0.11047530174255371 s -DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=61687263355418844533672509070856761787, time:1750767490.0062573s req_ids:[8] -DEBUG 06-24 20:18:10 [manager.py:391] -ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:09 lightllm_req_id:8 first_token_cost:209.9595069885254ms total_cost_time:210.00266075134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9225 prompt_cache_len:5151 prompt_cache_ratio:0.5583739837398374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 -DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10782790184020996 s -INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.11033797264099121 s -DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=270942227467139598072154930580873847292, time:1750767490.2213788s req_ids:[8] -DEBUG 06-24 20:18:10 [manager.py:391] -ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:18:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 40406.846 tokens/s -DEBUG 06-24 20:18:10 [stats.py:37] Avg prompt tokens throughput: 40397.969 tokens/s -DEBUG 06-24 20:18:10 [stats.py:37] Avg generate tokens throughput: 8.878 tokens/s -INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:207.85975456237793ms total_cost_time:207.90433883666992ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9226 prompt_cache_len:5151 prompt_cache_ratio:0.5583134619553436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 -DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10894584655761719 s -INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s -DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=161604242942835847877440478245941411520, time:1750767490.4347165s req_ids:[8] -DEBUG 06-24 20:18:10 [manager.py:391] -ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:205.6138515472412ms total_cost_time:205.6584358215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9227 prompt_cache_len:5151 prompt_cache_ratio:0.5582529532892598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 -DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.107818603515625 s -INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956764221191406 s -DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=41541567059871594269309513321040468585, time:1750767490.6470358s req_ids:[8] -DEBUG 06-24 20:18:10 [manager.py:391] -ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:206.68745040893555ms total_cost_time:206.73060417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9228 prompt_cache_len:5151 prompt_cache_ratio:0.5581924577373212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 -DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:10 [manager.py:224] router recive req id 8 cost time 0.10746645927429199 s -INFO 06-24 20:18:10 [manager.py:68] detokenization recv req id 8 cost time 0.10937380790710449 s -DEBUG 06-24 20:18:10 [manager.py:391] Prefill Batch: batch_id=294037725165213729733911474611206253000, time:1750767490.8599105s req_ids:[8] -DEBUG 06-24 20:18:10 [manager.py:391] -ERROR 06-24 20:18:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:168.32256317138672ms total_cost_time:168.3657169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9229 prompt_cache_len:5151 prompt_cache_ratio:0.5581319752952649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 -DEBUG 06-24 20:18:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s -INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s -DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=333407896447195116791964486331549569617, time:1750767491.033047s req_ids:[8] -DEBUG 06-24 20:18:11 [manager.py:391] -ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:10 lightllm_req_id:8 first_token_cost:200.3951072692871ms total_cost_time:200.4525661468506ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9230 prompt_cache_len:5151 prompt_cache_ratio:0.5580715059588299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 -DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10868453979492188 s -INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.11070799827575684 s -DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=202693955467251572401102332806698819724, time:1750767491.2391808s req_ids:[8] -DEBUG 06-24 20:18:11 [manager.py:391] -ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:366.76025390625ms total_cost_time:366.804838180542ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9231 prompt_cache_len:5151 prompt_cache_ratio:0.5580110497237569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 -DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.10784006118774414 s -INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.10991334915161133 s -DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=186146418654037744484623706774939876899, time:1750767491.610858s req_ids:[8] -DEBUG 06-24 20:18:11 [manager.py:391] -ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:208.60004425048828ms total_cost_time:208.64367485046387ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9232 prompt_cache_len:5151 prompt_cache_ratio:0.5579506065857885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 -DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:11 [manager.py:224] router recive req id 8 cost time 0.1072852611541748 s -INFO 06-24 20:18:11 [manager.py:68] detokenization recv req id 8 cost time 0.1093437671661377 s -DEBUG 06-24 20:18:11 [manager.py:391] Prefill Batch: batch_id=6149988071369590703869920764234230863, time:1750767491.8253691s req_ids:[8] -DEBUG 06-24 20:18:11 [manager.py:391] -ERROR 06-24 20:18:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:208.80532264709473ms total_cost_time:208.8484764099121ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9233 prompt_cache_len:5151 prompt_cache_ratio:0.5578901765406693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 -DEBUG 06-24 20:18:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10888051986694336 s -INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s -DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=8945734480304843139971568632826844243, time:1750767492.0410483s req_ids:[8] -DEBUG 06-24 20:18:12 [manager.py:391] -ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:11 lightllm_req_id:8 first_token_cost:206.53080940246582ms total_cost_time:206.5749168395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9234 prompt_cache_len:5151 prompt_cache_ratio:0.5578297595841456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 -DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10751771926879883 s -INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950446128845215 s -DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=121662816512365697660177215695735140528, time:1750767492.2681649s req_ids:[8] -DEBUG 06-24 20:18:12 [manager.py:391] -ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:223.8781452178955ms total_cost_time:223.93417358398438ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9235 prompt_cache_len:5151 prompt_cache_ratio:0.5577693557119654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 -DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10810589790344238 s -INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s -DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=28106967925632819298593912517321093571, time:1750767492.4884746s req_ids:[8] -DEBUG 06-24 20:18:12 [manager.py:391] -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:172.30868339538574ms total_cost_time:172.35231399536133ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9236 prompt_cache_len:5151 prompt_cache_ratio:0.5577089649198788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 -DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10757255554199219 s -INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959076881408691 s -DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=2563500130423229341058323665593143397, time:1750767492.6611607s req_ids:[8] -DEBUG 06-24 20:18:12 [manager.py:391] -ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:201.88498497009277ms total_cost_time:201.92718505859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9237 prompt_cache_len:5151 prompt_cache_ratio:0.5576485872036375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 -DEBUG 06-24 20:18:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:12 [manager.py:224] router recive req id 8 cost time 0.10846853256225586 s -INFO 06-24 20:18:12 [manager.py:68] detokenization recv req id 8 cost time 0.11049723625183105 s -DEBUG 06-24 20:18:12 [manager.py:391] Prefill Batch: batch_id=224317777876410552669034916666169451278, time:1750767492.8704154s req_ids:[8] -DEBUG 06-24 20:18:12 [manager.py:391] -ERROR 06-24 20:18:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:209.47527885437012ms total_cost_time:209.5198631286621ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9238 prompt_cache_len:5151 prompt_cache_ratio:0.5575882225589954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 -DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.3102076053619385 s -INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.3121776580810547 s -DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=134881263633653972088042414046087733416, time:1750767493.2959967s req_ids:[8] -DEBUG 06-24 20:18:13 [manager.py:391] -ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:12 lightllm_req_id:8 first_token_cost:428.6642074584961ms total_cost_time:428.7071228027344ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9239 prompt_cache_len:5151 prompt_cache_ratio:0.557527870981708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 -DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.10733175277709961 s -INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.1093282699584961 s -DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=236522868321740353771811365255938366896, time:1750767493.5195332s req_ids:[8] -DEBUG 06-24 20:18:13 [manager.py:391] -ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:210.6010913848877ms total_cost_time:210.64448356628418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9240 prompt_cache_len:5151 prompt_cache_ratio:0.5574675324675324 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 -DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.10880756378173828 s -INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s -DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=272753175952379862801614731178030771490, time:1750767493.744156s req_ids:[8] -DEBUG 06-24 20:18:13 [manager.py:391] -ERROR 06-24 20:18:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:216.40920639038086ms total_cost_time:216.45236015319824ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9241 prompt_cache_len:5151 prompt_cache_ratio:0.5574072070122281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 -DEBUG 06-24 20:18:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:13 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s -INFO 06-24 20:18:13 [manager.py:68] detokenization recv req id 8 cost time 0.11057710647583008 s -DEBUG 06-24 20:18:13 [manager.py:391] Prefill Batch: batch_id=61308780382437033073465857022945156385, time:1750767493.9592402s req_ids:[8] -DEBUG 06-24 20:18:13 [manager.py:391] -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:13 lightllm_req_id:8 first_token_cost:208.50777626037598ms total_cost_time:208.55164527893066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9242 prompt_cache_len:5151 prompt_cache_ratio:0.557346894611556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 -DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10876655578613281 s -INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072850227355957 s -DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=162267478617837692775161738158906774165, time:1750767494.1726017s req_ids:[8] -DEBUG 06-24 20:18:14 [manager.py:391] -ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:207.9598903656006ms total_cost_time:207.98277854919434ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:9243 prompt_cache_len:5151 prompt_cache_ratio:0.5572865952612788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 -DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10710334777832031 s -INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.1090860366821289 s -DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=52915046624392844168256738402097734950, time:1750767494.3875766s req_ids:[8] -DEBUG 06-24 20:18:14 [manager.py:391] -ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:212.02325820922852ms total_cost_time:212.0671272277832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9244 prompt_cache_len:5151 prompt_cache_ratio:0.5572263089571614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 -DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10860586166381836 s -INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11048126220703125 s -DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=307328457360070613572742122512232295155, time:1750767494.6012526s req_ids:[8] -DEBUG 06-24 20:18:14 [manager.py:391] -ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:173.07376861572266ms total_cost_time:173.1276512145996ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:9245 prompt_cache_len:5151 prompt_cache_ratio:0.5571660356949703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 -DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.1083214282989502 s -INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11020350456237793 s -DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=245947172845256045016585494392602646452, time:1750767494.7781713s req_ids:[8] -DEBUG 06-24 20:18:14 [manager.py:391] -ERROR 06-24 20:18:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:199.2805004119873ms total_cost_time:199.32174682617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9246 prompt_cache_len:5151 prompt_cache_ratio:0.5571057754704737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 -DEBUG 06-24 20:18:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:14 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s -INFO 06-24 20:18:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072921752929688 s -DEBUG 06-24 20:18:14 [manager.py:391] Prefill Batch: batch_id=299830371531881070758015621079097548133, time:1750767494.9832478s req_ids:[8] -DEBUG 06-24 20:18:14 [manager.py:391] -ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:14 lightllm_req_id:8 first_token_cost:381.1075687408447ms total_cost_time:381.1523914337158ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9247 prompt_cache_len:5151 prompt_cache_ratio:0.557045528279442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 -DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10730576515197754 s -INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.10923504829406738 s -DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=16670438905765915958486712514952063797, time:1750767495.3720822s req_ids:[8] -DEBUG 06-24 20:18:15 [manager.py:391] -ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:205.94477653503418ms total_cost_time:205.98769187927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9248 prompt_cache_len:5151 prompt_cache_ratio:0.5569852941176471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 -DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10808324813842773 s -INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013364791870117 s -DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=6869835389927009416340042045619703461, time:1750767495.582303s req_ids:[8] -DEBUG 06-24 20:18:15 [manager.py:391] -ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:205.7027816772461ms total_cost_time:205.74665069580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9249 prompt_cache_len:5151 prompt_cache_ratio:0.5569250729808628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 -DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s -INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s -DEBUG 06-24 20:18:15 [manager.py:391] Prefill Batch: batch_id=159869955967810791958616804271818440393, time:1750767495.7965612s req_ids:[8] -DEBUG 06-24 20:18:15 [manager.py:391] -ERROR 06-24 20:18:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:209.60450172424316ms total_cost_time:209.64765548706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9250 prompt_cache_len:5151 prompt_cache_ratio:0.5568648648648649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 -DEBUG 06-24 20:18:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:15 [manager.py:224] router recive req id 8 cost time 0.10600805282592773 s -INFO 06-24 20:18:15 [manager.py:68] detokenization recv req id 8 cost time 0.10790634155273438 s -DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=237468569893546502185594882489795657096, time:1750767496.0130298s req_ids:[8] -DEBUG 06-24 20:18:16 [manager.py:391] -ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:15 lightllm_req_id:8 first_token_cost:174.6382713317871ms total_cost_time:174.6695041656494ms,out_token_counter:1 mean_per_token_cost_time: 0.031232833862304688ms prompt_token_num:9251 prompt_cache_len:5151 prompt_cache_ratio:0.5568046697654307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 -DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.10894203186035156 s -INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11080813407897949 s -DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=123835282712431282502350845761591512436, time:1750767496.187982s req_ids:[8] -DEBUG 06-24 20:18:16 [manager.py:391] -ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:193.94230842590332ms total_cost_time:193.98736953735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9252 prompt_cache_len:5151 prompt_cache_ratio:0.5567444876783398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 -DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.10850286483764648 s -INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047959327697754 s -DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=286216554820334194591642683786481301623, time:1750767496.3889387s req_ids:[8] -DEBUG 06-24 20:18:16 [manager.py:391] -ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:205.77669143676758ms total_cost_time:205.82151412963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9253 prompt_cache_len:5151 prompt_cache_ratio:0.5566843185993732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 -DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.11022305488586426 s -INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.11215329170227051 s -DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=35689826949743344270340906774914469563, time:1750767496.5983038s req_ids:[8] -DEBUG 06-24 20:18:16 [manager.py:391] -ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:205.5976390838623ms total_cost_time:205.6419849395752ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9254 prompt_cache_len:5151 prompt_cache_ratio:0.5566241625243138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 -DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:16 [batch.py:51] router release req id 8 -DEBUG 06-24 20:18:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:16 [manager.py:283] -DEBUG 06-24 20:18:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:16 [manager.py:284] -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:16 [manager.py:224] router recive req id 8 cost time 0.11145830154418945 s -INFO 06-24 20:18:16 [manager.py:68] detokenization recv req id 8 cost time 0.1133875846862793 s -DEBUG 06-24 20:18:16 [manager.py:391] Prefill Batch: batch_id=137219438306877450289715106310044438445, time:1750767496.826416s req_ids:[8] -DEBUG 06-24 20:18:16 [manager.py:391] -ERROR 06-24 20:18:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:212.4176025390625ms total_cost_time:212.4629020690918ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9255 prompt_cache_len:5151 prompt_cache_ratio:0.5565640194489465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 -INFO 06-24 20:18:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:18:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10646414756774902 s -INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.10833477973937988 s -DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=139765296529452748325281898390301738771, time:1750767497.037263s req_ids:[8] -DEBUG 06-24 20:18:17 [manager.py:391] -ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:16 lightllm_req_id:8 first_token_cost:360.5999946594238ms total_cost_time:360.64624786376953ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9256 prompt_cache_len:5151 prompt_cache_ratio:0.5565038893690579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 -DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10972166061401367 s -INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.11159968376159668 s -DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=13479720253126084349936411105145005324, time:1750767497.3968194s req_ids:[8] -DEBUG 06-24 20:18:17 [manager.py:391] -ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:200.44875144958496ms total_cost_time:200.49309730529785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9257 prompt_cache_len:5151 prompt_cache_ratio:0.5564437722804364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 -DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s -INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.11004519462585449 s -DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=144089109438440321818126075292956114440, time:1750767497.6275244s req_ids:[8] -DEBUG 06-24 20:18:17 [manager.py:391] -ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:189.2240047454834ms total_cost_time:189.28194046020508ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9258 prompt_cache_len:5151 prompt_cache_ratio:0.5563836681788723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 -DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:17 [manager.py:224] router recive req id 8 cost time 0.10775613784790039 s -INFO 06-24 20:18:17 [manager.py:68] detokenization recv req id 8 cost time 0.10898590087890625 s -DEBUG 06-24 20:18:17 [manager.py:391] Prefill Batch: batch_id=72921378620789925350388034805777535770, time:1750767497.8224013s req_ids:[8] -DEBUG 06-24 20:18:17 [manager.py:391] -ERROR 06-24 20:18:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:237.3814582824707ms total_cost_time:237.4093532562256ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:9259 prompt_cache_len:5151 prompt_cache_ratio:0.5563235770601577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 -DEBUG 06-24 20:18:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10494184494018555 s -INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10596060752868652 s -DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=167507405522539109322776770635082025472, time:1750767498.036946s req_ids:[8] -DEBUG 06-24 20:18:18 [manager.py:391] -ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:17 lightllm_req_id:8 first_token_cost:207.02767372131348ms total_cost_time:207.05485343933105ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:9260 prompt_cache_len:5151 prompt_cache_ratio:0.5562634989200864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 -DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.1051173210144043 s -INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10646891593933105 s -DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=99475044803566855321305779250944595585, time:1750767498.251523s req_ids:[8] -DEBUG 06-24 20:18:18 [manager.py:391] -ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:211.96484565734863ms total_cost_time:211.99345588684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:9261 prompt_cache_len:5151 prompt_cache_ratio:0.5562034337544541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 -DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10596013069152832 s -INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10733699798583984 s -DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=235535005008876972047763154644867852441, time:1750767498.470751s req_ids:[8] -DEBUG 06-24 20:18:18 [manager.py:391] -ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:197.20721244812012ms total_cost_time:197.25418090820312ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9262 prompt_cache_len:5151 prompt_cache_ratio:0.5561433815590585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 -DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.1062314510345459 s -INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10717487335205078 s -DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=94180847616631123256090805014619159555, time:1750767498.6722412s req_ids:[8] -DEBUG 06-24 20:18:18 [manager.py:391] -ERROR 06-24 20:18:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:167.39130020141602ms total_cost_time:167.43874549865723ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9263 prompt_cache_len:5151 prompt_cache_ratio:0.5560833423296988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 -DEBUG 06-24 20:18:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:18 [manager.py:224] router recive req id 8 cost time 0.10508942604064941 s -INFO 06-24 20:18:18 [manager.py:68] detokenization recv req id 8 cost time 0.10714268684387207 s -DEBUG 06-24 20:18:18 [manager.py:391] Prefill Batch: batch_id=185553661054948646830759762350409376006, time:1750767498.8456492s req_ids:[8] -DEBUG 06-24 20:18:18 [manager.py:391] -ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:18 lightllm_req_id:8 first_token_cost:376.3432502746582ms total_cost_time:376.3887882232666ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9264 prompt_cache_len:5151 prompt_cache_ratio:0.5560233160621761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 -DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.1087956428527832 s -INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s -DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=79251223731374372924200933267263716182, time:1750767499.2361445s req_ids:[8] -DEBUG 06-24 20:18:19 [manager.py:391] -ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:211.4865779876709ms total_cost_time:211.5318775177002ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9265 prompt_cache_len:5151 prompt_cache_ratio:0.5559633027522936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 -DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.11125540733337402 s -INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.1130983829498291 s -DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=50923862431308956081764183268362327832, time:1750767499.4541466s req_ids:[8] -DEBUG 06-24 20:18:19 [manager.py:391] -ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:202.18992233276367ms total_cost_time:202.23331451416016ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9266 prompt_cache_len:5151 prompt_cache_ratio:0.5559033023958558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 -DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.10988903045654297 s -INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.11139440536499023 s -DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=235871802952763893913245885880066581351, time:1750767499.6495616s req_ids:[8] -DEBUG 06-24 20:18:19 [manager.py:391] -ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:183.08210372924805ms total_cost_time:183.12764167785645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9267 prompt_cache_len:5151 prompt_cache_ratio:0.5558433149886695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 -DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:19 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s -INFO 06-24 20:18:19 [manager.py:68] detokenization recv req id 8 cost time 0.10954689979553223 s -DEBUG 06-24 20:18:19 [manager.py:391] Prefill Batch: batch_id=151278103815947906254878416266465318475, time:1750767499.8375328s req_ids:[8] -DEBUG 06-24 20:18:19 [manager.py:391] -ERROR 06-24 20:18:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:189.58592414855957ms total_cost_time:189.63241577148438ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9268 prompt_cache_len:5151 prompt_cache_ratio:0.555783340526543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 -DEBUG 06-24 20:18:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10874652862548828 s -INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.11016178131103516 s -DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=167090878864262363791757727692719337491, time:1750767500.0425417s req_ids:[8] -DEBUG 06-24 20:18:20 [manager.py:391] -ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:19 lightllm_req_id:8 first_token_cost:213.8054370880127ms total_cost_time:213.8497829437256ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9269 prompt_cache_len:5151 prompt_cache_ratio:0.5557233790052865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 -DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:20 [batch.py:51] router release req id 8 -INFO 06-24 20:18:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s -INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10922718048095703 s -DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=213622004282571873146338095809093423526, time:1750767500.256435s req_ids:[8] -DEBUG 06-24 20:18:20 [manager.py:391] -ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:18:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 40552.159 tokens/s -DEBUG 06-24 20:18:20 [stats.py:37] Avg prompt tokens throughput: 40543.392 tokens/s -DEBUG 06-24 20:18:20 [stats.py:37] Avg generate tokens throughput: 8.768 tokens/s -INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:211.82513236999512ms total_cost_time:211.89284324645996ms,out_token_counter:1 mean_per_token_cost_time: 0.06771087646484375ms prompt_token_num:9270 prompt_cache_len:5151 prompt_cache_ratio:0.555663430420712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 -DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10743951797485352 s -INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10899853706359863 s -DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=306435043135976212889729874607454252705, time:1750767500.478957s req_ids:[8] -DEBUG 06-24 20:18:20 [manager.py:391] -ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:211.18593215942383ms total_cost_time:211.23147010803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9271 prompt_cache_len:5151 prompt_cache_ratio:0.5556034947686334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 -DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.10786890983581543 s -INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.10983014106750488 s -DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=28582706080833806698273810992275762909, time:1750767500.6879413s req_ids:[8] -DEBUG 06-24 20:18:20 [manager.py:391] -ERROR 06-24 20:18:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:195.4348087310791ms total_cost_time:195.4934597015381ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:9272 prompt_cache_len:5151 prompt_cache_ratio:0.5555435720448663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 -DEBUG 06-24 20:18:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:20 [manager.py:224] router recive req id 8 cost time 0.11024308204650879 s -INFO 06-24 20:18:20 [manager.py:68] detokenization recv req id 8 cost time 0.111541748046875 s -DEBUG 06-24 20:18:20 [manager.py:391] Prefill Batch: batch_id=234425215806055000723350193049643644508, time:1750767500.8986363s req_ids:[8] -DEBUG 06-24 20:18:20 [manager.py:391] -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:20 lightllm_req_id:8 first_token_cost:442.69871711730957ms total_cost_time:442.74425506591797ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9273 prompt_cache_len:5151 prompt_cache_ratio:0.555483662245228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 -DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10820770263671875 s -INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.10960888862609863 s -DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=173472924936806867508056562063117831374, time:1750767501.3493648s req_ids:[8] -DEBUG 06-24 20:18:21 [manager.py:391] -ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:190.75751304626465ms total_cost_time:190.80090522766113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9274 prompt_cache_len:5151 prompt_cache_ratio:0.5554237653655381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 -DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10833573341369629 s -INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s -DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=248877013934999467466663353685816589997, time:1750767501.5447094s req_ids:[8] -DEBUG 06-24 20:18:21 [manager.py:391] -ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:210.15667915344238ms total_cost_time:210.20030975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9275 prompt_cache_len:5151 prompt_cache_ratio:0.5553638814016173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 -DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s -INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.11026644706726074 s -DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=206697942667618486938747599179932142208, time:1750767501.7642777s req_ids:[8] -DEBUG 06-24 20:18:21 [manager.py:391] -ERROR 06-24 20:18:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:207.11565017700195ms total_cost_time:207.15832710266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9276 prompt_cache_len:5151 prompt_cache_ratio:0.5553040103492884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 -DEBUG 06-24 20:18:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:21 [manager.py:224] router recive req id 8 cost time 0.10628151893615723 s -INFO 06-24 20:18:21 [manager.py:68] detokenization recv req id 8 cost time 0.10829019546508789 s -DEBUG 06-24 20:18:21 [manager.py:391] Prefill Batch: batch_id=79602965760426490859251933426994669030, time:1750767501.9781897s req_ids:[8] -DEBUG 06-24 20:18:21 [manager.py:391] -ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:21 lightllm_req_id:8 first_token_cost:193.54987144470215ms total_cost_time:193.59421730041504ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9277 prompt_cache_len:5151 prompt_cache_ratio:0.5552441522043764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 -DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s -INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.11005330085754395 s -DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=218868102160626047326833669659735298845, time:1750767502.1653292s req_ids:[8] -DEBUG 06-24 20:18:22 [manager.py:391] -ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:167.61541366577148ms total_cost_time:167.6352024078369ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9278 prompt_cache_len:5151 prompt_cache_ratio:0.5551843069627075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 -DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10601425170898438 s -INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.10805964469909668 s -DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=333066603158334479207830083779621765869, time:1750767502.3386338s req_ids:[8] -DEBUG 06-24 20:18:22 [manager.py:391] -ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:203.94206047058105ms total_cost_time:203.96184921264648ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9279 prompt_cache_len:5151 prompt_cache_ratio:0.5551244746201099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 -DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10566854476928711 s -INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.10762691497802734 s -DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=56036461237177231349389298812581987410, time:1750767502.5464158s req_ids:[8] -DEBUG 06-24 20:18:22 [manager.py:391] -ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:206.08854293823242ms total_cost_time:206.14886283874512ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9280 prompt_cache_len:5151 prompt_cache_ratio:0.5550646551724138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 -DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s -INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014533042907715 s -DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=261543143878827586351449428631907676263, time:1750767502.7578053s req_ids:[8] -DEBUG 06-24 20:18:22 [manager.py:391] -ERROR 06-24 20:18:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:190.68598747253418ms total_cost_time:190.75632095336914ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:9281 prompt_cache_len:5151 prompt_cache_ratio:0.555004848615451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 -DEBUG 06-24 20:18:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:22 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s -INFO 06-24 20:18:22 [manager.py:68] detokenization recv req id 8 cost time 0.1103050708770752 s -DEBUG 06-24 20:18:22 [manager.py:391] Prefill Batch: batch_id=49218571529429249530037835628964195033, time:1750767502.9537623s req_ids:[8] -DEBUG 06-24 20:18:22 [manager.py:391] -INFO 06-24 20:18:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:22 lightllm_req_id:8 first_token_cost:461.90738677978516ms total_cost_time:461.95244789123535ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9282 prompt_cache_len:5151 prompt_cache_ratio:0.554945054945055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 -DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10889053344726562 s -INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.11084556579589844 s -DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=218259182610396984124906220540967509014, time:1750767503.4218028s req_ids:[8] -DEBUG 06-24 20:18:23 [manager.py:391] -ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:192.19660758972168ms total_cost_time:192.23928451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9283 prompt_cache_len:5151 prompt_cache_ratio:0.5548852741570613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 -DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10771393775939941 s -INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s -DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=256973600115054141520035110761201180734, time:1750767503.6188889s req_ids:[8] -DEBUG 06-24 20:18:23 [manager.py:391] -ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:206.11023902893066ms total_cost_time:206.15458488464355ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9284 prompt_cache_len:5151 prompt_cache_ratio:0.5548255062473072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 -DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:23 [manager.py:224] router recive req id 8 cost time 0.10799455642700195 s -INFO 06-24 20:18:23 [manager.py:68] detokenization recv req id 8 cost time 0.10981535911560059 s -DEBUG 06-24 20:18:23 [manager.py:391] Prefill Batch: batch_id=157510512380801049924596876221921057106, time:1750767503.8452344s req_ids:[8] -DEBUG 06-24 20:18:23 [manager.py:391] -ERROR 06-24 20:18:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:219.4230556488037ms total_cost_time:219.465970993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9285 prompt_cache_len:5151 prompt_cache_ratio:0.5547657512116316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 -DEBUG 06-24 20:18:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10682368278503418 s -INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.10877752304077148 s -DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=104763866741169426899088457190312884850, time:1750767504.0593114s req_ids:[8] -DEBUG 06-24 20:18:24 [manager.py:391] -ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:23 lightllm_req_id:8 first_token_cost:208.1167697906494ms total_cost_time:208.1601619720459ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9286 prompt_cache_len:5151 prompt_cache_ratio:0.5547060090458755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 -DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10815072059631348 s -INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.11007118225097656 s -DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=145942437893780209477351729661652265046, time:1750767504.2735126s req_ids:[8] -DEBUG 06-24 20:18:24 [manager.py:391] -ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:207.63826370239258ms total_cost_time:207.68070220947266ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9287 prompt_cache_len:5151 prompt_cache_ratio:0.5546462797458813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 -DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.11002445220947266 s -INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.1118314266204834 s -DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=2010277666601747235759860607133754849, time:1750767504.488255s req_ids:[8] -DEBUG 06-24 20:18:24 [manager.py:391] -ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:196.64716720581055ms total_cost_time:196.7294216156006ms,out_token_counter:1 mean_per_token_cost_time: 0.08225440979003906ms prompt_token_num:9288 prompt_cache_len:5151 prompt_cache_ratio:0.5545865633074936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 -DEBUG 06-24 20:18:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:24 [manager.py:224] router recive req id 8 cost time 0.10836338996887207 s -INFO 06-24 20:18:24 [manager.py:68] detokenization recv req id 8 cost time 0.10941386222839355 s -DEBUG 06-24 20:18:24 [manager.py:391] Prefill Batch: batch_id=321205629702195802134837893422444120690, time:1750767504.6965811s req_ids:[8] -DEBUG 06-24 20:18:24 [manager.py:391] -ERROR 06-24 20:18:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.71743965148926ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9289 prompt_cache_len:5151 prompt_cache_ratio:0.5545268597265582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 -DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.30936360359191895 s -INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.3112771511077881 s -DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=216598510945938762506284028978742102057, time:1750767505.1171427s req_ids:[8] -DEBUG 06-24 20:18:25 [manager.py:391] -ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:24 lightllm_req_id:8 first_token_cost:417.59777069091797ms total_cost_time:417.64163970947266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9290 prompt_cache_len:5151 prompt_cache_ratio:0.5544671689989236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 -DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10881257057189941 s -INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.11099767684936523 s -DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=13860048375773957268377798216700581130, time:1750767505.3362734s req_ids:[8] -DEBUG 06-24 20:18:25 [manager.py:391] -ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:208.9977264404297ms total_cost_time:209.04254913330078ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9291 prompt_cache_len:5151 prompt_cache_ratio:0.5544074911204392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 -DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10749053955078125 s -INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.10885119438171387 s -DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=109527754818997647442026457489800682961, time:1750767505.5643132s req_ids:[8] -DEBUG 06-24 20:18:25 [manager.py:391] -ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:231.45270347595215ms total_cost_time:231.49824142456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9292 prompt_cache_len:5151 prompt_cache_ratio:0.5543478260869565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 -DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.10773015022277832 s -INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.10973262786865234 s -DEBUG 06-24 20:18:25 [manager.py:391] Prefill Batch: batch_id=99489189420439418206205310491258387303, time:1750767505.7867193s req_ids:[8] -DEBUG 06-24 20:18:25 [manager.py:391] -ERROR 06-24 20:18:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:209.55276489257812ms total_cost_time:209.5956802368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9293 prompt_cache_len:5151 prompt_cache_ratio:0.554288173894329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 -DEBUG 06-24 20:18:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:25 [manager.py:224] router recive req id 8 cost time 0.1088552474975586 s -INFO 06-24 20:18:25 [manager.py:68] detokenization recv req id 8 cost time 0.11071157455444336 s -DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=277961791235706237973368595058590240302, time:1750767506.0015595s req_ids:[8] -DEBUG 06-24 20:18:26 [manager.py:391] -ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:25 lightllm_req_id:8 first_token_cost:208.45484733581543ms total_cost_time:208.50014686584473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9294 prompt_cache_len:5151 prompt_cache_ratio:0.5542285345384119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 -DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s -INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11002850532531738 s -DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=205051634422228306511857245631748981867, time:1750767506.21907s req_ids:[8] -DEBUG 06-24 20:18:26 [manager.py:391] -ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:212.78667449951172ms total_cost_time:212.83292770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9295 prompt_cache_len:5151 prompt_cache_ratio:0.5541689080150619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 -DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10893726348876953 s -INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11103367805480957 s -DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=229365423320898490452490995590992021882, time:1750767506.43325s req_ids:[8] -DEBUG 06-24 20:18:26 [manager.py:391] -ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:212.0378017425537ms total_cost_time:212.0823860168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9296 prompt_cache_len:5151 prompt_cache_ratio:0.5541092943201377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 -DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10761380195617676 s -INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s -DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=234825169486191299430931135766740615847, time:1750767506.6493888s req_ids:[8] -DEBUG 06-24 20:18:26 [manager.py:391] -ERROR 06-24 20:18:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:205.91306686401367ms total_cost_time:205.96718788146973ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:9297 prompt_cache_len:5151 prompt_cache_ratio:0.5540496934494998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 -DEBUG 06-24 20:18:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:26 [manager.py:224] router recive req id 8 cost time 0.10991501808166504 s -INFO 06-24 20:18:26 [manager.py:68] detokenization recv req id 8 cost time 0.11198067665100098 s -DEBUG 06-24 20:18:26 [manager.py:391] Prefill Batch: batch_id=229628546010705264986746719872545878502, time:1750767506.8626723s req_ids:[8] -DEBUG 06-24 20:18:26 [manager.py:391] -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:26 lightllm_req_id:8 first_token_cost:445.1429843902588ms total_cost_time:445.1878070831299ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9298 prompt_cache_len:5151 prompt_cache_ratio:0.5539901053990105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 -DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.10818338394165039 s -INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.11002349853515625 s -DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=128692169339911869054071585537008417943, time:1750767507.3132699s req_ids:[8] -DEBUG 06-24 20:18:27 [manager.py:391] -ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:186.33222579956055ms total_cost_time:186.37633323669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9299 prompt_cache_len:5151 prompt_cache_ratio:0.5539305301645339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 -DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s -INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s -DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=7770038333936731247008112935368640563, time:1750767507.503761s req_ids:[8] -DEBUG 06-24 20:18:27 [manager.py:391] -ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:206.52461051940918ms total_cost_time:206.58516883850098ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:9300 prompt_cache_len:5151 prompt_cache_ratio:0.5538709677419354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 -DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.10881304740905762 s -INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.1107797622680664 s -DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=277655607905477690693443742949139009905, time:1750767507.7287114s req_ids:[8] -DEBUG 06-24 20:18:27 [manager.py:391] -ERROR 06-24 20:18:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:217.7283763885498ms total_cost_time:217.7729606628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9301 prompt_cache_len:5151 prompt_cache_ratio:0.5538114181270831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 -DEBUG 06-24 20:18:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:27 [manager.py:224] router recive req id 8 cost time 0.1086578369140625 s -INFO 06-24 20:18:27 [manager.py:68] detokenization recv req id 8 cost time 0.11062479019165039 s -DEBUG 06-24 20:18:27 [manager.py:391] Prefill Batch: batch_id=297475880019424184746866488691393176874, time:1750767507.9440336s req_ids:[8] -DEBUG 06-24 20:18:27 [manager.py:391] -ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:27 lightllm_req_id:8 first_token_cost:212.8291130065918ms total_cost_time:212.8753662109375ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9302 prompt_cache_len:5151 prompt_cache_ratio:0.5537518813158461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 -DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s -INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s -DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=241969782247805231732837880334125039754, time:1750767508.1608758s req_ids:[8] -DEBUG 06-24 20:18:28 [manager.py:391] -ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:201.25293731689453ms total_cost_time:201.2951374053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9303 prompt_cache_len:5151 prompt_cache_ratio:0.5536923573040955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 -DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10933423042297363 s -INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.11117267608642578 s -DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=246465716219894826919124601409653288353, time:1750767508.365621s req_ids:[8] -DEBUG 06-24 20:18:28 [manager.py:391] -ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:178.969144821167ms total_cost_time:179.01134490966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9304 prompt_cache_len:5151 prompt_cache_ratio:0.5536328460877042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 -DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10780048370361328 s -INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10975456237792969 s -DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=128021281595461283110850988560047475623, time:1750767508.5656278s req_ids:[8] -DEBUG 06-24 20:18:28 [manager.py:391] -ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:214.80751037597656ms total_cost_time:214.85066413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9305 prompt_cache_len:5151 prompt_cache_ratio:0.5535733476625471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 -DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10764622688293457 s -INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10966849327087402 s -DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=202596029832712123338483804742122003053, time:1750767508.7796519s req_ids:[8] -DEBUG 06-24 20:18:28 [manager.py:391] -ERROR 06-24 20:18:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:218.98174285888672ms total_cost_time:219.0268039703369ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9306 prompt_cache_len:5151 prompt_cache_ratio:0.5535138620245004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 -DEBUG 06-24 20:18:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:28 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s -INFO 06-24 20:18:28 [manager.py:68] detokenization recv req id 8 cost time 0.10995650291442871 s -DEBUG 06-24 20:18:28 [manager.py:391] Prefill Batch: batch_id=300314773720501405388706156056359889001, time:1750767508.998383s req_ids:[8] -DEBUG 06-24 20:18:28 [manager.py:391] -ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:28 lightllm_req_id:8 first_token_cost:416.99838638305664ms total_cost_time:417.04440116882324ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9307 prompt_cache_len:5151 prompt_cache_ratio:0.5534543891694423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 -DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.10834741592407227 s -INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11021995544433594 s -DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=156134443441179910532476980756793851694, time:1750767509.417482s req_ids:[8] -DEBUG 06-24 20:18:29 [manager.py:391] -ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:190.11688232421875ms total_cost_time:190.16265869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9308 prompt_cache_len:5151 prompt_cache_ratio:0.5533949290932532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 -DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.11026859283447266 s -INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11221957206726074 s -DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=29741714824452702047626919819519651661, time:1750767509.6132147s req_ids:[8] -DEBUG 06-24 20:18:29 [manager.py:391] -ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:206.4664363861084ms total_cost_time:206.5110206604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9309 prompt_cache_len:5151 prompt_cache_ratio:0.5533354817918144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 -DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:29 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s -INFO 06-24 20:18:29 [manager.py:68] detokenization recv req id 8 cost time 0.11044096946716309 s -DEBUG 06-24 20:18:29 [manager.py:391] Prefill Batch: batch_id=142904512957857027768425589480482727394, time:1750767509.8271465s req_ids:[8] -DEBUG 06-24 20:18:29 [manager.py:391] -ERROR 06-24 20:18:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:209.41567420959473ms total_cost_time:209.4581127166748ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9310 prompt_cache_len:5151 prompt_cache_ratio:0.5532760472610097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 -DEBUG 06-24 20:18:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10794925689697266 s -INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.10990381240844727 s -DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=235328718225795424197232008588189723202, time:1750767510.0445127s req_ids:[8] -DEBUG 06-24 20:18:30 [manager.py:391] -ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:29 lightllm_req_id:8 first_token_cost:208.88113975524902ms total_cost_time:208.92715454101562ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9311 prompt_cache_len:5151 prompt_cache_ratio:0.5532166254967243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 -DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10751795768737793 s -INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.10961222648620605 s -DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=69298779961062641417040540879669527204, time:1750767510.2584512s req_ids:[8] -DEBUG 06-24 20:18:30 [manager.py:391] -ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:18:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 39027.657 tokens/s -DEBUG 06-24 20:18:30 [stats.py:37] Avg prompt tokens throughput: 39019.258 tokens/s -DEBUG 06-24 20:18:30 [stats.py:37] Avg generate tokens throughput: 8.399 tokens/s -INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:209.37609672546387ms total_cost_time:209.42211151123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9312 prompt_cache_len:5151 prompt_cache_ratio:0.5531572164948454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 -DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10875153541564941 s -INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11082983016967773 s -DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=170612911993628356115839532835378819843, time:1750767510.4744725s req_ids:[8] -DEBUG 06-24 20:18:30 [manager.py:391] -ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:214.00070190429688ms total_cost_time:214.04671669006348ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9313 prompt_cache_len:5151 prompt_cache_ratio:0.5530978202512616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 -DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.1086275577545166 s -INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11059856414794922 s -DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=34912333179990918236291747625646930366, time:1750767510.6942954s req_ids:[8] -DEBUG 06-24 20:18:30 [manager.py:391] -ERROR 06-24 20:18:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:210.6003761291504ms total_cost_time:210.64352989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9314 prompt_cache_len:5151 prompt_cache_ratio:0.5530384367618638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 -DEBUG 06-24 20:18:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:30 [manager.py:224] router recive req id 8 cost time 0.10834860801696777 s -INFO 06-24 20:18:30 [manager.py:68] detokenization recv req id 8 cost time 0.11030817031860352 s -DEBUG 06-24 20:18:30 [manager.py:391] Prefill Batch: batch_id=181406102390634474070741000622698709786, time:1750767510.9251325s req_ids:[8] -DEBUG 06-24 20:18:30 [manager.py:391] -ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:30 lightllm_req_id:8 first_token_cost:442.89708137512207ms total_cost_time:442.94023513793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9315 prompt_cache_len:5151 prompt_cache_ratio:0.5529790660225443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 -DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10807108879089355 s -INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10985684394836426 s -DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=137837470209392868632115177107509220677, time:1750767511.3546028s req_ids:[8] -DEBUG 06-24 20:18:31 [manager.py:391] -ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:186.47313117980957ms total_cost_time:186.51318550109863ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:9316 prompt_cache_len:5151 prompt_cache_ratio:0.5529197080291971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 -DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.1076512336730957 s -INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s -DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=76203475649878561557883432830425158775, time:1750767511.547423s req_ids:[8] -DEBUG 06-24 20:18:31 [manager.py:391] -ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:207.3805332183838ms total_cost_time:207.4265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9317 prompt_cache_len:5151 prompt_cache_ratio:0.5528603627777181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 -DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10741329193115234 s -INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10932397842407227 s -DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=40654939884451686189700077947709147658, time:1750767511.7605965s req_ids:[8] -DEBUG 06-24 20:18:31 [manager.py:391] -ERROR 06-24 20:18:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:205.05857467651367ms total_cost_time:205.10268211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9318 prompt_cache_len:5151 prompt_cache_ratio:0.5528010302640052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 -DEBUG 06-24 20:18:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:31 [manager.py:224] router recive req id 8 cost time 0.10809087753295898 s -INFO 06-24 20:18:31 [manager.py:68] detokenization recv req id 8 cost time 0.10986971855163574 s -DEBUG 06-24 20:18:31 [manager.py:391] Prefill Batch: batch_id=98541880123704918089254753171065239816, time:1750767511.9777577s req_ids:[8] -DEBUG 06-24 20:18:31 [manager.py:391] -ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:31 lightllm_req_id:8 first_token_cost:213.52076530456543ms total_cost_time:213.56511116027832ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9319 prompt_cache_len:5151 prompt_cache_ratio:0.5527417104839575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 -DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s -INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.10982084274291992 s -DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=76720681156965560673183795948250117124, time:1750767512.1991284s req_ids:[8] -DEBUG 06-24 20:18:32 [manager.py:391] -ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:215.45028686523438ms total_cost_time:215.49510955810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9320 prompt_cache_len:5151 prompt_cache_ratio:0.5526824034334764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 -DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10825514793395996 s -INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.11030054092407227 s -DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=36275727703857046198552211349742062891, time:1750767512.4153743s req_ids:[8] -DEBUG 06-24 20:18:32 [manager.py:391] -ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:209.02228355407715ms total_cost_time:209.06710624694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9321 prompt_cache_len:5151 prompt_cache_ratio:0.5526231091084648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 -DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.10814690589904785 s -INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.11009573936462402 s -DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=95650636783919024741987046886419605073, time:1750767512.6303039s req_ids:[8] -DEBUG 06-24 20:18:32 [manager.py:391] -ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:209.48004722595215ms total_cost_time:209.53369140625ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:9322 prompt_cache_len:5151 prompt_cache_ratio:0.5525638275048272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 -DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:32 [manager.py:224] router recive req id 8 cost time 0.1079254150390625 s -INFO 06-24 20:18:32 [manager.py:68] detokenization recv req id 8 cost time 0.10995674133300781 s -DEBUG 06-24 20:18:32 [manager.py:391] Prefill Batch: batch_id=289532035324924961885950948972866132163, time:1750767512.8454669s req_ids:[8] -DEBUG 06-24 20:18:32 [manager.py:391] -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:219.01988983154297ms total_cost_time:219.06447410583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9323 prompt_cache_len:5151 prompt_cache_ratio:0.5525045586184705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 -DEBUG 06-24 20:18:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10896658897399902 s -INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s -DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=92887215491084387930586888064477053107, time:1750767513.0668216s req_ids:[8] -DEBUG 06-24 20:18:33 [manager.py:391] -ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:32 lightllm_req_id:8 first_token_cost:401.4897346496582ms total_cost_time:401.5324115753174ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9324 prompt_cache_len:5151 prompt_cache_ratio:0.5524453024453024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 -DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.11154389381408691 s -INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11363959312438965 s -DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=63527918430597368618261515189355407709, time:1750767513.474653s req_ids:[8] -DEBUG 06-24 20:18:33 [manager.py:391] -ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:205.85203170776367ms total_cost_time:205.89733123779297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9325 prompt_cache_len:5151 prompt_cache_ratio:0.5523860589812333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 -DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s -INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11087441444396973 s -DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=113150326052703750499165396307381437021, time:1750767513.6899714s req_ids:[8] -DEBUG 06-24 20:18:33 [manager.py:391] -ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:209.76948738098145ms total_cost_time:209.81287956237793ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9326 prompt_cache_len:5151 prompt_cache_ratio:0.5523268282221746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 -DEBUG 06-24 20:18:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:33 [manager.py:224] router recive req id 8 cost time 0.10864043235778809 s -INFO 06-24 20:18:33 [manager.py:68] detokenization recv req id 8 cost time 0.11075663566589355 s -DEBUG 06-24 20:18:33 [manager.py:391] Prefill Batch: batch_id=62799057106168825341701852419915411600, time:1750767513.908324s req_ids:[8] -DEBUG 06-24 20:18:33 [manager.py:391] -ERROR 06-24 20:18:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:33 lightllm_req_id:8 first_token_cost:209.0778350830078ms total_cost_time:209.13076400756836ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:9327 prompt_cache_len:5151 prompt_cache_ratio:0.5522676101640399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 -DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10821986198425293 s -INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11040449142456055 s -DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=124776404089529678682760472780511614881, time:1750767514.1200025s req_ids:[8] -DEBUG 06-24 20:18:34 [manager.py:391] -ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:208.2815170288086ms total_cost_time:208.3263397216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9328 prompt_cache_len:5151 prompt_cache_ratio:0.5522084048027445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 -DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:34 [batch.py:51] router release req id 8 -INFO 06-24 20:18:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10935163497924805 s -INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.1112971305847168 s -DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=315537113531127486414261624523771889181, time:1750767514.3334146s req_ids:[8] -DEBUG 06-24 20:18:34 [manager.py:391] -ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:212.68773078918457ms total_cost_time:212.73255348205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9329 prompt_cache_len:5151 prompt_cache_ratio:0.5521492121342052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 -DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10846114158630371 s -INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061692237854004 s -DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=32979992957941090545508609990595984356, time:1750767514.5508199s req_ids:[8] -DEBUG 06-24 20:18:34 [manager.py:391] -ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:207.5488567352295ms total_cost_time:207.59344100952148ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9330 prompt_cache_len:5151 prompt_cache_ratio:0.5520900321543408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 -DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10885047912597656 s -INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11108112335205078 s -DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=170470851419015322578034815119610885777, time:1750767514.772356s req_ids:[8] -DEBUG 06-24 20:18:34 [manager.py:391] -ERROR 06-24 20:18:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:216.66955947875977ms total_cost_time:216.71247482299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9331 prompt_cache_len:5151 prompt_cache_ratio:0.5520308648590719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 -DEBUG 06-24 20:18:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:34 [manager.py:224] router recive req id 8 cost time 0.10915899276733398 s -INFO 06-24 20:18:34 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s -DEBUG 06-24 20:18:34 [manager.py:391] Prefill Batch: batch_id=178736315697040475494833979055037879660, time:1750767514.986667s req_ids:[8] -DEBUG 06-24 20:18:34 [manager.py:391] -ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:34 lightllm_req_id:8 first_token_cost:209.24687385559082ms total_cost_time:209.2916965484619ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9332 prompt_cache_len:5151 prompt_cache_ratio:0.5519717102443206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 -DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.1079397201538086 s -INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.10989212989807129 s -DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=247711944520664981764574613839250111909, time:1750767515.2101607s req_ids:[8] -DEBUG 06-24 20:18:35 [manager.py:391] -ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:439.8694038391113ms total_cost_time:439.9127960205078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9333 prompt_cache_len:5151 prompt_cache_ratio:0.5519125683060109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 -DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.10918641090393066 s -INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.11126565933227539 s -DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=27283157052080378889087002154835988353, time:1750767515.646029s req_ids:[8] -DEBUG 06-24 20:18:35 [manager.py:391] -ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:213.83953094482422ms total_cost_time:213.88578414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9334 prompt_cache_len:5151 prompt_cache_ratio:0.5518534390400686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 -DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:35 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s -INFO 06-24 20:18:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034464836120605 s -DEBUG 06-24 20:18:35 [manager.py:391] Prefill Batch: batch_id=6003502344676777078635133244805083622, time:1750767515.8646538s req_ids:[8] -DEBUG 06-24 20:18:35 [manager.py:391] -ERROR 06-24 20:18:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:199.8591423034668ms total_cost_time:199.90229606628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9335 prompt_cache_len:5151 prompt_cache_ratio:0.551794322442421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 -DEBUG 06-24 20:18:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10879635810852051 s -INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s -DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=286500980243029802175118015059578604034, time:1750767516.0720313s req_ids:[8] -DEBUG 06-24 20:18:36 [manager.py:391] -ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:35 lightllm_req_id:8 first_token_cost:207.7338695526123ms total_cost_time:207.75628089904785ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:9336 prompt_cache_len:5151 prompt_cache_ratio:0.5517352185089974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 -DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10849475860595703 s -INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s -DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=204267680588585439730087053501275674940, time:1750767516.2864718s req_ids:[8] -DEBUG 06-24 20:18:36 [manager.py:391] -ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:208.96410942077637ms total_cost_time:209.00750160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9337 prompt_cache_len:5151 prompt_cache_ratio:0.5516761272357288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 -DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10759305953979492 s -INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.1095890998840332 s -DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=168800904832590199896499429484171025660, time:1750767516.5007553s req_ids:[8] -DEBUG 06-24 20:18:36 [manager.py:391] -ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:202.87823677062988ms total_cost_time:202.92019844055176ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9338 prompt_cache_len:5151 prompt_cache_ratio:0.5516170486185479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 -DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.10832834243774414 s -INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.11032247543334961 s -DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=42025246364622437227302490396544114000, time:1750767516.709963s req_ids:[8] -DEBUG 06-24 20:18:36 [manager.py:391] -ERROR 06-24 20:18:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:211.8513584136963ms total_cost_time:211.87448501586914ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:9339 prompt_cache_len:5151 prompt_cache_ratio:0.5515579826533891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 -DEBUG 06-24 20:18:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:36 [manager.py:224] router recive req id 8 cost time 0.1059417724609375 s -INFO 06-24 20:18:36 [manager.py:68] detokenization recv req id 8 cost time 0.10794925689697266 s -DEBUG 06-24 20:18:36 [manager.py:391] Prefill Batch: batch_id=104057965778393181074431870685340784025, time:1750767516.9262657s req_ids:[8] -DEBUG 06-24 20:18:36 [manager.py:391] -ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:36 lightllm_req_id:8 first_token_cost:206.12359046936035ms total_cost_time:206.16888999938965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9340 prompt_cache_len:5151 prompt_cache_ratio:0.5514989293361884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 -DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.31062889099121094 s -INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.31249213218688965 s -DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=25499900574824098872545825137498387261, time:1750767517.3445113s req_ids:[8] -DEBUG 06-24 20:18:37 [manager.py:391] -ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:421.9679832458496ms total_cost_time:422.0123291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9341 prompt_cache_len:5151 prompt_cache_ratio:0.5514398886628841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 -DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10869860649108887 s -INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s -DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=166555585389564763292371171382386431482, time:1750767517.5735655s req_ids:[8] -DEBUG 06-24 20:18:37 [manager.py:391] -ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:218.86610984802246ms total_cost_time:218.91093254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9342 prompt_cache_len:5151 prompt_cache_ratio:0.5513808606294155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 -DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10706019401550293 s -INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.10889029502868652 s -DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=130581220891805976431492587565184113702, time:1750767517.790377s req_ids:[8] -DEBUG 06-24 20:18:37 [manager.py:391] -ERROR 06-24 20:18:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:169.97814178466797ms total_cost_time:170.02272605895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9343 prompt_cache_len:5151 prompt_cache_ratio:0.5513218452317243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 -DEBUG 06-24 20:18:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:37 [manager.py:224] router recive req id 8 cost time 0.10769343376159668 s -INFO 06-24 20:18:37 [manager.py:68] detokenization recv req id 8 cost time 0.10934662818908691 s -DEBUG 06-24 20:18:37 [manager.py:391] Prefill Batch: batch_id=109506230651132938181981831808426416821, time:1750767517.9632144s req_ids:[8] -DEBUG 06-24 20:18:37 [manager.py:391] -ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:37 lightllm_req_id:8 first_token_cost:207.94272422790527ms total_cost_time:207.96942710876465ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:9344 prompt_cache_len:5151 prompt_cache_ratio:0.5512628424657534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 -DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10770702362060547 s -INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10967350006103516 s -DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=301399392725949253400427459311522439131, time:1750767518.1788976s req_ids:[8] -DEBUG 06-24 20:18:38 [manager.py:391] -ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:205.74188232421875ms total_cost_time:205.78622817993164ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9345 prompt_cache_len:5151 prompt_cache_ratio:0.5512038523274478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 -DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10753536224365234 s -INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10942363739013672 s -DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=13218318386344334593966450951141386357, time:1750767518.388607s req_ids:[8] -DEBUG 06-24 20:18:38 [manager.py:391] -ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:201.08580589294434ms total_cost_time:201.12872123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9346 prompt_cache_len:5151 prompt_cache_ratio:0.5511448748127541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 -DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10760927200317383 s -INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10945677757263184 s -DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=70530452077290694520202534944311138704, time:1750767518.5950859s req_ids:[8] -DEBUG 06-24 20:18:38 [manager.py:391] -ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:204.32686805725098ms total_cost_time:204.36906814575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9347 prompt_cache_len:5151 prompt_cache_ratio:0.5510859099176206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 -DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:38 [manager.py:224] router recive req id 8 cost time 0.10796475410461426 s -INFO 06-24 20:18:38 [manager.py:68] detokenization recv req id 8 cost time 0.10994291305541992 s -DEBUG 06-24 20:18:38 [manager.py:391] Prefill Batch: batch_id=332207915498795661502667307258914507765, time:1750767518.8071723s req_ids:[8] -DEBUG 06-24 20:18:38 [manager.py:391] -ERROR 06-24 20:18:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:211.93838119506836ms total_cost_time:211.98296546936035ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9348 prompt_cache_len:5151 prompt_cache_ratio:0.5510269576379975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 -DEBUG 06-24 20:18:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10814428329467773 s -INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11002182960510254 s -DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=310147536882246356484416830328790103349, time:1750767519.0245087s req_ids:[8] -DEBUG 06-24 20:18:39 [manager.py:391] -ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:38 lightllm_req_id:8 first_token_cost:423.3407974243164ms total_cost_time:423.3858585357666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9349 prompt_cache_len:5151 prompt_cache_ratio:0.5509680179698363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 -DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10873913764953613 s -INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s -DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=238971824242477823997992666575446067972, time:1750767519.4494917s req_ids:[8] -DEBUG 06-24 20:18:39 [manager.py:391] -ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:204.04553413391113ms total_cost_time:204.09035682678223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9350 prompt_cache_len:5151 prompt_cache_ratio:0.5509090909090909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 -DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10822677612304688 s -INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.11039257049560547 s -DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=127989588424597249568666812976295338326, time:1750767519.6629457s req_ids:[8] -DEBUG 06-24 20:18:39 [manager.py:391] -ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:211.3635540008545ms total_cost_time:211.4090919494629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9351 prompt_cache_len:5151 prompt_cache_ratio:0.5508501764517164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 -DEBUG 06-24 20:18:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:39 [manager.py:224] router recive req id 8 cost time 0.10748291015625 s -INFO 06-24 20:18:39 [manager.py:68] detokenization recv req id 8 cost time 0.10936212539672852 s -DEBUG 06-24 20:18:39 [manager.py:391] Prefill Batch: batch_id=25893295861398403746239887537156817257, time:1750767519.8897254s req_ids:[8] -DEBUG 06-24 20:18:39 [manager.py:391] -ERROR 06-24 20:18:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:217.24390983581543ms total_cost_time:217.2873020172119ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9352 prompt_cache_len:5151 prompt_cache_ratio:0.5507912745936698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 -DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10776853561401367 s -INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.10974001884460449 s -DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=308317648888152489951184035022976224901, time:1750767520.1026177s req_ids:[8] -DEBUG 06-24 20:18:40 [manager.py:391] -ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:39 lightllm_req_id:8 first_token_cost:207.28707313537598ms total_cost_time:207.33189582824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9353 prompt_cache_len:5151 prompt_cache_ratio:0.5507323853309098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 -DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.1092219352722168 s -INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11133742332458496 s -DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=218821704727674153128340409062844407954, time:1750767520.315385s req_ids:[8] -DEBUG 06-24 20:18:40 [manager.py:391] -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:18:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 39003.346 tokens/s -DEBUG 06-24 20:18:40 [stats.py:37] Avg prompt tokens throughput: 38994.990 tokens/s -DEBUG 06-24 20:18:40 [stats.py:37] Avg generate tokens throughput: 8.356 tokens/s -INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:205.9774398803711ms total_cost_time:206.0232162475586ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9354 prompt_cache_len:5151 prompt_cache_ratio:0.550673508659397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 -DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s -INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11085867881774902 s -DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=226575243793892133848431750106548334429, time:1750767520.5258307s req_ids:[8] -DEBUG 06-24 20:18:40 [manager.py:391] -ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:205.6262493133545ms total_cost_time:205.68513870239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9355 prompt_cache_len:5151 prompt_cache_ratio:0.5506146445750936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 -DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.1089935302734375 s -INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11098623275756836 s -DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=66272512420969035470347482591649382126, time:1750767520.7386668s req_ids:[8] -DEBUG 06-24 20:18:40 [manager.py:391] -ERROR 06-24 20:18:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:208.44388008117676ms total_cost_time:208.49108695983887ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9356 prompt_cache_len:5151 prompt_cache_ratio:0.5505557930739632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 -DEBUG 06-24 20:18:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:40 [manager.py:224] router recive req id 8 cost time 0.10799527168273926 s -INFO 06-24 20:18:40 [manager.py:68] detokenization recv req id 8 cost time 0.11007523536682129 s -DEBUG 06-24 20:18:40 [manager.py:391] Prefill Batch: batch_id=229036434022024464609025832765807767815, time:1750767520.9517496s req_ids:[8] -DEBUG 06-24 20:18:40 [manager.py:391] -ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:40 lightllm_req_id:8 first_token_cost:207.18979835510254ms total_cost_time:207.23509788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9357 prompt_cache_len:5151 prompt_cache_ratio:0.5504969541519718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 -DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10941338539123535 s -INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11125898361206055 s -DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=87114537866521618636260549898888358894, time:1750767521.1657667s req_ids:[8] -DEBUG 06-24 20:18:41 [manager.py:391] -ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:383.85462760925293ms total_cost_time:383.90159606933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9358 prompt_cache_len:5151 prompt_cache_ratio:0.5504381278050866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 -DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10951876640319824 s -INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11127638816833496 s -DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=48552633070679790607339979998988219511, time:1750767521.555132s req_ids:[8] -DEBUG 06-24 20:18:41 [manager.py:391] -ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:199.7838020324707ms total_cost_time:199.8128890991211ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:9359 prompt_cache_len:5151 prompt_cache_ratio:0.5503793140292766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 -DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.10812091827392578 s -INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.10992288589477539 s -DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=98609587146583736099450961868090129595, time:1750767521.760574s req_ids:[8] -DEBUG 06-24 20:18:41 [manager.py:391] -ERROR 06-24 20:18:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:213.6392593383789ms total_cost_time:213.6697769165039ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:9360 prompt_cache_len:5151 prompt_cache_ratio:0.5503205128205129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 -DEBUG 06-24 20:18:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:41 [manager.py:224] router recive req id 8 cost time 0.11086583137512207 s -INFO 06-24 20:18:41 [manager.py:68] detokenization recv req id 8 cost time 0.11292386054992676 s -DEBUG 06-24 20:18:41 [manager.py:391] Prefill Batch: batch_id=181592482738529884555520050847254447139, time:1750767521.977503s req_ids:[8] -DEBUG 06-24 20:18:41 [manager.py:391] -ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:41 lightllm_req_id:8 first_token_cost:204.84399795532227ms total_cost_time:204.88858222961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9361 prompt_cache_len:5151 prompt_cache_ratio:0.5502617241747677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 -DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1089169979095459 s -INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11077046394348145 s -DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=267151359357687867087498539843923783841, time:1750767522.1893723s req_ids:[8] -DEBUG 06-24 20:18:42 [manager.py:391] -ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:202.68797874450684ms total_cost_time:202.73113250732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9362 prompt_cache_len:5151 prompt_cache_ratio:0.5502029480880154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 -DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.10802245140075684 s -INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s -DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=133598150387002545302050663775136369399, time:1750767522.3971968s req_ids:[8] -DEBUG 06-24 20:18:42 [manager.py:391] -ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:207.2298526763916ms total_cost_time:207.2741985321045ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9363 prompt_cache_len:5151 prompt_cache_ratio:0.5501441845562319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 -DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1107182502746582 s -INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11272120475769043 s -DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=157302896165237297351661399855202390176, time:1750767522.6169362s req_ids:[8] -DEBUG 06-24 20:18:42 [manager.py:391] -ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:214.5845890045166ms total_cost_time:214.613676071167ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:9364 prompt_cache_len:5151 prompt_cache_ratio:0.5500854335753951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 -DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:42 [manager.py:224] router recive req id 8 cost time 0.1088252067565918 s -INFO 06-24 20:18:42 [manager.py:68] detokenization recv req id 8 cost time 0.11083674430847168 s -DEBUG 06-24 20:18:42 [manager.py:391] Prefill Batch: batch_id=255563398915611487234220896017189281182, time:1750767522.8328726s req_ids:[8] -DEBUG 06-24 20:18:42 [manager.py:391] -ERROR 06-24 20:18:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:202.47554779052734ms total_cost_time:202.51917839050293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9365 prompt_cache_len:5151 prompt_cache_ratio:0.5500266951414843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 -DEBUG 06-24 20:18:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.10991597175598145 s -INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.11195826530456543 s -DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=314184928531339062925874346514626080480, time:1750767523.0391216s req_ids:[8] -DEBUG 06-24 20:18:43 [manager.py:391] -ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:42 lightllm_req_id:8 first_token_cost:206.44807815551758ms total_cost_time:206.49147033691406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9366 prompt_cache_len:5151 prompt_cache_ratio:0.5499679692504804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 -DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.30935072898864746 s -INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.3113832473754883 s -DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=139197000991035658328178546052033031064, time:1750767523.4658768s req_ids:[8] -DEBUG 06-24 20:18:43 [manager.py:391] -ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:423.9389896392822ms total_cost_time:423.9802360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9367 prompt_cache_len:5151 prompt_cache_ratio:0.5499092558983666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 -DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.11211156845092773 s -INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.1140298843383789 s -DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=122832590025042177571353688047423274681, time:1750767523.6851263s req_ids:[8] -DEBUG 06-24 20:18:43 [manager.py:391] -ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7086181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9368 prompt_cache_len:5151 prompt_cache_ratio:0.5498505550811272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 -DEBUG 06-24 20:18:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:43 [manager.py:224] router recive req id 8 cost time 0.11084985733032227 s -INFO 06-24 20:18:43 [manager.py:68] detokenization recv req id 8 cost time 0.11280035972595215 s -DEBUG 06-24 20:18:43 [manager.py:391] Prefill Batch: batch_id=120220290847923262846426664980744685994, time:1750767523.8983703s req_ids:[8] -DEBUG 06-24 20:18:43 [manager.py:391] -ERROR 06-24 20:18:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:207.32975006103516ms total_cost_time:207.3824405670166ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:9369 prompt_cache_len:5151 prompt_cache_ratio:0.5497918667947487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 -DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.1091146469116211 s -INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11119389533996582 s -DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=52079594539728887853133797607053525671, time:1750767524.1128175s req_ids:[8] -DEBUG 06-24 20:18:44 [manager.py:391] -ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:43 lightllm_req_id:8 first_token_cost:208.28843116760254ms total_cost_time:208.33420753479004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9370 prompt_cache_len:5151 prompt_cache_ratio:0.5497331910352188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 -DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s -INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s -DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=248199669595846345552003821994408247405, time:1750767524.328374s req_ids:[8] -DEBUG 06-24 20:18:44 [manager.py:391] -ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:169.68417167663574ms total_cost_time:169.7239875793457ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:9371 prompt_cache_len:5151 prompt_cache_ratio:0.5496745277985273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 -DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10805249214172363 s -INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s -DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=225642183276704916980652706082082690436, time:1750767524.502245s req_ids:[8] -DEBUG 06-24 20:18:44 [manager.py:391] -ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:204.4217586517334ms total_cost_time:204.4832706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:9372 prompt_cache_len:5151 prompt_cache_ratio:0.5496158770806658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 -DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.11166977882385254 s -INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.1136016845703125 s -DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=264403272174568280559111571776879344908, time:1750767524.7099493s req_ids:[8] -DEBUG 06-24 20:18:44 [manager.py:391] -ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:198.29106330871582ms total_cost_time:198.3344554901123ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9373 prompt_cache_len:5151 prompt_cache_ratio:0.5495572388776272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 -DEBUG 06-24 20:18:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:44 [manager.py:224] router recive req id 8 cost time 0.10821819305419922 s -INFO 06-24 20:18:44 [manager.py:68] detokenization recv req id 8 cost time 0.11019420623779297 s -DEBUG 06-24 20:18:44 [manager.py:391] Prefill Batch: batch_id=330136369390958067927249883902542692257, time:1750767524.916127s req_ids:[8] -DEBUG 06-24 20:18:44 [manager.py:391] -ERROR 06-24 20:18:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:44 lightllm_req_id:8 first_token_cost:201.60770416259766ms total_cost_time:201.65395736694336ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9374 prompt_cache_len:5151 prompt_cache_ratio:0.5494986131854065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 -DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10972070693969727 s -INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11091494560241699 s -DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=199307586066732721805485094986834489661, time:1750767525.138953s req_ids:[8] -DEBUG 06-24 20:18:45 [manager.py:391] -ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:417.9110527038574ms total_cost_time:417.9561138153076ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9375 prompt_cache_len:5151 prompt_cache_ratio:0.54944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 -DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.1088111400604248 s -INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s -DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=109023194330465550049776881491839779585, time:1750767525.5454657s req_ids:[8] -DEBUG 06-24 20:18:45 [manager.py:391] -ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:205.98125457763672ms total_cost_time:206.024169921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9376 prompt_cache_len:5151 prompt_cache_ratio:0.5493813993174061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 -DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10960984230041504 s -INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.11161208152770996 s -DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=16195195559669248089523741743603130201, time:1750767525.7616897s req_ids:[8] -DEBUG 06-24 20:18:45 [manager.py:391] -ERROR 06-24 20:18:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:207.08703994750977ms total_cost_time:207.13067054748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9377 prompt_cache_len:5151 prompt_cache_ratio:0.5493228111336248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 -DEBUG 06-24 20:18:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:45 [manager.py:224] router recive req id 8 cost time 0.10966229438781738 s -INFO 06-24 20:18:45 [manager.py:68] detokenization recv req id 8 cost time 0.1115868091583252 s -DEBUG 06-24 20:18:45 [manager.py:391] Prefill Batch: batch_id=9615418946363929900161907388398816718, time:1750767525.98121s req_ids:[8] -DEBUG 06-24 20:18:45 [manager.py:391] -ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:45 lightllm_req_id:8 first_token_cost:215.19827842712402ms total_cost_time:215.2423858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9378 prompt_cache_len:5151 prompt_cache_ratio:0.5492642354446577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 -DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s -INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11122393608093262 s -DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=135172372355074243145447348303505900241, time:1750767526.1937318s req_ids:[8] -DEBUG 06-24 20:18:46 [manager.py:391] -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:207.0639133453369ms total_cost_time:207.1084976196289ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9379 prompt_cache_len:5151 prompt_cache_ratio:0.5492056722465082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 -DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.1081092357635498 s -INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11007857322692871 s -DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=299605073543821601526506511985845887223, time:1750767526.4068062s req_ids:[8] -DEBUG 06-24 20:18:46 [manager.py:391] -ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:207.7922821044922ms total_cost_time:207.83638954162598ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9380 prompt_cache_len:5151 prompt_cache_ratio:0.5491471215351812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 -DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10841178894042969 s -INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s -DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=4133840953107508809564462407498961705, time:1750767526.621742s req_ids:[8] -DEBUG 06-24 20:18:46 [manager.py:391] -ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:210.71982383728027ms total_cost_time:210.77322959899902ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:9381 prompt_cache_len:5151 prompt_cache_ratio:0.5490885833066838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 -DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:46 [manager.py:224] router recive req id 8 cost time 0.10928010940551758 s -INFO 06-24 20:18:46 [manager.py:68] detokenization recv req id 8 cost time 0.1114192008972168 s -DEBUG 06-24 20:18:46 [manager.py:391] Prefill Batch: batch_id=42426426853873469032012346939218910241, time:1750767526.837774s req_ids:[8] -DEBUG 06-24 20:18:46 [manager.py:391] -ERROR 06-24 20:18:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:209.76805686950684ms total_cost_time:209.81454849243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9382 prompt_cache_len:5151 prompt_cache_ratio:0.549030057557024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:18:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 -DEBUG 06-24 20:18:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10871076583862305 s -INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11073851585388184 s -DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=182824948200295845000190611337664749412, time:1750767527.0577874s req_ids:[8] -DEBUG 06-24 20:18:47 [manager.py:391] -ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:46 lightllm_req_id:8 first_token_cost:213.98019790649414ms total_cost_time:214.02597427368164ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9383 prompt_cache_len:5151 prompt_cache_ratio:0.5489715442822125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 -DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10923576354980469 s -INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s -DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=151856997356907522226943261919843619495, time:1750767527.2728353s req_ids:[8] -DEBUG 06-24 20:18:47 [manager.py:391] -ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:374.50599670410156ms total_cost_time:374.54867362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9384 prompt_cache_len:5151 prompt_cache_ratio:0.5489130434782609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 -DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.1082010269165039 s -INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11027312278747559 s -DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=233871465672500407220371451279721491773, time:1750767527.6512487s req_ids:[8] -DEBUG 06-24 20:18:47 [manager.py:391] -ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:205.95979690551758ms total_cost_time:206.00318908691406ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9385 prompt_cache_len:5151 prompt_cache_ratio:0.5488545551411828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 -DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:47 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s -INFO 06-24 20:18:47 [manager.py:68] detokenization recv req id 8 cost time 0.11117696762084961 s -DEBUG 06-24 20:18:47 [manager.py:391] Prefill Batch: batch_id=297309026873538633858659643157580554756, time:1750767527.866224s req_ids:[8] -DEBUG 06-24 20:18:47 [manager.py:391] -ERROR 06-24 20:18:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:217.09728240966797ms total_cost_time:217.15736389160156ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9386 prompt_cache_len:5151 prompt_cache_ratio:0.5487960792669934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 -DEBUG 06-24 20:18:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.11004376411437988 s -INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.1119997501373291 s -DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=162733558480038583898720770607588723769, time:1750767528.0852365s req_ids:[8] -DEBUG 06-24 20:18:48 [manager.py:391] -ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:47 lightllm_req_id:8 first_token_cost:203.40919494628906ms total_cost_time:203.45306396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9387 prompt_cache_len:5151 prompt_cache_ratio:0.5487376158517098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 -DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.1091451644897461 s -INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11127972602844238 s -DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=169623573602419946838825179666390091926, time:1750767528.3049574s req_ids:[8] -DEBUG 06-24 20:18:48 [manager.py:391] -ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:216.15242958068848ms total_cost_time:216.20965003967285ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:9388 prompt_cache_len:5151 prompt_cache_ratio:0.5486791648913507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 -DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:48 [batch.py:51] router release req id 8 -INFO 06-24 20:18:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.109832763671875 s -INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11170387268066406 s -DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=297018891785275048985309785701515093129, time:1750767528.5212207s req_ids:[8] -DEBUG 06-24 20:18:48 [manager.py:391] -ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:203.46713066101074ms total_cost_time:203.51219177246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9389 prompt_cache_len:5151 prompt_cache_ratio:0.5486207263819363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 -DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.10860276222229004 s -INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11062216758728027 s -DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=267181301379852238359380246302893777478, time:1750767528.7373126s req_ids:[8] -DEBUG 06-24 20:18:48 [manager.py:391] -ERROR 06-24 20:18:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:213.06490898132324ms total_cost_time:213.11092376708984ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9390 prompt_cache_len:5151 prompt_cache_ratio:0.5485623003194888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 -DEBUG 06-24 20:18:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:48 [manager.py:224] router recive req id 8 cost time 0.1126554012298584 s -INFO 06-24 20:18:48 [manager.py:68] detokenization recv req id 8 cost time 0.11465835571289062 s -DEBUG 06-24 20:18:48 [manager.py:391] Prefill Batch: batch_id=186548386531207567525495565350509579688, time:1750767528.9518423s req_ids:[8] -DEBUG 06-24 20:18:48 [manager.py:391] -ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:48 lightllm_req_id:8 first_token_cost:207.42154121398926ms total_cost_time:207.46517181396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9391 prompt_cache_len:5151 prompt_cache_ratio:0.5485038867000319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 -DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10889291763305664 s -INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.11064529418945312 s -DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=89390263682661142275801071360181944707, time:1750767529.1626248s req_ids:[8] -DEBUG 06-24 20:18:49 [manager.py:391] -ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:203.28426361083984ms total_cost_time:203.32646369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9392 prompt_cache_len:5151 prompt_cache_ratio:0.5484454855195912 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 -DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.3094174861907959 s -INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.31140756607055664 s -DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=297934755376552635271450498841153617337, time:1750767529.5788825s req_ids:[8] -DEBUG 06-24 20:18:49 [manager.py:391] -ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:417.18554496765137ms total_cost_time:417.2508716583252ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:9393 prompt_cache_len:5151 prompt_cache_ratio:0.5483870967741935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 -DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10480403900146484 s -INFO 06-24 20:18:49 [manager.py:68] detokenization recv req id 8 cost time 0.10668563842773438 s -DEBUG 06-24 20:18:49 [manager.py:391] Prefill Batch: batch_id=132068247613130601702428966680131553729, time:1750767529.795925s req_ids:[8] -DEBUG 06-24 20:18:49 [manager.py:391] -ERROR 06-24 20:18:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:209.02538299560547ms total_cost_time:209.06972885131836ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9394 prompt_cache_len:5151 prompt_cache_ratio:0.548328720459868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 -DEBUG 06-24 20:18:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:49 [manager.py:224] router recive req id 8 cost time 0.10793519020080566 s -INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972857475280762 s -DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=316164893744554260328226341423848644545, time:1750767530.009122s req_ids:[8] -DEBUG 06-24 20:18:50 [manager.py:391] -ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:49 lightllm_req_id:8 first_token_cost:207.74507522583008ms total_cost_time:207.76605606079102ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:9395 prompt_cache_len:5151 prompt_cache_ratio:0.548270356572645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 -DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.11018586158752441 s -INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.11193156242370605 s -DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=315354061486305444181261415085774655567, time:1750767530.2287476s req_ids:[8] -DEBUG 06-24 20:18:50 [manager.py:391] -ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:210.45613288879395ms total_cost_time:210.50024032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9396 prompt_cache_len:5151 prompt_cache_ratio:0.5482120051085568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 -DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.10770535469055176 s -INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.10960245132446289 s -DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=114158765976512418394771001316948076572, time:1750767530.439466s req_ids:[8] -DEBUG 06-24 20:18:50 [manager.py:391] -DEBUG 06-24 20:18:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 40169.092 tokens/s -DEBUG 06-24 20:18:50 [stats.py:37] Avg prompt tokens throughput: 40160.625 tokens/s -DEBUG 06-24 20:18:50 [stats.py:37] Avg generate tokens throughput: 8.467 tokens/s -ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:208.68372917175293ms total_cost_time:208.72807502746582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9397 prompt_cache_len:5151 prompt_cache_ratio:0.5481536660636374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 -DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.10834312438964844 s -INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s -DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=139199871069183012273871281835521743878, time:1750767530.6544921s req_ids:[8] -DEBUG 06-24 20:18:50 [manager.py:391] -ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:209.3522548675537ms total_cost_time:209.39898490905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9398 prompt_cache_len:5151 prompt_cache_ratio:0.5480953394339221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 -DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:50 [manager.py:224] router recive req id 8 cost time 0.11009550094604492 s -INFO 06-24 20:18:50 [manager.py:68] detokenization recv req id 8 cost time 0.11172819137573242 s -DEBUG 06-24 20:18:50 [manager.py:391] Prefill Batch: batch_id=199532604982527366255059577840737705965, time:1750767530.870041s req_ids:[8] -DEBUG 06-24 20:18:50 [manager.py:391] -ERROR 06-24 20:18:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:211.93289756774902ms total_cost_time:211.98725700378418ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:9399 prompt_cache_len:5151 prompt_cache_ratio:0.5480370252154485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 -DEBUG 06-24 20:18:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10924887657165527 s -INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.1113734245300293 s -DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=182991710461050797466267311570039038538, time:1750767531.0863528s req_ids:[8] -DEBUG 06-24 20:18:51 [manager.py:391] -ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:50 lightllm_req_id:8 first_token_cost:206.51841163635254ms total_cost_time:206.57610893249512ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:9400 prompt_cache_len:5151 prompt_cache_ratio:0.5479787234042554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 -DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10970067977905273 s -INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11171579360961914 s -DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=339655276680888302437381787282246012443, time:1750767531.2998667s req_ids:[8] -DEBUG 06-24 20:18:51 [manager.py:391] -ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:394.90699768066406ms total_cost_time:394.95062828063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9401 prompt_cache_len:5151 prompt_cache_ratio:0.5479204339963833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 -DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10926032066345215 s -INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11127591133117676 s -DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=262788721655121252534215125111773818001, time:1750767531.6992686s req_ids:[8] -DEBUG 06-24 20:18:51 [manager.py:391] -ERROR 06-24 20:18:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:206.4039707183838ms total_cost_time:206.44879341125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9402 prompt_cache_len:5151 prompt_cache_ratio:0.5478621569878749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 -DEBUG 06-24 20:18:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:51 [manager.py:224] router recive req id 8 cost time 0.10871410369873047 s -INFO 06-24 20:18:51 [manager.py:68] detokenization recv req id 8 cost time 0.11070585250854492 s -DEBUG 06-24 20:18:51 [manager.py:391] Prefill Batch: batch_id=292959802285941863907899806875229480893, time:1750767531.9140542s req_ids:[8] -DEBUG 06-24 20:18:51 [manager.py:391] -ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:51 lightllm_req_id:8 first_token_cost:219.57707405090332ms total_cost_time:219.6202278137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9403 prompt_cache_len:5151 prompt_cache_ratio:0.547803892374774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 -DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s -INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11033439636230469 s -DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=187459358324289468524560066457744687584, time:1750767532.1515164s req_ids:[8] -DEBUG 06-24 20:18:52 [manager.py:391] -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:223.4935760498047ms total_cost_time:223.5565185546875ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:9404 prompt_cache_len:5151 prompt_cache_ratio:0.5477456401531263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 -DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s -INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11053824424743652 s -DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=261923118487710086690324391739921401622, time:1750767532.3681855s req_ids:[8] -DEBUG 06-24 20:18:52 [manager.py:391] -ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:210.22963523864746ms total_cost_time:210.27398109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9405 prompt_cache_len:5151 prompt_cache_ratio:0.5476874003189792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 -DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.10806679725646973 s -INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.11008000373840332 s -DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=203780571424773688482413383182967370344, time:1750767532.5842621s req_ids:[8] -DEBUG 06-24 20:18:52 [manager.py:391] -ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:208.38236808776855ms total_cost_time:208.42719078063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9406 prompt_cache_len:5151 prompt_cache_ratio:0.5476291728683819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 -DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:52 [manager.py:224] router recive req id 8 cost time 0.1079559326171875 s -INFO 06-24 20:18:52 [manager.py:68] detokenization recv req id 8 cost time 0.10992765426635742 s -DEBUG 06-24 20:18:52 [manager.py:391] Prefill Batch: batch_id=92210435780845335127759300314314792120, time:1750767532.7988975s req_ids:[8] -DEBUG 06-24 20:18:52 [manager.py:391] -ERROR 06-24 20:18:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:207.23271369934082ms total_cost_time:207.2765827178955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9407 prompt_cache_len:5151 prompt_cache_ratio:0.547570957797385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 -DEBUG 06-24 20:18:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10776758193969727 s -INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s -DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=278687665012442664750509981945815305378, time:1750767533.0158496s req_ids:[8] -DEBUG 06-24 20:18:53 [manager.py:391] -ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:52 lightllm_req_id:8 first_token_cost:216.22300148010254ms total_cost_time:216.26663208007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9408 prompt_cache_len:5151 prompt_cache_ratio:0.5475127551020408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 -INFO 06-24 20:18:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:18:53 [statics_utils.py:24] mean first cost: 228.46019012876977 ms -INFO 06-24 20:18:53 [statics_utils.py:24] mean per token cost: 0.07230114423918113 ms -INFO 06-24 20:18:53 [manager.py:620] left req id 8can release True refcount 3 -DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.3087015151977539 s -INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.31076693534851074 s -DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=265539280178491077772461977828877762888, time:1750767533.4396644s req_ids:[8] -DEBUG 06-24 20:18:53 [manager.py:391] -ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:420.426607131958ms total_cost_time:420.47119140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9409 prompt_cache_len:5151 prompt_cache_ratio:0.5474545647784037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 -DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10936355590820312 s -INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.11156678199768066 s -DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=97403972049179491936060101463389399531, time:1750767533.6609626s req_ids:[8] -DEBUG 06-24 20:18:53 [manager.py:391] -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:211.20238304138184ms total_cost_time:211.24815940856934ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9410 prompt_cache_len:5151 prompt_cache_ratio:0.5473963868225292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 -DEBUG 06-24 20:18:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:53 [manager.py:224] router recive req id 8 cost time 0.10793066024780273 s -INFO 06-24 20:18:53 [manager.py:68] detokenization recv req id 8 cost time 0.11013913154602051 s -DEBUG 06-24 20:18:53 [manager.py:391] Prefill Batch: batch_id=31175632113729125041931272519313904898, time:1750767533.8777382s req_ids:[8] -DEBUG 06-24 20:18:53 [manager.py:391] -ERROR 06-24 20:18:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:213.53840827941895ms total_cost_time:213.60445022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.06604194641113281ms prompt_token_num:9411 prompt_cache_len:5151 prompt_cache_ratio:0.5473382212304749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 -DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10778617858886719 s -INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.1097252368927002 s -DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=4729224428327037314363497422221951586, time:1750767534.0984704s req_ids:[8] -DEBUG 06-24 20:18:54 [manager.py:391] -ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:53 lightllm_req_id:8 first_token_cost:207.56125450134277ms total_cost_time:207.60726928710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9412 prompt_cache_len:5151 prompt_cache_ratio:0.5472800679983001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 -DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.1089482307434082 s -INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11090445518493652 s -DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=321831177419449398569006073744884989978, time:1750767534.3093169s req_ids:[8] -DEBUG 06-24 20:18:54 [manager.py:391] -ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:207.1363925933838ms total_cost_time:207.1833610534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9413 prompt_cache_len:5151 prompt_cache_ratio:0.5472219271220652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 -DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10808348655700684 s -INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11010575294494629 s -DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=89893149663159545215300810750263035418, time:1750767534.5232947s req_ids:[8] -DEBUG 06-24 20:18:54 [manager.py:391] -ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:208.73236656188965ms total_cost_time:208.77671241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9414 prompt_cache_len:5151 prompt_cache_ratio:0.547163798597833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 -DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10962390899658203 s -INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11159110069274902 s -DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=54683786452400120704727034693169809602, time:1750767534.7369127s req_ids:[8] -DEBUG 06-24 20:18:54 [manager.py:391] -ERROR 06-24 20:18:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:210.49952507019043ms total_cost_time:210.54434776306152ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9415 prompt_cache_len:5151 prompt_cache_ratio:0.5471056824216676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 -DEBUG 06-24 20:18:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:54 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s -INFO 06-24 20:18:54 [manager.py:68] detokenization recv req id 8 cost time 0.11021161079406738 s -DEBUG 06-24 20:18:54 [manager.py:391] Prefill Batch: batch_id=148011747951849578399687801810939653708, time:1750767534.964154s req_ids:[8] -DEBUG 06-24 20:18:54 [manager.py:391] -ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:54 lightllm_req_id:8 first_token_cost:219.05231475830078ms total_cost_time:219.09523010253906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9416 prompt_cache_len:5151 prompt_cache_ratio:0.5470475785896347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 -DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10786843299865723 s -INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.10985231399536133 s -DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=204865736885565238525231608572197216964, time:1750767535.1773098s req_ids:[8] -DEBUG 06-24 20:18:55 [manager.py:391] -ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:377.5053024291992ms total_cost_time:377.5506019592285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9417 prompt_cache_len:5151 prompt_cache_ratio:0.5469894870978018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 -DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:55 [batch.py:51] router release req id 8 -INFO 06-24 20:18:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10969090461730957 s -INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11163568496704102 s -DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=11092848218925159794959842236554362811, time:1750767535.5578635s req_ids:[8] -DEBUG 06-24 20:18:55 [manager.py:391] -ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:197.92580604553223ms total_cost_time:198.03094863891602ms,out_token_counter:1 mean_per_token_cost_time: 0.10514259338378906ms prompt_token_num:9418 prompt_cache_len:5151 prompt_cache_ratio:0.5469314079422383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 -DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10831356048583984 s -INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11027789115905762 s -DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=319850299488603346951369610268920219756, time:1750767535.76459s req_ids:[8] -DEBUG 06-24 20:18:55 [manager.py:391] -ERROR 06-24 20:18:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:206.06017112731934ms total_cost_time:206.10618591308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9419 prompt_cache_len:5151 prompt_cache_ratio:0.5468733411190148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 -DEBUG 06-24 20:18:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:55 [manager.py:224] router recive req id 8 cost time 0.10834050178527832 s -INFO 06-24 20:18:55 [manager.py:68] detokenization recv req id 8 cost time 0.11037397384643555 s -DEBUG 06-24 20:18:55 [manager.py:391] Prefill Batch: batch_id=238603301335089879016021315979476664983, time:1750767535.977811s req_ids:[8] -DEBUG 06-24 20:18:55 [manager.py:391] -ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:55 lightllm_req_id:8 first_token_cost:211.6084098815918ms total_cost_time:211.6537094116211ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9420 prompt_cache_len:5151 prompt_cache_ratio:0.5468152866242039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 -DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s -INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.11059308052062988 s -DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=53781545484616126902212899771880517662, time:1750767536.19361s req_ids:[8] -DEBUG 06-24 20:18:56 [manager.py:391] -ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:205.86609840393066ms total_cost_time:205.90996742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9421 prompt_cache_len:5151 prompt_cache_ratio:0.5467572444538796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 -DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10859203338623047 s -INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.11063337326049805 s -DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=57756363042208772537728315925561342116, time:1750767536.4048595s req_ids:[8] -DEBUG 06-24 20:18:56 [manager.py:391] -ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:203.31430435180664ms total_cost_time:203.35888862609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9422 prompt_cache_len:5151 prompt_cache_ratio:0.546699214604118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 -DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10839700698852539 s -INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s -DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=305158957503334160047298534300741861132, time:1750767536.6167164s req_ids:[8] -DEBUG 06-24 20:18:56 [manager.py:391] -ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:210.76416969299316ms total_cost_time:210.80994606018066ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9423 prompt_cache_len:5151 prompt_cache_ratio:0.5466411970709965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 -DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:56 [manager.py:224] router recive req id 8 cost time 0.10758256912231445 s -INFO 06-24 20:18:56 [manager.py:68] detokenization recv req id 8 cost time 0.1102304458618164 s -DEBUG 06-24 20:18:56 [manager.py:391] Prefill Batch: batch_id=183581129594587028601362430169446285478, time:1750767536.8312707s req_ids:[8] -DEBUG 06-24 20:18:56 [manager.py:391] -ERROR 06-24 20:18:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:205.8863639831543ms total_cost_time:205.9321403503418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9424 prompt_cache_len:5151 prompt_cache_ratio:0.5465831918505942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 -DEBUG 06-24 20:18:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10754275321960449 s -INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s -DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=22903453100214854687007838486022663608, time:1750767537.0434313s req_ids:[8] -DEBUG 06-24 20:18:57 [manager.py:391] -ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:56 lightllm_req_id:8 first_token_cost:207.73720741271973ms total_cost_time:207.7794075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9425 prompt_cache_len:5151 prompt_cache_ratio:0.5465251989389921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 -DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10801863670349121 s -INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s -DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=152759777809005182149710808316383880828, time:1750767537.257061s req_ids:[8] -DEBUG 06-24 20:18:57 [manager.py:391] -ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:412.3709201812744ms total_cost_time:412.4152660369873ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9426 prompt_cache_len:5151 prompt_cache_ratio:0.5464672183322724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 -DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10882282257080078 s -INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11108613014221191 s -DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=325258290724073984265378628684246156202, time:1750767537.6715803s req_ids:[8] -DEBUG 06-24 20:18:57 [manager.py:391] -ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:202.681303024292ms total_cost_time:202.75163650512695ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:9427 prompt_cache_len:5151 prompt_cache_ratio:0.5464092500265195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 -DEBUG 06-24 20:18:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:57 [manager.py:224] router recive req id 8 cost time 0.10939645767211914 s -INFO 06-24 20:18:57 [manager.py:68] detokenization recv req id 8 cost time 0.11139297485351562 s -DEBUG 06-24 20:18:57 [manager.py:391] Prefill Batch: batch_id=47817126347833342274045891292977599211, time:1750767537.8843465s req_ids:[8] -DEBUG 06-24 20:18:57 [manager.py:391] -ERROR 06-24 20:18:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:205.57594299316406ms total_cost_time:205.62171936035156ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9428 prompt_cache_len:5151 prompt_cache_ratio:0.5463512940178192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 -DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10871505737304688 s -INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s -DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=222374452230323870745990206841735553668, time:1750767538.0955043s req_ids:[8] -DEBUG 06-24 20:18:58 [manager.py:391] -ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:57 lightllm_req_id:8 first_token_cost:205.81841468811035ms total_cost_time:205.86776733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04935264587402344ms prompt_token_num:9429 prompt_cache_len:5151 prompt_cache_ratio:0.546293350302259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 -DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s -INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11097216606140137 s -DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=153768267260050529550081842154272344564, time:1750767538.3067088s req_ids:[8] -DEBUG 06-24 20:18:58 [manager.py:391] -ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:209.83529090881348ms total_cost_time:209.87868309020996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9430 prompt_cache_len:5151 prompt_cache_ratio:0.5462354188759279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 -DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.1081700325012207 s -INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11021614074707031 s -DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=303762531824133431773636501771497935694, time:1750767538.5231895s req_ids:[8] -DEBUG 06-24 20:18:58 [manager.py:391] -ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:207.83543586730957ms total_cost_time:207.88073539733887ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9431 prompt_cache_len:5151 prompt_cache_ratio:0.5461774997349168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 -DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10811829566955566 s -INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s -DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=65437858423016728761309742618881641393, time:1750767538.7374206s req_ids:[8] -DEBUG 06-24 20:18:58 [manager.py:391] -ERROR 06-24 20:18:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:207.05008506774902ms total_cost_time:207.0937156677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9432 prompt_cache_len:5151 prompt_cache_ratio:0.5461195928753181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 -DEBUG 06-24 20:18:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:58 [manager.py:224] router recive req id 8 cost time 0.10936498641967773 s -INFO 06-24 20:18:58 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s -DEBUG 06-24 20:18:58 [manager.py:391] Prefill Batch: batch_id=108333826383628663131756463913541451914, time:1750767538.949054s req_ids:[8] -DEBUG 06-24 20:18:58 [manager.py:391] -ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:58 lightllm_req_id:8 first_token_cost:209.60068702697754ms total_cost_time:209.64622497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9433 prompt_cache_len:5151 prompt_cache_ratio:0.5460616982932259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 -DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10907554626464844 s -INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.1108694076538086 s -DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=314316401261113685739685346255097421033, time:1750767539.1785715s req_ids:[8] -DEBUG 06-24 20:18:59 [manager.py:391] -ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:370.0385093688965ms total_cost_time:370.0826168060303ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9434 prompt_cache_len:5151 prompt_cache_ratio:0.546003815984736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 -DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10898995399475098 s -INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.11100888252258301 s -DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=268943082935752230655200174301782136504, time:1750767539.5378783s req_ids:[8] -DEBUG 06-24 20:18:59 [manager.py:391] -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:18:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:200.51336288452148ms total_cost_time:200.55747032165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9435 prompt_cache_len:5151 prompt_cache_ratio:0.5459459459459459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 -DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.107452392578125 s -INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s -DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=17277010042072423231340309437570342852, time:1750767539.746249s req_ids:[8] -DEBUG 06-24 20:18:59 [manager.py:391] -ERROR 06-24 20:18:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:18:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:18:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:206.39896392822266ms total_cost_time:206.44283294677734ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9436 prompt_cache_len:5151 prompt_cache_ratio:0.5458880881729546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:18:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 -DEBUG 06-24 20:18:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:18:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:18:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:18:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:18:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:18:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:18:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:18:59 [manager.py:224] router recive req id 8 cost time 0.10831236839294434 s -INFO 06-24 20:18:59 [manager.py:68] detokenization recv req id 8 cost time 0.1102139949798584 s -DEBUG 06-24 20:18:59 [manager.py:391] Prefill Batch: batch_id=59702198927824160580232890571734388647, time:1750767539.9583795s req_ids:[8] -DEBUG 06-24 20:18:59 [manager.py:391] -ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:18:59 lightllm_req_id:8 first_token_cost:206.91752433776855ms total_cost_time:206.9721221923828ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:9437 prompt_cache_len:5151 prompt_cache_ratio:0.5458302426618629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 -DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10961127281188965 s -INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.11148548126220703 s -DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=242225668455424046068694241133695064255, time:1750767540.1704946s req_ids:[8] -DEBUG 06-24 20:19:00 [manager.py:391] -ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:205.0337791442871ms total_cost_time:205.0797939300537ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9438 prompt_cache_len:5151 prompt_cache_ratio:0.545772409408773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 -DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10842084884643555 s -INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s -DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=263133332819952811469962226474279628575, time:1750767540.383601s req_ids:[8] -DEBUG 06-24 20:19:00 [manager.py:391] -ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:19:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 39441.173 tokens/s -DEBUG 06-24 20:19:00 [stats.py:37] Avg prompt tokens throughput: 39432.700 tokens/s -DEBUG 06-24 20:19:00 [stats.py:37] Avg generate tokens throughput: 8.473 tokens/s -INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:207.89837837219238ms total_cost_time:207.94177055358887ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9439 prompt_cache_len:5151 prompt_cache_ratio:0.5457145884097891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 -DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10970330238342285 s -INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.1118171215057373 s -DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=226329075419561500816913074426180506792, time:1750767540.6078472s req_ids:[8] -DEBUG 06-24 20:19:00 [manager.py:391] -ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:222.59521484375ms total_cost_time:222.63836860656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9440 prompt_cache_len:5151 prompt_cache_ratio:0.545656779661017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 -DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:00 [manager.py:224] router recive req id 8 cost time 0.10817837715148926 s -INFO 06-24 20:19:00 [manager.py:68] detokenization recv req id 8 cost time 0.11065411567687988 s -DEBUG 06-24 20:19:00 [manager.py:391] Prefill Batch: batch_id=238816902533675331945785924356883807428, time:1750767540.8258314s req_ids:[8] -DEBUG 06-24 20:19:00 [manager.py:391] -ERROR 06-24 20:19:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:209.20634269714355ms total_cost_time:209.25259590148926ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9441 prompt_cache_len:5151 prompt_cache_ratio:0.5455989831585637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 -DEBUG 06-24 20:19:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10836386680603027 s -INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.1103971004486084 s -DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=15733038250147580558669482168204162376, time:1750767541.040767s req_ids:[8] -DEBUG 06-24 20:19:01 [manager.py:391] -ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:00 lightllm_req_id:8 first_token_cost:207.83638954162598ms total_cost_time:207.88192749023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9442 prompt_cache_len:5151 prompt_cache_ratio:0.5455411988985385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 -DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.3105044364929199 s -INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.3126044273376465 s -DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=271529283589110587012337969316096141285, time:1750767541.4614305s req_ids:[8] -DEBUG 06-24 20:19:01 [manager.py:391] -ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:419.94166374206543ms total_cost_time:419.9855327606201ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9443 prompt_cache_len:5151 prompt_cache_ratio:0.5454834268770518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 -DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10896015167236328 s -INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s -DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=158195400641797000310195576083438978260, time:1750767541.6831138s req_ids:[8] -DEBUG 06-24 20:19:01 [manager.py:391] -ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:208.571195602417ms total_cost_time:208.61458778381348ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9444 prompt_cache_len:5151 prompt_cache_ratio:0.545425667090216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 -DEBUG 06-24 20:19:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:01 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s -INFO 06-24 20:19:01 [manager.py:68] detokenization recv req id 8 cost time 0.11076998710632324 s -DEBUG 06-24 20:19:01 [manager.py:391] Prefill Batch: batch_id=282688474064382112335567784890489238390, time:1750767541.8951578s req_ids:[8] -DEBUG 06-24 20:19:01 [manager.py:391] -ERROR 06-24 20:19:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:209.63048934936523ms total_cost_time:209.67650413513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9445 prompt_cache_len:5151 prompt_cache_ratio:0.5453679195341451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 -DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s -INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11104369163513184 s -DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=291015430165318849778810117084862349157, time:1750767542.1092677s req_ids:[8] -DEBUG 06-24 20:19:02 [manager.py:391] -ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:01 lightllm_req_id:8 first_token_cost:206.47096633911133ms total_cost_time:206.5136432647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9446 prompt_cache_len:5151 prompt_cache_ratio:0.5453101842049545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 -DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10803437232971191 s -INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.10984230041503906 s -DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=61872181520849338771502150678026515647, time:1750767542.3224409s req_ids:[8] -DEBUG 06-24 20:19:02 [manager.py:391] -ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.8174819946289ms total_cost_time:208.8611125946045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9447 prompt_cache_len:5151 prompt_cache_ratio:0.5452524610987615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 -DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10890746116638184 s -INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s -DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=116608658700308673101809449414433011588, time:1750767542.5380657s req_ids:[8] -DEBUG 06-24 20:19:02 [manager.py:391] -ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.06384086608887ms total_cost_time:208.10770988464355ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9448 prompt_cache_len:5151 prompt_cache_ratio:0.5451947502116851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 -DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.1086874008178711 s -INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.11060333251953125 s -DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=295424685662002956594976529346418163893, time:1750767542.7639275s req_ids:[8] -DEBUG 06-24 20:19:02 [manager.py:391] -ERROR 06-24 20:19:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:223.24848175048828ms total_cost_time:223.29306602478027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9449 prompt_cache_len:5151 prompt_cache_ratio:0.5451370515398455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 -DEBUG 06-24 20:19:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:02 [manager.py:224] router recive req id 8 cost time 0.10746431350708008 s -INFO 06-24 20:19:02 [manager.py:68] detokenization recv req id 8 cost time 0.10933303833007812 s -DEBUG 06-24 20:19:02 [manager.py:391] Prefill Batch: batch_id=295167941398399488695322548363752242229, time:1750767542.9805148s req_ids:[8] -DEBUG 06-24 20:19:02 [manager.py:391] -ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:02 lightllm_req_id:8 first_token_cost:208.5418701171875ms total_cost_time:208.5862159729004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9450 prompt_cache_len:5151 prompt_cache_ratio:0.545079365079365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 -DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s -INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.11149716377258301 s -DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=4268324458217295308772877589307294260, time:1750767543.1943116s req_ids:[8] -DEBUG 06-24 20:19:03 [manager.py:391] -ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:362.43414878845215ms total_cost_time:362.47873306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9451 prompt_cache_len:5151 prompt_cache_ratio:0.5450216908263675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 -DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.1077430248260498 s -INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s -DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=96906300838378948526988093718372400457, time:1750767543.559697s req_ids:[8] -DEBUG 06-24 20:19:03 [manager.py:391] -ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:203.8254737854004ms total_cost_time:203.8705348968506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9452 prompt_cache_len:5151 prompt_cache_ratio:0.5449640287769785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 -DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.11033368110656738 s -INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.11282134056091309 s -DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=199669966303887786433219487880555653168, time:1750767543.7714417s req_ids:[8] -DEBUG 06-24 20:19:03 [manager.py:391] -ERROR 06-24 20:19:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:205.60431480407715ms total_cost_time:205.64723014831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9453 prompt_cache_len:5151 prompt_cache_ratio:0.5449063789273246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 -DEBUG 06-24 20:19:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:03 [manager.py:224] router recive req id 8 cost time 0.10771346092224121 s -INFO 06-24 20:19:03 [manager.py:68] detokenization recv req id 8 cost time 0.1096799373626709 s -DEBUG 06-24 20:19:03 [manager.py:391] Prefill Batch: batch_id=247111579861919871318120546939081245571, time:1750767543.982007s req_ids:[8] -DEBUG 06-24 20:19:03 [manager.py:391] -ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:03 lightllm_req_id:8 first_token_cost:206.03466033935547ms total_cost_time:206.07829093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9454 prompt_cache_len:5151 prompt_cache_ratio:0.544848741273535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 -DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.1084599494934082 s -INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11040973663330078 s -DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=273016106680339891853076998086805456151, time:1750767544.1938465s req_ids:[8] -DEBUG 06-24 20:19:04 [manager.py:391] -ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:207.31472969055176ms total_cost_time:207.35931396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9455 prompt_cache_len:5151 prompt_cache_ratio:0.5447911158117398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 -DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.1080021858215332 s -INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.10993194580078125 s -DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=240333772975707828403398244207926738996, time:1750767544.408639s req_ids:[8] -DEBUG 06-24 20:19:04 [manager.py:391] -ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:207.71169662475586ms total_cost_time:207.75580406188965ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9456 prompt_cache_len:5151 prompt_cache_ratio:0.5447335025380711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 -DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.10868000984191895 s -INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11065006256103516 s -DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=209200369564992502423188406848762028987, time:1750767544.6251745s req_ids:[8] -DEBUG 06-24 20:19:04 [manager.py:391] -ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:210.2367877960205ms total_cost_time:210.2828025817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9457 prompt_cache_len:5151 prompt_cache_ratio:0.5446759014486624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 -DEBUG 06-24 20:19:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:04 [manager.py:224] router recive req id 8 cost time 0.10830855369567871 s -INFO 06-24 20:19:04 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s -DEBUG 06-24 20:19:04 [manager.py:391] Prefill Batch: batch_id=324149106799743092640747740386183345474, time:1750767544.8375664s req_ids:[8] -DEBUG 06-24 20:19:04 [manager.py:391] -ERROR 06-24 20:19:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:209.72561836242676ms total_cost_time:209.76996421813965ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9458 prompt_cache_len:5151 prompt_cache_ratio:0.544618312539649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 -DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.30976319313049316 s -INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.3116185665130615 s -DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=41503862466856879213908494385940195898, time:1750767545.2591815s req_ids:[8] -DEBUG 06-24 20:19:05 [manager.py:391] -ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:04 lightllm_req_id:8 first_token_cost:416.28026962280273ms total_cost_time:416.3224697113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9459 prompt_cache_len:5151 prompt_cache_ratio:0.5445607358071678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 -DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10837793350219727 s -INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11024618148803711 s -DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=131699589002594736062749373963028149548, time:1750767545.476052s req_ids:[8] -DEBUG 06-24 20:19:05 [manager.py:391] -ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:209.37132835388184ms total_cost_time:209.41686630249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9460 prompt_cache_len:5151 prompt_cache_ratio:0.5445031712473573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 -DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10815834999084473 s -INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s -DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=264795559358717156597182826990643116323, time:1750767545.69836s req_ids:[8] -DEBUG 06-24 20:19:05 [manager.py:391] -ERROR 06-24 20:19:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:218.7039852142334ms total_cost_time:218.7492847442627ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9461 prompt_cache_len:5151 prompt_cache_ratio:0.5444456188563577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 -DEBUG 06-24 20:19:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:05 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s -INFO 06-24 20:19:05 [manager.py:68] detokenization recv req id 8 cost time 0.11064863204956055 s -DEBUG 06-24 20:19:05 [manager.py:391] Prefill Batch: batch_id=203902309525946269025761712424853015704, time:1750767545.9189591s req_ids:[8] -DEBUG 06-24 20:19:05 [manager.py:391] -ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:05 lightllm_req_id:8 first_token_cost:215.40331840515137ms total_cost_time:215.44623374938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9462 prompt_cache_len:5151 prompt_cache_ratio:0.5443880786303107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 -DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10906291007995605 s -INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11094069480895996 s -INFO 06-24 20:19:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=322587704771296475903020819518451033441, time:1750767546.1347663s req_ids:[8] -DEBUG 06-24 20:19:06 [manager.py:391] -ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:209.36250686645508ms total_cost_time:209.40566062927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9463 prompt_cache_len:5151 prompt_cache_ratio:0.5443305505653598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 -DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10847878456115723 s -INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11031651496887207 s -DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=294645377803875822034081822554445740978, time:1750767546.3500075s req_ids:[8] -DEBUG 06-24 20:19:06 [manager.py:391] -ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:209.26928520202637ms total_cost_time:209.31506156921387ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9464 prompt_cache_len:5151 prompt_cache_ratio:0.5442730346576501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 -DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10892868041992188 s -INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075615882873535 s -DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=295782618736663910957284834744563470319, time:1750767546.5653384s req_ids:[8] -DEBUG 06-24 20:19:06 [manager.py:391] -ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:205.04474639892578ms total_cost_time:205.08885383605957ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9465 prompt_cache_len:5151 prompt_cache_ratio:0.5442155309033281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 -DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.1080169677734375 s -INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.10976362228393555 s -DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=56991221673068594709085878753444164988, time:1750767546.7745323s req_ids:[8] -DEBUG 06-24 20:19:06 [manager.py:391] -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:205.90496063232422ms total_cost_time:205.9471607208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9466 prompt_cache_len:5151 prompt_cache_ratio:0.5441580392985421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 -DEBUG 06-24 20:19:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:06 [manager.py:224] router recive req id 8 cost time 0.10750150680541992 s -INFO 06-24 20:19:06 [manager.py:68] detokenization recv req id 8 cost time 0.1093897819519043 s -DEBUG 06-24 20:19:06 [manager.py:391] Prefill Batch: batch_id=267574377036043559422860292007781721319, time:1750767546.9868793s req_ids:[8] -DEBUG 06-24 20:19:06 [manager.py:391] -ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:06 lightllm_req_id:8 first_token_cost:403.3827781677246ms total_cost_time:403.4271240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9467 prompt_cache_len:5151 prompt_cache_ratio:0.5441005598394423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 -DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10743188858032227 s -INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.10907721519470215 s -DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=225326639399567361155247027550066138265, time:1750767547.3933082s req_ids:[8] -DEBUG 06-24 20:19:07 [manager.py:391] -ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:189.45550918579102ms total_cost_time:189.5136833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:9468 prompt_cache_len:5151 prompt_cache_ratio:0.54404309252218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 -DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10745549201965332 s -INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.10935425758361816 s -DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=56186086545072274754574405586959243690, time:1750767547.589018s req_ids:[8] -DEBUG 06-24 20:19:07 [manager.py:391] -ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:205.07359504699707ms total_cost_time:205.12843132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9469 prompt_cache_len:5151 prompt_cache_ratio:0.5439856373429084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 -DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:07 [manager.py:224] router recive req id 8 cost time 0.10938835144042969 s -INFO 06-24 20:19:07 [manager.py:68] detokenization recv req id 8 cost time 0.11147403717041016 s -DEBUG 06-24 20:19:07 [manager.py:391] Prefill Batch: batch_id=278393525547507602372702473068399333662, time:1750767547.8034744s req_ids:[8] -DEBUG 06-24 20:19:07 [manager.py:391] -ERROR 06-24 20:19:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:209.82766151428223ms total_cost_time:209.87176895141602ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9470 prompt_cache_len:5151 prompt_cache_ratio:0.5439281942977825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 -DEBUG 06-24 20:19:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.11033773422241211 s -INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11284065246582031 s -DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=30861567242428310679956261929504483368, time:1750767548.0184994s req_ids:[8] -DEBUG 06-24 20:19:08 [manager.py:391] -ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:07 lightllm_req_id:8 first_token_cost:208.72902870178223ms total_cost_time:208.77432823181152ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9471 prompt_cache_len:5151 prompt_cache_ratio:0.5438707633829585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 -DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s -INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.1104891300201416 s -DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=213848588314479385216982733295015197075, time:1750767548.2323484s req_ids:[8] -DEBUG 06-24 20:19:08 [manager.py:391] -ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:206.9847583770752ms total_cost_time:207.02815055847168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9472 prompt_cache_len:5151 prompt_cache_ratio:0.5438133445945946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 -DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.1094660758972168 s -INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11162710189819336 s -DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=226394144107209850889417979980786010794, time:1750767548.4448607s req_ids:[8] -DEBUG 06-24 20:19:08 [manager.py:391] -ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:206.15434646606445ms total_cost_time:206.20012283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9473 prompt_cache_len:5151 prompt_cache_ratio:0.5437559379288505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 -DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10860443115234375 s -INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.1106109619140625 s -DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=134474692013101829275825186048702199760, time:1750767548.6643627s req_ids:[8] -DEBUG 06-24 20:19:08 [manager.py:391] -ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:216.98999404907227ms total_cost_time:217.0417308807373ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:9474 prompt_cache_len:5151 prompt_cache_ratio:0.5436985433818873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 -DEBUG 06-24 20:19:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:08 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s -INFO 06-24 20:19:08 [manager.py:68] detokenization recv req id 8 cost time 0.11027240753173828 s -DEBUG 06-24 20:19:08 [manager.py:391] Prefill Batch: batch_id=83528304455845781622705596875364155863, time:1750767548.8934457s req_ids:[8] -DEBUG 06-24 20:19:08 [manager.py:391] -ERROR 06-24 20:19:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:222.62978553771973ms total_cost_time:222.6734161376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9475 prompt_cache_len:5151 prompt_cache_ratio:0.543641160949868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 -DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10805892944335938 s -INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11013460159301758 s -DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=238106543101396802876624115340730734705, time:1750767549.1095243s req_ids:[8] -DEBUG 06-24 20:19:09 [manager.py:391] -ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:08 lightllm_req_id:8 first_token_cost:372.93124198913574ms total_cost_time:372.9748725891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9476 prompt_cache_len:5151 prompt_cache_ratio:0.5435837906289573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 -DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s -INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.10981011390686035 s -DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=282077581832763734508013699435374229069, time:1750767549.4863234s req_ids:[8] -DEBUG 06-24 20:19:09 [manager.py:391] -ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:205.15179634094238ms total_cost_time:205.19614219665527ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9477 prompt_cache_len:5151 prompt_cache_ratio:0.5435264324153213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 -DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:09 [batch.py:51] router release req id 8 -INFO 06-24 20:19:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10847854614257812 s -INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092901229858398 s -DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=85327576226851586285838097394048263372, time:1750767549.699758s req_ids:[8] -DEBUG 06-24 20:19:09 [manager.py:391] -ERROR 06-24 20:19:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:207.9770565032959ms total_cost_time:208.03380012512207ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:9478 prompt_cache_len:5151 prompt_cache_ratio:0.5434690863051277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 -DEBUG 06-24 20:19:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:09 [manager.py:224] router recive req id 8 cost time 0.10799145698547363 s -INFO 06-24 20:19:09 [manager.py:68] detokenization recv req id 8 cost time 0.11026763916015625 s -DEBUG 06-24 20:19:09 [manager.py:391] Prefill Batch: batch_id=247560034264618077555436327086463475058, time:1750767549.9136674s req_ids:[8] -DEBUG 06-24 20:19:09 [manager.py:391] -ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:09 lightllm_req_id:8 first_token_cost:206.37845993041992ms total_cost_time:206.42995834350586ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:9479 prompt_cache_len:5151 prompt_cache_ratio:0.5434117522945459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 -DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.11108636856079102 s -INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.11324763298034668 s -DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=180825912745911338013169056130461679319, time:1750767550.1280706s req_ids:[8] -DEBUG 06-24 20:19:10 [manager.py:391] -ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:205.9471607208252ms total_cost_time:205.9915065765381ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9480 prompt_cache_len:5151 prompt_cache_ratio:0.5433544303797468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 -DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.10816287994384766 s -INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.11032557487487793 s -DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=101835608740629359015223320197928803454, time:1750767550.3416312s req_ids:[8] -DEBUG 06-24 20:19:10 [manager.py:391] -ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:209.42974090576172ms total_cost_time:209.4733715057373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9481 prompt_cache_len:5151 prompt_cache_ratio:0.5432971205569033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 -DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.10552549362182617 s -INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10720968246459961 s -DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=228273762864539407426857273064807498466, time:1750767550.5549245s req_ids:[8] -DEBUG 06-24 20:19:10 [manager.py:391] -DEBUG 06-24 20:19:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 40354.344 tokens/s -DEBUG 06-24 20:19:10 [stats.py:37] Avg prompt tokens throughput: 40345.914 tokens/s -DEBUG 06-24 20:19:10 [stats.py:37] Avg generate tokens throughput: 8.430 tokens/s -ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:169.13342475891113ms total_cost_time:169.1567897796631ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9482 prompt_cache_len:5151 prompt_cache_ratio:0.5432398228221894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 -DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.1047368049621582 s -INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10667061805725098 s -DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=288252154001088804211405016006749618686, time:1750767550.7278805s req_ids:[8] -DEBUG 06-24 20:19:10 [manager.py:391] -ERROR 06-24 20:19:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:167.92869567871094ms total_cost_time:167.95921325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:9483 prompt_cache_len:5151 prompt_cache_ratio:0.5431825371717811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 -DEBUG 06-24 20:19:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:10 [manager.py:224] router recive req id 8 cost time 0.1066136360168457 s -INFO 06-24 20:19:10 [manager.py:68] detokenization recv req id 8 cost time 0.10862851142883301 s -DEBUG 06-24 20:19:10 [manager.py:391] Prefill Batch: batch_id=247737292023069941320950625484644864714, time:1750767550.9017634s req_ids:[8] -DEBUG 06-24 20:19:10 [manager.py:391] -ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:10 lightllm_req_id:8 first_token_cost:367.02537536621094ms total_cost_time:367.07162857055664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9484 prompt_cache_len:5151 prompt_cache_ratio:0.5431252636018558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 -DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s -INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11083602905273438 s -DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=44098795136216788450014540460655884495, time:1750767551.2704563s req_ids:[8] -DEBUG 06-24 20:19:11 [manager.py:391] -ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:205.52873611450195ms total_cost_time:205.57308197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9485 prompt_cache_len:5151 prompt_cache_ratio:0.5430680021085925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 -DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.1091923713684082 s -INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11120176315307617 s -DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=177442249279889811091579178205460721184, time:1750767551.4848654s req_ids:[8] -DEBUG 06-24 20:19:11 [manager.py:391] -ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:212.0215892791748ms total_cost_time:212.083101272583ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:9486 prompt_cache_len:5151 prompt_cache_ratio:0.543010752688172 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 -DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10817384719848633 s -INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029434204101562 s -DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=336038313640778110676563599774092670938, time:1750767551.701238s req_ids:[8] -DEBUG 06-24 20:19:11 [manager.py:391] -ERROR 06-24 20:19:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:209.55443382263184ms total_cost_time:209.59925651550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9487 prompt_cache_len:5151 prompt_cache_ratio:0.5429535153367766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 -DEBUG 06-24 20:19:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:11 [manager.py:224] router recive req id 8 cost time 0.10802698135375977 s -INFO 06-24 20:19:11 [manager.py:68] detokenization recv req id 8 cost time 0.11002445220947266 s -DEBUG 06-24 20:19:11 [manager.py:391] Prefill Batch: batch_id=303804628563870263405600602250252311117, time:1750767551.9175334s req_ids:[8] -DEBUG 06-24 20:19:11 [manager.py:391] -ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:11 lightllm_req_id:8 first_token_cost:211.20429039001465ms total_cost_time:211.24839782714844ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9488 prompt_cache_len:5151 prompt_cache_ratio:0.5428962900505903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 -DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10898590087890625 s -INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11110591888427734 s -DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=132238584770086110177881922986494741513, time:1750767552.1332054s req_ids:[8] -DEBUG 06-24 20:19:12 [manager.py:391] -ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:203.43351364135742ms total_cost_time:203.4759521484375ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9489 prompt_cache_len:5151 prompt_cache_ratio:0.5428390768257982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 -DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10841846466064453 s -INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11053037643432617 s -DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=315894240340197927946327027547828814518, time:1750767552.3441603s req_ids:[8] -DEBUG 06-24 20:19:12 [manager.py:391] -ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:203.17721366882324ms total_cost_time:203.22132110595703ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9490 prompt_cache_len:5151 prompt_cache_ratio:0.542781875658588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 -DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.10885214805603027 s -INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11083221435546875 s -DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=204405236055117453385275209946975602480, time:1750767552.5552635s req_ids:[8] -DEBUG 06-24 20:19:12 [manager.py:391] -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:212.15200424194336ms total_cost_time:212.19587326049805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9491 prompt_cache_len:5151 prompt_cache_ratio:0.542724686545148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 -DEBUG 06-24 20:19:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:12 [manager.py:224] router recive req id 8 cost time 0.1088871955871582 s -INFO 06-24 20:19:12 [manager.py:68] detokenization recv req id 8 cost time 0.11100602149963379 s -DEBUG 06-24 20:19:12 [manager.py:391] Prefill Batch: batch_id=34227615085832645541041483757012205983, time:1750767552.7697768s req_ids:[8] -DEBUG 06-24 20:19:12 [manager.py:391] -ERROR 06-24 20:19:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:208.98699760437012ms total_cost_time:209.0299129486084ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9492 prompt_cache_len:5151 prompt_cache_ratio:0.5426675094816688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 -DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:13 [batch.py:51] router release req id 8 -INFO 06-24 20:19:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.31023097038269043 s -INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.3125762939453125 s -DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=97002728500330342113595149147540985558, time:1750767553.1910343s req_ids:[8] -DEBUG 06-24 20:19:13 [manager.py:391] -ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:12 lightllm_req_id:8 first_token_cost:422.1360683441162ms total_cost_time:422.1808910369873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9493 prompt_cache_len:5151 prompt_cache_ratio:0.5426103444643422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 -DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.10722112655639648 s -INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.10917949676513672 s -DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=225222273515392958428850864945066509604, time:1750767553.414655s req_ids:[8] -DEBUG 06-24 20:19:13 [manager.py:391] -ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:215.1482105255127ms total_cost_time:215.19231796264648ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9494 prompt_cache_len:5151 prompt_cache_ratio:0.5425531914893617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 -DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.10870981216430664 s -INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.1108095645904541 s -DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=296007100506641432479422898517724582854, time:1750767553.6320462s req_ids:[8] -DEBUG 06-24 20:19:13 [manager.py:391] -ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:213.63091468811035ms total_cost_time:213.67740631103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9495 prompt_cache_len:5151 prompt_cache_ratio:0.5424960505529226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 -DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:13 [manager.py:224] router recive req id 8 cost time 0.11020994186401367 s -INFO 06-24 20:19:13 [manager.py:68] detokenization recv req id 8 cost time 0.11221861839294434 s -DEBUG 06-24 20:19:13 [manager.py:391] Prefill Batch: batch_id=146773525895336688697443739669053541394, time:1750767553.850315s req_ids:[8] -DEBUG 06-24 20:19:13 [manager.py:391] -ERROR 06-24 20:19:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:209.2602252960205ms total_cost_time:209.3203067779541ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:9496 prompt_cache_len:5151 prompt_cache_ratio:0.5424389216512215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 -DEBUG 06-24 20:19:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.1103522777557373 s -INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11238718032836914 s -DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=214805554404507599878926251264682041329, time:1750767554.0687454s req_ids:[8] -DEBUG 06-24 20:19:14 [manager.py:391] -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:13 lightllm_req_id:8 first_token_cost:210.44301986694336ms total_cost_time:210.48617362976074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9497 prompt_cache_len:5151 prompt_cache_ratio:0.5423818047804569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 -DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10722899436950684 s -INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.10880637168884277 s -DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=318326850601847422570142924270443736419, time:1750767554.287194s req_ids:[8] -DEBUG 06-24 20:19:14 [manager.py:391] -ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:201.8606662750244ms total_cost_time:201.9050121307373ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9498 prompt_cache_len:5151 prompt_cache_ratio:0.5423246999368289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 -DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10889935493469238 s -INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11104607582092285 s -DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=183688773839862824106086197837875434139, time:1750767554.4893334s req_ids:[8] -DEBUG 06-24 20:19:14 [manager.py:391] -ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:203.58586311340332ms total_cost_time:203.63259315490723ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9499 prompt_cache_len:5151 prompt_cache_ratio:0.5422676071165385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 -DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10922122001647949 s -INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s -DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=310808304937008975223275583377398731745, time:1750767554.7005208s req_ids:[8] -DEBUG 06-24 20:19:14 [manager.py:391] -ERROR 06-24 20:19:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:202.99887657165527ms total_cost_time:203.04274559020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9500 prompt_cache_len:5151 prompt_cache_ratio:0.5422105263157895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 -DEBUG 06-24 20:19:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:14 [manager.py:224] router recive req id 8 cost time 0.10844230651855469 s -INFO 06-24 20:19:14 [manager.py:68] detokenization recv req id 8 cost time 0.11062192916870117 s -DEBUG 06-24 20:19:14 [manager.py:391] Prefill Batch: batch_id=239244549908637628386442658515458888733, time:1750767554.911279s req_ids:[8] -DEBUG 06-24 20:19:14 [manager.py:391] -ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:14 lightllm_req_id:8 first_token_cost:410.0067615509033ms total_cost_time:410.0496768951416ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9501 prompt_cache_len:5151 prompt_cache_ratio:0.5421534575307863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 -DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10775256156921387 s -INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.10965538024902344 s -DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=191338304622728367090180797029487094315, time:1750767555.324572s req_ids:[8] -DEBUG 06-24 20:19:15 [manager.py:391] -ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:205.57689666748047ms total_cost_time:205.62076568603516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9502 prompt_cache_len:5151 prompt_cache_ratio:0.5420964007577352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 -DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.1077115535736084 s -INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.10961246490478516 s -DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=218936684090068143895796848420811936729, time:1750767555.542097s req_ids:[8] -DEBUG 06-24 20:19:15 [manager.py:391] -ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:213.39035034179688ms total_cost_time:213.43302726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9503 prompt_cache_len:5151 prompt_cache_ratio:0.5420393559928444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 -DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10808229446411133 s -INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.11019086837768555 s -DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=142486151678572551032481283114735252395, time:1750767555.7547362s req_ids:[8] -DEBUG 06-24 20:19:15 [manager.py:391] -ERROR 06-24 20:19:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:206.71415328979492ms total_cost_time:206.7577838897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9504 prompt_cache_len:5151 prompt_cache_ratio:0.5419823232323232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 -DEBUG 06-24 20:19:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:15 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s -INFO 06-24 20:19:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098172664642334 s -DEBUG 06-24 20:19:15 [manager.py:391] Prefill Batch: batch_id=288090420595560667864837319391484491471, time:1750767555.9673603s req_ids:[8] -DEBUG 06-24 20:19:15 [manager.py:391] -ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:15 lightllm_req_id:8 first_token_cost:213.40465545654297ms total_cost_time:213.44971656799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9505 prompt_cache_len:5151 prompt_cache_ratio:0.5419253024723829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 -DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.10886001586914062 s -INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083221435546875 s -DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=207724348780860885516595774927431818758, time:1750767556.1854522s req_ids:[8] -DEBUG 06-24 20:19:16 [manager.py:391] -ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:205.2781581878662ms total_cost_time:205.322265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9506 prompt_cache_len:5151 prompt_cache_ratio:0.5418682937092363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 -DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.1080629825592041 s -INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017942428588867 s -DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=130734616665938504041972883646328416702, time:1750767556.3937082s req_ids:[8] -DEBUG 06-24 20:19:16 [manager.py:391] -ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:198.4117031097412ms total_cost_time:198.4546184539795ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9507 prompt_cache_len:5151 prompt_cache_ratio:0.5418112969390975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 -DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:16 [manager.py:224] router recive req id 8 cost time 0.10869455337524414 s -INFO 06-24 20:19:16 [manager.py:68] detokenization recv req id 8 cost time 0.11001014709472656 s -DEBUG 06-24 20:19:16 [manager.py:391] Prefill Batch: batch_id=13971209076046169750798382194947000057, time:1750767556.607396s req_ids:[8] -DEBUG 06-24 20:19:16 [manager.py:391] -ERROR 06-24 20:19:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:220.4296588897705ms total_cost_time:220.475435256958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9508 prompt_cache_len:5151 prompt_cache_ratio:0.5417543121581826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 -DEBUG 06-24 20:19:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:16 [batch.py:51] router release req id 8 -INFO 06-24 20:19:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.3106698989868164 s -INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.31276607513427734 s -DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=55453135206861195895021387691924791469, time:1750767557.0442412s req_ids:[8] -DEBUG 06-24 20:19:17 [manager.py:391] -ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:16 lightllm_req_id:8 first_token_cost:430.9651851654053ms total_cost_time:431.01000785827637ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9509 prompt_cache_len:5151 prompt_cache_ratio:0.541697339362709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 -DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10848641395568848 s -INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.10988187789916992 s -DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=199971673062834587022588979287134911493, time:1750767557.2681487s req_ids:[8] -DEBUG 06-24 20:19:17 [manager.py:391] -ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:209.84721183776855ms total_cost_time:209.89298820495605ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9510 prompt_cache_len:5151 prompt_cache_ratio:0.5416403785488959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 -DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s -INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.11002039909362793 s -DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=71989213771161549090560814828924794690, time:1750767557.4869645s req_ids:[8] -DEBUG 06-24 20:19:17 [manager.py:391] -ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:210.77895164489746ms total_cost_time:210.82329750061035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9511 prompt_cache_len:5151 prompt_cache_ratio:0.5415834297129639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 -DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10845375061035156 s -INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.1102607250213623 s -DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=113755833423514972031954830890727553652, time:1750767557.697606s req_ids:[8] -DEBUG 06-24 20:19:17 [manager.py:391] -ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:167.23871231079102ms total_cost_time:167.2821044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9512 prompt_cache_len:5151 prompt_cache_ratio:0.5415264928511354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 -DEBUG 06-24 20:19:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:17 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s -INFO 06-24 20:19:17 [manager.py:68] detokenization recv req id 8 cost time 0.10973024368286133 s -DEBUG 06-24 20:19:17 [manager.py:391] Prefill Batch: batch_id=52209263675876751794618345501765567275, time:1750767557.8718507s req_ids:[8] -DEBUG 06-24 20:19:17 [manager.py:391] -ERROR 06-24 20:19:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:200.4692554473877ms total_cost_time:200.51336288452148ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9513 prompt_cache_len:5151 prompt_cache_ratio:0.5414695679596342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 -DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10806655883789062 s -INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.10998988151550293 s -DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=224784590751389838433745243158358563839, time:1750767558.0914156s req_ids:[8] -DEBUG 06-24 20:19:18 [manager.py:391] -ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:17 lightllm_req_id:8 first_token_cost:221.65250778198242ms total_cost_time:221.69828414916992ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9514 prompt_cache_len:5151 prompt_cache_ratio:0.5414126550346857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 -DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10782504081726074 s -INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.10928106307983398 s -DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=17292155745083185855352960589703203170, time:1750767558.306342s req_ids:[8] -DEBUG 06-24 20:19:18 [manager.py:391] -ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:209.92112159729004ms total_cost_time:209.96761322021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9515 prompt_cache_len:5151 prompt_cache_ratio:0.5413557540725171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 -DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:18 [batch.py:51] router release req id 8 -DEBUG 06-24 20:19:18 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:18 [manager.py:283] -DEBUG 06-24 20:19:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:18 [manager.py:284] -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10889410972595215 s -INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.11099004745483398 s -DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=63366768640992635962556872624868741592, time:1750767558.5225437s req_ids:[8] -DEBUG 06-24 20:19:18 [manager.py:391] -ERROR 06-24 20:19:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:212.5074863433838ms total_cost_time:212.55207061767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9516 prompt_cache_len:5151 prompt_cache_ratio:0.5412988650693569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 -DEBUG 06-24 20:19:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:18 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s -INFO 06-24 20:19:18 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s -DEBUG 06-24 20:19:18 [manager.py:391] Prefill Batch: batch_id=194052217673907494389417897687899324300, time:1750767558.738362s req_ids:[8] -DEBUG 06-24 20:19:18 [manager.py:391] -ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:18 lightllm_req_id:8 first_token_cost:394.6666717529297ms total_cost_time:394.7134017944336ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9517 prompt_cache_len:5151 prompt_cache_ratio:0.5412419880214353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 -DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.11365795135498047 s -DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=28059836524251615797012362759534471435, time:1750767559.137432s req_ids:[8] -DEBUG 06-24 20:19:19 [manager.py:391] -INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.1158287525177002 s -ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:205.28316497802734ms total_cost_time:205.40165901184082ms,out_token_counter:1 mean_per_token_cost_time: 0.11849403381347656ms prompt_token_num:9518 prompt_cache_len:5151 prompt_cache_ratio:0.5411851229249842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 -DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10947799682617188 s -INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.11166143417358398 s -DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=11790123669005909731693445998418181061, time:1750767559.3500266s req_ids:[8] -DEBUG 06-24 20:19:19 [manager.py:391] -ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:206.0532569885254ms total_cost_time:206.09760284423828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9519 prompt_cache_len:5151 prompt_cache_ratio:0.541128269776237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 -DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10766863822937012 s -INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.10904765129089355 s -DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=115384545291841855096144685263377372201, time:1750767559.5649986s req_ids:[8] -DEBUG 06-24 20:19:19 [manager.py:391] -ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:209.28192138671875ms total_cost_time:209.32793617248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9520 prompt_cache_len:5151 prompt_cache_ratio:0.5410714285714285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 -DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10901808738708496 s -INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.11021900177001953 s -DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=9476506861008827652652614131979112897, time:1750767559.7779527s req_ids:[8] -DEBUG 06-24 20:19:19 [manager.py:391] -ERROR 06-24 20:19:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:205.95121383666992ms total_cost_time:205.99603652954102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9521 prompt_cache_len:5151 prompt_cache_ratio:0.5410145993067955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 -DEBUG 06-24 20:19:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:19 [manager.py:224] router recive req id 8 cost time 0.10802936553955078 s -INFO 06-24 20:19:19 [manager.py:68] detokenization recv req id 8 cost time 0.10995340347290039 s -DEBUG 06-24 20:19:19 [manager.py:391] Prefill Batch: batch_id=252137954223621262321451064199893388429, time:1750767559.9908183s req_ids:[8] -DEBUG 06-24 20:19:19 [manager.py:391] -ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:19 lightllm_req_id:8 first_token_cost:208.57834815979004ms total_cost_time:208.62388610839844ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9522 prompt_cache_len:5151 prompt_cache_ratio:0.5409577819785759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 -DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10810279846191406 s -INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s -DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=130160574120391630341066702531193944386, time:1750767560.2063177s req_ids:[8] -DEBUG 06-24 20:19:20 [manager.py:391] -ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:211.0464572906494ms total_cost_time:211.0910415649414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9523 prompt_cache_len:5151 prompt_cache_ratio:0.5409009765830095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 -DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10767173767089844 s -INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.1099088191986084 s -DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=100423744268563885973072802906235578345, time:1750767560.4226995s req_ids:[8] -DEBUG 06-24 20:19:20 [manager.py:391] -ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:210.3407382965088ms total_cost_time:210.3862762451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9524 prompt_cache_len:5151 prompt_cache_ratio:0.5408441831163376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 -DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10889577865600586 s -INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11086201667785645 s -DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=24865963991163631886412013540410867617, time:1750767560.6381278s req_ids:[8] -DEBUG 06-24 20:19:20 [manager.py:391] -DEBUG 06-24 20:19:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 40537.004 tokens/s -DEBUG 06-24 20:19:20 [stats.py:37] Avg prompt tokens throughput: 40528.475 tokens/s -DEBUG 06-24 20:19:20 [stats.py:37] Avg generate tokens throughput: 8.529 tokens/s -ERROR 06-24 20:19:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:208.50515365600586ms total_cost_time:208.54949951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9525 prompt_cache_len:5151 prompt_cache_ratio:0.5407874015748031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 -DEBUG 06-24 20:19:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:20 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s -INFO 06-24 20:19:20 [manager.py:68] detokenization recv req id 8 cost time 0.11077427864074707 s -DEBUG 06-24 20:19:20 [manager.py:391] Prefill Batch: batch_id=136914751062597012646751259227228527018, time:1750767560.8526795s req_ids:[8] -DEBUG 06-24 20:19:20 [manager.py:391] -ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:20 lightllm_req_id:8 first_token_cost:368.2398796081543ms total_cost_time:368.2827949523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9526 prompt_cache_len:5151 prompt_cache_ratio:0.5407306319546504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 -DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10880064964294434 s -INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s -DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=80738908151693307333219153041502589941, time:1750767561.2244606s req_ids:[8] -DEBUG 06-24 20:19:21 [manager.py:391] -ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:206.30645751953125ms total_cost_time:206.34961128234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9527 prompt_cache_len:5151 prompt_cache_ratio:0.5406738742521255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 -DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s -INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.11132526397705078 s -DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=181606426344102808309183871147793687985, time:1750767561.4390697s req_ids:[8] -DEBUG 06-24 20:19:21 [manager.py:391] -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:208.1446647644043ms total_cost_time:208.1918716430664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9528 prompt_cache_len:5151 prompt_cache_ratio:0.5406171284634761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 -DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.10818052291870117 s -INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.11021733283996582 s -DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=160365818577538819032419899103222203738, time:1750767561.6532326s req_ids:[8] -DEBUG 06-24 20:19:21 [manager.py:391] -ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:205.5966854095459ms total_cost_time:205.6412696838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9529 prompt_cache_len:5151 prompt_cache_ratio:0.5405603945849512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 -DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:21 [manager.py:224] router recive req id 8 cost time 0.1076664924621582 s -INFO 06-24 20:19:21 [manager.py:68] detokenization recv req id 8 cost time 0.1097254753112793 s -DEBUG 06-24 20:19:21 [manager.py:391] Prefill Batch: batch_id=194070719926577308181044475807128642788, time:1750767561.8626502s req_ids:[8] -DEBUG 06-24 20:19:21 [manager.py:391] -ERROR 06-24 20:19:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:203.81402969360352ms total_cost_time:203.86028289794922ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9530 prompt_cache_len:5151 prompt_cache_ratio:0.5405036726128016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 -DEBUG 06-24 20:19:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s -INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.10939931869506836 s -DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=126213006280750415131768862422010611418, time:1750767562.0719163s req_ids:[8] -DEBUG 06-24 20:19:22 [manager.py:391] -ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:21 lightllm_req_id:8 first_token_cost:206.23016357421875ms total_cost_time:206.27641677856445ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9531 prompt_cache_len:5151 prompt_cache_ratio:0.5404469625432798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 -DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.1090552806854248 s -INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11110067367553711 s -DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=79424146977121731248240642927187242761, time:1750767562.2847207s req_ids:[8] -DEBUG 06-24 20:19:22 [manager.py:391] -ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:207.3347568511963ms total_cost_time:207.37910270690918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9532 prompt_cache_len:5151 prompt_cache_ratio:0.5403902643726395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 -DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s -INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11073803901672363 s -DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=297479554818252738698913234966264014214, time:1750767562.499257s req_ids:[8] -DEBUG 06-24 20:19:22 [manager.py:391] -ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:209.0737819671631ms total_cost_time:209.11836624145508ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9533 prompt_cache_len:5151 prompt_cache_ratio:0.5403335780971362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 -DEBUG 06-24 20:19:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:22 [manager.py:224] router recive req id 8 cost time 0.10875797271728516 s -INFO 06-24 20:19:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089181900024414 s -DEBUG 06-24 20:19:22 [manager.py:391] Prefill Batch: batch_id=90693199049472413933671555881737149274, time:1750767562.7133358s req_ids:[8] -DEBUG 06-24 20:19:22 [manager.py:391] -ERROR 06-24 20:19:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:377.06637382507324ms total_cost_time:377.11143493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9534 prompt_cache_len:5151 prompt_cache_ratio:0.5402769037130271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 -DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s -INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.11072087287902832 s -DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=257001935623558950551430280768637795232, time:1750767563.094092s req_ids:[8] -DEBUG 06-24 20:19:23 [manager.py:391] -ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:22 lightllm_req_id:8 first_token_cost:210.15214920043945ms total_cost_time:210.19840240478516ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9535 prompt_cache_len:5151 prompt_cache_ratio:0.5402202412165705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 -INFO 06-24 20:19:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:19:23 [statics_utils.py:24] mean first cost: 228.52410058048707 ms -INFO 06-24 20:19:23 [statics_utils.py:24] mean per token cost: 0.07166113834181964 ms -DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10849547386169434 s -INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.110626220703125 s -DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=80094487189028559078407482541304070027, time:1750767563.3237152s req_ids:[8] -DEBUG 06-24 20:19:23 [manager.py:391] -ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:221.86684608459473ms total_cost_time:221.9107151031494ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9536 prompt_cache_len:5151 prompt_cache_ratio:0.5401635906040269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 -DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10872888565063477 s -INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.11081767082214355 s -DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=111626047785192724796877611334782543585, time:1750767563.5400145s req_ids:[8] -DEBUG 06-24 20:19:23 [manager.py:391] -ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:208.15467834472656ms total_cost_time:208.19902420043945ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9537 prompt_cache_len:5151 prompt_cache_ratio:0.5401069518716578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 -DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s -INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.10961151123046875 s -DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=108966486587751156095835843742284198978, time:1750767563.7544625s req_ids:[8] -DEBUG 06-24 20:19:23 [manager.py:391] -ERROR 06-24 20:19:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:208.6658477783203ms total_cost_time:208.7087631225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9538 prompt_cache_len:5151 prompt_cache_ratio:0.5400503250157266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 -DEBUG 06-24 20:19:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:23 [manager.py:224] router recive req id 8 cost time 0.10812950134277344 s -INFO 06-24 20:19:23 [manager.py:68] detokenization recv req id 8 cost time 0.10941147804260254 s -DEBUG 06-24 20:19:23 [manager.py:391] Prefill Batch: batch_id=212201549310206662969522477041190209317, time:1750767563.9870532s req_ids:[8] -DEBUG 06-24 20:19:23 [manager.py:391] -ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:23 lightllm_req_id:8 first_token_cost:231.83012008666992ms total_cost_time:231.87518119812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9539 prompt_cache_len:5151 prompt_cache_ratio:0.5399937100324982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 -DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10798168182373047 s -INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s -DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=124557792094451792280938614199486510096, time:1750767564.2057323s req_ids:[8] -DEBUG 06-24 20:19:24 [manager.py:391] -ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:210.48450469970703ms total_cost_time:210.53004264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9540 prompt_cache_len:5151 prompt_cache_ratio:0.539937106918239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 -DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10900306701660156 s -INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.11091899871826172 s -DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=243273264063016273493189198844713274205, time:1750767564.4221249s req_ids:[8] -DEBUG 06-24 20:19:24 [manager.py:391] -ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:208.4803581237793ms total_cost_time:208.526611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9541 prompt_cache_len:5151 prompt_cache_ratio:0.539880515669217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 -DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:24 [manager.py:224] router recive req id 8 cost time 0.10789036750793457 s -INFO 06-24 20:19:24 [manager.py:68] detokenization recv req id 8 cost time 0.10928082466125488 s -DEBUG 06-24 20:19:24 [manager.py:391] Prefill Batch: batch_id=269677482003745466953213236010634757687, time:1750767564.6356347s req_ids:[8] -DEBUG 06-24 20:19:24 [manager.py:391] -ERROR 06-24 20:19:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:207.10349082946777ms total_cost_time:207.14926719665527ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9542 prompt_cache_len:5151 prompt_cache_ratio:0.539823936281702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 -DEBUG 06-24 20:19:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.3095424175262451 s -INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.31163716316223145 s -DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=292533113241390086186084715142591626153, time:1750767565.0527327s req_ids:[8] -DEBUG 06-24 20:19:25 [manager.py:391] -ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:24 lightllm_req_id:8 first_token_cost:414.92724418640137ms total_cost_time:414.97230529785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9543 prompt_cache_len:5151 prompt_cache_ratio:0.5397673687519647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 -DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10739898681640625 s -INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.10953974723815918 s -DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=129773338545182260910553542519800138269, time:1750767565.2676132s req_ids:[8] -DEBUG 06-24 20:19:25 [manager.py:391] -ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.1065902709961ms total_cost_time:207.1511745452881ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9544 prompt_cache_len:5151 prompt_cache_ratio:0.5397108130762783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 -DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10579442977905273 s -INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.1070091724395752 s -DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=74123601327788204749057157391408544246, time:1750767565.4809144s req_ids:[8] -DEBUG 06-24 20:19:25 [manager.py:391] -ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.9331874847412ms total_cost_time:207.95917510986328ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:9545 prompt_cache_len:5151 prompt_cache_ratio:0.5396542692509168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 -DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s -INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.10716891288757324 s -DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=334029106264724612480043666535895921578, time:1750767565.694826s req_ids:[8] -DEBUG 06-24 20:19:25 [manager.py:391] -ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:207.19647407531738ms total_cost_time:207.24081993103027ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9546 prompt_cache_len:5151 prompt_cache_ratio:0.5395977372721559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 -DEBUG 06-24 20:19:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:25 [manager.py:224] router recive req id 8 cost time 0.10828471183776855 s -INFO 06-24 20:19:25 [manager.py:68] detokenization recv req id 8 cost time 0.11030364036560059 s -DEBUG 06-24 20:19:25 [manager.py:391] Prefill Batch: batch_id=205611890943692983669794516857737120455, time:1750767565.9102569s req_ids:[8] -DEBUG 06-24 20:19:25 [manager.py:391] -ERROR 06-24 20:19:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:25 lightllm_req_id:8 first_token_cost:213.61494064331055ms total_cost_time:213.66119384765625ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9547 prompt_cache_len:5151 prompt_cache_ratio:0.5395412171362731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 -DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10779547691345215 s -INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10987567901611328 s -DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=313802351801410313454618246998111029132, time:1750767566.1262188s req_ids:[8] -DEBUG 06-24 20:19:26 [manager.py:391] -ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:209.7339630126953ms total_cost_time:209.7783088684082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9548 prompt_cache_len:5151 prompt_cache_ratio:0.5394847088395476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 -DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10685610771179199 s -INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10838532447814941 s -DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=338483364382048143294704091766177915800, time:1750767566.3410172s req_ids:[8] -DEBUG 06-24 20:19:26 [manager.py:391] -ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:168.44654083251953ms total_cost_time:168.49040985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9549 prompt_cache_len:5151 prompt_cache_ratio:0.5394282123782596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 -DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10874652862548828 s -INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.11086010932922363 s -DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=110130595648166600230264661782633902792, time:1750767566.514189s req_ids:[8] -DEBUG 06-24 20:19:26 [manager.py:391] -ERROR 06-24 20:19:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:198.69494438171387ms total_cost_time:198.73714447021484ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9550 prompt_cache_len:5151 prompt_cache_ratio:0.5393717277486911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 -DEBUG 06-24 20:19:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:26 [manager.py:224] router recive req id 8 cost time 0.10772919654846191 s -INFO 06-24 20:19:26 [manager.py:68] detokenization recv req id 8 cost time 0.10915470123291016 s -DEBUG 06-24 20:19:26 [manager.py:391] Prefill Batch: batch_id=183456611728232452377588134043477587051, time:1750767566.718857s req_ids:[8] -DEBUG 06-24 20:19:26 [manager.py:391] -ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:26 lightllm_req_id:8 first_token_cost:408.28990936279297ms total_cost_time:408.33401679992676ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9551 prompt_cache_len:5151 prompt_cache_ratio:0.539315254947126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 -DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10887289047241211 s -INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.1108551025390625 s -DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=231885829364772420925828996676835353467, time:1750767567.1310544s req_ids:[8] -DEBUG 06-24 20:19:27 [manager.py:391] -ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:205.81889152526855ms total_cost_time:205.86156845092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9552 prompt_cache_len:5151 prompt_cache_ratio:0.5392587939698492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 -DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s -INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s -INFO 06-24 20:19:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=230519776746442261614442812435782099247, time:1750767567.3507257s req_ids:[8] -DEBUG 06-24 20:19:27 [manager.py:391] -ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:214.37835693359375ms total_cost_time:214.42151069641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9553 prompt_cache_len:5151 prompt_cache_ratio:0.5392023448131477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 -DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10761857032775879 s -INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10957598686218262 s -DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=288984510766525124591215800061005886763, time:1750767567.5653968s req_ids:[8] -DEBUG 06-24 20:19:27 [manager.py:391] -ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:209.12909507751465ms total_cost_time:209.15579795837402ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:9554 prompt_cache_len:5151 prompt_cache_ratio:0.5391459074733096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 -DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10548710823059082 s -INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10752391815185547 s -DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=52218567995083725170193084871394546637, time:1750767567.7789428s req_ids:[8] -DEBUG 06-24 20:19:27 [manager.py:391] -ERROR 06-24 20:19:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:204.16927337646484ms total_cost_time:204.19573783874512ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:9555 prompt_cache_len:5151 prompt_cache_ratio:0.5390894819466248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 -DEBUG 06-24 20:19:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:27 [manager.py:224] router recive req id 8 cost time 0.10528802871704102 s -INFO 06-24 20:19:27 [manager.py:68] detokenization recv req id 8 cost time 0.10711431503295898 s -DEBUG 06-24 20:19:27 [manager.py:391] Prefill Batch: batch_id=157585441127913167258305155106687687915, time:1750767567.9876807s req_ids:[8] -DEBUG 06-24 20:19:27 [manager.py:391] -ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:27 lightllm_req_id:8 first_token_cost:167.93274879455566ms total_cost_time:167.97399520874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9556 prompt_cache_len:5151 prompt_cache_ratio:0.5390330682293847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 -DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.10731768608093262 s -INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.1092996597290039 s -DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=280119468062835181607479627794192860184, time:1750767568.1598387s req_ids:[8] -DEBUG 06-24 20:19:28 [manager.py:391] -ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:197.54648208618164ms total_cost_time:197.59273529052734ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9557 prompt_cache_len:5151 prompt_cache_ratio:0.5389766663178822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 -DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.10871362686157227 s -INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.11076211929321289 s -DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=97628362716929959377425897611923726797, time:1750767568.3639367s req_ids:[8] -DEBUG 06-24 20:19:28 [manager.py:391] -ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:206.6347599029541ms total_cost_time:206.6788673400879ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9558 prompt_cache_len:5151 prompt_cache_ratio:0.5389202762084118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 -DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:28 [manager.py:224] router recive req id 8 cost time 0.3106393814086914 s -INFO 06-24 20:19:28 [manager.py:68] detokenization recv req id 8 cost time 0.31271839141845703 s -DEBUG 06-24 20:19:28 [manager.py:391] Prefill Batch: batch_id=33576749409770292392699614730198400965, time:1750767568.7915688s req_ids:[8] -DEBUG 06-24 20:19:28 [manager.py:391] -ERROR 06-24 20:19:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:428.8289546966553ms total_cost_time:428.8756847381592ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9559 prompt_cache_len:5151 prompt_cache_ratio:0.5388638978972696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 -DEBUG 06-24 20:19:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10970783233642578 s -INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11167287826538086 s -DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=297082691612105092246512813917874413429, time:1750767569.0141037s req_ids:[8] -DEBUG 06-24 20:19:29 [manager.py:391] -ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:28 lightllm_req_id:8 first_token_cost:211.40313148498535ms total_cost_time:211.45009994506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9560 prompt_cache_len:5151 prompt_cache_ratio:0.5388075313807531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 -DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10769104957580566 s -INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.10964608192443848 s -DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=196269175707732820190716200779654786997, time:1750767569.230571s req_ids:[8] -DEBUG 06-24 20:19:29 [manager.py:391] -ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:204.73027229309082ms total_cost_time:204.7741413116455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9561 prompt_cache_len:5151 prompt_cache_ratio:0.5387511766551616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 -DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10869479179382324 s -INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11064982414245605 s -DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=54062305983063016907630298902198025130, time:1750767569.4407966s req_ids:[8] -DEBUG 06-24 20:19:29 [manager.py:391] -ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:209.2912197113037ms total_cost_time:209.3358039855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9562 prompt_cache_len:5151 prompt_cache_ratio:0.5386948337167956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 -DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10982155799865723 s -INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11184358596801758 s -DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=301855681546919206607842265377921371713, time:1750767569.6558874s req_ids:[8] -DEBUG 06-24 20:19:29 [manager.py:391] -ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:206.88748359680176ms total_cost_time:206.94446563720703ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:9563 prompt_cache_len:5151 prompt_cache_ratio:0.5386385025619576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 -DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:29 [manager.py:224] router recive req id 8 cost time 0.10836148262023926 s -INFO 06-24 20:19:29 [manager.py:68] detokenization recv req id 8 cost time 0.11038517951965332 s -DEBUG 06-24 20:19:29 [manager.py:391] Prefill Batch: batch_id=245796407903921052772078424454213224369, time:1750767569.8700001s req_ids:[8] -DEBUG 06-24 20:19:29 [manager.py:391] -ERROR 06-24 20:19:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:207.5631618499756ms total_cost_time:207.60726928710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9564 prompt_cache_len:5151 prompt_cache_ratio:0.5385821831869511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 -DEBUG 06-24 20:19:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10905790328979492 s -INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s -DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=212273581476518115166191443412697119141, time:1750767570.084183s req_ids:[8] -DEBUG 06-24 20:19:30 [manager.py:391] -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:29 lightllm_req_id:8 first_token_cost:207.14187622070312ms total_cost_time:207.1843147277832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9565 prompt_cache_len:5151 prompt_cache_ratio:0.5385258755880815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 -DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10780835151672363 s -INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.10980820655822754 s -DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=250025269233431222942089966411710680897, time:1750767570.2964118s req_ids:[8] -DEBUG 06-24 20:19:30 [manager.py:391] -ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:206.71749114990234ms total_cost_time:206.76064491271973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9566 prompt_cache_len:5151 prompt_cache_ratio:0.5384695797616559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 -DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10832524299621582 s -INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11042594909667969 s -DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=52585544584111991476653927595130533793, time:1750767570.5083225s req_ids:[8] -DEBUG 06-24 20:19:30 [manager.py:391] -ERROR 06-24 20:19:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:19:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 39525.673 tokens/s -DEBUG 06-24 20:19:30 [stats.py:37] Avg prompt tokens throughput: 39517.295 tokens/s -DEBUG 06-24 20:19:30 [stats.py:37] Avg generate tokens throughput: 8.377 tokens/s -INFO 06-24 20:19:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:396.05212211608887ms total_cost_time:396.09551429748535ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9567 prompt_cache_len:5151 prompt_cache_ratio:0.5384132957039824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 -DEBUG 06-24 20:19:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:30 [batch.py:51] router release req id 8 -INFO 06-24 20:19:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:30 [manager.py:224] router recive req id 8 cost time 0.10800790786743164 s -INFO 06-24 20:19:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003613471984863 s -DEBUG 06-24 20:19:30 [manager.py:391] Prefill Batch: batch_id=263333304832050029111477575052525426633, time:1750767570.910313s req_ids:[8] -DEBUG 06-24 20:19:30 [manager.py:391] -ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:30 lightllm_req_id:8 first_token_cost:210.48736572265625ms total_cost_time:210.53171157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9568 prompt_cache_len:5151 prompt_cache_ratio:0.5383570234113713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 -DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10956621170043945 s -INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.1116175651550293 s -DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=287340723267448761295025781415023718825, time:1750767571.1271217s req_ids:[8] -DEBUG 06-24 20:19:31 [manager.py:391] -ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:206.1927318572998ms total_cost_time:206.2361240386963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9569 prompt_cache_len:5151 prompt_cache_ratio:0.5383007628801337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 -DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10833024978637695 s -INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11043787002563477 s -DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=17760565847091197084904475265661525694, time:1750767571.339339s req_ids:[8] -DEBUG 06-24 20:19:31 [manager.py:391] -ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:208.99367332458496ms total_cost_time:209.03873443603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9570 prompt_cache_len:5151 prompt_cache_ratio:0.538244514106583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 -DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.1095888614654541 s -INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11133456230163574 s -DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=117789067196858661312594493710677531381, time:1750767571.5537932s req_ids:[8] -DEBUG 06-24 20:19:31 [manager.py:391] -ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:168.3812141418457ms total_cost_time:168.42174530029297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9571 prompt_cache_len:5151 prompt_cache_ratio:0.5381882770870338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 -DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10822796821594238 s -INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.11014819145202637 s -DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=97424972194900291150588369756505001331, time:1750767571.7277553s req_ids:[8] -DEBUG 06-24 20:19:31 [manager.py:391] -ERROR 06-24 20:19:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:201.24483108520508ms total_cost_time:201.28703117370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9572 prompt_cache_len:5151 prompt_cache_ratio:0.5381320518178019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 -DEBUG 06-24 20:19:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:31 [manager.py:224] router recive req id 8 cost time 0.10737776756286621 s -INFO 06-24 20:19:31 [manager.py:68] detokenization recv req id 8 cost time 0.10928106307983398 s -DEBUG 06-24 20:19:31 [manager.py:391] Prefill Batch: batch_id=288528960775953015444062707085079258345, time:1750767571.934154s req_ids:[8] -DEBUG 06-24 20:19:31 [manager.py:391] -ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:31 lightllm_req_id:8 first_token_cost:204.85258102416992ms total_cost_time:204.8969268798828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9573 prompt_cache_len:5151 prompt_cache_ratio:0.5380758382952052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 -DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10795903205871582 s -INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11003899574279785 s -DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=44216944368792910296516066325835168438, time:1750767572.1459544s req_ids:[8] -DEBUG 06-24 20:19:32 [manager.py:391] -ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:209.78260040283203ms total_cost_time:209.82909202575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9574 prompt_cache_len:5151 prompt_cache_ratio:0.538019636515563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 -DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s -INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11100149154663086 s -DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=317957170919018299222671624538445976168, time:1750767572.3677666s req_ids:[8] -DEBUG 06-24 20:19:32 [manager.py:391] -ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:379.1632652282715ms total_cost_time:379.209041595459ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9575 prompt_cache_len:5151 prompt_cache_ratio:0.5379634464751958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 -DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.1084294319152832 s -INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s -DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=36089649378477838612247118710426139676, time:1750767572.7478623s req_ids:[8] -DEBUG 06-24 20:19:32 [manager.py:391] -ERROR 06-24 20:19:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:205.6279182434082ms total_cost_time:205.6715488433838ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9576 prompt_cache_len:5151 prompt_cache_ratio:0.5379072681704261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 -DEBUG 06-24 20:19:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:32 [manager.py:224] router recive req id 8 cost time 0.10768270492553711 s -INFO 06-24 20:19:32 [manager.py:68] detokenization recv req id 8 cost time 0.10957598686218262 s -DEBUG 06-24 20:19:32 [manager.py:391] Prefill Batch: batch_id=198763118647607762005862418134122915282, time:1750767572.963563s req_ids:[8] -DEBUG 06-24 20:19:32 [manager.py:391] -ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:32 lightllm_req_id:8 first_token_cost:209.36894416809082ms total_cost_time:209.4125747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9577 prompt_cache_len:5151 prompt_cache_ratio:0.5378511015975775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 -DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10787320137023926 s -INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.10976696014404297 s -DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=214163634930176399251974770767307927226, time:1750767573.1844647s req_ids:[8] -DEBUG 06-24 20:19:33 [manager.py:391] -ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:216.37582778930664ms total_cost_time:216.41850471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9578 prompt_cache_len:5151 prompt_cache_ratio:0.5377949467529756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 -DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10867905616760254 s -INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11063575744628906 s -DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=121387143422924937791818423750414505546, time:1750767573.4008336s req_ids:[8] -DEBUG 06-24 20:19:33 [manager.py:391] -ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:205.17325401306152ms total_cost_time:205.21855354309082ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9579 prompt_cache_len:5151 prompt_cache_ratio:0.5377388036329471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 -DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10829663276672363 s -INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11023902893066406 s -DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=259602775629928287214063378682794537207, time:1750767573.616585s req_ids:[8] -DEBUG 06-24 20:19:33 [manager.py:391] -ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:213.85645866394043ms total_cost_time:213.90080451965332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9580 prompt_cache_len:5151 prompt_cache_ratio:0.5376826722338205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 -DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.10869026184082031 s -INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11096405982971191 s -DEBUG 06-24 20:19:33 [manager.py:391] Prefill Batch: batch_id=315242433687348694896291394848095769635, time:1750767573.8304882s req_ids:[8] -DEBUG 06-24 20:19:33 [manager.py:391] -ERROR 06-24 20:19:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:167.6466464996338ms total_cost_time:167.68765449523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9581 prompt_cache_len:5151 prompt_cache_ratio:0.5376265525519257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 -DEBUG 06-24 20:19:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:33 [manager.py:224] router recive req id 8 cost time 0.1082453727722168 s -INFO 06-24 20:19:33 [manager.py:68] detokenization recv req id 8 cost time 0.11026930809020996 s -DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=258432937727112355898745677113389726167, time:1750767574.0031037s req_ids:[8] -DEBUG 06-24 20:19:34 [manager.py:391] -ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:33 lightllm_req_id:8 first_token_cost:198.55833053588867ms total_cost_time:198.60148429870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9582 prompt_cache_len:5151 prompt_cache_ratio:0.5375704445835943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 -DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.1087195873260498 s -INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s -DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=210982940087692593205265939631316583737, time:1750767574.2077737s req_ids:[8] -DEBUG 06-24 20:19:34 [manager.py:391] -ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:377.1669864654541ms total_cost_time:377.2149085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:9583 prompt_cache_len:5151 prompt_cache_ratio:0.5375143483251591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 -DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.1089010238647461 s -INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.11097145080566406 s -DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=109907447556258030749571459732535756548, time:1750767574.5974846s req_ids:[8] -DEBUG 06-24 20:19:34 [manager.py:391] -ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:213.4850025177002ms total_cost_time:213.52863311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9584 prompt_cache_len:5151 prompt_cache_ratio:0.5374582637729549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 -DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:34 [manager.py:224] router recive req id 8 cost time 0.10787844657897949 s -INFO 06-24 20:19:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994791984558105 s -DEBUG 06-24 20:19:34 [manager.py:391] Prefill Batch: batch_id=38372522123268998299215385022443258171, time:1750767574.8116379s req_ids:[8] -DEBUG 06-24 20:19:34 [manager.py:391] -ERROR 06-24 20:19:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:206.8185806274414ms total_cost_time:206.8636417388916ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9585 prompt_cache_len:5151 prompt_cache_ratio:0.5374021909233176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 -DEBUG 06-24 20:19:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10898995399475098 s -INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.1110222339630127 s -DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=5223430205994202387819560736507652473, time:1750767575.0241735s req_ids:[8] -DEBUG 06-24 20:19:35 [manager.py:391] -ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:34 lightllm_req_id:8 first_token_cost:209.60521697998047ms total_cost_time:209.65003967285156ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9586 prompt_cache_len:5151 prompt_cache_ratio:0.537346129772585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 -DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10707974433898926 s -INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10904955863952637 s -DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=273720352666573770646096172097715243995, time:1750767575.237574s req_ids:[8] -DEBUG 06-24 20:19:35 [manager.py:391] -ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:204.21481132507324ms total_cost_time:204.26058769226074ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9587 prompt_cache_len:5151 prompt_cache_ratio:0.537290080317096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 -DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s -INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10979771614074707 s -DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=238753019418245568751878399799239336327, time:1750767575.4485145s req_ids:[8] -DEBUG 06-24 20:19:35 [manager.py:391] -ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:207.18812942504883ms total_cost_time:207.2310447692871ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9588 prompt_cache_len:5151 prompt_cache_ratio:0.5372340425531915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 -DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s -INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.10988044738769531 s -DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=305836452495562443389262573850058974031, time:1750767575.661738s req_ids:[8] -DEBUG 06-24 20:19:35 [manager.py:391] -ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:208.5120677947998ms total_cost_time:208.5561752319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9589 prompt_cache_len:5151 prompt_cache_ratio:0.5371780164772135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 -DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:35 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s -INFO 06-24 20:19:35 [manager.py:68] detokenization recv req id 8 cost time 0.11011552810668945 s -DEBUG 06-24 20:19:35 [manager.py:391] Prefill Batch: batch_id=181763942477498207314635101086111422094, time:1750767575.875582s req_ids:[8] -DEBUG 06-24 20:19:35 [manager.py:391] -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:168.32995414733887ms total_cost_time:168.37430000305176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9590 prompt_cache_len:5151 prompt_cache_ratio:0.5371220020855058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 -DEBUG 06-24 20:19:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10794878005981445 s -INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.10995316505432129 s -DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=93906630767897306992456986658360172706, time:1750767576.0496395s req_ids:[8] -DEBUG 06-24 20:19:36 [manager.py:391] -ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:35 lightllm_req_id:8 first_token_cost:202.41355895996094ms total_cost_time:202.45838165283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9591 prompt_cache_len:5151 prompt_cache_ratio:0.5370659993744135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 -DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10797667503356934 s -INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s -DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=303985534831315510848588688480693363609, time:1750767576.2572515s req_ids:[8] -DEBUG 06-24 20:19:36 [manager.py:391] -ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:375.39124488830566ms total_cost_time:375.43749809265137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9592 prompt_cache_len:5151 prompt_cache_ratio:0.5370100083402836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 -DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.1093149185180664 s -INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.1115114688873291 s -DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=170592797656311299238016900463283825150, time:1750767576.636555s req_ids:[8] -DEBUG 06-24 20:19:36 [manager.py:391] -ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:207.122802734375ms total_cost_time:207.1676254272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9593 prompt_cache_len:5151 prompt_cache_ratio:0.5369540289794642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 -DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:36 [manager.py:224] router recive req id 8 cost time 0.10947227478027344 s -INFO 06-24 20:19:36 [manager.py:68] detokenization recv req id 8 cost time 0.11193680763244629 s -DEBUG 06-24 20:19:36 [manager.py:391] Prefill Batch: batch_id=64360228190931432567425016605874837677, time:1750767576.853484s req_ids:[8] -DEBUG 06-24 20:19:36 [manager.py:391] -ERROR 06-24 20:19:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:211.3351821899414ms total_cost_time:211.3807201385498ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9594 prompt_cache_len:5151 prompt_cache_ratio:0.5368980612883052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 -DEBUG 06-24 20:19:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10926604270935059 s -INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s -DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=207897350207409508537204026300791898488, time:1750767577.0707061s req_ids:[8] -DEBUG 06-24 20:19:37 [manager.py:391] -ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:36 lightllm_req_id:8 first_token_cost:211.57240867614746ms total_cost_time:211.61603927612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9595 prompt_cache_len:5151 prompt_cache_ratio:0.5368421052631579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 -DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10646939277648926 s -INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10856175422668457 s -DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=151763881482794762220657913381451240919, time:1750767577.2855875s req_ids:[8] -DEBUG 06-24 20:19:37 [manager.py:391] -ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:206.87580108642578ms total_cost_time:206.90035820007324ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:9596 prompt_cache_len:5151 prompt_cache_ratio:0.5367861609003751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 -DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10506772994995117 s -INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10704326629638672 s -DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=51301851881255104305251584834485696610, time:1750767577.5004802s req_ids:[8] -DEBUG 06-24 20:19:37 [manager.py:391] -ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:210.0660800933838ms total_cost_time:210.09039878845215ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:9597 prompt_cache_len:5151 prompt_cache_ratio:0.5367302281963113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 -DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10460305213928223 s -INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10661578178405762 s -DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=236579386352676430611043429926459770089, time:1750767577.7144868s req_ids:[8] -DEBUG 06-24 20:19:37 [manager.py:391] -ERROR 06-24 20:19:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:210.68525314331055ms total_cost_time:210.7095718383789ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:9598 prompt_cache_len:5151 prompt_cache_ratio:0.5366743071473223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 -DEBUG 06-24 20:19:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:37 [manager.py:224] router recive req id 8 cost time 0.10498881340026855 s -INFO 06-24 20:19:37 [manager.py:68] detokenization recv req id 8 cost time 0.10705161094665527 s -DEBUG 06-24 20:19:37 [manager.py:391] Prefill Batch: batch_id=75817467312477508519932669955118119489, time:1750767577.9297729s req_ids:[8] -DEBUG 06-24 20:19:37 [manager.py:391] -ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:37 lightllm_req_id:8 first_token_cost:221.8313217163086ms total_cost_time:221.85373306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:9599 prompt_cache_len:5151 prompt_cache_ratio:0.5366183977497656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 -DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10483479499816895 s -INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10686635971069336 s -DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=88536805584247665376282388142544233056, time:1750767578.161501s req_ids:[8] -DEBUG 06-24 20:19:38 [manager.py:391] -ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:373.1272220611572ms total_cost_time:373.1505870819092ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9600 prompt_cache_len:5151 prompt_cache_ratio:0.5365625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 -DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10686826705932617 s -INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10893988609313965 s -DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=150395789900956025810420455991126483628, time:1750767578.53168s req_ids:[8] -DEBUG 06-24 20:19:38 [manager.py:391] -ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:209.1827392578125ms total_cost_time:209.22613143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9601 prompt_cache_len:5151 prompt_cache_ratio:0.536506613894386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 -DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10860419273376465 s -INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.11052918434143066 s -DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=146761882375458639845954091863197012843, time:1750767578.758405s req_ids:[8] -DEBUG 06-24 20:19:38 [manager.py:391] -ERROR 06-24 20:19:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:223.54555130004883ms total_cost_time:223.59132766723633ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9602 prompt_cache_len:5151 prompt_cache_ratio:0.5364507394292856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 -DEBUG 06-24 20:19:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:38 [manager.py:224] router recive req id 8 cost time 0.10777544975280762 s -INFO 06-24 20:19:38 [manager.py:68] detokenization recv req id 8 cost time 0.10980081558227539 s -DEBUG 06-24 20:19:38 [manager.py:391] Prefill Batch: batch_id=159859511184832409921937340587114494483, time:1750767578.9766366s req_ids:[8] -DEBUG 06-24 20:19:38 [manager.py:391] -ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:38 lightllm_req_id:8 first_token_cost:208.113431930542ms total_cost_time:208.15682411193848ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9603 prompt_cache_len:5151 prompt_cache_ratio:0.5363948766010622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 -DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s -INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11008429527282715 s -DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=175633452015431345896353213018620224902, time:1750767579.1903057s req_ids:[8] -DEBUG 06-24 20:19:39 [manager.py:391] -ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.25505256652832ms total_cost_time:208.2986831665039ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9604 prompt_cache_len:5151 prompt_cache_ratio:0.5363390254060808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 -DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10990667343139648 s -INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11195731163024902 s -DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=178076400401502766169889305514435031838, time:1750767579.4037645s req_ids:[8] -DEBUG 06-24 20:19:39 [manager.py:391] -ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.4963321685791ms total_cost_time:208.5425853729248ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9605 prompt_cache_len:5151 prompt_cache_ratio:0.536283185840708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 -DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s -INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s -DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=36459739328261783013923791821027753616, time:1750767579.6179442s req_ids:[8] -DEBUG 06-24 20:19:39 [manager.py:391] -ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:208.81986618041992ms total_cost_time:208.8637351989746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9606 prompt_cache_len:5151 prompt_cache_ratio:0.5362273579013117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 -DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:39 [manager.py:224] router recive req id 8 cost time 0.10804915428161621 s -INFO 06-24 20:19:39 [manager.py:68] detokenization recv req id 8 cost time 0.11017060279846191 s -DEBUG 06-24 20:19:39 [manager.py:391] Prefill Batch: batch_id=18844510223548048568892691055504029337, time:1750767579.8327348s req_ids:[8] -DEBUG 06-24 20:19:39 [manager.py:391] -ERROR 06-24 20:19:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:209.1388702392578ms total_cost_time:209.1834545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9607 prompt_cache_len:5151 prompt_cache_ratio:0.5361715415842615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 -DEBUG 06-24 20:19:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10931921005249023 s -INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11142230033874512 s -DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=268007091527590011736570927146725383483, time:1750767580.0461495s req_ids:[8] -DEBUG 06-24 20:19:40 [manager.py:391] -ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:39 lightllm_req_id:8 first_token_cost:377.69269943237305ms total_cost_time:377.73799896240234ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9608 prompt_cache_len:5151 prompt_cache_ratio:0.5361157368859284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 -DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10853052139282227 s -INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11058163642883301 s -DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=229352083071119222617866018625946274470, time:1750767580.4286027s req_ids:[8] -DEBUG 06-24 20:19:40 [manager.py:391] -ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:201.29752159118652ms total_cost_time:201.3411521911621ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9609 prompt_cache_len:5151 prompt_cache_ratio:0.536059943802685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 -DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s -INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11035346984863281 s -DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=31637374943393758850933222485018680898, time:1750767580.6359897s req_ids:[8] -DEBUG 06-24 20:19:40 [manager.py:391] -ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:206.17318153381348ms total_cost_time:206.21728897094727ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9610 prompt_cache_len:5151 prompt_cache_ratio:0.5360041623309053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 -DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:40 [manager.py:224] router recive req id 8 cost time 0.10934758186340332 s -INFO 06-24 20:19:40 [manager.py:68] detokenization recv req id 8 cost time 0.11137151718139648 s -DEBUG 06-24 20:19:40 [manager.py:391] Prefill Batch: batch_id=233943322740278592188911471722080358872, time:1750767580.8497343s req_ids:[8] -DEBUG 06-24 20:19:40 [manager.py:391] -DEBUG 06-24 20:19:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 41928.831 tokens/s -DEBUG 06-24 20:19:40 [stats.py:37] Avg prompt tokens throughput: 41920.187 tokens/s -DEBUG 06-24 20:19:40 [stats.py:37] Avg generate tokens throughput: 8.644 tokens/s -ERROR 06-24 20:19:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:209.73682403564453ms total_cost_time:209.78212356567383ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9611 prompt_cache_len:5151 prompt_cache_ratio:0.5359483924669649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 -DEBUG 06-24 20:19:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s -INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s -DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=185494743997697275557776341276094814104, time:1750767581.064922s req_ids:[8] -DEBUG 06-24 20:19:41 [manager.py:391] -ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:40 lightllm_req_id:8 first_token_cost:208.2674503326416ms total_cost_time:208.30774307250977ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:9612 prompt_cache_len:5151 prompt_cache_ratio:0.535892634207241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 -DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10728740692138672 s -INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.10913562774658203 s -DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=175102617024324890490308687935322662722, time:1750767581.2819605s req_ids:[8] -DEBUG 06-24 20:19:41 [manager.py:391] -ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:207.84687995910645ms total_cost_time:207.89074897766113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9613 prompt_cache_len:5151 prompt_cache_ratio:0.535836887548112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 -DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10780453681945801 s -INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.1098785400390625 s -DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=269670587385769109448781059973211664034, time:1750767581.4929845s req_ids:[8] -DEBUG 06-24 20:19:41 [manager.py:391] -ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:212.39519119262695ms total_cost_time:212.45145797729492ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:9614 prompt_cache_len:5151 prompt_cache_ratio:0.5357811524859579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 -DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10951662063598633 s -INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11159110069274902 s -DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=77081444725224209734323118311909887603, time:1750767581.7214305s req_ids:[8] -DEBUG 06-24 20:19:41 [manager.py:391] -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:218.81890296936035ms total_cost_time:218.86444091796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9615 prompt_cache_len:5151 prompt_cache_ratio:0.5357254290171607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 -DEBUG 06-24 20:19:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:41 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s -INFO 06-24 20:19:41 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s -DEBUG 06-24 20:19:41 [manager.py:391] Prefill Batch: batch_id=11777221909152026451009235240742205842, time:1750767581.9367328s req_ids:[8] -DEBUG 06-24 20:19:41 [manager.py:391] -ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:41 lightllm_req_id:8 first_token_cost:410.8567237854004ms total_cost_time:410.9020233154297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9616 prompt_cache_len:5151 prompt_cache_ratio:0.5356697171381032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 -DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.107452392578125 s -INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10946321487426758 s -DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=248237562646056106519644143213199493711, time:1750767582.3506112s req_ids:[8] -DEBUG 06-24 20:19:42 [manager.py:391] -ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:183.5329532623291ms total_cost_time:183.577299118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9617 prompt_cache_len:5151 prompt_cache_ratio:0.53561401684517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 -DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.10807561874389648 s -INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10997867584228516 s -DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=1332380366734783349342886585031602093, time:1750767582.5448482s req_ids:[8] -DEBUG 06-24 20:19:42 [manager.py:391] -ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:207.78918266296387ms total_cost_time:207.83352851867676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9618 prompt_cache_len:5151 prompt_cache_ratio:0.5355583281347474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 -DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.10608553886413574 s -INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10804009437561035 s -DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=255841502713761468798142083990519807549, time:1750767582.7551804s req_ids:[8] -DEBUG 06-24 20:19:42 [manager.py:391] -ERROR 06-24 20:19:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:207.29851722717285ms total_cost_time:207.3218822479248ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:9619 prompt_cache_len:5151 prompt_cache_ratio:0.5355026510032228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 -DEBUG 06-24 20:19:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:42 [manager.py:224] router recive req id 8 cost time 0.1047048568725586 s -INFO 06-24 20:19:42 [manager.py:68] detokenization recv req id 8 cost time 0.10686159133911133 s -DEBUG 06-24 20:19:42 [manager.py:391] Prefill Batch: batch_id=252199975673626752452097158141448863182, time:1750767582.9680789s req_ids:[8] -DEBUG 06-24 20:19:42 [manager.py:391] -ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:42 lightllm_req_id:8 first_token_cost:209.11073684692383ms total_cost_time:209.13338661193848ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:9620 prompt_cache_len:5151 prompt_cache_ratio:0.5354469854469854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 -DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10455536842346191 s -INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10656976699829102 s -DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=103296147603089221884845970984593799546, time:1750767583.1825933s req_ids:[8] -DEBUG 06-24 20:19:43 [manager.py:391] -ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:210.6165885925293ms total_cost_time:210.63876152038574ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9621 prompt_cache_len:5151 prompt_cache_ratio:0.5353913314624259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 -DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10435891151428223 s -INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10607600212097168 s -DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=68497284328355858332725188410175934439, time:1750767583.400153s req_ids:[8] -DEBUG 06-24 20:19:43 [manager.py:391] -ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:169.81887817382812ms total_cost_time:169.84105110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9622 prompt_cache_len:5151 prompt_cache_ratio:0.5353356890459364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 -DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.1042783260345459 s -INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10596060752868652 s -DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=32377755342894174844317467030654539845, time:1750767583.5694542s req_ids:[8] -DEBUG 06-24 20:19:43 [manager.py:391] -ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:168.3330535888672ms total_cost_time:168.37835311889648ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9623 prompt_cache_len:5151 prompt_cache_ratio:0.5352800581939104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 -DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10821843147277832 s -INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10990452766418457 s -DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=167175564499737749048255909064311163788, time:1750767583.7411168s req_ids:[8] -DEBUG 06-24 20:19:43 [manager.py:391] -ERROR 06-24 20:19:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:193.86601448059082ms total_cost_time:193.91369819641113ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:9624 prompt_cache_len:5151 prompt_cache_ratio:0.5352244389027432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 -DEBUG 06-24 20:19:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:43 [manager.py:224] router recive req id 8 cost time 0.10806441307067871 s -INFO 06-24 20:19:43 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s -DEBUG 06-24 20:19:43 [manager.py:391] Prefill Batch: batch_id=107380764574381396375119990113434129122, time:1750767583.942508s req_ids:[8] -DEBUG 06-24 20:19:43 [manager.py:391] -ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:43 lightllm_req_id:8 first_token_cost:372.9238510131836ms total_cost_time:372.9679584503174ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9625 prompt_cache_len:5151 prompt_cache_ratio:0.5351688311688312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 -DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s -INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11017656326293945 s -DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=119787481166582829081045979957857650818, time:1750767584.321902s req_ids:[8] -DEBUG 06-24 20:19:44 [manager.py:391] -ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:201.8585205078125ms total_cost_time:201.9050121307373ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9626 prompt_cache_len:5151 prompt_cache_ratio:0.5351132349885727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 -DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1086740493774414 s -INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11048316955566406 s -DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=190169904185174574302182704166990217707, time:1750767584.5300026s req_ids:[8] -DEBUG 06-24 20:19:44 [manager.py:391] -ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:203.71437072753906ms total_cost_time:203.7365436553955ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:9627 prompt_cache_len:5151 prompt_cache_ratio:0.535057650358367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 -DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.10483598709106445 s -INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.10669517517089844 s -DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=81192098810479394604549223639369035146, time:1750767584.741568s req_ids:[8] -DEBUG 06-24 20:19:44 [manager.py:391] -ERROR 06-24 20:19:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:209.31100845336914ms total_cost_time:209.35797691345215ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:9628 prompt_cache_len:5151 prompt_cache_ratio:0.5350020772746157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 -DEBUG 06-24 20:19:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:44 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s -INFO 06-24 20:19:44 [manager.py:68] detokenization recv req id 8 cost time 0.11005592346191406 s -DEBUG 06-24 20:19:44 [manager.py:391] Prefill Batch: batch_id=90929364060730447218088175153487262607, time:1750767584.9540465s req_ids:[8] -DEBUG 06-24 20:19:44 [manager.py:391] -ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:44 lightllm_req_id:8 first_token_cost:207.81493186950684ms total_cost_time:207.85856246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9629 prompt_cache_len:5151 prompt_cache_ratio:0.5349465157337211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 -DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:45 [batch.py:51] router release req id 8 -INFO 06-24 20:19:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10886859893798828 s -INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.11109542846679688 s -DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=266265460499678806136626940419325990771, time:1750767585.169764s req_ids:[8] -DEBUG 06-24 20:19:45 [manager.py:391] -ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:209.98835563659668ms total_cost_time:210.03103256225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9630 prompt_cache_len:5151 prompt_cache_ratio:0.5348909657320873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 -DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10800457000732422 s -INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10962057113647461 s -DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=26206371376692073210248424946010359137, time:1750767585.385586s req_ids:[8] -DEBUG 06-24 20:19:45 [manager.py:391] -ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:208.46319198608398ms total_cost_time:208.52160453796387ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9631 prompt_cache_len:5151 prompt_cache_ratio:0.5348354272661198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 -DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.10795426368713379 s -INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s -DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=247775928449296558252822840896997218541, time:1750767585.5987737s req_ids:[8] -DEBUG 06-24 20:19:45 [manager.py:391] -ERROR 06-24 20:19:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:209.81264114379883ms total_cost_time:209.8560333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9632 prompt_cache_len:5151 prompt_cache_ratio:0.5347799003322259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 -DEBUG 06-24 20:19:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:45 [manager.py:224] router recive req id 8 cost time 0.1079416275024414 s -INFO 06-24 20:19:45 [manager.py:68] detokenization recv req id 8 cost time 0.10979652404785156 s -DEBUG 06-24 20:19:45 [manager.py:391] Prefill Batch: batch_id=82041442209767478687089909084370622680, time:1750767585.8140917s req_ids:[8] -DEBUG 06-24 20:19:45 [manager.py:391] -ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:45 lightllm_req_id:8 first_token_cost:393.59259605407715ms total_cost_time:393.6350345611572ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9633 prompt_cache_len:5151 prompt_cache_ratio:0.5347243849268141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 -DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.10912013053894043 s -INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11094117164611816 s -DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=10816723901541761915552143528320509050, time:1750767586.2128546s req_ids:[8] -DEBUG 06-24 20:19:46 [manager.py:391] -ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:206.2673568725586ms total_cost_time:206.3138484954834ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9634 prompt_cache_len:5151 prompt_cache_ratio:0.5346688810462944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 -DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.1077127456665039 s -INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.10964798927307129 s -DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=219409397432468855112201426791909145260, time:1750767586.424971s req_ids:[8] -DEBUG 06-24 20:19:46 [manager.py:391] -ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:204.9872875213623ms total_cost_time:205.0333023071289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9635 prompt_cache_len:5151 prompt_cache_ratio:0.5346133886870783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 -DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s -INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s -DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=61213992967660673274579254354653416875, time:1750767586.638555s req_ids:[8] -DEBUG 06-24 20:19:46 [manager.py:391] -ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:211.49206161499023ms total_cost_time:211.53569221496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9636 prompt_cache_len:5151 prompt_cache_ratio:0.5345579078455791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 -DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:46 [manager.py:224] router recive req id 8 cost time 0.10981345176696777 s -INFO 06-24 20:19:46 [manager.py:68] detokenization recv req id 8 cost time 0.11157822608947754 s -DEBUG 06-24 20:19:46 [manager.py:391] Prefill Batch: batch_id=82216709151957465245689891190725870592, time:1750767586.8546174s req_ids:[8] -DEBUG 06-24 20:19:46 [manager.py:391] -ERROR 06-24 20:19:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:168.28155517578125ms total_cost_time:168.32375526428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9637 prompt_cache_len:5151 prompt_cache_ratio:0.5345024385182111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 -DEBUG 06-24 20:19:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:46 [batch.py:51] router release req id 8 -INFO 06-24 20:19:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10820412635803223 s -INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.10933494567871094 s -DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=5909871157666746440511889913313135747, time:1750767587.0280712s req_ids:[8] -DEBUG 06-24 20:19:47 [manager.py:391] -ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:46 lightllm_req_id:8 first_token_cost:199.72777366638184ms total_cost_time:199.7697353363037ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9638 prompt_cache_len:5151 prompt_cache_ratio:0.5344469807013903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 -DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10823774337768555 s -INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s -DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=338969082829078649723663384628191990768, time:1750767587.2337573s req_ids:[8] -DEBUG 06-24 20:19:47 [manager.py:391] -ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:205.85250854492188ms total_cost_time:205.89685440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9639 prompt_cache_len:5151 prompt_cache_ratio:0.5343915343915344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 -DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.10940694808959961 s -INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.11127281188964844 s -DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=77075791345459186315944657609144642559, time:1750767587.445883s req_ids:[8] -DEBUG 06-24 20:19:47 [manager.py:391] -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:209.7799777984619ms total_cost_time:209.8245620727539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9640 prompt_cache_len:5151 prompt_cache_ratio:0.5343360995850622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 -DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:47 [manager.py:224] router recive req id 8 cost time 0.30974411964416504 s -INFO 06-24 20:19:47 [manager.py:68] detokenization recv req id 8 cost time 0.3116919994354248 s -DEBUG 06-24 20:19:47 [manager.py:391] Prefill Batch: batch_id=337176811945769070226098979939527429647, time:1750767587.8643425s req_ids:[8] -DEBUG 06-24 20:19:47 [manager.py:391] -ERROR 06-24 20:19:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:414.9456024169922ms total_cost_time:414.9911403656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9641 prompt_cache_len:5151 prompt_cache_ratio:0.5342806762783944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 -DEBUG 06-24 20:19:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10763859748840332 s -INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s -DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=277845337380524985396413421470870837342, time:1750767588.082584s req_ids:[8] -DEBUG 06-24 20:19:48 [manager.py:391] -ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:47 lightllm_req_id:8 first_token_cost:208.3914279937744ms total_cost_time:208.4345817565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9642 prompt_cache_len:5151 prompt_cache_ratio:0.5342252644679527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 -DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10906314849853516 s -INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.1110994815826416 s -DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=178330296721344943658068439239499366502, time:1750767588.2978003s req_ids:[8] -DEBUG 06-24 20:19:48 [manager.py:391] -ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:208.81414413452148ms total_cost_time:208.8615894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9643 prompt_cache_len:5151 prompt_cache_ratio:0.5341698641501608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 -DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.1104133129119873 s -INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.11239743232727051 s -DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=328505392454928217944061255683414460129, time:1750767588.5152035s req_ids:[8] -DEBUG 06-24 20:19:48 [manager.py:391] -ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:210.96515655517578ms total_cost_time:211.0116481781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9644 prompt_cache_len:5151 prompt_cache_ratio:0.5341144753214434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 -DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10952258110046387 s -INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.11135435104370117 s -DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=240716525428923366772979994225780464591, time:1750767588.7297585s req_ids:[8] -DEBUG 06-24 20:19:48 [manager.py:391] -ERROR 06-24 20:19:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:204.4689655303955ms total_cost_time:204.55241203308105ms,out_token_counter:1 mean_per_token_cost_time: 0.08344650268554688ms prompt_token_num:9645 prompt_cache_len:5151 prompt_cache_ratio:0.534059097978227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 -DEBUG 06-24 20:19:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:48 [manager.py:224] router recive req id 8 cost time 0.10913419723510742 s -INFO 06-24 20:19:48 [manager.py:68] detokenization recv req id 8 cost time 0.1110525131225586 s -DEBUG 06-24 20:19:48 [manager.py:391] Prefill Batch: batch_id=105953088862040771555172251900164209198, time:1750767588.9494414s req_ids:[8] -DEBUG 06-24 20:19:48 [manager.py:391] -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:48 lightllm_req_id:8 first_token_cost:214.10703659057617ms total_cost_time:214.15328979492188ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9646 prompt_cache_len:5151 prompt_cache_ratio:0.5340037321169396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 -DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10718011856079102 s -INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.1092064380645752 s -DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=5793026601148823038621048170859671440, time:1750767589.1635518s req_ids:[8] -DEBUG 06-24 20:19:49 [manager.py:391] -ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:207.75079727172852ms total_cost_time:207.7934741973877ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9647 prompt_cache_len:5151 prompt_cache_ratio:0.5339483777340106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 -DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s -INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.11015439033508301 s -DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=189290661953232890015189260527305789059, time:1750767589.3778455s req_ids:[8] -DEBUG 06-24 20:19:49 [manager.py:391] -ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:378.7095546722412ms total_cost_time:378.7548542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9648 prompt_cache_len:5151 prompt_cache_ratio:0.5338930348258707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 -DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10819745063781738 s -INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.10997843742370605 s -DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=108538101536752074888691480378199928414, time:1750767589.75791s req_ids:[8] -DEBUG 06-24 20:19:49 [manager.py:391] -ERROR 06-24 20:19:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:205.44004440307617ms total_cost_time:205.48391342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9649 prompt_cache_len:5151 prompt_cache_ratio:0.5338377033889522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 -DEBUG 06-24 20:19:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:49 [manager.py:224] router recive req id 8 cost time 0.10827207565307617 s -INFO 06-24 20:19:49 [manager.py:68] detokenization recv req id 8 cost time 0.11020135879516602 s -DEBUG 06-24 20:19:49 [manager.py:391] Prefill Batch: batch_id=91914723675173458180240889002944990615, time:1750767589.9707835s req_ids:[8] -DEBUG 06-24 20:19:49 [manager.py:391] -ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:49 lightllm_req_id:8 first_token_cost:205.17802238464355ms total_cost_time:205.23571968078613ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:9650 prompt_cache_len:5151 prompt_cache_ratio:0.5337823834196891 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 -DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.1084902286529541 s -INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105508804321289 s -DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=118622405010069062828969351452705723509, time:1750767590.1838999s req_ids:[8] -DEBUG 06-24 20:19:50 [manager.py:391] -ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:208.56785774230957ms total_cost_time:208.61220359802246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9651 prompt_cache_len:5151 prompt_cache_ratio:0.5337270749145167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 -DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.1076650619506836 s -INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.10958623886108398 s -DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=88527693400179694043787640988555732381, time:1750767590.3957908s req_ids:[8] -DEBUG 06-24 20:19:50 [manager.py:391] -ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:165.15398025512695ms total_cost_time:165.19594192504883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9652 prompt_cache_len:5151 prompt_cache_ratio:0.5336717778698715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 -DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10805988311767578 s -INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11009454727172852 s -DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=238477254660501812556750645205057513737, time:1750767590.5671477s req_ids:[8] -DEBUG 06-24 20:19:50 [manager.py:391] -ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:198.6837387084961ms total_cost_time:198.72713088989258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9653 prompt_cache_len:5151 prompt_cache_ratio:0.5336164922821921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 -DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s -INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11141037940979004 s -DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=120190807129728072917122877568634250347, time:1750767590.7731225s req_ids:[8] -DEBUG 06-24 20:19:50 [manager.py:391] -ERROR 06-24 20:19:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:19:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 41386.095 tokens/s -DEBUG 06-24 20:19:50 [stats.py:37] Avg prompt tokens throughput: 41377.404 tokens/s -DEBUG 06-24 20:19:50 [stats.py:37] Avg generate tokens throughput: 8.691 tokens/s -INFO 06-24 20:19:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:205.95550537109375ms total_cost_time:205.99842071533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9654 prompt_cache_len:5151 prompt_cache_ratio:0.533561218147918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 -DEBUG 06-24 20:19:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:50 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s -INFO 06-24 20:19:50 [manager.py:68] detokenization recv req id 8 cost time 0.11003565788269043 s -DEBUG 06-24 20:19:50 [manager.py:391] Prefill Batch: batch_id=289606991163884517033352215602545557612, time:1750767590.9866502s req_ids:[8] -DEBUG 06-24 20:19:50 [manager.py:391] -ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:50 lightllm_req_id:8 first_token_cost:205.413818359375ms total_cost_time:205.4600715637207ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9655 prompt_cache_len:5151 prompt_cache_ratio:0.5335059554634904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 -DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10761260986328125 s -INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.10953593254089355 s -DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=252240909737154361592616327307883508517, time:1750767591.1984615s req_ids:[8] -DEBUG 06-24 20:19:51 [manager.py:391] -ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:208.1279754638672ms total_cost_time:208.17279815673828ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9656 prompt_cache_len:5151 prompt_cache_ratio:0.5334507042253521 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 -DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10926938056945801 s -INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.11123514175415039 s -DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=55450587156414847949319668924742244432, time:1750767591.4135814s req_ids:[8] -DEBUG 06-24 20:19:51 [manager.py:391] -ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:379.9633979797363ms total_cost_time:380.01084327697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9657 prompt_cache_len:5151 prompt_cache_ratio:0.5333954644299472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 -DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:51 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s -INFO 06-24 20:19:51 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s -DEBUG 06-24 20:19:51 [manager.py:391] Prefill Batch: batch_id=289482793539444389640029991272583324012, time:1750767591.796596s req_ids:[8] -DEBUG 06-24 20:19:51 [manager.py:391] -ERROR 06-24 20:19:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:206.32386207580566ms total_cost_time:206.36796951293945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9658 prompt_cache_len:5151 prompt_cache_ratio:0.5333402360737213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 -DEBUG 06-24 20:19:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10834980010986328 s -INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.11035370826721191 s -DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=274274551328341439320814498529382498710, time:1750767592.0126216s req_ids:[8] -DEBUG 06-24 20:19:52 [manager.py:391] -ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:51 lightllm_req_id:8 first_token_cost:210.9549045562744ms total_cost_time:210.9990119934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9659 prompt_cache_len:5151 prompt_cache_ratio:0.5332850191531214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 -DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10761070251464844 s -INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10961461067199707 s -DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=290258757460850802994861219246012240054, time:1750767592.2294054s req_ids:[8] -DEBUG 06-24 20:19:52 [manager.py:391] -ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:211.78531646728516ms total_cost_time:211.83037757873535ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9660 prompt_cache_len:5151 prompt_cache_ratio:0.5332298136645963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 -DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10919737815856934 s -INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.11133623123168945 s -DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=4057051399639971067341323173359020704, time:1750767592.4452207s req_ids:[8] -DEBUG 06-24 20:19:52 [manager.py:391] -ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:208.7228298187256ms total_cost_time:208.7686061859131ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9661 prompt_cache_len:5151 prompt_cache_ratio:0.5331746196045958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 -DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.10785746574401855 s -INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10957622528076172 s -DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=257679984017375814772329898543858974518, time:1750767592.6610968s req_ids:[8] -DEBUG 06-24 20:19:52 [manager.py:391] -ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:211.94052696228027ms total_cost_time:211.9905948638916ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:9662 prompt_cache_len:5151 prompt_cache_ratio:0.5331194369695715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 -DEBUG 06-24 20:19:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:52 [manager.py:224] router recive req id 8 cost time 0.1080777645111084 s -INFO 06-24 20:19:52 [manager.py:68] detokenization recv req id 8 cost time 0.10955238342285156 s -DEBUG 06-24 20:19:52 [manager.py:391] Prefill Batch: batch_id=43310941231281132321980867946359944188, time:1750767592.8800669s req_ids:[8] -DEBUG 06-24 20:19:52 [manager.py:391] -ERROR 06-24 20:19:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:209.50651168823242ms total_cost_time:209.55395698547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:9663 prompt_cache_len:5151 prompt_cache_ratio:0.5330642657559764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 -DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10901665687561035 s -INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037921905517578 s -DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=183327004959039561849445538396153208268, time:1750767593.0960507s req_ids:[8] -DEBUG 06-24 20:19:53 [manager.py:391] -ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:52 lightllm_req_id:8 first_token_cost:210.74843406677246ms total_cost_time:210.79182624816895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9664 prompt_cache_len:5151 prompt_cache_ratio:0.5330091059602649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 -INFO 06-24 20:19:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:19:53 [statics_utils.py:24] mean first cost: 228.4807292764012 ms -INFO 06-24 20:19:53 [statics_utils.py:24] mean per token cost: 0.07096020363066266 ms -DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10826587677001953 s -INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s -DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=61534437760081381791483118735268215302, time:1750767593.311766s req_ids:[8] -DEBUG 06-24 20:19:53 [manager.py:391] -ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:370.9876537322998ms total_cost_time:371.0362911224365ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:9665 prompt_cache_len:5151 prompt_cache_ratio:0.532953957578893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 -DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.1088414192199707 s -INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11076903343200684 s -DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=116500071940017242191780697842006230253, time:1750767593.6870768s req_ids:[8] -DEBUG 06-24 20:19:53 [manager.py:391] -ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:205.92212677001953ms total_cost_time:205.9652805328369ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9666 prompt_cache_len:5151 prompt_cache_ratio:0.5328988206083178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 -DEBUG 06-24 20:19:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:53 [manager.py:224] router recive req id 8 cost time 0.10887527465820312 s -INFO 06-24 20:19:53 [manager.py:68] detokenization recv req id 8 cost time 0.11103081703186035 s -DEBUG 06-24 20:19:53 [manager.py:391] Prefill Batch: batch_id=30854642515604314840476333541084311588, time:1750767593.9008296s req_ids:[8] -DEBUG 06-24 20:19:53 [manager.py:391] -ERROR 06-24 20:19:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:207.45110511779785ms total_cost_time:207.49568939208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9667 prompt_cache_len:5151 prompt_cache_ratio:0.5328436950449984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 -DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10738492012023926 s -INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.10953545570373535 s -DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=226836570111958039524872607850970301475, time:1750767594.1134188s req_ids:[8] -DEBUG 06-24 20:19:54 [manager.py:391] -ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:53 lightllm_req_id:8 first_token_cost:205.88994026184082ms total_cost_time:205.9330940246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9668 prompt_cache_len:5151 prompt_cache_ratio:0.5327885808853952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 -DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10756158828735352 s -INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s -DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=82264845266696754626794600739316367045, time:1750767594.3326564s req_ids:[8] -DEBUG 06-24 20:19:54 [manager.py:391] -ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:214.68544006347656ms total_cost_time:214.72930908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9669 prompt_cache_len:5151 prompt_cache_ratio:0.5327334781259696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 -DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10922956466674805 s -INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.1119394302368164 s -DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=11008737174155588908555256577496295557, time:1750767594.5472918s req_ids:[8] -DEBUG 06-24 20:19:54 [manager.py:391] -ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:207.41844177246094ms total_cost_time:207.46326446533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9670 prompt_cache_len:5151 prompt_cache_ratio:0.5326783867631851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 -DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10850667953491211 s -INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.11050701141357422 s -DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=33699697577897786755012014422252778798, time:1750767594.7625592s req_ids:[8] -DEBUG 06-24 20:19:54 [manager.py:391] -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:19:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:19:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:213.31238746643066ms total_cost_time:213.35816383361816ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9671 prompt_cache_len:5151 prompt_cache_ratio:0.5326233067935063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 -DEBUG 06-24 20:19:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:54 [manager.py:224] router recive req id 8 cost time 0.10765314102172852 s -INFO 06-24 20:19:54 [manager.py:68] detokenization recv req id 8 cost time 0.10915923118591309 s -DEBUG 06-24 20:19:54 [manager.py:391] Prefill Batch: batch_id=257682736714229978696341492184405662504, time:1750767594.980271s req_ids:[8] -DEBUG 06-24 20:19:54 [manager.py:391] -ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:54 lightllm_req_id:8 first_token_cost:208.3144187927246ms total_cost_time:208.357572555542ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9672 prompt_cache_len:5151 prompt_cache_ratio:0.5325682382133995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 -DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s -INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.1099393367767334 s -DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=122103183999943483690161780774362462390, time:1750767595.1965408s req_ids:[8] -DEBUG 06-24 20:19:55 [manager.py:391] -ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:378.44347953796387ms total_cost_time:378.50141525268555ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9673 prompt_cache_len:5151 prompt_cache_ratio:0.5325131810193322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 -DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10935091972351074 s -INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.11134171485900879 s -DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=151140804814309667086523599918860505684, time:1750767595.5769887s req_ids:[8] -DEBUG 06-24 20:19:55 [manager.py:391] -ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:204.6210765838623ms total_cost_time:204.6670913696289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9674 prompt_cache_len:5151 prompt_cache_ratio:0.5324581352077734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 -DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:55 [manager.py:224] router recive req id 8 cost time 0.10819649696350098 s -INFO 06-24 20:19:55 [manager.py:68] detokenization recv req id 8 cost time 0.1100771427154541 s -DEBUG 06-24 20:19:55 [manager.py:391] Prefill Batch: batch_id=196112063531861301829800378987176710025, time:1750767595.8050344s req_ids:[8] -DEBUG 06-24 20:19:55 [manager.py:391] -ERROR 06-24 20:19:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:222.7628231048584ms total_cost_time:222.80573844909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9675 prompt_cache_len:5151 prompt_cache_ratio:0.5324031007751938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 -DEBUG 06-24 20:19:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s -INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s -DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=49368387533370030219862401609699854618, time:1750767596.0219862s req_ids:[8] -DEBUG 06-24 20:19:56 [manager.py:391] -ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:55 lightllm_req_id:8 first_token_cost:212.30173110961914ms total_cost_time:212.34607696533203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9676 prompt_cache_len:5151 prompt_cache_ratio:0.5323480777180654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 -DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10780096054077148 s -INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.1098487377166748 s -DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=291426017437452501690655650071724354074, time:1750767596.2438524s req_ids:[8] -DEBUG 06-24 20:19:56 [manager.py:391] -ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:204.5419216156006ms total_cost_time:204.58412170410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9677 prompt_cache_len:5151 prompt_cache_ratio:0.5322930660328614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 -DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10704183578491211 s -INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.10912060737609863 s -DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=132331023165531569777583027104103853008, time:1750767596.4543126s req_ids:[8] -DEBUG 06-24 20:19:56 [manager.py:391] -ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:210.95538139343262ms total_cost_time:210.9990119934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9678 prompt_cache_len:5151 prompt_cache_ratio:0.532238065716057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 -DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10843658447265625 s -INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.11041522026062012 s -DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=105535843362200375441152399740705343610, time:1750767596.6680608s req_ids:[8] -DEBUG 06-24 20:19:56 [manager.py:391] -ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:209.0318202972412ms total_cost_time:209.0766429901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9679 prompt_cache_len:5151 prompt_cache_ratio:0.5321830767641286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 -DEBUG 06-24 20:19:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:56 [manager.py:224] router recive req id 8 cost time 0.10870575904846191 s -INFO 06-24 20:19:56 [manager.py:68] detokenization recv req id 8 cost time 0.11064457893371582 s -DEBUG 06-24 20:19:56 [manager.py:391] Prefill Batch: batch_id=316450409325792394578830814589121240415, time:1750767596.8813577s req_ids:[8] -DEBUG 06-24 20:19:56 [manager.py:391] -ERROR 06-24 20:19:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:208.2517147064209ms total_cost_time:208.2967758178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9680 prompt_cache_len:5151 prompt_cache_ratio:0.5321280991735537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 -DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10721921920776367 s -INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.10911226272583008 s -DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=151435867354600278596016892986945710528, time:1750767597.0956845s req_ids:[8] -DEBUG 06-24 20:19:57 [manager.py:391] -ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:56 lightllm_req_id:8 first_token_cost:378.7839412689209ms total_cost_time:378.8266181945801ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9681 prompt_cache_len:5151 prompt_cache_ratio:0.5320731329408119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 -DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10887265205383301 s -INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.11090707778930664 s -DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=216428318150012989131646076857912083580, time:1750767597.4785457s req_ids:[8] -DEBUG 06-24 20:19:57 [manager.py:391] -ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:207.51571655273438ms total_cost_time:207.55982398986816ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9682 prompt_cache_len:5151 prompt_cache_ratio:0.5320181780623838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 -DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.1089317798614502 s -INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.11025166511535645 s -DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=284942597387553562974535114275199676351, time:1750767597.6952114s req_ids:[8] -DEBUG 06-24 20:19:57 [manager.py:391] -ERROR 06-24 20:19:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:210.41607856750488ms total_cost_time:210.46161651611328ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9683 prompt_cache_len:5151 prompt_cache_ratio:0.5319632345347516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 -DEBUG 06-24 20:19:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:57 [manager.py:224] router recive req id 8 cost time 0.10808873176574707 s -INFO 06-24 20:19:57 [manager.py:68] detokenization recv req id 8 cost time 0.10999822616577148 s -DEBUG 06-24 20:19:57 [manager.py:391] Prefill Batch: batch_id=314745124465930407969631336400647724769, time:1750767597.9147997s req_ids:[8] -DEBUG 06-24 20:19:57 [manager.py:391] -ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:57 lightllm_req_id:8 first_token_cost:212.68510818481445ms total_cost_time:212.73088455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9684 prompt_cache_len:5151 prompt_cache_ratio:0.531908302354399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 -DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10717177391052246 s -INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.1090695858001709 s -DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=298352208137805440241609976595646173245, time:1750767598.1310885s req_ids:[8] -DEBUG 06-24 20:19:58 [manager.py:391] -ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.0593318939209ms total_cost_time:211.1051082611084ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9685 prompt_cache_len:5151 prompt_cache_ratio:0.531853381517811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 -DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10740971565246582 s -INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.10944247245788574 s -DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=193257439957698840387376032147910334337, time:1750767598.3481166s req_ids:[8] -DEBUG 06-24 20:19:58 [manager.py:391] -ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:215.25239944458008ms total_cost_time:215.29483795166016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9686 prompt_cache_len:5151 prompt_cache_ratio:0.5317984720214743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 -DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10818743705749512 s -INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11005377769470215 s -DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=112530607752741615985389883147609090559, time:1750767598.5670755s req_ids:[8] -DEBUG 06-24 20:19:58 [manager.py:391] -ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.22503280639648ms total_cost_time:211.26937866210938ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9687 prompt_cache_len:5151 prompt_cache_ratio:0.5317435738618768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 -DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10843396186828613 s -INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s -DEBUG 06-24 20:19:58 [manager.py:391] Prefill Batch: batch_id=88414819670413727865434900862166015933, time:1750767598.7858176s req_ids:[8] -DEBUG 06-24 20:19:58 [manager.py:391] -ERROR 06-24 20:19:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:211.15612983703613ms total_cost_time:211.20142936706543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9688 prompt_cache_len:5151 prompt_cache_ratio:0.5316886870355079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 -DEBUG 06-24 20:19:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:58 [manager.py:224] router recive req id 8 cost time 0.10820984840393066 s -INFO 06-24 20:19:58 [manager.py:68] detokenization recv req id 8 cost time 0.11040639877319336 s -DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=99584855930640605521930884488617686947, time:1750767599.0019405s req_ids:[8] -DEBUG 06-24 20:19:59 [manager.py:391] -ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:58 lightllm_req_id:8 first_token_cost:210.21771430969238ms total_cost_time:210.26134490966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9689 prompt_cache_len:5151 prompt_cache_ratio:0.5316338115388585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 -DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:59 [batch.py:51] router release req id 8 -INFO 06-24 20:19:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s -INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.11027741432189941 s -DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=3002714152209460191439921782906540841, time:1750767599.2250035s req_ids:[8] -DEBUG 06-24 20:19:59 [manager.py:391] -ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:420.9277629852295ms total_cost_time:420.9721088409424ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9690 prompt_cache_len:5151 prompt_cache_ratio:0.531578947368421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 -DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10848808288574219 s -INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.11047148704528809 s -DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=28300136114432458160799638570859105825, time:1750767599.6430626s req_ids:[8] -DEBUG 06-24 20:19:59 [manager.py:391] -ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:203.93657684326172ms total_cost_time:203.9799690246582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9691 prompt_cache_len:5151 prompt_cache_ratio:0.5315240945206893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 -DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:19:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:19:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:19:59 [manager.py:224] router recive req id 8 cost time 0.10787534713745117 s -INFO 06-24 20:19:59 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s -DEBUG 06-24 20:19:59 [manager.py:391] Prefill Batch: batch_id=294119150604922861310329704527725479104, time:1750767599.8577213s req_ids:[8] -DEBUG 06-24 20:19:59 [manager.py:391] -ERROR 06-24 20:19:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:19:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:19:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:215.35849571228027ms total_cost_time:215.41595458984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:9692 prompt_cache_len:5151 prompt_cache_ratio:0.5314692529921585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:19:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 -DEBUG 06-24 20:19:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:19:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:19:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:19:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:19:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10817480087280273 s -INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11006760597229004 s -DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=278389338409202568406036434146731462022, time:1750767600.07603s req_ids:[8] -DEBUG 06-24 20:20:00 [manager.py:391] -ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:19:59 lightllm_req_id:8 first_token_cost:201.32923126220703ms total_cost_time:201.3709545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9693 prompt_cache_len:5151 prompt_cache_ratio:0.5314144227793253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 -DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10808563232421875 s -INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11000180244445801 s -DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=207063587305631109318568896515575232334, time:1750767600.2866547s req_ids:[8] -DEBUG 06-24 20:20:00 [manager.py:391] -ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:211.42959594726562ms total_cost_time:211.4734649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9694 prompt_cache_len:5151 prompt_cache_ratio:0.5313596038786879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 -DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10993242263793945 s -INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.11201906204223633 s -DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=286798964550320642445133678155355526126, time:1750767600.5044332s req_ids:[8] -DEBUG 06-24 20:20:00 [manager.py:391] -ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:210.83378791809082ms total_cost_time:210.8771800994873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9695 prompt_cache_len:5151 prompt_cache_ratio:0.5313047962867458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 -DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10873270034790039 s -INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100771427154541 s -DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=66789275500910736260065763294017829934, time:1750767600.7201176s req_ids:[8] -DEBUG 06-24 20:20:00 [manager.py:391] -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:213.00125122070312ms total_cost_time:213.04678916931152ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9696 prompt_cache_len:5151 prompt_cache_ratio:0.53125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 -DEBUG 06-24 20:20:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:00 [manager.py:224] router recive req id 8 cost time 0.10827422142028809 s -INFO 06-24 20:20:00 [manager.py:68] detokenization recv req id 8 cost time 0.10954117774963379 s -DEBUG 06-24 20:20:00 [manager.py:391] Prefill Batch: batch_id=320413334884219757723972715791519580452, time:1750767600.9377885s req_ids:[8] -DEBUG 06-24 20:20:00 [manager.py:391] -DEBUG 06-24 20:20:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 41295.184 tokens/s -DEBUG 06-24 20:20:00 [stats.py:37] Avg prompt tokens throughput: 41286.749 tokens/s -DEBUG 06-24 20:20:00 [stats.py:37] Avg generate tokens throughput: 8.435 tokens/s -ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:00 lightllm_req_id:8 first_token_cost:207.40675926208496ms total_cost_time:207.45158195495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9697 prompt_cache_len:5151 prompt_cache_ratio:0.531195215014953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 -DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10836577415466309 s -INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s -DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=71626440281056613528981303272735811647, time:1750767601.1539295s req_ids:[8] -DEBUG 06-24 20:20:01 [manager.py:391] -ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:387.91608810424805ms total_cost_time:387.95948028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9698 prompt_cache_len:5151 prompt_cache_ratio:0.5311404413281089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 -DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10857081413269043 s -INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11046648025512695 s -DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=317786889022225189547959250526869868959, time:1750767601.5469432s req_ids:[8] -DEBUG 06-24 20:20:01 [manager.py:391] -ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:205.38949966430664ms total_cost_time:205.43217658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9699 prompt_cache_len:5151 prompt_cache_ratio:0.5310856789359728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 -DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.10821413993835449 s -INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.11019349098205566 s -DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=180216853736114142907152478111313433814, time:1750767601.762811s req_ids:[8] -DEBUG 06-24 20:20:01 [manager.py:391] -ERROR 06-24 20:20:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:210.9987735748291ms total_cost_time:211.045503616333ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9700 prompt_cache_len:5151 prompt_cache_ratio:0.5310309278350516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 -DEBUG 06-24 20:20:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:01 [manager.py:224] router recive req id 8 cost time 0.1072690486907959 s -INFO 06-24 20:20:01 [manager.py:68] detokenization recv req id 8 cost time 0.10918712615966797 s -DEBUG 06-24 20:20:01 [manager.py:391] Prefill Batch: batch_id=116212148515364075124477238192984311207, time:1750767601.9770606s req_ids:[8] -DEBUG 06-24 20:20:01 [manager.py:391] -ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:01 lightllm_req_id:8 first_token_cost:209.39970016479492ms total_cost_time:209.4428539276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9701 prompt_cache_len:5151 prompt_cache_ratio:0.5309761880218534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 -DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s -INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.11078715324401855 s -DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=277410707678491262230001769956113740678, time:1750767602.1921508s req_ids:[8] -DEBUG 06-24 20:20:02 [manager.py:391] -ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:211.4722728729248ms total_cost_time:211.5161418914795ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9702 prompt_cache_len:5151 prompt_cache_ratio:0.530921459492888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 -DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10790181159973145 s -INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.1098783016204834 s -DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=257867785472341609180314496652977881833, time:1750767602.409584s req_ids:[8] -DEBUG 06-24 20:20:02 [manager.py:391] -ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:207.81373977661133ms total_cost_time:207.8573703765869ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9703 prompt_cache_len:5151 prompt_cache_ratio:0.5308667422446666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 -DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.1087956428527832 s -INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s -DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=284368287669520152223162542718497634385, time:1750767602.624502s req_ids:[8] -DEBUG 06-24 20:20:02 [manager.py:391] -ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:209.5036506652832ms total_cost_time:209.54585075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9704 prompt_cache_len:5151 prompt_cache_ratio:0.5308120362737015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 -DEBUG 06-24 20:20:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:02 [manager.py:224] router recive req id 8 cost time 0.10782718658447266 s -INFO 06-24 20:20:02 [manager.py:68] detokenization recv req id 8 cost time 0.10980463027954102 s -DEBUG 06-24 20:20:02 [manager.py:391] Prefill Batch: batch_id=75812024211430808579145112321927363575, time:1750767602.850213s req_ids:[8] -DEBUG 06-24 20:20:02 [manager.py:391] -ERROR 06-24 20:20:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:222.06878662109375ms total_cost_time:222.11313247680664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9705 prompt_cache_len:5151 prompt_cache_ratio:0.530757341576507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 -DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.30924367904663086 s -INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.31187939643859863 s -DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=268524233455071503148872553174597338444, time:1750767603.2704127s req_ids:[8] -DEBUG 06-24 20:20:03 [manager.py:391] -ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:02 lightllm_req_id:8 first_token_cost:419.6591377258301ms total_cost_time:419.70300674438477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9706 prompt_cache_len:5151 prompt_cache_ratio:0.5307026581495982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 -DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10849547386169434 s -INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052656173706055 s -DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=87058785314371425299572973846072185675, time:1750767603.4954422s req_ids:[8] -DEBUG 06-24 20:20:03 [manager.py:391] -ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:213.0870819091797ms total_cost_time:213.13023567199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9707 prompt_cache_len:5151 prompt_cache_ratio:0.5306479859894921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 -DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10860037803649902 s -INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s -DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=180067148754010167147546538725913019379, time:1750767603.7122824s req_ids:[8] -DEBUG 06-24 20:20:03 [manager.py:391] -ERROR 06-24 20:20:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:211.29751205444336ms total_cost_time:211.33923530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9708 prompt_cache_len:5151 prompt_cache_ratio:0.5305933250927071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 -DEBUG 06-24 20:20:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:03 [manager.py:224] router recive req id 8 cost time 0.10843038558959961 s -INFO 06-24 20:20:03 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s -DEBUG 06-24 20:20:03 [manager.py:391] Prefill Batch: batch_id=320780667185077450600428379004137468125, time:1750767603.9307284s req_ids:[8] -DEBUG 06-24 20:20:03 [manager.py:391] -ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:03 lightllm_req_id:8 first_token_cost:212.73422241210938ms total_cost_time:212.77737617492676ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9709 prompt_cache_len:5151 prompt_cache_ratio:0.5305386754557627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 -DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10729622840881348 s -INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.10938119888305664 s -DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=304119155003335547984900864040435993815, time:1750767604.1480935s req_ids:[8] -DEBUG 06-24 20:20:04 [manager.py:391] -ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:211.70282363891602ms total_cost_time:211.745023727417ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9710 prompt_cache_len:5151 prompt_cache_ratio:0.5304840370751802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 -DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10798478126525879 s -INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.1100165843963623 s -DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=57956615167176663607350909813748036224, time:1750767604.3646386s req_ids:[8] -DEBUG 06-24 20:20:04 [manager.py:391] -ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:206.4492702484131ms total_cost_time:206.49361610412598ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9711 prompt_cache_len:5151 prompt_cache_ratio:0.5304294099474822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 -DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10833120346069336 s -INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s -DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=197773222691356057595139139091455200404, time:1750767604.577911s req_ids:[8] -DEBUG 06-24 20:20:04 [manager.py:391] -ERROR 06-24 20:20:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:209.0616226196289ms total_cost_time:209.1047763824463ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9712 prompt_cache_len:5151 prompt_cache_ratio:0.5303747940691927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 -DEBUG 06-24 20:20:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:04 [manager.py:224] router recive req id 8 cost time 0.10735583305358887 s -INFO 06-24 20:20:04 [manager.py:68] detokenization recv req id 8 cost time 0.10958647727966309 s -DEBUG 06-24 20:20:04 [manager.py:391] Prefill Batch: batch_id=133865151070505433415795782479915516220, time:1750767604.795446s req_ids:[8] -DEBUG 06-24 20:20:04 [manager.py:391] -ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:04 lightllm_req_id:8 first_token_cost:404.6018123626709ms total_cost_time:404.6444892883301ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9713 prompt_cache_len:5151 prompt_cache_ratio:0.5303201894368372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 -DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s -INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.11059951782226562 s -DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=261384487814129794694582142692399419620, time:1750767605.2019336s req_ids:[8] -DEBUG 06-24 20:20:05 [manager.py:391] -ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:202.57830619812012ms total_cost_time:202.6386260986328ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9714 prompt_cache_len:5151 prompt_cache_ratio:0.5302655960469426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 -DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.11020708084106445 s -INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.11228585243225098 s -DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=142412961228486802287719729961435390319, time:1750767605.4108384s req_ids:[8] -DEBUG 06-24 20:20:05 [manager.py:391] -ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:197.62372970581055ms total_cost_time:197.66592979431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9715 prompt_cache_len:5151 prompt_cache_ratio:0.5302110138960371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 -DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10779213905334473 s -INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.10984182357788086 s -DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=139089787080126564143897494991910015649, time:1750767605.6238022s req_ids:[8] -DEBUG 06-24 20:20:05 [manager.py:391] -ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:217.88859367370605ms total_cost_time:217.93174743652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9716 prompt_cache_len:5151 prompt_cache_ratio:0.5301564429806505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 -DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:05 [manager.py:224] router recive req id 8 cost time 0.10772871971130371 s -INFO 06-24 20:20:05 [manager.py:68] detokenization recv req id 8 cost time 0.10964083671569824 s -DEBUG 06-24 20:20:05 [manager.py:391] Prefill Batch: batch_id=20662626919278309107181029170062485971, time:1750767605.8387673s req_ids:[8] -DEBUG 06-24 20:20:05 [manager.py:391] -ERROR 06-24 20:20:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:210.5274200439453ms total_cost_time:210.5717658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9717 prompt_cache_len:5151 prompt_cache_ratio:0.530101883297314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 -DEBUG 06-24 20:20:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10916566848754883 s -INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.11130118370056152 s -DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=275328782566432825580440050425848449424, time:1750767606.0566213s req_ids:[8] -DEBUG 06-24 20:20:06 [manager.py:391] -ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:05 lightllm_req_id:8 first_token_cost:207.5340747833252ms total_cost_time:207.55457878112793ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:9718 prompt_cache_len:5151 prompt_cache_ratio:0.5300473348425602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 -DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:06 [batch.py:51] router release req id 8 -INFO 06-24 20:20:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10758185386657715 s -INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.10864138603210449 s -DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=311670395322849918340537004391606645662, time:1750767606.271722s req_ids:[8] -DEBUG 06-24 20:20:06 [manager.py:391] -ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:215.31391143798828ms total_cost_time:215.35921096801758ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9719 prompt_cache_len:5151 prompt_cache_ratio:0.5299927976129232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 -DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.11006689071655273 s -INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.11207365989685059 s -DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=227032008099157770842561474505562710616, time:1750767606.4877062s req_ids:[8] -DEBUG 06-24 20:20:06 [manager.py:391] -ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:208.83965492248535ms total_cost_time:208.88304710388184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9720 prompt_cache_len:5151 prompt_cache_ratio:0.5299382716049382 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 -DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.10750770568847656 s -INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s -DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=167659032782312380567456520451444155887, time:1750767606.7103145s req_ids:[8] -DEBUG 06-24 20:20:06 [manager.py:391] -ERROR 06-24 20:20:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:217.96393394470215ms total_cost_time:218.00780296325684ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9721 prompt_cache_len:5151 prompt_cache_ratio:0.5298837568151424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 -DEBUG 06-24 20:20:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:06 [manager.py:224] router recive req id 8 cost time 0.1091160774230957 s -INFO 06-24 20:20:06 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s -DEBUG 06-24 20:20:06 [manager.py:391] Prefill Batch: batch_id=231589246666996406668481052321002255627, time:1750767606.9278843s req_ids:[8] -DEBUG 06-24 20:20:06 [manager.py:391] -ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:06 lightllm_req_id:8 first_token_cost:379.05001640319824ms total_cost_time:379.09579277038574ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9722 prompt_cache_len:5151 prompt_cache_ratio:0.529829253240074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 -DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10877776145935059 s -INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.11014008522033691 s -DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=2225338221513613032582196025480608493, time:1750767607.3097916s req_ids:[8] -DEBUG 06-24 20:20:07 [manager.py:391] -ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:201.27224922180176ms total_cost_time:201.31611824035645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9723 prompt_cache_len:5151 prompt_cache_ratio:0.5297747608762727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 -DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10768795013427734 s -INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.10944771766662598 s -DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=98023260160296989839209806430023744184, time:1750767607.5199409s req_ids:[8] -DEBUG 06-24 20:20:07 [manager.py:391] -ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:206.62593841552734ms total_cost_time:206.67243003845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9724 prompt_cache_len:5151 prompt_cache_ratio:0.5297202797202797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 -DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s -INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.10987663269042969 s -DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=222003945428386060345322280753844824522, time:1750767607.7354288s req_ids:[8] -DEBUG 06-24 20:20:07 [manager.py:391] -ERROR 06-24 20:20:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:206.46286010742188ms total_cost_time:206.50911331176758ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9725 prompt_cache_len:5151 prompt_cache_ratio:0.5296658097686375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 -DEBUG 06-24 20:20:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:07 [manager.py:224] router recive req id 8 cost time 0.10958075523376465 s -INFO 06-24 20:20:07 [manager.py:68] detokenization recv req id 8 cost time 0.11166143417358398 s -DEBUG 06-24 20:20:07 [manager.py:391] Prefill Batch: batch_id=7506650279647652430122012581182043003, time:1750767607.9453754s req_ids:[8] -DEBUG 06-24 20:20:07 [manager.py:391] -ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:07 lightllm_req_id:8 first_token_cost:209.57708358764648ms total_cost_time:209.62071418762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9726 prompt_cache_len:5151 prompt_cache_ratio:0.5296113510178901 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 -DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10826992988586426 s -INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.11015701293945312 s -DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=320924440453652897726031525962464717816, time:1750767608.162904s req_ids:[8] -DEBUG 06-24 20:20:08 [manager.py:391] -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:211.73954010009766ms total_cost_time:211.78269386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9727 prompt_cache_len:5151 prompt_cache_ratio:0.5295569034645832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 -DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s -INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005640029907227 s -DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=128795407385784873548843615826024573754, time:1750767608.3789635s req_ids:[8] -DEBUG 06-24 20:20:08 [manager.py:391] -ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:211.5039825439453ms total_cost_time:211.5485668182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9728 prompt_cache_len:5151 prompt_cache_ratio:0.5295024671052632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 -DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10843563079833984 s -INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.1104273796081543 s -DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=71782776674557046093692865703428277706, time:1750767608.5975492s req_ids:[8] -DEBUG 06-24 20:20:08 [manager.py:391] -ERROR 06-24 20:20:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:212.25333213806152ms total_cost_time:212.2976779937744ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9729 prompt_cache_len:5151 prompt_cache_ratio:0.5294480419364785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 -DEBUG 06-24 20:20:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:08 [manager.py:224] router recive req id 8 cost time 0.10800671577453613 s -INFO 06-24 20:20:08 [manager.py:68] detokenization recv req id 8 cost time 0.10994529724121094 s -DEBUG 06-24 20:20:08 [manager.py:391] Prefill Batch: batch_id=73004945621707262405562216788078786334, time:1750767608.816916s req_ids:[8] -DEBUG 06-24 20:20:08 [manager.py:391] -ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:08 lightllm_req_id:8 first_token_cost:368.8981533050537ms total_cost_time:368.9446449279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9730 prompt_cache_len:5151 prompt_cache_ratio:0.5293936279547791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 -DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10924386978149414 s -INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092066764831543 s -DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=199719459417463798778382736204090210748, time:1750767609.1886976s req_ids:[8] -DEBUG 06-24 20:20:09 [manager.py:391] -ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:209.80286598205566ms total_cost_time:209.86342430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:9731 prompt_cache_len:5151 prompt_cache_ratio:0.5293392251567156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 -DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10919904708862305 s -INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.11160755157470703 s -DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=184553118692978129706589994601828174068, time:1750767609.4075675s req_ids:[8] -DEBUG 06-24 20:20:09 [manager.py:391] -ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:209.59734916687012ms total_cost_time:209.6405029296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9732 prompt_cache_len:5151 prompt_cache_ratio:0.5292848335388409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 -DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.10779762268066406 s -INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s -DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=300798638160309421166442466786889989121, time:1750767609.62327s req_ids:[8] -DEBUG 06-24 20:20:09 [manager.py:391] -ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:210.12496948242188ms total_cost_time:210.16764640808105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9733 prompt_cache_len:5151 prompt_cache_ratio:0.5292304530977088 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 -DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:09 [manager.py:224] router recive req id 8 cost time 0.1069486141204834 s -INFO 06-24 20:20:09 [manager.py:68] detokenization recv req id 8 cost time 0.10860681533813477 s -DEBUG 06-24 20:20:09 [manager.py:391] Prefill Batch: batch_id=231850447464200916526537811520732908171, time:1750767609.8438127s req_ids:[8] -DEBUG 06-24 20:20:09 [manager.py:391] -ERROR 06-24 20:20:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:213.02342414855957ms total_cost_time:213.06681632995605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9734 prompt_cache_len:5151 prompt_cache_ratio:0.5291760838298747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 -DEBUG 06-24 20:20:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10913634300231934 s -INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11109685897827148 s -DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=289202097216434212225687696875073447604, time:1750767610.0580661s req_ids:[8] -DEBUG 06-24 20:20:10 [manager.py:391] -ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:09 lightllm_req_id:8 first_token_cost:210.85596084594727ms total_cost_time:210.89792251586914ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9735 prompt_cache_len:5151 prompt_cache_ratio:0.5291217257318952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 -DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10852670669555664 s -INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.1098325252532959 s -DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=168811509918177182176994056287941076826, time:1750767610.2750983s req_ids:[8] -DEBUG 06-24 20:20:10 [manager.py:391] -ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:209.62786674499512ms total_cost_time:209.6724510192871ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9736 prompt_cache_len:5151 prompt_cache_ratio:0.5290673788003287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 -DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.1086888313293457 s -INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s -DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=145214871140402672326201844330547131825, time:1750767610.4909701s req_ids:[8] -DEBUG 06-24 20:20:10 [manager.py:391] -ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:212.04280853271484ms total_cost_time:212.08906173706055ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9737 prompt_cache_len:5151 prompt_cache_ratio:0.5290130430317346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 -DEBUG 06-24 20:20:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:10 [manager.py:224] router recive req id 8 cost time 0.10897374153137207 s -INFO 06-24 20:20:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086082458496094 s -DEBUG 06-24 20:20:10 [manager.py:391] Prefill Batch: batch_id=66533355568975188491230653003823496247, time:1750767610.715345s req_ids:[8] -DEBUG 06-24 20:20:10 [manager.py:391] -ERROR 06-24 20:20:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:20:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 39689.050 tokens/s -DEBUG 06-24 20:20:10 [stats.py:37] Avg prompt tokens throughput: 39680.784 tokens/s -DEBUG 06-24 20:20:10 [stats.py:37] Avg generate tokens throughput: 8.266 tokens/s -INFO 06-24 20:20:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:391.3445472717285ms total_cost_time:391.3888931274414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9738 prompt_cache_len:5151 prompt_cache_ratio:0.5289587184226741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 -DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10860300064086914 s -INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11061906814575195 s -DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=278175583104001119323103361152710516508, time:1750767611.1027768s req_ids:[8] -DEBUG 06-24 20:20:11 [manager.py:391] -ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:10 lightllm_req_id:8 first_token_cost:205.24024963378906ms total_cost_time:205.28483390808105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9739 prompt_cache_len:5151 prompt_cache_ratio:0.5289044049697094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 -DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.1078789234161377 s -INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990262031555176 s -DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=121969728957628981588249574813757099980, time:1750767611.3190236s req_ids:[8] -DEBUG 06-24 20:20:11 [manager.py:391] -ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:208.62603187561035ms total_cost_time:208.67109298706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9740 prompt_cache_len:5151 prompt_cache_ratio:0.5288501026694045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 -DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10906982421875 s -INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11112666130065918 s -DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=202613982771872757106878612523922298752, time:1750767611.531273s req_ids:[8] -DEBUG 06-24 20:20:11 [manager.py:391] -ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:207.64994621276855ms total_cost_time:207.69500732421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9741 prompt_cache_len:5151 prompt_cache_ratio:0.5287958115183246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 -DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10886812210083008 s -INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.1108698844909668 s -DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=200415677098959675704101023206881190065, time:1750767611.7458744s req_ids:[8] -DEBUG 06-24 20:20:11 [manager.py:391] -ERROR 06-24 20:20:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:207.60798454284668ms total_cost_time:207.65209197998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9742 prompt_cache_len:5151 prompt_cache_ratio:0.5287415315130364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 -DEBUG 06-24 20:20:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:11 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s -INFO 06-24 20:20:11 [manager.py:68] detokenization recv req id 8 cost time 0.11102557182312012 s -DEBUG 06-24 20:20:11 [manager.py:391] Prefill Batch: batch_id=133174399326987779142144286260679923670, time:1750767611.9606745s req_ids:[8] -DEBUG 06-24 20:20:11 [manager.py:391] -ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:11 lightllm_req_id:8 first_token_cost:204.33878898620605ms total_cost_time:204.38265800476074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9743 prompt_cache_len:5151 prompt_cache_ratio:0.5286872626501078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 -DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.10783267021179199 s -INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.10978102684020996 s -DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=37728146127731746141090949004683340721, time:1750767612.1694937s req_ids:[8] -DEBUG 06-24 20:20:12 [manager.py:391] -ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:208.52112770080566ms total_cost_time:208.56499671936035ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9744 prompt_cache_len:5151 prompt_cache_ratio:0.5286330049261084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 -DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.10953593254089355 s -INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.11155128479003906 s -DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=320185153706412132023007155815806705641, time:1750767612.384032s req_ids:[8] -DEBUG 06-24 20:20:12 [manager.py:391] -ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:206.7255973815918ms total_cost_time:206.7697048187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9745 prompt_cache_len:5151 prompt_cache_ratio:0.528578758337609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 -DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.108154296875 s -INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.11012005805969238 s -DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=237416041686495269323906896404812372160, time:1750767612.6001775s req_ids:[8] -DEBUG 06-24 20:20:12 [manager.py:391] -ERROR 06-24 20:20:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:379.03594970703125ms total_cost_time:379.08005714416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9746 prompt_cache_len:5151 prompt_cache_ratio:0.528524522881182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 -DEBUG 06-24 20:20:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:12 [manager.py:224] router recive req id 8 cost time 0.1080007553100586 s -INFO 06-24 20:20:12 [manager.py:68] detokenization recv req id 8 cost time 0.10975980758666992 s -DEBUG 06-24 20:20:12 [manager.py:391] Prefill Batch: batch_id=154080746999841864462739118397049498419, time:1750767612.9819417s req_ids:[8] -DEBUG 06-24 20:20:12 [manager.py:391] -ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:12 lightllm_req_id:8 first_token_cost:200.23465156555176ms total_cost_time:200.28042793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9747 prompt_cache_len:5151 prompt_cache_ratio:0.528470298553401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 -DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10796737670898438 s -INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.10976910591125488 s -DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=199767120005644601791785465444193265663, time:1750767613.1892571s req_ids:[8] -DEBUG 06-24 20:20:13 [manager.py:391] -ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:213.3004665374756ms total_cost_time:213.34481239318848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9748 prompt_cache_len:5151 prompt_cache_ratio:0.5284160853508412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 -DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10779476165771484 s -INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s -DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=334036386015699906219525108573329421028, time:1750767613.4173067s req_ids:[8] -DEBUG 06-24 20:20:13 [manager.py:391] -ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:215.61431884765625ms total_cost_time:215.66081047058105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9749 prompt_cache_len:5151 prompt_cache_ratio:0.5283618832700789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 -DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10934042930603027 s -INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.11146330833435059 s -DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=288433600148769554101834860726783745077, time:1750767613.6319258s req_ids:[8] -DEBUG 06-24 20:20:13 [manager.py:391] -ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:209.03301239013672ms total_cost_time:209.0756893157959ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9750 prompt_cache_len:5151 prompt_cache_ratio:0.5283076923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 -DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:13 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s -INFO 06-24 20:20:13 [manager.py:68] detokenization recv req id 8 cost time 0.11056923866271973 s -DEBUG 06-24 20:20:13 [manager.py:391] Prefill Batch: batch_id=145421800037629520217053451588917517742, time:1750767613.8485296s req_ids:[8] -DEBUG 06-24 20:20:13 [manager.py:391] -ERROR 06-24 20:20:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:211.77244186401367ms total_cost_time:211.81702613830566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9751 prompt_cache_len:5151 prompt_cache_ratio:0.5282535124602605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 -DEBUG 06-24 20:20:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10895276069641113 s -INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11111855506896973 s -DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=97827461153038952754172966853357628156, time:1750767614.06513s req_ids:[8] -DEBUG 06-24 20:20:14 [manager.py:391] -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:13 lightllm_req_id:8 first_token_cost:209.90324020385742ms total_cost_time:209.9473476409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9752 prompt_cache_len:5151 prompt_cache_ratio:0.5281993437243643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 -DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s -INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11022043228149414 s -DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=303765686469944090660755388017390322347, time:1750767614.2807088s req_ids:[8] -DEBUG 06-24 20:20:14 [manager.py:391] -ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:206.17341995239258ms total_cost_time:206.21728897094727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9753 prompt_cache_len:5151 prompt_cache_ratio:0.5281451860965857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 -DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10798501968383789 s -INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.10979843139648438 s -DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=202412384200120253298412522299552946512, time:1750767614.4897735s req_ids:[8] -DEBUG 06-24 20:20:14 [manager.py:391] -ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:166.57066345214844ms total_cost_time:166.611909866333ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9754 prompt_cache_len:5151 prompt_cache_ratio:0.5280910395735083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 -DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:14 [manager.py:224] router recive req id 8 cost time 0.10865497589111328 s -INFO 06-24 20:20:14 [manager.py:68] detokenization recv req id 8 cost time 0.11060643196105957 s -DEBUG 06-24 20:20:14 [manager.py:391] Prefill Batch: batch_id=327809107792174381303047551066176707403, time:1750767614.6610632s req_ids:[8] -DEBUG 06-24 20:20:14 [manager.py:391] -ERROR 06-24 20:20:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:360.2261543273926ms total_cost_time:360.27002334594727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9755 prompt_cache_len:5151 prompt_cache_ratio:0.5280369041517171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 -DEBUG 06-24 20:20:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s -INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11051559448242188 s -DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=117519926021217637261315152730966681155, time:1750767615.0275495s req_ids:[8] -DEBUG 06-24 20:20:15 [manager.py:391] -ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:14 lightllm_req_id:8 first_token_cost:213.37127685546875ms total_cost_time:213.41514587402344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9756 prompt_cache_len:5151 prompt_cache_ratio:0.5279827798277983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 -DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s -INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.10980653762817383 s -DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=95861185672663450850397823444368368132, time:1750767615.250367s req_ids:[8] -DEBUG 06-24 20:20:15 [manager.py:391] -ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:205.53994178771973ms total_cost_time:205.58404922485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9757 prompt_cache_len:5151 prompt_cache_ratio:0.5279286665983397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 -DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10919451713562012 s -INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11130881309509277 s -DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=145356293704953526278090116327852215972, time:1750767615.460488s req_ids:[8] -DEBUG 06-24 20:20:15 [manager.py:391] -ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:205.6727409362793ms total_cost_time:205.7168483734131ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9758 prompt_cache_len:5151 prompt_cache_ratio:0.5278745644599303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 -DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.10807156562805176 s -INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098935604095459 s -DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=63229927499695075163877259537715446299, time:1750767615.6711202s req_ids:[8] -DEBUG 06-24 20:20:15 [manager.py:391] -ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:206.38346672058105ms total_cost_time:206.42852783203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9759 prompt_cache_len:5151 prompt_cache_ratio:0.5278204734091608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 -DEBUG 06-24 20:20:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:15 [manager.py:224] router recive req id 8 cost time 0.1088099479675293 s -INFO 06-24 20:20:15 [manager.py:68] detokenization recv req id 8 cost time 0.11082744598388672 s -DEBUG 06-24 20:20:15 [manager.py:391] Prefill Batch: batch_id=215663421535686870401861607311448518734, time:1750767615.8831227s req_ids:[8] -DEBUG 06-24 20:20:15 [manager.py:391] -ERROR 06-24 20:20:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:207.69333839416504ms total_cost_time:207.73816108703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9760 prompt_cache_len:5151 prompt_cache_ratio:0.527766393442623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 -DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10880136489868164 s -INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11086702346801758 s -DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=131907548108518511329528664180910140578, time:1750767616.0979292s req_ids:[8] -DEBUG 06-24 20:20:16 [manager.py:391] -ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:15 lightllm_req_id:8 first_token_cost:207.6129913330078ms total_cost_time:207.6585292816162ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9761 prompt_cache_len:5151 prompt_cache_ratio:0.5277123245569102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 -DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.1086893081665039 s -INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11069178581237793 s -DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=302371100660683400851857243964768711469, time:1750767616.3113856s req_ids:[8] -DEBUG 06-24 20:20:16 [manager.py:391] -ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:210.65568923950195ms total_cost_time:210.69979667663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9762 prompt_cache_len:5151 prompt_cache_ratio:0.527658266748617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 -DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10814595222473145 s -INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.10964632034301758 s -DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=33560323923669897701082508714031620270, time:1750767616.527047s req_ids:[8] -DEBUG 06-24 20:20:16 [manager.py:391] -ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:375.7026195526123ms total_cost_time:375.748872756958ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9763 prompt_cache_len:5151 prompt_cache_ratio:0.5276042200143398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 -DEBUG 06-24 20:20:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:16 [manager.py:224] router recive req id 8 cost time 0.10852193832397461 s -INFO 06-24 20:20:16 [manager.py:68] detokenization recv req id 8 cost time 0.11051058769226074 s -DEBUG 06-24 20:20:16 [manager.py:391] Prefill Batch: batch_id=81425868376065752073435208167205443781, time:1750767616.9066594s req_ids:[8] -DEBUG 06-24 20:20:16 [manager.py:391] -INFO 06-24 20:20:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:20:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:16 lightllm_req_id:8 first_token_cost:207.72123336791992ms total_cost_time:207.76724815368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9764 prompt_cache_len:5151 prompt_cache_ratio:0.5275501843506759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 -DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s -INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11054301261901855 s -DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=207371732129449384608120600511036528264, time:1750767617.1217468s req_ids:[8] -DEBUG 06-24 20:20:17 [manager.py:391] -ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:207.09753036499023ms total_cost_time:207.139253616333ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9765 prompt_cache_len:5151 prompt_cache_ratio:0.5274961597542243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 -DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s -INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.1110084056854248 s -DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=289128333664217103081796475139398685184, time:1750767617.335392s req_ids:[8] -DEBUG 06-24 20:20:17 [manager.py:391] -ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:210.7090950012207ms total_cost_time:210.75153350830078ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9766 prompt_cache_len:5151 prompt_cache_ratio:0.5274421462215851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 -DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10854935646057129 s -INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11040258407592773 s -DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=75049211106508628423885749013290240635, time:1750767617.551092s req_ids:[8] -DEBUG 06-24 20:20:17 [manager.py:391] -ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:206.88652992248535ms total_cost_time:206.92968368530273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9767 prompt_cache_len:5151 prompt_cache_ratio:0.52738814374936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 -DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.10755372047424316 s -INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.10950636863708496 s -DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=246156348777491631219598781280866151910, time:1750767617.7630782s req_ids:[8] -DEBUG 06-24 20:20:17 [manager.py:391] -ERROR 06-24 20:20:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:211.32707595825195ms total_cost_time:211.37213706970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9768 prompt_cache_len:5151 prompt_cache_ratio:0.5273341523341524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 -DEBUG 06-24 20:20:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:17 [manager.py:224] router recive req id 8 cost time 0.1085352897644043 s -INFO 06-24 20:20:17 [manager.py:68] detokenization recv req id 8 cost time 0.11045718193054199 s -DEBUG 06-24 20:20:17 [manager.py:391] Prefill Batch: batch_id=305521883373357154193388552541911291996, time:1750767617.9792042s req_ids:[8] -DEBUG 06-24 20:20:17 [manager.py:391] -ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:17 lightllm_req_id:8 first_token_cost:205.40976524353027ms total_cost_time:205.45482635498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9769 prompt_cache_len:5151 prompt_cache_ratio:0.5272801719725663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 -DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s -INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.10985708236694336 s -DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=207562563613590190357008297471056069914, time:1750767618.191489s req_ids:[8] -DEBUG 06-24 20:20:18 [manager.py:391] -ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:169.41452026367188ms total_cost_time:169.45695877075195ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9770 prompt_cache_len:5151 prompt_cache_ratio:0.5272262026612078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 -DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.31064558029174805 s -INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.312760591506958 s -DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=125098793639138365764655329189827525280, time:1750767618.5791636s req_ids:[8] -DEBUG 06-24 20:20:18 [manager.py:391] -ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:422.19018936157227ms total_cost_time:422.23453521728516ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9771 prompt_cache_len:5151 prompt_cache_ratio:0.5271722443966841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 -DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10791611671447754 s -INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.1103813648223877 s -DEBUG 06-24 20:20:18 [manager.py:391] Prefill Batch: batch_id=97105121925774154059657236269240374964, time:1750767618.794579s req_ids:[8] -DEBUG 06-24 20:20:18 [manager.py:391] -ERROR 06-24 20:20:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:206.85791969299316ms total_cost_time:206.91680908203125ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9772 prompt_cache_len:5151 prompt_cache_ratio:0.5271182971756038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 -DEBUG 06-24 20:20:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:18 [manager.py:224] router recive req id 8 cost time 0.10720038414001465 s -INFO 06-24 20:20:18 [manager.py:68] detokenization recv req id 8 cost time 0.10927534103393555 s -DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=107320750101515810262680710997068668178, time:1750767619.0118597s req_ids:[8] -DEBUG 06-24 20:20:19 [manager.py:391] -ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:18 lightllm_req_id:8 first_token_cost:212.55135536193848ms total_cost_time:212.59665489196777ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9773 prompt_cache_len:5151 prompt_cache_ratio:0.5270643609945769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 -DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s -INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11104989051818848 s -DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=70653187323093500766273813701306117462, time:1750767619.22569s req_ids:[8] -DEBUG 06-24 20:20:19 [manager.py:391] -ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:208.84418487548828ms total_cost_time:208.88805389404297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9774 prompt_cache_len:5151 prompt_cache_ratio:0.5270104358502149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 -DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10796141624450684 s -INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993146896362305 s -DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=32355482779220167620655311851573586916, time:1750767619.4394698s req_ids:[8] -DEBUG 06-24 20:20:19 [manager.py:391] -ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:213.37580680847168ms total_cost_time:213.42110633850098ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9775 prompt_cache_len:5151 prompt_cache_ratio:0.5269565217391304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 -DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10910892486572266 s -INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11127471923828125 s -DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=17853183955154101609264010542592806431, time:1750767619.6577818s req_ids:[8] -DEBUG 06-24 20:20:19 [manager.py:391] -ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:206.62450790405273ms total_cost_time:206.66742324829102ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9776 prompt_cache_len:5151 prompt_cache_ratio:0.5269026186579379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 -DEBUG 06-24 20:20:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:19 [batch.py:51] router release req id 8 -DEBUG 06-24 20:20:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:19 [manager.py:283] -DEBUG 06-24 20:20:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:19 [manager.py:284] -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:19 [manager.py:224] router recive req id 8 cost time 0.10840702056884766 s -INFO 06-24 20:20:19 [manager.py:68] detokenization recv req id 8 cost time 0.11057090759277344 s -DEBUG 06-24 20:20:19 [manager.py:391] Prefill Batch: batch_id=273764909050809210803730026898423823178, time:1750767619.8759189s req_ids:[8] -DEBUG 06-24 20:20:19 [manager.py:391] -ERROR 06-24 20:20:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:213.5486602783203ms total_cost_time:213.5915756225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9777 prompt_cache_len:5151 prompt_cache_ratio:0.5268487266032525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 -DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10806488990783691 s -INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s -DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=56420925025816450662159351236814640231, time:1750767620.0921218s req_ids:[8] -DEBUG 06-24 20:20:20 [manager.py:391] -ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:19 lightllm_req_id:8 first_token_cost:422.2087860107422ms total_cost_time:422.2533702850342ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9778 prompt_cache_len:5151 prompt_cache_ratio:0.5267948455716915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 -DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10817432403564453 s -INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s -DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=136265769951535728799517873709343292744, time:1750767620.5133588s req_ids:[8] -DEBUG 06-24 20:20:20 [manager.py:391] -ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:200.76370239257812ms total_cost_time:200.80900192260742ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9779 prompt_cache_len:5151 prompt_cache_ratio:0.5267409755598732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 -DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s -INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s -DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=204351107882877299643086804897031632554, time:1750767620.7296488s req_ids:[8] -DEBUG 06-24 20:20:20 [manager.py:391] -ERROR 06-24 20:20:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:209.22470092773438ms total_cost_time:209.27000045776367ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9780 prompt_cache_len:5151 prompt_cache_ratio:0.5266871165644171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 -DEBUG 06-24 20:20:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:20 [manager.py:224] router recive req id 8 cost time 0.10907840728759766 s -INFO 06-24 20:20:20 [manager.py:68] detokenization recv req id 8 cost time 0.1112210750579834 s -DEBUG 06-24 20:20:20 [manager.py:391] Prefill Batch: batch_id=125110857008695838466598375136438888116, time:1750767620.9396293s req_ids:[8] -DEBUG 06-24 20:20:20 [manager.py:391] -ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:20:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 41768.579 tokens/s -DEBUG 06-24 20:20:21 [stats.py:37] Avg prompt tokens throughput: 41760.021 tokens/s -DEBUG 06-24 20:20:21 [stats.py:37] Avg generate tokens throughput: 8.557 tokens/s -INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:20 lightllm_req_id:8 first_token_cost:209.86390113830566ms total_cost_time:209.90872383117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9781 prompt_cache_len:5151 prompt_cache_ratio:0.5266332685819446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 -DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s -INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11080789566040039 s -DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=157585264792120494257916827978896740898, time:1750767621.154516s req_ids:[8] -DEBUG 06-24 20:20:21 [manager.py:391] -ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:207.93461799621582ms total_cost_time:207.9787254333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9782 prompt_cache_len:5151 prompt_cache_ratio:0.5265794316090779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 -DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.1088714599609375 s -INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11101698875427246 s -DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=17677222310119034072956419658191149520, time:1750767621.3689423s req_ids:[8] -DEBUG 06-24 20:20:21 [manager.py:391] -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:208.0848217010498ms total_cost_time:208.1305980682373ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9783 prompt_cache_len:5151 prompt_cache_ratio:0.526525605642441 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 -DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10829401016235352 s -INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s -DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=321994167095462075870398911145069528169, time:1750767621.5824456s req_ids:[8] -DEBUG 06-24 20:20:21 [manager.py:391] -ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:209.20157432556152ms total_cost_time:209.244966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9784 prompt_cache_len:5151 prompt_cache_ratio:0.526471790678659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 -DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:21 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s -INFO 06-24 20:20:21 [manager.py:68] detokenization recv req id 8 cost time 0.11006283760070801 s -DEBUG 06-24 20:20:21 [manager.py:391] Prefill Batch: batch_id=116340645852147583221677812670388553045, time:1750767621.8004498s req_ids:[8] -DEBUG 06-24 20:20:21 [manager.py:391] -ERROR 06-24 20:20:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:207.3664665222168ms total_cost_time:207.4110507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9785 prompt_cache_len:5151 prompt_cache_ratio:0.5264179867143587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 -DEBUG 06-24 20:20:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10824036598205566 s -INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11017704010009766 s -DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=208883472621056742786344414080557084607, time:1750767622.011575s req_ids:[8] -DEBUG 06-24 20:20:22 [manager.py:391] -ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:21 lightllm_req_id:8 first_token_cost:212.92519569396973ms total_cost_time:212.9685878753662ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9786 prompt_cache_len:5151 prompt_cache_ratio:0.5263641937461679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 -DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10841703414916992 s -INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11057090759277344 s -DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=103049623068262838665217529845026182488, time:1750767622.2284205s req_ids:[8] -DEBUG 06-24 20:20:22 [manager.py:391] -ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:371.8533515930176ms total_cost_time:371.89745903015137ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9787 prompt_cache_len:5151 prompt_cache_ratio:0.5263104117707162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 -DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10903811454772949 s -INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11100912094116211 s -DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=86248591457833197710644900335035567232, time:1750767622.604795s req_ids:[8] -DEBUG 06-24 20:20:22 [manager.py:391] -ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:213.3958339691162ms total_cost_time:213.4389877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9788 prompt_cache_len:5151 prompt_cache_ratio:0.5262566407846343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 -DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:22 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s -INFO 06-24 20:20:22 [manager.py:68] detokenization recv req id 8 cost time 0.11091732978820801 s -DEBUG 06-24 20:20:22 [manager.py:391] Prefill Batch: batch_id=76905037032621645064775549485879512866, time:1750767622.824462s req_ids:[8] -DEBUG 06-24 20:20:22 [manager.py:391] -ERROR 06-24 20:20:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:206.085205078125ms total_cost_time:206.1288356781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9789 prompt_cache_len:5151 prompt_cache_ratio:0.5262028807845541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 -DEBUG 06-24 20:20:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10819768905639648 s -INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.11032652854919434 s -DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=150434956329889859531696042569191554330, time:1750767623.0371916s req_ids:[8] -DEBUG 06-24 20:20:23 [manager.py:391] -ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:22 lightllm_req_id:8 first_token_cost:210.74604988098145ms total_cost_time:210.79277992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9790 prompt_cache_len:5151 prompt_cache_ratio:0.5261491317671093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 -DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:23 [batch.py:51] router release req id 8 -INFO 06-24 20:20:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:20:23 [statics_utils.py:24] mean first cost: 228.55497230674405 ms -INFO 06-24 20:20:23 [statics_utils.py:24] mean per token cost: 0.07034210481983891 ms -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10877752304077148 s -INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.11068272590637207 s -DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=326957584174311689796673248110270754317, time:1750767623.2515244s req_ids:[8] -DEBUG 06-24 20:20:23 [manager.py:391] -ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:207.98492431640625ms total_cost_time:208.02807807922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9791 prompt_cache_len:5151 prompt_cache_ratio:0.5260953937289348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 -DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10785365104675293 s -INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.10973358154296875 s -DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=174811010164042578254361644033611800985, time:1750767623.4669185s req_ids:[8] -DEBUG 06-24 20:20:23 [manager.py:391] -ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:206.30288124084473ms total_cost_time:206.34770393371582ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9792 prompt_cache_len:5151 prompt_cache_ratio:0.5260416666666666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 -DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10794401168823242 s -INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.10971474647521973 s -DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=318824003125236192598000679272060087072, time:1750767623.6812394s req_ids:[8] -DEBUG 06-24 20:20:23 [manager.py:391] -ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:212.48364448547363ms total_cost_time:212.52703666687012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9793 prompt_cache_len:5151 prompt_cache_ratio:0.5259879505769427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 -DEBUG 06-24 20:20:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:23 [manager.py:224] router recive req id 8 cost time 0.10878515243530273 s -INFO 06-24 20:20:23 [manager.py:68] detokenization recv req id 8 cost time 0.1108546257019043 s -DEBUG 06-24 20:20:23 [manager.py:391] Prefill Batch: batch_id=48151011333779218242864024445519024527, time:1750767623.8955107s req_ids:[8] -DEBUG 06-24 20:20:23 [manager.py:391] -ERROR 06-24 20:20:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:208.18185806274414ms total_cost_time:208.22525024414062ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9794 prompt_cache_len:5151 prompt_cache_ratio:0.5259342454564019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 -DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10798263549804688 s -INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s -DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=147640214283413526253516770315794689703, time:1750767624.1231794s req_ids:[8] -DEBUG 06-24 20:20:24 [manager.py:391] -ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:23 lightllm_req_id:8 first_token_cost:390.77186584472656ms total_cost_time:390.81764221191406ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9795 prompt_cache_len:5151 prompt_cache_ratio:0.5258805513016845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 -DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10931658744812012 s -INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s -DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=132185907965054953535872976648421573318, time:1750767624.505492s req_ids:[8] -DEBUG 06-24 20:20:24 [manager.py:391] -ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:207.5040340423584ms total_cost_time:207.54766464233398ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9796 prompt_cache_len:5151 prompt_cache_ratio:0.5258268681094325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 -DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10922741889953613 s -INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11113548278808594 s -DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=93988963094899952369079101234938533334, time:1750767624.720883s req_ids:[8] -DEBUG 06-24 20:20:24 [manager.py:391] -ERROR 06-24 20:20:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:210.81089973449707ms total_cost_time:210.85572242736816ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9797 prompt_cache_len:5151 prompt_cache_ratio:0.5257731958762887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 -DEBUG 06-24 20:20:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:24 [manager.py:224] router recive req id 8 cost time 0.10846304893493652 s -INFO 06-24 20:20:24 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s -DEBUG 06-24 20:20:24 [manager.py:391] Prefill Batch: batch_id=93796764862019116683755273855541655499, time:1750767624.9371994s req_ids:[8] -DEBUG 06-24 20:20:24 [manager.py:391] -ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:24 lightllm_req_id:8 first_token_cost:208.13298225402832ms total_cost_time:208.1770896911621ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9798 prompt_cache_len:5151 prompt_cache_ratio:0.5257195345988978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 -DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s -INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s -DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=47472019500088227784608418801057187546, time:1750767625.1513755s req_ids:[8] -DEBUG 06-24 20:20:25 [manager.py:391] -ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:208.73618125915527ms total_cost_time:208.78076553344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9799 prompt_cache_len:5151 prompt_cache_ratio:0.5256658842739055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 -DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s -INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.10987162590026855 s -DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=111426244481711219880549100700927874793, time:1750767625.3656385s req_ids:[8] -DEBUG 06-24 20:20:25 [manager.py:391] -ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:209.18893814086914ms total_cost_time:209.23328399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9800 prompt_cache_len:5151 prompt_cache_ratio:0.5256122448979592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 -DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10913419723510742 s -INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111152172088623 s -DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=54153523113677443566912752204703749372, time:1750767625.5809858s req_ids:[8] -DEBUG 06-24 20:20:25 [manager.py:391] -ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:211.08579635620117ms total_cost_time:211.13133430480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9801 prompt_cache_len:5151 prompt_cache_ratio:0.5255586164677074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 -DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:25 [manager.py:224] router recive req id 8 cost time 0.10812163352966309 s -INFO 06-24 20:20:25 [manager.py:68] detokenization recv req id 8 cost time 0.11011171340942383 s -DEBUG 06-24 20:20:25 [manager.py:391] Prefill Batch: batch_id=90066343870038748186176984786366386990, time:1750767625.7977364s req_ids:[8] -DEBUG 06-24 20:20:25 [manager.py:391] -ERROR 06-24 20:20:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:210.6034755706787ms total_cost_time:210.6480598449707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9802 prompt_cache_len:5151 prompt_cache_ratio:0.5255049989798001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 -DEBUG 06-24 20:20:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10855221748352051 s -INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s -DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=170388547711268976016585658125538062117, time:1750767626.01283s req_ids:[8] -DEBUG 06-24 20:20:26 [manager.py:391] -ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:25 lightllm_req_id:8 first_token_cost:391.2055492401123ms total_cost_time:391.2489414215088ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9803 prompt_cache_len:5151 prompt_cache_ratio:0.5254513924308885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 -DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10794734954833984 s -INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.10990118980407715 s -DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=272922261260460602336001289038308907645, time:1750767626.4105554s req_ids:[8] -DEBUG 06-24 20:20:26 [manager.py:391] -ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:202.11482048034668ms total_cost_time:202.1770477294922ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:9804 prompt_cache_len:5151 prompt_cache_ratio:0.5253977968176254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 -DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.10906267166137695 s -INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11021757125854492 s -DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=245039867733681188862055160217377739558, time:1750767626.6206322s req_ids:[8] -DEBUG 06-24 20:20:26 [manager.py:391] -ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:220.2756404876709ms total_cost_time:220.3207015991211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9805 prompt_cache_len:5151 prompt_cache_ratio:0.5253442121366649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 -DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:26 [manager.py:224] router recive req id 8 cost time 0.1088247299194336 s -INFO 06-24 20:20:26 [manager.py:68] detokenization recv req id 8 cost time 0.11078596115112305 s -DEBUG 06-24 20:20:26 [manager.py:391] Prefill Batch: batch_id=293456262429453376833136859382916705405, time:1750767626.844889s req_ids:[8] -DEBUG 06-24 20:20:26 [manager.py:391] -ERROR 06-24 20:20:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:200.64330101013184ms total_cost_time:200.68740844726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9806 prompt_cache_len:5151 prompt_cache_ratio:0.5252906383846625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 -DEBUG 06-24 20:20:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.1080634593963623 s -INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11000776290893555 s -DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=160751587742383152512342875336158542690, time:1750767627.0526578s req_ids:[8] -DEBUG 06-24 20:20:27 [manager.py:391] -ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:26 lightllm_req_id:8 first_token_cost:208.7228298187256ms total_cost_time:208.76693725585938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9807 prompt_cache_len:5151 prompt_cache_ratio:0.5252370755582747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 -DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10763049125671387 s -INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.10952997207641602 s -DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=339069058622311765922511838405776414884, time:1750767627.2667212s req_ids:[8] -DEBUG 06-24 20:20:27 [manager.py:391] -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:201.65324211120605ms total_cost_time:201.69591903686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9808 prompt_cache_len:5151 prompt_cache_ratio:0.5251835236541599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 -DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10782146453857422 s -INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.10980725288391113 s -DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=193403816358995635362265405681291672318, time:1750767627.4749835s req_ids:[8] -DEBUG 06-24 20:20:27 [manager.py:391] -ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:206.1011791229248ms total_cost_time:206.1469554901123ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9809 prompt_cache_len:5151 prompt_cache_ratio:0.5251299826689775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 -DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s -INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s -DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=286261063991233896328525595859785562276, time:1750767627.6866248s req_ids:[8] -DEBUG 06-24 20:20:27 [manager.py:391] -ERROR 06-24 20:20:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:207.92841911315918ms total_cost_time:207.97085762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9810 prompt_cache_len:5151 prompt_cache_ratio:0.5250764525993884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 -DEBUG 06-24 20:20:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:27 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s -INFO 06-24 20:20:27 [manager.py:68] detokenization recv req id 8 cost time 0.11008405685424805 s -DEBUG 06-24 20:20:27 [manager.py:391] Prefill Batch: batch_id=340240703996619726981146317295080010198, time:1750767627.9008553s req_ids:[8] -DEBUG 06-24 20:20:27 [manager.py:391] -ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:27 lightllm_req_id:8 first_token_cost:409.4376564025879ms total_cost_time:409.4820022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9811 prompt_cache_len:5151 prompt_cache_ratio:0.5250229334420549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 -DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10784292221069336 s -INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s -DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=286847843633475391638477118772544232958, time:1750767628.3136668s req_ids:[8] -DEBUG 06-24 20:20:28 [manager.py:391] -ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:162.4279022216797ms total_cost_time:162.47153282165527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9812 prompt_cache_len:5151 prompt_cache_ratio:0.5249694251936404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 -DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s -INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.1105203628540039 s -DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=166152652480891661699124313696404818514, time:1750767628.4819105s req_ids:[8] -DEBUG 06-24 20:20:28 [manager.py:391] -ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:198.99702072143555ms total_cost_time:199.0518569946289ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9813 prompt_cache_len:5151 prompt_cache_ratio:0.5249159278508102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 -DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10885071754455566 s -INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.11068987846374512 s -DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=320224752019504705425281495072960923808, time:1750767628.6958935s req_ids:[8] -DEBUG 06-24 20:20:28 [manager.py:391] -ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:211.95316314697266ms total_cost_time:211.99822425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9814 prompt_cache_len:5151 prompt_cache_ratio:0.5248624414102303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 -DEBUG 06-24 20:20:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:28 [manager.py:224] router recive req id 8 cost time 0.10910892486572266 s -INFO 06-24 20:20:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098527908325195 s -DEBUG 06-24 20:20:28 [manager.py:391] Prefill Batch: batch_id=141411309940188619781739269547183197588, time:1750767628.9102902s req_ids:[8] -DEBUG 06-24 20:20:28 [manager.py:391] -ERROR 06-24 20:20:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:28 lightllm_req_id:8 first_token_cost:207.40413665771484ms total_cost_time:207.45611190795898ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:9815 prompt_cache_len:5151 prompt_cache_ratio:0.5248089658685685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 -DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10896062850952148 s -INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11025834083557129 s -DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=335251184606819733764496071821458711383, time:1750767629.126696s req_ids:[8] -DEBUG 06-24 20:20:29 [manager.py:391] -ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:215.1801586151123ms total_cost_time:215.2249813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9816 prompt_cache_len:5151 prompt_cache_ratio:0.5247555012224939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 -DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.11427855491638184 s -INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.1157076358795166 s -DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=63116750155687860411820697032352944097, time:1750767629.343398s req_ids:[8] -DEBUG 06-24 20:20:29 [manager.py:391] -ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:209.70535278320312ms total_cost_time:209.80429649353027ms,out_token_counter:1 mean_per_token_cost_time: 0.09894371032714844ms prompt_token_num:9817 prompt_cache_len:5151 prompt_cache_ratio:0.5247020474686768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 -DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10829758644104004 s -INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.1102914810180664 s -DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=297419112478891465939957762744821745548, time:1750767629.558996s req_ids:[8] -DEBUG 06-24 20:20:29 [manager.py:391] -ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:209.69343185424805ms total_cost_time:209.74016189575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9818 prompt_cache_len:5151 prompt_cache_ratio:0.5246486046037889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 -DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10995650291442871 s -INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11198091506958008 s -DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=97266016639221576983154022186993573303, time:1750767629.7779675s req_ids:[8] -DEBUG 06-24 20:20:29 [manager.py:391] -ERROR 06-24 20:20:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:211.41672134399414ms total_cost_time:211.46059036254883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9819 prompt_cache_len:5151 prompt_cache_ratio:0.5245951726245035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 -DEBUG 06-24 20:20:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:29 [manager.py:224] router recive req id 8 cost time 0.10900735855102539 s -INFO 06-24 20:20:29 [manager.py:68] detokenization recv req id 8 cost time 0.11094880104064941 s -DEBUG 06-24 20:20:29 [manager.py:391] Prefill Batch: batch_id=246860284430075259600618888057837398864, time:1750767629.9944608s req_ids:[8] -DEBUG 06-24 20:20:29 [manager.py:391] -ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:29 lightllm_req_id:8 first_token_cost:369.89665031433105ms total_cost_time:369.9531555175781ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:9820 prompt_cache_len:5151 prompt_cache_ratio:0.5245417515274949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 -DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.10876083374023438 s -INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.11068248748779297 s -DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=149708187322455662487276173552151934686, time:1750767630.3696084s req_ids:[8] -DEBUG 06-24 20:20:30 [manager.py:391] -ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:208.54949951171875ms total_cost_time:208.59408378601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9821 prompt_cache_len:5151 prompt_cache_ratio:0.524488341309439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 -DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.11060047149658203 s -INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.11270356178283691 s -DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=67683242613066182270213141512976074559, time:1750767630.5833068s req_ids:[8] -DEBUG 06-24 20:20:30 [manager.py:391] -ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:209.77544784545898ms total_cost_time:209.82003211975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9822 prompt_cache_len:5151 prompt_cache_ratio:0.5244349419670128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 -DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:30 [manager.py:224] router recive req id 8 cost time 0.1075754165649414 s -INFO 06-24 20:20:30 [manager.py:68] detokenization recv req id 8 cost time 0.1095583438873291 s -DEBUG 06-24 20:20:30 [manager.py:391] Prefill Batch: batch_id=229364621618973955746910840269263571553, time:1750767630.7986398s req_ids:[8] -DEBUG 06-24 20:20:30 [manager.py:391] -ERROR 06-24 20:20:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:209.47694778442383ms total_cost_time:209.5181941986084ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9823 prompt_cache_len:5151 prompt_cache_ratio:0.5243815534968951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 -DEBUG 06-24 20:20:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10992312431335449 s -INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11184287071228027 s -DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=152587293620394071600994084970168883214, time:1750767631.0144174s req_ids:[8] -DEBUG 06-24 20:20:31 [manager.py:391] -ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 -DEBUG 06-24 20:20:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 41835.348 tokens/s -DEBUG 06-24 20:20:31 [stats.py:37] Avg prompt tokens throughput: 41826.814 tokens/s -DEBUG 06-24 20:20:31 [stats.py:37] Avg generate tokens throughput: 8.533 tokens/s -INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:30 lightllm_req_id:8 first_token_cost:211.57550811767578ms total_cost_time:211.64393424987793ms,out_token_counter:1 mean_per_token_cost_time: 0.06842613220214844ms prompt_token_num:9824 prompt_cache_len:5151 prompt_cache_ratio:0.5243281758957655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 -DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10873150825500488 s -INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11069369316101074 s -DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=83546231035720741649149733392079134143, time:1750767631.243892s req_ids:[8] -DEBUG 06-24 20:20:31 [manager.py:391] -ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:224.42126274108887ms total_cost_time:224.46656227111816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9825 prompt_cache_len:5151 prompt_cache_ratio:0.5242748091603053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 -DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10757112503051758 s -INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.10963869094848633 s -DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=14564678536693774603978609421203651799, time:1750767631.4625828s req_ids:[8] -DEBUG 06-24 20:20:31 [manager.py:391] -ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:209.43951606750488ms total_cost_time:209.48386192321777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9826 prompt_cache_len:5151 prompt_cache_ratio:0.5242214532871973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 -DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.1086735725402832 s -INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11070656776428223 s -DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=296145160860894103250871002745783382680, time:1750767631.6775823s req_ids:[8] -DEBUG 06-24 20:20:31 [manager.py:391] -ERROR 06-24 20:20:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:210.13450622558594ms total_cost_time:210.1762294769287ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:9827 prompt_cache_len:5151 prompt_cache_ratio:0.5241681082731251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 -DEBUG 06-24 20:20:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:31 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s -INFO 06-24 20:20:31 [manager.py:68] detokenization recv req id 8 cost time 0.11079788208007812 s -DEBUG 06-24 20:20:31 [manager.py:391] Prefill Batch: batch_id=291404921401300082657533418019495111242, time:1750767631.8933513s req_ids:[8] -DEBUG 06-24 20:20:31 [manager.py:391] -ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:31 lightllm_req_id:8 first_token_cost:375.4565715789795ms total_cost_time:375.5018711090088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9828 prompt_cache_len:5151 prompt_cache_ratio:0.5241147741147741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 -DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s -INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11039400100708008 s -DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=305031747116538201747409735416777483557, time:1750767632.2716286s req_ids:[8] -DEBUG 06-24 20:20:32 [manager.py:391] -ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:203.37772369384766ms total_cost_time:203.42373847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9829 prompt_cache_len:5151 prompt_cache_ratio:0.524061450808831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 -DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s -INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11006355285644531 s -DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=57136484573265414604000683062063195642, time:1750767632.4890945s req_ids:[8] -DEBUG 06-24 20:20:32 [manager.py:391] -ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:213.72008323669434ms total_cost_time:213.76609802246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9830 prompt_cache_len:5151 prompt_cache_ratio:0.5240081383519837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 -DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10954952239990234 s -INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11150813102722168 s -DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=16984100778801417762478380666952846194, time:1750767632.7032015s req_ids:[8] -DEBUG 06-24 20:20:32 [manager.py:391] -ERROR 06-24 20:20:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:206.99238777160645ms total_cost_time:207.03721046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9831 prompt_cache_len:5151 prompt_cache_ratio:0.5239548367409216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 -DEBUG 06-24 20:20:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:32 [manager.py:224] router recive req id 8 cost time 0.10895872116088867 s -INFO 06-24 20:20:32 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s -DEBUG 06-24 20:20:32 [manager.py:391] Prefill Batch: batch_id=103827813637938114863672103537791564273, time:1750767632.9181502s req_ids:[8] -DEBUG 06-24 20:20:32 [manager.py:391] -ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:32 lightllm_req_id:8 first_token_cost:207.33904838562012ms total_cost_time:207.3826789855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9832 prompt_cache_len:5151 prompt_cache_ratio:0.5239015459723352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 -DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10949373245239258 s -INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11147117614746094 s -DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=269713370630256404373954428594009822731, time:1750767633.1315007s req_ids:[8] -DEBUG 06-24 20:20:33 [manager.py:391] -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:209.05303955078125ms total_cost_time:209.09643173217773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9833 prompt_cache_len:5151 prompt_cache_ratio:0.5238482660429167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 -DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10938763618469238 s -INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11140036582946777 s -DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=331487190744281758171180091463261590600, time:1750767633.3457603s req_ids:[8] -DEBUG 06-24 20:20:33 [manager.py:391] -ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:202.5470733642578ms total_cost_time:202.592134475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9834 prompt_cache_len:5151 prompt_cache_ratio:0.5237949969493594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 -DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.10939407348632812 s -INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.11140275001525879 s -DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=141709234723696794358221923432692018043, time:1750767633.5555518s req_ids:[8] -DEBUG 06-24 20:20:33 [manager.py:391] -ERROR 06-24 20:20:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:207.5185775756836ms total_cost_time:207.5653076171875ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9835 prompt_cache_len:5151 prompt_cache_ratio:0.5237417386883579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 -DEBUG 06-24 20:20:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:33 [manager.py:224] router recive req id 8 cost time 0.3118305206298828 s -INFO 06-24 20:20:33 [manager.py:68] detokenization recv req id 8 cost time 0.31439781188964844 s -DEBUG 06-24 20:20:33 [manager.py:391] Prefill Batch: batch_id=44593544588382884354724034125251082542, time:1750767633.9745944s req_ids:[8] -DEBUG 06-24 20:20:33 [manager.py:391] -ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:33 lightllm_req_id:8 first_token_cost:416.4111614227295ms total_cost_time:416.4576530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9836 prompt_cache_len:5151 prompt_cache_ratio:0.5236884912566083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 -DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10799813270568848 s -INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s -DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=278173540455071295297888750275793111741, time:1750767634.1925979s req_ids:[8] -DEBUG 06-24 20:20:34 [manager.py:391] -ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:210.25586128234863ms total_cost_time:210.30068397521973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9837 prompt_cache_len:5151 prompt_cache_ratio:0.5236352546508082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 -DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10789370536804199 s -INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s -DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=121175320744780678283327625287460900507, time:1750767634.4076784s req_ids:[8] -DEBUG 06-24 20:20:34 [manager.py:391] -ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:206.35294914245605ms total_cost_time:206.39824867248535ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9838 prompt_cache_len:5151 prompt_cache_ratio:0.523582028867656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 -DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.10833621025085449 s -INFO 06-24 20:20:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.11023712158203125 s -DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=316282947894223674005178801565186239309, time:1750767634.622619s req_ids:[8] -DEBUG 06-24 20:20:34 [manager.py:391] -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:211.2584114074707ms total_cost_time:211.3046646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9839 prompt_cache_len:5151 prompt_cache_ratio:0.523528813903852 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 -DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:34 [manager.py:224] router recive req id 8 cost time 0.11083984375 s -INFO 06-24 20:20:34 [manager.py:68] detokenization recv req id 8 cost time 0.11342048645019531 s -DEBUG 06-24 20:20:34 [manager.py:391] Prefill Batch: batch_id=217072354813086774953979272487721024322, time:1750767634.8397508s req_ids:[8] -DEBUG 06-24 20:20:34 [manager.py:391] -ERROR 06-24 20:20:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:209.8526954650879ms total_cost_time:209.8991870880127ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9840 prompt_cache_len:5151 prompt_cache_ratio:0.5234756097560975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 -DEBUG 06-24 20:20:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.10886096954345703 s -INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11093449592590332 s -DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=339370780111758554673234968269496834561, time:1750767635.053557s req_ids:[8] -DEBUG 06-24 20:20:35 [manager.py:391] -ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:34 lightllm_req_id:8 first_token_cost:203.6585807800293ms total_cost_time:203.70244979858398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9841 prompt_cache_len:5151 prompt_cache_ratio:0.5234224164210954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 -DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.1092061996459961 s -INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11118865013122559 s -DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=325788816525945214746441594029746809847, time:1750767635.2632673s req_ids:[8] -DEBUG 06-24 20:20:35 [manager.py:391] -ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:206.4645290374756ms total_cost_time:206.50744438171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9842 prompt_cache_len:5151 prompt_cache_ratio:0.5233692338955497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 -DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s -INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11079978942871094 s -DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=161422872285320130485490148285864108616, time:1750767635.4776294s req_ids:[8] -DEBUG 06-24 20:20:35 [manager.py:391] -ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:380.0981044769287ms total_cost_time:380.1426887512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9843 prompt_cache_len:5151 prompt_cache_ratio:0.5233160621761658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 -DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:35 [manager.py:224] router recive req id 8 cost time 0.10795402526855469 s -INFO 06-24 20:20:35 [manager.py:68] detokenization recv req id 8 cost time 0.11012721061706543 s -DEBUG 06-24 20:20:35 [manager.py:391] Prefill Batch: batch_id=188554156942973155624809852656070165639, time:1750767635.859789s req_ids:[8] -DEBUG 06-24 20:20:35 [manager.py:391] -ERROR 06-24 20:20:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:205.65009117126465ms total_cost_time:205.69229125976562ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9844 prompt_cache_len:5151 prompt_cache_ratio:0.5232629012596506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 -DEBUG 06-24 20:20:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s -INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.11101317405700684 s -DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=259447047004667800648840476073097527162, time:1750767636.0749018s req_ids:[8] -DEBUG 06-24 20:20:36 [manager.py:391] -ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:35 lightllm_req_id:8 first_token_cost:206.67529106140137ms total_cost_time:206.71844482421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9845 prompt_cache_len:5151 prompt_cache_ratio:0.5232097511427121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 -DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10952162742614746 s -INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s -DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=252974452959142126170074782789905306596, time:1750767636.2868354s req_ids:[8] -DEBUG 06-24 20:20:36 [manager.py:391] -ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:207.23557472229004ms total_cost_time:207.27920532226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9846 prompt_cache_len:5151 prompt_cache_ratio:0.5231566118220597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 -DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10776901245117188 s -INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10902929306030273 s -DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=186829927064031739364695002353175291408, time:1750767636.4999228s req_ids:[8] -DEBUG 06-24 20:20:36 [manager.py:391] -ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:207.66520500183105ms total_cost_time:207.71098136901855ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9847 prompt_cache_len:5151 prompt_cache_ratio:0.5231034832944044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 -DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10792040824890137 s -INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s -DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=127039352766353270495954388025317877139, time:1750767636.7140849s req_ids:[8] -DEBUG 06-24 20:20:36 [manager.py:391] -ERROR 06-24 20:20:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:206.35581016540527ms total_cost_time:206.39944076538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9848 prompt_cache_len:5151 prompt_cache_ratio:0.5230503655564581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 -DEBUG 06-24 20:20:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:36 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s -INFO 06-24 20:20:36 [manager.py:68] detokenization recv req id 8 cost time 0.10997605323791504 s -DEBUG 06-24 20:20:36 [manager.py:391] Prefill Batch: batch_id=175289394642325575729716716597674809733, time:1750767636.9314957s req_ids:[8] -DEBUG 06-24 20:20:36 [manager.py:391] -ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:36 lightllm_req_id:8 first_token_cost:214.19239044189453ms total_cost_time:214.23602104187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9849 prompt_cache_len:5151 prompt_cache_ratio:0.5229972586049345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 -DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.1087644100189209 s -INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075115203857422 s -DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=93794999698347656964797352548668484564, time:1750767637.1472352s req_ids:[8] -DEBUG 06-24 20:20:37 [manager.py:391] -ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:210.11734008789062ms total_cost_time:210.1614475250244ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9850 prompt_cache_len:5151 prompt_cache_ratio:0.5229441624365482 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 -DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s -INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.1111443042755127 s -DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=51006539445990156205023680618176401893, time:1750767637.366126s req_ids:[8] -DEBUG 06-24 20:20:37 [manager.py:391] -ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:212.16797828674316ms total_cost_time:212.21303939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9851 prompt_cache_len:5151 prompt_cache_ratio:0.5228910770480154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 -DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.10804510116577148 s -INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.10994410514831543 s -DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=87152793400464007447782248749656807151, time:1750767637.5794404s req_ids:[8] -DEBUG 06-24 20:20:37 [manager.py:391] -ERROR 06-24 20:20:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:380.48696517944336ms total_cost_time:380.53178787231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9852 prompt_cache_len:5151 prompt_cache_ratio:0.5228380024360536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 -DEBUG 06-24 20:20:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:37 [manager.py:224] router recive req id 8 cost time 0.10836648941040039 s -INFO 06-24 20:20:37 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s -DEBUG 06-24 20:20:37 [manager.py:391] Prefill Batch: batch_id=6912084223442481052783565917609126859, time:1750767637.9647207s req_ids:[8] -DEBUG 06-24 20:20:37 [manager.py:391] -ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:37 lightllm_req_id:8 first_token_cost:208.65392684936523ms total_cost_time:208.69803428649902ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9853 prompt_cache_len:5151 prompt_cache_ratio:0.5227849385973815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 -DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:38 [batch.py:51] router release req id 8 -INFO 06-24 20:20:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10946941375732422 s -INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11136579513549805 s -DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=136808972309748891854450292745875917321, time:1750767638.1802363s req_ids:[8] -DEBUG 06-24 20:20:38 [manager.py:391] -ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:208.38594436645508ms total_cost_time:208.43076705932617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9854 prompt_cache_len:5151 prompt_cache_ratio:0.5227318855287193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 -DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10810494422912598 s -INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008358001708984 s -DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=338846449146995771767465987364617165023, time:1750767638.396286s req_ids:[8] -DEBUG 06-24 20:20:38 [manager.py:391] -ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:209.52343940734863ms total_cost_time:209.56707000732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9855 prompt_cache_len:5151 prompt_cache_ratio:0.5226788432267885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 -DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008572578430176 s -DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=283666213461422392242081606385119331258, time:1750767638.6117744s req_ids:[8] -DEBUG 06-24 20:20:38 [manager.py:391] -ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:171.08631134033203ms total_cost_time:171.12994194030762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9856 prompt_cache_len:5151 prompt_cache_ratio:0.5226258116883117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 -DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.1077260971069336 s -INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s -DEBUG 06-24 20:20:38 [manager.py:391] Prefill Batch: batch_id=254932023129837077409726046965486703673, time:1750767638.7875166s req_ids:[8] -DEBUG 06-24 20:20:38 [manager.py:391] -ERROR 06-24 20:20:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:202.29053497314453ms total_cost_time:202.33464241027832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9857 prompt_cache_len:5151 prompt_cache_ratio:0.5225727909100132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 -DEBUG 06-24 20:20:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:38 [manager.py:224] router recive req id 8 cost time 0.11051297187805176 s -INFO 06-24 20:20:38 [manager.py:68] detokenization recv req id 8 cost time 0.11240506172180176 s -DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=234005756795913159361874983629903291277, time:1750767639.0004761s req_ids:[8] -DEBUG 06-24 20:20:39 [manager.py:391] -ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:38 lightllm_req_id:8 first_token_cost:209.88082885742188ms total_cost_time:209.92422103881836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9858 prompt_cache_len:5151 prompt_cache_ratio:0.5225197808886184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 -DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.1091451644897461 s -INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110162734985352 s -DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=331257924281503179256106769152840237726, time:1750767639.2108104s req_ids:[8] -DEBUG 06-24 20:20:39 [manager.py:391] -ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:207.01098442077637ms total_cost_time:207.05628395080566ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9859 prompt_cache_len:5151 prompt_cache_ratio:0.522466781620854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 -DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10889220237731934 s -INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.11095523834228516 s -DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=149457176586173820492060779123933035311, time:1750767639.42298s req_ids:[8] -DEBUG 06-24 20:20:39 [manager.py:391] -ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:371.8738555908203ms total_cost_time:371.9189167022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9860 prompt_cache_len:5151 prompt_cache_ratio:0.5224137931034483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 -DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10610008239746094 s -INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.10729217529296875 s -DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=298013877519844892731659186438830181986, time:1750767639.7999816s req_ids:[8] -DEBUG 06-24 20:20:39 [manager.py:391] -ERROR 06-24 20:20:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:163.8026237487793ms total_cost_time:163.82241249084473ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:9861 prompt_cache_len:5151 prompt_cache_ratio:0.5223608153331305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 -DEBUG 06-24 20:20:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:39 [manager.py:224] router recive req id 8 cost time 0.10590362548828125 s -INFO 06-24 20:20:39 [manager.py:68] detokenization recv req id 8 cost time 0.10789132118225098 s -DEBUG 06-24 20:20:39 [manager.py:391] Prefill Batch: batch_id=264764386632736105083480191454632587505, time:1750767639.9696183s req_ids:[8] -DEBUG 06-24 20:20:39 [manager.py:391] -ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:39 lightllm_req_id:8 first_token_cost:195.7104206085205ms total_cost_time:195.7564353942871ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9862 prompt_cache_len:5151 prompt_cache_ratio:0.5223078483066315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 -DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10863137245178223 s -INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106717586517334 s -DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=307259256675055424279737802234669274196, time:1750767640.1706963s req_ids:[8] -DEBUG 06-24 20:20:40 [manager.py:391] -ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:202.29554176330566ms total_cost_time:202.33964920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9863 prompt_cache_len:5151 prompt_cache_ratio:0.5222548920206833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 -DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10933184623718262 s -INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11138296127319336 s -DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=156895608538144327447908960179691300966, time:1750767640.3894298s req_ids:[8] -DEBUG 06-24 20:20:40 [manager.py:391] -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:218.75905990600586ms total_cost_time:218.80269050598145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9864 prompt_cache_len:5151 prompt_cache_ratio:0.5222019464720195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 -DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s -INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11080479621887207 s -DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=9752078054635430684531351365962402658, time:1750767640.6056838s req_ids:[8] -DEBUG 06-24 20:20:40 [manager.py:391] -ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:209.78927612304688ms total_cost_time:209.83195304870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9865 prompt_cache_len:5151 prompt_cache_ratio:0.5221490116573746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 -DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10702347755432129 s -INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.1088094711303711 s -DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=2780910235941728093575381039294561682, time:1750767640.8217697s req_ids:[8] -DEBUG 06-24 20:20:40 [manager.py:391] -ERROR 06-24 20:20:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:170.34626007080078ms total_cost_time:170.38822174072266ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9866 prompt_cache_len:5151 prompt_cache_ratio:0.5220960875734847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 -DEBUG 06-24 20:20:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:40 [manager.py:224] router recive req id 8 cost time 0.10869812965393066 s -INFO 06-24 20:20:40 [manager.py:68] detokenization recv req id 8 cost time 0.11081051826477051 s -DEBUG 06-24 20:20:40 [manager.py:391] Prefill Batch: batch_id=129604416244387845644769763547787524816, time:1750767640.9955535s req_ids:[8] -DEBUG 06-24 20:20:40 [manager.py:391] -ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:40 lightllm_req_id:8 first_token_cost:204.76865768432617ms total_cost_time:204.81228828430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9867 prompt_cache_len:5151 prompt_cache_ratio:0.5220431742170872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 -DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s -INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.11014342308044434 s -DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=20772459192545164914069084465998517447, time:1750767641.207221s req_ids:[8] -DEBUG 06-24 20:20:41 [manager.py:391] -DEBUG 06-24 20:20:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 42902.348 tokens/s -DEBUG 06-24 20:20:41 [stats.py:37] Avg prompt tokens throughput: 42893.734 tokens/s -DEBUG 06-24 20:20:41 [stats.py:37] Avg generate tokens throughput: 8.613 tokens/s -ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:377.6357173919678ms total_cost_time:377.68077850341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9868 prompt_cache_len:5151 prompt_cache_ratio:0.521990271584921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 -DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10770916938781738 s -INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.10953950881958008 s -DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=307878850242575289067884355466355414894, time:1750767641.5885391s req_ids:[8] -DEBUG 06-24 20:20:41 [manager.py:391] -ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:162.9774570465088ms total_cost_time:163.01989555358887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9869 prompt_cache_len:5151 prompt_cache_ratio:0.5219373796737258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 -DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10884237289428711 s -INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.11084985733032227 s -DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=12089587263252223100125819026189013065, time:1750767641.76152s req_ids:[8] -DEBUG 06-24 20:20:41 [manager.py:391] -ERROR 06-24 20:20:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:201.28297805786133ms total_cost_time:201.32756233215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9870 prompt_cache_len:5151 prompt_cache_ratio:0.5218844984802432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 -DEBUG 06-24 20:20:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:41 [manager.py:224] router recive req id 8 cost time 0.10781645774841309 s -INFO 06-24 20:20:41 [manager.py:68] detokenization recv req id 8 cost time 0.10983085632324219 s -DEBUG 06-24 20:20:41 [manager.py:391] Prefill Batch: batch_id=259176926614692279723400673668586282655, time:1750767641.9662097s req_ids:[8] -DEBUG 06-24 20:20:41 [manager.py:391] -ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:41 lightllm_req_id:8 first_token_cost:207.14902877807617ms total_cost_time:207.20744132995605ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:9871 prompt_cache_len:5151 prompt_cache_ratio:0.5218316280012156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 -DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s -INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11060762405395508 s -DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=5868944616832487380587426543255908956, time:1750767642.1808763s req_ids:[8] -DEBUG 06-24 20:20:42 [manager.py:391] -ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:204.53715324401855ms total_cost_time:204.58221435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9872 prompt_cache_len:5151 prompt_cache_ratio:0.5217787682333873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 -DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10855412483215332 s -INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11075544357299805 s -DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=310221211109891676950394085893901676252, time:1750767642.3890727s req_ids:[8] -DEBUG 06-24 20:20:42 [manager.py:391] -ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:207.00621604919434ms total_cost_time:207.05103874206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9873 prompt_cache_len:5151 prompt_cache_ratio:0.5217259191735035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 -DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s -INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.10957455635070801 s -DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=190098878393544036093794269424953305809, time:1750767642.6188695s req_ids:[8] -DEBUG 06-24 20:20:42 [manager.py:391] -ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:227.21219062805176ms total_cost_time:227.25653648376465ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9874 prompt_cache_len:5151 prompt_cache_ratio:0.5216730808183108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 -DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:42 [manager.py:224] router recive req id 8 cost time 0.10882234573364258 s -INFO 06-24 20:20:42 [manager.py:68] detokenization recv req id 8 cost time 0.11093616485595703 s -DEBUG 06-24 20:20:42 [manager.py:391] Prefill Batch: batch_id=21196999728586172224207675544167470291, time:1750767642.8376992s req_ids:[8] -DEBUG 06-24 20:20:42 [manager.py:391] -ERROR 06-24 20:20:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:205.99889755249023ms total_cost_time:206.04228973388672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9875 prompt_cache_len:5151 prompt_cache_ratio:0.521620253164557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 -DEBUG 06-24 20:20:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10737037658691406 s -INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.1092824935913086 s -DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=320648744224014371053406123493583358585, time:1750767643.0478897s req_ids:[8] -DEBUG 06-24 20:20:43 [manager.py:391] -ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:42 lightllm_req_id:8 first_token_cost:374.87220764160156ms total_cost_time:374.91655349731445ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9876 prompt_cache_len:5151 prompt_cache_ratio:0.5215674362089915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 -DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10786914825439453 s -INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.10996794700622559 s -DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=304273158954173830938695181251070496230, time:1750767643.4268143s req_ids:[8] -DEBUG 06-24 20:20:43 [manager.py:391] -ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:206.30812644958496ms total_cost_time:206.35247230529785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9877 prompt_cache_len:5151 prompt_cache_ratio:0.5215146299483648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 -DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.10673904418945312 s -INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.10860610008239746 s -DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=188135838357065998143554875599503838116, time:1750767643.6388216s req_ids:[8] -DEBUG 06-24 20:20:43 [manager.py:391] -ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:165.11940956115723ms total_cost_time:165.1625633239746ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9878 prompt_cache_len:5151 prompt_cache_ratio:0.5214618343794291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 -DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:43 [manager.py:224] router recive req id 8 cost time 0.1072540283203125 s -INFO 06-24 20:20:43 [manager.py:68] detokenization recv req id 8 cost time 0.1091301441192627 s -DEBUG 06-24 20:20:43 [manager.py:391] Prefill Batch: batch_id=74261669375289386687300662149375477014, time:1750767643.8102007s req_ids:[8] -DEBUG 06-24 20:20:43 [manager.py:391] -ERROR 06-24 20:20:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:200.9902000427246ms total_cost_time:201.03216171264648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9879 prompt_cache_len:5151 prompt_cache_ratio:0.5214090494989372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 -DEBUG 06-24 20:20:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10801362991333008 s -INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11001777648925781 s -DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=164803211597925524100263864492898658049, time:1750767644.0165825s req_ids:[8] -DEBUG 06-24 20:20:44 [manager.py:391] -ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:43 lightllm_req_id:8 first_token_cost:204.41889762878418ms total_cost_time:204.46348190307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9880 prompt_cache_len:5151 prompt_cache_ratio:0.5213562753036437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 -DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10867190361022949 s -INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11074280738830566 s -DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=188125718408042883991489142682868166758, time:1750767644.2268527s req_ids:[8] -DEBUG 06-24 20:20:44 [manager.py:391] -ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:207.0612907409668ms total_cost_time:207.10515975952148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9881 prompt_cache_len:5151 prompt_cache_ratio:0.5213035117903047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 -DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10909414291381836 s -INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.1110830307006836 s -DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=321062933222949637566173065974442666502, time:1750767644.4425356s req_ids:[8] -DEBUG 06-24 20:20:44 [manager.py:391] -ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:210.44230461120605ms total_cost_time:210.48712730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9882 prompt_cache_len:5151 prompt_cache_ratio:0.521250758955677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 -DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.10823988914489746 s -INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11024832725524902 s -DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=162440463483307804226659245769572298390, time:1750767644.6567304s req_ids:[8] -DEBUG 06-24 20:20:44 [manager.py:391] -ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:208.61101150512695ms total_cost_time:208.65488052368164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9883 prompt_cache_len:5151 prompt_cache_ratio:0.5211980167965192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 -DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:44 [manager.py:224] router recive req id 8 cost time 0.11024999618530273 s -INFO 06-24 20:20:44 [manager.py:68] detokenization recv req id 8 cost time 0.11221480369567871 s -DEBUG 06-24 20:20:44 [manager.py:391] Prefill Batch: batch_id=294384953879268681190450713043747543078, time:1750767644.8723514s req_ids:[8] -DEBUG 06-24 20:20:44 [manager.py:391] -ERROR 06-24 20:20:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:210.67333221435547ms total_cost_time:210.71839332580566ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9884 prompt_cache_len:5151 prompt_cache_ratio:0.5211452853095913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 -DEBUG 06-24 20:20:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.11099910736083984 s -INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11284756660461426 s -DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=191386931295212723312556467212755032414, time:1750767645.0882506s req_ids:[8] -DEBUG 06-24 20:20:45 [manager.py:391] -ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:44 lightllm_req_id:8 first_token_cost:408.4815979003906ms total_cost_time:408.5254669189453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9885 prompt_cache_len:5151 prompt_cache_ratio:0.521092564491654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 -DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10951733589172363 s -INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11160802841186523 s -DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=81663825210486118114073163964727236765, time:1750767645.501231s req_ids:[8] -DEBUG 06-24 20:20:45 [manager.py:391] -ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:204.0395736694336ms total_cost_time:204.0998935699463ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:9886 prompt_cache_len:5151 prompt_cache_ratio:0.52103985433947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 -DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10838937759399414 s -INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11016654968261719 s -DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=168677982381934386854181754710081848340, time:1750767645.7109559s req_ids:[8] -DEBUG 06-24 20:20:45 [manager.py:391] -ERROR 06-24 20:20:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:205.25479316711426ms total_cost_time:205.30033111572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9887 prompt_cache_len:5151 prompt_cache_ratio:0.5209871548498027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 -DEBUG 06-24 20:20:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:45 [manager.py:224] router recive req id 8 cost time 0.10902523994445801 s -INFO 06-24 20:20:45 [manager.py:68] detokenization recv req id 8 cost time 0.11073899269104004 s -DEBUG 06-24 20:20:45 [manager.py:391] Prefill Batch: batch_id=333624429488186261856074132625158568426, time:1750767645.9302318s req_ids:[8] -DEBUG 06-24 20:20:45 [manager.py:391] -ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:45 lightllm_req_id:8 first_token_cost:216.36629104614258ms total_cost_time:216.41111373901367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9888 prompt_cache_len:5151 prompt_cache_ratio:0.5209344660194175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 -DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10846805572509766 s -INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11040925979614258 s -DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=163631020007746300915768704500755498749, time:1750767646.1458554s req_ids:[8] -DEBUG 06-24 20:20:46 [manager.py:391] -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:210.22605895996094ms total_cost_time:210.28494834899902ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:9889 prompt_cache_len:5151 prompt_cache_ratio:0.5208817878450804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 -DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s -INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11053633689880371 s -DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=243161681238343816480894071986445783410, time:1750767646.3674445s req_ids:[8] -DEBUG 06-24 20:20:46 [manager.py:391] -ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:217.18811988830566ms total_cost_time:217.23103523254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9890 prompt_cache_len:5151 prompt_cache_ratio:0.5208291203235591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 -DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10822343826293945 s -INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11022257804870605 s -DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=13380787787805788429719768356090830890, time:1750767646.5867062s req_ids:[8] -DEBUG 06-24 20:20:46 [manager.py:391] -ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:210.46781539916992ms total_cost_time:210.51287651062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9891 prompt_cache_len:5151 prompt_cache_ratio:0.5207764634516227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 -DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:46 [manager.py:224] router recive req id 8 cost time 0.10948920249938965 s -INFO 06-24 20:20:46 [manager.py:68] detokenization recv req id 8 cost time 0.11154460906982422 s -DEBUG 06-24 20:20:46 [manager.py:391] Prefill Batch: batch_id=189841376659603989693115274284082644605, time:1750767646.8008347s req_ids:[8] -DEBUG 06-24 20:20:46 [manager.py:391] -ERROR 06-24 20:20:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:207.7009677886963ms total_cost_time:207.74579048156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9892 prompt_cache_len:5151 prompt_cache_ratio:0.5207238172260412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 -DEBUG 06-24 20:20:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:46 [batch.py:51] router release req id 8 -INFO 06-24 20:20:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10839319229125977 s -INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s -DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=121393687532653621760617977440406562892, time:1750767647.0154874s req_ids:[8] -DEBUG 06-24 20:20:47 [manager.py:391] -ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:46 lightllm_req_id:8 first_token_cost:379.66012954711914ms total_cost_time:379.70590591430664ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9893 prompt_cache_len:5151 prompt_cache_ratio:0.5206711816435864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 -DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.1092677116394043 s -INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11111807823181152 s -DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=321092132993971158357821639423962274450, time:1750767647.3973281s req_ids:[8] -DEBUG 06-24 20:20:47 [manager.py:391] -ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:205.3520679473877ms total_cost_time:205.39331436157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:9894 prompt_cache_len:5151 prompt_cache_ratio:0.520618556701031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 -DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10869526863098145 s -INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.110687255859375 s -DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=164058486827198979877252200023749882637, time:1750767647.61033s req_ids:[8] -DEBUG 06-24 20:20:47 [manager.py:391] -ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:208.35232734680176ms total_cost_time:208.39595794677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9895 prompt_cache_len:5151 prompt_cache_ratio:0.5205659423951491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 -DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:47 [manager.py:224] router recive req id 8 cost time 0.10845208168029785 s -INFO 06-24 20:20:47 [manager.py:68] detokenization recv req id 8 cost time 0.11040091514587402 s -DEBUG 06-24 20:20:47 [manager.py:391] Prefill Batch: batch_id=146880981361548222439618596416026006337, time:1750767647.8229754s req_ids:[8] -DEBUG 06-24 20:20:47 [manager.py:391] -ERROR 06-24 20:20:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:204.7121524810791ms total_cost_time:204.75482940673828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9896 prompt_cache_len:5151 prompt_cache_ratio:0.5205133387227162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 -DEBUG 06-24 20:20:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.1089634895324707 s -INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11137270927429199 s -DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=61218077597539260524514589169240912483, time:1750767648.0365417s req_ids:[8] -DEBUG 06-24 20:20:48 [manager.py:391] -ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:47 lightllm_req_id:8 first_token_cost:210.30378341674805ms total_cost_time:210.34765243530273ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9897 prompt_cache_len:5151 prompt_cache_ratio:0.5204607456805093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 -DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.10803031921386719 s -INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11069774627685547 s -DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=7654666468082137691885753503010683951, time:1750767648.2517345s req_ids:[8] -DEBUG 06-24 20:20:48 [manager.py:391] -ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:211.79485321044922ms total_cost_time:211.85684204101562ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:9898 prompt_cache_len:5151 prompt_cache_ratio:0.5204081632653061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 -DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.10976791381835938 s -INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11162590980529785 s -DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=226164015312736010738695072021288213789, time:1750767648.4679751s req_ids:[8] -DEBUG 06-24 20:20:48 [manager.py:391] -ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:206.41756057739258ms total_cost_time:206.46047592163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9899 prompt_cache_len:5151 prompt_cache_ratio:0.5203555914738862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 -DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:48 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s -INFO 06-24 20:20:48 [manager.py:68] detokenization recv req id 8 cost time 0.11014962196350098 s -DEBUG 06-24 20:20:48 [manager.py:391] Prefill Batch: batch_id=311516804142280656061973400366153985787, time:1750767648.681982s req_ids:[8] -DEBUG 06-24 20:20:48 [manager.py:391] -ERROR 06-24 20:20:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:209.08355712890625ms total_cost_time:209.12671089172363ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9900 prompt_cache_len:5151 prompt_cache_ratio:0.5203030303030303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 -DEBUG 06-24 20:20:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.3102424144744873 s -INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.31230950355529785 s -DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=317320175677442533329948112406176659206, time:1750767649.114971s req_ids:[8] -DEBUG 06-24 20:20:49 [manager.py:391] -ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:48 lightllm_req_id:8 first_token_cost:430.039644241333ms total_cost_time:430.0832748413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9901 prompt_cache_len:5151 prompt_cache_ratio:0.5202504797495202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 -DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s -INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.11058497428894043 s -DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=273342720085073954242036592856850556877, time:1750767649.331593s req_ids:[8] -DEBUG 06-24 20:20:49 [manager.py:391] -ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:207.83519744873047ms total_cost_time:207.88049697875977ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9902 prompt_cache_len:5151 prompt_cache_ratio:0.5201979398101394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 -DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10814976692199707 s -INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.1094202995300293 s -DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=51326364993960170451700342698613703650, time:1750767649.5472248s req_ids:[8] -DEBUG 06-24 20:20:49 [manager.py:391] -ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:208.34589004516602ms total_cost_time:208.3895206451416ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9903 prompt_cache_len:5151 prompt_cache_ratio:0.5201454104816722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 -DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10816025733947754 s -INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025714874267578 s -DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=314088385529207409621295462312068046265, time:1750767649.7614405s req_ids:[8] -DEBUG 06-24 20:20:49 [manager.py:391] -ERROR 06-24 20:20:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:208.5108757019043ms total_cost_time:208.5866928100586ms,out_token_counter:1 mean_per_token_cost_time: 0.07581710815429688ms prompt_token_num:9904 prompt_cache_len:5151 prompt_cache_ratio:0.5200928917609047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 -DEBUG 06-24 20:20:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:49 [manager.py:224] router recive req id 8 cost time 0.10775375366210938 s -INFO 06-24 20:20:49 [manager.py:68] detokenization recv req id 8 cost time 0.10945653915405273 s -DEBUG 06-24 20:20:49 [manager.py:391] Prefill Batch: batch_id=173785239330932506444723210247542134277, time:1750767649.9750614s req_ids:[8] -DEBUG 06-24 20:20:49 [manager.py:391] -ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:49 lightllm_req_id:8 first_token_cost:206.7122459411621ms total_cost_time:206.75897598266602ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:9905 prompt_cache_len:5151 prompt_cache_ratio:0.5200403836446239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 -DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10947942733764648 s -INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140608787536621 s -DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=306505746546170394459764457884315686306, time:1750767650.1884894s req_ids:[8] -DEBUG 06-24 20:20:50 [manager.py:391] -ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:208.48727226257324ms total_cost_time:208.54687690734863ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:9906 prompt_cache_len:5151 prompt_cache_ratio:0.5199878861296184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 -DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10834908485412598 s -INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11004447937011719 s -DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=277306760785593645747118556100421222228, time:1750767650.4037344s req_ids:[8] -DEBUG 06-24 20:20:50 [manager.py:391] -ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:206.25042915344238ms total_cost_time:206.29429817199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9907 prompt_cache_len:5151 prompt_cache_ratio:0.519935399212678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 -DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s -INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.11022329330444336 s -DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=231771345956104574097618857690044203648, time:1750767650.615426s req_ids:[8] -DEBUG 06-24 20:20:50 [manager.py:391] -ERROR 06-24 20:20:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:380.59163093566895ms total_cost_time:380.63693046569824ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9908 prompt_cache_len:5151 prompt_cache_ratio:0.5198829228905935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 -DEBUG 06-24 20:20:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:50 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s -INFO 06-24 20:20:50 [manager.py:68] detokenization recv req id 8 cost time 0.10998082160949707 s -DEBUG 06-24 20:20:50 [manager.py:391] Prefill Batch: batch_id=261548013875112434465631004391562673004, time:1750767650.9996867s req_ids:[8] -DEBUG 06-24 20:20:50 [manager.py:391] -ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:50 lightllm_req_id:8 first_token_cost:162.41788864135742ms total_cost_time:162.46318817138672ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9909 prompt_cache_len:5151 prompt_cache_ratio:0.5198304571601574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 -DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.1088402271270752 s -INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11102724075317383 s -DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=309018862987142025115431894535816575948, time:1750767651.1689436s req_ids:[8] -DEBUG 06-24 20:20:51 [manager.py:391] -ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:20:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 41378.152 tokens/s -DEBUG 06-24 20:20:51 [stats.py:37] Avg prompt tokens throughput: 41369.686 tokens/s -DEBUG 06-24 20:20:51 [stats.py:37] Avg generate tokens throughput: 8.466 tokens/s -INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:195.45292854309082ms total_cost_time:195.51658630371094ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:9910 prompt_cache_len:5151 prompt_cache_ratio:0.5197780020181635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 -DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.10827326774597168 s -INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000847816467285 s -DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=94489174169281886247160007134792499541, time:1750767651.370296s req_ids:[8] -DEBUG 06-24 20:20:51 [manager.py:391] -ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:198.7473964691162ms total_cost_time:198.7912654876709ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9911 prompt_cache_len:5151 prompt_cache_ratio:0.5197255574614065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 -DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.11065983772277832 s -INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.11197614669799805 s -DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=335807280223569339657691140883727269331, time:1750767651.5781727s req_ids:[8] -DEBUG 06-24 20:20:51 [manager.py:391] -ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:207.14592933654785ms total_cost_time:207.18955993652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9912 prompt_cache_len:5151 prompt_cache_ratio:0.5196731234866828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 -DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:51 [manager.py:224] router recive req id 8 cost time 0.10956597328186035 s -INFO 06-24 20:20:51 [manager.py:68] detokenization recv req id 8 cost time 0.1114192008972168 s -DEBUG 06-24 20:20:51 [manager.py:391] Prefill Batch: batch_id=320745414109206876440773494153284768016, time:1750767651.7941086s req_ids:[8] -DEBUG 06-24 20:20:51 [manager.py:391] -ERROR 06-24 20:20:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:210.07394790649414ms total_cost_time:210.11805534362793ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9913 prompt_cache_len:5151 prompt_cache_ratio:0.5196207000907899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 -DEBUG 06-24 20:20:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10977506637573242 s -INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.1116793155670166 s -DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=172244253849782513583953329480053825896, time:1750767652.0086656s req_ids:[8] -DEBUG 06-24 20:20:52 [manager.py:391] -ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:51 lightllm_req_id:8 first_token_cost:208.56976509094238ms total_cost_time:208.61244201660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9914 prompt_cache_len:5151 prompt_cache_ratio:0.5195682872705265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 -DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10772299766540527 s -INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.10955381393432617 s -DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=270987464514143766364327673023628163689, time:1750767652.2292407s req_ids:[8] -DEBUG 06-24 20:20:52 [manager.py:391] -ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:215.58475494384766ms total_cost_time:215.62790870666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9915 prompt_cache_len:5151 prompt_cache_ratio:0.5195158850226929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 -DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.11062407493591309 s -INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.11256957054138184 s -DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=254126646303119867233221327869802306417, time:1750767652.445792s req_ids:[8] -DEBUG 06-24 20:20:52 [manager.py:391] -ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:217.43512153625488ms total_cost_time:217.48018264770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9916 prompt_cache_len:5151 prompt_cache_ratio:0.5194634933440904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 -DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:52 [manager.py:224] router recive req id 8 cost time 0.10887980461120605 s -INFO 06-24 20:20:52 [manager.py:68] detokenization recv req id 8 cost time 0.11092400550842285 s -DEBUG 06-24 20:20:52 [manager.py:391] Prefill Batch: batch_id=69246610673891094944948994819703108954, time:1750767652.6751306s req_ids:[8] -DEBUG 06-24 20:20:52 [manager.py:391] -ERROR 06-24 20:20:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:407.1533679962158ms total_cost_time:407.1977138519287ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9917 prompt_cache_len:5151 prompt_cache_ratio:0.5194111122315216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 -DEBUG 06-24 20:20:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10956811904907227 s -INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11143136024475098 s -DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=107666578661603905235050777174683000406, time:1750767653.0789864s req_ids:[8] -DEBUG 06-24 20:20:53 [manager.py:391] -ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:52 lightllm_req_id:8 first_token_cost:207.69238471984863ms total_cost_time:207.73577690124512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9918 prompt_cache_len:5151 prompt_cache_ratio:0.5193587416817906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 -INFO 06-24 20:20:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:20:53 [statics_utils.py:24] mean first cost: 228.55945710240874 ms -INFO 06-24 20:20:53 [statics_utils.py:24] mean per token cost: 0.06977781880966027 ms -DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10997915267944336 s -INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11199450492858887 s -INFO 06-24 20:20:53 [manager.py:620] left req id 8can release False refcount 3 -DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=41412148109367468713269515340594385636, time:1750767653.2951975s req_ids:[8] -DEBUG 06-24 20:20:53 [manager.py:391] -ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:210.1731300354004ms total_cost_time:210.21795272827148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9919 prompt_cache_len:5151 prompt_cache_ratio:0.5193063816917028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 -DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10846924781799316 s -INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11048078536987305 s -DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=22290365138575200200818037451803950864, time:1750767653.5114722s req_ids:[8] -DEBUG 06-24 20:20:53 [manager.py:391] -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:207.9756259918213ms total_cost_time:208.0214023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9920 prompt_cache_len:5151 prompt_cache_ratio:0.5192540322580645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 -DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10810089111328125 s -INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11024928092956543 s -DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=40686012056419928807309908434534712101, time:1750767653.7233381s req_ids:[8] -DEBUG 06-24 20:20:53 [manager.py:391] -ERROR 06-24 20:20:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:208.19664001464844ms total_cost_time:208.24265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9921 prompt_cache_len:5151 prompt_cache_ratio:0.5192016933776837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 -DEBUG 06-24 20:20:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:53 [manager.py:224] router recive req id 8 cost time 0.10889339447021484 s -INFO 06-24 20:20:53 [manager.py:68] detokenization recv req id 8 cost time 0.11108589172363281 s -DEBUG 06-24 20:20:53 [manager.py:391] Prefill Batch: batch_id=297837684477516239657388566648269175007, time:1750767653.9395359s req_ids:[8] -DEBUG 06-24 20:20:53 [manager.py:391] -ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:53 lightllm_req_id:8 first_token_cost:209.65957641601562ms total_cost_time:209.70559120178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9922 prompt_cache_len:5151 prompt_cache_ratio:0.5191493650473695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 -DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10936951637268066 s -INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11138248443603516 s -DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=10970470048835329913858813642914984453, time:1750767654.1553917s req_ids:[8] -DEBUG 06-24 20:20:54 [manager.py:391] -ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:209.95163917541504ms total_cost_time:209.99693870544434ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9923 prompt_cache_len:5151 prompt_cache_ratio:0.5190970472639322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 -DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10816335678100586 s -INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012053489685059 s -DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=301056452421119756747039332133775401391, time:1750767654.3700624s req_ids:[8] -DEBUG 06-24 20:20:54 [manager.py:391] -ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:209.84601974487305ms total_cost_time:209.88988876342773ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9924 prompt_cache_len:5151 prompt_cache_ratio:0.5190447400241838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 -DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s -INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s -DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=135924630575370879627074432169980386401, time:1750767654.5877664s req_ids:[8] -DEBUG 06-24 20:20:54 [manager.py:391] -ERROR 06-24 20:20:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:369.5874214172363ms total_cost_time:369.63367462158203ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9925 prompt_cache_len:5151 prompt_cache_ratio:0.518992443324937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 -DEBUG 06-24 20:20:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:54 [manager.py:224] router recive req id 8 cost time 0.10813546180725098 s -INFO 06-24 20:20:54 [manager.py:68] detokenization recv req id 8 cost time 0.11005163192749023 s -DEBUG 06-24 20:20:54 [manager.py:391] Prefill Batch: batch_id=190992739326582510060511799598515707255, time:1750767654.959295s req_ids:[8] -DEBUG 06-24 20:20:54 [manager.py:391] -ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:54 lightllm_req_id:8 first_token_cost:207.5192928314209ms total_cost_time:207.5636386871338ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9926 prompt_cache_len:5151 prompt_cache_ratio:0.5189401571630062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 -DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10828781127929688 s -INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1102452278137207 s -DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=289454582770496445817708896174862167366, time:1750767655.1750066s req_ids:[8] -DEBUG 06-24 20:20:55 [manager.py:391] -ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:210.16550064086914ms total_cost_time:210.20984649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9927 prompt_cache_len:5151 prompt_cache_ratio:0.518887881535207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 -DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10693240165710449 s -INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1087641716003418 s -DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=280295909085302520328724668154749677591, time:1750767655.391663s req_ids:[8] -DEBUG 06-24 20:20:55 [manager.py:391] -ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:209.38515663146973ms total_cost_time:209.43093299865723ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:9928 prompt_cache_len:5151 prompt_cache_ratio:0.5188356164383562 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 -DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10798430442810059 s -INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s -DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=15569096682967370885726323017318071405, time:1750767655.607369s req_ids:[8] -DEBUG 06-24 20:20:55 [manager.py:391] -ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:210.89863777160645ms total_cost_time:210.94465255737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9929 prompt_cache_len:5151 prompt_cache_ratio:0.5187833618692719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 -DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:55 [manager.py:224] router recive req id 8 cost time 0.10831308364868164 s -INFO 06-24 20:20:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103055477142334 s -DEBUG 06-24 20:20:55 [manager.py:391] Prefill Batch: batch_id=278633271057273486695497892105762031023, time:1750767655.8240693s req_ids:[8] -DEBUG 06-24 20:20:55 [manager.py:391] -ERROR 06-24 20:20:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:207.535982131958ms total_cost_time:207.5803279876709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9930 prompt_cache_len:5151 prompt_cache_ratio:0.5187311178247734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 -DEBUG 06-24 20:20:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10807061195373535 s -INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11006784439086914 s -DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=60560146148668280968771922280226871867, time:1750767656.0383618s req_ids:[8] -DEBUG 06-24 20:20:56 [manager.py:391] -ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:55 lightllm_req_id:8 first_token_cost:211.9302749633789ms total_cost_time:211.9762897491455ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9931 prompt_cache_len:5151 prompt_cache_ratio:0.5186788843016816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 -DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s -INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11044812202453613 s -DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=11204574451932175979217886445579166321, time:1750767656.2615516s req_ids:[8] -DEBUG 06-24 20:20:56 [manager.py:391] -ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:178.88283729553223ms total_cost_time:178.9388656616211ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:9932 prompt_cache_len:5151 prompt_cache_ratio:0.5186266612968183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 -DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10929179191589355 s -INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11136865615844727 s -DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=198318554860585403016489114115363768310, time:1750767656.4393396s req_ids:[8] -DEBUG 06-24 20:20:56 [manager.py:391] -ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:371.33073806762695ms total_cost_time:371.37532234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9933 prompt_cache_len:5151 prompt_cache_ratio:0.518574448807007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 -DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:56 [manager.py:224] router recive req id 8 cost time 0.10884523391723633 s -INFO 06-24 20:20:56 [manager.py:68] detokenization recv req id 8 cost time 0.11069107055664062 s -DEBUG 06-24 20:20:56 [manager.py:391] Prefill Batch: batch_id=330590804664262865529786039201684416981, time:1750767656.8159032s req_ids:[8] -DEBUG 06-24 20:20:56 [manager.py:391] -ERROR 06-24 20:20:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:188.62175941467285ms total_cost_time:188.66705894470215ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9934 prompt_cache_len:5151 prompt_cache_ratio:0.5185222468290719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 -DEBUG 06-24 20:20:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10749483108520508 s -INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.10854244232177734 s -DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=123965026933437333118654804617355171576, time:1750767657.0100503s req_ids:[8] -DEBUG 06-24 20:20:57 [manager.py:391] -ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:56 lightllm_req_id:8 first_token_cost:206.3882350921631ms total_cost_time:206.43091201782227ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9935 prompt_cache_len:5151 prompt_cache_ratio:0.5184700553598389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 -DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.1088860034942627 s -INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.11084103584289551 s -DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=173571683204316120938873125343524767670, time:1750767657.2216396s req_ids:[8] -DEBUG 06-24 20:20:57 [manager.py:391] -ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:206.2366008758545ms total_cost_time:206.2821388244629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9936 prompt_cache_len:5151 prompt_cache_ratio:0.5184178743961353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 -DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10791563987731934 s -INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.1096951961517334 s -DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=111840610641777610316120718853855267976, time:1750767657.434745s req_ids:[8] -DEBUG 06-24 20:20:57 [manager.py:391] -ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:168.1978702545166ms total_cost_time:168.23863983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:9937 prompt_cache_len:5151 prompt_cache_ratio:0.5183657039347892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 -DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10871315002441406 s -INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.11058807373046875 s -DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=110501370039389872827039626051623097239, time:1750767657.6085336s req_ids:[8] -DEBUG 06-24 20:20:57 [manager.py:391] -ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:200.1338005065918ms total_cost_time:200.17623901367188ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9938 prompt_cache_len:5151 prompt_cache_ratio:0.5183135439726303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 -DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:57 [manager.py:224] router recive req id 8 cost time 0.10793113708496094 s -INFO 06-24 20:20:57 [manager.py:68] detokenization recv req id 8 cost time 0.10984683036804199 s -DEBUG 06-24 20:20:57 [manager.py:391] Prefill Batch: batch_id=10193515210701057199399957373723113977, time:1750767657.813302s req_ids:[8] -DEBUG 06-24 20:20:57 [manager.py:391] -ERROR 06-24 20:20:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:199.4149684906006ms total_cost_time:199.45907592773438ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9939 prompt_cache_len:5151 prompt_cache_ratio:0.5182613945064896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 -DEBUG 06-24 20:20:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10789108276367188 s -INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.10990023612976074 s -DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=258878670054681111014811377985024597995, time:1750767658.020507s req_ids:[8] -DEBUG 06-24 20:20:58 [manager.py:391] -ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:57 lightllm_req_id:8 first_token_cost:209.38777923583984ms total_cost_time:209.43045616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9940 prompt_cache_len:5151 prompt_cache_ratio:0.5182092555331992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 -DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s -INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11026382446289062 s -DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=40536146178302451164866466184241225559, time:1750767658.2347372s req_ids:[8] -DEBUG 06-24 20:20:58 [manager.py:391] -ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:369.07172203063965ms total_cost_time:369.11606788635254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:9941 prompt_cache_len:5151 prompt_cache_ratio:0.5181571270495926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 -DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s -INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s -DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=266016124966263148606793095131775562043, time:1750767658.6085687s req_ids:[8] -DEBUG 06-24 20:20:58 [manager.py:391] -ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:203.87506484985352ms total_cost_time:203.92107963562012ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9942 prompt_cache_len:5151 prompt_cache_ratio:0.5181050090525046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 -DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:58 [manager.py:224] router recive req id 8 cost time 0.10862040519714355 s -INFO 06-24 20:20:58 [manager.py:68] detokenization recv req id 8 cost time 0.11054205894470215 s -DEBUG 06-24 20:20:58 [manager.py:391] Prefill Batch: batch_id=303657712685032260571406807054286837166, time:1750767658.8234777s req_ids:[8] -DEBUG 06-24 20:20:58 [manager.py:391] -ERROR 06-24 20:20:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:207.8866958618164ms total_cost_time:207.9448699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:9943 prompt_cache_len:5151 prompt_cache_ratio:0.518052901538771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 -DEBUG 06-24 20:20:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10811805725097656 s -INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.10971856117248535 s -DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=36443218352821080560374997261471680640, time:1750767659.036104s req_ids:[8] -DEBUG 06-24 20:20:59 [manager.py:391] -ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:58 lightllm_req_id:8 first_token_cost:207.86786079406738ms total_cost_time:207.91387557983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:9944 prompt_cache_len:5151 prompt_cache_ratio:0.5180008045052292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 -DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10793256759643555 s -INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.10988950729370117 s -DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=183136565938974985875453033110202944807, time:1750767659.2506392s req_ids:[8] -DEBUG 06-24 20:20:59 [manager.py:391] -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:20:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.81597900390625ms total_cost_time:209.86056327819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9945 prompt_cache_len:5151 prompt_cache_ratio:0.517948717948718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 -DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10762524604797363 s -INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.1095426082611084 s -DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=132336491755839788229827732163589310296, time:1750767659.466637s req_ids:[8] -DEBUG 06-24 20:20:59 [manager.py:391] -ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.54632759094238ms total_cost_time:209.59830284118652ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:9946 prompt_cache_len:5151 prompt_cache_ratio:0.5178966418660769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 -DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:59 [batch.py:51] router release req id 8 -INFO 06-24 20:20:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.10875463485717773 s -INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s -DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=181758097636460314387901909898746724908, time:1750767659.682529s req_ids:[8] -DEBUG 06-24 20:20:59 [manager.py:391] -ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:209.00297164916992ms total_cost_time:209.04827117919922ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9947 prompt_cache_len:5151 prompt_cache_ratio:0.5178445762541469 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 -DEBUG 06-24 20:20:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:20:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:20:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:20:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:20:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:20:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:20:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:20:59 [manager.py:224] router recive req id 8 cost time 0.11018967628479004 s -INFO 06-24 20:20:59 [manager.py:68] detokenization recv req id 8 cost time 0.11212468147277832 s -DEBUG 06-24 20:20:59 [manager.py:391] Prefill Batch: batch_id=273274709059460904365627622650169209396, time:1750767659.8981586s req_ids:[8] -DEBUG 06-24 20:20:59 [manager.py:391] -ERROR 06-24 20:20:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:20:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:20:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:204.4076919555664ms total_cost_time:204.451322555542ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9948 prompt_cache_len:5151 prompt_cache_ratio:0.5177925211097708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:20:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 -DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10813689231872559 s -INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.11008691787719727 s -DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=185222268399279423144052114080611671725, time:1750767660.1057796s req_ids:[8] -DEBUG 06-24 20:21:00 [manager.py:391] -ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:20:59 lightllm_req_id:8 first_token_cost:204.7281265258789ms total_cost_time:204.7715187072754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9949 prompt_cache_len:5151 prompt_cache_ratio:0.5177404764297919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 -DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10974764823913574 s -INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.1116189956665039 s -DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=332464861543650424361201588821276783734, time:1750767660.3179812s req_ids:[8] -DEBUG 06-24 20:21:00 [manager.py:391] -ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:356.2922477722168ms total_cost_time:356.3370704650879ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9950 prompt_cache_len:5151 prompt_cache_ratio:0.5176884422110553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 -DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10858464241027832 s -INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s -DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=129166784603824183022138822911491398708, time:1750767660.6817322s req_ids:[8] -DEBUG 06-24 20:21:00 [manager.py:391] -ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:202.83007621765137ms total_cost_time:202.87060737609863ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:9951 prompt_cache_len:5151 prompt_cache_ratio:0.517636418450407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 -DEBUG 06-24 20:21:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:00 [manager.py:224] router recive req id 8 cost time 0.10750055313110352 s -INFO 06-24 20:21:00 [manager.py:68] detokenization recv req id 8 cost time 0.10927462577819824 s -DEBUG 06-24 20:21:00 [manager.py:391] Prefill Batch: batch_id=139641029111335068301324851956824432353, time:1750767660.8900936s req_ids:[8] -DEBUG 06-24 20:21:00 [manager.py:391] -ERROR 06-24 20:21:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:204.05268669128418ms total_cost_time:204.09727096557617ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9952 prompt_cache_len:5151 prompt_cache_ratio:0.5175844051446945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 -DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.1095573902130127 s -INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144638061523438 s -DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=26394718543908845080344080704352155005, time:1750767661.1009364s req_ids:[8] -DEBUG 06-24 20:21:01 [manager.py:391] -ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:00 lightllm_req_id:8 first_token_cost:204.1783332824707ms total_cost_time:204.23626899719238ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:9953 prompt_cache_len:5151 prompt_cache_ratio:0.5175324022907666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 -DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10763168334960938 s -INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.1095285415649414 s -DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=129480446570101706200776640659649642876, time:1750767661.3101473s req_ids:[8] -DEBUG 06-24 20:21:01 [manager.py:391] -DEBUG 06-24 20:21:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 43438.683 tokens/s -DEBUG 06-24 20:21:01 [stats.py:37] Avg prompt tokens throughput: 43430.037 tokens/s -DEBUG 06-24 20:21:01 [stats.py:37] Avg generate tokens throughput: 8.646 tokens/s -ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:209.00321006774902ms total_cost_time:209.04970169067383ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:9954 prompt_cache_len:5151 prompt_cache_ratio:0.5174804098854732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 -DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.11225128173828125 s -INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11424922943115234 s -DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=234080343148501379406998050925219224952, time:1750767661.536944s req_ids:[8] -DEBUG 06-24 20:21:01 [manager.py:391] -ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:224.49326515197754ms total_cost_time:224.53832626342773ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:9955 prompt_cache_len:5151 prompt_cache_ratio:0.5174284279256655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 -DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10790848731994629 s -INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s -DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=60172369518621274140902527900406187761, time:1750767661.7570713s req_ids:[8] -DEBUG 06-24 20:21:01 [manager.py:391] -ERROR 06-24 20:21:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:205.19328117370605ms total_cost_time:205.23881912231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9956 prompt_cache_len:5151 prompt_cache_ratio:0.5173764564081961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 -DEBUG 06-24 20:21:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:01 [manager.py:224] router recive req id 8 cost time 0.10923910140991211 s -INFO 06-24 20:21:01 [manager.py:68] detokenization recv req id 8 cost time 0.11113166809082031 s -DEBUG 06-24 20:21:01 [manager.py:391] Prefill Batch: batch_id=112305038055508124378197539320394231454, time:1750767661.9681523s req_ids:[8] -DEBUG 06-24 20:21:01 [manager.py:391] -ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:01 lightllm_req_id:8 first_token_cost:208.34994316101074ms total_cost_time:208.39309692382812ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9957 prompt_cache_len:5151 prompt_cache_ratio:0.5173244953299186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 -DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10889005661010742 s -INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s -DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=69800990742856507034415933235353276116, time:1750767662.1824415s req_ids:[8] -DEBUG 06-24 20:21:02 [manager.py:391] -ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:371.83356285095215ms total_cost_time:371.87647819519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9958 prompt_cache_len:5151 prompt_cache_ratio:0.5172725446876882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 -DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10792708396911621 s -INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s -DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=118423450614302879731655470062462990466, time:1750767662.5589702s req_ids:[8] -DEBUG 06-24 20:21:02 [manager.py:391] -ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:205.38663864135742ms total_cost_time:205.4286003112793ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9959 prompt_cache_len:5151 prompt_cache_ratio:0.5172206044783613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 -DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s -INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11082792282104492 s -DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=162848021179699535963005010316348445442, time:1750767662.7718565s req_ids:[8] -DEBUG 06-24 20:21:02 [manager.py:391] -ERROR 06-24 20:21:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:207.62181282043457ms total_cost_time:207.66496658325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:9960 prompt_cache_len:5151 prompt_cache_ratio:0.5171686746987951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 -DEBUG 06-24 20:21:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:02 [manager.py:224] router recive req id 8 cost time 0.10903406143188477 s -INFO 06-24 20:21:02 [manager.py:68] detokenization recv req id 8 cost time 0.11103057861328125 s -DEBUG 06-24 20:21:02 [manager.py:391] Prefill Batch: batch_id=172428507386319781555218910924323746751, time:1750767662.9845843s req_ids:[8] -DEBUG 06-24 20:21:02 [manager.py:391] -ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:02 lightllm_req_id:8 first_token_cost:207.55815505981445ms total_cost_time:207.60202407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9961 prompt_cache_len:5151 prompt_cache_ratio:0.5171167553458488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 -DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:03 [batch.py:51] router release req id 8 -INFO 06-24 20:21:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10780954360961914 s -INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.10966944694519043 s -DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=24463816073667950368156730698820009816, time:1750767663.1980839s req_ids:[8] -DEBUG 06-24 20:21:03 [manager.py:391] -ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:206.94231986999512ms total_cost_time:206.9871425628662ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9962 prompt_cache_len:5151 prompt_cache_ratio:0.5170648464163823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 -DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s -INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.10973930358886719 s -DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=169975598456152039738536182986348594070, time:1750767663.410929s req_ids:[8] -DEBUG 06-24 20:21:03 [manager.py:391] -ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:209.29884910583496ms total_cost_time:209.33914184570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:9963 prompt_cache_len:5151 prompt_cache_ratio:0.5170129479072568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 -DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10853981971740723 s -INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.11044859886169434 s -DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=143862426211613561403549361754594968269, time:1750767663.6276522s req_ids:[8] -DEBUG 06-24 20:21:03 [manager.py:391] -ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:209.75875854492188ms total_cost_time:209.80286598205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9964 prompt_cache_len:5151 prompt_cache_ratio:0.5169610598153352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 -DEBUG 06-24 20:21:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:03 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s -INFO 06-24 20:21:03 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s -DEBUG 06-24 20:21:03 [manager.py:391] Prefill Batch: batch_id=93552411160066230399512086584808525191, time:1750767663.8560488s req_ids:[8] -DEBUG 06-24 20:21:03 [manager.py:391] -ERROR 06-24 20:21:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:224.93457794189453ms total_cost_time:224.9772548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9965 prompt_cache_len:5151 prompt_cache_ratio:0.5169091821374812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 -DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.30924248695373535 s -INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.31040191650390625 s -DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=279714164212704705235431471574101401370, time:1750767664.2726552s req_ids:[8] -DEBUG 06-24 20:21:04 [manager.py:391] -ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:03 lightllm_req_id:8 first_token_cost:413.6035442352295ms total_cost_time:413.6490821838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9966 prompt_cache_len:5151 prompt_cache_ratio:0.5168573148705599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 -DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s -INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.10991311073303223 s -DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=135690711032428744047720222660508084969, time:1750767664.491355s req_ids:[8] -DEBUG 06-24 20:21:04 [manager.py:391] -ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:209.7623348236084ms total_cost_time:209.8076343536377ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9967 prompt_cache_len:5151 prompt_cache_ratio:0.5168054580114377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 -DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.11158490180969238 s -INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.11353611946105957 s -DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=183748781579142167714979272542056348758, time:1750767664.7064996s req_ids:[8] -DEBUG 06-24 20:21:04 [manager.py:391] -ERROR 06-24 20:21:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:209.8398208618164ms total_cost_time:209.8827362060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9968 prompt_cache_len:5151 prompt_cache_ratio:0.5167536115569823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 -DEBUG 06-24 20:21:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:04 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s -INFO 06-24 20:21:04 [manager.py:68] detokenization recv req id 8 cost time 0.11001324653625488 s -DEBUG 06-24 20:21:04 [manager.py:391] Prefill Batch: batch_id=282814132950444738923495225033291252789, time:1750767664.9222932s req_ids:[8] -DEBUG 06-24 20:21:04 [manager.py:391] -ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:04 lightllm_req_id:8 first_token_cost:210.71791648864746ms total_cost_time:210.77871322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:9969 prompt_cache_len:5151 prompt_cache_ratio:0.5167017755040626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 -DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10755729675292969 s -INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.10951709747314453 s -DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=23388699106980153411544501890576106891, time:1750767665.1380618s req_ids:[8] -DEBUG 06-24 20:21:05 [manager.py:391] -ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:208.3725929260254ms total_cost_time:208.41360092163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:9970 prompt_cache_len:5151 prompt_cache_ratio:0.5166499498495486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 -DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s -INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.11057806015014648 s -DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=22878211419665127922682799821278860024, time:1750767665.3526773s req_ids:[8] -DEBUG 06-24 20:21:05 [manager.py:391] -ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:209.4407081604004ms total_cost_time:209.49554443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:9971 prompt_cache_len:5151 prompt_cache_ratio:0.5165981345903119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 -DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10913610458374023 s -INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.11117863655090332 s -DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=232207132289008827513977614181120207721, time:1750767665.567956s req_ids:[8] -DEBUG 06-24 20:21:05 [manager.py:391] -ERROR 06-24 20:21:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:208.01281929016113ms total_cost_time:208.0550193786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9972 prompt_cache_len:5151 prompt_cache_ratio:0.516546329723225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 -DEBUG 06-24 20:21:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:05 [manager.py:224] router recive req id 8 cost time 0.10769248008728027 s -INFO 06-24 20:21:05 [manager.py:68] detokenization recv req id 8 cost time 0.10959625244140625 s -DEBUG 06-24 20:21:05 [manager.py:391] Prefill Batch: batch_id=215006537970871500549139802932536460796, time:1750767665.7822976s req_ids:[8] -DEBUG 06-24 20:21:05 [manager.py:391] -ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:05 lightllm_req_id:8 first_token_cost:377.66075134277344ms total_cost_time:377.70533561706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9973 prompt_cache_len:5151 prompt_cache_ratio:0.516494535245162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 -DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10881829261779785 s -INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11079621315002441 s -DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=17100289891285958506334936168275958895, time:1750767666.1631534s req_ids:[8] -DEBUG 06-24 20:21:06 [manager.py:391] -ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:206.4826488494873ms total_cost_time:206.52484893798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9974 prompt_cache_len:5151 prompt_cache_ratio:0.5164427511529978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 -DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10811972618103027 s -INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11015677452087402 s -DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=179813701737561185777273573297107603470, time:1750767666.37906s req_ids:[8] -DEBUG 06-24 20:21:06 [manager.py:391] -ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:209.8236083984375ms total_cost_time:209.8681926727295ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9975 prompt_cache_len:5151 prompt_cache_ratio:0.5163909774436091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 -DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s -INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s -DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=250937847908470706637058327255795703214, time:1750767666.594391s req_ids:[8] -DEBUG 06-24 20:21:06 [manager.py:391] -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:220.86191177368164ms total_cost_time:220.90411186218262ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9976 prompt_cache_len:5151 prompt_cache_ratio:0.5163392141138733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 -DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:06 [manager.py:224] router recive req id 8 cost time 0.10848808288574219 s -INFO 06-24 20:21:06 [manager.py:68] detokenization recv req id 8 cost time 0.11041259765625 s -DEBUG 06-24 20:21:06 [manager.py:391] Prefill Batch: batch_id=168796196435107336272721574124022867492, time:1750767666.8322642s req_ids:[8] -DEBUG 06-24 20:21:06 [manager.py:391] -ERROR 06-24 20:21:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:222.2137451171875ms total_cost_time:222.25642204284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:9977 prompt_cache_len:5151 prompt_cache_ratio:0.5162874611606696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 -DEBUG 06-24 20:21:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s -INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.10986924171447754 s -DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=307576314839161240363538225544365554187, time:1750767667.0494096s req_ids:[8] -DEBUG 06-24 20:21:07 [manager.py:391] -ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:06 lightllm_req_id:8 first_token_cost:206.03346824645996ms total_cost_time:206.07829093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9978 prompt_cache_len:5151 prompt_cache_ratio:0.5162357185808779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 -DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s -INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.11013531684875488 s -DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=108183174978379412043035103833611140736, time:1750767667.2684855s req_ids:[8] -DEBUG 06-24 20:21:07 [manager.py:391] -ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:219.07758712768555ms total_cost_time:219.12288665771484ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:9979 prompt_cache_len:5151 prompt_cache_ratio:0.5161839863713799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 -DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10893917083740234 s -INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s -DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=282577177561919562890152058072844039607, time:1750767667.4852571s req_ids:[8] -DEBUG 06-24 20:21:07 [manager.py:391] -ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:210.87145805358887ms total_cost_time:210.91866493225098ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:9980 prompt_cache_len:5151 prompt_cache_ratio:0.5161322645290581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 -DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10757708549499512 s -INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s -DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=120046370712717207346296916590439979355, time:1750767667.6976185s req_ids:[8] -DEBUG 06-24 20:21:07 [manager.py:391] -ERROR 06-24 20:21:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:204.37121391296387ms total_cost_time:204.41675186157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9981 prompt_cache_len:5151 prompt_cache_ratio:0.5160805530507965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 -DEBUG 06-24 20:21:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:07 [manager.py:224] router recive req id 8 cost time 0.10858297348022461 s -INFO 06-24 20:21:07 [manager.py:68] detokenization recv req id 8 cost time 0.1105186939239502 s -DEBUG 06-24 20:21:07 [manager.py:391] Prefill Batch: batch_id=222506289079808776796965038722241532514, time:1750767667.9145157s req_ids:[8] -DEBUG 06-24 20:21:07 [manager.py:391] -ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:07 lightllm_req_id:8 first_token_cost:375.5908012390137ms total_cost_time:375.63633918762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9982 prompt_cache_len:5151 prompt_cache_ratio:0.5160288519334802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 -DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10915088653564453 s -INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.1110074520111084 s -DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=94483011821250945046934461359916865649, time:1750767668.288998s req_ids:[8] -DEBUG 06-24 20:21:08 [manager.py:391] -ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:203.04465293884277ms total_cost_time:203.09019088745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:9983 prompt_cache_len:5151 prompt_cache_ratio:0.5159771611739958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 -DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10854697227478027 s -INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11046671867370605 s -DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=5297760170437189769376425518692500176, time:1750767668.5004077s req_ids:[8] -DEBUG 06-24 20:21:08 [manager.py:391] -ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:207.02123641967773ms total_cost_time:207.06605911254883ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:9984 prompt_cache_len:5151 prompt_cache_ratio:0.5159254807692307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 -DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s -INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11028051376342773 s -DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=276812833751982453210791032269573598053, time:1750767668.7128441s req_ids:[8] -DEBUG 06-24 20:21:08 [manager.py:391] -ERROR 06-24 20:21:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:207.3996067047119ms total_cost_time:207.41605758666992ms,out_token_counter:1 mean_per_token_cost_time: 0.016450881958007812ms prompt_token_num:9985 prompt_cache_len:5151 prompt_cache_ratio:0.5158738107160741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 -DEBUG 06-24 20:21:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:08 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s -INFO 06-24 20:21:08 [manager.py:68] detokenization recv req id 8 cost time 0.11006879806518555 s -DEBUG 06-24 20:21:08 [manager.py:391] Prefill Batch: batch_id=39765604879132248614607728293641060354, time:1750767668.9272919s req_ids:[8] -DEBUG 06-24 20:21:08 [manager.py:391] -ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:08 lightllm_req_id:8 first_token_cost:211.12680435180664ms total_cost_time:211.16971969604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:9986 prompt_cache_len:5151 prompt_cache_ratio:0.5158221510114159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 -DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10746955871582031 s -INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.10936903953552246 s -DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=294372783429016060194793841420548569083, time:1750767669.1445508s req_ids:[8] -DEBUG 06-24 20:21:09 [manager.py:391] -ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:207.17644691467285ms total_cost_time:207.22270011901855ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:9987 prompt_cache_len:5151 prompt_cache_ratio:0.5157705016521478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 -DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10744261741638184 s -INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.1091310977935791 s -DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=140881631131160804137596483978217227902, time:1750767669.354957s req_ids:[8] -DEBUG 06-24 20:21:09 [manager.py:391] -ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:163.1300449371338ms total_cost_time:163.17367553710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9988 prompt_cache_len:5151 prompt_cache_ratio:0.5157188626351622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 -DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10692048072814941 s -INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.10885047912597656 s -DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=138661740903121516764081382344389267930, time:1750767669.5257733s req_ids:[8] -DEBUG 06-24 20:21:09 [manager.py:391] -ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:195.5397129058838ms total_cost_time:195.58215141296387ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9989 prompt_cache_len:5151 prompt_cache_ratio:0.5156672339573531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 -DEBUG 06-24 20:21:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:09 [manager.py:224] router recive req id 8 cost time 0.10903811454772949 s -INFO 06-24 20:21:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s -DEBUG 06-24 20:21:09 [manager.py:391] Prefill Batch: batch_id=166849256525591806119343712592361928164, time:1750767669.7309976s req_ids:[8] -DEBUG 06-24 20:21:09 [manager.py:391] -ERROR 06-24 20:21:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:363.4190559387207ms total_cost_time:363.4629249572754ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9990 prompt_cache_len:5151 prompt_cache_ratio:0.5156156156156156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 -DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10833311080932617 s -INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.1096644401550293 s -DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=57587758008193816251025902434036326038, time:1750767670.0981374s req_ids:[8] -DEBUG 06-24 20:21:10 [manager.py:391] -ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:09 lightllm_req_id:8 first_token_cost:204.8776149749756ms total_cost_time:204.91981506347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:9991 prompt_cache_len:5151 prompt_cache_ratio:0.5155640076068462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 -DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:10 [batch.py:51] router release req id 8 -INFO 06-24 20:21:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s -INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10923027992248535 s -DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=280835558985208009245880622555525650595, time:1750767670.3100107s req_ids:[8] -DEBUG 06-24 20:21:10 [manager.py:391] -ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:203.52697372436523ms total_cost_time:203.57155799865723ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:9992 prompt_cache_len:5151 prompt_cache_ratio:0.5155124099279423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 -DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10964322090148926 s -DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=90450806788128003340951744083752884362, time:1750767670.520659s req_ids:[8] -DEBUG 06-24 20:21:10 [manager.py:391] -ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:205.32751083374023ms total_cost_time:205.3694725036621ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9993 prompt_cache_len:5151 prompt_cache_ratio:0.5154608225758031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 -DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.10808920860290527 s -INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s -DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=180899965111927181935825818141597714707, time:1750767670.742921s req_ids:[8] -DEBUG 06-24 20:21:10 [manager.py:391] -ERROR 06-24 20:21:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:219.99502182006836ms total_cost_time:220.03912925720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:9994 prompt_cache_len:5151 prompt_cache_ratio:0.5154092455473284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 -DEBUG 06-24 20:21:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:10 [manager.py:224] router recive req id 8 cost time 0.1080024242401123 s -INFO 06-24 20:21:10 [manager.py:68] detokenization recv req id 8 cost time 0.10934114456176758 s -DEBUG 06-24 20:21:10 [manager.py:391] Prefill Batch: batch_id=40104557225523290884687562116290541949, time:1750767670.958038s req_ids:[8] -DEBUG 06-24 20:21:10 [manager.py:391] -ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:10 lightllm_req_id:8 first_token_cost:208.94765853881836ms total_cost_time:208.99105072021484ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:9995 prompt_cache_len:5151 prompt_cache_ratio:0.5153576788394197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 -DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10812520980834961 s -INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.10949373245239258 s -DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=241801547083469787472972769137870442853, time:1750767671.1746902s req_ids:[8] -DEBUG 06-24 20:21:11 [manager.py:391] -ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:207.68404006958008ms total_cost_time:207.72790908813477ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:9996 prompt_cache_len:5151 prompt_cache_ratio:0.5153061224489796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 -DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10889649391174316 s -INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.11012411117553711 s -DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=58879665799645463204103263682261571893, time:1750767671.3969471s req_ids:[8] -DEBUG 06-24 20:21:11 [manager.py:391] -DEBUG 06-24 20:21:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 42536.888 tokens/s -DEBUG 06-24 20:21:11 [stats.py:37] Avg prompt tokens throughput: 42528.362 tokens/s -DEBUG 06-24 20:21:11 [stats.py:37] Avg generate tokens throughput: 8.526 tokens/s -ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:220.68381309509277ms total_cost_time:220.72625160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:9997 prompt_cache_len:5151 prompt_cache_ratio:0.5152545763729118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 -DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10813021659851074 s -INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.10937356948852539 s -DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=38477199961946528073559778566850430395, time:1750767671.6136253s req_ids:[8] -DEBUG 06-24 20:21:11 [manager.py:391] -ERROR 06-24 20:21:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:373.7192153930664ms total_cost_time:373.762845993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:9998 prompt_cache_len:5151 prompt_cache_ratio:0.5152030406081216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 -DEBUG 06-24 20:21:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:11 [manager.py:224] router recive req id 8 cost time 0.10929417610168457 s -INFO 06-24 20:21:11 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s -DEBUG 06-24 20:21:11 [manager.py:391] Prefill Batch: batch_id=203684489208498424483161072921823719551, time:1750767671.9913633s req_ids:[8] -DEBUG 06-24 20:21:11 [manager.py:391] -ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:11 lightllm_req_id:8 first_token_cost:207.56864547729492ms total_cost_time:207.6106071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:9999 prompt_cache_len:5151 prompt_cache_ratio:0.5151515151515151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 -DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.11094117164611816 s -INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.11201906204223633 s -DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=196829641810847984290401703276510273359, time:1750767672.2064517s req_ids:[8] -DEBUG 06-24 20:21:12 [manager.py:391] -ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.51469039916992ms total_cost_time:208.59551429748535ms,out_token_counter:1 mean_per_token_cost_time: 0.08082389831542969ms prompt_token_num:10000 prompt_cache_len:5151 prompt_cache_ratio:0.5151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 -DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10743951797485352 s -INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10855865478515625 s -DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=243736951514448719717173695058871981784, time:1750767672.4224358s req_ids:[8] -DEBUG 06-24 20:21:12 [manager.py:391] -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.41240882873535ms total_cost_time:208.45556259155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10001 prompt_cache_len:5151 prompt_cache_ratio:0.5150484951504849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 -DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10831999778747559 s -INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950946807861328 s -DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=207492440192782490309536909041136545524, time:1750767672.6367188s req_ids:[8] -DEBUG 06-24 20:21:12 [manager.py:391] -ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:207.8382968902588ms total_cost_time:207.88145065307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10002 prompt_cache_len:5151 prompt_cache_ratio:0.5149970005998801 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 -DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:12 [manager.py:224] router recive req id 8 cost time 0.10789656639099121 s -INFO 06-24 20:21:12 [manager.py:68] detokenization recv req id 8 cost time 0.10902714729309082 s -DEBUG 06-24 20:21:12 [manager.py:391] Prefill Batch: batch_id=190319853639039492489269438048347113176, time:1750767672.8516507s req_ids:[8] -DEBUG 06-24 20:21:12 [manager.py:391] -ERROR 06-24 20:21:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:209.66863632202148ms total_cost_time:209.72084999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:10003 prompt_cache_len:5151 prompt_cache_ratio:0.5149455163450964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 -DEBUG 06-24 20:21:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s -INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10923933982849121 s -DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=221056692862367080316260394023245105224, time:1750767673.0665367s req_ids:[8] -DEBUG 06-24 20:21:13 [manager.py:391] -ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:12 lightllm_req_id:8 first_token_cost:208.76836776733398ms total_cost_time:208.81104469299316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10004 prompt_cache_len:5151 prompt_cache_ratio:0.5148940423830468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 -DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.1076042652130127 s -INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10877776145935059 s -DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=320674779231369677908385547073158256476, time:1750767673.2812364s req_ids:[8] -DEBUG 06-24 20:21:13 [manager.py:391] -ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2000503540039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10005 prompt_cache_len:5151 prompt_cache_ratio:0.5148425787106446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 -DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10855579376220703 s -INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10974597930908203 s -DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=33524976610381482095798127027695539967, time:1750767673.4932196s req_ids:[8] -DEBUG 06-24 20:21:13 [manager.py:391] -ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:371.32978439331055ms total_cost_time:371.37413024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10006 prompt_cache_len:5151 prompt_cache_ratio:0.5147911253248051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 -DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:13 [manager.py:224] router recive req id 8 cost time 0.10773015022277832 s -INFO 06-24 20:21:13 [manager.py:68] detokenization recv req id 8 cost time 0.10886120796203613 s -DEBUG 06-24 20:21:13 [manager.py:391] Prefill Batch: batch_id=333394434092002072566714290280727377900, time:1750767673.8692977s req_ids:[8] -DEBUG 06-24 20:21:13 [manager.py:391] -ERROR 06-24 20:21:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:207.31043815612793ms total_cost_time:207.352876663208ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10007 prompt_cache_len:5151 prompt_cache_ratio:0.5147396822224443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 -DEBUG 06-24 20:21:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.1076500415802002 s -INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10875654220581055 s -DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=309755671929120129278934526914585655267, time:1750767674.0825815s req_ids:[8] -DEBUG 06-24 20:21:14 [manager.py:391] -ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:13 lightllm_req_id:8 first_token_cost:209.74969863891602ms total_cost_time:209.794282913208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10008 prompt_cache_len:5151 prompt_cache_ratio:0.5146882494004796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 -DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s -INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10917401313781738 s -DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=1713855337256672981006411168032889607, time:1750767674.2980914s req_ids:[8] -DEBUG 06-24 20:21:14 [manager.py:391] -ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:207.72957801818848ms total_cost_time:207.77130126953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10009 prompt_cache_len:5151 prompt_cache_ratio:0.5146368268558298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 -DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10766983032226562 s -INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10874605178833008 s -DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=55819940519552811489077764129527821583, time:1750767674.5118341s req_ids:[8] -DEBUG 06-24 20:21:14 [manager.py:391] -ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:208.18305015563965ms total_cost_time:208.22548866271973ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10010 prompt_cache_len:5151 prompt_cache_ratio:0.5145854145854146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 -DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10849499702453613 s -INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10955595970153809 s -DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=272493082653217517005898172507864264139, time:1750767674.7393186s req_ids:[8] -DEBUG 06-24 20:21:14 [manager.py:391] -ERROR 06-24 20:21:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:226.426362991333ms total_cost_time:226.4697551727295ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10011 prompt_cache_len:5151 prompt_cache_ratio:0.5145340125861553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 -DEBUG 06-24 20:21:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:14 [manager.py:224] router recive req id 8 cost time 0.10840082168579102 s -INFO 06-24 20:21:14 [manager.py:68] detokenization recv req id 8 cost time 0.10949969291687012 s -DEBUG 06-24 20:21:14 [manager.py:391] Prefill Batch: batch_id=166510964683360023381071405971394095634, time:1750767674.959557s req_ids:[8] -DEBUG 06-24 20:21:14 [manager.py:391] -ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:14 lightllm_req_id:8 first_token_cost:210.55078506469727ms total_cost_time:210.5729579925537ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:10012 prompt_cache_len:5151 prompt_cache_ratio:0.514482620854974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 -DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10811352729797363 s -INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.11001467704772949 s -DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=14913738956526192091614062670473285126, time:1750767675.1756477s req_ids:[8] -DEBUG 06-24 20:21:15 [manager.py:391] -ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:210.39891242980957ms total_cost_time:210.44373512268066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10013 prompt_cache_len:5151 prompt_cache_ratio:0.5144312393887945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 -DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10783505439758301 s -INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s -DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=203186622563361582424386144580601664820, time:1750767675.3903215s req_ids:[8] -DEBUG 06-24 20:21:15 [manager.py:391] -ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:208.86921882629395ms total_cost_time:208.91523361206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10014 prompt_cache_len:5151 prompt_cache_ratio:0.5143798681845416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 -DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:15 [manager.py:224] router recive req id 8 cost time 0.10798192024230957 s -INFO 06-24 20:21:15 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s -DEBUG 06-24 20:21:15 [manager.py:391] Prefill Batch: batch_id=11419226073631827366246229797585728499, time:1750767675.6058543s req_ids:[8] -DEBUG 06-24 20:21:15 [manager.py:391] -ERROR 06-24 20:21:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:402.16970443725586ms total_cost_time:402.21309661865234ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10015 prompt_cache_len:5151 prompt_cache_ratio:0.5143285072391413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 -DEBUG 06-24 20:21:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10824799537658691 s -INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11034226417541504 s -DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=35136070204536014558746874922082147397, time:1750767676.012616s req_ids:[8] -DEBUG 06-24 20:21:16 [manager.py:391] -ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:15 lightllm_req_id:8 first_token_cost:203.40204238891602ms total_cost_time:203.44805717468262ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10016 prompt_cache_len:5151 prompt_cache_ratio:0.5142771565495208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 -DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10773706436157227 s -INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.10965132713317871 s -DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=56638152722488877609078004718479959184, time:1750767676.2218294s req_ids:[8] -DEBUG 06-24 20:21:16 [manager.py:391] -ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:202.8980255126953ms total_cost_time:202.9399871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10017 prompt_cache_len:5151 prompt_cache_ratio:0.5142258161126085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 -DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10808801651000977 s -INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.10997247695922852 s -DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=11701099602097811231512969437967722286, time:1750767676.4320555s req_ids:[8] -DEBUG 06-24 20:21:16 [manager.py:391] -ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:204.85711097717285ms total_cost_time:204.90097999572754ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10018 prompt_cache_len:5151 prompt_cache_ratio:0.5141744859253344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 -DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s -INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11041522026062012 s -DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=182227443091019224901679317752640373229, time:1750767676.6429405s req_ids:[8] -DEBUG 06-24 20:21:16 [manager.py:391] -ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:206.84266090393066ms total_cost_time:206.88652992248535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10019 prompt_cache_len:5151 prompt_cache_ratio:0.5141231659846291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 -DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:16 [manager.py:224] router recive req id 8 cost time 0.1093287467956543 s -INFO 06-24 20:21:16 [manager.py:68] detokenization recv req id 8 cost time 0.11126136779785156 s -DEBUG 06-24 20:21:16 [manager.py:391] Prefill Batch: batch_id=148329797919666168834807102624215072789, time:1750767676.8583493s req_ids:[8] -DEBUG 06-24 20:21:16 [manager.py:391] -ERROR 06-24 20:21:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:207.4108123779297ms total_cost_time:207.4570655822754ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10020 prompt_cache_len:5151 prompt_cache_ratio:0.5140718562874251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 -INFO 06-24 20:21:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:21:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s -INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.1104884147644043 s -DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=140381707006270261437390205709327570297, time:1750767677.0698934s req_ids:[8] -DEBUG 06-24 20:21:17 [manager.py:391] -ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:16 lightllm_req_id:8 first_token_cost:202.83174514770508ms total_cost_time:202.87585258483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10021 prompt_cache_len:5151 prompt_cache_ratio:0.5140205568306556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 -DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:17 [batch.py:51] router release req id 8 -INFO 06-24 20:21:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10772323608398438 s -INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.10965847969055176 s -DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=279588767738352105954396921341654986589, time:1750767677.2788773s req_ids:[8] -DEBUG 06-24 20:21:17 [manager.py:391] -ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:205.56235313415527ms total_cost_time:205.60765266418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10022 prompt_cache_len:5151 prompt_cache_ratio:0.5139692676112553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 -DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10792398452758789 s -INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.10994768142700195 s -DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=83750187988892482383377707812776377455, time:1750767677.492415s req_ids:[8] -DEBUG 06-24 20:21:17 [manager.py:391] -ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:371.2608814239502ms total_cost_time:371.3047504425049ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10023 prompt_cache_len:5151 prompt_cache_ratio:0.5139179886261598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 -DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:17 [manager.py:224] router recive req id 8 cost time 0.10926437377929688 s -INFO 06-24 20:21:17 [manager.py:68] detokenization recv req id 8 cost time 0.11119294166564941 s -DEBUG 06-24 20:21:17 [manager.py:391] Prefill Batch: batch_id=97888943178495349940089458728766288825, time:1750767677.8678362s req_ids:[8] -DEBUG 06-24 20:21:17 [manager.py:391] -ERROR 06-24 20:21:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:204.0398120880127ms total_cost_time:204.08344268798828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10024 prompt_cache_len:5151 prompt_cache_ratio:0.5138667198723065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 -DEBUG 06-24 20:21:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10876917839050293 s -INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s -DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=74609362827850303254627722039856694498, time:1750767678.0775187s req_ids:[8] -DEBUG 06-24 20:21:18 [manager.py:391] -ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:17 lightllm_req_id:8 first_token_cost:204.6823501586914ms total_cost_time:204.72478866577148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10025 prompt_cache_len:5151 prompt_cache_ratio:0.5138154613466334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 -DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s -INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s -DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=197676765076988470015547424773743435579, time:1750767678.288581s req_ids:[8] -DEBUG 06-24 20:21:18 [manager.py:391] -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:206.8500518798828ms total_cost_time:206.8920135498047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10026 prompt_cache_len:5151 prompt_cache_ratio:0.5137642130460802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 -DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.10864424705505371 s -INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11062765121459961 s -DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=218786671522666401582876905405076478179, time:1750767678.503636s req_ids:[8] -DEBUG 06-24 20:21:18 [manager.py:391] -ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.27190780639648ms total_cost_time:209.3186378479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:10027 prompt_cache_len:5151 prompt_cache_ratio:0.5137129749675875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 -DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.1094670295715332 s -INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11155986785888672 s -DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=156214872504664219314625755335517720760, time:1750767678.7193499s req_ids:[8] -DEBUG 06-24 20:21:18 [manager.py:391] -ERROR 06-24 20:21:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.85102653503418ms total_cost_time:209.89489555358887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10028 prompt_cache_len:5151 prompt_cache_ratio:0.5136617471080973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 -DEBUG 06-24 20:21:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:18 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s -INFO 06-24 20:21:18 [manager.py:68] detokenization recv req id 8 cost time 0.11045622825622559 s -DEBUG 06-24 20:21:18 [manager.py:391] Prefill Batch: batch_id=101983033354726193911074334637439386235, time:1750767678.9340477s req_ids:[8] -DEBUG 06-24 20:21:18 [manager.py:391] -ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:18 lightllm_req_id:8 first_token_cost:209.42044258117676ms total_cost_time:209.46669578552246ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10029 prompt_cache_len:5151 prompt_cache_ratio:0.5136105294645528 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 -DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.10811114311218262 s -INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s -DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=46514375001138887762635820552688981950, time:1750767679.1505723s req_ids:[8] -DEBUG 06-24 20:21:19 [manager.py:391] -ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:212.2490406036377ms total_cost_time:212.3098373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10030 prompt_cache_len:5151 prompt_cache_ratio:0.5135593220338983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 -DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.31372690200805664 s -INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.31608033180236816 s -DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=300772357665255402163766131268156828510, time:1750767679.576192s req_ids:[8] -DEBUG 06-24 20:21:19 [manager.py:391] -ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:420.94874382019043ms total_cost_time:420.9935665130615ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10031 prompt_cache_len:5151 prompt_cache_ratio:0.5135081248130795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 -DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:19 [manager.py:224] router recive req id 8 cost time 0.10795783996582031 s -INFO 06-24 20:21:19 [manager.py:68] detokenization recv req id 8 cost time 0.10981202125549316 s -DEBUG 06-24 20:21:19 [manager.py:391] Prefill Batch: batch_id=214689524642129865984303445807051624990, time:1750767679.7948742s req_ids:[8] -DEBUG 06-24 20:21:19 [manager.py:391] -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:211.0157012939453ms total_cost_time:211.0612392425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10032 prompt_cache_len:5151 prompt_cache_ratio:0.5134569377990431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 -DEBUG 06-24 20:21:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.1106102466583252 s -INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11255002021789551 s -DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=60845672961888697452006653917860628399, time:1750767680.0109658s req_ids:[8] -DEBUG 06-24 20:21:20 [manager.py:391] -ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:19 lightllm_req_id:8 first_token_cost:207.61632919311523ms total_cost_time:207.66091346740723ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10033 prompt_cache_len:5151 prompt_cache_ratio:0.5134057609887371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 -DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.11033987998962402 s -INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11236858367919922 s -DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=274418926839260882035796834976540417292, time:1750767680.2266855s req_ids:[8] -DEBUG 06-24 20:21:20 [manager.py:391] -ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:206.99286460876465ms total_cost_time:207.03816413879395ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10034 prompt_cache_len:5151 prompt_cache_ratio:0.513354594379111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 -DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10864782333374023 s -INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11064577102661133 s -DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=180548011243471030438661503036389484699, time:1750767680.437794s req_ids:[8] -DEBUG 06-24 20:21:20 [manager.py:391] -ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:202.50535011291504ms total_cost_time:202.57258415222168ms,out_token_counter:1 mean_per_token_cost_time: 0.06723403930664062ms prompt_token_num:10035 prompt_cache_len:5151 prompt_cache_ratio:0.5133034379671151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 -DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10869574546813965 s -INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.11046695709228516 s -DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=200486883765685384610084020189093497944, time:1750767680.6474435s req_ids:[8] -DEBUG 06-24 20:21:20 [manager.py:391] -ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:166.26310348510742ms total_cost_time:166.3055419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10036 prompt_cache_len:5151 prompt_cache_ratio:0.5132522917497011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 -DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:20 [batch.py:51] router release req id 8 -INFO 06-24 20:21:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:20 [manager.py:224] router recive req id 8 cost time 0.10906791687011719 s -INFO 06-24 20:21:20 [manager.py:68] detokenization recv req id 8 cost time 0.1109163761138916 s -DEBUG 06-24 20:21:20 [manager.py:391] Prefill Batch: batch_id=294965701930188687970334612481347691767, time:1750767680.8197846s req_ids:[8] -DEBUG 06-24 20:21:20 [manager.py:391] -ERROR 06-24 20:21:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:205.6879997253418ms total_cost_time:205.72876930236816ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10037 prompt_cache_len:5151 prompt_cache_ratio:0.5132011557238219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 -DEBUG 06-24 20:21:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.10898303985595703 s -INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11095356941223145 s -DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=100659100704734653746762311064242140258, time:1750767681.0301135s req_ids:[8] -DEBUG 06-24 20:21:21 [manager.py:391] -ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:20 lightllm_req_id:8 first_token_cost:376.8141269683838ms total_cost_time:376.87230110168457ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:10038 prompt_cache_len:5151 prompt_cache_ratio:0.5131500298864315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 -DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:21 [batch.py:51] router release req id 8 -DEBUG 06-24 20:21:21 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:21 [manager.py:283] -DEBUG 06-24 20:21:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:21 [manager.py:284] -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.11017560958862305 s -INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.1121528148651123 s -DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=35295932470831007959154153158677129875, time:1750767681.4098794s req_ids:[8] -DEBUG 06-24 20:21:21 [manager.py:391] -DEBUG 06-24 20:21:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 42031.247 tokens/s -DEBUG 06-24 20:21:21 [stats.py:37] Avg prompt tokens throughput: 42022.858 tokens/s -DEBUG 06-24 20:21:21 [stats.py:37] Avg generate tokens throughput: 8.389 tokens/s -ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:202.7261257171631ms total_cost_time:202.77118682861328ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10039 prompt_cache_len:5151 prompt_cache_ratio:0.5130989142344855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 -DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.10851335525512695 s -INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11042237281799316 s -DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=265116536251123485920688320572071467933, time:1750767681.624575s req_ids:[8] -DEBUG 06-24 20:21:21 [manager.py:391] -ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:209.29861068725586ms total_cost_time:209.34176445007324ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10040 prompt_cache_len:5151 prompt_cache_ratio:0.5130478087649403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 -DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:21 [manager.py:224] router recive req id 8 cost time 0.1091604232788086 s -INFO 06-24 20:21:21 [manager.py:68] detokenization recv req id 8 cost time 0.11104679107666016 s -DEBUG 06-24 20:21:21 [manager.py:391] Prefill Batch: batch_id=100863527701970576721803157899893907785, time:1750767681.8394673s req_ids:[8] -DEBUG 06-24 20:21:21 [manager.py:391] -ERROR 06-24 20:21:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:209.29670333862305ms total_cost_time:209.33938026428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10041 prompt_cache_len:5151 prompt_cache_ratio:0.5129967134747535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 -DEBUG 06-24 20:21:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10757780075073242 s -INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.10947084426879883 s -DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=40544905080563988853088973662104220682, time:1750767682.0554178s req_ids:[8] -DEBUG 06-24 20:21:22 [manager.py:391] -ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:21 lightllm_req_id:8 first_token_cost:210.2034091949463ms total_cost_time:210.24799346923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10042 prompt_cache_len:5151 prompt_cache_ratio:0.5129456283608843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 -DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10867619514465332 s -INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11047697067260742 s -DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=184777596034010017581833146526827337886, time:1750767682.2715046s req_ids:[8] -DEBUG 06-24 20:21:22 [manager.py:391] -ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:209.60068702697754ms total_cost_time:209.64503288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10043 prompt_cache_len:5151 prompt_cache_ratio:0.5128945534202928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 -DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10893654823303223 s -INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11083602905273438 s -DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=4247196397283856712166793552981517271, time:1750767682.4868393s req_ids:[8] -DEBUG 06-24 20:21:22 [manager.py:391] -ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:210.56222915649414ms total_cost_time:210.60585975646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10044 prompt_cache_len:5151 prompt_cache_ratio:0.5128434886499402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 -DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.1079404354095459 s -INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.1097254753112793 s -DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=322491184833666962516974785292692919732, time:1750767682.70236s req_ids:[8] -DEBUG 06-24 20:21:22 [manager.py:391] -ERROR 06-24 20:21:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:209.8088264465332ms total_cost_time:209.8519802093506ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10045 prompt_cache_len:5151 prompt_cache_ratio:0.5127924340467894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 -DEBUG 06-24 20:21:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:22 [manager.py:224] router recive req id 8 cost time 0.10824751853942871 s -INFO 06-24 20:21:22 [manager.py:68] detokenization recv req id 8 cost time 0.11027884483337402 s -DEBUG 06-24 20:21:22 [manager.py:391] Prefill Batch: batch_id=299665894762378377012958077671039109994, time:1750767682.9185252s req_ids:[8] -DEBUG 06-24 20:21:22 [manager.py:391] -ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:22 lightllm_req_id:8 first_token_cost:210.37888526916504ms total_cost_time:210.42394638061523ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10046 prompt_cache_len:5151 prompt_cache_ratio:0.5127413896078041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 -DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.10821390151977539 s -INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11014485359191895 s -DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=287179751297200290519753863543706986456, time:1750767683.1344461s req_ids:[8] -DEBUG 06-24 20:21:23 [manager.py:391] -INFO 06-24 20:21:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:21:23 [statics_utils.py:24] mean first cost: 228.526973546725 ms -INFO 06-24 20:21:23 [statics_utils.py:24] mean per token cost: 0.06921560093324891 ms -ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:380.9094429016113ms total_cost_time:380.9523582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10047 prompt_cache_len:5151 prompt_cache_ratio:0.5126903553299492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 -DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.10894441604614258 s -INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s -DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=303547777474213759955366294495700183861, time:1750767683.5184271s req_ids:[8] -DEBUG 06-24 20:21:23 [manager.py:391] -ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:207.66949653625488ms total_cost_time:207.71360397338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10048 prompt_cache_len:5151 prompt_cache_ratio:0.5126393312101911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 -DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.11051583290100098 s -INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11236310005187988 s -DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=174089199723783062845283306768373385238, time:1750767683.7348313s req_ids:[8] -DEBUG 06-24 20:21:23 [manager.py:391] -ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:171.39816284179688ms total_cost_time:171.44536972045898ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10049 prompt_cache_len:5151 prompt_cache_ratio:0.512588317245497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 -DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:23 [manager.py:224] router recive req id 8 cost time 0.11118006706237793 s -INFO 06-24 20:21:23 [manager.py:68] detokenization recv req id 8 cost time 0.11280369758605957 s -DEBUG 06-24 20:21:23 [manager.py:391] Prefill Batch: batch_id=277400912700728182880664065744754812631, time:1750767683.9107313s req_ids:[8] -DEBUG 06-24 20:21:23 [manager.py:391] -ERROR 06-24 20:21:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:165.37165641784668ms total_cost_time:165.41600227355957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10050 prompt_cache_len:5151 prompt_cache_ratio:0.5125373134328358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 -DEBUG 06-24 20:21:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10824370384216309 s -INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11016845703125 s -DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=281318903391158202926042078210960709111, time:1750767684.0818222s req_ids:[8] -DEBUG 06-24 20:21:24 [manager.py:391] -ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:23 lightllm_req_id:8 first_token_cost:195.3134536743164ms total_cost_time:195.3575611114502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10051 prompt_cache_len:5151 prompt_cache_ratio:0.5124863197691772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 -DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:24 [batch.py:51] router release req id 8 -INFO 06-24 20:21:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10954117774963379 s -INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11153411865234375 s -DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=336596084924390563913965478354008531509, time:1750767684.2821875s req_ids:[8] -DEBUG 06-24 20:21:24 [manager.py:391] -ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:205.11269569396973ms total_cost_time:205.15680313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10052 prompt_cache_len:5151 prompt_cache_ratio:0.5124353362514923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 -DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s -INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s -DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=139117914993178353133929287899439663027, time:1750767684.4962888s req_ids:[8] -DEBUG 06-24 20:21:24 [manager.py:391] -ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:210.78205108642578ms total_cost_time:210.82353591918945ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10053 prompt_cache_len:5151 prompt_cache_ratio:0.5123843628767533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 -DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10919833183288574 s -INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11106109619140625 s -DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=160088530373999995126289933252483621141, time:1750767684.710222s req_ids:[8] -DEBUG 06-24 20:21:24 [manager.py:391] -ERROR 06-24 20:21:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:208.09292793273926ms total_cost_time:208.14871788024902ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:10054 prompt_cache_len:5151 prompt_cache_ratio:0.5123333996419336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 -DEBUG 06-24 20:21:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:24 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s -INFO 06-24 20:21:24 [manager.py:68] detokenization recv req id 8 cost time 0.11001157760620117 s -DEBUG 06-24 20:21:24 [manager.py:391] Prefill Batch: batch_id=176572674145125742239267296485982782640, time:1750767684.9253588s req_ids:[8] -DEBUG 06-24 20:21:24 [manager.py:391] -ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:24 lightllm_req_id:8 first_token_cost:386.19279861450195ms total_cost_time:386.25192642211914ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:10055 prompt_cache_len:5151 prompt_cache_ratio:0.512282446544008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 -DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.1101226806640625 s -INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11214423179626465 s -DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=299224584559672665683965250736448623562, time:1750767685.3151038s req_ids:[8] -DEBUG 06-24 20:21:25 [manager.py:391] -ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:196.16174697875977ms total_cost_time:196.1979866027832ms,out_token_counter:1 mean_per_token_cost_time: 0.0362396240234375ms prompt_token_num:10056 prompt_cache_len:5151 prompt_cache_ratio:0.5122315035799523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 -DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s -INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11045479774475098 s -DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=79424366191923499171041814660667259058, time:1750767685.520111s req_ids:[8] -DEBUG 06-24 20:21:25 [manager.py:391] -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:207.275390625ms total_cost_time:207.31878280639648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10057 prompt_cache_len:5151 prompt_cache_ratio:0.5121805707467436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 -DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10790133476257324 s -INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.10976362228393555 s -DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=214631679299171028309678258331186956528, time:1750767685.7337265s req_ids:[8] -DEBUG 06-24 20:21:25 [manager.py:391] -ERROR 06-24 20:21:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:208.49347114562988ms total_cost_time:208.53829383850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10058 prompt_cache_len:5151 prompt_cache_ratio:0.5121296480413601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 -DEBUG 06-24 20:21:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:25 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s -INFO 06-24 20:21:25 [manager.py:68] detokenization recv req id 8 cost time 0.11007952690124512 s -DEBUG 06-24 20:21:25 [manager.py:391] Prefill Batch: batch_id=179346366814783116467619421880454085909, time:1750767685.949073s req_ids:[8] -DEBUG 06-24 20:21:25 [manager.py:391] -ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:25 lightllm_req_id:8 first_token_cost:208.76574516296387ms total_cost_time:208.80842208862305ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10059 prompt_cache_len:5151 prompt_cache_ratio:0.5120787354607814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 -DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10963153839111328 s -INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11166167259216309 s -DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=194597401526203248726829121736755434946, time:1750767686.163391s req_ids:[8] -DEBUG 06-24 20:21:26 [manager.py:391] -ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:208.7993621826172ms total_cost_time:208.84299278259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10060 prompt_cache_len:5151 prompt_cache_ratio:0.5120278330019881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 -DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10854601860046387 s -INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11063599586486816 s -DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=297298980173384097366042720721692644236, time:1750767686.3796s req_ids:[8] -DEBUG 06-24 20:21:26 [manager.py:391] -ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:210.69884300231934ms total_cost_time:210.74438095092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10061 prompt_cache_len:5151 prompt_cache_ratio:0.511976940661962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 -DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s -INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11092448234558105 s -DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=92706566194223126333068329853782741279, time:1750767686.5944939s req_ids:[8] -DEBUG 06-24 20:21:26 [manager.py:391] -ERROR 06-24 20:21:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:206.6483497619629ms total_cost_time:206.6938877105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10062 prompt_cache_len:5151 prompt_cache_ratio:0.5119260584376863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 -DEBUG 06-24 20:21:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:26 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s -INFO 06-24 20:21:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055326461791992 s -DEBUG 06-24 20:21:26 [manager.py:391] Prefill Batch: batch_id=17967257669469969246100229176614469242, time:1750767686.8067243s req_ids:[8] -DEBUG 06-24 20:21:26 [manager.py:391] -ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:26 lightllm_req_id:8 first_token_cost:376.48677825927734ms total_cost_time:376.53064727783203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10063 prompt_cache_len:5151 prompt_cache_ratio:0.5118751863261453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 -DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.1085057258605957 s -INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11059427261352539 s -DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=7200054131084324765188545611968458852, time:1750767687.1868775s req_ids:[8] -DEBUG 06-24 20:21:27 [manager.py:391] -ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:205.93714714050293ms total_cost_time:205.98220825195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10064 prompt_cache_len:5151 prompt_cache_ratio:0.5118243243243243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 -DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.11086678504943848 s -INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11277651786804199 s -DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=37203945455403723882523268096833036239, time:1750767687.4026315s req_ids:[8] -DEBUG 06-24 20:21:27 [manager.py:391] -ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:204.99014854431152ms total_cost_time:205.0340175628662ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10065 prompt_cache_len:5151 prompt_cache_ratio:0.5117734724292101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 -DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.10983061790466309 s -INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.1116647720336914 s -DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=120232757590422401504228173982753513999, time:1750767687.6123137s req_ids:[8] -DEBUG 06-24 20:21:27 [manager.py:391] -ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:207.18884468078613ms total_cost_time:207.23199844360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10066 prompt_cache_len:5151 prompt_cache_ratio:0.5117226306377906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 -DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:27 [batch.py:51] router release req id 8 -INFO 06-24 20:21:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:27 [manager.py:224] router recive req id 8 cost time 0.10816669464111328 s -INFO 06-24 20:21:27 [manager.py:68] detokenization recv req id 8 cost time 0.11012578010559082 s -DEBUG 06-24 20:21:27 [manager.py:391] Prefill Batch: batch_id=131606207316451692458795530323234626549, time:1750767687.824804s req_ids:[8] -DEBUG 06-24 20:21:27 [manager.py:391] -ERROR 06-24 20:21:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:206.53820037841797ms total_cost_time:206.58135414123535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10067 prompt_cache_len:5151 prompt_cache_ratio:0.5116717989470547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 -DEBUG 06-24 20:21:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10899972915649414 s -INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11099386215209961 s -DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=234026135713167960366335546678673767083, time:1750767688.0524561s req_ids:[8] -DEBUG 06-24 20:21:28 [manager.py:391] -ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:27 lightllm_req_id:8 first_token_cost:225.94308853149414ms total_cost_time:225.98886489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10068 prompt_cache_len:5151 prompt_cache_ratio:0.5116209773539928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 -DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10869646072387695 s -INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11069583892822266 s -DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=304959298521922266918311050979615680493, time:1750767688.2713532s req_ids:[8] -DEBUG 06-24 20:21:28 [manager.py:391] -ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:208.8947296142578ms total_cost_time:208.9536190032959ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10069 prompt_cache_len:5151 prompt_cache_ratio:0.5115701658555963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 -DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.10811448097229004 s -INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11007261276245117 s -DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=112585559579808366174336296295767711100, time:1750767688.4867325s req_ids:[8] -DEBUG 06-24 20:21:28 [manager.py:391] -ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:209.60283279418945ms total_cost_time:209.64622497558594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10070 prompt_cache_len:5151 prompt_cache_ratio:0.511519364448858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 -DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:28 [manager.py:224] router recive req id 8 cost time 0.1086876392364502 s -INFO 06-24 20:21:28 [manager.py:68] detokenization recv req id 8 cost time 0.11067390441894531 s -DEBUG 06-24 20:21:28 [manager.py:391] Prefill Batch: batch_id=286316830919673610697039027217866780400, time:1750767688.7025733s req_ids:[8] -DEBUG 06-24 20:21:28 [manager.py:391] -ERROR 06-24 20:21:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:375.4258155822754ms total_cost_time:375.46825408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10071 prompt_cache_len:5151 prompt_cache_ratio:0.5114685731307715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 -DEBUG 06-24 20:21:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10812616348266602 s -INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11012005805969238 s -DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=92891636877492041921651033582298098354, time:1750767689.0808687s req_ids:[8] -DEBUG 06-24 20:21:29 [manager.py:391] -ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:28 lightllm_req_id:8 first_token_cost:203.28998565673828ms total_cost_time:203.33290100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10072 prompt_cache_len:5151 prompt_cache_ratio:0.511417791898332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 -DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10799360275268555 s -INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s -DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=165887855038438303852356340589238204514, time:1750767689.2915168s req_ids:[8] -DEBUG 06-24 20:21:29 [manager.py:391] -ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:205.6889533996582ms total_cost_time:205.7335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10073 prompt_cache_len:5151 prompt_cache_ratio:0.5113670207485357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 -DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10788702964782715 s -INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.10997152328491211 s -DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=244282371890683699895524948660143192664, time:1750767689.5037596s req_ids:[8] -DEBUG 06-24 20:21:29 [manager.py:391] -ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:207.15618133544922ms total_cost_time:207.2012424468994ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10074 prompt_cache_len:5151 prompt_cache_ratio:0.51131625967838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 -DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.11013126373291016 s -INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.11208105087280273 s -DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=122056371073381441588836800094874385254, time:1750767689.724118s req_ids:[8] -DEBUG 06-24 20:21:29 [manager.py:391] -ERROR 06-24 20:21:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:217.7290916442871ms total_cost_time:217.7717685699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10075 prompt_cache_len:5151 prompt_cache_ratio:0.5112655086848635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 -DEBUG 06-24 20:21:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:29 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s -INFO 06-24 20:21:29 [manager.py:68] detokenization recv req id 8 cost time 0.10978960990905762 s -DEBUG 06-24 20:21:29 [manager.py:391] Prefill Batch: batch_id=6315164233497349545243221144006092776, time:1750767689.9410348s req_ids:[8] -DEBUG 06-24 20:21:29 [manager.py:391] -ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:29 lightllm_req_id:8 first_token_cost:205.98244667053223ms total_cost_time:206.0403823852539ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10076 prompt_cache_len:5151 prompt_cache_ratio:0.5112147677649861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 -DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.11011505126953125 s -INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11220383644104004 s -DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=111518823631385236392230933709093442634, time:1750767690.1507657s req_ids:[8] -DEBUG 06-24 20:21:30 [manager.py:391] -ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:205.3365707397461ms total_cost_time:205.39569854736328ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:10077 prompt_cache_len:5151 prompt_cache_ratio:0.5111640369157487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 -DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.1114809513092041 s -INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11359524726867676 s -DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=298061475193586900689244199838334770632, time:1750767690.3747528s req_ids:[8] -DEBUG 06-24 20:21:30 [manager.py:391] -ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:218.92857551574707ms total_cost_time:218.97149085998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10078 prompt_cache_len:5151 prompt_cache_ratio:0.5111133161341536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 -DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.1082923412322998 s -INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.11012983322143555 s -DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=331716534586550348579023054532073534859, time:1750767690.5911942s req_ids:[8] -DEBUG 06-24 20:21:30 [manager.py:391] -ERROR 06-24 20:21:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:208.8465690612793ms total_cost_time:208.8906764984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10079 prompt_cache_len:5151 prompt_cache_ratio:0.511062605417204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 -DEBUG 06-24 20:21:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:30 [manager.py:224] router recive req id 8 cost time 0.10916876792907715 s -INFO 06-24 20:21:30 [manager.py:68] detokenization recv req id 8 cost time 0.1109468936920166 s -DEBUG 06-24 20:21:30 [manager.py:391] Prefill Batch: batch_id=61858417265926975758481934947827921843, time:1750767690.8173528s req_ids:[8] -DEBUG 06-24 20:21:30 [manager.py:391] -ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:30 lightllm_req_id:8 first_token_cost:375.5214214324951ms total_cost_time:375.5674362182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10080 prompt_cache_len:5151 prompt_cache_ratio:0.5110119047619047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 -DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.10855698585510254 s -INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11070060729980469 s -DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=79888184635082909339674954571069002676, time:1750767691.1846051s req_ids:[8] -DEBUG 06-24 20:21:31 [manager.py:391] -ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:188.67230415344238ms total_cost_time:188.71712684631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10081 prompt_cache_len:5151 prompt_cache_ratio:0.5109612141652614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 -DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.10771369934082031 s -INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s -DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=300508760914918063409485340260319963028, time:1750767691.3840997s req_ids:[8] -DEBUG 06-24 20:21:31 [manager.py:391] -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:21:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 43014.791 tokens/s -DEBUG 06-24 20:21:31 [stats.py:37] Avg prompt tokens throughput: 43006.143 tokens/s -DEBUG 06-24 20:21:31 [stats.py:37] Avg generate tokens throughput: 8.648 tokens/s -INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:206.72369003295898ms total_cost_time:206.76612854003906ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10082 prompt_cache_len:5151 prompt_cache_ratio:0.5109105336242808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 -DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s -INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11094975471496582 s -DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=170469209074526881710686611792377875569, time:1750767691.592726s req_ids:[8] -DEBUG 06-24 20:21:31 [manager.py:391] -ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:205.44815063476562ms total_cost_time:205.4905891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10083 prompt_cache_len:5151 prompt_cache_ratio:0.5108598631359714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 -DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:31 [manager.py:224] router recive req id 8 cost time 0.1087334156036377 s -INFO 06-24 20:21:31 [manager.py:68] detokenization recv req id 8 cost time 0.11064434051513672 s -DEBUG 06-24 20:21:31 [manager.py:391] Prefill Batch: batch_id=129629005656283229777702711253780644293, time:1750767691.8045309s req_ids:[8] -DEBUG 06-24 20:21:31 [manager.py:391] -ERROR 06-24 20:21:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:206.21132850646973ms total_cost_time:206.2525749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10084 prompt_cache_len:5151 prompt_cache_ratio:0.5108092026973423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 -DEBUG 06-24 20:21:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10755228996276855 s -INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.10948061943054199 s -DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=143642637379412976413601307716440487873, time:1750767692.019238s req_ids:[8] -DEBUG 06-24 20:21:32 [manager.py:391] -ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:31 lightllm_req_id:8 first_token_cost:210.85238456726074ms total_cost_time:210.89744567871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10085 prompt_cache_len:5151 prompt_cache_ratio:0.510758552305404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 -DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10807371139526367 s -INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s -DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=256154671319462269841832064768910285416, time:1750767692.2348306s req_ids:[8] -DEBUG 06-24 20:21:32 [manager.py:391] -ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:210.4175090789795ms total_cost_time:210.46161651611328ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10086 prompt_cache_len:5151 prompt_cache_ratio:0.5107079119571684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 -DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10767865180969238 s -INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s -DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=118886874068040384157817845070743212987, time:1750767692.4525623s req_ids:[8] -DEBUG 06-24 20:21:32 [manager.py:391] -ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:207.0333957672119ms total_cost_time:207.0760726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10087 prompt_cache_len:5151 prompt_cache_ratio:0.5106572816496481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 -DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:32 [manager.py:224] router recive req id 8 cost time 0.10852527618408203 s -INFO 06-24 20:21:32 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s -DEBUG 06-24 20:21:32 [manager.py:391] Prefill Batch: batch_id=335306741314306037779474740091012797355, time:1750767692.663629s req_ids:[8] -DEBUG 06-24 20:21:32 [manager.py:391] -ERROR 06-24 20:21:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:365.87023735046387ms total_cost_time:365.91649055480957ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10088 prompt_cache_len:5151 prompt_cache_ratio:0.5106066613798572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 -DEBUG 06-24 20:21:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s -INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s -DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=264820560990082970572277477303144795442, time:1750767693.043305s req_ids:[8] -DEBUG 06-24 20:21:33 [manager.py:391] -ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:32 lightllm_req_id:8 first_token_cost:219.15006637573242ms total_cost_time:219.1929817199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10089 prompt_cache_len:5151 prompt_cache_ratio:0.5105560511448112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 -DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s -INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.10983848571777344 s -DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=130701378259747965856741412539277597082, time:1750767693.2675397s req_ids:[8] -DEBUG 06-24 20:21:33 [manager.py:391] -ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:218.78504753112793ms total_cost_time:218.82939338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10090 prompt_cache_len:5151 prompt_cache_ratio:0.5105054509415262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 -DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10799241065979004 s -INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s -DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=279428142944436910302240759853843510097, time:1750767693.4851456s req_ids:[8] -DEBUG 06-24 20:21:33 [manager.py:391] -ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:207.747220993042ms total_cost_time:207.79156684875488ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10091 prompt_cache_len:5151 prompt_cache_ratio:0.5104548607670201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 -DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.1083364486694336 s -INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.11035346984863281 s -DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=113374117859218219784320365427376479668, time:1750767693.7000391s req_ids:[8] -DEBUG 06-24 20:21:33 [manager.py:391] -ERROR 06-24 20:21:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:206.72106742858887ms total_cost_time:206.78210258483887ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10092 prompt_cache_len:5151 prompt_cache_ratio:0.5104042806183116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 -DEBUG 06-24 20:21:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:33 [manager.py:224] router recive req id 8 cost time 0.10768485069274902 s -INFO 06-24 20:21:33 [manager.py:68] detokenization recv req id 8 cost time 0.1097266674041748 s -DEBUG 06-24 20:21:33 [manager.py:391] Prefill Batch: batch_id=191613240756096007719658622299793828887, time:1750767693.924504s req_ids:[8] -DEBUG 06-24 20:21:33 [manager.py:391] -ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:33 lightllm_req_id:8 first_token_cost:221.25935554504395ms total_cost_time:221.30203247070312ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10093 prompt_cache_len:5151 prompt_cache_ratio:0.5103537104924205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 -DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10917091369628906 s -INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11127543449401855 s -DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=319277156119376248216460916082578855936, time:1750767694.1404011s req_ids:[8] -DEBUG 06-24 20:21:34 [manager.py:391] -ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:206.59542083740234ms total_cost_time:206.63857460021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10094 prompt_cache_len:5151 prompt_cache_ratio:0.5103031503863681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 -DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s -INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11107373237609863 s -DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=4135892073947698169525723417846813570, time:1750767694.3539965s req_ids:[8] -DEBUG 06-24 20:21:34 [manager.py:391] -ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:207.28826522827148ms total_cost_time:207.33237266540527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10095 prompt_cache_len:5151 prompt_cache_ratio:0.5102526002971768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 -DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10818719863891602 s -INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.10989260673522949 s -DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=226497570214325670989996984532858612195, time:1750767694.5646572s req_ids:[8] -DEBUG 06-24 20:21:34 [manager.py:391] -ERROR 06-24 20:21:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:363.5573387145996ms total_cost_time:363.6033535003662ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10096 prompt_cache_len:5151 prompt_cache_ratio:0.5102020602218701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 -DEBUG 06-24 20:21:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:34 [manager.py:224] router recive req id 8 cost time 0.10860300064086914 s -INFO 06-24 20:21:34 [manager.py:68] detokenization recv req id 8 cost time 0.11040282249450684 s -DEBUG 06-24 20:21:34 [manager.py:391] Prefill Batch: batch_id=14935135176430777046342855340645245349, time:1750767694.9402506s req_ids:[8] -DEBUG 06-24 20:21:34 [manager.py:391] -ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:34 lightllm_req_id:8 first_token_cost:212.3713493347168ms total_cost_time:212.4154567718506ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10097 prompt_cache_len:5151 prompt_cache_ratio:0.5101515301574725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 -DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10794782638549805 s -INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.10952281951904297 s -DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=282820079747458981836030076136828667165, time:1750767695.152686s req_ids:[8] -DEBUG 06-24 20:21:35 [manager.py:391] -ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:164.48378562927246ms total_cost_time:164.52503204345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10098 prompt_cache_len:5151 prompt_cache_ratio:0.51010101010101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 -DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10914421081542969 s -INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s -DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=201336936424233267674965533192099887897, time:1750767695.3232732s req_ids:[8] -DEBUG 06-24 20:21:35 [manager.py:391] -ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:197.4184513092041ms total_cost_time:197.4625587463379ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10099 prompt_cache_len:5151 prompt_cache_ratio:0.5100505000495098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 -DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s -INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11057329177856445 s -DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=309803038197483354678558607182292816702, time:1750767695.5274568s req_ids:[8] -DEBUG 06-24 20:21:35 [manager.py:391] -ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:206.80713653564453ms total_cost_time:206.85219764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10100 prompt_cache_len:5151 prompt_cache_ratio:0.51 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 -DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10872507095336914 s -INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s -DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=157645074527919689235193345428391882414, time:1750767695.7425334s req_ids:[8] -DEBUG 06-24 20:21:35 [manager.py:391] -ERROR 06-24 20:21:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:210.42728424072266ms total_cost_time:210.47186851501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10101 prompt_cache_len:5151 prompt_cache_ratio:0.5099495099495099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 -DEBUG 06-24 20:21:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:35 [manager.py:224] router recive req id 8 cost time 0.10873723030090332 s -INFO 06-24 20:21:35 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s -DEBUG 06-24 20:21:35 [manager.py:391] Prefill Batch: batch_id=225060325160732444418554665517828245520, time:1750767695.958877s req_ids:[8] -DEBUG 06-24 20:21:35 [manager.py:391] -ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:35 lightllm_req_id:8 first_token_cost:207.55434036254883ms total_cost_time:207.59892463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10102 prompt_cache_len:5151 prompt_cache_ratio:0.5098990298950703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 -DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.10800909996032715 s -INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.10981440544128418 s -DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=190329637547370070365289770444499947269, time:1750767696.1688066s req_ids:[8] -DEBUG 06-24 20:21:36 [manager.py:391] -ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:199.2652416229248ms total_cost_time:199.3091106414795ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10103 prompt_cache_len:5151 prompt_cache_ratio:0.5098485598337128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 -DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.108428955078125 s -INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s -DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=59374709559217071290271197130847305544, time:1750767696.3755941s req_ids:[8] -DEBUG 06-24 20:21:36 [manager.py:391] -ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:361.00292205810547ms total_cost_time:361.04822158813477ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10104 prompt_cache_len:5151 prompt_cache_ratio:0.5097980997624703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 -DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.11113739013671875 s -INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.1130683422088623 s -DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=261850602028187585112047889552634727812, time:1750767696.7415636s req_ids:[8] -DEBUG 06-24 20:21:36 [manager.py:391] -ERROR 06-24 20:21:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:207.12995529174805ms total_cost_time:207.17263221740723ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10105 prompt_cache_len:5151 prompt_cache_ratio:0.509747649678377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 -DEBUG 06-24 20:21:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:36 [manager.py:224] router recive req id 8 cost time 0.10809755325317383 s -INFO 06-24 20:21:36 [manager.py:68] detokenization recv req id 8 cost time 0.11001253128051758 s -DEBUG 06-24 20:21:36 [manager.py:391] Prefill Batch: batch_id=205001415949656960870601665210233184403, time:1750767696.9557235s req_ids:[8] -DEBUG 06-24 20:21:36 [manager.py:391] -ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:36 lightllm_req_id:8 first_token_cost:207.65328407287598ms total_cost_time:207.71121978759766ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10106 prompt_cache_len:5151 prompt_cache_ratio:0.5096972095784682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 -DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10825848579406738 s -INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s -DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=230390370823724051593308616579455274792, time:1750767697.1711361s req_ids:[8] -DEBUG 06-24 20:21:37 [manager.py:391] -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:208.28747749328613ms total_cost_time:208.33063125610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10107 prompt_cache_len:5151 prompt_cache_ratio:0.5096467794597803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 -DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10770845413208008 s -INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s -DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=296049167846621257719703681367170025270, time:1750767697.3843527s req_ids:[8] -DEBUG 06-24 20:21:37 [manager.py:391] -ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:207.1223258972168ms total_cost_time:207.16571807861328ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10108 prompt_cache_len:5151 prompt_cache_ratio:0.509596359319351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 -DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10924506187438965 s -INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.11127281188964844 s -DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=95441286448768445164103536021692416739, time:1750767697.6047597s req_ids:[8] -DEBUG 06-24 20:21:37 [manager.py:391] -ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:214.36476707458496ms total_cost_time:214.40911293029785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10109 prompt_cache_len:5151 prompt_cache_ratio:0.509545949154219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 -DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:37 [manager.py:224] router recive req id 8 cost time 0.10805630683898926 s -INFO 06-24 20:21:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099863052368164 s -DEBUG 06-24 20:21:37 [manager.py:391] Prefill Batch: batch_id=36605886147250795582364140084630623331, time:1750767697.818904s req_ids:[8] -DEBUG 06-24 20:21:37 [manager.py:391] -ERROR 06-24 20:21:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:209.46955680847168ms total_cost_time:209.51461791992188ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10110 prompt_cache_len:5151 prompt_cache_ratio:0.5094955489614243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 -DEBUG 06-24 20:21:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10912632942199707 s -INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.11100482940673828 s -DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=197695263134732850504251537393565290994, time:1750767698.033769s req_ids:[8] -DEBUG 06-24 20:21:38 [manager.py:391] -ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:37 lightllm_req_id:8 first_token_cost:366.4257526397705ms total_cost_time:366.4698600769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10111 prompt_cache_len:5151 prompt_cache_ratio:0.5094451587380081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 -DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:38 [batch.py:51] router release req id 8 -INFO 06-24 20:21:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.11111092567443848 s -INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.11310458183288574 s -DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=283934971650228186582675623401445386604, time:1750767698.4041662s req_ids:[8] -DEBUG 06-24 20:21:38 [manager.py:391] -ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:206.36606216430664ms total_cost_time:206.41112327575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10112 prompt_cache_len:5151 prompt_cache_ratio:0.5093947784810127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 -DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10784554481506348 s -INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.1097111701965332 s -DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=164449744627675095512392983723759750336, time:1750767698.6191638s req_ids:[8] -DEBUG 06-24 20:21:38 [manager.py:391] -ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:206.3119411468506ms total_cost_time:206.35437965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10113 prompt_cache_len:5151 prompt_cache_ratio:0.5093444081874815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 -DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:38 [manager.py:224] router recive req id 8 cost time 0.10743045806884766 s -INFO 06-24 20:21:38 [manager.py:68] detokenization recv req id 8 cost time 0.10926127433776855 s -DEBUG 06-24 20:21:38 [manager.py:391] Prefill Batch: batch_id=288633960513105798776415814023379386339, time:1750767698.830983s req_ids:[8] -DEBUG 06-24 20:21:38 [manager.py:391] -ERROR 06-24 20:21:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:205.4903507232666ms total_cost_time:205.5344581604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10114 prompt_cache_len:5151 prompt_cache_ratio:0.5092940478544592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 -DEBUG 06-24 20:21:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10814619064331055 s -INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.10994601249694824 s -DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=173706481182719942940021081714847040786, time:1750767699.0430298s req_ids:[8] -DEBUG 06-24 20:21:39 [manager.py:391] -ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:38 lightllm_req_id:8 first_token_cost:207.26728439331055ms total_cost_time:207.30948448181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10115 prompt_cache_len:5151 prompt_cache_ratio:0.5092436974789916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 -DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10780930519104004 s -INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.10912609100341797 s -DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=332655904589645261763248773447313795386, time:1750767699.2549374s req_ids:[8] -DEBUG 06-24 20:21:39 [manager.py:391] -ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:205.71255683898926ms total_cost_time:205.75690269470215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10116 prompt_cache_len:5151 prompt_cache_ratio:0.5091933570581257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 -DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10863685607910156 s -INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.1106569766998291 s -DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=325104750316030565870346665723654718032, time:1750767699.4659076s req_ids:[8] -DEBUG 06-24 20:21:39 [manager.py:391] -ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:204.43224906921387ms total_cost_time:204.47421073913574ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10117 prompt_cache_len:5151 prompt_cache_ratio:0.5091430265889098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 -DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10936379432678223 s -INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110782623291016 s -DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=314890534290048280687948159627939184885, time:1750767699.6758273s req_ids:[8] -DEBUG 06-24 20:21:39 [manager.py:391] -ERROR 06-24 20:21:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:206.52461051940918ms total_cost_time:206.56657218933105ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10118 prompt_cache_len:5151 prompt_cache_ratio:0.509092706068393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 -DEBUG 06-24 20:21:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:39 [manager.py:224] router recive req id 8 cost time 0.10737013816833496 s -INFO 06-24 20:21:39 [manager.py:68] detokenization recv req id 8 cost time 0.1092071533203125 s -DEBUG 06-24 20:21:39 [manager.py:391] Prefill Batch: batch_id=43226466367538415541417858036634786327, time:1750767699.8911033s req_ids:[8] -DEBUG 06-24 20:21:39 [manager.py:391] -ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:39 lightllm_req_id:8 first_token_cost:362.335205078125ms total_cost_time:362.35928535461426ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:10119 prompt_cache_len:5151 prompt_cache_ratio:0.5090423954936258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 -DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s -INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11126351356506348 s -DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=166336810497066430244437046254189755467, time:1750767700.2569265s req_ids:[8] -DEBUG 06-24 20:21:40 [manager.py:391] -ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:209.51032638549805ms total_cost_time:209.55610275268555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10120 prompt_cache_len:5151 prompt_cache_ratio:0.5089920948616601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 -DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10831880569458008 s -INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11021924018859863 s -DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=106403930248651607666069980985364745510, time:1750767700.4718983s req_ids:[8] -DEBUG 06-24 20:21:40 [manager.py:391] -ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:205.85393905639648ms total_cost_time:205.89685440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10121 prompt_cache_len:5151 prompt_cache_ratio:0.5089418041695485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 -DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10861325263977051 s -INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.11052274703979492 s -DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=8974130209623849790482187180423655616, time:1750767700.6862683s req_ids:[8] -DEBUG 06-24 20:21:40 [manager.py:391] -ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:205.63793182373047ms total_cost_time:205.68132400512695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10122 prompt_cache_len:5151 prompt_cache_ratio:0.508891523414345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 -DEBUG 06-24 20:21:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:40 [manager.py:224] router recive req id 8 cost time 0.10776281356811523 s -INFO 06-24 20:21:40 [manager.py:68] detokenization recv req id 8 cost time 0.10978221893310547 s -DEBUG 06-24 20:21:40 [manager.py:391] Prefill Batch: batch_id=148455892889619662892698805540155674336, time:1750767700.8962057s req_ids:[8] -DEBUG 06-24 20:21:40 [manager.py:391] -ERROR 06-24 20:21:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:207.58819580078125ms total_cost_time:207.63158798217773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10123 prompt_cache_len:5151 prompt_cache_ratio:0.5088412525931049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 -DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s -INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11081552505493164 s -DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=151584382138132060594754411575380425124, time:1750767701.1122565s req_ids:[8] -DEBUG 06-24 20:21:41 [manager.py:391] -ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:40 lightllm_req_id:8 first_token_cost:208.77742767333984ms total_cost_time:208.82272720336914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10124 prompt_cache_len:5151 prompt_cache_ratio:0.5087909917028842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 -DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.1078805923461914 s -INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.10974955558776855 s -DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=72702485005993852021724459423577230947, time:1750767701.3263645s req_ids:[8] -DEBUG 06-24 20:21:41 [manager.py:391] -ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:208.78863334655762ms total_cost_time:208.8322639465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10125 prompt_cache_len:5151 prompt_cache_ratio:0.5087407407407407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 -DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10910773277282715 s -INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11106157302856445 s -DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=105555294745046929330579415626330631802, time:1750767701.5405877s req_ids:[8] -DEBUG 06-24 20:21:41 [manager.py:391] -DEBUG 06-24 20:21:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 44154.760 tokens/s -DEBUG 06-24 20:21:41 [stats.py:37] Avg prompt tokens throughput: 44146.121 tokens/s -DEBUG 06-24 20:21:41 [stats.py:37] Avg generate tokens throughput: 8.639 tokens/s -ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:202.1503448486328ms total_cost_time:202.1937370300293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10126 prompt_cache_len:5151 prompt_cache_ratio:0.5086904997037329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 -DEBUG 06-24 20:21:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:41 [manager.py:224] router recive req id 8 cost time 0.10892009735107422 s -INFO 06-24 20:21:41 [manager.py:68] detokenization recv req id 8 cost time 0.11082744598388672 s -DEBUG 06-24 20:21:41 [manager.py:391] Prefill Batch: batch_id=293805536961258826844948065298290622296, time:1750767701.7486289s req_ids:[8] -DEBUG 06-24 20:21:41 [manager.py:391] -ERROR 06-24 20:21:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:41 lightllm_req_id:8 first_token_cost:368.31116676330566ms total_cost_time:368.35622787475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10127 prompt_cache_len:5151 prompt_cache_ratio:0.5086402685889208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 -DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s -INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11037063598632812 s -DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=54144249806072607201261505484498455589, time:1750767702.1220245s req_ids:[8] -DEBUG 06-24 20:21:42 [manager.py:391] -ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:204.67066764831543ms total_cost_time:204.71549034118652ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10128 prompt_cache_len:5151 prompt_cache_ratio:0.5085900473933649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 -DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10775375366210938 s -INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.10958075523376465 s -DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=324461330008477282178186543620538000204, time:1750767702.3337145s req_ids:[8] -DEBUG 06-24 20:21:42 [manager.py:391] -ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:164.83092308044434ms total_cost_time:164.8707389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10129 prompt_cache_len:5151 prompt_cache_ratio:0.5085398361141278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 -DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10750579833984375 s -INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.10938525199890137 s -DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=120654422921445400278997315909668341801, time:1750767702.5026917s req_ids:[8] -DEBUG 06-24 20:21:42 [manager.py:391] -ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:196.3038444519043ms total_cost_time:196.37203216552734ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:10130 prompt_cache_len:5151 prompt_cache_ratio:0.5084896347482725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 -DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10912394523620605 s -INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11092329025268555 s -DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=81235468923126583492175219618464557839, time:1750767702.7066839s req_ids:[8] -DEBUG 06-24 20:21:42 [manager.py:391] -ERROR 06-24 20:21:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:201.83563232421875ms total_cost_time:201.88331604003906ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:10131 prompt_cache_len:5151 prompt_cache_ratio:0.5084394432928635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 -DEBUG 06-24 20:21:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:42 [manager.py:224] router recive req id 8 cost time 0.10843157768249512 s -INFO 06-24 20:21:42 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s -DEBUG 06-24 20:21:42 [manager.py:391] Prefill Batch: batch_id=240529802433699469972229452236451614887, time:1750767702.9152942s req_ids:[8] -DEBUG 06-24 20:21:42 [manager.py:391] -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:42 lightllm_req_id:8 first_token_cost:206.8033218383789ms total_cost_time:206.8467140197754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10132 prompt_cache_len:5151 prompt_cache_ratio:0.5083892617449665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 -DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10851168632507324 s -INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.1104578971862793 s -DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=195755025892400101355005074492519400520, time:1750767703.1259995s req_ids:[8] -DEBUG 06-24 20:21:43 [manager.py:391] -ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:205.20639419555664ms total_cost_time:205.2445411682129ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:10133 prompt_cache_len:5151 prompt_cache_ratio:0.5083390901016481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 -DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10856986045837402 s -INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.11049890518188477 s -DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=320273511243223078608185442378006472281, time:1750767703.3398168s req_ids:[8] -DEBUG 06-24 20:21:43 [manager.py:391] -ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:206.93373680114746ms total_cost_time:206.97617530822754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10134 prompt_cache_len:5151 prompt_cache_ratio:0.5082889283599763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 -DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.20868420600891113 s -INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.21079039573669434 s -DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=169960743377358416228472878042144367918, time:1750767703.682854s req_ids:[8] -DEBUG 06-24 20:21:43 [manager.py:391] -ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:327.3181915283203ms total_cost_time:327.3634910583496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10135 prompt_cache_len:5151 prompt_cache_ratio:0.5082387765170202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 -DEBUG 06-24 20:21:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:43 [manager.py:224] router recive req id 8 cost time 0.10889816284179688 s -INFO 06-24 20:21:43 [manager.py:68] detokenization recv req id 8 cost time 0.11055850982666016 s -DEBUG 06-24 20:21:43 [manager.py:391] Prefill Batch: batch_id=262609319509659942003145030118556452624, time:1750767703.8846385s req_ids:[8] -DEBUG 06-24 20:21:43 [manager.py:391] -ERROR 06-24 20:21:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:207.2441577911377ms total_cost_time:207.28683471679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10136 prompt_cache_len:5151 prompt_cache_ratio:0.5081886345698501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 -DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10828232765197754 s -INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10981082916259766 s -DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=123312667081070237568459580423686935075, time:1750767704.0987656s req_ids:[8] -DEBUG 06-24 20:21:44 [manager.py:391] -ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:43 lightllm_req_id:8 first_token_cost:207.00335502624512ms total_cost_time:207.0634365081787ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:10137 prompt_cache_len:5151 prompt_cache_ratio:0.5081385025155372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 -DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10768485069274902 s -INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s -DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=40495947664485255597714034583587539764, time:1750767704.317576s req_ids:[8] -DEBUG 06-24 20:21:44 [manager.py:391] -ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:216.63331985473633ms total_cost_time:216.6762351989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10138 prompt_cache_len:5151 prompt_cache_ratio:0.5080883803511541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 -DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10746121406555176 s -INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.10952425003051758 s -DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=172981181791695416483957084520320403600, time:1750767704.5340722s req_ids:[8] -DEBUG 06-24 20:21:44 [manager.py:391] -ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:210.00313758850098ms total_cost_time:210.04867553710938ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10139 prompt_cache_len:5151 prompt_cache_ratio:0.5080382680737745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 -DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.1082763671875 s -INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029863357543945 s -DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=131169783785352733046111397100057899373, time:1750767704.7483761s req_ids:[8] -DEBUG 06-24 20:21:44 [manager.py:391] -ERROR 06-24 20:21:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:207.55743980407715ms total_cost_time:207.60226249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10140 prompt_cache_len:5151 prompt_cache_ratio:0.5079881656804733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 -DEBUG 06-24 20:21:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:44 [manager.py:224] router recive req id 8 cost time 0.10889291763305664 s -INFO 06-24 20:21:44 [manager.py:68] detokenization recv req id 8 cost time 0.11079955101013184 s -DEBUG 06-24 20:21:44 [manager.py:391] Prefill Batch: batch_id=115811425222702606777810986042052353041, time:1750767704.9646556s req_ids:[8] -DEBUG 06-24 20:21:44 [manager.py:391] -ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:44 lightllm_req_id:8 first_token_cost:209.2571258544922ms total_cost_time:209.30099487304688ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10141 prompt_cache_len:5151 prompt_cache_ratio:0.5079380731683266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 -DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10887289047241211 s -INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.11075782775878906 s -DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=132508977081290864847569260779378056284, time:1750767705.179054s req_ids:[8] -DEBUG 06-24 20:21:45 [manager.py:391] -ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:375.38719177246094ms total_cost_time:375.43177604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10142 prompt_cache_len:5151 prompt_cache_ratio:0.5078879905344114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 -DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:45 [batch.py:51] router release req id 8 -INFO 06-24 20:21:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10858511924743652 s -INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.11038851737976074 s -DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=22145420406558370586398438984783546823, time:1750767705.5566025s req_ids:[8] -DEBUG 06-24 20:21:45 [manager.py:391] -ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:161.56554222106934ms total_cost_time:161.6058349609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10143 prompt_cache_len:5151 prompt_cache_ratio:0.507837917775806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 -DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10735464096069336 s -INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.1092376708984375 s -DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=313712040281680162188102139771420179062, time:1750767705.7251568s req_ids:[8] -DEBUG 06-24 20:21:45 [manager.py:391] -ERROR 06-24 20:21:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:196.5653896331787ms total_cost_time:196.6094970703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10144 prompt_cache_len:5151 prompt_cache_ratio:0.5077878548895899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 -DEBUG 06-24 20:21:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:45 [manager.py:224] router recive req id 8 cost time 0.10772466659545898 s -INFO 06-24 20:21:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976719856262207 s -DEBUG 06-24 20:21:45 [manager.py:391] Prefill Batch: batch_id=330362428935782045468292297518601909011, time:1750767705.928117s req_ids:[8] -DEBUG 06-24 20:21:45 [manager.py:391] -ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:45 lightllm_req_id:8 first_token_cost:206.37917518615723ms total_cost_time:206.42423629760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10145 prompt_cache_len:5151 prompt_cache_ratio:0.5077378018728438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 -DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10794758796691895 s -INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s -DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=29180433877557666447985475617404811289, time:1750767706.143307s req_ids:[8] -DEBUG 06-24 20:21:46 [manager.py:391] -ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:210.16263961791992ms total_cost_time:210.2072238922119ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10146 prompt_cache_len:5151 prompt_cache_ratio:0.5076877587226494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 -DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10790538787841797 s -INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10986089706420898 s -DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=274659681424589417645776321804664797426, time:1750767706.3585832s req_ids:[8] -DEBUG 06-24 20:21:46 [manager.py:391] -ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:208.37163925170898ms total_cost_time:208.41574668884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10147 prompt_cache_len:5151 prompt_cache_ratio:0.5076377254360895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 -DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10791230201721191 s -INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.10975241661071777 s -DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=329676922568742266876793498249649662445, time:1750767706.570521s req_ids:[8] -DEBUG 06-24 20:21:46 [manager.py:391] -ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:202.3310661315918ms total_cost_time:202.37445831298828ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10148 prompt_cache_len:5151 prompt_cache_ratio:0.5075877020102483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 -DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.11007261276245117 s -INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.11208224296569824 s -DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=108188104191186310020818999519498292593, time:1750767706.778726s req_ids:[8] -DEBUG 06-24 20:21:46 [manager.py:391] -ERROR 06-24 20:21:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:202.4984359741211ms total_cost_time:202.54039764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10149 prompt_cache_len:5151 prompt_cache_ratio:0.507537688442211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 -DEBUG 06-24 20:21:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:46 [batch.py:51] router release req id 8 -INFO 06-24 20:21:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:46 [manager.py:224] router recive req id 8 cost time 0.10917258262634277 s -INFO 06-24 20:21:46 [manager.py:68] detokenization recv req id 8 cost time 0.11107277870178223 s -DEBUG 06-24 20:21:46 [manager.py:391] Prefill Batch: batch_id=192491815574463681595600070727692657857, time:1750767706.9876206s req_ids:[8] -DEBUG 06-24 20:21:46 [manager.py:391] -ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:46 lightllm_req_id:8 first_token_cost:371.40846252441406ms total_cost_time:371.45447731018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10150 prompt_cache_len:5151 prompt_cache_ratio:0.507487684729064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 -DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s -INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.1108701229095459 s -DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=221824144923647474597465254393548597416, time:1750767707.3654897s req_ids:[8] -DEBUG 06-24 20:21:47 [manager.py:391] -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:203.88174057006836ms total_cost_time:203.92584800720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10151 prompt_cache_len:5151 prompt_cache_ratio:0.5074376908678948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 -DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10907578468322754 s -INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11089920997619629 s -DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=55690150147460318077901751025777496396, time:1750767707.5768464s req_ids:[8] -DEBUG 06-24 20:21:47 [manager.py:391] -ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:204.41770553588867ms total_cost_time:204.45847511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10152 prompt_cache_len:5151 prompt_cache_ratio:0.5073877068557919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 -DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10849905014038086 s -INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11039566993713379 s -DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=114600798920360626666962762159538598071, time:1750767707.7860758s req_ids:[8] -DEBUG 06-24 20:21:47 [manager.py:391] -ERROR 06-24 20:21:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:203.47833633422852ms total_cost_time:203.5236358642578ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10153 prompt_cache_len:5151 prompt_cache_ratio:0.5073377326898454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 -DEBUG 06-24 20:21:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:47 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s -INFO 06-24 20:21:47 [manager.py:68] detokenization recv req id 8 cost time 0.11026263236999512 s -DEBUG 06-24 20:21:47 [manager.py:391] Prefill Batch: batch_id=264635228822724950254656684438632077668, time:1750767707.9974406s req_ids:[8] -DEBUG 06-24 20:21:47 [manager.py:391] -ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:47 lightllm_req_id:8 first_token_cost:211.17758750915527ms total_cost_time:211.22312545776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10154 prompt_cache_len:5151 prompt_cache_ratio:0.507287768367146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 -DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10880494117736816 s -INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.1108999252319336 s -DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=236765120341545939154367353873775367629, time:1750767708.2189193s req_ids:[8] -DEBUG 06-24 20:21:48 [manager.py:391] -ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:213.22917938232422ms total_cost_time:213.2742404937744ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10155 prompt_cache_len:5151 prompt_cache_ratio:0.5072378138847858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 -DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s -INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11145448684692383 s -DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=91883959434081743059970374307395749092, time:1750767708.4319596s req_ids:[8] -DEBUG 06-24 20:21:48 [manager.py:391] -ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:204.99944686889648ms total_cost_time:205.04474639892578ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10156 prompt_cache_len:5151 prompt_cache_ratio:0.5071878692398583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 -DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10863447189331055 s -INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058473587036133 s -DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=326062611992093738680094295157482954815, time:1750767708.6449058s req_ids:[8] -DEBUG 06-24 20:21:48 [manager.py:391] -ERROR 06-24 20:21:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:206.8791389465332ms total_cost_time:206.9227695465088ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10157 prompt_cache_len:5151 prompt_cache_ratio:0.5071379344294575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 -DEBUG 06-24 20:21:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:48 [manager.py:224] router recive req id 8 cost time 0.10817289352416992 s -INFO 06-24 20:21:48 [manager.py:68] detokenization recv req id 8 cost time 0.11031389236450195 s -DEBUG 06-24 20:21:48 [manager.py:391] Prefill Batch: batch_id=121666342581204545602804745496802067528, time:1750767708.8547683s req_ids:[8] -DEBUG 06-24 20:21:48 [manager.py:391] -ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:48 lightllm_req_id:8 first_token_cost:362.31517791748047ms total_cost_time:362.35904693603516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10158 prompt_cache_len:5151 prompt_cache_ratio:0.5070880094506792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 -DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10782313346862793 s -INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10983729362487793 s -DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=200214342817685868408717861021666873381, time:1750767709.2235832s req_ids:[8] -DEBUG 06-24 20:21:49 [manager.py:391] -ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:197.64089584350586ms total_cost_time:197.68595695495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10159 prompt_cache_len:5151 prompt_cache_ratio:0.5070380943006202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 -DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10890889167785645 s -INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.11096453666687012 s -DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=107399705044772354379231881816944385808, time:1750767709.425727s req_ids:[8] -DEBUG 06-24 20:21:49 [manager.py:391] -ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:204.38909530639648ms total_cost_time:204.43367958068848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10160 prompt_cache_len:5151 prompt_cache_ratio:0.506988188976378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 -DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10809540748596191 s -INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10934281349182129 s -DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=39171556448706185149996231046413106475, time:1750767709.6371737s req_ids:[8] -DEBUG 06-24 20:21:49 [manager.py:391] -ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:206.4363956451416ms total_cost_time:206.47954940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10161 prompt_cache_len:5151 prompt_cache_ratio:0.5069382934750517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 -DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:49 [manager.py:224] router recive req id 8 cost time 0.10760021209716797 s -INFO 06-24 20:21:49 [manager.py:68] detokenization recv req id 8 cost time 0.10950660705566406 s -DEBUG 06-24 20:21:49 [manager.py:391] Prefill Batch: batch_id=246151613058528285861338106235448375026, time:1750767709.8503969s req_ids:[8] -DEBUG 06-24 20:21:49 [manager.py:391] -ERROR 06-24 20:21:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:203.50265502929688ms total_cost_time:203.54723930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10162 prompt_cache_len:5151 prompt_cache_ratio:0.5068884077937413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 -DEBUG 06-24 20:21:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10870170593261719 s -INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.11012125015258789 s -DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=62611630133935355224573563501472198736, time:1750767710.0587509s req_ids:[8] -DEBUG 06-24 20:21:50 [manager.py:391] -ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:49 lightllm_req_id:8 first_token_cost:204.8792839050293ms total_cost_time:204.9243450164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10163 prompt_cache_len:5151 prompt_cache_ratio:0.5068385319295483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 -DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10873198509216309 s -INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.11067581176757812 s -DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=95319955804261135385267686831907450335, time:1750767710.2726073s req_ids:[8] -DEBUG 06-24 20:21:50 [manager.py:391] -ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:209.1991901397705ms total_cost_time:209.244966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10164 prompt_cache_len:5151 prompt_cache_ratio:0.5067886658795749 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 -DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s -INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.10939288139343262 s -DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=143161653485812012324160082007040947348, time:1750767710.4848094s req_ids:[8] -DEBUG 06-24 20:21:50 [manager.py:391] -ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:205.43885231018066ms total_cost_time:205.48248291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10165 prompt_cache_len:5151 prompt_cache_ratio:0.5067388096409248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 -DEBUG 06-24 20:21:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:50 [manager.py:224] router recive req id 8 cost time 0.31098198890686035 s -INFO 06-24 20:21:50 [manager.py:68] detokenization recv req id 8 cost time 0.312960147857666 s -DEBUG 06-24 20:21:50 [manager.py:391] Prefill Batch: batch_id=59417629579731564947259900426237265669, time:1750767710.90077s req_ids:[8] -DEBUG 06-24 20:21:50 [manager.py:391] -ERROR 06-24 20:21:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:50 lightllm_req_id:8 first_token_cost:418.19214820861816ms total_cost_time:418.23816299438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10166 prompt_cache_len:5151 prompt_cache_ratio:0.5066889632107023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 -DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10958170890808105 s -INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11151480674743652 s -DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=181690066750847179644252591866895893861, time:1750767711.123757s req_ids:[8] -DEBUG 06-24 20:21:51 [manager.py:391] -ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:210.12187004089355ms total_cost_time:210.16716957092285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10167 prompt_cache_len:5151 prompt_cache_ratio:0.5066391265860136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 -DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10908341407775879 s -INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11112666130065918 s -DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=141108360591117724241443149582779438395, time:1750767711.3396575s req_ids:[8] -DEBUG 06-24 20:21:51 [manager.py:391] -ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:209.06782150268555ms total_cost_time:209.11216735839844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10168 prompt_cache_len:5151 prompt_cache_ratio:0.5065892997639654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 -DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10872173309326172 s -INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11089777946472168 s -DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=306874814561215855798336839244601092985, time:1750767711.5562913s req_ids:[8] -DEBUG 06-24 20:21:51 [manager.py:391] -DEBUG 06-24 20:21:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 43575.933 tokens/s -DEBUG 06-24 20:21:51 [stats.py:37] Avg prompt tokens throughput: 43567.347 tokens/s -DEBUG 06-24 20:21:51 [stats.py:37] Avg generate tokens throughput: 8.586 tokens/s -ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:210.4470729827881ms total_cost_time:210.48951148986816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10169 prompt_cache_len:5151 prompt_cache_ratio:0.5065394827416658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 -DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.10847735404968262 s -INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s -DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=110390041671043797051975586016149013721, time:1750767711.7702425s req_ids:[8] -DEBUG 06-24 20:21:51 [manager.py:391] -ERROR 06-24 20:21:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:206.07233047485352ms total_cost_time:206.1169147491455ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10170 prompt_cache_len:5151 prompt_cache_ratio:0.5064896755162241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 -DEBUG 06-24 20:21:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:51 [manager.py:224] router recive req id 8 cost time 0.1081244945526123 s -INFO 06-24 20:21:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s -DEBUG 06-24 20:21:51 [manager.py:391] Prefill Batch: batch_id=207589740932310923478679326210970218798, time:1750767711.9834073s req_ids:[8] -DEBUG 06-24 20:21:51 [manager.py:391] -ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:51 lightllm_req_id:8 first_token_cost:209.122896194458ms total_cost_time:209.16748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10171 prompt_cache_len:5151 prompt_cache_ratio:0.5064398780847508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 -DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.10817599296569824 s -INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.11030149459838867 s -DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=250082861282191818878850867351228700242, time:1750767712.1983495s req_ids:[8] -DEBUG 06-24 20:21:52 [manager.py:391] -ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:206.9263458251953ms total_cost_time:206.9690227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10172 prompt_cache_len:5151 prompt_cache_ratio:0.506390090444357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 -DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.10918068885803223 s -INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125326156616211 s -DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=281921791089613627703830368274671382019, time:1750767712.413499s req_ids:[8] -DEBUG 06-24 20:21:52 [manager.py:391] -ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:210.07466316223145ms total_cost_time:210.11948585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10173 prompt_cache_len:5151 prompt_cache_ratio:0.5063403125921557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 -DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:52 [manager.py:224] router recive req id 8 cost time 0.3103041648864746 s -INFO 06-24 20:21:52 [manager.py:68] detokenization recv req id 8 cost time 0.3122215270996094 s -DEBUG 06-24 20:21:52 [manager.py:391] Prefill Batch: batch_id=132038998096165543760404663160512558533, time:1750767712.8346462s req_ids:[8] -DEBUG 06-24 20:21:52 [manager.py:391] -ERROR 06-24 20:21:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:408.4193706512451ms total_cost_time:408.4639549255371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10174 prompt_cache_len:5151 prompt_cache_ratio:0.5062905445252605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 -DEBUG 06-24 20:21:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10894083976745605 s -INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11083841323852539 s -DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=217183463166192805370728066747468332152, time:1750767713.0422575s req_ids:[8] -DEBUG 06-24 20:21:53 [manager.py:391] -ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:52 lightllm_req_id:8 first_token_cost:209.35463905334473ms total_cost_time:209.41567420959473ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10175 prompt_cache_len:5151 prompt_cache_ratio:0.5062407862407863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 -DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:53 [batch.py:51] router release req id 8 -INFO 06-24 20:21:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10963582992553711 s -INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11186671257019043 s -DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=239194813887653020895748570470620342739, time:1750767713.2581968s req_ids:[8] -DEBUG 06-24 20:21:53 [manager.py:391] -ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:206.18891716003418ms total_cost_time:206.23445510864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10176 prompt_cache_len:5151 prompt_cache_ratio:0.5061910377358491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 -DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10833549499511719 s -INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s -DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=5937566445910207894640637634133177337, time:1750767713.4691164s req_ids:[8] -DEBUG 06-24 20:21:53 [manager.py:391] -ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:206.5727710723877ms total_cost_time:206.618070602417ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10177 prompt_cache_len:5151 prompt_cache_ratio:0.5061412990075661 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 -DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.1075894832611084 s -INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.10971879959106445 s -DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=54947203383929137795381133179969680500, time:1750767713.6841764s req_ids:[8] -DEBUG 06-24 20:21:53 [manager.py:391] -ERROR 06-24 20:21:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:208.77861976623535ms total_cost_time:208.80675315856934ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:10178 prompt_cache_len:5151 prompt_cache_ratio:0.5060915700530556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 -DEBUG 06-24 20:21:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:53 [manager.py:224] router recive req id 8 cost time 0.10849404335021973 s -INFO 06-24 20:21:53 [manager.py:68] detokenization recv req id 8 cost time 0.11066031455993652 s -DEBUG 06-24 20:21:53 [manager.py:391] Prefill Batch: batch_id=327989025836206708986247450474680594918, time:1750767713.899737s req_ids:[8] -DEBUG 06-24 20:21:53 [manager.py:391] -ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:53 lightllm_req_id:8 first_token_cost:223.9692211151123ms total_cost_time:224.0147590637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10179 prompt_cache_len:5151 prompt_cache_ratio:0.5060418508694371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 -DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10809850692749023 s -INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.11019420623779297 s -DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=154274389491382623239164633297486486447, time:1750767714.148191s req_ids:[8] -DEBUG 06-24 20:21:54 [manager.py:391] -ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:219.62237358093262ms total_cost_time:219.6669578552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10180 prompt_cache_len:5151 prompt_cache_ratio:0.5059921414538311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 -DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10608458518981934 s -INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.1078493595123291 s -DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=128063657254144586533000277854839495315, time:1750767714.351337s req_ids:[8] -DEBUG 06-24 20:21:54 [manager.py:391] -ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:335.7722759246826ms total_cost_time:335.8170986175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10181 prompt_cache_len:5151 prompt_cache_ratio:0.5059424418033592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 -DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10908198356628418 s -INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.1113426685333252 s -DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=279689998078639681657998157505518402627, time:1750767714.6907172s req_ids:[8] -DEBUG 06-24 20:21:54 [manager.py:391] -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:21:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:195.67298889160156ms total_cost_time:195.72043418884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10182 prompt_cache_len:5151 prompt_cache_ratio:0.5058927519151444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 -DEBUG 06-24 20:21:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:54 [manager.py:224] router recive req id 8 cost time 0.10828709602355957 s -INFO 06-24 20:21:54 [manager.py:68] detokenization recv req id 8 cost time 0.11037468910217285 s -DEBUG 06-24 20:21:54 [manager.py:391] Prefill Batch: batch_id=189583021769601880643778622638744457130, time:1750767714.8958888s req_ids:[8] -DEBUG 06-24 20:21:54 [manager.py:391] -ERROR 06-24 20:21:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:207.6132297515869ms total_cost_time:207.65948295593262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10183 prompt_cache_len:5151 prompt_cache_ratio:0.5058430717863105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 -DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.1076357364654541 s -INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.10966849327087402 s -DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=149850188785011090092291004190266491427, time:1750767715.1082509s req_ids:[8] -DEBUG 06-24 20:21:55 [manager.py:391] -ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:54 lightllm_req_id:8 first_token_cost:209.34343338012695ms total_cost_time:209.38801765441895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10184 prompt_cache_len:5151 prompt_cache_ratio:0.5057934014139828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 -DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s -INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.10986065864562988 s -DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=120125875770447005251387818846730992887, time:1750767715.3284495s req_ids:[8] -DEBUG 06-24 20:21:55 [manager.py:391] -ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:209.94257926940918ms total_cost_time:209.98764038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10185 prompt_cache_len:5151 prompt_cache_ratio:0.5057437407952872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 -DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s -INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11026573181152344 s -DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=38533931238485029816214169885775389096, time:1750767715.5410726s req_ids:[8] -DEBUG 06-24 20:21:55 [manager.py:391] -ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:210.4494571685791ms total_cost_time:210.5097770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:10186 prompt_cache_len:5151 prompt_cache_ratio:0.5056940899273513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 -DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.1103827953338623 s -INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11236381530761719 s -DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=47294859779598998354675238836512849143, time:1750767715.7572296s req_ids:[8] -DEBUG 06-24 20:21:55 [manager.py:391] -ERROR 06-24 20:21:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:167.8328514099121ms total_cost_time:167.87481307983398ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10187 prompt_cache_len:5151 prompt_cache_ratio:0.5056444488073034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 -DEBUG 06-24 20:21:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:55 [manager.py:224] router recive req id 8 cost time 0.10814785957336426 s -INFO 06-24 20:21:55 [manager.py:68] detokenization recv req id 8 cost time 0.11013913154602051 s -DEBUG 06-24 20:21:55 [manager.py:391] Prefill Batch: batch_id=183486134893157251971364779623943165028, time:1750767715.9320982s req_ids:[8] -DEBUG 06-24 20:21:55 [manager.py:391] -ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:55 lightllm_req_id:8 first_token_cost:198.73499870300293ms total_cost_time:198.78149032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10188 prompt_cache_len:5151 prompt_cache_ratio:0.5055948174322733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 -DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10853099822998047 s -INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.11054348945617676 s -DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=184456700281138368338089968995373025651, time:1750767716.1349926s req_ids:[8] -DEBUG 06-24 20:21:56 [manager.py:391] -ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:204.64229583740234ms total_cost_time:204.68640327453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10189 prompt_cache_len:5151 prompt_cache_ratio:0.5055451957993915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 -DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:21:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.3071279525756836 s -INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.3089561462402344 s -DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=119639971667094751148004675908539669559, time:1750767716.5551603s req_ids:[8] -DEBUG 06-24 20:21:56 [manager.py:391] -ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:373.9924430847168ms total_cost_time:374.0499019622803ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:10190 prompt_cache_len:5151 prompt_cache_ratio:0.50549558390579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 -DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s -INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.11015009880065918 s -DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=228065973816863935871663455940005107890, time:1750767716.724513s req_ids:[8] -DEBUG 06-24 20:21:56 [manager.py:391] -ERROR 06-24 20:21:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:197.0040798187256ms total_cost_time:197.0505714416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10191 prompt_cache_len:5151 prompt_cache_ratio:0.5054459817486017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 -DEBUG 06-24 20:21:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:56 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s -INFO 06-24 20:21:56 [manager.py:68] detokenization recv req id 8 cost time 0.10995745658874512 s -DEBUG 06-24 20:21:56 [manager.py:391] Prefill Batch: batch_id=127021827906575341326721937884342472059, time:1750767716.927526s req_ids:[8] -DEBUG 06-24 20:21:56 [manager.py:391] -ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:56 lightllm_req_id:8 first_token_cost:204.01668548583984ms total_cost_time:204.06031608581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10192 prompt_cache_len:5151 prompt_cache_ratio:0.5053963893249608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 -DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10954976081848145 s -INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11158561706542969 s -DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=191153412754150930387837211417413830895, time:1750767717.1404629s req_ids:[8] -DEBUG 06-24 20:21:57 [manager.py:391] -ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:207.9176902770996ms total_cost_time:208.0233097076416ms,out_token_counter:1 mean_per_token_cost_time: 0.10561943054199219ms prompt_token_num:10193 prompt_cache_len:5151 prompt_cache_ratio:0.5053468066320024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 -DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.1095125675201416 s -INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.1115577220916748 s -DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=151890497980791179063594521867435290218, time:1750767717.3539455s req_ids:[8] -DEBUG 06-24 20:21:57 [manager.py:391] -ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:205.93667030334473ms total_cost_time:205.98101615905762ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10194 prompt_cache_len:5151 prompt_cache_ratio:0.5052972336668629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 -DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.1084756851196289 s -INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11057162284851074 s -DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=324985861580833485035245486627321442665, time:1750767717.5661154s req_ids:[8] -DEBUG 06-24 20:21:57 [manager.py:391] -ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:204.72359657287598ms total_cost_time:204.78200912475586ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10195 prompt_cache_len:5151 prompt_cache_ratio:0.5052476704266797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 -DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s -INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098184585571289 s -DEBUG 06-24 20:21:57 [manager.py:391] Prefill Batch: batch_id=334435263317403939663933301836415221103, time:1750767717.7864873s req_ids:[8] -DEBUG 06-24 20:21:57 [manager.py:391] -ERROR 06-24 20:21:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:218.4736728668213ms total_cost_time:218.5196876525879ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10196 prompt_cache_len:5151 prompt_cache_ratio:0.5051981169085916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 -DEBUG 06-24 20:21:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:57 [manager.py:224] router recive req id 8 cost time 0.10912394523620605 s -INFO 06-24 20:21:57 [manager.py:68] detokenization recv req id 8 cost time 0.11124157905578613 s -DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=130574240565199710585131735138744884282, time:1750767718.0032194s req_ids:[8] -DEBUG 06-24 20:21:58 [manager.py:391] -ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:57 lightllm_req_id:8 first_token_cost:377.35724449157715ms total_cost_time:377.40278244018555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10197 prompt_cache_len:5151 prompt_cache_ratio:0.5051485731097382 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 -DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10932350158691406 s -INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.11142802238464355 s -DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=157090004188417983524334565221009191323, time:1750767718.3862798s req_ids:[8] -DEBUG 06-24 20:21:58 [manager.py:391] -ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:207.35406875610352ms total_cost_time:207.3974609375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10198 prompt_cache_len:5151 prompt_cache_ratio:0.5050990390272603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 -DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10788917541503906 s -INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.10983967781066895 s -DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=316010042423684139405218087127307100707, time:1750767718.6064842s req_ids:[8] -DEBUG 06-24 20:21:58 [manager.py:391] -ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:215.00778198242188ms total_cost_time:215.05260467529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10199 prompt_cache_len:5151 prompt_cache_ratio:0.5050495146582998 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 -DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:58 [manager.py:224] router recive req id 8 cost time 0.10806822776794434 s -INFO 06-24 20:21:58 [manager.py:68] detokenization recv req id 8 cost time 0.11021280288696289 s -DEBUG 06-24 20:21:58 [manager.py:391] Prefill Batch: batch_id=224647700800854691780972548085507260734, time:1750767718.822453s req_ids:[8] -DEBUG 06-24 20:21:58 [manager.py:391] -ERROR 06-24 20:21:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:206.41469955444336ms total_cost_time:206.45976066589355ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10200 prompt_cache_len:5151 prompt_cache_ratio:0.505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 -DEBUG 06-24 20:21:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s -INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11006498336791992 s -DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=147427983136895024557591037632266843627, time:1750767719.0338118s req_ids:[8] -DEBUG 06-24 20:21:59 [manager.py:391] -ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:58 lightllm_req_id:8 first_token_cost:207.31115341186523ms total_cost_time:207.35645294189453ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10201 prompt_cache_len:5151 prompt_cache_ratio:0.504950495049505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 -DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10795950889587402 s -INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11064958572387695 s -DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=260332699668594343608360380236410457888, time:1750767719.2463562s req_ids:[8] -DEBUG 06-24 20:21:59 [manager.py:391] -ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:204.1494846343994ms total_cost_time:204.1945457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10202 prompt_cache_len:5151 prompt_cache_ratio:0.50490099980396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 -DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s -INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.11070466041564941 s -DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=260230835040301276240579915124230505773, time:1750767719.4573379s req_ids:[8] -DEBUG 06-24 20:21:59 [manager.py:391] -ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:208.8601589202881ms total_cost_time:208.90402793884277ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10203 prompt_cache_len:5151 prompt_cache_ratio:0.5048515142605117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 -DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10782957077026367 s -INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.10998201370239258 s -DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=153313817155264135467096212655365307807, time:1750767719.6713607s req_ids:[8] -DEBUG 06-24 20:21:59 [manager.py:391] -ERROR 06-24 20:21:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:21:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:21:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:204.4847011566162ms total_cost_time:204.5295238494873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10204 prompt_cache_len:5151 prompt_cache_ratio:0.5048020384163073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:21:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 -DEBUG 06-24 20:21:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:21:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:21:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:21:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:21:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:21:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:21:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:21:59 [manager.py:224] router recive req id 8 cost time 0.10742378234863281 s -INFO 06-24 20:21:59 [manager.py:68] detokenization recv req id 8 cost time 0.10963749885559082 s -DEBUG 06-24 20:21:59 [manager.py:391] Prefill Batch: batch_id=72309810925689308879123621374612041555, time:1750767719.8825214s req_ids:[8] -DEBUG 06-24 20:21:59 [manager.py:391] -ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:21:59 lightllm_req_id:8 first_token_cost:364.6209239959717ms total_cost_time:364.68052864074707ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:10205 prompt_cache_len:5151 prompt_cache_ratio:0.5047525722684958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 -DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10870194435119629 s -INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s -DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=192109418991214771039446883347904298435, time:1750767720.2531226s req_ids:[8] -DEBUG 06-24 20:22:00 [manager.py:391] -ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:198.2593536376953ms total_cost_time:198.3034610748291ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10206 prompt_cache_len:5151 prompt_cache_ratio:0.5047031158142269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 -DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10747885704040527 s -INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.10944294929504395 s -DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=129729238269397932006761705385547293121, time:1750767720.4562047s req_ids:[8] -DEBUG 06-24 20:22:00 [manager.py:391] -ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:206.5284252166748ms total_cost_time:206.5892219543457ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10207 prompt_cache_len:5151 prompt_cache_ratio:0.5046536690506516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 -DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10808277130126953 s -INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.1100454330444336 s -DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=254475762851233455950344337717014695834, time:1750767720.6699603s req_ids:[8] -DEBUG 06-24 20:22:00 [manager.py:391] -ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:203.61971855163574ms total_cost_time:203.66477966308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10208 prompt_cache_len:5151 prompt_cache_ratio:0.5046042319749217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 -DEBUG 06-24 20:22:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:00 [manager.py:224] router recive req id 8 cost time 0.10911417007446289 s -INFO 06-24 20:22:00 [manager.py:68] detokenization recv req id 8 cost time 0.1110391616821289 s -DEBUG 06-24 20:22:00 [manager.py:391] Prefill Batch: batch_id=10258791185918051292096629133600954734, time:1750767720.8882546s req_ids:[8] -DEBUG 06-24 20:22:00 [manager.py:391] -ERROR 06-24 20:22:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:217.56887435913086ms total_cost_time:217.61178970336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10209 prompt_cache_len:5151 prompt_cache_ratio:0.5045548045841904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 -DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s -INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s -DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=318605398706177215223499471910484711821, time:1750767721.104027s req_ids:[8] -DEBUG 06-24 20:22:01 [manager.py:391] -ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:00 lightllm_req_id:8 first_token_cost:203.57012748718262ms total_cost_time:203.6125659942627ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10210 prompt_cache_len:5151 prompt_cache_ratio:0.5045053868756122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 -DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10910177230834961 s -INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11108994483947754 s -DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=314011069988068644181715927510292270202, time:1750767721.3135314s req_ids:[8] -DEBUG 06-24 20:22:01 [manager.py:391] -ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:206.9103717803955ms total_cost_time:206.9535255432129ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10211 prompt_cache_len:5151 prompt_cache_ratio:0.5044559788463422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 -DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.10814881324768066 s -INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s -DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=303818005635972922274620839295545195667, time:1750767721.5283315s req_ids:[8] -DEBUG 06-24 20:22:01 [manager.py:391] -ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:22:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 43561.657 tokens/s -DEBUG 06-24 20:22:01 [stats.py:37] Avg prompt tokens throughput: 43553.010 tokens/s -DEBUG 06-24 20:22:01 [stats.py:37] Avg generate tokens throughput: 8.647 tokens/s -INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:211.36140823364258ms total_cost_time:211.40527725219727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10212 prompt_cache_len:5151 prompt_cache_ratio:0.504406580493537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 -DEBUG 06-24 20:22:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:01 [manager.py:224] router recive req id 8 cost time 0.1076047420501709 s -INFO 06-24 20:22:01 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s -DEBUG 06-24 20:22:01 [manager.py:391] Prefill Batch: batch_id=75970751537961997592694919467677988980, time:1750767721.744334s req_ids:[8] -DEBUG 06-24 20:22:01 [manager.py:391] -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:365.8406734466553ms total_cost_time:365.88597297668457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10213 prompt_cache_len:5151 prompt_cache_ratio:0.5043571918143542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 -DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s -INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976457595825195 s -DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=107989291788391464910735229822148876319, time:1750767722.1124654s req_ids:[8] -DEBUG 06-24 20:22:02 [manager.py:391] -ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:01 lightllm_req_id:8 first_token_cost:203.216552734375ms total_cost_time:203.25994491577148ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10214 prompt_cache_len:5151 prompt_cache_ratio:0.5043078128059526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 -DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10857725143432617 s -INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061835289001465 s -DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=107974031084677637687774769860780245952, time:1750767722.3252752s req_ids:[8] -DEBUG 06-24 20:22:02 [manager.py:391] -ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:203.95374298095703ms total_cost_time:203.99951934814453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10215 prompt_cache_len:5151 prompt_cache_ratio:0.504258443465492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 -DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.1074674129486084 s -INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.10944056510925293 s -DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=287625476360597362245278457823962472121, time:1750767722.5337555s req_ids:[8] -DEBUG 06-24 20:22:02 [manager.py:391] -ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:208.1589698791504ms total_cost_time:208.20307731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10216 prompt_cache_len:5151 prompt_cache_ratio:0.5042090837901331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 -DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10928845405578613 s -INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.11115384101867676 s -DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=239985181381730499148495444469618714068, time:1750767722.747441s req_ids:[8] -DEBUG 06-24 20:22:02 [manager.py:391] -ERROR 06-24 20:22:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:206.77804946899414ms total_cost_time:206.82191848754883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10217 prompt_cache_len:5151 prompt_cache_ratio:0.5041597337770383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 -DEBUG 06-24 20:22:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:02 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s -INFO 06-24 20:22:02 [manager.py:68] detokenization recv req id 8 cost time 0.1105036735534668 s -DEBUG 06-24 20:22:02 [manager.py:391] Prefill Batch: batch_id=104676612471079284225607484851989523785, time:1750767722.960841s req_ids:[8] -DEBUG 06-24 20:22:02 [manager.py:391] -ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:02 lightllm_req_id:8 first_token_cost:204.23531532287598ms total_cost_time:204.27918434143066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10218 prompt_cache_len:5151 prompt_cache_ratio:0.5041103934233705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 -DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10896015167236328 s -INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11092162132263184 s -DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=316419618625399408460749509896687761630, time:1750767723.1691246s req_ids:[8] -DEBUG 06-24 20:22:03 [manager.py:391] -ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:204.3285369873047ms total_cost_time:204.37169075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10219 prompt_cache_len:5151 prompt_cache_ratio:0.5040610627262941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 -DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10750508308410645 s -INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.10929751396179199 s -DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=168651627738625351384139706124913322363, time:1750767723.3800666s req_ids:[8] -DEBUG 06-24 20:22:03 [manager.py:391] -ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:165.62962532043457ms total_cost_time:165.67230224609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10220 prompt_cache_len:5151 prompt_cache_ratio:0.5040117416829746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 -DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.10802149772644043 s -INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11001062393188477 s -DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=257351464449025043258036408178703522858, time:1750767723.5496976s req_ids:[8] -DEBUG 06-24 20:22:03 [manager.py:391] -ERROR 06-24 20:22:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:370.3761100769043ms total_cost_time:370.4209327697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10221 prompt_cache_len:5151 prompt_cache_ratio:0.5039624302905782 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 -DEBUG 06-24 20:22:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:03 [manager.py:224] router recive req id 8 cost time 0.11108255386352539 s -INFO 06-24 20:22:03 [manager.py:68] detokenization recv req id 8 cost time 0.11302518844604492 s -DEBUG 06-24 20:22:03 [manager.py:391] Prefill Batch: batch_id=174259209055580641974967543231458468488, time:1750767723.9259326s req_ids:[8] -DEBUG 06-24 20:22:03 [manager.py:391] -ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:03 lightllm_req_id:8 first_token_cost:201.6751766204834ms total_cost_time:201.7202377319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10222 prompt_cache_len:5151 prompt_cache_ratio:0.5039131285462728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 -DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.1079566478729248 s -INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10990190505981445 s -DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=235643982268707508209970922522028557091, time:1750767724.135265s req_ids:[8] -DEBUG 06-24 20:22:04 [manager.py:391] -ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:204.97608184814453ms total_cost_time:205.00850677490234ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:10223 prompt_cache_len:5151 prompt_cache_ratio:0.5038638364472269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 -DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10766029357910156 s -INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10965776443481445 s -DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=48476886572061274099378784083312012222, time:1750767724.3460057s req_ids:[8] -DEBUG 06-24 20:22:04 [manager.py:391] -ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:207.31544494628906ms total_cost_time:207.36026763916016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10224 prompt_cache_len:5151 prompt_cache_ratio:0.5038145539906104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 -DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10860157012939453 s -INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.1106109619140625 s -DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=200476096092235810820775155040679686310, time:1750767724.5594409s req_ids:[8] -DEBUG 06-24 20:22:04 [manager.py:391] -ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:207.6582908630371ms total_cost_time:207.7012062072754ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10225 prompt_cache_len:5151 prompt_cache_ratio:0.5037652811735941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 -DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10790777206420898 s -INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.10983657836914062 s -DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=161883859476673767464460776695429900353, time:1750767724.7730112s req_ids:[8] -DEBUG 06-24 20:22:04 [manager.py:391] -ERROR 06-24 20:22:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:209.98907089233398ms total_cost_time:210.03365516662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10226 prompt_cache_len:5151 prompt_cache_ratio:0.5037160179933503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 -DEBUG 06-24 20:22:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:04 [manager.py:224] router recive req id 8 cost time 0.10895061492919922 s -INFO 06-24 20:22:04 [manager.py:68] detokenization recv req id 8 cost time 0.11091494560241699 s -DEBUG 06-24 20:22:04 [manager.py:391] Prefill Batch: batch_id=113656605777687224642386074159050625189, time:1750767724.9906356s req_ids:[8] -DEBUG 06-24 20:22:04 [manager.py:391] -ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:04 lightllm_req_id:8 first_token_cost:209.49292182922363ms total_cost_time:209.51485633850098ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:10227 prompt_cache_len:5151 prompt_cache_ratio:0.5036667644470519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 -DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s -INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097116470336914 s -DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=42246799337171411802174871268835855027, time:1750767725.206292s req_ids:[8] -DEBUG 06-24 20:22:05 [manager.py:391] -ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:209.78355407714844ms total_cost_time:209.82861518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10228 prompt_cache_len:5151 prompt_cache_ratio:0.5036175205318733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 -DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.10733532905578613 s -INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.10919785499572754 s -DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=8721238828797903302586038883418131499, time:1750767725.4203289s req_ids:[8] -DEBUG 06-24 20:22:05 [manager.py:391] -ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:367.6156997680664ms total_cost_time:367.6612377166748ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10229 prompt_cache_len:5151 prompt_cache_ratio:0.5035682862449897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 -DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s -INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.10989069938659668 s -DEBUG 06-24 20:22:05 [manager.py:391] Prefill Batch: batch_id=106525218158279199168253248006685147668, time:1750767725.7908587s req_ids:[8] -DEBUG 06-24 20:22:05 [manager.py:391] -ERROR 06-24 20:22:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:206.7854404449463ms total_cost_time:206.82930946350098ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10230 prompt_cache_len:5151 prompt_cache_ratio:0.5035190615835777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 -DEBUG 06-24 20:22:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:05 [manager.py:224] router recive req id 8 cost time 0.1080930233001709 s -INFO 06-24 20:22:05 [manager.py:68] detokenization recv req id 8 cost time 0.11016416549682617 s -DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=329642262179184151283354890990399705301, time:1750767726.0066218s req_ids:[8] -DEBUG 06-24 20:22:06 [manager.py:391] -ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:05 lightllm_req_id:8 first_token_cost:205.5490016937256ms total_cost_time:205.59358596801758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10231 prompt_cache_len:5151 prompt_cache_ratio:0.5034698465448147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 -DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.1092996597290039 s -INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.1113889217376709 s -DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=233333697384713715876170146428131205256, time:1750767726.2168474s req_ids:[8] -DEBUG 06-24 20:22:06 [manager.py:391] -ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:202.1772861480713ms total_cost_time:202.2233009338379ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10232 prompt_cache_len:5151 prompt_cache_ratio:0.5034206411258796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 -DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10860633850097656 s -INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075234413146973 s -DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=113359060590504514651390816215956225575, time:1750767726.4248521s req_ids:[8] -DEBUG 06-24 20:22:06 [manager.py:391] -ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:207.0167064666748ms total_cost_time:207.061767578125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10233 prompt_cache_len:5151 prompt_cache_ratio:0.503371445323952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 -DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s -INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s -DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=38560652732732527171251416027733831092, time:1750767726.6375473s req_ids:[8] -DEBUG 06-24 20:22:06 [manager.py:391] -ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:203.61924171447754ms total_cost_time:203.66287231445312ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10234 prompt_cache_len:5151 prompt_cache_ratio:0.5033222591362126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 -DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:06 [manager.py:224] router recive req id 8 cost time 0.10834336280822754 s -INFO 06-24 20:22:06 [manager.py:68] detokenization recv req id 8 cost time 0.1103830337524414 s -DEBUG 06-24 20:22:06 [manager.py:391] Prefill Batch: batch_id=86730363826149479647827825782195051178, time:1750767726.8478534s req_ids:[8] -DEBUG 06-24 20:22:06 [manager.py:391] -ERROR 06-24 20:22:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:208.03189277648926ms total_cost_time:208.07743072509766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10235 prompt_cache_len:5151 prompt_cache_ratio:0.5032730825598437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 -DEBUG 06-24 20:22:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10842704772949219 s -INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11045241355895996 s -DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=187787795358477786349138944684288949952, time:1750767727.0620134s req_ids:[8] -DEBUG 06-24 20:22:07 [manager.py:391] -ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:06 lightllm_req_id:8 first_token_cost:206.1784267425537ms total_cost_time:206.2222957611084ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10236 prompt_cache_len:5151 prompt_cache_ratio:0.5032239155920282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 -DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10881853103637695 s -INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11097979545593262 s -DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=221182453411316454599036893611322321598, time:1750767727.2743232s req_ids:[8] -DEBUG 06-24 20:22:07 [manager.py:391] -ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:369.6126937866211ms total_cost_time:369.657039642334ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10237 prompt_cache_len:5151 prompt_cache_ratio:0.5031747582299502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 -DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.10831689834594727 s -INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.11027050018310547 s -DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=312775591316998553067620147458162063250, time:1750767727.6489344s req_ids:[8] -DEBUG 06-24 20:22:07 [manager.py:391] -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:201.10249519348145ms total_cost_time:201.14731788635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10238 prompt_cache_len:5151 prompt_cache_ratio:0.5031256104707951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 -DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:07 [manager.py:224] router recive req id 8 cost time 0.1077277660369873 s -INFO 06-24 20:22:07 [manager.py:68] detokenization recv req id 8 cost time 0.1094655990600586 s -DEBUG 06-24 20:22:07 [manager.py:391] Prefill Batch: batch_id=146454716266056223197323622324079044423, time:1750767727.855233s req_ids:[8] -DEBUG 06-24 20:22:07 [manager.py:391] -ERROR 06-24 20:22:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:200.75392723083496ms total_cost_time:200.79684257507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10239 prompt_cache_len:5151 prompt_cache_ratio:0.5030764723117492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 -DEBUG 06-24 20:22:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10940408706665039 s -INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11137104034423828 s -DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=72883751692198195468150224239538313170, time:1750767728.062727s req_ids:[8] -DEBUG 06-24 20:22:08 [manager.py:391] -ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:07 lightllm_req_id:8 first_token_cost:205.93929290771484ms total_cost_time:205.98363876342773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10240 prompt_cache_len:5151 prompt_cache_ratio:0.50302734375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 -DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10640192031860352 s -INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.10761260986328125 s -DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=4868146908764873148012394317783736212, time:1750767728.2743766s req_ids:[8] -DEBUG 06-24 20:22:08 [manager.py:391] -ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:205.12866973876953ms total_cost_time:205.1718235015869ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10241 prompt_cache_len:5151 prompt_cache_ratio:0.502978224782736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 -DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10842657089233398 s -INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11036539077758789 s -DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=200504998753112038203372185856995901251, time:1750767728.4874542s req_ids:[8] -DEBUG 06-24 20:22:08 [manager.py:391] -ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:210.19983291625977ms total_cost_time:210.24274826049805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10242 prompt_cache_len:5151 prompt_cache_ratio:0.502929115407147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 -DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10885238647460938 s -INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11075735092163086 s -DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=101126981036147944592795205662388662384, time:1750767728.701009s req_ids:[8] -DEBUG 06-24 20:22:08 [manager.py:391] -ERROR 06-24 20:22:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:203.83596420288086ms total_cost_time:203.87959480285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10243 prompt_cache_len:5151 prompt_cache_ratio:0.5028800156204237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 -DEBUG 06-24 20:22:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:08 [manager.py:224] router recive req id 8 cost time 0.10879039764404297 s -INFO 06-24 20:22:08 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s -DEBUG 06-24 20:22:08 [manager.py:391] Prefill Batch: batch_id=73293308006869813752099840756692673174, time:1750767728.9089744s req_ids:[8] -DEBUG 06-24 20:22:08 [manager.py:391] -ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:08 lightllm_req_id:8 first_token_cost:367.9049015045166ms total_cost_time:367.9492473602295ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10244 prompt_cache_len:5151 prompt_cache_ratio:0.5028309254197579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 -DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10838794708251953 s -INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11024689674377441 s -DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=50274572810304302519805148129903612458, time:1750767729.284289s req_ids:[8] -DEBUG 06-24 20:22:09 [manager.py:391] -ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:208.40883255004883ms total_cost_time:208.45317840576172ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10245 prompt_cache_len:5151 prompt_cache_ratio:0.5027818448023426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 -DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10929727554321289 s -INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11112070083618164 s -DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=262020928575820431123970926129717187165, time:1750767729.499709s req_ids:[8] -DEBUG 06-24 20:22:09 [manager.py:391] -ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:209.61880683898926ms total_cost_time:209.66315269470215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10246 prompt_cache_len:5151 prompt_cache_ratio:0.5027327737653718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 -DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.10936474800109863 s -INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11121559143066406 s -DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=184255663300813413374191363012734676561, time:1750767729.7150693s req_ids:[8] -DEBUG 06-24 20:22:09 [manager.py:391] -ERROR 06-24 20:22:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:210.3862762451172ms total_cost_time:210.43014526367188ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10247 prompt_cache_len:5151 prompt_cache_ratio:0.5026837123060408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 -DEBUG 06-24 20:22:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:09 [manager.py:224] router recive req id 8 cost time 0.1083674430847168 s -INFO 06-24 20:22:09 [manager.py:68] detokenization recv req id 8 cost time 0.11017608642578125 s -DEBUG 06-24 20:22:09 [manager.py:391] Prefill Batch: batch_id=119689195309379197764747738086371071536, time:1750767729.9287121s req_ids:[8] -DEBUG 06-24 20:22:09 [manager.py:391] -ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:09 lightllm_req_id:8 first_token_cost:207.8537940979004ms total_cost_time:207.89790153503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10248 prompt_cache_len:5151 prompt_cache_ratio:0.5026346604215457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 -DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10852217674255371 s -INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11045384407043457 s -DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=120681180088671193582983245044616050580, time:1750767730.140855s req_ids:[8] -DEBUG 06-24 20:22:10 [manager.py:391] -ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.77709579467773ms total_cost_time:206.82311058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10249 prompt_cache_len:5151 prompt_cache_ratio:0.5025856181090839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 -DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10823702812194824 s -INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11002874374389648 s -DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=263471411795505560058855517541088360478, time:1750767730.3569775s req_ids:[8] -DEBUG 06-24 20:22:10 [manager.py:391] -ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.48694038391113ms total_cost_time:206.53033256530762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10250 prompt_cache_len:5151 prompt_cache_ratio:0.5025365853658537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 -DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.11025309562683105 s -INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.11238670349121094 s -DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=313826642341131770435029087243898610909, time:1750767730.5669968s req_ids:[8] -DEBUG 06-24 20:22:10 [manager.py:391] -ERROR 06-24 20:22:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:206.2661647796631ms total_cost_time:206.30908012390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10251 prompt_cache_len:5151 prompt_cache_ratio:0.5024875621890548 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 -DEBUG 06-24 20:22:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:10 [manager.py:224] router recive req id 8 cost time 0.10705161094665527 s -INFO 06-24 20:22:10 [manager.py:68] detokenization recv req id 8 cost time 0.10875248908996582 s -DEBUG 06-24 20:22:10 [manager.py:391] Prefill Batch: batch_id=69283379468962504050718329789364605222, time:1750767730.7793877s req_ids:[8] -DEBUG 06-24 20:22:10 [manager.py:391] -ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:10 lightllm_req_id:8 first_token_cost:363.9380931854248ms total_cost_time:363.9845848083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10252 prompt_cache_len:5151 prompt_cache_ratio:0.5024385485758877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 -DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s -INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10930681228637695 s -DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=86354164656274642082945519685718996903, time:1750767731.1498017s req_ids:[8] -DEBUG 06-24 20:22:11 [manager.py:391] -ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:209.11884307861328ms total_cost_time:209.16509628295898ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10253 prompt_cache_len:5151 prompt_cache_ratio:0.5023895445235541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 -DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10789752006530762 s -INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s -DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=44936262327005927380570783316101625049, time:1750767731.3663714s req_ids:[8] -DEBUG 06-24 20:22:11 [manager.py:391] -ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:210.5429172515869ms total_cost_time:210.5875015258789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10254 prompt_cache_len:5151 prompt_cache_ratio:0.5023405500292569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 -DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10896730422973633 s -INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s -DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=150493458547882697654070984417728837376, time:1750767731.5829113s req_ids:[8] -DEBUG 06-24 20:22:11 [manager.py:391] -ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:22:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 43782.752 tokens/s -DEBUG 06-24 20:22:11 [stats.py:37] Avg prompt tokens throughput: 43774.197 tokens/s -DEBUG 06-24 20:22:11 [stats.py:37] Avg generate tokens throughput: 8.555 tokens/s -INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:208.61554145812988ms total_cost_time:208.66036415100098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10255 prompt_cache_len:5151 prompt_cache_ratio:0.5022915650901999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 -DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:11 [manager.py:224] router recive req id 8 cost time 0.10741400718688965 s -INFO 06-24 20:22:11 [manager.py:68] detokenization recv req id 8 cost time 0.10959720611572266 s -DEBUG 06-24 20:22:11 [manager.py:391] Prefill Batch: batch_id=63885352170483338346688268442786315651, time:1750767731.7954724s req_ids:[8] -DEBUG 06-24 20:22:11 [manager.py:391] -ERROR 06-24 20:22:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:208.13846588134766ms total_cost_time:208.18138122558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10256 prompt_cache_len:5151 prompt_cache_ratio:0.5022425897035881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 -DEBUG 06-24 20:22:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10816526412963867 s -INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11024641990661621 s -DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=157305407585719661145844634786554562101, time:1750767732.0087693s req_ids:[8] -DEBUG 06-24 20:22:12 [manager.py:391] -ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:11 lightllm_req_id:8 first_token_cost:203.87721061706543ms total_cost_time:203.92251014709473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10257 prompt_cache_len:5151 prompt_cache_ratio:0.5021936238666277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 -DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10832023620605469 s -INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11047816276550293 s -DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=16074861369786859127646378120335819979, time:1750767732.2188797s req_ids:[8] -DEBUG 06-24 20:22:12 [manager.py:391] -ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:206.7575454711914ms total_cost_time:206.8021297454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10258 prompt_cache_len:5151 prompt_cache_ratio:0.5021446675765256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 -DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10847878456115723 s -INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s -DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=146998822295428622619543684175371976206, time:1750767732.4335449s req_ids:[8] -DEBUG 06-24 20:22:12 [manager.py:391] -ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:207.89027214050293ms total_cost_time:207.93437957763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10259 prompt_cache_len:5151 prompt_cache_ratio:0.5020957208304903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 -DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s -INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.10974884033203125 s -DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=158558053363817602165064488744067653815, time:1750767732.6469445s req_ids:[8] -DEBUG 06-24 20:22:12 [manager.py:391] -ERROR 06-24 20:22:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:335.2031707763672ms total_cost_time:335.2482318878174ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10260 prompt_cache_len:5151 prompt_cache_ratio:0.502046783625731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 -DEBUG 06-24 20:22:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:12 [manager.py:224] router recive req id 8 cost time 0.10782408714294434 s -INFO 06-24 20:22:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959744453430176 s -DEBUG 06-24 20:22:12 [manager.py:391] Prefill Batch: batch_id=329893388024098318479951063285567523408, time:1750767732.9848046s req_ids:[8] -DEBUG 06-24 20:22:12 [manager.py:391] -ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:12 lightllm_req_id:8 first_token_cost:202.64244079589844ms total_cost_time:202.68774032592773ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10261 prompt_cache_len:5151 prompt_cache_ratio:0.5019978559594581 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 -DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10813713073730469 s -INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.11007070541381836 s -DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=313374967784078418398425981119381224697, time:1750767733.1976862s req_ids:[8] -DEBUG 06-24 20:22:13 [manager.py:391] -ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:208.20069313049316ms total_cost_time:208.24265480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10262 prompt_cache_len:5151 prompt_cache_ratio:0.5019489378288833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 -DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10907268524169922 s -INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.11114025115966797 s -DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=103361930918123696123472490777643683246, time:1750767733.422159s req_ids:[8] -DEBUG 06-24 20:22:13 [manager.py:391] -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:220.21770477294922ms total_cost_time:220.2625274658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10263 prompt_cache_len:5151 prompt_cache_ratio:0.5019000292312189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 -DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s -INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.1106572151184082 s -DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=93316368097765262139110981880372248384, time:1750767733.6362405s req_ids:[8] -DEBUG 06-24 20:22:13 [manager.py:391] -ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:207.09490776062012ms total_cost_time:207.1394920349121ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10264 prompt_cache_len:5151 prompt_cache_ratio:0.5018511301636789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 -DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:13 [manager.py:224] router recive req id 8 cost time 0.10743427276611328 s -INFO 06-24 20:22:13 [manager.py:68] detokenization recv req id 8 cost time 0.10950136184692383 s -DEBUG 06-24 20:22:13 [manager.py:391] Prefill Batch: batch_id=257614060921682427594205754949900729768, time:1750767733.8500996s req_ids:[8] -DEBUG 06-24 20:22:13 [manager.py:391] -ERROR 06-24 20:22:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:207.91125297546387ms total_cost_time:207.95536041259766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10265 prompt_cache_len:5151 prompt_cache_ratio:0.5018022406234779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 -DEBUG 06-24 20:22:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10798048973083496 s -INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.10991263389587402 s -DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=254346960570548724651195692950773432735, time:1750767734.0625298s req_ids:[8] -DEBUG 06-24 20:22:14 [manager.py:391] -ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:13 lightllm_req_id:8 first_token_cost:204.66303825378418ms total_cost_time:204.70690727233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10266 prompt_cache_len:5151 prompt_cache_ratio:0.5017533606078317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 -DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s -INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.1112213134765625 s -DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=289802829278611423102773138686342498026, time:1750767734.2740417s req_ids:[8] -DEBUG 06-24 20:22:14 [manager.py:391] -ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:205.75261116027832ms total_cost_time:205.7974338531494ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10267 prompt_cache_len:5151 prompt_cache_ratio:0.5017044901139573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 -DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:14 [batch.py:51] router release req id 8 -INFO 06-24 20:22:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.3109145164489746 s -INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.31287550926208496 s -DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=23870346092718010681965928639307286375, time:1750767734.6874986s req_ids:[8] -DEBUG 06-24 20:22:14 [manager.py:391] -ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:415.5890941619873ms total_cost_time:415.6339168548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10268 prompt_cache_len:5151 prompt_cache_ratio:0.5016556291390728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 -DEBUG 06-24 20:22:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:14 [manager.py:224] router recive req id 8 cost time 0.10797548294067383 s -INFO 06-24 20:22:14 [manager.py:68] detokenization recv req id 8 cost time 0.10978198051452637 s -DEBUG 06-24 20:22:14 [manager.py:391] Prefill Batch: batch_id=31173844393602952100244297731096556051, time:1750767734.9096694s req_ids:[8] -DEBUG 06-24 20:22:14 [manager.py:391] -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:14 lightllm_req_id:8 first_token_cost:209.7318172454834ms total_cost_time:209.7756862640381ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10269 prompt_cache_len:5151 prompt_cache_ratio:0.5016067776803973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 -DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10770082473754883 s -INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1093893051147461 s -DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=196644230493390246230935138837953203705, time:1750767735.1240141s req_ids:[8] -DEBUG 06-24 20:22:15 [manager.py:391] -ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:167.55366325378418ms total_cost_time:167.59753227233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10270 prompt_cache_len:5151 prompt_cache_ratio:0.501557935735151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 -DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10828399658203125 s -INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022782325744629 s -DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=33853320136259992533841961752353559037, time:1750767735.2958357s req_ids:[8] -DEBUG 06-24 20:22:15 [manager.py:391] -ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:200.55747032165527ms total_cost_time:200.60372352600098ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10271 prompt_cache_len:5151 prompt_cache_ratio:0.501509103300555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 -DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10780692100524902 s -INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s -DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=122125232397529444202375984015789356726, time:1750767735.5052178s req_ids:[8] -DEBUG 06-24 20:22:15 [manager.py:391] -ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:208.31632614135742ms total_cost_time:208.36210250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10272 prompt_cache_len:5151 prompt_cache_ratio:0.5014602803738317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 -DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s -INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.11045479774475098 s -DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=58858929114961699880501059172692946852, time:1750767735.7188945s req_ids:[8] -DEBUG 06-24 20:22:15 [manager.py:391] -ERROR 06-24 20:22:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:205.20544052124023ms total_cost_time:205.24859428405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10273 prompt_cache_len:5151 prompt_cache_ratio:0.5014114669522048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 -DEBUG 06-24 20:22:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:15 [manager.py:224] router recive req id 8 cost time 0.10851240158081055 s -INFO 06-24 20:22:15 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s -DEBUG 06-24 20:22:15 [manager.py:391] Prefill Batch: batch_id=165605065468315248266370372075708944722, time:1750767735.931981s req_ids:[8] -DEBUG 06-24 20:22:15 [manager.py:391] -ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:15 lightllm_req_id:8 first_token_cost:208.71210098266602ms total_cost_time:208.7554931640625ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10274 prompt_cache_len:5151 prompt_cache_ratio:0.5013626630328986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 -DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10880112648010254 s -INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083030700683594 s -DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=554535734519393799943423381313276459, time:1750767736.1582665s req_ids:[8] -DEBUG 06-24 20:22:16 [manager.py:391] -ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:387.3727321624756ms total_cost_time:387.4177932739258ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10275 prompt_cache_len:5151 prompt_cache_ratio:0.5013138686131386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 -DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10781359672546387 s -INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.10978150367736816 s -DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=49886438634246760885947694933797887742, time:1750767736.5392172s req_ids:[8] -DEBUG 06-24 20:22:16 [manager.py:391] -ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:208.3141803741455ms total_cost_time:208.3566188812256ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10276 prompt_cache_len:5151 prompt_cache_ratio:0.5012650836901518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 -DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10871720314025879 s -INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.11071133613586426 s -DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=129926623472035919997000197592832891459, time:1750767736.7627668s req_ids:[8] -DEBUG 06-24 20:22:16 [manager.py:391] -ERROR 06-24 20:22:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:219.7723388671875ms total_cost_time:219.8169231414795ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10277 prompt_cache_len:5151 prompt_cache_ratio:0.5012163082611657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 -DEBUG 06-24 20:22:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:16 [manager.py:224] router recive req id 8 cost time 0.10707497596740723 s -INFO 06-24 20:22:16 [manager.py:68] detokenization recv req id 8 cost time 0.10891509056091309 s -DEBUG 06-24 20:22:16 [manager.py:391] Prefill Batch: batch_id=313980723514558435763066028293727914177, time:1750767736.9791117s req_ids:[8] -DEBUG 06-24 20:22:16 [manager.py:391] -ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:16 lightllm_req_id:8 first_token_cost:206.27093315124512ms total_cost_time:206.315279006958ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10278 prompt_cache_len:5151 prompt_cache_ratio:0.5011675423234092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 -DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10842156410217285 s -INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11056661605834961 s -DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=298013927179041960902023171726822679443, time:1750767737.1915488s req_ids:[8] -DEBUG 06-24 20:22:17 [manager.py:391] -ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:208.77337455749512ms total_cost_time:208.8301181793213ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:10279 prompt_cache_len:5151 prompt_cache_ratio:0.5011187858741123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 -DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s -INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s -DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=281980234241351428324921683522295552104, time:1750767737.405839s req_ids:[8] -DEBUG 06-24 20:22:17 [manager.py:391] -ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:206.82883262634277ms total_cost_time:206.87270164489746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10280 prompt_cache_len:5151 prompt_cache_ratio:0.5010700389105058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 -DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10956811904907227 s -INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.11158370971679688 s -DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=238465566818917715473067542760257592925, time:1750767737.6186085s req_ids:[8] -DEBUG 06-24 20:22:17 [manager.py:391] -ERROR 06-24 20:22:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:205.92570304870605ms total_cost_time:205.96885681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10281 prompt_cache_len:5151 prompt_cache_ratio:0.501021301429822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 -DEBUG 06-24 20:22:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:17 [manager.py:224] router recive req id 8 cost time 0.10846877098083496 s -INFO 06-24 20:22:17 [manager.py:68] detokenization recv req id 8 cost time 0.1105496883392334 s -DEBUG 06-24 20:22:17 [manager.py:391] Prefill Batch: batch_id=161568975263501689945010728613233130396, time:1750767737.8321395s req_ids:[8] -DEBUG 06-24 20:22:17 [manager.py:391] -ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:17 lightllm_req_id:8 first_token_cost:364.0451431274414ms total_cost_time:364.0925884246826ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10282 prompt_cache_len:5151 prompt_cache_ratio:0.5009725734292939 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 -DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10827445983886719 s -INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11015462875366211 s -DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=329914915758111595283891041044952266875, time:1750767738.2110536s req_ids:[8] -DEBUG 06-24 20:22:18 [manager.py:391] -ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:226.30643844604492ms total_cost_time:226.3507843017578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10283 prompt_cache_len:5151 prompt_cache_ratio:0.5009238549061558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 -DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10829639434814453 s -INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11027097702026367 s -DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=301804600742518351186226696390072293639, time:1750767738.4322872s req_ids:[8] -DEBUG 06-24 20:22:18 [manager.py:391] -ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:206.496000289917ms total_cost_time:206.53891563415527ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10284 prompt_cache_len:5151 prompt_cache_ratio:0.5008751458576429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 -DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10814285278320312 s -INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11017632484436035 s -DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=21925370174256355441288257377919713421, time:1750767738.644661s req_ids:[8] -DEBUG 06-24 20:22:18 [manager.py:391] -ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:205.9495449066162ms total_cost_time:205.9950828552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10285 prompt_cache_len:5151 prompt_cache_ratio:0.5008264462809917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 -DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:18 [manager.py:224] router recive req id 8 cost time 0.10821652412414551 s -INFO 06-24 20:22:18 [manager.py:68] detokenization recv req id 8 cost time 0.11006355285644531 s -DEBUG 06-24 20:22:18 [manager.py:391] Prefill Batch: batch_id=250165149359517451626687889527235289342, time:1750767738.858237s req_ids:[8] -DEBUG 06-24 20:22:18 [manager.py:391] -ERROR 06-24 20:22:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:209.85770225524902ms total_cost_time:209.9020481109619ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10286 prompt_cache_len:5151 prompt_cache_ratio:0.5007777561734397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 -DEBUG 06-24 20:22:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10851860046386719 s -INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11052465438842773 s -DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=205136005706830345389167522242309785212, time:1750767739.0799248s req_ids:[8] -DEBUG 06-24 20:22:19 [manager.py:391] -ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:18 lightllm_req_id:8 first_token_cost:214.56527709960938ms total_cost_time:214.60890769958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10287 prompt_cache_len:5151 prompt_cache_ratio:0.5007290755322251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 -DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10886955261230469 s -INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11066389083862305 s -DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=10241466212906694874199316097219585421, time:1750767739.2945216s req_ids:[8] -DEBUG 06-24 20:22:19 [manager.py:391] -ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:206.0873508453369ms total_cost_time:206.1305046081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10288 prompt_cache_len:5151 prompt_cache_ratio:0.5006804043545878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 -DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.1092681884765625 s -INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.11133480072021484 s -DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=199519415269606847818021463458208936423, time:1750767739.5075781s req_ids:[8] -DEBUG 06-24 20:22:19 [manager.py:391] -ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:377.46500968933105ms total_cost_time:377.51102447509766ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10289 prompt_cache_len:5151 prompt_cache_ratio:0.5006317426377684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 -DEBUG 06-24 20:22:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:19 [manager.py:224] router recive req id 8 cost time 0.10747170448303223 s -INFO 06-24 20:22:19 [manager.py:68] detokenization recv req id 8 cost time 0.10933065414428711 s -DEBUG 06-24 20:22:19 [manager.py:391] Prefill Batch: batch_id=38893829181488244030219277355429765050, time:1750767739.8891523s req_ids:[8] -DEBUG 06-24 20:22:19 [manager.py:391] -ERROR 06-24 20:22:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:200.88815689086914ms total_cost_time:200.9294033050537ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10290 prompt_cache_len:5151 prompt_cache_ratio:0.5005830903790087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 -DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10808324813842773 s -INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.1096041202545166 s -DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=121505252080224643244078088261954986865, time:1750767740.1063125s req_ids:[8] -DEBUG 06-24 20:22:20 [manager.py:391] -ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:19 lightllm_req_id:8 first_token_cost:186.37824058532715ms total_cost_time:186.42091751098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10291 prompt_cache_len:5151 prompt_cache_ratio:0.5005344475755514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 -DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10882568359375 s -INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.1105794906616211 s -DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=82575235355024122005167262335914070838, time:1750767740.2955408s req_ids:[8] -DEBUG 06-24 20:22:20 [manager.py:391] -ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:200.0417709350586ms total_cost_time:200.08516311645508ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10292 prompt_cache_len:5151 prompt_cache_ratio:0.5004858142246404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 -DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10845041275024414 s -INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s -DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=208575206317893509792888435729128572495, time:1750767740.4953501s req_ids:[8] -DEBUG 06-24 20:22:20 [manager.py:391] -ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:208.0075740814209ms total_cost_time:208.04953575134277ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10293 prompt_cache_len:5151 prompt_cache_ratio:0.5004371903235209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 -DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10822200775146484 s -INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.10999655723571777 s -DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=250369875193676304520556204386907758406, time:1750767740.71641s req_ids:[8] -DEBUG 06-24 20:22:20 [manager.py:391] -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:217.0083522796631ms total_cost_time:217.05031394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10294 prompt_cache_len:5151 prompt_cache_ratio:0.5003885758694385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 -DEBUG 06-24 20:22:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:20 [manager.py:224] router recive req id 8 cost time 0.10842347145080566 s -INFO 06-24 20:22:20 [manager.py:68] detokenization recv req id 8 cost time 0.10994386672973633 s -DEBUG 06-24 20:22:20 [manager.py:391] Prefill Batch: batch_id=154885277650339155207194404052257143634, time:1750767740.931785s req_ids:[8] -DEBUG 06-24 20:22:20 [manager.py:391] -ERROR 06-24 20:22:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:165.63010215759277ms total_cost_time:165.67111015319824ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10295 prompt_cache_len:5151 prompt_cache_ratio:0.5003399708596405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 -DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s -INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.11031556129455566 s -DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=308534654132256305042290277092515060481, time:1750767741.1028113s req_ids:[8] -DEBUG 06-24 20:22:21 [manager.py:391] -ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:20 lightllm_req_id:8 first_token_cost:198.43626022338867ms total_cost_time:198.47869873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10296 prompt_cache_len:5151 prompt_cache_ratio:0.5002913752913752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 -DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10818099975585938 s -INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s -DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=66197736360831328427488129173146641571, time:1750767741.3069665s req_ids:[8] -DEBUG 06-24 20:22:21 [manager.py:391] -ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:366.40381813049316ms total_cost_time:366.44864082336426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10297 prompt_cache_len:5151 prompt_cache_ratio:0.5002427891618918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 -DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10840463638305664 s -INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.1101539134979248 s -DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=265377633317718680268647285829868327465, time:1750767741.680321s req_ids:[8] -DEBUG 06-24 20:22:21 [manager.py:391] -DEBUG 06-24 20:22:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 44158.180 tokens/s -DEBUG 06-24 20:22:21 [stats.py:37] Avg prompt tokens throughput: 44149.688 tokens/s -DEBUG 06-24 20:22:21 [stats.py:37] Avg generate tokens throughput: 8.492 tokens/s -ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:209.61689949035645ms total_cost_time:209.66029167175293ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10298 prompt_cache_len:5151 prompt_cache_ratio:0.5001942124684404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 -DEBUG 06-24 20:22:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:21 [manager.py:224] router recive req id 8 cost time 0.10810565948486328 s -INFO 06-24 20:22:21 [manager.py:68] detokenization recv req id 8 cost time 0.1098027229309082 s -DEBUG 06-24 20:22:21 [manager.py:391] Prefill Batch: batch_id=122785721189510170987315232710289558622, time:1750767741.8996363s req_ids:[8] -DEBUG 06-24 20:22:21 [manager.py:391] -ERROR 06-24 20:22:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:212.43667602539062ms total_cost_time:212.48269081115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10299 prompt_cache_len:5151 prompt_cache_ratio:0.5001456452082726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 -DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10799312591552734 s -INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s -DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=174845202187782950859567887247743806230, time:1750767742.1155999s req_ids:[8] -DEBUG 06-24 20:22:22 [manager.py:391] -ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:21 lightllm_req_id:8 first_token_cost:212.23092079162598ms total_cost_time:212.27383613586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10300 prompt_cache_len:5151 prompt_cache_ratio:0.5000970873786408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 -DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:22 [batch.py:51] router release req id 8 -DEBUG 06-24 20:22:22 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:22 [manager.py:283] -DEBUG 06-24 20:22:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:22 [manager.py:284] -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10791158676147461 s -INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10992312431335449 s -DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=31846366582046265711656508342113361118, time:1750767742.3334842s req_ids:[8] -DEBUG 06-24 20:22:22 [manager.py:391] -ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:209.20515060424805ms total_cost_time:209.24806594848633ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10301 prompt_cache_len:5151 prompt_cache_ratio:0.5000485389767984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 -DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10907316207885742 s -INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.11109018325805664 s -DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=255958905651420051754103079129364504700, time:1750767742.547657s req_ids:[8] -DEBUG 06-24 20:22:22 [manager.py:391] -ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:207.81707763671875ms total_cost_time:207.85999298095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10302 prompt_cache_len:5151 prompt_cache_ratio:0.5 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 -DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10781526565551758 s -INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10983610153198242 s -DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=114126566620706963311631077410174920024, time:1750767742.7637403s req_ids:[8] -DEBUG 06-24 20:22:22 [manager.py:391] -ERROR 06-24 20:22:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:206.1774730682373ms total_cost_time:206.2218189239502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10303 prompt_cache_len:5151 prompt_cache_ratio:0.4999514704455013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 -DEBUG 06-24 20:22:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:22 [manager.py:224] router recive req id 8 cost time 0.10793519020080566 s -INFO 06-24 20:22:22 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s -DEBUG 06-24 20:22:22 [manager.py:391] Prefill Batch: batch_id=163699608436570609887097916997682510572, time:1750767742.9783657s req_ids:[8] -DEBUG 06-24 20:22:22 [manager.py:391] -ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:22 lightllm_req_id:8 first_token_cost:212.0819091796875ms total_cost_time:212.1257781982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10304 prompt_cache_len:5151 prompt_cache_ratio:0.499902950310559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 -DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s -INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s -DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=208990138659131123260733264382370260611, time:1750767743.195763s req_ids:[8] -DEBUG 06-24 20:22:23 [manager.py:391] -INFO 06-24 20:22:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:23 [statics_utils.py:24] mean first cost: 228.4506684546758 ms -INFO 06-24 20:22:23 [statics_utils.py:24] mean per token cost: 0.06815599078567418 ms -ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:376.68418884277344ms total_cost_time:376.7426013946533ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10305 prompt_cache_len:5151 prompt_cache_ratio:0.49985443959243087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 -DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10951113700866699 s -INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.1115875244140625 s -DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=126039944702570846460710720030489876343, time:1750767743.5768766s req_ids:[8] -DEBUG 06-24 20:22:23 [manager.py:391] -ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:205.9171199798584ms total_cost_time:205.9648036956787ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:10306 prompt_cache_len:5151 prompt_cache_ratio:0.4998059382883757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 -DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:23 [manager.py:224] router recive req id 8 cost time 0.10881567001342773 s -INFO 06-24 20:22:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088776588439941 s -DEBUG 06-24 20:22:23 [manager.py:391] Prefill Batch: batch_id=244203032497385553012173206708891756280, time:1750767743.792903s req_ids:[8] -DEBUG 06-24 20:22:23 [manager.py:391] -ERROR 06-24 20:22:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:212.34440803527832ms total_cost_time:212.388277053833ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10307 prompt_cache_len:5151 prompt_cache_ratio:0.49975744639565345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 -DEBUG 06-24 20:22:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10779714584350586 s -INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s -DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=69741261299761267808557202108976926020, time:1750767744.0089617s req_ids:[8] -DEBUG 06-24 20:22:24 [manager.py:391] -ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:23 lightllm_req_id:8 first_token_cost:200.32548904418945ms total_cost_time:200.36768913269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10308 prompt_cache_len:5151 prompt_cache_ratio:0.49970896391152503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 -DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s -INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.11007261276245117 s -DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=98975056219619573849370661682914439005, time:1750767744.215854s req_ids:[8] -DEBUG 06-24 20:22:24 [manager.py:391] -ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:208.97316932678223ms total_cost_time:209.01894569396973ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10309 prompt_cache_len:5151 prompt_cache_ratio:0.4996604908332525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 -DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10784602165222168 s -INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10991168022155762 s -DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=220273299568137152097911005850653866062, time:1750767744.4353983s req_ids:[8] -DEBUG 06-24 20:22:24 [manager.py:391] -ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:205.86109161376953ms total_cost_time:205.9025764465332ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10310 prompt_cache_len:5151 prompt_cache_ratio:0.49961202715809894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 -DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10803961753845215 s -INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.11015152931213379 s -DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=9982364954087842745798978268399770415, time:1750767744.6413734s req_ids:[8] -DEBUG 06-24 20:22:24 [manager.py:391] -ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:203.05681228637695ms total_cost_time:203.09877395629883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10311 prompt_cache_len:5151 prompt_cache_ratio:0.4995635728833285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 -DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:24 [manager.py:224] router recive req id 8 cost time 0.10796904563903809 s -INFO 06-24 20:22:24 [manager.py:68] detokenization recv req id 8 cost time 0.10993123054504395 s -DEBUG 06-24 20:22:24 [manager.py:391] Prefill Batch: batch_id=130468150686709755216027306805361528812, time:1750767744.855029s req_ids:[8] -DEBUG 06-24 20:22:24 [manager.py:391] -ERROR 06-24 20:22:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:168.99704933166504ms total_cost_time:169.0382957458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10312 prompt_cache_len:5151 prompt_cache_ratio:0.49951512800620634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 -DEBUG 06-24 20:22:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10899186134338379 s -INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.11089849472045898 s -DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=260355014748932165591810164110156752293, time:1750767745.0275855s req_ids:[8] -DEBUG 06-24 20:22:25 [manager.py:391] -ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:24 lightllm_req_id:8 first_token_cost:208.93573760986328ms total_cost_time:208.97746086120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10313 prompt_cache_len:5151 prompt_cache_ratio:0.49946669252399883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 -DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s -INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s -DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=224501941774865367286307918825286970399, time:1750767745.2511392s req_ids:[8] -DEBUG 06-24 20:22:25 [manager.py:391] -ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:374.4499683380127ms total_cost_time:374.4935989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10314 prompt_cache_len:5151 prompt_cache_ratio:0.49941826643397325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 -DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10799574851989746 s -INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.10995864868164062 s -DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=172666715562177365570960651566326762540, time:1750767745.6217852s req_ids:[8] -DEBUG 06-24 20:22:25 [manager.py:391] -ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:207.14426040649414ms total_cost_time:207.18812942504883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10315 prompt_cache_len:5151 prompt_cache_ratio:0.499369849733398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 -DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:25 [manager.py:224] router recive req id 8 cost time 0.10773825645446777 s -INFO 06-24 20:22:25 [manager.py:68] detokenization recv req id 8 cost time 0.1097259521484375 s -DEBUG 06-24 20:22:25 [manager.py:391] Prefill Batch: batch_id=91160378675212571757982830841370027548, time:1750767745.8337877s req_ids:[8] -DEBUG 06-24 20:22:25 [manager.py:391] -ERROR 06-24 20:22:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:200.72364807128906ms total_cost_time:200.76584815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10316 prompt_cache_len:5151 prompt_cache_ratio:0.49932144241954246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 -DEBUG 06-24 20:22:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s -INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11085009574890137 s -DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=320883836373186921317180501895490250335, time:1750767746.0427434s req_ids:[8] -DEBUG 06-24 20:22:26 [manager.py:391] -ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:25 lightllm_req_id:8 first_token_cost:211.97032928466797ms total_cost_time:212.01467514038086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10317 prompt_cache_len:5151 prompt_cache_ratio:0.49927304448967724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 -DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10814094543457031 s -INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s -DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=68407154439700804799213699883633472357, time:1750767746.2597349s req_ids:[8] -DEBUG 06-24 20:22:26 [manager.py:391] -ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:207.1669101715088ms total_cost_time:207.20934867858887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10318 prompt_cache_len:5151 prompt_cache_ratio:0.49922465594107385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 -DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.1089632511138916 s -INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11100530624389648 s -DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=294625448838183442552014483913045551464, time:1750767746.4716535s req_ids:[8] -DEBUG 06-24 20:22:26 [manager.py:391] -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:214.0216827392578ms total_cost_time:214.0657901763916ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10319 prompt_cache_len:5151 prompt_cache_ratio:0.49917627677100496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 -DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s -INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11089229583740234 s -DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=63436086032351738149397686368699925569, time:1750767746.6890297s req_ids:[8] -DEBUG 06-24 20:22:26 [manager.py:391] -ERROR 06-24 20:22:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:203.26828956604004ms total_cost_time:203.31239700317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10320 prompt_cache_len:5151 prompt_cache_ratio:0.4991279069767442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 -DEBUG 06-24 20:22:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:26 [manager.py:224] router recive req id 8 cost time 0.10803008079528809 s -INFO 06-24 20:22:26 [manager.py:68] detokenization recv req id 8 cost time 0.11008596420288086 s -DEBUG 06-24 20:22:26 [manager.py:391] Prefill Batch: batch_id=128642204472296785825428752584970832287, time:1750767746.8998265s req_ids:[8] -DEBUG 06-24 20:22:26 [manager.py:391] -ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:26 lightllm_req_id:8 first_token_cost:377.3789405822754ms total_cost_time:377.4247169494629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10321 prompt_cache_len:5151 prompt_cache_ratio:0.4990795465555663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 -DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10783672332763672 s -INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.10988616943359375 s -DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=73461674454890354100181805387096865509, time:1750767747.2817729s req_ids:[8] -DEBUG 06-24 20:22:27 [manager.py:391] -ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:201.7970085144043ms total_cost_time:201.8423080444336ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10322 prompt_cache_len:5151 prompt_cache_ratio:0.4990311955047471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 -DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10875558853149414 s -INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.11001753807067871 s -DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=240367748368764043544440764879147296010, time:1750767747.5031447s req_ids:[8] -DEBUG 06-24 20:22:27 [manager.py:391] -ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:221.00067138671875ms total_cost_time:221.04573249816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10323 prompt_cache_len:5151 prompt_cache_ratio:0.4989828538215635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 -DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10805892944335938 s -INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020350456237793 s -DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=313076179508639764375288314961895668028, time:1750767747.720255s req_ids:[8] -DEBUG 06-24 20:22:27 [manager.py:391] -ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:211.25173568725586ms total_cost_time:211.29631996154785ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10324 prompt_cache_len:5151 prompt_cache_ratio:0.4989345215032933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 -DEBUG 06-24 20:22:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:27 [manager.py:224] router recive req id 8 cost time 0.10803937911987305 s -INFO 06-24 20:22:27 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s -DEBUG 06-24 20:22:27 [manager.py:391] Prefill Batch: batch_id=197981968021660726835889293629654436938, time:1750767747.9346037s req_ids:[8] -DEBUG 06-24 20:22:27 [manager.py:391] -ERROR 06-24 20:22:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:162.5385284423828ms total_cost_time:162.59407997131348ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:10325 prompt_cache_len:5151 prompt_cache_ratio:0.4988861985472155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 -DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s -INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10979580879211426 s -DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=317699360792474351072496784488527266591, time:1750767748.1027286s req_ids:[8] -DEBUG 06-24 20:22:28 [manager.py:391] -ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:27 lightllm_req_id:8 first_token_cost:197.30830192565918ms total_cost_time:197.35240936279297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10326 prompt_cache_len:5151 prompt_cache_ratio:0.4988378849506101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 -DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10747551918029785 s -INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10917520523071289 s -DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=186163459001219566064830966149439004907, time:1750767748.3090603s req_ids:[8] -DEBUG 06-24 20:22:28 [manager.py:391] -ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:207.79943466186523ms total_cost_time:207.84330368041992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10327 prompt_cache_len:5151 prompt_cache_ratio:0.4987895807107582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 -DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10794806480407715 s -INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.10974287986755371 s -DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=163861439566978617340597609768246352247, time:1750767748.5209s req_ids:[8] -DEBUG 06-24 20:22:28 [manager.py:391] -ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:373.78954887390137ms total_cost_time:373.83484840393066ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10328 prompt_cache_len:5151 prompt_cache_ratio:0.4987412858249419 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 -DEBUG 06-24 20:22:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:28 [manager.py:224] router recive req id 8 cost time 0.10881161689758301 s -INFO 06-24 20:22:28 [manager.py:68] detokenization recv req id 8 cost time 0.1106107234954834 s -DEBUG 06-24 20:22:28 [manager.py:391] Prefill Batch: batch_id=195338795940714724733914543550023223251, time:1750767748.9029186s req_ids:[8] -DEBUG 06-24 20:22:28 [manager.py:391] -ERROR 06-24 20:22:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:204.10871505737305ms total_cost_time:204.15377616882324ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10329 prompt_cache_len:5151 prompt_cache_ratio:0.4986930002904444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 -DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10907840728759766 s -INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s -DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=295255866699181792380412017212270903082, time:1750767749.1119401s req_ids:[8] -DEBUG 06-24 20:22:29 [manager.py:391] -ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:28 lightllm_req_id:8 first_token_cost:210.2653980255127ms total_cost_time:210.3102207183838ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10330 prompt_cache_len:5151 prompt_cache_ratio:0.49864472410454985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 -DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10822081565856934 s -INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s -DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=121087541024802860329966955500409284694, time:1750767749.3291287s req_ids:[8] -DEBUG 06-24 20:22:29 [manager.py:391] -ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:168.0467128753662ms total_cost_time:168.0901050567627ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10331 prompt_cache_len:5151 prompt_cache_ratio:0.4985964572645436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 -DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.1076056957244873 s -INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s -DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=164893767722855489744220284806006295916, time:1750767749.5021951s req_ids:[8] -DEBUG 06-24 20:22:29 [manager.py:391] -ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:163.88535499572754ms total_cost_time:163.9273166656494ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10332 prompt_cache_len:5151 prompt_cache_ratio:0.498548199767712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 -DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.11025118827819824 s -INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.11222982406616211 s -DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=65560349686911681658934658032153356946, time:1750767749.6748888s req_ids:[8] -DEBUG 06-24 20:22:29 [manager.py:391] -ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:202.87442207336426ms total_cost_time:202.91757583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10333 prompt_cache_len:5151 prompt_cache_ratio:0.4984999516113423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 -DEBUG 06-24 20:22:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:29 [manager.py:224] router recive req id 8 cost time 0.10866403579711914 s -INFO 06-24 20:22:29 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s -DEBUG 06-24 20:22:29 [manager.py:391] Prefill Batch: batch_id=28480617321864542271562399776639258880, time:1750767749.8812187s req_ids:[8] -DEBUG 06-24 20:22:29 [manager.py:391] -ERROR 06-24 20:22:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:201.39074325561523ms total_cost_time:201.43461227416992ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10334 prompt_cache_len:5151 prompt_cache_ratio:0.49845171279272305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 -DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10776185989379883 s -INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.10971951484680176 s -DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=11582177308451012744215777984298933547, time:1750767750.0917037s req_ids:[8] -DEBUG 06-24 20:22:30 [manager.py:391] -ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:29 lightllm_req_id:8 first_token_cost:212.10169792175293ms total_cost_time:212.14628219604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10335 prompt_cache_len:5151 prompt_cache_ratio:0.4984034833091437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 -DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.3115060329437256 s -INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.31355905532836914 s -DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=35273692193330766977074673194617282265, time:1750767750.5087545s req_ids:[8] -DEBUG 06-24 20:22:30 [manager.py:391] -ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:417.5593852996826ms total_cost_time:417.6034927368164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10336 prompt_cache_len:5151 prompt_cache_ratio:0.49835526315789475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 -DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10763692855834961 s -INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.10976076126098633 s -DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=334347332458898272621366859657519121522, time:1750767750.730738s req_ids:[8] -DEBUG 06-24 20:22:30 [manager.py:391] -ERROR 06-24 20:22:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:208.67228507995605ms total_cost_time:208.71758460998535ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10337 prompt_cache_len:5151 prompt_cache_ratio:0.49830705233626776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 -DEBUG 06-24 20:22:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:30 [manager.py:224] router recive req id 8 cost time 0.10825014114379883 s -INFO 06-24 20:22:30 [manager.py:68] detokenization recv req id 8 cost time 0.11017370223999023 s -DEBUG 06-24 20:22:30 [manager.py:391] Prefill Batch: batch_id=43203262180246446197206706236438566724, time:1750767750.9441006s req_ids:[8] -DEBUG 06-24 20:22:30 [manager.py:391] -ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:30 lightllm_req_id:8 first_token_cost:207.24773406982422ms total_cost_time:207.2916030883789ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10338 prompt_cache_len:5151 prompt_cache_ratio:0.4982588508415554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 -DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10782265663146973 s -INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.10988378524780273 s -DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=16542570089010733181376663330825691080, time:1750767751.1584325s req_ids:[8] -DEBUG 06-24 20:22:31 [manager.py:391] -ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:208.7728977203369ms total_cost_time:208.8160514831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10339 prompt_cache_len:5151 prompt_cache_ratio:0.49821065867105135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 -DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10765194892883301 s -INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.10968732833862305 s -DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=35588095463613294945725117312591505899, time:1750767751.3737185s req_ids:[8] -DEBUG 06-24 20:22:31 [manager.py:391] -ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:211.20071411132812ms total_cost_time:211.24505996704102ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10340 prompt_cache_len:5151 prompt_cache_ratio:0.4981624758220503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 -DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s -INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.1096038818359375 s -DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=38830237430708855680287110073681220014, time:1750767751.589177s req_ids:[8] -DEBUG 06-24 20:22:31 [manager.py:391] -ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:165.23408889770508ms total_cost_time:165.27438163757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10341 prompt_cache_len:5151 prompt_cache_ratio:0.498114302291848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 -DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.11124110221862793 s -INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.11335515975952148 s -DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=268351695856400630840101911890586519184, time:1750767751.7599645s req_ids:[8] -DEBUG 06-24 20:22:31 [manager.py:391] -DEBUG 06-24 20:22:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 45060.282 tokens/s -DEBUG 06-24 20:22:31 [stats.py:37] Avg prompt tokens throughput: 45051.552 tokens/s -DEBUG 06-24 20:22:31 [stats.py:37] Avg generate tokens throughput: 8.730 tokens/s -ERROR 06-24 20:22:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:202.25024223327637ms total_cost_time:202.29291915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10342 prompt_cache_len:5151 prompt_cache_ratio:0.49806613807774125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 -DEBUG 06-24 20:22:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:31 [manager.py:224] router recive req id 8 cost time 0.10857415199279785 s -INFO 06-24 20:22:31 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s -DEBUG 06-24 20:22:31 [manager.py:391] Prefill Batch: batch_id=150076075180652426484534194360203352199, time:1750767751.966243s req_ids:[8] -DEBUG 06-24 20:22:31 [manager.py:391] -ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:31 lightllm_req_id:8 first_token_cost:361.54627799987793ms total_cost_time:361.61160469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:10343 prompt_cache_len:5151 prompt_cache_ratio:0.49801798317702795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 -DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:32 [batch.py:51] router release req id 8 -INFO 06-24 20:22:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10849618911743164 s -INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s -DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=257336771039768912987092749540052854179, time:1750767752.3332877s req_ids:[8] -DEBUG 06-24 20:22:32 [manager.py:391] -ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:207.12542533874512ms total_cost_time:207.1688175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10344 prompt_cache_len:5151 prompt_cache_ratio:0.49796983758700697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 -DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10831665992736816 s -INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.11048007011413574 s -DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=250717237158648149939676946563682262513, time:1750767752.5588596s req_ids:[8] -DEBUG 06-24 20:22:32 [manager.py:391] -ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:225.05640983581543ms total_cost_time:225.10027885437012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10345 prompt_cache_len:5151 prompt_cache_ratio:0.49792170130497826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 -DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10779213905334473 s -INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.10985279083251953 s -DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=268337710649910725284406591789103686737, time:1750767752.7777896s req_ids:[8] -DEBUG 06-24 20:22:32 [manager.py:391] -ERROR 06-24 20:22:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:203.39536666870117ms total_cost_time:203.43732833862305ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10346 prompt_cache_len:5151 prompt_cache_ratio:0.4978735743282428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 -DEBUG 06-24 20:22:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:32 [manager.py:224] router recive req id 8 cost time 0.10774445533752441 s -INFO 06-24 20:22:32 [manager.py:68] detokenization recv req id 8 cost time 0.10970377922058105 s -DEBUG 06-24 20:22:32 [manager.py:391] Prefill Batch: batch_id=173138731795658629343972024572512214313, time:1750767752.9873455s req_ids:[8] -DEBUG 06-24 20:22:32 [manager.py:391] -ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:32 lightllm_req_id:8 first_token_cost:205.11198043823242ms total_cost_time:205.1694393157959ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:10347 prompt_cache_len:5151 prompt_cache_ratio:0.49782545665410266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 -DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10764169692993164 s -INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11034560203552246 s -DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=83739402286627884308481944804561816234, time:1750767753.1995344s req_ids:[8] -DEBUG 06-24 20:22:33 [manager.py:391] -ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:207.57675170898438ms total_cost_time:207.61942863464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10348 prompt_cache_len:5151 prompt_cache_ratio:0.49777734827986087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 -DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s -INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11075592041015625 s -DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=291965758908185270691687565653434800912, time:1750767753.4139185s req_ids:[8] -DEBUG 06-24 20:22:33 [manager.py:391] -ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:212.43739128112793ms total_cost_time:212.49628067016602ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10349 prompt_cache_len:5151 prompt_cache_ratio:0.4977292492028215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 -DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10814833641052246 s -INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.10976815223693848 s -DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=246908911873727272078964649699445807867, time:1750767753.631525s req_ids:[8] -DEBUG 06-24 20:22:33 [manager.py:391] -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:166.43905639648438ms total_cost_time:166.48101806640625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10350 prompt_cache_len:5151 prompt_cache_ratio:0.49768115942028984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 -DEBUG 06-24 20:22:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:33 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s -INFO 06-24 20:22:33 [manager.py:68] detokenization recv req id 8 cost time 0.11031317710876465 s -DEBUG 06-24 20:22:33 [manager.py:391] Prefill Batch: batch_id=13580222801466031512273372410953187833, time:1750767753.8032863s req_ids:[8] -DEBUG 06-24 20:22:33 [manager.py:391] -ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:33 lightllm_req_id:8 first_token_cost:367.1088218688965ms total_cost_time:367.15149879455566ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10351 prompt_cache_len:5151 prompt_cache_ratio:0.49763307892957204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 -DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s -INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11083698272705078 s -DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=200651820041839026377652842415654156519, time:1750767754.1750822s req_ids:[8] -DEBUG 06-24 20:22:34 [manager.py:391] -ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:204.99300956726074ms total_cost_time:205.03640174865723ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10352 prompt_cache_len:5151 prompt_cache_ratio:0.49758500772797526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 -DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10944247245788574 s -INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11207199096679688 s -DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=241917812701794860302151998051664334017, time:1750767754.386984s req_ids:[8] -DEBUG 06-24 20:22:34 [manager.py:391] -ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:211.17615699768066ms total_cost_time:211.22074127197266ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10353 prompt_cache_len:5151 prompt_cache_ratio:0.4975369458128079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 -DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.1088106632232666 s -INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11084604263305664 s -DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=191089809990868524168833930790470483477, time:1750767754.605447s req_ids:[8] -DEBUG 06-24 20:22:34 [manager.py:391] -ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:207.98468589782715ms total_cost_time:208.02783966064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10354 prompt_cache_len:5151 prompt_cache_ratio:0.4974888931813792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 -DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:34 [manager.py:224] router recive req id 8 cost time 0.10904359817504883 s -INFO 06-24 20:22:34 [manager.py:68] detokenization recv req id 8 cost time 0.11125016212463379 s -DEBUG 06-24 20:22:34 [manager.py:391] Prefill Batch: batch_id=269989263421000906117485194464522506677, time:1750767754.8309176s req_ids:[8] -DEBUG 06-24 20:22:34 [manager.py:391] -ERROR 06-24 20:22:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:223.8941192626953ms total_cost_time:223.94108772277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:10355 prompt_cache_len:5151 prompt_cache_ratio:0.4974408498309995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 -DEBUG 06-24 20:22:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s -INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s -DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=114931424319703533334877654731112586586, time:1750767755.0504596s req_ids:[8] -DEBUG 06-24 20:22:35 [manager.py:391] -ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:34 lightllm_req_id:8 first_token_cost:209.9628448486328ms total_cost_time:210.0057601928711ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10356 prompt_cache_len:5151 prompt_cache_ratio:0.4973928157589803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 -DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s -INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.11225295066833496 s -DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=194020831668532145025468100928642168278, time:1750767755.2651596s req_ids:[8] -DEBUG 06-24 20:22:35 [manager.py:391] -ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:211.50922775268555ms total_cost_time:211.55428886413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10357 prompt_cache_len:5151 prompt_cache_ratio:0.49734479096263395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 -DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10762667655944824 s -INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.1095590591430664 s -DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=260435290179522882675273827721914900035, time:1750767755.4812856s req_ids:[8] -DEBUG 06-24 20:22:35 [manager.py:391] -ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:165.33350944519043ms total_cost_time:165.3749942779541ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10358 prompt_cache_len:5151 prompt_cache_ratio:0.497296775439274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 -DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:35 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s -INFO 06-24 20:22:35 [manager.py:68] detokenization recv req id 8 cost time 0.1098470687866211 s -DEBUG 06-24 20:22:35 [manager.py:391] Prefill Batch: batch_id=152532607541201060370658523858501765498, time:1750767755.6515634s req_ids:[8] -DEBUG 06-24 20:22:35 [manager.py:391] -ERROR 06-24 20:22:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:201.22003555297852ms total_cost_time:201.2653350830078ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10359 prompt_cache_len:5151 prompt_cache_ratio:0.4972487691862149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 -DEBUG 06-24 20:22:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.3097038269042969 s -INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.31162428855895996 s -DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=49033115129791760927870708781634549489, time:1750767756.0620825s req_ids:[8] -DEBUG 06-24 20:22:36 [manager.py:391] -ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:35 lightllm_req_id:8 first_token_cost:414.32905197143555ms total_cost_time:414.37411308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10360 prompt_cache_len:5151 prompt_cache_ratio:0.4972007722007722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 -DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10836315155029297 s -INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s -DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=63540603265704405844453718881465339435, time:1750767756.2801273s req_ids:[8] -DEBUG 06-24 20:22:36 [manager.py:391] -ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:207.6432704925537ms total_cost_time:207.7028751373291ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:10361 prompt_cache_len:5151 prompt_cache_ratio:0.4971527844802625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 -DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s -INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.10999035835266113 s -DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=309673494027345793833177531987089616344, time:1750767756.4945261s req_ids:[8] -DEBUG 06-24 20:22:36 [manager.py:391] -ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:212.43619918823242ms total_cost_time:212.4803066253662ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10362 prompt_cache_len:5151 prompt_cache_ratio:0.49710480602200346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 -DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10894179344177246 s -INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.11113762855529785 s -DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=238694357498597557865724648557873600935, time:1750767756.7107904s req_ids:[8] -DEBUG 06-24 20:22:36 [manager.py:391] -ERROR 06-24 20:22:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:205.33370971679688ms total_cost_time:205.37900924682617ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10363 prompt_cache_len:5151 prompt_cache_ratio:0.4970568368233137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 -DEBUG 06-24 20:22:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:36 [manager.py:224] router recive req id 8 cost time 0.10792994499206543 s -INFO 06-24 20:22:36 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s -DEBUG 06-24 20:22:36 [manager.py:391] Prefill Batch: batch_id=135707565040266280925665906191334458973, time:1750767756.9236083s req_ids:[8] -DEBUG 06-24 20:22:36 [manager.py:391] -ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:36 lightllm_req_id:8 first_token_cost:208.11843872070312ms total_cost_time:208.16326141357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10364 prompt_cache_len:5151 prompt_cache_ratio:0.4970088768815129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 -DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.1072397232055664 s -INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.10914826393127441 s -DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=131440745439312728152478431304230977580, time:1750767757.1364512s req_ids:[8] -DEBUG 06-24 20:22:37 [manager.py:391] -ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:162.61744499206543ms total_cost_time:162.6596450805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10365 prompt_cache_len:5151 prompt_cache_ratio:0.49696092619392185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 -DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.1081247329711914 s -INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s -DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=189343553643351174697631666878034050680, time:1750767757.3060403s req_ids:[8] -DEBUG 06-24 20:22:37 [manager.py:391] -ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:198.81868362426758ms total_cost_time:198.87638092041016ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:10366 prompt_cache_len:5151 prompt_cache_ratio:0.4969129847578622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 -DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.10904526710510254 s -INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.11103439331054688 s -DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=287566056518895171573620605533759385613, time:1750767757.5114832s req_ids:[8] -DEBUG 06-24 20:22:37 [manager.py:391] -ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:369.98820304870605ms total_cost_time:370.03135681152344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10367 prompt_cache_len:5151 prompt_cache_ratio:0.4968650525706569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 -DEBUG 06-24 20:22:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:37 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s -INFO 06-24 20:22:37 [manager.py:68] detokenization recv req id 8 cost time 0.11045241355895996 s -DEBUG 06-24 20:22:37 [manager.py:391] Prefill Batch: batch_id=191381530966494656238064068975601008873, time:1750767757.886067s req_ids:[8] -DEBUG 06-24 20:22:37 [manager.py:391] -ERROR 06-24 20:22:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:207.43513107299805ms total_cost_time:207.47756958007812ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10368 prompt_cache_len:5151 prompt_cache_ratio:0.49681712962962965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 -DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s -INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s -DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=94221755782992239755155540692380086222, time:1750767758.1010854s req_ids:[8] -DEBUG 06-24 20:22:38 [manager.py:391] -ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:37 lightllm_req_id:8 first_token_cost:208.25457572937012ms total_cost_time:208.2967758178711ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10369 prompt_cache_len:5151 prompt_cache_ratio:0.4967692159321053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 -DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10841250419616699 s -INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s -DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=111717341550881893506543347610036031397, time:1750767758.3156047s req_ids:[8] -DEBUG 06-24 20:22:38 [manager.py:391] -ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:208.28866958618164ms total_cost_time:208.33277702331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10370 prompt_cache_len:5151 prompt_cache_ratio:0.4967213114754098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 -DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10834074020385742 s -INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s -DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=236470228113403493009977736800627042858, time:1750767758.5294502s req_ids:[8] -DEBUG 06-24 20:22:38 [manager.py:391] -ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:207.10062980651855ms total_cost_time:207.14402198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10371 prompt_cache_len:5151 prompt_cache_ratio:0.4966734162568701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 -DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.10852861404418945 s -INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s -DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=105076126124137557891165488676981637224, time:1750767758.749048s req_ids:[8] -DEBUG 06-24 20:22:38 [manager.py:391] -ERROR 06-24 20:22:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:215.99364280700684ms total_cost_time:216.01390838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:10372 prompt_cache_len:5151 prompt_cache_ratio:0.4966255302738141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 -DEBUG 06-24 20:22:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:38 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s -INFO 06-24 20:22:38 [manager.py:68] detokenization recv req id 8 cost time 0.10840392112731934 s -DEBUG 06-24 20:22:38 [manager.py:391] Prefill Batch: batch_id=51983285751805140217907341864661589055, time:1750767758.966502s req_ids:[8] -DEBUG 06-24 20:22:38 [manager.py:391] -ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:38 lightllm_req_id:8 first_token_cost:200.08373260498047ms total_cost_time:200.12593269348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10373 prompt_cache_len:5151 prompt_cache_ratio:0.4965776535235708 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 -DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10847210884094238 s -INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11029911041259766 s -DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=299417465918477213483861331001562030897, time:1750767759.1686184s req_ids:[8] -DEBUG 06-24 20:22:39 [manager.py:391] -ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:361.99307441711426ms total_cost_time:362.03765869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10374 prompt_cache_len:5151 prompt_cache_ratio:0.4965297860034702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 -DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:39 [batch.py:51] router release req id 8 -INFO 06-24 20:22:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10876011848449707 s -INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11058354377746582 s -DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=51417506593072752060489619094449772311, time:1750767759.5350707s req_ids:[8] -DEBUG 06-24 20:22:39 [manager.py:391] -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:209.92660522460938ms total_cost_time:209.97023582458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10375 prompt_cache_len:5151 prompt_cache_ratio:0.4964819277108434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 -DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10879659652709961 s -INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s -DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=165496056998650210461027873656268197863, time:1750767759.7523823s req_ids:[8] -DEBUG 06-24 20:22:39 [manager.py:391] -ERROR 06-24 20:22:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:207.3063850402832ms total_cost_time:207.3495388031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10376 prompt_cache_len:5151 prompt_cache_ratio:0.49643407864302236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 -DEBUG 06-24 20:22:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:39 [manager.py:224] router recive req id 8 cost time 0.10758614540100098 s -INFO 06-24 20:22:39 [manager.py:68] detokenization recv req id 8 cost time 0.10932064056396484 s -DEBUG 06-24 20:22:39 [manager.py:391] Prefill Batch: batch_id=251603891124680208239392832697422523312, time:1750767759.964518s req_ids:[8] -DEBUG 06-24 20:22:39 [manager.py:391] -ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:39 lightllm_req_id:8 first_token_cost:204.54072952270508ms total_cost_time:204.58340644836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10377 prompt_cache_len:5151 prompt_cache_ratio:0.4963862387973403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 -DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10840654373168945 s -INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.1102149486541748 s -DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=318423228381345826766951926946577405489, time:1750767760.1759562s req_ids:[8] -DEBUG 06-24 20:22:40 [manager.py:391] -ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:203.6149501800537ms total_cost_time:203.6592960357666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10378 prompt_cache_len:5151 prompt_cache_ratio:0.49633840817113123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 -DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10730433464050293 s -INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.10903501510620117 s -DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=99354618613098526477662078356212708650, time:1750767760.3848908s req_ids:[8] -DEBUG 06-24 20:22:40 [manager.py:391] -ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:202.0254135131836ms total_cost_time:202.0885944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:10379 prompt_cache_len:5151 prompt_cache_ratio:0.49629058676173043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 -DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s -INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.10895085334777832 s -DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=247339359758741247886879341973693801372, time:1750767760.5972955s req_ids:[8] -DEBUG 06-24 20:22:40 [manager.py:391] -ERROR 06-24 20:22:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:209.14554595947266ms total_cost_time:209.19013023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10380 prompt_cache_len:5151 prompt_cache_ratio:0.496242774566474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 -DEBUG 06-24 20:22:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:40 [manager.py:224] router recive req id 8 cost time 0.10872912406921387 s -INFO 06-24 20:22:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104426383972168 s -DEBUG 06-24 20:22:40 [manager.py:391] Prefill Batch: batch_id=297604454597966095426785159653358283907, time:1750767760.8089724s req_ids:[8] -DEBUG 06-24 20:22:40 [manager.py:391] -ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:40 lightllm_req_id:8 first_token_cost:341.8080806732178ms total_cost_time:341.85171127319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10381 prompt_cache_len:5151 prompt_cache_ratio:0.49619497158269915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 -DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s -INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11005353927612305 s -DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=310143746084296752404464710043421217265, time:1750767761.1567204s req_ids:[8] -DEBUG 06-24 20:22:41 [manager.py:391] -ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:203.41253280639648ms total_cost_time:203.45401763916016ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10382 prompt_cache_len:5151 prompt_cache_ratio:0.4961471778077442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 -DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10950016975402832 s -INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s -DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=235449628526656197694447854520887458303, time:1750767761.368506s req_ids:[8] -DEBUG 06-24 20:22:41 [manager.py:391] -ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:207.169771194458ms total_cost_time:207.2138786315918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10383 prompt_cache_len:5151 prompt_cache_ratio:0.4960993932389483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 -DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10831284523010254 s -INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11029410362243652 s -DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=159722831330546396644859523699728712487, time:1750767761.5808394s req_ids:[8] -DEBUG 06-24 20:22:41 [manager.py:391] -ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:203.9780616760254ms total_cost_time:204.02264595031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10384 prompt_cache_len:5151 prompt_cache_ratio:0.49605161787365176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 -DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:41 [manager.py:224] router recive req id 8 cost time 0.10920047760009766 s -INFO 06-24 20:22:41 [manager.py:68] detokenization recv req id 8 cost time 0.11128544807434082 s -DEBUG 06-24 20:22:41 [manager.py:391] Prefill Batch: batch_id=240584616464094947768473992828521159306, time:1750767761.8018107s req_ids:[8] -DEBUG 06-24 20:22:41 [manager.py:391] -DEBUG 06-24 20:22:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 44388.222 tokens/s -DEBUG 06-24 20:22:41 [stats.py:37] Avg prompt tokens throughput: 44379.658 tokens/s -DEBUG 06-24 20:22:41 [stats.py:37] Avg generate tokens throughput: 8.564 tokens/s -ERROR 06-24 20:22:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:223.8442897796631ms total_cost_time:223.88815879821777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10385 prompt_cache_len:5151 prompt_cache_ratio:0.496003851709196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 -DEBUG 06-24 20:22:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10802721977233887 s -INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10994887351989746 s -DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=239061471339755844093469635866648094250, time:1750767762.0226786s req_ids:[8] -DEBUG 06-24 20:22:42 [manager.py:391] -ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:41 lightllm_req_id:8 first_token_cost:212.03994750976562ms total_cost_time:212.083101272583ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10386 prompt_cache_len:5151 prompt_cache_ratio:0.4959560947429232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 -DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10802197456359863 s -INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10994195938110352 s -DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=87398471436767039242142754522288795150, time:1750767762.2398617s req_ids:[8] -DEBUG 06-24 20:22:42 [manager.py:391] -ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:209.15794372558594ms total_cost_time:209.20276641845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10387 prompt_cache_len:5151 prompt_cache_ratio:0.4959083469721768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 -DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10837531089782715 s -INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.1105353832244873 s -DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=251323818111197751307017457251095458666, time:1750767762.4553828s req_ids:[8] -DEBUG 06-24 20:22:42 [manager.py:391] -ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:208.8305950164795ms total_cost_time:208.87422561645508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10388 prompt_cache_len:5151 prompt_cache_ratio:0.49586060839430113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 -DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:42 [manager.py:224] router recive req id 8 cost time 0.10580992698669434 s -INFO 06-24 20:22:42 [manager.py:68] detokenization recv req id 8 cost time 0.10754537582397461 s -DEBUG 06-24 20:22:42 [manager.py:391] Prefill Batch: batch_id=146176246297469748188130174631754789957, time:1750767762.6693764s req_ids:[8] -DEBUG 06-24 20:22:42 [manager.py:391] -ERROR 06-24 20:22:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:366.7149543762207ms total_cost_time:366.7581081390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10389 prompt_cache_len:5151 prompt_cache_ratio:0.4958128790066416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 -DEBUG 06-24 20:22:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:42 [batch.py:51] router release req id 8 -INFO 06-24 20:22:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.1089928150177002 s -INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11114263534545898 s -DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=210261252165401293905842155245538837465, time:1750767763.0417302s req_ids:[8] -DEBUG 06-24 20:22:43 [manager.py:391] -ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:42 lightllm_req_id:8 first_token_cost:202.30650901794434ms total_cost_time:202.35061645507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10390 prompt_cache_len:5151 prompt_cache_ratio:0.49576515880654476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 -DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10835862159729004 s -INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s -DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=5460438190186362292226597957233559975, time:1750767763.2514791s req_ids:[8] -DEBUG 06-24 20:22:43 [manager.py:391] -ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:207.24773406982422ms total_cost_time:207.291841506958ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10391 prompt_cache_len:5151 prompt_cache_ratio:0.4957174477913579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 -DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10724186897277832 s -INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.10911369323730469 s -DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=178057043617394164313002345635573152615, time:1750767763.4637008s req_ids:[8] -DEBUG 06-24 20:22:43 [manager.py:391] -ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:165.50803184509277ms total_cost_time:165.55047035217285ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10392 prompt_cache_len:5151 prompt_cache_ratio:0.49566974595842955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 -DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.1086428165435791 s -INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.11076068878173828 s -DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=83634940458071114933573786257754091595, time:1750767763.6395838s req_ids:[8] -DEBUG 06-24 20:22:43 [manager.py:391] -ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:207.47017860412598ms total_cost_time:207.51285552978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10393 prompt_cache_len:5151 prompt_cache_ratio:0.4956220533051092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 -DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:43 [manager.py:224] router recive req id 8 cost time 0.10850214958190918 s -INFO 06-24 20:22:43 [manager.py:68] detokenization recv req id 8 cost time 0.1104896068572998 s -DEBUG 06-24 20:22:43 [manager.py:391] Prefill Batch: batch_id=20481680301060264165034861047194207514, time:1750767763.8498523s req_ids:[8] -DEBUG 06-24 20:22:43 [manager.py:391] -ERROR 06-24 20:22:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:208.95862579345703ms total_cost_time:209.00249481201172ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10394 prompt_cache_len:5151 prompt_cache_ratio:0.49557436982874736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 -DEBUG 06-24 20:22:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10790872573852539 s -INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10985422134399414 s -DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=323595257289467470771758082766740219426, time:1750767764.0672977s req_ids:[8] -DEBUG 06-24 20:22:44 [manager.py:391] -ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:43 lightllm_req_id:8 first_token_cost:212.61906623840332ms total_cost_time:212.66436576843262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10395 prompt_cache_len:5151 prompt_cache_ratio:0.49552669552669554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 -DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10738778114318848 s -INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10942912101745605 s -DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=59156255639356315159514022224525904432, time:1750767764.2822669s req_ids:[8] -DEBUG 06-24 20:22:44 [manager.py:391] -ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:209.20562744140625ms total_cost_time:209.24925804138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10396 prompt_cache_len:5151 prompt_cache_ratio:0.49547903039630625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 -DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10748815536499023 s -INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10939240455627441 s -DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=69624064984972370827450359108516402773, time:1750767764.5028427s req_ids:[8] -DEBUG 06-24 20:22:44 [manager.py:391] -ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:374.79686737060547ms total_cost_time:374.83739852905273ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10397 prompt_cache_len:5151 prompt_cache_ratio:0.4954313744349332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 -DEBUG 06-24 20:22:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:44 [manager.py:224] router recive req id 8 cost time 0.10746407508850098 s -INFO 06-24 20:22:44 [manager.py:68] detokenization recv req id 8 cost time 0.10932278633117676 s -DEBUG 06-24 20:22:44 [manager.py:391] Prefill Batch: batch_id=186889013300742734525179701844894886664, time:1750767764.876438s req_ids:[8] -DEBUG 06-24 20:22:44 [manager.py:391] -ERROR 06-24 20:22:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:207.03625679016113ms total_cost_time:207.09466934204102ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:10398 prompt_cache_len:5151 prompt_cache_ratio:0.49538372763993077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 -DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s -INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.1097571849822998 s -DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=12909657544039380466092619614552745530, time:1750767765.0898335s req_ids:[8] -DEBUG 06-24 20:22:45 [manager.py:391] -ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:44 lightllm_req_id:8 first_token_cost:204.970121383667ms total_cost_time:205.01327514648438ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10399 prompt_cache_len:5151 prompt_cache_ratio:0.4953360900086547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 -DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10857057571411133 s -INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11060047149658203 s -DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=318409688853941498638946132176251857335, time:1750767765.3006153s req_ids:[8] -DEBUG 06-24 20:22:45 [manager.py:391] -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:201.43651962280273ms total_cost_time:201.47967338562012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10400 prompt_cache_len:5151 prompt_cache_ratio:0.4952884615384615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 -DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.1087179183959961 s -INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11074113845825195 s -DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=228926717532228007552077052775030216516, time:1750767765.5090423s req_ids:[8] -DEBUG 06-24 20:22:45 [manager.py:391] -ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:205.7168483734131ms total_cost_time:205.76000213623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10401 prompt_cache_len:5151 prompt_cache_ratio:0.49524084222670894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 -DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s -INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.1103525161743164 s -DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=328429048975347538906994701743098777503, time:1750767765.7182033s req_ids:[8] -DEBUG 06-24 20:22:45 [manager.py:391] -ERROR 06-24 20:22:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:201.87711715698242ms total_cost_time:201.9200325012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10402 prompt_cache_len:5151 prompt_cache_ratio:0.4951932320707556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 -DEBUG 06-24 20:22:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:45 [manager.py:224] router recive req id 8 cost time 0.10844659805297852 s -INFO 06-24 20:22:45 [manager.py:68] detokenization recv req id 8 cost time 0.11043190956115723 s -DEBUG 06-24 20:22:45 [manager.py:391] Prefill Batch: batch_id=155443869086096911093776483229810947857, time:1750767765.9292936s req_ids:[8] -DEBUG 06-24 20:22:45 [manager.py:391] -ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:45 lightllm_req_id:8 first_token_cost:208.1735134124756ms total_cost_time:208.21762084960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10403 prompt_cache_len:5151 prompt_cache_ratio:0.49514563106796117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 -DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10894131660461426 s -INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11094856262207031 s -DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=264829523068263391088666724062988282778, time:1750767766.1422453s req_ids:[8] -DEBUG 06-24 20:22:46 [manager.py:391] -ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:206.67457580566406ms total_cost_time:206.72035217285156ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10404 prompt_cache_len:5151 prompt_cache_ratio:0.4950980392156863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 -DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s -INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.10971426963806152 s -DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=124257638709429001144386082009509276255, time:1750767766.3552089s req_ids:[8] -DEBUG 06-24 20:22:46 [manager.py:391] -ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:208.1003189086914ms total_cost_time:208.1460952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10405 prompt_cache_len:5151 prompt_cache_ratio:0.49505045651129265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 -DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:46 [batch.py:51] router release req id 8 -INFO 06-24 20:22:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10867857933044434 s -INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s -DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=116995072487734720922352423795703241754, time:1750767766.567579s req_ids:[8] -DEBUG 06-24 20:22:46 [manager.py:391] -ERROR 06-24 20:22:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:362.1079921722412ms total_cost_time:362.1535301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10406 prompt_cache_len:5151 prompt_cache_ratio:0.495002882952143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 -DEBUG 06-24 20:22:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:46 [manager.py:224] router recive req id 8 cost time 0.10918331146240234 s -INFO 06-24 20:22:46 [manager.py:68] detokenization recv req id 8 cost time 0.11118388175964355 s -DEBUG 06-24 20:22:46 [manager.py:391] Prefill Batch: batch_id=88819206029422537532015685153907971491, time:1750767766.936467s req_ids:[8] -DEBUG 06-24 20:22:46 [manager.py:391] -INFO 06-24 20:22:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:46 lightllm_req_id:8 first_token_cost:209.71131324768066ms total_cost_time:209.75542068481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10407 prompt_cache_len:5151 prompt_cache_ratio:0.49495531853560104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 -DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10837197303771973 s -INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.11031365394592285 s -DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=16271885427411925168492342617434444384, time:1750767767.151465s req_ids:[8] -DEBUG 06-24 20:22:47 [manager.py:391] -ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:210.70241928100586ms total_cost_time:210.74533462524414ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10408 prompt_cache_len:5151 prompt_cache_ratio:0.4949077632590315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 -DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10803365707397461 s -INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994601249694824 s -DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=38822038359984714596728068735187240472, time:1750767767.3651364s req_ids:[8] -DEBUG 06-24 20:22:47 [manager.py:391] -ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:206.21204376220703ms total_cost_time:206.2551975250244ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10409 prompt_cache_len:5151 prompt_cache_ratio:0.49486021711980016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 -DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.1075751781463623 s -INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.1095895767211914 s -DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=280812206711169103902733729860267969838, time:1750767767.5762281s req_ids:[8] -DEBUG 06-24 20:22:47 [manager.py:391] -ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.8076343536377ms total_cost_time:209.85150337219238ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10410 prompt_cache_len:5151 prompt_cache_ratio:0.4948126801152738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 -DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10891914367675781 s -INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s -DEBUG 06-24 20:22:47 [manager.py:391] Prefill Batch: batch_id=147385449822408347189565731094173806334, time:1750767767.790348s req_ids:[8] -DEBUG 06-24 20:22:47 [manager.py:391] -ERROR 06-24 20:22:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.98191833496094ms total_cost_time:210.02578735351562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10411 prompt_cache_len:5151 prompt_cache_ratio:0.4947651522428201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 -DEBUG 06-24 20:22:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:47 [manager.py:224] router recive req id 8 cost time 0.10765433311462402 s -INFO 06-24 20:22:47 [manager.py:68] detokenization recv req id 8 cost time 0.10961556434631348 s -DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=227370528430795809304782549255296297747, time:1750767768.0033023s req_ids:[8] -DEBUG 06-24 20:22:48 [manager.py:391] -ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:47 lightllm_req_id:8 first_token_cost:209.75732803344727ms total_cost_time:209.80191230773926ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10412 prompt_cache_len:5151 prompt_cache_ratio:0.4947176334998079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 -DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10846781730651855 s -INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049675941467285 s -DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=240829012355463897240808543394434970127, time:1750767768.2195964s req_ids:[8] -DEBUG 06-24 20:22:48 [manager.py:391] -ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:391.1724090576172ms total_cost_time:391.2177085876465ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10413 prompt_cache_len:5151 prompt_cache_ratio:0.49467012388360704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 -DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10822486877441406 s -INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.1102895736694336 s -DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=71830897086584306879759081330078242774, time:1750767768.6108136s req_ids:[8] -DEBUG 06-24 20:22:48 [manager.py:391] -ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:206.02869987487793ms total_cost_time:206.07280731201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10414 prompt_cache_len:5151 prompt_cache_ratio:0.49462262339158825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 -DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:48 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s -INFO 06-24 20:22:48 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s -DEBUG 06-24 20:22:48 [manager.py:391] Prefill Batch: batch_id=246987984134468666153071729283832652315, time:1750767768.823078s req_ids:[8] -DEBUG 06-24 20:22:48 [manager.py:391] -ERROR 06-24 20:22:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:206.23159408569336ms total_cost_time:206.27403259277344ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10415 prompt_cache_len:5151 prompt_cache_ratio:0.49457513202112335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 -DEBUG 06-24 20:22:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s -INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.1107175350189209 s -DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=180186609349216854940815857423545582389, time:1750767769.036628s req_ids:[8] -DEBUG 06-24 20:22:49 [manager.py:391] -ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:48 lightllm_req_id:8 first_token_cost:209.7158432006836ms total_cost_time:209.75852012634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10416 prompt_cache_len:5151 prompt_cache_ratio:0.49452764976958524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 -DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10825872421264648 s -INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025285720825195 s -DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=243503898184221356737906925237216812246, time:1750767769.2507477s req_ids:[8] -DEBUG 06-24 20:22:49 [manager.py:391] -ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:205.16300201416016ms total_cost_time:205.20710945129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10417 prompt_cache_len:5151 prompt_cache_ratio:0.4944801766343477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 -DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10890388488769531 s -INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065888404846191 s -DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=249625965274669926640242695938844330257, time:1750767769.4613028s req_ids:[8] -DEBUG 06-24 20:22:49 [manager.py:391] -ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:165.9567356109619ms total_cost_time:166.05496406555176ms,out_token_counter:1 mean_per_token_cost_time: 0.09822845458984375ms prompt_token_num:10418 prompt_cache_len:5151 prompt_cache_ratio:0.49443271261278554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 -DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10818600654602051 s -INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s -DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=148852134762508122417736352467842303500, time:1750767769.6321716s req_ids:[8] -DEBUG 06-24 20:22:49 [manager.py:391] -ERROR 06-24 20:22:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:196.43425941467285ms total_cost_time:196.47884368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10419 prompt_cache_len:5151 prompt_cache_ratio:0.4943852577022747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 -DEBUG 06-24 20:22:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:49 [manager.py:224] router recive req id 8 cost time 0.10740923881530762 s -INFO 06-24 20:22:49 [manager.py:68] detokenization recv req id 8 cost time 0.1093144416809082 s -DEBUG 06-24 20:22:49 [manager.py:391] Prefill Batch: batch_id=327703986457878781049287798450116800348, time:1750767769.8367155s req_ids:[8] -DEBUG 06-24 20:22:49 [manager.py:391] -ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:49 lightllm_req_id:8 first_token_cost:372.74765968322754ms total_cost_time:372.7917671203613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10420 prompt_cache_len:5151 prompt_cache_ratio:0.49433781190019194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 -DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.1084444522857666 s -INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s -DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=139079399488545447464800389498791243400, time:1750767770.2134678s req_ids:[8] -DEBUG 06-24 20:22:50 [manager.py:391] -ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:205.14583587646484ms total_cost_time:205.18755912780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10421 prompt_cache_len:5151 prompt_cache_ratio:0.49429037520391517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 -DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s -INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11083126068115234 s -DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=86973358106598838811594499086279411004, time:1750767770.4241383s req_ids:[8] -DEBUG 06-24 20:22:50 [manager.py:391] -ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:204.68688011169434ms total_cost_time:204.73027229309082ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10422 prompt_cache_len:5151 prompt_cache_ratio:0.49424294761082327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 -DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.1076819896697998 s -INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s -DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=139958710593008038286319395373188238541, time:1750767770.6410074s req_ids:[8] -DEBUG 06-24 20:22:50 [manager.py:391] -ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:213.2716178894043ms total_cost_time:213.31405639648438ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10423 prompt_cache_len:5151 prompt_cache_ratio:0.49419552911829606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 -DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:50 [manager.py:224] router recive req id 8 cost time 0.10920190811157227 s -INFO 06-24 20:22:50 [manager.py:68] detokenization recv req id 8 cost time 0.11125731468200684 s -DEBUG 06-24 20:22:50 [manager.py:391] Prefill Batch: batch_id=145042264192795053439458397349048134922, time:1750767770.8538065s req_ids:[8] -DEBUG 06-24 20:22:50 [manager.py:391] -ERROR 06-24 20:22:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:207.7949047088623ms total_cost_time:207.8378200531006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10424 prompt_cache_len:5151 prompt_cache_ratio:0.49414811972371453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 -DEBUG 06-24 20:22:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10817646980285645 s -INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.11019134521484375 s -DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=202766691988266413196423251581896815749, time:1750767771.0682747s req_ids:[8] -DEBUG 06-24 20:22:51 [manager.py:391] -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:50 lightllm_req_id:8 first_token_cost:207.9489231109619ms total_cost_time:207.9932689666748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10425 prompt_cache_len:5151 prompt_cache_ratio:0.49410071942446043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 -DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10806536674499512 s -INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s -DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=146316257230826306477061750623240975556, time:1750767771.2797039s req_ids:[8] -DEBUG 06-24 20:22:51 [manager.py:391] -ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:206.41517639160156ms total_cost_time:206.45785331726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10426 prompt_cache_len:5151 prompt_cache_ratio:0.49405332821791675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 -DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.10801148414611816 s -INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.10994482040405273 s -DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=23624167420889058281137695709302306861, time:1750767771.4950533s req_ids:[8] -DEBUG 06-24 20:22:51 [manager.py:391] -ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:210.30545234680176ms total_cost_time:210.34717559814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10427 prompt_cache_len:5151 prompt_cache_ratio:0.49400594610146736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 -DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:51 [manager.py:224] router recive req id 8 cost time 0.20879268646240234 s -INFO 06-24 20:22:51 [manager.py:68] detokenization recv req id 8 cost time 0.2104027271270752 s -DEBUG 06-24 20:22:51 [manager.py:391] Prefill Batch: batch_id=99879166003109065707071330531033644487, time:1750767771.8404758s req_ids:[8] -DEBUG 06-24 20:22:51 [manager.py:391] -DEBUG 06-24 20:22:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 44587.991 tokens/s -DEBUG 06-24 20:22:51 [stats.py:37] Avg prompt tokens throughput: 44579.423 tokens/s -DEBUG 06-24 20:22:51 [stats.py:37] Avg generate tokens throughput: 8.567 tokens/s -ERROR 06-24 20:22:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:295.8862781524658ms total_cost_time:295.9272861480713ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10428 prompt_cache_len:5151 prompt_cache_ratio:0.49395857307249713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 -DEBUG 06-24 20:22:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10832405090332031 s -INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.11017608642578125 s -DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=315385173719523273442675599510100875527, time:1750767772.0097556s req_ids:[8] -DEBUG 06-24 20:22:52 [manager.py:391] -ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:51 lightllm_req_id:8 first_token_cost:194.7479248046875ms total_cost_time:194.793701171875ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10429 prompt_cache_len:5151 prompt_cache_ratio:0.493911209128392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 -DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10792136192321777 s -INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s -DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=336860438054043947475534170342487314642, time:1750767772.2100503s req_ids:[8] -DEBUG 06-24 20:22:52 [manager.py:391] -ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:203.6590576171875ms total_cost_time:203.7029266357422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10430 prompt_cache_len:5151 prompt_cache_ratio:0.49386385426653884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 -DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10780143737792969 s -INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.1096503734588623 s -DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=101515308155050829823066307878377886044, time:1750767772.420043s req_ids:[8] -DEBUG 06-24 20:22:52 [manager.py:391] -ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:203.74059677124023ms total_cost_time:203.7830352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10431 prompt_cache_len:5151 prompt_cache_ratio:0.49381650848432557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 -DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10745477676391602 s -INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10962343215942383 s -DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=272820416551857047257517459736564969014, time:1750767772.631151s req_ids:[8] -DEBUG 06-24 20:22:52 [manager.py:391] -ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:207.40818977355957ms total_cost_time:207.45277404785156ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10432 prompt_cache_len:5151 prompt_cache_ratio:0.4937691717791411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 -DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:52 [manager.py:224] router recive req id 8 cost time 0.10788512229919434 s -INFO 06-24 20:22:52 [manager.py:68] detokenization recv req id 8 cost time 0.10991239547729492 s -DEBUG 06-24 20:22:52 [manager.py:391] Prefill Batch: batch_id=248979808605345491094647678818473388570, time:1750767772.8438094s req_ids:[8] -DEBUG 06-24 20:22:52 [manager.py:391] -ERROR 06-24 20:22:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:206.8502902984619ms total_cost_time:206.8941593170166ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10433 prompt_cache_len:5151 prompt_cache_ratio:0.49372184414837533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 -DEBUG 06-24 20:22:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10816836357116699 s -INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.11022663116455078 s -DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=249465721101337701861493548868766225409, time:1750767773.0570183s req_ids:[8] -DEBUG 06-24 20:22:53 [manager.py:391] -ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:52 lightllm_req_id:8 first_token_cost:210.01029014587402ms total_cost_time:210.0527286529541ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10434 prompt_cache_len:5151 prompt_cache_ratio:0.4936745255894192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 -DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:53 [batch.py:51] router release req id 8 -INFO 06-24 20:22:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:53 [statics_utils.py:24] mean first cost: 228.3881372238096 ms -INFO 06-24 20:22:53 [statics_utils.py:24] mean per token cost: 0.06765558074446179 ms -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10744404792785645 s -INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942673683166504 s -DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=318046112563954782749007296590387814937, time:1750767773.2735572s req_ids:[8] -DEBUG 06-24 20:22:53 [manager.py:391] -INFO 06-24 20:22:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:371.7174530029297ms total_cost_time:371.7615604400635ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10435 prompt_cache_len:5151 prompt_cache_ratio:0.4936272160996646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 -DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:53 [batch.py:51] router release req id 8 -INFO 06-24 20:22:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s -INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.11038398742675781 s -DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=220678945150700335371915804890000657273, time:1750767773.6516666s req_ids:[8] -DEBUG 06-24 20:22:53 [manager.py:391] -ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:213.12236785888672ms total_cost_time:213.18340301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10436 prompt_cache_len:5151 prompt_cache_ratio:0.4935799156765044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 -DEBUG 06-24 20:22:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:53 [manager.py:224] router recive req id 8 cost time 0.10768389701843262 s -INFO 06-24 20:22:53 [manager.py:68] detokenization recv req id 8 cost time 0.1096353530883789 s -DEBUG 06-24 20:22:53 [manager.py:391] Prefill Batch: batch_id=59188974628554415539707732469254003781, time:1750767773.8818514s req_ids:[8] -DEBUG 06-24 20:22:53 [manager.py:391] -ERROR 06-24 20:22:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:227.38385200500488ms total_cost_time:227.42891311645508ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10437 prompt_cache_len:5151 prompt_cache_ratio:0.4935326243173326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 -DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10826635360717773 s -INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11025285720825195 s -DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=19325045056632669235205311910440370122, time:1750767774.101485s req_ids:[8] -DEBUG 06-24 20:22:54 [manager.py:391] -ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:53 lightllm_req_id:8 first_token_cost:209.08236503601074ms total_cost_time:209.12408828735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10438 prompt_cache_len:5151 prompt_cache_ratio:0.49348534201954397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 -DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10877180099487305 s -INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s -DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=88782648626342577758299477127353334893, time:1750767774.3165982s req_ids:[8] -DEBUG 06-24 20:22:54 [manager.py:391] -ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:207.92818069458008ms total_cost_time:207.97276496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10439 prompt_cache_len:5151 prompt_cache_ratio:0.49343806878053453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 -DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10912680625915527 s -INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11122798919677734 s -DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=272424104696004674183005754171202374518, time:1750767774.5278761s req_ids:[8] -DEBUG 06-24 20:22:54 [manager.py:391] -ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:209.9165916442871ms total_cost_time:209.9611759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10440 prompt_cache_len:5151 prompt_cache_ratio:0.4933908045977011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 -DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s -INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s -DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=128848024472553492963760173015557870097, time:1750767774.7420876s req_ids:[8] -DEBUG 06-24 20:22:54 [manager.py:391] -ERROR 06-24 20:22:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:213.31405639648438ms total_cost_time:213.35887908935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10441 prompt_cache_len:5151 prompt_cache_ratio:0.4933435494684417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 -DEBUG 06-24 20:22:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:54 [manager.py:224] router recive req id 8 cost time 0.10727071762084961 s -INFO 06-24 20:22:54 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s -DEBUG 06-24 20:22:54 [manager.py:391] Prefill Batch: batch_id=182947162379528098610882196974559594017, time:1750767774.9769313s req_ids:[8] -DEBUG 06-24 20:22:54 [manager.py:391] -ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:54 lightllm_req_id:8 first_token_cost:230.00144958496094ms total_cost_time:230.04531860351562ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10442 prompt_cache_len:5151 prompt_cache_ratio:0.4932963033901551 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 -DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10948300361633301 s -INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.1114342212677002 s -DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=98453330293958664740898350931813979507, time:1750767775.1945434s req_ids:[8] -DEBUG 06-24 20:22:55 [manager.py:391] -ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:376.0819435119629ms total_cost_time:376.1265277862549ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10443 prompt_cache_len:5151 prompt_cache_ratio:0.4932490663602413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 -DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10871076583862305 s -INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.11072254180908203 s -DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=184795237481759773573843833181978124666, time:1750767775.5734842s req_ids:[8] -DEBUG 06-24 20:22:55 [manager.py:391] -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:22:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:210.1001739501953ms total_cost_time:210.14118194580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10444 prompt_cache_len:5151 prompt_cache_ratio:0.4932018383761011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 -DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10772371292114258 s -INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.10968661308288574 s -DEBUG 06-24 20:22:55 [manager.py:391] Prefill Batch: batch_id=2027963967725044653957012527503245660, time:1750767775.7895231s req_ids:[8] -DEBUG 06-24 20:22:55 [manager.py:391] -ERROR 06-24 20:22:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:212.24665641784668ms total_cost_time:212.29052543640137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10445 prompt_cache_len:5151 prompt_cache_ratio:0.49315461943513644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 -DEBUG 06-24 20:22:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:55 [manager.py:224] router recive req id 8 cost time 0.10800528526306152 s -INFO 06-24 20:22:55 [manager.py:68] detokenization recv req id 8 cost time 0.11001420021057129 s -DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=124382591521699313176766934786640915775, time:1750767776.0067759s req_ids:[8] -DEBUG 06-24 20:22:56 [manager.py:391] -ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:55 lightllm_req_id:8 first_token_cost:209.3179225921631ms total_cost_time:209.36083793640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10446 prompt_cache_len:5151 prompt_cache_ratio:0.49310740953475013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 -DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10764122009277344 s -INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.10956168174743652 s -DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=132535163713605869227513659293106035121, time:1750767776.218991s req_ids:[8] -DEBUG 06-24 20:22:56 [manager.py:391] -ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:210.22558212280273ms total_cost_time:210.2677822113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10447 prompt_cache_len:5151 prompt_cache_ratio:0.4930602086723461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 -DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10781359672546387 s -INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.1096954345703125 s -DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=324212774519960139960774985571884455850, time:1750767776.4352353s req_ids:[8] -DEBUG 06-24 20:22:56 [manager.py:391] -ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.86447143554688ms total_cost_time:211.90571784973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10448 prompt_cache_len:5151 prompt_cache_ratio:0.4930130168453293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 -DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10883021354675293 s -INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.11081051826477051 s -DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=30848355640080317684631723310004568333, time:1750767776.649874s req_ids:[8] -DEBUG 06-24 20:22:56 [manager.py:391] -ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:209.00630950927734ms total_cost_time:209.04803276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10449 prompt_cache_len:5151 prompt_cache_ratio:0.4929658340511054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 -DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:56 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s -INFO 06-24 20:22:56 [manager.py:68] detokenization recv req id 8 cost time 0.10947465896606445 s -DEBUG 06-24 20:22:56 [manager.py:391] Prefill Batch: batch_id=335622299302438022868866674138545793902, time:1750767776.8647614s req_ids:[8] -DEBUG 06-24 20:22:56 [manager.py:391] -ERROR 06-24 20:22:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.23123168945312ms total_cost_time:211.2736701965332ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10450 prompt_cache_len:5151 prompt_cache_ratio:0.49291866028708137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 -DEBUG 06-24 20:22:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s -INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099545955657959 s -DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=166886130383756693301267210465919408749, time:1750767777.0787702s req_ids:[8] -DEBUG 06-24 20:22:57 [manager.py:391] -ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:56 lightllm_req_id:8 first_token_cost:211.31157875061035ms total_cost_time:211.35354042053223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10451 prompt_cache_len:5151 prompt_cache_ratio:0.492871495550665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 -DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.3103499412536621 s -INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.31239795684814453 s -DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=169021123394621979471360071303725217999, time:1750767777.5054705s req_ids:[8] -DEBUG 06-24 20:22:57 [manager.py:391] -ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:428.2495975494385ms total_cost_time:428.29394340515137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10452 prompt_cache_len:5151 prompt_cache_ratio:0.4928243398392652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 -DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10771727561950684 s -INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.1097409725189209 s -DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=91810470405329715056891898998079490428, time:1750767777.727743s req_ids:[8] -DEBUG 06-24 20:22:57 [manager.py:391] -ERROR 06-24 20:22:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:211.30919456481934ms total_cost_time:211.350679397583ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10453 prompt_cache_len:5151 prompt_cache_ratio:0.4927771931502918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 -DEBUG 06-24 20:22:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:57 [manager.py:224] router recive req id 8 cost time 0.10802078247070312 s -INFO 06-24 20:22:57 [manager.py:68] detokenization recv req id 8 cost time 0.10995340347290039 s -DEBUG 06-24 20:22:57 [manager.py:391] Prefill Batch: batch_id=5706802542527761967783740111423268254, time:1750767777.9442964s req_ids:[8] -DEBUG 06-24 20:22:57 [manager.py:391] -ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:57 lightllm_req_id:8 first_token_cost:212.18442916870117ms total_cost_time:212.22519874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10454 prompt_cache_len:5151 prompt_cache_ratio:0.49273005548115556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 -DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10858607292175293 s -INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11070036888122559 s -DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=260124197293030158320695005734089767279, time:1750767778.1595263s req_ids:[8] -DEBUG 06-24 20:22:58 [manager.py:391] -ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:207.72099494934082ms total_cost_time:207.7641487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10455 prompt_cache_len:5151 prompt_cache_ratio:0.4926829268292683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 -DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10747456550598145 s -INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.10947561264038086 s -DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=284506228455272881463741301845932095545, time:1750767778.3708541s req_ids:[8] -DEBUG 06-24 20:22:58 [manager.py:391] -ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:209.38491821289062ms total_cost_time:209.4266414642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10456 prompt_cache_len:5151 prompt_cache_ratio:0.49263580719204286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 -DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10828375816345215 s -INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s -DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=156171080608819117782932720389248611509, time:1750767778.5856686s req_ids:[8] -DEBUG 06-24 20:22:58 [manager.py:391] -ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:209.41948890686035ms total_cost_time:209.46145057678223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10457 prompt_cache_len:5151 prompt_cache_ratio:0.49258869656689297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 -DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:58 [manager.py:224] router recive req id 8 cost time 0.10863661766052246 s -INFO 06-24 20:22:58 [manager.py:68] detokenization recv req id 8 cost time 0.11047744750976562 s -DEBUG 06-24 20:22:58 [manager.py:391] Prefill Batch: batch_id=269519166771300874000092730839186656302, time:1750767778.8052545s req_ids:[8] -DEBUG 06-24 20:22:58 [manager.py:391] -ERROR 06-24 20:22:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:216.19176864624023ms total_cost_time:216.23587608337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10458 prompt_cache_len:5151 prompt_cache_ratio:0.4925415949512335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 -DEBUG 06-24 20:22:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10763788223266602 s -INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.10963726043701172 s -DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=272580322249888308427890228088957698166, time:1750767779.0249705s req_ids:[8] -DEBUG 06-24 20:22:59 [manager.py:391] -ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:58 lightllm_req_id:8 first_token_cost:382.07101821899414ms total_cost_time:382.11727142333984ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10459 prompt_cache_len:5151 prompt_cache_ratio:0.49249450234248016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 -DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10936546325683594 s -INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.11151838302612305 s -DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=151809333693191699252600625198969621802, time:1750767779.4054992s req_ids:[8] -DEBUG 06-24 20:22:59 [manager.py:391] -ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:208.64295959472656ms total_cost_time:208.68921279907227ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10460 prompt_cache_len:5151 prompt_cache_ratio:0.4924474187380497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 -DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10772490501403809 s -INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s -DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=127315185224529253195029196074611261460, time:1750767779.6194077s req_ids:[8] -DEBUG 06-24 20:22:59 [manager.py:391] -ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:211.8544578552246ms total_cost_time:211.897611618042ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10461 prompt_cache_len:5151 prompt_cache_ratio:0.49240034413535994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 -DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:22:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:22:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:22:59 [manager.py:224] router recive req id 8 cost time 0.10922050476074219 s -INFO 06-24 20:22:59 [manager.py:68] detokenization recv req id 8 cost time 0.11114192008972168 s -DEBUG 06-24 20:22:59 [manager.py:391] Prefill Batch: batch_id=80138613385132679163632645306056136619, time:1750767779.8344874s req_ids:[8] -DEBUG 06-24 20:22:59 [manager.py:391] -ERROR 06-24 20:22:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:22:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:22:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:201.58672332763672ms total_cost_time:201.6289234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10462 prompt_cache_len:5151 prompt_cache_ratio:0.4923532785318295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:22:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 -DEBUG 06-24 20:22:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:22:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:22:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:22:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:22:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10767722129821777 s -INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s -DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=10792118494910909596023623338620067069, time:1750767780.0423303s req_ids:[8] -DEBUG 06-24 20:23:00 [manager.py:391] -ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:22:59 lightllm_req_id:8 first_token_cost:212.89348602294922ms total_cost_time:212.9373550415039ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10463 prompt_cache_len:5151 prompt_cache_ratio:0.49230622192487816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 -DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10736083984375 s -INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.10946345329284668 s -DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=101626184689385828579756632635243552850, time:1750767780.2699656s req_ids:[8] -DEBUG 06-24 20:23:00 [manager.py:391] -ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:218.80555152893066ms total_cost_time:218.84846687316895ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10464 prompt_cache_len:5151 prompt_cache_ratio:0.4922591743119266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 -DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10758352279663086 s -INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.10966753959655762 s -DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=57605862771144922217086814013438927517, time:1750767780.4865959s req_ids:[8] -DEBUG 06-24 20:23:00 [manager.py:391] -ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:212.50510215759277ms total_cost_time:212.54968643188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10465 prompt_cache_len:5151 prompt_cache_ratio:0.49221213569039657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 -DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:00 [batch.py:51] router release req id 8 -INFO 06-24 20:23:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:00 [manager.py:224] router recive req id 8 cost time 0.10919952392578125 s -INFO 06-24 20:23:00 [manager.py:68] detokenization recv req id 8 cost time 0.11125564575195312 s -DEBUG 06-24 20:23:00 [manager.py:391] Prefill Batch: batch_id=41438589456498988321709129699360881836, time:1750767780.704985s req_ids:[8] -DEBUG 06-24 20:23:00 [manager.py:391] -ERROR 06-24 20:23:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:370.46194076538086ms total_cost_time:370.50509452819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10466 prompt_cache_len:5151 prompt_cache_ratio:0.4921651060577107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 -DEBUG 06-24 20:23:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10880327224731445 s -INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s -DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=41163294749033431170213538925187515621, time:1750767781.0783868s req_ids:[8] -DEBUG 06-24 20:23:01 [manager.py:391] -ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:00 lightllm_req_id:8 first_token_cost:206.45403861999512ms total_cost_time:206.4971923828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10467 prompt_cache_len:5151 prompt_cache_ratio:0.49211808541129265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 -DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10809493064880371 s -INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.1103200912475586 s -DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=173632704893600197874911090120457634365, time:1750767781.2912803s req_ids:[8] -DEBUG 06-24 20:23:01 [manager.py:391] -ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:210.56842803955078ms total_cost_time:210.61420440673828ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10468 prompt_cache_len:5151 prompt_cache_ratio:0.49207107374856707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 -DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.1089630126953125 s -INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11108183860778809 s -DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=215738003461033960067048973984508832112, time:1750767781.5060785s req_ids:[8] -DEBUG 06-24 20:23:01 [manager.py:391] -ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:205.47890663146973ms total_cost_time:205.5211067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10469 prompt_cache_len:5151 prompt_cache_ratio:0.4920240710669596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 -DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.10775017738342285 s -INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s -DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=241713275119806770689790357155624972844, time:1750767781.7196922s req_ids:[8] -DEBUG 06-24 20:23:01 [manager.py:391] -ERROR 06-24 20:23:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:213.2251262664795ms total_cost_time:213.2706642150879ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10470 prompt_cache_len:5151 prompt_cache_ratio:0.49197707736389684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 -DEBUG 06-24 20:23:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:01 [manager.py:224] router recive req id 8 cost time 0.1092219352722168 s -INFO 06-24 20:23:01 [manager.py:68] detokenization recv req id 8 cost time 0.11122632026672363 s -DEBUG 06-24 20:23:01 [manager.py:391] Prefill Batch: batch_id=291639848862992160669210662031483363667, time:1750767781.936995s req_ids:[8] -DEBUG 06-24 20:23:01 [manager.py:391] -DEBUG 06-24 20:23:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 44512.380 tokens/s -DEBUG 06-24 20:23:01 [stats.py:37] Avg prompt tokens throughput: 44503.863 tokens/s -DEBUG 06-24 20:23:01 [stats.py:37] Avg generate tokens throughput: 8.517 tokens/s -ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:01 lightllm_req_id:8 first_token_cost:200.73914527893066ms total_cost_time:200.78015327453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10471 prompt_cache_len:5151 prompt_cache_ratio:0.4919300926368064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 -DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10751748085021973 s -INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10916972160339355 s -DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=278853586091680011417897211241521476387, time:1750767782.1441522s req_ids:[8] -DEBUG 06-24 20:23:02 [manager.py:391] -ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:169.76141929626465ms total_cost_time:169.80290412902832ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10472 prompt_cache_len:5151 prompt_cache_ratio:0.49188311688311687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 -DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10759472846984863 s -INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10938096046447754 s -DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=306152459111477007214166901652572626413, time:1750767782.3230655s req_ids:[8] -DEBUG 06-24 20:23:02 [manager.py:391] -ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:365.7214641571045ms total_cost_time:365.7674789428711ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10473 prompt_cache_len:5151 prompt_cache_ratio:0.49183615010025783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 -DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.1078646183013916 s -INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.10965180397033691 s -DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=252022900533972657594114832070296127450, time:1750767782.687387s req_ids:[8] -DEBUG 06-24 20:23:02 [manager.py:391] -ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:163.83814811706543ms total_cost_time:163.8798713684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10474 prompt_cache_len:5151 prompt_cache_ratio:0.4917891922856597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 -DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:02 [manager.py:224] router recive req id 8 cost time 0.10855317115783691 s -INFO 06-24 20:23:02 [manager.py:68] detokenization recv req id 8 cost time 0.11039209365844727 s -DEBUG 06-24 20:23:02 [manager.py:391] Prefill Batch: batch_id=266100389871840263774030788711563824288, time:1750767782.8577383s req_ids:[8] -DEBUG 06-24 20:23:02 [manager.py:391] -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:02 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:199.85413551330566ms total_cost_time:199.89705085754395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10475 prompt_cache_len:5151 prompt_cache_ratio:0.49174224343675416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 -DEBUG 06-24 20:23:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10884881019592285 s -INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089563369750977 s -DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=81240748417478525688089750528474676864, time:1750767783.0652025s req_ids:[8] -DEBUG 06-24 20:23:03 [manager.py:391] -ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:02 lightllm_req_id:8 first_token_cost:204.00500297546387ms total_cost_time:204.04958724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10476 prompt_cache_len:5151 prompt_cache_ratio:0.49169530355097363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 -DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10811710357666016 s -INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11011695861816406 s -DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=310797368702450596962733250694149652952, time:1750767783.2790608s req_ids:[8] -DEBUG 06-24 20:23:03 [manager.py:391] -ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:211.07172966003418ms total_cost_time:211.11440658569336ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10477 prompt_cache_len:5151 prompt_cache_ratio:0.4916483726257517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 -DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10810065269470215 s -INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11017513275146484 s -DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=225300777549212817003056728676960185560, time:1750767783.4926603s req_ids:[8] -DEBUG 06-24 20:23:03 [manager.py:391] -ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:205.7352066040039ms total_cost_time:205.7812213897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10478 prompt_cache_len:5151 prompt_cache_ratio:0.4916014506585226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 -DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10791897773742676 s -INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.11005520820617676 s -DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=191792093288073743015213700754746311756, time:1750767783.7023287s req_ids:[8] -DEBUG 06-24 20:23:03 [manager.py:391] -ERROR 06-24 20:23:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:206.1171531677246ms total_cost_time:206.15911483764648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10479 prompt_cache_len:5151 prompt_cache_ratio:0.491554537646722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 -DEBUG 06-24 20:23:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:03 [manager.py:224] router recive req id 8 cost time 0.10774660110473633 s -INFO 06-24 20:23:03 [manager.py:68] detokenization recv req id 8 cost time 0.10989999771118164 s -DEBUG 06-24 20:23:03 [manager.py:391] Prefill Batch: batch_id=294477285376858161126122115592757642872, time:1750767783.9156473s req_ids:[8] -DEBUG 06-24 20:23:03 [manager.py:391] -ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:03 lightllm_req_id:8 first_token_cost:212.56566047668457ms total_cost_time:212.60929107666016ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10480 prompt_cache_len:5151 prompt_cache_ratio:0.49150763358778626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 -DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10884714126586914 s -INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11080050468444824 s -DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=50663458582324664747167879331006438306, time:1750767784.1326072s req_ids:[8] -DEBUG 06-24 20:23:04 [manager.py:391] -ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:367.51747131347656ms total_cost_time:367.56277084350586ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10481 prompt_cache_len:5151 prompt_cache_ratio:0.4914607384791527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 -DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10897040367126465 s -INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11092543601989746 s -DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=43145789675735818702076241218680629402, time:1750767784.5107534s req_ids:[8] -DEBUG 06-24 20:23:04 [manager.py:391] -ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:207.2737216949463ms total_cost_time:207.31687545776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10482 prompt_cache_len:5151 prompt_cache_ratio:0.49141385231825985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 -DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10878944396972656 s -INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.11093568801879883 s -DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=150575292758730623191428989415665335439, time:1750767784.7262588s req_ids:[8] -DEBUG 06-24 20:23:04 [manager.py:391] -ERROR 06-24 20:23:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:209.5630168914795ms total_cost_time:209.60688591003418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10483 prompt_cache_len:5151 prompt_cache_ratio:0.49136697510254695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 -DEBUG 06-24 20:23:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:04 [manager.py:224] router recive req id 8 cost time 0.10810184478759766 s -INFO 06-24 20:23:04 [manager.py:68] detokenization recv req id 8 cost time 0.1100456714630127 s -DEBUG 06-24 20:23:04 [manager.py:391] Prefill Batch: batch_id=36846644281662749423563983180010028874, time:1750767784.939978s req_ids:[8] -DEBUG 06-24 20:23:04 [manager.py:391] -ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:04 lightllm_req_id:8 first_token_cost:207.777738571167ms total_cost_time:207.8235149383545ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10484 prompt_cache_len:5151 prompt_cache_ratio:0.49132010682945443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 -DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s -INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10979700088500977 s -DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=213064408220166020891579270914192875526, time:1750767785.14998s req_ids:[8] -DEBUG 06-24 20:23:05 [manager.py:391] -ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:206.52174949645996ms total_cost_time:206.56538009643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10485 prompt_cache_len:5151 prompt_cache_ratio:0.4912732474964235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 -DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10796546936035156 s -INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.11000657081604004 s -DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=218780981206242701420356822070161866058, time:1750767785.3631122s req_ids:[8] -DEBUG 06-24 20:23:05 [manager.py:391] -ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:208.9996337890625ms total_cost_time:209.0439796447754ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10486 prompt_cache_len:5151 prompt_cache_ratio:0.49122639710089644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 -DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10928821563720703 s -INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.11124801635742188 s -DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=229545892437754620000139027760733219508, time:1750767785.577492s req_ids:[8] -DEBUG 06-24 20:23:05 [manager.py:391] -ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:204.1771411895752ms total_cost_time:204.21862602233887ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10487 prompt_cache_len:5151 prompt_cache_ratio:0.49117955564031657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 -DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10535836219787598 s -INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10728192329406738 s -DEBUG 06-24 20:23:05 [manager.py:391] Prefill Batch: batch_id=30093450963617115161656054501090529296, time:1750767785.7879674s req_ids:[8] -DEBUG 06-24 20:23:05 [manager.py:391] -ERROR 06-24 20:23:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:205.39402961730957ms total_cost_time:205.43789863586426ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10488 prompt_cache_len:5151 prompt_cache_ratio:0.49113272311212813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 -DEBUG 06-24 20:23:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:05 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s -INFO 06-24 20:23:05 [manager.py:68] detokenization recv req id 8 cost time 0.10880780220031738 s -DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=137723828481734535150389609558279898463, time:1750767785.9999073s req_ids:[8] -DEBUG 06-24 20:23:06 [manager.py:391] -ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:05 lightllm_req_id:8 first_token_cost:375.3066062927246ms total_cost_time:375.3511905670166ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10489 prompt_cache_len:5151 prompt_cache_ratio:0.49108589951377635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 -DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.1089777946472168 s -INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s -DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=190431015586751524762106285687722969794, time:1750767786.3782377s req_ids:[8] -DEBUG 06-24 20:23:06 [manager.py:391] -ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:208.1315517425537ms total_cost_time:208.1742286682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10490 prompt_cache_len:5151 prompt_cache_ratio:0.49103908484270736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 -DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.10699129104614258 s -INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1088259220123291 s -DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=120594010899663691325773467268633223684, time:1750767786.5925155s req_ids:[8] -DEBUG 06-24 20:23:06 [manager.py:391] -ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:169.4033145904541ms total_cost_time:169.44384574890137ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10491 prompt_cache_len:5151 prompt_cache_ratio:0.49099227909636833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 -DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.10834908485412598 s -INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.11026811599731445 s -DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=263983791762126028247243620869660656210, time:1750767786.7644932s req_ids:[8] -DEBUG 06-24 20:23:06 [manager.py:391] -ERROR 06-24 20:23:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:200.99902153015137ms total_cost_time:201.0185718536377ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:10492 prompt_cache_len:5151 prompt_cache_ratio:0.4909454822722074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 -DEBUG 06-24 20:23:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:06 [manager.py:224] router recive req id 8 cost time 0.1085512638092041 s -INFO 06-24 20:23:06 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s -DEBUG 06-24 20:23:06 [manager.py:391] Prefill Batch: batch_id=240028344186681621554208991752933577679, time:1750767786.9706728s req_ids:[8] -DEBUG 06-24 20:23:06 [manager.py:391] -ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:06 lightllm_req_id:8 first_token_cost:210.42442321777344ms total_cost_time:210.46853065490723ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10493 prompt_cache_len:5151 prompt_cache_ratio:0.4908986943676737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 -DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10779929161071777 s -INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.10976457595825195 s -DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=262747532434907506117884079407748875214, time:1750767787.183103s req_ids:[8] -DEBUG 06-24 20:23:07 [manager.py:391] -ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:210.11590957641602ms total_cost_time:210.1764678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10494 prompt_cache_len:5151 prompt_cache_ratio:0.4908519153802173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 -DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s -INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11105012893676758 s -DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=284577445736752928889769921535287803629, time:1750767787.3979647s req_ids:[8] -DEBUG 06-24 20:23:07 [manager.py:391] -ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:210.7858657836914ms total_cost_time:210.82782745361328ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10495 prompt_cache_len:5151 prompt_cache_ratio:0.4908051453072892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 -DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10880398750305176 s -INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11085629463195801 s -DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=154481376097807786742278662746323362318, time:1750767787.613465s req_ids:[8] -DEBUG 06-24 20:23:07 [manager.py:391] -ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:209.92612838745117ms total_cost_time:209.97118949890137ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10496 prompt_cache_len:5151 prompt_cache_ratio:0.4907583841463415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 -DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:07 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s -INFO 06-24 20:23:07 [manager.py:68] detokenization recv req id 8 cost time 0.11106991767883301 s -DEBUG 06-24 20:23:07 [manager.py:391] Prefill Batch: batch_id=37382853190393095923771544333408533512, time:1750767787.826955s req_ids:[8] -DEBUG 06-24 20:23:07 [manager.py:391] -ERROR 06-24 20:23:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:207.5049877166748ms total_cost_time:207.54766464233398ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10497 prompt_cache_len:5151 prompt_cache_ratio:0.4907116318948271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 -DEBUG 06-24 20:23:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.1074838638305664 s -INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s -DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=143062463020457958938120092966961442471, time:1750767788.0508592s req_ids:[8] -DEBUG 06-24 20:23:08 [manager.py:391] -ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:07 lightllm_req_id:8 first_token_cost:377.3953914642334ms total_cost_time:377.4404525756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10498 prompt_cache_len:5151 prompt_cache_ratio:0.4906648885502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 -DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s -INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.11040854454040527 s -DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=151898880458988532233913271108351243350, time:1750767788.418869s req_ids:[8] -DEBUG 06-24 20:23:08 [manager.py:391] -ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:165.03167152404785ms total_cost_time:165.07363319396973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10499 prompt_cache_len:5151 prompt_cache_ratio:0.4906181541099152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 -DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10852670669555664 s -INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s -DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=220034366819782506788911284879478910035, time:1750767788.5876021s req_ids:[8] -DEBUG 06-24 20:23:08 [manager.py:391] -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:196.58255577087402ms total_cost_time:196.624755859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10500 prompt_cache_len:5151 prompt_cache_ratio:0.49057142857142855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 -DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10750532150268555 s -INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.1099696159362793 s -DEBUG 06-24 20:23:08 [manager.py:391] Prefill Batch: batch_id=165370249540247868389171365045703401023, time:1750767788.7913153s req_ids:[8] -DEBUG 06-24 20:23:08 [manager.py:391] -ERROR 06-24 20:23:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:210.31832695007324ms total_cost_time:210.36314964294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10501 prompt_cache_len:5151 prompt_cache_ratio:0.49052471193219693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 -DEBUG 06-24 20:23:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:08 [manager.py:224] router recive req id 8 cost time 0.10743188858032227 s -INFO 06-24 20:23:08 [manager.py:68] detokenization recv req id 8 cost time 0.10949254035949707 s -DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=46914548390978496425318454641099302077, time:1750767789.0054648s req_ids:[8] -DEBUG 06-24 20:23:09 [manager.py:391] -ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:08 lightllm_req_id:8 first_token_cost:208.7841033935547ms total_cost_time:208.82463455200195ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10502 prompt_cache_len:5151 prompt_cache_ratio:0.49047800418967813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 -DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10836029052734375 s -INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s -DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=103392783865096425300454508626029030234, time:1750767789.2194126s req_ids:[8] -DEBUG 06-24 20:23:09 [manager.py:391] -ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:210.48998832702637ms total_cost_time:210.51263809204102ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:10503 prompt_cache_len:5151 prompt_cache_ratio:0.49043130534133106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 -DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10521197319030762 s -INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.10705161094665527 s -DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=111043631714963482903786914078158498538, time:1750767789.4341786s req_ids:[8] -DEBUG 06-24 20:23:09 [manager.py:391] -ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:210.0672721862793ms total_cost_time:210.11114120483398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10504 prompt_cache_len:5151 prompt_cache_ratio:0.49038461538461536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 -DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:09 [manager.py:224] router recive req id 8 cost time 0.10747933387756348 s -INFO 06-24 20:23:09 [manager.py:68] detokenization recv req id 8 cost time 0.1093757152557373 s -DEBUG 06-24 20:23:09 [manager.py:391] Prefill Batch: batch_id=24059588089287389854361894552918399712, time:1750767789.6478791s req_ids:[8] -DEBUG 06-24 20:23:09 [manager.py:391] -ERROR 06-24 20:23:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:375.7820129394531ms total_cost_time:375.8258819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10505 prompt_cache_len:5151 prompt_cache_ratio:0.4903379343169919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 -DEBUG 06-24 20:23:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s -INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.1104116439819336 s -DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=55212438606298325580934466211135352144, time:1750767790.0256085s req_ids:[8] -DEBUG 06-24 20:23:10 [manager.py:391] -ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:09 lightllm_req_id:8 first_token_cost:165.21215438842773ms total_cost_time:165.27271270751953ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10506 prompt_cache_len:5151 prompt_cache_ratio:0.49029126213592233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 -DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10851144790649414 s -INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s -DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=304434084715376283973451933997430115545, time:1750767790.1957963s req_ids:[8] -DEBUG 06-24 20:23:10 [manager.py:391] -ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:200.20079612731934ms total_cost_time:200.2429962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10507 prompt_cache_len:5151 prompt_cache_ratio:0.49024459883886934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 -DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s -INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s -DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=281535885799806600632874478105830657322, time:1750767790.400307s req_ids:[8] -DEBUG 06-24 20:23:10 [manager.py:391] -ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:202.38637924194336ms total_cost_time:202.42905616760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10508 prompt_cache_len:5151 prompt_cache_ratio:0.49019794442329656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 -DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s -INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037182807922363 s -DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=15741999783332113079593550735428721288, time:1750767790.6075797s req_ids:[8] -DEBUG 06-24 20:23:10 [manager.py:391] -ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:213.37461471557617ms total_cost_time:213.41848373413086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10509 prompt_cache_len:5151 prompt_cache_ratio:0.4901512988866686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 -DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:10 [manager.py:224] router recive req id 8 cost time 0.10882687568664551 s -INFO 06-24 20:23:10 [manager.py:68] detokenization recv req id 8 cost time 0.11090445518493652 s -DEBUG 06-24 20:23:10 [manager.py:391] Prefill Batch: batch_id=75524396585592636383898815363052284148, time:1750767790.8243484s req_ids:[8] -DEBUG 06-24 20:23:10 [manager.py:391] -ERROR 06-24 20:23:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:208.23907852172852ms total_cost_time:208.2819938659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10510 prompt_cache_len:5151 prompt_cache_ratio:0.490104662226451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 -DEBUG 06-24 20:23:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.1081385612487793 s -INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11015939712524414 s -DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=139821773293256259985018704396819698635, time:1750767791.0376763s req_ids:[8] -DEBUG 06-24 20:23:11 [manager.py:391] -ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:10 lightllm_req_id:8 first_token_cost:209.14864540100098ms total_cost_time:209.19203758239746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10511 prompt_cache_len:5151 prompt_cache_ratio:0.49005803444011037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 -DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10781240463256836 s -INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.10995745658874512 s -DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=63553487088312537059409365035282471181, time:1750767791.2517588s req_ids:[8] -DEBUG 06-24 20:23:11 [manager.py:391] -ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:374.93014335632324ms total_cost_time:374.97520446777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10512 prompt_cache_len:5151 prompt_cache_ratio:0.4900114155251142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 -DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10903644561767578 s -INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11108827590942383 s -DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=89570451453719896526957637692585509599, time:1750767791.6315482s req_ids:[8] -DEBUG 06-24 20:23:11 [manager.py:391] -ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:207.6404094696045ms total_cost_time:207.68475532531738ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10513 prompt_cache_len:5151 prompt_cache_ratio:0.48996480547893084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 -DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:11 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s -INFO 06-24 20:23:11 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s -DEBUG 06-24 20:23:11 [manager.py:391] Prefill Batch: batch_id=5659944395461967708871410630708063660, time:1750767791.8421195s req_ids:[8] -DEBUG 06-24 20:23:11 [manager.py:391] -ERROR 06-24 20:23:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:209.84339714050293ms total_cost_time:209.88702774047852ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10514 prompt_cache_len:5151 prompt_cache_ratio:0.4899182042990299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 -DEBUG 06-24 20:23:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10714864730834961 s -INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.10872173309326172 s -DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=57675362637170016775144474276464150143, time:1750767792.0561054s req_ids:[8] -DEBUG 06-24 20:23:12 [manager.py:391] -DEBUG 06-24 20:23:12 [stats.py:37] Avg tokens(prompt+generate) throughput: 45638.292 tokens/s -DEBUG 06-24 20:23:12 [stats.py:37] Avg prompt tokens throughput: 45629.595 tokens/s -DEBUG 06-24 20:23:12 [stats.py:37] Avg generate tokens throughput: 8.697 tokens/s -ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:11 lightllm_req_id:8 first_token_cost:206.31790161132812ms total_cost_time:206.3612937927246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10515 prompt_cache_len:5151 prompt_cache_ratio:0.4898716119828816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 -DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.1087808609008789 s -INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.11095190048217773 s -DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=177663106032884925446632845769406428673, time:1750767792.2669654s req_ids:[8] -DEBUG 06-24 20:23:12 [manager.py:391] -ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:209.74230766296387ms total_cost_time:209.78665351867676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10516 prompt_cache_len:5151 prompt_cache_ratio:0.4898250285279574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 -DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10909175872802734 s -INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.1111760139465332 s -DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=135133832850244019395752144513071703506, time:1750767792.482907s req_ids:[8] -DEBUG 06-24 20:23:12 [manager.py:391] -ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:212.22543716430664ms total_cost_time:212.26906776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10517 prompt_cache_len:5151 prompt_cache_ratio:0.4897784539317296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 -DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10767626762390137 s -INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.10969352722167969 s -DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=207174962002017792972199957986713731324, time:1750767792.6973548s req_ids:[8] -DEBUG 06-24 20:23:12 [manager.py:391] -ERROR 06-24 20:23:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:208.5108757019043ms total_cost_time:208.5549831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10518 prompt_cache_len:5151 prompt_cache_ratio:0.4897318881916714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 -DEBUG 06-24 20:23:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:12 [manager.py:224] router recive req id 8 cost time 0.10821151733398438 s -INFO 06-24 20:23:12 [manager.py:68] detokenization recv req id 8 cost time 0.11024260520935059 s -DEBUG 06-24 20:23:12 [manager.py:391] Prefill Batch: batch_id=107353707203886606969466179385401519479, time:1750767792.9124122s req_ids:[8] -DEBUG 06-24 20:23:12 [manager.py:391] -ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:12 lightllm_req_id:8 first_token_cost:211.75146102905273ms total_cost_time:211.79509162902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10519 prompt_cache_len:5151 prompt_cache_ratio:0.48968533130525715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 -DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.30990147590637207 s -INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.3121316432952881 s -DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=68359018073290960545308886771519367661, time:1750767793.3278317s req_ids:[8] -DEBUG 06-24 20:23:13 [manager.py:391] -ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:414.87932205200195ms total_cost_time:414.92271423339844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10520 prompt_cache_len:5151 prompt_cache_ratio:0.48963878326996196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 -DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10780072212219238 s -INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.10975384712219238 s -DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=145957623393675098924069957399053325304, time:1750767793.546723s req_ids:[8] -DEBUG 06-24 20:23:13 [manager.py:391] -ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:208.10341835021973ms total_cost_time:208.14871788024902ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10521 prompt_cache_len:5151 prompt_cache_ratio:0.4895922440832621 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 -DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10798382759094238 s -INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.1100015640258789 s -DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=293650215581887429273889851938353575045, time:1750767793.7598019s req_ids:[8] -DEBUG 06-24 20:23:13 [manager.py:391] -ERROR 06-24 20:23:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:210.0965976715088ms total_cost_time:210.14142036437988ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10522 prompt_cache_len:5151 prompt_cache_ratio:0.4895457137426345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 -DEBUG 06-24 20:23:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:13 [manager.py:224] router recive req id 8 cost time 0.10884332656860352 s -INFO 06-24 20:23:13 [manager.py:68] detokenization recv req id 8 cost time 0.11082005500793457 s -DEBUG 06-24 20:23:13 [manager.py:391] Prefill Batch: batch_id=100975903948495585413790924824280540358, time:1750767793.9728243s req_ids:[8] -DEBUG 06-24 20:23:13 [manager.py:391] -ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:13 lightllm_req_id:8 first_token_cost:207.14592933654785ms total_cost_time:207.19027519226074ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10523 prompt_cache_len:5151 prompt_cache_ratio:0.4894991922455573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 -DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s -INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s -DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=153453962920608169506472202848014528148, time:1750767794.184267s req_ids:[8] -DEBUG 06-24 20:23:14 [manager.py:391] -ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:207.96513557434082ms total_cost_time:208.01019668579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10524 prompt_cache_len:5151 prompt_cache_ratio:0.48945267958950966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 -DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10914397239685059 s -INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.11098837852478027 s -DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=19794675849032432206496704358834112755, time:1750767794.3985043s req_ids:[8] -DEBUG 06-24 20:23:14 [manager.py:391] -ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:210.96038818359375ms total_cost_time:211.00568771362305ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10525 prompt_cache_len:5151 prompt_cache_ratio:0.4894061757719715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 -DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10784387588500977 s -INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s -DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=181922587957900643269876737508005884083, time:1750767794.6149604s req_ids:[8] -DEBUG 06-24 20:23:14 [manager.py:391] -ERROR 06-24 20:23:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:212.74828910827637ms total_cost_time:212.79215812683105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10526 prompt_cache_len:5151 prompt_cache_ratio:0.4893596807904237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 -DEBUG 06-24 20:23:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:14 [manager.py:224] router recive req id 8 cost time 0.10597610473632812 s -INFO 06-24 20:23:14 [manager.py:68] detokenization recv req id 8 cost time 0.10796594619750977 s -DEBUG 06-24 20:23:14 [manager.py:391] Prefill Batch: batch_id=323006020968430285436397838857655278831, time:1750767794.8301084s req_ids:[8] -DEBUG 06-24 20:23:14 [manager.py:391] -ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:14 lightllm_req_id:8 first_token_cost:364.92252349853516ms total_cost_time:364.96710777282715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10527 prompt_cache_len:5151 prompt_cache_ratio:0.48931319464234824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 -DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10895466804504395 s -INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102509498596191 s -DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=102201014220438848733240371042768520752, time:1750767795.1982744s req_ids:[8] -DEBUG 06-24 20:23:15 [manager.py:391] -ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:201.7369270324707ms total_cost_time:201.7807960510254ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10528 prompt_cache_len:5151 prompt_cache_ratio:0.489266717325228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 -DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10917949676513672 s -INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102151870727539 s -DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=8658408790011574165926964006319192123, time:1750767795.4109485s req_ids:[8] -DEBUG 06-24 20:23:15 [manager.py:391] -ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:212.59593963623047ms total_cost_time:212.63885498046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10529 prompt_cache_len:5151 prompt_cache_ratio:0.4892202488365467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 -DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10801982879638672 s -INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013579368591309 s -DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=77542708209534337192191955309151391650, time:1750767795.6236284s req_ids:[8] -DEBUG 06-24 20:23:15 [manager.py:391] -ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:211.57503128051758ms total_cost_time:211.63010597229004ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:10530 prompt_cache_len:5151 prompt_cache_ratio:0.4891737891737892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 -DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:15 [manager.py:224] router recive req id 8 cost time 0.10848355293273926 s -INFO 06-24 20:23:15 [manager.py:68] detokenization recv req id 8 cost time 0.1103982925415039 s -DEBUG 06-24 20:23:15 [manager.py:391] Prefill Batch: batch_id=205930217717701043080726446567956603082, time:1750767795.8382573s req_ids:[8] -DEBUG 06-24 20:23:15 [manager.py:391] -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:210.5088233947754ms total_cost_time:210.55293083190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10531 prompt_cache_len:5151 prompt_cache_ratio:0.48912733833444116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 -DEBUG 06-24 20:23:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10814929008483887 s -INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11003756523132324 s -DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=207788433510289404428365533763712853781, time:1750767796.053094s req_ids:[8] -DEBUG 06-24 20:23:16 [manager.py:391] -ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:15 lightllm_req_id:8 first_token_cost:208.59527587890625ms total_cost_time:208.63938331604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10532 prompt_cache_len:5151 prompt_cache_ratio:0.4890808963159894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 -DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10886836051940918 s -INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s -DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=256434674743195982114371851166603146737, time:1750767796.266375s req_ids:[8] -DEBUG 06-24 20:23:16 [manager.py:391] -ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:208.8487148284912ms total_cost_time:208.8923454284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10533 prompt_cache_len:5151 prompt_cache_ratio:0.4890344631159214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 -DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10881400108337402 s -INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11084771156311035 s -DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=94169743975114847569830510820187016435, time:1750767796.4793234s req_ids:[8] -DEBUG 06-24 20:23:16 [manager.py:391] -ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:207.61370658874512ms total_cost_time:207.6563835144043ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10534 prompt_cache_len:5151 prompt_cache_ratio:0.48898803873172586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 -DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:16 [manager.py:224] router recive req id 8 cost time 0.10792875289916992 s -INFO 06-24 20:23:16 [manager.py:68] detokenization recv req id 8 cost time 0.11015033721923828 s -DEBUG 06-24 20:23:16 [manager.py:391] Prefill Batch: batch_id=225691613820244617850568937976307226938, time:1750767796.6911275s req_ids:[8] -DEBUG 06-24 20:23:16 [manager.py:391] -ERROR 06-24 20:23:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:366.84584617614746ms total_cost_time:366.89209938049316ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10535 prompt_cache_len:5151 prompt_cache_ratio:0.4889416231608923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 -DEBUG 06-24 20:23:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:16 [batch.py:51] router release req id 8 -INFO 06-24 20:23:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10950303077697754 s -INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.11141800880432129 s -DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=154499013009289501225264090005648834604, time:1750767797.0611374s req_ids:[8] -DEBUG 06-24 20:23:17 [manager.py:391] -ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:16 lightllm_req_id:8 first_token_cost:210.44468879699707ms total_cost_time:210.48712730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10536 prompt_cache_len:5151 prompt_cache_ratio:0.48889521640091116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 -DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10916352272033691 s -INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.1111912727355957 s -DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=163983926928818824706495166708941450984, time:1750767797.2772782s req_ids:[8] -DEBUG 06-24 20:23:17 [manager.py:391] -ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:208.41193199157715ms total_cost_time:208.45508575439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10537 prompt_cache_len:5151 prompt_cache_ratio:0.48884881844927397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 -DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10790801048278809 s -INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10983133316040039 s -DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=76070627260413980981175792313749074692, time:1750767797.4907858s req_ids:[8] -DEBUG 06-24 20:23:17 [manager.py:391] -ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:210.27159690856934ms total_cost_time:210.31498908996582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10538 prompt_cache_len:5151 prompt_cache_ratio:0.4888024293034731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 -DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.10717511177062988 s -INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10902643203735352 s -DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=271685061277719993712582177868272290572, time:1750767797.705348s req_ids:[8] -DEBUG 06-24 20:23:17 [manager.py:391] -ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:165.85183143615723ms total_cost_time:165.8928394317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10539 prompt_cache_len:5151 prompt_cache_ratio:0.488756048961002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 -DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:17 [manager.py:224] router recive req id 8 cost time 0.1081075668334961 s -INFO 06-24 20:23:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997128486633301 s -DEBUG 06-24 20:23:17 [manager.py:391] Prefill Batch: batch_id=261552380299926029620334085981128685999, time:1750767797.8752513s req_ids:[8] -DEBUG 06-24 20:23:17 [manager.py:391] -ERROR 06-24 20:23:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:199.42259788513184ms total_cost_time:199.46694374084473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10540 prompt_cache_len:5151 prompt_cache_ratio:0.48870967741935484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 -DEBUG 06-24 20:23:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10916519165039062 s -INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.1111297607421875 s -DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=232785756382693521180138208655647740286, time:1750767798.0819907s req_ids:[8] -DEBUG 06-24 20:23:18 [manager.py:391] -ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:17 lightllm_req_id:8 first_token_cost:206.528902053833ms total_cost_time:206.5894603729248ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10541 prompt_cache_len:5151 prompt_cache_ratio:0.4886633146760269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 -DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10783815383911133 s -INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.10984992980957031 s -DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=280379045392154242532510553331309286785, time:1750767798.293527s req_ids:[8] -DEBUG 06-24 20:23:18 [manager.py:391] -ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:203.64832878112793ms total_cost_time:203.6893367767334ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10542 prompt_cache_len:5151 prompt_cache_ratio:0.4886169607285145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 -DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.10780787467956543 s -INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.1099240779876709 s -DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=58916834652057400942515487938459448049, time:1750767798.504073s req_ids:[8] -DEBUG 06-24 20:23:18 [manager.py:391] -ERROR 06-24 20:23:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:209.14363861083984ms total_cost_time:209.18750762939453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10543 prompt_cache_len:5151 prompt_cache_ratio:0.4885706155743147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 -DEBUG 06-24 20:23:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:18 [manager.py:224] router recive req id 8 cost time 0.3103036880493164 s -INFO 06-24 20:23:18 [manager.py:68] detokenization recv req id 8 cost time 0.31230664253234863 s -DEBUG 06-24 20:23:18 [manager.py:391] Prefill Batch: batch_id=122757105661462498315367893442956340916, time:1750767798.925997s req_ids:[8] -DEBUG 06-24 20:23:18 [manager.py:391] -ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:18 lightllm_req_id:8 first_token_cost:418.88976097106934ms total_cost_time:418.9324378967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10544 prompt_cache_len:5151 prompt_cache_ratio:0.48852427921092567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 -DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10802412033081055 s -INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019182205200195 s -DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=289668057849387587738812066190441142892, time:1750767799.144604s req_ids:[8] -DEBUG 06-24 20:23:19 [manager.py:391] -ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.9313735961914ms total_cost_time:209.9757194519043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10545 prompt_cache_len:5151 prompt_cache_ratio:0.4884779516358464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 -DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10834431648254395 s -INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11027336120605469 s -DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=230445750422319362798883249835658808677, time:1750767799.3600543s req_ids:[8] -DEBUG 06-24 20:23:19 [manager.py:391] -ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.19513702392578ms total_cost_time:209.23781394958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10546 prompt_cache_len:5151 prompt_cache_ratio:0.4884316328465769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 -DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10862970352172852 s -INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.1108396053314209 s -DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=27761550774756758275482656260098989074, time:1750767799.575164s req_ids:[8] -DEBUG 06-24 20:23:19 [manager.py:391] -ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:207.98683166503906ms total_cost_time:208.02998542785645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10547 prompt_cache_len:5151 prompt_cache_ratio:0.4883853228406182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 -DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10860228538513184 s -INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11061239242553711 s -DEBUG 06-24 20:23:19 [manager.py:391] Prefill Batch: batch_id=230169112702596070491529601845165931442, time:1750767799.787658s req_ids:[8] -DEBUG 06-24 20:23:19 [manager.py:391] -ERROR 06-24 20:23:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:209.49578285217285ms total_cost_time:209.54179763793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10548 prompt_cache_len:5151 prompt_cache_ratio:0.4883390216154721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 -DEBUG 06-24 20:23:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:19 [manager.py:224] router recive req id 8 cost time 0.10814666748046875 s -INFO 06-24 20:23:19 [manager.py:68] detokenization recv req id 8 cost time 0.11004304885864258 s -DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=200096191489901066678812064764128600112, time:1750767800.0016806s req_ids:[8] -DEBUG 06-24 20:23:20 [manager.py:391] -ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:19 lightllm_req_id:8 first_token_cost:204.50401306152344ms total_cost_time:204.54716682434082ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10549 prompt_cache_len:5151 prompt_cache_ratio:0.4882927291686416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 -DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.1071479320526123 s -INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.10896968841552734 s -DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=140621637521862760995481892467228551119, time:1750767800.2124295s req_ids:[8] -DEBUG 06-24 20:23:20 [manager.py:391] -ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:166.36371612548828ms total_cost_time:166.40496253967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10550 prompt_cache_len:5151 prompt_cache_ratio:0.48824644549763035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 -DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.1080629825592041 s -INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11005377769470215 s -DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=74824843944110781401531505610786734471, time:1750767800.383586s req_ids:[8] -DEBUG 06-24 20:23:20 [manager.py:391] -ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:365.23914337158203ms total_cost_time:365.2834892272949ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10551 prompt_cache_len:5151 prompt_cache_ratio:0.48820017059994314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 -DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.10902810096740723 s -INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11103272438049316 s -DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=193120891851973332498648182633609516190, time:1750767800.75355s req_ids:[8] -DEBUG 06-24 20:23:20 [manager.py:391] -ERROR 06-24 20:23:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:206.0999870300293ms total_cost_time:206.1440944671631ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10552 prompt_cache_len:5151 prompt_cache_ratio:0.48815390447308565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 -DEBUG 06-24 20:23:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:20 [manager.py:224] router recive req id 8 cost time 0.10897541046142578 s -INFO 06-24 20:23:20 [manager.py:68] detokenization recv req id 8 cost time 0.11097311973571777 s -DEBUG 06-24 20:23:20 [manager.py:391] Prefill Batch: batch_id=258036025765972859156200257858143711777, time:1750767800.9670439s req_ids:[8] -DEBUG 06-24 20:23:20 [manager.py:391] -ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:20 lightllm_req_id:8 first_token_cost:208.5883617401123ms total_cost_time:208.6331844329834ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10553 prompt_cache_len:5151 prompt_cache_ratio:0.48810764711456456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 -DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10776066780090332 s -INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.10967063903808594 s -DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=297206624226052054167233196753259654715, time:1750767801.1924891s req_ids:[8] -DEBUG 06-24 20:23:21 [manager.py:391] -ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:219.54870223999023ms total_cost_time:219.59233283996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10554 prompt_cache_len:5151 prompt_cache_ratio:0.48806139852188746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 -DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10935735702514648 s -INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.11148405075073242 s -DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=298425519050738719949869265975783826697, time:1750767801.4076734s req_ids:[8] -DEBUG 06-24 20:23:21 [manager.py:391] -ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:209.34271812438965ms total_cost_time:209.38587188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10555 prompt_cache_len:5151 prompt_cache_ratio:0.4880151586925628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 -DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10911273956298828 s -INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s -DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=79932727332359486502320936854070590208, time:1750767801.6230245s req_ids:[8] -DEBUG 06-24 20:23:21 [manager.py:391] -ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:207.71455764770508ms total_cost_time:207.75675773620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10556 prompt_cache_len:5151 prompt_cache_ratio:0.48796892762410005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 -DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:21 [manager.py:224] router recive req id 8 cost time 0.10787248611450195 s -INFO 06-24 20:23:21 [manager.py:68] detokenization recv req id 8 cost time 0.10994982719421387 s -DEBUG 06-24 20:23:21 [manager.py:391] Prefill Batch: batch_id=55003324000668117483760985119531555770, time:1750767801.8347437s req_ids:[8] -DEBUG 06-24 20:23:21 [manager.py:391] -ERROR 06-24 20:23:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:208.07194709777832ms total_cost_time:208.1146240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10557 prompt_cache_len:5151 prompt_cache_ratio:0.48792270531400966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 -DEBUG 06-24 20:23:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10726261138916016 s -INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.10909557342529297 s -DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=316414676733546544463998249901221899824, time:1750767802.0506444s req_ids:[8] -DEBUG 06-24 20:23:22 [manager.py:391] -ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:23:22 [stats.py:37] Avg tokens(prompt+generate) throughput: 44387.100 tokens/s -DEBUG 06-24 20:23:22 [stats.py:37] Avg prompt tokens throughput: 44378.579 tokens/s -DEBUG 06-24 20:23:22 [stats.py:37] Avg generate tokens throughput: 8.521 tokens/s -INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:21 lightllm_req_id:8 first_token_cost:333.85205268859863ms total_cost_time:333.8966369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10558 prompt_cache_len:5151 prompt_cache_ratio:0.48787649175980297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 -DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s -INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.10979080200195312 s -DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=281390346247682185979026902050459277796, time:1750767802.38847s req_ids:[8] -DEBUG 06-24 20:23:22 [manager.py:391] -ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:193.6802864074707ms total_cost_time:193.7251091003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10559 prompt_cache_len:5151 prompt_cache_ratio:0.48783028695899233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 -DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s -INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.11113619804382324 s -DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=35478402712308417166143843208469603603, time:1750767802.5893872s req_ids:[8] -DEBUG 06-24 20:23:22 [manager.py:391] -ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:205.81889152526855ms total_cost_time:205.86371421813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10560 prompt_cache_len:5151 prompt_cache_ratio:0.4877840909090909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 -DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:22 [manager.py:224] router recive req id 8 cost time 0.10867547988891602 s -INFO 06-24 20:23:22 [manager.py:68] detokenization recv req id 8 cost time 0.11062288284301758 s -DEBUG 06-24 20:23:22 [manager.py:391] Prefill Batch: batch_id=196644603409985866450823290147185251338, time:1750767802.8003201s req_ids:[8] -DEBUG 06-24 20:23:22 [manager.py:391] -ERROR 06-24 20:23:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:204.57220077514648ms total_cost_time:204.61535453796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10561 prompt_cache_len:5151 prompt_cache_ratio:0.48773790360761293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 -DEBUG 06-24 20:23:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10889482498168945 s -INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.1109776496887207 s -DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=243676183230123385063439111350281356370, time:1750767803.0105016s req_ids:[8] -DEBUG 06-24 20:23:23 [manager.py:391] -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:22 lightllm_req_id:8 first_token_cost:206.38132095336914ms total_cost_time:206.42375946044922ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10562 prompt_cache_len:5151 prompt_cache_ratio:0.4876917250520735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 -DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:23 [batch.py:51] router release req id 8 -INFO 06-24 20:23:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:23:23 [statics_utils.py:24] mean first cost: 228.40219983119954 ms -INFO 06-24 20:23:23 [statics_utils.py:24] mean per token cost: 0.06715881479464118 ms -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10754632949829102 s -INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.10965466499328613 s -DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=5870643599275590524862380036959005878, time:1750767803.2238207s req_ids:[8] -DEBUG 06-24 20:23:23 [manager.py:391] -ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:207.0775032043457ms total_cost_time:207.11946487426758ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10563 prompt_cache_len:5151 prompt_cache_ratio:0.48764555523998865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 -DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.10800886154174805 s -INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.11006474494934082 s -DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=105230735345434795187875896015195942851, time:1750767803.4367042s req_ids:[8] -DEBUG 06-24 20:23:23 [manager.py:391] -ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:208.6644172668457ms total_cost_time:208.7085247039795ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10564 prompt_cache_len:5151 prompt_cache_ratio:0.48759939416887543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 -DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:23 [manager.py:224] router recive req id 8 cost time 0.1082766056060791 s -INFO 06-24 20:23:23 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s -DEBUG 06-24 20:23:23 [manager.py:391] Prefill Batch: batch_id=38289967942352887997750733340412377668, time:1750767803.651139s req_ids:[8] -DEBUG 06-24 20:23:23 [manager.py:391] -ERROR 06-24 20:23:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:385.2496147155762ms total_cost_time:385.2953910827637ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10565 prompt_cache_len:5151 prompt_cache_ratio:0.48755324183625176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 -DEBUG 06-24 20:23:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s -INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s -DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=130898138005803516014141643763082471336, time:1750767804.051288s req_ids:[8] -DEBUG 06-24 20:23:24 [manager.py:391] -ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:23 lightllm_req_id:8 first_token_cost:178.18546295166016ms total_cost_time:178.22718620300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10566 prompt_cache_len:5151 prompt_cache_ratio:0.48750709823963656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 -DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s -INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s -DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=339517920910523521811597183965644957664, time:1750767804.2253027s req_ids:[8] -DEBUG 06-24 20:23:24 [manager.py:391] -ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:194.6084499359131ms total_cost_time:194.65208053588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10567 prompt_cache_len:5151 prompt_cache_ratio:0.48746096337654965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 -DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.1086721420288086 s -INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11075162887573242 s -DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=61941015305183658364223702153394623461, time:1750767804.4225938s req_ids:[8] -DEBUG 06-24 20:23:24 [manager.py:391] -ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:201.49874687194824ms total_cost_time:201.5397548675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10568 prompt_cache_len:5151 prompt_cache_ratio:0.48741483724451173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 -DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10868644714355469 s -INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.11084723472595215 s -DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=179471736249085892245051118598414867791, time:1750767804.630419s req_ids:[8] -DEBUG 06-24 20:23:24 [manager.py:391] -ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:206.5720558166504ms total_cost_time:206.59780502319336ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:10569 prompt_cache_len:5151 prompt_cache_ratio:0.4873687198410446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 -DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:24 [manager.py:224] router recive req id 8 cost time 0.10706186294555664 s -INFO 06-24 20:23:24 [manager.py:68] detokenization recv req id 8 cost time 0.10909080505371094 s -DEBUG 06-24 20:23:24 [manager.py:391] Prefill Batch: batch_id=310183199270551091418164842669890798865, time:1750767804.8465369s req_ids:[8] -DEBUG 06-24 20:23:24 [manager.py:391] -ERROR 06-24 20:23:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:204.6051025390625ms total_cost_time:204.63013648986816ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:10570 prompt_cache_len:5151 prompt_cache_ratio:0.48732261116367076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 -DEBUG 06-24 20:23:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10719609260559082 s -INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10936093330383301 s -DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=268060445851607364708633959200458605647, time:1750767805.053917s req_ids:[8] -DEBUG 06-24 20:23:25 [manager.py:391] -ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:24 lightllm_req_id:8 first_token_cost:208.9698314666748ms total_cost_time:208.99605751037598ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:10571 prompt_cache_len:5151 prompt_cache_ratio:0.48727651120991394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 -DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10809087753295898 s -INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s -DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=82041548072377950844375702232596312562, time:1750767805.2699723s req_ids:[8] -DEBUG 06-24 20:23:25 [manager.py:391] -ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:207.99851417541504ms total_cost_time:208.02617073059082ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:10572 prompt_cache_len:5151 prompt_cache_ratio:0.48723041997729855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 -DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10698366165161133 s -INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10906767845153809 s -DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=277143668288910730814503220056621954306, time:1750767805.483336s req_ids:[8] -DEBUG 06-24 20:23:25 [manager.py:391] -ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:417.11974143981934ms total_cost_time:417.1462059020996ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:10573 prompt_cache_len:5151 prompt_cache_ratio:0.48718433746335005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 -DEBUG 06-24 20:23:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:25 [manager.py:224] router recive req id 8 cost time 0.10743594169616699 s -INFO 06-24 20:23:25 [manager.py:68] detokenization recv req id 8 cost time 0.10943746566772461 s -DEBUG 06-24 20:23:25 [manager.py:391] Prefill Batch: batch_id=186367774265478952694738383213051807404, time:1750767805.9081852s req_ids:[8] -DEBUG 06-24 20:23:25 [manager.py:391] -ERROR 06-24 20:23:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:25 lightllm_req_id:8 first_token_cost:215.88540077209473ms total_cost_time:215.9113883972168ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:10574 prompt_cache_len:5151 prompt_cache_ratio:0.4871382636655949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 -DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10758638381958008 s -INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10956048965454102 s -DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=260159047027369305044269385414180651744, time:1750767806.123888s req_ids:[8] -DEBUG 06-24 20:23:26 [manager.py:391] -ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:211.10916137695312ms total_cost_time:211.134672164917ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:10575 prompt_cache_len:5151 prompt_cache_ratio:0.4870921985815603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 -DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10678219795227051 s -INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10883617401123047 s -DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=63895385407009942945228935080394885652, time:1750767806.341533s req_ids:[8] -DEBUG 06-24 20:23:26 [manager.py:391] -ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:208.8625431060791ms total_cost_time:208.88900756835938ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:10576 prompt_cache_len:5151 prompt_cache_ratio:0.48704614220877457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 -DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10753870010375977 s -INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10946846008300781 s -DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=138891744844753793670024022334827493990, time:1750767806.5557919s req_ids:[8] -DEBUG 06-24 20:23:26 [manager.py:391] -ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:213.1509780883789ms total_cost_time:213.19580078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10577 prompt_cache_len:5151 prompt_cache_ratio:0.48700009454476695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 -DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10870146751403809 s -INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.11069035530090332 s -DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=233933099612379178827063512492090623776, time:1750767806.772135s req_ids:[8] -DEBUG 06-24 20:23:26 [manager.py:391] -ERROR 06-24 20:23:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:209.1667652130127ms total_cost_time:209.19418334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:10578 prompt_cache_len:5151 prompt_cache_ratio:0.4869540555870675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 -DEBUG 06-24 20:23:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:26 [manager.py:224] router recive req id 8 cost time 0.10773181915283203 s -INFO 06-24 20:23:26 [manager.py:68] detokenization recv req id 8 cost time 0.10970902442932129 s -DEBUG 06-24 20:23:26 [manager.py:391] Prefill Batch: batch_id=76633448612257674462946007913337663421, time:1750767806.9880245s req_ids:[8] -DEBUG 06-24 20:23:26 [manager.py:391] -ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:26 lightllm_req_id:8 first_token_cost:210.70098876953125ms total_cost_time:210.72626113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:10579 prompt_cache_len:5151 prompt_cache_ratio:0.4869080253332073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 -DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.1075592041015625 s -INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10967683792114258 s -DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=240835447256370895263204467819486178862, time:1750767807.202556s req_ids:[8] -DEBUG 06-24 20:23:27 [manager.py:391] -ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:210.0231647491455ms total_cost_time:210.0508213043213ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:10580 prompt_cache_len:5151 prompt_cache_ratio:0.4868620037807183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 -DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.1074669361114502 s -INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10967493057250977 s -DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=73455545078193044399765939502571278176, time:1750767807.4185908s req_ids:[8] -DEBUG 06-24 20:23:27 [manager.py:391] -ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:378.8790702819824ms total_cost_time:378.9057731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:10581 prompt_cache_len:5151 prompt_cache_ratio:0.48681599092713357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 -DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:27 [manager.py:224] router recive req id 8 cost time 0.10722684860229492 s -INFO 06-24 20:23:27 [manager.py:68] detokenization recv req id 8 cost time 0.10934042930603027 s -DEBUG 06-24 20:23:27 [manager.py:391] Prefill Batch: batch_id=287109929602429456417041128143866934349, time:1750767807.801624s req_ids:[8] -DEBUG 06-24 20:23:27 [manager.py:391] -ERROR 06-24 20:23:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:207.99851417541504ms total_cost_time:208.02545547485352ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:10582 prompt_cache_len:5151 prompt_cache_ratio:0.4867699867699868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 -DEBUG 06-24 20:23:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10712623596191406 s -INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.10898256301879883 s -DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=320503679515350941497512093487547089939, time:1750767808.0156s req_ids:[8] -DEBUG 06-24 20:23:28 [manager.py:391] -ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:27 lightllm_req_id:8 first_token_cost:209.46049690246582ms total_cost_time:209.48266983032227ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:10583 prompt_cache_len:5151 prompt_cache_ratio:0.4867239913068128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 -DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.1068272590637207 s -INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.10893821716308594 s -DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=213639688990821776167126999704675522673, time:1750767808.2301686s req_ids:[8] -DEBUG 06-24 20:23:28 [manager.py:391] -ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:193.8636302947998ms total_cost_time:193.90416145324707ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10584 prompt_cache_len:5151 prompt_cache_ratio:0.48667800453514737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 -DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10840320587158203 s -INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11036419868469238 s -DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=90950471142427081649407821093144862325, time:1750767808.4258635s req_ids:[8] -DEBUG 06-24 20:23:28 [manager.py:391] -ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:208.33873748779297ms total_cost_time:208.38117599487305ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10585 prompt_cache_len:5151 prompt_cache_ratio:0.4866320264525272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 -DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s -INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11063122749328613 s -DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=171074563834729134138899235368959273302, time:1750767808.6405563s req_ids:[8] -DEBUG 06-24 20:23:28 [manager.py:391] -ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:202.03256607055664ms total_cost_time:202.07762718200684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10586 prompt_cache_len:5151 prompt_cache_ratio:0.4865860570564897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 -DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:28 [manager.py:224] router recive req id 8 cost time 0.10809993743896484 s -INFO 06-24 20:23:28 [manager.py:68] detokenization recv req id 8 cost time 0.11000466346740723 s -DEBUG 06-24 20:23:28 [manager.py:391] Prefill Batch: batch_id=107040977703879795177255903976388551349, time:1750767808.849747s req_ids:[8] -DEBUG 06-24 20:23:28 [manager.py:391] -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:206.831693649292ms total_cost_time:206.87389373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10587 prompt_cache_len:5151 prompt_cache_ratio:0.4865400963445735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 -DEBUG 06-24 20:23:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10877466201782227 s -INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095690727233887 s -DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=334223754105166662184624036780787201810, time:1750767809.060972s req_ids:[8] -DEBUG 06-24 20:23:29 [manager.py:391] -ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:28 lightllm_req_id:8 first_token_cost:198.8990306854248ms total_cost_time:198.9421844482422ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10588 prompt_cache_len:5151 prompt_cache_ratio:0.4864941443143181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 -DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10899925231933594 s -INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.1110072135925293 s -DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=142120503313511755528130963689549811088, time:1750767809.267129s req_ids:[8] -DEBUG 06-24 20:23:29 [manager.py:391] -ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:206.87484741210938ms total_cost_time:206.91776275634766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10589 prompt_cache_len:5151 prompt_cache_ratio:0.48644820096326374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 -DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s -INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.11034178733825684 s -DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=283039935415706212664921653027795952089, time:1750767809.4805195s req_ids:[8] -DEBUG 06-24 20:23:29 [manager.py:391] -ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:352.4484634399414ms total_cost_time:352.4930477142334ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10590 prompt_cache_len:5151 prompt_cache_ratio:0.48640226628895183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 -DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:29 [manager.py:224] router recive req id 8 cost time 0.10902261734008789 s -INFO 06-24 20:23:29 [manager.py:68] detokenization recv req id 8 cost time 0.1110677719116211 s -DEBUG 06-24 20:23:29 [manager.py:391] Prefill Batch: batch_id=42079004799456599193261064600250232129, time:1750767809.8659406s req_ids:[8] -DEBUG 06-24 20:23:29 [manager.py:391] -ERROR 06-24 20:23:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:234.1439723968506ms total_cost_time:234.18855667114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10591 prompt_cache_len:5151 prompt_cache_ratio:0.48635634028892455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 -DEBUG 06-24 20:23:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10889101028442383 s -INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.11081385612487793 s -DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=126775619072673405241715471735740029887, time:1750767810.0777638s req_ids:[8] -DEBUG 06-24 20:23:30 [manager.py:391] -ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:29 lightllm_req_id:8 first_token_cost:206.05850219726562ms total_cost_time:206.1009407043457ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10592 prompt_cache_len:5151 prompt_cache_ratio:0.4863104229607251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 -DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10877323150634766 s -INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.11083030700683594 s -DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=166323288787399514626021471323322929834, time:1750767810.290168s req_ids:[8] -DEBUG 06-24 20:23:30 [manager.py:391] -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:30 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:206.1154842376709ms total_cost_time:206.1600685119629ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10593 prompt_cache_len:5151 prompt_cache_ratio:0.4862645143018975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 -DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10884332656860352 s -INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.1109318733215332 s -DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=198335673821234505339724825430855145339, time:1750767810.501339s req_ids:[8] -DEBUG 06-24 20:23:30 [manager.py:391] -ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:201.77793502807617ms total_cost_time:201.82228088378906ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10594 prompt_cache_len:5151 prompt_cache_ratio:0.4862186143099868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 -DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.1074223518371582 s -INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.10935688018798828 s -DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=48102405358065047708546072187677442745, time:1750767810.70928s req_ids:[8] -DEBUG 06-24 20:23:30 [manager.py:391] -ERROR 06-24 20:23:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:199.67269897460938ms total_cost_time:199.71632957458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10595 prompt_cache_len:5151 prompt_cache_ratio:0.48617272298253894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 -DEBUG 06-24 20:23:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:30 [manager.py:224] router recive req id 8 cost time 0.10909032821655273 s -INFO 06-24 20:23:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110081672668457 s -DEBUG 06-24 20:23:30 [manager.py:391] Prefill Batch: batch_id=208379217200951301345986660449640277309, time:1750767810.9157777s req_ids:[8] -DEBUG 06-24 20:23:30 [manager.py:391] -ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:30 lightllm_req_id:8 first_token_cost:205.69992065429688ms total_cost_time:205.74498176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10596 prompt_cache_len:5151 prompt_cache_ratio:0.4861268403171008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 -DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s -INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11094284057617188 s -DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=34507118452913979053388996677929473451, time:1750767811.1277444s req_ids:[8] -DEBUG 06-24 20:23:31 [manager.py:391] -ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:372.27654457092285ms total_cost_time:372.32208251953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10597 prompt_cache_len:5151 prompt_cache_ratio:0.48608096631122016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 -DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10842514038085938 s -INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11046552658081055 s -DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=327505332942232623446345904272220035545, time:1750767811.5060194s req_ids:[8] -DEBUG 06-24 20:23:31 [manager.py:391] -ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:206.6490650177002ms total_cost_time:206.6946029663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10598 prompt_cache_len:5151 prompt_cache_ratio:0.48603510096244573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 -DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10845232009887695 s -INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s -DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=240139758206292863285115753611639092354, time:1750767811.7210624s req_ids:[8] -DEBUG 06-24 20:23:31 [manager.py:391] -ERROR 06-24 20:23:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:208.39929580688477ms total_cost_time:208.44435691833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10599 prompt_cache_len:5151 prompt_cache_ratio:0.4859892442683272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 -DEBUG 06-24 20:23:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:31 [batch.py:51] router release req id 8 -DEBUG 06-24 20:23:31 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:31 [manager.py:283] -DEBUG 06-24 20:23:31 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:31 [manager.py:284] -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:31 [manager.py:224] router recive req id 8 cost time 0.10869789123535156 s -INFO 06-24 20:23:31 [manager.py:68] detokenization recv req id 8 cost time 0.11061882972717285 s -DEBUG 06-24 20:23:31 [manager.py:391] Prefill Batch: batch_id=272148052655120584761169423874977634142, time:1750767811.9344182s req_ids:[8] -DEBUG 06-24 20:23:31 [manager.py:391] -ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:31 lightllm_req_id:8 first_token_cost:207.38983154296875ms total_cost_time:207.43441581726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10600 prompt_cache_len:5151 prompt_cache_ratio:0.4859433962264151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 -DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.1087801456451416 s -INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.11084532737731934 s -DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=271417524959698733290066442598351556951, time:1750767812.1493838s req_ids:[8] -DEBUG 06-24 20:23:32 [manager.py:391] -ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:205.77478408813477ms total_cost_time:205.81698417663574ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10601 prompt_cache_len:5151 prompt_cache_ratio:0.48589755683426095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 -DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s -INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.10999536514282227 s -DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=222678921241954460341327115475400846160, time:1750767812.3599384s req_ids:[8] -DEBUG 06-24 20:23:32 [manager.py:391] -DEBUG 06-24 20:23:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 46126.509 tokens/s -DEBUG 06-24 20:23:32 [stats.py:37] Avg prompt tokens throughput: 46117.891 tokens/s -DEBUG 06-24 20:23:32 [stats.py:37] Avg generate tokens throughput: 8.618 tokens/s -ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:208.22834968566895ms total_cost_time:208.27269554138184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10602 prompt_cache_len:5151 prompt_cache_ratio:0.48585172608941707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 -DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.1091618537902832 s -INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.11108589172363281 s -DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=316326715217238276791746487440939986756, time:1750767812.575488s req_ids:[8] -DEBUG 06-24 20:23:32 [manager.py:391] -ERROR 06-24 20:23:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:209.24091339111328ms total_cost_time:209.28406715393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10603 prompt_cache_len:5151 prompt_cache_ratio:0.48580590398943696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 -DEBUG 06-24 20:23:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:32 [manager.py:224] router recive req id 8 cost time 0.10836577415466309 s -INFO 06-24 20:23:32 [manager.py:68] detokenization recv req id 8 cost time 0.1104574203491211 s -DEBUG 06-24 20:23:32 [manager.py:391] Prefill Batch: batch_id=139749630265199994937330590862862922937, time:1750767812.7903078s req_ids:[8] -DEBUG 06-24 20:23:32 [manager.py:391] -ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:32 lightllm_req_id:8 first_token_cost:378.5426616668701ms total_cost_time:378.5865306854248ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10604 prompt_cache_len:5151 prompt_cache_ratio:0.48576009053187474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 -DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10768985748291016 s -INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.10965514183044434 s -DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=340097525144110809251933922488635915205, time:1750767813.1737452s req_ids:[8] -DEBUG 06-24 20:23:33 [manager.py:391] -ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:200.30784606933594ms total_cost_time:200.35028457641602ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10605 prompt_cache_len:5151 prompt_cache_ratio:0.4857142857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 -DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s -INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11021018028259277 s -DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=297397482124910181749692685116025943499, time:1750767813.3803527s req_ids:[8] -DEBUG 06-24 20:23:33 [manager.py:391] -ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:203.78589630126953ms total_cost_time:203.83048057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10606 prompt_cache_len:5151 prompt_cache_ratio:0.4856684895342259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 -DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10802316665649414 s -INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11011195182800293 s -DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=84917473804953672582943777154564435861, time:1750767813.593819s req_ids:[8] -DEBUG 06-24 20:23:33 [manager.py:391] -ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:204.62679862976074ms total_cost_time:204.67138290405273ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10607 prompt_cache_len:5151 prompt_cache_ratio:0.4856227019892524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 -DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:33 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s -INFO 06-24 20:23:33 [manager.py:68] detokenization recv req id 8 cost time 0.11055707931518555 s -DEBUG 06-24 20:23:33 [manager.py:391] Prefill Batch: batch_id=137115911546530998967570934236193167372, time:1750767813.802926s req_ids:[8] -DEBUG 06-24 20:23:33 [manager.py:391] -ERROR 06-24 20:23:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:206.22706413269043ms total_cost_time:206.27164840698242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10608 prompt_cache_len:5151 prompt_cache_ratio:0.4855769230769231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 -DEBUG 06-24 20:23:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10810399055480957 s -INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.10992884635925293 s -DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=162908782544598867845808621660361357414, time:1750767814.016541s req_ids:[8] -DEBUG 06-24 20:23:34 [manager.py:391] -ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:33 lightllm_req_id:8 first_token_cost:165.35210609436035ms total_cost_time:165.39406776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10609 prompt_cache_len:5151 prompt_cache_ratio:0.48553115279479686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 -DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10814261436462402 s -INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.11020112037658691 s -DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=235723158115036734154778957805550911242, time:1750767814.1856308s req_ids:[8] -DEBUG 06-24 20:23:34 [manager.py:391] -ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:201.1110782623291ms total_cost_time:201.155424118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10610 prompt_cache_len:5151 prompt_cache_ratio:0.48548539114043354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 -DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.10821938514709473 s -INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.11027860641479492 s -DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=303073645720057297861643302979422893529, time:1750767814.3947806s req_ids:[8] -DEBUG 06-24 20:23:34 [manager.py:391] -ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:211.33899688720703ms total_cost_time:211.38644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10611 prompt_cache_len:5151 prompt_cache_ratio:0.48543963811139385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 -DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.3105807304382324 s -INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.31245851516723633 s -DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=302156197188704868612635996696577633416, time:1750767814.8125212s req_ids:[8] -DEBUG 06-24 20:23:34 [manager.py:391] -ERROR 06-24 20:23:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:366.11318588256836ms total_cost_time:366.15467071533203ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10612 prompt_cache_len:5151 prompt_cache_ratio:0.48539389370523933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 -DEBUG 06-24 20:23:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:34 [manager.py:224] router recive req id 8 cost time 0.1081240177154541 s -INFO 06-24 20:23:34 [manager.py:68] detokenization recv req id 8 cost time 0.1098322868347168 s -DEBUG 06-24 20:23:34 [manager.py:391] Prefill Batch: batch_id=255032505572229513336141334685824311779, time:1750767814.9817455s req_ids:[8] -DEBUG 06-24 20:23:34 [manager.py:391] -ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:34 lightllm_req_id:8 first_token_cost:164.60776329040527ms total_cost_time:164.65115547180176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10613 prompt_cache_len:5151 prompt_cache_ratio:0.4853481579195327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 -DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10796833038330078 s -INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.10997247695922852 s -DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=129505343074159542362675074786112375968, time:1750767815.1528082s req_ids:[8] -DEBUG 06-24 20:23:35 [manager.py:391] -ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:200.37293434143066ms total_cost_time:200.41537284851074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10614 prompt_cache_len:5151 prompt_cache_ratio:0.4853024307518372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 -DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10848784446716309 s -INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.1104578971862793 s -DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=27466650968321358944828197415683535379, time:1750767815.3589172s req_ids:[8] -DEBUG 06-24 20:23:35 [manager.py:391] -ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:207.1366310119629ms total_cost_time:207.1826457977295ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10615 prompt_cache_len:5151 prompt_cache_ratio:0.4852567121997174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 -DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10842752456665039 s -INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s -DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=112957043977688005619213469462042723155, time:1750767815.5723581s req_ids:[8] -DEBUG 06-24 20:23:35 [manager.py:391] -ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:202.47960090637207ms total_cost_time:202.52227783203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10616 prompt_cache_len:5151 prompt_cache_ratio:0.4852110022607385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 -DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10964655876159668 s -INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.11170506477355957 s -DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=125655741680200497391371399699036762248, time:1750767815.7820084s req_ids:[8] -DEBUG 06-24 20:23:35 [manager.py:391] -ERROR 06-24 20:23:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:204.17165756225586ms total_cost_time:204.21361923217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10617 prompt_cache_len:5151 prompt_cache_ratio:0.4851653009324668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 -DEBUG 06-24 20:23:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:35 [manager.py:224] router recive req id 8 cost time 0.10821366310119629 s -INFO 06-24 20:23:35 [manager.py:68] detokenization recv req id 8 cost time 0.10993695259094238 s -DEBUG 06-24 20:23:35 [manager.py:391] Prefill Batch: batch_id=273062731695426888825031236330384365444, time:1750767815.9910028s req_ids:[8] -DEBUG 06-24 20:23:35 [manager.py:391] -ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:35 lightllm_req_id:8 first_token_cost:164.9177074432373ms total_cost_time:164.9608612060547ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10618 prompt_cache_len:5151 prompt_cache_ratio:0.4851196082124694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 -DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10890078544616699 s -INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092710494995117 s -DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=221439180676689136195214543215193167380, time:1750767816.1637483s req_ids:[8] -DEBUG 06-24 20:23:36 [manager.py:391] -ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:364.1057014465332ms total_cost_time:364.1490936279297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10619 prompt_cache_len:5151 prompt_cache_ratio:0.48507392409831435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 -DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.1085364818572998 s -INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s -DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=89229329646768808914903790808376194295, time:1750767816.5318065s req_ids:[8] -DEBUG 06-24 20:23:36 [manager.py:391] -ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:208.22834968566895ms total_cost_time:208.27269554138184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10620 prompt_cache_len:5151 prompt_cache_ratio:0.4850282485875706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 -DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10809969902038574 s -INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11031246185302734 s -DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=145472464764696407640405133442187164578, time:1750767816.747533s req_ids:[8] -DEBUG 06-24 20:23:36 [manager.py:391] -ERROR 06-24 20:23:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:206.82764053344727ms total_cost_time:206.87103271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10621 prompt_cache_len:5151 prompt_cache_ratio:0.4849825816778081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 -DEBUG 06-24 20:23:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:36 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s -INFO 06-24 20:23:36 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s -DEBUG 06-24 20:23:36 [manager.py:391] Prefill Batch: batch_id=193270003046653424977718838941624939591, time:1750767816.9591281s req_ids:[8] -DEBUG 06-24 20:23:36 [manager.py:391] -ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:36 lightllm_req_id:8 first_token_cost:206.32624626159668ms total_cost_time:206.36892318725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10622 prompt_cache_len:5151 prompt_cache_ratio:0.4849369233665976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 -DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10931873321533203 s -INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.11132574081420898 s -DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=256367619262365976854395701806166182831, time:1750767817.171969s req_ids:[8] -DEBUG 06-24 20:23:37 [manager.py:391] -ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.63833618164062ms total_cost_time:206.6824436187744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10623 prompt_cache_len:5151 prompt_cache_ratio:0.48489127365151086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 -DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10870862007141113 s -INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.11061954498291016 s -DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=5554489246022689885220731856145702269, time:1750767817.3835218s req_ids:[8] -DEBUG 06-24 20:23:37 [manager.py:391] -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.0258388519287ms total_cost_time:206.0694694519043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10624 prompt_cache_len:5151 prompt_cache_ratio:0.4848456325301205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 -DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.10889530181884766 s -INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.1109619140625 s -DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=313150806907564746184138131911282627875, time:1750767817.597566s req_ids:[8] -DEBUG 06-24 20:23:37 [manager.py:391] -ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:206.36892318725586ms total_cost_time:206.41207695007324ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10625 prompt_cache_len:5151 prompt_cache_ratio:0.4848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 -DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:37 [manager.py:224] router recive req id 8 cost time 0.1079869270324707 s -INFO 06-24 20:23:37 [manager.py:68] detokenization recv req id 8 cost time 0.10995793342590332 s -DEBUG 06-24 20:23:37 [manager.py:391] Prefill Batch: batch_id=250245316967746710561712901272111363475, time:1750767817.8092964s req_ids:[8] -DEBUG 06-24 20:23:37 [manager.py:391] -ERROR 06-24 20:23:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:210.06321907043457ms total_cost_time:210.10637283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10626 prompt_cache_len:5151 prompt_cache_ratio:0.4847543760587239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 -DEBUG 06-24 20:23:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.1076967716217041 s -INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.10941481590270996 s -DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=221147738148463133489210833662887678276, time:1750767818.0257697s req_ids:[8] -DEBUG 06-24 20:23:38 [manager.py:391] -ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:37 lightllm_req_id:8 first_token_cost:375.1864433288574ms total_cost_time:375.230073928833ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10627 prompt_cache_len:5151 prompt_cache_ratio:0.4847087607038675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 -DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s -INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11020040512084961 s -DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=152099942067070344777453022695172355520, time:1750767818.4054303s req_ids:[8] -DEBUG 06-24 20:23:38 [manager.py:391] -ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:207.32402801513672ms total_cost_time:207.37075805664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:10628 prompt_cache_len:5151 prompt_cache_ratio:0.48466315393300713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 -DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.10865902900695801 s -INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s -DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=168975119348926875282782134754947189000, time:1750767818.618262s req_ids:[8] -DEBUG 06-24 20:23:38 [manager.py:391] -ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:206.8798542022705ms total_cost_time:206.9227695465088ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10629 prompt_cache_len:5151 prompt_cache_ratio:0.48461755574372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 -DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:38 [manager.py:224] router recive req id 8 cost time 0.10803723335266113 s -INFO 06-24 20:23:38 [manager.py:68] detokenization recv req id 8 cost time 0.11008930206298828 s -DEBUG 06-24 20:23:38 [manager.py:391] Prefill Batch: batch_id=267731213131224281606172980302531083481, time:1750767818.831792s req_ids:[8] -DEBUG 06-24 20:23:38 [manager.py:391] -ERROR 06-24 20:23:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:209.42306518554688ms total_cost_time:209.46598052978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10630 prompt_cache_len:5151 prompt_cache_ratio:0.4845719661335842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 -DEBUG 06-24 20:23:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s -INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103515625 s -DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=330001906014905614014905661425570973309, time:1750767819.0456243s req_ids:[8] -DEBUG 06-24 20:23:39 [manager.py:391] -ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:38 lightllm_req_id:8 first_token_cost:208.06336402893066ms total_cost_time:208.10532569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10631 prompt_cache_len:5151 prompt_cache_ratio:0.4845263851001787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 -DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10813689231872559 s -INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024355888366699 s -DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=278826529962881305696837775261834205935, time:1750767819.2605176s req_ids:[8] -DEBUG 06-24 20:23:39 [manager.py:391] -ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:209.6705436706543ms total_cost_time:209.7160816192627ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10632 prompt_cache_len:5151 prompt_cache_ratio:0.4844808126410835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 -DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10958695411682129 s -INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11166715621948242 s -DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=93392969545736111248099606069292205590, time:1750767819.475019s req_ids:[8] -DEBUG 06-24 20:23:39 [manager.py:391] -ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:206.80522918701172ms total_cost_time:206.8476676940918ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10633 prompt_cache_len:5151 prompt_cache_ratio:0.48443524875387944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 -DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.10943174362182617 s -INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.11159801483154297 s -DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=208545011489377269650163257881000511195, time:1750767819.6900053s req_ids:[8] -DEBUG 06-24 20:23:39 [manager.py:391] -ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:209.2292308807373ms total_cost_time:209.27071571350098ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10634 prompt_cache_len:5151 prompt_cache_ratio:0.4843896934361482 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 -DEBUG 06-24 20:23:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:39 [manager.py:224] router recive req id 8 cost time 0.1079559326171875 s -INFO 06-24 20:23:39 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s -DEBUG 06-24 20:23:39 [manager.py:391] Prefill Batch: batch_id=180676490857397177878682984812351728203, time:1750767819.9048176s req_ids:[8] -DEBUG 06-24 20:23:39 [manager.py:391] -ERROR 06-24 20:23:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:204.30803298950195ms total_cost_time:204.35261726379395ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10635 prompt_cache_len:5151 prompt_cache_ratio:0.4843441466854725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 -DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.3109130859375 s -INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.3130512237548828 s -DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=150348215278820549945523686304392075076, time:1750767820.3175418s req_ids:[8] -DEBUG 06-24 20:23:40 [manager.py:391] -ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:39 lightllm_req_id:8 first_token_cost:415.00234603881836ms total_cost_time:415.04597663879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10636 prompt_cache_len:5151 prompt_cache_ratio:0.4842986084994359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 -DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.10831451416015625 s -INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.11026358604431152 s -DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=262329211750554130783153273786557884932, time:1750767820.5357897s req_ids:[8] -DEBUG 06-24 20:23:40 [manager.py:391] -ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:208.56618881225586ms total_cost_time:208.61172676086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10637 prompt_cache_len:5151 prompt_cache_ratio:0.4842530788756228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 -DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.10927391052246094 s -INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.1113123893737793 s -DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=287897987217858922400792256789562553996, time:1750767820.7516203s req_ids:[8] -DEBUG 06-24 20:23:40 [manager.py:391] -ERROR 06-24 20:23:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:210.4330062866211ms total_cost_time:210.47544479370117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10638 prompt_cache_len:5151 prompt_cache_ratio:0.4842075578116187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 -DEBUG 06-24 20:23:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:40 [manager.py:224] router recive req id 8 cost time 0.1082754135131836 s -INFO 06-24 20:23:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s -DEBUG 06-24 20:23:40 [manager.py:391] Prefill Batch: batch_id=137085167271240889512760319724886421900, time:1750767820.965626s req_ids:[8] -DEBUG 06-24 20:23:40 [manager.py:391] -ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:40 lightllm_req_id:8 first_token_cost:204.22887802124023ms total_cost_time:204.27179336547852ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10639 prompt_cache_len:5151 prompt_cache_ratio:0.4841620453050099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 -DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10870933532714844 s -INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.11121797561645508 s -DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=333118848593950508715951355151827529269, time:1750767821.1781464s req_ids:[8] -DEBUG 06-24 20:23:41 [manager.py:391] -ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:211.5168571472168ms total_cost_time:211.5612030029297ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10640 prompt_cache_len:5151 prompt_cache_ratio:0.48411654135338344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 -DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10859847068786621 s -INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.11055421829223633 s -DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=86397651907940271866265643598529758351, time:1750767821.3931897s req_ids:[8] -DEBUG 06-24 20:23:41 [manager.py:391] -ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:212.1131420135498ms total_cost_time:212.1579647064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10641 prompt_cache_len:5151 prompt_cache_ratio:0.4840710459543276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 -DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10524606704711914 s -INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.10726404190063477 s -DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=206011775873037584811703789515482801084, time:1750767821.6120894s req_ids:[8] -DEBUG 06-24 20:23:41 [manager.py:391] -ERROR 06-24 20:23:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:214.23816680908203ms total_cost_time:214.2617702484131ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:10642 prompt_cache_len:5151 prompt_cache_ratio:0.4840255591054313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 -DEBUG 06-24 20:23:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:41 [manager.py:224] router recive req id 8 cost time 0.10766077041625977 s -INFO 06-24 20:23:41 [manager.py:68] detokenization recv req id 8 cost time 0.10966753959655762 s -DEBUG 06-24 20:23:41 [manager.py:391] Prefill Batch: batch_id=311694196846412224260593748937920000616, time:1750767821.84658s req_ids:[8] -DEBUG 06-24 20:23:41 [manager.py:391] -ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:41 lightllm_req_id:8 first_token_cost:443.0243968963623ms total_cost_time:443.0694580078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10643 prompt_cache_len:5151 prompt_cache_ratio:0.4839800808042845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 -DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10808849334716797 s -INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.11008048057556152 s -DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=148544355907827009474108630982604618703, time:1750767822.2804937s req_ids:[8] -DEBUG 06-24 20:23:42 [manager.py:391] -ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:188.31300735473633ms total_cost_time:188.3561611175537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10644 prompt_cache_len:5151 prompt_cache_ratio:0.483934611048478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 -DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10843491554260254 s -INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s -DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=151621553213416391732111555500045983368, time:1750767822.4809873s req_ids:[8] -DEBUG 06-24 20:23:42 [manager.py:391] -DEBUG 06-24 20:23:42 [stats.py:37] Avg tokens(prompt+generate) throughput: 45145.946 tokens/s -DEBUG 06-24 20:23:42 [stats.py:37] Avg prompt tokens throughput: 45137.449 tokens/s -DEBUG 06-24 20:23:42 [stats.py:37] Avg generate tokens throughput: 8.497 tokens/s -ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:212.3270034790039ms total_cost_time:212.3708724975586ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10645 prompt_cache_len:5151 prompt_cache_ratio:0.48388914983560355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 -DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10813021659851074 s -INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.1100163459777832 s -DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=214238780706192751055494745746163040799, time:1750767822.6998286s req_ids:[8] -DEBUG 06-24 20:23:42 [manager.py:391] -ERROR 06-24 20:23:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:210.57677268981934ms total_cost_time:210.6163501739502ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:10646 prompt_cache_len:5151 prompt_cache_ratio:0.4838436971632538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 -DEBUG 06-24 20:23:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:42 [manager.py:224] router recive req id 8 cost time 0.10864853858947754 s -INFO 06-24 20:23:42 [manager.py:68] detokenization recv req id 8 cost time 0.1104884147644043 s -DEBUG 06-24 20:23:42 [manager.py:391] Prefill Batch: batch_id=339446908376033998910198224103806212095, time:1750767822.9275203s req_ids:[8] -DEBUG 06-24 20:23:42 [manager.py:391] -ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:42 lightllm_req_id:8 first_token_cost:208.08696746826172ms total_cost_time:208.1167697906494ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:10647 prompt_cache_len:5151 prompt_cache_ratio:0.48379825302902224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 -DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.11120367050170898 s -INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11307263374328613 s -DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=124290830385889220092693526906476368879, time:1750767823.1358042s req_ids:[8] -DEBUG 06-24 20:23:43 [manager.py:391] -ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:215.97981452941895ms total_cost_time:216.0499095916748ms,out_token_counter:1 mean_per_token_cost_time: 0.07009506225585938ms prompt_token_num:10648 prompt_cache_len:5151 prompt_cache_ratio:0.48375281743050336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 -DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.10850048065185547 s -INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s -DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=101625934798140967401666486975154215225, time:1750767823.3450735s req_ids:[8] -DEBUG 06-24 20:23:43 [manager.py:391] -ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:203.42063903808594ms total_cost_time:203.46379280090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10649 prompt_cache_len:5151 prompt_cache_ratio:0.48370739036529253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 -DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.11176323890686035 s -INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11481523513793945 s -DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=252313877636946710484831720037112346053, time:1750767823.562718s req_ids:[8] -DEBUG 06-24 20:23:43 [manager.py:391] -ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:368.07703971862793ms total_cost_time:368.1197166442871ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10650 prompt_cache_len:5151 prompt_cache_ratio:0.4836619718309859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 -DEBUG 06-24 20:23:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:43 [manager.py:224] router recive req id 8 cost time 0.10887813568115234 s -INFO 06-24 20:23:43 [manager.py:68] detokenization recv req id 8 cost time 0.11077547073364258 s -DEBUG 06-24 20:23:43 [manager.py:391] Prefill Batch: batch_id=21840133844030543679182071304337891150, time:1750767823.9292629s req_ids:[8] -DEBUG 06-24 20:23:43 [manager.py:391] -ERROR 06-24 20:23:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:43 lightllm_req_id:8 first_token_cost:185.7903003692627ms total_cost_time:185.8351230621338ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10651 prompt_cache_len:5151 prompt_cache_ratio:0.48361656182518076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10435867309570312 s -INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.10625720024108887 s -DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=15873991106523347331687712062790263459, time:1750767824.1263695s req_ids:[8] -DEBUG 06-24 20:23:44 [manager.py:391] -ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:179.63600158691406ms total_cost_time:179.68082427978516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10652 prompt_cache_len:5151 prompt_cache_ratio:0.48357116034547504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10965681076049805 s -INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11171793937683105 s -DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=38579495468997730526565888989880967732, time:1750767824.3051748s req_ids:[8] -DEBUG 06-24 20:23:44 [manager.py:391] -ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:204.0245532989502ms total_cost_time:204.09631729125977ms,out_token_counter:1 mean_per_token_cost_time: 0.07176399230957031ms prompt_token_num:10653 prompt_cache_len:5151 prompt_cache_ratio:0.48352576738946773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.10858893394470215 s -INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.1112070083618164 s -DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=227405391970080179708346661690043954595, time:1750767824.5178235s req_ids:[8] -DEBUG 06-24 20:23:44 [manager.py:391] -ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:180.1128387451172ms total_cost_time:180.15694618225098ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10654 prompt_cache_len:5151 prompt_cache_ratio:0.48348038295475876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.11009860038757324 s -INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11256837844848633 s -DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=63041697949929062079756114457479767921, time:1750767824.7020116s req_ids:[8] -DEBUG 06-24 20:23:44 [manager.py:391] -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:200.10614395141602ms total_cost_time:200.16145706176758ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:10655 prompt_cache_len:5151 prompt_cache_ratio:0.48343500703894887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:44 [manager.py:224] router recive req id 8 cost time 0.1092381477355957 s -INFO 06-24 20:23:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118054389953613 s -DEBUG 06-24 20:23:44 [manager.py:391] Prefill Batch: batch_id=228312034285978187570696014221866018889, time:1750767824.9196234s req_ids:[8] -DEBUG 06-24 20:23:44 [manager.py:391] -ERROR 06-24 20:23:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:182.65557289123535ms total_cost_time:182.71493911743164ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:10656 prompt_cache_len:5151 prompt_cache_ratio:0.48338963963963966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 -DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10949420928955078 s -INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11132454872131348 s -DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=81243740425104727258934805178689139985, time:1750767825.1082983s req_ids:[8] -DEBUG 06-24 20:23:45 [manager.py:391] -ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:44 lightllm_req_id:8 first_token_cost:378.48472595214844ms total_cost_time:378.5390853881836ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:10657 prompt_cache_len:5151 prompt_cache_ratio:0.4833442807544337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 -DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.11299276351928711 s -INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11409974098205566 s -DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=234860185096409239695385623524272842084, time:1750767825.4916956s req_ids:[8] -DEBUG 06-24 20:23:45 [manager.py:391] -ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:178.36785316467285ms total_cost_time:178.41529846191406ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10658 prompt_cache_len:5151 prompt_cache_ratio:0.4832989303809345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 -DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10800552368164062 s -INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.10989522933959961 s -DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=91612672969963075368081108278523311072, time:1750767825.6717844s req_ids:[8] -DEBUG 06-24 20:23:45 [manager.py:391] -ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:192.47078895568848ms total_cost_time:192.51465797424316ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10659 prompt_cache_len:5151 prompt_cache_ratio:0.48325358851674644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 -DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:45 [manager.py:224] router recive req id 8 cost time 0.10889625549316406 s -INFO 06-24 20:23:45 [manager.py:68] detokenization recv req id 8 cost time 0.11060571670532227 s -DEBUG 06-24 20:23:45 [manager.py:391] Prefill Batch: batch_id=147832979923536364460403225239871503360, time:1750767825.8670907s req_ids:[8] -DEBUG 06-24 20:23:45 [manager.py:391] -ERROR 06-24 20:23:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:204.4699192047119ms total_cost_time:204.5285701751709ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:10660 prompt_cache_len:5151 prompt_cache_ratio:0.4832082551594747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 -DEBUG 06-24 20:23:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10883593559265137 s -INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11098265647888184 s -DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=270229440439650814110729001981561275464, time:1750767826.0822852s req_ids:[8] -DEBUG 06-24 20:23:46 [manager.py:391] -ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:45 lightllm_req_id:8 first_token_cost:212.7690315246582ms total_cost_time:212.80670166015625ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:10661 prompt_cache_len:5151 prompt_cache_ratio:0.48316293030672547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 -DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s -INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11031484603881836 s -DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=42937518761587788007493082736520631652, time:1750767826.3016825s req_ids:[8] -DEBUG 06-24 20:23:46 [manager.py:391] -ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:208.88090133666992ms total_cost_time:208.939790725708ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:10662 prompt_cache_len:5151 prompt_cache_ratio:0.4831176139561058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 -DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10854458808898926 s -INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11061620712280273 s -DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=117863431167037035758909124788019241721, time:1750767826.5178077s req_ids:[8] -DEBUG 06-24 20:23:46 [manager.py:391] -ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:194.70787048339844ms total_cost_time:194.75269317626953ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10663 prompt_cache_len:5151 prompt_cache_ratio:0.48307230610522367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 -DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10834836959838867 s -INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.1101534366607666 s -DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=97662689417604623489117637824285863802, time:1750767826.7126985s req_ids:[8] -DEBUG 06-24 20:23:46 [manager.py:391] -ERROR 06-24 20:23:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:203.06730270385742ms total_cost_time:203.1118869781494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10664 prompt_cache_len:5151 prompt_cache_ratio:0.4830270067516879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 -DEBUG 06-24 20:23:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:46 [manager.py:224] router recive req id 8 cost time 0.10912418365478516 s -INFO 06-24 20:23:46 [manager.py:68] detokenization recv req id 8 cost time 0.11102676391601562 s -DEBUG 06-24 20:23:46 [manager.py:391] Prefill Batch: batch_id=169749224531654366591446790406140803226, time:1750767826.9267373s req_ids:[8] -DEBUG 06-24 20:23:46 [manager.py:391] -INFO 06-24 20:23:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:46 lightllm_req_id:8 first_token_cost:379.31132316589355ms total_cost_time:379.41956520080566ms,out_token_counter:1 mean_per_token_cost_time: 0.10824203491210938ms prompt_token_num:10665 prompt_cache_len:5151 prompt_cache_ratio:0.4829817158931083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 -DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.11178827285766602 s -INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11414098739624023 s -DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=81153556601566133706049282912444126125, time:1750767827.3071413s req_ids:[8] -DEBUG 06-24 20:23:47 [manager.py:391] -ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:208.51922035217285ms total_cost_time:208.540678024292ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:10666 prompt_cache_len:5151 prompt_cache_ratio:0.4829364335270954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 -DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10912132263183594 s -INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s -DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=52871520637463701419448039172645026839, time:1750767827.5219233s req_ids:[8] -DEBUG 06-24 20:23:47 [manager.py:391] -ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:213.57369422912598ms total_cost_time:213.63425254821777ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:10667 prompt_cache_len:5151 prompt_cache_ratio:0.4828911596512609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 -DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10794949531555176 s -INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s -DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=108186006971086299309977487232989540047, time:1750767827.7466364s req_ids:[8] -DEBUG 06-24 20:23:47 [manager.py:391] -ERROR 06-24 20:23:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:212.32008934020996ms total_cost_time:212.38112449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10668 prompt_cache_len:5151 prompt_cache_ratio:0.4828458942632171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 -DEBUG 06-24 20:23:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:47 [manager.py:224] router recive req id 8 cost time 0.10969066619873047 s -INFO 06-24 20:23:47 [manager.py:68] detokenization recv req id 8 cost time 0.11169958114624023 s -DEBUG 06-24 20:23:47 [manager.py:391] Prefill Batch: batch_id=72610180719680424435563766604314066350, time:1750767827.960047s req_ids:[8] -DEBUG 06-24 20:23:47 [manager.py:391] -ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:47 lightllm_req_id:8 first_token_cost:213.94085884094238ms total_cost_time:213.96446228027344ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:10669 prompt_cache_len:5151 prompt_cache_ratio:0.4828006373605774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 -DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s -INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.10969400405883789 s -DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=333772980623695511661739735097211941704, time:1750767828.1818814s req_ids:[8] -DEBUG 06-24 20:23:48 [manager.py:391] -ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:206.5715789794922ms total_cost_time:206.61377906799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10670 prompt_cache_len:5151 prompt_cache_ratio:0.48275538894095593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 -DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.11216974258422852 s -INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11537933349609375 s -DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=288415668871127338726786066776950821893, time:1750767828.3950696s req_ids:[8] -DEBUG 06-24 20:23:48 [manager.py:391] -ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:203.50241661071777ms total_cost_time:203.5224437713623ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:10671 prompt_cache_len:5151 prompt_cache_ratio:0.48271014900196796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 -DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.10844779014587402 s -INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11052083969116211 s -DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=322413904694870279300954650204570840441, time:1750767828.6135628s req_ids:[8] -DEBUG 06-24 20:23:48 [manager.py:391] -ERROR 06-24 20:23:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:221.06289863586426ms total_cost_time:221.10819816589355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10672 prompt_cache_len:5151 prompt_cache_ratio:0.48266491754122937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 -DEBUG 06-24 20:23:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:48 [manager.py:224] router recive req id 8 cost time 0.10811400413513184 s -INFO 06-24 20:23:48 [manager.py:68] detokenization recv req id 8 cost time 0.11016225814819336 s -DEBUG 06-24 20:23:48 [manager.py:391] Prefill Batch: batch_id=270226922518184353445799859756704897199, time:1750767828.8298934s req_ids:[8] -DEBUG 06-24 20:23:48 [manager.py:391] -ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:48 lightllm_req_id:8 first_token_cost:391.86549186706543ms total_cost_time:391.9100761413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10673 prompt_cache_len:5151 prompt_cache_ratio:0.48261969455635717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 -DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10824322700500488 s -INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987162590026855 s -DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=197906275834260757336317568461567777400, time:1750767829.2272139s req_ids:[8] -DEBUG 06-24 20:23:49 [manager.py:391] -ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:162.62149810791016ms total_cost_time:162.66179084777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10674 prompt_cache_len:5151 prompt_cache_ratio:0.48257448004496906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 -DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10820603370666504 s -INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016035079956055 s -DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=67734143250773433821065929062806834011, time:1750767829.3961296s req_ids:[8] -DEBUG 06-24 20:23:49 [manager.py:391] -ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:193.81284713745117ms total_cost_time:193.85337829589844ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10675 prompt_cache_len:5151 prompt_cache_ratio:0.4825292740046838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 -DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10800766944885254 s -INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11000609397888184 s -DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=203829916245572850256466937165894313433, time:1750767829.5960543s req_ids:[8] -DEBUG 06-24 20:23:49 [manager.py:391] -ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:203.9964199066162ms total_cost_time:204.0388584136963ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10676 prompt_cache_len:5151 prompt_cache_ratio:0.482484076433121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 -DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:49 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s -INFO 06-24 20:23:49 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s -DEBUG 06-24 20:23:49 [manager.py:391] Prefill Batch: batch_id=186698064789001746432979593242088340498, time:1750767829.8082798s req_ids:[8] -DEBUG 06-24 20:23:49 [manager.py:391] -ERROR 06-24 20:23:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:200.65927505493164ms total_cost_time:200.7005214691162ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10677 prompt_cache_len:5151 prompt_cache_ratio:0.4824388873279011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 -DEBUG 06-24 20:23:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10858273506164551 s -INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.10976386070251465 s -DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=7913404158046129302988941109694578346, time:1750767830.0157604s req_ids:[8] -DEBUG 06-24 20:23:50 [manager.py:391] -ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:49 lightllm_req_id:8 first_token_cost:207.06439018249512ms total_cost_time:207.11159706115723ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10678 prompt_cache_len:5151 prompt_cache_ratio:0.48239370668664544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 -DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s -INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.11083722114562988 s -DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=135210640638269778821161138223910911827, time:1750767830.2430506s req_ids:[8] -DEBUG 06-24 20:23:50 [manager.py:391] -ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:220.2448844909668ms total_cost_time:220.289945602417ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10679 prompt_cache_len:5151 prompt_cache_ratio:0.4823485345069763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 -DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10802435874938965 s -INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.1099398136138916 s -DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=198760087066867120295020696037565098432, time:1750767830.4569502s req_ids:[8] -DEBUG 06-24 20:23:50 [manager.py:391] -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:215.58785438537598ms total_cost_time:215.62933921813965ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10680 prompt_cache_len:5151 prompt_cache_ratio:0.48230337078651686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 -DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10905122756958008 s -INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.11125516891479492 s -DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=237169233008131878616511920714083933474, time:1750767830.68014s req_ids:[8] -DEBUG 06-24 20:23:50 [manager.py:391] -ERROR 06-24 20:23:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:209.02228355407715ms total_cost_time:209.06496047973633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10681 prompt_cache_len:5151 prompt_cache_ratio:0.4822582155228911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 -DEBUG 06-24 20:23:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:50 [manager.py:224] router recive req id 8 cost time 0.10776400566101074 s -INFO 06-24 20:23:50 [manager.py:68] detokenization recv req id 8 cost time 0.10981345176696777 s -DEBUG 06-24 20:23:50 [manager.py:391] Prefill Batch: batch_id=210315163464066059603919360789301688202, time:1750767830.8939602s req_ids:[8] -DEBUG 06-24 20:23:50 [manager.py:391] -ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:50 lightllm_req_id:8 first_token_cost:390.20299911499023ms total_cost_time:390.2461528778076ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10682 prompt_cache_len:5151 prompt_cache_ratio:0.48221306871372405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 -DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.1086738109588623 s -INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11079525947570801 s -DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=251660503780118854255518607567236877300, time:1750767831.294425s req_ids:[8] -DEBUG 06-24 20:23:51 [manager.py:391] -ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:211.97104454040527ms total_cost_time:212.02540397644043ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:10683 prompt_cache_len:5151 prompt_cache_ratio:0.4821679303566414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 -DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.11112451553344727 s -INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.1130685806274414 s -DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=234111943529222761671528715282875900742, time:1750767831.5079129s req_ids:[8] -DEBUG 06-24 20:23:51 [manager.py:391] -ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:203.9949893951416ms total_cost_time:204.0390968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10684 prompt_cache_len:5151 prompt_cache_ratio:0.4821228004492699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 -DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.10926389694213867 s -INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11130666732788086 s -DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=222999318035116701983728321023642763620, time:1750767831.7233515s req_ids:[8] -DEBUG 06-24 20:23:51 [manager.py:391] -ERROR 06-24 20:23:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:208.4331512451172ms total_cost_time:208.47654342651367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10685 prompt_cache_len:5151 prompt_cache_ratio:0.48207767898923726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 -DEBUG 06-24 20:23:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:51 [manager.py:224] router recive req id 8 cost time 0.10855579376220703 s -INFO 06-24 20:23:51 [manager.py:68] detokenization recv req id 8 cost time 0.11057019233703613 s -DEBUG 06-24 20:23:51 [manager.py:391] Prefill Batch: batch_id=307835549042614321149055445939970271297, time:1750767831.9341273s req_ids:[8] -DEBUG 06-24 20:23:51 [manager.py:391] -ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:51 lightllm_req_id:8 first_token_cost:204.8506736755371ms total_cost_time:204.8933506011963ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10686 prompt_cache_len:5151 prompt_cache_ratio:0.4820325659741718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 -DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.1083683967590332 s -INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.11037802696228027 s -DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=91772844773337578898989083111474286775, time:1750767832.150269s req_ids:[8] -DEBUG 06-24 20:23:52 [manager.py:391] -ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:210.91341972351074ms total_cost_time:210.97421646118164ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:10687 prompt_cache_len:5151 prompt_cache_ratio:0.481987461401703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 -DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.1079409122467041 s -INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s -DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=75832547725745977133393852346002912123, time:1750767832.3746183s req_ids:[8] -DEBUG 06-24 20:23:52 [manager.py:391] -ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:174.84331130981445ms total_cost_time:174.88574981689453ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10688 prompt_cache_len:5151 prompt_cache_ratio:0.4819423652694611 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 -DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.10880041122436523 s -INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.11082625389099121 s -DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=270789590240959522949107079358323926497, time:1750767832.5441716s req_ids:[8] -DEBUG 06-24 20:23:52 [manager.py:391] -DEBUG 06-24 20:23:52 [stats.py:37] Avg tokens(prompt+generate) throughput: 46650.816 tokens/s -DEBUG 06-24 20:23:52 [stats.py:37] Avg prompt tokens throughput: 46642.071 tokens/s -DEBUG 06-24 20:23:52 [stats.py:37] Avg generate tokens throughput: 8.745 tokens/s -ERROR 06-24 20:23:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:369.83656883239746ms total_cost_time:369.88162994384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10689 prompt_cache_len:5151 prompt_cache_ratio:0.48189727757507717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 -DEBUG 06-24 20:23:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:52 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s -INFO 06-24 20:23:52 [manager.py:68] detokenization recv req id 8 cost time 0.10990452766418457 s -DEBUG 06-24 20:23:52 [manager.py:391] Prefill Batch: batch_id=128986558449055108404606543307590100671, time:1750767832.9201627s req_ids:[8] -DEBUG 06-24 20:23:52 [manager.py:391] -ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:52 lightllm_req_id:8 first_token_cost:210.27898788452148ms total_cost_time:210.32404899597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10690 prompt_cache_len:5151 prompt_cache_ratio:0.48185219831618337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 -DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10891389846801758 s -INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.1110692024230957 s -DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=230348149291445634281189962399330557415, time:1750767833.1364863s req_ids:[8] -DEBUG 06-24 20:23:53 [manager.py:391] -INFO 06-24 20:23:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:23:53 [statics_utils.py:24] mean first cost: 228.3885136965524 ms -INFO 06-24 20:23:53 [statics_utils.py:24] mean per token cost: 0.06666320272762045 ms -ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:190.32955169677734ms total_cost_time:190.37127494812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10691 prompt_cache_len:5151 prompt_cache_ratio:0.4818071274904125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 -DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10895562171936035 s -INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11089825630187988 s -DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=264700196572954455242747262863448840469, time:1750767833.3327696s req_ids:[8] -DEBUG 06-24 20:23:53 [manager.py:391] -ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:205.0917148590088ms total_cost_time:205.13606071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10692 prompt_cache_len:5151 prompt_cache_ratio:0.48176206509539843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 -DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10802769660949707 s -INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11028504371643066 s -DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=129180678680853555258553645214875593432, time:1750767833.544922s req_ids:[8] -DEBUG 06-24 20:23:53 [manager.py:391] -ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:191.91670417785645ms total_cost_time:191.95890426635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10693 prompt_cache_len:5151 prompt_cache_ratio:0.48171701112877585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 -DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10835886001586914 s -INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11041688919067383 s -DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=155518454137526704989845314060892491960, time:1750767833.7422945s req_ids:[8] -DEBUG 06-24 20:23:53 [manager.py:391] -ERROR 06-24 20:23:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:203.36437225341797ms total_cost_time:203.43661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.07224082946777344ms prompt_token_num:10694 prompt_cache_len:5151 prompt_cache_ratio:0.48167196558818026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 -DEBUG 06-24 20:23:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:53 [manager.py:224] router recive req id 8 cost time 0.10913515090942383 s -INFO 06-24 20:23:53 [manager.py:68] detokenization recv req id 8 cost time 0.11118769645690918 s -DEBUG 06-24 20:23:53 [manager.py:391] Prefill Batch: batch_id=249169401981683144285093603531271845146, time:1750767833.9566832s req_ids:[8] -DEBUG 06-24 20:23:53 [manager.py:391] -ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:53 lightllm_req_id:8 first_token_cost:195.97434997558594ms total_cost_time:196.01798057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10695 prompt_cache_len:5151 prompt_cache_ratio:0.4816269284712483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 -DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10731911659240723 s -INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.10935163497924805 s -DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=107128998757613472926782605086548178787, time:1750767834.1547458s req_ids:[8] -DEBUG 06-24 20:23:54 [manager.py:391] -ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:381.55436515808105ms total_cost_time:381.60181045532227ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:10696 prompt_cache_len:5151 prompt_cache_ratio:0.48158189977561705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 -DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10867738723754883 s -INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11065816879272461 s -DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=64008715101679227716321198699870385136, time:1750767834.5568168s req_ids:[8] -DEBUG 06-24 20:23:54 [manager.py:391] -ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:217.66996383666992ms total_cost_time:217.7135944366455ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10697 prompt_cache_len:5151 prompt_cache_ratio:0.48153687949892493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 -DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s -INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11092901229858398 s -DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=265376003787397788659127011570572251459, time:1750767834.7715409s req_ids:[8] -DEBUG 06-24 20:23:54 [manager.py:391] -ERROR 06-24 20:23:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:218.30081939697266ms total_cost_time:218.34397315979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10698 prompt_cache_len:5151 prompt_cache_ratio:0.48149186763881097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 -DEBUG 06-24 20:23:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:54 [manager.py:224] router recive req id 8 cost time 0.10814213752746582 s -INFO 06-24 20:23:54 [manager.py:68] detokenization recv req id 8 cost time 0.11013364791870117 s -DEBUG 06-24 20:23:54 [manager.py:391] Prefill Batch: batch_id=269278623022823872825412091353666923420, time:1750767834.994287s req_ids:[8] -DEBUG 06-24 20:23:54 [manager.py:391] -ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:54 lightllm_req_id:8 first_token_cost:202.409029006958ms total_cost_time:202.4533748626709ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10699 prompt_cache_len:5151 prompt_cache_ratio:0.4814468641929152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 -DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10808968544006348 s -INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11014080047607422 s -DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=59701431398920068084865099236450417992, time:1750767835.20299s req_ids:[8] -DEBUG 06-24 20:23:55 [manager.py:391] -ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:218.45293045043945ms total_cost_time:218.50013732910156ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:10700 prompt_cache_len:5151 prompt_cache_ratio:0.4814018691588785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 -DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10853910446166992 s -INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11063551902770996 s -DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=126398336310759061144477729822954681834, time:1750767835.431107s req_ids:[8] -DEBUG 06-24 20:23:55 [manager.py:391] -ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:208.30965042114258ms total_cost_time:208.35089683532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10701 prompt_cache_len:5151 prompt_cache_ratio:0.4813568825343426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 -DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s -INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s -DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=98813940618275102018649396405603939756, time:1750767835.645553s req_ids:[8] -DEBUG 06-24 20:23:55 [manager.py:391] -ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:211.1976146697998ms total_cost_time:211.2410068511963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10702 prompt_cache_len:5151 prompt_cache_ratio:0.4813119043169501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 -DEBUG 06-24 20:23:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:55 [manager.py:224] router recive req id 8 cost time 0.10791206359863281 s -INFO 06-24 20:23:55 [manager.py:68] detokenization recv req id 8 cost time 0.1099691390991211 s -DEBUG 06-24 20:23:55 [manager.py:391] Prefill Batch: batch_id=118024339126130997941646945762081180453, time:1750767835.859777s req_ids:[8] -DEBUG 06-24 20:23:55 [manager.py:391] -ERROR 06-24 20:23:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:207.14092254638672ms total_cost_time:207.1821689605713ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10703 prompt_cache_len:5151 prompt_cache_ratio:0.4812669345043446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 -DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.3102278709411621 s -INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.31241798400878906 s -DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=296087630125455127672542242979753064293, time:1750767836.2770848s req_ids:[8] -DEBUG 06-24 20:23:56 [manager.py:391] -ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:55 lightllm_req_id:8 first_token_cost:410.22467613220215ms total_cost_time:410.26830673217773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10704 prompt_cache_len:5151 prompt_cache_ratio:0.4812219730941704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 -DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10864377021789551 s -INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.1106266975402832 s -DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=15595981713255265585960477553742326014, time:1750767836.4983408s req_ids:[8] -DEBUG 06-24 20:23:56 [manager.py:391] -ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:212.39829063415527ms total_cost_time:212.44239807128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10705 prompt_cache_len:5151 prompt_cache_ratio:0.48117702008407287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 -DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10819292068481445 s -INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.11032724380493164 s -DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=230267695599085021433248977537529177777, time:1750767836.707282s req_ids:[8] -DEBUG 06-24 20:23:56 [manager.py:391] -ERROR 06-24 20:23:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:212.08739280700684ms total_cost_time:212.1284008026123ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10706 prompt_cache_len:5151 prompt_cache_ratio:0.4811320754716981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 -DEBUG 06-24 20:23:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:56 [manager.py:224] router recive req id 8 cost time 0.10916376113891602 s -INFO 06-24 20:23:56 [manager.py:68] detokenization recv req id 8 cost time 0.11125349998474121 s -DEBUG 06-24 20:23:56 [manager.py:391] Prefill Batch: batch_id=275548206350685697968163711597966995333, time:1750767836.924826s req_ids:[8] -DEBUG 06-24 20:23:56 [manager.py:391] -ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:56 lightllm_req_id:8 first_token_cost:205.42597770690918ms total_cost_time:205.48415184020996ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:10707 prompt_cache_len:5151 prompt_cache_ratio:0.4810871392546932 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 -DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s -INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063098907470703 s -DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=277774122060853528414412325314303042339, time:1750767837.1497808s req_ids:[8] -DEBUG 06-24 20:23:57 [manager.py:391] -ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:213.25206756591797ms total_cost_time:213.29522132873535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10708 prompt_cache_len:5151 prompt_cache_ratio:0.481042211430706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 -DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10910558700561523 s -INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.11112499237060547 s -DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=270231197413791574474571152139501127584, time:1750767837.3587458s req_ids:[8] -DEBUG 06-24 20:23:57 [manager.py:391] -ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:222.52392768859863ms total_cost_time:222.56708145141602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10709 prompt_cache_len:5151 prompt_cache_ratio:0.4809972919973854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 -DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.1083674430847168 s -INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.1104121208190918 s -DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=3058379246399612404117581275065109711, time:1750767837.585144s req_ids:[8] -DEBUG 06-24 20:23:57 [manager.py:391] -ERROR 06-24 20:23:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:200.76799392700195ms total_cost_time:200.81090927124023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10710 prompt_cache_len:5151 prompt_cache_ratio:0.48095238095238096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 -DEBUG 06-24 20:23:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:57 [manager.py:224] router recive req id 8 cost time 0.10744214057922363 s -INFO 06-24 20:23:57 [manager.py:68] detokenization recv req id 8 cost time 0.10933876037597656 s -DEBUG 06-24 20:23:57 [manager.py:391] Prefill Batch: batch_id=103714880411373908796091733900248391461, time:1750767837.7932312s req_ids:[8] -DEBUG 06-24 20:23:57 [manager.py:391] -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:23:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:57 lightllm_req_id:8 first_token_cost:366.8181896209717ms total_cost_time:366.86205863952637ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10711 prompt_cache_len:5151 prompt_cache_ratio:0.4809074782933433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 -DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10797309875488281 s -INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007976531982422 s -DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=324966021014922013572157646460848739285, time:1750767838.1649745s req_ids:[8] -DEBUG 06-24 20:23:58 [manager.py:391] -ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:202.71539688110352ms total_cost_time:202.7592658996582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10712 prompt_cache_len:5151 prompt_cache_ratio:0.4808625840179238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 -DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10894632339477539 s -INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.1109156608581543 s -DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=299072658331046332733107882349381210608, time:1750767838.385487s req_ids:[8] -DEBUG 06-24 20:23:58 [manager.py:391] -ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:219.0418243408203ms total_cost_time:219.0854549407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10713 prompt_cache_len:5151 prompt_cache_ratio:0.48081769812377484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 -DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10895299911499023 s -INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11100554466247559 s -DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=156143741523281114105600389034384419784, time:1750767838.6058166s req_ids:[8] -DEBUG 06-24 20:23:58 [manager.py:391] -ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:216.6738510131836ms total_cost_time:216.71795845031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10714 prompt_cache_len:5151 prompt_cache_ratio:0.48077282060854953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 -DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:58 [manager.py:224] router recive req id 8 cost time 0.10933256149291992 s -INFO 06-24 20:23:58 [manager.py:68] detokenization recv req id 8 cost time 0.11138224601745605 s -DEBUG 06-24 20:23:58 [manager.py:391] Prefill Batch: batch_id=264393583638041428580717229068760801382, time:1750767838.8222845s req_ids:[8] -DEBUG 06-24 20:23:58 [manager.py:391] -ERROR 06-24 20:23:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:200.11305809020996ms total_cost_time:200.15692710876465ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10715 prompt_cache_len:5151 prompt_cache_ratio:0.480727951469902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 -DEBUG 06-24 20:23:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s -INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s -DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=198529135340993877520313291630329503733, time:1750767839.0271547s req_ids:[8] -DEBUG 06-24 20:23:59 [manager.py:391] -ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:58 lightllm_req_id:8 first_token_cost:201.73311233520508ms total_cost_time:201.77745819091797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10716 prompt_cache_len:5151 prompt_cache_ratio:0.48068309070548715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 -DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s -INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.1106414794921875 s -DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=284198784862633461632271629538957405243, time:1750767839.237208s req_ids:[8] -DEBUG 06-24 20:23:59 [manager.py:391] -ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:193.94683837890625ms total_cost_time:193.99094581604004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10717 prompt_cache_len:5151 prompt_cache_ratio:0.4806382383129607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 -DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s -INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.11083579063415527 s -DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=267053340457785520782626839315947033665, time:1750767839.4338164s req_ids:[8] -DEBUG 06-24 20:23:59 [manager.py:391] -ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:198.9130973815918ms total_cost_time:198.95601272583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10718 prompt_cache_len:5151 prompt_cache_ratio:0.4805933942899795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 -DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:23:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:23:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:23:59 [manager.py:224] router recive req id 8 cost time 0.10761022567749023 s -INFO 06-24 20:23:59 [manager.py:68] detokenization recv req id 8 cost time 0.10955452919006348 s -DEBUG 06-24 20:23:59 [manager.py:391] Prefill Batch: batch_id=51221652377391903701067630840939121394, time:1750767839.6410067s req_ids:[8] -DEBUG 06-24 20:23:59 [manager.py:391] -ERROR 06-24 20:23:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:23:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:23:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:372.9057312011719ms total_cost_time:372.94840812683105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10719 prompt_cache_len:5151 prompt_cache_ratio:0.48054855863420093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:23:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 -DEBUG 06-24 20:23:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:23:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:23:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:23:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:23:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s -INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11067795753479004 s -DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=251332350983646552796903468876581772474, time:1750767840.0199685s req_ids:[8] -DEBUG 06-24 20:24:00 [manager.py:391] -ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:23:59 lightllm_req_id:8 first_token_cost:203.90772819519043ms total_cost_time:203.95207405090332ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10720 prompt_cache_len:5151 prompt_cache_ratio:0.48050373134328356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 -DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10891938209533691 s -INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11085367202758789 s -DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=146686470691144052386594523921544551465, time:1750767840.2307036s req_ids:[8] -DEBUG 06-24 20:24:00 [manager.py:391] -ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:207.95512199401855ms total_cost_time:207.99827575683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10721 prompt_cache_len:5151 prompt_cache_ratio:0.4804589124148867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 -DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10887432098388672 s -INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11080503463745117 s -DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=207760888865004936068370717202676193070, time:1750767840.4599965s req_ids:[8] -DEBUG 06-24 20:24:00 [manager.py:391] -ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:218.22381019592285ms total_cost_time:218.26577186584473ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10722 prompt_cache_len:5151 prompt_cache_ratio:0.4804141018466704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 -DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.10783123970031738 s -INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.10993814468383789 s -DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=220613603311843405958996491093145288629, time:1750767840.6702724s req_ids:[8] -DEBUG 06-24 20:24:00 [manager.py:391] -ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:204.83851432800293ms total_cost_time:204.8799991607666ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10723 prompt_cache_len:5151 prompt_cache_ratio:0.4803692996362958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 -DEBUG 06-24 20:24:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:00 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s -INFO 06-24 20:24:00 [manager.py:68] detokenization recv req id 8 cost time 0.11046481132507324 s -DEBUG 06-24 20:24:00 [manager.py:391] Prefill Batch: batch_id=217772716362626640885134701392074434266, time:1750767840.880608s req_ids:[8] -DEBUG 06-24 20:24:00 [manager.py:391] -ERROR 06-24 20:24:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:210.48569679260254ms total_cost_time:210.53075790405273ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10724 prompt_cache_len:5151 prompt_cache_ratio:0.4803245057814248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 -DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10857367515563965 s -INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055231094360352 s -DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=192468556486101290387226371131989197327, time:1750767841.096291s req_ids:[8] -DEBUG 06-24 20:24:01 [manager.py:391] -ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:00 lightllm_req_id:8 first_token_cost:204.79130744934082ms total_cost_time:204.8332691192627ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10725 prompt_cache_len:5151 prompt_cache_ratio:0.48027972027972027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 -DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10924100875854492 s -INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11117362976074219 s -DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=42554461838680618714175192599270902236, time:1750767841.3109107s req_ids:[8] -DEBUG 06-24 20:24:01 [manager.py:391] -ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:219.66171264648438ms total_cost_time:219.70510482788086ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10726 prompt_cache_len:5151 prompt_cache_ratio:0.4802349431288458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 -DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.10892248153686523 s -INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.11087870597839355 s -DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=199828450158011667643137030113230233506, time:1750767841.5344734s req_ids:[8] -DEBUG 06-24 20:24:01 [manager.py:391] -ERROR 06-24 20:24:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:199.4497776031494ms total_cost_time:199.4919776916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10727 prompt_cache_len:5151 prompt_cache_ratio:0.48019017432646594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 -DEBUG 06-24 20:24:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:01 [manager.py:224] router recive req id 8 cost time 0.31087636947631836 s -INFO 06-24 20:24:01 [manager.py:68] detokenization recv req id 8 cost time 0.31309008598327637 s -DEBUG 06-24 20:24:01 [manager.py:391] Prefill Batch: batch_id=49265467270527786959968994580994255557, time:1750767841.9461951s req_ids:[8] -DEBUG 06-24 20:24:01 [manager.py:391] -ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:01 lightllm_req_id:8 first_token_cost:417.0718193054199ms total_cost_time:417.1159267425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10728 prompt_cache_len:5151 prompt_cache_ratio:0.48014541387024606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 -DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10925054550170898 s -INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s -DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=172617693533985912818214671510273411065, time:1750767842.1655045s req_ids:[8] -DEBUG 06-24 20:24:02 [manager.py:391] -ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:194.59009170532227ms total_cost_time:194.63181495666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10729 prompt_cache_len:5151 prompt_cache_ratio:0.48010066175785254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 -DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10784101486206055 s -INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.10970711708068848 s -DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=196427752106409739217779195260570260309, time:1750767842.364776s req_ids:[8] -DEBUG 06-24 20:24:02 [manager.py:391] -ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:201.00688934326172ms total_cost_time:201.0490894317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10730 prompt_cache_len:5151 prompt_cache_ratio:0.48005591798695246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 -DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10786056518554688 s -INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976409912109375 s -DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=200100508965565780367737855160578777980, time:1750767842.571383s req_ids:[8] -DEBUG 06-24 20:24:02 [manager.py:391] -DEBUG 06-24 20:24:02 [stats.py:37] Avg tokens(prompt+generate) throughput: 44870.521 tokens/s -DEBUG 06-24 20:24:02 [stats.py:37] Avg prompt tokens throughput: 44862.143 tokens/s -DEBUG 06-24 20:24:02 [stats.py:37] Avg generate tokens throughput: 8.377 tokens/s -ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:217.94700622558594ms total_cost_time:217.9892063140869ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10731 prompt_cache_len:5151 prompt_cache_ratio:0.48001118255521386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 -DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:02 [manager.py:224] router recive req id 8 cost time 0.10897374153137207 s -INFO 06-24 20:24:02 [manager.py:68] detokenization recv req id 8 cost time 0.11100220680236816 s -DEBUG 06-24 20:24:02 [manager.py:391] Prefill Batch: batch_id=51138943565381830818218484550439705829, time:1750767842.7980304s req_ids:[8] -DEBUG 06-24 20:24:02 [manager.py:391] -ERROR 06-24 20:24:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:205.18827438354492ms total_cost_time:205.2314281463623ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10732 prompt_cache_len:5151 prompt_cache_ratio:0.47996645546030564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 -DEBUG 06-24 20:24:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s -INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10982131958007812 s -DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=25388630355155905461096916028627604075, time:1750767843.009123s req_ids:[8] -DEBUG 06-24 20:24:03 [manager.py:391] -ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:02 lightllm_req_id:8 first_token_cost:204.17141914367676ms total_cost_time:204.21481132507324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10733 prompt_cache_len:5151 prompt_cache_ratio:0.4799217366998975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 -DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.1081843376159668 s -INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.1102912425994873 s -DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=201818268034994996651493012410627372106, time:1750767843.2208703s req_ids:[8] -DEBUG 06-24 20:24:03 [manager.py:391] -ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:227.69641876220703ms total_cost_time:227.74076461791992ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10734 prompt_cache_len:5151 prompt_cache_ratio:0.4798770262716601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 -DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10765838623046875 s -INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s -DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=332401348142539793786797219627372981415, time:1750767843.4607944s req_ids:[8] -DEBUG 06-24 20:24:03 [manager.py:391] -ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:371.9935417175293ms total_cost_time:372.0369338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10735 prompt_cache_len:5151 prompt_cache_ratio:0.47983232417326505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 -DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:03 [manager.py:224] router recive req id 8 cost time 0.10782670974731445 s -INFO 06-24 20:24:03 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s -DEBUG 06-24 20:24:03 [manager.py:391] Prefill Batch: batch_id=302604484712196872849682937475553863818, time:1750767843.831173s req_ids:[8] -DEBUG 06-24 20:24:03 [manager.py:391] -ERROR 06-24 20:24:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:205.49607276916504ms total_cost_time:205.53851127624512ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10736 prompt_cache_len:5151 prompt_cache_ratio:0.4797876304023845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 -DEBUG 06-24 20:24:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10784792900085449 s -INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.10971331596374512 s -DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=148748523330451631860314822461356595214, time:1750767844.0504136s req_ids:[8] -DEBUG 06-24 20:24:04 [manager.py:391] -ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:03 lightllm_req_id:8 first_token_cost:210.21509170532227ms total_cost_time:210.27278900146484ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:10737 prompt_cache_len:5151 prompt_cache_ratio:0.4797429449566918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 -DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.11085748672485352 s -INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.11281442642211914 s -DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=317170953728911831547418954705786886814, time:1750767844.260936s req_ids:[8] -DEBUG 06-24 20:24:04 [manager.py:391] -ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:211.01927757263184ms total_cost_time:211.06815338134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:10738 prompt_cache_len:5151 prompt_cache_ratio:0.47969826783386105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 -DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10924696922302246 s -INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.1113121509552002 s -DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=171496180919496861921981393857607261372, time:1750767844.476972s req_ids:[8] -DEBUG 06-24 20:24:04 [manager.py:391] -ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:202.64530181884766ms total_cost_time:202.68988609313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10739 prompt_cache_len:5151 prompt_cache_ratio:0.4796535990315672 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 -DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10798096656799316 s -INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098928451538086 s -DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=257501631688147900391998460770447640441, time:1750767844.684372s req_ids:[8] -DEBUG 06-24 20:24:04 [manager.py:391] -ERROR 06-24 20:24:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:233.78276824951172ms total_cost_time:233.8271141052246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10740 prompt_cache_len:5151 prompt_cache_ratio:0.47960893854748604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 -DEBUG 06-24 20:24:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:04 [manager.py:224] router recive req id 8 cost time 0.10812091827392578 s -INFO 06-24 20:24:04 [manager.py:68] detokenization recv req id 8 cost time 0.11015486717224121 s -DEBUG 06-24 20:24:04 [manager.py:391] Prefill Batch: batch_id=160780646666661868479626756908500760630, time:1750767844.9286509s req_ids:[8] -DEBUG 06-24 20:24:04 [manager.py:391] -ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:04 lightllm_req_id:8 first_token_cost:206.7420482635498ms total_cost_time:206.7859172821045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10741 prompt_cache_len:5151 prompt_cache_ratio:0.47956428637929427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 -DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10817193984985352 s -INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021161079406738 s -DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=164088075040354100001853579320587468265, time:1750767845.145833s req_ids:[8] -DEBUG 06-24 20:24:05 [manager.py:391] -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:364.78161811828613ms total_cost_time:364.8266792297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10742 prompt_cache_len:5151 prompt_cache_ratio:0.4795196425246695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 -DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10856819152832031 s -INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s -DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=251211423270579980946242852751081873387, time:1750767845.5102105s req_ids:[8] -DEBUG 06-24 20:24:05 [manager.py:391] -ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:194.28086280822754ms total_cost_time:194.32711601257324ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10743 prompt_cache_len:5151 prompt_cache_ratio:0.47947500698129014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 -DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.10876989364624023 s -INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s -DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=174416136732395153499323649493608178910, time:1750767845.7100375s req_ids:[8] -DEBUG 06-24 20:24:05 [manager.py:391] -ERROR 06-24 20:24:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:198.28391075134277ms total_cost_time:198.32730293273926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10744 prompt_cache_len:5151 prompt_cache_ratio:0.47943037974683544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 -DEBUG 06-24 20:24:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:05 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s -INFO 06-24 20:24:05 [manager.py:68] detokenization recv req id 8 cost time 0.11063933372497559 s -DEBUG 06-24 20:24:05 [manager.py:391] Prefill Batch: batch_id=65585749970514938972855377332042222990, time:1750767845.9154105s req_ids:[8] -DEBUG 06-24 20:24:05 [manager.py:391] -ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:05 lightllm_req_id:8 first_token_cost:200.88791847229004ms total_cost_time:200.93178749084473ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10745 prompt_cache_len:5151 prompt_cache_ratio:0.4793857608189856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 -DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10838985443115234 s -INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11026787757873535 s -DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=142783692068970976280737925044834053913, time:1750767846.1254692s req_ids:[8] -DEBUG 06-24 20:24:06 [manager.py:391] -ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:208.0838680267334ms total_cost_time:208.12487602233887ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10746 prompt_cache_len:5151 prompt_cache_ratio:0.47934115019542156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 -DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10851097106933594 s -INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s -DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=287238441196153784219250544650975410489, time:1750767846.3405678s req_ids:[8] -DEBUG 06-24 20:24:06 [manager.py:391] -ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:206.56609535217285ms total_cost_time:206.60924911499023ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10747 prompt_cache_len:5151 prompt_cache_ratio:0.47929654787382525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 -DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.10890984535217285 s -INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11104226112365723 s -DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=167911722959536020331312950892771906409, time:1750767846.5506027s req_ids:[8] -DEBUG 06-24 20:24:06 [manager.py:391] -ERROR 06-24 20:24:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:205.22499084472656ms total_cost_time:205.26432991027832ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:10748 prompt_cache_len:5151 prompt_cache_ratio:0.4792519538518794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 -DEBUG 06-24 20:24:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:06 [manager.py:224] router recive req id 8 cost time 0.11003661155700684 s -INFO 06-24 20:24:06 [manager.py:68] detokenization recv req id 8 cost time 0.11189913749694824 s -DEBUG 06-24 20:24:06 [manager.py:391] Prefill Batch: batch_id=130344737612505861085711954689333776351, time:1750767846.774207s req_ids:[8] -DEBUG 06-24 20:24:06 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:06 lightllm_req_id:8 first_token_cost:384.3824863433838ms total_cost_time:384.4285011291504ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10749 prompt_cache_len:5151 prompt_cache_ratio:0.47920736812726766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 -DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10897707939147949 s -INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.11103510856628418 s -DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=182038272605659286343312156967195314663, time:1750767847.1523287s req_ids:[8] -DEBUG 06-24 20:24:07 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:204.18739318847656ms total_cost_time:204.2226791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:10750 prompt_cache_len:5151 prompt_cache_ratio:0.4791627906976744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 -DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10920238494873047 s -INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.1112985610961914 s -DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=291382247164871731104609929825451669320, time:1750767847.363522s req_ids:[8] -DEBUG 06-24 20:24:07 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:205.8556079864502ms total_cost_time:205.89971542358398ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10751 prompt_cache_len:5151 prompt_cache_ratio:0.47911822156078504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 -DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10832095146179199 s -INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.10988473892211914 s -DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=16592229574058624099318811922377014957, time:1750767847.5763505s req_ids:[8] -DEBUG 06-24 20:24:07 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:165.92049598693848ms total_cost_time:165.96102714538574ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10752 prompt_cache_len:5151 prompt_cache_ratio:0.4790736607142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 -DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10703134536743164 s -INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.1085355281829834 s -DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=139637892613551318759074546906306002270, time:1750767847.7476578s req_ids:[8] -DEBUG 06-24 20:24:07 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:163.75112533569336ms total_cost_time:163.77949714660645ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:10753 prompt_cache_len:5151 prompt_cache_ratio:0.47902910815586347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 -DEBUG 06-24 20:24:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:07 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s -INFO 06-24 20:24:07 [manager.py:68] detokenization recv req id 8 cost time 0.11090588569641113 s -DEBUG 06-24 20:24:07 [manager.py:391] Prefill Batch: batch_id=249652190936004082800217767480564630748, time:1750767847.9178398s req_ids:[8] -DEBUG 06-24 20:24:07 [manager.py:391] -ERROR 06-24 20:24:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:07 lightllm_req_id:8 first_token_cost:194.7028636932373ms total_cost_time:194.7472095489502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10754 prompt_cache_len:5151 prompt_cache_ratio:0.47898456388320626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 -DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10929250717163086 s -INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.11130595207214355 s -DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=221781913911829846660381239241377729184, time:1750767848.1203308s req_ids:[8] -DEBUG 06-24 20:24:08 [manager.py:391] -ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:205.70755004882812ms total_cost_time:205.75332641601562ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10755 prompt_cache_len:5151 prompt_cache_ratio:0.4789400278940028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 -DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10740208625793457 s -INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.10946083068847656 s -DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=302761909522084610481286261636426979422, time:1750767848.3341181s req_ids:[8] -DEBUG 06-24 20:24:08 [manager.py:391] -ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:209.04994010925293ms total_cost_time:209.09428596496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10756 prompt_cache_len:5151 prompt_cache_ratio:0.47889550018594274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 -DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s -INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s -DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=89202504642758512683915851653559567091, time:1750767848.5532866s req_ids:[8] -DEBUG 06-24 20:24:08 [manager.py:391] -ERROR 06-24 20:24:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:370.56446075439453ms total_cost_time:370.6066608428955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10757 prompt_cache_len:5151 prompt_cache_ratio:0.47885098075671656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 -DEBUG 06-24 20:24:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:08 [batch.py:51] router release req id 8 -INFO 06-24 20:24:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:08 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s -INFO 06-24 20:24:08 [manager.py:68] detokenization recv req id 8 cost time 0.10974907875061035 s -DEBUG 06-24 20:24:08 [manager.py:391] Prefill Batch: batch_id=289694805972595820869536730231962615714, time:1750767848.9226048s req_ids:[8] -DEBUG 06-24 20:24:08 [manager.py:391] -ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:08 lightllm_req_id:8 first_token_cost:204.80799674987793ms total_cost_time:204.8506736755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10758 prompt_cache_len:5151 prompt_cache_ratio:0.4788064696040156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 -DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.1088418960571289 s -INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11078190803527832 s -DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=293912928956826575103246580631463832972, time:1750767849.134654s req_ids:[8] -DEBUG 06-24 20:24:09 [manager.py:391] -ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:206.44164085388184ms total_cost_time:206.4833641052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10759 prompt_cache_len:5151 prompt_cache_ratio:0.4787619667255321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 -DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10816764831542969 s -INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s -DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=212794437148878723669159633300920283150, time:1750767849.3490028s req_ids:[8] -DEBUG 06-24 20:24:09 [manager.py:391] -ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:234.96198654174805ms total_cost_time:235.00633239746094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10760 prompt_cache_len:5151 prompt_cache_ratio:0.4787174721189591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 -DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10879993438720703 s -INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11084747314453125 s -DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=109239641811708365810478842460697069849, time:1750767849.5909824s req_ids:[8] -DEBUG 06-24 20:24:09 [manager.py:391] -ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:208.17112922668457ms total_cost_time:208.21261405944824ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10761 prompt_cache_len:5151 prompt_cache_ratio:0.4786729857819905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 -DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:09 [manager.py:224] router recive req id 8 cost time 0.10876655578613281 s -INFO 06-24 20:24:09 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s -DEBUG 06-24 20:24:09 [manager.py:391] Prefill Batch: batch_id=98552917317254311449898763242008320623, time:1750767849.8021007s req_ids:[8] -DEBUG 06-24 20:24:09 [manager.py:391] -ERROR 06-24 20:24:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:205.33418655395508ms total_cost_time:205.37662506103516ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10762 prompt_cache_len:5151 prompt_cache_ratio:0.4786285077123211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 -DEBUG 06-24 20:24:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10750269889831543 s -INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.10943102836608887 s -DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=127564470252352346717876962623455585077, time:1750767850.0156348s req_ids:[8] -DEBUG 06-24 20:24:10 [manager.py:391] -ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:09 lightllm_req_id:8 first_token_cost:209.6536159515381ms total_cost_time:209.69867706298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10763 prompt_cache_len:5151 prompt_cache_ratio:0.4785840379076466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 -DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10892701148986816 s -INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11088204383850098 s -DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=150868658036208617431732372825691779449, time:1750767850.2288513s req_ids:[8] -DEBUG 06-24 20:24:10 [manager.py:391] -ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:199.51152801513672ms total_cost_time:199.5561122894287ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10764 prompt_cache_len:5151 prompt_cache_ratio:0.4785395763656633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 -DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10897326469421387 s -INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100163459777832 s -DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=71380276209538384830328682670042530080, time:1750767850.436105s req_ids:[8] -DEBUG 06-24 20:24:10 [manager.py:391] -ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:353.6221981048584ms total_cost_time:353.6550998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:10765 prompt_cache_len:5151 prompt_cache_ratio:0.4784951230840687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 -DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.11044502258300781 s -INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11248135566711426 s -DEBUG 06-24 20:24:10 [manager.py:391] Prefill Batch: batch_id=75211316245926906009737088829900074981, time:1750767850.7941175s req_ids:[8] -DEBUG 06-24 20:24:10 [manager.py:391] -ERROR 06-24 20:24:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:202.8036117553711ms total_cost_time:202.84605026245117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10766 prompt_cache_len:5151 prompt_cache_ratio:0.478450678060561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 -DEBUG 06-24 20:24:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:10 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s -INFO 06-24 20:24:10 [manager.py:68] detokenization recv req id 8 cost time 0.11000227928161621 s -DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=168022682579162153315700332558665629202, time:1750767851.0038307s req_ids:[8] -DEBUG 06-24 20:24:11 [manager.py:391] -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:11 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:10 lightllm_req_id:8 first_token_cost:208.07123184204102ms total_cost_time:208.1143856048584ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10767 prompt_cache_len:5151 prompt_cache_ratio:0.4784062412928392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 -DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s -INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s -DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=126051779508853090757710073925539046807, time:1750767851.2241144s req_ids:[8] -DEBUG 06-24 20:24:11 [manager.py:391] -ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:213.58489990234375ms total_cost_time:213.61231803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:10768 prompt_cache_len:5151 prompt_cache_ratio:0.4783618127786033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 -DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10929489135742188 s -INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.1110990047454834 s -DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=290568228657800051681832406438492987728, time:1750767851.4583247s req_ids:[8] -DEBUG 06-24 20:24:11 [manager.py:391] -ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:226.86028480529785ms total_cost_time:226.90320014953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10769 prompt_cache_len:5151 prompt_cache_ratio:0.4783173925155539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 -DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10819077491760254 s -INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s -DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=188288922335868629308890191897948687337, time:1750767851.6718009s req_ids:[8] -DEBUG 06-24 20:24:11 [manager.py:391] -ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:207.83400535583496ms total_cost_time:207.87644386291504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10770 prompt_cache_len:5151 prompt_cache_ratio:0.4782729805013928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 -DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:11 [manager.py:224] router recive req id 8 cost time 0.10868310928344727 s -INFO 06-24 20:24:11 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s -DEBUG 06-24 20:24:11 [manager.py:391] Prefill Batch: batch_id=42682203722834391342788395589040948005, time:1750767851.8861687s req_ids:[8] -DEBUG 06-24 20:24:11 [manager.py:391] -ERROR 06-24 20:24:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:194.88120079040527ms total_cost_time:194.92244720458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10771 prompt_cache_len:5151 prompt_cache_ratio:0.4782285767338223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 -DEBUG 06-24 20:24:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10906195640563965 s -INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.11097097396850586 s -DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=194133092976506195659437451908033849594, time:1750767852.086288s req_ids:[8] -DEBUG 06-24 20:24:12 [manager.py:391] -ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:11 lightllm_req_id:8 first_token_cost:202.56447792053223ms total_cost_time:202.6052474975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10772 prompt_cache_len:5151 prompt_cache_ratio:0.47818418121054584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 -DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10802173614501953 s -INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.11005139350891113 s -DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=172318334408079033269481663472478622004, time:1750767852.297389s req_ids:[8] -DEBUG 06-24 20:24:12 [manager.py:391] -ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:209.05041694641113ms total_cost_time:209.0930938720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10773 prompt_cache_len:5151 prompt_cache_ratio:0.4781397939292676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 -DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10735440254211426 s -INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.10921335220336914 s -DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=129358916107991694452287585063631588375, time:1750767852.5187638s req_ids:[8] -DEBUG 06-24 20:24:12 [manager.py:391] -ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:24:12 [stats.py:37] Avg tokens(prompt+generate) throughput: 45365.636 tokens/s -DEBUG 06-24 20:24:12 [stats.py:37] Avg prompt tokens throughput: 45357.102 tokens/s -DEBUG 06-24 20:24:12 [stats.py:37] Avg generate tokens throughput: 8.534 tokens/s -INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:371.4449405670166ms total_cost_time:371.4883327484131ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10774 prompt_cache_len:5151 prompt_cache_ratio:0.4780954148876926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 -DEBUG 06-24 20:24:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:12 [manager.py:224] router recive req id 8 cost time 0.10786890983581543 s -INFO 06-24 20:24:12 [manager.py:68] detokenization recv req id 8 cost time 0.1098935604095459 s -DEBUG 06-24 20:24:12 [manager.py:391] Prefill Batch: batch_id=209086027277713716773403183486383613773, time:1750767852.8877873s req_ids:[8] -DEBUG 06-24 20:24:12 [manager.py:391] -ERROR 06-24 20:24:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:215.61050415039062ms total_cost_time:215.64030647277832ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:10775 prompt_cache_len:5151 prompt_cache_ratio:0.47805104408352667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 -DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10780024528503418 s -INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.10969948768615723 s -DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=63163234909618854643246430838169750122, time:1750767853.108974s req_ids:[8] -DEBUG 06-24 20:24:13 [manager.py:391] -ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:12 lightllm_req_id:8 first_token_cost:205.3239345550537ms total_cost_time:205.3661346435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10776 prompt_cache_len:5151 prompt_cache_ratio:0.4780066815144766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 -DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10835409164428711 s -INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s -DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=182144463427977873176742853175542331291, time:1750767853.3218887s req_ids:[8] -DEBUG 06-24 20:24:13 [manager.py:391] -ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:207.7767848968506ms total_cost_time:207.81636238098145ms,out_token_counter:1 mean_per_token_cost_time: 0.039577484130859375ms prompt_token_num:10777 prompt_cache_len:5151 prompt_cache_ratio:0.47796232717825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 -DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.1089780330657959 s -INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080503463745117 s -DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=200079961560476587568593564455483042450, time:1750767853.535132s req_ids:[8] -DEBUG 06-24 20:24:13 [manager.py:391] -ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:184.2961311340332ms total_cost_time:184.33713912963867ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10778 prompt_cache_len:5151 prompt_cache_ratio:0.47791798107255523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 -DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10849308967590332 s -INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.11051130294799805 s -DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=78306049111434984175527622770959907871, time:1750767853.7255378s req_ids:[8] -DEBUG 06-24 20:24:13 [manager.py:391] -ERROR 06-24 20:24:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:201.8263339996338ms total_cost_time:201.8873691558838ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:10779 prompt_cache_len:5151 prompt_cache_ratio:0.47787364319510156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 -DEBUG 06-24 20:24:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:13 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s -INFO 06-24 20:24:13 [manager.py:68] detokenization recv req id 8 cost time 0.10995292663574219 s -DEBUG 06-24 20:24:13 [manager.py:391] Prefill Batch: batch_id=98419043005070836349699499373112244255, time:1750767853.9336083s req_ids:[8] -DEBUG 06-24 20:24:13 [manager.py:391] -ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:13 lightllm_req_id:8 first_token_cost:224.86233711242676ms total_cost_time:224.90715980529785ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10780 prompt_cache_len:5151 prompt_cache_ratio:0.47782931354359925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 -DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10914802551269531 s -INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.1110680103302002 s -DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=71978612241389346520688122590416603505, time:1750767854.1627066s req_ids:[8] -DEBUG 06-24 20:24:14 [manager.py:391] -ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:201.45845413208008ms total_cost_time:201.50208473205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10781 prompt_cache_len:5151 prompt_cache_ratio:0.4777849921157592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 -DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.1086421012878418 s -INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.11053013801574707 s -DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=48498053969596050254934332781112936155, time:1750767854.3718393s req_ids:[8] -DEBUG 06-24 20:24:14 [manager.py:391] -ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:372.7872371673584ms total_cost_time:372.8322982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10782 prompt_cache_len:5151 prompt_cache_ratio:0.4777406789092933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 -DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10810112953186035 s -INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.1100468635559082 s -DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=166070689113236393782053105208444626906, time:1750767854.751016s req_ids:[8] -DEBUG 06-24 20:24:14 [manager.py:391] -ERROR 06-24 20:24:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:205.60145378112793ms total_cost_time:205.64532279968262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10783 prompt_cache_len:5151 prompt_cache_ratio:0.4776963739219141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 -DEBUG 06-24 20:24:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:14 [manager.py:224] router recive req id 8 cost time 0.10870075225830078 s -INFO 06-24 20:24:14 [manager.py:68] detokenization recv req id 8 cost time 0.11064267158508301 s -DEBUG 06-24 20:24:14 [manager.py:391] Prefill Batch: batch_id=142801626985577905746739838597226807366, time:1750767854.9640563s req_ids:[8] -DEBUG 06-24 20:24:14 [manager.py:391] -ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:14 lightllm_req_id:8 first_token_cost:215.37184715270996ms total_cost_time:215.41619300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10784 prompt_cache_len:5151 prompt_cache_ratio:0.4776520771513353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 -DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s -INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022472381591797 s -DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=264430456725722859580643647089455610414, time:1750767855.1835248s req_ids:[8] -DEBUG 06-24 20:24:15 [manager.py:391] -ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:199.48792457580566ms total_cost_time:199.53036308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10785 prompt_cache_len:5151 prompt_cache_ratio:0.4776077885952712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 -DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10924196243286133 s -INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11119627952575684 s -DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=62600360220748935168774393988508228985, time:1750767855.3931115s req_ids:[8] -DEBUG 06-24 20:24:15 [manager.py:391] -ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:202.95238494873047ms total_cost_time:202.98075675964355ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:10786 prompt_cache_len:5151 prompt_cache_ratio:0.47756350825143706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 -DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10815954208374023 s -INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11017441749572754 s -DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=9818516937104409440033434300834038600, time:1750767855.601111s req_ids:[8] -DEBUG 06-24 20:24:15 [manager.py:391] -ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:206.97450637817383ms total_cost_time:207.017183303833ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10787 prompt_cache_len:5151 prompt_cache_ratio:0.47751923611754893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 -DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:15 [manager.py:224] router recive req id 8 cost time 0.10911917686462402 s -INFO 06-24 20:24:15 [manager.py:68] detokenization recv req id 8 cost time 0.11114931106567383 s -DEBUG 06-24 20:24:15 [manager.py:391] Prefill Batch: batch_id=227450569402351813193579353115699229835, time:1750767855.8142185s req_ids:[8] -DEBUG 06-24 20:24:15 [manager.py:391] -ERROR 06-24 20:24:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:208.82534980773926ms total_cost_time:208.86969566345215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10788 prompt_cache_len:5151 prompt_cache_ratio:0.4774749721913237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 -DEBUG 06-24 20:24:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10760331153869629 s -INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s -DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=189305704757693236948657209878271655758, time:1750767856.0288367s req_ids:[8] -DEBUG 06-24 20:24:16 [manager.py:391] -ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:15 lightllm_req_id:8 first_token_cost:205.36494255065918ms total_cost_time:205.40881156921387ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10789 prompt_cache_len:5151 prompt_cache_ratio:0.4774307164704792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 -DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10852360725402832 s -INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11055135726928711 s -DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=97795670632794732542871006523381587464, time:1750767856.2398174s req_ids:[8] -DEBUG 06-24 20:24:16 [manager.py:391] -ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:385.1635456085205ms total_cost_time:385.2086067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10790 prompt_cache_len:5151 prompt_cache_ratio:0.477386468952734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 -DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10811853408813477 s -INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s -DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=245601044069298272555660850083387609869, time:1750767856.6311982s req_ids:[8] -DEBUG 06-24 20:24:16 [manager.py:391] -ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:206.85887336730957ms total_cost_time:206.90083503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10791 prompt_cache_len:5151 prompt_cache_ratio:0.47734222963580764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 -DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:16 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s -INFO 06-24 20:24:16 [manager.py:68] detokenization recv req id 8 cost time 0.11104822158813477 s -DEBUG 06-24 20:24:16 [manager.py:391] Prefill Batch: batch_id=208715796897808203111431737026342476467, time:1750767856.8446205s req_ids:[8] -DEBUG 06-24 20:24:16 [manager.py:391] -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:16 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:199.44477081298828ms total_cost_time:199.48983192443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10792 prompt_cache_len:5151 prompt_cache_ratio:0.47729799851742033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 -DEBUG 06-24 20:24:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:16 [batch.py:51] router release req id 8 -INFO 06-24 20:24:16 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10918974876403809 s -INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.111083984375 s -DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=234866872768250299188643969761781890590, time:1750767857.0524092s req_ids:[8] -DEBUG 06-24 20:24:17 [manager.py:391] -ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:16 lightllm_req_id:8 first_token_cost:207.40747451782227ms total_cost_time:207.43608474731445ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:10793 prompt_cache_len:5151 prompt_cache_ratio:0.47725377559529325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 -DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10675525665283203 s -INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.10866498947143555 s -DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=18986666977046008601585727637914488933, time:1750767857.265152s req_ids:[8] -DEBUG 06-24 20:24:17 [manager.py:391] -ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:207.75175094604492ms total_cost_time:207.794189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10794 prompt_cache_len:5151 prompt_cache_ratio:0.47720956086714844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 -DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10979485511779785 s -INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11211514472961426 s -DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=115280758120090967896910607412664957338, time:1750767857.4821784s req_ids:[8] -DEBUG 06-24 20:24:17 [manager.py:391] -ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:211.4264965057373ms total_cost_time:211.4698886871338ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10795 prompt_cache_len:5151 prompt_cache_ratio:0.47716535433070867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 -DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10953760147094727 s -INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s -DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=68331389803577880124722455639779393750, time:1750767857.7135818s req_ids:[8] -DEBUG 06-24 20:24:17 [manager.py:391] -ERROR 06-24 20:24:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:220.9789752960205ms total_cost_time:221.0237979888916ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10796 prompt_cache_len:5151 prompt_cache_ratio:0.47712115598369764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 -DEBUG 06-24 20:24:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:17 [manager.py:224] router recive req id 8 cost time 0.10817432403564453 s -INFO 06-24 20:24:17 [manager.py:68] detokenization recv req id 8 cost time 0.11014366149902344 s -DEBUG 06-24 20:24:17 [manager.py:391] Prefill Batch: batch_id=259004581662586880751021761513001114818, time:1750767857.9310448s req_ids:[8] -DEBUG 06-24 20:24:17 [manager.py:391] -ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:17 lightllm_req_id:8 first_token_cost:216.38154983520508ms total_cost_time:216.42494201660156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10797 prompt_cache_len:5151 prompt_cache_ratio:0.47707696582383996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 -DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.10811018943786621 s -INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11003661155700684 s -DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=270475521476875308147399873503183429687, time:1750767858.161151s req_ids:[8] -DEBUG 06-24 20:24:18 [manager.py:391] -ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:416.7647361755371ms total_cost_time:416.8078899383545ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10798 prompt_cache_len:5151 prompt_cache_ratio:0.4770327838488609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 -DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.1099541187286377 s -INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11181950569152832 s -DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=224147780121581683835399099229732185137, time:1750767858.570062s req_ids:[8] -DEBUG 06-24 20:24:18 [manager.py:391] -ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:204.58221435546875ms total_cost_time:204.62608337402344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10799 prompt_cache_len:5151 prompt_cache_ratio:0.47698861005648674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 -DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:18 [manager.py:224] router recive req id 8 cost time 0.10919642448425293 s -INFO 06-24 20:24:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118865013122559 s -DEBUG 06-24 20:24:18 [manager.py:391] Prefill Batch: batch_id=291544573839569231486880046926706173409, time:1750767858.7961416s req_ids:[8] -DEBUG 06-24 20:24:18 [manager.py:391] -ERROR 06-24 20:24:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:223.49834442138672ms total_cost_time:223.5417366027832ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10800 prompt_cache_len:5151 prompt_cache_ratio:0.47694444444444445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 -DEBUG 06-24 20:24:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10874152183532715 s -INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1107320785522461 s -DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=126882741137505825170664382651963786491, time:1750767859.034263s req_ids:[8] -DEBUG 06-24 20:24:19 [manager.py:391] -ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:18 lightllm_req_id:8 first_token_cost:230.8969497680664ms total_cost_time:230.9410572052002ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10801 prompt_cache_len:5151 prompt_cache_ratio:0.476900287010462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 -DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10783863067626953 s -INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1097707748413086 s -DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=33274191379613403005663937931994357244, time:1750767859.2522402s req_ids:[8] -DEBUG 06-24 20:24:19 [manager.py:391] -ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:209.30790901184082ms total_cost_time:209.3510627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10802 prompt_cache_len:5151 prompt_cache_ratio:0.4768561377522681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 -DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:19 [batch.py:51] router release req id 8 -INFO 06-24 20:24:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10868239402770996 s -INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.11055159568786621 s -DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=110838222292944246721338427152658399573, time:1750767859.4667668s req_ids:[8] -DEBUG 06-24 20:24:19 [manager.py:391] -ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:206.70247077941895ms total_cost_time:206.74514770507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10803 prompt_cache_len:5151 prompt_cache_ratio:0.47681199666759233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 -DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10910487174987793 s -INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.1110539436340332 s -DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=73995933859631482736998456554585832709, time:1750767859.6807106s req_ids:[8] -DEBUG 06-24 20:24:19 [manager.py:391] -ERROR 06-24 20:24:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:206.10594749450684ms total_cost_time:206.16722106933594ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:10804 prompt_cache_len:5151 prompt_cache_ratio:0.47676786375416513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 -DEBUG 06-24 20:24:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:19 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s -INFO 06-24 20:24:19 [manager.py:68] detokenization recv req id 8 cost time 0.10981202125549316 s -DEBUG 06-24 20:24:19 [manager.py:391] Prefill Batch: batch_id=269082745501199068373247816502917595734, time:1750767859.8946364s req_ids:[8] -DEBUG 06-24 20:24:19 [manager.py:391] -ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:19 lightllm_req_id:8 first_token_cost:223.7532138824463ms total_cost_time:223.79493713378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10805 prompt_cache_len:5151 prompt_cache_ratio:0.4767237390097177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 -DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.31000757217407227 s -INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.3120899200439453 s -DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=128583745450943816864648631344109646878, time:1750767860.3265762s req_ids:[8] -DEBUG 06-24 20:24:20 [manager.py:391] -ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:422.09768295288086ms total_cost_time:422.13964462280273ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10806 prompt_cache_len:5151 prompt_cache_ratio:0.47667962243198225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 -DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.1086423397064209 s -INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s -DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=131178656667762733696958885419669019068, time:1750767860.5643415s req_ids:[8] -DEBUG 06-24 20:24:20 [manager.py:391] -ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:214.53619003295898ms total_cost_time:214.57886695861816ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10807 prompt_cache_len:5151 prompt_cache_ratio:0.4766355140186916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 -DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.10837078094482422 s -INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.11042451858520508 s -DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=124617747324273846603562059079430451410, time:1750767860.7769513s req_ids:[8] -DEBUG 06-24 20:24:20 [manager.py:391] -ERROR 06-24 20:24:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:201.93123817443848ms total_cost_time:201.97319984436035ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10808 prompt_cache_len:5151 prompt_cache_ratio:0.4765914137675796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 -DEBUG 06-24 20:24:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:20 [manager.py:224] router recive req id 8 cost time 0.10834479331970215 s -INFO 06-24 20:24:20 [manager.py:68] detokenization recv req id 8 cost time 0.1102609634399414 s -DEBUG 06-24 20:24:20 [manager.py:391] Prefill Batch: batch_id=186971792895048784046568185742548046328, time:1750767860.9850655s req_ids:[8] -DEBUG 06-24 20:24:20 [manager.py:391] -ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:20 lightllm_req_id:8 first_token_cost:200.1030445098877ms total_cost_time:200.14476776123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10809 prompt_cache_len:5151 prompt_cache_ratio:0.4765473216763808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 -DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10957145690917969 s -INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.11165475845336914 s -DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=181522422479894113185170750578680887712, time:1750767861.1932395s req_ids:[8] -DEBUG 06-24 20:24:21 [manager.py:391] -ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:202.08168029785156ms total_cost_time:202.12459564208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10810 prompt_cache_len:5151 prompt_cache_ratio:0.4765032377428307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 -DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10766005516052246 s -INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.10960054397583008 s -DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=297937917307387728715662755789434559943, time:1750767861.4026966s req_ids:[8] -DEBUG 06-24 20:24:21 [manager.py:391] -ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:190.96922874450684ms total_cost_time:191.01333618164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10811 prompt_cache_len:5151 prompt_cache_ratio:0.47645916196466565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 -DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:21 [manager.py:224] router recive req id 8 cost time 0.10881233215332031 s -INFO 06-24 20:24:21 [manager.py:68] detokenization recv req id 8 cost time 0.11065316200256348 s -DEBUG 06-24 20:24:21 [manager.py:391] Prefill Batch: batch_id=236401982470537470583050246191566533518, time:1750767861.6034505s req_ids:[8] -DEBUG 06-24 20:24:21 [manager.py:391] -ERROR 06-24 20:24:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:198.31109046936035ms total_cost_time:198.35472106933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10812 prompt_cache_len:5151 prompt_cache_ratio:0.47641509433962265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 -DEBUG 06-24 20:24:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.3103361129760742 s -INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.3123133182525635 s -DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=136728510666566027234889955202915071360, time:1750767862.0228534s req_ids:[8] -DEBUG 06-24 20:24:22 [manager.py:391] -ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:21 lightllm_req_id:8 first_token_cost:426.8004894256592ms total_cost_time:426.84412002563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10813 prompt_cache_len:5151 prompt_cache_ratio:0.47637103486543975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 -DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.1089932918548584 s -INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.11092209815979004 s -DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=2662429270266680719154682225776745589, time:1750767862.2457292s req_ids:[8] -DEBUG 06-24 20:24:22 [manager.py:391] -ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:206.21681213378906ms total_cost_time:206.26091957092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10814 prompt_cache_len:5151 prompt_cache_ratio:0.47632698353985575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 -DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.1079854965209961 s -INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.10985994338989258 s -DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=164783899033060553524355598594921154399, time:1750767862.4712558s req_ids:[8] -DEBUG 06-24 20:24:22 [manager.py:391] -ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:219.87175941467285ms total_cost_time:219.91634368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10815 prompt_cache_len:5151 prompt_cache_ratio:0.47628294036061025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 -DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.10911345481872559 s -INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.1111457347869873 s -DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=24853920658371752522957514594811717838, time:1750767862.6859393s req_ids:[8] -DEBUG 06-24 20:24:22 [manager.py:391] -ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:187.76917457580566ms total_cost_time:187.81208992004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10816 prompt_cache_len:5151 prompt_cache_ratio:0.4762389053254438 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 -DEBUG 06-24 20:24:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:22 [manager.py:224] router recive req id 8 cost time 0.10830020904541016 s -INFO 06-24 20:24:22 [manager.py:68] detokenization recv req id 8 cost time 0.1102604866027832 s -DEBUG 06-24 20:24:22 [manager.py:391] Prefill Batch: batch_id=208016824970155666301884642583086207468, time:1750767862.8779793s req_ids:[8] -DEBUG 06-24 20:24:22 [manager.py:391] -DEBUG 06-24 20:24:22 [stats.py:37] Avg tokens(prompt+generate) throughput: 45915.352 tokens/s -DEBUG 06-24 20:24:22 [stats.py:37] Avg prompt tokens throughput: 45906.947 tokens/s -DEBUG 06-24 20:24:22 [stats.py:37] Avg generate tokens throughput: 8.406 tokens/s -ERROR 06-24 20:24:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:199.46599006652832ms total_cost_time:199.50628280639648ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10817 prompt_cache_len:5151 prompt_cache_ratio:0.4761948784320976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 -DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s -INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s -DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=64780947764510240840013128748340278325, time:1750767863.0952823s req_ids:[8] -DEBUG 06-24 20:24:23 [manager.py:391] -ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:22 lightllm_req_id:8 first_token_cost:225.15082359313965ms total_cost_time:225.19636154174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10818 prompt_cache_len:5151 prompt_cache_ratio:0.47615085967831394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 -INFO 06-24 20:24:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:24:23 [statics_utils.py:24] mean first cost: 228.4070945117157 ms -INFO 06-24 20:24:23 [statics_utils.py:24] mean per token cost: 0.06619702225697116 ms -DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10828232765197754 s -INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s -DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=201399538075378348725515446597072728738, time:1750767863.3216166s req_ids:[8] -DEBUG 06-24 20:24:23 [manager.py:391] -ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:210.47711372375488ms total_cost_time:210.51859855651855ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10819 prompt_cache_len:5151 prompt_cache_ratio:0.47610684906183565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 -DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s -INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.11110401153564453 s -DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=323737998160608752689168661959055636562, time:1750767863.5333922s req_ids:[8] -DEBUG 06-24 20:24:23 [manager.py:391] -ERROR 06-24 20:24:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:377.899169921875ms total_cost_time:377.9451847076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10820 prompt_cache_len:5151 prompt_cache_ratio:0.47606284658040665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 -DEBUG 06-24 20:24:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:23 [manager.py:224] router recive req id 8 cost time 0.10791707038879395 s -INFO 06-24 20:24:23 [manager.py:68] detokenization recv req id 8 cost time 0.10990691184997559 s -DEBUG 06-24 20:24:23 [manager.py:391] Prefill Batch: batch_id=247958635171693639557141313031594123404, time:1750767863.9181013s req_ids:[8] -DEBUG 06-24 20:24:23 [manager.py:391] -ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:23 lightllm_req_id:8 first_token_cost:206.82406425476074ms total_cost_time:206.8498134613037ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:10821 prompt_cache_len:5151 prompt_cache_ratio:0.47601885223177154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 -DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10807561874389648 s -INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005020141601562 s -DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=254421710127020056069583577876363249980, time:1750767864.1325543s req_ids:[8] -DEBUG 06-24 20:24:24 [manager.py:391] -ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:196.9776153564453ms total_cost_time:197.0210075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10822 prompt_cache_len:5151 prompt_cache_ratio:0.47597486601367583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 -DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10811567306518555 s -INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s -DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=76938493226455344663435973903319391108, time:1750767864.3362625s req_ids:[8] -DEBUG 06-24 20:24:24 [manager.py:391] -ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:198.12774658203125ms total_cost_time:198.17018508911133ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10823 prompt_cache_len:5151 prompt_cache_ratio:0.47593088792386584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 -DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10819172859191895 s -INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019611358642578 s -DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=268485964219194106055190182159183929100, time:1750767864.540481s req_ids:[8] -DEBUG 06-24 20:24:24 [manager.py:391] -ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:210.16478538513184ms total_cost_time:210.21175384521484ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:10824 prompt_cache_len:5151 prompt_cache_ratio:0.4758869179600887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 -DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10902976989746094 s -INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11102294921875 s -DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=135887280350873173955036710447765478091, time:1750767864.76122s req_ids:[8] -DEBUG 06-24 20:24:24 [manager.py:391] -ERROR 06-24 20:24:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:205.72972297668457ms total_cost_time:205.77311515808105ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10825 prompt_cache_len:5151 prompt_cache_ratio:0.47584295612009236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 -DEBUG 06-24 20:24:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:24 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s -INFO 06-24 20:24:24 [manager.py:68] detokenization recv req id 8 cost time 0.11053133010864258 s -DEBUG 06-24 20:24:24 [manager.py:391] Prefill Batch: batch_id=154818544692671508774787500468294520812, time:1750767864.973172s req_ids:[8] -DEBUG 06-24 20:24:24 [manager.py:391] -ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:24 lightllm_req_id:8 first_token_cost:206.52246475219727ms total_cost_time:206.56847953796387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10826 prompt_cache_len:5151 prompt_cache_ratio:0.4757990024016257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 -DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10811471939086914 s -INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.1102144718170166 s -DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=326911286291237827358936726961541192141, time:1750767865.1901429s req_ids:[8] -DEBUG 06-24 20:24:25 [manager.py:391] -ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:380.7556629180908ms total_cost_time:380.8012008666992ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10827 prompt_cache_len:5151 prompt_cache_ratio:0.47575505680243835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 -DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10895633697509766 s -INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.11104869842529297 s -DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=337412359032131072961262888199619662754, time:1750767865.5777507s req_ids:[8] -DEBUG 06-24 20:24:25 [manager.py:391] -ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:202.87060737609863ms total_cost_time:202.91376113891602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10828 prompt_cache_len:5151 prompt_cache_ratio:0.47571111932028076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 -DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:25 [manager.py:224] router recive req id 8 cost time 0.10910940170288086 s -INFO 06-24 20:24:25 [manager.py:68] detokenization recv req id 8 cost time 0.11105942726135254 s -DEBUG 06-24 20:24:25 [manager.py:391] Prefill Batch: batch_id=319063083188676789548815247138352182551, time:1750767865.7810936s req_ids:[8] -DEBUG 06-24 20:24:25 [manager.py:391] -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:219.32172775268555ms total_cost_time:219.36631202697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10829 prompt_cache_len:5151 prompt_cache_ratio:0.47566718995290425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 -DEBUG 06-24 20:24:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.1095271110534668 s -INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11162400245666504 s -DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=40358148500700225298516758828592415216, time:1750767866.0184412s req_ids:[8] -DEBUG 06-24 20:24:26 [manager.py:391] -ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:25 lightllm_req_id:8 first_token_cost:217.69380569458008ms total_cost_time:217.73648262023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10830 prompt_cache_len:5151 prompt_cache_ratio:0.47562326869806093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 -DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10947203636169434 s -INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.1115105152130127 s -DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=299282416069555826910865675271940206953, time:1750767866.2331536s req_ids:[8] -DEBUG 06-24 20:24:26 [manager.py:391] -ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:207.1092128753662ms total_cost_time:207.1549892425537ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10831 prompt_cache_len:5151 prompt_cache_ratio:0.47557935555350384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 -DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10925984382629395 s -INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11108970642089844 s -DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=262306183436909091257918624539846342954, time:1750767866.448401s req_ids:[8] -DEBUG 06-24 20:24:26 [manager.py:391] -ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:205.37114143371582ms total_cost_time:205.4150104522705ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10832 prompt_cache_len:5151 prompt_cache_ratio:0.4755354505169867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 -DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10962820053100586 s -INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.11157536506652832 s -DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=311413846545641472282743657615750024213, time:1750767866.6608236s req_ids:[8] -DEBUG 06-24 20:24:26 [manager.py:391] -ERROR 06-24 20:24:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:205.17420768737793ms total_cost_time:205.22022247314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10833 prompt_cache_len:5151 prompt_cache_ratio:0.4754915535862642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 -DEBUG 06-24 20:24:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:26 [manager.py:224] router recive req id 8 cost time 0.10821533203125 s -INFO 06-24 20:24:26 [manager.py:68] detokenization recv req id 8 cost time 0.10998201370239258 s -DEBUG 06-24 20:24:26 [manager.py:391] Prefill Batch: batch_id=231355119758628361943229308906396102166, time:1750767866.8702745s req_ids:[8] -DEBUG 06-24 20:24:26 [manager.py:391] -ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:26 lightllm_req_id:8 first_token_cost:367.57349967956543ms total_cost_time:367.61927604675293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10834 prompt_cache_len:5151 prompt_cache_ratio:0.4754476647590917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 -DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s -INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11068916320800781 s -DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=228738129159180230158689076537900587386, time:1750767867.244832s req_ids:[8] -DEBUG 06-24 20:24:27 [manager.py:391] -ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:222.10192680358887ms total_cost_time:222.14651107788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10835 prompt_cache_len:5151 prompt_cache_ratio:0.47540378403322564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 -DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10954618453979492 s -INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11162996292114258 s -DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=273688623905911706260616684668085505188, time:1750767867.4802883s req_ids:[8] -DEBUG 06-24 20:24:27 [manager.py:391] -ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:213.0577564239502ms total_cost_time:213.10186386108398ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10836 prompt_cache_len:5151 prompt_cache_ratio:0.47535991140642303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 -DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10900640487670898 s -INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.11101651191711426 s -DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=114264990221561923589977140161619552313, time:1750767867.694045s req_ids:[8] -DEBUG 06-24 20:24:27 [manager.py:391] -ERROR 06-24 20:24:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:217.07630157470703ms total_cost_time:217.1194553375244ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10837 prompt_cache_len:5151 prompt_cache_ratio:0.4753160468764418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 -DEBUG 06-24 20:24:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:27 [manager.py:224] router recive req id 8 cost time 0.10773372650146484 s -INFO 06-24 20:24:27 [manager.py:68] detokenization recv req id 8 cost time 0.10971546173095703 s -DEBUG 06-24 20:24:27 [manager.py:391] Prefill Batch: batch_id=174939914628176378268606220059850549582, time:1750767867.9182785s req_ids:[8] -DEBUG 06-24 20:24:27 [manager.py:391] -ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:27 lightllm_req_id:8 first_token_cost:202.15868949890137ms total_cost_time:202.20398902893066ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10838 prompt_cache_len:5151 prompt_cache_ratio:0.4752721904410408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 -DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s -INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.11119365692138672 s -DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=306437313048454210550639792996461335026, time:1750767868.130565s req_ids:[8] -DEBUG 06-24 20:24:28 [manager.py:391] -ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:209.2735767364502ms total_cost_time:209.3188762664795ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10839 prompt_cache_len:5151 prompt_cache_ratio:0.47522834209797954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 -DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.1076655387878418 s -INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s -DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=220685543712029237566020074931051307607, time:1750767868.3420935s req_ids:[8] -DEBUG 06-24 20:24:28 [manager.py:391] -ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:185.62960624694824ms total_cost_time:185.67299842834473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10840 prompt_cache_len:5151 prompt_cache_ratio:0.4751845018450184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 -DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10771465301513672 s -INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.10969090461730957 s -DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=196195095218914494071318671488747480319, time:1750767868.5307157s req_ids:[8] -DEBUG 06-24 20:24:28 [manager.py:391] -ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:356.75716400146484ms total_cost_time:356.80246353149414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10841 prompt_cache_len:5151 prompt_cache_ratio:0.4751406696799188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 -DEBUG 06-24 20:24:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:28 [manager.py:224] router recive req id 8 cost time 0.10843276977539062 s -INFO 06-24 20:24:28 [manager.py:68] detokenization recv req id 8 cost time 0.11054396629333496 s -DEBUG 06-24 20:24:28 [manager.py:391] Prefill Batch: batch_id=335029670910414896917996479968678177303, time:1750767868.895319s req_ids:[8] -DEBUG 06-24 20:24:28 [manager.py:391] -ERROR 06-24 20:24:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:204.76818084716797ms total_cost_time:204.80918884277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10842 prompt_cache_len:5151 prompt_cache_ratio:0.47509684560044274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 -DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10820579528808594 s -INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11022067070007324 s -DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=87887203475549119739862138476102386856, time:1750767869.1084838s req_ids:[8] -DEBUG 06-24 20:24:29 [manager.py:391] -ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:28 lightllm_req_id:8 first_token_cost:206.146240234375ms total_cost_time:206.18867874145508ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10843 prompt_cache_len:5151 prompt_cache_ratio:0.475053029604353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 -DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10922098159790039 s -INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11134052276611328 s -DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=219388262463495088475327393191995932341, time:1750767869.3206773s req_ids:[8] -DEBUG 06-24 20:24:29 [manager.py:391] -ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:192.36469268798828ms total_cost_time:192.40951538085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10844 prompt_cache_len:5151 prompt_cache_ratio:0.4750092216894135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 -DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10803961753845215 s -INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.10990405082702637 s -DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=239073059555298522504648309710936953492, time:1750767869.517839s req_ids:[8] -DEBUG 06-24 20:24:29 [manager.py:391] -ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:198.3029842376709ms total_cost_time:198.34589958190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10845 prompt_cache_len:5151 prompt_cache_ratio:0.47496542185338864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 -DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10922908782958984 s -INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11121726036071777 s -DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=283292565900355797946275534999830850510, time:1750767869.724987s req_ids:[8] -DEBUG 06-24 20:24:29 [manager.py:391] -ERROR 06-24 20:24:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:209.37633514404297ms total_cost_time:209.42068099975586ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10846 prompt_cache_len:5151 prompt_cache_ratio:0.47492163009404387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 -DEBUG 06-24 20:24:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:29 [manager.py:224] router recive req id 8 cost time 0.10853314399719238 s -INFO 06-24 20:24:29 [manager.py:68] detokenization recv req id 8 cost time 0.11050009727478027 s -DEBUG 06-24 20:24:29 [manager.py:391] Prefill Batch: batch_id=300581467551556192100840381984206946016, time:1750767869.939804s req_ids:[8] -DEBUG 06-24 20:24:29 [manager.py:391] -ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:29 lightllm_req_id:8 first_token_cost:208.3871364593506ms total_cost_time:208.43029022216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10847 prompt_cache_len:5151 prompt_cache_ratio:0.4748778464091454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 -DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10950374603271484 s -INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11115026473999023 s -DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=75666362721861881167432266497485201020, time:1750767870.154767s req_ids:[8] -DEBUG 06-24 20:24:30 [manager.py:391] -ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:370.0287342071533ms total_cost_time:370.0721263885498ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10848 prompt_cache_len:5151 prompt_cache_ratio:0.4748340707964602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 -DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10784745216369629 s -INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.10950183868408203 s -DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=228779083446625926579925303326570201982, time:1750767870.5300694s req_ids:[8] -DEBUG 06-24 20:24:30 [manager.py:391] -ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:206.76779747009277ms total_cost_time:206.80904388427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10849 prompt_cache_len:5151 prompt_cache_ratio:0.4747903032537561 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 -DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.10873889923095703 s -INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11064743995666504 s -DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=299665707491752528389684804570363213050, time:1750767870.7438598s req_ids:[8] -DEBUG 06-24 20:24:30 [manager.py:391] -ERROR 06-24 20:24:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:207.3671817779541ms total_cost_time:207.4127197265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10850 prompt_cache_len:5151 prompt_cache_ratio:0.4747465437788018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 -DEBUG 06-24 20:24:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:30 [manager.py:224] router recive req id 8 cost time 0.1087653636932373 s -INFO 06-24 20:24:30 [manager.py:68] detokenization recv req id 8 cost time 0.11069345474243164 s -DEBUG 06-24 20:24:30 [manager.py:391] Prefill Batch: batch_id=138937758268697022350421193032769165751, time:1750767870.9585888s req_ids:[8] -DEBUG 06-24 20:24:30 [manager.py:391] -ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:30 lightllm_req_id:8 first_token_cost:210.51859855651855ms total_cost_time:210.56151390075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10851 prompt_cache_len:5151 prompt_cache_ratio:0.47470279236936685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 -DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.1081852912902832 s -INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11017584800720215 s -DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=99505193865291110544526335879655372723, time:1750767871.171841s req_ids:[8] -DEBUG 06-24 20:24:31 [manager.py:391] -ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:207.3376178741455ms total_cost_time:207.3831558227539ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10852 prompt_cache_len:5151 prompt_cache_ratio:0.47465904902322154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 -DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.1076209545135498 s -INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s -DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=139318953999652032244206114862282420416, time:1750767871.392703s req_ids:[8] -DEBUG 06-24 20:24:31 [manager.py:391] -ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:214.34688568115234ms total_cost_time:214.38956260681152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10853 prompt_cache_len:5151 prompt_cache_ratio:0.4746153137381369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 -DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.10822153091430664 s -INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11014938354492188 s -DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=59289135456067394338648058823749583710, time:1750767871.6070938s req_ids:[8] -DEBUG 06-24 20:24:31 [manager.py:391] -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:206.2056064605713ms total_cost_time:206.24828338623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10854 prompt_cache_len:5151 prompt_cache_ratio:0.474571586511885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 -DEBUG 06-24 20:24:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:31 [manager.py:224] router recive req id 8 cost time 0.10850119590759277 s -INFO 06-24 20:24:31 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s -DEBUG 06-24 20:24:31 [manager.py:391] Prefill Batch: batch_id=74041607491872898807968152182603757038, time:1750767871.8200922s req_ids:[8] -DEBUG 06-24 20:24:31 [manager.py:391] -ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:31 lightllm_req_id:8 first_token_cost:396.71993255615234ms total_cost_time:396.76594734191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10855 prompt_cache_len:5151 prompt_cache_ratio:0.4745278673422386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 -DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s -INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.10970377922058105 s -DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=214618193198167544858507238909870046152, time:1750767872.2227414s req_ids:[8] -DEBUG 06-24 20:24:32 [manager.py:391] -ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:185.88733673095703ms total_cost_time:185.9285831451416ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10856 prompt_cache_len:5151 prompt_cache_ratio:0.47448415622697127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 -DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10811543464660645 s -INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11002397537231445 s -DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=267221460617357159327128193690877951519, time:1750767872.4123871s req_ids:[8] -DEBUG 06-24 20:24:32 [manager.py:391] -ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:199.74851608276367ms total_cost_time:199.78976249694824ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10857 prompt_cache_len:5151 prompt_cache_ratio:0.4744404531638574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 -DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10833239555358887 s -INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019396781921387 s -DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=160885142628664941763942458081259491150, time:1750767872.6202855s req_ids:[8] -DEBUG 06-24 20:24:32 [manager.py:391] -ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:200.8349895477295ms total_cost_time:200.87766647338867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10858 prompt_cache_len:5151 prompt_cache_ratio:0.47439675815067234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 -DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:32 [manager.py:224] router recive req id 8 cost time 0.10890507698059082 s -INFO 06-24 20:24:32 [manager.py:68] detokenization recv req id 8 cost time 0.11117053031921387 s -DEBUG 06-24 20:24:32 [manager.py:391] Prefill Batch: batch_id=89893478114435880745364933513180263699, time:1750767872.826025s req_ids:[8] -DEBUG 06-24 20:24:32 [manager.py:391] -ERROR 06-24 20:24:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:24:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 45404.392 tokens/s -DEBUG 06-24 20:24:32 [stats.py:37] Avg prompt tokens throughput: 45395.916 tokens/s -DEBUG 06-24 20:24:32 [stats.py:37] Avg generate tokens throughput: 8.477 tokens/s -INFO 06-24 20:24:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:195.48940658569336ms total_cost_time:195.52969932556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10859 prompt_cache_len:5151 prompt_cache_ratio:0.474353071185192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 -DEBUG 06-24 20:24:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10757255554199219 s -INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.10939407348632812 s -DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=68020185219981003421405515163188924086, time:1750767873.0274575s req_ids:[8] -DEBUG 06-24 20:24:33 [manager.py:391] -ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:32 lightllm_req_id:8 first_token_cost:194.37670707702637ms total_cost_time:194.41723823547363ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10860 prompt_cache_len:5151 prompt_cache_ratio:0.47430939226519336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 -DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:33 [batch.py:51] router release req id 8 -DEBUG 06-24 20:24:33 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:33 [manager.py:283] -DEBUG 06-24 20:24:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:33 [manager.py:284] -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.11083436012268066 s -INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.11270833015441895 s -DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=278280742067585235608230908389161684435, time:1750767873.229916s req_ids:[8] -DEBUG 06-24 20:24:33 [manager.py:391] -ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:203.9356231689453ms total_cost_time:203.9775848388672ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10861 prompt_cache_len:5151 prompt_cache_ratio:0.4742657213884541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 -DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10883522033691406 s -INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.11086559295654297 s -DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=293444444034670357492184309375589593003, time:1750767873.4397569s req_ids:[8] -DEBUG 06-24 20:24:33 [manager.py:391] -ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:373.75426292419434ms total_cost_time:373.798131942749ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10862 prompt_cache_len:5151 prompt_cache_ratio:0.47422205855275273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 -DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:33 [manager.py:224] router recive req id 8 cost time 0.10762286186218262 s -INFO 06-24 20:24:33 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s -DEBUG 06-24 20:24:33 [manager.py:391] Prefill Batch: batch_id=83792886780307532386538439711702016073, time:1750767873.8217402s req_ids:[8] -DEBUG 06-24 20:24:33 [manager.py:391] -ERROR 06-24 20:24:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:216.4895534515381ms total_cost_time:216.53175354003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10863 prompt_cache_len:5151 prompt_cache_ratio:0.47417840375586856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 -DEBUG 06-24 20:24:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10820603370666504 s -INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s -DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=280553196755246944952367061962623130830, time:1750767874.0470817s req_ids:[8] -DEBUG 06-24 20:24:34 [manager.py:391] -ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:33 lightllm_req_id:8 first_token_cost:208.49227905273438ms total_cost_time:208.53710174560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10864 prompt_cache_len:5151 prompt_cache_ratio:0.4741347569955817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 -DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10782003402709961 s -INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.10973310470581055 s -DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=222644551111961605753970706817532874320, time:1750767874.2590318s req_ids:[8] -DEBUG 06-24 20:24:34 [manager.py:391] -ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:207.73005485534668ms total_cost_time:207.77583122253418ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10865 prompt_cache_len:5151 prompt_cache_ratio:0.47409111826967326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 -DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s -INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11067724227905273 s -DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=36384078836160988772567403311637459425, time:1750767874.487443s req_ids:[8] -DEBUG 06-24 20:24:34 [manager.py:391] -ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:216.01057052612305ms total_cost_time:216.05324745178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10866 prompt_cache_len:5151 prompt_cache_ratio:0.4740474875759249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 -DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10866141319274902 s -INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11053347587585449 s -DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=170675586608628021412004808859431347869, time:1750767874.6986504s req_ids:[8] -DEBUG 06-24 20:24:34 [manager.py:391] -ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:200.91843605041504ms total_cost_time:200.9599208831787ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10867 prompt_cache_len:5151 prompt_cache_ratio:0.47400386491211927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 -DEBUG 06-24 20:24:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:34 [manager.py:224] router recive req id 8 cost time 0.10876798629760742 s -INFO 06-24 20:24:34 [manager.py:68] detokenization recv req id 8 cost time 0.11076784133911133 s -DEBUG 06-24 20:24:34 [manager.py:391] Prefill Batch: batch_id=80997773735846761352858698576908829805, time:1750767874.9066362s req_ids:[8] -DEBUG 06-24 20:24:34 [manager.py:391] -ERROR 06-24 20:24:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:34 lightllm_req_id:8 first_token_cost:198.96888732910156ms total_cost_time:199.01275634765625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10868 prompt_cache_len:5151 prompt_cache_ratio:0.4739602502760398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 -DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10892057418823242 s -INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s -DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=230168416306254965909628278873836029935, time:1750767875.111541s req_ids:[8] -DEBUG 06-24 20:24:35 [manager.py:391] -ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:373.3396530151367ms total_cost_time:373.3832836151123ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10869 prompt_cache_len:5151 prompt_cache_ratio:0.4739166436654706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 -DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10798788070678711 s -INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.10998272895812988 s -DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=5374375838922691930202817543991614742, time:1750767875.4946747s req_ids:[8] -DEBUG 06-24 20:24:35 [manager.py:391] -ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:207.04293251037598ms total_cost_time:207.08703994750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10870 prompt_cache_len:5151 prompt_cache_ratio:0.4738730450781969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 -DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.10774350166320801 s -INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.10958409309387207 s -DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=106502040168456818274107782591062261966, time:1750767875.7204733s req_ids:[8] -DEBUG 06-24 20:24:35 [manager.py:391] -ERROR 06-24 20:24:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:214.26987648010254ms total_cost_time:214.3115997314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10871 prompt_cache_len:5151 prompt_cache_ratio:0.4738294545120044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 -DEBUG 06-24 20:24:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:35 [manager.py:224] router recive req id 8 cost time 0.1096189022064209 s -INFO 06-24 20:24:35 [manager.py:68] detokenization recv req id 8 cost time 0.11168193817138672 s -DEBUG 06-24 20:24:35 [manager.py:391] Prefill Batch: batch_id=119186931908005883974108318337670792288, time:1750767875.9322429s req_ids:[8] -DEBUG 06-24 20:24:35 [manager.py:391] -ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:35 lightllm_req_id:8 first_token_cost:206.44044876098633ms total_cost_time:206.4824104309082ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10872 prompt_cache_len:5151 prompt_cache_ratio:0.4737858719646799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 -DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.1089925765991211 s -INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.11110568046569824 s -DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=177435800410538846829024406610637805353, time:1750767876.145778s req_ids:[8] -DEBUG 06-24 20:24:36 [manager.py:391] -ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:208.4822654724121ms total_cost_time:208.5261344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10873 prompt_cache_len:5151 prompt_cache_ratio:0.47374229743401086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 -DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10831594467163086 s -INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.11030244827270508 s -DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=69923881956386399398367749383381656968, time:1750767876.359894s req_ids:[8] -DEBUG 06-24 20:24:36 [manager.py:391] -ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:209.08594131469727ms total_cost_time:209.12933349609375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10874 prompt_cache_len:5151 prompt_cache_ratio:0.4736987309177855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 -DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s -INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.1105501651763916 s -DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=107627588829166425275469481876762792898, time:1750767876.5763588s req_ids:[8] -DEBUG 06-24 20:24:36 [manager.py:391] -ERROR 06-24 20:24:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:204.56719398498535ms total_cost_time:204.60915565490723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10875 prompt_cache_len:5151 prompt_cache_ratio:0.4736551724137931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 -DEBUG 06-24 20:24:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:36 [manager.py:224] router recive req id 8 cost time 0.10785317420959473 s -INFO 06-24 20:24:36 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s -DEBUG 06-24 20:24:36 [manager.py:391] Prefill Batch: batch_id=126200423720258727818610355713433679090, time:1750767876.7894304s req_ids:[8] -DEBUG 06-24 20:24:36 [manager.py:391] -ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:36 lightllm_req_id:8 first_token_cost:378.223180770874ms total_cost_time:378.2694339752197ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10876 prompt_cache_len:5151 prompt_cache_ratio:0.4736116219198235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 -DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10834455490112305 s -INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102745532989502 s -DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=339971271615870211527931684935746623285, time:1750767877.1773806s req_ids:[8] -DEBUG 06-24 20:24:37 [manager.py:391] -ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:208.74881744384766ms total_cost_time:208.79459381103516ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:10877 prompt_cache_len:5151 prompt_cache_ratio:0.47356807943366735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 -DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10852956771850586 s -INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.1104886531829834 s -DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=92492642501727646258440637661620296617, time:1750767877.4000585s req_ids:[8] -DEBUG 06-24 20:24:37 [manager.py:391] -ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:217.02241897583008ms total_cost_time:217.06557273864746ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10878 prompt_cache_len:5151 prompt_cache_ratio:0.47352454495311636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 -DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s -INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.11017894744873047 s -DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=208590541246164892376004351661097941947, time:1750767877.614999s req_ids:[8] -DEBUG 06-24 20:24:37 [manager.py:391] -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:209.45453643798828ms total_cost_time:209.50007438659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10879 prompt_cache_len:5151 prompt_cache_ratio:0.47348101847596286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 -DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:37 [manager.py:224] router recive req id 8 cost time 0.1089475154876709 s -INFO 06-24 20:24:37 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s -DEBUG 06-24 20:24:37 [manager.py:391] Prefill Batch: batch_id=305262356473397094457770243311911652873, time:1750767877.8309855s req_ids:[8] -DEBUG 06-24 20:24:37 [manager.py:391] -ERROR 06-24 20:24:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:229.77781295776367ms total_cost_time:229.82192039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10880 prompt_cache_len:5151 prompt_cache_ratio:0.4734375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 -DEBUG 06-24 20:24:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10840272903442383 s -INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11026120185852051 s -DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=282365897368069762220675826924039925104, time:1750767878.0698886s req_ids:[8] -DEBUG 06-24 20:24:38 [manager.py:391] -ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:37 lightllm_req_id:8 first_token_cost:203.38106155395508ms total_cost_time:203.42564582824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10881 prompt_cache_len:5151 prompt_cache_ratio:0.4733939895230218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 -DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10917949676513672 s -INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11115241050720215 s -DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=259802676199441936582986270984547564185, time:1750767878.278576s req_ids:[8] -DEBUG 06-24 20:24:38 [manager.py:391] -ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:206.77757263183594ms total_cost_time:206.82406425476074ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:10882 prompt_cache_len:5151 prompt_cache_ratio:0.473350487042823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 -DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.1078791618347168 s -INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.10957884788513184 s -DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=34047204652206304886801678833756286288, time:1750767878.4930499s req_ids:[8] -DEBUG 06-24 20:24:38 [manager.py:391] -ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:369.3690299987793ms total_cost_time:369.4119453430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10883 prompt_cache_len:5151 prompt_cache_ratio:0.4733069925571993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 -DEBUG 06-24 20:24:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:38 [manager.py:224] router recive req id 8 cost time 0.10839176177978516 s -INFO 06-24 20:24:38 [manager.py:68] detokenization recv req id 8 cost time 0.11031889915466309 s -DEBUG 06-24 20:24:38 [manager.py:391] Prefill Batch: batch_id=317120276315686861681276196595491231137, time:1750767878.8678834s req_ids:[8] -DEBUG 06-24 20:24:38 [manager.py:391] -ERROR 06-24 20:24:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:204.24914360046387ms total_cost_time:204.29205894470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10884 prompt_cache_len:5151 prompt_cache_ratio:0.47326350606394707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 -DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s -INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.1095733642578125 s -DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=33944030376262388927398925524128089636, time:1750767879.0941868s req_ids:[8] -DEBUG 06-24 20:24:39 [manager.py:391] -ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:38 lightllm_req_id:8 first_token_cost:218.2607650756836ms total_cost_time:218.30391883850098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10885 prompt_cache_len:5151 prompt_cache_ratio:0.47322002756086357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 -DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10925984382629395 s -INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11127638816833496 s -DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=107262527788658022358999065285327695054, time:1750767879.3045833s req_ids:[8] -DEBUG 06-24 20:24:39 [manager.py:391] -ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:203.23991775512695ms total_cost_time:203.28164100646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10886 prompt_cache_len:5151 prompt_cache_ratio:0.47317655704574685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 -DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s -INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11035990715026855 s -DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=46081308819362227579002331584933992547, time:1750767879.5124154s req_ids:[8] -DEBUG 06-24 20:24:39 [manager.py:391] -ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:201.03716850280762ms total_cost_time:201.0817527770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10887 prompt_cache_len:5151 prompt_cache_ratio:0.4731330945163957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 -DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.1090705394744873 s -INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.11110377311706543 s -DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=10021133459217627545228506299245332245, time:1750767879.7217019s req_ids:[8] -DEBUG 06-24 20:24:39 [manager.py:391] -ERROR 06-24 20:24:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:208.01305770874023ms total_cost_time:208.0554962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10888 prompt_cache_len:5151 prompt_cache_ratio:0.4730896399706098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 -DEBUG 06-24 20:24:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:39 [manager.py:224] router recive req id 8 cost time 0.10734415054321289 s -INFO 06-24 20:24:39 [manager.py:68] detokenization recv req id 8 cost time 0.1093454360961914 s -DEBUG 06-24 20:24:39 [manager.py:391] Prefill Batch: batch_id=263822063955114068278851596442481232629, time:1750767879.9365194s req_ids:[8] -DEBUG 06-24 20:24:39 [manager.py:391] -ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:39 lightllm_req_id:8 first_token_cost:207.3495388031006ms total_cost_time:207.39245414733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10889 prompt_cache_len:5151 prompt_cache_ratio:0.47304619340618975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 -DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10822892189025879 s -INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.11028289794921875 s -DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=210280134501110067164155157742607221410, time:1750767880.1484609s req_ids:[8] -DEBUG 06-24 20:24:40 [manager.py:391] -ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:380.5241584777832ms total_cost_time:380.5663585662842ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10890 prompt_cache_len:5151 prompt_cache_ratio:0.4730027548209366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 -DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10809874534606934 s -INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.10922479629516602 s -DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=265352066313410183063206468012034204845, time:1750767880.5355566s req_ids:[8] -DEBUG 06-24 20:24:40 [manager.py:391] -ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:165.5733585357666ms total_cost_time:165.61603546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10891 prompt_cache_len:5151 prompt_cache_ratio:0.47295932421265263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 -DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10839176177978516 s -INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.11030769348144531 s -DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=251456888557177258443813027067746786420, time:1750767880.7052724s req_ids:[8] -DEBUG 06-24 20:24:40 [manager.py:391] -ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:185.05501747131348ms total_cost_time:185.09721755981445ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10892 prompt_cache_len:5151 prompt_cache_ratio:0.47291590157914065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 -DEBUG 06-24 20:24:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:40 [manager.py:224] router recive req id 8 cost time 0.10866189002990723 s -INFO 06-24 20:24:40 [manager.py:68] detokenization recv req id 8 cost time 0.10973453521728516 s -DEBUG 06-24 20:24:40 [manager.py:391] Prefill Batch: batch_id=245914695606664708775576781268815400974, time:1750767880.9008198s req_ids:[8] -DEBUG 06-24 20:24:40 [manager.py:391] -ERROR 06-24 20:24:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:206.09450340270996ms total_cost_time:206.15243911743164ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:10893 prompt_cache_len:5151 prompt_cache_ratio:0.4728724869182043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 -DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10891294479370117 s -INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s -DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=101887825261589304007208350160206693217, time:1750767881.1199906s req_ids:[8] -DEBUG 06-24 20:24:41 [manager.py:391] -ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:40 lightllm_req_id:8 first_token_cost:224.50494766235352ms total_cost_time:224.5476245880127ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10894 prompt_cache_len:5151 prompt_cache_ratio:0.47282908022764825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 -DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s -INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11063098907470703 s -DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=214954898533342632061536902634519897295, time:1750767881.3558893s req_ids:[8] -DEBUG 06-24 20:24:41 [manager.py:391] -ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:211.1659049987793ms total_cost_time:211.20762825012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10895 prompt_cache_len:5151 prompt_cache_ratio:0.47278568150527767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 -DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.10734391212463379 s -INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.10932683944702148 s -DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=218337513485535596845324685750473561560, time:1750767881.5618024s req_ids:[8] -DEBUG 06-24 20:24:41 [manager.py:391] -ERROR 06-24 20:24:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:192.4586296081543ms total_cost_time:192.49963760375977ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10896 prompt_cache_len:5151 prompt_cache_ratio:0.4727422907488987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 -DEBUG 06-24 20:24:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:41 [manager.py:224] router recive req id 8 cost time 0.1085808277130127 s -INFO 06-24 20:24:41 [manager.py:68] detokenization recv req id 8 cost time 0.11056733131408691 s -DEBUG 06-24 20:24:41 [manager.py:391] Prefill Batch: batch_id=10029732828477544196174222636642369191, time:1750767881.7576942s req_ids:[8] -DEBUG 06-24 20:24:41 [manager.py:391] -ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:41 lightllm_req_id:8 first_token_cost:365.9493923187256ms total_cost_time:365.99135398864746ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10897 prompt_cache_len:5151 prompt_cache_ratio:0.47269890795631825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 -DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10866427421569824 s -INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11053729057312012 s -DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=89298714437943802111078223951949266152, time:1750767882.130663s req_ids:[8] -DEBUG 06-24 20:24:42 [manager.py:391] -ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:208.2347869873047ms total_cost_time:208.27746391296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10898 prompt_cache_len:5151 prompt_cache_ratio:0.4726555331253441 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 -DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10860943794250488 s -INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11050939559936523 s -DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=125287686484093639407802940085021454986, time:1750767882.3441849s req_ids:[8] -DEBUG 06-24 20:24:42 [manager.py:391] -ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:202.35157012939453ms total_cost_time:202.3937702178955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10899 prompt_cache_len:5151 prompt_cache_ratio:0.47261216625378477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 -DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10972309112548828 s -INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11174726486206055 s -DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=194015431259488164473908552060062006398, time:1750767882.5541115s req_ids:[8] -DEBUG 06-24 20:24:42 [manager.py:391] -ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:214.09916877746582ms total_cost_time:214.1401767730713ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10900 prompt_cache_len:5151 prompt_cache_ratio:0.47256880733944956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 -DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10827279090881348 s -INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017990112304688 s -DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=101445429387619800060986762653156020863, time:1750767882.786726s req_ids:[8] -DEBUG 06-24 20:24:42 [manager.py:391] -ERROR 06-24 20:24:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:212.62145042419434ms total_cost_time:212.66531944274902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10901 prompt_cache_len:5151 prompt_cache_ratio:0.4725254563801486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 -DEBUG 06-24 20:24:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:42 [manager.py:224] router recive req id 8 cost time 0.10784077644348145 s -INFO 06-24 20:24:42 [manager.py:68] detokenization recv req id 8 cost time 0.10982441902160645 s -DEBUG 06-24 20:24:42 [manager.py:391] Prefill Batch: batch_id=159820124881652022348939545354119478391, time:1750767882.9928703s req_ids:[8] -DEBUG 06-24 20:24:42 [manager.py:391] -DEBUG 06-24 20:24:42 [stats.py:37] Avg tokens(prompt+generate) throughput: 46392.159 tokens/s -DEBUG 06-24 20:24:42 [stats.py:37] Avg prompt tokens throughput: 46383.732 tokens/s -DEBUG 06-24 20:24:42 [stats.py:37] Avg generate tokens throughput: 8.427 tokens/s -ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:42 lightllm_req_id:8 first_token_cost:202.67033576965332ms total_cost_time:202.7122974395752ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10902 prompt_cache_len:5151 prompt_cache_ratio:0.4724821133736929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 -DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.1087350845336914 s -INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.1107933521270752 s -DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=272252779466097949260122015523550826908, time:1750767883.200577s req_ids:[8] -DEBUG 06-24 20:24:43 [manager.py:391] -ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:198.2250213623047ms total_cost_time:198.26698303222656ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10903 prompt_cache_len:5151 prompt_cache_ratio:0.47243877831789416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 -DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10765767097473145 s -INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.10964107513427734 s -DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=169695719517349931775478722777873260303, time:1750767883.4069605s req_ids:[8] -DEBUG 06-24 20:24:43 [manager.py:391] -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:370.1894283294678ms total_cost_time:370.23210525512695ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10904 prompt_cache_len:5151 prompt_cache_ratio:0.4723954512105649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 -DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s -INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.11008429527282715 s -DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=51330402607780362524125506241512606780, time:1750767883.788702s req_ids:[8] -DEBUG 06-24 20:24:43 [manager.py:391] -ERROR 06-24 20:24:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:210.27612686157227ms total_cost_time:210.31594276428223ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10905 prompt_cache_len:5151 prompt_cache_ratio:0.4723521320495186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 -DEBUG 06-24 20:24:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:43 [manager.py:224] router recive req id 8 cost time 0.10840129852294922 s -INFO 06-24 20:24:43 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s -DEBUG 06-24 20:24:43 [manager.py:391] Prefill Batch: batch_id=212466844149246405288824435608906560238, time:1750767883.999424s req_ids:[8] -DEBUG 06-24 20:24:43 [manager.py:391] -ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:43 lightllm_req_id:8 first_token_cost:206.44402503967285ms total_cost_time:206.49027824401855ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:10906 prompt_cache_len:5151 prompt_cache_ratio:0.47230882083256925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 -DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10775232315063477 s -INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.1096491813659668 s -DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=136852280205437360063724410282738171784, time:1750767884.238032s req_ids:[8] -DEBUG 06-24 20:24:44 [manager.py:391] -ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:240.92507362365723ms total_cost_time:240.9684658050537ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10907 prompt_cache_len:5151 prompt_cache_ratio:0.4722655175575319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 -DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10857486724853516 s -INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.1106255054473877 s -DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=229181597960853191363201697234373304377, time:1750767884.470303s req_ids:[8] -DEBUG 06-24 20:24:44 [manager.py:391] -ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:193.66693496704102ms total_cost_time:193.7112808227539ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10908 prompt_cache_len:5151 prompt_cache_ratio:0.4722222222222222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 -DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10897970199584961 s -INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.11080479621887207 s -DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=205035126968644651400140422837560906596, time:1750767884.66123s req_ids:[8] -DEBUG 06-24 20:24:44 [manager.py:391] -ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:203.91297340393066ms total_cost_time:203.95398139953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:10909 prompt_cache_len:5151 prompt_cache_ratio:0.4721789348244569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 -DEBUG 06-24 20:24:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:44 [manager.py:224] router recive req id 8 cost time 0.10928225517272949 s -INFO 06-24 20:24:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118388175964355 s -DEBUG 06-24 20:24:44 [manager.py:391] Prefill Batch: batch_id=82967006429085936762468577930320448417, time:1750767884.8732862s req_ids:[8] -DEBUG 06-24 20:24:44 [manager.py:391] -ERROR 06-24 20:24:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:221.62818908691406ms total_cost_time:221.67062759399414ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10910 prompt_cache_len:5151 prompt_cache_ratio:0.47213565536205315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 -DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10862040519714355 s -INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.11044979095458984 s -DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=315273219568503966902951962669287164399, time:1750767885.0999763s req_ids:[8] -DEBUG 06-24 20:24:45 [manager.py:391] -ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:44 lightllm_req_id:8 first_token_cost:367.6140308380127ms total_cost_time:367.6578998565674ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10911 prompt_cache_len:5151 prompt_cache_ratio:0.47209238383282925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 -DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10786843299865723 s -INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.10966992378234863 s -DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=106450795924799869861383999177691144656, time:1750767885.4955642s req_ids:[8] -DEBUG 06-24 20:24:45 [manager.py:391] -ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:225.27790069580078ms total_cost_time:225.32081604003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10912 prompt_cache_len:5151 prompt_cache_ratio:0.4720491202346041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 -DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10841584205627441 s -INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s -DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=132659461931309042086297242559113777168, time:1750767885.7193222s req_ids:[8] -DEBUG 06-24 20:24:45 [manager.py:391] -ERROR 06-24 20:24:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:229.84886169433594ms total_cost_time:229.89249229431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10913 prompt_cache_len:5151 prompt_cache_ratio:0.4720058645651975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 -DEBUG 06-24 20:24:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:45 [manager.py:224] router recive req id 8 cost time 0.10925698280334473 s -INFO 06-24 20:24:45 [manager.py:68] detokenization recv req id 8 cost time 0.11110305786132812 s -DEBUG 06-24 20:24:45 [manager.py:391] Prefill Batch: batch_id=41940932306153716515687513974792653497, time:1750767885.9501789s req_ids:[8] -DEBUG 06-24 20:24:45 [manager.py:391] -ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:45 lightllm_req_id:8 first_token_cost:211.1225128173828ms total_cost_time:211.16256713867188ms,out_token_counter:1 mean_per_token_cost_time: 0.0400543212890625ms prompt_token_num:10914 prompt_cache_len:5151 prompt_cache_ratio:0.4719626168224299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 -DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10851192474365234 s -INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11046600341796875 s -DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=233605971989351610780515739303899514610, time:1750767886.161082s req_ids:[8] -DEBUG 06-24 20:24:46 [manager.py:391] -ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:205.3513526916504ms total_cost_time:205.39259910583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10915 prompt_cache_len:5151 prompt_cache_ratio:0.4719193770041228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 -DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10812211036682129 s -INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.10992598533630371 s -DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=58231030304581054856252346791869147386, time:1750767886.3827894s req_ids:[8] -DEBUG 06-24 20:24:46 [manager.py:391] -ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:213.4850025177002ms total_cost_time:213.52791786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10916 prompt_cache_len:5151 prompt_cache_ratio:0.4718761451080982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 -DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.10854196548461914 s -INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11049127578735352 s -DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=11656777408027231745733726448285505640, time:1750767886.5932484s req_ids:[8] -DEBUG 06-24 20:24:46 [manager.py:391] -ERROR 06-24 20:24:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:210.16645431518555ms total_cost_time:210.2217674255371ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:10917 prompt_cache_len:5151 prompt_cache_ratio:0.4718329211321792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 -DEBUG 06-24 20:24:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:46 [manager.py:224] router recive req id 8 cost time 0.1080315113067627 s -INFO 06-24 20:24:46 [manager.py:68] detokenization recv req id 8 cost time 0.11004114151000977 s -DEBUG 06-24 20:24:46 [manager.py:391] Prefill Batch: batch_id=100139812795096300803749440061667380995, time:1750767886.8087354s req_ids:[8] -DEBUG 06-24 20:24:46 [manager.py:391] -INFO 06-24 20:24:46 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:46 lightllm_req_id:8 first_token_cost:368.83020401000977ms total_cost_time:368.87216567993164ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10918 prompt_cache_len:5151 prompt_cache_ratio:0.47178970507418944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 -DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10795092582702637 s -INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.10996627807617188 s -DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=285788565624526851227342902177717252195, time:1750767887.1830673s req_ids:[8] -DEBUG 06-24 20:24:47 [manager.py:391] -ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:208.41693878173828ms total_cost_time:208.45890045166016ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10919 prompt_cache_len:5151 prompt_cache_ratio:0.4717464969319535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 -DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10846972465515137 s -INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s -DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=97698324848107576033834270346391985528, time:1750767887.3978343s req_ids:[8] -DEBUG 06-24 20:24:47 [manager.py:391] -ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:204.32162284851074ms total_cost_time:204.36358451843262ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10920 prompt_cache_len:5151 prompt_cache_ratio:0.4717032967032967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 -DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10847067832946777 s -INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.11063194274902344 s -DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=14140219593966169772838459958713913796, time:1750767887.6083336s req_ids:[8] -DEBUG 06-24 20:24:47 [manager.py:391] -ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:205.69634437561035ms total_cost_time:205.7361602783203ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10921 prompt_cache_len:5151 prompt_cache_ratio:0.4716601043860452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 -DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:47 [manager.py:224] router recive req id 8 cost time 0.10739755630493164 s -INFO 06-24 20:24:47 [manager.py:68] detokenization recv req id 8 cost time 0.10937190055847168 s -DEBUG 06-24 20:24:47 [manager.py:391] Prefill Batch: batch_id=216412995334650003412381185075546777784, time:1750767887.8192883s req_ids:[8] -DEBUG 06-24 20:24:47 [manager.py:391] -ERROR 06-24 20:24:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:204.0078639984131ms total_cost_time:204.05220985412598ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10922 prompt_cache_len:5151 prompt_cache_ratio:0.471616919978026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 -DEBUG 06-24 20:24:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.1085500717163086 s -INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.11059188842773438 s -DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=208946483766619121099759892243246796513, time:1750767888.0428193s req_ids:[8] -DEBUG 06-24 20:24:48 [manager.py:391] -ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:47 lightllm_req_id:8 first_token_cost:220.7636833190918ms total_cost_time:220.80683708190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10923 prompt_cache_len:5151 prompt_cache_ratio:0.47157374347706676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 -DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10859084129333496 s -INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.11051416397094727 s -DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=87935912499322842865915877007782072511, time:1750767888.258504s req_ids:[8] -DEBUG 06-24 20:24:48 [manager.py:391] -ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:207.08727836608887ms total_cost_time:207.12924003601074ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10924 prompt_cache_len:5151 prompt_cache_ratio:0.471530574880996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 -DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10897445678710938 s -INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.1109018325805664 s -DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=264784128148306195022092143807307220878, time:1750767888.4706998s req_ids:[8] -DEBUG 06-24 20:24:48 [manager.py:391] -ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:373.9173412322998ms total_cost_time:373.9604949951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10925 prompt_cache_len:5151 prompt_cache_ratio:0.471487414187643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 -DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:48 [manager.py:224] router recive req id 8 cost time 0.10450053215026855 s -INFO 06-24 20:24:48 [manager.py:68] detokenization recv req id 8 cost time 0.106414794921875 s -DEBUG 06-24 20:24:48 [manager.py:391] Prefill Batch: batch_id=229260501903678432970489491099494196926, time:1750767888.8507211s req_ids:[8] -DEBUG 06-24 20:24:48 [manager.py:391] -ERROR 06-24 20:24:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:208.4367275238037ms total_cost_time:208.4805965423584ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10926 prompt_cache_len:5151 prompt_cache_ratio:0.471444261394838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 -DEBUG 06-24 20:24:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10819602012634277 s -INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11025381088256836 s -DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=220604183703092022419360267287316948963, time:1750767889.0725574s req_ids:[8] -DEBUG 06-24 20:24:49 [manager.py:391] -ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:48 lightllm_req_id:8 first_token_cost:228.8060188293457ms total_cost_time:228.84654998779297ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10927 prompt_cache_len:5151 prompt_cache_ratio:0.47140111650041183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 -DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10865187644958496 s -INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11059689521789551 s -DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=318720838009962767872372776613120418306, time:1750767889.3001342s req_ids:[8] -DEBUG 06-24 20:24:49 [manager.py:391] -ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:205.352783203125ms total_cost_time:205.39402961730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10928 prompt_cache_len:5151 prompt_cache_ratio:0.4713579795021962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 -DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10882830619812012 s -INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11098957061767578 s -DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=68855842015969667642684444504385199708, time:1750767889.511678s req_ids:[8] -DEBUG 06-24 20:24:49 [manager.py:391] -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:207.0331573486328ms total_cost_time:207.0748805999756ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10929 prompt_cache_len:5151 prompt_cache_ratio:0.4713148503980236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 -DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10819530487060547 s -INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11022734642028809 s -DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=246159267438483442789715534316995109189, time:1750767889.7456493s req_ids:[8] -DEBUG 06-24 20:24:49 [manager.py:391] -ERROR 06-24 20:24:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:220.60227394104004ms total_cost_time:220.64495086669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10930 prompt_cache_len:5151 prompt_cache_ratio:0.47127172918572735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 -DEBUG 06-24 20:24:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:49 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s -INFO 06-24 20:24:49 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s -DEBUG 06-24 20:24:49 [manager.py:391] Prefill Batch: batch_id=164118021191299944436920485053693659340, time:1750767889.953331s req_ids:[8] -DEBUG 06-24 20:24:49 [manager.py:391] -ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:49 lightllm_req_id:8 first_token_cost:201.80559158325195ms total_cost_time:201.8454074859619ms,out_token_counter:1 mean_per_token_cost_time: 0.03981590270996094ms prompt_token_num:10931 prompt_cache_len:5151 prompt_cache_ratio:0.4712286158631415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 -DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.10887384414672852 s -INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11072826385498047 s -DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=102928109006884212720348205073220444329, time:1750767890.1596918s req_ids:[8] -DEBUG 06-24 20:24:50 [manager.py:391] -ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:366.08290672302246ms total_cost_time:366.12606048583984ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10932 prompt_cache_len:5151 prompt_cache_ratio:0.471185510428101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 -DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.1084897518157959 s -INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11046099662780762 s -DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=19069971729744856425886734276091272120, time:1750767890.532725s req_ids:[8] -DEBUG 06-24 20:24:50 [manager.py:391] -ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:211.6093635559082ms total_cost_time:211.65013313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:10933 prompt_cache_len:5151 prompt_cache_ratio:0.47114241287844144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 -DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.1089169979095459 s -INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11091804504394531 s -DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=148679000241434196806065301444801950136, time:1750767890.7508476s req_ids:[8] -DEBUG 06-24 20:24:50 [manager.py:391] -ERROR 06-24 20:24:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:203.59206199645996ms total_cost_time:203.63521575927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10934 prompt_cache_len:5151 prompt_cache_ratio:0.47109932321199927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 -DEBUG 06-24 20:24:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:50 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s -INFO 06-24 20:24:50 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s -DEBUG 06-24 20:24:50 [manager.py:391] Prefill Batch: batch_id=105561534597595022551962044313357621383, time:1750767890.961276s req_ids:[8] -DEBUG 06-24 20:24:50 [manager.py:391] -ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:50 lightllm_req_id:8 first_token_cost:204.60128784179688ms total_cost_time:204.64539527893066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10935 prompt_cache_len:5151 prompt_cache_ratio:0.4710562414266118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 -DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10811114311218262 s -INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11023640632629395 s -DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=135692795865675350237709745462432514149, time:1750767891.1706388s req_ids:[8] -DEBUG 06-24 20:24:51 [manager.py:391] -ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:211.25006675720215ms total_cost_time:211.29322052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10936 prompt_cache_len:5151 prompt_cache_ratio:0.47101316752011707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 -DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10840392112731934 s -INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s -DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=32380444910491916643482462498259533801, time:1750767891.3868294s req_ids:[8] -DEBUG 06-24 20:24:51 [manager.py:391] -ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:211.5461826324463ms total_cost_time:211.58933639526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10937 prompt_cache_len:5151 prompt_cache_ratio:0.47097010149035384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 -DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10880613327026367 s -INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11082148551940918 s -DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=68937887959954623273703679621972462857, time:1750767891.6181188s req_ids:[8] -DEBUG 06-24 20:24:51 [manager.py:391] -ERROR 06-24 20:24:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:220.68047523498535ms total_cost_time:220.72291374206543ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10938 prompt_cache_len:5151 prompt_cache_ratio:0.47092704333516183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 -DEBUG 06-24 20:24:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:51 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s -INFO 06-24 20:24:51 [manager.py:68] detokenization recv req id 8 cost time 0.11058640480041504 s -DEBUG 06-24 20:24:51 [manager.py:391] Prefill Batch: batch_id=294588956503756199855085351343378663918, time:1750767891.832062s req_ids:[8] -DEBUG 06-24 20:24:51 [manager.py:391] -ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:51 lightllm_req_id:8 first_token_cost:377.41804122924805ms total_cost_time:377.4607181549072ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10939 prompt_cache_len:5151 prompt_cache_ratio:0.4708839930523814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 -DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10870814323425293 s -INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.11069846153259277 s -DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=201435220906460308546142884443913596009, time:1750767892.2160795s req_ids:[8] -DEBUG 06-24 20:24:52 [manager.py:391] -ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:207.98945426940918ms total_cost_time:208.09245109558105ms,out_token_counter:1 mean_per_token_cost_time: 0.102996826171875ms prompt_token_num:10940 prompt_cache_len:5151 prompt_cache_ratio:0.47084095063985376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 -DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10914111137390137 s -INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.1111905574798584 s -DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=116164649243462971563752933598663948326, time:1750767892.4299443s req_ids:[8] -DEBUG 06-24 20:24:52 [manager.py:391] -ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:211.5030288696289ms total_cost_time:211.54499053955078ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10941 prompt_cache_len:5151 prompt_cache_ratio:0.4707979160954209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 -DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10729479789733887 s -INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.10904979705810547 s -DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=91735492980976720210806838287820443763, time:1750767892.6713095s req_ids:[8] -DEBUG 06-24 20:24:52 [manager.py:391] -ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:223.03104400634766ms total_cost_time:223.07586669921875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10942 prompt_cache_len:5151 prompt_cache_ratio:0.4707548894169256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 -DEBUG 06-24 20:24:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:52 [manager.py:224] router recive req id 8 cost time 0.10902881622314453 s -INFO 06-24 20:24:52 [manager.py:68] detokenization recv req id 8 cost time 0.1109917163848877 s -DEBUG 06-24 20:24:52 [manager.py:391] Prefill Batch: batch_id=176785476877511775210970924633655974045, time:1750767892.8790798s req_ids:[8] -DEBUG 06-24 20:24:52 [manager.py:391] -ERROR 06-24 20:24:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:204.60939407348633ms total_cost_time:204.6496868133545ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:10943 prompt_cache_len:5151 prompt_cache_ratio:0.4707118706022115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 -DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10828566551208496 s -INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s -DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=171353771779874514667167471840540004477, time:1750767893.0931783s req_ids:[8] -DEBUG 06-24 20:24:53 [manager.py:391] -DEBUG 06-24 20:24:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 45432.913 tokens/s -DEBUG 06-24 20:24:53 [stats.py:37] Avg prompt tokens throughput: 45424.596 tokens/s -DEBUG 06-24 20:24:53 [stats.py:37] Avg generate tokens throughput: 8.317 tokens/s -ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:52 lightllm_req_id:8 first_token_cost:205.48677444458008ms total_cost_time:205.52825927734375ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:10944 prompt_cache_len:5151 prompt_cache_ratio:0.4706688596491228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 -INFO 06-24 20:24:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:24:53 [statics_utils.py:24] mean first cost: 228.46504027549784 ms -INFO 06-24 20:24:53 [statics_utils.py:24] mean per token cost: 0.0657624344455944 ms -DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.1084754467010498 s -INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11040163040161133 s -INFO 06-24 20:24:53 [manager.py:620] left req id 8can release False refcount 3 -DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=26652434400936807727026221652818171390, time:1750767893.3039105s req_ids:[8] -DEBUG 06-24 20:24:53 [manager.py:391] -ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:209.181547164917ms total_cost_time:209.22517776489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10945 prompt_cache_len:5151 prompt_cache_ratio:0.4706258565555048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 -DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10866808891296387 s -INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11049532890319824 s -DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=161599805650350540285922550843091259, time:1750767893.5191762s req_ids:[8] -DEBUG 06-24 20:24:53 [manager.py:391] -ERROR 06-24 20:24:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:390.44833183288574ms total_cost_time:390.49315452575684ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10946 prompt_cache_len:5151 prompt_cache_ratio:0.47058286131920335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 -DEBUG 06-24 20:24:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:53 [manager.py:224] router recive req id 8 cost time 0.10817718505859375 s -INFO 06-24 20:24:53 [manager.py:68] detokenization recv req id 8 cost time 0.11020755767822266 s -DEBUG 06-24 20:24:53 [manager.py:391] Prefill Batch: batch_id=239773195633103254504682491980729821090, time:1750767893.916511s req_ids:[8] -DEBUG 06-24 20:24:53 [manager.py:391] -ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:53 lightllm_req_id:8 first_token_cost:209.3362808227539ms total_cost_time:209.3815803527832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:10947 prompt_cache_len:5151 prompt_cache_ratio:0.47053987393806523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 -DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10760760307312012 s -INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s -DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=35663467200833528578369743059649874493, time:1750767894.1325288s req_ids:[8] -DEBUG 06-24 20:24:54 [manager.py:391] -ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:190.5364990234375ms total_cost_time:190.5820369720459ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:10948 prompt_cache_len:5151 prompt_cache_ratio:0.4704968944099379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 -DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s -INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11052370071411133 s -DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=300993446520370801341520454025337202270, time:1750767894.3283625s req_ids:[8] -DEBUG 06-24 20:24:54 [manager.py:391] -ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:194.69308853149414ms total_cost_time:194.73552703857422ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10949 prompt_cache_len:5151 prompt_cache_ratio:0.47045392273266967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 -DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10808134078979492 s -INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012125015258789 s -DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=41200956394980182714851784649436247696, time:1750767894.5304635s req_ids:[8] -DEBUG 06-24 20:24:54 [manager.py:391] -ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:205.66058158874512ms total_cost_time:205.69872856140137ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:10950 prompt_cache_len:5151 prompt_cache_ratio:0.4704109589041096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 -DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s -INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11129140853881836 s -DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=93226806135687424030961946094031877936, time:1750767894.7577457s req_ids:[8] -DEBUG 06-24 20:24:54 [manager.py:391] -ERROR 06-24 20:24:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:220.02291679382324ms total_cost_time:220.06773948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:10951 prompt_cache_len:5151 prompt_cache_ratio:0.47036800292210756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 -DEBUG 06-24 20:24:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:54 [manager.py:224] router recive req id 8 cost time 0.10859441757202148 s -INFO 06-24 20:24:54 [manager.py:68] detokenization recv req id 8 cost time 0.11052250862121582 s -DEBUG 06-24 20:24:54 [manager.py:391] Prefill Batch: batch_id=284450256605183148976424778323510617375, time:1750767894.9723022s req_ids:[8] -DEBUG 06-24 20:24:54 [manager.py:391] -ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:54 lightllm_req_id:8 first_token_cost:209.03420448303223ms total_cost_time:209.0754508972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:10952 prompt_cache_len:5151 prompt_cache_ratio:0.47032505478451425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 -DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.10802817344665527 s -INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.10996294021606445 s -DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=140589585542792401983205584725270870725, time:1750767895.199469s req_ids:[8] -DEBUG 06-24 20:24:55 [manager.py:391] -ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:390.2087211608887ms total_cost_time:390.25187492370605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10953 prompt_cache_len:5151 prompt_cache_ratio:0.47028211448918106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 -DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.1087501049041748 s -INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.11075949668884277 s -DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=237743788539726318792599382159318905158, time:1750767895.583275s req_ids:[8] -DEBUG 06-24 20:24:55 [manager.py:391] -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:24:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:205.86919784545898ms total_cost_time:205.91425895690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:10954 prompt_cache_len:5151 prompt_cache_ratio:0.4702391820339602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 -DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:55 [manager.py:224] router recive req id 8 cost time 0.10922002792358398 s -INFO 06-24 20:24:55 [manager.py:68] detokenization recv req id 8 cost time 0.11127829551696777 s -DEBUG 06-24 20:24:55 [manager.py:391] Prefill Batch: batch_id=182349176677916534389067219809131330255, time:1750767895.795664s req_ids:[8] -DEBUG 06-24 20:24:55 [manager.py:391] -ERROR 06-24 20:24:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:205.23667335510254ms total_cost_time:205.27982711791992ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10955 prompt_cache_len:5151 prompt_cache_ratio:0.4701962574167047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 -DEBUG 06-24 20:24:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.1088104248046875 s -INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11084842681884766 s -DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=92339679472969628116797528462646294274, time:1750767896.0085642s req_ids:[8] -DEBUG 06-24 20:24:56 [manager.py:391] -ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:55 lightllm_req_id:8 first_token_cost:222.320556640625ms total_cost_time:222.3665714263916ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10956 prompt_cache_len:5151 prompt_cache_ratio:0.47015334063526837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 -DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s -INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.10983538627624512 s -DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=198390400286481662728399578131965336993, time:1750767896.2364495s req_ids:[8] -DEBUG 06-24 20:24:56 [manager.py:391] -ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.24909210205078ms total_cost_time:208.29272270202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10957 prompt_cache_len:5151 prompt_cache_ratio:0.4701104316875057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 -DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10831904411315918 s -INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11027884483337402 s -DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=207322269254523254893023325303703983713, time:1750767896.451326s req_ids:[8] -DEBUG 06-24 20:24:56 [manager.py:391] -ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.8165283203125ms total_cost_time:208.85992050170898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10958 prompt_cache_len:5151 prompt_cache_ratio:0.47006753057127215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 -DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10866928100585938 s -INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.11073923110961914 s -DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=246486173048261224009126858526853336847, time:1750767896.6664515s req_ids:[8] -DEBUG 06-24 20:24:56 [manager.py:391] -ERROR 06-24 20:24:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:208.4944248199463ms total_cost_time:208.53853225708008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10959 prompt_cache_len:5151 prompt_cache_ratio:0.4700246372844238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 -DEBUG 06-24 20:24:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:56 [manager.py:224] router recive req id 8 cost time 0.10956621170043945 s -INFO 06-24 20:24:56 [manager.py:68] detokenization recv req id 8 cost time 0.1117401123046875 s -DEBUG 06-24 20:24:56 [manager.py:391] Prefill Batch: batch_id=191009822351454702493897905849106268357, time:1750767896.8803384s req_ids:[8] -DEBUG 06-24 20:24:56 [manager.py:391] -ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:56 lightllm_req_id:8 first_token_cost:373.00777435302734ms total_cost_time:373.05164337158203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10960 prompt_cache_len:5151 prompt_cache_ratio:0.4699817518248175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 -DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.1080784797668457 s -INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098487377166748 s -DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=1643617299687514190975862263622052154, time:1750767897.2585936s req_ids:[8] -DEBUG 06-24 20:24:57 [manager.py:391] -ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:207.5824737548828ms total_cost_time:207.61513710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:10961 prompt_cache_len:5151 prompt_cache_ratio:0.4699388741903111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 -DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.10710334777832031 s -INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.10876941680908203 s -DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=125416550724025508630337956911477623945, time:1750767897.4721107s req_ids:[8] -DEBUG 06-24 20:24:57 [manager.py:391] -ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:208.44197273254395ms total_cost_time:208.48631858825684ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10962 prompt_cache_len:5151 prompt_cache_ratio:0.469896004378763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 -DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.1078500747680664 s -INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.10970807075500488 s -DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=302507011890399032147690736652154968658, time:1750767897.6886485s req_ids:[8] -DEBUG 06-24 20:24:57 [manager.py:391] -ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:208.55379104614258ms total_cost_time:208.59766006469727ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10963 prompt_cache_len:5151 prompt_cache_ratio:0.4698531423880325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 -DEBUG 06-24 20:24:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:57 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s -INFO 06-24 20:24:57 [manager.py:68] detokenization recv req id 8 cost time 0.11066341400146484 s -DEBUG 06-24 20:24:57 [manager.py:391] Prefill Batch: batch_id=9134990359299375051154067743545609088, time:1750767897.9038312s req_ids:[8] -DEBUG 06-24 20:24:57 [manager.py:391] -ERROR 06-24 20:24:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:57 lightllm_req_id:8 first_token_cost:204.0257453918457ms total_cost_time:204.0688991546631ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10964 prompt_cache_len:5151 prompt_cache_ratio:0.4698102882159796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 -DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10914182662963867 s -INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.11113429069519043 s -DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=150727366386609507334011677771605134922, time:1750767898.1154895s req_ids:[8] -DEBUG 06-24 20:24:58 [manager.py:391] -ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:203.5074234008789ms total_cost_time:203.54938507080078ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10965 prompt_cache_len:5151 prompt_cache_ratio:0.4697674418604651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 -DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10922551155090332 s -INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.11120390892028809 s -DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=208945096398424598201240716281623350743, time:1750767898.3277435s req_ids:[8] -DEBUG 06-24 20:24:58 [manager.py:391] -ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:219.76995468139648ms total_cost_time:219.81215476989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10966 prompt_cache_len:5151 prompt_cache_ratio:0.46972460331935073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 -DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10775995254516602 s -INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.10959863662719727 s -DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=41783980007221942872971874837054524652, time:1750767898.5518513s req_ids:[8] -DEBUG 06-24 20:24:58 [manager.py:391] -ERROR 06-24 20:24:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:24:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:375.5145072937012ms total_cost_time:375.55885314941406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:10967 prompt_cache_len:5151 prompt_cache_ratio:0.46968177259049876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 -DEBUG 06-24 20:24:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:58 [manager.py:224] router recive req id 8 cost time 0.10774803161621094 s -INFO 06-24 20:24:58 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s -DEBUG 06-24 20:24:58 [manager.py:391] Prefill Batch: batch_id=112061913371024427504242625445728366824, time:1750767898.9353757s req_ids:[8] -DEBUG 06-24 20:24:58 [manager.py:391] -ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:58 lightllm_req_id:8 first_token_cost:206.90059661865234ms total_cost_time:206.94255828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:10968 prompt_cache_len:5151 prompt_cache_ratio:0.46963894967177244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 -DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s -INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s -DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=26394943478729843253093543010660655240, time:1750767899.1584845s req_ids:[8] -DEBUG 06-24 20:24:59 [manager.py:391] -ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:229.39252853393555ms total_cost_time:229.43568229675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10969 prompt_cache_len:5151 prompt_cache_ratio:0.46959613456103566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 -DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10863995552062988 s -INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s -DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=327490710558609932982459782384752831582, time:1750767899.383993s req_ids:[8] -DEBUG 06-24 20:24:59 [manager.py:391] -ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:205.27219772338867ms total_cost_time:205.31272888183594ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:10970 prompt_cache_len:5151 prompt_cache_ratio:0.4695533272561531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 -DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s -INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11079931259155273 s -DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=299803539333341661093336690054214816510, time:1750767899.5986736s req_ids:[8] -DEBUG 06-24 20:24:59 [manager.py:391] -ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:202.96120643615723ms total_cost_time:203.0034065246582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10971 prompt_cache_len:5151 prompt_cache_ratio:0.4695105277549904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 -DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:24:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:24:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:24:59 [manager.py:224] router recive req id 8 cost time 0.10849213600158691 s -INFO 06-24 20:24:59 [manager.py:68] detokenization recv req id 8 cost time 0.11058807373046875 s -DEBUG 06-24 20:24:59 [manager.py:391] Prefill Batch: batch_id=65341757955831360629614135361614667782, time:1750767899.810125s req_ids:[8] -DEBUG 06-24 20:24:59 [manager.py:391] -ERROR 06-24 20:24:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:24:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:24:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:204.33759689331055ms total_cost_time:204.38075065612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10972 prompt_cache_len:5151 prompt_cache_ratio:0.46946773605541375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:24:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 -DEBUG 06-24 20:24:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:24:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:24:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:24:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:24:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10849738121032715 s -INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11044692993164062 s -DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=132520184134146435247367707385271597786, time:1750767900.0213165s req_ids:[8] -DEBUG 06-24 20:25:00 [manager.py:391] -ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:24:59 lightllm_req_id:8 first_token_cost:207.17787742614746ms total_cost_time:207.22103118896484ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10973 prompt_cache_len:5151 prompt_cache_ratio:0.46942495215529023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 -DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10801076889038086 s -INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11015439033508301 s -DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=125508405662339974468075661643086097510, time:1750767900.2417955s req_ids:[8] -DEBUG 06-24 20:25:00 [manager.py:391] -ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:387.3906135559082ms total_cost_time:387.434720993042ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:10974 prompt_cache_len:5151 prompt_cache_ratio:0.4693821760524877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 -DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.10835576057434082 s -INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11040234565734863 s -DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=186726039870728504177576506634984368872, time:1750767900.6309497s req_ids:[8] -DEBUG 06-24 20:25:00 [manager.py:391] -ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:206.8312168121338ms total_cost_time:206.87341690063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10975 prompt_cache_len:5151 prompt_cache_ratio:0.4693394077448747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 -DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:00 [manager.py:224] router recive req id 8 cost time 0.1087336540222168 s -INFO 06-24 20:25:00 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s -DEBUG 06-24 20:25:00 [manager.py:391] Prefill Batch: batch_id=24915973430362592166573701504421552533, time:1750767900.8476505s req_ids:[8] -DEBUG 06-24 20:25:00 [manager.py:391] -ERROR 06-24 20:25:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:208.52088928222656ms total_cost_time:208.56475830078125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10976 prompt_cache_len:5151 prompt_cache_ratio:0.4692966472303207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 -DEBUG 06-24 20:25:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10948896408081055 s -INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144614219665527 s -DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=44386261259921118586040133650130393220, time:1750767901.0774035s req_ids:[8] -DEBUG 06-24 20:25:01 [manager.py:391] -ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:00 lightllm_req_id:8 first_token_cost:226.48358345031738ms total_cost_time:226.52602195739746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10977 prompt_cache_len:5151 prompt_cache_ratio:0.4692538945066958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 -DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10762333869934082 s -INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s -DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=59637866277869091910743124309150793871, time:1750767901.297847s req_ids:[8] -DEBUG 06-24 20:25:01 [manager.py:391] -ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:202.8827667236328ms total_cost_time:202.9266357421875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10978 prompt_cache_len:5151 prompt_cache_ratio:0.469211149571871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 -DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10781240463256836 s -INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973000526428223 s -DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=162252473971949375278815576958788518130, time:1750767901.50635s req_ids:[8] -DEBUG 06-24 20:25:01 [manager.py:391] -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:208.74619483947754ms total_cost_time:208.78958702087402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10979 prompt_cache_len:5151 prompt_cache_ratio:0.469168412423718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 -DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.10882711410522461 s -INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11078834533691406 s -DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=179892210370198919613086942812545402799, time:1750767901.721192s req_ids:[8] -DEBUG 06-24 20:25:01 [manager.py:391] -ERROR 06-24 20:25:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:195.91569900512695ms total_cost_time:195.95909118652344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10980 prompt_cache_len:5151 prompt_cache_ratio:0.46912568306010927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 -DEBUG 06-24 20:25:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:01 [manager.py:224] router recive req id 8 cost time 0.108612060546875 s -INFO 06-24 20:25:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s -DEBUG 06-24 20:25:01 [manager.py:391] Prefill Batch: batch_id=223490053546818487648360716521758253475, time:1750767901.9234662s req_ids:[8] -DEBUG 06-24 20:25:01 [manager.py:391] -ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:01 lightllm_req_id:8 first_token_cost:367.02561378479004ms total_cost_time:367.0690059661865ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10981 prompt_cache_len:5151 prompt_cache_ratio:0.46908296147891815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 -DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10926270484924316 s -INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11128449440002441 s -DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=176429076499462402004017456885856899520, time:1750767902.2992656s req_ids:[8] -DEBUG 06-24 20:25:02 [manager.py:391] -ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:205.21974563598633ms total_cost_time:205.2614688873291ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:10982 prompt_cache_len:5151 prompt_cache_ratio:0.46904024767801855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 -DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10751652717590332 s -INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.10948038101196289 s -DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=212953398713861741680910848673347632066, time:1750767902.5192158s req_ids:[8] -DEBUG 06-24 20:25:02 [manager.py:391] -ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:211.65871620178223ms total_cost_time:211.7023468017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10983 prompt_cache_len:5151 prompt_cache_ratio:0.46899754165528545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 -DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10882115364074707 s -INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11083841323852539 s -DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=186513123454523548710071360104218554888, time:1750767902.7329795s req_ids:[8] -DEBUG 06-24 20:25:02 [manager.py:391] -ERROR 06-24 20:25:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:208.01234245300293ms total_cost_time:208.05621147155762ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10984 prompt_cache_len:5151 prompt_cache_ratio:0.46895484340859434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 -DEBUG 06-24 20:25:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:02 [manager.py:224] router recive req id 8 cost time 0.10842680931091309 s -INFO 06-24 20:25:02 [manager.py:68] detokenization recv req id 8 cost time 0.11035442352294922 s -DEBUG 06-24 20:25:02 [manager.py:391] Prefill Batch: batch_id=203513209906631409851744046602084943175, time:1750767902.963063s req_ids:[8] -DEBUG 06-24 20:25:02 [manager.py:391] -ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:02 lightllm_req_id:8 first_token_cost:220.3836441040039ms total_cost_time:220.43967247009277ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:10985 prompt_cache_len:5151 prompt_cache_ratio:0.46891215293582156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 -DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10895872116088867 s -INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.11109042167663574 s -DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=167347070558806449834273486071973947720, time:1750767903.1761827s req_ids:[8] -DEBUG 06-24 20:25:03 [manager.py:391] -DEBUG 06-24 20:25:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 45682.125 tokens/s -DEBUG 06-24 20:25:03 [stats.py:37] Avg prompt tokens throughput: 45673.795 tokens/s -DEBUG 06-24 20:25:03 [stats.py:37] Avg generate tokens throughput: 8.330 tokens/s -ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:205.6446075439453ms total_cost_time:205.7044506072998ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:10986 prompt_cache_len:5151 prompt_cache_ratio:0.4688694702348443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 -DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10750818252563477 s -INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.10954999923706055 s -DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=38524974098975734154968948485018014751, time:1750767903.388591s req_ids:[8] -DEBUG 06-24 20:25:03 [manager.py:391] -ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:202.99053192138672ms total_cost_time:203.0327320098877ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:10987 prompt_cache_len:5151 prompt_cache_ratio:0.46882679530354054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 -DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.2085108757019043 s -INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.2102367877960205 s -DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=310771573391976478561448508599262084015, time:1750767903.7340581s req_ids:[8] -DEBUG 06-24 20:25:03 [manager.py:391] -ERROR 06-24 20:25:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:342.49186515808105ms total_cost_time:342.53549575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10988 prompt_cache_len:5151 prompt_cache_ratio:0.46878412813978887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 -DEBUG 06-24 20:25:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:03 [manager.py:224] router recive req id 8 cost time 0.10930466651916504 s -INFO 06-24 20:25:03 [manager.py:68] detokenization recv req id 8 cost time 0.11124801635742188 s -DEBUG 06-24 20:25:03 [manager.py:391] Prefill Batch: batch_id=284015968686562445575221995188374653639, time:1750767903.9495783s req_ids:[8] -DEBUG 06-24 20:25:03 [manager.py:391] -ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:03 lightllm_req_id:8 first_token_cost:201.7674446105957ms total_cost_time:201.80988311767578ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:10989 prompt_cache_len:5151 prompt_cache_ratio:0.4687414687414687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 -DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10774779319763184 s -INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946965217590332 s -DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=32008894832648051164680964201439440802, time:1750767904.1595325s req_ids:[8] -DEBUG 06-24 20:25:04 [manager.py:391] -ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:162.09101676940918ms total_cost_time:162.13393211364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:10990 prompt_cache_len:5151 prompt_cache_ratio:0.46869881710646044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 -DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10866045951843262 s -INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s -DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=101473945456999423199478965036264695402, time:1750767904.3298225s req_ids:[8] -DEBUG 06-24 20:25:04 [manager.py:391] -ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:189.44168090820312ms total_cost_time:189.4848346710205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10991 prompt_cache_len:5151 prompt_cache_ratio:0.4686561732326449 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 -DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10752391815185547 s -INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10953330993652344 s -DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=239067842247763105509475293057826580218, time:1750767904.527247s req_ids:[8] -DEBUG 06-24 20:25:04 [manager.py:391] -ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:198.7326145172119ms total_cost_time:198.7764835357666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10992 prompt_cache_len:5151 prompt_cache_ratio:0.46861353711790393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 -DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s -INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.11020588874816895 s -DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=332050113503828533682898100832298004327, time:1750767904.73525s req_ids:[8] -DEBUG 06-24 20:25:04 [manager.py:391] -ERROR 06-24 20:25:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:206.79402351379395ms total_cost_time:206.83789253234863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:10993 prompt_cache_len:5151 prompt_cache_ratio:0.4685709087601201 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 -DEBUG 06-24 20:25:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:04 [manager.py:224] router recive req id 8 cost time 0.1077578067779541 s -INFO 06-24 20:25:04 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s -DEBUG 06-24 20:25:04 [manager.py:391] Prefill Batch: batch_id=290831358805764053386973951263269502628, time:1750767904.9486945s req_ids:[8] -DEBUG 06-24 20:25:04 [manager.py:391] -ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:04 lightllm_req_id:8 first_token_cost:203.80306243896484ms total_cost_time:203.84669303894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:10994 prompt_cache_len:5151 prompt_cache_ratio:0.4685282881571766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 -DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.10921978950500488 s -INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.1112368106842041 s -DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=246283051504272877428242662469774744072, time:1750767905.1610184s req_ids:[8] -DEBUG 06-24 20:25:05 [manager.py:391] -ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:204.7407627105713ms total_cost_time:204.7867774963379ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:10995 prompt_cache_len:5151 prompt_cache_ratio:0.4684856753069577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 -DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.3110203742980957 s -INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.3131449222564697 s -DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=236199623741823595863242590982543956334, time:1750767905.586208s req_ids:[8] -DEBUG 06-24 20:25:05 [manager.py:391] -ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:426.6078472137451ms total_cost_time:426.6524314880371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:10996 prompt_cache_len:5151 prompt_cache_ratio:0.4684430702073481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 -DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:05 [manager.py:224] router recive req id 8 cost time 0.10963702201843262 s -INFO 06-24 20:25:05 [manager.py:68] detokenization recv req id 8 cost time 0.11136126518249512 s -DEBUG 06-24 20:25:05 [manager.py:391] Prefill Batch: batch_id=296991886895930450874436553332949911822, time:1750767905.8101702s req_ids:[8] -DEBUG 06-24 20:25:05 [manager.py:391] -ERROR 06-24 20:25:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:209.4876766204834ms total_cost_time:209.53035354614258ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:10997 prompt_cache_len:5151 prompt_cache_ratio:0.46840047285623354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 -DEBUG 06-24 20:25:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s -INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s -DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=315968899715823804983495787382043978343, time:1750767906.032475s req_ids:[8] -DEBUG 06-24 20:25:06 [manager.py:391] -ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:05 lightllm_req_id:8 first_token_cost:171.38123512268066ms total_cost_time:171.42462730407715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:10998 prompt_cache_len:5151 prompt_cache_ratio:0.4683578832515003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 -DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s -INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.10939502716064453 s -DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=32366584276683570796796148143078077241, time:1750767906.2055047s req_ids:[8] -DEBUG 06-24 20:25:06 [manager.py:391] -ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:202.5439739227295ms total_cost_time:202.58712768554688ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:10999 prompt_cache_len:5151 prompt_cache_ratio:0.46831530139103555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 -DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s -INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s -DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=159689113489781304891688737690563233235, time:1750767906.4192116s req_ids:[8] -DEBUG 06-24 20:25:06 [manager.py:391] -ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:215.77763557434082ms total_cost_time:215.8198356628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11000 prompt_cache_len:5151 prompt_cache_ratio:0.4682727272727273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 -DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.1086890697479248 s -INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11051034927368164 s -DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=293444267596285824321227691670693505253, time:1750767906.6461842s req_ids:[8] -DEBUG 06-24 20:25:06 [manager.py:391] -ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:206.1319351196289ms total_cost_time:206.1760425567627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11001 prompt_cache_len:5151 prompt_cache_ratio:0.4682301608944641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 -DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:06 [manager.py:224] router recive req id 8 cost time 0.10898375511169434 s -INFO 06-24 20:25:06 [manager.py:68] detokenization recv req id 8 cost time 0.11067986488342285 s -DEBUG 06-24 20:25:06 [manager.py:391] Prefill Batch: batch_id=215899965806028378155742028925079266761, time:1750767906.8533325s req_ids:[8] -DEBUG 06-24 20:25:06 [manager.py:391] -ERROR 06-24 20:25:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:206.87294006347656ms total_cost_time:206.91752433776855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11002 prompt_cache_len:5151 prompt_cache_ratio:0.46818760225413564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 -DEBUG 06-24 20:25:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.1082611083984375 s -INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s -DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=141023548823535960373480117120129702161, time:1750767907.0671422s req_ids:[8] -DEBUG 06-24 20:25:07 [manager.py:391] -ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:06 lightllm_req_id:8 first_token_cost:354.72846031188965ms total_cost_time:354.7663688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:11003 prompt_cache_len:5151 prompt_cache_ratio:0.4681450513496319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 -DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10873866081237793 s -INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11034965515136719 s -DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=13423068330213695900549575188583955766, time:1750767907.4289787s req_ids:[8] -DEBUG 06-24 20:25:07 [manager.py:391] -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:203.64618301391602ms total_cost_time:203.690767288208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11004 prompt_cache_len:5151 prompt_cache_ratio:0.4681025081788441 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 -DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10787296295166016 s -INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.10967397689819336 s -DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=301726762223398247403587400384164024815, time:1750767907.6419141s req_ids:[8] -DEBUG 06-24 20:25:07 [manager.py:391] -ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:210.51669120788574ms total_cost_time:210.57939529418945ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:11005 prompt_cache_len:5151 prompt_cache_ratio:0.46805997273966377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 -DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:07 [manager.py:224] router recive req id 8 cost time 0.10864424705505371 s -INFO 06-24 20:25:07 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s -DEBUG 06-24 20:25:07 [manager.py:391] Prefill Batch: batch_id=166441483505318824770184638606147183388, time:1750767907.8575377s req_ids:[8] -DEBUG 06-24 20:25:07 [manager.py:391] -ERROR 06-24 20:25:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:202.0699977874756ms total_cost_time:202.12650299072266ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11006 prompt_cache_len:5151 prompt_cache_ratio:0.46801744502998366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 -DEBUG 06-24 20:25:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s -INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.11011743545532227 s -DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=2355817118054547944584222565196555268, time:1750767908.0685067s req_ids:[8] -DEBUG 06-24 20:25:08 [manager.py:391] -ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:07 lightllm_req_id:8 first_token_cost:200.2277374267578ms total_cost_time:200.2713680267334ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11007 prompt_cache_len:5151 prompt_cache_ratio:0.46797492504769694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 -DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10778975486755371 s -INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.10967254638671875 s -DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=126171459759067692380093046443747151458, time:1750767908.2791915s req_ids:[8] -DEBUG 06-24 20:25:08 [manager.py:391] -ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:208.7085247039795ms total_cost_time:208.7533473968506ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11008 prompt_cache_len:5151 prompt_cache_ratio:0.4679324127906977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 -DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s -INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.11108756065368652 s -DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=125602237716375221927895824662390007591, time:1750767908.4954042s req_ids:[8] -DEBUG 06-24 20:25:08 [manager.py:391] -ERROR 06-24 20:25:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:221.47417068481445ms total_cost_time:221.51923179626465ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11009 prompt_cache_len:5151 prompt_cache_ratio:0.46788990825688076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 -DEBUG 06-24 20:25:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:08 [manager.py:224] router recive req id 8 cost time 0.30959033966064453 s -INFO 06-24 20:25:08 [manager.py:68] detokenization recv req id 8 cost time 0.3115084171295166 s -DEBUG 06-24 20:25:08 [manager.py:391] Prefill Batch: batch_id=265292549760178803862155251834285446603, time:1750767908.9287045s req_ids:[8] -DEBUG 06-24 20:25:08 [manager.py:391] -ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:08 lightllm_req_id:8 first_token_cost:414.170503616333ms total_cost_time:414.2143726348877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11010 prompt_cache_len:5151 prompt_cache_ratio:0.4678474114441417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 -DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10877442359924316 s -INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s -DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=140794219055071588158487676512780845043, time:1750767909.1559234s req_ids:[8] -DEBUG 06-24 20:25:09 [manager.py:391] -ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:211.37285232543945ms total_cost_time:211.42244338989258ms,out_token_counter:1 mean_per_token_cost_time: 0.049591064453125ms prompt_token_num:11011 prompt_cache_len:5151 prompt_cache_ratio:0.4678049223503769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 -DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10944294929504395 s -INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11141252517700195 s -DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=37054347876872359630693905365473309072, time:1750767909.3656437s req_ids:[8] -DEBUG 06-24 20:25:09 [manager.py:391] -ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:206.53414726257324ms total_cost_time:206.5865993499756ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:11012 prompt_cache_len:5151 prompt_cache_ratio:0.46776244097348346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 -DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10843896865844727 s -INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11033439636230469 s -DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=23044828172233357758090435000118500730, time:1750767909.5838363s req_ids:[8] -DEBUG 06-24 20:25:09 [manager.py:391] -ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:207.40246772766113ms total_cost_time:207.46231079101562ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:11013 prompt_cache_len:5151 prompt_cache_ratio:0.4677199673113593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 -DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:09 [manager.py:224] router recive req id 8 cost time 0.10840630531311035 s -INFO 06-24 20:25:09 [manager.py:68] detokenization recv req id 8 cost time 0.11041784286499023 s -DEBUG 06-24 20:25:09 [manager.py:391] Prefill Batch: batch_id=127320018042921417183111771500204408536, time:1750767909.7965522s req_ids:[8] -DEBUG 06-24 20:25:09 [manager.py:391] -ERROR 06-24 20:25:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:204.6334743499756ms total_cost_time:204.69331741333008ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:11014 prompt_cache_len:5151 prompt_cache_ratio:0.46767750136190306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 -DEBUG 06-24 20:25:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.1079099178314209 s -INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983037948608398 s -DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=150388860240897527475752073312404715211, time:1750767910.0073388s req_ids:[8] -DEBUG 06-24 20:25:10 [manager.py:391] -ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:09 lightllm_req_id:8 first_token_cost:203.31454277038574ms total_cost_time:203.3684253692627ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:11015 prompt_cache_len:5151 prompt_cache_ratio:0.46763504312301407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 -DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10911846160888672 s -INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11112523078918457 s -DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=71343971786385498784624178730287540414, time:1750767910.2221158s req_ids:[8] -DEBUG 06-24 20:25:10 [manager.py:391] -ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:373.46410751342773ms total_cost_time:373.50988388061523ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11016 prompt_cache_len:5151 prompt_cache_ratio:0.4675925925925926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 -DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10854005813598633 s -INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11055278778076172 s -DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=340071322014091706224156906589741976364, time:1750767910.600805s req_ids:[8] -DEBUG 06-24 20:25:10 [manager.py:391] -ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:199.73111152648926ms total_cost_time:199.77545738220215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11017 prompt_cache_len:5151 prompt_cache_ratio:0.46755014976853954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 -DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:10 [manager.py:224] router recive req id 8 cost time 0.10879087448120117 s -INFO 06-24 20:25:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083006858825684 s -DEBUG 06-24 20:25:10 [manager.py:391] Prefill Batch: batch_id=170133167789734103335699522806250943683, time:1750767910.8093421s req_ids:[8] -DEBUG 06-24 20:25:10 [manager.py:391] -ERROR 06-24 20:25:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:200.87313652038574ms total_cost_time:200.91700553894043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11018 prompt_cache_len:5151 prompt_cache_ratio:0.46750771464875657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 -DEBUG 06-24 20:25:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10863947868347168 s -INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.11050748825073242 s -DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=252096996960790285640134065687128052954, time:1750767911.0181422s req_ids:[8] -DEBUG 06-24 20:25:11 [manager.py:391] -ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:10 lightllm_req_id:8 first_token_cost:201.35903358459473ms total_cost_time:201.4024257659912ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11019 prompt_cache_len:5151 prompt_cache_ratio:0.4674652872311462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 -DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s -INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.10989522933959961 s -DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=218576145005571795574538850280361506027, time:1750767911.2274828s req_ids:[8] -DEBUG 06-24 20:25:11 [manager.py:391] -ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:203.8097381591797ms total_cost_time:203.8710117340088ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:11020 prompt_cache_len:5151 prompt_cache_ratio:0.4674228675136116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 -DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10775423049926758 s -INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.10971641540527344 s -DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=17935944016900144544572488286861400602, time:1750767911.4394884s req_ids:[8] -DEBUG 06-24 20:25:11 [manager.py:391] -ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:203.8886547088623ms total_cost_time:203.94587516784668ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:11021 prompt_cache_len:5151 prompt_cache_ratio:0.4673804554940568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 -DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10902047157287598 s -INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.11106252670288086 s -DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=104053406936977228035362061297123958608, time:1750767911.6667821s req_ids:[8] -DEBUG 06-24 20:25:11 [manager.py:391] -ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:227.4765968322754ms total_cost_time:227.5223731994629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11022 prompt_cache_len:5151 prompt_cache_ratio:0.4673380511703865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 -DEBUG 06-24 20:25:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:11 [manager.py:224] router recive req id 8 cost time 0.10854411125183105 s -INFO 06-24 20:25:11 [manager.py:68] detokenization recv req id 8 cost time 0.110443115234375 s -DEBUG 06-24 20:25:11 [manager.py:391] Prefill Batch: batch_id=267124607333919216056254147109271017199, time:1750767911.8849857s req_ids:[8] -DEBUG 06-24 20:25:11 [manager.py:391] -ERROR 06-24 20:25:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:210.5708122253418ms total_cost_time:210.6163501739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11023 prompt_cache_len:5151 prompt_cache_ratio:0.4672956545405062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 -DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10791397094726562 s -INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.10988593101501465 s -DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=152399012636591430237987181666178272072, time:1750767912.1027646s req_ids:[8] -DEBUG 06-24 20:25:12 [manager.py:391] -ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:11 lightllm_req_id:8 first_token_cost:200.68717002868652ms total_cost_time:200.73223114013672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11024 prompt_cache_len:5151 prompt_cache_ratio:0.4672532656023222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 -DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10937929153442383 s -INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11139297485351562 s -DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=147773605902685727343334701270613743133, time:1750767912.3132493s req_ids:[8] -DEBUG 06-24 20:25:12 [manager.py:391] -ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:366.1487102508545ms total_cost_time:366.1956787109375ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11025 prompt_cache_len:5151 prompt_cache_ratio:0.4672108843537415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 -DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10840678215026855 s -INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11041712760925293 s -DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=22438155217631460827440440838400347236, time:1750767912.6847022s req_ids:[8] -DEBUG 06-24 20:25:12 [manager.py:391] -ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:199.61023330688477ms total_cost_time:199.65887069702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:11026 prompt_cache_len:5151 prompt_cache_ratio:0.46716851079267185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 -DEBUG 06-24 20:25:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:12 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s -DEBUG 06-24 20:25:12 [manager.py:391] Prefill Batch: batch_id=47895542750260608607403270965683851029, time:1750767912.8924437s req_ids:[8] -DEBUG 06-24 20:25:12 [manager.py:391] -INFO 06-24 20:25:12 [manager.py:68] detokenization recv req id 8 cost time 0.11156988143920898 s -ERROR 06-24 20:25:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:201.08723640441895ms total_cost_time:201.13158226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11027 prompt_cache_len:5151 prompt_cache_ratio:0.46712614491702187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 -DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10831165313720703 s -INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11024093627929688 s -DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=291493530953703379327499073516200638955, time:1750767913.1026127s req_ids:[8] -DEBUG 06-24 20:25:13 [manager.py:391] -ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:12 lightllm_req_id:8 first_token_cost:163.53178024291992ms total_cost_time:163.59210014343262ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:11028 prompt_cache_len:5151 prompt_cache_ratio:0.46708378672470074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 -DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10914325714111328 s -INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11087656021118164 s -DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=330774999887377105411399425737617302340, time:1750767913.2731836s req_ids:[8] -DEBUG 06-24 20:25:13 [manager.py:391] -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:25:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 46888.836 tokens/s -DEBUG 06-24 20:25:13 [stats.py:37] Avg prompt tokens throughput: 46880.319 tokens/s -DEBUG 06-24 20:25:13 [stats.py:37] Avg generate tokens throughput: 8.517 tokens/s -ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:162.34087944030762ms total_cost_time:162.3859405517578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11029 prompt_cache_len:5151 prompt_cache_ratio:0.46704143621361865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 -DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10892438888549805 s -INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.11088752746582031 s -DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=96850845085649871216899343035006957172, time:1750767913.4444385s req_ids:[8] -DEBUG 06-24 20:25:13 [manager.py:391] -ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:199.75852966308594ms total_cost_time:199.80239868164062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11030 prompt_cache_len:5151 prompt_cache_ratio:0.4669990933816863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 -DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10799050331115723 s -INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s -DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=311921249046831160339682210038473275200, time:1750767913.6532633s req_ids:[8] -DEBUG 06-24 20:25:13 [manager.py:391] -ERROR 06-24 20:25:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:207.62228965759277ms total_cost_time:207.66639709472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11031 prompt_cache_len:5151 prompt_cache_ratio:0.4669567582268153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 -DEBUG 06-24 20:25:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:13 [manager.py:224] router recive req id 8 cost time 0.10788750648498535 s -INFO 06-24 20:25:13 [manager.py:68] detokenization recv req id 8 cost time 0.10983824729919434 s -DEBUG 06-24 20:25:13 [manager.py:391] Prefill Batch: batch_id=174245706390833024609646349073037230517, time:1750767913.8684309s req_ids:[8] -DEBUG 06-24 20:25:13 [manager.py:391] -ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:13 lightllm_req_id:8 first_token_cost:381.45899772644043ms total_cost_time:381.5033435821533ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11032 prompt_cache_len:5151 prompt_cache_ratio:0.4669144307469181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 -DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s -INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11106061935424805 s -DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=265928188380368634629488755356110149565, time:1750767914.258097s req_ids:[8] -DEBUG 06-24 20:25:14 [manager.py:391] -ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:202.5585174560547ms total_cost_time:202.60119438171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11033 prompt_cache_len:5151 prompt_cache_ratio:0.46687211093990755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 -DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10926342010498047 s -INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s -DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=158436454127789123398916276811409387678, time:1750767914.4693985s req_ids:[8] -DEBUG 06-24 20:25:14 [manager.py:391] -ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:211.4732265472412ms total_cost_time:211.50588989257812ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:11034 prompt_cache_len:5151 prompt_cache_ratio:0.4668297988036977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 -DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.1082925796508789 s -INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11020755767822266 s -DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=98900805525056713815711249718806638451, time:1750767914.6879966s req_ids:[8] -DEBUG 06-24 20:25:14 [manager.py:391] -ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:204.64658737182617ms total_cost_time:204.69045639038086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11035 prompt_cache_len:5151 prompt_cache_ratio:0.466787494336203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 -DEBUG 06-24 20:25:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:14 [manager.py:224] router recive req id 8 cost time 0.10917925834655762 s -INFO 06-24 20:25:14 [manager.py:68] detokenization recv req id 8 cost time 0.11108994483947754 s -DEBUG 06-24 20:25:14 [manager.py:391] Prefill Batch: batch_id=300692489463183932016449859238483987510, time:1750767914.8997097s req_ids:[8] -DEBUG 06-24 20:25:14 [manager.py:391] -ERROR 06-24 20:25:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:203.08613777160645ms total_cost_time:203.13096046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11036 prompt_cache_len:5151 prompt_cache_ratio:0.4667451975353389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 -DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10804963111877441 s -INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10994434356689453 s -DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=163049181786745697808295311451197836306, time:1750767915.1137922s req_ids:[8] -DEBUG 06-24 20:25:15 [manager.py:391] -ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:14 lightllm_req_id:8 first_token_cost:204.5729160308838ms total_cost_time:204.61726188659668ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11037 prompt_cache_len:5151 prompt_cache_ratio:0.46670290839902145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 -DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10807108879089355 s -INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10991740226745605 s -DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=320480809806303284898901144646976126681, time:1750767915.3226473s req_ids:[8] -DEBUG 06-24 20:25:15 [manager.py:391] -ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:201.87854766845703ms total_cost_time:201.92313194274902ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11038 prompt_cache_len:5151 prompt_cache_ratio:0.4666606269251676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 -DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.1087639331817627 s -INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.11068940162658691 s -DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=69609281931315877409300297575156938131, time:1750767915.5422692s req_ids:[8] -DEBUG 06-24 20:25:15 [manager.py:391] -ERROR 06-24 20:25:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:381.09540939331055ms total_cost_time:381.119966506958ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:11039 prompt_cache_len:5151 prompt_cache_ratio:0.4666183531116949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 -DEBUG 06-24 20:25:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:15 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s -INFO 06-24 20:25:15 [manager.py:68] detokenization recv req id 8 cost time 0.10984444618225098 s -DEBUG 06-24 20:25:15 [manager.py:391] Prefill Batch: batch_id=339656269842694207095725495281503580912, time:1750767915.9255369s req_ids:[8] -DEBUG 06-24 20:25:15 [manager.py:391] -ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:15 lightllm_req_id:8 first_token_cost:206.96425437927246ms total_cost_time:207.00764656066895ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11040 prompt_cache_len:5151 prompt_cache_ratio:0.46657608695652175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 -DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.11191177368164062 s -INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11389040946960449 s -DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=115968914764257379043226683810409339471, time:1750767916.143424s req_ids:[8] -DEBUG 06-24 20:25:16 [manager.py:391] -ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:216.22943878173828ms total_cost_time:216.28069877624512ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11041 prompt_cache_len:5151 prompt_cache_ratio:0.46653382845756725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 -DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.1082305908203125 s -INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11023640632629395 s -DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=6786558978228475158633591213856061927, time:1750767916.3636193s req_ids:[8] -DEBUG 06-24 20:25:16 [manager.py:391] -ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:203.9804458618164ms total_cost_time:204.0245532989502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11042 prompt_cache_len:5151 prompt_cache_ratio:0.46649157761275134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 -DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10851168632507324 s -INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s -DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=189398100750898496546498054606022776907, time:1750767916.5765371s req_ids:[8] -DEBUG 06-24 20:25:16 [manager.py:391] -ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:199.55134391784668ms total_cost_time:199.5999813079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:11043 prompt_cache_len:5151 prompt_cache_ratio:0.4664493344199946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 -DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10899019241333008 s -INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11087822914123535 s -DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=110350007444522521049195243340796643427, time:1750767916.7829885s req_ids:[8] -DEBUG 06-24 20:25:16 [manager.py:391] -ERROR 06-24 20:25:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:202.9271125793457ms total_cost_time:202.9581069946289ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:11044 prompt_cache_len:5151 prompt_cache_ratio:0.4664070988772184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 -DEBUG 06-24 20:25:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:16 [manager.py:224] router recive req id 8 cost time 0.10925054550170898 s -INFO 06-24 20:25:16 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s -DEBUG 06-24 20:25:16 [manager.py:391] Prefill Batch: batch_id=141790355407290772649623370791432942647, time:1750767916.9954743s req_ids:[8] -DEBUG 06-24 20:25:16 [manager.py:391] -INFO 06-24 20:25:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:16 lightllm_req_id:8 first_token_cost:208.8296413421631ms total_cost_time:208.86898040771484ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:11045 prompt_cache_len:5151 prompt_cache_ratio:0.46636487098234497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 -DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.31320858001708984 s -INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.3151977062225342 s -DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=147970453522678204358955391066890193122, time:1750767917.426935s req_ids:[8] -DEBUG 06-24 20:25:17 [manager.py:391] -ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:427.7620315551758ms total_cost_time:427.7822971343994ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11046 prompt_cache_len:5151 prompt_cache_ratio:0.4663226507332971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 -DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.10931539535522461 s -INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s -DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=50351564120266722813972483734561850647, time:1750767917.6527383s req_ids:[8] -DEBUG 06-24 20:25:17 [manager.py:391] -ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:209.96642112731934ms total_cost_time:210.01172065734863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11047 prompt_cache_len:5151 prompt_cache_ratio:0.4662804381279986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 -DEBUG 06-24 20:25:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:17 [manager.py:224] router recive req id 8 cost time 0.10875391960144043 s -INFO 06-24 20:25:17 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s -DEBUG 06-24 20:25:17 [manager.py:391] Prefill Batch: batch_id=218812451139121876736373793069470096762, time:1750767917.8658092s req_ids:[8] -DEBUG 06-24 20:25:17 [manager.py:391] -ERROR 06-24 20:25:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:214.48612213134766ms total_cost_time:214.52903747558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11048 prompt_cache_len:5151 prompt_cache_ratio:0.4662382331643736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 -DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10839343070983887 s -INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11015462875366211 s -DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=231073611885592840595329162406489065542, time:1750767918.1087198s req_ids:[8] -DEBUG 06-24 20:25:18 [manager.py:391] -ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:17 lightllm_req_id:8 first_token_cost:219.681978225708ms total_cost_time:219.7260856628418ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11049 prompt_cache_len:5151 prompt_cache_ratio:0.4661960358403475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 -DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10791540145874023 s -INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.10980510711669922 s -DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=109633651086520927716503125412899517118, time:1750767918.3140824s req_ids:[8] -DEBUG 06-24 20:25:18 [manager.py:391] -ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:197.79539108276367ms total_cost_time:197.83806800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11050 prompt_cache_len:5151 prompt_cache_ratio:0.46615384615384614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 -DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10983920097351074 s -INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11186075210571289 s -DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=247344740288634562219542175442343136680, time:1750767918.5208998s req_ids:[8] -DEBUG 06-24 20:25:18 [manager.py:391] -ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:196.6390609741211ms total_cost_time:196.6836452484131ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11051 prompt_cache_len:5151 prompt_cache_ratio:0.46611166410279614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 -DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s -INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.11030912399291992 s -DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=157408501703582063740007155728725523775, time:1750767918.7263312s req_ids:[8] -DEBUG 06-24 20:25:18 [manager.py:391] -ERROR 06-24 20:25:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:198.87018203735352ms total_cost_time:198.9133358001709ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11052 prompt_cache_len:5151 prompt_cache_ratio:0.46606948968512485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 -DEBUG 06-24 20:25:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:18 [manager.py:224] router recive req id 8 cost time 0.1090688705444336 s -INFO 06-24 20:25:18 [manager.py:68] detokenization recv req id 8 cost time 0.1109781265258789 s -DEBUG 06-24 20:25:18 [manager.py:391] Prefill Batch: batch_id=105829950909169031213261361866851910650, time:1750767918.9309134s req_ids:[8] -DEBUG 06-24 20:25:18 [manager.py:391] -ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:18 lightllm_req_id:8 first_token_cost:211.0307216644287ms total_cost_time:211.0753059387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11053 prompt_cache_len:5151 prompt_cache_ratio:0.46602732289876053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 -DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.310549259185791 s -INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.3125417232513428 s -DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=195421280554318518100734172537429406999, time:1750767919.3553078s req_ids:[8] -DEBUG 06-24 20:25:19 [manager.py:391] -ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:417.9105758666992ms total_cost_time:417.9539680480957ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11054 prompt_cache_len:5151 prompt_cache_ratio:0.465985163741632 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 -DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.10904431343078613 s -INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.1109614372253418 s -DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=62027748841707886598867492590792261745, time:1750767919.5777316s req_ids:[8] -DEBUG 06-24 20:25:19 [manager.py:391] -ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:209.30242538452148ms total_cost_time:209.34677124023438ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11055 prompt_cache_len:5151 prompt_cache_ratio:0.46594301221166895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 -DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:19 [manager.py:224] router recive req id 8 cost time 0.10767960548400879 s -INFO 06-24 20:25:19 [manager.py:68] detokenization recv req id 8 cost time 0.10970854759216309 s -DEBUG 06-24 20:25:19 [manager.py:391] Prefill Batch: batch_id=78624740772480355262160082468327271760, time:1750767919.7950985s req_ids:[8] -DEBUG 06-24 20:25:19 [manager.py:391] -ERROR 06-24 20:25:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:205.5981159210205ms total_cost_time:205.65223693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:11056 prompt_cache_len:5151 prompt_cache_ratio:0.4659008683068017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 -DEBUG 06-24 20:25:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:19 [batch.py:51] router release req id 8 -INFO 06-24 20:25:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.11041069030761719 s -INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11235713958740234 s -DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=240187883049473306771525049756512655464, time:1750767920.0088003s req_ids:[8] -DEBUG 06-24 20:25:20 [manager.py:391] -ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:19 lightllm_req_id:8 first_token_cost:207.67903327941895ms total_cost_time:207.72147178649902ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11057 prompt_cache_len:5151 prompt_cache_ratio:0.4658587320249616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 -DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.1089324951171875 s -INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11089801788330078 s -DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=57641011239707614968513979134791314606, time:1750767920.2235348s req_ids:[8] -DEBUG 06-24 20:25:20 [manager.py:391] -ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:204.20193672180176ms total_cost_time:204.2231559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11058 prompt_cache_len:5151 prompt_cache_ratio:0.4658166033640803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 -DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10805296897888184 s -INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.10930562019348145 s -DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=185355541823603023135636544889825030700, time:1750767920.4424906s req_ids:[8] -DEBUG 06-24 20:25:20 [manager.py:391] -ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:217.46587753295898ms total_cost_time:217.50998497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11059 prompt_cache_len:5151 prompt_cache_ratio:0.4657744823220906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 -DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s -INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s -DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=160945961856777328717147829912851632710, time:1750767920.6624548s req_ids:[8] -DEBUG 06-24 20:25:20 [manager.py:391] -ERROR 06-24 20:25:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:207.21840858459473ms total_cost_time:207.2601318359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11060 prompt_cache_len:5151 prompt_cache_ratio:0.4657323688969259 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 -DEBUG 06-24 20:25:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:20 [manager.py:224] router recive req id 8 cost time 0.10838031768798828 s -INFO 06-24 20:25:20 [manager.py:68] detokenization recv req id 8 cost time 0.11020278930664062 s -DEBUG 06-24 20:25:20 [manager.py:391] Prefill Batch: batch_id=118473117702927182272277196891368324162, time:1750767920.8764567s req_ids:[8] -DEBUG 06-24 20:25:20 [manager.py:391] -ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:20 lightllm_req_id:8 first_token_cost:368.82901191711426ms total_cost_time:368.87216567993164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11061 prompt_cache_len:5151 prompt_cache_ratio:0.46569026308652023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 -DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10848546028137207 s -INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11053586006164551 s -DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=214513630186461970401251132675502210801, time:1750767921.2508235s req_ids:[8] -DEBUG 06-24 20:25:21 [manager.py:391] -ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:205.4421901702881ms total_cost_time:205.46746253967285ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11062 prompt_cache_len:5151 prompt_cache_ratio:0.46564816488880856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 -DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s -INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11089920997619629 s -DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=43784840294194501859005622296924316960, time:1750767921.4655201s req_ids:[8] -DEBUG 06-24 20:25:21 [manager.py:391] -ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:209.43593978881836ms total_cost_time:209.47813987731934ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11063 prompt_cache_len:5151 prompt_cache_ratio:0.46560607430172646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 -DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s -INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s -DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=24126909912162452126179353321195390044, time:1750767921.6831574s req_ids:[8] -DEBUG 06-24 20:25:21 [manager.py:391] -ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:210.4172706604004ms total_cost_time:210.4625701904297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11064 prompt_cache_len:5151 prompt_cache_ratio:0.4655639913232104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 -DEBUG 06-24 20:25:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:21 [manager.py:224] router recive req id 8 cost time 0.10938763618469238 s -INFO 06-24 20:25:21 [manager.py:68] detokenization recv req id 8 cost time 0.11146354675292969 s -DEBUG 06-24 20:25:21 [manager.py:391] Prefill Batch: batch_id=308812589413045109722906406993545244190, time:1750767921.8963435s req_ids:[8] -DEBUG 06-24 20:25:21 [manager.py:391] -ERROR 06-24 20:25:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:208.54926109313965ms total_cost_time:208.58478546142578ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:11065 prompt_cache_len:5151 prompt_cache_ratio:0.46552191595119746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 -DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10896515846252441 s -INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089706420898438 s -DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=283255848241193956429740388411759440391, time:1750767922.1134622s req_ids:[8] -DEBUG 06-24 20:25:22 [manager.py:391] -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:22 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:21 lightllm_req_id:8 first_token_cost:211.73977851867676ms total_cost_time:211.78197860717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11066 prompt_cache_len:5151 prompt_cache_ratio:0.46547984818362553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 -DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10912871360778809 s -INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11118650436401367 s -DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=270325970034754073609856668479527343402, time:1750767922.3298197s req_ids:[8] -DEBUG 06-24 20:25:22 [manager.py:391] -ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:204.91981506347656ms total_cost_time:204.94508743286133ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11067 prompt_cache_len:5151 prompt_cache_ratio:0.46543778801843316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 -DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.20946812629699707 s -INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.2110605239868164 s -DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=133469832019886332970065160001084835426, time:1750767922.675348s req_ids:[8] -DEBUG 06-24 20:25:22 [manager.py:391] -ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:328.2022476196289ms total_cost_time:328.2475471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11068 prompt_cache_len:5151 prompt_cache_ratio:0.46539573545355983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 -DEBUG 06-24 20:25:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:22 [manager.py:224] router recive req id 8 cost time 0.10953760147094727 s -INFO 06-24 20:25:22 [manager.py:68] detokenization recv req id 8 cost time 0.11147904396057129 s -DEBUG 06-24 20:25:22 [manager.py:391] Prefill Batch: batch_id=108235777082007959142269489808914289363, time:1750767922.880083s req_ids:[8] -DEBUG 06-24 20:25:22 [manager.py:391] -ERROR 06-24 20:25:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:214.19286727905273ms total_cost_time:214.22815322875977ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:11069 prompt_cache_len:5151 prompt_cache_ratio:0.4653536904869455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 -DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10860633850097656 s -INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11095690727233887 s -DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=190533959141344948774104734191331908886, time:1750767923.0962203s req_ids:[8] -DEBUG 06-24 20:25:23 [manager.py:391] -ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:22 lightllm_req_id:8 first_token_cost:210.64472198486328ms total_cost_time:210.68978309631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11070 prompt_cache_len:5151 prompt_cache_ratio:0.46531165311653117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 -INFO 06-24 20:25:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10838079452514648 s -INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11039376258850098 s -DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=62324914675680166012592345846195423792, time:1750767923.311654s req_ids:[8] -DEBUG 06-24 20:25:23 [manager.py:391] -DEBUG 06-24 20:25:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 46241.738 tokens/s -DEBUG 06-24 20:25:23 [stats.py:37] Avg prompt tokens throughput: 46233.370 tokens/s -DEBUG 06-24 20:25:23 [stats.py:37] Avg generate tokens throughput: 8.368 tokens/s -ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:206.21275901794434ms total_cost_time:206.25543594360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11071 prompt_cache_len:5151 prompt_cache_ratio:0.46526962334025834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 -DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:23 [batch.py:51] router release req id 8 -INFO 06-24 20:25:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s -INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11052131652832031 s -DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=49218787953997543610336323362450078156, time:1750767923.5254507s req_ids:[8] -DEBUG 06-24 20:25:23 [manager.py:391] -ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:208.24027061462402ms total_cost_time:208.2836627960205ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11072 prompt_cache_len:5151 prompt_cache_ratio:0.46522760115606937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 -DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.11001729965209961 s -INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11201858520507812 s -DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=120041814334311103601898999032968909665, time:1750767923.7401342s req_ids:[8] -DEBUG 06-24 20:25:23 [manager.py:391] -ERROR 06-24 20:25:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:208.86874198913574ms total_cost_time:208.91404151916504ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11073 prompt_cache_len:5151 prompt_cache_ratio:0.46518558656190734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 -DEBUG 06-24 20:25:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:23 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s -INFO 06-24 20:25:23 [manager.py:68] detokenization recv req id 8 cost time 0.11070108413696289 s -DEBUG 06-24 20:25:23 [manager.py:391] Prefill Batch: batch_id=279846507558977257047905484755083750700, time:1750767923.9555442s req_ids:[8] -DEBUG 06-24 20:25:23 [manager.py:391] -ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:23 lightllm_req_id:8 first_token_cost:393.51606369018555ms total_cost_time:393.55993270874023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11074 prompt_cache_len:5151 prompt_cache_ratio:0.4651435795557161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 -DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s -INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.10991239547729492 s -DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=119782361379696284897016117227795364534, time:1750767924.354608s req_ids:[8] -DEBUG 06-24 20:25:24 [manager.py:391] -ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:209.7337245941162ms total_cost_time:209.7773551940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11075 prompt_cache_len:5151 prompt_cache_ratio:0.4651015801354402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 -DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.11052966117858887 s -INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.11258244514465332 s -DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=145639881033555246450459613603699706198, time:1750767924.571384s req_ids:[8] -DEBUG 06-24 20:25:24 [manager.py:391] -ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:209.0606689453125ms total_cost_time:209.0909481048584ms,out_token_counter:1 mean_per_token_cost_time: 0.030279159545898438ms prompt_token_num:11076 prompt_cache_len:5151 prompt_cache_ratio:0.46505958829902494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 -DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10882115364074707 s -INFO 06-24 20:25:24 [manager.py:68] detokenization recv req id 8 cost time 0.11095571517944336 s -DEBUG 06-24 20:25:24 [manager.py:391] Prefill Batch: batch_id=258926410392211647189162317411903107225, time:1750767924.7879057s req_ids:[8] -DEBUG 06-24 20:25:24 [manager.py:391] -ERROR 06-24 20:25:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:211.4708423614502ms total_cost_time:211.51375770568848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11077 prompt_cache_len:5151 prompt_cache_ratio:0.46501760404441633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 -DEBUG 06-24 20:25:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:24 [manager.py:224] router recive req id 8 cost time 0.10876178741455078 s -INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11064958572387695 s -DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=125958852354001450414928985445268669239, time:1750767925.0042796s req_ids:[8] -DEBUG 06-24 20:25:25 [manager.py:391] -ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:24 lightllm_req_id:8 first_token_cost:203.50313186645508ms total_cost_time:203.54723930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11078 prompt_cache_len:5151 prompt_cache_ratio:0.4649756273695613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 -DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.10855913162231445 s -INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11045360565185547 s -DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=269678914457357181411670260256868690077, time:1750767925.212702s req_ids:[8] -DEBUG 06-24 20:25:25 [manager.py:391] -ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:206.59422874450684ms total_cost_time:206.63809776306152ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11079 prompt_cache_len:5151 prompt_cache_ratio:0.46493365827240724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 -DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.1090846061706543 s -INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11115384101867676 s -DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=197382431154624714820614900134569532808, time:1750767925.4279888s req_ids:[8] -DEBUG 06-24 20:25:25 [manager.py:391] -ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:209.54656600952148ms total_cost_time:209.5932960510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11080 prompt_cache_len:5151 prompt_cache_ratio:0.46489169675090253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 -DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.109130859375 s -INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11117196083068848 s -DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=219254516025300040524167287147177090162, time:1750767925.6429906s req_ids:[8] -DEBUG 06-24 20:25:25 [manager.py:391] -ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:208.50229263305664ms total_cost_time:208.54640007019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11081 prompt_cache_len:5151 prompt_cache_ratio:0.4648497428029961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 -DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:25 [manager.py:224] router recive req id 8 cost time 0.10936880111694336 s -INFO 06-24 20:25:25 [manager.py:68] detokenization recv req id 8 cost time 0.11142396926879883 s -DEBUG 06-24 20:25:25 [manager.py:391] Prefill Batch: batch_id=309250596194093480125393263703027001964, time:1750767925.858672s req_ids:[8] -DEBUG 06-24 20:25:25 [manager.py:391] -ERROR 06-24 20:25:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:210.2832794189453ms total_cost_time:210.3266716003418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11082 prompt_cache_len:5151 prompt_cache_ratio:0.4648077964266378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 -DEBUG 06-24 20:25:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s -INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11096906661987305 s -DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=207157506587033543506168475684329136912, time:1750767926.0745623s req_ids:[8] -DEBUG 06-24 20:25:26 [manager.py:391] -ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:25 lightllm_req_id:8 first_token_cost:366.06860160827637ms total_cost_time:366.11294746398926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11083 prompt_cache_len:5151 prompt_cache_ratio:0.464765857619778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 -DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10832548141479492 s -INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030173301696777 s -DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=237840922273794409516084193594128282556, time:1750767926.4445338s req_ids:[8] -DEBUG 06-24 20:25:26 [manager.py:391] -ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:205.78289031982422ms total_cost_time:205.8253288269043ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11084 prompt_cache_len:5151 prompt_cache_ratio:0.4647239263803681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 -DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10832929611206055 s -INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.11033987998962402 s -DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=187977930147848187257274476633530567724, time:1750767926.6597478s req_ids:[8] -DEBUG 06-24 20:25:26 [manager.py:391] -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:210.62374114990234ms total_cost_time:210.66761016845703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11085 prompt_cache_len:5151 prompt_cache_ratio:0.46468200270635995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 -DEBUG 06-24 20:25:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:26 [manager.py:224] router recive req id 8 cost time 0.10859179496765137 s -INFO 06-24 20:25:26 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s -DEBUG 06-24 20:25:26 [manager.py:391] Prefill Batch: batch_id=167017124625254406992575970536084199568, time:1750767926.8764267s req_ids:[8] -DEBUG 06-24 20:25:26 [manager.py:391] -ERROR 06-24 20:25:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:210.62707901000977ms total_cost_time:210.67070960998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11086 prompt_cache_len:5151 prompt_cache_ratio:0.46464008659570627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 -DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10912537574768066 s -INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s -DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=303839045978607697831287277491128577907, time:1750767927.097608s req_ids:[8] -DEBUG 06-24 20:25:27 [manager.py:391] -ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:26 lightllm_req_id:8 first_token_cost:213.23919296264648ms total_cost_time:213.28401565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11087 prompt_cache_len:5151 prompt_cache_ratio:0.4645981780463606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 -DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.1087641716003418 s -INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11069107055664062 s -DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=290093537135643762700485837043146974348, time:1750767927.3118103s req_ids:[8] -DEBUG 06-24 20:25:27 [manager.py:391] -ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:208.27054977416992ms total_cost_time:208.30583572387695ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:11088 prompt_cache_len:5151 prompt_cache_ratio:0.46455627705627706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 -DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10898351669311523 s -INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.11097955703735352 s -DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=75745730969974428267303103263423555694, time:1750767927.5279648s req_ids:[8] -DEBUG 06-24 20:25:27 [manager.py:391] -ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:210.7090950012207ms total_cost_time:210.7548713684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11089 prompt_cache_len:5151 prompt_cache_ratio:0.4645143836234106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 -DEBUG 06-24 20:25:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:27 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s -INFO 06-24 20:25:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106715202331543 s -DEBUG 06-24 20:25:27 [manager.py:391] Prefill Batch: batch_id=245168853283421910559697992856874886657, time:1750767927.7417164s req_ids:[8] -DEBUG 06-24 20:25:27 [manager.py:391] -ERROR 06-24 20:25:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:27 lightllm_req_id:8 first_token_cost:373.0051517486572ms total_cost_time:373.0490207672119ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11090 prompt_cache_len:5151 prompt_cache_ratio:0.46447249774571686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 -DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10890889167785645 s -INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11089372634887695 s -DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=221447922227528537556424716756044270055, time:1750767928.122231s req_ids:[8] -DEBUG 06-24 20:25:28 [manager.py:391] -ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:208.57000350952148ms total_cost_time:208.61148834228516ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11091 prompt_cache_len:5151 prompt_cache_ratio:0.46443061942115227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 -DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10917544364929199 s -INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11123156547546387 s -DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=245191141696883479329481177620378910005, time:1750767928.3382533s req_ids:[8] -DEBUG 06-24 20:25:28 [manager.py:391] -ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:208.3873748779297ms total_cost_time:208.43100547790527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11092 prompt_cache_len:5151 prompt_cache_ratio:0.464388748647674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 -DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10801100730895996 s -INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988473892211914 s -DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=172588350429751307805867297552769050388, time:1750767928.5529335s req_ids:[8] -DEBUG 06-24 20:25:28 [manager.py:391] -ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:209.20991897583008ms total_cost_time:209.25545692443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11093 prompt_cache_len:5151 prompt_cache_ratio:0.4643468854232399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 -DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.1093606948852539 s -INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11128926277160645 s -DEBUG 06-24 20:25:28 [manager.py:391] Prefill Batch: batch_id=317517874249556394587978533129933962262, time:1750767928.7827284s req_ids:[8] -DEBUG 06-24 20:25:28 [manager.py:391] -ERROR 06-24 20:25:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:226.0568141937256ms total_cost_time:226.10116004943848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11094 prompt_cache_len:5151 prompt_cache_ratio:0.46430502974580856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 -DEBUG 06-24 20:25:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:28 [manager.py:224] router recive req id 8 cost time 0.10859394073486328 s -INFO 06-24 20:25:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044883728027344 s -DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=318818292330889325153635039285397752544, time:1750767929.000657s req_ids:[8] -DEBUG 06-24 20:25:29 [manager.py:391] -ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:28 lightllm_req_id:8 first_token_cost:210.21580696105957ms total_cost_time:210.26062965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11095 prompt_cache_len:5151 prompt_cache_ratio:0.46426318161333935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 -DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10895967483520508 s -INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095523834228516 s -DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=38161358032602167707836339268522799583, time:1750767929.2165122s req_ids:[8] -DEBUG 06-24 20:25:29 [manager.py:391] -ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:209.42950248718262ms total_cost_time:209.4743251800537ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11096 prompt_cache_len:5151 prompt_cache_ratio:0.46422134102379237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 -DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s -INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11043906211853027 s -DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=124416931320129021169732395325475745920, time:1750767929.4325705s req_ids:[8] -DEBUG 06-24 20:25:29 [manager.py:391] -ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:372.85685539245605ms total_cost_time:372.90072441101074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11097 prompt_cache_len:5151 prompt_cache_ratio:0.4641795079751284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 -DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:29 [manager.py:224] router recive req id 8 cost time 0.10817313194274902 s -INFO 06-24 20:25:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s -DEBUG 06-24 20:25:29 [manager.py:391] Prefill Batch: batch_id=51560055976150133814045539790501777640, time:1750767929.8101168s req_ids:[8] -DEBUG 06-24 20:25:29 [manager.py:391] -ERROR 06-24 20:25:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:207.19218254089355ms total_cost_time:207.23438262939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11098 prompt_cache_len:5151 prompt_cache_ratio:0.46413768246530906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 -DEBUG 06-24 20:25:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10866975784301758 s -INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s -DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=116171309960766227373065629902343167505, time:1750767930.0237982s req_ids:[8] -DEBUG 06-24 20:25:30 [manager.py:391] -ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:29 lightllm_req_id:8 first_token_cost:206.99357986450195ms total_cost_time:207.03721046447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11099 prompt_cache_len:5151 prompt_cache_ratio:0.4640958644922966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 -DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10843992233276367 s -INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11040043830871582 s -DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=182196979992663665022198272781392172937, time:1750767930.2389524s req_ids:[8] -DEBUG 06-24 20:25:30 [manager.py:391] -ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:207.71193504333496ms total_cost_time:207.75437355041504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11100 prompt_cache_len:5151 prompt_cache_ratio:0.46405405405405403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 -DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10913443565368652 s -INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110076904296875 s -DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=233354501475540860522152146710280193868, time:1750767930.4527864s req_ids:[8] -DEBUG 06-24 20:25:30 [manager.py:391] -ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:199.16987419128418ms total_cost_time:199.21302795410156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11101 prompt_cache_len:5151 prompt_cache_ratio:0.46401225114854516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 -DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10828685760498047 s -INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11017847061157227 s -DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=179281723532368675573574127619412070963, time:1750767930.6649694s req_ids:[8] -DEBUG 06-24 20:25:30 [manager.py:391] -ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:216.55583381652832ms total_cost_time:216.599702835083ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11102 prompt_cache_len:5151 prompt_cache_ratio:0.46397045577373447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 -DEBUG 06-24 20:25:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:30 [manager.py:224] router recive req id 8 cost time 0.10846257209777832 s -INFO 06-24 20:25:30 [manager.py:68] detokenization recv req id 8 cost time 0.11097407341003418 s -DEBUG 06-24 20:25:30 [manager.py:391] Prefill Batch: batch_id=87235901923087418351773157862292902954, time:1750767930.8799567s req_ids:[8] -DEBUG 06-24 20:25:30 [manager.py:391] -ERROR 06-24 20:25:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:209.70749855041504ms total_cost_time:209.7492218017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11103 prompt_cache_len:5151 prompt_cache_ratio:0.46392866792758714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 -DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.20918488502502441 s -INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.21095752716064453 s -DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=226373184080802225085790709708248195854, time:1750767931.2281322s req_ids:[8] -DEBUG 06-24 20:25:31 [manager.py:391] -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:30 lightllm_req_id:8 first_token_cost:327.3022174835205ms total_cost_time:327.359676361084ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:11104 prompt_cache_len:5151 prompt_cache_ratio:0.4638868876080692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 -DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.11044073104858398 s -INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.11236310005187988 s -DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=15633322016495255087389502373661203996, time:1750767931.4371145s req_ids:[8] -DEBUG 06-24 20:25:31 [manager.py:391] -ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:210.68120002746582ms total_cost_time:210.723876953125ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11105 prompt_cache_len:5151 prompt_cache_ratio:0.4638451148131472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 -DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s -INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997796058654785 s -DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=295886789954009101897328696003224553245, time:1750767931.6477757s req_ids:[8] -DEBUG 06-24 20:25:31 [manager.py:391] -ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:204.65731620788574ms total_cost_time:204.69951629638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11106 prompt_cache_len:5151 prompt_cache_ratio:0.4638033495407888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 -DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:31 [manager.py:224] router recive req id 8 cost time 0.10925722122192383 s -INFO 06-24 20:25:31 [manager.py:68] detokenization recv req id 8 cost time 0.11120438575744629 s -DEBUG 06-24 20:25:31 [manager.py:391] Prefill Batch: batch_id=107915076213899794467901080151949180408, time:1750767931.8589225s req_ids:[8] -DEBUG 06-24 20:25:31 [manager.py:391] -ERROR 06-24 20:25:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:209.2154026031494ms total_cost_time:209.2571258544922ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11107 prompt_cache_len:5151 prompt_cache_ratio:0.4637615917889619 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 -DEBUG 06-24 20:25:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10756874084472656 s -INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.10936951637268066 s -DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=305710140614759888826700020649970614928, time:1750767932.0727541s req_ids:[8] -DEBUG 06-24 20:25:32 [manager.py:391] -ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:31 lightllm_req_id:8 first_token_cost:166.85914993286133ms total_cost_time:166.9018268585205ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11108 prompt_cache_len:5151 prompt_cache_ratio:0.46371984155563556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 -DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10837244987487793 s -INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11022830009460449 s -DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=212736682107379962593283161525997070800, time:1750767932.2486763s req_ids:[8] -DEBUG 06-24 20:25:32 [manager.py:391] -ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:168.196439743042ms total_cost_time:168.23697090148926ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:11109 prompt_cache_len:5151 prompt_cache_ratio:0.46367809883877936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 -DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.10849571228027344 s -INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11035013198852539 s -DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=195347392297703852837044612140960351594, time:1750767932.4185035s req_ids:[8] -DEBUG 06-24 20:25:32 [manager.py:391] -ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:195.25671005249023ms total_cost_time:195.2989101409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11110 prompt_cache_len:5151 prompt_cache_ratio:0.4636363636363636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 -DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.1103060245513916 s -INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.11270642280578613 s -DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=84596657245073357369268745342761533559, time:1750767932.6204383s req_ids:[8] -DEBUG 06-24 20:25:32 [manager.py:391] -ERROR 06-24 20:25:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:203.81522178649902ms total_cost_time:203.8578987121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11111 prompt_cache_len:5151 prompt_cache_ratio:0.46359463594635947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 -DEBUG 06-24 20:25:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:32 [manager.py:224] router recive req id 8 cost time 0.20949673652648926 s -INFO 06-24 20:25:32 [manager.py:68] detokenization recv req id 8 cost time 0.2112898826599121 s -DEBUG 06-24 20:25:32 [manager.py:391] Prefill Batch: batch_id=128503035966156720912968903011065700237, time:1750767932.9635081s req_ids:[8] -DEBUG 06-24 20:25:32 [manager.py:391] -ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:32 lightllm_req_id:8 first_token_cost:324.446439743042ms total_cost_time:324.4905471801758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11112 prompt_cache_len:5151 prompt_cache_ratio:0.4635529157667387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 -DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10955357551574707 s -INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.11199784278869629 s -DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=145404330724008354895274998548086240101, time:1750767933.1598861s req_ids:[8] -DEBUG 06-24 20:25:33 [manager.py:391] -ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:206.15458488464355ms total_cost_time:206.20012283325195ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11113 prompt_cache_len:5151 prompt_cache_ratio:0.4635112030954738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 -DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10813426971435547 s -INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.1100316047668457 s -DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=9144302920476584488922873767895384919, time:1750767933.3737514s req_ids:[8] -DEBUG 06-24 20:25:33 [manager.py:391] -DEBUG 06-24 20:25:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 47413.887 tokens/s -DEBUG 06-24 20:25:33 [stats.py:37] Avg prompt tokens throughput: 47405.340 tokens/s -DEBUG 06-24 20:25:33 [stats.py:37] Avg generate tokens throughput: 8.547 tokens/s -ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:208.00018310546875ms total_cost_time:208.04500579833984ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11114 prompt_cache_len:5151 prompt_cache_ratio:0.46346949793053804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 -DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10966324806213379 s -INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.1116340160369873 s -DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=95782170374879522558022526129305773215, time:1750767933.5885692s req_ids:[8] -DEBUG 06-24 20:25:33 [manager.py:391] -ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:206.02774620056152ms total_cost_time:206.07280731201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11115 prompt_cache_len:5151 prompt_cache_ratio:0.46342780026990554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 -DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:33 [manager.py:224] router recive req id 8 cost time 0.10850310325622559 s -INFO 06-24 20:25:33 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s -DEBUG 06-24 20:25:33 [manager.py:391] Prefill Batch: batch_id=43193749227152974251874695833952128618, time:1750767933.801056s req_ids:[8] -DEBUG 06-24 20:25:33 [manager.py:391] -ERROR 06-24 20:25:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:203.42469215393066ms total_cost_time:203.46760749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11116 prompt_cache_len:5151 prompt_cache_ratio:0.4633861101115509 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 -DEBUG 06-24 20:25:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10806870460510254 s -INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.10992574691772461 s -DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=115772606167034794302507912061604900434, time:1750767934.0105765s req_ids:[8] -DEBUG 06-24 20:25:34 [manager.py:391] -ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:33 lightllm_req_id:8 first_token_cost:207.65924453735352ms total_cost_time:207.70716667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:11117 prompt_cache_len:5151 prompt_cache_ratio:0.46334442745344967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 -DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s -INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s -DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=248177335806546126270314759246855419441, time:1750767934.2265623s req_ids:[8] -DEBUG 06-24 20:25:34 [manager.py:391] -ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:210.77990531921387ms total_cost_time:210.82448959350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11118 prompt_cache_len:5151 prompt_cache_ratio:0.463302752293578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 -DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10895705223083496 s -INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.11095118522644043 s -DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=252095948888419129872148335923691411078, time:1750767934.4432082s req_ids:[8] -DEBUG 06-24 20:25:34 [manager.py:391] -ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:367.7494525909424ms total_cost_time:367.7937984466553ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11119 prompt_cache_len:5151 prompt_cache_ratio:0.4632610846299128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 -DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:34 [manager.py:224] router recive req id 8 cost time 0.10952091217041016 s -INFO 06-24 20:25:34 [manager.py:68] detokenization recv req id 8 cost time 0.11141562461853027 s -DEBUG 06-24 20:25:34 [manager.py:391] Prefill Batch: batch_id=32435762298879189182419070200553278517, time:1750767934.8157332s req_ids:[8] -DEBUG 06-24 20:25:34 [manager.py:391] -ERROR 06-24 20:25:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:202.52084732055664ms total_cost_time:202.56447792053223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11120 prompt_cache_len:5151 prompt_cache_ratio:0.46321942446043163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 -DEBUG 06-24 20:25:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10845088958740234 s -INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11047840118408203 s -DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=127026865755350757135458866933897548759, time:1750767935.0268588s req_ids:[8] -DEBUG 06-24 20:25:35 [manager.py:391] -ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:34 lightllm_req_id:8 first_token_cost:206.0871124267578ms total_cost_time:206.129789352417ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11121 prompt_cache_len:5151 prompt_cache_ratio:0.46317777178311303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 -DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10774755477905273 s -INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.10977792739868164 s -DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=145121599873230649645292789453545336048, time:1750767935.238132s req_ids:[8] -DEBUG 06-24 20:25:35 [manager.py:391] -ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:211.17615699768066ms total_cost_time:211.21954917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11122 prompt_cache_len:5151 prompt_cache_ratio:0.463136126595936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 -DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10825848579406738 s -INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11020827293395996 s -DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=130288643086958652924169537159688768963, time:1750767935.458125s req_ids:[8] -DEBUG 06-24 20:25:35 [manager.py:391] -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:221.4512825012207ms total_cost_time:221.4961051940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11123 prompt_cache_len:5151 prompt_cache_ratio:0.4630944888968803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 -DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10793161392211914 s -INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.1099998950958252 s -DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=31164107278039846269554156588571841989, time:1750767935.6903238s req_ids:[8] -DEBUG 06-24 20:25:35 [manager.py:391] -ERROR 06-24 20:25:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:218.73784065246582ms total_cost_time:218.7809944152832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11124 prompt_cache_len:5151 prompt_cache_ratio:0.46305285868392665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 -DEBUG 06-24 20:25:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:35 [manager.py:224] router recive req id 8 cost time 0.10925626754760742 s -INFO 06-24 20:25:35 [manager.py:68] detokenization recv req id 8 cost time 0.11129117012023926 s -DEBUG 06-24 20:25:35 [manager.py:391] Prefill Batch: batch_id=12673321060624182540699073370241257001, time:1750767935.9079998s req_ids:[8] -DEBUG 06-24 20:25:35 [manager.py:391] -ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:35 lightllm_req_id:8 first_token_cost:209.73801612854004ms total_cost_time:209.78236198425293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11125 prompt_cache_len:5151 prompt_cache_ratio:0.46301123595505617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 -DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.31128692626953125 s -INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.3132343292236328 s -DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=308635690007744316379971799515130099524, time:1750767936.3317842s req_ids:[8] -DEBUG 06-24 20:25:36 [manager.py:391] -ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:422.9094982147217ms total_cost_time:422.95360565185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11126 prompt_cache_len:5151 prompt_cache_ratio:0.46296962070825093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 -DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.1078193187713623 s -INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.10979127883911133 s -DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=120979393721561685099125683157420093553, time:1750767936.5567918s req_ids:[8] -DEBUG 06-24 20:25:36 [manager.py:391] -ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:207.17358589172363ms total_cost_time:207.2160243988037ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11127 prompt_cache_len:5151 prompt_cache_ratio:0.46292801294149366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 -DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.10902285575866699 s -INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.11092972755432129 s -DEBUG 06-24 20:25:36 [manager.py:391] Prefill Batch: batch_id=221382809260110510572216169854992769527, time:1750767936.7786467s req_ids:[8] -DEBUG 06-24 20:25:36 [manager.py:391] -ERROR 06-24 20:25:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:223.93465042114258ms total_cost_time:223.97971153259277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11128 prompt_cache_len:5151 prompt_cache_ratio:0.4628864126527678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 -DEBUG 06-24 20:25:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:36 [manager.py:224] router recive req id 8 cost time 0.10931897163391113 s -INFO 06-24 20:25:36 [manager.py:68] detokenization recv req id 8 cost time 0.11124277114868164 s -DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=86994068965633756317730874767842180970, time:1750767937.000422s req_ids:[8] -DEBUG 06-24 20:25:37 [manager.py:391] -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:36 lightllm_req_id:8 first_token_cost:207.48567581176758ms total_cost_time:207.53026008605957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11129 prompt_cache_len:5151 prompt_cache_ratio:0.4628448198400575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 -DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10770726203918457 s -INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s -DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=196797045874225654180459128228331497825, time:1750767937.2104974s req_ids:[8] -DEBUG 06-24 20:25:37 [manager.py:391] -ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:202.3320198059082ms total_cost_time:202.3768424987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11130 prompt_cache_len:5151 prompt_cache_ratio:0.4628032345013477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 -DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10769844055175781 s -INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10954999923706055 s -DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=339690355215540402093425954345440820495, time:1750767937.4222012s req_ids:[8] -DEBUG 06-24 20:25:37 [manager.py:391] -ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:210.9365463256836ms total_cost_time:210.9813690185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11131 prompt_cache_len:5151 prompt_cache_ratio:0.46276165663462404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 -DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:37 [batch.py:51] router release req id 8 -INFO 06-24 20:25:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:37 [manager.py:224] router recive req id 8 cost time 0.10801529884338379 s -INFO 06-24 20:25:37 [manager.py:68] detokenization recv req id 8 cost time 0.10974574089050293 s -DEBUG 06-24 20:25:37 [manager.py:391] Prefill Batch: batch_id=17244442465584114171153167631917277710, time:1750767937.6389496s req_ids:[8] -DEBUG 06-24 20:25:37 [manager.py:391] -ERROR 06-24 20:25:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:379.67681884765625ms total_cost_time:379.72092628479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11132 prompt_cache_len:5151 prompt_cache_ratio:0.4627200862378728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 -DEBUG 06-24 20:25:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10887956619262695 s -INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11067485809326172 s -DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=282248845161600412671559526758315249379, time:1750767938.0227494s req_ids:[8] -DEBUG 06-24 20:25:38 [manager.py:391] -ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:37 lightllm_req_id:8 first_token_cost:204.6351432800293ms total_cost_time:204.67901229858398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11133 prompt_cache_len:5151 prompt_cache_ratio:0.4626785233090811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 -DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s -INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.10982298851013184 s -DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=271121871529044472867643759891134182395, time:1750767938.2393641s req_ids:[8] -DEBUG 06-24 20:25:38 [manager.py:391] -ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:216.8567180633545ms total_cost_time:216.9020175933838ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11134 prompt_cache_len:5151 prompt_cache_ratio:0.46263696784623676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 -DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10843729972839355 s -INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s -DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=124942762680647127012528418742735616080, time:1750767938.4636104s req_ids:[8] -DEBUG 06-24 20:25:38 [manager.py:391] -ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:219.4969654083252ms total_cost_time:219.54083442687988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11135 prompt_cache_len:5151 prompt_cache_ratio:0.46259541984732827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 -DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10860395431518555 s -INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11023378372192383 s -DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=121423580737532697336283195293369227221, time:1750767938.6822505s req_ids:[8] -DEBUG 06-24 20:25:38 [manager.py:391] -ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:211.6711139678955ms total_cost_time:211.7166519165039ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11136 prompt_cache_len:5151 prompt_cache_ratio:0.4625538793103448 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 -DEBUG 06-24 20:25:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:38 [manager.py:224] router recive req id 8 cost time 0.10858154296875 s -INFO 06-24 20:25:38 [manager.py:68] detokenization recv req id 8 cost time 0.11037635803222656 s -DEBUG 06-24 20:25:38 [manager.py:391] Prefill Batch: batch_id=3125788643256561957500492272091304599, time:1750767938.9003363s req_ids:[8] -DEBUG 06-24 20:25:38 [manager.py:391] -ERROR 06-24 20:25:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:210.91604232788086ms total_cost_time:210.95991134643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11137 prompt_cache_len:5151 prompt_cache_ratio:0.4625123462332765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 -DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10831594467163086 s -INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.11016631126403809 s -DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=61874171904735315196740611368707132493, time:1750767939.1180692s req_ids:[8] -DEBUG 06-24 20:25:39 [manager.py:391] -ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:38 lightllm_req_id:8 first_token_cost:209.34247970581055ms total_cost_time:209.38491821289062ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11138 prompt_cache_len:5151 prompt_cache_ratio:0.4624708206141138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 -DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10866570472717285 s -INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.1104438304901123 s -DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=186881242741176309753745312091076405172, time:1750767939.3299415s req_ids:[8] -DEBUG 06-24 20:25:39 [manager.py:391] -ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:205.02614974975586ms total_cost_time:205.06954193115234ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11139 prompt_cache_len:5151 prompt_cache_ratio:0.46242930245084835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 -DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10898280143737793 s -INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s -DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=262671635216697498895206479420789481210, time:1750767939.5406687s req_ids:[8] -DEBUG 06-24 20:25:39 [manager.py:391] -ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:200.9906768798828ms total_cost_time:201.0347843170166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11140 prompt_cache_len:5151 prompt_cache_ratio:0.46238779174147215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 -DEBUG 06-24 20:25:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:39 [manager.py:224] router recive req id 8 cost time 0.10828852653503418 s -INFO 06-24 20:25:39 [manager.py:68] detokenization recv req id 8 cost time 0.10997629165649414 s -DEBUG 06-24 20:25:39 [manager.py:391] Prefill Batch: batch_id=188044664937380550806604728511659837786, time:1750767939.7468073s req_ids:[8] -DEBUG 06-24 20:25:39 [manager.py:391] -ERROR 06-24 20:25:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:39 lightllm_req_id:8 first_token_cost:362.0104789733887ms total_cost_time:362.05577850341797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11141 prompt_cache_len:5151 prompt_cache_ratio:0.4623462884839781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 -DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10895848274230957 s -INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11063981056213379 s -DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=337952715152166917315863431125231126135, time:1750767940.1153479s req_ids:[8] -DEBUG 06-24 20:25:40 [manager.py:391] -ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:205.6131362915039ms total_cost_time:205.65509796142578ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11142 prompt_cache_len:5151 prompt_cache_ratio:0.4623047926763597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 -DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10972762107849121 s -INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11156535148620605 s -DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=259900849962681352694072210396452252238, time:1750767940.3273528s req_ids:[8] -DEBUG 06-24 20:25:40 [manager.py:391] -ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:206.73680305480957ms total_cost_time:206.77947998046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11143 prompt_cache_len:5151 prompt_cache_ratio:0.46226330431661133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 -DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10932683944702148 s -INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s -DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=25046574423748518068777328530932896507, time:1750767940.540604s req_ids:[8] -DEBUG 06-24 20:25:40 [manager.py:391] -ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:206.0708999633789ms total_cost_time:206.11333847045898ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11144 prompt_cache_len:5151 prompt_cache_ratio:0.46222182340272794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 -DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s -INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.10959053039550781 s -DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=160821965760916896419444989153616375382, time:1750767940.75258s req_ids:[8] -DEBUG 06-24 20:25:40 [manager.py:391] -ERROR 06-24 20:25:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:202.77714729309082ms total_cost_time:202.83055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:11145 prompt_cache_len:5151 prompt_cache_ratio:0.46218034993270524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 -DEBUG 06-24 20:25:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:40 [manager.py:224] router recive req id 8 cost time 0.10945606231689453 s -INFO 06-24 20:25:40 [manager.py:68] detokenization recv req id 8 cost time 0.11123228073120117 s -DEBUG 06-24 20:25:40 [manager.py:391] Prefill Batch: batch_id=259626571560270840883194891904077338875, time:1750767940.9622366s req_ids:[8] -DEBUG 06-24 20:25:40 [manager.py:391] -ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:40 lightllm_req_id:8 first_token_cost:215.3785228729248ms total_cost_time:215.42119979858398ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11146 prompt_cache_len:5151 prompt_cache_ratio:0.4621388839045397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 -DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:41 [batch.py:51] router release req id 8 -INFO 06-24 20:25:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10793566703796387 s -INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.10964155197143555 s -DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=96998974849376860085037391574407460002, time:1750767941.183218s req_ids:[8] -DEBUG 06-24 20:25:41 [manager.py:391] -ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:206.24303817749023ms total_cost_time:206.28762245178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11147 prompt_cache_len:5151 prompt_cache_ratio:0.4620974253162286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 -DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s -INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.11078333854675293 s -DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=3653920264216194599469784658078446045, time:1750767941.397143s req_ids:[8] -DEBUG 06-24 20:25:41 [manager.py:391] -ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:368.4954643249512ms total_cost_time:368.54004859924316ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11148 prompt_cache_len:5151 prompt_cache_ratio:0.46205597416576966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 -DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s -INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030316352844238 s -DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=36642264767058044978014565974041359019, time:1750767941.7707489s req_ids:[8] -DEBUG 06-24 20:25:41 [manager.py:391] -ERROR 06-24 20:25:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:204.90264892578125ms total_cost_time:204.94604110717773ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11149 prompt_cache_len:5151 prompt_cache_ratio:0.46201453045116153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 -DEBUG 06-24 20:25:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:41 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s -INFO 06-24 20:25:41 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s -DEBUG 06-24 20:25:41 [manager.py:391] Prefill Batch: batch_id=118839201808375947487591079649151722695, time:1750767941.9840114s req_ids:[8] -DEBUG 06-24 20:25:41 [manager.py:391] -ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:41 lightllm_req_id:8 first_token_cost:181.2129020690918ms total_cost_time:181.2584400177002ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11150 prompt_cache_len:5151 prompt_cache_ratio:0.4619730941704036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 -DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s -INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s -DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=107828279208627329649730299709555950553, time:1750767942.1694126s req_ids:[8] -DEBUG 06-24 20:25:42 [manager.py:391] -ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:201.4937400817871ms total_cost_time:201.53498649597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11151 prompt_cache_len:5151 prompt_cache_ratio:0.4619316653214958 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 -DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10853290557861328 s -INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11046028137207031 s -DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=54046315724757524841583394225002929322, time:1750767942.3775918s req_ids:[8] -DEBUG 06-24 20:25:42 [manager.py:391] -ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:207.2126865386963ms total_cost_time:207.25560188293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11152 prompt_cache_len:5151 prompt_cache_ratio:0.46189024390243905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 -DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s -INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11025691032409668 s -DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=189812344605512561412568087932388314541, time:1750767942.5932176s req_ids:[8] -DEBUG 06-24 20:25:42 [manager.py:391] -ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:203.19390296936035ms total_cost_time:203.23657989501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11153 prompt_cache_len:5151 prompt_cache_ratio:0.46184882991123466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 -DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:42 [manager.py:224] router recive req id 8 cost time 0.10958409309387207 s -INFO 06-24 20:25:42 [manager.py:68] detokenization recv req id 8 cost time 0.11149787902832031 s -DEBUG 06-24 20:25:42 [manager.py:391] Prefill Batch: batch_id=172551300430105657011214463668115915341, time:1750767942.801311s req_ids:[8] -DEBUG 06-24 20:25:42 [manager.py:391] -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:207.10301399230957ms total_cost_time:207.14759826660156ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11154 prompt_cache_len:5151 prompt_cache_ratio:0.46180742334588487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 -DEBUG 06-24 20:25:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10775566101074219 s -INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.10972476005554199 s -DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=139648324707433889983516894084049022510, time:1750767943.015711s req_ids:[8] -DEBUG 06-24 20:25:43 [manager.py:391] -ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:42 lightllm_req_id:8 first_token_cost:377.8820037841797ms total_cost_time:377.9277801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11155 prompt_cache_len:5151 prompt_cache_ratio:0.46176602420439267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 -DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10924839973449707 s -INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.1111762523651123 s -DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=5203874965181619320558996852881165799, time:1750767943.3993924s req_ids:[8] -DEBUG 06-24 20:25:43 [manager.py:391] -DEBUG 06-24 20:25:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 46658.702 tokens/s -DEBUG 06-24 20:25:43 [stats.py:37] Avg prompt tokens throughput: 46650.324 tokens/s -DEBUG 06-24 20:25:43 [stats.py:37] Avg generate tokens throughput: 8.379 tokens/s -ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:208.48512649536133ms total_cost_time:208.5280418395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11156 prompt_cache_len:5151 prompt_cache_ratio:0.4617246324847616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 -DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10820555686950684 s -INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.11027288436889648 s -DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=49961118885089864852284929686999496263, time:1750767943.6125195s req_ids:[8] -DEBUG 06-24 20:25:43 [manager.py:391] -ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:207.21817016601562ms total_cost_time:207.2598934173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11157 prompt_cache_len:5151 prompt_cache_ratio:0.46168324818499595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 -DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:43 [manager.py:224] router recive req id 8 cost time 0.10920524597167969 s -INFO 06-24 20:25:43 [manager.py:68] detokenization recv req id 8 cost time 0.11132979393005371 s -DEBUG 06-24 20:25:43 [manager.py:391] Prefill Batch: batch_id=329996763042401809050833955923634684589, time:1750767943.8286061s req_ids:[8] -DEBUG 06-24 20:25:43 [manager.py:391] -ERROR 06-24 20:25:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:208.87184143066406ms total_cost_time:208.91571044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11158 prompt_cache_len:5151 prompt_cache_ratio:0.4616418713031009 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 -DEBUG 06-24 20:25:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s -INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.1098475456237793 s -DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=244804736377187328062874283071988572325, time:1750767944.0455852s req_ids:[8] -DEBUG 06-24 20:25:44 [manager.py:391] -ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:43 lightllm_req_id:8 first_token_cost:210.89863777160645ms total_cost_time:210.94250679016113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11159 prompt_cache_len:5151 prompt_cache_ratio:0.4616005018370822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 -DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.10902547836303711 s -INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.11074209213256836 s -DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=330148272395400578762552907403444886990, time:1750767944.270929s req_ids:[8] -DEBUG 06-24 20:25:44 [manager.py:391] -ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:221.82679176330566ms total_cost_time:221.86923027038574ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11160 prompt_cache_len:5151 prompt_cache_ratio:0.4615591397849462 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 -DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:44 [batch.py:51] router release req id 8 -DEBUG 06-24 20:25:44 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:44 [manager.py:283] -DEBUG 06-24 20:25:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:44 [manager.py:284] -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.10596561431884766 s -INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.10811305046081543 s -DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=57409527709465018589801429172703123517, time:1750767944.490232s req_ids:[8] -DEBUG 06-24 20:25:44 [manager.py:391] -ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:212.0990753173828ms total_cost_time:212.1427059173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11161 prompt_cache_len:5151 prompt_cache_ratio:0.4615177851447003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 -DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:44 [manager.py:224] router recive req id 8 cost time 0.20920705795288086 s -INFO 06-24 20:25:44 [manager.py:68] detokenization recv req id 8 cost time 0.21117901802062988 s -DEBUG 06-24 20:25:44 [manager.py:391] Prefill Batch: batch_id=217664819202473743652153413774704696039, time:1750767944.840448s req_ids:[8] -DEBUG 06-24 20:25:44 [manager.py:391] -ERROR 06-24 20:25:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:323.2393264770508ms total_cost_time:323.2836723327637ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11162 prompt_cache_len:5151 prompt_cache_ratio:0.4614764379143523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 -DEBUG 06-24 20:25:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10878229141235352 s -INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11085176467895508 s -DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=46445724330574002254492969347188547297, time:1750767945.0343122s req_ids:[8] -DEBUG 06-24 20:25:45 [manager.py:391] -ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:44 lightllm_req_id:8 first_token_cost:210.28566360473633ms total_cost_time:210.32953262329102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11163 prompt_cache_len:5151 prompt_cache_ratio:0.4614350980919108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 -DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10823440551757812 s -INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11016654968261719 s -DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=149074844862896461652644093698713273875, time:1750767945.2513912s req_ids:[8] -DEBUG 06-24 20:25:45 [manager.py:391] -ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:207.85021781921387ms total_cost_time:207.89337158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11164 prompt_cache_len:5151 prompt_cache_ratio:0.46139376567538515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 -DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10852193832397461 s -INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s -DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=119763768197880086668122498636452411216, time:1750767945.4649055s req_ids:[8] -DEBUG 06-24 20:25:45 [manager.py:391] -ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:218.07098388671875ms total_cost_time:218.11485290527344ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11165 prompt_cache_len:5151 prompt_cache_ratio:0.46135244066278547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 -DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s -INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s -DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=84079207668812155783665912506435679124, time:1750767945.6950543s req_ids:[8] -DEBUG 06-24 20:25:45 [manager.py:391] -ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:213.6697769165039ms total_cost_time:213.71173858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11166 prompt_cache_len:5151 prompt_cache_ratio:0.4613111230521225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 -DEBUG 06-24 20:25:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:45 [manager.py:224] router recive req id 8 cost time 0.10805106163024902 s -INFO 06-24 20:25:45 [manager.py:68] detokenization recv req id 8 cost time 0.10978078842163086 s -DEBUG 06-24 20:25:45 [manager.py:391] Prefill Batch: batch_id=165883030364847782361472892921901271213, time:1750767945.910225s req_ids:[8] -DEBUG 06-24 20:25:45 [manager.py:391] -ERROR 06-24 20:25:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:169.32439804077148ms total_cost_time:169.36659812927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11167 prompt_cache_len:5151 prompt_cache_ratio:0.4612698128414077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 -DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.10834860801696777 s -INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.1100151538848877 s -DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=54132622284027162537311498691708173450, time:1750767946.0890448s req_ids:[8] -DEBUG 06-24 20:25:46 [manager.py:391] -ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:45 lightllm_req_id:8 first_token_cost:199.88489151000977ms total_cost_time:199.93138313293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11168 prompt_cache_len:5151 prompt_cache_ratio:0.4612285100286533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 -DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.1088714599609375 s -INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.11089110374450684 s -DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=24537302393121698266634140219236330365, time:1750767946.29093s req_ids:[8] -DEBUG 06-24 20:25:46 [manager.py:391] -ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:201.5364170074463ms total_cost_time:201.57909393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11169 prompt_cache_len:5151 prompt_cache_ratio:0.4611872146118721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 -DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.3106367588043213 s -INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.3124217987060547 s -DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=123982638266107499824677076915041570758, time:1750767946.7068677s req_ids:[8] -DEBUG 06-24 20:25:46 [manager.py:391] -ERROR 06-24 20:25:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:422.3780632019043ms total_cost_time:422.4209785461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11170 prompt_cache_len:5151 prompt_cache_ratio:0.4611459265890779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 -DEBUG 06-24 20:25:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:46 [manager.py:224] router recive req id 8 cost time 0.10792160034179688 s -INFO 06-24 20:25:46 [manager.py:68] detokenization recv req id 8 cost time 0.10965633392333984 s -DEBUG 06-24 20:25:46 [manager.py:391] Prefill Batch: batch_id=106452595989471815785371611743839577572, time:1750767946.9303849s req_ids:[8] -DEBUG 06-24 20:25:46 [manager.py:391] -INFO 06-24 20:25:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:46 lightllm_req_id:8 first_token_cost:209.23089981079102ms total_cost_time:209.275484085083ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11171 prompt_cache_len:5151 prompt_cache_ratio:0.4611046459582848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 -DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.1084897518157959 s -INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s -DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=114685928309222512310488314422587807485, time:1750767947.1451664s req_ids:[8] -DEBUG 06-24 20:25:47 [manager.py:391] -ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:209.01155471801758ms total_cost_time:209.05637741088867ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11172 prompt_cache_len:5151 prompt_cache_ratio:0.4610633727175081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 -DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10873126983642578 s -INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049079895019531 s -DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=72316770941450827702610844038710298986, time:1750767947.36655s req_ids:[8] -DEBUG 06-24 20:25:47 [manager.py:391] -ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:214.21480178833008ms total_cost_time:214.25747871398926ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11173 prompt_cache_len:5151 prompt_cache_ratio:0.46102210686476325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 -DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10784077644348145 s -INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.10956072807312012 s -DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=87817464921592979368196152741620303663, time:1750767947.581398s req_ids:[8] -DEBUG 06-24 20:25:47 [manager.py:391] -ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:205.74522018432617ms total_cost_time:205.78789710998535ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11174 prompt_cache_len:5151 prompt_cache_ratio:0.46098084839806697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 -DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:47 [manager.py:224] router recive req id 8 cost time 0.10854935646057129 s -INFO 06-24 20:25:47 [manager.py:68] detokenization recv req id 8 cost time 0.11048412322998047 s -DEBUG 06-24 20:25:47 [manager.py:391] Prefill Batch: batch_id=148082750295088411465971644469628405429, time:1750767947.8032556s req_ids:[8] -DEBUG 06-24 20:25:47 [manager.py:391] -ERROR 06-24 20:25:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:220.78561782836914ms total_cost_time:220.82948684692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11175 prompt_cache_len:5151 prompt_cache_ratio:0.46093959731543627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 -DEBUG 06-24 20:25:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s -INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.11111927032470703 s -DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=128340550515642918757719751375923611887, time:1750767948.021261s req_ids:[8] -DEBUG 06-24 20:25:48 [manager.py:391] -ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:47 lightllm_req_id:8 first_token_cost:209.98811721801758ms total_cost_time:210.03031730651855ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11176 prompt_cache_len:5151 prompt_cache_ratio:0.46089835361488907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 -DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10781693458557129 s -INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.10961723327636719 s -DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=193775786252250670810907511983021223711, time:1750767948.235575s req_ids:[8] -DEBUG 06-24 20:25:48 [manager.py:391] -ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:365.0550842285156ms total_cost_time:365.100622177124ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11177 prompt_cache_len:5151 prompt_cache_ratio:0.46085711729444395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 -DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s -INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s -DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=319276296615278593772124852534154969861, time:1750767948.6057298s req_ids:[8] -DEBUG 06-24 20:25:48 [manager.py:391] -ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:198.70710372924805ms total_cost_time:198.75144958496094ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11178 prompt_cache_len:5151 prompt_cache_ratio:0.46081588835212023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 -DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:48 [manager.py:224] router recive req id 8 cost time 0.10766768455505371 s -INFO 06-24 20:25:48 [manager.py:68] detokenization recv req id 8 cost time 0.10952353477478027 s -DEBUG 06-24 20:25:48 [manager.py:391] Prefill Batch: batch_id=147860356139436930635487677766282570104, time:1750767948.814506s req_ids:[8] -DEBUG 06-24 20:25:48 [manager.py:391] -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:201.08962059020996ms total_cost_time:201.13468170166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11179 prompt_cache_len:5151 prompt_cache_ratio:0.4607746667859379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 -DEBUG 06-24 20:25:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10838532447814941 s -INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11033797264099121 s -DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=189293136119145321284639818961578370738, time:1750767949.019157s req_ids:[8] -DEBUG 06-24 20:25:49 [manager.py:391] -ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:48 lightllm_req_id:8 first_token_cost:206.23135566711426ms total_cost_time:206.27641677856445ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11180 prompt_cache_len:5151 prompt_cache_ratio:0.46073345259391774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 -DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10854458808898926 s -INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11053681373596191 s -DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=160956794957745848934778320432889249197, time:1750767949.2341528s req_ids:[8] -DEBUG 06-24 20:25:49 [manager.py:391] -ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:206.5746784210205ms total_cost_time:206.6178321838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11181 prompt_cache_len:5151 prompt_cache_ratio:0.460692245774081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 -DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10897278785705566 s -INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11072397232055664 s -DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=227258375166824062241581769556234726236, time:1750767949.4453437s req_ids:[8] -DEBUG 06-24 20:25:49 [manager.py:391] -ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:207.41772651672363ms total_cost_time:207.4596881866455ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11182 prompt_cache_len:5151 prompt_cache_ratio:0.46065104632445003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 -DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:49 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s -INFO 06-24 20:25:49 [manager.py:68] detokenization recv req id 8 cost time 0.11041808128356934 s -DEBUG 06-24 20:25:49 [manager.py:391] Prefill Batch: batch_id=311479246608194945273891753388793926207, time:1750767949.6600506s req_ids:[8] -DEBUG 06-24 20:25:49 [manager.py:391] -ERROR 06-24 20:25:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:208.97388458251953ms total_cost_time:209.01799201965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11183 prompt_cache_len:5151 prompt_cache_ratio:0.46060985424304746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 -DEBUG 06-24 20:25:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.3114607334136963 s -INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.31347131729125977 s -DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=280762290074625127239324814370163256611, time:1750767950.075504s req_ids:[8] -DEBUG 06-24 20:25:50 [manager.py:391] -ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:49 lightllm_req_id:8 first_token_cost:416.165828704834ms total_cost_time:416.2101745605469ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11184 prompt_cache_len:5151 prompt_cache_ratio:0.460568669527897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 -DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10894536972045898 s -INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11070537567138672 s -DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=188519534426275655655265502203566423407, time:1750767950.298984s req_ids:[8] -DEBUG 06-24 20:25:50 [manager.py:391] -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:212.0816707611084ms total_cost_time:212.1264934539795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11185 prompt_cache_len:5151 prompt_cache_ratio:0.4605274921770228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 -DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.11035990715026855 s -INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.1122446060180664 s -DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=64999397793335607774646753802047469436, time:1750767950.5160794s req_ids:[8] -DEBUG 06-24 20:25:50 [manager.py:391] -ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:210.99400520324707ms total_cost_time:211.03954315185547ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11186 prompt_cache_len:5151 prompt_cache_ratio:0.46048632218844987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 -DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10893917083740234 s -INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11069536209106445 s -DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=49968410958006549512276453064409451426, time:1750767950.7346478s req_ids:[8] -DEBUG 06-24 20:25:50 [manager.py:391] -ERROR 06-24 20:25:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:208.99343490600586ms total_cost_time:209.03968811035156ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11187 prompt_cache_len:5151 prompt_cache_ratio:0.4604451595602038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 -DEBUG 06-24 20:25:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:50 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s -INFO 06-24 20:25:50 [manager.py:68] detokenization recv req id 8 cost time 0.11144113540649414 s -DEBUG 06-24 20:25:50 [manager.py:391] Prefill Batch: batch_id=265119435943804343477315773219832739768, time:1750767950.9482555s req_ids:[8] -DEBUG 06-24 20:25:50 [manager.py:391] -ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:50 lightllm_req_id:8 first_token_cost:209.77091789245605ms total_cost_time:209.81478691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11188 prompt_cache_len:5151 prompt_cache_ratio:0.46040400429031103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 -DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10801911354064941 s -INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.10978555679321289 s -DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=74034475414137444883916761557159536649, time:1750767951.1714928s req_ids:[8] -DEBUG 06-24 20:25:51 [manager.py:391] -ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:218.77312660217285ms total_cost_time:218.82987022399902ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:11189 prompt_cache_len:5151 prompt_cache_ratio:0.46036285637679863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 -DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.11174511909484863 s -INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.11390519142150879 s -DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=144891372487941409104192668260801218380, time:1750767951.38967s req_ids:[8] -DEBUG 06-24 20:25:51 [manager.py:391] -ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:372.99084663391113ms total_cost_time:373.035192489624ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11190 prompt_cache_len:5151 prompt_cache_ratio:0.4603217158176944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 -DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10808205604553223 s -INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.10991144180297852 s -DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=338841170270227160347942460682715518200, time:1750767951.7677221s req_ids:[8] -DEBUG 06-24 20:25:51 [manager.py:391] -ERROR 06-24 20:25:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:211.62080764770508ms total_cost_time:211.66563034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11191 prompt_cache_len:5151 prompt_cache_ratio:0.4602805826110267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 -DEBUG 06-24 20:25:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:51 [manager.py:224] router recive req id 8 cost time 0.10843706130981445 s -INFO 06-24 20:25:51 [manager.py:68] detokenization recv req id 8 cost time 0.11047792434692383 s -DEBUG 06-24 20:25:51 [manager.py:391] Prefill Batch: batch_id=149352152380683973233077147035485192505, time:1750767951.9864156s req_ids:[8] -DEBUG 06-24 20:25:51 [manager.py:391] -ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:51 lightllm_req_id:8 first_token_cost:207.29517936706543ms total_cost_time:207.3378562927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11192 prompt_cache_len:5151 prompt_cache_ratio:0.46023945675482486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 -DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s -INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11047196388244629 s -DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=73488640495548751367252325088386536898, time:1750767952.2127275s req_ids:[8] -DEBUG 06-24 20:25:52 [manager.py:391] -ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:221.29225730895996ms total_cost_time:221.34947776794434ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:11193 prompt_cache_len:5151 prompt_cache_ratio:0.46019833824711875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 -DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s -INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11184406280517578 s -DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=130893527658173060171342307907102459381, time:1750767952.4365778s req_ids:[8] -DEBUG 06-24 20:25:52 [manager.py:391] -ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:215.70634841918945ms total_cost_time:215.74854850769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11194 prompt_cache_len:5151 prompt_cache_ratio:0.46015722708593887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 -DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10840082168579102 s -INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11023139953613281 s -DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=170637831079713259155186482168321421346, time:1750767952.6523867s req_ids:[8] -DEBUG 06-24 20:25:52 [manager.py:391] -ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:211.87734603881836ms total_cost_time:211.93528175354004ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:11195 prompt_cache_len:5151 prompt_cache_ratio:0.46011612326931667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 -DEBUG 06-24 20:25:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:52 [manager.py:224] router recive req id 8 cost time 0.10932016372680664 s -INFO 06-24 20:25:52 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s -DEBUG 06-24 20:25:52 [manager.py:391] Prefill Batch: batch_id=80690822138922442665920115762477770843, time:1750767952.8757017s req_ids:[8] -DEBUG 06-24 20:25:52 [manager.py:391] -ERROR 06-24 20:25:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:215.50393104553223ms total_cost_time:215.5461311340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11196 prompt_cache_len:5151 prompt_cache_ratio:0.460075026795284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 -DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10933327674865723 s -INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11133050918579102 s -DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=85829892216819711307675662536282017392, time:1750767953.0908775s req_ids:[8] -DEBUG 06-24 20:25:53 [manager.py:391] -ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:52 lightllm_req_id:8 first_token_cost:205.80267906188965ms total_cost_time:205.85393905639648ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11197 prompt_cache_len:5151 prompt_cache_ratio:0.4600339376618737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 -INFO 06-24 20:25:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:25:53 [statics_utils.py:24] mean first cost: 228.53138703912163 ms -INFO 06-24 20:25:53 [statics_utils.py:24] mean per token cost: 0.06495310405514507 ms -DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10874748229980469 s -INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.1105809211730957 s -DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=263731771409684962267467292344641673315, time:1750767953.306762s req_ids:[8] -DEBUG 06-24 20:25:53 [manager.py:391] -ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:212.3258113861084ms total_cost_time:212.3711109161377ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11198 prompt_cache_len:5151 prompt_cache_ratio:0.45999285586711913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 -DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s -INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s -DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=133018165551858372697049718840142825231, time:1750767953.5218735s req_ids:[8] -DEBUG 06-24 20:25:53 [manager.py:391] -DEBUG 06-24 20:25:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 47492.335 tokens/s -DEBUG 06-24 20:25:53 [stats.py:37] Avg prompt tokens throughput: 47483.839 tokens/s -DEBUG 06-24 20:25:53 [stats.py:37] Avg generate tokens throughput: 8.496 tokens/s -ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:366.7020797729492ms total_cost_time:366.7478561401367ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11199 prompt_cache_len:5151 prompt_cache_ratio:0.4599517814090544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 -DEBUG 06-24 20:25:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:53 [manager.py:224] router recive req id 8 cost time 0.10956907272338867 s -INFO 06-24 20:25:53 [manager.py:68] detokenization recv req id 8 cost time 0.11136507987976074 s -DEBUG 06-24 20:25:53 [manager.py:391] Prefill Batch: batch_id=80880279672633625537068992083431662177, time:1750767953.895029s req_ids:[8] -DEBUG 06-24 20:25:53 [manager.py:391] -ERROR 06-24 20:25:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:206.09712600708008ms total_cost_time:206.14075660705566ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11200 prompt_cache_len:5151 prompt_cache_ratio:0.4599107142857143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 -DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10913801193237305 s -INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11109447479248047 s -DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=317843542865908022763827005595154390778, time:1750767954.1077976s req_ids:[8] -DEBUG 06-24 20:25:54 [manager.py:391] -ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:53 lightllm_req_id:8 first_token_cost:208.16850662231445ms total_cost_time:208.21285247802734ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11201 prompt_cache_len:5151 prompt_cache_ratio:0.45986965449513434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 -DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s -INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11093807220458984 s -DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=286875971114440177236939576407735518126, time:1750767954.323387s req_ids:[8] -DEBUG 06-24 20:25:54 [manager.py:391] -ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:210.73579788208008ms total_cost_time:210.78038215637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11202 prompt_cache_len:5151 prompt_cache_ratio:0.4598286020353508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 -DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10886478424072266 s -INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s -DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=231109430398236148908368708013247645028, time:1750767954.5386887s req_ids:[8] -DEBUG 06-24 20:25:54 [manager.py:391] -ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:203.91607284545898ms total_cost_time:203.95755767822266ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11203 prompt_cache_len:5151 prompt_cache_ratio:0.4597875569044006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 -DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10821819305419922 s -INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11023163795471191 s -DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=77626396419253498207176511821291314283, time:1750767954.7493284s req_ids:[8] -DEBUG 06-24 20:25:54 [manager.py:391] -ERROR 06-24 20:25:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:208.12463760375977ms total_cost_time:208.16683769226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11204 prompt_cache_len:5151 prompt_cache_ratio:0.4597465191003213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 -DEBUG 06-24 20:25:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:54 [manager.py:224] router recive req id 8 cost time 0.10900020599365234 s -INFO 06-24 20:25:54 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s -DEBUG 06-24 20:25:54 [manager.py:391] Prefill Batch: batch_id=23150143155328957043200738156119095887, time:1750767954.9646504s req_ids:[8] -DEBUG 06-24 20:25:54 [manager.py:391] -ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:54 lightllm_req_id:8 first_token_cost:211.8968963623047ms total_cost_time:211.94195747375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11205 prompt_cache_len:5151 prompt_cache_ratio:0.4597054886211513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 -DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10993266105651855 s -INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11190581321716309 s -DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=55767900246292417211155152748313203398, time:1750767955.1884246s req_ids:[8] -DEBUG 06-24 20:25:55 [manager.py:391] -ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:392.2703266143799ms total_cost_time:392.3149108886719ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11206 prompt_cache_len:5151 prompt_cache_ratio:0.4596644654649295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 -DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s -INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11023950576782227 s -DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=164527572803977870745295268566261406795, time:1750767955.5812283s req_ids:[8] -DEBUG 06-24 20:25:55 [manager.py:391] -ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:210.73412895202637ms total_cost_time:210.77799797058105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11207 prompt_cache_len:5151 prompt_cache_ratio:0.4596234496296957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 -DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:55 [manager.py:224] router recive req id 8 cost time 0.10875511169433594 s -INFO 06-24 20:25:55 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s -DEBUG 06-24 20:25:55 [manager.py:391] Prefill Batch: batch_id=249332543064611217522547137842540590165, time:1750767955.81096s req_ids:[8] -DEBUG 06-24 20:25:55 [manager.py:391] -ERROR 06-24 20:25:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:225.1737117767334ms total_cost_time:225.21710395812988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11208 prompt_cache_len:5151 prompt_cache_ratio:0.45958244111349034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 -DEBUG 06-24 20:25:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10975861549377441 s -INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11146712303161621 s -DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=217816344161342361305824930591571254752, time:1750767956.02838s req_ids:[8] -DEBUG 06-24 20:25:56 [manager.py:391] -ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:55 lightllm_req_id:8 first_token_cost:207.55887031555176ms total_cost_time:207.60273933410645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11209 prompt_cache_len:5151 prompt_cache_ratio:0.45954143991435453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 -DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10924673080444336 s -INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11103963851928711 s -DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=123399736346916210884584408429838890547, time:1750767956.242102s req_ids:[8] -DEBUG 06-24 20:25:56 [manager.py:391] -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:25:56 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:226.98593139648438ms total_cost_time:227.03099250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11210 prompt_cache_len:5151 prompt_cache_ratio:0.4595004460303301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 -DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s -INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.10961222648620605 s -DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=192619073638595655446233641955157924952, time:1750767956.4752457s req_ids:[8] -DEBUG 06-24 20:25:56 [manager.py:391] -ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:208.62746238708496ms total_cost_time:208.67061614990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11211 prompt_cache_len:5151 prompt_cache_ratio:0.4594594594594595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 -DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s -INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11108040809631348 s -DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=32951570599200842330775672443641526263, time:1750767956.6913674s req_ids:[8] -DEBUG 06-24 20:25:56 [manager.py:391] -ERROR 06-24 20:25:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:209.41853523254395ms total_cost_time:209.46049690246582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11212 prompt_cache_len:5151 prompt_cache_ratio:0.45941848019978593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 -DEBUG 06-24 20:25:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:56 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s -INFO 06-24 20:25:56 [manager.py:68] detokenization recv req id 8 cost time 0.11007857322692871 s -DEBUG 06-24 20:25:56 [manager.py:391] Prefill Batch: batch_id=166118293358450063680863634343748821579, time:1750767956.9082463s req_ids:[8] -DEBUG 06-24 20:25:56 [manager.py:391] -ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:56 lightllm_req_id:8 first_token_cost:364.4275665283203ms total_cost_time:364.4838333129883ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:11213 prompt_cache_len:5151 prompt_cache_ratio:0.4593775082493534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 -DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.11087226867675781 s -INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11270666122436523 s -DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=293739073556052417241431033732295422498, time:1750767957.2756562s req_ids:[8] -DEBUG 06-24 20:25:57 [manager.py:391] -ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:210.9227180480957ms total_cost_time:210.9668254852295ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11214 prompt_cache_len:5151 prompt_cache_ratio:0.4593365436062065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 -DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10880589485168457 s -INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11055517196655273 s -DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=52909281713978269269218247155557999196, time:1750767957.4925122s req_ids:[8] -DEBUG 06-24 20:25:57 [manager.py:391] -ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:199.72801208496094ms total_cost_time:199.77116584777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11215 prompt_cache_len:5151 prompt_cache_ratio:0.45929558626839057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 -DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10809707641601562 s -INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.10980749130249023 s -DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=319350523074503099941646626693374884664, time:1750767957.69783s req_ids:[8] -DEBUG 06-24 20:25:57 [manager.py:391] -ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:202.26502418518066ms total_cost_time:202.30770111083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11216 prompt_cache_len:5151 prompt_cache_ratio:0.4592546362339515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 -DEBUG 06-24 20:25:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:57 [manager.py:224] router recive req id 8 cost time 0.10936689376831055 s -INFO 06-24 20:25:57 [manager.py:68] detokenization recv req id 8 cost time 0.11116886138916016 s -DEBUG 06-24 20:25:57 [manager.py:391] Prefill Batch: batch_id=22957089167045619743112453645183691208, time:1750767957.9077053s req_ids:[8] -DEBUG 06-24 20:25:57 [manager.py:391] -ERROR 06-24 20:25:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:57 lightllm_req_id:8 first_token_cost:206.3891887664795ms total_cost_time:206.43258094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11217 prompt_cache_len:5151 prompt_cache_ratio:0.4592136935009361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 -DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s -INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11003684997558594 s -DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=163067805529481262471084503223152821526, time:1750767958.1205783s req_ids:[8] -DEBUG 06-24 20:25:58 [manager.py:391] -ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:205.81698417663574ms total_cost_time:205.85989952087402ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11218 prompt_cache_len:5151 prompt_cache_ratio:0.4591727580673917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 -DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.10839676856994629 s -INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11007428169250488 s -DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=39764052644217033536907195100089712839, time:1750767958.332651s req_ids:[8] -DEBUG 06-24 20:25:58 [manager.py:391] -ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:199.9807357788086ms total_cost_time:200.0260353088379ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11219 prompt_cache_len:5151 prompt_cache_ratio:0.4591318299313664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 -DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.20824027061462402 s -INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.2098243236541748 s -DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=299066448191850611521187492018277222152, time:1750767958.6729167s req_ids:[8] -DEBUG 06-24 20:25:58 [manager.py:391] -ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:315.0198459625244ms total_cost_time:315.0625228881836ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11220 prompt_cache_len:5151 prompt_cache_ratio:0.4590909090909091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 -DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:58 [manager.py:224] router recive req id 8 cost time 0.10952544212341309 s -INFO 06-24 20:25:58 [manager.py:68] detokenization recv req id 8 cost time 0.11131620407104492 s -DEBUG 06-24 20:25:58 [manager.py:391] Prefill Batch: batch_id=307380978469258506185536755668808576869, time:1750767958.8622751s req_ids:[8] -DEBUG 06-24 20:25:58 [manager.py:391] -ERROR 06-24 20:25:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:200.0105381011963ms total_cost_time:200.05416870117188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11221 prompt_cache_len:5151 prompt_cache_ratio:0.45904999554406917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 -DEBUG 06-24 20:25:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.1086118221282959 s -INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.1105804443359375 s -DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=158084384804882526376986441716304266200, time:1750767959.0707095s req_ids:[8] -DEBUG 06-24 20:25:59 [manager.py:391] -ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:58 lightllm_req_id:8 first_token_cost:208.160400390625ms total_cost_time:208.2047462463379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11222 prompt_cache_len:5151 prompt_cache_ratio:0.45900908928889683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 -DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:25:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.1090390682220459 s -INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11094546318054199 s -DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=301473186249150482031430453549109251318, time:1750767959.2845762s req_ids:[8] -DEBUG 06-24 20:25:59 [manager.py:391] -ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:205.2004337310791ms total_cost_time:205.2445411682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11223 prompt_cache_len:5151 prompt_cache_ratio:0.45896819032344294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 -DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10946536064147949 s -INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11149406433105469 s -DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=28504294760429664993602488854901057735, time:1750767959.495689s req_ids:[8] -DEBUG 06-24 20:25:59 [manager.py:391] -ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:206.23183250427246ms total_cost_time:206.27427101135254ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11224 prompt_cache_len:5151 prompt_cache_ratio:0.4589272986457591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 -DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s -INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.11065816879272461 s -DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=183686639091116065320504674348004302075, time:1750767959.7092662s req_ids:[8] -DEBUG 06-24 20:25:59 [manager.py:391] -ERROR 06-24 20:25:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:25:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:25:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:204.16688919067383ms total_cost_time:204.2231559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:11225 prompt_cache_len:5151 prompt_cache_ratio:0.45888641425389753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:25:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 -DEBUG 06-24 20:25:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:25:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:25:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:25:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:25:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:25:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:25:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:25:59 [manager.py:224] router recive req id 8 cost time 0.10824942588806152 s -INFO 06-24 20:25:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101081371307373 s -DEBUG 06-24 20:25:59 [manager.py:391] Prefill Batch: batch_id=17229421081879705955672328151934889576, time:1750767959.9185324s req_ids:[8] -DEBUG 06-24 20:25:59 [manager.py:391] -ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:25:59 lightllm_req_id:8 first_token_cost:204.20312881469727ms total_cost_time:204.24699783325195ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11226 prompt_cache_len:5151 prompt_cache_ratio:0.45884553714591125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 -DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.10955500602722168 s -INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.11152005195617676 s -DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=31659782244698662635339579640156553230, time:1750767960.1293104s req_ids:[8] -DEBUG 06-24 20:26:00 [manager.py:391] -ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:207.0779800415039ms total_cost_time:207.20577239990234ms,out_token_counter:1 mean_per_token_cost_time: 0.1277923583984375ms prompt_token_num:11227 prompt_cache_len:5151 prompt_cache_ratio:0.4588046673198539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 -DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.31215572357177734 s -INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.3131403923034668 s -DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=244888835574884074259595726065343259247, time:1750767960.554022s req_ids:[8] -DEBUG 06-24 20:26:00 [manager.py:391] -ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:388.5037899017334ms total_cost_time:388.5462284088135ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11228 prompt_cache_len:5151 prompt_cache_ratio:0.4587638047737798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 -DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s -INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s -DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=262334479289199117656458803378175858252, time:1750767960.7410038s req_ids:[8] -DEBUG 06-24 20:26:00 [manager.py:391] -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:205.430269241333ms total_cost_time:205.4729461669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11229 prompt_cache_len:5151 prompt_cache_ratio:0.45872294950574405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 -DEBUG 06-24 20:26:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:00 [manager.py:224] router recive req id 8 cost time 0.10819125175476074 s -INFO 06-24 20:26:00 [manager.py:68] detokenization recv req id 8 cost time 0.10941743850708008 s -DEBUG 06-24 20:26:00 [manager.py:391] Prefill Batch: batch_id=301890032422770017905165957991238444691, time:1750767960.9533818s req_ids:[8] -DEBUG 06-24 20:26:00 [manager.py:391] -ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:00 lightllm_req_id:8 first_token_cost:203.4003734588623ms total_cost_time:203.4444808959961ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11230 prompt_cache_len:5151 prompt_cache_ratio:0.45868210151380234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 -DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s -INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.10985302925109863 s -DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=305673189611192845774694390486587168902, time:1750767961.1626606s req_ids:[8] -DEBUG 06-24 20:26:01 [manager.py:391] -ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:196.35820388793945ms total_cost_time:196.40016555786133ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11231 prompt_cache_len:5151 prompt_cache_ratio:0.45864126079601103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 -DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s -INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.1105949878692627 s -DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=174772808380784528423380686181676252198, time:1750767961.36501s req_ids:[8] -DEBUG 06-24 20:26:01 [manager.py:391] -ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:201.9329071044922ms total_cost_time:201.97439193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11232 prompt_cache_len:5151 prompt_cache_ratio:0.45860042735042733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 -DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.11081147193908691 s -INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11329960823059082 s -DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=272905921339039769932231050802913662488, time:1750767961.573702s req_ids:[8] -DEBUG 06-24 20:26:01 [manager.py:391] -ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:201.37739181518555ms total_cost_time:201.42078399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11233 prompt_cache_len:5151 prompt_cache_ratio:0.4585596011751091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 -DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10803723335266113 s -INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11013126373291016 s -DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=114638664198897981987831844213670532265, time:1750767961.7821412s req_ids:[8] -DEBUG 06-24 20:26:01 [manager.py:391] -ERROR 06-24 20:26:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:205.20544052124023ms total_cost_time:205.26385307312012ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:11234 prompt_cache_len:5151 prompt_cache_ratio:0.45851878226811466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 -DEBUG 06-24 20:26:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:01 [manager.py:224] router recive req id 8 cost time 0.10880136489868164 s -INFO 06-24 20:26:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089563369750977 s -DEBUG 06-24 20:26:01 [manager.py:391] Prefill Batch: batch_id=12365548228051085034294240206831260674, time:1750767961.9940774s req_ids:[8] -DEBUG 06-24 20:26:01 [manager.py:391] -ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:01 lightllm_req_id:8 first_token_cost:370.73755264282227ms total_cost_time:370.78070640563965ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11235 prompt_cache_len:5151 prompt_cache_ratio:0.45847797062750334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 -DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10906291007995605 s -INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s -DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=148676860203763872233086019394765005131, time:1750767962.370223s req_ids:[8] -DEBUG 06-24 20:26:02 [manager.py:391] -ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:212.47076988220215ms total_cost_time:212.51463890075684ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11236 prompt_cache_len:5151 prompt_cache_ratio:0.458437166251335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 -DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10925436019897461 s -INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.11114811897277832 s -DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=221046936391536458802155584223302532467, time:1750767962.595492s req_ids:[8] -DEBUG 06-24 20:26:02 [manager.py:391] -ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:215.87252616882324ms total_cost_time:215.91567993164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11237 prompt_cache_len:5151 prompt_cache_ratio:0.4583963691376702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 -DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:02 [manager.py:224] router recive req id 8 cost time 0.10960984230041504 s -INFO 06-24 20:26:02 [manager.py:68] detokenization recv req id 8 cost time 0.11165714263916016 s -DEBUG 06-24 20:26:02 [manager.py:391] Prefill Batch: batch_id=242569650793657085624521258133671531599, time:1750767962.8130102s req_ids:[8] -DEBUG 06-24 20:26:02 [manager.py:391] -ERROR 06-24 20:26:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:212.3270034790039ms total_cost_time:212.3711109161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11238 prompt_cache_len:5151 prompt_cache_ratio:0.4583555792845702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 -DEBUG 06-24 20:26:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10921549797058105 s -INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.11115503311157227 s -DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=308993720512887489016386776826550996017, time:1750767963.0311773s req_ids:[8] -DEBUG 06-24 20:26:03 [manager.py:391] -ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:02 lightllm_req_id:8 first_token_cost:203.59396934509277ms total_cost_time:203.64022254943848ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11239 prompt_cache_len:5151 prompt_cache_ratio:0.458314796690097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 -DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10904431343078613 s -INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.1111452579498291 s -DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=99044722305033676493800813008332797978, time:1750767963.2415202s req_ids:[8] -DEBUG 06-24 20:26:03 [manager.py:391] -ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:211.12871170043945ms total_cost_time:211.17281913757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11240 prompt_cache_len:5151 prompt_cache_ratio:0.45827402135231315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 -DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.10951042175292969 s -INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.11151885986328125 s -DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=55375619301397570329370247801168319216, time:1750767963.457796s req_ids:[8] -DEBUG 06-24 20:26:03 [manager.py:391] -ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:26:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 47032.278 tokens/s -DEBUG 06-24 20:26:03 [stats.py:37] Avg prompt tokens throughput: 47023.796 tokens/s -DEBUG 06-24 20:26:03 [stats.py:37] Avg generate tokens throughput: 8.482 tokens/s -INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:200.67381858825684ms total_cost_time:200.71649551391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11241 prompt_cache_len:5151 prompt_cache_ratio:0.4582332532692821 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 -DEBUG 06-24 20:26:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:03 [manager.py:224] router recive req id 8 cost time 0.3119161128997803 s -INFO 06-24 20:26:03 [manager.py:68] detokenization recv req id 8 cost time 0.31391477584838867 s -DEBUG 06-24 20:26:03 [manager.py:391] Prefill Batch: batch_id=153901134923433956679032741580141758220, time:1750767963.8687677s req_ids:[8] -DEBUG 06-24 20:26:03 [manager.py:391] -ERROR 06-24 20:26:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:415.91429710388184ms total_cost_time:415.9576892852783ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11242 prompt_cache_len:5151 prompt_cache_ratio:0.4581924924390678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 -DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10954427719116211 s -INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11155509948730469 s -DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=291187297143050862178532218198044812676, time:1750767964.0920463s req_ids:[8] -DEBUG 06-24 20:26:04 [manager.py:391] -ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:03 lightllm_req_id:8 first_token_cost:211.93981170654297ms total_cost_time:211.98534965515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11243 prompt_cache_len:5151 prompt_cache_ratio:0.4581517388597349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 -DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10998868942260742 s -INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11206722259521484 s -DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=270423514890344864862399757824728430659, time:1750767964.3119063s req_ids:[8] -DEBUG 06-24 20:26:04 [manager.py:391] -ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:212.27216720581055ms total_cost_time:212.31627464294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11244 prompt_cache_len:5151 prompt_cache_ratio:0.458110992529349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 -DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10653829574584961 s -INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.10851645469665527 s -DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=195776544748432086561086287990080593810, time:1750767964.5288057s req_ids:[8] -DEBUG 06-24 20:26:04 [manager.py:391] -ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:210.56723594665527ms total_cost_time:210.5886936187744ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11245 prompt_cache_len:5151 prompt_cache_ratio:0.458070253445976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 -DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.1096639633178711 s -INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s -DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=219274744085130042291280004643340980841, time:1750767964.7509363s req_ids:[8] -DEBUG 06-24 20:26:04 [manager.py:391] -ERROR 06-24 20:26:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:208.73188972473145ms total_cost_time:208.77504348754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11246 prompt_cache_len:5151 prompt_cache_ratio:0.45802952160768273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 -DEBUG 06-24 20:26:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:04 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s -INFO 06-24 20:26:04 [manager.py:68] detokenization recv req id 8 cost time 0.11030268669128418 s -DEBUG 06-24 20:26:04 [manager.py:391] Prefill Batch: batch_id=335649374393550114723614249435161696338, time:1750767964.9576435s req_ids:[8] -DEBUG 06-24 20:26:04 [manager.py:391] -ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:04 lightllm_req_id:8 first_token_cost:209.7790241241455ms total_cost_time:209.8255157470703ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11247 prompt_cache_len:5151 prompt_cache_ratio:0.4579887970125367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 -DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10883975028991699 s -INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11080694198608398 s -DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=193489682365858112513942442325673003307, time:1750767965.1785867s req_ids:[8] -DEBUG 06-24 20:26:05 [manager.py:391] -ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:377.579927444458ms total_cost_time:377.6247501373291ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11248 prompt_cache_len:5151 prompt_cache_ratio:0.45794807965860596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 -DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10851073265075684 s -INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.10966610908508301 s -DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=147072921569340290482623928426686444997, time:1750767965.5567265s req_ids:[8] -DEBUG 06-24 20:26:05 [manager.py:391] -ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:206.50196075439453ms total_cost_time:206.54559135437012ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11249 prompt_cache_len:5151 prompt_cache_ratio:0.45790736954395944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 -DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10911059379577637 s -INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021947860717773 s -DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=338437693473026976019893062755860321127, time:1750767965.7717526s req_ids:[8] -DEBUG 06-24 20:26:05 [manager.py:391] -ERROR 06-24 20:26:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:207.83662796020508ms total_cost_time:207.88025856018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11250 prompt_cache_len:5151 prompt_cache_ratio:0.45786666666666664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 -DEBUG 06-24 20:26:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:05 [manager.py:224] router recive req id 8 cost time 0.10976719856262207 s -INFO 06-24 20:26:05 [manager.py:68] detokenization recv req id 8 cost time 0.11104297637939453 s -DEBUG 06-24 20:26:05 [manager.py:391] Prefill Batch: batch_id=160647839520372619481269839033520612532, time:1750767965.9868262s req_ids:[8] -DEBUG 06-24 20:26:05 [manager.py:391] -ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:05 lightllm_req_id:8 first_token_cost:205.50203323364258ms total_cost_time:205.54685592651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11251 prompt_cache_len:5151 prompt_cache_ratio:0.4578259710247978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 -DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10916495323181152 s -INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11030387878417969 s -DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=126556329494024880215464756933356050355, time:1750767966.198195s req_ids:[8] -DEBUG 06-24 20:26:06 [manager.py:391] -ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:219.54798698425293ms total_cost_time:219.59209442138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11252 prompt_cache_len:5151 prompt_cache_ratio:0.45778528261642376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 -DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s -INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11048054695129395 s -DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=62187457971661103540520164136833608530, time:1750767966.4298642s req_ids:[8] -DEBUG 06-24 20:26:06 [manager.py:391] -ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:215.7280445098877ms total_cost_time:215.7738208770752ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11253 prompt_cache_len:5151 prompt_cache_ratio:0.4577446014396161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 -DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10944795608520508 s -INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.11074185371398926 s -DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=131838611028285146385984241394885899351, time:1750767966.64681s req_ids:[8] -DEBUG 06-24 20:26:06 [manager.py:391] -ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:208.62460136413574ms total_cost_time:208.66823196411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11254 prompt_cache_len:5151 prompt_cache_ratio:0.45770392749244715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 -DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:06 [manager.py:224] router recive req id 8 cost time 0.10847711563110352 s -INFO 06-24 20:26:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963821411132812 s -DEBUG 06-24 20:26:06 [manager.py:391] Prefill Batch: batch_id=241857908542651133902165598773542387523, time:1750767966.8622508s req_ids:[8] -DEBUG 06-24 20:26:06 [manager.py:391] -ERROR 06-24 20:26:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:206.4988613128662ms total_cost_time:206.5443992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11255 prompt_cache_len:5151 prompt_cache_ratio:0.4576632607729898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 -DEBUG 06-24 20:26:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10992908477783203 s -INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s -DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=138051315716398893646675880817545965873, time:1750767967.0732079s req_ids:[8] -DEBUG 06-24 20:26:07 [manager.py:391] -ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:06 lightllm_req_id:8 first_token_cost:202.2709846496582ms total_cost_time:202.3155689239502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11256 prompt_cache_len:5151 prompt_cache_ratio:0.4576226012793177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 -DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10862278938293457 s -INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.10985612869262695 s -DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=1494732066993444599016582861791172614, time:1750767967.2836668s req_ids:[8] -DEBUG 06-24 20:26:07 [manager.py:391] -ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:379.406213760376ms total_cost_time:379.4515132904053ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11257 prompt_cache_len:5151 prompt_cache_ratio:0.4575819490095052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 -DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.10995006561279297 s -INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11110949516296387 s -DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=203125469333152460123102920036630721850, time:1750767967.668061s req_ids:[8] -DEBUG 06-24 20:26:07 [manager.py:391] -ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:213.69552612304688ms total_cost_time:213.73915672302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11258 prompt_cache_len:5151 prompt_cache_ratio:0.4575413039616273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 -DEBUG 06-24 20:26:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:07 [manager.py:224] router recive req id 8 cost time 0.1096796989440918 s -INFO 06-24 20:26:07 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s -DEBUG 06-24 20:26:07 [manager.py:391] Prefill Batch: batch_id=217966987924815486190498413785445177438, time:1750767967.8890185s req_ids:[8] -DEBUG 06-24 20:26:07 [manager.py:391] -ERROR 06-24 20:26:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:204.84375953674316ms total_cost_time:204.88691329956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11259 prompt_cache_len:5151 prompt_cache_ratio:0.45750066613375967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 -DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10817575454711914 s -INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.1092677116394043 s -DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=308127273991097065733287733968035147704, time:1750767968.100468s req_ids:[8] -DEBUG 06-24 20:26:08 [manager.py:391] -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:07 lightllm_req_id:8 first_token_cost:204.5130729675293ms total_cost_time:204.55574989318848ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11260 prompt_cache_len:5151 prompt_cache_ratio:0.4574600355239787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 -DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10820341110229492 s -INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.10946798324584961 s -DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=258179461002424836209556900699704093445, time:1750767968.312114s req_ids:[8] -DEBUG 06-24 20:26:08 [manager.py:391] -ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:205.31797409057617ms total_cost_time:205.36208152770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11261 prompt_cache_len:5151 prompt_cache_ratio:0.4574194121303614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 -DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10839557647705078 s -INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.10967707633972168 s -DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=63949427930367751873803710878523381977, time:1750767968.5304134s req_ids:[8] -DEBUG 06-24 20:26:08 [manager.py:391] -ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:219.02799606323242ms total_cost_time:219.0711498260498ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11262 prompt_cache_len:5151 prompt_cache_ratio:0.45737879595098563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 -DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10937762260437012 s -INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s -DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=56850275852242140050308302253577113672, time:1750767968.7629683s req_ids:[8] -DEBUG 06-24 20:26:08 [manager.py:391] -ERROR 06-24 20:26:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:225.83484649658203ms total_cost_time:225.87919235229492ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11263 prompt_cache_len:5151 prompt_cache_ratio:0.4573381869839297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 -DEBUG 06-24 20:26:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:08 [manager.py:224] router recive req id 8 cost time 0.10933399200439453 s -INFO 06-24 20:26:08 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s -DEBUG 06-24 20:26:08 [manager.py:391] Prefill Batch: batch_id=34105963039754750066749136282018883461, time:1750767968.9821277s req_ids:[8] -DEBUG 06-24 20:26:08 [manager.py:391] -ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:08 lightllm_req_id:8 first_token_cost:377.7334690093994ms total_cost_time:377.7790069580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11264 prompt_cache_len:5151 prompt_cache_ratio:0.4572975852272727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 -DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10851573944091797 s -INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.10964155197143555 s -DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=19095889817465579136343533300142380175, time:1750767969.3661919s req_ids:[8] -DEBUG 06-24 20:26:09 [manager.py:391] -ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:209.69605445861816ms total_cost_time:209.73825454711914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11265 prompt_cache_len:5151 prompt_cache_ratio:0.45725699067909453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 -DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10933375358581543 s -INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.11044454574584961 s -DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=79274668974497416484897064565463450212, time:1750767969.5841417s req_ids:[8] -DEBUG 06-24 20:26:09 [manager.py:391] -ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:210.82210540771484ms total_cost_time:210.86549758911133ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11266 prompt_cache_len:5151 prompt_cache_ratio:0.4572164033374756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 -DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:09 [manager.py:224] router recive req id 8 cost time 0.10828256607055664 s -INFO 06-24 20:26:09 [manager.py:68] detokenization recv req id 8 cost time 0.10935091972351074 s -DEBUG 06-24 20:26:09 [manager.py:391] Prefill Batch: batch_id=260560438635004129331078285518322478479, time:1750767969.8007002s req_ids:[8] -DEBUG 06-24 20:26:09 [manager.py:391] -ERROR 06-24 20:26:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:204.13756370544434ms total_cost_time:204.18334007263184ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11267 prompt_cache_len:5151 prompt_cache_ratio:0.45717582320049704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 -DEBUG 06-24 20:26:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s -INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10894465446472168 s -DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=251752899469460670908613343792427179307, time:1750767970.0219915s req_ids:[8] -DEBUG 06-24 20:26:10 [manager.py:391] -ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:09 lightllm_req_id:8 first_token_cost:180.8645725250244ms total_cost_time:180.91845512390137ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:11268 prompt_cache_len:5151 prompt_cache_ratio:0.4571352502662407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 -DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s -INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10980987548828125 s -DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=209597318491448379436580926226140439772, time:1750767970.1973817s req_ids:[8] -DEBUG 06-24 20:26:10 [manager.py:391] -ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:198.90213012695312ms total_cost_time:198.9455223083496ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11269 prompt_cache_len:5151 prompt_cache_ratio:0.45709468453278906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 -DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10851001739501953 s -INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.10958385467529297 s -DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=161346480565352301828588657394768184066, time:1750767970.400968s req_ids:[8] -DEBUG 06-24 20:26:10 [manager.py:391] -ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:201.36570930480957ms total_cost_time:201.41148567199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11270 prompt_cache_len:5151 prompt_cache_ratio:0.45705412599822537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 -DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.10963797569274902 s -INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083054542541504 s -DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=331527834065189892939502591772632032253, time:1750767970.6090841s req_ids:[8] -DEBUG 06-24 20:26:10 [manager.py:391] -ERROR 06-24 20:26:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:372.2808361053467ms total_cost_time:372.32398986816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11271 prompt_cache_len:5151 prompt_cache_ratio:0.45701357466063347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 -DEBUG 06-24 20:26:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:10 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s -INFO 06-24 20:26:10 [manager.py:68] detokenization recv req id 8 cost time 0.11051726341247559 s -DEBUG 06-24 20:26:10 [manager.py:391] Prefill Batch: batch_id=276671189589423619105974982118657083169, time:1750767970.9888742s req_ids:[8] -DEBUG 06-24 20:26:10 [manager.py:391] -ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:10 lightllm_req_id:8 first_token_cost:207.34477043151855ms total_cost_time:207.3655128479004ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11272 prompt_cache_len:5151 prompt_cache_ratio:0.45697303051809796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 -DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10885500907897949 s -INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11050128936767578 s -DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=144061461213553357965332449036236461709, time:1750767971.203853s req_ids:[8] -DEBUG 06-24 20:26:11 [manager.py:391] -ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:210.81066131591797ms total_cost_time:210.85739135742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11273 prompt_cache_len:5151 prompt_cache_ratio:0.45693249356870397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 -DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10847926139831543 s -INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11026525497436523 s -DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=163339919744459332966536289086932056875, time:1750767971.420313s req_ids:[8] -DEBUG 06-24 20:26:11 [manager.py:391] -ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:210.0660800933838ms total_cost_time:210.1123332977295ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11274 prompt_cache_len:5151 prompt_cache_ratio:0.45689196381053754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 -DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10905981063842773 s -INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.1107020378112793 s -DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=185837218332341883511079298510321949953, time:1750767971.649145s req_ids:[8] -DEBUG 06-24 20:26:11 [manager.py:391] -ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:231.27174377441406ms total_cost_time:231.31632804870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11275 prompt_cache_len:5151 prompt_cache_ratio:0.45685144124168514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 -DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:11 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:11 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:11 [manager.py:224] router recive req id 8 cost time 0.10891318321228027 s -INFO 06-24 20:26:11 [manager.py:68] detokenization recv req id 8 cost time 0.11055874824523926 s -DEBUG 06-24 20:26:11 [manager.py:391] Prefill Batch: batch_id=86824818261893391895231446447607193171, time:1750767971.877605s req_ids:[8] -DEBUG 06-24 20:26:11 [manager.py:391] -ERROR 06-24 20:26:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:206.62379264831543ms total_cost_time:206.66837692260742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11276 prompt_cache_len:5151 prompt_cache_ratio:0.45681092586023414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 -DEBUG 06-24 20:26:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:11 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10988378524780273 s -INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.11172008514404297 s -DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=308306770146295562239145635502111898243, time:1750767972.0860767s req_ids:[8] -DEBUG 06-24 20:26:12 [manager.py:391] -ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:11 lightllm_req_id:8 first_token_cost:207.23247528076172ms total_cost_time:207.2761058807373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11277 prompt_cache_len:5151 prompt_cache_ratio:0.4567704176642724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 -DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.30823588371276855 s -INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.3101463317871094 s -DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=125344495659288145192733369506081803725, time:1750767972.5095296s req_ids:[8] -DEBUG 06-24 20:26:12 [manager.py:391] -ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:422.97816276550293ms total_cost_time:423.0232238769531ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11278 prompt_cache_len:5151 prompt_cache_ratio:0.4567299166518886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 -DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10869956016540527 s -INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s -DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=160738129914993255726466849061252261033, time:1750767972.731867s req_ids:[8] -DEBUG 06-24 20:26:12 [manager.py:391] -ERROR 06-24 20:26:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:216.24112129211426ms total_cost_time:216.28642082214355ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11279 prompt_cache_len:5151 prompt_cache_ratio:0.4566894228211721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 -DEBUG 06-24 20:26:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:12 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:12 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:12 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:12 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s -INFO 06-24 20:26:12 [manager.py:68] detokenization recv req id 8 cost time 0.1105961799621582 s -DEBUG 06-24 20:26:12 [manager.py:391] Prefill Batch: batch_id=185304419832789553591628579434099127523, time:1750767972.9518573s req_ids:[8] -DEBUG 06-24 20:26:12 [manager.py:391] -ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:12 lightllm_req_id:8 first_token_cost:207.3228359222412ms total_cost_time:207.3667049407959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11280 prompt_cache_len:5151 prompt_cache_ratio:0.4566489361702128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 -DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10724878311157227 s -INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.10888242721557617 s -DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=186264232746282001577870847187809657723, time:1750767973.1643958s req_ids:[8] -DEBUG 06-24 20:26:13 [manager.py:391] -ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:202.2378444671631ms total_cost_time:202.29077339172363ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:11281 prompt_cache_len:5151 prompt_cache_ratio:0.4566084566971013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 -DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10848832130432129 s -INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11010956764221191 s -DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=322860355188726963150795917259081146176, time:1750767973.3744392s req_ids:[8] -DEBUG 06-24 20:26:13 [manager.py:391] -ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:206.8345546722412ms total_cost_time:206.8803310394287ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11282 prompt_cache_len:5151 prompt_cache_ratio:0.45656798439992907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 -DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10892033576965332 s -INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11047625541687012 s -DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=118125867046129939159039507677707706969, time:1750767973.5999134s req_ids:[8] -DEBUG 06-24 20:26:13 [manager.py:391] -DEBUG 06-24 20:26:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 47047.488 tokens/s -DEBUG 06-24 20:26:13 [stats.py:37] Avg prompt tokens throughput: 47039.234 tokens/s -DEBUG 06-24 20:26:13 [stats.py:37] Avg generate tokens throughput: 8.254 tokens/s -ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:181.84471130371094ms total_cost_time:181.88881874084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11283 prompt_cache_len:5151 prompt_cache_ratio:0.4565275192767881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 -DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s -INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11078047752380371 s -DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=112873597615610054254562740479559641027, time:1750767973.7802098s req_ids:[8] -DEBUG 06-24 20:26:13 [manager.py:391] -ERROR 06-24 20:26:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:206.39824867248535ms total_cost_time:206.44426345825195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11284 prompt_cache_len:5151 prompt_cache_ratio:0.456487061325771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 -DEBUG 06-24 20:26:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:13 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:13 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:13 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:13 [manager.py:224] router recive req id 8 cost time 0.1096494197845459 s -INFO 06-24 20:26:13 [manager.py:68] detokenization recv req id 8 cost time 0.11161208152770996 s -DEBUG 06-24 20:26:13 [manager.py:391] Prefill Batch: batch_id=205180190037667318600096996415815111178, time:1750767973.9948084s req_ids:[8] -DEBUG 06-24 20:26:13 [manager.py:391] -ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:13 lightllm_req_id:8 first_token_cost:215.37494659423828ms total_cost_time:215.41905403137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11285 prompt_cache_len:5151 prompt_cache_ratio:0.4564466105449712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 -DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.3105800151824951 s -INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.3125650882720947 s -DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=137810114838088679488165386796430432505, time:1750767974.4229722s req_ids:[8] -DEBUG 06-24 20:26:14 [manager.py:391] -ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:427.6111125946045ms total_cost_time:427.6549816131592ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11286 prompt_cache_len:5151 prompt_cache_ratio:0.4564061669324827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 -DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s -INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.11088013648986816 s -DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=297796101534190454609186611644242683749, time:1750767974.6524305s req_ids:[8] -DEBUG 06-24 20:26:14 [manager.py:391] -ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:212.158203125ms total_cost_time:212.2037410736084ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11287 prompt_cache_len:5151 prompt_cache_ratio:0.4563657304864003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 -DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:14 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:14 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:14 [manager.py:224] router recive req id 8 cost time 0.10953116416931152 s -INFO 06-24 20:26:14 [manager.py:68] detokenization recv req id 8 cost time 0.11121964454650879 s -DEBUG 06-24 20:26:14 [manager.py:391] Prefill Batch: batch_id=72693179056495963781051038105227078731, time:1750767974.8650627s req_ids:[8] -DEBUG 06-24 20:26:14 [manager.py:391] -ERROR 06-24 20:26:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:204.4229507446289ms total_cost_time:204.46467399597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11288 prompt_cache_len:5151 prompt_cache_ratio:0.4563253012048193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 -DEBUG 06-24 20:26:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:14 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10900068283081055 s -INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s -DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=229281614202594749884833054060028505629, time:1750767975.0783257s req_ids:[8] -DEBUG 06-24 20:26:15 [manager.py:391] -ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:14 lightllm_req_id:8 first_token_cost:208.88590812683105ms total_cost_time:208.93073081970215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11289 prompt_cache_len:5151 prompt_cache_ratio:0.45628487908583576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 -DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10843944549560547 s -INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.10967159271240234 s -DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=7252026405452961440475041215343050690, time:1750767975.2889245s req_ids:[8] -DEBUG 06-24 20:26:15 [manager.py:391] -ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:205.62291145324707ms total_cost_time:205.66630363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11290 prompt_cache_len:5151 prompt_cache_ratio:0.4562444641275465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 -DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10802292823791504 s -INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.10920166969299316 s -DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=292121671372678296948743167208940814162, time:1750767975.501563s req_ids:[8] -DEBUG 06-24 20:26:15 [manager.py:391] -ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:215.78693389892578ms total_cost_time:215.83080291748047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11291 prompt_cache_len:5151 prompt_cache_ratio:0.4562040563280489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 -DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s -INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.1106269359588623 s -DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=167126543321187815025510790059819837797, time:1750767975.7346318s req_ids:[8] -DEBUG 06-24 20:26:15 [manager.py:391] -ERROR 06-24 20:26:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:217.84520149230957ms total_cost_time:217.89002418518066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11292 prompt_cache_len:5151 prompt_cache_ratio:0.45616365568544104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 -DEBUG 06-24 20:26:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:15 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:15 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:15 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:15 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s -INFO 06-24 20:26:15 [manager.py:68] detokenization recv req id 8 cost time 0.1101067066192627 s -DEBUG 06-24 20:26:15 [manager.py:391] Prefill Batch: batch_id=286776391979479844886627327610146560863, time:1750767975.9701903s req_ids:[8] -DEBUG 06-24 20:26:15 [manager.py:391] -ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:15 lightllm_req_id:8 first_token_cost:402.7695655822754ms total_cost_time:402.8127193450928ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11293 prompt_cache_len:5151 prompt_cache_ratio:0.4561232621978217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 -DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.10854601860046387 s -INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.10978102684020996 s -DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=252107103326517643006759469335937164295, time:1750767976.358605s req_ids:[8] -DEBUG 06-24 20:26:16 [manager.py:391] -ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:217.11373329162598ms total_cost_time:217.15593338012695ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11294 prompt_cache_len:5151 prompt_cache_ratio:0.4560828758632902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 -DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.10895252227783203 s -INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.11017966270446777 s -DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=170531223112125849082843449353061117680, time:1750767976.5894241s req_ids:[8] -DEBUG 06-24 20:26:16 [manager.py:391] -ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:214.44225311279297ms total_cost_time:214.48445320129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11295 prompt_cache_len:5151 prompt_cache_ratio:0.4560424966799469 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 -DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:16 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:16 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:16 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:16 [manager.py:224] router recive req id 8 cost time 0.108917236328125 s -INFO 06-24 20:26:16 [manager.py:68] detokenization recv req id 8 cost time 0.11008596420288086 s -DEBUG 06-24 20:26:16 [manager.py:391] Prefill Batch: batch_id=314118861202967707367364989739656188126, time:1750767976.802385s req_ids:[8] -DEBUG 06-24 20:26:16 [manager.py:391] -ERROR 06-24 20:26:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:207.01932907104492ms total_cost_time:207.0610523223877ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11296 prompt_cache_len:5151 prompt_cache_ratio:0.45600212464589235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 -DEBUG 06-24 20:26:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:16 [batch.py:51] router release req id 8 -INFO 06-24 20:26:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10770916938781738 s -INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.10885000228881836 s -DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=178327848448299990497374716595544087587, time:1750767977.0161476s req_ids:[8] -DEBUG 06-24 20:26:17 [manager.py:391] -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -INFO 06-24 20:26:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:16 lightllm_req_id:8 first_token_cost:203.66978645324707ms total_cost_time:203.71317863464355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11297 prompt_cache_len:5151 prompt_cache_ratio:0.4559617597592281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 -DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s -INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s -DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=43316320791307593159388745178545435379, time:1750767977.2269416s req_ids:[8] -DEBUG 06-24 20:26:17 [manager.py:391] -ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:208.4674835205078ms total_cost_time:208.5113525390625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11298 prompt_cache_len:5151 prompt_cache_ratio:0.4559214020180563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 -DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.10912966728210449 s -INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s -DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=127887462431862415738984607234380205949, time:1750767977.4425018s req_ids:[8] -DEBUG 06-24 20:26:17 [manager.py:391] -ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:202.9898166656494ms total_cost_time:203.0341625213623ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11299 prompt_cache_len:5151 prompt_cache_ratio:0.4558810514204797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 -DEBUG 06-24 20:26:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:17 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:17 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:17 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:17 [manager.py:224] router recive req id 8 cost time 0.31160950660705566 s -INFO 06-24 20:26:17 [manager.py:68] detokenization recv req id 8 cost time 0.3129100799560547 s -DEBUG 06-24 20:26:17 [manager.py:391] Prefill Batch: batch_id=228611041444497815714782792390180076051, time:1750767977.865634s req_ids:[8] -DEBUG 06-24 20:26:17 [manager.py:391] -ERROR 06-24 20:26:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:431.5640926361084ms total_cost_time:431.6103458404541ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11300 prompt_cache_len:5151 prompt_cache_ratio:0.45584070796460174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 -DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.1097419261932373 s -INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.11102819442749023 s -DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=324781071075089926809079762976872438874, time:1750767978.0904195s req_ids:[8] -DEBUG 06-24 20:26:18 [manager.py:391] -ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:17 lightllm_req_id:8 first_token_cost:227.88715362548828ms total_cost_time:227.93269157409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11301 prompt_cache_len:5151 prompt_cache_ratio:0.45580037164852666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 -DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s -INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.11031055450439453 s -DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=217889621557458946267952590344456185602, time:1750767978.3285296s req_ids:[8] -DEBUG 06-24 20:26:18 [manager.py:391] -ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:219.17200088500977ms total_cost_time:219.21491622924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11302 prompt_cache_len:5151 prompt_cache_ratio:0.45576004247035923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 -DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10755014419555664 s -INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.10865950584411621 s -DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=14910185193590493480396849653693461633, time:1750767978.571229s req_ids:[8] -DEBUG 06-24 20:26:18 [manager.py:391] -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:188.71283531188965ms total_cost_time:188.75551223754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11303 prompt_cache_len:5151 prompt_cache_ratio:0.4557197204282049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 -DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10828781127929688 s -INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.10927796363830566 s -DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=79820120496242415191842462846185701598, time:1750767978.7425067s req_ids:[8] -DEBUG 06-24 20:26:18 [manager.py:391] -ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:166.65172576904297ms total_cost_time:166.69321060180664ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11304 prompt_cache_len:5151 prompt_cache_ratio:0.45567940552016983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 -DEBUG 06-24 20:26:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:18 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:18 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:18 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:18 [manager.py:224] router recive req id 8 cost time 0.10799193382263184 s -INFO 06-24 20:26:18 [manager.py:68] detokenization recv req id 8 cost time 0.109039306640625 s -DEBUG 06-24 20:26:18 [manager.py:391] Prefill Batch: batch_id=179704393806689913170615001582257983838, time:1750767978.916534s req_ids:[8] -DEBUG 06-24 20:26:18 [manager.py:391] -ERROR 06-24 20:26:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:167.58203506469727ms total_cost_time:167.62447357177734ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11305 prompt_cache_len:5151 prompt_cache_ratio:0.4556390977443609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 -DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10766983032226562 s -INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.10878896713256836 s -DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=241508012542157698679710058487985212370, time:1750767979.0940008s req_ids:[8] -DEBUG 06-24 20:26:19 [manager.py:391] -ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:18 lightllm_req_id:8 first_token_cost:371.7687129974365ms total_cost_time:371.8135356903076ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11306 prompt_cache_len:5151 prompt_cache_ratio:0.45559879709888557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 -DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.1110987663269043 s -INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.1122431755065918 s -DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=214429699826335761947874168521675479824, time:1750767979.467753s req_ids:[8] -DEBUG 06-24 20:26:19 [manager.py:391] -ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:219.11954879760742ms total_cost_time:219.1624641418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11307 prompt_cache_len:5151 prompt_cache_ratio:0.45555850358185196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 -DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10937237739562988 s -INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.11053276062011719 s -DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=91671198261330310031839677121591964614, time:1750767979.7045693s req_ids:[8] -DEBUG 06-24 20:26:19 [manager.py:391] -ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:215.30914306640625ms total_cost_time:215.35205841064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11308 prompt_cache_len:5151 prompt_cache_ratio:0.4555182171913689 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 -DEBUG 06-24 20:26:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:19 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:19 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:19 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:19 [manager.py:224] router recive req id 8 cost time 0.10691666603088379 s -INFO 06-24 20:26:19 [manager.py:68] detokenization recv req id 8 cost time 0.10784244537353516 s -DEBUG 06-24 20:26:19 [manager.py:391] Prefill Batch: batch_id=251071743910972663839529809746892314462, time:1750767979.9208558s req_ids:[8] -DEBUG 06-24 20:26:19 [manager.py:391] -ERROR 06-24 20:26:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:19 lightllm_req_id:8 first_token_cost:196.05255126953125ms total_cost_time:196.09713554382324ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11309 prompt_cache_len:5151 prompt_cache_ratio:0.45547793792554603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 -DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10841155052185059 s -INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s -DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=40930484799322893938813653789957674048, time:1750767980.11918s req_ids:[8] -DEBUG 06-24 20:26:20 [manager.py:391] -ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:202.41308212280273ms total_cost_time:202.45718955993652ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11310 prompt_cache_len:5151 prompt_cache_ratio:0.4554376657824934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 -DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10751128196716309 s -INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.1099696159362793 s -DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=161011448909021956858177146153711865943, time:1750767980.3309908s req_ids:[8] -DEBUG 06-24 20:26:20 [manager.py:391] -ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:203.23872566223145ms total_cost_time:203.28259468078613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11311 prompt_cache_len:5151 prompt_cache_ratio:0.4553974007603218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 -DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:20 [batch.py:51] router release req id 8 -INFO 06-24 20:26:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10833954811096191 s -INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103668212890625 s -DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=327643006633026172537630322673166151125, time:1750767980.5408645s req_ids:[8] -DEBUG 06-24 20:26:20 [manager.py:391] -ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:209.3367576599121ms total_cost_time:209.3803882598877ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11312 prompt_cache_len:5151 prompt_cache_ratio:0.45535714285714285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 -DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10852575302124023 s -INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11081743240356445 s -DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=140703759801477550523870686876265536940, time:1750767980.7657156s req_ids:[8] -DEBUG 06-24 20:26:20 [manager.py:391] -ERROR 06-24 20:26:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:221.77720069885254ms total_cost_time:221.82106971740723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11313 prompt_cache_len:5151 prompt_cache_ratio:0.4553168920710687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 -DEBUG 06-24 20:26:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:20 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:20 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:20 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:20 [manager.py:224] router recive req id 8 cost time 0.10939669609069824 s -INFO 06-24 20:26:20 [manager.py:68] detokenization recv req id 8 cost time 0.11151862144470215 s -DEBUG 06-24 20:26:20 [manager.py:391] Prefill Batch: batch_id=200654988338487005953524189509481411246, time:1750767980.98896s req_ids:[8] -DEBUG 06-24 20:26:20 [manager.py:391] -ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:20 lightllm_req_id:8 first_token_cost:214.40863609313965ms total_cost_time:214.45226669311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11314 prompt_cache_len:5151 prompt_cache_ratio:0.45527664840021215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 -DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10774922370910645 s -INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s -DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=337308567436354901208803084212329339179, time:1750767981.2054055s req_ids:[8] -DEBUG 06-24 20:26:21 [manager.py:391] -ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:367.5389289855957ms total_cost_time:367.5847053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11315 prompt_cache_len:5151 prompt_cache_ratio:0.4552364118426867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 -DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10843276977539062 s -INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056399345397949 s -DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=105221000426500780698965551045841528316, time:1750767981.5778193s req_ids:[8] -DEBUG 06-24 20:26:21 [manager.py:391] -ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:205.8546543121338ms total_cost_time:205.89756965637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11316 prompt_cache_len:5151 prompt_cache_ratio:0.4551961823966066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 -DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s -INFO 06-24 20:26:21 [manager.py:68] detokenization recv req id 8 cost time 0.11151766777038574 s -DEBUG 06-24 20:26:21 [manager.py:391] Prefill Batch: batch_id=36465977988968600892308478293091944700, time:1750767981.791205s req_ids:[8] -DEBUG 06-24 20:26:21 [manager.py:391] -ERROR 06-24 20:26:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:206.9544792175293ms total_cost_time:206.9993019104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11317 prompt_cache_len:5151 prompt_cache_ratio:0.4551559600600866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 -DEBUG 06-24 20:26:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:21 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:21 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:21 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:21 [manager.py:224] router recive req id 8 cost time 0.10930657386779785 s -INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s -DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=130269794980461272780526004639887080436, time:1750767982.0038283s req_ids:[8] -DEBUG 06-24 20:26:22 [manager.py:391] -ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:21 lightllm_req_id:8 first_token_cost:207.12590217590332ms total_cost_time:207.1690559387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11318 prompt_cache_len:5151 prompt_cache_ratio:0.4551157448312423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 -DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10969305038452148 s -INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11173176765441895 s -DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=202768348669964710432896394972755337439, time:1750767982.2158418s req_ids:[8] -DEBUG 06-24 20:26:22 [manager.py:391] -ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:206.6037654876709ms total_cost_time:206.6478729248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11319 prompt_cache_len:5151 prompt_cache_ratio:0.4550755367081898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 -DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10843372344970703 s -INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11035585403442383 s -DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=312177278267008731944846057538717953174, time:1750767982.430606s req_ids:[8] -DEBUG 06-24 20:26:22 [manager.py:391] -ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:207.98230171203613ms total_cost_time:208.02807807922363ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11320 prompt_cache_len:5151 prompt_cache_ratio:0.4550353356890459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 -DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10565781593322754 s -INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.10756993293762207 s -DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=60917728067241092268470813058031508247, time:1750767982.652749s req_ids:[8] -DEBUG 06-24 20:26:22 [manager.py:391] -ERROR 06-24 20:26:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:218.47152709960938ms total_cost_time:218.53184700012207ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:11321 prompt_cache_len:5151 prompt_cache_ratio:0.4549951417719283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 -DEBUG 06-24 20:26:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:22 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:22 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:22 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:22 [manager.py:224] router recive req id 8 cost time 0.10898494720458984 s -INFO 06-24 20:26:22 [manager.py:68] detokenization recv req id 8 cost time 0.11101698875427246 s -DEBUG 06-24 20:26:22 [manager.py:391] Prefill Batch: batch_id=82432998254571733186314519522606324271, time:1750767982.868839s req_ids:[8] -DEBUG 06-24 20:26:22 [manager.py:391] -ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:22 lightllm_req_id:8 first_token_cost:369.2936897277832ms total_cost_time:369.33135986328125ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:11322 prompt_cache_len:5151 prompt_cache_ratio:0.45495495495495497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 -DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:23 [batch.py:51] router release req id 8 -INFO 06-24 20:26:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10802960395812988 s -INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.10998368263244629 s -DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=9695969472066297818038024760146791449, time:1750767983.2446148s req_ids:[8] -DEBUG 06-24 20:26:23 [manager.py:391] -ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:203.31573486328125ms total_cost_time:203.35984230041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11323 prompt_cache_len:5151 prompt_cache_ratio:0.4549147752362448 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 -DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10913705825805664 s -INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.11101198196411133 s -DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=304131336952290925737796181334933443959, time:1750767983.4543283s req_ids:[8] -DEBUG 06-24 20:26:23 [manager.py:391] -ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:209.04850959777832ms total_cost_time:209.09476280212402ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11324 prompt_cache_len:5151 prompt_cache_ratio:0.4548746026139173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 -DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.1082909107208252 s -INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s -DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=205178083546841053321471601687807998437, time:1750767983.669755s req_ids:[8] -DEBUG 06-24 20:26:23 [manager.py:391] -DEBUG 06-24 20:26:23 [stats.py:37] Avg tokens(prompt+generate) throughput: 47155.767 tokens/s -DEBUG 06-24 20:26:23 [stats.py:37] Avg prompt tokens throughput: 47147.426 tokens/s -DEBUG 06-24 20:26:23 [stats.py:37] Avg generate tokens throughput: 8.341 tokens/s -ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:206.8483829498291ms total_cost_time:206.89058303833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11325 prompt_cache_len:5151 prompt_cache_ratio:0.4548344370860927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 -DEBUG 06-24 20:26:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:23 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:23 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:23 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:23 [manager.py:224] router recive req id 8 cost time 0.10956335067749023 s -INFO 06-24 20:26:23 [manager.py:68] detokenization recv req id 8 cost time 0.1114048957824707 s -DEBUG 06-24 20:26:23 [manager.py:391] Prefill Batch: batch_id=260175145077191704055382274588877738234, time:1750767983.8822963s req_ids:[8] -DEBUG 06-24 20:26:23 [manager.py:391] -ERROR 06-24 20:26:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:208.53018760681152ms total_cost_time:208.573579788208ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11326 prompt_cache_len:5151 prompt_cache_ratio:0.4547942786508918 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 -DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.1092689037322998 s -INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11125850677490234 s -DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=39682747746281319206905361057939582000, time:1750767984.0996873s req_ids:[8] -DEBUG 06-24 20:26:24 [manager.py:391] -ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:23 lightllm_req_id:8 first_token_cost:210.52813529968262ms total_cost_time:210.5724811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11327 prompt_cache_len:5151 prompt_cache_ratio:0.45475412730643594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 -DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10861968994140625 s -INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11048293113708496 s -DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=311340011915937694398643413874466027315, time:1750767984.312459s req_ids:[8] -DEBUG 06-24 20:26:24 [manager.py:391] -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:24 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:206.7086696624756ms total_cost_time:206.75230026245117ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11328 prompt_cache_len:5151 prompt_cache_ratio:0.4547139830508475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 -DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10832738876342773 s -INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s -DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=309287973592991387652661829860412963241, time:1750767984.5274093s req_ids:[8] -DEBUG 06-24 20:26:24 [manager.py:391] -ERROR 06-24 20:26:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:377.579927444458ms total_cost_time:377.6240348815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11329 prompt_cache_len:5151 prompt_cache_ratio:0.4546738458822491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 -DEBUG 06-24 20:26:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:24 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:24 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:24 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:24 [manager.py:224] router recive req id 8 cost time 0.10851454734802246 s -INFO 06-24 20:26:24 [manager.py:68] detokenization recv req id 8 cost time 0.11035871505737305 s -DEBUG 06-24 20:26:24 [manager.py:391] Prefill Batch: batch_id=27603626724685253889428863651976169081, time:1750767984.9116282s req_ids:[8] -DEBUG 06-24 20:26:24 [manager.py:391] -ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:24 lightllm_req_id:8 first_token_cost:209.5491886138916ms total_cost_time:209.59234237670898ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11330 prompt_cache_len:5151 prompt_cache_ratio:0.4546337157987643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 -DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10896658897399902 s -INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s -DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=34178364314481396363755139553507762015, time:1750767985.1284096s req_ids:[8] -DEBUG 06-24 20:26:25 [manager.py:391] -ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:204.63204383850098ms total_cost_time:204.67615127563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11331 prompt_cache_len:5151 prompt_cache_ratio:0.45459359279851735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 -DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s -INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11097335815429688 s -DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=249296467447821938308198099825357915319, time:1750767985.3384955s req_ids:[8] -DEBUG 06-24 20:26:25 [manager.py:391] -ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:204.25701141357422ms total_cost_time:204.3163776397705ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:11332 prompt_cache_len:5151 prompt_cache_ratio:0.4545534768796329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 -DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10921812057495117 s -INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s -DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=78316491577682309637965559131496201583, time:1750767985.5491347s req_ids:[8] -DEBUG 06-24 20:26:25 [manager.py:391] -ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:205.62386512756348ms total_cost_time:205.66749572753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11333 prompt_cache_len:5151 prompt_cache_ratio:0.45451336804023645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 -DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.10871505737304688 s -INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11046481132507324 s -DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=122065705189992464653415244186399711763, time:1750767985.7599568s req_ids:[8] -DEBUG 06-24 20:26:25 [manager.py:391] -ERROR 06-24 20:26:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:209.92088317871094ms total_cost_time:209.96546745300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11334 prompt_cache_len:5151 prompt_cache_ratio:0.4544732662784542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 -DEBUG 06-24 20:26:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:25 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:25 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:25 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:25 [manager.py:224] router recive req id 8 cost time 0.1089777946472168 s -INFO 06-24 20:26:25 [manager.py:68] detokenization recv req id 8 cost time 0.11072158813476562 s -DEBUG 06-24 20:26:25 [manager.py:391] Prefill Batch: batch_id=143003878709639294044029488484888653732, time:1750767985.9822032s req_ids:[8] -DEBUG 06-24 20:26:25 [manager.py:391] -ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:25 lightllm_req_id:8 first_token_cost:214.02597427368164ms total_cost_time:214.0491008758545ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:11335 prompt_cache_len:5151 prompt_cache_ratio:0.45443317159241287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 -DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.31182003021240234 s -INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.31377577781677246 s -DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=32236844390206068601112508178367091998, time:1750767986.40213s req_ids:[8] -DEBUG 06-24 20:26:26 [manager.py:391] -ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:423.19512367248535ms total_cost_time:423.23994636535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11336 prompt_cache_len:5151 prompt_cache_ratio:0.4543930839802399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 -DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.1087794303894043 s -INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.11055564880371094 s -DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=169410883395063664411798780232010256983, time:1750767986.6263075s req_ids:[8] -DEBUG 06-24 20:26:26 [manager.py:391] -ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:212.70370483398438ms total_cost_time:212.74995803833008ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11337 prompt_cache_len:5151 prompt_cache_ratio:0.4543530034400635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 -DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:26 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:26 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:26 [manager.py:224] router recive req id 8 cost time 0.10803914070129395 s -INFO 06-24 20:26:26 [manager.py:68] detokenization recv req id 8 cost time 0.10982060432434082 s -DEBUG 06-24 20:26:26 [manager.py:391] Prefill Batch: batch_id=216687525975914802886077759414369259106, time:1750767986.8498383s req_ids:[8] -DEBUG 06-24 20:26:26 [manager.py:391] -ERROR 06-24 20:26:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:217.75555610656738ms total_cost_time:217.79942512512207ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11338 prompt_cache_len:5151 prompt_cache_ratio:0.4543129299700123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 -DEBUG 06-24 20:26:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:26 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10854077339172363 s -INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s -DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=338129568213209360859016629128822091365, time:1750767987.0666065s req_ids:[8] -DEBUG 06-24 20:26:27 [manager.py:391] -ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:26 lightllm_req_id:8 first_token_cost:209.5339298248291ms total_cost_time:209.578275680542ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11339 prompt_cache_len:5151 prompt_cache_ratio:0.4542728635682159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 -DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.1084892749786377 s -INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s -DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=171116618902568112251584663188528449727, time:1750767987.2832882s req_ids:[8] -DEBUG 06-24 20:26:27 [manager.py:391] -ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:210.7853889465332ms total_cost_time:210.82758903503418ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11340 prompt_cache_len:5151 prompt_cache_ratio:0.4542328042328042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 -DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10910153388977051 s -INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11072969436645508 s -DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=334730239873310975360380266584526570352, time:1750767987.501249s req_ids:[8] -DEBUG 06-24 20:26:27 [manager.py:391] -ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:205.5039405822754ms total_cost_time:205.52492141723633ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11341 prompt_cache_len:5151 prompt_cache_ratio:0.4541927519619081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 -DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10885810852050781 s -INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.11057686805725098 s -DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=298943297872181446800450271937263463798, time:1750767987.7214313s req_ids:[8] -DEBUG 06-24 20:26:27 [manager.py:391] -ERROR 06-24 20:26:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:218.65200996398926ms total_cost_time:218.69564056396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11342 prompt_cache_len:5151 prompt_cache_ratio:0.454152706753659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 -DEBUG 06-24 20:26:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:27 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:27 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:27 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:27 [manager.py:224] router recive req id 8 cost time 0.10837554931640625 s -INFO 06-24 20:26:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101832389831543 s -DEBUG 06-24 20:26:27 [manager.py:391] Prefill Batch: batch_id=196405579000194876979895677647670296953, time:1750767987.93744s req_ids:[8] -DEBUG 06-24 20:26:27 [manager.py:391] -ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:27 lightllm_req_id:8 first_token_cost:211.57217025756836ms total_cost_time:211.61508560180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11343 prompt_cache_len:5151 prompt_cache_ratio:0.4541126686061888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 -DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.311542272567749 s -INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.31353259086608887 s -DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=248457901287186131751066140030955032277, time:1750767988.3580234s req_ids:[8] -DEBUG 06-24 20:26:28 [manager.py:391] -ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:421.7522144317627ms total_cost_time:421.7956066131592ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11344 prompt_cache_len:5151 prompt_cache_ratio:0.45407263751763044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 -DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s -INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.11063790321350098 s -DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=259658653019970542523968057709153410302, time:1750767988.5809s req_ids:[8] -DEBUG 06-24 20:26:28 [manager.py:391] -ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:205.8999538421631ms total_cost_time:205.92808723449707ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:11345 prompt_cache_len:5151 prompt_cache_ratio:0.45403261348611723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 -DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:28 [manager.py:224] router recive req id 8 cost time 0.10797905921936035 s -INFO 06-24 20:26:28 [manager.py:68] detokenization recv req id 8 cost time 0.10991764068603516 s -DEBUG 06-24 20:26:28 [manager.py:391] Prefill Batch: batch_id=222440150659913816449069914640604637324, time:1750767988.7921424s req_ids:[8] -DEBUG 06-24 20:26:28 [manager.py:391] -ERROR 06-24 20:26:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:211.35330200195312ms total_cost_time:211.3964557647705ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11346 prompt_cache_len:5151 prompt_cache_ratio:0.4539925965097832 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 -DEBUG 06-24 20:26:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:28 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:28 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:28 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10892534255981445 s -INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11107707023620605 s -DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=222292560635342062546591543331922077526, time:1750767989.0079732s req_ids:[8] -DEBUG 06-24 20:26:29 [manager.py:391] -ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:28 lightllm_req_id:8 first_token_cost:209.09404754638672ms total_cost_time:209.1386318206787ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11347 prompt_cache_len:5151 prompt_cache_ratio:0.453952586586763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 -DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10895252227783203 s -INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11097502708435059 s -DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=338230677424122649872558070560957660399, time:1750767989.2222083s req_ids:[8] -DEBUG 06-24 20:26:29 [manager.py:391] -ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:207.77583122253418ms total_cost_time:207.81779289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11348 prompt_cache_len:5151 prompt_cache_ratio:0.4539125837151921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 -DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10911893844604492 s -INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s -DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=4626725752643212933268018661862218008, time:1750767989.4383032s req_ids:[8] -DEBUG 06-24 20:26:29 [manager.py:391] -ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:206.8188190460205ms total_cost_time:206.862211227417ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11349 prompt_cache_len:5151 prompt_cache_ratio:0.45387258789320645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 -DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10837316513061523 s -INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.1102747917175293 s -DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=297461174187350274559017096912900356462, time:1750767989.650257s req_ids:[8] -DEBUG 06-24 20:26:29 [manager.py:391] -ERROR 06-24 20:26:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:202.7604579925537ms total_cost_time:202.8062343597412ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11350 prompt_cache_len:5151 prompt_cache_ratio:0.45383259911894275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 -DEBUG 06-24 20:26:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:29 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:29 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:29 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:29 [manager.py:224] router recive req id 8 cost time 0.10965394973754883 s -INFO 06-24 20:26:29 [manager.py:68] detokenization recv req id 8 cost time 0.11154770851135254 s -DEBUG 06-24 20:26:29 [manager.py:391] Prefill Batch: batch_id=105161746586994409708630787358915763210, time:1750767989.858172s req_ids:[8] -DEBUG 06-24 20:26:29 [manager.py:391] -ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:29 lightllm_req_id:8 first_token_cost:366.93644523620605ms total_cost_time:366.98126792907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11351 prompt_cache_len:5151 prompt_cache_ratio:0.4537926173905383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 -DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.1083536148071289 s -INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11027407646179199 s -DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=60630480102621067251015614715213106818, time:1750767990.2315123s req_ids:[8] -DEBUG 06-24 20:26:30 [manager.py:391] -ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:206.75063133239746ms total_cost_time:206.79306983947754ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11352 prompt_cache_len:5151 prompt_cache_ratio:0.4537526427061311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 -DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.1104733943939209 s -INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.1124274730682373 s -DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=132806970684671152990470722869136624712, time:1750767990.445534s req_ids:[8] -DEBUG 06-24 20:26:30 [manager.py:391] -ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:209.16056632995605ms total_cost_time:209.20729637145996ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11353 prompt_cache_len:5151 prompt_cache_ratio:0.45371267506385976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 -DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.10949921607971191 s -INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11154699325561523 s -DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=136385300513875509685261741832367442213, time:1750767990.660243s req_ids:[8] -DEBUG 06-24 20:26:30 [manager.py:391] -ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:208.13417434692383ms total_cost_time:208.17828178405762ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11354 prompt_cache_len:5151 prompt_cache_ratio:0.45367271446186364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 -DEBUG 06-24 20:26:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:30 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:30 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:30 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:30 [manager.py:224] router recive req id 8 cost time 0.10901641845703125 s -INFO 06-24 20:26:30 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s -DEBUG 06-24 20:26:30 [manager.py:391] Prefill Batch: batch_id=172886899845265051946288289449321298856, time:1750767990.8819432s req_ids:[8] -DEBUG 06-24 20:26:30 [manager.py:391] -ERROR 06-24 20:26:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:216.4146900177002ms total_cost_time:216.45760536193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11355 prompt_cache_len:5151 prompt_cache_ratio:0.4536327608982827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 -DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10955286026000977 s -INFO 06-24 20:26:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11154699325561523 s -DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=335865772468804170213563667899280026517, time:1750767991.099628s req_ids:[8] -DEBUG 06-24 20:26:31 [manager.py:391] -ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:30 lightllm_req_id:8 first_token_cost:209.36894416809082ms total_cost_time:209.4130516052246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11356 prompt_cache_len:5151 prompt_cache_ratio:0.4535928143712575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 -DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10839104652404785 s -INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11033916473388672 s -DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=290859144232830616038600295663153287429, time:1750767991.3142734s req_ids:[8] -DEBUG 06-24 20:26:31 [manager.py:391] -ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:207.25035667419434ms total_cost_time:207.29660987854004ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11357 prompt_cache_len:5151 prompt_cache_ratio:0.4535528748789293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 -DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.3114175796508789 s -INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.31343603134155273 s -DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=181844160483954208423074101142688983172, time:1750767991.736137s req_ids:[8] -DEBUG 06-24 20:26:31 [manager.py:391] -ERROR 06-24 20:26:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:423.42233657836914ms total_cost_time:423.46668243408203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11358 prompt_cache_len:5151 prompt_cache_ratio:0.45351294241944007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 -DEBUG 06-24 20:26:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:31 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:31 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:31 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:31 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s -INFO 06-24 20:26:31 [manager.py:68] detokenization recv req id 8 cost time 0.11147737503051758 s -DEBUG 06-24 20:26:31 [manager.py:391] Prefill Batch: batch_id=326225473762673937009723440728985659466, time:1750767991.9602845s req_ids:[8] -DEBUG 06-24 20:26:31 [manager.py:391] -ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:31 lightllm_req_id:8 first_token_cost:211.96556091308594ms total_cost_time:212.00966835021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11359 prompt_cache_len:5151 prompt_cache_ratio:0.4534730169909323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 -DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10866165161132812 s -INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11064934730529785 s -DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=279074985643360394393559222810769976533, time:1750767992.1766987s req_ids:[8] -DEBUG 06-24 20:26:32 [manager.py:391] -ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:208.36997032165527ms total_cost_time:208.41431617736816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11360 prompt_cache_len:5151 prompt_cache_ratio:0.4534330985915493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 -DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10968160629272461 s -INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11177682876586914 s -DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=261988646200074728369550623351821313202, time:1750767992.3928876s req_ids:[8] -DEBUG 06-24 20:26:32 [manager.py:391] -ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:205.4741382598877ms total_cost_time:205.5184841156006ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11361 prompt_cache_len:5151 prompt_cache_ratio:0.4533931872194349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 -DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.10889220237731934 s -INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11086153984069824 s -DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=321489768770417620956852882477783075083, time:1750767992.603351s req_ids:[8] -DEBUG 06-24 20:26:32 [manager.py:391] -ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:207.97252655029297ms total_cost_time:208.01663398742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11362 prompt_cache_len:5151 prompt_cache_ratio:0.4533532828727337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 -DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:32 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:32 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:32 [manager.py:224] router recive req id 8 cost time 0.1093134880065918 s -INFO 06-24 20:26:32 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s -DEBUG 06-24 20:26:32 [manager.py:391] Prefill Batch: batch_id=54906911637315129445403298455417082941, time:1750767992.8157206s req_ids:[8] -DEBUG 06-24 20:26:32 [manager.py:391] -ERROR 06-24 20:26:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:205.94453811645508ms total_cost_time:205.99031448364258ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11363 prompt_cache_len:5151 prompt_cache_ratio:0.4533133855495908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 -DEBUG 06-24 20:26:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:32 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10937333106994629 s -INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11137938499450684 s -DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=241134323400175779274229919301016180857, time:1750767993.0300534s req_ids:[8] -DEBUG 06-24 20:26:33 [manager.py:391] -ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:32 lightllm_req_id:8 first_token_cost:378.4646987915039ms total_cost_time:378.5083293914795ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11364 prompt_cache_len:5151 prompt_cache_ratio:0.4532734952481521 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 -DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10859012603759766 s -INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11057782173156738 s -DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=273199403309646748187973744250050924557, time:1750767993.4138806s req_ids:[8] -DEBUG 06-24 20:26:33 [manager.py:391] -ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:207.26943016052246ms total_cost_time:207.3063850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:11365 prompt_cache_len:5151 prompt_cache_ratio:0.45323361196656403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 -DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10879206657409668 s -INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11094546318054199 s -DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=107272170421819284538234374938743824210, time:1750767993.6293507s req_ids:[8] -DEBUG 06-24 20:26:33 [manager.py:391] -ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:26:33 [stats.py:37] Avg tokens(prompt+generate) throughput: 46294.618 tokens/s -DEBUG 06-24 20:26:33 [stats.py:37] Avg prompt tokens throughput: 46286.360 tokens/s -DEBUG 06-24 20:26:33 [stats.py:37] Avg generate tokens throughput: 8.259 tokens/s -INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:208.44483375549316ms total_cost_time:208.48965644836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11366 prompt_cache_len:5151 prompt_cache_ratio:0.4531937357029738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 -DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:33 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:33 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:33 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s -INFO 06-24 20:26:33 [manager.py:68] detokenization recv req id 8 cost time 0.11078953742980957 s -DEBUG 06-24 20:26:33 [manager.py:391] Prefill Batch: batch_id=148499893491984726916772807895403790720, time:1750767993.8429947s req_ids:[8] -DEBUG 06-24 20:26:33 [manager.py:391] -ERROR 06-24 20:26:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:207.37910270690918ms total_cost_time:207.42416381835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11367 prompt_cache_len:5151 prompt_cache_ratio:0.4531538664555292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 -DEBUG 06-24 20:26:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:33 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.1089622974395752 s -INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11093544960021973 s -DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=205426324059627278166729247241098073496, time:1750767994.0570447s req_ids:[8] -DEBUG 06-24 20:26:34 [manager.py:391] -ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:33 lightllm_req_id:8 first_token_cost:205.3356170654297ms total_cost_time:205.37877082824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11368 prompt_cache_len:5151 prompt_cache_ratio:0.45311400422237863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 -DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10974240303039551 s -INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11173748970031738 s -DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=248443153045428672825345312726372391444, time:1750767994.2689846s req_ids:[8] -DEBUG 06-24 20:26:34 [manager.py:391] -ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:208.14967155456543ms total_cost_time:208.19544792175293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11369 prompt_cache_len:5151 prompt_cache_ratio:0.4530741490016712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 -DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10836076736450195 s -INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.1104133129119873 s -DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=291476315973823668338462929636608152168, time:1750767994.4854543s req_ids:[8] -DEBUG 06-24 20:26:34 [manager.py:391] -ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:210.74891090393066ms total_cost_time:210.79492568969727ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11370 prompt_cache_len:5151 prompt_cache_ratio:0.4530343007915567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 -DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:34 [batch.py:51] router release req id 8 -INFO 06-24 20:26:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10911965370178223 s -INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11114192008972168 s -DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=264584476397341822822803240795818981259, time:1750767994.7011971s req_ids:[8] -DEBUG 06-24 20:26:34 [manager.py:391] -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:208.42337608337402ms total_cost_time:208.46939086914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11371 prompt_cache_len:5151 prompt_cache_ratio:0.4529944595901856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 -DEBUG 06-24 20:26:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:34 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:34 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:34 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:34 [manager.py:224] router recive req id 8 cost time 0.10861063003540039 s -INFO 06-24 20:26:34 [manager.py:68] detokenization recv req id 8 cost time 0.11081218719482422 s -DEBUG 06-24 20:26:34 [manager.py:391] Prefill Batch: batch_id=307213387413008843593500556736448595602, time:1750767994.9145882s req_ids:[8] -DEBUG 06-24 20:26:34 [manager.py:391] -ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:34 lightllm_req_id:8 first_token_cost:202.73280143737793ms total_cost_time:202.77714729309082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11372 prompt_cache_len:5151 prompt_cache_ratio:0.45295462539570874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 -DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10908842086791992 s -INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11111235618591309 s -DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=25521592814777134978935121426408989843, time:1750767995.1259975s req_ids:[8] -DEBUG 06-24 20:26:35 [manager.py:391] -ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:362.5319004058838ms total_cost_time:362.5774383544922ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11373 prompt_cache_len:5151 prompt_cache_ratio:0.452914798206278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 -DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s -INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11037445068359375 s -DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=114171028174874015045170094747811605847, time:1750767995.4957058s req_ids:[8] -DEBUG 06-24 20:26:35 [manager.py:391] -ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:208.61053466796875ms total_cost_time:208.65654945373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11374 prompt_cache_len:5151 prompt_cache_ratio:0.4528749780200457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 -DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.10834693908691406 s -INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034703254699707 s -DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=186737104494599180258515166641893672656, time:1750767995.712553s req_ids:[8] -DEBUG 06-24 20:26:35 [manager.py:391] -ERROR 06-24 20:26:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:204.38504219055176ms total_cost_time:204.43034172058105ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11375 prompt_cache_len:5151 prompt_cache_ratio:0.45283516483516484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 -DEBUG 06-24 20:26:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:35 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:35 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:35 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:35 [manager.py:224] router recive req id 8 cost time 0.11012601852416992 s -INFO 06-24 20:26:35 [manager.py:68] detokenization recv req id 8 cost time 0.1120750904083252 s -DEBUG 06-24 20:26:35 [manager.py:391] Prefill Batch: batch_id=128634963514083866325217959331542497750, time:1750767995.92352s req_ids:[8] -DEBUG 06-24 20:26:35 [manager.py:391] -ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:35 lightllm_req_id:8 first_token_cost:206.9869041442871ms total_cost_time:207.0307731628418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11376 prompt_cache_len:5151 prompt_cache_ratio:0.45279535864978904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 -DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10995841026306152 s -INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11197876930236816 s -DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=87795253211386982363984227314366980904, time:1750767996.135674s req_ids:[8] -DEBUG 06-24 20:26:36 [manager.py:391] -ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:206.45713806152344ms total_cost_time:206.50172233581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11377 prompt_cache_len:5151 prompt_cache_ratio:0.4527555594620726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 -DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10920000076293945 s -INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11117291450500488 s -DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=301826638614852868588612436839899234328, time:1750767996.3500583s req_ids:[8] -DEBUG 06-24 20:26:36 [manager.py:391] -ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:208.45484733581543ms total_cost_time:208.49943161010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11378 prompt_cache_len:5151 prompt_cache_ratio:0.4527157672701705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 -DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10819053649902344 s -INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025023460388184 s -DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=90844823201287249829461701521362730125, time:1750767996.5659115s req_ids:[8] -DEBUG 06-24 20:26:36 [manager.py:391] -ERROR 06-24 20:26:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:210.85143089294434ms total_cost_time:210.89529991149902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11379 prompt_cache_len:5151 prompt_cache_ratio:0.45267598207223836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 -DEBUG 06-24 20:26:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:36 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:36 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:36 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:36 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s -INFO 06-24 20:26:36 [manager.py:68] detokenization recv req id 8 cost time 0.111419677734375 s -DEBUG 06-24 20:26:36 [manager.py:391] Prefill Batch: batch_id=204707555654713909214322936798646306507, time:1750767996.7813227s req_ids:[8] -DEBUG 06-24 20:26:36 [manager.py:391] -ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:36 lightllm_req_id:8 first_token_cost:370.10788917541504ms total_cost_time:370.15295028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11380 prompt_cache_len:5151 prompt_cache_ratio:0.45263620386643233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 -DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10904479026794434 s -INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11105465888977051 s -DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=52519310238188551038995325150239412051, time:1750767997.1558664s req_ids:[8] -DEBUG 06-24 20:26:37 [manager.py:391] -ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:204.63871955871582ms total_cost_time:204.6811580657959ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11381 prompt_cache_len:5151 prompt_cache_ratio:0.45259643265090943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 -DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10858583450317383 s -INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11052322387695312 s -DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=258501726409303998206374352036702716262, time:1750767997.3686936s req_ids:[8] -DEBUG 06-24 20:26:37 [manager.py:391] -ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:201.2655735015869ms total_cost_time:201.3101577758789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11382 prompt_cache_len:5151 prompt_cache_ratio:0.4525566684238271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 -DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10866022109985352 s -INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.1106576919555664 s -DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=20894106409733256911119845056825164036, time:1750767997.5757205s req_ids:[8] -DEBUG 06-24 20:26:37 [manager.py:391] -ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:202.99863815307617ms total_cost_time:203.04155349731445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11383 prompt_cache_len:5151 prompt_cache_ratio:0.4525169111833436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 -DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.1092841625213623 s -INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.11115097999572754 s -DEBUG 06-24 20:26:37 [manager.py:391] Prefill Batch: batch_id=115214968010863144661984277475568275754, time:1750767997.7856598s req_ids:[8] -DEBUG 06-24 20:26:37 [manager.py:391] -ERROR 06-24 20:26:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:209.43140983581543ms total_cost_time:209.4738483428955ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11384 prompt_cache_len:5151 prompt_cache_ratio:0.4524771609276177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 -DEBUG 06-24 20:26:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:37 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:37 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:37 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:37 [manager.py:224] router recive req id 8 cost time 0.10804915428161621 s -INFO 06-24 20:26:37 [manager.py:68] detokenization recv req id 8 cost time 0.10999059677124023 s -DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=257445135311906933558898804516976076100, time:1750767998.001561s req_ids:[8] -DEBUG 06-24 20:26:38 [manager.py:391] -ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:37 lightllm_req_id:8 first_token_cost:207.02695846557617ms total_cost_time:207.06987380981445ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11385 prompt_cache_len:5151 prompt_cache_ratio:0.45243741765480894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 -DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.10863876342773438 s -INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11054873466491699 s -DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=281490540142519963759154562484838144044, time:1750767998.2157903s req_ids:[8] -DEBUG 06-24 20:26:38 [manager.py:391] -ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:210.3729248046875ms total_cost_time:210.4172706604004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11386 prompt_cache_len:5151 prompt_cache_ratio:0.45239768136307745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 -DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.1085047721862793 s -INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s -DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=148884076342120898899233499658111567467, time:1750767998.432516s req_ids:[8] -DEBUG 06-24 20:26:38 [manager.py:391] -ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:374.1121292114258ms total_cost_time:374.157190322876ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11387 prompt_cache_len:5151 prompt_cache_ratio:0.452357952050584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 -DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:38 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:38 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:38 [manager.py:224] router recive req id 8 cost time 0.10919356346130371 s -INFO 06-24 20:26:38 [manager.py:68] detokenization recv req id 8 cost time 0.11113786697387695 s -DEBUG 06-24 20:26:38 [manager.py:391] Prefill Batch: batch_id=27374442269757230621831463518361342881, time:1750767998.8111942s req_ids:[8] -DEBUG 06-24 20:26:38 [manager.py:391] -ERROR 06-24 20:26:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:206.37965202331543ms total_cost_time:206.39920234680176ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11388 prompt_cache_len:5151 prompt_cache_ratio:0.45231822971549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 -DEBUG 06-24 20:26:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:38 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10820770263671875 s -INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.1101827621459961 s -DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=213192048238280243935100427740786931934, time:1750767999.024374s req_ids:[8] -DEBUG 06-24 20:26:39 [manager.py:391] -ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:38 lightllm_req_id:8 first_token_cost:209.8240852355957ms total_cost_time:209.86557006835938ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11389 prompt_cache_len:5151 prompt_cache_ratio:0.4522785143559575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 -DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10762572288513184 s -INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.10945487022399902 s -DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=206440229183086917267791875277986428336, time:1750767999.2401114s req_ids:[8] -DEBUG 06-24 20:26:39 [manager.py:391] -ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:165.33756256103516ms total_cost_time:165.37928581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11390 prompt_cache_len:5151 prompt_cache_ratio:0.45223880597014926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 -DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10698175430297852 s -INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.1088414192199707 s -DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=140382066545869588582599414852405393603, time:1750767999.4119606s req_ids:[8] -DEBUG 06-24 20:26:39 [manager.py:391] -ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:194.8864459991455ms total_cost_time:194.94390487670898ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:11391 prompt_cache_len:5151 prompt_cache_ratio:0.4521991045562286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 -DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10831832885742188 s -INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.11076164245605469 s -DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=76133194832878272861788721531963478199, time:1750767999.6237864s req_ids:[8] -DEBUG 06-24 20:26:39 [manager.py:391] -ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:217.76151657104492ms total_cost_time:217.8058624267578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11392 prompt_cache_len:5151 prompt_cache_ratio:0.45215941011235955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 -DEBUG 06-24 20:26:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:39 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:39 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:39 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:39 [manager.py:224] router recive req id 8 cost time 0.10841512680053711 s -INFO 06-24 20:26:39 [manager.py:68] detokenization recv req id 8 cost time 0.11044740676879883 s -DEBUG 06-24 20:26:39 [manager.py:391] Prefill Batch: batch_id=204949515390140145931873366284051228650, time:1750767999.8412514s req_ids:[8] -DEBUG 06-24 20:26:39 [manager.py:391] -ERROR 06-24 20:26:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:215.8217430114746ms total_cost_time:215.8651351928711ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11393 prompt_cache_len:5151 prompt_cache_ratio:0.45211972263670674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 -DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.30970191955566406 s -INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.3116495609283447 s -DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=150427684650236453665033685190671536992, time:1750768000.2657373s req_ids:[8] -DEBUG 06-24 20:26:40 [manager.py:391] -ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:39 lightllm_req_id:8 first_token_cost:418.7474250793457ms total_cost_time:418.7924861907959ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11394 prompt_cache_len:5151 prompt_cache_ratio:0.45208004212743547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 -DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10851621627807617 s -INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.11084198951721191 s -DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=130781456027489091625715341011975799698, time:1750768000.4956565s req_ids:[8] -DEBUG 06-24 20:26:40 [manager.py:391] -ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:217.58174896240234ms total_cost_time:217.62514114379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11395 prompt_cache_len:5151 prompt_cache_ratio:0.45204036858271174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 -DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10803055763244629 s -INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.10998296737670898 s -DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=299077365785168116130587742559388680871, time:1750768000.7119126s req_ids:[8] -DEBUG 06-24 20:26:40 [manager.py:391] -ERROR 06-24 20:26:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:213.8376235961914ms total_cost_time:213.8814926147461ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11396 prompt_cache_len:5151 prompt_cache_ratio:0.452000702000702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 -DEBUG 06-24 20:26:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:40 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:40 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:40 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:40 [manager.py:224] router recive req id 8 cost time 0.10790705680847168 s -INFO 06-24 20:26:40 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s -DEBUG 06-24 20:26:40 [manager.py:391] Prefill Batch: batch_id=196218395509997873495202234878257590502, time:1750768000.9460695s req_ids:[8] -DEBUG 06-24 20:26:40 [manager.py:391] -ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:40 lightllm_req_id:8 first_token_cost:226.98497772216797ms total_cost_time:227.03051567077637ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11397 prompt_cache_len:5151 prompt_cache_ratio:0.4519610423795736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 -DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10677742958068848 s -INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.10873651504516602 s -DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=212507571335281422355347999807802694187, time:1750768001.1855285s req_ids:[8] -DEBUG 06-24 20:26:41 [manager.py:391] -ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:231.99772834777832ms total_cost_time:232.04302787780762ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11398 prompt_cache_len:5151 prompt_cache_ratio:0.4519213897174943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 -DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10826325416564941 s -INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11086654663085938 s -DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=213600899489594284358647393024741369149, time:1750768001.4043288s req_ids:[8] -DEBUG 06-24 20:26:41 [manager.py:391] -ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:204.95367050170898ms total_cost_time:204.9996852874756ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11399 prompt_cache_len:5151 prompt_cache_ratio:0.4518817440126327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 -DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10845947265625 s -INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11044502258300781 s -DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=159107525586297725010270990157540721132, time:1750768001.6252797s req_ids:[8] -DEBUG 06-24 20:26:41 [manager.py:391] -ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:210.52932739257812ms total_cost_time:210.5724811553955ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11400 prompt_cache_len:5151 prompt_cache_ratio:0.4518421052631579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 -DEBUG 06-24 20:26:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:41 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:41 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:41 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:41 [manager.py:224] router recive req id 8 cost time 0.10933613777160645 s -INFO 06-24 20:26:41 [manager.py:68] detokenization recv req id 8 cost time 0.11127448081970215 s -DEBUG 06-24 20:26:41 [manager.py:391] Prefill Batch: batch_id=227527269978586458485432297574598978644, time:1750768001.8337352s req_ids:[8] -DEBUG 06-24 20:26:41 [manager.py:391] -ERROR 06-24 20:26:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:205.4150104522705ms total_cost_time:205.4576873779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11401 prompt_cache_len:5151 prompt_cache_ratio:0.45180247346723973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 -DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.3108491897583008 s -INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.31271815299987793 s -DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=286860496175689513962416210175661979854, time:1750768002.2453833s req_ids:[8] -DEBUG 06-24 20:26:42 [manager.py:391] -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:41 lightllm_req_id:8 first_token_cost:403.84507179260254ms total_cost_time:403.88989448547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11402 prompt_cache_len:5151 prompt_cache_ratio:0.4517628486230486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 -DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10840129852294922 s -INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.1103048324584961 s -DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=176163417493011799583658813601979509312, time:1750768002.457731s req_ids:[8] -DEBUG 06-24 20:26:42 [manager.py:391] -ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:201.6594409942627ms total_cost_time:201.71165466308594ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:11403 prompt_cache_len:5151 prompt_cache_ratio:0.4517232307287556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 -DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s -INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s -DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=106128599891318826603399998058830146012, time:1750768002.6623335s req_ids:[8] -DEBUG 06-24 20:26:42 [manager.py:391] -ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:223.01411628723145ms total_cost_time:223.04844856262207ms,out_token_counter:1 mean_per_token_cost_time: 0.034332275390625ms prompt_token_num:11404 prompt_cache_len:5151 prompt_cache_ratio:0.45168361978253246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 -DEBUG 06-24 20:26:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:42 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:42 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:42 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:42 [manager.py:224] router recive req id 8 cost time 0.10883474349975586 s -INFO 06-24 20:26:42 [manager.py:68] detokenization recv req id 8 cost time 0.11072516441345215 s -DEBUG 06-24 20:26:42 [manager.py:391] Prefill Batch: batch_id=3181749269374408313580529893380950792, time:1750768002.8914652s req_ids:[8] -DEBUG 06-24 20:26:42 [manager.py:391] -ERROR 06-24 20:26:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:212.60643005371094ms total_cost_time:212.65697479248047ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:11405 prompt_cache_len:5151 prompt_cache_ratio:0.45164401578255153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 -DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.11085867881774902 s -INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11284160614013672 s -DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=126213053735992297747727765233620748022, time:1750768003.11114s req_ids:[8] -DEBUG 06-24 20:26:43 [manager.py:391] -ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:42 lightllm_req_id:8 first_token_cost:206.70223236083984ms total_cost_time:206.74538612365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11406 prompt_cache_len:5151 prompt_cache_ratio:0.4516044187269858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 -DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10862207412719727 s -INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11069226264953613 s -DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=182526447339702735058013246334959616999, time:1750768003.3246064s req_ids:[8] -DEBUG 06-24 20:26:43 [manager.py:391] -ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:217.20576286315918ms total_cost_time:217.26131439208984ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:11407 prompt_cache_len:5151 prompt_cache_ratio:0.45156482861400893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 -DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s -INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11086726188659668 s -DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=190338994372865627187810971480252783295, time:1750768003.5499256s req_ids:[8] -DEBUG 06-24 20:26:43 [manager.py:391] -ERROR 06-24 20:26:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:203.7954330444336ms total_cost_time:203.82452011108398ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:11408 prompt_cache_len:5151 prompt_cache_ratio:0.45152524544179523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 -DEBUG 06-24 20:26:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:43 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:43 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:43 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:43 [manager.py:224] router recive req id 8 cost time 0.10924029350280762 s -INFO 06-24 20:26:43 [manager.py:68] detokenization recv req id 8 cost time 0.11170077323913574 s -DEBUG 06-24 20:26:43 [manager.py:391] Prefill Batch: batch_id=258024537748378165907671864074395131937, time:1750768003.7615721s req_ids:[8] -DEBUG 06-24 20:26:43 [manager.py:391] -DEBUG 06-24 20:26:43 [stats.py:37] Avg tokens(prompt+generate) throughput: 48774.682 tokens/s -DEBUG 06-24 20:26:43 [stats.py:37] Avg prompt tokens throughput: 48766.217 tokens/s -DEBUG 06-24 20:26:43 [stats.py:37] Avg generate tokens throughput: 8.465 tokens/s -ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:43 lightllm_req_id:8 first_token_cost:371.15025520324707ms total_cost_time:371.1967468261719ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11409 prompt_cache_len:5151 prompt_cache_ratio:0.45148566920851957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 -DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10888147354125977 s -INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11086130142211914 s -DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=311922921466040430043422932988566209656, time:1750768004.1368408s req_ids:[8] -DEBUG 06-24 20:26:44 [manager.py:391] -ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:209.5491886138916ms total_cost_time:209.5925807952881ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11410 prompt_cache_len:5151 prompt_cache_ratio:0.45144609991235757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 -DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10971546173095703 s -INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11107087135314941 s -DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=120879733504583710329324544207586062309, time:1750768004.3547466s req_ids:[8] -DEBUG 06-24 20:26:44 [manager.py:391] -ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:207.11350440979004ms total_cost_time:207.13567733764648ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:11411 prompt_cache_len:5151 prompt_cache_ratio:0.4514065375514854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 -DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.1065976619720459 s -INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.10849571228027344 s -DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=291734879934071544148210507075532637455, time:1750768004.566464s req_ids:[8] -DEBUG 06-24 20:26:44 [manager.py:391] -ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:209.9325656890869ms total_cost_time:209.9778652191162ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11412 prompt_cache_len:5151 prompt_cache_ratio:0.45136698212407994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 -DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.11020016670227051 s -INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.11215686798095703 s -DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=87323746205618040945949114337264768957, time:1750768004.7813244s req_ids:[8] -DEBUG 06-24 20:26:44 [manager.py:391] -ERROR 06-24 20:26:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:210.0822925567627ms total_cost_time:210.12544631958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11413 prompt_cache_len:5151 prompt_cache_ratio:0.45132743362831856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 -DEBUG 06-24 20:26:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:44 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:44 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:44 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:44 [manager.py:224] router recive req id 8 cost time 0.10901474952697754 s -INFO 06-24 20:26:44 [manager.py:68] detokenization recv req id 8 cost time 0.1110537052154541 s -DEBUG 06-24 20:26:44 [manager.py:391] Prefill Batch: batch_id=230258467232759542547302720330070511707, time:1750768004.9965813s req_ids:[8] -DEBUG 06-24 20:26:44 [manager.py:391] -ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:44 lightllm_req_id:8 first_token_cost:206.3300609588623ms total_cost_time:206.3760757446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11414 prompt_cache_len:5151 prompt_cache_ratio:0.4512878920623795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 -DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.10862517356872559 s -INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.11072111129760742 s -DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=219321759911789689682295614936008568054, time:1750768005.209134s req_ids:[8] -DEBUG 06-24 20:26:45 [manager.py:391] -ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:207.82208442687988ms total_cost_time:207.86690711975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11415 prompt_cache_len:5151 prompt_cache_ratio:0.4512483574244415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 -DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.31070661544799805 s -INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.31270909309387207 s -DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=36450839091420756264867220889890774424, time:1750768005.6396902s req_ids:[8] -DEBUG 06-24 20:26:45 [manager.py:391] -ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:430.55152893066406ms total_cost_time:430.59587478637695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11416 prompt_cache_len:5151 prompt_cache_ratio:0.45120882971268395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 -DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:45 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:45 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:45 [manager.py:224] router recive req id 8 cost time 0.1082448959350586 s -INFO 06-24 20:26:45 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s -DEBUG 06-24 20:26:45 [manager.py:391] Prefill Batch: batch_id=303341939372676893112978830545252178676, time:1750768005.8645077s req_ids:[8] -DEBUG 06-24 20:26:45 [manager.py:391] -ERROR 06-24 20:26:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:213.22131156921387ms total_cost_time:213.26613426208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11417 prompt_cache_len:5151 prompt_cache_ratio:0.45116930892528684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 -DEBUG 06-24 20:26:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:45 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s -INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11105871200561523 s -DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=248047106192503922537643561133400774504, time:1750768006.0796413s req_ids:[8] -DEBUG 06-24 20:26:46 [manager.py:391] -ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:45 lightllm_req_id:8 first_token_cost:205.73163032531738ms total_cost_time:205.80124855041504ms,out_token_counter:1 mean_per_token_cost_time: 0.06961822509765625ms prompt_token_num:11418 prompt_cache_len:5151 prompt_cache_ratio:0.4511297950604309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 -DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10999226570129395 s -INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11195850372314453 s -DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=303525403802918620619032291181053833173, time:1750768006.2931178s req_ids:[8] -DEBUG 06-24 20:26:46 [manager.py:391] -ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:205.7502269744873ms total_cost_time:205.7943344116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11419 prompt_cache_len:5151 prompt_cache_ratio:0.4510902881162974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 -DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.1082453727722168 s -INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.1102604866027832 s -DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=100219738425038210171617287914864328515, time:1750768006.5054529s req_ids:[8] -DEBUG 06-24 20:26:46 [manager.py:391] -ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:209.14721488952637ms total_cost_time:209.19084548950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11420 prompt_cache_len:5151 prompt_cache_ratio:0.4510507880910683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 -DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:46 [batch.py:51] router release req id 8 -DEBUG 06-24 20:26:46 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:46 [manager.py:283] -DEBUG 06-24 20:26:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:46 [manager.py:284] -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10875248908996582 s -INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.1108248233795166 s -DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=279852271004735822952112466336953152437, time:1750768006.7226346s req_ids:[8] -DEBUG 06-24 20:26:46 [manager.py:391] -ERROR 06-24 20:26:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:236.45448684692383ms total_cost_time:236.4962100982666ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11421 prompt_cache_len:5151 prompt_cache_ratio:0.4510112949829262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 -DEBUG 06-24 20:26:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:46 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:46 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:46 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:46 [manager.py:224] router recive req id 8 cost time 0.10879373550415039 s -INFO 06-24 20:26:46 [manager.py:68] detokenization recv req id 8 cost time 0.11072301864624023 s -DEBUG 06-24 20:26:46 [manager.py:391] Prefill Batch: batch_id=305205635422123650950816419876549788098, time:1750768006.9650004s req_ids:[8] -DEBUG 06-24 20:26:46 [manager.py:391] -INFO 06-24 20:26:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:46 lightllm_req_id:8 first_token_cost:377.07042694091797ms total_cost_time:377.11358070373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11422 prompt_cache_len:5151 prompt_cache_ratio:0.4509718087900543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 -DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s -INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.1111299991607666 s -DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=333926148951189250711962467588047871261, time:1750768007.3492296s req_ids:[8] -DEBUG 06-24 20:26:47 [manager.py:391] -ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:211.61270141601562ms total_cost_time:211.6560935974121ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11423 prompt_cache_len:5151 prompt_cache_ratio:0.45093232951063644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 -DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10814476013183594 s -INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.11011648178100586 s -DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=55658512300784675739254653756980415550, time:1750768007.5651648s req_ids:[8] -DEBUG 06-24 20:26:47 [manager.py:391] -ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:208.88996124267578ms total_cost_time:208.93526077270508ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11424 prompt_cache_len:5151 prompt_cache_ratio:0.45089285714285715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 -DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:47 [manager.py:224] router recive req id 8 cost time 0.10985422134399414 s -INFO 06-24 20:26:47 [manager.py:68] detokenization recv req id 8 cost time 0.11188435554504395 s -DEBUG 06-24 20:26:47 [manager.py:391] Prefill Batch: batch_id=237362733282102233471689686941624240488, time:1750768007.7897766s req_ids:[8] -DEBUG 06-24 20:26:47 [manager.py:391] -ERROR 06-24 20:26:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:221.01187705993652ms total_cost_time:221.05669975280762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11425 prompt_cache_len:5151 prompt_cache_ratio:0.4508533916849015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 -DEBUG 06-24 20:26:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:47 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:47 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:47 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10961675643920898 s -INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11180782318115234 s -DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=114049171174467687963947708250224734256, time:1750768008.0093539s req_ids:[8] -DEBUG 06-24 20:26:48 [manager.py:391] -ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:47 lightllm_req_id:8 first_token_cost:218.73116493225098ms total_cost_time:218.77336502075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11426 prompt_cache_len:5151 prompt_cache_ratio:0.4508139331349554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 -DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10853314399719238 s -INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11058449745178223 s -DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=196520198194620165377355846199711763385, time:1750768008.2341673s req_ids:[8] -DEBUG 06-24 20:26:48 [manager.py:391] -ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:211.1365795135498ms total_cost_time:211.1814022064209ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11427 prompt_cache_len:5151 prompt_cache_ratio:0.450774481491205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 -DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10811281204223633 s -INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11017990112304688 s -DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=32757324444749637181646689254531061584, time:1750768008.4508927s req_ids:[8] -DEBUG 06-24 20:26:48 [manager.py:391] -ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:208.45961570739746ms total_cost_time:208.50372314453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11428 prompt_cache_len:5151 prompt_cache_ratio:0.45073503675183757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 -DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:48 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10965847969055176 s -INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11160421371459961 s -DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=305702368448471810369552601750959548542, time:1750768008.6656163s req_ids:[8] -DEBUG 06-24 20:26:48 [manager.py:391] -ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:202.43573188781738ms total_cost_time:202.47817039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11429 prompt_cache_len:5151 prompt_cache_ratio:0.4506955989150407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 -DEBUG 06-24 20:26:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:48 [batch.py:51] router release req id 8 -INFO 06-24 20:26:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:48 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:48 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:48 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s -INFO 06-24 20:26:48 [manager.py:68] detokenization recv req id 8 cost time 0.11050200462341309 s -DEBUG 06-24 20:26:48 [manager.py:391] Prefill Batch: batch_id=4521175117241422025448549874625258358, time:1750768008.8800495s req_ids:[8] -DEBUG 06-24 20:26:48 [manager.py:391] -ERROR 06-24 20:26:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:214.5235538482666ms total_cost_time:214.5674228668213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11430 prompt_cache_len:5151 prompt_cache_ratio:0.4506561679790026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 -DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10989189147949219 s -INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.1119835376739502 s -DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=63514199755677022723127054143908885287, time:1750768009.098402s req_ids:[8] -DEBUG 06-24 20:26:49 [manager.py:391] -ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:48 lightllm_req_id:8 first_token_cost:380.2375793457031ms total_cost_time:380.2812099456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11431 prompt_cache_len:5151 prompt_cache_ratio:0.45061674394191237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 -DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10895848274230957 s -INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.11121821403503418 s -DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=41853940536315301038744106336320683341, time:1750768009.4829752s req_ids:[8] -DEBUG 06-24 20:26:49 [manager.py:391] -ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:213.1514549255371ms total_cost_time:213.1936550140381ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11432 prompt_cache_len:5151 prompt_cache_ratio:0.4505773268019594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 -DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.1089181900024414 s -INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.1110846996307373 s -DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=27459495678707253398485182674393685392, time:1750768009.702209s req_ids:[8] -DEBUG 06-24 20:26:49 [manager.py:391] -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:209.45000648498535ms total_cost_time:209.49363708496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11433 prompt_cache_len:5151 prompt_cache_ratio:0.45053791655733405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 -DEBUG 06-24 20:26:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:49 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:49 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:49 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:49 [manager.py:224] router recive req id 8 cost time 0.10866260528564453 s -INFO 06-24 20:26:49 [manager.py:68] detokenization recv req id 8 cost time 0.11062884330749512 s -DEBUG 06-24 20:26:49 [manager.py:391] Prefill Batch: batch_id=81542483225300755427449891853681682264, time:1750768009.9243603s req_ids:[8] -DEBUG 06-24 20:26:49 [manager.py:391] -ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:49 lightllm_req_id:8 first_token_cost:212.14914321899414ms total_cost_time:212.1884822845459ms,out_token_counter:1 mean_per_token_cost_time: 0.03933906555175781ms prompt_token_num:11434 prompt_cache_len:5151 prompt_cache_ratio:0.450498513206227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 -DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.1092071533203125 s -INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.1112823486328125 s -DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=197084051234444713678637824712257515374, time:1750768010.1371236s req_ids:[8] -DEBUG 06-24 20:26:50 [manager.py:391] -ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:207.1986198425293ms total_cost_time:207.24177360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11435 prompt_cache_len:5151 prompt_cache_ratio:0.4504591167468299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 -DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10924458503723145 s -INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11135411262512207 s -DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=201670840651413510233811322827439234604, time:1750768010.3512115s req_ids:[8] -DEBUG 06-24 20:26:50 [manager.py:391] -ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:205.69252967834473ms total_cost_time:205.7352066040039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11436 prompt_cache_len:5151 prompt_cache_ratio:0.45041972717733475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 -DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10809135437011719 s -INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11013293266296387 s -DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=70450764736635334753073304445525320932, time:1750768010.5642529s req_ids:[8] -DEBUG 06-24 20:26:50 [manager.py:391] -ERROR 06-24 20:26:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:209.52939987182617ms total_cost_time:209.57279205322266ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11437 prompt_cache_len:5151 prompt_cache_ratio:0.4503803444959342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 -DEBUG 06-24 20:26:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:50 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:50 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:50 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:50 [manager.py:224] router recive req id 8 cost time 0.10941457748413086 s -INFO 06-24 20:26:50 [manager.py:68] detokenization recv req id 8 cost time 0.11160469055175781 s -DEBUG 06-24 20:26:50 [manager.py:391] Prefill Batch: batch_id=61631857586846066140267766410269495383, time:1750768010.7781177s req_ids:[8] -DEBUG 06-24 20:26:50 [manager.py:391] -ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:50 lightllm_req_id:8 first_token_cost:372.424840927124ms total_cost_time:372.4701404571533ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11438 prompt_cache_len:5151 prompt_cache_ratio:0.4503409687008218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 -DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10923218727111816 s -INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.1111307144165039 s -DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=139610691711192668916302126633545156275, time:1750768011.1583805s req_ids:[8] -DEBUG 06-24 20:26:51 [manager.py:391] -ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:207.1998119354248ms total_cost_time:207.2439193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11439 prompt_cache_len:5151 prompt_cache_ratio:0.45030159979019146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 -DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.1074526309967041 s -INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.10922622680664062 s -DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=331591899575203965379308775778010219342, time:1750768011.3728445s req_ids:[8] -DEBUG 06-24 20:26:51 [manager.py:391] -ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:167.33479499816895ms total_cost_time:167.37794876098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11440 prompt_cache_len:5151 prompt_cache_ratio:0.4502622377622378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 -DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10809683799743652 s -INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.10990262031555176 s -DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=238774578876722170465811159921886031137, time:1750768011.545501s req_ids:[8] -DEBUG 06-24 20:26:51 [manager.py:391] -ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:165.5418872833252ms total_cost_time:165.58313369750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11441 prompt_cache_len:5151 prompt_cache_ratio:0.45022288261515603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 -DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10931634902954102 s -INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.11132502555847168 s -DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=96912978304753535353150470575578194729, time:1750768011.7168686s req_ids:[8] -DEBUG 06-24 20:26:51 [manager.py:391] -ERROR 06-24 20:26:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:202.44097709655762ms total_cost_time:202.4829387664795ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11442 prompt_cache_len:5151 prompt_cache_ratio:0.4501835343471421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 -DEBUG 06-24 20:26:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:51 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:51 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:51 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:51 [manager.py:224] router recive req id 8 cost time 0.10929250717163086 s -INFO 06-24 20:26:51 [manager.py:68] detokenization recv req id 8 cost time 0.1112518310546875 s -DEBUG 06-24 20:26:51 [manager.py:391] Prefill Batch: batch_id=97691593939246167262052618685552095628, time:1750768011.9264734s req_ids:[8] -DEBUG 06-24 20:26:51 [manager.py:391] -ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:51 lightllm_req_id:8 first_token_cost:208.52065086364746ms total_cost_time:208.56285095214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11443 prompt_cache_len:5151 prompt_cache_ratio:0.4501441929563926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 -DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10970067977905273 s -INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11170268058776855 s -DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=95794465918697418231805359040113649018, time:1750768012.1407182s req_ids:[8] -DEBUG 06-24 20:26:52 [manager.py:391] -ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.82113075256348ms total_cost_time:207.86356925964355ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11444 prompt_cache_len:5151 prompt_cache_ratio:0.4501048584411045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 -DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:52 [batch.py:51] router release req id 8 -INFO 06-24 20:26:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10911035537719727 s -INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11138081550598145 s -DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=86217182640072320305634902100594652241, time:1750768012.3571422s req_ids:[8] -DEBUG 06-24 20:26:52 [manager.py:391] -ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:409.83009338378906ms total_cost_time:409.87396240234375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11445 prompt_cache_len:5151 prompt_cache_ratio:0.45006553079947575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 -DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10810637474060059 s -INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s -DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=20573420085522550803710680408427342150, time:1750768012.772401s req_ids:[8] -DEBUG 06-24 20:26:52 [manager.py:391] -ERROR 06-24 20:26:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.15999603271484ms total_cost_time:207.20362663269043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11446 prompt_cache_len:5151 prompt_cache_ratio:0.4500262100297047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 -DEBUG 06-24 20:26:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:52 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:52 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:52 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:52 [manager.py:224] router recive req id 8 cost time 0.10821127891540527 s -INFO 06-24 20:26:52 [manager.py:68] detokenization recv req id 8 cost time 0.11014103889465332 s -DEBUG 06-24 20:26:52 [manager.py:391] Prefill Batch: batch_id=290568827336460463864548603896084177145, time:1750768012.9853816s req_ids:[8] -DEBUG 06-24 20:26:52 [manager.py:391] -ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:52 lightllm_req_id:8 first_token_cost:207.78846740722656ms total_cost_time:207.83162117004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11447 prompt_cache_len:5151 prompt_cache_ratio:0.4499868961299904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 -DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10890388488769531 s -INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11088705062866211 s -DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=249091325483057760909907931116052622912, time:1750768013.2011693s req_ids:[8] -DEBUG 06-24 20:26:53 [manager.py:391] -INFO 06-24 20:26:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:26:53 [statics_utils.py:24] mean first cost: 228.69557909546808 ms -INFO 06-24 20:26:53 [statics_utils.py:24] mean per token cost: 0.06421429555557658 ms -ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:203.08899879455566ms total_cost_time:203.13262939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11448 prompt_cache_len:5151 prompt_cache_ratio:0.4499475890985325 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 -INFO 06-24 20:26:53 [manager.py:620] left req id 8can release True refcount 3 -DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.1081545352935791 s -INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.1100926399230957 s -DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=14602717524417206026431386274730183660, time:1750768013.4103122s req_ids:[8] -DEBUG 06-24 20:26:53 [manager.py:391] -ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:208.7841033935547ms total_cost_time:208.82630348205566ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11449 prompt_cache_len:5151 prompt_cache_ratio:0.4499082889335313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 -DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10936760902404785 s -INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11135458946228027 s -DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=301209802871501452267321032428093171548, time:1750768013.6254554s req_ids:[8] -DEBUG 06-24 20:26:53 [manager.py:391] -ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:208.9521884918213ms total_cost_time:208.99724960327148ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11450 prompt_cache_len:5151 prompt_cache_ratio:0.4498689956331878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 -DEBUG 06-24 20:26:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:53 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:53 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:53 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:53 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s -INFO 06-24 20:26:53 [manager.py:68] detokenization recv req id 8 cost time 0.11019015312194824 s -DEBUG 06-24 20:26:53 [manager.py:391] Prefill Batch: batch_id=127511485200745841045030443192024394061, time:1750768013.841671s req_ids:[8] -DEBUG 06-24 20:26:53 [manager.py:391] -DEBUG 06-24 20:26:53 [stats.py:37] Avg tokens(prompt+generate) throughput: 47634.439 tokens/s -DEBUG 06-24 20:26:53 [stats.py:37] Avg prompt tokens throughput: 47626.106 tokens/s -DEBUG 06-24 20:26:53 [stats.py:37] Avg generate tokens throughput: 8.333 tokens/s -ERROR 06-24 20:26:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:209.91015434265137ms total_cost_time:209.95402336120605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11451 prompt_cache_len:5151 prompt_cache_ratio:0.4498297091957034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 -DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.31118130683898926 s -INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.3131897449493408 s -DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=54933909058169892662337345102072916851, time:1750768014.2642703s req_ids:[8] -DEBUG 06-24 20:26:54 [manager.py:391] -ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:53 lightllm_req_id:8 first_token_cost:422.76859283447266ms total_cost_time:422.81460762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11452 prompt_cache_len:5151 prompt_cache_ratio:0.44979042961928045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 -DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s -INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s -DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=104407485998132974389324877879190732666, time:1750768014.486629s req_ids:[8] -DEBUG 06-24 20:26:54 [manager.py:391] -ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:211.39216423034668ms total_cost_time:211.43698692321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11453 prompt_cache_len:5151 prompt_cache_ratio:0.4497511569021217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 -DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10896873474121094 s -INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11106705665588379 s -DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=183700484889021835634171427931783141796, time:1750768014.7209835s req_ids:[8] -DEBUG 06-24 20:26:54 [manager.py:391] -ERROR 06-24 20:26:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:226.31144523620605ms total_cost_time:226.35769844055176ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11454 prompt_cache_len:5151 prompt_cache_ratio:0.4497118910424306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 -DEBUG 06-24 20:26:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:54 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:54 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:54 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:54 [manager.py:224] router recive req id 8 cost time 0.10884284973144531 s -INFO 06-24 20:26:54 [manager.py:68] detokenization recv req id 8 cost time 0.11078310012817383 s -DEBUG 06-24 20:26:54 [manager.py:391] Prefill Batch: batch_id=332955413409934623315870900375326021640, time:1750768014.9356692s req_ids:[8] -DEBUG 06-24 20:26:54 [manager.py:391] -ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:54 lightllm_req_id:8 first_token_cost:208.37163925170898ms total_cost_time:208.41550827026367ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11455 prompt_cache_len:5151 prompt_cache_ratio:0.4496726320384112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 -DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10902118682861328 s -INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11123847961425781 s -DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=76991108028334758460887047233862964570, time:1750768015.151376s req_ids:[8] -DEBUG 06-24 20:26:55 [manager.py:391] -ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:207.83567428588867ms total_cost_time:207.87930488586426ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11456 prompt_cache_len:5151 prompt_cache_ratio:0.4496333798882682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 -DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10898041725158691 s -INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11104655265808105 s -DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=164460399628108252834782306793214210204, time:1750768015.3664188s req_ids:[8] -DEBUG 06-24 20:26:55 [manager.py:391] -ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:211.26508712768555ms total_cost_time:211.29488945007324ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:11457 prompt_cache_len:5151 prompt_cache_ratio:0.44959413459020686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 -DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10901260375976562 s -INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11107301712036133 s -DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=122346015397080565204665814221656297970, time:1750768015.583846s req_ids:[8] -DEBUG 06-24 20:26:55 [manager.py:391] -ERROR 06-24 20:26:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:26:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:380.13410568237305ms total_cost_time:380.18012046813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11458 prompt_cache_len:5151 prompt_cache_ratio:0.4495548961424332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 -DEBUG 06-24 20:26:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:55 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:55 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:55 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:55 [manager.py:224] router recive req id 8 cost time 0.10832929611206055 s -INFO 06-24 20:26:55 [manager.py:68] detokenization recv req id 8 cost time 0.11036896705627441 s -DEBUG 06-24 20:26:55 [manager.py:391] Prefill Batch: batch_id=162949972449399555629281289122563008946, time:1750768015.9686754s req_ids:[8] -DEBUG 06-24 20:26:55 [manager.py:391] -ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:55 lightllm_req_id:8 first_token_cost:202.88944244384766ms total_cost_time:202.93235778808594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11459 prompt_cache_len:5151 prompt_cache_ratio:0.44951566454315384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 -DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10936355590820312 s -INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11149811744689941 s -DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=123319388752502892192770330812774526229, time:1750768016.1792185s req_ids:[8] -DEBUG 06-24 20:26:56 [manager.py:391] -ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:208.86826515197754ms total_cost_time:208.91356468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11460 prompt_cache_len:5151 prompt_cache_ratio:0.44947643979057594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 -DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10965323448181152 s -INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11177706718444824 s -DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=296957135344586340759362021529344435991, time:1750768016.393971s req_ids:[8] -DEBUG 06-24 20:26:56 [manager.py:391] -ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:210.36434173583984ms total_cost_time:210.40773391723633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11461 prompt_cache_len:5151 prompt_cache_ratio:0.44943722188290725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 -DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10876250267028809 s -INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11084485054016113 s -DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=62971099288697461982971183526039039182, time:1750768016.6118228s req_ids:[8] -DEBUG 06-24 20:26:56 [manager.py:391] -ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:211.03930473327637ms total_cost_time:211.08555793762207ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11462 prompt_cache_len:5151 prompt_cache_ratio:0.4493980108183563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 -DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:56 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:56 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:56 [manager.py:224] router recive req id 8 cost time 0.10858440399169922 s -INFO 06-24 20:26:56 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s -DEBUG 06-24 20:26:56 [manager.py:391] Prefill Batch: batch_id=271953514829246812225434739937763886317, time:1750768016.8297834s req_ids:[8] -DEBUG 06-24 20:26:56 [manager.py:391] -ERROR 06-24 20:26:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:208.19687843322754ms total_cost_time:208.24027061462402ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11463 prompt_cache_len:5151 prompt_cache_ratio:0.44935880659513217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 -DEBUG 06-24 20:26:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:56 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10863256454467773 s -INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063647270202637 s -DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=317904880198610201939453221010727710400, time:1750768017.0421329s req_ids:[8] -DEBUG 06-24 20:26:57 [manager.py:391] -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:26:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:56 lightllm_req_id:8 first_token_cost:210.56199073791504ms total_cost_time:210.60657501220703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11464 prompt_cache_len:5151 prompt_cache_ratio:0.44931960921144454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 -DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10934090614318848 s -INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11148715019226074 s -DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=241919702743359286942008974342283114563, time:1750768017.2598357s req_ids:[8] -DEBUG 06-24 20:26:57 [manager.py:391] -ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:210.9525203704834ms total_cost_time:210.99591255187988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11465 prompt_cache_len:5151 prompt_cache_ratio:0.4492804186655037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 -DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10924744606018066 s -INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11145973205566406 s -DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=139762340319683233120175363109060948924, time:1750768017.47796s req_ids:[8] -DEBUG 06-24 20:26:57 [manager.py:391] -ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:378.31950187683105ms total_cost_time:378.36217880249023ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11466 prompt_cache_len:5151 prompt_cache_ratio:0.44924123495552065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 -DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:57 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:57 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:57 [manager.py:224] router recive req id 8 cost time 0.10906815528869629 s -INFO 06-24 20:26:57 [manager.py:68] detokenization recv req id 8 cost time 0.11096906661987305 s -DEBUG 06-24 20:26:57 [manager.py:391] Prefill Batch: batch_id=23560303376656970149894723636876651575, time:1750768017.8612015s req_ids:[8] -DEBUG 06-24 20:26:57 [manager.py:391] -ERROR 06-24 20:26:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:204.6647071838379ms total_cost_time:204.70905303955078ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11467 prompt_cache_len:5151 prompt_cache_ratio:0.449202058079707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 -DEBUG 06-24 20:26:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:57 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s -INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11132693290710449 s -DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=294987311005685315031048737806368933803, time:1750768018.0714636s req_ids:[8] -DEBUG 06-24 20:26:58 [manager.py:391] -ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:57 lightllm_req_id:8 first_token_cost:206.1934471130371ms total_cost_time:206.23779296875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11468 prompt_cache_len:5151 prompt_cache_ratio:0.44916288803627485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 -DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.11154961585998535 s -INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11357450485229492 s -DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=95892266543681043296887734795121190359, time:1750768018.2837808s req_ids:[8] -DEBUG 06-24 20:26:58 [manager.py:391] -ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:209.57517623901367ms total_cost_time:209.61928367614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11469 prompt_cache_len:5151 prompt_cache_ratio:0.4491237248234371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 -DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10914754867553711 s -INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.1111001968383789 s -DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=53864448307674733828095615930610936574, time:1750768018.5002954s req_ids:[8] -DEBUG 06-24 20:26:58 [manager.py:391] -ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:212.39995956420898ms total_cost_time:212.44549751281738ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11470 prompt_cache_len:5151 prompt_cache_ratio:0.44908456843940714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 -DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10998201370239258 s -INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.11192917823791504 s -DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=78028409208472113890678082194289196987, time:1750768018.7185657s req_ids:[8] -DEBUG 06-24 20:26:58 [manager.py:391] -ERROR 06-24 20:26:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:211.81964874267578ms total_cost_time:211.8661403656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11471 prompt_cache_len:5151 prompt_cache_ratio:0.44904541888239907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 -DEBUG 06-24 20:26:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:58 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:58 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:58 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:58 [manager.py:224] router recive req id 8 cost time 0.10844826698303223 s -INFO 06-24 20:26:58 [manager.py:68] detokenization recv req id 8 cost time 0.1104576587677002 s -DEBUG 06-24 20:26:58 [manager.py:391] Prefill Batch: batch_id=172054009128581549265216702023217005363, time:1750768018.9396365s req_ids:[8] -DEBUG 06-24 20:26:58 [manager.py:391] -ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:58 lightllm_req_id:8 first_token_cost:213.64808082580566ms total_cost_time:213.69338035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11472 prompt_cache_len:5151 prompt_cache_ratio:0.4490062761506276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 -DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10804891586303711 s -INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11006903648376465 s -DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=91415839762164015225259061108122062990, time:1750768019.156237s req_ids:[8] -DEBUG 06-24 20:26:59 [manager.py:391] -ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:209.88011360168457ms total_cost_time:209.92517471313477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11473 prompt_cache_len:5151 prompt_cache_ratio:0.44896714024230805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 -DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:59 [batch.py:51] router release req id 8 -INFO 06-24 20:26:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s -INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11079597473144531 s -DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=129901199283494959515081024600784296108, time:1750768019.3737192s req_ids:[8] -DEBUG 06-24 20:26:59 [manager.py:391] -ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:384.23776626586914ms total_cost_time:384.28425788879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11474 prompt_cache_len:5151 prompt_cache_ratio:0.4489280111556563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 -DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s -INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11062097549438477 s -DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=169223438173379246264727006865381613353, time:1750768019.763277s req_ids:[8] -DEBUG 06-24 20:26:59 [manager.py:391] -ERROR 06-24 20:26:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:26:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:26:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:211.03906631469727ms total_cost_time:211.08198165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11475 prompt_cache_len:5151 prompt_cache_ratio:0.4488888888888889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:26:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 -DEBUG 06-24 20:26:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:26:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:26:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:26:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:26:59 [batch.py:51] router release req id 8 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:26:59 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:26:59 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:26:59 [manager.py:224] router recive req id 8 cost time 0.10915875434875488 s -INFO 06-24 20:26:59 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s -DEBUG 06-24 20:26:59 [manager.py:391] Prefill Batch: batch_id=251407935840355163020497406823261075091, time:1750768019.986652s req_ids:[8] -DEBUG 06-24 20:26:59 [manager.py:391] -ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:26:59 lightllm_req_id:8 first_token_cost:209.6419334411621ms total_cost_time:209.6867561340332ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11476 prompt_cache_len:5151 prompt_cache_ratio:0.4488497734402231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 -DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10870194435119629 s -INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11080551147460938 s -DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=194157459069776705000224371996220439046, time:1750768020.1963158s req_ids:[8] -DEBUG 06-24 20:27:00 [manager.py:391] -ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:208.49871635437012ms total_cost_time:208.5421085357666ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11477 prompt_cache_len:5151 prompt_cache_ratio:0.4488106648078766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 -DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10886740684509277 s -INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11082768440246582 s -DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=68079542440286809875940611800686408043, time:1750768020.413054s req_ids:[8] -DEBUG 06-24 20:27:00 [manager.py:391] -ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:209.7156047821045ms total_cost_time:209.75828170776367ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11478 prompt_cache_len:5151 prompt_cache_ratio:0.44877156299006793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 -DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10971808433532715 s -INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11183667182922363 s -DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=177578027295320291958028599555213766346, time:1750768020.6295843s req_ids:[8] -DEBUG 06-24 20:27:00 [manager.py:391] -ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:210.90149879455566ms total_cost_time:211.00926399230957ms,out_token_counter:1 mean_per_token_cost_time: 0.10776519775390625ms prompt_token_num:11479 prompt_cache_len:5151 prompt_cache_ratio:0.4487324679850161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 -DEBUG 06-24 20:27:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:00 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:00 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:00 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:00 [manager.py:224] router recive req id 8 cost time 0.10942411422729492 s -INFO 06-24 20:27:00 [manager.py:68] detokenization recv req id 8 cost time 0.11150288581848145 s -DEBUG 06-24 20:27:00 [manager.py:391] Prefill Batch: batch_id=216318949101922255458039149893658851954, time:1750768020.8469996s req_ids:[8] -DEBUG 06-24 20:27:00 [manager.py:391] -ERROR 06-24 20:27:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:213.02151679992676ms total_cost_time:213.06443214416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11480 prompt_cache_len:5151 prompt_cache_ratio:0.44869337979094076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 -DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.20850300788879395 s -INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.21024560928344727 s -DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=110416980243581446351796552496244602956, time:1750768021.200284s req_ids:[8] -DEBUG 06-24 20:27:01 [manager.py:391] -ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:00 lightllm_req_id:8 first_token_cost:330.5649757385254ms total_cost_time:330.6107521057129ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11481 prompt_cache_len:5151 prompt_cache_ratio:0.4486542984060622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 -DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.10973644256591797 s -INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.11170530319213867 s -DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=17380211930572365758083962206461553192, time:1750768021.403385s req_ids:[8] -DEBUG 06-24 20:27:01 [manager.py:391] -ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:202.49390602111816ms total_cost_time:202.56853103637695ms,out_token_counter:1 mean_per_token_cost_time: 0.07462501525878906ms prompt_token_num:11482 prompt_cache_len:5151 prompt_cache_ratio:0.4486152238286013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 -DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.10879135131835938 s -INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.11086392402648926 s -DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=204806298526561401523409151778371299627, time:1750768021.6134336s req_ids:[8] -DEBUG 06-24 20:27:01 [manager.py:391] -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:201.22289657592773ms total_cost_time:201.26605033874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11483 prompt_cache_len:5151 prompt_cache_ratio:0.4485761560567796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 -DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:01 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:01 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:01 [manager.py:224] router recive req id 8 cost time 0.1082756519317627 s -INFO 06-24 20:27:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101827621459961 s -DEBUG 06-24 20:27:01 [manager.py:391] Prefill Batch: batch_id=46912868657940928169528706513222068588, time:1750768021.8216321s req_ids:[8] -DEBUG 06-24 20:27:01 [manager.py:391] -ERROR 06-24 20:27:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:215.9709930419922ms total_cost_time:216.01581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11484 prompt_cache_len:5151 prompt_cache_ratio:0.4485370950888192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 -DEBUG 06-24 20:27:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:01 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.10811758041381836 s -INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.11007523536682129 s -DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=205906403815117856552481537645435332078, time:1750768022.04394s req_ids:[8] -DEBUG 06-24 20:27:02 [manager.py:391] -ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:01 lightllm_req_id:8 first_token_cost:210.11805534362793ms total_cost_time:210.1612091064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11485 prompt_cache_len:5151 prompt_cache_ratio:0.448498040922943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 -DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.10732793807983398 s -INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s -DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=59450376973971665913999176740413981644, time:1750768022.2612457s req_ids:[8] -DEBUG 06-24 20:27:02 [manager.py:391] -ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:212.06068992614746ms total_cost_time:212.10432052612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11486 prompt_cache_len:5151 prompt_cache_ratio:0.4484589935573742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 -DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.1087198257446289 s -INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.1107034683227539 s -DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=278351599202882621199781221391778667169, time:1750768022.4795702s req_ids:[8] -DEBUG 06-24 20:27:02 [manager.py:391] -ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:206.9528102874756ms total_cost_time:206.99810981750488ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11487 prompt_cache_len:5151 prompt_cache_ratio:0.4484199529903369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 -DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:02 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:02 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:02 [manager.py:224] router recive req id 8 cost time 0.20937347412109375 s -INFO 06-24 20:27:02 [manager.py:68] detokenization recv req id 8 cost time 0.21111392974853516 s -DEBUG 06-24 20:27:02 [manager.py:391] Prefill Batch: batch_id=251300340778364654720902304334192948559, time:1750768022.8253553s req_ids:[8] -DEBUG 06-24 20:27:02 [manager.py:391] -ERROR 06-24 20:27:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:338.0894660949707ms total_cost_time:338.1328582763672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11488 prompt_cache_len:5151 prompt_cache_ratio:0.4483809192200557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 -DEBUG 06-24 20:27:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:02 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10823369026184082 s -INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11015605926513672 s -DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=231535289920529318999046183834708902709, time:1750768023.0452106s req_ids:[8] -DEBUG 06-24 20:27:03 [manager.py:391] -ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:02 lightllm_req_id:8 first_token_cost:217.451810836792ms total_cost_time:217.4975872039795ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11489 prompt_cache_len:5151 prompt_cache_ratio:0.44834189224475585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 -DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10831689834594727 s -INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11030435562133789 s -DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=114345775613969438442008431666323932670, time:1750768023.259938s req_ids:[8] -DEBUG 06-24 20:27:03 [manager.py:391] -ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:209.65266227722168ms total_cost_time:209.69748497009277ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11490 prompt_cache_len:5151 prompt_cache_ratio:0.44830287206266317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 -DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.11020994186401367 s -INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11211633682250977 s -DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=269449351930330736333523385808196046439, time:1750768023.4769177s req_ids:[8] -DEBUG 06-24 20:27:03 [manager.py:391] -ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:209.09428596496582ms total_cost_time:209.1374397277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11491 prompt_cache_len:5151 prompt_cache_ratio:0.4482638586720042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 -DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10909914970397949 s -INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11120057106018066 s -DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=267834929978489600763842642546862785466, time:1750768023.6944804s req_ids:[8] -DEBUG 06-24 20:27:03 [manager.py:391] -ERROR 06-24 20:27:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:210.21008491516113ms total_cost_time:210.25586128234863ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11492 prompt_cache_len:5151 prompt_cache_ratio:0.44822485207100593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 -DEBUG 06-24 20:27:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:03 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:03 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:03 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:03 [manager.py:224] router recive req id 8 cost time 0.10952234268188477 s -INFO 06-24 20:27:03 [manager.py:68] detokenization recv req id 8 cost time 0.11163568496704102 s -DEBUG 06-24 20:27:03 [manager.py:391] Prefill Batch: batch_id=213933980562856640474120110401903940428, time:1750768023.9101877s req_ids:[8] -DEBUG 06-24 20:27:03 [manager.py:391] -DEBUG 06-24 20:27:03 [stats.py:37] Avg tokens(prompt+generate) throughput: 47865.350 tokens/s -DEBUG 06-24 20:27:03 [stats.py:37] Avg prompt tokens throughput: 47857.007 tokens/s -DEBUG 06-24 20:27:03 [stats.py:37] Avg generate tokens throughput: 8.343 tokens/s -ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:03 lightllm_req_id:8 first_token_cost:211.48300170898438ms total_cost_time:211.52591705322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11493 prompt_cache_len:5151 prompt_cache_ratio:0.4481858522578961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 -DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10856342315673828 s -INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.11051464080810547 s -DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=197765049441637236395995545614509678971, time:1750768024.127005s req_ids:[8] -DEBUG 06-24 20:27:04 [manager.py:391] -ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:392.4744129180908ms total_cost_time:392.5197124481201ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11494 prompt_cache_len:5151 prompt_cache_ratio:0.4481468592309031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 -DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10774636268615723 s -INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s -DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=11536550526298161207457285899490840812, time:1750768024.5304654s req_ids:[8] -DEBUG 06-24 20:27:04 [manager.py:391] -ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:214.98370170593262ms total_cost_time:215.0249481201172ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:11495 prompt_cache_len:5151 prompt_cache_ratio:0.4481078729882558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 -DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10864830017089844 s -INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.11054491996765137 s -DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=131730048106509425257644850606384390465, time:1750768024.7485874s req_ids:[8] -DEBUG 06-24 20:27:04 [manager.py:391] -ERROR 06-24 20:27:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:204.6835422515869ms total_cost_time:204.7276496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11496 prompt_cache_len:5151 prompt_cache_ratio:0.44806889352818374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 -DEBUG 06-24 20:27:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:04 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:04 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:04 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:04 [manager.py:224] router recive req id 8 cost time 0.10985016822814941 s -INFO 06-24 20:27:04 [manager.py:68] detokenization recv req id 8 cost time 0.1118013858795166 s -DEBUG 06-24 20:27:04 [manager.py:391] Prefill Batch: batch_id=3606936866228866317805580588946842939, time:1750768024.957365s req_ids:[8] -DEBUG 06-24 20:27:04 [manager.py:391] -ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:04 lightllm_req_id:8 first_token_cost:208.38046073913574ms total_cost_time:208.42480659484863ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11497 prompt_cache_len:5151 prompt_cache_ratio:0.44802992084891713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 -DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10895895957946777 s -INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11084747314453125 s -DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=4855946702249088777521689259667289334, time:1750768025.1709967s req_ids:[8] -DEBUG 06-24 20:27:05 [manager.py:391] -ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:205.6434154510498ms total_cost_time:205.69586753845215ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:11498 prompt_cache_len:5151 prompt_cache_ratio:0.44799095494868674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 -DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10940384864807129 s -INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.1114354133605957 s -DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=213736072517534173356378361908157795940, time:1750768025.3827944s req_ids:[8] -DEBUG 06-24 20:27:05 [manager.py:391] -ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:215.60931205749512ms total_cost_time:215.6524658203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11499 prompt_cache_len:5151 prompt_cache_ratio:0.447951995825724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 -DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10831189155578613 s -INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s -DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=270169810724592860314252392366219539407, time:1750768025.6190133s req_ids:[8] -DEBUG 06-24 20:27:05 [manager.py:391] -ERROR 06-24 20:27:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:213.98186683654785ms total_cost_time:214.02573585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11500 prompt_cache_len:5151 prompt_cache_ratio:0.4479130434782609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 -DEBUG 06-24 20:27:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:05 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:05 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:05 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:05 [manager.py:224] router recive req id 8 cost time 0.10850977897644043 s -INFO 06-24 20:27:05 [manager.py:68] detokenization recv req id 8 cost time 0.11031150817871094 s -DEBUG 06-24 20:27:05 [manager.py:391] Prefill Batch: batch_id=140047481700162450342605131418296836585, time:1750768025.8471203s req_ids:[8] -DEBUG 06-24 20:27:05 [manager.py:391] -ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:05 lightllm_req_id:8 first_token_cost:403.7513732910156ms total_cost_time:403.7942886352539ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11501 prompt_cache_len:5151 prompt_cache_ratio:0.44787409790453003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 -DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10891222953796387 s -INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s -DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=257724131350751158734646409481255075029, time:1750768026.2372186s req_ids:[8] -DEBUG 06-24 20:27:06 [manager.py:391] -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:202.81434059143066ms total_cost_time:202.85606384277344ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11502 prompt_cache_len:5151 prompt_cache_ratio:0.4478351591027647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 -DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:06 [batch.py:51] router release req id 8 -INFO 06-24 20:27:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10863947868347168 s -INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s -DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=22240959118440316298059906489897165692, time:1750768026.4532208s req_ids:[8] -DEBUG 06-24 20:27:06 [manager.py:391] -ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:213.29784393310547ms total_cost_time:213.34147453308105ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11503 prompt_cache_len:5151 prompt_cache_ratio:0.4477962270711988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 -DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10897636413574219 s -INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.1109464168548584 s -DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=182696868454419069693083116218630994617, time:1750768026.664432s req_ids:[8] -DEBUG 06-24 20:27:06 [manager.py:391] -ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:205.4002285003662ms total_cost_time:205.4443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11504 prompt_cache_len:5151 prompt_cache_ratio:0.4477573018080668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 -DEBUG 06-24 20:27:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:06 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:06 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:06 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:06 [manager.py:224] router recive req id 8 cost time 0.10844850540161133 s -INFO 06-24 20:27:06 [manager.py:68] detokenization recv req id 8 cost time 0.11051082611083984 s -DEBUG 06-24 20:27:06 [manager.py:391] Prefill Batch: batch_id=216123952455913904641642629521920238091, time:1750768026.8766768s req_ids:[8] -DEBUG 06-24 20:27:06 [manager.py:391] -ERROR 06-24 20:27:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:211.72428131103516ms total_cost_time:211.76862716674805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11505 prompt_cache_len:5151 prompt_cache_ratio:0.44771838331160363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 -DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s -INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11068153381347656 s -DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=197536237760918745383568581636620113664, time:1750768027.0957658s req_ids:[8] -DEBUG 06-24 20:27:07 [manager.py:391] -ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:06 lightllm_req_id:8 first_token_cost:205.6267261505127ms total_cost_time:205.67059516906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11506 prompt_cache_len:5151 prompt_cache_ratio:0.4476794715800452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 -DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10852408409118652 s -INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11063432693481445 s -DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=89828281685318031161752566169101147716, time:1750768027.3118255s req_ids:[8] -DEBUG 06-24 20:27:07 [manager.py:391] -ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:214.6742343902588ms total_cost_time:214.71881866455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11507 prompt_cache_len:5151 prompt_cache_ratio:0.4476405666116277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 -DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s -DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=140498526619812386526613841456780909575, time:1750768027.5279782s req_ids:[8] -DEBUG 06-24 20:27:07 [manager.py:391] -ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:199.13816452026367ms total_cost_time:199.18251037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11508 prompt_cache_len:5151 prompt_cache_ratio:0.4476016684045881 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 -DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:07 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:07 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:07 [manager.py:224] router recive req id 8 cost time 0.20930218696594238 s -INFO 06-24 20:27:07 [manager.py:68] detokenization recv req id 8 cost time 0.21102356910705566 s -DEBUG 06-24 20:27:07 [manager.py:391] Prefill Batch: batch_id=84107992851990607567501875144729538640, time:1750768027.8693511s req_ids:[8] -DEBUG 06-24 20:27:07 [manager.py:391] -ERROR 06-24 20:27:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:313.54808807373047ms total_cost_time:313.5943412780762ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11509 prompt_cache_len:5151 prompt_cache_ratio:0.44756277695716395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 -DEBUG 06-24 20:27:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:07 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10858774185180664 s -INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s -DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=89590166486592492012769836778292336381, time:1750768028.0530016s req_ids:[8] -DEBUG 06-24 20:27:08 [manager.py:391] -ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:07 lightllm_req_id:8 first_token_cost:199.28503036499023ms total_cost_time:199.32937622070312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11510 prompt_cache_len:5151 prompt_cache_ratio:0.44752389226759337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 -DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s -INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11025762557983398 s -DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=163638271657110360675943853040788606812, time:1750768028.2602093s req_ids:[8] -DEBUG 06-24 20:27:08 [manager.py:391] -ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:203.7358283996582ms total_cost_time:203.7801742553711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11511 prompt_cache_len:5151 prompt_cache_ratio:0.4474850143341152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 -DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10927891731262207 s -INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112830638885498 s -DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=182209689488403169261110316523182841843, time:1750768028.4690893s req_ids:[8] -DEBUG 06-24 20:27:08 [manager.py:391] -ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:207.59868621826172ms total_cost_time:207.6423168182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11512 prompt_cache_len:5151 prompt_cache_ratio:0.4474461431549687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 -DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10888218879699707 s -INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.11094331741333008 s -DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=137129771991069977376591417104966075220, time:1750768028.684868s req_ids:[8] -DEBUG 06-24 20:27:08 [manager.py:391] -ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:207.10992813110352ms total_cost_time:207.1518898010254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11513 prompt_cache_len:5151 prompt_cache_ratio:0.447407278728394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 -DEBUG 06-24 20:27:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:08 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:08 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:08 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:08 [manager.py:224] router recive req id 8 cost time 0.10908651351928711 s -INFO 06-24 20:27:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112055778503418 s -DEBUG 06-24 20:27:08 [manager.py:391] Prefill Batch: batch_id=183129512022226391617747331139427311333, time:1750768028.89688s req_ids:[8] -DEBUG 06-24 20:27:08 [manager.py:391] -ERROR 06-24 20:27:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:190.56415557861328ms total_cost_time:190.60826301574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11514 prompt_cache_len:5151 prompt_cache_ratio:0.4473684210526316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 -DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10892271995544434 s -INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.11073160171508789 s -DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=260037951453583052709221745229004179368, time:1750768029.0936055s req_ids:[8] -DEBUG 06-24 20:27:09 [manager.py:391] -ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:08 lightllm_req_id:8 first_token_cost:362.2126579284668ms total_cost_time:362.2567653656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11515 prompt_cache_len:5151 prompt_cache_ratio:0.4473295701259227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 -DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10850334167480469 s -INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.1103818416595459 s -DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=261454611022190730171017719536613147025, time:1750768029.461922s req_ids:[8] -DEBUG 06-24 20:27:09 [manager.py:391] -ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:187.6664161682129ms total_cost_time:187.71028518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11516 prompt_cache_len:5151 prompt_cache_ratio:0.4472907259465092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 -DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s -INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.1096043586730957 s -DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=195564776759058681558391963633513822618, time:1750768029.6575105s req_ids:[8] -DEBUG 06-24 20:27:09 [manager.py:391] -ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:199.63955879211426ms total_cost_time:199.68247413635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11517 prompt_cache_len:5151 prompt_cache_ratio:0.4472518885126335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 -DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:09 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:09 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:09 [manager.py:224] router recive req id 8 cost time 0.10915780067443848 s -INFO 06-24 20:27:09 [manager.py:68] detokenization recv req id 8 cost time 0.11107707023620605 s -DEBUG 06-24 20:27:09 [manager.py:391] Prefill Batch: batch_id=7539896173252525941614578352514043508, time:1750768029.8626633s req_ids:[8] -DEBUG 06-24 20:27:09 [manager.py:391] -ERROR 06-24 20:27:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:204.48589324951172ms total_cost_time:204.5302391052246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11518 prompt_cache_len:5151 prompt_cache_ratio:0.4472130578225386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 -DEBUG 06-24 20:27:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:09 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10808253288269043 s -INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.1100466251373291 s -DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=241238806878254744391135727420671444463, time:1750768030.0836942s req_ids:[8] -DEBUG 06-24 20:27:10 [manager.py:391] -ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:09 lightllm_req_id:8 first_token_cost:219.9561595916748ms total_cost_time:220.001220703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11519 prompt_cache_len:5151 prompt_cache_ratio:0.44717423387446825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 -DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:10 [batch.py:51] router release req id 8 -WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_logprobs_998 and create again -INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_logprobs_998 -WARNING 06-24 20:27:10 [shm_array.py:25] size not same, unlink shm 12322_0_shm_prompts_998 and create again -INFO 06-24 20:27:10 [shm_array.py:30] create shm 12322_0_shm_prompts_998 -INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s -INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11166238784790039 s -DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=324159688702362866898453155766500746673, time:1750768030.301429s req_ids:[8] -DEBUG 06-24 20:27:10 [manager.py:391] -ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:209.38348770141602ms total_cost_time:209.4278335571289ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11520 prompt_cache_len:5151 prompt_cache_ratio:0.4471354166666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 -DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:10 [batch.py:51] router release req id 8 -INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s -INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11090469360351562 s -DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=273314963072987937624466043001015779581, time:1750768030.5395844s req_ids:[8] -DEBUG 06-24 20:27:10 [manager.py:391] -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:230.15928268432617ms total_cost_time:230.20315170288086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11521 prompt_cache_len:5151 prompt_cache_ratio:0.4470966061973787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 -DEBUG 06-24 20:27:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:10 [batch.py:51] router release req id 8 -INFO 06-24 20:27:10 [manager.py:224] router recive req id 8 cost time 0.10875582695007324 s -INFO 06-24 20:27:10 [manager.py:68] detokenization recv req id 8 cost time 0.11083793640136719 s -DEBUG 06-24 20:27:10 [manager.py:391] Prefill Batch: batch_id=62874336445114855445315672648053757520, time:1750768030.752677s req_ids:[8] -DEBUG 06-24 20:27:10 [manager.py:391] -ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:10 lightllm_req_id:8 first_token_cost:378.4186840057373ms total_cost_time:378.4632682800293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11522 prompt_cache_len:5151 prompt_cache_ratio:0.44705780246484983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 -DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:11 [batch.py:51] router release req id 8 -INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10847353935241699 s -INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.11044168472290039 s -DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=181342935191488314782691934929057284834, time:1750768031.146899s req_ids:[8] -DEBUG 06-24 20:27:11 [manager.py:391] -ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:214.90788459777832ms total_cost_time:214.9515151977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11523 prompt_cache_len:5151 prompt_cache_ratio:0.4470190054673262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 -DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:11 [batch.py:51] router release req id 8 -INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.1078798770904541 s -INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990047454833984 s -DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=9589314894294385190675499494374968129, time:1750768031.3593307s req_ids:[8] -DEBUG 06-24 20:27:11 [manager.py:391] -ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:207.57436752319336ms total_cost_time:207.62085914611816ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11524 prompt_cache_len:5151 prompt_cache_ratio:0.4469802152030545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 -DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:11 [batch.py:51] router release req id 8 -INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10930323600769043 s -INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.11125397682189941 s -DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=29830870207625210821487779867829939521, time:1750768031.581609s req_ids:[8] -DEBUG 06-24 20:27:11 [manager.py:391] -ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:218.02854537963867ms total_cost_time:218.07241439819336ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11525 prompt_cache_len:5151 prompt_cache_ratio:0.446941431670282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 -DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:11 [batch.py:51] router release req id 8 -INFO 06-24 20:27:11 [manager.py:224] router recive req id 8 cost time 0.10734272003173828 s -INFO 06-24 20:27:11 [manager.py:68] detokenization recv req id 8 cost time 0.10927748680114746 s -DEBUG 06-24 20:27:11 [manager.py:391] Prefill Batch: batch_id=184960704546135925761441682165797363529, time:1750768031.805424s req_ids:[8] -DEBUG 06-24 20:27:11 [manager.py:391] -ERROR 06-24 20:27:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:213.92297744750977ms total_cost_time:213.96541595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11526 prompt_cache_len:5151 prompt_cache_ratio:0.4469026548672566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 -DEBUG 06-24 20:27:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:11 [batch.py:51] router release req id 8 -INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10758519172668457 s -INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.10962891578674316 s -DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=136612049340751542490230307301212965529, time:1750768032.0206873s req_ids:[8] -DEBUG 06-24 20:27:12 [manager.py:391] -ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:11 lightllm_req_id:8 first_token_cost:213.71054649353027ms total_cost_time:213.75489234924316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11527 prompt_cache_len:5151 prompt_cache_ratio:0.44686388479222694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 -DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:12 [batch.py:51] router release req id 8 -INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10884284973144531 s -INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.11070561408996582 s -DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=271018319268510207474466365842486396230, time:1750768032.238665s req_ids:[8] -DEBUG 06-24 20:27:12 [manager.py:391] -ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:209.23137664794922ms total_cost_time:209.27691459655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11528 prompt_cache_len:5151 prompt_cache_ratio:0.44682512144344205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 -DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:12 [batch.py:51] router release req id 8 -INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.10699796676635742 s -INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.10888099670410156 s -DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=27029226230246066736506496277015071706, time:1750768032.4791806s req_ids:[8] -DEBUG 06-24 20:27:12 [manager.py:391] -ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:229.20823097229004ms total_cost_time:229.25090789794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11529 prompt_cache_len:5151 prompt_cache_ratio:0.4467863648191517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 -DEBUG 06-24 20:27:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:12 [batch.py:51] router release req id 8 -INFO 06-24 20:27:12 [manager.py:224] router recive req id 8 cost time 0.309694766998291 s -INFO 06-24 20:27:12 [manager.py:68] detokenization recv req id 8 cost time 0.31163454055786133 s -DEBUG 06-24 20:27:12 [manager.py:391] Prefill Batch: batch_id=251227467450080321586560580780172480615, time:1750768032.8942409s req_ids:[8] -DEBUG 06-24 20:27:12 [manager.py:391] -ERROR 06-24 20:27:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:415.6961441040039ms total_cost_time:415.7402515411377ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11530 prompt_cache_len:5151 prompt_cache_ratio:0.44674761491760623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 -DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:13 [batch.py:51] router release req id 8 -INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10782456398010254 s -INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10972428321838379 s -DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=329394360501775278836708721118079774711, time:1750768033.1151454s req_ids:[8] -DEBUG 06-24 20:27:13 [manager.py:391] -ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:12 lightllm_req_id:8 first_token_cost:208.43005180358887ms total_cost_time:208.47535133361816ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11531 prompt_cache_len:5151 prompt_cache_ratio:0.44670887173705665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 -DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:13 [batch.py:51] router release req id 8 -INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10821390151977539 s -INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.11025333404541016 s -DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=224143599464996496941801128111624638780, time:1750768033.328239s req_ids:[8] -DEBUG 06-24 20:27:13 [manager.py:391] -ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:206.15696907043457ms total_cost_time:206.20250701904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11532 prompt_cache_len:5151 prompt_cache_ratio:0.44667013527575444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 -DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:13 [batch.py:51] router release req id 8 -INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10745549201965332 s -INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10939431190490723 s -DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=26253948729714786583888846471302946988, time:1750768033.563854s req_ids:[8] -DEBUG 06-24 20:27:13 [manager.py:391] -ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:221.55284881591797ms total_cost_time:221.59695625305176ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11533 prompt_cache_len:5151 prompt_cache_ratio:0.4466314055319518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 -DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:13 [batch.py:51] router release req id 8 -INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10775089263916016 s -INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10976862907409668 s -DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=197400315803058788094577083226503200948, time:1750768033.7699008s req_ids:[8] -DEBUG 06-24 20:27:13 [manager.py:391] -ERROR 06-24 20:27:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:205.05952835083008ms total_cost_time:205.10435104370117ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11534 prompt_cache_len:5151 prompt_cache_ratio:0.4465926825039015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 -DEBUG 06-24 20:27:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:13 [batch.py:51] router release req id 8 -INFO 06-24 20:27:13 [manager.py:224] router recive req id 8 cost time 0.10730147361755371 s -INFO 06-24 20:27:13 [manager.py:68] detokenization recv req id 8 cost time 0.10968303680419922 s -DEBUG 06-24 20:27:13 [manager.py:391] Prefill Batch: batch_id=220418320867655774254720220618153093837, time:1750768033.979639s req_ids:[8] -DEBUG 06-24 20:27:13 [manager.py:391] -DEBUG 06-24 20:27:13 [stats.py:37] Avg tokens(prompt+generate) throughput: 48035.746 tokens/s -DEBUG 06-24 20:27:13 [stats.py:37] Avg prompt tokens throughput: 48027.404 tokens/s -DEBUG 06-24 20:27:13 [stats.py:37] Avg generate tokens throughput: 8.342 tokens/s -ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:13 lightllm_req_id:8 first_token_cost:201.94077491760254ms total_cost_time:201.98392868041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11535 prompt_cache_len:5151 prompt_cache_ratio:0.44655396618985693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 -DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:14 [batch.py:51] router release req id 8 -INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.10815262794494629 s -INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s -DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=72722018168235309752317118410661161236, time:1750768034.192789s req_ids:[8] -DEBUG 06-24 20:27:14 [manager.py:391] -ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:379.84156608581543ms total_cost_time:379.8868656158447ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11536 prompt_cache_len:5151 prompt_cache_ratio:0.4465152565880721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 -DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:14 [batch.py:51] router release req id 8 -INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.1072850227355957 s -INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.10918378829956055 s -DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=121949659619199036932854757905768669939, time:1750768034.5762315s req_ids:[8] -DEBUG 06-24 20:27:14 [manager.py:391] -ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:224.92718696594238ms total_cost_time:224.97200965881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11537 prompt_cache_len:5151 prompt_cache_ratio:0.4464765536968016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 -DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:14 [batch.py:51] router release req id 8 -INFO 06-24 20:27:14 [manager.py:224] router recive req id 8 cost time 0.10879373550415039 s -INFO 06-24 20:27:14 [manager.py:68] detokenization recv req id 8 cost time 0.11072850227355957 s -DEBUG 06-24 20:27:14 [manager.py:391] Prefill Batch: batch_id=195821545213852940111713587102067954925, time:1750768034.8096938s req_ids:[8] -DEBUG 06-24 20:27:14 [manager.py:391] -ERROR 06-24 20:27:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:205.3837776184082ms total_cost_time:205.4283618927002ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11538 prompt_cache_len:5151 prompt_cache_ratio:0.4464378575143006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 -DEBUG 06-24 20:27:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:14 [batch.py:51] router release req id 8 -INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10788559913635254 s -INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10989809036254883 s -DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=65515364993187780731950899083747939304, time:1750768035.0190854s req_ids:[8] -DEBUG 06-24 20:27:15 [manager.py:391] -ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:14 lightllm_req_id:8 first_token_cost:212.16368675231934ms total_cost_time:212.20707893371582ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11539 prompt_cache_len:5151 prompt_cache_ratio:0.44639916803882485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 -DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:15 [batch.py:51] router release req id 8 -INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10696196556091309 s -INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10897016525268555 s -DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=327278611138202212038166339920846279975, time:1750768035.262717s req_ids:[8] -DEBUG 06-24 20:27:15 [manager.py:391] -ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:225.32248497009277ms total_cost_time:225.36420822143555ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11540 prompt_cache_len:5151 prompt_cache_ratio:0.44636048526863087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 -DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:15 [batch.py:51] router release req id 8 -INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10893416404724121 s -INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.11101770401000977 s -DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=217050675394868006263311026451562650133, time:1750768035.470755s req_ids:[8] -DEBUG 06-24 20:27:15 [manager.py:391] -ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:208.59503746032715ms total_cost_time:208.64009857177734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11541 prompt_cache_len:5151 prompt_cache_ratio:0.44632180920197556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 -DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:15 [batch.py:51] router release req id 8 -INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10810327529907227 s -INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.10995626449584961 s -DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=336366292626247426675723278064317327664, time:1750768035.684561s req_ids:[8] -DEBUG 06-24 20:27:15 [manager.py:391] -ERROR 06-24 20:27:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:203.83524894714355ms total_cost_time:203.88007164001465ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11542 prompt_cache_len:5151 prompt_cache_ratio:0.4462831398371166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 -DEBUG 06-24 20:27:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:15 [batch.py:51] router release req id 8 -INFO 06-24 20:27:15 [manager.py:224] router recive req id 8 cost time 0.10823202133178711 s -INFO 06-24 20:27:15 [manager.py:68] detokenization recv req id 8 cost time 0.1104433536529541 s -DEBUG 06-24 20:27:15 [manager.py:391] Prefill Batch: batch_id=181279718564383556660070747150368292165, time:1750768035.8942478s req_ids:[8] -DEBUG 06-24 20:27:15 [manager.py:391] -ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:15 lightllm_req_id:8 first_token_cost:386.26885414123535ms total_cost_time:386.31486892700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11543 prompt_cache_len:5151 prompt_cache_ratio:0.44624447717231225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 -DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:16 [batch.py:51] router release req id 8 -INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10723447799682617 s -INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.1090095043182373 s -DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=276346951025133259099899817001547061955, time:1750768036.3079555s req_ids:[8] -DEBUG 06-24 20:27:16 [manager.py:391] -ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:222.30982780456543ms total_cost_time:222.3525047302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11544 prompt_cache_len:5151 prompt_cache_ratio:0.4462058212058212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 -DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:16 [batch.py:51] router release req id 8 -INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10828185081481934 s -INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.11039257049560547 s -DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=76095344927514376388129964169372026501, time:1750768036.5169027s req_ids:[8] -DEBUG 06-24 20:27:16 [manager.py:391] -ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:200.6700038909912ms total_cost_time:200.7136344909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11545 prompt_cache_len:5151 prompt_cache_ratio:0.446167171935903 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 -DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:16 [batch.py:51] router release req id 8 -INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.10806083679199219 s -INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.10988140106201172 s -DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=60529722807640789506344857535411921298, time:1750768036.7338393s req_ids:[8] -DEBUG 06-24 20:27:16 [manager.py:391] -ERROR 06-24 20:27:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:206.80928230285645ms total_cost_time:206.85458183288574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11546 prompt_cache_len:5151 prompt_cache_ratio:0.4461285293608176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 -DEBUG 06-24 20:27:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:16 [batch.py:51] router release req id 8 -INFO 06-24 20:27:16 [manager.py:224] router recive req id 8 cost time 0.1068568229675293 s -INFO 06-24 20:27:16 [manager.py:68] detokenization recv req id 8 cost time 0.10885882377624512 s -DEBUG 06-24 20:27:16 [manager.py:391] Prefill Batch: batch_id=26877513934762374613534303091180945659, time:1750768036.937134s req_ids:[8] -DEBUG 06-24 20:27:16 [manager.py:391] -ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:16 lightllm_req_id:8 first_token_cost:195.27268409729004ms total_cost_time:195.31774520874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11547 prompt_cache_len:5151 prompt_cache_ratio:0.44608989347882566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 -INFO 06-24 20:27:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:17 [batch.py:51] router release req id 8 -INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.10778093338012695 s -INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.10957860946655273 s -DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=91978862725742220083039217607748335256, time:1750768037.1470807s req_ids:[8] -DEBUG 06-24 20:27:17 [manager.py:391] -ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:218.30296516418457ms total_cost_time:218.34802627563477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11548 prompt_cache_len:5151 prompt_cache_ratio:0.4460512642881884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 -DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:17 [batch.py:51] router release req id 8 -INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.10806012153625488 s -INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.1102592945098877 s -DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=93217065297045032840663842086971177265, time:1750768037.363575s req_ids:[8] -DEBUG 06-24 20:27:17 [manager.py:391] -ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:199.37753677368164ms total_cost_time:199.42283630371094ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11549 prompt_cache_len:5151 prompt_cache_ratio:0.4460126417871677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 -DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:17 [batch.py:51] router release req id 8 -INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.107330322265625 s -INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.10925555229187012 s -DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=296261573025029293541474962605177454661, time:1750768037.5687678s req_ids:[8] -DEBUG 06-24 20:27:17 [manager.py:391] -ERROR 06-24 20:27:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:199.4023323059082ms total_cost_time:199.44500923156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11550 prompt_cache_len:5151 prompt_cache_ratio:0.44597402597402597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 -DEBUG 06-24 20:27:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:17 [batch.py:51] router release req id 8 -INFO 06-24 20:27:17 [manager.py:224] router recive req id 8 cost time 0.3097109794616699 s -INFO 06-24 20:27:17 [manager.py:68] detokenization recv req id 8 cost time 0.3120126724243164 s -DEBUG 06-24 20:27:17 [manager.py:391] Prefill Batch: batch_id=262527193116415170111231484764237816702, time:1750768037.979559s req_ids:[8] -DEBUG 06-24 20:27:17 [manager.py:391] -ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:17 lightllm_req_id:8 first_token_cost:412.4901294708252ms total_cost_time:412.5347137451172ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11551 prompt_cache_len:5151 prompt_cache_ratio:0.44593541684702626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 -DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:18 [batch.py:51] router release req id 8 -INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.1081545352935791 s -INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.1101374626159668 s -DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=186627008545597276962196107084991165609, time:1750768038.1962545s req_ids:[8] -DEBUG 06-24 20:27:18 [manager.py:391] -ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:209.7952365875244ms total_cost_time:209.8402976989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11552 prompt_cache_len:5151 prompt_cache_ratio:0.44589681440443213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 -DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:18 [batch.py:51] router release req id 8 -INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10725164413452148 s -INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923576354980469 s -DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=72574700691997711299962612133736973536, time:1750768038.4132843s req_ids:[8] -DEBUG 06-24 20:27:18 [manager.py:391] -ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:214.4765853881836ms total_cost_time:214.52021598815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11553 prompt_cache_len:5151 prompt_cache_ratio:0.4458582186445079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 -DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:18 [batch.py:51] router release req id 8 -INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10808968544006348 s -INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s -DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=275281208950806706563697152706406251724, time:1750768038.647819s req_ids:[8] -DEBUG 06-24 20:27:18 [manager.py:391] -ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:221.53162956237793ms total_cost_time:221.57573699951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11554 prompt_cache_len:5151 prompt_cache_ratio:0.44581962956551846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 -DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:18 [batch.py:51] router release req id 8 -INFO 06-24 20:27:18 [manager.py:224] router recive req id 8 cost time 0.10864472389221191 s -INFO 06-24 20:27:18 [manager.py:68] detokenization recv req id 8 cost time 0.11060905456542969 s -DEBUG 06-24 20:27:18 [manager.py:391] Prefill Batch: batch_id=160619746254918374206772895103828979640, time:1750768038.8613305s req_ids:[8] -DEBUG 06-24 20:27:18 [manager.py:391] -ERROR 06-24 20:27:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:210.48545837402344ms total_cost_time:210.52908897399902ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11555 prompt_cache_len:5151 prompt_cache_ratio:0.44578104716572914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 -DEBUG 06-24 20:27:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:18 [batch.py:51] router release req id 8 -INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.1076052188873291 s -INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s -DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=192055939863602628503867255009910303973, time:1750768039.07979s req_ids:[8] -DEBUG 06-24 20:27:19 [manager.py:391] -ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:18 lightllm_req_id:8 first_token_cost:207.76796340942383ms total_cost_time:207.81230926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11556 prompt_cache_len:5151 prompt_cache_ratio:0.445742471443406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 -DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:19 [batch.py:51] router release req id 8 -INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10738205909729004 s -INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.10958147048950195 s -DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=64476610540374886490576086221244465614, time:1750768039.2919707s req_ids:[8] -DEBUG 06-24 20:27:19 [manager.py:391] -ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:396.6038227081299ms total_cost_time:396.6481685638428ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11557 prompt_cache_len:5151 prompt_cache_ratio:0.4457039023968158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 -DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:19 [batch.py:51] router release req id 8 -INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10813093185424805 s -INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.11015796661376953 s -DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=144945953252063376310999124512265698804, time:1750768039.6948233s req_ids:[8] -DEBUG 06-24 20:27:19 [manager.py:391] -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:206.79903030395508ms total_cost_time:206.84313774108887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11558 prompt_cache_len:5151 prompt_cache_ratio:0.44566534002422564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 -DEBUG 06-24 20:27:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:19 [batch.py:51] router release req id 8 -INFO 06-24 20:27:19 [manager.py:224] router recive req id 8 cost time 0.10876345634460449 s -INFO 06-24 20:27:19 [manager.py:68] detokenization recv req id 8 cost time 0.11082863807678223 s -DEBUG 06-24 20:27:19 [manager.py:391] Prefill Batch: batch_id=50152142214698984454296710994991332251, time:1750768039.9090242s req_ids:[8] -DEBUG 06-24 20:27:19 [manager.py:391] -ERROR 06-24 20:27:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:19 lightllm_req_id:8 first_token_cost:204.07366752624512ms total_cost_time:204.1189670562744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11559 prompt_cache_len:5151 prompt_cache_ratio:0.44562678432390346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 -DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:20 [batch.py:51] router release req id 8 -INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10718369483947754 s -INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10926389694213867 s -DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=316562154061129852225973958895333755448, time:1750768040.1188781s req_ids:[8] -DEBUG 06-24 20:27:20 [manager.py:391] -ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:212.04471588134766ms total_cost_time:212.08882331848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11560 prompt_cache_len:5151 prompt_cache_ratio:0.4455882352941177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 -DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:20 [batch.py:51] router release req id 8 -INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10814523696899414 s -INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.11010289192199707 s -DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=260644065771999606106858530331104083594, time:1750768040.3502462s req_ids:[8] -DEBUG 06-24 20:27:20 [manager.py:391] -ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:213.64736557006836ms total_cost_time:213.69147300720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11561 prompt_cache_len:5151 prompt_cache_ratio:0.4455496929331373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 -DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:20 [batch.py:51] router release req id 8 -INFO 06-24 20:27:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10795998573303223 s -INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10988593101501465 s -DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=303818388784223970108427069095913305705, time:1750768040.5591733s req_ids:[8] -DEBUG 06-24 20:27:20 [manager.py:391] -ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:206.65788650512695ms total_cost_time:206.70199394226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11562 prompt_cache_len:5151 prompt_cache_ratio:0.44551115723923196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 -DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:20 [batch.py:51] router release req id 8 -INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10726237297058105 s -INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.10938644409179688 s -DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=89243430758099310534105994542739508426, time:1750768040.7708855s req_ids:[8] -DEBUG 06-24 20:27:20 [manager.py:391] -ERROR 06-24 20:27:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:202.8939723968506ms total_cost_time:202.93951034545898ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11563 prompt_cache_len:5151 prompt_cache_ratio:0.44547262821067196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 -DEBUG 06-24 20:27:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:20 [batch.py:51] router release req id 8 -INFO 06-24 20:27:20 [manager.py:224] router recive req id 8 cost time 0.10714459419250488 s -INFO 06-24 20:27:20 [manager.py:68] detokenization recv req id 8 cost time 0.1092081069946289 s -DEBUG 06-24 20:27:20 [manager.py:391] Prefill Batch: batch_id=250172097261122938508033012674261263899, time:1750768040.9817164s req_ids:[8] -DEBUG 06-24 20:27:20 [manager.py:391] -ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:20 lightllm_req_id:8 first_token_cost:370.33724784851074ms total_cost_time:370.38254737854004ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11564 prompt_cache_len:5151 prompt_cache_ratio:0.4454341058457281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 -DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:21 [batch.py:51] router release req id 8 -INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.10703277587890625 s -INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.10895490646362305 s -DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=279325152564711842479679128962507927131, time:1750768041.3591006s req_ids:[8] -DEBUG 06-24 20:27:21 [manager.py:391] -ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:216.58730506896973ms total_cost_time:216.6311740875244ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11565 prompt_cache_len:5151 prompt_cache_ratio:0.4453955901426719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 -DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:21 [batch.py:51] router release req id 8 -INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.1073751449584961 s -INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.109466552734375 s -DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=143851844614001003819053237413619527548, time:1750768041.583676s req_ids:[8] -DEBUG 06-24 20:27:21 [manager.py:391] -ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:206.27450942993164ms total_cost_time:206.31814002990723ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11566 prompt_cache_len:5151 prompt_cache_ratio:0.4453570810997752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 -DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:21 [batch.py:51] router release req id 8 -INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.1075589656829834 s -INFO 06-24 20:27:21 [manager.py:68] detokenization recv req id 8 cost time 0.10957026481628418 s -DEBUG 06-24 20:27:21 [manager.py:391] Prefill Batch: batch_id=72282653088081514264529457862941423130, time:1750768041.7936687s req_ids:[8] -DEBUG 06-24 20:27:21 [manager.py:391] -ERROR 06-24 20:27:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:203.92632484436035ms total_cost_time:203.96900177001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11567 prompt_cache_len:5151 prompt_cache_ratio:0.4453185787153108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 -DEBUG 06-24 20:27:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:21 [batch.py:51] router release req id 8 -INFO 06-24 20:27:21 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s -INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.11056947708129883 s -DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=207241863347605668628549669304742112662, time:1750768042.004275s req_ids:[8] -DEBUG 06-24 20:27:22 [manager.py:391] -ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:21 lightllm_req_id:8 first_token_cost:196.86603546142578ms total_cost_time:196.90752029418945ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11568 prompt_cache_len:5151 prompt_cache_ratio:0.44528008298755184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 -DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:22 [batch.py:51] router release req id 8 -INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.1075742244720459 s -INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10971903800964355 s -DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=94883886052511634203832416524860209822, time:1750768042.2066646s req_ids:[8] -DEBUG 06-24 20:27:22 [manager.py:391] -ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:204.3936252593994ms total_cost_time:204.4358253479004ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11569 prompt_cache_len:5151 prompt_cache_ratio:0.44524159391477225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 -DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:22 [batch.py:51] router release req id 8 -INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.10750293731689453 s -INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10943198204040527 s -DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=244564710304726301394168319461730603879, time:1750768042.418144s req_ids:[8] -DEBUG 06-24 20:27:22 [manager.py:391] -ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:204.10847663879395ms total_cost_time:204.15186882019043ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11570 prompt_cache_len:5151 prompt_cache_ratio:0.44520311149524633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 -DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:22 [batch.py:51] router release req id 8 -INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.10728621482849121 s -INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.10933303833007812 s -DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=71119514468147665464207116744884685416, time:1750768042.6296477s req_ids:[8] -DEBUG 06-24 20:27:22 [manager.py:391] -ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:207.92007446289062ms total_cost_time:207.9637050628662ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11571 prompt_cache_len:5151 prompt_cache_ratio:0.4451646357272492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 -DEBUG 06-24 20:27:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:22 [batch.py:51] router release req id 8 -INFO 06-24 20:27:22 [manager.py:224] router recive req id 8 cost time 0.2079322338104248 s -INFO 06-24 20:27:22 [manager.py:68] detokenization recv req id 8 cost time 0.20972371101379395 s -DEBUG 06-24 20:27:22 [manager.py:391] Prefill Batch: batch_id=183762078713383513045662939774384217537, time:1750768042.9455245s req_ids:[8] -DEBUG 06-24 20:27:22 [manager.py:391] -ERROR 06-24 20:27:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:22 lightllm_req_id:8 first_token_cost:265.60211181640625ms total_cost_time:265.64598083496094ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11572 prompt_cache_len:5151 prompt_cache_ratio:0.44512616660905635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 -DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:23 [batch.py:51] router release req id 8 -INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10907602310180664 s -DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=115511640357816040732083717822729157731, time:1750768043.114903s req_ids:[8] -DEBUG 06-24 20:27:23 [manager.py:391] -INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s -ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:187.93749809265137ms total_cost_time:187.99519538879395ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:11573 prompt_cache_len:5151 prompt_cache_ratio:0.4450877041389441 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 -INFO 06-24 20:27:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:23 [batch.py:51] router release req id 8 -INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10815811157226562 s -INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s -DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=241558242360958393427601253541415694380, time:1750768043.3128421s req_ids:[8] -DEBUG 06-24 20:27:23 [manager.py:391] -ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:204.13780212402344ms total_cost_time:204.18071746826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11574 prompt_cache_len:5151 prompt_cache_ratio:0.4450492483151892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 -DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:23 [batch.py:51] router release req id 8 -INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s -INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.11069393157958984 s -DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=274041578602925132066295646625399381678, time:1750768043.5245295s req_ids:[8] -DEBUG 06-24 20:27:23 [manager.py:391] -ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:200.85859298706055ms total_cost_time:200.90436935424805ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11575 prompt_cache_len:5151 prompt_cache_ratio:0.4450107991360691 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 -DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:23 [batch.py:51] router release req id 8 -INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10806751251220703 s -INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.1099081039428711 s -DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=56556481522618134304960793097616046544, time:1750768043.7324297s req_ids:[8] -DEBUG 06-24 20:27:23 [manager.py:391] -ERROR 06-24 20:27:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:195.4667568206787ms total_cost_time:195.5125331878662ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11576 prompt_cache_len:5151 prompt_cache_ratio:0.4449723565998618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 -DEBUG 06-24 20:27:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:23 [batch.py:51] router release req id 8 -INFO 06-24 20:27:23 [manager.py:224] router recive req id 8 cost time 0.10698962211608887 s -INFO 06-24 20:27:23 [manager.py:68] detokenization recv req id 8 cost time 0.10892295837402344 s -DEBUG 06-24 20:27:23 [manager.py:391] Prefill Batch: batch_id=31227861322949417292173771675011710711, time:1750768043.9330065s req_ids:[8] -DEBUG 06-24 20:27:23 [manager.py:391] -ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:27:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 48372.263 tokens/s -DEBUG 06-24 20:27:24 [stats.py:37] Avg prompt tokens throughput: 48363.793 tokens/s -DEBUG 06-24 20:27:24 [stats.py:37] Avg generate tokens throughput: 8.470 tokens/s -INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:23 lightllm_req_id:8 first_token_cost:198.41599464416504ms total_cost_time:198.4579563140869ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11577 prompt_cache_len:5151 prompt_cache_ratio:0.44493392070484583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 -DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:24 [batch.py:51] router release req id 8 -INFO 06-24 20:27:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.10773730278015137 s -INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.10955333709716797 s -DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=274742277334670146552303954623949235435, time:1750768044.1397195s req_ids:[8] -DEBUG 06-24 20:27:24 [manager.py:391] -ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:352.75745391845703ms total_cost_time:352.8013229370117ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11578 prompt_cache_len:5151 prompt_cache_ratio:0.4448954914493004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 -DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:24 [batch.py:51] router release req id 8 -INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.1072840690612793 s -INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s -DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=162556685905550370792891674121758006562, time:1750768044.512599s req_ids:[8] -DEBUG 06-24 20:27:24 [manager.py:391] -ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:219.72155570983887ms total_cost_time:219.76470947265625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11579 prompt_cache_len:5151 prompt_cache_ratio:0.44485706883150533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 -DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:24 [batch.py:51] router release req id 8 -INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.1084291934967041 s -INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s -DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=298290611333074028584773485507199370942, time:1750768044.7244189s req_ids:[8] -DEBUG 06-24 20:27:24 [manager.py:391] -ERROR 06-24 20:27:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:203.63116264343262ms total_cost_time:203.6740779876709ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11580 prompt_cache_len:5151 prompt_cache_ratio:0.4448186528497409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 -DEBUG 06-24 20:27:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:24 [batch.py:51] router release req id 8 -INFO 06-24 20:27:24 [manager.py:224] router recive req id 8 cost time 0.10866951942443848 s -INFO 06-24 20:27:24 [manager.py:68] detokenization recv req id 8 cost time 0.11063694953918457 s -DEBUG 06-24 20:27:24 [manager.py:391] Prefill Batch: batch_id=156000369759218646748078207246018214432, time:1750768044.9333236s req_ids:[8] -DEBUG 06-24 20:27:24 [manager.py:391] -ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:24 lightllm_req_id:8 first_token_cost:203.10258865356445ms total_cost_time:203.14502716064453ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11581 prompt_cache_len:5151 prompt_cache_ratio:0.44478024350228823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 -DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:25 [batch.py:51] router release req id 8 -INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.1067967414855957 s -INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10858321189880371 s -DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=40796801150445118791521308297924028147, time:1750768045.1634939s req_ids:[8] -DEBUG 06-24 20:27:25 [manager.py:391] -ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:214.70332145690918ms total_cost_time:214.74742889404297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11582 prompt_cache_len:5151 prompt_cache_ratio:0.4447418407874288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 -DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:25 [batch.py:51] router release req id 8 -INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.1076357364654541 s -INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10957145690917969 s -DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=104016520454205578847334899591728510589, time:1750768045.366401s req_ids:[8] -DEBUG 06-24 20:27:25 [manager.py:391] -ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:206.12239837646484ms total_cost_time:206.16602897644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11583 prompt_cache_len:5151 prompt_cache_ratio:0.4447034447034447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 -DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:25 [batch.py:51] router release req id 8 -INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.10800480842590332 s -INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.11003589630126953 s -DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=178911064734333362884920715512175663434, time:1750768045.576824s req_ids:[8] -DEBUG 06-24 20:27:25 [manager.py:391] -ERROR 06-24 20:27:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:210.39080619812012ms total_cost_time:210.4318141937256ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:11584 prompt_cache_len:5151 prompt_cache_ratio:0.44466505524861877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 -DEBUG 06-24 20:27:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:25 [batch.py:51] router release req id 8 -INFO 06-24 20:27:25 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s -INFO 06-24 20:27:25 [manager.py:68] detokenization recv req id 8 cost time 0.10984468460083008 s -DEBUG 06-24 20:27:25 [manager.py:391] Prefill Batch: batch_id=300681427374263850466168022733179785059, time:1750768045.793788s req_ids:[8] -DEBUG 06-24 20:27:25 [manager.py:391] -ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:25 lightllm_req_id:8 first_token_cost:369.5406913757324ms total_cost_time:369.5824146270752ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11585 prompt_cache_len:5151 prompt_cache_ratio:0.4446266724212344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 -DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:26 [batch.py:51] router release req id 8 -INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s -INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10968279838562012 s -DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=31521118003979778319064988090260603436, time:1750768046.1700404s req_ids:[8] -DEBUG 06-24 20:27:26 [manager.py:391] -ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:184.66591835021973ms total_cost_time:184.708833694458ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11586 prompt_cache_len:5151 prompt_cache_ratio:0.44458829621957535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 -DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:26 [batch.py:51] router release req id 8 -INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10674476623535156 s -INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10858821868896484 s -DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=78949006112851565014238646171618040356, time:1750768046.362417s req_ids:[8] -DEBUG 06-24 20:27:26 [manager.py:391] -ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:197.59249687194824ms total_cost_time:197.63755798339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11587 prompt_cache_len:5151 prompt_cache_ratio:0.4445499266419263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 -DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:26 [batch.py:51] router release req id 8 -INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10857105255126953 s -INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.11091113090515137 s -DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=300631166909671122698182972879438502380, time:1750768046.5659935s req_ids:[8] -DEBUG 06-24 20:27:26 [manager.py:391] -ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:212.41116523742676ms total_cost_time:212.45479583740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11588 prompt_cache_len:5151 prompt_cache_ratio:0.4445115636865723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 -DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:26 [batch.py:51] router release req id 8 -INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10729122161865234 s -INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.10931515693664551 s -DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=237955721063345191969582371542458077500, time:1750768046.7842207s req_ids:[8] -DEBUG 06-24 20:27:26 [manager.py:391] -ERROR 06-24 20:27:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:202.85654067993164ms total_cost_time:202.89945602416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11589 prompt_cache_len:5151 prompt_cache_ratio:0.4444732073517991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 -DEBUG 06-24 20:27:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:26 [batch.py:51] router release req id 8 -INFO 06-24 20:27:26 [manager.py:224] router recive req id 8 cost time 0.10828995704650879 s -INFO 06-24 20:27:26 [manager.py:68] detokenization recv req id 8 cost time 0.11021971702575684 s -DEBUG 06-24 20:27:26 [manager.py:391] Prefill Batch: batch_id=71974388105541650596215070283234472618, time:1750768046.9946878s req_ids:[8] -DEBUG 06-24 20:27:26 [manager.py:391] -ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:26 lightllm_req_id:8 first_token_cost:209.32888984680176ms total_cost_time:209.37228202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11590 prompt_cache_len:5151 prompt_cache_ratio:0.444434857635893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 -DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:27 [batch.py:51] router release req id 8 -INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.10712862014770508 s -INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.10897684097290039 s -DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=219074054481902241515778082479039405365, time:1750768047.221946s req_ids:[8] -DEBUG 06-24 20:27:27 [manager.py:391] -ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:216.68338775634766ms total_cost_time:216.72630310058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11591 prompt_cache_len:5151 prompt_cache_ratio:0.4443965145371409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 -DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:27 [batch.py:51] router release req id 8 -INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s -INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.11034417152404785 s -DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=12926021598124466231100127117502429968, time:1750768047.4331398s req_ids:[8] -DEBUG 06-24 20:27:27 [manager.py:391] -ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:203.31120491027832ms total_cost_time:203.3545970916748ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11592 prompt_cache_len:5151 prompt_cache_ratio:0.4443581780538302 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 -DEBUG 06-24 20:27:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:27 [batch.py:51] router release req id 8 -INFO 06-24 20:27:27 [manager.py:224] router recive req id 8 cost time 0.31017088890075684 s -INFO 06-24 20:27:27 [manager.py:68] detokenization recv req id 8 cost time 0.312105655670166 s -DEBUG 06-24 20:27:27 [manager.py:391] Prefill Batch: batch_id=182634462158383355267829956423271799078, time:1750768047.8676383s req_ids:[8] -DEBUG 06-24 20:27:27 [manager.py:391] -ERROR 06-24 20:27:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:437.9279613494873ms total_cost_time:437.9720687866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11593 prompt_cache_len:5151 prompt_cache_ratio:0.44431984818424913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 -DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:28 [batch.py:51] router release req id 8 -INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10793805122375488 s -INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10988855361938477 s -DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=318190909249535082790665831484784221501, time:1750768048.0876975s req_ids:[8] -DEBUG 06-24 20:27:28 [manager.py:391] -ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:27 lightllm_req_id:8 first_token_cost:216.20559692382812ms total_cost_time:216.2489891052246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11594 prompt_cache_len:5151 prompt_cache_ratio:0.4442815249266862 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 -DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:28 [batch.py:51] router release req id 8 -INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s -INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10958313941955566 s -DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=166508227249825367995833035185159250695, time:1750768048.324057s req_ids:[8] -DEBUG 06-24 20:27:28 [manager.py:391] -ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:222.4874496459961ms total_cost_time:222.53179550170898ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11595 prompt_cache_len:5151 prompt_cache_ratio:0.4442432082794308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 -DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:28 [batch.py:51] router release req id 8 -INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10849976539611816 s -INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.11043906211853027 s -DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=197673383012150948979995698131697699393, time:1750768048.5382812s req_ids:[8] -DEBUG 06-24 20:27:28 [manager.py:391] -ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:213.3941650390625ms total_cost_time:213.4389877319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11596 prompt_cache_len:5151 prompt_cache_ratio:0.4442048982407727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 -DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:28 [batch.py:51] router release req id 8 -INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10799622535705566 s -INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.1100308895111084 s -DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=285586748372275578014292457786425409505, time:1750768048.7645957s req_ids:[8] -DEBUG 06-24 20:27:28 [manager.py:391] -ERROR 06-24 20:27:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:211.29870414733887ms total_cost_time:211.34257316589355ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11597 prompt_cache_len:5151 prompt_cache_ratio:0.44416659480900234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 -DEBUG 06-24 20:27:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:28 [batch.py:51] router release req id 8 -INFO 06-24 20:27:28 [manager.py:224] router recive req id 8 cost time 0.10719776153564453 s -INFO 06-24 20:27:28 [manager.py:68] detokenization recv req id 8 cost time 0.10905027389526367 s -DEBUG 06-24 20:27:28 [manager.py:391] Prefill Batch: batch_id=223944548428252722684262410638180091709, time:1750768048.9752784s req_ids:[8] -DEBUG 06-24 20:27:28 [manager.py:391] -ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:28 lightllm_req_id:8 first_token_cost:206.17103576660156ms total_cost_time:206.21514320373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11598 prompt_cache_len:5151 prompt_cache_ratio:0.44412829798241077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 -DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:29 [batch.py:51] router release req id 8 -INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10823178291320801 s -INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.11018490791320801 s -DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=115412276080002100122989988797892935842, time:1750768049.1916304s req_ids:[8] -DEBUG 06-24 20:27:29 [manager.py:391] -ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:376.33299827575684ms total_cost_time:376.37782096862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11599 prompt_cache_len:5151 prompt_cache_ratio:0.4440900077592896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 -DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:29 [batch.py:51] router release req id 8 -INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10693502426147461 s -INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.10908389091491699 s -DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=11742553500485497552810688810487013243, time:1750768049.5703387s req_ids:[8] -DEBUG 06-24 20:27:29 [manager.py:391] -ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:207.74221420288086ms total_cost_time:207.78465270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11600 prompt_cache_len:5151 prompt_cache_ratio:0.44405172413793104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 -DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:29 [batch.py:51] router release req id 8 -INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s -INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.11094403266906738 s -DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=322537680115503013770236431501456919416, time:1750768049.786758s req_ids:[8] -DEBUG 06-24 20:27:29 [manager.py:391] -ERROR 06-24 20:27:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:208.04238319396973ms total_cost_time:208.085298538208ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11601 prompt_cache_len:5151 prompt_cache_ratio:0.4440134471166279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 -DEBUG 06-24 20:27:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:29 [batch.py:51] router release req id 8 -INFO 06-24 20:27:29 [manager.py:224] router recive req id 8 cost time 0.1068582534790039 s -INFO 06-24 20:27:29 [manager.py:68] detokenization recv req id 8 cost time 0.10875678062438965 s -DEBUG 06-24 20:27:29 [manager.py:391] Prefill Batch: batch_id=120080690139516164345846875822576873465, time:1750768049.9994538s req_ids:[8] -DEBUG 06-24 20:27:29 [manager.py:391] -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:29 lightllm_req_id:8 first_token_cost:208.72950553894043ms total_cost_time:208.7721824645996ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11602 prompt_cache_len:5151 prompt_cache_ratio:0.4439751766936735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 -DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:30 [batch.py:51] router release req id 8 -INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10735726356506348 s -INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.10944485664367676 s -DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=316670526295762277438936446950291869044, time:1750768050.2150285s req_ids:[8] -DEBUG 06-24 20:27:30 [manager.py:391] -ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:210.8299732208252ms total_cost_time:210.87336540222168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11603 prompt_cache_len:5151 prompt_cache_ratio:0.44393691286736187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 -DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:30 [batch.py:51] router release req id 8 -INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10837101936340332 s -INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.11028599739074707 s -DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=254820825928489375547819754731130235356, time:1750768050.431994s req_ids:[8] -DEBUG 06-24 20:27:30 [manager.py:391] -ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:208.93454551696777ms total_cost_time:208.97722244262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11604 prompt_cache_len:5151 prompt_cache_ratio:0.44389865563598757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 -DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:30 [batch.py:51] router release req id 8 -INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10819387435913086 s -INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.11019325256347656 s -DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=12698200465022304349315194008020105329, time:1750768050.6480536s req_ids:[8] -DEBUG 06-24 20:27:30 [manager.py:391] -ERROR 06-24 20:27:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:231.39691352844238ms total_cost_time:231.44102096557617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11605 prompt_cache_len:5151 prompt_cache_ratio:0.44386040499784574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 -DEBUG 06-24 20:27:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:30 [batch.py:51] router release req id 8 -INFO 06-24 20:27:30 [manager.py:224] router recive req id 8 cost time 0.10722947120666504 s -INFO 06-24 20:27:30 [manager.py:68] detokenization recv req id 8 cost time 0.10936474800109863 s -DEBUG 06-24 20:27:30 [manager.py:391] Prefill Batch: batch_id=204571012891682551788771618421997697623, time:1750768050.8845394s req_ids:[8] -DEBUG 06-24 20:27:30 [manager.py:391] -ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:30 lightllm_req_id:8 first_token_cost:376.68824195861816ms total_cost_time:376.73091888427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11606 prompt_cache_len:5151 prompt_cache_ratio:0.4438221609512321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 -DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:31 [batch.py:51] router release req id 8 -INFO 06-24 20:27:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10794496536254883 s -INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10998988151550293 s -DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=4383387008904089726718803169148363854, time:1750768051.2664027s req_ids:[8] -DEBUG 06-24 20:27:31 [manager.py:391] -ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:204.8189640045166ms total_cost_time:204.86211776733398ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11607 prompt_cache_len:5151 prompt_cache_ratio:0.443783923494443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 -DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:31 [batch.py:51] router release req id 8 -INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10748577117919922 s -INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s -DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=100341077498308047138766490815062392620, time:1750768051.477281s req_ids:[8] -DEBUG 06-24 20:27:31 [manager.py:391] -ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:202.74043083190918ms total_cost_time:202.78501510620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11608 prompt_cache_len:5151 prompt_cache_ratio:0.44374569262577535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 -DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:31 [batch.py:51] router release req id 8 -INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10749173164367676 s -INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.10952115058898926 s -DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=253028566486781346174647396143351243236, time:1750768051.6854234s req_ids:[8] -DEBUG 06-24 20:27:31 [manager.py:391] -ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:205.69396018981934ms total_cost_time:205.73925971984863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11609 prompt_cache_len:5151 prompt_cache_ratio:0.4437074683435266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 -DEBUG 06-24 20:27:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:31 [batch.py:51] router release req id 8 -INFO 06-24 20:27:31 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s -INFO 06-24 20:27:31 [manager.py:68] detokenization recv req id 8 cost time 0.11056828498840332 s -DEBUG 06-24 20:27:31 [manager.py:391] Prefill Batch: batch_id=198608662432604478060866516883725317096, time:1750768051.900921s req_ids:[8] -DEBUG 06-24 20:27:31 [manager.py:391] -ERROR 06-24 20:27:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:207.62348175048828ms total_cost_time:207.66687393188477ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11610 prompt_cache_len:5151 prompt_cache_ratio:0.44366925064599483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 -DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:32 [batch.py:51] router release req id 8 -INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s -INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.11100149154663086 s -DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=102616921554648671128379959344541566492, time:1750768052.1183193s req_ids:[8] -DEBUG 06-24 20:27:32 [manager.py:391] -ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:31 lightllm_req_id:8 first_token_cost:215.4693603515625ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11611 prompt_cache_len:5151 prompt_cache_ratio:0.44363103953147875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 -DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:32 [batch.py:51] router release req id 8 -INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10886335372924805 s -INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.11103105545043945 s -DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=52429840949596818735614702125341845413, time:1750768052.3347113s req_ids:[8] -DEBUG 06-24 20:27:32 [manager.py:391] -ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:204.6184539794922ms total_cost_time:204.66089248657227ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11612 prompt_cache_len:5151 prompt_cache_ratio:0.4435928349982776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 -DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:32 [batch.py:51] router release req id 8 -INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.10697531700134277 s -INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.10903620719909668 s -DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=326088551832837516119518108822307277858, time:1750768052.5462503s req_ids:[8] -DEBUG 06-24 20:27:32 [manager.py:391] -ERROR 06-24 20:27:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:205.62982559204102ms total_cost_time:205.6746482849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11613 prompt_cache_len:5151 prompt_cache_ratio:0.4435546370446913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 -DEBUG 06-24 20:27:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:32 [batch.py:51] router release req id 8 -INFO 06-24 20:27:32 [manager.py:224] router recive req id 8 cost time 0.30941033363342285 s -INFO 06-24 20:27:32 [manager.py:68] detokenization recv req id 8 cost time 0.31136608123779297 s -DEBUG 06-24 20:27:32 [manager.py:391] Prefill Batch: batch_id=327655206803896732243082537203167451047, time:1750768052.9610136s req_ids:[8] -DEBUG 06-24 20:27:32 [manager.py:391] -ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:32 lightllm_req_id:8 first_token_cost:397.7804183959961ms total_cost_time:397.7999687194824ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11614 prompt_cache_len:5151 prompt_cache_ratio:0.44351644566902015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 -DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:33 [batch.py:51] router release req id 8 -INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10566926002502441 s -INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.10780835151672363 s -DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=159609690722808824453833847064038400204, time:1750768053.161927s req_ids:[8] -DEBUG 06-24 20:27:33 [manager.py:391] -ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:198.5797882080078ms total_cost_time:198.6246109008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11615 prompt_cache_len:5151 prompt_cache_ratio:0.4434782608695652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 -DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:33 [batch.py:51] router release req id 8 -INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.1082768440246582 s -INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.11032581329345703 s -DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=3704880289774479520575625657065861735, time:1750768053.3656929s req_ids:[8] -DEBUG 06-24 20:27:33 [manager.py:391] -ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:207.72647857666016ms total_cost_time:207.77153968811035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11616 prompt_cache_len:5151 prompt_cache_ratio:0.4434400826446281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 -DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:33 [batch.py:51] router release req id 8 -INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s -INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.11020469665527344 s -DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=170078736722523670359689344307826376593, time:1750768053.5787885s req_ids:[8] -DEBUG 06-24 20:27:33 [manager.py:391] -ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:206.26425743103027ms total_cost_time:206.30741119384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11617 prompt_cache_len:5151 prompt_cache_ratio:0.443401910992511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 -DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:33 [batch.py:51] router release req id 8 -INFO 06-24 20:27:33 [manager.py:224] router recive req id 8 cost time 0.10519742965698242 s -INFO 06-24 20:27:33 [manager.py:68] detokenization recv req id 8 cost time 0.10724759101867676 s -DEBUG 06-24 20:27:33 [manager.py:391] Prefill Batch: batch_id=173046256490098839109927107496841236876, time:1750768053.793487s req_ids:[8] -DEBUG 06-24 20:27:33 [manager.py:391] -ERROR 06-24 20:27:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:210.37626266479492ms total_cost_time:210.4203701019287ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11618 prompt_cache_len:5151 prompt_cache_ratio:0.4433637459115166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 -DEBUG 06-24 20:27:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:33 [batch.py:51] router release req id 8 -INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10780787467956543 s -INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10977053642272949 s -DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=249524091125512902513428867676842004096, time:1750768054.0079775s req_ids:[8] -DEBUG 06-24 20:27:34 [manager.py:391] -ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:27:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48323.971 tokens/s -DEBUG 06-24 20:27:34 [stats.py:37] Avg prompt tokens throughput: 48315.640 tokens/s -DEBUG 06-24 20:27:34 [stats.py:37] Avg generate tokens throughput: 8.331 tokens/s -INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:33 lightllm_req_id:8 first_token_cost:205.16061782836914ms total_cost_time:205.20281791687012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11619 prompt_cache_len:5151 prompt_cache_ratio:0.4433255873999484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 -DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:34 [batch.py:51] router release req id 8 -INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10764503479003906 s -INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10957169532775879 s -DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=322042146884898295650035879953288667107, time:1750768054.2203221s req_ids:[8] -DEBUG 06-24 20:27:34 [manager.py:391] -ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:370.2967166900635ms total_cost_time:370.34082412719727ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11620 prompt_cache_len:5151 prompt_cache_ratio:0.44328743545611016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 -DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:34 [batch.py:51] router release req id 8 -INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10757708549499512 s -INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10975289344787598 s -DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=92967563308563899860405267896005405938, time:1750768054.5977654s req_ids:[8] -DEBUG 06-24 20:27:34 [manager.py:391] -ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:202.54826545715332ms total_cost_time:202.59356498718262ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11621 prompt_cache_len:5151 prompt_cache_ratio:0.4432492900783065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 -DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:34 [batch.py:51] router release req id 8 -INFO 06-24 20:27:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:34 [manager.py:224] router recive req id 8 cost time 0.10771560668945312 s -INFO 06-24 20:27:34 [manager.py:68] detokenization recv req id 8 cost time 0.10966229438781738 s -DEBUG 06-24 20:27:34 [manager.py:391] Prefill Batch: batch_id=26086783698301372003153037311722628367, time:1750768054.8068175s req_ids:[8] -DEBUG 06-24 20:27:34 [manager.py:391] -ERROR 06-24 20:27:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:206.60877227783203ms total_cost_time:206.65264129638672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11622 prompt_cache_len:5151 prompt_cache_ratio:0.44321115126484256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 -DEBUG 06-24 20:27:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:34 [batch.py:51] router release req id 8 -INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10788369178771973 s -INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985493659973145 s -DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=56463665435554600794035382258834081678, time:1750768055.02479s req_ids:[8] -DEBUG 06-24 20:27:35 [manager.py:391] -ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:34 lightllm_req_id:8 first_token_cost:216.0806655883789ms total_cost_time:216.1235809326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11623 prompt_cache_len:5151 prompt_cache_ratio:0.4431730190140239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 -DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:35 [batch.py:51] router release req id 8 -INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10741233825683594 s -INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944914817810059 s -DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=185788916005317189978595667364847927825, time:1750768055.244312s req_ids:[8] -DEBUG 06-24 20:27:35 [manager.py:391] -ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:210.98756790161133ms total_cost_time:211.0297679901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11624 prompt_cache_len:5151 prompt_cache_ratio:0.4431348933241569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 -DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:35 [batch.py:51] router release req id 8 -INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10820126533508301 s -INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.11024236679077148 s -DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=314519376579015344753851039302029320004, time:1750768055.4617643s req_ids:[8] -DEBUG 06-24 20:27:35 [manager.py:391] -ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:210.41393280029297ms total_cost_time:210.45923233032227ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11625 prompt_cache_len:5151 prompt_cache_ratio:0.44309677419354837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 -DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:35 [batch.py:51] router release req id 8 -INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s -INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10962820053100586 s -DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=28589246543198290154796260264159352669, time:1750768055.676811s req_ids:[8] -DEBUG 06-24 20:27:35 [manager.py:391] -ERROR 06-24 20:27:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:209.02037620544434ms total_cost_time:209.06424522399902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11626 prompt_cache_len:5151 prompt_cache_ratio:0.4430586616205058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 -DEBUG 06-24 20:27:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:35 [batch.py:51] router release req id 8 -INFO 06-24 20:27:35 [manager.py:224] router recive req id 8 cost time 0.10779094696044922 s -INFO 06-24 20:27:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s -DEBUG 06-24 20:27:35 [manager.py:391] Prefill Batch: batch_id=278139419228630883080986344573144226336, time:1750768055.8920612s req_ids:[8] -DEBUG 06-24 20:27:35 [manager.py:391] -ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:35 lightllm_req_id:8 first_token_cost:376.01375579833984ms total_cost_time:376.05762481689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11627 prompt_cache_len:5151 prompt_cache_ratio:0.4430205556033371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 -DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:36 [batch.py:51] router release req id 8 -INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10822272300720215 s -INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s -DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=75417688038995294896801884289730820000, time:1750768056.272834s req_ids:[8] -DEBUG 06-24 20:27:36 [manager.py:391] -ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:208.0078125ms total_cost_time:208.05120468139648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11628 prompt_cache_len:5151 prompt_cache_ratio:0.44298245614035087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 -DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:36 [batch.py:51] router release req id 8 -INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.1085054874420166 s -INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s -DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=197132007653966627037539083395863976809, time:1750768056.4887707s req_ids:[8] -DEBUG 06-24 20:27:36 [manager.py:391] -ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:206.85338973999023ms total_cost_time:206.8953514099121ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11629 prompt_cache_len:5151 prompt_cache_ratio:0.4429443632298564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 -DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:36 [batch.py:51] router release req id 8 -INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s -INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11060714721679688 s -DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=17709891307908481128429084620117407001, time:1750768056.7017605s req_ids:[8] -DEBUG 06-24 20:27:36 [manager.py:391] -ERROR 06-24 20:27:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:208.68778228759766ms total_cost_time:208.73236656188965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11630 prompt_cache_len:5151 prompt_cache_ratio:0.4429062768701634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 -DEBUG 06-24 20:27:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:36 [batch.py:51] router release req id 8 -INFO 06-24 20:27:36 [manager.py:224] router recive req id 8 cost time 0.10807538032531738 s -INFO 06-24 20:27:36 [manager.py:68] detokenization recv req id 8 cost time 0.11016345024108887 s -DEBUG 06-24 20:27:36 [manager.py:391] Prefill Batch: batch_id=141985797526126276629775214750975334031, time:1750768056.9170542s req_ids:[8] -DEBUG 06-24 20:27:36 [manager.py:391] -ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:36 lightllm_req_id:8 first_token_cost:205.83724975585938ms total_cost_time:205.88159561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11631 prompt_cache_len:5151 prompt_cache_ratio:0.44286819705958214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 -DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:37 [batch.py:51] router release req id 8 -INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10785746574401855 s -INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.1099550724029541 s -DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=150568358759909845589863602091934173328, time:1750768057.1306329s req_ids:[8] -DEBUG 06-24 20:27:37 [manager.py:391] -ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:203.2313346862793ms total_cost_time:203.2756805419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11632 prompt_cache_len:5151 prompt_cache_ratio:0.44283012379642367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 -DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:37 [batch.py:51] router release req id 8 -INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10842752456665039 s -INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.11037206649780273 s -DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=255468635855261129336253297370492248097, time:1750768057.3393357s req_ids:[8] -DEBUG 06-24 20:27:37 [manager.py:391] -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:207.0162296295166ms total_cost_time:207.0600986480713ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11633 prompt_cache_len:5151 prompt_cache_ratio:0.4427920570789994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 -DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:37 [batch.py:51] router release req id 8 -INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.10868239402770996 s -INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.1106572151184082 s -DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=205861260231369315554372277450926284052, time:1750768057.5556757s req_ids:[8] -DEBUG 06-24 20:27:37 [manager.py:391] -ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:212.52751350402832ms total_cost_time:212.5718593597412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11634 prompt_cache_len:5151 prompt_cache_ratio:0.44275399690562145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 -DEBUG 06-24 20:27:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:37 [batch.py:51] router release req id 8 -INFO 06-24 20:27:37 [manager.py:224] router recive req id 8 cost time 0.2084054946899414 s -INFO 06-24 20:27:37 [manager.py:68] detokenization recv req id 8 cost time 0.21020102500915527 s -DEBUG 06-24 20:27:37 [manager.py:391] Prefill Batch: batch_id=61193095940179990327734526297592337342, time:1750768057.9048717s req_ids:[8] -DEBUG 06-24 20:27:37 [manager.py:391] -ERROR 06-24 20:27:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:319.77248191833496ms total_cost_time:319.81778144836426ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11635 prompt_cache_len:5151 prompt_cache_ratio:0.4427159432746025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 -DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:38 [batch.py:51] router release req id 8 -INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.1077573299407959 s -INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10983848571777344 s -DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=269947313797313670973089204710357317795, time:1750768058.097356s req_ids:[8] -DEBUG 06-24 20:27:38 [manager.py:391] -ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:37 lightllm_req_id:8 first_token_cost:201.41363143920898ms total_cost_time:201.45726203918457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11636 prompt_cache_len:5151 prompt_cache_ratio:0.44267789618425574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 -DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:38 [batch.py:51] router release req id 8 -INFO 06-24 20:27:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.1074681282043457 s -INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10943865776062012 s -DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=111155728540221233148814307519971143076, time:1750768058.3056908s req_ids:[8] -DEBUG 06-24 20:27:38 [manager.py:391] -ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:205.74712753295898ms total_cost_time:205.78980445861816ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11637 prompt_cache_len:5151 prompt_cache_ratio:0.4426398556328951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 -DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:38 [batch.py:51] router release req id 8 -INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10755538940429688 s -INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.10971379280090332 s -DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=255039111609182138701053776779626005196, time:1750768058.5166001s req_ids:[8] -DEBUG 06-24 20:27:38 [manager.py:391] -ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:207.78203010559082ms total_cost_time:207.8251838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11638 prompt_cache_len:5151 prompt_cache_ratio:0.4426018216188349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 -DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:38 [batch.py:51] router release req id 8 -INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10800337791442871 s -INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.1100623607635498 s -DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=15185878056657217581301681413855575314, time:1750768058.731673s req_ids:[8] -DEBUG 06-24 20:27:38 [manager.py:391] -ERROR 06-24 20:27:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:209.96713638305664ms total_cost_time:210.01219749450684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11639 prompt_cache_len:5151 prompt_cache_ratio:0.44256379414039004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 -DEBUG 06-24 20:27:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:38 [batch.py:51] router release req id 8 -INFO 06-24 20:27:38 [manager.py:224] router recive req id 8 cost time 0.10798001289367676 s -INFO 06-24 20:27:38 [manager.py:68] detokenization recv req id 8 cost time 0.1100914478302002 s -DEBUG 06-24 20:27:38 [manager.py:391] Prefill Batch: batch_id=139004765710079065135606118722259411212, time:1750768058.9483297s req_ids:[8] -DEBUG 06-24 20:27:38 [manager.py:391] -ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:38 lightllm_req_id:8 first_token_cost:210.05606651306152ms total_cost_time:210.10184288024902ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11640 prompt_cache_len:5151 prompt_cache_ratio:0.44252577319587627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 -DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:39 [batch.py:51] router release req id 8 -INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10822701454162598 s -INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.1101372241973877 s -DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=274473753694687324176758882147147889501, time:1750768059.1711378s req_ids:[8] -DEBUG 06-24 20:27:39 [manager.py:391] -ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:372.12538719177246ms total_cost_time:372.17044830322266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11641 prompt_cache_len:5151 prompt_cache_ratio:0.44248775878360963 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 -DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:39 [batch.py:51] router release req id 8 -INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s -INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.11091160774230957 s -DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=80492152616491568875311284530108596007, time:1750768059.542152s req_ids:[8] -DEBUG 06-24 20:27:39 [manager.py:391] -ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:207.74126052856445ms total_cost_time:207.78465270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11642 prompt_cache_len:5151 prompt_cache_ratio:0.4424497509019069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 -DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:39 [batch.py:51] router release req id 8 -INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10876297950744629 s -INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.11085271835327148 s -DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=336383143086348206744468767669111793819, time:1750768059.7568507s req_ids:[8] -DEBUG 06-24 20:27:39 [manager.py:391] -ERROR 06-24 20:27:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:202.7606964111328ms total_cost_time:202.78167724609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11643 prompt_cache_len:5151 prompt_cache_ratio:0.4424117495490853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 -DEBUG 06-24 20:27:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:39 [batch.py:51] router release req id 8 -INFO 06-24 20:27:39 [manager.py:224] router recive req id 8 cost time 0.10585665702819824 s -INFO 06-24 20:27:39 [manager.py:68] detokenization recv req id 8 cost time 0.10787177085876465 s -DEBUG 06-24 20:27:39 [manager.py:391] Prefill Batch: batch_id=79193535528960203107071687578790542104, time:1750768059.9663005s req_ids:[8] -DEBUG 06-24 20:27:39 [manager.py:391] -ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:39 lightllm_req_id:8 first_token_cost:205.24168014526367ms total_cost_time:205.2628993988037ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11644 prompt_cache_len:5151 prompt_cache_ratio:0.44237375472346274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 -DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:40 [batch.py:51] router release req id 8 -INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10426092147827148 s -INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.10619974136352539 s -DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=295558104385778927500801051260715079703, time:1750768060.1757567s req_ids:[8] -DEBUG 06-24 20:27:40 [manager.py:391] -ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:211.53926849365234ms total_cost_time:211.5957736968994ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11645 prompt_cache_len:5151 prompt_cache_ratio:0.44233576642335765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 -DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:40 [batch.py:51] router release req id 8 -INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10790348052978516 s -INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.10982537269592285 s -DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=280646328702124015446546387565036822621, time:1750768060.3915617s req_ids:[8] -DEBUG 06-24 20:27:40 [manager.py:391] -ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:208.7557315826416ms total_cost_time:208.8010311126709ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11646 prompt_cache_len:5151 prompt_cache_ratio:0.44229778464708913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 -DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:40 [batch.py:51] router release req id 8 -INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.11104679107666016 s -INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.11320042610168457 s -DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=62268459116684508482025122141809193249, time:1750768060.6072395s req_ids:[8] -DEBUG 06-24 20:27:40 [manager.py:391] -ERROR 06-24 20:27:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:203.5071849822998ms total_cost_time:203.5524845123291ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11647 prompt_cache_len:5151 prompt_cache_ratio:0.4422598093929767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 -DEBUG 06-24 20:27:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:40 [batch.py:51] router release req id 8 -INFO 06-24 20:27:40 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s -INFO 06-24 20:27:40 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s -DEBUG 06-24 20:27:40 [manager.py:391] Prefill Batch: batch_id=61079119373762392419367431873905208071, time:1750768060.8220289s req_ids:[8] -DEBUG 06-24 20:27:40 [manager.py:391] -ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:40 lightllm_req_id:8 first_token_cost:376.1124610900879ms total_cost_time:376.1570453643799ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11648 prompt_cache_len:5151 prompt_cache_ratio:0.44222184065934067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 -DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:41 [batch.py:51] router release req id 8 -INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10858726501464844 s -INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.11051774024963379 s -DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=299593004177238963388715736403263946494, time:1750768061.1999803s req_ids:[8] -DEBUG 06-24 20:27:41 [manager.py:391] -ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:209.85984802246094ms total_cost_time:209.90347862243652ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11649 prompt_cache_len:5151 prompt_cache_ratio:0.4421838784445017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 -DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:41 [batch.py:51] router release req id 8 -INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.1081840991973877 s -INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s -DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=316781868197800707089498737181832943379, time:1750768061.416961s req_ids:[8] -DEBUG 06-24 20:27:41 [manager.py:391] -ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:206.64405822753906ms total_cost_time:206.68721199035645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11650 prompt_cache_len:5151 prompt_cache_ratio:0.4421459227467811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 -DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:41 [batch.py:51] router release req id 8 -INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10854816436767578 s -INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.1105797290802002 s -DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=270251469595641011912024118746084410715, time:1750768061.6294148s req_ids:[8] -DEBUG 06-24 20:27:41 [manager.py:391] -ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:208.46080780029297ms total_cost_time:208.50443840026855ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11651 prompt_cache_len:5151 prompt_cache_ratio:0.4421079735645009 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 -DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:41 [batch.py:51] router release req id 8 -INFO 06-24 20:27:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:41 [manager.py:224] router recive req id 8 cost time 0.10954117774963379 s -INFO 06-24 20:27:41 [manager.py:68] detokenization recv req id 8 cost time 0.11166214942932129 s -DEBUG 06-24 20:27:41 [manager.py:391] Prefill Batch: batch_id=204731399901107882064521222102199610353, time:1750768061.8463502s req_ids:[8] -DEBUG 06-24 20:27:41 [manager.py:391] -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:210.57462692260742ms total_cost_time:210.62016487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11652 prompt_cache_len:5151 prompt_cache_ratio:0.4420700308959835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 -DEBUG 06-24 20:27:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:41 [batch.py:51] router release req id 8 -INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10795974731445312 s -INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.10989904403686523 s -DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=257933142827629647630890189838073963588, time:1750768062.0591762s req_ids:[8] -DEBUG 06-24 20:27:42 [manager.py:391] -ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:41 lightllm_req_id:8 first_token_cost:207.16476440429688ms total_cost_time:207.20720291137695ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11653 prompt_cache_len:5151 prompt_cache_ratio:0.4420320947395521 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 -DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:42 [batch.py:51] router release req id 8 -INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10914087295532227 s -INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.1111302375793457 s -DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=17937069243415260362399538069278821502, time:1750768062.2755115s req_ids:[8] -DEBUG 06-24 20:27:42 [manager.py:391] -ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:210.28399467468262ms total_cost_time:210.32953262329102ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11654 prompt_cache_len:5151 prompt_cache_ratio:0.4419941650935301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 -DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:42 [batch.py:51] router release req id 8 -INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.10788989067077637 s -INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.10980486869812012 s -DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=312647226188170999033993674702095941198, time:1750768062.4916286s req_ids:[8] -DEBUG 06-24 20:27:42 [manager.py:391] -ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:207.88979530334473ms total_cost_time:207.9324722290039ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11655 prompt_cache_len:5151 prompt_cache_ratio:0.44195624195624195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 -DEBUG 06-24 20:27:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:42 [batch.py:51] router release req id 8 -INFO 06-24 20:27:42 [manager.py:224] router recive req id 8 cost time 0.30853700637817383 s -INFO 06-24 20:27:42 [manager.py:68] detokenization recv req id 8 cost time 0.31052422523498535 s -DEBUG 06-24 20:27:42 [manager.py:391] Prefill Batch: batch_id=299740526944787088239288135503753498887, time:1750768062.9138064s req_ids:[8] -DEBUG 06-24 20:27:42 [manager.py:391] -ERROR 06-24 20:27:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:42 lightllm_req_id:8 first_token_cost:407.29713439941406ms total_cost_time:407.34100341796875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11656 prompt_cache_len:5151 prompt_cache_ratio:0.44191832532601233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 -DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:43 [batch.py:51] router release req id 8 -INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10860013961791992 s -INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.11050987243652344 s -DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=337446504550859803029660026187667420749, time:1750768063.1204126s req_ids:[8] -DEBUG 06-24 20:27:43 [manager.py:391] -ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:207.14616775512695ms total_cost_time:207.19051361083984ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11657 prompt_cache_len:5151 prompt_cache_ratio:0.44188041520116667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 -DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:43 [batch.py:51] router release req id 8 -INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10731959342956543 s -INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10938763618469238 s -DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=130485175085857028088939489140531399497, time:1750768063.3351586s req_ids:[8] -DEBUG 06-24 20:27:43 [manager.py:391] -ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:210.31713485717773ms total_cost_time:210.36338806152344ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11658 prompt_cache_len:5151 prompt_cache_ratio:0.4418425115800309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 -DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:43 [batch.py:51] router release req id 8 -INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.1075592041015625 s -INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s -DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=11096509143887670593093691096838479192, time:1750768063.551984s req_ids:[8] -DEBUG 06-24 20:27:43 [manager.py:391] -ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:209.15889739990234ms total_cost_time:209.20157432556152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11659 prompt_cache_len:5151 prompt_cache_ratio:0.44180461446093144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 -DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:43 [batch.py:51] router release req id 8 -INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10948467254638672 s -INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.11144590377807617 s -DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=122142627285848471446769339946885118300, time:1750768063.7694595s req_ids:[8] -DEBUG 06-24 20:27:43 [manager.py:391] -ERROR 06-24 20:27:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:208.31775665283203ms total_cost_time:208.36186408996582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11660 prompt_cache_len:5151 prompt_cache_ratio:0.44176672384219556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 -DEBUG 06-24 20:27:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:43 [batch.py:51] router release req id 8 -INFO 06-24 20:27:43 [manager.py:224] router recive req id 8 cost time 0.10779523849487305 s -INFO 06-24 20:27:43 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s -DEBUG 06-24 20:27:43 [manager.py:391] Prefill Batch: batch_id=314066956482224143493314398315864182556, time:1750768063.9817142s req_ids:[8] -DEBUG 06-24 20:27:43 [manager.py:391] -ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:43 lightllm_req_id:8 first_token_cost:208.05811882019043ms total_cost_time:208.1010341644287ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11661 prompt_cache_len:5151 prompt_cache_ratio:0.44172883972215077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 -DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:44 [batch.py:51] router release req id 8 -INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10868573188781738 s -INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.11003851890563965 s -DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=43716856319837955467461962905487194665, time:1750768064.198382s req_ids:[8] -DEBUG 06-24 20:27:44 [manager.py:391] -DEBUG 06-24 20:27:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 49567.098 tokens/s -DEBUG 06-24 20:27:44 [stats.py:37] Avg prompt tokens throughput: 49558.683 tokens/s -DEBUG 06-24 20:27:44 [stats.py:37] Avg generate tokens throughput: 8.416 tokens/s -ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:367.81787872314453ms total_cost_time:367.8615093231201ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11662 prompt_cache_len:5151 prompt_cache_ratio:0.44169096209912534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 -DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:44 [batch.py:51] router release req id 8 -INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10796308517456055 s -INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10994935035705566 s -DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=265482422708807595817312816396387340617, time:1750768064.5722618s req_ids:[8] -DEBUG 06-24 20:27:44 [manager.py:391] -ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:210.59942245483398ms total_cost_time:210.6456756591797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11663 prompt_cache_len:5151 prompt_cache_ratio:0.44165309097144817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 -DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:44 [batch.py:51] router release req id 8 -INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.1072998046875 s -INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10930252075195312 s -DEBUG 06-24 20:27:44 [manager.py:391] Prefill Batch: batch_id=140715960182771354796120416256631862347, time:1750768064.7888014s req_ids:[8] -DEBUG 06-24 20:27:44 [manager.py:391] -ERROR 06-24 20:27:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:209.71083641052246ms total_cost_time:209.7342014312744ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:11664 prompt_cache_len:5151 prompt_cache_ratio:0.44161522633744854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 -DEBUG 06-24 20:27:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:44 [batch.py:51] router release req id 8 -INFO 06-24 20:27:44 [manager.py:224] router recive req id 8 cost time 0.10576009750366211 s -INFO 06-24 20:27:44 [manager.py:68] detokenization recv req id 8 cost time 0.10772371292114258 s -DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=38939872212084040627938081887060034009, time:1750768065.0060368s req_ids:[8] -DEBUG 06-24 20:27:45 [manager.py:391] -ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:44 lightllm_req_id:8 first_token_cost:213.16981315612793ms total_cost_time:213.21368217468262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11665 prompt_cache_len:5151 prompt_cache_ratio:0.4415773681954565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 -DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:45 [batch.py:51] router release req id 8 -INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10756874084472656 s -INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.10960698127746582 s -DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=121522200574996270201540644699303111517, time:1750768065.221665s req_ids:[8] -DEBUG 06-24 20:27:45 [manager.py:391] -ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:208.81175994873047ms total_cost_time:208.85539054870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11666 prompt_cache_len:5151 prompt_cache_ratio:0.4415395165438025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 -DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:45 [batch.py:51] router release req id 8 -INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10688066482543945 s -INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.10885500907897949 s -DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=296434976523445833842283782897358076960, time:1750768065.4510627s req_ids:[8] -DEBUG 06-24 20:27:45 [manager.py:391] -ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:227.17595100402832ms total_cost_time:227.2202968597412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11667 prompt_cache_len:5151 prompt_cache_ratio:0.4415016713808177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 -DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:45 [batch.py:51] router release req id 8 -INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.1093912124633789 s -INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.11139631271362305 s -DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=16062869560735165174523373097118204279, time:1750768065.6721177s req_ids:[8] -DEBUG 06-24 20:27:45 [manager.py:391] -ERROR 06-24 20:27:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:212.32008934020996ms total_cost_time:212.36515045166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11668 prompt_cache_len:5151 prompt_cache_ratio:0.44146383270483375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 -DEBUG 06-24 20:27:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:45 [batch.py:51] router release req id 8 -INFO 06-24 20:27:45 [manager.py:224] router recive req id 8 cost time 0.10849952697753906 s -INFO 06-24 20:27:45 [manager.py:68] detokenization recv req id 8 cost time 0.11048078536987305 s -DEBUG 06-24 20:27:45 [manager.py:391] Prefill Batch: batch_id=322430848760108596701433256098931005177, time:1750768065.8877454s req_ids:[8] -DEBUG 06-24 20:27:45 [manager.py:391] -ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:45 lightllm_req_id:8 first_token_cost:374.65786933898926ms total_cost_time:374.70197677612305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11669 prompt_cache_len:5151 prompt_cache_ratio:0.4414260005141829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 -DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:46 [batch.py:51] router release req id 8 -INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10850763320922852 s -INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.11054825782775879 s -DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=175355592149943647442503956285560439579, time:1750768066.2710464s req_ids:[8] -DEBUG 06-24 20:27:46 [manager.py:391] -ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:211.287260055542ms total_cost_time:211.3327980041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11670 prompt_cache_len:5151 prompt_cache_ratio:0.44138817480719794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 -DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:46 [batch.py:51] router release req id 8 -INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10736536979675293 s -INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.1094813346862793 s -DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=178275448747769996930753701572404118296, time:1750768066.4893088s req_ids:[8] -DEBUG 06-24 20:27:46 [manager.py:391] -ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:210.53099632263184ms total_cost_time:210.57605743408203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11671 prompt_cache_len:5151 prompt_cache_ratio:0.4413503555822123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 -DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:46 [batch.py:51] router release req id 8 -INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10888814926147461 s -INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.11098051071166992 s -DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=104647567097353884675832059871995132250, time:1750768066.7051613s req_ids:[8] -DEBUG 06-24 20:27:46 [manager.py:391] -ERROR 06-24 20:27:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:209.43641662597656ms total_cost_time:209.47909355163574ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11672 prompt_cache_len:5151 prompt_cache_ratio:0.44131254283756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 -DEBUG 06-24 20:27:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:46 [batch.py:51] router release req id 8 -INFO 06-24 20:27:46 [manager.py:224] router recive req id 8 cost time 0.10856199264526367 s -INFO 06-24 20:27:46 [manager.py:68] detokenization recv req id 8 cost time 0.1105494499206543 s -DEBUG 06-24 20:27:46 [manager.py:391] Prefill Batch: batch_id=286136379708286627664648265022458877770, time:1750768066.921517s req_ids:[8] -DEBUG 06-24 20:27:46 [manager.py:391] -ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:46 lightllm_req_id:8 first_token_cost:209.0163230895996ms total_cost_time:209.0601921081543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11673 prompt_cache_len:5151 prompt_cache_ratio:0.44127473657157545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 -INFO 06-24 20:27:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:47 [batch.py:51] router release req id 8 -INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10799169540405273 s -INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11010169982910156 s -DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=333092236254768544864245833229603047152, time:1750768067.1354423s req_ids:[8] -DEBUG 06-24 20:27:47 [manager.py:391] -ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:207.83305168151855ms total_cost_time:207.87668228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11674 prompt_cache_len:5151 prompt_cache_ratio:0.4412369367825938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 -DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:47 [batch.py:51] router release req id 8 -INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10848402976989746 s -INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11039614677429199 s -DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=242066419917160546179103428843145465146, time:1750768067.3528771s req_ids:[8] -DEBUG 06-24 20:27:47 [manager.py:391] -ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:210.77370643615723ms total_cost_time:210.81948280334473ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11675 prompt_cache_len:5151 prompt_cache_ratio:0.4411991434689507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 -DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:47 [batch.py:51] router release req id 8 -INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.10789918899536133 s -INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.10973668098449707 s -DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=30946175742304727602675341187884965927, time:1750768067.570659s req_ids:[8] -DEBUG 06-24 20:27:47 [manager.py:391] -ERROR 06-24 20:27:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:379.8513412475586ms total_cost_time:379.8949718475342ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11676 prompt_cache_len:5151 prompt_cache_ratio:0.44116135662898254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 -DEBUG 06-24 20:27:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:47 [batch.py:51] router release req id 8 -INFO 06-24 20:27:47 [manager.py:224] router recive req id 8 cost time 0.1092216968536377 s -INFO 06-24 20:27:47 [manager.py:68] detokenization recv req id 8 cost time 0.11127710342407227 s -DEBUG 06-24 20:27:47 [manager.py:391] Prefill Batch: batch_id=73313771434464797223728399194716805674, time:1750768067.9539516s req_ids:[8] -DEBUG 06-24 20:27:47 [manager.py:391] -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:47 lightllm_req_id:8 first_token_cost:209.05041694641113ms total_cost_time:209.09523963928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11677 prompt_cache_len:5151 prompt_cache_ratio:0.44112357626102594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 -DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:48 [batch.py:51] router release req id 8 -INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s -INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11123347282409668 s -DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=288001262650343206408843595607394306170, time:1750768068.1796744s req_ids:[8] -DEBUG 06-24 20:27:48 [manager.py:391] -ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:220.64876556396484ms total_cost_time:220.69406509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11678 prompt_cache_len:5151 prompt_cache_ratio:0.4410858023634184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 -DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:48 [batch.py:51] router release req id 8 -INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.1083071231842041 s -INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049509048461914 s -DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=26752544059038432113733400857799309496, time:1750768068.3982942s req_ids:[8] -DEBUG 06-24 20:27:48 [manager.py:391] -ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:211.79533004760742ms total_cost_time:211.84110641479492ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11679 prompt_cache_len:5151 prompt_cache_ratio:0.4410480349344978 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 -DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:48 [batch.py:51] router release req id 8 -INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.10710597038269043 s -INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.10921287536621094 s -DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=217638077499561130651481766754466672082, time:1750768068.615498s req_ids:[8] -DEBUG 06-24 20:27:48 [manager.py:391] -ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:217.72456169128418ms total_cost_time:217.76986122131348ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11680 prompt_cache_len:5151 prompt_cache_ratio:0.44101027397260273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 -DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:48 [batch.py:51] router release req id 8 -INFO 06-24 20:27:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:48 [manager.py:224] router recive req id 8 cost time 0.1086277961730957 s -INFO 06-24 20:27:48 [manager.py:68] detokenization recv req id 8 cost time 0.11078357696533203 s -DEBUG 06-24 20:27:48 [manager.py:391] Prefill Batch: batch_id=183484381546618447873154783003017080773, time:1750768068.842046s req_ids:[8] -DEBUG 06-24 20:27:48 [manager.py:391] -ERROR 06-24 20:27:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:216.7809009552002ms total_cost_time:216.80068969726562ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11681 prompt_cache_len:5151 prompt_cache_ratio:0.44097251947607224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 -DEBUG 06-24 20:27:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:48 [batch.py:51] router release req id 8 -INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10581374168395996 s -INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769104957580566 s -DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=166073478561886356450542614144698214248, time:1750768069.06763s req_ids:[8] -DEBUG 06-24 20:27:49 [manager.py:391] -ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:48 lightllm_req_id:8 first_token_cost:213.01507949829102ms total_cost_time:213.06085586547852ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11682 prompt_cache_len:5151 prompt_cache_ratio:0.440934771443246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 -DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:49 [batch.py:51] router release req id 8 -INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10723304748535156 s -INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10907983779907227 s -DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=227622986544389012314626857110713569781, time:1750768069.2822154s req_ids:[8] -DEBUG 06-24 20:27:49 [manager.py:391] -ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:377.5901794433594ms total_cost_time:377.63381004333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11683 prompt_cache_len:5151 prompt_cache_ratio:0.44089702987246426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 -DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:49 [batch.py:51] router release req id 8 -DEBUG 06-24 20:27:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:49 [manager.py:283] -DEBUG 06-24 20:27:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:49 [manager.py:284] -INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10852766036987305 s -INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.11050963401794434 s -DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=191548575499890481294140533313106696880, time:1750768069.6664155s req_ids:[8] -DEBUG 06-24 20:27:49 [manager.py:391] -ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:202.59404182434082ms total_cost_time:202.6379108428955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11684 prompt_cache_len:5151 prompt_cache_ratio:0.4408592947620678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 -DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:49 [batch.py:51] router release req id 8 -INFO 06-24 20:27:49 [manager.py:224] router recive req id 8 cost time 0.10742926597595215 s -INFO 06-24 20:27:49 [manager.py:68] detokenization recv req id 8 cost time 0.10953426361083984 s -DEBUG 06-24 20:27:49 [manager.py:391] Prefill Batch: batch_id=43187319509054787744039514623587645003, time:1750768069.872239s req_ids:[8] -DEBUG 06-24 20:27:49 [manager.py:391] -ERROR 06-24 20:27:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:200.2246379852295ms total_cost_time:200.26874542236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11685 prompt_cache_len:5151 prompt_cache_ratio:0.44082156611039797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 -DEBUG 06-24 20:27:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:49 [batch.py:51] router release req id 8 -INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10787844657897949 s -INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10983443260192871 s -DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=298384835260827150318491690840360067745, time:1750768070.0794733s req_ids:[8] -DEBUG 06-24 20:27:50 [manager.py:391] -ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:49 lightllm_req_id:8 first_token_cost:206.43162727355957ms total_cost_time:206.47644996643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11686 prompt_cache_len:5151 prompt_cache_ratio:0.44078384391579667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 -DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:50 [batch.py:51] router release req id 8 -INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10764265060424805 s -INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10953140258789062 s -DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=225063350445177027224107117064696904193, time:1750768070.2917519s req_ids:[8] -DEBUG 06-24 20:27:50 [manager.py:391] -ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:207.49378204345703ms total_cost_time:207.54003524780273ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11687 prompt_cache_len:5151 prompt_cache_ratio:0.44074612817660647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 -DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:50 [batch.py:51] router release req id 8 -INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s -INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s -DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=137225826317094076687923812881092625321, time:1750768070.506479s req_ids:[8] -DEBUG 06-24 20:27:50 [manager.py:391] -ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:208.59742164611816ms total_cost_time:208.64105224609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11688 prompt_cache_len:5151 prompt_cache_ratio:0.4407084188911704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 -DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:50 [batch.py:51] router release req id 8 -INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s -INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105647087097168 s -DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=80187774504198863934088397354980675605, time:1750768070.7215064s req_ids:[8] -DEBUG 06-24 20:27:50 [manager.py:391] -ERROR 06-24 20:27:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:206.7418098449707ms total_cost_time:206.7849636077881ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11689 prompt_cache_len:5151 prompt_cache_ratio:0.44067071605783215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 -DEBUG 06-24 20:27:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:50 [batch.py:51] router release req id 8 -INFO 06-24 20:27:50 [manager.py:224] router recive req id 8 cost time 0.10505175590515137 s -INFO 06-24 20:27:50 [manager.py:68] detokenization recv req id 8 cost time 0.10721921920776367 s -DEBUG 06-24 20:27:50 [manager.py:391] Prefill Batch: batch_id=147825844061204254257736059546139281692, time:1750768070.9347677s req_ids:[8] -DEBUG 06-24 20:27:50 [manager.py:391] -ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:50 lightllm_req_id:8 first_token_cost:384.75513458251953ms total_cost_time:384.8001956939697ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11690 prompt_cache_len:5151 prompt_cache_ratio:0.44063301967493584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 -DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:51 [batch.py:51] router release req id 8 -INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10833454132080078 s -INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.11092710494995117 s -DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=47780690576181283297400113084221155824, time:1750768071.3247705s req_ids:[8] -DEBUG 06-24 20:27:51 [manager.py:391] -ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:205.37257194519043ms total_cost_time:205.4157257080078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11691 prompt_cache_len:5151 prompt_cache_ratio:0.44059532974082627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 -DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:51 [batch.py:51] router release req id 8 -INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.1075129508972168 s -INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10955381393432617 s -DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=192028328961669777164971296976631988864, time:1750768071.5369785s req_ids:[8] -DEBUG 06-24 20:27:51 [manager.py:391] -ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:207.37075805664062ms total_cost_time:207.41510391235352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11692 prompt_cache_len:5151 prompt_cache_ratio:0.4405576462538488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 -DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:51 [batch.py:51] router release req id 8 -INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10739350318908691 s -INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10927605628967285 s -DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=226644578963362568769550394731141433005, time:1750768071.7513175s req_ids:[8] -DEBUG 06-24 20:27:51 [manager.py:391] -ERROR 06-24 20:27:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:209.96475219726562ms total_cost_time:210.00981330871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11693 prompt_cache_len:5151 prompt_cache_ratio:0.44051996921234926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 -DEBUG 06-24 20:27:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:51 [batch.py:51] router release req id 8 -INFO 06-24 20:27:51 [manager.py:224] router recive req id 8 cost time 0.10618782043457031 s -INFO 06-24 20:27:51 [manager.py:68] detokenization recv req id 8 cost time 0.10780477523803711 s -DEBUG 06-24 20:27:51 [manager.py:391] Prefill Batch: batch_id=297762266278033280858054259708847652046, time:1750768071.9663367s req_ids:[8] -DEBUG 06-24 20:27:51 [manager.py:391] -ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:51 lightllm_req_id:8 first_token_cost:199.5100975036621ms total_cost_time:199.5542049407959ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11694 prompt_cache_len:5151 prompt_cache_ratio:0.4404822986146742 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 -DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:52 [batch.py:51] router release req id 8 -INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10714960098266602 s -INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10897254943847656 s -DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=31195497378965806952722866384171260587, time:1750768072.1725655s req_ids:[8] -DEBUG 06-24 20:27:52 [manager.py:391] -ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:203.4473419189453ms total_cost_time:203.4924030303955ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11695 prompt_cache_len:5151 prompt_cache_ratio:0.4404446344591706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 -DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:52 [batch.py:51] router release req id 8 -INFO 06-24 20:27:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.1074531078338623 s -INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10943388938903809 s -DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=41961555260971651765030443494808349220, time:1750768072.382983s req_ids:[8] -DEBUG 06-24 20:27:52 [manager.py:391] -ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:206.2997817993164ms total_cost_time:206.3436508178711ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11696 prompt_cache_len:5151 prompt_cache_ratio:0.44040697674418605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 -DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:52 [batch.py:51] router release req id 8 -INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10802459716796875 s -INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.10960602760314941 s -DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=259764074609678528213901087170832004807, time:1750768072.595922s req_ids:[8] -DEBUG 06-24 20:27:52 [manager.py:391] -ERROR 06-24 20:27:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:375.8530616760254ms total_cost_time:375.90718269348145ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:11697 prompt_cache_len:5151 prompt_cache_ratio:0.44036932546806873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 -DEBUG 06-24 20:27:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:52 [batch.py:51] router release req id 8 -INFO 06-24 20:27:52 [manager.py:224] router recive req id 8 cost time 0.10901856422424316 s -INFO 06-24 20:27:52 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s -DEBUG 06-24 20:27:52 [manager.py:391] Prefill Batch: batch_id=924113102782929902878716484726148624, time:1750768072.976297s req_ids:[8] -DEBUG 06-24 20:27:52 [manager.py:391] -ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:52 lightllm_req_id:8 first_token_cost:207.92508125305176ms total_cost_time:207.96895027160645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11698 prompt_cache_len:5151 prompt_cache_ratio:0.4403316806291674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 -DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:53 [batch.py:51] router release req id 8 -INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s -INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11006307601928711 s -DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=26583854484608831339809600195653965698, time:1750768073.1925404s req_ids:[8] -DEBUG 06-24 20:27:53 [manager.py:391] -INFO 06-24 20:27:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:27:53 [statics_utils.py:24] mean first cost: 228.82940530744267 ms -INFO 06-24 20:27:53 [statics_utils.py:24] mean per token cost: 0.06352233204585778 ms -ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:207.9448699951172ms total_cost_time:207.98921585083008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11699 prompt_cache_len:5151 prompt_cache_ratio:0.4402940422258313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 -DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:53 [batch.py:51] router release req id 8 -INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10805463790893555 s -INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11008477210998535 s -DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=172343902097966924872925216147226074346, time:1750768073.4061286s req_ids:[8] -DEBUG 06-24 20:27:53 [manager.py:391] -ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:211.3957405090332ms total_cost_time:211.4405632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11700 prompt_cache_len:5151 prompt_cache_ratio:0.44025641025641027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 -DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:53 [batch.py:51] router release req id 8 -INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s -INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.11033892631530762 s -DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=225758045825775236494367558165643973930, time:1750768073.62264s req_ids:[8] -DEBUG 06-24 20:27:53 [manager.py:391] -ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:204.5266628265381ms total_cost_time:204.5729160308838ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11701 prompt_cache_len:5151 prompt_cache_ratio:0.44021878471925474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 -DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:53 [batch.py:51] router release req id 8 -INFO 06-24 20:27:53 [manager.py:224] router recive req id 8 cost time 0.10791921615600586 s -INFO 06-24 20:27:53 [manager.py:68] detokenization recv req id 8 cost time 0.10985159873962402 s -DEBUG 06-24 20:27:53 [manager.py:391] Prefill Batch: batch_id=141120055016717393029069260794318309936, time:1750768073.8354192s req_ids:[8] -DEBUG 06-24 20:27:53 [manager.py:391] -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:210.10446548461914ms total_cost_time:210.14952659606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11702 prompt_cache_len:5151 prompt_cache_ratio:0.44018116561271575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 -DEBUG 06-24 20:27:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:53 [batch.py:51] router release req id 8 -INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10765218734741211 s -INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10961055755615234 s -DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=7351774205792844063141412301094091625, time:1750768074.0521193s req_ids:[8] -DEBUG 06-24 20:27:54 [manager.py:391] -ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:53 lightllm_req_id:8 first_token_cost:206.79521560668945ms total_cost_time:206.83956146240234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11703 prompt_cache_len:5151 prompt_cache_ratio:0.44014355293514484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 -DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:54 [batch.py:51] router release req id 8 -INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.1075749397277832 s -INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s -DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=118458180301205669185265281303196868164, time:1750768074.263249s req_ids:[8] -DEBUG 06-24 20:27:54 [manager.py:391] -DEBUG 06-24 20:27:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 48762.511 tokens/s -DEBUG 06-24 20:27:54 [stats.py:37] Avg prompt tokens throughput: 48754.165 tokens/s -DEBUG 06-24 20:27:54 [stats.py:37] Avg generate tokens throughput: 8.346 tokens/s -ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:366.4867877960205ms total_cost_time:366.5306568145752ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11704 prompt_cache_len:5151 prompt_cache_ratio:0.44010594668489406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 -DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:54 [batch.py:51] router release req id 8 -INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10773992538452148 s -INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10962176322937012 s -DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=322676427113483016485968396162406141022, time:1750768074.6353645s req_ids:[8] -DEBUG 06-24 20:27:54 [manager.py:391] -ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:209.87915992736816ms total_cost_time:209.92350578308105ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11705 prompt_cache_len:5151 prompt_cache_ratio:0.4400683468603161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 -DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:54 [batch.py:51] router release req id 8 -INFO 06-24 20:27:54 [manager.py:224] router recive req id 8 cost time 0.10766053199768066 s -INFO 06-24 20:27:54 [manager.py:68] detokenization recv req id 8 cost time 0.10933947563171387 s -DEBUG 06-24 20:27:54 [manager.py:391] Prefill Batch: batch_id=19948050275451180531787381997900946558, time:1750768074.8507817s req_ids:[8] -DEBUG 06-24 20:27:54 [manager.py:391] -ERROR 06-24 20:27:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:206.1176300048828ms total_cost_time:206.1624526977539ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11706 prompt_cache_len:5151 prompt_cache_ratio:0.4400307534597642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 -DEBUG 06-24 20:27:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:54 [batch.py:51] router release req id 8 -INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10815119743347168 s -INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11020803451538086 s -DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=283485054329856772655617090060025275979, time:1750768075.065433s req_ids:[8] -DEBUG 06-24 20:27:55 [manager.py:391] -ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:54 lightllm_req_id:8 first_token_cost:202.06379890441895ms total_cost_time:202.10647583007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11707 prompt_cache_len:5151 prompt_cache_ratio:0.4399931664815922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 -DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:55 [batch.py:51] router release req id 8 -INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10750317573547363 s -INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.1095283031463623 s -DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=178332392696331114758401130756182138498, time:1750768075.274174s req_ids:[8] -DEBUG 06-24 20:27:55 [manager.py:391] -ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:202.8651237487793ms total_cost_time:202.9094696044922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11708 prompt_cache_len:5151 prompt_cache_ratio:0.4399555859241544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 -DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:55 [batch.py:51] router release req id 8 -INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10800290107727051 s -INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.10997557640075684 s -DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=154680509763442937881017367558262132366, time:1750768075.4838557s req_ids:[8] -DEBUG 06-24 20:27:55 [manager.py:391] -ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:207.11183547973633ms total_cost_time:207.1537971496582ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11709 prompt_cache_len:5151 prompt_cache_ratio:0.4399180117858058 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 -DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:55 [batch.py:51] router release req id 8 -INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10807204246520996 s -INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s -DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=125496945061914756773189028925257561314, time:1750768075.696993s req_ids:[8] -DEBUG 06-24 20:27:55 [manager.py:391] -ERROR 06-24 20:27:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:204.20193672180176ms total_cost_time:204.24532890319824ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11710 prompt_cache_len:5151 prompt_cache_ratio:0.4398804440649018 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 -DEBUG 06-24 20:27:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:55 [batch.py:51] router release req id 8 -INFO 06-24 20:27:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:55 [manager.py:224] router recive req id 8 cost time 0.10872483253479004 s -INFO 06-24 20:27:55 [manager.py:68] detokenization recv req id 8 cost time 0.11066985130310059 s -DEBUG 06-24 20:27:55 [manager.py:391] Prefill Batch: batch_id=137413808159458721799519624010656247786, time:1750768075.9059122s req_ids:[8] -DEBUG 06-24 20:27:55 [manager.py:391] -ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:55 lightllm_req_id:8 first_token_cost:359.12036895751953ms total_cost_time:359.1644763946533ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11711 prompt_cache_len:5151 prompt_cache_ratio:0.4398428827597985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 -DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:56 [batch.py:51] router release req id 8 -INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s -INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.10943412780761719 s -DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=291802753025922360764161142604911838441, time:1750768076.2741618s req_ids:[8] -DEBUG 06-24 20:27:56 [manager.py:391] -ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:210.280179977417ms total_cost_time:210.30688285827637ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:11712 prompt_cache_len:5151 prompt_cache_ratio:0.43980532786885246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 -DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:56 [batch.py:51] router release req id 8 -INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10703682899475098 s -INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.10864973068237305 s -DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=8536152989871503548453711768574260706, time:1750768076.4896755s req_ids:[8] -DEBUG 06-24 20:27:56 [manager.py:391] -ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:209.29598808288574ms total_cost_time:209.34057235717773ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11713 prompt_cache_len:5151 prompt_cache_ratio:0.4397677793904209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 -DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:56 [batch.py:51] router release req id 8 -INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10812187194824219 s -INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.11020874977111816 s -DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=133367602897878706255786749831695528410, time:1750768076.7065353s req_ids:[8] -DEBUG 06-24 20:27:56 [manager.py:391] -ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:206.618070602417ms total_cost_time:206.6507339477539ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:11714 prompt_cache_len:5151 prompt_cache_ratio:0.43973023732286154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 -DEBUG 06-24 20:27:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:56 [batch.py:51] router release req id 8 -INFO 06-24 20:27:56 [manager.py:224] router recive req id 8 cost time 0.10730767250061035 s -INFO 06-24 20:27:56 [manager.py:68] detokenization recv req id 8 cost time 0.1085960865020752 s -DEBUG 06-24 20:27:56 [manager.py:391] Prefill Batch: batch_id=221753444052221687053046271775160557634, time:1750768076.9187286s req_ids:[8] -DEBUG 06-24 20:27:56 [manager.py:391] -ERROR 06-24 20:27:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:167.38629341125488ms total_cost_time:167.43087768554688ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11715 prompt_cache_len:5151 prompt_cache_ratio:0.43969270166453267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 -DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:57 [batch.py:51] router release req id 8 -INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.1071317195892334 s -INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.10866498947143555 s -DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=123114096874721942607245944560030692846, time:1750768077.0925298s req_ids:[8] -DEBUG 06-24 20:27:57 [manager.py:391] -ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:56 lightllm_req_id:8 first_token_cost:167.5724983215332ms total_cost_time:167.6158905029297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11716 prompt_cache_len:5151 prompt_cache_ratio:0.4396551724137931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 -DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:57 [batch.py:51] router release req id 8 -INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10835003852844238 s -INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s -DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=298595673115991259221302050929734639141, time:1750768077.2649608s req_ids:[8] -DEBUG 06-24 20:27:57 [manager.py:391] -ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:196.00868225097656ms total_cost_time:196.05088233947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11717 prompt_cache_len:5151 prompt_cache_ratio:0.4396176495690023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 -DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:57 [batch.py:51] router release req id 8 -INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10825037956237793 s -INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.11015987396240234 s -DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=109180299173523834166190172675225781427, time:1750768077.4673362s req_ids:[8] -DEBUG 06-24 20:27:57 [manager.py:391] -ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:380.82122802734375ms total_cost_time:380.86748123168945ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11718 prompt_cache_len:5151 prompt_cache_ratio:0.43958013312852023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 -DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:57 [batch.py:51] router release req id 8 -INFO 06-24 20:27:57 [manager.py:224] router recive req id 8 cost time 0.10854721069335938 s -INFO 06-24 20:27:57 [manager.py:68] detokenization recv req id 8 cost time 0.11049175262451172 s -DEBUG 06-24 20:27:57 [manager.py:391] Prefill Batch: batch_id=169985071211483921028627489385777737921, time:1750768077.8557432s req_ids:[8] -DEBUG 06-24 20:27:57 [manager.py:391] -ERROR 06-24 20:27:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:211.68112754821777ms total_cost_time:211.70282363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11719 prompt_cache_len:5151 prompt_cache_ratio:0.4395426230907074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 -DEBUG 06-24 20:27:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:57 [batch.py:51] router release req id 8 -INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10806989669799805 s -INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s -DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=17175807067889291154752751360815925362, time:1750768078.073327s req_ids:[8] -DEBUG 06-24 20:27:58 [manager.py:391] -ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:57 lightllm_req_id:8 first_token_cost:211.0602855682373ms total_cost_time:211.1055850982666ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11720 prompt_cache_len:5151 prompt_cache_ratio:0.4395051194539249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 -DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:58 [batch.py:51] router release req id 8 -INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10856151580810547 s -INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s -DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=268431165464257031607871144566536213502, time:1750768078.2901986s req_ids:[8] -DEBUG 06-24 20:27:58 [manager.py:391] -ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:211.08317375183105ms total_cost_time:211.12799644470215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11721 prompt_cache_len:5151 prompt_cache_ratio:0.4394676222165344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 -DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:58 [batch.py:51] router release req id 8 -INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s -INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975933074951172 s -DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=300250570234671355712060741176340386179, time:1750768078.506851s req_ids:[8] -DEBUG 06-24 20:27:58 [manager.py:391] -ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:209.4857692718506ms total_cost_time:209.5315456390381ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11722 prompt_cache_len:5151 prompt_cache_ratio:0.4394301313768981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 -DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:58 [batch.py:51] router release req id 8 -INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10726428031921387 s -INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10916948318481445 s -DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=141265128740661020313837351670295099714, time:1750768078.7236798s req_ids:[8] -DEBUG 06-24 20:27:58 [manager.py:391] -ERROR 06-24 20:27:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:210.58893203735352ms total_cost_time:210.6337547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11723 prompt_cache_len:5151 prompt_cache_ratio:0.4393926469333788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 -DEBUG 06-24 20:27:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:58 [batch.py:51] router release req id 8 -INFO 06-24 20:27:58 [manager.py:224] router recive req id 8 cost time 0.10782718658447266 s -INFO 06-24 20:27:58 [manager.py:68] detokenization recv req id 8 cost time 0.10967826843261719 s -DEBUG 06-24 20:27:58 [manager.py:391] Prefill Batch: batch_id=247539787347195394274234829680720956274, time:1750768078.9413345s req_ids:[8] -DEBUG 06-24 20:27:58 [manager.py:391] -ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:58 lightllm_req_id:8 first_token_cost:369.11535263061523ms total_cost_time:369.159460067749ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11724 prompt_cache_len:5151 prompt_cache_ratio:0.43935516888433984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 -DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:59 [batch.py:51] router release req id 8 -INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10908937454223633 s -INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11113405227661133 s -DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=82746934489195862702655088086284734950, time:1750768079.3170936s req_ids:[8] -DEBUG 06-24 20:27:59 [manager.py:391] -ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:207.8683376312256ms total_cost_time:207.91244506835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11725 prompt_cache_len:5151 prompt_cache_ratio:0.439317697228145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 -DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:59 [batch.py:51] router release req id 8 -INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10849928855895996 s -INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.1105189323425293 s -DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=180243610687250467102093286486070958357, time:1750768079.539834s req_ids:[8] -DEBUG 06-24 20:27:59 [manager.py:391] -ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:220.04008293151855ms total_cost_time:220.08609771728516ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11726 prompt_cache_len:5151 prompt_cache_ratio:0.4392802319631588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 -DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:59 [batch.py:51] router release req id 8 -INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10826921463012695 s -INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11030960083007812 s -DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=41800577272912925396776910926254179331, time:1750768079.7587826s req_ids:[8] -DEBUG 06-24 20:27:59 [manager.py:391] -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:27:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:27:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:27:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:27:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:211.1492156982422ms total_cost_time:211.19213104248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11727 prompt_cache_len:5151 prompt_cache_ratio:0.43924277308774623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:27:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 -DEBUG 06-24 20:27:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:27:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:27:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:27:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:27:59 [batch.py:51] router release req id 8 -INFO 06-24 20:27:59 [manager.py:224] router recive req id 8 cost time 0.10863304138183594 s -INFO 06-24 20:27:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061239242553711 s -DEBUG 06-24 20:27:59 [manager.py:391] Prefill Batch: batch_id=193009151310181925319208914731340858143, time:1750768079.9938014s req_ids:[8] -DEBUG 06-24 20:27:59 [manager.py:391] -ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:27:59 lightllm_req_id:8 first_token_cost:233.74342918395996ms total_cost_time:233.78825187683105ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11728 prompt_cache_len:5151 prompt_cache_ratio:0.43920532060027284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 -DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:00 [batch.py:51] router release req id 8 -INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10854983329772949 s -INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.11054039001464844 s -DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=137931543481817374150475295196355490582, time:1750768080.2216394s req_ids:[8] -DEBUG 06-24 20:28:00 [manager.py:391] -ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:218.90759468078613ms total_cost_time:218.95146369934082ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11729 prompt_cache_len:5151 prompt_cache_ratio:0.43916787449910477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 -DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:00 [batch.py:51] router release req id 8 -INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10710811614990234 s -INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.10911822319030762 s -DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=16887711482166531621416952486146028622, time:1750768080.4509778s req_ids:[8] -DEBUG 06-24 20:28:00 [manager.py:391] -ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:222.98216819763184ms total_cost_time:223.02722930908203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11730 prompt_cache_len:5151 prompt_cache_ratio:0.4391304347826087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 -DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:00 [batch.py:51] router release req id 8 -INFO 06-24 20:28:00 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s -INFO 06-24 20:28:00 [manager.py:68] detokenization recv req id 8 cost time 0.11063623428344727 s -DEBUG 06-24 20:28:00 [manager.py:391] Prefill Batch: batch_id=219186160019658772655613434929116824641, time:1750768080.6715834s req_ids:[8] -DEBUG 06-24 20:28:00 [manager.py:391] -ERROR 06-24 20:28:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:377.87652015686035ms total_cost_time:377.92134284973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11731 prompt_cache_len:5151 prompt_cache_ratio:0.4390930014491518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 -DEBUG 06-24 20:28:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:00 [batch.py:51] router release req id 8 -INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10775876045227051 s -INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10984969139099121 s -DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=232963095576801606711003416079581040665, time:1750768081.054746s req_ids:[8] -DEBUG 06-24 20:28:01 [manager.py:391] -ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:00 lightllm_req_id:8 first_token_cost:208.6923122406006ms total_cost_time:208.73618125915527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11732 prompt_cache_len:5151 prompt_cache_ratio:0.4390555744971019 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 -DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:01 [batch.py:51] router release req id 8 -INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10822010040283203 s -INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s -DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=22902933349116449031905180184210097322, time:1750768081.2850924s req_ids:[8] -DEBUG 06-24 20:28:01 [manager.py:391] -ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:225.97765922546387ms total_cost_time:226.02295875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11733 prompt_cache_len:5151 prompt_cache_ratio:0.4390181539248274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 -DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:01 [batch.py:51] router release req id 8 -INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.1077873706817627 s -INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10933446884155273 s -DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=302437762865757099324525960350452340073, time:1750768081.5047631s req_ids:[8] -DEBUG 06-24 20:28:01 [manager.py:391] -ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:209.60259437561035ms total_cost_time:209.64598655700684ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11734 prompt_cache_len:5151 prompt_cache_ratio:0.4389807397306971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 -DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:01 [batch.py:51] router release req id 8 -INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10709691047668457 s -INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.10917520523071289 s -DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=273676361814544893762249845273690597355, time:1750768081.721005s req_ids:[8] -DEBUG 06-24 20:28:01 [manager.py:391] -ERROR 06-24 20:28:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:213.23156356811523ms total_cost_time:213.27590942382812ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11735 prompt_cache_len:5151 prompt_cache_ratio:0.43894333191308055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 -DEBUG 06-24 20:28:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:01 [batch.py:51] router release req id 8 -INFO 06-24 20:28:01 [manager.py:224] router recive req id 8 cost time 0.10862922668457031 s -INFO 06-24 20:28:01 [manager.py:68] detokenization recv req id 8 cost time 0.11074018478393555 s -DEBUG 06-24 20:28:01 [manager.py:391] Prefill Batch: batch_id=277230823868757438219625573536573640479, time:1750768081.9398036s req_ids:[8] -DEBUG 06-24 20:28:01 [manager.py:391] -ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:01 lightllm_req_id:8 first_token_cost:211.05599403381348ms total_cost_time:211.09986305236816ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11736 prompt_cache_len:5151 prompt_cache_ratio:0.43890593047034765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 -DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:02 [batch.py:51] router release req id 8 -INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10837483406066895 s -INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s -DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=13659204043033578966617922654145241138, time:1750768082.1585457s req_ids:[8] -DEBUG 06-24 20:28:02 [manager.py:391] -ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:208.28723907470703ms total_cost_time:208.33301544189453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11737 prompt_cache_len:5151 prompt_cache_ratio:0.43886853540086906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 -DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:02 [batch.py:51] router release req id 8 -INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.1074528694152832 s -INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s -DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=216741883326970456432805906282972959813, time:1750768082.3714192s req_ids:[8] -DEBUG 06-24 20:28:02 [manager.py:391] -ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:363.6476993560791ms total_cost_time:363.6929988861084ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11738 prompt_cache_len:5151 prompt_cache_ratio:0.4388311467030159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 -DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:02 [batch.py:51] router release req id 8 -INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10821032524108887 s -INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s -DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=199879998557326096911081695023540657160, time:1750768082.7401228s req_ids:[8] -DEBUG 06-24 20:28:02 [manager.py:391] -ERROR 06-24 20:28:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:207.2005271911621ms total_cost_time:207.2451114654541ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11739 prompt_cache_len:5151 prompt_cache_ratio:0.4387937643751597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 -DEBUG 06-24 20:28:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:02 [batch.py:51] router release req id 8 -INFO 06-24 20:28:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:02 [manager.py:224] router recive req id 8 cost time 0.10881209373474121 s -INFO 06-24 20:28:02 [manager.py:68] detokenization recv req id 8 cost time 0.11099910736083984 s -DEBUG 06-24 20:28:02 [manager.py:391] Prefill Batch: batch_id=141765117333804516487137733271404625967, time:1750768082.9591322s req_ids:[8] -DEBUG 06-24 20:28:02 [manager.py:391] -ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:02 lightllm_req_id:8 first_token_cost:215.6517505645752ms total_cost_time:215.69538116455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11740 prompt_cache_len:5151 prompt_cache_ratio:0.43875638841567294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 -DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:03 [batch.py:51] router release req id 8 -INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10770082473754883 s -INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s -DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=116470087494442029273376740998088900323, time:1750768083.1755373s req_ids:[8] -DEBUG 06-24 20:28:03 [manager.py:391] -ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.3677520751953ms total_cost_time:209.4120979309082ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11741 prompt_cache_len:5151 prompt_cache_ratio:0.4387190188229282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 -DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:03 [batch.py:51] router release req id 8 -INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10846614837646484 s -INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.11050963401794434 s -DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=174600575084647101509398252152531689411, time:1750768083.3925076s req_ids:[8] -DEBUG 06-24 20:28:03 [manager.py:391] -ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.31291580200195ms total_cost_time:209.35654640197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11742 prompt_cache_len:5151 prompt_cache_ratio:0.4386816555952989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 -DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:03 [batch.py:51] router release req id 8 -INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10750269889831543 s -INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.10956215858459473 s -DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=171084241270683905995775587476250109729, time:1750768083.6075315s req_ids:[8] -DEBUG 06-24 20:28:03 [manager.py:391] -ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.5780372619629ms total_cost_time:209.6245288848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11743 prompt_cache_len:5151 prompt_cache_ratio:0.438644298731159 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 -DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:03 [batch.py:51] router release req id 8 -INFO 06-24 20:28:03 [manager.py:224] router recive req id 8 cost time 0.10937333106994629 s -INFO 06-24 20:28:03 [manager.py:68] detokenization recv req id 8 cost time 0.11142587661743164 s -DEBUG 06-24 20:28:03 [manager.py:391] Prefill Batch: batch_id=279051663761698478629226270718037497749, time:1750768083.8243406s req_ids:[8] -DEBUG 06-24 20:28:03 [manager.py:391] -ERROR 06-24 20:28:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:209.62238311767578ms total_cost_time:209.66458320617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11744 prompt_cache_len:5151 prompt_cache_ratio:0.43860694822888285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 -DEBUG 06-24 20:28:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:03 [batch.py:51] router release req id 8 -INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.1070394515991211 s -INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.1087961196899414 s -DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=39357457925060152693190935701929217526, time:1750768084.039799s req_ids:[8] -DEBUG 06-24 20:28:04 [manager.py:391] -ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:28:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 47960.012 tokens/s -DEBUG 06-24 20:28:04 [stats.py:37] Avg prompt tokens throughput: 47951.733 tokens/s -DEBUG 06-24 20:28:04 [stats.py:37] Avg generate tokens throughput: 8.279 tokens/s -INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:03 lightllm_req_id:8 first_token_cost:364.18843269348145ms total_cost_time:364.23325538635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11745 prompt_cache_len:5151 prompt_cache_ratio:0.4385696040868455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 -DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:04 [batch.py:51] router release req id 8 -INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s -INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.11075687408447266 s -DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=248365660691166201675798718354588871138, time:1750768084.411614s req_ids:[8] -DEBUG 06-24 20:28:04 [manager.py:391] -ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:209.43021774291992ms total_cost_time:209.4733715057373ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11746 prompt_cache_len:5151 prompt_cache_ratio:0.43853226630342246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 -DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:04 [batch.py:51] router release req id 8 -INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s -INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.10946536064147949 s -DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=219002505410073411822065592928498680227, time:1750768084.6277595s req_ids:[8] -DEBUG 06-24 20:28:04 [manager.py:391] -ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:209.32602882385254ms total_cost_time:209.36846733093262ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11747 prompt_cache_len:5151 prompt_cache_ratio:0.4384949348769899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 -DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:04 [batch.py:51] router release req id 8 -INFO 06-24 20:28:04 [manager.py:224] router recive req id 8 cost time 0.10737466812133789 s -INFO 06-24 20:28:04 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s -DEBUG 06-24 20:28:04 [manager.py:391] Prefill Batch: batch_id=232389293755963417898113195872904166252, time:1750768084.852123s req_ids:[8] -DEBUG 06-24 20:28:04 [manager.py:391] -ERROR 06-24 20:28:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:220.10564804077148ms total_cost_time:220.16215324401855ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11748 prompt_cache_len:5151 prompt_cache_ratio:0.4384576098059244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 -DEBUG 06-24 20:28:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:04 [batch.py:51] router release req id 8 -INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10694551467895508 s -INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.10907530784606934 s -DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=16345388320489121440813828911658635315, time:1750768085.0783532s req_ids:[8] -DEBUG 06-24 20:28:05 [manager.py:391] -ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:04 lightllm_req_id:8 first_token_cost:216.4173126220703ms total_cost_time:216.461181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11749 prompt_cache_len:5151 prompt_cache_ratio:0.4384202910886033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 -DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:05 [batch.py:51] router release req id 8 -INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s -INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.11056995391845703 s -DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=185321277351292029103244790367876229750, time:1750768085.292266s req_ids:[8] -DEBUG 06-24 20:28:05 [manager.py:391] -ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:209.48219299316406ms total_cost_time:209.52701568603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11750 prompt_cache_len:5151 prompt_cache_ratio:0.43838297872340426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 -DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:05 [batch.py:51] router release req id 8 -INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10714459419250488 s -INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.1092221736907959 s -DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=237472826402998696739489461193778554670, time:1750768085.5088234s req_ids:[8] -DEBUG 06-24 20:28:05 [manager.py:391] -ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:203.66287231445312ms total_cost_time:203.7060260772705ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11751 prompt_cache_len:5151 prompt_cache_ratio:0.43834567270870567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 -DEBUG 06-24 20:28:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:05 [batch.py:51] router release req id 8 -INFO 06-24 20:28:05 [manager.py:224] router recive req id 8 cost time 0.10820198059082031 s -INFO 06-24 20:28:05 [manager.py:68] detokenization recv req id 8 cost time 0.11025142669677734 s -DEBUG 06-24 20:28:05 [manager.py:391] Prefill Batch: batch_id=198220053094540015205345008884535216068, time:1750768085.7191164s req_ids:[8] -DEBUG 06-24 20:28:05 [manager.py:391] -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:05 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:373.42143058776855ms total_cost_time:373.47960472106934ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:11752 prompt_cache_len:5151 prompt_cache_ratio:0.43830837304288633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 -DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:06 [batch.py:51] router release req id 8 -INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10702800750732422 s -INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.10899138450622559 s -DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=60737041665867790769967088915342167743, time:1750768086.0985873s req_ids:[8] -DEBUG 06-24 20:28:06 [manager.py:391] -ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:05 lightllm_req_id:8 first_token_cost:209.09714698791504ms total_cost_time:209.14006233215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11753 prompt_cache_len:5151 prompt_cache_ratio:0.4382710797243257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 -DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:06 [batch.py:51] router release req id 8 -INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10803699493408203 s -INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11010026931762695 s -DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=310576628088192920844417999186137943529, time:1750768086.3162565s req_ids:[8] -DEBUG 06-24 20:28:06 [manager.py:391] -ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:211.1837863922119ms total_cost_time:211.2886905670166ms,out_token_counter:1 mean_per_token_cost_time: 0.1049041748046875ms prompt_token_num:11754 prompt_cache_len:5151 prompt_cache_ratio:0.4382337927514038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 -DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:06 [batch.py:51] router release req id 8 -INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10841774940490723 s -INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040496826171875 s -DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=331253815600497617905794142965636441641, time:1750768086.5411499s req_ids:[8] -DEBUG 06-24 20:28:06 [manager.py:391] -ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:219.4066047668457ms total_cost_time:219.46191787719727ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:11755 prompt_cache_len:5151 prompt_cache_ratio:0.43819651212250105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 -DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:06 [batch.py:51] router release req id 8 -INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10896539688110352 s -INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.1110689640045166 s -DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=317551484947557617882648294896856165063, time:1750768086.7597365s req_ids:[8] -DEBUG 06-24 20:28:06 [manager.py:391] -ERROR 06-24 20:28:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:210.3722095489502ms total_cost_time:210.42346954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:11756 prompt_cache_len:5151 prompt_cache_ratio:0.43815923783599864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 -DEBUG 06-24 20:28:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:06 [batch.py:51] router release req id 8 -INFO 06-24 20:28:06 [manager.py:224] router recive req id 8 cost time 0.10811877250671387 s -INFO 06-24 20:28:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023473739624023 s -DEBUG 06-24 20:28:06 [manager.py:391] Prefill Batch: batch_id=194318930416105455896631513294107967569, time:1750768086.9769719s req_ids:[8] -DEBUG 06-24 20:28:06 [manager.py:391] -ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:06 lightllm_req_id:8 first_token_cost:209.34247970581055ms total_cost_time:209.38825607299805ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11757 prompt_cache_len:5151 prompt_cache_ratio:0.43812196989027813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 -DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:07 [batch.py:51] router release req id 8 -INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10828828811645508 s -INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11034345626831055 s -DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=97768641497861275360251603350891425098, time:1750768087.1934514s req_ids:[8] -DEBUG 06-24 20:28:07 [manager.py:391] -ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:209.4864845275879ms total_cost_time:209.52820777893066ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11758 prompt_cache_len:5151 prompt_cache_ratio:0.4380847082837217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 -DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:07 [batch.py:51] router release req id 8 -INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.209092378616333 s -INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.21081900596618652 s -DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=73182450133938064867049573723693014090, time:1750768087.5421376s req_ids:[8] -DEBUG 06-24 20:28:07 [manager.py:391] -ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:330.7652473449707ms total_cost_time:330.82032203674316ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:11759 prompt_cache_len:5151 prompt_cache_ratio:0.43804745301471215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 -DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:07 [batch.py:51] router release req id 8 -INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10844659805297852 s -INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052298545837402 s -DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=250708015961930964165876165364309421444, time:1750768087.7461414s req_ids:[8] -DEBUG 06-24 20:28:07 [manager.py:391] -ERROR 06-24 20:28:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:208.6927890777588ms total_cost_time:208.73570442199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11760 prompt_cache_len:5151 prompt_cache_ratio:0.43801020408163266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 -DEBUG 06-24 20:28:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:07 [batch.py:51] router release req id 8 -INFO 06-24 20:28:07 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s -INFO 06-24 20:28:07 [manager.py:68] detokenization recv req id 8 cost time 0.11030960083007812 s -DEBUG 06-24 20:28:07 [manager.py:391] Prefill Batch: batch_id=287084226738558720981629586349275663346, time:1750768087.9614441s req_ids:[8] -DEBUG 06-24 20:28:07 [manager.py:391] -ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:07 lightllm_req_id:8 first_token_cost:213.78350257873535ms total_cost_time:213.8066291809082ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:11761 prompt_cache_len:5151 prompt_cache_ratio:0.43797296148286713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 -DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:08 [batch.py:51] router release req id 8 -INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10530710220336914 s -INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.1074521541595459 s -DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=323240945704873588669385453196188929316, time:1750768088.181811s req_ids:[8] -DEBUG 06-24 20:28:08 [manager.py:391] -ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:210.31975746154785ms total_cost_time:210.36386489868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11762 prompt_cache_len:5151 prompt_cache_ratio:0.43793572521679985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 -DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:08 [batch.py:51] router release req id 8 -INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.1080317497253418 s -INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s -DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=206130770807291696312720620133921694370, time:1750768088.4068727s req_ids:[8] -DEBUG 06-24 20:28:08 [manager.py:391] -ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:223.92630577087402ms total_cost_time:223.9689826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11763 prompt_cache_len:5151 prompt_cache_ratio:0.43789849528181585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 -DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:08 [batch.py:51] router release req id 8 -INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10809636116027832 s -INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11029219627380371 s -DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=201400141774151527664629195418059646792, time:1750768088.6269028s req_ids:[8] -DEBUG 06-24 20:28:08 [manager.py:391] -ERROR 06-24 20:28:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:209.62977409362793ms total_cost_time:209.6731662750244ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11764 prompt_cache_len:5151 prompt_cache_ratio:0.43786127167630057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 -DEBUG 06-24 20:28:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:08 [batch.py:51] router release req id 8 -INFO 06-24 20:28:08 [manager.py:224] router recive req id 8 cost time 0.10841631889343262 s -INFO 06-24 20:28:08 [manager.py:68] detokenization recv req id 8 cost time 0.11055374145507812 s -DEBUG 06-24 20:28:08 [manager.py:391] Prefill Batch: batch_id=279945297872058651323189462055745554212, time:1750768088.8443935s req_ids:[8] -DEBUG 06-24 20:28:08 [manager.py:391] -ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:08 lightllm_req_id:8 first_token_cost:378.57985496520996ms total_cost_time:378.62610816955566ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11765 prompt_cache_len:5151 prompt_cache_ratio:0.43782405439864003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 -DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:09 [batch.py:51] router release req id 8 -INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s -INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.1104421615600586 s -DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=266711111893400671486174516663542324103, time:1750768089.2280886s req_ids:[8] -DEBUG 06-24 20:28:09 [manager.py:391] -ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:206.74824714660645ms total_cost_time:206.79378509521484ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11766 prompt_cache_len:5151 prompt_cache_ratio:0.4377868434472208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 -DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:09 [batch.py:51] router release req id 8 -INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10811853408813477 s -INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.11001968383789062 s -DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=198777377601040667024239575301316246241, time:1750768089.44658s req_ids:[8] -DEBUG 06-24 20:28:09 [manager.py:391] -ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:213.54246139526367ms total_cost_time:213.58418464660645ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11767 prompt_cache_len:5151 prompt_cache_ratio:0.43774963882043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 -DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:09 [batch.py:51] router release req id 8 -INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10827183723449707 s -INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.1105051040649414 s -DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=179055969670093476718412706870680100848, time:1750768089.6621714s req_ids:[8] -DEBUG 06-24 20:28:09 [manager.py:391] -ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:210.01338958740234ms total_cost_time:210.05678176879883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11768 prompt_cache_len:5151 prompt_cache_ratio:0.4377124405166553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 -DEBUG 06-24 20:28:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:09 [batch.py:51] router release req id 8 -INFO 06-24 20:28:09 [manager.py:224] router recive req id 8 cost time 0.10811710357666016 s -INFO 06-24 20:28:09 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s -DEBUG 06-24 20:28:09 [manager.py:391] Prefill Batch: batch_id=155224937576569538224866764275714556448, time:1750768089.8765314s req_ids:[8] -DEBUG 06-24 20:28:09 [manager.py:391] -ERROR 06-24 20:28:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:203.39298248291016ms total_cost_time:203.43828201293945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11769 prompt_cache_len:5151 prompt_cache_ratio:0.43767524853428497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 -DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:10 [batch.py:51] router release req id 8 -INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10827755928039551 s -INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=172156680615047922087924619668949849756, time:1750768090.098802s req_ids:[8] -DEBUG 06-24 20:28:10 [manager.py:391] -ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:09 lightllm_req_id:8 first_token_cost:221.9533920288086ms total_cost_time:221.99654579162598ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11770 prompt_cache_len:5151 prompt_cache_ratio:0.43763806287170776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 -DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:10 [batch.py:51] router release req id 8 -INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10965251922607422 s -INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11163115501403809 s -DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=70288172948057389566139017981008272936, time:1750768090.3166525s req_ids:[8] -DEBUG 06-24 20:28:10 [manager.py:391] -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:205.08360862731934ms total_cost_time:205.12843132019043ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11771 prompt_cache_len:5151 prompt_cache_ratio:0.4376008835273129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 -DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:10 [batch.py:51] router release req id 8 -INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10818147659301758 s -INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11018848419189453 s -DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=245776520327159263872278801652839137733, time:1750768090.5273278s req_ids:[8] -DEBUG 06-24 20:28:10 [manager.py:391] -ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:367.7351474761963ms total_cost_time:367.779016494751ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11772 prompt_cache_len:5151 prompt_cache_ratio:0.43756371049949033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 -DEBUG 06-24 20:28:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:10 [batch.py:51] router release req id 8 -INFO 06-24 20:28:10 [manager.py:224] router recive req id 8 cost time 0.10880804061889648 s -INFO 06-24 20:28:10 [manager.py:68] detokenization recv req id 8 cost time 0.11085057258605957 s -DEBUG 06-24 20:28:10 [manager.py:391] Prefill Batch: batch_id=151540708380727966336750095080137415759, time:1750768090.901097s req_ids:[8] -DEBUG 06-24 20:28:10 [manager.py:391] -ERROR 06-24 20:28:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:10 lightllm_req_id:8 first_token_cost:208.9221477508545ms total_cost_time:208.96601676940918ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11773 prompt_cache_len:5151 prompt_cache_ratio:0.4375265437866304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 -DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:11 [batch.py:51] router release req id 8 -INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.10836935043334961 s -INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.1105356216430664 s -DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=292828333093874639914792846125745919281, time:1750768091.1174147s req_ids:[8] -DEBUG 06-24 20:28:11 [manager.py:391] -ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:207.2579860687256ms total_cost_time:207.30137825012207ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11774 prompt_cache_len:5151 prompt_cache_ratio:0.4374893833871242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 -DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:11 [batch.py:51] router release req id 8 -INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.10823297500610352 s -INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029839515686035 s -DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=155677717968568859003853376743592313835, time:1750768091.3332522s req_ids:[8] -DEBUG 06-24 20:28:11 [manager.py:391] -ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:211.8062973022461ms total_cost_time:211.8511199951172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11775 prompt_cache_len:5151 prompt_cache_ratio:0.43745222929936306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 -DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:11 [batch.py:51] router release req id 8 -INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.1081235408782959 s -INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11011362075805664 s -DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=81386659402402937026926758588876546006, time:1750768091.560997s req_ids:[8] -DEBUG 06-24 20:28:11 [manager.py:391] -ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:223.18434715270996ms total_cost_time:223.22845458984375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11776 prompt_cache_len:5151 prompt_cache_ratio:0.43741508152173914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 -DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:11 [batch.py:51] router release req id 8 -INFO 06-24 20:28:11 [manager.py:224] router recive req id 8 cost time 0.1081547737121582 s -INFO 06-24 20:28:11 [manager.py:68] detokenization recv req id 8 cost time 0.11022567749023438 s -DEBUG 06-24 20:28:11 [manager.py:391] Prefill Batch: batch_id=262425262463702254538402996595748333561, time:1750768091.7916586s req_ids:[8] -DEBUG 06-24 20:28:11 [manager.py:391] -ERROR 06-24 20:28:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:224.32518005371094ms total_cost_time:224.36833381652832ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11777 prompt_cache_len:5151 prompt_cache_ratio:0.43737794005264496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 -DEBUG 06-24 20:28:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:11 [batch.py:51] router release req id 8 -INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s -INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.10989999771118164 s -DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=75346722665892757104002061879379297048, time:1750768092.0109773s req_ids:[8] -DEBUG 06-24 20:28:12 [manager.py:391] -ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:11 lightllm_req_id:8 first_token_cost:210.86382865905762ms total_cost_time:210.9086513519287ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11778 prompt_cache_len:5151 prompt_cache_ratio:0.4373408048904738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 -DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:12 [batch.py:51] router release req id 8 -INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10826396942138672 s -INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.11035728454589844 s -DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=18353629191092810280625008056037171545, time:1750768092.2273655s req_ids:[8] -DEBUG 06-24 20:28:12 [manager.py:391] -ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:364.14384841918945ms total_cost_time:364.18867111206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11779 prompt_cache_len:5151 prompt_cache_ratio:0.43730367603361914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 -DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:12 [batch.py:51] router release req id 8 -INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10778522491455078 s -INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.10982370376586914 s -DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=154272610327282765054887513662055465125, time:1750768092.599072s req_ids:[8] -DEBUG 06-24 20:28:12 [manager.py:391] -ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:209.57684516906738ms total_cost_time:209.62023735046387ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11780 prompt_cache_len:5151 prompt_cache_ratio:0.4372665534804754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 -DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:12 [batch.py:51] router release req id 8 -INFO 06-24 20:28:12 [manager.py:224] router recive req id 8 cost time 0.10825753211975098 s -INFO 06-24 20:28:12 [manager.py:68] detokenization recv req id 8 cost time 0.1103217601776123 s -DEBUG 06-24 20:28:12 [manager.py:391] Prefill Batch: batch_id=305688429886744147085185185482314613580, time:1750768092.8162234s req_ids:[8] -DEBUG 06-24 20:28:12 [manager.py:391] -ERROR 06-24 20:28:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:211.07792854309082ms total_cost_time:211.12346649169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11781 prompt_cache_len:5151 prompt_cache_ratio:0.43722943722943725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 -DEBUG 06-24 20:28:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:12 [batch.py:51] router release req id 8 -INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.1073305606842041 s -INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.10935115814208984 s -DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=77087504656372173359191015234698670801, time:1750768093.0333297s req_ids:[8] -DEBUG 06-24 20:28:13 [manager.py:391] -ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:12 lightllm_req_id:8 first_token_cost:209.60664749145508ms total_cost_time:209.65123176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11782 prompt_cache_len:5151 prompt_cache_ratio:0.4371923272789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 -DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:13 [batch.py:51] router release req id 8 -INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s -INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.11041808128356934 s -DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=199551473864985628619796616302214400179, time:1750768093.249657s req_ids:[8] -DEBUG 06-24 20:28:13 [manager.py:391] -ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:209.50055122375488ms total_cost_time:209.54489707946777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11783 prompt_cache_len:5151 prompt_cache_ratio:0.4371552236272596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 -DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:13 [batch.py:51] router release req id 8 -INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10724830627441406 s -INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.1094064712524414 s -DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=303566339384537851143977123240505415120, time:1750768093.4643607s req_ids:[8] -DEBUG 06-24 20:28:13 [manager.py:391] -ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:200.93393325805664ms total_cost_time:200.97684860229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11784 prompt_cache_len:5151 prompt_cache_ratio:0.4371181262729124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 -DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:13 [batch.py:51] router release req id 8 -INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.1077723503112793 s -INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.10993528366088867 s -DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=16969174652299307381901726374040017416, time:1750768093.6806145s req_ids:[8] -DEBUG 06-24 20:28:13 [manager.py:391] -ERROR 06-24 20:28:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:216.7508602142334ms total_cost_time:216.7954444885254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11785 prompt_cache_len:5151 prompt_cache_ratio:0.4370810352142554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 -DEBUG 06-24 20:28:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:13 [batch.py:51] router release req id 8 -INFO 06-24 20:28:13 [manager.py:224] router recive req id 8 cost time 0.10838556289672852 s -INFO 06-24 20:28:13 [manager.py:68] detokenization recv req id 8 cost time 0.11049056053161621 s -DEBUG 06-24 20:28:13 [manager.py:391] Prefill Batch: batch_id=318423791699726281093029830294773070732, time:1750768093.896857s req_ids:[8] -DEBUG 06-24 20:28:13 [manager.py:391] -ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:13 lightllm_req_id:8 first_token_cost:358.80303382873535ms total_cost_time:358.84952545166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11786 prompt_cache_len:5151 prompt_cache_ratio:0.4370439504496861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 -DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:14 [batch.py:51] router release req id 8 -INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10734319686889648 s -INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10933423042297363 s -DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=332543445246028786268529534042257138541, time:1750768094.2621803s req_ids:[8] -DEBUG 06-24 20:28:14 [manager.py:391] -ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:28:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 49118.793 tokens/s -DEBUG 06-24 20:28:14 [stats.py:37] Avg prompt tokens throughput: 49110.445 tokens/s -DEBUG 06-24 20:28:14 [stats.py:37] Avg generate tokens throughput: 8.348 tokens/s -INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:206.16579055786133ms total_cost_time:206.19440078735352ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:11787 prompt_cache_len:5151 prompt_cache_ratio:0.43700687197760246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 -DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:14 [batch.py:51] router release req id 8 -INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10663533210754395 s -INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10866832733154297 s -DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=237253711178498269077398061694357211369, time:1750768094.4734743s req_ids:[8] -DEBUG 06-24 20:28:14 [manager.py:391] -ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:207.10372924804688ms total_cost_time:207.14902877807617ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11788 prompt_cache_len:5151 prompt_cache_ratio:0.4369697997964031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 -DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:14 [batch.py:51] router release req id 8 -INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.1080009937286377 s -INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s -DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=29883778546119663816269898175348468876, time:1750768094.6860876s req_ids:[8] -DEBUG 06-24 20:28:14 [manager.py:391] -ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:210.22677421569824ms total_cost_time:210.27135848999023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11789 prompt_cache_len:5151 prompt_cache_ratio:0.4369327339044872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 -DEBUG 06-24 20:28:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:14 [batch.py:51] router release req id 8 -INFO 06-24 20:28:14 [manager.py:224] router recive req id 8 cost time 0.10596108436584473 s -INFO 06-24 20:28:14 [manager.py:68] detokenization recv req id 8 cost time 0.10810160636901855 s -DEBUG 06-24 20:28:14 [manager.py:391] Prefill Batch: batch_id=4516006987389106702425820282185963035, time:1750768094.9009302s req_ids:[8] -DEBUG 06-24 20:28:14 [manager.py:391] -ERROR 06-24 20:28:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:14 lightllm_req_id:8 first_token_cost:207.8700065612793ms total_cost_time:207.9143524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11790 prompt_cache_len:5151 prompt_cache_ratio:0.43689567430025444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 -DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:15 [batch.py:51] router release req id 8 -INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.1083531379699707 s -INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.11025500297546387 s -DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=156086780781323452555912073324277609630, time:1750768095.1155972s req_ids:[8] -DEBUG 06-24 20:28:15 [manager.py:391] -ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:209.6271514892578ms total_cost_time:209.6719741821289ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11791 prompt_cache_len:5151 prompt_cache_ratio:0.436858620982105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 -DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:15 [batch.py:51] router release req id 8 -INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.10849714279174805 s -INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.11052966117858887 s -DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=17948471344240426159765086055091613871, time:1750768095.3318112s req_ids:[8] -DEBUG 06-24 20:28:15 [manager.py:391] -ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:208.94932746887207ms total_cost_time:208.99510383605957ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11792 prompt_cache_len:5151 prompt_cache_ratio:0.4368215739484396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 -DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:15 [batch.py:51] router release req id 8 -INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.20846271514892578 s -INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.2101142406463623 s -DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=143071561961617562834656650991597682719, time:1750768095.6811976s req_ids:[8] -DEBUG 06-24 20:28:15 [manager.py:391] -ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:326.51209831237793ms total_cost_time:326.5392780303955ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11793 prompt_cache_len:5151 prompt_cache_ratio:0.43678453319765964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 -DEBUG 06-24 20:28:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:15 [batch.py:51] router release req id 8 -INFO 06-24 20:28:15 [manager.py:224] router recive req id 8 cost time 0.10681748390197754 s -INFO 06-24 20:28:15 [manager.py:68] detokenization recv req id 8 cost time 0.10880923271179199 s -DEBUG 06-24 20:28:15 [manager.py:391] Prefill Batch: batch_id=139562136213725144745066632861752395607, time:1750768095.8797278s req_ids:[8] -DEBUG 06-24 20:28:15 [manager.py:391] -ERROR 06-24 20:28:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:205.5964469909668ms total_cost_time:205.6412696838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11794 prompt_cache_len:5151 prompt_cache_ratio:0.43674749872816687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 -DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:16 [batch.py:51] router release req id 8 -INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.108245849609375 s -INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10972833633422852 s -DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=100661841516723656848895906294567805886, time:1750768096.089697s req_ids:[8] -DEBUG 06-24 20:28:16 [manager.py:391] -ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:15 lightllm_req_id:8 first_token_cost:207.30280876159668ms total_cost_time:207.34930038452148ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11795 prompt_cache_len:5151 prompt_cache_ratio:0.4367104705383637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 -DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:16 [batch.py:51] router release req id 8 -INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10730433464050293 s -INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10918903350830078 s -DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=251771144552519883419114771388866462358, time:1750768096.3046787s req_ids:[8] -DEBUG 06-24 20:28:16 [manager.py:391] -ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:209.13171768188477ms total_cost_time:209.17415618896484ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11796 prompt_cache_len:5151 prompt_cache_ratio:0.4366734486266531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 -DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:16 [batch.py:51] router release req id 8 -INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10874557495117188 s -INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.11082100868225098 s -DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=92744913844185650375440115844941880659, time:1750768096.5200891s req_ids:[8] -DEBUG 06-24 20:28:16 [manager.py:391] -ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:208.48870277404785ms total_cost_time:208.53209495544434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11797 prompt_cache_len:5151 prompt_cache_ratio:0.4366364329914385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 -DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:16 [batch.py:51] router release req id 8 -INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10706806182861328 s -INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.1091909408569336 s -DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=70582019436436509846692293290874934186, time:1750768096.7407508s req_ids:[8] -DEBUG 06-24 20:28:16 [manager.py:391] -ERROR 06-24 20:28:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:209.6545696258545ms total_cost_time:209.7005844116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11798 prompt_cache_len:5151 prompt_cache_ratio:0.4365994236311239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 -DEBUG 06-24 20:28:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:16 [batch.py:51] router release req id 8 -INFO 06-24 20:28:16 [manager.py:224] router recive req id 8 cost time 0.10796332359313965 s -INFO 06-24 20:28:16 [manager.py:68] detokenization recv req id 8 cost time 0.10985374450683594 s -DEBUG 06-24 20:28:16 [manager.py:391] Prefill Batch: batch_id=105109380332829771648179799339229659551, time:1750768096.9502997s req_ids:[8] -DEBUG 06-24 20:28:16 [manager.py:391] -INFO 06-24 20:28:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:16 lightllm_req_id:8 first_token_cost:375.4255771636963ms total_cost_time:375.4580020904541ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:11799 prompt_cache_len:5151 prompt_cache_ratio:0.4365624205441139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 -DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:17 [batch.py:51] router release req id 8 -INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10823321342468262 s -INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.11028552055358887 s -DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=242688798368276591537192831802729369142, time:1750768097.3388922s req_ids:[8] -DEBUG 06-24 20:28:17 [manager.py:391] -ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:214.60986137390137ms total_cost_time:214.65277671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11800 prompt_cache_len:5151 prompt_cache_ratio:0.43652542372881353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 -DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:17 [batch.py:51] router release req id 8 -INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s -INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.10983538627624512 s -DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=260862670787389057212463206165816702392, time:1750768097.550916s req_ids:[8] -DEBUG 06-24 20:28:17 [manager.py:391] -ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:199.9659538269043ms total_cost_time:200.00743865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:11801 prompt_cache_len:5151 prompt_cache_ratio:0.4364884331836285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 -DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:17 [batch.py:51] router release req id 8 -INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.1082301139831543 s -INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.11033821105957031 s -DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=117357242846041769403085059704811999069, time:1750768097.7580574s req_ids:[8] -DEBUG 06-24 20:28:17 [manager.py:391] -ERROR 06-24 20:28:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:203.00054550170898ms total_cost_time:203.04250717163086ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:11802 prompt_cache_len:5151 prompt_cache_ratio:0.4364514489069649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 -DEBUG 06-24 20:28:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:17 [batch.py:51] router release req id 8 -INFO 06-24 20:28:17 [manager.py:224] router recive req id 8 cost time 0.10737204551696777 s -INFO 06-24 20:28:17 [manager.py:68] detokenization recv req id 8 cost time 0.10946989059448242 s -DEBUG 06-24 20:28:17 [manager.py:391] Prefill Batch: batch_id=193994694527072481628987739397096957690, time:1750768097.969149s req_ids:[8] -DEBUG 06-24 20:28:17 [manager.py:391] -ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:17 lightllm_req_id:8 first_token_cost:203.29833030700684ms total_cost_time:203.34386825561523ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11803 prompt_cache_len:5151 prompt_cache_ratio:0.4364144708972295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 -DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:18 [batch.py:51] router release req id 8 -INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.10716652870178223 s -INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10914850234985352 s -DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=282470026420590865566027773141518107796, time:1750768098.178189s req_ids:[8] -DEBUG 06-24 20:28:18 [manager.py:391] -ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:202.53872871398926ms total_cost_time:202.58116722106934ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11804 prompt_cache_len:5151 prompt_cache_ratio:0.43637749915282953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 -DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:18 [batch.py:51] router release req id 8 -INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.10714077949523926 s -INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10923171043395996 s -DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=273456528324718792725456617688895046678, time:1750768098.3888886s req_ids:[8] -DEBUG 06-24 20:28:18 [manager.py:391] -ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:210.53791046142578ms total_cost_time:210.57629585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:11805 prompt_cache_len:5151 prompt_cache_ratio:0.4363405336721728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 -DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:18 [batch.py:51] router release req id 8 -INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.1079549789428711 s -INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.10989665985107422 s -DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=310544552687069659392370717896671726830, time:1750768098.6038795s req_ids:[8] -DEBUG 06-24 20:28:18 [manager.py:391] -ERROR 06-24 20:28:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:380.60712814331055ms total_cost_time:380.65171241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11806 prompt_cache_len:5151 prompt_cache_ratio:0.43630357445366763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 -DEBUG 06-24 20:28:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:18 [batch.py:51] router release req id 8 -INFO 06-24 20:28:18 [manager.py:224] router recive req id 8 cost time 0.1082601547241211 s -INFO 06-24 20:28:18 [manager.py:68] detokenization recv req id 8 cost time 0.11040663719177246 s -DEBUG 06-24 20:28:18 [manager.py:391] Prefill Batch: batch_id=55522329061015137872685148641918215246, time:1750768098.990727s req_ids:[8] -DEBUG 06-24 20:28:18 [manager.py:391] -ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:18 lightllm_req_id:8 first_token_cost:211.06576919555664ms total_cost_time:211.10939979553223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11807 prompt_cache_len:5151 prompt_cache_ratio:0.4362666214957229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 -DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:19 [batch.py:51] router release req id 8 -INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10822105407714844 s -INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.1101984977722168 s -DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=111318464653381069148485436183812852771, time:1750768099.2082531s req_ids:[8] -DEBUG 06-24 20:28:19 [manager.py:391] -ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:207.40962028503418ms total_cost_time:207.45372772216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11808 prompt_cache_len:5151 prompt_cache_ratio:0.43622967479674796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 -DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:19 [batch.py:51] router release req id 8 -INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10794281959533691 s -INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.10980105400085449 s -DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=243178789299844776893965681617541168567, time:1750768099.4340687s req_ids:[8] -DEBUG 06-24 20:28:19 [manager.py:391] -ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:223.90103340148926ms total_cost_time:223.94514083862305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11809 prompt_cache_len:5151 prompt_cache_ratio:0.4361927343551528 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 -DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:19 [batch.py:51] router release req id 8 -INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.1078801155090332 s -INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.11002492904663086 s -DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=201992601072673294364673796158752252436, time:1750768099.6541033s req_ids:[8] -DEBUG 06-24 20:28:19 [manager.py:391] -ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:207.09562301635742ms total_cost_time:207.1380615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11810 prompt_cache_len:5151 prompt_cache_ratio:0.436155800169348 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 -DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:19 [batch.py:51] router release req id 8 -INFO 06-24 20:28:19 [manager.py:224] router recive req id 8 cost time 0.10807633399963379 s -INFO 06-24 20:28:19 [manager.py:68] detokenization recv req id 8 cost time 0.10999155044555664 s -DEBUG 06-24 20:28:19 [manager.py:391] Prefill Batch: batch_id=82793900537183976138319200893850777751, time:1750768099.8670921s req_ids:[8] -DEBUG 06-24 20:28:19 [manager.py:391] -ERROR 06-24 20:28:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:202.66437530517578ms total_cost_time:202.70895957946777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11811 prompt_cache_len:5151 prompt_cache_ratio:0.4361188722377445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 -DEBUG 06-24 20:28:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:19 [batch.py:51] router release req id 8 -INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10685157775878906 s -INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10877561569213867 s -DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=12649352316153921278264767403344495674, time:1750768100.0781999s req_ids:[8] -DEBUG 06-24 20:28:20 [manager.py:391] -ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:19 lightllm_req_id:8 first_token_cost:209.2888355255127ms total_cost_time:209.33270454406738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11812 prompt_cache_len:5151 prompt_cache_ratio:0.4360819505587538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 -DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:20 [batch.py:51] router release req id 8 -INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10708308219909668 s -INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10919189453125 s -DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=214776661518536471666954201656035617250, time:1750768100.294392s req_ids:[8] -DEBUG 06-24 20:28:20 [manager.py:391] -ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:364.41755294799805ms total_cost_time:364.46309089660645ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11813 prompt_cache_len:5151 prompt_cache_ratio:0.43604503513078813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 -DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:20 [batch.py:51] router release req id 8 -INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10857439041137695 s -INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.11054468154907227 s -DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=94264381613404482839769812177087145027, time:1750768100.6637533s req_ids:[8] -DEBUG 06-24 20:28:20 [manager.py:391] -ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:204.15568351745605ms total_cost_time:204.19907569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11814 prompt_cache_len:5151 prompt_cache_ratio:0.43600812595226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 -DEBUG 06-24 20:28:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:20 [batch.py:51] router release req id 8 -INFO 06-24 20:28:20 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s -INFO 06-24 20:28:20 [manager.py:68] detokenization recv req id 8 cost time 0.10951089859008789 s -DEBUG 06-24 20:28:20 [manager.py:391] Prefill Batch: batch_id=14341633962450682208156990771597436272, time:1750768100.8882954s req_ids:[8] -DEBUG 06-24 20:28:20 [manager.py:391] -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:224.470853805542ms total_cost_time:224.51448440551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11815 prompt_cache_len:5151 prompt_cache_ratio:0.43597122302158275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 -DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:21 [batch.py:51] router release req id 8 -INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10834360122680664 s -INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11031484603881836 s -DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=246786587703049836143906817268312435184, time:1750768101.1075695s req_ids:[8] -DEBUG 06-24 20:28:21 [manager.py:391] -ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:20 lightllm_req_id:8 first_token_cost:211.96985244750977ms total_cost_time:212.01348304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11816 prompt_cache_len:5151 prompt_cache_ratio:0.43593432633716994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 -DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:21 [batch.py:51] router release req id 8 -INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10851764678955078 s -INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11060357093811035 s -DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=224037271839791912419627107898530973012, time:1750768101.3253396s req_ids:[8] -DEBUG 06-24 20:28:21 [manager.py:391] -ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:210.50763130187988ms total_cost_time:210.55340766906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11817 prompt_cache_len:5151 prompt_cache_ratio:0.4358974358974359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 -DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:21 [batch.py:51] router release req id 8 -INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10740280151367188 s -INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.10937142372131348 s -DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=9882563576556506230902678797810835039, time:1750768101.5412252s req_ids:[8] -DEBUG 06-24 20:28:21 [manager.py:391] -ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:210.7870578765869ms total_cost_time:210.8299732208252ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11818 prompt_cache_len:5151 prompt_cache_ratio:0.4358605517007954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 -DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:21 [batch.py:51] router release req id 8 -INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.10851645469665527 s -INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.1107490062713623 s -DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=171547387199592456693803666897434015487, time:1750768101.7568007s req_ids:[8] -DEBUG 06-24 20:28:21 [manager.py:391] -ERROR 06-24 20:28:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:203.92775535583496ms total_cost_time:203.97162437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11819 prompt_cache_len:5151 prompt_cache_ratio:0.4358236737456638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 -DEBUG 06-24 20:28:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:21 [batch.py:51] router release req id 8 -INFO 06-24 20:28:21 [manager.py:224] router recive req id 8 cost time 0.11027097702026367 s -INFO 06-24 20:28:21 [manager.py:68] detokenization recv req id 8 cost time 0.11229228973388672 s -DEBUG 06-24 20:28:21 [manager.py:391] Prefill Batch: batch_id=69558427659541815398142551226342704803, time:1750768101.967057s req_ids:[8] -DEBUG 06-24 20:28:21 [manager.py:391] -ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:21 lightllm_req_id:8 first_token_cost:371.1273670196533ms total_cost_time:371.1724281311035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11820 prompt_cache_len:5151 prompt_cache_ratio:0.43578680203045683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 -DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:22 [batch.py:51] router release req id 8 -INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10829854011535645 s -INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11032319068908691 s -DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=334967454049806759731423596811636923108, time:1750768102.344881s req_ids:[8] -DEBUG 06-24 20:28:22 [manager.py:391] -ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:211.77244186401367ms total_cost_time:211.81750297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11821 prompt_cache_len:5151 prompt_cache_ratio:0.43574993655359107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 -DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:22 [batch.py:51] router release req id 8 -INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10731863975524902 s -INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.10928535461425781 s -DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=257809259817211060856424739495937956189, time:1750768102.5615969s req_ids:[8] -DEBUG 06-24 20:28:22 [manager.py:391] -ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:166.03636741638184ms total_cost_time:166.07999801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11822 prompt_cache_len:5151 prompt_cache_ratio:0.43571307731348335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 -DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:22 [batch.py:51] router release req id 8 -INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10818958282470703 s -INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11009883880615234 s -DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=23729815718666782677920038640430574626, time:1750768102.7339983s req_ids:[8] -DEBUG 06-24 20:28:22 [manager.py:391] -ERROR 06-24 20:28:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:198.47464561462402ms total_cost_time:198.5163688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11823 prompt_cache_len:5151 prompt_cache_ratio:0.43567622430855113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 -DEBUG 06-24 20:28:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:22 [batch.py:51] router release req id 8 -INFO 06-24 20:28:22 [manager.py:224] router recive req id 8 cost time 0.10834598541259766 s -INFO 06-24 20:28:22 [manager.py:68] detokenization recv req id 8 cost time 0.11038088798522949 s -DEBUG 06-24 20:28:22 [manager.py:391] Prefill Batch: batch_id=108733328886161969055624172024253861267, time:1750768102.938689s req_ids:[8] -DEBUG 06-24 20:28:22 [manager.py:391] -ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:22 lightllm_req_id:8 first_token_cost:210.17956733703613ms total_cost_time:210.22391319274902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11824 prompt_cache_len:5151 prompt_cache_ratio:0.43563937753721244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 -DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:23 [batch.py:51] router release req id 8 -INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10799717903137207 s -INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10963010787963867 s -DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=133769847089815306359347501621000728365, time:1750768103.1543956s req_ids:[8] -DEBUG 06-24 20:28:23 [manager.py:391] -INFO 06-24 20:28:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:28:23 [statics_utils.py:24] mean first cost: 228.87291303777405 ms -INFO 06-24 20:28:23 [statics_utils.py:24] mean per token cost: 0.06319268006874545 ms -ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:207.8413963317871ms total_cost_time:207.8859806060791ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11825 prompt_cache_len:5151 prompt_cache_ratio:0.4356025369978858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 -DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:23 [batch.py:51] router release req id 8 -INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10840868949890137 s -INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s -DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=13774356303798020589186248076877749007, time:1750768103.3688507s req_ids:[8] -DEBUG 06-24 20:28:23 [manager.py:391] -ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:203.12118530273438ms total_cost_time:203.16600799560547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11826 prompt_cache_len:5151 prompt_cache_ratio:0.43556570268899036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 -DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:23 [batch.py:51] router release req id 8 -INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10772037506103516 s -INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10953712463378906 s -DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=277460259278322029788430456298451407760, time:1750768103.578867s req_ids:[8] -DEBUG 06-24 20:28:23 [manager.py:391] -ERROR 06-24 20:28:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:364.6361827850342ms total_cost_time:364.67981338500977ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11827 prompt_cache_len:5151 prompt_cache_ratio:0.4355288746089456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 -DEBUG 06-24 20:28:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:23 [batch.py:51] router release req id 8 -INFO 06-24 20:28:23 [manager.py:224] router recive req id 8 cost time 0.10767674446105957 s -INFO 06-24 20:28:23 [manager.py:68] detokenization recv req id 8 cost time 0.10977983474731445 s -DEBUG 06-24 20:28:23 [manager.py:391] Prefill Batch: batch_id=232335364309974174322263073830646631733, time:1750768103.9492073s req_ids:[8] -DEBUG 06-24 20:28:23 [manager.py:391] -ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:23 lightllm_req_id:8 first_token_cost:207.09753036499023ms total_cost_time:207.14068412780762ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11828 prompt_cache_len:5151 prompt_cache_ratio:0.4354920527561718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 -DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:24 [batch.py:51] router release req id 8 -INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10756397247314453 s -INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.1096193790435791 s -DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=243058501548682557211064948786590477480, time:1750768104.1618147s req_ids:[8] -DEBUG 06-24 20:28:24 [manager.py:391] -ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:201.89356803894043ms total_cost_time:201.93815231323242ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11829 prompt_cache_len:5151 prompt_cache_ratio:0.4354552371290895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 -DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:24 [batch.py:51] router release req id 8 -INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s -INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s -DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=122837956804860988712092617742540758829, time:1750768104.370877s req_ids:[8] -DEBUG 06-24 20:28:24 [manager.py:391] -DEBUG 06-24 20:28:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 50688.747 tokens/s -DEBUG 06-24 20:28:24 [stats.py:37] Avg prompt tokens throughput: 50680.264 tokens/s -DEBUG 06-24 20:28:24 [stats.py:37] Avg generate tokens throughput: 8.484 tokens/s -ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.09833908081055ms total_cost_time:209.14268493652344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11830 prompt_cache_len:5151 prompt_cache_ratio:0.43541842772612005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 -DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:24 [batch.py:51] router release req id 8 -INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10765242576599121 s -INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10945415496826172 s -DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=104515876624913089004030360096655924995, time:1750768104.587176s req_ids:[8] -DEBUG 06-24 20:28:24 [manager.py:391] -ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.4717025756836ms total_cost_time:209.51437950134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11831 prompt_cache_len:5151 prompt_cache_ratio:0.4353816245456851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 -DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:24 [batch.py:51] router release req id 8 -INFO 06-24 20:28:24 [manager.py:224] router recive req id 8 cost time 0.10843014717102051 s -INFO 06-24 20:28:24 [manager.py:68] detokenization recv req id 8 cost time 0.10976481437683105 s -DEBUG 06-24 20:28:24 [manager.py:391] Prefill Batch: batch_id=288045694277706391797641348825570546609, time:1750768104.802586s req_ids:[8] -DEBUG 06-24 20:28:24 [manager.py:391] -ERROR 06-24 20:28:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.91945266723633ms total_cost_time:209.9616527557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:11832 prompt_cache_len:5151 prompt_cache_ratio:0.4353448275862069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 -DEBUG 06-24 20:28:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:24 [batch.py:51] router release req id 8 -INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10834193229675293 s -INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.11029696464538574 s -DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=131752891130281073793744434635496113375, time:1750768105.0190384s req_ids:[8] -DEBUG 06-24 20:28:25 [manager.py:391] -ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:24 lightllm_req_id:8 first_token_cost:209.53798294067383ms total_cost_time:209.58423614501953ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11833 prompt_cache_len:5151 prompt_cache_ratio:0.43530803684610836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 -DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:25 [batch.py:51] router release req id 8 -INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10739827156066895 s -INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.10914158821105957 s -DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=212893383406487969015545359751583718168, time:1750768105.2341962s req_ids:[8] -DEBUG 06-24 20:28:25 [manager.py:391] -ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:369.2042827606201ms total_cost_time:369.2517280578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:11834 prompt_cache_len:5151 prompt_cache_ratio:0.43527125232381275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 -DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:25 [batch.py:51] router release req id 8 -INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10823178291320801 s -INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.11018013954162598 s -DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=56993645224287758651976250850504000378, time:1750768105.6080446s req_ids:[8] -DEBUG 06-24 20:28:25 [manager.py:391] -ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:210.5083465576172ms total_cost_time:210.55293083190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11835 prompt_cache_len:5151 prompt_cache_ratio:0.43523447401774396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 -DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:25 [batch.py:51] router release req id 8 -INFO 06-24 20:28:25 [manager.py:224] router recive req id 8 cost time 0.10751581192016602 s -INFO 06-24 20:28:25 [manager.py:68] detokenization recv req id 8 cost time 0.10961699485778809 s -DEBUG 06-24 20:28:25 [manager.py:391] Prefill Batch: batch_id=265975935421197008784365063592200285825, time:1750768105.8247821s req_ids:[8] -DEBUG 06-24 20:28:25 [manager.py:391] -ERROR 06-24 20:28:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:208.13369750976562ms total_cost_time:208.1763744354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11836 prompt_cache_len:5151 prompt_cache_ratio:0.43519770192632645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 -DEBUG 06-24 20:28:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:25 [batch.py:51] router release req id 8 -INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10786676406860352 s -INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.10985040664672852 s -DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=183294235721316073751470009447935104429, time:1750768106.0408154s req_ids:[8] -DEBUG 06-24 20:28:26 [manager.py:391] -ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:25 lightllm_req_id:8 first_token_cost:211.64608001708984ms total_cost_time:211.70663833618164ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:11837 prompt_cache_len:5151 prompt_cache_ratio:0.4351609360479851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 -DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:26 [batch.py:51] router release req id 8 -INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.11120033264160156 s -INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.11325764656066895 s -DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=188789225399506397195645297269025473984, time:1750768106.2580295s req_ids:[8] -DEBUG 06-24 20:28:26 [manager.py:391] -ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:207.95011520385742ms total_cost_time:208.0066204071045ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:11838 prompt_cache_len:5151 prompt_cache_ratio:0.4351241763811455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 -DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:26 [batch.py:51] router release req id 8 -INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s -INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.11059856414794922 s -DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=6252123952698092410013501360165782055, time:1750768106.4740617s req_ids:[8] -DEBUG 06-24 20:28:26 [manager.py:391] -ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:210.33096313476562ms total_cost_time:210.38317680358887ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:11839 prompt_cache_len:5151 prompt_cache_ratio:0.43508742292423347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 -DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:26 [batch.py:51] router release req id 8 -INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10774087905883789 s -INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.1097726821899414 s -DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=190641257117425534058824980797202168848, time:1750768106.6901624s req_ids:[8] -DEBUG 06-24 20:28:26 [manager.py:391] -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:209.52963829040527ms total_cost_time:209.57398414611816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11840 prompt_cache_len:5151 prompt_cache_ratio:0.4350506756756757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 -DEBUG 06-24 20:28:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:26 [batch.py:51] router release req id 8 -INFO 06-24 20:28:26 [manager.py:224] router recive req id 8 cost time 0.10764956474304199 s -INFO 06-24 20:28:26 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s -DEBUG 06-24 20:28:26 [manager.py:391] Prefill Batch: batch_id=151714238301907987982092593321977103980, time:1750768106.9058564s req_ids:[8] -DEBUG 06-24 20:28:26 [manager.py:391] -ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:26 lightllm_req_id:8 first_token_cost:363.74664306640625ms total_cost_time:363.79194259643555ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11841 prompt_cache_len:5151 prompt_cache_ratio:0.43501393463389915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 -DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:27 [batch.py:51] router release req id 8 -INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.1081080436706543 s -INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101982593536377 s -DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=25392068842753214668133826172919159850, time:1750768107.276169s req_ids:[8] -DEBUG 06-24 20:28:27 [manager.py:391] -ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:209.89418029785156ms total_cost_time:209.93995666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11842 prompt_cache_len:5151 prompt_cache_ratio:0.4349771997973315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 -DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:27 [batch.py:51] router release req id 8 -INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s -INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11043310165405273 s -DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=188952107873609749293299689582146219866, time:1750768107.4922912s req_ids:[8] -DEBUG 06-24 20:28:27 [manager.py:391] -ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:202.57258415222168ms total_cost_time:202.61693000793457ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11843 prompt_cache_len:5151 prompt_cache_ratio:0.43494047116440093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 -DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:27 [batch.py:51] router release req id 8 -INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10962986946105957 s -INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11179256439208984 s -DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=137199173143918372879743460047223152319, time:1750768107.7008302s req_ids:[8] -DEBUG 06-24 20:28:27 [manager.py:391] -ERROR 06-24 20:28:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:215.22808074951172ms total_cost_time:215.2717113494873ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11844 prompt_cache_len:5151 prompt_cache_ratio:0.434903748733536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 -DEBUG 06-24 20:28:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:27 [batch.py:51] router release req id 8 -INFO 06-24 20:28:27 [manager.py:224] router recive req id 8 cost time 0.10940361022949219 s -INFO 06-24 20:28:27 [manager.py:68] detokenization recv req id 8 cost time 0.11121082305908203 s -DEBUG 06-24 20:28:27 [manager.py:391] Prefill Batch: batch_id=113998909102099155203770548422585652596, time:1750768107.938854s req_ids:[8] -DEBUG 06-24 20:28:27 [manager.py:391] -ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:27 lightllm_req_id:8 first_token_cost:227.5681495666504ms total_cost_time:227.61297225952148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11845 prompt_cache_len:5151 prompt_cache_ratio:0.4348670325031659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 -DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:28 [batch.py:51] router release req id 8 -INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.1095426082611084 s -INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11071610450744629 s -DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=265243073473811722256365139136524401430, time:1750768108.1573012s req_ids:[8] -DEBUG 06-24 20:28:28 [manager.py:391] -ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:209.51294898986816ms total_cost_time:209.56826210021973ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:11846 prompt_cache_len:5151 prompt_cache_ratio:0.43483032247172043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 -DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:28 [batch.py:51] router release req id 8 -INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.11229896545410156 s -INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11433148384094238 s -DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=91742669439278449955476834714760492758, time:1750768108.384648s req_ids:[8] -DEBUG 06-24 20:28:28 [manager.py:391] -ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:387.09449768066406ms total_cost_time:387.13884353637695ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11847 prompt_cache_len:5151 prompt_cache_ratio:0.4347936186376298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 -DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:28 [batch.py:51] router release req id 8 -INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11019277572631836 s -DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=79882619272615391415717038818087429414, time:1750768108.7673495s req_ids:[8] -DEBUG 06-24 20:28:28 [manager.py:391] -ERROR 06-24 20:28:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:210.85071563720703ms total_cost_time:210.89649200439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11848 prompt_cache_len:5151 prompt_cache_ratio:0.4347569209993248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 -DEBUG 06-24 20:28:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:28 [batch.py:51] router release req id 8 -INFO 06-24 20:28:28 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s -INFO 06-24 20:28:28 [manager.py:68] detokenization recv req id 8 cost time 0.11018013954162598 s -DEBUG 06-24 20:28:28 [manager.py:391] Prefill Batch: batch_id=188828207219635695086545947796986337882, time:1750768108.9842227s req_ids:[8] -DEBUG 06-24 20:28:28 [manager.py:391] -ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:28 lightllm_req_id:8 first_token_cost:205.32965660095215ms total_cost_time:205.37519454956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11849 prompt_cache_len:5151 prompt_cache_ratio:0.4347202295552367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 -DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:29 [batch.py:51] router release req id 8 -INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s -INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.10928034782409668 s -DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=138995906279715476315869694458443052957, time:1750768109.1963947s req_ids:[8] -DEBUG 06-24 20:28:29 [manager.py:391] -ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:209.28549766540527ms total_cost_time:209.33175086975098ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11850 prompt_cache_len:5151 prompt_cache_ratio:0.43468354430379746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 -DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:29 [batch.py:51] router release req id 8 -INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10859465599060059 s -INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.11075830459594727 s -DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=193244642764779444199998440016540857095, time:1750768109.4114962s req_ids:[8] -DEBUG 06-24 20:28:29 [manager.py:391] -ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:214.7960662841797ms total_cost_time:214.84112739562988ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11851 prompt_cache_len:5151 prompt_cache_ratio:0.4346468652434394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 -DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:29 [batch.py:51] router release req id 8 -INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10977506637573242 s -INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.11147403717041016 s -DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=230843736025076563184255870416772139743, time:1750768109.6499074s req_ids:[8] -DEBUG 06-24 20:28:29 [manager.py:391] -ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:190.8884048461914ms total_cost_time:190.9327507019043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11852 prompt_cache_len:5151 prompt_cache_ratio:0.43461019237259535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 -DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:29 [batch.py:51] router release req id 8 -INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10678720474243164 s -INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.1087179183959961 s -DEBUG 06-24 20:28:29 [manager.py:391] Prefill Batch: batch_id=19531097533351095739633717940783927452, time:1750768109.8298264s req_ids:[8] -DEBUG 06-24 20:28:29 [manager.py:391] -ERROR 06-24 20:28:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:167.0665740966797ms total_cost_time:167.1133041381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11853 prompt_cache_len:5151 prompt_cache_ratio:0.4345735256896988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 -DEBUG 06-24 20:28:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:29 [batch.py:51] router release req id 8 -INFO 06-24 20:28:29 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s -INFO 06-24 20:28:29 [manager.py:68] detokenization recv req id 8 cost time 0.10960268974304199 s -DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=298867485143769618783581751208625689141, time:1750768110.0050213s req_ids:[8] -DEBUG 06-24 20:28:30 [manager.py:391] -ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:29 lightllm_req_id:8 first_token_cost:392.99654960632324ms total_cost_time:393.04256439208984ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11854 prompt_cache_len:5151 prompt_cache_ratio:0.43453686519318374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 -DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:30 [batch.py:51] router release req id 8 -INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.10880279541015625 s -INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110234260559082 s -DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=292589598680450143269398590098992001728, time:1750768110.4024403s req_ids:[8] -DEBUG 06-24 20:28:30 [manager.py:391] -ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:209.6114158630371ms total_cost_time:209.68961715698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0782012939453125ms prompt_token_num:11855 prompt_cache_len:5151 prompt_cache_ratio:0.4345002108814846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 -DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:30 [batch.py:51] router release req id 8 -INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.10976409912109375 s -INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.11205029487609863 s -DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=56871444571994861550835807716257990985, time:1750768110.6332257s req_ids:[8] -DEBUG 06-24 20:28:30 [manager.py:391] -ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:228.11603546142578ms total_cost_time:228.1651496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:11856 prompt_cache_len:5151 prompt_cache_ratio:0.43446356275303644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 -DEBUG 06-24 20:28:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:30 [batch.py:51] router release req id 8 -INFO 06-24 20:28:30 [manager.py:224] router recive req id 8 cost time 0.11135554313659668 s -INFO 06-24 20:28:30 [manager.py:68] detokenization recv req id 8 cost time 0.11368703842163086 s -DEBUG 06-24 20:28:30 [manager.py:391] Prefill Batch: batch_id=174837334016425638866942082311145806441, time:1750768110.8705413s req_ids:[8] -DEBUG 06-24 20:28:30 [manager.py:391] -ERROR 06-24 20:28:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:228.76787185668945ms total_cost_time:228.81627082824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:11857 prompt_cache_len:5151 prompt_cache_ratio:0.43442692080627476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 -DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:31 [batch.py:51] router release req id 8 -INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10927224159240723 s -INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138534545898438 s -DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=242135513759416570430729438734053233340, time:1750768111.1040351s req_ids:[8] -DEBUG 06-24 20:28:31 [manager.py:391] -ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:30 lightllm_req_id:8 first_token_cost:230.70883750915527ms total_cost_time:230.75532913208008ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11858 prompt_cache_len:5151 prompt_cache_ratio:0.43439028503963567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 -DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:31 [batch.py:51] router release req id 8 -INFO 06-24 20:28:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10867547988891602 s -INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.1109931468963623 s -DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=273366704237232413912962972500563289375, time:1750768111.3428593s req_ids:[8] -DEBUG 06-24 20:28:31 [manager.py:391] -ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:229.9816608428955ms total_cost_time:230.0264835357666ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11859 prompt_cache_len:5151 prompt_cache_ratio:0.4343536554515558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 -DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:31 [batch.py:51] router release req id 8 -INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.11008310317993164 s -INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.11231160163879395 s -DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=98208872831204647830177040958943145217, time:1750768111.5806077s req_ids:[8] -DEBUG 06-24 20:28:31 [manager.py:391] -ERROR 06-24 20:28:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:233.05153846740723ms total_cost_time:233.09850692749023ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11860 prompt_cache_len:5151 prompt_cache_ratio:0.4343170320404722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 -DEBUG 06-24 20:28:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:31 [batch.py:51] router release req id 8 -INFO 06-24 20:28:31 [manager.py:224] router recive req id 8 cost time 0.10791850090026855 s -INFO 06-24 20:28:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997653007507324 s -DEBUG 06-24 20:28:31 [manager.py:391] Prefill Batch: batch_id=97936047980822285077980742942771859182, time:1750768111.8040445s req_ids:[8] -DEBUG 06-24 20:28:31 [manager.py:391] -ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:31 lightllm_req_id:8 first_token_cost:364.84432220458984ms total_cost_time:364.88890647888184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11861 prompt_cache_len:5151 prompt_cache_ratio:0.43428041480482255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 -DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:32 [batch.py:51] router release req id 8 -INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s -INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105806827545166 s -DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=11142585919447462739376373840096223320, time:1750768112.171773s req_ids:[8] -DEBUG 06-24 20:28:32 [manager.py:391] -ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:207.9486846923828ms total_cost_time:207.9939842224121ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11862 prompt_cache_len:5151 prompt_cache_ratio:0.434243803743045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 -DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:32 [batch.py:51] router release req id 8 -INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.1097416877746582 s -INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.11161470413208008 s -DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=110034234514243308030885337181554907301, time:1750768112.3862817s req_ids:[8] -DEBUG 06-24 20:28:32 [manager.py:391] -ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:166.6276454925537ms total_cost_time:166.6719913482666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11863 prompt_cache_len:5151 prompt_cache_ratio:0.43420719885357834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 -DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:32 [batch.py:51] router release req id 8 -INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10724115371704102 s -INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10936951637268066 s -DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=326034097724481502834778145619153073234, time:1750768112.5592637s req_ids:[8] -DEBUG 06-24 20:28:32 [manager.py:391] -ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:199.41210746765137ms total_cost_time:199.43857192993164ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:11864 prompt_cache_len:5151 prompt_cache_ratio:0.43417060013486175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 -DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:32 [batch.py:51] router release req id 8 -INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10683774948120117 s -INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10781574249267578 s -DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=179351694559500666814396036757856168726, time:1750768112.7673483s req_ids:[8] -DEBUG 06-24 20:28:32 [manager.py:391] -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:211.2438678741455ms total_cost_time:211.26580238342285ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11865 prompt_cache_len:5151 prompt_cache_ratio:0.43413400758533505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 -DEBUG 06-24 20:28:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:32 [batch.py:51] router release req id 8 -INFO 06-24 20:28:32 [manager.py:224] router recive req id 8 cost time 0.10457754135131836 s -INFO 06-24 20:28:32 [manager.py:68] detokenization recv req id 8 cost time 0.10544490814208984 s -DEBUG 06-24 20:28:32 [manager.py:391] Prefill Batch: batch_id=54379912887677411096561046016714697823, time:1750768112.984503s req_ids:[8] -DEBUG 06-24 20:28:32 [manager.py:391] -ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:32 lightllm_req_id:8 first_token_cost:212.47625350952148ms total_cost_time:212.50009536743164ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:11866 prompt_cache_len:5151 prompt_cache_ratio:0.4340974212034384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 -DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:33 [batch.py:51] router release req id 8 -INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.10356712341308594 s -INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.10439300537109375 s -DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=179937542494342056592850181103454616182, time:1750768113.2155564s req_ids:[8] -DEBUG 06-24 20:28:33 [manager.py:391] -ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:216.18413925170898ms total_cost_time:216.20559692382812ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11867 prompt_cache_len:5151 prompt_cache_ratio:0.4340608409876127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 -DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:33 [batch.py:51] router release req id 8 -INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.10451388359069824 s -DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=211653993442948043769921238633971639543, time:1750768113.4037392s req_ids:[8] -DEBUG 06-24 20:28:33 [manager.py:391] -INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.10536503791809082 s -ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:431.15901947021484ms total_cost_time:431.1847686767578ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:11868 prompt_cache_len:5151 prompt_cache_ratio:0.4340242669362993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 -DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:33 [batch.py:51] router release req id 8 -INFO 06-24 20:28:33 [manager.py:224] router recive req id 8 cost time 0.1039581298828125 s -INFO 06-24 20:28:33 [manager.py:68] detokenization recv req id 8 cost time 0.1048593521118164 s -DEBUG 06-24 20:28:33 [manager.py:391] Prefill Batch: batch_id=293945058288205504111290336290387204763, time:1750768113.8673863s req_ids:[8] -DEBUG 06-24 20:28:33 [manager.py:391] -ERROR 06-24 20:28:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:214.2784595489502ms total_cost_time:214.30230140686035ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:11869 prompt_cache_len:5151 prompt_cache_ratio:0.43398769904794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 -DEBUG 06-24 20:28:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:33 [batch.py:51] router release req id 8 -INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10369157791137695 s -INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10447406768798828 s -DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=6363734903778108591894042619698550698, time:1750768114.085345s req_ids:[8] -DEBUG 06-24 20:28:34 [manager.py:391] -ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:33 lightllm_req_id:8 first_token_cost:214.59150314331055ms total_cost_time:214.6134376525879ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11870 prompt_cache_len:5151 prompt_cache_ratio:0.43395113732097723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 -DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:34 [batch.py:51] router release req id 8 -INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.106109619140625 s -INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10734701156616211 s -DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=205892406970596528245782018845836049778, time:1750768114.304136s req_ids:[8] -DEBUG 06-24 20:28:34 [manager.py:391] -ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:28:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48521.220 tokens/s -DEBUG 06-24 20:28:34 [stats.py:37] Avg prompt tokens throughput: 48512.933 tokens/s -DEBUG 06-24 20:28:34 [stats.py:37] Avg generate tokens throughput: 8.287 tokens/s -INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:220.56055068969727ms total_cost_time:220.60585021972656ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:11871 prompt_cache_len:5151 prompt_cache_ratio:0.43391458175385395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 -DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:34 [batch.py:51] router release req id 8 -INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10538864135742188 s -INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10723495483398438 s -DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=220597745432292173285239366751729748883, time:1750768114.5218058s req_ids:[8] -DEBUG 06-24 20:28:34 [manager.py:391] -ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:222.35417366027832ms total_cost_time:222.37753868103027ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:11872 prompt_cache_len:5151 prompt_cache_ratio:0.4338780323450135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 -DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:34 [batch.py:51] router release req id 8 -INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.1039271354675293 s -INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10494041442871094 s -DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=303682631336364528079548651584249749075, time:1750768114.7535691s req_ids:[8] -DEBUG 06-24 20:28:34 [manager.py:391] -ERROR 06-24 20:28:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:215.80195426940918ms total_cost_time:215.82460403442383ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:11873 prompt_cache_len:5151 prompt_cache_ratio:0.4338414890928999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 -DEBUG 06-24 20:28:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:34 [batch.py:51] router release req id 8 -INFO 06-24 20:28:34 [manager.py:224] router recive req id 8 cost time 0.10386419296264648 s -INFO 06-24 20:28:34 [manager.py:68] detokenization recv req id 8 cost time 0.10472226142883301 s -DEBUG 06-24 20:28:34 [manager.py:391] Prefill Batch: batch_id=133238998156980978862911398731284604320, time:1750768114.9762044s req_ids:[8] -DEBUG 06-24 20:28:34 [manager.py:391] -ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:34 lightllm_req_id:8 first_token_cost:219.33317184448242ms total_cost_time:219.35534477233887ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:11874 prompt_cache_len:5151 prompt_cache_ratio:0.43380495199595753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 -DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:35 [batch.py:51] router release req id 8 -INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10379481315612793 s -INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10469818115234375 s -DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=178173195218545079165805977127711689883, time:1750768115.198263s req_ids:[8] -DEBUG 06-24 20:28:35 [manager.py:391] -ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:430.2067756652832ms total_cost_time:430.22775650024414ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11875 prompt_cache_len:5151 prompt_cache_ratio:0.4337684210526316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 -DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:35 [batch.py:51] router release req id 8 -INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10480284690856934 s -INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10572147369384766 s -DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=158044971044148494786218747835784622104, time:1750768115.6323247s req_ids:[8] -DEBUG 06-24 20:28:35 [manager.py:391] -ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:220.4289436340332ms total_cost_time:220.45421600341797ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11876 prompt_cache_len:5151 prompt_cache_ratio:0.4337318962613675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 -DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:35 [batch.py:51] router release req id 8 -INFO 06-24 20:28:35 [manager.py:224] router recive req id 8 cost time 0.10471010208129883 s -INFO 06-24 20:28:35 [manager.py:68] detokenization recv req id 8 cost time 0.10570812225341797 s -DEBUG 06-24 20:28:35 [manager.py:391] Prefill Batch: batch_id=175923898943493976333115882125782362181, time:1750768115.8556015s req_ids:[8] -DEBUG 06-24 20:28:35 [manager.py:391] -ERROR 06-24 20:28:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:220.57223320007324ms total_cost_time:220.59965133666992ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:11877 prompt_cache_len:5151 prompt_cache_ratio:0.4336953776206113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 -DEBUG 06-24 20:28:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:35 [batch.py:51] router release req id 8 -INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10354948043823242 s -INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10442376136779785 s -DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=291573683928380045270192115578318541141, time:1750768116.0778732s req_ids:[8] -DEBUG 06-24 20:28:36 [manager.py:391] -ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:35 lightllm_req_id:8 first_token_cost:218.96791458129883ms total_cost_time:218.9943790435791ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:11878 prompt_cache_len:5151 prompt_cache_ratio:0.4336588651288096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 -DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:36 [batch.py:51] router release req id 8 -INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10524845123291016 s -INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10628771781921387 s -DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=308021029786919301525346610842204144491, time:1750768116.2824674s req_ids:[8] -DEBUG 06-24 20:28:36 [manager.py:391] -ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:203.17339897155762ms total_cost_time:203.2005786895752ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11879 prompt_cache_len:5151 prompt_cache_ratio:0.43362235878440947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 -DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:36 [batch.py:51] router release req id 8 -INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10391378402709961 s -INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10484695434570312 s -DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=87678905147441289569508797526573543352, time:1750768116.487401s req_ids:[8] -DEBUG 06-24 20:28:36 [manager.py:391] -ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:200.15239715576172ms total_cost_time:200.17147064208984ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11880 prompt_cache_len:5151 prompt_cache_ratio:0.4335858585858586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 -DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:36 [batch.py:51] router release req id 8 -INFO 06-24 20:28:36 [manager.py:224] router recive req id 8 cost time 0.10352921485900879 s -INFO 06-24 20:28:36 [manager.py:68] detokenization recv req id 8 cost time 0.10434770584106445 s -DEBUG 06-24 20:28:36 [manager.py:391] Prefill Batch: batch_id=1154044498906886573778504009953608319, time:1750768116.6910193s req_ids:[8] -DEBUG 06-24 20:28:36 [manager.py:391] -ERROR 06-24 20:28:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:206.97855949401855ms total_cost_time:206.9990634918213ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:11881 prompt_cache_len:5151 prompt_cache_ratio:0.4335493645316051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 -DEBUG 06-24 20:28:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:36 [batch.py:51] router release req id 8 -INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.304229736328125 s -INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.3050069808959961 s -DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=315163521652213426658964823810151462492, time:1750768117.1118753s req_ids:[8] -DEBUG 06-24 20:28:37 [manager.py:391] -ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:36 lightllm_req_id:8 first_token_cost:410.48479080200195ms total_cost_time:410.5041027069092ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11882 prompt_cache_len:5151 prompt_cache_ratio:0.43351287662009763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 -DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:37 [batch.py:51] router release req id 8 -INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10379481315612793 s -DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=264284800079406942670858333695686328181, time:1750768117.2995176s req_ids:[8] -DEBUG 06-24 20:28:37 [manager.py:391] -INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10478949546813965 s -ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:185.39714813232422ms total_cost_time:185.41622161865234ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11883 prompt_cache_len:5151 prompt_cache_ratio:0.4334763948497854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 -DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:37 [batch.py:51] router release req id 8 -INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10296773910522461 s -INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10394597053527832 s -DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=111194200685913567443083064226978910299, time:1750768117.5203545s req_ids:[8] -DEBUG 06-24 20:28:37 [manager.py:391] -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:217.53764152526855ms total_cost_time:217.5581455230713ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:11884 prompt_cache_len:5151 prompt_cache_ratio:0.4334399192191181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 -DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:37 [batch.py:51] router release req id 8 -INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10298776626586914 s -INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10373616218566895 s -DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=252069406791972656238954777312853159992, time:1750768117.7395692s req_ids:[8] -DEBUG 06-24 20:28:37 [manager.py:391] -ERROR 06-24 20:28:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:215.75617790222168ms total_cost_time:215.77763557434082ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:11885 prompt_cache_len:5151 prompt_cache_ratio:0.43340344972654604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 -DEBUG 06-24 20:28:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:37 [batch.py:51] router release req id 8 -INFO 06-24 20:28:37 [manager.py:224] router recive req id 8 cost time 0.10303401947021484 s -INFO 06-24 20:28:37 [manager.py:68] detokenization recv req id 8 cost time 0.10387301445007324 s -DEBUG 06-24 20:28:37 [manager.py:391] Prefill Batch: batch_id=139493725814063699601069818821416952512, time:1750768117.9583132s req_ids:[8] -DEBUG 06-24 20:28:37 [manager.py:391] -ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:37 lightllm_req_id:8 first_token_cost:216.84932708740234ms total_cost_time:216.86959266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11886 prompt_cache_len:5151 prompt_cache_ratio:0.43336698637051996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 -DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:38 [batch.py:51] router release req id 8 -INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10387134552001953 s -INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.10461997985839844 s -DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=92878499258711925766107713668096769541, time:1750768118.1783955s req_ids:[8] -DEBUG 06-24 20:28:38 [manager.py:391] -ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:214.59698677062988ms total_cost_time:214.61868286132812ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11887 prompt_cache_len:5151 prompt_cache_ratio:0.43333052914949105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 -DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:38 [batch.py:51] router release req id 8 -INFO 06-24 20:28:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10263776779174805 s -INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.1034245491027832 s -DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=186727004882905897419108820902417162138, time:1750768118.3953588s req_ids:[8] -DEBUG 06-24 20:28:38 [manager.py:391] -ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:419.61145401000977ms total_cost_time:419.6295738220215ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:11888 prompt_cache_len:5151 prompt_cache_ratio:0.43329407806191117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 -DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:38 [batch.py:51] router release req id 8 -INFO 06-24 20:28:38 [manager.py:224] router recive req id 8 cost time 0.10374951362609863 s -INFO 06-24 20:28:38 [manager.py:68] detokenization recv req id 8 cost time 0.1044778823852539 s -DEBUG 06-24 20:28:38 [manager.py:391] Prefill Batch: batch_id=225027464847716798447343676528676637766, time:1750768118.8176684s req_ids:[8] -DEBUG 06-24 20:28:38 [manager.py:391] -ERROR 06-24 20:28:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:212.74209022521973ms total_cost_time:212.76211738586426ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11889 prompt_cache_len:5151 prompt_cache_ratio:0.43325763310623266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 -DEBUG 06-24 20:28:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:38 [batch.py:51] router release req id 8 -INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10383486747741699 s -INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10457634925842285 s -DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=16074684672064765379546276517470713510, time:1750768119.0338006s req_ids:[8] -DEBUG 06-24 20:28:39 [manager.py:391] -ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:38 lightllm_req_id:8 first_token_cost:214.76483345031738ms total_cost_time:214.78557586669922ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11890 prompt_cache_len:5151 prompt_cache_ratio:0.43322119428090833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 -DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:39 [batch.py:51] router release req id 8 -INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10288023948669434 s -INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10366606712341309 s -DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=285439775702852421388817878679544362556, time:1750768119.2514436s req_ids:[8] -DEBUG 06-24 20:28:39 [manager.py:391] -ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:214.60890769958496ms total_cost_time:214.6289348602295ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11891 prompt_cache_len:5151 prompt_cache_ratio:0.43318476158439156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 -DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:39 [batch.py:51] router release req id 8 -INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10285329818725586 s -INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10359406471252441 s -DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=318004118626866687502908502583474110159, time:1750768119.469981s req_ids:[8] -DEBUG 06-24 20:28:39 [manager.py:391] -ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:214.71118927001953ms total_cost_time:214.73217010498047ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11892 prompt_cache_len:5151 prompt_cache_ratio:0.4331483350151362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 -DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:39 [batch.py:51] router release req id 8 -INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10289382934570312 s -INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10366392135620117 s -DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=27786646989689724984413693468009177999, time:1750768119.6552756s req_ids:[8] -DEBUG 06-24 20:28:39 [manager.py:391] -ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:185.27674674987793ms total_cost_time:185.29653549194336ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11893 prompt_cache_len:5151 prompt_cache_ratio:0.43311191457159676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 -DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:39 [batch.py:51] router release req id 8 -INFO 06-24 20:28:39 [manager.py:224] router recive req id 8 cost time 0.10391473770141602 s -INFO 06-24 20:28:39 [manager.py:68] detokenization recv req id 8 cost time 0.10465574264526367 s -DEBUG 06-24 20:28:39 [manager.py:391] Prefill Batch: batch_id=215372438020006911783971177387505560473, time:1750768119.8432872s req_ids:[8] -DEBUG 06-24 20:28:39 [manager.py:391] -ERROR 06-24 20:28:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:186.3267421722412ms total_cost_time:186.34605407714844ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11894 prompt_cache_len:5151 prompt_cache_ratio:0.433075500252228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 -DEBUG 06-24 20:28:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:39 [batch.py:51] router release req id 8 -INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10273075103759766 s -INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10346341133117676 s -DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=94149045766095927298669277190631441307, time:1750768120.031504s req_ids:[8] -DEBUG 06-24 20:28:40 [manager.py:391] -ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:39 lightllm_req_id:8 first_token_cost:396.63124084472656ms total_cost_time:396.6507911682129ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11895 prompt_cache_len:5151 prompt_cache_ratio:0.4330390920554855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 -DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:40 [batch.py:51] router release req id 8 -INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10391068458557129 s -INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10464334487915039 s -DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=294727548836427237896901735155172937731, time:1750768120.4312015s req_ids:[8] -DEBUG 06-24 20:28:40 [manager.py:391] -ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:185.64414978027344ms total_cost_time:185.66226959228516ms,out_token_counter:1 mean_per_token_cost_time: 0.01811981201171875ms prompt_token_num:11896 prompt_cache_len:5151 prompt_cache_ratio:0.43300268997982516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 -DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:40 [batch.py:51] router release req id 8 -INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10303568840026855 s -INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10377955436706543 s -DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=193815743549806285059070736392735113758, time:1750768120.617825s req_ids:[8] -DEBUG 06-24 20:28:40 [manager.py:391] -ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:184.70144271850586ms total_cost_time:184.7207546234131ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:11897 prompt_cache_len:5151 prompt_cache_ratio:0.43296629402370346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 -DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:40 [batch.py:51] router release req id 8 -INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10303282737731934 s -DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=66009073030139411667477631772325113051, time:1750768120.8048763s req_ids:[8] -DEBUG 06-24 20:28:40 [manager.py:391] -INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10388350486755371 s -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:184.5862865447998ms total_cost_time:184.60583686828613ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11898 prompt_cache_len:5151 prompt_cache_ratio:0.4329299041855774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 -DEBUG 06-24 20:28:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:40 [batch.py:51] router release req id 8 -INFO 06-24 20:28:40 [manager.py:224] router recive req id 8 cost time 0.10399484634399414 s -INFO 06-24 20:28:40 [manager.py:68] detokenization recv req id 8 cost time 0.10473823547363281 s -DEBUG 06-24 20:28:40 [manager.py:391] Prefill Batch: batch_id=304931695665280690564269659223799389780, time:1750768120.9929843s req_ids:[8] -DEBUG 06-24 20:28:40 [manager.py:391] -ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:40 lightllm_req_id:8 first_token_cost:186.12146377563477ms total_cost_time:186.1422061920166ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:11899 prompt_cache_len:5151 prompt_cache_ratio:0.43289352046390456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 -DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:41 [batch.py:51] router release req id 8 -INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10398221015930176 s -DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=91869575234673034233369237604297197940, time:1750768121.1813273s req_ids:[8] -DEBUG 06-24 20:28:41 [manager.py:391] -INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10483932495117188 s -ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:186.0513687133789ms total_cost_time:186.07115745544434ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11900 prompt_cache_len:5151 prompt_cache_ratio:0.4328571428571429 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 -DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:41 [batch.py:51] router release req id 8 -INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10289621353149414 s -INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10368871688842773 s -DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=293735260567805192594385009315586009925, time:1750768121.3701365s req_ids:[8] -DEBUG 06-24 20:28:41 [manager.py:391] -ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:185.74285507202148ms total_cost_time:185.76383590698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11901 prompt_cache_len:5151 prompt_cache_ratio:0.43282077136375097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 -DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:41 [batch.py:51] router release req id 8 -INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10282731056213379 s -INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.10369467735290527 s -DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=133231803494484504470395985042425411248, time:1750768121.5576954s req_ids:[8] -DEBUG 06-24 20:28:41 [manager.py:391] -ERROR 06-24 20:28:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:406.8021774291992ms total_cost_time:406.82244300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11902 prompt_cache_len:5151 prompt_cache_ratio:0.43278440598218787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 -DEBUG 06-24 20:28:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:41 [batch.py:51] router release req id 8 -INFO 06-24 20:28:41 [manager.py:224] router recive req id 8 cost time 0.10396695137023926 s -DEBUG 06-24 20:28:41 [manager.py:391] Prefill Batch: batch_id=14362376582423461109887799211070417666, time:1750768121.967087s req_ids:[8] -DEBUG 06-24 20:28:41 [manager.py:391] -INFO 06-24 20:28:41 [manager.py:68] detokenization recv req id 8 cost time 0.1047511100769043 s -ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:41 lightllm_req_id:8 first_token_cost:185.03522872924805ms total_cost_time:185.05620956420898ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11903 prompt_cache_len:5151 prompt_cache_ratio:0.4327480467109132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 -DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:42 [batch.py:51] router release req id 8 -INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10300755500793457 s -INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10376834869384766 s -DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=196042952659276185954510919503217220697, time:1750768122.187142s req_ids:[8] -DEBUG 06-24 20:28:42 [manager.py:391] -ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:216.72797203063965ms total_cost_time:216.74680709838867ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11904 prompt_cache_len:5151 prompt_cache_ratio:0.4327116935483871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 -DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:42 [batch.py:51] router release req id 8 -INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10305380821228027 s -DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=51072148814772939830098363549592479794, time:1750768122.3728938s req_ids:[8] -DEBUG 06-24 20:28:42 [manager.py:391] -INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10382604598999023 s -ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:183.90846252441406ms total_cost_time:183.9296817779541ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:11905 prompt_cache_len:5151 prompt_cache_ratio:0.43267534649307016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 -DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:42 [batch.py:51] router release req id 8 -INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10294365882873535 s -INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10368561744689941 s -DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=179196303910933287213935143701919788889, time:1750768122.5592992s req_ids:[8] -DEBUG 06-24 20:28:42 [manager.py:391] -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:184.1733455657959ms total_cost_time:184.19241905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.019073486328125ms prompt_token_num:11906 prompt_cache_len:5151 prompt_cache_ratio:0.4326390055434235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 -DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:42 [batch.py:51] router release req id 8 -INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10312676429748535 s -INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.1038978099822998 s -DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=147129783462552214023951574258481299419, time:1750768122.7792697s req_ids:[8] -DEBUG 06-24 20:28:42 [manager.py:391] -ERROR 06-24 20:28:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:217.94939041137695ms total_cost_time:217.96894073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11907 prompt_cache_len:5151 prompt_cache_ratio:0.4326026706979088 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 -DEBUG 06-24 20:28:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:42 [batch.py:51] router release req id 8 -INFO 06-24 20:28:42 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s -INFO 06-24 20:28:42 [manager.py:68] detokenization recv req id 8 cost time 0.10386800765991211 s -DEBUG 06-24 20:28:42 [manager.py:391] Prefill Batch: batch_id=249285835926311955843662330564777077985, time:1750768122.9995666s req_ids:[8] -DEBUG 06-24 20:28:42 [manager.py:391] -ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:42 lightllm_req_id:8 first_token_cost:217.47827529907227ms total_cost_time:217.4971103668213ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11908 prompt_cache_len:5151 prompt_cache_ratio:0.4325663419549882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 -DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:43 [batch.py:51] router release req id 8 -INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.1026608943939209 s -INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10341238975524902 s -DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=229628226113436194133592346352402657513, time:1750768123.2067904s req_ids:[8] -DEBUG 06-24 20:28:43 [manager.py:391] -ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:376.5110969543457ms total_cost_time:376.53112411499023ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11909 prompt_cache_len:5151 prompt_cache_ratio:0.4325300193131245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 -DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:43 [batch.py:51] router release req id 8 -INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10271787643432617 s -INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.1034994125366211 s -DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=219670195906739416862186270876773721924, time:1750768123.586386s req_ids:[8] -DEBUG 06-24 20:28:43 [manager.py:391] -ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:186.95306777954102ms total_cost_time:186.97142601013184ms,out_token_counter:1 mean_per_token_cost_time: 0.018358230590820312ms prompt_token_num:11910 prompt_cache_len:5151 prompt_cache_ratio:0.43249370277078086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 -DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:43 [batch.py:51] router release req id 8 -INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10295701026916504 s -INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10371160507202148 s -DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=109108641233032409777627110540003684663, time:1750768123.7755115s req_ids:[8] -DEBUG 06-24 20:28:43 [manager.py:391] -ERROR 06-24 20:28:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:187.1178150177002ms total_cost_time:187.1469020843506ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:11911 prompt_cache_len:5151 prompt_cache_ratio:0.43245739232642094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 -DEBUG 06-24 20:28:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:43 [batch.py:51] router release req id 8 -INFO 06-24 20:28:43 [manager.py:224] router recive req id 8 cost time 0.10373187065124512 s -INFO 06-24 20:28:43 [manager.py:68] detokenization recv req id 8 cost time 0.10450029373168945 s -DEBUG 06-24 20:28:43 [manager.py:391] Prefill Batch: batch_id=126136171161668277662895032290498948939, time:1750768123.9630594s req_ids:[8] -DEBUG 06-24 20:28:43 [manager.py:391] -ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:43 lightllm_req_id:8 first_token_cost:185.05454063415527ms total_cost_time:185.0748062133789ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:11912 prompt_cache_len:5151 prompt_cache_ratio:0.4324210879785091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 -DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:44 [batch.py:51] router release req id 8 -INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.1041414737701416 s -INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.1050574779510498 s -DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=101904346663866664492163981770199337905, time:1750768124.1519098s req_ids:[8] -DEBUG 06-24 20:28:44 [manager.py:391] -ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:185.77861785888672ms total_cost_time:185.79959869384766ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11913 prompt_cache_len:5151 prompt_cache_ratio:0.4323847897255099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 -DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:44 [batch.py:51] router release req id 8 -INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.10367274284362793 s -INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10445904731750488 s -DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=105959152308765602551435942516272708875, time:1750768124.3401246s req_ids:[8] -DEBUG 06-24 20:28:44 [manager.py:391] -ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:28:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 51072.416 tokens/s -DEBUG 06-24 20:28:44 [stats.py:37] Avg prompt tokens throughput: 51063.828 tokens/s -DEBUG 06-24 20:28:44 [stats.py:37] Avg generate tokens throughput: 8.587 tokens/s -INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:186.10668182373047ms total_cost_time:186.1276626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11914 prompt_cache_len:5151 prompt_cache_ratio:0.4323484975658889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 -DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:44 [batch.py:51] router release req id 8 -INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.1065988540649414 s -INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10744047164916992 s -DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=150338943219137824466646260291292578170, time:1750768124.5294368s req_ids:[8] -DEBUG 06-24 20:28:44 [manager.py:391] -ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:181.13470077514648ms total_cost_time:181.16521835327148ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:11915 prompt_cache_len:5151 prompt_cache_ratio:0.4323122114981116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 -DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:44 [batch.py:51] router release req id 8 -INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.20542168617248535 s -INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.20613527297973633 s -DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=217487315321687900054528096871513963017, time:1750768124.8341997s req_ids:[8] -DEBUG 06-24 20:28:44 [manager.py:391] -ERROR 06-24 20:28:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:285.7646942138672ms total_cost_time:285.7856750488281ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11916 prompt_cache_len:5151 prompt_cache_ratio:0.43227593152064453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 -DEBUG 06-24 20:28:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:44 [batch.py:51] router release req id 8 -INFO 06-24 20:28:44 [manager.py:224] router recive req id 8 cost time 0.10442137718200684 s -INFO 06-24 20:28:44 [manager.py:68] detokenization recv req id 8 cost time 0.10544300079345703 s -DEBUG 06-24 20:28:44 [manager.py:391] Prefill Batch: batch_id=290524089958705897788470446060157217293, time:1750768124.9884398s req_ids:[8] -DEBUG 06-24 20:28:44 [manager.py:391] -ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:44 lightllm_req_id:8 first_token_cost:163.35177421569824ms total_cost_time:163.37180137634277ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11917 prompt_cache_len:5151 prompt_cache_ratio:0.4322396576319544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 -DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:45 [batch.py:51] router release req id 8 -INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10406017303466797 s -INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10481667518615723 s -DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=30692006335710302791810082906723519783, time:1750768125.1534283s req_ids:[8] -DEBUG 06-24 20:28:45 [manager.py:391] -ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:168.41912269592285ms total_cost_time:168.43771934509277ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:11918 prompt_cache_len:5151 prompt_cache_ratio:0.4322033898305085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 -DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:45 [batch.py:51] router release req id 8 -INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10372376441955566 s -INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10448122024536133 s -DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=72490887313163765474611746369139095922, time:1750768125.3297367s req_ids:[8] -DEBUG 06-24 20:28:45 [manager.py:391] -ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:156.89325332641602ms total_cost_time:156.91423416137695ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:11919 prompt_cache_len:5151 prompt_cache_ratio:0.43216712811477476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 -DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:45 [batch.py:51] router release req id 8 -INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10349678993225098 s -INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10440278053283691 s -DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=104551186219817458142548401733315601718, time:1750768125.4851012s req_ids:[8] -DEBUG 06-24 20:28:45 [manager.py:391] -ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:184.56506729125977ms total_cost_time:184.5846176147461ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:11920 prompt_cache_len:5151 prompt_cache_ratio:0.43213087248322146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 -DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:45 [batch.py:51] router release req id 8 -INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10286378860473633 s -INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.10376334190368652 s -DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=285439590067661428086108302901341192029, time:1750768125.67804s req_ids:[8] -DEBUG 06-24 20:28:45 [manager.py:391] -ERROR 06-24 20:28:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:191.59483909606934ms total_cost_time:191.61677360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:11921 prompt_cache_len:5151 prompt_cache_ratio:0.4320946229343176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 -DEBUG 06-24 20:28:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:45 [batch.py:51] router release req id 8 -INFO 06-24 20:28:45 [manager.py:224] router recive req id 8 cost time 0.10417461395263672 s -INFO 06-24 20:28:45 [manager.py:68] detokenization recv req id 8 cost time 0.1050724983215332 s -DEBUG 06-24 20:28:45 [manager.py:391] Prefill Batch: batch_id=330880769612056719860483041211692931428, time:1750768125.8728566s req_ids:[8] -DEBUG 06-24 20:28:45 [manager.py:391] -ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:45 lightllm_req_id:8 first_token_cost:371.7951774597168ms total_cost_time:371.8152046203613ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:11922 prompt_cache_len:5151 prompt_cache_ratio:0.43205837946653247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 -DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:46 [batch.py:51] router release req id 8 -INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10330057144165039 s -INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10409116744995117 s -DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=101432892994023791577161635378135733177, time:1750768126.254141s req_ids:[8] -DEBUG 06-24 20:28:46 [manager.py:391] -ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:187.8063678741455ms total_cost_time:187.82806396484375ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:11923 prompt_cache_len:5151 prompt_cache_ratio:0.432022142078336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 -DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:46 [batch.py:51] router release req id 8 -INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10390281677246094 s -INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.1046912670135498 s -DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=194569489297379704269325406132162936346, time:1750768126.4438913s req_ids:[8] -DEBUG 06-24 20:28:46 [manager.py:391] -ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.4046115875244ms total_cost_time:184.42440032958984ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:11924 prompt_cache_len:5151 prompt_cache_ratio:0.4319859107681986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 -DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:46 [batch.py:51] router release req id 8 -INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10378146171569824 s -INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10457301139831543 s -DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=179306132277893662537467714436218454634, time:1750768126.6318889s req_ids:[8] -DEBUG 06-24 20:28:46 [manager.py:391] -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:185.32323837280273ms total_cost_time:185.34207344055176ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:11925 prompt_cache_len:5151 prompt_cache_ratio:0.4319496855345912 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 -DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:46 [batch.py:51] router release req id 8 -INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.1040191650390625 s -INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10579252243041992 s -DEBUG 06-24 20:28:46 [manager.py:391] Prefill Batch: batch_id=98198369865994734633178925837271803866, time:1750768126.818987s req_ids:[8] -DEBUG 06-24 20:28:46 [manager.py:391] -ERROR 06-24 20:28:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.4651699066162ms total_cost_time:184.4959259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11926 prompt_cache_len:5151 prompt_cache_ratio:0.43191346637598527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 -DEBUG 06-24 20:28:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:46 [batch.py:51] router release req id 8 -INFO 06-24 20:28:46 [manager.py:224] router recive req id 8 cost time 0.10430574417114258 s -INFO 06-24 20:28:46 [manager.py:68] detokenization recv req id 8 cost time 0.10512351989746094 s -DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=39941762660234327624813944061963785239, time:1750768127.0060763s req_ids:[8] -DEBUG 06-24 20:28:47 [manager.py:391] -INFO 06-24 20:28:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:46 lightllm_req_id:8 first_token_cost:184.53502655029297ms total_cost_time:184.56006050109863ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:11927 prompt_cache_len:5151 prompt_cache_ratio:0.4318772532908527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 -DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:47 [batch.py:51] router release req id 8 -INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10611820220947266 s -INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10719561576843262 s -DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=254371351300487063249411515892349847721, time:1750768127.1945481s req_ids:[8] -DEBUG 06-24 20:28:47 [manager.py:391] -ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:200.6247043609619ms total_cost_time:200.6673812866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11928 prompt_cache_len:5151 prompt_cache_ratio:0.431841046277666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 -DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:47 [batch.py:51] router release req id 8 -INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10622334480285645 s -INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10711431503295898 s -DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=180769309607235874900552505442133146658, time:1750768127.390995s req_ids:[8] -DEBUG 06-24 20:28:47 [manager.py:391] -ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:359.03429985046387ms total_cost_time:359.07840728759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11929 prompt_cache_len:5151 prompt_cache_ratio:0.43180484533489816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 -DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:47 [batch.py:51] router release req id 8 -INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10712313652038574 s -INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.10915040969848633 s -DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=183451054074156367393599271568668816449, time:1750768127.7498627s req_ids:[8] -DEBUG 06-24 20:28:47 [manager.py:391] -ERROR 06-24 20:28:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:186.98954582214355ms total_cost_time:187.02030181884766ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11930 prompt_cache_len:5151 prompt_cache_ratio:0.43176865046102264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 -DEBUG 06-24 20:28:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:47 [batch.py:51] router release req id 8 -INFO 06-24 20:28:47 [manager.py:224] router recive req id 8 cost time 0.10822248458862305 s -INFO 06-24 20:28:47 [manager.py:68] detokenization recv req id 8 cost time 0.11017537117004395 s -DEBUG 06-24 20:28:47 [manager.py:391] Prefill Batch: batch_id=262289064727497717295385751286343736738, time:1750768127.954026s req_ids:[8] -DEBUG 06-24 20:28:47 [manager.py:391] -ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:47 lightllm_req_id:8 first_token_cost:214.22171592712402ms total_cost_time:214.26773071289062ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11931 prompt_cache_len:5151 prompt_cache_ratio:0.43173246165451346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 -DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:48 [batch.py:51] router release req id 8 -INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1095893383026123 s -INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11162662506103516 s -DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=10955442552283379595091094048822561627, time:1750768128.169632s req_ids:[8] -DEBUG 06-24 20:28:48 [manager.py:391] -ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:211.67969703674316ms total_cost_time:211.72380447387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11932 prompt_cache_len:5151 prompt_cache_ratio:0.4316962789138451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 -DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:48 [batch.py:51] router release req id 8 -INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1094825267791748 s -INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11165213584899902 s -DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=30701902382265071494347345381379766020, time:1750768128.3848386s req_ids:[8] -DEBUG 06-24 20:28:48 [manager.py:391] -ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:211.04764938354492ms total_cost_time:211.090087890625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:11933 prompt_cache_len:5151 prompt_cache_ratio:0.4316601022374927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 -DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:48 [batch.py:51] router release req id 8 -INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.10813164710998535 s -INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11011147499084473 s -DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=264220764458383977328336085653241699902, time:1750768128.6066256s req_ids:[8] -DEBUG 06-24 20:28:48 [manager.py:391] -ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:212.75782585144043ms total_cost_time:212.81981468200684ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:11934 prompt_cache_len:5151 prompt_cache_ratio:0.43162393162393164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 -DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:48 [batch.py:51] router release req id 8 -INFO 06-24 20:28:48 [manager.py:224] router recive req id 8 cost time 0.1090703010559082 s -INFO 06-24 20:28:48 [manager.py:68] detokenization recv req id 8 cost time 0.11097431182861328 s -DEBUG 06-24 20:28:48 [manager.py:391] Prefill Batch: batch_id=226970020615295283576571519100874197849, time:1750768128.82386s req_ids:[8] -DEBUG 06-24 20:28:48 [manager.py:391] -ERROR 06-24 20:28:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:225.9652614593506ms total_cost_time:226.00626945495605ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:11935 prompt_cache_len:5151 prompt_cache_ratio:0.43158776707163804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 -DEBUG 06-24 20:28:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:48 [batch.py:51] router release req id 8 -INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10972976684570312 s -INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.11196017265319824 s -DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=56558388786481784097447248110183976064, time:1750768129.0512826s req_ids:[8] -DEBUG 06-24 20:28:49 [manager.py:391] -ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:48 lightllm_req_id:8 first_token_cost:373.2337951660156ms total_cost_time:373.2795715332031ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11936 prompt_cache_len:5151 prompt_cache_ratio:0.4315516085790885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 -DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:49 [batch.py:51] router release req id 8 -INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10651302337646484 s -INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10838103294372559 s -DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=249677948696364611735669091539764887871, time:1750768129.4364083s req_ids:[8] -DEBUG 06-24 20:28:49 [manager.py:391] -ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:162.73903846740723ms total_cost_time:162.78576850891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11937 prompt_cache_len:5151 prompt_cache_ratio:0.43151545614476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 -DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:49 [batch.py:51] router release req id 8 -INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.1071782112121582 s -INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10917830467224121 s -DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=193901099944717543853453843433311174277, time:1750768129.597522s req_ids:[8] -DEBUG 06-24 20:28:49 [manager.py:391] -ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:192.71159172058105ms total_cost_time:192.75712966918945ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11938 prompt_cache_len:5151 prompt_cache_ratio:0.43147930976713017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 -DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:49 [batch.py:51] router release req id 8 -INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10732078552246094 s -INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10909366607666016 s -DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=105622941769307149745216722497816046707, time:1750768129.8114686s req_ids:[8] -DEBUG 06-24 20:28:49 [manager.py:391] -ERROR 06-24 20:28:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:187.64710426330566ms total_cost_time:187.69335746765137ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:11939 prompt_cache_len:5151 prompt_cache_ratio:0.4314431694446771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 -DEBUG 06-24 20:28:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:49 [batch.py:51] router release req id 8 -INFO 06-24 20:28:49 [manager.py:224] router recive req id 8 cost time 0.10663914680480957 s -INFO 06-24 20:28:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769391059875488 s -DEBUG 06-24 20:28:49 [manager.py:391] Prefill Batch: batch_id=32829283500757086317448305008622266516, time:1750768129.999136s req_ids:[8] -DEBUG 06-24 20:28:49 [manager.py:391] -ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:49 lightllm_req_id:8 first_token_cost:184.11564826965332ms total_cost_time:184.16166305541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11940 prompt_cache_len:5151 prompt_cache_ratio:0.4314070351758794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 -DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:50 [batch.py:51] router release req id 8 -INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10706734657287598 s -INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.10807466506958008 s -DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=218307670817421332996924828182541919691, time:1750768130.1863377s req_ids:[8] -DEBUG 06-24 20:28:50 [manager.py:391] -ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:193.1002140045166ms total_cost_time:193.16411018371582ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:11941 prompt_cache_len:5151 prompt_cache_ratio:0.43137090695921615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 -DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:50 [batch.py:51] router release req id 8 -INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10703134536743164 s -INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.1085963249206543 s -DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=8075253747766070727967962765713576120, time:1750768130.3880155s req_ids:[8] -DEBUG 06-24 20:28:50 [manager.py:391] -ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:204.17213439941406ms total_cost_time:204.21552658081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11942 prompt_cache_len:5151 prompt_cache_ratio:0.43133478479316695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 -DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:50 [batch.py:51] router release req id 8 -INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10860085487365723 s -INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.11028790473937988 s -DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=298479681842835287749325519964436205646, time:1750768130.596268s req_ids:[8] -DEBUG 06-24 20:28:50 [manager.py:391] -ERROR 06-24 20:28:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:362.25199699401855ms total_cost_time:362.29538917541504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11943 prompt_cache_len:5151 prompt_cache_ratio:0.431298668676212 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 -DEBUG 06-24 20:28:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:50 [batch.py:51] router release req id 8 -INFO 06-24 20:28:50 [manager.py:224] router recive req id 8 cost time 0.10841035842895508 s -DEBUG 06-24 20:28:50 [manager.py:391] Prefill Batch: batch_id=177426162998192559358030897943835490159, time:1750768130.957746s req_ids:[8] -DEBUG 06-24 20:28:50 [manager.py:391] -INFO 06-24 20:28:50 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s -ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:50 lightllm_req_id:8 first_token_cost:197.24130630493164ms total_cost_time:197.2970962524414ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:11944 prompt_cache_len:5151 prompt_cache_ratio:0.4312625586068319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 -DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:51 [batch.py:51] router release req id 8 -INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10709810256958008 s -INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.10895037651062012 s -DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=249981216753730869745852507468848513451, time:1750768131.16743s req_ids:[8] -DEBUG 06-24 20:28:51 [manager.py:391] -ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:192.49606132507324ms total_cost_time:192.53969192504883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11945 prompt_cache_len:5151 prompt_cache_ratio:0.4312264545835077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 -DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:51 [batch.py:51] router release req id 8 -INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10904741287231445 s -INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.11110615730285645 s -DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=4334819489007811238289904869235204874, time:1750768131.3669422s req_ids:[8] -DEBUG 06-24 20:28:51 [manager.py:391] -ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:209.0611457824707ms total_cost_time:209.1076374053955ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:11946 prompt_cache_len:5151 prompt_cache_ratio:0.43119035660472127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 -DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:51 [batch.py:51] router release req id 8 -INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10863590240478516 s -INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s -DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=187049276141074335429132119649016305600, time:1750768131.5853672s req_ids:[8] -DEBUG 06-24 20:28:51 [manager.py:391] -ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:204.3013572692871ms total_cost_time:204.3440341949463ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11947 prompt_cache_len:5151 prompt_cache_ratio:0.43115426466895457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 -DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:51 [batch.py:51] router release req id 8 -INFO 06-24 20:28:51 [manager.py:224] router recive req id 8 cost time 0.10752534866333008 s -INFO 06-24 20:28:51 [manager.py:68] detokenization recv req id 8 cost time 0.10944914817810059 s -DEBUG 06-24 20:28:51 [manager.py:391] Prefill Batch: batch_id=32083930597534349419754367776652719316, time:1750768131.7934196s req_ids:[8] -DEBUG 06-24 20:28:51 [manager.py:391] -ERROR 06-24 20:28:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:207.75246620178223ms total_cost_time:207.7958583831787ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11948 prompt_cache_len:5151 prompt_cache_ratio:0.43111817877469033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 -DEBUG 06-24 20:28:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:51 [batch.py:51] router release req id 8 -INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10822105407714844 s -INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s -DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=82932988976142750099513244572096112892, time:1750768132.014584s req_ids:[8] -DEBUG 06-24 20:28:52 [manager.py:391] -ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:51 lightllm_req_id:8 first_token_cost:216.16816520690918ms total_cost_time:216.21298789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11949 prompt_cache_len:5151 prompt_cache_ratio:0.43108209892041177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 -DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:52 [batch.py:51] router release req id 8 -INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10789871215820312 s -INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10982251167297363 s -DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=53293644116887765529528007494403420690, time:1750768132.2312596s req_ids:[8] -DEBUG 06-24 20:28:52 [manager.py:391] -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:373.5618591308594ms total_cost_time:373.60668182373047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:11950 prompt_cache_len:5151 prompt_cache_ratio:0.4310460251046025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 -DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:52 [batch.py:51] router release req id 8 -INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10766887664794922 s -INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10986828804016113 s -DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=299085699666324630620667964424971663928, time:1750768132.6142173s req_ids:[8] -DEBUG 06-24 20:28:52 [manager.py:391] -ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:213.83905410766602ms total_cost_time:213.88912200927734ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:11951 prompt_cache_len:5151 prompt_cache_ratio:0.4310099573257468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 -DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:52 [batch.py:51] router release req id 8 -INFO 06-24 20:28:52 [manager.py:224] router recive req id 8 cost time 0.10742497444152832 s -INFO 06-24 20:28:52 [manager.py:68] detokenization recv req id 8 cost time 0.10931229591369629 s -DEBUG 06-24 20:28:52 [manager.py:391] Prefill Batch: batch_id=144798307928284488434488280874079884879, time:1750768132.8307838s req_ids:[8] -DEBUG 06-24 20:28:52 [manager.py:391] -ERROR 06-24 20:28:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:210.98780632019043ms total_cost_time:211.0309600830078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:11952 prompt_cache_len:5151 prompt_cache_ratio:0.4309738955823293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 -DEBUG 06-24 20:28:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:52 [batch.py:51] router release req id 8 -INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10730409622192383 s -INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10931992530822754 s -DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=283566256457937003956700817055337061115, time:1750768133.052218s req_ids:[8] -DEBUG 06-24 20:28:53 [manager.py:391] -ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:52 lightllm_req_id:8 first_token_cost:209.32388305664062ms total_cost_time:209.37156677246094ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:11953 prompt_cache_len:5151 prompt_cache_ratio:0.4309378398728353 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 -DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:53 [batch.py:51] router release req id 8 -INFO 06-24 20:28:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s -INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.11046814918518066 s -DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=321079679069355810564997312921033860538, time:1750768133.2700834s req_ids:[8] -DEBUG 06-24 20:28:53 [manager.py:391] -INFO 06-24 20:28:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:201.04026794433594ms total_cost_time:201.0631561279297ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:11954 prompt_cache_len:5151 prompt_cache_ratio:0.43090179019575037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 -DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:53 [batch.py:51] router release req id 8 -INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10402393341064453 s -INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10510063171386719 s -DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=214015021856281163025284845259801141914, time:1750768133.4948883s req_ids:[8] -DEBUG 06-24 20:28:53 [manager.py:391] -ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:215.09408950805664ms total_cost_time:215.15870094299316ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:11955 prompt_cache_len:5151 prompt_cache_ratio:0.43086574654956084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 -DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:53 [batch.py:51] router release req id 8 -INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10417962074279785 s -INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.1058039665222168 s -DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=278922859122595349551139641541674955634, time:1750768133.7029865s req_ids:[8] -DEBUG 06-24 20:28:53 [manager.py:391] -ERROR 06-24 20:28:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:208.16516876220703ms total_cost_time:208.21022987365723ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11956 prompt_cache_len:5151 prompt_cache_ratio:0.4308297089327534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 -DEBUG 06-24 20:28:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:53 [batch.py:51] router release req id 8 -INFO 06-24 20:28:53 [manager.py:224] router recive req id 8 cost time 0.10745906829833984 s -INFO 06-24 20:28:53 [manager.py:68] detokenization recv req id 8 cost time 0.10846972465515137 s -DEBUG 06-24 20:28:53 [manager.py:391] Prefill Batch: batch_id=265470482435672432438706593366940899124, time:1750768133.9040763s req_ids:[8] -DEBUG 06-24 20:28:53 [manager.py:391] -ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:53 lightllm_req_id:8 first_token_cost:355.8318614959717ms total_cost_time:355.87477684020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:11957 prompt_cache_len:5151 prompt_cache_ratio:0.43079367734381535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 -DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:54 [batch.py:51] router release req id 8 -INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10811686515808105 s -INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s -DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=233804298028408121718787241589612477328, time:1750768134.2658517s req_ids:[8] -DEBUG 06-24 20:28:54 [manager.py:391] -ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:199.7823715209961ms total_cost_time:199.82671737670898ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11958 prompt_cache_len:5151 prompt_cache_ratio:0.4307576517812343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 -DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:54 [batch.py:51] router release req id 8 -INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s -INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.11061215400695801 s -DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=70171910777716891133016673777406270311, time:1750768134.4771082s req_ids:[8] -DEBUG 06-24 20:28:54 [manager.py:391] -DEBUG 06-24 20:28:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 53321.297 tokens/s -DEBUG 06-24 20:28:54 [stats.py:37] Avg prompt tokens throughput: 53312.464 tokens/s -DEBUG 06-24 20:28:54 [stats.py:37] Avg generate tokens throughput: 8.833 tokens/s -ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:215.33989906311035ms total_cost_time:215.38352966308594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:11959 prompt_cache_len:5151 prompt_cache_ratio:0.43072163224349863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 -DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:54 [batch.py:51] router release req id 8 -INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10713386535644531 s -INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.10915851593017578 s -DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=231323215789748381961888343141972574034, time:1750768134.6927128s req_ids:[8] -DEBUG 06-24 20:28:54 [manager.py:391] -ERROR 06-24 20:28:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:205.79195022583008ms total_cost_time:205.83748817443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:11960 prompt_cache_len:5151 prompt_cache_ratio:0.430685618729097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 -DEBUG 06-24 20:28:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:54 [batch.py:51] router release req id 8 -INFO 06-24 20:28:54 [manager.py:224] router recive req id 8 cost time 0.10983586311340332 s -INFO 06-24 20:28:54 [manager.py:68] detokenization recv req id 8 cost time 0.11195635795593262 s -DEBUG 06-24 20:28:54 [manager.py:391] Prefill Batch: batch_id=238224137516115694925448643174657560197, time:1750768134.911102s req_ids:[8] -DEBUG 06-24 20:28:54 [manager.py:391] -ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:54 lightllm_req_id:8 first_token_cost:216.70222282409668ms total_cost_time:216.74561500549316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11961 prompt_cache_len:5151 prompt_cache_ratio:0.4306496112365187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 -DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:55 [batch.py:51] router release req id 8 -INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10788154602050781 s -INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.10985636711120605 s -DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=15819547354600753324859031452129940274, time:1750768135.1281812s req_ids:[8] -DEBUG 06-24 20:28:55 [manager.py:391] -ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:211.52687072753906ms total_cost_time:211.57121658325195ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11962 prompt_cache_len:5151 prompt_cache_ratio:0.43061360976425345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 -DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:55 [batch.py:51] router release req id 8 -INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10875082015991211 s -INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.1110377311706543 s -DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=95043965739271115239721168430332272280, time:1750768135.3508294s req_ids:[8] -DEBUG 06-24 20:28:55 [manager.py:391] -ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:212.9056453704834ms total_cost_time:212.95642852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:11963 prompt_cache_len:5151 prompt_cache_ratio:0.4305776143107916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 -DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:55 [batch.py:51] router release req id 8 -INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10718369483947754 s -INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.1090691089630127 s -DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=258777417804640485526976366306366551919, time:1750768135.563974s req_ids:[8] -DEBUG 06-24 20:28:55 [manager.py:391] -ERROR 06-24 20:28:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:367.98858642578125ms total_cost_time:368.03293228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11964 prompt_cache_len:5151 prompt_cache_ratio:0.4305416248746239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 -DEBUG 06-24 20:28:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:55 [batch.py:51] router release req id 8 -INFO 06-24 20:28:55 [manager.py:224] router recive req id 8 cost time 0.10790467262268066 s -INFO 06-24 20:28:55 [manager.py:68] detokenization recv req id 8 cost time 0.10987687110900879 s -DEBUG 06-24 20:28:55 [manager.py:391] Prefill Batch: batch_id=273973186652445984431349803298722936564, time:1750768135.9396412s req_ids:[8] -DEBUG 06-24 20:28:55 [manager.py:391] -ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:55 lightllm_req_id:8 first_token_cost:202.00705528259277ms total_cost_time:202.04973220825195ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11965 prompt_cache_len:5151 prompt_cache_ratio:0.4305056414542415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 -DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:56 [batch.py:51] router release req id 8 -INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10832691192626953 s -INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s -DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=300113675189918618686274143071557316507, time:1750768136.1523914s req_ids:[8] -DEBUG 06-24 20:28:56 [manager.py:391] -ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:212.8605842590332ms total_cost_time:212.9039764404297ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11966 prompt_cache_len:5151 prompt_cache_ratio:0.4304696640481364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 -DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:56 [batch.py:51] router release req id 8 -INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10562682151794434 s -INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.10763883590698242 s -DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=283607000119770431190850984979214755571, time:1750768136.3669546s req_ids:[8] -DEBUG 06-24 20:28:56 [manager.py:391] -ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:211.67397499084473ms total_cost_time:211.71903610229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11967 prompt_cache_len:5151 prompt_cache_ratio:0.4304336926548007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 -DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:56 [batch.py:51] router release req id 8 -INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.11092638969421387 s -INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.11307263374328613 s -DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=313994612745509528983755209127736581597, time:1750768136.588752s req_ids:[8] -DEBUG 06-24 20:28:56 [manager.py:391] -ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:210.4470729827881ms total_cost_time:210.5412483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.09417533874511719ms prompt_token_num:11968 prompt_cache_len:5151 prompt_cache_ratio:0.4303977272727273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 -DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:56 [batch.py:51] router release req id 8 -INFO 06-24 20:28:56 [manager.py:224] router recive req id 8 cost time 0.10787248611450195 s -INFO 06-24 20:28:56 [manager.py:68] detokenization recv req id 8 cost time 0.10982179641723633 s -DEBUG 06-24 20:28:56 [manager.py:391] Prefill Batch: batch_id=314901115796672256137962829382313540444, time:1750768136.8019927s req_ids:[8] -DEBUG 06-24 20:28:56 [manager.py:391] -ERROR 06-24 20:28:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:206.85744285583496ms total_cost_time:206.9244384765625ms,out_token_counter:1 mean_per_token_cost_time: 0.06699562072753906ms prompt_token_num:11969 prompt_cache_len:5151 prompt_cache_ratio:0.43036176790040936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 -DEBUG 06-24 20:28:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:56 [batch.py:51] router release req id 8 -INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10453939437866211 s -INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.10548019409179688 s -DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=16478308163660516422606655363763646896, time:1750768137.0170758s req_ids:[8] -DEBUG 06-24 20:28:57 [manager.py:391] -ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:56 lightllm_req_id:8 first_token_cost:348.22535514831543ms total_cost_time:348.27208518981934ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:11970 prompt_cache_len:5151 prompt_cache_ratio:0.43032581453634083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 -DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:57 [batch.py:51] router release req id 8 -INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10858654975891113 s -INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s -DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=263853919847717316374060382865263945853, time:1750768137.3716505s req_ids:[8] -DEBUG 06-24 20:28:57 [manager.py:391] -ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:202.7304172515869ms total_cost_time:202.7742862701416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11971 prompt_cache_len:5151 prompt_cache_ratio:0.43028986717901596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 -DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:57 [batch.py:51] router release req id 8 -INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10835957527160645 s -INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027741432189941 s -DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=193372557165191265446377080303334493274, time:1750768137.587858s req_ids:[8] -DEBUG 06-24 20:28:57 [manager.py:391] -ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:216.3369655609131ms total_cost_time:216.39394760131836ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:11972 prompt_cache_len:5151 prompt_cache_ratio:0.4302539258269295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 -DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:57 [batch.py:51] router release req id 8 -INFO 06-24 20:28:57 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s -INFO 06-24 20:28:57 [manager.py:68] detokenization recv req id 8 cost time 0.11002635955810547 s -DEBUG 06-24 20:28:57 [manager.py:391] Prefill Batch: batch_id=118579312505353612546134618222646505980, time:1750768137.8058567s req_ids:[8] -DEBUG 06-24 20:28:57 [manager.py:391] -ERROR 06-24 20:28:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:206.66933059692383ms total_cost_time:206.71439170837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11973 prompt_cache_len:5151 prompt_cache_ratio:0.4302179904785768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 -DEBUG 06-24 20:28:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:57 [batch.py:51] router release req id 8 -INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10770940780639648 s -INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975456237792969 s -DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=79815428260900973984936112290918699288, time:1750768138.0176246s req_ids:[8] -DEBUG 06-24 20:28:58 [manager.py:391] -ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:57 lightllm_req_id:8 first_token_cost:194.23246383666992ms total_cost_time:194.2763328552246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:11974 prompt_cache_len:5151 prompt_cache_ratio:0.43018206113245366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 -DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:58 [batch.py:51] router release req id 8 -INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.1081693172454834 s -INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.11011099815368652 s -DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=141026568049117553427483524231658933207, time:1750768138.2232552s req_ids:[8] -DEBUG 06-24 20:28:58 [manager.py:391] -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:28:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:207.01980590820312ms total_cost_time:207.0631980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11975 prompt_cache_len:5151 prompt_cache_ratio:0.43014613778705635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 -DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:58 [batch.py:51] router release req id 8 -INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.1073915958404541 s -INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.1090843677520752 s -DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=55606224309751468707965949573460652671, time:1750768138.4412284s req_ids:[8] -DEBUG 06-24 20:28:58 [manager.py:391] -ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:210.4339599609375ms total_cost_time:210.47568321228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:11976 prompt_cache_len:5151 prompt_cache_ratio:0.43011022044088176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 -DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:58 [batch.py:51] router release req id 8 -INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10776519775390625 s -INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10976004600524902 s -DEBUG 06-24 20:28:58 [manager.py:391] Prefill Batch: batch_id=152217960475603516027913100532359953267, time:1750768138.653451s req_ids:[8] -DEBUG 06-24 20:28:58 [manager.py:391] -ERROR 06-24 20:28:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:345.16239166259766ms total_cost_time:345.20983695983887ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:11977 prompt_cache_len:5151 prompt_cache_ratio:0.43007430909242716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 -DEBUG 06-24 20:28:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:58 [batch.py:51] router release req id 8 -INFO 06-24 20:28:58 [manager.py:224] router recive req id 8 cost time 0.10803437232971191 s -INFO 06-24 20:28:58 [manager.py:68] detokenization recv req id 8 cost time 0.10989212989807129 s -DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=48966923906306713373175589894371429371, time:1750768139.0058842s req_ids:[8] -DEBUG 06-24 20:28:59 [manager.py:391] -ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:58 lightllm_req_id:8 first_token_cost:205.68132400512695ms total_cost_time:205.72543144226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11978 prompt_cache_len:5151 prompt_cache_ratio:0.43003840374019037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 -DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:59 [batch.py:51] router release req id 8 -INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.10694766044616699 s -INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10892820358276367 s -DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=216320136538372147515387893965576566722, time:1750768139.2158518s req_ids:[8] -DEBUG 06-24 20:28:59 [manager.py:391] -ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:222.61357307434082ms total_cost_time:222.6581573486328ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:11979 prompt_cache_len:5151 prompt_cache_ratio:0.4300025043826697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 -DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:59 [batch.py:51] router release req id 8 -INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.10758781433105469 s -INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.1094970703125 s -DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=331044038621114045701302461255031648443, time:1750768139.448361s req_ids:[8] -DEBUG 06-24 20:28:59 [manager.py:391] -ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:209.8381519317627ms total_cost_time:209.8689079284668ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:11980 prompt_cache_len:5151 prompt_cache_ratio:0.4299666110183639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 -DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:59 [batch.py:51] router release req id 8 -INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.1075432300567627 s -INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10949993133544922 s -DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=113667671425225498233490098555284725959, time:1750768139.671358s req_ids:[8] -DEBUG 06-24 20:28:59 [manager.py:391] -ERROR 06-24 20:28:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:28:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:28:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:230.69238662719727ms total_cost_time:230.73816299438477ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:11981 prompt_cache_len:5151 prompt_cache_ratio:0.42993072364577245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:28:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 -DEBUG 06-24 20:28:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:28:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:28:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:28:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:28:59 [batch.py:51] router release req id 8 -INFO 06-24 20:28:59 [manager.py:224] router recive req id 8 cost time 0.1069028377532959 s -INFO 06-24 20:28:59 [manager.py:68] detokenization recv req id 8 cost time 0.10873532295227051 s -DEBUG 06-24 20:28:59 [manager.py:391] Prefill Batch: batch_id=146945229886474275764179234189008314897, time:1750768139.9196985s req_ids:[8] -DEBUG 06-24 20:28:59 [manager.py:391] -ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:28:59 lightllm_req_id:8 first_token_cost:225.06237030029297ms total_cost_time:225.0981330871582ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:11982 prompt_cache_len:5151 prompt_cache_ratio:0.4298948422633951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 -DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:00 [batch.py:51] router release req id 8 -INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10735011100769043 s -INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.10937213897705078 s -DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=325836289117998328200526547618961335684, time:1750768140.1273289s req_ids:[8] -DEBUG 06-24 20:29:00 [manager.py:391] -ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:206.91776275634766ms total_cost_time:206.97546005249023ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:11983 prompt_cache_len:5151 prompt_cache_ratio:0.42985896686973213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 -DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:00 [batch.py:51] router release req id 8 -INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10825681686401367 s -INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.11023354530334473 s -DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=105004664256981886755727146224252441224, time:1750768140.342499s req_ids:[8] -DEBUG 06-24 20:29:00 [manager.py:391] -ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:380.04326820373535ms total_cost_time:380.07044792175293ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:11984 prompt_cache_len:5151 prompt_cache_ratio:0.4298230974632844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 -DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:00 [batch.py:51] router release req id 8 -INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10687518119812012 s -INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.10879969596862793 s -DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=318198177319329947172993549742013383874, time:1750768140.728171s req_ids:[8] -DEBUG 06-24 20:29:00 [manager.py:391] -ERROR 06-24 20:29:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:211.85755729675293ms total_cost_time:211.90452575683594ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:11985 prompt_cache_len:5151 prompt_cache_ratio:0.4297872340425532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 -DEBUG 06-24 20:29:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:00 [batch.py:51] router release req id 8 -INFO 06-24 20:29:00 [manager.py:224] router recive req id 8 cost time 0.10910916328430176 s -INFO 06-24 20:29:00 [manager.py:68] detokenization recv req id 8 cost time 0.1111607551574707 s -DEBUG 06-24 20:29:00 [manager.py:391] Prefill Batch: batch_id=76821386562948014192899590590822769055, time:1750768140.9437013s req_ids:[8] -DEBUG 06-24 20:29:00 [manager.py:391] -ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:00 lightllm_req_id:8 first_token_cost:206.22014999389648ms total_cost_time:206.2661647796631ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11986 prompt_cache_len:5151 prompt_cache_ratio:0.4297513766060404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 -DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:01 [batch.py:51] router release req id 8 -INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10844159126281738 s -INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s -DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=267626692958540132130191980178323698848, time:1750768141.1602192s req_ids:[8] -DEBUG 06-24 20:29:01 [manager.py:391] -ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:212.80479431152344ms total_cost_time:212.8603458404541ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:11987 prompt_cache_len:5151 prompt_cache_ratio:0.42971552515224826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 -DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:01 [batch.py:51] router release req id 8 -DEBUG 06-24 20:29:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:01 [manager.py:283] -DEBUG 06-24 20:29:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:01 [manager.py:284] -INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10789227485656738 s -INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s -DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=11444672340918047402183984021272924069, time:1750768141.380888s req_ids:[8] -DEBUG 06-24 20:29:01 [manager.py:391] -ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:229.41064834594727ms total_cost_time:229.45666313171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:11988 prompt_cache_len:5151 prompt_cache_ratio:0.4296796796796797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 -DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:01 [batch.py:51] router release req id 8 -INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10780811309814453 s -INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.1096944808959961 s -DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=212977370290146390311203075431462752876, time:1750768141.6157515s req_ids:[8] -DEBUG 06-24 20:29:01 [manager.py:391] -ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:207.23962783813477ms total_cost_time:207.28468894958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11989 prompt_cache_len:5151 prompt_cache_ratio:0.42964384018683793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 -DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:01 [batch.py:51] router release req id 8 -INFO 06-24 20:29:01 [manager.py:224] router recive req id 8 cost time 0.10717463493347168 s -INFO 06-24 20:29:01 [manager.py:68] detokenization recv req id 8 cost time 0.10912346839904785 s -DEBUG 06-24 20:29:01 [manager.py:391] Prefill Batch: batch_id=58508124273961706056465349581785378507, time:1750768141.8248682s req_ids:[8] -DEBUG 06-24 20:29:01 [manager.py:391] -ERROR 06-24 20:29:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:224.1840362548828ms total_cost_time:224.2283821105957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:11990 prompt_cache_len:5151 prompt_cache_ratio:0.42960800667222687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 -DEBUG 06-24 20:29:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:01 [batch.py:51] router release req id 8 -INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10739445686340332 s -INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.10936617851257324 s -DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=51066918067910285056303256494526161282, time:1750768142.0605953s req_ids:[8] -DEBUG 06-24 20:29:02 [manager.py:391] -ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:01 lightllm_req_id:8 first_token_cost:386.4421844482422ms total_cost_time:386.48486137390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:11991 prompt_cache_len:5151 prompt_cache_ratio:0.42957217913435075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 -DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:02 [batch.py:51] router release req id 8 -INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10811543464660645 s -INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005759239196777 s -DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=79179214805524201961806513535136917339, time:1750768142.447242s req_ids:[8] -DEBUG 06-24 20:29:02 [manager.py:391] -ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:203.91392707824707ms total_cost_time:203.95803451538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11992 prompt_cache_len:5151 prompt_cache_ratio:0.4295363575717145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 -DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:02 [batch.py:51] router release req id 8 -INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10744047164916992 s -INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.1095266342163086 s -DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=13266818944621714751726430285231528355, time:1750768142.657891s req_ids:[8] -DEBUG 06-24 20:29:02 [manager.py:391] -ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:207.23938941955566ms total_cost_time:207.26537704467773ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:11993 prompt_cache_len:5151 prompt_cache_ratio:0.42950054198282334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 -DEBUG 06-24 20:29:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:02 [batch.py:51] router release req id 8 -INFO 06-24 20:29:02 [manager.py:224] router recive req id 8 cost time 0.10683083534240723 s -INFO 06-24 20:29:02 [manager.py:68] detokenization recv req id 8 cost time 0.10882091522216797 s -DEBUG 06-24 20:29:02 [manager.py:391] Prefill Batch: batch_id=320029545402889721960625033169100896847, time:1750768142.8742738s req_ids:[8] -DEBUG 06-24 20:29:02 [manager.py:391] -ERROR 06-24 20:29:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:213.50622177124023ms total_cost_time:213.531494140625ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:11994 prompt_cache_len:5151 prompt_cache_ratio:0.4294647323661831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 -DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:03 [batch.py:51] router release req id 8 -INFO 06-24 20:29:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.10591912269592285 s -INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.10715079307556152 s -DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=119988958224795863777424935459438210855, time:1750768143.092618s req_ids:[8] -DEBUG 06-24 20:29:03 [manager.py:391] -ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:02 lightllm_req_id:8 first_token_cost:217.04697608947754ms total_cost_time:217.0724868774414ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:11995 prompt_cache_len:5151 prompt_cache_ratio:0.42942892872030014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 -DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:03 [batch.py:51] router release req id 8 -INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.106475830078125 s -INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.10770678520202637 s -DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=100171455838181181804051801913479669772, time:1750768143.3204806s req_ids:[8] -DEBUG 06-24 20:29:03 [manager.py:391] -ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:216.89200401306152ms total_cost_time:216.96114540100098ms,out_token_counter:1 mean_per_token_cost_time: 0.06914138793945312ms prompt_token_num:11996 prompt_cache_len:5151 prompt_cache_ratio:0.4293931310436812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 -DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:03 [batch.py:51] router release req id 8 -INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.10833597183227539 s -INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.11051011085510254 s -DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=175076794872094989025733257240616189561, time:1750768143.5383315s req_ids:[8] -DEBUG 06-24 20:29:03 [manager.py:391] -ERROR 06-24 20:29:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:204.8318386077881ms total_cost_time:204.87689971923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:11997 prompt_cache_len:5151 prompt_cache_ratio:0.4293573393348337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 -DEBUG 06-24 20:29:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:03 [batch.py:51] router release req id 8 -INFO 06-24 20:29:03 [manager.py:224] router recive req id 8 cost time 0.1096961498260498 s -INFO 06-24 20:29:03 [manager.py:68] detokenization recv req id 8 cost time 0.11219978332519531 s -DEBUG 06-24 20:29:03 [manager.py:391] Prefill Batch: batch_id=130083271177277859764878193174491548986, time:1750768143.7494287s req_ids:[8] -DEBUG 06-24 20:29:03 [manager.py:391] -ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:03 lightllm_req_id:8 first_token_cost:405.4999351501465ms total_cost_time:405.5440425872803ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:11998 prompt_cache_len:5151 prompt_cache_ratio:0.42932155359226537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 -DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:04 [batch.py:51] router release req id 8 -INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.1074836254119873 s -INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.10932135581970215 s -DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=327384162895898243084195067892052374415, time:1750768144.1623502s req_ids:[8] -DEBUG 06-24 20:29:04 [manager.py:391] -ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:183.47454071044922ms total_cost_time:183.5179328918457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:11999 prompt_cache_len:5151 prompt_cache_ratio:0.4292857738144845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 -DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:04 [batch.py:51] router release req id 8 -INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10811185836791992 s -INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.11004638671875 s -DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=156339597023225659232311034211781884008, time:1750768144.3497717s req_ids:[8] -DEBUG 06-24 20:29:04 [manager.py:391] -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:195.55115699768066ms total_cost_time:195.61147689819336ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12000 prompt_cache_len:5151 prompt_cache_ratio:0.42925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 -DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:04 [batch.py:51] router release req id 8 -INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10849332809448242 s -INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s -DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=259177005694312922991527212354066977009, time:1750768144.5484993s req_ids:[8] -DEBUG 06-24 20:29:04 [manager.py:391] -DEBUG 06-24 20:29:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 49969.533 tokens/s -DEBUG 06-24 20:29:04 [stats.py:37] Avg prompt tokens throughput: 49961.193 tokens/s -DEBUG 06-24 20:29:04 [stats.py:37] Avg generate tokens throughput: 8.340 tokens/s -ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:215.5599594116211ms total_cost_time:215.60359001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12001 prompt_cache_len:5151 prompt_cache_ratio:0.4292142321473211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 -DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:04 [batch.py:51] router release req id 8 -INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s -INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.10995268821716309 s -DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=114486568357166733845298271866179694713, time:1750768144.7710772s req_ids:[8] -DEBUG 06-24 20:29:04 [manager.py:391] -ERROR 06-24 20:29:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:213.04869651794434ms total_cost_time:213.10973167419434ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12002 prompt_cache_len:5151 prompt_cache_ratio:0.42917847025495753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 -DEBUG 06-24 20:29:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:04 [batch.py:51] router release req id 8 -INFO 06-24 20:29:04 [manager.py:224] router recive req id 8 cost time 0.10711002349853516 s -INFO 06-24 20:29:04 [manager.py:68] detokenization recv req id 8 cost time 0.1092214584350586 s -DEBUG 06-24 20:29:04 [manager.py:391] Prefill Batch: batch_id=245066641539354190372768831541904874427, time:1750768144.9931011s req_ids:[8] -DEBUG 06-24 20:29:04 [manager.py:391] -ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:04 lightllm_req_id:8 first_token_cost:215.86322784423828ms total_cost_time:215.90876579284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12003 prompt_cache_len:5151 prompt_cache_ratio:0.42914271432141965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 -DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:05 [batch.py:51] router release req id 8 -INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.10851883888244629 s -INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s -DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=206808503113083642025994083146073104112, time:1750768145.2141945s req_ids:[8] -DEBUG 06-24 20:29:05 [manager.py:391] -ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:215.68012237548828ms total_cost_time:215.72494506835938ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12004 prompt_cache_len:5151 prompt_cache_ratio:0.4291069643452183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 -DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:05 [batch.py:51] router release req id 8 -INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.3118901252746582 s -INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.31380271911621094 s -DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=121136995746701749841183191537177903345, time:1750768145.6369538s req_ids:[8] -DEBUG 06-24 20:29:05 [manager.py:391] -ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:393.2313919067383ms total_cost_time:393.27549934387207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12005 prompt_cache_len:5151 prompt_cache_ratio:0.42907122032486467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 -DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:05 [batch.py:51] router release req id 8 -INFO 06-24 20:29:05 [manager.py:224] router recive req id 8 cost time 0.10801410675048828 s -INFO 06-24 20:29:05 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s -DEBUG 06-24 20:29:05 [manager.py:391] Prefill Batch: batch_id=8413969729335408194508939558546892983, time:1750768145.8373215s req_ids:[8] -DEBUG 06-24 20:29:05 [manager.py:391] -ERROR 06-24 20:29:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:212.3887538909912ms total_cost_time:212.4350070953369ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12006 prompt_cache_len:5151 prompt_cache_ratio:0.42903548225887056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 -DEBUG 06-24 20:29:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:05 [batch.py:51] router release req id 8 -INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10972046852111816 s -INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11176323890686035 s -DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=19542191938045469950862621955606415612, time:1750768146.0523705s req_ids:[8] -DEBUG 06-24 20:29:06 [manager.py:391] -ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:05 lightllm_req_id:8 first_token_cost:211.68088912963867ms total_cost_time:211.72618865966797ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12007 prompt_cache_len:5151 prompt_cache_ratio:0.4289997501457483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 -DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:06 [batch.py:51] router release req id 8 -INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10818195343017578 s -INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s -DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=95407010081928218771993102772963095359, time:1750768146.2953484s req_ids:[8] -DEBUG 06-24 20:29:06 [manager.py:391] -ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:235.61954498291016ms total_cost_time:235.66317558288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12008 prompt_cache_len:5151 prompt_cache_ratio:0.4289640239840107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 -DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:06 [batch.py:51] router release req id 8 -INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10865092277526855 s -INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11003971099853516 s -DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=269685452943100156085240263733822960384, time:1750768146.5354066s req_ids:[8] -DEBUG 06-24 20:29:06 [manager.py:391] -ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:225.0828742980957ms total_cost_time:225.1286506652832ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12009 prompt_cache_len:5151 prompt_cache_ratio:0.4289283037721709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 -DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:06 [batch.py:51] router release req id 8 -INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10861396789550781 s -INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11070990562438965 s -DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=15199493082351808299793569383798071069, time:1750768146.7585313s req_ids:[8] -DEBUG 06-24 20:29:06 [manager.py:391] -ERROR 06-24 20:29:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:222.34559059143066ms total_cost_time:222.39351272583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:12010 prompt_cache_len:5151 prompt_cache_ratio:0.4288925895087427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 -DEBUG 06-24 20:29:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:06 [batch.py:51] router release req id 8 -INFO 06-24 20:29:06 [manager.py:224] router recive req id 8 cost time 0.10854554176330566 s -INFO 06-24 20:29:06 [manager.py:68] detokenization recv req id 8 cost time 0.11081624031066895 s -DEBUG 06-24 20:29:06 [manager.py:391] Prefill Batch: batch_id=247672839698063844752436114003918206221, time:1750768146.9780264s req_ids:[8] -DEBUG 06-24 20:29:06 [manager.py:391] -ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:06 lightllm_req_id:8 first_token_cost:423.6428737640381ms total_cost_time:423.7046241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12011 prompt_cache_len:5151 prompt_cache_ratio:0.42885688119224047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 -DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:07 [batch.py:51] router release req id 8 -INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10865068435668945 s -INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.11075425148010254 s -DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=80163403286976088376911342295070824939, time:1750768147.4084656s req_ids:[8] -DEBUG 06-24 20:29:07 [manager.py:391] -ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:191.2829875946045ms total_cost_time:191.3473606109619ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:12012 prompt_cache_len:5151 prompt_cache_ratio:0.4288211788211788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 -DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:07 [batch.py:51] router release req id 8 -INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10842061042785645 s -INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s -DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=133095015546961401256545974772499051861, time:1750768147.6049411s req_ids:[8] -DEBUG 06-24 20:29:07 [manager.py:391] -ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:209.3672752380371ms total_cost_time:209.4137668609619ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12013 prompt_cache_len:5151 prompt_cache_ratio:0.4287854823940731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 -DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:07 [batch.py:51] router release req id 8 -INFO 06-24 20:29:07 [manager.py:224] router recive req id 8 cost time 0.10808062553405762 s -INFO 06-24 20:29:07 [manager.py:68] detokenization recv req id 8 cost time 0.10911035537719727 s -DEBUG 06-24 20:29:07 [manager.py:391] Prefill Batch: batch_id=140641072025147824017690391817133506494, time:1750768147.8322158s req_ids:[8] -DEBUG 06-24 20:29:07 [manager.py:391] -ERROR 06-24 20:29:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:220.91937065124512ms total_cost_time:220.96490859985352ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12014 prompt_cache_len:5151 prompt_cache_ratio:0.428749791909439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 -DEBUG 06-24 20:29:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:07 [batch.py:51] router release req id 8 -INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.108184814453125 s -INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.11020946502685547 s -DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=52815742649395797252158054318416893819, time:1750768148.0522652s req_ids:[8] -DEBUG 06-24 20:29:08 [manager.py:391] -ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:07 lightllm_req_id:8 first_token_cost:211.8208408355713ms total_cost_time:211.8673324584961ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12015 prompt_cache_len:5151 prompt_cache_ratio:0.42871410736579274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 -DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:08 [batch.py:51] router release req id 8 -INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.10674309730529785 s -INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.10875582695007324 s -DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=321049370361993069559888626532201931712, time:1750768148.2929409s req_ids:[8] -DEBUG 06-24 20:29:08 [manager.py:391] -ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:240.81873893737793ms total_cost_time:240.8616542816162ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12016 prompt_cache_len:5151 prompt_cache_ratio:0.42867842876165113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 -DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:08 [batch.py:51] router release req id 8 -INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.11030960083007812 s -INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.11245393753051758 s -DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=86860259430355401702264045010013662451, time:1750768148.5394244s req_ids:[8] -DEBUG 06-24 20:29:08 [manager.py:391] -ERROR 06-24 20:29:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:236.26470565795898ms total_cost_time:236.30595207214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12017 prompt_cache_len:5151 prompt_cache_ratio:0.4286427560955313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 -DEBUG 06-24 20:29:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:08 [batch.py:51] router release req id 8 -INFO 06-24 20:29:08 [manager.py:224] router recive req id 8 cost time 0.10876250267028809 s -INFO 06-24 20:29:08 [manager.py:68] detokenization recv req id 8 cost time 0.1109161376953125 s -DEBUG 06-24 20:29:08 [manager.py:391] Prefill Batch: batch_id=793972570785084083736208901986077079, time:1750768148.7645762s req_ids:[8] -DEBUG 06-24 20:29:08 [manager.py:391] -ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:08 lightllm_req_id:8 first_token_cost:399.9342918395996ms total_cost_time:399.9795913696289ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12018 prompt_cache_len:5151 prompt_cache_ratio:0.4286070893659511 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 -DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:09 [batch.py:51] router release req id 8 -INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s -INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.11112403869628906 s -DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=128765853299447464181801748618125117338, time:1750768149.1748664s req_ids:[8] -DEBUG 06-24 20:29:09 [manager.py:391] -ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:221.8005657196045ms total_cost_time:221.8470573425293ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12019 prompt_cache_len:5151 prompt_cache_ratio:0.42857142857142855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 -DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:09 [batch.py:51] router release req id 8 -INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10792899131774902 s -INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10940837860107422 s -DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=56849835682524159301660500157799077454, time:1750768149.4053035s req_ids:[8] -DEBUG 06-24 20:29:09 [manager.py:391] -ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:213.84000778198242ms total_cost_time:213.88602256774902ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12020 prompt_cache_len:5151 prompt_cache_ratio:0.4285357737104825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 -DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:09 [batch.py:51] router release req id 8 -INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10712933540344238 s -INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10871577262878418 s -DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=59376223215732959262459505900379532525, time:1750768149.6104329s req_ids:[8] -DEBUG 06-24 20:29:09 [manager.py:391] -ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:188.57645988464355ms total_cost_time:188.6000633239746ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12021 prompt_cache_len:5151 prompt_cache_ratio:0.42850012478163213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 -DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:09 [batch.py:51] router release req id 8 -INFO 06-24 20:29:09 [manager.py:224] router recive req id 8 cost time 0.10310220718383789 s -INFO 06-24 20:29:09 [manager.py:68] detokenization recv req id 8 cost time 0.10462784767150879 s -DEBUG 06-24 20:29:09 [manager.py:391] Prefill Batch: batch_id=44788431383541676326584608368813368494, time:1750768149.8205647s req_ids:[8] -DEBUG 06-24 20:29:09 [manager.py:391] -ERROR 06-24 20:29:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:221.53973579406738ms total_cost_time:221.5728759765625ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12022 prompt_cache_len:5151 prompt_cache_ratio:0.4284644817833971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 -DEBUG 06-24 20:29:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:09 [batch.py:51] router release req id 8 -INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.1052103042602539 s -INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10740923881530762 s -DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=66917883417043659105248542192226972827, time:1750768150.0533133s req_ids:[8] -DEBUG 06-24 20:29:10 [manager.py:391] -ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:09 lightllm_req_id:8 first_token_cost:233.32548141479492ms total_cost_time:233.34813117980957ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:12023 prompt_cache_len:5151 prompt_cache_ratio:0.4284288447142976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 -DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:10 [batch.py:51] router release req id 8 -INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.1051032543182373 s -INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10729074478149414 s -DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=176253158548196712953843253094605844050, time:1750768150.2896163s req_ids:[8] -DEBUG 06-24 20:29:10 [manager.py:391] -ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:228.27434539794922ms total_cost_time:228.29914093017578ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12024 prompt_cache_len:5151 prompt_cache_ratio:0.4283932135728543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 -DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:10 [batch.py:51] router release req id 8 -INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.10510611534118652 s -INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10730719566345215 s -DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=240172713313360568779019341364643632157, time:1750768150.5266142s req_ids:[8] -DEBUG 06-24 20:29:10 [manager.py:391] -ERROR 06-24 20:29:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:444.75793838500977ms total_cost_time:444.7815418243408ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12025 prompt_cache_len:5151 prompt_cache_ratio:0.42835758835758836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 -DEBUG 06-24 20:29:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:10 [batch.py:51] router release req id 8 -INFO 06-24 20:29:10 [manager.py:224] router recive req id 8 cost time 0.10425353050231934 s -INFO 06-24 20:29:10 [manager.py:68] detokenization recv req id 8 cost time 0.10620880126953125 s -DEBUG 06-24 20:29:10 [manager.py:391] Prefill Batch: batch_id=289666635358025803404998151710080379977, time:1750768150.9643679s req_ids:[8] -DEBUG 06-24 20:29:10 [manager.py:391] -ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:10 lightllm_req_id:8 first_token_cost:216.5670394897461ms total_cost_time:216.6142463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12026 prompt_cache_len:5151 prompt_cache_ratio:0.42832196906702147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 -DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:11 [batch.py:51] router release req id 8 -INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.10611534118652344 s -INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.10829520225524902 s -DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=316442442297969193194089483756271252402, time:1750768151.2001643s req_ids:[8] -DEBUG 06-24 20:29:11 [manager.py:391] -ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:235.08310317993164ms total_cost_time:235.1076602935791ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12027 prompt_cache_len:5151 prompt_cache_ratio:0.4282863556996757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 -DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:11 [batch.py:51] router release req id 8 -INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1050877571105957 s -INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.10724759101867676 s -DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=109565752340182290113671562410930437067, time:1750768151.4411745s req_ids:[8] -DEBUG 06-24 20:29:11 [manager.py:391] -ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:235.5632781982422ms total_cost_time:235.58545112609863ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12028 prompt_cache_len:5151 prompt_cache_ratio:0.42825074825407383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 -DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:11 [batch.py:51] router release req id 8 -INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1059572696685791 s -INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.108154296875 s -DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=219929110004615796080298254369893575243, time:1750768151.664816s req_ids:[8] -DEBUG 06-24 20:29:11 [manager.py:391] -ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:213.2554054260254ms total_cost_time:213.3004665374756ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12029 prompt_cache_len:5151 prompt_cache_ratio:0.4282151467287389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 -DEBUG 06-24 20:29:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:11 [batch.py:51] router release req id 8 -INFO 06-24 20:29:11 [manager.py:224] router recive req id 8 cost time 0.1102914810180664 s -INFO 06-24 20:29:11 [manager.py:68] detokenization recv req id 8 cost time 0.11255836486816406 s -DEBUG 06-24 20:29:11 [manager.py:391] Prefill Batch: batch_id=222455836025050646891414014265765181027, time:1750768151.895965s req_ids:[8] -DEBUG 06-24 20:29:11 [manager.py:391] -ERROR 06-24 20:29:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:11 lightllm_req_id:8 first_token_cost:231.51803016662598ms total_cost_time:231.54282569885254ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12030 prompt_cache_len:5151 prompt_cache_ratio:0.4281795511221945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 -DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:12 [batch.py:51] router release req id 8 -INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10507941246032715 s -INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10720467567443848 s -DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=58720643316398380131466173958936005866, time:1750768152.1218276s req_ids:[8] -DEBUG 06-24 20:29:12 [manager.py:391] -ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:217.71740913391113ms total_cost_time:217.75102615356445ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12031 prompt_cache_len:5151 prompt_cache_ratio:0.42814396143296485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 -DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:12 [batch.py:51] router release req id 8 -INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10420107841491699 s -INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10635113716125488 s -DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=112135186220016747848402029810673593702, time:1750768152.3421726s req_ids:[8] -DEBUG 06-24 20:29:12 [manager.py:391] -ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:366.2266731262207ms total_cost_time:366.2524223327637ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:12032 prompt_cache_len:5151 prompt_cache_ratio:0.4281083776595745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 -DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:12 [batch.py:51] router release req id 8 -INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10545206069946289 s -INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10729169845581055 s -DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=65361160067735883182817186396964541122, time:1750768152.7145388s req_ids:[8] -DEBUG 06-24 20:29:12 [manager.py:391] -ERROR 06-24 20:29:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:217.12827682495117ms total_cost_time:217.15593338012695ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12033 prompt_cache_len:5151 prompt_cache_ratio:0.42807279980054846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 -DEBUG 06-24 20:29:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:12 [batch.py:51] router release req id 8 -INFO 06-24 20:29:12 [manager.py:224] router recive req id 8 cost time 0.10755681991577148 s -INFO 06-24 20:29:12 [manager.py:68] detokenization recv req id 8 cost time 0.10973072052001953 s -DEBUG 06-24 20:29:12 [manager.py:391] Prefill Batch: batch_id=195909627552150442832864324375666989883, time:1750768152.94206s req_ids:[8] -DEBUG 06-24 20:29:12 [manager.py:391] -ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:12 lightllm_req_id:8 first_token_cost:234.0710163116455ms total_cost_time:234.09080505371094ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12034 prompt_cache_len:5151 prompt_cache_ratio:0.4280372278544125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 -DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:13 [batch.py:51] router release req id 8 -INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10854029655456543 s -INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.11075067520141602 s -DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=108602508995428510405783371312931566988, time:1750768153.176855s req_ids:[8] -DEBUG 06-24 20:29:13 [manager.py:391] -ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:206.13527297973633ms total_cost_time:206.15577697753906ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12035 prompt_cache_len:5151 prompt_cache_ratio:0.42800166181969257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 -DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:13 [batch.py:51] router release req id 8 -INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10504484176635742 s -INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10699295997619629 s -DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=210828453444395192216928843050162273174, time:1750768153.3883357s req_ids:[8] -DEBUG 06-24 20:29:13 [manager.py:391] -ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:210.9203338623047ms total_cost_time:210.94107627868652ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12036 prompt_cache_len:5151 prompt_cache_ratio:0.4279661016949153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 -DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:13 [batch.py:51] router release req id 8 -INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10497140884399414 s -INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10689210891723633 s -DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=191768512400196359097141788981957252732, time:1750768153.6023083s req_ids:[8] -DEBUG 06-24 20:29:13 [manager.py:391] -ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:213.87457847595215ms total_cost_time:213.89389038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:12037 prompt_cache_len:5151 prompt_cache_ratio:0.42793054747860765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 -DEBUG 06-24 20:29:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:13 [batch.py:51] router release req id 8 -INFO 06-24 20:29:13 [manager.py:224] router recive req id 8 cost time 0.10607290267944336 s -INFO 06-24 20:29:13 [manager.py:68] detokenization recv req id 8 cost time 0.10805654525756836 s -DEBUG 06-24 20:29:13 [manager.py:391] Prefill Batch: batch_id=229931616283184504954360598259935564468, time:1750768153.8209498s req_ids:[8] -DEBUG 06-24 20:29:13 [manager.py:391] -ERROR 06-24 20:29:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:203.6118507385254ms total_cost_time:203.65500450134277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12038 prompt_cache_len:5151 prompt_cache_ratio:0.42789499916929724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 -DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:14 [batch.py:51] router release req id 8 -INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.30750536918640137 s -DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=318304723762559595147455348375842731295, time:1750768154.2220309s req_ids:[8] -DEBUG 06-24 20:29:14 [manager.py:391] -INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.3097724914550781 s -ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:13 lightllm_req_id:8 first_token_cost:415.8947467803955ms total_cost_time:415.91358184814453ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:12039 prompt_cache_len:5151 prompt_cache_ratio:0.42785945676551207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 -DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:14 [batch.py:51] router release req id 8 -INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10582423210144043 s -INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.10779023170471191 s -DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=215419642741544631244834772759152345221, time:1750768154.4667826s req_ids:[8] -DEBUG 06-24 20:29:14 [manager.py:391] -ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:29:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 46881.166 tokens/s -DEBUG 06-24 20:29:14 [stats.py:37] Avg prompt tokens throughput: 46873.267 tokens/s -DEBUG 06-24 20:29:14 [stats.py:37] Avg generate tokens throughput: 7.899 tokens/s -INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:218.75977516174316ms total_cost_time:218.80483627319336ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12040 prompt_cache_len:5151 prompt_cache_ratio:0.42782392026578075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 -DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:14 [batch.py:51] router release req id 8 -INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s -INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s -DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=71084735116213594234740656544589837216, time:1750768154.6732526s req_ids:[8] -DEBUG 06-24 20:29:14 [manager.py:391] -ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:207.20338821411133ms total_cost_time:207.24916458129883ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12041 prompt_cache_len:5151 prompt_cache_ratio:0.4277883896686322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 -DEBUG 06-24 20:29:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:14 [batch.py:51] router release req id 8 -INFO 06-24 20:29:14 [manager.py:224] router recive req id 8 cost time 0.10753011703491211 s -INFO 06-24 20:29:14 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s -DEBUG 06-24 20:29:14 [manager.py:391] Prefill Batch: batch_id=203265624047166060000088244312364300826, time:1750768154.887515s req_ids:[8] -DEBUG 06-24 20:29:14 [manager.py:391] -ERROR 06-24 20:29:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:212.7835750579834ms total_cost_time:212.82696723937988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12042 prompt_cache_len:5151 prompt_cache_ratio:0.4277528649725959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 -DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:15 [batch.py:51] router release req id 8 -INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.1079103946685791 s -INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097860336303711 s -DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=37472561879213758118610936963017647562, time:1750768155.112771s req_ids:[8] -DEBUG 06-24 20:29:15 [manager.py:391] -ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:14 lightllm_req_id:8 first_token_cost:214.60509300231934ms total_cost_time:214.71452713012695ms,out_token_counter:1 mean_per_token_cost_time: 0.10943412780761719ms prompt_token_num:12043 prompt_cache_len:5151 prompt_cache_ratio:0.42771734617620194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 -DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:15 [batch.py:51] router release req id 8 -INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10738062858581543 s -INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10923957824707031 s -DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=2177836956162719931539281275010271740, time:1750768155.3265784s req_ids:[8] -DEBUG 06-24 20:29:15 [manager.py:391] -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:202.99506187438965ms total_cost_time:203.03988456726074ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12044 prompt_cache_len:5151 prompt_cache_ratio:0.42768183327798076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 -DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:15 [batch.py:51] router release req id 8 -INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10605907440185547 s -INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10773134231567383 s -DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=70371748387237070635964710645186554897, time:1750768155.537984s req_ids:[8] -DEBUG 06-24 20:29:15 [manager.py:391] -ERROR 06-24 20:29:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:450.99520683288574ms total_cost_time:451.0171413421631ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12045 prompt_cache_len:5151 prompt_cache_ratio:0.42764632627646326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 -DEBUG 06-24 20:29:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:15 [batch.py:51] router release req id 8 -INFO 06-24 20:29:15 [manager.py:224] router recive req id 8 cost time 0.10618162155151367 s -INFO 06-24 20:29:15 [manager.py:68] detokenization recv req id 8 cost time 0.10750532150268555 s -DEBUG 06-24 20:29:15 [manager.py:391] Prefill Batch: batch_id=60602649264901184097240858216265112250, time:1750768155.9953952s req_ids:[8] -DEBUG 06-24 20:29:15 [manager.py:391] -ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:15 lightllm_req_id:8 first_token_cost:213.62042427062988ms total_cost_time:213.66524696350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12046 prompt_cache_len:5151 prompt_cache_ratio:0.42761082517018095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 -DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:16 [batch.py:51] router release req id 8 -INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10809683799743652 s -INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.11012387275695801 s -DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=37680172604040798269840570150761587201, time:1750768156.212346s req_ids:[8] -DEBUG 06-24 20:29:16 [manager.py:391] -ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:203.14502716064453ms total_cost_time:203.18984985351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12047 prompt_cache_len:5151 prompt_cache_ratio:0.4275753299576658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 -DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:16 [batch.py:51] router release req id 8 -INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.1084442138671875 s -INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s -DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=208199708294903876538876743869264051722, time:1750768156.42323s req_ids:[8] -DEBUG 06-24 20:29:16 [manager.py:391] -ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:210.39390563964844ms total_cost_time:210.43658256530762ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12048 prompt_cache_len:5151 prompt_cache_ratio:0.4275398406374502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 -DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:16 [batch.py:51] router release req id 8 -INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10856938362121582 s -INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047863960266113 s -DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=92822937002517172786060448355481926368, time:1750768156.6393826s req_ids:[8] -DEBUG 06-24 20:29:16 [manager.py:391] -ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:221.8616008758545ms total_cost_time:221.91429138183594ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:12049 prompt_cache_len:5151 prompt_cache_ratio:0.42750435720806707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 -DEBUG 06-24 20:29:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:16 [batch.py:51] router release req id 8 -INFO 06-24 20:29:16 [manager.py:224] router recive req id 8 cost time 0.10782694816589355 s -INFO 06-24 20:29:16 [manager.py:68] detokenization recv req id 8 cost time 0.1097097396850586 s -DEBUG 06-24 20:29:16 [manager.py:391] Prefill Batch: batch_id=310947837100557592641503380457711807419, time:1750768156.8919666s req_ids:[8] -DEBUG 06-24 20:29:16 [manager.py:391] -ERROR 06-24 20:29:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:229.64930534362793ms total_cost_time:229.69317436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12050 prompt_cache_len:5151 prompt_cache_ratio:0.4274688796680498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 -DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:17 [batch.py:51] router release req id 8 -INFO 06-24 20:29:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10872602462768555 s -INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.11072182655334473 s -DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=305620391520721147686760542501942210076, time:1750768157.1050603s req_ids:[8] -DEBUG 06-24 20:29:17 [manager.py:391] -ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:16 lightllm_req_id:8 first_token_cost:221.31872177124023ms total_cost_time:221.36282920837402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12051 prompt_cache_len:5151 prompt_cache_ratio:0.42743340801593227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 -DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:17 [batch.py:51] router release req id 8 -INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10784482955932617 s -INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.10984420776367188 s -DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=313431163150635069735657004655510782201, time:1750768157.3480477s req_ids:[8] -DEBUG 06-24 20:29:17 [manager.py:391] -ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:471.56262397766113ms total_cost_time:471.6055393218994ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12052 prompt_cache_len:5151 prompt_cache_ratio:0.4273979422502489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 -DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:17 [batch.py:51] router release req id 8 -INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10857510566711426 s -INFO 06-24 20:29:17 [manager.py:68] detokenization recv req id 8 cost time 0.11044025421142578 s -DEBUG 06-24 20:29:17 [manager.py:391] Prefill Batch: batch_id=300533308143897871514302169705225126671, time:1750768157.8111422s req_ids:[8] -DEBUG 06-24 20:29:17 [manager.py:391] -ERROR 06-24 20:29:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:195.1589584350586ms total_cost_time:195.18280029296875ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12053 prompt_cache_len:5151 prompt_cache_ratio:0.42736248236953456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 -DEBUG 06-24 20:29:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:17 [batch.py:51] router release req id 8 -INFO 06-24 20:29:17 [manager.py:224] router recive req id 8 cost time 0.10557794570922852 s -INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.10748457908630371 s -DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=324349048255213809808410296926647616750, time:1750768158.0120595s req_ids:[8] -DEBUG 06-24 20:29:18 [manager.py:391] -ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:17 lightllm_req_id:8 first_token_cost:213.00768852233887ms total_cost_time:213.04988861083984ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12054 prompt_cache_len:5151 prompt_cache_ratio:0.42732702837232456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 -DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:18 [batch.py:51] router release req id 8 -INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s -INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.11133146286010742 s -DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=253957478304694350385564161484063562088, time:1750768158.229874s req_ids:[8] -DEBUG 06-24 20:29:18 [manager.py:391] -ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:211.3940715789795ms total_cost_time:211.44723892211914ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12055 prompt_cache_len:5151 prompt_cache_ratio:0.4272915802571547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 -DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:18 [batch.py:51] router release req id 8 -INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s -INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.1102139949798584 s -DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=110474368305055151758111211003144719934, time:1750768158.4484684s req_ids:[8] -DEBUG 06-24 20:29:18 [manager.py:391] -ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:210.2830410003662ms total_cost_time:210.30688285827637ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12056 prompt_cache_len:5151 prompt_cache_ratio:0.4272561380225614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 -DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:18 [batch.py:51] router release req id 8 -INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10441327095031738 s -INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.1064596176147461 s -DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=48845524495415014801840548309088550512, time:1750768158.6622002s req_ids:[8] -DEBUG 06-24 20:29:18 [manager.py:391] -ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:209.4719409942627ms total_cost_time:209.49554443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12057 prompt_cache_len:5151 prompt_cache_ratio:0.42722070166708137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 -DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:18 [batch.py:51] router release req id 8 -INFO 06-24 20:29:18 [manager.py:224] router recive req id 8 cost time 0.10660529136657715 s -INFO 06-24 20:29:18 [manager.py:68] detokenization recv req id 8 cost time 0.10858416557312012 s -DEBUG 06-24 20:29:18 [manager.py:391] Prefill Batch: batch_id=131436859228488649833513952173289443574, time:1750768158.8765733s req_ids:[8] -DEBUG 06-24 20:29:18 [manager.py:391] -ERROR 06-24 20:29:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:204.92243766784668ms total_cost_time:204.96630668640137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12058 prompt_cache_len:5151 prompt_cache_ratio:0.4271852711892519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 -DEBUG 06-24 20:29:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:18 [batch.py:51] router release req id 8 -INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10883808135986328 s -INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.11071658134460449 s -DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=162984558546202167601570167255323412228, time:1750768159.086143s req_ids:[8] -DEBUG 06-24 20:29:19 [manager.py:391] -ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:18 lightllm_req_id:8 first_token_cost:457.5049877166748ms total_cost_time:457.52835273742676ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:12059 prompt_cache_len:5151 prompt_cache_ratio:0.4271498465876109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 -DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:19 [batch.py:51] router release req id 8 -INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10425853729248047 s -INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10607409477233887 s -DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=131598932469307510893186364922493749113, time:1750768159.5529115s req_ids:[8] -DEBUG 06-24 20:29:19 [manager.py:391] -ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:198.00806045532227ms total_cost_time:198.03214073181152ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:12060 prompt_cache_len:5151 prompt_cache_ratio:0.42711442786069653 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 -DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:19 [batch.py:51] router release req id 8 -INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10450339317321777 s -INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10643696784973145 s -DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=277817653406641260519340770778147166251, time:1750768159.7655668s req_ids:[8] -DEBUG 06-24 20:29:19 [manager.py:391] -ERROR 06-24 20:29:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:237.59913444519043ms total_cost_time:237.62273788452148ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12061 prompt_cache_len:5151 prompt_cache_ratio:0.42707901500704754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 -DEBUG 06-24 20:29:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:19 [batch.py:51] router release req id 8 -INFO 06-24 20:29:19 [manager.py:224] router recive req id 8 cost time 0.10444092750549316 s -INFO 06-24 20:29:19 [manager.py:68] detokenization recv req id 8 cost time 0.10648751258850098 s -DEBUG 06-24 20:29:19 [manager.py:391] Prefill Batch: batch_id=34021106094258512512574366866143348040, time:1750768159.989338s req_ids:[8] -DEBUG 06-24 20:29:19 [manager.py:391] -ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:19 lightllm_req_id:8 first_token_cost:182.51562118530273ms total_cost_time:182.5399398803711ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12062 prompt_cache_len:5151 prompt_cache_ratio:0.42704360802520314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 -DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:20 [batch.py:51] router release req id 8 -INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.1044015884399414 s -INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10629606246948242 s -DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=119329534314632534343580831602930570378, time:1750768160.1847932s req_ids:[8] -DEBUG 06-24 20:29:20 [manager.py:391] -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:208.95981788635254ms total_cost_time:208.9846134185791ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12063 prompt_cache_len:5151 prompt_cache_ratio:0.42700820691370306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 -DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:20 [batch.py:51] router release req id 8 -INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.10443496704101562 s -INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10647106170654297 s -DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=11231435938311089827499400375638693969, time:1750768160.3993225s req_ids:[8] -DEBUG 06-24 20:29:20 [manager.py:391] -ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:207.0763111114502ms total_cost_time:207.09991455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:12064 prompt_cache_len:5151 prompt_cache_ratio:0.4269728116710875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 -DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:20 [batch.py:51] router release req id 8 -INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.1066441535949707 s -INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.10855984687805176 s -DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=320422109395247778923031136796160633754, time:1750768160.616411s req_ids:[8] -DEBUG 06-24 20:29:20 [manager.py:391] -ERROR 06-24 20:29:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:216.69864654541016ms total_cost_time:216.75992012023926ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12065 prompt_cache_len:5151 prompt_cache_ratio:0.4269374222958972 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 -DEBUG 06-24 20:29:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:20 [batch.py:51] router release req id 8 -INFO 06-24 20:29:20 [manager.py:224] router recive req id 8 cost time 0.10817170143127441 s -INFO 06-24 20:29:20 [manager.py:68] detokenization recv req id 8 cost time 0.11020517349243164 s -DEBUG 06-24 20:29:20 [manager.py:391] Prefill Batch: batch_id=159022458051294909731831602136372019600, time:1750768160.8344412s req_ids:[8] -DEBUG 06-24 20:29:20 [manager.py:391] -ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:20 lightllm_req_id:8 first_token_cost:412.28389739990234ms total_cost_time:412.32776641845703ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12066 prompt_cache_len:5151 prompt_cache_ratio:0.4269020387866733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 -DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:21 [batch.py:51] router release req id 8 -INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10825705528259277 s -INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s -DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=214715682293418638077830333096195373968, time:1750768161.2534053s req_ids:[8] -DEBUG 06-24 20:29:21 [manager.py:391] -ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:204.35357093811035ms total_cost_time:204.39863204956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12067 prompt_cache_len:5151 prompt_cache_ratio:0.4268666611419574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 -DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:21 [batch.py:51] router release req id 8 -INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10783958435058594 s -INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.10970735549926758 s -DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=172864066548881656145281451871871722721, time:1750768161.4623842s req_ids:[8] -DEBUG 06-24 20:29:21 [manager.py:391] -ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:206.62569999694824ms total_cost_time:206.67099952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12068 prompt_cache_len:5151 prompt_cache_ratio:0.4268312893602917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 -DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:21 [batch.py:51] router release req id 8 -INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.10787320137023926 s -INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.10995841026306152 s -DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=208200350788771513456819010406465496911, time:1750768161.680891s req_ids:[8] -DEBUG 06-24 20:29:21 [manager.py:391] -ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:210.99209785461426ms total_cost_time:211.0445499420166ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:12069 prompt_cache_len:5151 prompt_cache_ratio:0.4267959234402187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 -DEBUG 06-24 20:29:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:21 [batch.py:51] router release req id 8 -INFO 06-24 20:29:21 [manager.py:224] router recive req id 8 cost time 0.1087183952331543 s -INFO 06-24 20:29:21 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s -DEBUG 06-24 20:29:21 [manager.py:391] Prefill Batch: batch_id=72467525143756329508687410543427155130, time:1750768161.8967774s req_ids:[8] -DEBUG 06-24 20:29:21 [manager.py:391] -ERROR 06-24 20:29:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:21 lightllm_req_id:8 first_token_cost:212.36205101013184ms total_cost_time:212.40711212158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12070 prompt_cache_len:5151 prompt_cache_ratio:0.4267605633802817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 -DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:22 [batch.py:51] router release req id 8 -INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10986876487731934 s -INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.11180472373962402 s -DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=142927193493091226580217571044359556618, time:1750768162.117711s req_ids:[8] -DEBUG 06-24 20:29:22 [manager.py:391] -ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:206.21609687805176ms total_cost_time:206.26282691955566ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12071 prompt_cache_len:5151 prompt_cache_ratio:0.4267252091790241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 -DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:22 [batch.py:51] router release req id 8 -INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10825157165527344 s -INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.10936927795410156 s -DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=82130531750689544051400682293092908715, time:1750768162.3310297s req_ids:[8] -DEBUG 06-24 20:29:22 [manager.py:391] -ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:211.75312995910645ms total_cost_time:211.81392669677734ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12072 prompt_cache_len:5151 prompt_cache_ratio:0.42668986083499005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 -DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:22 [batch.py:51] router release req id 8 -INFO 06-24 20:29:22 [manager.py:224] router recive req id 8 cost time 0.10753369331359863 s -INFO 06-24 20:29:22 [manager.py:68] detokenization recv req id 8 cost time 0.10945510864257812 s -DEBUG 06-24 20:29:22 [manager.py:391] Prefill Batch: batch_id=42464442916728141648791610871069417774, time:1750768162.550109s req_ids:[8] -DEBUG 06-24 20:29:22 [manager.py:391] -ERROR 06-24 20:29:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:463.3471965789795ms total_cost_time:463.392972946167ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12073 prompt_cache_len:5151 prompt_cache_ratio:0.4266545183467241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 -DEBUG 06-24 20:29:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:22 [batch.py:51] router release req id 8 -INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s -INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.10973405838012695 s -DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=11875089089050135980899569461200050751, time:1750768163.019186s req_ids:[8] -DEBUG 06-24 20:29:23 [manager.py:391] -ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:22 lightllm_req_id:8 first_token_cost:195.48678398132324ms total_cost_time:195.53112983703613ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12074 prompt_cache_len:5151 prompt_cache_ratio:0.42661918171277124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 -DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:23 [batch.py:51] router release req id 8 -INFO 06-24 20:29:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:29:23 [statics_utils.py:24] mean first cost: 229.07014882261274 ms -INFO 06-24 20:29:23 [statics_utils.py:24] mean per token cost: 0.06237495480393663 ms -INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10840463638305664 s -INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s -DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=152694082299877739306963894349550102956, time:1750768163.2206206s req_ids:[8] -DEBUG 06-24 20:29:23 [manager.py:391] -ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:214.81776237487793ms total_cost_time:214.8730754852295ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12075 prompt_cache_len:5151 prompt_cache_ratio:0.426583850931677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 -DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:23 [batch.py:51] router release req id 8 -INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10779953002929688 s -INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.10980010032653809 s -DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=319965628701148084169393494720153804147, time:1750768163.4645429s req_ids:[8] -DEBUG 06-24 20:29:23 [manager.py:391] -ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:234.53354835510254ms total_cost_time:234.57789421081543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12076 prompt_cache_len:5151 prompt_cache_ratio:0.4265485260019874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 -DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:23 [batch.py:51] router release req id 8 -INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10821199417114258 s -INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11040544509887695 s -DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=113672483453253660360409101686411607088, time:1750768163.68329s req_ids:[8] -DEBUG 06-24 20:29:23 [manager.py:391] -ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:206.6812515258789ms total_cost_time:206.7255973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12077 prompt_cache_len:5151 prompt_cache_ratio:0.4265132069222489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 -DEBUG 06-24 20:29:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:23 [batch.py:51] router release req id 8 -INFO 06-24 20:29:23 [manager.py:224] router recive req id 8 cost time 0.10892724990844727 s -INFO 06-24 20:29:23 [manager.py:68] detokenization recv req id 8 cost time 0.11088180541992188 s -DEBUG 06-24 20:29:23 [manager.py:391] Prefill Batch: batch_id=157106017807927445966542583362375327607, time:1750768163.8981445s req_ids:[8] -DEBUG 06-24 20:29:23 [manager.py:391] -ERROR 06-24 20:29:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:23 lightllm_req_id:8 first_token_cost:212.5871181488037ms total_cost_time:212.64100074768066ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:12078 prompt_cache_len:5151 prompt_cache_ratio:0.42647789369100847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 -DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:24 [batch.py:51] router release req id 8 -INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10681271553039551 s -INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.1086890697479248 s -DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=17715277591360210217182938731529702404, time:1750768164.1298437s req_ids:[8] -DEBUG 06-24 20:29:24 [manager.py:391] -ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:217.6811695098877ms total_cost_time:217.7278995513916ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12079 prompt_cache_len:5151 prompt_cache_ratio:0.4264425863068135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 -DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:24 [batch.py:51] router release req id 8 -INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10833358764648438 s -INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.11029505729675293 s -DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=323529041036324189747276972179529962387, time:1750768164.341399s req_ids:[8] -DEBUG 06-24 20:29:24 [manager.py:391] -ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:29:24 [stats.py:37] Avg tokens(prompt+generate) throughput: 47815.716 tokens/s -DEBUG 06-24 20:29:24 [stats.py:37] Avg prompt tokens throughput: 47807.788 tokens/s -DEBUG 06-24 20:29:24 [stats.py:37] Avg generate tokens throughput: 7.928 tokens/s -INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:418.7803268432617ms total_cost_time:418.8404083251953ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12080 prompt_cache_len:5151 prompt_cache_ratio:0.42640728476821194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 -DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:24 [batch.py:51] router release req id 8 -INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10815024375915527 s -INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.10997486114501953 s -DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=105431283973533029059556518663764142091, time:1750768164.7661805s req_ids:[8] -DEBUG 06-24 20:29:24 [manager.py:391] -ERROR 06-24 20:29:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:196.6707706451416ms total_cost_time:196.7294216156006ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12081 prompt_cache_len:5151 prompt_cache_ratio:0.4263719890737522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 -DEBUG 06-24 20:29:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:24 [batch.py:51] router release req id 8 -INFO 06-24 20:29:24 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s -INFO 06-24 20:29:24 [manager.py:68] detokenization recv req id 8 cost time 0.10993337631225586 s -DEBUG 06-24 20:29:24 [manager.py:391] Prefill Batch: batch_id=5474515331079488267474148108644037105, time:1750768164.9769785s req_ids:[8] -DEBUG 06-24 20:29:24 [manager.py:391] -ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:24 lightllm_req_id:8 first_token_cost:216.28069877624512ms total_cost_time:216.33100509643555ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:12082 prompt_cache_len:5151 prompt_cache_ratio:0.42633669922198314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 -DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:25 [batch.py:51] router release req id 8 -INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10871267318725586 s -INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.11061978340148926 s -DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=117938332335158875872318982564111383158, time:1750768165.189563s req_ids:[8] -DEBUG 06-24 20:29:25 [manager.py:391] -ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:206.08115196228027ms total_cost_time:206.14314079284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12083 prompt_cache_len:5151 prompt_cache_ratio:0.4263014152114541 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 -DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:25 [batch.py:51] router release req id 8 -INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.1084756851196289 s -INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.11056232452392578 s -DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=308371571774252700494012599243960686339, time:1750768165.413208s req_ids:[8] -DEBUG 06-24 20:29:25 [manager.py:391] -ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:214.7078514099121ms total_cost_time:214.79368209838867ms,out_token_counter:1 mean_per_token_cost_time: 0.0858306884765625ms prompt_token_num:12084 prompt_cache_len:5151 prompt_cache_ratio:0.426266137040715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 -DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:25 [batch.py:51] router release req id 8 -INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10851526260375977 s -INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s -DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=14826908049336166937465773595437351736, time:1750768165.6274846s req_ids:[8] -DEBUG 06-24 20:29:25 [manager.py:391] -ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:200.35934448242188ms total_cost_time:200.38866996765137ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12085 prompt_cache_len:5151 prompt_cache_ratio:0.4262308647083161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 -DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:25 [batch.py:51] router release req id 8 -INFO 06-24 20:29:25 [manager.py:224] router recive req id 8 cost time 0.10788464546203613 s -INFO 06-24 20:29:25 [manager.py:68] detokenization recv req id 8 cost time 0.10989117622375488 s -DEBUG 06-24 20:29:25 [manager.py:391] Prefill Batch: batch_id=42454665239996806178864134455623708326, time:1750768165.836484s req_ids:[8] -DEBUG 06-24 20:29:25 [manager.py:391] -ERROR 06-24 20:29:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:208.15682411193848ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12086 prompt_cache_len:5151 prompt_cache_ratio:0.4261955982128082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 -DEBUG 06-24 20:29:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:25 [batch.py:51] router release req id 8 -INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.1065680980682373 s -INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.10867595672607422 s -DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=289209341916191498533210314876866415664, time:1750768166.0491052s req_ids:[8] -DEBUG 06-24 20:29:26 [manager.py:391] -ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:25 lightllm_req_id:8 first_token_cost:413.3586883544922ms total_cost_time:413.4037494659424ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12087 prompt_cache_len:5151 prompt_cache_ratio:0.42616033755274263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 -DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:26 [batch.py:51] router release req id 8 -INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10830211639404297 s -INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.11024665832519531 s -DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=321115342579625876846707929218114651575, time:1750768166.4668515s req_ids:[8] -DEBUG 06-24 20:29:26 [manager.py:391] -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:207.44729042053223ms total_cost_time:207.49282836914062ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12088 prompt_cache_len:5151 prompt_cache_ratio:0.4261250827266711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 -DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:26 [batch.py:51] router release req id 8 -INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10830569267272949 s -INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.11023330688476562 s -DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=275085722100747506563322977584746962634, time:1750768166.6807287s req_ids:[8] -DEBUG 06-24 20:29:26 [manager.py:391] -ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:207.55505561828613ms total_cost_time:207.61466026306152ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12089 prompt_cache_len:5151 prompt_cache_ratio:0.42608983373314585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 -DEBUG 06-24 20:29:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:26 [batch.py:51] router release req id 8 -INFO 06-24 20:29:26 [manager.py:224] router recive req id 8 cost time 0.10739541053771973 s -INFO 06-24 20:29:26 [manager.py:68] detokenization recv req id 8 cost time 0.10869193077087402 s -DEBUG 06-24 20:29:26 [manager.py:391] Prefill Batch: batch_id=55515117597908489011926213656375366351, time:1750768166.8971303s req_ids:[8] -DEBUG 06-24 20:29:26 [manager.py:391] -ERROR 06-24 20:29:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:208.9991569519043ms total_cost_time:209.0473175048828ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:12090 prompt_cache_len:5151 prompt_cache_ratio:0.4260545905707196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 -DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:27 [batch.py:51] router release req id 8 -INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10710597038269043 s -INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.10898351669311523 s -DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=8251264736707970196660598392124922615, time:1750768167.1313734s req_ids:[8] -DEBUG 06-24 20:29:27 [manager.py:391] -ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:26 lightllm_req_id:8 first_token_cost:225.8920669555664ms total_cost_time:225.9531021118164ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12091 prompt_cache_len:5151 prompt_cache_ratio:0.4260193532379456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 -DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:27 [batch.py:51] router release req id 8 -INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s -INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.10949039459228516 s -DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=326397334034004834174791147072178272707, time:1750768167.3501437s req_ids:[8] -DEBUG 06-24 20:29:27 [manager.py:391] -ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:215.26265144348145ms total_cost_time:215.32344818115234ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12092 prompt_cache_len:5151 prompt_cache_ratio:0.42598412173337746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 -DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:27 [batch.py:51] router release req id 8 -INFO 06-24 20:29:27 [manager.py:224] router recive req id 8 cost time 0.10903143882751465 s -INFO 06-24 20:29:27 [manager.py:68] detokenization recv req id 8 cost time 0.11093974113464355 s -DEBUG 06-24 20:29:27 [manager.py:391] Prefill Batch: batch_id=317861357271912375431931615560231996682, time:1750768167.5728202s req_ids:[8] -DEBUG 06-24 20:29:27 [manager.py:391] -ERROR 06-24 20:29:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:443.6612129211426ms total_cost_time:443.7212944030762ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12093 prompt_cache_len:5151 prompt_cache_ratio:0.42594889605556935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 -DEBUG 06-24 20:29:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:27 [batch.py:51] router release req id 8 -INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10731697082519531 s -INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s -DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=238720545174291634697460011375431586903, time:1750768168.0163062s req_ids:[8] -DEBUG 06-24 20:29:28 [manager.py:391] -ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:27 lightllm_req_id:8 first_token_cost:202.21638679504395ms total_cost_time:202.27670669555664ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12094 prompt_cache_len:5151 prompt_cache_ratio:0.4259136762030759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 -DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:28 [batch.py:51] router release req id 8 -INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.1084744930267334 s -INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s -DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=320799013707563251951913806250915446825, time:1750768168.238635s req_ids:[8] -DEBUG 06-24 20:29:28 [manager.py:391] -ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:224.14374351501465ms total_cost_time:224.20072555541992ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:12095 prompt_cache_len:5151 prompt_cache_ratio:0.42587846217445224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 -DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:28 [batch.py:51] router release req id 8 -INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10819315910339355 s -INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11028146743774414 s -DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=230088508560147385679389331321578852045, time:1750768168.4582658s req_ids:[8] -DEBUG 06-24 20:29:28 [manager.py:391] -ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:200.60968399047852ms total_cost_time:200.6537914276123ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12096 prompt_cache_len:5151 prompt_cache_ratio:0.42584325396825395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 -DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:28 [batch.py:51] router release req id 8 -INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.10874128341674805 s -INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11065936088562012 s -DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=102872741345721765490195758442613110878, time:1750768168.6643283s req_ids:[8] -DEBUG 06-24 20:29:28 [manager.py:391] -ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:200.00433921813965ms total_cost_time:200.04892349243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12097 prompt_cache_len:5151 prompt_cache_ratio:0.42580805158303714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 -DEBUG 06-24 20:29:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:28 [batch.py:51] router release req id 8 -INFO 06-24 20:29:28 [manager.py:224] router recive req id 8 cost time 0.1099398136138916 s -INFO 06-24 20:29:28 [manager.py:68] detokenization recv req id 8 cost time 0.11197566986083984 s -DEBUG 06-24 20:29:28 [manager.py:391] Prefill Batch: batch_id=299172048898581842078129629047901137834, time:1750768168.8710449s req_ids:[8] -DEBUG 06-24 20:29:28 [manager.py:391] -ERROR 06-24 20:29:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:205.88231086730957ms total_cost_time:205.92641830444336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12098 prompt_cache_len:5151 prompt_cache_ratio:0.42577285501735823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 -DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:29 [batch.py:51] router release req id 8 -INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10834789276123047 s -INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.11036252975463867 s -DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=297067691350516832870386775203643160804, time:1750768169.0889473s req_ids:[8] -DEBUG 06-24 20:29:29 [manager.py:391] -ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:28 lightllm_req_id:8 first_token_cost:215.67106246948242ms total_cost_time:215.7149314880371ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12099 prompt_cache_len:5151 prompt_cache_ratio:0.4257376642697744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 -DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:29 [batch.py:51] router release req id 8 -INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10750579833984375 s -INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.1096491813659668 s -DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=142020250452107006202537844893237747961, time:1750768169.3162944s req_ids:[8] -DEBUG 06-24 20:29:29 [manager.py:391] -ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:411.1766815185547ms total_cost_time:411.2215042114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12100 prompt_cache_len:5151 prompt_cache_ratio:0.425702479338843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 -DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:29 [batch.py:51] router release req id 8 -INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10729217529296875 s -INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.10941529273986816 s -DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=83921928101375895334645024480018832977, time:1750768169.7231796s req_ids:[8] -DEBUG 06-24 20:29:29 [manager.py:391] -ERROR 06-24 20:29:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:209.34534072875977ms total_cost_time:209.3679904937744ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:12101 prompt_cache_len:5151 prompt_cache_ratio:0.42566730022312205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 -DEBUG 06-24 20:29:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:29 [batch.py:51] router release req id 8 -INFO 06-24 20:29:29 [manager.py:224] router recive req id 8 cost time 0.10831022262573242 s -INFO 06-24 20:29:29 [manager.py:68] detokenization recv req id 8 cost time 0.11036062240600586 s -DEBUG 06-24 20:29:29 [manager.py:391] Prefill Batch: batch_id=124916319686568152445270978310806969274, time:1750768169.950586s req_ids:[8] -DEBUG 06-24 20:29:29 [manager.py:391] -ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:29 lightllm_req_id:8 first_token_cost:226.6838550567627ms total_cost_time:226.72700881958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12102 prompt_cache_len:5151 prompt_cache_ratio:0.42563212692117003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 -DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:30 [batch.py:51] router release req id 8 -INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.1083533763885498 s -INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11033010482788086 s -DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=120944583438043153840471468222508828211, time:1750768170.18188s req_ids:[8] -DEBUG 06-24 20:29:30 [manager.py:391] -ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:224.56693649291992ms total_cost_time:224.61175918579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12103 prompt_cache_len:5151 prompt_cache_ratio:0.4255969594315459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 -DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:30 [batch.py:51] router release req id 8 -INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s -INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11072921752929688 s -DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=154163153159015020155589951346534126410, time:1750768170.401171s req_ids:[8] -DEBUG 06-24 20:29:30 [manager.py:391] -ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:213.95611763000488ms total_cost_time:213.99807929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12104 prompt_cache_len:5151 prompt_cache_ratio:0.425561797752809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 -DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:30 [batch.py:51] router release req id 8 -INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10839533805847168 s -INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.11038804054260254 s -DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=132007744675232204669686743326833615490, time:1750768170.6323035s req_ids:[8] -DEBUG 06-24 20:29:30 [manager.py:391] -ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:221.83561325073242ms total_cost_time:221.8799591064453ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12105 prompt_cache_len:5151 prompt_cache_ratio:0.4255266418835192 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 -DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:30 [batch.py:51] router release req id 8 -INFO 06-24 20:29:30 [manager.py:224] router recive req id 8 cost time 0.10609841346740723 s -INFO 06-24 20:29:30 [manager.py:68] detokenization recv req id 8 cost time 0.10800647735595703 s -DEBUG 06-24 20:29:30 [manager.py:391] Prefill Batch: batch_id=136896352720739087550699817782484750452, time:1750768170.8497176s req_ids:[8] -DEBUG 06-24 20:29:30 [manager.py:391] -ERROR 06-24 20:29:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:210.3898525238037ms total_cost_time:210.4339599609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12106 prompt_cache_len:5151 prompt_cache_ratio:0.4254914918222369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 -DEBUG 06-24 20:29:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:30 [batch.py:51] router release req id 8 -INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10666179656982422 s -INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10842728614807129 s -DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=72441886944905688025705656820394854575, time:1750768171.0663579s req_ids:[8] -DEBUG 06-24 20:29:31 [manager.py:391] -ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:30 lightllm_req_id:8 first_token_cost:373.6233711242676ms total_cost_time:373.64888191223145ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12107 prompt_cache_len:5151 prompt_cache_ratio:0.42545634756752293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 -DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:31 [batch.py:51] router release req id 8 -INFO 06-24 20:29:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10673332214355469 s -INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10863447189331055 s -DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=242626674229220205993105340406056708179, time:1750768171.4454274s req_ids:[8] -DEBUG 06-24 20:29:31 [manager.py:391] -ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:206.83693885803223ms total_cost_time:206.8798542022705ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12108 prompt_cache_len:5151 prompt_cache_ratio:0.42542120911793857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 -DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:31 [batch.py:51] router release req id 8 -INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10852646827697754 s -INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045527458190918 s -DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=48758708796930037597639548439056340982, time:1750768171.6596942s req_ids:[8] -DEBUG 06-24 20:29:31 [manager.py:391] -ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:207.4294090270996ms total_cost_time:207.48305320739746ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12109 prompt_cache_len:5151 prompt_cache_ratio:0.42538607647204557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 -DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:31 [batch.py:51] router release req id 8 -INFO 06-24 20:29:31 [manager.py:224] router recive req id 8 cost time 0.10778522491455078 s -INFO 06-24 20:29:31 [manager.py:68] detokenization recv req id 8 cost time 0.10981559753417969 s -DEBUG 06-24 20:29:31 [manager.py:391] Prefill Batch: batch_id=272955928308009136555634738260367954766, time:1750768171.8713665s req_ids:[8] -DEBUG 06-24 20:29:31 [manager.py:391] -ERROR 06-24 20:29:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:208.08029174804688ms total_cost_time:208.1005573272705ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12110 prompt_cache_len:5151 prompt_cache_ratio:0.42535094962840625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 -DEBUG 06-24 20:29:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:31 [batch.py:51] router release req id 8 -INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10798001289367676 s -INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10903787612915039 s -DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=318367565156790952807932832103755302053, time:1750768172.085849s req_ids:[8] -DEBUG 06-24 20:29:32 [manager.py:391] -ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:31 lightllm_req_id:8 first_token_cost:210.8771800994873ms total_cost_time:210.9205722808838ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12111 prompt_cache_len:5151 prompt_cache_ratio:0.42531582858558337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 -DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:32 [batch.py:51] router release req id 8 -INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.1084296703338623 s -INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.11047840118408203 s -DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=170958238831819508335816881486197010368, time:1750768172.305708s req_ids:[8] -DEBUG 06-24 20:29:32 [manager.py:391] -ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:214.141845703125ms total_cost_time:214.18499946594238ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12112 prompt_cache_len:5151 prompt_cache_ratio:0.42528071334214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 -DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:32 [batch.py:51] router release req id 8 -INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10783791542053223 s -INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10988259315490723 s -DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=167358468822389908429829839415235154314, time:1750768172.5217946s req_ids:[8] -DEBUG 06-24 20:29:32 [manager.py:391] -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:214.5087718963623ms total_cost_time:214.52856063842773ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12113 prompt_cache_len:5151 prompt_cache_ratio:0.42524560389663996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 -DEBUG 06-24 20:29:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:32 [batch.py:51] router release req id 8 -INFO 06-24 20:29:32 [manager.py:224] router recive req id 8 cost time 0.10714292526245117 s -INFO 06-24 20:29:32 [manager.py:68] detokenization recv req id 8 cost time 0.10904979705810547 s -DEBUG 06-24 20:29:32 [manager.py:391] Prefill Batch: batch_id=182252764770211330427295001337290632939, time:1750768172.7527468s req_ids:[8] -DEBUG 06-24 20:29:32 [manager.py:391] -ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:32 lightllm_req_id:8 first_token_cost:466.52674674987793ms total_cost_time:466.5699005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12114 prompt_cache_len:5151 prompt_cache_ratio:0.42521050024764734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 -DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:33 [batch.py:51] router release req id 8 -INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10701942443847656 s -INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.10898375511169434 s -DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=211870241969576258847396309048078760472, time:1750768173.220652s req_ids:[8] -DEBUG 06-24 20:29:33 [manager.py:391] -ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:199.15318489074707ms total_cost_time:199.19419288635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:12115 prompt_cache_len:5151 prompt_cache_ratio:0.4251754023937268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 -DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:33 [batch.py:51] router release req id 8 -INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s -INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s -DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=129042883847863911997991559641541396522, time:1750768173.4225862s req_ids:[8] -DEBUG 06-24 20:29:33 [manager.py:391] -ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:210.34550666809082ms total_cost_time:210.39080619812012ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12116 prompt_cache_len:5151 prompt_cache_ratio:0.4251403103334434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 -DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:33 [batch.py:51] router release req id 8 -INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10856413841247559 s -INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.11077713966369629 s -DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=188578579090110593454401424097026925564, time:1750768173.6380475s req_ids:[8] -DEBUG 06-24 20:29:33 [manager.py:391] -ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:195.79195976257324ms total_cost_time:195.83511352539062ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12117 prompt_cache_len:5151 prompt_cache_ratio:0.4251052240653627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 -DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:33 [batch.py:51] router release req id 8 -INFO 06-24 20:29:33 [manager.py:224] router recive req id 8 cost time 0.10697126388549805 s -INFO 06-24 20:29:33 [manager.py:68] detokenization recv req id 8 cost time 0.10905814170837402 s -DEBUG 06-24 20:29:33 [manager.py:391] Prefill Batch: batch_id=83991496340752764788674241838320348895, time:1750768173.8392944s req_ids:[8] -DEBUG 06-24 20:29:33 [manager.py:391] -ERROR 06-24 20:29:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:198.9607810974121ms total_cost_time:199.0034580230713ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12118 prompt_cache_len:5151 prompt_cache_ratio:0.42507014358805084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 -DEBUG 06-24 20:29:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:33 [batch.py:51] router release req id 8 -INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10820174217224121 s -INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.11004185676574707 s -DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=177838862255100211575082958917290642402, time:1750768174.0454166s req_ids:[8] -DEBUG 06-24 20:29:34 [manager.py:391] -ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:33 lightllm_req_id:8 first_token_cost:212.32175827026367ms total_cost_time:212.36634254455566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12119 prompt_cache_len:5151 prompt_cache_ratio:0.42503506890007425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 -DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:34 [batch.py:51] router release req id 8 -INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10479593276977539 s -INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10675048828125 s -DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=93604372281114938889007454708273917158, time:1750768174.2800455s req_ids:[8] -DEBUG 06-24 20:29:34 [manager.py:391] -ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:223.32167625427246ms total_cost_time:223.3431339263916ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12120 prompt_cache_len:5151 prompt_cache_ratio:0.425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 -DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:34 [batch.py:51] router release req id 8 -INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10472822189331055 s -INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10650777816772461 s -DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=237680985195980529432043951656984835388, time:1750768174.4935327s req_ids:[8] -DEBUG 06-24 20:29:34 [manager.py:391] -ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:29:34 [stats.py:37] Avg tokens(prompt+generate) throughput: 48981.434 tokens/s -DEBUG 06-24 20:29:34 [stats.py:37] Avg prompt tokens throughput: 48973.340 tokens/s -DEBUG 06-24 20:29:34 [stats.py:37] Avg generate tokens throughput: 8.094 tokens/s -INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:397.5844383239746ms total_cost_time:397.60541915893555ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12121 prompt_cache_len:5151 prompt_cache_ratio:0.42496493688639553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 -DEBUG 06-24 20:29:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:34 [batch.py:51] router release req id 8 -INFO 06-24 20:29:34 [manager.py:224] router recive req id 8 cost time 0.10394072532653809 s -INFO 06-24 20:29:34 [manager.py:68] detokenization recv req id 8 cost time 0.10608887672424316 s -DEBUG 06-24 20:29:34 [manager.py:391] Prefill Batch: batch_id=340251787563024515904180343620191232042, time:1750768174.89506s req_ids:[8] -DEBUG 06-24 20:29:34 [manager.py:391] -ERROR 06-24 20:29:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:213.59729766845703ms total_cost_time:213.623046875ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:12122 prompt_cache_len:5151 prompt_cache_ratio:0.42492987955782874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 -DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:35 [batch.py:51] router release req id 8 -INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s -INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959959030151367 s -DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=38087546243068647744450745187189798854, time:1750768175.1265378s req_ids:[8] -DEBUG 06-24 20:29:35 [manager.py:391] -ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:34 lightllm_req_id:8 first_token_cost:222.55682945251465ms total_cost_time:222.59855270385742ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12123 prompt_cache_len:5151 prompt_cache_ratio:0.4248948280128681 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 -DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:35 [batch.py:51] router release req id 8 -INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.1068277359008789 s -INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s -DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=247974355530550800301929459138736217589, time:1750768175.3402405s req_ids:[8] -DEBUG 06-24 20:29:35 [manager.py:391] -ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:166.17608070373535ms total_cost_time:166.20326042175293ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:12124 prompt_cache_len:5151 prompt_cache_ratio:0.4248597822500825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 -DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:35 [batch.py:51] router release req id 8 -INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10379743576049805 s -INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.1058194637298584 s -DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=330505091437266184205680050235447745184, time:1750768175.5147407s req_ids:[8] -DEBUG 06-24 20:29:35 [manager.py:391] -ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:189.93330001831055ms total_cost_time:189.97883796691895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12125 prompt_cache_len:5151 prompt_cache_ratio:0.42482474226804123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 -DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:35 [batch.py:51] router release req id 8 -INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10780191421508789 s -INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10963988304138184 s -DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=185816330843267776792410407042811653170, time:1750768175.7088065s req_ids:[8] -DEBUG 06-24 20:29:35 [manager.py:391] -ERROR 06-24 20:29:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:207.86213874816895ms total_cost_time:207.92460441589355ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12126 prompt_cache_len:5151 prompt_cache_ratio:0.4247897080653142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 -DEBUG 06-24 20:29:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:35 [batch.py:51] router release req id 8 -INFO 06-24 20:29:35 [manager.py:224] router recive req id 8 cost time 0.10345077514648438 s -INFO 06-24 20:29:35 [manager.py:68] detokenization recv req id 8 cost time 0.10556721687316895 s -DEBUG 06-24 20:29:35 [manager.py:391] Prefill Batch: batch_id=296696299523953309787553340212737802537, time:1750768175.9229565s req_ids:[8] -DEBUG 06-24 20:29:35 [manager.py:391] -ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:35 lightllm_req_id:8 first_token_cost:206.58469200134277ms total_cost_time:206.6943645477295ms,out_token_counter:1 mean_per_token_cost_time: 0.10967254638671875ms prompt_token_num:12127 prompt_cache_len:5151 prompt_cache_ratio:0.4247546796404717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 -DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:36 [batch.py:51] router release req id 8 -INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.3104877471923828 s -INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.3124821186065674 s -DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=266823302585691637180315172780938891650, time:1750768176.3454165s req_ids:[8] -DEBUG 06-24 20:29:36 [manager.py:391] -ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:429.2929172515869ms total_cost_time:429.3365478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12128 prompt_cache_len:5151 prompt_cache_ratio:0.4247196569920844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 -DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:36 [batch.py:51] router release req id 8 -INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10708117485046387 s -INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10895419120788574 s -DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=101337991100535601861372599709578179226, time:1750768176.5711436s req_ids:[8] -DEBUG 06-24 20:29:36 [manager.py:391] -ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:204.98013496398926ms total_cost_time:205.02400398254395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12129 prompt_cache_len:5151 prompt_cache_ratio:0.4246846401187237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 -DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:36 [batch.py:51] router release req id 8 -INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10731315612792969 s -INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10942864418029785 s -DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=320588487500075745209625008448723033215, time:1750768176.784979s req_ids:[8] -DEBUG 06-24 20:29:36 [manager.py:391] -ERROR 06-24 20:29:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:202.34346389770508ms total_cost_time:202.38494873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12130 prompt_cache_len:5151 prompt_cache_ratio:0.42464962901896125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 -DEBUG 06-24 20:29:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:36 [batch.py:51] router release req id 8 -INFO 06-24 20:29:36 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s -INFO 06-24 20:29:36 [manager.py:68] detokenization recv req id 8 cost time 0.10958194732666016 s -DEBUG 06-24 20:29:36 [manager.py:391] Prefill Batch: batch_id=274478493294682165936667351053412392201, time:1750768176.9906409s req_ids:[8] -DEBUG 06-24 20:29:36 [manager.py:391] -ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:36 lightllm_req_id:8 first_token_cost:208.52351188659668ms total_cost_time:208.5566520690918ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12131 prompt_cache_len:5151 prompt_cache_ratio:0.4246146236913692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 -DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:37 [batch.py:51] router release req id 8 -INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10922741889953613 s -INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.1116025447845459 s -DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=303396176667515746647835122489615290390, time:1750768177.2081375s req_ids:[8] -DEBUG 06-24 20:29:37 [manager.py:391] -ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:214.01047706604004ms total_cost_time:214.05529975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12132 prompt_cache_len:5151 prompt_cache_ratio:0.4245796241345203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 -DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:37 [batch.py:51] router release req id 8 -INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10756254196166992 s -INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.10868310928344727 s -DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=211517298371529451079678221545897883939, time:1750768177.4400837s req_ids:[8] -DEBUG 06-24 20:29:37 [manager.py:391] -ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:215.7762050628662ms total_cost_time:215.8203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12133 prompt_cache_len:5151 prompt_cache_ratio:0.42454463034698753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 -DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:37 [batch.py:51] router release req id 8 -INFO 06-24 20:29:37 [manager.py:224] router recive req id 8 cost time 0.10770273208618164 s -INFO 06-24 20:29:37 [manager.py:68] detokenization recv req id 8 cost time 0.10964846611022949 s -DEBUG 06-24 20:29:37 [manager.py:391] Prefill Batch: batch_id=287526713420067493771072525195589226624, time:1750768177.6485612s req_ids:[8] -DEBUG 06-24 20:29:37 [manager.py:391] -ERROR 06-24 20:29:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:433.3231449127197ms total_cost_time:433.3674907684326ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12134 prompt_cache_len:5151 prompt_cache_ratio:0.42450964232734467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 -DEBUG 06-24 20:29:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:37 [batch.py:51] router release req id 8 -INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.1068880558013916 s -INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10875082015991211 s -DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=299019802727512495401886066888243376768, time:1750768178.0881457s req_ids:[8] -DEBUG 06-24 20:29:38 [manager.py:391] -ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:37 lightllm_req_id:8 first_token_cost:194.4119930267334ms total_cost_time:194.45514678955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12135 prompt_cache_len:5151 prompt_cache_ratio:0.4244746600741656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 -DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:38 [batch.py:51] router release req id 8 -INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.11001801490783691 s -INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.11194562911987305 s -DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=293640633088328358692089650157625841013, time:1750768178.2882721s req_ids:[8] -DEBUG 06-24 20:29:38 [manager.py:391] -ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:202.27742195129395ms total_cost_time:202.32057571411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12136 prompt_cache_len:5151 prompt_cache_ratio:0.4244396835860251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 -DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:38 [batch.py:51] router release req id 8 -INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10827493667602539 s -INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.11028718948364258 s -INFO 06-24 20:29:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=91474710794998932117667021703695323800, time:1750768178.497649s req_ids:[8] -DEBUG 06-24 20:29:38 [manager.py:391] -ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:210.10208129882812ms total_cost_time:210.1449966430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12137 prompt_cache_len:5151 prompt_cache_ratio:0.4244047128614979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 -DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:38 [batch.py:51] router release req id 8 -INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10760807991027832 s -INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10959100723266602 s -DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=273654388223973306707767411786764149111, time:1750768178.71371s req_ids:[8] -DEBUG 06-24 20:29:38 [manager.py:391] -ERROR 06-24 20:29:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:214.2951488494873ms total_cost_time:214.32876586914062ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12138 prompt_cache_len:5151 prompt_cache_ratio:0.42436974789915966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 -DEBUG 06-24 20:29:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:38 [batch.py:51] router release req id 8 -INFO 06-24 20:29:38 [manager.py:224] router recive req id 8 cost time 0.10804414749145508 s -INFO 06-24 20:29:38 [manager.py:68] detokenization recv req id 8 cost time 0.10996055603027344 s -DEBUG 06-24 20:29:38 [manager.py:391] Prefill Batch: batch_id=299812576089764118226394740390914428456, time:1750768178.9318209s req_ids:[8] -DEBUG 06-24 20:29:38 [manager.py:391] -ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:38 lightllm_req_id:8 first_token_cost:210.98995208740234ms total_cost_time:211.03525161743164ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12139 prompt_cache_len:5151 prompt_cache_ratio:0.4243347886975863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 -DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:39 [batch.py:51] router release req id 8 -INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10892844200134277 s -INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11086010932922363 s -DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=154744768876847331147203381500540538521, time:1750768179.1485958s req_ids:[8] -DEBUG 06-24 20:29:39 [manager.py:391] -ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:213.76967430114746ms total_cost_time:213.81282806396484ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12140 prompt_cache_len:5151 prompt_cache_ratio:0.4242998352553542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 -DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:39 [batch.py:51] router release req id 8 -INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10664749145507812 s -INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.10846304893493652 s -DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=73931406336905467925483704022051540666, time:1750768179.3809977s req_ids:[8] -DEBUG 06-24 20:29:39 [manager.py:391] -ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:398.29087257385254ms total_cost_time:398.33521842956543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12141 prompt_cache_len:5151 prompt_cache_ratio:0.42426488757104025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 -DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:39 [batch.py:51] router release req id 8 -INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10951638221740723 s -INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11161923408508301 s -DEBUG 06-24 20:29:39 [manager.py:391] Prefill Batch: batch_id=277083700891311873169974563267248248947, time:1750768179.7729344s req_ids:[8] -DEBUG 06-24 20:29:39 [manager.py:391] -ERROR 06-24 20:29:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:219.86889839172363ms total_cost_time:219.91348266601562ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12142 prompt_cache_len:5151 prompt_cache_ratio:0.42422994564322186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 -DEBUG 06-24 20:29:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:39 [batch.py:51] router release req id 8 -INFO 06-24 20:29:39 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s -INFO 06-24 20:29:39 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s -DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=243846156091639138550355281703165646622, time:1750768180.0062387s req_ids:[8] -DEBUG 06-24 20:29:40 [manager.py:391] -ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:39 lightllm_req_id:8 first_token_cost:213.56201171875ms total_cost_time:213.6056423187256ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12143 prompt_cache_len:5151 prompt_cache_ratio:0.42419500947047684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 -DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:40 [batch.py:51] router release req id 8 -INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10751962661743164 s -INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.10953688621520996 s -DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=320506661473848973693081198587941181604, time:1750768180.2206395s req_ids:[8] -DEBUG 06-24 20:29:40 [manager.py:391] -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:212.005615234375ms total_cost_time:212.0504379272461ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12144 prompt_cache_len:5151 prompt_cache_ratio:0.4241600790513834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 -DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:40 [batch.py:51] router release req id 8 -INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10741853713989258 s -INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940098762512207 s -DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=101530776342567304696827395056945737142, time:1750768180.4375465s req_ids:[8] -DEBUG 06-24 20:29:40 [manager.py:391] -ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:214.39194679260254ms total_cost_time:214.43796157836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12145 prompt_cache_len:5151 prompt_cache_ratio:0.42412515438452036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 -DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:40 [batch.py:51] router release req id 8 -INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10986089706420898 s -INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.11182475090026855 s -DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=99749869772888058729032452796643206262, time:1750768180.6585805s req_ids:[8] -DEBUG 06-24 20:29:40 [manager.py:391] -ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:210.3593349456787ms total_cost_time:210.4039192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12146 prompt_cache_len:5151 prompt_cache_ratio:0.42409023546846697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 -DEBUG 06-24 20:29:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:40 [batch.py:51] router release req id 8 -INFO 06-24 20:29:40 [manager.py:224] router recive req id 8 cost time 0.10764169692993164 s -INFO 06-24 20:29:40 [manager.py:68] detokenization recv req id 8 cost time 0.1095728874206543 s -DEBUG 06-24 20:29:40 [manager.py:391] Prefill Batch: batch_id=34509305398129527457885702384426558269, time:1750768180.8735676s req_ids:[8] -DEBUG 06-24 20:29:40 [manager.py:391] -ERROR 06-24 20:29:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:209.59901809692383ms total_cost_time:209.65218544006348ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12147 prompt_cache_len:5151 prompt_cache_ratio:0.42405532230180293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 -DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:41 [batch.py:51] router release req id 8 -INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.10712289810180664 s -INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.1095585823059082 s -DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=160614028960100045664904650843145082380, time:1750768181.090413s req_ids:[8] -DEBUG 06-24 20:29:41 [manager.py:391] -ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:40 lightllm_req_id:8 first_token_cost:372.50685691833496ms total_cost_time:372.5428581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:12148 prompt_cache_len:5151 prompt_cache_ratio:0.4240204148831083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 -DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:41 [batch.py:51] router release req id 8 -INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s -INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s -DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=335037007657085370143853193853880506168, time:1750768181.4700856s req_ids:[8] -DEBUG 06-24 20:29:41 [manager.py:391] -ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:219.80977058410645ms total_cost_time:219.85220909118652ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12149 prompt_cache_len:5151 prompt_cache_ratio:0.42398551321096384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 -DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:41 [batch.py:51] router release req id 8 -INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s -INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s -DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=209488674385187664357349825387953496696, time:1750768181.711448s req_ids:[8] -DEBUG 06-24 20:29:41 [manager.py:391] -ERROR 06-24 20:29:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:223.9232063293457ms total_cost_time:223.9665985107422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12150 prompt_cache_len:5151 prompt_cache_ratio:0.4239506172839506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 -DEBUG 06-24 20:29:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:41 [batch.py:51] router release req id 8 -INFO 06-24 20:29:41 [manager.py:224] router recive req id 8 cost time 0.1082618236541748 s -INFO 06-24 20:29:41 [manager.py:68] detokenization recv req id 8 cost time 0.11025333404541016 s -DEBUG 06-24 20:29:41 [manager.py:391] Prefill Batch: batch_id=272302216997286973164207061678113043520, time:1750768181.9262633s req_ids:[8] -DEBUG 06-24 20:29:41 [manager.py:391] -ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:41 lightllm_req_id:8 first_token_cost:206.6061496734619ms total_cost_time:206.6507339477539ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12151 prompt_cache_len:5151 prompt_cache_ratio:0.4239157271006502 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 -DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:42 [batch.py:51] router release req id 8 -INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10799884796142578 s -INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.10992264747619629 s -DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=266167416153714575625739143886908559783, time:1750768182.144679s req_ids:[8] -DEBUG 06-24 20:29:42 [manager.py:391] -ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:214.5977020263672ms total_cost_time:214.64204788208008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12152 prompt_cache_len:5151 prompt_cache_ratio:0.4238808426596445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 -DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:42 [batch.py:51] router release req id 8 -INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.1085977554321289 s -INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.11084485054016113 s -DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=174877347978795779002794556765975467165, time:1750768182.3599591s req_ids:[8] -DEBUG 06-24 20:29:42 [manager.py:391] -ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:207.9782485961914ms total_cost_time:208.0214023590088ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12153 prompt_cache_len:5151 prompt_cache_ratio:0.4238459639595162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 -DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:42 [batch.py:51] router release req id 8 -INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10742831230163574 s -INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.10930538177490234 s -DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=233632911218233230919337225802050433634, time:1750768182.575026s req_ids:[8] -DEBUG 06-24 20:29:42 [manager.py:391] -ERROR 06-24 20:29:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:207.93581008911133ms total_cost_time:207.9601287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12154 prompt_cache_len:5151 prompt_cache_ratio:0.4238110909988481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 -DEBUG 06-24 20:29:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:42 [batch.py:51] router release req id 8 -INFO 06-24 20:29:42 [manager.py:224] router recive req id 8 cost time 0.10899043083190918 s -INFO 06-24 20:29:42 [manager.py:68] detokenization recv req id 8 cost time 0.11174154281616211 s -DEBUG 06-24 20:29:42 [manager.py:391] Prefill Batch: batch_id=242474598248843056928570337155729084008, time:1750768182.797736s req_ids:[8] -DEBUG 06-24 20:29:42 [manager.py:391] -ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:42 lightllm_req_id:8 first_token_cost:436.2657070159912ms total_cost_time:436.3124370574951ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12155 prompt_cache_len:5151 prompt_cache_ratio:0.42377622377622376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 -DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:43 [batch.py:51] router release req id 8 -INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10761833190917969 s -INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.10961651802062988 s -DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=10465098573057244097341410946427095255, time:1750768183.2338896s req_ids:[8] -DEBUG 06-24 20:29:43 [manager.py:391] -ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:189.26334381103516ms total_cost_time:189.30792808532715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12156 prompt_cache_len:5151 prompt_cache_ratio:0.4237413622902271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 -DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:43 [batch.py:51] router release req id 8 -INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10841226577758789 s -INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.11041378974914551 s -DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=294499036573211132589107662243983826616, time:1750768183.432093s req_ids:[8] -DEBUG 06-24 20:29:43 [manager.py:391] -ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:205.97147941589355ms total_cost_time:206.00152015686035ms,out_token_counter:1 mean_per_token_cost_time: 0.030040740966796875ms prompt_token_num:12157 prompt_cache_len:5151 prompt_cache_ratio:0.4237065065394423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 -DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:43 [batch.py:51] router release req id 8 -INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10593271255493164 s -INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.10737156867980957 s -DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=135947984617785326558541513592512919015, time:1750768183.6573155s req_ids:[8] -DEBUG 06-24 20:29:43 [manager.py:391] -ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:226.92060470581055ms total_cost_time:226.96781158447266ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12158 prompt_cache_len:5151 prompt_cache_ratio:0.42367165652245437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 -DEBUG 06-24 20:29:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:43 [batch.py:51] router release req id 8 -INFO 06-24 20:29:43 [manager.py:224] router recive req id 8 cost time 0.10831475257873535 s -INFO 06-24 20:29:43 [manager.py:68] detokenization recv req id 8 cost time 0.11059975624084473 s -DEBUG 06-24 20:29:43 [manager.py:391] Prefill Batch: batch_id=224292292182442271426088769871114855854, time:1750768183.878471s req_ids:[8] -DEBUG 06-24 20:29:43 [manager.py:391] -ERROR 06-24 20:29:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:214.66565132141113ms total_cost_time:214.71047401428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12159 prompt_cache_len:5151 prompt_cache_ratio:0.4236368122378485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 -DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:44 [batch.py:51] router release req id 8 -INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.10890579223632812 s -INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.11093759536743164 s -DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=189790924707943558271519743040481770936, time:1750768184.1055315s req_ids:[8] -DEBUG 06-24 20:29:44 [manager.py:391] -ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:43 lightllm_req_id:8 first_token_cost:216.11809730529785ms total_cost_time:216.16268157958984ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12160 prompt_cache_len:5151 prompt_cache_ratio:0.42360197368421054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 -DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:44 [batch.py:51] router release req id 8 -INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.1073462963104248 s -INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.10948419570922852 s -DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=102542918702157905810401951991910124658, time:1750768184.3209329s req_ids:[8] -DEBUG 06-24 20:29:44 [manager.py:391] -ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:204.85758781433105ms total_cost_time:204.90097999572754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12161 prompt_cache_len:5151 prompt_cache_ratio:0.42356714086012665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 -DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:44 [batch.py:51] router release req id 8 -INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.3101236820220947 s -INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.3122124671936035 s -DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=126447758517731243043021672157564236232, time:1750768184.7301917s req_ids:[8] -DEBUG 06-24 20:29:44 [manager.py:391] -ERROR 06-24 20:29:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:29:44 [stats.py:37] Avg tokens(prompt+generate) throughput: 49553.073 tokens/s -DEBUG 06-24 20:29:44 [stats.py:37] Avg prompt tokens throughput: 49544.912 tokens/s -DEBUG 06-24 20:29:44 [stats.py:37] Avg generate tokens throughput: 8.161 tokens/s -INFO 06-24 20:29:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:403.98502349853516ms total_cost_time:404.02936935424805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12162 prompt_cache_len:5151 prompt_cache_ratio:0.42353231376418354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 -DEBUG 06-24 20:29:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:44 [batch.py:51] router release req id 8 -INFO 06-24 20:29:44 [manager.py:224] router recive req id 8 cost time 0.10929608345031738 s -INFO 06-24 20:29:44 [manager.py:68] detokenization recv req id 8 cost time 0.11118340492248535 s -DEBUG 06-24 20:29:44 [manager.py:391] Prefill Batch: batch_id=320082874408314361850912757727754183200, time:1750768184.9445252s req_ids:[8] -DEBUG 06-24 20:29:44 [manager.py:391] -ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:44 lightllm_req_id:8 first_token_cost:204.32567596435547ms total_cost_time:204.36978340148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12163 prompt_cache_len:5151 prompt_cache_ratio:0.42349749239496837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 -DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:45 [batch.py:51] router release req id 8 -INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.11109209060668945 s -INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.11311793327331543 s -DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=85731737004447914737908609810465876105, time:1750768185.1537778s req_ids:[8] -DEBUG 06-24 20:29:45 [manager.py:391] -ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:204.93006706237793ms total_cost_time:204.97560501098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12164 prompt_cache_len:5151 prompt_cache_ratio:0.42346267675106875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 -DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:45 [batch.py:51] router release req id 8 -INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10690593719482422 s -INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10880446434020996 s -DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=267938917893489809970455281780547095519, time:1750768185.3749826s req_ids:[8] -DEBUG 06-24 20:29:45 [manager.py:391] -ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:210.56151390075684ms total_cost_time:210.6027603149414ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12165 prompt_cache_len:5151 prompt_cache_ratio:0.42342786683107275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 -DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:45 [batch.py:51] router release req id 8 -INFO 06-24 20:29:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10727405548095703 s -INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10934710502624512 s -DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=78118183127760232846884953276331547799, time:1750768185.5794668s req_ids:[8] -DEBUG 06-24 20:29:45 [manager.py:391] -ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:197.24559783935547ms total_cost_time:197.27540016174316ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:12166 prompt_cache_len:5151 prompt_cache_ratio:0.42339306263356896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 -DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:45 [batch.py:51] router release req id 8 -INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10703635215759277 s -INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10910558700561523 s -DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=217005223284509705024797721609925231750, time:1750768185.7862613s req_ids:[8] -DEBUG 06-24 20:29:45 [manager.py:391] -ERROR 06-24 20:29:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:208.32538604736328ms total_cost_time:208.36901664733887ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12167 prompt_cache_len:5151 prompt_cache_ratio:0.4233582641571464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 -DEBUG 06-24 20:29:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:45 [batch.py:51] router release req id 8 -INFO 06-24 20:29:45 [manager.py:224] router recive req id 8 cost time 0.10714221000671387 s -INFO 06-24 20:29:45 [manager.py:68] detokenization recv req id 8 cost time 0.10832548141479492 s -DEBUG 06-24 20:29:45 [manager.py:391] Prefill Batch: batch_id=76818082194802183936069854855328380604, time:1750768185.9987824s req_ids:[8] -DEBUG 06-24 20:29:45 [manager.py:391] -ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:45 lightllm_req_id:8 first_token_cost:390.32793045043945ms total_cost_time:390.37179946899414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12168 prompt_cache_len:5151 prompt_cache_ratio:0.4233234714003945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 -DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:46 [batch.py:51] router release req id 8 -INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10760164260864258 s -INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.10961031913757324 s -DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=168314316334823366951202047765362901887, time:1750768186.397484s req_ids:[8] -DEBUG 06-24 20:29:46 [manager.py:391] -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:205.55591583251953ms total_cost_time:205.60002326965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12169 prompt_cache_len:5151 prompt_cache_ratio:0.4232886843619032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 -DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:46 [batch.py:51] router release req id 8 -INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10843205451965332 s -INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.11043238639831543 s -DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=170881595729915399115352785711680323964, time:1750768186.6088967s req_ids:[8] -DEBUG 06-24 20:29:46 [manager.py:391] -ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:210.5264663696289ms total_cost_time:210.5705738067627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12170 prompt_cache_len:5151 prompt_cache_ratio:0.42325390304026295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 -DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:46 [batch.py:51] router release req id 8 -INFO 06-24 20:29:46 [manager.py:224] router recive req id 8 cost time 0.10804629325866699 s -INFO 06-24 20:29:46 [manager.py:68] detokenization recv req id 8 cost time 0.10986661911010742 s -DEBUG 06-24 20:29:46 [manager.py:391] Prefill Batch: batch_id=116492470160440233095906006976801441343, time:1750768186.8278282s req_ids:[8] -DEBUG 06-24 20:29:46 [manager.py:391] -ERROR 06-24 20:29:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:196.10261917114258ms total_cost_time:196.15817070007324ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:12171 prompt_cache_len:5151 prompt_cache_ratio:0.4232191274340646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 -DEBUG 06-24 20:29:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:46 [batch.py:51] router release req id 8 -INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10854530334472656 s -INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11046648025512695 s -DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=218320981371046368683635872008856679111, time:1750768187.0387578s req_ids:[8] -DEBUG 06-24 20:29:47 [manager.py:391] -INFO 06-24 20:29:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:46 lightllm_req_id:8 first_token_cost:219.59638595581055ms total_cost_time:219.64097023010254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12172 prompt_cache_len:5151 prompt_cache_ratio:0.4231843575418994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 -DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:47 [batch.py:51] router release req id 8 -INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s -INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11041784286499023 s -DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=61003533996318641876576730223892620553, time:1750768187.2587843s req_ids:[8] -DEBUG 06-24 20:29:47 [manager.py:391] -ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:214.0650749206543ms total_cost_time:214.1265869140625ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:12173 prompt_cache_len:5151 prompt_cache_ratio:0.4231495933623593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 -DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:47 [batch.py:51] router release req id 8 -INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10982322692871094 s -INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.1124720573425293 s -DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=180633088556351709089101623329278888726, time:1750768187.477732s req_ids:[8] -DEBUG 06-24 20:29:47 [manager.py:391] -ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:213.75417709350586ms total_cost_time:213.79828453063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12174 prompt_cache_len:5151 prompt_cache_ratio:0.42311483489403645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 -DEBUG 06-24 20:29:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:47 [batch.py:51] router release req id 8 -INFO 06-24 20:29:47 [manager.py:224] router recive req id 8 cost time 0.10857558250427246 s -INFO 06-24 20:29:47 [manager.py:68] detokenization recv req id 8 cost time 0.11053204536437988 s -DEBUG 06-24 20:29:47 [manager.py:391] Prefill Batch: batch_id=309088754221688251971910421420722188861, time:1750768187.6985695s req_ids:[8] -DEBUG 06-24 20:29:47 [manager.py:391] -ERROR 06-24 20:29:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:385.6179714202881ms total_cost_time:385.6487274169922ms,out_token_counter:1 mean_per_token_cost_time: 0.030755996704101562ms prompt_token_num:12175 prompt_cache_len:5151 prompt_cache_ratio:0.4230800821355236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 -DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:48 [batch.py:51] router release req id 8 -INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10617232322692871 s -INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10718226432800293 s -DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=221719335839050511271809210786160998330, time:1750768188.0934258s req_ids:[8] -DEBUG 06-24 20:29:48 [manager.py:391] -ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:47 lightllm_req_id:8 first_token_cost:215.88397026062012ms total_cost_time:215.93070030212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12176 prompt_cache_len:5151 prompt_cache_ratio:0.4230453350854139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 -DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:48 [batch.py:51] router release req id 8 -INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10999631881713867 s -INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.11194968223571777 s -DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=34323341395267746680765741202392449331, time:1750768188.3122678s req_ids:[8] -DEBUG 06-24 20:29:48 [manager.py:391] -ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:211.5931510925293ms total_cost_time:211.6372585296631ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12177 prompt_cache_len:5151 prompt_cache_ratio:0.42301059374230104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 -DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:48 [batch.py:51] router release req id 8 -INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.1068727970123291 s -INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10817503929138184 s -DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=181755108791294670926485546829099233105, time:1750768188.531519s req_ids:[8] -DEBUG 06-24 20:29:48 [manager.py:391] -ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:208.1904411315918ms total_cost_time:208.2235813140869ms,out_token_counter:1 mean_per_token_cost_time: 0.03314018249511719ms prompt_token_num:12178 prompt_cache_len:5151 prompt_cache_ratio:0.4229758581047791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 -DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:48 [batch.py:51] router release req id 8 -INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10853028297424316 s -INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.11055850982666016 s -DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=193632884841415902248969811673320232515, time:1750768188.7459388s req_ids:[8] -DEBUG 06-24 20:29:48 [manager.py:391] -ERROR 06-24 20:29:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:212.91685104370117ms total_cost_time:212.96119689941406ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12179 prompt_cache_len:5151 prompt_cache_ratio:0.42294112817144264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 -DEBUG 06-24 20:29:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:48 [batch.py:51] router release req id 8 -INFO 06-24 20:29:48 [manager.py:224] router recive req id 8 cost time 0.10788846015930176 s -INFO 06-24 20:29:48 [manager.py:68] detokenization recv req id 8 cost time 0.10929298400878906 s -DEBUG 06-24 20:29:48 [manager.py:391] Prefill Batch: batch_id=175758928480388158086502245296753172541, time:1750768188.9642086s req_ids:[8] -DEBUG 06-24 20:29:48 [manager.py:391] -ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:48 lightllm_req_id:8 first_token_cost:211.0581398010254ms total_cost_time:211.10129356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12180 prompt_cache_len:5151 prompt_cache_ratio:0.4229064039408867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 -DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:49 [batch.py:51] router release req id 8 -INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10778284072875977 s -INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.10960984230041504 s -DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=33160263357180468239623693256781480660, time:1750768189.1826067s req_ids:[8] -DEBUG 06-24 20:29:49 [manager.py:391] -ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:191.8947696685791ms total_cost_time:191.9384002685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12181 prompt_cache_len:5151 prompt_cache_ratio:0.4228716854117068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 -DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:49 [batch.py:51] router release req id 8 -INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10841012001037598 s -INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11052632331848145 s -DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=330685325507145879947394611796803360776, time:1750768189.3750062s req_ids:[8] -DEBUG 06-24 20:29:49 [manager.py:391] -ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:416.5351390838623ms total_cost_time:416.5785312652588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12182 prompt_cache_len:5151 prompt_cache_ratio:0.42283697258249875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 -DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:49 [batch.py:51] router release req id 8 -INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10864400863647461 s -INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11057043075561523 s -DEBUG 06-24 20:29:49 [manager.py:391] Prefill Batch: batch_id=42395687684910660019181109703880261296, time:1750768189.7965722s req_ids:[8] -DEBUG 06-24 20:29:49 [manager.py:391] -ERROR 06-24 20:29:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:195.43051719665527ms total_cost_time:195.47343254089355ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12183 prompt_cache_len:5151 prompt_cache_ratio:0.42280226545185917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 -DEBUG 06-24 20:29:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:49 [batch.py:51] router release req id 8 -INFO 06-24 20:29:49 [manager.py:224] router recive req id 8 cost time 0.10907411575317383 s -INFO 06-24 20:29:49 [manager.py:68] detokenization recv req id 8 cost time 0.11137890815734863 s -DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=220969251188201438835985218328290347006, time:1750768190.0065565s req_ids:[8] -DEBUG 06-24 20:29:50 [manager.py:391] -ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:49 lightllm_req_id:8 first_token_cost:214.45178985595703ms total_cost_time:214.4942283630371ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12184 prompt_cache_len:5151 prompt_cache_ratio:0.4227675640183848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 -DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:50 [batch.py:51] router release req id 8 -INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10765385627746582 s -INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.10962677001953125 s -DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=271015334585051035747435236562315210014, time:1750768190.2225957s req_ids:[8] -DEBUG 06-24 20:29:50 [manager.py:391] -ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:209.56850051879883ms total_cost_time:209.6114158630371ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12185 prompt_cache_len:5151 prompt_cache_ratio:0.42273286828067297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 -DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:50 [batch.py:51] router release req id 8 -INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10761880874633789 s -INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s -DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=263104274312922737206454966593609951467, time:1750768190.438858s req_ids:[8] -DEBUG 06-24 20:29:50 [manager.py:391] -ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:205.64651489257812ms total_cost_time:205.6901454925537ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12186 prompt_cache_len:5151 prompt_cache_ratio:0.42269817823732153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 -DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:50 [batch.py:51] router release req id 8 -INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10959672927856445 s -INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.11152839660644531 s -DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=312109309368404542595604790255909398049, time:1750768190.6521397s req_ids:[8] -DEBUG 06-24 20:29:50 [manager.py:391] -ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:214.7970199584961ms total_cost_time:214.83898162841797ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12187 prompt_cache_len:5151 prompt_cache_ratio:0.4226634938869287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 -DEBUG 06-24 20:29:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:50 [batch.py:51] router release req id 8 -INFO 06-24 20:29:50 [manager.py:224] router recive req id 8 cost time 0.10756492614746094 s -INFO 06-24 20:29:50 [manager.py:68] detokenization recv req id 8 cost time 0.1095428466796875 s -DEBUG 06-24 20:29:50 [manager.py:391] Prefill Batch: batch_id=194698969904767767711353166367489058982, time:1750768190.8709557s req_ids:[8] -DEBUG 06-24 20:29:50 [manager.py:391] -ERROR 06-24 20:29:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:211.84754371643066ms total_cost_time:211.89308166503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12188 prompt_cache_len:5151 prompt_cache_ratio:0.4226288152280932 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 -DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:51 [batch.py:51] router release req id 8 -INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10759758949279785 s -INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.1094980239868164 s -DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=155051735574091712619333999205373242602, time:1750768191.0907052s req_ids:[8] -DEBUG 06-24 20:29:51 [manager.py:391] -ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:50 lightllm_req_id:8 first_token_cost:398.8053798675537ms total_cost_time:398.8497257232666ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12189 prompt_cache_len:5151 prompt_cache_ratio:0.4225941422594142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 -DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:51 [batch.py:51] router release req id 8 -INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10907363891601562 s -INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.11108851432800293 s -DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=182754504918799968094821158261553265218, time:1750768191.4948857s req_ids:[8] -DEBUG 06-24 20:29:51 [manager.py:391] -ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:215.46316146850586ms total_cost_time:215.50822257995605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12190 prompt_cache_len:5151 prompt_cache_ratio:0.42255947497949137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 -DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:51 [batch.py:51] router release req id 8 -INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10598063468933105 s -INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.1080014705657959 s -DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=272234118373456590691380941996986651667, time:1750768191.7162604s req_ids:[8] -DEBUG 06-24 20:29:51 [manager.py:391] -ERROR 06-24 20:29:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:211.9293212890625ms total_cost_time:211.9755744934082ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12191 prompt_cache_len:5151 prompt_cache_ratio:0.42252481338692477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 -DEBUG 06-24 20:29:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:51 [batch.py:51] router release req id 8 -INFO 06-24 20:29:51 [manager.py:224] router recive req id 8 cost time 0.10761785507202148 s -INFO 06-24 20:29:51 [manager.py:68] detokenization recv req id 8 cost time 0.10962533950805664 s -DEBUG 06-24 20:29:51 [manager.py:391] Prefill Batch: batch_id=100019831845121334773935005948475127847, time:1750768191.9334652s req_ids:[8] -DEBUG 06-24 20:29:51 [manager.py:391] -ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:51 lightllm_req_id:8 first_token_cost:208.59789848327637ms total_cost_time:208.64176750183105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12192 prompt_cache_len:5151 prompt_cache_ratio:0.422490157480315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 -DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:52 [batch.py:51] router release req id 8 -INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.1091618537902832 s -INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.11111879348754883 s -DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=296326609781880951083228869860197627991, time:1750768192.1484108s req_ids:[8] -DEBUG 06-24 20:29:52 [manager.py:391] -ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:210.4947566986084ms total_cost_time:210.5398178100586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12193 prompt_cache_len:5151 prompt_cache_ratio:0.4224555072582629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 -DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:52 [batch.py:51] router release req id 8 -INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.1084127426147461 s -INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.11028766632080078 s -DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=134360504680921163504800492198622094739, time:1750768192.3647637s req_ids:[8] -DEBUG 06-24 20:29:52 [manager.py:391] -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:29:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:212.42904663085938ms total_cost_time:212.47529983520508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12194 prompt_cache_len:5151 prompt_cache_ratio:0.4224208627193702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 -DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:52 [batch.py:51] router release req id 8 -INFO 06-24 20:29:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.10878872871398926 s -INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.10988306999206543 s -DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=213878484901855483289117411883993635866, time:1750768192.583026s req_ids:[8] -DEBUG 06-24 20:29:52 [manager.py:391] -ERROR 06-24 20:29:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:388.7009620666504ms total_cost_time:388.7448310852051ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12195 prompt_cache_len:5151 prompt_cache_ratio:0.4223862238622386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 -DEBUG 06-24 20:29:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:52 [batch.py:51] router release req id 8 -INFO 06-24 20:29:52 [manager.py:224] router recive req id 8 cost time 0.10746550559997559 s -INFO 06-24 20:29:52 [manager.py:68] detokenization recv req id 8 cost time 0.10958170890808105 s -DEBUG 06-24 20:29:52 [manager.py:391] Prefill Batch: batch_id=198849002999616071786872246818190122225, time:1750768192.9792356s req_ids:[8] -DEBUG 06-24 20:29:52 [manager.py:391] -ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:52 lightllm_req_id:8 first_token_cost:211.49587631225586ms total_cost_time:211.53950691223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12196 prompt_cache_len:5151 prompt_cache_ratio:0.4223515906854706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 -DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:53 [batch.py:51] router release req id 8 -INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10799837112426758 s -INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.10996270179748535 s -DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=256295755621699275400233179874920948272, time:1750768193.203448s req_ids:[8] -DEBUG 06-24 20:29:53 [manager.py:391] -INFO 06-24 20:29:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:29:53 [statics_utils.py:24] mean first cost: 229.2316868352051 ms -INFO 06-24 20:29:53 [statics_utils.py:24] mean per token cost: 0.06209368451184434 ms -ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:221.13037109375ms total_cost_time:221.18401527404785ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12197 prompt_cache_len:5151 prompt_cache_ratio:0.4223169631876691 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 -DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:53 [batch.py:51] router release req id 8 -INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.11002993583679199 s -INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11200380325317383 s -DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=230212472641820579308381706472208574813, time:1750768193.4235344s req_ids:[8] -DEBUG 06-24 20:29:53 [manager.py:391] -ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:212.54420280456543ms total_cost_time:212.59093284606934ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12198 prompt_cache_len:5151 prompt_cache_ratio:0.4222823413674373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 -DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:53 [batch.py:51] router release req id 8 -INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s -INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11050224304199219 s -DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=243377964981261607834274361776529131413, time:1750768193.6438625s req_ids:[8] -DEBUG 06-24 20:29:53 [manager.py:391] -ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:202.87823677062988ms total_cost_time:202.92353630065918ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12199 prompt_cache_len:5151 prompt_cache_ratio:0.42224772522337894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 -DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:53 [batch.py:51] router release req id 8 -INFO 06-24 20:29:53 [manager.py:224] router recive req id 8 cost time 0.10809111595153809 s -INFO 06-24 20:29:53 [manager.py:68] detokenization recv req id 8 cost time 0.11005735397338867 s -DEBUG 06-24 20:29:53 [manager.py:391] Prefill Batch: batch_id=166287887725784252309711182642872837359, time:1750768193.8531113s req_ids:[8] -DEBUG 06-24 20:29:53 [manager.py:391] -ERROR 06-24 20:29:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:212.0966911315918ms total_cost_time:212.14056015014648ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12200 prompt_cache_len:5151 prompt_cache_ratio:0.42221311475409834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 -DEBUG 06-24 20:29:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:53 [batch.py:51] router release req id 8 -INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10737371444702148 s -INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.10951876640319824 s -DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=270470000415247780029787379749601948666, time:1750768194.070722s req_ids:[8] -DEBUG 06-24 20:29:54 [manager.py:391] -ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:53 lightllm_req_id:8 first_token_cost:215.12341499328613ms total_cost_time:215.1656150817871ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12201 prompt_cache_len:5151 prompt_cache_ratio:0.42217850995820017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 -DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:54 [batch.py:51] router release req id 8 -INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10961484909057617 s -INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.11087155342102051 s -DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=120973974663546999344761708560767831082, time:1750768194.2927775s req_ids:[8] -DEBUG 06-24 20:29:54 [manager.py:391] -ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:381.44898414611816ms total_cost_time:381.49499893188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12202 prompt_cache_len:5151 prompt_cache_ratio:0.4221439108342895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 -DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:54 [batch.py:51] router release req id 8 -INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10790729522705078 s -INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s -DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=230845293765229685668772367903587622216, time:1750768194.692315s req_ids:[8] -DEBUG 06-24 20:29:54 [manager.py:391] -ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:229.59136962890625ms total_cost_time:229.63786125183105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12203 prompt_cache_len:5151 prompt_cache_ratio:0.4221093173809719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 -DEBUG 06-24 20:29:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:54 [batch.py:51] router release req id 8 -INFO 06-24 20:29:54 [manager.py:224] router recive req id 8 cost time 0.10763049125671387 s -INFO 06-24 20:29:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974860191345215 s -DEBUG 06-24 20:29:54 [manager.py:391] Prefill Batch: batch_id=64532123653523451403975341756351049315, time:1750768194.915187s req_ids:[8] -DEBUG 06-24 20:29:54 [manager.py:391] -DEBUG 06-24 20:29:54 [stats.py:37] Avg tokens(prompt+generate) throughput: 50696.768 tokens/s -DEBUG 06-24 20:29:54 [stats.py:37] Avg prompt tokens throughput: 50688.546 tokens/s -DEBUG 06-24 20:29:54 [stats.py:37] Avg generate tokens throughput: 8.222 tokens/s -ERROR 06-24 20:29:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:54 lightllm_req_id:8 first_token_cost:200.68669319152832ms total_cost_time:200.7291316986084ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12204 prompt_cache_len:5151 prompt_cache_ratio:0.4220747295968535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 -DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:55 [batch.py:51] router release req id 8 -INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.1075139045715332 s -INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.10956621170043945 s -DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=174789071221659748748659808691613022960, time:1750768195.1233928s req_ids:[8] -DEBUG 06-24 20:29:55 [manager.py:391] -ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:211.03429794311523ms total_cost_time:211.08198165893555ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:12205 prompt_cache_len:5151 prompt_cache_ratio:0.42204014748054075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 -DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:55 [batch.py:51] router release req id 8 -INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10753846168518066 s -INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s -DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=21946604903621948101558953481608279943, time:1750768195.3488533s req_ids:[8] -DEBUG 06-24 20:29:55 [manager.py:391] -ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:223.59466552734375ms total_cost_time:223.63877296447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12206 prompt_cache_len:5151 prompt_cache_ratio:0.4220055710306407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 -DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:55 [batch.py:51] router release req id 8 -INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10864949226379395 s -INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.1106104850769043 s -DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=324877073254419139717140680023012489456, time:1750768195.571614s req_ids:[8] -DEBUG 06-24 20:29:55 [manager.py:391] -ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:215.12389183044434ms total_cost_time:215.16704559326172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12207 prompt_cache_len:5151 prompt_cache_ratio:0.4219710002457606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 -DEBUG 06-24 20:29:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:55 [batch.py:51] router release req id 8 -INFO 06-24 20:29:55 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s -INFO 06-24 20:29:55 [manager.py:68] detokenization recv req id 8 cost time 0.11033987998962402 s -DEBUG 06-24 20:29:55 [manager.py:391] Prefill Batch: batch_id=88709957330802666707882512597175323037, time:1750768195.7911735s req_ids:[8] -DEBUG 06-24 20:29:55 [manager.py:391] -ERROR 06-24 20:29:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:213.0591869354248ms total_cost_time:213.1044864654541ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12208 prompt_cache_len:5151 prompt_cache_ratio:0.4219364351245085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 -DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:56 [batch.py:51] router release req id 8 -INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.30864405632019043 s -INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.30989933013916016 s -DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=149911680844091086834960921970536942243, time:1750768196.2311616s req_ids:[8] -DEBUG 06-24 20:29:56 [manager.py:391] -ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:55 lightllm_req_id:8 first_token_cost:439.4218921661377ms total_cost_time:439.4674301147461ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12209 prompt_cache_len:5151 prompt_cache_ratio:0.42190187566549264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 -DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:56 [batch.py:51] router release req id 8 -INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s -INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s -DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=314928825654595878395294572656663279924, time:1750768196.4573262s req_ids:[8] -DEBUG 06-24 20:29:56 [manager.py:391] -ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:213.26255798339844ms total_cost_time:213.30618858337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12210 prompt_cache_len:5151 prompt_cache_ratio:0.42186732186732184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 -DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:56 [batch.py:51] router release req id 8 -INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10867643356323242 s -INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.11025071144104004 s -DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=212974228945281010053650328097761563742, time:1750768196.6761694s req_ids:[8] -DEBUG 06-24 20:29:56 [manager.py:391] -ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:213.1185531616211ms total_cost_time:213.13858032226562ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12211 prompt_cache_len:5151 prompt_cache_ratio:0.42183277372860534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 -DEBUG 06-24 20:29:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:56 [batch.py:51] router release req id 8 -INFO 06-24 20:29:56 [manager.py:224] router recive req id 8 cost time 0.10543107986450195 s -INFO 06-24 20:29:56 [manager.py:68] detokenization recv req id 8 cost time 0.10740208625793457 s -DEBUG 06-24 20:29:56 [manager.py:391] Prefill Batch: batch_id=186497737881388926214587045756563372766, time:1750768196.8972974s req_ids:[8] -DEBUG 06-24 20:29:56 [manager.py:391] -ERROR 06-24 20:29:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:211.93337440490723ms total_cost_time:211.9770050048828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12212 prompt_cache_len:5151 prompt_cache_ratio:0.42179823124795285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 -DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:57 [batch.py:51] router release req id 8 -INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10774612426757812 s -INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.10979723930358887 s -DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=42302541233567716255078482049575838002, time:1750768197.1114984s req_ids:[8] -DEBUG 06-24 20:29:57 [manager.py:391] -ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:56 lightllm_req_id:8 first_token_cost:212.21137046813965ms total_cost_time:212.25428581237793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12213 prompt_cache_len:5151 prompt_cache_ratio:0.42176369442397443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 -DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:57 [batch.py:51] router release req id 8 -INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10866260528564453 s -INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.1108863353729248 s -DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=59991711168008145214565920835719570270, time:1750768197.3304152s req_ids:[8] -DEBUG 06-24 20:29:57 [manager.py:391] -ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:212.80694007873535ms total_cost_time:212.85033226013184ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12214 prompt_cache_len:5151 prompt_cache_ratio:0.4217291632552808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 -DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:57 [batch.py:51] router release req id 8 -INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10835456848144531 s -INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.11040687561035156 s -DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=138189075710236636975551262908225617544, time:1750768197.5490565s req_ids:[8] -DEBUG 06-24 20:29:57 [manager.py:391] -ERROR 06-24 20:29:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:426.67102813720703ms total_cost_time:426.6924858093262ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12215 prompt_cache_len:5151 prompt_cache_ratio:0.421694637740483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 -DEBUG 06-24 20:29:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:57 [batch.py:51] router release req id 8 -INFO 06-24 20:29:57 [manager.py:224] router recive req id 8 cost time 0.10684442520141602 s -INFO 06-24 20:29:57 [manager.py:68] detokenization recv req id 8 cost time 0.10890817642211914 s -DEBUG 06-24 20:29:57 [manager.py:391] Prefill Batch: batch_id=269424159929223819430223750631840536781, time:1750768197.9795697s req_ids:[8] -DEBUG 06-24 20:29:57 [manager.py:391] -ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:57 lightllm_req_id:8 first_token_cost:196.4249610900879ms total_cost_time:196.46763801574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12216 prompt_cache_len:5151 prompt_cache_ratio:0.4216601178781925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 -DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:58 [batch.py:51] router release req id 8 -INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.1089639663696289 s -INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.11094450950622559 s -DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=212289360542422141815130932947205913914, time:1750768198.1820931s req_ids:[8] -DEBUG 06-24 20:29:58 [manager.py:391] -ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:206.42423629760742ms total_cost_time:206.465482711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12217 prompt_cache_len:5151 prompt_cache_ratio:0.42162560366702134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 -DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:58 [batch.py:51] router release req id 8 -INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10757565498352051 s -INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.10975098609924316 s -DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=31322629924996369606285235890291950074, time:1750768198.3979747s req_ids:[8] -DEBUG 06-24 20:29:58 [manager.py:391] -ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:209.78569984436035ms total_cost_time:209.82742309570312ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12218 prompt_cache_len:5151 prompt_cache_ratio:0.4215910951055819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 -DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:58 [batch.py:51] router release req id 8 -INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10819792747497559 s -INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.11033511161804199 s -DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=186976929980878033613835932806704051648, time:1750768198.611314s req_ids:[8] -DEBUG 06-24 20:29:58 [manager.py:391] -ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:209.7926139831543ms total_cost_time:209.83600616455078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12219 prompt_cache_len:5151 prompt_cache_ratio:0.4215565921924871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 -DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:58 [batch.py:51] router release req id 8 -INFO 06-24 20:29:58 [manager.py:224] router recive req id 8 cost time 0.10823535919189453 s -INFO 06-24 20:29:58 [manager.py:68] detokenization recv req id 8 cost time 0.1095738410949707 s -DEBUG 06-24 20:29:58 [manager.py:391] Prefill Batch: batch_id=281022501244372775081175534750421134954, time:1750768198.8274364s req_ids:[8] -DEBUG 06-24 20:29:58 [manager.py:391] -ERROR 06-24 20:29:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:211.5161418914795ms total_cost_time:211.5774154663086ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12220 prompt_cache_len:5151 prompt_cache_ratio:0.42152209492635023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 -DEBUG 06-24 20:29:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:58 [batch.py:51] router release req id 8 -INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10717892646789551 s -INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.10920190811157227 s -DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=209068910636064759632128654394353618319, time:1750768199.0570753s req_ids:[8] -DEBUG 06-24 20:29:59 [manager.py:391] -ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:58 lightllm_req_id:8 first_token_cost:225.85511207580566ms total_cost_time:225.91781616210938ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12221 prompt_cache_len:5151 prompt_cache_ratio:0.4214876033057851 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 -DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:59 [batch.py:51] router release req id 8 -INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10770058631896973 s -INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103363037109375 s -DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=124682717165854309284680093298754718710, time:1750768199.275804s req_ids:[8] -DEBUG 06-24 20:29:59 [manager.py:391] -ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:383.21518898010254ms total_cost_time:383.2581043243408ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12222 prompt_cache_len:5151 prompt_cache_ratio:0.421453117329406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 -DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:59 [batch.py:51] router release req id 8 -INFO 06-24 20:29:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10812139511108398 s -INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.11014556884765625 s -DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=257629345262822897237682290826919725156, time:1750768199.6749105s req_ids:[8] -DEBUG 06-24 20:29:59 [manager.py:391] -ERROR 06-24 20:29:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:29:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:29:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:220.60632705688477ms total_cost_time:220.65162658691406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12223 prompt_cache_len:5151 prompt_cache_ratio:0.42141863699582754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:29:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 -DEBUG 06-24 20:29:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:29:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:29:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:29:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:29:59 [batch.py:51] router release req id 8 -INFO 06-24 20:29:59 [manager.py:224] router recive req id 8 cost time 0.10750102996826172 s -INFO 06-24 20:29:59 [manager.py:68] detokenization recv req id 8 cost time 0.10946416854858398 s -DEBUG 06-24 20:29:59 [manager.py:391] Prefill Batch: batch_id=105733838948125109711221831472642206304, time:1750768199.9011562s req_ids:[8] -DEBUG 06-24 20:29:59 [manager.py:391] -ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:29:59 lightllm_req_id:8 first_token_cost:221.19688987731934ms total_cost_time:221.24099731445312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12224 prompt_cache_len:5151 prompt_cache_ratio:0.42138416230366493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 -DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:00 [batch.py:51] router release req id 8 -INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10985517501831055 s -INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.1116797924041748 s -DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=239150139542468687021986318228541490250, time:1750768200.1236277s req_ids:[8] -DEBUG 06-24 20:30:00 [manager.py:391] -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:210.40749549865723ms total_cost_time:210.4930877685547ms,out_token_counter:1 mean_per_token_cost_time: 0.08559226989746094ms prompt_token_num:12225 prompt_cache_len:5151 prompt_cache_ratio:0.42134969325153376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 -DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:00 [batch.py:51] router release req id 8 -INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10820627212524414 s -INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.11021041870117188 s -DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=250776445460641421919660783214769869221, time:1750768200.341464s req_ids:[8] -DEBUG 06-24 20:30:00 [manager.py:391] -ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:211.78507804870605ms total_cost_time:211.84563636779785ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:12226 prompt_cache_len:5151 prompt_cache_ratio:0.42131522983805003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 -DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:00 [batch.py:51] router release req id 8 -INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10751128196716309 s -INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.10954570770263672 s -DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=3137178737192594429649629810434968238, time:1750768200.5603104s req_ids:[8] -DEBUG 06-24 20:30:00 [manager.py:391] -ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:211.11226081848145ms total_cost_time:211.15756034851074ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12227 prompt_cache_len:5151 prompt_cache_ratio:0.4212807720618304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 -DEBUG 06-24 20:30:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:00 [batch.py:51] router release req id 8 -INFO 06-24 20:30:00 [manager.py:224] router recive req id 8 cost time 0.10753989219665527 s -INFO 06-24 20:30:00 [manager.py:68] detokenization recv req id 8 cost time 0.10951042175292969 s -DEBUG 06-24 20:30:00 [manager.py:391] Prefill Batch: batch_id=56765746329624138983222567213829190126, time:1750768200.7787929s req_ids:[8] -DEBUG 06-24 20:30:00 [manager.py:391] -ERROR 06-24 20:30:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:207.17096328735352ms total_cost_time:207.2162628173828ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12228 prompt_cache_len:5151 prompt_cache_ratio:0.42124631992149164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 -DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:01 [batch.py:51] router release req id 8 -INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.30974626541137695 s -INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.3117485046386719 s -DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=80295407941590718457609173205414950581, time:1750768201.220171s req_ids:[8] -DEBUG 06-24 20:30:01 [manager.py:391] -ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:00 lightllm_req_id:8 first_token_cost:447.0634460449219ms total_cost_time:447.1089839935303ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12229 prompt_cache_len:5151 prompt_cache_ratio:0.4212118734156513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 -DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:01 [batch.py:51] router release req id 8 -INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10863971710205078 s -INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.11047911643981934 s -DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=47003045478135918197875118563808060911, time:1750768201.4457228s req_ids:[8] -DEBUG 06-24 20:30:01 [manager.py:391] -ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:215.45791625976562ms total_cost_time:215.5013084411621ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12230 prompt_cache_len:5151 prompt_cache_ratio:0.4211774325429272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 -DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:01 [batch.py:51] router release req id 8 -INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10921883583068848 s -INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.11119771003723145 s -DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=70643724831300006794159550924948553056, time:1750768201.665076s req_ids:[8] -DEBUG 06-24 20:30:01 [manager.py:391] -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:206.44211769104004ms total_cost_time:206.48884773254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12231 prompt_cache_len:5151 prompt_cache_ratio:0.4211429973019377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 -DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:01 [batch.py:51] router release req id 8 -INFO 06-24 20:30:01 [manager.py:224] router recive req id 8 cost time 0.10700225830078125 s -INFO 06-24 20:30:01 [manager.py:68] detokenization recv req id 8 cost time 0.10881161689758301 s -DEBUG 06-24 20:30:01 [manager.py:391] Prefill Batch: batch_id=109000147315457364259830850608393302140, time:1750768201.87676s req_ids:[8] -DEBUG 06-24 20:30:01 [manager.py:391] -ERROR 06-24 20:30:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:168.440580368042ms total_cost_time:168.4863567352295ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12232 prompt_cache_len:5151 prompt_cache_ratio:0.4211085676913015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 -DEBUG 06-24 20:30:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:01 [batch.py:51] router release req id 8 -INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10832786560058594 s -INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s -DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=93610471073346529630925938967718199442, time:1750768202.051116s req_ids:[8] -DEBUG 06-24 20:30:02 [manager.py:391] -ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:01 lightllm_req_id:8 first_token_cost:208.22548866271973ms total_cost_time:208.27031135559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12233 prompt_cache_len:5151 prompt_cache_ratio:0.42107414370963786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 -DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:02 [batch.py:51] router release req id 8 -INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10794425010681152 s -INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s -DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=26347632179070337265035697964086891923, time:1750768202.2656589s req_ids:[8] -DEBUG 06-24 20:30:02 [manager.py:391] -ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:212.97335624694824ms total_cost_time:213.03439140319824ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12234 prompt_cache_len:5151 prompt_cache_ratio:0.42103972535556644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 -DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:02 [batch.py:51] router release req id 8 -INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.10867953300476074 s -INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.11065387725830078 s -DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=235928491134191707130881427958066580049, time:1750768202.4862142s req_ids:[8] -DEBUG 06-24 20:30:02 [manager.py:391] -ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:387.8004550933838ms total_cost_time:387.84313201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12235 prompt_cache_len:5151 prompt_cache_ratio:0.4210053126277074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 -DEBUG 06-24 20:30:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:02 [batch.py:51] router release req id 8 -INFO 06-24 20:30:02 [manager.py:224] router recive req id 8 cost time 0.1082460880279541 s -INFO 06-24 20:30:02 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s -DEBUG 06-24 20:30:02 [manager.py:391] Prefill Batch: batch_id=187532577999371130932475151552017991985, time:1750768202.8797696s req_ids:[8] -DEBUG 06-24 20:30:02 [manager.py:391] -ERROR 06-24 20:30:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:213.59539031982422ms total_cost_time:213.6375904083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12236 prompt_cache_len:5151 prompt_cache_ratio:0.42097090552468125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 -DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:03 [batch.py:51] router release req id 8 -INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10901260375976562 s -INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11109161376953125 s -DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=330992609334842800756070959800509745407, time:1750768203.1018188s req_ids:[8] -DEBUG 06-24 20:30:03 [manager.py:391] -ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:02 lightllm_req_id:8 first_token_cost:215.64960479736328ms total_cost_time:215.7003879547119ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12237 prompt_cache_len:5151 prompt_cache_ratio:0.4209365040451091 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 -DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:03 [batch.py:51] router release req id 8 -INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10756039619445801 s -INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.10955977439880371 s -DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=119095991258329836554252970702074307728, time:1750768203.3333497s req_ids:[8] -DEBUG 06-24 20:30:03 [manager.py:391] -ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:223.921537399292ms total_cost_time:223.96516799926758ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12238 prompt_cache_len:5151 prompt_cache_ratio:0.42090210818761237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 -DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:03 [batch.py:51] router release req id 8 -INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10901880264282227 s -INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11094927787780762 s -DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=302626258717630095944716405526187505033, time:1750768203.5525932s req_ids:[8] -DEBUG 06-24 20:30:03 [manager.py:391] -ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:214.95509147644043ms total_cost_time:215.00349044799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:12239 prompt_cache_len:5151 prompt_cache_ratio:0.42086771795081296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 -DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:03 [batch.py:51] router release req id 8 -INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10814094543457031 s -INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.11014866828918457 s -DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=190428246061767179941517820551913878189, time:1750768203.7740333s req_ids:[8] -DEBUG 06-24 20:30:03 [manager.py:391] -ERROR 06-24 20:30:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:215.9872055053711ms total_cost_time:216.03941917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12240 prompt_cache_len:5151 prompt_cache_ratio:0.42083333333333334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 -DEBUG 06-24 20:30:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:03 [batch.py:51] router release req id 8 -INFO 06-24 20:30:03 [manager.py:224] router recive req id 8 cost time 0.10788679122924805 s -INFO 06-24 20:30:03 [manager.py:68] detokenization recv req id 8 cost time 0.10993170738220215 s -DEBUG 06-24 20:30:03 [manager.py:391] Prefill Batch: batch_id=92574910114674893320954211055605365078, time:1750768203.9936142s req_ids:[8] -DEBUG 06-24 20:30:03 [manager.py:391] -ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:03 lightllm_req_id:8 first_token_cost:392.29846000671387ms total_cost_time:392.3196792602539ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12241 prompt_cache_len:5151 prompt_cache_ratio:0.42079895433379627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 -DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:04 [batch.py:51] router release req id 8 -INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10653567314147949 s -INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.10910224914550781 s -DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=306653002806314186990580466101328386228, time:1750768204.3956168s req_ids:[8] -DEBUG 06-24 20:30:04 [manager.py:391] -ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:215.9860134124756ms total_cost_time:216.03059768676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12242 prompt_cache_len:5151 prompt_cache_ratio:0.42076458095082503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 -DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:04 [batch.py:51] router release req id 8 -INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10900402069091797 s -INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.11116576194763184 s -DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=1038856400656002782278862147208797843, time:1750768204.6171536s req_ids:[8] -DEBUG 06-24 20:30:04 [manager.py:391] -ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:208.9364528656006ms total_cost_time:208.97984504699707ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12243 prompt_cache_len:5151 prompt_cache_ratio:0.4207302131830434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 -DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:04 [batch.py:51] router release req id 8 -INFO 06-24 20:30:04 [manager.py:224] router recive req id 8 cost time 0.10942864418029785 s -INFO 06-24 20:30:04 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s -DEBUG 06-24 20:30:04 [manager.py:391] Prefill Batch: batch_id=166725632115383027526645103125973393015, time:1750768204.8319354s req_ids:[8] -DEBUG 06-24 20:30:04 [manager.py:391] -ERROR 06-24 20:30:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:30:04 [stats.py:37] Avg tokens(prompt+generate) throughput: 48858.834 tokens/s -DEBUG 06-24 20:30:04 [stats.py:37] Avg prompt tokens throughput: 48850.741 tokens/s -DEBUG 06-24 20:30:04 [stats.py:37] Avg generate tokens throughput: 8.092 tokens/s -INFO 06-24 20:30:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:212.3281955718994ms total_cost_time:212.39113807678223ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:12244 prompt_cache_len:5151 prompt_cache_ratio:0.4206958510290755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 -DEBUG 06-24 20:30:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:04 [batch.py:51] router release req id 8 -INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.1081843376159668 s -INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11002802848815918 s -DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=269316204123715533953208734203234650233, time:1750768205.0486505s req_ids:[8] -DEBUG 06-24 20:30:05 [manager.py:391] -ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:04 lightllm_req_id:8 first_token_cost:213.31429481506348ms total_cost_time:213.35887908935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12245 prompt_cache_len:5151 prompt_cache_ratio:0.4206614944875459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 -DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:05 [batch.py:51] router release req id 8 -INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s -INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11130237579345703 s -DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=52688240178524939505394464797015232811, time:1750768205.270461s req_ids:[8] -DEBUG 06-24 20:30:05 [manager.py:391] -ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:213.5481834411621ms total_cost_time:213.5932445526123ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12246 prompt_cache_len:5151 prompt_cache_ratio:0.42062714355707986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 -DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:05 [batch.py:51] router release req id 8 -INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10935044288635254 s -INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11130881309509277 s -DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=202087013659869897460412097078758596931, time:1750768205.4883513s req_ids:[8] -DEBUG 06-24 20:30:05 [manager.py:391] -ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:214.87808227539062ms total_cost_time:214.9221897125244ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12247 prompt_cache_len:5151 prompt_cache_ratio:0.42059279823630275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 -DEBUG 06-24 20:30:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:05 [batch.py:51] router release req id 8 -INFO 06-24 20:30:05 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s -INFO 06-24 20:30:05 [manager.py:68] detokenization recv req id 8 cost time 0.11110806465148926 s -DEBUG 06-24 20:30:05 [manager.py:391] Prefill Batch: batch_id=60859406898512209685746753944047818801, time:1750768205.7104766s req_ids:[8] -DEBUG 06-24 20:30:05 [manager.py:391] -ERROR 06-24 20:30:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:388.5009288787842ms total_cost_time:388.5457515716553ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12248 prompt_cache_len:5151 prompt_cache_ratio:0.4205584585238406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 -DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:06 [batch.py:51] router release req id 8 -INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.1081552505493164 s -INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11027646064758301 s -DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=70348708539558374347036348308610846131, time:1750768206.1045418s req_ids:[8] -DEBUG 06-24 20:30:06 [manager.py:391] -ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:05 lightllm_req_id:8 first_token_cost:202.97551155090332ms total_cost_time:203.0181884765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12249 prompt_cache_len:5151 prompt_cache_ratio:0.4205241244183199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 -DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:06 [batch.py:51] router release req id 8 -DEBUG 06-24 20:30:06 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:06 [manager.py:283] -DEBUG 06-24 20:30:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:06 [manager.py:284] -INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10988640785217285 s -INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11189556121826172 s -DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=291711606870264346976443458499033741340, time:1750768206.3153477s req_ids:[8] -DEBUG 06-24 20:30:06 [manager.py:391] -ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:211.09819412231445ms total_cost_time:211.1198902130127ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12250 prompt_cache_len:5151 prompt_cache_ratio:0.4204897959183673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 -DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:06 [batch.py:51] router release req id 8 -INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10431385040283203 s -INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.10567784309387207 s -DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=164728346363067285708802167510264490354, time:1750768206.5321105s req_ids:[8] -DEBUG 06-24 20:30:06 [manager.py:391] -ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:215.1780128479004ms total_cost_time:215.22283554077148ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12251 prompt_cache_len:5151 prompt_cache_ratio:0.4204554730226104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 -DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:06 [batch.py:51] router release req id 8 -INFO 06-24 20:30:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10775518417358398 s -INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.10988402366638184 s -DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=49955382392383198358322884733962557779, time:1750768206.7506554s req_ids:[8] -DEBUG 06-24 20:30:06 [manager.py:391] -ERROR 06-24 20:30:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:212.74328231811523ms total_cost_time:212.77308464050293ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:12252 prompt_cache_len:5151 prompt_cache_ratio:0.4204211557296768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 -DEBUG 06-24 20:30:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:06 [batch.py:51] router release req id 8 -INFO 06-24 20:30:06 [manager.py:224] router recive req id 8 cost time 0.10827207565307617 s -INFO 06-24 20:30:06 [manager.py:68] detokenization recv req id 8 cost time 0.11032366752624512 s -DEBUG 06-24 20:30:06 [manager.py:391] Prefill Batch: batch_id=68896067827600173667486211999822503174, time:1750768206.9716234s req_ids:[8] -DEBUG 06-24 20:30:06 [manager.py:391] -ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:06 lightllm_req_id:8 first_token_cost:215.59596061706543ms total_cost_time:215.6236171722412ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12253 prompt_cache_len:5151 prompt_cache_ratio:0.4203868440381947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 -DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:07 [batch.py:51] router release req id 8 -INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10726523399353027 s -INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.10924649238586426 s -DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=145185381265212426596394092547838932601, time:1750768207.2036583s req_ids:[8] -DEBUG 06-24 20:30:07 [manager.py:391] -ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:225.30412673950195ms total_cost_time:225.36444664001465ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:12254 prompt_cache_len:5151 prompt_cache_ratio:0.4203525379467929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 -DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:07 [batch.py:51] router release req id 8 -INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10876750946044922 s -INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.11066579818725586 s -DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=195030242213755093576653937022891884723, time:1750768207.4211543s req_ids:[8] -DEBUG 06-24 20:30:07 [manager.py:391] -ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:412.9354953765869ms total_cost_time:412.9812717437744ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12255 prompt_cache_len:5151 prompt_cache_ratio:0.42031823745410035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 -DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:07 [batch.py:51] router release req id 8 -INFO 06-24 20:30:07 [manager.py:224] router recive req id 8 cost time 0.10909008979797363 s -INFO 06-24 20:30:07 [manager.py:68] detokenization recv req id 8 cost time 0.11108613014221191 s -DEBUG 06-24 20:30:07 [manager.py:391] Prefill Batch: batch_id=226834278698162554795832408421805947899, time:1750768207.8397315s req_ids:[8] -DEBUG 06-24 20:30:07 [manager.py:391] -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:210.65974235534668ms total_cost_time:210.70265769958496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12256 prompt_cache_len:5151 prompt_cache_ratio:0.42028394255874674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 -DEBUG 06-24 20:30:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:07 [batch.py:51] router release req id 8 -INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10799264907836914 s -INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.10993218421936035 s -DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=121834125848010825864435878704461604788, time:1750768208.057165s req_ids:[8] -DEBUG 06-24 20:30:08 [manager.py:391] -ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:07 lightllm_req_id:8 first_token_cost:168.93935203552246ms total_cost_time:168.98107528686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12257 prompt_cache_len:5151 prompt_cache_ratio:0.420249653259362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 -DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:08 [batch.py:51] router release req id 8 -INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10718059539794922 s -INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.10908365249633789 s -DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=83955664704928414327781374524796270783, time:1750768208.2346065s req_ids:[8] -DEBUG 06-24 20:30:08 [manager.py:391] -ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:171.45228385925293ms total_cost_time:171.4949607849121ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12258 prompt_cache_len:5151 prompt_cache_ratio:0.4202153695545766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 -DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:08 [batch.py:51] router release req id 8 -INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10825371742248535 s -INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s -DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=126970056594558218454843025430544765818, time:1750768208.4130774s req_ids:[8] -DEBUG 06-24 20:30:08 [manager.py:391] -ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:212.36109733581543ms total_cost_time:212.40687370300293ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12259 prompt_cache_len:5151 prompt_cache_ratio:0.42018109144302146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 -DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:08 [batch.py:51] router release req id 8 -INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.1087195873260498 s -INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11092805862426758 s -DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=116769097747641247597735877954382197320, time:1750768208.6321757s req_ids:[8] -DEBUG 06-24 20:30:08 [manager.py:391] -ERROR 06-24 20:30:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:212.2941017150879ms total_cost_time:212.3396396636963ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12260 prompt_cache_len:5151 prompt_cache_ratio:0.4201468189233279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 -DEBUG 06-24 20:30:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:08 [batch.py:51] router release req id 8 -INFO 06-24 20:30:08 [manager.py:224] router recive req id 8 cost time 0.10870480537414551 s -INFO 06-24 20:30:08 [manager.py:68] detokenization recv req id 8 cost time 0.11073946952819824 s -DEBUG 06-24 20:30:08 [manager.py:391] Prefill Batch: batch_id=327094586887805217270978341566685377373, time:1750768208.848913s req_ids:[8] -DEBUG 06-24 20:30:08 [manager.py:391] -ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:08 lightllm_req_id:8 first_token_cost:390.1326656341553ms total_cost_time:390.17677307128906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12261 prompt_cache_len:5151 prompt_cache_ratio:0.4201125519941277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 -DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:09 [batch.py:51] router release req id 8 -INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10870122909545898 s -INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.1107931137084961 s -DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=8567574741461879866753610962329878327, time:1750768209.245811s req_ids:[8] -DEBUG 06-24 20:30:09 [manager.py:391] -ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:214.951753616333ms total_cost_time:214.9956226348877ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12262 prompt_cache_len:5151 prompt_cache_ratio:0.42007829065405317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 -DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:09 [batch.py:51] router release req id 8 -INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s -INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.10993432998657227 s -DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=311263609536021027412294858963794908240, time:1750768209.4660754s req_ids:[8] -DEBUG 06-24 20:30:09 [manager.py:391] -ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:212.6791477203369ms total_cost_time:212.7244472503662ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12263 prompt_cache_len:5151 prompt_cache_ratio:0.4200440349017369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 -DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:09 [batch.py:51] router release req id 8 -INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10905599594116211 s -INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.11109256744384766 s -DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=204759345154378090691675856619123849807, time:1750768209.685196s req_ids:[8] -DEBUG 06-24 20:30:09 [manager.py:391] -ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:212.89610862731934ms total_cost_time:212.94164657592773ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12264 prompt_cache_len:5151 prompt_cache_ratio:0.42000978473581213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 -DEBUG 06-24 20:30:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:09 [batch.py:51] router release req id 8 -INFO 06-24 20:30:09 [manager.py:224] router recive req id 8 cost time 0.10842776298522949 s -INFO 06-24 20:30:09 [manager.py:68] detokenization recv req id 8 cost time 0.11048173904418945 s -DEBUG 06-24 20:30:09 [manager.py:391] Prefill Batch: batch_id=217393415738087548518167525004335582086, time:1750768209.9042857s req_ids:[8] -DEBUG 06-24 20:30:09 [manager.py:391] -ERROR 06-24 20:30:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:09 lightllm_req_id:8 first_token_cost:209.580659866333ms total_cost_time:209.62786674499512ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12265 prompt_cache_len:5151 prompt_cache_ratio:0.41997554015491234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 -DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:10 [batch.py:51] router release req id 8 -INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s -INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.11000561714172363 s -DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=73769647781656002862499770319571827903, time:1750768210.1209624s req_ids:[8] -DEBUG 06-24 20:30:10 [manager.py:391] -ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:215.95358848571777ms total_cost_time:215.99960327148438ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12266 prompt_cache_len:5151 prompt_cache_ratio:0.4199413011576716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 -DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:10 [batch.py:51] router release req id 8 -INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.1078639030456543 s -INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.10986709594726562 s -DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=119270155802948820070728089537598370295, time:1750768210.3516078s req_ids:[8] -DEBUG 06-24 20:30:10 [manager.py:391] -ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:220.24822235107422ms total_cost_time:220.2916145324707ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12267 prompt_cache_len:5151 prompt_cache_ratio:0.4199070677427244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 -DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:10 [batch.py:51] router release req id 8 -INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10886907577514648 s -INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.11085677146911621 s -DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=311457074105700765898417825635140374519, time:1750768210.571553s req_ids:[8] -DEBUG 06-24 20:30:10 [manager.py:391] -ERROR 06-24 20:30:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:404.46925163269043ms total_cost_time:404.5143127441406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12268 prompt_cache_len:5151 prompt_cache_ratio:0.41987283990870555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 -DEBUG 06-24 20:30:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:10 [batch.py:51] router release req id 8 -INFO 06-24 20:30:10 [manager.py:224] router recive req id 8 cost time 0.10792899131774902 s -INFO 06-24 20:30:10 [manager.py:68] detokenization recv req id 8 cost time 0.10988616943359375 s -DEBUG 06-24 20:30:10 [manager.py:391] Prefill Batch: batch_id=106523650154662671249854788398950984555, time:1750768210.9827905s req_ids:[8] -DEBUG 06-24 20:30:10 [manager.py:391] -ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:10 lightllm_req_id:8 first_token_cost:209.85770225524902ms total_cost_time:209.90300178527832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12269 prompt_cache_len:5151 prompt_cache_ratio:0.41983861765425057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 -DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:11 [batch.py:51] router release req id 8 -INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10760784149169922 s -INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.10960268974304199 s -DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=34625511700880369267905303733132738007, time:1750768211.2095954s req_ids:[8] -DEBUG 06-24 20:30:11 [manager.py:391] -ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:223.74582290649414ms total_cost_time:223.79016876220703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12270 prompt_cache_len:5151 prompt_cache_ratio:0.4198044009779951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 -DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:11 [batch.py:51] router release req id 8 -INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10821723937988281 s -INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.11035513877868652 s -DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=187964616290520193354582041924638940361, time:1750768211.4278483s req_ids:[8] -DEBUG 06-24 20:30:11 [manager.py:391] -ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:209.00917053222656ms total_cost_time:209.06376838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:12271 prompt_cache_len:5151 prompt_cache_ratio:0.4197701898785755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 -DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:11 [batch.py:51] router release req id 8 -INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s -INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.11073970794677734 s -DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=327209994397368789317369608647500120634, time:1750768211.6439855s req_ids:[8] -DEBUG 06-24 20:30:11 [manager.py:391] -ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:212.67366409301758ms total_cost_time:212.71610260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12272 prompt_cache_len:5151 prompt_cache_ratio:0.41973598435462844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 -DEBUG 06-24 20:30:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:11 [batch.py:51] router release req id 8 -INFO 06-24 20:30:11 [manager.py:224] router recive req id 8 cost time 0.10771489143371582 s -INFO 06-24 20:30:11 [manager.py:68] detokenization recv req id 8 cost time 0.1098637580871582 s -DEBUG 06-24 20:30:11 [manager.py:391] Prefill Batch: batch_id=152566384265876434408300939557025266447, time:1750768211.8718204s req_ids:[8] -DEBUG 06-24 20:30:11 [manager.py:391] -ERROR 06-24 20:30:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:224.49469566345215ms total_cost_time:224.531888961792ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:12273 prompt_cache_len:5151 prompt_cache_ratio:0.419701784404791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 -DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:12 [batch.py:51] router release req id 8 -INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10841608047485352 s -INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11052155494689941 s -DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=26611703205459395120396464541420283727, time:1750768212.0996208s req_ids:[8] -DEBUG 06-24 20:30:12 [manager.py:391] -ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:11 lightllm_req_id:8 first_token_cost:217.43273735046387ms total_cost_time:217.47612953186035ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12274 prompt_cache_len:5151 prompt_cache_ratio:0.4196675900277008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 -DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:12 [batch.py:51] router release req id 8 -INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.1086878776550293 s -INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11076712608337402 s -DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=284324273951095787320748538978298563633, time:1750768212.3225946s req_ids:[8] -DEBUG 06-24 20:30:12 [manager.py:391] -ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:379.76670265197754ms total_cost_time:379.81200218200684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12275 prompt_cache_len:5151 prompt_cache_ratio:0.41963340122199594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 -DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:12 [batch.py:51] router release req id 8 -INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10868525505065918 s -INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.11060142517089844 s -DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=326579203069190064052218573364872873754, time:1750768212.7029383s req_ids:[8] -DEBUG 06-24 20:30:12 [manager.py:391] -ERROR 06-24 20:30:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:213.3960723876953ms total_cost_time:213.4411334991455ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12276 prompt_cache_len:5151 prompt_cache_ratio:0.4195992179863148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 -DEBUG 06-24 20:30:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:12 [batch.py:51] router release req id 8 -INFO 06-24 20:30:12 [manager.py:224] router recive req id 8 cost time 0.10784053802490234 s -INFO 06-24 20:30:12 [manager.py:68] detokenization recv req id 8 cost time 0.1098170280456543 s -DEBUG 06-24 20:30:12 [manager.py:391] Prefill Batch: batch_id=334427082786688717670328296191861956164, time:1750768212.9231236s req_ids:[8] -DEBUG 06-24 20:30:12 [manager.py:391] -ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:12 lightllm_req_id:8 first_token_cost:210.75868606567383ms total_cost_time:210.82210540771484ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:12277 prompt_cache_len:5151 prompt_cache_ratio:0.41956504031929626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 -DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:13 [batch.py:51] router release req id 8 -INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10878562927246094 s -INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080193519592285 s -DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=220147819322291395145810741222888342634, time:1750768213.139639s req_ids:[8] -DEBUG 06-24 20:30:13 [manager.py:391] -ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:209.75971221923828ms total_cost_time:209.8069190979004ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12278 prompt_cache_len:5151 prompt_cache_ratio:0.41953086821957974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 -DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:13 [batch.py:51] router release req id 8 -INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10899972915649414 s -INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11113405227661133 s -DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=316927324915558698703921048075067128267, time:1750768213.3576663s req_ids:[8] -DEBUG 06-24 20:30:13 [manager.py:391] -ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:208.30869674682617ms total_cost_time:208.35256576538086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12279 prompt_cache_len:5151 prompt_cache_ratio:0.41949670168580505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 -DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:13 [batch.py:51] router release req id 8 -INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.1088724136352539 s -INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11107540130615234 s -DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=206139606078525187179088752187312733038, time:1750768213.5721948s req_ids:[8] -DEBUG 06-24 20:30:13 [manager.py:391] -ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:209.059476852417ms total_cost_time:209.10239219665527ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12280 prompt_cache_len:5151 prompt_cache_ratio:0.4194625407166124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 -DEBUG 06-24 20:30:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:13 [batch.py:51] router release req id 8 -INFO 06-24 20:30:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:13 [manager.py:224] router recive req id 8 cost time 0.10875129699707031 s -INFO 06-24 20:30:13 [manager.py:68] detokenization recv req id 8 cost time 0.11068177223205566 s -DEBUG 06-24 20:30:13 [manager.py:391] Prefill Batch: batch_id=152871604667802670178516378231573534170, time:1750768213.7874155s req_ids:[8] -DEBUG 06-24 20:30:13 [manager.py:391] -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:212.53490447998047ms total_cost_time:212.57877349853516ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12281 prompt_cache_len:5151 prompt_cache_ratio:0.41942838531064247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 -DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:14 [batch.py:51] router release req id 8 -INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.3106234073638916 s -INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.31250548362731934 s -DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=35408899537013279882574449970144099957, time:1750768214.2278163s req_ids:[8] -DEBUG 06-24 20:30:14 [manager.py:391] -ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:13 lightllm_req_id:8 first_token_cost:434.6632957458496ms total_cost_time:434.7085952758789ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12282 prompt_cache_len:5151 prompt_cache_ratio:0.41939423546653637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 -DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:14 [batch.py:51] router release req id 8 -INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10974693298339844 s -INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s -DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=255322595240908329061791178551874471116, time:1750768214.4459279s req_ids:[8] -DEBUG 06-24 20:30:14 [manager.py:391] -ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:207.72957801818848ms total_cost_time:207.7772617340088ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:12283 prompt_cache_len:5151 prompt_cache_ratio:0.41936009118293577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 -DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:14 [batch.py:51] router release req id 8 -INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s -INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11047625541687012 s -DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=319180040553376592275858233686237598524, time:1750768214.6595943s req_ids:[8] -DEBUG 06-24 20:30:14 [manager.py:391] -ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:205.0940990447998ms total_cost_time:205.1372528076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12284 prompt_cache_len:5151 prompt_cache_ratio:0.4193259524584826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 -DEBUG 06-24 20:30:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:14 [batch.py:51] router release req id 8 -INFO 06-24 20:30:14 [manager.py:224] router recive req id 8 cost time 0.10866999626159668 s -INFO 06-24 20:30:14 [manager.py:68] detokenization recv req id 8 cost time 0.11063003540039062 s -DEBUG 06-24 20:30:14 [manager.py:391] Prefill Batch: batch_id=165248541861191840379271098035928298562, time:1750768214.8722734s req_ids:[8] -DEBUG 06-24 20:30:14 [manager.py:391] -ERROR 06-24 20:30:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:30:14 [stats.py:37] Avg tokens(prompt+generate) throughput: 50085.260 tokens/s -DEBUG 06-24 20:30:14 [stats.py:37] Avg prompt tokens throughput: 50077.094 tokens/s -DEBUG 06-24 20:30:14 [stats.py:37] Avg generate tokens throughput: 8.166 tokens/s -INFO 06-24 20:30:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:211.4541530609131ms total_cost_time:211.49706840515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12285 prompt_cache_len:5151 prompt_cache_ratio:0.4192918192918193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 -DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:15 [batch.py:51] router release req id 8 -INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10718083381652832 s -INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.1092524528503418 s -DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=142439062501918909999817282146898394487, time:1750768215.0885985s req_ids:[8] -DEBUG 06-24 20:30:15 [manager.py:391] -ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:14 lightllm_req_id:8 first_token_cost:210.04796028137207ms total_cost_time:210.09397506713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12286 prompt_cache_len:5151 prompt_cache_ratio:0.4192576916815888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 -DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:15 [batch.py:51] router release req id 8 -INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10892486572265625 s -INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11083388328552246 s -DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=323717495803503646380329902279718453848, time:1750768215.3075383s req_ids:[8] -DEBUG 06-24 20:30:15 [manager.py:391] -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:215.6851291656494ms total_cost_time:215.7275676727295ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12287 prompt_cache_len:5151 prompt_cache_ratio:0.4192235696264344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 -DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:15 [batch.py:51] router release req id 8 -INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10816454887390137 s -INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11011362075805664 s -DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=280498292470406645513611029609521955612, time:1750768215.5295215s req_ids:[8] -DEBUG 06-24 20:30:15 [manager.py:391] -ERROR 06-24 20:30:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:385.80894470214844ms total_cost_time:385.85567474365234ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12288 prompt_cache_len:5151 prompt_cache_ratio:0.419189453125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 -DEBUG 06-24 20:30:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:15 [batch.py:51] router release req id 8 -INFO 06-24 20:30:15 [manager.py:224] router recive req id 8 cost time 0.10908889770507812 s -INFO 06-24 20:30:15 [manager.py:68] detokenization recv req id 8 cost time 0.11022496223449707 s -DEBUG 06-24 20:30:15 [manager.py:391] Prefill Batch: batch_id=159541928179216326227174337285333288617, time:1750768215.9230154s req_ids:[8] -DEBUG 06-24 20:30:15 [manager.py:391] -ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:15 lightllm_req_id:8 first_token_cost:222.3207950592041ms total_cost_time:222.36299514770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12289 prompt_cache_len:5151 prompt_cache_ratio:0.4191553421759297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 -DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:16 [batch.py:51] router release req id 8 -INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10857319831848145 s -INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.11043071746826172 s -DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=317547751233177573621313497514028922574, time:1750768216.1565542s req_ids:[8] -DEBUG 06-24 20:30:16 [manager.py:391] -ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:216.98307991027832ms total_cost_time:217.0271873474121ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12290 prompt_cache_len:5151 prompt_cache_ratio:0.41912123677786817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 -DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:16 [batch.py:51] router release req id 8 -INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10961174964904785 s -INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.1115882396697998 s -DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=146630690807243504140236523240371656616, time:1750768216.3739161s req_ids:[8] -DEBUG 06-24 20:30:16 [manager.py:391] -ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.46891403198242ms total_cost_time:208.5118293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12291 prompt_cache_len:5151 prompt_cache_ratio:0.4190871369294606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 -DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:16 [batch.py:51] router release req id 8 -INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10622119903564453 s -INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.10732769966125488 s -DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=183913818176938679173598097788599764881, time:1750768216.5880268s req_ids:[8] -DEBUG 06-24 20:30:16 [manager.py:391] -ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.60934257507324ms total_cost_time:208.634614944458ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12292 prompt_cache_len:5151 prompt_cache_ratio:0.41905304262935245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 -DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:16 [batch.py:51] router release req id 8 -INFO 06-24 20:30:16 [manager.py:224] router recive req id 8 cost time 0.10464000701904297 s -INFO 06-24 20:30:16 [manager.py:68] detokenization recv req id 8 cost time 0.10652327537536621 s -DEBUG 06-24 20:30:16 [manager.py:391] Prefill Batch: batch_id=252749128555520377695457858129818006532, time:1750768216.8018572s req_ids:[8] -DEBUG 06-24 20:30:16 [manager.py:391] -ERROR 06-24 20:30:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:208.93144607543945ms total_cost_time:208.95767211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12293 prompt_cache_len:5151 prompt_cache_ratio:0.4190189538761897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 -DEBUG 06-24 20:30:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:16 [batch.py:51] router release req id 8 -INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10602879524230957 s -INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.1077885627746582 s -DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=305500924411200803363995687139668718167, time:1750768217.0210962s req_ids:[8] -DEBUG 06-24 20:30:17 [manager.py:391] -INFO 06-24 20:30:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:16 lightllm_req_id:8 first_token_cost:218.07479858398438ms total_cost_time:218.09935569763184ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12294 prompt_cache_len:5151 prompt_cache_ratio:0.41898487066861884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 -DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:17 [batch.py:51] router release req id 8 -INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.1044456958770752 s -INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10638999938964844 s -INFO 06-24 20:30:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=190907607336372260987264323645116104204, time:1750768217.2442908s req_ids:[8] -DEBUG 06-24 20:30:17 [manager.py:391] -ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:382.17735290527344ms total_cost_time:382.2033405303955ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12295 prompt_cache_len:5151 prompt_cache_ratio:0.4189507930052867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 -DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:17 [batch.py:51] router release req id 8 -INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10595846176147461 s -INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10791015625 s -DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=175694482358923262402626680987191023810, time:1750768217.626134s req_ids:[8] -DEBUG 06-24 20:30:17 [manager.py:391] -ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:210.22653579711914ms total_cost_time:210.25323867797852ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12296 prompt_cache_len:5151 prompt_cache_ratio:0.4189167208848406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 -DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:17 [batch.py:51] router release req id 8 -INFO 06-24 20:30:17 [manager.py:224] router recive req id 8 cost time 0.10581612586975098 s -INFO 06-24 20:30:17 [manager.py:68] detokenization recv req id 8 cost time 0.10776090621948242 s -DEBUG 06-24 20:30:17 [manager.py:391] Prefill Batch: batch_id=203332834193548005834256147153747306924, time:1750768217.8408647s req_ids:[8] -DEBUG 06-24 20:30:17 [manager.py:391] -ERROR 06-24 20:30:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:207.43489265441895ms total_cost_time:207.47876167297363ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12297 prompt_cache_len:5151 prompt_cache_ratio:0.41888265430592825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 -DEBUG 06-24 20:30:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:17 [batch.py:51] router release req id 8 -INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10782098770141602 s -INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10978937149047852 s -DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=309721090638467427126788955681082847325, time:1750768218.0526311s req_ids:[8] -DEBUG 06-24 20:30:18 [manager.py:391] -ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:17 lightllm_req_id:8 first_token_cost:212.7523422241211ms total_cost_time:212.79549598693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12298 prompt_cache_len:5151 prompt_cache_ratio:0.4188485932671979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 -DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:18 [batch.py:51] router release req id 8 -INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.1074228286743164 s -INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10951972007751465 s -DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=231964861893324105947416580115367687232, time:1750768218.270329s req_ids:[8] -DEBUG 06-24 20:30:18 [manager.py:391] -ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:208.19330215454102ms total_cost_time:208.2362174987793ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12299 prompt_cache_len:5151 prompt_cache_ratio:0.41881453776729816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 -DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:18 [batch.py:51] router release req id 8 -INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10730552673339844 s -INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.1094820499420166 s -DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=90084042079271187875812918590054601047, time:1750768218.4847622s req_ids:[8] -DEBUG 06-24 20:30:18 [manager.py:391] -ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:212.08763122558594ms total_cost_time:212.13316917419434ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12300 prompt_cache_len:5151 prompt_cache_ratio:0.41878048780487803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 -DEBUG 06-24 20:30:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:18 [batch.py:51] router release req id 8 -INFO 06-24 20:30:18 [manager.py:224] router recive req id 8 cost time 0.10750222206115723 s -INFO 06-24 20:30:18 [manager.py:68] detokenization recv req id 8 cost time 0.10939955711364746 s -DEBUG 06-24 20:30:18 [manager.py:391] Prefill Batch: batch_id=289885995674930448297700330981269122105, time:1750768218.7024384s req_ids:[8] -DEBUG 06-24 20:30:18 [manager.py:391] -ERROR 06-24 20:30:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:209.0892791748047ms total_cost_time:209.13338661193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12301 prompt_cache_len:5151 prompt_cache_ratio:0.41874644337858713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 -DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:19 [batch.py:51] router release req id 8 -INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.3092012405395508 s -INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.31110239028930664 s -DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=38127030148488292311809444343364004316, time:1750768219.135195s req_ids:[8] -DEBUG 06-24 20:30:19 [manager.py:391] -ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:18 lightllm_req_id:8 first_token_cost:431.77151679992676ms total_cost_time:431.7903518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.018835067749023438ms prompt_token_num:12302 prompt_cache_len:5151 prompt_cache_ratio:0.41871240448707525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 -DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:19 [batch.py:51] router release req id 8 -INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10590791702270508 s -INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10767960548400879 s -DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=45000330983815278105684239013301663375, time:1750768219.3538237s req_ids:[8] -DEBUG 06-24 20:30:19 [manager.py:391] -ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:206.5718173980713ms total_cost_time:206.61664009094238ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12303 prompt_cache_len:5151 prompt_cache_ratio:0.4186783711289929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 -DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:19 [batch.py:51] router release req id 8 -INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s -INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.1099860668182373 s -DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=309989169019740880780485155574810673688, time:1750768219.565168s req_ids:[8] -DEBUG 06-24 20:30:19 [manager.py:391] -ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:205.9030532836914ms total_cost_time:205.9488296508789ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12304 prompt_cache_len:5151 prompt_cache_ratio:0.4186443433029909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 -DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:19 [batch.py:51] router release req id 8 -INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10740423202514648 s -INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10986208915710449 s -DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=311964729791632439200582052398223360061, time:1750768219.7761662s req_ids:[8] -DEBUG 06-24 20:30:19 [manager.py:391] -ERROR 06-24 20:30:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:208.16850662231445ms total_cost_time:208.21261405944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12305 prompt_cache_len:5151 prompt_cache_ratio:0.41861032100772044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 -DEBUG 06-24 20:30:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:19 [batch.py:51] router release req id 8 -INFO 06-24 20:30:19 [manager.py:224] router recive req id 8 cost time 0.10737133026123047 s -INFO 06-24 20:30:19 [manager.py:68] detokenization recv req id 8 cost time 0.10938715934753418 s -DEBUG 06-24 20:30:19 [manager.py:391] Prefill Batch: batch_id=271777441607213166242967730699355693145, time:1750768219.9894078s req_ids:[8] -DEBUG 06-24 20:30:19 [manager.py:391] -ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:19 lightllm_req_id:8 first_token_cost:204.66899871826172ms total_cost_time:204.7126293182373ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12306 prompt_cache_len:5151 prompt_cache_ratio:0.41857630424183323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 -DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:20 [batch.py:51] router release req id 8 -INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10681414604187012 s -INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10869765281677246 s -DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=290199570875732600437797211086623048126, time:1750768220.2007053s req_ids:[8] -DEBUG 06-24 20:30:20 [manager.py:391] -ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:207.94177055358887ms total_cost_time:207.9620361328125ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12307 prompt_cache_len:5151 prompt_cache_ratio:0.4185422930039815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 -DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:20 [batch.py:51] router release req id 8 -INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10592150688171387 s -INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10789203643798828 s -DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=235226966213028544069639166104770228557, time:1750768220.4158187s req_ids:[8] -DEBUG 06-24 20:30:20 [manager.py:391] -ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:385.8811855316162ms total_cost_time:385.9260082244873ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12308 prompt_cache_len:5151 prompt_cache_ratio:0.4185082872928177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 -DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:20 [batch.py:51] router release req id 8 -INFO 06-24 20:30:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:20 [manager.py:224] router recive req id 8 cost time 0.10760235786437988 s -INFO 06-24 20:30:20 [manager.py:68] detokenization recv req id 8 cost time 0.10938811302185059 s -DEBUG 06-24 20:30:20 [manager.py:391] Prefill Batch: batch_id=277900629765222991500043776884889623013, time:1750768220.8055282s req_ids:[8] -DEBUG 06-24 20:30:20 [manager.py:391] -ERROR 06-24 20:30:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:211.6565704345703ms total_cost_time:211.7006778717041ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12309 prompt_cache_len:5151 prompt_cache_ratio:0.41847428710699486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 -DEBUG 06-24 20:30:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:20 [batch.py:51] router release req id 8 -INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10920333862304688 s -INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.11106681823730469 s -DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=128748271447325596884409514291324043654, time:1750768221.023272s req_ids:[8] -DEBUG 06-24 20:30:21 [manager.py:391] -ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:20 lightllm_req_id:8 first_token_cost:213.13714981079102ms total_cost_time:213.1798267364502ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12310 prompt_cache_len:5151 prompt_cache_ratio:0.4184402924451665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 -DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:21 [batch.py:51] router release req id 8 -INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10619068145751953 s -INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10793685913085938 s -DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=276425693268558130205846185341956890668, time:1750768221.2412093s req_ids:[8] -DEBUG 06-24 20:30:21 [manager.py:391] -ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:209.63215827941895ms total_cost_time:209.65266227722168ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12311 prompt_cache_len:5151 prompt_cache_ratio:0.4184063033059865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 -DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:21 [batch.py:51] router release req id 8 -INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.1039571762084961 s -INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10574054718017578 s -DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=30019235178860542505242026576238862951, time:1750768221.468738s req_ids:[8] -DEBUG 06-24 20:30:21 [manager.py:391] -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:231.60195350646973ms total_cost_time:231.64749145507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12312 prompt_cache_len:5151 prompt_cache_ratio:0.4183723196881092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 -DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:21 [batch.py:51] router release req id 8 -INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10718297958374023 s -INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10903096199035645 s -DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=6741900432493218484736035659638790538, time:1750768221.6883554s req_ids:[8] -DEBUG 06-24 20:30:21 [manager.py:391] -ERROR 06-24 20:30:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:210.58225631713867ms total_cost_time:210.62850952148438ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12313 prompt_cache_len:5151 prompt_cache_ratio:0.41833834159018923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 -DEBUG 06-24 20:30:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:21 [batch.py:51] router release req id 8 -INFO 06-24 20:30:21 [manager.py:224] router recive req id 8 cost time 0.10804009437561035 s -INFO 06-24 20:30:21 [manager.py:68] detokenization recv req id 8 cost time 0.10983681678771973 s -DEBUG 06-24 20:30:21 [manager.py:391] Prefill Batch: batch_id=11000193433863802559194163045036323685, time:1750768221.9032564s req_ids:[8] -DEBUG 06-24 20:30:21 [manager.py:391] -ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:21 lightllm_req_id:8 first_token_cost:385.2221965789795ms total_cost_time:385.2674961090088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12314 prompt_cache_len:5151 prompt_cache_ratio:0.4183043690108819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 -DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:22 [batch.py:51] router release req id 8 -INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10812878608703613 s -INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s -DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=141112722764993977758868702564397449941, time:1750768222.2928276s req_ids:[8] -DEBUG 06-24 20:30:22 [manager.py:391] -ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:211.60483360290527ms total_cost_time:211.64941787719727ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12315 prompt_cache_len:5151 prompt_cache_ratio:0.41827040194884285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 -DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:22 [batch.py:51] router release req id 8 -INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10987401008605957 s -INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.11174726486206055 s -DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=125452825545383334236345434751927397468, time:1750768222.5105457s req_ids:[8] -DEBUG 06-24 20:30:22 [manager.py:391] -ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:214.28728103637695ms total_cost_time:214.33091163635254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12316 prompt_cache_len:5151 prompt_cache_ratio:0.41823644040272817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 -DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:22 [batch.py:51] router release req id 8 -INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10790562629699707 s -INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.10974335670471191 s -DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=192811308354748180395200290406349671320, time:1750768222.7301226s req_ids:[8] -DEBUG 06-24 20:30:22 [manager.py:391] -ERROR 06-24 20:30:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:210.2980613708496ms total_cost_time:210.3421688079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12317 prompt_cache_len:5151 prompt_cache_ratio:0.4182024843711943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 -DEBUG 06-24 20:30:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:22 [batch.py:51] router release req id 8 -INFO 06-24 20:30:22 [manager.py:224] router recive req id 8 cost time 0.10889387130737305 s -INFO 06-24 20:30:22 [manager.py:68] detokenization recv req id 8 cost time 0.11087632179260254 s -DEBUG 06-24 20:30:22 [manager.py:391] Prefill Batch: batch_id=268798155189771828048132465972705914485, time:1750768222.9463332s req_ids:[8] -DEBUG 06-24 20:30:22 [manager.py:391] -ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:22 lightllm_req_id:8 first_token_cost:210.0825309753418ms total_cost_time:210.13379096984863ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:12318 prompt_cache_len:5151 prompt_cache_ratio:0.4181685338528982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 -DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:23 [batch.py:51] router release req id 8 -INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10793662071228027 s -INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10984659194946289 s -DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=183527022583178955269496482693212970114, time:1750768223.1624532s req_ids:[8] -DEBUG 06-24 20:30:23 [manager.py:391] -INFO 06-24 20:30:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:211.02404594421387ms total_cost_time:211.05027198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12319 prompt_cache_len:5151 prompt_cache_ratio:0.4181345888464973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 -DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:23 [batch.py:51] router release req id 8 -INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10472369194030762 s -INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10656523704528809 s -DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=164709275216824695765698985800874702156, time:1750768223.3795993s req_ids:[8] -DEBUG 06-24 20:30:23 [manager.py:391] -ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:210.09588241577148ms total_cost_time:210.11781692504883ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12320 prompt_cache_len:5151 prompt_cache_ratio:0.41810064935064933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 -DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:23 [batch.py:51] router release req id 8 -INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10663127899169922 s -INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.10852575302124023 s -DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=68698354258610469532954112814209678665, time:1750768223.5964773s req_ids:[8] -DEBUG 06-24 20:30:23 [manager.py:391] -ERROR 06-24 20:30:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:395.0848579406738ms total_cost_time:395.129919052124ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12321 prompt_cache_len:5151 prompt_cache_ratio:0.4180667153640127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 -DEBUG 06-24 20:30:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:23 [batch.py:51] router release req id 8 -INFO 06-24 20:30:23 [manager.py:224] router recive req id 8 cost time 0.10791945457458496 s -INFO 06-24 20:30:23 [manager.py:68] detokenization recv req id 8 cost time 0.11002564430236816 s -DEBUG 06-24 20:30:23 [manager.py:391] Prefill Batch: batch_id=267181954252160671907080280023731073419, time:1750768223.9958658s req_ids:[8] -DEBUG 06-24 20:30:23 [manager.py:391] -ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:23 lightllm_req_id:8 first_token_cost:211.75074577331543ms total_cost_time:211.81654930114746ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:12322 prompt_cache_len:5151 prompt_cache_ratio:0.4180327868852459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 -DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:24 [batch.py:51] router release req id 8 -INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10635256767272949 s -INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10809111595153809 s -DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=159796958108498451253758886484286906480, time:1750768224.2190518s req_ids:[8] -DEBUG 06-24 20:30:24 [manager.py:391] -ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:218.20950508117676ms total_cost_time:218.25456619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12323 prompt_cache_len:5151 prompt_cache_ratio:0.4179988639130082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 -DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:24 [batch.py:51] router release req id 8 -INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10738348960876465 s -INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10918140411376953 s -DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=150905350408843317622404754928711601137, time:1750768224.4372444s req_ids:[8] -DEBUG 06-24 20:30:24 [manager.py:391] -ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:167.56677627563477ms total_cost_time:167.60969161987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12324 prompt_cache_len:5151 prompt_cache_ratio:0.4179649464459591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 -DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:24 [batch.py:51] router release req id 8 -INFO 06-24 20:30:24 [manager.py:224] router recive req id 8 cost time 0.10768246650695801 s -INFO 06-24 20:30:24 [manager.py:68] detokenization recv req id 8 cost time 0.10867881774902344 s -DEBUG 06-24 20:30:24 [manager.py:391] Prefill Batch: batch_id=163827043402540936352270611200631441723, time:1750768224.6118188s req_ids:[8] -DEBUG 06-24 20:30:24 [manager.py:391] -ERROR 06-24 20:30:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:24 lightllm_req_id:8 first_token_cost:206.4507007598877ms total_cost_time:206.49409294128418ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12325 prompt_cache_len:5151 prompt_cache_ratio:0.41793103448275865 mtp_avg_token_per_step:1.0 -DEBUG 06-24 20:30:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:26 [batch.py:51] router release req id 8 -INFO 06-24 20:30:26 [manager.py:88] detokenize batch cost time 1346.8925952911377 ms -INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 -INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.007501125335693359 s -INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.009353399276733398 s -DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=149971353677183372869037845085130413049, time:1750768226.069404s req_ids:[8] -DEBUG 06-24 20:30:26 [manager.py:391] -DEBUG 06-24 20:30:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 45451.710 tokens/s -DEBUG 06-24 20:30:26 [stats.py:37] Avg prompt tokens throughput: 45444.414 tokens/s -DEBUG 06-24 20:30:26 [stats.py:37] Avg generate tokens throughput: 7.296 tokens/s -ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:70.87516784667969ms total_cost_time:70.91641426086426ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12326 prompt_cache_len:5151 prompt_cache_ratio:0.4178971280220672 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 -DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:26 [batch.py:51] router release req id 8 -INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.11015725135803223 s -INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.11197423934936523 s -DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=11641086166201680642182721610707953798, time:1750768226.2472377s req_ids:[8] -DEBUG 06-24 20:30:26 [manager.py:391] -ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:202.26192474365234ms total_cost_time:202.30674743652344ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12327 prompt_cache_len:5151 prompt_cache_ratio:0.41786322706254564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 -DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:26 [batch.py:51] router release req id 8 -INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.10840177536010742 s -INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.10940337181091309 s -DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=4827784159558355039442597672753264100, time:1750768226.4566085s req_ids:[8] -DEBUG 06-24 20:30:26 [manager.py:391] -ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:340.548038482666ms total_cost_time:340.5919075012207ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12328 prompt_cache_len:5151 prompt_cache_ratio:0.4178293316028553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 -DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:26 [batch.py:51] router release req id 8 -INFO 06-24 20:30:26 [manager.py:224] router recive req id 8 cost time 0.10769104957580566 s -INFO 06-24 20:30:26 [manager.py:68] detokenization recv req id 8 cost time 0.10893058776855469 s -DEBUG 06-24 20:30:26 [manager.py:391] Prefill Batch: batch_id=179908133745322955268760531501305073618, time:1750768226.802731s req_ids:[8] -DEBUG 06-24 20:30:26 [manager.py:391] -ERROR 06-24 20:30:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:200.16765594482422ms total_cost_time:200.2108097076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12329 prompt_cache_len:5151 prompt_cache_ratio:0.4177954416416579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 -DEBUG 06-24 20:30:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:26 [batch.py:51] router release req id 8 -INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10858917236328125 s -INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s -DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=238628165189564810994990000698402897339, time:1750768227.0096383s req_ids:[8] -DEBUG 06-24 20:30:27 [manager.py:391] -ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:26 lightllm_req_id:8 first_token_cost:209.76805686950684ms total_cost_time:209.81168746948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12330 prompt_cache_len:5151 prompt_cache_ratio:0.4177615571776156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 -DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:27 [batch.py:51] router release req id 8 -INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10841631889343262 s -INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11014986038208008 s -DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=285358479764353040924748186395470928963, time:1750768227.2264001s req_ids:[8] -DEBUG 06-24 20:30:27 [manager.py:391] -ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:209.24854278564453ms total_cost_time:209.30075645446777ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12331 prompt_cache_len:5151 prompt_cache_ratio:0.41772767820939094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 -DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:27 [batch.py:51] router release req id 8 -INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.1079704761505127 s -INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.1090087890625 s -DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=135789942995746476933530366063026894731, time:1750768227.4434001s req_ids:[8] -DEBUG 06-24 20:30:27 [manager.py:391] -ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:192.9464340209961ms total_cost_time:192.98863410949707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12332 prompt_cache_len:5151 prompt_cache_ratio:0.4176938047356471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 -DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:27 [batch.py:51] router release req id 8 -INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10725140571594238 s -INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.1090240478515625 s -DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=147088582560991178967695306638244271497, time:1750768227.645021s req_ids:[8] -DEBUG 06-24 20:30:27 [manager.py:391] -ERROR 06-24 20:30:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:210.6029987335205ms total_cost_time:210.6473445892334ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12333 prompt_cache_len:5151 prompt_cache_ratio:0.4176599367550474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 -DEBUG 06-24 20:30:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:27 [batch.py:51] router release req id 8 -INFO 06-24 20:30:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:27 [manager.py:224] router recive req id 8 cost time 0.10877656936645508 s -INFO 06-24 20:30:27 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s -DEBUG 06-24 20:30:27 [manager.py:391] Prefill Batch: batch_id=294972682221820793108185879684440117626, time:1750768227.8602748s req_ids:[8] -DEBUG 06-24 20:30:27 [manager.py:391] -ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:27 lightllm_req_id:8 first_token_cost:382.7393054962158ms total_cost_time:382.784366607666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12334 prompt_cache_len:5151 prompt_cache_ratio:0.41762607426625586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 -DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:28 [batch.py:51] router release req id 8 -INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10840559005737305 s -INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.11033058166503906 s -DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=213266453450289569996508969475129114344, time:1750768228.2468197s req_ids:[8] -DEBUG 06-24 20:30:28 [manager.py:391] -ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:207.87334442138672ms total_cost_time:207.9172134399414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12335 prompt_cache_len:5151 prompt_cache_ratio:0.41759221726793677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 -DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:28 [batch.py:51] router release req id 8 -INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10845589637756348 s -INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10938358306884766 s -DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=183097842022108625875050875535606588837, time:1750768228.4586284s req_ids:[8] -DEBUG 06-24 20:30:28 [manager.py:391] -ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:168.21789741516113ms total_cost_time:168.259859085083ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12336 prompt_cache_len:5151 prompt_cache_ratio:0.4175583657587549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 -DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:28 [batch.py:51] router release req id 8 -INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10693359375 s -INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10874533653259277 s -DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=23468331502140754638496891012924100072, time:1750768228.6327863s req_ids:[8] -DEBUG 06-24 20:30:28 [manager.py:391] -ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:204.16760444641113ms total_cost_time:204.209566116333ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12337 prompt_cache_len:5151 prompt_cache_ratio:0.41752451973737537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 -DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:28 [batch.py:51] router release req id 8 -INFO 06-24 20:30:28 [manager.py:224] router recive req id 8 cost time 0.10789990425109863 s -INFO 06-24 20:30:28 [manager.py:68] detokenization recv req id 8 cost time 0.10881543159484863 s -DEBUG 06-24 20:30:28 [manager.py:391] Prefill Batch: batch_id=142365426414513507668573665076643776753, time:1750768228.8559191s req_ids:[8] -DEBUG 06-24 20:30:28 [manager.py:391] -ERROR 06-24 20:30:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:187.92176246643066ms total_cost_time:187.97063827514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:12338 prompt_cache_len:5151 prompt_cache_ratio:0.4174906792024639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 -DEBUG 06-24 20:30:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:28 [batch.py:51] router release req id 8 -INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10633468627929688 s -INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.10735964775085449 s -DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=174920678958286956482544384087575912741, time:1750768229.038595s req_ids:[8] -DEBUG 06-24 20:30:29 [manager.py:391] -ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:28 lightllm_req_id:8 first_token_cost:208.76622200012207ms total_cost_time:208.80985260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12339 prompt_cache_len:5151 prompt_cache_ratio:0.4174568441526866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 -DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:29 [batch.py:51] router release req id 8 -INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10641622543334961 s -INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.10736322402954102 s -DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=38090421814484206465011050811261456010, time:1750768229.2516158s req_ids:[8] -DEBUG 06-24 20:30:29 [manager.py:391] -ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:173.1410026550293ms total_cost_time:173.16222190856934ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12340 prompt_cache_len:5151 prompt_cache_ratio:0.4174230145867099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 -DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:29 [batch.py:51] router release req id 8 -INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10596561431884766 s -INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.1079103946685791 s -DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=182901315737257360021298706793562932647, time:1750768229.4308355s req_ids:[8] -DEBUG 06-24 20:30:29 [manager.py:391] -ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:373.4912872314453ms total_cost_time:373.537540435791ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12341 prompt_cache_len:5151 prompt_cache_ratio:0.4173891905032007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 -DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:29 [batch.py:51] router release req id 8 -INFO 06-24 20:30:29 [manager.py:224] router recive req id 8 cost time 0.10645508766174316 s -INFO 06-24 20:30:29 [manager.py:68] detokenization recv req id 8 cost time 0.1074526309967041 s -DEBUG 06-24 20:30:29 [manager.py:391] Prefill Batch: batch_id=41613462274429965579517442123484210180, time:1750768229.8099973s req_ids:[8] -DEBUG 06-24 20:30:29 [manager.py:391] -ERROR 06-24 20:30:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:209.3663215637207ms total_cost_time:209.4097137451172ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12342 prompt_cache_len:5151 prompt_cache_ratio:0.41735537190082644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 -DEBUG 06-24 20:30:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:29 [batch.py:51] router release req id 8 -INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10798931121826172 s -INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10933732986450195 s -DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=172254792676380606999643884807252589719, time:1750768230.0256999s req_ids:[8] -DEBUG 06-24 20:30:30 [manager.py:391] -ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:29 lightllm_req_id:8 first_token_cost:212.13507652282715ms total_cost_time:212.18109130859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12343 prompt_cache_len:5151 prompt_cache_ratio:0.4173215587782549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 -DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:30 [batch.py:51] router release req id 8 -INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10710430145263672 s -INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10889983177185059 s -DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=168098677183327268161574994210680427808, time:1750768230.2472036s req_ids:[8] -DEBUG 06-24 20:30:30 [manager.py:391] -ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:216.86410903930664ms total_cost_time:216.90773963928223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12344 prompt_cache_len:5151 prompt_cache_ratio:0.41728775113415423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 -DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:30 [batch.py:51] router release req id 8 -INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10759329795837402 s -INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.1086585521697998 s -DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=338907649277683956906523230870232238747, time:1750768230.4691293s req_ids:[8] -DEBUG 06-24 20:30:30 [manager.py:391] -ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:211.68875694274902ms total_cost_time:211.71283721923828ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:12345 prompt_cache_len:5151 prompt_cache_ratio:0.4172539489671932 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 -DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:30 [batch.py:51] router release req id 8 -INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10361886024475098 s -INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.10492300987243652 s -DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=261001364713037148241904907513870401650, time:1750768230.6861336s req_ids:[8] -DEBUG 06-24 20:30:30 [manager.py:391] -ERROR 06-24 20:30:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:216.43471717834473ms total_cost_time:216.45593643188477ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12346 prompt_cache_len:5151 prompt_cache_ratio:0.4172201522760408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 -DEBUG 06-24 20:30:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:30 [batch.py:51] router release req id 8 -INFO 06-24 20:30:30 [manager.py:224] router recive req id 8 cost time 0.10332202911376953 s -INFO 06-24 20:30:30 [manager.py:68] detokenization recv req id 8 cost time 0.1045830249786377 s -DEBUG 06-24 20:30:30 [manager.py:391] Prefill Batch: batch_id=177285919370903052506238714961177905800, time:1750768230.907219s req_ids:[8] -DEBUG 06-24 20:30:30 [manager.py:391] -ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:30 lightllm_req_id:8 first_token_cost:222.0780849456787ms total_cost_time:222.09835052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12347 prompt_cache_len:5151 prompt_cache_ratio:0.41718636105936663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 -DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:31 [batch.py:51] router release req id 8 -INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10271596908569336 s -INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10384774208068848 s -DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=289365083330138975732603467309515462637, time:1750768231.1142542s req_ids:[8] -DEBUG 06-24 20:30:31 [manager.py:391] -ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:334.9034786224365ms total_cost_time:334.92350578308105ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12348 prompt_cache_len:5151 prompt_cache_ratio:0.4171525753158406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 -DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:31 [batch.py:51] router release req id 8 -INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10408473014831543 s -INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10537362098693848 s -DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=337988136231583324214519889707238234349, time:1750768231.467587s req_ids:[8] -DEBUG 06-24 20:30:31 [manager.py:391] -ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:214.2794132232666ms total_cost_time:214.30230140686035ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:12349 prompt_cache_len:5151 prompt_cache_ratio:0.41711879504413313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 -DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:31 [batch.py:51] router release req id 8 -INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10551285743713379 s -INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10677957534790039 s -DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=65266063218336614979421059502422687484, time:1750768231.6832716s req_ids:[8] -DEBUG 06-24 20:30:31 [manager.py:391] -ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:188.73095512390137ms total_cost_time:188.7519359588623ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12350 prompt_cache_len:5151 prompt_cache_ratio:0.417085020242915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 -DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:31 [batch.py:51] router release req id 8 -INFO 06-24 20:30:31 [manager.py:224] router recive req id 8 cost time 0.10391736030578613 s -INFO 06-24 20:30:31 [manager.py:68] detokenization recv req id 8 cost time 0.10515713691711426 s -DEBUG 06-24 20:30:31 [manager.py:391] Prefill Batch: batch_id=152229579294515821701900622242121055818, time:1750768231.8749394s req_ids:[8] -DEBUG 06-24 20:30:31 [manager.py:391] -ERROR 06-24 20:30:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:209.93781089782715ms total_cost_time:209.95759963989258ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12351 prompt_cache_len:5151 prompt_cache_ratio:0.41705125091085743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 -DEBUG 06-24 20:30:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:31 [batch.py:51] router release req id 8 -INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10301542282104492 s -INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.10418176651000977 s -DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=306317128709785822871030362901159817571, time:1750768232.0866063s req_ids:[8] -DEBUG 06-24 20:30:32 [manager.py:391] -ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:31 lightllm_req_id:8 first_token_cost:203.39202880859375ms total_cost_time:203.4132480621338ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12352 prompt_cache_len:5151 prompt_cache_ratio:0.4170174870466321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 -DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:32 [batch.py:51] router release req id 8 -INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10312438011169434 s -INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.10429239273071289 s -DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=100911896635480136607813043007245329642, time:1750768232.295276s req_ids:[8] -DEBUG 06-24 20:30:32 [manager.py:391] -ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:203.74369621276855ms total_cost_time:203.78899574279785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12353 prompt_cache_len:5151 prompt_cache_ratio:0.4169837286489112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 -DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:32 [batch.py:51] router release req id 8 -INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.10602736473083496 s -INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.1073141098022461 s -DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=282869368542816713727081358954775360489, time:1750768232.5012953s req_ids:[8] -DEBUG 06-24 20:30:32 [manager.py:391] -ERROR 06-24 20:30:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:207.7333927154541ms total_cost_time:207.75461196899414ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12354 prompt_cache_len:5151 prompt_cache_ratio:0.41694997571636716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 -DEBUG 06-24 20:30:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:32 [batch.py:51] router release req id 8 -INFO 06-24 20:30:32 [manager.py:224] router recive req id 8 cost time 0.3062744140625 s -INFO 06-24 20:30:32 [manager.py:68] detokenization recv req id 8 cost time 0.3077511787414551 s -DEBUG 06-24 20:30:32 [manager.py:391] Prefill Batch: batch_id=198537307991279013509757405322646960959, time:1750768232.9232235s req_ids:[8] -DEBUG 06-24 20:30:32 [manager.py:391] -ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:32 lightllm_req_id:8 first_token_cost:428.6651611328125ms total_cost_time:428.68542671203613ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12355 prompt_cache_len:5151 prompt_cache_ratio:0.416916228247673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 -DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:33 [batch.py:51] router release req id 8 -INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10606837272644043 s -INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10723352432250977 s -DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=8190218448681302252991488561925840390, time:1750768233.1494133s req_ids:[8] -DEBUG 06-24 20:30:33 [manager.py:391] -ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:216.70842170715332ms total_cost_time:216.7530059814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12356 prompt_cache_len:5151 prompt_cache_ratio:0.4168824862415021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 -DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:33 [batch.py:51] router release req id 8 -INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10499191284179688 s -INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10617876052856445 s -DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=308470909684577249286722433319794328234, time:1750768233.3678327s req_ids:[8] -DEBUG 06-24 20:30:33 [manager.py:391] -ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:214.31541442871094ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12357 prompt_cache_len:5151 prompt_cache_ratio:0.4168487496965283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 -DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:33 [batch.py:51] router release req id 8 -INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10573339462280273 s -INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10697412490844727 s -DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=63654941481730843458478830735348255155, time:1750768233.5872405s req_ids:[8] -DEBUG 06-24 20:30:33 [manager.py:391] -ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:214.46919441223145ms total_cost_time:214.51210975646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12358 prompt_cache_len:5151 prompt_cache_ratio:0.4168150186114258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 -DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:33 [batch.py:51] router release req id 8 -INFO 06-24 20:30:33 [manager.py:224] router recive req id 8 cost time 0.10632133483886719 s -INFO 06-24 20:30:33 [manager.py:68] detokenization recv req id 8 cost time 0.10740113258361816 s -DEBUG 06-24 20:30:33 [manager.py:391] Prefill Batch: batch_id=9011368820392217543164404922953405978, time:1750768233.8061125s req_ids:[8] -DEBUG 06-24 20:30:33 [manager.py:391] -ERROR 06-24 20:30:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:213.76299858093262ms total_cost_time:213.78540992736816ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:12359 prompt_cache_len:5151 prompt_cache_ratio:0.4167812929848693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 -DEBUG 06-24 20:30:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:33 [batch.py:51] router release req id 8 -INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10360836982727051 s -INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10462737083435059 s -DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=90139554579142640498108624029452065721, time:1750768234.025219s req_ids:[8] -DEBUG 06-24 20:30:34 [manager.py:391] -ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:33 lightllm_req_id:8 first_token_cost:212.72611618041992ms total_cost_time:212.7695083618164ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12360 prompt_cache_len:5151 prompt_cache_ratio:0.416747572815534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 -DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:34 [batch.py:51] router release req id 8 -INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10592889785766602 s -INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10702967643737793 s -DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=320220457744279792418772654605338968470, time:1750768234.2656755s req_ids:[8] -DEBUG 06-24 20:30:34 [manager.py:391] -ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:436.0630512237549ms total_cost_time:436.107873916626ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12361 prompt_cache_len:5151 prompt_cache_ratio:0.4167138581020953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 -DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:34 [batch.py:51] router release req id 8 -INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.1073911190032959 s -INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.10863041877746582 s -DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=205387999372103311414378260568559217514, time:1750768234.684641s req_ids:[8] -DEBUG 06-24 20:30:34 [manager.py:391] -ERROR 06-24 20:30:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:218.66416931152344ms total_cost_time:218.68610382080078ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12362 prompt_cache_len:5151 prompt_cache_ratio:0.41668014884322924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 -DEBUG 06-24 20:30:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:34 [batch.py:51] router release req id 8 -INFO 06-24 20:30:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:34 [manager.py:224] router recive req id 8 cost time 0.10439586639404297 s -INFO 06-24 20:30:34 [manager.py:68] detokenization recv req id 8 cost time 0.1054999828338623 s -DEBUG 06-24 20:30:34 [manager.py:391] Prefill Batch: batch_id=245275210252574526833533952848575797243, time:1750768234.9287624s req_ids:[8] -DEBUG 06-24 20:30:34 [manager.py:391] -ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:34 lightllm_req_id:8 first_token_cost:229.72893714904785ms total_cost_time:229.7499179840088ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12363 prompt_cache_len:5151 prompt_cache_ratio:0.4166464450376122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 -DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:35 [batch.py:51] router release req id 8 -INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10576248168945312 s -INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10697531700134277 s -DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=299427331904696112256235850327934306033, time:1750768235.1433938s req_ids:[8] -DEBUG 06-24 20:30:35 [manager.py:391] -ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:211.09652519226074ms total_cost_time:211.14134788513184ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12364 prompt_cache_len:5151 prompt_cache_ratio:0.41661274668392106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 -DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:35 [batch.py:51] router release req id 8 -INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10757803916931152 s -INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.1088552474975586 s -DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=208634691544268811886730059796801350262, time:1750768235.3580174s req_ids:[8] -DEBUG 06-24 20:30:35 [manager.py:391] -ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:207.73959159851074ms total_cost_time:207.78393745422363ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12365 prompt_cache_len:5151 prompt_cache_ratio:0.416579053780833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 -DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:35 [batch.py:51] router release req id 8 -INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.1075277328491211 s -INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s -DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=298706225274550401283626875310686272409, time:1750768235.5722456s req_ids:[8] -DEBUG 06-24 20:30:35 [manager.py:391] -ERROR 06-24 20:30:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:209.6259593963623ms total_cost_time:209.6693515777588ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12366 prompt_cache_len:5151 prompt_cache_ratio:0.4165453663270257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 -DEBUG 06-24 20:30:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:35 [batch.py:51] router release req id 8 -INFO 06-24 20:30:35 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s -INFO 06-24 20:30:35 [manager.py:68] detokenization recv req id 8 cost time 0.10896873474121094 s -DEBUG 06-24 20:30:35 [manager.py:391] Prefill Batch: batch_id=137600441149220813638864710772597850762, time:1750768235.7880757s req_ids:[8] -DEBUG 06-24 20:30:35 [manager.py:391] -ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:35 lightllm_req_id:8 first_token_cost:388.40341567993164ms total_cost_time:388.4472846984863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12367 prompt_cache_len:5151 prompt_cache_ratio:0.4165116843211773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 -DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:36 [batch.py:51] router release req id 8 -INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10806465148925781 s -INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.10923576354980469 s -DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=81847973083828879770347336654938798229, time:1750768236.184393s req_ids:[8] -DEBUG 06-24 20:30:36 [manager.py:391] -DEBUG 06-24 20:30:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 51275.875 tokens/s -DEBUG 06-24 20:30:36 [stats.py:37] Avg prompt tokens throughput: 51267.571 tokens/s -DEBUG 06-24 20:30:36 [stats.py:37] Avg generate tokens throughput: 8.304 tokens/s -ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:207.80086517333984ms total_cost_time:207.84306526184082ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12368 prompt_cache_len:5151 prompt_cache_ratio:0.41647800776196636 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 -DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:36 [batch.py:51] router release req id 8 -INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10672211647033691 s -INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.10782814025878906 s -DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=288476144712504447676342809205188021050, time:1750768236.4184856s req_ids:[8] -DEBUG 06-24 20:30:36 [manager.py:391] -ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:233.1368923187256ms total_cost_time:233.20317268371582ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:12369 prompt_cache_len:5151 prompt_cache_ratio:0.4164443366480718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 -DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:36 [batch.py:51] router release req id 8 -INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.11191463470458984 s -INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.11411190032958984 s -DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=28183811273717150005544274854243955198, time:1750768236.636406s req_ids:[8] -DEBUG 06-24 20:30:36 [manager.py:391] -ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:213.87052536010742ms total_cost_time:213.91725540161133ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12370 prompt_cache_len:5151 prompt_cache_ratio:0.416410670978173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 -DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:36 [batch.py:51] router release req id 8 -INFO 06-24 20:30:36 [manager.py:224] router recive req id 8 cost time 0.10965418815612793 s -INFO 06-24 20:30:36 [manager.py:68] detokenization recv req id 8 cost time 0.1116335391998291 s -DEBUG 06-24 20:30:36 [manager.py:391] Prefill Batch: batch_id=180572414323650452194351021888365398796, time:1750768236.8562768s req_ids:[8] -DEBUG 06-24 20:30:36 [manager.py:391] -ERROR 06-24 20:30:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:222.7783203125ms total_cost_time:222.8255271911621ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12371 prompt_cache_len:5151 prompt_cache_ratio:0.4163770107509498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 -DEBUG 06-24 20:30:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:36 [batch.py:51] router release req id 8 -INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10724306106567383 s -INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.10928940773010254 s -DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=153165229990062536082565109022490064788, time:1750768237.0853343s req_ids:[8] -DEBUG 06-24 20:30:37 [manager.py:391] -ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:36 lightllm_req_id:8 first_token_cost:209.25569534301758ms total_cost_time:209.29932594299316ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12372 prompt_cache_len:5151 prompt_cache_ratio:0.41634335596508243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 -DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:37 [batch.py:51] router release req id 8 -INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10744094848632812 s -INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.1095132827758789 s -DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=44295151827756841104849175182254358371, time:1750768237.298769s req_ids:[8] -DEBUG 06-24 20:30:37 [manager.py:391] -ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:224.1671085357666ms total_cost_time:224.2114543914795ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12373 prompt_cache_len:5151 prompt_cache_ratio:0.4163097066192516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 -DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:37 [batch.py:51] router release req id 8 -INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.10984158515930176 s -INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.11179614067077637 s -DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=77709106296866416241723508400878984082, time:1750768237.5302694s req_ids:[8] -DEBUG 06-24 20:30:37 [manager.py:391] -ERROR 06-24 20:30:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:397.17698097229004ms total_cost_time:397.22394943237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12374 prompt_cache_len:5151 prompt_cache_ratio:0.41627606271213835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 -DEBUG 06-24 20:30:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:37 [batch.py:51] router release req id 8 -INFO 06-24 20:30:37 [manager.py:224] router recive req id 8 cost time 0.1081078052520752 s -INFO 06-24 20:30:37 [manager.py:68] detokenization recv req id 8 cost time 0.11019062995910645 s -DEBUG 06-24 20:30:37 [manager.py:391] Prefill Batch: batch_id=236357821768055236541250965045651833025, time:1750768237.9399734s req_ids:[8] -DEBUG 06-24 20:30:37 [manager.py:391] -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:37 lightllm_req_id:8 first_token_cost:232.19680786132812ms total_cost_time:232.24115371704102ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12375 prompt_cache_len:5151 prompt_cache_ratio:0.41624242424242425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 -DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:38 [batch.py:51] router release req id 8 -INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10889625549316406 s -INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11083149909973145 s -DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=173037545317111123673230256039236075076, time:1750768238.1744602s req_ids:[8] -DEBUG 06-24 20:30:38 [manager.py:391] -ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:206.5291404724121ms total_cost_time:206.5744400024414ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12376 prompt_cache_len:5151 prompt_cache_ratio:0.41620879120879123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 -DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:38 [batch.py:51] router release req id 8 -INFO 06-24 20:30:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10878276824951172 s -INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11086702346801758 s -DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=227180665237759584436180706520736692175, time:1750768238.3855667s req_ids:[8] -DEBUG 06-24 20:30:38 [manager.py:391] -ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:224.95317459106445ms total_cost_time:225.00014305114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12377 prompt_cache_len:5151 prompt_cache_ratio:0.41617516360992163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 -DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:38 [batch.py:51] router release req id 8 -INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10823345184326172 s -INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.11024212837219238 s -DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=262686065833497363036027619225484525425, time:1750768238.6162245s req_ids:[8] -DEBUG 06-24 20:30:38 [manager.py:391] -ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:205.95312118530273ms total_cost_time:205.99818229675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12378 prompt_cache_len:5151 prompt_cache_ratio:0.41614154144449833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 -DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:38 [batch.py:51] router release req id 8 -INFO 06-24 20:30:38 [manager.py:224] router recive req id 8 cost time 0.10790491104125977 s -INFO 06-24 20:30:38 [manager.py:68] detokenization recv req id 8 cost time 0.10984134674072266 s -DEBUG 06-24 20:30:38 [manager.py:391] Prefill Batch: batch_id=132396989491180303618751575096153106756, time:1750768238.828684s req_ids:[8] -DEBUG 06-24 20:30:38 [manager.py:391] -ERROR 06-24 20:30:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:217.64659881591797ms total_cost_time:217.70596504211426ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:12379 prompt_cache_len:5151 prompt_cache_ratio:0.41610792471120445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 -DEBUG 06-24 20:30:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:38 [batch.py:51] router release req id 8 -INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10881567001342773 s -INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.11071538925170898 s -DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=144908386806746057462448044733643998590, time:1750768239.0532694s req_ids:[8] -DEBUG 06-24 20:30:39 [manager.py:391] -ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:38 lightllm_req_id:8 first_token_cost:205.59263229370117ms total_cost_time:205.65485954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:12380 prompt_cache_len:5151 prompt_cache_ratio:0.41607431340872375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 -DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:39 [batch.py:51] router release req id 8 -INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10609555244445801 s -INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.10787677764892578 s -DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=127969320652302352167934815481669424845, time:1750768239.2647762s req_ids:[8] -DEBUG 06-24 20:30:39 [manager.py:391] -ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:169.10457611083984ms total_cost_time:169.14749145507812ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12381 prompt_cache_len:5151 prompt_cache_ratio:0.41604070753574024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 -DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:39 [batch.py:51] router release req id 8 -INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10723209381103516 s -INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s -DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=62925540422142650555947171552577332489, time:1750768239.4405591s req_ids:[8] -DEBUG 06-24 20:30:39 [manager.py:391] -ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:380.62024116516113ms total_cost_time:380.6648254394531ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12382 prompt_cache_len:5151 prompt_cache_ratio:0.41600710709093847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 -DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:39 [batch.py:51] router release req id 8 -INFO 06-24 20:30:39 [manager.py:224] router recive req id 8 cost time 0.10865497589111328 s -INFO 06-24 20:30:39 [manager.py:68] detokenization recv req id 8 cost time 0.11052608489990234 s -DEBUG 06-24 20:30:39 [manager.py:391] Prefill Batch: batch_id=275353133760815678107014535410645894741, time:1750768239.8274662s req_ids:[8] -DEBUG 06-24 20:30:39 [manager.py:391] -ERROR 06-24 20:30:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:209.15460586547852ms total_cost_time:209.21564102172852ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12383 prompt_cache_len:5151 prompt_cache_ratio:0.4159735120730033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 -DEBUG 06-24 20:30:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:39 [batch.py:51] router release req id 8 -INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10993385314941406 s -INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.11185526847839355 s -DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=73968033608722786198843102166076255361, time:1750768240.0470455s req_ids:[8] -DEBUG 06-24 20:30:40 [manager.py:391] -ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:39 lightllm_req_id:8 first_token_cost:214.19167518615723ms total_cost_time:214.23745155334473ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12384 prompt_cache_len:5151 prompt_cache_ratio:0.4159399224806202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 -DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:40 [batch.py:51] router release req id 8 -INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.1079566478729248 s -INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10985827445983887 s -DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=257056328959941863730615964363766608010, time:1750768240.2619216s req_ids:[8] -DEBUG 06-24 20:30:40 [manager.py:391] -ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:205.60002326965332ms total_cost_time:205.66248893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12385 prompt_cache_len:5151 prompt_cache_ratio:0.41590633831247475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 -DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:40 [batch.py:51] router release req id 8 -INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10840034484863281 s -INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s -DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=261890870281898470008089117178608868160, time:1750768240.4783971s req_ids:[8] -DEBUG 06-24 20:30:40 [manager.py:391] -ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:226.69124603271484ms total_cost_time:226.75204277038574ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12386 prompt_cache_len:5151 prompt_cache_ratio:0.41587275956725334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 -DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:40 [batch.py:51] router release req id 8 -INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10736274719238281 s -INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s -DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=180963687478566458771244654659720782819, time:1750768240.7192714s req_ids:[8] -DEBUG 06-24 20:30:40 [manager.py:391] -ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:179.8539161682129ms total_cost_time:179.89850044250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12387 prompt_cache_len:5151 prompt_cache_ratio:0.4158391862436425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 -DEBUG 06-24 20:30:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:40 [batch.py:51] router release req id 8 -INFO 06-24 20:30:40 [manager.py:224] router recive req id 8 cost time 0.10738921165466309 s -INFO 06-24 20:30:40 [manager.py:68] detokenization recv req id 8 cost time 0.10940718650817871 s -DEBUG 06-24 20:30:40 [manager.py:391] Prefill Batch: batch_id=236168828180638463261332425787617481177, time:1750768240.8926275s req_ids:[8] -DEBUG 06-24 20:30:40 [manager.py:391] -ERROR 06-24 20:30:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:204.66113090515137ms total_cost_time:204.70452308654785ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12388 prompt_cache_len:5151 prompt_cache_ratio:0.41580561834032936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 -DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:41 [batch.py:51] router release req id 8 -INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10950040817260742 s -INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11145615577697754 s -DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=120626538080941431071783547492119207052, time:1750768241.1037586s req_ids:[8] -DEBUG 06-24 20:30:41 [manager.py:391] -ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:40 lightllm_req_id:8 first_token_cost:380.08999824523926ms total_cost_time:380.13553619384766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12389 prompt_cache_len:5151 prompt_cache_ratio:0.4157720558560013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 -DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:41 [batch.py:51] router release req id 8 -INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s -INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11058330535888672 s -DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=110020199165629193763493888133133459669, time:1750768241.4900498s req_ids:[8] -DEBUG 06-24 20:30:41 [manager.py:391] -ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:209.8689079284668ms total_cost_time:209.9130153656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12390 prompt_cache_len:5151 prompt_cache_ratio:0.41573849878934627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 -DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:41 [batch.py:51] router release req id 8 -INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10819125175476074 s -INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.1100473403930664 s -DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=65938755141866541824212237420187829454, time:1750768241.7069113s req_ids:[8] -DEBUG 06-24 20:30:41 [manager.py:391] -ERROR 06-24 20:30:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:199.39422607421875ms total_cost_time:199.43737983703613ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12391 prompt_cache_len:5151 prompt_cache_ratio:0.41570494713905254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 -DEBUG 06-24 20:30:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:41 [batch.py:51] router release req id 8 -INFO 06-24 20:30:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:41 [manager.py:224] router recive req id 8 cost time 0.10864090919494629 s -INFO 06-24 20:30:41 [manager.py:68] detokenization recv req id 8 cost time 0.11060976982116699 s -DEBUG 06-24 20:30:41 [manager.py:391] Prefill Batch: batch_id=210897852412438953906054543254678154155, time:1750768241.9128823s req_ids:[8] -DEBUG 06-24 20:30:41 [manager.py:391] -ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:41 lightllm_req_id:8 first_token_cost:206.51674270629883ms total_cost_time:206.56251907348633ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12392 prompt_cache_len:5151 prompt_cache_ratio:0.4156714009038089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 -DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:42 [batch.py:51] router release req id 8 -INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.10822772979736328 s -INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.11031532287597656 s -DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=124411267878287064489663481005004225584, time:1750768242.124851s req_ids:[8] -DEBUG 06-24 20:30:42 [manager.py:391] -ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:210.93368530273438ms total_cost_time:210.97898483276367ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12393 prompt_cache_len:5151 prompt_cache_ratio:0.4156378600823045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 -DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:42 [batch.py:51] router release req id 8 -INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.1077413558959961 s -INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.10968327522277832 s -DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=149873082516063976852542145395832716985, time:1750768242.3417823s req_ids:[8] -DEBUG 06-24 20:30:42 [manager.py:391] -ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:217.59700775146484ms total_cost_time:217.64111518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12394 prompt_cache_len:5151 prompt_cache_ratio:0.415604324673229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 -DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:42 [batch.py:51] router release req id 8 -INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.10873270034790039 s -INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.1102757453918457 s -DEBUG 06-24 20:30:42 [manager.py:391] Prefill Batch: batch_id=42779543406357055263280614720480529403, time:1750768242.5686698s req_ids:[8] -DEBUG 06-24 20:30:42 [manager.py:391] -ERROR 06-24 20:30:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:209.22231674194336ms total_cost_time:209.26761627197266ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12395 prompt_cache_len:5151 prompt_cache_ratio:0.4155707946752723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 -DEBUG 06-24 20:30:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:42 [batch.py:51] router release req id 8 -INFO 06-24 20:30:42 [manager.py:224] router recive req id 8 cost time 0.3112678527832031 s -INFO 06-24 20:30:42 [manager.py:68] detokenization recv req id 8 cost time 0.3134334087371826 s -DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=10759551587808412460960988926643521750, time:1750768243.0040483s req_ids:[8] -DEBUG 06-24 20:30:43 [manager.py:391] -ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:42 lightllm_req_id:8 first_token_cost:438.673734664917ms total_cost_time:438.7032985687256ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:12396 prompt_cache_len:5151 prompt_cache_ratio:0.4155372700871249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 -DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:43 [batch.py:51] router release req id 8 -INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.10658693313598633 s -INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.10851097106933594 s -DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=63276058981788484916974324733299827600, time:1750768243.2296104s req_ids:[8] -DEBUG 06-24 20:30:43 [manager.py:391] -ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:217.41604804992676ms total_cost_time:217.44394302368164ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12397 prompt_cache_len:5151 prompt_cache_ratio:0.4155037509074776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 -DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:43 [batch.py:51] router release req id 8 -INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.1067495346069336 s -INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.1086118221282959 s -DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=55098871330572320890231241812078703423, time:1750768243.4618495s req_ids:[8] -DEBUG 06-24 20:30:43 [manager.py:391] -ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:220.95155715942383ms total_cost_time:220.9937572479248ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12398 prompt_cache_len:5151 prompt_cache_ratio:0.4154702371350218 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 -DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:43 [batch.py:51] router release req id 8 -INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.10846567153930664 s -INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s -DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=161754550453038845847990137693515122418, time:1750768243.6758664s req_ids:[8] -DEBUG 06-24 20:30:43 [manager.py:391] -ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:207.34167098999023ms total_cost_time:207.37051963806152ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12399 prompt_cache_len:5151 prompt_cache_ratio:0.41543672876844906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 -DEBUG 06-24 20:30:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:43 [batch.py:51] router release req id 8 -INFO 06-24 20:30:43 [manager.py:224] router recive req id 8 cost time 0.1051476001739502 s -INFO 06-24 20:30:43 [manager.py:68] detokenization recv req id 8 cost time 0.10701179504394531 s -DEBUG 06-24 20:30:43 [manager.py:391] Prefill Batch: batch_id=28805820990165881385321090527940094040, time:1750768243.8948019s req_ids:[8] -DEBUG 06-24 20:30:43 [manager.py:391] -ERROR 06-24 20:30:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:43 lightllm_req_id:8 first_token_cost:224.17902946472168ms total_cost_time:224.20692443847656ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12400 prompt_cache_len:5151 prompt_cache_ratio:0.4154032258064516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 -DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:44 [batch.py:51] router release req id 8 -INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.10650944709777832 s -INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.10841250419616699 s -DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=243036985258189450962206370568579713789, time:1750768244.1334283s req_ids:[8] -DEBUG 06-24 20:30:44 [manager.py:391] -ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:230.82637786865234ms total_cost_time:230.87048530578613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12401 prompt_cache_len:5151 prompt_cache_ratio:0.41536972824772195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 -DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:44 [batch.py:51] router release req id 8 -INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.30990171432495117 s -INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.3119776248931885 s -DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=168131587641461640583884149406272044847, time:1750768244.5673828s req_ids:[8] -DEBUG 06-24 20:30:44 [manager.py:391] -ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:430.31978607177734ms total_cost_time:430.36389350891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12402 prompt_cache_len:5151 prompt_cache_ratio:0.41533623609095305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 -DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:44 [batch.py:51] router release req id 8 -INFO 06-24 20:30:44 [manager.py:224] router recive req id 8 cost time 0.10913801193237305 s -INFO 06-24 20:30:44 [manager.py:68] detokenization recv req id 8 cost time 0.11108899116516113 s -DEBUG 06-24 20:30:44 [manager.py:391] Prefill Batch: batch_id=55252438555026391132161758853269804957, time:1750768244.7916949s req_ids:[8] -DEBUG 06-24 20:30:44 [manager.py:391] -ERROR 06-24 20:30:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:216.32957458496094ms total_cost_time:216.3827419281006ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12403 prompt_cache_len:5151 prompt_cache_ratio:0.41530274933483835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 -DEBUG 06-24 20:30:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:44 [batch.py:51] router release req id 8 -INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.1076650619506836 s -INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976648330688477 s -DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=329196008846586408128785990914723288890, time:1750768245.0117621s req_ids:[8] -DEBUG 06-24 20:30:45 [manager.py:391] -ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:44 lightllm_req_id:8 first_token_cost:210.557222366333ms total_cost_time:210.5865478515625ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12404 prompt_cache_len:5151 prompt_cache_ratio:0.41526926797807157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 -DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:45 [batch.py:51] router release req id 8 -INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.10683393478393555 s -INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10891222953796387 s -DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=222756538951228158458806087948864317506, time:1750768245.2326164s req_ids:[8] -DEBUG 06-24 20:30:45 [manager.py:391] -ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:212.34416961669922ms total_cost_time:212.40592002868652ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12405 prompt_cache_len:5151 prompt_cache_ratio:0.415235792019347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 -DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:45 [batch.py:51] router release req id 8 -INFO 06-24 20:30:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.1065816879272461 s -INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10854315757751465 s -DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=198286134788743756410891419174621468611, time:1750768245.449496s req_ids:[8] -DEBUG 06-24 20:30:45 [manager.py:391] -ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:211.55166625976562ms total_cost_time:211.5805149078369ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12406 prompt_cache_len:5151 prompt_cache_ratio:0.41520232145735936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 -DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:45 [batch.py:51] router release req id 8 -INFO 06-24 20:30:45 [manager.py:224] router recive req id 8 cost time 0.10601353645324707 s -INFO 06-24 20:30:45 [manager.py:68] detokenization recv req id 8 cost time 0.10806822776794434 s -DEBUG 06-24 20:30:45 [manager.py:391] Prefill Batch: batch_id=180145663125960373097703813473687933942, time:1750768245.6680968s req_ids:[8] -DEBUG 06-24 20:30:45 [manager.py:391] -ERROR 06-24 20:30:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:375.654935836792ms total_cost_time:375.7014274597168ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12407 prompt_cache_len:5151 prompt_cache_ratio:0.4151688562908036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 -DEBUG 06-24 20:30:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:45 [batch.py:51] router release req id 8 -INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.1073904037475586 s -INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938024520874023 s -DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=73372549880568580573604827578107182624, time:1750768246.0493965s req_ids:[8] -DEBUG 06-24 20:30:46 [manager.py:391] -ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:45 lightllm_req_id:8 first_token_cost:214.56217765808105ms total_cost_time:214.58840370178223ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12408 prompt_cache_len:5151 prompt_cache_ratio:0.4151353965183752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 -DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:46 [batch.py:51] router release req id 8 -INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10620713233947754 s -INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10832738876342773 s -DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=197839674640486488135865539723351628993, time:1750768246.2711918s req_ids:[8] -DEBUG 06-24 20:30:46 [manager.py:391] -DEBUG 06-24 20:30:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 50366.272 tokens/s -DEBUG 06-24 20:30:46 [stats.py:37] Avg prompt tokens throughput: 50358.143 tokens/s -DEBUG 06-24 20:30:46 [stats.py:37] Avg generate tokens throughput: 8.129 tokens/s -ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:216.7351245880127ms total_cost_time:216.75562858581543ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12409 prompt_cache_len:5151 prompt_cache_ratio:0.4151019421387703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 -DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:46 [batch.py:51] router release req id 8 -INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s -INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10999107360839844 s -DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=303824174823379533974414946467445820060, time:1750768246.4922338s req_ids:[8] -DEBUG 06-24 20:30:46 [manager.py:391] -ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:213.24396133422852ms total_cost_time:213.2887840270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12410 prompt_cache_len:5151 prompt_cache_ratio:0.41506849315068495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 -DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:46 [batch.py:51] router release req id 8 -INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s -INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10998845100402832 s -DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=27285384022431238117225281410687554845, time:1750768246.711126s req_ids:[8] -DEBUG 06-24 20:30:46 [manager.py:391] -ERROR 06-24 20:30:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.73579788208008ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12411 prompt_cache_len:5151 prompt_cache_ratio:0.41503504955281606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 -DEBUG 06-24 20:30:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:46 [batch.py:51] router release req id 8 -INFO 06-24 20:30:46 [manager.py:224] router recive req id 8 cost time 0.10684871673583984 s -INFO 06-24 20:30:46 [manager.py:68] detokenization recv req id 8 cost time 0.10884428024291992 s -DEBUG 06-24 20:30:46 [manager.py:391] Prefill Batch: batch_id=212822754960327124933689600490084433479, time:1750768246.9311352s req_ids:[8] -DEBUG 06-24 20:30:46 [manager.py:391] -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:46 lightllm_req_id:8 first_token_cost:212.34369277954102ms total_cost_time:212.36872673034668ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12412 prompt_cache_len:5151 prompt_cache_ratio:0.41500161134386077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 -DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:47 [batch.py:51] router release req id 8 -INFO 06-24 20:30:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10586118698120117 s -INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10785293579101562 s -DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=170469524889344175611430581420731266800, time:1750768247.1467924s req_ids:[8] -DEBUG 06-24 20:30:47 [manager.py:391] -ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:378.31568717956543ms total_cost_time:378.3597946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12413 prompt_cache_len:5151 prompt_cache_ratio:0.4149681785225167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 -DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:47 [batch.py:51] router release req id 8 -INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10698509216308594 s -INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10900115966796875 s -DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=322008835789722919910529588966293359657, time:1750768247.5312066s req_ids:[8] -DEBUG 06-24 20:30:47 [manager.py:391] -ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:216.0942554473877ms total_cost_time:216.11857414245605ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12414 prompt_cache_len:5151 prompt_cache_ratio:0.4149347510874819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 -DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:47 [batch.py:51] router release req id 8 -INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10652637481689453 s -INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10841965675354004 s -DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=250707333723645332776176872619605545177, time:1750768247.752067s req_ids:[8] -DEBUG 06-24 20:30:47 [manager.py:391] -ERROR 06-24 20:30:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:215.04497528076172ms total_cost_time:215.0704860687256ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12415 prompt_cache_len:5151 prompt_cache_ratio:0.4149013290374547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 -DEBUG 06-24 20:30:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:47 [batch.py:51] router release req id 8 -INFO 06-24 20:30:47 [manager.py:224] router recive req id 8 cost time 0.10735011100769043 s -INFO 06-24 20:30:47 [manager.py:68] detokenization recv req id 8 cost time 0.10939621925354004 s -DEBUG 06-24 20:30:47 [manager.py:391] Prefill Batch: batch_id=315843729027809932568577697666010751441, time:1750768247.9725795s req_ids:[8] -DEBUG 06-24 20:30:47 [manager.py:391] -ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:47 lightllm_req_id:8 first_token_cost:211.15684509277344ms total_cost_time:211.1835479736328ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12416 prompt_cache_len:5151 prompt_cache_ratio:0.414867912371134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 -DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:48 [batch.py:51] router release req id 8 -INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10600900650024414 s -INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10826849937438965 s -DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=264249146717129752881221445046787960385, time:1750768248.1897984s req_ids:[8] -DEBUG 06-24 20:30:48 [manager.py:391] -ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:211.12656593322754ms total_cost_time:211.1530303955078ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:12417 prompt_cache_len:5151 prompt_cache_ratio:0.41483450108721914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 -DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:48 [batch.py:51] router release req id 8 -INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10802960395812988 s -INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10995149612426758 s -DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=153399232804048404410135696227341055770, time:1750768248.413649s req_ids:[8] -DEBUG 06-24 20:30:48 [manager.py:391] -ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:216.76278114318848ms total_cost_time:216.78900718688965ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12418 prompt_cache_len:5151 prompt_cache_ratio:0.4148010951844097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 -DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:48 [batch.py:51] router release req id 8 -INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10686731338500977 s -INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.1089789867401123 s -DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=136553845076708875648177866334516880661, time:1750768248.629487s req_ids:[8] -DEBUG 06-24 20:30:48 [manager.py:391] -ERROR 06-24 20:30:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:215.96002578735352ms total_cost_time:215.98482131958008ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12419 prompt_cache_len:5151 prompt_cache_ratio:0.4147676946614059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 -DEBUG 06-24 20:30:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:48 [batch.py:51] router release req id 8 -INFO 06-24 20:30:48 [manager.py:224] router recive req id 8 cost time 0.10718154907226562 s -INFO 06-24 20:30:48 [manager.py:68] detokenization recv req id 8 cost time 0.10900235176086426 s -DEBUG 06-24 20:30:48 [manager.py:391] Prefill Batch: batch_id=135000725457591029293110401028152904900, time:1750768248.8498118s req_ids:[8] -DEBUG 06-24 20:30:48 [manager.py:391] -ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:48 lightllm_req_id:8 first_token_cost:389.845609664917ms total_cost_time:389.86682891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:12420 prompt_cache_len:5151 prompt_cache_ratio:0.4147342995169082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 -DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:49 [batch.py:51] router release req id 8 -INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10668230056762695 s -INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10815310478210449 s -DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=216706761507401565380923230029035613989, time:1750768249.2456555s req_ids:[8] -DEBUG 06-24 20:30:49 [manager.py:391] -ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:215.48771858215332ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12421 prompt_cache_len:5151 prompt_cache_ratio:0.4147009097496176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 -DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:49 [batch.py:51] router release req id 8 -INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10638618469238281 s -INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10836172103881836 s -DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=316488262775581693765581093426104177885, time:1750768249.464567s req_ids:[8] -DEBUG 06-24 20:30:49 [manager.py:391] -ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:212.26143836975098ms total_cost_time:212.28694915771484ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:12422 prompt_cache_len:5151 prompt_cache_ratio:0.41466752535823537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 -DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:49 [batch.py:51] router release req id 8 -INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10631036758422852 s -INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10805130004882812 s -DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=62359354607109446271810233168689849528, time:1750768249.6812391s req_ids:[8] -DEBUG 06-24 20:30:49 [manager.py:391] -ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:171.7050075531006ms total_cost_time:171.73099517822266ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12423 prompt_cache_len:5151 prompt_cache_ratio:0.4146341463414634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 -DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:49 [batch.py:51] router release req id 8 -INFO 06-24 20:30:49 [manager.py:224] router recive req id 8 cost time 0.10665440559387207 s -INFO 06-24 20:30:49 [manager.py:68] detokenization recv req id 8 cost time 0.10858726501464844 s -DEBUG 06-24 20:30:49 [manager.py:391] Prefill Batch: batch_id=15801549039811831264668260924930652050, time:1750768249.8589277s req_ids:[8] -DEBUG 06-24 20:30:49 [manager.py:391] -ERROR 06-24 20:30:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:205.63745498657227ms total_cost_time:205.66248893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12424 prompt_cache_len:5151 prompt_cache_ratio:0.4146007726980039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 -DEBUG 06-24 20:30:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:49 [batch.py:51] router release req id 8 -INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10552072525024414 s -INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.10753917694091797 s -DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=17090920995912503692514837986536188180, time:1750768250.0677605s req_ids:[8] -DEBUG 06-24 20:30:50 [manager.py:391] -ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:49 lightllm_req_id:8 first_token_cost:211.3204002380371ms total_cost_time:211.3656997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12425 prompt_cache_len:5151 prompt_cache_ratio:0.41456740442655937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 -DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:50 [batch.py:51] router release req id 8 -INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10870242118835449 s -INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s -DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=108988323394701074706965042620812931970, time:1750768250.2834623s req_ids:[8] -DEBUG 06-24 20:30:50 [manager.py:391] -ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:205.55830001831055ms total_cost_time:205.61623573303223ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:12426 prompt_cache_len:5151 prompt_cache_ratio:0.41453404152583295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 -DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:50 [batch.py:51] router release req id 8 -INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10912227630615234 s -INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.11104416847229004 s -DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=94412808368673861688587777379546520535, time:1750768250.4959133s req_ids:[8] -DEBUG 06-24 20:30:50 [manager.py:391] -ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:372.9138374328613ms total_cost_time:372.95985221862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12427 prompt_cache_len:5151 prompt_cache_ratio:0.41450068399452805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 -DEBUG 06-24 20:30:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:50 [batch.py:51] router release req id 8 -INFO 06-24 20:30:50 [manager.py:224] router recive req id 8 cost time 0.10796880722045898 s -INFO 06-24 20:30:50 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s -DEBUG 06-24 20:30:50 [manager.py:391] Prefill Batch: batch_id=59890976463626219263848214727335376126, time:1750768250.8831663s req_ids:[8] -DEBUG 06-24 20:30:50 [manager.py:391] -ERROR 06-24 20:30:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:222.66602516174316ms total_cost_time:222.71156311035156ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12428 prompt_cache_len:5151 prompt_cache_ratio:0.4144673318313486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 -DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:51 [batch.py:51] router release req id 8 -INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10866737365722656 s -INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11076903343200684 s -DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=137955553706391832181224902759114507482, time:1750768251.104259s req_ids:[8] -DEBUG 06-24 20:30:51 [manager.py:391] -ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:50 lightllm_req_id:8 first_token_cost:208.94932746887207ms total_cost_time:208.99367332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12429 prompt_cache_len:5151 prompt_cache_ratio:0.4144339850349988 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 -DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:51 [batch.py:51] router release req id 8 -INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10726714134216309 s -INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.10960936546325684 s -DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=168862639791533896737272550085581433719, time:1750768251.319687s req_ids:[8] -DEBUG 06-24 20:30:51 [manager.py:391] -ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:207.29804039001465ms total_cost_time:207.34429359436035ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12430 prompt_cache_len:5151 prompt_cache_ratio:0.41440064360418344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 -DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:51 [batch.py:51] router release req id 8 -INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s -INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s -DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=310277880230842525860281971646576196544, time:1750768251.533921s req_ids:[8] -DEBUG 06-24 20:30:51 [manager.py:391] -ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:210.79468727111816ms total_cost_time:210.82687377929688ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:12431 prompt_cache_len:5151 prompt_cache_ratio:0.4143673075376076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 -DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:51 [batch.py:51] router release req id 8 -INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.10781288146972656 s -INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000418663024902 s -DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=19869017209201943601350144444009316020, time:1750768251.7535386s req_ids:[8] -DEBUG 06-24 20:30:51 [manager.py:391] -ERROR 06-24 20:30:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:213.62662315368652ms total_cost_time:213.67168426513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12432 prompt_cache_len:5151 prompt_cache_ratio:0.4143339768339768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 -DEBUG 06-24 20:30:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:51 [batch.py:51] router release req id 8 -INFO 06-24 20:30:51 [manager.py:224] router recive req id 8 cost time 0.108795166015625 s -INFO 06-24 20:30:51 [manager.py:68] detokenization recv req id 8 cost time 0.11119890213012695 s -DEBUG 06-24 20:30:51 [manager.py:391] Prefill Batch: batch_id=206350130390277966876210286803694310395, time:1750768251.971545s req_ids:[8] -DEBUG 06-24 20:30:51 [manager.py:391] -ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:51 lightllm_req_id:8 first_token_cost:424.85809326171875ms total_cost_time:424.90291595458984ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12433 prompt_cache_len:5151 prompt_cache_ratio:0.41430065149199713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 -DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:52 [batch.py:51] router release req id 8 -INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10709285736083984 s -INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.10895609855651855 s -DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=105076168867210197287992874263543637594, time:1750768252.4010627s req_ids:[8] -DEBUG 06-24 20:30:52 [manager.py:391] -ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:186.0647201538086ms total_cost_time:186.10835075378418ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12434 prompt_cache_len:5151 prompt_cache_ratio:0.41426733151037476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 -DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:52 [batch.py:51] router release req id 8 -INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10860061645507812 s -INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.1105356216430664 s -DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=246169260518551535103012322927546634563, time:1750768252.6010728s req_ids:[8] -DEBUG 06-24 20:30:52 [manager.py:391] -ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:211.34543418884277ms total_cost_time:211.39073371887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12435 prompt_cache_len:5151 prompt_cache_ratio:0.41423401688781664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 -DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:52 [batch.py:51] router release req id 8 -INFO 06-24 20:30:52 [manager.py:224] router recive req id 8 cost time 0.10873675346374512 s -INFO 06-24 20:30:52 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s -DEBUG 06-24 20:30:52 [manager.py:391] Prefill Batch: batch_id=285354833993391727964296131033507397623, time:1750768252.8356729s req_ids:[8] -DEBUG 06-24 20:30:52 [manager.py:391] -ERROR 06-24 20:30:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:231.37879371643066ms total_cost_time:231.42600059509277ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12436 prompt_cache_len:5151 prompt_cache_ratio:0.4142007076230299 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 -DEBUG 06-24 20:30:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:52 [batch.py:51] router release req id 8 -INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10920858383178711 s -INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.11125969886779785 s -DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=292811820465889058451570500480640416304, time:1750768253.0572267s req_ids:[8] -DEBUG 06-24 20:30:53 [manager.py:391] -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:52 lightllm_req_id:8 first_token_cost:208.1892490386963ms total_cost_time:208.23359489440918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12437 prompt_cache_len:5151 prompt_cache_ratio:0.4141674037147222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 -DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:53 [batch.py:51] router release req id 8 -INFO 06-24 20:30:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:30:53 [statics_utils.py:24] mean first cost: 229.48896463201245 ms -INFO 06-24 20:30:53 [statics_utils.py:24] mean per token cost: 0.06147854232694194 ms -INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s -INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.10895133018493652 s -DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=153901125562153148640576937610769178711, time:1750768253.2826965s req_ids:[8] -DEBUG 06-24 20:30:53 [manager.py:391] -INFO 06-24 20:30:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:224.36952590942383ms total_cost_time:224.41506385803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12438 prompt_cache_len:5151 prompt_cache_ratio:0.41413410516160154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 -DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:53 [batch.py:51] router release req id 8 -INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10917854309082031 s -INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.11120939254760742 s -DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=2977416372958569687901322992556419180, time:1750768253.5016842s req_ids:[8] -DEBUG 06-24 20:30:53 [manager.py:391] -ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:380.01370429992676ms total_cost_time:380.05924224853516ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12439 prompt_cache_len:5151 prompt_cache_ratio:0.4141008119623764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 -DEBUG 06-24 20:30:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:53 [batch.py:51] router release req id 8 -INFO 06-24 20:30:53 [manager.py:224] router recive req id 8 cost time 0.10558843612670898 s -INFO 06-24 20:30:53 [manager.py:68] detokenization recv req id 8 cost time 0.10747694969177246 s -DEBUG 06-24 20:30:53 [manager.py:391] Prefill Batch: batch_id=88302537858980346026392574819039396894, time:1750768253.8901713s req_ids:[8] -DEBUG 06-24 20:30:53 [manager.py:391] -ERROR 06-24 20:30:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:215.38829803466797ms total_cost_time:215.43264389038086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12440 prompt_cache_len:5151 prompt_cache_ratio:0.41406752411575565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 -DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:54 [batch.py:51] router release req id 8 -INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10879325866699219 s -INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10985255241394043 s -DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=104349589476044304526519769986205150134, time:1750768254.1110094s req_ids:[8] -DEBUG 06-24 20:30:54 [manager.py:391] -ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:53 lightllm_req_id:8 first_token_cost:214.13922309875488ms total_cost_time:214.16115760803223ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12441 prompt_cache_len:5151 prompt_cache_ratio:0.4140342416204485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 -DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:54 [batch.py:51] router release req id 8 -INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10630059242248535 s -INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10735154151916504 s -DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=278766359291833571942018372099257772224, time:1750768254.3333023s req_ids:[8] -DEBUG 06-24 20:30:54 [manager.py:391] -ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:216.5226936340332ms total_cost_time:216.57323837280273ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:12442 prompt_cache_len:5151 prompt_cache_ratio:0.41400096447516477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 -DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:54 [batch.py:51] router release req id 8 -INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10769820213317871 s -INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10872435569763184 s -DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=32780791958348762896703829874477001705, time:1750768254.5549147s req_ids:[8] -DEBUG 06-24 20:30:54 [manager.py:391] -ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:209.75494384765625ms total_cost_time:209.8388671875ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:12443 prompt_cache_len:5151 prompt_cache_ratio:0.4139676926786145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 -DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:54 [batch.py:51] router release req id 8 -INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.10804605484008789 s -INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s -DEBUG 06-24 20:30:54 [manager.py:391] Prefill Batch: batch_id=271498013673586120717850864451052667245, time:1750768254.7698402s req_ids:[8] -DEBUG 06-24 20:30:54 [manager.py:391] -ERROR 06-24 20:30:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:213.47594261169434ms total_cost_time:213.53554725646973ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12444 prompt_cache_len:5151 prompt_cache_ratio:0.4139344262295082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 -DEBUG 06-24 20:30:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:54 [batch.py:51] router release req id 8 -INFO 06-24 20:30:54 [manager.py:224] router recive req id 8 cost time 0.1078495979309082 s -INFO 06-24 20:30:54 [manager.py:68] detokenization recv req id 8 cost time 0.10976743698120117 s -DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=70788377027025119580891402998515300081, time:1750768255.0028844s req_ids:[8] -DEBUG 06-24 20:30:55 [manager.py:391] -ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:54 lightllm_req_id:8 first_token_cost:228.3174991607666ms total_cost_time:228.3637523651123ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12445 prompt_cache_len:5151 prompt_cache_ratio:0.41390116512655684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 -DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:55 [batch.py:51] router release req id 8 -INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.10842204093933105 s -INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s -DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=234855720580925033029772938785652241281, time:1750768255.2261772s req_ids:[8] -DEBUG 06-24 20:30:55 [manager.py:391] -ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:385.36882400512695ms total_cost_time:385.41531562805176ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12446 prompt_cache_len:5151 prompt_cache_ratio:0.4138679093684718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 -DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:55 [batch.py:51] router release req id 8 -INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.11071610450744629 s -INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11318588256835938 s -DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=36157953441211592161737506075216100820, time:1750768255.617071s req_ids:[8] -DEBUG 06-24 20:30:55 [manager.py:391] -ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:214.16401863098145ms total_cost_time:214.20836448669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12447 prompt_cache_len:5151 prompt_cache_ratio:0.4138346589539648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 -DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:55 [batch.py:51] router release req id 8 -INFO 06-24 20:30:55 [manager.py:224] router recive req id 8 cost time 0.10915756225585938 s -INFO 06-24 20:30:55 [manager.py:68] detokenization recv req id 8 cost time 0.11118149757385254 s -DEBUG 06-24 20:30:55 [manager.py:391] Prefill Batch: batch_id=8730518838044392911086779260491852017, time:1750768255.836698s req_ids:[8] -DEBUG 06-24 20:30:55 [manager.py:391] -ERROR 06-24 20:30:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:211.05551719665527ms total_cost_time:211.11726760864258ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:12448 prompt_cache_len:5151 prompt_cache_ratio:0.41380141388174807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 -DEBUG 06-24 20:30:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:55 [batch.py:51] router release req id 8 -INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10796070098876953 s -INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10976290702819824 s -DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=267677606146649080910038008611125835213, time:1750768256.0558462s req_ids:[8] -DEBUG 06-24 20:30:56 [manager.py:391] -ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:55 lightllm_req_id:8 first_token_cost:168.75267028808594ms total_cost_time:168.80464553833008ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:12449 prompt_cache_len:5151 prompt_cache_ratio:0.4137681741505342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 -DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:56 [batch.py:51] router release req id 8 -INFO 06-24 20:30:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10722136497497559 s -INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10825014114379883 s -DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=320678951449459179676738989625905705163, time:1750768256.229986s req_ids:[8] -DEBUG 06-24 20:30:56 [manager.py:391] -ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:30:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 50717.753 tokens/s -DEBUG 06-24 20:30:56 [stats.py:37] Avg prompt tokens throughput: 50709.494 tokens/s -DEBUG 06-24 20:30:56 [stats.py:37] Avg generate tokens throughput: 8.259 tokens/s -INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:207.71455764770508ms total_cost_time:207.75938034057617ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12450 prompt_cache_len:5151 prompt_cache_ratio:0.41373493975903614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 -DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:56 [batch.py:51] router release req id 8 -INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10771369934082031 s -INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.10885453224182129 s -DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=33876324607224621097591329003323461536, time:1750768256.4531665s req_ids:[8] -DEBUG 06-24 20:30:56 [manager.py:391] -ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:220.02315521240234ms total_cost_time:220.06654739379883ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12451 prompt_cache_len:5151 prompt_cache_ratio:0.4137017107059674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 -DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:56 [batch.py:51] router release req id 8 -INFO 06-24 20:30:56 [manager.py:224] router recive req id 8 cost time 0.10858297348022461 s -INFO 06-24 20:30:56 [manager.py:68] detokenization recv req id 8 cost time 0.11067438125610352 s -DEBUG 06-24 20:30:56 [manager.py:391] Prefill Batch: batch_id=333166459254446381887622689016573680598, time:1750768256.6697693s req_ids:[8] -DEBUG 06-24 20:30:56 [manager.py:391] -ERROR 06-24 20:30:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:398.3585834503174ms total_cost_time:398.41747283935547ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12452 prompt_cache_len:5151 prompt_cache_ratio:0.4136684869900418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 -DEBUG 06-24 20:30:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:56 [batch.py:51] router release req id 8 -INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10870552062988281 s -INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072731018066406 s -DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=55297323732811713121349310387988955956, time:1750768257.0761204s req_ids:[8] -DEBUG 06-24 20:30:57 [manager.py:391] -ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:56 lightllm_req_id:8 first_token_cost:209.75804328918457ms total_cost_time:209.81740951538086ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:12453 prompt_cache_len:5151 prompt_cache_ratio:0.4136352686099735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 -DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:57 [batch.py:51] router release req id 8 -INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10803890228271484 s -INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.10909914970397949 s -DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=6027848488110771488588448605070394739, time:1750768257.3056743s req_ids:[8] -DEBUG 06-24 20:30:57 [manager.py:391] -ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:191.7872428894043ms total_cost_time:191.8480396270752ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12454 prompt_cache_len:5151 prompt_cache_ratio:0.41360205556447727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 -DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:57 [batch.py:51] router release req id 8 -INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10795021057128906 s -INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099538803100586 s -DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=96539375724546523582061131714159474420, time:1750768257.4909606s req_ids:[8] -DEBUG 06-24 20:30:57 [manager.py:391] -ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:205.0192356109619ms total_cost_time:205.078125ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12455 prompt_cache_len:5151 prompt_cache_ratio:0.41356884785226816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 -DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:57 [batch.py:51] router release req id 8 -INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s -INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.11003398895263672 s -DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=19641034874896495754987573314729150902, time:1750768257.7016096s req_ids:[8] -DEBUG 06-24 20:30:57 [manager.py:391] -ERROR 06-24 20:30:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:233.78610610961914ms total_cost_time:233.8414192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12456 prompt_cache_len:5151 prompt_cache_ratio:0.4135356454720617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 -DEBUG 06-24 20:30:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:57 [batch.py:51] router release req id 8 -INFO 06-24 20:30:57 [manager.py:224] router recive req id 8 cost time 0.10707473754882812 s -INFO 06-24 20:30:57 [manager.py:68] detokenization recv req id 8 cost time 0.10915350914001465 s -DEBUG 06-24 20:30:57 [manager.py:391] Prefill Batch: batch_id=150222392040460624467828775336681107920, time:1750768257.9418857s req_ids:[8] -DEBUG 06-24 20:30:57 [manager.py:391] -ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:57 lightllm_req_id:8 first_token_cost:204.9853801727295ms total_cost_time:205.04403114318848ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12457 prompt_cache_len:5151 prompt_cache_ratio:0.41350244842257367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 -DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:58 [batch.py:51] router release req id 8 -INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10807514190673828 s -INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.11012530326843262 s -DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=224223952918295977811671035724449390854, time:1750768258.154356s req_ids:[8] -DEBUG 06-24 20:30:58 [manager.py:391] -ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:211.74001693725586ms total_cost_time:211.80105209350586ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12458 prompt_cache_len:5151 prompt_cache_ratio:0.4134692567025205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 -DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:58 [batch.py:51] router release req id 8 -INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10714173316955566 s -INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.10918831825256348 s -DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=19072953528025188445697028030248571201, time:1750768258.3796756s req_ids:[8] -DEBUG 06-24 20:30:58 [manager.py:391] -ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:216.28570556640625ms total_cost_time:216.33219718933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12459 prompt_cache_len:5151 prompt_cache_ratio:0.41343607031061885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 -DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:58 [batch.py:51] router release req id 8 -INFO 06-24 20:30:58 [manager.py:224] router recive req id 8 cost time 0.10715794563293457 s -INFO 06-24 20:30:58 [manager.py:68] detokenization recv req id 8 cost time 0.10844206809997559 s -DEBUG 06-24 20:30:58 [manager.py:391] Prefill Batch: batch_id=205282701281080427714189009668290844515, time:1750768258.6151636s req_ids:[8] -DEBUG 06-24 20:30:58 [manager.py:391] -ERROR 06-24 20:30:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:416.9301986694336ms total_cost_time:416.98455810546875ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:12460 prompt_cache_len:5151 prompt_cache_ratio:0.4134028892455859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 -DEBUG 06-24 20:30:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:58 [batch.py:51] router release req id 8 -INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10933303833007812 s -INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11135530471801758 s -DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=111631596694140023051178298499666686482, time:1750768259.0269754s req_ids:[8] -DEBUG 06-24 20:30:59 [manager.py:391] -ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:58 lightllm_req_id:8 first_token_cost:232.35225677490234ms total_cost_time:232.4669361114502ms,out_token_counter:1 mean_per_token_cost_time: 0.11467933654785156ms prompt_token_num:12461 prompt_cache_len:5151 prompt_cache_ratio:0.41336971350613916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 -DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:59 [batch.py:51] router release req id 8 -INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10863780975341797 s -INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11061716079711914 s -DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=230545898368270800857131714662829256750, time:1750768259.258932s req_ids:[8] -DEBUG 06-24 20:30:59 [manager.py:391] -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:30:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:208.13608169555664ms total_cost_time:208.18114280700684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12462 prompt_cache_len:5151 prompt_cache_ratio:0.41333654309099666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 -DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:59 [batch.py:51] router release req id 8 -INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10841941833496094 s -INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.11037087440490723 s -DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=301940003077655646692862439318984552192, time:1750768259.4748392s req_ids:[8] -DEBUG 06-24 20:30:59 [manager.py:391] -ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:205.04021644592285ms total_cost_time:205.09839057922363ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:12463 prompt_cache_len:5151 prompt_cache_ratio:0.4133033779988767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 -DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:59 [batch.py:51] router release req id 8 -INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10687112808227539 s -INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.10877227783203125 s -DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=312023169252105053714454258865728239354, time:1750768259.687395s req_ids:[8] -DEBUG 06-24 20:30:59 [manager.py:391] -ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:213.67859840393066ms total_cost_time:213.72103691101074ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12464 prompt_cache_len:5151 prompt_cache_ratio:0.4132702182284981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 -DEBUG 06-24 20:30:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:30:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:30:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:30:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:30:59 [batch.py:51] router release req id 8 -INFO 06-24 20:30:59 [manager.py:224] router recive req id 8 cost time 0.10720419883728027 s -INFO 06-24 20:30:59 [manager.py:68] detokenization recv req id 8 cost time 0.10908126831054688 s -DEBUG 06-24 20:30:59 [manager.py:391] Prefill Batch: batch_id=191479274459631523706888553801464698544, time:1750768259.918769s req_ids:[8] -DEBUG 06-24 20:30:59 [manager.py:391] -ERROR 06-24 20:30:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:30:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:30:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:182.7373504638672ms total_cost_time:182.78026580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12465 prompt_cache_len:5151 prompt_cache_ratio:0.41323706377858005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:30:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 -DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:00 [batch.py:51] router release req id 8 -INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10785961151123047 s -INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.1098337173461914 s -DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=181450869902078814669365954901950285319, time:1750768260.0963042s req_ids:[8] -DEBUG 06-24 20:31:00 [manager.py:391] -ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:30:59 lightllm_req_id:8 first_token_cost:202.37231254577637ms total_cost_time:202.41403579711914ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12466 prompt_cache_len:5151 prompt_cache_ratio:0.4132039146478421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 -DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:00 [batch.py:51] router release req id 8 -INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s -INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s -DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=47661244253780091280466442354464114652, time:1750768260.3056054s req_ids:[8] -DEBUG 06-24 20:31:00 [manager.py:391] -ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:387.26258277893066ms total_cost_time:387.30454444885254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12467 prompt_cache_len:5151 prompt_cache_ratio:0.4131707708350044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 -DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:00 [batch.py:51] router release req id 8 -INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10769820213317871 s -INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.10935163497924805 s -DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=110918307799342330819032168563745187750, time:1750768260.697488s req_ids:[8] -DEBUG 06-24 20:31:00 [manager.py:391] -ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:188.8713836669922ms total_cost_time:188.91572952270508ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12468 prompt_cache_len:5151 prompt_cache_ratio:0.4131376323387873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 -DEBUG 06-24 20:31:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:00 [batch.py:51] router release req id 8 -INFO 06-24 20:31:00 [manager.py:224] router recive req id 8 cost time 0.10829663276672363 s -INFO 06-24 20:31:00 [manager.py:68] detokenization recv req id 8 cost time 0.11019396781921387 s -DEBUG 06-24 20:31:00 [manager.py:391] Prefill Batch: batch_id=34399715534402867248186878155374888836, time:1750768260.893034s req_ids:[8] -DEBUG 06-24 20:31:00 [manager.py:391] -ERROR 06-24 20:31:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:210.15334129333496ms total_cost_time:210.19911766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12469 prompt_cache_len:5151 prompt_cache_ratio:0.41310449915791164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 -DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:01 [batch.py:51] router release req id 8 -INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10797643661499023 s -INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.1098175048828125 s -DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=136530538120919030954199829759840254207, time:1750768261.1096392s req_ids:[8] -DEBUG 06-24 20:31:01 [manager.py:391] -ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:00 lightllm_req_id:8 first_token_cost:196.60449028015137ms total_cost_time:196.64597511291504ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12470 prompt_cache_len:5151 prompt_cache_ratio:0.4130713712910986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 -DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:01 [batch.py:51] router release req id 8 -INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10767102241516113 s -INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.10952115058898926 s -DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=72896634457730056549489012188130781931, time:1750768261.3115654s req_ids:[8] -DEBUG 06-24 20:31:01 [manager.py:391] -ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:201.6470432281494ms total_cost_time:201.6899585723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12471 prompt_cache_len:5151 prompt_cache_ratio:0.41303824873707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 -DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:01 [batch.py:51] router release req id 8 -INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10753154754638672 s -INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.1096181869506836 s -DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=291413236124409637913569519175813281511, time:1750768261.5211315s req_ids:[8] -DEBUG 06-24 20:31:01 [manager.py:391] -ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:200.21629333496094ms total_cost_time:200.25992393493652ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12472 prompt_cache_len:5151 prompt_cache_ratio:0.41300513149454776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 -DEBUG 06-24 20:31:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:01 [batch.py:51] router release req id 8 -INFO 06-24 20:31:01 [manager.py:224] router recive req id 8 cost time 0.10804104804992676 s -INFO 06-24 20:31:01 [manager.py:68] detokenization recv req id 8 cost time 0.10991740226745605 s -DEBUG 06-24 20:31:01 [manager.py:391] Prefill Batch: batch_id=64811599997679441320889351947244011926, time:1750768261.7257676s req_ids:[8] -DEBUG 06-24 20:31:01 [manager.py:391] -ERROR 06-24 20:31:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:205.25789260864258ms total_cost_time:205.30080795288086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12473 prompt_cache_len:5151 prompt_cache_ratio:0.4129720195622545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 -DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:02 [batch.py:51] router release req id 8 -INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.3098106384277344 s -DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=335463417253242771115536386454889212723, time:1750768262.13208s req_ids:[8] -DEBUG 06-24 20:31:02 [manager.py:391] -INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.3117959499359131 s -ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:01 lightllm_req_id:8 first_token_cost:408.4603786468506ms total_cost_time:408.5052013397217ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12474 prompt_cache_len:5151 prompt_cache_ratio:0.41293891293891294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 -DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:02 [batch.py:51] router release req id 8 -INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10832071304321289 s -INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11067509651184082 s -DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=176962549358007825171441692736422913689, time:1750768262.351674s req_ids:[8] -DEBUG 06-24 20:31:02 [manager.py:391] -ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:212.45265007019043ms total_cost_time:212.49675750732422ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12475 prompt_cache_len:5151 prompt_cache_ratio:0.41290581162324647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 -DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:02 [batch.py:51] router release req id 8 -INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10820698738098145 s -INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11001849174499512 s -DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=213912654825097230301175148773544035533, time:1750768262.5901873s req_ids:[8] -DEBUG 06-24 20:31:02 [manager.py:391] -ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:231.86922073364258ms total_cost_time:231.91142082214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12476 prompt_cache_len:5151 prompt_cache_ratio:0.4128727156139788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 -DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:02 [batch.py:51] router release req id 8 -INFO 06-24 20:31:02 [manager.py:224] router recive req id 8 cost time 0.10843992233276367 s -INFO 06-24 20:31:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036205291748047 s -DEBUG 06-24 20:31:02 [manager.py:391] Prefill Batch: batch_id=229431855920629484941947963491354180915, time:1750768262.80846s req_ids:[8] -DEBUG 06-24 20:31:02 [manager.py:391] -ERROR 06-24 20:31:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:215.3298854827881ms total_cost_time:215.37160873413086ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12477 prompt_cache_len:5151 prompt_cache_ratio:0.4128396249098341 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 -DEBUG 06-24 20:31:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:02 [batch.py:51] router release req id 8 -INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.1068413257598877 s -INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10870766639709473 s -DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=197706533817026976268769589133822344974, time:1750768263.03608s req_ids:[8] -DEBUG 06-24 20:31:03 [manager.py:391] -ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:02 lightllm_req_id:8 first_token_cost:216.53270721435547ms total_cost_time:216.57395362854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12478 prompt_cache_len:5151 prompt_cache_ratio:0.4128065395095368 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 -DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:03 [batch.py:51] router release req id 8 -INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.1077427864074707 s -INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10977411270141602 s -DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=156521254108722658419979682263078928984, time:1750768263.2536135s req_ids:[8] -DEBUG 06-24 20:31:03 [manager.py:391] -ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:209.10167694091797ms total_cost_time:209.14316177368164ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12479 prompt_cache_len:5151 prompt_cache_ratio:0.41277345941181187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 -DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:03 [batch.py:51] router release req id 8 -INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.3097202777862549 s -INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.3116645812988281 s -DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=308365057621095315206623764433903436197, time:1750768263.6856482s req_ids:[8] -DEBUG 06-24 20:31:03 [manager.py:391] -ERROR 06-24 20:31:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:437.7598762512207ms total_cost_time:437.8011226654053ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12480 prompt_cache_len:5151 prompt_cache_ratio:0.4127403846153846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 -DEBUG 06-24 20:31:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:03 [batch.py:51] router release req id 8 -INFO 06-24 20:31:03 [manager.py:224] router recive req id 8 cost time 0.10805344581604004 s -INFO 06-24 20:31:03 [manager.py:68] detokenization recv req id 8 cost time 0.10997438430786133 s -DEBUG 06-24 20:31:03 [manager.py:391] Prefill Batch: batch_id=127514572583305619428074956107137422643, time:1750768263.9105392s req_ids:[8] -DEBUG 06-24 20:31:03 [manager.py:391] -ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:03 lightllm_req_id:8 first_token_cost:222.69272804260254ms total_cost_time:222.73492813110352ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12481 prompt_cache_len:5151 prompt_cache_ratio:0.41270731511898084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 -DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:04 [batch.py:51] router release req id 8 -INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10782217979431152 s -INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.1097257137298584 s -DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=68816819384489690662885386207278877162, time:1750768264.1495395s req_ids:[8] -DEBUG 06-24 20:31:04 [manager.py:391] -ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:219.6512222290039ms total_cost_time:219.6958065032959ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12482 prompt_cache_len:5151 prompt_cache_ratio:0.4126742509213267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 -DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:04 [batch.py:51] router release req id 8 -INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.1082756519317627 s -INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.11081671714782715 s -DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=209516937181353585384012023219293272177, time:1750768264.3684156s req_ids:[8] -DEBUG 06-24 20:31:04 [manager.py:391] -ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:214.67947959899902ms total_cost_time:214.7378921508789ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:12483 prompt_cache_len:5151 prompt_cache_ratio:0.41264119202114874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 -DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:04 [batch.py:51] router release req id 8 -INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s -INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.11096334457397461 s -DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=316753395785497313493132876187578569870, time:1750768264.5831099s req_ids:[8] -DEBUG 06-24 20:31:04 [manager.py:391] -ERROR 06-24 20:31:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:213.16170692443848ms total_cost_time:213.20796012878418ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12484 prompt_cache_len:5151 prompt_cache_ratio:0.412608138417174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 -DEBUG 06-24 20:31:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:04 [batch.py:51] router release req id 8 -INFO 06-24 20:31:04 [manager.py:224] router recive req id 8 cost time 0.10727500915527344 s -INFO 06-24 20:31:04 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s -DEBUG 06-24 20:31:04 [manager.py:391] Prefill Batch: batch_id=233676097618175219885476897963355789361, time:1750768264.8225307s req_ids:[8] -DEBUG 06-24 20:31:04 [manager.py:391] -ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:04 lightllm_req_id:8 first_token_cost:390.55633544921875ms total_cost_time:390.59996604919434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12485 prompt_cache_len:5151 prompt_cache_ratio:0.41257509010812976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 -DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:05 [batch.py:51] router release req id 8 -INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10948657989501953 s -INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11155486106872559 s -DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=25420626096970156692914414548132759823, time:1750768265.1987534s req_ids:[8] -DEBUG 06-24 20:31:05 [manager.py:391] -ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:211.1203670501709ms total_cost_time:211.1644744873047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12486 prompt_cache_len:5151 prompt_cache_ratio:0.4125420470927439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 -DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:05 [batch.py:51] router release req id 8 -INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10743403434753418 s -INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.10947108268737793 s -DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=331959798810267994369950450106391193076, time:1750768265.4165535s req_ids:[8] -DEBUG 06-24 20:31:05 [manager.py:391] -ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:206.42352104187012ms total_cost_time:206.4688205718994ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12487 prompt_cache_len:5151 prompt_cache_ratio:0.41250900936974455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 -DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:05 [batch.py:51] router release req id 8 -INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.1088094711303711 s -INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11103177070617676 s -DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=208698978373464275442925134444079464268, time:1750768265.6291256s req_ids:[8] -DEBUG 06-24 20:31:05 [manager.py:391] -ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:212.52179145812988ms total_cost_time:212.56542205810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12488 prompt_cache_len:5151 prompt_cache_ratio:0.41247597693786037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 -DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:05 [batch.py:51] router release req id 8 -INFO 06-24 20:31:05 [manager.py:224] router recive req id 8 cost time 0.10823607444763184 s -INFO 06-24 20:31:05 [manager.py:68] detokenization recv req id 8 cost time 0.11030840873718262 s -DEBUG 06-24 20:31:05 [manager.py:391] Prefill Batch: batch_id=3222537072395876234533459496520552559, time:1750768265.8481362s req_ids:[8] -DEBUG 06-24 20:31:05 [manager.py:391] -ERROR 06-24 20:31:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:209.80024337768555ms total_cost_time:209.85817909240723ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:12489 prompt_cache_len:5151 prompt_cache_ratio:0.4124429497958203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 -DEBUG 06-24 20:31:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:05 [batch.py:51] router release req id 8 -INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.1085200309753418 s -INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.11059188842773438 s -DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=177084588228214653286181802012143280478, time:1750768266.0644772s req_ids:[8] -DEBUG 06-24 20:31:06 [manager.py:391] -ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:05 lightllm_req_id:8 first_token_cost:210.23225784301758ms total_cost_time:210.27660369873047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12490 prompt_cache_len:5151 prompt_cache_ratio:0.41240992794235387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 -DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:06 [batch.py:51] router release req id 8 -INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10744619369506836 s -INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.10945940017700195 s -DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=27885679134886115713469702724236210516, time:1750768266.2812974s req_ids:[8] -DEBUG 06-24 20:31:06 [manager.py:391] -ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:31:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 50055.312 tokens/s -DEBUG 06-24 20:31:06 [stats.py:37] Avg prompt tokens throughput: 50047.286 tokens/s -DEBUG 06-24 20:31:06 [stats.py:37] Avg generate tokens throughput: 8.026 tokens/s -INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:372.51925468444824ms total_cost_time:372.5621700286865ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12491 prompt_cache_len:5151 prompt_cache_ratio:0.41237691137619087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 -DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:06 [batch.py:51] router release req id 8 -INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s -INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.1112680435180664 s -DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=201377200954779068346558203775480764757, time:1750768266.6612456s req_ids:[8] -DEBUG 06-24 20:31:06 [manager.py:391] -ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:213.82689476013184ms total_cost_time:213.87052536010742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12492 prompt_cache_len:5151 prompt_cache_ratio:0.4123439000960615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 -DEBUG 06-24 20:31:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:06 [batch.py:51] router release req id 8 -INFO 06-24 20:31:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:06 [manager.py:224] router recive req id 8 cost time 0.10704708099365234 s -INFO 06-24 20:31:06 [manager.py:68] detokenization recv req id 8 cost time 0.1089024543762207 s -DEBUG 06-24 20:31:06 [manager.py:391] Prefill Batch: batch_id=281275185465685633668293628360356835821, time:1750768266.882115s req_ids:[8] -DEBUG 06-24 20:31:06 [manager.py:391] -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:211.61437034606934ms total_cost_time:211.65943145751953ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12493 prompt_cache_len:5151 prompt_cache_ratio:0.4123108941006964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 -DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:07 [batch.py:51] router release req id 8 -INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10738372802734375 s -INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.1093451976776123 s -DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=332813435533580022291468381962780081408, time:1750768267.0995321s req_ids:[8] -DEBUG 06-24 20:31:07 [manager.py:391] -ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:06 lightllm_req_id:8 first_token_cost:208.36949348449707ms total_cost_time:208.41312408447266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12494 prompt_cache_len:5151 prompt_cache_ratio:0.41227789338882664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 -DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:07 [batch.py:51] router release req id 8 -INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10843753814697266 s -INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.11052203178405762 s -DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=41927179067663395116973905055266446041, time:1750768267.313437s req_ids:[8] -DEBUG 06-24 20:31:07 [manager.py:391] -ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:205.70659637451172ms total_cost_time:205.75308799743652ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12495 prompt_cache_len:5151 prompt_cache_ratio:0.4122448979591837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 -DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:07 [batch.py:51] router release req id 8 -INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.103546142578125 s -INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.10538387298583984 s -DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=166465240543926596858572255214284927103, time:1750768267.5263383s req_ids:[8] -DEBUG 06-24 20:31:07 [manager.py:391] -ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:222.20349311828613ms total_cost_time:222.24879264831543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12496 prompt_cache_len:5151 prompt_cache_ratio:0.4122119078104994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 -DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:07 [batch.py:51] router release req id 8 -INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10827279090881348 s -INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.1101844310760498 s -DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=196392207537966235939666587918125761220, time:1750768267.753386s req_ids:[8] -DEBUG 06-24 20:31:07 [manager.py:391] -ERROR 06-24 20:31:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:207.08870887756348ms total_cost_time:207.13305473327637ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12497 prompt_cache_len:5151 prompt_cache_ratio:0.412178922941506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 -DEBUG 06-24 20:31:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:07 [batch.py:51] router release req id 8 -INFO 06-24 20:31:07 [manager.py:224] router recive req id 8 cost time 0.10721349716186523 s -INFO 06-24 20:31:07 [manager.py:68] detokenization recv req id 8 cost time 0.10918354988098145 s -DEBUG 06-24 20:31:07 [manager.py:391] Prefill Batch: batch_id=209963547930889658587163866150168609014, time:1750768267.9681652s req_ids:[8] -DEBUG 06-24 20:31:07 [manager.py:391] -ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:07 lightllm_req_id:8 first_token_cost:382.3723793029785ms total_cost_time:382.4167251586914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12498 prompt_cache_len:5151 prompt_cache_ratio:0.41214594335093613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 -DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:08 [batch.py:51] router release req id 8 -INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.1077730655670166 s -INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10968494415283203 s -DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=309380588135326408861170842434463727074, time:1750768268.3566535s req_ids:[8] -DEBUG 06-24 20:31:08 [manager.py:391] -ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:185.6536865234375ms total_cost_time:185.70446968078613ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12499 prompt_cache_len:5151 prompt_cache_ratio:0.412112969037523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 -DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:08 [batch.py:51] router release req id 8 -INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10773134231567383 s -INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10988950729370117 s -DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=308771310589489724622061215605645790678, time:1750768268.5492675s req_ids:[8] -DEBUG 06-24 20:31:08 [manager.py:391] -ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:207.71360397338867ms total_cost_time:207.75818824768066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12500 prompt_cache_len:5151 prompt_cache_ratio:0.41208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 -DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:08 [batch.py:51] router release req id 8 -INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10793495178222656 s -INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10987567901611328 s -DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=157605382485507449445995574528165771359, time:1750768268.7633877s req_ids:[8] -DEBUG 06-24 20:31:08 [manager.py:391] -ERROR 06-24 20:31:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:213.78326416015625ms total_cost_time:213.82617950439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12501 prompt_cache_len:5151 prompt_cache_ratio:0.41204703623710104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 -DEBUG 06-24 20:31:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:08 [batch.py:51] router release req id 8 -INFO 06-24 20:31:08 [manager.py:224] router recive req id 8 cost time 0.10767078399658203 s -INFO 06-24 20:31:08 [manager.py:68] detokenization recv req id 8 cost time 0.10973763465881348 s -DEBUG 06-24 20:31:08 [manager.py:391] Prefill Batch: batch_id=156319905130828988385209150612355490410, time:1750768268.9844146s req_ids:[8] -DEBUG 06-24 20:31:08 [manager.py:391] -ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:08 lightllm_req_id:8 first_token_cost:226.78399085998535ms total_cost_time:226.82785987854004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12502 prompt_cache_len:5151 prompt_cache_ratio:0.4120140777475604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 -DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:09 [batch.py:51] router release req id 8 -INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s -INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.10984373092651367 s -DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=295492755154484424061258697106642599943, time:1750768269.2215552s req_ids:[8] -DEBUG 06-24 20:31:09 [manager.py:391] -ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:210.35528182983398ms total_cost_time:210.4012966156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12503 prompt_cache_len:5151 prompt_cache_ratio:0.41198112453011276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 -DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:09 [batch.py:51] router release req id 8 -INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s -INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.10977387428283691 s -DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=334363851398461936880807734011136347128, time:1750768269.4344208s req_ids:[8] -DEBUG 06-24 20:31:09 [manager.py:391] -ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:210.47067642211914ms total_cost_time:210.51526069641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12504 prompt_cache_len:5151 prompt_cache_ratio:0.4119481765834933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 -DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:09 [batch.py:51] router release req id 8 -INFO 06-24 20:31:09 [manager.py:224] router recive req id 8 cost time 0.10805153846740723 s -INFO 06-24 20:31:09 [manager.py:68] detokenization recv req id 8 cost time 0.1099843978881836 s -DEBUG 06-24 20:31:09 [manager.py:391] Prefill Batch: batch_id=274886550352700284025755126154914969379, time:1750768269.6512759s req_ids:[8] -DEBUG 06-24 20:31:09 [manager.py:391] -ERROR 06-24 20:31:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:406.8949222564697ms total_cost_time:406.9180488586426ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:12505 prompt_cache_len:5151 prompt_cache_ratio:0.4119152339064374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 -DEBUG 06-24 20:31:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:09 [batch.py:51] router release req id 8 -INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10771989822387695 s -INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.10965275764465332 s -DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=75615430330593713225362058400767954110, time:1750768270.0636318s req_ids:[8] -DEBUG 06-24 20:31:10 [manager.py:391] -ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:09 lightllm_req_id:8 first_token_cost:209.9459171295166ms total_cost_time:209.98883247375488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12506 prompt_cache_len:5151 prompt_cache_ratio:0.4118822964976811 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 -DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:10 [batch.py:51] router release req id 8 -INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10843348503112793 s -INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11040353775024414 s -DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=262658503894260619744906117307045909717, time:1750768270.283475s req_ids:[8] -DEBUG 06-24 20:31:10 [manager.py:391] -ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:216.0928249359131ms total_cost_time:216.13669395446777ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12507 prompt_cache_len:5151 prompt_cache_ratio:0.41184936435596065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 -DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:10 [batch.py:51] router release req id 8 -INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s -INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.10956096649169922 s -DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=284068088712209531769875984138252560222, time:1750768270.5112612s req_ids:[8] -DEBUG 06-24 20:31:10 [manager.py:391] -ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:221.62652015686035ms total_cost_time:221.67038917541504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12508 prompt_cache_len:5151 prompt_cache_ratio:0.4118164374800128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 -DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:10 [batch.py:51] router release req id 8 -INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.10805845260620117 s -INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11020636558532715 s -DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=290638582771012901185844741655489464104, time:1750768270.7334328s req_ids:[8] -DEBUG 06-24 20:31:10 [manager.py:391] -ERROR 06-24 20:31:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:209.54155921936035ms total_cost_time:209.59711074829102ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:12509 prompt_cache_len:5151 prompt_cache_ratio:0.41178351586857465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 -DEBUG 06-24 20:31:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:10 [batch.py:51] router release req id 8 -INFO 06-24 20:31:10 [manager.py:224] router recive req id 8 cost time 0.11052513122558594 s -INFO 06-24 20:31:10 [manager.py:68] detokenization recv req id 8 cost time 0.11241459846496582 s -DEBUG 06-24 20:31:10 [manager.py:391] Prefill Batch: batch_id=206136282999498309293515217584519768747, time:1750768270.94695s req_ids:[8] -DEBUG 06-24 20:31:10 [manager.py:391] -ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:10 lightllm_req_id:8 first_token_cost:210.9987735748291ms total_cost_time:211.0443115234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12510 prompt_cache_len:5151 prompt_cache_ratio:0.4117505995203837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 -DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:11 [batch.py:51] router release req id 8 -INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s -INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10962247848510742 s -DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=138472392076730866881253495914941814021, time:1750768271.1621392s req_ids:[8] -DEBUG 06-24 20:31:11 [manager.py:391] -ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:382.504940032959ms total_cost_time:382.5514316558838ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12511 prompt_cache_len:5151 prompt_cache_ratio:0.41171768843417794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 -DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:11 [batch.py:51] router release req id 8 -DEBUG 06-24 20:31:11 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:11 [manager.py:283] -DEBUG 06-24 20:31:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:11 [manager.py:284] -INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.1089928150177002 s -INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.11092591285705566 s -DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=274842610773323748006693584392995860369, time:1750768271.5527139s req_ids:[8] -DEBUG 06-24 20:31:11 [manager.py:391] -ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:217.94819831848145ms total_cost_time:217.99182891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12512 prompt_cache_len:5151 prompt_cache_ratio:0.4116847826086957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 -DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:11 [batch.py:51] router release req id 8 -INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10698986053466797 s -INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10892844200134277 s -DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=328499169006924377949282002478703492110, time:1750768271.7818563s req_ids:[8] -DEBUG 06-24 20:31:11 [manager.py:391] -ERROR 06-24 20:31:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:220.14927864074707ms total_cost_time:220.19362449645996ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12513 prompt_cache_len:5151 prompt_cache_ratio:0.4116518820426756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 -DEBUG 06-24 20:31:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:11 [batch.py:51] router release req id 8 -INFO 06-24 20:31:11 [manager.py:224] router recive req id 8 cost time 0.10772085189819336 s -INFO 06-24 20:31:11 [manager.py:68] detokenization recv req id 8 cost time 0.10964393615722656 s -DEBUG 06-24 20:31:11 [manager.py:391] Prefill Batch: batch_id=338074776588502273919273700022555585288, time:1750768271.9988298s req_ids:[8] -DEBUG 06-24 20:31:11 [manager.py:391] -ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:11 lightllm_req_id:8 first_token_cost:205.66987991333008ms total_cost_time:205.71327209472656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12514 prompt_cache_len:5151 prompt_cache_ratio:0.411618986734857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 -DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:12 [batch.py:51] router release req id 8 -INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.1093599796295166 s -INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11138606071472168 s -DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=285326432658005579272735091469556887831, time:1750768272.2109773s req_ids:[8] -DEBUG 06-24 20:31:12 [manager.py:391] -ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:204.3745517730713ms total_cost_time:204.41865921020508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12515 prompt_cache_len:5151 prompt_cache_ratio:0.4115860966839792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 -DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:12 [batch.py:51] router release req id 8 -INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.10878205299377441 s -INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11059045791625977 s -DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=190255958124451904774178780099135527288, time:1750768272.417846s req_ids:[8] -DEBUG 06-24 20:31:12 [manager.py:391] -ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:206.16459846496582ms total_cost_time:206.2079906463623ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12516 prompt_cache_len:5151 prompt_cache_ratio:0.41155321188878236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 -DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:12 [batch.py:51] router release req id 8 -INFO 06-24 20:31:12 [manager.py:224] router recive req id 8 cost time 0.10843157768249512 s -INFO 06-24 20:31:12 [manager.py:68] detokenization recv req id 8 cost time 0.11037755012512207 s -DEBUG 06-24 20:31:12 [manager.py:391] Prefill Batch: batch_id=131422421860615437810100093198019348114, time:1750768272.6311183s req_ids:[8] -DEBUG 06-24 20:31:12 [manager.py:391] -ERROR 06-24 20:31:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:415.50731658935547ms total_cost_time:415.55142402648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12517 prompt_cache_len:5151 prompt_cache_ratio:0.4115203323480067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 -DEBUG 06-24 20:31:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:12 [batch.py:51] router release req id 8 -INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10846590995788574 s -INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11074709892272949 s -DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=277106865176068151334441545449356057205, time:1750768273.0545304s req_ids:[8] -DEBUG 06-24 20:31:13 [manager.py:391] -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:12 lightllm_req_id:8 first_token_cost:211.19403839111328ms total_cost_time:211.23862266540527ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12518 prompt_cache_len:5151 prompt_cache_ratio:0.41148745806039305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 -DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:13 [batch.py:51] router release req id 8 -INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s -INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11036300659179688 s -DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=220293240658358206876506297548075410536, time:1750768273.2801838s req_ids:[8] -DEBUG 06-24 20:31:13 [manager.py:391] -ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:222.6853370666504ms total_cost_time:222.72920608520508ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12519 prompt_cache_len:5151 prompt_cache_ratio:0.41145458902468246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 -DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:13 [batch.py:51] router release req id 8 -INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s -INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11139512062072754 s -DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=209489413563033150713709822564619335117, time:1750768273.4999242s req_ids:[8] -DEBUG 06-24 20:31:13 [manager.py:391] -ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:170.00555992126465ms total_cost_time:170.0570583343506ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:12520 prompt_cache_len:5151 prompt_cache_ratio:0.4114217252396166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 -DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:13 [batch.py:51] router release req id 8 -INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10794854164123535 s -INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.10965347290039062 s -DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=227931483414505929216758720466042156691, time:1750768273.676312s req_ids:[8] -DEBUG 06-24 20:31:13 [manager.py:391] -ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:202.98242568969727ms total_cost_time:203.02557945251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12521 prompt_cache_len:5151 prompt_cache_ratio:0.4113888667039374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 -DEBUG 06-24 20:31:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:13 [batch.py:51] router release req id 8 -INFO 06-24 20:31:13 [manager.py:224] router recive req id 8 cost time 0.10867929458618164 s -INFO 06-24 20:31:13 [manager.py:68] detokenization recv req id 8 cost time 0.11017775535583496 s -DEBUG 06-24 20:31:13 [manager.py:391] Prefill Batch: batch_id=116931807011560776590899074982640281284, time:1750768273.885679s req_ids:[8] -DEBUG 06-24 20:31:13 [manager.py:391] -ERROR 06-24 20:31:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:215.67106246948242ms total_cost_time:215.72375297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:12522 prompt_cache_len:5151 prompt_cache_ratio:0.41135601341638717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 -DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:14 [batch.py:51] router release req id 8 -INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.1069638729095459 s -INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.10841870307922363 s -DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=178997456884958683885664304534773038616, time:1750768274.1331282s req_ids:[8] -DEBUG 06-24 20:31:14 [manager.py:391] -ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:13 lightllm_req_id:8 first_token_cost:228.3787727355957ms total_cost_time:228.43575477600098ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:12523 prompt_cache_len:5151 prompt_cache_ratio:0.4113231653757087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 -DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:14 [batch.py:51] router release req id 8 -INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s -INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.11024713516235352 s -DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=295605562536982205220024503344360061749, time:1750768274.3413801s req_ids:[8] -DEBUG 06-24 20:31:14 [manager.py:391] -ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:389.8179531097412ms total_cost_time:389.8627758026123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12524 prompt_cache_len:5151 prompt_cache_ratio:0.4112903225806452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 -DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:14 [batch.py:51] router release req id 8 -INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10848259925842285 s -INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.11039876937866211 s -DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=331242976173159817323475117226824780284, time:1750768274.7359161s req_ids:[8] -DEBUG 06-24 20:31:14 [manager.py:391] -ERROR 06-24 20:31:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:210.03007888793945ms total_cost_time:210.07442474365234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12525 prompt_cache_len:5151 prompt_cache_ratio:0.41125748502994014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 -DEBUG 06-24 20:31:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:14 [batch.py:51] router release req id 8 -INFO 06-24 20:31:14 [manager.py:224] router recive req id 8 cost time 0.10741925239562988 s -INFO 06-24 20:31:14 [manager.py:68] detokenization recv req id 8 cost time 0.10891294479370117 s -DEBUG 06-24 20:31:14 [manager.py:391] Prefill Batch: batch_id=3741966754158941271781177073025609991, time:1750768274.9651766s req_ids:[8] -DEBUG 06-24 20:31:14 [manager.py:391] -ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:14 lightllm_req_id:8 first_token_cost:224.73812103271484ms total_cost_time:224.78199005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12526 prompt_cache_len:5151 prompt_cache_ratio:0.41122465272233755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 -DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:15 [batch.py:51] router release req id 8 -INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10805273056030273 s -INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.10916495323181152 s -DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=123976354871854654732500638602692739225, time:1750768275.1838672s req_ids:[8] -DEBUG 06-24 20:31:15 [manager.py:391] -ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:231.1117649078369ms total_cost_time:231.15801811218262ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12527 prompt_cache_len:5151 prompt_cache_ratio:0.4111918256565818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 -DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:15 [batch.py:51] router release req id 8 -INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s -INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.10989975929260254 s -DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=327224276748437850533417176729005399906, time:1750768275.4256055s req_ids:[8] -DEBUG 06-24 20:31:15 [manager.py:391] -ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:226.72462463378906ms total_cost_time:226.77040100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12528 prompt_cache_len:5151 prompt_cache_ratio:0.4111590038314176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 -DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:15 [batch.py:51] router release req id 8 -INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10826563835144043 s -INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013960838317871 s -DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=97931355185587339517844902398606599557, time:1750768275.6577344s req_ids:[8] -DEBUG 06-24 20:31:15 [manager.py:391] -ERROR 06-24 20:31:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:230.30591011047363ms total_cost_time:230.38244247436523ms,out_token_counter:1 mean_per_token_cost_time: 0.07653236389160156ms prompt_token_num:12529 prompt_cache_len:5151 prompt_cache_ratio:0.41112618724559025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 -DEBUG 06-24 20:31:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:15 [batch.py:51] router release req id 8 -INFO 06-24 20:31:15 [manager.py:224] router recive req id 8 cost time 0.10790061950683594 s -INFO 06-24 20:31:15 [manager.py:68] detokenization recv req id 8 cost time 0.1099386215209961 s -DEBUG 06-24 20:31:15 [manager.py:391] Prefill Batch: batch_id=105037887369281595108128063475452842141, time:1750768275.8985052s req_ids:[8] -DEBUG 06-24 20:31:15 [manager.py:391] -ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:15 lightllm_req_id:8 first_token_cost:400.8526802062988ms total_cost_time:400.907039642334ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:12530 prompt_cache_len:5151 prompt_cache_ratio:0.41109337589784517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 -DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:16 [batch.py:51] router release req id 8 -INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10814642906188965 s -INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.11004233360290527 s -DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=236481050959080865530757516517222148744, time:1750768276.2994266s req_ids:[8] -DEBUG 06-24 20:31:16 [manager.py:391] -ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:204.27393913269043ms total_cost_time:204.2992115020752ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:12531 prompt_cache_len:5151 prompt_cache_ratio:0.4110605697869284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 -DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:16 [batch.py:51] router release req id 8 -INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10483622550964355 s -INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.10672521591186523 s -DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=175265139455072007620372557336351049246, time:1750768276.5095196s req_ids:[8] -DEBUG 06-24 20:31:16 [manager.py:391] -ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:31:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 50992.712 tokens/s -DEBUG 06-24 20:31:16 [stats.py:37] Avg prompt tokens throughput: 50984.563 tokens/s -DEBUG 06-24 20:31:16 [stats.py:37] Avg generate tokens throughput: 8.150 tokens/s -INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.7555866241455ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12532 prompt_cache_len:5151 prompt_cache_ratio:0.4110277689115863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 -DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:16 [batch.py:51] router release req id 8 -INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10831809043884277 s -INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.1102302074432373 s -DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=230442108666386900555144758553057681256, time:1750768276.7473354s req_ids:[8] -DEBUG 06-24 20:31:16 [manager.py:391] -ERROR 06-24 20:31:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:228.79648208618164ms total_cost_time:228.84297370910645ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12533 prompt_cache_len:5151 prompt_cache_ratio:0.4109949732705657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 -DEBUG 06-24 20:31:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:16 [batch.py:51] router release req id 8 -INFO 06-24 20:31:16 [manager.py:224] router recive req id 8 cost time 0.10748934745788574 s -INFO 06-24 20:31:16 [manager.py:68] detokenization recv req id 8 cost time 0.1091604232788086 s -DEBUG 06-24 20:31:16 [manager.py:391] Prefill Batch: batch_id=174620604022056359089597685458447835929, time:1750768276.9688103s req_ids:[8] -DEBUG 06-24 20:31:16 [manager.py:391] -ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:16 lightllm_req_id:8 first_token_cost:223.30451011657715ms total_cost_time:223.36983680725098ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:12534 prompt_cache_len:5151 prompt_cache_ratio:0.4109621828626137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 -INFO 06-24 20:31:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:17 [batch.py:51] router release req id 8 -INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10846614837646484 s -INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.1103672981262207 s -DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=154217849949316213949446032002320211087, time:1750768277.1921399s req_ids:[8] -DEBUG 06-24 20:31:17 [manager.py:391] -ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:207.72790908813477ms total_cost_time:207.77392387390137ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12535 prompt_cache_len:5151 prompt_cache_ratio:0.41092939768647785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 -DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:17 [batch.py:51] router release req id 8 -INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10950636863708496 s -INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.11150050163269043 s -DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=108137090411318116023750754203297368051, time:1750768277.4012213s req_ids:[8] -DEBUG 06-24 20:31:17 [manager.py:391] -ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:211.29107475280762ms total_cost_time:211.3347053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12536 prompt_cache_len:5151 prompt_cache_ratio:0.4108966177409062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 -DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:17 [batch.py:51] router release req id 8 -INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10752296447753906 s -INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.10860657691955566 s -DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=112182156355757783619233039777198366976, time:1750768277.6225874s req_ids:[8] -DEBUG 06-24 20:31:17 [manager.py:391] -ERROR 06-24 20:31:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:219.49338912963867ms total_cost_time:219.53654289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12537 prompt_cache_len:5151 prompt_cache_ratio:0.41086384302464707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 -DEBUG 06-24 20:31:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:17 [batch.py:51] router release req id 8 -INFO 06-24 20:31:17 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s -INFO 06-24 20:31:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997295379638672 s -DEBUG 06-24 20:31:17 [manager.py:391] Prefill Batch: batch_id=159536824877776508465751281580135407903, time:1750768277.8636575s req_ids:[8] -DEBUG 06-24 20:31:17 [manager.py:391] -ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:17 lightllm_req_id:8 first_token_cost:404.67190742492676ms total_cost_time:404.79207038879395ms,out_token_counter:1 mean_per_token_cost_time: 0.1201629638671875ms prompt_token_num:12538 prompt_cache_len:5151 prompt_cache_ratio:0.4108310735364492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 -DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:18 [batch.py:51] router release req id 8 -INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10717201232910156 s -INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10812878608703613 s -DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=231164334226629571980830937996094749944, time:1750768278.2539432s req_ids:[8] -DEBUG 06-24 20:31:18 [manager.py:391] -ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:186.29717826843262ms total_cost_time:186.3405704498291ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12539 prompt_cache_len:5151 prompt_cache_ratio:0.41079830927506183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 -DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:18 [batch.py:51] router release req id 8 -INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10762977600097656 s -INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s -DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=173555751811260018138083162465825119318, time:1750768278.448421s req_ids:[8] -DEBUG 06-24 20:31:18 [manager.py:391] -ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:205.46483993530273ms total_cost_time:205.51037788391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12540 prompt_cache_len:5151 prompt_cache_ratio:0.41076555023923444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 -DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:18 [batch.py:51] router release req id 8 -INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.1071467399597168 s -INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10817146301269531 s -DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=254011236643366864011433757633352885268, time:1750768278.6584747s req_ids:[8] -DEBUG 06-24 20:31:18 [manager.py:391] -ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:211.0762596130371ms total_cost_time:211.1222743988037ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12541 prompt_cache_len:5151 prompt_cache_ratio:0.4107327964277171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 -DEBUG 06-24 20:31:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:18 [batch.py:51] router release req id 8 -INFO 06-24 20:31:18 [manager.py:224] router recive req id 8 cost time 0.10761690139770508 s -INFO 06-24 20:31:18 [manager.py:68] detokenization recv req id 8 cost time 0.10951519012451172 s -DEBUG 06-24 20:31:18 [manager.py:391] Prefill Batch: batch_id=257394938601223862008762757272571695009, time:1750768278.8756688s req_ids:[8] -DEBUG 06-24 20:31:18 [manager.py:391] -ERROR 06-24 20:31:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:214.7042751312256ms total_cost_time:214.74862098693848ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12542 prompt_cache_len:5151 prompt_cache_ratio:0.41070004783926006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 -DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:19 [batch.py:51] router release req id 8 -INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10759091377258301 s -INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.10868692398071289 s -DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=312292214621825617157837977250841117739, time:1750768279.0958283s req_ids:[8] -DEBUG 06-24 20:31:19 [manager.py:391] -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:18 lightllm_req_id:8 first_token_cost:215.0275707244873ms total_cost_time:215.07024765014648ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12543 prompt_cache_len:5151 prompt_cache_ratio:0.4106673044726142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 -DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:19 [batch.py:51] router release req id 8 -INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10758185386657715 s -INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.10962557792663574 s -DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=88137748764892093413558632388837982573, time:1750768279.3142118s req_ids:[8] -DEBUG 06-24 20:31:19 [manager.py:391] -ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:212.77880668640137ms total_cost_time:212.82243728637695ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12544 prompt_cache_len:5151 prompt_cache_ratio:0.4106345663265306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 -DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:19 [batch.py:51] router release req id 8 -INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.10888910293579102 s -INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.11080741882324219 s -DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=151110584988018278342670598681910506629, time:1750768279.5333657s req_ids:[8] -DEBUG 06-24 20:31:19 [manager.py:391] -ERROR 06-24 20:31:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:395.0936794281006ms total_cost_time:395.1535224914551ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12545 prompt_cache_len:5151 prompt_cache_ratio:0.41060183339976086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 -DEBUG 06-24 20:31:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:19 [batch.py:51] router release req id 8 -INFO 06-24 20:31:19 [manager.py:224] router recive req id 8 cost time 0.11097025871276855 s -INFO 06-24 20:31:19 [manager.py:68] detokenization recv req id 8 cost time 0.11311078071594238 s -DEBUG 06-24 20:31:19 [manager.py:391] Prefill Batch: batch_id=270091429903016462769162260505912333415, time:1750768279.9354692s req_ids:[8] -DEBUG 06-24 20:31:19 [manager.py:391] -ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:19 lightllm_req_id:8 first_token_cost:208.2540988922119ms total_cost_time:208.2960605621338ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12546 prompt_cache_len:5151 prompt_cache_ratio:0.4105691056910569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 -DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:20 [batch.py:51] router release req id 8 -INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10844159126281738 s -INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11037015914916992 s -DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=123512294390957702757395496350686527781, time:1750768280.1535735s req_ids:[8] -DEBUG 06-24 20:31:20 [manager.py:391] -ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:213.4108543395996ms total_cost_time:213.4532928466797ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12547 prompt_cache_len:5151 prompt_cache_ratio:0.41053638319917113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 -DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:20 [batch.py:51] router release req id 8 -INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10864973068237305 s -INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11067676544189453 s -DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=38621624746936721233358258632667348431, time:1750768280.3733773s req_ids:[8] -DEBUG 06-24 20:31:20 [manager.py:391] -ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:216.00031852722168ms total_cost_time:216.04323387145996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12548 prompt_cache_len:5151 prompt_cache_ratio:0.41050366592285625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 -DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:20 [batch.py:51] router release req id 8 -INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10907292366027832 s -INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.11106491088867188 s -DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=7556144568986281925685504038794234393, time:1750768280.5954952s req_ids:[8] -DEBUG 06-24 20:31:20 [manager.py:391] -ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:214.30397033691406ms total_cost_time:214.34760093688965ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12549 prompt_cache_len:5151 prompt_cache_ratio:0.4104709538608654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 -DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:20 [batch.py:51] router release req id 8 -INFO 06-24 20:31:20 [manager.py:224] router recive req id 8 cost time 0.10805797576904297 s -INFO 06-24 20:31:20 [manager.py:68] detokenization recv req id 8 cost time 0.10998415946960449 s -DEBUG 06-24 20:31:20 [manager.py:391] Prefill Batch: batch_id=34825489587725977828500866992980680493, time:1750768280.8304672s req_ids:[8] -DEBUG 06-24 20:31:20 [manager.py:391] -ERROR 06-24 20:31:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:229.47025299072266ms total_cost_time:229.51602935791016ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12550 prompt_cache_len:5151 prompt_cache_ratio:0.4104382470119522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 -DEBUG 06-24 20:31:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:20 [batch.py:51] router release req id 8 -INFO 06-24 20:31:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.10766458511352539 s -INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.1096196174621582 s -DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=31727066068721349117508171555635264683, time:1750768281.0552557s req_ids:[8] -DEBUG 06-24 20:31:21 [manager.py:391] -ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:20 lightllm_req_id:8 first_token_cost:215.84534645080566ms total_cost_time:215.88802337646484ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12551 prompt_cache_len:5151 prompt_cache_ratio:0.4104055453748705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 -DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:21 [batch.py:51] router release req id 8 -INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.3108539581298828 s -INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.31293320655822754 s -DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=8967043047885518195309717220378685095, time:1750768281.4952662s req_ids:[8] -DEBUG 06-24 20:31:21 [manager.py:391] -ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:442.0514106750488ms total_cost_time:442.09766387939453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12552 prompt_cache_len:5151 prompt_cache_ratio:0.4103728489483748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 -DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:21 [batch.py:51] router release req id 8 -INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.10778617858886719 s -INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s -DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=230481473998731284805538974717477629765, time:1750768281.7270124s req_ids:[8] -DEBUG 06-24 20:31:21 [manager.py:391] -ERROR 06-24 20:31:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:212.82267570495605ms total_cost_time:212.86702156066895ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12553 prompt_cache_len:5151 prompt_cache_ratio:0.4103401577312196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 -DEBUG 06-24 20:31:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:21 [batch.py:51] router release req id 8 -INFO 06-24 20:31:21 [manager.py:224] router recive req id 8 cost time 0.1076962947845459 s -INFO 06-24 20:31:21 [manager.py:68] detokenization recv req id 8 cost time 0.10955357551574707 s -DEBUG 06-24 20:31:21 [manager.py:391] Prefill Batch: batch_id=44791872304156656974901722217437230052, time:1750768281.9459798s req_ids:[8] -DEBUG 06-24 20:31:21 [manager.py:391] -ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:21 lightllm_req_id:8 first_token_cost:217.67020225524902ms total_cost_time:217.7138328552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12554 prompt_cache_len:5151 prompt_cache_ratio:0.41030747172216026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 -DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:22 [batch.py:51] router release req id 8 -INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.11064553260803223 s -INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.11272835731506348 s -DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=7440126245509333649937650584883862396, time:1750768282.169322s req_ids:[8] -DEBUG 06-24 20:31:22 [manager.py:391] -ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:214.90073204040527ms total_cost_time:214.94269371032715ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12555 prompt_cache_len:5151 prompt_cache_ratio:0.4102747909199522 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 -DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:22 [batch.py:51] router release req id 8 -INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.10876870155334473 s -INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.11072468757629395 s -DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=56990272556954084287474442613095706910, time:1750768282.3946886s req_ids:[8] -DEBUG 06-24 20:31:22 [manager.py:391] -ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:213.7296199798584ms total_cost_time:213.77325057983398ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12556 prompt_cache_len:5151 prompt_cache_ratio:0.41024211532335136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 -DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:22 [batch.py:51] router release req id 8 -INFO 06-24 20:31:22 [manager.py:224] router recive req id 8 cost time 0.10727763175964355 s -INFO 06-24 20:31:22 [manager.py:68] detokenization recv req id 8 cost time 0.10930800437927246 s -DEBUG 06-24 20:31:22 [manager.py:391] Prefill Batch: batch_id=209551074770314779817449554963110097504, time:1750768282.6150904s req_ids:[8] -DEBUG 06-24 20:31:22 [manager.py:391] -ERROR 06-24 20:31:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:215.9578800201416ms total_cost_time:216.00008010864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12557 prompt_cache_len:5151 prompt_cache_ratio:0.4102094449311141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 -DEBUG 06-24 20:31:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:22 [batch.py:51] router release req id 8 -INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.3088219165802002 s -INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.31073999404907227 s -DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=165473555168745689181655006668900495607, time:1750768283.0528193s req_ids:[8] -DEBUG 06-24 20:31:23 [manager.py:391] -ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:22 lightllm_req_id:8 first_token_cost:446.4552402496338ms total_cost_time:446.49791717529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12558 prompt_cache_len:5151 prompt_cache_ratio:0.4101767797419971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 -DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:23 [batch.py:51] router release req id 8 -INFO 06-24 20:31:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:31:23 [statics_utils.py:24] mean first cost: 229.67158661164862 ms -INFO 06-24 20:31:23 [statics_utils.py:24] mean per token cost: 0.06127567081356578 ms -INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10859560966491699 s -INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s -DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=163784857226672687579813413995041346623, time:1750768283.2889993s req_ids:[8] -DEBUG 06-24 20:31:23 [manager.py:391] -ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:208.89973640441895ms total_cost_time:208.94384384155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12559 prompt_cache_len:5151 prompt_cache_ratio:0.41014411975475756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 -DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:23 [batch.py:51] router release req id 8 -INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10858798027038574 s -INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11020541191101074 s -DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=161455055624283841016538751990982376338, time:1750768283.505844s req_ids:[8] -DEBUG 06-24 20:31:23 [manager.py:391] -ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:210.8311653137207ms total_cost_time:210.8759880065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12560 prompt_cache_len:5151 prompt_cache_ratio:0.4101114649681529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 -DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:23 [batch.py:51] router release req id 8 -INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10980725288391113 s -INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s -DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=264470177085633845006263746562317023140, time:1750768283.7231448s req_ids:[8] -DEBUG 06-24 20:31:23 [manager.py:391] -ERROR 06-24 20:31:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:208.6317539215088ms total_cost_time:208.67586135864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12561 prompt_cache_len:5151 prompt_cache_ratio:0.410078815380941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 -DEBUG 06-24 20:31:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:23 [batch.py:51] router release req id 8 -INFO 06-24 20:31:23 [manager.py:224] router recive req id 8 cost time 0.10868501663208008 s -INFO 06-24 20:31:23 [manager.py:68] detokenization recv req id 8 cost time 0.11061620712280273 s -DEBUG 06-24 20:31:23 [manager.py:391] Prefill Batch: batch_id=305335943424951821310252269238071115684, time:1750768283.939183s req_ids:[8] -DEBUG 06-24 20:31:23 [manager.py:391] -ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:23 lightllm_req_id:8 first_token_cost:212.70108222961426ms total_cost_time:212.74662017822266ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12562 prompt_cache_len:5151 prompt_cache_ratio:0.4100461709918803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 -DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:24 [batch.py:51] router release req id 8 -INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10942602157592773 s -INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s -DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=230688461594259607233972332783572607387, time:1750768284.1595075s req_ids:[8] -DEBUG 06-24 20:31:24 [manager.py:391] -ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:381.70599937438965ms total_cost_time:381.74939155578613ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12563 prompt_cache_len:5151 prompt_cache_ratio:0.4100135317997294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 -DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:24 [batch.py:51] router release req id 8 -INFO 06-24 20:31:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.11082220077514648 s -INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.11296558380126953 s -DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=4235264440530786418340947760829135116, time:1750768284.548638s req_ids:[8] -DEBUG 06-24 20:31:24 [manager.py:391] -ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:216.43733978271484ms total_cost_time:216.48168563842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12564 prompt_cache_len:5151 prompt_cache_ratio:0.40998089780324737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 -DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:24 [batch.py:51] router release req id 8 -INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10861039161682129 s -INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.11071181297302246 s -DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=176193743172511927873914752714383000113, time:1750768284.7757754s req_ids:[8] -DEBUG 06-24 20:31:24 [manager.py:391] -ERROR 06-24 20:31:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:217.49377250671387ms total_cost_time:217.53811836242676ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12565 prompt_cache_len:5151 prompt_cache_ratio:0.4099482690011938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 -DEBUG 06-24 20:31:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:24 [batch.py:51] router release req id 8 -INFO 06-24 20:31:24 [manager.py:224] router recive req id 8 cost time 0.10686206817626953 s -INFO 06-24 20:31:24 [manager.py:68] detokenization recv req id 8 cost time 0.10892176628112793 s -DEBUG 06-24 20:31:24 [manager.py:391] Prefill Batch: batch_id=69446486642018248990431353107734448130, time:1750768284.9949796s req_ids:[8] -DEBUG 06-24 20:31:24 [manager.py:391] -ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:24 lightllm_req_id:8 first_token_cost:211.48014068603516ms total_cost_time:211.52663230895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12566 prompt_cache_len:5151 prompt_cache_ratio:0.4099156453923285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 -DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:25 [batch.py:51] router release req id 8 -INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10872077941894531 s -INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.11070084571838379 s -DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=328678817198159626523087031810306343181, time:1750768285.2133377s req_ids:[8] -DEBUG 06-24 20:31:25 [manager.py:391] -ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:214.11871910095215ms total_cost_time:214.13898468017578ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12567 prompt_cache_len:5151 prompt_cache_ratio:0.4098830269754118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 -DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:25 [batch.py:51] router release req id 8 -INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10793733596801758 s -INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s -DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=84175638531695706572957095835258471430, time:1750768285.4323394s req_ids:[8] -DEBUG 06-24 20:31:25 [manager.py:391] -ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:170.75228691101074ms total_cost_time:170.79472541809082ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12568 prompt_cache_len:5151 prompt_cache_ratio:0.40985041374920433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 -DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:25 [batch.py:51] router release req id 8 -INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10772085189819336 s -INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.10960578918457031 s -DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=265504156831964183044608640167717948435, time:1750768285.611286s req_ids:[8] -DEBUG 06-24 20:31:25 [manager.py:391] -ERROR 06-24 20:31:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:374.91536140441895ms total_cost_time:374.95970726013184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12569 prompt_cache_len:5151 prompt_cache_ratio:0.4098178057124672 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 -DEBUG 06-24 20:31:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:25 [batch.py:51] router release req id 8 -INFO 06-24 20:31:25 [manager.py:224] router recive req id 8 cost time 0.10776805877685547 s -INFO 06-24 20:31:25 [manager.py:68] detokenization recv req id 8 cost time 0.10979270935058594 s -DEBUG 06-24 20:31:25 [manager.py:391] Prefill Batch: batch_id=284357627798559516179485403685622927428, time:1750768285.9909537s req_ids:[8] -DEBUG 06-24 20:31:25 [manager.py:391] -ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:25 lightllm_req_id:8 first_token_cost:214.4618034362793ms total_cost_time:214.48349952697754ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12570 prompt_cache_len:5151 prompt_cache_ratio:0.4097852028639618 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 -DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:26 [batch.py:51] router release req id 8 -INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10873293876647949 s -INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11074995994567871 s -DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=331567525282126948751008732231542746042, time:1750768286.2130823s req_ids:[8] -DEBUG 06-24 20:31:26 [manager.py:391] -ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:214.3697738647461ms total_cost_time:214.42103385925293ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:12571 prompt_cache_len:5151 prompt_cache_ratio:0.4097526052024501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 -DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:26 [batch.py:51] router release req id 8 -INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10855531692504883 s -INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s -DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=180147962116622310113189789929379873694, time:1750768286.447992s req_ids:[8] -DEBUG 06-24 20:31:26 [manager.py:391] -ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:227.0054817199707ms total_cost_time:227.0512580871582ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12572 prompt_cache_len:5151 prompt_cache_ratio:0.4097200127266942 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 -DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:26 [batch.py:51] router release req id 8 -INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.1084146499633789 s -INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.11034393310546875 s -DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=287667223120414895802343739041102217604, time:1750768286.680165s req_ids:[8] -DEBUG 06-24 20:31:26 [manager.py:391] -DEBUG 06-24 20:31:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 51062.732 tokens/s -DEBUG 06-24 20:31:26 [stats.py:37] Avg prompt tokens throughput: 51054.697 tokens/s -DEBUG 06-24 20:31:26 [stats.py:37] Avg generate tokens throughput: 8.035 tokens/s -ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:229.4926643371582ms total_cost_time:229.5379638671875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12573 prompt_cache_len:5151 prompt_cache_ratio:0.40968742543545694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 -DEBUG 06-24 20:31:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:26 [batch.py:51] router release req id 8 -INFO 06-24 20:31:26 [manager.py:224] router recive req id 8 cost time 0.10782861709594727 s -INFO 06-24 20:31:26 [manager.py:68] detokenization recv req id 8 cost time 0.1089012622833252 s -DEBUG 06-24 20:31:26 [manager.py:391] Prefill Batch: batch_id=255240291397660744534081115180308686188, time:1750768286.9055686s req_ids:[8] -DEBUG 06-24 20:31:26 [manager.py:391] -ERROR 06-24 20:31:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:26 lightllm_req_id:8 first_token_cost:207.61919021606445ms total_cost_time:207.66592025756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12574 prompt_cache_len:5151 prompt_cache_ratio:0.4096548433275012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 -DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:27 [batch.py:51] router release req id 8 -INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10822033882141113 s -INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11026215553283691 s -DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=127516161619346948698272387698425244521, time:1750768287.1214705s req_ids:[8] -DEBUG 06-24 20:31:27 [manager.py:391] -ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:207.26418495178223ms total_cost_time:207.32474327087402ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:12575 prompt_cache_len:5151 prompt_cache_ratio:0.40962226640159044 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 -DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:27 [batch.py:51] router release req id 8 -INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.11001396179199219 s -INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11203432083129883 s -DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=243020015261130800642486192670525958042, time:1750768287.3432753s req_ids:[8] -DEBUG 06-24 20:31:27 [manager.py:391] -ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:402.2393226623535ms total_cost_time:402.2812843322754ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12576 prompt_cache_len:5151 prompt_cache_ratio:0.40958969465648853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 -DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:27 [batch.py:51] router release req id 8 -INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10833430290222168 s -INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11020398139953613 s -DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=335242273285806892006830332171244712358, time:1750768287.7415798s req_ids:[8] -DEBUG 06-24 20:31:27 [manager.py:391] -ERROR 06-24 20:31:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:217.25130081176758ms total_cost_time:217.29564666748047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12577 prompt_cache_len:5151 prompt_cache_ratio:0.4095571280909597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 -DEBUG 06-24 20:31:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:27 [batch.py:51] router release req id 8 -INFO 06-24 20:31:27 [manager.py:224] router recive req id 8 cost time 0.10951519012451172 s -INFO 06-24 20:31:27 [manager.py:68] detokenization recv req id 8 cost time 0.11144495010375977 s -DEBUG 06-24 20:31:27 [manager.py:391] Prefill Batch: batch_id=51303382814631372679617565969901822866, time:1750768287.965721s req_ids:[8] -DEBUG 06-24 20:31:27 [manager.py:391] -ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:27 lightllm_req_id:8 first_token_cost:182.2037696838379ms total_cost_time:182.24835395812988ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12578 prompt_cache_len:5151 prompt_cache_ratio:0.40952456670376847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 -DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:28 [batch.py:51] router release req id 8 -INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.10783934593200684 s -INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.10984277725219727 s -DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=197487877513946119539464559256284067769, time:1750768288.1528404s req_ids:[8] -DEBUG 06-24 20:31:28 [manager.py:391] -ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:207.85069465637207ms total_cost_time:207.89551734924316ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12579 prompt_cache_len:5151 prompt_cache_ratio:0.40949201049367995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 -DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:28 [batch.py:51] router release req id 8 -INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.1091461181640625 s -INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.11127209663391113 s -DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=104131051347779816479625752324733237580, time:1750768288.3669758s req_ids:[8] -DEBUG 06-24 20:31:28 [manager.py:391] -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:212.0075225830078ms total_cost_time:212.05472946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12580 prompt_cache_len:5151 prompt_cache_ratio:0.40945945945945944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 -DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:28 [batch.py:51] router release req id 8 -INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.10755515098571777 s -INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.10943818092346191 s -DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=324297709927787439945452727901622424637, time:1750768288.5954754s req_ids:[8] -DEBUG 06-24 20:31:28 [manager.py:391] -ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:222.84531593322754ms total_cost_time:222.8987216949463ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:12581 prompt_cache_len:5151 prompt_cache_ratio:0.4094269135998728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 -DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:28 [batch.py:51] router release req id 8 -INFO 06-24 20:31:28 [manager.py:224] router recive req id 8 cost time 0.11018800735473633 s -INFO 06-24 20:31:28 [manager.py:68] detokenization recv req id 8 cost time 0.11210060119628906 s -DEBUG 06-24 20:31:28 [manager.py:391] Prefill Batch: batch_id=322516579833666015439633711027431332100, time:1750768288.827038s req_ids:[8] -DEBUG 06-24 20:31:28 [manager.py:391] -ERROR 06-24 20:31:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:230.77678680419922ms total_cost_time:230.8206558227539ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12582 prompt_cache_len:5151 prompt_cache_ratio:0.40939437291368624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 -DEBUG 06-24 20:31:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:28 [batch.py:51] router release req id 8 -INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s -INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.1103355884552002 s -DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=166311768373964590449937115278238895555, time:1750768289.0540247s req_ids:[8] -DEBUG 06-24 20:31:29 [manager.py:391] -ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:28 lightllm_req_id:8 first_token_cost:381.1817169189453ms total_cost_time:381.2253475189209ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12583 prompt_cache_len:5151 prompt_cache_ratio:0.4093618373996662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 -DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:29 [batch.py:51] router release req id 8 -INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s -INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.11220908164978027 s -DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=280943928641385432892860780483912264860, time:1750768289.4410005s req_ids:[8] -DEBUG 06-24 20:31:29 [manager.py:391] -ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:214.44964408874512ms total_cost_time:214.4927978515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12584 prompt_cache_len:5151 prompt_cache_ratio:0.40932930705657977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 -DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:29 [batch.py:51] router release req id 8 -INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10741519927978516 s -INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.10979533195495605 s -DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=107483609448302520521400425450512137487, time:1750768289.6620047s req_ids:[8] -DEBUG 06-24 20:31:29 [manager.py:391] -ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:214.96224403381348ms total_cost_time:215.00539779663086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12585 prompt_cache_len:5151 prompt_cache_ratio:0.4092967818831943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 -DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:29 [batch.py:51] router release req id 8 -INFO 06-24 20:31:29 [manager.py:224] router recive req id 8 cost time 0.10830402374267578 s -INFO 06-24 20:31:29 [manager.py:68] detokenization recv req id 8 cost time 0.11005711555480957 s -DEBUG 06-24 20:31:29 [manager.py:391] Prefill Batch: batch_id=6694361489045797251076872386723220597, time:1750768289.8824768s req_ids:[8] -DEBUG 06-24 20:31:29 [manager.py:391] -ERROR 06-24 20:31:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:170.78590393066406ms total_cost_time:170.82881927490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12586 prompt_cache_len:5151 prompt_cache_ratio:0.40926426187827747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 -DEBUG 06-24 20:31:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:29 [batch.py:51] router release req id 8 -INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s -INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.10993480682373047 s -DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=29662953976377615438430970645064532984, time:1750768290.0605233s req_ids:[8] -DEBUG 06-24 20:31:30 [manager.py:391] -ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:29 lightllm_req_id:8 first_token_cost:204.60271835327148ms total_cost_time:204.65683937072754ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:12587 prompt_cache_len:5151 prompt_cache_ratio:0.40923174704059745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 -DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:30 [batch.py:51] router release req id 8 -INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.11028337478637695 s -INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11228299140930176 s -DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=137804907117672319639438934689464398080, time:1750768290.270396s req_ids:[8] -DEBUG 06-24 20:31:30 [manager.py:391] -ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:210.28661727905273ms total_cost_time:210.33072471618652ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12588 prompt_cache_len:5151 prompt_cache_ratio:0.4091992373689228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 -DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:30 [batch.py:51] router release req id 8 -INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10830020904541016 s -INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031413078308105 s -DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=204934078685628162780595746907253670256, time:1750768290.488244s req_ids:[8] -DEBUG 06-24 20:31:30 [manager.py:391] -ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:389.1465663909912ms total_cost_time:389.1909122467041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12589 prompt_cache_len:5151 prompt_cache_ratio:0.4091667328620224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 -DEBUG 06-24 20:31:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:30 [batch.py:51] router release req id 8 -INFO 06-24 20:31:30 [manager.py:224] router recive req id 8 cost time 0.10947084426879883 s -INFO 06-24 20:31:30 [manager.py:68] detokenization recv req id 8 cost time 0.11156773567199707 s -DEBUG 06-24 20:31:30 [manager.py:391] Prefill Batch: batch_id=263747253370599758403886059081224390866, time:1750768290.8844552s req_ids:[8] -DEBUG 06-24 20:31:30 [manager.py:391] -ERROR 06-24 20:31:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:219.36607360839844ms total_cost_time:219.40922737121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12590 prompt_cache_len:5151 prompt_cache_ratio:0.4091342335186656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 -DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:31 [batch.py:51] router release req id 8 -INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10883593559265137 s -INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11081266403198242 s -DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=81222966924710462973498499514896319919, time:1750768291.1108537s req_ids:[8] -DEBUG 06-24 20:31:31 [manager.py:391] -ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:30 lightllm_req_id:8 first_token_cost:218.37973594665527ms total_cost_time:218.42122077941895ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12591 prompt_cache_len:5151 prompt_cache_ratio:0.4091017393376221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 -DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:31 [batch.py:51] router release req id 8 -INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10838842391967773 s -INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103062629699707 s -DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=213524110672395144227356493622693993170, time:1750768291.3384123s req_ids:[8] -DEBUG 06-24 20:31:31 [manager.py:391] -ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:215.8212661743164ms total_cost_time:215.8658504486084ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12592 prompt_cache_len:5151 prompt_cache_ratio:0.409069250317662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 -DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:31 [batch.py:51] router release req id 8 -INFO 06-24 20:31:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10917544364929199 s -INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11111807823181152 s -DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=227393303705009321525320324786662152630, time:1750768291.5590465s req_ids:[8] -DEBUG 06-24 20:31:31 [manager.py:391] -ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:219.42758560180664ms total_cost_time:219.47216987609863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12593 prompt_cache_len:5151 prompt_cache_ratio:0.40903676645755577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 -DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:31 [batch.py:51] router release req id 8 -INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10886287689208984 s -INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s -DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=191694708510019816298433550817819558622, time:1750768291.7846642s req_ids:[8] -DEBUG 06-24 20:31:31 [manager.py:391] -ERROR 06-24 20:31:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:207.79967308044434ms total_cost_time:207.84354209899902ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12594 prompt_cache_len:5151 prompt_cache_ratio:0.40900428775607434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 -DEBUG 06-24 20:31:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:31 [batch.py:51] router release req id 8 -INFO 06-24 20:31:31 [manager.py:224] router recive req id 8 cost time 0.10893082618713379 s -INFO 06-24 20:31:31 [manager.py:68] detokenization recv req id 8 cost time 0.11081695556640625 s -DEBUG 06-24 20:31:31 [manager.py:391] Prefill Batch: batch_id=176403923553384781517880922750209277091, time:1750768291.999398s req_ids:[8] -DEBUG 06-24 20:31:31 [manager.py:391] -ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:31 lightllm_req_id:8 first_token_cost:384.69386100769043ms total_cost_time:384.74011421203613ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12595 prompt_cache_len:5151 prompt_cache_ratio:0.40897181421198886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 -DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:32 [batch.py:51] router release req id 8 -INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10855388641357422 s -INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.11044549942016602 s -DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=42619443031892006306929523972689808340, time:1750768292.3936365s req_ids:[8] -DEBUG 06-24 20:31:32 [manager.py:391] -ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:215.7583236694336ms total_cost_time:215.80266952514648ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12596 prompt_cache_len:5151 prompt_cache_ratio:0.40893934582407115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 -DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:32 [batch.py:51] router release req id 8 -INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10793423652648926 s -INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.10993671417236328 s -DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=211951306842679428969452697780604626848, time:1750768292.628538s req_ids:[8] -DEBUG 06-24 20:31:32 [manager.py:391] -ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:224.73859786987305ms total_cost_time:224.78294372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12597 prompt_cache_len:5151 prompt_cache_ratio:0.4089068825910931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 -DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:32 [batch.py:51] router release req id 8 -INFO 06-24 20:31:32 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s -INFO 06-24 20:31:32 [manager.py:68] detokenization recv req id 8 cost time 0.11075329780578613 s -DEBUG 06-24 20:31:32 [manager.py:391] Prefill Batch: batch_id=267389676640857197854135979600310912569, time:1750768292.846806s req_ids:[8] -DEBUG 06-24 20:31:32 [manager.py:391] -ERROR 06-24 20:31:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:211.7900848388672ms total_cost_time:211.83490753173828ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12598 prompt_cache_len:5151 prompt_cache_ratio:0.40887442451182726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 -DEBUG 06-24 20:31:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:32 [batch.py:51] router release req id 8 -INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10745000839233398 s -INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.10975933074951172 s -DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=13380925876325099253765580395316687642, time:1750768293.0660636s req_ids:[8] -DEBUG 06-24 20:31:33 [manager.py:391] -ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:32 lightllm_req_id:8 first_token_cost:213.2432460784912ms total_cost_time:213.2880687713623ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12599 prompt_cache_len:5151 prompt_cache_ratio:0.4088419715850464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 -DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:33 [batch.py:51] router release req id 8 -INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s -INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.1108856201171875 s -DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=172688852117008446934263000394113719866, time:1750768293.2856596s req_ids:[8] -DEBUG 06-24 20:31:33 [manager.py:391] -ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:213.3927345275879ms total_cost_time:213.45281600952148ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12600 prompt_cache_len:5151 prompt_cache_ratio:0.4088095238095238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 -DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:33 [batch.py:51] router release req id 8 -INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10820579528808594 s -INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.1101992130279541 s -DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=215183729230698782003611422892713412938, time:1750768293.5142765s req_ids:[8] -DEBUG 06-24 20:31:33 [manager.py:391] -ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:212.60905265808105ms total_cost_time:212.65625953674316ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12601 prompt_cache_len:5151 prompt_cache_ratio:0.408777081184033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 -DEBUG 06-24 20:31:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:33 [batch.py:51] router release req id 8 -INFO 06-24 20:31:33 [manager.py:224] router recive req id 8 cost time 0.10876321792602539 s -INFO 06-24 20:31:33 [manager.py:68] detokenization recv req id 8 cost time 0.11068201065063477 s -DEBUG 06-24 20:31:33 [manager.py:391] Prefill Batch: batch_id=246391954737351516394429210280621599632, time:1750768293.724613s req_ids:[8] -DEBUG 06-24 20:31:33 [manager.py:391] -ERROR 06-24 20:31:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:378.69954109191895ms total_cost_time:378.74555587768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12602 prompt_cache_len:5151 prompt_cache_ratio:0.40874464370734803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 -DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:34 [batch.py:51] router release req id 8 -INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10871195793151855 s -INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11063861846923828 s -DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=4479869930058026851675974494744672441, time:1750768294.1113396s req_ids:[8] -DEBUG 06-24 20:31:34 [manager.py:391] -ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:33 lightllm_req_id:8 first_token_cost:211.49396896362305ms total_cost_time:211.53807640075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12603 prompt_cache_len:5151 prompt_cache_ratio:0.40871221137824326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 -DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:34 [batch.py:51] router release req id 8 -INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10725045204162598 s -INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.10913515090942383 s -DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=98374254496721231379667898280863423693, time:1750768294.3297024s req_ids:[8] -DEBUG 06-24 20:31:34 [manager.py:391] -ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:213.4718894958496ms total_cost_time:213.51909637451172ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12604 prompt_cache_len:5151 prompt_cache_ratio:0.4086797841954935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 -DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:34 [batch.py:51] router release req id 8 -INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.11089706420898438 s -INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11214733123779297 s -DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=39509969314765756603375269342544092063, time:1750768294.549787s req_ids:[8] -DEBUG 06-24 20:31:34 [manager.py:391] -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:213.8369083404541ms total_cost_time:213.8817310333252ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12605 prompt_cache_len:5151 prompt_cache_ratio:0.40864736215787384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 -DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:34 [batch.py:51] router release req id 8 -INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10840320587158203 s -INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11043596267700195 s -DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=56589186161222404721289733482045726887, time:1750768294.770313s req_ids:[8] -DEBUG 06-24 20:31:34 [manager.py:391] -ERROR 06-24 20:31:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:215.43478965759277ms total_cost_time:215.47913551330566ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12606 prompt_cache_len:5151 prompt_cache_ratio:0.4086149452641599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 -DEBUG 06-24 20:31:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:34 [batch.py:51] router release req id 8 -INFO 06-24 20:31:34 [manager.py:224] router recive req id 8 cost time 0.10842108726501465 s -INFO 06-24 20:31:34 [manager.py:68] detokenization recv req id 8 cost time 0.11032271385192871 s -DEBUG 06-24 20:31:34 [manager.py:391] Prefill Batch: batch_id=311817808803823164999071232003172973560, time:1750768294.990627s req_ids:[8] -DEBUG 06-24 20:31:34 [manager.py:391] -ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:34 lightllm_req_id:8 first_token_cost:222.88155555725098ms total_cost_time:222.92423248291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12607 prompt_cache_len:5151 prompt_cache_ratio:0.4085825335131276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 -DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:35 [batch.py:51] router release req id 8 -INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10779500007629395 s -INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.10984635353088379 s -DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=277651683383830551115542736121476538187, time:1750768295.225302s req_ids:[8] -DEBUG 06-24 20:31:35 [manager.py:391] -ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:405.9169292449951ms total_cost_time:405.961275100708ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12608 prompt_cache_len:5151 prompt_cache_ratio:0.4085501269035533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 -DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:35 [batch.py:51] router release req id 8 -INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10963010787963867 s -INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.11173057556152344 s -DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=100945803705115710375740433042445865933, time:1750768295.6297054s req_ids:[8] -DEBUG 06-24 20:31:35 [manager.py:391] -ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:219.29287910461426ms total_cost_time:219.33698654174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12609 prompt_cache_len:5151 prompt_cache_ratio:0.40851772543421366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 -DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:35 [batch.py:51] router release req id 8 -INFO 06-24 20:31:35 [manager.py:224] router recive req id 8 cost time 0.10914349555969238 s -INFO 06-24 20:31:35 [manager.py:68] detokenization recv req id 8 cost time 0.11135268211364746 s -DEBUG 06-24 20:31:35 [manager.py:391] Prefill Batch: batch_id=208096663102930731180491535232959140779, time:1750768295.8511267s req_ids:[8] -DEBUG 06-24 20:31:35 [manager.py:391] -ERROR 06-24 20:31:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:215.88659286499023ms total_cost_time:215.93093872070312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12610 prompt_cache_len:5151 prompt_cache_ratio:0.4084853291038858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 -DEBUG 06-24 20:31:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:35 [batch.py:51] router release req id 8 -INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10940742492675781 s -INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11138200759887695 s -DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=19190277716743124981175776702309379122, time:1750768296.0750487s req_ids:[8] -DEBUG 06-24 20:31:36 [manager.py:391] -ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:35 lightllm_req_id:8 first_token_cost:210.55293083190918ms total_cost_time:210.59894561767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12611 prompt_cache_len:5151 prompt_cache_ratio:0.40845293791134724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 -DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:36 [batch.py:51] router release req id 8 -INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10948371887207031 s -INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11157917976379395 s -DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=128186724662158171630735639117363867303, time:1750768296.303151s req_ids:[8] -DEBUG 06-24 20:31:36 [manager.py:391] -ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:228.73711585998535ms total_cost_time:228.78575325012207ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:12612 prompt_cache_len:5151 prompt_cache_ratio:0.40842055185537585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 -DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:36 [batch.py:51] router release req id 8 -INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10833549499511719 s -INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s -DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=275177509666351406933765296553645368133, time:1750768296.5252721s req_ids:[8] -DEBUG 06-24 20:31:36 [manager.py:391] -ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:203.79185676574707ms total_cost_time:203.83596420288086ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12613 prompt_cache_len:5151 prompt_cache_ratio:0.4083881709347499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 -DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:36 [batch.py:51] router release req id 8 -INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.10777401924133301 s -INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.10972142219543457 s -DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=66684521766955871359378496415934350622, time:1750768296.7348616s req_ids:[8] -DEBUG 06-24 20:31:36 [manager.py:391] -DEBUG 06-24 20:31:36 [stats.py:37] Avg tokens(prompt+generate) throughput: 51363.424 tokens/s -DEBUG 06-24 20:31:36 [stats.py:37] Avg prompt tokens throughput: 51355.268 tokens/s -DEBUG 06-24 20:31:36 [stats.py:37] Avg generate tokens throughput: 8.156 tokens/s -ERROR 06-24 20:31:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:207.6246738433838ms total_cost_time:207.66973495483398ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12614 prompt_cache_len:5151 prompt_cache_ratio:0.408355795148248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 -DEBUG 06-24 20:31:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:36 [batch.py:51] router release req id 8 -INFO 06-24 20:31:36 [manager.py:224] router recive req id 8 cost time 0.1076195240020752 s -INFO 06-24 20:31:36 [manager.py:68] detokenization recv req id 8 cost time 0.10949182510375977 s -DEBUG 06-24 20:31:36 [manager.py:391] Prefill Batch: batch_id=314904055925532987378438403082054922362, time:1750768296.9516168s req_ids:[8] -DEBUG 06-24 20:31:36 [manager.py:391] -ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:36 lightllm_req_id:8 first_token_cost:208.83417129516602ms total_cost_time:208.8785171508789ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12615 prompt_cache_len:5151 prompt_cache_ratio:0.4083234244946492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 -DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:37 [batch.py:51] router release req id 8 -INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102912425994873 s -DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=130424800144092031385697553977849962366, time:1750768297.1666281s req_ids:[8] -DEBUG 06-24 20:31:37 [manager.py:391] -ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:422.1012592315674ms total_cost_time:422.1458435058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12616 prompt_cache_len:5151 prompt_cache_ratio:0.40829105897273305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 -DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:37 [batch.py:51] router release req id 8 -INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10746240615844727 s -INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092832088470459 s -DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=49385675762759096937812373597030655581, time:1750768297.5948493s req_ids:[8] -DEBUG 06-24 20:31:37 [manager.py:391] -ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:198.89426231384277ms total_cost_time:198.94099235534668ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12617 prompt_cache_len:5151 prompt_cache_ratio:0.4082586985812792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 -DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:37 [batch.py:51] router release req id 8 -INFO 06-24 20:31:37 [manager.py:224] router recive req id 8 cost time 0.10738992691040039 s -INFO 06-24 20:31:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s -DEBUG 06-24 20:31:37 [manager.py:391] Prefill Batch: batch_id=254991211651364340267426115037189287333, time:1750768297.8047986s req_ids:[8] -DEBUG 06-24 20:31:37 [manager.py:391] -ERROR 06-24 20:31:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:216.50123596191406ms total_cost_time:216.54582023620605ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12618 prompt_cache_len:5151 prompt_cache_ratio:0.408226343319068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 -DEBUG 06-24 20:31:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:37 [batch.py:51] router release req id 8 -INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10979819297790527 s -INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.11103105545043945 s -DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=128282666838351181239466874871923139196, time:1750768298.0243397s req_ids:[8] -DEBUG 06-24 20:31:38 [manager.py:391] -ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:37 lightllm_req_id:8 first_token_cost:213.7911319732666ms total_cost_time:213.8378620147705ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12619 prompt_cache_len:5151 prompt_cache_ratio:0.4081939931848799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 -DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:38 [batch.py:51] router release req id 8 -INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10780215263366699 s -INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10972476005554199 s -DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=236396750779510924908427445948401879365, time:1750768298.2437232s req_ids:[8] -DEBUG 06-24 20:31:38 [manager.py:391] -ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:212.1715545654297ms total_cost_time:212.21613883972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12620 prompt_cache_len:5151 prompt_cache_ratio:0.40816164817749606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 -DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:38 [batch.py:51] router release req id 8 -INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10762906074523926 s -INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10953521728515625 s -DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=112164838036872430418340581767390249956, time:1750768298.4629874s req_ids:[8] -DEBUG 06-24 20:31:38 [manager.py:391] -ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:215.5766487121582ms total_cost_time:215.620756149292ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12621 prompt_cache_len:5151 prompt_cache_ratio:0.40812930829569766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 -DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:38 [batch.py:51] router release req id 8 -INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10799145698547363 s -INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10919332504272461 s -DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=291818276245563997605504094071066155941, time:1750768298.6802568s req_ids:[8] -DEBUG 06-24 20:31:38 [manager.py:391] -ERROR 06-24 20:31:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:181.1995506286621ms total_cost_time:181.243896484375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12622 prompt_cache_len:5151 prompt_cache_ratio:0.4080969735382665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 -DEBUG 06-24 20:31:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:38 [batch.py:51] router release req id 8 -INFO 06-24 20:31:38 [manager.py:224] router recive req id 8 cost time 0.10691118240356445 s -INFO 06-24 20:31:38 [manager.py:68] detokenization recv req id 8 cost time 0.10873770713806152 s -DEBUG 06-24 20:31:38 [manager.py:391] Prefill Batch: batch_id=83247834203312958373580611365910668147, time:1750768298.8741672s req_ids:[8] -DEBUG 06-24 20:31:38 [manager.py:391] -ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:38 lightllm_req_id:8 first_token_cost:386.6455554962158ms total_cost_time:386.6894245147705ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12623 prompt_cache_len:5151 prompt_cache_ratio:0.4080646439039848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 -DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:39 [batch.py:51] router release req id 8 -INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.1079719066619873 s -INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.10982608795166016 s -DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=172995786502920152628159237456591888276, time:1750768299.26852s req_ids:[8] -DEBUG 06-24 20:31:39 [manager.py:391] -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:39 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:215.8188819885254ms total_cost_time:215.86298942565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12624 prompt_cache_len:5151 prompt_cache_ratio:0.408032319391635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 -DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:39 [batch.py:51] router release req id 8 -INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10767245292663574 s -INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.10922408103942871 s -DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=204387326474375111136189974230427065687, time:1750768299.4933753s req_ids:[8] -DEBUG 06-24 20:31:39 [manager.py:391] -ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:173.1557846069336ms total_cost_time:173.19917678833008ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12625 prompt_cache_len:5151 prompt_cache_ratio:0.408 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 -DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:39 [batch.py:51] router release req id 8 -INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10878467559814453 s -INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060929298400879 s -DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=304642818181010983898201194283979085085, time:1750768299.6713095s req_ids:[8] -DEBUG 06-24 20:31:39 [manager.py:391] -ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:208.3911895751953ms total_cost_time:208.4343433380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12626 prompt_cache_len:5151 prompt_cache_ratio:0.4079676857278631 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 -DEBUG 06-24 20:31:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:39 [batch.py:51] router release req id 8 -INFO 06-24 20:31:39 [manager.py:224] router recive req id 8 cost time 0.10953617095947266 s -INFO 06-24 20:31:39 [manager.py:68] detokenization recv req id 8 cost time 0.11142945289611816 s -DEBUG 06-24 20:31:39 [manager.py:391] Prefill Batch: batch_id=229894501546547682786608550757233509770, time:1750768299.88258s req_ids:[8] -DEBUG 06-24 20:31:39 [manager.py:391] -ERROR 06-24 20:31:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:207.77273178100586ms total_cost_time:207.81779289245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12627 prompt_cache_len:5151 prompt_cache_ratio:0.4079353765740081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 -DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:40 [batch.py:51] router release req id 8 -INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.10801196098327637 s -INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.11036086082458496 s -DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=136062935555223989384343637909698895697, time:1750768300.096595s req_ids:[8] -DEBUG 06-24 20:31:40 [manager.py:391] -ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:39 lightllm_req_id:8 first_token_cost:209.23113822937012ms total_cost_time:209.27810668945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12628 prompt_cache_len:5151 prompt_cache_ratio:0.40790307253721886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 -DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:40 [batch.py:51] router release req id 8 -INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.1084127426147461 s -INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.11043596267700195 s -DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=270474597520245467067732075223890167514, time:1750768300.3128815s req_ids:[8] -DEBUG 06-24 20:31:40 [manager.py:391] -ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:212.29290962219238ms total_cost_time:212.33892440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12629 prompt_cache_len:5151 prompt_cache_ratio:0.40787077361628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 -DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:40 [batch.py:51] router release req id 8 -INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.3120231628417969 s -INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.3141918182373047 s -DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=67244638823611783429480534003193936194, time:1750768300.7550068s req_ids:[8] -DEBUG 06-24 20:31:40 [manager.py:391] -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:442.2643184661865ms total_cost_time:442.3098564147949ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12630 prompt_cache_len:5151 prompt_cache_ratio:0.40783847980997623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 -DEBUG 06-24 20:31:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:40 [batch.py:51] router release req id 8 -INFO 06-24 20:31:40 [manager.py:224] router recive req id 8 cost time 0.10859560966491699 s -INFO 06-24 20:31:40 [manager.py:68] detokenization recv req id 8 cost time 0.1106257438659668 s -DEBUG 06-24 20:31:40 [manager.py:391] Prefill Batch: batch_id=329334651340126069787057424736291684654, time:1750768300.9837117s req_ids:[8] -DEBUG 06-24 20:31:40 [manager.py:391] -ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:40 lightllm_req_id:8 first_token_cost:211.1196517944336ms total_cost_time:211.16280555725098ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12631 prompt_cache_len:5151 prompt_cache_ratio:0.4078061911170929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 -DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:41 [batch.py:51] router release req id 8 -INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10755395889282227 s -INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.1096186637878418 s -DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=149420506903031264561203140609186492280, time:1750768301.2039337s req_ids:[8] -DEBUG 06-24 20:31:41 [manager.py:391] -ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:210.34550666809082ms total_cost_time:210.39175987243652ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12632 prompt_cache_len:5151 prompt_cache_ratio:0.40777390753641546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 -DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:41 [batch.py:51] router release req id 8 -INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10828757286071777 s -INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.11020708084106445 s -DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=69386299104513770406853117549657735794, time:1750768301.420113s req_ids:[8] -DEBUG 06-24 20:31:41 [manager.py:391] -ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:212.59641647338867ms total_cost_time:212.64147758483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12633 prompt_cache_len:5151 prompt_cache_ratio:0.40774162906673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 -DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:41 [batch.py:51] router release req id 8 -INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10882091522216797 s -INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.11072635650634766 s -DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=109523942067844912314154747091131213825, time:1750768301.6392484s req_ids:[8] -DEBUG 06-24 20:31:41 [manager.py:391] -ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:214.7073745727539ms total_cost_time:214.7500514984131ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12634 prompt_cache_len:5151 prompt_cache_ratio:0.4077093557068229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 -DEBUG 06-24 20:31:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:41 [batch.py:51] router release req id 8 -INFO 06-24 20:31:41 [manager.py:224] router recive req id 8 cost time 0.10736441612243652 s -INFO 06-24 20:31:41 [manager.py:68] detokenization recv req id 8 cost time 0.10914349555969238 s -DEBUG 06-24 20:31:41 [manager.py:391] Prefill Batch: batch_id=257722918136644318243733046937363596137, time:1750768301.8586848s req_ids:[8] -DEBUG 06-24 20:31:41 [manager.py:391] -ERROR 06-24 20:31:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:215.4226303100586ms total_cost_time:215.46363830566406ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:12635 prompt_cache_len:5151 prompt_cache_ratio:0.4076770874554808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 -DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:42 [batch.py:51] router release req id 8 -INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.31010913848876953 s -INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.3122293949127197 s -DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=171882036930096393939942073787469503324, time:1750768302.2937193s req_ids:[8] -DEBUG 06-24 20:31:42 [manager.py:391] -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:41 lightllm_req_id:8 first_token_cost:432.2371482849121ms total_cost_time:432.2810173034668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12636 prompt_cache_len:5151 prompt_cache_ratio:0.40764482431149096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 -DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:42 [batch.py:51] router release req id 8 -INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.10843443870544434 s -INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066246032714844 s -DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=248272909618591885139621258374474596962, time:1750768302.5200408s req_ids:[8] -DEBUG 06-24 20:31:42 [manager.py:391] -ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:214.60914611816406ms total_cost_time:214.65134620666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12637 prompt_cache_len:5151 prompt_cache_ratio:0.4076125662736409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 -DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:42 [batch.py:51] router release req id 8 -INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s -INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11045145988464355 s -DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=330430758385035665620981689425030000775, time:1750768302.7392192s req_ids:[8] -DEBUG 06-24 20:31:42 [manager.py:391] -ERROR 06-24 20:31:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:214.79487419128418ms total_cost_time:214.83850479125977ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12638 prompt_cache_len:5151 prompt_cache_ratio:0.4075803133407185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 -DEBUG 06-24 20:31:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:42 [batch.py:51] router release req id 8 -INFO 06-24 20:31:42 [manager.py:224] router recive req id 8 cost time 0.1080935001373291 s -INFO 06-24 20:31:42 [manager.py:68] detokenization recv req id 8 cost time 0.11078071594238281 s -DEBUG 06-24 20:31:42 [manager.py:391] Prefill Batch: batch_id=336640953550511299264891062090898515991, time:1750768302.9625185s req_ids:[8] -DEBUG 06-24 20:31:42 [manager.py:391] -ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:42 lightllm_req_id:8 first_token_cost:216.34769439697266ms total_cost_time:216.39156341552734ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12639 prompt_cache_len:5151 prompt_cache_ratio:0.407548065511512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 -DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:43 [batch.py:51] router release req id 8 -INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10795259475708008 s -INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.1100010871887207 s -DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=230705201634629089095159724397706240062, time:1750768303.1857138s req_ids:[8] -DEBUG 06-24 20:31:43 [manager.py:391] -ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:210.13569831848145ms total_cost_time:210.18075942993164ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12640 prompt_cache_len:5151 prompt_cache_ratio:0.40751582278481013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 -DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:43 [batch.py:51] router release req id 8 -INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10761380195617676 s -INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.10983586311340332 s -DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=334315985789690237470355374025820469899, time:1750768303.4103446s req_ids:[8] -DEBUG 06-24 20:31:43 [manager.py:391] -ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:387.9108428955078ms total_cost_time:387.97712326049805ms,out_token_counter:1 mean_per_token_cost_time: 0.06628036499023438ms prompt_token_num:12641 prompt_cache_len:5151 prompt_cache_ratio:0.407483585159402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 -DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:43 [batch.py:51] router release req id 8 -INFO 06-24 20:31:43 [manager.py:224] router recive req id 8 cost time 0.10854125022888184 s -INFO 06-24 20:31:43 [manager.py:68] detokenization recv req id 8 cost time 0.11081242561340332 s -DEBUG 06-24 20:31:43 [manager.py:391] Prefill Batch: batch_id=206346782021594085184786600562290783007, time:1750768303.7976635s req_ids:[8] -DEBUG 06-24 20:31:43 [manager.py:391] -ERROR 06-24 20:31:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:218.96004676818848ms total_cost_time:219.00367736816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12642 prompt_cache_len:5151 prompt_cache_ratio:0.4074513526340769 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 -DEBUG 06-24 20:31:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:43 [batch.py:51] router release req id 8 -INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10906219482421875 s -INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11114883422851562 s -DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=107375983674889816447451499802973515993, time:1750768304.0184083s req_ids:[8] -DEBUG 06-24 20:31:44 [manager.py:391] -ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:43 lightllm_req_id:8 first_token_cost:213.40417861938477ms total_cost_time:213.44852447509766ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12643 prompt_cache_len:5151 prompt_cache_ratio:0.4074191252076248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 -DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:44 [batch.py:51] router release req id 8 -INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s -INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.10973286628723145 s -DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=114647246359924551708550263009200473238, time:1750768304.238656s req_ids:[8] -DEBUG 06-24 20:31:44 [manager.py:391] -ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:208.96363258361816ms total_cost_time:209.00774002075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12644 prompt_cache_len:5151 prompt_cache_ratio:0.40738690287883583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 -DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:44 [batch.py:51] router release req id 8 -INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10766315460205078 s -INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.1097269058227539 s -DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=201920586067294815178528743635256730166, time:1750768304.4562602s req_ids:[8] -DEBUG 06-24 20:31:44 [manager.py:391] -ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:212.91708946228027ms total_cost_time:212.99052238464355ms,out_token_counter:1 mean_per_token_cost_time: 0.07343292236328125ms prompt_token_num:12645 prompt_cache_len:5151 prompt_cache_ratio:0.40735468564650057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 -DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:44 [batch.py:51] router release req id 8 -INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.10894894599914551 s -INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11088824272155762 s -DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=102036719866630716199268313169755550851, time:1750768304.6750374s req_ids:[8] -DEBUG 06-24 20:31:44 [manager.py:391] -ERROR 06-24 20:31:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:216.86148643493652ms total_cost_time:216.904878616333ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12646 prompt_cache_len:5151 prompt_cache_ratio:0.4073224735094101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 -DEBUG 06-24 20:31:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:44 [batch.py:51] router release req id 8 -INFO 06-24 20:31:44 [manager.py:224] router recive req id 8 cost time 0.1087334156036377 s -INFO 06-24 20:31:44 [manager.py:68] detokenization recv req id 8 cost time 0.11079001426696777 s -DEBUG 06-24 20:31:44 [manager.py:391] Prefill Batch: batch_id=237289973530030445464194296609661123651, time:1750768304.8974686s req_ids:[8] -DEBUG 06-24 20:31:44 [manager.py:391] -ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:44 lightllm_req_id:8 first_token_cost:397.2899913787842ms total_cost_time:397.3357677459717ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12647 prompt_cache_len:5151 prompt_cache_ratio:0.40729026646635563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 -DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:45 [batch.py:51] router release req id 8 -INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10901689529418945 s -INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11100959777832031 s -DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=225514930938261948423374102781879818224, time:1750768305.3026752s req_ids:[8] -DEBUG 06-24 20:31:45 [manager.py:391] -ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:216.22705459594727ms total_cost_time:216.27020835876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12648 prompt_cache_len:5151 prompt_cache_ratio:0.40725806451612906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 -DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:45 [batch.py:51] router release req id 8 -INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10810184478759766 s -INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.1105349063873291 s -DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=203140632764145214237619762604360214064, time:1750768305.525989s req_ids:[8] -DEBUG 06-24 20:31:45 [manager.py:391] -ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:212.02850341796875ms total_cost_time:212.07213401794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12649 prompt_cache_len:5151 prompt_cache_ratio:0.40722586765752233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 -DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:45 [batch.py:51] router release req id 8 -INFO 06-24 20:31:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10854244232177734 s -INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11053109169006348 s -DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=113474873977158108725341840617779561671, time:1750768305.7442186s req_ids:[8] -DEBUG 06-24 20:31:45 [manager.py:391] -ERROR 06-24 20:31:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:213.96183967590332ms total_cost_time:214.0064239501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12650 prompt_cache_len:5151 prompt_cache_ratio:0.4071936758893281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 -DEBUG 06-24 20:31:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:45 [batch.py:51] router release req id 8 -INFO 06-24 20:31:45 [manager.py:224] router recive req id 8 cost time 0.10895609855651855 s -INFO 06-24 20:31:45 [manager.py:68] detokenization recv req id 8 cost time 0.11105108261108398 s -DEBUG 06-24 20:31:45 [manager.py:391] Prefill Batch: batch_id=34859635389651686775303458706713338653, time:1750768305.9623508s req_ids:[8] -DEBUG 06-24 20:31:45 [manager.py:391] -ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:45 lightllm_req_id:8 first_token_cost:207.82017707824707ms total_cost_time:207.84783363342285ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:12651 prompt_cache_len:5151 prompt_cache_ratio:0.4071614892103391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 -DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:46 [batch.py:51] router release req id 8 -INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10808396339416504 s -INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10938215255737305 s -DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=79184847473269647403676145899067018123, time:1750768306.175273s req_ids:[8] -DEBUG 06-24 20:31:46 [manager.py:391] -ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:217.04769134521484ms total_cost_time:217.08989143371582ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12652 prompt_cache_len:5151 prompt_cache_ratio:0.40712930761934873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 -DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:46 [batch.py:51] router release req id 8 -INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.1085512638092041 s -INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10960006713867188 s -DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=288670148886631215790036199712377442612, time:1750768306.3961313s req_ids:[8] -DEBUG 06-24 20:31:46 [manager.py:391] -ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:168.40815544128418ms total_cost_time:168.45107078552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12653 prompt_cache_len:5151 prompt_cache_ratio:0.40709713111515056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 -DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:46 [batch.py:51] router release req id 8 -INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10757184028625488 s -INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.10944032669067383 s -DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=222792922835404461972003460922181995091, time:1750768306.5729573s req_ids:[8] -DEBUG 06-24 20:31:46 [manager.py:391] -ERROR 06-24 20:31:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:31:46 [stats.py:37] Avg tokens(prompt+generate) throughput: 50019.223 tokens/s -DEBUG 06-24 20:31:46 [stats.py:37] Avg prompt tokens throughput: 50011.207 tokens/s -DEBUG 06-24 20:31:46 [stats.py:37] Avg generate tokens throughput: 8.016 tokens/s -INFO 06-24 20:31:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:386.08407974243164ms total_cost_time:386.1274719238281ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12654 prompt_cache_len:5151 prompt_cache_ratio:0.40706495969653866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 -DEBUG 06-24 20:31:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:46 [batch.py:51] router release req id 8 -INFO 06-24 20:31:46 [manager.py:224] router recive req id 8 cost time 0.10803937911987305 s -INFO 06-24 20:31:46 [manager.py:68] detokenization recv req id 8 cost time 0.11021256446838379 s -DEBUG 06-24 20:31:46 [manager.py:391] Prefill Batch: batch_id=241427336653067322443084062843377361305, time:1750768306.9655347s req_ids:[8] -DEBUG 06-24 20:31:46 [manager.py:391] -ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:46 lightllm_req_id:8 first_token_cost:216.25542640686035ms total_cost_time:216.29810333251953ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12655 prompt_cache_len:5151 prompt_cache_ratio:0.4070327933623074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 -INFO 06-24 20:31:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:47 [batch.py:51] router release req id 8 -INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10820508003234863 s -INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.1100006103515625 s -DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=15052656183451195163903035685505686307, time:1750768307.1866841s req_ids:[8] -DEBUG 06-24 20:31:47 [manager.py:391] -ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:171.4019775390625ms total_cost_time:171.4463233947754ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12656 prompt_cache_len:5151 prompt_cache_ratio:0.4070006321112516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 -DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:47 [batch.py:51] router release req id 8 -INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10941553115844727 s -INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.1114494800567627 s -DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=175385716349436320833296663404865624103, time:1750768307.3646085s req_ids:[8] -DEBUG 06-24 20:31:47 [manager.py:391] -ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:204.31900024414062ms total_cost_time:204.3454647064209ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:12657 prompt_cache_len:5151 prompt_cache_ratio:0.4069684759421664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 -DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:47 [batch.py:51] router release req id 8 -INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10741281509399414 s -INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.10927486419677734 s -DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=189011367133681998259886823417022571699, time:1750768307.5761268s req_ids:[8] -DEBUG 06-24 20:31:47 [manager.py:391] -ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:211.54165267944336ms total_cost_time:211.57050132751465ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:12658 prompt_cache_len:5151 prompt_cache_ratio:0.40693632485384734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 -DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:47 [batch.py:51] router release req id 8 -INFO 06-24 20:31:47 [manager.py:224] router recive req id 8 cost time 0.10930466651916504 s -INFO 06-24 20:31:47 [manager.py:68] detokenization recv req id 8 cost time 0.11123132705688477 s -DEBUG 06-24 20:31:47 [manager.py:391] Prefill Batch: batch_id=103805325351368421629367651384329074511, time:1750768307.7913852s req_ids:[8] -DEBUG 06-24 20:31:47 [manager.py:391] -ERROR 06-24 20:31:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:215.61050415039062ms total_cost_time:215.63124656677246ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12659 prompt_cache_len:5151 prompt_cache_ratio:0.40690417884509045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 -DEBUG 06-24 20:31:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:47 [batch.py:51] router release req id 8 -INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10879802703857422 s -INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11072254180908203 s -DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=211933906126234192575776803144193416354, time:1750768308.0121512s req_ids:[8] -DEBUG 06-24 20:31:48 [manager.py:391] -ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:47 lightllm_req_id:8 first_token_cost:211.991548538208ms total_cost_time:212.01324462890625ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12660 prompt_cache_len:5151 prompt_cache_ratio:0.40687203791469195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 -DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:48 [batch.py:51] router release req id 8 -INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10790467262268066 s -INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.10986804962158203 s -DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=24670051913982760462006229758086957340, time:1750768308.232683s req_ids:[8] -DEBUG 06-24 20:31:48 [manager.py:391] -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:48 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:381.38699531555176ms total_cost_time:381.43348693847656ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12661 prompt_cache_len:5151 prompt_cache_ratio:0.40683990206144854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 -DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:48 [batch.py:51] router release req id 8 -INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10827994346618652 s -INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11026358604431152 s -DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=126083402449422554401665426936836769630, time:1750768308.6183367s req_ids:[8] -DEBUG 06-24 20:31:48 [manager.py:391] -ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:213.0563259124756ms total_cost_time:213.10043334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12662 prompt_cache_len:5151 prompt_cache_ratio:0.4068077712841573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 -DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:48 [batch.py:51] router release req id 8 -INFO 06-24 20:31:48 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s -INFO 06-24 20:31:48 [manager.py:68] detokenization recv req id 8 cost time 0.11012840270996094 s -DEBUG 06-24 20:31:48 [manager.py:391] Prefill Batch: batch_id=338761029901149407147707023323693528291, time:1750768308.8387291s req_ids:[8] -DEBUG 06-24 20:31:48 [manager.py:391] -ERROR 06-24 20:31:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:213.40012550354004ms total_cost_time:213.44494819641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12663 prompt_cache_len:5151 prompt_cache_ratio:0.4067756455816157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 -DEBUG 06-24 20:31:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:48 [batch.py:51] router release req id 8 -INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10886764526367188 s -INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.11089253425598145 s -DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=179643077432267788246854826850319669097, time:1750768309.0572512s req_ids:[8] -DEBUG 06-24 20:31:49 [manager.py:391] -ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:48 lightllm_req_id:8 first_token_cost:214.84708786010742ms total_cost_time:214.8904800415039ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12664 prompt_cache_len:5151 prompt_cache_ratio:0.4067435249526216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 -DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:49 [batch.py:51] router release req id 8 -INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10624527931213379 s -INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.10817766189575195 s -DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=151060297139733678092011687537845316169, time:1750768309.2781818s req_ids:[8] -DEBUG 06-24 20:31:49 [manager.py:391] -ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:215.67964553833008ms total_cost_time:215.72303771972656ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12665 prompt_cache_len:5151 prompt_cache_ratio:0.40671140939597317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 -DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:49 [batch.py:51] router release req id 8 -INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10843420028686523 s -INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.11045432090759277 s -DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=223412114021006797894461846542853570042, time:1750768309.5000458s req_ids:[8] -DEBUG 06-24 20:31:49 [manager.py:391] -ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:214.29729461669922ms total_cost_time:214.34426307678223ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12666 prompt_cache_len:5151 prompt_cache_ratio:0.40667929891046894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 -DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:49 [batch.py:51] router release req id 8 -INFO 06-24 20:31:49 [manager.py:224] router recive req id 8 cost time 0.10710525512695312 s -INFO 06-24 20:31:49 [manager.py:68] detokenization recv req id 8 cost time 0.10902166366577148 s -DEBUG 06-24 20:31:49 [manager.py:391] Prefill Batch: batch_id=247148326681128066113319362166926025056, time:1750768309.722744s req_ids:[8] -DEBUG 06-24 20:31:49 [manager.py:391] -ERROR 06-24 20:31:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:347.5487232208252ms total_cost_time:347.5940227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12667 prompt_cache_len:5151 prompt_cache_ratio:0.406647193494908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 -DEBUG 06-24 20:31:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:49 [batch.py:51] router release req id 8 -INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10783839225769043 s -INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.10988974571228027 s -DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=103032247160829965697417290584232196604, time:1750768310.0752413s req_ids:[8] -DEBUG 06-24 20:31:50 [manager.py:391] -ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:49 lightllm_req_id:8 first_token_cost:210.51788330078125ms total_cost_time:210.56294441223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12668 prompt_cache_len:5151 prompt_cache_ratio:0.4066150931480897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 -DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:50 [batch.py:51] router release req id 8 -INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10957884788513184 s -INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11151528358459473 s -DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=154016029002090736858651509580717042448, time:1750768310.2934964s req_ids:[8] -DEBUG 06-24 20:31:50 [manager.py:391] -ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:210.1879119873047ms total_cost_time:210.27469635009766ms,out_token_counter:1 mean_per_token_cost_time: 0.08678436279296875ms prompt_token_num:12669 prompt_cache_len:5151 prompt_cache_ratio:0.40658299786881363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 -DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:50 [batch.py:51] router release req id 8 -INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10894203186035156 s -INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11097598075866699 s -DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=294052799059985612734264527520075482852, time:1750768310.5149634s req_ids:[8] -DEBUG 06-24 20:31:50 [manager.py:391] -ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:214.67113494873047ms total_cost_time:214.71381187438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12670 prompt_cache_len:5151 prompt_cache_ratio:0.40655090765588003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 -DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:50 [batch.py:51] router release req id 8 -INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10744619369506836 s -INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.1094353199005127 s -DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=261407527087926550242535254081976893256, time:1750768310.7337236s req_ids:[8] -DEBUG 06-24 20:31:50 [manager.py:391] -ERROR 06-24 20:31:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:213.35315704345703ms total_cost_time:213.39893341064453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12671 prompt_cache_len:5151 prompt_cache_ratio:0.40651882250808935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 -DEBUG 06-24 20:31:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:50 [batch.py:51] router release req id 8 -INFO 06-24 20:31:50 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s -INFO 06-24 20:31:50 [manager.py:68] detokenization recv req id 8 cost time 0.11052417755126953 s -DEBUG 06-24 20:31:50 [manager.py:391] Prefill Batch: batch_id=314205030846017005052871133587295736800, time:1750768310.954373s req_ids:[8] -DEBUG 06-24 20:31:50 [manager.py:391] -ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:50 lightllm_req_id:8 first_token_cost:214.86902236938477ms total_cost_time:214.91241455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12672 prompt_cache_len:5151 prompt_cache_ratio:0.40648674242424243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 -DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:51 [batch.py:51] router release req id 8 -INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.10738229751586914 s -INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10934972763061523 s -DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=157931708247133893830656865215721452517, time:1750768311.1769524s req_ids:[8] -DEBUG 06-24 20:31:51 [manager.py:391] -ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:382.5359344482422ms total_cost_time:382.5807571411133ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12673 prompt_cache_len:5151 prompt_cache_ratio:0.4064546674031405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 -DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:51 [batch.py:51] router release req id 8 -INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.1079092025756836 s -INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10990381240844727 s -DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=167077517496578843993007422321866780804, time:1750768311.5629737s req_ids:[8] -DEBUG 06-24 20:31:51 [manager.py:391] -ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:210.04438400268555ms total_cost_time:210.08992195129395ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12674 prompt_cache_len:5151 prompt_cache_ratio:0.4064225974435853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 -DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:51 [batch.py:51] router release req id 8 -INFO 06-24 20:31:51 [manager.py:224] router recive req id 8 cost time 0.10710835456848145 s -INFO 06-24 20:31:51 [manager.py:68] detokenization recv req id 8 cost time 0.10903143882751465 s -DEBUG 06-24 20:31:51 [manager.py:391] Prefill Batch: batch_id=295740367576957540609261981152313585471, time:1750768311.7938097s req_ids:[8] -DEBUG 06-24 20:31:51 [manager.py:391] -ERROR 06-24 20:31:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:228.9111614227295ms total_cost_time:228.95479202270508ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12675 prompt_cache_len:5151 prompt_cache_ratio:0.4063905325443787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 -DEBUG 06-24 20:31:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:51 [batch.py:51] router release req id 8 -INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10876965522766113 s -INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11072874069213867 s -DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=298061572569535717755348666789268831610, time:1750768312.0154076s req_ids:[8] -DEBUG 06-24 20:31:52 [manager.py:391] -ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:51 lightllm_req_id:8 first_token_cost:212.09955215454102ms total_cost_time:212.144136428833ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12676 prompt_cache_len:5151 prompt_cache_ratio:0.4063584727043231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 -DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:52 [batch.py:51] router release req id 8 -INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10800957679748535 s -INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11001944541931152 s -DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=136602193909651840998475188496251938787, time:1750768312.234483s req_ids:[8] -DEBUG 06-24 20:31:52 [manager.py:391] -ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:214.76435661315918ms total_cost_time:214.80894088745117ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12677 prompt_cache_len:5151 prompt_cache_ratio:0.40632641792222135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 -DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:52 [batch.py:51] router release req id 8 -INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10886740684509277 s -INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.11098861694335938 s -DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=141251011661322143949347651505877564268, time:1750768312.457678s req_ids:[8] -DEBUG 06-24 20:31:52 [manager.py:391] -ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:208.33468437194824ms total_cost_time:208.37903022766113ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12678 prompt_cache_len:5151 prompt_cache_ratio:0.4062943681968765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 -DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:52 [batch.py:51] router release req id 8 -INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s -INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.10865044593811035 s -DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=336715324598648123939928572162448288918, time:1750768312.6725504s req_ids:[8] -DEBUG 06-24 20:31:52 [manager.py:391] -ERROR 06-24 20:31:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:173.01654815673828ms total_cost_time:173.05898666381836ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12679 prompt_cache_len:5151 prompt_cache_ratio:0.406262323527092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 -DEBUG 06-24 20:31:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:52 [batch.py:51] router release req id 8 -INFO 06-24 20:31:52 [manager.py:224] router recive req id 8 cost time 0.10798311233520508 s -INFO 06-24 20:31:52 [manager.py:68] detokenization recv req id 8 cost time 0.10997819900512695 s -DEBUG 06-24 20:31:52 [manager.py:391] Prefill Batch: batch_id=182147693893813811509103063486495442975, time:1750768312.8503416s req_ids:[8] -DEBUG 06-24 20:31:52 [manager.py:391] -ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:52 lightllm_req_id:8 first_token_cost:371.82164192199707ms total_cost_time:371.86670303344727ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12680 prompt_cache_len:5151 prompt_cache_ratio:0.40623028391167193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 -DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:53 [batch.py:51] router release req id 8 -INFO 06-24 20:31:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:31:53 [statics_utils.py:24] mean first cost: 229.8075781236348 ms -INFO 06-24 20:31:53 [statics_utils.py:24] mean per token cost: 0.061028083524552496 ms -INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10729265213012695 s -INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.10924053192138672 s -DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=100533128434540306893089057140164652921, time:1750768313.228158s req_ids:[8] -DEBUG 06-24 20:31:53 [manager.py:391] -ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:212.59117126464844ms total_cost_time:212.63432502746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12681 prompt_cache_len:5151 prompt_cache_ratio:0.4061982493494204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 -DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:53 [batch.py:51] router release req id 8 -INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10828590393066406 s -INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.11043643951416016 s -DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=113012843392126164701435233802788079741, time:1750768313.4473777s req_ids:[8] -DEBUG 06-24 20:31:53 [manager.py:391] -ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:212.3394012451172ms total_cost_time:212.38398551940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12682 prompt_cache_len:5151 prompt_cache_ratio:0.4061662198391421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 -DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:53 [batch.py:51] router release req id 8 -INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.10792946815490723 s -INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.10992121696472168 s -DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=187476524882524153908230745172328359621, time:1750768313.666465s req_ids:[8] -DEBUG 06-24 20:31:53 [manager.py:391] -ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:214.44272994995117ms total_cost_time:214.48779106140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12683 prompt_cache_len:5151 prompt_cache_ratio:0.406134195379642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 -DEBUG 06-24 20:31:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:53 [batch.py:51] router release req id 8 -INFO 06-24 20:31:53 [manager.py:224] router recive req id 8 cost time 0.1084144115447998 s -INFO 06-24 20:31:53 [manager.py:68] detokenization recv req id 8 cost time 0.11034107208251953 s -DEBUG 06-24 20:31:53 [manager.py:391] Prefill Batch: batch_id=194151084748129204874002022291785510731, time:1750768313.8838358s req_ids:[8] -DEBUG 06-24 20:31:53 [manager.py:391] -ERROR 06-24 20:31:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:216.7351245880127ms total_cost_time:216.77899360656738ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12684 prompt_cache_len:5151 prompt_cache_ratio:0.40610217596972564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 -DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:54 [batch.py:51] router release req id 8 -INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10815763473510742 s -INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s -DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=231502852230472474379554727890179288947, time:1750768314.1058154s req_ids:[8] -DEBUG 06-24 20:31:54 [manager.py:391] -ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:53 lightllm_req_id:8 first_token_cost:214.7071361541748ms total_cost_time:214.7519588470459ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12685 prompt_cache_len:5151 prompt_cache_ratio:0.40607016160819864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 -DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:54 [batch.py:51] router release req id 8 -INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10867619514465332 s -INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11065554618835449 s -DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=337640953838531791581590691259983693978, time:1750768314.3249s req_ids:[8] -DEBUG 06-24 20:31:54 [manager.py:391] -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:31:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:387.33482360839844ms total_cost_time:387.3770236968994ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12686 prompt_cache_len:5151 prompt_cache_ratio:0.40603815229386725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 -DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:54 [batch.py:51] router release req id 8 -INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.1087198257446289 s -INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11047959327697754 s -DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=225979910358022397298435134781090866206, time:1750768314.7197163s req_ids:[8] -DEBUG 06-24 20:31:54 [manager.py:391] -ERROR 06-24 20:31:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:213.49549293518066ms total_cost_time:213.54007720947266ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12687 prompt_cache_len:5151 prompt_cache_ratio:0.40600614802553797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 -DEBUG 06-24 20:31:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:54 [batch.py:51] router release req id 8 -INFO 06-24 20:31:54 [manager.py:224] router recive req id 8 cost time 0.10914015769958496 s -INFO 06-24 20:31:54 [manager.py:68] detokenization recv req id 8 cost time 0.11112761497497559 s -DEBUG 06-24 20:31:54 [manager.py:391] Prefill Batch: batch_id=236939109757381976148698086753640529674, time:1750768314.941101s req_ids:[8] -DEBUG 06-24 20:31:54 [manager.py:391] -ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:54 lightllm_req_id:8 first_token_cost:213.96422386169434ms total_cost_time:214.00928497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12688 prompt_cache_len:5151 prompt_cache_ratio:0.40597414880201765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 -DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:55 [batch.py:51] router release req id 8 -INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.1079399585723877 s -INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.10996723175048828 s -DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=213850440299692461833806423811240665997, time:1750768315.1596014s req_ids:[8] -DEBUG 06-24 20:31:55 [manager.py:391] -ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:214.60819244384766ms total_cost_time:214.65277671813965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12689 prompt_cache_len:5151 prompt_cache_ratio:0.40594215462211364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 -DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:55 [batch.py:51] router release req id 8 -INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10898303985595703 s -INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.11091184616088867 s -DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=134765064418347960226781513563734415505, time:1750768315.3802814s req_ids:[8] -DEBUG 06-24 20:31:55 [manager.py:391] -ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:207.16023445129395ms total_cost_time:207.22222328186035ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12690 prompt_cache_len:5151 prompt_cache_ratio:0.40591016548463354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 -DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:55 [batch.py:51] router release req id 8 -INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10769867897033691 s -INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.10967016220092773 s -DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=200159742716216395068022680282094185185, time:1750768315.5951662s req_ids:[8] -DEBUG 06-24 20:31:55 [manager.py:391] -ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:214.72716331481934ms total_cost_time:214.74814414978027ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12691 prompt_cache_len:5151 prompt_cache_ratio:0.4058781813883855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 -DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:55 [batch.py:51] router release req id 8 -INFO 06-24 20:31:55 [manager.py:224] router recive req id 8 cost time 0.10953640937805176 s -INFO 06-24 20:31:55 [manager.py:68] detokenization recv req id 8 cost time 0.11148357391357422 s -DEBUG 06-24 20:31:55 [manager.py:391] Prefill Batch: batch_id=36977750861533174894416739268900112294, time:1750768315.8162622s req_ids:[8] -DEBUG 06-24 20:31:55 [manager.py:391] -ERROR 06-24 20:31:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:216.8738842010498ms total_cost_time:216.9182300567627ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12692 prompt_cache_len:5151 prompt_cache_ratio:0.4058462023321778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 -DEBUG 06-24 20:31:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:55 [batch.py:51] router release req id 8 -INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.1089329719543457 s -INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11046433448791504 s -DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=320355215567574202974514810384319495552, time:1750768316.0366042s req_ids:[8] -DEBUG 06-24 20:31:56 [manager.py:391] -ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:55 lightllm_req_id:8 first_token_cost:211.99369430541992ms total_cost_time:212.0356559753418ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12693 prompt_cache_len:5151 prompt_cache_ratio:0.40581422831481917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 -DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:56 [batch.py:51] router release req id 8 -INFO 06-24 20:31:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.1080942153930664 s -INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11009907722473145 s -DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=86927203244066313732480696205449161596, time:1750768316.25664s req_ids:[8] -DEBUG 06-24 20:31:56 [manager.py:391] -ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:389.25766944885254ms total_cost_time:389.301061630249ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12694 prompt_cache_len:5151 prompt_cache_ratio:0.40578225933511897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 -DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:56 [batch.py:51] router release req id 8 -INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.10785484313964844 s -INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.10982513427734375 s -DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=250796340074148087698914176544532451194, time:1750768316.6511972s req_ids:[8] -DEBUG 06-24 20:31:56 [manager.py:391] -ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:216.7665958404541ms total_cost_time:216.82167053222656ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:12695 prompt_cache_len:5151 prompt_cache_ratio:0.40575029539188656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 -DEBUG 06-24 20:31:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:56 [batch.py:51] router release req id 8 -INFO 06-24 20:31:56 [manager.py:224] router recive req id 8 cost time 0.11043453216552734 s -INFO 06-24 20:31:56 [manager.py:68] detokenization recv req id 8 cost time 0.11251473426818848 s -DEBUG 06-24 20:31:56 [manager.py:391] Prefill Batch: batch_id=251084081853436642648179789449691400558, time:1750768316.8758671s req_ids:[8] -DEBUG 06-24 20:31:56 [manager.py:391] -DEBUG 06-24 20:31:56 [stats.py:37] Avg tokens(prompt+generate) throughput: 53055.943 tokens/s -DEBUG 06-24 20:31:56 [stats.py:37] Avg prompt tokens throughput: 53047.673 tokens/s -DEBUG 06-24 20:31:56 [stats.py:37] Avg generate tokens throughput: 8.270 tokens/s -ERROR 06-24 20:31:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:215.82436561584473ms total_cost_time:215.8679962158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12696 prompt_cache_len:5151 prompt_cache_ratio:0.40571833648393196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 -DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:57 [batch.py:51] router release req id 8 -INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10769319534301758 s -INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.10972714424133301 s -DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=195134987161558625318607420898782625239, time:1750768317.0956604s req_ids:[8] -DEBUG 06-24 20:31:57 [manager.py:391] -ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:56 lightllm_req_id:8 first_token_cost:213.348388671875ms total_cost_time:213.3936882019043ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12697 prompt_cache_len:5151 prompt_cache_ratio:0.40568638261006534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 -DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:57 [batch.py:51] router release req id 8 -INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10779118537902832 s -INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.10969185829162598 s -DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=229315824314945755078829511737606730224, time:1750768317.3148115s req_ids:[8] -DEBUG 06-24 20:31:57 [manager.py:391] -ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:212.40901947021484ms total_cost_time:212.46767044067383ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12698 prompt_cache_len:5151 prompt_cache_ratio:0.4056544337690975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 -DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:57 [batch.py:51] router release req id 8 -INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10948705673217773 s -INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.11167073249816895 s -DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=280036031425414493869496818798353662600, time:1750768317.5381594s req_ids:[8] -DEBUG 06-24 20:31:57 [manager.py:391] -ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:216.43805503845215ms total_cost_time:216.4902687072754ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:12699 prompt_cache_len:5151 prompt_cache_ratio:0.40562248995983935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 -DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:57 [batch.py:51] router release req id 8 -INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.1077885627746582 s -INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.1098167896270752 s -DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=297255116071985496714719757389536534677, time:1750768317.7591586s req_ids:[8] -DEBUG 06-24 20:31:57 [manager.py:391] -ERROR 06-24 20:31:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:214.86496925354004ms total_cost_time:214.9221897125244ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:12700 prompt_cache_len:5151 prompt_cache_ratio:0.4055905511811024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 -DEBUG 06-24 20:31:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:57 [batch.py:51] router release req id 8 -INFO 06-24 20:31:57 [manager.py:224] router recive req id 8 cost time 0.10833477973937988 s -INFO 06-24 20:31:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027908325195312 s -DEBUG 06-24 20:31:57 [manager.py:391] Prefill Batch: batch_id=176513291546951222860882989056596064022, time:1750768317.9795287s req_ids:[8] -DEBUG 06-24 20:31:57 [manager.py:391] -ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:57 lightllm_req_id:8 first_token_cost:386.7781162261963ms total_cost_time:386.80028915405273ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12701 prompt_cache_len:5151 prompt_cache_ratio:0.4055586174316983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 -DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:58 [batch.py:51] router release req id 8 -INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10522961616516113 s -INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.10681509971618652 s -DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=211785063235053528479761411671688898845, time:1750768318.3747344s req_ids:[8] -DEBUG 06-24 20:31:58 [manager.py:391] -ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:213.32573890686035ms total_cost_time:213.36960792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12702 prompt_cache_len:5151 prompt_cache_ratio:0.4055266887104393 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 -DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:58 [batch.py:51] router release req id 8 -INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s -INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.11023664474487305 s -DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=33870205628208237864658473636951548574, time:1750768318.5912044s req_ids:[8] -DEBUG 06-24 20:31:58 [manager.py:391] -ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:211.20381355285645ms total_cost_time:211.24744415283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12703 prompt_cache_len:5151 prompt_cache_ratio:0.4054947650161379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 -DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:58 [batch.py:51] router release req id 8 -INFO 06-24 20:31:58 [manager.py:224] router recive req id 8 cost time 0.10725784301757812 s -INFO 06-24 20:31:58 [manager.py:68] detokenization recv req id 8 cost time 0.10927462577819824 s -DEBUG 06-24 20:31:58 [manager.py:391] Prefill Batch: batch_id=189804254040746653902469843935530309801, time:1750768318.8098285s req_ids:[8] -DEBUG 06-24 20:31:58 [manager.py:391] -ERROR 06-24 20:31:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:213.32144737243652ms total_cost_time:213.36603164672852ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12704 prompt_cache_len:5151 prompt_cache_ratio:0.40546284634760704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 -DEBUG 06-24 20:31:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:58 [batch.py:51] router release req id 8 -INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10828542709350586 s -INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11020994186401367 s -DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=37839808140394174915566136243964434851, time:1750768319.0312932s req_ids:[8] -DEBUG 06-24 20:31:59 [manager.py:391] -ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:58 lightllm_req_id:8 first_token_cost:209.51128005981445ms total_cost_time:209.55514907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12705 prompt_cache_len:5151 prompt_cache_ratio:0.40543093270366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 -DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:59 [batch.py:51] router release req id 8 -INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s -INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11063861846923828 s -DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=80260118147687216734211625097801592920, time:1750768319.247347s req_ids:[8] -DEBUG 06-24 20:31:59 [manager.py:391] -ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:210.13259887695312ms total_cost_time:210.17742156982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12706 prompt_cache_len:5151 prompt_cache_ratio:0.40539902408311035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 -DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:59 [batch.py:51] router release req id 8 -INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.10873723030090332 s -INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.11071014404296875 s -DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=263420373889561741424069758297888689747, time:1750768319.4639723s req_ids:[8] -DEBUG 06-24 20:31:59 [manager.py:391] -ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:213.23752403259277ms total_cost_time:213.28282356262207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12707 prompt_cache_len:5151 prompt_cache_ratio:0.4053671204847722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 -DEBUG 06-24 20:31:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:31:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:31:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:31:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:31:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:31:59 [batch.py:51] router release req id 8 -INFO 06-24 20:31:59 [manager.py:224] router recive req id 8 cost time 0.31235194206237793 s -INFO 06-24 20:31:59 [manager.py:68] detokenization recv req id 8 cost time 0.3143332004547119 s -DEBUG 06-24 20:31:59 [manager.py:391] Prefill Batch: batch_id=102555876953742025316289845346229363665, time:1750768319.8943024s req_ids:[8] -DEBUG 06-24 20:31:59 [manager.py:391] -ERROR 06-24 20:31:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:31:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:31:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:421.8618869781494ms total_cost_time:421.9026565551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:12708 prompt_cache_len:5151 prompt_cache_ratio:0.40533522190745985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:31:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 -DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:00 [batch.py:51] router release req id 8 -INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10731887817382812 s -INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.1092674732208252 s -DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=192315838924288419967727211220898720620, time:1750768320.1125433s req_ids:[8] -DEBUG 06-24 20:32:00 [manager.py:391] -ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:31:59 lightllm_req_id:8 first_token_cost:214.74194526672363ms total_cost_time:214.78533744812012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12709 prompt_cache_len:5151 prompt_cache_ratio:0.4053033283499882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 -DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:00 [batch.py:51] router release req id 8 -INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10936832427978516 s -INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11160874366760254 s -DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=71582635394706380470695983551639098254, time:1750768320.334193s req_ids:[8] -DEBUG 06-24 20:32:00 [manager.py:391] -ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69385719299316ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12710 prompt_cache_len:5151 prompt_cache_ratio:0.4052714398111723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 -DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:00 [batch.py:51] router release req id 8 -INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10767269134521484 s -INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.10987210273742676 s -DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=71884242513739346849616461547824643154, time:1750768320.5507705s req_ids:[8] -DEBUG 06-24 20:32:00 [manager.py:391] -ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:211.4260196685791ms total_cost_time:211.4696502685547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12711 prompt_cache_len:5151 prompt_cache_ratio:0.4052395562898277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 -DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:00 [batch.py:51] router release req id 8 -INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10859155654907227 s -INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11079859733581543 s -DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=106185683891478414243840823376584271104, time:1750768320.7696738s req_ids:[8] -DEBUG 06-24 20:32:00 [manager.py:391] -ERROR 06-24 20:32:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:212.91875839233398ms total_cost_time:212.9647731781006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12712 prompt_cache_len:5151 prompt_cache_ratio:0.4052076777847703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 -DEBUG 06-24 20:32:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:00 [batch.py:51] router release req id 8 -INFO 06-24 20:32:00 [manager.py:224] router recive req id 8 cost time 0.10969042778015137 s -INFO 06-24 20:32:00 [manager.py:68] detokenization recv req id 8 cost time 0.11168146133422852 s -DEBUG 06-24 20:32:00 [manager.py:391] Prefill Batch: batch_id=237319618237049496061963558664816810396, time:1750768320.9891071s req_ids:[8] -DEBUG 06-24 20:32:00 [manager.py:391] -ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:00 lightllm_req_id:8 first_token_cost:211.87591552734375ms total_cost_time:211.92169189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12713 prompt_cache_len:5151 prompt_cache_ratio:0.4051758042948163 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 -DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:01 [batch.py:51] router release req id 8 -INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.3092350959777832 s -INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.3112211227416992 s -DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=80543863261359082067204434078819324570, time:1750768321.4195795s req_ids:[8] -DEBUG 06-24 20:32:01 [manager.py:391] -ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:432.58118629455566ms total_cost_time:432.62577056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12714 prompt_cache_len:5151 prompt_cache_ratio:0.40514393581878244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 -DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:01 [batch.py:51] router release req id 8 -INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s -INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.11081790924072266 s -DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=338817078839101290897536857631327242476, time:1750768321.6452143s req_ids:[8] -DEBUG 06-24 20:32:01 [manager.py:391] -ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:212.97121047973633ms total_cost_time:213.01698684692383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12715 prompt_cache_len:5151 prompt_cache_ratio:0.40511207235548563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 -DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:01 [batch.py:51] router release req id 8 -INFO 06-24 20:32:01 [manager.py:224] router recive req id 8 cost time 0.10873794555664062 s -INFO 06-24 20:32:01 [manager.py:68] detokenization recv req id 8 cost time 0.11081647872924805 s -DEBUG 06-24 20:32:01 [manager.py:391] Prefill Batch: batch_id=321556144501425804111108793326476154995, time:1750768321.863915s req_ids:[8] -DEBUG 06-24 20:32:01 [manager.py:391] -ERROR 06-24 20:32:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:214.5373821258545ms total_cost_time:214.58196640014648ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12716 prompt_cache_len:5151 prompt_cache_ratio:0.4050802139037433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 -DEBUG 06-24 20:32:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:01 [batch.py:51] router release req id 8 -INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.1080923080444336 s -INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11017417907714844 s -DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=253853648231699859043871091763919089815, time:1750768322.0859714s req_ids:[8] -DEBUG 06-24 20:32:02 [manager.py:391] -ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:01 lightllm_req_id:8 first_token_cost:215.06118774414062ms total_cost_time:215.1041030883789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12717 prompt_cache_len:5151 prompt_cache_ratio:0.4050483604623732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 -DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:02 [batch.py:51] router release req id 8 -INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10805702209472656 s -INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11009955406188965 s -DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=63005054283085304125248241768603633889, time:1750768322.3060715s req_ids:[8] -DEBUG 06-24 20:32:02 [manager.py:391] -ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:211.11130714416504ms total_cost_time:211.15517616271973ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12718 prompt_cache_len:5151 prompt_cache_ratio:0.40501651203019345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 -DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:02 [batch.py:51] router release req id 8 -INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10851812362670898 s -INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.11061525344848633 s -DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=164518756011714732286909320767630977826, time:1750768322.5252562s req_ids:[8] -DEBUG 06-24 20:32:02 [manager.py:391] -ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:367.5415515899658ms total_cost_time:367.5854206085205ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12719 prompt_cache_len:5151 prompt_cache_ratio:0.4049846686060225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 -DEBUG 06-24 20:32:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:02 [batch.py:51] router release req id 8 -INFO 06-24 20:32:02 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s -INFO 06-24 20:32:02 [manager.py:68] detokenization recv req id 8 cost time 0.10989499092102051 s -DEBUG 06-24 20:32:02 [manager.py:391] Prefill Batch: batch_id=24080165050801856306512161003319405163, time:1750768322.8972886s req_ids:[8] -DEBUG 06-24 20:32:02 [manager.py:391] -ERROR 06-24 20:32:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:205.6727409362793ms total_cost_time:205.71541786193848ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12720 prompt_cache_len:5151 prompt_cache_ratio:0.4049528301886792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 -DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:03 [batch.py:51] router release req id 8 -INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10722994804382324 s -INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.10916376113891602 s -DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=96672564606290184475236175225142404779, time:1750768323.110316s req_ids:[8] -DEBUG 06-24 20:32:03 [manager.py:391] -ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:02 lightllm_req_id:8 first_token_cost:205.9018611907959ms total_cost_time:205.9457302093506ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12721 prompt_cache_len:5151 prompt_cache_ratio:0.40492099677698296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 -DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:03 [batch.py:51] router release req id 8 -INFO 06-24 20:32:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10873746871948242 s -INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s -DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=115159087678863655984229130903175440117, time:1750768323.3216183s req_ids:[8] -DEBUG 06-24 20:32:03 [manager.py:391] -ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:213.1955623626709ms total_cost_time:213.2406234741211ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12722 prompt_cache_len:5151 prompt_cache_ratio:0.4048891683697532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 -DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:03 [batch.py:51] router release req id 8 -INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10675954818725586 s -INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.10866808891296387 s -DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=332868064180803166802322206321798698002, time:1750768323.566484s req_ids:[8] -DEBUG 06-24 20:32:03 [manager.py:391] -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:228.6677360534668ms total_cost_time:228.7125587463379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12723 prompt_cache_len:5151 prompt_cache_ratio:0.40485734496580994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 -DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:03 [batch.py:51] router release req id 8 -INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s -INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.1105351448059082 s -DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=39464432188160806102523052170722373423, time:1750768323.7764695s req_ids:[8] -DEBUG 06-24 20:32:03 [manager.py:391] -ERROR 06-24 20:32:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:205.09815216064453ms total_cost_time:205.12008666992188ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:12724 prompt_cache_len:5151 prompt_cache_ratio:0.4048255265639736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 -DEBUG 06-24 20:32:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:03 [batch.py:51] router release req id 8 -INFO 06-24 20:32:03 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s -INFO 06-24 20:32:03 [manager.py:68] detokenization recv req id 8 cost time 0.11052536964416504 s -DEBUG 06-24 20:32:03 [manager.py:391] Prefill Batch: batch_id=107478278279364262931773934796417484953, time:1750768323.988935s req_ids:[8] -DEBUG 06-24 20:32:03 [manager.py:391] -ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:03 lightllm_req_id:8 first_token_cost:375.22149085998535ms total_cost_time:375.26750564575195ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12725 prompt_cache_len:5151 prompt_cache_ratio:0.40479371316306484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 -DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:04 [batch.py:51] router release req id 8 -INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.1068720817565918 s -INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.1080625057220459 s -DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=308610629551917069323346340020657718255, time:1750768324.3689227s req_ids:[8] -DEBUG 06-24 20:32:04 [manager.py:391] -ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:213.67549896240234ms total_cost_time:213.6971950531006ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12726 prompt_cache_len:5151 prompt_cache_ratio:0.40476190476190477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 -DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:04 [batch.py:51] router release req id 8 -INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10534310340881348 s -DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=166114505208902735107805869866774731630, time:1750768324.5803697s req_ids:[8] -DEBUG 06-24 20:32:04 [manager.py:391] -INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10712122917175293 s -ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:180.96089363098145ms total_cost_time:181.00523948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12727 prompt_cache_len:5151 prompt_cache_ratio:0.40473010135931486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 -DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:04 [batch.py:51] router release req id 8 -INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10731935501098633 s -INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10818719863891602 s -DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=264095568520125204133397756748454241251, time:1750768324.7758415s req_ids:[8] -DEBUG 06-24 20:32:04 [manager.py:391] -ERROR 06-24 20:32:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:168.1971549987793ms total_cost_time:168.24102401733398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12728 prompt_cache_len:5151 prompt_cache_ratio:0.4046983029541169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 -DEBUG 06-24 20:32:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:04 [batch.py:51] router release req id 8 -INFO 06-24 20:32:04 [manager.py:224] router recive req id 8 cost time 0.10728240013122559 s -INFO 06-24 20:32:04 [manager.py:68] detokenization recv req id 8 cost time 0.10924959182739258 s -DEBUG 06-24 20:32:04 [manager.py:391] Prefill Batch: batch_id=123479965270926146145917621582577571309, time:1750768324.9508545s req_ids:[8] -DEBUG 06-24 20:32:04 [manager.py:391] -ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:04 lightllm_req_id:8 first_token_cost:200.3183364868164ms total_cost_time:200.3774642944336ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:12729 prompt_cache_len:5151 prompt_cache_ratio:0.40466650954513317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 -DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:05 [batch.py:51] router release req id 8 -INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10842347145080566 s -INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.1104273796081543 s -DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=84923760578756261262305992240040791713, time:1750768325.1592057s req_ids:[8] -DEBUG 06-24 20:32:05 [manager.py:391] -ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:211.22193336486816ms total_cost_time:211.2748622894287ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:12730 prompt_cache_len:5151 prompt_cache_ratio:0.40463472113118615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 -DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:05 [batch.py:51] router release req id 8 -INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10982513427734375 s -INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.11182022094726562 s -DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=98109035821706712392985663898202288790, time:1750768325.378078s req_ids:[8] -DEBUG 06-24 20:32:05 [manager.py:391] -ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:213.98472785949707ms total_cost_time:214.04719352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12731 prompt_cache_len:5151 prompt_cache_ratio:0.4046029377110989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 -DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:05 [batch.py:51] router release req id 8 -INFO 06-24 20:32:05 [manager.py:224] router recive req id 8 cost time 0.10725927352905273 s -INFO 06-24 20:32:05 [manager.py:68] detokenization recv req id 8 cost time 0.10911345481872559 s -DEBUG 06-24 20:32:05 [manager.py:391] Prefill Batch: batch_id=78719036996131217389117812523624455748, time:1750768325.6073685s req_ids:[8] -DEBUG 06-24 20:32:05 [manager.py:391] -ERROR 06-24 20:32:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:435.58406829833984ms total_cost_time:435.62793731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12732 prompt_cache_len:5151 prompt_cache_ratio:0.40457115928369464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 -DEBUG 06-24 20:32:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:05 [batch.py:51] router release req id 8 -INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10787558555603027 s -INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11005568504333496 s -DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=101455811768662956194702191245060189141, time:1750768326.0381331s req_ids:[8] -DEBUG 06-24 20:32:06 [manager.py:391] -ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:05 lightllm_req_id:8 first_token_cost:213.8967514038086ms total_cost_time:213.94062042236328ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12733 prompt_cache_len:5151 prompt_cache_ratio:0.4045393858477971 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 -DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:06 [batch.py:51] router release req id 8 -INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10832452774047852 s -INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11032295227050781 s -DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=162744945041232176945866669177217809449, time:1750768326.2605393s req_ids:[8] -DEBUG 06-24 20:32:06 [manager.py:391] -ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:216.63522720336914ms total_cost_time:216.67742729187012ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12734 prompt_cache_len:5151 prompt_cache_ratio:0.40450761740223024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 -DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:06 [batch.py:51] router release req id 8 -INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10705804824829102 s -INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.10892462730407715 s -DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=224677008175086016413559102617827072802, time:1750768326.485499s req_ids:[8] -DEBUG 06-24 20:32:06 [manager.py:391] -ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:216.64977073669434ms total_cost_time:216.71366691589355ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:12735 prompt_cache_len:5151 prompt_cache_ratio:0.4044758539458186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 -DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:06 [batch.py:51] router release req id 8 -INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.10782551765441895 s -INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.10978841781616211 s -DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=284946616773434406099854083244093990399, time:1750768326.7225735s req_ids:[8] -DEBUG 06-24 20:32:06 [manager.py:391] -ERROR 06-24 20:32:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:214.8592472076416ms total_cost_time:214.91551399230957ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:12736 prompt_cache_len:5151 prompt_cache_ratio:0.4044440954773869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 -DEBUG 06-24 20:32:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:06 [batch.py:51] router release req id 8 -INFO 06-24 20:32:06 [manager.py:224] router recive req id 8 cost time 0.11059951782226562 s -INFO 06-24 20:32:06 [manager.py:68] detokenization recv req id 8 cost time 0.11281824111938477 s -DEBUG 06-24 20:32:06 [manager.py:391] Prefill Batch: batch_id=279963320846153716470609459012527187729, time:1750768326.9312544s req_ids:[8] -DEBUG 06-24 20:32:06 [manager.py:391] -DEBUG 06-24 20:32:06 [stats.py:37] Avg tokens(prompt+generate) throughput: 51860.005 tokens/s -DEBUG 06-24 20:32:06 [stats.py:37] Avg prompt tokens throughput: 51851.850 tokens/s -DEBUG 06-24 20:32:06 [stats.py:37] Avg generate tokens throughput: 8.155 tokens/s -ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:06 lightllm_req_id:8 first_token_cost:247.88904190063477ms total_cost_time:247.94912338256836ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:12737 prompt_cache_len:5151 prompt_cache_ratio:0.40441234199576037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 -DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:07 [batch.py:51] router release req id 8 -INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10750126838684082 s -INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.10938739776611328 s -DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=319864604173598892257647413798674028279, time:1750768327.1891727s req_ids:[8] -DEBUG 06-24 20:32:07 [manager.py:391] -ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:203.57871055603027ms total_cost_time:203.62544059753418ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12738 prompt_cache_len:5151 prompt_cache_ratio:0.40438059349976446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 -DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:07 [batch.py:51] router release req id 8 -INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.11082100868225098 s -INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11287879943847656 s -DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=4303334175365590107116684646409786245, time:1750768327.3987477s req_ids:[8] -DEBUG 06-24 20:32:07 [manager.py:391] -ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:385.15186309814453ms total_cost_time:385.19906997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:12739 prompt_cache_len:5151 prompt_cache_ratio:0.4043488499882251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 -DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:07 [batch.py:51] router release req id 8 -INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10887622833251953 s -INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11079549789428711 s -DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=96652630432232257193886835485865578059, time:1750768327.7845984s req_ids:[8] -DEBUG 06-24 20:32:07 [manager.py:391] -ERROR 06-24 20:32:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:195.98984718322754ms total_cost_time:196.03824615478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:12740 prompt_cache_len:5151 prompt_cache_ratio:0.4043171114599686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 -DEBUG 06-24 20:32:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:07 [batch.py:51] router release req id 8 -INFO 06-24 20:32:07 [manager.py:224] router recive req id 8 cost time 0.10849428176879883 s -INFO 06-24 20:32:07 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s -DEBUG 06-24 20:32:07 [manager.py:391] Prefill Batch: batch_id=313080579098090644851413948042528788976, time:1750768327.9903781s req_ids:[8] -DEBUG 06-24 20:32:07 [manager.py:391] -ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:07 lightllm_req_id:8 first_token_cost:209.18011665344238ms total_cost_time:209.22541618347168ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12741 prompt_cache_len:5151 prompt_cache_ratio:0.4042853779138215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 -DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:08 [batch.py:51] router release req id 8 -INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10911297798156738 s -INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.1112065315246582 s -DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=190986703000232247063522985211421691896, time:1750768328.2039244s req_ids:[8] -DEBUG 06-24 20:32:08 [manager.py:391] -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:212.2969627380371ms total_cost_time:212.3415470123291ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12742 prompt_cache_len:5151 prompt_cache_ratio:0.4042536493486109 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 -DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:08 [batch.py:51] router release req id 8 -INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10777568817138672 s -INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.10969328880310059 s -DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=171747021582067039416066221697440191013, time:1750768328.428756s req_ids:[8] -DEBUG 06-24 20:32:08 [manager.py:391] -ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:218.7035083770752ms total_cost_time:218.7516689300537ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:12743 prompt_cache_len:5151 prompt_cache_ratio:0.40422192576316407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 -DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:08 [batch.py:51] router release req id 8 -INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.1089937686920166 s -INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.11097979545593262 s -DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=273161866324616153003195574455995375117, time:1750768328.6477537s req_ids:[8] -DEBUG 06-24 20:32:08 [manager.py:391] -ERROR 06-24 20:32:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.740327835083ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:12744 prompt_cache_len:5151 prompt_cache_ratio:0.4041902071563089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 -DEBUG 06-24 20:32:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:08 [batch.py:51] router release req id 8 -INFO 06-24 20:32:08 [manager.py:224] router recive req id 8 cost time 0.10848879814147949 s -INFO 06-24 20:32:08 [manager.py:68] detokenization recv req id 8 cost time 0.11054849624633789 s -DEBUG 06-24 20:32:08 [manager.py:391] Prefill Batch: batch_id=234654434928044360988751263455960078164, time:1750768328.8750122s req_ids:[8] -DEBUG 06-24 20:32:08 [manager.py:391] -ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:08 lightllm_req_id:8 first_token_cost:395.49946784973145ms total_cost_time:395.54429054260254ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12745 prompt_cache_len:5151 prompt_cache_ratio:0.4041584935268733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 -DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:09 [batch.py:51] router release req id 8 -INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10923242568969727 s -INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11075639724731445 s -DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=268571295668600539011956116668842712747, time:1750768329.2686017s req_ids:[8] -DEBUG 06-24 20:32:09 [manager.py:391] -ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:207.5967788696289ms total_cost_time:207.658052444458ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12746 prompt_cache_len:5151 prompt_cache_ratio:0.40412678487368586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 -DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:09 [batch.py:51] router release req id 8 -INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s -INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.10967588424682617 s -DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=136986149363190310578692878921761645357, time:1750768329.4836285s req_ids:[8] -DEBUG 06-24 20:32:09 [manager.py:391] -ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:216.12024307250977ms total_cost_time:216.16148948669434ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:12747 prompt_cache_len:5151 prompt_cache_ratio:0.4040950811955754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 -DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:09 [batch.py:51] router release req id 8 -INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10928726196289062 s -INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11055541038513184 s -DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=186251313048350501890985089223866424605, time:1750768329.7060323s req_ids:[8] -DEBUG 06-24 20:32:09 [manager.py:391] -ERROR 06-24 20:32:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:213.03701400756836ms total_cost_time:213.08064460754395ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12748 prompt_cache_len:5151 prompt_cache_ratio:0.4040633824913712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 -DEBUG 06-24 20:32:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:09 [batch.py:51] router release req id 8 -INFO 06-24 20:32:09 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s -INFO 06-24 20:32:09 [manager.py:68] detokenization recv req id 8 cost time 0.11082673072814941 s -DEBUG 06-24 20:32:09 [manager.py:391] Prefill Batch: batch_id=153094158433775038290841477991447811708, time:1750768329.9247167s req_ids:[8] -DEBUG 06-24 20:32:09 [manager.py:391] -ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:09 lightllm_req_id:8 first_token_cost:209.69510078430176ms total_cost_time:209.73777770996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12749 prompt_cache_len:5151 prompt_cache_ratio:0.40403168875990275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 -DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:10 [batch.py:51] router release req id 8 -INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10811781883239746 s -INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.10977005958557129 s -DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=87383392470522335468229076730113987959, time:1750768330.1408894s req_ids:[8] -DEBUG 06-24 20:32:10 [manager.py:391] -ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:210.76011657714844ms total_cost_time:210.80875396728516ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:12750 prompt_cache_len:5151 prompt_cache_ratio:0.404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 -DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:10 [batch.py:51] router release req id 8 -INFO 06-24 20:32:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10868525505065918 s -INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.11045336723327637 s -DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=279297398751838851318630800587533442882, time:1750768330.3651478s req_ids:[8] -DEBUG 06-24 20:32:10 [manager.py:391] -ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:399.8374938964844ms total_cost_time:399.88183975219727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12751 prompt_cache_len:5151 prompt_cache_ratio:0.4039683162104933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 -DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:10 [batch.py:51] router release req id 8 -INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10744380950927734 s -INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.10932540893554688 s -DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=205671153875013041385491703969086570100, time:1750768330.7655103s req_ids:[8] -DEBUG 06-24 20:32:10 [manager.py:391] -ERROR 06-24 20:32:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:214.8911952972412ms total_cost_time:214.9360179901123ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12752 prompt_cache_len:5151 prompt_cache_ratio:0.4039366373902133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 -DEBUG 06-24 20:32:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:10 [batch.py:51] router release req id 8 -INFO 06-24 20:32:10 [manager.py:224] router recive req id 8 cost time 0.10904574394226074 s -INFO 06-24 20:32:10 [manager.py:68] detokenization recv req id 8 cost time 0.11106419563293457 s -DEBUG 06-24 20:32:10 [manager.py:391] Prefill Batch: batch_id=185956170062228657751086264118787393422, time:1750768330.9861462s req_ids:[8] -DEBUG 06-24 20:32:10 [manager.py:391] -ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:10 lightllm_req_id:8 first_token_cost:213.5484218597412ms total_cost_time:213.5915756225586ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12753 prompt_cache_len:5151 prompt_cache_ratio:0.40390496353799105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 -DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:11 [batch.py:51] router release req id 8 -INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10767483711242676 s -INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.1094973087310791 s -DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=303464728190860782826067761737155165734, time:1750768331.207503s req_ids:[8] -DEBUG 06-24 20:32:11 [manager.py:391] -ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:210.45351028442383ms total_cost_time:210.49809455871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12754 prompt_cache_len:5151 prompt_cache_ratio:0.403873294652658 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 -DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:11 [batch.py:51] router release req id 8 -INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10775470733642578 s -INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.1096489429473877 s -DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=184202180498045085844294131931850558023, time:1750768331.4255946s req_ids:[8] -DEBUG 06-24 20:32:11 [manager.py:391] -ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:211.2879753112793ms total_cost_time:211.3323211669922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12755 prompt_cache_len:5151 prompt_cache_ratio:0.40384163073304585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 -DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:11 [batch.py:51] router release req id 8 -INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10888838768005371 s -INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.11101436614990234 s -DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=334686937240656905046644512752414658577, time:1750768331.6425962s req_ids:[8] -DEBUG 06-24 20:32:11 [manager.py:391] -ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:216.65406227111816ms total_cost_time:216.69745445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12756 prompt_cache_len:5151 prompt_cache_ratio:0.40380997177798683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 -DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:11 [batch.py:51] router release req id 8 -INFO 06-24 20:32:11 [manager.py:224] router recive req id 8 cost time 0.10783672332763672 s -INFO 06-24 20:32:11 [manager.py:68] detokenization recv req id 8 cost time 0.10915803909301758 s -DEBUG 06-24 20:32:11 [manager.py:391] Prefill Batch: batch_id=236715885815506573995845790939051637960, time:1750768331.8632057s req_ids:[8] -DEBUG 06-24 20:32:11 [manager.py:391] -ERROR 06-24 20:32:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:213.29331398010254ms total_cost_time:213.33813667297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12757 prompt_cache_len:5151 prompt_cache_ratio:0.4037783177863134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 -DEBUG 06-24 20:32:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:11 [batch.py:51] router release req id 8 -INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10872220993041992 s -INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10979795455932617 s -DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=50163741418903514875663481839673748642, time:1750768332.0809324s req_ids:[8] -DEBUG 06-24 20:32:12 [manager.py:391] -ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:11 lightllm_req_id:8 first_token_cost:374.1724491119385ms total_cost_time:374.21679496765137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12758 prompt_cache_len:5151 prompt_cache_ratio:0.40374666875685844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 -DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:12 [batch.py:51] router release req id 8 -INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.1086273193359375 s -INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10983109474182129 s -DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=309549304259410002631185175114818762264, time:1750768332.463406s req_ids:[8] -DEBUG 06-24 20:32:12 [manager.py:391] -ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:216.37654304504395ms total_cost_time:216.42041206359863ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12759 prompt_cache_len:5151 prompt_cache_ratio:0.4037150246884552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 -DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:12 [batch.py:51] router release req id 8 -INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10851716995239258 s -INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10958647727966309 s -DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=246596124787094043231685615205376036157, time:1750768332.6848547s req_ids:[8] -DEBUG 06-24 20:32:12 [manager.py:391] -ERROR 06-24 20:32:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:215.10624885559082ms total_cost_time:215.1501178741455ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12760 prompt_cache_len:5151 prompt_cache_ratio:0.4036833855799373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 -DEBUG 06-24 20:32:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:12 [batch.py:51] router release req id 8 -INFO 06-24 20:32:12 [manager.py:224] router recive req id 8 cost time 0.10752630233764648 s -INFO 06-24 20:32:12 [manager.py:68] detokenization recv req id 8 cost time 0.10951828956604004 s -DEBUG 06-24 20:32:12 [manager.py:391] Prefill Batch: batch_id=264550105337034624954075775699484397767, time:1750768332.906285s req_ids:[8] -DEBUG 06-24 20:32:12 [manager.py:391] -ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:12 lightllm_req_id:8 first_token_cost:214.12420272827148ms total_cost_time:214.16997909545898ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12761 prompt_cache_len:5151 prompt_cache_ratio:0.4036517514301387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 -DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:13 [batch.py:51] router release req id 8 -INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.1092679500579834 s -INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.11122727394104004 s -DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=305178561174230089291908079720010112515, time:1750768333.1237361s req_ids:[8] -DEBUG 06-24 20:32:13 [manager.py:391] -ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:209.29908752441406ms total_cost_time:209.34462547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12762 prompt_cache_len:5151 prompt_cache_ratio:0.40362012223789373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 -DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:13 [batch.py:51] router release req id 8 -INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10820364952087402 s -INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.10936236381530762 s -DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=221555671578494233235708062908446067347, time:1750768333.3369102s req_ids:[8] -DEBUG 06-24 20:32:13 [manager.py:391] -ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:212.0053768157959ms total_cost_time:212.0497226715088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12763 prompt_cache_len:5151 prompt_cache_ratio:0.40358849800203717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 -DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:13 [batch.py:51] router release req id 8 -INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10876965522766113 s -INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.11084794998168945 s -DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=73355558970403681588473645571662650760, time:1750768333.554558s req_ids:[8] -DEBUG 06-24 20:32:13 [manager.py:391] -ERROR 06-24 20:32:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:391.24488830566406ms total_cost_time:391.29137992858887ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12764 prompt_cache_len:5151 prompt_cache_ratio:0.40355687872140394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 -DEBUG 06-24 20:32:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:13 [batch.py:51] router release req id 8 -INFO 06-24 20:32:13 [manager.py:224] router recive req id 8 cost time 0.10910201072692871 s -INFO 06-24 20:32:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112222671508789 s -DEBUG 06-24 20:32:13 [manager.py:391] Prefill Batch: batch_id=53943494029676393930777895924701649371, time:1750768333.959336s req_ids:[8] -DEBUG 06-24 20:32:13 [manager.py:391] -ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:13 lightllm_req_id:8 first_token_cost:222.37801551818848ms total_cost_time:222.44000434875488ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12765 prompt_cache_len:5151 prompt_cache_ratio:0.4035252643948296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 -DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:14 [batch.py:51] router release req id 8 -INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.1091158390045166 s -INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s -DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=328436197250720853730627193021126438096, time:1750768334.1792412s req_ids:[8] -DEBUG 06-24 20:32:14 [manager.py:391] -ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:211.88974380493164ms total_cost_time:211.93575859069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12766 prompt_cache_len:5151 prompt_cache_ratio:0.4034936550211499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 -DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:14 [batch.py:51] router release req id 8 -INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10912489891052246 s -INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s -DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=204183005986504478002741963122950267224, time:1750768334.4001515s req_ids:[8] -DEBUG 06-24 20:32:14 [manager.py:391] -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:214.22934532165527ms total_cost_time:214.27369117736816ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12767 prompt_cache_len:5151 prompt_cache_ratio:0.40346205059920104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 -DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:14 [batch.py:51] router release req id 8 -INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10898709297180176 s -INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.11110401153564453 s -DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=94384186471841813796427733896382812597, time:1750768334.6192143s req_ids:[8] -DEBUG 06-24 20:32:14 [manager.py:391] -ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:214.17617797851562ms total_cost_time:214.22076225280762ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12768 prompt_cache_len:5151 prompt_cache_ratio:0.40343045112781956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 -DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:14 [batch.py:51] router release req id 8 -INFO 06-24 20:32:14 [manager.py:224] router recive req id 8 cost time 0.10762810707092285 s -INFO 06-24 20:32:14 [manager.py:68] detokenization recv req id 8 cost time 0.10870099067687988 s -DEBUG 06-24 20:32:14 [manager.py:391] Prefill Batch: batch_id=289125054046173823795334074225658672095, time:1750768334.8362412s req_ids:[8] -DEBUG 06-24 20:32:14 [manager.py:391] -ERROR 06-24 20:32:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:213.38987350463867ms total_cost_time:213.43469619750977ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12769 prompt_cache_len:5151 prompt_cache_ratio:0.4033988566058423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 -DEBUG 06-24 20:32:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:14 [batch.py:51] router release req id 8 -INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.1092679500579834 s -INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.11080765724182129 s -DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=185777141390185758979124460531288140805, time:1750768335.0550463s req_ids:[8] -DEBUG 06-24 20:32:15 [manager.py:391] -ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:14 lightllm_req_id:8 first_token_cost:210.8299732208252ms total_cost_time:210.87193489074707ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12770 prompt_cache_len:5151 prompt_cache_ratio:0.4033672670321065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 -DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:15 [batch.py:51] router release req id 8 -INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10758709907531738 s -INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s -DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=289975140293902323568109889531367499597, time:1750768335.2733886s req_ids:[8] -DEBUG 06-24 20:32:15 [manager.py:391] -ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:211.7142677307129ms total_cost_time:211.75694465637207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12771 prompt_cache_len:5151 prompt_cache_ratio:0.40333568240544987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 -DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:15 [batch.py:51] router release req id 8 -INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10883927345275879 s -INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.1100623607635498 s -DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=182071911128253644127758490330249599080, time:1750768335.4944787s req_ids:[8] -DEBUG 06-24 20:32:15 [manager.py:391] -ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:400.71821212768555ms total_cost_time:400.74682235717773ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:12772 prompt_cache_len:5151 prompt_cache_ratio:0.4033041027247103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 -DEBUG 06-24 20:32:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:15 [batch.py:51] router release req id 8 -INFO 06-24 20:32:15 [manager.py:224] router recive req id 8 cost time 0.10919356346130371 s -INFO 06-24 20:32:15 [manager.py:68] detokenization recv req id 8 cost time 0.11125349998474121 s -DEBUG 06-24 20:32:15 [manager.py:391] Prefill Batch: batch_id=95678919104808777774008045419438116004, time:1750768335.9027739s req_ids:[8] -DEBUG 06-24 20:32:15 [manager.py:391] -ERROR 06-24 20:32:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:198.6231803894043ms total_cost_time:198.6687183380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12773 prompt_cache_len:5151 prompt_cache_ratio:0.4032725279887262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 -DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:16 [batch.py:51] router release req id 8 -DEBUG 06-24 20:32:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:16 [manager.py:283] -DEBUG 06-24 20:32:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:16 [manager.py:284] -INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10937786102294922 s -INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11140584945678711 s -DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=250394397790085365401076077240485737978, time:1750768336.107398s req_ids:[8] -DEBUG 06-24 20:32:16 [manager.py:391] -ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:15 lightllm_req_id:8 first_token_cost:210.7548713684082ms total_cost_time:210.798978805542ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12774 prompt_cache_len:5151 prompt_cache_ratio:0.4032409581963363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 -DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:16 [batch.py:51] router release req id 8 -INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s -INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11078166961669922 s -DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=65166913776842913741257069643316408561, time:1750768336.3250961s req_ids:[8] -DEBUG 06-24 20:32:16 [manager.py:391] -ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:216.20678901672363ms total_cost_time:216.25041961669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12775 prompt_cache_len:5151 prompt_cache_ratio:0.4032093933463797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 -DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:16 [batch.py:51] router release req id 8 -INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10881471633911133 s -INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.1108086109161377 s -DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=265039547204125063767751321686973985199, time:1750768336.5452223s req_ids:[8] -DEBUG 06-24 20:32:16 [manager.py:391] -ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:212.43524551391602ms total_cost_time:212.4783992767334ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12776 prompt_cache_len:5151 prompt_cache_ratio:0.4031778334376957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 -DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:16 [batch.py:51] router release req id 8 -INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10804462432861328 s -INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11000990867614746 s -DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=4931067181246065638869094515320556055, time:1750768336.7648764s req_ids:[8] -DEBUG 06-24 20:32:16 [manager.py:391] -ERROR 06-24 20:32:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:207.67903327941895ms total_cost_time:207.7198028564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:12777 prompt_cache_len:5151 prompt_cache_ratio:0.4031462784691242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 -DEBUG 06-24 20:32:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:16 [batch.py:51] router release req id 8 -INFO 06-24 20:32:16 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s -INFO 06-24 20:32:16 [manager.py:68] detokenization recv req id 8 cost time 0.11025357246398926 s -DEBUG 06-24 20:32:16 [manager.py:391] Prefill Batch: batch_id=92238918883755976467360895836525042129, time:1750768336.9895656s req_ids:[8] -DEBUG 06-24 20:32:16 [manager.py:391] -DEBUG 06-24 20:32:16 [stats.py:37] Avg tokens(prompt+generate) throughput: 52013.707 tokens/s -DEBUG 06-24 20:32:16 [stats.py:37] Avg prompt tokens throughput: 52005.555 tokens/s -DEBUG 06-24 20:32:16 [stats.py:37] Avg generate tokens throughput: 8.153 tokens/s -ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:16 lightllm_req_id:8 first_token_cost:227.54955291748047ms total_cost_time:227.59366035461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12778 prompt_cache_len:5151 prompt_cache_ratio:0.4031147284395054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 -DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:17 [batch.py:51] router release req id 8 -INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10814809799194336 s -INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.11028528213500977 s -DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=70026461003083432815128745744006763962, time:1750768337.211016s req_ids:[8] -DEBUG 06-24 20:32:17 [manager.py:391] -ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:387.5298500061035ms total_cost_time:387.5718116760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12779 prompt_cache_len:5151 prompt_cache_ratio:0.4030831833476798 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 -DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:17 [batch.py:51] router release req id 8 -INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10745573043823242 s -INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.10946273803710938 s -DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=295443506750457855535545591393863372284, time:1750768337.6091065s req_ids:[8] -DEBUG 06-24 20:32:17 [manager.py:391] -ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:215.78550338745117ms total_cost_time:215.82794189453125ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12780 prompt_cache_len:5151 prompt_cache_ratio:0.40305164319248826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 -DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:17 [batch.py:51] router release req id 8 -INFO 06-24 20:32:17 [manager.py:224] router recive req id 8 cost time 0.10867762565612793 s -INFO 06-24 20:32:17 [manager.py:68] detokenization recv req id 8 cost time 0.11076164245605469 s -DEBUG 06-24 20:32:17 [manager.py:391] Prefill Batch: batch_id=324511124189727302939766653618015324208, time:1750768337.836053s req_ids:[8] -DEBUG 06-24 20:32:17 [manager.py:391] -ERROR 06-24 20:32:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:219.62523460388184ms total_cost_time:219.6676731109619ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12781 prompt_cache_len:5151 prompt_cache_ratio:0.4030201079727721 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 -DEBUG 06-24 20:32:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:17 [batch.py:51] router release req id 8 -INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.1081247329711914 s -INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.11008524894714355 s -DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=208218696670463765566349830066927277334, time:1750768338.065538s req_ids:[8] -DEBUG 06-24 20:32:18 [manager.py:391] -ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:17 lightllm_req_id:8 first_token_cost:222.43499755859375ms total_cost_time:222.47719764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12782 prompt_cache_len:5151 prompt_cache_ratio:0.4029885776873729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 -DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:18 [batch.py:51] router release req id 8 -INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s -INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.11070632934570312 s -DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=311486201189688680242702860039443242795, time:1750768338.2893257s req_ids:[8] -DEBUG 06-24 20:32:18 [manager.py:391] -ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:215.0571346282959ms total_cost_time:215.10052680969238ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12783 prompt_cache_len:5151 prompt_cache_ratio:0.40295705233513257 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 -DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:18 [batch.py:51] router release req id 8 -INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10738801956176758 s -INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.10925602912902832 s -DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=329499031219073375638342460364528371985, time:1750768338.5110435s req_ids:[8] -DEBUG 06-24 20:32:18 [manager.py:391] -ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:213.90271186828613ms total_cost_time:213.9451503753662ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12784 prompt_cache_len:5151 prompt_cache_ratio:0.4029255319148936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 -DEBUG 06-24 20:32:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:18 [batch.py:51] router release req id 8 -INFO 06-24 20:32:18 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s -INFO 06-24 20:32:18 [manager.py:68] detokenization recv req id 8 cost time 0.10981488227844238 s -DEBUG 06-24 20:32:18 [manager.py:391] Prefill Batch: batch_id=48705036867626733063625460727736137529, time:1750768338.72973s req_ids:[8] -DEBUG 06-24 20:32:18 [manager.py:391] -ERROR 06-24 20:32:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:211.55047416687012ms total_cost_time:211.57526969909668ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:12785 prompt_cache_len:5151 prompt_cache_ratio:0.40289401642549866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 -DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:19 [batch.py:51] router release req id 8 -INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.31234264373779297 s -INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.3144843578338623 s -DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=159212714748014700149438768661185371868, time:1750768339.164549s req_ids:[8] -DEBUG 06-24 20:32:19 [manager.py:391] -ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:18 lightllm_req_id:8 first_token_cost:435.1987838745117ms total_cost_time:435.2433681488037ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12786 prompt_cache_len:5151 prompt_cache_ratio:0.4028625058657907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 -DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:19 [batch.py:51] router release req id 8 -INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.10950994491577148 s -INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.11173439025878906 s -DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=111431361868675696267553586846410028604, time:1750768339.3923173s req_ids:[8] -DEBUG 06-24 20:32:19 [manager.py:391] -ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:212.20660209655762ms total_cost_time:212.26239204406738ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:12787 prompt_cache_len:5151 prompt_cache_ratio:0.40283100023461327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 -DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:19 [batch.py:51] router release req id 8 -INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.10769772529602051 s -INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.1098330020904541 s -DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=194246409525248452922016314915737030391, time:1750768339.6082196s req_ids:[8] -DEBUG 06-24 20:32:19 [manager.py:391] -ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:211.80343627929688ms total_cost_time:211.84921264648438ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12788 prompt_cache_len:5151 prompt_cache_ratio:0.4027994995308101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 -DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:19 [batch.py:51] router release req id 8 -INFO 06-24 20:32:19 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s -INFO 06-24 20:32:19 [manager.py:68] detokenization recv req id 8 cost time 0.11014986038208008 s -DEBUG 06-24 20:32:19 [manager.py:391] Prefill Batch: batch_id=284192503015660560278757885151341090060, time:1750768339.827261s req_ids:[8] -DEBUG 06-24 20:32:19 [manager.py:391] -ERROR 06-24 20:32:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:214.86377716064453ms total_cost_time:214.90812301635742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12789 prompt_cache_len:5151 prompt_cache_ratio:0.4027680037532254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 -DEBUG 06-24 20:32:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:19 [batch.py:51] router release req id 8 -INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.10914897918701172 s -INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.11113166809082031 s -DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=127466260538790170118749258526872474943, time:1750768340.0629764s req_ids:[8] -DEBUG 06-24 20:32:20 [manager.py:391] -ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:19 lightllm_req_id:8 first_token_cost:228.4541130065918ms total_cost_time:228.4998893737793ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12790 prompt_cache_len:5151 prompt_cache_ratio:0.40273651290070367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 -DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:20 [batch.py:51] router release req id 8 -INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.1089167594909668 s -INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.1110074520111084 s -DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=151832180931281131249937561775725160298, time:1750768340.284597s req_ids:[8] -DEBUG 06-24 20:32:20 [manager.py:391] -ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:207.4112892150879ms total_cost_time:207.4582576751709ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12791 prompt_cache_len:5151 prompt_cache_ratio:0.40270502697208976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 -DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:20 [batch.py:51] router release req id 8 -INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.3107945919036865 s -INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.3128983974456787 s -DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=108364897012548086077736354537211088456, time:1750768340.7140446s req_ids:[8] -DEBUG 06-24 20:32:20 [manager.py:391] -ERROR 06-24 20:32:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:431.35714530944824ms total_cost_time:431.40244483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12792 prompt_cache_len:5151 prompt_cache_ratio:0.4026735459662289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 -DEBUG 06-24 20:32:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:20 [batch.py:51] router release req id 8 -INFO 06-24 20:32:20 [manager.py:224] router recive req id 8 cost time 0.10651206970214844 s -INFO 06-24 20:32:20 [manager.py:68] detokenization recv req id 8 cost time 0.10758733749389648 s -DEBUG 06-24 20:32:20 [manager.py:391] Prefill Batch: batch_id=204333943733511921273916965767429681078, time:1750768340.9358287s req_ids:[8] -DEBUG 06-24 20:32:20 [manager.py:391] -ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:20 lightllm_req_id:8 first_token_cost:225.07429122924805ms total_cost_time:225.12006759643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12793 prompt_cache_len:5151 prompt_cache_ratio:0.4026420698819667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 -DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:21 [batch.py:51] router release req id 8 -INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10753965377807617 s -INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.10942721366882324 s -DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=42797764042740796117643801712542906408, time:1750768341.1766133s req_ids:[8] -DEBUG 06-24 20:32:21 [manager.py:391] -ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:219.31147575378418ms total_cost_time:219.35725212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12794 prompt_cache_len:5151 prompt_cache_ratio:0.40261059871814914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 -DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:21 [batch.py:51] router release req id 8 -INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10905098915100098 s -INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s -DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=328045701581968338129284982651127126829, time:1750768341.3924623s req_ids:[8] -DEBUG 06-24 20:32:21 [manager.py:391] -ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:210.91079711914062ms total_cost_time:210.9549045562744ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12795 prompt_cache_len:5151 prompt_cache_ratio:0.40257913247362254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 -DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:21 [batch.py:51] router release req id 8 -INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10962700843811035 s -INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.11176085472106934 s -DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=61707017856669233914773224895676251842, time:1750768341.6102846s req_ids:[8] -DEBUG 06-24 20:32:21 [manager.py:391] -ERROR 06-24 20:32:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:213.9589786529541ms total_cost_time:214.0028476715088ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12796 prompt_cache_len:5151 prompt_cache_ratio:0.4025476711472335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 -DEBUG 06-24 20:32:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:21 [batch.py:51] router release req id 8 -INFO 06-24 20:32:21 [manager.py:224] router recive req id 8 cost time 0.10746359825134277 s -INFO 06-24 20:32:21 [manager.py:68] detokenization recv req id 8 cost time 0.10942864418029785 s -DEBUG 06-24 20:32:21 [manager.py:391] Prefill Batch: batch_id=201835959866627544271851294979554920142, time:1750768341.8303468s req_ids:[8] -DEBUG 06-24 20:32:21 [manager.py:391] -ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:21 lightllm_req_id:8 first_token_cost:384.34576988220215ms total_cost_time:384.39106941223145ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12797 prompt_cache_len:5151 prompt_cache_ratio:0.40251621473782917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 -DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:22 [batch.py:51] router release req id 8 -INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10910773277282715 s -INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11111783981323242 s -DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=309439357140097915563450789873870541843, time:1750768342.219602s req_ids:[8] -DEBUG 06-24 20:32:22 [manager.py:391] -ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:217.35477447509766ms total_cost_time:217.39935874938965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12798 prompt_cache_len:5151 prompt_cache_ratio:0.4024847632442569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 -DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:22 [batch.py:51] router release req id 8 -INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10877704620361328 s -INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014509201049805 s -DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=258450698529744897491057281918362912498, time:1750768342.4471378s req_ids:[8] -DEBUG 06-24 20:32:22 [manager.py:391] -ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:219.39373016357422ms total_cost_time:219.4385528564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12799 prompt_cache_len:5151 prompt_cache_ratio:0.4024533166653645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 -DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:22 [batch.py:51] router release req id 8 -INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10830879211425781 s -INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.11026620864868164 s -DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=120108118922872144417573194134995661286, time:1750768342.6682813s req_ids:[8] -DEBUG 06-24 20:32:22 [manager.py:391] -ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:217.16666221618652ms total_cost_time:217.2107696533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12800 prompt_cache_len:5151 prompt_cache_ratio:0.402421875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 -DEBUG 06-24 20:32:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:22 [batch.py:51] router release req id 8 -INFO 06-24 20:32:22 [manager.py:224] router recive req id 8 cost time 0.10784721374511719 s -INFO 06-24 20:32:22 [manager.py:68] detokenization recv req id 8 cost time 0.10982584953308105 s -DEBUG 06-24 20:32:22 [manager.py:391] Prefill Batch: batch_id=233249292378408635499826681981010087644, time:1750768342.8900845s req_ids:[8] -DEBUG 06-24 20:32:22 [manager.py:391] -ERROR 06-24 20:32:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:214.83397483825684ms total_cost_time:214.87665176391602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12801 prompt_cache_len:5151 prompt_cache_ratio:0.40239043824701193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 -DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:23 [batch.py:51] router release req id 8 -INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.10773897171020508 s -INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.10967040061950684 s -DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=261649399595077689397919047617684974238, time:1750768343.1113343s req_ids:[8] -DEBUG 06-24 20:32:23 [manager.py:391] -ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:22 lightllm_req_id:8 first_token_cost:214.76054191589355ms total_cost_time:214.81585502624512ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:12802 prompt_cache_len:5151 prompt_cache_ratio:0.4023590064052492 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:32:23 [statics_utils.py:24] mean first cost: 229.96201950390488 ms -INFO 06-24 20:32:23 [statics_utils.py:24] mean per token cost: 0.06079721034521612 ms -INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 -DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:23 [batch.py:51] router release req id 8 -INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.1087183952331543 s -INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11055111885070801 s -DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=220762102063564505773762618658087183255, time:1750768343.3374286s req_ids:[8] -DEBUG 06-24 20:32:23 [manager.py:391] -ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:382.89499282836914ms total_cost_time:382.94100761413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12803 prompt_cache_len:5151 prompt_cache_ratio:0.4023275794735609 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 -DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:23 [batch.py:51] router release req id 8 -INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s -INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11112642288208008 s -DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=207213081614142952356581731114474849137, time:1750768343.7220483s req_ids:[8] -DEBUG 06-24 20:32:23 [manager.py:391] -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:215.79575538635254ms total_cost_time:215.83962440490723ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12804 prompt_cache_len:5151 prompt_cache_ratio:0.4022961574507966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 -DEBUG 06-24 20:32:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:23 [batch.py:51] router release req id 8 -INFO 06-24 20:32:23 [manager.py:224] router recive req id 8 cost time 0.10904908180236816 s -INFO 06-24 20:32:23 [manager.py:68] detokenization recv req id 8 cost time 0.11117434501647949 s -DEBUG 06-24 20:32:23 [manager.py:391] Prefill Batch: batch_id=232041908002191756669560038202451650967, time:1750768343.9456053s req_ids:[8] -DEBUG 06-24 20:32:23 [manager.py:391] -ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:23 lightllm_req_id:8 first_token_cost:213.547945022583ms total_cost_time:213.60445022583008ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:12805 prompt_cache_len:5151 prompt_cache_ratio:0.40226474033580634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 -DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:24 [batch.py:51] router release req id 8 -INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10915279388427734 s -INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11107730865478516 s -DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=139253665954856950943252893366113079795, time:1750768344.1681037s req_ids:[8] -DEBUG 06-24 20:32:24 [manager.py:391] -ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:213.4392261505127ms total_cost_time:213.48261833190918ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12806 prompt_cache_len:5151 prompt_cache_ratio:0.4022333281274403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 -DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:24 [batch.py:51] router release req id 8 -INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.1083381175994873 s -INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11036324501037598 s -DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=154131676121033394070039496937921853843, time:1750768344.3862667s req_ids:[8] -DEBUG 06-24 20:32:24 [manager.py:391] -ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:213.9742374420166ms total_cost_time:214.0340805053711ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12807 prompt_cache_len:5151 prompt_cache_ratio:0.4022019208245491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 -DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:24 [batch.py:51] router release req id 8 -INFO 06-24 20:32:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10805368423461914 s -INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.11002755165100098 s -DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=252543143952675174779650403032820989574, time:1750768344.6063507s req_ids:[8] -DEBUG 06-24 20:32:24 [manager.py:391] -ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:211.80486679077148ms total_cost_time:211.8527889251709ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:12808 prompt_cache_len:5151 prompt_cache_ratio:0.4021705184259838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 -DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:24 [batch.py:51] router release req id 8 -INFO 06-24 20:32:24 [manager.py:224] router recive req id 8 cost time 0.10799980163574219 s -INFO 06-24 20:32:24 [manager.py:68] detokenization recv req id 8 cost time 0.10990190505981445 s -DEBUG 06-24 20:32:24 [manager.py:391] Prefill Batch: batch_id=182665450364871729514813012209934213229, time:1750768344.8253489s req_ids:[8] -DEBUG 06-24 20:32:24 [manager.py:391] -ERROR 06-24 20:32:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:212.1446132659912ms total_cost_time:212.2058868408203ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12809 prompt_cache_len:5151 prompt_cache_ratio:0.4021391209305957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 -DEBUG 06-24 20:32:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:24 [batch.py:51] router release req id 8 -INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10751056671142578 s -INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.10953545570373535 s -DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=22278380069274031271146944003692133475, time:1750768345.0440357s req_ids:[8] -DEBUG 06-24 20:32:25 [manager.py:391] -ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:24 lightllm_req_id:8 first_token_cost:384.6297264099121ms total_cost_time:384.6893310546875ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:12810 prompt_cache_len:5151 prompt_cache_ratio:0.4021077283372365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 -DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:25 [batch.py:51] router release req id 8 -INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10908889770507812 s -INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111903190612793 s -DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=52413436420511972420132877844630001755, time:1750768345.4363904s req_ids:[8] -DEBUG 06-24 20:32:25 [manager.py:391] -ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:214.27512168884277ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.08487701416015625ms prompt_token_num:12811 prompt_cache_len:5151 prompt_cache_ratio:0.4020763406447584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 -DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:25 [batch.py:51] router release req id 8 -INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10875177383422852 s -INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.11091399192810059 s -DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=185923119406753741125043494090825499997, time:1750768345.657525s req_ids:[8] -DEBUG 06-24 20:32:25 [manager.py:391] -ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:212.19229698181152ms total_cost_time:212.23855018615723ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12812 prompt_cache_len:5151 prompt_cache_ratio:0.4020449578520137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 -DEBUG 06-24 20:32:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:25 [batch.py:51] router release req id 8 -INFO 06-24 20:32:25 [manager.py:224] router recive req id 8 cost time 0.10768270492553711 s -INFO 06-24 20:32:25 [manager.py:68] detokenization recv req id 8 cost time 0.10972452163696289 s -DEBUG 06-24 20:32:25 [manager.py:391] Prefill Batch: batch_id=51621591619419015320682010400385468415, time:1750768345.8824012s req_ids:[8] -DEBUG 06-24 20:32:25 [manager.py:391] -ERROR 06-24 20:32:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:219.10548210144043ms total_cost_time:219.16747093200684ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:12813 prompt_cache_len:5151 prompt_cache_ratio:0.4020135799578553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 -DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:26 [batch.py:51] router release req id 8 -INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.1087028980255127 s -INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.11064839363098145 s -DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=116700071872371136966012630125162770949, time:1750768346.1244106s req_ids:[8] -DEBUG 06-24 20:32:26 [manager.py:391] -ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:25 lightllm_req_id:8 first_token_cost:218.65296363830566ms total_cost_time:218.71376037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:12814 prompt_cache_len:5151 prompt_cache_ratio:0.40198220696113623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 -DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:26 [batch.py:51] router release req id 8 -INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10600066184997559 s -INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.10791897773742676 s -DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=323315318061765618275797393108528312537, time:1750768346.3353205s req_ids:[8] -DEBUG 06-24 20:32:26 [manager.py:391] -ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:212.77427673339844ms total_cost_time:212.7974033355713ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:12815 prompt_cache_len:5151 prompt_cache_ratio:0.4019508388607101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 -DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:26 [batch.py:51] router release req id 8 -INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10596847534179688 s -INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.10793757438659668 s -DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=85429584842177536384746230305496777384, time:1750768346.5514941s req_ids:[8] -DEBUG 06-24 20:32:26 [manager.py:391] -ERROR 06-24 20:32:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:209.65051651000977ms total_cost_time:209.67841148376465ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12816 prompt_cache_len:5151 prompt_cache_ratio:0.40191947565543074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 -DEBUG 06-24 20:32:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:26 [batch.py:51] router release req id 8 -INFO 06-24 20:32:26 [manager.py:224] router recive req id 8 cost time 0.10503530502319336 s -INFO 06-24 20:32:26 [manager.py:68] detokenization recv req id 8 cost time 0.1069948673248291 s -DEBUG 06-24 20:32:26 [manager.py:391] Prefill Batch: batch_id=106829987430773303476404453014302677974, time:1750768346.7673137s req_ids:[8] -DEBUG 06-24 20:32:26 [manager.py:391] -ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:32:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 49701.467 tokens/s -DEBUG 06-24 20:32:27 [stats.py:37] Avg prompt tokens throughput: 49693.602 tokens/s -DEBUG 06-24 20:32:27 [stats.py:37] Avg generate tokens throughput: 7.865 tokens/s -INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:26 lightllm_req_id:8 first_token_cost:384.48143005371094ms total_cost_time:384.51075553894043ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:12817 prompt_cache_len:5151 prompt_cache_ratio:0.4018881173441523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 -DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:27 [batch.py:51] router release req id 8 -INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.1062917709350586 s -INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10868597030639648 s -DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=60852611275995568764725270875111446399, time:1750768347.1574504s req_ids:[8] -DEBUG 06-24 20:32:27 [manager.py:391] -ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:209.46764945983887ms total_cost_time:209.49506759643555ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:12818 prompt_cache_len:5151 prompt_cache_ratio:0.40185676392572944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 -DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:27 [batch.py:51] router release req id 8 -INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.10590505599975586 s -INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10783791542053223 s -DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=325046754993295761874566672783804791916, time:1750768347.3719337s req_ids:[8] -DEBUG 06-24 20:32:27 [manager.py:391] -ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:214.76078033447266ms total_cost_time:214.78891372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:12819 prompt_cache_len:5151 prompt_cache_ratio:0.40182541539901706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 -DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:27 [batch.py:51] router release req id 8 -INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.1060018539428711 s -INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10794997215270996 s -DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=18280646961843405436071686861838592063, time:1750768347.5939212s req_ids:[8] -DEBUG 06-24 20:32:27 [manager.py:391] -ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:217.12923049926758ms total_cost_time:217.15712547302246ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12820 prompt_cache_len:5151 prompt_cache_ratio:0.4017940717628705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 -DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:27 [batch.py:51] router release req id 8 -INFO 06-24 20:32:27 [manager.py:224] router recive req id 8 cost time 0.10833215713500977 s -INFO 06-24 20:32:27 [manager.py:68] detokenization recv req id 8 cost time 0.10991477966308594 s -DEBUG 06-24 20:32:27 [manager.py:391] Prefill Batch: batch_id=121048559797060459162136081350489732168, time:1750768347.8176973s req_ids:[8] -DEBUG 06-24 20:32:27 [manager.py:391] -ERROR 06-24 20:32:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:216.59088134765625ms total_cost_time:216.61925315856934ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:12821 prompt_cache_len:5151 prompt_cache_ratio:0.4017627330161454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 -DEBUG 06-24 20:32:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:27 [batch.py:51] router release req id 8 -INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10641789436340332 s -INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.1089928150177002 s -DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=1855781636601015295194052444246726828, time:1750768348.0386903s req_ids:[8] -DEBUG 06-24 20:32:28 [manager.py:391] -ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:27 lightllm_req_id:8 first_token_cost:220.97086906433105ms total_cost_time:221.0216522216797ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12822 prompt_cache_len:5151 prompt_cache_ratio:0.40173139915769773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 -DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:28 [batch.py:51] router release req id 8 -INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.1059560775756836 s -INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10825109481811523 s -DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=205165376926511123211390081320287160873, time:1750768348.2758608s req_ids:[8] -DEBUG 06-24 20:32:28 [manager.py:391] -ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:392.5633430480957ms total_cost_time:392.5907611846924ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:12823 prompt_cache_len:5151 prompt_cache_ratio:0.4017000701863838 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 -DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:28 [batch.py:51] router release req id 8 -INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10495448112487793 s -INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10694241523742676 s -DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=229815914310115252756232731464624936503, time:1750768348.666822s req_ids:[8] -DEBUG 06-24 20:32:28 [manager.py:391] -ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:213.9737606048584ms total_cost_time:213.99998664855957ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:12824 prompt_cache_len:5151 prompt_cache_ratio:0.4016687461010605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 -DEBUG 06-24 20:32:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:28 [batch.py:51] router release req id 8 -INFO 06-24 20:32:28 [manager.py:224] router recive req id 8 cost time 0.10550355911254883 s -INFO 06-24 20:32:28 [manager.py:68] detokenization recv req id 8 cost time 0.10754776000976562 s -DEBUG 06-24 20:32:28 [manager.py:391] Prefill Batch: batch_id=323249475443310318246273870519909685408, time:1750768348.8848746s req_ids:[8] -DEBUG 06-24 20:32:28 [manager.py:391] -ERROR 06-24 20:32:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:208.16707611083984ms total_cost_time:208.19497108459473ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:12825 prompt_cache_len:5151 prompt_cache_ratio:0.4016374269005848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 -DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:29 [batch.py:51] router release req id 8 -INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10579061508178711 s -INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10776209831237793 s -DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=124279971834469214705527431671093702337, time:1750768349.1004765s req_ids:[8] -DEBUG 06-24 20:32:29 [manager.py:391] -ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:28 lightllm_req_id:8 first_token_cost:212.60809898376465ms total_cost_time:212.63408660888672ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12826 prompt_cache_len:5151 prompt_cache_ratio:0.4016061125838141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 -DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:29 [batch.py:51] router release req id 8 -INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10468149185180664 s -INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.1066446304321289 s -DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=158454302600614695516576208733900572756, time:1750768349.3182795s req_ids:[8] -DEBUG 06-24 20:32:29 [manager.py:391] -ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:210.19411087036133ms total_cost_time:210.21795272827148ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:12827 prompt_cache_len:5151 prompt_cache_ratio:0.4015748031496063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 -DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:29 [batch.py:51] router release req id 8 -INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10579085350036621 s -INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10778498649597168 s -DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=69985633649925630898216712388672116838, time:1750768349.534239s req_ids:[8] -DEBUG 06-24 20:32:29 [manager.py:391] -ERROR 06-24 20:32:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:216.51768684387207ms total_cost_time:216.54367446899414ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:12828 prompt_cache_len:5151 prompt_cache_ratio:0.4015434985968195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 -DEBUG 06-24 20:32:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:29 [batch.py:51] router release req id 8 -INFO 06-24 20:32:29 [manager.py:224] router recive req id 8 cost time 0.10470151901245117 s -INFO 06-24 20:32:29 [manager.py:68] detokenization recv req id 8 cost time 0.10597085952758789 s -DEBUG 06-24 20:32:29 [manager.py:391] Prefill Batch: batch_id=96722282633699076179335580399581259859, time:1750768349.7584927s req_ids:[8] -DEBUG 06-24 20:32:29 [manager.py:391] -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:29 lightllm_req_id:8 first_token_cost:392.1701908111572ms total_cost_time:392.22168922424316ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:12829 prompt_cache_len:5151 prompt_cache_ratio:0.4015121989243121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 -DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:30 [batch.py:51] router release req id 8 -INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10770988464355469 s -INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10968399047851562 s -DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=269867954577598169529203811237201654584, time:1750768350.1550257s req_ids:[8] -DEBUG 06-24 20:32:30 [manager.py:391] -ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:214.87879753112793ms total_cost_time:214.92385864257812ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12830 prompt_cache_len:5151 prompt_cache_ratio:0.4014809041309431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 -DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:30 [batch.py:51] router release req id 8 -INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10766744613647461 s -INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s -DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=51924593423859848575283771764438393108, time:1750768350.3820007s req_ids:[8] -DEBUG 06-24 20:32:30 [manager.py:391] -ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:215.59429168701172ms total_cost_time:215.6367301940918ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12831 prompt_cache_len:5151 prompt_cache_ratio:0.4014496142155717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 -DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:30 [batch.py:51] router release req id 8 -INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10666179656982422 s -INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10852217674255371 s -DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=225141957081174373561396471629176397583, time:1750768350.596503s req_ids:[8] -DEBUG 06-24 20:32:30 [manager.py:391] -ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:201.27415657043457ms total_cost_time:201.31754875183105ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12832 prompt_cache_len:5151 prompt_cache_ratio:0.40141832917705733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 -DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:30 [batch.py:51] router release req id 8 -INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10810518264770508 s -INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10918188095092773 s -DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=81172162782348690502033709073791759569, time:1750768350.8040118s req_ids:[8] -DEBUG 06-24 20:32:30 [manager.py:391] -ERROR 06-24 20:32:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:168.73526573181152ms total_cost_time:168.7788963317871ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12833 prompt_cache_len:5151 prompt_cache_ratio:0.4013870490142601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 -DEBUG 06-24 20:32:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:30 [batch.py:51] router release req id 8 -INFO 06-24 20:32:30 [manager.py:224] router recive req id 8 cost time 0.10773658752441406 s -INFO 06-24 20:32:30 [manager.py:68] detokenization recv req id 8 cost time 0.10981345176696777 s -DEBUG 06-24 20:32:30 [manager.py:391] Prefill Batch: batch_id=99970022450980879317253139983375404637, time:1750768350.9786732s req_ids:[8] -DEBUG 06-24 20:32:30 [manager.py:391] -ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:30 lightllm_req_id:8 first_token_cost:205.60121536254883ms total_cost_time:205.64675331115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12834 prompt_cache_len:5151 prompt_cache_ratio:0.4013557737260402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 -DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:31 [batch.py:51] router release req id 8 -INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.10929512977600098 s -INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.1114037036895752 s -DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=32004448131269710751797711300987736398, time:1750768351.190006s req_ids:[8] -DEBUG 06-24 20:32:31 [manager.py:391] -ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:213.83428573608398ms total_cost_time:213.87863159179688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12835 prompt_cache_len:5151 prompt_cache_ratio:0.4013245033112583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 -DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:31 [batch.py:51] router release req id 8 -INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.1070249080657959 s -INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.10907340049743652 s -DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=283151357426904627881108396021815816169, time:1750768351.4141943s req_ids:[8] -DEBUG 06-24 20:32:31 [manager.py:391] -ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:390.86103439331055ms total_cost_time:390.90538024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12836 prompt_cache_len:5151 prompt_cache_ratio:0.40129323776877535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 -DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:31 [batch.py:51] router release req id 8 -INFO 06-24 20:32:31 [manager.py:224] router recive req id 8 cost time 0.1079108715057373 s -INFO 06-24 20:32:31 [manager.py:68] detokenization recv req id 8 cost time 0.11127901077270508 s -DEBUG 06-24 20:32:31 [manager.py:391] Prefill Batch: batch_id=300127389828582603377476130744930342262, time:1750768351.8080432s req_ids:[8] -DEBUG 06-24 20:32:31 [manager.py:391] -ERROR 06-24 20:32:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:206.12192153930664ms total_cost_time:206.16936683654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:12837 prompt_cache_len:5151 prompt_cache_ratio:0.40126197709745265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 -DEBUG 06-24 20:32:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:31 [batch.py:51] router release req id 8 -INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.11073994636535645 s -INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11298894882202148 s -DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=148757289989635794743963032541845447157, time:1750768352.0207357s req_ids:[8] -DEBUG 06-24 20:32:32 [manager.py:391] -ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:31 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69624137878418ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12838 prompt_cache_len:5151 prompt_cache_ratio:0.40123072129615206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 -DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:32 [batch.py:51] router release req id 8 -INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10794591903686523 s -INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11052513122558594 s -DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=52466047218876425747650416572726382506, time:1750768352.2389243s req_ids:[8] -DEBUG 06-24 20:32:32 [manager.py:391] -ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:210.65068244934082ms total_cost_time:210.6943130493164ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12839 prompt_cache_len:5151 prompt_cache_ratio:0.4011994703637355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 -DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:32 [batch.py:51] router release req id 8 -INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10871601104736328 s -INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11130928993225098 s -DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=179118223445953983735088294833345518804, time:1750768352.4682012s req_ids:[8] -DEBUG 06-24 20:32:32 [manager.py:391] -ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:224.31039810180664ms total_cost_time:224.35402870178223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12840 prompt_cache_len:5151 prompt_cache_ratio:0.4011682242990654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 -DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:32 [batch.py:51] router release req id 8 -INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10741686820983887 s -INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.10947537422180176 s -DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=117929470809742580163682387501187836222, time:1750768352.6891825s req_ids:[8] -DEBUG 06-24 20:32:32 [manager.py:391] -ERROR 06-24 20:32:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:212.88490295410156ms total_cost_time:212.92829513549805ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12841 prompt_cache_len:5151 prompt_cache_ratio:0.4011369831010046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 -DEBUG 06-24 20:32:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:32 [batch.py:51] router release req id 8 -INFO 06-24 20:32:32 [manager.py:224] router recive req id 8 cost time 0.10821056365966797 s -INFO 06-24 20:32:32 [manager.py:68] detokenization recv req id 8 cost time 0.11046361923217773 s -DEBUG 06-24 20:32:32 [manager.py:391] Prefill Batch: batch_id=305517591725713251219293156373953035491, time:1750768352.9065707s req_ids:[8] -DEBUG 06-24 20:32:32 [manager.py:391] -ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:32 lightllm_req_id:8 first_token_cost:386.5363597869873ms total_cost_time:386.5811824798584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12842 prompt_cache_len:5151 prompt_cache_ratio:0.40110574676841615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 -DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:33 [batch.py:51] router release req id 8 -INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10909628868103027 s -INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11398792266845703 s -DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=21463708599813296329177016971466419749, time:1750768353.3013568s req_ids:[8] -DEBUG 06-24 20:32:33 [manager.py:391] -ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:214.71357345581055ms total_cost_time:214.75744247436523ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12843 prompt_cache_len:5151 prompt_cache_ratio:0.4010745153001635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 -DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:33 [batch.py:51] router release req id 8 -INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10853862762451172 s -INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11077260971069336 s -DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=255980957766741912240983380346416192244, time:1750768353.521816s req_ids:[8] -DEBUG 06-24 20:32:33 [manager.py:391] -ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:209.71393585205078ms total_cost_time:209.75852012634277ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12844 prompt_cache_len:5151 prompt_cache_ratio:0.4010432886951106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 -DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:33 [batch.py:51] router release req id 8 -INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10802674293518066 s -INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.1127021312713623 s -DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=230539606338605201505586067668847838407, time:1750768353.7386837s req_ids:[8] -DEBUG 06-24 20:32:33 [manager.py:391] -ERROR 06-24 20:32:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:214.45488929748535ms total_cost_time:214.49637413024902ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12845 prompt_cache_len:5151 prompt_cache_ratio:0.40101206695212144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 -DEBUG 06-24 20:32:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:33 [batch.py:51] router release req id 8 -INFO 06-24 20:32:33 [manager.py:224] router recive req id 8 cost time 0.10855627059936523 s -INFO 06-24 20:32:33 [manager.py:68] detokenization recv req id 8 cost time 0.11071419715881348 s -DEBUG 06-24 20:32:33 [manager.py:391] Prefill Batch: batch_id=297686217749322132140258850763062125910, time:1750768353.9591126s req_ids:[8] -DEBUG 06-24 20:32:33 [manager.py:391] -ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:33 lightllm_req_id:8 first_token_cost:215.41118621826172ms total_cost_time:215.4562473297119ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12846 prompt_cache_len:5151 prompt_cache_ratio:0.40098085007006073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 -DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:34 [batch.py:51] router release req id 8 -INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10817337036132812 s -INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.11029601097106934 s -DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=73071458184082456371998146513213245308, time:1750768354.180707s req_ids:[8] -DEBUG 06-24 20:32:34 [manager.py:391] -ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:206.48765563964844ms total_cost_time:206.53223991394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12847 prompt_cache_len:5151 prompt_cache_ratio:0.40094963804779327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 -DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:34 [batch.py:51] router release req id 8 -INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10749936103820801 s -INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.10975885391235352 s -DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=322238633501542235499050005604107635690, time:1750768354.412689s req_ids:[8] -DEBUG 06-24 20:32:34 [manager.py:391] -ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:231.28461837768555ms total_cost_time:231.33158683776855ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12848 prompt_cache_len:5151 prompt_cache_ratio:0.40091843088418433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 -DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:34 [batch.py:51] router release req id 8 -INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10832643508911133 s -INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.1104285717010498 s -DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=66707610567167297021214850297496139154, time:1750768354.632361s req_ids:[8] -DEBUG 06-24 20:32:34 [manager.py:391] -ERROR 06-24 20:32:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:213.12880516052246ms total_cost_time:213.17410469055176ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12849 prompt_cache_len:5151 prompt_cache_ratio:0.40088722857809944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 -DEBUG 06-24 20:32:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:34 [batch.py:51] router release req id 8 -INFO 06-24 20:32:34 [manager.py:224] router recive req id 8 cost time 0.10971808433532715 s -INFO 06-24 20:32:34 [manager.py:68] detokenization recv req id 8 cost time 0.11165761947631836 s -DEBUG 06-24 20:32:34 [manager.py:391] Prefill Batch: batch_id=172344340825777313959862988425652517718, time:1750768354.866721s req_ids:[8] -DEBUG 06-24 20:32:34 [manager.py:391] -ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:34 lightllm_req_id:8 first_token_cost:408.0221652984619ms total_cost_time:408.0672264099121ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12850 prompt_cache_len:5151 prompt_cache_ratio:0.4008560311284047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 -DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:35 [batch.py:51] router release req id 8 -INFO 06-24 20:32:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10753345489501953 s -INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10903596878051758 s -DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=24114512684198741507322899376733584826, time:1750768355.2674847s req_ids:[8] -DEBUG 06-24 20:32:35 [manager.py:391] -ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:218.45555305480957ms total_cost_time:218.51134300231934ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:12851 prompt_cache_len:5151 prompt_cache_ratio:0.4008248385339662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 -DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:35 [batch.py:51] router release req id 8 -INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.1091165542602539 s -INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.1100466251373291 s -DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=20951157011009264319585329498179760093, time:1750768355.4906127s req_ids:[8] -DEBUG 06-24 20:32:35 [manager.py:391] -ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:206.68959617614746ms total_cost_time:206.73346519470215ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12852 prompt_cache_len:5151 prompt_cache_ratio:0.4007936507936508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 -DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:35 [batch.py:51] router release req id 8 -INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10787820816040039 s -INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10926938056945801 s -DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=281688663327349178006618024835234508891, time:1750768355.703515s req_ids:[8] -DEBUG 06-24 20:32:35 [manager.py:391] -ERROR 06-24 20:32:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:208.27817916870117ms total_cost_time:208.32085609436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12853 prompt_cache_len:5151 prompt_cache_ratio:0.40076246790632536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 -DEBUG 06-24 20:32:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:35 [batch.py:51] router release req id 8 -INFO 06-24 20:32:35 [manager.py:224] router recive req id 8 cost time 0.10770440101623535 s -INFO 06-24 20:32:35 [manager.py:68] detokenization recv req id 8 cost time 0.10861778259277344 s -DEBUG 06-24 20:32:35 [manager.py:391] Prefill Batch: batch_id=140180503040895202675983827454183508533, time:1750768355.918948s req_ids:[8] -DEBUG 06-24 20:32:35 [manager.py:391] -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:35 lightllm_req_id:8 first_token_cost:212.14795112609863ms total_cost_time:212.1899127960205ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12854 prompt_cache_len:5151 prompt_cache_ratio:0.4007312898708573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 -DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:36 [batch.py:51] router release req id 8 -INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.1090993881225586 s -INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.11048603057861328 s -DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=132364740035108421660585809094607396762, time:1750768356.137186s req_ids:[8] -DEBUG 06-24 20:32:36 [manager.py:391] -ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:213.1035327911377ms total_cost_time:213.1481170654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12855 prompt_cache_len:5151 prompt_cache_ratio:0.4007001166861143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 -DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:36 [batch.py:51] router release req id 8 -INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.10780978202819824 s -INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10890650749206543 s -DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=313254959861912950312924639355461554495, time:1750768356.3551052s req_ids:[8] -DEBUG 06-24 20:32:36 [manager.py:391] -ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:207.08274841308594ms total_cost_time:207.12685585021973ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12856 prompt_cache_len:5151 prompt_cache_ratio:0.4006689483509645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 -DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:36 [batch.py:51] router release req id 8 -INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.10749077796936035 s -INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10964822769165039 s -DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=201170592212575184104236318754027238756, time:1750768356.579609s req_ids:[8] -DEBUG 06-24 20:32:36 [manager.py:391] -ERROR 06-24 20:32:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:387.5553607940674ms total_cost_time:387.59875297546387ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12857 prompt_cache_len:5151 prompt_cache_ratio:0.40063778486427626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 -DEBUG 06-24 20:32:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:36 [batch.py:51] router release req id 8 -INFO 06-24 20:32:36 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s -DEBUG 06-24 20:32:36 [manager.py:391] Prefill Batch: batch_id=267587396332165296556366436930486428690, time:1750768356.9590855s req_ids:[8] -DEBUG 06-24 20:32:36 [manager.py:391] -INFO 06-24 20:32:36 [manager.py:68] detokenization recv req id 8 cost time 0.10959696769714355 s -ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:36 lightllm_req_id:8 first_token_cost:184.27681922912598ms total_cost_time:184.32164192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12858 prompt_cache_len:5151 prompt_cache_ratio:0.4006066262249183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 -DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:37 [batch.py:51] router release req id 8 -INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10768437385559082 s -INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11065435409545898 s -DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=317361708165447729237624378287546029893, time:1750768357.1546652s req_ids:[8] -DEBUG 06-24 20:32:37 [manager.py:391] -DEBUG 06-24 20:32:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 53285.280 tokens/s -DEBUG 06-24 20:32:37 [stats.py:37] Avg prompt tokens throughput: 53277.079 tokens/s -DEBUG 06-24 20:32:37 [stats.py:37] Avg generate tokens throughput: 8.201 tokens/s -ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:203.06015014648438ms total_cost_time:203.10401916503906ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12859 prompt_cache_len:5151 prompt_cache_ratio:0.40057547243175984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 -DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:37 [batch.py:51] router release req id 8 -INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10829830169677734 s -INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11042118072509766 s -DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=221135693849931653459307928210180347072, time:1750768357.3669522s req_ids:[8] -DEBUG 06-24 20:32:37 [manager.py:391] -ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:212.2952938079834ms total_cost_time:212.33892440795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12860 prompt_cache_len:5151 prompt_cache_ratio:0.4005443234836703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 -DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:37 [batch.py:51] router release req id 8 -INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10875916481018066 s -INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s -DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=84627510649399654288109116419923012571, time:1750768357.5851233s req_ids:[8] -DEBUG 06-24 20:32:37 [manager.py:391] -ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:216.0179615020752ms total_cost_time:216.06087684631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12861 prompt_cache_len:5151 prompt_cache_ratio:0.40051317937951947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 -DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:37 [batch.py:51] router release req id 8 -INFO 06-24 20:32:37 [manager.py:224] router recive req id 8 cost time 0.10952162742614746 s -INFO 06-24 20:32:37 [manager.py:68] detokenization recv req id 8 cost time 0.11201071739196777 s -DEBUG 06-24 20:32:37 [manager.py:391] Prefill Batch: batch_id=193739161753955824429109779655684097894, time:1750768357.8063345s req_ids:[8] -DEBUG 06-24 20:32:37 [manager.py:391] -ERROR 06-24 20:32:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:214.82563018798828ms total_cost_time:214.84637260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12862 prompt_cache_len:5151 prompt_cache_ratio:0.40048204011817756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 -DEBUG 06-24 20:32:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:37 [batch.py:51] router release req id 8 -INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.1060488224029541 s -INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10817885398864746 s -DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=294507474154773209163826349722347690323, time:1750768358.0260487s req_ids:[8] -DEBUG 06-24 20:32:38 [manager.py:391] -ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:37 lightllm_req_id:8 first_token_cost:211.86518669128418ms total_cost_time:211.92026138305664ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:12863 prompt_cache_len:5151 prompt_cache_ratio:0.4004509056985151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 -DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:38 [batch.py:51] router release req id 8 -INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.31224679946899414 s -INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.31427979469299316 s -DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=69663845363994372534660450683127471677, time:1750768358.453449s req_ids:[8] -DEBUG 06-24 20:32:38 [manager.py:391] -ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:424.7438907623291ms total_cost_time:424.7884750366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12864 prompt_cache_len:5151 prompt_cache_ratio:0.400419776119403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 -DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:38 [batch.py:51] router release req id 8 -INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.1078343391418457 s -INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10979986190795898 s -DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=50468090068802323435457323556832460496, time:1750768358.67636s req_ids:[8] -DEBUG 06-24 20:32:38 [manager.py:391] -ERROR 06-24 20:32:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:240.97490310668945ms total_cost_time:241.01758003234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12865 prompt_cache_len:5151 prompt_cache_ratio:0.4003886513797124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 -DEBUG 06-24 20:32:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:38 [batch.py:51] router release req id 8 -INFO 06-24 20:32:38 [manager.py:224] router recive req id 8 cost time 0.10743904113769531 s -INFO 06-24 20:32:38 [manager.py:68] detokenization recv req id 8 cost time 0.10946226119995117 s -DEBUG 06-24 20:32:38 [manager.py:391] Prefill Batch: batch_id=112603319654759421008126229734947131899, time:1750768358.9207656s req_ids:[8] -DEBUG 06-24 20:32:38 [manager.py:391] -ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:38 lightllm_req_id:8 first_token_cost:214.74933624267578ms total_cost_time:214.79392051696777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12866 prompt_cache_len:5151 prompt_cache_ratio:0.40035753147831493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 -DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:39 [batch.py:51] router release req id 8 -INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10841608047485352 s -INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11049556732177734 s -DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=136569176332481269141860780318482592512, time:1750768359.1404462s req_ids:[8] -DEBUG 06-24 20:32:39 [manager.py:391] -ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:200.50406455993652ms total_cost_time:200.5460262298584ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:12867 prompt_cache_len:5151 prompt_cache_ratio:0.40032641641408256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 -DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:39 [batch.py:51] router release req id 8 -INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10698699951171875 s -INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.1091761589050293 s -DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=220524089975317659450012147859027616597, time:1750768359.3482542s req_ids:[8] -DEBUG 06-24 20:32:39 [manager.py:391] -ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:206.6338062286377ms total_cost_time:206.6800594329834ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12868 prompt_cache_len:5151 prompt_cache_ratio:0.4002953061858875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 -DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:39 [batch.py:51] router release req id 8 -INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10837817192077637 s -INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11041498184204102 s -DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=250980048724135682981607437037259836120, time:1750768359.559812s req_ids:[8] -DEBUG 06-24 20:32:39 [manager.py:391] -ERROR 06-24 20:32:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:214.1861915588379ms total_cost_time:214.2322063446045ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12869 prompt_cache_len:5151 prompt_cache_ratio:0.4002642007926024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 -DEBUG 06-24 20:32:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:39 [batch.py:51] router release req id 8 -INFO 06-24 20:32:39 [manager.py:224] router recive req id 8 cost time 0.10858821868896484 s -INFO 06-24 20:32:39 [manager.py:68] detokenization recv req id 8 cost time 0.11068415641784668 s -DEBUG 06-24 20:32:39 [manager.py:391] Prefill Batch: batch_id=113210083329043235087213525697258063742, time:1750768359.7780104s req_ids:[8] -DEBUG 06-24 20:32:39 [manager.py:391] -ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:39 lightllm_req_id:8 first_token_cost:380.8109760284424ms total_cost_time:380.85484504699707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12870 prompt_cache_len:5151 prompt_cache_ratio:0.4002331002331002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 -DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:40 [batch.py:51] router release req id 8 -INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10962200164794922 s -INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11172628402709961 s -DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=181123670803528098680390489926329309777, time:1750768360.166057s req_ids:[8] -DEBUG 06-24 20:32:40 [manager.py:391] -ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:218.46938133239746ms total_cost_time:218.51372718811035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12871 prompt_cache_len:5151 prompt_cache_ratio:0.40020200450625437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 -DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:40 [batch.py:51] router release req id 8 -INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10950589179992676 s -INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11142826080322266 s -DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=198681761813491091823117293309031244914, time:1750768360.3897166s req_ids:[8] -DEBUG 06-24 20:32:40 [manager.py:391] -ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:211.43293380737305ms total_cost_time:211.4572525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:12872 prompt_cache_len:5151 prompt_cache_ratio:0.4001709136109385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 -DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:40 [batch.py:51] router release req id 8 -INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10689616203308105 s -INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.10873675346374512 s -DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=283541222669653329069910228622371455787, time:1750768360.6124766s req_ids:[8] -DEBUG 06-24 20:32:40 [manager.py:391] -ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:220.91364860534668ms total_cost_time:220.95870971679688ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12873 prompt_cache_len:5151 prompt_cache_ratio:0.40013982754602656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 -DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:40 [batch.py:51] router release req id 8 -INFO 06-24 20:32:40 [manager.py:224] router recive req id 8 cost time 0.10841035842895508 s -INFO 06-24 20:32:40 [manager.py:68] detokenization recv req id 8 cost time 0.11122560501098633 s -DEBUG 06-24 20:32:40 [manager.py:391] Prefill Batch: batch_id=145071811298234991619129478083565720384, time:1750768360.8372235s req_ids:[8] -DEBUG 06-24 20:32:40 [manager.py:391] -ERROR 06-24 20:32:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:209.9156379699707ms total_cost_time:209.9611759185791ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12874 prompt_cache_len:5151 prompt_cache_ratio:0.40010874631039306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 -DEBUG 06-24 20:32:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:40 [batch.py:51] router release req id 8 -INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10780477523803711 s -INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.11000657081604004 s -DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=321941240107032266423440392027399899711, time:1750768361.0525863s req_ids:[8] -DEBUG 06-24 20:32:41 [manager.py:391] -ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:40 lightllm_req_id:8 first_token_cost:210.8466625213623ms total_cost_time:210.89744567871094ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:12875 prompt_cache_len:5151 prompt_cache_ratio:0.4000776699029126 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 -DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:41 [batch.py:51] router release req id 8 -INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.3084535598754883 s -INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.310380220413208 s -DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=298019330997989248811193889006940246576, time:1750768361.4823272s req_ids:[8] -DEBUG 06-24 20:32:41 [manager.py:391] -ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:423.27046394348145ms total_cost_time:423.31480979919434ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12876 prompt_cache_len:5151 prompt_cache_ratio:0.40004659832246037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 -DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:41 [batch.py:51] router release req id 8 -INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10851502418518066 s -INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.11046171188354492 s -DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=331478753374010836008816302858913734966, time:1750768361.698183s req_ids:[8] -DEBUG 06-24 20:32:41 [manager.py:391] -ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:206.9108486175537ms total_cost_time:206.9559097290039ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12877 prompt_cache_len:5151 prompt_cache_ratio:0.4000155315679118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 -DEBUG 06-24 20:32:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:41 [batch.py:51] router release req id 8 -INFO 06-24 20:32:41 [manager.py:224] router recive req id 8 cost time 0.10992717742919922 s -INFO 06-24 20:32:41 [manager.py:68] detokenization recv req id 8 cost time 0.1118319034576416 s -DEBUG 06-24 20:32:41 [manager.py:391] Prefill Batch: batch_id=102542784665974863699446702836921896744, time:1750768361.9118366s req_ids:[8] -DEBUG 06-24 20:32:41 [manager.py:391] -ERROR 06-24 20:32:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:41 lightllm_req_id:8 first_token_cost:201.7052173614502ms total_cost_time:201.74884796142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12878 prompt_cache_len:5151 prompt_cache_ratio:0.39998446963814255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 -DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:42 [batch.py:51] router release req id 8 -INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10749602317810059 s -INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.10966920852661133 s -DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=282410450932448245320614620816078469630, time:1750768362.1199281s req_ids:[8] -DEBUG 06-24 20:32:42 [manager.py:391] -ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:213.4227752685547ms total_cost_time:213.46592903137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12879 prompt_cache_len:5151 prompt_cache_ratio:0.3999534125320289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 -DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:42 [batch.py:51] router release req id 8 -INFO 06-24 20:32:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10906648635864258 s -INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.11111640930175781 s -DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=199036199411911209867236474976652322521, time:1750768362.341671s req_ids:[8] -DEBUG 06-24 20:32:42 [manager.py:391] -ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:214.71428871154785ms total_cost_time:214.75672721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12880 prompt_cache_len:5151 prompt_cache_ratio:0.3999223602484472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 -DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:42 [batch.py:51] router release req id 8 -INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.10857582092285156 s -INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.1105337142944336 s -DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=275838052459313704283131316678155164157, time:1750768362.5621865s req_ids:[8] -DEBUG 06-24 20:32:42 [manager.py:391] -ERROR 06-24 20:32:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:382.5969696044922ms total_cost_time:382.6415538787842ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12881 prompt_cache_len:5151 prompt_cache_ratio:0.3998913127862744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 -DEBUG 06-24 20:32:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:42 [batch.py:51] router release req id 8 -INFO 06-24 20:32:42 [manager.py:224] router recive req id 8 cost time 0.1079108715057373 s -INFO 06-24 20:32:42 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s -DEBUG 06-24 20:32:42 [manager.py:391] Prefill Batch: batch_id=22256443322693418432066122735929835393, time:1750768362.9515493s req_ids:[8] -DEBUG 06-24 20:32:42 [manager.py:391] -ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:42 lightllm_req_id:8 first_token_cost:211.7021083831787ms total_cost_time:211.7481231689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:12882 prompt_cache_len:5151 prompt_cache_ratio:0.3998602701443875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 -DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:43 [batch.py:51] router release req id 8 -INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10921835899353027 s -INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11117172241210938 s -DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=230719554820704628226513869340634270556, time:1750768363.1698744s req_ids:[8] -DEBUG 06-24 20:32:43 [manager.py:391] -ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:210.05868911743164ms total_cost_time:210.07966995239258ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12883 prompt_cache_len:5151 prompt_cache_ratio:0.39982923232166423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 -DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:43 [batch.py:51] router release req id 8 -INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10666298866271973 s -INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.10866117477416992 s -DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=90847437594544539414446558071070232989, time:1750768363.3871963s req_ids:[8] -DEBUG 06-24 20:32:43 [manager.py:391] -ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:210.8767032623291ms total_cost_time:210.921049118042ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12884 prompt_cache_len:5151 prompt_cache_ratio:0.3997981993169823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 -DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:43 [batch.py:51] router release req id 8 -INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.1105499267578125 s -INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11253046989440918 s -DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=32467274945931578438518827573826912993, time:1750768363.6036298s req_ids:[8] -DEBUG 06-24 20:32:43 [manager.py:391] -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:215.55471420288086ms total_cost_time:215.59834480285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12885 prompt_cache_len:5151 prompt_cache_ratio:0.39976717112922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 -DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:43 [batch.py:51] router release req id 8 -INFO 06-24 20:32:43 [manager.py:224] router recive req id 8 cost time 0.10849380493164062 s -INFO 06-24 20:32:43 [manager.py:68] detokenization recv req id 8 cost time 0.11041927337646484 s -DEBUG 06-24 20:32:43 [manager.py:391] Prefill Batch: batch_id=308356996812741203143846913955190949688, time:1750768363.823867s req_ids:[8] -DEBUG 06-24 20:32:43 [manager.py:391] -ERROR 06-24 20:32:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:211.81368827819824ms total_cost_time:211.85803413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12886 prompt_cache_len:5151 prompt_cache_ratio:0.3997361477572559 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 -DEBUG 06-24 20:32:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:43 [batch.py:51] router release req id 8 -INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10862445831298828 s -INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11229467391967773 s -DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=276163978058958580826991167271763283608, time:1750768364.0441546s req_ids:[8] -DEBUG 06-24 20:32:44 [manager.py:391] -ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:43 lightllm_req_id:8 first_token_cost:394.36936378479004ms total_cost_time:394.41370964050293ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12887 prompt_cache_len:5151 prompt_cache_ratio:0.399705129199969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 -DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:44 [batch.py:51] router release req id 8 -INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10909414291381836 s -INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11101150512695312 s -DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=286248189132336773998456302015094183435, time:1750768364.4447024s req_ids:[8] -DEBUG 06-24 20:32:44 [manager.py:391] -ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:208.32109451293945ms total_cost_time:208.36734771728516ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12888 prompt_cache_len:5151 prompt_cache_ratio:0.3996741154562384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 -DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:44 [batch.py:51] router release req id 8 -INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s -INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11009478569030762 s -DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=120608706490100011489302634007617461074, time:1750768364.6587808s req_ids:[8] -DEBUG 06-24 20:32:44 [manager.py:391] -ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:208.88185501098633ms total_cost_time:208.92763137817383ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12889 prompt_cache_len:5151 prompt_cache_ratio:0.3996431065249437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 -DEBUG 06-24 20:32:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:44 [batch.py:51] router release req id 8 -INFO 06-24 20:32:44 [manager.py:224] router recive req id 8 cost time 0.10848712921142578 s -INFO 06-24 20:32:44 [manager.py:68] detokenization recv req id 8 cost time 0.11197185516357422 s -DEBUG 06-24 20:32:44 [manager.py:391] Prefill Batch: batch_id=174368984048093765196484983066368814397, time:1750768364.873221s req_ids:[8] -DEBUG 06-24 20:32:44 [manager.py:391] -ERROR 06-24 20:32:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:211.7295265197754ms total_cost_time:211.7753028869629ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12890 prompt_cache_len:5151 prompt_cache_ratio:0.3996121024049651 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 -DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:45 [batch.py:51] router release req id 8 -INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10772919654846191 s -INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.10962152481079102 s -DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=90987343539172249072712881182909984932, time:1750768365.0935338s req_ids:[8] -DEBUG 06-24 20:32:45 [manager.py:391] -ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:44 lightllm_req_id:8 first_token_cost:206.87270164489746ms total_cost_time:206.91609382629395ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12891 prompt_cache_len:5151 prompt_cache_ratio:0.3995811030951827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 -DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:45 [batch.py:51] router release req id 8 -INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10809063911437988 s -INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s -DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=65195424068177244853555115154158192858, time:1750768365.3068626s req_ids:[8] -DEBUG 06-24 20:32:45 [manager.py:391] -ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:213.34147453308105ms total_cost_time:213.38820457458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12892 prompt_cache_len:5151 prompt_cache_ratio:0.3995501085944772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 -DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:45 [batch.py:51] router release req id 8 -INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10866522789001465 s -INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11052775382995605 s -DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=305497047182717265234489123309083668228, time:1750768365.526419s req_ids:[8] -DEBUG 06-24 20:32:45 [manager.py:391] -ERROR 06-24 20:32:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:207.24081993103027ms total_cost_time:207.28516578674316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12893 prompt_cache_len:5151 prompt_cache_ratio:0.39951911890172964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 -DEBUG 06-24 20:32:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:45 [batch.py:51] router release req id 8 -INFO 06-24 20:32:45 [manager.py:224] router recive req id 8 cost time 0.10850882530212402 s -INFO 06-24 20:32:45 [manager.py:68] detokenization recv req id 8 cost time 0.11052131652832031 s -DEBUG 06-24 20:32:45 [manager.py:391] Prefill Batch: batch_id=4336780487474613396020340644803398432, time:1750768365.7402627s req_ids:[8] -DEBUG 06-24 20:32:45 [manager.py:391] -ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:45 lightllm_req_id:8 first_token_cost:395.54548263549805ms total_cost_time:395.59197425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:12894 prompt_cache_len:5151 prompt_cache_ratio:0.39948813401582134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 -DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:46 [batch.py:51] router release req id 8 -INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10890579223632812 s -INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.11083579063415527 s -DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=318286494635991376652187361591447375189, time:1750768366.1429873s req_ids:[8] -DEBUG 06-24 20:32:46 [manager.py:391] -ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:207.26346969604492ms total_cost_time:207.30853080749512ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12895 prompt_cache_len:5151 prompt_cache_ratio:0.39945715393563397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 -DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:46 [batch.py:51] router release req id 8 -INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10693144798278809 s -INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.10884857177734375 s -DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=102168059512969490423572861380632814920, time:1750768366.3561654s req_ids:[8] -DEBUG 06-24 20:32:46 [manager.py:391] -ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:214.6005630493164ms total_cost_time:214.644193649292ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12896 prompt_cache_len:5151 prompt_cache_ratio:0.39942617866004965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 -DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:46 [batch.py:51] router release req id 8 -INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10921716690063477 s -DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=183830403990151759732814451883982339065, time:1750768366.5776844s req_ids:[8] -DEBUG 06-24 20:32:46 [manager.py:391] -INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.11645865440368652 s -ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:209.36131477355957ms total_cost_time:209.40709114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12897 prompt_cache_len:5151 prompt_cache_ratio:0.3993952081879507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 -DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:46 [batch.py:51] router release req id 8 -INFO 06-24 20:32:46 [manager.py:224] router recive req id 8 cost time 0.10777068138122559 s -INFO 06-24 20:32:46 [manager.py:68] detokenization recv req id 8 cost time 0.10967278480529785 s -DEBUG 06-24 20:32:46 [manager.py:391] Prefill Batch: batch_id=270771977134166513888438164480173409807, time:1750768366.7956276s req_ids:[8] -DEBUG 06-24 20:32:46 [manager.py:391] -ERROR 06-24 20:32:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:209.1071605682373ms total_cost_time:209.1515064239502ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12898 prompt_cache_len:5151 prompt_cache_ratio:0.3993642425182199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 -DEBUG 06-24 20:32:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:46 [batch.py:51] router release req id 8 -INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.10794711112976074 s -INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.10983896255493164 s -DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=107155677279536174075203215241895240480, time:1750768367.010928s req_ids:[8] -DEBUG 06-24 20:32:47 [manager.py:391] -INFO 06-24 20:32:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:46 lightllm_req_id:8 first_token_cost:212.7225399017334ms total_cost_time:212.7671241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12899 prompt_cache_len:5151 prompt_cache_ratio:0.3993332816497403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 -DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:47 [batch.py:51] router release req id 8 -INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.3092012405395508 s -INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.3113434314727783 s -DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=4449735080615531952315116875151216084, time:1750768367.4375908s req_ids:[8] -DEBUG 06-24 20:32:47 [manager.py:391] -DEBUG 06-24 20:32:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 51362.115 tokens/s -DEBUG 06-24 20:32:47 [stats.py:37] Avg prompt tokens throughput: 51354.140 tokens/s -DEBUG 06-24 20:32:47 [stats.py:37] Avg generate tokens throughput: 7.974 tokens/s -ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:427.80590057373047ms total_cost_time:427.8266429901123ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12900 prompt_cache_len:5151 prompt_cache_ratio:0.39930232558139533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 -DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:47 [batch.py:51] router release req id 8 -INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.1089012622833252 s -INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.11086106300354004 s -DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=278809233310314894717978639873289973224, time:1750768367.6668477s req_ids:[8] -DEBUG 06-24 20:32:47 [manager.py:391] -ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:216.80474281311035ms total_cost_time:216.84861183166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12901 prompt_cache_len:5151 prompt_cache_ratio:0.3992713743120688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 -DEBUG 06-24 20:32:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:47 [batch.py:51] router release req id 8 -INFO 06-24 20:32:47 [manager.py:224] router recive req id 8 cost time 0.10848021507263184 s -INFO 06-24 20:32:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s -DEBUG 06-24 20:32:47 [manager.py:391] Prefill Batch: batch_id=255022828655958176521772538921390937047, time:1750768367.8861938s req_ids:[8] -DEBUG 06-24 20:32:47 [manager.py:391] -ERROR 06-24 20:32:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:211.38501167297363ms total_cost_time:211.42840385437012ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12902 prompt_cache_len:5151 prompt_cache_ratio:0.39924042784064484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 -DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:48 [batch.py:51] router release req id 8 -INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.1076362133026123 s -INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.10966610908508301 s -DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=138622804971475858270537276851374882567, time:1750768368.106162s req_ids:[8] -DEBUG 06-24 20:32:48 [manager.py:391] -ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:47 lightllm_req_id:8 first_token_cost:214.7347927093506ms total_cost_time:214.77842330932617ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:12903 prompt_cache_len:5151 prompt_cache_ratio:0.39920948616600793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 -DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:48 [batch.py:51] router release req id 8 -INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.10920238494873047 s -INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11158370971679688 s -DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=265442365563723208441884661850743257614, time:1750768368.3259933s req_ids:[8] -DEBUG 06-24 20:32:48 [manager.py:391] -ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:212.66651153564453ms total_cost_time:212.71061897277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12904 prompt_cache_len:5151 prompt_cache_ratio:0.39917854928704277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 -DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:48 [batch.py:51] router release req id 8 -INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.1089177131652832 s -INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11079788208007812 s -DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=88527502631751861010253793164199997731, time:1750768368.5460155s req_ids:[8] -DEBUG 06-24 20:32:48 [manager.py:391] -ERROR 06-24 20:32:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:210.20126342773438ms total_cost_time:210.26039123535156ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:12905 prompt_cache_len:5151 prompt_cache_ratio:0.39914761720263464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 -DEBUG 06-24 20:32:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:48 [batch.py:51] router release req id 8 -INFO 06-24 20:32:48 [manager.py:224] router recive req id 8 cost time 0.10890316963195801 s -INFO 06-24 20:32:48 [manager.py:68] detokenization recv req id 8 cost time 0.11097335815429688 s -DEBUG 06-24 20:32:48 [manager.py:391] Prefill Batch: batch_id=151918695996829108820912358381424799642, time:1750768368.765368s req_ids:[8] -DEBUG 06-24 20:32:48 [manager.py:391] -ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:48 lightllm_req_id:8 first_token_cost:376.9240379333496ms total_cost_time:376.9686222076416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12906 prompt_cache_len:5151 prompt_cache_ratio:0.399116689911669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 -DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:49 [batch.py:51] router release req id 8 -INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10538077354431152 s -INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.10674118995666504 s -DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=240501756469114548608930196231858067196, time:1750768369.1495357s req_ids:[8] -DEBUG 06-24 20:32:49 [manager.py:391] -ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.25934410095215ms total_cost_time:208.30249786376953ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12907 prompt_cache_len:5151 prompt_cache_ratio:0.3990857674130317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 -DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:49 [batch.py:51] router release req id 8 -INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10882854461669922 s -INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s -DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=289259091463584744480427569167039779462, time:1750768369.3635895s req_ids:[8] -DEBUG 06-24 20:32:49 [manager.py:391] -ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:212.26787567138672ms total_cost_time:212.3126983642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12908 prompt_cache_len:5151 prompt_cache_ratio:0.39905484970560895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 -DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:49 [batch.py:51] router release req id 8 -INFO 06-24 20:32:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.10879850387573242 s -INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.11063718795776367 s -DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=191263003833809082584938256112185884779, time:1750768369.581702s req_ids:[8] -DEBUG 06-24 20:32:49 [manager.py:391] -ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.62245559692383ms total_cost_time:208.66632461547852ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12909 prompt_cache_len:5151 prompt_cache_ratio:0.39902393678828724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 -DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:49 [batch.py:51] router release req id 8 -INFO 06-24 20:32:49 [manager.py:224] router recive req id 8 cost time 0.1059110164642334 s -INFO 06-24 20:32:49 [manager.py:68] detokenization recv req id 8 cost time 0.10769486427307129 s -DEBUG 06-24 20:32:49 [manager.py:391] Prefill Batch: batch_id=81902795467379460400668537478844898701, time:1750768369.798326s req_ids:[8] -DEBUG 06-24 20:32:49 [manager.py:391] -ERROR 06-24 20:32:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:208.7695598602295ms total_cost_time:208.81390571594238ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12910 prompt_cache_len:5151 prompt_cache_ratio:0.3989930286599535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 -DEBUG 06-24 20:32:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:49 [batch.py:51] router release req id 8 -INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10593724250793457 s -INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10789942741394043 s -DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=225605605094680520105577733466074518118, time:1750768370.0149605s req_ids:[8] -DEBUG 06-24 20:32:50 [manager.py:391] -ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:49 lightllm_req_id:8 first_token_cost:210.85476875305176ms total_cost_time:210.8769416809082ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12911 prompt_cache_len:5151 prompt_cache_ratio:0.398962125319495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 -DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:50 [batch.py:51] router release req id 8 -INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10284590721130371 s -INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10476994514465332 s -DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=336877155230819017036831715916542790463, time:1750768370.2333694s req_ids:[8] -DEBUG 06-24 20:32:50 [manager.py:391] -ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:388.5455131530762ms total_cost_time:388.5791301727295ms,out_token_counter:1 mean_per_token_cost_time: 0.03361701965332031ms prompt_token_num:12912 prompt_cache_len:5151 prompt_cache_ratio:0.39893122676579923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 -DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:50 [batch.py:51] router release req id 8 -INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s -INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.10938620567321777 s -DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=87600417805480212545294058328893829647, time:1750768370.6271718s req_ids:[8] -DEBUG 06-24 20:32:50 [manager.py:391] -ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:214.64014053344727ms total_cost_time:214.6751880645752ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:12913 prompt_cache_len:5151 prompt_cache_ratio:0.3989003329977542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 -DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:50 [batch.py:51] router release req id 8 -INFO 06-24 20:32:50 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s -INFO 06-24 20:32:50 [manager.py:68] detokenization recv req id 8 cost time 0.11018824577331543 s -DEBUG 06-24 20:32:50 [manager.py:391] Prefill Batch: batch_id=277406554980552126789758474025967473542, time:1750768370.848689s req_ids:[8] -DEBUG 06-24 20:32:50 [manager.py:391] -ERROR 06-24 20:32:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:210.3433609008789ms total_cost_time:210.3874683380127ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12914 prompt_cache_len:5151 prompt_cache_ratio:0.3988694440142481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 -DEBUG 06-24 20:32:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:50 [batch.py:51] router release req id 8 -INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10624051094055176 s -INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10821890830993652 s -DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=295752496235565729837873830849632338953, time:1750768371.0634687s req_ids:[8] -DEBUG 06-24 20:32:51 [manager.py:391] -ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:50 lightllm_req_id:8 first_token_cost:212.46886253356934ms total_cost_time:212.48745918273926ms,out_token_counter:1 mean_per_token_cost_time: 0.018596649169921875ms prompt_token_num:12915 prompt_cache_len:5151 prompt_cache_ratio:0.39883855981416955 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 -DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:51 [batch.py:51] router release req id 8 -INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10485172271728516 s -INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10684084892272949 s -DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=139122490742054125094407310767786719853, time:1750768371.2846644s req_ids:[8] -DEBUG 06-24 20:32:51 [manager.py:391] -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:212.16392517089844ms total_cost_time:212.18490600585938ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:12916 prompt_cache_len:5151 prompt_cache_ratio:0.39880768039640757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 -DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:51 [batch.py:51] router release req id 8 -INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10355997085571289 s -INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.10542845726013184 s -DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=211980806392362611982077090383202713887, time:1750768371.5021484s req_ids:[8] -DEBUG 06-24 20:32:51 [manager.py:391] -ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:215.34252166748047ms total_cost_time:215.3620719909668ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:12917 prompt_cache_len:5151 prompt_cache_ratio:0.39877680575985136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 -DEBUG 06-24 20:32:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:51 [batch.py:51] router release req id 8 -INFO 06-24 20:32:51 [manager.py:224] router recive req id 8 cost time 0.10410189628601074 s -INFO 06-24 20:32:51 [manager.py:68] detokenization recv req id 8 cost time 0.1060483455657959 s -DEBUG 06-24 20:32:51 [manager.py:391] Prefill Batch: batch_id=250792530020623909775314750896499629115, time:1750768371.7227063s req_ids:[8] -DEBUG 06-24 20:32:51 [manager.py:391] -ERROR 06-24 20:32:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:214.48731422424316ms total_cost_time:214.5075798034668ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12918 prompt_cache_len:5151 prompt_cache_ratio:0.39874593590339064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 -DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:52 [batch.py:51] router release req id 8 -INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.3075847625732422 s -INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.30959534645080566 s -DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=334869094121541617796824846594791666891, time:1750768372.148581s req_ids:[8] -DEBUG 06-24 20:32:52 [manager.py:391] -ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:51 lightllm_req_id:8 first_token_cost:427.1965026855469ms total_cost_time:427.2170066833496ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:12919 prompt_cache_len:5151 prompt_cache_ratio:0.39871507082591534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 -DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:52 [batch.py:51] router release req id 8 -INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.1054689884185791 s -INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10736441612243652 s -DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=111085422146742684600645107663045020726, time:1750768372.3748178s req_ids:[8] -DEBUG 06-24 20:32:52 [manager.py:391] -ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:173.3572483062744ms total_cost_time:173.37799072265625ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:12920 prompt_cache_len:5151 prompt_cache_ratio:0.3986842105263158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 -DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:52 [batch.py:51] router release req id 8 -INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10698652267456055 s -INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10899686813354492 s -DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=152295261477633392570277294100556548031, time:1750768372.5535424s req_ids:[8] -DEBUG 06-24 20:32:52 [manager.py:391] -ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:173.3717918395996ms total_cost_time:173.39181900024414ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:12921 prompt_cache_len:5151 prompt_cache_ratio:0.3986533550034827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 -DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:52 [batch.py:51] router release req id 8 -INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10595130920410156 s -INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10768413543701172 s -DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=253917499531028528429339765760337125781, time:1750768372.7335112s req_ids:[8] -DEBUG 06-24 20:32:52 [manager.py:391] -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:174.57032203674316ms total_cost_time:174.5917797088623ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:12922 prompt_cache_len:5151 prompt_cache_ratio:0.3986225042563071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 -DEBUG 06-24 20:32:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:52 [batch.py:51] router release req id 8 -INFO 06-24 20:32:52 [manager.py:224] router recive req id 8 cost time 0.10709667205810547 s -INFO 06-24 20:32:52 [manager.py:68] detokenization recv req id 8 cost time 0.10889983177185059 s -DEBUG 06-24 20:32:52 [manager.py:391] Prefill Batch: batch_id=209494378238083153793980436523385802040, time:1750768372.9131565s req_ids:[8] -DEBUG 06-24 20:32:52 [manager.py:391] -ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:52 lightllm_req_id:8 first_token_cost:206.57896995544434ms total_cost_time:206.60066604614258ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:12923 prompt_cache_len:5151 prompt_cache_ratio:0.39859165828368026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 -DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:53 [batch.py:51] router release req id 8 -INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10350728034973145 s -INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.1045689582824707 s -DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=167142446393170371914534712628249459505, time:1750768373.125418s req_ids:[8] -DEBUG 06-24 20:32:53 [manager.py:391] -INFO 06-24 20:32:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:53 [statics_utils.py:24] mean first cost: 230.10195452533532 ms -INFO 06-24 20:32:53 [statics_utils.py:24] mean per token cost: 0.06051902061554882 ms -INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:210.7412815093994ms total_cost_time:210.76107025146484ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:12924 prompt_cache_len:5151 prompt_cache_ratio:0.398560817084494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 -DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:53 [batch.py:51] router release req id 8 -INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10338568687438965 s -INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.10541033744812012 s -INFO 06-24 20:32:53 [manager.py:620] left req id 8can release False refcount 3 -DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=232313256038154860684353765798282930665, time:1750768373.3404565s req_ids:[8] -DEBUG 06-24 20:32:53 [manager.py:391] -ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:379.55498695373535ms total_cost_time:379.5773983001709ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:12925 prompt_cache_len:5151 prompt_cache_ratio:0.3985299806576402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 -DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:53 [batch.py:51] router release req id 8 -INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10870623588562012 s -INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.11070680618286133 s -DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=102023534388167542808838116809032689536, time:1750768373.7236493s req_ids:[8] -DEBUG 06-24 20:32:53 [manager.py:391] -ERROR 06-24 20:32:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:210.65926551818848ms total_cost_time:210.70384979248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12926 prompt_cache_len:5151 prompt_cache_ratio:0.39849914900201144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 -DEBUG 06-24 20:32:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:53 [batch.py:51] router release req id 8 -INFO 06-24 20:32:53 [manager.py:224] router recive req id 8 cost time 0.10799312591552734 s -INFO 06-24 20:32:53 [manager.py:68] detokenization recv req id 8 cost time 0.1099390983581543 s -DEBUG 06-24 20:32:53 [manager.py:391] Prefill Batch: batch_id=198231021556372056825551028792748324331, time:1750768373.9406655s req_ids:[8] -DEBUG 06-24 20:32:53 [manager.py:391] -ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:53 lightllm_req_id:8 first_token_cost:209.95092391967773ms total_cost_time:209.99717712402344ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:12927 prompt_cache_len:5151 prompt_cache_ratio:0.39846832211650035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 -DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:54 [batch.py:51] router release req id 8 -INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.1080012321472168 s -INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.1099543571472168 s -DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=252959330953307922622628793593577537586, time:1750768374.1579487s req_ids:[8] -DEBUG 06-24 20:32:54 [manager.py:391] -ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:206.2668800354004ms total_cost_time:206.3119411468506ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12928 prompt_cache_len:5151 prompt_cache_ratio:0.3984375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 -DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:54 [batch.py:51] router release req id 8 -INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10745596885681152 s -INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10957574844360352 s -DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=215385111894681779173354941527320968885, time:1750768374.3754847s req_ids:[8] -DEBUG 06-24 20:32:54 [manager.py:391] -ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:216.2466049194336ms total_cost_time:216.3090705871582ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12929 prompt_cache_len:5151 prompt_cache_ratio:0.39840668265140383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 -DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:54 [batch.py:51] router release req id 8 -INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10756969451904297 s -INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10943865776062012 s -DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=137213551232169556753730797810202847305, time:1750768374.6011128s req_ids:[8] -DEBUG 06-24 20:32:54 [manager.py:391] -ERROR 06-24 20:32:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:220.14379501342773ms total_cost_time:220.18885612487793ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12930 prompt_cache_len:5151 prompt_cache_ratio:0.39837587006960556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 -DEBUG 06-24 20:32:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:54 [batch.py:51] router release req id 8 -INFO 06-24 20:32:54 [manager.py:224] router recive req id 8 cost time 0.10790014266967773 s -INFO 06-24 20:32:54 [manager.py:68] detokenization recv req id 8 cost time 0.10978245735168457 s -DEBUG 06-24 20:32:54 [manager.py:391] Prefill Batch: batch_id=131829167908095401455511671287105035918, time:1750768374.8198059s req_ids:[8] -DEBUG 06-24 20:32:54 [manager.py:391] -ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:54 lightllm_req_id:8 first_token_cost:385.7686519622803ms total_cost_time:385.831356048584ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12931 prompt_cache_len:5151 prompt_cache_ratio:0.39834506225349936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 -DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:55 [batch.py:51] router release req id 8 -INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10795760154724121 s -INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.1098628044128418 s -DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=34983927202219311048215835846564490579, time:1750768375.2119613s req_ids:[8] -DEBUG 06-24 20:32:55 [manager.py:391] -ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:211.66467666625977ms total_cost_time:211.72595024108887ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:12932 prompt_cache_len:5151 prompt_cache_ratio:0.3983142592019796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 -DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:55 [batch.py:51] router release req id 8 -INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10688376426696777 s -INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.10880279541015625 s -DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=266107985150428319908529106817245812229, time:1750768375.4302175s req_ids:[8] -DEBUG 06-24 20:32:55 [manager.py:391] -ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:209.76519584655762ms total_cost_time:209.81097221374512ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12933 prompt_cache_len:5151 prompt_cache_ratio:0.39828346091394107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 -DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:55 [batch.py:51] router release req id 8 -INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10880517959594727 s -INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.11077642440795898 s -DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=143878123286737151102680501531156791307, time:1750768375.6459522s req_ids:[8] -DEBUG 06-24 20:32:55 [manager.py:391] -ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:212.00275421142578ms total_cost_time:212.0652198791504ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:12934 prompt_cache_len:5151 prompt_cache_ratio:0.39825266738827897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 -DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:55 [batch.py:51] router release req id 8 -INFO 06-24 20:32:55 [manager.py:224] router recive req id 8 cost time 0.10914421081542969 s -INFO 06-24 20:32:55 [manager.py:68] detokenization recv req id 8 cost time 0.11117887496948242 s -DEBUG 06-24 20:32:55 [manager.py:391] Prefill Batch: batch_id=137753880152086523471022614387461806796, time:1750768375.865206s req_ids:[8] -DEBUG 06-24 20:32:55 [manager.py:391] -ERROR 06-24 20:32:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:212.6917839050293ms total_cost_time:212.73565292358398ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12935 prompt_cache_len:5151 prompt_cache_ratio:0.3982218786238887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 -DEBUG 06-24 20:32:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:55 [batch.py:51] router release req id 8 -INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.1081085205078125 s -INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11012768745422363 s -DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=244844937907978017677062739196223549863, time:1750768376.083391s req_ids:[8] -DEBUG 06-24 20:32:56 [manager.py:391] -ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:55 lightllm_req_id:8 first_token_cost:210.02650260925293ms total_cost_time:210.07966995239258ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:12936 prompt_cache_len:5151 prompt_cache_ratio:0.39819109461966606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 -DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:56 [batch.py:51] router release req id 8 -INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.1087191104888916 s -INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.1106417179107666 s -DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=182307547716615467233687530380405350048, time:1750768376.2997656s req_ids:[8] -DEBUG 06-24 20:32:56 [manager.py:391] -ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:385.71834564208984ms total_cost_time:385.7598304748535ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:12937 prompt_cache_len:5151 prompt_cache_ratio:0.39816031537450725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 -DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:56 [batch.py:51] router release req id 8 -INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s -INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11004137992858887 s -DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=202315475991065934492193741994576217806, time:1750768376.6929696s req_ids:[8] -DEBUG 06-24 20:32:56 [manager.py:391] -ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:206.3913345336914ms total_cost_time:206.4356803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12938 prompt_cache_len:5151 prompt_cache_ratio:0.39812954088730873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 -DEBUG 06-24 20:32:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:56 [batch.py:51] router release req id 8 -INFO 06-24 20:32:56 [manager.py:224] router recive req id 8 cost time 0.10866022109985352 s -INFO 06-24 20:32:56 [manager.py:68] detokenization recv req id 8 cost time 0.11064338684082031 s -DEBUG 06-24 20:32:56 [manager.py:391] Prefill Batch: batch_id=10972275312272804779166538342415308415, time:1750768376.9053369s req_ids:[8] -DEBUG 06-24 20:32:56 [manager.py:391] -ERROR 06-24 20:32:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:56 lightllm_req_id:8 first_token_cost:207.59153366088867ms total_cost_time:207.65304565429688ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:12939 prompt_cache_len:5151 prompt_cache_ratio:0.3980987711569673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 -DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:57 [batch.py:51] router release req id 8 -INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10864067077636719 s -INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.11063742637634277 s -DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=70589526748832140139605609883509320737, time:1750768377.119355s req_ids:[8] -DEBUG 06-24 20:32:57 [manager.py:391] -ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:206.29501342773438ms total_cost_time:206.35390281677246ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:12940 prompt_cache_len:5151 prompt_cache_ratio:0.3980680061823802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 -DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:57 [batch.py:51] router release req id 8 -INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s -INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.1130063533782959 s -DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=12656200728463244490874199830562743953, time:1750768377.3338144s req_ids:[8] -DEBUG 06-24 20:32:57 [manager.py:391] -ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:214.27655220031738ms total_cost_time:214.38980102539062ms,out_token_counter:1 mean_per_token_cost_time: 0.11324882507324219ms prompt_token_num:12941 prompt_cache_len:5151 prompt_cache_ratio:0.39803724596244494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 -DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:57 [batch.py:51] router release req id 8 -INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10749530792236328 s -INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.10951662063598633 s -DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=91290342568348502067954873437937129737, time:1750768377.550625s req_ids:[8] -DEBUG 06-24 20:32:57 [manager.py:391] -DEBUG 06-24 20:32:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 53671.841 tokens/s -DEBUG 06-24 20:32:57 [stats.py:37] Avg prompt tokens throughput: 53663.535 tokens/s -DEBUG 06-24 20:32:57 [stats.py:37] Avg generate tokens throughput: 8.306 tokens/s -ERROR 06-24 20:32:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:207.47637748718262ms total_cost_time:207.49855041503906ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:12942 prompt_cache_len:5151 prompt_cache_ratio:0.39800649049605935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 -DEBUG 06-24 20:32:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:57 [batch.py:51] router release req id 8 -INFO 06-24 20:32:57 [manager.py:224] router recive req id 8 cost time 0.10503959655761719 s -INFO 06-24 20:32:57 [manager.py:68] detokenization recv req id 8 cost time 0.10695695877075195 s -DEBUG 06-24 20:32:57 [manager.py:391] Prefill Batch: batch_id=62880931933828217955427990930578919891, time:1750768377.7679124s req_ids:[8] -DEBUG 06-24 20:32:57 [manager.py:391] -ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:57 lightllm_req_id:8 first_token_cost:388.3941173553467ms total_cost_time:388.41915130615234ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:12943 prompt_cache_len:5151 prompt_cache_ratio:0.3979757397821216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 -DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:58 [batch.py:51] router release req id 8 -INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10480260848999023 s -INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.10674834251403809 s -DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=315777156355411221314972682652609676588, time:1750768378.1624725s req_ids:[8] -DEBUG 06-24 20:32:58 [manager.py:391] -ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.65545082092285ms total_cost_time:210.68215370178223ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:12944 prompt_cache_len:5151 prompt_cache_ratio:0.3979449938195303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 -DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:58 [batch.py:51] router release req id 8 -INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10645747184753418 s -INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.10840487480163574 s -DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=61429348597905880185834841302515845396, time:1750768378.3758924s req_ids:[8] -DEBUG 06-24 20:32:58 [manager.py:391] -ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:207.7200412750244ms total_cost_time:207.77034759521484ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:12945 prompt_cache_len:5151 prompt_cache_ratio:0.3979142526071842 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 -DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:58 [batch.py:51] router release req id 8 -INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10840916633605957 s -INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.11051416397094727 s -DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=185715099761161567009697246668351167183, time:1750768378.589347s req_ids:[8] -DEBUG 06-24 20:32:58 [manager.py:391] -ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.88767051696777ms total_cost_time:210.93201637268066ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12946 prompt_cache_len:5151 prompt_cache_ratio:0.3978835161439827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 -DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:58 [batch.py:51] router release req id 8 -INFO 06-24 20:32:58 [manager.py:224] router recive req id 8 cost time 0.10898160934448242 s -INFO 06-24 20:32:58 [manager.py:68] detokenization recv req id 8 cost time 0.11129450798034668 s -DEBUG 06-24 20:32:58 [manager.py:391] Prefill Batch: batch_id=69182248952262198800592161266003002461, time:1750768378.8052497s req_ids:[8] -DEBUG 06-24 20:32:58 [manager.py:391] -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:32:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:32:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.10804176330566ms total_cost_time:210.15214920043945ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12947 prompt_cache_len:5151 prompt_cache_ratio:0.3978527844288252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 -DEBUG 06-24 20:32:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:58 [batch.py:51] router release req id 8 -INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s -INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.10965657234191895 s -DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=80442181240173388234094348598265733285, time:1750768379.0210974s req_ids:[8] -DEBUG 06-24 20:32:59 [manager.py:391] -ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:58 lightllm_req_id:8 first_token_cost:210.21223068237305ms total_cost_time:210.27326583862305ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:12948 prompt_cache_len:5151 prompt_cache_ratio:0.39782205746061167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 -DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:59 [batch.py:51] router release req id 8 -INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10777854919433594 s -INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.10961723327636719 s -DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=144308602032719838401247118046269452577, time:1750768379.237047s req_ids:[8] -DEBUG 06-24 20:32:59 [manager.py:391] -ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:207.59224891662598ms total_cost_time:207.6125144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:12949 prompt_cache_len:5151 prompt_cache_ratio:0.39779133523824234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 -DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:59 [batch.py:51] router release req id 8 -INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10788488388061523 s -INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.11170196533203125 s -DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=174699695401120592218866722230309037262, time:1750768379.450935s req_ids:[8] -DEBUG 06-24 20:32:59 [manager.py:391] -ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:375.4732608795166ms total_cost_time:375.5173683166504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12950 prompt_cache_len:5151 prompt_cache_ratio:0.3977606177606178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 -DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:59 [batch.py:51] router release req id 8 -INFO 06-24 20:32:59 [manager.py:224] router recive req id 8 cost time 0.10757207870483398 s -INFO 06-24 20:32:59 [manager.py:68] detokenization recv req id 8 cost time 0.1094675064086914 s -DEBUG 06-24 20:32:59 [manager.py:391] Prefill Batch: batch_id=16840296587367438838642752002929973924, time:1750768379.8342154s req_ids:[8] -DEBUG 06-24 20:32:59 [manager.py:391] -ERROR 06-24 20:32:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:32:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:32:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:203.80735397338867ms total_cost_time:203.85026931762695ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12951 prompt_cache_len:5151 prompt_cache_ratio:0.39772990502663885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:32:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 -DEBUG 06-24 20:32:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:32:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:32:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:32:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:32:59 [batch.py:51] router release req id 8 -INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10918211936950684 s -INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11115527153015137 s -DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=314596629040021081758430369974752890856, time:1750768380.0439076s req_ids:[8] -DEBUG 06-24 20:33:00 [manager.py:391] -ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:32:59 lightllm_req_id:8 first_token_cost:214.72716331481934ms total_cost_time:214.78986740112305ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:12952 prompt_cache_len:5151 prompt_cache_ratio:0.3976991970352069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 -DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:00 [batch.py:51] router release req id 8 -INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10782289505004883 s -INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.1096794605255127 s -DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=314012744901665502538973224126457326120, time:1750768380.262842s req_ids:[8] -DEBUG 06-24 20:33:00 [manager.py:391] -ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:211.8971347808838ms total_cost_time:211.94028854370117ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12953 prompt_cache_len:5151 prompt_cache_ratio:0.3976684937852235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 -DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:00 [batch.py:51] router release req id 8 -INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10937285423278809 s -INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11121988296508789 s -DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=230647268047209660971270696898538504520, time:1750768380.4842677s req_ids:[8] -DEBUG 06-24 20:33:00 [manager.py:391] -ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:214.6434783935547ms total_cost_time:214.68639373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:12954 prompt_cache_len:5151 prompt_cache_ratio:0.39763779527559057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 -DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:00 [batch.py:51] router release req id 8 -INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10916328430175781 s -INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11127138137817383 s -DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=44135810167417176616524004040240165577, time:1750768380.70423s req_ids:[8] -DEBUG 06-24 20:33:00 [manager.py:391] -ERROR 06-24 20:33:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:214.71524238586426ms total_cost_time:214.75934982299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12955 prompt_cache_len:5151 prompt_cache_ratio:0.39760710150521034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 -DEBUG 06-24 20:33:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:00 [batch.py:51] router release req id 8 -INFO 06-24 20:33:00 [manager.py:224] router recive req id 8 cost time 0.10912561416625977 s -INFO 06-24 20:33:00 [manager.py:68] detokenization recv req id 8 cost time 0.11117720603942871 s -DEBUG 06-24 20:33:00 [manager.py:391] Prefill Batch: batch_id=176221388503856740498777392223733409264, time:1750768380.9254398s req_ids:[8] -DEBUG 06-24 20:33:00 [manager.py:391] -ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:00 lightllm_req_id:8 first_token_cost:380.1608085632324ms total_cost_time:380.2063465118408ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:12956 prompt_cache_len:5151 prompt_cache_ratio:0.3975764124729855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 -DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:01 [batch.py:51] router release req id 8 -INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10778212547302246 s -INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.10973286628723145 s -DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=154708734335641159694782593130911081003, time:1750768381.3116999s req_ids:[8] -DEBUG 06-24 20:33:01 [manager.py:391] -ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:217.04697608947754ms total_cost_time:217.09012985229492ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12957 prompt_cache_len:5151 prompt_cache_ratio:0.3975457281778189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 -DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:01 [batch.py:51] router release req id 8 -INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.11043596267700195 s -INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11244845390319824 s -DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=140841579056273874413939935854201200022, time:1750768381.5357118s req_ids:[8] -DEBUG 06-24 20:33:01 [manager.py:391] -ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:216.68291091918945ms total_cost_time:216.72534942626953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:12958 prompt_cache_len:5151 prompt_cache_ratio:0.397515048618614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 -DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:01 [batch.py:51] router release req id 8 -INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s -INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11083745956420898 s -DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=303539252059743278068382548839731862344, time:1750768381.7576578s req_ids:[8] -DEBUG 06-24 20:33:01 [manager.py:391] -ERROR 06-24 20:33:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:214.64776992797852ms total_cost_time:214.6909236907959ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12959 prompt_cache_len:5151 prompt_cache_ratio:0.39748437379427426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 -DEBUG 06-24 20:33:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:01 [batch.py:51] router release req id 8 -INFO 06-24 20:33:01 [manager.py:224] router recive req id 8 cost time 0.10900187492370605 s -INFO 06-24 20:33:01 [manager.py:68] detokenization recv req id 8 cost time 0.11129999160766602 s -DEBUG 06-24 20:33:01 [manager.py:391] Prefill Batch: batch_id=191500139603542081836032962661793526383, time:1750768381.9801161s req_ids:[8] -DEBUG 06-24 20:33:01 [manager.py:391] -ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:01 lightllm_req_id:8 first_token_cost:216.91155433654785ms total_cost_time:216.95661544799805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12960 prompt_cache_len:5151 prompt_cache_ratio:0.3974537037037037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 -DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:02 [batch.py:51] router release req id 8 -INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.10811161994934082 s -INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11006903648376465 s -DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=265210333260612747589921927186731374153, time:1750768382.2021415s req_ids:[8] -DEBUG 06-24 20:33:02 [manager.py:391] -ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:210.53576469421387ms total_cost_time:210.58034896850586ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12961 prompt_cache_len:5151 prompt_cache_ratio:0.39742303834580667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 -DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:02 [batch.py:51] router release req id 8 -INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.10897517204284668 s -INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s -DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=153207850059679740101457032700338277332, time:1750768382.4205258s req_ids:[8] -DEBUG 06-24 20:33:02 [manager.py:391] -ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:393.59450340270996ms total_cost_time:393.63789558410645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12962 prompt_cache_len:5151 prompt_cache_ratio:0.3973923777194877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 -DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:02 [batch.py:51] router release req id 8 -INFO 06-24 20:33:02 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s -INFO 06-24 20:33:02 [manager.py:68] detokenization recv req id 8 cost time 0.11023235321044922 s -DEBUG 06-24 20:33:02 [manager.py:391] Prefill Batch: batch_id=44355944374384397417392747194315554768, time:1750768382.8181016s req_ids:[8] -DEBUG 06-24 20:33:02 [manager.py:391] -ERROR 06-24 20:33:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:194.5204734802246ms total_cost_time:194.56219673156738ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:12963 prompt_cache_len:5151 prompt_cache_ratio:0.3973617218236519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 -DEBUG 06-24 20:33:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:02 [batch.py:51] router release req id 8 -INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10849785804748535 s -INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s -DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=103648177072542355216118391756931998147, time:1750768383.020583s req_ids:[8] -DEBUG 06-24 20:33:03 [manager.py:391] -ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:02 lightllm_req_id:8 first_token_cost:170.53484916687012ms total_cost_time:170.58658599853516ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:12964 prompt_cache_len:5151 prompt_cache_ratio:0.3973310706572046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 -DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:03 [batch.py:51] router release req id 8 -INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10825920104980469 s -INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.1104283332824707 s -DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=145114005142044120494274581924949891185, time:1750768383.1952667s req_ids:[8] -DEBUG 06-24 20:33:03 [manager.py:391] -ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:204.6065330505371ms total_cost_time:204.6518325805664ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:12965 prompt_cache_len:5151 prompt_cache_ratio:0.3973004242190513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 -DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:03 [batch.py:51] router release req id 8 -INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.1091461181640625 s -INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s -DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=317807868880814323279862971764111061329, time:1750768383.4115329s req_ids:[8] -DEBUG 06-24 20:33:03 [manager.py:391] -ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:215.3148651123047ms total_cost_time:215.35801887512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12966 prompt_cache_len:5151 prompt_cache_ratio:0.3972697825080981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 -DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:03 [batch.py:51] router release req id 8 -INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10859966278076172 s -INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.11061596870422363 s -DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=323492624076626596089652042139034234741, time:1750768383.631158s req_ids:[8] -DEBUG 06-24 20:33:03 [manager.py:391] -ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:208.98747444152832ms total_cost_time:209.03420448303223ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:12967 prompt_cache_len:5151 prompt_cache_ratio:0.39723914552325135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 -DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:03 [batch.py:51] router release req id 8 -INFO 06-24 20:33:03 [manager.py:224] router recive req id 8 cost time 0.10753917694091797 s -INFO 06-24 20:33:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094355583190918 s -DEBUG 06-24 20:33:03 [manager.py:391] Prefill Batch: batch_id=9524225021712841479825636577336600278, time:1750768383.8482509s req_ids:[8] -DEBUG 06-24 20:33:03 [manager.py:391] -ERROR 06-24 20:33:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:209.6095085144043ms total_cost_time:209.65337753295898ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12968 prompt_cache_len:5151 prompt_cache_ratio:0.39720851326341766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 -DEBUG 06-24 20:33:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:03 [batch.py:51] router release req id 8 -INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10851097106933594 s -INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s -DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=121116532401364437350295609915497151252, time:1750768384.0657642s req_ids:[8] -DEBUG 06-24 20:33:04 [manager.py:391] -ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:03 lightllm_req_id:8 first_token_cost:381.02197647094727ms total_cost_time:381.0689449310303ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:12969 prompt_cache_len:5151 prompt_cache_ratio:0.39717788572750407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 -DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:04 [batch.py:51] router release req id 8 -INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10852742195129395 s -INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.11056995391845703 s -DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=279314551015373191406254706093521968169, time:1750768384.4541688s req_ids:[8] -DEBUG 06-24 20:33:04 [manager.py:391] -ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:212.64910697937012ms total_cost_time:212.693452835083ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12970 prompt_cache_len:5151 prompt_cache_ratio:0.3971472629144179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 -DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:04 [batch.py:51] router release req id 8 -INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s -INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098020076751709 s -DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=53658308208390112131091921790098171216, time:1750768384.6723447s req_ids:[8] -DEBUG 06-24 20:33:04 [manager.py:391] -ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:210.33191680908203ms total_cost_time:210.37578582763672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:12971 prompt_cache_len:5151 prompt_cache_ratio:0.39711664482306686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 -DEBUG 06-24 20:33:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:04 [batch.py:51] router release req id 8 -INFO 06-24 20:33:04 [manager.py:224] router recive req id 8 cost time 0.10858941078186035 s -INFO 06-24 20:33:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047744750976562 s -DEBUG 06-24 20:33:04 [manager.py:391] Prefill Batch: batch_id=36014671537063516259814824075441248757, time:1750768384.889275s req_ids:[8] -DEBUG 06-24 20:33:04 [manager.py:391] -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:04 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:208.6927890777588ms total_cost_time:208.73618125915527ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:12972 prompt_cache_len:5151 prompt_cache_ratio:0.39708603145235893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 -DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:05 [batch.py:51] router release req id 8 -INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10936236381530762 s -INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11131548881530762 s -DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=145984696456415141792262212478587274440, time:1750768385.104374s req_ids:[8] -DEBUG 06-24 20:33:05 [manager.py:391] -ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:04 lightllm_req_id:8 first_token_cost:212.03351020812988ms total_cost_time:212.08715438842773ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12973 prompt_cache_len:5151 prompt_cache_ratio:0.3970554228012025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 -DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:05 [batch.py:51] router release req id 8 -INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10803413391113281 s -INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s -DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=96116512749070799789078866520441088937, time:1750768385.323129s req_ids:[8] -DEBUG 06-24 20:33:05 [manager.py:391] -ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:212.62216567993164ms total_cost_time:212.66722679138184ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12974 prompt_cache_len:5151 prompt_cache_ratio:0.39702481886850627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 -DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:05 [batch.py:51] router release req id 8 -INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10869860649108887 s -INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11066699028015137 s -DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=304746264579571821326646784990042852115, time:1750768385.5424397s req_ids:[8] -DEBUG 06-24 20:33:05 [manager.py:391] -ERROR 06-24 20:33:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:418.79820823669434ms total_cost_time:418.8408851623535ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12975 prompt_cache_len:5151 prompt_cache_ratio:0.3969942196531792 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 -DEBUG 06-24 20:33:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:05 [batch.py:51] router release req id 8 -INFO 06-24 20:33:05 [manager.py:224] router recive req id 8 cost time 0.10917830467224121 s -INFO 06-24 20:33:05 [manager.py:68] detokenization recv req id 8 cost time 0.11114621162414551 s -DEBUG 06-24 20:33:05 [manager.py:391] Prefill Batch: batch_id=202749743828523223759340098463980886992, time:1750768385.9674542s req_ids:[8] -DEBUG 06-24 20:33:05 [manager.py:391] -ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:05 lightllm_req_id:8 first_token_cost:213.84930610656738ms total_cost_time:213.89389038085938ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12976 prompt_cache_len:5151 prompt_cache_ratio:0.3969636251541307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 -DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:06 [batch.py:51] router release req id 8 -INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10867810249328613 s -INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11060929298400879 s -DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=203750782032177864925710945880918880595, time:1750768386.187567s req_ids:[8] -DEBUG 06-24 20:33:06 [manager.py:391] -ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:210.0210189819336ms total_cost_time:210.0660800933838ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12977 prompt_cache_len:5151 prompt_cache_ratio:0.39693303537027047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 -DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:06 [batch.py:51] router release req id 8 -INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10893511772155762 s -INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11086058616638184 s -DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=284338736109028949842059807970424577789, time:1750768386.4104998s req_ids:[8] -DEBUG 06-24 20:33:06 [manager.py:391] -ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:221.04668617248535ms total_cost_time:221.09103202819824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12978 prompt_cache_len:5151 prompt_cache_ratio:0.3969024503005086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 -DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:06 [batch.py:51] router release req id 8 -INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10895824432373047 s -INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11128091812133789 s -DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=302926112085310552486029738966931259132, time:1750768386.631245s req_ids:[8] -DEBUG 06-24 20:33:06 [manager.py:391] -ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:210.8924388885498ms total_cost_time:210.94608306884766ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:12979 prompt_cache_len:5151 prompt_cache_ratio:0.3968718699437553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 -DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:06 [batch.py:51] router release req id 8 -INFO 06-24 20:33:06 [manager.py:224] router recive req id 8 cost time 0.10830903053283691 s -INFO 06-24 20:33:06 [manager.py:68] detokenization recv req id 8 cost time 0.11036419868469238 s -DEBUG 06-24 20:33:06 [manager.py:391] Prefill Batch: batch_id=3882841582426922245345230109047675805, time:1750768386.8477106s req_ids:[8] -DEBUG 06-24 20:33:06 [manager.py:391] -ERROR 06-24 20:33:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:209.74278450012207ms total_cost_time:209.7952365875244ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:12980 prompt_cache_len:5151 prompt_cache_ratio:0.3968412942989214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 -DEBUG 06-24 20:33:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:06 [batch.py:51] router release req id 8 -INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10858368873596191 s -INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11061429977416992 s -DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=321633714057611307136311941571682094477, time:1750768387.0641763s req_ids:[8] -DEBUG 06-24 20:33:07 [manager.py:391] -ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:06 lightllm_req_id:8 first_token_cost:371.0591793060303ms total_cost_time:371.11783027648926ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:12981 prompt_cache_len:5151 prompt_cache_ratio:0.39681072336491796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 -DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:07 [batch.py:51] router release req id 8 -INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10902523994445801 s -INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11085271835327148 s -DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=65719791340063985072181319243609486103, time:1750768387.4745958s req_ids:[8] -DEBUG 06-24 20:33:07 [manager.py:391] -ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:33:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 51736.857 tokens/s -DEBUG 06-24 20:33:07 [stats.py:37] Avg prompt tokens throughput: 51728.775 tokens/s -DEBUG 06-24 20:33:07 [stats.py:37] Avg generate tokens throughput: 8.081 tokens/s -INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:245.00751495361328ms total_cost_time:245.05257606506348ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12982 prompt_cache_len:5151 prompt_cache_ratio:0.3967801571406563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 -DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:07 [batch.py:51] router release req id 8 -INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10871052742004395 s -INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11076879501342773 s -DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=43823997117528428909026528237440035579, time:1750768387.696905s req_ids:[8] -DEBUG 06-24 20:33:07 [manager.py:391] -ERROR 06-24 20:33:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:210.59417724609375ms total_cost_time:210.63876152038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12983 prompt_cache_len:5151 prompt_cache_ratio:0.39674959562504813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 -DEBUG 06-24 20:33:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:07 [batch.py:51] router release req id 8 -INFO 06-24 20:33:07 [manager.py:224] router recive req id 8 cost time 0.10889101028442383 s -INFO 06-24 20:33:07 [manager.py:68] detokenization recv req id 8 cost time 0.11087751388549805 s -DEBUG 06-24 20:33:07 [manager.py:391] Prefill Batch: batch_id=125219103746010714061231798261747723031, time:1750768387.9161804s req_ids:[8] -DEBUG 06-24 20:33:07 [manager.py:391] -ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:07 lightllm_req_id:8 first_token_cost:209.7926139831543ms total_cost_time:209.8369598388672ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:12984 prompt_cache_len:5151 prompt_cache_ratio:0.39671903881700554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 -DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:08 [batch.py:51] router release req id 8 -INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10722661018371582 s -INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.10920476913452148 s -DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=67356057720234071060778502468655225335, time:1750768388.132157s req_ids:[8] -DEBUG 06-24 20:33:08 [manager.py:391] -ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:205.43384552001953ms total_cost_time:205.47890663146973ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12985 prompt_cache_len:5151 prompt_cache_ratio:0.3966884867154409 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 -DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:08 [batch.py:51] router release req id 8 -INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10859847068786621 s -INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s -DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=19605371701955389848216558614619673007, time:1750768388.3439658s req_ids:[8] -DEBUG 06-24 20:33:08 [manager.py:391] -ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:207.8533172607422ms total_cost_time:207.9172134399414ms,out_token_counter:1 mean_per_token_cost_time: 0.06389617919921875ms prompt_token_num:12986 prompt_cache_len:5151 prompt_cache_ratio:0.3966579393192669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 -DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:08 [batch.py:51] router release req id 8 -INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.10759711265563965 s -INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11005425453186035 s -DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=211161804440444231341572904848176394852, time:1750768388.5597873s req_ids:[8] -DEBUG 06-24 20:33:08 [manager.py:391] -ERROR 06-24 20:33:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:369.5685863494873ms total_cost_time:369.6136474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:12987 prompt_cache_len:5151 prompt_cache_ratio:0.3966273966273966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 -DEBUG 06-24 20:33:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:08 [batch.py:51] router release req id 8 -INFO 06-24 20:33:08 [manager.py:224] router recive req id 8 cost time 0.1086115837097168 s -INFO 06-24 20:33:08 [manager.py:68] detokenization recv req id 8 cost time 0.11060452461242676 s -DEBUG 06-24 20:33:08 [manager.py:391] Prefill Batch: batch_id=213727319290852998464014793929182631295, time:1750768388.9654217s req_ids:[8] -DEBUG 06-24 20:33:08 [manager.py:391] -ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:08 lightllm_req_id:8 first_token_cost:246.02746963500977ms total_cost_time:246.09637260437012ms,out_token_counter:1 mean_per_token_cost_time: 0.06890296936035156ms prompt_token_num:12988 prompt_cache_len:5151 prompt_cache_ratio:0.39659685863874344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 -DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:09 [batch.py:51] router release req id 8 -INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10887503623962402 s -INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11093425750732422 s -DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=325759992173675372040514483146532900854, time:1750768389.1898797s req_ids:[8] -DEBUG 06-24 20:33:09 [manager.py:391] -ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:212.33057975769043ms total_cost_time:212.3727798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:12989 prompt_cache_len:5151 prompt_cache_ratio:0.3965663253522211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 -DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:09 [batch.py:51] router release req id 8 -INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s -INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11307287216186523 s -DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=7343753132360644490419188337953917768, time:1750768389.410068s req_ids:[8] -DEBUG 06-24 20:33:09 [manager.py:391] -ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:213.43183517456055ms total_cost_time:213.456392288208ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:12990 prompt_cache_len:5151 prompt_cache_ratio:0.39653579676674366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 -DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:09 [batch.py:51] router release req id 8 -INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10809779167175293 s -INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.10999464988708496 s -DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=19323978305412206683783949933855329326, time:1750768389.6284378s req_ids:[8] -DEBUG 06-24 20:33:09 [manager.py:391] -ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:213.6096954345703ms total_cost_time:213.6528491973877ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12991 prompt_cache_len:5151 prompt_cache_ratio:0.39650527288122545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 -DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:09 [batch.py:51] router release req id 8 -INFO 06-24 20:33:09 [manager.py:224] router recive req id 8 cost time 0.10847115516662598 s -INFO 06-24 20:33:09 [manager.py:68] detokenization recv req id 8 cost time 0.11066579818725586 s -DEBUG 06-24 20:33:09 [manager.py:391] Prefill Batch: batch_id=29608879867844085160926568459254698814, time:1750768389.8448298s req_ids:[8] -DEBUG 06-24 20:33:09 [manager.py:391] -ERROR 06-24 20:33:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:204.86688613891602ms total_cost_time:204.911470413208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:12992 prompt_cache_len:5151 prompt_cache_ratio:0.3964747536945813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 -DEBUG 06-24 20:33:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:09 [batch.py:51] router release req id 8 -INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.10941052436828613 s -DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=255809776595122129318151695821805266987, time:1750768390.055834s req_ids:[8] -DEBUG 06-24 20:33:10 [manager.py:391] -INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.11151504516601562 s -ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:09 lightllm_req_id:8 first_token_cost:207.31806755065918ms total_cost_time:207.36217498779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12993 prompt_cache_len:5151 prompt_cache_ratio:0.39644423920572613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 -DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:10 [batch.py:51] router release req id 8 -INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.1078498363494873 s -INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.10983657836914062 s -DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=203133595039585870140127080797949423059, time:1750768390.2710402s req_ids:[8] -DEBUG 06-24 20:33:10 [manager.py:391] -ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:365.77320098876953ms total_cost_time:365.833044052124ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:12994 prompt_cache_len:5151 prompt_cache_ratio:0.3964137294135755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 -DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:10 [batch.py:51] router release req id 8 -INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.11135625839233398 s -INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.11327886581420898 s -DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=82601351511297803244904432598668661572, time:1750768390.6740448s req_ids:[8] -DEBUG 06-24 20:33:10 [manager.py:391] -ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:237.2872829437256ms total_cost_time:237.33210563659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:12995 prompt_cache_len:5151 prompt_cache_ratio:0.39638322431704504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 -DEBUG 06-24 20:33:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:10 [batch.py:51] router release req id 8 -INFO 06-24 20:33:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:10 [manager.py:224] router recive req id 8 cost time 0.1079106330871582 s -INFO 06-24 20:33:10 [manager.py:68] detokenization recv req id 8 cost time 0.1099238395690918 s -DEBUG 06-24 20:33:10 [manager.py:391] Prefill Batch: batch_id=12388578199880550068041307841045742933, time:1750768390.8889127s req_ids:[8] -DEBUG 06-24 20:33:10 [manager.py:391] -ERROR 06-24 20:33:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:204.51927185058594ms total_cost_time:204.56194877624512ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:12996 prompt_cache_len:5151 prompt_cache_ratio:0.3963527239150508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 -DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:11 [batch.py:51] router release req id 8 -INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10859251022338867 s -INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.11054706573486328 s -DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=232140524233388498086685132587976166866, time:1750768391.1011176s req_ids:[8] -DEBUG 06-24 20:33:11 [manager.py:391] -ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:10 lightllm_req_id:8 first_token_cost:204.56647872924805ms total_cost_time:204.60963249206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:12997 prompt_cache_len:5151 prompt_cache_ratio:0.3963222282065092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 -DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:11 [batch.py:51] router release req id 8 -INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s -INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.10975003242492676 s -DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=136740788480743349329431865310010200459, time:1750768391.3200264s req_ids:[8] -DEBUG 06-24 20:33:11 [manager.py:391] -ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:220.8249568939209ms total_cost_time:220.8690643310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:12998 prompt_cache_len:5151 prompt_cache_ratio:0.396291737190337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 -DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:11 [batch.py:51] router release req id 8 -INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.10783576965332031 s -INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.10975837707519531 s -DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=145019826704413001424884414227420395455, time:1750768391.5389254s req_ids:[8] -DEBUG 06-24 20:33:11 [manager.py:391] -ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:210.75129508972168ms total_cost_time:210.79707145690918ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:12999 prompt_cache_len:5151 prompt_cache_ratio:0.39626125086545116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 -DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:11 [batch.py:51] router release req id 8 -INFO 06-24 20:33:11 [manager.py:224] router recive req id 8 cost time 0.20865941047668457 s -INFO 06-24 20:33:11 [manager.py:68] detokenization recv req id 8 cost time 0.21034908294677734 s -DEBUG 06-24 20:33:11 [manager.py:391] Prefill Batch: batch_id=83883731568665235670011422660608221209, time:1750768391.887774s req_ids:[8] -DEBUG 06-24 20:33:11 [manager.py:391] -ERROR 06-24 20:33:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:309.0837001800537ms total_cost_time:309.1263771057129ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13000 prompt_cache_len:5151 prompt_cache_ratio:0.3962307692307692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 -DEBUG 06-24 20:33:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:11 [batch.py:51] router release req id 8 -INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10788869857788086 s -INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11028480529785156 s -DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=215055406353932646530078973889489330166, time:1750768392.0737674s req_ids:[8] -DEBUG 06-24 20:33:12 [manager.py:391] -ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:11 lightllm_req_id:8 first_token_cost:203.72724533081055ms total_cost_time:203.77135276794434ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13001 prompt_cache_len:5151 prompt_cache_ratio:0.3962002922852088 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 -DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:12 [batch.py:51] router release req id 8 -INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10879802703857422 s -INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11078071594238281 s -DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=176582405888708492615130772769103129630, time:1750768392.2813714s req_ids:[8] -DEBUG 06-24 20:33:12 [manager.py:391] -ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:211.89022064208984ms total_cost_time:211.93480491638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13002 prompt_cache_len:5151 prompt_cache_ratio:0.396169820027688 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 -DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:12 [batch.py:51] router release req id 8 -INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10724759101867676 s -INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.1092216968536377 s -DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=110439147036003554141511377132808103269, time:1750768392.5000012s req_ids:[8] -DEBUG 06-24 20:33:12 [manager.py:391] -ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:209.75899696350098ms total_cost_time:209.80310440063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13003 prompt_cache_len:5151 prompt_cache_ratio:0.3961393524571253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 -DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:12 [batch.py:51] router release req id 8 -INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10891556739807129 s -INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.11141705513000488 s -DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=179407576329774911175486013129344941331, time:1750768392.715567s req_ids:[8] -DEBUG 06-24 20:33:12 [manager.py:391] -ERROR 06-24 20:33:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:210.1285457611084ms total_cost_time:210.1724147796631ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13004 prompt_cache_len:5151 prompt_cache_ratio:0.3961088895724392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 -DEBUG 06-24 20:33:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:12 [batch.py:51] router release req id 8 -INFO 06-24 20:33:12 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s -INFO 06-24 20:33:12 [manager.py:68] detokenization recv req id 8 cost time 0.10959744453430176 s -DEBUG 06-24 20:33:12 [manager.py:391] Prefill Batch: batch_id=269693520548407484139985680254373418873, time:1750768392.9315057s req_ids:[8] -DEBUG 06-24 20:33:12 [manager.py:391] -ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:12 lightllm_req_id:8 first_token_cost:210.07275581359863ms total_cost_time:210.1156711578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13005 prompt_cache_len:5151 prompt_cache_ratio:0.396078431372549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 -DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:13 [batch.py:51] router release req id 8 -INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10886383056640625 s -INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11080241203308105 s -DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=283203894597479597336575209078303905648, time:1750768393.1594434s req_ids:[8] -DEBUG 06-24 20:33:13 [manager.py:391] -ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:406.80789947509766ms total_cost_time:406.85057640075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13006 prompt_cache_len:5151 prompt_cache_ratio:0.39604797785637397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 -DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:13 [batch.py:51] router release req id 8 -INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10683894157409668 s -INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s -DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=288960593322333645723158748050169511986, time:1750768393.5628214s req_ids:[8] -DEBUG 06-24 20:33:13 [manager.py:391] -ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:190.9925937652588ms total_cost_time:191.03717803955078ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13007 prompt_cache_len:5151 prompt_cache_ratio:0.3960175290228339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 -DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:13 [batch.py:51] router release req id 8 -INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.10837793350219727 s -INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11224126815795898 s -DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=139235851295403682620929158311826997763, time:1750768393.7612936s req_ids:[8] -DEBUG 06-24 20:33:13 [manager.py:391] -ERROR 06-24 20:33:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:214.2949104309082ms total_cost_time:214.3406867980957ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13008 prompt_cache_len:5151 prompt_cache_ratio:0.3959870848708487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 -DEBUG 06-24 20:33:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:13 [batch.py:51] router release req id 8 -INFO 06-24 20:33:13 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s -INFO 06-24 20:33:13 [manager.py:68] detokenization recv req id 8 cost time 0.11038422584533691 s -DEBUG 06-24 20:33:13 [manager.py:391] Prefill Batch: batch_id=33882121940901492234885943714915170780, time:1750768393.9819086s req_ids:[8] -DEBUG 06-24 20:33:13 [manager.py:391] -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:13 lightllm_req_id:8 first_token_cost:213.67621421813965ms total_cost_time:213.72056007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13009 prompt_cache_len:5151 prompt_cache_ratio:0.39595664539933895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 -DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:14 [batch.py:51] router release req id 8 -INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.10887956619262695 s -INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11123347282409668 s -DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=106956122257022341874462145542064325463, time:1750768394.206414s req_ids:[8] -DEBUG 06-24 20:33:14 [manager.py:391] -ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:214.0185832977295ms total_cost_time:214.06173706054688ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13010 prompt_cache_len:5151 prompt_cache_ratio:0.3959262106072252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 -DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:14 [batch.py:51] router release req id 8 -INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.1083371639251709 s -INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s -DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=113481414930846980617371265217507078981, time:1750768394.425001s req_ids:[8] -DEBUG 06-24 20:33:14 [manager.py:391] -ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:203.65166664123535ms total_cost_time:203.69362831115723ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13011 prompt_cache_len:5151 prompt_cache_ratio:0.39589578049342866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 -DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:14 [batch.py:51] router release req id 8 -INFO 06-24 20:33:14 [manager.py:224] router recive req id 8 cost time 0.1092529296875 s -INFO 06-24 20:33:14 [manager.py:68] detokenization recv req id 8 cost time 0.11140155792236328 s -DEBUG 06-24 20:33:14 [manager.py:391] Prefill Batch: batch_id=100172217535368547881127714946098270333, time:1750768394.6349025s req_ids:[8] -DEBUG 06-24 20:33:14 [manager.py:391] -ERROR 06-24 20:33:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:384.43446159362793ms total_cost_time:384.4785690307617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13012 prompt_cache_len:5151 prompt_cache_ratio:0.3958653550568706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 -DEBUG 06-24 20:33:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:14 [batch.py:51] router release req id 8 -INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10850691795349121 s -INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11072230339050293 s -DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=2060959522821261298360040814644500043, time:1750768395.029122s req_ids:[8] -DEBUG 06-24 20:33:15 [manager.py:391] -ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:14 lightllm_req_id:8 first_token_cost:205.90710639953613ms total_cost_time:205.95240592956543ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13013 prompt_cache_len:5151 prompt_cache_ratio:0.39583493429647276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 -DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:15 [batch.py:51] router release req id 8 -INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s -INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11128616333007812 s -DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=321394634618967813413723870259622526503, time:1750768395.2428594s req_ids:[8] -DEBUG 06-24 20:33:15 [manager.py:391] -ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:212.49651908874512ms total_cost_time:212.5418186187744ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13014 prompt_cache_len:5151 prompt_cache_ratio:0.3958045182111572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 -DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:15 [batch.py:51] router release req id 8 -INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10858011245727539 s -INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11070036888122559 s -DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=251329224160446448843773339255489627915, time:1750768395.4607623s req_ids:[8] -DEBUG 06-24 20:33:15 [manager.py:391] -ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:212.20755577087402ms total_cost_time:212.25237846374512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13015 prompt_cache_len:5151 prompt_cache_ratio:0.3957741067998463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 -DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:15 [batch.py:51] router release req id 8 -INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10903525352478027 s -INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11156177520751953 s -DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=165628670760496530344775804626536398740, time:1750768395.679567s req_ids:[8] -DEBUG 06-24 20:33:15 [manager.py:391] -ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:209.78784561157227ms total_cost_time:209.83409881591797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13016 prompt_cache_len:5151 prompt_cache_ratio:0.3957437000614628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 -DEBUG 06-24 20:33:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:15 [batch.py:51] router release req id 8 -INFO 06-24 20:33:15 [manager.py:224] router recive req id 8 cost time 0.10816383361816406 s -INFO 06-24 20:33:15 [manager.py:68] detokenization recv req id 8 cost time 0.11064362525939941 s -DEBUG 06-24 20:33:15 [manager.py:391] Prefill Batch: batch_id=273689300891655250188172968463948757251, time:1750768395.8956273s req_ids:[8] -DEBUG 06-24 20:33:15 [manager.py:391] -ERROR 06-24 20:33:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:15 lightllm_req_id:8 first_token_cost:214.7808074951172ms total_cost_time:214.82467651367188ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13017 prompt_cache_len:5151 prompt_cache_ratio:0.3957132979949297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 -DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:16 [batch.py:51] router release req id 8 -INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.10780191421508789 s -INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s -DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=60191213572841664104369190635430141617, time:1750768396.1161702s req_ids:[8] -DEBUG 06-24 20:33:16 [manager.py:391] -ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:211.22431755065918ms total_cost_time:211.26937866210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13018 prompt_cache_len:5151 prompt_cache_ratio:0.3956829005991704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 -DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:16 [batch.py:51] router release req id 8 -INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.30959630012512207 s -INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.31170654296875 s -DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=288558832363669133573519403677313823705, time:1750768396.54294s req_ids:[8] -DEBUG 06-24 20:33:16 [manager.py:391] -ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:424.44348335266113ms total_cost_time:424.468994140625ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13019 prompt_cache_len:5151 prompt_cache_ratio:0.3956525078731085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 -DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:16 [batch.py:51] router release req id 8 -INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.1059110164642334 s -INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.10865426063537598 s -DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=80865950845203259781725640475083037010, time:1750768396.7668662s req_ids:[8] -DEBUG 06-24 20:33:16 [manager.py:391] -ERROR 06-24 20:33:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:217.47994422912598ms total_cost_time:217.52381324768066ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13020 prompt_cache_len:5151 prompt_cache_ratio:0.3956221198156682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 -DEBUG 06-24 20:33:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:16 [batch.py:51] router release req id 8 -INFO 06-24 20:33:16 [manager.py:224] router recive req id 8 cost time 0.10906815528869629 s -INFO 06-24 20:33:16 [manager.py:68] detokenization recv req id 8 cost time 0.11142921447753906 s -DEBUG 06-24 20:33:16 [manager.py:391] Prefill Batch: batch_id=236454992858104063711838886331965096370, time:1750768396.9879787s req_ids:[8] -DEBUG 06-24 20:33:16 [manager.py:391] -ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:16 lightllm_req_id:8 first_token_cost:211.68136596679688ms total_cost_time:211.72547340393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13021 prompt_cache_len:5151 prompt_cache_ratio:0.3955917364257738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 -INFO 06-24 20:33:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:17 [batch.py:51] router release req id 8 -INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10925769805908203 s -INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.1114048957824707 s -DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=242772258706677118605372391914688830697, time:1750768397.2049096s req_ids:[8] -DEBUG 06-24 20:33:17 [manager.py:391] -ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:213.37175369262695ms total_cost_time:213.47880363464355ms,out_token_counter:1 mean_per_token_cost_time: 0.10704994201660156ms prompt_token_num:13022 prompt_cache_len:5151 prompt_cache_ratio:0.39556135770234985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 -DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:17 [batch.py:51] router release req id 8 -INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10862112045288086 s -INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11072087287902832 s -DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=189144238474181392682258897048843139324, time:1750768397.4240625s req_ids:[8] -DEBUG 06-24 20:33:17 [manager.py:391] -ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:212.19110488891602ms total_cost_time:212.2366428375244ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13023 prompt_cache_len:5151 prompt_cache_ratio:0.3955309836443216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 -DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:17 [batch.py:51] router release req id 8 -INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10843133926391602 s -INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11065554618835449 s -DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=241864019203936309384230963132145304357, time:1750768397.644812s req_ids:[8] -DEBUG 06-24 20:33:17 [manager.py:391] -DEBUG 06-24 20:33:17 [stats.py:37] Avg tokens(prompt+generate) throughput: 54239.434 tokens/s -DEBUG 06-24 20:33:17 [stats.py:37] Avg prompt tokens throughput: 54231.193 tokens/s -DEBUG 06-24 20:33:17 [stats.py:37] Avg generate tokens throughput: 8.242 tokens/s -ERROR 06-24 20:33:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:214.19644355773926ms total_cost_time:214.24007415771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13024 prompt_cache_len:5151 prompt_cache_ratio:0.3955006142506142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 -DEBUG 06-24 20:33:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:17 [batch.py:51] router release req id 8 -INFO 06-24 20:33:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:17 [manager.py:224] router recive req id 8 cost time 0.10848712921142578 s -INFO 06-24 20:33:17 [manager.py:68] detokenization recv req id 8 cost time 0.11061954498291016 s -DEBUG 06-24 20:33:17 [manager.py:391] Prefill Batch: batch_id=51610959531654263310370621062290214097, time:1750768397.8752317s req_ids:[8] -DEBUG 06-24 20:33:17 [manager.py:391] -ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:17 lightllm_req_id:8 first_token_cost:392.5333023071289ms total_cost_time:392.5776481628418ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13025 prompt_cache_len:5151 prompt_cache_ratio:0.3954702495201535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 -DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:18 [batch.py:51] router release req id 8 -INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10797357559204102 s -INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.10940861701965332 s -DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=321832753344048227117057271274189577709, time:1750768398.263832s req_ids:[8] -DEBUG 06-24 20:33:18 [manager.py:391] -ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:215.86298942565918ms total_cost_time:215.93403816223145ms,out_token_counter:1 mean_per_token_cost_time: 0.07104873657226562ms prompt_token_num:13026 prompt_cache_len:5151 prompt_cache_ratio:0.3954398894518655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 -DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:18 [batch.py:51] router release req id 8 -INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10845303535461426 s -INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.1106562614440918 s -DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=183254144663049346195902056583093989877, time:1750768398.484742s req_ids:[8] -DEBUG 06-24 20:33:18 [manager.py:391] -ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:213.6678695678711ms total_cost_time:213.71173858642578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13027 prompt_cache_len:5151 prompt_cache_ratio:0.39540953404467644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 -DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:18 [batch.py:51] router release req id 8 -INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10855722427368164 s -INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.11080574989318848 s -DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=223534537375056935434240791376150362314, time:1750768398.7047205s req_ids:[8] -DEBUG 06-24 20:33:18 [manager.py:391] -ERROR 06-24 20:33:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:208.8000774383545ms total_cost_time:208.8456153869629ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13028 prompt_cache_len:5151 prompt_cache_ratio:0.39537918329751304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 -DEBUG 06-24 20:33:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:18 [batch.py:51] router release req id 8 -INFO 06-24 20:33:18 [manager.py:224] router recive req id 8 cost time 0.10805392265319824 s -INFO 06-24 20:33:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046004295349121 s -DEBUG 06-24 20:33:18 [manager.py:391] Prefill Batch: batch_id=49119758234652287963103086022590865724, time:1750768398.9181309s req_ids:[8] -DEBUG 06-24 20:33:18 [manager.py:391] -ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:18 lightllm_req_id:8 first_token_cost:205.4142951965332ms total_cost_time:205.4579257965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13029 prompt_cache_len:5151 prompt_cache_ratio:0.3953488372093023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 -DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:19 [batch.py:51] router release req id 8 -INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s -INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.1097114086151123 s -DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=120780039706098151474444993452657621884, time:1750768399.1342244s req_ids:[8] -DEBUG 06-24 20:33:19 [manager.py:391] -ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.64565467834473ms total_cost_time:207.688570022583ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13030 prompt_cache_len:5151 prompt_cache_ratio:0.3953184957789716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 -DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:19 [batch.py:51] router release req id 8 -INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10829043388366699 s -INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s -DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=305204269665171847869023064222758547886, time:1750768399.3461719s req_ids:[8] -DEBUG 06-24 20:33:19 [manager.py:391] -ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:380.69915771484375ms total_cost_time:380.74302673339844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13031 prompt_cache_len:5151 prompt_cache_ratio:0.39528815900544856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 -DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:19 [batch.py:51] router release req id 8 -INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10773968696594238 s -INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s -DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=310559538771044418268973412925133985581, time:1750768399.7326083s req_ids:[8] -DEBUG 06-24 20:33:19 [manager.py:391] -ERROR 06-24 20:33:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.13424682617188ms total_cost_time:207.17692375183105ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13032 prompt_cache_len:5151 prompt_cache_ratio:0.39525782688766115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 -DEBUG 06-24 20:33:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:19 [batch.py:51] router release req id 8 -INFO 06-24 20:33:19 [manager.py:224] router recive req id 8 cost time 0.10847306251525879 s -INFO 06-24 20:33:19 [manager.py:68] detokenization recv req id 8 cost time 0.11028790473937988 s -DEBUG 06-24 20:33:19 [manager.py:391] Prefill Batch: batch_id=57646268809455983474238958787676528418, time:1750768399.947802s req_ids:[8] -DEBUG 06-24 20:33:19 [manager.py:391] -ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:19 lightllm_req_id:8 first_token_cost:207.09848403930664ms total_cost_time:207.14259147644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13033 prompt_cache_len:5151 prompt_cache_ratio:0.3952274994245377 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 -DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:20 [batch.py:51] router release req id 8 -DEBUG 06-24 20:33:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:20 [manager.py:283] -DEBUG 06-24 20:33:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:20 [manager.py:284] -INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10805797576904297 s -INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.10999870300292969 s -DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=127013413293543619189723714944162444040, time:1750768400.1619778s req_ids:[8] -DEBUG 06-24 20:33:20 [manager.py:391] -ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:210.0226879119873ms total_cost_time:210.0667953491211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13034 prompt_cache_len:5151 prompt_cache_ratio:0.3951971766150069 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 -DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:20 [batch.py:51] router release req id 8 -INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10830330848693848 s -INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11024069786071777 s -DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=35212755727179303705420540809525123737, time:1750768400.3786235s req_ids:[8] -DEBUG 06-24 20:33:20 [manager.py:391] -ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:217.73767471313477ms total_cost_time:217.78106689453125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13035 prompt_cache_len:5151 prompt_cache_ratio:0.3951668584579977 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 -DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:20 [batch.py:51] router release req id 8 -INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10869216918945312 s -INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11049962043762207 s -DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=257496880091193917167104496177727080670, time:1750768400.6016483s req_ids:[8] -DEBUG 06-24 20:33:20 [manager.py:391] -ERROR 06-24 20:33:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:218.18828582763672ms total_cost_time:218.23358535766602ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13036 prompt_cache_len:5151 prompt_cache_ratio:0.3951365449524394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 -DEBUG 06-24 20:33:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:20 [batch.py:51] router release req id 8 -INFO 06-24 20:33:20 [manager.py:224] router recive req id 8 cost time 0.10844206809997559 s -INFO 06-24 20:33:20 [manager.py:68] detokenization recv req id 8 cost time 0.11025404930114746 s -DEBUG 06-24 20:33:20 [manager.py:391] Prefill Batch: batch_id=92550189039474824003753598735162176031, time:1750768400.8400164s req_ids:[8] -DEBUG 06-24 20:33:20 [manager.py:391] -ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:20 lightllm_req_id:8 first_token_cost:401.16381645202637ms total_cost_time:401.20601654052734ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13037 prompt_cache_len:5151 prompt_cache_ratio:0.39510623609726164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 -DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:21 [batch.py:51] router release req id 8 -INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10838699340820312 s -INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056280136108398 s -DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=212645412631158789383963648904152861664, time:1750768401.2348037s req_ids:[8] -DEBUG 06-24 20:33:21 [manager.py:391] -ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:210.6490135192871ms total_cost_time:210.6955051422119ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13038 prompt_cache_len:5151 prompt_cache_ratio:0.3950759318913944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 -DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:21 [batch.py:51] router release req id 8 -INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10770463943481445 s -INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.10976624488830566 s -DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=323231149444194796010373259210222854298, time:1750768401.4637384s req_ids:[8] -DEBUG 06-24 20:33:21 [manager.py:391] -ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:228.4567356109619ms total_cost_time:228.47604751586914ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13039 prompt_cache_len:5151 prompt_cache_ratio:0.39504563233376794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 -DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:21 [batch.py:51] router release req id 8 -INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10755109786987305 s -INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.10884499549865723 s -DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=152479872378295911987443659235620866276, time:1750768401.684788s req_ids:[8] -DEBUG 06-24 20:33:21 [manager.py:391] -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:216.1545753479004ms total_cost_time:216.19749069213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13040 prompt_cache_len:5151 prompt_cache_ratio:0.3950153374233129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 -DEBUG 06-24 20:33:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:21 [batch.py:51] router release req id 8 -INFO 06-24 20:33:21 [manager.py:224] router recive req id 8 cost time 0.10868430137634277 s -INFO 06-24 20:33:21 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s -DEBUG 06-24 20:33:21 [manager.py:391] Prefill Batch: batch_id=82032735784657062182763629681669643119, time:1750768401.9048376s req_ids:[8] -DEBUG 06-24 20:33:21 [manager.py:391] -ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:21 lightllm_req_id:8 first_token_cost:215.81792831420898ms total_cost_time:215.86227416992188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13041 prompt_cache_len:5151 prompt_cache_ratio:0.3949850471589602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 -DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:22 [batch.py:51] router release req id 8 -INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10809040069580078 s -INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.10930824279785156 s -DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=325696614994160511637001755606152937350, time:1750768402.1293988s req_ids:[8] -DEBUG 06-24 20:33:22 [manager.py:391] -ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:207.77297019958496ms total_cost_time:207.81707763671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13042 prompt_cache_len:5151 prompt_cache_ratio:0.39495476153964115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 -DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:22 [batch.py:51] router release req id 8 -INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s -INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.11064553260803223 s -DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=18671922292529703478902836156133317708, time:1750768402.343767s req_ids:[8] -DEBUG 06-24 20:33:22 [manager.py:391] -ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:392.5025463104248ms total_cost_time:392.5461769104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13043 prompt_cache_len:5151 prompt_cache_ratio:0.39492448056428736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 -DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:22 [batch.py:51] router release req id 8 -INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.10869050025939941 s -INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.11088109016418457 s -DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=267427289948298006130642032101772236891, time:1750768402.7421942s req_ids:[8] -DEBUG 06-24 20:33:22 [manager.py:391] -ERROR 06-24 20:33:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:214.4625186920166ms total_cost_time:214.5061492919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13044 prompt_cache_len:5151 prompt_cache_ratio:0.39489420423183075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 -DEBUG 06-24 20:33:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:22 [batch.py:51] router release req id 8 -INFO 06-24 20:33:22 [manager.py:224] router recive req id 8 cost time 0.107147216796875 s -INFO 06-24 20:33:22 [manager.py:68] detokenization recv req id 8 cost time 0.10919761657714844 s -DEBUG 06-24 20:33:22 [manager.py:391] Prefill Batch: batch_id=289453781527812622904324730435070155657, time:1750768402.962048s req_ids:[8] -DEBUG 06-24 20:33:22 [manager.py:391] -ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:22 lightllm_req_id:8 first_token_cost:204.32162284851074ms total_cost_time:204.38694953918457ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:13045 prompt_cache_len:5151 prompt_cache_ratio:0.3948639325412035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 -DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:23 [batch.py:51] router release req id 8 -INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.1088097095489502 s -INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.11104583740234375 s -DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=285976768689138724221873490985057687624, time:1750768403.1770976s req_ids:[8] -DEBUG 06-24 20:33:23 [manager.py:391] -INFO 06-24 20:33:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:33:23 [statics_utils.py:24] mean first cost: 230.23923012657042 ms -INFO 06-24 20:33:23 [statics_utils.py:24] mean per token cost: 0.06031699176893638 ms -ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:213.38844299316406ms total_cost_time:213.43231201171875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13046 prompt_cache_len:5151 prompt_cache_ratio:0.39483366549133836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 -DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:23 [batch.py:51] router release req id 8 -INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10757565498352051 s -INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.1097574234008789 s -DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=146255065684401170380296216137236908087, time:1750768403.3927286s req_ids:[8] -DEBUG 06-24 20:33:23 [manager.py:391] -ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.53461456298828ms total_cost_time:216.57681465148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13047 prompt_cache_len:5151 prompt_cache_ratio:0.39480340308116807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 -DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:23 [batch.py:51] router release req id 8 -INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10814976692199707 s -INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970854759216309 s -DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=95088164406373763704197008641919658772, time:1750768403.618071s req_ids:[8] -DEBUG 06-24 20:33:23 [manager.py:391] -ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.46475791931152ms total_cost_time:216.508150100708ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13048 prompt_cache_len:5151 prompt_cache_ratio:0.394773145309626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 -DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:23 [batch.py:51] router release req id 8 -INFO 06-24 20:33:23 [manager.py:224] router recive req id 8 cost time 0.10933923721313477 s -INFO 06-24 20:33:23 [manager.py:68] detokenization recv req id 8 cost time 0.1114962100982666 s -DEBUG 06-24 20:33:23 [manager.py:391] Prefill Batch: batch_id=310444057817659679001697556403128126854, time:1750768403.8406415s req_ids:[8] -DEBUG 06-24 20:33:23 [manager.py:391] -ERROR 06-24 20:33:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:216.90082550048828ms total_cost_time:216.94493293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13049 prompt_cache_len:5151 prompt_cache_ratio:0.3947428921756456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 -DEBUG 06-24 20:33:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:23 [batch.py:51] router release req id 8 -INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10874223709106445 s -INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11074686050415039 s -DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=80160907855930335262363657481232547904, time:1750768404.0644157s req_ids:[8] -DEBUG 06-24 20:33:24 [manager.py:391] -ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:23 lightllm_req_id:8 first_token_cost:384.00864601135254ms total_cost_time:384.0758800506592ms,out_token_counter:1 mean_per_token_cost_time: 0.06723403930664062ms prompt_token_num:13050 prompt_cache_len:5151 prompt_cache_ratio:0.39471264367816095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 -DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:24 [batch.py:51] router release req id 8 -INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s -INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11141395568847656 s -DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=178450927258888609194326570864147608221, time:1750768404.454751s req_ids:[8] -DEBUG 06-24 20:33:24 [manager.py:391] -ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:215.30914306640625ms total_cost_time:215.35277366638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13051 prompt_cache_len:5151 prompt_cache_ratio:0.39468239981610603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 -DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:24 [batch.py:51] router release req id 8 -INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.10898709297180176 s -INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11089968681335449 s -DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=262730869306433685352316019965158512678, time:1750768404.676607s req_ids:[8] -DEBUG 06-24 20:33:24 [manager.py:391] -ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:216.37701988220215ms total_cost_time:216.42184257507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13052 prompt_cache_len:5151 prompt_cache_ratio:0.39465216058841557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 -DEBUG 06-24 20:33:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:24 [batch.py:51] router release req id 8 -INFO 06-24 20:33:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:24 [manager.py:224] router recive req id 8 cost time 0.11011838912963867 s -INFO 06-24 20:33:24 [manager.py:68] detokenization recv req id 8 cost time 0.11190032958984375 s -DEBUG 06-24 20:33:24 [manager.py:391] Prefill Batch: batch_id=293380961194454966484382402919463793318, time:1750768404.8990633s req_ids:[8] -DEBUG 06-24 20:33:24 [manager.py:391] -ERROR 06-24 20:33:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:24 lightllm_req_id:8 first_token_cost:217.38958358764648ms total_cost_time:217.43440628051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13053 prompt_cache_len:5151 prompt_cache_ratio:0.39462192599402435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 -DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:25 [batch.py:51] router release req id 8 -INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10863065719604492 s -INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11058640480041504 s -DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=172162000024469805260632617414626926177, time:1750768405.1250346s req_ids:[8] -DEBUG 06-24 20:33:25 [manager.py:391] -ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:219.48003768920898ms total_cost_time:219.53439712524414ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:13054 prompt_cache_len:5151 prompt_cache_ratio:0.3945916960318676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 -DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:25 [batch.py:51] router release req id 8 -INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10843682289123535 s -INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11027264595031738 s -DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=335457738988820561660540887827210159386, time:1750768405.3508813s req_ids:[8] -DEBUG 06-24 20:33:25 [manager.py:391] -ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:217.39459037780762ms total_cost_time:217.4396514892578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13055 prompt_cache_len:5151 prompt_cache_ratio:0.3945614707008809 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 -DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:25 [batch.py:51] router release req id 8 -INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s -INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11033368110656738 s -DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=297157775440507761644900942844601606692, time:1750768405.5737498s req_ids:[8] -DEBUG 06-24 20:33:25 [manager.py:391] -ERROR 06-24 20:33:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:390.29717445373535ms total_cost_time:390.34175872802734ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13056 prompt_cache_len:5151 prompt_cache_ratio:0.39453125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 -DEBUG 06-24 20:33:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:25 [batch.py:51] router release req id 8 -INFO 06-24 20:33:25 [manager.py:224] router recive req id 8 cost time 0.10927820205688477 s -INFO 06-24 20:33:25 [manager.py:68] detokenization recv req id 8 cost time 0.11132001876831055 s -DEBUG 06-24 20:33:25 [manager.py:391] Prefill Batch: batch_id=204490816171788092866577784620946471430, time:1750768405.9714148s req_ids:[8] -DEBUG 06-24 20:33:25 [manager.py:391] -ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:25 lightllm_req_id:8 first_token_cost:216.9015407562256ms total_cost_time:216.94660186767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13057 prompt_cache_len:5151 prompt_cache_ratio:0.39450103392816116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 -DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:26 [batch.py:51] router release req id 8 -INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.1089162826538086 s -INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.11093854904174805 s -DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=324754593434674245585225043666751410798, time:1750768406.1946044s req_ids:[8] -DEBUG 06-24 20:33:26 [manager.py:391] -ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:211.62962913513184ms total_cost_time:211.67278289794922ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13058 prompt_cache_len:5151 prompt_cache_ratio:0.3944708224843008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 -DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:26 [batch.py:51] router release req id 8 -INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10851883888244629 s -INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.11043286323547363 s -DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=263762228062315232703049707204181391196, time:1750768406.412559s req_ids:[8] -DEBUG 06-24 20:33:26 [manager.py:391] -ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:221.35686874389648ms total_cost_time:221.40002250671387ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13059 prompt_cache_len:5151 prompt_cache_ratio:0.39444061566735583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 -DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:26 [batch.py:51] router release req id 8 -INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10802888870239258 s -INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10996127128601074 s -DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=128074973394445665107950459448644063050, time:1750768406.6378539s req_ids:[8] -DEBUG 06-24 20:33:26 [manager.py:391] -ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:174.6046543121338ms total_cost_time:174.64828491210938ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13060 prompt_cache_len:5151 prompt_cache_ratio:0.3944104134762634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 -DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:26 [batch.py:51] router release req id 8 -INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10753583908081055 s -INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10924720764160156 s -DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=19966390274702542212832716767348725165, time:1750768406.8176858s req_ids:[8] -DEBUG 06-24 20:33:26 [manager.py:391] -ERROR 06-24 20:33:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:172.3039150238037ms total_cost_time:172.346830368042ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13061 prompt_cache_len:5151 prompt_cache_ratio:0.394380215909961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 -DEBUG 06-24 20:33:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:26 [batch.py:51] router release req id 8 -INFO 06-24 20:33:26 [manager.py:224] router recive req id 8 cost time 0.10685062408447266 s -INFO 06-24 20:33:26 [manager.py:68] detokenization recv req id 8 cost time 0.10879898071289062 s -DEBUG 06-24 20:33:26 [manager.py:391] Prefill Batch: batch_id=192403254786260618278636066661915286743, time:1750768406.9985876s req_ids:[8] -DEBUG 06-24 20:33:26 [manager.py:391] -ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:26 lightllm_req_id:8 first_token_cost:379.9111843109131ms total_cost_time:379.955530166626ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13062 prompt_cache_len:5151 prompt_cache_ratio:0.3943500229673863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 -DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:27 [batch.py:51] router release req id 8 -INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.1076955795288086 s -INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964298248291016 s -DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=73520651513655519230449643679431415384, time:1750768407.3874955s req_ids:[8] -DEBUG 06-24 20:33:27 [manager.py:391] -ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:215.48175811767578ms total_cost_time:215.52610397338867ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13063 prompt_cache_len:5151 prompt_cache_ratio:0.3943198346474776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 -DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:27 [batch.py:51] router release req id 8 -INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s -INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.10960960388183594 s -DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=77968988360941299559263734337626291431, time:1750768407.6047072s req_ids:[8] -DEBUG 06-24 20:33:27 [manager.py:391] -ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:33:27 [stats.py:37] Avg tokens(prompt+generate) throughput: 51923.297 tokens/s -DEBUG 06-24 20:33:27 [stats.py:37] Avg prompt tokens throughput: 51915.238 tokens/s -DEBUG 06-24 20:33:27 [stats.py:37] Avg generate tokens throughput: 8.059 tokens/s -INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:207.06796646118164ms total_cost_time:207.11207389831543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13064 prompt_cache_len:5151 prompt_cache_ratio:0.3942896509491733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 -DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:27 [batch.py:51] router release req id 8 -INFO 06-24 20:33:27 [manager.py:224] router recive req id 8 cost time 0.10867571830749512 s -INFO 06-24 20:33:27 [manager.py:68] detokenization recv req id 8 cost time 0.1106867790222168 s -DEBUG 06-24 20:33:27 [manager.py:391] Prefill Batch: batch_id=47748010161756914801794253898632334892, time:1750768407.8171337s req_ids:[8] -DEBUG 06-24 20:33:27 [manager.py:391] -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:212.18252182006836ms total_cost_time:212.22662925720215ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13065 prompt_cache_len:5151 prompt_cache_ratio:0.3942594718714122 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 -DEBUG 06-24 20:33:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:27 [batch.py:51] router release req id 8 -INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10752367973327637 s -INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.10946178436279297 s -DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=274212762438844544916661074767687728951, time:1750768408.0362065s req_ids:[8] -DEBUG 06-24 20:33:28 [manager.py:391] -ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:27 lightllm_req_id:8 first_token_cost:211.5764617919922ms total_cost_time:211.6219997406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13066 prompt_cache_len:5151 prompt_cache_ratio:0.39422929741313334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 -DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:28 [batch.py:51] router release req id 8 -INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10894060134887695 s -INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11081123352050781 s -DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=91840340759639353029527694256418159864, time:1750768408.2540207s req_ids:[8] -DEBUG 06-24 20:33:28 [manager.py:391] -ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:214.26939964294434ms total_cost_time:214.31469917297363ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13067 prompt_cache_len:5151 prompt_cache_ratio:0.3941991275732762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 -DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:28 [batch.py:51] router release req id 8 -INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10861587524414062 s -INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11065888404846191 s -DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=4371355966082282143182423232863614713, time:1750768408.4764512s req_ids:[8] -DEBUG 06-24 20:33:28 [manager.py:391] -ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:212.23688125610352ms total_cost_time:212.2817039489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13068 prompt_cache_len:5151 prompt_cache_ratio:0.3941689623507805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 -DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:28 [batch.py:51] router release req id 8 -INFO 06-24 20:33:28 [manager.py:224] router recive req id 8 cost time 0.10888314247131348 s -INFO 06-24 20:33:28 [manager.py:68] detokenization recv req id 8 cost time 0.11091828346252441 s -DEBUG 06-24 20:33:28 [manager.py:391] Prefill Batch: batch_id=258603725035576975470511139863019540671, time:1750768408.6978288s req_ids:[8] -DEBUG 06-24 20:33:28 [manager.py:391] -ERROR 06-24 20:33:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:377.22325325012207ms total_cost_time:377.2873878479004ms,out_token_counter:1 mean_per_token_cost_time: 0.06413459777832031ms prompt_token_num:13069 prompt_cache_len:5151 prompt_cache_ratio:0.39413880174458643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 -DEBUG 06-24 20:33:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:28 [batch.py:51] router release req id 8 -INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10883617401123047 s -INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s -DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=14909405426319352279929306654377697217, time:1750768409.0785575s req_ids:[8] -DEBUG 06-24 20:33:29 [manager.py:391] -ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:28 lightllm_req_id:8 first_token_cost:208.92000198364258ms total_cost_time:208.96482467651367ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13070 prompt_cache_len:5151 prompt_cache_ratio:0.3941086457536343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 -DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:29 [batch.py:51] router release req id 8 -INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.1086723804473877 s -INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11072349548339844 s -DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=324858022517149890377092431931902192534, time:1750768409.2949598s req_ids:[8] -DEBUG 06-24 20:33:29 [manager.py:391] -ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:213.28020095825195ms total_cost_time:213.32478523254395ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13071 prompt_cache_len:5151 prompt_cache_ratio:0.3940784943768648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 -DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:29 [batch.py:51] router release req id 8 -INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.1094815731048584 s -INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.1114037036895752 s -DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=177732389578913794674087532208857300576, time:1750768409.5131643s req_ids:[8] -DEBUG 06-24 20:33:29 [manager.py:391] -ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:211.02190017700195ms total_cost_time:211.06743812561035ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13072 prompt_cache_len:5151 prompt_cache_ratio:0.3940483476132191 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 -DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:29 [batch.py:51] router release req id 8 -INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10964226722717285 s -INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.11157774925231934 s -DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=247330051776901343555527741987838935255, time:1750768409.7340493s req_ids:[8] -DEBUG 06-24 20:33:29 [manager.py:391] -ERROR 06-24 20:33:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:211.96651458740234ms total_cost_time:212.01062202453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13073 prompt_cache_len:5151 prompt_cache_ratio:0.3940182054616385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 -DEBUG 06-24 20:33:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:29 [batch.py:51] router release req id 8 -INFO 06-24 20:33:29 [manager.py:224] router recive req id 8 cost time 0.10775065422058105 s -INFO 06-24 20:33:29 [manager.py:68] detokenization recv req id 8 cost time 0.10971617698669434 s -DEBUG 06-24 20:33:29 [manager.py:391] Prefill Batch: batch_id=299483850754776183861567836494340845839, time:1750768409.9497957s req_ids:[8] -DEBUG 06-24 20:33:29 [manager.py:391] -ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:29 lightllm_req_id:8 first_token_cost:214.04743194580078ms total_cost_time:214.06817436218262ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13074 prompt_cache_len:5151 prompt_cache_ratio:0.3939880679210647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 -DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:30 [batch.py:51] router release req id 8 -INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10837054252624512 s -INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.11034870147705078 s -DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=190350854120814385586720448853787480482, time:1750768410.1723084s req_ids:[8] -DEBUG 06-24 20:33:30 [manager.py:391] -ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:378.676176071167ms total_cost_time:378.7202835083008ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13075 prompt_cache_len:5151 prompt_cache_ratio:0.3939579349904398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 -DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:30 [batch.py:51] router release req id 8 -INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10898184776306152 s -INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.1110377311706543 s -DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=208561265953640556266468329083053012801, time:1750768410.5562937s req_ids:[8] -DEBUG 06-24 20:33:30 [manager.py:391] -ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:213.93394470214844ms total_cost_time:213.97876739501953ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13076 prompt_cache_len:5151 prompt_cache_ratio:0.393927806668706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 -DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:30 [batch.py:51] router release req id 8 -INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10747337341308594 s -INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.1093130111694336 s -DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=91589200091973264243929523733434114937, time:1750768410.77666s req_ids:[8] -DEBUG 06-24 20:33:30 [manager.py:391] -ERROR 06-24 20:33:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:169.7235107421875ms total_cost_time:169.76547241210938ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13077 prompt_cache_len:5151 prompt_cache_ratio:0.39389768295480615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 -DEBUG 06-24 20:33:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:30 [batch.py:51] router release req id 8 -INFO 06-24 20:33:30 [manager.py:224] router recive req id 8 cost time 0.10803914070129395 s -INFO 06-24 20:33:30 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s -DEBUG 06-24 20:33:30 [manager.py:391] Prefill Batch: batch_id=328979993173353349826985374188766239306, time:1750768410.9519284s req_ids:[8] -DEBUG 06-24 20:33:30 [manager.py:391] -ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:30 lightllm_req_id:8 first_token_cost:203.05728912353516ms total_cost_time:203.10020446777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13078 prompt_cache_len:5151 prompt_cache_ratio:0.3938675638476831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 -DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:31 [batch.py:51] router release req id 8 -INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10774421691894531 s -INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11031270027160645 s -DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=194001125688334336216561683604167367071, time:1750768411.1621833s req_ids:[8] -DEBUG 06-24 20:33:31 [manager.py:391] -ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:211.74216270446777ms total_cost_time:211.78507804870605ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13079 prompt_cache_len:5151 prompt_cache_ratio:0.3938374493462803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 -DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:31 [batch.py:51] router release req id 8 -INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s -INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11115264892578125 s -DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=235061638432118752911673637810742068850, time:1750768411.3816094s req_ids:[8] -DEBUG 06-24 20:33:31 [manager.py:391] -ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:211.0297679901123ms total_cost_time:211.0748291015625ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13080 prompt_cache_len:5151 prompt_cache_ratio:0.39380733944954127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 -DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:31 [batch.py:51] router release req id 8 -INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.10811424255371094 s -INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.11015176773071289 s -DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=296197620574072192617863866301584975451, time:1750768411.5984812s req_ids:[8] -DEBUG 06-24 20:33:31 [manager.py:391] -ERROR 06-24 20:33:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:374.65858459472656ms total_cost_time:374.70316886901855ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13081 prompt_cache_len:5151 prompt_cache_ratio:0.3937772341564101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 -DEBUG 06-24 20:33:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:31 [batch.py:51] router release req id 8 -INFO 06-24 20:33:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:31 [manager.py:224] router recive req id 8 cost time 0.1077883243560791 s -INFO 06-24 20:33:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s -DEBUG 06-24 20:33:31 [manager.py:391] Prefill Batch: batch_id=300828300203310295866235988355376267906, time:1750768411.9791954s req_ids:[8] -DEBUG 06-24 20:33:31 [manager.py:391] -ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:31 lightllm_req_id:8 first_token_cost:213.23013305664062ms total_cost_time:213.27614784240723ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13082 prompt_cache_len:5151 prompt_cache_ratio:0.3937471334658309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 -DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:32 [batch.py:51] router release req id 8 -INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10784459114074707 s -INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.1098017692565918 s -DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=151349816138482728764406540327365648252, time:1750768412.198429s req_ids:[8] -DEBUG 06-24 20:33:32 [manager.py:391] -ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:213.63353729248047ms total_cost_time:213.67597579956055ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13083 prompt_cache_len:5151 prompt_cache_ratio:0.39371703737674846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 -DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:32 [batch.py:51] router release req id 8 -INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s -INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s -DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=110039468396583254205386416405185985016, time:1750768412.4185407s req_ids:[8] -DEBUG 06-24 20:33:32 [manager.py:391] -ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:220.35813331604004ms total_cost_time:220.40343284606934ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13084 prompt_cache_len:5151 prompt_cache_ratio:0.3936869458881076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 -DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:32 [batch.py:51] router release req id 8 -INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10770130157470703 s -INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.1098477840423584 s -DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=212898348829649094951748400105159793932, time:1750768412.6446152s req_ids:[8] -DEBUG 06-24 20:33:32 [manager.py:391] -ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:209.73968505859375ms total_cost_time:209.78331565856934ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13085 prompt_cache_len:5151 prompt_cache_ratio:0.39365685899885366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 -DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:32 [batch.py:51] router release req id 8 -INFO 06-24 20:33:32 [manager.py:224] router recive req id 8 cost time 0.10767054557800293 s -INFO 06-24 20:33:32 [manager.py:68] detokenization recv req id 8 cost time 0.10959339141845703 s -DEBUG 06-24 20:33:32 [manager.py:391] Prefill Batch: batch_id=65717240764965864713886705633775328624, time:1750768412.860603s req_ids:[8] -DEBUG 06-24 20:33:32 [manager.py:391] -ERROR 06-24 20:33:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:212.99171447753906ms total_cost_time:213.03677558898926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13086 prompt_cache_len:5151 prompt_cache_ratio:0.39362677670793217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 -DEBUG 06-24 20:33:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:32 [batch.py:51] router release req id 8 -INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10940051078796387 s -INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.11136960983276367 s -DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=113137000643014791351323938305164054379, time:1750768413.0806725s req_ids:[8] -DEBUG 06-24 20:33:33 [manager.py:391] -ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:32 lightllm_req_id:8 first_token_cost:389.1468048095703ms total_cost_time:389.1892433166504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13087 prompt_cache_len:5151 prompt_cache_ratio:0.393596699014289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 -DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:33 [batch.py:51] router release req id 8 -INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10860323905944824 s -INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.1110386848449707 s -DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=109286580887509699208716697494007363254, time:1750768413.4771266s req_ids:[8] -DEBUG 06-24 20:33:33 [manager.py:391] -ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:213.1786346435547ms total_cost_time:213.2241725921631ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13088 prompt_cache_len:5151 prompt_cache_ratio:0.3935666259168704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 -DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:33 [batch.py:51] router release req id 8 -INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.1079409122467041 s -INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.10992312431335449 s -DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=78486502387130529742790814976590815495, time:1750768413.703067s req_ids:[8] -DEBUG 06-24 20:33:33 [manager.py:391] -ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:217.95392036437988ms total_cost_time:217.99755096435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13089 prompt_cache_len:5151 prompt_cache_ratio:0.39353655741462296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 -DEBUG 06-24 20:33:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:33 [batch.py:51] router release req id 8 -INFO 06-24 20:33:33 [manager.py:224] router recive req id 8 cost time 0.10827970504760742 s -INFO 06-24 20:33:33 [manager.py:68] detokenization recv req id 8 cost time 0.11010074615478516 s -DEBUG 06-24 20:33:33 [manager.py:391] Prefill Batch: batch_id=104689279490230866009323878877413431411, time:1750768413.9226935s req_ids:[8] -DEBUG 06-24 20:33:33 [manager.py:391] -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:174.29471015930176ms total_cost_time:174.33667182922363ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13090 prompt_cache_len:5151 prompt_cache_ratio:0.3935064935064935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 -DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:34 [batch.py:51] router release req id 8 -INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10940313339233398 s -INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s -DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=82212729822338202667919897891280590583, time:1750768414.103486s req_ids:[8] -DEBUG 06-24 20:33:34 [manager.py:391] -ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:33 lightllm_req_id:8 first_token_cost:215.040922164917ms total_cost_time:215.0869369506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13091 prompt_cache_len:5151 prompt_cache_ratio:0.39347643419142925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 -DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:34 [batch.py:51] router release req id 8 -INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10810041427612305 s -INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.11008143424987793 s -DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=63306849642453696178073286178811389022, time:1750768414.329854s req_ids:[8] -DEBUG 06-24 20:33:34 [manager.py:391] -ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:208.1611156463623ms total_cost_time:208.2054615020752ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13092 prompt_cache_len:5151 prompt_cache_ratio:0.3934463794683776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 -DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:34 [batch.py:51] router release req id 8 -INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.1077275276184082 s -INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.10987734794616699 s -DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=88397387735723525714735684310989757338, time:1750768414.5367687s req_ids:[8] -DEBUG 06-24 20:33:34 [manager.py:391] -ERROR 06-24 20:33:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:418.3306694030762ms total_cost_time:418.37501525878906ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13093 prompt_cache_len:5151 prompt_cache_ratio:0.39341632933628656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 -DEBUG 06-24 20:33:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:34 [batch.py:51] router release req id 8 -INFO 06-24 20:33:34 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s -INFO 06-24 20:33:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061310768127441 s -DEBUG 06-24 20:33:34 [manager.py:391] Prefill Batch: batch_id=250402664702351225939690722453694530414, time:1750768414.9606493s req_ids:[8] -DEBUG 06-24 20:33:34 [manager.py:391] -ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:34 lightllm_req_id:8 first_token_cost:210.87408065795898ms total_cost_time:210.91866493225098ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13094 prompt_cache_len:5151 prompt_cache_ratio:0.3933862837941042 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 -DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:35 [batch.py:51] router release req id 8 -INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10725760459899902 s -INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.10924601554870605 s -DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=217565244298140897213528248757437067249, time:1750768415.1773803s req_ids:[8] -DEBUG 06-24 20:33:35 [manager.py:391] -ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:211.3351821899414ms total_cost_time:211.378812789917ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13095 prompt_cache_len:5151 prompt_cache_ratio:0.3933562428407789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 -DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:35 [batch.py:51] router release req id 8 -INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10817217826843262 s -INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.11010169982910156 s -DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=102482833157275957222240261372878038624, time:1750768415.3973932s req_ids:[8] -DEBUG 06-24 20:33:35 [manager.py:391] -ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:210.5729579925537ms total_cost_time:210.6184959411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13096 prompt_cache_len:5151 prompt_cache_ratio:0.3933262064752596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 -DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:35 [batch.py:51] router release req id 8 -INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s -INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.10994172096252441 s -DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=235137378855093151425808332353154521239, time:1750768415.6256328s req_ids:[8] -DEBUG 06-24 20:33:35 [manager.py:391] -ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:228.58285903930664ms total_cost_time:228.62911224365234ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13097 prompt_cache_len:5151 prompt_cache_ratio:0.39329617469649536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 -DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:35 [batch.py:51] router release req id 8 -INFO 06-24 20:33:35 [manager.py:224] router recive req id 8 cost time 0.10837078094482422 s -INFO 06-24 20:33:35 [manager.py:68] detokenization recv req id 8 cost time 0.1103823184967041 s -DEBUG 06-24 20:33:35 [manager.py:391] Prefill Batch: batch_id=339120375414442526266781403005118283997, time:1750768415.85031s req_ids:[8] -DEBUG 06-24 20:33:35 [manager.py:391] -ERROR 06-24 20:33:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:216.19057655334473ms total_cost_time:216.2344455718994ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13098 prompt_cache_len:5151 prompt_cache_ratio:0.39326614750343564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 -DEBUG 06-24 20:33:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:35 [batch.py:51] router release req id 8 -INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10846233367919922 s -INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11051607131958008 s -DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=102293266523695156172664124600170288198, time:1750768416.0744154s req_ids:[8] -DEBUG 06-24 20:33:36 [manager.py:391] -ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:35 lightllm_req_id:8 first_token_cost:392.697811126709ms total_cost_time:392.7428722381592ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13099 prompt_cache_len:5151 prompt_cache_ratio:0.3932361248950302 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 -DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:36 [batch.py:51] router release req id 8 -INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.1094205379486084 s -INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11142683029174805 s -DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=215805281728718706893291492953420040247, time:1750768416.4739976s req_ids:[8] -DEBUG 06-24 20:33:36 [manager.py:391] -ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:221.86589241027832ms total_cost_time:221.92049026489258ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:13100 prompt_cache_len:5151 prompt_cache_ratio:0.393206106870229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 -DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:36 [batch.py:51] router release req id 8 -INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10935616493225098 s -INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11133670806884766 s -DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=38749398393903366112788070344315264609, time:1750768416.7021005s req_ids:[8] -DEBUG 06-24 20:33:36 [manager.py:391] -ERROR 06-24 20:33:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:217.79489517211914ms total_cost_time:217.8475856781006ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:13101 prompt_cache_len:5151 prompt_cache_ratio:0.3931760934279826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 -DEBUG 06-24 20:33:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:36 [batch.py:51] router release req id 8 -INFO 06-24 20:33:36 [manager.py:224] router recive req id 8 cost time 0.10857033729553223 s -INFO 06-24 20:33:36 [manager.py:68] detokenization recv req id 8 cost time 0.11067843437194824 s -DEBUG 06-24 20:33:36 [manager.py:391] Prefill Batch: batch_id=246213383002050436690867801226480518367, time:1750768416.9258118s req_ids:[8] -DEBUG 06-24 20:33:36 [manager.py:391] -ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:36 lightllm_req_id:8 first_token_cost:216.71152114868164ms total_cost_time:216.76182746887207ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:13102 prompt_cache_len:5151 prompt_cache_ratio:0.39314608456724165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 -DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:37 [batch.py:51] router release req id 8 -INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.1082758903503418 s -INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.1102137565612793 s -DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=220151998242717692610290878725419624053, time:1750768417.1489882s req_ids:[8] -DEBUG 06-24 20:33:37 [manager.py:391] -ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:208.60791206359863ms total_cost_time:208.65273475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13103 prompt_cache_len:5151 prompt_cache_ratio:0.39311608028695716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 -DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:37 [batch.py:51] router release req id 8 -INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10764813423156738 s -INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.10953569412231445 s -DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=126603676067162142246248605668311683888, time:1750768417.3617554s req_ids:[8] -DEBUG 06-24 20:33:37 [manager.py:391] -ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:211.86184883117676ms total_cost_time:211.90524101257324ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13104 prompt_cache_len:5151 prompt_cache_ratio:0.3930860805860806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 -DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:37 [batch.py:51] router release req id 8 -INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10855340957641602 s -INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.11056303977966309 s -DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=2348258786407126252649409273992505060, time:1750768417.582074s req_ids:[8] -DEBUG 06-24 20:33:37 [manager.py:391] -ERROR 06-24 20:33:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:33:37 [stats.py:37] Avg tokens(prompt+generate) throughput: 52844.647 tokens/s -DEBUG 06-24 20:33:37 [stats.py:37] Avg prompt tokens throughput: 52836.571 tokens/s -DEBUG 06-24 20:33:37 [stats.py:37] Avg generate tokens throughput: 8.076 tokens/s -INFO 06-24 20:33:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:384.33837890625ms total_cost_time:384.380578994751ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13105 prompt_cache_len:5151 prompt_cache_ratio:0.3930560854635635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 -DEBUG 06-24 20:33:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:37 [batch.py:51] router release req id 8 -INFO 06-24 20:33:37 [manager.py:224] router recive req id 8 cost time 0.10895395278930664 s -INFO 06-24 20:33:37 [manager.py:68] detokenization recv req id 8 cost time 0.11098909378051758 s -DEBUG 06-24 20:33:37 [manager.py:391] Prefill Batch: batch_id=106287907465496946298735515512885366256, time:1750768417.9713328s req_ids:[8] -DEBUG 06-24 20:33:37 [manager.py:391] -ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:37 lightllm_req_id:8 first_token_cost:214.85495567321777ms total_cost_time:214.89620208740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13106 prompt_cache_len:5151 prompt_cache_ratio:0.393026094918358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 -DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:38 [batch.py:51] router release req id 8 -INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10806822776794434 s -INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.11009049415588379 s -DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=133715575939016155464983853188350111674, time:1750768418.2034302s req_ids:[8] -DEBUG 06-24 20:33:38 [manager.py:391] -ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:223.68454933166504ms total_cost_time:223.72865676879883ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13107 prompt_cache_len:5151 prompt_cache_ratio:0.39299610894941633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 -DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:38 [batch.py:51] router release req id 8 -INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10760378837585449 s -INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.1094980239868164 s -DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=2942388643164960510834460932907944335, time:1750768418.4225442s req_ids:[8] -DEBUG 06-24 20:33:38 [manager.py:391] -ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:212.06188201904297ms total_cost_time:212.10765838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13108 prompt_cache_len:5151 prompt_cache_ratio:0.39296612755569116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 -DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:38 [batch.py:51] router release req id 8 -INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10879206657409668 s -INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.11085700988769531 s -DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=280073357710133315245537640815566938501, time:1750768418.6402066s req_ids:[8] -DEBUG 06-24 20:33:38 [manager.py:391] -ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:215.22808074951172ms total_cost_time:215.28148651123047ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13109 prompt_cache_len:5151 prompt_cache_ratio:0.39293615073613547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 -DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:38 [batch.py:51] router release req id 8 -INFO 06-24 20:33:38 [manager.py:224] router recive req id 8 cost time 0.10764575004577637 s -INFO 06-24 20:33:38 [manager.py:68] detokenization recv req id 8 cost time 0.10968923568725586 s -DEBUG 06-24 20:33:38 [manager.py:391] Prefill Batch: batch_id=129961371539636677502202494753988069393, time:1750768418.861381s req_ids:[8] -DEBUG 06-24 20:33:38 [manager.py:391] -ERROR 06-24 20:33:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:209.3198299407959ms total_cost_time:209.3653678894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13110 prompt_cache_len:5151 prompt_cache_ratio:0.3929061784897025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 -DEBUG 06-24 20:33:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:38 [batch.py:51] router release req id 8 -INFO 06-24 20:33:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10762572288513184 s -INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.10957932472229004 s -DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=18225266863192871281478135166024760692, time:1750768419.0801625s req_ids:[8] -DEBUG 06-24 20:33:39 [manager.py:391] -ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:38 lightllm_req_id:8 first_token_cost:393.3110237121582ms total_cost_time:393.355131149292ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13111 prompt_cache_len:5151 prompt_cache_ratio:0.3928762108153459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 -DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:39 [batch.py:51] router release req id 8 -INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10829353332519531 s -INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s -DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=47614557519914516895111008322320324772, time:1750768419.4811232s req_ids:[8] -DEBUG 06-24 20:33:39 [manager.py:391] -ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:218.5828685760498ms total_cost_time:218.6267375946045ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13112 prompt_cache_len:5151 prompt_cache_ratio:0.39284624771201954 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 -DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:39 [batch.py:51] router release req id 8 -INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.10648703575134277 s -INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.1084439754486084 s -DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=241964492075794988128180418192240577742, time:1750768419.703423s req_ids:[8] -DEBUG 06-24 20:33:39 [manager.py:391] -ERROR 06-24 20:33:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:211.88020706176758ms total_cost_time:211.92526817321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13113 prompt_cache_len:5151 prompt_cache_ratio:0.39281628917867767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 -DEBUG 06-24 20:33:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:39 [batch.py:51] router release req id 8 -INFO 06-24 20:33:39 [manager.py:224] router recive req id 8 cost time 0.11024045944213867 s -INFO 06-24 20:33:39 [manager.py:68] detokenization recv req id 8 cost time 0.11221480369567871 s -DEBUG 06-24 20:33:39 [manager.py:391] Prefill Batch: batch_id=233876506240120997342439958071577531130, time:1750768419.9243166s req_ids:[8] -DEBUG 06-24 20:33:39 [manager.py:391] -ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:39 lightllm_req_id:8 first_token_cost:213.3033275604248ms total_cost_time:213.38510513305664ms,out_token_counter:1 mean_per_token_cost_time: 0.08177757263183594ms prompt_token_num:13114 prompt_cache_len:5151 prompt_cache_ratio:0.3927863352142748 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 -DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:40 [batch.py:51] router release req id 8 -INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10881781578063965 s -INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.1107022762298584 s -DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=269065007067966508838694076037293930478, time:1750768420.1432278s req_ids:[8] -DEBUG 06-24 20:33:40 [manager.py:391] -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:40 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:214.1869068145752ms total_cost_time:214.22958374023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13115 prompt_cache_len:5151 prompt_cache_ratio:0.3927563858177659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 -DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:40 [batch.py:51] router release req id 8 -INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s -INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.11020112037658691 s -DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=149331490007267979553380653723154887190, time:1750768420.3625698s req_ids:[8] -DEBUG 06-24 20:33:40 [manager.py:391] -ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:210.58988571166992ms total_cost_time:210.63661575317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13116 prompt_cache_len:5151 prompt_cache_ratio:0.3927264409881061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 -DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:40 [batch.py:51] router release req id 8 -INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.1081228256225586 s -INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s -DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=124022233531927309325753477405988086860, time:1750768420.5799353s req_ids:[8] -DEBUG 06-24 20:33:40 [manager.py:391] -ERROR 06-24 20:33:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:389.0419006347656ms total_cost_time:389.0864849090576ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13117 prompt_cache_len:5151 prompt_cache_ratio:0.392696500724251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 -DEBUG 06-24 20:33:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:40 [batch.py:51] router release req id 8 -INFO 06-24 20:33:40 [manager.py:224] router recive req id 8 cost time 0.10799551010131836 s -INFO 06-24 20:33:40 [manager.py:68] detokenization recv req id 8 cost time 0.10993766784667969 s -DEBUG 06-24 20:33:40 [manager.py:391] Prefill Batch: batch_id=11895382066407503353753506483218357222, time:1750768420.977581s req_ids:[8] -DEBUG 06-24 20:33:40 [manager.py:391] -ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:40 lightllm_req_id:8 first_token_cost:213.64617347717285ms total_cost_time:213.69147300720215ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13118 prompt_cache_len:5151 prompt_cache_ratio:0.3926665650251563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 -DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:41 [batch.py:51] router release req id 8 -INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10913538932800293 s -INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11107897758483887 s -DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=71959944843333010626824696801174723601, time:1750768421.1964347s req_ids:[8] -DEBUG 06-24 20:33:41 [manager.py:391] -ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.3793830871582ms total_cost_time:213.4237289428711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13119 prompt_cache_len:5151 prompt_cache_ratio:0.3926366338897782 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 -DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:41 [batch.py:51] router release req id 8 -INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10889363288879395 s -INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11086249351501465 s -DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=322437738929331059494944927196827846527, time:1750768421.4147146s req_ids:[8] -DEBUG 06-24 20:33:41 [manager.py:391] -ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.9723300933838ms total_cost_time:214.01619911193848ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13120 prompt_cache_len:5151 prompt_cache_ratio:0.39260670731707314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 -DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:41 [batch.py:51] router release req id 8 -INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10912251472473145 s -INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.1110992431640625 s -DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=249250700262609361583704173320654210803, time:1750768421.6370847s req_ids:[8] -DEBUG 06-24 20:33:41 [manager.py:391] -ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:217.5142765045166ms total_cost_time:217.55695343017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13121 prompt_cache_len:5151 prompt_cache_ratio:0.39257678530599804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 -DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:41 [batch.py:51] router release req id 8 -INFO 06-24 20:33:41 [manager.py:224] router recive req id 8 cost time 0.10900449752807617 s -INFO 06-24 20:33:41 [manager.py:68] detokenization recv req id 8 cost time 0.11111021041870117 s -DEBUG 06-24 20:33:41 [manager.py:391] Prefill Batch: batch_id=44539688260308992956144120118911558673, time:1750768421.8614998s req_ids:[8] -DEBUG 06-24 20:33:41 [manager.py:391] -ERROR 06-24 20:33:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:213.820219039917ms total_cost_time:213.86384963989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13122 prompt_cache_len:5151 prompt_cache_ratio:0.39254686785550985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 -DEBUG 06-24 20:33:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:41 [batch.py:51] router release req id 8 -INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10813665390014648 s -INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.11005973815917969 s -DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=74337347463814420679056434431801200866, time:1750768422.0833743s req_ids:[8] -DEBUG 06-24 20:33:42 [manager.py:391] -ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:41 lightllm_req_id:8 first_token_cost:391.7698860168457ms total_cost_time:391.8137550354004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13123 prompt_cache_len:5151 prompt_cache_ratio:0.392516954964566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 -DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:42 [batch.py:51] router release req id 8 -INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10879731178283691 s -INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.1107170581817627 s -DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=215611857696704533440411233849793136809, time:1750768422.4788332s req_ids:[8] -DEBUG 06-24 20:33:42 [manager.py:391] -ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:214.24508094787598ms total_cost_time:214.29204940795898ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13124 prompt_cache_len:5151 prompt_cache_ratio:0.39248704663212436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 -DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:42 [batch.py:51] router release req id 8 -INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10798311233520508 s -INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.11002635955810547 s -DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=219856797294241191614183202647101047984, time:1750768422.7087345s req_ids:[8] -DEBUG 06-24 20:33:42 [manager.py:391] -ERROR 06-24 20:33:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:226.0420322418213ms total_cost_time:226.09400749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:13125 prompt_cache_len:5151 prompt_cache_ratio:0.39245714285714284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 -DEBUG 06-24 20:33:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:42 [batch.py:51] router release req id 8 -INFO 06-24 20:33:42 [manager.py:224] router recive req id 8 cost time 0.10776710510253906 s -INFO 06-24 20:33:42 [manager.py:68] detokenization recv req id 8 cost time 0.10970044136047363 s -DEBUG 06-24 20:33:42 [manager.py:391] Prefill Batch: batch_id=26346819006962849345193834787108461012, time:1750768422.9301438s req_ids:[8] -DEBUG 06-24 20:33:42 [manager.py:391] -ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:42 lightllm_req_id:8 first_token_cost:211.2278938293457ms total_cost_time:211.25030517578125ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:13126 prompt_cache_len:5151 prompt_cache_ratio:0.3924272436385799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 -DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:43 [batch.py:51] router release req id 8 -INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10894560813903809 s -INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11091041564941406 s -DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=67189245249141275456612893374497925805, time:1750768423.1483104s req_ids:[8] -DEBUG 06-24 20:33:43 [manager.py:391] -ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:213.41490745544434ms total_cost_time:213.45949172973633ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13127 prompt_cache_len:5151 prompt_cache_ratio:0.39239734897539424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 -DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:43 [batch.py:51] router release req id 8 -INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10944318771362305 s -INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11142873764038086 s -DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=236680426705069418274609115393605560733, time:1750768423.3668118s req_ids:[8] -DEBUG 06-24 20:33:43 [manager.py:391] -ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:210.86454391479492ms total_cost_time:210.90936660766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13128 prompt_cache_len:5151 prompt_cache_ratio:0.3923674588665448 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 -DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:43 [batch.py:51] router release req id 8 -INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10833144187927246 s -INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.11022210121154785 s -DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=143835750250091869931839308713673637258, time:1750768423.5842242s req_ids:[8] -DEBUG 06-24 20:33:43 [manager.py:391] -ERROR 06-24 20:33:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:387.21561431884766ms total_cost_time:387.26019859313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13129 prompt_cache_len:5151 prompt_cache_ratio:0.39233757331099095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 -DEBUG 06-24 20:33:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:43 [batch.py:51] router release req id 8 -INFO 06-24 20:33:43 [manager.py:224] router recive req id 8 cost time 0.10796785354614258 s -INFO 06-24 20:33:43 [manager.py:68] detokenization recv req id 8 cost time 0.1098012924194336 s -DEBUG 06-24 20:33:43 [manager.py:391] Prefill Batch: batch_id=109727877508427070285813245360448956709, time:1750768423.9771383s req_ids:[8] -DEBUG 06-24 20:33:43 [manager.py:391] -ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:43 lightllm_req_id:8 first_token_cost:207.5178623199463ms total_cost_time:207.56101608276367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13130 prompt_cache_len:5151 prompt_cache_ratio:0.3923076923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 -DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:44 [batch.py:51] router release req id 8 -INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10912346839904785 s -INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.11168265342712402 s -DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=257425908104607092469222988637047575322, time:1750768424.1915114s req_ids:[8] -DEBUG 06-24 20:33:44 [manager.py:391] -ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:209.30981636047363ms total_cost_time:209.3648910522461ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:13131 prompt_cache_len:5151 prompt_cache_ratio:0.3922778158556089 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 -DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:44 [batch.py:51] router release req id 8 -INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10640454292297363 s -INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.1082463264465332 s -DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=189436362451676668877945735613098318130, time:1750768424.406522s req_ids:[8] -DEBUG 06-24 20:33:44 [manager.py:391] -ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:209.72681045532227ms total_cost_time:209.77354049682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13132 prompt_cache_len:5151 prompt_cache_ratio:0.3922479439537009 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 -DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:44 [batch.py:51] router release req id 8 -INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.10862493515014648 s -INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.11049652099609375 s -DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=50671273354646361973317766641972832274, time:1750768424.6221s req_ids:[8] -DEBUG 06-24 20:33:44 [manager.py:391] -ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:216.62616729736328ms total_cost_time:216.67146682739258ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13133 prompt_cache_len:5151 prompt_cache_ratio:0.39221807660092894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 -DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:44 [batch.py:51] router release req id 8 -INFO 06-24 20:33:44 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s -INFO 06-24 20:33:44 [manager.py:68] detokenization recv req id 8 cost time 0.10969018936157227 s -DEBUG 06-24 20:33:44 [manager.py:391] Prefill Batch: batch_id=191823213820807892383117446565027326626, time:1750768424.8453646s req_ids:[8] -DEBUG 06-24 20:33:44 [manager.py:391] -ERROR 06-24 20:33:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:212.5685214996338ms total_cost_time:212.61334419250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13134 prompt_cache_len:5151 prompt_cache_ratio:0.392188213796254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 -DEBUG 06-24 20:33:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:44 [batch.py:51] router release req id 8 -INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.10875558853149414 s -INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.11071085929870605 s -DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=144367958310322236713834943607594626128, time:1750768425.0632458s req_ids:[8] -DEBUG 06-24 20:33:45 [manager.py:391] -ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:44 lightllm_req_id:8 first_token_cost:388.60249519348145ms total_cost_time:388.64898681640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13135 prompt_cache_len:5151 prompt_cache_ratio:0.3921583555386372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 -DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:45 [batch.py:51] router release req id 8 -INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.1091303825378418 s -INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.11130261421203613 s -DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=227137784769106517502630691833657814471, time:1750768425.4593356s req_ids:[8] -DEBUG 06-24 20:33:45 [manager.py:391] -ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:211.56716346740723ms total_cost_time:211.6219997406006ms,out_token_counter:1 mean_per_token_cost_time: 0.054836273193359375ms prompt_token_num:13136 prompt_cache_len:5151 prompt_cache_ratio:0.3921285018270402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 -DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:45 [batch.py:51] router release req id 8 -INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.10862493515014648 s -INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s -DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=184659820770372805105040242201823080716, time:1750768425.6771235s req_ids:[8] -DEBUG 06-24 20:33:45 [manager.py:391] -ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:211.30108833312988ms total_cost_time:211.34686470031738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13137 prompt_cache_len:5151 prompt_cache_ratio:0.39209865266042476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 -DEBUG 06-24 20:33:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:45 [batch.py:51] router release req id 8 -INFO 06-24 20:33:45 [manager.py:224] router recive req id 8 cost time 0.1074066162109375 s -INFO 06-24 20:33:45 [manager.py:68] detokenization recv req id 8 cost time 0.1092691421508789 s -DEBUG 06-24 20:33:45 [manager.py:391] Prefill Batch: batch_id=108512955133402766333711173749043693003, time:1750768425.9038515s req_ids:[8] -DEBUG 06-24 20:33:45 [manager.py:391] -ERROR 06-24 20:33:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:45 lightllm_req_id:8 first_token_cost:215.0290012359619ms total_cost_time:215.0719165802002ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13138 prompt_cache_len:5151 prompt_cache_ratio:0.39206880803775307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 -DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:46 [batch.py:51] router release req id 8 -INFO 06-24 20:33:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.10793161392211914 s -INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.11023569107055664 s -DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=189364422543677207174063727502278588726, time:1750768426.11561s req_ids:[8] -DEBUG 06-24 20:33:46 [manager.py:391] -ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:210.29019355773926ms total_cost_time:210.33406257629395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13139 prompt_cache_len:5151 prompt_cache_ratio:0.39203896795798765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 -DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:46 [batch.py:51] router release req id 8 -INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.1081838607788086 s -INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.11005067825317383 s -DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=291036873403600536592124302130319640209, time:1750768426.334734s req_ids:[8] -DEBUG 06-24 20:33:46 [manager.py:391] -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:209.5813751220703ms total_cost_time:209.60450172424316ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13140 prompt_cache_len:5151 prompt_cache_ratio:0.39200913242009133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 -DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:46 [batch.py:51] router release req id 8 -INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s -INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.10956692695617676 s -DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=248235062703427400615717592393434904045, time:1750768426.551832s req_ids:[8] -DEBUG 06-24 20:33:46 [manager.py:391] -ERROR 06-24 20:33:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:215.67630767822266ms total_cost_time:215.72065353393555ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13141 prompt_cache_len:5151 prompt_cache_ratio:0.39197930142302717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 -DEBUG 06-24 20:33:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:46 [batch.py:51] router release req id 8 -INFO 06-24 20:33:46 [manager.py:224] router recive req id 8 cost time 0.30991625785827637 s -INFO 06-24 20:33:46 [manager.py:68] detokenization recv req id 8 cost time 0.31203675270080566 s -DEBUG 06-24 20:33:46 [manager.py:391] Prefill Batch: batch_id=323267255850530690238457700372727541278, time:1750768426.9807255s req_ids:[8] -DEBUG 06-24 20:33:46 [manager.py:391] -ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:46 lightllm_req_id:8 first_token_cost:429.6104907989502ms total_cost_time:429.6560287475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13142 prompt_cache_len:5151 prompt_cache_ratio:0.39194947496575866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 -INFO 06-24 20:33:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:47 [batch.py:51] router release req id 8 -INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10895371437072754 s -INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11103940010070801 s -DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=327626794702331120311314310022341603881, time:1750768427.213491s req_ids:[8] -DEBUG 06-24 20:33:47 [manager.py:391] -ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:220.11518478393555ms total_cost_time:220.17836570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:13143 prompt_cache_len:5151 prompt_cache_ratio:0.39191965304724946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 -DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:47 [batch.py:51] router release req id 8 -INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10791969299316406 s -INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11004877090454102 s -DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=215188344995670863875619394361210161345, time:1750768427.4352796s req_ids:[8] -DEBUG 06-24 20:33:47 [manager.py:391] -ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:214.42580223083496ms total_cost_time:214.48707580566406ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:13144 prompt_cache_len:5151 prompt_cache_ratio:0.3918898356664638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 -DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:47 [batch.py:51] router release req id 8 -INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.10889172554016113 s -INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11067366600036621 s -DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=37653356391073104813257104893874062915, time:1750768427.6560447s req_ids:[8] -DEBUG 06-24 20:33:47 [manager.py:391] -ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:215.24643898010254ms total_cost_time:215.29102325439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13145 prompt_cache_len:5151 prompt_cache_ratio:0.3918600228223659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 -DEBUG 06-24 20:33:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:47 [batch.py:51] router release req id 8 -INFO 06-24 20:33:47 [manager.py:224] router recive req id 8 cost time 0.1082465648651123 s -INFO 06-24 20:33:47 [manager.py:68] detokenization recv req id 8 cost time 0.11012649536132812 s -DEBUG 06-24 20:33:47 [manager.py:391] Prefill Batch: batch_id=216850800538592376932532414970587151077, time:1750768427.887058s req_ids:[8] -DEBUG 06-24 20:33:47 [manager.py:391] -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:47 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:33:47 [stats.py:37] Avg tokens(prompt+generate) throughput: 53620.075 tokens/s -DEBUG 06-24 20:33:47 [stats.py:37] Avg prompt tokens throughput: 53612.006 tokens/s -DEBUG 06-24 20:33:47 [stats.py:37] Avg generate tokens throughput: 8.069 tokens/s -ERROR 06-24 20:33:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:226.46522521972656ms total_cost_time:226.4845371246338ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13146 prompt_cache_len:5151 prompt_cache_ratio:0.3918302145139206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 -DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:48 [batch.py:51] router release req id 8 -INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10869717597961426 s -INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.10994672775268555 s -DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=162447434756048611056141119332207634510, time:1750768428.1100569s req_ids:[8] -DEBUG 06-24 20:33:48 [manager.py:391] -ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:47 lightllm_req_id:8 first_token_cost:212.87965774536133ms total_cost_time:212.92519569396973ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13147 prompt_cache_len:5151 prompt_cache_ratio:0.3918004107400928 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 -DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:48 [batch.py:51] router release req id 8 -INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.3109896183013916 s -INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.31290268898010254 s -DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=145673279303995485037262736852125956825, time:1750768428.555787s req_ids:[8] -DEBUG 06-24 20:33:48 [manager.py:391] -ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:447.85165786743164ms total_cost_time:447.894811630249ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13148 prompt_cache_len:5151 prompt_cache_ratio:0.3917706114998479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 -DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:48 [batch.py:51] router release req id 8 -INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10823225975036621 s -INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.10957193374633789 s -DEBUG 06-24 20:33:48 [manager.py:391] Prefill Batch: batch_id=156959208281760943745266472530128954709, time:1750768428.7828045s req_ids:[8] -DEBUG 06-24 20:33:48 [manager.py:391] -ERROR 06-24 20:33:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:213.98615837097168ms total_cost_time:214.02859687805176ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13149 prompt_cache_len:5151 prompt_cache_ratio:0.3917408167921515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 -DEBUG 06-24 20:33:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:48 [batch.py:51] router release req id 8 -INFO 06-24 20:33:48 [manager.py:224] router recive req id 8 cost time 0.10791349411010742 s -INFO 06-24 20:33:48 [manager.py:68] detokenization recv req id 8 cost time 0.11046266555786133 s -DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=4830914276340246469128561212195770224, time:1750768429.0026865s req_ids:[8] -DEBUG 06-24 20:33:49 [manager.py:391] -ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:48 lightllm_req_id:8 first_token_cost:211.5764617919922ms total_cost_time:211.66706085205078ms,out_token_counter:1 mean_per_token_cost_time: 0.09059906005859375ms prompt_token_num:13150 prompt_cache_len:5151 prompt_cache_ratio:0.39171102661596957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 -DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:49 [batch.py:51] router release req id 8 -INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10633349418640137 s -INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.10843229293823242 s -DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=246970390320979636042715647586880820197, time:1750768429.2208374s req_ids:[8] -DEBUG 06-24 20:33:49 [manager.py:391] -ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:216.65191650390625ms total_cost_time:216.69650077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13151 prompt_cache_len:5151 prompt_cache_ratio:0.3916812409702684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 -DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:49 [batch.py:51] router release req id 8 -INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10883235931396484 s -INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.11066174507141113 s -DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=63964791975183540221669979967688067887, time:1750768429.4426017s req_ids:[8] -DEBUG 06-24 20:33:49 [manager.py:391] -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:208.01448822021484ms total_cost_time:208.0678939819336ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13152 prompt_cache_len:5151 prompt_cache_ratio:0.3916514598540146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 -DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:49 [batch.py:51] router release req id 8 -INFO 06-24 20:33:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:49 [manager.py:224] router recive req id 8 cost time 0.10859537124633789 s -INFO 06-24 20:33:49 [manager.py:68] detokenization recv req id 8 cost time 0.11060643196105957 s -DEBUG 06-24 20:33:49 [manager.py:391] Prefill Batch: batch_id=252334872313281557390114054154606555162, time:1750768429.6619606s req_ids:[8] -DEBUG 06-24 20:33:49 [manager.py:391] -ERROR 06-24 20:33:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:212.8884792327881ms total_cost_time:212.9347324371338ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13153 prompt_cache_len:5151 prompt_cache_ratio:0.391621683266175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 -DEBUG 06-24 20:33:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:49 [batch.py:51] router release req id 8 -INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.3102736473083496 s -INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.3115420341491699 s -DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=333463761376951333227654609887741986073, time:1750768430.0866444s req_ids:[8] -DEBUG 06-24 20:33:50 [manager.py:391] -ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:49 lightllm_req_id:8 first_token_cost:428.8172721862793ms total_cost_time:428.8625717163086ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13154 prompt_cache_len:5151 prompt_cache_ratio:0.3915919112057169 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 -DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:50 [batch.py:51] router release req id 8 -INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10837650299072266 s -INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.1098945140838623 s -DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=78554040213818175861095603340165724099, time:1750768430.311668s req_ids:[8] -DEBUG 06-24 20:33:50 [manager.py:391] -ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:215.35730361938477ms total_cost_time:215.40260314941406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13155 prompt_cache_len:5151 prompt_cache_ratio:0.39156214367160774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 -DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:50 [batch.py:51] router release req id 8 -INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10838174819946289 s -INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.11032223701477051 s -DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=304116427796450187728293106887514233530, time:1750768430.530841s req_ids:[8] -DEBUG 06-24 20:33:50 [manager.py:391] -ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:206.26378059387207ms total_cost_time:206.31051063537598ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13156 prompt_cache_len:5151 prompt_cache_ratio:0.3915323806628154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 -DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:50 [batch.py:51] router release req id 8 -INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10890460014343262 s -INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.11081171035766602 s -DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=187756592006130963605720131132015763847, time:1750768430.747106s req_ids:[8] -DEBUG 06-24 20:33:50 [manager.py:391] -ERROR 06-24 20:33:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:208.99653434753418ms total_cost_time:209.04040336608887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13157 prompt_cache_len:5151 prompt_cache_ratio:0.3915026221783081 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 -DEBUG 06-24 20:33:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:50 [batch.py:51] router release req id 8 -INFO 06-24 20:33:50 [manager.py:224] router recive req id 8 cost time 0.10941481590270996 s -INFO 06-24 20:33:50 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s -DEBUG 06-24 20:33:50 [manager.py:391] Prefill Batch: batch_id=166132745062119871562775991070301520271, time:1750768430.9627264s req_ids:[8] -DEBUG 06-24 20:33:50 [manager.py:391] -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:50 lightllm_req_id:8 first_token_cost:212.65935897827148ms total_cost_time:212.70442008972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13158 prompt_cache_len:5151 prompt_cache_ratio:0.39147286821705424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 -DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:51 [batch.py:51] router release req id 8 -INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.1079254150390625 s -INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.10987186431884766 s -DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=27450959189400297889441126868468551917, time:1750768431.1807299s req_ids:[8] -DEBUG 06-24 20:33:51 [manager.py:391] -ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:212.49842643737793ms total_cost_time:212.54205703735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13159 prompt_cache_len:5151 prompt_cache_ratio:0.3914431187780226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 -DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:51 [batch.py:51] router release req id 8 -INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.3110380172729492 s -INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.31278109550476074 s -DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=312288499414900592727716544806364054101, time:1750768431.616931s req_ids:[8] -DEBUG 06-24 20:33:51 [manager.py:391] -ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:440.08421897888184ms total_cost_time:440.1280879974365ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13160 prompt_cache_len:5151 prompt_cache_ratio:0.3914133738601824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 -DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:51 [batch.py:51] router release req id 8 -INFO 06-24 20:33:51 [manager.py:224] router recive req id 8 cost time 0.10962677001953125 s -INFO 06-24 20:33:51 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s -DEBUG 06-24 20:33:51 [manager.py:391] Prefill Batch: batch_id=248303836053384664579844288582413782517, time:1750768431.846237s req_ids:[8] -DEBUG 06-24 20:33:51 [manager.py:391] -ERROR 06-24 20:33:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:211.81035041809082ms total_cost_time:211.8399143218994ms,out_token_counter:1 mean_per_token_cost_time: 0.02956390380859375ms prompt_token_num:13161 prompt_cache_len:5151 prompt_cache_ratio:0.3913836334625028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 -DEBUG 06-24 20:33:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:51 [batch.py:51] router release req id 8 -INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10804629325866699 s -INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.10910272598266602 s -DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=152187104299542735941176055708392711906, time:1750768432.0657094s req_ids:[8] -DEBUG 06-24 20:33:52 [manager.py:391] -ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:51 lightllm_req_id:8 first_token_cost:214.1108512878418ms total_cost_time:214.18261528015137ms,out_token_counter:1 mean_per_token_cost_time: 0.07176399230957031ms prompt_token_num:13162 prompt_cache_len:5151 prompt_cache_ratio:0.3913538975839538 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 -DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:52 [batch.py:51] router release req id 8 -INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10903573036193848 s -INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.11102533340454102 s -DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=106151674992362422390779297338203392687, time:1750768432.2852895s req_ids:[8] -DEBUG 06-24 20:33:52 [manager.py:391] -ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:208.18662643432617ms total_cost_time:208.23287963867188ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13163 prompt_cache_len:5151 prompt_cache_ratio:0.3913241662235053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 -DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:52 [batch.py:51] router release req id 8 -INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.10899734497070312 s -INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.1110086441040039 s -DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=98143130093823317670466518579096732868, time:1750768432.5020652s req_ids:[8] -DEBUG 06-24 20:33:52 [manager.py:391] -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:209.61785316467285ms total_cost_time:209.6400260925293ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:13164 prompt_cache_len:5151 prompt_cache_ratio:0.39129443938012765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 -DEBUG 06-24 20:33:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:52 [batch.py:51] router release req id 8 -INFO 06-24 20:33:52 [manager.py:224] router recive req id 8 cost time 0.1062767505645752 s -INFO 06-24 20:33:52 [manager.py:68] detokenization recv req id 8 cost time 0.10819888114929199 s -DEBUG 06-24 20:33:52 [manager.py:391] Prefill Batch: batch_id=133363582994201654239667870520594515129, time:1750768432.724929s req_ids:[8] -DEBUG 06-24 20:33:52 [manager.py:391] -ERROR 06-24 20:33:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:224.16257858276367ms total_cost_time:224.20787811279297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13165 prompt_cache_len:5151 prompt_cache_ratio:0.3912647170527915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 -DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:53 [batch.py:51] router release req id 8 -INFO 06-24 20:33:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.3104238510131836 s -INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.3118631839752197 s -DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=181217785764845566239292751026674827305, time:1750768433.162158s req_ids:[8] -DEBUG 06-24 20:33:53 [manager.py:391] -INFO 06-24 20:33:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:33:53 [statics_utils.py:24] mean first cost: 230.39619293099616 ms -INFO 06-24 20:33:53 [statics_utils.py:24] mean per token cost: 0.06011501844902964 ms -ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:52 lightllm_req_id:8 first_token_cost:441.92028045654297ms total_cost_time:441.96557998657227ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13166 prompt_cache_len:5151 prompt_cache_ratio:0.3912349992404679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 -DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:53 [batch.py:51] router release req id 8 -INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.1094059944152832 s -INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s -DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=247551491489587095815915939284519681815, time:1750768433.3963976s req_ids:[8] -DEBUG 06-24 20:33:53 [manager.py:391] -ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:218.0335521697998ms total_cost_time:218.0793285369873ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13167 prompt_cache_len:5151 prompt_cache_ratio:0.39120528594212806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 -DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:53 [batch.py:51] router release req id 8 -INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.10792970657348633 s -INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.10994148254394531 s -DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=117213379454645645519713659590928378133, time:1750768433.6180952s req_ids:[8] -DEBUG 06-24 20:33:53 [manager.py:391] -ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:205.2023410797119ms total_cost_time:205.2459716796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13168 prompt_cache_len:5151 prompt_cache_ratio:0.3911755771567436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 -DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:53 [batch.py:51] router release req id 8 -INFO 06-24 20:33:53 [manager.py:224] router recive req id 8 cost time 0.10836362838745117 s -INFO 06-24 20:33:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037015914916992 s -DEBUG 06-24 20:33:53 [manager.py:391] Prefill Batch: batch_id=324993064159456080911211463287209798369, time:1750768433.832221s req_ids:[8] -DEBUG 06-24 20:33:53 [manager.py:391] -ERROR 06-24 20:33:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:211.78817749023438ms total_cost_time:211.83276176452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13169 prompt_cache_len:5151 prompt_cache_ratio:0.3911458728832865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 -DEBUG 06-24 20:33:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:53 [batch.py:51] router release req id 8 -INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.10913968086242676 s -INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.11086893081665039 s -DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=82729415078469742769646503238360575526, time:1750768434.0481393s req_ids:[8] -DEBUG 06-24 20:33:54 [manager.py:391] -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:53 lightllm_req_id:8 first_token_cost:173.74920845031738ms total_cost_time:173.79450798034668ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13170 prompt_cache_len:5151 prompt_cache_ratio:0.39111617312072894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 -DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:54 [batch.py:51] router release req id 8 -INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.1078946590423584 s -INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974001884460449 s -DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=316400296747290277298267288365268640272, time:1750768434.229176s req_ids:[8] -DEBUG 06-24 20:33:54 [manager.py:391] -ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:200.11472702026367ms total_cost_time:200.15907287597656ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13171 prompt_cache_len:5151 prompt_cache_ratio:0.3910864778680434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 -DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:54 [batch.py:51] router release req id 8 -INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.31023216247558594 s -INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.3113899230957031 s -DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=118851558604832335514498913705495319104, time:1750768434.6477134s req_ids:[8] -DEBUG 06-24 20:33:54 [manager.py:391] -ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:433.779239654541ms total_cost_time:433.8250160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13172 prompt_cache_len:5151 prompt_cache_ratio:0.39105678712420283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 -DEBUG 06-24 20:33:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:54 [batch.py:51] router release req id 8 -INFO 06-24 20:33:54 [manager.py:224] router recive req id 8 cost time 0.10837388038635254 s -INFO 06-24 20:33:54 [manager.py:68] detokenization recv req id 8 cost time 0.11031603813171387 s -DEBUG 06-24 20:33:54 [manager.py:391] Prefill Batch: batch_id=24036374268205780726137780491025842491, time:1750768434.8822956s req_ids:[8] -DEBUG 06-24 20:33:54 [manager.py:391] -ERROR 06-24 20:33:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:221.31061553955078ms total_cost_time:221.35663032531738ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13173 prompt_cache_len:5151 prompt_cache_ratio:0.3910271008881804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 -DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:55 [batch.py:51] router release req id 8 -INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10839653015136719 s -INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.1102755069732666 s -DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=70422377231592748046638769907519076678, time:1750768435.1057937s req_ids:[8] -DEBUG 06-24 20:33:55 [manager.py:391] -ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:54 lightllm_req_id:8 first_token_cost:213.30642700195312ms total_cost_time:213.35268020629883ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13174 prompt_cache_len:5151 prompt_cache_ratio:0.3909974191589494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 -DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:55 [batch.py:51] router release req id 8 -INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10786151885986328 s -INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10976052284240723 s -DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=244128714594556216244954003238197054386, time:1750768435.3243606s req_ids:[8] -DEBUG 06-24 20:33:55 [manager.py:391] -ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:211.36069297790527ms total_cost_time:211.40575408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13175 prompt_cache_len:5151 prompt_cache_ratio:0.3909677419354839 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 -DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:55 [batch.py:51] router release req id 8 -INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10777139663696289 s -INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10982108116149902 s -DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=10263111673548856630867399498336728017, time:1750768435.540788s req_ids:[8] -DEBUG 06-24 20:33:55 [manager.py:391] -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:214.68853950500488ms total_cost_time:214.7359848022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13176 prompt_cache_len:5151 prompt_cache_ratio:0.3909380692167577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 -DEBUG 06-24 20:33:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:55 [batch.py:51] router release req id 8 -INFO 06-24 20:33:55 [manager.py:224] router recive req id 8 cost time 0.10795855522155762 s -INFO 06-24 20:33:55 [manager.py:68] detokenization recv req id 8 cost time 0.10988664627075195 s -DEBUG 06-24 20:33:55 [manager.py:391] Prefill Batch: batch_id=136075030187177610743829637756159298219, time:1750768435.7636127s req_ids:[8] -DEBUG 06-24 20:33:55 [manager.py:391] -ERROR 06-24 20:33:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:213.9413356781006ms total_cost_time:213.98377418518066ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13177 prompt_cache_len:5151 prompt_cache_ratio:0.39090840100174545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 -DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:56 [batch.py:51] router release req id 8 -INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.31020641326904297 s -INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.31221747398376465 s -DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=182577671402752600091128810617392061113, time:1750768436.1948225s req_ids:[8] -DEBUG 06-24 20:33:56 [manager.py:391] -ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:55 lightllm_req_id:8 first_token_cost:429.09860610961914ms total_cost_time:429.14271354675293ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13178 prompt_cache_len:5151 prompt_cache_ratio:0.39087873728942174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 -DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:56 [batch.py:51] router release req id 8 -INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10897088050842285 s -INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.11095118522644043 s -DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=115028498028757378640807879991877252180, time:1750768436.4182608s req_ids:[8] -DEBUG 06-24 20:33:56 [manager.py:391] -ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:214.48183059692383ms total_cost_time:214.5249843597412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13179 prompt_cache_len:5151 prompt_cache_ratio:0.3908490780787617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 -DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:56 [batch.py:51] router release req id 8 -INFO 06-24 20:33:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10796833038330078 s -INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.1090703010559082 s -DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=49563928374649874744593749415464691651, time:1750768436.6397333s req_ids:[8] -DEBUG 06-24 20:33:56 [manager.py:391] -ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:215.956449508667ms total_cost_time:216.00008010864258ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13180 prompt_cache_len:5151 prompt_cache_ratio:0.39081942336874054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 -DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:56 [batch.py:51] router release req id 8 -INFO 06-24 20:33:56 [manager.py:224] router recive req id 8 cost time 0.10817575454711914 s -INFO 06-24 20:33:56 [manager.py:68] detokenization recv req id 8 cost time 0.11021089553833008 s -DEBUG 06-24 20:33:56 [manager.py:391] Prefill Batch: batch_id=102613666747554359240784416686912000731, time:1750768436.8623939s req_ids:[8] -DEBUG 06-24 20:33:56 [manager.py:391] -ERROR 06-24 20:33:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:210.94775199890137ms total_cost_time:210.99185943603516ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13181 prompt_cache_len:5151 prompt_cache_ratio:0.39078977315833396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 -DEBUG 06-24 20:33:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:56 [batch.py:51] router release req id 8 -INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10738205909729004 s -INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.10931086540222168 s -DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=12272588665411357596502634674284390345, time:1750768437.0808725s req_ids:[8] -DEBUG 06-24 20:33:57 [manager.py:391] -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:33:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:56 lightllm_req_id:8 first_token_cost:212.1579647064209ms total_cost_time:212.2032642364502ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13182 prompt_cache_len:5151 prompt_cache_ratio:0.390760127446518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 -DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:57 [batch.py:51] router release req id 8 -INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10815119743347168 s -INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s -DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=141629662422050505262190259199766242749, time:1750768437.3025272s req_ids:[8] -DEBUG 06-24 20:33:57 [manager.py:391] -ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:206.63046836853027ms total_cost_time:206.67195320129395ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13183 prompt_cache_len:5151 prompt_cache_ratio:0.39073048623226886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 -DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:57 [batch.py:51] router release req id 8 -INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10807013511657715 s -INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.10993599891662598 s -DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=315654540399207966221048014326005403941, time:1750768437.5229893s req_ids:[8] -DEBUG 06-24 20:33:57 [manager.py:391] -ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:383.67509841918945ms total_cost_time:383.72039794921875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13184 prompt_cache_len:5151 prompt_cache_ratio:0.39070084951456313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 -DEBUG 06-24 20:33:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:57 [batch.py:51] router release req id 8 -INFO 06-24 20:33:57 [manager.py:224] router recive req id 8 cost time 0.10940718650817871 s -INFO 06-24 20:33:57 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s -DEBUG 06-24 20:33:57 [manager.py:391] Prefill Batch: batch_id=120453181282946172119381735450404362012, time:1750768437.9037576s req_ids:[8] -DEBUG 06-24 20:33:57 [manager.py:391] -DEBUG 06-24 20:33:57 [stats.py:37] Avg tokens(prompt+generate) throughput: 51270.269 tokens/s -DEBUG 06-24 20:33:57 [stats.py:37] Avg prompt tokens throughput: 51262.482 tokens/s -DEBUG 06-24 20:33:57 [stats.py:37] Avg generate tokens throughput: 7.787 tokens/s -ERROR 06-24 20:33:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:57 lightllm_req_id:8 first_token_cost:211.28034591674805ms total_cost_time:211.32349967956543ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13185 prompt_cache_len:5151 prompt_cache_ratio:0.3906712172923777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 -DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:58 [batch.py:51] router release req id 8 -INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10783219337463379 s -INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.10933089256286621 s -DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=166843003475726399469422672578994055326, time:1750768438.1223602s req_ids:[8] -DEBUG 06-24 20:33:58 [manager.py:391] -ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:214.63990211486816ms total_cost_time:214.68472480773926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13186 prompt_cache_len:5151 prompt_cache_ratio:0.3906415895646898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 -DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:58 [batch.py:51] router release req id 8 -INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.1082601547241211 s -INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.11018204689025879 s -DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=21147390825331859137541169426695240334, time:1750768438.345973s req_ids:[8] -DEBUG 06-24 20:33:58 [manager.py:391] -ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:215.20161628723145ms total_cost_time:215.24786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13187 prompt_cache_len:5151 prompt_cache_ratio:0.390611966330477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 -DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:58 [batch.py:51] router release req id 8 -INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10885262489318848 s -INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s -DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=327533307895726290873018928449781516229, time:1750768438.56639s req_ids:[8] -DEBUG 06-24 20:33:58 [manager.py:391] -ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:215.1651382446289ms total_cost_time:215.2092456817627ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13188 prompt_cache_len:5151 prompt_cache_ratio:0.390582347588717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 -DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:58 [batch.py:51] router release req id 8 -INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s -INFO 06-24 20:33:58 [manager.py:68] detokenization recv req id 8 cost time 0.10976243019104004 s -DEBUG 06-24 20:33:58 [manager.py:391] Prefill Batch: batch_id=18114816391813926391071723661622747849, time:1750768438.789082s req_ids:[8] -DEBUG 06-24 20:33:58 [manager.py:391] -ERROR 06-24 20:33:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:214.32852745056152ms total_cost_time:214.37525749206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13189 prompt_cache_len:5151 prompt_cache_ratio:0.390552733338388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 -DEBUG 06-24 20:33:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:58 [batch.py:51] router release req id 8 -INFO 06-24 20:33:58 [manager.py:224] router recive req id 8 cost time 0.10736680030822754 s -INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.1092836856842041 s -DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=8060910753896387065178709579600886727, time:1750768439.0197086s req_ids:[8] -DEBUG 06-24 20:33:59 [manager.py:391] -ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:58 lightllm_req_id:8 first_token_cost:392.8241729736328ms total_cost_time:392.8680419921875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13190 prompt_cache_len:5151 prompt_cache_ratio:0.3905231235784685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 -DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:59 [batch.py:51] router release req id 8 -INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.1102907657623291 s -INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.11209249496459961 s -DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=300929159348211488814591184282480055407, time:1750768439.4209375s req_ids:[8] -DEBUG 06-24 20:33:59 [manager.py:391] -ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:193.3727264404297ms total_cost_time:193.41707229614258ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13191 prompt_cache_len:5151 prompt_cache_ratio:0.3904935183079372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 -DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:59 [batch.py:51] router release req id 8 -INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.10821914672851562 s -INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.10936307907104492 s -DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=96769123055287817514068320932268282209, time:1750768439.6073055s req_ids:[8] -DEBUG 06-24 20:33:59 [manager.py:391] -ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:209.77067947387695ms total_cost_time:209.8228931427002ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13192 prompt_cache_len:5151 prompt_cache_ratio:0.3904639175257732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 -DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:59 [batch.py:51] router release req id 8 -INFO 06-24 20:33:59 [manager.py:224] router recive req id 8 cost time 0.10790085792541504 s -INFO 06-24 20:33:59 [manager.py:68] detokenization recv req id 8 cost time 0.10982322692871094 s -DEBUG 06-24 20:33:59 [manager.py:391] Prefill Batch: batch_id=75383130942573820089785275737676791179, time:1750768439.8283038s req_ids:[8] -DEBUG 06-24 20:33:59 [manager.py:391] -ERROR 06-24 20:33:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:33:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:33:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:222.63717651367188ms total_cost_time:222.69535064697266ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:13193 prompt_cache_len:5151 prompt_cache_ratio:0.3904343212309558 mtp_avg_token_per_step:1.0 -INFO 06-24 20:33:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 -DEBUG 06-24 20:33:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:33:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:33:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:33:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:33:59 [batch.py:51] router release req id 8 -INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10787582397460938 s -INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10973429679870605 s -DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=310491619352652089904216655661690453672, time:1750768440.0572007s req_ids:[8] -DEBUG 06-24 20:34:00 [manager.py:391] -ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:33:59 lightllm_req_id:8 first_token_cost:184.42893028259277ms total_cost_time:184.47470664978027ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13194 prompt_cache_len:5151 prompt_cache_ratio:0.39040472942246474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 -DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:00 [batch.py:51] router release req id 8 -INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10796928405761719 s -INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10914921760559082 s -DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=163187536737577778575506724070493226392, time:1750768440.2468464s req_ids:[8] -DEBUG 06-24 20:34:00 [manager.py:391] -ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:212.16869354248047ms total_cost_time:212.21446990966797ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13195 prompt_cache_len:5151 prompt_cache_ratio:0.39037514209928004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 -DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:00 [batch.py:51] router release req id 8 -INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10725045204162598 s -INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10908842086791992 s -DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=314241326447672026146963457042004803187, time:1750768440.4628708s req_ids:[8] -DEBUG 06-24 20:34:00 [manager.py:391] -ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:389.7740840911865ms total_cost_time:389.8191452026367ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13196 prompt_cache_len:5151 prompt_cache_ratio:0.39034555926038195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 -DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:00 [batch.py:51] router release req id 8 -INFO 06-24 20:34:00 [manager.py:224] router recive req id 8 cost time 0.10833096504211426 s -INFO 06-24 20:34:00 [manager.py:68] detokenization recv req id 8 cost time 0.10971736907958984 s -DEBUG 06-24 20:34:00 [manager.py:391] Prefill Batch: batch_id=329298236837561679479934913208737346445, time:1750768440.859303s req_ids:[8] -DEBUG 06-24 20:34:00 [manager.py:391] -ERROR 06-24 20:34:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:217.97823905944824ms total_cost_time:218.0345058441162ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13197 prompt_cache_len:5151 prompt_cache_ratio:0.39031598090475106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 -DEBUG 06-24 20:34:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:00 [batch.py:51] router release req id 8 -INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.1083669662475586 s -INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11031818389892578 s -DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=284155947081109980376749904396892847037, time:1750768441.083173s req_ids:[8] -DEBUG 06-24 20:34:01 [manager.py:391] -ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:00 lightllm_req_id:8 first_token_cost:216.01104736328125ms total_cost_time:216.05515480041504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13198 prompt_cache_len:5151 prompt_cache_ratio:0.3902864070313684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 -DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:01 [batch.py:51] router release req id 8 -INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s -INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s -DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=84283677724473920741460703560474537174, time:1750768441.3058605s req_ids:[8] -DEBUG 06-24 20:34:01 [manager.py:391] -ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:212.99481391906738ms total_cost_time:213.05012702941895ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:13199 prompt_cache_len:5151 prompt_cache_ratio:0.3902568376392151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 -DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:01 [batch.py:51] router release req id 8 -INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s -INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025238037109375 s -DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=225189486158240959279566138390004322837, time:1750768441.5250845s req_ids:[8] -DEBUG 06-24 20:34:01 [manager.py:391] -ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:208.2829475402832ms total_cost_time:208.34040641784668ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:13200 prompt_cache_len:5151 prompt_cache_ratio:0.3902272727272727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 -DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:01 [batch.py:51] router release req id 8 -INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.10892295837402344 s -INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11089038848876953 s -DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=235982416363979630286252794857947961221, time:1750768441.7404175s req_ids:[8] -DEBUG 06-24 20:34:01 [manager.py:391] -ERROR 06-24 20:34:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:214.15042877197266ms total_cost_time:214.19405937194824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13201 prompt_cache_len:5151 prompt_cache_ratio:0.39019771229452316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 -DEBUG 06-24 20:34:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:01 [batch.py:51] router release req id 8 -INFO 06-24 20:34:01 [manager.py:224] router recive req id 8 cost time 0.1095423698425293 s -INFO 06-24 20:34:01 [manager.py:68] detokenization recv req id 8 cost time 0.11172008514404297 s -DEBUG 06-24 20:34:01 [manager.py:391] Prefill Batch: batch_id=264451072055428580316343265900257533137, time:1750768441.9613812s req_ids:[8] -DEBUG 06-24 20:34:01 [manager.py:391] -ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:01 lightllm_req_id:8 first_token_cost:390.8357620239258ms total_cost_time:390.8801078796387ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13202 prompt_cache_len:5151 prompt_cache_ratio:0.3901681563399485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 -DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:02 [batch.py:51] router release req id 8 -INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.10834264755249023 s -INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036849021911621 s -DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=289139235998981889836922926926807451065, time:1750768442.3585443s req_ids:[8] -DEBUG 06-24 20:34:02 [manager.py:391] -ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:214.05982971191406ms total_cost_time:214.10250663757324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13203 prompt_cache_len:5151 prompt_cache_ratio:0.39013860486253127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 -DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:02 [batch.py:51] router release req id 8 -INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.1079702377319336 s -INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.10997653007507324 s -DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=279489038695631452720626208866594998264, time:1750768442.5884283s req_ids:[8] -DEBUG 06-24 20:34:02 [manager.py:391] -ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:227.63800621032715ms total_cost_time:227.68568992614746ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:13204 prompt_cache_len:5151 prompt_cache_ratio:0.39010905786125416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 -DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:02 [batch.py:51] router release req id 8 -INFO 06-24 20:34:02 [manager.py:224] router recive req id 8 cost time 0.10740780830383301 s -INFO 06-24 20:34:02 [manager.py:68] detokenization recv req id 8 cost time 0.10931086540222168 s -DEBUG 06-24 20:34:02 [manager.py:391] Prefill Batch: batch_id=238772383955670554054154398247459430029, time:1750768442.814802s req_ids:[8] -DEBUG 06-24 20:34:02 [manager.py:391] -ERROR 06-24 20:34:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:210.97087860107422ms total_cost_time:211.01641654968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13205 prompt_cache_len:5151 prompt_cache_ratio:0.3900795153351003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 -DEBUG 06-24 20:34:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:02 [batch.py:51] router release req id 8 -INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10850071907043457 s -INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.11095714569091797 s -DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=279611988909679756842796077914609622358, time:1750768443.030513s req_ids:[8] -DEBUG 06-24 20:34:03 [manager.py:391] -ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:02 lightllm_req_id:8 first_token_cost:214.59555625915527ms total_cost_time:214.64133262634277ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13206 prompt_cache_len:5151 prompt_cache_ratio:0.3900499772830532 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 -DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:03 [batch.py:51] router release req id 8 -INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10794186592102051 s -INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.10985183715820312 s -DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=139092035442193807401632348919139980115, time:1750768443.2515635s req_ids:[8] -DEBUG 06-24 20:34:03 [manager.py:391] -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:215.3952121734619ms total_cost_time:215.4395580291748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13207 prompt_cache_len:5151 prompt_cache_ratio:0.39002044370409633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 -DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:03 [batch.py:51] router release req id 8 -INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10782694816589355 s -INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.10992980003356934 s -DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=105024766987536328639105699692070010940, time:1750768443.4783742s req_ids:[8] -DEBUG 06-24 20:34:03 [manager.py:391] -ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:392.59958267211914ms total_cost_time:392.64392852783203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13208 prompt_cache_len:5151 prompt_cache_ratio:0.3899909145972138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 -DEBUG 06-24 20:34:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:03 [batch.py:51] router release req id 8 -INFO 06-24 20:34:03 [manager.py:224] router recive req id 8 cost time 0.10864663124084473 s -INFO 06-24 20:34:03 [manager.py:68] detokenization recv req id 8 cost time 0.11053609848022461 s -DEBUG 06-24 20:34:03 [manager.py:391] Prefill Batch: batch_id=243630165342058371638470948210602517946, time:1750768443.8744683s req_ids:[8] -DEBUG 06-24 20:34:03 [manager.py:391] -ERROR 06-24 20:34:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:213.6518955230713ms total_cost_time:213.69409561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13209 prompt_cache_len:5151 prompt_cache_ratio:0.38996138996138996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 -DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:04 [batch.py:51] router release req id 8 -INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10464191436767578 s -INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10653281211853027 s -DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=146833770924420724041854659593946355368, time:1750768444.0985916s req_ids:[8] -DEBUG 06-24 20:34:04 [manager.py:391] -ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:03 lightllm_req_id:8 first_token_cost:209.8829746246338ms total_cost_time:209.92636680603027ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13210 prompt_cache_len:5151 prompt_cache_ratio:0.3899318697956094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 -DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:04 [batch.py:51] router release req id 8 -INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10705900192260742 s -INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10898494720458984 s -DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=7676467345603072798185129080058320869, time:1750768444.313344s req_ids:[8] -DEBUG 06-24 20:34:04 [manager.py:391] -ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:211.08007431030273ms total_cost_time:211.13038063049316ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:13211 prompt_cache_len:5151 prompt_cache_ratio:0.38990235409885704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 -DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:04 [batch.py:51] router release req id 8 -INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10395121574401855 s -INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10638546943664551 s -DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=258328398136156433401548714798680116052, time:1750768444.5401285s req_ids:[8] -DEBUG 06-24 20:34:04 [manager.py:391] -ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:225.9054183959961ms total_cost_time:225.92759132385254ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:13212 prompt_cache_len:5151 prompt_cache_ratio:0.3898728428701181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 -DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:04 [batch.py:51] router release req id 8 -INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10408186912536621 s -INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10604500770568848 s -DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=233815980363126009679898083775430223385, time:1750768444.7615159s req_ids:[8] -DEBUG 06-24 20:34:04 [manager.py:391] -ERROR 06-24 20:34:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:217.13638305664062ms total_cost_time:217.18096733093262ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13213 prompt_cache_len:5151 prompt_cache_ratio:0.3898433361083781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 -DEBUG 06-24 20:34:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:04 [batch.py:51] router release req id 8 -INFO 06-24 20:34:04 [manager.py:224] router recive req id 8 cost time 0.10816264152526855 s -INFO 06-24 20:34:04 [manager.py:68] detokenization recv req id 8 cost time 0.10927248001098633 s -DEBUG 06-24 20:34:04 [manager.py:391] Prefill Batch: batch_id=195553233546468705211168149156281063761, time:1750768444.9853878s req_ids:[8] -DEBUG 06-24 20:34:04 [manager.py:391] -ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:04 lightllm_req_id:8 first_token_cost:388.9658451080322ms total_cost_time:389.0101909637451ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13214 prompt_cache_len:5151 prompt_cache_ratio:0.38981383381262297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 -DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:05 [batch.py:51] router release req id 8 -INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s -INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10923385620117188 s -DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=103574787503621450160557009011644233880, time:1750768445.3794458s req_ids:[8] -DEBUG 06-24 20:34:05 [manager.py:391] -ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:215.07644653320312ms total_cost_time:215.12079238891602ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13215 prompt_cache_len:5151 prompt_cache_ratio:0.3897843359818388 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 -DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:05 [batch.py:51] router release req id 8 -INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10803413391113281 s -INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10968923568725586 s -DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=274591624054461904947093176956923952, time:1750768445.6024773s req_ids:[8] -DEBUG 06-24 20:34:05 [manager.py:391] -ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:211.18497848510742ms total_cost_time:211.23003959655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13216 prompt_cache_len:5151 prompt_cache_ratio:0.3897548426150121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 -DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:05 [batch.py:51] router release req id 8 -INFO 06-24 20:34:05 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s -INFO 06-24 20:34:05 [manager.py:68] detokenization recv req id 8 cost time 0.10995292663574219 s -DEBUG 06-24 20:34:05 [manager.py:391] Prefill Batch: batch_id=249680009927053678702348997142134700571, time:1750768445.8179312s req_ids:[8] -DEBUG 06-24 20:34:05 [manager.py:391] -ERROR 06-24 20:34:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:210.98661422729492ms total_cost_time:211.03191375732422ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13217 prompt_cache_len:5151 prompt_cache_ratio:0.3897253537111296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 -DEBUG 06-24 20:34:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:05 [batch.py:51] router release req id 8 -INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10788726806640625 s -INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.10989046096801758 s -DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=42725730848096509198490321041235344228, time:1750768446.033306s req_ids:[8] -DEBUG 06-24 20:34:06 [manager.py:391] -ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:05 lightllm_req_id:8 first_token_cost:210.16812324523926ms total_cost_time:210.21151542663574ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13218 prompt_cache_len:5151 prompt_cache_ratio:0.3896958692691784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 -DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:06 [batch.py:51] router release req id 8 -INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10779500007629395 s -INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.10971832275390625 s -DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=163720494046865216234550839551933833781, time:1750768446.2500188s req_ids:[8] -DEBUG 06-24 20:34:06 [manager.py:391] -ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:211.91143989562988ms total_cost_time:211.95721626281738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13219 prompt_cache_len:5151 prompt_cache_ratio:0.38966638928814584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 -DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:06 [batch.py:51] router release req id 8 -INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10906076431274414 s -INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.11099863052368164 s -DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=83785725144790305575756129569376089562, time:1750768446.4685452s req_ids:[8] -DEBUG 06-24 20:34:06 [manager.py:391] -ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:383.3932876586914ms total_cost_time:383.4385871887207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13220 prompt_cache_len:5151 prompt_cache_ratio:0.38963691376701964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 -DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:06 [batch.py:51] router release req id 8 -INFO 06-24 20:34:06 [manager.py:224] router recive req id 8 cost time 0.10839962959289551 s -INFO 06-24 20:34:06 [manager.py:68] detokenization recv req id 8 cost time 0.11056923866271973 s -DEBUG 06-24 20:34:06 [manager.py:391] Prefill Batch: batch_id=198883607274976995184759676193740069028, time:1750768446.8599784s req_ids:[8] -DEBUG 06-24 20:34:06 [manager.py:391] -ERROR 06-24 20:34:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:214.93005752563477ms total_cost_time:214.97559547424316ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13221 prompt_cache_len:5151 prompt_cache_ratio:0.38960744270478787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 -DEBUG 06-24 20:34:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:06 [batch.py:51] router release req id 8 -INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1092367172241211 s -INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11119270324707031 s -DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=263454507220464974071885507224942903700, time:1750768447.0805004s req_ids:[8] -DEBUG 06-24 20:34:07 [manager.py:391] -ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:06 lightllm_req_id:8 first_token_cost:212.5990390777588ms total_cost_time:212.64410018920898ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13222 prompt_cache_len:5151 prompt_cache_ratio:0.38957797610043865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 -DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:07 [batch.py:51] router release req id 8 -INFO 06-24 20:34:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s -INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.109527587890625 s -DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=219517760510085441115253027328715121342, time:1750768447.3005784s req_ids:[8] -DEBUG 06-24 20:34:07 [manager.py:391] -ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:207.72767066955566ms total_cost_time:207.77368545532227ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13223 prompt_cache_len:5151 prompt_cache_ratio:0.38954851395296075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 -DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:07 [batch.py:51] router release req id 8 -INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.10835933685302734 s -INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11026620864868164 s -DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=234789490875355752872122187491129214024, time:1750768447.514303s req_ids:[8] -DEBUG 06-24 20:34:07 [manager.py:391] -ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:208.2529067993164ms total_cost_time:208.2974910736084ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13224 prompt_cache_len:5151 prompt_cache_ratio:0.38951905626134303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 -DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:07 [batch.py:51] router release req id 8 -INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1082754135131836 s -INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023569107055664 s -DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=39555193040158159345082138047775101757, time:1750768447.729522s req_ids:[8] -DEBUG 06-24 20:34:07 [manager.py:391] -ERROR 06-24 20:34:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:212.8002643585205ms total_cost_time:212.8453254699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13225 prompt_cache_len:5151 prompt_cache_ratio:0.3894896030245747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 -DEBUG 06-24 20:34:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:07 [batch.py:51] router release req id 8 -INFO 06-24 20:34:07 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s -INFO 06-24 20:34:07 [manager.py:68] detokenization recv req id 8 cost time 0.10912585258483887 s -DEBUG 06-24 20:34:07 [manager.py:391] Prefill Batch: batch_id=184299323313781469384723318880190528684, time:1750768447.9479165s req_ids:[8] -DEBUG 06-24 20:34:07 [manager.py:391] -DEBUG 06-24 20:34:07 [stats.py:37] Avg tokens(prompt+generate) throughput: 53915.036 tokens/s -DEBUG 06-24 20:34:07 [stats.py:37] Avg prompt tokens throughput: 53906.872 tokens/s -DEBUG 06-24 20:34:07 [stats.py:37] Avg generate tokens throughput: 8.164 tokens/s -ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:07 lightllm_req_id:8 first_token_cost:212.88323402404785ms total_cost_time:212.92901039123535ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13226 prompt_cache_len:5151 prompt_cache_ratio:0.38946015424164526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 -DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:08 [batch.py:51] router release req id 8 -INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.3091716766357422 s -INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.31128787994384766 s -DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=46373217870938923404630643517946251471, time:1750768448.3755465s req_ids:[8] -DEBUG 06-24 20:34:08 [manager.py:391] -ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:423.4440326690674ms total_cost_time:423.4635829925537ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13227 prompt_cache_len:5151 prompt_cache_ratio:0.38943070991154455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 -DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:08 [batch.py:51] router release req id 8 -INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.10635805130004883 s -INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.1082150936126709 s -DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=251140125680003083037019156456089144184, time:1750768448.5984719s req_ids:[8] -DEBUG 06-24 20:34:08 [manager.py:391] -ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:212.5387191772461ms total_cost_time:212.58258819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13228 prompt_cache_len:5151 prompt_cache_ratio:0.3894012700332628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 -DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:08 [batch.py:51] router release req id 8 -INFO 06-24 20:34:08 [manager.py:224] router recive req id 8 cost time 0.10876727104187012 s -INFO 06-24 20:34:08 [manager.py:68] detokenization recv req id 8 cost time 0.11006855964660645 s -DEBUG 06-24 20:34:08 [manager.py:391] Prefill Batch: batch_id=161452614914814379958163345362000446931, time:1750768448.8171163s req_ids:[8] -DEBUG 06-24 20:34:08 [manager.py:391] -ERROR 06-24 20:34:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:207.51595497131348ms total_cost_time:207.535982131958ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:13229 prompt_cache_len:5151 prompt_cache_ratio:0.38937183460579033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 -DEBUG 06-24 20:34:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:08 [batch.py:51] router release req id 8 -INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10635089874267578 s -INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10825347900390625 s -DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=118112901851609895715759704254974895705, time:1750768449.0313418s req_ids:[8] -DEBUG 06-24 20:34:09 [manager.py:391] -ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:08 lightllm_req_id:8 first_token_cost:212.2359275817871ms total_cost_time:212.2793197631836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13230 prompt_cache_len:5151 prompt_cache_ratio:0.3893424036281179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 -DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:09 [batch.py:51] router release req id 8 -INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10707664489746094 s -INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10914421081542969 s -DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=120468380498248346051657165712279680070, time:1750768449.2475252s req_ids:[8] -DEBUG 06-24 20:34:09 [manager.py:391] -ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:210.3590965270996ms total_cost_time:210.4053497314453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13231 prompt_cache_len:5151 prompt_cache_ratio:0.3893129770992366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 -DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:09 [batch.py:51] router release req id 8 -INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.10642075538635254 s -INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.10836935043334961 s -DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=200551638025485648893786877360629061903, time:1750768449.4646432s req_ids:[8] -DEBUG 06-24 20:34:09 [manager.py:391] -ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:215.8355712890625ms total_cost_time:215.85512161254883ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13232 prompt_cache_len:5151 prompt_cache_ratio:0.38928355501813783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 -DEBUG 06-24 20:34:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:09 [batch.py:51] router release req id 8 -INFO 06-24 20:34:09 [manager.py:224] router recive req id 8 cost time 0.3081932067871094 s -INFO 06-24 20:34:09 [manager.py:68] detokenization recv req id 8 cost time 0.31020140647888184 s -DEBUG 06-24 20:34:09 [manager.py:391] Prefill Batch: batch_id=318577815742856077299915704347339483655, time:1750768449.8925207s req_ids:[8] -DEBUG 06-24 20:34:09 [manager.py:391] -ERROR 06-24 20:34:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:09 lightllm_req_id:8 first_token_cost:429.92305755615234ms total_cost_time:429.97026443481445ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13233 prompt_cache_len:5151 prompt_cache_ratio:0.3892541373838132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 -DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:10 [batch.py:51] router release req id 8 -INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s -INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11112141609191895 s -DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=243614932937912585918663049822957612082, time:1750768450.1205792s req_ids:[8] -DEBUG 06-24 20:34:10 [manager.py:391] -ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:215.10696411132812ms total_cost_time:215.1484489440918ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13234 prompt_cache_len:5151 prompt_cache_ratio:0.38922472419525467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 -DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:10 [batch.py:51] router release req id 8 -INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10962080955505371 s -INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11146950721740723 s -DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=198855448052453556419136530779982547548, time:1750768450.3423676s req_ids:[8] -DEBUG 06-24 20:34:10 [manager.py:391] -ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:185.88805198669434ms total_cost_time:185.9121322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:13235 prompt_cache_len:5151 prompt_cache_ratio:0.38919531545145447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 -DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:10 [batch.py:51] router release req id 8 -INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10886478424072266 s -INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.11064338684082031 s -DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=36740801380968086541778432388451787066, time:1750768450.5375404s req_ids:[8] -DEBUG 06-24 20:34:10 [manager.py:391] -ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:212.13555335998535ms total_cost_time:212.18252182006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13236 prompt_cache_len:5151 prompt_cache_ratio:0.38916591115140525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 -DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:10 [batch.py:51] router release req id 8 -INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.1073455810546875 s -INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.10930347442626953 s -DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=108633661379584971730230317968196283231, time:1750768450.7551122s req_ids:[8] -DEBUG 06-24 20:34:10 [manager.py:391] -ERROR 06-24 20:34:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:186.920166015625ms total_cost_time:186.9645118713379ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13237 prompt_cache_len:5151 prompt_cache_ratio:0.3891365112940999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 -DEBUG 06-24 20:34:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:10 [batch.py:51] router release req id 8 -INFO 06-24 20:34:10 [manager.py:224] router recive req id 8 cost time 0.10855650901794434 s -INFO 06-24 20:34:10 [manager.py:68] detokenization recv req id 8 cost time 0.1104733943939209 s -DEBUG 06-24 20:34:10 [manager.py:391] Prefill Batch: batch_id=22022077603118999719566782930974199673, time:1750768450.9455123s req_ids:[8] -DEBUG 06-24 20:34:10 [manager.py:391] -ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:10 lightllm_req_id:8 first_token_cost:204.82826232910156ms total_cost_time:204.87546920776367ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13238 prompt_cache_len:5151 prompt_cache_ratio:0.3891071158785315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 -DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:11 [batch.py:51] router release req id 8 -INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.3083055019378662 s -INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.3103344440460205 s -DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=211531202516853339139438487114156805731, time:1750768451.3624263s req_ids:[8] -DEBUG 06-24 20:34:11 [manager.py:391] -ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:425.4274368286133ms total_cost_time:425.4894256591797ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13239 prompt_cache_len:5151 prompt_cache_ratio:0.38907772490369363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 -DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:11 [batch.py:51] router release req id 8 -INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s -INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.11059737205505371 s -DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=241243108017874049579397564100711297476, time:1750768451.5885189s req_ids:[8] -DEBUG 06-24 20:34:11 [manager.py:391] -ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:211.10200881958008ms total_cost_time:211.12799644470215ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:13240 prompt_cache_len:5151 prompt_cache_ratio:0.38904833836858005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 -DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:11 [batch.py:51] router release req id 8 -INFO 06-24 20:34:11 [manager.py:224] router recive req id 8 cost time 0.10715413093566895 s -INFO 06-24 20:34:11 [manager.py:68] detokenization recv req id 8 cost time 0.10952877998352051 s -DEBUG 06-24 20:34:11 [manager.py:391] Prefill Batch: batch_id=291226933442746978209893671675267383062, time:1750768451.8247874s req_ids:[8] -DEBUG 06-24 20:34:11 [manager.py:391] -ERROR 06-24 20:34:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:232.99169540405273ms total_cost_time:233.03699493408203ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13241 prompt_cache_len:5151 prompt_cache_ratio:0.38901895627218486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 -DEBUG 06-24 20:34:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:11 [batch.py:51] router release req id 8 -INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.1090400218963623 s -INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.1109919548034668 s -DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=258512013668525142684105672972523398764, time:1750768452.0475252s req_ids:[8] -DEBUG 06-24 20:34:12 [manager.py:391] -ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:11 lightllm_req_id:8 first_token_cost:214.08438682556152ms total_cost_time:214.12897109985352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13242 prompt_cache_len:5151 prompt_cache_ratio:0.3889895786135025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 -DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:12 [batch.py:51] router release req id 8 -INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.10788512229919434 s -INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.10981607437133789 s -DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=331799649987951931148121756007328793416, time:1750768452.2686408s req_ids:[8] -DEBUG 06-24 20:34:12 [manager.py:391] -ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:214.22648429870605ms total_cost_time:214.28585052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:13243 prompt_cache_len:5151 prompt_cache_ratio:0.3889602053915276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 -DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:12 [batch.py:51] router release req id 8 -INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.10881447792053223 s -INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.11067748069763184 s -DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=186435294464627031251864794284638482634, time:1750768452.4872186s req_ids:[8] -DEBUG 06-24 20:34:12 [manager.py:391] -ERROR 06-24 20:34:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:207.7932357788086ms total_cost_time:207.84378051757812ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:13244 prompt_cache_len:5151 prompt_cache_ratio:0.38893083660525524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 -DEBUG 06-24 20:34:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:12 [batch.py:51] router release req id 8 -INFO 06-24 20:34:12 [manager.py:224] router recive req id 8 cost time 0.3101236820220947 s -INFO 06-24 20:34:12 [manager.py:68] detokenization recv req id 8 cost time 0.31209659576416016 s -DEBUG 06-24 20:34:12 [manager.py:391] Prefill Batch: batch_id=325116147418699506179523504556102996398, time:1750768452.907753s req_ids:[8] -DEBUG 06-24 20:34:12 [manager.py:391] -ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:12 lightllm_req_id:8 first_token_cost:424.59964752197266ms total_cost_time:424.66020584106445ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:13245 prompt_cache_len:5151 prompt_cache_ratio:0.38890147225368066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 -DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:13 [batch.py:51] router release req id 8 -INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10810995101928711 s -INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.1100761890411377 s -DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=102989688166565910432069108566755052127, time:1750768453.132869s req_ids:[8] -DEBUG 06-24 20:34:13 [manager.py:391] -ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:212.4650478363037ms total_cost_time:212.5098705291748ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13246 prompt_cache_len:5151 prompt_cache_ratio:0.3888721123357995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 -DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:13 [batch.py:51] router release req id 8 -INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10818338394165039 s -INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.11011409759521484 s -DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=87399614504325270049581010387201610128, time:1750768453.3537476s req_ids:[8] -DEBUG 06-24 20:34:13 [manager.py:391] -ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:211.0154628753662ms total_cost_time:211.0600471496582ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13247 prompt_cache_len:5151 prompt_cache_ratio:0.3888427568506077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 -DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:13 [batch.py:51] router release req id 8 -INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10909247398376465 s -INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.111328125 s -DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=298000822806958595561918345595003366155, time:1750768453.5706632s req_ids:[8] -DEBUG 06-24 20:34:13 [manager.py:391] -ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:213.58299255371094ms total_cost_time:213.62924575805664ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13248 prompt_cache_len:5151 prompt_cache_ratio:0.38881340579710144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 -DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:13 [batch.py:51] router release req id 8 -INFO 06-24 20:34:13 [manager.py:224] router recive req id 8 cost time 0.10809588432312012 s -INFO 06-24 20:34:13 [manager.py:68] detokenization recv req id 8 cost time 0.11005043983459473 s -DEBUG 06-24 20:34:13 [manager.py:391] Prefill Batch: batch_id=188271118906736624936937767209271999810, time:1750768453.803829s req_ids:[8] -DEBUG 06-24 20:34:13 [manager.py:391] -ERROR 06-24 20:34:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:226.88913345336914ms total_cost_time:226.9432544708252ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:13249 prompt_cache_len:5151 prompt_cache_ratio:0.38878405917427733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 -DEBUG 06-24 20:34:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:13 [batch.py:51] router release req id 8 -INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10753107070922852 s -INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10941123962402344 s -DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=38450605633933793286702506480046898816, time:1750768454.024239s req_ids:[8] -DEBUG 06-24 20:34:14 [manager.py:391] -ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:13 lightllm_req_id:8 first_token_cost:215.07978439331055ms total_cost_time:215.12246131896973ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13250 prompt_cache_len:5151 prompt_cache_ratio:0.38875471698113206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 -DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:14 [batch.py:51] router release req id 8 -INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.3100440502166748 s -INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.3120455741882324 s -DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=45791837934041344415675037762321999949, time:1750768454.459305s req_ids:[8] -DEBUG 06-24 20:34:14 [manager.py:391] -ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:433.8359832763672ms total_cost_time:433.8812828063965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13251 prompt_cache_len:5151 prompt_cache_ratio:0.3887253792166629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 -DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:14 [batch.py:51] router release req id 8 -INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10785198211669922 s -INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10980510711669922 s -DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=46677370298677177457368165466659186438, time:1750768454.6845593s req_ids:[8] -DEBUG 06-24 20:34:14 [manager.py:391] -ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:207.14998245239258ms total_cost_time:207.19504356384277ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13252 prompt_cache_len:5151 prompt_cache_ratio:0.38869604587986717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 -DEBUG 06-24 20:34:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:14 [batch.py:51] router release req id 8 -INFO 06-24 20:34:14 [manager.py:224] router recive req id 8 cost time 0.10792875289916992 s -INFO 06-24 20:34:14 [manager.py:68] detokenization recv req id 8 cost time 0.10988998413085938 s -DEBUG 06-24 20:34:14 [manager.py:391] Prefill Batch: batch_id=200130974983704445537610377830879420147, time:1750768454.8968728s req_ids:[8] -DEBUG 06-24 20:34:14 [manager.py:391] -ERROR 06-24 20:34:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:206.27522468566895ms total_cost_time:206.32171630859375ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13253 prompt_cache_len:5151 prompt_cache_ratio:0.3886667169697427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 -DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:15 [batch.py:51] router release req id 8 -INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10873818397521973 s -INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11063313484191895 s -DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=266975273867571730082195669270762003421, time:1750768455.1099677s req_ids:[8] -DEBUG 06-24 20:34:15 [manager.py:391] -ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:14 lightllm_req_id:8 first_token_cost:211.69185638427734ms total_cost_time:211.73548698425293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13254 prompt_cache_len:5151 prompt_cache_ratio:0.38863739248528745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 -DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:15 [batch.py:51] router release req id 8 -INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10834980010986328 s -INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11027836799621582 s -DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=53590973655636142279206130679094312674, time:1750768455.3293796s req_ids:[8] -DEBUG 06-24 20:34:15 [manager.py:391] -ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:210.13832092285156ms total_cost_time:210.18338203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13255 prompt_cache_len:5151 prompt_cache_ratio:0.38860807242549983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 -DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:15 [batch.py:51] router release req id 8 -INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.10983800888061523 s -INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.11179995536804199 s -DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=157358789157754720624812700704376536776, time:1750768455.544756s req_ids:[8] -DEBUG 06-24 20:34:15 [manager.py:391] -ERROR 06-24 20:34:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:211.56668663024902ms total_cost_time:211.61198616027832ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13256 prompt_cache_len:5151 prompt_cache_ratio:0.3885787567893784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 -DEBUG 06-24 20:34:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:15 [batch.py:51] router release req id 8 -INFO 06-24 20:34:15 [manager.py:224] router recive req id 8 cost time 0.31084156036376953 s -INFO 06-24 20:34:15 [manager.py:68] detokenization recv req id 8 cost time 0.3128471374511719 s -DEBUG 06-24 20:34:15 [manager.py:391] Prefill Batch: batch_id=169510668736635198190751115919660442695, time:1750768455.9733105s req_ids:[8] -DEBUG 06-24 20:34:15 [manager.py:391] -ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:15 lightllm_req_id:8 first_token_cost:430.15313148498535ms total_cost_time:430.19819259643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13257 prompt_cache_len:5151 prompt_cache_ratio:0.38854944557592214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 -DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:16 [batch.py:51] router release req id 8 -INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.10911989212036133 s -INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11116790771484375 s -DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=175001329038555485081906985828512312770, time:1750768456.1995966s req_ids:[8] -DEBUG 06-24 20:34:16 [manager.py:391] -ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:210.80780029296875ms total_cost_time:210.85143089294434ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13258 prompt_cache_len:5151 prompt_cache_ratio:0.38852013878413033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 -DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:16 [batch.py:51] router release req id 8 -INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.1090095043182373 s -INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11104655265808105 s -DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=118485011655941444994415549347396537918, time:1750768456.4176998s req_ids:[8] -DEBUG 06-24 20:34:16 [manager.py:391] -ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:220.46613693237305ms total_cost_time:220.51072120666504ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13259 prompt_cache_len:5151 prompt_cache_ratio:0.38849083641300247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 -DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:16 [batch.py:51] router release req id 8 -INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.1087350845336914 s -INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.11065125465393066 s -DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=242171465186733280877401133016360032388, time:1750768456.647364s req_ids:[8] -DEBUG 06-24 20:34:16 [manager.py:391] -ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:215.43049812316895ms total_cost_time:215.47508239746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13260 prompt_cache_len:5151 prompt_cache_ratio:0.38846153846153847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 -DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:16 [batch.py:51] router release req id 8 -INFO 06-24 20:34:16 [manager.py:224] router recive req id 8 cost time 0.10754871368408203 s -INFO 06-24 20:34:16 [manager.py:68] detokenization recv req id 8 cost time 0.10941267013549805 s -DEBUG 06-24 20:34:16 [manager.py:391] Prefill Batch: batch_id=188760340566130875730933816874202791999, time:1750768456.8647878s req_ids:[8] -DEBUG 06-24 20:34:16 [manager.py:391] -ERROR 06-24 20:34:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:207.4289321899414ms total_cost_time:207.4735164642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13261 prompt_cache_len:5151 prompt_cache_ratio:0.3884322449287384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 -DEBUG 06-24 20:34:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:16 [batch.py:51] router release req id 8 -INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10903120040893555 s -INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.11088013648986816 s -DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=145793925969835500185029508474103647973, time:1750768457.079805s req_ids:[8] -DEBUG 06-24 20:34:17 [manager.py:391] -INFO 06-24 20:34:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:16 lightllm_req_id:8 first_token_cost:211.95602416992188ms total_cost_time:211.99917793273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13262 prompt_cache_len:5151 prompt_cache_ratio:0.3884029558136028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 -DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:17 [batch.py:51] router release req id 8 -INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.3107261657714844 s -INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.31269097328186035 s -DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=241579952253788886198529772088886353065, time:1750768457.5029957s req_ids:[8] -DEBUG 06-24 20:34:17 [manager.py:391] -ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:424.26514625549316ms total_cost_time:424.30996894836426ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13263 prompt_cache_len:5151 prompt_cache_ratio:0.3883736711151323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 -DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:17 [batch.py:51] router release req id 8 -INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10784769058227539 s -INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.10973834991455078 s -DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=166332961069997316229563406366437195131, time:1750768457.7280095s req_ids:[8] -DEBUG 06-24 20:34:17 [manager.py:391] -ERROR 06-24 20:34:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:210.9353542327881ms total_cost_time:210.9999656677246ms,out_token_counter:1 mean_per_token_cost_time: 0.06461143493652344ms prompt_token_num:13264 prompt_cache_len:5151 prompt_cache_ratio:0.3883443908323281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 -DEBUG 06-24 20:34:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:17 [batch.py:51] router release req id 8 -INFO 06-24 20:34:17 [manager.py:224] router recive req id 8 cost time 0.10676145553588867 s -INFO 06-24 20:34:17 [manager.py:68] detokenization recv req id 8 cost time 0.1086883544921875 s -DEBUG 06-24 20:34:17 [manager.py:391] Prefill Batch: batch_id=102123420714460829303732529876843887940, time:1750768457.9457557s req_ids:[8] -DEBUG 06-24 20:34:17 [manager.py:391] -ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:34:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 51202.947 tokens/s -DEBUG 06-24 20:34:18 [stats.py:37] Avg prompt tokens throughput: 51195.118 tokens/s -DEBUG 06-24 20:34:18 [stats.py:37] Avg generate tokens throughput: 7.829 tokens/s -INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:17 lightllm_req_id:8 first_token_cost:210.2370262145996ms total_cost_time:210.28375625610352ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13265 prompt_cache_len:5151 prompt_cache_ratio:0.38831511496419147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 -DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:18 [batch.py:51] router release req id 8 -INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.11093950271606445 s -INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.11307239532470703 s -DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=120535169825872346994386659053694923947, time:1750768458.161289s req_ids:[8] -DEBUG 06-24 20:34:18 [manager.py:391] -ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:209.0628147125244ms total_cost_time:209.12456512451172ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:13266 prompt_cache_len:5151 prompt_cache_ratio:0.3882858435097241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 -DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:18 [batch.py:51] router release req id 8 -INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10839390754699707 s -INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.1104116439819336 s -DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=296371073254043934100068256935941573041, time:1750768458.3781745s req_ids:[8] -DEBUG 06-24 20:34:18 [manager.py:391] -ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:212.64886856079102ms total_cost_time:212.69464492797852ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13267 prompt_cache_len:5151 prompt_cache_ratio:0.38825657646792794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 -DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:18 [batch.py:51] router release req id 8 -INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10893988609313965 s -INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.11086869239807129 s -DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=7239540229117023935426439683894355890, time:1750768458.608143s req_ids:[8] -DEBUG 06-24 20:34:18 [manager.py:391] -ERROR 06-24 20:34:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:223.78134727478027ms total_cost_time:223.82712364196777ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13268 prompt_cache_len:5151 prompt_cache_ratio:0.38822731383780523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 -DEBUG 06-24 20:34:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:18 [batch.py:51] router release req id 8 -INFO 06-24 20:34:18 [manager.py:224] router recive req id 8 cost time 0.10744237899780273 s -INFO 06-24 20:34:18 [manager.py:68] detokenization recv req id 8 cost time 0.10941386222839355 s -DEBUG 06-24 20:34:18 [manager.py:391] Prefill Batch: batch_id=125569581273605578876022099425595575554, time:1750768458.8286712s req_ids:[8] -DEBUG 06-24 20:34:18 [manager.py:391] -ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:18 lightllm_req_id:8 first_token_cost:372.3931312561035ms total_cost_time:372.4379539489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13269 prompt_cache_len:5151 prompt_cache_ratio:0.3881980556183586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 -DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:19 [batch.py:51] router release req id 8 -INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10891866683959961 s -INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11082696914672852 s -DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=196853312733538741160878159512171671910, time:1750768459.2191699s req_ids:[8] -DEBUG 06-24 20:34:19 [manager.py:391] -ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:228.2869815826416ms total_cost_time:228.3318042755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13270 prompt_cache_len:5151 prompt_cache_ratio:0.3881688018085908 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 -DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:19 [batch.py:51] router release req id 8 -INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.1080317497253418 s -INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11072754859924316 s -DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=10713636262711775863304373661095809739, time:1750768459.4422355s req_ids:[8] -DEBUG 06-24 20:34:19 [manager.py:391] -ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:210.676908493042ms total_cost_time:210.7243537902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13271 prompt_cache_len:5151 prompt_cache_ratio:0.3881395524075051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 -DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:19 [batch.py:51] router release req id 8 -INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10819315910339355 s -INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11048197746276855 s -DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=133526613819832065568593803641318707084, time:1750768459.6659849s req_ids:[8] -DEBUG 06-24 20:34:19 [manager.py:391] -ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:221.44055366516113ms total_cost_time:221.48418426513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13272 prompt_cache_len:5151 prompt_cache_ratio:0.3881103074141049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 -DEBUG 06-24 20:34:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:19 [batch.py:51] router release req id 8 -INFO 06-24 20:34:19 [manager.py:224] router recive req id 8 cost time 0.10815167427062988 s -INFO 06-24 20:34:19 [manager.py:68] detokenization recv req id 8 cost time 0.11011028289794922 s -DEBUG 06-24 20:34:19 [manager.py:391] Prefill Batch: batch_id=205076182973560144562632162393950572772, time:1750768459.8869336s req_ids:[8] -DEBUG 06-24 20:34:19 [manager.py:391] -ERROR 06-24 20:34:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:212.7854824066162ms total_cost_time:212.8317356109619ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13273 prompt_cache_len:5151 prompt_cache_ratio:0.38808106682739396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 -DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:20 [batch.py:51] router release req id 8 -INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.1091468334197998 s -INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11108231544494629 s -DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=90624290049962525220087986788150239551, time:1750768460.1060183s req_ids:[8] -DEBUG 06-24 20:34:20 [manager.py:391] -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:19 lightllm_req_id:8 first_token_cost:212.65006065368652ms total_cost_time:212.69488334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13274 prompt_cache_len:5151 prompt_cache_ratio:0.3880518306463764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 -DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:20 [batch.py:51] router release req id 8 -INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10874724388122559 s -INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11066675186157227 s -DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=254809822294636208917850740163936537849, time:1750768460.3248165s req_ids:[8] -DEBUG 06-24 20:34:20 [manager.py:391] -ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:366.92309379577637ms total_cost_time:366.9748306274414ms,out_token_counter:1 mean_per_token_cost_time: 0.05173683166503906ms prompt_token_num:13275 prompt_cache_len:5151 prompt_cache_ratio:0.3880225988700565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 -DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:20 [batch.py:51] router release req id 8 -INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s -INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11012935638427734 s -DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=148740270807343105488793218786194300632, time:1750768460.6976948s req_ids:[8] -DEBUG 06-24 20:34:20 [manager.py:391] -ERROR 06-24 20:34:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:206.3126564025879ms total_cost_time:206.35628700256348ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13276 prompt_cache_len:5151 prompt_cache_ratio:0.387993371497439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 -DEBUG 06-24 20:34:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:20 [batch.py:51] router release req id 8 -INFO 06-24 20:34:20 [manager.py:224] router recive req id 8 cost time 0.10894346237182617 s -INFO 06-24 20:34:20 [manager.py:68] detokenization recv req id 8 cost time 0.11095261573791504 s -DEBUG 06-24 20:34:20 [manager.py:391] Prefill Batch: batch_id=8402934823824448396517828941532933856, time:1750768460.9115648s req_ids:[8] -DEBUG 06-24 20:34:20 [manager.py:391] -ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:20 lightllm_req_id:8 first_token_cost:210.70265769958496ms total_cost_time:210.74676513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13277 prompt_cache_len:5151 prompt_cache_ratio:0.3879641485275288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 -DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:21 [batch.py:51] router release req id 8 -INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10902857780456543 s -INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s -DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=273990623829264578791656879150909851621, time:1750768461.1263576s req_ids:[8] -DEBUG 06-24 20:34:21 [manager.py:391] -ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:205.3050994873047ms total_cost_time:205.34944534301758ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13278 prompt_cache_len:5151 prompt_cache_ratio:0.3879349299593312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 -DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:21 [batch.py:51] router release req id 8 -INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10933852195739746 s -INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11122727394104004 s -DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=65815710773719609410980035895899554937, time:1750768461.3397424s req_ids:[8] -DEBUG 06-24 20:34:21 [manager.py:391] -ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:212.06092834472656ms total_cost_time:212.10598945617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13279 prompt_cache_len:5151 prompt_cache_ratio:0.3879057157918518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 -DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:21 [batch.py:51] router release req id 8 -INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.10789322853088379 s -INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.1092844009399414 s -DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=148550624891387866998124830538178345052, time:1750768461.5679934s req_ids:[8] -DEBUG 06-24 20:34:21 [manager.py:391] -ERROR 06-24 20:34:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:225.25668144226074ms total_cost_time:225.30078887939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13280 prompt_cache_len:5151 prompt_cache_ratio:0.3878765060240964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 -DEBUG 06-24 20:34:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:21 [batch.py:51] router release req id 8 -INFO 06-24 20:34:21 [manager.py:224] router recive req id 8 cost time 0.1089162826538086 s -INFO 06-24 20:34:21 [manager.py:68] detokenization recv req id 8 cost time 0.11092138290405273 s -DEBUG 06-24 20:34:21 [manager.py:391] Prefill Batch: batch_id=16388523261150798952872751257320914812, time:1750768461.788956s req_ids:[8] -DEBUG 06-24 20:34:21 [manager.py:391] -ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:21 lightllm_req_id:8 first_token_cost:375.5645751953125ms total_cost_time:375.6096363067627ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13281 prompt_cache_len:5151 prompt_cache_ratio:0.38784730065507117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 -DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:22 [batch.py:51] router release req id 8 -INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10930538177490234 s -INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11125707626342773 s -DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=173021871920425771490938210620239080416, time:1750768462.1718419s req_ids:[8] -DEBUG 06-24 20:34:22 [manager.py:391] -ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.46602249145508ms total_cost_time:215.51966667175293ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13282 prompt_cache_len:5151 prompt_cache_ratio:0.38781809968378256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 -DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:22 [batch.py:51] router release req id 8 -INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10857224464416504 s -INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.1105048656463623 s -DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=156730573729433535155034663631565733010, time:1750768462.3928545s req_ids:[8] -DEBUG 06-24 20:34:22 [manager.py:391] -ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.64531326293945ms total_cost_time:215.68918228149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13283 prompt_cache_len:5151 prompt_cache_ratio:0.38778890310923736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 -DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:22 [batch.py:51] router release req id 8 -INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.1085507869720459 s -INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11040997505187988 s -DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=292067452700095304807127191607224196640, time:1750768462.6133761s req_ids:[8] -DEBUG 06-24 20:34:22 [manager.py:391] -ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:215.00587463378906ms total_cost_time:215.05093574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13284 prompt_cache_len:5151 prompt_cache_ratio:0.38775971093044265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 -DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:22 [batch.py:51] router release req id 8 -INFO 06-24 20:34:22 [manager.py:224] router recive req id 8 cost time 0.10807418823242188 s -INFO 06-24 20:34:22 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s -DEBUG 06-24 20:34:22 [manager.py:391] Prefill Batch: batch_id=71670645124085006351587599963223941469, time:1750768462.8327165s req_ids:[8] -DEBUG 06-24 20:34:22 [manager.py:391] -ERROR 06-24 20:34:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:211.34257316589355ms total_cost_time:211.38644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13285 prompt_cache_len:5151 prompt_cache_ratio:0.3877305231464057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 -DEBUG 06-24 20:34:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:22 [batch.py:51] router release req id 8 -INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.10962367057800293 s -INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11159825325012207 s -DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=72111479130366341188840038903881089170, time:1750768463.0500581s req_ids:[8] -DEBUG 06-24 20:34:23 [manager.py:391] -ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:22 lightllm_req_id:8 first_token_cost:212.61048316955566ms total_cost_time:212.65602111816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13286 prompt_cache_len:5151 prompt_cache_ratio:0.3877013397561343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 -DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:23 [batch.py:51] router release req id 8 -INFO 06-24 20:34:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:23 [statics_utils.py:24] mean first cost: 230.58410481556828 ms -INFO 06-24 20:34:23 [statics_utils.py:24] mean per token cost: 0.05991084329516538 ms -INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.1098489761352539 s -INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11172628402709961 s -DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=233545567565794336495777260557776101137, time:1750768463.2685475s req_ids:[8] -DEBUG 06-24 20:34:23 [manager.py:391] -ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:378.49950790405273ms total_cost_time:378.5429000854492ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13287 prompt_cache_len:5151 prompt_cache_ratio:0.38767216075863625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 -DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:23 [batch.py:51] router release req id 8 -INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.1085810661315918 s -INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.11051774024963379 s -DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=31955860154495250317025861510384600977, time:1750768463.654397s req_ids:[8] -DEBUG 06-24 20:34:23 [manager.py:391] -ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:213.3502960205078ms total_cost_time:213.3934497833252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13288 prompt_cache_len:5151 prompt_cache_ratio:0.3876429861529199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 -DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:23 [batch.py:51] router release req id 8 -INFO 06-24 20:34:23 [manager.py:224] router recive req id 8 cost time 0.10969710350036621 s -INFO 06-24 20:34:23 [manager.py:68] detokenization recv req id 8 cost time 0.1124885082244873 s -DEBUG 06-24 20:34:23 [manager.py:391] Prefill Batch: batch_id=312961115159564624565410046022961117838, time:1750768463.8757033s req_ids:[8] -DEBUG 06-24 20:34:23 [manager.py:391] -ERROR 06-24 20:34:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:177.00576782226562ms total_cost_time:177.04129219055176ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:13289 prompt_cache_len:5151 prompt_cache_ratio:0.3876138159379938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 -DEBUG 06-24 20:34:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:23 [batch.py:51] router release req id 8 -INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10633683204650879 s -INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.10828614234924316 s -DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=169023053375662135927991861485762240341, time:1750768464.059542s req_ids:[8] -DEBUG 06-24 20:34:24 [manager.py:391] -ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:23 lightllm_req_id:8 first_token_cost:205.59954643249512ms total_cost_time:205.66177368164062ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13290 prompt_cache_len:5151 prompt_cache_ratio:0.3875846501128668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 -DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:24 [batch.py:51] router release req id 8 -INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10880851745605469 s -INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.11065125465393066 s -DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=250644729775404607827618610388061703915, time:1750768464.2764766s req_ids:[8] -DEBUG 06-24 20:34:24 [manager.py:391] -ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:217.14234352111816ms total_cost_time:217.20337867736816ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:13291 prompt_cache_len:5151 prompt_cache_ratio:0.38755548867654804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 -DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:24 [batch.py:51] router release req id 8 -INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.10802507400512695 s -INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.11016964912414551 s -DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=84179603738064476711100331714633263803, time:1750768464.4989555s req_ids:[8] -DEBUG 06-24 20:34:24 [manager.py:391] -ERROR 06-24 20:34:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:228.07812690734863ms total_cost_time:228.12271118164062ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13292 prompt_cache_len:5151 prompt_cache_ratio:0.38752633162804695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 -DEBUG 06-24 20:34:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:24 [batch.py:51] router release req id 8 -DEBUG 06-24 20:34:24 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:24 [manager.py:283] -DEBUG 06-24 20:34:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:24 [manager.py:284] -INFO 06-24 20:34:24 [manager.py:224] router recive req id 8 cost time 0.11007046699523926 s -INFO 06-24 20:34:24 [manager.py:68] detokenization recv req id 8 cost time 0.1115565299987793 s -DEBUG 06-24 20:34:24 [manager.py:391] Prefill Batch: batch_id=307325463537186754980394037393650943084, time:1750768464.7592304s req_ids:[8] -DEBUG 06-24 20:34:24 [manager.py:391] -ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:24 lightllm_req_id:8 first_token_cost:466.34769439697266ms total_cost_time:466.4037227630615ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:13293 prompt_cache_len:5151 prompt_cache_ratio:0.38749717896637326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 -DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:25 [batch.py:51] router release req id 8 -INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.10772943496704102 s -INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.10861945152282715 s -DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=154819498831080952472993432894442586475, time:1750768465.2060678s req_ids:[8] -DEBUG 06-24 20:34:25 [manager.py:391] -ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:170.42112350463867ms total_cost_time:170.46427726745605ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13294 prompt_cache_len:5151 prompt_cache_ratio:0.3874680306905371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 -DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:25 [batch.py:51] router release req id 8 -INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.1110997200012207 s -INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.11200904846191406 s -DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=18683441701285720761114593535369755846, time:1750768465.3891943s req_ids:[8] -DEBUG 06-24 20:34:25 [manager.py:391] -ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:202.44240760803223ms total_cost_time:202.48794555664062ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13295 prompt_cache_len:5151 prompt_cache_ratio:0.3874388867995487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 -DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:25 [batch.py:51] router release req id 8 -INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.11037397384643555 s -INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.11228799819946289 s -DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=210340228890201960354285036983104527716, time:1750768465.5955987s req_ids:[8] -DEBUG 06-24 20:34:25 [manager.py:391] -ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:203.40228080749512ms total_cost_time:203.44805717468262ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13296 prompt_cache_len:5151 prompt_cache_ratio:0.3874097472924188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 -DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:25 [batch.py:51] router release req id 8 -INFO 06-24 20:34:25 [manager.py:224] router recive req id 8 cost time 0.10828280448913574 s -INFO 06-24 20:34:25 [manager.py:68] detokenization recv req id 8 cost time 0.10965561866760254 s -DEBUG 06-24 20:34:25 [manager.py:391] Prefill Batch: batch_id=88391913485953548656751923497091166671, time:1750768465.8004942s req_ids:[8] -DEBUG 06-24 20:34:25 [manager.py:391] -ERROR 06-24 20:34:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:214.75958824157715ms total_cost_time:214.80464935302734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13297 prompt_cache_len:5151 prompt_cache_ratio:0.3873806121681582 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 -DEBUG 06-24 20:34:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:25 [batch.py:51] router release req id 8 -INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10849332809448242 s -INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.11034369468688965 s -DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=6474796628223588193311580772322194550, time:1750768466.0297804s req_ids:[8] -DEBUG 06-24 20:34:26 [manager.py:391] -ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:25 lightllm_req_id:8 first_token_cost:217.6821231842041ms total_cost_time:217.726469039917ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13298 prompt_cache_len:5151 prompt_cache_ratio:0.3873514814257783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 -DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:26 [batch.py:51] router release req id 8 -INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10782217979431152 s -INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.10870933532714844 s -DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=180004943271904922887973549141665397739, time:1750768466.2453258s req_ids:[8] -DEBUG 06-24 20:34:26 [manager.py:391] -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:392.26317405700684ms total_cost_time:392.3068046569824ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13299 prompt_cache_len:5151 prompt_cache_ratio:0.3873223550642905 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 -DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:26 [batch.py:51] router release req id 8 -INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.11045694351196289 s -INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.11250662803649902 s -DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=167191504617354064838701441169576887811, time:1750768466.6451428s req_ids:[8] -DEBUG 06-24 20:34:26 [manager.py:391] -ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:170.2871322631836ms total_cost_time:170.32980918884277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13300 prompt_cache_len:5151 prompt_cache_ratio:0.38729323308270674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 -DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:26 [batch.py:51] router release req id 8 -INFO 06-24 20:34:26 [manager.py:224] router recive req id 8 cost time 0.10656547546386719 s -INFO 06-24 20:34:26 [manager.py:68] detokenization recv req id 8 cost time 0.10752606391906738 s -DEBUG 06-24 20:34:26 [manager.py:391] Prefill Batch: batch_id=4150418313255882979906727803797572546, time:1750768466.822096s req_ids:[8] -DEBUG 06-24 20:34:26 [manager.py:391] -ERROR 06-24 20:34:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:190.5677318572998ms total_cost_time:190.6120777130127ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13301 prompt_cache_len:5151 prompt_cache_ratio:0.3872641154800391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 -DEBUG 06-24 20:34:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:26 [batch.py:51] router release req id 8 -INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s -INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10955810546875 s -DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=25226106773223706676151593907397903370, time:1750768467.0179162s req_ids:[8] -DEBUG 06-24 20:34:27 [manager.py:391] -ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:26 lightllm_req_id:8 first_token_cost:206.2997817993164ms total_cost_time:206.3460350036621ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13302 prompt_cache_len:5151 prompt_cache_ratio:0.38723500225529994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 -DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:27 [batch.py:51] router release req id 8 -INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10795140266418457 s -INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10997819900512695 s -DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=139288533445653198763325090297469919582, time:1750768467.2319744s req_ids:[8] -DEBUG 06-24 20:34:27 [manager.py:391] -ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:208.1735134124756ms total_cost_time:208.2200050354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13303 prompt_cache_len:5151 prompt_cache_ratio:0.38720589340750206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 -DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:27 [batch.py:51] router release req id 8 -INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.10751819610595703 s -INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.10956001281738281 s -DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=207710418998591923813689091108603901098, time:1750768467.4570308s req_ids:[8] -DEBUG 06-24 20:34:27 [manager.py:391] -ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:221.21429443359375ms total_cost_time:221.25506401062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:13304 prompt_cache_len:5151 prompt_cache_ratio:0.38717678893565843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 -DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:27 [batch.py:51] router release req id 8 -INFO 06-24 20:34:27 [manager.py:224] router recive req id 8 cost time 0.1084890365600586 s -INFO 06-24 20:34:27 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s -DEBUG 06-24 20:34:27 [manager.py:391] Prefill Batch: batch_id=230406849562299603737191674267257501595, time:1750768467.6888814s req_ids:[8] -DEBUG 06-24 20:34:27 [manager.py:391] -ERROR 06-24 20:34:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:401.7808437347412ms total_cost_time:401.824951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13305 prompt_cache_len:5151 prompt_cache_ratio:0.3871476888387824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 -DEBUG 06-24 20:34:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:27 [batch.py:51] router release req id 8 -INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s -INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.1098177433013916 s -DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=260875002897118381222453838399087032810, time:1750768468.086295s req_ids:[8] -DEBUG 06-24 20:34:28 [manager.py:391] -DEBUG 06-24 20:34:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 54222.163 tokens/s -DEBUG 06-24 20:34:28 [stats.py:37] Avg prompt tokens throughput: 54214.101 tokens/s -DEBUG 06-24 20:34:28 [stats.py:37] Avg generate tokens throughput: 8.062 tokens/s -ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:27 lightllm_req_id:8 first_token_cost:202.13603973388672ms total_cost_time:202.1796703338623ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13306 prompt_cache_len:5151 prompt_cache_ratio:0.3871185931158876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 -DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:28 [batch.py:51] router release req id 8 -INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.11082935333251953 s -INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.11268901824951172 s -DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=120413521308991042957430262681675150833, time:1750768468.296189s req_ids:[8] -DEBUG 06-24 20:34:28 [manager.py:391] -ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:209.54084396362305ms total_cost_time:209.58662033081055ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13307 prompt_cache_len:5151 prompt_cache_ratio:0.38708950176598783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 -DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:28 [batch.py:51] router release req id 8 -INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10729336738586426 s -INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.10915064811706543 s -DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=52546145651560352482289702033017539710, time:1750768468.5087833s req_ids:[8] -DEBUG 06-24 20:34:28 [manager.py:391] -ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:193.64643096923828ms total_cost_time:193.69244575500488ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13308 prompt_cache_len:5151 prompt_cache_ratio:0.3870604147880974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 -DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:28 [batch.py:51] router release req id 8 -INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.1075582504272461 s -INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.1094818115234375 s -DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=297250776709606182053706544093270445017, time:1750768468.7184796s req_ids:[8] -DEBUG 06-24 20:34:28 [manager.py:391] -ERROR 06-24 20:34:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:217.38553047180176ms total_cost_time:217.42868423461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13309 prompt_cache_len:5151 prompt_cache_ratio:0.38703133218123076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 -DEBUG 06-24 20:34:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:28 [batch.py:51] router release req id 8 -INFO 06-24 20:34:28 [manager.py:224] router recive req id 8 cost time 0.10634875297546387 s -INFO 06-24 20:34:28 [manager.py:68] detokenization recv req id 8 cost time 0.10812902450561523 s -DEBUG 06-24 20:34:28 [manager.py:391] Prefill Batch: batch_id=76914420651271827177910290569348758417, time:1750768468.9326334s req_ids:[8] -DEBUG 06-24 20:34:28 [manager.py:391] -ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:28 lightllm_req_id:8 first_token_cost:206.09474182128906ms total_cost_time:206.13765716552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13310 prompt_cache_len:5151 prompt_cache_ratio:0.3870022539444027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 -DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:29 [batch.py:51] router release req id 8 -INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.10902857780456543 s -INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.11095213890075684 s -DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=132176110688790226427969603473242056863, time:1750768469.1445546s req_ids:[8] -DEBUG 06-24 20:34:29 [manager.py:391] -ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:214.30706977844238ms total_cost_time:214.3728733062744ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:13311 prompt_cache_len:5151 prompt_cache_ratio:0.38697318007662834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 -DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:29 [batch.py:51] router release req id 8 -INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.3097703456878662 s -INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.3117096424102783 s -DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=64468840685805664302130085826344435979, time:1750768469.5722933s req_ids:[8] -DEBUG 06-24 20:34:29 [manager.py:391] -ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:433.2125186920166ms total_cost_time:433.2578182220459ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13312 prompt_cache_len:5151 prompt_cache_ratio:0.3869441105769231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 -DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:29 [batch.py:51] router release req id 8 -INFO 06-24 20:34:29 [manager.py:224] router recive req id 8 cost time 0.11083197593688965 s -INFO 06-24 20:34:29 [manager.py:68] detokenization recv req id 8 cost time 0.1127469539642334 s -DEBUG 06-24 20:34:29 [manager.py:391] Prefill Batch: batch_id=222115773302504691190469052684444292411, time:1750768469.8054507s req_ids:[8] -DEBUG 06-24 20:34:29 [manager.py:391] -ERROR 06-24 20:34:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:217.5467014312744ms total_cost_time:217.59033203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13313 prompt_cache_len:5151 prompt_cache_ratio:0.38691504544430255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 -DEBUG 06-24 20:34:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:29 [batch.py:51] router release req id 8 -INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10777163505554199 s -INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.10969662666320801 s -DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=78762296779658526208716982026603417232, time:1750768470.0239766s req_ids:[8] -DEBUG 06-24 20:34:30 [manager.py:391] -ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:29 lightllm_req_id:8 first_token_cost:203.28879356384277ms total_cost_time:203.33218574523926ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13314 prompt_cache_len:5151 prompt_cache_ratio:0.3868859846777828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 -DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:30 [batch.py:51] router release req id 8 -INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10865211486816406 s -INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.1106569766998291 s -DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=233376247430462949683822568687145730807, time:1750768470.2322357s req_ids:[8] -DEBUG 06-24 20:34:30 [manager.py:391] -ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:208.40954780578613ms total_cost_time:208.45532417297363ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13315 prompt_cache_len:5151 prompt_cache_ratio:0.38685692827638 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 -DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:30 [batch.py:51] router release req id 8 -INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.10815000534057617 s -INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.11006021499633789 s -DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=212818461445103077620678443288420431259, time:1750768470.4447634s req_ids:[8] -DEBUG 06-24 20:34:30 [manager.py:391] -ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:212.62216567993164ms total_cost_time:212.68439292907715ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13316 prompt_cache_len:5151 prompt_cache_ratio:0.38682787623911086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 -DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:30 [batch.py:51] router release req id 8 -INFO 06-24 20:34:30 [manager.py:224] router recive req id 8 cost time 0.1094675064086914 s -INFO 06-24 20:34:30 [manager.py:68] detokenization recv req id 8 cost time 0.11139535903930664 s -DEBUG 06-24 20:34:30 [manager.py:391] Prefill Batch: batch_id=112955103886491966992945340336954814518, time:1750768470.6648803s req_ids:[8] -DEBUG 06-24 20:34:30 [manager.py:391] -ERROR 06-24 20:34:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:216.07041358947754ms total_cost_time:216.10426902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:13317 prompt_cache_len:5151 prompt_cache_ratio:0.3867988285649921 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 -DEBUG 06-24 20:34:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:30 [batch.py:51] router release req id 8 -INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.3077504634857178 s -INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.3094522953033447 s -DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=177362951330501730493435488673352698706, time:1750768471.1005833s req_ids:[8] -DEBUG 06-24 20:34:31 [manager.py:391] -ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:30 lightllm_req_id:8 first_token_cost:431.23769760131836ms total_cost_time:431.38623237609863ms,out_token_counter:1 mean_per_token_cost_time: 0.14853477478027344ms prompt_token_num:13318 prompt_cache_len:5151 prompt_cache_ratio:0.386769785253041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 -DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:31 [batch.py:51] router release req id 8 -INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10986042022705078 s -INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11163091659545898 s -DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=186273207995361043360982349395967023736, time:1750768471.322489s req_ids:[8] -DEBUG 06-24 20:34:31 [manager.py:391] -ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:199.84865188598633ms total_cost_time:199.89347457885742ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13319 prompt_cache_len:5151 prompt_cache_ratio:0.38674074630227495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 -DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:31 [batch.py:51] router release req id 8 -INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10897588729858398 s -INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11088418960571289 s -DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=287692765397454993533507600837591810277, time:1750768471.530686s req_ids:[8] -DEBUG 06-24 20:34:31 [manager.py:391] -ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:210.85476875305176ms total_cost_time:210.90292930603027ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:13320 prompt_cache_len:5151 prompt_cache_ratio:0.3867117117117117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 -DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:31 [batch.py:51] router release req id 8 -INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10919833183288574 s -INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11117029190063477 s -DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=313841114746951404400302209799346825357, time:1750768471.7492657s req_ids:[8] -DEBUG 06-24 20:34:31 [manager.py:391] -ERROR 06-24 20:34:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:211.1496925354004ms total_cost_time:211.1952304840088ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13321 prompt_cache_len:5151 prompt_cache_ratio:0.38668268148036933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 -DEBUG 06-24 20:34:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:31 [batch.py:51] router release req id 8 -INFO 06-24 20:34:31 [manager.py:224] router recive req id 8 cost time 0.10892057418823242 s -INFO 06-24 20:34:31 [manager.py:68] detokenization recv req id 8 cost time 0.11085915565490723 s -DEBUG 06-24 20:34:31 [manager.py:391] Prefill Batch: batch_id=83191059799593439009547041928459918398, time:1750768471.9677963s req_ids:[8] -DEBUG 06-24 20:34:31 [manager.py:391] -ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:31 lightllm_req_id:8 first_token_cost:214.7378921508789ms total_cost_time:214.77007865905762ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:13322 prompt_cache_len:5151 prompt_cache_ratio:0.38665365560726617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 -DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:32 [batch.py:51] router release req id 8 -INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.10489034652709961 s -INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.10675954818725586 s -DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=38674720550244322052289308377233573602, time:1750768472.1901772s req_ids:[8] -DEBUG 06-24 20:34:32 [manager.py:391] -ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:173.48241806030273ms total_cost_time:173.50172996520996ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13323 prompt_cache_len:5151 prompt_cache_ratio:0.38662463409142084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 -DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:32 [batch.py:51] router release req id 8 -INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.30788373947143555 s -INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.30947184562683105 s -DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=283626753352866107258766744765601226469, time:1750768472.5827947s req_ids:[8] -DEBUG 06-24 20:34:32 [manager.py:391] -ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:438.14921379089355ms total_cost_time:438.19475173950195ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13324 prompt_cache_len:5151 prompt_cache_ratio:0.3865956169318523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 -DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:32 [batch.py:51] router release req id 8 -INFO 06-24 20:34:32 [manager.py:224] router recive req id 8 cost time 0.10838723182678223 s -INFO 06-24 20:34:32 [manager.py:68] detokenization recv req id 8 cost time 0.11035299301147461 s -DEBUG 06-24 20:34:32 [manager.py:391] Prefill Batch: batch_id=31066098368209637903489980868916124349, time:1750768472.8097446s req_ids:[8] -DEBUG 06-24 20:34:32 [manager.py:391] -ERROR 06-24 20:34:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:215.24715423583984ms total_cost_time:215.2884006500244ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13325 prompt_cache_len:5151 prompt_cache_ratio:0.38656660412757976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 -DEBUG 06-24 20:34:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:32 [batch.py:51] router release req id 8 -INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10785222053527832 s -INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.10981392860412598 s -DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=57963396626907316933452558786406332710, time:1750768473.031462s req_ids:[8] -DEBUG 06-24 20:34:33 [manager.py:391] -ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:32 lightllm_req_id:8 first_token_cost:213.93418312072754ms total_cost_time:213.97924423217773ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13326 prompt_cache_len:5151 prompt_cache_ratio:0.3865375956776227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 -DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:33 [batch.py:51] router release req id 8 -INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.1063077449798584 s -INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.1081855297088623 s -DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=311529501303489006848779245895283417599, time:1750768473.2546487s req_ids:[8] -DEBUG 06-24 20:34:33 [manager.py:391] -ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:214.2324447631836ms total_cost_time:214.25223350524902ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:13327 prompt_cache_len:5151 prompt_cache_ratio:0.386508591581001 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 -DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:33 [batch.py:51] router release req id 8 -INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10829329490661621 s -INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.11018824577331543 s -DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=1200261701411205359864179422412459161, time:1750768473.4720697s req_ids:[8] -DEBUG 06-24 20:34:33 [manager.py:391] -ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:215.39926528930664ms total_cost_time:215.44218063354492ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13328 prompt_cache_len:5151 prompt_cache_ratio:0.3864795918367347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 -DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:33 [batch.py:51] router release req id 8 -INFO 06-24 20:34:33 [manager.py:224] router recive req id 8 cost time 0.10775971412658691 s -INFO 06-24 20:34:33 [manager.py:68] detokenization recv req id 8 cost time 0.10983586311340332 s -DEBUG 06-24 20:34:33 [manager.py:391] Prefill Batch: batch_id=4855611440105863291696627139317182026, time:1750768473.692509s req_ids:[8] -DEBUG 06-24 20:34:33 [manager.py:391] -ERROR 06-24 20:34:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:212.9683494567871ms total_cost_time:213.0146026611328ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13329 prompt_cache_len:5151 prompt_cache_ratio:0.3864505964438443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 -DEBUG 06-24 20:34:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:33 [batch.py:51] router release req id 8 -INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.3080439567565918 s -INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.3098316192626953 s -DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=226370342169505831002512095249254170742, time:1750768474.1154816s req_ids:[8] -DEBUG 06-24 20:34:34 [manager.py:391] -ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:33 lightllm_req_id:8 first_token_cost:374.1021156311035ms total_cost_time:374.12476539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13330 prompt_cache_len:5151 prompt_cache_ratio:0.38642160540135034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 -DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:34 [batch.py:51] router release req id 8 -INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10896158218383789 s -INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11100649833679199 s -DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=85666528927549737683625190018385706141, time:1750768474.2920663s req_ids:[8] -DEBUG 06-24 20:34:34 [manager.py:391] -ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:205.4119110107422ms total_cost_time:205.45530319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13331 prompt_cache_len:5151 prompt_cache_ratio:0.38639261870827396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 -DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:34 [batch.py:51] router release req id 8 -INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10778999328613281 s -INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s -DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=245765613459432858090719253337145659381, time:1750768474.5030286s req_ids:[8] -DEBUG 06-24 20:34:34 [manager.py:391] -ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:208.76669883728027ms total_cost_time:208.81104469299316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13332 prompt_cache_len:5151 prompt_cache_ratio:0.38636363636363635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 -DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:34 [batch.py:51] router release req id 8 -INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.10883831977844238 s -INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11078476905822754 s -DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=221270784773791838415818188738343882422, time:1750768474.716619s req_ids:[8] -DEBUG 06-24 20:34:34 [manager.py:391] -ERROR 06-24 20:34:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:211.15350723266602ms total_cost_time:211.20643615722656ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:13333 prompt_cache_len:5151 prompt_cache_ratio:0.38633465836645914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 -DEBUG 06-24 20:34:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:34 [batch.py:51] router release req id 8 -INFO 06-24 20:34:34 [manager.py:224] router recive req id 8 cost time 0.11041569709777832 s -INFO 06-24 20:34:34 [manager.py:68] detokenization recv req id 8 cost time 0.11249208450317383 s -DEBUG 06-24 20:34:34 [manager.py:391] Prefill Batch: batch_id=108524506537588202442719109552500531368, time:1750768474.9363327s req_ids:[8] -DEBUG 06-24 20:34:34 [manager.py:391] -ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:34 lightllm_req_id:8 first_token_cost:209.17606353759766ms total_cost_time:209.22088623046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13334 prompt_cache_len:5151 prompt_cache_ratio:0.3863056847157642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 -DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:35 [batch.py:51] router release req id 8 -INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.10832047462463379 s -INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.11034154891967773 s -DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=253566143585684281961561025791925358961, time:1750768475.151598s req_ids:[8] -DEBUG 06-24 20:34:35 [manager.py:391] -ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:208.65988731384277ms total_cost_time:208.70494842529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13335 prompt_cache_len:5151 prompt_cache_ratio:0.3862767154105737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 -DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:35 [batch.py:51] router release req id 8 -INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.309093713760376 s -INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.3108856678009033 s -DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=217693437517916325722276134760620006766, time:1750768475.573553s req_ids:[8] -DEBUG 06-24 20:34:35 [manager.py:391] -ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:378.77559661865234ms total_cost_time:378.8173198699951ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13336 prompt_cache_len:5151 prompt_cache_ratio:0.38624775044991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 -DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:35 [batch.py:51] router release req id 8 -INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.11042451858520508 s -INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.11240100860595703 s -DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=189101816073897246250944532616209864485, time:1750768475.7570446s req_ids:[8] -DEBUG 06-24 20:34:35 [manager.py:391] -ERROR 06-24 20:34:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:215.70110321044922ms total_cost_time:215.7435417175293ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13337 prompt_cache_len:5151 prompt_cache_ratio:0.386218789832796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 -DEBUG 06-24 20:34:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:35 [batch.py:51] router release req id 8 -INFO 06-24 20:34:35 [manager.py:224] router recive req id 8 cost time 0.10810685157775879 s -INFO 06-24 20:34:35 [manager.py:68] detokenization recv req id 8 cost time 0.1101071834564209 s -DEBUG 06-24 20:34:35 [manager.py:391] Prefill Batch: batch_id=115903230706259062438546230178326448578, time:1750768475.9755785s req_ids:[8] -DEBUG 06-24 20:34:35 [manager.py:391] -ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:35 lightllm_req_id:8 first_token_cost:215.70968627929688ms total_cost_time:215.75284004211426ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13338 prompt_cache_len:5151 prompt_cache_ratio:0.3861898335582546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 -DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:36 [batch.py:51] router release req id 8 -INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.10987639427185059 s -DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=209136065776312270129588530418305434222, time:1750768476.1979475s req_ids:[8] -DEBUG 06-24 20:34:36 [manager.py:391] -ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:218.97268295288086ms total_cost_time:219.01822090148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13339 prompt_cache_len:5151 prompt_cache_ratio:0.38616088162530926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 -DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:36 [batch.py:51] router release req id 8 -INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.10890865325927734 s -INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.1110539436340332 s -DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=313544624922425589341357260154309204102, time:1750768476.4214225s req_ids:[8] -DEBUG 06-24 20:34:36 [manager.py:391] -ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:221.4982509613037ms total_cost_time:221.56190872192383ms,out_token_counter:1 mean_per_token_cost_time: 0.06365776062011719ms prompt_token_num:13340 prompt_cache_len:5151 prompt_cache_ratio:0.3861319340329835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 -DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:36 [batch.py:51] router release req id 8 -INFO 06-24 20:34:36 [manager.py:224] router recive req id 8 cost time 0.10880923271179199 s -INFO 06-24 20:34:36 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s -DEBUG 06-24 20:34:36 [manager.py:391] Prefill Batch: batch_id=11420537257713714483734054495014403587, time:1750768476.663018s req_ids:[8] -DEBUG 06-24 20:34:36 [manager.py:391] -ERROR 06-24 20:34:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:186.39588356018066ms total_cost_time:186.42807006835938ms,out_token_counter:1 mean_per_token_cost_time: 0.03218650817871094ms prompt_token_num:13341 prompt_cache_len:5151 prompt_cache_ratio:0.3861029907803013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 -DEBUG 06-24 20:34:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:36 [batch.py:51] router release req id 8 -INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.3093400001525879 s -INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.3114652633666992 s -DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=17140434945485784552932443049183987870, time:1750768477.0543027s req_ids:[8] -DEBUG 06-24 20:34:37 [manager.py:391] -ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:36 lightllm_req_id:8 first_token_cost:434.89909172058105ms total_cost_time:434.94391441345215ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13342 prompt_cache_len:5151 prompt_cache_ratio:0.3860740518662869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 -DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:37 [batch.py:51] router release req id 8 -INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10831046104431152 s -INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.11041617393493652 s -DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=106815531636650980314051729832312210157, time:1750768477.2853203s req_ids:[8] -DEBUG 06-24 20:34:37 [manager.py:391] -ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:212.8608226776123ms total_cost_time:212.9073143005371ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13343 prompt_cache_len:5151 prompt_cache_ratio:0.38604511728996477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 -DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:37 [batch.py:51] router release req id 8 -INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10707211494445801 s -INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.10850071907043457 s -DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=71106554987862022251150948379219116156, time:1750768477.5108976s req_ids:[8] -DEBUG 06-24 20:34:37 [manager.py:391] -ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:226.85694694519043ms total_cost_time:226.90391540527344ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13344 prompt_cache_len:5151 prompt_cache_ratio:0.3860161870503597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 -DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:37 [batch.py:51] router release req id 8 -INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s -INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.11031961441040039 s -DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=290831420833195811961788909608880577923, time:1750768477.735443s req_ids:[8] -DEBUG 06-24 20:34:37 [manager.py:391] -ERROR 06-24 20:34:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:212.4619483947754ms total_cost_time:212.50605583190918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13345 prompt_cache_len:5151 prompt_cache_ratio:0.38598726114649684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 -DEBUG 06-24 20:34:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:37 [batch.py:51] router release req id 8 -INFO 06-24 20:34:37 [manager.py:224] router recive req id 8 cost time 0.10752415657043457 s -INFO 06-24 20:34:37 [manager.py:68] detokenization recv req id 8 cost time 0.1088407039642334 s -DEBUG 06-24 20:34:37 [manager.py:391] Prefill Batch: batch_id=324239681030183248212379087361979416314, time:1750768477.9530537s req_ids:[8] -DEBUG 06-24 20:34:37 [manager.py:391] -ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:37 lightllm_req_id:8 first_token_cost:216.52770042419434ms total_cost_time:216.57085418701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13346 prompt_cache_len:5151 prompt_cache_ratio:0.38595833957740144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 -DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:38 [batch.py:51] router release req id 8 -INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.10787773132324219 s -INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.10951828956604004 s -DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=331332565354822669348409035704888346651, time:1750768478.1827722s req_ids:[8] -DEBUG 06-24 20:34:38 [manager.py:391] -DEBUG 06-24 20:34:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 54128.242 tokens/s -DEBUG 06-24 20:34:38 [stats.py:37] Avg prompt tokens throughput: 54120.120 tokens/s -DEBUG 06-24 20:34:38 [stats.py:37] Avg generate tokens throughput: 8.122 tokens/s -ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:190.01173973083496ms total_cost_time:190.05393981933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13347 prompt_cache_len:5151 prompt_cache_ratio:0.38592942234209937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 -DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:38 [batch.py:51] router release req id 8 -INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.3096923828125 s -INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.3115527629852295 s -DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=173281029204959241074708907753878159855, time:1750768478.5838084s req_ids:[8] -DEBUG 06-24 20:34:38 [manager.py:391] -ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:424.0241050720215ms total_cost_time:424.0686893463135ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13348 prompt_cache_len:5151 prompt_cache_ratio:0.3859005094396164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 -DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:38 [batch.py:51] router release req id 8 -INFO 06-24 20:34:38 [manager.py:224] router recive req id 8 cost time 0.10899090766906738 s -INFO 06-24 20:34:38 [manager.py:68] detokenization recv req id 8 cost time 0.11115622520446777 s -DEBUG 06-24 20:34:38 [manager.py:391] Prefill Batch: batch_id=149758095706223953161820532491865044321, time:1750768478.8033757s req_ids:[8] -DEBUG 06-24 20:34:38 [manager.py:391] -ERROR 06-24 20:34:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:215.10982513427734ms total_cost_time:215.15583992004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13349 prompt_cache_len:5151 prompt_cache_ratio:0.3858716008689789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 -DEBUG 06-24 20:34:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:38 [batch.py:51] router release req id 8 -INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10909533500671387 s -INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11125969886779785 s -DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=112642451137995098877388906949218499343, time:1750768479.0236616s req_ids:[8] -DEBUG 06-24 20:34:39 [manager.py:391] -ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:38 lightllm_req_id:8 first_token_cost:218.54114532470703ms total_cost_time:218.5835838317871ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13350 prompt_cache_len:5151 prompt_cache_ratio:0.3858426966292135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 -DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:39 [batch.py:51] router release req id 8 -INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10901069641113281 s -INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11102890968322754 s -DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=94553971889609887028796371126984861256, time:1750768479.248381s req_ids:[8] -DEBUG 06-24 20:34:39 [manager.py:391] -ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:194.2894458770752ms total_cost_time:194.3340301513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13351 prompt_cache_len:5151 prompt_cache_ratio:0.38581379671934685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 -DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:39 [batch.py:51] router release req id 8 -INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10839486122131348 s -INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.11024975776672363 s -DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=120699560193738076125010600539937657644, time:1750768479.4583073s req_ids:[8] -DEBUG 06-24 20:34:39 [manager.py:391] -ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:197.97897338867188ms total_cost_time:198.00949096679688ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:13352 prompt_cache_len:5151 prompt_cache_ratio:0.38578490113840624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 -DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:39 [batch.py:51] router release req id 8 -INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10759973526000977 s -INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s -DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=210132254249584995228058122760667126574, time:1750768479.651793s req_ids:[8] -DEBUG 06-24 20:34:39 [manager.py:391] -ERROR 06-24 20:34:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:207.16428756713867ms total_cost_time:207.20887184143066ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13353 prompt_cache_len:5151 prompt_cache_ratio:0.385756009885419 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 -DEBUG 06-24 20:34:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:39 [batch.py:51] router release req id 8 -INFO 06-24 20:34:39 [manager.py:224] router recive req id 8 cost time 0.10781717300415039 s -INFO 06-24 20:34:39 [manager.py:68] detokenization recv req id 8 cost time 0.10973620414733887 s -DEBUG 06-24 20:34:39 [manager.py:391] Prefill Batch: batch_id=25076598888827931772203380331695581203, time:1750768479.8647318s req_ids:[8] -DEBUG 06-24 20:34:39 [manager.py:391] -ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:39 lightllm_req_id:8 first_token_cost:398.18334579467773ms total_cost_time:398.2272148132324ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13354 prompt_cache_len:5151 prompt_cache_ratio:0.3857271229594129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 -DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:40 [batch.py:51] router release req id 8 -INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10878348350524902 s -INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.1104881763458252 s -DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=234512898968712950825306820431799288774, time:1750768480.2711134s req_ids:[8] -DEBUG 06-24 20:34:40 [manager.py:391] -ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:214.39480781555176ms total_cost_time:214.43843841552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13355 prompt_cache_len:5151 prompt_cache_ratio:0.38569824035941597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 -DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:40 [batch.py:51] router release req id 8 -INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10929059982299805 s -INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.11115026473999023 s -DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=225930369365680368003515982506502272161, time:1750768480.4901028s req_ids:[8] -DEBUG 06-24 20:34:40 [manager.py:391] -ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:213.75346183776855ms total_cost_time:213.79661560058594ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13356 prompt_cache_len:5151 prompt_cache_ratio:0.3856693620844564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 -DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:40 [batch.py:51] router release req id 8 -INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10780143737792969 s -INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.10964512825012207 s -DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=19551937724137746025577459700348611, time:1750768480.7127438s req_ids:[8] -DEBUG 06-24 20:34:40 [manager.py:391] -ERROR 06-24 20:34:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:210.67428588867188ms total_cost_time:210.71767807006836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13357 prompt_cache_len:5151 prompt_cache_ratio:0.38564048813356294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 -DEBUG 06-24 20:34:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:40 [batch.py:51] router release req id 8 -INFO 06-24 20:34:40 [manager.py:224] router recive req id 8 cost time 0.10523653030395508 s -INFO 06-24 20:34:40 [manager.py:68] detokenization recv req id 8 cost time 0.1064310073852539 s -DEBUG 06-24 20:34:40 [manager.py:391] Prefill Batch: batch_id=147660279458457677993649765644210526217, time:1750768480.9303458s req_ids:[8] -DEBUG 06-24 20:34:40 [manager.py:391] -ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:40 lightllm_req_id:8 first_token_cost:216.57347679138184ms total_cost_time:216.61853790283203ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13358 prompt_cache_len:5151 prompt_cache_ratio:0.3856116185057643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 -DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:41 [batch.py:51] router release req id 8 -INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10752081871032715 s -INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.10943460464477539 s -DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=113897170936640574098836156840897902110, time:1750768481.150117s req_ids:[8] -DEBUG 06-24 20:34:41 [manager.py:391] -ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:213.95564079284668ms total_cost_time:213.99950981140137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13359 prompt_cache_len:5151 prompt_cache_ratio:0.38558275320008983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 -DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:41 [batch.py:51] router release req id 8 -INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.1085362434387207 s -INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030220985412598 s -DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=214489236956257132774213700024781029756, time:1750768481.3715928s req_ids:[8] -DEBUG 06-24 20:34:41 [manager.py:391] -ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:376.41096115112305ms total_cost_time:376.45530700683594ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13360 prompt_cache_len:5151 prompt_cache_ratio:0.38555389221556885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 -DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:41 [batch.py:51] router release req id 8 -INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10881805419921875 s -INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.11065196990966797 s -DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=8835921047092611029480533265950375655, time:1750768481.7554536s req_ids:[8] -DEBUG 06-24 20:34:41 [manager.py:391] -ERROR 06-24 20:34:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:216.67027473449707ms total_cost_time:216.71509742736816ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13361 prompt_cache_len:5151 prompt_cache_ratio:0.3855250355512312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 -DEBUG 06-24 20:34:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:41 [batch.py:51] router release req id 8 -INFO 06-24 20:34:41 [manager.py:224] router recive req id 8 cost time 0.10723614692687988 s -INFO 06-24 20:34:41 [manager.py:68] detokenization recv req id 8 cost time 0.10906767845153809 s -DEBUG 06-24 20:34:41 [manager.py:391] Prefill Batch: batch_id=239066132793640855462031916196968582913, time:1750768481.9779527s req_ids:[8] -DEBUG 06-24 20:34:41 [manager.py:391] -ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:41 lightllm_req_id:8 first_token_cost:216.0482406616211ms total_cost_time:216.09067916870117ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13362 prompt_cache_len:5151 prompt_cache_ratio:0.38549618320610685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 -DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:42 [batch.py:51] router release req id 8 -INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s -INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.11053228378295898 s -DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=136659964606567164286207428289112721330, time:1750768482.1993475s req_ids:[8] -DEBUG 06-24 20:34:42 [manager.py:391] -ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:216.2790298461914ms total_cost_time:216.3228988647461ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13363 prompt_cache_len:5151 prompt_cache_ratio:0.3854673351792262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 -DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:42 [batch.py:51] router release req id 8 -INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10819625854492188 s -INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10997343063354492 s -DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=17822103476799280653539342169453412951, time:1750768482.4234436s req_ids:[8] -DEBUG 06-24 20:34:42 [manager.py:391] -ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:214.0371799468994ms total_cost_time:214.0810489654541ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13364 prompt_cache_len:5151 prompt_cache_ratio:0.3854384914696199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 -DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:42 [batch.py:51] router release req id 8 -INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10785055160522461 s -INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s -DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=294976084089765071861026175907309643892, time:1750768482.644041s req_ids:[8] -DEBUG 06-24 20:34:42 [manager.py:391] -ERROR 06-24 20:34:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:210.83664894104004ms total_cost_time:210.88075637817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13365 prompt_cache_len:5151 prompt_cache_ratio:0.3854096520763187 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 -DEBUG 06-24 20:34:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:42 [batch.py:51] router release req id 8 -INFO 06-24 20:34:42 [manager.py:224] router recive req id 8 cost time 0.10732412338256836 s -INFO 06-24 20:34:42 [manager.py:68] detokenization recv req id 8 cost time 0.10928559303283691 s -DEBUG 06-24 20:34:42 [manager.py:391] Prefill Batch: batch_id=54074300426069124929523614344130724807, time:1750768482.873249s req_ids:[8] -DEBUG 06-24 20:34:42 [manager.py:391] -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:42 lightllm_req_id:8 first_token_cost:392.10963249206543ms total_cost_time:392.1544551849365ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13366 prompt_cache_len:5151 prompt_cache_ratio:0.385380816998354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 -DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:43 [batch.py:51] router release req id 8 -INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10781383514404297 s -INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10903048515319824 s -DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=328480732124916339312086988816429595541, time:1750768483.2618701s req_ids:[8] -DEBUG 06-24 20:34:43 [manager.py:391] -ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:211.36140823364258ms total_cost_time:211.40360832214355ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13367 prompt_cache_len:5151 prompt_cache_ratio:0.38535198623475725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 -DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:43 [batch.py:51] router release req id 8 -INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10770845413208008 s -INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10949397087097168 s -DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=215467744301864372549316803299084111319, time:1750768483.481869s req_ids:[8] -DEBUG 06-24 20:34:43 [manager.py:391] -ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:206.77781105041504ms total_cost_time:206.82454109191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13368 prompt_cache_len:5151 prompt_cache_ratio:0.38532315978456017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 -DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:43 [batch.py:51] router release req id 8 -INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.10712170600891113 s -INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10915732383728027 s -DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=301173154886609180516353902747315618699, time:1750768483.6956868s req_ids:[8] -DEBUG 06-24 20:34:43 [manager.py:391] -ERROR 06-24 20:34:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:207.41534233093262ms total_cost_time:207.45849609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13369 prompt_cache_len:5151 prompt_cache_ratio:0.3852943376467948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 -DEBUG 06-24 20:34:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:43 [batch.py:51] router release req id 8 -INFO 06-24 20:34:43 [manager.py:224] router recive req id 8 cost time 0.1078341007232666 s -INFO 06-24 20:34:43 [manager.py:68] detokenization recv req id 8 cost time 0.10971403121948242 s -DEBUG 06-24 20:34:43 [manager.py:391] Prefill Batch: batch_id=131101112766855711354593546834470833590, time:1750768483.9097588s req_ids:[8] -DEBUG 06-24 20:34:43 [manager.py:391] -ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:43 lightllm_req_id:8 first_token_cost:220.719575881958ms total_cost_time:220.7643985748291ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13370 prompt_cache_len:5151 prompt_cache_ratio:0.3852655198204936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 -DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:44 [batch.py:51] router release req id 8 -INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10865592956542969 s -INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11059260368347168 s -DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=178128336823981781214968123327841756949, time:1750768484.1565008s req_ids:[8] -DEBUG 06-24 20:34:44 [manager.py:391] -ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:219.451904296875ms total_cost_time:219.4962501525879ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13371 prompt_cache_len:5151 prompt_cache_ratio:0.38523670630468926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 -DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:44 [batch.py:51] router release req id 8 -INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10860204696655273 s -INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11060261726379395 s -DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=218203607816499016608929607723338341039, time:1750768484.3829703s req_ids:[8] -DEBUG 06-24 20:34:44 [manager.py:391] -ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:395.14827728271484ms total_cost_time:395.19262313842773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13372 prompt_cache_len:5151 prompt_cache_ratio:0.3852078970984146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 -DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:44 [batch.py:51] router release req id 8 -INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10798263549804688 s -INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s -DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=10761395316124628856995118223544310802, time:1750768484.7705824s req_ids:[8] -DEBUG 06-24 20:34:44 [manager.py:391] -ERROR 06-24 20:34:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:212.3434543609619ms total_cost_time:212.3878002166748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13373 prompt_cache_len:5151 prompt_cache_ratio:0.3851790922007029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 -DEBUG 06-24 20:34:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:44 [batch.py:51] router release req id 8 -INFO 06-24 20:34:44 [manager.py:224] router recive req id 8 cost time 0.10755348205566406 s -INFO 06-24 20:34:44 [manager.py:68] detokenization recv req id 8 cost time 0.10945248603820801 s -DEBUG 06-24 20:34:44 [manager.py:391] Prefill Batch: batch_id=181453551087880698759050289217072650718, time:1750768484.9993172s req_ids:[8] -DEBUG 06-24 20:34:44 [manager.py:391] -ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:44 lightllm_req_id:8 first_token_cost:222.18561172485352ms total_cost_time:222.2304344177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13374 prompt_cache_len:5151 prompt_cache_ratio:0.3851502916105877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 -DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:45 [batch.py:51] router release req id 8 -INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10785961151123047 s -INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.10985589027404785 s -DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=78871873914617280703390596729372428416, time:1750768485.2167943s req_ids:[8] -DEBUG 06-24 20:34:45 [manager.py:391] -ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:207.82041549682617ms total_cost_time:207.86595344543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13375 prompt_cache_len:5151 prompt_cache_ratio:0.3851214953271028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 -DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:45 [batch.py:51] router release req id 8 -INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10772871971130371 s -INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.10956645011901855 s -DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=80864216105776078228325315638327534410, time:1750768485.4317555s req_ids:[8] -DEBUG 06-24 20:34:45 [manager.py:391] -ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:202.42953300476074ms total_cost_time:202.47220993041992ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13376 prompt_cache_len:5151 prompt_cache_ratio:0.3850927033492823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 -DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:45 [batch.py:51] router release req id 8 -INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10761356353759766 s -INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.1093907356262207 s -DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=3196720390525090139398010844003986168, time:1750768485.640412s req_ids:[8] -DEBUG 06-24 20:34:45 [manager.py:391] -ERROR 06-24 20:34:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:211.1198902130127ms total_cost_time:211.1673355102539ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13377 prompt_cache_len:5151 prompt_cache_ratio:0.38506391567616055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 -DEBUG 06-24 20:34:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:45 [batch.py:51] router release req id 8 -INFO 06-24 20:34:45 [manager.py:224] router recive req id 8 cost time 0.10875940322875977 s -INFO 06-24 20:34:45 [manager.py:68] detokenization recv req id 8 cost time 0.11066412925720215 s -DEBUG 06-24 20:34:45 [manager.py:391] Prefill Batch: batch_id=332383843549473594965860838077509987634, time:1750768485.8567662s req_ids:[8] -DEBUG 06-24 20:34:45 [manager.py:391] -ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:45 lightllm_req_id:8 first_token_cost:400.23040771484375ms total_cost_time:400.27499198913574ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13378 prompt_cache_len:5151 prompt_cache_ratio:0.38503513230677233 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 -DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:46 [batch.py:51] router release req id 8 -INFO 06-24 20:34:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10843825340270996 s -INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s -DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=80612008967129460081318619814246405916, time:1750768486.262896s req_ids:[8] -DEBUG 06-24 20:34:46 [manager.py:391] -ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:219.34008598327637ms total_cost_time:219.39635276794434ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13379 prompt_cache_len:5151 prompt_cache_ratio:0.3850063532401525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 -DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:46 [batch.py:51] router release req id 8 -INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10874199867248535 s -INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11058878898620605 s -DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=133285603690469072710712176549780434372, time:1750768486.4913287s req_ids:[8] -DEBUG 06-24 20:34:46 [manager.py:391] -ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:209.6383571624756ms total_cost_time:209.68151092529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13380 prompt_cache_len:5151 prompt_cache_ratio:0.3849775784753363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 -DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:46 [batch.py:51] router release req id 8 -INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10864520072937012 s -INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.11070680618286133 s -DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=128977785193546127995412755900398145403, time:1750768486.7087202s req_ids:[8] -DEBUG 06-24 20:34:46 [manager.py:391] -ERROR 06-24 20:34:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:215.3773307800293ms total_cost_time:215.4226303100586ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13381 prompt_cache_len:5151 prompt_cache_ratio:0.3849488080113594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 -DEBUG 06-24 20:34:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:46 [batch.py:51] router release req id 8 -INFO 06-24 20:34:46 [manager.py:224] router recive req id 8 cost time 0.10785245895385742 s -INFO 06-24 20:34:46 [manager.py:68] detokenization recv req id 8 cost time 0.10982203483581543 s -DEBUG 06-24 20:34:46 [manager.py:391] Prefill Batch: batch_id=25838559442113498713591909260482907370, time:1750768486.9297886s req_ids:[8] -DEBUG 06-24 20:34:46 [manager.py:391] -ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:46 lightllm_req_id:8 first_token_cost:215.5134677886963ms total_cost_time:215.55614471435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13382 prompt_cache_len:5151 prompt_cache_ratio:0.38492004184725753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 -DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:47 [batch.py:51] router release req id 8 -INFO 06-24 20:34:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10802602767944336 s -INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.10979700088500977 s -DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=77967617222573395436171598603485860766, time:1750768487.153963s req_ids:[8] -DEBUG 06-24 20:34:47 [manager.py:391] -ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:217.43059158325195ms total_cost_time:217.47303009033203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13383 prompt_cache_len:5151 prompt_cache_ratio:0.3848912799820668 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 -DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:47 [batch.py:51] router release req id 8 -INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10860323905944824 s -INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.11049008369445801 s -DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=85386356311617993124900537375221467231, time:1750768487.3998854s req_ids:[8] -DEBUG 06-24 20:34:47 [manager.py:391] -ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:404.82449531555176ms total_cost_time:404.86764907836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13384 prompt_cache_len:5151 prompt_cache_ratio:0.38486252241482366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 -DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:47 [batch.py:51] router release req id 8 -INFO 06-24 20:34:47 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s -INFO 06-24 20:34:47 [manager.py:68] detokenization recv req id 8 cost time 0.11075901985168457 s -DEBUG 06-24 20:34:47 [manager.py:391] Prefill Batch: batch_id=224680239491257436096564331748170130145, time:1750768487.7882895s req_ids:[8] -DEBUG 06-24 20:34:47 [manager.py:391] -ERROR 06-24 20:34:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:216.7060375213623ms total_cost_time:216.7491912841797ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13385 prompt_cache_len:5151 prompt_cache_ratio:0.3848337691445648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 -DEBUG 06-24 20:34:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:47 [batch.py:51] router release req id 8 -INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10720586776733398 s -INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.10890579223632812 s -DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=331650535860728505633023387049439511543, time:1750768488.010263s req_ids:[8] -DEBUG 06-24 20:34:48 [manager.py:391] -ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:47 lightllm_req_id:8 first_token_cost:214.10346031188965ms total_cost_time:214.12372589111328ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:13386 prompt_cache_len:5151 prompt_cache_ratio:0.3848050201703272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 -DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:48 [batch.py:51] router release req id 8 -INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.1086578369140625 s -INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s -DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=73850442012899795058565290444905267128, time:1750768488.2313294s req_ids:[8] -DEBUG 06-24 20:34:48 [manager.py:391] -DEBUG 06-24 20:34:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 53217.585 tokens/s -DEBUG 06-24 20:34:48 [stats.py:37] Avg prompt tokens throughput: 53209.624 tokens/s -DEBUG 06-24 20:34:48 [stats.py:37] Avg generate tokens throughput: 7.961 tokens/s -ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:213.38272094726562ms total_cost_time:213.42802047729492ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13387 prompt_cache_len:5151 prompt_cache_ratio:0.3847762754911481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 -DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:48 [batch.py:51] router release req id 8 -INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.1076974868774414 s -INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.10947251319885254 s -DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=105684043258194818915788278373234206214, time:1750768488.4496644s req_ids:[8] -DEBUG 06-24 20:34:48 [manager.py:391] -ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:214.2932415008545ms total_cost_time:214.339017868042ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13388 prompt_cache_len:5151 prompt_cache_ratio:0.38474753510606513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 -DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:48 [batch.py:51] router release req id 8 -INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10918688774108887 s -INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.11088919639587402 s -DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=21249901609506640352796930378590481986, time:1750768488.66799s req_ids:[8] -DEBUG 06-24 20:34:48 [manager.py:391] -ERROR 06-24 20:34:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:212.54587173461914ms total_cost_time:212.58902549743652ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13389 prompt_cache_len:5151 prompt_cache_ratio:0.38471879901411604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 -DEBUG 06-24 20:34:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:48 [batch.py:51] router release req id 8 -INFO 06-24 20:34:48 [manager.py:224] router recive req id 8 cost time 0.10800409317016602 s -INFO 06-24 20:34:48 [manager.py:68] detokenization recv req id 8 cost time 0.1099100112915039 s -DEBUG 06-24 20:34:48 [manager.py:391] Prefill Batch: batch_id=225064741826576108036823851514588155778, time:1750768488.8860953s req_ids:[8] -DEBUG 06-24 20:34:48 [manager.py:391] -ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:48 lightllm_req_id:8 first_token_cost:379.07910346984863ms total_cost_time:379.12583351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13390 prompt_cache_len:5151 prompt_cache_ratio:0.38469006721433907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 -DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:49 [batch.py:51] router release req id 8 -INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10940194129943848 s -INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.11123156547546387 s -DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=202425316892297402321035240591147188730, time:1750768489.2695765s req_ids:[8] -DEBUG 06-24 20:34:49 [manager.py:391] -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:216.01533889770508ms total_cost_time:216.05944633483887ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13391 prompt_cache_len:5151 prompt_cache_ratio:0.38466133970577254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 -DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:49 [batch.py:51] router release req id 8 -INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10958743095397949 s -INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.1113278865814209 s -DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=194155856581267084042990071958308719059, time:1750768489.491703s req_ids:[8] -DEBUG 06-24 20:34:49 [manager.py:391] -ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:212.62097358703613ms total_cost_time:212.66508102416992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13392 prompt_cache_len:5151 prompt_cache_ratio:0.38463261648745517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 -DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:49 [batch.py:51] router release req id 8 -INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10944676399230957 s -INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.1112220287322998 s -DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=333880246236349465030280056446050387852, time:1750768489.7111957s req_ids:[8] -DEBUG 06-24 20:34:49 [manager.py:391] -ERROR 06-24 20:34:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:216.13097190856934ms total_cost_time:216.17531776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13393 prompt_cache_len:5151 prompt_cache_ratio:0.38460389755842606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 -DEBUG 06-24 20:34:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:49 [batch.py:51] router release req id 8 -INFO 06-24 20:34:49 [manager.py:224] router recive req id 8 cost time 0.10924887657165527 s -INFO 06-24 20:34:49 [manager.py:68] detokenization recv req id 8 cost time 0.11105608940124512 s -DEBUG 06-24 20:34:49 [manager.py:391] Prefill Batch: batch_id=8272239126569953909751734562855877589, time:1750768489.9477632s req_ids:[8] -DEBUG 06-24 20:34:49 [manager.py:391] -ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:49 lightllm_req_id:8 first_token_cost:234.04765129089355ms total_cost_time:234.09128189086914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13394 prompt_cache_len:5151 prompt_cache_ratio:0.38457518291772436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 -DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:50 [batch.py:51] router release req id 8 -INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.10911870002746582 s -INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.11090564727783203 s -DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=216321904921803563920769676395248244351, time:1750768490.1733003s req_ids:[8] -DEBUG 06-24 20:34:50 [manager.py:391] -ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:217.057466506958ms total_cost_time:217.1003818511963ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13395 prompt_cache_len:5151 prompt_cache_ratio:0.3845464725643897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 -DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:50 [batch.py:51] router release req id 8 -INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.10827040672302246 s -INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.10999417304992676 s -DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=329403301763156059926592041944175393698, time:1750768490.3958876s req_ids:[8] -DEBUG 06-24 20:34:50 [manager.py:391] -ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:219.39778327941895ms total_cost_time:219.44308280944824ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13396 prompt_cache_len:5151 prompt_cache_ratio:0.38451776649746194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 -DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:50 [batch.py:51] router release req id 8 -INFO 06-24 20:34:50 [manager.py:224] router recive req id 8 cost time 0.3101820945739746 s -INFO 06-24 20:34:50 [manager.py:68] detokenization recv req id 8 cost time 0.31214165687561035 s -DEBUG 06-24 20:34:50 [manager.py:391] Prefill Batch: batch_id=303469966273679999859715105729713351046, time:1750768490.8251936s req_ids:[8] -DEBUG 06-24 20:34:50 [manager.py:391] -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:428.3151626586914ms total_cost_time:428.3599853515625ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13397 prompt_cache_len:5151 prompt_cache_ratio:0.3844890647159812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 -DEBUG 06-24 20:34:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:50 [batch.py:51] router release req id 8 -INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.1092386245727539 s -INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.11118435859680176 s -DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=256523803895843155251146324507910263315, time:1750768491.0546696s req_ids:[8] -DEBUG 06-24 20:34:51 [manager.py:391] -ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:50 lightllm_req_id:8 first_token_cost:215.45052528381348ms total_cost_time:215.49654006958008ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13398 prompt_cache_len:5151 prompt_cache_ratio:0.3844603672189879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 -DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:51 [batch.py:51] router release req id 8 -INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10552573204040527 s -INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.10750961303710938 s -DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=62742609223362523268549013286715924055, time:1750768491.27785s req_ids:[8] -DEBUG 06-24 20:34:51 [manager.py:391] -ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:213.63043785095215ms total_cost_time:213.67692947387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13399 prompt_cache_len:5151 prompt_cache_ratio:0.3844316740055228 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 -DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:51 [batch.py:51] router release req id 8 -INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10798287391662598 s -INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.10988497734069824 s -DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=314520380579866381465107525524539477302, time:1750768491.4938653s req_ids:[8] -DEBUG 06-24 20:34:51 [manager.py:391] -ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:209.71155166625977ms total_cost_time:209.75565910339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13400 prompt_cache_len:5151 prompt_cache_ratio:0.3844029850746269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 -DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:51 [batch.py:51] router release req id 8 -INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10843324661254883 s -INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.1104428768157959 s -DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=170242771642733229299839873032126322609, time:1750768491.7212722s req_ids:[8] -DEBUG 06-24 20:34:51 [manager.py:391] -ERROR 06-24 20:34:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:225.5077362060547ms total_cost_time:225.55232048034668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13401 prompt_cache_len:5151 prompt_cache_ratio:0.3843743004253414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 -DEBUG 06-24 20:34:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:51 [batch.py:51] router release req id 8 -INFO 06-24 20:34:51 [manager.py:224] router recive req id 8 cost time 0.10933804512023926 s -INFO 06-24 20:34:51 [manager.py:68] detokenization recv req id 8 cost time 0.11116147041320801 s -DEBUG 06-24 20:34:51 [manager.py:391] Prefill Batch: batch_id=29562982289942915449525172571750263843, time:1750768491.9443426s req_ids:[8] -DEBUG 06-24 20:34:51 [manager.py:391] -ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:51 lightllm_req_id:8 first_token_cost:216.9663906097412ms total_cost_time:217.0100212097168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13402 prompt_cache_len:5151 prompt_cache_ratio:0.38434562005670797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 -DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:52 [batch.py:51] router release req id 8 -INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.3119361400604248 s -INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.31392407417297363 s -DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=118301264466934108930829167159194288439, time:1750768492.3778577s req_ids:[8] -DEBUG 06-24 20:34:52 [manager.py:391] -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:433.4700107574463ms total_cost_time:433.518648147583ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:13403 prompt_cache_len:5151 prompt_cache_ratio:0.3843169439677684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 -DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:52 [batch.py:51] router release req id 8 -INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s -INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.11108732223510742 s -DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=232593348754038125678991477812919696678, time:1750768492.6091928s req_ids:[8] -DEBUG 06-24 20:34:52 [manager.py:391] -ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:216.5539264678955ms total_cost_time:216.60137176513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:13404 prompt_cache_len:5151 prompt_cache_ratio:0.3842882721575649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 -DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:52 [batch.py:51] router release req id 8 -INFO 06-24 20:34:52 [manager.py:224] router recive req id 8 cost time 0.10927391052246094 s -INFO 06-24 20:34:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125493049621582 s -DEBUG 06-24 20:34:52 [manager.py:391] Prefill Batch: batch_id=310829970307397273138357558489722157558, time:1750768492.8309891s req_ids:[8] -DEBUG 06-24 20:34:52 [manager.py:391] -ERROR 06-24 20:34:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:217.74888038635254ms total_cost_time:217.79441833496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13405 prompt_cache_len:5151 prompt_cache_ratio:0.38425960462513986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 -DEBUG 06-24 20:34:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:52 [batch.py:51] router release req id 8 -INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10852718353271484 s -INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.1105642318725586 s -DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=324082595468039851526245122039124194777, time:1750768493.0545287s req_ids:[8] -DEBUG 06-24 20:34:53 [manager.py:391] -ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:52 lightllm_req_id:8 first_token_cost:217.65542030334473ms total_cost_time:217.6971435546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13406 prompt_cache_len:5151 prompt_cache_ratio:0.384230941369536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 -DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:53 [batch.py:51] router release req id 8 -INFO 06-24 20:34:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:53 [statics_utils.py:24] mean first cost: 230.75918787199086 ms -INFO 06-24 20:34:53 [statics_utils.py:24] mean per token cost: 0.05971222435423423 ms -INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10805296897888184 s -INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.10942244529724121 s -DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=70643713434461362659426302924417986522, time:1750768493.2778869s req_ids:[8] -DEBUG 06-24 20:34:53 [manager.py:391] -INFO 06-24 20:34:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:208.7545394897461ms total_cost_time:208.82058143615723ms,out_token_counter:1 mean_per_token_cost_time: 0.06604194641113281ms prompt_token_num:13407 prompt_cache_len:5151 prompt_cache_ratio:0.38420228238979637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 -DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:53 [batch.py:51] router release req id 8 -INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.10906124114990234 s -INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.1103515625 s -DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=78302082029276430240479513965582609628, time:1750768493.503303s req_ids:[8] -DEBUG 06-24 20:34:53 [manager.py:391] -ERROR 06-24 20:34:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:231.12940788269043ms total_cost_time:231.17494583129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13408 prompt_cache_len:5151 prompt_cache_ratio:0.3841736276849642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 -DEBUG 06-24 20:34:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:53 [batch.py:51] router release req id 8 -INFO 06-24 20:34:53 [manager.py:224] router recive req id 8 cost time 0.3097701072692871 s -INFO 06-24 20:34:53 [manager.py:68] detokenization recv req id 8 cost time 0.31184911727905273 s -DEBUG 06-24 20:34:53 [manager.py:391] Prefill Batch: batch_id=265045116596210824940948639581007715778, time:1750768493.9338503s req_ids:[8] -DEBUG 06-24 20:34:53 [manager.py:391] -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:53 lightllm_req_id:8 first_token_cost:426.76448822021484ms total_cost_time:426.80883407592773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13409 prompt_cache_len:5151 prompt_cache_ratio:0.3841449772540831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 -DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:54 [batch.py:51] router release req id 8 -INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10769271850585938 s -INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.10960650444030762 s -DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=43213915475392955010542532166198436558, time:1750768494.162273s req_ids:[8] -DEBUG 06-24 20:34:54 [manager.py:391] -ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:212.32056617736816ms total_cost_time:212.36348152160645ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13410 prompt_cache_len:5151 prompt_cache_ratio:0.38411633109619686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 -DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:54 [batch.py:51] router release req id 8 -INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10771536827087402 s -INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.10974764823913574 s -DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=101328075228466610507661557384009922524, time:1750768494.381358s req_ids:[8] -DEBUG 06-24 20:34:54 [manager.py:391] -ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:216.35007858276367ms total_cost_time:216.39299392700195ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13411 prompt_cache_len:5151 prompt_cache_ratio:0.3840876892103497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 -DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:54 [batch.py:51] router release req id 8 -INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10893726348876953 s -INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.11087226867675781 s -DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=305276017090498716802864831210447967294, time:1750768494.6021066s req_ids:[8] -DEBUG 06-24 20:34:54 [manager.py:391] -ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:214.7376537322998ms total_cost_time:214.7815227508545ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13412 prompt_cache_len:5151 prompt_cache_ratio:0.38405905159558607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 -DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:54 [batch.py:51] router release req id 8 -INFO 06-24 20:34:54 [manager.py:224] router recive req id 8 cost time 0.10830163955688477 s -INFO 06-24 20:34:54 [manager.py:68] detokenization recv req id 8 cost time 0.11034488677978516 s -DEBUG 06-24 20:34:54 [manager.py:391] Prefill Batch: batch_id=257846175379646357075680891322914062966, time:1750768494.8229942s req_ids:[8] -DEBUG 06-24 20:34:54 [manager.py:391] -ERROR 06-24 20:34:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:212.67008781433105ms total_cost_time:212.71443367004395ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13413 prompt_cache_len:5151 prompt_cache_ratio:0.3840304182509506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 -DEBUG 06-24 20:34:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:54 [batch.py:51] router release req id 8 -INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10883665084838867 s -INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11075711250305176 s -DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=286006572879662344677523408165507611070, time:1750768495.0437355s req_ids:[8] -DEBUG 06-24 20:34:55 [manager.py:391] -ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:54 lightllm_req_id:8 first_token_cost:216.48263931274414ms total_cost_time:216.52650833129883ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13414 prompt_cache_len:5151 prompt_cache_ratio:0.3840017891754883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 -DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:55 [batch.py:51] router release req id 8 -INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.31078624725341797 s -INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.3127915859222412 s -DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=307318325363404135345259776980010555108, time:1750768495.4690287s req_ids:[8] -DEBUG 06-24 20:34:55 [manager.py:391] -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:430.3431510925293ms total_cost_time:430.3896427154541ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13415 prompt_cache_len:5151 prompt_cache_ratio:0.3839731643682445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 -DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:55 [batch.py:51] router release req id 8 -INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10810446739196777 s -INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11008715629577637 s -DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=243371396566056312630200051065767580587, time:1750768495.6965475s req_ids:[8] -DEBUG 06-24 20:34:55 [manager.py:391] -ERROR 06-24 20:34:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:217.3330783843994ms total_cost_time:217.3748016357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13416 prompt_cache_len:5151 prompt_cache_ratio:0.38394454382826476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 -DEBUG 06-24 20:34:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:55 [batch.py:51] router release req id 8 -INFO 06-24 20:34:55 [manager.py:224] router recive req id 8 cost time 0.10902738571166992 s -INFO 06-24 20:34:55 [manager.py:68] detokenization recv req id 8 cost time 0.11096525192260742 s -DEBUG 06-24 20:34:55 [manager.py:391] Prefill Batch: batch_id=196484625563338102340119497964953752782, time:1750768495.9199955s req_ids:[8] -DEBUG 06-24 20:34:55 [manager.py:391] -ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:55 lightllm_req_id:8 first_token_cost:217.36764907836914ms total_cost_time:217.41056442260742ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13417 prompt_cache_len:5151 prompt_cache_ratio:0.3839159275545949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 -DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:56 [batch.py:51] router release req id 8 -INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10807919502258301 s -INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s -DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=85676724454533863835930567005094177020, time:1750768496.140428s req_ids:[8] -DEBUG 06-24 20:34:56 [manager.py:391] -ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:214.28561210632324ms total_cost_time:214.32852745056152ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13418 prompt_cache_len:5151 prompt_cache_ratio:0.38388731554628114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 -DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:56 [batch.py:51] router release req id 8 -INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10826516151428223 s -INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11038899421691895 s -DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=294160361309324828015034673130159350286, time:1750768496.3603203s req_ids:[8] -DEBUG 06-24 20:34:56 [manager.py:391] -ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:215.5923843383789ms total_cost_time:215.6364917755127ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13419 prompt_cache_len:5151 prompt_cache_ratio:0.38385870780236975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 -DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:56 [batch.py:51] router release req id 8 -INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.10834217071533203 s -INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.11037302017211914 s -DEBUG 06-24 20:34:56 [manager.py:391] Prefill Batch: batch_id=220278863604129038782964679112139531918, time:1750768496.5925498s req_ids:[8] -DEBUG 06-24 20:34:56 [manager.py:391] -ERROR 06-24 20:34:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:217.94390678405762ms total_cost_time:217.9882526397705ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13420 prompt_cache_len:5151 prompt_cache_ratio:0.3838301043219076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 -DEBUG 06-24 20:34:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:34:56 [batch.py:51] router release req id 8 -INFO 06-24 20:34:56 [manager.py:224] router recive req id 8 cost time 0.310762882232666 s -INFO 06-24 20:34:56 [manager.py:68] detokenization recv req id 8 cost time 0.3129255771636963 s -DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=284840831908101358143673399245226758415, time:1750768497.007654s req_ids:[8] -DEBUG 06-24 20:34:57 [manager.py:391] -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:56 lightllm_req_id:8 first_token_cost:424.49069023132324ms total_cost_time:424.53455924987793ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13421 prompt_cache_len:5151 prompt_cache_ratio:0.38380150510394156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 -DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:57 [batch.py:51] router release req id 8 -INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10778021812438965 s -INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.10952568054199219 s -DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=315167805053732025120146280821476307154, time:1750768497.2319005s req_ids:[8] -DEBUG 06-24 20:34:57 [manager.py:391] -ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:213.17410469055176ms total_cost_time:213.21821212768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13422 prompt_cache_len:5151 prompt_cache_ratio:0.383772910147519 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 -DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:57 [batch.py:51] router release req id 8 -INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10835647583007812 s -INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s -DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=274168280615113909501388640978301507603, time:1750768497.4512012s req_ids:[8] -DEBUG 06-24 20:34:57 [manager.py:391] -ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:168.14923286437988ms total_cost_time:168.19071769714355ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13423 prompt_cache_len:5151 prompt_cache_ratio:0.3837443194516874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 -DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:57 [batch.py:51] router release req id 8 -INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s -INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072802543640137 s -DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=156084330953083719977132631775337902986, time:1750768497.627375s req_ids:[8] -DEBUG 06-24 20:34:57 [manager.py:391] -ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:201.23815536499023ms total_cost_time:201.28345489501953ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13424 prompt_cache_len:5151 prompt_cache_ratio:0.3837157330154946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 -DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:57 [batch.py:51] router release req id 8 -INFO 06-24 20:34:57 [manager.py:224] router recive req id 8 cost time 0.1092526912689209 s -INFO 06-24 20:34:57 [manager.py:68] detokenization recv req id 8 cost time 0.11127448081970215 s -DEBUG 06-24 20:34:57 [manager.py:391] Prefill Batch: batch_id=9060830014198150643881071561705305823, time:1750768497.835077s req_ids:[8] -DEBUG 06-24 20:34:57 [manager.py:391] -ERROR 06-24 20:34:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:206.21442794799805ms total_cost_time:206.25662803649902ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13425 prompt_cache_len:5151 prompt_cache_ratio:0.3836871508379888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 -DEBUG 06-24 20:34:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:57 [batch.py:51] router release req id 8 -INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10768532752990723 s -INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.10967135429382324 s -DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=299185952340002568984787034579477555434, time:1750768498.0491061s req_ids:[8] -DEBUG 06-24 20:34:58 [manager.py:391] -ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:57 lightllm_req_id:8 first_token_cost:207.40079879760742ms total_cost_time:207.444429397583ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13426 prompt_cache_len:5151 prompt_cache_ratio:0.38365857291821837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 -DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:58 [batch.py:51] router release req id 8 -INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.31101441383361816 s -INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.31314945220947266 s -DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=234274541729162269386060914833890464287, time:1750768498.4661531s req_ids:[8] -DEBUG 06-24 20:34:58 [manager.py:391] -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:34:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -DEBUG 06-24 20:34:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 52406.910 tokens/s -DEBUG 06-24 20:34:58 [stats.py:37] Avg prompt tokens throughput: 52399.094 tokens/s -DEBUG 06-24 20:34:58 [stats.py:37] Avg generate tokens throughput: 7.816 tokens/s -ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:429.44931983947754ms total_cost_time:429.4934272766113ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13427 prompt_cache_len:5151 prompt_cache_ratio:0.383629999255232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 -DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:58 [batch.py:51] router release req id 8 -INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10844612121582031 s -INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.1105201244354248 s -DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=192097547738549999958632050227279657868, time:1750768498.6999648s req_ids:[8] -DEBUG 06-24 20:34:58 [manager.py:391] -ERROR 06-24 20:34:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:214.8280143737793ms total_cost_time:214.86949920654297ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13428 prompt_cache_len:5151 prompt_cache_ratio:0.38360142984807866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 -DEBUG 06-24 20:34:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:58 [batch.py:51] router release req id 8 -INFO 06-24 20:34:58 [manager.py:224] router recive req id 8 cost time 0.10744524002075195 s -INFO 06-24 20:34:58 [manager.py:68] detokenization recv req id 8 cost time 0.1093900203704834 s -DEBUG 06-24 20:34:58 [manager.py:391] Prefill Batch: batch_id=267644012558346507721701051173758620729, time:1750768498.9189537s req_ids:[8] -DEBUG 06-24 20:34:58 [manager.py:391] -ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:58 lightllm_req_id:8 first_token_cost:210.71314811706543ms total_cost_time:210.75844764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13429 prompt_cache_len:5151 prompt_cache_ratio:0.3835728646958076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 -DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:59 [batch.py:51] router release req id 8 -INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10807442665100098 s -INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.10995364189147949 s -DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=116650705123071985991808926348675312659, time:1750768499.1342878s req_ids:[8] -DEBUG 06-24 20:34:59 [manager.py:391] -ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:210.6776237487793ms total_cost_time:210.7224464416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13430 prompt_cache_len:5151 prompt_cache_ratio:0.38354430379746834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 -DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:59 [batch.py:51] router release req id 8 -INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s -INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.11165642738342285 s -DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=35020485022871867085432313658953893696, time:1750768499.354187s req_ids:[8] -DEBUG 06-24 20:34:59 [manager.py:391] -ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:219.2516326904297ms total_cost_time:219.29526329040527ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13431 prompt_cache_len:5151 prompt_cache_ratio:0.3835157471521108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 -DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:59 [batch.py:51] router release req id 8 -INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.10944247245788574 s -INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.11156868934631348 s -DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=123556701073859292645224853717933851179, time:1750768499.5765383s req_ids:[8] -DEBUG 06-24 20:34:59 [manager.py:391] -ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:211.69233322143555ms total_cost_time:211.73882484436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13432 prompt_cache_len:5151 prompt_cache_ratio:0.383487194758785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 -DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:59 [batch.py:51] router release req id 8 -INFO 06-24 20:34:59 [manager.py:224] router recive req id 8 cost time 0.2081913948059082 s -INFO 06-24 20:34:59 [manager.py:68] detokenization recv req id 8 cost time 0.20993351936340332 s -DEBUG 06-24 20:34:59 [manager.py:391] Prefill Batch: batch_id=300489596054220902127722797455538215431, time:1750768499.8941624s req_ids:[8] -DEBUG 06-24 20:34:59 [manager.py:391] -ERROR 06-24 20:34:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:34:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:34:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:270.8098888397217ms total_cost_time:270.85256576538086ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13433 prompt_cache_len:5151 prompt_cache_ratio:0.38345864661654133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:34:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 -DEBUG 06-24 20:34:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:34:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:34:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:34:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:34:59 [batch.py:51] router release req id 8 -INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.1079111099243164 s -INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.10972237586975098 s -DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=176665783026419884959558153202365616700, time:1750768500.070966s req_ids:[8] -DEBUG 06-24 20:35:00 [manager.py:391] -ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:34:59 lightllm_req_id:8 first_token_cost:170.60446739196777ms total_cost_time:170.64666748046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13434 prompt_cache_len:5151 prompt_cache_ratio:0.38343010272443057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 -DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:00 [batch.py:51] router release req id 8 -INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10859298706054688 s -INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11045551300048828 s -DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=78583443480668124340167812349999102338, time:1750768500.2467515s req_ids:[8] -DEBUG 06-24 20:35:00 [manager.py:391] -ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:202.5127410888672ms total_cost_time:202.622652053833ms,out_token_counter:1 mean_per_token_cost_time: 0.10991096496582031ms prompt_token_num:13435 prompt_cache_len:5151 prompt_cache_ratio:0.3834015630815035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 -DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:00 [batch.py:51] router release req id 8 -INFO 06-24 20:35:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10786771774291992 s -INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.1097712516784668 s -DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=56593472697731161013391566063284249802, time:1750768500.4625084s req_ids:[8] -DEBUG 06-24 20:35:00 [manager.py:391] -ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:236.35292053222656ms total_cost_time:236.39631271362305ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13436 prompt_cache_len:5151 prompt_cache_ratio:0.38337302768681153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 -DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:00 [batch.py:51] router release req id 8 -INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.1081702709197998 s -INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s -DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=313940312766660450621545253566451170725, time:1750768500.698635s req_ids:[8] -DEBUG 06-24 20:35:00 [manager.py:391] -ERROR 06-24 20:35:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:208.64009857177734ms total_cost_time:208.68563652038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13437 prompt_cache_len:5151 prompt_cache_ratio:0.3833444965394061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 -DEBUG 06-24 20:35:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:00 [batch.py:51] router release req id 8 -INFO 06-24 20:35:00 [manager.py:224] router recive req id 8 cost time 0.10886120796203613 s -INFO 06-24 20:35:00 [manager.py:68] detokenization recv req id 8 cost time 0.11102747917175293 s -DEBUG 06-24 20:35:00 [manager.py:391] Prefill Batch: batch_id=53207323025519798610899324938293730417, time:1750768500.913693s req_ids:[8] -DEBUG 06-24 20:35:00 [manager.py:391] -ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:00 lightllm_req_id:8 first_token_cost:208.40835571289062ms total_cost_time:208.45317840576172ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13438 prompt_cache_len:5151 prompt_cache_ratio:0.383315969638339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 -DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:01 [batch.py:51] router release req id 8 -INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10909652709960938 s -INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.11106610298156738 s -DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=145849162293287213710576854157590685492, time:1750768501.130194s req_ids:[8] -DEBUG 06-24 20:35:01 [manager.py:391] -ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:375.72169303894043ms total_cost_time:375.7658004760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13439 prompt_cache_len:5151 prompt_cache_ratio:0.3832874469826624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 -DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:01 [batch.py:51] router release req id 8 -INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10943031311035156 s -INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.11144328117370605 s -DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=210602672533625759662520388832070906193, time:1750768501.5094242s req_ids:[8] -DEBUG 06-24 20:35:01 [manager.py:391] -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:209.13958549499512ms total_cost_time:209.1834545135498ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13440 prompt_cache_len:5151 prompt_cache_ratio:0.38325892857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 -DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:01 [batch.py:51] router release req id 8 -INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.10877490043640137 s -INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.1106882095336914 s -DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=56280643907394361541879637541815678425, time:1750768501.7266338s req_ids:[8] -DEBUG 06-24 20:35:01 [manager.py:391] -ERROR 06-24 20:35:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:216.4902687072754ms total_cost_time:216.5534496307373ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:13441 prompt_cache_len:5151 prompt_cache_ratio:0.3832304144036902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 -DEBUG 06-24 20:35:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:01 [batch.py:51] router release req id 8 -INFO 06-24 20:35:01 [manager.py:224] router recive req id 8 cost time 0.1083378791809082 s -INFO 06-24 20:35:01 [manager.py:68] detokenization recv req id 8 cost time 0.1102457046508789 s -DEBUG 06-24 20:35:01 [manager.py:391] Prefill Batch: batch_id=173356985908822345956890136803104509534, time:1750768501.9494357s req_ids:[8] -DEBUG 06-24 20:35:01 [manager.py:391] -ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:01 lightllm_req_id:8 first_token_cost:210.95705032348633ms total_cost_time:211.00211143493652ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13442 prompt_cache_len:5151 prompt_cache_ratio:0.3832019044785002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 -DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:02 [batch.py:51] router release req id 8 -INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.1072847843170166 s -INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.10923171043395996 s -DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=30949156325615806408933136762611302858, time:1750768502.1749244s req_ids:[8] -DEBUG 06-24 20:35:02 [manager.py:391] -ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:226.34267807006836ms total_cost_time:226.38583183288574ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13443 prompt_cache_len:5151 prompt_cache_ratio:0.38317339879491186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 -DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:02 [batch.py:51] router release req id 8 -INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.10882067680358887 s -INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11086654663085938 s -DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=118068969668173087556917335370359657572, time:1750768502.3968365s req_ids:[8] -DEBUG 06-24 20:35:02 [manager.py:391] -ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:213.1974697113037ms total_cost_time:213.2420539855957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13444 prompt_cache_len:5151 prompt_cache_ratio:0.3831448973519786 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 -DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:02 [batch.py:51] router release req id 8 -INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.10878324508666992 s -INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11081194877624512 s -DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=35482596050660843407285326871113351792, time:1750768502.6179655s req_ids:[8] -DEBUG 06-24 20:35:02 [manager.py:391] -ERROR 06-24 20:35:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:370.6042766571045ms total_cost_time:370.6481456756592ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13445 prompt_cache_len:5151 prompt_cache_ratio:0.3831164001487542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 -DEBUG 06-24 20:35:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:02 [batch.py:51] router release req id 8 -INFO 06-24 20:35:02 [manager.py:224] router recive req id 8 cost time 0.1086275577545166 s -INFO 06-24 20:35:02 [manager.py:68] detokenization recv req id 8 cost time 0.11063528060913086 s -DEBUG 06-24 20:35:02 [manager.py:391] Prefill Batch: batch_id=256755598860834163583559772172432591414, time:1750768502.9932675s req_ids:[8] -DEBUG 06-24 20:35:02 [manager.py:391] -ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:02 lightllm_req_id:8 first_token_cost:217.68760681152344ms total_cost_time:217.73266792297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13446 prompt_cache_len:5151 prompt_cache_ratio:0.38308790718429275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 -DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:03 [batch.py:51] router release req id 8 -INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10958504676818848 s -INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11213064193725586 s -DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=150124896642969235925143198282878571616, time:1750768503.227286s req_ids:[8] -DEBUG 06-24 20:35:03 [manager.py:391] -ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:227.29969024658203ms total_cost_time:227.34498977661133ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13447 prompt_cache_len:5151 prompt_cache_ratio:0.38305941845764857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 -DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:03 [batch.py:51] router release req id 8 -INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10739469528198242 s -INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.10929012298583984 s -DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=273459127074743190091901798057112518392, time:1750768503.4507692s req_ids:[8] -DEBUG 06-24 20:35:03 [manager.py:391] -ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:220.49379348754883ms total_cost_time:220.53980827331543ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13448 prompt_cache_len:5151 prompt_cache_ratio:0.38303093396787624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 -DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:03 [batch.py:51] router release req id 8 -INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10867881774902344 s -INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s -DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=325624187278758528690055357092840855587, time:1750768503.6784394s req_ids:[8] -DEBUG 06-24 20:35:03 [manager.py:391] -ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:220.20983695983887ms total_cost_time:220.25465965270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13449 prompt_cache_len:5151 prompt_cache_ratio:0.3830024537140308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 -DEBUG 06-24 20:35:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:03 [batch.py:51] router release req id 8 -INFO 06-24 20:35:03 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s -INFO 06-24 20:35:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:03 [manager.py:68] detokenization recv req id 8 cost time 0.11026787757873535 s -DEBUG 06-24 20:35:03 [manager.py:391] Prefill Batch: batch_id=158480789438363950938705978413256453363, time:1750768503.9012341s req_ids:[8] -DEBUG 06-24 20:35:03 [manager.py:391] -ERROR 06-24 20:35:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:03 lightllm_req_id:8 first_token_cost:209.13290977478027ms total_cost_time:209.17725563049316ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13450 prompt_cache_len:5151 prompt_cache_ratio:0.3829739776951673 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 -DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:04 [batch.py:51] router release req id 8 -INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10797834396362305 s -INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.10965585708618164 s -DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=126667913177590825633530372334513923414, time:1750768504.1198914s req_ids:[8] -DEBUG 06-24 20:35:04 [manager.py:391] -ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:387.30645179748535ms total_cost_time:387.35079765319824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13451 prompt_cache_len:5151 prompt_cache_ratio:0.38294550591034127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 -DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:04 [batch.py:51] router release req id 8 -INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10934972763061523 s -INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.1114346981048584 s -DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=298063075927167836364322972442794323201, time:1750768504.510801s req_ids:[8] -DEBUG 06-24 20:35:04 [manager.py:391] -ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:217.60916709899902ms total_cost_time:217.6523208618164ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13452 prompt_cache_len:5151 prompt_cache_ratio:0.3829170383586084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 -DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:04 [batch.py:51] router release req id 8 -INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10849642753601074 s -INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.11041498184204102 s -DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=267589180665451916903176809702269722576, time:1750768504.7463286s req_ids:[8] -DEBUG 06-24 20:35:04 [manager.py:391] -ERROR 06-24 20:35:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:223.3293056488037ms total_cost_time:223.3724594116211ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13453 prompt_cache_len:5151 prompt_cache_ratio:0.38288857503902474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 -DEBUG 06-24 20:35:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:04 [batch.py:51] router release req id 8 -INFO 06-24 20:35:04 [manager.py:224] router recive req id 8 cost time 0.10998797416687012 s -INFO 06-24 20:35:04 [manager.py:68] detokenization recv req id 8 cost time 0.11193966865539551 s -DEBUG 06-24 20:35:04 [manager.py:391] Prefill Batch: batch_id=292660240533945290526278843991765375886, time:1750768504.966552s req_ids:[8] -DEBUG 06-24 20:35:04 [manager.py:391] -ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:04 lightllm_req_id:8 first_token_cost:218.17994117736816ms total_cost_time:218.22309494018555ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13454 prompt_cache_len:5151 prompt_cache_ratio:0.3828601159506467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 -DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:05 [batch.py:51] router release req id 8 -INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.10760927200317383 s -INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.10957646369934082 s -DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=144734362452285279752174046025416010819, time:1750768505.1862257s req_ids:[8] -DEBUG 06-24 20:35:05 [manager.py:391] -ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:206.84814453125ms total_cost_time:206.8924903869629ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13455 prompt_cache_len:5151 prompt_cache_ratio:0.38283166109253064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 -DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:05 [batch.py:51] router release req id 8 -INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.1088104248046875 s -INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.11083650588989258 s -DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=41958042831230348554039047818793133398, time:1750768505.399396s req_ids:[8] -DEBUG 06-24 20:35:05 [manager.py:391] -ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:208.35304260253906ms total_cost_time:208.39858055114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13456 prompt_cache_len:5151 prompt_cache_ratio:0.38280321046373367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 -DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:05 [batch.py:51] router release req id 8 -INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.1074380874633789 s -INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.10917353630065918 s -DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=102776368912420927022605240695449879433, time:1750768505.613654s req_ids:[8] -DEBUG 06-24 20:35:05 [manager.py:391] -ERROR 06-24 20:35:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:340.31128883361816ms total_cost_time:340.35491943359375ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13457 prompt_cache_len:5151 prompt_cache_ratio:0.3827747640633128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 -DEBUG 06-24 20:35:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:05 [batch.py:51] router release req id 8 -INFO 06-24 20:35:05 [manager.py:224] router recive req id 8 cost time 0.10868573188781738 s -INFO 06-24 20:35:05 [manager.py:68] detokenization recv req id 8 cost time 0.11065912246704102 s -DEBUG 06-24 20:35:05 [manager.py:391] Prefill Batch: batch_id=231649992310249458424839902391921210693, time:1750768505.9593399s req_ids:[8] -DEBUG 06-24 20:35:05 [manager.py:391] -ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:05 lightllm_req_id:8 first_token_cost:204.36811447143555ms total_cost_time:204.41269874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13458 prompt_cache_len:5151 prompt_cache_ratio:0.3827463218903255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 -DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:06 [batch.py:51] router release req id 8 -INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.10747742652893066 s -INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.10963749885559082 s -DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=188247043342106851397678993449346384502, time:1750768506.1713164s req_ids:[8] -DEBUG 06-24 20:35:06 [manager.py:391] -ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:208.96148681640625ms total_cost_time:209.00583267211914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13459 prompt_cache_len:5151 prompt_cache_ratio:0.3827178839438294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 -DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:06 [batch.py:51] router release req id 8 -INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.1085824966430664 s -INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11040592193603516 s -DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=17922053966545217476063007868813471002, time:1750768506.3865669s req_ids:[8] -DEBUG 06-24 20:35:06 [manager.py:391] -ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:212.11743354797363ms total_cost_time:212.16130256652832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13460 prompt_cache_len:5151 prompt_cache_ratio:0.3826894502228826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 -DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:06 [batch.py:51] router release req id 8 -INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.10826301574707031 s -INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11014699935913086 s -DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=252859183864775049065774706018928475597, time:1750768506.6014953s req_ids:[8] -DEBUG 06-24 20:35:06 [manager.py:391] -ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:218.54901313781738ms total_cost_time:218.59312057495117ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13461 prompt_cache_len:5151 prompt_cache_ratio:0.38266102072654334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 -DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:06 [batch.py:51] router release req id 8 -INFO 06-24 20:35:06 [manager.py:224] router recive req id 8 cost time 0.1081855297088623 s -INFO 06-24 20:35:06 [manager.py:68] detokenization recv req id 8 cost time 0.11005663871765137 s -DEBUG 06-24 20:35:06 [manager.py:391] Prefill Batch: batch_id=73934876882738438819546037479856896726, time:1750768506.8258233s req_ids:[8] -DEBUG 06-24 20:35:06 [manager.py:391] -ERROR 06-24 20:35:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:209.02371406555176ms total_cost_time:209.06758308410645ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13462 prompt_cache_len:5151 prompt_cache_ratio:0.38263259545387013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 -DEBUG 06-24 20:35:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:06 [batch.py:51] router release req id 8 -INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10769057273864746 s -INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.10958695411682129 s -DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=46275736988553236949085518678091005816, time:1750768507.041544s req_ids:[8] -DEBUG 06-24 20:35:07 [manager.py:391] -ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:06 lightllm_req_id:8 first_token_cost:381.8926811218262ms total_cost_time:381.93726539611816ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13463 prompt_cache_len:5151 prompt_cache_ratio:0.38260417440392186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 -DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:07 [batch.py:51] router release req id 8 -INFO 06-24 20:35:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10851669311523438 s -INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.11025834083557129 s -DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=78817578120655925193507374845731970857, time:1750768507.4288828s req_ids:[8] -DEBUG 06-24 20:35:07 [manager.py:391] -ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:169.04640197753906ms total_cost_time:169.08979415893555ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13464 prompt_cache_len:5151 prompt_cache_ratio:0.38257575757575757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 -DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:07 [batch.py:51] router release req id 8 -INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.10770630836486816 s -INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.10959029197692871 s -DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=279799178577227839569383544316492396089, time:1750768507.6064525s req_ids:[8] -DEBUG 06-24 20:35:07 [manager.py:391] -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:209.9752426147461ms total_cost_time:210.01935005187988ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13465 prompt_cache_len:5151 prompt_cache_ratio:0.3825473449684367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 -DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:07 [batch.py:51] router release req id 8 -INFO 06-24 20:35:07 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s -INFO 06-24 20:35:07 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s -DEBUG 06-24 20:35:07 [manager.py:391] Prefill Batch: batch_id=67387231057965455116714085357510225203, time:1750768507.8233638s req_ids:[8] -DEBUG 06-24 20:35:07 [manager.py:391] -ERROR 06-24 20:35:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:212.19372749328613ms total_cost_time:212.23974227905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13466 prompt_cache_len:5151 prompt_cache_ratio:0.3825189365810189 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 -DEBUG 06-24 20:35:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:07 [batch.py:51] router release req id 8 -INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.1080925464630127 s -INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.11015868186950684 s -DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=324570286705647453222583553156004154046, time:1750768508.0398533s req_ids:[8] -DEBUG 06-24 20:35:08 [manager.py:391] -ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:07 lightllm_req_id:8 first_token_cost:213.52767944335938ms total_cost_time:213.57202529907227ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13467 prompt_cache_len:5151 prompt_cache_ratio:0.38249053241256403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 -DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:08 [batch.py:51] router release req id 8 -INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10873007774353027 s -INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.11062431335449219 s -DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=129895078955538587783916350484032180685, time:1750768508.260541s req_ids:[8] -DEBUG 06-24 20:35:08 [manager.py:391] -ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:208.13465118408203ms total_cost_time:208.17804336547852ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13468 prompt_cache_len:5151 prompt_cache_ratio:0.38246213246213245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 -DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:08 [batch.py:51] router release req id 8 -INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s -INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s -DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=168961748511971567245901841521340171912, time:1750768508.4750247s req_ids:[8] -DEBUG 06-24 20:35:08 [manager.py:391] -DEBUG 06-24 20:35:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 56442.429 tokens/s -DEBUG 06-24 20:35:08 [stats.py:37] Avg prompt tokens throughput: 56434.037 tokens/s -DEBUG 06-24 20:35:08 [stats.py:37] Avg generate tokens throughput: 8.393 tokens/s -ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:386.94047927856445ms total_cost_time:386.98720932006836ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13469 prompt_cache_len:5151 prompt_cache_ratio:0.3824337367287846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 -DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:08 [batch.py:51] router release req id 8 -INFO 06-24 20:35:08 [manager.py:224] router recive req id 8 cost time 0.10579824447631836 s -INFO 06-24 20:35:08 [manager.py:68] detokenization recv req id 8 cost time 0.10780930519104004 s -DEBUG 06-24 20:35:08 [manager.py:391] Prefill Batch: batch_id=301688297832701480998399461908854773798, time:1750768508.8660834s req_ids:[8] -DEBUG 06-24 20:35:08 [manager.py:391] -ERROR 06-24 20:35:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:207.97443389892578ms total_cost_time:207.99636840820312ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:13470 prompt_cache_len:5151 prompt_cache_ratio:0.3824053452115813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 -DEBUG 06-24 20:35:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:08 [batch.py:51] router release req id 8 -INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10503554344177246 s -INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10688042640686035 s -DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=29949710202076507970261844286236806053, time:1750768509.0824265s req_ids:[8] -DEBUG 06-24 20:35:09 [manager.py:391] -ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:08 lightllm_req_id:8 first_token_cost:173.60854148864746ms total_cost_time:173.65360260009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13471 prompt_cache_len:5151 prompt_cache_ratio:0.38237695790958354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 -DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:09 [batch.py:51] router release req id 8 -INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s -INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10998678207397461 s -DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=67301179099713795909297626148240816845, time:1750768509.259925s req_ids:[8] -DEBUG 06-24 20:35:09 [manager.py:391] -ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:201.9486427307129ms total_cost_time:202.00181007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:13472 prompt_cache_len:5151 prompt_cache_ratio:0.38234857482185275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 -DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:09 [batch.py:51] router release req id 8 -INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.1087331771850586 s -INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s -DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=285293231438855593380992693706625705921, time:1750768509.4683764s req_ids:[8] -DEBUG 06-24 20:35:09 [manager.py:391] -ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:209.94830131530762ms total_cost_time:210.00194549560547ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13473 prompt_cache_len:5151 prompt_cache_ratio:0.3823201959474505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 -DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:09 [batch.py:51] router release req id 8 -INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10764837265014648 s -INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.10963964462280273 s -DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=60865504447285031860664593549026664004, time:1750768509.684147s req_ids:[8] -DEBUG 06-24 20:35:09 [manager.py:391] -ERROR 06-24 20:35:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:213.75179290771484ms total_cost_time:213.79542350769043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13474 prompt_cache_len:5151 prompt_cache_ratio:0.38229182128543865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 -DEBUG 06-24 20:35:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:09 [batch.py:51] router release req id 8 -INFO 06-24 20:35:09 [manager.py:224] router recive req id 8 cost time 0.10904669761657715 s -INFO 06-24 20:35:09 [manager.py:68] detokenization recv req id 8 cost time 0.11097168922424316 s -DEBUG 06-24 20:35:09 [manager.py:391] Prefill Batch: batch_id=65612441072643029611567753718523887471, time:1750768509.9057915s req_ids:[8] -DEBUG 06-24 20:35:09 [manager.py:391] -ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:09 lightllm_req_id:8 first_token_cost:383.1939697265625ms total_cost_time:383.23974609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13475 prompt_cache_len:5151 prompt_cache_ratio:0.3822634508348794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 -DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:10 [batch.py:51] router release req id 8 -INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.1090693473815918 s -INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.11105704307556152 s -DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=174964867633192113139435839378244985993, time:1750768510.2955701s req_ids:[8] -DEBUG 06-24 20:35:10 [manager.py:391] -ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:214.31446075439453ms total_cost_time:214.35999870300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13476 prompt_cache_len:5151 prompt_cache_ratio:0.38223508459483524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 -DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:10 [batch.py:51] router release req id 8 -INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.1067345142364502 s -INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10868096351623535 s -DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=308643828604693775118117023256396094570, time:1750768510.515572s req_ids:[8] -DEBUG 06-24 20:35:10 [manager.py:391] -ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:211.85612678527832ms total_cost_time:211.88068389892578ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:13477 prompt_cache_len:5151 prompt_cache_ratio:0.38220672256436894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 -DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:10 [batch.py:51] router release req id 8 -INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.10525703430175781 s -INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10717535018920898 s -DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=10101963248964058692171730701358249386, time:1750768510.7355995s req_ids:[8] -DEBUG 06-24 20:35:10 [manager.py:391] -ERROR 06-24 20:35:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:215.0118350982666ms total_cost_time:215.03448486328125ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13478 prompt_cache_len:5151 prompt_cache_ratio:0.3821783647425434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 -DEBUG 06-24 20:35:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:10 [batch.py:51] router release req id 8 -INFO 06-24 20:35:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:10 [manager.py:224] router recive req id 8 cost time 0.10396385192871094 s -INFO 06-24 20:35:10 [manager.py:68] detokenization recv req id 8 cost time 0.10570693016052246 s -DEBUG 06-24 20:35:10 [manager.py:391] Prefill Batch: batch_id=272397299100800314969838377656173381270, time:1750768510.954011s req_ids:[8] -DEBUG 06-24 20:35:10 [manager.py:391] -ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:10 lightllm_req_id:8 first_token_cost:210.04962921142578ms total_cost_time:210.07418632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:13479 prompt_cache_len:5151 prompt_cache_ratio:0.382150011128422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 -DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:11 [batch.py:51] router release req id 8 -INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.104888916015625 s -INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.10663533210754395 s -DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=185859036426625171754659723149511840483, time:1750768511.1679704s req_ids:[8] -DEBUG 06-24 20:35:11 [manager.py:391] -ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:211.2557888031006ms total_cost_time:211.28010749816895ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:13480 prompt_cache_len:5151 prompt_cache_ratio:0.38212166172106826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 -DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:11 [batch.py:51] router release req id 8 -INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.10470366477966309 s -INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.10591650009155273 s -DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=192177260697797844930179787479032210963, time:1750768511.385097s req_ids:[8] -DEBUG 06-24 20:35:11 [manager.py:391] -ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:212.1894359588623ms total_cost_time:212.21446990966797ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13481 prompt_cache_len:5151 prompt_cache_ratio:0.38209331651954603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 -DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:11 [batch.py:51] router release req id 8 -INFO 06-24 20:35:11 [manager.py:224] router recive req id 8 cost time 0.3054969310760498 s -INFO 06-24 20:35:11 [manager.py:68] detokenization recv req id 8 cost time 0.3075411319732666 s -DEBUG 06-24 20:35:11 [manager.py:391] Prefill Batch: batch_id=28488821842327157015131694738451324705, time:1750768511.8081417s req_ids:[8] -DEBUG 06-24 20:35:11 [manager.py:391] -ERROR 06-24 20:35:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:428.14111709594727ms total_cost_time:428.16615104675293ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13482 prompt_cache_len:5151 prompt_cache_ratio:0.38206497552291946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 -DEBUG 06-24 20:35:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:11 [batch.py:51] router release req id 8 -INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10455584526062012 s -INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10701489448547363 s -DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=53140851527832865517067344748969020035, time:1750768512.034052s req_ids:[8] -DEBUG 06-24 20:35:12 [manager.py:391] -ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:11 lightllm_req_id:8 first_token_cost:215.39831161499023ms total_cost_time:215.4216766357422ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13483 prompt_cache_len:5151 prompt_cache_ratio:0.3820366387302529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 -DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:12 [batch.py:51] router release req id 8 -INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10480928421020508 s -INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10692930221557617 s -DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=36786443851075574777872871739557629258, time:1750768512.2549913s req_ids:[8] -DEBUG 06-24 20:35:12 [manager.py:391] -ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:212.87775039672852ms total_cost_time:212.90278434753418ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13484 prompt_cache_len:5151 prompt_cache_ratio:0.3820083061406111 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 -DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:12 [batch.py:51] router release req id 8 -INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10432171821594238 s -INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.1064155101776123 s -DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=109082966791858761244917806041188341923, time:1750768512.4747255s req_ids:[8] -DEBUG 06-24 20:35:12 [manager.py:391] -ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:211.20858192443848ms total_cost_time:211.23337745666504ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:13485 prompt_cache_len:5151 prompt_cache_ratio:0.381979977753059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 -DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:12 [batch.py:51] router release req id 8 -INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.10672760009765625 s -INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.10860180854797363 s -DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=136582768890012693046713006038130492799, time:1750768512.6885269s req_ids:[8] -DEBUG 06-24 20:35:12 [manager.py:391] -ERROR 06-24 20:35:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:223.05059432983398ms total_cost_time:223.0966091156006ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13486 prompt_cache_len:5151 prompt_cache_ratio:0.3819516535666617 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 -DEBUG 06-24 20:35:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:12 [batch.py:51] router release req id 8 -INFO 06-24 20:35:12 [manager.py:224] router recive req id 8 cost time 0.1079871654510498 s -INFO 06-24 20:35:12 [manager.py:68] detokenization recv req id 8 cost time 0.11034607887268066 s -DEBUG 06-24 20:35:12 [manager.py:391] Prefill Batch: batch_id=134467692001876750305319613243137394431, time:1750768512.917149s req_ids:[8] -DEBUG 06-24 20:35:12 [manager.py:391] -ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:12 lightllm_req_id:8 first_token_cost:207.72099494934082ms total_cost_time:207.7641487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13487 prompt_cache_len:5151 prompt_cache_ratio:0.3819233335804849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 -DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:13 [batch.py:51] router release req id 8 -INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.3104104995727539 s -INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.3124861717224121 s -DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=101329860609039574373457443582120150074, time:1750768513.3433776s req_ids:[8] -DEBUG 06-24 20:35:13 [manager.py:391] -ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:432.4986934661865ms total_cost_time:432.5425624847412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13488 prompt_cache_len:5151 prompt_cache_ratio:0.3818950177935943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 -DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:13 [batch.py:51] router release req id 8 -INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.10758829116821289 s -INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.10963201522827148 s -DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=192099759748764297886426198030423768572, time:1750768513.5710893s req_ids:[8] -DEBUG 06-24 20:35:13 [manager.py:391] -ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:216.3100242614746ms total_cost_time:216.36223793029785ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13489 prompt_cache_len:5151 prompt_cache_ratio:0.381866706205056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 -DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:13 [batch.py:51] router release req id 8 -INFO 06-24 20:35:13 [manager.py:224] router recive req id 8 cost time 0.1082456111907959 s -INFO 06-24 20:35:13 [manager.py:68] detokenization recv req id 8 cost time 0.11040210723876953 s -DEBUG 06-24 20:35:13 [manager.py:391] Prefill Batch: batch_id=10656322050022907688425201649855050095, time:1750768513.7937691s req_ids:[8] -DEBUG 06-24 20:35:13 [manager.py:391] -ERROR 06-24 20:35:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:213.29307556152344ms total_cost_time:213.33670616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13490 prompt_cache_len:5151 prompt_cache_ratio:0.3818383988139363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 -DEBUG 06-24 20:35:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:13 [batch.py:51] router release req id 8 -INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10935664176940918 s -INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11128091812133789 s -DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=305429235431176243755425194693126962958, time:1750768514.0141892s req_ids:[8] -DEBUG 06-24 20:35:14 [manager.py:391] -ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:13 lightllm_req_id:8 first_token_cost:213.8214111328125ms total_cost_time:213.8655185699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13491 prompt_cache_len:5151 prompt_cache_ratio:0.38181009561930174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 -DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:14 [batch.py:51] router release req id 8 -INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10888195037841797 s -INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=215283748554130037566963571315535696329, time:1750768514.2338738s req_ids:[8] -DEBUG 06-24 20:35:14 [manager.py:391] -ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:212.4791145324707ms total_cost_time:212.5244140625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13492 prompt_cache_len:5151 prompt_cache_ratio:0.3817817966202194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 -DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:14 [batch.py:51] router release req id 8 -INFO 06-24 20:35:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.10846471786499023 s -INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.11017727851867676 s -DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=311078691381642513147451874147232175094, time:1750768514.4634604s req_ids:[8] -DEBUG 06-24 20:35:14 [manager.py:391] -ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:226.15551948547363ms total_cost_time:226.20201110839844ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13493 prompt_cache_len:5151 prompt_cache_ratio:0.3817535018157563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 -DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:14 [batch.py:51] router release req id 8 -INFO 06-24 20:35:14 [manager.py:224] router recive req id 8 cost time 0.20916199684143066 s -INFO 06-24 20:35:14 [manager.py:68] detokenization recv req id 8 cost time 0.2108478546142578 s -DEBUG 06-24 20:35:14 [manager.py:391] Prefill Batch: batch_id=5263106177638102027304930267248553063, time:1750768514.8197272s req_ids:[8] -DEBUG 06-24 20:35:14 [manager.py:391] -ERROR 06-24 20:35:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:336.9336128234863ms total_cost_time:336.9793891906738ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13494 prompt_cache_len:5151 prompt_cache_ratio:0.38172521120498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 -DEBUG 06-24 20:35:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:14 [batch.py:51] router release req id 8 -INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.10926103591918945 s -INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.11138486862182617 s -DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=94457410413412500682660574924056772373, time:1750768515.0310712s req_ids:[8] -DEBUG 06-24 20:35:15 [manager.py:391] -ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:14 lightllm_req_id:8 first_token_cost:210.67261695861816ms total_cost_time:210.71863174438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13495 prompt_cache_len:5151 prompt_cache_ratio:0.38169692478695816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 -DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:15 [batch.py:51] router release req id 8 -INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1072995662689209 s -INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949039459228516 s -DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=111262514160323482222028879809646545083, time:1750768515.2448292s req_ids:[8] -DEBUG 06-24 20:35:15 [manager.py:391] -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:212.43643760681152ms total_cost_time:212.45980262756348ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13496 prompt_cache_len:5151 prompt_cache_ratio:0.3816686425607587 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 -DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:15 [batch.py:51] router release req id 8 -INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1041557788848877 s -INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10609292984008789 s -DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=55499376893561196063863924588642457495, time:1750768515.464836s req_ids:[8] -DEBUG 06-24 20:35:15 [manager.py:391] -ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:171.7069149017334ms total_cost_time:171.73051834106445ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:13497 prompt_cache_len:5151 prompt_cache_ratio:0.3816403645254501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 -DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:15 [batch.py:51] router release req id 8 -INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.1047370433807373 s -INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10630321502685547 s -DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=68452599615287034601777441131609894798, time:1750768515.6425755s req_ids:[8] -DEBUG 06-24 20:35:15 [manager.py:391] -ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:204.23555374145508ms total_cost_time:204.2684555053711ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:13498 prompt_cache_len:5151 prompt_cache_ratio:0.38161209068010077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 -DEBUG 06-24 20:35:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:15 [batch.py:51] router release req id 8 -INFO 06-24 20:35:15 [manager.py:224] router recive req id 8 cost time 0.10492634773254395 s -INFO 06-24 20:35:15 [manager.py:68] detokenization recv req id 8 cost time 0.10689306259155273 s -DEBUG 06-24 20:35:15 [manager.py:391] Prefill Batch: batch_id=229293841028034699459007222321287600636, time:1750768515.851977s req_ids:[8] -DEBUG 06-24 20:35:15 [manager.py:391] -ERROR 06-24 20:35:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:212.33725547790527ms total_cost_time:212.36300468444824ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:13499 prompt_cache_len:5151 prompt_cache_ratio:0.3815838210237795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 -DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:16 [batch.py:51] router release req id 8 -INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.30637502670288086 s -INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.30829787254333496 s -DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=122438894719322987146298287444862272656, time:1750768516.2779472s req_ids:[8] -DEBUG 06-24 20:35:16 [manager.py:391] -ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:15 lightllm_req_id:8 first_token_cost:403.80334854125977ms total_cost_time:403.8267135620117ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:13500 prompt_cache_len:5151 prompt_cache_ratio:0.38155555555555554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 -DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:16 [batch.py:51] router release req id 8 -INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10470104217529297 s -INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.1067807674407959 s -DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=31865694289182221214471122075911492342, time:1750768516.477213s req_ids:[8] -DEBUG 06-24 20:35:16 [manager.py:391] -ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:210.07061004638672ms total_cost_time:210.09492874145508ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:13501 prompt_cache_len:5151 prompt_cache_ratio:0.38152729427449816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 -DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:16 [batch.py:51] router release req id 8 -INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10453510284423828 s -INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.10644745826721191 s -DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=91364465325999722922810822441609455594, time:1750768516.6919203s req_ids:[8] -DEBUG 06-24 20:35:16 [manager.py:391] -ERROR 06-24 20:35:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:215.38352966308594ms total_cost_time:215.4066562652588ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13502 prompt_cache_len:5151 prompt_cache_ratio:0.3814990371796771 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 -DEBUG 06-24 20:35:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:16 [batch.py:51] router release req id 8 -INFO 06-24 20:35:16 [manager.py:224] router recive req id 8 cost time 0.10762929916381836 s -INFO 06-24 20:35:16 [manager.py:68] detokenization recv req id 8 cost time 0.10955977439880371 s -DEBUG 06-24 20:35:16 [manager.py:391] Prefill Batch: batch_id=251178217581235756545374165214175187558, time:1750768516.9240246s req_ids:[8] -DEBUG 06-24 20:35:16 [manager.py:391] -ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:16 lightllm_req_id:8 first_token_cost:225.08955001831055ms total_cost_time:225.1126766204834ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:13503 prompt_cache_len:5151 prompt_cache_ratio:0.3814707842701622 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 -DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:17 [batch.py:51] router release req id 8 -INFO 06-24 20:35:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.10447573661804199 s -INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.10636377334594727 s -DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=241190968457501726389936801867976024410, time:1750768517.144868s req_ids:[8] -DEBUG 06-24 20:35:17 [manager.py:391] -ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:215.40117263793945ms total_cost_time:215.4247760772705ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:13504 prompt_cache_len:5151 prompt_cache_ratio:0.3814425355450237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 -DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:17 [batch.py:51] router release req id 8 -INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.10751724243164062 s -INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.10927295684814453 s -DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=338085777019073191096955392463724036378, time:1750768517.363466s req_ids:[8] -DEBUG 06-24 20:35:17 [manager.py:391] -ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:213.43278884887695ms total_cost_time:213.47808837890625ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13505 prompt_cache_len:5151 prompt_cache_ratio:0.3814142910033321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 -DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:17 [batch.py:51] router release req id 8 -INFO 06-24 20:35:17 [manager.py:224] router recive req id 8 cost time 0.310089111328125 s -INFO 06-24 20:35:17 [manager.py:68] detokenization recv req id 8 cost time 0.3121819496154785 s -DEBUG 06-24 20:35:17 [manager.py:391] Prefill Batch: batch_id=338667369625110186613379319189740373125, time:1750768517.7888982s req_ids:[8] -DEBUG 06-24 20:35:17 [manager.py:391] -ERROR 06-24 20:35:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:421.8635559082031ms total_cost_time:421.9067096710205ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13506 prompt_cache_len:5151 prompt_cache_ratio:0.38138605064415815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 -DEBUG 06-24 20:35:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:17 [batch.py:51] router release req id 8 -INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10622048377990723 s -INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.10828185081481934 s -DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=234075078935595581354742673354677048314, time:1750768518.012457s req_ids:[8] -DEBUG 06-24 20:35:18 [manager.py:391] -ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:17 lightllm_req_id:8 first_token_cost:214.90168571472168ms total_cost_time:214.94626998901367ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13507 prompt_cache_len:5151 prompt_cache_ratio:0.3813578144665729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 -DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:18 [batch.py:51] router release req id 8 -INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10863614082336426 s -INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11063957214355469 s -DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=7134721490995961086479906423878473167, time:1750768518.2336931s req_ids:[8] -DEBUG 06-24 20:35:18 [manager.py:391] -ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:224.01976585388184ms total_cost_time:224.06411170959473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13508 prompt_cache_len:5151 prompt_cache_ratio:0.3813295824696476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 -DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:18 [batch.py:51] router release req id 8 -INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10918712615966797 s -INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.1113271713256836 s -DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=298738891442720836578092045218827285624, time:1750768518.4641619s req_ids:[8] -DEBUG 06-24 20:35:18 [manager.py:391] -ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:35:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 53552.384 tokens/s -DEBUG 06-24 20:35:18 [stats.py:37] Avg prompt tokens throughput: 53544.346 tokens/s -DEBUG 06-24 20:35:18 [stats.py:37] Avg generate tokens throughput: 8.038 tokens/s -INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:205.08933067321777ms total_cost_time:205.13319969177246ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13509 prompt_cache_len:5151 prompt_cache_ratio:0.3813013546524539 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 -DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:18 [batch.py:51] router release req id 8 -INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10909700393676758 s -INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s -DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=338045735656072912198657337667476354446, time:1750768518.6789262s req_ids:[8] -DEBUG 06-24 20:35:18 [manager.py:391] -ERROR 06-24 20:35:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:215.49630165100098ms total_cost_time:215.54088592529297ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13510 prompt_cache_len:5151 prompt_cache_ratio:0.3812731310140637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 -DEBUG 06-24 20:35:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:18 [batch.py:51] router release req id 8 -INFO 06-24 20:35:18 [manager.py:224] router recive req id 8 cost time 0.10824894905090332 s -INFO 06-24 20:35:18 [manager.py:68] detokenization recv req id 8 cost time 0.11027002334594727 s -DEBUG 06-24 20:35:18 [manager.py:391] Prefill Batch: batch_id=257559580897265676228074629595790247602, time:1750768518.9001205s req_ids:[8] -DEBUG 06-24 20:35:18 [manager.py:391] -ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:18 lightllm_req_id:8 first_token_cost:220.57652473449707ms total_cost_time:220.62158584594727ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13511 prompt_cache_len:5151 prompt_cache_ratio:0.38124491155354895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 -DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:19 [batch.py:51] router release req id 8 -INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.310413122177124 s -INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.3124217987060547 s -DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=321246369030131685161442878848601716948, time:1750768519.3571875s req_ids:[8] -DEBUG 06-24 20:35:19 [manager.py:391] -ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:452.2745609283447ms total_cost_time:452.3186683654785ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13512 prompt_cache_len:5151 prompt_cache_ratio:0.3812166962699822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 -DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:19 [batch.py:51] router release req id 8 -INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.10793399810791016 s -INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.10993027687072754 s -DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=294187099250190950767577007509435414236, time:1750768519.5856745s req_ids:[8] -DEBUG 06-24 20:35:19 [manager.py:391] -ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:211.19165420532227ms total_cost_time:211.23790740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13513 prompt_cache_len:5151 prompt_cache_ratio:0.3811884851624362 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 -DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:19 [batch.py:51] router release req id 8 -INFO 06-24 20:35:19 [manager.py:224] router recive req id 8 cost time 0.10820221900939941 s -INFO 06-24 20:35:19 [manager.py:68] detokenization recv req id 8 cost time 0.11031436920166016 s -DEBUG 06-24 20:35:19 [manager.py:391] Prefill Batch: batch_id=212518209123676665769896616335160824133, time:1750768519.8030756s req_ids:[8] -DEBUG 06-24 20:35:19 [manager.py:391] -ERROR 06-24 20:35:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:214.5400047302246ms total_cost_time:214.5850658416748ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13514 prompt_cache_len:5151 prompt_cache_ratio:0.3811602782299837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 -DEBUG 06-24 20:35:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:19 [batch.py:51] router release req id 8 -INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10755324363708496 s -INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.1094977855682373 s -DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=260319925486479045100453733391916411980, time:1750768520.0265894s req_ids:[8] -DEBUG 06-24 20:35:20 [manager.py:391] -ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:19 lightllm_req_id:8 first_token_cost:213.31048011779785ms total_cost_time:213.33551406860352ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:13515 prompt_cache_len:5151 prompt_cache_ratio:0.38113207547169814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 -DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:20 [batch.py:51] router release req id 8 -INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10675406455993652 s -INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.10875439643859863 s -DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=166234919584143675417082794850606005682, time:1750768520.2577314s req_ids:[8] -DEBUG 06-24 20:35:20 [manager.py:391] -ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:230.20052909851074ms total_cost_time:230.24439811706543ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13516 prompt_cache_len:5151 prompt_cache_ratio:0.38110387688665287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 -DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:20 [batch.py:51] router release req id 8 -INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.10804557800292969 s -INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.11000347137451172 s -DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=76512940949376956645712486316017614531, time:1750768520.4810264s req_ids:[8] -DEBUG 06-24 20:35:20 [manager.py:391] -ERROR 06-24 20:35:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:220.73650360107422ms total_cost_time:220.7803726196289ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13517 prompt_cache_len:5151 prompt_cache_ratio:0.3810756824739217 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 -DEBUG 06-24 20:35:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:20 [batch.py:51] router release req id 8 -INFO 06-24 20:35:20 [manager.py:224] router recive req id 8 cost time 0.3115808963775635 s -INFO 06-24 20:35:20 [manager.py:68] detokenization recv req id 8 cost time 0.313647985458374 s -DEBUG 06-24 20:35:20 [manager.py:391] Prefill Batch: batch_id=214560136979281156566116959930718590886, time:1750768520.935313s req_ids:[8] -DEBUG 06-24 20:35:20 [manager.py:391] -ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:20 lightllm_req_id:8 first_token_cost:448.0314254760742ms total_cost_time:448.0750560760498ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13518 prompt_cache_len:5151 prompt_cache_ratio:0.3810474922325788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 -DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:21 [batch.py:51] router release req id 8 -INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10780739784240723 s -INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.10979294776916504 s -DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=118957170072288500085438178940283941586, time:1750768521.16327s req_ids:[8] -DEBUG 06-24 20:35:21 [manager.py:391] -ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:216.25971794128418ms total_cost_time:216.30167961120605ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13519 prompt_cache_len:5151 prompt_cache_ratio:0.3810193061616983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 -DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:21 [batch.py:51] router release req id 8 -INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10826706886291504 s -INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.11023283004760742 s -DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=74706492479499175073181673788189832839, time:1750768521.3899243s req_ids:[8] -DEBUG 06-24 20:35:21 [manager.py:391] -ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:214.29777145385742ms total_cost_time:214.3421173095703ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13520 prompt_cache_len:5151 prompt_cache_ratio:0.38099112426035503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 -DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:21 [batch.py:51] router release req id 8 -INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10654997825622559 s -INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.10853838920593262 s -DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=61747741271542383717374210681947527137, time:1750768521.6200888s req_ids:[8] -DEBUG 06-24 20:35:21 [manager.py:391] -ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:230.21841049194336ms total_cost_time:230.26466369628906ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13521 prompt_cache_len:5151 prompt_cache_ratio:0.3809629465276237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 -DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:21 [batch.py:51] router release req id 8 -INFO 06-24 20:35:21 [manager.py:224] router recive req id 8 cost time 0.10911226272583008 s -INFO 06-24 20:35:21 [manager.py:68] detokenization recv req id 8 cost time 0.11067700386047363 s -DEBUG 06-24 20:35:21 [manager.py:391] Prefill Batch: batch_id=44003045671199032746318593329220581350, time:1750768521.8419807s req_ids:[8] -DEBUG 06-24 20:35:21 [manager.py:391] -ERROR 06-24 20:35:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:206.4809799194336ms total_cost_time:206.5267562866211ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13522 prompt_cache_len:5151 prompt_cache_ratio:0.3809347729625795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 -DEBUG 06-24 20:35:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:21 [batch.py:51] router release req id 8 -INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.11270380020141602 s -DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=265855933397719171773198952219928772313, time:1750768522.0562634s req_ids:[8] -DEBUG 06-24 20:35:22 [manager.py:391] -INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11477446556091309 s -ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:21 lightllm_req_id:8 first_token_cost:200.97017288208008ms total_cost_time:201.07793807983398ms,out_token_counter:1 mean_per_token_cost_time: 0.10776519775390625ms prompt_token_num:13523 prompt_cache_len:5151 prompt_cache_ratio:0.38090660356429784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 -DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:22 [batch.py:51] router release req id 8 -INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s -INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11004233360290527 s -DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=207068961858260333355372842801318544803, time:1750768522.2656786s req_ids:[8] -DEBUG 06-24 20:35:22 [manager.py:391] -ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:379.87208366394043ms total_cost_time:379.9169063568115ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13524 prompt_cache_len:5151 prompt_cache_ratio:0.3808784383318545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 -DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:22 [batch.py:51] router release req id 8 -INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10879230499267578 s -INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11063408851623535 s -DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=222863301228541998130265184312956947443, time:1750768522.6590815s req_ids:[8] -DEBUG 06-24 20:35:22 [manager.py:391] -ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:223.08802604675293ms total_cost_time:223.1314182281494ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13525 prompt_cache_len:5151 prompt_cache_ratio:0.3808502772643253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 -DEBUG 06-24 20:35:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:22 [batch.py:51] router release req id 8 -INFO 06-24 20:35:22 [manager.py:224] router recive req id 8 cost time 0.10773324966430664 s -INFO 06-24 20:35:22 [manager.py:68] detokenization recv req id 8 cost time 0.11022615432739258 s -DEBUG 06-24 20:35:22 [manager.py:391] Prefill Batch: batch_id=62801144581053709785786445820751095853, time:1750768522.880199s req_ids:[8] -DEBUG 06-24 20:35:22 [manager.py:391] -ERROR 06-24 20:35:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:213.41896057128906ms total_cost_time:213.46402168273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13526 prompt_cache_len:5151 prompt_cache_ratio:0.3808221203607866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 -DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:23 [batch.py:51] router release req id 8 -INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10647249221801758 s -INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.10840892791748047 s -DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=282627781475866203830181504569334316234, time:1750768523.0997498s req_ids:[8] -DEBUG 06-24 20:35:23 [manager.py:391] -ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:22 lightllm_req_id:8 first_token_cost:213.3805751800537ms total_cost_time:213.4251594543457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13527 prompt_cache_len:5151 prompt_cache_ratio:0.3807939676203149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 -INFO 06-24 20:35:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:35:23 [statics_utils.py:24] mean first cost: 230.9125709439558 ms -INFO 06-24 20:35:23 [statics_utils.py:24] mean per token cost: 0.05948571368764656 ms -DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:23 [batch.py:51] router release req id 8 -INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10892796516418457 s -INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.11141824722290039 s -DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=101451309035023524541161163381679769151, time:1750768523.3198268s req_ids:[8] -DEBUG 06-24 20:35:23 [manager.py:391] -ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:211.17329597473145ms total_cost_time:211.21621131896973ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13528 prompt_cache_len:5151 prompt_cache_ratio:0.380765819041987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 -DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:23 [batch.py:51] router release req id 8 -INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10868334770202637 s -INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s -DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=25223083144882995848980420397434503363, time:1750768523.5451324s req_ids:[8] -DEBUG 06-24 20:35:23 [manager.py:391] -ERROR 06-24 20:35:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:214.27321434020996ms total_cost_time:214.2939567565918ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13529 prompt_cache_len:5151 prompt_cache_ratio:0.3807376746248799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 -DEBUG 06-24 20:35:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:23 [batch.py:51] router release req id 8 -INFO 06-24 20:35:23 [manager.py:224] router recive req id 8 cost time 0.10691666603088379 s -INFO 06-24 20:35:23 [manager.py:68] detokenization recv req id 8 cost time 0.10895442962646484 s -DEBUG 06-24 20:35:23 [manager.py:391] Prefill Batch: batch_id=236830920466563412432249674157409265386, time:1750768523.7594347s req_ids:[8] -DEBUG 06-24 20:35:23 [manager.py:391] -ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:23 lightllm_req_id:8 first_token_cost:373.1493949890137ms total_cost_time:373.19350242614746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13530 prompt_cache_len:5151 prompt_cache_ratio:0.380709534368071 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 -DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:24 [batch.py:51] router release req id 8 -INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10770988464355469 s -INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.10996747016906738 s -DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=97326612597371904055037042127020402445, time:1750768524.139205s req_ids:[8] -DEBUG 06-24 20:35:24 [manager.py:391] -ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:206.79879188537598ms total_cost_time:206.84242248535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13531 prompt_cache_len:5151 prompt_cache_ratio:0.3806813982706378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 -DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:24 [batch.py:51] router release req id 8 -INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10660123825073242 s -INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.10860419273376465 s -DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=132762905801960123887224584138663015879, time:1750768524.3514616s req_ids:[8] -DEBUG 06-24 20:35:24 [manager.py:391] -ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:201.29680633544922ms total_cost_time:201.34305953979492ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13532 prompt_cache_len:5151 prompt_cache_ratio:0.3806532663316583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 -DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:24 [batch.py:51] router release req id 8 -INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10932731628417969 s -INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11131548881530762 s -DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=143822487837873180608873872689709161988, time:1750768524.5586855s req_ids:[8] -DEBUG 06-24 20:35:24 [manager.py:391] -ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:210.78944206237793ms total_cost_time:210.8321189880371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13533 prompt_cache_len:5151 prompt_cache_ratio:0.3806251385502106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 -DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:24 [batch.py:51] router release req id 8 -INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10851025581359863 s -INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11051106452941895 s -DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=62590573886996443778712731714568861239, time:1750768524.7763762s req_ids:[8] -DEBUG 06-24 20:35:24 [manager.py:391] -ERROR 06-24 20:35:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:214.646577835083ms total_cost_time:214.6890163421631ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13534 prompt_cache_len:5151 prompt_cache_ratio:0.3805970149253731 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 -DEBUG 06-24 20:35:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:24 [batch.py:51] router release req id 8 -INFO 06-24 20:35:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:24 [manager.py:224] router recive req id 8 cost time 0.10943722724914551 s -INFO 06-24 20:35:24 [manager.py:68] detokenization recv req id 8 cost time 0.11133766174316406 s -DEBUG 06-24 20:35:24 [manager.py:391] Prefill Batch: batch_id=137146347829630407559372545946519609540, time:1750768524.99712s req_ids:[8] -DEBUG 06-24 20:35:24 [manager.py:391] -ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:24 lightllm_req_id:8 first_token_cost:209.3679904937744ms total_cost_time:209.41448211669922ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13535 prompt_cache_len:5151 prompt_cache_ratio:0.3805688954562246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 -DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:25 [batch.py:51] router release req id 8 -INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10835480690002441 s -INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.11026525497436523 s -DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=101646140415386300518398467143017679567, time:1750768525.213583s req_ids:[8] -DEBUG 06-24 20:35:25 [manager.py:391] -ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:386.6889476776123ms total_cost_time:386.7313861846924ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13536 prompt_cache_len:5151 prompt_cache_ratio:0.38054078014184395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 -DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:25 [batch.py:51] router release req id 8 -INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10848379135131836 s -INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.1106412410736084 s -DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=246155680397242381860894956418341823248, time:1750768525.6081538s req_ids:[8] -DEBUG 06-24 20:35:25 [manager.py:391] -ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:216.13430976867676ms total_cost_time:216.17889404296875ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13537 prompt_cache_len:5151 prompt_cache_ratio:0.3805126689813105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 -DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:25 [batch.py:51] router release req id 8 -INFO 06-24 20:35:25 [manager.py:224] router recive req id 8 cost time 0.10881710052490234 s -INFO 06-24 20:35:25 [manager.py:68] detokenization recv req id 8 cost time 0.110748291015625 s -DEBUG 06-24 20:35:25 [manager.py:391] Prefill Batch: batch_id=24641326686816645603027094255189333353, time:1750768525.8324242s req_ids:[8] -DEBUG 06-24 20:35:25 [manager.py:391] -ERROR 06-24 20:35:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:211.9290828704834ms total_cost_time:211.9584083557129ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:13538 prompt_cache_len:5151 prompt_cache_ratio:0.38048456197370367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 -DEBUG 06-24 20:35:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:25 [batch.py:51] router release req id 8 -INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10706329345703125 s -INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.10902142524719238 s -DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=311621180563016062063343573876797254757, time:1750768526.052534s req_ids:[8] -DEBUG 06-24 20:35:26 [manager.py:391] -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:26 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:25 lightllm_req_id:8 first_token_cost:216.37821197509766ms total_cost_time:216.42208099365234ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13539 prompt_cache_len:5151 prompt_cache_ratio:0.3804564591181033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 -DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:26 [batch.py:51] router release req id 8 -INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10910272598266602 s -INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.11110687255859375 s -DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=164511702816413139899114360973358632465, time:1750768526.271763s req_ids:[8] -DEBUG 06-24 20:35:26 [manager.py:391] -ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:214.31350708007812ms total_cost_time:214.3564224243164ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13540 prompt_cache_len:5151 prompt_cache_ratio:0.38042836041358935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 -DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:26 [batch.py:51] router release req id 8 -INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10838937759399414 s -INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.11037111282348633 s -DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=144933136015706402702299558769619873126, time:1750768526.4923673s req_ids:[8] -DEBUG 06-24 20:35:26 [manager.py:391] -ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:213.72294425964355ms total_cost_time:213.76824378967285ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13541 prompt_cache_len:5151 prompt_cache_ratio:0.3804002658592423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 -DEBUG 06-24 20:35:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:26 [batch.py:51] router release req id 8 -INFO 06-24 20:35:26 [manager.py:224] router recive req id 8 cost time 0.10796999931335449 s -INFO 06-24 20:35:26 [manager.py:68] detokenization recv req id 8 cost time 0.10986566543579102 s -DEBUG 06-24 20:35:26 [manager.py:391] Prefill Batch: batch_id=204607957924619966199713946750856585955, time:1750768526.7118762s req_ids:[8] -DEBUG 06-24 20:35:26 [manager.py:391] -ERROR 06-24 20:35:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:387.1006965637207ms total_cost_time:387.1438503265381ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13542 prompt_cache_len:5151 prompt_cache_ratio:0.38037217545414265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 -DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:27 [batch.py:51] router release req id 8 -INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10872817039489746 s -INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.11121034622192383 s -DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=171605179324773507327757219479854522597, time:1750768527.1094468s req_ids:[8] -DEBUG 06-24 20:35:27 [manager.py:391] -ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:26 lightllm_req_id:8 first_token_cost:219.5589542388916ms total_cost_time:219.6042537689209ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13543 prompt_cache_len:5151 prompt_cache_ratio:0.38034408919737134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 -DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:27 [batch.py:51] router release req id 8 -INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10753583908081055 s -INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s -DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=106478689907539338942813326902768897035, time:1750768527.3417575s req_ids:[8] -DEBUG 06-24 20:35:27 [manager.py:391] -ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:224.3669033050537ms total_cost_time:224.3947982788086ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:13544 prompt_cache_len:5151 prompt_cache_ratio:0.38031600708800944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 -DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:27 [batch.py:51] router release req id 8 -INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10717201232910156 s -INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.10908222198486328 s -DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=50535755730518306237220409847822448762, time:1750768527.5633478s req_ids:[8] -DEBUG 06-24 20:35:27 [manager.py:391] -ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:214.0491008758545ms total_cost_time:214.0958309173584ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13545 prompt_cache_len:5151 prompt_cache_ratio:0.3802879291251384 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 -DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:27 [batch.py:51] router release req id 8 -INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10894584655761719 s -INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.1108856201171875 s -DEBUG 06-24 20:35:27 [manager.py:391] Prefill Batch: batch_id=45002839426668605962024154616178814190, time:1750768527.7840836s req_ids:[8] -DEBUG 06-24 20:35:27 [manager.py:391] -ERROR 06-24 20:35:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:214.6153450012207ms total_cost_time:214.6611213684082ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13546 prompt_cache_len:5151 prompt_cache_ratio:0.38025985530783996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 -DEBUG 06-24 20:35:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:27 [batch.py:51] router release req id 8 -INFO 06-24 20:35:27 [manager.py:224] router recive req id 8 cost time 0.10820555686950684 s -INFO 06-24 20:35:27 [manager.py:68] detokenization recv req id 8 cost time 0.11014294624328613 s -DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=279135822200767994996470094155979586962, time:1750768528.0038457s req_ids:[8] -DEBUG 06-24 20:35:28 [manager.py:391] -ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:27 lightllm_req_id:8 first_token_cost:215.17276763916016ms total_cost_time:215.21377563476562ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13547 prompt_cache_len:5151 prompt_cache_ratio:0.380231785635196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 -DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:28 [batch.py:51] router release req id 8 -INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10881352424621582 s -INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098384857177734 s -DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=303575767843888318578559565346184908747, time:1750768528.2237856s req_ids:[8] -DEBUG 06-24 20:35:28 [manager.py:391] -ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:386.5337371826172ms total_cost_time:386.5773677825928ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13548 prompt_cache_len:5151 prompt_cache_ratio:0.3802037201062888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 -DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:28 [batch.py:51] router release req id 8 -INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10892963409423828 s -INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.11098790168762207 s -DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=222646558114371765648188461988518780805, time:1750768528.6192265s req_ids:[8] -DEBUG 06-24 20:35:28 [manager.py:391] -DEBUG 06-24 20:35:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 53766.600 tokens/s -DEBUG 06-24 20:35:28 [stats.py:37] Avg prompt tokens throughput: 53758.752 tokens/s -DEBUG 06-24 20:35:28 [stats.py:37] Avg generate tokens throughput: 7.848 tokens/s -ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:220.68262100219727ms total_cost_time:220.72720527648926ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13549 prompt_cache_len:5151 prompt_cache_ratio:0.38017565872020076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 -DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:28 [batch.py:51] router release req id 8 -INFO 06-24 20:35:28 [manager.py:224] router recive req id 8 cost time 0.10753560066223145 s -INFO 06-24 20:35:28 [manager.py:68] detokenization recv req id 8 cost time 0.10962128639221191 s -DEBUG 06-24 20:35:28 [manager.py:391] Prefill Batch: batch_id=45224402552076409646116309389511232286, time:1750768528.8452046s req_ids:[8] -DEBUG 06-24 20:35:28 [manager.py:391] -ERROR 06-24 20:35:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:216.29571914672852ms total_cost_time:216.3403034210205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13550 prompt_cache_len:5151 prompt_cache_ratio:0.3801476014760148 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 -DEBUG 06-24 20:35:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:28 [batch.py:51] router release req id 8 -INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10804939270019531 s -INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.1097862720489502 s -DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=10632232300827147859452230043595652521, time:1750768529.069847s req_ids:[8] -DEBUG 06-24 20:35:29 [manager.py:391] -ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:28 lightllm_req_id:8 first_token_cost:175.82941055297852ms total_cost_time:175.85492134094238ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13551 prompt_cache_len:5151 prompt_cache_ratio:0.3801195483728138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 -DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:29 [batch.py:51] router release req id 8 -DEBUG 06-24 20:35:29 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:29 [manager.py:283] -DEBUG 06-24 20:35:29 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:29 [manager.py:284] -INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10695171356201172 s -INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10898971557617188 s -DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=290215412406720085126261590046712118234, time:1750768529.250307s req_ids:[8] -DEBUG 06-24 20:35:29 [manager.py:391] -ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:211.43817901611328ms total_cost_time:211.48180961608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13552 prompt_cache_len:5151 prompt_cache_ratio:0.38009149940968123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 -DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:29 [batch.py:51] router release req id 8 -INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10792112350463867 s -INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10995721817016602 s -DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=101634299414418226106890870212665705106, time:1750768529.4663765s req_ids:[8] -DEBUG 06-24 20:35:29 [manager.py:391] -ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:215.21854400634766ms total_cost_time:215.25931358337402ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:13553 prompt_cache_len:5151 prompt_cache_ratio:0.38006345458570057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 -DEBUG 06-24 20:35:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:29 [batch.py:51] router release req id 8 -INFO 06-24 20:35:29 [manager.py:224] router recive req id 8 cost time 0.10773062705993652 s -INFO 06-24 20:35:29 [manager.py:68] detokenization recv req id 8 cost time 0.10974955558776855 s -DEBUG 06-24 20:35:29 [manager.py:391] Prefill Batch: batch_id=161549705809815091670116986995449395976, time:1750768529.6867502s req_ids:[8] -DEBUG 06-24 20:35:29 [manager.py:391] -ERROR 06-24 20:35:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:406.9249629974365ms total_cost_time:406.9702625274658ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13554 prompt_cache_len:5151 prompt_cache_ratio:0.3800354138999557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 -DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:30 [batch.py:51] router release req id 8 -INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10801053047180176 s -INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10999608039855957 s -DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=3363760501513089804308577449257133545, time:1750768530.1011932s req_ids:[8] -DEBUG 06-24 20:35:30 [manager.py:391] -ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:29 lightllm_req_id:8 first_token_cost:213.2577896118164ms total_cost_time:213.3030891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13555 prompt_cache_len:5151 prompt_cache_ratio:0.3800073773515308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 -DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:30 [batch.py:51] router release req id 8 -INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10790610313415527 s -INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10989618301391602 s -DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=288406126327584555997376147271561287245, time:1750768530.3250968s req_ids:[8] -DEBUG 06-24 20:35:30 [manager.py:391] -ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:221.4045524597168ms total_cost_time:221.44842147827148ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13556 prompt_cache_len:5151 prompt_cache_ratio:0.3799793449395102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 -DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:30 [batch.py:51] router release req id 8 -INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.1083221435546875 s -INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.11029195785522461 s -DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=232861678327069312893139014494772282294, time:1750768530.5481477s req_ids:[8] -DEBUG 06-24 20:35:30 [manager.py:391] -ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:216.88342094421387ms total_cost_time:216.92657470703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13557 prompt_cache_len:5151 prompt_cache_ratio:0.37995131666297854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 -DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:30 [batch.py:51] router release req id 8 -INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10637950897216797 s -INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10842561721801758 s -DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=148102374207448866030297248807675922900, time:1750768530.774753s req_ids:[8] -DEBUG 06-24 20:35:30 [manager.py:391] -ERROR 06-24 20:35:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:216.64094924926758ms total_cost_time:216.68410301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13558 prompt_cache_len:5151 prompt_cache_ratio:0.3799232925210208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 -DEBUG 06-24 20:35:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:30 [batch.py:51] router release req id 8 -INFO 06-24 20:35:30 [manager.py:224] router recive req id 8 cost time 0.10775184631347656 s -INFO 06-24 20:35:30 [manager.py:68] detokenization recv req id 8 cost time 0.10968184471130371 s -DEBUG 06-24 20:35:30 [manager.py:391] Prefill Batch: batch_id=2958194460294366080596916224931478982, time:1750768530.994861s req_ids:[8] -DEBUG 06-24 20:35:30 [manager.py:391] -ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:30 lightllm_req_id:8 first_token_cost:214.7822380065918ms total_cost_time:214.82539176940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13559 prompt_cache_len:5151 prompt_cache_ratio:0.3798952725127222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 -DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:31 [batch.py:51] router release req id 8 -INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10851478576660156 s -INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11051130294799805 s -DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=14660690526824778629956306495919479849, time:1750768531.216488s req_ids:[8] -DEBUG 06-24 20:35:31 [manager.py:391] -ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:389.721155166626ms total_cost_time:389.7831439971924ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13560 prompt_cache_len:5151 prompt_cache_ratio:0.37986725663716814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 -DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:31 [batch.py:51] router release req id 8 -INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10866641998291016 s -INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s -DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=279983004152471551153886949839592746078, time:1750768531.613336s req_ids:[8] -DEBUG 06-24 20:35:31 [manager.py:391] -ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:210.86907386779785ms total_cost_time:210.91413497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13561 prompt_cache_len:5151 prompt_cache_ratio:0.37983924489344445 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 -DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:31 [batch.py:51] router release req id 8 -INFO 06-24 20:35:31 [manager.py:224] router recive req id 8 cost time 0.10903620719909668 s -INFO 06-24 20:35:31 [manager.py:68] detokenization recv req id 8 cost time 0.11013436317443848 s -DEBUG 06-24 20:35:31 [manager.py:391] Prefill Batch: batch_id=310210767645060727808977156509735760854, time:1750768531.8301296s req_ids:[8] -DEBUG 06-24 20:35:31 [manager.py:391] -ERROR 06-24 20:35:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:213.10186386108398ms total_cost_time:213.14549446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13562 prompt_cache_len:5151 prompt_cache_ratio:0.3798112372806371 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 -DEBUG 06-24 20:35:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:31 [batch.py:51] router release req id 8 -INFO 06-24 20:35:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10948824882507324 s -INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.1114652156829834 s -DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=191242360890962216877215381395845433125, time:1750768532.0513444s req_ids:[8] -DEBUG 06-24 20:35:32 [manager.py:391] -ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:31 lightllm_req_id:8 first_token_cost:213.2394313812256ms total_cost_time:213.3009433746338ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:13563 prompt_cache_len:5151 prompt_cache_ratio:0.37978323379783235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 -DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:32 [batch.py:51] router release req id 8 -INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10674452781677246 s -INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.10880661010742188 s -DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=329806366452338006307468627474264628358, time:1750768532.2776184s req_ids:[8] -DEBUG 06-24 20:35:32 [manager.py:391] -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:226.2887954711914ms total_cost_time:226.3333797454834ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13564 prompt_cache_len:5151 prompt_cache_ratio:0.3797552344441168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 -DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:32 [batch.py:51] router release req id 8 -INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.1084439754486084 s -INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s -DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=281336328116864151463279990971011717083, time:1750768532.5075297s req_ids:[8] -DEBUG 06-24 20:35:32 [manager.py:391] -ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:222.84340858459473ms total_cost_time:222.88751602172852ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13565 prompt_cache_len:5151 prompt_cache_ratio:0.3797272392185772 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 -DEBUG 06-24 20:35:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:32 [batch.py:51] router release req id 8 -INFO 06-24 20:35:32 [manager.py:224] router recive req id 8 cost time 0.10831212997436523 s -INFO 06-24 20:35:32 [manager.py:68] detokenization recv req id 8 cost time 0.11032629013061523 s -DEBUG 06-24 20:35:32 [manager.py:391] Prefill Batch: batch_id=85873833822730675423254547922733870832, time:1750768532.731346s req_ids:[8] -DEBUG 06-24 20:35:32 [manager.py:391] -ERROR 06-24 20:35:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:216.45522117614746ms total_cost_time:216.51601791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:13566 prompt_cache_len:5151 prompt_cache_ratio:0.37969924812030076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 -DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:33 [batch.py:51] router release req id 8 -INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.31066012382507324 s -INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.3126206398010254 s -DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=5161970200727564149104243896581751760, time:1750768533.1619406s req_ids:[8] -DEBUG 06-24 20:35:33 [manager.py:391] -ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:32 lightllm_req_id:8 first_token_cost:427.783727645874ms total_cost_time:427.8290271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13567 prompt_cache_len:5151 prompt_cache_ratio:0.37967126114837474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 -DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:33 [batch.py:51] router release req id 8 -INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10874271392822266 s -INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11065173149108887 s -DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=120607128789180591877474473341246723163, time:1750768533.3892071s req_ids:[8] -DEBUG 06-24 20:35:33 [manager.py:391] -ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:216.2156105041504ms total_cost_time:216.25947952270508ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13568 prompt_cache_len:5151 prompt_cache_ratio:0.37964327830188677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 -DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:33 [batch.py:51] router release req id 8 -INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10796761512756348 s -INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11000585556030273 s -DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=299143253673056076435071339025638758269, time:1750768533.6104162s req_ids:[8] -DEBUG 06-24 20:35:33 [manager.py:391] -ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:217.33951568603516ms total_cost_time:217.40078926086426ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:13569 prompt_cache_len:5151 prompt_cache_ratio:0.37961529957992485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 -DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:33 [batch.py:51] router release req id 8 -INFO 06-24 20:35:33 [manager.py:224] router recive req id 8 cost time 0.10908389091491699 s -INFO 06-24 20:35:33 [manager.py:68] detokenization recv req id 8 cost time 0.11095380783081055 s -DEBUG 06-24 20:35:33 [manager.py:391] Prefill Batch: batch_id=259792603472551837651152441632966603004, time:1750768533.8330035s req_ids:[8] -DEBUG 06-24 20:35:33 [manager.py:391] -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:212.92948722839355ms total_cost_time:212.97550201416016ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13570 prompt_cache_len:5151 prompt_cache_ratio:0.379587324981577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 -DEBUG 06-24 20:35:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:33 [batch.py:51] router release req id 8 -INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10799932479858398 s -INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11009716987609863 s -DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=78287704870024235580135876696246654243, time:1750768534.0542057s req_ids:[8] -DEBUG 06-24 20:35:34 [manager.py:391] -ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:33 lightllm_req_id:8 first_token_cost:215.74831008911133ms total_cost_time:215.81029891967773ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:13571 prompt_cache_len:5151 prompt_cache_ratio:0.37955935450593176 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 -DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:34 [batch.py:51] router release req id 8 -INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10796928405761719 s -INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.1099543571472168 s -DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=20329249252715400333158940932378889567, time:1750768534.2768936s req_ids:[8] -DEBUG 06-24 20:35:34 [manager.py:391] -ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:391.1294937133789ms total_cost_time:391.19744300842285ms,out_token_counter:1 mean_per_token_cost_time: 0.06794929504394531ms prompt_token_num:13572 prompt_cache_len:5151 prompt_cache_ratio:0.3795313881520778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 -DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:34 [batch.py:51] router release req id 8 -INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10879659652709961 s -INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11070537567138672 s -DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=24348689807481574669774629787261145293, time:1750768534.6733654s req_ids:[8] -DEBUG 06-24 20:35:34 [manager.py:391] -ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:210.22629737854004ms total_cost_time:210.27255058288574ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13573 prompt_cache_len:5151 prompt_cache_ratio:0.37950342591910413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 -DEBUG 06-24 20:35:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:34 [batch.py:51] router release req id 8 -INFO 06-24 20:35:34 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s -INFO 06-24 20:35:34 [manager.py:68] detokenization recv req id 8 cost time 0.11059141159057617 s -DEBUG 06-24 20:35:34 [manager.py:391] Prefill Batch: batch_id=181691761535626656249215004079537671223, time:1750768534.8915102s req_ids:[8] -DEBUG 06-24 20:35:34 [manager.py:391] -ERROR 06-24 20:35:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:211.35449409484863ms total_cost_time:211.39931678771973ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13574 prompt_cache_len:5151 prompt_cache_ratio:0.3794754678060999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 -DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:35 [batch.py:51] router release req id 8 -INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.1087186336517334 s -INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.1105506420135498 s -DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=236300158308190338302602781827858920842, time:1750768535.108223s req_ids:[8] -DEBUG 06-24 20:35:35 [manager.py:391] -ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:34 lightllm_req_id:8 first_token_cost:210.22844314575195ms total_cost_time:210.27135848999023ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13575 prompt_cache_len:5151 prompt_cache_ratio:0.3794475138121547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 -DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:35 [batch.py:51] router release req id 8 -INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10840010643005371 s -INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11025571823120117 s -DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=216655161829894292069005913172566541290, time:1750768535.3246982s req_ids:[8] -DEBUG 06-24 20:35:35 [manager.py:391] -ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:218.17612648010254ms total_cost_time:218.23692321777344ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:13576 prompt_cache_len:5151 prompt_cache_ratio:0.3794195639363583 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 -DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:35 [batch.py:51] router release req id 8 -INFO 06-24 20:35:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10879683494567871 s -INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11079716682434082 s -DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=302396177918448125234015697100354566302, time:1750768535.548697s req_ids:[8] -DEBUG 06-24 20:35:35 [manager.py:391] -ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:215.6054973602295ms total_cost_time:215.64817428588867ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13577 prompt_cache_len:5151 prompt_cache_ratio:0.3793916181778007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 -DEBUG 06-24 20:35:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:35 [batch.py:51] router release req id 8 -INFO 06-24 20:35:35 [manager.py:224] router recive req id 8 cost time 0.10898280143737793 s -INFO 06-24 20:35:35 [manager.py:68] detokenization recv req id 8 cost time 0.11007547378540039 s -DEBUG 06-24 20:35:35 [manager.py:391] Prefill Batch: batch_id=197906200729026109771295435179287237497, time:1750768535.770817s req_ids:[8] -DEBUG 06-24 20:35:35 [manager.py:391] -ERROR 06-24 20:35:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:214.51091766357422ms total_cost_time:214.55693244934082ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13578 prompt_cache_len:5151 prompt_cache_ratio:0.37936367653557224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 -DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:36 [batch.py:51] router release req id 8 -INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.30853819847106934 s -INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.3104114532470703 s -DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=14845535836316618575025318317005125427, time:1750768536.200914s req_ids:[8] -DEBUG 06-24 20:35:36 [manager.py:391] -ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:35 lightllm_req_id:8 first_token_cost:432.54923820495605ms total_cost_time:432.57904052734375ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:13579 prompt_cache_len:5151 prompt_cache_ratio:0.37933573900876355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 -DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:36 [batch.py:51] router release req id 8 -INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10693049430847168 s -INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.10878181457519531 s -DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=311261208011281589952512084143693185075, time:1750768536.4302967s req_ids:[8] -DEBUG 06-24 20:35:36 [manager.py:391] -ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:217.37432479858398ms total_cost_time:217.41867065429688ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13580 prompt_cache_len:5151 prompt_cache_ratio:0.3793078055964654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 -DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:36 [batch.py:51] router release req id 8 -INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10851430892944336 s -INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.11056256294250488 s -DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=3641296087445680746202259588715548498, time:1750768536.6532393s req_ids:[8] -DEBUG 06-24 20:35:36 [manager.py:391] -ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:214.39743041992188ms total_cost_time:214.44439888000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13581 prompt_cache_len:5151 prompt_cache_ratio:0.37927987629776894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 -DEBUG 06-24 20:35:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:36 [batch.py:51] router release req id 8 -INFO 06-24 20:35:36 [manager.py:224] router recive req id 8 cost time 0.10762953758239746 s -INFO 06-24 20:35:36 [manager.py:68] detokenization recv req id 8 cost time 0.10964560508728027 s -DEBUG 06-24 20:35:36 [manager.py:391] Prefill Batch: batch_id=278834966051042837863672751450034443664, time:1750768536.8762949s req_ids:[8] -DEBUG 06-24 20:35:36 [manager.py:391] -ERROR 06-24 20:35:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:214.1406536102295ms total_cost_time:214.1859531402588ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13582 prompt_cache_len:5151 prompt_cache_ratio:0.3792519511117656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 -DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:37 [batch.py:51] router release req id 8 -INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10733580589294434 s -INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.1092066764831543 s -DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=119256392807729663652892353861577670449, time:1750768537.0975304s req_ids:[8] -DEBUG 06-24 20:35:37 [manager.py:391] -ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:36 lightllm_req_id:8 first_token_cost:218.11389923095703ms total_cost_time:218.15729141235352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13583 prompt_cache_len:5151 prompt_cache_ratio:0.37922403003754696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 -DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:37 [batch.py:51] router release req id 8 -INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10800814628601074 s -INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.10920095443725586 s -DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=251511050988714103956253247271376093030, time:1750768537.3187537s req_ids:[8] -DEBUG 06-24 20:35:37 [manager.py:391] -ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:214.94030952453613ms total_cost_time:214.98727798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13584 prompt_cache_len:5151 prompt_cache_ratio:0.3791961130742049 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 -DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:37 [batch.py:51] router release req id 8 -INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.31099843978881836 s -INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.3129112720489502 s -DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=304757996826823222441906173507418880528, time:1750768537.7506688s req_ids:[8] -DEBUG 06-24 20:35:37 [manager.py:391] -ERROR 06-24 20:35:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:431.8265914916992ms total_cost_time:431.8854808807373ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:13585 prompt_cache_len:5151 prompt_cache_ratio:0.3791682002208318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 -DEBUG 06-24 20:35:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:37 [batch.py:51] router release req id 8 -INFO 06-24 20:35:37 [manager.py:224] router recive req id 8 cost time 0.10791611671447754 s -INFO 06-24 20:35:37 [manager.py:68] detokenization recv req id 8 cost time 0.10918951034545898 s -DEBUG 06-24 20:35:37 [manager.py:391] Prefill Batch: batch_id=13146610025857157507931041877957509928, time:1750768537.980818s req_ids:[8] -DEBUG 06-24 20:35:37 [manager.py:391] -ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:37 lightllm_req_id:8 first_token_cost:208.8601589202881ms total_cost_time:208.90498161315918ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13586 prompt_cache_len:5151 prompt_cache_ratio:0.37914029147651995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 -DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:38 [batch.py:51] router release req id 8 -INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.1077570915222168 s -INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10912847518920898 s -DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=252127636206557652567308260310565404815, time:1750768538.1977615s req_ids:[8] -DEBUG 06-24 20:35:38 [manager.py:391] -ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:209.63191986083984ms total_cost_time:209.67698097229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13587 prompt_cache_len:5151 prompt_cache_ratio:0.37911238684036214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 -DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:38 [batch.py:51] router release req id 8 -INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10969209671020508 s -INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.11172056198120117 s -DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=52875310274312571314831454095452601586, time:1750768538.4156318s req_ids:[8] -DEBUG 06-24 20:35:38 [manager.py:391] -ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:217.8635597229004ms total_cost_time:217.88311004638672ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13588 prompt_cache_len:5151 prompt_cache_ratio:0.3790844863114513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 -DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:38 [batch.py:51] router release req id 8 -INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10608911514282227 s -INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10798931121826172 s -DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=234359492558451081227452154693864692596, time:1750768538.6362839s req_ids:[8] -DEBUG 06-24 20:35:38 [manager.py:391] -DEBUG 06-24 20:35:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 54193.539 tokens/s -DEBUG 06-24 20:35:38 [stats.py:37] Avg prompt tokens throughput: 54185.553 tokens/s -DEBUG 06-24 20:35:38 [stats.py:37] Avg generate tokens throughput: 7.986 tokens/s -ERROR 06-24 20:35:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:217.10491180419922ms total_cost_time:217.1492576599121ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13589 prompt_cache_len:5151 prompt_cache_ratio:0.3790565898888807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 -DEBUG 06-24 20:35:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:38 [batch.py:51] router release req id 8 -INFO 06-24 20:35:38 [manager.py:224] router recive req id 8 cost time 0.10803890228271484 s -INFO 06-24 20:35:38 [manager.py:68] detokenization recv req id 8 cost time 0.10985374450683594 s -DEBUG 06-24 20:35:38 [manager.py:391] Prefill Batch: batch_id=89646764811124633090934752491578122788, time:1750768538.8570962s req_ids:[8] -DEBUG 06-24 20:35:38 [manager.py:391] -ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:38 lightllm_req_id:8 first_token_cost:389.6939754486084ms total_cost_time:389.7385597229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13590 prompt_cache_len:5151 prompt_cache_ratio:0.37902869757174396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 -DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:39 [batch.py:51] router release req id 8 -INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.1086728572845459 s -INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11056756973266602 s -DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=82866626344295554103967183244468423070, time:1750768539.253616s req_ids:[8] -DEBUG 06-24 20:35:39 [manager.py:391] -ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:212.205171585083ms total_cost_time:212.2499942779541ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13591 prompt_cache_len:5151 prompt_cache_ratio:0.3790008093591347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 -DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:39 [batch.py:51] router release req id 8 -INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10852742195129395 s -INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11053180694580078 s -DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=312473198729401489207836045327266829095, time:1750768539.4690185s req_ids:[8] -DEBUG 06-24 20:35:39 [manager.py:391] -ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:211.83228492736816ms total_cost_time:211.87710762023926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13592 prompt_cache_len:5151 prompt_cache_ratio:0.37897292525014714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 -DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:39 [batch.py:51] router release req id 8 -INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s -INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.11011505126953125 s -DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=184515634672172484501593220330195933141, time:1750768539.6883461s req_ids:[8] -DEBUG 06-24 20:35:39 [manager.py:391] -ERROR 06-24 20:35:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:213.5021686553955ms total_cost_time:213.545560836792ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13593 prompt_cache_len:5151 prompt_cache_ratio:0.3789450452438755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 -DEBUG 06-24 20:35:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:39 [batch.py:51] router release req id 8 -INFO 06-24 20:35:39 [manager.py:224] router recive req id 8 cost time 0.10934281349182129 s -INFO 06-24 20:35:39 [manager.py:68] detokenization recv req id 8 cost time 0.1114661693572998 s -DEBUG 06-24 20:35:39 [manager.py:391] Prefill Batch: batch_id=292717016435151522369893764715364919503, time:1750768539.9077733s req_ids:[8] -DEBUG 06-24 20:35:39 [manager.py:391] -ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:39 lightllm_req_id:8 first_token_cost:211.83085441589355ms total_cost_time:211.87424659729004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13594 prompt_cache_len:5151 prompt_cache_ratio:0.37891716933941444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 -DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:40 [batch.py:51] router release req id 8 -INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10908269882202148 s -INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.11111974716186523 s -DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=168519778785309824664876323874769970067, time:1750768540.1279783s req_ids:[8] -DEBUG 06-24 20:35:40 [manager.py:391] -ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:210.22486686706543ms total_cost_time:210.24727821350098ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:13595 prompt_cache_len:5151 prompt_cache_ratio:0.37888929753585876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 -DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:40 [batch.py:51] router release req id 8 -INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10319399833679199 s -INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.1050257682800293 s -DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=156977408477062322928380143385979748437, time:1750768540.3443096s req_ids:[8] -DEBUG 06-24 20:35:40 [manager.py:391] -ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:172.66464233398438ms total_cost_time:172.6844310760498ms,out_token_counter:1 mean_per_token_cost_time: 0.019788742065429688ms prompt_token_num:13596 prompt_cache_len:5151 prompt_cache_ratio:0.37886142983230364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 -DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:40 [batch.py:51] router release req id 8 -INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10681891441345215 s -INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.10802531242370605 s -DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=55371751508327389827965395931599578888, time:1750768540.5214486s req_ids:[8] -DEBUG 06-24 20:35:40 [manager.py:391] -ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:373.7204074859619ms total_cost_time:373.765230178833ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13597 prompt_cache_len:5151 prompt_cache_ratio:0.37883356622784436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 -DEBUG 06-24 20:35:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:40 [batch.py:51] router release req id 8 -INFO 06-24 20:35:40 [manager.py:224] router recive req id 8 cost time 0.10801959037780762 s -INFO 06-24 20:35:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s -DEBUG 06-24 20:35:40 [manager.py:391] Prefill Batch: batch_id=1967681378395399707303357263697826111, time:1750768540.8969193s req_ids:[8] -DEBUG 06-24 20:35:40 [manager.py:391] -ERROR 06-24 20:35:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:210.36148071289062ms total_cost_time:210.404634475708ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13598 prompt_cache_len:5151 prompt_cache_ratio:0.3788057067215767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 -DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:41 [batch.py:51] router release req id 8 -INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10888171195983887 s -INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11088204383850098 s -DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=149477419881146959458603593027434585439, time:1750768541.1171978s req_ids:[8] -DEBUG 06-24 20:35:41 [manager.py:391] -ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:40 lightllm_req_id:8 first_token_cost:217.09609031677246ms total_cost_time:217.14043617248535ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13599 prompt_cache_len:5151 prompt_cache_ratio:0.3787778513125965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 -DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:41 [batch.py:51] router release req id 8 -INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10810399055480957 s -INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.10946416854858398 s -DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=290396287117234931895476039953472333614, time:1750768541.3381763s req_ids:[8] -DEBUG 06-24 20:35:41 [manager.py:391] -ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:213.71865272521973ms total_cost_time:213.76299858093262ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13600 prompt_cache_len:5151 prompt_cache_ratio:0.37875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 -DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:41 [batch.py:51] router release req id 8 -INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10932135581970215 s -INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11054611206054688 s -DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=279481349294463796504529855668934353218, time:1750768541.5575454s req_ids:[8] -DEBUG 06-24 20:35:41 [manager.py:391] -ERROR 06-24 20:35:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:213.01555633544922ms total_cost_time:213.0589485168457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13601 prompt_cache_len:5151 prompt_cache_ratio:0.3787221527828836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 -DEBUG 06-24 20:35:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:41 [batch.py:51] router release req id 8 -INFO 06-24 20:35:41 [manager.py:224] router recive req id 8 cost time 0.10843920707702637 s -INFO 06-24 20:35:41 [manager.py:68] detokenization recv req id 8 cost time 0.11048507690429688 s -DEBUG 06-24 20:35:41 [manager.py:391] Prefill Batch: batch_id=192905104411030839179327211949817695604, time:1750768541.7775164s req_ids:[8] -DEBUG 06-24 20:35:41 [manager.py:391] -ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:41 lightllm_req_id:8 first_token_cost:391.2222385406494ms total_cost_time:391.2684917449951ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13602 prompt_cache_len:5151 prompt_cache_ratio:0.37869430966034406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 -DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:42 [batch.py:51] router release req id 8 -INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10888171195983887 s -INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11017560958862305 s -DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=140789127812398456879938818544773926082, time:1750768542.1762826s req_ids:[8] -DEBUG 06-24 20:35:42 [manager.py:391] -ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:214.06149864196777ms total_cost_time:214.10655975341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13603 prompt_cache_len:5151 prompt_cache_ratio:0.37866647063147835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 -DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:42 [batch.py:51] router release req id 8 -INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10792922973632812 s -INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.10974717140197754 s -DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=322251059869106923080699845792245571053, time:1750768542.3957598s req_ids:[8] -DEBUG 06-24 20:35:42 [manager.py:391] -ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:208.77480506896973ms total_cost_time:208.82034301757812ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13604 prompt_cache_len:5151 prompt_cache_ratio:0.3786386356953837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 -DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:42 [batch.py:51] router release req id 8 -INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.11018562316894531 s -INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11222362518310547 s -DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=8151488355836007724931859557944683671, time:1750768542.6109364s req_ids:[8] -DEBUG 06-24 20:35:42 [manager.py:391] -ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:212.1882438659668ms total_cost_time:212.20898628234863ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:13605 prompt_cache_len:5151 prompt_cache_ratio:0.37861080485115767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 -DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:42 [batch.py:51] router release req id 8 -INFO 06-24 20:35:42 [manager.py:224] router recive req id 8 cost time 0.10851478576660156 s -INFO 06-24 20:35:42 [manager.py:68] detokenization recv req id 8 cost time 0.11065006256103516 s -DEBUG 06-24 20:35:42 [manager.py:391] Prefill Batch: batch_id=297603167432330342416373557133570734676, time:1750768542.8426023s req_ids:[8] -DEBUG 06-24 20:35:42 [manager.py:391] -ERROR 06-24 20:35:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:229.41207885742188ms total_cost_time:229.45928573608398ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13606 prompt_cache_len:5151 prompt_cache_ratio:0.378582978097898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 -DEBUG 06-24 20:35:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:42 [batch.py:51] router release req id 8 -INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10805392265319824 s -INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11006855964660645 s -DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=63523953996913537598672544553216320655, time:1750768543.06532s req_ids:[8] -DEBUG 06-24 20:35:43 [manager.py:391] -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:42 lightllm_req_id:8 first_token_cost:213.69004249572754ms total_cost_time:213.7320041656494ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13607 prompt_cache_len:5151 prompt_cache_ratio:0.37855515543470275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 -DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:43 [batch.py:51] router release req id 8 -INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s -INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11109399795532227 s -DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=108041713607132045387363621582680762016, time:1750768543.2978554s req_ids:[8] -DEBUG 06-24 20:35:43 [manager.py:391] -ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:395.83802223205566ms total_cost_time:395.88332176208496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13608 prompt_cache_len:5151 prompt_cache_ratio:0.3785273368606702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 -DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:43 [batch.py:51] router release req id 8 -INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10890698432922363 s -INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.11099600791931152 s -DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=249601185250909779010198803393374823622, time:1750768543.6880448s req_ids:[8] -DEBUG 06-24 20:35:43 [manager.py:391] -ERROR 06-24 20:35:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:213.0258083343506ms total_cost_time:213.07086944580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13609 prompt_cache_len:5151 prompt_cache_ratio:0.378499522374899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 -DEBUG 06-24 20:35:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:43 [batch.py:51] router release req id 8 -INFO 06-24 20:35:43 [manager.py:224] router recive req id 8 cost time 0.10825610160827637 s -INFO 06-24 20:35:43 [manager.py:68] detokenization recv req id 8 cost time 0.1102898120880127 s -DEBUG 06-24 20:35:43 [manager.py:391] Prefill Batch: batch_id=226611217595596865429454475423742658034, time:1750768543.9064684s req_ids:[8] -DEBUG 06-24 20:35:43 [manager.py:391] -ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:43 lightllm_req_id:8 first_token_cost:213.41347694396973ms total_cost_time:213.45973014831543ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13610 prompt_cache_len:5151 prompt_cache_ratio:0.3784717119764879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 -DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:44 [batch.py:51] router release req id 8 -INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.1071770191192627 s -INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.10866880416870117 s -DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=130958493808599589907986504006660274852, time:1750768544.126208s req_ids:[8] -DEBUG 06-24 20:35:44 [manager.py:391] -ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:212.65554428100586ms total_cost_time:212.70084381103516ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13611 prompt_cache_len:5151 prompt_cache_ratio:0.37844390566453606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 -DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:44 [batch.py:51] router release req id 8 -INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.10922050476074219 s -INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.11112236976623535 s -DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=104436109456154896394733093667044818527, time:1750768544.3465219s req_ids:[8] -DEBUG 06-24 20:35:44 [manager.py:391] -ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:215.58308601379395ms total_cost_time:215.62480926513672ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13612 prompt_cache_len:5151 prompt_cache_ratio:0.3784161034381428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 -DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:44 [batch.py:51] router release req id 8 -INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.11167693138122559 s -INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.1131439208984375 s -DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=44035926024371330174355240425430575612, time:1750768544.5706375s req_ids:[8] -DEBUG 06-24 20:35:44 [manager.py:391] -ERROR 06-24 20:35:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:216.5358066558838ms total_cost_time:216.57991409301758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13613 prompt_cache_len:5151 prompt_cache_ratio:0.3783883052964078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 -DEBUG 06-24 20:35:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:44 [batch.py:51] router release req id 8 -INFO 06-24 20:35:44 [manager.py:224] router recive req id 8 cost time 0.10886001586914062 s -INFO 06-24 20:35:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029314994812012 s -DEBUG 06-24 20:35:44 [manager.py:391] Prefill Batch: batch_id=119430016434262367425001229304978266536, time:1750768544.7916996s req_ids:[8] -DEBUG 06-24 20:35:44 [manager.py:391] -ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:44 lightllm_req_id:8 first_token_cost:390.89274406433105ms total_cost_time:390.93661308288574ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13614 prompt_cache_len:5151 prompt_cache_ratio:0.378360511238431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 -DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:45 [batch.py:51] router release req id 8 -INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.1081838607788086 s -INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10967731475830078 s -DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=273444994832047174098564456961035605906, time:1750768545.1903698s req_ids:[8] -DEBUG 06-24 20:35:45 [manager.py:391] -ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:213.3176326751709ms total_cost_time:213.3617401123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13615 prompt_cache_len:5151 prompt_cache_ratio:0.3783327212633125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 -DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:45 [batch.py:51] router release req id 8 -INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.10836386680603027 s -INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10988306999206543 s -DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=122998039719627719998469232993655549025, time:1750768545.4208715s req_ids:[8] -DEBUG 06-24 20:35:45 [manager.py:391] -ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:222.11742401123047ms total_cost_time:222.14531898498535ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:13616 prompt_cache_len:5151 prompt_cache_ratio:0.37830493537015275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 -DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:45 [batch.py:51] router release req id 8 -INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.10795927047729492 s -INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.10997319221496582 s -DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=35282630186919789138957764892559378076, time:1750768545.642445s req_ids:[8] -DEBUG 06-24 20:35:45 [manager.py:391] -ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:212.35060691833496ms total_cost_time:212.39352226257324ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13617 prompt_cache_len:5151 prompt_cache_ratio:0.3782771535580524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 -DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:45 [batch.py:51] router release req id 8 -INFO 06-24 20:35:45 [manager.py:224] router recive req id 8 cost time 0.1107480525970459 s -INFO 06-24 20:35:45 [manager.py:68] detokenization recv req id 8 cost time 0.1126852035522461 s -DEBUG 06-24 20:35:45 [manager.py:391] Prefill Batch: batch_id=47513810349256356895494341189302484139, time:1750768545.8599033s req_ids:[8] -DEBUG 06-24 20:35:45 [manager.py:391] -ERROR 06-24 20:35:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:213.01603317260742ms total_cost_time:213.057279586792ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13618 prompt_cache_len:5151 prompt_cache_ratio:0.3782493758261125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 -DEBUG 06-24 20:35:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:45 [batch.py:51] router release req id 8 -INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10805535316467285 s -INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11014151573181152 s -DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=94356904830484959331389902357134745455, time:1750768546.0770204s req_ids:[8] -DEBUG 06-24 20:35:46 [manager.py:391] -ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:45 lightllm_req_id:8 first_token_cost:216.7515754699707ms total_cost_time:216.79377555847168ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13619 prompt_cache_len:5151 prompt_cache_ratio:0.37822160217343415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 -DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:46 [batch.py:51] router release req id 8 -INFO 06-24 20:35:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10963249206542969 s -INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11109375953674316 s -DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=320957219285217034357074047481551965828, time:1750768546.2989986s req_ids:[8] -DEBUG 06-24 20:35:46 [manager.py:391] -ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:209.7768783569336ms total_cost_time:209.82098579406738ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13620 prompt_cache_len:5151 prompt_cache_ratio:0.37819383259911893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 -DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:46 [batch.py:51] router release req id 8 -INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.3108041286468506 s -INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.31293606758117676 s -DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=292576040227077569662274650760834648801, time:1750768546.7263598s req_ids:[8] -DEBUG 06-24 20:35:46 [manager.py:391] -ERROR 06-24 20:35:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:427.9489517211914ms total_cost_time:427.9942512512207ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13621 prompt_cache_len:5151 prompt_cache_ratio:0.3781660671022686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 -DEBUG 06-24 20:35:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:46 [batch.py:51] router release req id 8 -INFO 06-24 20:35:46 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s -INFO 06-24 20:35:46 [manager.py:68] detokenization recv req id 8 cost time 0.11051058769226074 s -DEBUG 06-24 20:35:46 [manager.py:391] Prefill Batch: batch_id=309156379884607679918347025486197844202, time:1750768546.9509s req_ids:[8] -DEBUG 06-24 20:35:46 [manager.py:391] -ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:46 lightllm_req_id:8 first_token_cost:215.68536758422852ms total_cost_time:215.7270908355713ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13622 prompt_cache_len:5151 prompt_cache_ratio:0.37813830568198503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 -DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:47 [batch.py:51] router release req id 8 -INFO 06-24 20:35:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10817933082580566 s -INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.11001324653625488 s -DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=24302363388559923243400375049437580690, time:1750768547.1722627s req_ids:[8] -DEBUG 06-24 20:35:47 [manager.py:391] -ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:218.34325790405273ms total_cost_time:218.38688850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13623 prompt_cache_len:5151 prompt_cache_ratio:0.3781105483373706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 -DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:47 [batch.py:51] router release req id 8 -INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.1076209545135498 s -INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.10862064361572266 s -DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=320016684876712546197543222767112920097, time:1750768547.395169s req_ids:[8] -DEBUG 06-24 20:35:47 [manager.py:391] -ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:169.72804069519043ms total_cost_time:169.7702407836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13624 prompt_cache_len:5151 prompt_cache_ratio:0.3780827950675279 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 -DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:47 [batch.py:51] router release req id 8 -INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10774993896484375 s -INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.10892629623413086 s -DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=277279140517817463526152217235581903280, time:1750768547.5735333s req_ids:[8] -DEBUG 06-24 20:35:47 [manager.py:391] -ERROR 06-24 20:35:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:214.3256664276123ms total_cost_time:214.3688201904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13625 prompt_cache_len:5151 prompt_cache_ratio:0.37805504587155964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 -DEBUG 06-24 20:35:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:47 [batch.py:51] router release req id 8 -INFO 06-24 20:35:47 [manager.py:224] router recive req id 8 cost time 0.10911369323730469 s -INFO 06-24 20:35:47 [manager.py:68] detokenization recv req id 8 cost time 0.11252140998840332 s -DEBUG 06-24 20:35:47 [manager.py:391] Prefill Batch: batch_id=35096955949092871950169631164319642318, time:1750768547.7932768s req_ids:[8] -DEBUG 06-24 20:35:47 [manager.py:391] -ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:47 lightllm_req_id:8 first_token_cost:387.31884956359863ms total_cost_time:387.3636722564697ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13626 prompt_cache_len:5151 prompt_cache_ratio:0.37802730074856894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 -DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:48 [batch.py:51] router release req id 8 -INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10870671272277832 s -INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.11010193824768066 s -DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=28758698059860884441342898673187097252, time:1750768548.1884575s req_ids:[8] -DEBUG 06-24 20:35:48 [manager.py:391] -ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:218.01137924194336ms total_cost_time:218.05262565612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13627 prompt_cache_len:5151 prompt_cache_ratio:0.3779995596976591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 -DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:48 [batch.py:51] router release req id 8 -INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.1060495376586914 s -INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10806083679199219 s -DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=281134776810478477271381555937032106350, time:1750768548.4107366s req_ids:[8] -DEBUG 06-24 20:35:48 [manager.py:391] -ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:210.6626033782959ms total_cost_time:210.707426071167ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13628 prompt_cache_len:5151 prompt_cache_ratio:0.37797182271793367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 -DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:48 [batch.py:51] router release req id 8 -INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10761213302612305 s -INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10959148406982422 s -DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=142285376794513837059663077645312865958, time:1750768548.6283486s req_ids:[8] -DEBUG 06-24 20:35:48 [manager.py:391] -ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:35:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 53975.273 tokens/s -DEBUG 06-24 20:35:48 [stats.py:37] Avg prompt tokens throughput: 53967.243 tokens/s -DEBUG 06-24 20:35:48 [stats.py:37] Avg generate tokens throughput: 8.030 tokens/s -INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:211.58289909362793ms total_cost_time:211.62700653076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13629 prompt_cache_len:5151 prompt_cache_ratio:0.3779440898084966 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 -DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:48 [batch.py:51] router release req id 8 -INFO 06-24 20:35:48 [manager.py:224] router recive req id 8 cost time 0.10781574249267578 s -INFO 06-24 20:35:48 [manager.py:68] detokenization recv req id 8 cost time 0.10983729362487793 s -DEBUG 06-24 20:35:48 [manager.py:391] Prefill Batch: batch_id=300994028136680121246595563542210505910, time:1750768548.8493989s req_ids:[8] -DEBUG 06-24 20:35:48 [manager.py:391] -ERROR 06-24 20:35:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:209.86080169677734ms total_cost_time:209.90395545959473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13630 prompt_cache_len:5151 prompt_cache_ratio:0.37791636096845194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 -DEBUG 06-24 20:35:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:48 [batch.py:51] router release req id 8 -INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10859370231628418 s -INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s -DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=99667879679816735471074901445044953506, time:1750768549.063888s req_ids:[8] -DEBUG 06-24 20:35:49 [manager.py:391] -ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:48 lightllm_req_id:8 first_token_cost:214.9038314819336ms total_cost_time:214.94579315185547ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13631 prompt_cache_len:5151 prompt_cache_ratio:0.3778886361969041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 -DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:49 [batch.py:51] router release req id 8 -INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10830473899841309 s -INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11048626899719238 s -DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=257917619119232590601852247808579288165, time:1750768549.2842946s req_ids:[8] -DEBUG 06-24 20:35:49 [manager.py:391] -ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:388.9954090118408ms total_cost_time:389.0419006347656ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13632 prompt_cache_len:5151 prompt_cache_ratio:0.37786091549295775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 -DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:49 [batch.py:51] router release req id 8 -INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10724401473999023 s -INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.1093604564666748 s -DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=31054056425887962216527733282506062550, time:1750768549.6814344s req_ids:[8] -DEBUG 06-24 20:35:49 [manager.py:391] -ERROR 06-24 20:35:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:218.8417911529541ms total_cost_time:218.88303756713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13633 prompt_cache_len:5151 prompt_cache_ratio:0.37783319885571776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 -DEBUG 06-24 20:35:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:49 [batch.py:51] router release req id 8 -INFO 06-24 20:35:49 [manager.py:224] router recive req id 8 cost time 0.10855698585510254 s -INFO 06-24 20:35:49 [manager.py:68] detokenization recv req id 8 cost time 0.11065363883972168 s -DEBUG 06-24 20:35:49 [manager.py:391] Prefill Batch: batch_id=95585855388656804125224001217431018532, time:1750768549.9050138s req_ids:[8] -DEBUG 06-24 20:35:49 [manager.py:391] -ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:49 lightllm_req_id:8 first_token_cost:217.30995178222656ms total_cost_time:217.35334396362305ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13634 prompt_cache_len:5151 prompt_cache_ratio:0.3778054862842893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 -DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:50 [batch.py:51] router release req id 8 -INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10902070999145508 s -INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11095762252807617 s -DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=83178396355074804875819021404046644400, time:1750768550.1362364s req_ids:[8] -DEBUG 06-24 20:35:50 [manager.py:391] -ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:223.65427017211914ms total_cost_time:223.70076179504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13635 prompt_cache_len:5151 prompt_cache_ratio:0.37777777777777777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 -DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:50 [batch.py:51] router release req id 8 -INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s -INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11032295227050781 s -DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=75409116160910432666062985706831405471, time:1750768550.3613985s req_ids:[8] -DEBUG 06-24 20:35:50 [manager.py:391] -ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:219.35200691223145ms total_cost_time:219.39730644226074ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13636 prompt_cache_len:5151 prompt_cache_ratio:0.3777500733352889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 -DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:50 [batch.py:51] router release req id 8 -INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.10905003547668457 s -INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s -DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=155862091041466772526169255249430827598, time:1750768550.5839698s req_ids:[8] -DEBUG 06-24 20:35:50 [manager.py:391] -ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:218.48559379577637ms total_cost_time:218.54782104492188ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:13637 prompt_cache_len:5151 prompt_cache_ratio:0.3777223729559287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 -DEBUG 06-24 20:35:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:50 [batch.py:51] router release req id 8 -INFO 06-24 20:35:50 [manager.py:224] router recive req id 8 cost time 0.11061358451843262 s -INFO 06-24 20:35:50 [manager.py:68] detokenization recv req id 8 cost time 0.11263084411621094 s -DEBUG 06-24 20:35:50 [manager.py:391] Prefill Batch: batch_id=133657886805782331993092778379460896084, time:1750768550.8176966s req_ids:[8] -DEBUG 06-24 20:35:50 [manager.py:391] -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:50 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:229.89225387573242ms total_cost_time:229.9368381500244ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13638 prompt_cache_len:5151 prompt_cache_ratio:0.37769467663880335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 -DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:51 [batch.py:51] router release req id 8 -INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.31002235412597656 s -INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.31211161613464355 s -DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=174555946281598786902466974557467235897, time:1750768551.2587047s req_ids:[8] -DEBUG 06-24 20:35:51 [manager.py:391] -ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:50 lightllm_req_id:8 first_token_cost:436.32960319519043ms total_cost_time:436.3729953765869ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13639 prompt_cache_len:5151 prompt_cache_ratio:0.37766698438301927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 -DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:51 [batch.py:51] router release req id 8 -INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.1083681583404541 s -INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.11024785041809082 s -DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=188286778155602474216403692061546385671, time:1750768551.4863336s req_ids:[8] -DEBUG 06-24 20:35:51 [manager.py:391] -ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:218.10245513916016ms total_cost_time:218.14537048339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13640 prompt_cache_len:5151 prompt_cache_ratio:0.37763929618768327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 -DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:51 [batch.py:51] router release req id 8 -INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.10745644569396973 s -INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.10949873924255371 s -DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=8390407928174607071693031107652752929, time:1750768551.7111785s req_ids:[8] -DEBUG 06-24 20:35:51 [manager.py:391] -ERROR 06-24 20:35:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:218.10030937194824ms total_cost_time:218.14322471618652ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13641 prompt_cache_len:5151 prompt_cache_ratio:0.37761161205190236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 -DEBUG 06-24 20:35:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:51 [batch.py:51] router release req id 8 -INFO 06-24 20:35:51 [manager.py:224] router recive req id 8 cost time 0.10839629173278809 s -INFO 06-24 20:35:51 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s -DEBUG 06-24 20:35:51 [manager.py:391] Prefill Batch: batch_id=213190747695857363323377943642448862316, time:1750768551.9331715s req_ids:[8] -DEBUG 06-24 20:35:51 [manager.py:391] -ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:51 lightllm_req_id:8 first_token_cost:216.90034866333008ms total_cost_time:216.94564819335938ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13642 prompt_cache_len:5151 prompt_cache_ratio:0.37758393197478374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 -DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:52 [batch.py:51] router release req id 8 -INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10802865028381348 s -INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.10996532440185547 s -DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=159746189219849857865710849734312241439, time:1750768552.16121s req_ids:[8] -DEBUG 06-24 20:35:52 [manager.py:391] -ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:180.3135871887207ms total_cost_time:180.3581714630127ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13643 prompt_cache_len:5151 prompt_cache_ratio:0.377556255955435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 -DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:52 [batch.py:51] router release req id 8 -INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10880303382873535 s -INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.11080074310302734 s -DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=337838145547603146132441832734438942966, time:1750768552.3402197s req_ids:[8] -DEBUG 06-24 20:35:52 [manager.py:391] -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:35:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:375.16236305236816ms total_cost_time:375.20790100097656ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13644 prompt_cache_len:5151 prompt_cache_ratio:0.37752858399296396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 -DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:52 [batch.py:51] router release req id 8 -INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.10774922370910645 s -INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.10974860191345215 s -DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=18407330026922278464330689317177495971, time:1750768552.7197297s req_ids:[8] -DEBUG 06-24 20:35:52 [manager.py:391] -ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:172.00756072998047ms total_cost_time:172.05071449279785ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13645 prompt_cache_len:5151 prompt_cache_ratio:0.37750091608647857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 -DEBUG 06-24 20:35:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:52 [batch.py:51] router release req id 8 -INFO 06-24 20:35:52 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s -INFO 06-24 20:35:52 [manager.py:68] detokenization recv req id 8 cost time 0.11090946197509766 s -DEBUG 06-24 20:35:52 [manager.py:391] Prefill Batch: batch_id=319335937521893534030063087331540655716, time:1750768552.898341s req_ids:[8] -DEBUG 06-24 20:35:52 [manager.py:391] -ERROR 06-24 20:35:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:203.75561714172363ms total_cost_time:203.80234718322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13646 prompt_cache_len:5151 prompt_cache_ratio:0.3774732522350872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 -DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:53 [batch.py:51] router release req id 8 -INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10809326171875 s -INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.11011099815368652 s -DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=229593009020485800866750310937352552068, time:1750768553.107266s req_ids:[8] -DEBUG 06-24 20:35:53 [manager.py:391] -ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:52 lightllm_req_id:8 first_token_cost:212.13769912719727ms total_cost_time:212.18204498291016ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13647 prompt_cache_len:5151 prompt_cache_ratio:0.37744559243789844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 -INFO 06-24 20:35:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:53 [batch.py:51] router release req id 8 -INFO 06-24 20:35:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10804295539855957 s -INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.1101541519165039 s -DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=126105128133144239977238626492060236269, time:1750768553.324351s req_ids:[8] -DEBUG 06-24 20:35:53 [manager.py:391] -ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:213.05418014526367ms total_cost_time:213.10067176818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13648 prompt_cache_len:5151 prompt_cache_ratio:0.37741793669402113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 -DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:53 [batch.py:51] router release req id 8 -INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.1073906421661377 s -INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.1092832088470459 s -DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=151925410819919333700135408243894110623, time:1750768553.5413709s req_ids:[8] -DEBUG 06-24 20:35:53 [manager.py:391] -ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:212.68725395202637ms total_cost_time:212.73136138916016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13649 prompt_cache_len:5151 prompt_cache_ratio:0.3773902850025643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 -DEBUG 06-24 20:35:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:53 [batch.py:51] router release req id 8 -INFO 06-24 20:35:53 [manager.py:224] router recive req id 8 cost time 0.10857605934143066 s -INFO 06-24 20:35:53 [manager.py:68] detokenization recv req id 8 cost time 0.11062741279602051 s -DEBUG 06-24 20:35:53 [manager.py:391] Prefill Batch: batch_id=209190148504032345125124346296662538947, time:1750768553.7588232s req_ids:[8] -DEBUG 06-24 20:35:53 [manager.py:391] -ERROR 06-24 20:35:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:213.85693550109863ms total_cost_time:213.90032768249512ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13650 prompt_cache_len:5151 prompt_cache_ratio:0.37736263736263737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 -DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:54 [batch.py:51] router release req id 8 -INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.3093385696411133 s -INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.31124448776245117 s -DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=257007811245343669500775947666003388801, time:1750768554.183651s req_ids:[8] -DEBUG 06-24 20:35:54 [manager.py:391] -ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:53 lightllm_req_id:8 first_token_cost:378.28707695007324ms total_cost_time:378.3295154571533ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13651 prompt_cache_len:5151 prompt_cache_ratio:0.37733499377334995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 -DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:54 [batch.py:51] router release req id 8 -INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10752582550048828 s -INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.10958600044250488 s -DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=235612014099335927989610269083737396820, time:1750768554.3614638s req_ids:[8] -DEBUG 06-24 20:35:54 [manager.py:391] -ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:215.03090858459473ms total_cost_time:215.0745391845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13652 prompt_cache_len:5151 prompt_cache_ratio:0.3773073542338119 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 -DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:54 [batch.py:51] router release req id 8 -INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10862326622009277 s -INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.11069250106811523 s -DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=49484158378608337013176031824749776154, time:1750768554.580949s req_ids:[8] -DEBUG 06-24 20:35:54 [manager.py:391] -ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:217.12398529052734ms total_cost_time:217.16761589050293ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13653 prompt_cache_len:5151 prompt_cache_ratio:0.3772797187431334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 -DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:54 [batch.py:51] router release req id 8 -INFO 06-24 20:35:54 [manager.py:224] router recive req id 8 cost time 0.10905146598815918 s -INFO 06-24 20:35:54 [manager.py:68] detokenization recv req id 8 cost time 0.11112380027770996 s -DEBUG 06-24 20:35:54 [manager.py:391] Prefill Batch: batch_id=108956072929634339371195895641946405800, time:1750768554.8026688s req_ids:[8] -DEBUG 06-24 20:35:54 [manager.py:391] -ERROR 06-24 20:35:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:214.78915214538574ms total_cost_time:214.83564376831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13654 prompt_cache_len:5151 prompt_cache_ratio:0.3772520873004248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 -DEBUG 06-24 20:35:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:54 [batch.py:51] router release req id 8 -INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10964035987854004 s -INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11162042617797852 s -DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=153382760579351380999977144223642908118, time:1750768555.0227501s req_ids:[8] -DEBUG 06-24 20:35:55 [manager.py:391] -ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:54 lightllm_req_id:8 first_token_cost:217.02861785888672ms total_cost_time:217.0724868774414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13655 prompt_cache_len:5151 prompt_cache_ratio:0.3772244599047968 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 -DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:55 [batch.py:51] router release req id 8 -INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10831761360168457 s -INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11089706420898438 s -DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=193805241350396400480045712576549230029, time:1750768555.248415s req_ids:[8] -DEBUG 06-24 20:35:55 [manager.py:391] -ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:222.9306697845459ms total_cost_time:222.975492477417ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13656 prompt_cache_len:5151 prompt_cache_ratio:0.3771968365553603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 -DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:55 [batch.py:51] router release req id 8 -INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.3101987838745117 s -INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.31226205825805664 s -DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=195118419152740454995841600054416543695, time:1750768555.6782992s req_ids:[8] -DEBUG 06-24 20:35:55 [manager.py:391] -ERROR 06-24 20:35:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:431.47969245910645ms total_cost_time:431.52523040771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13657 prompt_cache_len:5151 prompt_cache_ratio:0.3771692172512265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 -DEBUG 06-24 20:35:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:55 [batch.py:51] router release req id 8 -INFO 06-24 20:35:55 [manager.py:224] router recive req id 8 cost time 0.10817694664001465 s -INFO 06-24 20:35:55 [manager.py:68] detokenization recv req id 8 cost time 0.11034584045410156 s -DEBUG 06-24 20:35:55 [manager.py:391] Prefill Batch: batch_id=151631523667400502789879400052510589660, time:1750768555.908569s req_ids:[8] -DEBUG 06-24 20:35:55 [manager.py:391] -ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:55 lightllm_req_id:8 first_token_cost:218.30201148986816ms total_cost_time:218.34492683410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13658 prompt_cache_len:5151 prompt_cache_ratio:0.3771416019915068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 -DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:56 [batch.py:51] router release req id 8 -INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10804605484008789 s -INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11010241508483887 s -DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=257623674290752407476526956654920517947, time:1750768556.131419s req_ids:[8] -DEBUG 06-24 20:35:56 [manager.py:391] -ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:212.73112297058105ms total_cost_time:212.77570724487305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13659 prompt_cache_len:5151 prompt_cache_ratio:0.377113990775313 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 -DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:56 [batch.py:51] router release req id 8 -INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.1077878475189209 s -INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.10988402366638184 s -DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=190297581107265397179063143987158349761, time:1750768556.3476503s req_ids:[8] -DEBUG 06-24 20:35:56 [manager.py:391] -ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:211.83538436889648ms total_cost_time:211.87901496887207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13660 prompt_cache_len:5151 prompt_cache_ratio:0.37708638360175695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 -DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:56 [batch.py:51] router release req id 8 -INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10842680931091309 s -INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11051678657531738 s -DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=54857662492815382657442871395007006545, time:1750768556.5654368s req_ids:[8] -DEBUG 06-24 20:35:56 [manager.py:391] -ERROR 06-24 20:35:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:217.09418296813965ms total_cost_time:217.13733673095703ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13661 prompt_cache_len:5151 prompt_cache_ratio:0.37705878046995095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 -DEBUG 06-24 20:35:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:56 [batch.py:51] router release req id 8 -INFO 06-24 20:35:56 [manager.py:224] router recive req id 8 cost time 0.10813522338867188 s -INFO 06-24 20:35:56 [manager.py:68] detokenization recv req id 8 cost time 0.11013507843017578 s -DEBUG 06-24 20:35:56 [manager.py:391] Prefill Batch: batch_id=300364789289121113358902921117596870327, time:1750768556.787558s req_ids:[8] -DEBUG 06-24 20:35:56 [manager.py:391] -ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:56 lightllm_req_id:8 first_token_cost:386.5816593170166ms total_cost_time:386.6274356842041ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13662 prompt_cache_len:5151 prompt_cache_ratio:0.3770311813790075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 -DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:57 [batch.py:51] router release req id 8 -INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1079862117767334 s -INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.1101236343383789 s -DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=187883675330075789851526300167506811549, time:1750768557.1788938s req_ids:[8] -DEBUG 06-24 20:35:57 [manager.py:391] -ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:217.81563758850098ms total_cost_time:217.85950660705566ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13663 prompt_cache_len:5151 prompt_cache_ratio:0.3770035863280392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 -DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:57 [batch.py:51] router release req id 8 -INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1078939437866211 s -INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.1099846363067627 s -DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=293098158439175210611145627150530717569, time:1750768557.4022622s req_ids:[8] -DEBUG 06-24 20:35:57 [manager.py:391] -ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:217.12994575500488ms total_cost_time:217.17405319213867ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13664 prompt_cache_len:5151 prompt_cache_ratio:0.3769759953161593 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 -DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:57 [batch.py:51] router release req id 8 -INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.1082301139831543 s -INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.11027050018310547 s -DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=57238135226023662701856765536486369313, time:1750768557.6270173s req_ids:[8] -DEBUG 06-24 20:35:57 [manager.py:391] -ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:223.72865676879883ms total_cost_time:223.77347946166992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13665 prompt_cache_len:5151 prompt_cache_ratio:0.3769484083424808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 -DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:57 [batch.py:51] router release req id 8 -INFO 06-24 20:35:57 [manager.py:224] router recive req id 8 cost time 0.10793495178222656 s -INFO 06-24 20:35:57 [manager.py:68] detokenization recv req id 8 cost time 0.10975837707519531 s -DEBUG 06-24 20:35:57 [manager.py:391] Prefill Batch: batch_id=164239055244202895134956885315973145799, time:1750768557.8561106s req_ids:[8] -DEBUG 06-24 20:35:57 [manager.py:391] -ERROR 06-24 20:35:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:218.9810276031494ms total_cost_time:219.0229892730713ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13666 prompt_cache_len:5151 prompt_cache_ratio:0.3769208254061174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 -DEBUG 06-24 20:35:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:57 [batch.py:51] router release req id 8 -INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10857534408569336 s -INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11054039001464844 s -DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=17491232647544406456690733448703084188, time:1750768558.0762622s req_ids:[8] -DEBUG 06-24 20:35:58 [manager.py:391] -ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:57 lightllm_req_id:8 first_token_cost:218.2791233062744ms total_cost_time:218.3229923248291ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13667 prompt_cache_len:5151 prompt_cache_ratio:0.3768932465061828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 -DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:58 [batch.py:51] router release req id 8 -INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10811400413513184 s -INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101381778717041 s -DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=260665062313428928668893639995864761558, time:1750768558.2983599s req_ids:[8] -DEBUG 06-24 20:35:58 [manager.py:391] -ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:209.7315788269043ms total_cost_time:209.7756862640381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13668 prompt_cache_len:5151 prompt_cache_ratio:0.376865671641791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 -DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:58 [batch.py:51] router release req id 8 -INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10889554023742676 s -INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11087918281555176 s -DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=251049571318513807902057573612589340282, time:1750768558.512987s req_ids:[8] -DEBUG 06-24 20:35:58 [manager.py:391] -ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:35:58 [stats.py:37] Avg tokens(prompt+generate) throughput: 54351.951 tokens/s -DEBUG 06-24 20:35:58 [stats.py:37] Avg prompt tokens throughput: 54343.988 tokens/s -DEBUG 06-24 20:35:58 [stats.py:37] Avg generate tokens throughput: 7.963 tokens/s -INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:375.83351135253906ms total_cost_time:375.87928771972656ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13669 prompt_cache_len:5151 prompt_cache_ratio:0.3768381008120565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 -DEBUG 06-24 20:35:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:58 [batch.py:51] router release req id 8 -INFO 06-24 20:35:58 [manager.py:224] router recive req id 8 cost time 0.10853147506713867 s -INFO 06-24 20:35:58 [manager.py:68] detokenization recv req id 8 cost time 0.11055994033813477 s -DEBUG 06-24 20:35:58 [manager.py:391] Prefill Batch: batch_id=68813320741427866713348010278541618370, time:1750768558.893984s req_ids:[8] -DEBUG 06-24 20:35:58 [manager.py:391] -ERROR 06-24 20:35:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:212.9812240600586ms total_cost_time:213.03915977478027ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:13670 prompt_cache_len:5151 prompt_cache_ratio:0.37681053401609366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 -DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:59 [batch.py:51] router release req id 8 -INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10829043388366699 s -INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11027169227600098 s -DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=26892049964847638688863999236649037064, time:1750768559.1130607s req_ids:[8] -DEBUG 06-24 20:35:59 [manager.py:391] -ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:58 lightllm_req_id:8 first_token_cost:172.37091064453125ms total_cost_time:172.41573333740234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13671 prompt_cache_len:5151 prompt_cache_ratio:0.37678297125301735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 -DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:59 [batch.py:51] router release req id 8 -INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.1075596809387207 s -INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.1095273494720459 s -DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=114731582348946010192197612861649036261, time:1750768559.293538s req_ids:[8] -DEBUG 06-24 20:35:59 [manager.py:391] -ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:204.06317710876465ms total_cost_time:204.10776138305664ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13672 prompt_cache_len:5151 prompt_cache_ratio:0.37675541252194267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 -DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:59 [batch.py:51] router release req id 8 -INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10869359970092773 s -INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11062765121459961 s -DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=254332320868817064403540280358532400553, time:1750768559.5034993s req_ids:[8] -DEBUG 06-24 20:35:59 [manager.py:391] -ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:35:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:213.58585357666016ms total_cost_time:213.62876892089844ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13673 prompt_cache_len:5151 prompt_cache_ratio:0.3767278578219849 mtp_avg_token_per_step:1.0 -INFO 06-24 20:35:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 -DEBUG 06-24 20:35:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:35:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:35:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:35:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:35:59 [batch.py:51] router release req id 8 -INFO 06-24 20:35:59 [manager.py:224] router recive req id 8 cost time 0.10849142074584961 s -INFO 06-24 20:35:59 [manager.py:68] detokenization recv req id 8 cost time 0.11051225662231445 s -DEBUG 06-24 20:35:59 [manager.py:391] Prefill Batch: batch_id=310163101815339106029202796761737132947, time:1750768559.7229934s req_ids:[8] -DEBUG 06-24 20:35:59 [manager.py:391] -ERROR 06-24 20:35:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:35:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:35:59 lightllm_req_id:8 first_token_cost:394.3326473236084ms total_cost_time:394.3769931793213ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13674 prompt_cache_len:5151 prompt_cache_ratio:0.3767003071522598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 -DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:00 [batch.py:51] router release req id 8 -INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.1081089973449707 s -INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.10997843742370605 s -DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=157916551437019257423777337339710269693, time:1750768560.1221435s req_ids:[8] -DEBUG 06-24 20:36:00 [manager.py:391] -ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:214.73312377929688ms total_cost_time:214.77746963500977ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13675 prompt_cache_len:5151 prompt_cache_ratio:0.376672760511883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 -DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:00 [batch.py:51] router release req id 8 -INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.10854291915893555 s -INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.1105046272277832 s -DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=236455581206585182835000858650814063414, time:1750768560.3426344s req_ids:[8] -DEBUG 06-24 20:36:00 [manager.py:391] -ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:210.79134941101074ms total_cost_time:210.84880828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:13676 prompt_cache_len:5151 prompt_cache_ratio:0.37664521789997074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 -DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:00 [batch.py:51] router release req id 8 -INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.10779070854187012 s -INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.11014008522033691 s -INFO 06-24 20:36:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=198450818040750336791910335678538978955, time:1750768560.5624502s req_ids:[8] -DEBUG 06-24 20:36:00 [manager.py:391] -ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:219.8197841644287ms total_cost_time:219.8634147644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13677 prompt_cache_len:5151 prompt_cache_ratio:0.3766176793156394 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 -DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:00 [batch.py:51] router release req id 8 -INFO 06-24 20:36:00 [manager.py:224] router recive req id 8 cost time 0.11022591590881348 s -INFO 06-24 20:36:00 [manager.py:68] detokenization recv req id 8 cost time 0.11166834831237793 s -DEBUG 06-24 20:36:00 [manager.py:391] Prefill Batch: batch_id=163421767089692774712480714151470099233, time:1750768560.7843263s req_ids:[8] -DEBUG 06-24 20:36:00 [manager.py:391] -ERROR 06-24 20:36:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:223.0820655822754ms total_cost_time:223.12617301940918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13678 prompt_cache_len:5151 prompt_cache_ratio:0.37659014475800556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 -DEBUG 06-24 20:36:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:00 [batch.py:51] router release req id 8 -INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10873699188232422 s -INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11069846153259277 s -DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=316280845978368079284644500574320082575, time:1750768561.0155902s req_ids:[8] -DEBUG 06-24 20:36:01 [manager.py:391] -ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:00 lightllm_req_id:8 first_token_cost:213.88745307922363ms total_cost_time:213.9296531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13679 prompt_cache_len:5151 prompt_cache_ratio:0.37656261422618614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 -DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:01 [batch.py:51] router release req id 8 -INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10891079902648926 s -INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11090850830078125 s -DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=187473870858225605631348748138118144432, time:1750768561.235861s req_ids:[8] -DEBUG 06-24 20:36:01 [manager.py:391] -ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:395.28894424438477ms total_cost_time:395.33352851867676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13680 prompt_cache_len:5151 prompt_cache_ratio:0.3765350877192982 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 -DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:01 [batch.py:51] router release req id 8 -INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s -INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.1104590892791748 s -DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=327126906278624230821965108218765240291, time:1750768561.6380112s req_ids:[8] -DEBUG 06-24 20:36:01 [manager.py:391] -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:217.81206130981445ms total_cost_time:217.8642749786377ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:13681 prompt_cache_len:5151 prompt_cache_ratio:0.37650756523645934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 -DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:01 [batch.py:51] router release req id 8 -INFO 06-24 20:36:01 [manager.py:224] router recive req id 8 cost time 0.10818719863891602 s -INFO 06-24 20:36:01 [manager.py:68] detokenization recv req id 8 cost time 0.11016273498535156 s -DEBUG 06-24 20:36:01 [manager.py:391] Prefill Batch: batch_id=310661958017560045413286966247218173497, time:1750768561.8592618s req_ids:[8] -DEBUG 06-24 20:36:01 [manager.py:391] -ERROR 06-24 20:36:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:215.3182029724121ms total_cost_time:215.3618335723877ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13682 prompt_cache_len:5151 prompt_cache_ratio:0.37648004677678704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 -DEBUG 06-24 20:36:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:01 [batch.py:51] router release req id 8 -INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10772252082824707 s -INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.10873174667358398 s -DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=294296304520917938906888436479656756963, time:1750768562.0810807s req_ids:[8] -DEBUG 06-24 20:36:02 [manager.py:391] -ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:01 lightllm_req_id:8 first_token_cost:174.0882396697998ms total_cost_time:174.1316318511963ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13683 prompt_cache_len:5151 prompt_cache_ratio:0.37645253233939924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 -DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:02 [batch.py:51] router release req id 8 -INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10907459259033203 s -INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.11036825180053711 s -DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=224751542837843329291389650593660496840, time:1750768562.2625144s req_ids:[8] -DEBUG 06-24 20:36:02 [manager.py:391] -ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:214.79129791259766ms total_cost_time:214.83540534973145ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13684 prompt_cache_len:5151 prompt_cache_ratio:0.3764250219234142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 -DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:02 [batch.py:51] router release req id 8 -INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.10772991180419922 s -INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.1096792221069336 s -DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=2942148602482244426050882912649281021, time:1750768562.4801955s req_ids:[8] -DEBUG 06-24 20:36:02 [manager.py:391] -ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:212.46933937072754ms total_cost_time:212.51320838928223ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13685 prompt_cache_len:5151 prompt_cache_ratio:0.3763975155279503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 -DEBUG 06-24 20:36:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:02 [batch.py:51] router release req id 8 -INFO 06-24 20:36:02 [manager.py:224] router recive req id 8 cost time 0.1087636947631836 s -INFO 06-24 20:36:02 [manager.py:68] detokenization recv req id 8 cost time 0.1106882095336914 s -DEBUG 06-24 20:36:02 [manager.py:391] Prefill Batch: batch_id=249628482781049278516473589102285439973, time:1750768562.6958754s req_ids:[8] -DEBUG 06-24 20:36:02 [manager.py:391] -ERROR 06-24 20:36:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:390.7287120819092ms total_cost_time:390.77281951904297ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13686 prompt_cache_len:5151 prompt_cache_ratio:0.37637001315212626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 -DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:03 [batch.py:51] router release req id 8 -INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10800409317016602 s -INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s -DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=20268920181325564169125179817406441366, time:1750768563.092134s req_ids:[8] -DEBUG 06-24 20:36:03 [manager.py:391] -ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:02 lightllm_req_id:8 first_token_cost:174.38602447509766ms total_cost_time:174.42870140075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13687 prompt_cache_len:5151 prompt_cache_ratio:0.37634251479506103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 -DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:03 [batch.py:51] router release req id 8 -INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s -INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11111617088317871 s -DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=153888401382603662020931468204088355073, time:1750768563.2735155s req_ids:[8] -DEBUG 06-24 20:36:03 [manager.py:391] -ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:207.27181434631348ms total_cost_time:207.31496810913086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13688 prompt_cache_len:5151 prompt_cache_ratio:0.3763150204558738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 -DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:03 [batch.py:51] router release req id 8 -INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10889148712158203 s -INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11088991165161133 s -DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=207905817326683949055798575734798492031, time:1750768563.48529s req_ids:[8] -DEBUG 06-24 20:36:03 [manager.py:391] -ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:218.5654640197754ms total_cost_time:218.60980987548828ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13689 prompt_cache_len:5151 prompt_cache_ratio:0.376287530133684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 -DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:03 [batch.py:51] router release req id 8 -INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s -INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11102414131164551 s -DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=25200249908781500470442144280894297520, time:1750768563.7075348s req_ids:[8] -DEBUG 06-24 20:36:03 [manager.py:391] -ERROR 06-24 20:36:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:216.77517890930176ms total_cost_time:216.81785583496094ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13690 prompt_cache_len:5151 prompt_cache_ratio:0.3762600438276114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 -DEBUG 06-24 20:36:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:03 [batch.py:51] router release req id 8 -INFO 06-24 20:36:03 [manager.py:224] router recive req id 8 cost time 0.10886573791503906 s -INFO 06-24 20:36:03 [manager.py:68] detokenization recv req id 8 cost time 0.11089396476745605 s -DEBUG 06-24 20:36:03 [manager.py:391] Prefill Batch: batch_id=327898304515158839605476026271680785561, time:1750768563.9306567s req_ids:[8] -DEBUG 06-24 20:36:03 [manager.py:391] -ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:03 lightllm_req_id:8 first_token_cost:216.02940559387207ms total_cost_time:216.07255935668945ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13691 prompt_cache_len:5151 prompt_cache_ratio:0.376232561536776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 -DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:04 [batch.py:51] router release req id 8 -INFO 06-24 20:36:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s -INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.10953116416931152 s -DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=291085640019914894290205786406690412241, time:1750768564.1544101s req_ids:[8] -DEBUG 06-24 20:36:04 [manager.py:391] -ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:214.599609375ms total_cost_time:214.64228630065918ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13692 prompt_cache_len:5151 prompt_cache_ratio:0.376205083260298 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 -DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:04 [batch.py:51] router release req id 8 -INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.3113975524902344 s -INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.3134908676147461 s -DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=4575308800431602052722726970014790449, time:1750768564.5862854s req_ids:[8] -DEBUG 06-24 20:36:04 [manager.py:391] -ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:431.973934173584ms total_cost_time:432.0189952850342ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13693 prompt_cache_len:5151 prompt_cache_ratio:0.3761776089972979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 -DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:04 [batch.py:51] router release req id 8 -INFO 06-24 20:36:04 [manager.py:224] router recive req id 8 cost time 0.1089322566986084 s -INFO 06-24 20:36:04 [manager.py:68] detokenization recv req id 8 cost time 0.11097884178161621 s -DEBUG 06-24 20:36:04 [manager.py:391] Prefill Batch: batch_id=244118052605711041029642501530467195852, time:1750768564.813111s req_ids:[8] -DEBUG 06-24 20:36:04 [manager.py:391] -ERROR 06-24 20:36:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:208.41288566589355ms total_cost_time:208.45651626586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13694 prompt_cache_len:5151 prompt_cache_ratio:0.37615013874689646 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 -DEBUG 06-24 20:36:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:04 [batch.py:51] router release req id 8 -INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10863614082336426 s -INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11067414283752441 s -DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=327556265672390547657536032781774180820, time:1750768565.0293002s req_ids:[8] -DEBUG 06-24 20:36:05 [manager.py:391] -ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:04 lightllm_req_id:8 first_token_cost:213.43302726745605ms total_cost_time:213.47570419311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13695 prompt_cache_len:5151 prompt_cache_ratio:0.3761226725082147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 -DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:05 [batch.py:51] router release req id 8 -INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10923171043395996 s -INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11116337776184082 s -DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=125598632824474072101852168059193784718, time:1750768565.2476425s req_ids:[8] -DEBUG 06-24 20:36:05 [manager.py:391] -ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:214.69974517822266ms total_cost_time:214.74385261535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13696 prompt_cache_len:5151 prompt_cache_ratio:0.37609521028037385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 -DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:05 [batch.py:51] router release req id 8 -INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10865116119384766 s -INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11077713966369629 s -DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=291118268160651536491040511684873440704, time:1750768565.4705367s req_ids:[8] -DEBUG 06-24 20:36:05 [manager.py:391] -ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:214.52713012695312ms total_cost_time:214.5712375640869ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13697 prompt_cache_len:5151 prompt_cache_ratio:0.37606775206249543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 -DEBUG 06-24 20:36:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:05 [batch.py:51] router release req id 8 -INFO 06-24 20:36:05 [manager.py:224] router recive req id 8 cost time 0.10820627212524414 s -INFO 06-24 20:36:05 [manager.py:68] detokenization recv req id 8 cost time 0.11037611961364746 s -DEBUG 06-24 20:36:05 [manager.py:391] Prefill Batch: batch_id=155870302276030234557503265133845982448, time:1750768565.6892035s req_ids:[8] -DEBUG 06-24 20:36:05 [manager.py:391] -ERROR 06-24 20:36:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:05 lightllm_req_id:8 first_token_cost:427.30093002319336ms total_cost_time:427.34503746032715ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13698 prompt_cache_len:5151 prompt_cache_ratio:0.37604029785370124 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 -DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:06 [batch.py:51] router release req id 8 -INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10854005813598633 s -INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11059403419494629 s -DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=20341615895473604732297947997636857953, time:1750768566.1242616s req_ids:[8] -DEBUG 06-24 20:36:06 [manager.py:391] -ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:220.11899948120117ms total_cost_time:220.16239166259766ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13699 prompt_cache_len:5151 prompt_cache_ratio:0.3760128476531134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 -DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:06 [batch.py:51] router release req id 8 -INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10880851745605469 s -INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11080598831176758 s -DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=303462921873735705454450473969115980931, time:1750768566.3484032s req_ids:[8] -DEBUG 06-24 20:36:06 [manager.py:391] -ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:219.62261199951172ms total_cost_time:219.6662425994873ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13700 prompt_cache_len:5151 prompt_cache_ratio:0.37598540145985404 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 -DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:06 [batch.py:51] router release req id 8 -INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10872888565063477 s -INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11072778701782227 s -DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=211209182355129658229141676311176809372, time:1750768566.5727615s req_ids:[8] -DEBUG 06-24 20:36:06 [manager.py:391] -ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:212.80407905578613ms total_cost_time:212.84818649291992ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13701 prompt_cache_len:5151 prompt_cache_ratio:0.37595795927304576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 -DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:06 [batch.py:51] router release req id 8 -INFO 06-24 20:36:06 [manager.py:224] router recive req id 8 cost time 0.10840368270874023 s -INFO 06-24 20:36:06 [manager.py:68] detokenization recv req id 8 cost time 0.11044859886169434 s -DEBUG 06-24 20:36:06 [manager.py:391] Prefill Batch: batch_id=55732032066999074511885898363908197006, time:1750768566.7902997s req_ids:[8] -DEBUG 06-24 20:36:06 [manager.py:391] -ERROR 06-24 20:36:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:211.80248260498047ms total_cost_time:211.84492111206055ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13702 prompt_cache_len:5151 prompt_cache_ratio:0.3759305210918114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 -DEBUG 06-24 20:36:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:06 [batch.py:51] router release req id 8 -INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.11054682731628418 s -INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.11253142356872559 s -DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=165393028340050572542096960296010377604, time:1750768567.007564s req_ids:[8] -DEBUG 06-24 20:36:07 [manager.py:391] -ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:06 lightllm_req_id:8 first_token_cost:211.91000938415527ms total_cost_time:211.95530891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13703 prompt_cache_len:5151 prompt_cache_ratio:0.375903086915274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 -DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:07 [batch.py:51] router release req id 8 -INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.10819363594055176 s -INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.11017918586730957 s -DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=198553233134112528525840085456563704248, time:1750768567.2234504s req_ids:[8] -DEBUG 06-24 20:36:07 [manager.py:391] -ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:391.45469665527344ms total_cost_time:391.4988040924072ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13704 prompt_cache_len:5151 prompt_cache_ratio:0.3758756567425569 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 -DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:07 [batch.py:51] router release req id 8 -INFO 06-24 20:36:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.1075129508972168 s -INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.1093740463256836 s -DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=5084536497712749351864440785575720813, time:1750768567.619706s req_ids:[8] -DEBUG 06-24 20:36:07 [manager.py:391] -ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:216.5853977203369ms total_cost_time:216.6306972503662ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13705 prompt_cache_len:5151 prompt_cache_ratio:0.37584823057278366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 -DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:07 [batch.py:51] router release req id 8 -INFO 06-24 20:36:07 [manager.py:224] router recive req id 8 cost time 0.10883045196533203 s -INFO 06-24 20:36:07 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s -DEBUG 06-24 20:36:07 [manager.py:391] Prefill Batch: batch_id=287150963390130650151157043058514887856, time:1750768567.842655s req_ids:[8] -DEBUG 06-24 20:36:07 [manager.py:391] -ERROR 06-24 20:36:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:211.58337593078613ms total_cost_time:211.69018745422363ms,out_token_counter:1 mean_per_token_cost_time: 0.1068115234375ms prompt_token_num:13706 prompt_cache_len:5151 prompt_cache_ratio:0.3758208084050781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 -DEBUG 06-24 20:36:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:07 [batch.py:51] router release req id 8 -INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10895180702209473 s -INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11102080345153809 s -DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=311413432746304570105988497796260370383, time:1750768568.0574915s req_ids:[8] -DEBUG 06-24 20:36:08 [manager.py:391] -ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:07 lightllm_req_id:8 first_token_cost:214.80083465576172ms total_cost_time:214.84708786010742ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13707 prompt_cache_len:5151 prompt_cache_ratio:0.37579339023856423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 -DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:08 [batch.py:51] router release req id 8 -INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10837292671203613 s -INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11030364036560059 s -DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=274354502620989434857873279386533815404, time:1750768568.2807589s req_ids:[8] -DEBUG 06-24 20:36:08 [manager.py:391] -ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:218.80793571472168ms total_cost_time:218.84965896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13708 prompt_cache_len:5151 prompt_cache_ratio:0.3757659760723665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 -DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:08 [batch.py:51] router release req id 8 -INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.1089479923248291 s -INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11100387573242188 s -DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=281900105668401137203555562098484316284, time:1750768568.501424s req_ids:[8] -DEBUG 06-24 20:36:08 [manager.py:391] -ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:212.04900741577148ms total_cost_time:212.09263801574707ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13709 prompt_cache_len:5151 prompt_cache_ratio:0.37573856590560945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 -DEBUG 06-24 20:36:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:08 [batch.py:51] router release req id 8 -INFO 06-24 20:36:08 [manager.py:224] router recive req id 8 cost time 0.10824275016784668 s -INFO 06-24 20:36:08 [manager.py:68] detokenization recv req id 8 cost time 0.11033391952514648 s -DEBUG 06-24 20:36:08 [manager.py:391] Prefill Batch: batch_id=277653029160907696816703808996168123923, time:1750768568.7193387s req_ids:[8] -DEBUG 06-24 20:36:08 [manager.py:391] -ERROR 06-24 20:36:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:36:08 [stats.py:37] Avg tokens(prompt+generate) throughput: 55894.979 tokens/s -DEBUG 06-24 20:36:08 [stats.py:37] Avg prompt tokens throughput: 55886.815 tokens/s -DEBUG 06-24 20:36:08 [stats.py:37] Avg generate tokens throughput: 8.165 tokens/s -INFO 06-24 20:36:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:212.73541450500488ms total_cost_time:212.77785301208496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13710 prompt_cache_len:5151 prompt_cache_ratio:0.3757111597374179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 -DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:09 [batch.py:51] router release req id 8 -INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.3092312812805176 s -INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.3113980293273926 s -DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=217182270450432715496890801837983297037, time:1750768569.1412618s req_ids:[8] -DEBUG 06-24 20:36:09 [manager.py:391] -ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:08 lightllm_req_id:8 first_token_cost:419.0068244934082ms total_cost_time:419.0499782562256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13711 prompt_cache_len:5151 prompt_cache_ratio:0.3756837575669171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 -DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:09 [batch.py:51] router release req id 8 -INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10861778259277344 s -INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.11081433296203613 s -DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=50937931999066462069969382017307552407, time:1750768569.361859s req_ids:[8] -DEBUG 06-24 20:36:09 [manager.py:391] -ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:213.87887001037598ms total_cost_time:213.92226219177246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13712 prompt_cache_len:5151 prompt_cache_ratio:0.3756563593932322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 -DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:09 [batch.py:51] router release req id 8 -INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10752296447753906 s -INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.10940265655517578 s -DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=116902822828770268152020166420798167763, time:1750768569.5820744s req_ids:[8] -DEBUG 06-24 20:36:09 [manager.py:391] -ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:208.21738243103027ms total_cost_time:208.26077461242676ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13713 prompt_cache_len:5151 prompt_cache_ratio:0.37562896521548894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 -DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:09 [batch.py:51] router release req id 8 -INFO 06-24 20:36:09 [manager.py:224] router recive req id 8 cost time 0.10857629776000977 s -INFO 06-24 20:36:09 [manager.py:68] detokenization recv req id 8 cost time 0.11072897911071777 s -DEBUG 06-24 20:36:09 [manager.py:391] Prefill Batch: batch_id=76345945191424376066825153864064996928, time:1750768569.7971785s req_ids:[8] -DEBUG 06-24 20:36:09 [manager.py:391] -ERROR 06-24 20:36:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:213.90986442565918ms total_cost_time:213.95421028137207ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13714 prompt_cache_len:5151 prompt_cache_ratio:0.37560157503281316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 -DEBUG 06-24 20:36:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:09 [batch.py:51] router release req id 8 -INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10765409469604492 s -INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1098015308380127 s -DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=129952489704944830816134264131196334657, time:1750768570.016723s req_ids:[8] -DEBUG 06-24 20:36:10 [manager.py:391] -ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:09 lightllm_req_id:8 first_token_cost:208.65583419799805ms total_cost_time:208.69922637939453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13715 prompt_cache_len:5151 prompt_cache_ratio:0.375574188844331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 -DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:10 [batch.py:51] router release req id 8 -INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10903739929199219 s -INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1103963851928711 s -DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=52105376917487015994568650314316473523, time:1750768570.2317321s req_ids:[8] -DEBUG 06-24 20:36:10 [manager.py:391] -ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:383.27765464782715ms total_cost_time:383.32056999206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13716 prompt_cache_len:5151 prompt_cache_ratio:0.37554680664916884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 -DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:10 [batch.py:51] router release req id 8 -INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10748505592346191 s -INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s -DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=143835826180420810441165626324535168630, time:1750768570.6220236s req_ids:[8] -DEBUG 06-24 20:36:10 [manager.py:391] -ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:211.62152290344238ms total_cost_time:211.66539192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13717 prompt_cache_len:5151 prompt_cache_ratio:0.3755194284464533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 -DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:10 [batch.py:51] router release req id 8 -INFO 06-24 20:36:10 [manager.py:224] router recive req id 8 cost time 0.10842370986938477 s -INFO 06-24 20:36:10 [manager.py:68] detokenization recv req id 8 cost time 0.11037635803222656 s -DEBUG 06-24 20:36:10 [manager.py:391] Prefill Batch: batch_id=286857817823025784117166942664340752810, time:1750768570.839051s req_ids:[8] -DEBUG 06-24 20:36:10 [manager.py:391] -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:208.04905891418457ms total_cost_time:208.09197425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13718 prompt_cache_len:5151 prompt_cache_ratio:0.3754920542353113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 -DEBUG 06-24 20:36:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:10 [batch.py:51] router release req id 8 -INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10889554023742676 s -INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11092019081115723 s -DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=251781111053661081982515082912691665298, time:1750768571.054654s req_ids:[8] -DEBUG 06-24 20:36:11 [manager.py:391] -ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:10 lightllm_req_id:8 first_token_cost:212.5840187072754ms total_cost_time:212.62812614440918ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13719 prompt_cache_len:5151 prompt_cache_ratio:0.3754646840148699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 -DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:11 [batch.py:51] router release req id 8 -INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10879778861999512 s -INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11066532135009766 s -DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=311429254149751824976687477479524689936, time:1750768571.2837882s req_ids:[8] -DEBUG 06-24 20:36:11 [manager.py:391] -ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:225.27265548706055ms total_cost_time:225.31557083129883ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13720 prompt_cache_len:5151 prompt_cache_ratio:0.37543731778425654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 -DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:11 [batch.py:51] router release req id 8 -INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10863995552062988 s -INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.11046886444091797 s -DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=306214874201312827610940028019843013697, time:1750768571.5061178s req_ids:[8] -DEBUG 06-24 20:36:11 [manager.py:391] -ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:215.70158004760742ms total_cost_time:215.74664115905762ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13721 prompt_cache_len:5151 prompt_cache_ratio:0.3754099555425989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 -DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:11 [batch.py:51] router release req id 8 -INFO 06-24 20:36:11 [manager.py:224] router recive req id 8 cost time 0.10695457458496094 s -INFO 06-24 20:36:11 [manager.py:68] detokenization recv req id 8 cost time 0.10865426063537598 s -DEBUG 06-24 20:36:11 [manager.py:391] Prefill Batch: batch_id=76669233167020956013514188010362980024, time:1750768571.728009s req_ids:[8] -DEBUG 06-24 20:36:11 [manager.py:391] -ERROR 06-24 20:36:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:171.67377471923828ms total_cost_time:171.71764373779297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13722 prompt_cache_len:5151 prompt_cache_ratio:0.3753825972890249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 -DEBUG 06-24 20:36:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:11 [batch.py:51] router release req id 8 -INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.3095850944519043 s -INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.3114607334136963 s -DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=8520307662988731886649237777785453732, time:1750768572.1143682s req_ids:[8] -DEBUG 06-24 20:36:12 [manager.py:391] -ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:11 lightllm_req_id:8 first_token_cost:417.80877113342285ms total_cost_time:417.85240173339844ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13723 prompt_cache_len:5151 prompt_cache_ratio:0.3753552430226627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 -DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:12 [batch.py:51] router release req id 8 -INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10873198509216309 s -INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.11078166961669922 s -DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=78201172574491985470099438901983503409, time:1750768572.330608s req_ids:[8] -DEBUG 06-24 20:36:12 [manager.py:391] -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:12 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:216.5513038635254ms total_cost_time:216.59588813781738ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13724 prompt_cache_len:5151 prompt_cache_ratio:0.3753278927426406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 -DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:12 [batch.py:51] router release req id 8 -INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10858607292175293 s -INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.10992097854614258 s -DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=85109968803741861607164277595552530535, time:1750768572.5528843s req_ids:[8] -DEBUG 06-24 20:36:12 [manager.py:391] -ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:204.5912742614746ms total_cost_time:204.634428024292ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13725 prompt_cache_len:5151 prompt_cache_ratio:0.3753005464480874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 -DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:12 [batch.py:51] router release req id 8 -INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10866761207580566 s -INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.11001133918762207 s -DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=122635373191489327158361286025554112380, time:1750768572.7626429s req_ids:[8] -DEBUG 06-24 20:36:12 [manager.py:391] -ERROR 06-24 20:36:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:197.03173637390137ms total_cost_time:197.07441329956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13726 prompt_cache_len:5151 prompt_cache_ratio:0.37527320413813203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 -DEBUG 06-24 20:36:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:12 [batch.py:51] router release req id 8 -INFO 06-24 20:36:12 [manager.py:224] router recive req id 8 cost time 0.10821247100830078 s -INFO 06-24 20:36:12 [manager.py:68] detokenization recv req id 8 cost time 0.10950231552124023 s -DEBUG 06-24 20:36:12 [manager.py:391] Prefill Batch: batch_id=218985897903604413418233686166596847689, time:1750768572.9651563s req_ids:[8] -DEBUG 06-24 20:36:12 [manager.py:391] -ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:12 lightllm_req_id:8 first_token_cost:169.88301277160645ms total_cost_time:169.92568969726562ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13727 prompt_cache_len:5151 prompt_cache_ratio:0.37524586581190356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 -DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:13 [batch.py:51] router release req id 8 -INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10871171951293945 s -INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s -DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=304264074398272835467310876713460236983, time:1750768573.1419604s req_ids:[8] -DEBUG 06-24 20:36:13 [manager.py:391] -ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:196.00939750671387ms total_cost_time:196.04969024658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:13728 prompt_cache_len:5151 prompt_cache_ratio:0.37521853146853146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 -DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:13 [batch.py:51] router release req id 8 -INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.3095541000366211 s -INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.3106839656829834 s -DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=148181551866251624372565508693686692470, time:1750768573.5564554s req_ids:[8] -DEBUG 06-24 20:36:13 [manager.py:391] -ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:383.4686279296875ms total_cost_time:383.5141658782959ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13729 prompt_cache_len:5151 prompt_cache_ratio:0.37519120110714543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 -DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:13 [batch.py:51] router release req id 8 -INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10752701759338379 s -INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.108734130859375 s -DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=58067154981188757237548235503975275558, time:1750768573.7340567s req_ids:[8] -DEBUG 06-24 20:36:13 [manager.py:391] -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:13 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:203.0770778656006ms total_cost_time:203.12857627868652ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:13730 prompt_cache_len:5151 prompt_cache_ratio:0.37516387472687546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 -DEBUG 06-24 20:36:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:13 [batch.py:51] router release req id 8 -INFO 06-24 20:36:13 [manager.py:224] router recive req id 8 cost time 0.10793185234069824 s -INFO 06-24 20:36:13 [manager.py:68] detokenization recv req id 8 cost time 0.10921669006347656 s -DEBUG 06-24 20:36:13 [manager.py:391] Prefill Batch: batch_id=310187810965830319063573596560372678272, time:1750768573.9454048s req_ids:[8] -DEBUG 06-24 20:36:13 [manager.py:391] -ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:13 lightllm_req_id:8 first_token_cost:210.59274673461914ms total_cost_time:210.63709259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13731 prompt_cache_len:5151 prompt_cache_ratio:0.37513655232685167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 -DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:14 [batch.py:51] router release req id 8 -INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.10861468315124512 s -INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10989570617675781 s -DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=110179998312177156412887966118324209880, time:1750768574.1735187s req_ids:[8] -DEBUG 06-24 20:36:14 [manager.py:391] -ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:227.11753845214844ms total_cost_time:227.16116905212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13732 prompt_cache_len:5151 prompt_cache_ratio:0.3751092339062045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 -DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:14 [batch.py:51] router release req id 8 -INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.1074378490447998 s -INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10863041877746582 s -DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=251645619460604959595123328817082694449, time:1750768574.4097853s req_ids:[8] -DEBUG 06-24 20:36:14 [manager.py:391] -ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:226.64904594421387ms total_cost_time:226.69148445129395ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13733 prompt_cache_len:5151 prompt_cache_ratio:0.37508191946406466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 -DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:14 [batch.py:51] router release req id 8 -INFO 06-24 20:36:14 [manager.py:224] router recive req id 8 cost time 0.1076805591583252 s -INFO 06-24 20:36:14 [manager.py:68] detokenization recv req id 8 cost time 0.10887694358825684 s -DEBUG 06-24 20:36:14 [manager.py:391] Prefill Batch: batch_id=168130883046062280644738163940751575686, time:1750768574.643036s req_ids:[8] -DEBUG 06-24 20:36:14 [manager.py:391] -ERROR 06-24 20:36:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:397.9144096374512ms total_cost_time:397.95827865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13734 prompt_cache_len:5151 prompt_cache_ratio:0.3750546089995631 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 -DEBUG 06-24 20:36:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:14 [batch.py:51] router release req id 8 -INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10882973670959473 s -INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.11007046699523926 s -DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=164291309350944552483986552331379812860, time:1750768575.0363538s req_ids:[8] -DEBUG 06-24 20:36:15 [manager.py:391] -ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:14 lightllm_req_id:8 first_token_cost:211.3499641418457ms total_cost_time:211.3933563232422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13735 prompt_cache_len:5151 prompt_cache_ratio:0.3750273025118311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 -DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:15 [batch.py:51] router release req id 8 -INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10739946365356445 s -INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10860657691955566 s -DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=251492275325047294743604242113471704599, time:1750768575.2549188s req_ids:[8] -DEBUG 06-24 20:36:15 [manager.py:391] -ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:212.33701705932617ms total_cost_time:212.38183975219727ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13736 prompt_cache_len:5151 prompt_cache_ratio:0.375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 -DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:15 [batch.py:51] router release req id 8 -INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10849523544311523 s -INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s -DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=333242837490716914508993792113220247616, time:1750768575.4735458s req_ids:[8] -DEBUG 06-24 20:36:15 [manager.py:391] -ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:216.73250198364258ms total_cost_time:216.77517890930176ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13737 prompt_cache_len:5151 prompt_cache_ratio:0.37497270146320155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 -DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:15 [batch.py:51] router release req id 8 -INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10762286186218262 s -INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10866951942443848 s -DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=310474599905656710722408677666675840076, time:1750768575.6935468s req_ids:[8] -DEBUG 06-24 20:36:15 [manager.py:391] -ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:170.05419731140137ms total_cost_time:170.09520530700684ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13738 prompt_cache_len:5151 prompt_cache_ratio:0.37494540690056777 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 -DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:15 [batch.py:51] router release req id 8 -INFO 06-24 20:36:15 [manager.py:224] router recive req id 8 cost time 0.10859990119934082 s -INFO 06-24 20:36:15 [manager.py:68] detokenization recv req id 8 cost time 0.10983157157897949 s -DEBUG 06-24 20:36:15 [manager.py:391] Prefill Batch: batch_id=102971313293502691554675741941244029528, time:1750768575.8722441s req_ids:[8] -DEBUG 06-24 20:36:15 [manager.py:391] -ERROR 06-24 20:36:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:201.41029357910156ms total_cost_time:201.45440101623535ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13739 prompt_cache_len:5151 prompt_cache_ratio:0.3749181163112308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 -DEBUG 06-24 20:36:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:15 [batch.py:51] router release req id 8 -INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10754895210266113 s -INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.10883474349975586 s -DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=146554155103969965583475384873459506210, time:1750768576.0809689s req_ids:[8] -DEBUG 06-24 20:36:16 [manager.py:391] -ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:15 lightllm_req_id:8 first_token_cost:213.52887153625488ms total_cost_time:213.57059478759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13740 prompt_cache_len:5151 prompt_cache_ratio:0.37489082969432314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 -DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:16 [batch.py:51] router release req id 8 -INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.1084742546081543 s -INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.10976958274841309 s -DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=64714086168324757179833858637691748722, time:1750768576.3008327s req_ids:[8] -DEBUG 06-24 20:36:16 [manager.py:391] -ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:376.6958713531494ms total_cost_time:376.7411708831787ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13741 prompt_cache_len:5151 prompt_cache_ratio:0.3748635470489775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 -DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:16 [batch.py:51] router release req id 8 -INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10883831977844238 s -INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.11000490188598633 s -DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=7388831944751906363281937040668829263, time:1750768576.6938064s req_ids:[8] -DEBUG 06-24 20:36:16 [manager.py:391] -ERROR 06-24 20:36:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:228.02734375ms total_cost_time:228.0709743499756ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13742 prompt_cache_len:5151 prompt_cache_ratio:0.3748362683743269 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 -DEBUG 06-24 20:36:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:16 [batch.py:51] router release req id 8 -INFO 06-24 20:36:16 [manager.py:224] router recive req id 8 cost time 0.10866236686706543 s -INFO 06-24 20:36:16 [manager.py:68] detokenization recv req id 8 cost time 0.11003589630126953 s -DEBUG 06-24 20:36:16 [manager.py:391] Prefill Batch: batch_id=297394187883613128425808768086814605002, time:1750768576.9174302s req_ids:[8] -DEBUG 06-24 20:36:16 [manager.py:391] -ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:16 lightllm_req_id:8 first_token_cost:214.98870849609375ms total_cost_time:215.03257751464844ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13743 prompt_cache_len:5151 prompt_cache_ratio:0.3748089936695045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 -DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:17 [batch.py:51] router release req id 8 -INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10866355895996094 s -INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.11087965965270996 s -INFO 06-24 20:36:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=205708736660443678644740561827493403005, time:1750768577.138753s req_ids:[8] -DEBUG 06-24 20:36:17 [manager.py:391] -ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:213.2277488708496ms total_cost_time:213.2711410522461ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13744 prompt_cache_len:5151 prompt_cache_ratio:0.37478172293364376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 -DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:17 [batch.py:51] router release req id 8 -INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10869288444519043 s -INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.10991573333740234 s -DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=77735776757368270588351779871356942123, time:1750768577.3578508s req_ids:[8] -DEBUG 06-24 20:36:17 [manager.py:391] -ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:213.76514434814453ms total_cost_time:213.80925178527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13745 prompt_cache_len:5151 prompt_cache_ratio:0.3747544561658785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 -DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:17 [batch.py:51] router release req id 8 -INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10864710807800293 s -INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.10988211631774902 s -DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=194761394718417097361291852057914111150, time:1750768577.5788531s req_ids:[8] -DEBUG 06-24 20:36:17 [manager.py:391] -ERROR 06-24 20:36:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:385.45870780944824ms total_cost_time:385.5011463165283ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13746 prompt_cache_len:5151 prompt_cache_ratio:0.37472719336534266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 -DEBUG 06-24 20:36:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:17 [batch.py:51] router release req id 8 -INFO 06-24 20:36:17 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s -INFO 06-24 20:36:17 [manager.py:68] detokenization recv req id 8 cost time 0.11100196838378906 s -DEBUG 06-24 20:36:17 [manager.py:391] Prefill Batch: batch_id=189446459785670268147743911089100159048, time:1750768577.9713166s req_ids:[8] -DEBUG 06-24 20:36:17 [manager.py:391] -ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:17 lightllm_req_id:8 first_token_cost:216.72534942626953ms total_cost_time:216.77923202514648ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:13747 prompt_cache_len:5151 prompt_cache_ratio:0.37469993453117045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 -DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:18 [batch.py:51] router release req id 8 -INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.1089775562286377 s -INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.11025738716125488 s -DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=22632230072741360454213297010434378741, time:1750768578.2006752s req_ids:[8] -DEBUG 06-24 20:36:18 [manager.py:391] -ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:222.67675399780273ms total_cost_time:222.72062301635742ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13748 prompt_cache_len:5151 prompt_cache_ratio:0.37467267966249634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 -DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:18 [batch.py:51] router release req id 8 -INFO 06-24 20:36:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.1083521842956543 s -INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10957193374633789 s -DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=24110163655775653966543409758326346276, time:1750768578.4243484s req_ids:[8] -DEBUG 06-24 20:36:18 [manager.py:391] -ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:214.47134017944336ms total_cost_time:214.51449394226074ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13749 prompt_cache_len:5151 prompt_cache_ratio:0.37464542875845513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 -DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:18 [batch.py:51] router release req id 8 -INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s -INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10908722877502441 s -DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=242449534380405616665791501238707058657, time:1750768578.6463752s req_ids:[8] -DEBUG 06-24 20:36:18 [manager.py:391] -ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:171.2791919708252ms total_cost_time:171.32186889648438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13750 prompt_cache_len:5151 prompt_cache_ratio:0.3746181818181818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 -DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:18 [batch.py:51] router release req id 8 -INFO 06-24 20:36:18 [manager.py:224] router recive req id 8 cost time 0.10811901092529297 s -INFO 06-24 20:36:18 [manager.py:68] detokenization recv req id 8 cost time 0.10928773880004883 s -DEBUG 06-24 20:36:18 [manager.py:391] Prefill Batch: batch_id=170426951429281651797944932558011878185, time:1750768578.8241036s req_ids:[8] -DEBUG 06-24 20:36:18 [manager.py:391] -DEBUG 06-24 20:36:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 56246.032 tokens/s -DEBUG 06-24 20:36:18 [stats.py:37] Avg prompt tokens throughput: 56237.940 tokens/s -DEBUG 06-24 20:36:18 [stats.py:37] Avg generate tokens throughput: 8.091 tokens/s -ERROR 06-24 20:36:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:202.50225067138672ms total_cost_time:202.5444507598877ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13751 prompt_cache_len:5151 prompt_cache_ratio:0.37459093884081157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 -DEBUG 06-24 20:36:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:18 [batch.py:51] router release req id 8 -INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10807275772094727 s -INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11008787155151367 s -DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=204737245167080810236968494082647319323, time:1750768579.0348787s req_ids:[8] -DEBUG 06-24 20:36:19 [manager.py:391] -ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:18 lightllm_req_id:8 first_token_cost:384.1269016265869ms total_cost_time:384.1688632965088ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13752 prompt_cache_len:5151 prompt_cache_ratio:0.37456369982547993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 -DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:19 [batch.py:51] router release req id 8 -INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10860872268676758 s -INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.10989642143249512 s -DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=149301589123398562412915785147825403556, time:1750768579.4295444s req_ids:[8] -DEBUG 06-24 20:36:19 [manager.py:391] -ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:219.0382480621338ms total_cost_time:219.07973289489746ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13753 prompt_cache_len:5151 prompt_cache_ratio:0.37453646477132263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 -DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:19 [batch.py:51] router release req id 8 -INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.1084439754486084 s -INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11033821105957031 s -DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=114508494260319896129183344119655172946, time:1750768579.6619585s req_ids:[8] -DEBUG 06-24 20:36:19 [manager.py:391] -ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:225.89921951293945ms total_cost_time:225.94308853149414ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13754 prompt_cache_len:5151 prompt_cache_ratio:0.37450923367747563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 -DEBUG 06-24 20:36:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:19 [batch.py:51] router release req id 8 -INFO 06-24 20:36:19 [manager.py:224] router recive req id 8 cost time 0.10907983779907227 s -INFO 06-24 20:36:19 [manager.py:68] detokenization recv req id 8 cost time 0.11026191711425781 s -DEBUG 06-24 20:36:19 [manager.py:391] Prefill Batch: batch_id=34577129609621700435090606967669510169, time:1750768579.88677s req_ids:[8] -DEBUG 06-24 20:36:19 [manager.py:391] -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:218.05834770202637ms total_cost_time:218.10269355773926ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13755 prompt_cache_len:5151 prompt_cache_ratio:0.37448200654307523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 -DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:20 [batch.py:51] router release req id 8 -INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s -INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10915660858154297 s -DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=306945843780238094556184032080721079437, time:1750768580.1156237s req_ids:[8] -DEBUG 06-24 20:36:20 [manager.py:391] -ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:19 lightllm_req_id:8 first_token_cost:180.2835464477539ms total_cost_time:180.32526969909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13756 prompt_cache_len:5151 prompt_cache_ratio:0.3744547833672579 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 -DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:20 [batch.py:51] router release req id 8 -INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10847806930541992 s -INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s -DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=335847601742950772061300927747988035827, time:1750768580.2994869s req_ids:[8] -DEBUG 06-24 20:36:20 [manager.py:391] -ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:205.60622215270996ms total_cost_time:205.65056800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13757 prompt_cache_len:5151 prompt_cache_ratio:0.37442756414916045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 -DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:20 [batch.py:51] router release req id 8 -INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10753703117370605 s -INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10869479179382324 s -DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=303953348583180577290123414643346505956, time:1750768580.5208037s req_ids:[8] -DEBUG 06-24 20:36:20 [manager.py:391] -ERROR 06-24 20:36:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:414.111852645874ms total_cost_time:414.1669273376465ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:13758 prompt_cache_len:5151 prompt_cache_ratio:0.37440034888791973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 -DEBUG 06-24 20:36:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:20 [batch.py:51] router release req id 8 -INFO 06-24 20:36:20 [manager.py:224] router recive req id 8 cost time 0.10823631286621094 s -INFO 06-24 20:36:20 [manager.py:68] detokenization recv req id 8 cost time 0.10945367813110352 s -DEBUG 06-24 20:36:20 [manager.py:391] Prefill Batch: batch_id=153624245308465715898687144558972627157, time:1750768580.9335506s req_ids:[8] -DEBUG 06-24 20:36:20 [manager.py:391] -ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:20 lightllm_req_id:8 first_token_cost:193.40848922729492ms total_cost_time:193.4645175933838ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:13759 prompt_cache_len:5151 prompt_cache_ratio:0.37437313758267315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 -DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:21 [batch.py:51] router release req id 8 -INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s -INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.11012101173400879 s -DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=264620539015251714852740412797499118791, time:1750768581.134663s req_ids:[8] -DEBUG 06-24 20:36:21 [manager.py:391] -ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:213.16766738891602ms total_cost_time:213.2127285003662ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13760 prompt_cache_len:5151 prompt_cache_ratio:0.37434593023255813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 -DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:21 [batch.py:51] router release req id 8 -INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.10775017738342285 s -INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10897207260131836 s -DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=37403959710864053333712758767351594528, time:1750768581.3554473s req_ids:[8] -DEBUG 06-24 20:36:21 [manager.py:391] -ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:215.06929397583008ms total_cost_time:215.11077880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13761 prompt_cache_len:5151 prompt_cache_ratio:0.37431872683671247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 -DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:21 [batch.py:51] router release req id 8 -INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s -INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10899710655212402 s -DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=192382941579589616308712672403676148631, time:1750768581.5785089s req_ids:[8] -DEBUG 06-24 20:36:21 [manager.py:391] -ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:214.90073204040527ms total_cost_time:214.95699882507324ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:13762 prompt_cache_len:5151 prompt_cache_ratio:0.37429152739427407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 -DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:21 [batch.py:51] router release req id 8 -INFO 06-24 20:36:21 [manager.py:224] router recive req id 8 cost time 0.10805559158325195 s -INFO 06-24 20:36:21 [manager.py:68] detokenization recv req id 8 cost time 0.10997962951660156 s -DEBUG 06-24 20:36:21 [manager.py:391] Prefill Batch: batch_id=195730985255048076606269604083898053258, time:1750768581.811194s req_ids:[8] -DEBUG 06-24 20:36:21 [manager.py:391] -ERROR 06-24 20:36:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:230.3941249847412ms total_cost_time:230.4387092590332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13763 prompt_cache_len:5151 prompt_cache_ratio:0.3742643319043813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 -DEBUG 06-24 20:36:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:21 [batch.py:51] router release req id 8 -INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10948801040649414 s -INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11152958869934082 s -DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=290998651103999768091603125511949066725, time:1750768582.0505743s req_ids:[8] -DEBUG 06-24 20:36:22 [manager.py:391] -ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:21 lightllm_req_id:8 first_token_cost:230.51881790161133ms total_cost_time:230.5765151977539ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:13764 prompt_cache_len:5151 prompt_cache_ratio:0.3742371403661726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 -DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:22 [batch.py:51] router release req id 8 -INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.3099632263183594 s -INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.31172823905944824 s -DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=268681031907932602303773090531249750544, time:1750768582.4800303s req_ids:[8] -DEBUG 06-24 20:36:22 [manager.py:391] -ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:375.23961067199707ms total_cost_time:375.28133392333984ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13765 prompt_cache_len:5151 prompt_cache_ratio:0.37420995277878677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 -DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:22 [batch.py:51] router release req id 8 -INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10889077186584473 s -INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11083793640136719 s -DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=252209634441203966976809808674503265691, time:1750768582.6581206s req_ids:[8] -DEBUG 06-24 20:36:22 [manager.py:391] -ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:209.381103515625ms total_cost_time:209.4249725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13766 prompt_cache_len:5151 prompt_cache_ratio:0.3741827691413628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 -DEBUG 06-24 20:36:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:22 [batch.py:51] router release req id 8 -INFO 06-24 20:36:22 [manager.py:224] router recive req id 8 cost time 0.10870218276977539 s -INFO 06-24 20:36:22 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s -DEBUG 06-24 20:36:22 [manager.py:391] Prefill Batch: batch_id=246158324598541104739289850875468705913, time:1750768582.8806024s req_ids:[8] -DEBUG 06-24 20:36:22 [manager.py:391] -ERROR 06-24 20:36:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:225.67319869995117ms total_cost_time:225.72708129882812ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:13767 prompt_cache_len:5151 prompt_cache_ratio:0.3741555894530399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 -DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:23 [batch.py:51] router release req id 8 -INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s -INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.1111600399017334 s -DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=276604199521003127469782703819175368475, time:1750768583.1171188s req_ids:[8] -DEBUG 06-24 20:36:23 [manager.py:391] -INFO 06-24 20:36:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:23 [statics_utils.py:24] mean first cost: 231.22188198589367 ms -INFO 06-24 20:36:23 [statics_utils.py:24] mean per token cost: 0.05910160754670098 ms -INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:22 lightllm_req_id:8 first_token_cost:229.04706001281738ms total_cost_time:229.08926010131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13768 prompt_cache_len:5151 prompt_cache_ratio:0.3741284137129576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 -DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:23 [batch.py:51] router release req id 8 -INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10813260078430176 s -INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.11015844345092773 s -DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=68805721369044593082329602212297729764, time:1750768583.3416624s req_ids:[8] -DEBUG 06-24 20:36:23 [manager.py:391] -ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:213.6833667755127ms total_cost_time:213.72652053833008ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13769 prompt_cache_len:5151 prompt_cache_ratio:0.3741012419202556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 -DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:23 [batch.py:51] router release req id 8 -INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10923027992248535 s -INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.11135363578796387 s -DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=160558118431269740121913247666909574047, time:1750768583.5623748s req_ids:[8] -DEBUG 06-24 20:36:23 [manager.py:391] -ERROR 06-24 20:36:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:383.93568992614746ms total_cost_time:383.98003578186035ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13770 prompt_cache_len:5151 prompt_cache_ratio:0.37407407407407406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 -DEBUG 06-24 20:36:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:23 [batch.py:51] router release req id 8 -INFO 06-24 20:36:23 [manager.py:224] router recive req id 8 cost time 0.10796022415161133 s -INFO 06-24 20:36:23 [manager.py:68] detokenization recv req id 8 cost time 0.10974979400634766 s -DEBUG 06-24 20:36:23 [manager.py:391] Prefill Batch: batch_id=75070953023451659117361048065797602897, time:1750768583.963411s req_ids:[8] -DEBUG 06-24 20:36:23 [manager.py:391] -ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:23 lightllm_req_id:8 first_token_cost:189.45741653442383ms total_cost_time:189.5003318786621ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13771 prompt_cache_len:5151 prompt_cache_ratio:0.3740469101735531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 -DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:24 [batch.py:51] router release req id 8 -INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10883116722106934 s -INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11094021797180176 s -DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=197120761459988196119057736039053517062, time:1750768584.1534846s req_ids:[8] -DEBUG 06-24 20:36:24 [manager.py:391] -ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:216.7949676513672ms total_cost_time:216.83931350708008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13772 prompt_cache_len:5151 prompt_cache_ratio:0.3740197502178333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 -DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:24 [batch.py:51] router release req id 8 -INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10871601104736328 s -INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s -DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=52302614451390822232508305827296413715, time:1750768584.3863025s req_ids:[8] -DEBUG 06-24 20:36:24 [manager.py:391] -ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:228.3482551574707ms total_cost_time:228.3935546875ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13773 prompt_cache_len:5151 prompt_cache_ratio:0.37399259420605535 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 -DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:24 [batch.py:51] router release req id 8 -INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.10942840576171875 s -INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11145234107971191 s -DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=18175100276141320125582900888383239504, time:1750768584.6112075s req_ids:[8] -DEBUG 06-24 20:36:24 [manager.py:391] -ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:216.31860733032227ms total_cost_time:216.36152267456055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13774 prompt_cache_len:5151 prompt_cache_ratio:0.37396544213736027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 -DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:24 [batch.py:51] router release req id 8 -INFO 06-24 20:36:24 [manager.py:224] router recive req id 8 cost time 0.1084294319152832 s -INFO 06-24 20:36:24 [manager.py:68] detokenization recv req id 8 cost time 0.11046123504638672 s -DEBUG 06-24 20:36:24 [manager.py:391] Prefill Batch: batch_id=170748058374648964227533720605739829606, time:1750768584.8326836s req_ids:[8] -DEBUG 06-24 20:36:24 [manager.py:391] -ERROR 06-24 20:36:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:214.3397331237793ms total_cost_time:214.38288688659668ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13775 prompt_cache_len:5151 prompt_cache_ratio:0.3739382940108893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 -DEBUG 06-24 20:36:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:24 [batch.py:51] router release req id 8 -INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.1088404655456543 s -INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11081981658935547 s -DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=270521432285469796941491136416857042900, time:1750768585.0669403s req_ids:[8] -DEBUG 06-24 20:36:25 [manager.py:391] -ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:24 lightllm_req_id:8 first_token_cost:399.5068073272705ms total_cost_time:399.550199508667ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13776 prompt_cache_len:5151 prompt_cache_ratio:0.373911149825784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 -DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:25 [batch.py:51] router release req id 8 -INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.10886645317077637 s -INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11099576950073242 s -DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=311512152921332558149500291923138559401, time:1750768585.4620469s req_ids:[8] -DEBUG 06-24 20:36:25 [manager.py:391] -ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:215.93785285949707ms total_cost_time:215.98243713378906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13777 prompt_cache_len:5151 prompt_cache_ratio:0.37388400958118606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 -DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:25 [batch.py:51] router release req id 8 -INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.10819411277770996 s -INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11018109321594238 s -DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=246547596626297198728442126592732247677, time:1750768585.6973033s req_ids:[8] -DEBUG 06-24 20:36:25 [manager.py:391] -ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:230.23724555969238ms total_cost_time:230.28206825256348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13778 prompt_cache_len:5151 prompt_cache_ratio:0.37385687327623746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 -DEBUG 06-24 20:36:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:25 [batch.py:51] router release req id 8 -INFO 06-24 20:36:25 [manager.py:224] router recive req id 8 cost time 0.1082007884979248 s -INFO 06-24 20:36:25 [manager.py:68] detokenization recv req id 8 cost time 0.11007213592529297 s -DEBUG 06-24 20:36:25 [manager.py:391] Prefill Batch: batch_id=28724763507144094436127486113110173813, time:1750768585.935976s req_ids:[8] -DEBUG 06-24 20:36:25 [manager.py:391] -ERROR 06-24 20:36:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:25 lightllm_req_id:8 first_token_cost:191.53738021850586ms total_cost_time:191.58935546875ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:13779 prompt_cache_len:5151 prompt_cache_ratio:0.37382974091008053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 -DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:26 [batch.py:51] router release req id 8 -INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10881257057189941 s -INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.11084151268005371 s -DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=34095584697262581598314064984480564613, time:1750768586.124112s req_ids:[8] -DEBUG 06-24 20:36:26 [manager.py:391] -ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:213.85598182678223ms total_cost_time:213.8984203338623ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13780 prompt_cache_len:5151 prompt_cache_ratio:0.37380261248185775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 -DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:26 [batch.py:51] router release req id 8 -INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.1081688404083252 s -INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.10993552207946777 s -DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=258795262993347749048811046311023198032, time:1750768586.343138s req_ids:[8] -DEBUG 06-24 20:36:26 [manager.py:391] -ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:216.58563613891602ms total_cost_time:216.6283130645752ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13781 prompt_cache_len:5151 prompt_cache_ratio:0.37377548799071186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 -DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:26 [batch.py:51] router release req id 8 -INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s -INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.10970640182495117 s -DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=47099747531194318677015863037323955656, time:1750768586.566929s req_ids:[8] -DEBUG 06-24 20:36:26 [manager.py:391] -ERROR 06-24 20:36:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:378.47304344177246ms total_cost_time:378.51595878601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13782 prompt_cache_len:5151 prompt_cache_ratio:0.3737483674357858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 -DEBUG 06-24 20:36:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:26 [batch.py:51] router release req id 8 -INFO 06-24 20:36:26 [manager.py:224] router recive req id 8 cost time 0.10919713973999023 s -INFO 06-24 20:36:26 [manager.py:68] detokenization recv req id 8 cost time 0.11103153228759766 s -DEBUG 06-24 20:36:26 [manager.py:391] Prefill Batch: batch_id=310972547881158329060879802940590828289, time:1750768586.9526803s req_ids:[8] -DEBUG 06-24 20:36:26 [manager.py:391] -ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:26 lightllm_req_id:8 first_token_cost:210.8302116394043ms total_cost_time:210.87360382080078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13783 prompt_cache_len:5151 prompt_cache_ratio:0.37372125081622287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 -DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:27 [batch.py:51] router release req id 8 -INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10943746566772461 s -INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11133074760437012 s -DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=22602145400359889265682699674721264131, time:1750768587.172856s req_ids:[8] -DEBUG 06-24 20:36:27 [manager.py:391] -ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:215.26670455932617ms total_cost_time:215.31200408935547ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13784 prompt_cache_len:5151 prompt_cache_ratio:0.3736941381311666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 -DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:27 [batch.py:51] router release req id 8 -INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10879182815551758 s -INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11059236526489258 s -DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=172126498529678486574065180407957794789, time:1750768587.3940177s req_ids:[8] -DEBUG 06-24 20:36:27 [manager.py:391] -ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:217.37098693847656ms total_cost_time:217.41461753845215ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13785 prompt_cache_len:5151 prompt_cache_ratio:0.3736670293797606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 -DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:27 [batch.py:51] router release req id 8 -INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10834312438964844 s -INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s -DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=31819115684325781723398951256790989209, time:1750768587.6156986s req_ids:[8] -DEBUG 06-24 20:36:27 [manager.py:391] -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:171.06389999389648ms total_cost_time:171.12421989440918ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:13786 prompt_cache_len:5151 prompt_cache_ratio:0.373639924561149 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 -DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:27 [batch.py:51] router release req id 8 -INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10892200469970703 s -INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.11069703102111816 s -DEBUG 06-24 20:36:27 [manager.py:391] Prefill Batch: batch_id=141415734649546514267073167431458924933, time:1750768587.7975225s req_ids:[8] -DEBUG 06-24 20:36:27 [manager.py:391] -ERROR 06-24 20:36:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:198.61865043640137ms total_cost_time:198.66085052490234ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13787 prompt_cache_len:5151 prompt_cache_ratio:0.37361282367447596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 -DEBUG 06-24 20:36:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:27 [batch.py:51] router release req id 8 -INFO 06-24 20:36:27 [manager.py:224] router recive req id 8 cost time 0.10753750801086426 s -INFO 06-24 20:36:27 [manager.py:68] detokenization recv req id 8 cost time 0.10945820808410645 s -DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=191573883793787044665604234645023459800, time:1750768588.009976s req_ids:[8] -DEBUG 06-24 20:36:28 [manager.py:391] -ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:27 lightllm_req_id:8 first_token_cost:390.8510208129883ms total_cost_time:390.89488983154297ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13788 prompt_cache_len:5151 prompt_cache_ratio:0.373585726718886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 -DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:28 [batch.py:51] router release req id 8 -INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10888314247131348 s -INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.11089181900024414 s -DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=222084563514065249465223596331072764812, time:1750768588.401354s req_ids:[8] -DEBUG 06-24 20:36:28 [manager.py:391] -ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:214.14542198181152ms total_cost_time:214.186429977417ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:13789 prompt_cache_len:5151 prompt_cache_ratio:0.3735586336935238 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 -DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:28 [batch.py:51] router release req id 8 -INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10840177536010742 s -INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.1103050708770752 s -DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=258993850365837077497049684119097983955, time:1750768588.622603s req_ids:[8] -DEBUG 06-24 20:36:28 [manager.py:391] -ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:212.7218246459961ms total_cost_time:212.76497840881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13790 prompt_cache_len:5151 prompt_cache_ratio:0.37353154459753446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 -DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:28 [batch.py:51] router release req id 8 -INFO 06-24 20:36:28 [manager.py:224] router recive req id 8 cost time 0.10889840126037598 s -INFO 06-24 20:36:28 [manager.py:68] detokenization recv req id 8 cost time 0.11088085174560547 s -DEBUG 06-24 20:36:28 [manager.py:391] Prefill Batch: batch_id=164015436436284884420168237088550025243, time:1750768588.841591s req_ids:[8] -DEBUG 06-24 20:36:28 [manager.py:391] -DEBUG 06-24 20:36:28 [stats.py:37] Avg tokens(prompt+generate) throughput: 54997.253 tokens/s -DEBUG 06-24 20:36:28 [stats.py:37] Avg prompt tokens throughput: 54989.267 tokens/s -DEBUG 06-24 20:36:28 [stats.py:37] Avg generate tokens throughput: 7.986 tokens/s -ERROR 06-24 20:36:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:213.1824493408203ms total_cost_time:213.2258415222168ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13791 prompt_cache_len:5151 prompt_cache_ratio:0.3735044594300631 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 -DEBUG 06-24 20:36:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:28 [batch.py:51] router release req id 8 -INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.1085362434387207 s -INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.11050105094909668 s -INFO 06-24 20:36:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=298784555303675484243373553362283608036, time:1750768589.0739431s req_ids:[8] -DEBUG 06-24 20:36:29 [manager.py:391] -ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:28 lightllm_req_id:8 first_token_cost:226.27806663513184ms total_cost_time:226.3202667236328ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13792 prompt_cache_len:5151 prompt_cache_ratio:0.3734773781902552 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 -DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:29 [batch.py:51] router release req id 8 -INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.10796117782592773 s -INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.1097409725189209 s -DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=12306011289213749835730560298403401302, time:1750768589.3072891s req_ids:[8] -DEBUG 06-24 20:36:29 [manager.py:391] -ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:225.8775234222412ms total_cost_time:225.9213924407959ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13793 prompt_cache_len:5151 prompt_cache_ratio:0.37345030087725656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 -DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:29 [batch.py:51] router release req id 8 -INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s -INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.10989260673522949 s -DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=268402253598227928723197136817821402769, time:1750768589.5292885s req_ids:[8] -DEBUG 06-24 20:36:29 [manager.py:391] -ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:341.8159484863281ms total_cost_time:341.8605327606201ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13794 prompt_cache_len:5151 prompt_cache_ratio:0.3734232274902131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 -DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:29 [batch.py:51] router release req id 8 -INFO 06-24 20:36:29 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s -INFO 06-24 20:36:29 [manager.py:68] detokenization recv req id 8 cost time 0.11049389839172363 s -DEBUG 06-24 20:36:29 [manager.py:391] Prefill Batch: batch_id=199074250767939585557250321712788844136, time:1750768589.877693s req_ids:[8] -DEBUG 06-24 20:36:29 [manager.py:391] -ERROR 06-24 20:36:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:202.47650146484375ms total_cost_time:202.52060890197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13795 prompt_cache_len:5151 prompt_cache_ratio:0.3733961580282711 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 -DEBUG 06-24 20:36:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:30 [batch.py:51] router release req id 8 -INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s -INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.10984921455383301 s -DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=21662857430865574355142304665460947509, time:1750768590.0880036s req_ids:[8] -DEBUG 06-24 20:36:30 [manager.py:391] -ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:29 lightllm_req_id:8 first_token_cost:210.0837230682373ms total_cost_time:210.1278305053711ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13796 prompt_cache_len:5151 prompt_cache_ratio:0.373369092490577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 -DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:30 [batch.py:51] router release req id 8 -INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10906052589416504 s -INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11103940010070801 s -DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=229119526404368664339368612389792333815, time:1750768590.305363s req_ids:[8] -DEBUG 06-24 20:36:30 [manager.py:391] -ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:214.53380584716797ms total_cost_time:214.57505226135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13797 prompt_cache_len:5151 prompt_cache_ratio:0.37334203087627743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 -DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:30 [batch.py:51] router release req id 8 -INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10845422744750977 s -INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11031460762023926 s -DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=294187294687715179390350593562597093965, time:1750768590.5259128s req_ids:[8] -DEBUG 06-24 20:36:30 [manager.py:391] -ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:213.7007713317871ms total_cost_time:213.7444019317627ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13798 prompt_cache_len:5151 prompt_cache_ratio:0.3733149731845195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 -DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:30 [batch.py:51] router release req id 8 -INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10886597633361816 s -INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11066126823425293 s -DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=263157409327347663164578291585108979386, time:1750768590.7467878s req_ids:[8] -DEBUG 06-24 20:36:30 [manager.py:391] -ERROR 06-24 20:36:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:213.41514587402344ms total_cost_time:213.4568691253662ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13799 prompt_cache_len:5151 prompt_cache_ratio:0.37328791941445033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 -DEBUG 06-24 20:36:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:30 [batch.py:51] router release req id 8 -INFO 06-24 20:36:30 [manager.py:224] router recive req id 8 cost time 0.10862874984741211 s -INFO 06-24 20:36:30 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s -DEBUG 06-24 20:36:30 [manager.py:391] Prefill Batch: batch_id=84280328490890618386309806894197726230, time:1750768590.9657977s req_ids:[8] -DEBUG 06-24 20:36:30 [manager.py:391] -ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:30 lightllm_req_id:8 first_token_cost:376.59168243408203ms total_cost_time:376.6331672668457ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13800 prompt_cache_len:5151 prompt_cache_ratio:0.37326086956521737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 -DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:31 [batch.py:51] router release req id 8 -INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10898637771606445 s -INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.11097908020019531 s -DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=235533212991741272577394618294583172855, time:1750768591.3503742s req_ids:[8] -DEBUG 06-24 20:36:31 [manager.py:391] -ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:215.60120582580566ms total_cost_time:215.64412117004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13801 prompt_cache_len:5151 prompt_cache_ratio:0.3732338236359684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 -DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:31 [batch.py:51] router release req id 8 -INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10780763626098633 s -INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.10966348648071289 s -DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=311589872824876410451743404516249018369, time:1750768591.5727096s req_ids:[8] -DEBUG 06-24 20:36:31 [manager.py:391] -ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:212.53657341003418ms total_cost_time:212.57829666137695ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13802 prompt_cache_len:5151 prompt_cache_ratio:0.3732067816258513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 -DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:31 [batch.py:51] router release req id 8 -INFO 06-24 20:36:31 [manager.py:224] router recive req id 8 cost time 0.10861492156982422 s -INFO 06-24 20:36:31 [manager.py:68] detokenization recv req id 8 cost time 0.11051177978515625 s -DEBUG 06-24 20:36:31 [manager.py:391] Prefill Batch: batch_id=180994089280192819091335178246480474487, time:1750768591.804878s req_ids:[8] -DEBUG 06-24 20:36:31 [manager.py:391] -ERROR 06-24 20:36:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:228.1324863433838ms total_cost_time:228.17659378051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13803 prompt_cache_len:5151 prompt_cache_ratio:0.37317974353401434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 -DEBUG 06-24 20:36:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:31 [batch.py:51] router release req id 8 -INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10872769355773926 s -INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11066365242004395 s -DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=112263642498292028161908322246309690685, time:1750768592.0293806s req_ids:[8] -DEBUG 06-24 20:36:32 [manager.py:391] -ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:31 lightllm_req_id:8 first_token_cost:213.52338790893555ms total_cost_time:213.56630325317383ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13804 prompt_cache_len:5151 prompt_cache_ratio:0.3731527093596059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 -DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:32 [batch.py:51] router release req id 8 -INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s -INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11066055297851562 s -DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=105635199432557649071407602140511473266, time:1750768592.2499352s req_ids:[8] -DEBUG 06-24 20:36:32 [manager.py:391] -ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:214.1869068145752ms total_cost_time:214.22958374023438ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13805 prompt_cache_len:5151 prompt_cache_ratio:0.37312567910177474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 -DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:32 [batch.py:51] router release req id 8 -INFO 06-24 20:36:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.31102728843688965 s -INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.31314516067504883 s -DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=183878873506865024951223248494418614161, time:1750768592.6740313s req_ids:[8] -DEBUG 06-24 20:36:32 [manager.py:391] -ERROR 06-24 20:36:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:424.18479919433594ms total_cost_time:424.2405891418457ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:13806 prompt_cache_len:5151 prompt_cache_ratio:0.3730986527596697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 -DEBUG 06-24 20:36:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:32 [batch.py:51] router release req id 8 -INFO 06-24 20:36:32 [manager.py:224] router recive req id 8 cost time 0.10816025733947754 s -INFO 06-24 20:36:32 [manager.py:68] detokenization recv req id 8 cost time 0.11016988754272461 s -DEBUG 06-24 20:36:32 [manager.py:391] Prefill Batch: batch_id=29119128694980337095192697942810118837, time:1750768592.9186168s req_ids:[8] -DEBUG 06-24 20:36:32 [manager.py:391] -ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:32 lightllm_req_id:8 first_token_cost:233.24322700500488ms total_cost_time:233.28542709350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13807 prompt_cache_len:5151 prompt_cache_ratio:0.37307163033244006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 -DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:33 [batch.py:51] router release req id 8 -INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10830402374267578 s -INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11042428016662598 s -DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=41890065043515425951854453205069601072, time:1750768593.1425343s req_ids:[8] -DEBUG 06-24 20:36:33 [manager.py:391] -ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:215.71850776672363ms total_cost_time:215.7607078552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13808 prompt_cache_len:5151 prompt_cache_ratio:0.3730446118192352 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 -DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:33 [batch.py:51] router release req id 8 -INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10834813117980957 s -INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.1104891300201416 s -DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=67945285421627667327401289378955552362, time:1750768593.365414s req_ids:[8] -DEBUG 06-24 20:36:33 [manager.py:391] -ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:213.4726047515869ms total_cost_time:213.51861953735352ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13809 prompt_cache_len:5151 prompt_cache_ratio:0.37301759721920486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 -DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:33 [batch.py:51] router release req id 8 -INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10859990119934082 s -INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11068487167358398 s -DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=98712170673104117742996927683031159346, time:1750768593.5859854s req_ids:[8] -DEBUG 06-24 20:36:33 [manager.py:391] -ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:213.9601707458496ms total_cost_time:214.00165557861328ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13810 prompt_cache_len:5151 prompt_cache_ratio:0.3729905865314989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 -DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:33 [batch.py:51] router release req id 8 -DEBUG 06-24 20:36:33 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:33 [manager.py:283] -DEBUG 06-24 20:36:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:33 [manager.py:284] -INFO 06-24 20:36:33 [manager.py:224] router recive req id 8 cost time 0.10910415649414062 s -INFO 06-24 20:36:33 [manager.py:68] detokenization recv req id 8 cost time 0.11101317405700684 s -DEBUG 06-24 20:36:33 [manager.py:391] Prefill Batch: batch_id=309805742864741012871573084296122899615, time:1750768593.8196313s req_ids:[8] -DEBUG 06-24 20:36:33 [manager.py:391] -ERROR 06-24 20:36:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:229.74324226379395ms total_cost_time:229.78639602661133ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13811 prompt_cache_len:5151 prompt_cache_ratio:0.37296357975526756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 -DEBUG 06-24 20:36:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:33 [batch.py:51] router release req id 8 -INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.1085505485534668 s -INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11075186729431152 s -DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=115024517166425707963766146518802832767, time:1750768594.046607s req_ids:[8] -DEBUG 06-24 20:36:34 [manager.py:391] -ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:33 lightllm_req_id:8 first_token_cost:383.0831050872803ms total_cost_time:383.12697410583496ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13812 prompt_cache_len:5151 prompt_cache_ratio:0.37293657688966114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 -DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:34 [batch.py:51] router release req id 8 -INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10866546630859375 s -INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s -DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=70541196149036493980534606155618423455, time:1750768594.4355533s req_ids:[8] -DEBUG 06-24 20:36:34 [manager.py:391] -ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:213.70959281921387ms total_cost_time:213.75131607055664ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13813 prompt_cache_len:5151 prompt_cache_ratio:0.37290957793383045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 -DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:34 [batch.py:51] router release req id 8 -INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10846161842346191 s -INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.11033034324645996 s -DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=289269212022392496069488863591953522820, time:1750768594.666673s req_ids:[8] -DEBUG 06-24 20:36:34 [manager.py:391] -ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:226.78232192993164ms total_cost_time:226.82499885559082ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13814 prompt_cache_len:5151 prompt_cache_ratio:0.37288258288692633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 -DEBUG 06-24 20:36:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:34 [batch.py:51] router release req id 8 -INFO 06-24 20:36:34 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s -INFO 06-24 20:36:34 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s -DEBUG 06-24 20:36:34 [manager.py:391] Prefill Batch: batch_id=147999321632835356861945877280988447644, time:1750768594.8904595s req_ids:[8] -DEBUG 06-24 20:36:34 [manager.py:391] -ERROR 06-24 20:36:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:217.1926498413086ms total_cost_time:217.23461151123047ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13815 prompt_cache_len:5151 prompt_cache_ratio:0.3728555917480999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 -DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:35 [batch.py:51] router release req id 8 -INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10816526412963867 s -INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10985660552978516 s -DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=233781318575570960723504139025265551648, time:1750768595.113944s req_ids:[8] -DEBUG 06-24 20:36:35 [manager.py:391] -ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:34 lightllm_req_id:8 first_token_cost:174.32403564453125ms total_cost_time:174.36575889587402ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13816 prompt_cache_len:5151 prompt_cache_ratio:0.3728286045165026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 -DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:35 [batch.py:51] router release req id 8 -INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10708975791931152 s -INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10892653465270996 s -DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=250347019763933290440288368824110902912, time:1750768595.2951615s req_ids:[8] -DEBUG 06-24 20:36:35 [manager.py:391] -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:205.47962188720703ms total_cost_time:205.52539825439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13817 prompt_cache_len:5151 prompt_cache_ratio:0.37280162119128607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 -DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:35 [batch.py:51] router release req id 8 -INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10813355445861816 s -INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.11006927490234375 s -DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=197984814368252103898274975904288632155, time:1750768595.5067356s req_ids:[8] -DEBUG 06-24 20:36:35 [manager.py:391] -ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:376.85513496398926ms total_cost_time:376.89900398254395ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13818 prompt_cache_len:5151 prompt_cache_ratio:0.37277464177160224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 -DEBUG 06-24 20:36:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:35 [batch.py:51] router release req id 8 -INFO 06-24 20:36:35 [manager.py:224] router recive req id 8 cost time 0.10777592658996582 s -INFO 06-24 20:36:35 [manager.py:68] detokenization recv req id 8 cost time 0.10974335670471191 s -DEBUG 06-24 20:36:35 [manager.py:391] Prefill Batch: batch_id=318426154573738710645627884375551040253, time:1750768595.8908987s req_ids:[8] -DEBUG 06-24 20:36:35 [manager.py:391] -ERROR 06-24 20:36:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:214.9968147277832ms total_cost_time:215.0406837463379ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13819 prompt_cache_len:5151 prompt_cache_ratio:0.3727476662566032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 -DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:36 [batch.py:51] router release req id 8 -INFO 06-24 20:36:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1091315746307373 s -INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.1110687255859375 s -DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=244079868142628014672219542702995731856, time:1750768596.1134791s req_ids:[8] -DEBUG 06-24 20:36:36 [manager.py:391] -ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:35 lightllm_req_id:8 first_token_cost:209.48410034179688ms total_cost_time:209.53011512756348ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13820 prompt_cache_len:5151 prompt_cache_ratio:0.3727206946454414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 -DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:36 [batch.py:51] router release req id 8 -INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.10849666595458984 s -INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.11051607131958008 s -DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=90662897453574747515343883787861614094, time:1750768596.330456s req_ids:[8] -DEBUG 06-24 20:36:36 [manager.py:391] -ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:209.34462547302246ms total_cost_time:209.39016342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13821 prompt_cache_len:5151 prompt_cache_ratio:0.3726937269372694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 -DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:36 [batch.py:51] router release req id 8 -INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.10779738426208496 s -INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.10990643501281738 s -DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=314333125187775010470799002796136271698, time:1750768596.5537639s req_ids:[8] -DEBUG 06-24 20:36:36 [manager.py:391] -ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:222.5472927093506ms total_cost_time:222.59163856506348ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13822 prompt_cache_len:5151 prompt_cache_ratio:0.37266676313124003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 -DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:36 [batch.py:51] router release req id 8 -INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1085212230682373 s -INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.11033368110656738 s -DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=1105617874569855881666024151752766124, time:1750768596.779617s req_ids:[8] -DEBUG 06-24 20:36:36 [manager.py:391] -ERROR 06-24 20:36:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:216.29691123962402ms total_cost_time:216.3398265838623ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13823 prompt_cache_len:5151 prompt_cache_ratio:0.3726398032265065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 -DEBUG 06-24 20:36:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:36 [batch.py:51] router release req id 8 -INFO 06-24 20:36:36 [manager.py:224] router recive req id 8 cost time 0.1076047420501709 s -INFO 06-24 20:36:36 [manager.py:68] detokenization recv req id 8 cost time 0.1094520092010498 s -DEBUG 06-24 20:36:36 [manager.py:391] Prefill Batch: batch_id=92651499534998079335782367191799202241, time:1750768596.9991887s req_ids:[8] -DEBUG 06-24 20:36:36 [manager.py:391] -ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:36 lightllm_req_id:8 first_token_cost:371.75822257995605ms total_cost_time:371.80423736572266ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13824 prompt_cache_len:5151 prompt_cache_ratio:0.3726128472222222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 -DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:37 [batch.py:51] router release req id 8 -INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10924839973449707 s -INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.11116671562194824 s -DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=160755598365206827216034568632498794718, time:1750768597.3777874s req_ids:[8] -DEBUG 06-24 20:36:37 [manager.py:391] -ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:215.9874439239502ms total_cost_time:216.0325050354004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13825 prompt_cache_len:5151 prompt_cache_ratio:0.3725858951175407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 -DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:37 [batch.py:51] router release req id 8 -INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10918378829956055 s -INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.1112680435180664 s -DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=239155918142721663883829082087636575407, time:1750768597.5996947s req_ids:[8] -DEBUG 06-24 20:36:37 [manager.py:391] -ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:212.81790733337402ms total_cost_time:212.8608226776123ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13826 prompt_cache_len:5151 prompt_cache_ratio:0.3725589469116158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 -DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:37 [batch.py:51] router release req id 8 -INFO 06-24 20:36:37 [manager.py:224] router recive req id 8 cost time 0.10921144485473633 s -INFO 06-24 20:36:37 [manager.py:68] detokenization recv req id 8 cost time 0.11151456832885742 s -DEBUG 06-24 20:36:37 [manager.py:391] Prefill Batch: batch_id=324467070733286231624022932105240028778, time:1750768597.8255916s req_ids:[8] -DEBUG 06-24 20:36:37 [manager.py:391] -ERROR 06-24 20:36:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:219.73276138305664ms total_cost_time:219.77734565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13827 prompt_cache_len:5151 prompt_cache_ratio:0.37253200260360164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 -DEBUG 06-24 20:36:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:37 [batch.py:51] router release req id 8 -INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10887455940246582 s -INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11089420318603516 s -DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=39050962449329032085341850144249384485, time:1750768598.0483422s req_ids:[8] -DEBUG 06-24 20:36:38 [manager.py:391] -ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:37 lightllm_req_id:8 first_token_cost:215.53349494934082ms total_cost_time:215.5766487121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13828 prompt_cache_len:5151 prompt_cache_ratio:0.3725050621926526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 -DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:38 [batch.py:51] router release req id 8 -INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s -INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11104846000671387 s -DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=244719723756609903526087332932343750787, time:1750768598.269116s req_ids:[8] -DEBUG 06-24 20:36:38 [manager.py:391] -ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:372.01786041259766ms total_cost_time:372.06339836120605ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13829 prompt_cache_len:5151 prompt_cache_ratio:0.3724781256779232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 -DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:38 [batch.py:51] router release req id 8 -INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10878634452819824 s -INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s -DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=230503635265953737339175586556390144311, time:1750768598.6486042s req_ids:[8] -DEBUG 06-24 20:36:38 [manager.py:391] -ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:214.09201622009277ms total_cost_time:214.13612365722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13830 prompt_cache_len:5151 prompt_cache_ratio:0.37245119305856833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 -DEBUG 06-24 20:36:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:38 [batch.py:51] router release req id 8 -INFO 06-24 20:36:38 [manager.py:224] router recive req id 8 cost time 0.10853958129882812 s -INFO 06-24 20:36:38 [manager.py:68] detokenization recv req id 8 cost time 0.1105034351348877 s -DEBUG 06-24 20:36:38 [manager.py:391] Prefill Batch: batch_id=5213523359176640098225327946997009545, time:1750768598.87044s req_ids:[8] -DEBUG 06-24 20:36:38 [manager.py:391] -DEBUG 06-24 20:36:38 [stats.py:37] Avg tokens(prompt+generate) throughput: 55095.016 tokens/s -DEBUG 06-24 20:36:38 [stats.py:37] Avg prompt tokens throughput: 55087.039 tokens/s -DEBUG 06-24 20:36:38 [stats.py:37] Avg generate tokens throughput: 7.977 tokens/s -ERROR 06-24 20:36:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:216.27163887023926ms total_cost_time:216.31717681884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13831 prompt_cache_len:5151 prompt_cache_ratio:0.37242426433374304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 -DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:39 [batch.py:51] router release req id 8 -INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10915207862854004 s -INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11117720603942871 s -DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=147498267109635348951056480653060794688, time:1750768599.0980783s req_ids:[8] -DEBUG 06-24 20:36:39 [manager.py:391] -ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:38 lightllm_req_id:8 first_token_cost:221.34017944335938ms total_cost_time:221.38333320617676ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13832 prompt_cache_len:5151 prompt_cache_ratio:0.3723973395026027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 -DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:39 [batch.py:51] router release req id 8 -INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s -INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11136937141418457 s -DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=199183532784738756898525049911494007056, time:1750768599.3201354s req_ids:[8] -DEBUG 06-24 20:36:39 [manager.py:391] -ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:213.8350009918213ms total_cost_time:213.87910842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13833 prompt_cache_len:5151 prompt_cache_ratio:0.37237041856430275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 -DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:39 [batch.py:51] router release req id 8 -INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s -INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.11092090606689453 s -DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=160099388801816922247152124771278877809, time:1750768599.540271s req_ids:[8] -DEBUG 06-24 20:36:39 [manager.py:391] -ERROR 06-24 20:36:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:224.49755668640137ms total_cost_time:224.55739974975586ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:13834 prompt_cache_len:5151 prompt_cache_ratio:0.3723435015179991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 -DEBUG 06-24 20:36:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:39 [batch.py:51] router release req id 8 -INFO 06-24 20:36:39 [manager.py:224] router recive req id 8 cost time 0.10746002197265625 s -INFO 06-24 20:36:39 [manager.py:68] detokenization recv req id 8 cost time 0.10963702201843262 s -DEBUG 06-24 20:36:39 [manager.py:391] Prefill Batch: batch_id=8477697362157091446705166925786408555, time:1750768599.7746449s req_ids:[8] -DEBUG 06-24 20:36:39 [manager.py:391] -ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:39 lightllm_req_id:8 first_token_cost:390.53845405578613ms total_cost_time:390.60425758361816ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:13835 prompt_cache_len:5151 prompt_cache_ratio:0.37231658836284787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 -DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:40 [batch.py:51] router release req id 8 -INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10922026634216309 s -INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11135053634643555 s -DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=296883022349961401182770048571305628401, time:1750768600.1689537s req_ids:[8] -DEBUG 06-24 20:36:40 [manager.py:391] -ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:214.67876434326172ms total_cost_time:214.7202491760254ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13836 prompt_cache_len:5151 prompt_cache_ratio:0.3722896790980052 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 -DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:40 [batch.py:51] router release req id 8 -INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10816478729248047 s -INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11014556884765625 s -DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=186959692393502773127962698170287084022, time:1750768600.3898444s req_ids:[8] -DEBUG 06-24 20:36:40 [manager.py:391] -ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:214.65086936950684ms total_cost_time:214.69521522521973ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13837 prompt_cache_len:5151 prompt_cache_ratio:0.3722627737226277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 -DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:40 [batch.py:51] router release req id 8 -INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10806059837341309 s -INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11001014709472656 s -DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=151593025992029793929327637998234458817, time:1750768600.6118808s req_ids:[8] -DEBUG 06-24 20:36:40 [manager.py:391] -ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:215.38972854614258ms total_cost_time:215.43407440185547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13838 prompt_cache_len:5151 prompt_cache_ratio:0.37223587223587223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 -DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:40 [batch.py:51] router release req id 8 -INFO 06-24 20:36:40 [manager.py:224] router recive req id 8 cost time 0.10880422592163086 s -INFO 06-24 20:36:40 [manager.py:68] detokenization recv req id 8 cost time 0.11088061332702637 s -DEBUG 06-24 20:36:40 [manager.py:391] Prefill Batch: batch_id=284111348199580201809267296856892105182, time:1750768600.833257s req_ids:[8] -DEBUG 06-24 20:36:40 [manager.py:391] -ERROR 06-24 20:36:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:213.85598182678223ms total_cost_time:213.8974666595459ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13839 prompt_cache_len:5151 prompt_cache_ratio:0.37220897463689573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 -DEBUG 06-24 20:36:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:40 [batch.py:51] router release req id 8 -INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.1089944839477539 s -INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.11111688613891602 s -DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=229560012411902646121446241335715972761, time:1750768601.0539868s req_ids:[8] -DEBUG 06-24 20:36:41 [manager.py:391] -ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:40 lightllm_req_id:8 first_token_cost:215.2853012084961ms total_cost_time:215.3306007385254ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13840 prompt_cache_len:5151 prompt_cache_ratio:0.3721820809248555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 -DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:41 [batch.py:51] router release req id 8 -INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.1082298755645752 s -INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10961794853210449 s -DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=191000643797667570464130565803862420073, time:1750768601.275609s req_ids:[8] -DEBUG 06-24 20:36:41 [manager.py:391] -ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:387.82382011413574ms total_cost_time:387.8672122955322ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13841 prompt_cache_len:5151 prompt_cache_ratio:0.37215519109890904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 -DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:41 [batch.py:51] router release req id 8 -INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.10863351821899414 s -INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10999798774719238 s -DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=337285015877567928872858512179850117286, time:1750768601.6697965s req_ids:[8] -DEBUG 06-24 20:36:41 [manager.py:391] -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:41 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:215.64030647277832ms total_cost_time:215.6827449798584ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13842 prompt_cache_len:5151 prompt_cache_ratio:0.37212830515821416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 -DEBUG 06-24 20:36:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:41 [batch.py:51] router release req id 8 -INFO 06-24 20:36:41 [manager.py:224] router recive req id 8 cost time 0.10800719261169434 s -INFO 06-24 20:36:41 [manager.py:68] detokenization recv req id 8 cost time 0.10941600799560547 s -DEBUG 06-24 20:36:41 [manager.py:391] Prefill Batch: batch_id=188499312184582458259122916032242095939, time:1750768601.9067285s req_ids:[8] -DEBUG 06-24 20:36:41 [manager.py:391] -ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:41 lightllm_req_id:8 first_token_cost:229.72512245178223ms total_cost_time:229.77089881896973ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13843 prompt_cache_len:5151 prompt_cache_ratio:0.3721014231019288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 -DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:42 [batch.py:51] router release req id 8 -INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10821342468261719 s -INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.1094977855682373 s -DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=200937286312493235099449575281552991846, time:1750768602.1284418s req_ids:[8] -DEBUG 06-24 20:36:42 [manager.py:391] -ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:206.97951316833496ms total_cost_time:207.02362060546875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13844 prompt_cache_len:5151 prompt_cache_ratio:0.3720745449292112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 -DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:42 [batch.py:51] router release req id 8 -INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.1086421012878418 s -INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10995697975158691 s -DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=168031610327138770871967588498501665204, time:1750768602.3416069s req_ids:[8] -DEBUG 06-24 20:36:42 [manager.py:391] -ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:213.4993076324463ms total_cost_time:213.54246139526367ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13845 prompt_cache_len:5151 prompt_cache_ratio:0.37204767063921995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 -DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:42 [batch.py:51] router release req id 8 -INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s -INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10977530479431152 s -DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=267902443673609443096683853817627647410, time:1750768602.5630279s req_ids:[8] -DEBUG 06-24 20:36:42 [manager.py:391] -ERROR 06-24 20:36:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:215.15250205993652ms total_cost_time:215.1954174041748ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13846 prompt_cache_len:5151 prompt_cache_ratio:0.37202080023111367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 -DEBUG 06-24 20:36:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:42 [batch.py:51] router release req id 8 -INFO 06-24 20:36:42 [manager.py:224] router recive req id 8 cost time 0.10838627815246582 s -INFO 06-24 20:36:42 [manager.py:68] detokenization recv req id 8 cost time 0.10961270332336426 s -DEBUG 06-24 20:36:42 [manager.py:391] Prefill Batch: batch_id=49978266871827866954662014980714517645, time:1750768602.7835395s req_ids:[8] -DEBUG 06-24 20:36:42 [manager.py:391] -ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:42 lightllm_req_id:8 first_token_cost:379.227876663208ms total_cost_time:379.2712688446045ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13847 prompt_cache_len:5151 prompt_cache_ratio:0.3719939337040514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 -DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:43 [batch.py:51] router release req id 8 -INFO 06-24 20:36:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10920262336730957 s -INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s -DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=289699605286671532784967002474739684144, time:1750768603.169512s req_ids:[8] -DEBUG 06-24 20:36:43 [manager.py:391] -ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:213.08422088623047ms total_cost_time:213.12761306762695ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13848 prompt_cache_len:5151 prompt_cache_ratio:0.3719670710571924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 -DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:43 [batch.py:51] router release req id 8 -INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10862970352172852 s -INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11002016067504883 s -DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=142644558757226871491006825729974212818, time:1750768603.3885543s req_ids:[8] -DEBUG 06-24 20:36:43 [manager.py:391] -ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:209.75327491760254ms total_cost_time:209.79762077331543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13849 prompt_cache_len:5151 prompt_cache_ratio:0.371940212289696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 -DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:43 [batch.py:51] router release req id 8 -INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10832548141479492 s -INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.11029887199401855 s -DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=3030303708089589036805158924491963015, time:1750768603.6026955s req_ids:[8] -DEBUG 06-24 20:36:43 [manager.py:391] -ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:210.84880828857422ms total_cost_time:210.8936309814453ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13850 prompt_cache_len:5151 prompt_cache_ratio:0.371913357400722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 -DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:43 [batch.py:51] router release req id 8 -INFO 06-24 20:36:43 [manager.py:224] router recive req id 8 cost time 0.10785269737243652 s -INFO 06-24 20:36:43 [manager.py:68] detokenization recv req id 8 cost time 0.10970473289489746 s -DEBUG 06-24 20:36:43 [manager.py:391] Prefill Batch: batch_id=122020811646544496219407959949211508329, time:1750768603.8215182s req_ids:[8] -DEBUG 06-24 20:36:43 [manager.py:391] -ERROR 06-24 20:36:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:209.90657806396484ms total_cost_time:209.95044708251953ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13851 prompt_cache_len:5151 prompt_cache_ratio:0.3718865063894304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 -DEBUG 06-24 20:36:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:43 [batch.py:51] router release req id 8 -INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10866403579711914 s -INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.11066794395446777 s -DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=312639360194585937348427258759687999681, time:1750768604.0372434s req_ids:[8] -DEBUG 06-24 20:36:44 [manager.py:391] -ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:43 lightllm_req_id:8 first_token_cost:210.7105255126953ms total_cost_time:210.75439453125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13852 prompt_cache_len:5151 prompt_cache_ratio:0.3718596592549812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 -DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:44 [batch.py:51] router release req id 8 -INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.3080918788909912 s -INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.30947160720825195 s -DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=142262630836884404567964149602708608441, time:1750768604.4608305s req_ids:[8] -DEBUG 06-24 20:36:44 [manager.py:391] -ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:427.9770851135254ms total_cost_time:428.0211925506592ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13853 prompt_cache_len:5151 prompt_cache_ratio:0.37183281599653506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 -DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:44 [batch.py:51] router release req id 8 -INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10920572280883789 s -INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.11062884330749512 s -DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=103213169192415222501952943854519291786, time:1750768604.6888561s req_ids:[8] -DEBUG 06-24 20:36:44 [manager.py:391] -ERROR 06-24 20:36:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:213.28186988830566ms total_cost_time:213.32693099975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13854 prompt_cache_len:5151 prompt_cache_ratio:0.3718059766132525 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 -DEBUG 06-24 20:36:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:44 [batch.py:51] router release req id 8 -INFO 06-24 20:36:44 [manager.py:224] router recive req id 8 cost time 0.10837268829345703 s -INFO 06-24 20:36:44 [manager.py:68] detokenization recv req id 8 cost time 0.10966801643371582 s -DEBUG 06-24 20:36:44 [manager.py:391] Prefill Batch: batch_id=195920442552411858775117078734650647630, time:1750768604.9104292s req_ids:[8] -DEBUG 06-24 20:36:44 [manager.py:391] -ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:44 lightllm_req_id:8 first_token_cost:210.65402030944824ms total_cost_time:210.69741249084473ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13855 prompt_cache_len:5151 prompt_cache_ratio:0.3717791411042945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 -DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:45 [batch.py:51] router release req id 8 -INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.1070396900177002 s -INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10830545425415039 s -DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=109166986856396400011509957237007307800, time:1750768605.126659s req_ids:[8] -DEBUG 06-24 20:36:45 [manager.py:391] -ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:211.00258827209473ms total_cost_time:211.0462188720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13856 prompt_cache_len:5151 prompt_cache_ratio:0.37175230946882215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 -DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:45 [batch.py:51] router release req id 8 -INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s -INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10932469367980957 s -DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=57315085074251537041873232839668833102, time:1750768605.345094s req_ids:[8] -DEBUG 06-24 20:36:45 [manager.py:391] -ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:209.37705039978027ms total_cost_time:209.42258834838867ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13857 prompt_cache_len:5151 prompt_cache_ratio:0.37172548170599695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 -DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:45 [batch.py:51] router release req id 8 -INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.10824728012084961 s -INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10945773124694824 s -DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=35133569341582258199255030593270971933, time:1750768605.5611644s req_ids:[8] -DEBUG 06-24 20:36:45 [manager.py:391] -ERROR 06-24 20:36:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:212.16273307800293ms total_cost_time:212.2042179107666ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:13858 prompt_cache_len:5151 prompt_cache_ratio:0.3716986578149805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 -DEBUG 06-24 20:36:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:45 [batch.py:51] router release req id 8 -INFO 06-24 20:36:45 [manager.py:224] router recive req id 8 cost time 0.1073606014251709 s -INFO 06-24 20:36:45 [manager.py:68] detokenization recv req id 8 cost time 0.10857224464416504 s -DEBUG 06-24 20:36:45 [manager.py:391] Prefill Batch: batch_id=210010580968553470525848017144573051964, time:1750768605.779655s req_ids:[8] -DEBUG 06-24 20:36:45 [manager.py:391] -ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:45 lightllm_req_id:8 first_token_cost:382.59005546569824ms total_cost_time:382.6336860656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13859 prompt_cache_len:5151 prompt_cache_ratio:0.3716718377949347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 -DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:46 [batch.py:51] router release req id 8 -INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10862946510314941 s -INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10991430282592773 s -DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=89827057632358311562881479544441812275, time:1750768606.1698174s req_ids:[8] -DEBUG 06-24 20:36:46 [manager.py:391] -ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:212.22734451293945ms total_cost_time:212.27073669433594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13860 prompt_cache_len:5151 prompt_cache_ratio:0.37164502164502167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 -DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:46 [batch.py:51] router release req id 8 -INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10798072814941406 s -INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.1090841293334961 s -DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=188340965640538397083371885415216526966, time:1750768606.3860781s req_ids:[8] -DEBUG 06-24 20:36:46 [manager.py:391] -ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:168.92218589782715ms total_cost_time:168.96438598632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13861 prompt_cache_len:5151 prompt_cache_ratio:0.3716182093644037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 -DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:46 [batch.py:51] router release req id 8 -INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10789132118225098 s -INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10906100273132324 s -DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=339886357382940863562481625270787972175, time:1750768606.5642962s req_ids:[8] -DEBUG 06-24 20:36:46 [manager.py:391] -ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:212.13865280151367ms total_cost_time:212.16773986816406ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:13862 prompt_cache_len:5151 prompt_cache_ratio:0.3715914009522435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 -DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:46 [batch.py:51] router release req id 8 -INFO 06-24 20:36:46 [manager.py:224] router recive req id 8 cost time 0.10860776901245117 s -INFO 06-24 20:36:46 [manager.py:68] detokenization recv req id 8 cost time 0.10993623733520508 s -DEBUG 06-24 20:36:46 [manager.py:391] Prefill Batch: batch_id=132624696046900502541277819093076895869, time:1750768606.7819715s req_ids:[8] -DEBUG 06-24 20:36:46 [manager.py:391] -ERROR 06-24 20:36:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:217.6229953765869ms total_cost_time:217.6668643951416ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13863 prompt_cache_len:5151 prompt_cache_ratio:0.37156459640770395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 -DEBUG 06-24 20:36:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:46 [batch.py:51] router release req id 8 -INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10865283012390137 s -INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10985779762268066 s -DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=186907816647178689546833043204723998337, time:1750768607.0049124s req_ids:[8] -DEBUG 06-24 20:36:47 [manager.py:391] -ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:46 lightllm_req_id:8 first_token_cost:207.49974250793457ms total_cost_time:207.5352668762207ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:13864 prompt_cache_len:5151 prompt_cache_ratio:0.37153779572994805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 -DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:47 [batch.py:51] router release req id 8 -INFO 06-24 20:36:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10771298408508301 s -INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10902643203735352 s -DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=200525133305156964602392608855734386426, time:1750768607.2232866s req_ids:[8] -DEBUG 06-24 20:36:47 [manager.py:391] -ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:384.4027519226074ms total_cost_time:384.4466209411621ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13865 prompt_cache_len:5151 prompt_cache_ratio:0.3715109989181392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 -DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:47 [batch.py:51] router release req id 8 -INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10760974884033203 s -INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.10900616645812988 s -DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=268770721857622098822176136249418496627, time:1750768607.6141279s req_ids:[8] -DEBUG 06-24 20:36:47 [manager.py:391] -ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:216.7975902557373ms total_cost_time:216.8421745300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13866 prompt_cache_len:5151 prompt_cache_ratio:0.37148420597144094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 -DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:47 [batch.py:51] router release req id 8 -INFO 06-24 20:36:47 [manager.py:224] router recive req id 8 cost time 0.10858702659606934 s -INFO 06-24 20:36:47 [manager.py:68] detokenization recv req id 8 cost time 0.1098475456237793 s -DEBUG 06-24 20:36:47 [manager.py:391] Prefill Batch: batch_id=186302833766340318518789690420728412987, time:1750768607.8367853s req_ids:[8] -DEBUG 06-24 20:36:47 [manager.py:391] -ERROR 06-24 20:36:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:206.63022994995117ms total_cost_time:206.67338371276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13867 prompt_cache_len:5151 prompt_cache_ratio:0.37145741688901707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 -DEBUG 06-24 20:36:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:47 [batch.py:51] router release req id 8 -INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10882186889648438 s -INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.11002111434936523 s -DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=265756992099880502930729079153832710322, time:1750768608.050007s req_ids:[8] -DEBUG 06-24 20:36:48 [manager.py:391] -ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:47 lightllm_req_id:8 first_token_cost:210.35456657409668ms total_cost_time:210.39581298828125ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:13868 prompt_cache_len:5151 prompt_cache_ratio:0.3714306316700317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 -DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:48 [batch.py:51] router release req id 8 -INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10707640647888184 s -INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.1083369255065918 s -DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=231235172953752515194520881944545861718, time:1750768608.2670817s req_ids:[8] -DEBUG 06-24 20:36:48 [manager.py:391] -ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:214.75696563720703ms total_cost_time:214.7994041442871ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13869 prompt_cache_len:5151 prompt_cache_ratio:0.37140385031364914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 -DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:48 [batch.py:51] router release req id 8 -INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10814547538757324 s -INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.10944437980651855 s -DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=161192682418519083200897260738698253611, time:1750768608.4875064s req_ids:[8] -DEBUG 06-24 20:36:48 [manager.py:391] -ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:215.72375297546387ms total_cost_time:215.76738357543945ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13870 prompt_cache_len:5151 prompt_cache_ratio:0.37137707281903387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 -DEBUG 06-24 20:36:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:48 [batch.py:51] router release req id 8 -INFO 06-24 20:36:48 [manager.py:224] router recive req id 8 cost time 0.10857295989990234 s -INFO 06-24 20:36:48 [manager.py:68] detokenization recv req id 8 cost time 0.10999202728271484 s -DEBUG 06-24 20:36:48 [manager.py:391] Prefill Batch: batch_id=232016903002391139886058467049020045805, time:1750768608.7108488s req_ids:[8] -DEBUG 06-24 20:36:48 [manager.py:391] -ERROR 06-24 20:36:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:36:48 [stats.py:37] Avg tokens(prompt+generate) throughput: 54887.019 tokens/s -DEBUG 06-24 20:36:48 [stats.py:37] Avg prompt tokens throughput: 54878.996 tokens/s -DEBUG 06-24 20:36:48 [stats.py:37] Avg generate tokens throughput: 8.023 tokens/s -INFO 06-24 20:36:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:373.06737899780273ms total_cost_time:373.08692932128906ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:13871 prompt_cache_len:5151 prompt_cache_ratio:0.37135029918535073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 -DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:49 [batch.py:51] router release req id 8 -INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10815143585205078 s -INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10949420928955078 s -DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=37410786281659214582087252740634380895, time:1750768609.090279s req_ids:[8] -DEBUG 06-24 20:36:49 [manager.py:391] -ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:48 lightllm_req_id:8 first_token_cost:215.4695987701416ms total_cost_time:215.5132293701172ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13872 prompt_cache_len:5151 prompt_cache_ratio:0.3713235294117647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 -DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:49 [batch.py:51] router release req id 8 -INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10899043083190918 s -INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.11033916473388672 s -DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=159139081636568805356092117868464190700, time:1750768609.3111503s req_ids:[8] -DEBUG 06-24 20:36:49 [manager.py:391] -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.37446975708008ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:13873 prompt_cache_len:5151 prompt_cache_ratio:0.37129676349744106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 -DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:49 [batch.py:51] router release req id 8 -INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10642743110656738 s -INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10766005516052246 s -DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=199949266094131816080751630877071451636, time:1750768609.5347183s req_ids:[8] -DEBUG 06-24 20:36:49 [manager.py:391] -ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:217.93293952941895ms total_cost_time:217.97633171081543ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13874 prompt_cache_len:5151 prompt_cache_ratio:0.3712700014415453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 -DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:49 [batch.py:51] router release req id 8 -INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.10874176025390625 s -INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.1100764274597168 s -DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=111480326396203791283489163708786483399, time:1750768609.7561395s req_ids:[8] -DEBUG 06-24 20:36:49 [manager.py:391] -ERROR 06-24 20:36:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:209.78474617004395ms total_cost_time:209.82933044433594ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13875 prompt_cache_len:5151 prompt_cache_ratio:0.37124324324324326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 -DEBUG 06-24 20:36:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:49 [batch.py:51] router release req id 8 -INFO 06-24 20:36:49 [manager.py:224] router recive req id 8 cost time 0.1075584888458252 s -INFO 06-24 20:36:49 [manager.py:68] detokenization recv req id 8 cost time 0.10899543762207031 s -DEBUG 06-24 20:36:49 [manager.py:391] Prefill Batch: batch_id=136101432281230046515112299127644339559, time:1750768609.9733312s req_ids:[8] -DEBUG 06-24 20:36:49 [manager.py:391] -ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:49 lightllm_req_id:8 first_token_cost:382.25364685058594ms total_cost_time:382.2791576385498ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:13876 prompt_cache_len:5151 prompt_cache_ratio:0.3712164889017008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 -DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:50 [batch.py:51] router release req id 8 -INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s -INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.11017227172851562 s -DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=24144117691648633256556517425554290391, time:1750768610.3624375s req_ids:[8] -DEBUG 06-24 20:36:50 [manager.py:391] -ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:208.5421085357666ms total_cost_time:208.5871696472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13877 prompt_cache_len:5151 prompt_cache_ratio:0.37118973841608416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 -DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:50 [batch.py:51] router release req id 8 -INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s -INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.10960030555725098 s -DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=6685709028180030372752286962180717499, time:1750768610.5792215s req_ids:[8] -DEBUG 06-24 20:36:50 [manager.py:391] -ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:218.780517578125ms total_cost_time:218.82271766662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13878 prompt_cache_len:5151 prompt_cache_ratio:0.3711629917855599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 -DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:50 [batch.py:51] router release req id 8 -INFO 06-24 20:36:50 [manager.py:224] router recive req id 8 cost time 0.10855388641357422 s -INFO 06-24 20:36:50 [manager.py:68] detokenization recv req id 8 cost time 0.10982894897460938 s -DEBUG 06-24 20:36:50 [manager.py:391] Prefill Batch: batch_id=187144056471015864740758622640006219685, time:1750768610.7995799s req_ids:[8] -DEBUG 06-24 20:36:50 [manager.py:391] -ERROR 06-24 20:36:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:211.32278442382812ms total_cost_time:211.3661766052246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13879 prompt_cache_len:5151 prompt_cache_ratio:0.3711362490092946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 -DEBUG 06-24 20:36:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:50 [batch.py:51] router release req id 8 -INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10867500305175781 s -INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.11008906364440918 s -DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=294414627366519872799643503579475874248, time:1750768611.0180745s req_ids:[8] -DEBUG 06-24 20:36:51 [manager.py:391] -ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:50 lightllm_req_id:8 first_token_cost:217.03696250915527ms total_cost_time:217.0562744140625ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:13880 prompt_cache_len:5151 prompt_cache_ratio:0.3711095100864553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 -DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:51 [batch.py:51] router release req id 8 -INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10822868347167969 s -INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10952901840209961 s -DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=219237195035548130982560565690740558908, time:1750768611.242653s req_ids:[8] -DEBUG 06-24 20:36:51 [manager.py:391] -ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:211.5941047668457ms total_cost_time:211.6408348083496ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13881 prompt_cache_len:5151 prompt_cache_ratio:0.37108277501620923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 -DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:51 [batch.py:51] router release req id 8 -INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.10779047012329102 s -INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10905647277832031 s -DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=241865483528386857085454563276463318729, time:1750768611.4681647s req_ids:[8] -DEBUG 06-24 20:36:51 [manager.py:391] -ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:398.95081520080566ms total_cost_time:398.99611473083496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13882 prompt_cache_len:5151 prompt_cache_ratio:0.37105604379772367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 -DEBUG 06-24 20:36:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:51 [batch.py:51] router release req id 8 -INFO 06-24 20:36:51 [manager.py:224] router recive req id 8 cost time 0.1078193187713623 s -INFO 06-24 20:36:51 [manager.py:68] detokenization recv req id 8 cost time 0.10904097557067871 s -DEBUG 06-24 20:36:51 [manager.py:391] Prefill Batch: batch_id=206987606130043560835733932791108416096, time:1750768611.8649607s req_ids:[8] -DEBUG 06-24 20:36:51 [manager.py:391] -ERROR 06-24 20:36:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:214.71190452575684ms total_cost_time:214.7543430328369ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13883 prompt_cache_len:5151 prompt_cache_ratio:0.3710293164301664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 -DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:52 [batch.py:51] router release req id 8 -INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10795164108276367 s -INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10919690132141113 s -DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=112064123180520990523614817275728448218, time:1750768612.0888593s req_ids:[8] -DEBUG 06-24 20:36:52 [manager.py:391] -ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:51 lightllm_req_id:8 first_token_cost:210.96420288085938ms total_cost_time:210.99615097045898ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:13884 prompt_cache_len:5151 prompt_cache_ratio:0.37100259291270526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 -DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:52 [batch.py:51] router release req id 8 -INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10783886909484863 s -INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.1091623306274414 s -DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=230343888146461684206695859909805039429, time:1750768612.3059282s req_ids:[8] -DEBUG 06-24 20:36:52 [manager.py:391] -ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:217.18120574951172ms total_cost_time:217.2250747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13885 prompt_cache_len:5151 prompt_cache_ratio:0.3709758732445085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 -DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:52 [batch.py:51] router release req id 8 -INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10831451416015625 s -INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10951972007751465 s -DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=114049861170827987490525037949853720883, time:1750768612.5294292s req_ids:[8] -DEBUG 06-24 20:36:52 [manager.py:391] -ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:211.51065826416016ms total_cost_time:211.55595779418945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13886 prompt_cache_len:5151 prompt_cache_ratio:0.3709491574247443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 -DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:52 [batch.py:51] router release req id 8 -INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10798811912536621 s -INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10921406745910645 s -DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=58079987568008194806202508436541762270, time:1750768612.7473814s req_ids:[8] -DEBUG 06-24 20:36:52 [manager.py:391] -ERROR 06-24 20:36:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:215.62433242797852ms total_cost_time:215.6665325164795ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:13887 prompt_cache_len:5151 prompt_cache_ratio:0.3709224454525816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 -DEBUG 06-24 20:36:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:52 [batch.py:51] router release req id 8 -INFO 06-24 20:36:52 [manager.py:224] router recive req id 8 cost time 0.10847687721252441 s -INFO 06-24 20:36:52 [manager.py:68] detokenization recv req id 8 cost time 0.10969829559326172 s -DEBUG 06-24 20:36:52 [manager.py:391] Prefill Batch: batch_id=6530909416576735998434985069341649683, time:1750768612.969092s req_ids:[8] -DEBUG 06-24 20:36:52 [manager.py:391] -INFO 06-24 20:36:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:36:53 [statics_utils.py:24] mean first cost: 231.35835489509327 ms -INFO 06-24 20:36:53 [statics_utils.py:24] mean per token cost: 0.058900548764697525 ms -ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:52 lightllm_req_id:8 first_token_cost:386.46578788757324ms total_cost_time:386.4884376525879ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:13888 prompt_cache_len:5151 prompt_cache_ratio:0.37089573732718895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 -DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:53 [batch.py:51] router release req id 8 -INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.10816502571105957 s -INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.1094818115234375 s -DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=58321234920487974876483567044486685029, time:1750768613.3610613s req_ids:[8] -DEBUG 06-24 20:36:53 [manager.py:391] -INFO 06-24 20:36:53 [manager.py:620] left req id 8can release False refcount 3 -ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:214.6594524383545ms total_cost_time:214.7047519683838ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13889 prompt_cache_len:5151 prompt_cache_ratio:0.3708690330477356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 -DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:53 [batch.py:51] router release req id 8 -INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.109405517578125 s -INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s -DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=204776427042440465875919434420798862312, time:1750768613.5832922s req_ids:[8] -DEBUG 06-24 20:36:53 [manager.py:391] -ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:214.6458625793457ms total_cost_time:214.6894931793213ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13890 prompt_cache_len:5151 prompt_cache_ratio:0.37084233261339095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 -DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:53 [batch.py:51] router release req id 8 -INFO 06-24 20:36:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:53 [manager.py:224] router recive req id 8 cost time 0.10839581489562988 s -INFO 06-24 20:36:53 [manager.py:68] detokenization recv req id 8 cost time 0.10965251922607422 s -DEBUG 06-24 20:36:53 [manager.py:391] Prefill Batch: batch_id=229446621718952886065594471646342434207, time:1750768613.8037827s req_ids:[8] -DEBUG 06-24 20:36:53 [manager.py:391] -ERROR 06-24 20:36:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:213.01984786987305ms total_cost_time:213.06324005126953ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13891 prompt_cache_len:5151 prompt_cache_ratio:0.37081563602332446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 -DEBUG 06-24 20:36:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:53 [batch.py:51] router release req id 8 -INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.10820651054382324 s -INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10950016975402832 s -DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=55510846210750215855701179922266833293, time:1750768614.0232441s req_ids:[8] -DEBUG 06-24 20:36:54 [manager.py:391] -ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:53 lightllm_req_id:8 first_token_cost:213.20819854736328ms total_cost_time:213.25016021728516ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:13892 prompt_cache_len:5151 prompt_cache_ratio:0.37078894327670603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 -DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:54 [batch.py:51] router release req id 8 -INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.1090688705444336 s -INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.1103658676147461 s -DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=224121525366343885475807795940245334970, time:1750768614.2442517s req_ids:[8] -DEBUG 06-24 20:36:54 [manager.py:391] -ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:215.5001163482666ms total_cost_time:215.5439853668213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13893 prompt_cache_len:5151 prompt_cache_ratio:0.37076225437270566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 -DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:54 [batch.py:51] router release req id 8 -INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.10854649543762207 s -INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10979890823364258 s -DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=103245858121844457271003553170276015467, time:1750768614.465397s req_ids:[8] -DEBUG 06-24 20:36:54 [manager.py:391] -ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:375.079870223999ms total_cost_time:375.1246929168701ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13894 prompt_cache_len:5151 prompt_cache_ratio:0.37073556931049373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 -DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:54 [batch.py:51] router release req id 8 -INFO 06-24 20:36:54 [manager.py:224] router recive req id 8 cost time 0.1083076000213623 s -INFO 06-24 20:36:54 [manager.py:68] detokenization recv req id 8 cost time 0.10956525802612305 s -DEBUG 06-24 20:36:54 [manager.py:391] Prefill Batch: batch_id=270065926866823590835036049741054669318, time:1750768614.8458757s req_ids:[8] -DEBUG 06-24 20:36:54 [manager.py:391] -ERROR 06-24 20:36:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:214.2786979675293ms total_cost_time:214.324951171875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13895 prompt_cache_len:5151 prompt_cache_ratio:0.37070888808924074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 -DEBUG 06-24 20:36:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:54 [batch.py:51] router release req id 8 -INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s -INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.11015510559082031 s -DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=107251159687813623318187323971743775426, time:1750768615.0668144s req_ids:[8] -DEBUG 06-24 20:36:55 [manager.py:391] -ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:54 lightllm_req_id:8 first_token_cost:215.04974365234375ms total_cost_time:215.08145332336426ms,out_token_counter:1 mean_per_token_cost_time: 0.03170967102050781ms prompt_token_num:13896 prompt_cache_len:5151 prompt_cache_ratio:0.37068221070811747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 -DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:55 [batch.py:51] router release req id 8 -INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10804367065429688 s -INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.10992789268493652 s -DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=127448916255251244232496196437365392613, time:1750768615.2896206s req_ids:[8] -DEBUG 06-24 20:36:55 [manager.py:391] -ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:214.72954750061035ms total_cost_time:214.77460861206055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13897 prompt_cache_len:5151 prompt_cache_ratio:0.3706555371662949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 -DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:55 [batch.py:51] router release req id 8 -INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10825395584106445 s -INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.11022281646728516 s -DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=290071268696187492290421224949492356894, time:1750768615.5092902s req_ids:[8] -DEBUG 06-24 20:36:55 [manager.py:391] -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:36:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:210.6313705444336ms total_cost_time:210.67380905151367ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13898 prompt_cache_len:5151 prompt_cache_ratio:0.3706288674629443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 -DEBUG 06-24 20:36:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:55 [batch.py:51] router release req id 8 -INFO 06-24 20:36:55 [manager.py:224] router recive req id 8 cost time 0.10738468170166016 s -INFO 06-24 20:36:55 [manager.py:68] detokenization recv req id 8 cost time 0.10920095443725586 s -DEBUG 06-24 20:36:55 [manager.py:391] Prefill Batch: batch_id=252098724588724672500571257332623524243, time:1750768615.72714s req_ids:[8] -DEBUG 06-24 20:36:55 [manager.py:391] -ERROR 06-24 20:36:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:212.07666397094727ms total_cost_time:212.12005615234375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13899 prompt_cache_len:5151 prompt_cache_ratio:0.3706022015972372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 -DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:56 [batch.py:51] router release req id 8 -INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.3101315498352051 s -INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.31182432174682617 s -DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=172063619910785978849918027681875703976, time:1750768616.157644s req_ids:[8] -DEBUG 06-24 20:36:56 [manager.py:391] -ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:55 lightllm_req_id:8 first_token_cost:402.6494026184082ms total_cost_time:402.6925563812256ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13900 prompt_cache_len:5151 prompt_cache_ratio:0.3705755395683453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 -DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:56 [batch.py:51] router release req id 8 -INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10751199722290039 s -INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.10923504829406738 s -DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=198092726942894691672229761749615424879, time:1750768616.3572998s req_ids:[8] -DEBUG 06-24 20:36:56 [manager.py:391] -ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:207.09609985351562ms total_cost_time:207.1397304534912ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13901 prompt_cache_len:5151 prompt_cache_ratio:0.3705488813754406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 -DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:56 [batch.py:51] router release req id 8 -INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s -INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.11042499542236328 s -DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=225518704000031855525121671779690939624, time:1750768616.5707202s req_ids:[8] -DEBUG 06-24 20:36:56 [manager.py:391] -ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:204.12731170654297ms total_cost_time:204.17332649230957ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13902 prompt_cache_len:5151 prompt_cache_ratio:0.37052222701769527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 -DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:56 [batch.py:51] router release req id 8 -INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10805177688598633 s -INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.10921597480773926 s -DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=288969984037072555622116783616017625840, time:1750768616.7826922s req_ids:[8] -DEBUG 06-24 20:36:56 [manager.py:391] -ERROR 06-24 20:36:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:206.526517868042ms total_cost_time:206.56943321228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13903 prompt_cache_len:5151 prompt_cache_ratio:0.37049557649428183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 -DEBUG 06-24 20:36:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:56 [batch.py:51] router release req id 8 -INFO 06-24 20:36:56 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s -INFO 06-24 20:36:56 [manager.py:68] detokenization recv req id 8 cost time 0.11012554168701172 s -DEBUG 06-24 20:36:56 [manager.py:391] Prefill Batch: batch_id=217585346015029125276120308083418588188, time:1750768616.9968886s req_ids:[8] -DEBUG 06-24 20:36:56 [manager.py:391] -ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:56 lightllm_req_id:8 first_token_cost:206.2528133392334ms total_cost_time:206.29620552062988ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13904 prompt_cache_len:5151 prompt_cache_ratio:0.37046892980437285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 -DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:57 [batch.py:51] router release req id 8 -INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10814023017883301 s -INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.11016845703125 s -DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=200301460022562832120755463421612955775, time:1750768617.20847s req_ids:[8] -DEBUG 06-24 20:36:57 [manager.py:391] -ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:212.6023769378662ms total_cost_time:212.64886856079102ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13905 prompt_cache_len:5151 prompt_cache_ratio:0.3704422869471413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 -DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:57 [batch.py:51] router release req id 8 -INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10770320892333984 s -INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.10865592956542969 s -DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=23918242043753115527774402152552994034, time:1750768617.4421847s req_ids:[8] -DEBUG 06-24 20:36:57 [manager.py:391] -ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:398.3454704284668ms total_cost_time:398.3883857727051ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13906 prompt_cache_len:5151 prompt_cache_ratio:0.3704156479217604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 -DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:57 [batch.py:51] router release req id 8 -INFO 06-24 20:36:57 [manager.py:224] router recive req id 8 cost time 0.10739707946777344 s -INFO 06-24 20:36:57 [manager.py:68] detokenization recv req id 8 cost time 0.10922026634216309 s -DEBUG 06-24 20:36:57 [manager.py:391] Prefill Batch: batch_id=221870554354687592728649868737976966057, time:1750768617.8351018s req_ids:[8] -DEBUG 06-24 20:36:57 [manager.py:391] -ERROR 06-24 20:36:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:215.81506729125977ms total_cost_time:215.85893630981445ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13907 prompt_cache_len:5151 prompt_cache_ratio:0.3703890127274035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 -DEBUG 06-24 20:36:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:57 [batch.py:51] router release req id 8 -INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10674214363098145 s -INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.10825014114379883 s -DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=332155539145952671911186223190387063528, time:1750768618.05685s req_ids:[8] -DEBUG 06-24 20:36:58 [manager.py:391] -ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:57 lightllm_req_id:8 first_token_cost:210.86907386779785ms total_cost_time:210.91461181640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13908 prompt_cache_len:5151 prompt_cache_ratio:0.3703623813632442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 -DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:58 [batch.py:51] router release req id 8 -INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10915160179138184 s -INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.1105949878692627 s -DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=220393118989293678835595544890444441921, time:1750768618.2752411s req_ids:[8] -DEBUG 06-24 20:36:58 [manager.py:391] -ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:215.65675735473633ms total_cost_time:215.6991958618164ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13909 prompt_cache_len:5151 prompt_cache_ratio:0.3703357538284564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 -DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:58 [batch.py:51] router release req id 8 -INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10761451721191406 s -INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.10943722724914551 s -DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=226439665948844633691729562737217002831, time:1750768618.4960828s req_ids:[8] -DEBUG 06-24 20:36:58 [manager.py:391] -ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:170.90892791748047ms total_cost_time:170.95160484313965ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13910 prompt_cache_len:5151 prompt_cache_ratio:0.3703091301222142 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 -DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:58 [batch.py:51] router release req id 8 -INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10798883438110352 s -INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.11000251770019531 s -DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=309269484416052417248580670333986706377, time:1750768618.672763s req_ids:[8] -DEBUG 06-24 20:36:58 [manager.py:391] -ERROR 06-24 20:36:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:202.38327980041504ms total_cost_time:202.42595672607422ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13911 prompt_cache_len:5151 prompt_cache_ratio:0.370282510243692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 -DEBUG 06-24 20:36:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:58 [batch.py:51] router release req id 8 -INFO 06-24 20:36:58 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s -INFO 06-24 20:36:58 [manager.py:68] detokenization recv req id 8 cost time 0.11057209968566895 s -DEBUG 06-24 20:36:58 [manager.py:391] Prefill Batch: batch_id=71670868600021393519615150580683130143, time:1750768618.882891s req_ids:[8] -DEBUG 06-24 20:36:58 [manager.py:391] -ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:36:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 55926.074 tokens/s -DEBUG 06-24 20:36:59 [stats.py:37] Avg prompt tokens throughput: 55918.024 tokens/s -DEBUG 06-24 20:36:59 [stats.py:37] Avg generate tokens throughput: 8.050 tokens/s -INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:58 lightllm_req_id:8 first_token_cost:386.9960308074951ms total_cost_time:387.0401382446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13912 prompt_cache_len:5151 prompt_cache_ratio:0.3702558941920644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 -DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:59 [batch.py:51] router release req id 8 -INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10902547836303711 s -INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.11098289489746094 s -DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=133923718832203202596629738446689005717, time:1750768619.2771587s req_ids:[8] -DEBUG 06-24 20:36:59 [manager.py:391] -ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:211.18807792663574ms total_cost_time:211.23266220092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13913 prompt_cache_len:5151 prompt_cache_ratio:0.3702292819665061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 -DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:59 [batch.py:51] router release req id 8 -INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10885930061340332 s -INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.11080145835876465 s -DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=279304733665380026190025460390875255751, time:1750768619.4927404s req_ids:[8] -DEBUG 06-24 20:36:59 [manager.py:391] -ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:214.2024040222168ms total_cost_time:214.2484188079834ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13914 prompt_cache_len:5151 prompt_cache_ratio:0.3702026735661923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 -DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:59 [batch.py:51] router release req id 8 -INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10801243782043457 s -INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.10943126678466797 s -DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=254200334111609286439824705966843053686, time:1750768619.7145534s req_ids:[8] -DEBUG 06-24 20:36:59 [manager.py:391] -ERROR 06-24 20:36:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:36:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:36:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:215.36993980407715ms total_cost_time:215.41547775268555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13915 prompt_cache_len:5151 prompt_cache_ratio:0.37017606899029826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:36:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 -DEBUG 06-24 20:36:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:36:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:36:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:36:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:36:59 [batch.py:51] router release req id 8 -INFO 06-24 20:36:59 [manager.py:224] router recive req id 8 cost time 0.10886955261230469 s -INFO 06-24 20:36:59 [manager.py:68] detokenization recv req id 8 cost time 0.1107642650604248 s -DEBUG 06-24 20:36:59 [manager.py:391] Prefill Batch: batch_id=75323565942071777366499456639871474257, time:1750768619.9378896s req_ids:[8] -DEBUG 06-24 20:36:59 [manager.py:391] -ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:36:59 lightllm_req_id:8 first_token_cost:210.3097438812256ms total_cost_time:210.3569507598877ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:13916 prompt_cache_len:5151 prompt_cache_ratio:0.37014946823799943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 -DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:00 [batch.py:51] router release req id 8 -INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10915851593017578 s -INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.1110389232635498 s -DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=215771239181721537558085871223867551880, time:1750768620.1516893s req_ids:[8] -DEBUG 06-24 20:37:00 [manager.py:391] -ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:210.98756790161133ms total_cost_time:211.0304832458496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13917 prompt_cache_len:5151 prompt_cache_ratio:0.37012287130847166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 -DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:00 [batch.py:51] router release req id 8 -INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10920119285583496 s -INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11122536659240723 s -DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=219970268443140959352921849423234117846, time:1750768620.3724582s req_ids:[8] -DEBUG 06-24 20:37:00 [manager.py:391] -ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:370.25952339172363ms total_cost_time:370.3038692474365ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13918 prompt_cache_len:5151 prompt_cache_ratio:0.3700962782008909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 -DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:00 [batch.py:51] router release req id 8 -INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10903358459472656 s -INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11103272438049316 s -DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=237538524942996614040975527562041831, time:1750768620.7496567s req_ids:[8] -DEBUG 06-24 20:37:00 [manager.py:391] -ERROR 06-24 20:37:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:215.6364917755127ms total_cost_time:215.68012237548828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13919 prompt_cache_len:5151 prompt_cache_ratio:0.3700696889144335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 -DEBUG 06-24 20:37:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:00 [batch.py:51] router release req id 8 -INFO 06-24 20:37:00 [manager.py:224] router recive req id 8 cost time 0.10883903503417969 s -INFO 06-24 20:37:00 [manager.py:68] detokenization recv req id 8 cost time 0.11096382141113281 s -DEBUG 06-24 20:37:00 [manager.py:391] Prefill Batch: batch_id=257236151209652087226595876016598175686, time:1750768620.9808483s req_ids:[8] -DEBUG 06-24 20:37:00 [manager.py:391] -ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:00 lightllm_req_id:8 first_token_cost:227.36787796020508ms total_cost_time:227.41150856018066ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13920 prompt_cache_len:5151 prompt_cache_ratio:0.37004310344827585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 -DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:01 [batch.py:51] router release req id 8 -INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s -INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11133360862731934 s -DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=232482605998982212544847478536891266789, time:1750768621.2050748s req_ids:[8] -DEBUG 06-24 20:37:01 [manager.py:391] -ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:214.0488624572754ms total_cost_time:214.094877243042ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13921 prompt_cache_len:5151 prompt_cache_ratio:0.3700165218015947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 -DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:01 [batch.py:51] router release req id 8 -INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.1089775562286377 s -INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11105465888977051 s -DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=19526503121744536597245145020809529250, time:1750768621.4273324s req_ids:[8] -DEBUG 06-24 20:37:01 [manager.py:391] -ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:210.9816074371338ms total_cost_time:211.0280990600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:13922 prompt_cache_len:5151 prompt_cache_ratio:0.369989943973567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 -DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:01 [batch.py:51] router release req id 8 -INFO 06-24 20:37:01 [manager.py:224] router recive req id 8 cost time 0.10915493965148926 s -INFO 06-24 20:37:01 [manager.py:68] detokenization recv req id 8 cost time 0.11102700233459473 s -DEBUG 06-24 20:37:01 [manager.py:391] Prefill Batch: batch_id=128546288495840170102586428914613294600, time:1750768621.6451705s req_ids:[8] -DEBUG 06-24 20:37:01 [manager.py:391] -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:379.8229694366455ms total_cost_time:379.8825740814209ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:13923 prompt_cache_len:5151 prompt_cache_ratio:0.36996336996337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 -DEBUG 06-24 20:37:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:01 [batch.py:51] router release req id 8 -INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10876846313476562 s -INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11078238487243652 s -DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=315587277261789559727399101910876680157, time:1750768622.0303411s req_ids:[8] -DEBUG 06-24 20:37:02 [manager.py:391] -ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:01 lightllm_req_id:8 first_token_cost:213.28973770141602ms total_cost_time:213.3336067199707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13924 prompt_cache_len:5151 prompt_cache_ratio:0.36993679977018096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 -DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:02 [batch.py:51] router release req id 8 -INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10879302024841309 s -INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.10997271537780762 s -DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=333732795332618121932777022375170066622, time:1750768622.252176s req_ids:[8] -DEBUG 06-24 20:37:02 [manager.py:391] -ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:212.98646926879883ms total_cost_time:213.03272247314453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13925 prompt_cache_len:5151 prompt_cache_ratio:0.36991023339317775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 -DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:02 [batch.py:51] router release req id 8 -INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10840725898742676 s -INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11034059524536133 s -DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=101945375544171551629348716303399661860, time:1750768622.4699144s req_ids:[8] -DEBUG 06-24 20:37:02 [manager.py:391] -ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:210.76154708862305ms total_cost_time:210.80708503723145ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13926 prompt_cache_len:5151 prompt_cache_ratio:0.3698836708315381 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 -DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:02 [batch.py:51] router release req id 8 -INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.10902595520019531 s -INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11107540130615234 s -DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=28957456801412538655885092959954950539, time:1750768622.6867278s req_ids:[8] -DEBUG 06-24 20:37:02 [manager.py:391] -ERROR 06-24 20:37:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:210.2987766265869ms total_cost_time:210.3433609008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13927 prompt_cache_len:5151 prompt_cache_ratio:0.3698571120844403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 -DEBUG 06-24 20:37:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:02 [batch.py:51] router release req id 8 -INFO 06-24 20:37:02 [manager.py:224] router recive req id 8 cost time 0.1097567081451416 s -INFO 06-24 20:37:02 [manager.py:68] detokenization recv req id 8 cost time 0.11178207397460938 s -DEBUG 06-24 20:37:02 [manager.py:391] Prefill Batch: batch_id=192147062144882168338999978157611606711, time:1750768622.906113s req_ids:[8] -DEBUG 06-24 20:37:02 [manager.py:391] -ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:02 lightllm_req_id:8 first_token_cost:215.46316146850586ms total_cost_time:215.50703048706055ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13928 prompt_cache_len:5151 prompt_cache_ratio:0.3698305571510626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 -DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:03 [batch.py:51] router release req id 8 -INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.11007857322692871 s -INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11199498176574707 s -DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=41895446920054152142373500974785246976, time:1750768623.1423213s req_ids:[8] -DEBUG 06-24 20:37:03 [manager.py:391] -ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:412.0802879333496ms total_cost_time:412.1253490447998ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13929 prompt_cache_len:5151 prompt_cache_ratio:0.36980400603058367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 -DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:03 [batch.py:51] router release req id 8 -INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10997319221496582 s -INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11186957359313965 s -DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=210457571705370692116986256170210928112, time:1750768623.5457253s req_ids:[8] -DEBUG 06-24 20:37:03 [manager.py:391] -ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:209.3219757080078ms total_cost_time:209.367036819458ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13930 prompt_cache_len:5151 prompt_cache_ratio:0.3697774587221823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 -DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:03 [batch.py:51] router release req id 8 -INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10875368118286133 s -INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.1100616455078125 s -DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=175212948296425593950185278029121148367, time:1750768623.7734683s req_ids:[8] -DEBUG 06-24 20:37:03 [manager.py:391] -ERROR 06-24 20:37:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:227.30445861816406ms total_cost_time:227.35071182250977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13931 prompt_cache_len:5151 prompt_cache_ratio:0.36975091522503767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 -DEBUG 06-24 20:37:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:03 [batch.py:51] router release req id 8 -INFO 06-24 20:37:03 [manager.py:224] router recive req id 8 cost time 0.10892128944396973 s -INFO 06-24 20:37:03 [manager.py:68] detokenization recv req id 8 cost time 0.11064863204956055 s -DEBUG 06-24 20:37:03 [manager.py:391] Prefill Batch: batch_id=260396556068167099650344386026952540436, time:1750768623.9977322s req_ids:[8] -DEBUG 06-24 20:37:03 [manager.py:391] -ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:03 lightllm_req_id:8 first_token_cost:215.5303955078125ms total_cost_time:215.5742645263672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13932 prompt_cache_len:5151 prompt_cache_ratio:0.36972437553832904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 -DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:04 [batch.py:51] router release req id 8 -INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10816359519958496 s -INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s -DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=288487669328809925050379244021909190040, time:1750768624.2183595s req_ids:[8] -DEBUG 06-24 20:37:04 [manager.py:391] -ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:213.19007873535156ms total_cost_time:213.23561668395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13933 prompt_cache_len:5151 prompt_cache_ratio:0.3696978396612359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 -DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:04 [batch.py:51] router release req id 8 -INFO 06-24 20:37:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10943841934204102 s -INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.11119484901428223 s -DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=246827518607713457017620749205921534259, time:1750768624.4377937s req_ids:[8] -DEBUG 06-24 20:37:04 [manager.py:391] -ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:210.10279655456543ms total_cost_time:210.14666557312012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13934 prompt_cache_len:5151 prompt_cache_ratio:0.36967130759293815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 -DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:04 [batch.py:51] router release req id 8 -INFO 06-24 20:37:04 [manager.py:224] router recive req id 8 cost time 0.10814499855041504 s -INFO 06-24 20:37:04 [manager.py:68] detokenization recv req id 8 cost time 0.11000776290893555 s -DEBUG 06-24 20:37:04 [manager.py:391] Prefill Batch: batch_id=256159453287421827255114855225021658443, time:1750768624.655235s req_ids:[8] -DEBUG 06-24 20:37:04 [manager.py:391] -ERROR 06-24 20:37:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:384.5798969268799ms total_cost_time:384.6251964569092ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13935 prompt_cache_len:5151 prompt_cache_ratio:0.3696447793326157 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 -DEBUG 06-24 20:37:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:04 [batch.py:51] router release req id 8 -INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10903406143188477 s -INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.11103391647338867 s -DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=70224943397812551951389176340976439836, time:1750768625.046903s req_ids:[8] -DEBUG 06-24 20:37:05 [manager.py:391] -ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:04 lightllm_req_id:8 first_token_cost:211.10844612121582ms total_cost_time:211.1515998840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13936 prompt_cache_len:5151 prompt_cache_ratio:0.3696182548794489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 -DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:05 [batch.py:51] router release req id 8 -INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10857129096984863 s -INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s -DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=264541544625963132932474138182543066423, time:1750768625.267228s req_ids:[8] -DEBUG 06-24 20:37:05 [manager.py:391] -ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:212.6331329345703ms total_cost_time:212.67938613891602ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13937 prompt_cache_len:5151 prompt_cache_ratio:0.3695917342326182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 -DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:05 [batch.py:51] router release req id 8 -INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10804343223571777 s -INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.1097872257232666 s -DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=42814334160434545670834167288501314267, time:1750768625.4857428s req_ids:[8] -DEBUG 06-24 20:37:05 [manager.py:391] -ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:215.73901176452637ms total_cost_time:215.78264236450195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13938 prompt_cache_len:5151 prompt_cache_ratio:0.3695652173913043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 -DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:05 [batch.py:51] router release req id 8 -INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10842251777648926 s -INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.10965919494628906 s -DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=298567022201804756948667998125621692108, time:1750768625.706435s req_ids:[8] -DEBUG 06-24 20:37:05 [manager.py:391] -ERROR 06-24 20:37:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:208.6482048034668ms total_cost_time:208.69112014770508ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13939 prompt_cache_len:5151 prompt_cache_ratio:0.3695387043546883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 -DEBUG 06-24 20:37:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:05 [batch.py:51] router release req id 8 -INFO 06-24 20:37:05 [manager.py:224] router recive req id 8 cost time 0.10918402671813965 s -INFO 06-24 20:37:05 [manager.py:68] detokenization recv req id 8 cost time 0.1112363338470459 s -DEBUG 06-24 20:37:05 [manager.py:391] Prefill Batch: batch_id=273683894373204317081350673527536600267, time:1750768625.9218066s req_ids:[8] -DEBUG 06-24 20:37:05 [manager.py:391] -ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:05 lightllm_req_id:8 first_token_cost:210.70480346679688ms total_cost_time:210.74986457824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13940 prompt_cache_len:5151 prompt_cache_ratio:0.3695121951219512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 -DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:06 [batch.py:51] router release req id 8 -INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10886454582214355 s -INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11039137840270996 s -DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=288771516278495236943905202434162662558, time:1750768626.1395354s req_ids:[8] -DEBUG 06-24 20:37:06 [manager.py:391] -ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:377.1843910217285ms total_cost_time:377.2294521331787ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13941 prompt_cache_len:5151 prompt_cache_ratio:0.3694856896922746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 -DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:06 [batch.py:51] router release req id 8 -INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10865187644958496 s -DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=10445945186429712395826115642952253211, time:1750768626.523873s req_ids:[8] -DEBUG 06-24 20:37:06 [manager.py:391] -INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11082983016967773 s -ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:211.09294891357422ms total_cost_time:211.1372947692871ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13942 prompt_cache_len:5151 prompt_cache_ratio:0.36945918806484007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 -DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:06 [batch.py:51] router release req id 8 -INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10920166969299316 s -INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.1111900806427002 s -DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=308169976463995912445581497449446571330, time:1750768626.7450776s req_ids:[8] -DEBUG 06-24 20:37:06 [manager.py:391] -ERROR 06-24 20:37:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:213.63449096679688ms total_cost_time:213.68026733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13943 prompt_cache_len:5151 prompt_cache_ratio:0.3694326902388295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 -DEBUG 06-24 20:37:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:06 [batch.py:51] router release req id 8 -INFO 06-24 20:37:06 [manager.py:224] router recive req id 8 cost time 0.10859107971191406 s -INFO 06-24 20:37:06 [manager.py:68] detokenization recv req id 8 cost time 0.11060118675231934 s -DEBUG 06-24 20:37:06 [manager.py:391] Prefill Batch: batch_id=101063073474856270944811072156136174349, time:1750768626.9695165s req_ids:[8] -DEBUG 06-24 20:37:06 [manager.py:391] -ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:06 lightllm_req_id:8 first_token_cost:217.14258193969727ms total_cost_time:217.18955039978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:13944 prompt_cache_len:5151 prompt_cache_ratio:0.36940619621342513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 -DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:07 [batch.py:51] router release req id 8 -INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.10762834548950195 s -INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.10957121849060059 s -DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=125099626285870584418590301827953894391, time:1750768627.189367s req_ids:[8] -DEBUG 06-24 20:37:07 [manager.py:391] -ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:212.9814624786377ms total_cost_time:213.02437782287598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13945 prompt_cache_len:5151 prompt_cache_ratio:0.3693797059878092 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 -DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:07 [batch.py:51] router release req id 8 -INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.10852408409118652 s -INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.1104736328125 s -DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=124045542390231541854400842103060234955, time:1750768627.4104726s req_ids:[8] -DEBUG 06-24 20:37:07 [manager.py:391] -ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:213.79780769348145ms total_cost_time:213.84143829345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13946 prompt_cache_len:5151 prompt_cache_ratio:0.3693532195611645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 -DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:07 [batch.py:51] router release req id 8 -INFO 06-24 20:37:07 [manager.py:224] router recive req id 8 cost time 0.31005144119262695 s -INFO 06-24 20:37:07 [manager.py:68] detokenization recv req id 8 cost time 0.3113112449645996 s -DEBUG 06-24 20:37:07 [manager.py:391] Prefill Batch: batch_id=104308371043862017816374384439789414194, time:1750768627.839343s req_ids:[8] -DEBUG 06-24 20:37:07 [manager.py:391] -ERROR 06-24 20:37:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:381.5882205963135ms total_cost_time:381.6509246826172ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:13947 prompt_cache_len:5151 prompt_cache_ratio:0.3693267369326737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 -DEBUG 06-24 20:37:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:07 [batch.py:51] router release req id 8 -INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10747265815734863 s -INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.10870933532714844 s -DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=83400216592221864004433764797439358087, time:1750768628.0191367s req_ids:[8] -DEBUG 06-24 20:37:08 [manager.py:391] -ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:07 lightllm_req_id:8 first_token_cost:206.29024505615234ms total_cost_time:206.33268356323242ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13948 prompt_cache_len:5151 prompt_cache_ratio:0.36930025810151995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 -DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:08 [batch.py:51] router release req id 8 -INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10938715934753418 s -INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.11162972450256348 s -DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=199110041395681224146989351509459105359, time:1750768628.239972s req_ids:[8] -DEBUG 06-24 20:37:08 [manager.py:391] -ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:222.53108024597168ms total_cost_time:222.57661819458008ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13949 prompt_cache_len:5151 prompt_cache_ratio:0.3692737830668865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 -DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:08 [batch.py:51] router release req id 8 -INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10981082916259766 s -INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.1120309829711914 s -DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=106238026222497706418331104991262201385, time:1750768628.478984s req_ids:[8] -DEBUG 06-24 20:37:08 [manager.py:391] -ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:232.09452629089355ms total_cost_time:232.1479320526123ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:13950 prompt_cache_len:5151 prompt_cache_ratio:0.369247311827957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 -DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:08 [batch.py:51] router release req id 8 -INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10784029960632324 s -INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.10988807678222656 s -DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=55894738104048942861133656649418786502, time:1750768628.718009s req_ids:[8] -DEBUG 06-24 20:37:08 [manager.py:391] -ERROR 06-24 20:37:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:231.60791397094727ms total_cost_time:231.65273666381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13951 prompt_cache_len:5151 prompt_cache_ratio:0.3692208443839151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 -DEBUG 06-24 20:37:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:08 [batch.py:51] router release req id 8 -INFO 06-24 20:37:08 [manager.py:224] router recive req id 8 cost time 0.10828900337219238 s -INFO 06-24 20:37:08 [manager.py:68] detokenization recv req id 8 cost time 0.11016607284545898 s -DEBUG 06-24 20:37:08 [manager.py:391] Prefill Batch: batch_id=305235593761326651968822928120138002363, time:1750768628.9414387s req_ids:[8] -DEBUG 06-24 20:37:08 [manager.py:391] -ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:08 lightllm_req_id:8 first_token_cost:212.6622200012207ms total_cost_time:212.71681785583496ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:13952 prompt_cache_len:5151 prompt_cache_ratio:0.36919438073394495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 -DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:09 [batch.py:51] router release req id 8 -INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.1085207462310791 s -INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11030125617980957 s -DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=34291958107616886458421667846565404186, time:1750768629.1611254s req_ids:[8] -DEBUG 06-24 20:37:09 [manager.py:391] -DEBUG 06-24 20:37:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57083.077 tokens/s -DEBUG 06-24 20:37:09 [stats.py:37] Avg prompt tokens throughput: 57074.984 tokens/s -DEBUG 06-24 20:37:09 [stats.py:37] Avg generate tokens throughput: 8.093 tokens/s -ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:378.8440227508545ms total_cost_time:378.89671325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.05269050598144531ms prompt_token_num:13953 prompt_cache_len:5151 prompt_cache_ratio:0.3691679208772307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 -DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:09 [batch.py:51] router release req id 8 -INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.10888433456420898 s -INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11104106903076172 s -DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=291333514263332086300485840075899258211, time:1750768629.5459352s req_ids:[8] -DEBUG 06-24 20:37:09 [manager.py:391] -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:213.08517456054688ms total_cost_time:213.13190460205078ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13954 prompt_cache_len:5151 prompt_cache_ratio:0.36914146481295684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 -DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:09 [batch.py:51] router release req id 8 -INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.1094048023223877 s -INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11150550842285156 s -DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=25593220145067302091142281344771496218, time:1750768629.7638197s req_ids:[8] -DEBUG 06-24 20:37:09 [manager.py:391] -ERROR 06-24 20:37:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:209.12885665893555ms total_cost_time:209.17201042175293ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13955 prompt_cache_len:5151 prompt_cache_ratio:0.36911501254030815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 -DEBUG 06-24 20:37:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:09 [batch.py:51] router release req id 8 -INFO 06-24 20:37:09 [manager.py:224] router recive req id 8 cost time 0.110321044921875 s -INFO 06-24 20:37:09 [manager.py:68] detokenization recv req id 8 cost time 0.11234331130981445 s -DEBUG 06-24 20:37:09 [manager.py:391] Prefill Batch: batch_id=65798691069630346309810407627342742467, time:1750768629.9821873s req_ids:[8] -DEBUG 06-24 20:37:09 [manager.py:391] -ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:09 lightllm_req_id:8 first_token_cost:205.52539825439453ms total_cost_time:205.5685520172119ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13956 prompt_cache_len:5151 prompt_cache_ratio:0.36908856405846946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 -DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:10 [batch.py:51] router release req id 8 -INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.11044740676879883 s -INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.11247706413269043 s -DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=67396194817026667487177616517812211631, time:1750768630.196119s req_ids:[8] -DEBUG 06-24 20:37:10 [manager.py:391] -ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:215.56472778320312ms total_cost_time:215.63339233398438ms,out_token_counter:1 mean_per_token_cost_time: 0.06866455078125ms prompt_token_num:13957 prompt_cache_len:5151 prompt_cache_ratio:0.3690621193666261 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 -DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:10 [batch.py:51] router release req id 8 -INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.10779142379760742 s -INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.10990166664123535 s -DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=76015538003986216031098212277281892458, time:1750768630.4250095s req_ids:[8] -DEBUG 06-24 20:37:10 [manager.py:391] -ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:221.2224006652832ms total_cost_time:221.2655544281006ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13958 prompt_cache_len:5151 prompt_cache_ratio:0.36903567846396335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 -DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:10 [batch.py:51] router release req id 8 -INFO 06-24 20:37:10 [manager.py:224] router recive req id 8 cost time 0.1077268123626709 s -INFO 06-24 20:37:10 [manager.py:68] detokenization recv req id 8 cost time 0.10962629318237305 s -DEBUG 06-24 20:37:10 [manager.py:391] Prefill Batch: batch_id=212026111350736059461380495837494867637, time:1750768630.6504242s req_ids:[8] -DEBUG 06-24 20:37:10 [manager.py:391] -ERROR 06-24 20:37:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:408.02526473999023ms total_cost_time:408.0681800842285ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13959 prompt_cache_len:5151 prompt_cache_ratio:0.3690092413496669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 -DEBUG 06-24 20:37:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:10 [batch.py:51] router release req id 8 -INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10836243629455566 s -INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10998082160949707 s -DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=155066330884774564187075724439780488011, time:1750768631.0608914s req_ids:[8] -DEBUG 06-24 20:37:11 [manager.py:391] -ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:10 lightllm_req_id:8 first_token_cost:195.59979438781738ms total_cost_time:195.64294815063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13960 prompt_cache_len:5151 prompt_cache_ratio:0.36898280802292266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 -DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:11 [batch.py:51] router release req id 8 -INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10738396644592285 s -INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10930180549621582 s -DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=289379815544848895498309203267217891342, time:1750768631.2617416s req_ids:[8] -DEBUG 06-24 20:37:11 [manager.py:391] -ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:209.60497856140137ms total_cost_time:209.64932441711426ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13961 prompt_cache_len:5151 prompt_cache_ratio:0.3689563784829167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 -DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:11 [batch.py:51] router release req id 8 -INFO 06-24 20:37:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.1076345443725586 s -INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.1092996597290039 s -DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=103425550519706900866972878420908912444, time:1750768631.4790819s req_ids:[8] -DEBUG 06-24 20:37:11 [manager.py:391] -ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:211.1837863922119ms total_cost_time:211.2290859222412ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13962 prompt_cache_len:5151 prompt_cache_ratio:0.3689299527288354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 -DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:11 [batch.py:51] router release req id 8 -INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10830545425415039 s -INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.11000728607177734 s -DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=156579918448146882857788077408299654878, time:1750768631.6976492s req_ids:[8] -DEBUG 06-24 20:37:11 [manager.py:391] -ERROR 06-24 20:37:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:215.73352813720703ms total_cost_time:215.77930450439453ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13963 prompt_cache_len:5151 prompt_cache_ratio:0.3689035307598654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 -DEBUG 06-24 20:37:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:11 [batch.py:51] router release req id 8 -INFO 06-24 20:37:11 [manager.py:224] router recive req id 8 cost time 0.10782098770141602 s -INFO 06-24 20:37:11 [manager.py:68] detokenization recv req id 8 cost time 0.10944366455078125 s -DEBUG 06-24 20:37:11 [manager.py:391] Prefill Batch: batch_id=55823393131493504206408715586916918802, time:1750768631.9187052s req_ids:[8] -DEBUG 06-24 20:37:11 [manager.py:391] -ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:11 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7067108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13964 prompt_cache_len:5151 prompt_cache_ratio:0.36887711257519334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 -DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:12 [batch.py:51] router release req id 8 -INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s -INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1092989444732666 s -DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=113131398368371662771795845302399160720, time:1750768632.1367507s req_ids:[8] -DEBUG 06-24 20:37:12 [manager.py:391] -ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:374.8800754547119ms total_cost_time:374.9239444732666ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13965 prompt_cache_len:5151 prompt_cache_ratio:0.36885069817400645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 -DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:12 [batch.py:51] router release req id 8 -INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.1084134578704834 s -INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1104278564453125 s -DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=154201704682901204662442097155732473021, time:1750768632.5170584s req_ids:[8] -DEBUG 06-24 20:37:12 [manager.py:391] -ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:210.44445037841797ms total_cost_time:210.48879623413086ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13966 prompt_cache_len:5151 prompt_cache_ratio:0.3688242875554919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 -DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:12 [batch.py:51] router release req id 8 -INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s -INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.11077618598937988 s -DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=101835523533941867640890436872225148613, time:1750768632.735914s req_ids:[8] -DEBUG 06-24 20:37:12 [manager.py:391] -ERROR 06-24 20:37:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:217.67568588256836ms total_cost_time:217.72098541259766ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:13967 prompt_cache_len:5151 prompt_cache_ratio:0.36879788071883723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 -DEBUG 06-24 20:37:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:12 [batch.py:51] router release req id 8 -INFO 06-24 20:37:12 [manager.py:224] router recive req id 8 cost time 0.10765337944030762 s -INFO 06-24 20:37:12 [manager.py:68] detokenization recv req id 8 cost time 0.1096041202545166 s -DEBUG 06-24 20:37:12 [manager.py:391] Prefill Batch: batch_id=285308498855591838674116357280038341175, time:1750768632.9582512s req_ids:[8] -DEBUG 06-24 20:37:12 [manager.py:391] -ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:12 lightllm_req_id:8 first_token_cost:213.91558647155762ms total_cost_time:213.96183967590332ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:13968 prompt_cache_len:5151 prompt_cache_ratio:0.36877147766323026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 -DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:13 [batch.py:51] router release req id 8 -INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10932421684265137 s -INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.11110043525695801 s -DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=56960264878956924678480344503312042721, time:1750768633.179335s req_ids:[8] -DEBUG 06-24 20:37:13 [manager.py:391] -ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:211.31563186645508ms total_cost_time:211.36116981506348ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13969 prompt_cache_len:5151 prompt_cache_ratio:0.3687450783878588 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 -DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:13 [batch.py:51] router release req id 8 -INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10890960693359375 s -INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.11022567749023438 s -DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=162889442219590974850323458144476646776, time:1750768633.3956206s req_ids:[8] -DEBUG 06-24 20:37:13 [manager.py:391] -ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:370.96571922302246ms total_cost_time:371.01244926452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:13970 prompt_cache_len:5151 prompt_cache_ratio:0.36871868289191123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 -DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:13 [batch.py:51] router release req id 8 -INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s -INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.10922646522521973 s -DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=124877348331976675224169518215073884119, time:1750768633.7745442s req_ids:[8] -DEBUG 06-24 20:37:13 [manager.py:391] -ERROR 06-24 20:37:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:218.28055381774902ms total_cost_time:218.3222770690918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:13971 prompt_cache_len:5151 prompt_cache_ratio:0.3686922911745759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 -DEBUG 06-24 20:37:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:13 [batch.py:51] router release req id 8 -INFO 06-24 20:37:13 [manager.py:224] router recive req id 8 cost time 0.10863232612609863 s -INFO 06-24 20:37:13 [manager.py:68] detokenization recv req id 8 cost time 0.10994839668273926 s -DEBUG 06-24 20:37:13 [manager.py:391] Prefill Batch: batch_id=127575139360520556106602266782837481100, time:1750768633.9974482s req_ids:[8] -DEBUG 06-24 20:37:13 [manager.py:391] -ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:13 lightllm_req_id:8 first_token_cost:209.88225936889648ms total_cost_time:209.92684364318848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:13972 prompt_cache_len:5151 prompt_cache_ratio:0.3686659032350415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 -DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:14 [batch.py:51] router release req id 8 -INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.10771965980529785 s -INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.10977697372436523 s -DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=332415336343340678535546209147675204268, time:1750768634.212489s req_ids:[8] -DEBUG 06-24 20:37:14 [manager.py:391] -ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:213.00220489501953ms total_cost_time:213.04702758789062ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13973 prompt_cache_len:5151 prompt_cache_ratio:0.368639519072497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 -DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:14 [batch.py:51] router release req id 8 -INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.10806417465209961 s -INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.11001920700073242 s -DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=36042452911271591799901419889865982062, time:1750768634.4320753s req_ids:[8] -DEBUG 06-24 20:37:14 [manager.py:391] -ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:209.09380912780762ms total_cost_time:209.13982391357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:13974 prompt_cache_len:5151 prompt_cache_ratio:0.3686131386861314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 -DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:14 [batch.py:51] router release req id 8 -INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.11086773872375488 s -INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.1127617359161377 s -DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=68598854657716787776582902834585841259, time:1750768634.6516871s req_ids:[8] -DEBUG 06-24 20:37:14 [manager.py:391] -ERROR 06-24 20:37:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:213.9129638671875ms total_cost_time:213.95587921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13975 prompt_cache_len:5151 prompt_cache_ratio:0.3685867620751342 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 -DEBUG 06-24 20:37:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:14 [batch.py:51] router release req id 8 -INFO 06-24 20:37:14 [manager.py:224] router recive req id 8 cost time 0.1095132827758789 s -INFO 06-24 20:37:14 [manager.py:68] detokenization recv req id 8 cost time 0.11198019981384277 s -DEBUG 06-24 20:37:14 [manager.py:391] Prefill Batch: batch_id=6873755472947318731095892579841924578, time:1750768634.867758s req_ids:[8] -DEBUG 06-24 20:37:14 [manager.py:391] -ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:14 lightllm_req_id:8 first_token_cost:386.42024993896484ms total_cost_time:386.4631652832031ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13976 prompt_cache_len:5151 prompt_cache_ratio:0.3685603892386949 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 -DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:15 [batch.py:51] router release req id 8 -INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10797619819641113 s -INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.11002993583679199 s -DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=291625764657491914578755959906717208851, time:1750768635.2616925s req_ids:[8] -DEBUG 06-24 20:37:15 [manager.py:391] -ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:216.02678298950195ms total_cost_time:216.06993675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:13977 prompt_cache_len:5151 prompt_cache_ratio:0.36853402017600345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 -DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:15 [batch.py:51] router release req id 8 -INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10770773887634277 s -INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.10898828506469727 s -DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=84317553708246640575189677048345470617, time:1750768635.4856856s req_ids:[8] -DEBUG 06-24 20:37:15 [manager.py:391] -ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:216.20965003967285ms total_cost_time:216.25328063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13978 prompt_cache_len:5151 prompt_cache_ratio:0.3685076548862498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 -DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:15 [batch.py:51] router release req id 8 -INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10849452018737793 s -INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.1097724437713623 s -DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=161836526280041976193795605993735592229, time:1750768635.7112758s req_ids:[8] -DEBUG 06-24 20:37:15 [manager.py:391] -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:212.92543411254883ms total_cost_time:212.97025680541992ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13979 prompt_cache_len:5151 prompt_cache_ratio:0.36848129336862434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 -DEBUG 06-24 20:37:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:15 [batch.py:51] router release req id 8 -INFO 06-24 20:37:15 [manager.py:224] router recive req id 8 cost time 0.10744976997375488 s -INFO 06-24 20:37:15 [manager.py:68] detokenization recv req id 8 cost time 0.10945391654968262 s -DEBUG 06-24 20:37:15 [manager.py:391] Prefill Batch: batch_id=291166109562516408654192221174372814706, time:1750768635.9282699s req_ids:[8] -DEBUG 06-24 20:37:15 [manager.py:391] -ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:15 lightllm_req_id:8 first_token_cost:214.49780464172363ms total_cost_time:214.54143524169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13980 prompt_cache_len:5151 prompt_cache_ratio:0.36845493562231757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 -DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:16 [batch.py:51] router release req id 8 -INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.11100411415100098 s -INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.11299681663513184 s -DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=43141611322986118203876092176844221883, time:1750768636.1505299s req_ids:[8] -DEBUG 06-24 20:37:16 [manager.py:391] -ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:215.79265594482422ms total_cost_time:215.83843231201172ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13981 prompt_cache_len:5151 prompt_cache_ratio:0.36842858164652026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 -DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:16 [batch.py:51] router release req id 8 -INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.10910511016845703 s -INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.11137080192565918 s -DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=85219022915661342565534414783274569354, time:1750768636.3727517s req_ids:[8] -DEBUG 06-24 20:37:16 [manager.py:391] -ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:386.0025405883789ms total_cost_time:386.044979095459ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:13982 prompt_cache_len:5151 prompt_cache_ratio:0.3684022314404234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 -DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:16 [batch.py:51] router release req id 8 -INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s -INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.10968971252441406 s -DEBUG 06-24 20:37:16 [manager.py:391] Prefill Batch: batch_id=24553445778010541003338852883563129179, time:1750768636.765505s req_ids:[8] -DEBUG 06-24 20:37:16 [manager.py:391] -ERROR 06-24 20:37:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:221.5421199798584ms total_cost_time:221.5864658355713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13983 prompt_cache_len:5151 prompt_cache_ratio:0.3683758850032182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 -DEBUG 06-24 20:37:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:16 [batch.py:51] router release req id 8 -INFO 06-24 20:37:16 [manager.py:224] router recive req id 8 cost time 0.1080780029296875 s -INFO 06-24 20:37:16 [manager.py:68] detokenization recv req id 8 cost time 0.10922503471374512 s -DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=55878046498719415876969663998507318066, time:1750768637.002391s req_ids:[8] -DEBUG 06-24 20:37:17 [manager.py:391] -ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:16 lightllm_req_id:8 first_token_cost:221.53949737548828ms total_cost_time:221.58217430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:13984 prompt_cache_len:5151 prompt_cache_ratio:0.3683495423340961 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 -DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:17 [batch.py:51] router release req id 8 -INFO 06-24 20:37:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.1083831787109375 s -INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10953021049499512 s -DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=152027078703207171616540982337375675148, time:1750768637.2221699s req_ids:[8] -DEBUG 06-24 20:37:17 [manager.py:391] -ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:215.27528762817383ms total_cost_time:215.32034873962402ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:13985 prompt_cache_len:5151 prompt_cache_ratio:0.36832320343224884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 -DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:17 [batch.py:51] router release req id 8 -INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10882735252380371 s -INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10998678207397461 s -DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=196210163247209452331683163425857789231, time:1750768637.4443297s req_ids:[8] -DEBUG 06-24 20:37:17 [manager.py:391] -ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:206.42876625061035ms total_cost_time:206.47358894348145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13986 prompt_cache_len:5151 prompt_cache_ratio:0.3682968682968683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 -DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:17 [batch.py:51] router release req id 8 -INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10783791542053223 s -INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.1089780330657959 s -DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=40178283905970698449013150410398637145, time:1750768637.6581635s req_ids:[8] -DEBUG 06-24 20:37:17 [manager.py:391] -ERROR 06-24 20:37:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:209.5041275024414ms total_cost_time:209.5482349395752ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:13987 prompt_cache_len:5151 prompt_cache_ratio:0.3682705369271466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 -DEBUG 06-24 20:37:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:17 [batch.py:51] router release req id 8 -INFO 06-24 20:37:17 [manager.py:224] router recive req id 8 cost time 0.10881543159484863 s -INFO 06-24 20:37:17 [manager.py:68] detokenization recv req id 8 cost time 0.10997962951660156 s -DEBUG 06-24 20:37:17 [manager.py:391] Prefill Batch: batch_id=115689451089287418768844843500558673491, time:1750768637.8727357s req_ids:[8] -DEBUG 06-24 20:37:17 [manager.py:391] -ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:17 lightllm_req_id:8 first_token_cost:377.0411014556885ms total_cost_time:377.08544731140137ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:13988 prompt_cache_len:5151 prompt_cache_ratio:0.36824420932227625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 -DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:18 [batch.py:51] router release req id 8 -INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10900115966796875 s -INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.1101992130279541 s -DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=91612312282333967349316688010455937197, time:1750768638.2556121s req_ids:[8] -DEBUG 06-24 20:37:18 [manager.py:391] -ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:212.3241424560547ms total_cost_time:212.3699188232422ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:13989 prompt_cache_len:5151 prompt_cache_ratio:0.36821788548144974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 -DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:18 [batch.py:51] router release req id 8 -INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10884428024291992 s -INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.11002969741821289 s -DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=51048576445876644730138261705641469896, time:1750768638.475584s req_ids:[8] -DEBUG 06-24 20:37:18 [manager.py:391] -ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:213.04893493652344ms total_cost_time:213.10806274414062ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:13990 prompt_cache_len:5151 prompt_cache_ratio:0.3681915654038599 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 -DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:18 [batch.py:51] router release req id 8 -INFO 06-24 20:37:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10880017280578613 s -INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.11002302169799805 s -DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=198420326774350614994535982538086570054, time:1750768638.6950521s req_ids:[8] -DEBUG 06-24 20:37:18 [manager.py:391] -ERROR 06-24 20:37:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:209.68294143676758ms total_cost_time:209.72681045532227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:13991 prompt_cache_len:5151 prompt_cache_ratio:0.36816524908869985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 -DEBUG 06-24 20:37:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:18 [batch.py:51] router release req id 8 -INFO 06-24 20:37:18 [manager.py:224] router recive req id 8 cost time 0.10863065719604492 s -INFO 06-24 20:37:18 [manager.py:68] detokenization recv req id 8 cost time 0.10991168022155762 s -DEBUG 06-24 20:37:18 [manager.py:391] Prefill Batch: batch_id=245696302282062438863597396419961229560, time:1750768638.9110687s req_ids:[8] -DEBUG 06-24 20:37:18 [manager.py:391] -ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:18 lightllm_req_id:8 first_token_cost:211.91692352294922ms total_cost_time:211.9605541229248ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13992 prompt_cache_len:5151 prompt_cache_ratio:0.36813893653516294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 -DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:19 [batch.py:51] router release req id 8 -INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.10809040069580078 s -INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10938763618469238 s -DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=190339614766738791423041981144301221890, time:1750768639.1300826s req_ids:[8] -DEBUG 06-24 20:37:19 [manager.py:391] -ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:37:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 55552.244 tokens/s -DEBUG 06-24 20:37:19 [stats.py:37] Avg prompt tokens throughput: 55544.195 tokens/s -DEBUG 06-24 20:37:19 [stats.py:37] Avg generate tokens throughput: 8.049 tokens/s -INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:210.73603630065918ms total_cost_time:210.77895164489746ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13993 prompt_cache_len:5151 prompt_cache_ratio:0.36811262774244263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 -DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:19 [batch.py:51] router release req id 8 -INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.3095400333404541 s -INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.31070590019226074 s -DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=128007873151124328360167139742395403484, time:1750768639.5573237s req_ids:[8] -DEBUG 06-24 20:37:19 [manager.py:391] -ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:381.23369216918945ms total_cost_time:381.27660751342773ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:13994 prompt_cache_len:5151 prompt_cache_ratio:0.36808632270973274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 -DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:19 [batch.py:51] router release req id 8 -INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.1073904037475586 s -INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10873222351074219 s -DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=104141219626366574132826708325097048074, time:1750768639.7342165s req_ids:[8] -DEBUG 06-24 20:37:19 [manager.py:391] -ERROR 06-24 20:37:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:206.8171501159668ms total_cost_time:206.8619728088379ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:13995 prompt_cache_len:5151 prompt_cache_ratio:0.36806002143622724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 -DEBUG 06-24 20:37:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:19 [batch.py:51] router release req id 8 -INFO 06-24 20:37:19 [manager.py:224] router recive req id 8 cost time 0.10790371894836426 s -INFO 06-24 20:37:19 [manager.py:68] detokenization recv req id 8 cost time 0.10919976234436035 s -DEBUG 06-24 20:37:19 [manager.py:391] Prefill Batch: batch_id=184156989492292847444923373823407138356, time:1750768639.9472475s req_ids:[8] -DEBUG 06-24 20:37:19 [manager.py:391] -ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:19 lightllm_req_id:8 first_token_cost:208.1460952758789ms total_cost_time:208.1894874572754ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:13996 prompt_cache_len:5151 prompt_cache_ratio:0.3680337239211203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 -DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:20 [batch.py:51] router release req id 8 -INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10809946060180664 s -INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.10917115211486816 s -DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=20875675385711834203355062944873096311, time:1750768640.1671433s req_ids:[8] -DEBUG 06-24 20:37:20 [manager.py:391] -ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:219.8014259338379ms total_cost_time:219.84505653381348ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:13997 prompt_cache_len:5151 prompt_cache_ratio:0.3680074301636065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 -DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:20 [batch.py:51] router release req id 8 -INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s -INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11017203330993652 s -DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=246693757860352000744020247071501972420, time:1750768640.3940854s req_ids:[8] -DEBUG 06-24 20:37:20 [manager.py:391] -ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:219.30193901062012ms total_cost_time:219.34747695922852ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:13998 prompt_cache_len:5151 prompt_cache_ratio:0.3679811401628804 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 -DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:20 [batch.py:51] router release req id 8 -INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.10942482948303223 s -INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11145472526550293 s -DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=298077991145517651159339065307827904531, time:1750768640.614352s req_ids:[8] -DEBUG 06-24 20:37:20 [manager.py:391] -ERROR 06-24 20:37:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:215.54064750671387ms total_cost_time:215.59429168701172ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:13999 prompt_cache_len:5151 prompt_cache_ratio:0.36795485391813704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 -DEBUG 06-24 20:37:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:20 [batch.py:51] router release req id 8 -INFO 06-24 20:37:20 [manager.py:224] router recive req id 8 cost time 0.1089620590209961 s -INFO 06-24 20:37:20 [manager.py:68] detokenization recv req id 8 cost time 0.11093401908874512 s -DEBUG 06-24 20:37:20 [manager.py:391] Prefill Batch: batch_id=70860833748614610439076641652526025638, time:1750768640.834475s req_ids:[8] -DEBUG 06-24 20:37:20 [manager.py:391] -ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:20 lightllm_req_id:8 first_token_cost:377.74133682250977ms total_cost_time:377.78687477111816ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14000 prompt_cache_len:5151 prompt_cache_ratio:0.36792857142857144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 -DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:21 [batch.py:51] router release req id 8 -INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10838842391967773 s -INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11078667640686035 s -DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=125952369016617074460335288017100477137, time:1750768641.2199621s req_ids:[8] -DEBUG 06-24 20:37:21 [manager.py:391] -ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:217.47469902038574ms total_cost_time:217.52023696899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14001 prompt_cache_len:5151 prompt_cache_ratio:0.36790229269337904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 -DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:21 [batch.py:51] router release req id 8 -INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10817503929138184 s -INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11039328575134277 s -DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=51889092843763610132730061916938238547, time:1750768641.442208s req_ids:[8] -DEBUG 06-24 20:37:21 [manager.py:391] -ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:171.2355613708496ms total_cost_time:171.27633094787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14002 prompt_cache_len:5151 prompt_cache_ratio:0.3678760177117555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 -DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:21 [batch.py:51] router release req id 8 -INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10850191116333008 s -INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s -DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=96623721583148842109143267334964102473, time:1750768641.6210558s req_ids:[8] -DEBUG 06-24 20:37:21 [manager.py:391] -ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:203.60231399536133ms total_cost_time:203.64689826965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14003 prompt_cache_len:5151 prompt_cache_ratio:0.36784974648289653 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 -DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:21 [batch.py:51] router release req id 8 -INFO 06-24 20:37:21 [manager.py:224] router recive req id 8 cost time 0.10869669914245605 s -INFO 06-24 20:37:21 [manager.py:68] detokenization recv req id 8 cost time 0.11070775985717773 s -DEBUG 06-24 20:37:21 [manager.py:391] Prefill Batch: batch_id=129109449520881917082790143751131030149, time:1750768641.8317325s req_ids:[8] -DEBUG 06-24 20:37:21 [manager.py:391] -ERROR 06-24 20:37:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:213.01817893981934ms total_cost_time:213.06419372558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14004 prompt_cache_len:5151 prompt_cache_ratio:0.36782347900599827 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 -DEBUG 06-24 20:37:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:21 [batch.py:51] router release req id 8 -INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10898327827453613 s -INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.11087775230407715 s -DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=102235248562697680764947522193850379919, time:1750768642.0505025s req_ids:[8] -DEBUG 06-24 20:37:22 [manager.py:391] -ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:21 lightllm_req_id:8 first_token_cost:214.00928497314453ms total_cost_time:214.0519618988037ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14005 prompt_cache_len:5151 prompt_cache_ratio:0.36779721528025705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 -DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:22 [batch.py:51] router release req id 8 -INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.1092216968536377 s -INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.11111879348754883 s -DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=26080150329967155670703753081688791730, time:1750768642.2714345s req_ids:[8] -DEBUG 06-24 20:37:22 [manager.py:391] -ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:382.2152614593506ms total_cost_time:382.2615146636963ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14006 prompt_cache_len:5151 prompt_cache_ratio:0.36777095530486936 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 -DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:22 [batch.py:51] router release req id 8 -INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10785627365112305 s -INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.1098482608795166 s -DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=245187693348831451453768940365758232951, time:1750768642.6757393s req_ids:[8] -DEBUG 06-24 20:37:22 [manager.py:391] -ERROR 06-24 20:37:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:236.71269416809082ms total_cost_time:236.7548942565918ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14007 prompt_cache_len:5151 prompt_cache_ratio:0.3677446990790319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 -DEBUG 06-24 20:37:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:22 [batch.py:51] router release req id 8 -INFO 06-24 20:37:22 [manager.py:224] router recive req id 8 cost time 0.10636663436889648 s -INFO 06-24 20:37:22 [manager.py:68] detokenization recv req id 8 cost time 0.1083519458770752 s -DEBUG 06-24 20:37:22 [manager.py:391] Prefill Batch: batch_id=263388369887030462642510546699788716067, time:1750768642.9022193s req_ids:[8] -DEBUG 06-24 20:37:22 [manager.py:391] -ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:22 lightllm_req_id:8 first_token_cost:216.81475639343262ms total_cost_time:216.8595790863037ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14008 prompt_cache_len:5151 prompt_cache_ratio:0.36771844660194175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 -DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:23 [batch.py:51] router release req id 8 -INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10732889175415039 s -INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.1091916561126709 s -DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=38332137835792899029715002287708746678, time:1750768643.1451976s req_ids:[8] -DEBUG 06-24 20:37:23 [manager.py:391] -INFO 06-24 20:37:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:37:23 [statics_utils.py:24] mean first cost: 231.4989809468831 ms -INFO 06-24 20:37:23 [statics_utils.py:24] mean per token cost: 0.05872487896311082 ms -ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:230.74841499328613ms total_cost_time:230.79276084899902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14009 prompt_cache_len:5151 prompt_cache_ratio:0.36769219787279606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 -DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:23 [batch.py:51] router release req id 8 -INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10780549049377441 s -INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970687866210938 s -DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=46426417571693717882570116888007841932, time:1750768643.3628306s req_ids:[8] -DEBUG 06-24 20:37:23 [manager.py:391] -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:213.61589431762695ms total_cost_time:213.66047859191895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14010 prompt_cache_len:5151 prompt_cache_ratio:0.3676659528907923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 -DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:23 [batch.py:51] router release req id 8 -INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10875272750854492 s -INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.11067509651184082 s -DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=217076218733205888835659167576230451014, time:1750768643.58324s req_ids:[8] -DEBUG 06-24 20:37:23 [manager.py:391] -ERROR 06-24 20:37:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:212.22424507141113ms total_cost_time:212.26906776428223ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14011 prompt_cache_len:5151 prompt_cache_ratio:0.3676397116551281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 -DEBUG 06-24 20:37:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:23 [batch.py:51] router release req id 8 -INFO 06-24 20:37:23 [manager.py:224] router recive req id 8 cost time 0.10829305648803711 s -INFO 06-24 20:37:23 [manager.py:68] detokenization recv req id 8 cost time 0.11030030250549316 s -DEBUG 06-24 20:37:23 [manager.py:391] Prefill Batch: batch_id=8799838567320948521137010541018961047, time:1750768643.8045652s req_ids:[8] -DEBUG 06-24 20:37:23 [manager.py:391] -ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:23 lightllm_req_id:8 first_token_cost:399.5318412780762ms total_cost_time:399.57571029663086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14012 prompt_cache_len:5151 prompt_cache_ratio:0.36761347416500145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 -DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:24 [batch.py:51] router release req id 8 -INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.1088721752166748 s -INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.11092114448547363 s -DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=76525814007984016949614803215842856784, time:1750768644.2097976s req_ids:[8] -DEBUG 06-24 20:37:24 [manager.py:391] -ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:216.7818546295166ms total_cost_time:216.82381629943848ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14013 prompt_cache_len:5151 prompt_cache_ratio:0.3675872404196104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 -DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:24 [batch.py:51] router release req id 8 -INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10843586921691895 s -INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.10975074768066406 s -DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=150189330082249843617744967146425009659, time:1750768644.4338036s req_ids:[8] -DEBUG 06-24 20:37:24 [manager.py:391] -ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:214.75529670715332ms total_cost_time:214.8001194000244ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14014 prompt_cache_len:5151 prompt_cache_ratio:0.36756101041815326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 -DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:24 [batch.py:51] router release req id 8 -INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10877275466918945 s -INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.11070990562438965 s -DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=84134536554018676178937675041633655580, time:1750768644.667008s req_ids:[8] -DEBUG 06-24 20:37:24 [manager.py:391] -ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:207.17859268188477ms total_cost_time:207.2288990020752ms,out_token_counter:1 mean_per_token_cost_time: 0.05030632019042969ms prompt_token_num:14015 prompt_cache_len:5151 prompt_cache_ratio:0.36753478415982876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 -DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:24 [batch.py:51] router release req id 8 -INFO 06-24 20:37:24 [manager.py:224] router recive req id 8 cost time 0.10899138450622559 s -INFO 06-24 20:37:24 [manager.py:68] detokenization recv req id 8 cost time 0.1110389232635498 s -DEBUG 06-24 20:37:24 [manager.py:391] Prefill Batch: batch_id=171055964866307339425703828936070774392, time:1750768644.869254s req_ids:[8] -DEBUG 06-24 20:37:24 [manager.py:391] -ERROR 06-24 20:37:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:209.00321006774902ms total_cost_time:209.04803276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14016 prompt_cache_len:5151 prompt_cache_ratio:0.3675085616438356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 -DEBUG 06-24 20:37:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:24 [batch.py:51] router release req id 8 -INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.1088266372680664 s -INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11078381538391113 s -DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=167541586733189925553104417644809541069, time:1750768645.0848935s req_ids:[8] -DEBUG 06-24 20:37:25 [manager.py:391] -ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:24 lightllm_req_id:8 first_token_cost:373.7936019897461ms total_cost_time:373.8389015197754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14017 prompt_cache_len:5151 prompt_cache_ratio:0.3674823428693729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 -DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:25 [batch.py:51] router release req id 8 -INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10976409912109375 s -INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11188507080078125 s -DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=185905800330692748975643394503732300705, time:1750768645.4645739s req_ids:[8] -DEBUG 06-24 20:37:25 [manager.py:391] -ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:210.2224826812744ms total_cost_time:210.27326583862305ms,out_token_counter:1 mean_per_token_cost_time: 0.05078315734863281ms prompt_token_num:14018 prompt_cache_len:5151 prompt_cache_ratio:0.3674561278356399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 -DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:25 [batch.py:51] router release req id 8 -INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10868024826049805 s -INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11068344116210938 s -DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=61587575717285583211793449830634246327, time:1750768645.6829276s req_ids:[8] -DEBUG 06-24 20:37:25 [manager.py:391] -ERROR 06-24 20:37:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:207.49545097351074ms total_cost_time:207.53860473632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14019 prompt_cache_len:5151 prompt_cache_ratio:0.3674299165418361 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 -DEBUG 06-24 20:37:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:25 [batch.py:51] router release req id 8 -INFO 06-24 20:37:25 [manager.py:224] router recive req id 8 cost time 0.10816431045532227 s -INFO 06-24 20:37:25 [manager.py:68] detokenization recv req id 8 cost time 0.11012983322143555 s -DEBUG 06-24 20:37:25 [manager.py:391] Prefill Batch: batch_id=102084486086390195034546348605748854347, time:1750768645.9066472s req_ids:[8] -DEBUG 06-24 20:37:25 [manager.py:391] -ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:25 lightllm_req_id:8 first_token_cost:251.6634464263916ms total_cost_time:251.7070770263672ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14020 prompt_cache_len:5151 prompt_cache_ratio:0.3674037089871612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 -DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:26 [batch.py:51] router release req id 8 -INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10864615440368652 s -INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11054801940917969 s -DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=27229082176471292183420565726811461205, time:1750768646.165872s req_ids:[8] -DEBUG 06-24 20:37:26 [manager.py:391] -ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:221.36163711547852ms total_cost_time:221.4052677154541ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14021 prompt_cache_len:5151 prompt_cache_ratio:0.3673775051708152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 -DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:26 [batch.py:51] router release req id 8 -INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10975885391235352 s -INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11153221130371094 s -DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=250318443827409529391719268268035001032, time:1750768646.3833728s req_ids:[8] -DEBUG 06-24 20:37:26 [manager.py:391] -ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:212.71848678588867ms total_cost_time:212.76402473449707ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14022 prompt_cache_len:5151 prompt_cache_ratio:0.3673513050919983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 -DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:26 [batch.py:51] router release req id 8 -INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10895156860351562 s -INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11090087890625 s -DEBUG 06-24 20:37:26 [manager.py:391] Prefill Batch: batch_id=161666834512740079112209659924765595197, time:1750768646.6020172s req_ids:[8] -DEBUG 06-24 20:37:26 [manager.py:391] -ERROR 06-24 20:37:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:391.2022113800049ms total_cost_time:391.2475109100342ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14023 prompt_cache_len:5151 prompt_cache_ratio:0.36732510874991087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 -DEBUG 06-24 20:37:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:26 [batch.py:51] router release req id 8 -INFO 06-24 20:37:26 [manager.py:224] router recive req id 8 cost time 0.10798048973083496 s -INFO 06-24 20:37:26 [manager.py:68] detokenization recv req id 8 cost time 0.11001396179199219 s -DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=58010366134870118591704892388075613685, time:1750768647.000275s req_ids:[8] -DEBUG 06-24 20:37:27 [manager.py:391] -ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:26 lightllm_req_id:8 first_token_cost:205.08551597595215ms total_cost_time:205.13010025024414ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14024 prompt_cache_len:5151 prompt_cache_ratio:0.36729891614375354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 -DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:27 [batch.py:51] router release req id 8 -INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10787630081176758 s -INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.1098775863647461 s -DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=312172612912482241668571861294427273828, time:1750768647.2121692s req_ids:[8] -DEBUG 06-24 20:37:27 [manager.py:391] -ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:211.378812789917ms total_cost_time:211.42292022705078ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14025 prompt_cache_len:5151 prompt_cache_ratio:0.36727272727272725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 -DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:27 [batch.py:51] router release req id 8 -INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10792350769042969 s -INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s -DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=30064478119911264962120305480799129363, time:1750768647.4301913s req_ids:[8] -DEBUG 06-24 20:37:27 [manager.py:391] -ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:213.2437229156494ms total_cost_time:213.2871150970459ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14026 prompt_cache_len:5151 prompt_cache_ratio:0.3672465421360331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 -DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:27 [batch.py:51] router release req id 8 -INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s -INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.10969209671020508 s -DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=188181456495465733160266496243954849185, time:1750768647.6494856s req_ids:[8] -DEBUG 06-24 20:37:27 [manager.py:391] -ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:170.5005168914795ms total_cost_time:170.54271697998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14027 prompt_cache_len:5151 prompt_cache_ratio:0.36722036073287234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 -DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:27 [batch.py:51] router release req id 8 -INFO 06-24 20:37:27 [manager.py:224] router recive req id 8 cost time 0.10907483100891113 s -INFO 06-24 20:37:27 [manager.py:68] detokenization recv req id 8 cost time 0.11092758178710938 s -DEBUG 06-24 20:37:27 [manager.py:391] Prefill Batch: batch_id=108924619117105180840169872495899346037, time:1750768647.8259497s req_ids:[8] -DEBUG 06-24 20:37:27 [manager.py:391] -ERROR 06-24 20:37:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:201.55668258666992ms total_cost_time:201.60150527954102ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14028 prompt_cache_len:5151 prompt_cache_ratio:0.36719418306244656 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 -DEBUG 06-24 20:37:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:27 [batch.py:51] router release req id 8 -INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10797715187072754 s -INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11006927490234375 s -DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=309045816129106047214984119830273207978, time:1750768648.034562s req_ids:[8] -DEBUG 06-24 20:37:28 [manager.py:391] -ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:27 lightllm_req_id:8 first_token_cost:380.5396556854248ms total_cost_time:380.584716796875ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14029 prompt_cache_len:5151 prompt_cache_ratio:0.36716800912395753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 -DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:28 [batch.py:51] router release req id 8 -INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10827922821044922 s -INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.10956501960754395 s -DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=152844601813806767524328005983230681988, time:1750768648.421952s req_ids:[8] -DEBUG 06-24 20:37:28 [manager.py:391] -ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:215.70277214050293ms total_cost_time:215.75617790222168ms,out_token_counter:1 mean_per_token_cost_time: 0.05340576171875ms prompt_token_num:14030 prompt_cache_len:5151 prompt_cache_ratio:0.3671418389166073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 -DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:28 [batch.py:51] router release req id 8 -INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.1079721450805664 s -INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11005568504333496 s -DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=20573223347345260654732874067021031531, time:1750768648.64376s req_ids:[8] -DEBUG 06-24 20:37:28 [manager.py:391] -ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.4315242767334ms total_cost_time:214.4758701324463ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14031 prompt_cache_len:5151 prompt_cache_ratio:0.36711567243959803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 -DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:28 [batch.py:51] router release req id 8 -INFO 06-24 20:37:28 [manager.py:224] router recive req id 8 cost time 0.10799527168273926 s -INFO 06-24 20:37:28 [manager.py:68] detokenization recv req id 8 cost time 0.11003708839416504 s -DEBUG 06-24 20:37:28 [manager.py:391] Prefill Batch: batch_id=253000760416701909763620068022718186525, time:1750768648.865531s req_ids:[8] -DEBUG 06-24 20:37:28 [manager.py:391] -ERROR 06-24 20:37:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.7531509399414ms total_cost_time:214.79558944702148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14032 prompt_cache_len:5151 prompt_cache_ratio:0.3670895096921323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 -DEBUG 06-24 20:37:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:28 [batch.py:51] router release req id 8 -INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.11120367050170898 s -INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11249446868896484 s -DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=15837043767542972863930064657531014182, time:1750768649.0867321s req_ids:[8] -DEBUG 06-24 20:37:29 [manager.py:391] -ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:28 lightllm_req_id:8 first_token_cost:214.17498588562012ms total_cost_time:214.25771713256836ms,out_token_counter:1 mean_per_token_cost_time: 0.08273124694824219ms prompt_token_num:14033 prompt_cache_len:5151 prompt_cache_ratio:0.36706335067341267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 -DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:29 [batch.py:51] router release req id 8 -INFO 06-24 20:37:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.10867667198181152 s -INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.10996866226196289 s -DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=165558327551126128419124680030533218708, time:1750768649.3105474s req_ids:[8] -DEBUG 06-24 20:37:29 [manager.py:391] -DEBUG 06-24 20:37:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 56973.601 tokens/s -DEBUG 06-24 20:37:29 [stats.py:37] Avg prompt tokens throughput: 56965.570 tokens/s -DEBUG 06-24 20:37:29 [stats.py:37] Avg generate tokens throughput: 8.031 tokens/s -ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:216.51458740234375ms total_cost_time:216.55774116516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14034 prompt_cache_len:5151 prompt_cache_ratio:0.3670371953826422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 -DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:29 [batch.py:51] router release req id 8 -INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.10903215408325195 s -INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11103463172912598 s -DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=263680788569450330300395281002809920399, time:1750768649.5419836s req_ids:[8] -DEBUG 06-24 20:37:29 [manager.py:391] -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:389.93096351623535ms total_cost_time:389.97602462768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14035 prompt_cache_len:5151 prompt_cache_ratio:0.3670110438190239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 -DEBUG 06-24 20:37:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:29 [batch.py:51] router release req id 8 -INFO 06-24 20:37:29 [manager.py:224] router recive req id 8 cost time 0.1080942153930664 s -INFO 06-24 20:37:29 [manager.py:68] detokenization recv req id 8 cost time 0.11007833480834961 s -DEBUG 06-24 20:37:29 [manager.py:391] Prefill Batch: batch_id=91585696867291754894202228285314586261, time:1750768649.9290872s req_ids:[8] -DEBUG 06-24 20:37:29 [manager.py:391] -ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:29 lightllm_req_id:8 first_token_cost:212.9838466644287ms total_cost_time:213.0293846130371ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14036 prompt_cache_len:5151 prompt_cache_ratio:0.3669848959817612 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 -DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:30 [batch.py:51] router release req id 8 -INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10773444175720215 s -INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.10978055000305176 s -DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=58840295766180191033116926610078332458, time:1750768650.1485004s req_ids:[8] -DEBUG 06-24 20:37:30 [manager.py:391] -ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:213.16957473754883ms total_cost_time:213.22989463806152ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14037 prompt_cache_len:5151 prompt_cache_ratio:0.3669587518700577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 -DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:30 [batch.py:51] router release req id 8 -INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10789966583251953 s -INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.10994791984558105 s -DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=18073608586559937040394494628310988752, time:1750768650.3678918s req_ids:[8] -DEBUG 06-24 20:37:30 [manager.py:391] -ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:210.32214164733887ms total_cost_time:210.38246154785156ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14038 prompt_cache_len:5151 prompt_cache_ratio:0.36693261148311723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 -DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:30 [batch.py:51] router release req id 8 -INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10905623435974121 s -INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.11101031303405762 s -DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=265745384421021407746865981762234487880, time:1750768650.5854254s req_ids:[8] -DEBUG 06-24 20:37:30 [manager.py:391] -ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:215.23809432983398ms total_cost_time:215.28267860412598ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14039 prompt_cache_len:5151 prompt_cache_ratio:0.3669064748201439 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 -DEBUG 06-24 20:37:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:30 [batch.py:51] router release req id 8 -INFO 06-24 20:37:30 [manager.py:224] router recive req id 8 cost time 0.10807538032531738 s -INFO 06-24 20:37:30 [manager.py:68] detokenization recv req id 8 cost time 0.11011791229248047 s -DEBUG 06-24 20:37:30 [manager.py:391] Prefill Batch: batch_id=154384013037280503034654216437273156687, time:1750768650.80862s req_ids:[8] -DEBUG 06-24 20:37:30 [manager.py:391] -ERROR 06-24 20:37:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:215.4560089111328ms total_cost_time:215.5168056488037ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:14040 prompt_cache_len:5151 prompt_cache_ratio:0.36688034188034185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 -DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:31 [batch.py:51] router release req id 8 -INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.30987048149108887 s -INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.31208252906799316 s -DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=298891842423466771646813608539047303036, time:1750768651.2397535s req_ids:[8] -DEBUG 06-24 20:37:31 [manager.py:391] -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:31 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:30 lightllm_req_id:8 first_token_cost:430.0730228424072ms total_cost_time:430.1323890686035ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:14041 prompt_cache_len:5151 prompt_cache_ratio:0.36685421266291574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 -DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:31 [batch.py:51] router release req id 8 -INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.108184814453125 s -INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103062629699707 s -DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=212150281567210242513205876642373809904, time:1750768651.4682481s req_ids:[8] -DEBUG 06-24 20:37:31 [manager.py:391] -ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:217.03076362609863ms total_cost_time:217.09346771240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:14042 prompt_cache_len:5151 prompt_cache_ratio:0.3668280871670702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 -DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:31 [batch.py:51] router release req id 8 -INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.1091611385345459 s -INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.11118578910827637 s -DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=229182645959696303596835413221865792740, time:1750768651.693989s req_ids:[8] -DEBUG 06-24 20:37:31 [manager.py:391] -ERROR 06-24 20:37:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:217.2396183013916ms total_cost_time:217.3013687133789ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14043 prompt_cache_len:5151 prompt_cache_ratio:0.36680196539201027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 -DEBUG 06-24 20:37:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:31 [batch.py:51] router release req id 8 -INFO 06-24 20:37:31 [manager.py:224] router recive req id 8 cost time 0.10823512077331543 s -INFO 06-24 20:37:31 [manager.py:68] detokenization recv req id 8 cost time 0.11018800735473633 s -DEBUG 06-24 20:37:31 [manager.py:391] Prefill Batch: batch_id=167858335139420522594536818592125811707, time:1750768651.917418s req_ids:[8] -DEBUG 06-24 20:37:31 [manager.py:391] -ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:31 lightllm_req_id:8 first_token_cost:211.12561225891113ms total_cost_time:211.17258071899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14044 prompt_cache_len:5151 prompt_cache_ratio:0.366775847336941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 -DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:32 [batch.py:51] router release req id 8 -INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.10784339904785156 s -INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11023521423339844 s -DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=103721657734544847581579503931288767098, time:1750768652.136251s req_ids:[8] -DEBUG 06-24 20:37:32 [manager.py:391] -ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:217.1940803527832ms total_cost_time:217.2544002532959ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14045 prompt_cache_len:5151 prompt_cache_ratio:0.366749733001068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 -DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:32 [batch.py:51] router release req id 8 -INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.10909891128540039 s -INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11115145683288574 s -DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=219999877022901334713583608274836572005, time:1750768652.3599672s req_ids:[8] -DEBUG 06-24 20:37:32 [manager.py:391] -ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:216.01319313049316ms total_cost_time:216.07375144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:14046 prompt_cache_len:5151 prompt_cache_ratio:0.36672362238359674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 -DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:32 [batch.py:51] router release req id 8 -INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.1090090274810791 s -INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11092615127563477 s -DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=215174347424892181745493533918749329597, time:1750768652.58338s req_ids:[8] -DEBUG 06-24 20:37:32 [manager.py:391] -ERROR 06-24 20:37:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:390.06495475769043ms total_cost_time:390.12622833251953ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:14047 prompt_cache_len:5151 prompt_cache_ratio:0.3666975154837332 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 -DEBUG 06-24 20:37:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:32 [batch.py:51] router release req id 8 -INFO 06-24 20:37:32 [manager.py:224] router recive req id 8 cost time 0.1081700325012207 s -INFO 06-24 20:37:32 [manager.py:68] detokenization recv req id 8 cost time 0.11018633842468262 s -DEBUG 06-24 20:37:32 [manager.py:391] Prefill Batch: batch_id=224820620053238446065934643661716282261, time:1750768652.9830985s req_ids:[8] -DEBUG 06-24 20:37:32 [manager.py:391] -ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:32 lightllm_req_id:8 first_token_cost:216.10307693481445ms total_cost_time:216.16482734680176ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14048 prompt_cache_len:5151 prompt_cache_ratio:0.3666714123006834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 -DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:33 [batch.py:51] router release req id 8 -INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.1086575984954834 s -INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11069941520690918 s -DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=299829430647162374295537733541323288139, time:1750768653.204201s req_ids:[8] -DEBUG 06-24 20:37:33 [manager.py:391] -ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:215.07549285888672ms total_cost_time:215.13700485229492ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14049 prompt_cache_len:5151 prompt_cache_ratio:0.36664531283365365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 -DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:33 [batch.py:51] router release req id 8 -INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10890817642211914 s -INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11088037490844727 s -DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=252984688575898064861044358589249924698, time:1750768653.4267576s req_ids:[8] -DEBUG 06-24 20:37:33 [manager.py:391] -ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:209.5162868499756ms total_cost_time:209.57612991333008ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:14050 prompt_cache_len:5151 prompt_cache_ratio:0.3666192170818505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 -DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:33 [batch.py:51] router release req id 8 -INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10853290557861328 s -INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.11063408851623535 s -DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=211078981437536084643178642018715812977, time:1750768653.643601s req_ids:[8] -DEBUG 06-24 20:37:33 [manager.py:391] -ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:213.5615348815918ms total_cost_time:213.61923217773438ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14051 prompt_cache_len:5151 prompt_cache_ratio:0.3665931250444808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 -DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:33 [batch.py:51] router release req id 8 -INFO 06-24 20:37:33 [manager.py:224] router recive req id 8 cost time 0.10774493217468262 s -INFO 06-24 20:37:33 [manager.py:68] detokenization recv req id 8 cost time 0.10901188850402832 s -DEBUG 06-24 20:37:33 [manager.py:391] Prefill Batch: batch_id=151261336880285299082661102908601011142, time:1750768653.8634965s req_ids:[8] -DEBUG 06-24 20:37:33 [manager.py:391] -ERROR 06-24 20:37:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:214.23864364624023ms total_cost_time:214.26010131835938ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:14052 prompt_cache_len:5151 prompt_cache_ratio:0.36656703672075147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 -DEBUG 06-24 20:37:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:33 [batch.py:51] router release req id 8 -INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10709404945373535 s -INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.10899901390075684 s -DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=144398890077258877005639262792311657641, time:1750768654.0856285s req_ids:[8] -DEBUG 06-24 20:37:34 [manager.py:391] -ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:33 lightllm_req_id:8 first_token_cost:382.34472274780273ms total_cost_time:382.3890686035156ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14053 prompt_cache_len:5151 prompt_cache_ratio:0.36654095210986976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 -DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:34 [batch.py:51] router release req id 8 -INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10833501815795898 s -INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.11027979850769043 s -DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=189691696680779490671692199764230633667, time:1750768654.472982s req_ids:[8] -DEBUG 06-24 20:37:34 [manager.py:391] -ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:213.58370780944824ms total_cost_time:213.62662315368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14054 prompt_cache_len:5151 prompt_cache_ratio:0.3665148712110431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 -DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:34 [batch.py:51] router release req id 8 -INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.10853743553161621 s -INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.11061787605285645 s -DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=242369822099912875063963204839230027108, time:1750768654.693744s req_ids:[8] -DEBUG 06-24 20:37:34 [manager.py:391] -ERROR 06-24 20:37:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:212.53228187561035ms total_cost_time:212.57638931274414ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14055 prompt_cache_len:5151 prompt_cache_ratio:0.36648879402347917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 -DEBUG 06-24 20:37:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:34 [batch.py:51] router release req id 8 -INFO 06-24 20:37:34 [manager.py:224] router recive req id 8 cost time 0.1078031063079834 s -INFO 06-24 20:37:34 [manager.py:68] detokenization recv req id 8 cost time 0.10974359512329102 s -DEBUG 06-24 20:37:34 [manager.py:391] Prefill Batch: batch_id=328967724320692869345656187846599931999, time:1750768654.9132788s req_ids:[8] -DEBUG 06-24 20:37:34 [manager.py:391] -ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:34 lightllm_req_id:8 first_token_cost:209.37848091125488ms total_cost_time:209.42282676696777ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14056 prompt_cache_len:5151 prompt_cache_ratio:0.3664627205463859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 -DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:35 [batch.py:51] router release req id 8 -INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.1088411808013916 s -INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.11035561561584473 s -DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=283473526727648433820772444719559442807, time:1750768655.1295986s req_ids:[8] -DEBUG 06-24 20:37:35 [manager.py:391] -ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:210.61420440673828ms total_cost_time:210.65926551818848ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14057 prompt_cache_len:5151 prompt_cache_ratio:0.3664366507789713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 -DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:35 [batch.py:51] router release req id 8 -INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.10881328582763672 s -INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.11084675788879395 s -DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=113446702042286435123236231751379005422, time:1750768655.3452084s req_ids:[8] -DEBUG 06-24 20:37:35 [manager.py:391] -ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:213.75083923339844ms total_cost_time:213.79375457763672ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14058 prompt_cache_len:5151 prompt_cache_ratio:0.36641058472044385 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 -DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:35 [batch.py:51] router release req id 8 -INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.1091616153717041 s -INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.1111752986907959 s -DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=102445438939738045243492104913284013533, time:1750768655.5668592s req_ids:[8] -DEBUG 06-24 20:37:35 [manager.py:391] -ERROR 06-24 20:37:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:373.8517761230469ms total_cost_time:373.89612197875977ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14059 prompt_cache_len:5151 prompt_cache_ratio:0.36638452237001207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 -DEBUG 06-24 20:37:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:35 [batch.py:51] router release req id 8 -INFO 06-24 20:37:35 [manager.py:224] router recive req id 8 cost time 0.10816597938537598 s -INFO 06-24 20:37:35 [manager.py:68] detokenization recv req id 8 cost time 0.10990715026855469 s -DEBUG 06-24 20:37:35 [manager.py:391] Prefill Batch: batch_id=106982580544501394707297620072146017418, time:1750768655.9459054s req_ids:[8] -DEBUG 06-24 20:37:35 [manager.py:391] -ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:35 lightllm_req_id:8 first_token_cost:214.3685817718506ms total_cost_time:214.41245079040527ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14060 prompt_cache_len:5151 prompt_cache_ratio:0.3663584637268848 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 -DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:36 [batch.py:51] router release req id 8 -INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10759758949279785 s -INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.1095268726348877 s -DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=218878316548341065638295672978092747566, time:1750768656.1767836s req_ids:[8] -DEBUG 06-24 20:37:36 [manager.py:391] -ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:225.73018074035645ms total_cost_time:225.77404975891113ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14061 prompt_cache_len:5151 prompt_cache_ratio:0.36633240879027096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 -DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:36 [batch.py:51] router release req id 8 -INFO 06-24 20:37:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10885143280029297 s -INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.11079835891723633 s -DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=330216702452067760132481773068916742131, time:1750768656.4000735s req_ids:[8] -DEBUG 06-24 20:37:36 [manager.py:391] -ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:213.00888061523438ms total_cost_time:213.05370330810547ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14062 prompt_cache_len:5151 prompt_cache_ratio:0.3663063575593799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 -DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:36 [batch.py:51] router release req id 8 -INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10791444778442383 s -INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.10991287231445312 s -DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=136219401290633969462016432329402592441, time:1750768656.6195328s req_ids:[8] -DEBUG 06-24 20:37:36 [manager.py:391] -ERROR 06-24 20:37:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:216.0501480102539ms total_cost_time:216.0937786102295ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14063 prompt_cache_len:5151 prompt_cache_ratio:0.36628031003342104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 -DEBUG 06-24 20:37:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:36 [batch.py:51] router release req id 8 -INFO 06-24 20:37:36 [manager.py:224] router recive req id 8 cost time 0.10870742797851562 s -INFO 06-24 20:37:36 [manager.py:68] detokenization recv req id 8 cost time 0.11060190200805664 s -DEBUG 06-24 20:37:36 [manager.py:391] Prefill Batch: batch_id=2998648059949904495729050569848859166, time:1750768656.8408601s req_ids:[8] -DEBUG 06-24 20:37:36 [manager.py:391] -ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:36 lightllm_req_id:8 first_token_cost:379.6370029449463ms total_cost_time:379.6834945678711ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14064 prompt_cache_len:5151 prompt_cache_ratio:0.3662542662116041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 -DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:37 [batch.py:51] router release req id 8 -INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10842728614807129 s -INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11036872863769531 s -DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=141917982380791955525039580240268778492, time:1750768657.2277296s req_ids:[8] -DEBUG 06-24 20:37:37 [manager.py:391] -ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:211.87448501586914ms total_cost_time:211.91930770874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14065 prompt_cache_len:5151 prompt_cache_ratio:0.366228226093139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 -DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:37 [batch.py:51] router release req id 8 -INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.11095833778381348 s -INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11294221878051758 s -DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=182644050427468272389430063389032866885, time:1750768657.4472916s req_ids:[8] -DEBUG 06-24 20:37:37 [manager.py:391] -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:213.5324478149414ms total_cost_time:213.5758399963379ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14066 prompt_cache_len:5151 prompt_cache_ratio:0.3662021896772359 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 -DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:37 [batch.py:51] router release req id 8 -INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10748672485351562 s -INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.10900735855102539 s -DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=16817279111529589239685364579141613467, time:1750768657.6667724s req_ids:[8] -DEBUG 06-24 20:37:37 [manager.py:391] -ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:216.1417007446289ms total_cost_time:216.1862850189209ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14067 prompt_cache_len:5151 prompt_cache_ratio:0.3661761569631051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 -DEBUG 06-24 20:37:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:37 [batch.py:51] router release req id 8 -INFO 06-24 20:37:37 [manager.py:224] router recive req id 8 cost time 0.10890316963195801 s -INFO 06-24 20:37:37 [manager.py:68] detokenization recv req id 8 cost time 0.11095261573791504 s -DEBUG 06-24 20:37:37 [manager.py:391] Prefill Batch: batch_id=149614486773128525436912602895726722827, time:1750768657.889134s req_ids:[8] -DEBUG 06-24 20:37:37 [manager.py:391] -ERROR 06-24 20:37:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:213.84382247924805ms total_cost_time:213.88959884643555ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14068 prompt_cache_len:5151 prompt_cache_ratio:0.3661501279499573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 -DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:38 [batch.py:51] router release req id 8 -INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10725092887878418 s -INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.10913681983947754 s -DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=250392279850003829453981459188382351771, time:1750768658.1099925s req_ids:[8] -DEBUG 06-24 20:37:38 [manager.py:391] -ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:37 lightllm_req_id:8 first_token_cost:169.39735412597656ms total_cost_time:169.44050788879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14069 prompt_cache_len:5151 prompt_cache_ratio:0.36612410263700335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 -DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:38 [batch.py:51] router release req id 8 -INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10725688934326172 s -INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.1091775894165039 s -DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=244366808824328074400750273891712716274, time:1750768658.288065s req_ids:[8] -DEBUG 06-24 20:37:38 [manager.py:391] -ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:378.9544105529785ms total_cost_time:378.9987564086914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14070 prompt_cache_len:5151 prompt_cache_ratio:0.36609808102345415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 -DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:38 [batch.py:51] router release req id 8 -INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10813760757446289 s -INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102907657623291 s -DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=113845917801791225192625135750618769570, time:1750768658.6735034s req_ids:[8] -DEBUG 06-24 20:37:38 [manager.py:391] -ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:214.98823165893555ms total_cost_time:215.03305435180664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14071 prompt_cache_len:5151 prompt_cache_ratio:0.3660720631085211 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 -DEBUG 06-24 20:37:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:38 [batch.py:51] router release req id 8 -INFO 06-24 20:37:38 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s -INFO 06-24 20:37:38 [manager.py:68] detokenization recv req id 8 cost time 0.11012816429138184 s -DEBUG 06-24 20:37:38 [manager.py:391] Prefill Batch: batch_id=305497704499796240582864423919009478273, time:1750768658.8953543s req_ids:[8] -DEBUG 06-24 20:37:38 [manager.py:391] -ERROR 06-24 20:37:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:38 lightllm_req_id:8 first_token_cost:212.97550201416016ms total_cost_time:213.02199363708496ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14072 prompt_cache_len:5151 prompt_cache_ratio:0.36604604889141557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 -DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:39 [batch.py:51] router release req id 8 -DEBUG 06-24 20:37:39 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:39 [manager.py:283] -DEBUG 06-24 20:37:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:39 [manager.py:284] -INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10768890380859375 s -INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.1096353530883789 s -DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=162882017774484579468974464812263708926, time:1750768659.1150804s req_ids:[8] -DEBUG 06-24 20:37:39 [manager.py:391] -ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:214.5857810974121ms total_cost_time:214.6306037902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14073 prompt_cache_len:5151 prompt_cache_ratio:0.3660200383713494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 -DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:39 [batch.py:51] router release req id 8 -INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.1078329086303711 s -INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.10983800888061523 s -DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=68376915377731259713886549651838568804, time:1750768659.3354313s req_ids:[8] -DEBUG 06-24 20:37:39 [manager.py:391] -DEBUG 06-24 20:37:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 56086.355 tokens/s -DEBUG 06-24 20:37:39 [stats.py:37] Avg prompt tokens throughput: 56078.374 tokens/s -DEBUG 06-24 20:37:39 [stats.py:37] Avg generate tokens throughput: 7.980 tokens/s -ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:212.63837814331055ms total_cost_time:212.68296241760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14074 prompt_cache_len:5151 prompt_cache_ratio:0.36599403154753446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 -DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:39 [batch.py:51] router release req id 8 -INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10843634605407715 s -INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.1103980541229248 s -DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=229911249074997693788055690420662283035, time:1750768659.556083s req_ids:[8] -DEBUG 06-24 20:37:39 [manager.py:391] -ERROR 06-24 20:37:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:209.94162559509277ms total_cost_time:209.98668670654297ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14075 prompt_cache_len:5151 prompt_cache_ratio:0.36596802841918297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 -DEBUG 06-24 20:37:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:39 [batch.py:51] router release req id 8 -INFO 06-24 20:37:39 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s -INFO 06-24 20:37:39 [manager.py:68] detokenization recv req id 8 cost time 0.11149263381958008 s -DEBUG 06-24 20:37:39 [manager.py:391] Prefill Batch: batch_id=113119797095664852152736398037907575897, time:1750768659.7720912s req_ids:[8] -DEBUG 06-24 20:37:39 [manager.py:391] -ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:39 lightllm_req_id:8 first_token_cost:392.55738258361816ms total_cost_time:392.60125160217285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14076 prompt_cache_len:5151 prompt_cache_ratio:0.36594202898550726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 -DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:40 [batch.py:51] router release req id 8 -INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s -INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.11096954345703125 s -DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=84227695559844687687337978093228077122, time:1750768660.1708016s req_ids:[8] -DEBUG 06-24 20:37:40 [manager.py:391] -ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:209.17534828186035ms total_cost_time:209.21850204467773ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14077 prompt_cache_len:5151 prompt_cache_ratio:0.36591603324572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 -DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:40 [batch.py:51] router release req id 8 -INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10776257514953613 s -INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.10989236831665039 s -DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=131552831575339104317559156929430238427, time:1750768660.3881104s req_ids:[8] -DEBUG 06-24 20:37:40 [manager.py:391] -ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:216.4437770843506ms total_cost_time:216.48883819580078ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14078 prompt_cache_len:5151 prompt_cache_ratio:0.36589004119903396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 -DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:40 [batch.py:51] router release req id 8 -INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10836124420166016 s -INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.1098942756652832 s -DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=189420492797132345394187389989081752103, time:1750768660.6211793s req_ids:[8] -DEBUG 06-24 20:37:40 [manager.py:391] -ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:227.51235961914062ms total_cost_time:227.55742073059082ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14079 prompt_cache_len:5151 prompt_cache_ratio:0.3658640528446623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 -DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:40 [batch.py:51] router release req id 8 -INFO 06-24 20:37:40 [manager.py:224] router recive req id 8 cost time 0.10932660102844238 s -INFO 06-24 20:37:40 [manager.py:68] detokenization recv req id 8 cost time 0.11145472526550293 s -DEBUG 06-24 20:37:40 [manager.py:391] Prefill Batch: batch_id=59282430695376911964763600173467593028, time:1750768660.845334s req_ids:[8] -DEBUG 06-24 20:37:40 [manager.py:391] -ERROR 06-24 20:37:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:215.35348892211914ms total_cost_time:215.39688110351562ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14080 prompt_cache_len:5151 prompt_cache_ratio:0.3658380681818182 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 -DEBUG 06-24 20:37:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:40 [batch.py:51] router release req id 8 -INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10935068130493164 s -INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11067461967468262 s -DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=124814697109664966724194642157042970269, time:1750768661.067379s req_ids:[8] -DEBUG 06-24 20:37:41 [manager.py:391] -ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:40 lightllm_req_id:8 first_token_cost:212.97883987426758ms total_cost_time:213.02485466003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14081 prompt_cache_len:5151 prompt_cache_ratio:0.3658120872097152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 -DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:41 [batch.py:51] router release req id 8 -INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.1087944507598877 s -INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s -DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=337760848104722275786988979788191222777, time:1750768661.3002846s req_ids:[8] -DEBUG 06-24 20:37:41 [manager.py:391] -ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:398.62847328186035ms total_cost_time:398.67210388183594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14082 prompt_cache_len:5151 prompt_cache_ratio:0.3657861099275671 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 -DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:41 [batch.py:51] router release req id 8 -INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10864114761352539 s -INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11074042320251465 s -DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=185418065051604430992802691647712516531, time:1750768661.692656s req_ids:[8] -DEBUG 06-24 20:37:41 [manager.py:391] -ERROR 06-24 20:37:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:213.44280242919922ms total_cost_time:213.4873867034912ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14083 prompt_cache_len:5151 prompt_cache_ratio:0.3657601363345878 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 -DEBUG 06-24 20:37:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:41 [batch.py:51] router release req id 8 -INFO 06-24 20:37:41 [manager.py:224] router recive req id 8 cost time 0.10873532295227051 s -INFO 06-24 20:37:41 [manager.py:68] detokenization recv req id 8 cost time 0.11081480979919434 s -DEBUG 06-24 20:37:41 [manager.py:391] Prefill Batch: batch_id=203452414626859599597906036884031210168, time:1750768661.9124374s req_ids:[8] -DEBUG 06-24 20:37:41 [manager.py:391] -ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:41 lightllm_req_id:8 first_token_cost:215.240478515625ms total_cost_time:215.285062789917ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14084 prompt_cache_len:5151 prompt_cache_ratio:0.36573416642999146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 -DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:42 [batch.py:51] router release req id 8 -INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10890555381774902 s -INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s -DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=246796842294271271208522994024872184536, time:1750768662.1342194s req_ids:[8] -DEBUG 06-24 20:37:42 [manager.py:391] -ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:216.66884422302246ms total_cost_time:216.71080589294434ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14085 prompt_cache_len:5151 prompt_cache_ratio:0.36570820021299255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 -DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:42 [batch.py:51] router release req id 8 -INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10833859443664551 s -INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.11036014556884766 s -DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=333429011087523918707569960256852928271, time:1750768662.3568814s req_ids:[8] -DEBUG 06-24 20:37:42 [manager.py:391] -ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:213.64736557006836ms total_cost_time:213.69123458862305ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14086 prompt_cache_len:5151 prompt_cache_ratio:0.3656822376828056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 -DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:42 [batch.py:51] router release req id 8 -INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.10752439498901367 s -INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.10948514938354492 s -DEBUG 06-24 20:37:42 [manager.py:391] Prefill Batch: batch_id=247033001600489871964784834586265937817, time:1750768662.5796947s req_ids:[8] -DEBUG 06-24 20:37:42 [manager.py:391] -ERROR 06-24 20:37:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:212.9817008972168ms total_cost_time:213.02390098571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14087 prompt_cache_len:5151 prompt_cache_ratio:0.36565627883864554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 -DEBUG 06-24 20:37:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:42 [batch.py:51] router release req id 8 -INFO 06-24 20:37:42 [manager.py:224] router recive req id 8 cost time 0.309084415435791 s -INFO 06-24 20:37:42 [manager.py:68] detokenization recv req id 8 cost time 0.3113124370574951 s -DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=74811614427725936055679548229522603947, time:1750768663.0080094s req_ids:[8] -DEBUG 06-24 20:37:43 [manager.py:391] -ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:42 lightllm_req_id:8 first_token_cost:429.5165538787842ms total_cost_time:429.5620918273926ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14088 prompt_cache_len:5151 prompt_cache_ratio:0.3656303236797274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 -DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:43 [batch.py:51] router release req id 8 -INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10871028900146484 s -INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11075711250305176 s -DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=54192288800080613278339558260024413250, time:1750768663.2353637s req_ids:[8] -DEBUG 06-24 20:37:43 [manager.py:391] -ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:216.35937690734863ms total_cost_time:216.40348434448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14089 prompt_cache_len:5151 prompt_cache_ratio:0.36560437220526654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 -DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:43 [batch.py:51] router release req id 8 -INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10855603218078613 s -INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.1105966567993164 s -DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=252810558220714260044052843348628905605, time:1750768663.458472s req_ids:[8] -DEBUG 06-24 20:37:43 [manager.py:391] -ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:215.8493995666504ms total_cost_time:215.89183807373047ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14090 prompt_cache_len:5151 prompt_cache_ratio:0.36557842441447835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 -DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:43 [batch.py:51] router release req id 8 -INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10908365249633789 s -INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11123323440551758 s -DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=248856373528264998929590753204885788461, time:1750768663.6908002s req_ids:[8] -DEBUG 06-24 20:37:43 [manager.py:391] -ERROR 06-24 20:37:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:225.22521018981934ms total_cost_time:225.26884078979492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14091 prompt_cache_len:5151 prompt_cache_ratio:0.36555248030657866 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 -DEBUG 06-24 20:37:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:43 [batch.py:51] router release req id 8 -INFO 06-24 20:37:43 [manager.py:224] router recive req id 8 cost time 0.10860705375671387 s -INFO 06-24 20:37:43 [manager.py:68] detokenization recv req id 8 cost time 0.11064815521240234 s -DEBUG 06-24 20:37:43 [manager.py:391] Prefill Batch: batch_id=143194941673592438793976024860222384492, time:1750768663.9129052s req_ids:[8] -DEBUG 06-24 20:37:43 [manager.py:391] -ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:43 lightllm_req_id:8 first_token_cost:213.33670616149902ms total_cost_time:213.3636474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14092 prompt_cache_len:5151 prompt_cache_ratio:0.36552653988078343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 -DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:44 [batch.py:51] router release req id 8 -INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s -INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.10995173454284668 s -DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=242211672724916949323982728451840504427, time:1750768664.1334672s req_ids:[8] -DEBUG 06-24 20:37:44 [manager.py:391] -ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:208.11152458190918ms total_cost_time:208.15467834472656ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14093 prompt_cache_len:5151 prompt_cache_ratio:0.36550060313630883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 -DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:44 [batch.py:51] router release req id 8 -INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10888028144836426 s -INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.11094880104064941 s -DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=203816478002049820646010088485853959594, time:1750768664.349486s req_ids:[8] -DEBUG 06-24 20:37:44 [manager.py:391] -ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:383.2268714904785ms total_cost_time:383.2731246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14094 prompt_cache_len:5151 prompt_cache_ratio:0.3654746700723712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 -DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:44 [batch.py:51] router release req id 8 -INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10781192779541016 s -INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.10979199409484863 s -DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=274512080196179113935073283673627139126, time:1750768664.7390273s req_ids:[8] -DEBUG 06-24 20:37:44 [manager.py:391] -ERROR 06-24 20:37:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:216.55750274658203ms total_cost_time:216.60113334655762ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14095 prompt_cache_len:5151 prompt_cache_ratio:0.3654487406881873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 -DEBUG 06-24 20:37:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:44 [batch.py:51] router release req id 8 -INFO 06-24 20:37:44 [manager.py:224] router recive req id 8 cost time 0.10825824737548828 s -INFO 06-24 20:37:44 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s -DEBUG 06-24 20:37:44 [manager.py:391] Prefill Batch: batch_id=47039910278596978717417310695713582597, time:1750768664.961557s req_ids:[8] -DEBUG 06-24 20:37:44 [manager.py:391] -ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:44 lightllm_req_id:8 first_token_cost:210.9377384185791ms total_cost_time:210.9827995300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14096 prompt_cache_len:5151 prompt_cache_ratio:0.3654228149829739 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 -DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:45 [batch.py:51] router release req id 8 -INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10905861854553223 s -INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.11103057861328125 s -DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=53111157697098343827086054986034001277, time:1750768665.1790483s req_ids:[8] -DEBUG 06-24 20:37:45 [manager.py:391] -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:207.09538459777832ms total_cost_time:207.14092254638672ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14097 prompt_cache_len:5151 prompt_cache_ratio:0.36539689295594807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 -DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:45 [batch.py:51] router release req id 8 -INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10804438591003418 s -INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.10985803604125977 s -DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=139901179562605658579253878345892966092, time:1750768665.3925562s req_ids:[8] -DEBUG 06-24 20:37:45 [manager.py:391] -ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:209.33103561401367ms total_cost_time:209.37514305114746ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14098 prompt_cache_len:5151 prompt_cache_ratio:0.3653709746063271 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 -DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:45 [batch.py:51] router release req id 8 -INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.11006402969360352 s -INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.11215567588806152 s -DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=80359381600072279686203175309783508422, time:1750768665.6079237s req_ids:[8] -DEBUG 06-24 20:37:45 [manager.py:391] -ERROR 06-24 20:37:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:208.90021324157715ms total_cost_time:208.94336700439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14099 prompt_cache_len:5151 prompt_cache_ratio:0.3653450599333286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 -DEBUG 06-24 20:37:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:45 [batch.py:51] router release req id 8 -INFO 06-24 20:37:45 [manager.py:224] router recive req id 8 cost time 0.10808706283569336 s -INFO 06-24 20:37:45 [manager.py:68] detokenization recv req id 8 cost time 0.10976219177246094 s -DEBUG 06-24 20:37:45 [manager.py:391] Prefill Batch: batch_id=230726844957747816816986112734416442735, time:1750768665.8232913s req_ids:[8] -DEBUG 06-24 20:37:45 [manager.py:391] -ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:45 lightllm_req_id:8 first_token_cost:373.60215187072754ms total_cost_time:373.645544052124ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14100 prompt_cache_len:5151 prompt_cache_ratio:0.36531914893617023 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 -DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:46 [batch.py:51] router release req id 8 -INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10828399658203125 s -INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.11005330085754395 s -DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=287769941603116957090667671183508706576, time:1750768666.2055066s req_ids:[8] -DEBUG 06-24 20:37:46 [manager.py:391] -ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:204.04505729675293ms total_cost_time:204.09154891967773ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14101 prompt_cache_len:5151 prompt_cache_ratio:0.3652932416140699 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 -DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:46 [batch.py:51] router release req id 8 -INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10772943496704102 s -INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.1095435619354248 s -DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=229381722940246876488527189699258179691, time:1750768666.4151423s req_ids:[8] -DEBUG 06-24 20:37:46 [manager.py:391] -ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:212.16654777526855ms total_cost_time:212.21065521240234ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14102 prompt_cache_len:5151 prompt_cache_ratio:0.36526733796624594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 -DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:46 [batch.py:51] router release req id 8 -INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10854387283325195 s -INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.11043047904968262 s -DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=223863396912680091456873463470667158242, time:1750768666.6343105s req_ids:[8] -DEBUG 06-24 20:37:46 [manager.py:391] -ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:214.22672271728516ms total_cost_time:214.27106857299805ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14103 prompt_cache_len:5151 prompt_cache_ratio:0.3652414379919166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 -DEBUG 06-24 20:37:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:46 [batch.py:51] router release req id 8 -INFO 06-24 20:37:46 [manager.py:224] router recive req id 8 cost time 0.10800862312316895 s -INFO 06-24 20:37:46 [manager.py:68] detokenization recv req id 8 cost time 0.10988759994506836 s -DEBUG 06-24 20:37:46 [manager.py:391] Prefill Batch: batch_id=117283384639664790999510404570282636648, time:1750768666.854794s req_ids:[8] -DEBUG 06-24 20:37:46 [manager.py:391] -ERROR 06-24 20:37:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:216.64118766784668ms total_cost_time:216.68386459350586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14104 prompt_cache_len:5151 prompt_cache_ratio:0.3652155416903006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 -DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:47 [batch.py:51] router release req id 8 -INFO 06-24 20:37:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10886573791503906 s -INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11078763008117676 s -DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=130178343013043275426722332064575395975, time:1750768667.0905154s req_ids:[8] -DEBUG 06-24 20:37:47 [manager.py:391] -INFO 06-24 20:37:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:46 lightllm_req_id:8 first_token_cost:230.849027633667ms total_cost_time:230.8952808380127ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14105 prompt_cache_len:5151 prompt_cache_ratio:0.3651896490606168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 -DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:47 [batch.py:51] router release req id 8 -INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10493016242980957 s -INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.10600852966308594 s -DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=277470874748853763773436651372930561181, time:1750768667.3169117s req_ids:[8] -DEBUG 06-24 20:37:47 [manager.py:391] -ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:353.34086418151855ms total_cost_time:353.38521003723145ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14106 prompt_cache_len:5151 prompt_cache_ratio:0.36516376010208423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 -DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:47 [batch.py:51] router release req id 8 -INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10999488830566406 s -INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11194300651550293 s -DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=136567641727363853584757833867075372296, time:1750768667.6757884s req_ids:[8] -DEBUG 06-24 20:37:47 [manager.py:391] -ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:211.13324165344238ms total_cost_time:211.17639541625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14107 prompt_cache_len:5151 prompt_cache_ratio:0.3651378748139222 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 -DEBUG 06-24 20:37:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:47 [batch.py:51] router release req id 8 -INFO 06-24 20:37:47 [manager.py:224] router recive req id 8 cost time 0.10807943344116211 s -INFO 06-24 20:37:47 [manager.py:68] detokenization recv req id 8 cost time 0.11001920700073242 s -DEBUG 06-24 20:37:47 [manager.py:391] Prefill Batch: batch_id=308179057211638020981496232372326875999, time:1750768667.8930788s req_ids:[8] -DEBUG 06-24 20:37:47 [manager.py:391] -ERROR 06-24 20:37:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:209.52415466308594ms total_cost_time:209.5663547515869ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14108 prompt_cache_len:5151 prompt_cache_ratio:0.36511199319535015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 -DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:48 [batch.py:51] router release req id 8 -INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10911083221435547 s -INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11103320121765137 s -DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=339295981134826438782733359420963148046, time:1750768668.109251s req_ids:[8] -DEBUG 06-24 20:37:48 [manager.py:391] -ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:47 lightllm_req_id:8 first_token_cost:214.43724632263184ms total_cost_time:214.48206901550293ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14109 prompt_cache_len:5151 prompt_cache_ratio:0.3650861152455879 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 -DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:48 [batch.py:51] router release req id 8 -INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10597705841064453 s -INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.10781741142272949 s -DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=292442417183616886443133011671262261823, time:1750768668.3297327s req_ids:[8] -DEBUG 06-24 20:37:48 [manager.py:391] -ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:208.33373069763184ms total_cost_time:208.37736129760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14110 prompt_cache_len:5151 prompt_cache_ratio:0.3650602409638554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 -DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:48 [batch.py:51] router release req id 8 -INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10862207412719727 s -INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11049151420593262 s -DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=79735219659994314233715964630787200208, time:1750768668.5456984s req_ids:[8] -DEBUG 06-24 20:37:48 [manager.py:391] -ERROR 06-24 20:37:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:373.6429214477539ms total_cost_time:373.6860752105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14111 prompt_cache_len:5151 prompt_cache_ratio:0.3650343703493728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 -DEBUG 06-24 20:37:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:48 [batch.py:51] router release req id 8 -INFO 06-24 20:37:48 [manager.py:224] router recive req id 8 cost time 0.10814213752746582 s -INFO 06-24 20:37:48 [manager.py:68] detokenization recv req id 8 cost time 0.11006379127502441 s -DEBUG 06-24 20:37:48 [manager.py:391] Prefill Batch: batch_id=240213431420703554677367038724518438057, time:1750768668.9255707s req_ids:[8] -DEBUG 06-24 20:37:48 [manager.py:391] -ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:48 lightllm_req_id:8 first_token_cost:214.53428268432617ms total_cost_time:214.57815170288086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14112 prompt_cache_len:5151 prompt_cache_ratio:0.36500850340136054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 -DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:49 [batch.py:51] router release req id 8 -INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10921573638916016 s -INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.11102104187011719 s -DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=238398836821138050728425348935243620588, time:1750768669.1471784s req_ids:[8] -DEBUG 06-24 20:37:49 [manager.py:391] -ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:215.55304527282715ms total_cost_time:215.59739112854004ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14113 prompt_cache_len:5151 prompt_cache_ratio:0.3649826401190392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 -DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:49 [batch.py:51] router release req id 8 -INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10865092277526855 s -INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987401008605957 s -DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=262862227789800032836750519787356309382, time:1750768669.3695567s req_ids:[8] -DEBUG 06-24 20:37:49 [manager.py:391] -DEBUG 06-24 20:37:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 56194.656 tokens/s -DEBUG 06-24 20:37:49 [stats.py:37] Avg prompt tokens throughput: 56186.683 tokens/s -DEBUG 06-24 20:37:49 [stats.py:37] Avg generate tokens throughput: 7.973 tokens/s -ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:216.19296073913574ms total_cost_time:216.23682975769043ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14114 prompt_cache_len:5151 prompt_cache_ratio:0.3649567805016296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 -DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:49 [batch.py:51] router release req id 8 -INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.10804533958435059 s -INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.11006283760070801 s -DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=94147963942124974241697432601992209333, time:1750768669.5920784s req_ids:[8] -DEBUG 06-24 20:37:49 [manager.py:391] -ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:215.67726135253906ms total_cost_time:215.7301902770996ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:14115 prompt_cache_len:5151 prompt_cache_ratio:0.3649309245483528 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 -DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:49 [batch.py:51] router release req id 8 -INFO 06-24 20:37:49 [manager.py:224] router recive req id 8 cost time 0.11000943183898926 s -INFO 06-24 20:37:49 [manager.py:68] detokenization recv req id 8 cost time 0.1119680404663086 s -DEBUG 06-24 20:37:49 [manager.py:391] Prefill Batch: batch_id=100797310594177193735169815687532466371, time:1750768669.8144739s req_ids:[8] -DEBUG 06-24 20:37:49 [manager.py:391] -ERROR 06-24 20:37:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:210.8018398284912ms total_cost_time:210.8445167541504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14116 prompt_cache_len:5151 prompt_cache_ratio:0.36490507225843016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 -DEBUG 06-24 20:37:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:49 [batch.py:51] router release req id 8 -INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.1082761287689209 s -INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.1100151538848877 s -DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=315316934739730447191890753273953614989, time:1750768670.032156s req_ids:[8] -DEBUG 06-24 20:37:50 [manager.py:391] -ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:49 lightllm_req_id:8 first_token_cost:391.50142669677734ms total_cost_time:391.54529571533203ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14117 prompt_cache_len:5151 prompt_cache_ratio:0.3648792236310831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 -DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:50 [batch.py:51] router release req id 8 -INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.1084737777709961 s -INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.1102294921875 s -DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=295762711712756298798726415581105331406, time:1750768670.4293618s req_ids:[8] -DEBUG 06-24 20:37:50 [manager.py:391] -ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:215.26813507080078ms total_cost_time:215.31057357788086ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14118 prompt_cache_len:5151 prompt_cache_ratio:0.36485337866553336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 -DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:50 [batch.py:51] router release req id 8 -INFO 06-24 20:37:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.10895109176635742 s -INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.11087965965270996 s -DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=82564975426603147322057818733187811786, time:1750768670.654606s req_ids:[8] -DEBUG 06-24 20:37:50 [manager.py:391] -ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:215.01445770263672ms total_cost_time:215.0564193725586ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14119 prompt_cache_len:5151 prompt_cache_ratio:0.36482753736100293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 -DEBUG 06-24 20:37:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:50 [batch.py:51] router release req id 8 -INFO 06-24 20:37:50 [manager.py:224] router recive req id 8 cost time 0.10777139663696289 s -INFO 06-24 20:37:50 [manager.py:68] detokenization recv req id 8 cost time 0.10972714424133301 s -DEBUG 06-24 20:37:50 [manager.py:391] Prefill Batch: batch_id=253013045506979420066627614032379650035, time:1750768670.8741245s req_ids:[8] -DEBUG 06-24 20:37:50 [manager.py:391] -ERROR 06-24 20:37:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:211.72380447387695ms total_cost_time:211.76719665527344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14120 prompt_cache_len:5151 prompt_cache_ratio:0.3648016997167139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 -DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:51 [batch.py:51] router release req id 8 -INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10767221450805664 s -INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.10954809188842773 s -DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=65528844113650527310209422258492233131, time:1750768671.091431s req_ids:[8] -DEBUG 06-24 20:37:51 [manager.py:391] -ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:50 lightllm_req_id:8 first_token_cost:213.63568305969238ms total_cost_time:213.67955207824707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14121 prompt_cache_len:5151 prompt_cache_ratio:0.3647758657318887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 -DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:51 [batch.py:51] router release req id 8 -INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10762619972229004 s -INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.10953760147094727 s -DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=320410845756723611926195434998457428380, time:1750768671.315157s req_ids:[8] -DEBUG 06-24 20:37:51 [manager.py:391] -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:37:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:210.32118797302246ms total_cost_time:210.36696434020996ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14122 prompt_cache_len:5151 prompt_cache_ratio:0.3647500354057499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 -DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:51 [batch.py:51] router release req id 8 -INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10852718353271484 s -INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.11044692993164062 s -DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=166308035229882004696403801283784553590, time:1750768671.530374s req_ids:[8] -DEBUG 06-24 20:37:51 [manager.py:391] -ERROR 06-24 20:37:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:388.4134292602539ms total_cost_time:388.4589672088623ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14123 prompt_cache_len:5151 prompt_cache_ratio:0.3647242087375204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 -DEBUG 06-24 20:37:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:51 [batch.py:51] router release req id 8 -INFO 06-24 20:37:51 [manager.py:224] router recive req id 8 cost time 0.10909724235534668 s -INFO 06-24 20:37:51 [manager.py:68] detokenization recv req id 8 cost time 0.1110687255859375 s -DEBUG 06-24 20:37:51 [manager.py:391] Prefill Batch: batch_id=295544988925749502539793312492208181003, time:1750768671.9265978s req_ids:[8] -DEBUG 06-24 20:37:51 [manager.py:391] -ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:51 lightllm_req_id:8 first_token_cost:218.82200241088867ms total_cost_time:218.86610984802246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14124 prompt_cache_len:5151 prompt_cache_ratio:0.3646983857264231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 -DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:52 [batch.py:51] router release req id 8 -INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10883641242980957 s -INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.1108245849609375 s -DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=229633522933585892364510757270102188696, time:1750768672.1537292s req_ids:[8] -DEBUG 06-24 20:37:52 [manager.py:391] -ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:214.04194831848145ms total_cost_time:214.10226821899414ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:14125 prompt_cache_len:5151 prompt_cache_ratio:0.36467256637168144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 -DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:52 [batch.py:51] router release req id 8 -INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10899877548217773 s -INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.11101031303405762 s -DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=234953296761910322560994823450977767339, time:1750768672.3789554s req_ids:[8] -DEBUG 06-24 20:37:52 [manager.py:391] -ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:215.36636352539062ms total_cost_time:215.4078483581543ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14126 prompt_cache_len:5151 prompt_cache_ratio:0.3646467506725188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 -DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:52 [batch.py:51] router release req id 8 -INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.10744690895080566 s -INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.10918998718261719 s -DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=204274676720215189689253322568797504445, time:1750768672.5984855s req_ids:[8] -DEBUG 06-24 20:37:52 [manager.py:391] -ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:172.8661060333252ms total_cost_time:172.90830612182617ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14127 prompt_cache_len:5151 prompt_cache_ratio:0.36462093862815886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 -DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:52 [batch.py:51] router release req id 8 -INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.1073756217956543 s -INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.10922813415527344 s -DEBUG 06-24 20:37:52 [manager.py:391] Prefill Batch: batch_id=323899998843028512805501764946876016457, time:1750768672.781292s req_ids:[8] -DEBUG 06-24 20:37:52 [manager.py:391] -ERROR 06-24 20:37:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:214.2770290374756ms total_cost_time:214.32232856750488ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14128 prompt_cache_len:5151 prompt_cache_ratio:0.3645951302378256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 -DEBUG 06-24 20:37:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:52 [batch.py:51] router release req id 8 -INFO 06-24 20:37:52 [manager.py:224] router recive req id 8 cost time 0.1090233325958252 s -INFO 06-24 20:37:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s -DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=285316706177696563373915860738778524347, time:1750768673.0007403s req_ids:[8] -DEBUG 06-24 20:37:53 [manager.py:391] -INFO 06-24 20:37:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:37:53 [statics_utils.py:24] mean first cost: 231.63000209449999 ms -INFO 06-24 20:37:53 [statics_utils.py:24] mean per token cost: 0.05857177495187085 ms -ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:52 lightllm_req_id:8 first_token_cost:389.6656036376953ms total_cost_time:389.7109031677246ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14129 prompt_cache_len:5151 prompt_cache_ratio:0.36456932550074317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 -DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:53 [batch.py:51] router release req id 8 -INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10849142074584961 s -INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.1104118824005127 s -DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=282114903336532640080952245410008783277, time:1750768673.3997617s req_ids:[8] -DEBUG 06-24 20:37:53 [manager.py:391] -ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:220.13068199157715ms total_cost_time:220.17478942871094ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14130 prompt_cache_len:5151 prompt_cache_ratio:0.36454352441613586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 -DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:53 [batch.py:51] router release req id 8 -INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10838794708251953 s -INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.11044049263000488 s -DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=313203907568116252158009232889714083127, time:1750768673.6249223s req_ids:[8] -DEBUG 06-24 20:37:53 [manager.py:391] -ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:214.68043327331543ms total_cost_time:214.72454071044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14131 prompt_cache_len:5151 prompt_cache_ratio:0.3645177269832284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 -DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:53 [batch.py:51] router release req id 8 -INFO 06-24 20:37:53 [manager.py:224] router recive req id 8 cost time 0.10790300369262695 s -INFO 06-24 20:37:53 [manager.py:68] detokenization recv req id 8 cost time 0.10987520217895508 s -DEBUG 06-24 20:37:53 [manager.py:391] Prefill Batch: batch_id=133577367349386424102381485838534862539, time:1750768673.8487456s req_ids:[8] -DEBUG 06-24 20:37:53 [manager.py:391] -ERROR 06-24 20:37:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:217.69285202026367ms total_cost_time:217.73552894592285ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14132 prompt_cache_len:5151 prompt_cache_ratio:0.3644919332012454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 -DEBUG 06-24 20:37:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:53 [batch.py:51] router release req id 8 -INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10848474502563477 s -INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11035490036010742 s -DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=209600652992552617055457788375899478500, time:1750768674.07167s req_ids:[8] -DEBUG 06-24 20:37:54 [manager.py:391] -ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:53 lightllm_req_id:8 first_token_cost:175.4894256591797ms total_cost_time:175.53043365478516ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14133 prompt_cache_len:5151 prompt_cache_ratio:0.364466143069412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 -DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:54 [batch.py:51] router release req id 8 -INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10819458961486816 s -INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11009979248046875 s -DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=55014997780429687694394698161236732803, time:1750768674.2547007s req_ids:[8] -DEBUG 06-24 20:37:54 [manager.py:391] -ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:203.57966423034668ms total_cost_time:203.62138748168945ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14134 prompt_cache_len:5151 prompt_cache_ratio:0.36444035658695345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 -DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:54 [batch.py:51] router release req id 8 -INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.31034207344055176 s -INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.3123507499694824 s -DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=174934511072892071890292050217957885831, time:1750768674.6721196s req_ids:[8] -DEBUG 06-24 20:37:54 [manager.py:391] -ERROR 06-24 20:37:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:431.8883419036865ms total_cost_time:431.9322109222412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14135 prompt_cache_len:5151 prompt_cache_ratio:0.36441457375309516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 -DEBUG 06-24 20:37:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:54 [batch.py:51] router release req id 8 -INFO 06-24 20:37:54 [manager.py:224] router recive req id 8 cost time 0.10892534255981445 s -INFO 06-24 20:37:54 [manager.py:68] detokenization recv req id 8 cost time 0.11080551147460938 s -DEBUG 06-24 20:37:54 [manager.py:391] Prefill Batch: batch_id=159096224103353012114937562279190845920, time:1750768674.902135s req_ids:[8] -DEBUG 06-24 20:37:54 [manager.py:391] -ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:54 lightllm_req_id:8 first_token_cost:219.37131881713867ms total_cost_time:219.41494941711426ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14136 prompt_cache_len:5151 prompt_cache_ratio:0.3643887945670628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 -DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:55 [batch.py:51] router release req id 8 -INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10866713523864746 s -INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11047124862670898 s -DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=118314997167954466591630726788466876467, time:1750768675.1285143s req_ids:[8] -DEBUG 06-24 20:37:55 [manager.py:391] -ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:213.6552333831787ms total_cost_time:213.6979103088379ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14137 prompt_cache_len:5151 prompt_cache_ratio:0.3643630190280823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 -DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:55 [batch.py:51] router release req id 8 -INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10895133018493652 s -INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081433296203613 s -DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=278139068034468975468719969973692574417, time:1750768675.3475513s req_ids:[8] -DEBUG 06-24 20:37:55 [manager.py:391] -ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:208.83941650390625ms total_cost_time:208.88543128967285ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14138 prompt_cache_len:5151 prompt_cache_ratio:0.36433724713537985 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 -DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:55 [batch.py:51] router release req id 8 -INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10850048065185547 s -INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11028766632080078 s -DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=294475022554992466051695135087563735652, time:1750768675.5638456s req_ids:[8] -DEBUG 06-24 20:37:55 [manager.py:391] -ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:215.84510803222656ms total_cost_time:215.88826179504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14139 prompt_cache_len:5151 prompt_cache_ratio:0.3643114788881816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 -DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:55 [batch.py:51] router release req id 8 -INFO 06-24 20:37:55 [manager.py:224] router recive req id 8 cost time 0.10891485214233398 s -INFO 06-24 20:37:55 [manager.py:68] detokenization recv req id 8 cost time 0.11070728302001953 s -DEBUG 06-24 20:37:55 [manager.py:391] Prefill Batch: batch_id=224433974289225798482128434043267776354, time:1750768675.7860062s req_ids:[8] -DEBUG 06-24 20:37:55 [manager.py:391] -ERROR 06-24 20:37:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:214.88523483276367ms total_cost_time:214.92838859558105ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14140 prompt_cache_len:5151 prompt_cache_ratio:0.36428571428571427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 -DEBUG 06-24 20:37:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:55 [batch.py:51] router release req id 8 -INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10841107368469238 s -INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11028051376342773 s -DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=261065878541184423760802932626762974602, time:1750768676.0076208s req_ids:[8] -DEBUG 06-24 20:37:56 [manager.py:391] -ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:55 lightllm_req_id:8 first_token_cost:343.7056541442871ms total_cost_time:343.7507152557373ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14141 prompt_cache_len:5151 prompt_cache_ratio:0.36425995332720457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 -DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:56 [batch.py:51] router release req id 8 -INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10814833641052246 s -INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11053681373596191 s -DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=188521611180382252637981197944550304484, time:1750768676.3590484s req_ids:[8] -DEBUG 06-24 20:37:56 [manager.py:391] -ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:214.54286575317383ms total_cost_time:214.5862579345703ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14142 prompt_cache_len:5151 prompt_cache_ratio:0.3642341960118795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 -DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:56 [batch.py:51] router release req id 8 -INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10810160636901855 s -INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.11054468154907227 s -DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=32965608084625148339837596242671479271, time:1750768676.5847495s req_ids:[8] -DEBUG 06-24 20:37:56 [manager.py:391] -ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:217.38529205322266ms total_cost_time:217.42820739746094ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14143 prompt_cache_len:5151 prompt_cache_ratio:0.36420844233896627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 -DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:56 [batch.py:51] router release req id 8 -INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10854172706604004 s -INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.1103975772857666 s -DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=10338391269055599741511502752424696063, time:1750768676.8128757s req_ids:[8] -DEBUG 06-24 20:37:56 [manager.py:391] -ERROR 06-24 20:37:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:183.9284896850586ms total_cost_time:183.97140502929688ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14144 prompt_cache_len:5151 prompt_cache_ratio:0.3641826923076923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 -DEBUG 06-24 20:37:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:56 [batch.py:51] router release req id 8 -INFO 06-24 20:37:56 [manager.py:224] router recive req id 8 cost time 0.10795783996582031 s -INFO 06-24 20:37:56 [manager.py:68] detokenization recv req id 8 cost time 0.10995101928710938 s -DEBUG 06-24 20:37:56 [manager.py:391] Prefill Batch: batch_id=282989488077063676068152277172435487762, time:1750768676.9927053s req_ids:[8] -DEBUG 06-24 20:37:56 [manager.py:391] -ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:56 lightllm_req_id:8 first_token_cost:204.8056125640869ms total_cost_time:204.8499584197998ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14145 prompt_cache_len:5151 prompt_cache_ratio:0.36415694591728526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 -DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:57 [batch.py:51] router release req id 8 -INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.10774970054626465 s -INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.10986089706420898 s -DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=90387729692475370474599392164878900987, time:1750768677.2045429s req_ids:[8] -DEBUG 06-24 20:37:57 [manager.py:391] -ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:205.98697662353516ms total_cost_time:206.03227615356445ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14146 prompt_cache_len:5151 prompt_cache_ratio:0.364131203166973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 -DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:57 [batch.py:51] router release req id 8 -INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.1082310676574707 s -INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.1102759838104248 s -DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=171647041838881632968734756933956976989, time:1750768677.4151835s req_ids:[8] -DEBUG 06-24 20:37:57 [manager.py:391] -ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:365.72933197021484ms total_cost_time:365.7724857330322ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14147 prompt_cache_len:5151 prompt_cache_ratio:0.3641054640559836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 -DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:57 [batch.py:51] router release req id 8 -INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.1072835922241211 s -INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.1085057258605957 s -DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=56110613564891605873997502883598589304, time:1750768677.788164s req_ids:[8] -DEBUG 06-24 20:37:57 [manager.py:391] -ERROR 06-24 20:37:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:161.74578666687012ms total_cost_time:161.7882251739502ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14148 prompt_cache_len:5151 prompt_cache_ratio:0.36407972858354537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 -DEBUG 06-24 20:37:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:57 [batch.py:51] router release req id 8 -INFO 06-24 20:37:57 [manager.py:224] router recive req id 8 cost time 0.11115646362304688 s -INFO 06-24 20:37:57 [manager.py:68] detokenization recv req id 8 cost time 0.11318206787109375 s -DEBUG 06-24 20:37:57 [manager.py:391] Prefill Batch: batch_id=117290592537830590460795816361961572936, time:1750768677.986542s req_ids:[8] -DEBUG 06-24 20:37:57 [manager.py:391] -ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:57 lightllm_req_id:8 first_token_cost:244.70210075378418ms total_cost_time:244.74596977233887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14149 prompt_cache_len:5151 prompt_cache_ratio:0.36405399674888683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 -DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:58 [batch.py:51] router release req id 8 -INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.1094672679901123 s -INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11153507232666016 s -DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=289189447362010395611448044743870342188, time:1750768678.2216978s req_ids:[8] -DEBUG 06-24 20:37:58 [manager.py:391] -ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:229.1569709777832ms total_cost_time:229.2029857635498ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14150 prompt_cache_len:5151 prompt_cache_ratio:0.36402826855123677 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 -DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:58 [batch.py:51] router release req id 8 -INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10971188545227051 s -INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11224722862243652 s -DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=6152934389490409582688667191582163704, time:1750768678.444403s req_ids:[8] -DEBUG 06-24 20:37:58 [manager.py:391] -ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:214.13898468017578ms total_cost_time:214.18166160583496ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14151 prompt_cache_len:5151 prompt_cache_ratio:0.36400254398982407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 -DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:58 [batch.py:51] router release req id 8 -INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10785460472106934 s -INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s -DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=125471856090445773112530068365497658847, time:1750768678.6636877s req_ids:[8] -DEBUG 06-24 20:37:58 [manager.py:391] -ERROR 06-24 20:37:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:167.4489974975586ms total_cost_time:167.4942970275879ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14152 prompt_cache_len:5151 prompt_cache_ratio:0.3639768230638779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 -DEBUG 06-24 20:37:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:58 [batch.py:51] router release req id 8 -INFO 06-24 20:37:58 [manager.py:224] router recive req id 8 cost time 0.10904717445373535 s -INFO 06-24 20:37:58 [manager.py:68] detokenization recv req id 8 cost time 0.11120223999023438 s -DEBUG 06-24 20:37:58 [manager.py:391] Prefill Batch: batch_id=140777075439202458692657638441102399614, time:1750768678.838702s req_ids:[8] -DEBUG 06-24 20:37:58 [manager.py:391] -ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:58 lightllm_req_id:8 first_token_cost:369.0049648284912ms total_cost_time:369.0469264984131ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14153 prompt_cache_len:5151 prompt_cache_ratio:0.36395110577262774 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 -DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:59 [batch.py:51] router release req id 8 -INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10903668403625488 s -INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11108112335205078 s -DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=284766646536800623161959687715104232641, time:1750768679.2187178s req_ids:[8] -DEBUG 06-24 20:37:59 [manager.py:391] -ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:222.09620475769043ms total_cost_time:222.1379280090332ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14154 prompt_cache_len:5151 prompt_cache_ratio:0.3639253921153031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 -DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:59 [batch.py:51] router release req id 8 -INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.1085653305053711 s -INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069679260253906 s -DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=201662764345637507177807429457763657828, time:1750768679.4478264s req_ids:[8] -DEBUG 06-24 20:37:59 [manager.py:391] -DEBUG 06-24 20:37:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 57510.157 tokens/s -DEBUG 06-24 20:37:59 [stats.py:37] Avg prompt tokens throughput: 57502.021 tokens/s -DEBUG 06-24 20:37:59 [stats.py:37] Avg generate tokens throughput: 8.136 tokens/s -ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:221.76742553710938ms total_cost_time:221.80891036987305ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14155 prompt_cache_len:5151 prompt_cache_ratio:0.3638996820911339 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 -DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:59 [batch.py:51] router release req id 8 -INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10825562477111816 s -INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101999282836914 s -DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=73777938578701911787607478562775718659, time:1750768679.674467s req_ids:[8] -DEBUG 06-24 20:37:59 [manager.py:391] -ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:217.54002571105957ms total_cost_time:217.58222579956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14156 prompt_cache_len:5151 prompt_cache_ratio:0.3638739756993501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:37:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 -DEBUG 06-24 20:37:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:37:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:37:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:37:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:37:59 [batch.py:51] router release req id 8 -INFO 06-24 20:37:59 [manager.py:224] router recive req id 8 cost time 0.10834407806396484 s -INFO 06-24 20:37:59 [manager.py:68] detokenization recv req id 8 cost time 0.11052274703979492 s -DEBUG 06-24 20:37:59 [manager.py:391] Prefill Batch: batch_id=44923281524831597534980173550914189146, time:1750768679.8969252s req_ids:[8] -DEBUG 06-24 20:37:59 [manager.py:391] -ERROR 06-24 20:37:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:37:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:37:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:37:59 lightllm_req_id:8 first_token_cost:211.72165870666504ms total_cost_time:211.76481246948242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14157 prompt_cache_len:5151 prompt_cache_ratio:0.36384827293918204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 -DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:00 [batch.py:51] router release req id 8 -INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10882925987243652 s -INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11090993881225586 s -DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=154083090001057811559594309426393665127, time:1750768680.1162448s req_ids:[8] -DEBUG 06-24 20:38:00 [manager.py:391] -ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:382.90929794311523ms total_cost_time:382.9517364501953ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14158 prompt_cache_len:5151 prompt_cache_ratio:0.36382257380986016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 -DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:00 [batch.py:51] router release req id 8 -INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.1088705062866211 s -INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11087703704833984 s -DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=74563966340992247815546664859325984477, time:1750768680.5058646s req_ids:[8] -DEBUG 06-24 20:38:00 [manager.py:391] -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:216.97258949279785ms total_cost_time:217.01598167419434ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14159 prompt_cache_len:5151 prompt_cache_ratio:0.36379687831061513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 -DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:00 [batch.py:51] router release req id 8 -INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10825824737548828 s -INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.11020326614379883 s -DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=260905753706121859762174013585322481689, time:1750768680.728447s req_ids:[8] -DEBUG 06-24 20:38:00 [manager.py:391] -ERROR 06-24 20:38:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:214.85376358032227ms total_cost_time:214.89572525024414ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14160 prompt_cache_len:5151 prompt_cache_ratio:0.36377118644067796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 -DEBUG 06-24 20:38:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:00 [batch.py:51] router release req id 8 -INFO 06-24 20:38:00 [manager.py:224] router recive req id 8 cost time 0.10899949073791504 s -INFO 06-24 20:38:00 [manager.py:68] detokenization recv req id 8 cost time 0.1113128662109375 s -DEBUG 06-24 20:38:00 [manager.py:391] Prefill Batch: batch_id=326952334590303005111523589417872851480, time:1750768680.9521742s req_ids:[8] -DEBUG 06-24 20:38:00 [manager.py:391] -ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:00 lightllm_req_id:8 first_token_cost:217.00334548950195ms total_cost_time:217.04459190368652ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14161 prompt_cache_len:5151 prompt_cache_ratio:0.3637454981992797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 -DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:01 [batch.py:51] router release req id 8 -INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10886549949645996 s -INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11098766326904297 s -DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=27005258496246823137157254342321436601, time:1750768681.1731443s req_ids:[8] -DEBUG 06-24 20:38:01 [manager.py:391] -INFO 06-24 20:38:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:218.16086769104004ms total_cost_time:218.2016372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14162 prompt_cache_len:5151 prompt_cache_ratio:0.36371981358565175 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 -DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:01 [batch.py:51] router release req id 8 -INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10854816436767578 s -INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11025094985961914 s -DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=35458298005578539967198214179417879856, time:1750768681.399897s req_ids:[8] -DEBUG 06-24 20:38:01 [manager.py:391] -ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:173.31457138061523ms total_cost_time:173.3555793762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14163 prompt_cache_len:5151 prompt_cache_ratio:0.3636941325990256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 -DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:01 [batch.py:51] router release req id 8 -INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10837125778198242 s -INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11029553413391113 s -DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=211337410280169671186733326716796232792, time:1750768681.5804565s req_ids:[8] -DEBUG 06-24 20:38:01 [manager.py:391] -ERROR 06-24 20:38:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:400.799036026001ms total_cost_time:400.83980560302734ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14164 prompt_cache_len:5151 prompt_cache_ratio:0.36366845523863317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 -DEBUG 06-24 20:38:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:01 [batch.py:51] router release req id 8 -INFO 06-24 20:38:01 [manager.py:224] router recive req id 8 cost time 0.10798430442810059 s -INFO 06-24 20:38:01 [manager.py:68] detokenization recv req id 8 cost time 0.11058998107910156 s -DEBUG 06-24 20:38:01 [manager.py:391] Prefill Batch: batch_id=110995380351104975997078192507291917812, time:1750768681.9869306s req_ids:[8] -DEBUG 06-24 20:38:01 [manager.py:391] -ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:01 lightllm_req_id:8 first_token_cost:218.5213565826416ms total_cost_time:218.56355667114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14165 prompt_cache_len:5151 prompt_cache_ratio:0.3636427815037063 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 -DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:02 [batch.py:51] router release req id 8 -INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10770893096923828 s -INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10973548889160156 s -DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=242069490161497126536762562289618329982, time:1750768682.2093859s req_ids:[8] -DEBUG 06-24 20:38:02 [manager.py:391] -ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:213.31501007080078ms total_cost_time:213.35840225219727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14166 prompt_cache_len:5151 prompt_cache_ratio:0.36361711139347735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 -DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:02 [batch.py:51] router release req id 8 -INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10867595672607422 s -INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.11080026626586914 s -DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=141027683372777210687452550197337608923, time:1750768682.4319959s req_ids:[8] -DEBUG 06-24 20:38:02 [manager.py:391] -ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:212.02802658081055ms total_cost_time:212.0687961578369ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14167 prompt_cache_len:5151 prompt_cache_ratio:0.36359144490717865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 -DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:02 [batch.py:51] router release req id 8 -INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10754776000976562 s -INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10957527160644531 s -DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=51760239038390837105937099685880657354, time:1750768682.6513147s req_ids:[8] -DEBUG 06-24 20:38:02 [manager.py:391] -ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:213.74797821044922ms total_cost_time:213.7916088104248ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14168 prompt_cache_len:5151 prompt_cache_ratio:0.36356578204404294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 -DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:02 [batch.py:51] router release req id 8 -INFO 06-24 20:38:02 [manager.py:224] router recive req id 8 cost time 0.10795235633850098 s -INFO 06-24 20:38:02 [manager.py:68] detokenization recv req id 8 cost time 0.10959386825561523 s -DEBUG 06-24 20:38:02 [manager.py:391] Prefill Batch: batch_id=10949780752641403882021201027722631303, time:1750768682.8713515s req_ids:[8] -DEBUG 06-24 20:38:02 [manager.py:391] -ERROR 06-24 20:38:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:174.6044158935547ms total_cost_time:174.64780807495117ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14169 prompt_cache_len:5151 prompt_cache_ratio:0.363540122803303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 -DEBUG 06-24 20:38:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:02 [batch.py:51] router release req id 8 -INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.1086127758026123 s -INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.11049795150756836 s -DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=238779521307492400694428455414152362553, time:1750768683.0539546s req_ids:[8] -DEBUG 06-24 20:38:03 [manager.py:391] -ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:02 lightllm_req_id:8 first_token_cost:382.5962543487549ms total_cost_time:382.63845443725586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14170 prompt_cache_len:5151 prompt_cache_ratio:0.36351446718419195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 -DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:03 [batch.py:51] router release req id 8 -INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.10868072509765625 s -INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.11085987091064453 s -DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=18529632351738291947530661825073091866, time:1750768683.4411855s req_ids:[8] -DEBUG 06-24 20:38:03 [manager.py:391] -ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:217.15950965881348ms total_cost_time:217.20266342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14171 prompt_cache_len:5151 prompt_cache_ratio:0.3634888151859431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 -DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:03 [batch.py:51] router release req id 8 -INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.10780000686645508 s -INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.10971617698669434 s -DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=284746067302748642760279084859379002655, time:1750768683.6650455s req_ids:[8] -DEBUG 06-24 20:38:03 [manager.py:391] -ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:213.98138999938965ms total_cost_time:214.02287483215332ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14172 prompt_cache_len:5151 prompt_cache_ratio:0.36346316680779 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 -DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:03 [batch.py:51] router release req id 8 -INFO 06-24 20:38:03 [manager.py:224] router recive req id 8 cost time 0.1078042984008789 s -INFO 06-24 20:38:03 [manager.py:68] detokenization recv req id 8 cost time 0.10969972610473633 s -DEBUG 06-24 20:38:03 [manager.py:391] Prefill Batch: batch_id=198514015385547453701586904024344144267, time:1750768683.8885264s req_ids:[8] -DEBUG 06-24 20:38:03 [manager.py:391] -ERROR 06-24 20:38:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:181.23912811279297ms total_cost_time:181.28037452697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14173 prompt_cache_len:5151 prompt_cache_ratio:0.36343752204896634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 -DEBUG 06-24 20:38:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:03 [batch.py:51] router release req id 8 -INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10780978202819824 s -INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.10933518409729004 s -DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=35313010008105956344341483678136070125, time:1750768684.072916s req_ids:[8] -DEBUG 06-24 20:38:04 [manager.py:391] -ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:03 lightllm_req_id:8 first_token_cost:179.16345596313477ms total_cost_time:179.20804023742676ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14174 prompt_cache_len:5151 prompt_cache_ratio:0.3634118809087061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 -DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:04 [batch.py:51] router release req id 8 -INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10849118232727051 s -INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.11047577857971191 s -DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=253354572824708775024856970120162485393, time:1750768684.259436s req_ids:[8] -DEBUG 06-24 20:38:04 [manager.py:391] -ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:210.2193832397461ms total_cost_time:210.26277542114258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14175 prompt_cache_len:5151 prompt_cache_ratio:0.36338624338624337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 -DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:04 [batch.py:51] router release req id 8 -INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10636591911315918 s -INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.108245849609375 s -DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=95303560593855870200568265773093014672, time:1750768684.4755044s req_ids:[8] -DEBUG 06-24 20:38:04 [manager.py:391] -ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:378.7209987640381ms total_cost_time:378.76439094543457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14176 prompt_cache_len:5151 prompt_cache_ratio:0.36336060948081267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 -DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:04 [batch.py:51] router release req id 8 -INFO 06-24 20:38:04 [manager.py:224] router recive req id 8 cost time 0.10801219940185547 s -INFO 06-24 20:38:04 [manager.py:68] detokenization recv req id 8 cost time 0.11007356643676758 s -DEBUG 06-24 20:38:04 [manager.py:391] Prefill Batch: batch_id=191597203096087243131727601509707401763, time:1750768684.8583694s req_ids:[8] -DEBUG 06-24 20:38:04 [manager.py:391] -ERROR 06-24 20:38:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:207.45849609375ms total_cost_time:207.50164985656738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14177 prompt_cache_len:5151 prompt_cache_ratio:0.3633349791916484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 -DEBUG 06-24 20:38:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:04 [batch.py:51] router release req id 8 -INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.10658931732177734 s -INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.10821914672851562 s -DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=90484294903063968174748456817660525703, time:1750768685.074727s req_ids:[8] -DEBUG 06-24 20:38:05 [manager.py:391] -ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:04 lightllm_req_id:8 first_token_cost:171.81086540222168ms total_cost_time:171.85187339782715ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14178 prompt_cache_len:5151 prompt_cache_ratio:0.36330935251798563 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 -DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:05 [batch.py:51] router release req id 8 -INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.1088409423828125 s -INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11090493202209473 s -DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=51381296324852724349323044681314292449, time:1750768685.251496s req_ids:[8] -DEBUG 06-24 20:38:05 [manager.py:391] -ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:208.24766159057617ms total_cost_time:208.29010009765625ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14179 prompt_cache_len:5151 prompt_cache_ratio:0.3632837294590592 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 -DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:05 [batch.py:51] router release req id 8 -INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.10897231101989746 s -INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11093783378601074 s -DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=109009362378245755633944678269420645115, time:1750768685.4678187s req_ids:[8] -DEBUG 06-24 20:38:05 [manager.py:391] -ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:213.18435668945312ms total_cost_time:213.23060989379883ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14180 prompt_cache_len:5151 prompt_cache_ratio:0.36325811001410435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 -DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:05 [batch.py:51] router release req id 8 -INFO 06-24 20:38:05 [manager.py:224] router recive req id 8 cost time 0.1091620922088623 s -INFO 06-24 20:38:05 [manager.py:68] detokenization recv req id 8 cost time 0.11108851432800293 s -DEBUG 06-24 20:38:05 [manager.py:391] Prefill Batch: batch_id=227513934784281168857900035045679383655, time:1750768685.6898587s req_ids:[8] -DEBUG 06-24 20:38:05 [manager.py:391] -ERROR 06-24 20:38:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:211.456298828125ms total_cost_time:211.5025520324707ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14181 prompt_cache_len:5151 prompt_cache_ratio:0.3632324941823567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 -DEBUG 06-24 20:38:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:05 [batch.py:51] router release req id 8 -INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.3102278709411621 s -INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.312058687210083 s -DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=247525936455225559492817472834022954412, time:1750768686.1170447s req_ids:[8] -DEBUG 06-24 20:38:06 [manager.py:391] -ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:05 lightllm_req_id:8 first_token_cost:435.4870319366455ms total_cost_time:435.5306625366211ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14182 prompt_cache_len:5151 prompt_cache_ratio:0.36320688196305173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 -DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:06 [batch.py:51] router release req id 8 -INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10930037498474121 s -INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.11130404472351074 s -DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=185960505053212011677462128620847809349, time:1750768686.347963s req_ids:[8] -DEBUG 06-24 20:38:06 [manager.py:391] -ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:218.8713550567627ms total_cost_time:218.9171314239502ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14183 prompt_cache_len:5151 prompt_cache_ratio:0.3631812733554255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 -DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:06 [batch.py:51] router release req id 8 -INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10779023170471191 s -INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.10980868339538574 s -DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=129728826491704154499531793096668932516, time:1750768686.5732012s req_ids:[8] -DEBUG 06-24 20:38:06 [manager.py:391] -ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:225.14772415161133ms total_cost_time:225.19254684448242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14184 prompt_cache_len:5151 prompt_cache_ratio:0.36315566835871405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 -DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:06 [batch.py:51] router release req id 8 -INFO 06-24 20:38:06 [manager.py:224] router recive req id 8 cost time 0.10847687721252441 s -INFO 06-24 20:38:06 [manager.py:68] detokenization recv req id 8 cost time 0.11047720909118652 s -DEBUG 06-24 20:38:06 [manager.py:391] Prefill Batch: batch_id=199278530453715152521667602023032361682, time:1750768686.8081086s req_ids:[8] -DEBUG 06-24 20:38:06 [manager.py:391] -ERROR 06-24 20:38:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:218.02496910095215ms total_cost_time:218.08648109436035ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14185 prompt_cache_len:5151 prompt_cache_ratio:0.36313006697215366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 -DEBUG 06-24 20:38:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:06 [batch.py:51] router release req id 8 -INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10957455635070801 s -INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11159825325012207 s -DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=197806944879467932754168708734517403834, time:1750768687.0399966s req_ids:[8] -DEBUG 06-24 20:38:07 [manager.py:391] -ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:06 lightllm_req_id:8 first_token_cost:227.036714553833ms total_cost_time:227.0808219909668ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14186 prompt_cache_len:5151 prompt_cache_ratio:0.36310446919498096 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 -DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:07 [batch.py:51] router release req id 8 -INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.1085350513458252 s -INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11054706573486328 s -DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=215321861954828250902320446125661295287, time:1750768687.265464s req_ids:[8] -DEBUG 06-24 20:38:07 [manager.py:391] -ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:176.91993713378906ms total_cost_time:176.96356773376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14187 prompt_cache_len:5151 prompt_cache_ratio:0.36307887502643266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 -DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:07 [batch.py:51] router release req id 8 -INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10918903350830078 s -INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11087489128112793 s -DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=194528928910483672786366669496067664425, time:1750768687.447619s req_ids:[8] -DEBUG 06-24 20:38:07 [manager.py:391] -ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:342.63134002685547ms total_cost_time:342.67687797546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14188 prompt_cache_len:5151 prompt_cache_ratio:0.3630532844657457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 -DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:07 [batch.py:51] router release req id 8 -INFO 06-24 20:38:07 [manager.py:224] router recive req id 8 cost time 0.10793924331665039 s -INFO 06-24 20:38:07 [manager.py:68] detokenization recv req id 8 cost time 0.11122608184814453 s -DEBUG 06-24 20:38:07 [manager.py:391] Prefill Batch: batch_id=122697144990007747778809686766591126935, time:1750768687.7963295s req_ids:[8] -DEBUG 06-24 20:38:07 [manager.py:391] -ERROR 06-24 20:38:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:210.60824394226074ms total_cost_time:210.65282821655273ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14189 prompt_cache_len:5151 prompt_cache_ratio:0.3630276975121573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 -DEBUG 06-24 20:38:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:07 [batch.py:51] router release req id 8 -INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10903596878051758 s -INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11101126670837402 s -DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=71558937162813044842976126977428581463, time:1750768688.0146272s req_ids:[8] -DEBUG 06-24 20:38:08 [manager.py:391] -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:07 lightllm_req_id:8 first_token_cost:217.00334548950195ms total_cost_time:217.04912185668945ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14190 prompt_cache_len:5151 prompt_cache_ratio:0.36300211416490485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 -DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:08 [batch.py:51] router release req id 8 -INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10921788215637207 s -INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11173605918884277 s -DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=209784717074598817632273593594639108435, time:1750768688.2403324s req_ids:[8] -DEBUG 06-24 20:38:08 [manager.py:391] -ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:219.44642066955566ms total_cost_time:219.49172019958496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14191 prompt_cache_len:5151 prompt_cache_ratio:0.36297653442322597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 -DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:08 [batch.py:51] router release req id 8 -INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.1088409423828125 s -INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11101007461547852 s -DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=33232350300964080644871214953428978630, time:1750768688.4764292s req_ids:[8] -DEBUG 06-24 20:38:08 [manager.py:391] -ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:228.99127006530762ms total_cost_time:229.0365695953369ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14192 prompt_cache_len:5151 prompt_cache_ratio:0.36295095828635854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 -DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:08 [batch.py:51] router release req id 8 -INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.1088857650756836 s -INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.11118650436401367 s -DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=119629326759591610755897163418605866060, time:1750768688.7011082s req_ids:[8] -DEBUG 06-24 20:38:08 [manager.py:391] -ERROR 06-24 20:38:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:219.1751003265381ms total_cost_time:219.1946506500244ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:14193 prompt_cache_len:5151 prompt_cache_ratio:0.3629253857535405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 -DEBUG 06-24 20:38:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:08 [batch.py:51] router release req id 8 -INFO 06-24 20:38:08 [manager.py:224] router recive req id 8 cost time 0.10694622993469238 s -INFO 06-24 20:38:08 [manager.py:68] detokenization recv req id 8 cost time 0.1090552806854248 s -DEBUG 06-24 20:38:08 [manager.py:391] Prefill Batch: batch_id=46972825149682052806645126772296467967, time:1750768688.9257085s req_ids:[8] -DEBUG 06-24 20:38:08 [manager.py:391] -ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:08 lightllm_req_id:8 first_token_cost:351.0925769805908ms total_cost_time:351.1366844177246ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14194 prompt_cache_len:5151 prompt_cache_ratio:0.3628998168240101 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 -DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:09 [batch.py:51] router release req id 8 -INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.1088263988494873 s -INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11108541488647461 s -DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=205011454682053495445724172559671150852, time:1750768689.2818682s req_ids:[8] -DEBUG 06-24 20:38:09 [manager.py:391] -ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:211.46130561828613ms total_cost_time:211.50565147399902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14195 prompt_cache_len:5151 prompt_cache_ratio:0.362874251497006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 -DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:09 [batch.py:51] router release req id 8 -INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.10794830322265625 s -INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11014413833618164 s -DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=273744508426812612274062055661189706544, time:1750768689.502176s req_ids:[8] -DEBUG 06-24 20:38:09 [manager.py:391] -DEBUG 06-24 20:38:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57815.680 tokens/s -DEBUG 06-24 20:38:09 [stats.py:37] Avg prompt tokens throughput: 57807.524 tokens/s -DEBUG 06-24 20:38:09 [stats.py:37] Avg generate tokens throughput: 8.156 tokens/s -ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:211.59934997558594ms total_cost_time:211.64417266845703ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14196 prompt_cache_len:5151 prompt_cache_ratio:0.3628486897717667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 -DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:09 [batch.py:51] router release req id 8 -INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.10812568664550781 s -INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11015868186950684 s -DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=270743976773976173999166765421058661034, time:1750768689.719876s req_ids:[8] -DEBUG 06-24 20:38:09 [manager.py:391] -ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:176.17368698120117ms total_cost_time:176.21827125549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14197 prompt_cache_len:5151 prompt_cache_ratio:0.36282313164753116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 -DEBUG 06-24 20:38:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:09 [batch.py:51] router release req id 8 -INFO 06-24 20:38:09 [manager.py:224] router recive req id 8 cost time 0.1089942455291748 s -INFO 06-24 20:38:09 [manager.py:68] detokenization recv req id 8 cost time 0.11105799674987793 s -DEBUG 06-24 20:38:09 [manager.py:391] Prefill Batch: batch_id=247115560862649381763609393170566129041, time:1750768689.9018357s req_ids:[8] -DEBUG 06-24 20:38:09 [manager.py:391] -ERROR 06-24 20:38:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:205.84464073181152ms total_cost_time:205.8870792388916ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14198 prompt_cache_len:5151 prompt_cache_ratio:0.36279757712353855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 -DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:10 [batch.py:51] router release req id 8 -INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10808396339416504 s -INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.1102452278137207 s -DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=16984153363977296927577372231580779847, time:1750768690.114682s req_ids:[8] -DEBUG 06-24 20:38:10 [manager.py:391] -ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:09 lightllm_req_id:8 first_token_cost:215.52777290344238ms total_cost_time:215.57283401489258ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14199 prompt_cache_len:5151 prompt_cache_ratio:0.3627720261990281 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 -DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:10 [batch.py:51] router release req id 8 -INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10829281806945801 s -INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.11023068428039551 s -DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=17017772909587950787982895234348561299, time:1750768690.337348s req_ids:[8] -DEBUG 06-24 20:38:10 [manager.py:391] -ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:341.1886692047119ms total_cost_time:341.231107711792ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14200 prompt_cache_len:5151 prompt_cache_ratio:0.36274647887323946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 -DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:10 [batch.py:51] router release req id 8 -INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10882735252380371 s -INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.11094212532043457 s -DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=225981476866537441642836077793929861033, time:1750768690.6868007s req_ids:[8] -DEBUG 06-24 20:38:10 [manager.py:391] -ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:214.57624435424805ms total_cost_time:214.61749076843262ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14201 prompt_cache_len:5151 prompt_cache_ratio:0.3627209351454123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 -DEBUG 06-24 20:38:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:10 [batch.py:51] router release req id 8 -INFO 06-24 20:38:10 [manager.py:224] router recive req id 8 cost time 0.10770130157470703 s -INFO 06-24 20:38:10 [manager.py:68] detokenization recv req id 8 cost time 0.10980796813964844 s -DEBUG 06-24 20:38:10 [manager.py:391] Prefill Batch: batch_id=42070377922869162624690638056074600699, time:1750768690.9040008s req_ids:[8] -DEBUG 06-24 20:38:10 [manager.py:391] -ERROR 06-24 20:38:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:10 lightllm_req_id:8 first_token_cost:211.29107475280762ms total_cost_time:211.3347053527832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14202 prompt_cache_len:5151 prompt_cache_ratio:0.36269539501478665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 -DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:11 [batch.py:51] router release req id 8 -INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10793948173522949 s -INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.1104288101196289 s -DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=115951837902654693427578850638982138422, time:1750768691.1255472s req_ids:[8] -DEBUG 06-24 20:38:11 [manager.py:391] -ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:214.65826034545898ms total_cost_time:214.70260620117188ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14203 prompt_cache_len:5151 prompt_cache_ratio:0.3626698584806027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 -DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:11 [batch.py:51] router release req id 8 -INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10895729064941406 s -INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s -DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=206825416090870720441436930825699844359, time:1750768691.3447828s req_ids:[8] -DEBUG 06-24 20:38:11 [manager.py:391] -ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:217.3924446105957ms total_cost_time:217.45014190673828ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14204 prompt_cache_len:5151 prompt_cache_ratio:0.3626443255421008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 -DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:11 [batch.py:51] router release req id 8 -INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10773515701293945 s -INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.10961675643920898 s -DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=106472139598992744683958841479119667584, time:1750768691.5683894s req_ids:[8] -DEBUG 06-24 20:38:11 [manager.py:391] -ERROR 06-24 20:38:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:386.56067848205566ms total_cost_time:386.60597801208496ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14205 prompt_cache_len:5151 prompt_cache_ratio:0.36261879619852166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 -DEBUG 06-24 20:38:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:11 [batch.py:51] router release req id 8 -INFO 06-24 20:38:11 [manager.py:224] router recive req id 8 cost time 0.10941839218139648 s -INFO 06-24 20:38:11 [manager.py:68] detokenization recv req id 8 cost time 0.11181068420410156 s -DEBUG 06-24 20:38:11 [manager.py:391] Prefill Batch: batch_id=311271409168698693291115896412474455573, time:1750768691.9611778s req_ids:[8] -DEBUG 06-24 20:38:11 [manager.py:391] -ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:11 lightllm_req_id:8 first_token_cost:216.37749671936035ms total_cost_time:216.42112731933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14206 prompt_cache_len:5151 prompt_cache_ratio:0.362593270449106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 -DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:12 [batch.py:51] router release req id 8 -INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10801124572753906 s -INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.10973954200744629 s -DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=134637267747952409284968376204123426466, time:1750768692.18736s req_ids:[8] -DEBUG 06-24 20:38:12 [manager.py:391] -ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:176.65600776672363ms total_cost_time:176.6986846923828ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14207 prompt_cache_len:5151 prompt_cache_ratio:0.36256774829309496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 -DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:12 [batch.py:51] router release req id 8 -INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10842704772949219 s -INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11046242713928223 s -DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=166439763074399175326444046426164749401, time:1750768692.3673694s req_ids:[8] -DEBUG 06-24 20:38:12 [manager.py:391] -ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:208.9369297027588ms total_cost_time:208.98056030273438ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14208 prompt_cache_len:5151 prompt_cache_ratio:0.3625422297297297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 -DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:12 [batch.py:51] router release req id 8 -INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10836172103881836 s -INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11070489883422852 s -DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=312959480188493585876276955360396461605, time:1750768692.5856762s req_ids:[8] -DEBUG 06-24 20:38:12 [manager.py:391] -ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:218.9047336578369ms total_cost_time:218.949556350708ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14209 prompt_cache_len:5151 prompt_cache_ratio:0.3625167147582518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 -DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:12 [batch.py:51] router release req id 8 -INFO 06-24 20:38:12 [manager.py:224] router recive req id 8 cost time 0.10913968086242676 s -INFO 06-24 20:38:12 [manager.py:68] detokenization recv req id 8 cost time 0.11130189895629883 s -DEBUG 06-24 20:38:12 [manager.py:391] Prefill Batch: batch_id=295446008450988072544038685101500335228, time:1750768692.810723s req_ids:[8] -DEBUG 06-24 20:38:12 [manager.py:391] -ERROR 06-24 20:38:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:213.58418464660645ms total_cost_time:213.62709999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14210 prompt_cache_len:5151 prompt_cache_ratio:0.36249120337790286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 -DEBUG 06-24 20:38:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:12 [batch.py:51] router release req id 8 -INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10776090621948242 s -INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11002540588378906 s -DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=182936624155111678582189648609360928994, time:1750768693.0288866s req_ids:[8] -DEBUG 06-24 20:38:13 [manager.py:391] -ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:12 lightllm_req_id:8 first_token_cost:386.8868350982666ms total_cost_time:386.9304656982422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14211 prompt_cache_len:5151 prompt_cache_ratio:0.36246569558792485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 -DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:13 [batch.py:51] router release req id 8 -INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10879874229431152 s -INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11119341850280762 s -DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=176480529561905008038249585531648775072, time:1750768693.421224s req_ids:[8] -DEBUG 06-24 20:38:13 [manager.py:391] -ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:216.6297435760498ms total_cost_time:216.6738510131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14212 prompt_cache_len:5151 prompt_cache_ratio:0.3624401913875598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 -DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:13 [batch.py:51] router release req id 8 -INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.10827040672302246 s -INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11047816276550293 s -DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=261170535186346665435235803057198718350, time:1750768693.647196s req_ids:[8] -DEBUG 06-24 20:38:13 [manager.py:391] -ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:212.24474906921387ms total_cost_time:212.28981018066406ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14213 prompt_cache_len:5151 prompt_cache_ratio:0.3624146907760501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 -DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:13 [batch.py:51] router release req id 8 -INFO 06-24 20:38:13 [manager.py:224] router recive req id 8 cost time 0.1090390682220459 s -INFO 06-24 20:38:13 [manager.py:68] detokenization recv req id 8 cost time 0.11121153831481934 s -DEBUG 06-24 20:38:13 [manager.py:391] Prefill Batch: batch_id=320989574373195236641079262461239703866, time:1750768693.8656025s req_ids:[8] -DEBUG 06-24 20:38:13 [manager.py:391] -ERROR 06-24 20:38:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:210.35218238830566ms total_cost_time:210.4203701019287ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:14214 prompt_cache_len:5151 prompt_cache_ratio:0.36238919375263823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 -DEBUG 06-24 20:38:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:13 [batch.py:51] router release req id 8 -INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.10898756980895996 s -INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.11090850830078125 s -DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=31018357144751687697047698270001692961, time:1750768694.082509s req_ids:[8] -DEBUG 06-24 20:38:14 [manager.py:391] -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:13 lightllm_req_id:8 first_token_cost:215.2872085571289ms total_cost_time:215.3308391571045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14215 prompt_cache_len:5151 prompt_cache_ratio:0.362363700316567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 -DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:14 [batch.py:51] router release req id 8 -INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.1078026294708252 s -INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.10988831520080566 s -DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=81276281701148278909942954766081135249, time:1750768694.3046231s req_ids:[8] -DEBUG 06-24 20:38:14 [manager.py:391] -ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:219.00486946105957ms total_cost_time:219.04921531677246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14216 prompt_cache_len:5151 prompt_cache_ratio:0.36233821046707937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 -DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:14 [batch.py:51] router release req id 8 -INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.1093909740447998 s -INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.1117711067199707 s -DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=321577718599418621138164629700082446788, time:1750768694.527994s req_ids:[8] -DEBUG 06-24 20:38:14 [manager.py:391] -ERROR 06-24 20:38:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:390.775203704834ms total_cost_time:390.8195495605469ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14217 prompt_cache_len:5151 prompt_cache_ratio:0.3623127242034184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 -DEBUG 06-24 20:38:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:14 [batch.py:51] router release req id 8 -INFO 06-24 20:38:14 [manager.py:224] router recive req id 8 cost time 0.10907316207885742 s -INFO 06-24 20:38:14 [manager.py:68] detokenization recv req id 8 cost time 0.11120891571044922 s -DEBUG 06-24 20:38:14 [manager.py:391] Prefill Batch: batch_id=327659208897464653068304672886944585881, time:1750768694.9248803s req_ids:[8] -DEBUG 06-24 20:38:14 [manager.py:391] -ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:14 lightllm_req_id:8 first_token_cost:216.83096885681152ms total_cost_time:216.8734073638916ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14218 prompt_cache_len:5151 prompt_cache_ratio:0.36228724152482766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 -DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:15 [batch.py:51] router release req id 8 -INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10895967483520508 s -INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11102485656738281 s -DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=101692770434708124250708479874715588181, time:1750768695.1490266s req_ids:[8] -DEBUG 06-24 20:38:15 [manager.py:391] -ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:218.11318397521973ms total_cost_time:218.1565761566162ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14219 prompt_cache_len:5151 prompt_cache_ratio:0.3622617624305507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 -DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:15 [batch.py:51] router release req id 8 -INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10835528373718262 s -INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11044931411743164 s -DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=211485066387148536847904556744189065158, time:1750768695.3734095s req_ids:[8] -DEBUG 06-24 20:38:15 [manager.py:391] -ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:217.6377773284912ms total_cost_time:217.681884765625ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14220 prompt_cache_len:5151 prompt_cache_ratio:0.36223628691983123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 -DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:15 [batch.py:51] router release req id 8 -INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.11105036735534668 s -INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.1127777099609375 s -DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=233384436029030430711041168363354806871, time:1750768695.5982616s req_ids:[8] -DEBUG 06-24 20:38:15 [manager.py:391] -ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:173.6428737640381ms total_cost_time:173.73323440551758ms,out_token_counter:1 mean_per_token_cost_time: 0.09036064147949219ms prompt_token_num:14221 prompt_cache_len:5151 prompt_cache_ratio:0.3622108149919134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 -DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:15 [batch.py:51] router release req id 8 -INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10799264907836914 s -INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11013436317443848 s -DEBUG 06-24 20:38:15 [manager.py:391] Prefill Batch: batch_id=42610771945848430321528990043884177795, time:1750768695.7791848s req_ids:[8] -DEBUG 06-24 20:38:15 [manager.py:391] -ERROR 06-24 20:38:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:205.2440643310547ms total_cost_time:205.28793334960938ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14222 prompt_cache_len:5151 prompt_cache_ratio:0.36218534664604135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 -DEBUG 06-24 20:38:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:15 [batch.py:51] router release req id 8 -INFO 06-24 20:38:15 [manager.py:224] router recive req id 8 cost time 0.10832738876342773 s -INFO 06-24 20:38:15 [manager.py:68] detokenization recv req id 8 cost time 0.11050915718078613 s -DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=262404354696922514969577495565223239410, time:1750768696.0016062s req_ids:[8] -DEBUG 06-24 20:38:16 [manager.py:391] -ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:15 lightllm_req_id:8 first_token_cost:391.0329341888428ms total_cost_time:391.07775688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14223 prompt_cache_len:5151 prompt_cache_ratio:0.3621598818814596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 -DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:16 [batch.py:51] router release req id 8 -INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10826778411865234 s -INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.11031985282897949 s -DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=25340204156296689908723468427950447326, time:1750768696.388604s req_ids:[8] -DEBUG 06-24 20:38:16 [manager.py:391] -ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:220.21961212158203ms total_cost_time:220.26300430297852ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14224 prompt_cache_len:5151 prompt_cache_ratio:0.3621344206974128 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 -DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:16 [batch.py:51] router release req id 8 -INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10730528831481934 s -INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.10919857025146484 s -DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=109768081338025255061063554555060608606, time:1750768696.6133816s req_ids:[8] -DEBUG 06-24 20:38:16 [manager.py:391] -ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:175.02784729003906ms total_cost_time:175.07100105285645ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14225 prompt_cache_len:5151 prompt_cache_ratio:0.3621089630931459 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 -DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:16 [batch.py:51] router release req id 8 -INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.1083369255065918 s -INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s -DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=11523934699780223017156626184254741299, time:1750768696.7949543s req_ids:[8] -DEBUG 06-24 20:38:16 [manager.py:391] -ERROR 06-24 20:38:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:176.67150497436523ms total_cost_time:176.713228225708ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14226 prompt_cache_len:5151 prompt_cache_ratio:0.3620835090679038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 -DEBUG 06-24 20:38:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:16 [batch.py:51] router release req id 8 -INFO 06-24 20:38:16 [manager.py:224] router recive req id 8 cost time 0.10875916481018066 s -INFO 06-24 20:38:16 [manager.py:68] detokenization recv req id 8 cost time 0.1110374927520752 s -DEBUG 06-24 20:38:16 [manager.py:391] Prefill Batch: batch_id=198349858160249422477745918314036845981, time:1750768696.9774497s req_ids:[8] -DEBUG 06-24 20:38:16 [manager.py:391] -ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:16 lightllm_req_id:8 first_token_cost:208.70423316955566ms total_cost_time:208.74977111816406ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14227 prompt_cache_len:5151 prompt_cache_ratio:0.36205805862093204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 -DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:17 [batch.py:51] router release req id 8 -INFO 06-24 20:38:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.10861992835998535 s -INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.11068344116210938 s -DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=235489047713352713784873971036072115283, time:1750768697.1912224s req_ids:[8] -DEBUG 06-24 20:38:17 [manager.py:391] -ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:211.58075332641602ms total_cost_time:211.6250991821289ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14228 prompt_cache_len:5151 prompt_cache_ratio:0.36203261175147594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 -DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:17 [batch.py:51] router release req id 8 -INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.3116908073425293 s -INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.3137836456298828 s -DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=215332593647059960225303148801074257548, time:1750768697.6199865s req_ids:[8] -DEBUG 06-24 20:38:17 [manager.py:391] -ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:431.67734146118164ms total_cost_time:431.72407150268555ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14229 prompt_cache_len:5151 prompt_cache_ratio:0.36200716845878134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 -DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:17 [batch.py:51] router release req id 8 -INFO 06-24 20:38:17 [manager.py:224] router recive req id 8 cost time 0.1080782413482666 s -INFO 06-24 20:38:17 [manager.py:68] detokenization recv req id 8 cost time 0.11011672019958496 s -DEBUG 06-24 20:38:17 [manager.py:391] Prefill Batch: batch_id=109913952028972147365714172136110248492, time:1750768697.8476179s req_ids:[8] -DEBUG 06-24 20:38:17 [manager.py:391] -ERROR 06-24 20:38:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:218.13559532165527ms total_cost_time:218.18041801452637ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14230 prompt_cache_len:5151 prompt_cache_ratio:0.36198172874209417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 -DEBUG 06-24 20:38:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:17 [batch.py:51] router release req id 8 -INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.1094510555267334 s -INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.11151289939880371 s -DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=271928610050274309815041404875463624975, time:1750768698.0724802s req_ids:[8] -DEBUG 06-24 20:38:18 [manager.py:391] -ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:17 lightllm_req_id:8 first_token_cost:215.30532836914062ms total_cost_time:215.3482437133789ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14231 prompt_cache_len:5151 prompt_cache_ratio:0.36195629260066053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 -DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:18 [batch.py:51] router release req id 8 -INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10951423645019531 s -INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.11147141456604004 s -DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=335467360642515318383113717836041957658, time:1750768698.2939093s req_ids:[8] -DEBUG 06-24 20:38:18 [manager.py:391] -ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:217.9696559906006ms total_cost_time:218.01400184631348ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14232 prompt_cache_len:5151 prompt_cache_ratio:0.3619308600337268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 -DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:18 [batch.py:51] router release req id 8 -INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10829997062683105 s -INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.1102900505065918 s -DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=296156113661958927242621097115098895607, time:1750768698.5184383s req_ids:[8] -DEBUG 06-24 20:38:18 [manager.py:391] -ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:216.52507781982422ms total_cost_time:216.5682315826416ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14233 prompt_cache_len:5151 prompt_cache_ratio:0.36190543104053957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 -DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:18 [batch.py:51] router release req id 8 -INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10742068290710449 s -INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.10921001434326172 s -DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=28699766889382186567249182223262387519, time:1750768698.7475092s req_ids:[8] -DEBUG 06-24 20:38:18 [manager.py:391] -ERROR 06-24 20:38:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:184.57818031311035ms total_cost_time:184.61847305297852ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:14234 prompt_cache_len:5151 prompt_cache_ratio:0.36188000562034567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 -DEBUG 06-24 20:38:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:18 [batch.py:51] router release req id 8 -INFO 06-24 20:38:18 [manager.py:224] router recive req id 8 cost time 0.10851931571960449 s -INFO 06-24 20:38:18 [manager.py:68] detokenization recv req id 8 cost time 0.1104736328125 s -DEBUG 06-24 20:38:18 [manager.py:391] Prefill Batch: batch_id=245236834680968743544569505051020927769, time:1750768698.9323914s req_ids:[8] -DEBUG 06-24 20:38:18 [manager.py:391] -ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:18 lightllm_req_id:8 first_token_cost:381.11305236816406ms total_cost_time:381.15692138671875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14235 prompt_cache_len:5151 prompt_cache_ratio:0.361854583772392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 -DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:19 [batch.py:51] router release req id 8 -INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.1077873706817627 s -INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.10984659194946289 s -DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=339204945956879581360699990316365854712, time:1750768699.3236885s req_ids:[8] -DEBUG 06-24 20:38:19 [manager.py:391] -ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:213.58060836791992ms total_cost_time:213.62709999084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14236 prompt_cache_len:5151 prompt_cache_ratio:0.3618291654959258 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 -DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:19 [batch.py:51] router release req id 8 -INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.10863161087036133 s -INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.11053919792175293 s -DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=38617402104192177323951391505928220693, time:1750768699.5411332s req_ids:[8] -DEBUG 06-24 20:38:19 [manager.py:391] -DEBUG 06-24 20:38:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 58073.104 tokens/s -DEBUG 06-24 20:38:19 [stats.py:37] Avg prompt tokens throughput: 58064.935 tokens/s -DEBUG 06-24 20:38:19 [stats.py:37] Avg generate tokens throughput: 8.168 tokens/s -ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:209.77187156677246ms total_cost_time:209.81621742248535ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14237 prompt_cache_len:5151 prompt_cache_ratio:0.36180375079019456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 -DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:19 [batch.py:51] router release req id 8 -INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.1087031364440918 s -INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.1107625961303711 s -DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=256097474818184332035803194366206232581, time:1750768699.7577147s req_ids:[8] -DEBUG 06-24 20:38:19 [manager.py:391] -ERROR 06-24 20:38:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:212.24474906921387ms total_cost_time:212.28790283203125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14238 prompt_cache_len:5151 prompt_cache_ratio:0.36177833965444584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 -DEBUG 06-24 20:38:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:19 [batch.py:51] router release req id 8 -INFO 06-24 20:38:19 [manager.py:224] router recive req id 8 cost time 0.10895824432373047 s -INFO 06-24 20:38:19 [manager.py:68] detokenization recv req id 8 cost time 0.11112689971923828 s -DEBUG 06-24 20:38:19 [manager.py:391] Prefill Batch: batch_id=264652790701732437901791516455834824907, time:1750768699.9771464s req_ids:[8] -DEBUG 06-24 20:38:19 [manager.py:391] -ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:19 lightllm_req_id:8 first_token_cost:211.19427680969238ms total_cost_time:211.23790740966797ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14239 prompt_cache_len:5151 prompt_cache_ratio:0.3617529320879275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 -DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:20 [batch.py:51] router release req id 8 -INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s -INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.1103372573852539 s -DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=108529410489234853182309024563216782096, time:1750768700.195316s req_ids:[8] -DEBUG 06-24 20:38:20 [manager.py:391] -ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:214.508056640625ms total_cost_time:214.5516872406006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14240 prompt_cache_len:5151 prompt_cache_ratio:0.3617275280898876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 -DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:20 [batch.py:51] router release req id 8 -INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10850095748901367 s -INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.11060714721679688 s -DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=326312563112837350401904037313090898498, time:1750768700.4149883s req_ids:[8] -DEBUG 06-24 20:38:20 [manager.py:391] -ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:384.77277755737305ms total_cost_time:384.81855392456055ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14241 prompt_cache_len:5151 prompt_cache_ratio:0.3617021276595745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 -DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:20 [batch.py:51] router release req id 8 -INFO 06-24 20:38:20 [manager.py:224] router recive req id 8 cost time 0.10824346542358398 s -INFO 06-24 20:38:20 [manager.py:68] detokenization recv req id 8 cost time 0.11034774780273438 s -DEBUG 06-24 20:38:20 [manager.py:391] Prefill Batch: batch_id=290064994207658388103121284618235156710, time:1750768700.807226s req_ids:[8] -DEBUG 06-24 20:38:20 [manager.py:391] -ERROR 06-24 20:38:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:210.73126792907715ms total_cost_time:210.77656745910645ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14242 prompt_cache_len:5151 prompt_cache_ratio:0.36167673079623647 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 -DEBUG 06-24 20:38:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:20 [batch.py:51] router release req id 8 -INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10868382453918457 s -INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11076045036315918 s -DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=289781884027258690810513913263231970375, time:1750768701.0239558s req_ids:[8] -DEBUG 06-24 20:38:21 [manager.py:391] -ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:20 lightllm_req_id:8 first_token_cost:215.06834030151367ms total_cost_time:215.11292457580566ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14243 prompt_cache_len:5151 prompt_cache_ratio:0.36165133749912237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 -DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:21 [batch.py:51] router release req id 8 -INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10881662368774414 s -INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11085081100463867 s -DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=141337858218616608778451452470357542632, time:1750768701.245617s req_ids:[8] -DEBUG 06-24 20:38:21 [manager.py:391] -ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:213.77253532409668ms total_cost_time:213.81545066833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14244 prompt_cache_len:5151 prompt_cache_ratio:0.361625947767481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 -DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:21 [batch.py:51] router release req id 8 -INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10880327224731445 s -INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.1109156608581543 s -DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=66126172289353260289709221427546203582, time:1750768701.466767s req_ids:[8] -DEBUG 06-24 20:38:21 [manager.py:391] -ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:215.25883674621582ms total_cost_time:215.3007984161377ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14245 prompt_cache_len:5151 prompt_cache_ratio:0.3616005616005616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 -DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:21 [batch.py:51] router release req id 8 -INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10851764678955078 s -INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.1105194091796875 s -DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=277854481664723844360924752477950950529, time:1750768701.6866782s req_ids:[8] -DEBUG 06-24 20:38:21 [manager.py:391] -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:21 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:216.50314331054688ms total_cost_time:216.55559539794922ms,out_token_counter:1 mean_per_token_cost_time: 0.05245208740234375ms prompt_token_num:14246 prompt_cache_len:5151 prompt_cache_ratio:0.3615751789976134 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 -DEBUG 06-24 20:38:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:21 [batch.py:51] router release req id 8 -INFO 06-24 20:38:21 [manager.py:224] router recive req id 8 cost time 0.10894155502319336 s -INFO 06-24 20:38:21 [manager.py:68] detokenization recv req id 8 cost time 0.11093306541442871 s -DEBUG 06-24 20:38:21 [manager.py:391] Prefill Batch: batch_id=126858852238684743733970450856982005865, time:1750768701.912173s req_ids:[8] -DEBUG 06-24 20:38:21 [manager.py:391] -ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:21 lightllm_req_id:8 first_token_cost:380.83696365356445ms total_cost_time:380.88011741638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14247 prompt_cache_len:5151 prompt_cache_ratio:0.3615497999578859 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 -DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:22 [batch.py:51] router release req id 8 -INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10944986343383789 s -INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11165785789489746 s -DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=230478253572078936420492255557112079809, time:1750768702.2975452s req_ids:[8] -DEBUG 06-24 20:38:22 [manager.py:391] -ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:172.98626899719238ms total_cost_time:173.03037643432617ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14248 prompt_cache_len:5151 prompt_cache_ratio:0.3615244244806289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 -DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:22 [batch.py:51] router release req id 8 -INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10815834999084473 s -INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11014127731323242 s -DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=26100783133078089268492048524836455998, time:1750768702.4785268s req_ids:[8] -DEBUG 06-24 20:38:22 [manager.py:391] -ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:173.16198348999023ms total_cost_time:173.20585250854492ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14249 prompt_cache_len:5151 prompt_cache_ratio:0.3614990525650923 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 -DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:22 [batch.py:51] router release req id 8 -INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10874557495117188 s -INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11089515686035156 s -DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=103445617092853238513290796476499250129, time:1750768702.6582174s req_ids:[8] -DEBUG 06-24 20:38:22 [manager.py:391] -ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:224.23148155212402ms total_cost_time:224.27773475646973ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14250 prompt_cache_len:5151 prompt_cache_ratio:0.36147368421052634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 -DEBUG 06-24 20:38:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:22 [batch.py:51] router release req id 8 -INFO 06-24 20:38:22 [manager.py:224] router recive req id 8 cost time 0.10981178283691406 s -INFO 06-24 20:38:22 [manager.py:68] detokenization recv req id 8 cost time 0.11191320419311523 s -DEBUG 06-24 20:38:22 [manager.py:391] Prefill Batch: batch_id=4590846683549223026026924007407810283, time:1750768702.8945384s req_ids:[8] -DEBUG 06-24 20:38:22 [manager.py:391] -ERROR 06-24 20:38:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:22 lightllm_req_id:8 first_token_cost:221.9405174255371ms total_cost_time:221.9839096069336ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14251 prompt_cache_len:5151 prompt_cache_ratio:0.36144831941618133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 -DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:23 [batch.py:51] router release req id 8 -INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10886716842651367 s -INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11086440086364746 s -DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=37607218430241042748927753190627397547, time:1750768703.1166275s req_ids:[8] -DEBUG 06-24 20:38:23 [manager.py:391] -INFO 06-24 20:38:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:38:23 [statics_utils.py:24] mean first cost: 231.71281376109815 ms -INFO 06-24 20:38:23 [statics_utils.py:24] mean per token cost: 0.058393696401846086 ms -ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:372.70545959472656ms total_cost_time:372.74742126464844ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14252 prompt_cache_len:5151 prompt_cache_ratio:0.3614229581813079 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 -DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:23 [batch.py:51] router release req id 8 -INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10784482955932617 s -INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.10985279083251953 s -DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=92669643871483030433133368835744357579, time:1750768703.4965448s req_ids:[8] -DEBUG 06-24 20:38:23 [manager.py:391] -ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:217.27514266967773ms total_cost_time:217.3178195953369ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14253 prompt_cache_len:5151 prompt_cache_ratio:0.3613976005051568 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 -DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:23 [batch.py:51] router release req id 8 -INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s -INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11027908325195312 s -DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=116044546114338132362674620565779797942, time:1750768703.7206903s req_ids:[8] -DEBUG 06-24 20:38:23 [manager.py:391] -ERROR 06-24 20:38:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:214.4005298614502ms total_cost_time:214.4463062286377ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14254 prompt_cache_len:5151 prompt_cache_ratio:0.36137224638697907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 -DEBUG 06-24 20:38:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:23 [batch.py:51] router release req id 8 -INFO 06-24 20:38:23 [manager.py:224] router recive req id 8 cost time 0.10813665390014648 s -INFO 06-24 20:38:23 [manager.py:68] detokenization recv req id 8 cost time 0.11005544662475586 s -DEBUG 06-24 20:38:23 [manager.py:391] Prefill Batch: batch_id=269809177401614848570507801731414439343, time:1750768703.9397151s req_ids:[8] -DEBUG 06-24 20:38:23 [manager.py:391] -ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:23 lightllm_req_id:8 first_token_cost:214.97488021850586ms total_cost_time:215.01851081848145ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14255 prompt_cache_len:5151 prompt_cache_ratio:0.36134689582602597 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 -DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:24 [batch.py:51] router release req id 8 -INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s -INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.10942697525024414 s -DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=194555102607942816495725954600490454894, time:1750768704.1654363s req_ids:[8] -DEBUG 06-24 20:38:24 [manager.py:391] -ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:218.68610382080078ms total_cost_time:218.72949600219727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14256 prompt_cache_len:5151 prompt_cache_ratio:0.3613215488215488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 -DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:24 [batch.py:51] router release req id 8 -INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10905623435974121 s -INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.11095643043518066 s -DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=247994700793165251051711531568661037169, time:1750768704.387687s req_ids:[8] -DEBUG 06-24 20:38:24 [manager.py:391] -ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:210.8604907989502ms total_cost_time:210.9072208404541ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14257 prompt_cache_len:5151 prompt_cache_ratio:0.36129620537279933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 -DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:24 [batch.py:51] router release req id 8 -INFO 06-24 20:38:24 [manager.py:224] router recive req id 8 cost time 0.10790657997131348 s -INFO 06-24 20:38:24 [manager.py:68] detokenization recv req id 8 cost time 0.10964632034301758 s -DEBUG 06-24 20:38:24 [manager.py:391] Prefill Batch: batch_id=24603621795164591053890511868000210579, time:1750768704.6046593s req_ids:[8] -DEBUG 06-24 20:38:24 [manager.py:391] -ERROR 06-24 20:38:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:396.0280418395996ms total_cost_time:396.0742950439453ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14258 prompt_cache_len:5151 prompt_cache_ratio:0.3612708654790293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 -DEBUG 06-24 20:38:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:24 [batch.py:51] router release req id 8 -INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1086738109588623 s -INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11078739166259766 s -DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=155189836786531218277268209659706461832, time:1750768705.0076299s req_ids:[8] -DEBUG 06-24 20:38:25 [manager.py:391] -ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:24 lightllm_req_id:8 first_token_cost:219.1638946533203ms total_cost_time:219.2087173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14259 prompt_cache_len:5151 prompt_cache_ratio:0.3612455291394909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 -DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:25 [batch.py:51] router release req id 8 -INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s -INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s -DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=35466116700310860177728498719819713323, time:1750768705.2336066s req_ids:[8] -DEBUG 06-24 20:38:25 [manager.py:391] -ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:215.57283401489258ms total_cost_time:215.61717987060547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14260 prompt_cache_len:5151 prompt_cache_ratio:0.36122019635343616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 -DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:25 [batch.py:51] router release req id 8 -INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.10834026336669922 s -INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11022329330444336 s -DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=45711128342854309500103785881196096299, time:1750768705.4570441s req_ids:[8] -DEBUG 06-24 20:38:25 [manager.py:391] -ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:217.73505210876465ms total_cost_time:217.77820587158203ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14261 prompt_cache_len:5151 prompt_cache_ratio:0.3611948671201178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 -DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:25 [batch.py:51] router release req id 8 -INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1098330020904541 s -INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.11208367347717285 s -DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=56983335388868975051469468574466608649, time:1750768705.680117s req_ids:[8] -DEBUG 06-24 20:38:25 [manager.py:391] -ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:213.70744705200195ms total_cost_time:213.75012397766113ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14262 prompt_cache_len:5151 prompt_cache_ratio:0.3611695414387884 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 -DEBUG 06-24 20:38:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:25 [batch.py:51] router release req id 8 -INFO 06-24 20:38:25 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s -INFO 06-24 20:38:25 [manager.py:68] detokenization recv req id 8 cost time 0.1101083755493164 s -DEBUG 06-24 20:38:25 [manager.py:391] Prefill Batch: batch_id=152675621127161317095742100715856796364, time:1750768705.9025884s req_ids:[8] -DEBUG 06-24 20:38:25 [manager.py:391] -ERROR 06-24 20:38:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:25 lightllm_req_id:8 first_token_cost:211.9448184967041ms total_cost_time:211.9908332824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14263 prompt_cache_len:5151 prompt_cache_ratio:0.36114421930870083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 -DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:26 [batch.py:51] router release req id 8 -INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10933208465576172 s -INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.11050295829772949 s -DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=112284622224081763664030157942405696724, time:1750768706.1272902s req_ids:[8] -DEBUG 06-24 20:38:26 [manager.py:391] -ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:403.594970703125ms total_cost_time:403.6386013031006ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14264 prompt_cache_len:5151 prompt_cache_ratio:0.36111890072910824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 -DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:26 [batch.py:51] router release req id 8 -INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10861468315124512 s -INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.10976266860961914 s -DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=216709563430167644707302198382835305831, time:1750768706.5282092s req_ids:[8] -DEBUG 06-24 20:38:26 [manager.py:391] -ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:212.1129035949707ms total_cost_time:212.1570110321045ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14265 prompt_cache_len:5151 prompt_cache_ratio:0.3610935856992639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 -DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:26 [batch.py:51] router release req id 8 -INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10823917388916016 s -INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.1094522476196289 s -DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=107251540734002584243757528676762937010, time:1750768706.746921s req_ids:[8] -DEBUG 06-24 20:38:26 [manager.py:391] -ERROR 06-24 20:38:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:214.02573585510254ms total_cost_time:214.06984329223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14266 prompt_cache_len:5151 prompt_cache_ratio:0.3610682742184214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 -DEBUG 06-24 20:38:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:26 [batch.py:51] router release req id 8 -INFO 06-24 20:38:26 [manager.py:224] router recive req id 8 cost time 0.10752058029174805 s -INFO 06-24 20:38:26 [manager.py:68] detokenization recv req id 8 cost time 0.10880208015441895 s -DEBUG 06-24 20:38:26 [manager.py:391] Prefill Batch: batch_id=161721564497271796386651570704644392668, time:1750768706.967214s req_ids:[8] -DEBUG 06-24 20:38:26 [manager.py:391] -ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:26 lightllm_req_id:8 first_token_cost:214.12110328674316ms total_cost_time:214.16568756103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14267 prompt_cache_len:5151 prompt_cache_ratio:0.36104296628583443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 -DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:27 [batch.py:51] router release req id 8 -INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10801482200622559 s -INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10915827751159668 s -DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=275579050197608394297299400123904209743, time:1750768707.1871688s req_ids:[8] -DEBUG 06-24 20:38:27 [manager.py:391] -ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:174.6532917022705ms total_cost_time:174.6971607208252ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14268 prompt_cache_len:5151 prompt_cache_ratio:0.36101766190075696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 -DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:27 [batch.py:51] router release req id 8 -INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10784435272216797 s -INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10907626152038574 s -DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=322931956293872787454538929591404745921, time:1750768707.368545s req_ids:[8] -DEBUG 06-24 20:38:27 [manager.py:391] -ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:209.7318172454834ms total_cost_time:209.77401733398438ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14269 prompt_cache_len:5151 prompt_cache_ratio:0.36099236106244303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 -DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:27 [batch.py:51] router release req id 8 -INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10804176330566406 s -INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10916543006896973 s -DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=87699627785215763541076044289396657441, time:1750768707.5842388s req_ids:[8] -DEBUG 06-24 20:38:27 [manager.py:391] -ERROR 06-24 20:38:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:380.3107738494873ms total_cost_time:380.3548812866211ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14270 prompt_cache_len:5151 prompt_cache_ratio:0.36096706377014715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 -DEBUG 06-24 20:38:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:27 [batch.py:51] router release req id 8 -INFO 06-24 20:38:27 [manager.py:224] router recive req id 8 cost time 0.10809850692749023 s -INFO 06-24 20:38:27 [manager.py:68] detokenization recv req id 8 cost time 0.10928463935852051 s -DEBUG 06-24 20:38:27 [manager.py:391] Prefill Batch: batch_id=157341962118228195590479086896174324799, time:1750768707.9725165s req_ids:[8] -DEBUG 06-24 20:38:27 [manager.py:391] -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:27 lightllm_req_id:8 first_token_cost:220.45302391052246ms total_cost_time:220.49808502197266ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14271 prompt_cache_len:5151 prompt_cache_ratio:0.36094177002312383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 -DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:28 [batch.py:51] router release req id 8 -INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10949993133544922 s -INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11073851585388184 s -DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=324873762278114305504208102717214895293, time:1750768708.2003026s req_ids:[8] -DEBUG 06-24 20:38:28 [manager.py:391] -ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:217.65947341918945ms total_cost_time:217.70524978637695ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14272 prompt_cache_len:5151 prompt_cache_ratio:0.3609164798206278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 -DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:28 [batch.py:51] router release req id 8 -INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10936522483825684 s -INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s -DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=191727752977890756539975842098113679417, time:1750768708.425195s req_ids:[8] -DEBUG 06-24 20:38:28 [manager.py:391] -ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:214.97082710266113ms total_cost_time:215.01421928405762ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14273 prompt_cache_len:5151 prompt_cache_ratio:0.3608911931619141 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 -DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:28 [batch.py:51] router release req id 8 -INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.1102759838104248 s -INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.11140775680541992 s -DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=183910624031422051294651887010596331794, time:1750768708.646223s req_ids:[8] -DEBUG 06-24 20:38:28 [manager.py:391] -ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:218.31727027893066ms total_cost_time:218.36256980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14274 prompt_cache_len:5151 prompt_cache_ratio:0.3608659100462379 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 -DEBUG 06-24 20:38:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:28 [batch.py:51] router release req id 8 -INFO 06-24 20:38:28 [manager.py:224] router recive req id 8 cost time 0.10822892189025879 s -INFO 06-24 20:38:28 [manager.py:68] detokenization recv req id 8 cost time 0.10950756072998047 s -DEBUG 06-24 20:38:28 [manager.py:391] Prefill Batch: batch_id=4988325219455916826580241057238871320, time:1750768708.8695588s req_ids:[8] -DEBUG 06-24 20:38:28 [manager.py:391] -ERROR 06-24 20:38:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:211.8515968322754ms total_cost_time:211.89475059509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14275 prompt_cache_len:5151 prompt_cache_ratio:0.36084063047285464 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 -DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:29 [batch.py:51] router release req id 8 -INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.3114337921142578 s -INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.31279802322387695 s -DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=67142791448226603804232932078153316326, time:1750768709.295927s req_ids:[8] -DEBUG 06-24 20:38:29 [manager.py:391] -ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:28 lightllm_req_id:8 first_token_cost:432.2316646575928ms total_cost_time:432.2776794433594ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14276 prompt_cache_len:5151 prompt_cache_ratio:0.3608153544410199 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 -DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:29 [batch.py:51] router release req id 8 -INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.10806512832641602 s -INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10928034782409668 s -DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=175140417026085694421881953622238537700, time:1750768709.5287404s req_ids:[8] -DEBUG 06-24 20:38:29 [manager.py:391] -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:29 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:38:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 56591.656 tokens/s -DEBUG 06-24 20:38:29 [stats.py:37] Avg prompt tokens throughput: 56583.619 tokens/s -DEBUG 06-24 20:38:29 [stats.py:37] Avg generate tokens throughput: 8.037 tokens/s -INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:210.80541610717773ms total_cost_time:210.85214614868164ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14277 prompt_cache_len:5151 prompt_cache_ratio:0.3607900819499895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 -DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:29 [batch.py:51] router release req id 8 -INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.108367919921875 s -INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10947775840759277 s -DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=34754242359685825961324259708736057417, time:1750768709.7451184s req_ids:[8] -DEBUG 06-24 20:38:29 [manager.py:391] -ERROR 06-24 20:38:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:216.62187576293945ms total_cost_time:216.66526794433594ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14278 prompt_cache_len:5151 prompt_cache_ratio:0.36076481299901947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 -DEBUG 06-24 20:38:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:29 [batch.py:51] router release req id 8 -INFO 06-24 20:38:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:29 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s -INFO 06-24 20:38:29 [manager.py:68] detokenization recv req id 8 cost time 0.10940933227539062 s -DEBUG 06-24 20:38:29 [manager.py:391] Prefill Batch: batch_id=300448987994262465438565765828240742488, time:1750768709.9672012s req_ids:[8] -DEBUG 06-24 20:38:29 [manager.py:391] -ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:29 lightllm_req_id:8 first_token_cost:214.07604217529297ms total_cost_time:214.11991119384766ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14279 prompt_cache_len:5151 prompt_cache_ratio:0.36073954758736604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 -DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:30 [batch.py:51] router release req id 8 -INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s -INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.1102442741394043 s -DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=310330459520562265125238172196327934832, time:1750768710.1854455s req_ids:[8] -DEBUG 06-24 20:38:30 [manager.py:391] -ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:215.08336067199707ms total_cost_time:215.12675285339355ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14280 prompt_cache_len:5151 prompt_cache_ratio:0.3607142857142857 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 -DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:30 [batch.py:51] router release req id 8 -INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.11122584342956543 s -INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.1124427318572998 s -DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=40110720079963559767079849327104904293, time:1750768710.4106061s req_ids:[8] -DEBUG 06-24 20:38:30 [manager.py:391] -ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:375.704288482666ms total_cost_time:375.7483959197998ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14281 prompt_cache_len:5151 prompt_cache_ratio:0.3606890273790351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 -DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:30 [batch.py:51] router release req id 8 -INFO 06-24 20:38:30 [manager.py:224] router recive req id 8 cost time 0.1083669662475586 s -INFO 06-24 20:38:30 [manager.py:68] detokenization recv req id 8 cost time 0.10951066017150879 s -DEBUG 06-24 20:38:30 [manager.py:391] Prefill Batch: batch_id=253655184368831574883322889132720151089, time:1750768710.7917306s req_ids:[8] -DEBUG 06-24 20:38:30 [manager.py:391] -ERROR 06-24 20:38:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:218.72663497924805ms total_cost_time:218.78886222839355ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:14282 prompt_cache_len:5151 prompt_cache_ratio:0.360663772580871 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 -DEBUG 06-24 20:38:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:30 [batch.py:51] router release req id 8 -INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10822248458862305 s -INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.1093752384185791 s -DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=300035503163366359119889520728700999557, time:1750768711.012669s req_ids:[8] -DEBUG 06-24 20:38:31 [manager.py:391] -ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:30 lightllm_req_id:8 first_token_cost:199.4013786315918ms total_cost_time:199.44429397583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14283 prompt_cache_len:5151 prompt_cache_ratio:0.3606385213190506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 -DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:31 [batch.py:51] router release req id 8 -INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10840773582458496 s -INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.10951876640319824 s -DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=324144855044313137112862026666618971329, time:1750768711.2216167s req_ids:[8] -DEBUG 06-24 20:38:31 [manager.py:391] -ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:209.6273899078369ms total_cost_time:209.6717357635498ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14284 prompt_cache_len:5151 prompt_cache_ratio:0.36061327359283113 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 -DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:31 [batch.py:51] router release req id 8 -INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.1079256534576416 s -INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.10900187492370605 s -DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=239041606207801968215534728356550466793, time:1750768711.437036s req_ids:[8] -DEBUG 06-24 20:38:31 [manager.py:391] -ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:214.6625518798828ms total_cost_time:214.705228805542ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14285 prompt_cache_len:5151 prompt_cache_ratio:0.36058802940147006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 -DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:31 [batch.py:51] router release req id 8 -INFO 06-24 20:38:31 [manager.py:224] router recive req id 8 cost time 0.10941576957702637 s -INFO 06-24 20:38:31 [manager.py:68] detokenization recv req id 8 cost time 0.1103825569152832 s -DEBUG 06-24 20:38:31 [manager.py:391] Prefill Batch: batch_id=181353613833834285558959543886686094636, time:1750768711.6585305s req_ids:[8] -DEBUG 06-24 20:38:31 [manager.py:391] -ERROR 06-24 20:38:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:389.76001739501953ms total_cost_time:389.8048400878906ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14286 prompt_cache_len:5151 prompt_cache_ratio:0.3605627887442251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 -DEBUG 06-24 20:38:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:31 [batch.py:51] router release req id 8 -INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10941791534423828 s -INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11041641235351562 s -DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=309688211521869080631086722958418318453, time:1750768712.0569487s req_ids:[8] -DEBUG 06-24 20:38:32 [manager.py:391] -ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:31 lightllm_req_id:8 first_token_cost:211.6374969482422ms total_cost_time:211.70282363891602ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:14287 prompt_cache_len:5151 prompt_cache_ratio:0.36053755162035417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 -DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:32 [batch.py:51] router release req id 8 -INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10897278785705566 s -INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.10994958877563477 s -DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=309401336277837939586333489106531148992, time:1750768712.2734628s req_ids:[8] -DEBUG 06-24 20:38:32 [manager.py:391] -ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:209.02371406555176ms total_cost_time:209.06734466552734ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14288 prompt_cache_len:5151 prompt_cache_ratio:0.3605123180291153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 -DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:32 [batch.py:51] router release req id 8 -INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10788369178771973 s -INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.10891580581665039 s -DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=31658981941704406071314646806836122281, time:1750768712.4873397s req_ids:[8] -DEBUG 06-24 20:38:32 [manager.py:391] -ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:208.4660530090332ms total_cost_time:208.50896835327148ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14289 prompt_cache_len:5151 prompt_cache_ratio:0.360487087969767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 -DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:32 [batch.py:51] router release req id 8 -INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s -INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11029791831970215 s -DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=219111016566369660719693670618227443196, time:1750768712.7037668s req_ids:[8] -DEBUG 06-24 20:38:32 [manager.py:391] -ERROR 06-24 20:38:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:211.66348457336426ms total_cost_time:211.70735359191895ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14290 prompt_cache_len:5151 prompt_cache_ratio:0.36046186144156755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 -DEBUG 06-24 20:38:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:32 [batch.py:51] router release req id 8 -INFO 06-24 20:38:32 [manager.py:224] router recive req id 8 cost time 0.1091306209564209 s -INFO 06-24 20:38:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019062995910645 s -DEBUG 06-24 20:38:32 [manager.py:391] Prefill Batch: batch_id=49863428173590249347122873350681298774, time:1750768712.9218888s req_ids:[8] -DEBUG 06-24 20:38:32 [manager.py:391] -ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:32 lightllm_req_id:8 first_token_cost:214.41268920898438ms total_cost_time:214.45703506469727ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14291 prompt_cache_len:5151 prompt_cache_ratio:0.3604366384437758 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 -DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:33 [batch.py:51] router release req id 8 -INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.10806941986083984 s -INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.10908055305480957 s -DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=230975982761298293206945226499347334751, time:1750768713.1429605s req_ids:[8] -DEBUG 06-24 20:38:33 [manager.py:391] -ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:214.41173553466797ms total_cost_time:214.45560455322266ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14292 prompt_cache_len:5151 prompt_cache_ratio:0.3604114189756507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 -DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:33 [batch.py:51] router release req id 8 -INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.3129589557647705 s -INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.31403517723083496 s -DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=227229248147400051197510052849323221991, time:1750768713.570737s req_ids:[8] -DEBUG 06-24 20:38:33 [manager.py:391] -ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:420.8519458770752ms total_cost_time:420.8977222442627ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14293 prompt_cache_len:5151 prompt_cache_ratio:0.3603862030364514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 -DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:33 [batch.py:51] router release req id 8 -INFO 06-24 20:38:33 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s -INFO 06-24 20:38:33 [manager.py:68] detokenization recv req id 8 cost time 0.1100611686706543 s -DEBUG 06-24 20:38:33 [manager.py:391] Prefill Batch: batch_id=198740624024640700304364276331600200328, time:1750768713.7922153s req_ids:[8] -DEBUG 06-24 20:38:33 [manager.py:391] -ERROR 06-24 20:38:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:217.24343299865723ms total_cost_time:217.28777885437012ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14294 prompt_cache_len:5151 prompt_cache_ratio:0.36036099062543725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 -DEBUG 06-24 20:38:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:33 [batch.py:51] router release req id 8 -INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10907673835754395 s -INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11010622978210449 s -DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=308849611877324709313044845432754192573, time:1750768714.0164826s req_ids:[8] -DEBUG 06-24 20:38:34 [manager.py:391] -ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:33 lightllm_req_id:8 first_token_cost:216.5999412536621ms total_cost_time:216.66264533996582ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:14295 prompt_cache_len:5151 prompt_cache_ratio:0.3603357817418678 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 -DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:34 [batch.py:51] router release req id 8 -INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10914731025695801 s -INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11021780967712402 s -DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=7298113297532464827488932196760374987, time:1750768714.2403963s req_ids:[8] -DEBUG 06-24 20:38:34 [manager.py:391] -ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:215.7299518585205ms total_cost_time:215.7726287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14296 prompt_cache_len:5151 prompt_cache_ratio:0.3603105763850028 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 -DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:34 [batch.py:51] router release req id 8 -INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.1101541519165039 s -INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.11122703552246094 s -DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=179995775319010562555444887567852153153, time:1750768714.4633112s req_ids:[8] -DEBUG 06-24 20:38:34 [manager.py:391] -ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:214.10846710205078ms total_cost_time:214.15162086486816ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14297 prompt_cache_len:5151 prompt_cache_ratio:0.36028537455410226 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 -DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:34 [batch.py:51] router release req id 8 -INFO 06-24 20:38:34 [manager.py:224] router recive req id 8 cost time 0.10875439643859863 s -INFO 06-24 20:38:34 [manager.py:68] detokenization recv req id 8 cost time 0.10984134674072266 s -DEBUG 06-24 20:38:34 [manager.py:391] Prefill Batch: batch_id=12361914905767672978511222016360947709, time:1750768714.6851068s req_ids:[8] -DEBUG 06-24 20:38:34 [manager.py:391] -ERROR 06-24 20:38:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:393.5692310333252ms total_cost_time:393.6131000518799ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14298 prompt_cache_len:5151 prompt_cache_ratio:0.3602601762484264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 -DEBUG 06-24 20:38:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:34 [batch.py:51] router release req id 8 -INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10855221748352051 s -INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959458351135254 s -DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=36879388784814690014910353589929816195, time:1750768715.082922s req_ids:[8] -DEBUG 06-24 20:38:35 [manager.py:391] -ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:34 lightllm_req_id:8 first_token_cost:218.28532218933105ms total_cost_time:218.34087371826172ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:14299 prompt_cache_len:5151 prompt_cache_ratio:0.3602349814672355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 -DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:35 [batch.py:51] router release req id 8 -INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10866689682006836 s -INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10971522331237793 s -DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=336465445534868957057072612396440288369, time:1750768715.3080647s req_ids:[8] -DEBUG 06-24 20:38:35 [manager.py:391] -ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:217.6058292388916ms total_cost_time:217.6499366760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14300 prompt_cache_len:5151 prompt_cache_ratio:0.3602097902097902 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 -DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:35 [batch.py:51] router release req id 8 -INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.1085505485534668 s -INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s -DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=35839447325684058841472605588877985357, time:1750768715.5309417s req_ids:[8] -DEBUG 06-24 20:38:35 [manager.py:391] -ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:218.49703788757324ms total_cost_time:218.54186058044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14301 prompt_cache_len:5151 prompt_cache_ratio:0.3601846024753514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 -DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:35 [batch.py:51] router release req id 8 -INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10947799682617188 s -INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.11055231094360352 s -DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=39799943014911740192810342187867965428, time:1750768715.7557387s req_ids:[8] -DEBUG 06-24 20:38:35 [manager.py:391] -ERROR 06-24 20:38:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:213.4249210357666ms total_cost_time:213.4687900543213ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14302 prompt_cache_len:5151 prompt_cache_ratio:0.36015941826318 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 -DEBUG 06-24 20:38:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:35 [batch.py:51] router release req id 8 -INFO 06-24 20:38:35 [manager.py:224] router recive req id 8 cost time 0.10860371589660645 s -INFO 06-24 20:38:35 [manager.py:68] detokenization recv req id 8 cost time 0.10959219932556152 s -DEBUG 06-24 20:38:35 [manager.py:391] Prefill Batch: batch_id=27625006614332878305967706236117741305, time:1750768715.979675s req_ids:[8] -DEBUG 06-24 20:38:35 [manager.py:391] -ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:35 lightllm_req_id:8 first_token_cost:216.233491897583ms total_cost_time:216.2764072418213ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14303 prompt_cache_len:5151 prompt_cache_ratio:0.3601342375725372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 -DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:36 [batch.py:51] router release req id 8 -INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10930061340332031 s -INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.11041665077209473 s -DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=273084326860973483868345642432186432935, time:1750768716.1987016s req_ids:[8] -DEBUG 06-24 20:38:36 [manager.py:391] -ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:414.10279273986816ms total_cost_time:414.14809226989746ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14304 prompt_cache_len:5151 prompt_cache_ratio:0.3601090604026846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 -DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:36 [batch.py:51] router release req id 8 -INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10906744003295898 s -INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.11020445823669434 s -DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=71303769000114458801445844147046321594, time:1750768716.62154s req_ids:[8] -DEBUG 06-24 20:38:36 [manager.py:391] -ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:214.60890769958496ms total_cost_time:214.65349197387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14305 prompt_cache_len:5151 prompt_cache_ratio:0.3600838867528836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 -DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:36 [batch.py:51] router release req id 8 -INFO 06-24 20:38:36 [manager.py:224] router recive req id 8 cost time 0.10835433006286621 s -INFO 06-24 20:38:36 [manager.py:68] detokenization recv req id 8 cost time 0.1094822883605957 s -DEBUG 06-24 20:38:36 [manager.py:391] Prefill Batch: batch_id=19304856825515355643406279628975286621, time:1750768716.841561s req_ids:[8] -DEBUG 06-24 20:38:36 [manager.py:391] -ERROR 06-24 20:38:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:215.19875526428223ms total_cost_time:215.2426242828369ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14306 prompt_cache_len:5151 prompt_cache_ratio:0.3600587166223962 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 -DEBUG 06-24 20:38:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:36 [batch.py:51] router release req id 8 -INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10806012153625488 s -INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10912823677062988 s -DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=22267125484041152242923526200289025931, time:1750768717.0650477s req_ids:[8] -DEBUG 06-24 20:38:37 [manager.py:391] -ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:36 lightllm_req_id:8 first_token_cost:215.39068222045898ms total_cost_time:215.43431282043457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14307 prompt_cache_len:5151 prompt_cache_ratio:0.36003355001048437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 -DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:37 [batch.py:51] router release req id 8 -INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10772204399108887 s -INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10873198509216309 s -DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=337614331400476825843764712804249121711, time:1750768717.286842s req_ids:[8] -DEBUG 06-24 20:38:37 [manager.py:391] -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:37 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:213.7451171875ms total_cost_time:213.78827095031738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14308 prompt_cache_len:5151 prompt_cache_ratio:0.3600083869164104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 -DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:37 [batch.py:51] router release req id 8 -INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.1089785099029541 s -INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.11010479927062988 s -DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=238386048792173809608014555876239157672, time:1750768717.5062656s req_ids:[8] -DEBUG 06-24 20:38:37 [manager.py:391] -ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:214.6320343017578ms total_cost_time:214.6759033203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14309 prompt_cache_len:5151 prompt_cache_ratio:0.3599832273394367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 -DEBUG 06-24 20:38:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:37 [batch.py:51] router release req id 8 -INFO 06-24 20:38:37 [manager.py:224] router recive req id 8 cost time 0.10711240768432617 s -INFO 06-24 20:38:37 [manager.py:68] detokenization recv req id 8 cost time 0.10788607597351074 s -DEBUG 06-24 20:38:37 [manager.py:391] Prefill Batch: batch_id=187270018968756731327443780054617100492, time:1750768717.7275078s req_ids:[8] -DEBUG 06-24 20:38:37 [manager.py:391] -ERROR 06-24 20:38:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:358.7324619293213ms total_cost_time:358.7765693664551ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14310 prompt_cache_len:5151 prompt_cache_ratio:0.359958071278826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 -DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:38 [batch.py:51] router release req id 8 -INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1081550121307373 s -INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10900640487670898 s -DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=207658275690279995385187150600872365173, time:1750768718.0923839s req_ids:[8] -DEBUG 06-24 20:38:38 [manager.py:391] -ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:37 lightllm_req_id:8 first_token_cost:199.57256317138672ms total_cost_time:199.6157169342041ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14311 prompt_cache_len:5151 prompt_cache_ratio:0.3599329187338411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 -DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:38 [batch.py:51] router release req id 8 -INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1082143783569336 s -INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10925483703613281 s -DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=203597121101678351338611651429530960445, time:1750768718.2991407s req_ids:[8] -DEBUG 06-24 20:38:38 [manager.py:391] -ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:212.03374862670898ms total_cost_time:212.07880973815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14312 prompt_cache_len:5151 prompt_cache_ratio:0.3599077697037451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 -DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:38 [batch.py:51] router release req id 8 -INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.1079714298248291 s -INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10901999473571777 s -DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=32048459086076373550834889312118255152, time:1750768718.5168228s req_ids:[8] -DEBUG 06-24 20:38:38 [manager.py:391] -ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:210.1573944091797ms total_cost_time:210.20150184631348ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14313 prompt_cache_len:5151 prompt_cache_ratio:0.3598826241878013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 -DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:38 [batch.py:51] router release req id 8 -INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.10927653312683105 s -INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102447509765625 s -DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=47952708225707439984615417012857531081, time:1750768718.7343922s req_ids:[8] -DEBUG 06-24 20:38:38 [manager.py:391] -ERROR 06-24 20:38:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:210.90412139892578ms total_cost_time:210.94512939453125ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14314 prompt_cache_len:5151 prompt_cache_ratio:0.35985748218527314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 -DEBUG 06-24 20:38:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:38 [batch.py:51] router release req id 8 -INFO 06-24 20:38:38 [manager.py:224] router recive req id 8 cost time 0.10811734199523926 s -INFO 06-24 20:38:38 [manager.py:68] detokenization recv req id 8 cost time 0.10915422439575195 s -DEBUG 06-24 20:38:38 [manager.py:391] Prefill Batch: batch_id=161412611603861747931595961641859918496, time:1750768718.961354s req_ids:[8] -DEBUG 06-24 20:38:38 [manager.py:391] -ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:38 lightllm_req_id:8 first_token_cost:225.05903244018555ms total_cost_time:225.10457038879395ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14315 prompt_cache_len:5151 prompt_cache_ratio:0.3598323436954244 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 -DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:39 [batch.py:51] router release req id 8 -INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.3109610080718994 s -INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.3121209144592285 s -DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=309286886493428932844529532691085835534, time:1750768719.388381s req_ids:[8] -DEBUG 06-24 20:38:39 [manager.py:391] -ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:425.7972240447998ms total_cost_time:425.8413314819336ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14316 prompt_cache_len:5151 prompt_cache_ratio:0.35980720871751887 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 -DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:39 [batch.py:51] router release req id 8 -INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.10911822319030762 s -INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.11029577255249023 s -DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=113495359027572542813921866568586584317, time:1750768719.6173482s req_ids:[8] -DEBUG 06-24 20:38:39 [manager.py:391] -ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:38:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 56654.062 tokens/s -DEBUG 06-24 20:38:39 [stats.py:37] Avg prompt tokens throughput: 56646.138 tokens/s -DEBUG 06-24 20:38:39 [stats.py:37] Avg generate tokens throughput: 7.924 tokens/s -INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:214.89906311035156ms total_cost_time:214.94364738464355ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14317 prompt_cache_len:5151 prompt_cache_ratio:0.3597820772508207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 -DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:39 [batch.py:51] router release req id 8 -INFO 06-24 20:38:39 [manager.py:224] router recive req id 8 cost time 0.1083827018737793 s -INFO 06-24 20:38:39 [manager.py:68] detokenization recv req id 8 cost time 0.10938668251037598 s -DEBUG 06-24 20:38:39 [manager.py:391] Prefill Batch: batch_id=219431252192268158481018098232315991305, time:1750768719.8396358s req_ids:[8] -DEBUG 06-24 20:38:39 [manager.py:391] -ERROR 06-24 20:38:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:217.25726127624512ms total_cost_time:217.3008918762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14318 prompt_cache_len:5151 prompt_cache_ratio:0.35975694929459423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 -DEBUG 06-24 20:38:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:39 [batch.py:51] router release req id 8 -INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s -INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10981059074401855 s -DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=292733695753435359011656294375156431344, time:1750768720.062112s req_ids:[8] -DEBUG 06-24 20:38:40 [manager.py:391] -ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:39 lightllm_req_id:8 first_token_cost:214.77007865905762ms total_cost_time:214.8122787475586ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14319 prompt_cache_len:5151 prompt_cache_ratio:0.3597318248481039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 -DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:40 [batch.py:51] router release req id 8 -INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10806465148925781 s -INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10899233818054199 s -DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=118814785872162620705492512118275358047, time:1750768720.284169s req_ids:[8] -DEBUG 06-24 20:38:40 [manager.py:391] -ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:213.90652656555176ms total_cost_time:213.9265537261963ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:14320 prompt_cache_len:5151 prompt_cache_ratio:0.35970670391061454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 -DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:40 [batch.py:51] router release req id 8 -INFO 06-24 20:38:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10858964920043945 s -INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s -DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=259563674301579742299194253949386000473, time:1750768720.5051982s req_ids:[8] -DEBUG 06-24 20:38:40 [manager.py:391] -ERROR 06-24 20:38:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:402.8322696685791ms total_cost_time:402.8756618499756ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14321 prompt_cache_len:5151 prompt_cache_ratio:0.359681586481391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 -DEBUG 06-24 20:38:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:40 [batch.py:51] router release req id 8 -INFO 06-24 20:38:40 [manager.py:224] router recive req id 8 cost time 0.10938239097595215 s -INFO 06-24 20:38:40 [manager.py:68] detokenization recv req id 8 cost time 0.11044192314147949 s -DEBUG 06-24 20:38:40 [manager.py:391] Prefill Batch: batch_id=144214591891764007995838349096065152453, time:1750768720.9140751s req_ids:[8] -DEBUG 06-24 20:38:40 [manager.py:391] -ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:40 lightllm_req_id:8 first_token_cost:216.8586254119873ms total_cost_time:216.9036865234375ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14322 prompt_cache_len:5151 prompt_cache_ratio:0.35965647255969835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 -DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:41 [batch.py:51] router release req id 8 -INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1103208065032959 s -INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.11131978034973145 s -DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=337344520444635864729608261909324467746, time:1750768721.1398907s req_ids:[8] -DEBUG 06-24 20:38:41 [manager.py:391] -ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:214.6289348602295ms total_cost_time:214.68758583068848ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:14323 prompt_cache_len:5151 prompt_cache_ratio:0.35963136214480207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 -DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:41 [batch.py:51] router release req id 8 -INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1081228256225586 s -INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10912561416625977 s -DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=65726297902167035493710652419074302266, time:1750768721.3595438s req_ids:[8] -DEBUG 06-24 20:38:41 [manager.py:391] -ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:215.2099609375ms total_cost_time:215.254545211792ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14324 prompt_cache_len:5151 prompt_cache_ratio:0.3596062552359676 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 -DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:41 [batch.py:51] router release req id 8 -INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.1081395149230957 s -INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10915255546569824 s -DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=10392302593517550863032708739264419999, time:1750768721.5847633s req_ids:[8] -DEBUG 06-24 20:38:41 [manager.py:391] -ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:214.8585319519043ms total_cost_time:214.88118171691895ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:14325 prompt_cache_len:5151 prompt_cache_ratio:0.35958115183246075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 -DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:41 [batch.py:51] router release req id 8 -INFO 06-24 20:38:41 [manager.py:224] router recive req id 8 cost time 0.10678553581237793 s -INFO 06-24 20:38:41 [manager.py:68] detokenization recv req id 8 cost time 0.10784339904785156 s -DEBUG 06-24 20:38:41 [manager.py:391] Prefill Batch: batch_id=123777851548131639359384620110996058923, time:1750768721.802933s req_ids:[8] -DEBUG 06-24 20:38:41 [manager.py:391] -ERROR 06-24 20:38:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:216.5658473968506ms total_cost_time:216.61043167114258ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14326 prompt_cache_len:5151 prompt_cache_ratio:0.3595560519335474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 -DEBUG 06-24 20:38:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:41 [batch.py:51] router release req id 8 -INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10779237747192383 s -INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.10865306854248047 s -DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=331469391169738333711562845954962360877, time:1750768722.0369036s req_ids:[8] -DEBUG 06-24 20:38:42 [manager.py:391] -ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:41 lightllm_req_id:8 first_token_cost:358.19363594055176ms total_cost_time:358.23774337768555ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14327 prompt_cache_len:5151 prompt_cache_ratio:0.35953095553849374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 -DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:42 [batch.py:51] router release req id 8 -INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.1087493896484375 s -INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.10979509353637695 s -DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=81601714097032354123917292213753152592, time:1750768722.3929098s req_ids:[8] -DEBUG 06-24 20:38:42 [manager.py:391] -ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.73178672790527ms total_cost_time:216.75372123718262ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:14328 prompt_cache_len:5151 prompt_cache_ratio:0.35950586264656614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 -DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:42 [batch.py:51] router release req id 8 -INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10896158218383789 s -INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.11008810997009277 s -DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=218284273525638454670084978330504081928, time:1750768722.6141953s req_ids:[8] -DEBUG 06-24 20:38:42 [manager.py:391] -ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:212.10265159606934ms total_cost_time:212.14795112609863ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14329 prompt_cache_len:5151 prompt_cache_ratio:0.3594807732570312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 -DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:42 [batch.py:51] router release req id 8 -INFO 06-24 20:38:42 [manager.py:224] router recive req id 8 cost time 0.10892558097839355 s -INFO 06-24 20:38:42 [manager.py:68] detokenization recv req id 8 cost time 0.11002683639526367 s -DEBUG 06-24 20:38:42 [manager.py:391] Prefill Batch: batch_id=157728436992794349873683003312163441299, time:1750768722.8347185s req_ids:[8] -DEBUG 06-24 20:38:42 [manager.py:391] -ERROR 06-24 20:38:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.9938087463379ms total_cost_time:217.03696250915527ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14330 prompt_cache_len:5151 prompt_cache_ratio:0.3594556873691556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 -DEBUG 06-24 20:38:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:42 [batch.py:51] router release req id 8 -INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.1075901985168457 s -INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.10854625701904297 s -DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=252808046266179192708425490443751566868, time:1750768723.0545304s req_ids:[8] -DEBUG 06-24 20:38:43 [manager.py:391] -ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:42 lightllm_req_id:8 first_token_cost:216.59159660339355ms total_cost_time:216.63522720336914ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14331 prompt_cache_len:5151 prompt_cache_ratio:0.3594306049822064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 -DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:43 [batch.py:51] router release req id 8 -INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.10807180404663086 s -INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.11003780364990234 s -DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=66916177527109151275972490958749537861, time:1750768723.2817373s req_ids:[8] -DEBUG 06-24 20:38:43 [manager.py:391] -ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:393.784761428833ms total_cost_time:393.829345703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14332 prompt_cache_len:5151 prompt_cache_ratio:0.35940552609545073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 -DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:43 [batch.py:51] router release req id 8 -DEBUG 06-24 20:38:43 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:43 [manager.py:283] -DEBUG 06-24 20:38:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:43 [manager.py:284] -INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.10848855972290039 s -INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.11046767234802246 s -DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=31774115044789403844710917488938268970, time:1750768723.6784708s req_ids:[8] -DEBUG 06-24 20:38:43 [manager.py:391] -ERROR 06-24 20:38:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:217.55647659301758ms total_cost_time:217.60034561157227ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14333 prompt_cache_len:5151 prompt_cache_ratio:0.359380450708156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 -DEBUG 06-24 20:38:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:43 [batch.py:51] router release req id 8 -INFO 06-24 20:38:43 [manager.py:224] router recive req id 8 cost time 0.1088707447052002 s -INFO 06-24 20:38:43 [manager.py:68] detokenization recv req id 8 cost time 0.1108546257019043 s -DEBUG 06-24 20:38:43 [manager.py:391] Prefill Batch: batch_id=164320431104385602624799453999687375411, time:1750768723.9022744s req_ids:[8] -DEBUG 06-24 20:38:43 [manager.py:391] -ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:43 lightllm_req_id:8 first_token_cost:222.16558456420898ms total_cost_time:222.21136093139648ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14334 prompt_cache_len:5151 prompt_cache_ratio:0.3593553788195898 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 -DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:44 [batch.py:51] router release req id 8 -INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10790753364562988 s -INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.10989141464233398 s -DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=181260337437464389101440027708675598326, time:1750768724.1426075s req_ids:[8] -DEBUG 06-24 20:38:44 [manager.py:391] -ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:231.49895668029785ms total_cost_time:231.54497146606445ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14335 prompt_cache_len:5151 prompt_cache_ratio:0.35933031042901986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 -DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:44 [batch.py:51] router release req id 8 -INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10909438133239746 s -INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11114954948425293 s -DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=190273325535456149013251387398130289637, time:1750768724.3705008s req_ids:[8] -DEBUG 06-24 20:38:44 [manager.py:391] -ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:214.81633186340332ms total_cost_time:214.8590087890625ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14336 prompt_cache_len:5151 prompt_cache_ratio:0.3593052455357143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 -DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:44 [batch.py:51] router release req id 8 -INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10935759544372559 s -INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s -DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=208737211068103266627193242115968320951, time:1750768724.5907636s req_ids:[8] -DEBUG 06-24 20:38:44 [manager.py:391] -ERROR 06-24 20:38:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:391.3564682006836ms total_cost_time:391.4015293121338ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14337 prompt_cache_len:5151 prompt_cache_ratio:0.3592801841389412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 -DEBUG 06-24 20:38:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:44 [batch.py:51] router release req id 8 -INFO 06-24 20:38:44 [manager.py:224] router recive req id 8 cost time 0.10973048210144043 s -INFO 06-24 20:38:44 [manager.py:68] detokenization recv req id 8 cost time 0.11203813552856445 s -DEBUG 06-24 20:38:44 [manager.py:391] Prefill Batch: batch_id=219038643703275278225190310142744438, time:1750768724.9901671s req_ids:[8] -DEBUG 06-24 20:38:44 [manager.py:391] -ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:44 lightllm_req_id:8 first_token_cost:221.30727767944336ms total_cost_time:221.35186195373535ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14338 prompt_cache_len:5151 prompt_cache_ratio:0.359255126237969 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 -DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:45 [batch.py:51] router release req id 8 -INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.10908961296081543 s -INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11129379272460938 s -DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=108923366629408673944393699936749510904, time:1750768725.21533s req_ids:[8] -DEBUG 06-24 20:38:45 [manager.py:391] -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:45 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.20893478393555ms total_cost_time:216.25328063964844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14339 prompt_cache_len:5151 prompt_cache_ratio:0.3592300718320664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 -DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:45 [batch.py:51] router release req id 8 -INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.1085367202758789 s -INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11077666282653809 s -DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=308632465537945908575211534247480297205, time:1750768725.4390216s req_ids:[8] -DEBUG 06-24 20:38:45 [manager.py:391] -ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:214.71166610717773ms total_cost_time:214.75553512573242ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14340 prompt_cache_len:5151 prompt_cache_ratio:0.3592050209205021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 -DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:45 [batch.py:51] router release req id 8 -INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.10823202133178711 s -INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.11020278930664062 s -DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=58550232841298009406527569958720847015, time:1750768725.6590996s req_ids:[8] -DEBUG 06-24 20:38:45 [manager.py:391] -ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.17555618286133ms total_cost_time:216.2187099456787ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14341 prompt_cache_len:5151 prompt_cache_ratio:0.35917997350254516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 -DEBUG 06-24 20:38:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:45 [batch.py:51] router release req id 8 -INFO 06-24 20:38:45 [manager.py:224] router recive req id 8 cost time 0.1077420711517334 s -INFO 06-24 20:38:45 [manager.py:68] detokenization recv req id 8 cost time 0.10983705520629883 s -DEBUG 06-24 20:38:45 [manager.py:391] Prefill Batch: batch_id=291740147306535194174220705649007674927, time:1750768725.8828664s req_ids:[8] -DEBUG 06-24 20:38:45 [manager.py:391] -ERROR 06-24 20:38:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:216.9952392578125ms total_cost_time:217.03863143920898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14342 prompt_cache_len:5151 prompt_cache_ratio:0.3591549295774648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 -DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:46 [batch.py:51] router release req id 8 -INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10864138603210449 s -INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11047554016113281 s -DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=294307543644076669893561398143932687949, time:1750768726.1049736s req_ids:[8] -DEBUG 06-24 20:38:46 [manager.py:391] -ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:45 lightllm_req_id:8 first_token_cost:356.8878173828125ms total_cost_time:356.9328784942627ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14343 prompt_cache_len:5151 prompt_cache_ratio:0.3591298891445304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 -DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:46 [batch.py:51] router release req id 8 -INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10932731628417969 s -INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11153793334960938 s -DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=291812422708849606056610866637691662713, time:1750768726.4673378s req_ids:[8] -DEBUG 06-24 20:38:46 [manager.py:391] -ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:209.23542976379395ms total_cost_time:209.28144454956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14344 prompt_cache_len:5151 prompt_cache_ratio:0.3591048522030117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 -DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:46 [batch.py:51] router release req id 8 -INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.1083974838256836 s -INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11069989204406738 s -DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=38303038482268747395093378139617829839, time:1750768726.6878989s req_ids:[8] -DEBUG 06-24 20:38:46 [manager.py:391] -ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:177.03914642333984ms total_cost_time:177.08253860473633ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14345 prompt_cache_len:5151 prompt_cache_ratio:0.3590798187521785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 -DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:46 [batch.py:51] router release req id 8 -INFO 06-24 20:38:46 [manager.py:224] router recive req id 8 cost time 0.10903286933898926 s -INFO 06-24 20:38:46 [manager.py:68] detokenization recv req id 8 cost time 0.11106562614440918 s -DEBUG 06-24 20:38:46 [manager.py:391] Prefill Batch: batch_id=333635470932938951419752778690552129194, time:1750768726.8706748s req_ids:[8] -DEBUG 06-24 20:38:46 [manager.py:391] -ERROR 06-24 20:38:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:205.79886436462402ms total_cost_time:205.8432102203369ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14346 prompt_cache_len:5151 prompt_cache_ratio:0.3590547887913007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 -DEBUG 06-24 20:38:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:46 [batch.py:51] router release req id 8 -INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10878229141235352 s -INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11083340644836426 s -DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=240321357164904059742514231423195958365, time:1750768727.0804899s req_ids:[8] -DEBUG 06-24 20:38:47 [manager.py:391] -INFO 06-24 20:38:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:46 lightllm_req_id:8 first_token_cost:209.63644981384277ms total_cost_time:209.68294143676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14347 prompt_cache_len:5151 prompt_cache_ratio:0.3590297623196487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 -DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:47 [batch.py:51] router release req id 8 -INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10815811157226562 s -INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.1102440357208252 s -DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=291782924917449856868981054640719534167, time:1750768727.2972672s req_ids:[8] -DEBUG 06-24 20:38:47 [manager.py:391] -ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:381.78181648254395ms total_cost_time:381.82711601257324ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14348 prompt_cache_len:5151 prompt_cache_ratio:0.3590047393364929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 -DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:47 [batch.py:51] router release req id 8 -INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10808205604553223 s -INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11005401611328125 s -DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=187372096983019345673797922481341311801, time:1750768727.6876829s req_ids:[8] -DEBUG 06-24 20:38:47 [manager.py:391] -ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:210.90936660766602ms total_cost_time:210.9541893005371ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14349 prompt_cache_len:5151 prompt_cache_ratio:0.3589797198411039 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 -DEBUG 06-24 20:38:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:47 [batch.py:51] router release req id 8 -INFO 06-24 20:38:47 [manager.py:224] router recive req id 8 cost time 0.10875988006591797 s -INFO 06-24 20:38:47 [manager.py:68] detokenization recv req id 8 cost time 0.11075282096862793 s -DEBUG 06-24 20:38:47 [manager.py:391] Prefill Batch: batch_id=206689769635735971066669334592737485356, time:1750768727.9021838s req_ids:[8] -DEBUG 06-24 20:38:47 [manager.py:391] -ERROR 06-24 20:38:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:47 lightllm_req_id:8 first_token_cost:212.2800350189209ms total_cost_time:212.324857711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14350 prompt_cache_len:5151 prompt_cache_ratio:0.35895470383275263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 -DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:48 [batch.py:51] router release req id 8 -INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10795807838439941 s -INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.10990285873413086 s -DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=183360311164322756586402179982383911460, time:1750768728.1225817s req_ids:[8] -DEBUG 06-24 20:38:48 [manager.py:391] -ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:217.41986274719238ms total_cost_time:217.46373176574707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14351 prompt_cache_len:5151 prompt_cache_ratio:0.35892969131071006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 -DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:48 [batch.py:51] router release req id 8 -INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s -INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11015558242797852 s -DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=159635365050810072896697432373743114288, time:1750768728.3529146s req_ids:[8] -DEBUG 06-24 20:38:48 [manager.py:391] -ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:225.53491592407227ms total_cost_time:225.57783126831055ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14352 prompt_cache_len:5151 prompt_cache_ratio:0.3589046822742475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 -DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:48 [batch.py:51] router release req id 8 -INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10923480987548828 s -INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11126923561096191 s -DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=204380967859539168744423039056410633516, time:1750768728.5794687s req_ids:[8] -DEBUG 06-24 20:38:48 [manager.py:391] -ERROR 06-24 20:38:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:218.0943489074707ms total_cost_time:218.1377410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14353 prompt_cache_len:5151 prompt_cache_ratio:0.3588796767226364 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 -DEBUG 06-24 20:38:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:48 [batch.py:51] router release req id 8 -INFO 06-24 20:38:48 [manager.py:224] router recive req id 8 cost time 0.10816001892089844 s -INFO 06-24 20:38:48 [manager.py:68] detokenization recv req id 8 cost time 0.11024284362792969 s -DEBUG 06-24 20:38:48 [manager.py:391] Prefill Batch: batch_id=75900295801764187184313746588473686482, time:1750768728.8027496s req_ids:[8] -DEBUG 06-24 20:38:48 [manager.py:391] -ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:48 lightllm_req_id:8 first_token_cost:382.25293159484863ms total_cost_time:382.29846954345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14354 prompt_cache_len:5151 prompt_cache_ratio:0.35885467465514836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 -DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:49 [batch.py:51] router release req id 8 -INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10794353485107422 s -INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008167266845703 s -DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=219203778563677880315393675278291998858, time:1750768729.1941311s req_ids:[8] -DEBUG 06-24 20:38:49 [manager.py:391] -ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:219.7129726409912ms total_cost_time:219.7573184967041ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14355 prompt_cache_len:5151 prompt_cache_ratio:0.35882967607105537 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 -DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:49 [batch.py:51] router release req id 8 -INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10832571983337402 s -INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11036443710327148 s -DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=164259472469265572822914066443300393774, time:1750768729.4173343s req_ids:[8] -DEBUG 06-24 20:38:49 [manager.py:391] -ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:215.66057205200195ms total_cost_time:215.70396423339844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14356 prompt_cache_len:5151 prompt_cache_ratio:0.3588046809696294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 -DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:49 [batch.py:51] router release req id 8 -INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.10794687271118164 s -INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.11008763313293457 s -DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=230559937663556200687943893613543661208, time:1750768729.6427717s req_ids:[8] -DEBUG 06-24 20:38:49 [manager.py:391] -ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:38:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 57208.811 tokens/s -DEBUG 06-24 20:38:49 [stats.py:37] Avg prompt tokens throughput: 57200.832 tokens/s -DEBUG 06-24 20:38:49 [stats.py:37] Avg generate tokens throughput: 7.979 tokens/s -INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:217.7128791809082ms total_cost_time:217.7567481994629ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14357 prompt_cache_len:5151 prompt_cache_ratio:0.35877968935014276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 -DEBUG 06-24 20:38:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:49 [batch.py:51] router release req id 8 -INFO 06-24 20:38:49 [manager.py:224] router recive req id 8 cost time 0.1080632209777832 s -INFO 06-24 20:38:49 [manager.py:68] detokenization recv req id 8 cost time 0.10985755920410156 s -DEBUG 06-24 20:38:49 [manager.py:391] Prefill Batch: batch_id=42220058573074296046002228521921814187, time:1750768729.8654232s req_ids:[8] -DEBUG 06-24 20:38:49 [manager.py:391] -ERROR 06-24 20:38:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:218.34492683410645ms total_cost_time:218.39022636413574ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14358 prompt_cache_len:5151 prompt_cache_ratio:0.3587547012118679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 -DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:50 [batch.py:51] router release req id 8 -INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10917210578918457 s -INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11118721961975098 s -DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=87025171011789817108054396836622288477, time:1750768730.0888405s req_ids:[8] -DEBUG 06-24 20:38:50 [manager.py:391] -ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:49 lightllm_req_id:8 first_token_cost:214.59460258483887ms total_cost_time:214.63823318481445ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14359 prompt_cache_len:5151 prompt_cache_ratio:0.3587297165540776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 -DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:50 [batch.py:51] router release req id 8 -INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.310685396194458 s -INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.31278300285339355 s -DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=144207721673879602576625147573292808344, time:1750768730.5200946s req_ids:[8] -DEBUG 06-24 20:38:50 [manager.py:391] -ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:431.81920051574707ms total_cost_time:431.86378479003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14360 prompt_cache_len:5151 prompt_cache_ratio:0.3587047353760446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 -DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:50 [batch.py:51] router release req id 8 -INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10976958274841309 s -INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s -DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=136236594581020789496777564185792273578, time:1750768730.7491143s req_ids:[8] -DEBUG 06-24 20:38:50 [manager.py:391] -ERROR 06-24 20:38:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:215.64555168151855ms total_cost_time:215.69037437438965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14361 prompt_cache_len:5151 prompt_cache_ratio:0.35867975767704197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 -DEBUG 06-24 20:38:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:50 [batch.py:51] router release req id 8 -INFO 06-24 20:38:50 [manager.py:224] router recive req id 8 cost time 0.10815882682800293 s -INFO 06-24 20:38:50 [manager.py:68] detokenization recv req id 8 cost time 0.11040616035461426 s -DEBUG 06-24 20:38:50 [manager.py:391] Prefill Batch: batch_id=100913643305791435007204448523901509860, time:1750768730.973198s req_ids:[8] -DEBUG 06-24 20:38:50 [manager.py:391] -ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:50 lightllm_req_id:8 first_token_cost:217.2102928161621ms total_cost_time:217.2539234161377ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14362 prompt_cache_len:5151 prompt_cache_ratio:0.3586547834563431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 -DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:51 [batch.py:51] router release req id 8 -INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10825490951538086 s -INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.11021113395690918 s -DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=51427911362471854734295822673637634841, time:1750768731.208283s req_ids:[8] -DEBUG 06-24 20:38:51 [manager.py:391] -ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:232.28883743286133ms total_cost_time:232.33413696289062ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14363 prompt_cache_len:5151 prompt_cache_ratio:0.3586298127132215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 -DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:51 [batch.py:51] router release req id 8 -INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10936856269836426 s -INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.11143112182617188 s -DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=302438928585701905998019858229401354922, time:1750768731.434327s req_ids:[8] -DEBUG 06-24 20:38:51 [manager.py:391] -ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:214.42437171936035ms total_cost_time:214.46919441223145ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14364 prompt_cache_len:5151 prompt_cache_ratio:0.3586048454469507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 -DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:51 [batch.py:51] router release req id 8 -INFO 06-24 20:38:51 [manager.py:224] router recive req id 8 cost time 0.10815572738647461 s -INFO 06-24 20:38:51 [manager.py:68] detokenization recv req id 8 cost time 0.1101076602935791 s -DEBUG 06-24 20:38:51 [manager.py:391] Prefill Batch: batch_id=334067944268963943074233574816122345491, time:1750768731.6567454s req_ids:[8] -DEBUG 06-24 20:38:51 [manager.py:391] -ERROR 06-24 20:38:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:398.45943450927734ms total_cost_time:398.50425720214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14365 prompt_cache_len:5151 prompt_cache_ratio:0.3585798816568047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 -DEBUG 06-24 20:38:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:51 [batch.py:51] router release req id 8 -INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10930657386779785 s -INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11141562461853027 s -DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=128569987552541351663750449937573508348, time:1750768732.0627298s req_ids:[8] -DEBUG 06-24 20:38:52 [manager.py:391] -ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:51 lightllm_req_id:8 first_token_cost:218.7967300415039ms total_cost_time:218.8425064086914ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14366 prompt_cache_len:5151 prompt_cache_ratio:0.35855492134205763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 -DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:52 [batch.py:51] router release req id 8 -INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10902714729309082 s -INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11107635498046875 s -DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=185486653347844698767421619877408002861, time:1750768732.2890964s req_ids:[8] -DEBUG 06-24 20:38:52 [manager.py:391] -ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:218.80125999450684ms total_cost_time:218.84393692016602ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14367 prompt_cache_len:5151 prompt_cache_ratio:0.3585299645019837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 -DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:52 [batch.py:51] router release req id 8 -INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.1077420711517334 s -INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.10975503921508789 s -DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=302612390582082725912719117851456903468, time:1750768732.5219085s req_ids:[8] -DEBUG 06-24 20:38:52 [manager.py:391] -ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:223.0243682861328ms total_cost_time:223.0682373046875ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14368 prompt_cache_len:5151 prompt_cache_ratio:0.3585050111358575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 -DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:52 [batch.py:51] router release req id 8 -INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.1093752384185791 s -INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11134886741638184 s -DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=258722905979772086307792994837766222813, time:1750768732.7581656s req_ids:[8] -DEBUG 06-24 20:38:52 [manager.py:391] -ERROR 06-24 20:38:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:231.9178581237793ms total_cost_time:231.9622039794922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14369 prompt_cache_len:5151 prompt_cache_ratio:0.35848006124295356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 -DEBUG 06-24 20:38:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:52 [batch.py:51] router release req id 8 -INFO 06-24 20:38:52 [manager.py:224] router recive req id 8 cost time 0.10820317268371582 s -INFO 06-24 20:38:52 [manager.py:68] detokenization recv req id 8 cost time 0.11017036437988281 s -DEBUG 06-24 20:38:52 [manager.py:391] Prefill Batch: batch_id=328147808206937819951451970975491078443, time:1750768732.9847677s req_ids:[8] -DEBUG 06-24 20:38:52 [manager.py:391] -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:52 lightllm_req_id:8 first_token_cost:218.23668479919434ms total_cost_time:218.27960014343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14370 prompt_cache_len:5151 prompt_cache_ratio:0.358455114822547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 -INFO 06-24 20:38:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:38:53 [statics_utils.py:24] mean first cost: 231.8866850800596 ms -INFO 06-24 20:38:53 [statics_utils.py:24] mean per token cost: 0.058225489621477514 ms -DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:53 [batch.py:51] router release req id 8 -INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.31124234199523926 s -INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.3131675720214844 s -DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=190620952825917569310048264353160084655, time:1750768733.41747s req_ids:[8] -DEBUG 06-24 20:38:53 [manager.py:391] -ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:424.60179328918457ms total_cost_time:424.64613914489746ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14371 prompt_cache_len:5151 prompt_cache_ratio:0.35843017187391274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 -DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:53 [batch.py:51] router release req id 8 -INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.10901212692260742 s -INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.11099767684936523 s -DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=308739650557922791188371753540802801577, time:1750768733.6526062s req_ids:[8] -DEBUG 06-24 20:38:53 [manager.py:391] -ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:227.43821144104004ms total_cost_time:227.48303413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14372 prompt_cache_len:5151 prompt_cache_ratio:0.3584052323963262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 -DEBUG 06-24 20:38:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:53 [batch.py:51] router release req id 8 -INFO 06-24 20:38:53 [manager.py:224] router recive req id 8 cost time 0.10796499252319336 s -INFO 06-24 20:38:53 [manager.py:68] detokenization recv req id 8 cost time 0.10992717742919922 s -DEBUG 06-24 20:38:53 [manager.py:391] Prefill Batch: batch_id=14946206708434810664476221206140397835, time:1750768733.8791194s req_ids:[8] -DEBUG 06-24 20:38:53 [manager.py:391] -ERROR 06-24 20:38:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:212.75925636291504ms total_cost_time:212.80384063720703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14373 prompt_cache_len:5151 prompt_cache_ratio:0.3583802963890628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 -DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:54 [batch.py:51] router release req id 8 -INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10826492309570312 s -INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11031579971313477 s -DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=150461302604282984862274878916774915628, time:1750768734.0992568s req_ids:[8] -DEBUG 06-24 20:38:54 [manager.py:391] -ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:53 lightllm_req_id:8 first_token_cost:214.50495719909668ms total_cost_time:214.54858779907227ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14374 prompt_cache_len:5151 prompt_cache_ratio:0.3583553638513984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 -DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:54 [batch.py:51] router release req id 8 -INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10945749282836914 s -INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.1114351749420166 s -DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=66564869900728622193550877565057125872, time:1750768734.3318958s req_ids:[8] -DEBUG 06-24 20:38:54 [manager.py:391] -ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:229.88367080688477ms total_cost_time:229.92849349975586ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14375 prompt_cache_len:5151 prompt_cache_ratio:0.3583304347826087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 -DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:54 [batch.py:51] router release req id 8 -INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10933399200439453 s -INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11140799522399902 s -DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=326262474543408525129814083552658122830, time:1750768734.5575106s req_ids:[8] -DEBUG 06-24 20:38:54 [manager.py:391] -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:38:54 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:38:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:389.0037536621094ms total_cost_time:389.04762268066406ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14376 prompt_cache_len:5151 prompt_cache_ratio:0.35830550918196996 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 -DEBUG 06-24 20:38:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:54 [batch.py:51] router release req id 8 -INFO 06-24 20:38:54 [manager.py:224] router recive req id 8 cost time 0.10873150825500488 s -INFO 06-24 20:38:54 [manager.py:68] detokenization recv req id 8 cost time 0.11072945594787598 s -DEBUG 06-24 20:38:54 [manager.py:391] Prefill Batch: batch_id=93523537939754278613645912060833902661, time:1750768734.9666553s req_ids:[8] -DEBUG 06-24 20:38:54 [manager.py:391] -ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:54 lightllm_req_id:8 first_token_cost:230.16691207885742ms total_cost_time:230.2114963531494ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14377 prompt_cache_len:5151 prompt_cache_ratio:0.3582805870487584 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 -DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:55 [batch.py:51] router release req id 8 -INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10906672477722168 s -INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s -DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=197226083631269664035832265751259241060, time:1750768735.2001183s req_ids:[8] -DEBUG 06-24 20:38:55 [manager.py:391] -ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:224.76983070373535ms total_cost_time:224.81369972229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14378 prompt_cache_len:5151 prompt_cache_ratio:0.35825566838225065 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 -DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:55 [batch.py:51] router release req id 8 -INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10769033432006836 s -INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.10947155952453613 s -DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=292313296466008008938383126847553270306, time:1750768735.4402578s req_ids:[8] -DEBUG 06-24 20:38:55 [manager.py:391] -ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:234.91454124450684ms total_cost_time:234.95888710021973ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14379 prompt_cache_len:5151 prompt_cache_ratio:0.35823075318172337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 -DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:55 [batch.py:51] router release req id 8 -INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10794973373413086 s -INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.10992741584777832 s -DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=72923984217499151412962265561562571607, time:1750768735.6675034s req_ids:[8] -DEBUG 06-24 20:38:55 [manager.py:391] -ERROR 06-24 20:38:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:217.94581413269043ms total_cost_time:217.99087524414062ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14380 prompt_cache_len:5151 prompt_cache_ratio:0.3582058414464534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 -DEBUG 06-24 20:38:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:55 [batch.py:51] router release req id 8 -INFO 06-24 20:38:55 [manager.py:224] router recive req id 8 cost time 0.10853099822998047 s -INFO 06-24 20:38:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103672981262207 s -DEBUG 06-24 20:38:55 [manager.py:391] Prefill Batch: batch_id=300451890550869285553661382977985928981, time:1750768735.9052405s req_ids:[8] -DEBUG 06-24 20:38:55 [manager.py:391] -ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:55 lightllm_req_id:8 first_token_cost:230.4835319519043ms total_cost_time:230.52692413330078ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14381 prompt_cache_len:5151 prompt_cache_ratio:0.358180933175718 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 -DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:56 [batch.py:51] router release req id 8 -INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10938644409179688 s -INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11136913299560547 s -DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=330870904745291035828018625600186885491, time:1750768736.143805s req_ids:[8] -DEBUG 06-24 20:38:56 [manager.py:391] -ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:415.82584381103516ms total_cost_time:415.87114334106445ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14382 prompt_cache_len:5151 prompt_cache_ratio:0.35815602836879434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 -DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:56 [batch.py:51] router release req id 8 -INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10914158821105957 s -INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11096072196960449 s -DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=273415856646088457513876601937729291533, time:1750768736.5768626s req_ids:[8] -DEBUG 06-24 20:38:56 [manager.py:391] -ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:199.27334785461426ms total_cost_time:199.31554794311523ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14383 prompt_cache_len:5151 prompt_cache_ratio:0.35813112702496 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 -DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:56 [batch.py:51] router release req id 8 -INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10852289199829102 s -INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.11040258407592773 s -DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=154890581781123149465997366429916974874, time:1750768736.760692s req_ids:[8] -DEBUG 06-24 20:38:56 [manager.py:391] -ERROR 06-24 20:38:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:208.01401138305664ms total_cost_time:208.05859565734863ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14384 prompt_cache_len:5151 prompt_cache_ratio:0.35810622914349277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 -DEBUG 06-24 20:38:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:56 [batch.py:51] router release req id 8 -INFO 06-24 20:38:56 [manager.py:224] router recive req id 8 cost time 0.10660934448242188 s -INFO 06-24 20:38:56 [manager.py:68] detokenization recv req id 8 cost time 0.10835647583007812 s -DEBUG 06-24 20:38:56 [manager.py:391] Prefill Batch: batch_id=55497801027213253594998370699236783114, time:1750768736.977141s req_ids:[8] -DEBUG 06-24 20:38:56 [manager.py:391] -ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:56 lightllm_req_id:8 first_token_cost:214.25914764404297ms total_cost_time:214.30373191833496ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14385 prompt_cache_len:5151 prompt_cache_ratio:0.3580813347236705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 -DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:57 [batch.py:51] router release req id 8 -INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10747647285461426 s -INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.10928988456726074 s -DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=161466351885057138207589984674515676523, time:1750768737.1987567s req_ids:[8] -DEBUG 06-24 20:38:57 [manager.py:391] -ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:175.0340461730957ms total_cost_time:175.079345703125ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14386 prompt_cache_len:5151 prompt_cache_ratio:0.3580564437647713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 -DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:57 [batch.py:51] router release req id 8 -INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10883688926696777 s -INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.11072778701782227 s -DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=1348720482247470096358465875504957843, time:1750768737.3804703s req_ids:[8] -DEBUG 06-24 20:38:57 [manager.py:391] -ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:210.63899993896484ms total_cost_time:210.70456504821777ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:14387 prompt_cache_len:5151 prompt_cache_ratio:0.3580315562660735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 -DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:57 [batch.py:51] router release req id 8 -INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10799670219421387 s -INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.11003375053405762 s -DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=54127338420909867432850575451597660016, time:1750768737.5996761s req_ids:[8] -DEBUG 06-24 20:38:57 [manager.py:391] -ERROR 06-24 20:38:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:388.319730758667ms total_cost_time:388.3640766143799ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14388 prompt_cache_len:5151 prompt_cache_ratio:0.3580066722268557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 -DEBUG 06-24 20:38:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:57 [batch.py:51] router release req id 8 -INFO 06-24 20:38:57 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s -INFO 06-24 20:38:57 [manager.py:68] detokenization recv req id 8 cost time 0.10942745208740234 s -DEBUG 06-24 20:38:57 [manager.py:391] Prefill Batch: batch_id=221223602299171783311719552443533064284, time:1750768737.9938319s req_ids:[8] -DEBUG 06-24 20:38:57 [manager.py:391] -ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:57 lightllm_req_id:8 first_token_cost:178.46941947937012ms total_cost_time:178.51519584655762ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14389 prompt_cache_len:5151 prompt_cache_ratio:0.35798179164639654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 -DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:58 [batch.py:51] router release req id 8 -INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10815215110778809 s -INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.1100609302520752 s -DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=219340245958995027270679775037366800788, time:1750768738.1792977s req_ids:[8] -DEBUG 06-24 20:38:58 [manager.py:391] -ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:205.09815216064453ms total_cost_time:205.14249801635742ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14390 prompt_cache_len:5151 prompt_cache_ratio:0.357956914523975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 -DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:58 [batch.py:51] router release req id 8 -INFO 06-24 20:38:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.1079709529876709 s -INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.10998773574829102 s -DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=252590943439709788260819316631739246797, time:1750768738.390632s req_ids:[8] -DEBUG 06-24 20:38:58 [manager.py:391] -ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:211.53950691223145ms total_cost_time:211.58361434936523ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14391 prompt_cache_len:5151 prompt_cache_ratio:0.35793204085887015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 -DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:58 [batch.py:51] router release req id 8 -INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10957646369934082 s -INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.11167478561401367 s -DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=80720211012966524024828235284531471557, time:1750768738.6091018s req_ids:[8] -DEBUG 06-24 20:38:58 [manager.py:391] -ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:207.5796127319336ms total_cost_time:207.62228965759277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14392 prompt_cache_len:5151 prompt_cache_ratio:0.3579071706503613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 -DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:58 [batch.py:51] router release req id 8 -INFO 06-24 20:38:58 [manager.py:224] router recive req id 8 cost time 0.10872101783752441 s -INFO 06-24 20:38:58 [manager.py:68] detokenization recv req id 8 cost time 0.11004400253295898 s -DEBUG 06-24 20:38:58 [manager.py:391] Prefill Batch: batch_id=75088281254800938641998824791851614713, time:1750768738.8223948s req_ids:[8] -DEBUG 06-24 20:38:58 [manager.py:391] -ERROR 06-24 20:38:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:215.1663303375244ms total_cost_time:215.2094841003418ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14393 prompt_cache_len:5151 prompt_cache_ratio:0.35788230389772807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 -DEBUG 06-24 20:38:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:58 [batch.py:51] router release req id 8 -INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.10905337333679199 s -INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11034965515136719 s -DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=135978810009958514769907754484973747396, time:1750768739.0448203s req_ids:[8] -DEBUG 06-24 20:38:59 [manager.py:391] -ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:58 lightllm_req_id:8 first_token_cost:380.418062210083ms total_cost_time:380.4628849029541ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14394 prompt_cache_len:5151 prompt_cache_ratio:0.3578574406002501 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 -DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:59 [batch.py:51] router release req id 8 -INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.10909557342529297 s -INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s -DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=39040268564985338010407122996169265194, time:1750768739.4313838s req_ids:[8] -DEBUG 06-24 20:38:59 [manager.py:391] -ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:216.83216094970703ms total_cost_time:216.87555313110352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14395 prompt_cache_len:5151 prompt_cache_ratio:0.35783258075720736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 -DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:59 [batch.py:51] router release req id 8 -INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.1085209846496582 s -INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11037063598632812 s -DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=98044062420297885918263618598664678427, time:1750768739.6537817s req_ids:[8] -DEBUG 06-24 20:38:59 [manager.py:391] -ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:38:59 [stats.py:37] Avg tokens(prompt+generate) throughput: 56019.435 tokens/s -DEBUG 06-24 20:38:59 [stats.py:37] Avg prompt tokens throughput: 56011.643 tokens/s -DEBUG 06-24 20:38:59 [stats.py:37] Avg generate tokens throughput: 7.792 tokens/s -INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:214.57576751708984ms total_cost_time:214.62082862854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14396 prompt_cache_len:5151 prompt_cache_ratio:0.35780772436787994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 -DEBUG 06-24 20:38:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:38:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:38:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:38:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:38:59 [batch.py:51] router release req id 8 -INFO 06-24 20:38:59 [manager.py:224] router recive req id 8 cost time 0.1094970703125 s -INFO 06-24 20:38:59 [manager.py:68] detokenization recv req id 8 cost time 0.11141300201416016 s -DEBUG 06-24 20:38:59 [manager.py:391] Prefill Batch: batch_id=177876037772334646041545534003046497116, time:1750768739.8762355s req_ids:[8] -DEBUG 06-24 20:38:59 [manager.py:391] -ERROR 06-24 20:38:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:38:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:38:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:210.80851554870605ms total_cost_time:210.85095405578613ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14397 prompt_cache_len:5151 prompt_cache_ratio:0.35778287143154824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:38:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 -DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:00 [batch.py:51] router release req id 8 -INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10878515243530273 s -INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11072206497192383 s -DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=259672313330926242564452384740224089885, time:1750768740.0926967s req_ids:[8] -DEBUG 06-24 20:39:00 [manager.py:391] -ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:38:59 lightllm_req_id:8 first_token_cost:209.92541313171387ms total_cost_time:209.96952056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14398 prompt_cache_len:5151 prompt_cache_ratio:0.35775802194749273 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 -DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:00 [batch.py:51] router release req id 8 -INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10841870307922363 s -INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11034607887268066 s -DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=66094555359693542176500460260931110554, time:1750768740.3087192s req_ids:[8] -DEBUG 06-24 20:39:00 [manager.py:391] -ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:208.8925838470459ms total_cost_time:208.93621444702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14399 prompt_cache_len:5151 prompt_cache_ratio:0.3577331759149941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 -DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:00 [batch.py:51] router release req id 8 -INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s -INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11062312126159668 s -DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=89801279844420085305197463016979616238, time:1750768740.5253499s req_ids:[8] -DEBUG 06-24 20:39:00 [manager.py:391] -ERROR 06-24 20:39:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:374.97568130493164ms total_cost_time:375.02026557922363ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14400 prompt_cache_len:5151 prompt_cache_ratio:0.35770833333333335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 -DEBUG 06-24 20:39:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:00 [batch.py:51] router release req id 8 -INFO 06-24 20:39:00 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s -INFO 06-24 20:39:00 [manager.py:68] detokenization recv req id 8 cost time 0.11110973358154297 s -DEBUG 06-24 20:39:00 [manager.py:391] Prefill Batch: batch_id=101407775052477857075531335678002910047, time:1750768740.9061217s req_ids:[8] -DEBUG 06-24 20:39:00 [manager.py:391] -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:00 lightllm_req_id:8 first_token_cost:216.61758422851562ms total_cost_time:216.66240692138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14401 prompt_cache_len:5151 prompt_cache_ratio:0.3576834942017915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 -DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:01 [batch.py:51] router release req id 8 -INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.1093437671661377 s -INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11132502555847168 s -DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=232523285198008779268083075923057207564, time:1750768741.1300201s req_ids:[8] -DEBUG 06-24 20:39:01 [manager.py:391] -ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:215.57164192199707ms total_cost_time:215.61241149902344ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14402 prompt_cache_len:5151 prompt_cache_ratio:0.35765865851965006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 -DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:01 [batch.py:51] router release req id 8 -INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.10904860496520996 s -INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11113572120666504 s -DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=251055890578191398376743145081402192827, time:1750768741.3518686s req_ids:[8] -DEBUG 06-24 20:39:01 [manager.py:391] -ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:213.78469467163086ms total_cost_time:213.82904052734375ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14403 prompt_cache_len:5151 prompt_cache_ratio:0.3576338262861904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 -DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:01 [batch.py:51] router release req id 8 -INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.10890364646911621 s -INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11105918884277344 s -DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=80809721121372523809235040015590200844, time:1750768741.57317s req_ids:[8] -DEBUG 06-24 20:39:01 [manager.py:391] -ERROR 06-24 20:39:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:209.88702774047852ms total_cost_time:209.9294662475586ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14404 prompt_cache_len:5151 prompt_cache_ratio:0.35760899750069425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 -DEBUG 06-24 20:39:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:01 [batch.py:51] router release req id 8 -INFO 06-24 20:39:01 [manager.py:224] router recive req id 8 cost time 0.1081538200378418 s -INFO 06-24 20:39:01 [manager.py:68] detokenization recv req id 8 cost time 0.11001133918762207 s -DEBUG 06-24 20:39:01 [manager.py:391] Prefill Batch: batch_id=221295420808640508336952611338352499095, time:1750768741.7896383s req_ids:[8] -DEBUG 06-24 20:39:01 [manager.py:391] -ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:01 lightllm_req_id:8 first_token_cost:395.2326774597168ms total_cost_time:395.2784538269043ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14405 prompt_cache_len:5151 prompt_cache_ratio:0.3575841721624436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 -DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:02 [batch.py:51] router release req id 8 -INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10817766189575195 s -INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11005258560180664 s -DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=207463655190164405078169938567625121943, time:1750768742.1952813s req_ids:[8] -DEBUG 06-24 20:39:02 [manager.py:391] -ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:221.3287353515625ms total_cost_time:221.3733196258545ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14406 prompt_cache_len:5151 prompt_cache_ratio:0.35755935027072056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 -DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:02 [batch.py:51] router release req id 8 -INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10891151428222656 s -INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11093950271606445 s -DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=314058917107092437678228266287984203637, time:1750768742.4193077s req_ids:[8] -DEBUG 06-24 20:39:02 [manager.py:391] -ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:213.9892578125ms total_cost_time:214.03264999389648ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14407 prompt_cache_len:5151 prompt_cache_ratio:0.3575345318248074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 -DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:02 [batch.py:51] router release req id 8 -INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10848164558410645 s -INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11054587364196777 s -DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=113362758880455398170138277649091583106, time:1750768742.639642s req_ids:[8] -DEBUG 06-24 20:39:02 [manager.py:391] -ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:214.68663215637207ms total_cost_time:214.73288536071777ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14408 prompt_cache_len:5151 prompt_cache_ratio:0.3575097168239867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 -DEBUG 06-24 20:39:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:02 [batch.py:51] router release req id 8 -INFO 06-24 20:39:02 [manager.py:224] router recive req id 8 cost time 0.10929512977600098 s -INFO 06-24 20:39:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118578910827637 s -DEBUG 06-24 20:39:02 [manager.py:391] Prefill Batch: batch_id=295771330359681548590829762405999873300, time:1750768742.8610358s req_ids:[8] -DEBUG 06-24 20:39:02 [manager.py:391] -ERROR 06-24 20:39:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:212.14056015014648ms total_cost_time:212.18442916870117ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14409 prompt_cache_len:5151 prompt_cache_ratio:0.3574849052675411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 -DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:03 [batch.py:51] router release req id 8 -INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.10904240608215332 s -INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11108708381652832 s -DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=288477962522164341205604601498197622156, time:1750768743.0917377s req_ids:[8] -DEBUG 06-24 20:39:03 [manager.py:391] -ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:02 lightllm_req_id:8 first_token_cost:230.57818412780762ms total_cost_time:230.6220531463623ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14410 prompt_cache_len:5151 prompt_cache_ratio:0.35746009715475363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 -DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:03 [batch.py:51] router release req id 8 -INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.1087346076965332 s -INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11068272590637207 s -DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=91323997676189003678882725092398213662, time:1750768743.3321693s req_ids:[8] -DEBUG 06-24 20:39:03 [manager.py:391] -ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:233.4749698638916ms total_cost_time:233.5188388824463ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14411 prompt_cache_len:5151 prompt_cache_ratio:0.35743529248490735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 -DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:03 [batch.py:51] router release req id 8 -INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.3099710941314697 s -INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.312058687210083 s -DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=270966981986474575896899475949809773461, time:1750768743.7650669s req_ids:[8] -DEBUG 06-24 20:39:03 [manager.py:391] -ERROR 06-24 20:39:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:426.17154121398926ms total_cost_time:426.21564865112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14412 prompt_cache_len:5151 prompt_cache_ratio:0.3574104912572856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 -DEBUG 06-24 20:39:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:03 [batch.py:51] router release req id 8 -INFO 06-24 20:39:03 [manager.py:224] router recive req id 8 cost time 0.10918617248535156 s -INFO 06-24 20:39:03 [manager.py:68] detokenization recv req id 8 cost time 0.11100912094116211 s -DEBUG 06-24 20:39:03 [manager.py:391] Prefill Batch: batch_id=65283269701227960303078114703000133608, time:1750768743.991991s req_ids:[8] -DEBUG 06-24 20:39:03 [manager.py:391] -ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:03 lightllm_req_id:8 first_token_cost:218.22047233581543ms total_cost_time:218.26410293579102ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14413 prompt_cache_len:5151 prompt_cache_ratio:0.3573856934711719 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 -DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:04 [batch.py:51] router release req id 8 -INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10826349258422852 s -INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.1101534366607666 s -DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=17757040287005866686907265450172287252, time:1750768744.2156723s req_ids:[8] -DEBUG 06-24 20:39:04 [manager.py:391] -ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:216.73154830932617ms total_cost_time:216.77541732788086ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14414 prompt_cache_len:5151 prompt_cache_ratio:0.3573608991258499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 -DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:04 [batch.py:51] router release req id 8 -INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.1085977554321289 s -INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.11080336570739746 s -DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=258447223088035561513005383830075366042, time:1750768744.4377725s req_ids:[8] -DEBUG 06-24 20:39:04 [manager.py:391] -ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:213.0756378173828ms total_cost_time:213.1197452545166ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14415 prompt_cache_len:5151 prompt_cache_ratio:0.3573361082206035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 -DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:04 [batch.py:51] router release req id 8 -INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10781407356262207 s -INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.10985946655273438 s -DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=231123326205140437401424042414156079237, time:1750768744.6564646s req_ids:[8] -DEBUG 06-24 20:39:04 [manager.py:391] -ERROR 06-24 20:39:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:213.28282356262207ms total_cost_time:213.32502365112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14416 prompt_cache_len:5151 prompt_cache_ratio:0.357311320754717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 -DEBUG 06-24 20:39:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:04 [batch.py:51] router release req id 8 -INFO 06-24 20:39:04 [manager.py:224] router recive req id 8 cost time 0.10971307754516602 s -INFO 06-24 20:39:04 [manager.py:68] detokenization recv req id 8 cost time 0.11164617538452148 s -DEBUG 06-24 20:39:04 [manager.py:391] Prefill Batch: batch_id=269692927050720850323661326627435616653, time:1750768744.8774261s req_ids:[8] -DEBUG 06-24 20:39:04 [manager.py:391] -ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:04 lightllm_req_id:8 first_token_cost:389.61029052734375ms total_cost_time:389.65463638305664ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14417 prompt_cache_len:5151 prompt_cache_ratio:0.3572865367274745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 -DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:05 [batch.py:51] router release req id 8 -INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10916352272033691 s -INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11112165451049805 s -DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=120645436605576550622915283860715834255, time:1750768745.272444s req_ids:[8] -DEBUG 06-24 20:39:05 [manager.py:391] -ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:213.67239952087402ms total_cost_time:213.71150016784668ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:14418 prompt_cache_len:5151 prompt_cache_ratio:0.35726175613816064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 -DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:05 [batch.py:51] router release req id 8 -INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10825872421264648 s -INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.1103367805480957 s -DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=37160258634418580758908671547267347760, time:1750768745.4932957s req_ids:[8] -DEBUG 06-24 20:39:05 [manager.py:391] -ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:219.30885314941406ms total_cost_time:219.35224533081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14419 prompt_cache_len:5151 prompt_cache_ratio:0.35723697898606005 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 -DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:05 [batch.py:51] router release req id 8 -INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10815048217773438 s -INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012744903564453 s -DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=1025591099157772715304565047340465613, time:1750768745.7270288s req_ids:[8] -DEBUG 06-24 20:39:05 [manager.py:391] -ERROR 06-24 20:39:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:228.88994216918945ms total_cost_time:228.93500328063965ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14420 prompt_cache_len:5151 prompt_cache_ratio:0.3572122052704577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 -DEBUG 06-24 20:39:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:05 [batch.py:51] router release req id 8 -INFO 06-24 20:39:05 [manager.py:224] router recive req id 8 cost time 0.10862255096435547 s -INFO 06-24 20:39:05 [manager.py:68] detokenization recv req id 8 cost time 0.11040735244750977 s -DEBUG 06-24 20:39:05 [manager.py:391] Prefill Batch: batch_id=230234104841611586230691630628620043949, time:1750768745.9624455s req_ids:[8] -DEBUG 06-24 20:39:05 [manager.py:391] -ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:05 lightllm_req_id:8 first_token_cost:188.91167640686035ms total_cost_time:188.95697593688965ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14421 prompt_cache_len:5151 prompt_cache_ratio:0.35718743499063865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 -DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:06 [batch.py:51] router release req id 8 -INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.1081242561340332 s -INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.10961103439331055 s -DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=159104142879330775399206596724855937662, time:1750768746.1479666s req_ids:[8] -DEBUG 06-24 20:39:06 [manager.py:391] -ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:208.47797393798828ms total_cost_time:208.52375030517578ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14422 prompt_cache_len:5151 prompt_cache_ratio:0.3571626681458882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 -DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:06 [batch.py:51] router release req id 8 -INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10764670372009277 s -INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.10957932472229004 s -DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=151300754131816835222323648214254116460, time:1750768746.3631163s req_ids:[8] -DEBUG 06-24 20:39:06 [manager.py:391] -ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:379.1518211364746ms total_cost_time:379.194974899292ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14423 prompt_cache_len:5151 prompt_cache_ratio:0.3571379047354919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 -DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:06 [batch.py:51] router release req id 8 -INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10953140258789062 s -INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.11168289184570312 s -DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=326477692332041869549175581309848809659, time:1750768746.749041s req_ids:[8] -DEBUG 06-24 20:39:06 [manager.py:391] -ERROR 06-24 20:39:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:215.31915664672852ms total_cost_time:215.4250144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.10585784912109375ms prompt_token_num:14424 prompt_cache_len:5151 prompt_cache_ratio:0.35711314475873546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 -DEBUG 06-24 20:39:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:06 [batch.py:51] router release req id 8 -INFO 06-24 20:39:06 [manager.py:224] router recive req id 8 cost time 0.10836052894592285 s -INFO 06-24 20:39:06 [manager.py:68] detokenization recv req id 8 cost time 0.11048030853271484 s -DEBUG 06-24 20:39:06 [manager.py:391] Prefill Batch: batch_id=340033724385825236818916253133711560018, time:1750768746.9826066s req_ids:[8] -DEBUG 06-24 20:39:06 [manager.py:391] -ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:06 lightllm_req_id:8 first_token_cost:228.03735733032227ms total_cost_time:228.08194160461426ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14425 prompt_cache_len:5151 prompt_cache_ratio:0.3570883882149047 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 -DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:07 [batch.py:51] router release req id 8 -INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10976767539978027 s -INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.1117706298828125 s -DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=221842589995435193335948153450176221740, time:1750768747.2080717s req_ids:[8] -DEBUG 06-24 20:39:07 [manager.py:391] -ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:215.90709686279297ms total_cost_time:215.95048904418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14426 prompt_cache_len:5151 prompt_cache_ratio:0.35706363510328576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 -DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:07 [batch.py:51] router release req id 8 -INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10812735557556152 s -INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.11023116111755371 s -DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=154421752098875093416761266286156495260, time:1750768747.4419856s req_ids:[8] -DEBUG 06-24 20:39:07 [manager.py:391] -ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:227.41246223449707ms total_cost_time:227.45609283447266ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14427 prompt_cache_len:5151 prompt_cache_ratio:0.3570388854231649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 -DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:07 [batch.py:51] router release req id 8 -INFO 06-24 20:39:07 [manager.py:224] router recive req id 8 cost time 0.10860276222229004 s -INFO 06-24 20:39:07 [manager.py:68] detokenization recv req id 8 cost time 0.11073446273803711 s -DEBUG 06-24 20:39:07 [manager.py:391] Prefill Batch: batch_id=253899780095196006271365982139554892785, time:1750768747.6680632s req_ids:[8] -DEBUG 06-24 20:39:07 [manager.py:391] -ERROR 06-24 20:39:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:214.39743041992188ms total_cost_time:214.44249153137207ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14428 prompt_cache_len:5151 prompt_cache_ratio:0.3570141391738287 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 -DEBUG 06-24 20:39:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:07 [batch.py:51] router release req id 8 -INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.3111255168914795 s -INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.31325817108154297 s -DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=71611392071358443447537189801981580060, time:1750768748.097501s req_ids:[8] -DEBUG 06-24 20:39:08 [manager.py:391] -ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:07 lightllm_req_id:8 first_token_cost:422.5592613220215ms total_cost_time:422.6036071777344ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14429 prompt_cache_len:5151 prompt_cache_ratio:0.3569893963545637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 -DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:08 [batch.py:51] router release req id 8 -INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10849213600158691 s -INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.11040425300598145 s -DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=230452138273452295752895771748414479189, time:1750768748.319106s req_ids:[8] -DEBUG 06-24 20:39:08 [manager.py:391] -ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:214.15066719055176ms total_cost_time:214.19286727905273ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14430 prompt_cache_len:5151 prompt_cache_ratio:0.35696465696465696 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 -DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:08 [batch.py:51] router release req id 8 -INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10764765739440918 s -INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.10969829559326172 s -DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=286586147293116425906714578717497710289, time:1750768748.5391588s req_ids:[8] -DEBUG 06-24 20:39:08 [manager.py:391] -ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:208.94622802734375ms total_cost_time:208.98890495300293ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14431 prompt_cache_len:5151 prompt_cache_ratio:0.35693992100339544 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 -DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:08 [batch.py:51] router release req id 8 -INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10782408714294434 s -INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.1096489429473877 s -DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=78376289869153816009944014406074588319, time:1750768748.7664132s req_ids:[8] -DEBUG 06-24 20:39:08 [manager.py:391] -ERROR 06-24 20:39:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:227.86402702331543ms total_cost_time:227.9069423675537ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14432 prompt_cache_len:5151 prompt_cache_ratio:0.35691518847006654 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 -DEBUG 06-24 20:39:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:08 [batch.py:51] router release req id 8 -INFO 06-24 20:39:08 [manager.py:224] router recive req id 8 cost time 0.10904288291931152 s -INFO 06-24 20:39:08 [manager.py:68] detokenization recv req id 8 cost time 0.11111903190612793 s -DEBUG 06-24 20:39:08 [manager.py:391] Prefill Batch: batch_id=298660883630620630285118260099780099490, time:1750768748.9916048s req_ids:[8] -DEBUG 06-24 20:39:08 [manager.py:391] -ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:08 lightllm_req_id:8 first_token_cost:213.93799781799316ms total_cost_time:213.98210525512695ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14433 prompt_cache_len:5151 prompt_cache_ratio:0.3568904593639576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 -DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:09 [batch.py:51] router release req id 8 -INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.1086127758026123 s -INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.11135983467102051 s -DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=1443158031777165879979305883847184775, time:1750768749.2257204s req_ids:[8] -DEBUG 06-24 20:39:09 [manager.py:391] -ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:392.2431468963623ms total_cost_time:392.2848701477051ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14434 prompt_cache_len:5151 prompt_cache_ratio:0.35686573368435637 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 -DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:09 [batch.py:51] router release req id 8 -INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.10902667045593262 s -INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.11118674278259277 s -DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=190104925755065930834671279092262651122, time:1750768749.6123288s req_ids:[8] -DEBUG 06-24 20:39:09 [manager.py:391] -ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:211.1339569091797ms total_cost_time:211.17687225341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14435 prompt_cache_len:5151 prompt_cache_ratio:0.35684101143055075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 -DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:09 [batch.py:51] router release req id 8 -INFO 06-24 20:39:09 [manager.py:224] router recive req id 8 cost time 0.10796809196472168 s -INFO 06-24 20:39:09 [manager.py:68] detokenization recv req id 8 cost time 0.10998272895812988 s -DEBUG 06-24 20:39:09 [manager.py:391] Prefill Batch: batch_id=244173667086951092690210849735299183393, time:1750768749.8296163s req_ids:[8] -DEBUG 06-24 20:39:09 [manager.py:391] -DEBUG 06-24 20:39:09 [stats.py:37] Avg tokens(prompt+generate) throughput: 57231.970 tokens/s -DEBUG 06-24 20:39:09 [stats.py:37] Avg prompt tokens throughput: 57224.130 tokens/s -DEBUG 06-24 20:39:09 [stats.py:37] Avg generate tokens throughput: 7.839 tokens/s -ERROR 06-24 20:39:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:214.62416648864746ms total_cost_time:214.66684341430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14436 prompt_cache_len:5151 prompt_cache_ratio:0.3568162926018288 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 -DEBUG 06-24 20:39:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:09 [batch.py:51] router release req id 8 -INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10880231857299805 s -INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.11071205139160156 s -DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=260883252017914314194225568725314386477, time:1750768750.052993s req_ids:[8] -DEBUG 06-24 20:39:10 [manager.py:391] -ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:09 lightllm_req_id:8 first_token_cost:211.09414100646973ms total_cost_time:211.13824844360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14437 prompt_cache_len:5151 prompt_cache_ratio:0.3567915771974787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 -DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:10 [batch.py:51] router release req id 8 -INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.1077430248260498 s -INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.10978984832763672 s -DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=30057341735574622687923989713725793556, time:1750768750.2697062s req_ids:[8] -DEBUG 06-24 20:39:10 [manager.py:391] -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:10 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:214.2353057861328ms total_cost_time:214.277982711792ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14438 prompt_cache_len:5151 prompt_cache_ratio:0.356766865216789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 -DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:10 [batch.py:51] router release req id 8 -INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10763359069824219 s -INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.10947728157043457 s -DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=82482135982308747291512840619287918130, time:1750768750.500907s req_ids:[8] -DEBUG 06-24 20:39:10 [manager.py:391] -ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:361.2246513366699ms total_cost_time:361.2701892852783ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14439 prompt_cache_len:5151 prompt_cache_ratio:0.3567421566590484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 -DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:10 [batch.py:51] router release req id 8 -INFO 06-24 20:39:10 [manager.py:224] router recive req id 8 cost time 0.10886287689208984 s -INFO 06-24 20:39:10 [manager.py:68] detokenization recv req id 8 cost time 0.11101961135864258 s -DEBUG 06-24 20:39:10 [manager.py:391] Prefill Batch: batch_id=112693203331248789428240721837511549641, time:1750768750.8648474s req_ids:[8] -DEBUG 06-24 20:39:10 [manager.py:391] -ERROR 06-24 20:39:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:219.39373016357422ms total_cost_time:219.45548057556152ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14440 prompt_cache_len:5151 prompt_cache_ratio:0.3567174515235457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 -DEBUG 06-24 20:39:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:10 [batch.py:51] router release req id 8 -INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10860919952392578 s -INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11056971549987793 s -DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=309855515936060889190726296737339254099, time:1750768751.0881581s req_ids:[8] -DEBUG 06-24 20:39:11 [manager.py:391] -ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:10 lightllm_req_id:8 first_token_cost:215.5773639678955ms total_cost_time:215.6224250793457ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14441 prompt_cache_len:5151 prompt_cache_ratio:0.35669274980957 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 -DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:11 [batch.py:51] router release req id 8 -INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10819888114929199 s -INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11014771461486816 s -DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=256156288801476146607702351379864855225, time:1750768751.3120918s req_ids:[8] -DEBUG 06-24 20:39:11 [manager.py:391] -ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:214.51401710510254ms total_cost_time:214.55979347229004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14442 prompt_cache_len:5151 prompt_cache_ratio:0.35666805151641046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 -DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:11 [batch.py:51] router release req id 8 -INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10839724540710449 s -INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.11056351661682129 s -DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=172673095492456084099490345031176837323, time:1750768751.5329416s req_ids:[8] -DEBUG 06-24 20:39:11 [manager.py:391] -ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:211.7176055908203ms total_cost_time:211.7598056793213ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14443 prompt_cache_len:5151 prompt_cache_ratio:0.35664335664335667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 -DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:11 [batch.py:51] router release req id 8 -INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10801577568054199 s -INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.1093745231628418 s -DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=266424898381614818363760754332839761071, time:1750768751.7517812s req_ids:[8] -DEBUG 06-24 20:39:11 [manager.py:391] -ERROR 06-24 20:39:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:215.79599380493164ms total_cost_time:215.83819389343262ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14444 prompt_cache_len:5151 prompt_cache_ratio:0.35661866518969815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 -DEBUG 06-24 20:39:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:11 [batch.py:51] router release req id 8 -INFO 06-24 20:39:11 [manager.py:224] router recive req id 8 cost time 0.10799026489257812 s -INFO 06-24 20:39:11 [manager.py:68] detokenization recv req id 8 cost time 0.1099846363067627 s -DEBUG 06-24 20:39:11 [manager.py:391] Prefill Batch: batch_id=151103851337256310303437343954511429710, time:1750768751.9747906s req_ids:[8] -DEBUG 06-24 20:39:11 [manager.py:391] -ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:11 lightllm_req_id:8 first_token_cost:385.0288391113281ms total_cost_time:385.0724697113037ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14445 prompt_cache_len:5151 prompt_cache_ratio:0.3565939771547248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 -DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:12 [batch.py:51] router release req id 8 -INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.10917997360229492 s -INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.1112508773803711 s -DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=274087782406724443260006128639365559476, time:1750768752.3672307s req_ids:[8] -DEBUG 06-24 20:39:12 [manager.py:391] -ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:214.29967880249023ms total_cost_time:214.34259414672852ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14446 prompt_cache_len:5151 prompt_cache_ratio:0.3565692925377267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 -DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:12 [batch.py:51] router release req id 8 -INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.1090247631072998 s -INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.11106300354003906 s -DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=109643730919017540088511146435171013957, time:1750768752.5883234s req_ids:[8] -DEBUG 06-24 20:39:12 [manager.py:391] -ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:224.89619255065918ms total_cost_time:224.95222091674805ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:14447 prompt_cache_len:5151 prompt_cache_ratio:0.35654461133799403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 -DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:12 [batch.py:51] router release req id 8 -INFO 06-24 20:39:12 [manager.py:224] router recive req id 8 cost time 0.10911035537719727 s -INFO 06-24 20:39:12 [manager.py:68] detokenization recv req id 8 cost time 0.11115527153015137 s -DEBUG 06-24 20:39:12 [manager.py:391] Prefill Batch: batch_id=256413163123853200854585687665482682101, time:1750768752.828008s req_ids:[8] -DEBUG 06-24 20:39:12 [manager.py:391] -ERROR 06-24 20:39:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:224.54094886779785ms total_cost_time:224.58291053771973ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14448 prompt_cache_len:5151 prompt_cache_ratio:0.35651993355481726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 -DEBUG 06-24 20:39:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:12 [batch.py:51] router release req id 8 -INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10877442359924316 s -INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112830638885498 s -DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=291995886525481649736240854874971782809, time:1750768753.0512106s req_ids:[8] -DEBUG 06-24 20:39:13 [manager.py:391] -ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:12 lightllm_req_id:8 first_token_cost:212.82696723937988ms total_cost_time:212.86988258361816ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14449 prompt_cache_len:5151 prompt_cache_ratio:0.35649525918748703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 -DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:13 [batch.py:51] router release req id 8 -INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10786557197570801 s -INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s -DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=66047282980062518053997735262163468593, time:1750768753.2698097s req_ids:[8] -DEBUG 06-24 20:39:13 [manager.py:391] -ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:376.97553634643555ms total_cost_time:377.01988220214844ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14450 prompt_cache_len:5151 prompt_cache_ratio:0.3564705882352941 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 -DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:13 [batch.py:51] router release req id 8 -INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10900712013244629 s -INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.1111001968383789 s -DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=48994054095646772868058448545485534491, time:1750768753.652219s req_ids:[8] -DEBUG 06-24 20:39:13 [manager.py:391] -ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:214.30706977844238ms total_cost_time:214.36285972595215ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:14451 prompt_cache_len:5151 prompt_cache_ratio:0.35644592069752956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 -DEBUG 06-24 20:39:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:13 [batch.py:51] router release req id 8 -INFO 06-24 20:39:13 [manager.py:224] router recive req id 8 cost time 0.10796451568603516 s -INFO 06-24 20:39:13 [manager.py:68] detokenization recv req id 8 cost time 0.10991406440734863 s -DEBUG 06-24 20:39:13 [manager.py:391] Prefill Batch: batch_id=323433360951768358497787823052859395054, time:1750768753.8873s req_ids:[8] -DEBUG 06-24 20:39:13 [manager.py:391] -ERROR 06-24 20:39:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:232.08975791931152ms total_cost_time:232.13434219360352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14452 prompt_cache_len:5151 prompt_cache_ratio:0.3564212565734846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 -DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:14 [batch.py:51] router release req id 8 -INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10915827751159668 s -INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11118769645690918 s -DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=98565531160045885128495239375206938881, time:1750768754.1131608s req_ids:[8] -DEBUG 06-24 20:39:14 [manager.py:391] -ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:13 lightllm_req_id:8 first_token_cost:215.00802040100098ms total_cost_time:215.04926681518555ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14453 prompt_cache_len:5151 prompt_cache_ratio:0.3563965958624507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 -DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:14 [batch.py:51] router release req id 8 -INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s -INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11086821556091309 s -DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=120106942341199154735193975849053555217, time:1750768754.336108s req_ids:[8] -DEBUG 06-24 20:39:14 [manager.py:391] -ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:216.68624877929688ms total_cost_time:216.72821044921875ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14454 prompt_cache_len:5151 prompt_cache_ratio:0.3563719385637194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 -DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:14 [batch.py:51] router release req id 8 -INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10880637168884277 s -INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11074304580688477 s -DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=65038110514663976119154620606364292258, time:1750768754.5582387s req_ids:[8] -DEBUG 06-24 20:39:14 [manager.py:391] -ERROR 06-24 20:39:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:216.08209609985352ms total_cost_time:216.12548828125ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14455 prompt_cache_len:5151 prompt_cache_ratio:0.3563472846765825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 -DEBUG 06-24 20:39:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:14 [batch.py:51] router release req id 8 -INFO 06-24 20:39:14 [manager.py:224] router recive req id 8 cost time 0.10886025428771973 s -INFO 06-24 20:39:14 [manager.py:68] detokenization recv req id 8 cost time 0.11099052429199219 s -DEBUG 06-24 20:39:14 [manager.py:391] Prefill Batch: batch_id=6407844347482827244912000717078211352, time:1750768754.7799933s req_ids:[8] -DEBUG 06-24 20:39:14 [manager.py:391] -ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:14 lightllm_req_id:8 first_token_cost:375.5013942718506ms total_cost_time:375.54430961608887ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14456 prompt_cache_len:5151 prompt_cache_ratio:0.35632263420033206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 -DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:15 [batch.py:51] router release req id 8 -INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.1091146469116211 s -INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s -DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=1017323158985812167480360359367676943, time:1750768755.1629627s req_ids:[8] -DEBUG 06-24 20:39:15 [manager.py:391] -ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:218.3675765991211ms total_cost_time:218.4135913848877ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14457 prompt_cache_len:5151 prompt_cache_ratio:0.3562979871342602 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 -DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:15 [batch.py:51] router release req id 8 -INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10799598693847656 s -INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.1098940372467041 s -DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=263774758892995599267683342175634349986, time:1750768755.3871503s req_ids:[8] -DEBUG 06-24 20:39:15 [manager.py:391] -ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:215.73233604431152ms total_cost_time:215.77692031860352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14458 prompt_cache_len:5151 prompt_cache_ratio:0.35627334347765943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 -DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:15 [batch.py:51] router release req id 8 -INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10898470878601074 s -INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.11087298393249512 s -DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=308292539267410881415497870082185736748, time:1750768755.6106734s req_ids:[8] -DEBUG 06-24 20:39:15 [manager.py:391] -ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:216.0813808441162ms total_cost_time:216.1245346069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14459 prompt_cache_len:5151 prompt_cache_ratio:0.35624870322982227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 -DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:15 [batch.py:51] router release req id 8 -INFO 06-24 20:39:15 [manager.py:224] router recive req id 8 cost time 0.10798859596252441 s -INFO 06-24 20:39:15 [manager.py:68] detokenization recv req id 8 cost time 0.10982275009155273 s -DEBUG 06-24 20:39:15 [manager.py:391] Prefill Batch: batch_id=421920628723533212835412969761451501, time:1750768755.8362823s req_ids:[8] -DEBUG 06-24 20:39:15 [manager.py:391] -ERROR 06-24 20:39:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:223.01745414733887ms total_cost_time:223.06227684020996ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14460 prompt_cache_len:5151 prompt_cache_ratio:0.3562240663900415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 -DEBUG 06-24 20:39:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:15 [batch.py:51] router release req id 8 -INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10882163047790527 s -INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11086869239807129 s -DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=264767345193189309436985202029236393217, time:1750768756.059983s req_ids:[8] -DEBUG 06-24 20:39:16 [manager.py:391] -ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:15 lightllm_req_id:8 first_token_cost:210.26921272277832ms total_cost_time:210.313081741333ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14461 prompt_cache_len:5151 prompt_cache_ratio:0.35619943295761014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 -DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:16 [batch.py:51] router release req id 8 -INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.3109748363494873 s -INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.31284093856811523 s -DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=13015321894155355799069032665733173622, time:1750768756.4824874s req_ids:[8] -DEBUG 06-24 20:39:16 [manager.py:391] -ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:426.21803283691406ms total_cost_time:426.26428604125977ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14462 prompt_cache_len:5151 prompt_cache_ratio:0.3561748029318213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 -DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:16 [batch.py:51] router release req id 8 -INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10883188247680664 s -INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11083984375 s -DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=100510524427873256812126511542235161365, time:1750768756.7100923s req_ids:[8] -DEBUG 06-24 20:39:16 [manager.py:391] -ERROR 06-24 20:39:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:210.86764335632324ms total_cost_time:210.91032028198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14463 prompt_cache_len:5151 prompt_cache_ratio:0.3561501763119685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 -DEBUG 06-24 20:39:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:16 [batch.py:51] router release req id 8 -INFO 06-24 20:39:16 [manager.py:224] router recive req id 8 cost time 0.10869169235229492 s -INFO 06-24 20:39:16 [manager.py:68] detokenization recv req id 8 cost time 0.11073470115661621 s -DEBUG 06-24 20:39:16 [manager.py:391] Prefill Batch: batch_id=170230979480275616630987565844123840856, time:1750768756.9292464s req_ids:[8] -DEBUG 06-24 20:39:16 [manager.py:391] -ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:16 lightllm_req_id:8 first_token_cost:212.81957626342773ms total_cost_time:212.86392211914062ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14464 prompt_cache_len:5151 prompt_cache_ratio:0.3561255530973451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 -DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:17 [batch.py:51] router release req id 8 -INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10896801948547363 s -INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11105155944824219 s -DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=222272595704911621297233237235553650850, time:1750768757.149159s req_ids:[8] -DEBUG 06-24 20:39:17 [manager.py:391] -INFO 06-24 20:39:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:214.89953994750977ms total_cost_time:214.94245529174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14465 prompt_cache_len:5151 prompt_cache_ratio:0.35610093328724507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 -DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:17 [batch.py:51] router release req id 8 -INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10922884941101074 s -INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11127138137817383 s -DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=238724501656088151327940160845152567660, time:1750768757.3707323s req_ids:[8] -DEBUG 06-24 20:39:17 [manager.py:391] -ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:209.83219146728516ms total_cost_time:209.87629890441895ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14466 prompt_cache_len:5151 prompt_cache_ratio:0.35607631688096225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 -DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:17 [batch.py:51] router release req id 8 -INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.1083989143371582 s -INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11044740676879883 s -DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=136571648187754387877585146435720928608, time:1750768757.5987244s req_ids:[8] -DEBUG 06-24 20:39:17 [manager.py:391] -ERROR 06-24 20:39:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:393.1889533996582ms total_cost_time:393.2335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14467 prompt_cache_len:5151 prompt_cache_ratio:0.35605170387779084 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 -DEBUG 06-24 20:39:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:17 [batch.py:51] router release req id 8 -INFO 06-24 20:39:17 [manager.py:224] router recive req id 8 cost time 0.10904955863952637 s -INFO 06-24 20:39:17 [manager.py:68] detokenization recv req id 8 cost time 0.11109352111816406 s -DEBUG 06-24 20:39:17 [manager.py:391] Prefill Batch: batch_id=259898436402997137538497358270952818357, time:1750768757.9903495s req_ids:[8] -DEBUG 06-24 20:39:17 [manager.py:391] -ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:17 lightllm_req_id:8 first_token_cost:217.057466506958ms total_cost_time:217.1010971069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14468 prompt_cache_len:5151 prompt_cache_ratio:0.35602709427702517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 -DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:18 [batch.py:51] router release req id 8 -INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10904479026794434 s -INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11102581024169922 s -DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=337514285645286034376217235270328430748, time:1750768758.2151718s req_ids:[8] -DEBUG 06-24 20:39:18 [manager.py:391] -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:18 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:213.76276016235352ms total_cost_time:213.8059139251709ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14469 prompt_cache_len:5151 prompt_cache_ratio:0.35600248807795976 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 -DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:18 [batch.py:51] router release req id 8 -INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s -INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11061429977416992 s -DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=273196104514867727967211452289001512995, time:1750768758.4364254s req_ids:[8] -DEBUG 06-24 20:39:18 [manager.py:391] -ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:216.07637405395508ms total_cost_time:216.12095832824707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14470 prompt_cache_len:5151 prompt_cache_ratio:0.35597788527988944 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 -DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:18 [batch.py:51] router release req id 8 -INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.1085045337677002 s -INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11057353019714355 s -DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=115436287213321250280222773846590966675, time:1750768758.6595058s req_ids:[8] -DEBUG 06-24 20:39:18 [manager.py:391] -ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:211.6549015045166ms total_cost_time:211.6987705230713ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14471 prompt_cache_len:5151 prompt_cache_ratio:0.35595328588210906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 -DEBUG 06-24 20:39:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:18 [batch.py:51] router release req id 8 -INFO 06-24 20:39:18 [manager.py:224] router recive req id 8 cost time 0.10863327980041504 s -INFO 06-24 20:39:18 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s -DEBUG 06-24 20:39:18 [manager.py:391] Prefill Batch: batch_id=178122349760245250984639058720720836890, time:1750768758.8835607s req_ids:[8] -DEBUG 06-24 20:39:18 [manager.py:391] -ERROR 06-24 20:39:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:219.22874450683594ms total_cost_time:219.3012237548828ms,out_token_counter:1 mean_per_token_cost_time: 0.072479248046875ms prompt_token_num:14472 prompt_cache_len:5151 prompt_cache_ratio:0.35592868988391374 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 -DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:19 [batch.py:51] router release req id 8 -INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.31195855140686035 s -INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.31337499618530273 s -DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=98014658054796709363782094895554895198, time:1750768759.3144581s req_ids:[8] -DEBUG 06-24 20:39:19 [manager.py:391] -ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:18 lightllm_req_id:8 first_token_cost:428.8802146911621ms total_cost_time:428.9236068725586ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14473 prompt_cache_len:5151 prompt_cache_ratio:0.35590409728459893 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 -DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:19 [batch.py:51] router release req id 8 -INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10820317268371582 s -INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.10939860343933105 s -DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=257719559702122527960078960437955864387, time:1750768759.5421965s req_ids:[8] -DEBUG 06-24 20:39:19 [manager.py:391] -ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:215.0125503540039ms total_cost_time:215.05475044250488ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14474 prompt_cache_len:5151 prompt_cache_ratio:0.35587950808345997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 -DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:19 [batch.py:51] router release req id 8 -INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10843110084533691 s -INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.11015105247497559 s -DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=60246118414337925335603607700257304160, time:1750768759.7666085s req_ids:[8] -DEBUG 06-24 20:39:19 [manager.py:391] -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:19 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:39:19 [stats.py:37] Avg tokens(prompt+generate) throughput: 56167.408 tokens/s -DEBUG 06-24 20:39:19 [stats.py:37] Avg prompt tokens throughput: 56159.538 tokens/s -DEBUG 06-24 20:39:19 [stats.py:37] Avg generate tokens throughput: 7.869 tokens/s -INFO 06-24 20:39:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:222.62287139892578ms total_cost_time:222.66745567321777ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14475 prompt_cache_len:5151 prompt_cache_ratio:0.35585492227979276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 -DEBUG 06-24 20:39:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:19 [batch.py:51] router release req id 8 -INFO 06-24 20:39:19 [manager.py:224] router recive req id 8 cost time 0.10888385772705078 s -INFO 06-24 20:39:19 [manager.py:68] detokenization recv req id 8 cost time 0.11073017120361328 s -DEBUG 06-24 20:39:19 [manager.py:391] Prefill Batch: batch_id=179252565023677891973026276626664509295, time:1750768759.991026s req_ids:[8] -DEBUG 06-24 20:39:19 [manager.py:391] -ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:19 lightllm_req_id:8 first_token_cost:213.1519317626953ms total_cost_time:213.1948471069336ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14476 prompt_cache_len:5151 prompt_cache_ratio:0.3558303398728931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 -DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:20 [batch.py:51] router release req id 8 -INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.10851144790649414 s -INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s -DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=201257005861463015766489437267586091988, time:1750768760.21063s req_ids:[8] -DEBUG 06-24 20:39:20 [manager.py:391] -ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:212.12124824523926ms total_cost_time:212.16535568237305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14477 prompt_cache_len:5151 prompt_cache_ratio:0.35580576086205706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 -DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:20 [batch.py:51] router release req id 8 -INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.1088254451751709 s -INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s -DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=25304054451728900996853120421279660634, time:1750768760.4298644s req_ids:[8] -DEBUG 06-24 20:39:20 [manager.py:391] -ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:387.5579833984375ms total_cost_time:387.6056671142578ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:14478 prompt_cache_len:5151 prompt_cache_ratio:0.35578118524658103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 -DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:20 [batch.py:51] router release req id 8 -INFO 06-24 20:39:20 [manager.py:224] router recive req id 8 cost time 0.10886669158935547 s -INFO 06-24 20:39:20 [manager.py:68] detokenization recv req id 8 cost time 0.11088371276855469 s -DEBUG 06-24 20:39:20 [manager.py:391] Prefill Batch: batch_id=169420038673990720697478235947123232945, time:1750768760.825021s req_ids:[8] -DEBUG 06-24 20:39:20 [manager.py:391] -ERROR 06-24 20:39:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:215.90566635131836ms total_cost_time:215.94905853271484ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14479 prompt_cache_len:5151 prompt_cache_ratio:0.35575661302576145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 -DEBUG 06-24 20:39:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:20 [batch.py:51] router release req id 8 -INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10894370079040527 s -INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11017441749572754 s -DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=217155677914912462187866787741336137257, time:1750768761.046107s req_ids:[8] -DEBUG 06-24 20:39:21 [manager.py:391] -ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:20 lightllm_req_id:8 first_token_cost:214.79177474975586ms total_cost_time:214.83421325683594ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14480 prompt_cache_len:5151 prompt_cache_ratio:0.35573204419889504 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 -DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:21 [batch.py:51] router release req id 8 -INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10792064666748047 s -INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11001896858215332 s -DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=234360684666352354594063016406278605989, time:1750768761.268018s req_ids:[8] -DEBUG 06-24 20:39:21 [manager.py:391] -ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:216.61615371704102ms total_cost_time:216.6593074798584ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14481 prompt_cache_len:5151 prompt_cache_ratio:0.35570747876527864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 -DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:21 [batch.py:51] router release req id 8 -INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10856008529663086 s -INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.1106865406036377 s -DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=296128557938243823528727482844788021495, time:1750768761.4909992s req_ids:[8] -DEBUG 06-24 20:39:21 [manager.py:391] -ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:214.49995040893555ms total_cost_time:214.54191207885742ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14482 prompt_cache_len:5151 prompt_cache_ratio:0.3556829167242094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 -DEBUG 06-24 20:39:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:21 [batch.py:51] router release req id 8 -INFO 06-24 20:39:21 [manager.py:224] router recive req id 8 cost time 0.10888814926147461 s -INFO 06-24 20:39:21 [manager.py:68] detokenization recv req id 8 cost time 0.11092019081115723 s -DEBUG 06-24 20:39:21 [manager.py:391] Prefill Batch: batch_id=241497182451478511734090784161542539653, time:1750768761.7125306s req_ids:[8] -DEBUG 06-24 20:39:21 [manager.py:391] -ERROR 06-24 20:39:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:392.2848701477051ms total_cost_time:392.32754707336426ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14483 prompt_cache_len:5151 prompt_cache_ratio:0.35565835807498447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 -DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:22 [batch.py:51] router release req id 8 -INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.11084747314453125 s -INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.1131753921508789 s -DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=219084662789604161303982372905736323637, time:1750768762.1104553s req_ids:[8] -DEBUG 06-24 20:39:22 [manager.py:391] -ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:21 lightllm_req_id:8 first_token_cost:217.47732162475586ms total_cost_time:217.54217147827148ms,out_token_counter:1 mean_per_token_cost_time: 0.064849853515625ms prompt_token_num:14484 prompt_cache_len:5151 prompt_cache_ratio:0.35563380281690143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 -DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:22 [batch.py:51] router release req id 8 -INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s -INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11112284660339355 s -DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=308923397991048543146706306520712943503, time:1750768762.3344615s req_ids:[8] -DEBUG 06-24 20:39:22 [manager.py:391] -ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:212.48412132263184ms total_cost_time:212.54587173461914ms,out_token_counter:1 mean_per_token_cost_time: 0.06175041198730469ms prompt_token_num:14485 prompt_cache_len:5151 prompt_cache_ratio:0.3556092509492578 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 -DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:22 [batch.py:51] router release req id 8 -INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10924124717712402 s -INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11118888854980469 s -DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=285278350662507412520499487066676135052, time:1750768762.5550234s req_ids:[8] -DEBUG 06-24 20:39:22 [manager.py:391] -ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:212.61334419250488ms total_cost_time:212.65912055969238ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14486 prompt_cache_len:5151 prompt_cache_ratio:0.35558470247135165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 -DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:22 [batch.py:51] router release req id 8 -INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.10917043685913086 s -INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s -DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=131928666935576175587118790988534849238, time:1750768762.7738123s req_ids:[8] -DEBUG 06-24 20:39:22 [manager.py:391] -ERROR 06-24 20:39:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:216.1080837249756ms total_cost_time:216.15123748779297ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14487 prompt_cache_len:5151 prompt_cache_ratio:0.35556015738248087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 -DEBUG 06-24 20:39:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:22 [batch.py:51] router release req id 8 -INFO 06-24 20:39:22 [manager.py:224] router recive req id 8 cost time 0.1094210147857666 s -INFO 06-24 20:39:22 [manager.py:68] detokenization recv req id 8 cost time 0.11148428916931152 s -DEBUG 06-24 20:39:22 [manager.py:391] Prefill Batch: batch_id=58444430159362729819293826139630683712, time:1750768762.9958334s req_ids:[8] -DEBUG 06-24 20:39:22 [manager.py:391] -ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:22 lightllm_req_id:8 first_token_cost:216.28332138061523ms total_cost_time:216.32623672485352ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14488 prompt_cache_len:5151 prompt_cache_ratio:0.3555356156819437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 -DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:23 [batch.py:51] router release req id 8 -INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.1095418930053711 s -INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11148405075073242 s -INFO 06-24 20:39:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=1877656917933041874672586830458644840, time:1750768763.2195485s req_ids:[8] -DEBUG 06-24 20:39:23 [manager.py:391] -ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:378.4351348876953ms total_cost_time:378.4806728363037ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14489 prompt_cache_len:5151 prompt_cache_ratio:0.3555110773690386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 -DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:23 [batch.py:51] router release req id 8 -INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s -INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11136984825134277 s -DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=248209539323442834005900165792536595107, time:1750768763.6035285s req_ids:[8] -DEBUG 06-24 20:39:23 [manager.py:391] -ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:204.44965362548828ms total_cost_time:204.49423789978027ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14490 prompt_cache_len:5151 prompt_cache_ratio:0.35548654244306416 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 -DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:23 [batch.py:51] router release req id 8 -INFO 06-24 20:39:23 [manager.py:224] router recive req id 8 cost time 0.10872411727905273 s -INFO 06-24 20:39:23 [manager.py:68] detokenization recv req id 8 cost time 0.11073040962219238 s -DEBUG 06-24 20:39:23 [manager.py:391] Prefill Batch: batch_id=21632774937063864744075929724236934948, time:1750768763.8149583s req_ids:[8] -DEBUG 06-24 20:39:23 [manager.py:391] -ERROR 06-24 20:39:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:210.1423740386963ms total_cost_time:210.188627243042ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14491 prompt_cache_len:5151 prompt_cache_ratio:0.3554620109033193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 -DEBUG 06-24 20:39:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:23 [batch.py:51] router release req id 8 -INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10911226272583008 s -INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11114215850830078 s -DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=167152514488016462976516411848471273693, time:1750768764.030579s req_ids:[8] -DEBUG 06-24 20:39:24 [manager.py:391] -ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:23 lightllm_req_id:8 first_token_cost:207.9942226409912ms total_cost_time:208.0378532409668ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14492 prompt_cache_len:5151 prompt_cache_ratio:0.355437482749103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 -DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:24 [batch.py:51] router release req id 8 -INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10810327529907227 s -INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11009097099304199 s -DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=35036258650783333892583551499587202222, time:1750768764.2477124s req_ids:[8] -DEBUG 06-24 20:39:24 [manager.py:391] -ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:215.30795097351074ms total_cost_time:215.35086631774902ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14493 prompt_cache_len:5151 prompt_cache_ratio:0.35541295797971434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 -DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:24 [batch.py:51] router release req id 8 -INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10853815078735352 s -INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11057496070861816 s -DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=208865894759764555364853057868817563724, time:1750768764.4678087s req_ids:[8] -DEBUG 06-24 20:39:24 [manager.py:391] -ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:215.3451442718506ms total_cost_time:215.38734436035156ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14494 prompt_cache_len:5151 prompt_cache_ratio:0.35538843659445285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 -DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:24 [batch.py:51] router release req id 8 -INFO 06-24 20:39:24 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s -INFO 06-24 20:39:24 [manager.py:68] detokenization recv req id 8 cost time 0.11019539833068848 s -DEBUG 06-24 20:39:24 [manager.py:391] Prefill Batch: batch_id=245343045783389094714580991855729431060, time:1750768764.7006857s req_ids:[8] -DEBUG 06-24 20:39:24 [manager.py:391] -ERROR 06-24 20:39:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:360.9781265258789ms total_cost_time:361.0224723815918ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14495 prompt_cache_len:5151 prompt_cache_ratio:0.3553639185926181 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 -DEBUG 06-24 20:39:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:24 [batch.py:51] router release req id 8 -INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10919857025146484 s -INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11102867126464844 s -DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=277210873355568892756519530403589096244, time:1750768765.0587015s req_ids:[8] -DEBUG 06-24 20:39:25 [manager.py:391] -ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:24 lightllm_req_id:8 first_token_cost:213.07992935180664ms total_cost_time:213.12260627746582ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14496 prompt_cache_len:5151 prompt_cache_ratio:0.35533940397350994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 -DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:25 [batch.py:51] router release req id 8 -INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10947728157043457 s -INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11156320571899414 s -DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=122297740152915074659302917997924185362, time:1750768765.2778533s req_ids:[8] -DEBUG 06-24 20:39:25 [manager.py:391] -ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:214.99395370483398ms total_cost_time:215.03615379333496ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14497 prompt_cache_len:5151 prompt_cache_ratio:0.35531489273642825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 -DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:25 [batch.py:51] router release req id 8 -INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10861420631408691 s -INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11071443557739258 s -DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=87520481113193748356896574277585490099, time:1750768765.4995306s req_ids:[8] -DEBUG 06-24 20:39:25 [manager.py:391] -ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:216.05515480041504ms total_cost_time:216.09926223754883ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14498 prompt_cache_len:5151 prompt_cache_ratio:0.3552903848806732 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 -DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:25 [batch.py:51] router release req id 8 -INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.10935544967651367 s -INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.1104888916015625 s -DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=312966144090525822192825665557438298362, time:1750768765.7217438s req_ids:[8] -DEBUG 06-24 20:39:25 [manager.py:391] -ERROR 06-24 20:39:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:213.80209922790527ms total_cost_time:213.84620666503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14499 prompt_cache_len:5151 prompt_cache_ratio:0.3552658804055452 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 -DEBUG 06-24 20:39:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:25 [batch.py:51] router release req id 8 -INFO 06-24 20:39:25 [manager.py:224] router recive req id 8 cost time 0.11137986183166504 s -DEBUG 06-24 20:39:25 [manager.py:391] Prefill Batch: batch_id=58759272044990527095065356310375434187, time:1750768765.9398122s req_ids:[8] -DEBUG 06-24 20:39:25 [manager.py:391] -INFO 06-24 20:39:25 [manager.py:68] detokenization recv req id 8 cost time 0.11414933204650879 s -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:25 lightllm_req_id:8 first_token_cost:419.07405853271484ms total_cost_time:419.1241264343262ms,out_token_counter:1 mean_per_token_cost_time: 0.050067901611328125ms prompt_token_num:14500 prompt_cache_len:5151 prompt_cache_ratio:0.35524137931034483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 -DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:26 [batch.py:51] router release req id 8 -INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.11020922660827637 s -INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.11215019226074219 s -DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=51893165943518206300431894792615276969, time:1750768766.367028s req_ids:[8] -DEBUG 06-24 20:39:26 [manager.py:391] -ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:219.75994110107422ms total_cost_time:219.8021411895752ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14501 prompt_cache_len:5151 prompt_cache_ratio:0.3552168815943728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 -DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:26 [batch.py:51] router release req id 8 -INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.1084597110748291 s -INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.1102910041809082 s -DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=99835580582123744120723674369900442802, time:1750768766.5912845s req_ids:[8] -DEBUG 06-24 20:39:26 [manager.py:391] -ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:217.0083522796631ms total_cost_time:217.06438064575195ms,out_token_counter:1 mean_per_token_cost_time: 0.05602836608886719ms prompt_token_num:14502 prompt_cache_len:5151 prompt_cache_ratio:0.3551923872569301 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 -DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:26 [batch.py:51] router release req id 8 -INFO 06-24 20:39:26 [manager.py:224] router recive req id 8 cost time 0.10892081260681152 s -INFO 06-24 20:39:26 [manager.py:68] detokenization recv req id 8 cost time 0.11076188087463379 s -DEBUG 06-24 20:39:26 [manager.py:391] Prefill Batch: batch_id=222975783770439133021536474467904369785, time:1750768766.8129175s req_ids:[8] -DEBUG 06-24 20:39:26 [manager.py:391] -ERROR 06-24 20:39:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:214.0481472015381ms total_cost_time:214.09153938293457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14503 prompt_cache_len:5151 prompt_cache_ratio:0.3551678962973178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 -DEBUG 06-24 20:39:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:26 [batch.py:51] router release req id 8 -INFO 06-24 20:39:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10914945602416992 s -INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.1109151840209961 s -DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=13356536801042388314544361589311080648, time:1750768767.035596s req_ids:[8] -DEBUG 06-24 20:39:27 [manager.py:391] -ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:26 lightllm_req_id:8 first_token_cost:215.27504920959473ms total_cost_time:215.32630920410156ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:14504 prompt_cache_len:5151 prompt_cache_ratio:0.3551434087148373 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 -DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:27 [batch.py:51] router release req id 8 -INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10932779312133789 s -INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11127853393554688 s -DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=121180605291686933521832715627954682820, time:1750768767.2556894s req_ids:[8] -DEBUG 06-24 20:39:27 [manager.py:391] -ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:214.13493156433105ms total_cost_time:214.17641639709473ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14505 prompt_cache_len:5151 prompt_cache_ratio:0.35511892450879007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 -DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:27 [batch.py:51] router release req id 8 -INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10916662216186523 s -INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11140942573547363 s -DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=152699025386797280072989503536975120357, time:1750768767.4784515s req_ids:[8] -DEBUG 06-24 20:39:27 [manager.py:391] -ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:397.97163009643555ms total_cost_time:398.01764488220215ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14506 prompt_cache_len:5151 prompt_cache_ratio:0.35509444367847787 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 -DEBUG 06-24 20:39:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:27 [batch.py:51] router release req id 8 -INFO 06-24 20:39:27 [manager.py:224] router recive req id 8 cost time 0.10917139053344727 s -INFO 06-24 20:39:27 [manager.py:68] detokenization recv req id 8 cost time 0.11104202270507812 s -DEBUG 06-24 20:39:27 [manager.py:391] Prefill Batch: batch_id=225517323539097838137148115057711582619, time:1750768767.8819711s req_ids:[8] -DEBUG 06-24 20:39:27 [manager.py:391] -ERROR 06-24 20:39:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:214.5991325378418ms total_cost_time:214.64180946350098ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14507 prompt_cache_len:5151 prompt_cache_ratio:0.3550699662232026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 -DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:28 [batch.py:51] router release req id 8 -INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10844016075134277 s -INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s -DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=6713878808218885103510731605048636514, time:1750768768.1038742s req_ids:[8] -DEBUG 06-24 20:39:28 [manager.py:391] -ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:27 lightllm_req_id:8 first_token_cost:212.92805671691895ms total_cost_time:212.97311782836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14508 prompt_cache_len:5151 prompt_cache_ratio:0.35504549214226633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 -DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:28 [batch.py:51] router release req id 8 -INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10911011695861816 s -INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.1111457347869873 s -DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=42346344243147713077976346209372286639, time:1750768768.3278384s req_ids:[8] -DEBUG 06-24 20:39:28 [manager.py:391] -ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:221.2541103363037ms total_cost_time:221.2989330291748ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14509 prompt_cache_len:5151 prompt_cache_ratio:0.3550210214349714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 -DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:28 [batch.py:51] router release req id 8 -INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10893464088439941 s -INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11146807670593262 s -DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=220021960560683506433750817331502473825, time:1750768768.5506558s req_ids:[8] -DEBUG 06-24 20:39:28 [manager.py:391] -ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:227.0960807800293ms total_cost_time:227.1406650543213ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14510 prompt_cache_len:5151 prompt_cache_ratio:0.35499655410062025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 -DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:28 [batch.py:51] router release req id 8 -INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10839080810546875 s -INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.11044597625732422 s -DEBUG 06-24 20:39:28 [manager.py:391] Prefill Batch: batch_id=229685299257154220715900711509445511589, time:1750768768.785899s req_ids:[8] -DEBUG 06-24 20:39:28 [manager.py:391] -ERROR 06-24 20:39:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:209.9781036376953ms total_cost_time:210.02483367919922ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14511 prompt_cache_len:5151 prompt_cache_ratio:0.3549720901385156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 -DEBUG 06-24 20:39:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:28 [batch.py:51] router release req id 8 -INFO 06-24 20:39:28 [manager.py:224] router recive req id 8 cost time 0.10783004760742188 s -INFO 06-24 20:39:28 [manager.py:68] detokenization recv req id 8 cost time 0.10985660552978516 s -DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=116243069301083513492510742167312509184, time:1750768769.0023446s req_ids:[8] -DEBUG 06-24 20:39:29 [manager.py:391] -ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:28 lightllm_req_id:8 first_token_cost:381.3645839691162ms total_cost_time:381.4098834991455ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14512 prompt_cache_len:5151 prompt_cache_ratio:0.35494762954796033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 -DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:29 [batch.py:51] router release req id 8 -INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10873961448669434 s -INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.1107938289642334 s -DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=51957068422808433717595741949994564403, time:1750768769.3901327s req_ids:[8] -DEBUG 06-24 20:39:29 [manager.py:391] -ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:215.7609462738037ms total_cost_time:215.8055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14513 prompt_cache_len:5151 prompt_cache_ratio:0.35492317232825743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 -DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:29 [batch.py:51] router release req id 8 -INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10838770866394043 s -INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.11039233207702637 s -DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=92997065188900168108083870791664317073, time:1750768769.6129997s req_ids:[8] -DEBUG 06-24 20:39:29 [manager.py:391] -ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:220.14355659484863ms total_cost_time:220.18671035766602ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14514 prompt_cache_len:5151 prompt_cache_ratio:0.3548987184787102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 -DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:29 [batch.py:51] router release req id 8 -INFO 06-24 20:39:29 [manager.py:224] router recive req id 8 cost time 0.10933661460876465 s -INFO 06-24 20:39:29 [manager.py:68] detokenization recv req id 8 cost time 0.11144876480102539 s -DEBUG 06-24 20:39:29 [manager.py:391] Prefill Batch: batch_id=21022815083986832922779510185894412257, time:1750768769.8390672s req_ids:[8] -DEBUG 06-24 20:39:29 [manager.py:391] -ERROR 06-24 20:39:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:39:29 [stats.py:37] Avg tokens(prompt+generate) throughput: 57593.660 tokens/s -DEBUG 06-24 20:39:29 [stats.py:37] Avg prompt tokens throughput: 57585.714 tokens/s -DEBUG 06-24 20:39:29 [stats.py:37] Avg generate tokens throughput: 7.945 tokens/s -INFO 06-24 20:39:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:215.01922607421875ms total_cost_time:215.06333351135254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14515 prompt_cache_len:5151 prompt_cache_ratio:0.3548742679986221 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 -DEBUG 06-24 20:39:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:29 [batch.py:51] router release req id 8 -INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10903692245483398 s -INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.11097288131713867 s -DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=38548695778031382016589659988168318072, time:1750768770.0609934s req_ids:[8] -DEBUG 06-24 20:39:30 [manager.py:391] -ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:29 lightllm_req_id:8 first_token_cost:214.7083282470703ms total_cost_time:214.7517204284668ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14516 prompt_cache_len:5151 prompt_cache_ratio:0.3548498208872968 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 -DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:30 [batch.py:51] router release req id 8 -INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10804271697998047 s -INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.10986781120300293 s -DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=158478708906950481299914873774314052208, time:1750768770.2822213s req_ids:[8] -DEBUG 06-24 20:39:30 [manager.py:391] -ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:381.4666271209717ms total_cost_time:381.5126419067383ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14517 prompt_cache_len:5151 prompt_cache_ratio:0.354825377144038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 -DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:30 [batch.py:51] router release req id 8 -INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.10880446434020996 s -INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.1097707748413086 s -DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=287697090486576627064814895502678023296, time:1750768770.672578s req_ids:[8] -DEBUG 06-24 20:39:30 [manager.py:391] -ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:174.54957962036133ms total_cost_time:174.59583282470703ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14518 prompt_cache_len:5151 prompt_cache_ratio:0.3548009367681499 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 -DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:30 [batch.py:51] router release req id 8 -INFO 06-24 20:39:30 [manager.py:224] router recive req id 8 cost time 0.1092534065246582 s -INFO 06-24 20:39:30 [manager.py:68] detokenization recv req id 8 cost time 0.11119699478149414 s -DEBUG 06-24 20:39:30 [manager.py:391] Prefill Batch: batch_id=88368069443256321029080726696609605266, time:1750768770.8521051s req_ids:[8] -DEBUG 06-24 20:39:30 [manager.py:391] -ERROR 06-24 20:39:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:211.24625205993652ms total_cost_time:211.29131317138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14519 prompt_cache_len:5151 prompt_cache_ratio:0.35477649975893655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 -DEBUG 06-24 20:39:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:30 [batch.py:51] router release req id 8 -INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10927438735961914 s -INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11032629013061523 s -DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=162736841805040154597337970261109821179, time:1750768771.0699902s req_ids:[8] -DEBUG 06-24 20:39:31 [manager.py:391] -ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:30 lightllm_req_id:8 first_token_cost:207.46660232543945ms total_cost_time:207.51237869262695ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14520 prompt_cache_len:5151 prompt_cache_ratio:0.3547520661157025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 -DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:31 [batch.py:51] router release req id 8 -INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10957741737365723 s -INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11060309410095215 s -DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=203851999625692996963080883550923803114, time:1750768771.2857325s req_ids:[8] -DEBUG 06-24 20:39:31 [manager.py:391] -ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:211.3487720489502ms total_cost_time:211.3945484161377ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14521 prompt_cache_len:5151 prompt_cache_ratio:0.35472763583775224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 -DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:31 [batch.py:51] router release req id 8 -INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10896444320678711 s -INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11089015007019043 s -DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=84784081357613295429460316189847027415, time:1750768771.505065s req_ids:[8] -DEBUG 06-24 20:39:31 [manager.py:391] -ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:215.24930000305176ms total_cost_time:215.29245376586914ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14522 prompt_cache_len:5151 prompt_cache_ratio:0.35470320892439056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 -DEBUG 06-24 20:39:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:31 [batch.py:51] router release req id 8 -INFO 06-24 20:39:31 [manager.py:224] router recive req id 8 cost time 0.10847949981689453 s -INFO 06-24 20:39:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s -DEBUG 06-24 20:39:31 [manager.py:391] Prefill Batch: batch_id=34840033883206875132247591791445968657, time:1750768771.7278214s req_ids:[8] -DEBUG 06-24 20:39:31 [manager.py:391] -ERROR 06-24 20:39:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:31 lightllm_req_id:8 first_token_cost:383.66174697875977ms total_cost_time:383.71944427490234ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:14523 prompt_cache_len:5151 prompt_cache_ratio:0.35467878537492253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 -DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:32 [batch.py:51] router release req id 8 -INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10870480537414551 s -INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11017632484436035 s -DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=19578909454828492390384784907658982033, time:1750768772.120981s req_ids:[8] -DEBUG 06-24 20:39:32 [manager.py:391] -ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:206.33625984191895ms total_cost_time:206.38251304626465ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14524 prompt_cache_len:5151 prompt_cache_ratio:0.3546543651886533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 -DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:32 [batch.py:51] router release req id 8 -INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10823655128479004 s -INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11028289794921875 s -DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=253050683166302558214573613985803362243, time:1750768772.3340127s req_ids:[8] -DEBUG 06-24 20:39:32 [manager.py:391] -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:32 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:216.36271476745605ms total_cost_time:216.40753746032715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14525 prompt_cache_len:5151 prompt_cache_ratio:0.35462994836488815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 -DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:32 [batch.py:51] router release req id 8 -INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10906076431274414 s -INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11098599433898926 s -DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=172541633832869620480675002880845347885, time:1750768772.554955s req_ids:[8] -DEBUG 06-24 20:39:32 [manager.py:391] -ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:212.04805374145508ms total_cost_time:212.09239959716797ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14526 prompt_cache_len:5151 prompt_cache_ratio:0.3546055349029327 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 -DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:32 [batch.py:51] router release req id 8 -INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10868120193481445 s -INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11058855056762695 s -DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=116563986190267675180670257853089251592, time:1750768772.7719748s req_ids:[8] -DEBUG 06-24 20:39:32 [manager.py:391] -ERROR 06-24 20:39:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:214.12062644958496ms total_cost_time:214.16807174682617ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:14527 prompt_cache_len:5151 prompt_cache_ratio:0.35458112480209264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 -DEBUG 06-24 20:39:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:32 [batch.py:51] router release req id 8 -INFO 06-24 20:39:32 [manager.py:224] router recive req id 8 cost time 0.10902786254882812 s -INFO 06-24 20:39:32 [manager.py:68] detokenization recv req id 8 cost time 0.11092495918273926 s -DEBUG 06-24 20:39:32 [manager.py:391] Prefill Batch: batch_id=337779889701170464176396188361321404737, time:1750768772.9921832s req_ids:[8] -DEBUG 06-24 20:39:32 [manager.py:391] -ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:32 lightllm_req_id:8 first_token_cost:388.883113861084ms total_cost_time:388.92579078674316ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14528 prompt_cache_len:5151 prompt_cache_ratio:0.354556718061674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 -DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:33 [batch.py:51] router release req id 8 -INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.1095120906829834 s -INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.11142444610595703 s -DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=115831165849966091983738549977859610938, time:1750768773.3863826s req_ids:[8] -DEBUG 06-24 20:39:33 [manager.py:391] -ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:214.63274955749512ms total_cost_time:214.677095413208ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14529 prompt_cache_len:5151 prompt_cache_ratio:0.35453231468098284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 -DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:33 [batch.py:51] router release req id 8 -INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.10982036590576172 s -INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.11160731315612793 s -DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=297552670641093857025616635569833848039, time:1750768773.6056292s req_ids:[8] -DEBUG 06-24 20:39:33 [manager.py:391] -ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:213.64545822143555ms total_cost_time:213.67383003234863ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14530 prompt_cache_len:5151 prompt_cache_ratio:0.3545079146593255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 -DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:33 [batch.py:51] router release req id 8 -INFO 06-24 20:39:33 [manager.py:224] router recive req id 8 cost time 0.1051626205444336 s -INFO 06-24 20:39:33 [manager.py:68] detokenization recv req id 8 cost time 0.10715079307556152 s -DEBUG 06-24 20:39:33 [manager.py:391] Prefill Batch: batch_id=68897729774901835811780633384753556976, time:1750768773.8237085s req_ids:[8] -DEBUG 06-24 20:39:33 [manager.py:391] -ERROR 06-24 20:39:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:216.4022922515869ms total_cost_time:216.4478302001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14531 prompt_cache_len:5151 prompt_cache_ratio:0.3544835179960085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 -DEBUG 06-24 20:39:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:33 [batch.py:51] router release req id 8 -INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.10673117637634277 s -INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10866165161132812 s -DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=292428663578407402613213095398339634301, time:1750768774.0457487s req_ids:[8] -DEBUG 06-24 20:39:34 [manager.py:391] -ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:33 lightllm_req_id:8 first_token_cost:212.34393119812012ms total_cost_time:212.36538887023926ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:14532 prompt_cache_len:5151 prompt_cache_ratio:0.3544591246903386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 -DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:34 [batch.py:51] router release req id 8 -INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.1070253849029541 s -INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10894083976745605 s -DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=85666927417501075917827027665996698415, time:1750768774.2727563s req_ids:[8] -DEBUG 06-24 20:39:34 [manager.py:391] -ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:224.9915599822998ms total_cost_time:225.0204086303711ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:14533 prompt_cache_len:5151 prompt_cache_ratio:0.3544347347416225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 -DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:34 [batch.py:51] router release req id 8 -INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.1066436767578125 s -INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10848617553710938 s -DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=48624980663240354968588472068637974651, time:1750768774.4927688s req_ids:[8] -DEBUG 06-24 20:39:34 [manager.py:391] -ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:394.64879035949707ms total_cost_time:394.67811584472656ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:14534 prompt_cache_len:5151 prompt_cache_ratio:0.35441034814916744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 -DEBUG 06-24 20:39:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:34 [batch.py:51] router release req id 8 -INFO 06-24 20:39:34 [manager.py:224] router recive req id 8 cost time 0.10545563697814941 s -INFO 06-24 20:39:34 [manager.py:68] detokenization recv req id 8 cost time 0.10740542411804199 s -DEBUG 06-24 20:39:34 [manager.py:391] Prefill Batch: batch_id=253663197077762910574318102480921532126, time:1750768774.8926473s req_ids:[8] -DEBUG 06-24 20:39:34 [manager.py:391] -ERROR 06-24 20:39:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:212.15009689331055ms total_cost_time:212.17918395996094ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:14535 prompt_cache_len:5151 prompt_cache_ratio:0.3543859649122807 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 -DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:35 [batch.py:51] router release req id 8 -INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.1065216064453125 s -INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10839557647705078 s -DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=243621567440008859290270158440564756858, time:1750768775.1073027s req_ids:[8] -DEBUG 06-24 20:39:35 [manager.py:391] -ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:34 lightllm_req_id:8 first_token_cost:214.9794101715088ms total_cost_time:215.00802040100098ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14536 prompt_cache_len:5151 prompt_cache_ratio:0.35436158503026965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 -DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:35 [batch.py:51] router release req id 8 -INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10625600814819336 s -INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.1080775260925293 s -DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=265979074091991026862489475995578720631, time:1750768775.3319008s req_ids:[8] -DEBUG 06-24 20:39:35 [manager.py:391] -ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:226.24707221984863ms total_cost_time:226.2728214263916ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14537 prompt_cache_len:5151 prompt_cache_ratio:0.35433720850244205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 -DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:35 [batch.py:51] router release req id 8 -INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10551905632019043 s -INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10728931427001953 s -DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=79254854132675384673988451049041210039, time:1750768775.556698s req_ids:[8] -DEBUG 06-24 20:39:35 [manager.py:391] -ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:213.33575248718262ms total_cost_time:213.36030960083008ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14538 prompt_cache_len:5151 prompt_cache_ratio:0.35431283532810565 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 -DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:35 [batch.py:51] router release req id 8 -INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10634064674377441 s -INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.10814833641052246 s -DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=93757842686643624768673998178436165449, time:1750768775.7731576s req_ids:[8] -DEBUG 06-24 20:39:35 [manager.py:391] -ERROR 06-24 20:39:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:214.75911140441895ms total_cost_time:214.78676795959473ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14539 prompt_cache_len:5151 prompt_cache_ratio:0.35428846550656856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 -DEBUG 06-24 20:39:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:35 [batch.py:51] router release req id 8 -INFO 06-24 20:39:35 [manager.py:224] router recive req id 8 cost time 0.10568451881408691 s -INFO 06-24 20:39:35 [manager.py:68] detokenization recv req id 8 cost time 0.1065833568572998 s -DEBUG 06-24 20:39:35 [manager.py:391] Prefill Batch: batch_id=202427555565836042505631835410287323896, time:1750768775.9910913s req_ids:[8] -DEBUG 06-24 20:39:35 [manager.py:391] -ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:35 lightllm_req_id:8 first_token_cost:352.3087501525879ms total_cost_time:352.3366451263428ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14540 prompt_cache_len:5151 prompt_cache_ratio:0.3542640990371389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 -DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:36 [batch.py:51] router release req id 8 -INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10631704330444336 s -INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.10827064514160156 s -DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=171990501095187370170590592793478840113, time:1750768776.3483527s req_ids:[8] -DEBUG 06-24 20:39:36 [manager.py:391] -ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:212.1107578277588ms total_cost_time:212.13555335998535ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14541 prompt_cache_len:5151 prompt_cache_ratio:0.3542397359191252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 -DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:36 [batch.py:51] router release req id 8 -INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10549807548522949 s -INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.10733342170715332 s -DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=43865969775910525812742350957049867530, time:1750768776.5692482s req_ids:[8] -DEBUG 06-24 20:39:36 [manager.py:391] -ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:228.26337814331055ms total_cost_time:228.2886505126953ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14542 prompt_cache_len:5151 prompt_cache_ratio:0.35421537615183607 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 -DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:36 [batch.py:51] router release req id 8 -INFO 06-24 20:39:36 [manager.py:224] router recive req id 8 cost time 0.10509443283081055 s -INFO 06-24 20:39:36 [manager.py:68] detokenization recv req id 8 cost time 0.1069021224975586 s -DEBUG 06-24 20:39:36 [manager.py:391] Prefill Batch: batch_id=58206165729686710022622029361539552525, time:1750768776.799016s req_ids:[8] -DEBUG 06-24 20:39:36 [manager.py:391] -ERROR 06-24 20:39:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:227.1115779876709ms total_cost_time:227.13708877563477ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14543 prompt_cache_len:5151 prompt_cache_ratio:0.3541910197345802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 -DEBUG 06-24 20:39:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:36 [batch.py:51] router release req id 8 -INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.1050422191619873 s -INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10706543922424316 s -DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=77033703735486625642662111757147313043, time:1750768777.0248942s req_ids:[8] -DEBUG 06-24 20:39:37 [manager.py:391] -ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:36 lightllm_req_id:8 first_token_cost:215.31176567077637ms total_cost_time:215.33703804016113ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14544 prompt_cache_len:5151 prompt_cache_ratio:0.3541666666666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 -DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:37 [batch.py:51] router release req id 8 -INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.1050117015838623 s -INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10679054260253906 s -DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=295670723805656863671268656658320243139, time:1750768777.244879s req_ids:[8] -DEBUG 06-24 20:39:37 [manager.py:391] -ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:216.92347526550293ms total_cost_time:216.9477939605713ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:14545 prompt_cache_len:5151 prompt_cache_ratio:0.3541423169474046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 -DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:37 [batch.py:51] router release req id 8 -INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.10505509376525879 s -INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.10692381858825684 s -DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=308159446360874762038106091714660563488, time:1750768777.4642599s req_ids:[8] -DEBUG 06-24 20:39:37 [manager.py:391] -ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:376.6951560974121ms total_cost_time:376.72924995422363ms,out_token_counter:1 mean_per_token_cost_time: 0.03409385681152344ms prompt_token_num:14546 prompt_cache_len:5151 prompt_cache_ratio:0.3541179705761034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 -DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:37 [batch.py:51] router release req id 8 -INFO 06-24 20:39:37 [manager.py:224] router recive req id 8 cost time 0.10520124435424805 s -INFO 06-24 20:39:37 [manager.py:68] detokenization recv req id 8 cost time 0.1069796085357666 s -DEBUG 06-24 20:39:37 [manager.py:391] Prefill Batch: batch_id=189630209730561884198165174909157859004, time:1750768777.843753s req_ids:[8] -DEBUG 06-24 20:39:37 [manager.py:391] -ERROR 06-24 20:39:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:215.43645858764648ms total_cost_time:215.46220779418945ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14547 prompt_cache_len:5151 prompt_cache_ratio:0.3540936275520726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 -DEBUG 06-24 20:39:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:37 [batch.py:51] router release req id 8 -INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10455322265625 s -INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.10618925094604492 s -DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=325918525779496039842248217357204321678, time:1750768778.062834s req_ids:[8] -DEBUG 06-24 20:39:38 [manager.py:391] -ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:37 lightllm_req_id:8 first_token_cost:176.32341384887695ms total_cost_time:176.34892463684082ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14548 prompt_cache_len:5151 prompt_cache_ratio:0.35406928787462194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 -DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:38 [batch.py:51] router release req id 8 -INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10448503494262695 s -INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.10615873336791992 s -DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=92758111282561674591850410791681163346, time:1750768778.2447345s req_ids:[8] -DEBUG 06-24 20:39:38 [manager.py:391] -ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:218.86348724365234ms total_cost_time:218.92595291137695ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:14549 prompt_cache_len:5151 prompt_cache_ratio:0.35404495154306137 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 -DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:38 [batch.py:51] router release req id 8 -INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10870552062988281 s -INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.11075520515441895 s -DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=322179404023758885546546502674286972741, time:1750768778.4655535s req_ids:[8] -DEBUG 06-24 20:39:38 [manager.py:391] -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:38 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:217.61059761047363ms total_cost_time:217.65470504760742ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14550 prompt_cache_len:5151 prompt_cache_ratio:0.354020618556701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 -DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:38 [batch.py:51] router release req id 8 -INFO 06-24 20:39:38 [manager.py:224] router recive req id 8 cost time 0.10891580581665039 s -INFO 06-24 20:39:38 [manager.py:68] detokenization recv req id 8 cost time 0.11069917678833008 s -DEBUG 06-24 20:39:38 [manager.py:391] Prefill Batch: batch_id=267868153342560201599840998098879924559, time:1750768778.688823s req_ids:[8] -DEBUG 06-24 20:39:38 [manager.py:391] -ERROR 06-24 20:39:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:389.43934440612793ms total_cost_time:389.4658088684082ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:14551 prompt_cache_len:5151 prompt_cache_ratio:0.3539962889148512 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 -DEBUG 06-24 20:39:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:38 [batch.py:51] router release req id 8 -INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10507512092590332 s -INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.1069638729095459 s -DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=180161131653370043387807789626053008799, time:1750768779.0837405s req_ids:[8] -DEBUG 06-24 20:39:39 [manager.py:391] -ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:38 lightllm_req_id:8 first_token_cost:218.99151802062988ms total_cost_time:219.01702880859375ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:14552 prompt_cache_len:5151 prompt_cache_ratio:0.35397196261682246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 -DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:39 [batch.py:51] router release req id 8 -INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10482287406921387 s -INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.1066582202911377 s -DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=206215961922434598291205563438507583166, time:1750768779.306101s req_ids:[8] -DEBUG 06-24 20:39:39 [manager.py:391] -ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:216.71390533447266ms total_cost_time:216.73917770385742ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14553 prompt_cache_len:5151 prompt_cache_ratio:0.3539476396619254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 -DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:39 [batch.py:51] router release req id 8 -INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10648703575134277 s -INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10832786560058594 s -DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=252652394669039347765058144388945946225, time:1750768779.52618s req_ids:[8] -DEBUG 06-24 20:39:39 [manager.py:391] -ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:217.89073944091797ms total_cost_time:217.91529655456543ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14554 prompt_cache_len:5151 prompt_cache_ratio:0.3539233200494709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 -DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:39 [batch.py:51] router release req id 8 -INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10501265525817871 s -INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10709428787231445 s -DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=287534783340934451281967748198843775954, time:1750768779.7469914s req_ids:[8] -DEBUG 06-24 20:39:39 [manager.py:391] -ERROR 06-24 20:39:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:217.52595901489258ms total_cost_time:217.54956245422363ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14555 prompt_cache_len:5151 prompt_cache_ratio:0.3538990037787702 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 -DEBUG 06-24 20:39:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:39 [batch.py:51] router release req id 8 -INFO 06-24 20:39:39 [manager.py:224] router recive req id 8 cost time 0.10512948036193848 s -INFO 06-24 20:39:39 [manager.py:68] detokenization recv req id 8 cost time 0.10709619522094727 s -DEBUG 06-24 20:39:39 [manager.py:391] Prefill Batch: batch_id=89778433006741374031564466399037010663, time:1750768779.9680543s req_ids:[8] -DEBUG 06-24 20:39:39 [manager.py:391] -DEBUG 06-24 20:39:39 [stats.py:37] Avg tokens(prompt+generate) throughput: 59424.380 tokens/s -DEBUG 06-24 20:39:39 [stats.py:37] Avg prompt tokens throughput: 59416.304 tokens/s -DEBUG 06-24 20:39:39 [stats.py:37] Avg generate tokens throughput: 8.075 tokens/s -ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:39 lightllm_req_id:8 first_token_cost:386.9898319244385ms total_cost_time:387.01748847961426ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14556 prompt_cache_len:5151 prompt_cache_ratio:0.35387469084913437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 -DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:40 [batch.py:51] router release req id 8 -INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10444808006286621 s -INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10617828369140625 s -DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=280520322171749294401558454858125420448, time:1750768780.358777s req_ids:[8] -DEBUG 06-24 20:39:40 [manager.py:391] -ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:222.35822677612305ms total_cost_time:222.3799228668213ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14557 prompt_cache_len:5151 prompt_cache_ratio:0.35385038125987495 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 -DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:40 [batch.py:51] router release req id 8 -INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10513019561767578 s -INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10718584060668945 s -DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=140410259684259293180875717506552278344, time:1750768780.585255s req_ids:[8] -DEBUG 06-24 20:39:40 [manager.py:391] -ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:220.48044204711914ms total_cost_time:220.52669525146484ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14558 prompt_cache_len:5151 prompt_cache_ratio:0.3538260750103036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 -DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:40 [batch.py:51] router release req id 8 -INFO 06-24 20:39:40 [manager.py:224] router recive req id 8 cost time 0.10622835159301758 s -INFO 06-24 20:39:40 [manager.py:68] detokenization recv req id 8 cost time 0.10811924934387207 s -DEBUG 06-24 20:39:40 [manager.py:391] Prefill Batch: batch_id=1653032116612397461388754469803233048, time:1750768780.8078618s req_ids:[8] -DEBUG 06-24 20:39:40 [manager.py:391] -ERROR 06-24 20:39:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:212.60905265808105ms total_cost_time:212.6328945159912ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:14559 prompt_cache_len:5151 prompt_cache_ratio:0.3538017720997321 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 -DEBUG 06-24 20:39:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:40 [batch.py:51] router release req id 8 -INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10672831535339355 s -INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.1080923080444336 s -DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=205397277942477784722705214871751204846, time:1750768781.0256853s req_ids:[8] -DEBUG 06-24 20:39:41 [manager.py:391] -ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:40 lightllm_req_id:8 first_token_cost:215.75617790222168ms total_cost_time:215.80052375793457ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14560 prompt_cache_len:5151 prompt_cache_ratio:0.35377747252747255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 -DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:41 [batch.py:51] router release req id 8 -INFO 06-24 20:39:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10914158821105957 s -INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.11106038093566895 s -DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=19059869327271715355176767311431732765, time:1750768781.2463067s req_ids:[8] -DEBUG 06-24 20:39:41 [manager.py:391] -ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:217.90361404418945ms total_cost_time:217.94795989990234ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14561 prompt_cache_len:5151 prompt_cache_ratio:0.35375317629283703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 -DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:41 [batch.py:51] router release req id 8 -INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10848593711853027 s -INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.11055159568786621 s -DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=206879657124235317704285691125894303644, time:1750768781.468305s req_ids:[8] -DEBUG 06-24 20:39:41 [manager.py:391] -ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:406.7668914794922ms total_cost_time:406.79264068603516ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14562 prompt_cache_len:5151 prompt_cache_ratio:0.353728883395138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 -DEBUG 06-24 20:39:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:41 [batch.py:51] router release req id 8 -INFO 06-24 20:39:41 [manager.py:224] router recive req id 8 cost time 0.10489439964294434 s -INFO 06-24 20:39:41 [manager.py:68] detokenization recv req id 8 cost time 0.10699582099914551 s -DEBUG 06-24 20:39:41 [manager.py:391] Prefill Batch: batch_id=309916577608175208869587583860240154216, time:1750768781.8821192s req_ids:[8] -DEBUG 06-24 20:39:41 [manager.py:391] -ERROR 06-24 20:39:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:223.8020896911621ms total_cost_time:223.82593154907227ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:14563 prompt_cache_len:5151 prompt_cache_ratio:0.35370459383368813 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 -DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:42 [batch.py:51] router release req id 8 -INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10525345802307129 s -INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.10729098320007324 s -DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=165710777552318443583147627246047820026, time:1750768782.10833s req_ids:[8] -DEBUG 06-24 20:39:42 [manager.py:391] -ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:41 lightllm_req_id:8 first_token_cost:222.11003303527832ms total_cost_time:222.1357822418213ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:14564 prompt_cache_len:5151 prompt_cache_ratio:0.35368030760780006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 -DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:42 [batch.py:51] router release req id 8 -INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10807037353515625 s -INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11021828651428223 s -DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=215334853418445634214597553135006690364, time:1750768782.3342974s req_ids:[8] -DEBUG 06-24 20:39:42 [manager.py:391] -ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:216.04275703430176ms total_cost_time:216.06683731079102ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:14565 prompt_cache_len:5151 prompt_cache_ratio:0.3536560247167868 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 -DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:42 [batch.py:51] router release req id 8 -INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10508966445922852 s -INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.10776138305664062 s -DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=334573730246093725421829264613665050079, time:1750768782.5556443s req_ids:[8] -DEBUG 06-24 20:39:42 [manager.py:391] -ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:217.07630157470703ms total_cost_time:217.12088584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14566 prompt_cache_len:5151 prompt_cache_ratio:0.35363174515996154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 -DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:42 [batch.py:51] router release req id 8 -INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.10887002944946289 s -INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11088919639587402 s -DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=215790762517037872206847644005661357873, time:1750768782.7753825s req_ids:[8] -DEBUG 06-24 20:39:42 [manager.py:391] -ERROR 06-24 20:39:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:213.5601043701172ms total_cost_time:213.60325813293457ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14567 prompt_cache_len:5151 prompt_cache_ratio:0.3536074689366376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 -DEBUG 06-24 20:39:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:42 [batch.py:51] router release req id 8 -INFO 06-24 20:39:42 [manager.py:224] router recive req id 8 cost time 0.1089472770690918 s -INFO 06-24 20:39:42 [manager.py:68] detokenization recv req id 8 cost time 0.11099410057067871 s -DEBUG 06-24 20:39:42 [manager.py:391] Prefill Batch: batch_id=68790359951395144961366974045103045021, time:1750768782.9947865s req_ids:[8] -DEBUG 06-24 20:39:42 [manager.py:391] -ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:42 lightllm_req_id:8 first_token_cost:383.84270668029785ms total_cost_time:383.88776779174805ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14568 prompt_cache_len:5151 prompt_cache_ratio:0.3535831960461285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 -DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:43 [batch.py:51] router release req id 8 -INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10820460319519043 s -INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.11005616188049316 s -DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=297256081971149290798006059989384643590, time:1750768783.3827567s req_ids:[8] -DEBUG 06-24 20:39:43 [manager.py:391] -ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:175.62031745910645ms total_cost_time:175.6448745727539ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14569 prompt_cache_len:5151 prompt_cache_ratio:0.35355892648774795 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 -DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:43 [batch.py:51] router release req id 8 -INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10473084449768066 s -INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10885357856750488 s -DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=4973619096162654162692915543051061468, time:1750768783.564899s req_ids:[8] -DEBUG 06-24 20:39:43 [manager.py:391] -ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:180.81068992614746ms total_cost_time:180.83524703979492ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14570 prompt_cache_len:5151 prompt_cache_ratio:0.3535346602608099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 -DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:43 [batch.py:51] router release req id 8 -INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.1049654483795166 s -INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10670328140258789 s -DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=233917476471107642307969767589465740054, time:1750768783.7476735s req_ids:[8] -DEBUG 06-24 20:39:43 [manager.py:391] -ERROR 06-24 20:39:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:213.63592147827148ms total_cost_time:213.66190910339355ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14571 prompt_cache_len:5151 prompt_cache_ratio:0.35351039736462836 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 -DEBUG 06-24 20:39:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:43 [batch.py:51] router release req id 8 -INFO 06-24 20:39:43 [manager.py:224] router recive req id 8 cost time 0.10660719871520996 s -INFO 06-24 20:39:43 [manager.py:68] detokenization recv req id 8 cost time 0.10859799385070801 s -DEBUG 06-24 20:39:43 [manager.py:391] Prefill Batch: batch_id=17118165458813347663164568606460948944, time:1750768783.9665282s req_ids:[8] -DEBUG 06-24 20:39:43 [manager.py:391] -ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:43 lightllm_req_id:8 first_token_cost:220.07036209106445ms total_cost_time:220.1082706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.03790855407714844ms prompt_token_num:14572 prompt_cache_len:5151 prompt_cache_ratio:0.35348613779851773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 -DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:44 [batch.py:51] router release req id 8 -INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10977530479431152 s -INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.1117258071899414 s -DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=205642516942540850395020645980091111761, time:1750768784.189258s req_ids:[8] -DEBUG 06-24 20:39:44 [manager.py:391] -ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:213.36102485656738ms total_cost_time:213.42897415161133ms,out_token_counter:1 mean_per_token_cost_time: 0.06794929504394531ms prompt_token_num:14573 prompt_cache_len:5151 prompt_cache_ratio:0.35346188156179237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 -DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:44 [batch.py:51] router release req id 8 -INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10812807083129883 s -INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.11024689674377441 s -DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=329062553862864902825476588063199503672, time:1750768784.4081151s req_ids:[8] -DEBUG 06-24 20:39:44 [manager.py:391] -ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:379.43553924560547ms total_cost_time:379.4875144958496ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:14574 prompt_cache_len:5151 prompt_cache_ratio:0.353437628653767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 -DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:44 [batch.py:51] router release req id 8 -INFO 06-24 20:39:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:44 [manager.py:224] router recive req id 8 cost time 0.10849857330322266 s -INFO 06-24 20:39:44 [manager.py:68] detokenization recv req id 8 cost time 0.11000704765319824 s -DEBUG 06-24 20:39:44 [manager.py:391] Prefill Batch: batch_id=302621952782947911018666686882542310677, time:1750768784.8011167s req_ids:[8] -DEBUG 06-24 20:39:44 [manager.py:391] -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:225.12412071228027ms total_cost_time:225.1608371734619ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:14575 prompt_cache_len:5151 prompt_cache_ratio:0.3534133790737564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 -DEBUG 06-24 20:39:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:44 [batch.py:51] router release req id 8 -INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10988879203796387 s -INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11193156242370605 s -DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=265653627118302793511361999638043288970, time:1750768785.0297034s req_ids:[8] -DEBUG 06-24 20:39:45 [manager.py:391] -ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:44 lightllm_req_id:8 first_token_cost:224.74122047424316ms total_cost_time:224.77221488952637ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:14576 prompt_cache_len:5151 prompt_cache_ratio:0.35338913282107576 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 -DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:45 [batch.py:51] router release req id 8 -INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10801935195922852 s -INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.10956549644470215 s -DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=253164673824209732774513588948796858866, time:1750768785.2535675s req_ids:[8] -DEBUG 06-24 20:39:45 [manager.py:391] -ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:213.23347091674805ms total_cost_time:213.28210830688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:14577 prompt_cache_len:5151 prompt_cache_ratio:0.3533648898950401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 -DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:45 [batch.py:51] router release req id 8 -INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s -INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11107063293457031 s -DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=124452974190543933737609369919558691927, time:1750768785.4730792s req_ids:[8] -DEBUG 06-24 20:39:45 [manager.py:391] -ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:217.16594696044922ms total_cost_time:217.20266342163086ms,out_token_counter:1 mean_per_token_cost_time: 0.036716461181640625ms prompt_token_num:14578 prompt_cache_len:5151 prompt_cache_ratio:0.35334065029496503 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 -DEBUG 06-24 20:39:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:45 [batch.py:51] router release req id 8 -INFO 06-24 20:39:45 [manager.py:224] router recive req id 8 cost time 0.10924315452575684 s -INFO 06-24 20:39:45 [manager.py:68] detokenization recv req id 8 cost time 0.11134648323059082 s -DEBUG 06-24 20:39:45 [manager.py:391] Prefill Batch: batch_id=149675380207269222589362834159669788817, time:1750768785.6982708s req_ids:[8] -DEBUG 06-24 20:39:45 [manager.py:391] -ERROR 06-24 20:39:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:216.72892570495605ms total_cost_time:216.77589416503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14579 prompt_cache_len:5151 prompt_cache_ratio:0.353316414020166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 -DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:46 [batch.py:51] router release req id 8 -INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.3128364086151123 s -INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.3141770362854004 s -DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=301335975606783357699383636055311218219, time:1750768786.127607s req_ids:[8] -DEBUG 06-24 20:39:46 [manager.py:391] -ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:45 lightllm_req_id:8 first_token_cost:431.80131912231445ms total_cost_time:431.83422088623047ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:14580 prompt_cache_len:5151 prompt_cache_ratio:0.35329218106995885 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 -DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:46 [batch.py:51] router release req id 8 -INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.10882687568664551 s -INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11079978942871094 s -DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=49483186681568051395164541004042744190, time:1750768786.3661072s req_ids:[8] -DEBUG 06-24 20:39:46 [manager.py:391] -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:46 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:227.33020782470703ms total_cost_time:227.3871898651123ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:14581 prompt_cache_len:5151 prompt_cache_ratio:0.3532679514436596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 -DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:46 [batch.py:51] router release req id 8 -INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.11306214332580566 s -INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11495161056518555 s -DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=311631563991357654020654465196010331414, time:1750768786.5928771s req_ids:[8] -DEBUG 06-24 20:39:46 [manager.py:391] -ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:217.6809310913086ms total_cost_time:217.7255153656006ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14582 prompt_cache_len:5151 prompt_cache_ratio:0.3532437251405843 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 -DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:46 [batch.py:51] router release req id 8 -INFO 06-24 20:39:46 [manager.py:224] router recive req id 8 cost time 0.10955119132995605 s -INFO 06-24 20:39:46 [manager.py:68] detokenization recv req id 8 cost time 0.11147761344909668 s -DEBUG 06-24 20:39:46 [manager.py:391] Prefill Batch: batch_id=213824827493101668332366572389671179453, time:1750768786.8165715s req_ids:[8] -DEBUG 06-24 20:39:46 [manager.py:391] -ERROR 06-24 20:39:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:215.96574783325195ms total_cost_time:216.01009368896484ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14583 prompt_cache_len:5151 prompt_cache_ratio:0.3532195021600494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 -DEBUG 06-24 20:39:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:46 [batch.py:51] router release req id 8 -INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.11057877540588379 s -INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11251163482666016 s -DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=154832086762027315607523519337500576396, time:1750768787.0391874s req_ids:[8] -DEBUG 06-24 20:39:47 [manager.py:391] -ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:46 lightllm_req_id:8 first_token_cost:216.10641479492188ms total_cost_time:216.14551544189453ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:14584 prompt_cache_len:5151 prompt_cache_ratio:0.3531952825013714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 -INFO 06-24 20:39:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:47 [batch.py:51] router release req id 8 -INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.21015024185180664 s -INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.2118375301361084 s -DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=269062854948957732017759949002905868460, time:1750768787.364056s req_ids:[8] -DEBUG 06-24 20:39:47 [manager.py:391] -ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:276.2646675109863ms total_cost_time:276.3078212738037ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14585 prompt_cache_len:5151 prompt_cache_ratio:0.353171066163867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 -DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:47 [batch.py:51] router release req id 8 -INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.10839128494262695 s -INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11011576652526855 s -DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=3413577450046016845705075942239804203, time:1750768787.543885s req_ids:[8] -DEBUG 06-24 20:39:47 [manager.py:391] -ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:211.24768257141113ms total_cost_time:211.29274368286133ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14586 prompt_cache_len:5151 prompt_cache_ratio:0.3531468531468531 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 -DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:47 [batch.py:51] router release req id 8 -INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.10943436622619629 s -INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.1112203598022461 s -DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=216751042116308651620632627734768161934, time:1750768787.761565s req_ids:[8] -DEBUG 06-24 20:39:47 [manager.py:391] -ERROR 06-24 20:39:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:210.3433609008789ms total_cost_time:210.3707790374756ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14587 prompt_cache_len:5151 prompt_cache_ratio:0.35312264344964694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 -DEBUG 06-24 20:39:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:47 [batch.py:51] router release req id 8 -INFO 06-24 20:39:47 [manager.py:224] router recive req id 8 cost time 0.11063313484191895 s -INFO 06-24 20:39:47 [manager.py:68] detokenization recv req id 8 cost time 0.11246490478515625 s -DEBUG 06-24 20:39:47 [manager.py:391] Prefill Batch: batch_id=251723794494026504238249637016749788398, time:1750768787.9782412s req_ids:[8] -DEBUG 06-24 20:39:47 [manager.py:391] -ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:47 lightllm_req_id:8 first_token_cost:213.8216495513916ms total_cost_time:213.87910842895508ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14588 prompt_cache_len:5151 prompt_cache_ratio:0.3530984370715657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 -DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:48 [batch.py:51] router release req id 8 -INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.11202597618103027 s -INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.11405372619628906 s -DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=338120759683938438999969171600705157505, time:1750768788.2309172s req_ids:[8] -DEBUG 06-24 20:39:48 [manager.py:391] -ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:256.55293464660645ms total_cost_time:256.59871101379395ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14589 prompt_cache_len:5151 prompt_cache_ratio:0.3530742340119268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 -DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:48 [batch.py:51] router release req id 8 -INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.10900354385375977 s -INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.1109468936920166 s -DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=143278051472972091966221136975172002495, time:1750768788.4723651s req_ids:[8] -DEBUG 06-24 20:39:48 [manager.py:391] -ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:390.85888862609863ms total_cost_time:390.9034729003906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14590 prompt_cache_len:5151 prompt_cache_ratio:0.353050034270048 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 -DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:48 [batch.py:51] router release req id 8 -INFO 06-24 20:39:48 [manager.py:224] router recive req id 8 cost time 0.10914468765258789 s -INFO 06-24 20:39:48 [manager.py:68] detokenization recv req id 8 cost time 0.11065673828125 s -DEBUG 06-24 20:39:48 [manager.py:391] Prefill Batch: batch_id=171757553372950164812274981303353789178, time:1750768788.8605888s req_ids:[8] -DEBUG 06-24 20:39:48 [manager.py:391] -ERROR 06-24 20:39:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:217.6070213317871ms total_cost_time:217.6504135131836ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14591 prompt_cache_len:5151 prompt_cache_ratio:0.35302583784524705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 -DEBUG 06-24 20:39:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:48 [batch.py:51] router release req id 8 -INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10980415344238281 s -INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11185383796691895 s -DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=147303899395693450157815051491875240325, time:1750768789.0860713s req_ids:[8] -DEBUG 06-24 20:39:49 [manager.py:391] -ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:48 lightllm_req_id:8 first_token_cost:220.31641006469727ms total_cost_time:220.36266326904297ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14592 prompt_cache_len:5151 prompt_cache_ratio:0.3530016447368421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 -DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:49 [batch.py:51] router release req id 8 -INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10911941528320312 s -INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11120724678039551 s -DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=103152850691057898636715907070306923752, time:1750768789.310929s req_ids:[8] -DEBUG 06-24 20:39:49 [manager.py:391] -ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:218.36280822753906ms total_cost_time:218.40691566467285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14593 prompt_cache_len:5151 prompt_cache_ratio:0.3529774549441513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 -DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:49 [batch.py:51] router release req id 8 -DEBUG 06-24 20:39:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:49 [manager.py:283] -DEBUG 06-24 20:39:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:49 [manager.py:284] -INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10927987098693848 s -INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11118841171264648 s -DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=35927082414003202068380400190757305817, time:1750768789.536831s req_ids:[8] -DEBUG 06-24 20:39:49 [manager.py:391] -ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:213.1800651550293ms total_cost_time:213.22226524353027ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14594 prompt_cache_len:5151 prompt_cache_ratio:0.35295326846649305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 -DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:49 [batch.py:51] router release req id 8 -INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10883259773254395 s -INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11100125312805176 s -DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=282044102051602559175650873271267029671, time:1750768789.7535982s req_ids:[8] -DEBUG 06-24 20:39:49 [manager.py:391] -ERROR 06-24 20:39:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:216.6440486907959ms total_cost_time:216.68505668640137ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14595 prompt_cache_len:5151 prompt_cache_ratio:0.352929085303186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 -DEBUG 06-24 20:39:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:49 [batch.py:51] router release req id 8 -INFO 06-24 20:39:49 [manager.py:224] router recive req id 8 cost time 0.10870146751403809 s -INFO 06-24 20:39:49 [manager.py:68] detokenization recv req id 8 cost time 0.11076021194458008 s -DEBUG 06-24 20:39:49 [manager.py:391] Prefill Batch: batch_id=218153792189841875753775991752451270359, time:1750768789.976416s req_ids:[8] -DEBUG 06-24 20:39:49 [manager.py:391] -DEBUG 06-24 20:39:49 [stats.py:37] Avg tokens(prompt+generate) throughput: 58264.955 tokens/s -DEBUG 06-24 20:39:49 [stats.py:37] Avg prompt tokens throughput: 58256.962 tokens/s -DEBUG 06-24 20:39:49 [stats.py:37] Avg generate tokens throughput: 7.993 tokens/s -ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:49 lightllm_req_id:8 first_token_cost:368.6525821685791ms total_cost_time:368.6971664428711ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14596 prompt_cache_len:5151 prompt_cache_ratio:0.3529049054535489 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 -DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:50 [batch.py:51] router release req id 8 -INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10863208770751953 s -INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.11052775382995605 s -DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=18352547525446340012965043078790008012, time:1750768790.3512247s req_ids:[8] -DEBUG 06-24 20:39:50 [manager.py:391] -ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:216.1552906036377ms total_cost_time:216.19820594787598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14597 prompt_cache_len:5151 prompt_cache_ratio:0.35288072891690075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 -DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:50 [batch.py:51] router release req id 8 -INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10856080055236816 s -INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.11051297187805176 s -DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=10695622368058238099645858113850432297, time:1750768790.5725007s req_ids:[8] -DEBUG 06-24 20:39:50 [manager.py:391] -ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:214.4770622253418ms total_cost_time:214.5214080810547ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14598 prompt_cache_len:5151 prompt_cache_ratio:0.35285655569256064 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 -DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:50 [batch.py:51] router release req id 8 -INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.10753059387207031 s -INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.10928916931152344 s -DEBUG 06-24 20:39:50 [manager.py:391] Prefill Batch: batch_id=218508103209010209395421857556803296197, time:1750768790.7908895s req_ids:[8] -DEBUG 06-24 20:39:50 [manager.py:391] -ERROR 06-24 20:39:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:207.0603370666504ms total_cost_time:207.10253715515137ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14599 prompt_cache_len:5151 prompt_cache_ratio:0.35283238577984793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 -DEBUG 06-24 20:39:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:50 [batch.py:51] router release req id 8 -INFO 06-24 20:39:50 [manager.py:224] router recive req id 8 cost time 0.11075448989868164 s -INFO 06-24 20:39:50 [manager.py:68] detokenization recv req id 8 cost time 0.1125950813293457 s -DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=32079312299270332517646655638123830093, time:1750768791.0045545s req_ids:[8] -DEBUG 06-24 20:39:51 [manager.py:391] -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:50 lightllm_req_id:8 first_token_cost:211.81249618530273ms total_cost_time:211.85803413391113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14600 prompt_cache_len:5151 prompt_cache_ratio:0.3528082191780822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 -DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:51 [batch.py:51] router release req id 8 -INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10862302780151367 s -INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.11037230491638184 s -DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=72107015934581796145915057026780154410, time:1750768791.2195976s req_ids:[8] -DEBUG 06-24 20:39:51 [manager.py:391] -ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:380.9068202972412ms total_cost_time:380.9514045715332ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14601 prompt_cache_len:5151 prompt_cache_ratio:0.3527840558865831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 -DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:51 [batch.py:51] router release req id 8 -INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10866665840148926 s -INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.11000609397888184 s -DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=95970877254586506240287575455117919632, time:1750768791.6071267s req_ids:[8] -DEBUG 06-24 20:39:51 [manager.py:391] -ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:216.994047164917ms total_cost_time:217.02194213867188ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14602 prompt_cache_len:5151 prompt_cache_ratio:0.3527598959046706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 -DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:51 [batch.py:51] router release req id 8 -INFO 06-24 20:39:51 [manager.py:224] router recive req id 8 cost time 0.10620427131652832 s -INFO 06-24 20:39:51 [manager.py:68] detokenization recv req id 8 cost time 0.10823392868041992 s -DEBUG 06-24 20:39:51 [manager.py:391] Prefill Batch: batch_id=317265843642154283015348988840065746539, time:1750768791.833168s req_ids:[8] -DEBUG 06-24 20:39:51 [manager.py:391] -ERROR 06-24 20:39:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:222.67746925354004ms total_cost_time:222.7010726928711ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14603 prompt_cache_len:5151 prompt_cache_ratio:0.35273573923166474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 -DEBUG 06-24 20:39:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:51 [batch.py:51] router release req id 8 -INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10722732543945312 s -INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10929679870605469 s -DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=136128606129125989612163793498456469184, time:1750768792.0681968s req_ids:[8] -DEBUG 06-24 20:39:52 [manager.py:391] -ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:51 lightllm_req_id:8 first_token_cost:225.9359359741211ms total_cost_time:225.9809970855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14604 prompt_cache_len:5151 prompt_cache_ratio:0.3527115858668858 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 -DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:52 [batch.py:51] router release req id 8 -INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10843300819396973 s -INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10958266258239746 s -DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=127430898602860975647392876129355729424, time:1750768792.2916193s req_ids:[8] -DEBUG 06-24 20:39:52 [manager.py:391] -ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:215.73829650878906ms total_cost_time:215.76619148254395ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:14605 prompt_cache_len:5151 prompt_cache_ratio:0.3526874358096542 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 -DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:52 [batch.py:51] router release req id 8 -INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10626411437988281 s -INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10811591148376465 s -DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=340033590239168107458885665131937223048, time:1750768792.5128484s req_ids:[8] -DEBUG 06-24 20:39:52 [manager.py:391] -ERROR 06-24 20:39:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:218.78623962402344ms total_cost_time:218.8122272491455ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14606 prompt_cache_len:5151 prompt_cache_ratio:0.35266328905929073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 -DEBUG 06-24 20:39:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:52 [batch.py:51] router release req id 8 -INFO 06-24 20:39:52 [manager.py:224] router recive req id 8 cost time 0.10608458518981934 s -INFO 06-24 20:39:52 [manager.py:68] detokenization recv req id 8 cost time 0.10817193984985352 s -DEBUG 06-24 20:39:52 [manager.py:391] Prefill Batch: batch_id=200743274537124578205950152583811654672, time:1750768792.7434688s req_ids:[8] -DEBUG 06-24 20:39:52 [manager.py:391] -ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:52 lightllm_req_id:8 first_token_cost:396.587610244751ms total_cost_time:396.6174125671387ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:14607 prompt_cache_len:5151 prompt_cache_ratio:0.35263914561511606 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 -DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:53 [batch.py:51] router release req id 8 -INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10672283172607422 s -INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10876917839050293 s -DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=301483937968471526072041871744558342363, time:1750768793.141431s req_ids:[8] -DEBUG 06-24 20:39:53 [manager.py:391] -INFO 06-24 20:39:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:39:53 [statics_utils.py:24] mean first cost: 232.2300645177751 ms -INFO 06-24 20:39:53 [statics_utils.py:24] mean per token cost: 0.057852640866821445 ms -ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:220.24774551391602ms total_cost_time:220.2754020690918ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14608 prompt_cache_len:5151 prompt_cache_ratio:0.35261500547645125 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 -DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:53 [batch.py:51] router release req id 8 -INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10716056823730469 s -INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.11010479927062988 s -DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=312784204664014756927534557653319251439, time:1750768793.366108s req_ids:[8] -DEBUG 06-24 20:39:53 [manager.py:391] -ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:217.6227569580078ms total_cost_time:217.6513671875ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14609 prompt_cache_len:5151 prompt_cache_ratio:0.35259086864261757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 -DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:53 [batch.py:51] router release req id 8 -INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10644745826721191 s -INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10797619819641113 s -DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=128547841836206815438437644166158978975, time:1750768793.588976s req_ids:[8] -DEBUG 06-24 20:39:53 [manager.py:391] -ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:213.85979652404785ms total_cost_time:213.88864517211914ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:14610 prompt_cache_len:5151 prompt_cache_ratio:0.35256673511293635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 -DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:53 [batch.py:51] router release req id 8 -INFO 06-24 20:39:53 [manager.py:224] router recive req id 8 cost time 0.10611128807067871 s -INFO 06-24 20:39:53 [manager.py:68] detokenization recv req id 8 cost time 0.10823369026184082 s -DEBUG 06-24 20:39:53 [manager.py:391] Prefill Batch: batch_id=315830407126642505896334383469077725843, time:1750768793.8071468s req_ids:[8] -DEBUG 06-24 20:39:53 [manager.py:391] -ERROR 06-24 20:39:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:217.33975410461426ms total_cost_time:217.36812591552734ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14611 prompt_cache_len:5151 prompt_cache_ratio:0.35254260488672917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 -DEBUG 06-24 20:39:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:53 [batch.py:51] router release req id 8 -INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10588216781616211 s -INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10793089866638184 s -DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=251621967783032822995151232132850052759, time:1750768794.0308383s req_ids:[8] -DEBUG 06-24 20:39:54 [manager.py:391] -ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:53 lightllm_req_id:8 first_token_cost:214.91217613220215ms total_cost_time:214.9367332458496ms,out_token_counter:1 mean_per_token_cost_time: 0.024557113647460938ms prompt_token_num:14612 prompt_cache_len:5151 prompt_cache_ratio:0.3525184779633178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 -DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:54 [batch.py:51] router release req id 8 -INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.1060643196105957 s -INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10789370536804199 s -DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=54683609956904701781389237146703411612, time:1750768794.2510002s req_ids:[8] -DEBUG 06-24 20:39:54 [manager.py:391] -ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:347.46575355529785ms total_cost_time:347.49484062194824ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:14613 prompt_cache_len:5151 prompt_cache_ratio:0.35249435434202425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 -DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:54 [batch.py:51] router release req id 8 -INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10524249076843262 s -INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10705113410949707 s -DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=6342112528025415781306804597996030509, time:1750768794.606074s req_ids:[8] -DEBUG 06-24 20:39:54 [manager.py:391] -ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:217.94795989990234ms total_cost_time:217.97537803649902ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14614 prompt_cache_len:5151 prompt_cache_ratio:0.35247023402217054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 -DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:54 [batch.py:51] router release req id 8 -INFO 06-24 20:39:54 [manager.py:224] router recive req id 8 cost time 0.10634374618530273 s -INFO 06-24 20:39:54 [manager.py:68] detokenization recv req id 8 cost time 0.10817933082580566 s -DEBUG 06-24 20:39:54 [manager.py:391] Prefill Batch: batch_id=217464734558654374031099404067429390308, time:1750768794.8369815s req_ids:[8] -DEBUG 06-24 20:39:54 [manager.py:391] -ERROR 06-24 20:39:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:229.18248176574707ms total_cost_time:229.21013832092285ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:14615 prompt_cache_len:5151 prompt_cache_ratio:0.35244611700307904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 -DEBUG 06-24 20:39:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:54 [batch.py:51] router release req id 8 -INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10702347755432129 s -INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.1091005802154541 s -DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=223676207967032390637801055791797868217, time:1750768795.0742579s req_ids:[8] -DEBUG 06-24 20:39:55 [manager.py:391] -ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:54 lightllm_req_id:8 first_token_cost:231.34779930114746ms total_cost_time:231.37474060058594ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14616 prompt_cache_len:5151 prompt_cache_ratio:0.35242200328407225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 -DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:55 [batch.py:51] router release req id 8 -INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10688185691833496 s -INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.10891556739807129 s -DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=180004751886272089253343028418972066478, time:1750768795.3162808s req_ids:[8] -DEBUG 06-24 20:39:55 [manager.py:391] -ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:238.39902877807617ms total_cost_time:238.42740058898926ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:14617 prompt_cache_len:5151 prompt_cache_ratio:0.3523978928644729 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 -DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:55 [batch.py:51] router release req id 8 -INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10786652565002441 s -INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.10989022254943848 s -DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=142714408361736700852275728377013525592, time:1750768795.5545967s req_ids:[8] -DEBUG 06-24 20:39:55 [manager.py:391] -ERROR 06-24 20:39:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:405.1780700683594ms total_cost_time:405.20501136779785ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:14618 prompt_cache_len:5151 prompt_cache_ratio:0.3523737857436038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 -DEBUG 06-24 20:39:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:55 [batch.py:51] router release req id 8 -INFO 06-24 20:39:55 [manager.py:224] router recive req id 8 cost time 0.10633182525634766 s -INFO 06-24 20:39:55 [manager.py:68] detokenization recv req id 8 cost time 0.1082770824432373 s -DEBUG 06-24 20:39:55 [manager.py:391] Prefill Batch: batch_id=182161739474111145085642832283242148312, time:1750768795.9641972s req_ids:[8] -DEBUG 06-24 20:39:55 [manager.py:391] -ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:55 lightllm_req_id:8 first_token_cost:232.13481903076172ms total_cost_time:232.1641445159912ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:14619 prompt_cache_len:5151 prompt_cache_ratio:0.352349681920788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 -DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:56 [batch.py:51] router release req id 8 -INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10519170761108398 s -INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10738849639892578 s -DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=285106244454682188540343192740695780505, time:1750768796.1957314s req_ids:[8] -DEBUG 06-24 20:39:56 [manager.py:391] -ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:223.36626052856445ms total_cost_time:223.39487075805664ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:14620 prompt_cache_len:5151 prompt_cache_ratio:0.35232558139534886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 -DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:56 [batch.py:51] router release req id 8 -INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10544943809509277 s -INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10754656791687012 s -DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=176315020001638055913747257223346552792, time:1750768796.4222822s req_ids:[8] -DEBUG 06-24 20:39:56 [manager.py:391] -ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:218.75333786010742ms total_cost_time:218.77813339233398ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14621 prompt_cache_len:5151 prompt_cache_ratio:0.35230148416660967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 -DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:56 [batch.py:51] router release req id 8 -INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.1060781478881836 s -INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10825228691101074 s -DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=298162584713200607091534333393971425933, time:1750768796.6455512s req_ids:[8] -DEBUG 06-24 20:39:56 [manager.py:391] -ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:223.16384315490723ms total_cost_time:223.19436073303223ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:14622 prompt_cache_len:5151 prompt_cache_ratio:0.35227739023389415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 -DEBUG 06-24 20:39:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:56 [batch.py:51] router release req id 8 -INFO 06-24 20:39:56 [manager.py:224] router recive req id 8 cost time 0.10529351234436035 s -INFO 06-24 20:39:56 [manager.py:68] detokenization recv req id 8 cost time 0.10734963417053223 s -DEBUG 06-24 20:39:56 [manager.py:391] Prefill Batch: batch_id=91351218140577636045305557247925479091, time:1750768796.870546s req_ids:[8] -DEBUG 06-24 20:39:56 [manager.py:391] -ERROR 06-24 20:39:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:215.67440032958984ms total_cost_time:215.70181846618652ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:14623 prompt_cache_len:5151 prompt_cache_ratio:0.352253299596526 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 -DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:57 [batch.py:51] router release req id 8 -INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10541558265686035 s -INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10755062103271484 s -DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=28915172066466887270575219324325629351, time:1750768797.0898035s req_ids:[8] -DEBUG 06-24 20:39:57 [manager.py:391] -ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:56 lightllm_req_id:8 first_token_cost:375.9632110595703ms total_cost_time:375.98514556884766ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:14624 prompt_cache_len:5151 prompt_cache_ratio:0.3522292122538293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 -DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:57 [batch.py:51] router release req id 8 -INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10532641410827637 s -INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.1074821949005127 s -DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=269008974523881689285635085617253543697, time:1750768797.476191s req_ids:[8] -DEBUG 06-24 20:39:57 [manager.py:391] -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:39:57 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:218.9481258392334ms total_cost_time:218.97315979003906ms,out_token_counter:1 mean_per_token_cost_time: 0.025033950805664062ms prompt_token_num:14625 prompt_cache_len:5151 prompt_cache_ratio:0.3522051282051282 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 -DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:57 [batch.py:51] router release req id 8 -INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10531425476074219 s -INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10724377632141113 s -DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=295450407924368829242366151270423258738, time:1750768797.6956959s req_ids:[8] -DEBUG 06-24 20:39:57 [manager.py:391] -ERROR 06-24 20:39:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:218.78290176391602ms total_cost_time:218.80769729614258ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:14626 prompt_cache_len:5151 prompt_cache_ratio:0.35218104744974704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 -DEBUG 06-24 20:39:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:57 [batch.py:51] router release req id 8 -INFO 06-24 20:39:57 [manager.py:224] router recive req id 8 cost time 0.10580563545227051 s -INFO 06-24 20:39:57 [manager.py:68] detokenization recv req id 8 cost time 0.10789370536804199 s -DEBUG 06-24 20:39:57 [manager.py:391] Prefill Batch: batch_id=57912617319386578300860825420687098809, time:1750768797.92045s req_ids:[8] -DEBUG 06-24 20:39:57 [manager.py:391] -ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:57 lightllm_req_id:8 first_token_cost:215.4548168182373ms total_cost_time:215.47842025756836ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:14627 prompt_cache_len:5151 prompt_cache_ratio:0.35215696998701035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 -DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:58 [batch.py:51] router release req id 8 -INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10486388206481934 s -INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.1068871021270752 s -DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=79918718710762034017296853980517708040, time:1750768798.1405146s req_ids:[8] -DEBUG 06-24 20:39:58 [manager.py:391] -ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:219.27380561828613ms total_cost_time:219.2990779876709ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:14628 prompt_cache_len:5151 prompt_cache_ratio:0.3521328958162428 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 -DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:58 [batch.py:51] router release req id 8 -INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.1052238941192627 s -INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.10716032981872559 s -DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=296736462963503044587177980886388491731, time:1750768798.3650537s req_ids:[8] -DEBUG 06-24 20:39:58 [manager.py:391] -ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:385.53762435913086ms total_cost_time:385.56361198425293ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:14629 prompt_cache_len:5151 prompt_cache_ratio:0.3521088249367694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 -DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:58 [batch.py:51] router release req id 8 -INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10832524299621582 s -INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.1101837158203125 s -DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=127522375407686513081918796752475705359, time:1750768798.7554176s req_ids:[8] -DEBUG 06-24 20:39:58 [manager.py:391] -ERROR 06-24 20:39:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:197.36433029174805ms total_cost_time:197.4325180053711ms,out_token_counter:1 mean_per_token_cost_time: 0.06818771362304688ms prompt_token_num:14630 prompt_cache_len:5151 prompt_cache_ratio:0.3520847573479152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 -DEBUG 06-24 20:39:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:58 [batch.py:51] router release req id 8 -INFO 06-24 20:39:58 [manager.py:224] router recive req id 8 cost time 0.10802865028381348 s -INFO 06-24 20:39:58 [manager.py:68] detokenization recv req id 8 cost time 0.11001706123352051 s -DEBUG 06-24 20:39:58 [manager.py:391] Prefill Batch: batch_id=285093775969350845142784214862407191997, time:1750768798.957246s req_ids:[8] -DEBUG 06-24 20:39:58 [manager.py:391] -ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:58 lightllm_req_id:8 first_token_cost:215.71826934814453ms total_cost_time:215.76213836669922ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14631 prompt_cache_len:5151 prompt_cache_ratio:0.35206069304900556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 -DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:59 [batch.py:51] router release req id 8 -INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10884499549865723 s -INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.11090373992919922 s -DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=70506625319648077842224250376179252301, time:1750768799.1885874s req_ids:[8] -DEBUG 06-24 20:39:59 [manager.py:391] -ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:228.3337116241455ms total_cost_time:228.35540771484375ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14632 prompt_cache_len:5151 prompt_cache_ratio:0.35203663203936575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 -DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:59 [batch.py:51] router release req id 8 -INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10346317291259766 s -INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.10498213768005371 s -DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=188348915149361742991505328604343062615, time:1750768799.4199874s req_ids:[8] -DEBUG 06-24 20:39:59 [manager.py:391] -ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:187.75439262390137ms total_cost_time:187.81089782714844ms,out_token_counter:1 mean_per_token_cost_time: 0.05650520324707031ms prompt_token_num:14633 prompt_cache_len:5151 prompt_cache_ratio:0.3520125743183216 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 -DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:59 [batch.py:51] router release req id 8 -INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.10652446746826172 s -INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.10769200325012207 s -DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=40039371805730158033001495857627358933, time:1750768799.6072655s req_ids:[8] -DEBUG 06-24 20:39:59 [manager.py:391] -ERROR 06-24 20:39:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:39:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:39:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:180.33838272094727ms total_cost_time:180.38320541381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14634 prompt_cache_len:5151 prompt_cache_ratio:0.35198851988519886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:39:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 -DEBUG 06-24 20:39:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:39:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:39:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:39:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:39:59 [batch.py:51] router release req id 8 -INFO 06-24 20:39:59 [manager.py:224] router recive req id 8 cost time 0.1085507869720459 s -INFO 06-24 20:39:59 [manager.py:68] detokenization recv req id 8 cost time 0.11055135726928711 s -DEBUG 06-24 20:39:59 [manager.py:391] Prefill Batch: batch_id=298133020760754316039800047710127863655, time:1750768799.7993019s req_ids:[8] -DEBUG 06-24 20:39:59 [manager.py:391] -ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 56632.024 tokens/s -DEBUG 06-24 20:40:00 [stats.py:37] Avg prompt tokens throughput: 56624.177 tokens/s -DEBUG 06-24 20:40:00 [stats.py:37] Avg generate tokens throughput: 7.848 tokens/s -INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:39:59 lightllm_req_id:8 first_token_cost:365.6184673309326ms total_cost_time:365.6766414642334ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14635 prompt_cache_len:5151 prompt_cache_ratio:0.3519644687393235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 -DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:00 [batch.py:51] router release req id 8 -INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.11114764213562012 s -INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11406183242797852 s -DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=283671386727694669175934384718867031338, time:1750768800.1677294s req_ids:[8] -DEBUG 06-24 20:40:00 [manager.py:391] -ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:241.8069839477539ms total_cost_time:241.8651580810547ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14636 prompt_cache_len:5151 prompt_cache_ratio:0.35194042088002186 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 -DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:00 [batch.py:51] router release req id 8 -INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10748553276062012 s -INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104285717010498 s -DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=78423654741865273524398752190596282397, time:1750768800.4166522s req_ids:[8] -DEBUG 06-24 20:40:00 [manager.py:391] -ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:203.91416549682617ms total_cost_time:203.96065711975098ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14637 prompt_cache_len:5151 prompt_cache_ratio:0.3519163763066202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 -DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:00 [batch.py:51] router release req id 8 -INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10904598236083984 s -INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11102771759033203 s -DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=101497246974836572241333421686202404036, time:1750768800.6269672s req_ids:[8] -DEBUG 06-24 20:40:00 [manager.py:391] -ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:219.27118301391602ms total_cost_time:219.315767288208ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14638 prompt_cache_len:5151 prompt_cache_ratio:0.35189233501844513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 -DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:00 [batch.py:51] router release req id 8 -INFO 06-24 20:40:00 [manager.py:224] router recive req id 8 cost time 0.10834193229675293 s -INFO 06-24 20:40:00 [manager.py:68] detokenization recv req id 8 cost time 0.11030030250549316 s -DEBUG 06-24 20:40:00 [manager.py:391] Prefill Batch: batch_id=252977016086248619614814875287801501198, time:1750768800.850858s req_ids:[8] -DEBUG 06-24 20:40:00 [manager.py:391] -ERROR 06-24 20:40:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:210.9367847442627ms total_cost_time:210.98041534423828ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14639 prompt_cache_len:5151 prompt_cache_ratio:0.3518682970148234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 -DEBUG 06-24 20:40:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:00 [batch.py:51] router release req id 8 -INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10794258117675781 s -INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11029672622680664 s -DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=75145028806095838917535046627860976429, time:1750768801.0772765s req_ids:[8] -DEBUG 06-24 20:40:01 [manager.py:391] -ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:00 lightllm_req_id:8 first_token_cost:203.19271087646484ms total_cost_time:203.23705673217773ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14640 prompt_cache_len:5151 prompt_cache_ratio:0.351844262295082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 -DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:01 [batch.py:51] router release req id 8 -INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s -INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11145734786987305 s -DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=29425647876760266472668248317845526836, time:1750768801.290563s req_ids:[8] -DEBUG 06-24 20:40:01 [manager.py:391] -ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:415.50731658935547ms total_cost_time:415.55190086364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14641 prompt_cache_len:5151 prompt_cache_ratio:0.3518202308585479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 -DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:01 [batch.py:51] router release req id 8 -INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10839319229125977 s -INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.11055612564086914 s -DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=31075885119344961701866495387885794661, time:1750768801.7093735s req_ids:[8] -DEBUG 06-24 20:40:01 [manager.py:391] -ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:191.0409927368164ms total_cost_time:191.0860538482666ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14642 prompt_cache_len:5151 prompt_cache_ratio:0.35179620270454853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 -DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:01 [batch.py:51] router release req id 8 -INFO 06-24 20:40:01 [manager.py:224] router recive req id 8 cost time 0.10827875137329102 s -INFO 06-24 20:40:01 [manager.py:68] detokenization recv req id 8 cost time 0.1100931167602539 s -DEBUG 06-24 20:40:01 [manager.py:391] Prefill Batch: batch_id=310405396197711528359347490688022475197, time:1750768801.899445s req_ids:[8] -DEBUG 06-24 20:40:01 [manager.py:391] -ERROR 06-24 20:40:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:179.06594276428223ms total_cost_time:179.11577224731445ms,out_token_counter:1 mean_per_token_cost_time: 0.04982948303222656ms prompt_token_num:14643 prompt_cache_len:5151 prompt_cache_ratio:0.3517721778324114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 -DEBUG 06-24 20:40:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:01 [batch.py:51] router release req id 8 -INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10940814018249512 s -INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11149954795837402 s -DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=38190377739000897392707002250125989748, time:1750768802.0863044s req_ids:[8] -DEBUG 06-24 20:40:02 [manager.py:391] -ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:01 lightllm_req_id:8 first_token_cost:239.7899627685547ms total_cost_time:239.837646484375ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:14644 prompt_cache_len:5151 prompt_cache_ratio:0.3517481562414641 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 -DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:02 [batch.py:51] router release req id 8 -INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10821080207824707 s -INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11034059524536133 s -DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=235138471743341854433750522224591097920, time:1750768802.3327134s req_ids:[8] -DEBUG 06-24 20:40:02 [manager.py:391] -ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:206.82764053344727ms total_cost_time:206.87365531921387ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14645 prompt_cache_len:5151 prompt_cache_ratio:0.35172413793103446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 -DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:02 [batch.py:51] router release req id 8 -INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10905051231384277 s -INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.11118292808532715 s -DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=252010119904605236361583805794168811429, time:1750768802.5591378s req_ids:[8] -DEBUG 06-24 20:40:02 [manager.py:391] -ERROR 06-24 20:40:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:228.54137420654297ms total_cost_time:228.60193252563477ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:14646 prompt_cache_len:5151 prompt_cache_ratio:0.35170012290045066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 -DEBUG 06-24 20:40:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:02 [batch.py:51] router release req id 8 -INFO 06-24 20:40:02 [manager.py:224] router recive req id 8 cost time 0.10776972770690918 s -INFO 06-24 20:40:02 [manager.py:68] detokenization recv req id 8 cost time 0.10976767539978027 s -DEBUG 06-24 20:40:02 [manager.py:391] Prefill Batch: batch_id=1614845614591362272259334149308349989, time:1750768802.7961314s req_ids:[8] -DEBUG 06-24 20:40:02 [manager.py:391] -ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:02 lightllm_req_id:8 first_token_cost:408.1401824951172ms total_cost_time:408.2012176513672ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:14647 prompt_cache_len:5151 prompt_cache_ratio:0.35167611114904074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 -DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:03 [batch.py:51] router release req id 8 -INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10899519920349121 s -INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11150813102722168 s -DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=32200158623744070366573186977118179276, time:1750768803.2030716s req_ids:[8] -DEBUG 06-24 20:40:03 [manager.py:391] -ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:220.25418281555176ms total_cost_time:220.29709815979004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14648 prompt_cache_len:5151 prompt_cache_ratio:0.3516521026761333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 -DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:03 [batch.py:51] router release req id 8 -INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10830974578857422 s -INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11023807525634766 s -DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=280894100610602898366798201723229739363, time:1750768803.4275584s req_ids:[8] -DEBUG 06-24 20:40:03 [manager.py:391] -ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:226.76801681518555ms total_cost_time:226.81260108947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14649 prompt_cache_len:5151 prompt_cache_ratio:0.3516280974810567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 -DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:03 [batch.py:51] router release req id 8 -INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10739803314208984 s -INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.1094968318939209 s -DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=1138200867971789197563616787512741240, time:1750768803.6810126s req_ids:[8] -DEBUG 06-24 20:40:03 [manager.py:391] -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:03 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:239.0146255493164ms total_cost_time:239.0587329864502ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14650 prompt_cache_len:5151 prompt_cache_ratio:0.35160409556313993 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 -DEBUG 06-24 20:40:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:03 [batch.py:51] router release req id 8 -INFO 06-24 20:40:03 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s -INFO 06-24 20:40:03 [manager.py:68] detokenization recv req id 8 cost time 0.11061835289001465 s -DEBUG 06-24 20:40:03 [manager.py:391] Prefill Batch: batch_id=233384928409575620349507820285901576477, time:1750768803.9078903s req_ids:[8] -DEBUG 06-24 20:40:03 [manager.py:391] -ERROR 06-24 20:40:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:03 lightllm_req_id:8 first_token_cost:207.12661743164062ms total_cost_time:207.17096328735352ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14651 prompt_cache_len:5151 prompt_cache_ratio:0.3515800969217118 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 -DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:04 [batch.py:51] router release req id 8 -INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.11030745506286621 s -INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.11235880851745605 s -DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=208651620164177868847082966661120653643, time:1750768804.1326857s req_ids:[8] -DEBUG 06-24 20:40:04 [manager.py:391] -ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:416.97120666503906ms total_cost_time:417.01602935791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14652 prompt_cache_len:5151 prompt_cache_ratio:0.35155610155610156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 -DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:04 [batch.py:51] router release req id 8 -INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10958456993103027 s -INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.1123802661895752 s -DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=337668018329813714051921797595025312314, time:1750768804.553901s req_ids:[8] -DEBUG 06-24 20:40:04 [manager.py:391] -ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:217.19741821289062ms total_cost_time:217.30661392211914ms,out_token_counter:1 mean_per_token_cost_time: 0.10919570922851562ms prompt_token_num:14653 prompt_cache_len:5151 prompt_cache_ratio:0.35153210946563845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 -DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:04 [batch.py:51] router release req id 8 -INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10912942886352539 s -INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.11120343208312988 s -DEBUG 06-24 20:40:04 [manager.py:391] Prefill Batch: batch_id=248904565534071234620878684551747519468, time:1750768804.7700567s req_ids:[8] -DEBUG 06-24 20:40:04 [manager.py:391] -ERROR 06-24 20:40:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:221.48370742797852ms total_cost_time:221.51470184326172ms,out_token_counter:1 mean_per_token_cost_time: 0.030994415283203125ms prompt_token_num:14654 prompt_cache_len:5151 prompt_cache_ratio:0.351508120649652 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 -DEBUG 06-24 20:40:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:04 [batch.py:51] router release req id 8 -INFO 06-24 20:40:04 [manager.py:224] router recive req id 8 cost time 0.10680437088012695 s -INFO 06-24 20:40:04 [manager.py:68] detokenization recv req id 8 cost time 0.10872602462768555 s -DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=330040149621897743690311209401296562889, time:1750768805.003832s req_ids:[8] -DEBUG 06-24 20:40:05 [manager.py:391] -ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:04 lightllm_req_id:8 first_token_cost:219.7716236114502ms total_cost_time:219.8176383972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14655 prompt_cache_len:5151 prompt_cache_ratio:0.35148413510747184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 -DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:05 [batch.py:51] router release req id 8 -INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.10593700408935547 s -INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.10802555084228516 s -DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=152227902293252817254054893581143336864, time:1750768805.2319462s req_ids:[8] -DEBUG 06-24 20:40:05 [manager.py:391] -ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:203.94349098205566ms total_cost_time:203.9651870727539ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:14656 prompt_cache_len:5151 prompt_cache_ratio:0.35146015283842796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 -DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:05 [batch.py:51] router release req id 8 -INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.1085062026977539 s -INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.10977768898010254 s -DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=160847208572750396787923596571863100231, time:1750768805.4464252s req_ids:[8] -DEBUG 06-24 20:40:05 [manager.py:391] -ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:381.3321590423584ms total_cost_time:381.3765048980713ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14657 prompt_cache_len:5151 prompt_cache_ratio:0.35143617384185033 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 -DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:05 [batch.py:51] router release req id 8 -INFO 06-24 20:40:05 [manager.py:224] router recive req id 8 cost time 0.10767531394958496 s -INFO 06-24 20:40:05 [manager.py:68] detokenization recv req id 8 cost time 0.1094510555267334 s -DEBUG 06-24 20:40:05 [manager.py:391] Prefill Batch: batch_id=267951776018354645296607241914416860378, time:1750768805.8219411s req_ids:[8] -DEBUG 06-24 20:40:05 [manager.py:391] -ERROR 06-24 20:40:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:195.69849967956543ms total_cost_time:195.7406997680664ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14658 prompt_cache_len:5151 prompt_cache_ratio:0.3514121981170692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 -DEBUG 06-24 20:40:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:05 [batch.py:51] router release req id 8 -INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10608243942260742 s -INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10776138305664062 s -DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=282227699810045277641654830340455488797, time:1750768806.0340028s req_ids:[8] -DEBUG 06-24 20:40:06 [manager.py:391] -ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:05 lightllm_req_id:8 first_token_cost:203.0620574951172ms total_cost_time:203.13501358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.07295608520507812ms prompt_token_num:14659 prompt_cache_len:5151 prompt_cache_ratio:0.351388225663415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 -DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:06 [batch.py:51] router release req id 8 -INFO 06-24 20:40:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10859799385070801 s -INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.11053943634033203 s -DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=186493777232001939700521816047499167378, time:1750768806.2328393s req_ids:[8] -DEBUG 06-24 20:40:06 [manager.py:391] -ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:214.54334259033203ms total_cost_time:214.5862579345703ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14660 prompt_cache_len:5151 prompt_cache_ratio:0.3513642564802183 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 -DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:06 [batch.py:51] router release req id 8 -INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s -INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10922741889953613 s -DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=222756045335889731770057236036851888253, time:1750768806.4541633s req_ids:[8] -DEBUG 06-24 20:40:06 [manager.py:391] -ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:213.77086639404297ms total_cost_time:213.8228416442871ms,out_token_counter:1 mean_per_token_cost_time: 0.051975250244140625ms prompt_token_num:14661 prompt_cache_len:5151 prompt_cache_ratio:0.3513402905668099 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 -DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:06 [batch.py:51] router release req id 8 -INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10773491859436035 s -INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.10969281196594238 s -DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=109573663443342411014985762888248359838, time:1750768806.6748793s req_ids:[8] -DEBUG 06-24 20:40:06 [manager.py:391] -ERROR 06-24 20:40:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:177.16646194458008ms total_cost_time:177.21056938171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14662 prompt_cache_len:5151 prompt_cache_ratio:0.3513163279225208 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 -DEBUG 06-24 20:40:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:06 [batch.py:51] router release req id 8 -INFO 06-24 20:40:06 [manager.py:224] router recive req id 8 cost time 0.10931563377380371 s -INFO 06-24 20:40:06 [manager.py:68] detokenization recv req id 8 cost time 0.11115598678588867 s -DEBUG 06-24 20:40:06 [manager.py:391] Prefill Batch: batch_id=101063367256290899015021728701759335485, time:1750768806.8588803s req_ids:[8] -DEBUG 06-24 20:40:06 [manager.py:391] -ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:06 lightllm_req_id:8 first_token_cost:372.8907108306885ms total_cost_time:372.93338775634766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14663 prompt_cache_len:5151 prompt_cache_ratio:0.3512923685466821 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 -DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:07 [batch.py:51] router release req id 8 -INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.1101844310760498 s -INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.11182022094726562 s -DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=39883962420550959448599423589029536963, time:1750768807.2426622s req_ids:[8] -DEBUG 06-24 20:40:07 [manager.py:391] -ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:208.0819606781006ms total_cost_time:208.12606811523438ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14664 prompt_cache_len:5151 prompt_cache_ratio:0.3512684124386252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 -DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:07 [batch.py:51] router release req id 8 -INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10881614685058594 s -INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.11061692237854004 s -DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=57462205274071478657137618260770688304, time:1750768807.454332s req_ids:[8] -DEBUG 06-24 20:40:07 [manager.py:391] -ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:208.53209495544434ms total_cost_time:208.57524871826172ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14665 prompt_cache_len:5151 prompt_cache_ratio:0.3512444595976816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 -DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:07 [batch.py:51] router release req id 8 -INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10800957679748535 s -INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.10934281349182129 s -DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=286672083995782378182249043300205071198, time:1750768807.6706464s req_ids:[8] -DEBUG 06-24 20:40:07 [manager.py:391] -ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:210.1461887359619ms total_cost_time:210.1905345916748ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14666 prompt_cache_len:5151 prompt_cache_ratio:0.3512205100231829 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 -DEBUG 06-24 20:40:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:07 [batch.py:51] router release req id 8 -INFO 06-24 20:40:07 [manager.py:224] router recive req id 8 cost time 0.10635113716125488 s -INFO 06-24 20:40:07 [manager.py:68] detokenization recv req id 8 cost time 0.10775232315063477 s -DEBUG 06-24 20:40:07 [manager.py:391] Prefill Batch: batch_id=132299397437960114637208644030322416134, time:1750768807.8862617s req_ids:[8] -DEBUG 06-24 20:40:07 [manager.py:391] -ERROR 06-24 20:40:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:199.13649559020996ms total_cost_time:199.17988777160645ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14667 prompt_cache_len:5151 prompt_cache_ratio:0.351196563714461 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 -DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:08 [batch.py:51] router release req id 8 -INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10843658447265625 s -INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.10985541343688965 s -DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=134751804413329011592638054024290144512, time:1750768808.0973508s req_ids:[8] -DEBUG 06-24 20:40:08 [manager.py:391] -ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:07 lightllm_req_id:8 first_token_cost:206.1624526977539ms total_cost_time:206.2082290649414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14668 prompt_cache_len:5151 prompt_cache_ratio:0.3511726206708481 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 -DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:08 [batch.py:51] router release req id 8 -INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10817885398864746 s -INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.11066317558288574 s -DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=292250162422457546909000223461499183186, time:1750768808.3055372s req_ids:[8] -DEBUG 06-24 20:40:08 [manager.py:391] -ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:366.82963371276855ms total_cost_time:366.87493324279785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14669 prompt_cache_len:5151 prompt_cache_ratio:0.35114868089167633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 -DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:08 [batch.py:51] router release req id 8 -INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10732626914978027 s -INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.1089630126953125 s -DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=307146466464324948792893729798468659939, time:1750768808.6853666s req_ids:[8] -DEBUG 06-24 20:40:08 [manager.py:391] -ERROR 06-24 20:40:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:217.52119064331055ms total_cost_time:217.54193305969238ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:14670 prompt_cache_len:5151 prompt_cache_ratio:0.3511247443762781 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 -DEBUG 06-24 20:40:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:08 [batch.py:51] router release req id 8 -INFO 06-24 20:40:08 [manager.py:224] router recive req id 8 cost time 0.10756731033325195 s -INFO 06-24 20:40:08 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s -DEBUG 06-24 20:40:08 [manager.py:391] Prefill Batch: batch_id=73876365064544821356557035080322412181, time:1750768808.9131062s req_ids:[8] -DEBUG 06-24 20:40:08 [manager.py:391] -ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:08 lightllm_req_id:8 first_token_cost:228.98292541503906ms total_cost_time:229.02679443359375ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14671 prompt_cache_len:5151 prompt_cache_ratio:0.3511008111239861 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 -DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:09 [batch.py:51] router release req id 8 -INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.10862016677856445 s -INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11059904098510742 s -DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=66176665053939551211755757697881003234, time:1750768809.1368496s req_ids:[8] -DEBUG 06-24 20:40:09 [manager.py:391] -ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:215.0115966796875ms total_cost_time:215.0554656982422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14672 prompt_cache_len:5151 prompt_cache_ratio:0.35107688113413305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 -DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:09 [batch.py:51] router release req id 8 -INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.11381244659423828 s -INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11596465110778809 s -DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=246670812287341754160553141257638364089, time:1750768809.3659966s req_ids:[8] -DEBUG 06-24 20:40:09 [manager.py:391] -ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:217.75007247924805ms total_cost_time:217.79179573059082ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14673 prompt_cache_len:5151 prompt_cache_ratio:0.35105295440605194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 -DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:09 [batch.py:51] router release req id 8 -INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.1089484691619873 s -INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11101055145263672 s -DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=75296899705129925377362098132480849591, time:1750768809.5828452s req_ids:[8] -DEBUG 06-24 20:40:09 [manager.py:391] -ERROR 06-24 20:40:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:214.37907218933105ms total_cost_time:214.42365646362305ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14674 prompt_cache_len:5151 prompt_cache_ratio:0.3510290309390759 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 -DEBUG 06-24 20:40:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:09 [batch.py:51] router release req id 8 -INFO 06-24 20:40:09 [manager.py:224] router recive req id 8 cost time 0.10825109481811523 s -INFO 06-24 20:40:09 [manager.py:68] detokenization recv req id 8 cost time 0.11038017272949219 s -DEBUG 06-24 20:40:09 [manager.py:391] Prefill Batch: batch_id=85232070758264560754967031954451671468, time:1750768809.818333s req_ids:[8] -DEBUG 06-24 20:40:09 [manager.py:391] -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:09 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 58414.144 tokens/s -DEBUG 06-24 20:40:10 [stats.py:37] Avg prompt tokens throughput: 58406.173 tokens/s -DEBUG 06-24 20:40:10 [stats.py:37] Avg generate tokens throughput: 7.971 tokens/s -INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:09 lightllm_req_id:8 first_token_cost:389.2490863800049ms total_cost_time:389.29224014282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14675 prompt_cache_len:5151 prompt_cache_ratio:0.3510051107325383 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 -DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:10 [batch.py:51] router release req id 8 -INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.10872507095336914 s -INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.1110992431640625 s -DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=288617072645281090501793009085291308542, time:1750768810.2027705s req_ids:[8] -DEBUG 06-24 20:40:10 [manager.py:391] -ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:216.3245677947998ms total_cost_time:216.3674831390381ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14676 prompt_cache_len:5151 prompt_cache_ratio:0.3509811937857727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 -DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:10 [batch.py:51] router release req id 8 -INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.10858917236328125 s -INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11062860488891602 s -DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=251449859872343744570645712716914805734, time:1750768810.4378803s req_ids:[8] -DEBUG 06-24 20:40:10 [manager.py:391] -ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:227.8614044189453ms total_cost_time:227.9031276702881ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14677 prompt_cache_len:5151 prompt_cache_ratio:0.35095728009811267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 -DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:10 [batch.py:51] router release req id 8 -INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.1087045669555664 s -INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11076140403747559 s -DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=296478288126915811178124684060817830986, time:1750768810.6640592s req_ids:[8] -DEBUG 06-24 20:40:10 [manager.py:391] -ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:215.3301239013672ms total_cost_time:215.37351608276367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14678 prompt_cache_len:5151 prompt_cache_ratio:0.3509333696688922 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 -DEBUG 06-24 20:40:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:10 [batch.py:51] router release req id 8 -INFO 06-24 20:40:10 [manager.py:224] router recive req id 8 cost time 0.11052227020263672 s -INFO 06-24 20:40:10 [manager.py:68] detokenization recv req id 8 cost time 0.11247849464416504 s -DEBUG 06-24 20:40:10 [manager.py:391] Prefill Batch: batch_id=73120314939454522818614101276649400932, time:1750768810.88674s req_ids:[8] -DEBUG 06-24 20:40:10 [manager.py:391] -ERROR 06-24 20:40:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:10 lightllm_req_id:8 first_token_cost:228.46174240112305ms total_cost_time:228.50656509399414ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14679 prompt_cache_len:5151 prompt_cache_ratio:0.3509094624974453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 -DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:11 [batch.py:51] router release req id 8 -INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10883855819702148 s -INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11079025268554688 s -DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=260313848381223299663677298441291830775, time:1750768811.120842s req_ids:[8] -DEBUG 06-24 20:40:11 [manager.py:391] -ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:209.08832550048828ms total_cost_time:209.13243293762207ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14680 prompt_cache_len:5151 prompt_cache_ratio:0.35088555858310627 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 -DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:11 [batch.py:51] router release req id 8 -INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.20868897438049316 s -INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.21042728424072266 s -DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=104439952574871363373452588716405472152, time:1750768811.4391289s req_ids:[8] -DEBUG 06-24 20:40:11 [manager.py:391] -ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:275.32243728637695ms total_cost_time:275.36582946777344ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14681 prompt_cache_len:5151 prompt_cache_ratio:0.35086165792520946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 -DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:11 [batch.py:51] router release req id 8 -INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10838603973388672 s -INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11029481887817383 s -DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=163806316326058749563679437850570691710, time:1750768811.6180243s req_ids:[8] -DEBUG 06-24 20:40:11 [manager.py:391] -ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:223.51574897766113ms total_cost_time:223.56033325195312ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14682 prompt_cache_len:5151 prompt_cache_ratio:0.3508377605230895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 -DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:11 [batch.py:51] router release req id 8 -INFO 06-24 20:40:11 [manager.py:224] router recive req id 8 cost time 0.10888910293579102 s -INFO 06-24 20:40:11 [manager.py:68] detokenization recv req id 8 cost time 0.11084342002868652 s -DEBUG 06-24 20:40:11 [manager.py:391] Prefill Batch: batch_id=259692710921977007290304756638727369546, time:1750768811.8481991s req_ids:[8] -DEBUG 06-24 20:40:11 [manager.py:391] -ERROR 06-24 20:40:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:208.74762535095215ms total_cost_time:208.79125595092773ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14683 prompt_cache_len:5151 prompt_cache_ratio:0.3508138663760812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 -DEBUG 06-24 20:40:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:11 [batch.py:51] router release req id 8 -INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.10900163650512695 s -INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.11109209060668945 s -DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=106178268281230887263468439569640370188, time:1750768812.0623271s req_ids:[8] -DEBUG 06-24 20:40:12 [manager.py:391] -ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:11 lightllm_req_id:8 first_token_cost:212.0676040649414ms total_cost_time:212.1131420135498ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14684 prompt_cache_len:5151 prompt_cache_ratio:0.3507899754835195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 -DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:12 [batch.py:51] router release req id 8 -INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.10736584663391113 s -INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.10934853553771973 s -DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=229506463445512717252030612818934474975, time:1750768812.2813s req_ids:[8] -DEBUG 06-24 20:40:12 [manager.py:391] -ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:210.04223823547363ms total_cost_time:210.08706092834473ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14685 prompt_cache_len:5151 prompt_cache_ratio:0.3507660878447395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 -DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:12 [batch.py:51] router release req id 8 -INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.3103487491607666 s -INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.3125133514404297 s -DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=46646136924526523423223190120439972658, time:1750768812.7113745s req_ids:[8] -DEBUG 06-24 20:40:12 [manager.py:391] -ERROR 06-24 20:40:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:438.0228519439697ms total_cost_time:438.0664825439453ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14686 prompt_cache_len:5151 prompt_cache_ratio:0.3507422034590767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 -DEBUG 06-24 20:40:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:12 [batch.py:51] router release req id 8 -INFO 06-24 20:40:12 [manager.py:224] router recive req id 8 cost time 0.1081547737121582 s -INFO 06-24 20:40:12 [manager.py:68] detokenization recv req id 8 cost time 0.11033177375793457 s -DEBUG 06-24 20:40:12 [manager.py:391] Prefill Batch: batch_id=280227179896601486981952907103804898449, time:1750768812.941671s req_ids:[8] -DEBUG 06-24 20:40:12 [manager.py:391] -ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:12 lightllm_req_id:8 first_token_cost:215.75236320495605ms total_cost_time:215.8060073852539ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:14687 prompt_cache_len:5151 prompt_cache_ratio:0.3507183223258664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 -DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:13 [batch.py:51] router release req id 8 -INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.1079094409942627 s -INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10978579521179199 s -DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=230946220934210716534521054901551339644, time:1750768813.1633117s req_ids:[8] -DEBUG 06-24 20:40:13 [manager.py:391] -ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:216.26901626586914ms total_cost_time:216.32790565490723ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:14688 prompt_cache_len:5151 prompt_cache_ratio:0.3506944444444444 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 -DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:13 [batch.py:51] router release req id 8 -INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10831618309020996 s -INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10941672325134277 s -DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=15618498689938842387835959734554964984, time:1750768813.3916268s req_ids:[8] -DEBUG 06-24 20:40:13 [manager.py:391] -ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:220.8397388458252ms total_cost_time:220.8840847015381ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14689 prompt_cache_len:5151 prompt_cache_ratio:0.3506705698141466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 -DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:13 [batch.py:51] router release req id 8 -INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10974001884460449 s -INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.11181926727294922 s -DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=209437871507000890353336207164633925234, time:1750768813.6129513s req_ids:[8] -DEBUG 06-24 20:40:13 [manager.py:391] -ERROR 06-24 20:40:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:212.4958038330078ms total_cost_time:212.5394344329834ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14690 prompt_cache_len:5151 prompt_cache_ratio:0.35064669843430907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 -DEBUG 06-24 20:40:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:13 [batch.py:51] router release req id 8 -INFO 06-24 20:40:13 [manager.py:224] router recive req id 8 cost time 0.10801553726196289 s -INFO 06-24 20:40:13 [manager.py:68] detokenization recv req id 8 cost time 0.10989546775817871 s -DEBUG 06-24 20:40:13 [manager.py:391] Prefill Batch: batch_id=194283928511761194593323154536738791253, time:1750768813.8333905s req_ids:[8] -DEBUG 06-24 20:40:13 [manager.py:391] -ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:13 lightllm_req_id:8 first_token_cost:370.18895149230957ms total_cost_time:370.23234367370605ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14691 prompt_cache_len:5151 prompt_cache_ratio:0.35062283030426794 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 -DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:14 [batch.py:51] router release req id 8 -INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10793113708496094 s -INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.1097562313079834 s -DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=85267071529423387605784326357880483224, time:1750768814.208885s req_ids:[8] -DEBUG 06-24 20:40:14 [manager.py:391] -ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:213.16957473754883ms total_cost_time:213.21463584899902ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14692 prompt_cache_len:5151 prompt_cache_ratio:0.35059896542335967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 -DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:14 [batch.py:51] router release req id 8 -INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.11032605171203613 s -INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.1128394603729248 s -DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=256547681957251109996189322030825728333, time:1750768814.4288466s req_ids:[8] -DEBUG 06-24 20:40:14 [manager.py:391] -ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:216.47310256958008ms total_cost_time:216.51601791381836ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14693 prompt_cache_len:5151 prompt_cache_ratio:0.35057510379092083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 -DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:14 [batch.py:51] router release req id 8 -INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10784578323364258 s -INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.10986518859863281 s -DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=149996451286698082104683088451529236869, time:1750768814.6504965s req_ids:[8] -DEBUG 06-24 20:40:14 [manager.py:391] -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:214.82348442077637ms total_cost_time:214.8764133453369ms,out_token_counter:1 mean_per_token_cost_time: 0.052928924560546875ms prompt_token_num:14694 prompt_cache_len:5151 prompt_cache_ratio:0.3505512454062883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 -DEBUG 06-24 20:40:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:14 [batch.py:51] router release req id 8 -INFO 06-24 20:40:14 [manager.py:224] router recive req id 8 cost time 0.10989880561828613 s -INFO 06-24 20:40:14 [manager.py:68] detokenization recv req id 8 cost time 0.11208891868591309 s -DEBUG 06-24 20:40:14 [manager.py:391] Prefill Batch: batch_id=292610880929220073819236610874294689197, time:1750768814.873325s req_ids:[8] -DEBUG 06-24 20:40:14 [manager.py:391] -ERROR 06-24 20:40:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:211.86375617980957ms total_cost_time:211.90953254699707ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14695 prompt_cache_len:5151 prompt_cache_ratio:0.3505273902687989 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 -DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:15 [batch.py:51] router release req id 8 -INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10903048515319824 s -INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11045002937316895 s -DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=83978598018006754585282364874160156125, time:1750768815.090116s req_ids:[8] -DEBUG 06-24 20:40:15 [manager.py:391] -ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:14 lightllm_req_id:8 first_token_cost:212.63599395751953ms total_cost_time:212.68177032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14696 prompt_cache_len:5151 prompt_cache_ratio:0.35050353837778986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 -DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:15 [batch.py:51] router release req id 8 -INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10903358459472656 s -INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11043787002563477 s -DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=202411820592296422443235918652591464510, time:1750768815.3099244s req_ids:[8] -DEBUG 06-24 20:40:15 [manager.py:391] -ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:372.90358543395996ms total_cost_time:372.94650077819824ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14697 prompt_cache_len:5151 prompt_cache_ratio:0.35047968973259847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 -DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:15 [batch.py:51] router release req id 8 -INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10810661315917969 s -INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949444770812988 s -DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=109145759774436825299845737751297545959, time:1750768815.689857s req_ids:[8] -DEBUG 06-24 20:40:15 [manager.py:391] -ERROR 06-24 20:40:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:220.91054916381836ms total_cost_time:220.95441818237305ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14698 prompt_cache_len:5151 prompt_cache_ratio:0.35045584433256227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 -DEBUG 06-24 20:40:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:15 [batch.py:51] router release req id 8 -INFO 06-24 20:40:15 [manager.py:224] router recive req id 8 cost time 0.10871458053588867 s -INFO 06-24 20:40:15 [manager.py:68] detokenization recv req id 8 cost time 0.11009097099304199 s -DEBUG 06-24 20:40:15 [manager.py:391] Prefill Batch: batch_id=71291847029573411913523924862250645828, time:1750768815.915585s req_ids:[8] -DEBUG 06-24 20:40:15 [manager.py:391] -ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:15 lightllm_req_id:8 first_token_cost:210.46900749206543ms total_cost_time:210.51430702209473ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14699 prompt_cache_len:5151 prompt_cache_ratio:0.35043200217701886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 -DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:16 [batch.py:51] router release req id 8 -INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.1082468032836914 s -INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.10935521125793457 s -DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=330985283981790481572471721936287624934, time:1750768816.1315439s req_ids:[8] -DEBUG 06-24 20:40:16 [manager.py:391] -ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:210.41488647460938ms total_cost_time:210.46066284179688ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14700 prompt_cache_len:5151 prompt_cache_ratio:0.3504081632653061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 -DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:16 [batch.py:51] router release req id 8 -INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.1092681884765625 s -INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.11140871047973633 s -DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=71927880182853218777787044060417739260, time:1750768816.3487992s req_ids:[8] -DEBUG 06-24 20:40:16 [manager.py:391] -ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:212.13054656982422ms total_cost_time:212.1756076812744ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14701 prompt_cache_len:5151 prompt_cache_ratio:0.35038432759676214 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 -DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:16 [batch.py:51] router release req id 8 -INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.10783219337463379 s -INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.10994982719421387 s -DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=23532449572181476429637247826764038211, time:1750768816.5680692s req_ids:[8] -DEBUG 06-24 20:40:16 [manager.py:391] -ERROR 06-24 20:40:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:423.922061920166ms total_cost_time:423.9675998687744ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14702 prompt_cache_len:5151 prompt_cache_ratio:0.3503604951707251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 -DEBUG 06-24 20:40:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:16 [batch.py:51] router release req id 8 -INFO 06-24 20:40:16 [manager.py:224] router recive req id 8 cost time 0.10913920402526855 s -INFO 06-24 20:40:16 [manager.py:68] detokenization recv req id 8 cost time 0.11120963096618652 s -DEBUG 06-24 20:40:16 [manager.py:391] Prefill Batch: batch_id=337716684614698894360310496654351709683, time:1750768816.9980087s req_ids:[8] -DEBUG 06-24 20:40:16 [manager.py:391] -ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:16 lightllm_req_id:8 first_token_cost:215.03663063049316ms total_cost_time:215.08193016052246ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14703 prompt_cache_len:5151 prompt_cache_ratio:0.35033666598653335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 -DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:17 [batch.py:51] router release req id 8 -INFO 06-24 20:40:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s -INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11098957061767578 s -DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=84344801542775111924716179508882138168, time:1750768817.2217424s req_ids:[8] -DEBUG 06-24 20:40:17 [manager.py:391] -ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:220.90721130371094ms total_cost_time:220.94988822937012ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14704 prompt_cache_len:5151 prompt_cache_ratio:0.3503128400435256 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 -DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:17 [batch.py:51] router release req id 8 -INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10928678512573242 s -INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11136794090270996 s -DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=278053924147190657618031647954905177305, time:1750768817.4453528s req_ids:[8] -DEBUG 06-24 20:40:17 [manager.py:391] -ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:212.11957931518555ms total_cost_time:212.16368675231934ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14705 prompt_cache_len:5151 prompt_cache_ratio:0.35028901734104045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 -DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:17 [batch.py:51] router release req id 8 -INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10926270484924316 s -INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11130595207214355 s -DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=162949643589235368891536876983704878377, time:1750768817.6650093s req_ids:[8] -DEBUG 06-24 20:40:17 [manager.py:391] -ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:215.9428596496582ms total_cost_time:215.9872055053711ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14706 prompt_cache_len:5151 prompt_cache_ratio:0.350265197878417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 -DEBUG 06-24 20:40:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:17 [batch.py:51] router release req id 8 -INFO 06-24 20:40:17 [manager.py:224] router recive req id 8 cost time 0.10977816581726074 s -INFO 06-24 20:40:17 [manager.py:68] detokenization recv req id 8 cost time 0.11183404922485352 s -DEBUG 06-24 20:40:17 [manager.py:391] Prefill Batch: batch_id=59771654104974208202503201462900064956, time:1750768817.8874245s req_ids:[8] -DEBUG 06-24 20:40:17 [manager.py:391] -ERROR 06-24 20:40:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:213.80138397216797ms total_cost_time:213.85765075683594ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:14707 prompt_cache_len:5151 prompt_cache_ratio:0.35024138165499424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 -DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:18 [batch.py:51] router release req id 8 -INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10902619361877441 s -INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11090493202209473 s -DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=163840034019968302116533132363777254139, time:1750768818.117605s req_ids:[8] -DEBUG 06-24 20:40:18 [manager.py:391] -ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:17 lightllm_req_id:8 first_token_cost:400.130033493042ms total_cost_time:400.1734256744385ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14708 prompt_cache_len:5151 prompt_cache_ratio:0.3502175686701115 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 -DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:18 [batch.py:51] router release req id 8 -INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10979270935058594 s -INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.1118624210357666 s -DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=139229620244462910979186042450618194967, time:1750768818.5142334s req_ids:[8] -DEBUG 06-24 20:40:18 [manager.py:391] -ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:215.50583839416504ms total_cost_time:215.54875373840332ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14709 prompt_cache_len:5151 prompt_cache_ratio:0.3501937589231083 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 -DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:18 [batch.py:51] router release req id 8 -INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.10936713218688965 s -INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11137080192565918 s -DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=154067350897321068229419894495690673731, time:1750768818.735521s req_ids:[8] -DEBUG 06-24 20:40:18 [manager.py:391] -ERROR 06-24 20:40:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:216.4454460144043ms total_cost_time:216.4900302886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14710 prompt_cache_len:5151 prompt_cache_ratio:0.35016995241332427 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 -DEBUG 06-24 20:40:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:18 [batch.py:51] router release req id 8 -INFO 06-24 20:40:18 [manager.py:224] router recive req id 8 cost time 0.1081690788269043 s -INFO 06-24 20:40:18 [manager.py:68] detokenization recv req id 8 cost time 0.11007475852966309 s -DEBUG 06-24 20:40:18 [manager.py:391] Prefill Batch: batch_id=92924276508592284431361526557100415736, time:1750768818.957816s req_ids:[8] -DEBUG 06-24 20:40:18 [manager.py:391] -ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:18 lightllm_req_id:8 first_token_cost:214.86616134643555ms total_cost_time:214.91003036499023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14711 prompt_cache_len:5151 prompt_cache_ratio:0.3501461491400992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 -DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:19 [batch.py:51] router release req id 8 -INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.10924339294433594 s -INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11134076118469238 s -DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=27474986404538544395457980219412924981, time:1750768819.1845744s req_ids:[8] -DEBUG 06-24 20:40:19 [manager.py:391] -ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:222.17130661010742ms total_cost_time:222.2137451171875ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14712 prompt_cache_len:5151 prompt_cache_ratio:0.35012234910277323 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 -DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:19 [batch.py:51] router release req id 8 -INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.11055731773376465 s -INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11250495910644531 s -DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=278734477961778467504097557522278095925, time:1750768819.4089684s req_ids:[8] -DEBUG 06-24 20:40:19 [manager.py:391] -ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:216.01176261901855ms total_cost_time:216.05348587036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14713 prompt_cache_len:5151 prompt_cache_ratio:0.3500985523006865 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 -DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:19 [batch.py:51] router release req id 8 -INFO 06-24 20:40:19 [manager.py:224] router recive req id 8 cost time 0.10862278938293457 s -INFO 06-24 20:40:19 [manager.py:68] detokenization recv req id 8 cost time 0.11061406135559082 s -DEBUG 06-24 20:40:19 [manager.py:391] Prefill Batch: batch_id=195938774654422983008079039376137154699, time:1750768819.6332347s req_ids:[8] -DEBUG 06-24 20:40:19 [manager.py:391] -ERROR 06-24 20:40:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:380.9962272644043ms total_cost_time:381.0403347015381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14714 prompt_cache_len:5151 prompt_cache_ratio:0.3500747587331793 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 -DEBUG 06-24 20:40:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:19 [batch.py:51] router release req id 8 -INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10918760299682617 s -INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11131072044372559 s -DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=252146470875259449011412569199263454128, time:1750768820.0181272s req_ids:[8] -DEBUG 06-24 20:40:20 [manager.py:391] -ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 58592.326 tokens/s -DEBUG 06-24 20:40:20 [stats.py:37] Avg prompt tokens throughput: 58584.353 tokens/s -DEBUG 06-24 20:40:20 [stats.py:37] Avg generate tokens throughput: 7.973 tokens/s -INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:19 lightllm_req_id:8 first_token_cost:211.3654613494873ms total_cost_time:211.42005920410156ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:14715 prompt_cache_len:5151 prompt_cache_ratio:0.35005096839959227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 -DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:20 [batch.py:51] router release req id 8 -INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10906600952148438 s -INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11110997200012207 s -DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=95954748815909958735878640338729599983, time:1750768820.2440207s req_ids:[8] -DEBUG 06-24 20:40:20 [manager.py:391] -ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:224.13134574890137ms total_cost_time:224.17378425598145ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14716 prompt_cache_len:5151 prompt_cache_ratio:0.35002718129926613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 -DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:20 [batch.py:51] router release req id 8 -INFO 06-24 20:40:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.1079716682434082 s -INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.10998225212097168 s -DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=244795472970996849304813386913812005204, time:1750768820.468231s req_ids:[8] -DEBUG 06-24 20:40:20 [manager.py:391] -ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:228.98483276367188ms total_cost_time:229.02560234069824ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14717 prompt_cache_len:5151 prompt_cache_ratio:0.35000339743154174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 -DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:20 [batch.py:51] router release req id 8 -INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10828018188476562 s -INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11029648780822754 s -DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=102202093191719663057110273167712185687, time:1750768820.7090678s req_ids:[8] -DEBUG 06-24 20:40:20 [manager.py:391] -ERROR 06-24 20:40:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:215.67010879516602ms total_cost_time:215.71087837219238ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:14718 prompt_cache_len:5151 prompt_cache_ratio:0.3499796167957603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 -DEBUG 06-24 20:40:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:20 [batch.py:51] router release req id 8 -INFO 06-24 20:40:20 [manager.py:224] router recive req id 8 cost time 0.10885381698608398 s -INFO 06-24 20:40:20 [manager.py:68] detokenization recv req id 8 cost time 0.11074352264404297 s -DEBUG 06-24 20:40:20 [manager.py:391] Prefill Batch: batch_id=218190155008774682513115986361626660133, time:1750768820.9254787s req_ids:[8] -DEBUG 06-24 20:40:20 [manager.py:391] -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:20 lightllm_req_id:8 first_token_cost:379.9419403076172ms total_cost_time:379.98366355895996ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14719 prompt_cache_len:5151 prompt_cache_ratio:0.349955839391263 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 -DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:21 [batch.py:51] router release req id 8 -INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10873579978942871 s -INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11084127426147461 s -DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=150353056383179411865339720371127552281, time:1750768821.3147857s req_ids:[8] -DEBUG 06-24 20:40:21 [manager.py:391] -ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:218.5831069946289ms total_cost_time:218.62459182739258ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14720 prompt_cache_len:5151 prompt_cache_ratio:0.3499320652173913 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 -DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:21 [batch.py:51] router release req id 8 -INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10844993591308594 s -INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11044096946716309 s -DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=240411819897803517244955389626172483581, time:1750768821.552177s req_ids:[8] -DEBUG 06-24 20:40:21 [manager.py:391] -ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:232.48791694641113ms total_cost_time:232.53226280212402ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14721 prompt_cache_len:5151 prompt_cache_ratio:0.34990829427348685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 -DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:21 [batch.py:51] router release req id 8 -INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10884809494018555 s -INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11087560653686523 s -DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=241244702501274647831771362213054799806, time:1750768821.778202s req_ids:[8] -DEBUG 06-24 20:40:21 [manager.py:391] -ERROR 06-24 20:40:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:217.1335220336914ms total_cost_time:217.1761989593506ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14722 prompt_cache_len:5151 prompt_cache_ratio:0.34988452655889146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 -DEBUG 06-24 20:40:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:21 [batch.py:51] router release req id 8 -INFO 06-24 20:40:21 [manager.py:224] router recive req id 8 cost time 0.10856294631958008 s -INFO 06-24 20:40:21 [manager.py:68] detokenization recv req id 8 cost time 0.11064267158508301 s -DEBUG 06-24 20:40:21 [manager.py:391] Prefill Batch: batch_id=42028759426480868085254035638232032891, time:1750768821.9992962s req_ids:[8] -DEBUG 06-24 20:40:21 [manager.py:391] -ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:21 lightllm_req_id:8 first_token_cost:213.11140060424805ms total_cost_time:213.15336227416992ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14723 prompt_cache_len:5151 prompt_cache_ratio:0.3498607620729471 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 -DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:22 [batch.py:51] router release req id 8 -INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.10865616798400879 s -INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11079668998718262 s -DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=82729548392927690412541713795164685453, time:1750768822.2203574s req_ids:[8] -DEBUG 06-24 20:40:22 [manager.py:391] -ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:218.5375690460205ms total_cost_time:218.57905387878418ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14724 prompt_cache_len:5151 prompt_cache_ratio:0.3498370008149959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 -DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:22 [batch.py:51] router release req id 8 -INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.1083834171295166 s -INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11037731170654297 s -DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=216245359714521007635436767050189627188, time:1750768822.4430335s req_ids:[8] -DEBUG 06-24 20:40:22 [manager.py:391] -ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:373.4426498413086ms total_cost_time:373.48389625549316ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14725 prompt_cache_len:5151 prompt_cache_ratio:0.3498132427843803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 -DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:22 [batch.py:51] router release req id 8 -INFO 06-24 20:40:22 [manager.py:224] router recive req id 8 cost time 0.10816335678100586 s -INFO 06-24 20:40:22 [manager.py:68] detokenization recv req id 8 cost time 0.11008167266845703 s -DEBUG 06-24 20:40:22 [manager.py:391] Prefill Batch: batch_id=106960437875073430660383625589556351635, time:1750768822.8247607s req_ids:[8] -DEBUG 06-24 20:40:22 [manager.py:391] -ERROR 06-24 20:40:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:215.49201011657715ms total_cost_time:215.53468704223633ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14726 prompt_cache_len:5151 prompt_cache_ratio:0.34978948798044274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 -DEBUG 06-24 20:40:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:22 [batch.py:51] router release req id 8 -INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10790538787841797 s -INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.10997509956359863 s -DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=234752422244079015440265751350404841205, time:1750768823.0459228s req_ids:[8] -DEBUG 06-24 20:40:23 [manager.py:391] -ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:22 lightllm_req_id:8 first_token_cost:216.8595790863037ms total_cost_time:216.9017791748047ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14727 prompt_cache_len:5151 prompt_cache_ratio:0.34976573640252595 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 -DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:23 [batch.py:51] router release req id 8 -INFO 06-24 20:40:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10804319381713867 s -INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11053991317749023 s -DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=205892908370596310986121267695549648449, time:1750768823.2683558s req_ids:[8] -DEBUG 06-24 20:40:23 [manager.py:391] -ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:209.45215225219727ms total_cost_time:209.49459075927734ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14728 prompt_cache_len:5151 prompt_cache_ratio:0.34974198804997286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 -DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:23 [batch.py:51] router release req id 8 -INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10879898071289062 s -INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11079049110412598 s -DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=146336133036422488894788449094167406076, time:1750768823.4838119s req_ids:[8] -DEBUG 06-24 20:40:23 [manager.py:391] -ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:214.46514129638672ms total_cost_time:214.508056640625ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14729 prompt_cache_len:5151 prompt_cache_ratio:0.34971824292212644 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 -DEBUG 06-24 20:40:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:23 [batch.py:51] router release req id 8 -INFO 06-24 20:40:23 [manager.py:224] router recive req id 8 cost time 0.10812997817993164 s -INFO 06-24 20:40:23 [manager.py:68] detokenization recv req id 8 cost time 0.11014509201049805 s -DEBUG 06-24 20:40:23 [manager.py:391] Prefill Batch: batch_id=225803028369704988127200924823203187100, time:1750768823.705018s req_ids:[8] -DEBUG 06-24 20:40:23 [manager.py:391] -ERROR 06-24 20:40:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:385.5254650115967ms total_cost_time:385.56861877441406ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14730 prompt_cache_len:5151 prompt_cache_ratio:0.34969450101832994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 -DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:24 [batch.py:51] router release req id 8 -INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10860252380371094 s -INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11075830459594727 s -DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=329839708637922975065464774054145773582, time:1750768824.0985038s req_ids:[8] -DEBUG 06-24 20:40:24 [manager.py:391] -ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:23 lightllm_req_id:8 first_token_cost:216.28355979919434ms total_cost_time:216.325044631958ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14731 prompt_cache_len:5151 prompt_cache_ratio:0.3496707623379268 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 -DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:24 [batch.py:51] router release req id 8 -INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10773038864135742 s -INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.10971999168395996 s -DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=108882513264186924646401657806975031396, time:1750768824.3194456s req_ids:[8] -DEBUG 06-24 20:40:24 [manager.py:391] -ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:214.01596069335938ms total_cost_time:214.05887603759766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14732 prompt_cache_len:5151 prompt_cache_ratio:0.34964702688026067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 -DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:24 [batch.py:51] router release req id 8 -INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10885095596313477 s -INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11100363731384277 s -DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=54398509598182422363793455840809106726, time:1750768824.5495422s req_ids:[8] -DEBUG 06-24 20:40:24 [manager.py:391] -ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:225.41236877441406ms total_cost_time:225.45361518859863ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14733 prompt_cache_len:5151 prompt_cache_ratio:0.34962329464467523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 -DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:24 [batch.py:51] router release req id 8 -INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10812640190124512 s -INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.11012649536132812 s -DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=99222919712112739614638693430011173829, time:1750768824.7716093s req_ids:[8] -DEBUG 06-24 20:40:24 [manager.py:391] -ERROR 06-24 20:40:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:213.7291431427002ms total_cost_time:213.77229690551758ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14734 prompt_cache_len:5151 prompt_cache_ratio:0.3495995656305145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 -DEBUG 06-24 20:40:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:24 [batch.py:51] router release req id 8 -INFO 06-24 20:40:24 [manager.py:224] router recive req id 8 cost time 0.10829687118530273 s -INFO 06-24 20:40:24 [manager.py:68] detokenization recv req id 8 cost time 0.1103060245513916 s -DEBUG 06-24 20:40:24 [manager.py:391] Prefill Batch: batch_id=161136993519924356743507236976667227395, time:1750768824.9927127s req_ids:[8] -DEBUG 06-24 20:40:24 [manager.py:391] -ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:24 lightllm_req_id:8 first_token_cost:216.5241241455078ms total_cost_time:216.5672779083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14735 prompt_cache_len:5151 prompt_cache_ratio:0.3495758398371225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 -DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:25 [batch.py:51] router release req id 8 -INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10892605781555176 s -INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11095738410949707 s -DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=215037999732090311585642783474842887600, time:1750768825.215685s req_ids:[8] -DEBUG 06-24 20:40:25 [manager.py:391] -ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:379.8520565032959ms total_cost_time:379.8937797546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14736 prompt_cache_len:5151 prompt_cache_ratio:0.34955211726384366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 -DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:25 [batch.py:51] router release req id 8 -INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10880565643310547 s -INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11093330383300781 s -DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=232023640339493069061001373657956031557, time:1750768825.6021s req_ids:[8] -DEBUG 06-24 20:40:25 [manager.py:391] -ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:217.83018112182617ms total_cost_time:217.87142753601074ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14737 prompt_cache_len:5151 prompt_cache_ratio:0.3495283979100224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 -DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:25 [batch.py:51] router release req id 8 -INFO 06-24 20:40:25 [manager.py:224] router recive req id 8 cost time 0.10885071754455566 s -INFO 06-24 20:40:25 [manager.py:68] detokenization recv req id 8 cost time 0.11101722717285156 s -DEBUG 06-24 20:40:25 [manager.py:391] Prefill Batch: batch_id=171591947182874947658593488728448140988, time:1750768825.8262174s req_ids:[8] -DEBUG 06-24 20:40:25 [manager.py:391] -ERROR 06-24 20:40:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:217.36598014831543ms total_cost_time:217.4074649810791ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14738 prompt_cache_len:5151 prompt_cache_ratio:0.3495046817750034 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 -DEBUG 06-24 20:40:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:25 [batch.py:51] router release req id 8 -INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10931706428527832 s -INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.11148858070373535 s -DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=129509340968800457324747685734214088552, time:1750768826.0505934s req_ids:[8] -DEBUG 06-24 20:40:26 [manager.py:391] -ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:25 lightllm_req_id:8 first_token_cost:213.6819362640381ms total_cost_time:213.72437477111816ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14739 prompt_cache_len:5151 prompt_cache_ratio:0.3494809688581315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 -DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:26 [batch.py:51] router release req id 8 -INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.1081857681274414 s -INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s -DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=193781136826422544923064446438687067735, time:1750768826.2682414s req_ids:[8] -DEBUG 06-24 20:40:26 [manager.py:391] -ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:210.81304550170898ms total_cost_time:210.85667610168457ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14740 prompt_cache_len:5151 prompt_cache_ratio:0.3494572591587517 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 -DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:26 [batch.py:51] router release req id 8 -INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10837364196777344 s -INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.11030197143554688 s -DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=170674567482056146155922867433993650960, time:1750768826.4859996s req_ids:[8] -DEBUG 06-24 20:40:26 [manager.py:391] -ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:215.19112586975098ms total_cost_time:215.23213386535645ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:14741 prompt_cache_len:5151 prompt_cache_ratio:0.34943355267620924 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 -DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:26 [batch.py:51] router release req id 8 -INFO 06-24 20:40:26 [manager.py:224] router recive req id 8 cost time 0.10756421089172363 s -INFO 06-24 20:40:26 [manager.py:68] detokenization recv req id 8 cost time 0.10918736457824707 s -DEBUG 06-24 20:40:26 [manager.py:391] Prefill Batch: batch_id=299320712009081363472658467266416720119, time:1750768826.70843s req_ids:[8] -DEBUG 06-24 20:40:26 [manager.py:391] -ERROR 06-24 20:40:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:344.0220355987549ms total_cost_time:344.07591819763184ms,out_token_counter:1 mean_per_token_cost_time: 0.053882598876953125ms prompt_token_num:14742 prompt_cache_len:5151 prompt_cache_ratio:0.3494098494098494 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 -DEBUG 06-24 20:40:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:26 [batch.py:51] router release req id 8 -INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.1082615852355957 s -INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11004424095153809 s -DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=280885242141554199862299742696802391378, time:1750768827.0586078s req_ids:[8] -DEBUG 06-24 20:40:27 [manager.py:391] -ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:26 lightllm_req_id:8 first_token_cost:219.9854850769043ms total_cost_time:220.0303077697754ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14743 prompt_cache_len:5151 prompt_cache_ratio:0.34938614935901785 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 -DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:27 [batch.py:51] router release req id 8 -INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10919308662414551 s -INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.1111447811126709 s -DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=25860185952810873825296780073958984946, time:1750768827.2886305s req_ids:[8] -DEBUG 06-24 20:40:27 [manager.py:391] -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:215.6996726989746ms total_cost_time:215.74163436889648ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14744 prompt_cache_len:5151 prompt_cache_ratio:0.34936245252306025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 -DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:27 [batch.py:51] router release req id 8 -INFO 06-24 20:40:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10879969596862793 s -INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11079573631286621 s -DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=337448448930846003328047093995079386642, time:1750768827.507925s req_ids:[8] -DEBUG 06-24 20:40:27 [manager.py:391] -ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:213.3791446685791ms total_cost_time:213.4225368499756ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14745 prompt_cache_len:5151 prompt_cache_ratio:0.3493387589013225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 -DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:27 [batch.py:51] router release req id 8 -INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10819721221923828 s -INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11011791229248047 s -DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=111885460944661861847382958704664014548, time:1750768827.7268631s req_ids:[8] -DEBUG 06-24 20:40:27 [manager.py:391] -ERROR 06-24 20:40:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:215.3646945953369ms total_cost_time:215.4068946838379ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14746 prompt_cache_len:5151 prompt_cache_ratio:0.3493150684931507 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 -DEBUG 06-24 20:40:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:27 [batch.py:51] router release req id 8 -INFO 06-24 20:40:27 [manager.py:224] router recive req id 8 cost time 0.10886240005493164 s -INFO 06-24 20:40:27 [manager.py:68] detokenization recv req id 8 cost time 0.11078906059265137 s -DEBUG 06-24 20:40:27 [manager.py:391] Prefill Batch: batch_id=204066637168487904519354615423161611838, time:1750768827.948776s req_ids:[8] -DEBUG 06-24 20:40:27 [manager.py:391] -ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:27 lightllm_req_id:8 first_token_cost:210.40749549865723ms total_cost_time:210.45207977294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14747 prompt_cache_len:5151 prompt_cache_ratio:0.3492913812978911 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 -DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:28 [batch.py:51] router release req id 8 -INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10925960540771484 s -INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.11109066009521484 s -DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=301928371783730248639170560846551572358, time:1750768828.1656218s req_ids:[8] -DEBUG 06-24 20:40:28 [manager.py:391] -ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:375.3514289855957ms total_cost_time:375.3950595855713ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14748 prompt_cache_len:5151 prompt_cache_ratio:0.34926769731489016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 -DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:28 [batch.py:51] router release req id 8 -INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.1084585189819336 s -INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s -DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=85022395819210198641587113467841012162, time:1750768828.5467563s req_ids:[8] -DEBUG 06-24 20:40:28 [manager.py:391] -ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:215.6815528869629ms total_cost_time:215.7275676727295ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14749 prompt_cache_len:5151 prompt_cache_ratio:0.34924401654349446 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 -DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:28 [batch.py:51] router release req id 8 -INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10824012756347656 s -INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.11013531684875488 s -DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=304717683666559356671544002785498700441, time:1750768828.7697897s req_ids:[8] -DEBUG 06-24 20:40:28 [manager.py:391] -ERROR 06-24 20:40:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:215.54255485534668ms total_cost_time:215.58523178100586ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14750 prompt_cache_len:5151 prompt_cache_ratio:0.3492203389830508 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 -DEBUG 06-24 20:40:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:28 [batch.py:51] router release req id 8 -INFO 06-24 20:40:28 [manager.py:224] router recive req id 8 cost time 0.10811948776245117 s -INFO 06-24 20:40:28 [manager.py:68] detokenization recv req id 8 cost time 0.10983443260192871 s -DEBUG 06-24 20:40:28 [manager.py:391] Prefill Batch: batch_id=238948343146417012451175734216975354500, time:1750768828.9924626s req_ids:[8] -DEBUG 06-24 20:40:28 [manager.py:391] -ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:28 lightllm_req_id:8 first_token_cost:212.10694313049316ms total_cost_time:212.15057373046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14751 prompt_cache_len:5151 prompt_cache_ratio:0.3491966646329062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 -DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:29 [batch.py:51] router release req id 8 -INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.10842394828796387 s -INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11021709442138672 s -DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=261982068674132505410481916816620574882, time:1750768829.2109358s req_ids:[8] -DEBUG 06-24 20:40:29 [manager.py:391] -ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:213.44590187072754ms total_cost_time:213.49191665649414ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14752 prompt_cache_len:5151 prompt_cache_ratio:0.3491729934924078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 -DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:29 [batch.py:51] router release req id 8 -INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s -INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11051368713378906 s -DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=169220190852814432629664672288023179917, time:1750768829.4294777s req_ids:[8] -DEBUG 06-24 20:40:29 [manager.py:391] -ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:381.50978088378906ms total_cost_time:381.55388832092285ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14753 prompt_cache_len:5151 prompt_cache_ratio:0.3491493255609029 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 -DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:29 [batch.py:51] router release req id 8 -INFO 06-24 20:40:29 [manager.py:224] router recive req id 8 cost time 0.1082465648651123 s -INFO 06-24 20:40:29 [manager.py:68] detokenization recv req id 8 cost time 0.11024594306945801 s -DEBUG 06-24 20:40:29 [manager.py:391] Prefill Batch: batch_id=136751505937359784113896126303637329662, time:1750768829.8186388s req_ids:[8] -DEBUG 06-24 20:40:29 [manager.py:391] -ERROR 06-24 20:40:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:215.44146537780762ms total_cost_time:215.4850959777832ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14754 prompt_cache_len:5151 prompt_cache_ratio:0.34912566083773894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 -DEBUG 06-24 20:40:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:29 [batch.py:51] router release req id 8 -INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10795116424560547 s -INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.10994172096252441 s -DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=335897480439458947946066291621747337719, time:1750768830.0413418s req_ids:[8] -DEBUG 06-24 20:40:30 [manager.py:391] -ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 58824.353 tokens/s -DEBUG 06-24 20:40:30 [stats.py:37] Avg prompt tokens throughput: 58816.370 tokens/s -DEBUG 06-24 20:40:30 [stats.py:37] Avg generate tokens throughput: 7.983 tokens/s -INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:29 lightllm_req_id:8 first_token_cost:211.83013916015625ms total_cost_time:211.87281608581543ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14755 prompt_cache_len:5151 prompt_cache_ratio:0.34910199932226366 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 -DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:30 [batch.py:51] router release req id 8 -INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10884952545166016 s -INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.11098051071166992 s -DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=316787523390822672496591234072188396123, time:1750768830.257951s req_ids:[8] -DEBUG 06-24 20:40:30 [manager.py:391] -ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.9507999420166ms total_cost_time:214.9949073791504ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14756 prompt_cache_len:5151 prompt_cache_ratio:0.3490783410138249 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 -DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:30 [batch.py:51] router release req id 8 -INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.1086118221282959 s -INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.110504150390625 s -DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=8379380313012295456977891024923444727, time:1750768830.4794853s req_ids:[8] -DEBUG 06-24 20:40:30 [manager.py:391] -ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.4148349761963ms total_cost_time:214.45846557617188ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14757 prompt_cache_len:5151 prompt_cache_ratio:0.3490546859117707 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 -DEBUG 06-24 20:40:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:30 [batch.py:51] router release req id 8 -INFO 06-24 20:40:30 [manager.py:224] router recive req id 8 cost time 0.10900068283081055 s -INFO 06-24 20:40:30 [manager.py:68] detokenization recv req id 8 cost time 0.11089086532592773 s -DEBUG 06-24 20:40:30 [manager.py:391] Prefill Batch: batch_id=77357581678859967715883209571237919283, time:1750768830.699089s req_ids:[8] -DEBUG 06-24 20:40:30 [manager.py:391] -ERROR 06-24 20:40:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:402.65727043151855ms total_cost_time:402.69970893859863ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14758 prompt_cache_len:5151 prompt_cache_ratio:0.34903103401544927 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 -DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:31 [batch.py:51] router release req id 8 -INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10871553421020508 s -INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s -DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=89919806168213389146430452084712271202, time:1750768831.1073728s req_ids:[8] -DEBUG 06-24 20:40:31 [manager.py:391] -ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:30 lightllm_req_id:8 first_token_cost:214.13397789001465ms total_cost_time:214.17641639709473ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14759 prompt_cache_len:5151 prompt_cache_ratio:0.34900738532420894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 -DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:31 [batch.py:51] router release req id 8 -INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10930585861206055 s -INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11135673522949219 s -DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=144128991160033358385336282731478328251, time:1750768831.3273485s req_ids:[8] -DEBUG 06-24 20:40:31 [manager.py:391] -ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:216.39680862426758ms total_cost_time:216.44115447998047ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14760 prompt_cache_len:5151 prompt_cache_ratio:0.3489837398373984 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 -DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:31 [batch.py:51] router release req id 8 -INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10842418670654297 s -INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.11038541793823242 s -DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=162800491243798785339987305273618859483, time:1750768831.5511847s req_ids:[8] -DEBUG 06-24 20:40:31 [manager.py:391] -ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:216.28355979919434ms total_cost_time:216.34173393249512ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:14761 prompt_cache_len:5151 prompt_cache_ratio:0.34896009755436624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 -DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:31 [batch.py:51] router release req id 8 -INFO 06-24 20:40:31 [manager.py:224] router recive req id 8 cost time 0.10786199569702148 s -INFO 06-24 20:40:31 [manager.py:68] detokenization recv req id 8 cost time 0.10997700691223145 s -DEBUG 06-24 20:40:31 [manager.py:391] Prefill Batch: batch_id=14236172323634276415849350806864899728, time:1750768831.782177s req_ids:[8] -DEBUG 06-24 20:40:31 [manager.py:391] -ERROR 06-24 20:40:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:225.70037841796875ms total_cost_time:225.74448585510254ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14762 prompt_cache_len:5151 prompt_cache_ratio:0.3489364584744615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 -DEBUG 06-24 20:40:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:31 [batch.py:51] router release req id 8 -INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10927534103393555 s -INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11098504066467285 s -DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=90278934900427237829959953923954571040, time:1750768832.0072606s req_ids:[8] -DEBUG 06-24 20:40:32 [manager.py:391] -ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:31 lightllm_req_id:8 first_token_cost:214.4942283630371ms total_cost_time:214.5369052886963ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14763 prompt_cache_len:5151 prompt_cache_ratio:0.3489128225970331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 -DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:32 [batch.py:51] router release req id 8 -INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10785079002380371 s -INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.10979413986206055 s -DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=290108574513598769533109026722285547161, time:1750768832.226965s req_ids:[8] -DEBUG 06-24 20:40:32 [manager.py:391] -ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:380.3138732910156ms total_cost_time:380.3596496582031ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14764 prompt_cache_len:5151 prompt_cache_ratio:0.34888918992143053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 -DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:32 [batch.py:51] router release req id 8 -INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10946297645568848 s -INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11204075813293457 s -DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=308653551652250667168158287772320357287, time:1750768832.6149256s req_ids:[8] -DEBUG 06-24 20:40:32 [manager.py:391] -ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:214.951753616333ms total_cost_time:214.9946689605713ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14765 prompt_cache_len:5151 prompt_cache_ratio:0.34886556044700306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 -DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:32 [batch.py:51] router release req id 8 -INFO 06-24 20:40:32 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s -INFO 06-24 20:40:32 [manager.py:68] detokenization recv req id 8 cost time 0.11074566841125488 s -DEBUG 06-24 20:40:32 [manager.py:391] Prefill Batch: batch_id=250059938936350750265845591852419488804, time:1750768832.837375s req_ids:[8] -DEBUG 06-24 20:40:32 [manager.py:391] -ERROR 06-24 20:40:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:211.75622940063477ms total_cost_time:211.80009841918945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14766 prompt_cache_len:5151 prompt_cache_ratio:0.3488419341731004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 -DEBUG 06-24 20:40:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:32 [batch.py:51] router release req id 8 -INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10816597938537598 s -INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.11007332801818848 s -DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=86195731079465471946376652617093461032, time:1750768833.0554962s req_ids:[8] -DEBUG 06-24 20:40:33 [manager.py:391] -ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:32 lightllm_req_id:8 first_token_cost:214.0824794769287ms total_cost_time:214.1268253326416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14767 prompt_cache_len:5151 prompt_cache_ratio:0.34881831109907224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 -DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:33 [batch.py:51] router release req id 8 -INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10895419120788574 s -INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.11115264892578125 s -DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=319918078160362812947852012016022974385, time:1750768833.2873003s req_ids:[8] -DEBUG 06-24 20:40:33 [manager.py:391] -ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:228.98006439208984ms total_cost_time:229.02369499206543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14768 prompt_cache_len:5151 prompt_cache_ratio:0.3487946912242687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 -DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:33 [batch.py:51] router release req id 8 -INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10770630836486816 s -INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.10962724685668945 s -DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=240273272608963791126271751781661022787, time:1750768833.5117347s req_ids:[8] -DEBUG 06-24 20:40:33 [manager.py:391] -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:215.93284606933594ms total_cost_time:215.97599983215332ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14769 prompt_cache_len:5151 prompt_cache_ratio:0.3487710745480398 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 -DEBUG 06-24 20:40:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:33 [batch.py:51] router release req id 8 -INFO 06-24 20:40:33 [manager.py:224] router recive req id 8 cost time 0.10782051086425781 s -INFO 06-24 20:40:33 [manager.py:68] detokenization recv req id 8 cost time 0.10991096496582031 s -DEBUG 06-24 20:40:33 [manager.py:391] Prefill Batch: batch_id=213404560950185398293611639552792144541, time:1750768833.7347152s req_ids:[8] -DEBUG 06-24 20:40:33 [manager.py:391] -ERROR 06-24 20:40:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:33 lightllm_req_id:8 first_token_cost:377.90489196777344ms total_cost_time:377.95066833496094ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14770 prompt_cache_len:5151 prompt_cache_ratio:0.348747461069736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 -DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:34 [batch.py:51] router release req id 8 -INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10912370681762695 s -INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11104559898376465 s -DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=5606609173108477116399842652558081711, time:1750768834.1190033s req_ids:[8] -DEBUG 06-24 20:40:34 [manager.py:391] -ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:215.09933471679688ms total_cost_time:215.14344215393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14771 prompt_cache_len:5151 prompt_cache_ratio:0.3487238507887076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 -DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:34 [batch.py:51] router release req id 8 -INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10826849937438965 s -INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11042642593383789 s -DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=296724273848153874182849290219519330917, time:1750768834.3406718s req_ids:[8] -DEBUG 06-24 20:40:34 [manager.py:391] -ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:214.75720405578613ms total_cost_time:214.8146629333496ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14772 prompt_cache_len:5151 prompt_cache_ratio:0.34870024370430547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 -DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:34 [batch.py:51] router release req id 8 -INFO 06-24 20:40:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.11103177070617676 s -INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11298656463623047 s -DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=149230804867059076690486805536121674095, time:1750768834.573161s req_ids:[8] -DEBUG 06-24 20:40:34 [manager.py:391] -ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:229.5377254486084ms total_cost_time:229.5827865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14773 prompt_cache_len:5151 prompt_cache_ratio:0.3486766398158803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 -DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:34 [batch.py:51] router release req id 8 -INFO 06-24 20:40:34 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s -INFO 06-24 20:40:34 [manager.py:68] detokenization recv req id 8 cost time 0.11079716682434082 s -DEBUG 06-24 20:40:34 [manager.py:391] Prefill Batch: batch_id=314677339942655718412197114918525105668, time:1750768834.7979693s req_ids:[8] -DEBUG 06-24 20:40:34 [manager.py:391] -ERROR 06-24 20:40:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:215.19780158996582ms total_cost_time:215.2414321899414ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14774 prompt_cache_len:5151 prompt_cache_ratio:0.3486530391227833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 -DEBUG 06-24 20:40:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:34 [batch.py:51] router release req id 8 -INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10783720016479492 s -INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.10965371131896973 s -DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=195598420901633717811654569604019799592, time:1750768835.020666s req_ids:[8] -DEBUG 06-24 20:40:35 [manager.py:391] -ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:34 lightllm_req_id:8 first_token_cost:209.25498008728027ms total_cost_time:209.30123329162598ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14775 prompt_cache_len:5151 prompt_cache_ratio:0.3486294416243655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 -DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:35 [batch.py:51] router release req id 8 -INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10860824584960938 s -INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.11043357849121094 s -DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=326425699761088970318213995691303837569, time:1750768835.236078s req_ids:[8] -DEBUG 06-24 20:40:35 [manager.py:391] -ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:377.02012062072754ms total_cost_time:377.06565856933594ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14776 prompt_cache_len:5151 prompt_cache_ratio:0.34860584731997835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 -DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:35 [batch.py:51] router release req id 8 -INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10793638229370117 s -INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.11000704765319824 s -DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=188097941491711298999829338146366058409, time:1750768835.620981s req_ids:[8] -DEBUG 06-24 20:40:35 [manager.py:391] -ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:217.18573570251465ms total_cost_time:217.22912788391113ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14777 prompt_cache_len:5151 prompt_cache_ratio:0.34858225620897343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 -DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:35 [batch.py:51] router release req id 8 -INFO 06-24 20:40:35 [manager.py:224] router recive req id 8 cost time 0.10787796974182129 s -INFO 06-24 20:40:35 [manager.py:68] detokenization recv req id 8 cost time 0.10982799530029297 s -DEBUG 06-24 20:40:35 [manager.py:391] Prefill Batch: batch_id=289448305566789996028884232127358435451, time:1750768835.8445811s req_ids:[8] -DEBUG 06-24 20:40:35 [manager.py:391] -ERROR 06-24 20:40:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:209.02204513549805ms total_cost_time:209.06567573547363ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14778 prompt_cache_len:5151 prompt_cache_ratio:0.3485586682907024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 -DEBUG 06-24 20:40:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:35 [batch.py:51] router release req id 8 -INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10978341102600098 s -INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.11185026168823242 s -DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=203507385669399716196884834621496899202, time:1750768836.058192s req_ids:[8] -DEBUG 06-24 20:40:36 [manager.py:391] -ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:35 lightllm_req_id:8 first_token_cost:211.33089065551758ms total_cost_time:211.37332916259766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14779 prompt_cache_len:5151 prompt_cache_ratio:0.34853508356451723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 -DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:36 [batch.py:51] router release req id 8 -INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s -INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.11152410507202148 s -DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=174696255399302560808350780295131976025, time:1750768836.2775006s req_ids:[8] -DEBUG 06-24 20:40:36 [manager.py:391] -ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:215.76189994812012ms total_cost_time:215.82627296447754ms,out_token_counter:1 mean_per_token_cost_time: 0.06437301635742188ms prompt_token_num:14780 prompt_cache_len:5151 prompt_cache_ratio:0.34851150202976994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 -DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:36 [batch.py:51] router release req id 8 -INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.10876631736755371 s -INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.1107320785522461 s -DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=128572679933745898463541105595346638377, time:1750768836.4973052s req_ids:[8] -DEBUG 06-24 20:40:36 [manager.py:391] -ERROR 06-24 20:40:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:207.84950256347656ms total_cost_time:207.89337158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14781 prompt_cache_len:5151 prompt_cache_ratio:0.3484879236858129 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 -DEBUG 06-24 20:40:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:36 [batch.py:51] router release req id 8 -INFO 06-24 20:40:36 [manager.py:224] router recive req id 8 cost time 0.31009888648986816 s -INFO 06-24 20:40:36 [manager.py:68] detokenization recv req id 8 cost time 0.3113257884979248 s -DEBUG 06-24 20:40:36 [manager.py:391] Prefill Batch: batch_id=322321286095607465575083003658026026616, time:1750768836.9207911s req_ids:[8] -DEBUG 06-24 20:40:36 [manager.py:391] -ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:36 lightllm_req_id:8 first_token_cost:428.9834499359131ms total_cost_time:429.0294647216797ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14782 prompt_cache_len:5151 prompt_cache_ratio:0.34846434853199837 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 -DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:37 [batch.py:51] router release req id 8 -INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10892033576965332 s -INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11075758934020996 s -DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=274554121658867261844014937918426724658, time:1750768837.1492841s req_ids:[8] -DEBUG 06-24 20:40:37 [manager.py:391] -ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:215.35181999206543ms total_cost_time:215.39568901062012ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14783 prompt_cache_len:5151 prompt_cache_ratio:0.3484407765676791 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 -DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:37 [batch.py:51] router release req id 8 -INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10909843444824219 s -INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.1109623908996582 s -DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=8613331649267303223324639721188083384, time:1750768837.3838494s req_ids:[8] -DEBUG 06-24 20:40:37 [manager.py:391] -ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:230.04770278930664ms total_cost_time:230.09181022644043ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14784 prompt_cache_len:5151 prompt_cache_ratio:0.3484172077922078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 -DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:37 [batch.py:51] router release req id 8 -INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.10836982727050781 s -INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11016702651977539 s -DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=249941453246777411140560058546242566782, time:1750768837.608913s req_ids:[8] -DEBUG 06-24 20:40:37 [manager.py:391] -ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:195.16873359680176ms total_cost_time:195.21164894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14785 prompt_cache_len:5151 prompt_cache_ratio:0.34839364220493746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 -DEBUG 06-24 20:40:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:37 [batch.py:51] router release req id 8 -INFO 06-24 20:40:37 [manager.py:224] router recive req id 8 cost time 0.1085968017578125 s -INFO 06-24 20:40:37 [manager.py:68] detokenization recv req id 8 cost time 0.11036181449890137 s -DEBUG 06-24 20:40:37 [manager.py:391] Prefill Batch: batch_id=338885971832902405423569492111995136872, time:1750768837.823307s req_ids:[8] -DEBUG 06-24 20:40:37 [manager.py:391] -ERROR 06-24 20:40:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:224.26366806030273ms total_cost_time:224.30682182312012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14786 prompt_cache_len:5151 prompt_cache_ratio:0.34837007980522117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 -DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:38 [batch.py:51] router release req id 8 -INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.3097951412200928 s -INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.31174707412719727 s -DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=23925561919112979986364379510242258014, time:1750768838.246147s req_ids:[8] -DEBUG 06-24 20:40:38 [manager.py:391] -ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:37 lightllm_req_id:8 first_token_cost:426.2204170227051ms total_cost_time:426.26500129699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14787 prompt_cache_len:5151 prompt_cache_ratio:0.3483465205924123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 -DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:38 [batch.py:51] router release req id 8 -INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.10868334770202637 s -INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s -DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=140012892025942787003626803520117093065, time:1750768838.4734833s req_ids:[8] -DEBUG 06-24 20:40:38 [manager.py:391] -ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:193.4823989868164ms total_cost_time:193.5257911682129ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14788 prompt_cache_len:5151 prompt_cache_ratio:0.3483229645658642 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 -DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:38 [batch.py:51] router release req id 8 -INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.1091318130493164 s -INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.1110684871673584 s -DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=150865796908841619747971517517389321226, time:1750768838.673945s req_ids:[8] -DEBUG 06-24 20:40:38 [manager.py:391] -ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:206.91204071044922ms total_cost_time:206.9554328918457ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14789 prompt_cache_len:5151 prompt_cache_ratio:0.3482994117249307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 -DEBUG 06-24 20:40:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:38 [batch.py:51] router release req id 8 -INFO 06-24 20:40:38 [manager.py:224] router recive req id 8 cost time 0.10880875587463379 s -INFO 06-24 20:40:38 [manager.py:68] detokenization recv req id 8 cost time 0.11069893836975098 s -DEBUG 06-24 20:40:38 [manager.py:391] Prefill Batch: batch_id=155370376659140290057391126074022939875, time:1750768838.8885734s req_ids:[8] -DEBUG 06-24 20:40:38 [manager.py:391] -ERROR 06-24 20:40:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:209.19156074523926ms total_cost_time:209.23328399658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14790 prompt_cache_len:5151 prompt_cache_ratio:0.3482758620689655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 -DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:39 [batch.py:51] router release req id 8 -INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10833907127380371 s -INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11026334762573242 s -DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=55480398420202676776097449004736821523, time:1750768839.1034193s req_ids:[8] -DEBUG 06-24 20:40:39 [manager.py:391] -ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:38 lightllm_req_id:8 first_token_cost:211.74979209899902ms total_cost_time:211.79485321044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14791 prompt_cache_len:5151 prompt_cache_ratio:0.34825231559732267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 -DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:39 [batch.py:51] router release req id 8 -INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10880374908447266 s -INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11072587966918945 s -DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=43485270828814745586883311352624281336, time:1750768839.3224406s req_ids:[8] -DEBUG 06-24 20:40:39 [manager.py:391] -ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:344.2087173461914ms total_cost_time:344.2542552947998ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14792 prompt_cache_len:5151 prompt_cache_ratio:0.3482287723093564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 -DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:39 [batch.py:51] router release req id 8 -INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10952568054199219 s -INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11155033111572266 s -DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=190588304763817001540117053555682120257, time:1750768839.672383s req_ids:[8] -DEBUG 06-24 20:40:39 [manager.py:391] -ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:211.56954765319824ms total_cost_time:211.61460876464844ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14793 prompt_cache_len:5151 prompt_cache_ratio:0.348205232204421 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 -DEBUG 06-24 20:40:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:39 [batch.py:51] router release req id 8 -INFO 06-24 20:40:39 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s -INFO 06-24 20:40:39 [manager.py:68] detokenization recv req id 8 cost time 0.11140561103820801 s -DEBUG 06-24 20:40:39 [manager.py:391] Prefill Batch: batch_id=216017589300901786021104684395790007967, time:1750768839.8923526s req_ids:[8] -DEBUG 06-24 20:40:39 [manager.py:391] -ERROR 06-24 20:40:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:214.38193321228027ms total_cost_time:214.4031524658203ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:14794 prompt_cache_len:5151 prompt_cache_ratio:0.34818169528187104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 -DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:40 [batch.py:51] router release req id 8 -INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10372233390808105 s -INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.10554194450378418 s -DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=88771526094195011519561903158310682478, time:1750768840.1132681s req_ids:[8] -DEBUG 06-24 20:40:40 [manager.py:391] -ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 58900.734 tokens/s -DEBUG 06-24 20:40:40 [stats.py:37] Avg prompt tokens throughput: 58892.762 tokens/s -DEBUG 06-24 20:40:40 [stats.py:37] Avg generate tokens throughput: 7.972 tokens/s -INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:39 lightllm_req_id:8 first_token_cost:176.04732513427734ms total_cost_time:176.08952522277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14795 prompt_cache_len:5151 prompt_cache_ratio:0.34815816154106116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 -DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:40 [batch.py:51] router release req id 8 -INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10633277893066406 s -INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.10846066474914551 s -DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=38412919081624769680726841100780455480, time:1750768840.2944572s req_ids:[8] -DEBUG 06-24 20:40:40 [manager.py:391] -ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:207.96680450439453ms total_cost_time:208.0094814300537ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14796 prompt_cache_len:5151 prompt_cache_ratio:0.3481346309813463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 -DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:40 [batch.py:51] router release req id 8 -INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10823559761047363 s -INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.11023259162902832 s -DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=317988550995046517931631244083331426994, time:1750768840.5065968s req_ids:[8] -DEBUG 06-24 20:40:40 [manager.py:391] -ERROR 06-24 20:40:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:215.66438674926758ms total_cost_time:215.70587158203125ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14797 prompt_cache_len:5151 prompt_cache_ratio:0.34811110360208153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 -DEBUG 06-24 20:40:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:40 [batch.py:51] router release req id 8 -INFO 06-24 20:40:40 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s -INFO 06-24 20:40:40 [manager.py:68] detokenization recv req id 8 cost time 0.11096787452697754 s -DEBUG 06-24 20:40:40 [manager.py:391] Prefill Batch: batch_id=199367957817151179193796228365825485852, time:1750768840.7287345s req_ids:[8] -DEBUG 06-24 20:40:40 [manager.py:391] -ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:40 lightllm_req_id:8 first_token_cost:391.7500972747803ms total_cost_time:391.79515838623047ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14798 prompt_cache_len:5151 prompt_cache_ratio:0.34808757940262197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 -DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:41 [batch.py:51] router release req id 8 -INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10953998565673828 s -INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11149477958679199 s -DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=21686356965695087694596218120765870718, time:1750768841.1277306s req_ids:[8] -DEBUG 06-24 20:40:41 [manager.py:391] -ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:218.4431552886963ms total_cost_time:218.48773956298828ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14799 prompt_cache_len:5151 prompt_cache_ratio:0.3480640583823231 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 -DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:41 [batch.py:51] router release req id 8 -INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10848569869995117 s -INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11051034927368164 s -DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=246630996474985688940491096054188049027, time:1750768841.3493364s req_ids:[8] -DEBUG 06-24 20:40:41 [manager.py:391] -ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:213.88721466064453ms total_cost_time:213.93203735351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14800 prompt_cache_len:5151 prompt_cache_ratio:0.34804054054054057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 -DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:41 [batch.py:51] router release req id 8 -INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10902690887451172 s -INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.11108899116516113 s -DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=146121580510277964902055776495097165605, time:1750768841.5718713s req_ids:[8] -DEBUG 06-24 20:40:41 [manager.py:391] -ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:228.89161109924316ms total_cost_time:228.93786430358887ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14801 prompt_cache_len:5151 prompt_cache_ratio:0.34801702587662997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 -DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:41 [batch.py:51] router release req id 8 -INFO 06-24 20:40:41 [manager.py:224] router recive req id 8 cost time 0.10832476615905762 s -INFO 06-24 20:40:41 [manager.py:68] detokenization recv req id 8 cost time 0.1101219654083252 s -DEBUG 06-24 20:40:41 [manager.py:391] Prefill Batch: batch_id=75353447346946242889611135486140653627, time:1750768841.8321242s req_ids:[8] -DEBUG 06-24 20:40:41 [manager.py:391] -ERROR 06-24 20:40:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:228.77216339111328ms total_cost_time:228.81650924682617ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14802 prompt_cache_len:5151 prompt_cache_ratio:0.3479935143899473 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 -DEBUG 06-24 20:40:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:41 [batch.py:51] router release req id 8 -INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.1090857982635498 s -INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11103224754333496 s -DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=98355756968641116070676484962887962975, time:1750768842.04269s req_ids:[8] -DEBUG 06-24 20:40:42 [manager.py:391] -ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:41 lightllm_req_id:8 first_token_cost:384.63521003723145ms total_cost_time:384.6783638000488ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14803 prompt_cache_len:5151 prompt_cache_ratio:0.3479700060798487 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 -DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:42 [batch.py:51] router release req id 8 -INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10943102836608887 s -INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11142444610595703 s -DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=176278200048239980606288599395600652085, time:1750768842.4341617s req_ids:[8] -DEBUG 06-24 20:40:42 [manager.py:391] -ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:228.87253761291504ms total_cost_time:228.91569137573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14804 prompt_cache_len:5151 prompt_cache_ratio:0.34794650094569035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 -DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:42 [batch.py:51] router release req id 8 -INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10860681533813477 s -INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.11044955253601074 s -DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=182347878234621687476335759762714745994, time:1750768842.6866887s req_ids:[8] -DEBUG 06-24 20:40:42 [manager.py:391] -ERROR 06-24 20:40:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:224.33233261108398ms total_cost_time:224.37810897827148ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14805 prompt_cache_len:5151 prompt_cache_ratio:0.34792299898682877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 -DEBUG 06-24 20:40:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:42 [batch.py:51] router release req id 8 -INFO 06-24 20:40:42 [manager.py:224] router recive req id 8 cost time 0.10746335983276367 s -INFO 06-24 20:40:42 [manager.py:68] detokenization recv req id 8 cost time 0.10937714576721191 s -DEBUG 06-24 20:40:42 [manager.py:391] Prefill Batch: batch_id=78715056893372488881069265307737301189, time:1750768842.9111028s req_ids:[8] -DEBUG 06-24 20:40:42 [manager.py:391] -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:42 lightllm_req_id:8 first_token_cost:227.83684730529785ms total_cost_time:227.88000106811523ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14806 prompt_cache_len:5151 prompt_cache_ratio:0.34789950020262056 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 -DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:43 [batch.py:51] router release req id 8 -INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10811138153076172 s -INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11004066467285156 s -DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=42440626120652398773116384548159058234, time:1750768843.1347637s req_ids:[8] -DEBUG 06-24 20:40:43 [manager.py:391] -ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:217.7562713623047ms total_cost_time:217.79990196228027ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14807 prompt_cache_len:5151 prompt_cache_ratio:0.3478760045924225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 -DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:43 [batch.py:51] router release req id 8 -INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10781073570251465 s -INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.1094658374786377 s -DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=218908050620441887875082233001866425646, time:1750768843.3588357s req_ids:[8] -DEBUG 06-24 20:40:43 [manager.py:391] -ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:207.1547508239746ms total_cost_time:207.1990966796875ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14808 prompt_cache_len:5151 prompt_cache_ratio:0.3478525121555916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 -DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:43 [batch.py:51] router release req id 8 -INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10818624496459961 s -INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036348342895508 s -DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=61797890058957720954323536615300824332, time:1750768843.5725849s req_ids:[8] -DEBUG 06-24 20:40:43 [manager.py:391] -ERROR 06-24 20:40:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:384.16099548339844ms total_cost_time:384.2048645019531ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14809 prompt_cache_len:5151 prompt_cache_ratio:0.34782902289148493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 -DEBUG 06-24 20:40:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:43 [batch.py:51] router release req id 8 -INFO 06-24 20:40:43 [manager.py:224] router recive req id 8 cost time 0.10889124870300293 s -INFO 06-24 20:40:43 [manager.py:68] detokenization recv req id 8 cost time 0.11093664169311523 s -DEBUG 06-24 20:40:43 [manager.py:391] Prefill Batch: batch_id=248102945857509419014935369627352271495, time:1750768843.965249s req_ids:[8] -DEBUG 06-24 20:40:43 [manager.py:391] -ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:43 lightllm_req_id:8 first_token_cost:215.2543067932129ms total_cost_time:215.29650688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14810 prompt_cache_len:5151 prompt_cache_ratio:0.3478055367994598 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 -DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:44 [batch.py:51] router release req id 8 -INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10865592956542969 s -INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11078858375549316 s -DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=132179859115854363224722763400381134218, time:1750768844.188374s req_ids:[8] -DEBUG 06-24 20:40:44 [manager.py:391] -ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:210.71434020996094ms total_cost_time:210.75844764709473ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14811 prompt_cache_len:5151 prompt_cache_ratio:0.3477820538788738 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 -DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:44 [batch.py:51] router release req id 8 -INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10777425765991211 s -INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.10993742942810059 s -DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=181707344387863956869768278569590949484, time:1750768844.4064724s req_ids:[8] -DEBUG 06-24 20:40:44 [manager.py:391] -ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:211.44723892211914ms total_cost_time:211.49373054504395ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14812 prompt_cache_len:5151 prompt_cache_ratio:0.3477585741290845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 -DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:44 [batch.py:51] router release req id 8 -INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.10907864570617676 s -INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11139512062072754 s -DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=182705360682541200207469516667798243953, time:1750768844.6259582s req_ids:[8] -DEBUG 06-24 20:40:44 [manager.py:391] -ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:215.89064598083496ms total_cost_time:215.93403816223145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14813 prompt_cache_len:5151 prompt_cache_ratio:0.3477350975494498 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 -DEBUG 06-24 20:40:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:44 [batch.py:51] router release req id 8 -INFO 06-24 20:40:44 [manager.py:224] router recive req id 8 cost time 0.1096944808959961 s -INFO 06-24 20:40:44 [manager.py:68] detokenization recv req id 8 cost time 0.11194062232971191 s -DEBUG 06-24 20:40:44 [manager.py:391] Prefill Batch: batch_id=65094034135427083880458228615148150183, time:1750768844.848637s req_ids:[8] -DEBUG 06-24 20:40:44 [manager.py:391] -ERROR 06-24 20:40:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:239.6676540374756ms total_cost_time:239.71319198608398ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14814 prompt_cache_len:5151 prompt_cache_ratio:0.34771162413932766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 -DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:45 [batch.py:51] router release req id 8 -INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10834097862243652 s -INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11023116111755371 s -DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=252061074605528275909546173656225196990, time:1750768845.1003077s req_ids:[8] -DEBUG 06-24 20:40:45 [manager.py:391] -ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:44 lightllm_req_id:8 first_token_cost:386.63387298583984ms total_cost_time:386.69538497924805ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:14815 prompt_cache_len:5151 prompt_cache_ratio:0.3476881538980763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 -DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:45 [batch.py:51] router release req id 8 -INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.1091454029083252 s -INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11174368858337402 s -DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=235027181488721524814439854195963616286, time:1750768845.4884155s req_ids:[8] -DEBUG 06-24 20:40:45 [manager.py:391] -ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:207.66639709472656ms total_cost_time:207.71026611328125ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14816 prompt_cache_len:5151 prompt_cache_ratio:0.34766468682505397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 -DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:45 [batch.py:51] router release req id 8 -INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10788989067077637 s -INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034226417541504 s -DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=231828423082246704938994927192289814123, time:1750768845.7132616s req_ids:[8] -DEBUG 06-24 20:40:45 [manager.py:391] -ERROR 06-24 20:40:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:235.4729175567627ms total_cost_time:235.5179786682129ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14817 prompt_cache_len:5151 prompt_cache_ratio:0.34764122291961935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 -DEBUG 06-24 20:40:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:45 [batch.py:51] router release req id 8 -INFO 06-24 20:40:45 [manager.py:224] router recive req id 8 cost time 0.10912156105041504 s -INFO 06-24 20:40:45 [manager.py:68] detokenization recv req id 8 cost time 0.11171650886535645 s -DEBUG 06-24 20:40:45 [manager.py:391] Prefill Batch: batch_id=70839508973314836433232067581364468733, time:1750768845.9441757s req_ids:[8] -DEBUG 06-24 20:40:45 [manager.py:391] -ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:45 lightllm_req_id:8 first_token_cost:204.41842079162598ms total_cost_time:204.46252822875977ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14818 prompt_cache_len:5151 prompt_cache_ratio:0.34761776218113105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 -DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:46 [batch.py:51] router release req id 8 -INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10917353630065918 s -INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.1114656925201416 s -DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=92029641910859212780503711229643831801, time:1750768846.1594272s req_ids:[8] -DEBUG 06-24 20:40:46 [manager.py:391] -ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:219.2704677581787ms total_cost_time:219.313383102417ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14819 prompt_cache_len:5151 prompt_cache_ratio:0.34759430460894797 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 -DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:46 [batch.py:51] router release req id 8 -INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10956382751464844 s -INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.11150240898132324 s -DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=248794822004897147880821612489529625877, time:1750768846.3853245s req_ids:[8] -DEBUG 06-24 20:40:46 [manager.py:391] -ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:387.1264457702637ms total_cost_time:387.17150688171387ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14820 prompt_cache_len:5151 prompt_cache_ratio:0.34757085020242917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 -DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:46 [batch.py:51] router release req id 8 -INFO 06-24 20:40:46 [manager.py:224] router recive req id 8 cost time 0.10947775840759277 s -INFO 06-24 20:40:46 [manager.py:68] detokenization recv req id 8 cost time 0.11163878440856934 s -DEBUG 06-24 20:40:46 [manager.py:391] Prefill Batch: batch_id=337460981725000884871031918288729756582, time:1750768846.7813277s req_ids:[8] -DEBUG 06-24 20:40:46 [manager.py:391] -ERROR 06-24 20:40:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:222.1059799194336ms total_cost_time:222.14865684509277ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14821 prompt_cache_len:5151 prompt_cache_ratio:0.3475473989609338 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 -DEBUG 06-24 20:40:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:46 [batch.py:51] router release req id 8 -INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10936737060546875 s -INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11185455322265625 s -DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=58911421354191091316940430440805258336, time:1750768847.007873s req_ids:[8] -DEBUG 06-24 20:40:47 [manager.py:391] -ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:46 lightllm_req_id:8 first_token_cost:214.7653102874756ms total_cost_time:214.80751037597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14822 prompt_cache_len:5151 prompt_cache_ratio:0.3475239508838213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 -DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:47 [batch.py:51] router release req id 8 -INFO 06-24 20:40:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10927057266235352 s -INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11122250556945801 s -DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=286061251294349083806485342800136138260, time:1750768847.2354212s req_ids:[8] -DEBUG 06-24 20:40:47 [manager.py:391] -ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:224.26366806030273ms total_cost_time:224.30920600891113ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14823 prompt_cache_len:5151 prompt_cache_ratio:0.34750050597045135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 -DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:47 [batch.py:51] router release req id 8 -INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.11070775985717773 s -INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11274242401123047 s -DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=54203497360153627201432010978567480053, time:1750768847.4633265s req_ids:[8] -DEBUG 06-24 20:40:47 [manager.py:391] -ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:221.99225425720215ms total_cost_time:222.03564643859863ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14824 prompt_cache_len:5151 prompt_cache_ratio:0.3474770642201835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 -DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:47 [batch.py:51] router release req id 8 -INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.10803079605102539 s -INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.10994720458984375 s -DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=191499043781114413400517843306229530258, time:1750768847.703136s req_ids:[8] -DEBUG 06-24 20:40:47 [manager.py:391] -ERROR 06-24 20:40:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:236.19437217712402ms total_cost_time:236.23919486999512ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14825 prompt_cache_len:5151 prompt_cache_ratio:0.34745362563237775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 -DEBUG 06-24 20:40:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:47 [batch.py:51] router release req id 8 -INFO 06-24 20:40:47 [manager.py:224] router recive req id 8 cost time 0.1084141731262207 s -INFO 06-24 20:40:47 [manager.py:68] detokenization recv req id 8 cost time 0.11034631729125977 s -DEBUG 06-24 20:40:47 [manager.py:391] Prefill Batch: batch_id=274023715708474064479896137189114040211, time:1750768847.9301534s req_ids:[8] -DEBUG 06-24 20:40:47 [manager.py:391] -ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:47 lightllm_req_id:8 first_token_cost:381.8204402923584ms total_cost_time:381.866455078125ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14826 prompt_cache_len:5151 prompt_cache_ratio:0.34743019020639415 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 -DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:48 [batch.py:51] router release req id 8 -INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.11069416999816895 s -INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11327195167541504 s -DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=302140411455865068881109828679972372929, time:1750768848.3216832s req_ids:[8] -DEBUG 06-24 20:40:48 [manager.py:391] -ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:221.50349617004395ms total_cost_time:221.54712677001953ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14827 prompt_cache_len:5151 prompt_cache_ratio:0.34740675794159304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 -DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:48 [batch.py:51] router release req id 8 -INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.10906767845153809 s -INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11093854904174805 s -DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=127151430386715834625302850410601790372, time:1750768848.547938s req_ids:[8] -DEBUG 06-24 20:40:48 [manager.py:391] -ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:218.5075283050537ms total_cost_time:218.5509204864502ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14828 prompt_cache_len:5151 prompt_cache_ratio:0.34738332883733475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 -DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:48 [batch.py:51] router release req id 8 -INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.10842251777648926 s -INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11043500900268555 s -DEBUG 06-24 20:40:48 [manager.py:391] Prefill Batch: batch_id=65346817275911521283094103287015042885, time:1750768848.7742355s req_ids:[8] -DEBUG 06-24 20:40:48 [manager.py:391] -ERROR 06-24 20:40:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:220.92795372009277ms total_cost_time:220.97182273864746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14829 prompt_cache_len:5151 prompt_cache_ratio:0.34735990289297997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 -DEBUG 06-24 20:40:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:48 [batch.py:51] router release req id 8 -INFO 06-24 20:40:48 [manager.py:224] router recive req id 8 cost time 0.1081078052520752 s -INFO 06-24 20:40:48 [manager.py:68] detokenization recv req id 8 cost time 0.11030244827270508 s -DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=148721898668657336259600816473668756952, time:1750768849.012154s req_ids:[8] -DEBUG 06-24 20:40:49 [manager.py:391] -ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:48 lightllm_req_id:8 first_token_cost:232.29694366455078ms total_cost_time:232.34176635742188ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14830 prompt_cache_len:5151 prompt_cache_ratio:0.3473364801078894 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 -DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:49 [batch.py:51] router release req id 8 -INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10814929008483887 s -INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11016297340393066 s -DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=36131333846343433078011965290492150550, time:1750768849.2393086s req_ids:[8] -DEBUG 06-24 20:40:49 [manager.py:391] -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:387.85552978515625ms total_cost_time:387.89844512939453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14831 prompt_cache_len:5151 prompt_cache_ratio:0.347313060481424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 -DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:49 [batch.py:51] router release req id 8 -INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s -INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11047220230102539 s -DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=270885321025767923499650750163872627883, time:1750768849.6341548s req_ids:[8] -DEBUG 06-24 20:40:49 [manager.py:391] -ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:217.2551155090332ms total_cost_time:217.29707717895508ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14832 prompt_cache_len:5151 prompt_cache_ratio:0.347289644012945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 -DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:49 [batch.py:51] router release req id 8 -INFO 06-24 20:40:49 [manager.py:224] router recive req id 8 cost time 0.10888862609863281 s -INFO 06-24 20:40:49 [manager.py:68] detokenization recv req id 8 cost time 0.11098718643188477 s -DEBUG 06-24 20:40:49 [manager.py:391] Prefill Batch: batch_id=167722352798595551960208905556493702864, time:1750768849.8588948s req_ids:[8] -DEBUG 06-24 20:40:49 [manager.py:391] -ERROR 06-24 20:40:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:215.82388877868652ms total_cost_time:215.867280960083ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14833 prompt_cache_len:5151 prompt_cache_ratio:0.3472662307018135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 -DEBUG 06-24 20:40:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:49 [batch.py:51] router release req id 8 -INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10925173759460449 s -INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11127257347106934 s -DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=138686576213767389852768482786332323152, time:1750768850.0802891s req_ids:[8] -DEBUG 06-24 20:40:50 [manager.py:391] -ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:40:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 57727.491 tokens/s -DEBUG 06-24 20:40:50 [stats.py:37] Avg prompt tokens throughput: 57719.699 tokens/s -DEBUG 06-24 20:40:50 [stats.py:37] Avg generate tokens throughput: 7.792 tokens/s -INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:49 lightllm_req_id:8 first_token_cost:218.55664253234863ms total_cost_time:218.5990810394287ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14834 prompt_cache_len:5151 prompt_cache_ratio:0.34724282054739114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 -DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:50 [batch.py:51] router release req id 8 -INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10952496528625488 s -INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11142468452453613 s -DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=268796056452635516818572284828377338424, time:1750768850.3064694s req_ids:[8] -DEBUG 06-24 20:40:50 [manager.py:391] -ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:217.21363067626953ms total_cost_time:217.25749969482422ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14835 prompt_cache_len:5151 prompt_cache_ratio:0.34721941354903946 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 -DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:50 [batch.py:51] router release req id 8 -INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10938119888305664 s -INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11140751838684082 s -DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=42937648339821814852118522490560197425, time:1750768850.5299842s req_ids:[8] -DEBUG 06-24 20:40:50 [manager.py:391] -ERROR 06-24 20:40:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:213.914155960083ms total_cost_time:213.9577865600586ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14836 prompt_cache_len:5151 prompt_cache_ratio:0.34719600970612025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 -DEBUG 06-24 20:40:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:50 [batch.py:51] router release req id 8 -INFO 06-24 20:40:50 [manager.py:224] router recive req id 8 cost time 0.10964083671569824 s -INFO 06-24 20:40:50 [manager.py:68] detokenization recv req id 8 cost time 0.11079764366149902 s -DEBUG 06-24 20:40:50 [manager.py:391] Prefill Batch: batch_id=126637337826036183264262803857920339117, time:1750768850.7508225s req_ids:[8] -DEBUG 06-24 20:40:50 [manager.py:391] -ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:50 lightllm_req_id:8 first_token_cost:383.699893951416ms total_cost_time:383.7423324584961ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14837 prompt_cache_len:5151 prompt_cache_ratio:0.34717260901799557 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 -DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:51 [batch.py:51] router release req id 8 -INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10813140869140625 s -INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11004853248596191 s -DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=61994194845261628209951332147028630730, time:1750768851.141053s req_ids:[8] -DEBUG 06-24 20:40:51 [manager.py:391] -ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:218.3704376220703ms total_cost_time:218.4126377105713ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14838 prompt_cache_len:5151 prompt_cache_ratio:0.3471492114840275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 -DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:51 [batch.py:51] router release req id 8 -INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10895276069641113 s -INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11089301109313965 s -DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=127323199384974285863607263965967918894, time:1750768851.3745673s req_ids:[8] -DEBUG 06-24 20:40:51 [manager.py:391] -ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:228.2085418701172ms total_cost_time:228.25288772583008ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14839 prompt_cache_len:5151 prompt_cache_ratio:0.3471258171035784 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 -DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:51 [batch.py:51] router release req id 8 -INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10844087600708008 s -INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11014747619628906 s -DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=303294219303015022340331324845255028826, time:1750768851.6000302s req_ids:[8] -DEBUG 06-24 20:40:51 [manager.py:391] -ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:217.31138229370117ms total_cost_time:217.35429763793945ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14840 prompt_cache_len:5151 prompt_cache_ratio:0.34710242587601076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 -DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:51 [batch.py:51] router release req id 8 -INFO 06-24 20:40:51 [manager.py:224] router recive req id 8 cost time 0.10945439338684082 s -INFO 06-24 20:40:51 [manager.py:68] detokenization recv req id 8 cost time 0.11120843887329102 s -DEBUG 06-24 20:40:51 [manager.py:391] Prefill Batch: batch_id=48445642108979346594076659743089461924, time:1750768851.8232896s req_ids:[8] -DEBUG 06-24 20:40:51 [manager.py:391] -ERROR 06-24 20:40:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:214.20764923095703ms total_cost_time:214.2500877380371ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14841 prompt_cache_len:5151 prompt_cache_ratio:0.3470790378006873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 -DEBUG 06-24 20:40:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:51 [batch.py:51] router release req id 8 -INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10936880111694336 s -INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104202270507812 s -DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=205344609554262640986074329798015933901, time:1750768852.0445037s req_ids:[8] -DEBUG 06-24 20:40:52 [manager.py:391] -ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:51 lightllm_req_id:8 first_token_cost:217.4081802368164ms total_cost_time:217.44990348815918ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14842 prompt_cache_len:5151 prompt_cache_ratio:0.34705565287697077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 -DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:52 [batch.py:51] router release req id 8 -INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10831856727600098 s -INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11016321182250977 s -DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=226388964299291264210012713446691124446, time:1750768852.2677495s req_ids:[8] -DEBUG 06-24 20:40:52 [manager.py:391] -ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:386.15965843200684ms total_cost_time:386.2185478210449ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:14843 prompt_cache_len:5151 prompt_cache_ratio:0.3470322711042242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 -DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:52 [batch.py:51] router release req id 8 -INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.10890936851501465 s -INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11061549186706543 s -DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=39207660011688437413472176639765168578, time:1750768852.6611636s req_ids:[8] -DEBUG 06-24 20:40:52 [manager.py:391] -ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:213.16051483154297ms total_cost_time:213.20366859436035ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14844 prompt_cache_len:5151 prompt_cache_ratio:0.3470088924818108 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 -DEBUG 06-24 20:40:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:52 [batch.py:51] router release req id 8 -INFO 06-24 20:40:52 [manager.py:224] router recive req id 8 cost time 0.1093142032623291 s -INFO 06-24 20:40:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104321479797363 s -DEBUG 06-24 20:40:52 [manager.py:391] Prefill Batch: batch_id=45419421682986767023647893075479562899, time:1750768852.879836s req_ids:[8] -DEBUG 06-24 20:40:52 [manager.py:391] -ERROR 06-24 20:40:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:215.76666831970215ms total_cost_time:215.82412719726562ms,out_token_counter:1 mean_per_token_cost_time: 0.05745887756347656ms prompt_token_num:14845 prompt_cache_len:5151 prompt_cache_ratio:0.34698551700909397 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 -DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:53 [batch.py:51] router release req id 8 -INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10855889320373535 s -INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11037707328796387 s -DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=168844597530939919492288970828221909974, time:1750768853.1053097s req_ids:[8] -DEBUG 06-24 20:40:53 [manager.py:391] -ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:52 lightllm_req_id:8 first_token_cost:213.56678009033203ms total_cost_time:213.608980178833ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14846 prompt_cache_len:5151 prompt_cache_ratio:0.3469621446854372 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 -INFO 06-24 20:40:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:53 [statics_utils.py:24] mean first cost: 232.5301736641789 ms -INFO 06-24 20:40:53 [statics_utils.py:24] mean per token cost: 0.05751931347417947 ms -DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:53 [batch.py:51] router release req id 8 -INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10929441452026367 s -INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11133098602294922 s -DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=21710506362242675556027847984882967294, time:1750768853.323157s req_ids:[8] -DEBUG 06-24 20:40:53 [manager.py:391] -INFO 06-24 20:40:53 [manager.py:620] left req id 8can release False refcount 4 -ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:214.5230770111084ms total_cost_time:214.5669460296631ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14847 prompt_cache_len:5151 prompt_cache_ratio:0.3469387755102041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 -DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:53 [batch.py:51] router release req id 8 -INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.10895490646362305 s -INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11088037490844727 s -DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=163113732338514847852512582675087241753, time:1750768853.5442932s req_ids:[8] -DEBUG 06-24 20:40:53 [manager.py:391] -ERROR 06-24 20:40:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:214.05339241027832ms total_cost_time:214.0965461730957ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14848 prompt_cache_len:5151 prompt_cache_ratio:0.3469154094827586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 -DEBUG 06-24 20:40:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:53 [batch.py:51] router release req id 8 -INFO 06-24 20:40:53 [manager.py:224] router recive req id 8 cost time 0.1089470386505127 s -INFO 06-24 20:40:53 [manager.py:68] detokenization recv req id 8 cost time 0.11101794242858887 s -DEBUG 06-24 20:40:53 [manager.py:391] Prefill Batch: batch_id=46519230388910376681851162275862435134, time:1750768853.7657351s req_ids:[8] -DEBUG 06-24 20:40:53 [manager.py:391] -ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:53 lightllm_req_id:8 first_token_cost:381.4725875854492ms total_cost_time:381.5171718597412ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14849 prompt_cache_len:5151 prompt_cache_ratio:0.3468920466024648 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 -DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:54 [batch.py:51] router release req id 8 -INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10893440246582031 s -INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11097884178161621 s -DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=308334606095917163127620037768145741653, time:1750768854.1530652s req_ids:[8] -DEBUG 06-24 20:40:54 [manager.py:391] -ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:220.64542770385742ms total_cost_time:220.6895351409912ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14850 prompt_cache_len:5151 prompt_cache_ratio:0.3468686868686869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 -DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:54 [batch.py:51] router release req id 8 -INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10790729522705078 s -INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.10994958877563477 s -DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=148226305758453917163101162164592496159, time:1750768854.379973s req_ids:[8] -DEBUG 06-24 20:40:54 [manager.py:391] -ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:218.13464164733887ms total_cost_time:218.17898750305176ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14851 prompt_cache_len:5151 prompt_cache_ratio:0.3468453302807892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 -DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:54 [batch.py:51] router release req id 8 -INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10825991630554199 s -INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11024022102355957 s -DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=224344271085155326154806314905491487807, time:1750768854.6065054s req_ids:[8] -DEBUG 06-24 20:40:54 [manager.py:391] -ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:215.86012840270996ms total_cost_time:215.90447425842285ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14852 prompt_cache_len:5151 prompt_cache_ratio:0.34682197683813626 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 -DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:54 [batch.py:51] router release req id 8 -INFO 06-24 20:40:54 [manager.py:224] router recive req id 8 cost time 0.10873794555664062 s -INFO 06-24 20:40:54 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s -DEBUG 06-24 20:40:54 [manager.py:391] Prefill Batch: batch_id=129504238438278196502079877768591557443, time:1750768854.8268094s req_ids:[8] -DEBUG 06-24 20:40:54 [manager.py:391] -ERROR 06-24 20:40:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:213.02270889282227ms total_cost_time:213.06610107421875ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14853 prompt_cache_len:5151 prompt_cache_ratio:0.3467986265400929 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 -DEBUG 06-24 20:40:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:54 [batch.py:51] router release req id 8 -INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10822749137878418 s -INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11013269424438477 s -DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=252743147959163400454552036055707437162, time:1750768855.058071s req_ids:[8] -DEBUG 06-24 20:40:55 [manager.py:391] -ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:54 lightllm_req_id:8 first_token_cost:392.67754554748535ms total_cost_time:392.72332191467285ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14854 prompt_cache_len:5151 prompt_cache_ratio:0.346775279386024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 -DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:55 [batch.py:51] router release req id 8 -INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.1092689037322998 s -INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11135411262512207 s -DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=29328081965286694053172317514460251644, time:1750768855.4476278s req_ids:[8] -DEBUG 06-24 20:40:55 [manager.py:391] -ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:218.02377700805664ms total_cost_time:218.06740760803223ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14855 prompt_cache_len:5151 prompt_cache_ratio:0.34675193537529453 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 -DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:55 [batch.py:51] router release req id 8 -INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10827946662902832 s -INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11037564277648926 s -DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=254371067539319987672283213949260502640, time:1750768855.67229s req_ids:[8] -DEBUG 06-24 20:40:55 [manager.py:391] -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:40:55 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:218.77336502075195ms total_cost_time:218.81580352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14856 prompt_cache_len:5151 prompt_cache_ratio:0.3467285945072698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 -DEBUG 06-24 20:40:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:55 [batch.py:51] router release req id 8 -INFO 06-24 20:40:55 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s -INFO 06-24 20:40:55 [manager.py:68] detokenization recv req id 8 cost time 0.11081576347351074 s -DEBUG 06-24 20:40:55 [manager.py:391] Prefill Batch: batch_id=208165317810573962612201287272215255784, time:1750768855.895097s req_ids:[8] -DEBUG 06-24 20:40:55 [manager.py:391] -ERROR 06-24 20:40:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:210.76273918151855ms total_cost_time:210.80660820007324ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14857 prompt_cache_len:5151 prompt_cache_ratio:0.3467052567813152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 -DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:56 [batch.py:51] router release req id 8 -INFO 06-24 20:40:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s -INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.11038661003112793 s -DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=186937536360240095596580753588514012725, time:1750768856.1157286s req_ids:[8] -DEBUG 06-24 20:40:56 [manager.py:391] -ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:55 lightllm_req_id:8 first_token_cost:217.02289581298828ms total_cost_time:217.06581115722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14858 prompt_cache_len:5151 prompt_cache_ratio:0.34668192219679633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 -DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:56 [batch.py:51] router release req id 8 -INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10913658142089844 s -INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.11117696762084961 s -DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=90944576786277947240352311636465545112, time:1750768856.3375335s req_ids:[8] -DEBUG 06-24 20:40:56 [manager.py:391] -ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:387.88771629333496ms total_cost_time:387.93373107910156ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14859 prompt_cache_len:5151 prompt_cache_ratio:0.3466585907530789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 -DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:56 [batch.py:51] router release req id 8 -INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10756540298461914 s -INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.10944414138793945 s -DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=130144033473181817960502432533194540488, time:1750768856.7327209s req_ids:[8] -DEBUG 06-24 20:40:56 [manager.py:391] -ERROR 06-24 20:40:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:216.43495559692383ms total_cost_time:216.4778709411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14860 prompt_cache_len:5151 prompt_cache_ratio:0.3466352624495289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 -DEBUG 06-24 20:40:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:56 [batch.py:51] router release req id 8 -INFO 06-24 20:40:56 [manager.py:224] router recive req id 8 cost time 0.10744452476501465 s -INFO 06-24 20:40:56 [manager.py:68] detokenization recv req id 8 cost time 0.10934257507324219 s -DEBUG 06-24 20:40:56 [manager.py:391] Prefill Batch: batch_id=325705179179017476770047909346264951467, time:1750768856.9551404s req_ids:[8] -DEBUG 06-24 20:40:56 [manager.py:391] -ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:56 lightllm_req_id:8 first_token_cost:216.51363372802734ms total_cost_time:216.55678749084473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14861 prompt_cache_len:5151 prompt_cache_ratio:0.34661193728551243 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 -DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:57 [batch.py:51] router release req id 8 -INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10899615287780762 s -INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11096930503845215 s -DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=310223895000534510674887324135487974484, time:1750768857.1789212s req_ids:[8] -DEBUG 06-24 20:40:57 [manager.py:391] -ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:215.5017852783203ms total_cost_time:215.5442237854004ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14862 prompt_cache_len:5151 prompt_cache_ratio:0.34658861526039564 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 -DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:57 [batch.py:51] router release req id 8 -INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.1082003116607666 s -INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11026120185852051 s -DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=43106765343510030714209204094423593191, time:1750768857.3989134s req_ids:[8] -DEBUG 06-24 20:40:57 [manager.py:391] -ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:216.39633178710938ms total_cost_time:216.43972396850586ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14863 prompt_cache_len:5151 prompt_cache_ratio:0.34656529637354505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 -DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:57 [batch.py:51] router release req id 8 -INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10869336128234863 s -INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11070585250854492 s -DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=194386802533934419758830174697754777399, time:1750768857.6217022s req_ids:[8] -DEBUG 06-24 20:40:57 [manager.py:391] -ERROR 06-24 20:40:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:220.0336456298828ms total_cost_time:220.0767993927002ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14864 prompt_cache_len:5151 prompt_cache_ratio:0.34654198062432723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 -DEBUG 06-24 20:40:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:57 [batch.py:51] router release req id 8 -INFO 06-24 20:40:57 [manager.py:224] router recive req id 8 cost time 0.10886096954345703 s -INFO 06-24 20:40:57 [manager.py:68] detokenization recv req id 8 cost time 0.11061978340148926 s -DEBUG 06-24 20:40:57 [manager.py:391] Prefill Batch: batch_id=70435799213386712513996817956545656973, time:1750768857.848022s req_ids:[8] -DEBUG 06-24 20:40:57 [manager.py:391] -ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:57 lightllm_req_id:8 first_token_cost:382.7171325683594ms total_cost_time:382.7638626098633ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:14865 prompt_cache_len:5151 prompt_cache_ratio:0.34651866801210895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 -DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:58 [batch.py:51] router release req id 8 -INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10949373245239258 s -INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11149787902832031 s -DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=51790543177241029601962955433113348307, time:1750768858.2367487s req_ids:[8] -DEBUG 06-24 20:40:58 [manager.py:391] -ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:216.0036563873291ms total_cost_time:216.04585647583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14866 prompt_cache_len:5151 prompt_cache_ratio:0.34649535853625724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 -DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:58 [batch.py:51] router release req id 8 -INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10840249061584473 s -INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s -DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=97050191559475073871642634152391601779, time:1750768858.459972s req_ids:[8] -DEBUG 06-24 20:40:58 [manager.py:391] -ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:216.7670726776123ms total_cost_time:216.80903434753418ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14867 prompt_cache_len:5151 prompt_cache_ratio:0.3464720521961391 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 -DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:58 [batch.py:51] router release req id 8 -INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10908746719360352 s -INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.11095929145812988 s -DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=137586483109649023358936739097714199793, time:1750768858.691904s req_ids:[8] -DEBUG 06-24 20:40:58 [manager.py:391] -ERROR 06-24 20:40:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:225.36873817443848ms total_cost_time:225.41213035583496ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14868 prompt_cache_len:5151 prompt_cache_ratio:0.3464487489911219 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 -DEBUG 06-24 20:40:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:58 [batch.py:51] router release req id 8 -INFO 06-24 20:40:58 [manager.py:224] router recive req id 8 cost time 0.10814285278320312 s -INFO 06-24 20:40:58 [manager.py:68] detokenization recv req id 8 cost time 0.1098334789276123 s -DEBUG 06-24 20:40:58 [manager.py:391] Prefill Batch: batch_id=52654648932681713816636337981151815318, time:1750768858.9151597s req_ids:[8] -DEBUG 06-24 20:40:58 [manager.py:391] -ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:58 lightllm_req_id:8 first_token_cost:213.58013153076172ms total_cost_time:213.6240005493164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14869 prompt_cache_len:5151 prompt_cache_ratio:0.34642544892057303 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 -DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:59 [batch.py:51] router release req id 8 -INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.1082308292388916 s -INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101691722869873 s -DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=203150616436167594968624190923035975230, time:1750768859.1396341s req_ids:[8] -DEBUG 06-24 20:40:59 [manager.py:391] -ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:216.9501781463623ms total_cost_time:216.9930934906006ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14870 prompt_cache_len:5151 prompt_cache_ratio:0.3464021519838601 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 -DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:59 [batch.py:51] router release req id 8 -INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10931873321533203 s -INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11124873161315918 s -DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=167031256084227916068579839790065652491, time:1750768859.358784s req_ids:[8] -DEBUG 06-24 20:40:59 [manager.py:391] -ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:375.6697177886963ms total_cost_time:375.713586807251ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14871 prompt_cache_len:5151 prompt_cache_ratio:0.346378858180351 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 -DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:59 [batch.py:51] router release req id 8 -INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10915827751159668 s -INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11115407943725586 s -DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=136760883050539528999813811172541162904, time:1750768859.7418325s req_ids:[8] -DEBUG 06-24 20:40:59 [manager.py:391] -ERROR 06-24 20:40:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:40:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:40:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:212.62288093566895ms total_cost_time:212.66746520996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14872 prompt_cache_len:5151 prompt_cache_ratio:0.34635556750941365 mtp_avg_token_per_step:1.0 -INFO 06-24 20:40:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 -DEBUG 06-24 20:40:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:40:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:40:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:40:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:40:59 [batch.py:51] router release req id 8 -INFO 06-24 20:40:59 [manager.py:224] router recive req id 8 cost time 0.10931992530822754 s -INFO 06-24 20:40:59 [manager.py:68] detokenization recv req id 8 cost time 0.11138033866882324 s -DEBUG 06-24 20:40:59 [manager.py:391] Prefill Batch: batch_id=52999980440188132058692401983966633638, time:1750768859.9696s req_ids:[8] -DEBUG 06-24 20:40:59 [manager.py:391] -ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:40:59 lightllm_req_id:8 first_token_cost:226.6998291015625ms total_cost_time:226.74322128295898ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14873 prompt_cache_len:5151 prompt_cache_ratio:0.3463322799704162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 -DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:00 [batch.py:51] router release req id 8 -INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10899138450622559 s -INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.11091923713684082 s -DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=30340022811735612688987866621317335719, time:1750768860.1935837s req_ids:[8] -DEBUG 06-24 20:41:00 [manager.py:391] -DEBUG 06-24 20:41:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 59351.138 tokens/s -DEBUG 06-24 20:41:00 [stats.py:37] Avg prompt tokens throughput: 59343.248 tokens/s -DEBUG 06-24 20:41:00 [stats.py:37] Avg generate tokens throughput: 7.890 tokens/s -ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:215.76189994812012ms total_cost_time:215.8055305480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14874 prompt_cache_len:5151 prompt_cache_ratio:0.34630899556272693 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 -DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:00 [batch.py:51] router release req id 8 -INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10855937004089355 s -INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.10974311828613281 s -DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=24440539916409399144162206739061709299, time:1750768860.4161408s req_ids:[8] -DEBUG 06-24 20:41:00 [manager.py:391] -ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:214.29109573364258ms total_cost_time:214.33401107788086ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14875 prompt_cache_len:5151 prompt_cache_ratio:0.3462857142857143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 -DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:00 [batch.py:51] router release req id 8 -INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10857391357421875 s -INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.11061358451843262 s -DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=162360816456284702805959460382544291702, time:1750768860.6371193s req_ids:[8] -DEBUG 06-24 20:41:00 [manager.py:391] -ERROR 06-24 20:41:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:214.96176719665527ms total_cost_time:215.00444412231445ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14876 prompt_cache_len:5151 prompt_cache_ratio:0.34626243613874697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 -DEBUG 06-24 20:41:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:00 [batch.py:51] router release req id 8 -INFO 06-24 20:41:00 [manager.py:224] router recive req id 8 cost time 0.10768413543701172 s -INFO 06-24 20:41:00 [manager.py:68] detokenization recv req id 8 cost time 0.10979032516479492 s -DEBUG 06-24 20:41:00 [manager.py:391] Prefill Batch: batch_id=109887392837987005072029733850974912145, time:1750768860.8588235s req_ids:[8] -DEBUG 06-24 20:41:00 [manager.py:391] -ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:00 lightllm_req_id:8 first_token_cost:382.4770450592041ms total_cost_time:382.5194835662842ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14877 prompt_cache_len:5151 prompt_cache_ratio:0.3462391611211938 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 -DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:01 [batch.py:51] router release req id 8 -INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10885167121887207 s -INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11104369163513184 s -DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=107355060012968455688088632834658166087, time:1750768861.2476895s req_ids:[8] -DEBUG 06-24 20:41:01 [manager.py:391] -ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:216.83907508850098ms total_cost_time:216.88246726989746ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14878 prompt_cache_len:5151 prompt_cache_ratio:0.3462158892324237 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 -DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:01 [batch.py:51] router release req id 8 -INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10940670967102051 s -INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11199784278869629 s -DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=151973044893258421316056284232615194776, time:1750768861.4725146s req_ids:[8] -DEBUG 06-24 20:41:01 [manager.py:391] -ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:214.2469882965088ms total_cost_time:214.28942680358887ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14879 prompt_cache_len:5151 prompt_cache_ratio:0.3461926204718059 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 -DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:01 [batch.py:51] router release req id 8 -INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.1088857650756836 s -INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.11100530624389648 s -DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=265633169987935817030960517438594143582, time:1750768861.6918795s req_ids:[8] -DEBUG 06-24 20:41:01 [manager.py:391] -ERROR 06-24 20:41:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:212.5692367553711ms total_cost_time:212.61310577392578ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14880 prompt_cache_len:5151 prompt_cache_ratio:0.3461693548387097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 -DEBUG 06-24 20:41:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:01 [batch.py:51] router release req id 8 -INFO 06-24 20:41:01 [manager.py:224] router recive req id 8 cost time 0.10915040969848633 s -INFO 06-24 20:41:01 [manager.py:68] detokenization recv req id 8 cost time 0.1112222671508789 s -DEBUG 06-24 20:41:01 [manager.py:391] Prefill Batch: batch_id=66353668469187788581520762286825745195, time:1750768861.911734s req_ids:[8] -DEBUG 06-24 20:41:01 [manager.py:391] -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:01 lightllm_req_id:8 first_token_cost:215.39020538330078ms total_cost_time:215.43145179748535ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14881 prompt_cache_len:5151 prompt_cache_ratio:0.34614609233250454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 -DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:02 [batch.py:51] router release req id 8 -INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10875511169433594 s -INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.11082077026367188 s -DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=87726903341641546901883822299707514518, time:1750768862.1344776s req_ids:[8] -DEBUG 06-24 20:41:02 [manager.py:391] -ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:216.63475036621094ms total_cost_time:216.67766571044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14882 prompt_cache_len:5151 prompt_cache_ratio:0.34612283295256013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 -DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:02 [batch.py:51] router release req id 8 -INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.2092437744140625 s -INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.21091532707214355 s -DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=95319859116897128760894535842799485515, time:1750768862.490029s req_ids:[8] -DEBUG 06-24 20:41:02 [manager.py:391] -ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:329.2872905731201ms total_cost_time:329.3297290802002ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14883 prompt_cache_len:5151 prompt_cache_ratio:0.3460995766982463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 -DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:02 [batch.py:51] router release req id 8 -INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10882949829101562 s -INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.1108694076538086 s -DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=43123884381091590076306023372049116752, time:1750768862.6929505s req_ids:[8] -DEBUG 06-24 20:41:02 [manager.py:391] -ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:211.79747581481934ms total_cost_time:211.8396759033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:14884 prompt_cache_len:5151 prompt_cache_ratio:0.3460763235689331 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 -DEBUG 06-24 20:41:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:02 [batch.py:51] router release req id 8 -INFO 06-24 20:41:02 [manager.py:224] router recive req id 8 cost time 0.10748481750488281 s -INFO 06-24 20:41:02 [manager.py:68] detokenization recv req id 8 cost time 0.10918593406677246 s -DEBUG 06-24 20:41:02 [manager.py:391] Prefill Batch: batch_id=23118814973476388619837085834386101701, time:1750768862.9101026s req_ids:[8] -DEBUG 06-24 20:41:02 [manager.py:391] -ERROR 06-24 20:41:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:171.4460849761963ms total_cost_time:171.48756980895996ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:14885 prompt_cache_len:5151 prompt_cache_ratio:0.3460530735639906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 -DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:03 [batch.py:51] router release req id 8 -INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10857176780700684 s -INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.11064577102661133 s -DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=186301524104673297330674387919432549129, time:1750768863.0887148s req_ids:[8] -DEBUG 06-24 20:41:03 [manager.py:391] -ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:02 lightllm_req_id:8 first_token_cost:209.23161506652832ms total_cost_time:209.2759609222412ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14886 prompt_cache_len:5151 prompt_cache_ratio:0.3460298266827892 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 -DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:03 [batch.py:51] router release req id 8 -INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10918450355529785 s -INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.1112356185913086 s -DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=242582471906023648059019302265076176598, time:1750768863.3153472s req_ids:[8] -DEBUG 06-24 20:41:03 [manager.py:391] -ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:231.90569877624512ms total_cost_time:231.9498062133789ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14887 prompt_cache_len:5151 prompt_cache_ratio:0.3460065829246994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 -DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:03 [batch.py:51] router release req id 8 -INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.3105282783508301 s -INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.31249284744262695 s -DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=111972613815247801845076089116162334219, time:1750768863.7491968s req_ids:[8] -DEBUG 06-24 20:41:03 [manager.py:391] -ERROR 06-24 20:41:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:432.4631690979004ms total_cost_time:432.5079917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14888 prompt_cache_len:5151 prompt_cache_ratio:0.3459833422890919 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 -DEBUG 06-24 20:41:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:03 [batch.py:51] router release req id 8 -INFO 06-24 20:41:03 [manager.py:224] router recive req id 8 cost time 0.10958552360534668 s -INFO 06-24 20:41:03 [manager.py:68] detokenization recv req id 8 cost time 0.11178851127624512 s -DEBUG 06-24 20:41:03 [manager.py:391] Prefill Batch: batch_id=174520694503096606808550088945943102036, time:1750768863.98159s req_ids:[8] -DEBUG 06-24 20:41:03 [manager.py:391] -ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:03 lightllm_req_id:8 first_token_cost:219.44189071655273ms total_cost_time:219.4845676422119ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14889 prompt_cache_len:5151 prompt_cache_ratio:0.3459601047753375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 -DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:04 [batch.py:51] router release req id 8 -INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10886383056640625 s -INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11073994636535645 s -DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=250893711619579312756076361067948681755, time:1750768864.2066488s req_ids:[8] -DEBUG 06-24 20:41:04 [manager.py:391] -ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:215.14010429382324ms total_cost_time:215.18373489379883ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14890 prompt_cache_len:5151 prompt_cache_ratio:0.34593687038280724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 -DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:04 [batch.py:51] router release req id 8 -INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10869145393371582 s -INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11074542999267578 s -DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=276314252462958894868404663225309688840, time:1750768864.4302382s req_ids:[8] -DEBUG 06-24 20:41:04 [manager.py:391] -ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:211.64703369140625ms total_cost_time:211.69137954711914ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14891 prompt_cache_len:5151 prompt_cache_ratio:0.34591363911087236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 -DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:04 [batch.py:51] router release req id 8 -INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10960793495178223 s -INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11159968376159668 s -DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=216745425805572535129557559109168650304, time:1750768864.649254s req_ids:[8] -DEBUG 06-24 20:41:04 [manager.py:391] -ERROR 06-24 20:41:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:212.6913070678711ms total_cost_time:212.7358913421631ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14892 prompt_cache_len:5151 prompt_cache_ratio:0.3458904109589041 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 -DEBUG 06-24 20:41:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:04 [batch.py:51] router release req id 8 -INFO 06-24 20:41:04 [manager.py:224] router recive req id 8 cost time 0.10870218276977539 s -INFO 06-24 20:41:04 [manager.py:68] detokenization recv req id 8 cost time 0.11073541641235352 s -DEBUG 06-24 20:41:04 [manager.py:391] Prefill Batch: batch_id=267529852153740872475190704003029324575, time:1750768864.8791468s req_ids:[8] -DEBUG 06-24 20:41:04 [manager.py:391] -ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:04 lightllm_req_id:8 first_token_cost:392.27843284606934ms total_cost_time:392.322301864624ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14893 prompt_cache_len:5151 prompt_cache_ratio:0.3458671859262741 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 -DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:05 [batch.py:51] router release req id 8 -DEBUG 06-24 20:41:05 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:05 [manager.py:283] -DEBUG 06-24 20:41:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:05 [manager.py:284] -INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10910820960998535 s -INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11134052276611328 s -DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=32897946877815333385498523328267399539, time:1750768865.2683203s req_ids:[8] -DEBUG 06-24 20:41:05 [manager.py:391] -ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:217.84400939941406ms total_cost_time:217.88668632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14894 prompt_cache_len:5151 prompt_cache_ratio:0.34584396401235395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 -DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:05 [batch.py:51] router release req id 8 -INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10877370834350586 s -INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.1107935905456543 s -DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=4280161830768621835617159168658818268, time:1750768865.493105s req_ids:[8] -DEBUG 06-24 20:41:05 [manager.py:391] -ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:209.0744972229004ms total_cost_time:209.120512008667ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:14895 prompt_cache_len:5151 prompt_cache_ratio:0.3458207452165156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 -DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:05 [batch.py:51] router release req id 8 -INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10905838012695312 s -INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11100506782531738 s -DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=112935126093636079472208406474335395449, time:1750768865.7077105s req_ids:[8] -DEBUG 06-24 20:41:05 [manager.py:391] -ERROR 06-24 20:41:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:212.75758743286133ms total_cost_time:212.8005027770996ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14896 prompt_cache_len:5151 prompt_cache_ratio:0.34579752953813103 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 -DEBUG 06-24 20:41:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:05 [batch.py:51] router release req id 8 -INFO 06-24 20:41:05 [manager.py:224] router recive req id 8 cost time 0.10890340805053711 s -INFO 06-24 20:41:05 [manager.py:68] detokenization recv req id 8 cost time 0.11079239845275879 s -DEBUG 06-24 20:41:05 [manager.py:391] Prefill Batch: batch_id=253698152045727825433128296209084539004, time:1750768865.927933s req_ids:[8] -DEBUG 06-24 20:41:05 [manager.py:391] -ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:05 lightllm_req_id:8 first_token_cost:221.4500904083252ms total_cost_time:221.49395942687988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14897 prompt_cache_len:5151 prompt_cache_ratio:0.34577431697657246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 -DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:06 [batch.py:51] router release req id 8 -INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10950064659118652 s -INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.11147761344909668 s -DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=145636469247426338582433858355420701122, time:1750768866.155177s req_ids:[8] -DEBUG 06-24 20:41:06 [manager.py:391] -ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:212.5387191772461ms total_cost_time:212.58234977722168ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14898 prompt_cache_len:5151 prompt_cache_ratio:0.34575110753121224 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 -DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:06 [batch.py:51] router release req id 8 -INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10746288299560547 s -INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.1094825267791748 s -DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=43069594315191217862609225695507154677, time:1750768866.3785434s req_ids:[8] -DEBUG 06-24 20:41:06 [manager.py:391] -ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:416.4903163909912ms total_cost_time:416.5332317352295ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14899 prompt_cache_len:5151 prompt_cache_ratio:0.34572790120142294 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 -DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:06 [batch.py:51] router release req id 8 -INFO 06-24 20:41:06 [manager.py:224] router recive req id 8 cost time 0.10793209075927734 s -INFO 06-24 20:41:06 [manager.py:68] detokenization recv req id 8 cost time 0.10999417304992676 s -DEBUG 06-24 20:41:06 [manager.py:391] Prefill Batch: batch_id=221285560943699036228107238866254889367, time:1750768866.8026779s req_ids:[8] -DEBUG 06-24 20:41:06 [manager.py:391] -ERROR 06-24 20:41:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:221.15278244018555ms total_cost_time:221.19665145874023ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14900 prompt_cache_len:5151 prompt_cache_ratio:0.34570469798657716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 -DEBUG 06-24 20:41:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:06 [batch.py:51] router release req id 8 -INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s -INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11042475700378418 s -DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=36033659262829388529023374209089953215, time:1750768867.0295746s req_ids:[8] -DEBUG 06-24 20:41:07 [manager.py:391] -ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:06 lightllm_req_id:8 first_token_cost:216.06040000915527ms total_cost_time:216.10331535339355ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14901 prompt_cache_len:5151 prompt_cache_ratio:0.3456814978860479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 -DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:07 [batch.py:51] router release req id 8 -INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10922980308532715 s -INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11124420166015625 s -DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=270315735946108243168961941880375939391, time:1750768867.2514284s req_ids:[8] -DEBUG 06-24 20:41:07 [manager.py:391] -ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:214.87140655517578ms total_cost_time:214.89405632019043ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:14902 prompt_cache_len:5151 prompt_cache_ratio:0.3456583008992082 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 -DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:07 [batch.py:51] router release req id 8 -INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.10830330848693848 s -INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.11036920547485352 s -DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=193771079240656229269882947576340212010, time:1750768867.4719985s req_ids:[8] -DEBUG 06-24 20:41:07 [manager.py:391] -ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:219.62523460388184ms total_cost_time:219.6676731109619ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14903 prompt_cache_len:5151 prompt_cache_ratio:0.3456351070254311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 -DEBUG 06-24 20:41:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:07 [batch.py:51] router release req id 8 -INFO 06-24 20:41:07 [manager.py:224] router recive req id 8 cost time 0.11028480529785156 s -INFO 06-24 20:41:07 [manager.py:68] detokenization recv req id 8 cost time 0.1122591495513916 s -DEBUG 06-24 20:41:07 [manager.py:391] Prefill Batch: batch_id=291142253853659683131321862920721975389, time:1750768867.6973171s req_ids:[8] -DEBUG 06-24 20:41:07 [manager.py:391] -ERROR 06-24 20:41:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:395.46918869018555ms total_cost_time:395.57647705078125ms,out_token_counter:1 mean_per_token_cost_time: 0.10728836059570312ms prompt_token_num:14904 prompt_cache_len:5151 prompt_cache_ratio:0.34561191626409016 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 -DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:08 [batch.py:51] router release req id 8 -INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.1080937385559082 s -INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.10995817184448242 s -DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=215810424577079131747000277978612690255, time:1750768868.0994499s req_ids:[8] -DEBUG 06-24 20:41:08 [manager.py:391] -ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:07 lightllm_req_id:8 first_token_cost:173.93827438354492ms total_cost_time:173.9802360534668ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:14905 prompt_cache_len:5151 prompt_cache_ratio:0.34558872861455886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 -DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:08 [batch.py:51] router release req id 8 -INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10782909393310547 s -INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.10982394218444824 s -DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=118800833338764372539361865488163742873, time:1750768868.2800276s req_ids:[8] -DEBUG 06-24 20:41:08 [manager.py:391] -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:08 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:215.7881259918213ms total_cost_time:215.82984924316406ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14906 prompt_cache_len:5151 prompt_cache_ratio:0.34556554407621093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 -DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:08 [batch.py:51] router release req id 8 -INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10789012908935547 s -INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.1098940372467041 s -DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=175348578824830694084324027796760366306, time:1750768868.5019732s req_ids:[8] -DEBUG 06-24 20:41:08 [manager.py:391] -ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:229.76255416870117ms total_cost_time:229.80570793151855ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14907 prompt_cache_len:5151 prompt_cache_ratio:0.3455423626484202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 -DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:08 [batch.py:51] router release req id 8 -INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.10945582389831543 s -INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.11144566535949707 s -DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=61457639929847069873615895232070059774, time:1750768868.7436316s req_ids:[8] -DEBUG 06-24 20:41:08 [manager.py:391] -ERROR 06-24 20:41:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:219.81477737426758ms total_cost_time:219.85983848571777ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14908 prompt_cache_len:5151 prompt_cache_ratio:0.3455191843305608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 -DEBUG 06-24 20:41:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:08 [batch.py:51] router release req id 8 -INFO 06-24 20:41:08 [manager.py:224] router recive req id 8 cost time 0.1083064079284668 s -INFO 06-24 20:41:08 [manager.py:68] detokenization recv req id 8 cost time 0.1102449893951416 s -DEBUG 06-24 20:41:08 [manager.py:391] Prefill Batch: batch_id=293132026085252512129214121529498567958, time:1750768868.9670632s req_ids:[8] -DEBUG 06-24 20:41:08 [manager.py:391] -ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:08 lightllm_req_id:8 first_token_cost:218.78480911254883ms total_cost_time:218.827486038208ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14909 prompt_cache_len:5151 prompt_cache_ratio:0.34549600912200684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 -DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:09 [batch.py:51] router release req id 8 -INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.10799765586853027 s -INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.10994482040405273 s -DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=62410351373127482399835636552348370672, time:1750768869.1899393s req_ids:[8] -DEBUG 06-24 20:41:09 [manager.py:391] -ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:387.9415988922119ms total_cost_time:387.9833221435547ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14910 prompt_cache_len:5151 prompt_cache_ratio:0.3454728370221328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 -DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:09 [batch.py:51] router release req id 8 -INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.1089627742767334 s -INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.11092758178710938 s -DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=252868431862889257448383693773606786921, time:1750768869.5886948s req_ids:[8] -DEBUG 06-24 20:41:09 [manager.py:391] -ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:223.01387786865234ms total_cost_time:223.05750846862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14911 prompt_cache_len:5151 prompt_cache_ratio:0.3454496680303132 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 -DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:09 [batch.py:51] router release req id 8 -INFO 06-24 20:41:09 [manager.py:224] router recive req id 8 cost time 0.10886788368225098 s -INFO 06-24 20:41:09 [manager.py:68] detokenization recv req id 8 cost time 0.11022806167602539 s -DEBUG 06-24 20:41:09 [manager.py:391] Prefill Batch: batch_id=81260551969142300189781994789688793109, time:1750768869.8172355s req_ids:[8] -DEBUG 06-24 20:41:09 [manager.py:391] -ERROR 06-24 20:41:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:211.57002449035645ms total_cost_time:211.61293983459473ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14912 prompt_cache_len:5151 prompt_cache_ratio:0.34542650214592274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 -DEBUG 06-24 20:41:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:09 [batch.py:51] router release req id 8 -INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.1091761589050293 s -INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11064815521240234 s -DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=159757178682864389425469575912161281887, time:1750768870.0343378s req_ids:[8] -DEBUG 06-24 20:41:10 [manager.py:391] -ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:09 lightllm_req_id:8 first_token_cost:214.37954902648926ms total_cost_time:214.42389488220215ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14913 prompt_cache_len:5151 prompt_cache_ratio:0.34540333936833634 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 -DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:10 [batch.py:51] router release req id 8 -INFO 06-24 20:41:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10974812507629395 s -INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11115741729736328 s -DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=93185547198237627460301643082472967646, time:1750768870.255305s req_ids:[8] -DEBUG 06-24 20:41:10 [manager.py:391] -DEBUG 06-24 20:41:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 59220.603 tokens/s -DEBUG 06-24 20:41:10 [stats.py:37] Avg prompt tokens throughput: 59212.652 tokens/s -DEBUG 06-24 20:41:10 [stats.py:37] Avg generate tokens throughput: 7.951 tokens/s -ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:217.26322174072266ms total_cost_time:217.30971336364746ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:14914 prompt_cache_len:5151 prompt_cache_ratio:0.3453801796969291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 -DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:10 [batch.py:51] router release req id 8 -INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10945010185241699 s -INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086177825927734 s -DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=52968974788723638813367291977635059153, time:1750768870.4805448s req_ids:[8] -DEBUG 06-24 20:41:10 [manager.py:391] -ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:220.19219398498535ms total_cost_time:220.23916244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14915 prompt_cache_len:5151 prompt_cache_ratio:0.3453570231310761 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 -DEBUG 06-24 20:41:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:10 [batch.py:51] router release req id 8 -INFO 06-24 20:41:10 [manager.py:224] router recive req id 8 cost time 0.10928606986999512 s -INFO 06-24 20:41:10 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s -DEBUG 06-24 20:41:10 [manager.py:391] Prefill Batch: batch_id=46282328124414315388060674623020798475, time:1750768870.7209675s req_ids:[8] -DEBUG 06-24 20:41:10 [manager.py:391] -ERROR 06-24 20:41:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:400.47430992126465ms total_cost_time:400.5169868469238ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14916 prompt_cache_len:5151 prompt_cache_ratio:0.34533386967015284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 -DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:11 [batch.py:51] router release req id 8 -INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10915231704711914 s -INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11057043075561523 s -DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=159259871218461477861587348394248744543, time:1750768871.1156638s req_ids:[8] -DEBUG 06-24 20:41:11 [manager.py:391] -ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:10 lightllm_req_id:8 first_token_cost:219.57087516784668ms total_cost_time:219.61426734924316ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14917 prompt_cache_len:5151 prompt_cache_ratio:0.3453107193135349 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 -DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:11 [batch.py:51] router release req id 8 -INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10951471328735352 s -INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11063146591186523 s -DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=34914395714947755613268331326873610894, time:1750768871.3389401s req_ids:[8] -DEBUG 06-24 20:41:11 [manager.py:391] -ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:216.02869033813477ms total_cost_time:216.07255935668945ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14918 prompt_cache_len:5151 prompt_cache_ratio:0.34528757206059796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 -DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:11 [batch.py:51] router release req id 8 -INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10887789726257324 s -INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.10990095138549805 s -DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=109201385768949674141668179527031702705, time:1750768871.5622141s req_ids:[8] -DEBUG 06-24 20:41:11 [manager.py:391] -ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:218.50013732910156ms total_cost_time:218.54257583618164ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14919 prompt_cache_len:5151 prompt_cache_ratio:0.3452644279107179 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 -DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:11 [batch.py:51] router release req id 8 -INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.11032557487487793 s -INFO 06-24 20:41:11 [manager.py:68] detokenization recv req id 8 cost time 0.11227774620056152 s -DEBUG 06-24 20:41:11 [manager.py:391] Prefill Batch: batch_id=321789107968804759616621093226387698477, time:1750768871.786662s req_ids:[8] -DEBUG 06-24 20:41:11 [manager.py:391] -ERROR 06-24 20:41:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:211.0130786895752ms total_cost_time:211.05694770812988ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14920 prompt_cache_len:5151 prompt_cache_ratio:0.34524128686327077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 -DEBUG 06-24 20:41:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:11 [batch.py:51] router release req id 8 -INFO 06-24 20:41:11 [manager.py:224] router recive req id 8 cost time 0.10947847366333008 s -INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11155867576599121 s -DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=329240170780994265122548500954997707267, time:1750768872.0110292s req_ids:[8] -DEBUG 06-24 20:41:12 [manager.py:391] -ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:11 lightllm_req_id:8 first_token_cost:396.0099220275879ms total_cost_time:396.0533142089844ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14921 prompt_cache_len:5151 prompt_cache_ratio:0.34521814891763286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 -DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:12 [batch.py:51] router release req id 8 -INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s -INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11045980453491211 s -DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=245209820497953646340639040012631700817, time:1750768872.4154806s req_ids:[8] -DEBUG 06-24 20:41:12 [manager.py:391] -ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:231.16087913513184ms total_cost_time:231.20498657226562ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14922 prompt_cache_len:5151 prompt_cache_ratio:0.34519501407318054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 -DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:12 [batch.py:51] router release req id 8 -INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10846066474914551 s -INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11057519912719727 s -DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=147912887620968984763564838889853909967, time:1750768872.6462255s req_ids:[8] -DEBUG 06-24 20:41:12 [manager.py:391] -ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:219.98310089111328ms total_cost_time:220.02649307250977ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14923 prompt_cache_len:5151 prompt_cache_ratio:0.34517188232929036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 -DEBUG 06-24 20:41:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:12 [batch.py:51] router release req id 8 -INFO 06-24 20:41:12 [manager.py:224] router recive req id 8 cost time 0.10841870307922363 s -INFO 06-24 20:41:12 [manager.py:68] detokenization recv req id 8 cost time 0.11042213439941406 s -DEBUG 06-24 20:41:12 [manager.py:391] Prefill Batch: batch_id=175272184762289720530426725224908876527, time:1750768872.8709216s req_ids:[8] -DEBUG 06-24 20:41:12 [manager.py:391] -ERROR 06-24 20:41:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:216.45331382751465ms total_cost_time:216.49813652038574ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14924 prompt_cache_len:5151 prompt_cache_ratio:0.34514875368533904 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 -DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:13 [batch.py:51] router release req id 8 -INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10924768447875977 s -INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11127924919128418 s -DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=191603966747855760707361989097258415913, time:1750768873.0948272s req_ids:[8] -DEBUG 06-24 20:41:13 [manager.py:391] -ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:12 lightllm_req_id:8 first_token_cost:217.01836585998535ms total_cost_time:217.06295013427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14925 prompt_cache_len:5151 prompt_cache_ratio:0.3451256281407035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 -DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:13 [batch.py:51] router release req id 8 -INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10977792739868164 s -INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11204981803894043 s -DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=293782200822397432423555264145994325254, time:1750768873.3185103s req_ids:[8] -DEBUG 06-24 20:41:13 [manager.py:391] -ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:217.24915504455566ms total_cost_time:217.29230880737305ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14926 prompt_cache_len:5151 prompt_cache_ratio:0.3451025056947608 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 -DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:13 [batch.py:51] router release req id 8 -INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.1117391586303711 s -INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11395692825317383 s -DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=289601180895871672955601282572724058808, time:1750768873.5444384s req_ids:[8] -DEBUG 06-24 20:41:13 [manager.py:391] -ERROR 06-24 20:41:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:389.62244987487793ms total_cost_time:389.6653652191162ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14927 prompt_cache_len:5151 prompt_cache_ratio:0.3450793863468882 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 -DEBUG 06-24 20:41:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:13 [batch.py:51] router release req id 8 -INFO 06-24 20:41:13 [manager.py:224] router recive req id 8 cost time 0.10827255249023438 s -INFO 06-24 20:41:13 [manager.py:68] detokenization recv req id 8 cost time 0.11023592948913574 s -DEBUG 06-24 20:41:13 [manager.py:391] Prefill Batch: batch_id=99433586430800847066807114448760384876, time:1750768873.9387445s req_ids:[8] -DEBUG 06-24 20:41:13 [manager.py:391] -ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:13 lightllm_req_id:8 first_token_cost:208.73188972473145ms total_cost_time:208.77575874328613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14928 prompt_cache_len:5151 prompt_cache_ratio:0.345056270096463 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 -DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:14 [batch.py:51] router release req id 8 -INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10824823379516602 s -INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11034345626831055 s -DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=113566831266858445432531689496601523866, time:1750768874.1633537s req_ids:[8] -DEBUG 06-24 20:41:14 [manager.py:391] -ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:230.66306114196777ms total_cost_time:230.70883750915527ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14929 prompt_cache_len:5151 prompt_cache_ratio:0.3450331569428629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 -DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:14 [batch.py:51] router release req id 8 -INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10857868194580078 s -INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11070632934570312 s -DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=305328272555057567545257923316995110337, time:1750768874.3937106s req_ids:[8] -DEBUG 06-24 20:41:14 [manager.py:391] -ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:215.8036231994629ms total_cost_time:215.84630012512207ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14930 prompt_cache_len:5151 prompt_cache_ratio:0.3450100468854655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 -DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:14 [batch.py:51] router release req id 8 -INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.1102752685546875 s -INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.11266732215881348 s -DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=143955387927802354349233989108501751658, time:1750768874.618446s req_ids:[8] -DEBUG 06-24 20:41:14 [manager.py:391] -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:214.56146240234375ms total_cost_time:214.60437774658203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14931 prompt_cache_len:5151 prompt_cache_ratio:0.34498693992364876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 -DEBUG 06-24 20:41:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:14 [batch.py:51] router release req id 8 -INFO 06-24 20:41:14 [manager.py:224] router recive req id 8 cost time 0.10750436782836914 s -INFO 06-24 20:41:14 [manager.py:68] detokenization recv req id 8 cost time 0.10937261581420898 s -DEBUG 06-24 20:41:14 [manager.py:391] Prefill Batch: batch_id=45151492009601507274870022363887926434, time:1750768874.843152s req_ids:[8] -DEBUG 06-24 20:41:14 [manager.py:391] -ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:14 lightllm_req_id:8 first_token_cost:354.259729385376ms total_cost_time:354.3047904968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14932 prompt_cache_len:5151 prompt_cache_ratio:0.34496383605679076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 -DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:15 [batch.py:51] router release req id 8 -INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10905313491821289 s -INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11108660697937012 s -DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=154705034634744469700013866426514549219, time:1750768875.2007685s req_ids:[8] -DEBUG 06-24 20:41:15 [manager.py:391] -ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:215.19970893859863ms total_cost_time:215.24381637573242ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14933 prompt_cache_len:5151 prompt_cache_ratio:0.3449407352842697 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 -DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:15 [batch.py:51] router release req id 8 -INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.1080160140991211 s -INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11016631126403809 s -DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=113650760469740398504036855212413053725, time:1750768875.4239767s req_ids:[8] -DEBUG 06-24 20:41:15 [manager.py:391] -ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:215.61264991760254ms total_cost_time:215.6543731689453ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:14934 prompt_cache_len:5151 prompt_cache_ratio:0.34491763760546407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 -DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:15 [batch.py:51] router release req id 8 -INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10861754417419434 s -INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11040306091308594 s -DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=276417682175909890318940379818925710452, time:1750768875.6471417s req_ids:[8] -DEBUG 06-24 20:41:15 [manager.py:391] -ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:210.80255508422852ms total_cost_time:210.8464241027832ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14935 prompt_cache_len:5151 prompt_cache_ratio:0.34489454301975225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 -DEBUG 06-24 20:41:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:15 [batch.py:51] router release req id 8 -INFO 06-24 20:41:15 [manager.py:224] router recive req id 8 cost time 0.10915136337280273 s -INFO 06-24 20:41:15 [manager.py:68] detokenization recv req id 8 cost time 0.11137652397155762 s -DEBUG 06-24 20:41:15 [manager.py:391] Prefill Batch: batch_id=104113656679349919669617940578133823850, time:1750768875.8652654s req_ids:[8] -DEBUG 06-24 20:41:15 [manager.py:391] -ERROR 06-24 20:41:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:221.62795066833496ms total_cost_time:221.67253494262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14936 prompt_cache_len:5151 prompt_cache_ratio:0.3448714515265131 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 -DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:16 [batch.py:51] router release req id 8 -INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10883808135986328 s -INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.1109774112701416 s -DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=324236625503766573972200914267355369876, time:1750768876.0969372s req_ids:[8] -DEBUG 06-24 20:41:16 [manager.py:391] -ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:15 lightllm_req_id:8 first_token_cost:223.53196144104004ms total_cost_time:223.57606887817383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14937 prompt_cache_len:5151 prompt_cache_ratio:0.3448483631251255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 -DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:16 [batch.py:51] router release req id 8 -INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10943961143493652 s -INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11159706115722656 s -DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=301800610547718163294629232807234741408, time:1750768876.3237417s req_ids:[8] -DEBUG 06-24 20:41:16 [manager.py:391] -ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:382.4901580810547ms total_cost_time:382.5352191925049ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14938 prompt_cache_len:5151 prompt_cache_ratio:0.3448252778149685 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 -DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:16 [batch.py:51] router release req id 8 -INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10911917686462402 s -INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11112380027770996 s -DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=326650267207700980285373585686392839786, time:1750768876.715178s req_ids:[8] -DEBUG 06-24 20:41:16 [manager.py:391] -ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:219.47026252746582ms total_cost_time:219.51603889465332ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14939 prompt_cache_len:5151 prompt_cache_ratio:0.34480219559542136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 -DEBUG 06-24 20:41:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:16 [batch.py:51] router release req id 8 -INFO 06-24 20:41:16 [manager.py:224] router recive req id 8 cost time 0.10836076736450195 s -INFO 06-24 20:41:16 [manager.py:68] detokenization recv req id 8 cost time 0.11023139953613281 s -DEBUG 06-24 20:41:16 [manager.py:391] Prefill Batch: batch_id=101285894011348526691503753989736651421, time:1750768876.9425979s req_ids:[8] -DEBUG 06-24 20:41:16 [manager.py:391] -ERROR 06-24 20:41:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:16 lightllm_req_id:8 first_token_cost:175.26865005493164ms total_cost_time:175.3091812133789ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:14940 prompt_cache_len:5151 prompt_cache_ratio:0.34477911646586346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 -DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:17 [batch.py:51] router release req id 8 -INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10772228240966797 s -INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11010217666625977 s -DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=260650953263683490790027291494405948615, time:1750768877.125262s req_ids:[8] -DEBUG 06-24 20:41:17 [manager.py:391] -ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:177.09589004516602ms total_cost_time:177.140474319458ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14941 prompt_cache_len:5151 prompt_cache_ratio:0.3447560404256743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 -DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:17 [batch.py:51] router release req id 8 -INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10820508003234863 s -INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11014461517333984 s -DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=250275585823277219198959375671879736677, time:1750768877.3044581s req_ids:[8] -DEBUG 06-24 20:41:17 [manager.py:391] -ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:209.34724807739258ms total_cost_time:209.39350128173828ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:14942 prompt_cache_len:5151 prompt_cache_ratio:0.3447329674742337 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 -DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:17 [batch.py:51] router release req id 8 -INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10933732986450195 s -INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.1113741397857666 s -DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=91931578339703094490489354494758685840, time:1750768877.525258s req_ids:[8] -DEBUG 06-24 20:41:17 [manager.py:391] -ERROR 06-24 20:41:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:222.92160987854004ms total_cost_time:222.96571731567383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14943 prompt_cache_len:5151 prompt_cache_ratio:0.3447098976109215 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 -DEBUG 06-24 20:41:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:17 [batch.py:51] router release req id 8 -INFO 06-24 20:41:17 [manager.py:224] router recive req id 8 cost time 0.10807657241821289 s -INFO 06-24 20:41:17 [manager.py:68] detokenization recv req id 8 cost time 0.11012792587280273 s -DEBUG 06-24 20:41:17 [manager.py:391] Prefill Batch: batch_id=152723289749101446278691476070451344161, time:1750768877.7542567s req_ids:[8] -DEBUG 06-24 20:41:17 [manager.py:391] -ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:17 lightllm_req_id:8 first_token_cost:412.5781059265137ms total_cost_time:412.62340545654297ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14944 prompt_cache_len:5151 prompt_cache_ratio:0.34468683083511775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 -DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:18 [batch.py:51] router release req id 8 -INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.10827350616455078 s -INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11020612716674805 s -DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=214815922351812969449846664858499244407, time:1750768878.1712513s req_ids:[8] -DEBUG 06-24 20:41:18 [manager.py:391] -ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:199.0034580230713ms total_cost_time:199.0506649017334ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:14945 prompt_cache_len:5151 prompt_cache_ratio:0.3446637671462027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 -DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:18 [batch.py:51] router release req id 8 -INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.10991024971008301 s -INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11245560646057129 s -DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=214237287897823853471084511843904846093, time:1750768878.3770719s req_ids:[8] -DEBUG 06-24 20:41:18 [manager.py:391] -ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:217.2262668609619ms total_cost_time:217.26751327514648ms,out_token_counter:1 mean_per_token_cost_time: 0.04124641418457031ms prompt_token_num:14946 prompt_cache_len:5151 prompt_cache_ratio:0.34464070654355683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 -DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:18 [batch.py:51] router release req id 8 -INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.11017036437988281 s -INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11273384094238281 s -DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=177481519683233426514864260658378008748, time:1750768878.6017735s req_ids:[8] -DEBUG 06-24 20:41:18 [manager.py:391] -ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:218.2917594909668ms total_cost_time:218.336820602417ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14947 prompt_cache_len:5151 prompt_cache_ratio:0.3446176490265605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 -DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:18 [batch.py:51] router release req id 8 -INFO 06-24 20:41:18 [manager.py:224] router recive req id 8 cost time 0.11173701286315918 s -INFO 06-24 20:41:18 [manager.py:68] detokenization recv req id 8 cost time 0.11375045776367188 s -DEBUG 06-24 20:41:18 [manager.py:391] Prefill Batch: batch_id=156482145176183525931288434239349954042, time:1750768878.8257139s req_ids:[8] -DEBUG 06-24 20:41:18 [manager.py:391] -ERROR 06-24 20:41:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:211.55095100402832ms total_cost_time:211.5955352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14948 prompt_cache_len:5151 prompt_cache_ratio:0.34459459459459457 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 -DEBUG 06-24 20:41:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:18 [batch.py:51] router release req id 8 -INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10914969444274902 s -INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11104154586791992 s -DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=75378085655337040812567313058374724027, time:1750768879.0430663s req_ids:[8] -DEBUG 06-24 20:41:19 [manager.py:391] -ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:18 lightllm_req_id:8 first_token_cost:218.74380111694336ms total_cost_time:218.78767013549805ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14949 prompt_cache_len:5151 prompt_cache_ratio:0.3445715432470399 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 -DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:19 [batch.py:51] router release req id 8 -INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s -INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11168599128723145 s -DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=334495049688107621405657876420559659118, time:1750768879.2787092s req_ids:[8] -DEBUG 06-24 20:41:19 [manager.py:391] -ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:393.19634437561035ms total_cost_time:393.24140548706055ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14950 prompt_cache_len:5151 prompt_cache_ratio:0.3445484949832776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 -DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:19 [batch.py:51] router release req id 8 -INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.1117246150970459 s -INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.1139681339263916 s -DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=236165610114740173589558707284836869659, time:1750768879.6683269s req_ids:[8] -DEBUG 06-24 20:41:19 [manager.py:391] -ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:218.66393089294434ms total_cost_time:218.70803833007812ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14951 prompt_cache_len:5151 prompt_cache_ratio:0.3445254498026888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 -DEBUG 06-24 20:41:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:19 [batch.py:51] router release req id 8 -INFO 06-24 20:41:19 [manager.py:224] router recive req id 8 cost time 0.10839080810546875 s -INFO 06-24 20:41:19 [manager.py:68] detokenization recv req id 8 cost time 0.11041259765625 s -DEBUG 06-24 20:41:19 [manager.py:391] Prefill Batch: batch_id=248171077159780907060682897082116600116, time:1750768879.8925724s req_ids:[8] -DEBUG 06-24 20:41:19 [manager.py:391] -ERROR 06-24 20:41:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:209.71345901489258ms total_cost_time:209.75756645202637ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14952 prompt_cache_len:5151 prompt_cache_ratio:0.3445024077046549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 -DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:20 [batch.py:51] router release req id 8 -INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.1094961166381836 s -INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.11140632629394531 s -DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=35841379271185118501953147625332758852, time:1750768880.111558s req_ids:[8] -DEBUG 06-24 20:41:20 [manager.py:391] -ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:19 lightllm_req_id:8 first_token_cost:220.4899787902832ms total_cost_time:220.5333709716797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14953 prompt_cache_len:5151 prompt_cache_ratio:0.3444793686885575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 -DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:20 [batch.py:51] router release req id 8 -INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.10946917533874512 s -INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.11154723167419434 s -DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=77495247073661216537624098622616427048, time:1750768880.3350513s req_ids:[8] -DEBUG 06-24 20:41:20 [manager.py:391] -DEBUG 06-24 20:41:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 59273.368 tokens/s -DEBUG 06-24 20:41:20 [stats.py:37] Avg prompt tokens throughput: 59265.431 tokens/s -DEBUG 06-24 20:41:20 [stats.py:37] Avg generate tokens throughput: 7.937 tokens/s -ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:217.73958206176758ms total_cost_time:217.78202056884766ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14954 prompt_cache_len:5151 prompt_cache_ratio:0.34445633275377824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 -DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:20 [batch.py:51] router release req id 8 -INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.10730338096618652 s -INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.10904955863952637 s -DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=219417800556851274654125096325751401932, time:1750768880.5630672s req_ids:[8] -DEBUG 06-24 20:41:20 [manager.py:391] -ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:358.35933685302734ms total_cost_time:358.40439796447754ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14955 prompt_cache_len:5151 prompt_cache_ratio:0.3444332998996991 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 -DEBUG 06-24 20:41:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:20 [batch.py:51] router release req id 8 -INFO 06-24 20:41:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:20 [manager.py:224] router recive req id 8 cost time 0.1063230037689209 s -INFO 06-24 20:41:20 [manager.py:68] detokenization recv req id 8 cost time 0.10821866989135742 s -DEBUG 06-24 20:41:20 [manager.py:391] Prefill Batch: batch_id=209915660412498998881438403447617463348, time:1750768880.9229097s req_ids:[8] -DEBUG 06-24 20:41:20 [manager.py:391] -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:20 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:172.7914810180664ms total_cost_time:172.81365394592285ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:14956 prompt_cache_len:5151 prompt_cache_ratio:0.3444102701257021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 -DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:21 [batch.py:51] router release req id 8 -INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10422301292419434 s -INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.10626578330993652 s -DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=183077454607186067817380255998330358491, time:1750768881.1055334s req_ids:[8] -DEBUG 06-24 20:41:21 [manager.py:391] -ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:20 lightllm_req_id:8 first_token_cost:216.7809009552002ms total_cost_time:216.8254852294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14957 prompt_cache_len:5151 prompt_cache_ratio:0.34438724343116933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 -DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:21 [batch.py:51] router release req id 8 -INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10835146903991699 s -INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11046004295349121 s -DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=226697835416790576099156633690799214932, time:1750768881.323791s req_ids:[8] -DEBUG 06-24 20:41:21 [manager.py:391] -ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:213.1643295288086ms total_cost_time:213.20796012878418ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14958 prompt_cache_len:5151 prompt_cache_ratio:0.3443642198154834 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 -DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:21 [batch.py:51] router release req id 8 -INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10941147804260254 s -INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11145329475402832 s -DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=212857202184910939121903020548283121422, time:1750768881.5450943s req_ids:[8] -DEBUG 06-24 20:41:21 [manager.py:391] -ERROR 06-24 20:41:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:217.17429161071777ms total_cost_time:217.21959114074707ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14959 prompt_cache_len:5151 prompt_cache_ratio:0.3443411992780266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 -DEBUG 06-24 20:41:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:21 [batch.py:51] router release req id 8 -INFO 06-24 20:41:21 [manager.py:224] router recive req id 8 cost time 0.10811233520507812 s -INFO 06-24 20:41:21 [manager.py:68] detokenization recv req id 8 cost time 0.11004042625427246 s -DEBUG 06-24 20:41:21 [manager.py:391] Prefill Batch: batch_id=315897656889564681504374153397622782887, time:1750768881.7685308s req_ids:[8] -DEBUG 06-24 20:41:21 [manager.py:391] -ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:21 lightllm_req_id:8 first_token_cost:388.32569122314453ms total_cost_time:388.3693218231201ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14960 prompt_cache_len:5151 prompt_cache_ratio:0.3443181818181818 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 -DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:22 [batch.py:51] router release req id 8 -INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10830259323120117 s -INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11093449592590332 s -DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=26031495386456320580579251256594121373, time:1750768882.163792s req_ids:[8] -DEBUG 06-24 20:41:22 [manager.py:391] -ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:217.87619590759277ms total_cost_time:217.92078018188477ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14961 prompt_cache_len:5151 prompt_cache_ratio:0.34429516743533184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 -DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:22 [batch.py:51] router release req id 8 -INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.1083977222442627 s -INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.10976433753967285 s -DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=160961722459132032484570639098101158745, time:1750768882.3879912s req_ids:[8] -DEBUG 06-24 20:41:22 [manager.py:391] -ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:217.1177864074707ms total_cost_time:217.1630859375ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14962 prompt_cache_len:5151 prompt_cache_ratio:0.34427215612885975 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 -DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:22 [batch.py:51] router release req id 8 -INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10861539840698242 s -INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11049628257751465 s -DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=88030114080803870590604836438848209330, time:1750768882.6137786s req_ids:[8] -DEBUG 06-24 20:41:22 [manager.py:391] -ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:216.92800521850586ms total_cost_time:216.97068214416504ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:14963 prompt_cache_len:5151 prompt_cache_ratio:0.34424914789814876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 -DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:22 [batch.py:51] router release req id 8 -INFO 06-24 20:41:22 [manager.py:224] router recive req id 8 cost time 0.10946893692016602 s -INFO 06-24 20:41:22 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s -DEBUG 06-24 20:41:22 [manager.py:391] Prefill Batch: batch_id=52552652545220923263933244925597941915, time:1750768882.834685s req_ids:[8] -DEBUG 06-24 20:41:22 [manager.py:391] -ERROR 06-24 20:41:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:219.9416160583496ms total_cost_time:219.9862003326416ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14964 prompt_cache_len:5151 prompt_cache_ratio:0.3442261427425822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 -DEBUG 06-24 20:41:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:22 [batch.py:51] router release req id 8 -INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.1091921329498291 s -INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.11132645606994629 s -DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=43007143102164667166881812987655176896, time:1750768883.0607471s req_ids:[8] -DEBUG 06-24 20:41:23 [manager.py:391] -ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:22 lightllm_req_id:8 first_token_cost:218.6431884765625ms total_cost_time:218.6868190765381ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14965 prompt_cache_len:5151 prompt_cache_ratio:0.3442031406615436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 -DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:23 [batch.py:51] router release req id 8 -INFO 06-24 20:41:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:41:23 [statics_utils.py:24] mean first cost: 232.67209177533982 ms -INFO 06-24 20:41:23 [statics_utils.py:24] mean per token cost: 0.057365905146663516 ms -INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.10883665084838867 s -INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.11078643798828125 s -DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=21710642144484523080270899330482751543, time:1750768883.2940938s req_ids:[8] -DEBUG 06-24 20:41:23 [manager.py:391] -ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:401.4556407928467ms total_cost_time:401.49807929992676ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:14966 prompt_cache_len:5151 prompt_cache_ratio:0.3441801416544167 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 -DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:23 [batch.py:51] router release req id 8 -INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.1083989143371582 s -INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.10968470573425293 s -DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=273942242254472272849742705831906454694, time:1750768883.6943724s req_ids:[8] -DEBUG 06-24 20:41:23 [manager.py:391] -ERROR 06-24 20:41:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:222.2139835357666ms total_cost_time:222.2576141357422ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:14967 prompt_cache_len:5151 prompt_cache_ratio:0.3441571457205853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 -DEBUG 06-24 20:41:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:23 [batch.py:51] router release req id 8 -INFO 06-24 20:41:23 [manager.py:224] router recive req id 8 cost time 0.10795950889587402 s -INFO 06-24 20:41:23 [manager.py:68] detokenization recv req id 8 cost time 0.10975027084350586 s -DEBUG 06-24 20:41:23 [manager.py:391] Prefill Batch: batch_id=321004118212953356089864327371181726624, time:1750768883.9201534s req_ids:[8] -DEBUG 06-24 20:41:23 [manager.py:391] -ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:23 lightllm_req_id:8 first_token_cost:224.52855110168457ms total_cost_time:224.57385063171387ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:14968 prompt_cache_len:5151 prompt_cache_ratio:0.34413415285943344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 -DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:24 [batch.py:51] router release req id 8 -INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.1088109016418457 s -INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11099386215209961 s -DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=79858301090016873276182837077549440530, time:1750768884.1503096s req_ids:[8] -DEBUG 06-24 20:41:24 [manager.py:391] -ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:215.6052589416504ms total_cost_time:215.66033363342285ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:14969 prompt_cache_len:5151 prompt_cache_ratio:0.3441111630703454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 -DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:24 [batch.py:51] router release req id 8 -INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10799074172973633 s -INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11005187034606934 s -DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=214563019284504171825083332913887698654, time:1750768884.3888195s req_ids:[8] -DEBUG 06-24 20:41:24 [manager.py:391] -ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:236.95874214172363ms total_cost_time:237.0133399963379ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:14970 prompt_cache_len:5151 prompt_cache_ratio:0.3440881763527054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 -DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:24 [batch.py:51] router release req id 8 -INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10923886299133301 s -INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.1113734245300293 s -DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=201013074390565482375988652935808103009, time:1750768884.6317704s req_ids:[8] -DEBUG 06-24 20:41:24 [manager.py:391] -ERROR 06-24 20:41:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:234.67636108398438ms total_cost_time:234.73072052001953ms,out_token_counter:1 mean_per_token_cost_time: 0.05435943603515625ms prompt_token_num:14971 prompt_cache_len:5151 prompt_cache_ratio:0.3440651927058981 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 -DEBUG 06-24 20:41:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:24 [batch.py:51] router release req id 8 -INFO 06-24 20:41:24 [manager.py:224] router recive req id 8 cost time 0.10866522789001465 s -INFO 06-24 20:41:24 [manager.py:68] detokenization recv req id 8 cost time 0.11061668395996094 s -DEBUG 06-24 20:41:24 [manager.py:391] Prefill Batch: batch_id=78670691076752652961665803065169371061, time:1750768884.8598359s req_ids:[8] -DEBUG 06-24 20:41:24 [manager.py:391] -ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:24 lightllm_req_id:8 first_token_cost:383.284330368042ms total_cost_time:383.3284378051758ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14972 prompt_cache_len:5151 prompt_cache_ratio:0.344042212129308 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 -DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:25 [batch.py:51] router release req id 8 -INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10842275619506836 s -INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11051154136657715 s -DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=46242747765838290547564530125389405441, time:1750768885.252965s req_ids:[8] -DEBUG 06-24 20:41:25 [manager.py:391] -ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:214.37692642211914ms total_cost_time:214.42103385925293ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:14973 prompt_cache_len:5151 prompt_cache_ratio:0.3440192346223202 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 -DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:25 [batch.py:51] router release req id 8 -INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10986638069152832 s -INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11186480522155762 s -DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=82036150584605391026262524636438465327, time:1750768885.4717221s req_ids:[8] -DEBUG 06-24 20:41:25 [manager.py:391] -ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:212.7530574798584ms total_cost_time:212.7985954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:14974 prompt_cache_len:5151 prompt_cache_ratio:0.3439962601843195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 -DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:25 [batch.py:51] router release req id 8 -INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10950326919555664 s -INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.11161565780639648 s -DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=77009969539375109700450108827437171911, time:1750768885.6920593s req_ids:[8] -DEBUG 06-24 20:41:25 [manager.py:391] -ERROR 06-24 20:41:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:218.5971736907959ms total_cost_time:218.6422348022461ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14975 prompt_cache_len:5151 prompt_cache_ratio:0.34397328881469114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 -DEBUG 06-24 20:41:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:25 [batch.py:51] router release req id 8 -INFO 06-24 20:41:25 [manager.py:224] router recive req id 8 cost time 0.10876822471618652 s -INFO 06-24 20:41:25 [manager.py:68] detokenization recv req id 8 cost time 0.1109457015991211 s -DEBUG 06-24 20:41:25 [manager.py:391] Prefill Batch: batch_id=239450163424048794128482734545059733318, time:1750768885.9330297s req_ids:[8] -DEBUG 06-24 20:41:25 [manager.py:391] -ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:25 lightllm_req_id:8 first_token_cost:243.74866485595703ms total_cost_time:243.80731582641602ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:14976 prompt_cache_len:5151 prompt_cache_ratio:0.34395032051282054 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 -DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:26 [batch.py:51] router release req id 8 -INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10805988311767578 s -INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11014962196350098 s -DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=327443374163179770836718083413923009411, time:1750768886.1661863s req_ids:[8] -DEBUG 06-24 20:41:26 [manager.py:391] -ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:218.66798400878906ms total_cost_time:218.71280670166016ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:14977 prompt_cache_len:5151 prompt_cache_ratio:0.3439273552780931 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 -DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:26 [batch.py:51] router release req id 8 -INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10923552513122559 s -INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s -DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=322400434840132519631437592111349382350, time:1750768886.3931775s req_ids:[8] -DEBUG 06-24 20:41:26 [manager.py:391] -ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:383.2049369812012ms total_cost_time:383.2714557647705ms,out_token_counter:1 mean_per_token_cost_time: 0.06651878356933594ms prompt_token_num:14978 prompt_cache_len:5151 prompt_cache_ratio:0.3439043931098945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 -DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:26 [batch.py:51] router release req id 8 -INFO 06-24 20:41:26 [manager.py:224] router recive req id 8 cost time 0.10860943794250488 s -INFO 06-24 20:41:26 [manager.py:68] detokenization recv req id 8 cost time 0.11075282096862793 s -DEBUG 06-24 20:41:26 [manager.py:391] Prefill Batch: batch_id=70689272754661912680935651976168503919, time:1750768886.7840316s req_ids:[8] -DEBUG 06-24 20:41:26 [manager.py:391] -ERROR 06-24 20:41:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:217.97752380371094ms total_cost_time:218.02258491516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14979 prompt_cache_len:5151 prompt_cache_ratio:0.3438814340076107 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 -DEBUG 06-24 20:41:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:26 [batch.py:51] router release req id 8 -INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.1091768741607666 s -INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.1114339828491211 s -DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=140833368004190543248738872519941228568, time:1750768887.0089767s req_ids:[8] -DEBUG 06-24 20:41:27 [manager.py:391] -ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:26 lightllm_req_id:8 first_token_cost:219.49219703674316ms total_cost_time:219.53797340393066ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:14980 prompt_cache_len:5151 prompt_cache_ratio:0.3438584779706275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 -DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:27 [batch.py:51] router release req id 8 -INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.10844993591308594 s -INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.11055397987365723 s -DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=149721921327696597458085646274579713828, time:1750768887.235228s req_ids:[8] -DEBUG 06-24 20:41:27 [manager.py:391] -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:215.77787399291992ms total_cost_time:215.8217430114746ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:14981 prompt_cache_len:5151 prompt_cache_ratio:0.3438355249983312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 -DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:27 [batch.py:51] router release req id 8 -INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.10875606536865234 s -INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.11074066162109375 s -DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=73542233364073794684397801339461334109, time:1750768887.4565916s req_ids:[8] -DEBUG 06-24 20:41:27 [manager.py:391] -ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:215.03233909606934ms total_cost_time:215.07525444030762ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14982 prompt_cache_len:5151 prompt_cache_ratio:0.34381257509010815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 -DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:27 [batch.py:51] router release req id 8 -INFO 06-24 20:41:27 [manager.py:224] router recive req id 8 cost time 0.1084136962890625 s -INFO 06-24 20:41:27 [manager.py:68] detokenization recv req id 8 cost time 0.10964488983154297 s -DEBUG 06-24 20:41:27 [manager.py:391] Prefill Batch: batch_id=333631010107865183010568282629378819274, time:1750768887.6783066s req_ids:[8] -DEBUG 06-24 20:41:27 [manager.py:391] -ERROR 06-24 20:41:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:213.23299407958984ms total_cost_time:213.27614784240723ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14983 prompt_cache_len:5151 prompt_cache_ratio:0.3437896282453447 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 -DEBUG 06-24 20:41:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:27 [batch.py:51] router release req id 8 -INFO 06-24 20:41:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.3110027313232422 s -INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.31238603591918945 s -DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=169611552380853464397458392309101915453, time:1750768888.1083164s req_ids:[8] -DEBUG 06-24 20:41:28 [manager.py:391] -ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:27 lightllm_req_id:8 first_token_cost:437.23607063293457ms total_cost_time:437.28113174438477ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:14984 prompt_cache_len:5151 prompt_cache_ratio:0.3437666844634277 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 -DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:28 [batch.py:51] router release req id 8 -INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.1092691421508789 s -INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11111664772033691 s -DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=57443635991513523322381091842748031222, time:1750768888.3408473s req_ids:[8] -DEBUG 06-24 20:41:28 [manager.py:391] -ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:216.8436050415039ms total_cost_time:216.8881893157959ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14985 prompt_cache_len:5151 prompt_cache_ratio:0.3437437437437437 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 -DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:28 [batch.py:51] router release req id 8 -INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10868358612060547 s -INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11088681221008301 s -DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=231993416682468758284335136978555147630, time:1750768888.5643852s req_ids:[8] -DEBUG 06-24 20:41:28 [manager.py:391] -ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:212.48340606689453ms total_cost_time:212.5263214111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14986 prompt_cache_len:5151 prompt_cache_ratio:0.34372080608567995 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 -DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:28 [batch.py:51] router release req id 8 -INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10904598236083984 s -INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s -DEBUG 06-24 20:41:28 [manager.py:391] Prefill Batch: batch_id=102221070183489978111468385498913373065, time:1750768888.7840025s req_ids:[8] -DEBUG 06-24 20:41:28 [manager.py:391] -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:28 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:210.53147315979004ms total_cost_time:210.59346199035645ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:14987 prompt_cache_len:5151 prompt_cache_ratio:0.34369787148862346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 -DEBUG 06-24 20:41:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:28 [batch.py:51] router release req id 8 -INFO 06-24 20:41:28 [manager.py:224] router recive req id 8 cost time 0.10878419876098633 s -INFO 06-24 20:41:28 [manager.py:68] detokenization recv req id 8 cost time 0.11060523986816406 s -DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=182975904839457891027082941293161034351, time:1750768889.0021937s req_ids:[8] -DEBUG 06-24 20:41:29 [manager.py:391] -ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:28 lightllm_req_id:8 first_token_cost:211.55357360839844ms total_cost_time:211.59672737121582ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14988 prompt_cache_len:5151 prompt_cache_ratio:0.3436749399519616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 -DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:29 [batch.py:51] router release req id 8 -INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10860466957092285 s -INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.11034631729125977 s -DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=16232881263146766062903896870990119864, time:1750768889.2196805s req_ids:[8] -DEBUG 06-24 20:41:29 [manager.py:391] -ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:382.51614570617676ms total_cost_time:382.56311416625977ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:14989 prompt_cache_len:5151 prompt_cache_ratio:0.34365201147508173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 -DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:29 [batch.py:51] router release req id 8 -INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10853338241577148 s -INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.10986471176147461 s -DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=199101331252947403570158880205632921876, time:1750768889.6085775s req_ids:[8] -DEBUG 06-24 20:41:29 [manager.py:391] -ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:211.71975135803223ms total_cost_time:211.76433563232422ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14990 prompt_cache_len:5151 prompt_cache_ratio:0.3436290860573716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 -DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:29 [batch.py:51] router release req id 8 -INFO 06-24 20:41:29 [manager.py:224] router recive req id 8 cost time 0.10840606689453125 s -INFO 06-24 20:41:29 [manager.py:68] detokenization recv req id 8 cost time 0.11035609245300293 s -DEBUG 06-24 20:41:29 [manager.py:391] Prefill Batch: batch_id=42899343357916828924906647653627556219, time:1750768889.8281124s req_ids:[8] -DEBUG 06-24 20:41:29 [manager.py:391] -ERROR 06-24 20:41:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:215.0402069091797ms total_cost_time:215.08359909057617ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:14991 prompt_cache_len:5151 prompt_cache_ratio:0.34360616369821895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 -DEBUG 06-24 20:41:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:29 [batch.py:51] router release req id 8 -INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.10937738418579102 s -INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11071372032165527 s -DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=161943752785316588439001681693510506372, time:1750768890.0520632s req_ids:[8] -DEBUG 06-24 20:41:30 [manager.py:391] -ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:29 lightllm_req_id:8 first_token_cost:223.71983528137207ms total_cost_time:223.76418113708496ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14992 prompt_cache_len:5151 prompt_cache_ratio:0.34358324439701177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 -DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:30 [batch.py:51] router release req id 8 -INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.11098957061767578 s -INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11255455017089844 s -DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=40072304252502428723046251820511191123, time:1750768890.277095s req_ids:[8] -DEBUG 06-24 20:41:30 [manager.py:391] -ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:41:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 58158.496 tokens/s -DEBUG 06-24 20:41:30 [stats.py:37] Avg prompt tokens throughput: 58150.629 tokens/s -DEBUG 06-24 20:41:30 [stats.py:37] Avg generate tokens throughput: 7.866 tokens/s -INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:217.7412509918213ms total_cost_time:217.78440475463867ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:14993 prompt_cache_len:5151 prompt_cache_ratio:0.34356032815313814 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 -DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:30 [batch.py:51] router release req id 8 -INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.11121320724487305 s -INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11330723762512207 s -DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=80934075272110578585893113064031280292, time:1750768890.4998765s req_ids:[8] -DEBUG 06-24 20:41:30 [manager.py:391] -ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:213.84692192077637ms total_cost_time:213.89150619506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14994 prompt_cache_len:5151 prompt_cache_ratio:0.3435374149659864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 -DEBUG 06-24 20:41:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:30 [batch.py:51] router release req id 8 -INFO 06-24 20:41:30 [manager.py:224] router recive req id 8 cost time 0.10911178588867188 s -INFO 06-24 20:41:30 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s -DEBUG 06-24 20:41:30 [manager.py:391] Prefill Batch: batch_id=173173830171311416091205740646480584, time:1750768890.721452s req_ids:[8] -DEBUG 06-24 20:41:30 [manager.py:391] -ERROR 06-24 20:41:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:372.1592426300049ms total_cost_time:372.2035884857178ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:14995 prompt_cache_len:5151 prompt_cache_ratio:0.343514504834945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 -DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:31 [batch.py:51] router release req id 8 -INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10910344123840332 s -INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11045694351196289 s -DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=143480452642961181247150027882143914067, time:1750768891.1012316s req_ids:[8] -DEBUG 06-24 20:41:31 [manager.py:391] -ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:30 lightllm_req_id:8 first_token_cost:219.92778778076172ms total_cost_time:219.9723720550537ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14996 prompt_cache_len:5151 prompt_cache_ratio:0.3434915977594025 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 -DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:31 [batch.py:51] router release req id 8 -INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10905337333679199 s -INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11118197441101074 s -DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=227640888425608508024319286040649382012, time:1750768891.3238666s req_ids:[8] -DEBUG 06-24 20:41:31 [manager.py:391] -ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:211.3053798675537ms total_cost_time:211.3499641418457ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:14997 prompt_cache_len:5151 prompt_cache_ratio:0.3434686937387478 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 -DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:31 [batch.py:51] router release req id 8 -INFO 06-24 20:41:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10922765731811523 s -INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138463020324707 s -DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=335000376025672409847314505226831500100, time:1750768891.5423598s req_ids:[8] -DEBUG 06-24 20:41:31 [manager.py:391] -ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:215.87848663330078ms total_cost_time:215.92140197753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14998 prompt_cache_len:5151 prompt_cache_ratio:0.34344579277236964 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 -DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:31 [batch.py:51] router release req id 8 -INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10851383209228516 s -INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.10992789268493652 s -DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=92721587902157764722294624981176418827, time:1750768891.7660408s req_ids:[8] -DEBUG 06-24 20:41:31 [manager.py:391] -ERROR 06-24 20:41:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:214.89191055297852ms total_cost_time:214.9348258972168ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:14999 prompt_cache_len:5151 prompt_cache_ratio:0.3434228948596573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 -DEBUG 06-24 20:41:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:31 [batch.py:51] router release req id 8 -INFO 06-24 20:41:31 [manager.py:224] router recive req id 8 cost time 0.10943603515625 s -INFO 06-24 20:41:31 [manager.py:68] detokenization recv req id 8 cost time 0.11137628555297852 s -DEBUG 06-24 20:41:31 [manager.py:391] Prefill Batch: batch_id=133805948068428467204686380321082387768, time:1750768891.9860535s req_ids:[8] -DEBUG 06-24 20:41:31 [manager.py:391] -ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:31 lightllm_req_id:8 first_token_cost:385.1132392883301ms total_cost_time:385.15734672546387ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15000 prompt_cache_len:5151 prompt_cache_ratio:0.3434 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 -DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:32 [batch.py:51] router release req id 8 -INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.10851502418518066 s -INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.1105794906616211 s -DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=56075173006742504993432706230173012587, time:1750768892.3758278s req_ids:[8] -DEBUG 06-24 20:41:32 [manager.py:391] -ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:215.35515785217285ms total_cost_time:215.39807319641113ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15001 prompt_cache_len:5151 prompt_cache_ratio:0.34337710819278716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 -DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:32 [batch.py:51] router release req id 8 -INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.1081087589263916 s -INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.1094973087310791 s -DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=59256218168929873761030392192045639108, time:1750768892.5983126s req_ids:[8] -DEBUG 06-24 20:41:32 [manager.py:391] -ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:211.52544021606445ms total_cost_time:211.56978607177734ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15002 prompt_cache_len:5151 prompt_cache_ratio:0.34335421943740835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 -DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:32 [batch.py:51] router release req id 8 -INFO 06-24 20:41:32 [manager.py:224] router recive req id 8 cost time 0.10838723182678223 s -INFO 06-24 20:41:32 [manager.py:68] detokenization recv req id 8 cost time 0.11030411720275879 s -DEBUG 06-24 20:41:32 [manager.py:391] Prefill Batch: batch_id=169350086579639260450698616675564766541, time:1750768892.8152936s req_ids:[8] -DEBUG 06-24 20:41:32 [manager.py:391] -ERROR 06-24 20:41:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:215.00492095947266ms total_cost_time:215.04831314086914ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15003 prompt_cache_len:5151 prompt_cache_ratio:0.34333133373325336 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 -DEBUG 06-24 20:41:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:32 [batch.py:51] router release req id 8 -INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.10988140106201172 s -INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.1118004322052002 s -DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=335165873826914932394773740613826496076, time:1750768893.0450776s req_ids:[8] -DEBUG 06-24 20:41:33 [manager.py:391] -ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:32 lightllm_req_id:8 first_token_cost:232.0852279663086ms total_cost_time:232.1302890777588ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15004 prompt_cache_len:5151 prompt_cache_ratio:0.3433084510797121 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 -DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:33 [batch.py:51] router release req id 8 -INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.11040139198303223 s -INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.11150336265563965 s -DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=78193806412122427668163002264328966167, time:1750768893.2716181s req_ids:[8] -DEBUG 06-24 20:41:33 [manager.py:391] -ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:216.96782112121582ms total_cost_time:217.01312065124512ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15005 prompt_cache_len:5151 prompt_cache_ratio:0.3432855714761746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 -DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:33 [batch.py:51] router release req id 8 -INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.11034941673278809 s -INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.11243271827697754 s -DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=73988265215392574866951698619167243100, time:1750768893.4943929s req_ids:[8] -DEBUG 06-24 20:41:33 [manager.py:391] -ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:376.37948989868164ms total_cost_time:376.42431259155273ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15006 prompt_cache_len:5151 prompt_cache_ratio:0.3432626949220312 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 -DEBUG 06-24 20:41:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:33 [batch.py:51] router release req id 8 -INFO 06-24 20:41:33 [manager.py:224] router recive req id 8 cost time 0.10841703414916992 s -INFO 06-24 20:41:33 [manager.py:68] detokenization recv req id 8 cost time 0.10952353477478027 s -DEBUG 06-24 20:41:33 [manager.py:391] Prefill Batch: batch_id=105773950202960426558586750136995196485, time:1750768893.8781543s req_ids:[8] -DEBUG 06-24 20:41:33 [manager.py:391] -ERROR 06-24 20:41:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:219.1150188446045ms total_cost_time:219.15721893310547ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15007 prompt_cache_len:5151 prompt_cache_ratio:0.3432398214166722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 -DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:34 [batch.py:51] router release req id 8 -INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10841822624206543 s -INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.1096651554107666 s -DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=62566946012981551267172630404501431052, time:1750768894.1051495s req_ids:[8] -DEBUG 06-24 20:41:34 [manager.py:391] -ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:33 lightllm_req_id:8 first_token_cost:176.82409286499023ms total_cost_time:176.86820030212402ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15008 prompt_cache_len:5151 prompt_cache_ratio:0.3432169509594883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 -DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:34 [batch.py:51] router release req id 8 -INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.1076056957244873 s -INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.10947704315185547 s -DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=111003280065290505829597606741982113564, time:1750768894.2852952s req_ids:[8] -DEBUG 06-24 20:41:34 [manager.py:391] -ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:204.8358917236328ms total_cost_time:204.88476753234863ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:15009 prompt_cache_len:5151 prompt_cache_ratio:0.3431940835498701 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 -DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:34 [batch.py:51] router release req id 8 -INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10960888862609863 s -INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.11184549331665039 s -DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=192141180293667075722977335216106723674, time:1750768894.497001s req_ids:[8] -DEBUG 06-24 20:41:34 [manager.py:391] -ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:212.61096000671387ms total_cost_time:212.65482902526855ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15010 prompt_cache_len:5151 prompt_cache_ratio:0.34317121918720855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 -DEBUG 06-24 20:41:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:34 [batch.py:51] router release req id 8 -INFO 06-24 20:41:34 [manager.py:224] router recive req id 8 cost time 0.10865545272827148 s -INFO 06-24 20:41:34 [manager.py:68] detokenization recv req id 8 cost time 0.11078691482543945 s -DEBUG 06-24 20:41:34 [manager.py:391] Prefill Batch: batch_id=140630677894783208402179990917328003007, time:1750768894.7168555s req_ids:[8] -DEBUG 06-24 20:41:34 [manager.py:391] -ERROR 06-24 20:41:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:381.20222091674805ms total_cost_time:381.24680519104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15011 prompt_cache_len:5151 prompt_cache_ratio:0.3431483578708947 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 -DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:35 [batch.py:51] router release req id 8 -INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10877084732055664 s -INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.11084628105163574 s -DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=275302806747942986523242196890100767947, time:1750768895.1048276s req_ids:[8] -DEBUG 06-24 20:41:35 [manager.py:391] -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:35 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:34 lightllm_req_id:8 first_token_cost:220.1235294342041ms total_cost_time:220.16668319702148ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15012 prompt_cache_len:5151 prompt_cache_ratio:0.34312549960031974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 -DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:35 [batch.py:51] router release req id 8 -INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s -INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.1115269660949707 s -DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=198240914322113728627115471572143307914, time:1750768895.3384814s req_ids:[8] -DEBUG 06-24 20:41:35 [manager.py:391] -ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:228.6968231201172ms total_cost_time:228.74021530151367ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15013 prompt_cache_len:5151 prompt_cache_ratio:0.3431026443748751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 -DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:35 [batch.py:51] router release req id 8 -INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10850167274475098 s -INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.11054205894470215 s -DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=159135697595853286631329192648537620795, time:1750768895.5652165s req_ids:[8] -DEBUG 06-24 20:41:35 [manager.py:391] -ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:212.51273155212402ms total_cost_time:212.53490447998047ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:15014 prompt_cache_len:5151 prompt_cache_ratio:0.3430797921939523 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 -DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:35 [batch.py:51] router release req id 8 -INFO 06-24 20:41:35 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s -INFO 06-24 20:41:35 [manager.py:68] detokenization recv req id 8 cost time 0.10944986343383789 s -DEBUG 06-24 20:41:35 [manager.py:391] Prefill Batch: batch_id=106464030211109145481723477885784725373, time:1750768895.7843294s req_ids:[8] -DEBUG 06-24 20:41:35 [manager.py:391] -ERROR 06-24 20:41:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:220.23439407348633ms total_cost_time:220.2889919281006ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:15015 prompt_cache_len:5151 prompt_cache_ratio:0.34305694305694306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 -DEBUG 06-24 20:41:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:35 [batch.py:51] router release req id 8 -INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10868167877197266 s -INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.10962700843811035 s -DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=175251293042478211154517803785102976318, time:1750768896.0106792s req_ids:[8] -DEBUG 06-24 20:41:36 [manager.py:391] -ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:35 lightllm_req_id:8 first_token_cost:346.91834449768066ms total_cost_time:346.96221351623535ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15016 prompt_cache_len:5151 prompt_cache_ratio:0.3430340969632392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 -DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:36 [batch.py:51] router release req id 8 -INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10761761665344238 s -INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.10961174964904785 s -DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=89920203491889801626574200972905509593, time:1750768896.3622448s req_ids:[8] -DEBUG 06-24 20:41:36 [manager.py:391] -ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:204.9400806427002ms total_cost_time:204.98299598693848ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15017 prompt_cache_len:5151 prompt_cache_ratio:0.3430112539122328 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 -DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:36 [batch.py:51] router release req id 8 -INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.10952949523925781 s -INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.11148929595947266 s -DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=297901532215773788290340818267662566942, time:1750768896.5762126s req_ids:[8] -DEBUG 06-24 20:41:36 [manager.py:391] -ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:212.6333713531494ms total_cost_time:212.6789093017578ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15018 prompt_cache_len:5151 prompt_cache_ratio:0.342988413903316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 -DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:36 [batch.py:51] router release req id 8 -INFO 06-24 20:41:36 [manager.py:224] router recive req id 8 cost time 0.11171317100524902 s -INFO 06-24 20:41:36 [manager.py:68] detokenization recv req id 8 cost time 0.11306214332580566 s -DEBUG 06-24 20:41:36 [manager.py:391] Prefill Batch: batch_id=191501369723447700927485896521963568442, time:1750768896.7957127s req_ids:[8] -DEBUG 06-24 20:41:36 [manager.py:391] -ERROR 06-24 20:41:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:214.84923362731934ms total_cost_time:214.8916721343994ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15019 prompt_cache_len:5151 prompt_cache_ratio:0.3429655769358812 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 -DEBUG 06-24 20:41:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:36 [batch.py:51] router release req id 8 -INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.1109771728515625 s -INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11289238929748535 s -DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=141770978761726578433607078369791848463, time:1750768897.0164444s req_ids:[8] -DEBUG 06-24 20:41:37 [manager.py:391] -ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:36 lightllm_req_id:8 first_token_cost:217.42534637451172ms total_cost_time:217.4696922302246ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15020 prompt_cache_len:5151 prompt_cache_ratio:0.3429427430093209 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 -DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:37 [batch.py:51] router release req id 8 -INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.10837578773498535 s -INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11030149459838867 s -DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=314916997881633685913953111632454184799, time:1750768897.2404473s req_ids:[8] -DEBUG 06-24 20:41:37 [manager.py:391] -ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:217.0114517211914ms total_cost_time:217.0555591583252ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15021 prompt_cache_len:5151 prompt_cache_ratio:0.34291991212302775 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 -DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:37 [batch.py:51] router release req id 8 -INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.10789918899536133 s -INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.10979580879211426 s -DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=222304584216660401759664066359327857125, time:1750768897.4617805s req_ids:[8] -DEBUG 06-24 20:41:37 [manager.py:391] -ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:383.0609321594238ms total_cost_time:383.1052780151367ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15022 prompt_cache_len:5151 prompt_cache_ratio:0.34289708427639465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 -DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:37 [batch.py:51] router release req id 8 -INFO 06-24 20:41:37 [manager.py:224] router recive req id 8 cost time 0.1085355281829834 s -INFO 06-24 20:41:37 [manager.py:68] detokenization recv req id 8 cost time 0.11047482490539551 s -DEBUG 06-24 20:41:37 [manager.py:391] Prefill Batch: batch_id=339356230939116679823397430755638084455, time:1750768897.8528774s req_ids:[8] -DEBUG 06-24 20:41:37 [manager.py:391] -ERROR 06-24 20:41:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:214.83087539672852ms total_cost_time:214.87784385681152ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15023 prompt_cache_len:5151 prompt_cache_ratio:0.3428742594688145 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 -DEBUG 06-24 20:41:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:37 [batch.py:51] router release req id 8 -INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.10926628112792969 s -INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11125850677490234 s -DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=251607770807142919921521026223677639570, time:1750768898.074562s req_ids:[8] -DEBUG 06-24 20:41:38 [manager.py:391] -ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:37 lightllm_req_id:8 first_token_cost:206.5730094909668ms total_cost_time:206.61377906799316ms,out_token_counter:1 mean_per_token_cost_time: 0.04076957702636719ms prompt_token_num:15024 prompt_cache_len:5151 prompt_cache_ratio:0.3428514376996805 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 -DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:38 [batch.py:51] router release req id 8 -INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.10967540740966797 s -INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.1117238998413086 s -DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=266940633232077859721598610060156828293, time:1750768898.2888916s req_ids:[8] -DEBUG 06-24 20:41:38 [manager.py:391] -ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:218.05095672607422ms total_cost_time:218.0941104888916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15025 prompt_cache_len:5151 prompt_cache_ratio:0.342828618968386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 -DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:38 [batch.py:51] router release req id 8 -INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.11025452613830566 s -INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11179256439208984 s -DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=280325910839107438213898396030809525308, time:1750768898.5130851s req_ids:[8] -DEBUG 06-24 20:41:38 [manager.py:391] -ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:216.56203269958496ms total_cost_time:216.60661697387695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15026 prompt_cache_len:5151 prompt_cache_ratio:0.3428058032743245 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 -DEBUG 06-24 20:41:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:38 [batch.py:51] router release req id 8 -INFO 06-24 20:41:38 [manager.py:224] router recive req id 8 cost time 0.11108803749084473 s -INFO 06-24 20:41:38 [manager.py:68] detokenization recv req id 8 cost time 0.11269330978393555 s -DEBUG 06-24 20:41:38 [manager.py:391] Prefill Batch: batch_id=288021914962145183480358429272282614988, time:1750768898.7456963s req_ids:[8] -DEBUG 06-24 20:41:38 [manager.py:391] -ERROR 06-24 20:41:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:188.39240074157715ms total_cost_time:188.43436241149902ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15027 prompt_cache_len:5151 prompt_cache_ratio:0.3427829906168896 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 -DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:39 [batch.py:51] router release req id 8 -INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.3108804225921631 s -INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.3128793239593506 s -DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=107909350400421865603764395207111990568, time:1750768899.1388607s req_ids:[8] -DEBUG 06-24 20:41:39 [manager.py:391] -ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:38 lightllm_req_id:8 first_token_cost:427.5243282318115ms total_cost_time:427.5696277618408ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15028 prompt_cache_len:5151 prompt_cache_ratio:0.3427601809954751 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 -DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:39 [batch.py:51] router release req id 8 -INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.1105349063873291 s -INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.1125798225402832 s -DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=136919818469105812953397167437127364100, time:1750768899.365628s req_ids:[8] -DEBUG 06-24 20:41:39 [manager.py:391] -ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:205.66105842590332ms total_cost_time:205.70826530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15029 prompt_cache_len:5151 prompt_cache_ratio:0.342737374409475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 -DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:39 [batch.py:51] router release req id 8 -INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.11058807373046875 s -INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.11258339881896973 s -DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=29082367047007068620337796489358134645, time:1750768899.5769184s req_ids:[8] -DEBUG 06-24 20:41:39 [manager.py:391] -ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:216.81737899780273ms total_cost_time:216.86053276062012ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15030 prompt_cache_len:5151 prompt_cache_ratio:0.34271457085828344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 -DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:39 [batch.py:51] router release req id 8 -INFO 06-24 20:41:39 [manager.py:224] router recive req id 8 cost time 0.10768675804138184 s -INFO 06-24 20:41:39 [manager.py:68] detokenization recv req id 8 cost time 0.10949993133544922 s -DEBUG 06-24 20:41:39 [manager.py:391] Prefill Batch: batch_id=79282224109627704573341929260586742131, time:1750768899.8006852s req_ids:[8] -DEBUG 06-24 20:41:39 [manager.py:391] -ERROR 06-24 20:41:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:202.37183570861816ms total_cost_time:202.41665840148926ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15031 prompt_cache_len:5151 prompt_cache_ratio:0.34269177034129467 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 -DEBUG 06-24 20:41:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:39 [batch.py:51] router release req id 8 -INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.10985803604125977 s -INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11177897453308105 s -DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=4914664528978754296559123802525416125, time:1750768900.00942s req_ids:[8] -DEBUG 06-24 20:41:40 [manager.py:391] -ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:39 lightllm_req_id:8 first_token_cost:214.22147750854492ms total_cost_time:214.2658233642578ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15032 prompt_cache_len:5151 prompt_cache_ratio:0.34266897285790315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 -DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:40 [batch.py:51] router release req id 8 -INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.11078739166259766 s -INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11259961128234863 s -DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=128836002972202678290920100983281839749, time:1750768900.2300262s req_ids:[8] -DEBUG 06-24 20:41:40 [manager.py:391] -ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:41:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 59388.197 tokens/s -DEBUG 06-24 20:41:40 [stats.py:37] Avg prompt tokens throughput: 59380.286 tokens/s -DEBUG 06-24 20:41:40 [stats.py:37] Avg generate tokens throughput: 7.910 tokens/s -INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:376.9841194152832ms total_cost_time:377.0277500152588ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15033 prompt_cache_len:5151 prompt_cache_ratio:0.3426461784075035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 -DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:40 [batch.py:51] router release req id 8 -INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.10748910903930664 s -INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.10924458503723145 s -DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=245612086217130801440167574684926194508, time:1750768900.614228s req_ids:[8] -DEBUG 06-24 20:41:40 [manager.py:391] -ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:214.86496925354004ms total_cost_time:214.90955352783203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15034 prompt_cache_len:5151 prompt_cache_ratio:0.34262338698949046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 -DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:40 [batch.py:51] router release req id 8 -INFO 06-24 20:41:40 [manager.py:224] router recive req id 8 cost time 0.1085965633392334 s -INFO 06-24 20:41:40 [manager.py:68] detokenization recv req id 8 cost time 0.11049413681030273 s -DEBUG 06-24 20:41:40 [manager.py:391] Prefill Batch: batch_id=132434276140596343699172035391200432652, time:1750768900.8370717s req_ids:[8] -DEBUG 06-24 20:41:40 [manager.py:391] -ERROR 06-24 20:41:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:216.69816970825195ms total_cost_time:216.74299240112305ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15035 prompt_cache_len:5151 prompt_cache_ratio:0.3426005986032591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 -DEBUG 06-24 20:41:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:40 [batch.py:51] router release req id 8 -INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.11188244819641113 s -INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11375212669372559 s -DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=303676494995468759222598213438839012224, time:1750768901.0596085s req_ids:[8] -DEBUG 06-24 20:41:41 [manager.py:391] -ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:40 lightllm_req_id:8 first_token_cost:215.8670425415039ms total_cost_time:215.9261703491211ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15036 prompt_cache_len:5151 prompt_cache_ratio:0.3425778132482043 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 -DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:41 [batch.py:51] router release req id 8 -INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10904645919799805 s -INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.1109151840209961 s -DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=242018460239485632958360086014286758761, time:1750768901.2834673s req_ids:[8] -DEBUG 06-24 20:41:41 [manager.py:391] -ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:215.66128730773926ms total_cost_time:215.70611000061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15037 prompt_cache_len:5151 prompt_cache_ratio:0.34255503092372147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 -DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:41 [batch.py:51] router release req id 8 -INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10912489891052246 s -INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11099553108215332 s -DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=147697877860422208081557916912506719336, time:1750768901.5062788s req_ids:[8] -DEBUG 06-24 20:41:41 [manager.py:391] -ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:216.57347679138184ms total_cost_time:216.63904190063477ms,out_token_counter:1 mean_per_token_cost_time: 0.06556510925292969ms prompt_token_num:15038 prompt_cache_len:5151 prompt_cache_ratio:0.34253225162920603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 -DEBUG 06-24 20:41:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:41 [batch.py:51] router release req id 8 -INFO 06-24 20:41:41 [manager.py:224] router recive req id 8 cost time 0.10895037651062012 s -INFO 06-24 20:41:41 [manager.py:68] detokenization recv req id 8 cost time 0.11089134216308594 s -DEBUG 06-24 20:41:41 [manager.py:391] Prefill Batch: batch_id=198936895926139838807469166597417840416, time:1750768901.728543s req_ids:[8] -DEBUG 06-24 20:41:41 [manager.py:391] -ERROR 06-24 20:41:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:374.1450309753418ms total_cost_time:374.1891384124756ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15039 prompt_cache_len:5151 prompt_cache_ratio:0.34250947536405346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 -DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:42 [batch.py:51] router release req id 8 -INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.10847711563110352 s -INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11039614677429199 s -DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=292552252816090919278202324868528294325, time:1750768902.1130116s req_ids:[8] -DEBUG 06-24 20:41:42 [manager.py:391] -ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:41 lightllm_req_id:8 first_token_cost:222.54085540771484ms total_cost_time:222.58639335632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15040 prompt_cache_len:5151 prompt_cache_ratio:0.3424867021276596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 -DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:42 [batch.py:51] router release req id 8 -INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.1109931468963623 s -INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11273956298828125 s -DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=155634396420410974871778212565666806270, time:1750768902.3386033s req_ids:[8] -DEBUG 06-24 20:41:42 [manager.py:391] -ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:215.12150764465332ms total_cost_time:215.1651382446289ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15041 prompt_cache_len:5151 prompt_cache_ratio:0.34246393191942026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 -DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:42 [batch.py:51] router release req id 8 -INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.11055493354797363 s -INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11223125457763672 s -DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=104076044532069330015692727114806687122, time:1750768902.561581s req_ids:[8] -DEBUG 06-24 20:41:42 [manager.py:391] -ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:212.03351020812988ms total_cost_time:212.0802402496338ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15042 prompt_cache_len:5151 prompt_cache_ratio:0.34244116473873154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 -DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:42 [batch.py:51] router release req id 8 -INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.11113142967224121 s -INFO 06-24 20:41:42 [manager.py:68] detokenization recv req id 8 cost time 0.11294245719909668 s -DEBUG 06-24 20:41:42 [manager.py:391] Prefill Batch: batch_id=314942193439731529536272246433010048572, time:1750768902.7804158s req_ids:[8] -DEBUG 06-24 20:41:42 [manager.py:391] -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:42 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:216.78972244262695ms total_cost_time:216.83430671691895ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15043 prompt_cache_len:5151 prompt_cache_ratio:0.3424184005849897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 -DEBUG 06-24 20:41:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:42 [batch.py:51] router release req id 8 -INFO 06-24 20:41:42 [manager.py:224] router recive req id 8 cost time 0.1097254753112793 s -INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.11172795295715332 s -DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=59749822216877583008861517630733104703, time:1750768903.0035286s req_ids:[8] -DEBUG 06-24 20:41:43 [manager.py:391] -ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:42 lightllm_req_id:8 first_token_cost:381.01911544799805ms total_cost_time:381.06513023376465ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15044 prompt_cache_len:5151 prompt_cache_ratio:0.34239563945759105 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 -DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:43 [batch.py:51] router release req id 8 -INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.10781645774841309 s -INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.10959196090698242 s -DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=191050457956107257876441725566553381860, time:1750768903.3899517s req_ids:[8] -DEBUG 06-24 20:41:43 [manager.py:391] -ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:214.55931663513184ms total_cost_time:214.60270881652832ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15045 prompt_cache_len:5151 prompt_cache_ratio:0.3423728813559322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 -DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:43 [batch.py:51] router release req id 8 -INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.11039328575134277 s -INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.11206436157226562 s -DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=265373963661181809296748559712683028642, time:1750768903.6144896s req_ids:[8] -DEBUG 06-24 20:41:43 [manager.py:391] -ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:218.22237968444824ms total_cost_time:218.26696395874023ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15046 prompt_cache_len:5151 prompt_cache_ratio:0.3423501262794098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 -DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:43 [batch.py:51] router release req id 8 -INFO 06-24 20:41:43 [manager.py:224] router recive req id 8 cost time 0.10941934585571289 s -INFO 06-24 20:41:43 [manager.py:68] detokenization recv req id 8 cost time 0.1113288402557373 s -DEBUG 06-24 20:41:43 [manager.py:391] Prefill Batch: batch_id=18879904704026333109587426434251455500, time:1750768903.8368983s req_ids:[8] -DEBUG 06-24 20:41:43 [manager.py:391] -ERROR 06-24 20:41:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:215.09265899658203ms total_cost_time:215.13652801513672ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15047 prompt_cache_len:5151 prompt_cache_ratio:0.34232737422742077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 -DEBUG 06-24 20:41:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:43 [batch.py:51] router release req id 8 -INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.10899996757507324 s -INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11086273193359375 s -DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=333871928953391483110826278126430207582, time:1750768904.0593557s req_ids:[8] -DEBUG 06-24 20:41:44 [manager.py:391] -ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:43 lightllm_req_id:8 first_token_cost:216.02439880371094ms total_cost_time:216.06945991516113ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15048 prompt_cache_len:5151 prompt_cache_ratio:0.34230462519936206 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 -DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:44 [batch.py:51] router release req id 8 -INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.10808587074279785 s -INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11002969741821289 s -DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=210932886025604261108962234408949213076, time:1750768904.2824395s req_ids:[8] -DEBUG 06-24 20:41:44 [manager.py:391] -ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:211.30990982055664ms total_cost_time:211.35377883911133ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15049 prompt_cache_len:5151 prompt_cache_ratio:0.3422818791946309 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 -DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:44 [batch.py:51] router release req id 8 -INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.1109771728515625 s -INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11298155784606934 s -DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=212810875700332531170383542092353110788, time:1750768904.4996517s req_ids:[8] -DEBUG 06-24 20:41:44 [manager.py:391] -ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:374.4809627532959ms total_cost_time:374.5253086090088ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15050 prompt_cache_len:5151 prompt_cache_ratio:0.3422591362126246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 -DEBUG 06-24 20:41:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:44 [batch.py:51] router release req id 8 -INFO 06-24 20:41:44 [manager.py:224] router recive req id 8 cost time 0.1083230972290039 s -INFO 06-24 20:41:44 [manager.py:68] detokenization recv req id 8 cost time 0.11037421226501465 s -DEBUG 06-24 20:41:44 [manager.py:391] Prefill Batch: batch_id=174453179966349153166111277100499363826, time:1750768904.8798547s req_ids:[8] -DEBUG 06-24 20:41:44 [manager.py:391] -ERROR 06-24 20:41:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:217.81682968139648ms total_cost_time:217.8630828857422ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15051 prompt_cache_len:5151 prompt_cache_ratio:0.34223639625274066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 -DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:45 [batch.py:51] router release req id 8 -INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.10822176933288574 s -INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.10925722122192383 s -DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=284800008336978484726762450352224316927, time:1750768905.1058981s req_ids:[8] -DEBUG 06-24 20:41:45 [manager.py:391] -ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:44 lightllm_req_id:8 first_token_cost:210.59775352478027ms total_cost_time:210.64352989196777ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15052 prompt_cache_len:5151 prompt_cache_ratio:0.3422136593143768 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 -DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:45 [batch.py:51] router release req id 8 -INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.1087188720703125 s -INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11047530174255371 s -DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=183265122316067514672789967252552499440, time:1750768905.3234782s req_ids:[8] -DEBUG 06-24 20:41:45 [manager.py:391] -ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:216.8886661529541ms total_cost_time:216.933012008667ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15053 prompt_cache_len:5151 prompt_cache_ratio:0.34219092539693086 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 -DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:45 [batch.py:51] router release req id 8 -INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.10807347297668457 s -INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11026334762573242 s -DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=212726617675304735376846608961245807459, time:1750768905.5461123s req_ids:[8] -DEBUG 06-24 20:41:45 [manager.py:391] -ERROR 06-24 20:41:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:215.94548225402832ms total_cost_time:215.9874439239502ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15054 prompt_cache_len:5151 prompt_cache_ratio:0.34216819449980074 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 -DEBUG 06-24 20:41:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:45 [batch.py:51] router release req id 8 -INFO 06-24 20:41:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:45 [manager.py:224] router recive req id 8 cost time 0.1086583137512207 s -INFO 06-24 20:41:45 [manager.py:68] detokenization recv req id 8 cost time 0.11058449745178223 s -DEBUG 06-24 20:41:45 [manager.py:391] Prefill Batch: batch_id=236064269211470818176107771091560229518, time:1750768905.7691543s req_ids:[8] -DEBUG 06-24 20:41:45 [manager.py:391] -ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:45 lightllm_req_id:8 first_token_cost:379.17256355285645ms total_cost_time:379.21881675720215ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15055 prompt_cache_len:5151 prompt_cache_ratio:0.3421454666223846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 -DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:46 [batch.py:51] router release req id 8 -INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.11116313934326172 s -DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=39233689289509105446680506279282266896, time:1750768906.1537817s req_ids:[8] -DEBUG 06-24 20:41:46 [manager.py:391] -INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11307764053344727 s -ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:210.62135696411133ms total_cost_time:210.66665649414062ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15056 prompt_cache_len:5151 prompt_cache_ratio:0.34212274176408075 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 -DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:46 [batch.py:51] router release req id 8 -INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10905909538269043 s -INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11101508140563965 s -DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=141647235897768435332735377732571189771, time:1750768906.371627s req_ids:[8] -DEBUG 06-24 20:41:46 [manager.py:391] -ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:213.29236030578613ms total_cost_time:213.33670616149902ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15057 prompt_cache_len:5151 prompt_cache_ratio:0.3421000199242877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 -DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:46 [batch.py:51] router release req id 8 -INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10763406753540039 s -INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.10913538932800293 s -DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=220219238596842224809658054709473577718, time:1750768906.5937047s req_ids:[8] -DEBUG 06-24 20:41:46 [manager.py:391] -ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:217.4522876739502ms total_cost_time:217.49591827392578ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15058 prompt_cache_len:5151 prompt_cache_ratio:0.34207730110240403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 -DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:46 [batch.py:51] router release req id 8 -INFO 06-24 20:41:46 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s -INFO 06-24 20:41:46 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s -DEBUG 06-24 20:41:46 [manager.py:391] Prefill Batch: batch_id=58573806701864939045054105707694057397, time:1750768906.817152s req_ids:[8] -DEBUG 06-24 20:41:46 [manager.py:391] -ERROR 06-24 20:41:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:216.37868881225586ms total_cost_time:216.42088890075684ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15059 prompt_cache_len:5151 prompt_cache_ratio:0.34205458529782856 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 -DEBUG 06-24 20:41:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:46 [batch.py:51] router release req id 8 -INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10882949829101562 s -INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.11074972152709961 s -DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=161546693945666954248507016722334263114, time:1750768907.0414143s req_ids:[8] -DEBUG 06-24 20:41:47 [manager.py:391] -INFO 06-24 20:41:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:46 lightllm_req_id:8 first_token_cost:382.30180740356445ms total_cost_time:382.36260414123535ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15060 prompt_cache_len:5151 prompt_cache_ratio:0.34203187250996014 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 -DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:47 [batch.py:51] router release req id 8 -INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10748028755187988 s -INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.10946893692016602 s -DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=271932922816642209706776211752563226564, time:1750768907.430292s req_ids:[8] -DEBUG 06-24 20:41:47 [manager.py:391] -ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:212.47029304504395ms total_cost_time:212.48984336853027ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:15061 prompt_cache_len:5151 prompt_cache_ratio:0.342009162738198 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 -DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:47 [batch.py:51] router release req id 8 -INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.1084599494934082 s -INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.1104888916015625 s -DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=271995003663406157061856077165916883574, time:1750768907.6499057s req_ids:[8] -DEBUG 06-24 20:41:47 [manager.py:391] -ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:217.637300491333ms total_cost_time:217.695951461792ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15062 prompt_cache_len:5151 prompt_cache_ratio:0.34198645598194133 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 -DEBUG 06-24 20:41:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:47 [batch.py:51] router release req id 8 -INFO 06-24 20:41:47 [manager.py:224] router recive req id 8 cost time 0.10778164863586426 s -INFO 06-24 20:41:47 [manager.py:68] detokenization recv req id 8 cost time 0.10974693298339844 s -DEBUG 06-24 20:41:47 [manager.py:391] Prefill Batch: batch_id=334489070257517803658949640124315822150, time:1750768907.8741257s req_ids:[8] -DEBUG 06-24 20:41:47 [manager.py:391] -ERROR 06-24 20:41:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:216.61829948425293ms total_cost_time:216.68362617492676ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:15063 prompt_cache_len:5151 prompt_cache_ratio:0.3419637522405895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 -DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:48 [batch.py:51] router release req id 8 -INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10641193389892578 s -INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.10826396942138672 s -DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=255097552523517388024575289258992940419, time:1750768908.1018918s req_ids:[8] -DEBUG 06-24 20:41:48 [manager.py:391] -ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:47 lightllm_req_id:8 first_token_cost:222.92447090148926ms total_cost_time:222.98693656921387ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:15064 prompt_cache_len:5151 prompt_cache_ratio:0.3419410515135422 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 -DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:48 [batch.py:51] router release req id 8 -INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s -INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11105537414550781 s -DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=120568070232426805315451619226877690644, time:1750768908.3281517s req_ids:[8] -DEBUG 06-24 20:41:48 [manager.py:391] -ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:217.74792671203613ms total_cost_time:217.80681610107422ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:15065 prompt_cache_len:5151 prompt_cache_ratio:0.34191835380019914 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 -DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:48 [batch.py:51] router release req id 8 -INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10883069038391113 s -INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11052107810974121 s -DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=336638436744989719257877848521813461769, time:1750768908.5524032s req_ids:[8] -DEBUG 06-24 20:41:48 [manager.py:391] -ERROR 06-24 20:41:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:346.085786819458ms total_cost_time:346.1451530456543ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15066 prompt_cache_len:5151 prompt_cache_ratio:0.34189565909996017 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 -DEBUG 06-24 20:41:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:48 [batch.py:51] router release req id 8 -INFO 06-24 20:41:48 [manager.py:224] router recive req id 8 cost time 0.10966777801513672 s -INFO 06-24 20:41:48 [manager.py:68] detokenization recv req id 8 cost time 0.11153125762939453 s -DEBUG 06-24 20:41:48 [manager.py:391] Prefill Batch: batch_id=340026426714101034507917899392667794028, time:1750768908.9067547s req_ids:[8] -DEBUG 06-24 20:41:48 [manager.py:391] -ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:48 lightllm_req_id:8 first_token_cost:214.26057815551758ms total_cost_time:214.32256698608398ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15067 prompt_cache_len:5151 prompt_cache_ratio:0.3418729674122254 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 -DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:49 [batch.py:51] router release req id 8 -INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10860896110534668 s -INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.10987520217895508 s -DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=112979490311968913964166121693198819558, time:1750768909.12435s req_ids:[8] -DEBUG 06-24 20:41:49 [manager.py:391] -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:49 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:209.46025848388672ms total_cost_time:209.5053195953369ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15068 prompt_cache_len:5151 prompt_cache_ratio:0.341850278736395 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 -DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:49 [batch.py:51] router release req id 8 -INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10927033424377441 s -INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s -DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=234682628043113877485682750706014912415, time:1750768909.3423376s req_ids:[8] -DEBUG 06-24 20:41:49 [manager.py:391] -ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:214.1251564025879ms total_cost_time:214.16854858398438ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15069 prompt_cache_len:5151 prompt_cache_ratio:0.3418275930718694 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 -DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:49 [batch.py:51] router release req id 8 -INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s -INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.11023998260498047 s -DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=33565681220975658098454018508018360983, time:1750768909.5616922s req_ids:[8] -DEBUG 06-24 20:41:49 [manager.py:391] -ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:171.84209823608398ms total_cost_time:171.88501358032227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15070 prompt_cache_len:5151 prompt_cache_ratio:0.3418049104180491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 -DEBUG 06-24 20:41:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:49 [batch.py:51] router release req id 8 -INFO 06-24 20:41:49 [manager.py:224] router recive req id 8 cost time 0.10914039611816406 s -INFO 06-24 20:41:49 [manager.py:68] detokenization recv req id 8 cost time 0.1110079288482666 s -DEBUG 06-24 20:41:49 [manager.py:391] Prefill Batch: batch_id=203729187529340237229402757958075157743, time:1750768909.7414377s req_ids:[8] -DEBUG 06-24 20:41:49 [manager.py:391] -ERROR 06-24 20:41:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:208.42409133911133ms total_cost_time:208.46891403198242ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15071 prompt_cache_len:5151 prompt_cache_ratio:0.34178223077433484 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 -DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:50 [batch.py:51] router release req id 8 -INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.2095801830291748 s -INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.2112407684326172 s -DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=281645117756050246340461154399441733454, time:1750768910.0918024s req_ids:[8] -DEBUG 06-24 20:41:50 [manager.py:391] -ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:49 lightllm_req_id:8 first_token_cost:336.0590934753418ms total_cost_time:336.1053466796875ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15072 prompt_cache_len:5151 prompt_cache_ratio:0.3417595541401274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 -DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:50 [batch.py:51] router release req id 8 -INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.11126232147216797 s -INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11313939094543457 s -DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=80926614092023239868298072321053156853, time:1750768910.2985556s req_ids:[8] -DEBUG 06-24 20:41:50 [manager.py:391] -ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:215.11578559875488ms total_cost_time:215.15893936157227ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15073 prompt_cache_len:5151 prompt_cache_ratio:0.3417368805148278 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 -DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:50 [batch.py:51] router release req id 8 -INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.10970163345336914 s -INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11186099052429199 s -DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=300835596679619547576940777854797814847, time:1750768910.5195727s req_ids:[8] -DEBUG 06-24 20:41:50 [manager.py:391] -DEBUG 06-24 20:41:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 61554.403 tokens/s -DEBUG 06-24 20:41:50 [stats.py:37] Avg prompt tokens throughput: 61546.326 tokens/s -DEBUG 06-24 20:41:50 [stats.py:37] Avg generate tokens throughput: 8.077 tokens/s -ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:217.8051471710205ms total_cost_time:217.8492546081543ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15074 prompt_cache_len:5151 prompt_cache_ratio:0.34171420989783735 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 -DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:50 [batch.py:51] router release req id 8 -INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.10820412635803223 s -INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11024117469787598 s -DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=261094707866582202039746025720798255286, time:1750768910.7439256s req_ids:[8] -DEBUG 06-24 20:41:50 [manager.py:391] -ERROR 06-24 20:41:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:211.75765991210938ms total_cost_time:211.80248260498047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15075 prompt_cache_len:5151 prompt_cache_ratio:0.3416915422885572 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 -DEBUG 06-24 20:41:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:50 [batch.py:51] router release req id 8 -INFO 06-24 20:41:50 [manager.py:224] router recive req id 8 cost time 0.1100773811340332 s -INFO 06-24 20:41:50 [manager.py:68] detokenization recv req id 8 cost time 0.11213278770446777 s -DEBUG 06-24 20:41:50 [manager.py:391] Prefill Batch: batch_id=283066310180235221907807822913137620973, time:1750768910.9623802s req_ids:[8] -DEBUG 06-24 20:41:50 [manager.py:391] -ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:50 lightllm_req_id:8 first_token_cost:176.66149139404297ms total_cost_time:176.72300338745117ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15076 prompt_cache_len:5151 prompt_cache_ratio:0.341668877686389 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 -DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:51 [batch.py:51] router release req id 8 -INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10788965225219727 s -INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.10979986190795898 s -DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=249701800865116622744170380505431949963, time:1750768911.1459336s req_ids:[8] -DEBUG 06-24 20:41:51 [manager.py:391] -ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:383.7168216705322ms total_cost_time:383.76426696777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15077 prompt_cache_len:5151 prompt_cache_ratio:0.34164621609073426 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 -DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:51 [batch.py:51] router release req id 8 -INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10863828659057617 s -INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.11043524742126465 s -DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=258297022791512060755435841648333940404, time:1750768911.534111s req_ids:[8] -DEBUG 06-24 20:41:51 [manager.py:391] -ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:172.806978225708ms total_cost_time:172.84893989562988ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15078 prompt_cache_len:5151 prompt_cache_ratio:0.3416235575009948 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 -DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:51 [batch.py:51] router release req id 8 -INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.1094825267791748 s -INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.11121201515197754 s -DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=2183851551898486920158933134249960023, time:1750768911.7156236s req_ids:[8] -DEBUG 06-24 20:41:51 [manager.py:391] -ERROR 06-24 20:41:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:210.1306915283203ms total_cost_time:210.15644073486328ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:15079 prompt_cache_len:5151 prompt_cache_ratio:0.3416009019165727 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 -DEBUG 06-24 20:41:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:51 [batch.py:51] router release req id 8 -INFO 06-24 20:41:51 [manager.py:224] router recive req id 8 cost time 0.10787439346313477 s -INFO 06-24 20:41:51 [manager.py:68] detokenization recv req id 8 cost time 0.10962581634521484 s -DEBUG 06-24 20:41:51 [manager.py:391] Prefill Batch: batch_id=69774270831748128100184585093067372450, time:1750768911.9326503s req_ids:[8] -DEBUG 06-24 20:41:51 [manager.py:391] -ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:51 lightllm_req_id:8 first_token_cost:217.0889377593994ms total_cost_time:217.1323299407959ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15080 prompt_cache_len:5151 prompt_cache_ratio:0.34157824933687003 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 -DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:52 [batch.py:51] router release req id 8 -INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10929417610168457 s -INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.11125898361206055 s -DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=215545012090180538169451771818967844747, time:1750768912.1555722s req_ids:[8] -DEBUG 06-24 20:41:52 [manager.py:391] -ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:219.51699256896973ms total_cost_time:219.5594310760498ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15081 prompt_cache_len:5151 prompt_cache_ratio:0.341555599761289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 -DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:52 [batch.py:51] router release req id 8 -INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10802912712097168 s -INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.1097099781036377 s -DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=28506148198625333011481927810087402592, time:1750768912.3796394s req_ids:[8] -DEBUG 06-24 20:41:52 [manager.py:391] -ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:172.89376258850098ms total_cost_time:172.93405532836914ms,out_token_counter:1 mean_per_token_cost_time: 0.04029273986816406ms prompt_token_num:15082 prompt_cache_len:5151 prompt_cache_ratio:0.3415329531892322 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 -DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:52 [batch.py:51] router release req id 8 -INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10737276077270508 s -INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.10890555381774902 s -DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=181571877202807345102380543676494716370, time:1750768912.559716s req_ids:[8] -DEBUG 06-24 20:41:52 [manager.py:391] -ERROR 06-24 20:41:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:343.7011241912842ms total_cost_time:343.74499320983887ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15083 prompt_cache_len:5151 prompt_cache_ratio:0.3415103096201021 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 -DEBUG 06-24 20:41:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:52 [batch.py:51] router release req id 8 -INFO 06-24 20:41:52 [manager.py:224] router recive req id 8 cost time 0.10851836204528809 s -INFO 06-24 20:41:52 [manager.py:68] detokenization recv req id 8 cost time 0.11062788963317871 s -DEBUG 06-24 20:41:52 [manager.py:391] Prefill Batch: batch_id=45970912719117339612302879272312477101, time:1750768912.9097073s req_ids:[8] -DEBUG 06-24 20:41:52 [manager.py:391] -ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:52 lightllm_req_id:8 first_token_cost:208.404541015625ms total_cost_time:208.44721794128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15084 prompt_cache_len:5151 prompt_cache_ratio:0.3414876690533015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 -DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:53 [batch.py:51] router release req id 8 -INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10874509811401367 s -INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.11069011688232422 s -DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=21841646188685403684557818106693023862, time:1750768913.1240482s req_ids:[8] -DEBUG 06-24 20:41:53 [manager.py:391] -ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:174.47376251220703ms total_cost_time:174.51715469360352ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15085 prompt_cache_len:5151 prompt_cache_ratio:0.3414650314882333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 -DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:53 [batch.py:51] router release req id 8 -INFO 06-24 20:41:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:41:53 [statics_utils.py:24] mean first cost: 232.79526009011153 ms -INFO 06-24 20:41:53 [statics_utils.py:24] mean per token cost: 0.057237066090271876 ms -INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10676407814025879 s -INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.10798239707946777 s -DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=151189781837006036891553911545168133127, time:1750768913.304848s req_ids:[8] -DEBUG 06-24 20:41:53 [manager.py:391] -ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:212.13412284851074ms total_cost_time:212.15415000915527ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:15086 prompt_cache_len:5151 prompt_cache_ratio:0.3414423969243007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 -DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:53 [batch.py:51] router release req id 8 -INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.1082007884979248 s -INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.11020421981811523 s -DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=318779123571854947444627469951396773643, time:1750768913.522357s req_ids:[8] -DEBUG 06-24 20:41:53 [manager.py:391] -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:209.94091033935547ms total_cost_time:209.98358726501465ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15087 prompt_cache_len:5151 prompt_cache_ratio:0.34141976536090674 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 -DEBUG 06-24 20:41:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:53 [batch.py:51] router release req id 8 -INFO 06-24 20:41:53 [manager.py:224] router recive req id 8 cost time 0.10874629020690918 s -INFO 06-24 20:41:53 [manager.py:68] detokenization recv req id 8 cost time 0.110870361328125 s -DEBUG 06-24 20:41:53 [manager.py:391] Prefill Batch: batch_id=298482292756897104505684097350522172629, time:1750768913.74107s req_ids:[8] -DEBUG 06-24 20:41:53 [manager.py:391] -ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:53 lightllm_req_id:8 first_token_cost:379.79793548583984ms total_cost_time:379.84204292297363ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15088 prompt_cache_len:5151 prompt_cache_ratio:0.3413971367974549 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 -DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:54 [batch.py:51] router release req id 8 -INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s -INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.11033749580383301 s -DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=152348007431906108863543912305695915814, time:1750768914.127895s req_ids:[8] -DEBUG 06-24 20:41:54 [manager.py:391] -ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:204.23555374145508ms total_cost_time:204.27894592285156ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15089 prompt_cache_len:5151 prompt_cache_ratio:0.3413745112333488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 -DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:54 [batch.py:51] router release req id 8 -INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10850930213928223 s -INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.1105494499206543 s -DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=72234143059623363816169244082614546560, time:1750768914.338672s req_ids:[8] -DEBUG 06-24 20:41:54 [manager.py:391] -ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:213.31381797790527ms total_cost_time:213.35721015930176ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15090 prompt_cache_len:5151 prompt_cache_ratio:0.34135188866799204 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 -DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:54 [batch.py:51] router release req id 8 -INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10905218124389648 s -INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.1110537052154541 s -DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=41758195201830778076746444762616542678, time:1750768914.5580869s req_ids:[8] -DEBUG 06-24 20:41:54 [manager.py:391] -ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:213.81735801696777ms total_cost_time:213.86075019836426ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15091 prompt_cache_len:5151 prompt_cache_ratio:0.34132926910078853 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 -DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:54 [batch.py:51] router release req id 8 -INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.10814189910888672 s -INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.10978412628173828 s -DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=106354437660802350200787674333870438646, time:1750768914.777274s req_ids:[8] -DEBUG 06-24 20:41:54 [manager.py:391] -ERROR 06-24 20:41:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:173.77638816833496ms total_cost_time:173.81882667541504ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15092 prompt_cache_len:5151 prompt_cache_ratio:0.34130665253114234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 -DEBUG 06-24 20:41:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:54 [batch.py:51] router release req id 8 -INFO 06-24 20:41:54 [manager.py:224] router recive req id 8 cost time 0.1071019172668457 s -INFO 06-24 20:41:54 [manager.py:68] detokenization recv req id 8 cost time 0.10902285575866699 s -DEBUG 06-24 20:41:54 [manager.py:391] Prefill Batch: batch_id=163271021314456153848346391459503465796, time:1750768914.97318s req_ids:[8] -DEBUG 06-24 20:41:54 [manager.py:391] -ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:54 lightllm_req_id:8 first_token_cost:219.13743019104004ms total_cost_time:219.18082237243652ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15093 prompt_cache_len:5151 prompt_cache_ratio:0.34128403895845755 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 -DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:55 [batch.py:51] router release req id 8 -INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10882329940795898 s -INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.11159253120422363 s -DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=112914082212821095783330252662377630594, time:1750768915.1838322s req_ids:[8] -DEBUG 06-24 20:41:55 [manager.py:391] -ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:412.6605987548828ms total_cost_time:412.7049446105957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15094 prompt_cache_len:5151 prompt_cache_ratio:0.3412614283821386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 -DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:55 [batch.py:51] router release req id 8 -INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s -INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.1103217601776123 s -DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=289889264986668363313209391944932275600, time:1750768915.6031835s req_ids:[8] -DEBUG 06-24 20:41:55 [manager.py:391] -ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:175.10056495666504ms total_cost_time:175.12154579162598ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:15095 prompt_cache_len:5151 prompt_cache_ratio:0.3412388208015899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 -DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:55 [batch.py:51] router release req id 8 -INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10757613182067871 s -INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.10913252830505371 s -DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=51156618889855294454517010463924585283, time:1750768915.7859282s req_ids:[8] -DEBUG 06-24 20:41:55 [manager.py:391] -ERROR 06-24 20:41:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:175.44960975646973ms total_cost_time:175.49419403076172ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15096 prompt_cache_len:5151 prompt_cache_ratio:0.34121621621621623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 -DEBUG 06-24 20:41:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:55 [batch.py:51] router release req id 8 -INFO 06-24 20:41:55 [manager.py:224] router recive req id 8 cost time 0.10823392868041992 s -INFO 06-24 20:41:55 [manager.py:68] detokenization recv req id 8 cost time 0.11012482643127441 s -DEBUG 06-24 20:41:55 [manager.py:391] Prefill Batch: batch_id=59115779834676107309353225364594538019, time:1750768915.9669774s req_ids:[8] -DEBUG 06-24 20:41:55 [manager.py:391] -ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:55 lightllm_req_id:8 first_token_cost:208.8449001312256ms total_cost_time:208.8909149169922ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15097 prompt_cache_len:5151 prompt_cache_ratio:0.34119361462542225 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 -DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:56 [batch.py:51] router release req id 8 -INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.1086878776550293 s -INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.11002779006958008 s -DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=14953796354546814740006014941513496716, time:1750768916.1813307s req_ids:[8] -DEBUG 06-24 20:41:56 [manager.py:391] -ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:233.26992988586426ms total_cost_time:233.31403732299805ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15098 prompt_cache_len:5151 prompt_cache_ratio:0.34117101602861305 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 -DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:56 [batch.py:51] router release req id 8 -INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.10784149169921875 s -INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.10907888412475586 s -DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=260226142424154188946531448959269391513, time:1750768916.4308105s req_ids:[8] -DEBUG 06-24 20:41:56 [manager.py:391] -ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:384.4788074493408ms total_cost_time:384.5231533050537ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15099 prompt_cache_len:5151 prompt_cache_ratio:0.3411484204251937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 -DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:56 [batch.py:51] router release req id 8 -INFO 06-24 20:41:56 [manager.py:224] router recive req id 8 cost time 0.10956287384033203 s -INFO 06-24 20:41:56 [manager.py:68] detokenization recv req id 8 cost time 0.11157536506652832 s -DEBUG 06-24 20:41:56 [manager.py:391] Prefill Batch: batch_id=256805047913416277488057881128160942168, time:1750768916.812071s req_ids:[8] -DEBUG 06-24 20:41:56 [manager.py:391] -ERROR 06-24 20:41:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:216.28880500793457ms total_cost_time:216.33291244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15100 prompt_cache_len:5151 prompt_cache_ratio:0.3411258278145695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 -DEBUG 06-24 20:41:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:56 [batch.py:51] router release req id 8 -INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10841679573059082 s -INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11043667793273926 s -DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=184080922360411198335547994297255947972, time:1750768917.03691s req_ids:[8] -DEBUG 06-24 20:41:57 [manager.py:391] -ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:56 lightllm_req_id:8 first_token_cost:219.9394702911377ms total_cost_time:219.98310089111328ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15101 prompt_cache_len:5151 prompt_cache_ratio:0.34110323819614596 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 -DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:57 [batch.py:51] router release req id 8 -INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10941600799560547 s -INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11138153076171875 s -DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=134052431938879338581717528815211457945, time:1750768917.2632842s req_ids:[8] -DEBUG 06-24 20:41:57 [manager.py:391] -ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:215.58690071105957ms total_cost_time:215.63053131103516ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15102 prompt_cache_len:5151 prompt_cache_ratio:0.34108065156932854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 -DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:57 [batch.py:51] router release req id 8 -INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.10909485816955566 s -INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.11106586456298828 s -DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=182273116362900452627579989927537463669, time:1750768917.484107s req_ids:[8] -DEBUG 06-24 20:41:57 [manager.py:391] -ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:216.2339687347412ms total_cost_time:216.292142868042ms,out_token_counter:1 mean_per_token_cost_time: 0.05817413330078125ms prompt_token_num:15103 prompt_cache_len:5151 prompt_cache_ratio:0.34105806793352317 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 -DEBUG 06-24 20:41:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:57 [batch.py:51] router release req id 8 -INFO 06-24 20:41:57 [manager.py:224] router recive req id 8 cost time 0.1091310977935791 s -INFO 06-24 20:41:57 [manager.py:68] detokenization recv req id 8 cost time 0.1109170913696289 s -DEBUG 06-24 20:41:57 [manager.py:391] Prefill Batch: batch_id=144322164691599546479468913602393519184, time:1750768917.7046149s req_ids:[8] -DEBUG 06-24 20:41:57 [manager.py:391] -ERROR 06-24 20:41:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:383.1789493560791ms total_cost_time:383.2218647003174ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15104 prompt_cache_len:5151 prompt_cache_ratio:0.3410354872881356 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 -DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:58 [batch.py:51] router release req id 8 -INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10949039459228516 s -INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.1112978458404541 s -DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=328759578369771033784862834271249146551, time:1750768918.0942457s req_ids:[8] -DEBUG 06-24 20:41:58 [manager.py:391] -ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:57 lightllm_req_id:8 first_token_cost:216.5665626525879ms total_cost_time:216.60852432250977ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15105 prompt_cache_len:5151 prompt_cache_ratio:0.34101290963257197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 -DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:58 [batch.py:51] router release req id 8 -INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10967087745666504 s -INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11164188385009766 s -DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=84646863349846286003089801263035732345, time:1750768918.3149915s req_ids:[8] -DEBUG 06-24 20:41:58 [manager.py:391] -ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:210.11018753051758ms total_cost_time:210.15477180480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15106 prompt_cache_len:5151 prompt_cache_ratio:0.3409903349662386 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 -DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:58 [batch.py:51] router release req id 8 -INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10940432548522949 s -INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.1114039421081543 s -DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=151880485984966726155694783717332178651, time:1750768918.5327442s req_ids:[8] -DEBUG 06-24 20:41:58 [manager.py:391] -ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:215.93093872070312ms total_cost_time:215.9733772277832ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15107 prompt_cache_len:5151 prompt_cache_ratio:0.3409677632885417 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 -DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:58 [batch.py:51] router release req id 8 -INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10916733741760254 s -INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11111998558044434 s -DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=18710355243211274935617998411004518276, time:1750768918.754063s req_ids:[8] -DEBUG 06-24 20:41:58 [manager.py:391] -ERROR 06-24 20:41:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:217.9553508758545ms total_cost_time:217.99898147583008ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15108 prompt_cache_len:5151 prompt_cache_ratio:0.340945194598888 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 -DEBUG 06-24 20:41:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:58 [batch.py:51] router release req id 8 -INFO 06-24 20:41:58 [manager.py:224] router recive req id 8 cost time 0.10952425003051758 s -INFO 06-24 20:41:58 [manager.py:68] detokenization recv req id 8 cost time 0.11126160621643066 s -DEBUG 06-24 20:41:58 [manager.py:391] Prefill Batch: batch_id=41001186266519349659632480122461850623, time:1750768918.979691s req_ids:[8] -DEBUG 06-24 20:41:58 [manager.py:391] -ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:58 lightllm_req_id:8 first_token_cost:176.57470703125ms total_cost_time:176.6207218170166ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15109 prompt_cache_len:5151 prompt_cache_ratio:0.3409226288966841 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 -DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:59 [batch.py:51] router release req id 8 -INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10947895050048828 s -INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.1114969253540039 s -DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=135907808961695677569412970812330166470, time:1750768919.162218s req_ids:[8] -DEBUG 06-24 20:41:59 [manager.py:391] -ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:384.69672203063965ms total_cost_time:384.74154472351074ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15110 prompt_cache_len:5151 prompt_cache_ratio:0.34090006618133684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 -DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:59 [batch.py:51] router release req id 8 -INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10837173461914062 s -INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.1103212833404541 s -DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=61567501262750498214145369267629152541, time:1750768919.554969s req_ids:[8] -DEBUG 06-24 20:41:59 [manager.py:391] -ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:216.82500839233398ms total_cost_time:216.87626838684082ms,out_token_counter:1 mean_per_token_cost_time: 0.05125999450683594ms prompt_token_num:15111 prompt_cache_len:5151 prompt_cache_ratio:0.34087750645225334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 -DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:59 [batch.py:51] router release req id 8 -INFO 06-24 20:41:59 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s -INFO 06-24 20:41:59 [manager.py:68] detokenization recv req id 8 cost time 0.11183714866638184 s -DEBUG 06-24 20:41:59 [manager.py:391] Prefill Batch: batch_id=134668308301368890131021354029335366204, time:1750768919.7796903s req_ids:[8] -DEBUG 06-24 20:41:59 [manager.py:391] -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:41:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:41:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:41:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:41:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:41:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:234.53927040100098ms total_cost_time:234.58528518676758ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15112 prompt_cache_len:5151 prompt_cache_ratio:0.34085494970884067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:41:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 -DEBUG 06-24 20:41:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:41:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:41:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:41:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:41:59 [batch.py:51] router release req id 8 -INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10832834243774414 s -INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104893684387207 s -DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=47713204662063076854880932830691400815, time:1750768920.027444s req_ids:[8] -DEBUG 06-24 20:42:00 [manager.py:391] -ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:41:59 lightllm_req_id:8 first_token_cost:221.17090225219727ms total_cost_time:221.23980522155762ms,out_token_counter:1 mean_per_token_cost_time: 0.06890296936035156ms prompt_token_num:15113 prompt_cache_len:5151 prompt_cache_ratio:0.3408323959505062 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 -DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:00 [batch.py:51] router release req id 8 -INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10856366157531738 s -INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.1106421947479248 s -DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=320860260490044786099541792835853312356, time:1750768920.2503104s req_ids:[8] -DEBUG 06-24 20:42:00 [manager.py:391] -ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:249.50003623962402ms total_cost_time:249.54628944396973ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15114 prompt_cache_len:5151 prompt_cache_ratio:0.3408098451766574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 -DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:00 [batch.py:51] router release req id 8 -INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.10890436172485352 s -INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.11071324348449707 s -DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=251201076994834275782970455603988051977, time:1750768920.52111s req_ids:[8] -DEBUG 06-24 20:42:00 [manager.py:391] -DEBUG 06-24 20:42:00 [stats.py:37] Avg tokens(prompt+generate) throughput: 61890.440 tokens/s -DEBUG 06-24 20:42:00 [stats.py:37] Avg prompt tokens throughput: 61882.241 tokens/s -DEBUG 06-24 20:42:00 [stats.py:37] Avg generate tokens throughput: 8.199 tokens/s -ERROR 06-24 20:42:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:223.84309768676758ms total_cost_time:223.92702102661133ms,out_token_counter:1 mean_per_token_cost_time: 0.08392333984375ms prompt_token_num:15115 prompt_cache_len:5151 prompt_cache_ratio:0.34078729738670194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 -DEBUG 06-24 20:42:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:00 [batch.py:51] router release req id 8 -INFO 06-24 20:42:00 [manager.py:224] router recive req id 8 cost time 0.3114936351776123 s -INFO 06-24 20:42:00 [manager.py:68] detokenization recv req id 8 cost time 0.31391263008117676 s -DEBUG 06-24 20:42:00 [manager.py:391] Prefill Batch: batch_id=142770800511558748084894835519988784642, time:1750768920.9400666s req_ids:[8] -DEBUG 06-24 20:42:00 [manager.py:391] -ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:00 lightllm_req_id:8 first_token_cost:446.5157985687256ms total_cost_time:446.57349586486816ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15116 prompt_cache_len:5151 prompt_cache_ratio:0.3407647525800476 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 -DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:01 [batch.py:51] router release req id 8 -INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10781216621398926 s -INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.10976910591125488 s -DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=292716616776109716335042459139377068333, time:1750768921.1976094s req_ids:[8] -DEBUG 06-24 20:42:01 [manager.py:391] -ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:237.7016544342041ms total_cost_time:237.7619743347168ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15117 prompt_cache_len:5151 prompt_cache_ratio:0.3407422107561024 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 -DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:01 [batch.py:51] router release req id 8 -INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10908913612365723 s -INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.11115217208862305 s -DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=292752961160642883329417984701403430296, time:1750768921.4409993s req_ids:[8] -DEBUG 06-24 20:42:01 [manager.py:391] -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:01 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:227.1733283996582ms total_cost_time:227.2329330444336ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:15118 prompt_cache_len:5151 prompt_cache_ratio:0.3407196719142744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 -DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:01 [batch.py:51] router release req id 8 -INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10768461227416992 s -INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.1094975471496582 s -DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=23728277255687012303600469779707287489, time:1750768921.6701703s req_ids:[8] -DEBUG 06-24 20:42:01 [manager.py:391] -ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:185.14776229858398ms total_cost_time:185.19306182861328ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15119 prompt_cache_len:5151 prompt_cache_ratio:0.34069713605397184 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 -DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:01 [batch.py:51] router release req id 8 -INFO 06-24 20:42:01 [manager.py:224] router recive req id 8 cost time 0.10853767395019531 s -INFO 06-24 20:42:01 [manager.py:68] detokenization recv req id 8 cost time 0.11011219024658203 s -DEBUG 06-24 20:42:01 [manager.py:391] Prefill Batch: batch_id=332528922802634058728495723105243719466, time:1750768921.8705637s req_ids:[8] -DEBUG 06-24 20:42:01 [manager.py:391] -ERROR 06-24 20:42:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:194.3376064300537ms total_cost_time:194.39959526062012ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15120 prompt_cache_len:5151 prompt_cache_ratio:0.3406746031746032 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 -DEBUG 06-24 20:42:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:01 [batch.py:51] router release req id 8 -INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10902142524719238 s -INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11117792129516602 s -DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=129464832289572141500767201707391086603, time:1750768922.0700247s req_ids:[8] -DEBUG 06-24 20:42:02 [manager.py:391] -ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:01 lightllm_req_id:8 first_token_cost:405.92408180236816ms total_cost_time:405.9772491455078ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15121 prompt_cache_len:5151 prompt_cache_ratio:0.340652073275577 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 -DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:02 [batch.py:51] router release req id 8 -INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10884380340576172 s -INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11147713661193848 s -DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=191854365426463251610690205097331085982, time:1750768922.4774551s req_ids:[8] -DEBUG 06-24 20:42:02 [manager.py:391] -ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:232.93733596801758ms total_cost_time:232.99860954284668ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15122 prompt_cache_len:5151 prompt_cache_ratio:0.34062954635630205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 -DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:02 [batch.py:51] router release req id 8 -INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10943412780761719 s -INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11169576644897461 s -DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=236025743619789428316483758715654471847, time:1750768922.722321s req_ids:[8] -DEBUG 06-24 20:42:02 [manager.py:391] -ERROR 06-24 20:42:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:234.53807830810547ms total_cost_time:234.59792137145996ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:15123 prompt_cache_len:5151 prompt_cache_ratio:0.3406070224161873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 -DEBUG 06-24 20:42:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:02 [batch.py:51] router release req id 8 -INFO 06-24 20:42:02 [manager.py:224] router recive req id 8 cost time 0.10884308815002441 s -INFO 06-24 20:42:02 [manager.py:68] detokenization recv req id 8 cost time 0.11088109016418457 s -DEBUG 06-24 20:42:02 [manager.py:391] Prefill Batch: batch_id=158442976878761652122875386252503453636, time:1750768922.9648066s req_ids:[8] -DEBUG 06-24 20:42:02 [manager.py:391] -ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:02 lightllm_req_id:8 first_token_cost:236.03343963623047ms total_cost_time:236.09113693237305ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15124 prompt_cache_len:5151 prompt_cache_ratio:0.34058450145464164 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 -DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:03 [batch.py:51] router release req id 8 -INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10809063911437988 s -INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11016058921813965 s -DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=260975344026449079813912402695700404977, time:1750768923.2087054s req_ids:[8] -DEBUG 06-24 20:42:03 [manager.py:391] -ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:220.14141082763672ms total_cost_time:220.20196914672852ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15125 prompt_cache_len:5151 prompt_cache_ratio:0.34056198347107436 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 -DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:03 [batch.py:51] router release req id 8 -INFO 06-24 20:42:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10964751243591309 s -INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11169838905334473 s -DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=56328092031856350232001204283227619558, time:1750768923.4298987s req_ids:[8] -DEBUG 06-24 20:42:03 [manager.py:391] -ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:230.26490211486816ms total_cost_time:230.30996322631836ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15126 prompt_cache_len:5151 prompt_cache_ratio:0.34053946846489486 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 -DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:03 [batch.py:51] router release req id 8 -INFO 06-24 20:42:03 [manager.py:224] router recive req id 8 cost time 0.10875535011291504 s -INFO 06-24 20:42:03 [manager.py:68] detokenization recv req id 8 cost time 0.11017560958862305 s -DEBUG 06-24 20:42:03 [manager.py:391] Prefill Batch: batch_id=242819618091853126138592557824733027511, time:1750768923.6814861s req_ids:[8] -DEBUG 06-24 20:42:03 [manager.py:391] -ERROR 06-24 20:42:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:362.987756729126ms total_cost_time:363.0409240722656ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15127 prompt_cache_len:5151 prompt_cache_ratio:0.34051695643551266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 -DEBUG 06-24 20:42:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:03 [batch.py:51] router release req id 8 -INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10764312744140625 s -INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.10963201522827148 s -DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=119648962098155508205452085484944690819, time:1750768924.0371702s req_ids:[8] -DEBUG 06-24 20:42:04 [manager.py:391] -ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:03 lightllm_req_id:8 first_token_cost:209.49387550354004ms total_cost_time:209.53941345214844ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15128 prompt_cache_len:5151 prompt_cache_ratio:0.34049444738233736 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 -DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:04 [batch.py:51] router release req id 8 -INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10999608039855957 s -INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11194300651550293 s -DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=151203195932878952939661329562518074308, time:1750768924.2661686s req_ids:[8] -DEBUG 06-24 20:42:04 [manager.py:391] -ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:230.59725761413574ms total_cost_time:230.65757751464844ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15129 prompt_cache_len:5151 prompt_cache_ratio:0.3404719413047789 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 -DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:04 [batch.py:51] router release req id 8 -INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10804295539855957 s -INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.10997271537780762 s -DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=245520470666247478266902375212474005024, time:1750768924.4923224s req_ids:[8] -DEBUG 06-24 20:42:04 [manager.py:391] -ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:217.29779243469238ms total_cost_time:217.35882759094238ms,out_token_counter:1 mean_per_token_cost_time: 0.06103515625ms prompt_token_num:15130 prompt_cache_len:5151 prompt_cache_ratio:0.3404494382022472 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 -DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:04 [batch.py:51] router release req id 8 -INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10970568656921387 s -INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11175394058227539 s -DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=34621216213874200289464525700555307880, time:1750768924.7163055s req_ids:[8] -DEBUG 06-24 20:42:04 [manager.py:391] -ERROR 06-24 20:42:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:211.95578575134277ms total_cost_time:212.01491355895996ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15131 prompt_cache_len:5151 prompt_cache_ratio:0.3404269380741524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 -DEBUG 06-24 20:42:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:04 [batch.py:51] router release req id 8 -INFO 06-24 20:42:04 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s -INFO 06-24 20:42:04 [manager.py:68] detokenization recv req id 8 cost time 0.11084413528442383 s -DEBUG 06-24 20:42:04 [manager.py:391] Prefill Batch: batch_id=174222114439524139736014604387299845718, time:1750768924.9352312s req_ids:[8] -DEBUG 06-24 20:42:04 [manager.py:391] -ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:04 lightllm_req_id:8 first_token_cost:383.0540180206299ms total_cost_time:383.1160068511963ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15132 prompt_cache_len:5151 prompt_cache_ratio:0.34040444091990485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 -DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:05 [batch.py:51] router release req id 8 -INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.1089780330657959 s -INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11071014404296875 s -DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=293943600145546403510529913689509359480, time:1750768925.3274732s req_ids:[8] -DEBUG 06-24 20:42:05 [manager.py:391] -ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:217.9248332977295ms total_cost_time:217.9715633392334ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15133 prompt_cache_len:5151 prompt_cache_ratio:0.340381946738915 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 -DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:05 [batch.py:51] router release req id 8 -INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s -INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11061263084411621 s -DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=122567621281190014287128082149545896538, time:1750768925.5507078s req_ids:[8] -DEBUG 06-24 20:42:05 [manager.py:391] -ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:218.98531913757324ms total_cost_time:219.04659271240234ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15134 prompt_cache_len:5151 prompt_cache_ratio:0.3403594555305934 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 -DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:05 [batch.py:51] router release req id 8 -INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10811591148376465 s -INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.11012697219848633 s -DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=181716967487952626663313275643190550642, time:1750768925.7759686s req_ids:[8] -DEBUG 06-24 20:42:05 [manager.py:391] -ERROR 06-24 20:42:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:210.6630802154541ms total_cost_time:210.7076644897461ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15135 prompt_cache_len:5151 prompt_cache_ratio:0.34033696729435087 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 -DEBUG 06-24 20:42:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:05 [batch.py:51] router release req id 8 -INFO 06-24 20:42:05 [manager.py:224] router recive req id 8 cost time 0.10898113250732422 s -INFO 06-24 20:42:05 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s -DEBUG 06-24 20:42:05 [manager.py:391] Prefill Batch: batch_id=329770381711218239256854856042062636515, time:1750768925.9945657s req_ids:[8] -DEBUG 06-24 20:42:05 [manager.py:391] -ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:05 lightllm_req_id:8 first_token_cost:215.5628204345703ms total_cost_time:215.620756149292ms,out_token_counter:1 mean_per_token_cost_time: 0.05793571472167969ms prompt_token_num:15136 prompt_cache_len:5151 prompt_cache_ratio:0.3403144820295983 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 -DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:06 [batch.py:51] router release req id 8 -INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10923385620117188 s -INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11123871803283691 s -DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=105718388953191655217562251393093617301, time:1750768926.217449s req_ids:[8] -DEBUG 06-24 20:42:06 [manager.py:391] -ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:217.71669387817383ms total_cost_time:217.77629852294922ms,out_token_counter:1 mean_per_token_cost_time: 0.059604644775390625ms prompt_token_num:15137 prompt_cache_len:5151 prompt_cache_ratio:0.34029199973574686 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 -DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:06 [batch.py:51] router release req id 8 -INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10824847221374512 s -INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11018109321594238 s -DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=213705483799439245614474845534458892873, time:1750768926.439531s req_ids:[8] -DEBUG 06-24 20:42:06 [manager.py:391] -ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:371.18053436279297ms total_cost_time:371.2432384490967ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:15138 prompt_cache_len:5151 prompt_cache_ratio:0.3402695204122077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 -DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:06 [batch.py:51] router release req id 8 -INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10851550102233887 s -INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11023736000061035 s -DEBUG 06-24 20:42:06 [manager.py:391] Prefill Batch: batch_id=106371600486117104738433619347586511830, time:1750768926.8193789s req_ids:[8] -DEBUG 06-24 20:42:06 [manager.py:391] -ERROR 06-24 20:42:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:172.07813262939453ms total_cost_time:172.13034629821777ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:15139 prompt_cache_len:5151 prompt_cache_ratio:0.34024704405839223 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 -DEBUG 06-24 20:42:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:06 [batch.py:51] router release req id 8 -INFO 06-24 20:42:06 [manager.py:224] router recive req id 8 cost time 0.10879158973693848 s -INFO 06-24 20:42:06 [manager.py:68] detokenization recv req id 8 cost time 0.11054086685180664 s -DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=253727504005699471469345535585371517078, time:1750768927.0016215s req_ids:[8] -DEBUG 06-24 20:42:07 [manager.py:391] -ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:06 lightllm_req_id:8 first_token_cost:174.9410629272461ms total_cost_time:174.98445510864258ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15140 prompt_cache_len:5151 prompt_cache_ratio:0.34022457067371203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 -DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:07 [batch.py:51] router release req id 8 -INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10932111740112305 s -INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.11110258102416992 s -DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=90591396344182833445147627550415117224, time:1750768927.1830559s req_ids:[8] -DEBUG 06-24 20:42:07 [manager.py:391] -ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:213.3197784423828ms total_cost_time:213.364839553833ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15141 prompt_cache_len:5151 prompt_cache_ratio:0.34020210025757874 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 -DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:07 [batch.py:51] router release req id 8 -INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10909295082092285 s -INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.11114740371704102 s -DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=44516360052309234812207522500813250719, time:1750768927.4037309s req_ids:[8] -DEBUG 06-24 20:42:07 [manager.py:391] -ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:221.28891944885254ms total_cost_time:221.33231163024902ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15142 prompt_cache_len:5151 prompt_cache_ratio:0.34017963280940433 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 -DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:07 [batch.py:51] router release req id 8 -INFO 06-24 20:42:07 [manager.py:224] router recive req id 8 cost time 0.10893535614013672 s -INFO 06-24 20:42:07 [manager.py:68] detokenization recv req id 8 cost time 0.1106722354888916 s -DEBUG 06-24 20:42:07 [manager.py:391] Prefill Batch: batch_id=310803097578630834209985896133924027191, time:1750768927.6349337s req_ids:[8] -DEBUG 06-24 20:42:07 [manager.py:391] -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:380.4326057434082ms total_cost_time:380.479097366333ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15143 prompt_cache_len:5151 prompt_cache_ratio:0.34015716832860066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 -DEBUG 06-24 20:42:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:07 [batch.py:51] router release req id 8 -INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.11096787452697754 s -INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11319112777709961 s -DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=16770693465956860871455644356012166182, time:1750768928.0183327s req_ids:[8] -DEBUG 06-24 20:42:08 [manager.py:391] -ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:07 lightllm_req_id:8 first_token_cost:212.88061141967773ms total_cost_time:212.92424201965332ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15144 prompt_cache_len:5151 prompt_cache_ratio:0.34013470681458 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 -DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:08 [batch.py:51] router release req id 8 -INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10921430587768555 s -INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.1111292839050293 s -DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=220242871540421039298134463637496962635, time:1750768928.2374442s req_ids:[8] -DEBUG 06-24 20:42:08 [manager.py:391] -ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:216.54343605041504ms total_cost_time:216.58802032470703ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15145 prompt_cache_len:5151 prompt_cache_ratio:0.3401122482667547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 -DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:08 [batch.py:51] router release req id 8 -INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10704708099365234 s -INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.1088407039642334 s -DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=75320475940050182699501259913804923522, time:1750768928.4599936s req_ids:[8] -DEBUG 06-24 20:42:08 [manager.py:391] -ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:173.4154224395752ms total_cost_time:173.44927787780762ms,out_token_counter:1 mean_per_token_cost_time: 0.033855438232421875ms prompt_token_num:15146 prompt_cache_len:5151 prompt_cache_ratio:0.34008979268453715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 -DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:08 [batch.py:51] router release req id 8 -INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.10980939865112305 s -INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11166024208068848 s -DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=10151914379784797819032507642563774842, time:1750768928.641206s req_ids:[8] -DEBUG 06-24 20:42:08 [manager.py:391] -ERROR 06-24 20:42:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:211.59887313842773ms total_cost_time:211.64274215698242ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15147 prompt_cache_len:5151 prompt_cache_ratio:0.3400673400673401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 -DEBUG 06-24 20:42:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:08 [batch.py:51] router release req id 8 -INFO 06-24 20:42:08 [manager.py:224] router recive req id 8 cost time 0.11143040657043457 s -INFO 06-24 20:42:08 [manager.py:68] detokenization recv req id 8 cost time 0.11342620849609375 s -DEBUG 06-24 20:42:08 [manager.py:391] Prefill Batch: batch_id=237509338301135155914547641553873250848, time:1750768928.8589869s req_ids:[8] -DEBUG 06-24 20:42:08 [manager.py:391] -ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:08 lightllm_req_id:8 first_token_cost:390.4128074645996ms total_cost_time:390.4588222503662ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15148 prompt_cache_len:5151 prompt_cache_ratio:0.3400448904145762 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 -DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:09 [batch.py:51] router release req id 8 -INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10909199714660645 s -INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11124444007873535 s -DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=266986863866306553650511161103714025302, time:1750768929.2548144s req_ids:[8] -DEBUG 06-24 20:42:09 [manager.py:391] -ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:216.14670753479004ms total_cost_time:216.18938446044922ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15149 prompt_cache_len:5151 prompt_cache_ratio:0.3400224437256585 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 -DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:09 [batch.py:51] router release req id 8 -INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.11080431938171387 s -INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11290836334228516 s -DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=259719751702741891281946431056453846828, time:1750768929.4812276s req_ids:[8] -DEBUG 06-24 20:42:09 [manager.py:391] -ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:225.59309005737305ms total_cost_time:225.6314754486084ms,out_token_counter:1 mean_per_token_cost_time: 0.03838539123535156ms prompt_token_num:15150 prompt_cache_len:5151 prompt_cache_ratio:0.34 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 -DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:09 [batch.py:51] router release req id 8 -INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10841202735900879 s -INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11053323745727539 s -DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=162666230470223322202687180923312568516, time:1750768929.7132275s req_ids:[8] -DEBUG 06-24 20:42:09 [manager.py:391] -ERROR 06-24 20:42:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:210.80708503723145ms total_cost_time:210.85858345031738ms,out_token_counter:1 mean_per_token_cost_time: 0.0514984130859375ms prompt_token_num:15151 prompt_cache_len:5151 prompt_cache_ratio:0.33997755923701406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 -DEBUG 06-24 20:42:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:09 [batch.py:51] router release req id 8 -INFO 06-24 20:42:09 [manager.py:224] router recive req id 8 cost time 0.10915303230285645 s -INFO 06-24 20:42:09 [manager.py:68] detokenization recv req id 8 cost time 0.11109614372253418 s -DEBUG 06-24 20:42:09 [manager.py:391] Prefill Batch: batch_id=204379589067036472048125837799511295054, time:1750768929.929747s req_ids:[8] -DEBUG 06-24 20:42:09 [manager.py:391] -ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:09 lightllm_req_id:8 first_token_cost:212.81671524047852ms total_cost_time:212.8775119781494ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15152 prompt_cache_len:5151 prompt_cache_ratio:0.33995512143611406 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 -DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:10 [batch.py:51] router release req id 8 -INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10874009132385254 s -INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11068916320800781 s -DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=162472383881362703335362734243699832234, time:1750768930.1613822s req_ids:[8] -DEBUG 06-24 20:42:10 [manager.py:391] -ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:231.47225379943848ms total_cost_time:231.5351963043213ms,out_token_counter:1 mean_per_token_cost_time: 0.0629425048828125ms prompt_token_num:15153 prompt_cache_len:5151 prompt_cache_ratio:0.3399326865967135 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 -DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:10 [batch.py:51] router release req id 8 -INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s -INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11060166358947754 s -DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=36687424468851237904626865667461902308, time:1750768930.402315s req_ids:[8] -DEBUG 06-24 20:42:10 [manager.py:391] -ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:42:10 [stats.py:37] Avg tokens(prompt+generate) throughput: 58244.419 tokens/s -DEBUG 06-24 20:42:10 [stats.py:37] Avg prompt tokens throughput: 58236.625 tokens/s -DEBUG 06-24 20:42:10 [stats.py:37] Avg generate tokens throughput: 7.794 tokens/s -INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:383.56494903564453ms total_cost_time:383.6092948913574ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15154 prompt_cache_len:5151 prompt_cache_ratio:0.3399102547182262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 -DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:10 [batch.py:51] router release req id 8 -INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.10806870460510254 s -INFO 06-24 20:42:10 [manager.py:68] detokenization recv req id 8 cost time 0.11001086235046387 s -DEBUG 06-24 20:42:10 [manager.py:391] Prefill Batch: batch_id=116954138388791682524458953858580656279, time:1750768930.7820656s req_ids:[8] -DEBUG 06-24 20:42:10 [manager.py:391] -ERROR 06-24 20:42:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:220.55411338806152ms total_cost_time:220.59869766235352ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15155 prompt_cache_len:5151 prompt_cache_ratio:0.339887825800066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 -DEBUG 06-24 20:42:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:10 [batch.py:51] router release req id 8 -DEBUG 06-24 20:42:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:10 [manager.py:283] -DEBUG 06-24 20:42:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:10 [manager.py:284] -INFO 06-24 20:42:10 [manager.py:224] router recive req id 8 cost time 0.1089625358581543 s -INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11111330986022949 s -DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=217637866310348812204766851318435611166, time:1750768931.00737s req_ids:[8] -DEBUG 06-24 20:42:11 [manager.py:391] -ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:10 lightllm_req_id:8 first_token_cost:219.18988227844238ms total_cost_time:219.24901008605957ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15156 prompt_cache_len:5151 prompt_cache_ratio:0.3398653998416469 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 -DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:11 [batch.py:51] router release req id 8 -INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10811996459960938 s -INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.1100471019744873 s -DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=88412097004777143828733987993335715672, time:1750768931.2318702s req_ids:[8] -DEBUG 06-24 20:42:11 [manager.py:391] -ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:215.5916690826416ms total_cost_time:215.63458442687988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15157 prompt_cache_len:5151 prompt_cache_ratio:0.33984297684238307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 -DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:11 [batch.py:51] router release req id 8 -INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10874032974243164 s -INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11077022552490234 s -DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=308396656811554247816575141514679258621, time:1750768931.4543045s req_ids:[8] -DEBUG 06-24 20:42:11 [manager.py:391] -ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:212.4795913696289ms total_cost_time:212.5232219696045ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15158 prompt_cache_len:5151 prompt_cache_ratio:0.3398205568016889 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 -DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:11 [batch.py:51] router release req id 8 -INFO 06-24 20:42:11 [manager.py:224] router recive req id 8 cost time 0.10859870910644531 s -INFO 06-24 20:42:11 [manager.py:68] detokenization recv req id 8 cost time 0.11052894592285156 s -DEBUG 06-24 20:42:11 [manager.py:391] Prefill Batch: batch_id=163419811437959115575735290248869874884, time:1750768931.6740763s req_ids:[8] -DEBUG 06-24 20:42:11 [manager.py:391] -ERROR 06-24 20:42:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:216.54152870178223ms total_cost_time:216.60423278808594ms,out_token_counter:1 mean_per_token_cost_time: 0.06270408630371094ms prompt_token_num:15159 prompt_cache_len:5151 prompt_cache_ratio:0.3397981397189788 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 -DEBUG 06-24 20:42:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:11 [batch.py:51] router release req id 8 -INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.3139023780822754 s -INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.31595540046691895 s -DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=298875651006723307089745361053395311799, time:1750768932.100516s req_ids:[8] -DEBUG 06-24 20:42:12 [manager.py:391] -ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:11 lightllm_req_id:8 first_token_cost:422.85943031311035ms total_cost_time:422.90329933166504ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15160 prompt_cache_len:5151 prompt_cache_ratio:0.33977572559366753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 -DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:12 [batch.py:51] router release req id 8 -INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10849785804748535 s -INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11026167869567871 s -DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=75458211344852979027204447494708317446, time:1750768932.3290565s req_ids:[8] -DEBUG 06-24 20:42:12 [manager.py:391] -ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:171.73409461975098ms total_cost_time:171.77605628967285ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15161 prompt_cache_len:5151 prompt_cache_ratio:0.3397533144251698 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 -DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:12 [batch.py:51] router release req id 8 -INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10846972465515137 s -INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11050009727478027 s -DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=314514047641390217513180188380208649819, time:1750768932.509847s req_ids:[8] -DEBUG 06-24 20:42:12 [manager.py:391] -ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:215.69108963012695ms total_cost_time:215.73495864868164ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15162 prompt_cache_len:5151 prompt_cache_ratio:0.3397309062129007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 -DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:12 [batch.py:51] router release req id 8 -INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10895943641662598 s -INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11084604263305664 s -DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=12017392277513101675152169549719479217, time:1750768932.7316117s req_ids:[8] -DEBUG 06-24 20:42:12 [manager.py:391] -ERROR 06-24 20:42:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:218.57333183288574ms total_cost_time:218.61934661865234ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15163 prompt_cache_len:5151 prompt_cache_ratio:0.33970850095627514 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 -DEBUG 06-24 20:42:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:12 [batch.py:51] router release req id 8 -INFO 06-24 20:42:12 [manager.py:224] router recive req id 8 cost time 0.10940980911254883 s -INFO 06-24 20:42:12 [manager.py:68] detokenization recv req id 8 cost time 0.11160588264465332 s -DEBUG 06-24 20:42:12 [manager.py:391] Prefill Batch: batch_id=160749533772920837796674148345896475644, time:1750768932.9571192s req_ids:[8] -DEBUG 06-24 20:42:12 [manager.py:391] -ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:12 lightllm_req_id:8 first_token_cost:212.74328231811523ms total_cost_time:212.79120445251465ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:15164 prompt_cache_len:5151 prompt_cache_ratio:0.3396860986547085 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 -DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:13 [batch.py:51] router release req id 8 -INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.10826396942138672 s -INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.11040568351745605 s -DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=100905699756882877748345440464227284438, time:1750768933.1762204s req_ids:[8] -DEBUG 06-24 20:42:13 [manager.py:391] -ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:387.4964714050293ms total_cost_time:387.54963874816895ms,out_token_counter:1 mean_per_token_cost_time: 0.05316734313964844ms prompt_token_num:15165 prompt_cache_len:5151 prompt_cache_ratio:0.3396636993076162 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 -DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:13 [batch.py:51] router release req id 8 -INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.1082296371459961 s -INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.11031103134155273 s -DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=130663392160568233189535034101535982721, time:1750768933.5693545s req_ids:[8] -DEBUG 06-24 20:42:13 [manager.py:391] -ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:219.81334686279297ms total_cost_time:219.87366676330566ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15166 prompt_cache_len:5151 prompt_cache_ratio:0.3396413029144138 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 -DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:13 [batch.py:51] router release req id 8 -INFO 06-24 20:42:13 [manager.py:224] router recive req id 8 cost time 0.10912728309631348 s -INFO 06-24 20:42:13 [manager.py:68] detokenization recv req id 8 cost time 0.1112067699432373 s -DEBUG 06-24 20:42:13 [manager.py:391] Prefill Batch: batch_id=114687940023632532315405087634798030527, time:1750768933.8040345s req_ids:[8] -DEBUG 06-24 20:42:13 [manager.py:391] -ERROR 06-24 20:42:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:220.98731994628906ms total_cost_time:221.04740142822266ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:15167 prompt_cache_len:5151 prompt_cache_ratio:0.33961890947451706 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 -DEBUG 06-24 20:42:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:13 [batch.py:51] router release req id 8 -INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10866451263427734 s -INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11071491241455078 s -DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=211290683999722074312007029203276003648, time:1750768934.0256963s req_ids:[8] -DEBUG 06-24 20:42:14 [manager.py:391] -ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:13 lightllm_req_id:8 first_token_cost:217.06151962280273ms total_cost_time:217.10896492004395ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15168 prompt_cache_len:5151 prompt_cache_ratio:0.3395965189873418 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 -DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:14 [batch.py:51] router release req id 8 -INFO 06-24 20:42:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.11349654197692871 s -DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=37553541000315564764521294644688198063, time:1750768934.2504926s req_ids:[8] -DEBUG 06-24 20:42:14 [manager.py:391] -INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11444735527038574 s -ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:174.85976219177246ms total_cost_time:174.98469352722168ms,out_token_counter:1 mean_per_token_cost_time: 0.12493133544921875ms prompt_token_num:15169 prompt_cache_len:5151 prompt_cache_ratio:0.33957413145230403 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 -DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:14 [batch.py:51] router release req id 8 -INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10886502265930176 s -INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11073589324951172 s -DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=1096136736441189369241097787671684276, time:1750768934.4331121s req_ids:[8] -DEBUG 06-24 20:42:14 [manager.py:391] -ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:213.43302726745605ms total_cost_time:213.49525451660156ms,out_token_counter:1 mean_per_token_cost_time: 0.06222724914550781ms prompt_token_num:15170 prompt_cache_len:5151 prompt_cache_ratio:0.33955174686882006 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 -DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:14 [batch.py:51] router release req id 8 -INFO 06-24 20:42:14 [manager.py:224] router recive req id 8 cost time 0.10869097709655762 s -INFO 06-24 20:42:14 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s -DEBUG 06-24 20:42:14 [manager.py:391] Prefill Batch: batch_id=10604411426724509131112122600169596658, time:1750768934.6683373s req_ids:[8] -DEBUG 06-24 20:42:14 [manager.py:391] -ERROR 06-24 20:42:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:396.2969779968262ms total_cost_time:396.35205268859863ms,out_token_counter:1 mean_per_token_cost_time: 0.05507469177246094ms prompt_token_num:15171 prompt_cache_len:5151 prompt_cache_ratio:0.3395293652363061 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 -DEBUG 06-24 20:42:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:14 [batch.py:51] router release req id 8 -INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10937976837158203 s -INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11175155639648438 s -DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=308278672626882888112016134426806295630, time:1750768935.0715678s req_ids:[8] -DEBUG 06-24 20:42:15 [manager.py:391] -ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:14 lightllm_req_id:8 first_token_cost:231.5058708190918ms total_cost_time:231.5652370452881ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15172 prompt_cache_len:5151 prompt_cache_ratio:0.33950698655417877 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 -DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:15 [batch.py:51] router release req id 8 -INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.1088871955871582 s -INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11085391044616699 s -DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=154269202804267551771659652672756194674, time:1750768935.2985997s req_ids:[8] -DEBUG 06-24 20:42:15 [manager.py:391] -ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:217.27395057678223ms total_cost_time:217.32759475708008ms,out_token_counter:1 mean_per_token_cost_time: 0.05364418029785156ms prompt_token_num:15173 prompt_cache_len:5151 prompt_cache_ratio:0.3394846108218546 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 -DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:15 [batch.py:51] router release req id 8 -INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10907983779907227 s -INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11095786094665527 s -DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=211162656016160909205669852684200529289, time:1750768935.5333889s req_ids:[8] -DEBUG 06-24 20:42:15 [manager.py:391] -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:15 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:229.59232330322266ms total_cost_time:229.65168952941895ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15174 prompt_cache_len:5151 prompt_cache_ratio:0.3394622380387505 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 -DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:15 [batch.py:51] router release req id 8 -INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10830283164978027 s -INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.10949945449829102 s -DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=222070297526169833170158616475890934056, time:1750768935.7594836s req_ids:[8] -DEBUG 06-24 20:42:15 [manager.py:391] -ERROR 06-24 20:42:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:218.34182739257812ms total_cost_time:218.38879585266113ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15175 prompt_cache_len:5151 prompt_cache_ratio:0.33943986820428335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 -DEBUG 06-24 20:42:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:15 [batch.py:51] router release req id 8 -INFO 06-24 20:42:15 [manager.py:224] router recive req id 8 cost time 0.10854315757751465 s -INFO 06-24 20:42:15 [manager.py:68] detokenization recv req id 8 cost time 0.11062383651733398 s -DEBUG 06-24 20:42:15 [manager.py:391] Prefill Batch: batch_id=37914800362907107182648095138160948131, time:1750768935.9894776s req_ids:[8] -DEBUG 06-24 20:42:15 [manager.py:391] -ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:15 lightllm_req_id:8 first_token_cost:397.7634906768799ms total_cost_time:397.8140354156494ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:15176 prompt_cache_len:5151 prompt_cache_ratio:0.3394175013178703 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 -DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:16 [batch.py:51] router release req id 8 -INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10888528823852539 s -INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11095213890075684 s -DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=65728021892721642036134691515062371818, time:1750768936.3902884s req_ids:[8] -DEBUG 06-24 20:42:16 [manager.py:391] -ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:218.76049041748047ms total_cost_time:218.82057189941406ms,out_token_counter:1 mean_per_token_cost_time: 0.06008148193359375ms prompt_token_num:15177 prompt_cache_len:5151 prompt_cache_ratio:0.3393951373789286 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 -DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:16 [batch.py:51] router release req id 8 -INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10917282104492188 s -INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11114501953125 s -DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=99728906666724970926512405386079584250, time:1750768936.6199012s req_ids:[8] -DEBUG 06-24 20:42:16 [manager.py:391] -ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:223.0672836303711ms total_cost_time:223.1285572052002ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15178 prompt_cache_len:5151 prompt_cache_ratio:0.33937277638687574 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 -DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:16 [batch.py:51] router release req id 8 -INFO 06-24 20:42:16 [manager.py:224] router recive req id 8 cost time 0.10848760604858398 s -INFO 06-24 20:42:16 [manager.py:68] detokenization recv req id 8 cost time 0.11055922508239746 s -DEBUG 06-24 20:42:16 [manager.py:391] Prefill Batch: batch_id=313689337459227610288645289335041421092, time:1750768936.8465247s req_ids:[8] -DEBUG 06-24 20:42:16 [manager.py:391] -ERROR 06-24 20:42:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:217.30899810791016ms total_cost_time:217.36979484558105ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15179 prompt_cache_len:5151 prompt_cache_ratio:0.33935041834112917 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 -DEBUG 06-24 20:42:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:16 [batch.py:51] router release req id 8 -INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10905289649963379 s -INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.1110689640045166 s -DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=122687061824358778447861747482944507803, time:1750768937.0705225s req_ids:[8] -DEBUG 06-24 20:42:17 [manager.py:391] -ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:16 lightllm_req_id:8 first_token_cost:218.20354461669922ms total_cost_time:218.2629108428955ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15180 prompt_cache_len:5151 prompt_cache_ratio:0.3393280632411067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 -INFO 06-24 20:42:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:17 [batch.py:51] router release req id 8 -INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10816097259521484 s -INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11010360717773438 s -DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=173800583072736181823245617748441481075, time:1750768937.29692s req_ids:[8] -DEBUG 06-24 20:42:17 [manager.py:391] -ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:394.1371440887451ms total_cost_time:394.18625831604004ms,out_token_counter:1 mean_per_token_cost_time: 0.049114227294921875ms prompt_token_num:15181 prompt_cache_len:5151 prompt_cache_ratio:0.3393057110862262 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 -DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:17 [batch.py:51] router release req id 8 -INFO 06-24 20:42:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10852384567260742 s -INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11071634292602539 s -DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=46527016274505654992188616480697623102, time:1750768937.696355s req_ids:[8] -DEBUG 06-24 20:42:17 [manager.py:391] -ERROR 06-24 20:42:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:214.1575813293457ms total_cost_time:214.22076225280762ms,out_token_counter:1 mean_per_token_cost_time: 0.06318092346191406ms prompt_token_num:15182 prompt_cache_len:5151 prompt_cache_ratio:0.3392833618759057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 -DEBUG 06-24 20:42:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:17 [batch.py:51] router release req id 8 -INFO 06-24 20:42:17 [manager.py:224] router recive req id 8 cost time 0.10872054100036621 s -INFO 06-24 20:42:17 [manager.py:68] detokenization recv req id 8 cost time 0.11062502861022949 s -DEBUG 06-24 20:42:17 [manager.py:391] Prefill Batch: batch_id=46924685682493559376080246293275289045, time:1750768937.9186237s req_ids:[8] -DEBUG 06-24 20:42:17 [manager.py:391] -ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:17 lightllm_req_id:8 first_token_cost:217.99039840698242ms total_cost_time:218.05286407470703ms,out_token_counter:1 mean_per_token_cost_time: 0.062465667724609375ms prompt_token_num:15183 prompt_cache_len:5151 prompt_cache_ratio:0.33926101560956334 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 -DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:18 [batch.py:51] router release req id 8 -INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10913634300231934 s -INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.11122941970825195 s -DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=133919047054363655536239907345678774093, time:1750768938.1442797s req_ids:[8] -DEBUG 06-24 20:42:18 [manager.py:391] -ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:219.04754638671875ms total_cost_time:219.10834312438965ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15184 prompt_cache_len:5151 prompt_cache_ratio:0.33923867228661747 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 -DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:18 [batch.py:51] router release req id 8 -INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10908746719360352 s -INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s -DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=138917395478911565847010630331177765062, time:1750768938.3713663s req_ids:[8] -DEBUG 06-24 20:42:18 [manager.py:391] -ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:215.58880805969238ms total_cost_time:215.65008163452148ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15185 prompt_cache_len:5151 prompt_cache_ratio:0.33921633190648665 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 -DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:18 [batch.py:51] router release req id 8 -INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.10671019554138184 s -INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.1080636978149414 s -DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=133186548632280356345333128492140982055, time:1750768938.5926871s req_ids:[8] -DEBUG 06-24 20:42:18 [manager.py:391] -ERROR 06-24 20:42:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:209.4879150390625ms total_cost_time:209.5494270324707ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15186 prompt_cache_len:5151 prompt_cache_ratio:0.3391939944685895 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 -DEBUG 06-24 20:42:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:18 [batch.py:51] router release req id 8 -INFO 06-24 20:42:18 [manager.py:224] router recive req id 8 cost time 0.1077888011932373 s -INFO 06-24 20:42:18 [manager.py:68] detokenization recv req id 8 cost time 0.10995030403137207 s -DEBUG 06-24 20:42:18 [manager.py:391] Prefill Batch: batch_id=47010660621880075201665836139575816647, time:1750768938.8089595s req_ids:[8] -DEBUG 06-24 20:42:18 [manager.py:391] -ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:18 lightllm_req_id:8 first_token_cost:385.48803329467773ms total_cost_time:385.5326175689697ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15187 prompt_cache_len:5151 prompt_cache_ratio:0.3391716599723448 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 -DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:19 [batch.py:51] router release req id 8 -INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10898828506469727 s -INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11094927787780762 s -DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=49322155431499347462737771567379886993, time:1750768939.1997495s req_ids:[8] -DEBUG 06-24 20:42:19 [manager.py:391] -ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:215.9411907196045ms total_cost_time:215.98458290100098ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15188 prompt_cache_len:5151 prompt_cache_ratio:0.33914932841717144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 -DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:19 [batch.py:51] router release req id 8 -INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10936641693115234 s -INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11133575439453125 s -DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=57046338280859119376316374736171206338, time:1750768939.4239733s req_ids:[8] -DEBUG 06-24 20:42:19 [manager.py:391] -ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:217.9713249206543ms total_cost_time:218.0333137512207ms,out_token_counter:1 mean_per_token_cost_time: 0.06198883056640625ms prompt_token_num:15189 prompt_cache_len:5151 prompt_cache_ratio:0.3391269998024886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 -DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:19 [batch.py:51] router release req id 8 -INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10805773735046387 s -INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.10998106002807617 s -DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=300428107430158495157623225369137519176, time:1750768939.659912s req_ids:[8] -DEBUG 06-24 20:42:19 [manager.py:391] -ERROR 06-24 20:42:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:232.74469375610352ms total_cost_time:232.79213905334473ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15190 prompt_cache_len:5151 prompt_cache_ratio:0.3391046741277156 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 -DEBUG 06-24 20:42:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:19 [batch.py:51] router release req id 8 -INFO 06-24 20:42:19 [manager.py:224] router recive req id 8 cost time 0.10874414443969727 s -INFO 06-24 20:42:19 [manager.py:68] detokenization recv req id 8 cost time 0.11097455024719238 s -DEBUG 06-24 20:42:19 [manager.py:391] Prefill Batch: batch_id=147403776232530842411857526325865068907, time:1750768939.8987508s req_ids:[8] -DEBUG 06-24 20:42:19 [manager.py:391] -ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:19 lightllm_req_id:8 first_token_cost:229.19082641601562ms total_cost_time:229.23970222473145ms,out_token_counter:1 mean_per_token_cost_time: 0.04887580871582031ms prompt_token_num:15191 prompt_cache_len:5151 prompt_cache_ratio:0.3390823513922717 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 -DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:20 [batch.py:51] router release req id 8 -INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.10995626449584961 s -INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.11193084716796875 s -DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=254938150094575030094327282537852081225, time:1750768940.1244683s req_ids:[8] -DEBUG 06-24 20:42:20 [manager.py:391] -ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:213.0424976348877ms total_cost_time:213.10162544250488ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15192 prompt_cache_len:5151 prompt_cache_ratio:0.3390600315955766 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 -DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:20 [batch.py:51] router release req id 8 -INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.310835599899292 s -INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.31293177604675293 s -DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=246751136733251994068484139025190547656, time:1750768940.5590603s req_ids:[8] -DEBUG 06-24 20:42:20 [manager.py:391] -ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:42:20 [stats.py:37] Avg tokens(prompt+generate) throughput: 59136.937 tokens/s -DEBUG 06-24 20:42:20 [stats.py:37] Avg prompt tokens throughput: 59129.143 tokens/s -DEBUG 06-24 20:42:20 [stats.py:37] Avg generate tokens throughput: 7.793 tokens/s -INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:434.2925548553467ms total_cost_time:434.35192108154297ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15193 prompt_cache_len:5151 prompt_cache_ratio:0.33903771473704997 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 -DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:20 [batch.py:51] router release req id 8 -INFO 06-24 20:42:20 [manager.py:224] router recive req id 8 cost time 0.10979533195495605 s -INFO 06-24 20:42:20 [manager.py:68] detokenization recv req id 8 cost time 0.11196565628051758 s -DEBUG 06-24 20:42:20 [manager.py:391] Prefill Batch: batch_id=251338580535259355565701219972431072810, time:1750768940.7882512s req_ids:[8] -DEBUG 06-24 20:42:20 [manager.py:391] -ERROR 06-24 20:42:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:217.49234199523926ms total_cost_time:217.55266189575195ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15194 prompt_cache_len:5151 prompt_cache_ratio:0.33901540081611165 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 -DEBUG 06-24 20:42:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:20 [batch.py:51] router release req id 8 -INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.10877585411071777 s -INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11076641082763672 s -DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=57537521288962973122640586936020283672, time:1750768941.0136397s req_ids:[8] -DEBUG 06-24 20:42:21 [manager.py:391] -ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:20 lightllm_req_id:8 first_token_cost:211.56883239746094ms total_cost_time:211.61365509033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15195 prompt_cache_len:5151 prompt_cache_ratio:0.3389930898321816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 -DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:21 [batch.py:51] router release req id 8 -INFO 06-24 20:42:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.1091756820678711 s -INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11126041412353516 s -DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=58737911634750185734373420974689333322, time:1750768941.2315671s req_ids:[8] -DEBUG 06-24 20:42:21 [manager.py:391] -ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:211.62080764770508ms total_cost_time:211.66539192199707ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15196 prompt_cache_len:5151 prompt_cache_ratio:0.33897078178468015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 -DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:21 [batch.py:51] router release req id 8 -INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.10792279243469238 s -INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11001396179199219 s -DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=121495887664646698917868106388007559133, time:1750768941.451874s req_ids:[8] -DEBUG 06-24 20:42:21 [manager.py:391] -ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:216.57466888427734ms total_cost_time:216.60280227661133ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:15197 prompt_cache_len:5151 prompt_cache_ratio:0.3389484766730276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 -DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:21 [batch.py:51] router release req id 8 -INFO 06-24 20:42:21 [manager.py:224] router recive req id 8 cost time 0.1082913875579834 s -INFO 06-24 20:42:21 [manager.py:68] detokenization recv req id 8 cost time 0.11018228530883789 s -DEBUG 06-24 20:42:21 [manager.py:391] Prefill Batch: batch_id=303448412231199530525807995885070100682, time:1750768941.676679s req_ids:[8] -DEBUG 06-24 20:42:21 [manager.py:391] -ERROR 06-24 20:42:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:383.070707321167ms total_cost_time:383.1191062927246ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:15198 prompt_cache_len:5151 prompt_cache_ratio:0.3389261744966443 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 -DEBUG 06-24 20:42:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:21 [batch.py:51] router release req id 8 -INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10771846771240234 s -INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.10977554321289062 s -DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=51877779178005503494266830329763452129, time:1750768942.0679781s req_ids:[8] -DEBUG 06-24 20:42:22 [manager.py:391] -ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:21 lightllm_req_id:8 first_token_cost:216.2184715270996ms total_cost_time:216.2797451019287ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15199 prompt_cache_len:5151 prompt_cache_ratio:0.33890387525495097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 -DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:22 [batch.py:51] router release req id 8 -INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10918045043945312 s -INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.1111598014831543 s -DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=146734159022037405147800068623686117245, time:1750768942.2900643s req_ids:[8] -DEBUG 06-24 20:42:22 [manager.py:391] -ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:216.01223945617676ms total_cost_time:216.07351303100586ms,out_token_counter:1 mean_per_token_cost_time: 0.06127357482910156ms prompt_token_num:15200 prompt_cache_len:5151 prompt_cache_ratio:0.3388815789473684 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 -DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:22 [batch.py:51] router release req id 8 -INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10944771766662598 s -INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11131501197814941 s -DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=40530824994474852669787678309673075508, time:1750768942.5145206s req_ids:[8] -DEBUG 06-24 20:42:22 [manager.py:391] -ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:217.27252006530762ms total_cost_time:217.3311710357666ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15201 prompt_cache_len:5151 prompt_cache_ratio:0.33885928557331757 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 -DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:22 [batch.py:51] router release req id 8 -INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10907149314880371 s -INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11101007461547852 s -DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=225259572782006256435400484393447163302, time:1750768942.7527535s req_ids:[8] -DEBUG 06-24 20:42:22 [manager.py:391] -ERROR 06-24 20:42:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:234.60936546325684ms total_cost_time:234.65991020202637ms,out_token_counter:1 mean_per_token_cost_time: 0.05054473876953125ms prompt_token_num:15202 prompt_cache_len:5151 prompt_cache_ratio:0.33883699513221943 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 -DEBUG 06-24 20:42:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:22 [batch.py:51] router release req id 8 -INFO 06-24 20:42:22 [manager.py:224] router recive req id 8 cost time 0.10909843444824219 s -INFO 06-24 20:42:22 [manager.py:68] detokenization recv req id 8 cost time 0.11119580268859863 s -DEBUG 06-24 20:42:22 [manager.py:391] Prefill Batch: batch_id=91389964008902718819311242030820172461, time:1750768942.9810998s req_ids:[8] -DEBUG 06-24 20:42:22 [manager.py:391] -INFO 06-24 20:42:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:42:23 [statics_utils.py:24] mean first cost: 232.944852018184 ms -INFO 06-24 20:42:23 [statics_utils.py:24] mean per token cost: 0.05717543137161622 ms -ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:22 lightllm_req_id:8 first_token_cost:386.6608142852783ms total_cost_time:386.7065906524658ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15203 prompt_cache_len:5151 prompt_cache_ratio:0.33881470762349536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 -DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:23 [batch.py:51] router release req id 8 -INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.10760307312011719 s -INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.10970354080200195 s -DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=285216507773551007437188874236833020041, time:1750768943.3764598s req_ids:[8] -DEBUG 06-24 20:42:23 [manager.py:391] -ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:231.25195503234863ms total_cost_time:231.31251335144043ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15204 prompt_cache_len:5151 prompt_cache_ratio:0.3387924230465667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 -DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:23 [batch.py:51] router release req id 8 -INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.11190986633300781 s -INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.11417865753173828 s -DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=333949612434111225613799545097591697778, time:1750768943.6196811s req_ids:[8] -DEBUG 06-24 20:42:23 [manager.py:391] -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:23 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:236.7570400238037ms total_cost_time:236.8011474609375ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15205 prompt_cache_len:5151 prompt_cache_ratio:0.338770141400855 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 -DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:23 [batch.py:51] router release req id 8 -INFO 06-24 20:42:23 [manager.py:224] router recive req id 8 cost time 0.10896468162536621 s -INFO 06-24 20:42:23 [manager.py:68] detokenization recv req id 8 cost time 0.11098527908325195 s -DEBUG 06-24 20:42:23 [manager.py:391] Prefill Batch: batch_id=151322254967275387401459255985038613734, time:1750768943.856885s req_ids:[8] -DEBUG 06-24 20:42:23 [manager.py:391] -ERROR 06-24 20:42:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:211.11083030700684ms total_cost_time:211.14325523376465ms,out_token_counter:1 mean_per_token_cost_time: 0.0324249267578125ms prompt_token_num:15206 prompt_cache_len:5151 prompt_cache_ratio:0.33874786268578194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 -DEBUG 06-24 20:42:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:23 [batch.py:51] router release req id 8 -INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.11091995239257812 s -INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11284542083740234 s -DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=339655059029783761566215805014444899395, time:1750768944.077825s req_ids:[8] -DEBUG 06-24 20:42:24 [manager.py:391] -ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:23 lightllm_req_id:8 first_token_cost:220.93868255615234ms total_cost_time:220.98135948181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15207 prompt_cache_len:5151 prompt_cache_ratio:0.33872558690076937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 -DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:24 [batch.py:51] router release req id 8 -INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.10907649993896484 s -INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11097145080566406 s -DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=99907977983238074245606867818415707455, time:1750768944.3018675s req_ids:[8] -DEBUG 06-24 20:42:24 [manager.py:391] -ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:216.84026718139648ms total_cost_time:216.8877124786377ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15208 prompt_cache_len:5151 prompt_cache_ratio:0.33870331404523935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 -DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:24 [batch.py:51] router release req id 8 -INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.10959887504577637 s -INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11164021492004395 s -DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=156053127792389014832685293353448591151, time:1750768944.5375254s req_ids:[8] -DEBUG 06-24 20:42:24 [manager.py:391] -ERROR 06-24 20:42:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:397.0463275909424ms total_cost_time:397.0935344696045ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15209 prompt_cache_len:5151 prompt_cache_ratio:0.338681044118614 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 -DEBUG 06-24 20:42:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:24 [batch.py:51] router release req id 8 -INFO 06-24 20:42:24 [manager.py:224] router recive req id 8 cost time 0.11020135879516602 s -INFO 06-24 20:42:24 [manager.py:68] detokenization recv req id 8 cost time 0.11220836639404297 s -DEBUG 06-24 20:42:24 [manager.py:391] Prefill Batch: batch_id=183865536449247141930842194879354619325, time:1750768944.929993s req_ids:[8] -DEBUG 06-24 20:42:24 [manager.py:391] -ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:24 lightllm_req_id:8 first_token_cost:208.4503173828125ms total_cost_time:208.4939479827881ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15210 prompt_cache_len:5151 prompt_cache_ratio:0.33865877712031556 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 -DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:25 [batch.py:51] router release req id 8 -INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10902929306030273 s -INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11122608184814453 s -DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=97291444785418183794932015250697884242, time:1750768945.145193s req_ids:[8] -DEBUG 06-24 20:42:25 [manager.py:391] -ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:218.15824508666992ms total_cost_time:218.2016372680664ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15211 prompt_cache_len:5151 prompt_cache_ratio:0.3386365130497666 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 -DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:25 [batch.py:51] router release req id 8 -INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10958027839660645 s -INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11162376403808594 s -DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=152815036522922430042762051595382150210, time:1750768945.3811061s req_ids:[8] -DEBUG 06-24 20:42:25 [manager.py:391] -ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:231.35852813720703ms total_cost_time:231.40215873718262ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15212 prompt_cache_len:5151 prompt_cache_ratio:0.3386142519063897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 -DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:25 [batch.py:51] router release req id 8 -INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s -INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.1104574203491211 s -DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=28347471482245471441616016408783668581, time:1750768945.610829s req_ids:[8] -DEBUG 06-24 20:42:25 [manager.py:391] -ERROR 06-24 20:42:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:210.48855781555176ms total_cost_time:210.53457260131836ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15213 prompt_cache_len:5151 prompt_cache_ratio:0.3385919936896076 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 -DEBUG 06-24 20:42:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:25 [batch.py:51] router release req id 8 -INFO 06-24 20:42:25 [manager.py:224] router recive req id 8 cost time 0.1092538833618164 s -INFO 06-24 20:42:25 [manager.py:68] detokenization recv req id 8 cost time 0.11125373840332031 s -DEBUG 06-24 20:42:25 [manager.py:391] Prefill Batch: batch_id=167309450980744844688063409561212324790, time:1750768945.8358808s req_ids:[8] -DEBUG 06-24 20:42:25 [manager.py:391] -ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:25 lightllm_req_id:8 first_token_cost:400.0062942504883ms total_cost_time:400.03323554992676ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:15214 prompt_cache_len:5151 prompt_cache_ratio:0.3385697383988432 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 -DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:26 [batch.py:51] router release req id 8 -INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10921430587768555 s -INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11124634742736816 s -DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=163013880815729421790907961308180951683, time:1750768946.2316225s req_ids:[8] -DEBUG 06-24 20:42:26 [manager.py:391] -ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:219.4373607635498ms total_cost_time:219.4831371307373ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15215 prompt_cache_len:5151 prompt_cache_ratio:0.33854748603351953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 -DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:26 [batch.py:51] router release req id 8 -INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10815072059631348 s -INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11019372940063477 s -DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=224759704197104578791684149686602452294, time:1750768946.4692094s req_ids:[8] -DEBUG 06-24 20:42:26 [manager.py:391] -ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:191.1611557006836ms total_cost_time:191.20502471923828ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15216 prompt_cache_len:5151 prompt_cache_ratio:0.33852523659305994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 -DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:26 [batch.py:51] router release req id 8 -INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10878157615661621 s -INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11086726188659668 s -DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=42535516330004298637238848173277342090, time:1750768946.6555483s req_ids:[8] -DEBUG 06-24 20:42:26 [manager.py:391] -ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:208.96124839782715ms total_cost_time:209.00678634643555ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15217 prompt_cache_len:5151 prompt_cache_ratio:0.33850299007688767 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 -DEBUG 06-24 20:42:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:26 [batch.py:51] router release req id 8 -INFO 06-24 20:42:26 [manager.py:224] router recive req id 8 cost time 0.10907435417175293 s -INFO 06-24 20:42:26 [manager.py:68] detokenization recv req id 8 cost time 0.11107349395751953 s -DEBUG 06-24 20:42:26 [manager.py:391] Prefill Batch: batch_id=311406162475802883682689433313708292888, time:1750768946.8719618s req_ids:[8] -DEBUG 06-24 20:42:26 [manager.py:391] -ERROR 06-24 20:42:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:213.96803855895996ms total_cost_time:214.01357650756836ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15218 prompt_cache_len:5151 prompt_cache_ratio:0.33848074648442633 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 -DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:27 [batch.py:51] router release req id 8 -INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10867881774902344 s -INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.1105954647064209 s -DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=156952397275002005425798255186950474003, time:1750768947.092765s req_ids:[8] -DEBUG 06-24 20:42:27 [manager.py:391] -ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:26 lightllm_req_id:8 first_token_cost:347.123384475708ms total_cost_time:347.1667766571045ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15219 prompt_cache_len:5151 prompt_cache_ratio:0.33845850581509956 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 -DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:27 [batch.py:51] router release req id 8 -INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10931038856506348 s -INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11133122444152832 s -DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=248389825422883365581418157280440671625, time:1750768947.4479792s req_ids:[8] -DEBUG 06-24 20:42:27 [manager.py:391] -ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:209.9626064300537ms total_cost_time:210.0067138671875ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15220 prompt_cache_len:5151 prompt_cache_ratio:0.33843626806833116 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 -DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:27 [batch.py:51] router release req id 8 -INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10956573486328125 s -INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11157965660095215 s -DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=192128486609736799629233222319838368635, time:1750768947.664769s req_ids:[8] -DEBUG 06-24 20:42:27 [manager.py:391] -ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:213.23513984680176ms total_cost_time:213.27805519104004ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15221 prompt_cache_len:5151 prompt_cache_ratio:0.3384140332435451 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 -DEBUG 06-24 20:42:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:27 [batch.py:51] router release req id 8 -INFO 06-24 20:42:27 [manager.py:224] router recive req id 8 cost time 0.10822319984436035 s -INFO 06-24 20:42:27 [manager.py:68] detokenization recv req id 8 cost time 0.11033463478088379 s -DEBUG 06-24 20:42:27 [manager.py:391] Prefill Batch: batch_id=24751020865396336538753651257858856054, time:1750768947.8837166s req_ids:[8] -DEBUG 06-24 20:42:27 [manager.py:391] -ERROR 06-24 20:42:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:215.23308753967285ms total_cost_time:215.27481079101562ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15222 prompt_cache_len:5151 prompt_cache_ratio:0.33839180134016555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 -DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:28 [batch.py:51] router release req id 8 -INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10889863967895508 s -INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11082029342651367 s -DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=324145480685840641495072148659263265532, time:1750768948.1059062s req_ids:[8] -DEBUG 06-24 20:42:28 [manager.py:391] -ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:27 lightllm_req_id:8 first_token_cost:217.3595428466797ms total_cost_time:217.40198135375977ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15223 prompt_cache_len:5151 prompt_cache_ratio:0.3383695723576168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 -DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:28 [batch.py:51] router release req id 8 -INFO 06-24 20:42:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s -INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11097836494445801 s -DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=290819936024197858379648165854937545962, time:1750768948.3310833s req_ids:[8] -DEBUG 06-24 20:42:28 [manager.py:391] -ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:391.1268711090088ms total_cost_time:391.1712169647217ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15224 prompt_cache_len:5151 prompt_cache_ratio:0.33834734629532315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 -DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:28 [batch.py:51] router release req id 8 -INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10889196395874023 s -INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11055397987365723 s -DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=117876272925479813246889919741665322065, time:1750768948.729227s req_ids:[8] -DEBUG 06-24 20:42:28 [manager.py:391] -ERROR 06-24 20:42:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:216.34268760681152ms total_cost_time:216.38751029968262ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15225 prompt_cache_len:5151 prompt_cache_ratio:0.33832512315270935 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 -DEBUG 06-24 20:42:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:28 [batch.py:51] router release req id 8 -INFO 06-24 20:42:28 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s -INFO 06-24 20:42:28 [manager.py:68] detokenization recv req id 8 cost time 0.11058282852172852 s -DEBUG 06-24 20:42:28 [manager.py:391] Prefill Batch: batch_id=25951506112116602865193496287732003066, time:1750768948.951071s req_ids:[8] -DEBUG 06-24 20:42:28 [manager.py:391] -ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:28 lightllm_req_id:8 first_token_cost:217.43345260620117ms total_cost_time:217.47922897338867ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15226 prompt_cache_len:5151 prompt_cache_ratio:0.33830290292920007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 -DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:29 [batch.py:51] router release req id 8 -INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11277055740356445 s -DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=275937255153162806138996847243901716838, time:1750768949.1742778s req_ids:[8] -DEBUG 06-24 20:42:29 [manager.py:391] -INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11487984657287598 s -ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:217.30661392211914ms total_cost_time:217.35000610351562ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15227 prompt_cache_len:5151 prompt_cache_ratio:0.33828068562422015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 -DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:29 [batch.py:51] router release req id 8 -INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11216044425964355 s -INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11431145668029785 s -DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=135179875387446902422752135353394010466, time:1750768949.4319224s req_ids:[8] -DEBUG 06-24 20:42:29 [manager.py:391] -ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:255.0947666168213ms total_cost_time:255.11574745178223ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:15228 prompt_cache_len:5151 prompt_cache_ratio:0.33825847123719466 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 -DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:29 [batch.py:51] router release req id 8 -INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.10884928703308105 s -INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.1106407642364502 s -DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=33298410059919722005003279901942847236, time:1750768949.6610684s req_ids:[8] -DEBUG 06-24 20:42:29 [manager.py:391] -ERROR 06-24 20:42:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:175.3823757171631ms total_cost_time:175.41170120239258ms,out_token_counter:1 mean_per_token_cost_time: 0.029325485229492188ms prompt_token_num:15229 prompt_cache_len:5151 prompt_cache_ratio:0.33823625976754873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 -DEBUG 06-24 20:42:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:29 [batch.py:51] router release req id 8 -INFO 06-24 20:42:29 [manager.py:224] router recive req id 8 cost time 0.11077356338500977 s -INFO 06-24 20:42:29 [manager.py:68] detokenization recv req id 8 cost time 0.11278080940246582 s -DEBUG 06-24 20:42:29 [manager.py:391] Prefill Batch: batch_id=54710067149018350118956873490146067376, time:1750768949.843968s req_ids:[8] -DEBUG 06-24 20:42:29 [manager.py:391] -ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:29 lightllm_req_id:8 first_token_cost:384.5257759094238ms total_cost_time:384.5703601837158ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15230 prompt_cache_len:5151 prompt_cache_ratio:0.3382140512147078 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 -DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:30 [batch.py:51] router release req id 8 -INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11068367958068848 s -INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11259150505065918 s -DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=154394495422275240855932498769734734563, time:1750768950.2337487s req_ids:[8] -DEBUG 06-24 20:42:30 [manager.py:391] -ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:216.55726432800293ms total_cost_time:216.6006565093994ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15231 prompt_cache_len:5151 prompt_cache_ratio:0.3381918455780973 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 -DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:30 [batch.py:51] router release req id 8 -INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11116528511047363 s -INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11313652992248535 s -DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=13144452047582576582857498679996981844, time:1750768950.4903393s req_ids:[8] -DEBUG 06-24 20:42:30 [manager.py:391] -ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:254.37426567077637ms total_cost_time:254.41837310791016ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15232 prompt_cache_len:5151 prompt_cache_ratio:0.33816964285714285 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 -DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:30 [batch.py:51] router release req id 8 -INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.11394071578979492 s -INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11597943305969238 s -DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=207564985157224758571846504791144265699, time:1750768950.7233686s req_ids:[8] -DEBUG 06-24 20:42:30 [manager.py:391] -DEBUG 06-24 20:42:30 [stats.py:37] Avg tokens(prompt+generate) throughput: 60506.157 tokens/s -DEBUG 06-24 20:42:30 [stats.py:37] Avg prompt tokens throughput: 60498.303 tokens/s -DEBUG 06-24 20:42:30 [stats.py:37] Avg generate tokens throughput: 7.854 tokens/s -ERROR 06-24 20:42:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:221.15778923034668ms total_cost_time:221.20213508605957ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15233 prompt_cache_len:5151 prompt_cache_ratio:0.33814744305127026 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 -DEBUG 06-24 20:42:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:30 [batch.py:51] router release req id 8 -INFO 06-24 20:42:30 [manager.py:224] router recive req id 8 cost time 0.1099851131439209 s -INFO 06-24 20:42:30 [manager.py:68] detokenization recv req id 8 cost time 0.11192083358764648 s -DEBUG 06-24 20:42:30 [manager.py:391] Prefill Batch: batch_id=38554427302605331109027670585372408576, time:1750768950.9511874s req_ids:[8] -DEBUG 06-24 20:42:30 [manager.py:391] -ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:30 lightllm_req_id:8 first_token_cost:220.1673984527588ms total_cost_time:220.21150588989258ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15234 prompt_cache_len:5151 prompt_cache_ratio:0.33812524615990547 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 -DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:31 [batch.py:51] router release req id 8 -INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.10855889320373535 s -INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.11063671112060547 s -DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=324580595878255722641191650772519065767, time:1750768951.1775544s req_ids:[8] -DEBUG 06-24 20:42:31 [manager.py:391] -ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:212.97574043273926ms total_cost_time:213.01984786987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15235 prompt_cache_len:5151 prompt_cache_ratio:0.33810305218247455 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 -DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:31 [batch.py:51] router release req id 8 -INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.31091952323913574 s -INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.31317663192749023 s -DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=27356283079339726189606001896392536810, time:1750768951.6045935s req_ids:[8] -DEBUG 06-24 20:42:31 [manager.py:391] -ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:431.8280220031738ms total_cost_time:431.8726062774658ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15236 prompt_cache_len:5151 prompt_cache_ratio:0.3380808611184038 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 -DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:31 [batch.py:51] router release req id 8 -INFO 06-24 20:42:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:31 [manager.py:224] router recive req id 8 cost time 0.10943365097045898 s -INFO 06-24 20:42:31 [manager.py:68] detokenization recv req id 8 cost time 0.11138010025024414 s -DEBUG 06-24 20:42:31 [manager.py:391] Prefill Batch: batch_id=67507952225215512254920186798505852976, time:1750768951.8471699s req_ids:[8] -DEBUG 06-24 20:42:31 [manager.py:391] -ERROR 06-24 20:42:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:230.96728324890137ms total_cost_time:230.9889793395996ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:15237 prompt_cache_len:5151 prompt_cache_ratio:0.3380586729671195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 -DEBUG 06-24 20:42:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:31 [batch.py:51] router release req id 8 -INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s -INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11153769493103027 s -DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=210861354281717730838161154896459820329, time:1750768952.0734813s req_ids:[8] -DEBUG 06-24 20:42:32 [manager.py:391] -ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:31 lightllm_req_id:8 first_token_cost:220.64971923828125ms total_cost_time:220.69454193115234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15238 prompt_cache_len:5151 prompt_cache_ratio:0.3380364877280483 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 -DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:32 [batch.py:51] router release req id 8 -INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10902643203735352 s -INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11081838607788086 s -DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=29490741528308523511030928288014233792, time:1750768952.2985284s req_ids:[8] -DEBUG 06-24 20:42:32 [manager.py:391] -ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:176.00083351135254ms total_cost_time:176.0389804840088ms,out_token_counter:1 mean_per_token_cost_time: 0.03814697265625ms prompt_token_num:15239 prompt_cache_len:5151 prompt_cache_ratio:0.33801430540061683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 -DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:32 [batch.py:51] router release req id 8 -INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10778403282165527 s -INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.10960555076599121 s -DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=125448507203496648137271374463379221196, time:1750768952.4797475s req_ids:[8] -DEBUG 06-24 20:42:32 [manager.py:391] -ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:205.78765869140625ms total_cost_time:205.83057403564453ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15240 prompt_cache_len:5151 prompt_cache_ratio:0.337992125984252 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 -DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:32 [batch.py:51] router release req id 8 -INFO 06-24 20:42:32 [manager.py:224] router recive req id 8 cost time 0.10804128646850586 s -INFO 06-24 20:42:32 [manager.py:68] detokenization recv req id 8 cost time 0.11007571220397949 s -DEBUG 06-24 20:42:32 [manager.py:391] Prefill Batch: batch_id=330574665252447096149541816101523739973, time:1750768952.691846s req_ids:[8] -DEBUG 06-24 20:42:32 [manager.py:391] -ERROR 06-24 20:42:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:378.2675266265869ms total_cost_time:378.3283233642578ms,out_token_counter:1 mean_per_token_cost_time: 0.06079673767089844ms prompt_token_num:15241 prompt_cache_len:5151 prompt_cache_ratio:0.33796994947838066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 -DEBUG 06-24 20:42:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:32 [batch.py:51] router release req id 8 -INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11238646507263184 s -DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=128517104914843866067287262035021677960, time:1750768953.077379s req_ids:[8] -DEBUG 06-24 20:42:33 [manager.py:391] -INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11432385444641113 s -ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:32 lightllm_req_id:8 first_token_cost:216.3839340209961ms total_cost_time:216.4287567138672ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15242 prompt_cache_len:5151 prompt_cache_ratio:0.33794777588243013 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 -DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:33 [batch.py:51] router release req id 8 -INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11250591278076172 s -DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=85046733415100996830233130992949663858, time:1750768953.301085s req_ids:[8] -DEBUG 06-24 20:42:33 [manager.py:391] -INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11464571952819824 s -ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:216.40253067016602ms total_cost_time:216.44902229309082ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15243 prompt_cache_len:5151 prompt_cache_ratio:0.3379256051958276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 -DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:33 [batch.py:51] router release req id 8 -INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.11189079284667969 s -INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11378955841064453 s -DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=37397979130218230758955987671071890910, time:1750768953.5256927s req_ids:[8] -DEBUG 06-24 20:42:33 [manager.py:391] -ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:212.06283569335938ms total_cost_time:212.10908889770508ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15244 prompt_cache_len:5151 prompt_cache_ratio:0.33790343741800055 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 -DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:33 [batch.py:51] router release req id 8 -INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.10970377922058105 s -INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11171817779541016 s -DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=221122639943293744259544217770029869344, time:1750768953.7444096s req_ids:[8] -DEBUG 06-24 20:42:33 [manager.py:391] -ERROR 06-24 20:42:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:215.61884880065918ms total_cost_time:215.66247940063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15245 prompt_cache_len:5151 prompt_cache_ratio:0.3378812725483765 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 -DEBUG 06-24 20:42:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:33 [batch.py:51] router release req id 8 -INFO 06-24 20:42:33 [manager.py:224] router recive req id 8 cost time 0.10953116416931152 s -INFO 06-24 20:42:33 [manager.py:68] detokenization recv req id 8 cost time 0.11148524284362793 s -DEBUG 06-24 20:42:33 [manager.py:391] Prefill Batch: batch_id=148271004254697567596119525530489805705, time:1750768953.9660368s req_ids:[8] -DEBUG 06-24 20:42:33 [manager.py:391] -ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:33 lightllm_req_id:8 first_token_cost:381.4873695373535ms total_cost_time:381.5462589263916ms,out_token_counter:1 mean_per_token_cost_time: 0.05888938903808594ms prompt_token_num:15246 prompt_cache_len:5151 prompt_cache_ratio:0.3378591105863833 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 -DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:34 [batch.py:51] router release req id 8 -INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10900259017944336 s -INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11106348037719727 s -DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=153653718605778331430612870875373893264, time:1750768954.355918s req_ids:[8] -DEBUG 06-24 20:42:34 [manager.py:391] -ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:218.86253356933594ms total_cost_time:218.90640258789062ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15247 prompt_cache_len:5151 prompt_cache_ratio:0.3378369515314488 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 -DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:34 [batch.py:51] router release req id 8 -INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10845804214477539 s -INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.10936522483825684 s -DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=3045314096655996475288756209180429968, time:1750768954.581874s req_ids:[8] -DEBUG 06-24 20:42:34 [manager.py:391] -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:34 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:175.08554458618164ms total_cost_time:175.12941360473633ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15248 prompt_cache_len:5151 prompt_cache_ratio:0.3378147953830011 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 -DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:34 [batch.py:51] router release req id 8 -INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10876297950744629 s -INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11086845397949219 s -DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=131145743787009875682427713212871096648, time:1750768954.762036s req_ids:[8] -DEBUG 06-24 20:42:34 [manager.py:391] -ERROR 06-24 20:42:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:205.63840866088867ms total_cost_time:205.68275451660156ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15249 prompt_cache_len:5151 prompt_cache_ratio:0.3377926421404682 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 -DEBUG 06-24 20:42:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:34 [batch.py:51] router release req id 8 -INFO 06-24 20:42:34 [manager.py:224] router recive req id 8 cost time 0.10895180702209473 s -INFO 06-24 20:42:34 [manager.py:68] detokenization recv req id 8 cost time 0.11102509498596191 s -DEBUG 06-24 20:42:34 [manager.py:391] Prefill Batch: batch_id=190237609166860698510660642048507000917, time:1750768954.9716654s req_ids:[8] -DEBUG 06-24 20:42:34 [manager.py:391] -ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:34 lightllm_req_id:8 first_token_cost:213.5453224182129ms total_cost_time:213.58871459960938ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15250 prompt_cache_len:5151 prompt_cache_ratio:0.33777049180327867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 -DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:35 [batch.py:51] router release req id 8 -INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.10855531692504883 s -INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.11056852340698242 s -DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=29226589432581069773330096974399410771, time:1750768955.1923192s req_ids:[8] -DEBUG 06-24 20:42:35 [manager.py:391] -ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:216.48788452148438ms total_cost_time:216.53056144714355ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15251 prompt_cache_len:5151 prompt_cache_ratio:0.33774834437086093 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 -DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:35 [batch.py:51] router release req id 8 -INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.11019277572631836 s -INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.1121358871459961 s -DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=197854366029171773451263009704251934502, time:1750768955.4172056s req_ids:[8] -DEBUG 06-24 20:42:35 [manager.py:391] -ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:374.58229064941406ms total_cost_time:374.62568283081055ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15252 prompt_cache_len:5151 prompt_cache_ratio:0.33772619984264357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 -DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:35 [batch.py:51] router release req id 8 -INFO 06-24 20:42:35 [manager.py:224] router recive req id 8 cost time 0.10869193077087402 s -INFO 06-24 20:42:35 [manager.py:68] detokenization recv req id 8 cost time 0.11061859130859375 s -DEBUG 06-24 20:42:35 [manager.py:391] Prefill Batch: batch_id=4472235495899164523274913813495401011, time:1750768955.7989151s req_ids:[8] -DEBUG 06-24 20:42:35 [manager.py:391] -ERROR 06-24 20:42:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:216.09759330749512ms total_cost_time:216.1407470703125ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15253 prompt_cache_len:5151 prompt_cache_ratio:0.3377040582180555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 -DEBUG 06-24 20:42:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:35 [batch.py:51] router release req id 8 -INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10822176933288574 s -INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025595664978027 s -DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=34239524635493832308781079472720458273, time:1750768956.0217457s req_ids:[8] -DEBUG 06-24 20:42:36 [manager.py:391] -ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:35 lightllm_req_id:8 first_token_cost:212.25237846374512ms total_cost_time:212.2964859008789ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15254 prompt_cache_len:5151 prompt_cache_ratio:0.3376819194965255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 -DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:36 [batch.py:51] router release req id 8 -INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10984563827514648 s -INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11197209358215332 s -DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=199908730056097122643285332484719901504, time:1750768956.240059s req_ids:[8] -DEBUG 06-24 20:42:36 [manager.py:391] -ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:217.25010871887207ms total_cost_time:217.29373931884766ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15255 prompt_cache_len:5151 prompt_cache_ratio:0.3376597836774828 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 -DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:36 [batch.py:51] router release req id 8 -INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10911297798156738 s -INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.11044478416442871 s -DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=299275304387965678991653534924160291823, time:1750768956.465387s req_ids:[8] -DEBUG 06-24 20:42:36 [manager.py:391] -ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:216.3708209991455ms total_cost_time:216.4134979248047ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15256 prompt_cache_len:5151 prompt_cache_ratio:0.33763765076035657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 -DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:36 [batch.py:51] router release req id 8 -INFO 06-24 20:42:36 [manager.py:224] router recive req id 8 cost time 0.10865259170532227 s -INFO 06-24 20:42:36 [manager.py:68] detokenization recv req id 8 cost time 0.1107017993927002 s -DEBUG 06-24 20:42:36 [manager.py:391] Prefill Batch: batch_id=301251761444856908197318849533497271219, time:1750768956.6872032s req_ids:[8] -DEBUG 06-24 20:42:36 [manager.py:391] -ERROR 06-24 20:42:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:386.75689697265625ms total_cost_time:386.81840896606445ms,out_token_counter:1 mean_per_token_cost_time: 0.061511993408203125ms prompt_token_num:15257 prompt_cache_len:5151 prompt_cache_ratio:0.33761552074457624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 -DEBUG 06-24 20:42:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:36 [batch.py:51] router release req id 8 -INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.11145901679992676 s -INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11346960067749023 s -DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=239549778611540566230187740711706982135, time:1750768957.0811865s req_ids:[8] -DEBUG 06-24 20:42:37 [manager.py:391] -ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:36 lightllm_req_id:8 first_token_cost:213.58156204223633ms total_cost_time:213.62543106079102ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15258 prompt_cache_len:5151 prompt_cache_ratio:0.33759339362957136 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 -DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:37 [batch.py:51] router release req id 8 -INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.10931849479675293 s -INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11143732070922852 s -DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=143272613221294398071794191884084107276, time:1750768957.304773s req_ids:[8] -DEBUG 06-24 20:42:37 [manager.py:391] -ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:216.56489372253418ms total_cost_time:216.60804748535156ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15259 prompt_cache_len:5151 prompt_cache_ratio:0.3375712694147716 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 -DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:37 [batch.py:51] router release req id 8 -INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.11106324195861816 s -INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11294031143188477 s -DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=51785673864588079644180840029810005998, time:1750768957.5249245s req_ids:[8] -DEBUG 06-24 20:42:37 [manager.py:391] -ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:213.50860595703125ms total_cost_time:213.55342864990234ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15260 prompt_cache_len:5151 prompt_cache_ratio:0.3375491480996068 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 -DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:37 [batch.py:51] router release req id 8 -INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.10996389389038086 s -INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11209917068481445 s -DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=264176749272298275267341445895125438090, time:1750768957.7461736s req_ids:[8] -DEBUG 06-24 20:42:37 [manager.py:391] -ERROR 06-24 20:42:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:214.18166160583496ms total_cost_time:214.22505378723145ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15261 prompt_cache_len:5151 prompt_cache_ratio:0.33752702968350695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 -DEBUG 06-24 20:42:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:37 [batch.py:51] router release req id 8 -INFO 06-24 20:42:37 [manager.py:224] router recive req id 8 cost time 0.1085672378540039 s -INFO 06-24 20:42:37 [manager.py:68] detokenization recv req id 8 cost time 0.11065244674682617 s -DEBUG 06-24 20:42:37 [manager.py:391] Prefill Batch: batch_id=236767826369951394620205539337971581064, time:1750768957.96786s req_ids:[8] -DEBUG 06-24 20:42:37 [manager.py:391] -ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:37 lightllm_req_id:8 first_token_cost:351.1476516723633ms total_cost_time:351.1929512023926ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15262 prompt_cache_len:5151 prompt_cache_ratio:0.3375049141659022 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 -DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:38 [batch.py:51] router release req id 8 -INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10834646224975586 s -INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1102750301361084 s -DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=224205266336419571404865413319454941215, time:1750768958.3244746s req_ids:[8] -DEBUG 06-24 20:42:38 [manager.py:391] -ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:216.62545204162598ms total_cost_time:216.66884422302246ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15263 prompt_cache_len:5151 prompt_cache_ratio:0.3374828015462229 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 -DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:38 [batch.py:51] router release req id 8 -INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10828590393066406 s -INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1103816032409668 s -DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=233071193449393166954206902675184340935, time:1750768958.5495353s req_ids:[8] -DEBUG 06-24 20:42:38 [manager.py:391] -ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:218.40500831604004ms total_cost_time:218.44959259033203ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15264 prompt_cache_len:5151 prompt_cache_ratio:0.3374606918238994 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 -DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:38 [batch.py:51] router release req id 8 -INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10899186134338379 s -INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.11118888854980469 s -DEBUG 06-24 20:42:38 [manager.py:391] Prefill Batch: batch_id=327205533889318930082625987062977388943, time:1750768958.7728577s req_ids:[8] -DEBUG 06-24 20:42:38 [manager.py:391] -ERROR 06-24 20:42:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:216.57180786132812ms total_cost_time:216.61686897277832ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15265 prompt_cache_len:5151 prompt_cache_ratio:0.33743858499836227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 -DEBUG 06-24 20:42:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:38 [batch.py:51] router release req id 8 -INFO 06-24 20:42:38 [manager.py:224] router recive req id 8 cost time 0.10772418975830078 s -INFO 06-24 20:42:38 [manager.py:68] detokenization recv req id 8 cost time 0.1086127758026123 s -DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=92699773777352220089411950717588010729, time:1750768959.0058718s req_ids:[8] -DEBUG 06-24 20:42:39 [manager.py:391] -ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:38 lightllm_req_id:8 first_token_cost:190.78421592712402ms total_cost_time:190.83833694458008ms,out_token_counter:1 mean_per_token_cost_time: 0.05412101745605469ms prompt_token_num:15266 prompt_cache_len:5151 prompt_cache_ratio:0.3374164810690423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 -DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:39 [batch.py:51] router release req id 8 -INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10918068885803223 s -INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.1110680103302002 s -DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=25172233717920294897555473414303272221, time:1750768959.193971s req_ids:[8] -DEBUG 06-24 20:42:39 [manager.py:391] -ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:381.75106048583984ms total_cost_time:381.79636001586914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15267 prompt_cache_len:5151 prompt_cache_ratio:0.3373943800353704 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 -DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:39 [batch.py:51] router release req id 8 -INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10816049575805664 s -INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.11003923416137695 s -DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=89577200359681050252138795102324039331, time:1750768959.5815437s req_ids:[8] -DEBUG 06-24 20:42:39 [manager.py:391] -ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:173.42853546142578ms total_cost_time:173.47192764282227ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15268 prompt_cache_len:5151 prompt_cache_ratio:0.3373722818967776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 -DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:39 [batch.py:51] router release req id 8 -INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10783910751342773 s -INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952615737915039 s -DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=314245843455946025836402085714239572824, time:1750768959.761197s req_ids:[8] -DEBUG 06-24 20:42:39 [manager.py:391] -ERROR 06-24 20:42:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:173.3419895172119ms total_cost_time:173.3996868133545ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15269 prompt_cache_len:5151 prompt_cache_ratio:0.337350186652695 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 -DEBUG 06-24 20:42:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:39 [batch.py:51] router release req id 8 -INFO 06-24 20:42:39 [manager.py:224] router recive req id 8 cost time 0.10966873168945312 s -DEBUG 06-24 20:42:39 [manager.py:391] Prefill Batch: batch_id=254714919351273262804701767517969830652, time:1750768959.9416606s req_ids:[8] -DEBUG 06-24 20:42:39 [manager.py:391] -INFO 06-24 20:42:39 [manager.py:68] detokenization recv req id 8 cost time 0.11176300048828125 s -ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:39 lightllm_req_id:8 first_token_cost:212.04471588134766ms total_cost_time:212.08763122558594ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15270 prompt_cache_len:5151 prompt_cache_ratio:0.337328094302554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 -DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:40 [batch.py:51] router release req id 8 -INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.10869884490966797 s -INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.11075401306152344 s -DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=19166768681254532233948511541079624824, time:1750768960.1618207s req_ids:[8] -DEBUG 06-24 20:42:40 [manager.py:391] -ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:213.84382247924805ms total_cost_time:213.88721466064453ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15271 prompt_cache_len:5151 prompt_cache_ratio:0.33730600484578616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 -DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:40 [batch.py:51] router release req id 8 -INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.11330294609069824 s -INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.11533331871032715 s -DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=204692234715884295545748767734487068399, time:1750768960.419041s req_ids:[8] -DEBUG 06-24 20:42:40 [manager.py:391] -ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:256.53839111328125ms total_cost_time:256.5774917602539ms,out_token_counter:1 mean_per_token_cost_time: 0.03910064697265625ms prompt_token_num:15272 prompt_cache_len:5151 prompt_cache_ratio:0.33728391828182297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 -DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:40 [batch.py:51] router release req id 8 -INFO 06-24 20:42:40 [manager.py:224] router recive req id 8 cost time 0.10823988914489746 s -INFO 06-24 20:42:40 [manager.py:68] detokenization recv req id 8 cost time 0.1102762222290039 s -DEBUG 06-24 20:42:40 [manager.py:391] Prefill Batch: batch_id=268795295248525539910702455514642451641, time:1750768960.6497185s req_ids:[8] -DEBUG 06-24 20:42:40 [manager.py:391] -ERROR 06-24 20:42:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:42:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 59835.101 tokens/s -DEBUG 06-24 20:42:40 [stats.py:37] Avg prompt tokens throughput: 59827.159 tokens/s -DEBUG 06-24 20:42:40 [stats.py:37] Avg generate tokens throughput: 7.942 tokens/s -INFO 06-24 20:42:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:389.4011974334717ms total_cost_time:389.4460201263428ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15273 prompt_cache_len:5151 prompt_cache_ratio:0.33726183461009623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 -DEBUG 06-24 20:42:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:40 [batch.py:51] router release req id 8 -INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s -INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11030173301696777 s -DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=57573714678294995531376285014907843847, time:1750768961.0450807s req_ids:[8] -DEBUG 06-24 20:42:41 [manager.py:391] -ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:40 lightllm_req_id:8 first_token_cost:212.60547637939453ms total_cost_time:212.65244483947754ms,out_token_counter:1 mean_per_token_cost_time: 0.04696846008300781ms prompt_token_num:15274 prompt_cache_len:5151 prompt_cache_ratio:0.33723975383003796 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 -DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:41 [batch.py:51] router release req id 8 -INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10874795913696289 s -INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11092638969421387 s -DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=333533988152382795405080613011908114404, time:1750768961.262557s req_ids:[8] -DEBUG 06-24 20:42:41 [manager.py:391] -ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:209.6564769744873ms total_cost_time:209.7012996673584ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15275 prompt_cache_len:5151 prompt_cache_ratio:0.3372176759410802 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 -DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:41 [batch.py:51] router release req id 8 -INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s -INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11007881164550781 s -DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=73146709741062125027801460311818684545, time:1750768961.4802914s req_ids:[8] -DEBUG 06-24 20:42:41 [manager.py:391] -ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:212.40472793579102ms total_cost_time:212.42666244506836ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:15276 prompt_cache_len:5151 prompt_cache_ratio:0.33719560094265516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 -DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:41 [batch.py:51] router release req id 8 -INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10896039009094238 s -INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s -DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=277453151690861920073459323343605934623, time:1750768961.699458s req_ids:[8] -DEBUG 06-24 20:42:41 [manager.py:391] -ERROR 06-24 20:42:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:218.67990493774414ms total_cost_time:218.72329711914062ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15277 prompt_cache_len:5151 prompt_cache_ratio:0.3371735288341952 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 -DEBUG 06-24 20:42:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:41 [batch.py:51] router release req id 8 -INFO 06-24 20:42:41 [manager.py:224] router recive req id 8 cost time 0.10841536521911621 s -INFO 06-24 20:42:41 [manager.py:68] detokenization recv req id 8 cost time 0.1104135513305664 s -DEBUG 06-24 20:42:41 [manager.py:391] Prefill Batch: batch_id=249761171618258891912573798028683362201, time:1750768961.939264s req_ids:[8] -DEBUG 06-24 20:42:41 [manager.py:391] -ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:41 lightllm_req_id:8 first_token_cost:218.72901916503906ms total_cost_time:218.77360343933105ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15278 prompt_cache_len:5151 prompt_cache_ratio:0.3371514596151329 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 -DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:42 [batch.py:51] router release req id 8 -INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.3140742778778076 s -INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.3162975311279297 s -DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=273266287757559814926770517223130730889, time:1750768962.3595288s req_ids:[8] -DEBUG 06-24 20:42:42 [manager.py:391] -ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:429.78358268737793ms total_cost_time:429.83102798461914ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15279 prompt_cache_len:5151 prompt_cache_ratio:0.3371293932849008 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 -DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:42 [batch.py:51] router release req id 8 -INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.11202740669250488 s -INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.11411428451538086 s -DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=154082030348393430774514024572163345470, time:1750768962.5859945s req_ids:[8] -DEBUG 06-24 20:42:42 [manager.py:391] -ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:213.87410163879395ms total_cost_time:213.91940116882324ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15280 prompt_cache_len:5151 prompt_cache_ratio:0.33710732984293196 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 -DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:42 [batch.py:51] router release req id 8 -INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.11049938201904297 s -INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.11228060722351074 s -DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=86304403666834894973589082259328266084, time:1750768962.8055751s req_ids:[8] -DEBUG 06-24 20:42:42 [manager.py:391] -ERROR 06-24 20:42:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:173.98786544799805ms total_cost_time:174.03292655944824ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15281 prompt_cache_len:5151 prompt_cache_ratio:0.3370852692886591 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 -DEBUG 06-24 20:42:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:42 [batch.py:51] router release req id 8 -INFO 06-24 20:42:42 [manager.py:224] router recive req id 8 cost time 0.10813379287719727 s -INFO 06-24 20:42:42 [manager.py:68] detokenization recv req id 8 cost time 0.10999512672424316 s -DEBUG 06-24 20:42:42 [manager.py:391] Prefill Batch: batch_id=80001343279135406418980920042664219934, time:1750768962.9881215s req_ids:[8] -DEBUG 06-24 20:42:42 [manager.py:391] -ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:42 lightllm_req_id:8 first_token_cost:175.9941577911377ms total_cost_time:176.039457321167ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15282 prompt_cache_len:5151 prompt_cache_ratio:0.33706321162151553 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 -DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:43 [batch.py:51] router release req id 8 -INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10897111892700195 s -INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11089539527893066 s -DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=143991533333115594383488610385072590830, time:1750768963.1694014s req_ids:[8] -DEBUG 06-24 20:42:43 [manager.py:391] -ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:210.35432815551758ms total_cost_time:210.40058135986328ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15283 prompt_cache_len:5151 prompt_cache_ratio:0.3370411568409344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 -DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:43 [batch.py:51] router release req id 8 -INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10897183418273926 s -INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.1109616756439209 s -DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=86503241519473131785912430707854228605, time:1750768963.387512s req_ids:[8] -DEBUG 06-24 20:42:43 [manager.py:391] -ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:381.89172744750977ms total_cost_time:381.9551467895508ms,out_token_counter:1 mean_per_token_cost_time: 0.06341934204101562ms prompt_token_num:15284 prompt_cache_len:5151 prompt_cache_ratio:0.3370191049463491 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 -DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:43 [batch.py:51] router release req id 8 -INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10857605934143066 s -INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11029553413391113 s -DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=302909532962369433250945153497286574004, time:1750768963.7756867s req_ids:[8] -DEBUG 06-24 20:42:43 [manager.py:391] -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:43 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:173.8271713256836ms total_cost_time:173.87008666992188ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15285 prompt_cache_len:5151 prompt_cache_ratio:0.3369970559371933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 -DEBUG 06-24 20:42:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:43 [batch.py:51] router release req id 8 -INFO 06-24 20:42:43 [manager.py:224] router recive req id 8 cost time 0.10913348197937012 s -INFO 06-24 20:42:43 [manager.py:68] detokenization recv req id 8 cost time 0.11125445365905762 s -DEBUG 06-24 20:42:43 [manager.py:391] Prefill Batch: batch_id=133319371431059373422684897580878663486, time:1750768963.9567096s req_ids:[8] -DEBUG 06-24 20:42:43 [manager.py:391] -ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:43 lightllm_req_id:8 first_token_cost:211.12847328186035ms total_cost_time:211.17067337036133ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15286 prompt_cache_len:5151 prompt_cache_ratio:0.3369750098129007 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 -DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:44 [batch.py:51] router release req id 8 -INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10867500305175781 s -INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11057782173156738 s -DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=232786782293569675701267987969537747441, time:1750768964.175408s req_ids:[8] -DEBUG 06-24 20:42:44 [manager.py:391] -ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:216.5670394897461ms total_cost_time:216.61090850830078ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15287 prompt_cache_len:5151 prompt_cache_ratio:0.3369529665729051 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 -DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:44 [batch.py:51] router release req id 8 -INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10844564437866211 s -INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s -DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=205263109471168399304651825252523744422, time:1750768964.402068s req_ids:[8] -DEBUG 06-24 20:42:44 [manager.py:391] -ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:222.05400466918945ms total_cost_time:222.09811210632324ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15288 prompt_cache_len:5151 prompt_cache_ratio:0.3369309262166405 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 -DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:44 [batch.py:51] router release req id 8 -INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10824131965637207 s -INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11010980606079102 s -DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=189456826049611772785593103769999278470, time:1750768964.6263907s req_ids:[8] -DEBUG 06-24 20:42:44 [manager.py:391] -ERROR 06-24 20:42:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:347.3842144012451ms total_cost_time:347.4287986755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15289 prompt_cache_len:5151 prompt_cache_ratio:0.33690888874354114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 -DEBUG 06-24 20:42:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:44 [batch.py:51] router release req id 8 -INFO 06-24 20:42:44 [manager.py:224] router recive req id 8 cost time 0.10918021202087402 s -INFO 06-24 20:42:44 [manager.py:68] detokenization recv req id 8 cost time 0.11117053031921387 s -DEBUG 06-24 20:42:44 [manager.py:391] Prefill Batch: batch_id=31906429100339121136266182681113234525, time:1750768964.9802153s req_ids:[8] -DEBUG 06-24 20:42:44 [manager.py:391] -ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:44 lightllm_req_id:8 first_token_cost:216.04061126708984ms total_cost_time:216.08424186706543ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15290 prompt_cache_len:5151 prompt_cache_ratio:0.3368868541530412 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 -DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:45 [batch.py:51] router release req id 8 -INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.1100618839263916 s -INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11237573623657227 s -DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=72485119064450478069388092176801404097, time:1750768965.2021701s req_ids:[8] -DEBUG 06-24 20:42:45 [manager.py:391] -ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:214.07413482666016ms total_cost_time:214.11609649658203ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15291 prompt_cache_len:5151 prompt_cache_ratio:0.3368648224445752 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 -DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:45 [batch.py:51] router release req id 8 -INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.1081535816192627 s -INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11034321784973145 s -DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=120322700966790208587618739429897530080, time:1750768965.4229474s req_ids:[8] -DEBUG 06-24 20:42:45 [manager.py:391] -ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:218.1849479675293ms total_cost_time:218.2292938232422ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15292 prompt_cache_len:5151 prompt_cache_ratio:0.3368427936175778 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 -DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:45 [batch.py:51] router release req id 8 -INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.10793709754943848 s -INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.10969018936157227 s -DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=239528137336332536001279364489197703124, time:1750768965.6485834s req_ids:[8] -DEBUG 06-24 20:42:45 [manager.py:391] -ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:216.71557426452637ms total_cost_time:216.75848960876465ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15293 prompt_cache_len:5151 prompt_cache_ratio:0.33682076767148367 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 -DEBUG 06-24 20:42:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:45 [batch.py:51] router release req id 8 -INFO 06-24 20:42:45 [manager.py:224] router recive req id 8 cost time 0.11090993881225586 s -INFO 06-24 20:42:45 [manager.py:68] detokenization recv req id 8 cost time 0.11289620399475098 s -DEBUG 06-24 20:42:45 [manager.py:391] Prefill Batch: batch_id=124696280399343810459781301469479435008, time:1750768965.8719244s req_ids:[8] -DEBUG 06-24 20:42:45 [manager.py:391] -ERROR 06-24 20:42:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:218.9795970916748ms total_cost_time:219.0232276916504ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15294 prompt_cache_len:5151 prompt_cache_ratio:0.33679874460572773 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 -DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:46 [batch.py:51] router release req id 8 -INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10926485061645508 s -INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11129879951477051 s -DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=46623969522368298408880099680608602768, time:1750768966.104454s req_ids:[8] -DEBUG 06-24 20:42:46 [manager.py:391] -ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:45 lightllm_req_id:8 first_token_cost:433.0558776855469ms total_cost_time:433.09998512268066ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15295 prompt_cache_len:5151 prompt_cache_ratio:0.336776724419745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 -DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:46 [batch.py:51] router release req id 8 -INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.1069023609161377 s -INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.10792016983032227 s -DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=333163605100754676007736873758363233882, time:1750768966.5384912s req_ids:[8] -DEBUG 06-24 20:42:46 [manager.py:391] -ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:218.7819480895996ms total_cost_time:218.82939338684082ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15296 prompt_cache_len:5151 prompt_cache_ratio:0.33675470711297073 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 -DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:46 [batch.py:51] router release req id 8 -INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10905599594116211 s -INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11026453971862793 s -DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=127141239058018062596515531762985767424, time:1750768966.7632134s req_ids:[8] -DEBUG 06-24 20:42:46 [manager.py:391] -ERROR 06-24 20:42:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:211.95054054260254ms total_cost_time:211.99631690979004ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15297 prompt_cache_len:5151 prompt_cache_ratio:0.3367326926848402 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 -DEBUG 06-24 20:42:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:46 [batch.py:51] router release req id 8 -INFO 06-24 20:42:46 [manager.py:224] router recive req id 8 cost time 0.10945892333984375 s -INFO 06-24 20:42:46 [manager.py:68] detokenization recv req id 8 cost time 0.11123824119567871 s -DEBUG 06-24 20:42:46 [manager.py:391] Prefill Batch: batch_id=201658133625886701345475311168542024000, time:1750768966.9832294s req_ids:[8] -DEBUG 06-24 20:42:46 [manager.py:391] -ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:46 lightllm_req_id:8 first_token_cost:214.39385414123535ms total_cost_time:214.43867683410645ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15298 prompt_cache_len:5151 prompt_cache_ratio:0.33671068113478886 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 -DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:47 [batch.py:51] router release req id 8 -INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10907530784606934 s -INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11098456382751465 s -DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=281181628271042014554416719716299624085, time:1750768967.2042766s req_ids:[8] -DEBUG 06-24 20:42:47 [manager.py:391] -INFO 06-24 20:42:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:217.24796295166016ms total_cost_time:217.29183197021484ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15299 prompt_cache_len:5151 prompt_cache_ratio:0.3366886724622524 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 -DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:47 [batch.py:51] router release req id 8 -INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10974502563476562 s -INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11173295974731445 s -DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=207190394608493638163270070010247435715, time:1750768967.428706s req_ids:[8] -DEBUG 06-24 20:42:47 [manager.py:391] -ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:395.56384086608887ms total_cost_time:395.60890197753906ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15300 prompt_cache_len:5151 prompt_cache_ratio:0.33666666666666667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 -DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:47 [batch.py:51] router release req id 8 -INFO 06-24 20:42:47 [manager.py:224] router recive req id 8 cost time 0.10812163352966309 s -INFO 06-24 20:42:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037182807922363 s -DEBUG 06-24 20:42:47 [manager.py:391] Prefill Batch: batch_id=143788279983788295104220454539294549530, time:1750768967.8298116s req_ids:[8] -DEBUG 06-24 20:42:47 [manager.py:391] -ERROR 06-24 20:42:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:221.05646133422852ms total_cost_time:221.11201286315918ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:15301 prompt_cache_len:5151 prompt_cache_ratio:0.3366446637474675 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 -DEBUG 06-24 20:42:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:47 [batch.py:51] router release req id 8 -INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.10946321487426758 s -INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.11149191856384277 s -DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=118735260533829356698187123397974787914, time:1750768968.058415s req_ids:[8] -DEBUG 06-24 20:42:48 [manager.py:391] -ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:47 lightllm_req_id:8 first_token_cost:215.14058113098145ms total_cost_time:215.16752243041992ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:15302 prompt_cache_len:5151 prompt_cache_ratio:0.33662266370409094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 -DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:48 [batch.py:51] router release req id 8 -INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.10944604873657227 s -INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.11142230033874512 s -DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=218900032652891180590334931963816945698, time:1750768968.2891686s req_ids:[8] -DEBUG 06-24 20:42:48 [manager.py:391] -ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:228.58357429504395ms total_cost_time:228.62958908081055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15303 prompt_cache_len:5151 prompt_cache_ratio:0.3366006665359733 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 -DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:48 [batch.py:51] router release req id 8 -INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.11382603645324707 s -INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.1156766414642334 s -DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=106573955011478134305099838849853628949, time:1750768968.5479906s req_ids:[8] -DEBUG 06-24 20:42:48 [manager.py:391] -ERROR 06-24 20:42:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:255.3424835205078ms total_cost_time:255.3870677947998ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15304 prompt_cache_len:5151 prompt_cache_ratio:0.33657867224255095 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 -DEBUG 06-24 20:42:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:48 [batch.py:51] router release req id 8 -INFO 06-24 20:42:48 [manager.py:224] router recive req id 8 cost time 0.11331605911254883 s -DEBUG 06-24 20:42:48 [manager.py:391] Prefill Batch: batch_id=220570184046574146735611968749597580841, time:1750768968.7777324s req_ids:[8] -DEBUG 06-24 20:42:48 [manager.py:391] -INFO 06-24 20:42:48 [manager.py:68] detokenization recv req id 8 cost time 0.1154031753540039 s -ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:48 lightllm_req_id:8 first_token_cost:390.60139656066895ms total_cost_time:390.64502716064453ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15305 prompt_cache_len:5151 prompt_cache_ratio:0.33655668082326035 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 -DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:49 [batch.py:51] router release req id 8 -INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10929393768310547 s -INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.1111762523651123 s -DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=213266858597367368396693061512444701271, time:1750768969.1739895s req_ids:[8] -DEBUG 06-24 20:42:49 [manager.py:391] -ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:218.27101707458496ms total_cost_time:218.31536293029785ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15306 prompt_cache_len:5151 prompt_cache_ratio:0.33653469227753824 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 -DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:49 [batch.py:51] router release req id 8 -INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10826230049133301 s -INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.11028647422790527 s -DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=207197339794377823852207332076431113080, time:1750768969.4005055s req_ids:[8] -DEBUG 06-24 20:42:49 [manager.py:391] -ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:218.2481288909912ms total_cost_time:218.27006340026855ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:15307 prompt_cache_len:5151 prompt_cache_ratio:0.3365127066048213 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 -DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:49 [batch.py:51] router release req id 8 -INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10643291473388672 s -INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.10820460319519043 s -DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=151404732567665939835927566587686360725, time:1750768969.6370568s req_ids:[8] -DEBUG 06-24 20:42:49 [manager.py:391] -ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:185.45937538146973ms total_cost_time:185.5015754699707ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15308 prompt_cache_len:5151 prompt_cache_ratio:0.33649072380454664 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 -DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:49 [batch.py:51] router release req id 8 -INFO 06-24 20:42:49 [manager.py:224] router recive req id 8 cost time 0.10833215713500977 s -INFO 06-24 20:42:49 [manager.py:68] detokenization recv req id 8 cost time 0.11038351058959961 s -DEBUG 06-24 20:42:49 [manager.py:391] Prefill Batch: batch_id=139395259600872835069267260525045237871, time:1750768969.8190825s req_ids:[8] -DEBUG 06-24 20:42:49 [manager.py:391] -ERROR 06-24 20:42:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:209.79952812194824ms total_cost_time:209.84554290771484ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15309 prompt_cache_len:5151 prompt_cache_ratio:0.3364687438761513 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 -DEBUG 06-24 20:42:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:49 [batch.py:51] router release req id 8 -INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10900211334228516 s -INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11103439331054688 s -DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=136681978341145368486439710936022663088, time:1750768970.0322962s req_ids:[8] -DEBUG 06-24 20:42:50 [manager.py:391] -ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:49 lightllm_req_id:8 first_token_cost:389.39881324768066ms total_cost_time:389.44387435913086ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15310 prompt_cache_len:5151 prompt_cache_ratio:0.3364467668190725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 -DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:50 [batch.py:51] router release req id 8 -INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10852909088134766 s -INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11036944389343262 s -DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=327174258089267058152819911523288870919, time:1750768970.4308302s req_ids:[8] -DEBUG 06-24 20:42:50 [manager.py:391] -ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:218.72544288635254ms total_cost_time:218.76907348632812ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15311 prompt_cache_len:5151 prompt_cache_ratio:0.3364247926327477 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 -DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:50 [batch.py:51] router release req id 8 -INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10945248603820801 s -INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11139345169067383 s -DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=296035011426413534414214817507477122017, time:1750768970.6616118s req_ids:[8] -DEBUG 06-24 20:42:50 [manager.py:391] -ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:226.46546363830566ms total_cost_time:226.51124000549316ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15312 prompt_cache_len:5151 prompt_cache_ratio:0.3364028213166144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 -DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:50 [batch.py:51] router release req id 8 -INFO 06-24 20:42:50 [manager.py:224] router recive req id 8 cost time 0.10907888412475586 s -INFO 06-24 20:42:50 [manager.py:68] detokenization recv req id 8 cost time 0.11091971397399902 s -DEBUG 06-24 20:42:50 [manager.py:391] Prefill Batch: batch_id=139225536724817114983254184863201697265, time:1750768970.898214s req_ids:[8] -DEBUG 06-24 20:42:50 [manager.py:391] -ERROR 06-24 20:42:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:42:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 60926.492 tokens/s -DEBUG 06-24 20:42:50 [stats.py:37] Avg prompt tokens throughput: 60918.525 tokens/s -DEBUG 06-24 20:42:50 [stats.py:37] Avg generate tokens throughput: 7.967 tokens/s -INFO 06-24 20:42:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:189.68796730041504ms total_cost_time:189.73159790039062ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15313 prompt_cache_len:5151 prompt_cache_ratio:0.3363808528701104 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 -DEBUG 06-24 20:42:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:50 [batch.py:51] router release req id 8 -INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.1082766056060791 s -INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11003375053405762 s -DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=75273083978615056677417808843088518856, time:1750768971.0878198s req_ids:[8] -DEBUG 06-24 20:42:51 [manager.py:391] -ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:50 lightllm_req_id:8 first_token_cost:175.54640769958496ms total_cost_time:175.59003829956055ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15314 prompt_cache_len:5151 prompt_cache_ratio:0.3363588872926734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 -DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:51 [batch.py:51] router release req id 8 -INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10881686210632324 s -INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11084699630737305 s -DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=205671522056542375350667359860331681758, time:1750768971.2701604s req_ids:[8] -DEBUG 06-24 20:42:51 [manager.py:391] -ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:205.4746150970459ms total_cost_time:205.51753044128418ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15315 prompt_cache_len:5151 prompt_cache_ratio:0.3363369245837414 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 -DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:51 [batch.py:51] router release req id 8 -INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10948944091796875 s -INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11149978637695312 s -DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=42625442722787368158268338541939090214, time:1750768971.4821432s req_ids:[8] -DEBUG 06-24 20:42:51 [manager.py:391] -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:51 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:388.7600898742676ms total_cost_time:388.80228996276855ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15316 prompt_cache_len:5151 prompt_cache_ratio:0.3363149647427527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 -DEBUG 06-24 20:42:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:51 [batch.py:51] router release req id 8 -INFO 06-24 20:42:51 [manager.py:224] router recive req id 8 cost time 0.10973548889160156 s -INFO 06-24 20:42:51 [manager.py:68] detokenization recv req id 8 cost time 0.11172127723693848 s -DEBUG 06-24 20:42:51 [manager.py:391] Prefill Batch: batch_id=80787102678951303253530927990046975483, time:1750768971.8776014s req_ids:[8] -DEBUG 06-24 20:42:51 [manager.py:391] -ERROR 06-24 20:42:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:215.8496379852295ms total_cost_time:215.90423583984375ms,out_token_counter:1 mean_per_token_cost_time: 0.05459785461425781ms prompt_token_num:15317 prompt_cache_len:5151 prompt_cache_ratio:0.3362930077691454 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 -DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:52 [batch.py:51] router release req id 8 -INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.1086113452911377 s -INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11049032211303711 s -DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=304653817357335848159481074334328882217, time:1750768972.101409s req_ids:[8] -DEBUG 06-24 20:42:52 [manager.py:391] -ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:51 lightllm_req_id:8 first_token_cost:219.18559074401855ms total_cost_time:219.23041343688965ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15318 prompt_cache_len:5151 prompt_cache_ratio:0.336271053662358 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 -DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:52 [batch.py:51] router release req id 8 -INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10921835899353027 s -INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11104011535644531 s -DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=286275820780609436128846902014346733486, time:1750768972.3287394s req_ids:[8] -DEBUG 06-24 20:42:52 [manager.py:391] -ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:225.19254684448242ms total_cost_time:225.2368927001953ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15319 prompt_cache_len:5151 prompt_cache_ratio:0.3362491024218291 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 -DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:52 [batch.py:51] router release req id 8 -INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10818934440612793 s -INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11007094383239746 s -DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=258991911638198069650165832121559093558, time:1750768972.583152s req_ids:[8] -DEBUG 06-24 20:42:52 [manager.py:391] -ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:245.7449436187744ms total_cost_time:245.78857421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15320 prompt_cache_len:5151 prompt_cache_ratio:0.3362271540469974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 -DEBUG 06-24 20:42:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:52 [batch.py:51] router release req id 8 -INFO 06-24 20:42:52 [manager.py:224] router recive req id 8 cost time 0.10894489288330078 s -INFO 06-24 20:42:52 [manager.py:68] detokenization recv req id 8 cost time 0.11070919036865234 s -DEBUG 06-24 20:42:52 [manager.py:391] Prefill Batch: batch_id=277590033815439948017843451332998192368, time:1750768972.8383968s req_ids:[8] -DEBUG 06-24 20:42:52 [manager.py:391] -ERROR 06-24 20:42:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:232.99694061279297ms total_cost_time:233.04414749145508ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15321 prompt_cache_len:5151 prompt_cache_ratio:0.33620520853730174 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 -DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:53 [batch.py:51] router release req id 8 -INFO 06-24 20:42:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:42:53 [statics_utils.py:24] mean first cost: 233.08571218308117 ms -INFO 06-24 20:42:53 [statics_utils.py:24] mean per token cost: 0.05703200167086259 ms -INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.3113415241241455 s -INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.31342530250549316 s -DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=92646671727510335327827355820773469934, time:1750768973.26546s req_ids:[8] -DEBUG 06-24 20:42:53 [manager.py:391] -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:53 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:52 lightllm_req_id:8 first_token_cost:440.8905506134033ms total_cost_time:440.91176986694336ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:15322 prompt_cache_len:5151 prompt_cache_ratio:0.33618326589218117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 -DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:53 [batch.py:51] router release req id 8 -INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.10953950881958008 s -INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.11092185974121094 s -DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=226969634425264763476423843801536342598, time:1750768973.495207s req_ids:[8] -DEBUG 06-24 20:42:53 [manager.py:391] -ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:216.477632522583ms total_cost_time:216.5207862854004ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15323 prompt_cache_len:5151 prompt_cache_ratio:0.33616132611107485 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 -DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:53 [batch.py:51] router release req id 8 -INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.1086881160736084 s -INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.11022067070007324 s -DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=18638250450885591266255624606402263586, time:1750768973.717723s req_ids:[8] -DEBUG 06-24 20:42:53 [manager.py:391] -ERROR 06-24 20:42:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:207.84378051757812ms total_cost_time:207.88908004760742ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15324 prompt_cache_len:5151 prompt_cache_ratio:0.33613938919342207 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 -DEBUG 06-24 20:42:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:53 [batch.py:51] router release req id 8 -INFO 06-24 20:42:53 [manager.py:224] router recive req id 8 cost time 0.10832977294921875 s -INFO 06-24 20:42:53 [manager.py:68] detokenization recv req id 8 cost time 0.10979795455932617 s -DEBUG 06-24 20:42:53 [manager.py:391] Prefill Batch: batch_id=306377985823902198740730944060201942532, time:1750768973.9348261s req_ids:[8] -DEBUG 06-24 20:42:53 [manager.py:391] -ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:53 lightllm_req_id:8 first_token_cost:214.09034729003906ms total_cost_time:214.13326263427734ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15325 prompt_cache_len:5151 prompt_cache_ratio:0.3361174551386623 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 -DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:54 [batch.py:51] router release req id 8 -INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.10876607894897461 s -INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.11004829406738281 s -DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=16362404637402728455321410143009721430, time:1750768974.1546347s req_ids:[8] -DEBUG 06-24 20:42:54 [manager.py:391] -ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:222.97406196594238ms total_cost_time:223.01888465881348ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15326 prompt_cache_len:5151 prompt_cache_ratio:0.33609552394623515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 -DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:54 [batch.py:51] router release req id 8 -INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.1080622673034668 s -INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.10990643501281738 s -DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=164732157111739476198136279750358592644, time:1750768974.4084578s req_ids:[8] -DEBUG 06-24 20:42:54 [manager.py:391] -ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:445.68586349487305ms total_cost_time:445.7285404205322ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15327 prompt_cache_len:5151 prompt_cache_ratio:0.33607359561558037 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 -DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:54 [batch.py:51] router release req id 8 -INFO 06-24 20:42:54 [manager.py:224] router recive req id 8 cost time 0.10962843894958496 s -INFO 06-24 20:42:54 [manager.py:68] detokenization recv req id 8 cost time 0.11098194122314453 s -DEBUG 06-24 20:42:54 [manager.py:391] Prefill Batch: batch_id=326522886053188905775444211210609141097, time:1750768974.836165s req_ids:[8] -DEBUG 06-24 20:42:54 [manager.py:391] -ERROR 06-24 20:42:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:213.6058807373047ms total_cost_time:213.64951133728027ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15328 prompt_cache_len:5151 prompt_cache_ratio:0.3360516701461378 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 -DEBUG 06-24 20:42:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:54 [batch.py:51] router release req id 8 -INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.1097555160522461 s -INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11185979843139648 s -DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=144344382519892597221764530356904377539, time:1750768975.056322s req_ids:[8] -DEBUG 06-24 20:42:55 [manager.py:391] -ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:54 lightllm_req_id:8 first_token_cost:215.55852890014648ms total_cost_time:215.60311317443848ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15329 prompt_cache_len:5151 prompt_cache_ratio:0.3360297475373475 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 -DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:55 [batch.py:51] router release req id 8 -INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10883140563964844 s -INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11094093322753906 s -DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=227988676369477360678641043278677978321, time:1750768975.2786465s req_ids:[8] -DEBUG 06-24 20:42:55 [manager.py:391] -ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:219.46263313293457ms total_cost_time:219.50721740722656ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15330 prompt_cache_len:5151 prompt_cache_ratio:0.3360078277886497 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 -DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:55 [batch.py:51] router release req id 8 -INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10972189903259277 s -INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11109375953674316 s -DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=282966017163980914720924374168613210294, time:1750768975.50362s req_ids:[8] -DEBUG 06-24 20:42:55 [manager.py:391] -ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:218.20974349975586ms total_cost_time:218.25456619262695ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15331 prompt_cache_len:5151 prompt_cache_ratio:0.3359859108994847 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 -DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:55 [batch.py:51] router release req id 8 -INFO 06-24 20:42:55 [manager.py:224] router recive req id 8 cost time 0.10836362838745117 s -INFO 06-24 20:42:55 [manager.py:68] detokenization recv req id 8 cost time 0.11007022857666016 s -DEBUG 06-24 20:42:55 [manager.py:391] Prefill Batch: batch_id=122723376335127200083220617165330103781, time:1750768975.7297702s req_ids:[8] -DEBUG 06-24 20:42:55 [manager.py:391] -ERROR 06-24 20:42:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:347.54300117492676ms total_cost_time:347.58687019348145ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15332 prompt_cache_len:5151 prompt_cache_ratio:0.33596399686929296 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 -DEBUG 06-24 20:42:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:55 [batch.py:51] router release req id 8 -INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10858631134033203 s -INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11065673828125 s -DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=274594342980269237021212139939735502037, time:1750768976.08399s req_ids:[8] -DEBUG 06-24 20:42:56 [manager.py:391] -ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:55 lightllm_req_id:8 first_token_cost:209.64765548706055ms total_cost_time:209.69319343566895ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15333 prompt_cache_len:5151 prompt_cache_ratio:0.33594208569751516 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 -DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:56 [batch.py:51] router release req id 8 -INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10797238349914551 s -INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.10984611511230469 s -DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=237610410454759499936795613696038081288, time:1750768976.2998629s req_ids:[8] -DEBUG 06-24 20:42:56 [manager.py:391] -ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:173.98357391357422ms total_cost_time:174.0267276763916ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15334 prompt_cache_len:5151 prompt_cache_ratio:0.33592017738359203 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 -DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:56 [batch.py:51] router release req id 8 -INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10795402526855469 s -INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.10968017578125 s -DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=295983761400535653952052349898365469088, time:1750768976.4816022s req_ids:[8] -DEBUG 06-24 20:42:56 [manager.py:391] -ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:181.04147911071777ms total_cost_time:181.08773231506348ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15335 prompt_cache_len:5151 prompt_cache_ratio:0.3358982719269645 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 -DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:56 [batch.py:51] router release req id 8 -INFO 06-24 20:42:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10856056213378906 s -INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11034536361694336 s -DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=56407298152871248156032788240139055168, time:1750768976.6703231s req_ids:[8] -DEBUG 06-24 20:42:56 [manager.py:391] -ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:206.0222625732422ms total_cost_time:206.07805252075195ms,out_token_counter:1 mean_per_token_cost_time: 0.055789947509765625ms prompt_token_num:15336 prompt_cache_len:5151 prompt_cache_ratio:0.33587636932707354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 -DEBUG 06-24 20:42:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:56 [batch.py:51] router release req id 8 -INFO 06-24 20:42:56 [manager.py:224] router recive req id 8 cost time 0.10825300216674805 s -INFO 06-24 20:42:56 [manager.py:68] detokenization recv req id 8 cost time 0.11021971702575684 s -DEBUG 06-24 20:42:56 [manager.py:391] Prefill Batch: batch_id=158740947624756608791884466503603174073, time:1750768976.8820312s req_ids:[8] -DEBUG 06-24 20:42:56 [manager.py:391] -ERROR 06-24 20:42:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:212.88132667541504ms total_cost_time:212.92686462402344ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15337 prompt_cache_len:5151 prompt_cache_ratio:0.3358544695833605 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 -DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:57 [batch.py:51] router release req id 8 -INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10818839073181152 s -INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11037993431091309 s -DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=322517486264072634995032119120306214942, time:1750768977.1016862s req_ids:[8] -DEBUG 06-24 20:42:57 [manager.py:391] -ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:56 lightllm_req_id:8 first_token_cost:379.61459159851074ms total_cost_time:379.65965270996094ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15338 prompt_cache_len:5151 prompt_cache_ratio:0.3358325726952667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 -DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:57 [batch.py:51] router release req id 8 -INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.11078834533691406 s -INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11257719993591309 s -DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=79567468288218621607874284058498190307, time:1750768977.4884176s req_ids:[8] -DEBUG 06-24 20:42:57 [manager.py:391] -ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.86117553710938ms total_cost_time:217.90599822998047ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15339 prompt_cache_len:5151 prompt_cache_ratio:0.3358106786622335 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 -DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:57 [batch.py:51] router release req id 8 -INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s -INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11094784736633301 s -DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=156001186831924051612221275691133984214, time:1750768977.712095s req_ids:[8] -DEBUG 06-24 20:42:57 [manager.py:391] -ERROR 06-24 20:42:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.9102897644043ms total_cost_time:217.9543972015381ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15340 prompt_cache_len:5151 prompt_cache_ratio:0.3357887874837027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 -DEBUG 06-24 20:42:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:57 [batch.py:51] router release req id 8 -INFO 06-24 20:42:57 [manager.py:224] router recive req id 8 cost time 0.10884666442871094 s -INFO 06-24 20:42:57 [manager.py:68] detokenization recv req id 8 cost time 0.11090946197509766 s -DEBUG 06-24 20:42:57 [manager.py:391] Prefill Batch: batch_id=51495630030534115698473573854292362327, time:1750768977.9371724s req_ids:[8] -DEBUG 06-24 20:42:57 [manager.py:391] -ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:57 lightllm_req_id:8 first_token_cost:217.44275093078613ms total_cost_time:217.48948097229004ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15341 prompt_cache_len:5151 prompt_cache_ratio:0.3357668991591161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 -DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:58 [batch.py:51] router release req id 8 -INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.10848426818847656 s -INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052250862121582 s -DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=212219375294411603175039535825477325631, time:1750768978.1626809s req_ids:[8] -DEBUG 06-24 20:42:58 [manager.py:391] -ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:223.7873077392578ms total_cost_time:223.8328456878662ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15342 prompt_cache_len:5151 prompt_cache_ratio:0.33574501368791554 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 -DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:58 [batch.py:51] router release req id 8 -INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.1091911792755127 s -INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11118364334106445 s -DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=155002304727997770478871482088627870759, time:1750768978.395067s req_ids:[8] -DEBUG 06-24 20:42:58 [manager.py:391] -ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:386.35754585266113ms total_cost_time:386.3997459411621ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15343 prompt_cache_len:5151 prompt_cache_ratio:0.3357231310695431 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 -DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:58 [batch.py:51] router release req id 8 -INFO 06-24 20:42:58 [manager.py:224] router recive req id 8 cost time 0.10852479934692383 s -INFO 06-24 20:42:58 [manager.py:68] detokenization recv req id 8 cost time 0.11052203178405762 s -DEBUG 06-24 20:42:58 [manager.py:391] Prefill Batch: batch_id=200070183466653755022877277077968782016, time:1750768978.7852924s req_ids:[8] -DEBUG 06-24 20:42:58 [manager.py:391] -ERROR 06-24 20:42:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:217.12398529052734ms total_cost_time:217.16713905334473ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15344 prompt_cache_len:5151 prompt_cache_ratio:0.3357012513034411 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 -DEBUG 06-24 20:42:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:58 [batch.py:51] router release req id 8 -INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10964345932006836 s -INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11145305633544922 s -DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=283814069062744516629299119373090608740, time:1750768979.0077274s req_ids:[8] -DEBUG 06-24 20:42:59 [manager.py:391] -ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:58 lightllm_req_id:8 first_token_cost:213.52458000183105ms total_cost_time:213.56987953186035ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15345 prompt_cache_len:5151 prompt_cache_ratio:0.3356793743890518 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 -DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:59 [batch.py:51] router release req id 8 -INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10906672477722168 s -INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11075878143310547 s -DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=323982062977136394240571413242254904374, time:1750768979.2279358s req_ids:[8] -DEBUG 06-24 20:42:59 [manager.py:391] -ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:212.01133728027344ms total_cost_time:212.05902099609375ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:15346 prompt_cache_len:5151 prompt_cache_ratio:0.3356575003258178 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 -DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:59 [batch.py:51] router release req id 8 -INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.10912823677062988 s -INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11102414131164551 s -DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=271456533260929858801787805114912979618, time:1750768979.447707s req_ids:[8] -DEBUG 06-24 20:42:59 [manager.py:391] -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:42:59 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:211.08031272888184ms total_cost_time:211.12465858459473ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15347 prompt_cache_len:5151 prompt_cache_ratio:0.33563562911318173 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 -DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:59 [batch.py:51] router release req id 8 -INFO 06-24 20:42:59 [manager.py:224] router recive req id 8 cost time 0.1087043285369873 s -INFO 06-24 20:42:59 [manager.py:68] detokenization recv req id 8 cost time 0.11056780815124512 s -DEBUG 06-24 20:42:59 [manager.py:391] Prefill Batch: batch_id=153066080821979535270323887584686654548, time:1750768979.6774912s req_ids:[8] -DEBUG 06-24 20:42:59 [manager.py:391] -ERROR 06-24 20:42:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:42:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:42:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:399.2457389831543ms total_cost_time:399.289608001709ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15348 prompt_cache_len:5151 prompt_cache_ratio:0.3356137607505864 mtp_avg_token_per_step:1.0 -INFO 06-24 20:42:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 -DEBUG 06-24 20:42:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:42:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:42:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:42:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:42:59 [batch.py:51] router release req id 8 -INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10561442375183105 s -INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.10747265815734863 s -DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=270145558130665314116764292355998010863, time:1750768980.072468s req_ids:[8] -DEBUG 06-24 20:43:00 [manager.py:391] -ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:42:59 lightllm_req_id:8 first_token_cost:174.12495613098145ms total_cost_time:174.147367477417ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:15349 prompt_cache_len:5151 prompt_cache_ratio:0.33559189523747474 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 -DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:00 [batch.py:51] router release req id 8 -INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10703706741333008 s -INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.10889458656311035 s -DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=9078645035160183809545011812628570115, time:1750768980.2552655s req_ids:[8] -DEBUG 06-24 20:43:00 [manager.py:391] -ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:210.97302436828613ms total_cost_time:211.01784706115723ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15350 prompt_cache_len:5151 prompt_cache_ratio:0.3355700325732899 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 -DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:00 [batch.py:51] router release req id 8 -INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10949969291687012 s -INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.11165308952331543 s -DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=26151391008697655829321699638125795870, time:1750768980.4727352s req_ids:[8] -DEBUG 06-24 20:43:00 [manager.py:391] -ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:219.36917304992676ms total_cost_time:219.41494941711426ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15351 prompt_cache_len:5151 prompt_cache_ratio:0.33554817275747506 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 -DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:00 [batch.py:51] router release req id 8 -INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10872817039489746 s -INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.11069750785827637 s -DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=127727202330861232230604765399495255353, time:1750768980.6983454s req_ids:[8] -DEBUG 06-24 20:43:00 [manager.py:391] -ERROR 06-24 20:43:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:216.5842056274414ms total_cost_time:216.62664413452148ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15352 prompt_cache_len:5151 prompt_cache_ratio:0.3355263157894737 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 -DEBUG 06-24 20:43:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:00 [batch.py:51] router release req id 8 -INFO 06-24 20:43:00 [manager.py:224] router recive req id 8 cost time 0.10840582847595215 s -INFO 06-24 20:43:00 [manager.py:68] detokenization recv req id 8 cost time 0.1104581356048584 s -DEBUG 06-24 20:43:00 [manager.py:391] Prefill Batch: batch_id=258736601547835548276467609077952617779, time:1750768980.91914s req_ids:[8] -DEBUG 06-24 20:43:00 [manager.py:391] -ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:43:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 59981.522 tokens/s -DEBUG 06-24 20:43:01 [stats.py:37] Avg prompt tokens throughput: 59973.699 tokens/s -DEBUG 06-24 20:43:01 [stats.py:37] Avg generate tokens throughput: 7.823 tokens/s -INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:00 lightllm_req_id:8 first_token_cost:386.446475982666ms total_cost_time:386.4891529083252ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15353 prompt_cache_len:5151 prompt_cache_ratio:0.3355044616687292 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 -DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:01 [batch.py:51] router release req id 8 -INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10991120338439941 s -INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11188650131225586 s -DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=290799042573286687400949406338462368788, time:1750768981.3136764s req_ids:[8] -DEBUG 06-24 20:43:01 [manager.py:391] -ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:217.6980972290039ms total_cost_time:217.7414894104004ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15354 prompt_cache_len:5151 prompt_cache_ratio:0.3354826103946854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 -DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:01 [batch.py:51] router release req id 8 -INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10855674743652344 s -INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.1107947826385498 s -DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=158640378962868852408319225048174198547, time:1750768981.540713s req_ids:[8] -DEBUG 06-24 20:43:01 [manager.py:391] -ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:218.29915046691895ms total_cost_time:218.34349632263184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15355 prompt_cache_len:5151 prompt_cache_ratio:0.33546076196678604 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 -DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:01 [batch.py:51] router release req id 8 -INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.10948944091796875 s -INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11159467697143555 s -DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=81025357482006603200975788762374016352, time:1750768981.7629461s req_ids:[8] -DEBUG 06-24 20:43:01 [manager.py:391] -ERROR 06-24 20:43:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:215.57283401489258ms total_cost_time:215.61527252197266ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15356 prompt_cache_len:5151 prompt_cache_ratio:0.33543891638447515 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 -DEBUG 06-24 20:43:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:01 [batch.py:51] router release req id 8 -INFO 06-24 20:43:01 [manager.py:224] router recive req id 8 cost time 0.11089658737182617 s -INFO 06-24 20:43:01 [manager.py:68] detokenization recv req id 8 cost time 0.11288619041442871 s -DEBUG 06-24 20:43:01 [manager.py:391] Prefill Batch: batch_id=268268635266716478838450128042643936704, time:1750768981.9836023s req_ids:[8] -DEBUG 06-24 20:43:01 [manager.py:391] -ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:01 lightllm_req_id:8 first_token_cost:214.54143524169922ms total_cost_time:214.58864212036133ms,out_token_counter:1 mean_per_token_cost_time: 0.047206878662109375ms prompt_token_num:15357 prompt_cache_len:5151 prompt_cache_ratio:0.3354170736471967 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 -DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:02 [batch.py:51] router release req id 8 -INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.109588623046875 s -INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.11157441139221191 s -DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=287625837531010990463567237447162271395, time:1750768982.205321s req_ids:[8] -DEBUG 06-24 20:43:02 [manager.py:391] -ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:216.05300903320312ms total_cost_time:216.09807014465332ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15358 prompt_cache_len:5151 prompt_cache_ratio:0.3353952337543951 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 -DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:02 [batch.py:51] router release req id 8 -INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.11161446571350098 s -INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.1137533187866211 s -DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=92855271251293218863292472613775499392, time:1750768982.4291716s req_ids:[8] -DEBUG 06-24 20:43:02 [manager.py:391] -ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:390.2320861816406ms total_cost_time:390.2771472930908ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15359 prompt_cache_len:5151 prompt_cache_ratio:0.3353733967055147 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 -DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:02 [batch.py:51] router release req id 8 -INFO 06-24 20:43:02 [manager.py:224] router recive req id 8 cost time 0.10943078994750977 s -INFO 06-24 20:43:02 [manager.py:68] detokenization recv req id 8 cost time 0.11136627197265625 s -DEBUG 06-24 20:43:02 [manager.py:391] Prefill Batch: batch_id=38800973283737081648578372040491696929, time:1750768982.8256137s req_ids:[8] -DEBUG 06-24 20:43:02 [manager.py:391] -ERROR 06-24 20:43:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:215.7583236694336ms total_cost_time:215.8043384552002ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15360 prompt_cache_len:5151 prompt_cache_ratio:0.3353515625 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 -DEBUG 06-24 20:43:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:02 [batch.py:51] router release req id 8 -INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10936498641967773 s -INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.1113579273223877 s -DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=177247339212693740176265226471529954408, time:1750768983.047578s req_ids:[8] -DEBUG 06-24 20:43:03 [manager.py:391] -ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:02 lightllm_req_id:8 first_token_cost:211.4555835723877ms total_cost_time:211.49873733520508ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15361 prompt_cache_len:5151 prompt_cache_ratio:0.33532973113729575 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 -DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:03 [batch.py:51] router release req id 8 -INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10856294631958008 s -INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11044812202453613 s -DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=143074062640900478189127587870579759452, time:1750768983.2670083s req_ids:[8] -DEBUG 06-24 20:43:03 [manager.py:391] -ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:212.54682540893555ms total_cost_time:212.59260177612305ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15362 prompt_cache_len:5151 prompt_cache_ratio:0.3353079026168468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 -DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:03 [batch.py:51] router release req id 8 -INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10832500457763672 s -INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11034464836120605 s -DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=302818105532362846049912944537836351008, time:1750768983.4855945s req_ids:[8] -DEBUG 06-24 20:43:03 [manager.py:391] -ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:216.07589721679688ms total_cost_time:216.11857414245605ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15363 prompt_cache_len:5151 prompt_cache_ratio:0.33528607693809803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 -DEBUG 06-24 20:43:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:03 [batch.py:51] router release req id 8 -INFO 06-24 20:43:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:03 [manager.py:224] router recive req id 8 cost time 0.10929083824157715 s -INFO 06-24 20:43:03 [manager.py:68] detokenization recv req id 8 cost time 0.11132240295410156 s -DEBUG 06-24 20:43:03 [manager.py:391] Prefill Batch: batch_id=74308727657514916718725508856092378739, time:1750768983.7086174s req_ids:[8] -DEBUG 06-24 20:43:03 [manager.py:391] -ERROR 06-24 20:43:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:214.54358100891113ms total_cost_time:214.58768844604492ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15364 prompt_cache_len:5151 prompt_cache_ratio:0.33526425410049465 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 -DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:04 [batch.py:51] router release req id 8 -INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.3108022212982178 s -INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.31284117698669434 s -DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=63064194624646275818112140183214060079, time:1750768984.137029s req_ids:[8] -DEBUG 06-24 20:43:04 [manager.py:391] -ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:03 lightllm_req_id:8 first_token_cost:428.3902645111084ms total_cost_time:428.4350872039795ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15365 prompt_cache_len:5151 prompt_cache_ratio:0.3352424341034819 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 -DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:04 [batch.py:51] router release req id 8 -INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10988879203796387 s -INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11189603805541992 s -DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=311389230740309513958957683030451969073, time:1750768984.3652768s req_ids:[8] -DEBUG 06-24 20:43:04 [manager.py:391] -ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:211.7774486541748ms total_cost_time:211.81988716125488ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15366 prompt_cache_len:5151 prompt_cache_ratio:0.3352206169465053 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 -DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:04 [batch.py:51] router release req id 8 -INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10894012451171875 s -INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11089324951171875 s -DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=172458119560786033644438559276628151925, time:1750768984.5838003s req_ids:[8] -DEBUG 06-24 20:43:04 [manager.py:391] -ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.31336212158203ms total_cost_time:216.3560390472412ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15367 prompt_cache_len:5151 prompt_cache_ratio:0.3351988026290102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 -DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:04 [batch.py:51] router release req id 8 -INFO 06-24 20:43:04 [manager.py:224] router recive req id 8 cost time 0.10941243171691895 s -INFO 06-24 20:43:04 [manager.py:68] detokenization recv req id 8 cost time 0.11143898963928223 s -DEBUG 06-24 20:43:04 [manager.py:391] Prefill Batch: batch_id=252111704490846481959061713900815059261, time:1750768984.806749s req_ids:[8] -DEBUG 06-24 20:43:04 [manager.py:391] -ERROR 06-24 20:43:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.92419052124023ms total_cost_time:216.96853637695312ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15368 prompt_cache_len:5151 prompt_cache_ratio:0.33517699115044247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 -DEBUG 06-24 20:43:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:04 [batch.py:51] router release req id 8 -INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10915112495422363 s -INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.11105155944824219 s -DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=229573794079567529141691555750646106229, time:1750768985.0310078s req_ids:[8] -DEBUG 06-24 20:43:05 [manager.py:391] -ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:04 lightllm_req_id:8 first_token_cost:216.85504913330078ms total_cost_time:216.89748764038086ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15369 prompt_cache_len:5151 prompt_cache_ratio:0.3351551825102479 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 -DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:05 [batch.py:51] router release req id 8 -INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10850024223327637 s -INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.10983395576477051 s -DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=53414658934787146395039420875538988430, time:1750768985.2553716s req_ids:[8] -DEBUG 06-24 20:43:05 [manager.py:391] -ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:387.6934051513672ms total_cost_time:387.7372741699219ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15370 prompt_cache_len:5151 prompt_cache_ratio:0.33513337670787247 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 -DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:05 [batch.py:51] router release req id 8 -INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10744404792785645 s -INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.1085209846496582 s -DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=325242214310208396462809028867472014592, time:1750768985.648186s req_ids:[8] -DEBUG 06-24 20:43:05 [manager.py:391] -ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:213.75560760498047ms total_cost_time:213.80066871643066ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15371 prompt_cache_len:5151 prompt_cache_ratio:0.33511157374276235 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 -DEBUG 06-24 20:43:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:05 [batch.py:51] router release req id 8 -INFO 06-24 20:43:05 [manager.py:224] router recive req id 8 cost time 0.10821747779846191 s -INFO 06-24 20:43:05 [manager.py:68] detokenization recv req id 8 cost time 0.1102144718170166 s -DEBUG 06-24 20:43:05 [manager.py:391] Prefill Batch: batch_id=191902520460076828967114699486866690553, time:1750768985.8690102s req_ids:[8] -DEBUG 06-24 20:43:05 [manager.py:391] -ERROR 06-24 20:43:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:218.87826919555664ms total_cost_time:218.92023086547852ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15372 prompt_cache_len:5151 prompt_cache_ratio:0.33508977361436376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 -DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:06 [batch.py:51] router release req id 8 -INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.10853958129882812 s -INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.11044788360595703 s -DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=130298085718475251897893912261769743178, time:1750768986.0944624s req_ids:[8] -DEBUG 06-24 20:43:06 [manager.py:391] -ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:05 lightllm_req_id:8 first_token_cost:213.54961395263672ms total_cost_time:213.5944366455078ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15373 prompt_cache_len:5151 prompt_cache_ratio:0.3350679763221232 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 -DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:06 [batch.py:51] router release req id 8 -INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.1080322265625 s -INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.1095123291015625 s -DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=284784535097765960780685482871099146251, time:1750768986.3134863s req_ids:[8] -DEBUG 06-24 20:43:06 [manager.py:391] -ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:175.3544807434082ms total_cost_time:175.3995418548584ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15374 prompt_cache_len:5151 prompt_cache_ratio:0.3350461818654872 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 -DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:06 [batch.py:51] router release req id 8 -INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.10727405548095703 s -INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.10831141471862793 s -DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=203465573709955617309034880723087023812, time:1750768986.4958093s req_ids:[8] -DEBUG 06-24 20:43:06 [manager.py:391] -ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:381.0992240905762ms total_cost_time:381.1452388763428ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15375 prompt_cache_len:5151 prompt_cache_ratio:0.33502439024390246 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 -DEBUG 06-24 20:43:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:06 [batch.py:51] router release req id 8 -INFO 06-24 20:43:06 [manager.py:224] router recive req id 8 cost time 0.1085050106048584 s -INFO 06-24 20:43:06 [manager.py:68] detokenization recv req id 8 cost time 0.10973644256591797 s -DEBUG 06-24 20:43:06 [manager.py:391] Prefill Batch: batch_id=275363071337174741334805848211209491364, time:1750768986.8826015s req_ids:[8] -DEBUG 06-24 20:43:06 [manager.py:391] -ERROR 06-24 20:43:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:216.28427505493164ms total_cost_time:216.32933616638184ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15376 prompt_cache_len:5151 prompt_cache_ratio:0.3350026014568158 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 -DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:07 [batch.py:51] router release req id 8 -INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.11011767387390137 s -INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.11158275604248047 s -DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=28748341201241374132325111932275566416, time:1750768987.1066349s req_ids:[8] -DEBUG 06-24 20:43:07 [manager.py:391] -ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:06 lightllm_req_id:8 first_token_cost:216.2301540374756ms total_cost_time:216.25757217407227ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:15377 prompt_cache_len:5151 prompt_cache_ratio:0.3349808155036743 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 -DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:07 [batch.py:51] router release req id 8 -INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10867762565612793 s -INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971355438232422 s -DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=27307337222922660929956799907225599158, time:1750768987.329331s req_ids:[8] -DEBUG 06-24 20:43:07 [manager.py:391] -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:07 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:218.9939022064209ms total_cost_time:219.03157234191895ms,out_token_counter:1 mean_per_token_cost_time: 0.037670135498046875ms prompt_token_num:15378 prompt_cache_len:5151 prompt_cache_ratio:0.3349590323839251 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 -DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:07 [batch.py:51] router release req id 8 -INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10798525810241699 s -INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10971403121948242 s -DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=268847276328336262217239237926366796205, time:1750768987.5548983s req_ids:[8] -DEBUG 06-24 20:43:07 [manager.py:391] -ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:221.94337844848633ms total_cost_time:221.98772430419922ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15379 prompt_cache_len:5151 prompt_cache_ratio:0.3349372520970154 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 -DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:07 [batch.py:51] router release req id 8 -INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.10790586471557617 s -INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.10968899726867676 s -DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=115752829122166113927631094203261456617, time:1750768987.7880783s req_ids:[8] -DEBUG 06-24 20:43:07 [manager.py:391] -ERROR 06-24 20:43:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:183.96997451782227ms total_cost_time:184.01241302490234ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15380 prompt_cache_len:5151 prompt_cache_ratio:0.33491547464239274 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 -DEBUG 06-24 20:43:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:07 [batch.py:51] router release req id 8 -INFO 06-24 20:43:07 [manager.py:224] router recive req id 8 cost time 0.1085822582244873 s -INFO 06-24 20:43:07 [manager.py:68] detokenization recv req id 8 cost time 0.11048269271850586 s -DEBUG 06-24 20:43:07 [manager.py:391] Prefill Batch: batch_id=179713555479128556894913035793076482, time:1750768987.972466s req_ids:[8] -DEBUG 06-24 20:43:07 [manager.py:391] -ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:07 lightllm_req_id:8 first_token_cost:373.6555576324463ms total_cost_time:373.7020492553711ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15381 prompt_cache_len:5151 prompt_cache_ratio:0.3348937000195046 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 -DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:08 [batch.py:51] router release req id 8 -INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.10820245742797852 s -INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.11017012596130371 s -DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=222199049591143076391014291186387884592, time:1750768988.3543856s req_ids:[8] -DEBUG 06-24 20:43:08 [manager.py:391] -ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:222.46980667114258ms total_cost_time:222.51296043395996ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15382 prompt_cache_len:5151 prompt_cache_ratio:0.3348719282277987 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 -DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:08 [batch.py:51] router release req id 8 -INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.10981488227844238 s -INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.11178183555603027 s -DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=103545135952820151947916647480866047324, time:1750768988.5826573s req_ids:[8] -DEBUG 06-24 20:43:08 [manager.py:391] -ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:216.74752235412598ms total_cost_time:216.81451797485352ms,out_token_counter:1 mean_per_token_cost_time: 0.06699562072753906ms prompt_token_num:15383 prompt_cache_len:5151 prompt_cache_ratio:0.334850159266723 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 -DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:08 [batch.py:51] router release req id 8 -INFO 06-24 20:43:08 [manager.py:224] router recive req id 8 cost time 0.1080472469329834 s -INFO 06-24 20:43:08 [manager.py:68] detokenization recv req id 8 cost time 0.10985088348388672 s -DEBUG 06-24 20:43:08 [manager.py:391] Prefill Batch: batch_id=180511660459799740200696957219688452439, time:1750768988.8052795s req_ids:[8] -DEBUG 06-24 20:43:08 [manager.py:391] -ERROR 06-24 20:43:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:213.9143943786621ms total_cost_time:213.95587921142578ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15384 prompt_cache_len:5151 prompt_cache_ratio:0.33482839313572543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 -DEBUG 06-24 20:43:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:08 [batch.py:51] router release req id 8 -INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.1089012622833252 s -INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11016225814819336 s -DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=5678973956362070982095814165889962956, time:1750768989.026877s req_ids:[8] -DEBUG 06-24 20:43:09 [manager.py:391] -ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:08 lightllm_req_id:8 first_token_cost:220.50833702087402ms total_cost_time:220.5519676208496ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15385 prompt_cache_len:5151 prompt_cache_ratio:0.33480662983425413 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 -DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:09 [batch.py:51] router release req id 8 -INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10807967185974121 s -INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.10934162139892578 s -DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=306702398674857164511381156890790356101, time:1750768989.2533498s req_ids:[8] -DEBUG 06-24 20:43:09 [manager.py:391] -ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:389.7264003753662ms total_cost_time:389.7716999053955ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15386 prompt_cache_len:5151 prompt_cache_ratio:0.33478486936175744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 -DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:09 [batch.py:51] router release req id 8 -INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10943794250488281 s -INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11133217811584473 s -DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=132337494665832374764081061621139557188, time:1750768989.6514843s req_ids:[8] -DEBUG 06-24 20:43:09 [manager.py:391] -ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:218.25337409973145ms total_cost_time:218.29938888549805ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15387 prompt_cache_len:5151 prompt_cache_ratio:0.33476311171768375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 -DEBUG 06-24 20:43:09 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:09 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:09 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:09 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:09 [batch.py:51] router release req id 8 -INFO 06-24 20:43:09 [manager.py:224] router recive req id 8 cost time 0.10807394981384277 s -INFO 06-24 20:43:09 [manager.py:68] detokenization recv req id 8 cost time 0.11014723777770996 s -DEBUG 06-24 20:43:09 [manager.py:391] Prefill Batch: batch_id=129152334527789253160810628574957973227, time:1750768989.873794s req_ids:[8] -DEBUG 06-24 20:43:09 [manager.py:391] -ERROR 06-24 20:43:09 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:09 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:09 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:216.21990203857422ms total_cost_time:216.2625789642334ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15388 prompt_cache_len:5151 prompt_cache_ratio:0.33474135690148166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:09 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 -DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:10 [batch.py:51] router release req id 8 -INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10935521125793457 s -INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11126232147216797 s -DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=301707460772738326108228294881170992639, time:1750768990.0978022s req_ids:[8] -DEBUG 06-24 20:43:10 [manager.py:391] -ERROR 06-24 20:43:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:09 lightllm_req_id:8 first_token_cost:226.64141654968262ms total_cost_time:226.6843318939209ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15389 prompt_cache_len:5151 prompt_cache_ratio:0.3347196049125999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:10 [manager.py:106] timer detokenize batch cost time 317.1837329864502 ms -INFO 06-24 20:43:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 -DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:10 [batch.py:51] router release req id 8 -INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10874342918395996 s -INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11060690879821777 s -DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=81300660259002508943704420534911316703, time:1750768990.6677232s req_ids:[8] -DEBUG 06-24 20:43:10 [manager.py:391] -ERROR 06-24 20:43:10 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:10 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:10 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 first_token_cost:257.68017768859863ms total_cost_time:257.7247619628906ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15390 prompt_cache_len:5151 prompt_cache_ratio:0.3346978557504873 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:10 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 -DEBUG 06-24 20:43:10 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:10 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:10 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:10 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:10 [batch.py:51] router release req id 8 -INFO 06-24 20:43:10 [manager.py:224] router recive req id 8 cost time 0.10962891578674316 s -INFO 06-24 20:43:10 [manager.py:68] detokenization recv req id 8 cost time 0.11192893981933594 s -DEBUG 06-24 20:43:10 [manager.py:391] Prefill Batch: batch_id=180103438408433234449581578360276301228, time:1750768990.917566s req_ids:[8] -DEBUG 06-24 20:43:10 [manager.py:391] -ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:43:11 [stats.py:37] Avg tokens(prompt+generate) throughput: 58406.070 tokens/s -DEBUG 06-24 20:43:11 [stats.py:37] Avg prompt tokens throughput: 58398.472 tokens/s -DEBUG 06-24 20:43:11 [stats.py:37] Avg generate tokens throughput: 7.598 tokens/s -INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:10 lightllm_req_id:8 first_token_cost:398.74958992004395ms total_cost_time:398.79560470581055ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15391 prompt_cache_len:5151 prompt_cache_ratio:0.33467610941459297 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 -DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:11 [batch.py:51] router release req id 8 -INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10924124717712402 s -INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1113588809967041 s -DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=85566923316330572428938097048648464572, time:1750768991.3164673s req_ids:[8] -DEBUG 06-24 20:43:11 [manager.py:391] -ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:211.38334274291992ms total_cost_time:211.4262580871582ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15392 prompt_cache_len:5151 prompt_cache_ratio:0.3346543659043659 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 -DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:11 [batch.py:51] router release req id 8 -INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10875225067138672 s -INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1106879711151123 s -DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=313972188896506657273327691386561294965, time:1750768991.535128s req_ids:[8] -DEBUG 06-24 20:43:11 [manager.py:391] -ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:213.61398696899414ms total_cost_time:213.65714073181152ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15393 prompt_cache_len:5151 prompt_cache_ratio:0.3346326252192555 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 -DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:11 [batch.py:51] router release req id 8 -INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10947370529174805 s -INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.1113595962524414 s -DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=290822336709275417918323137163358897869, time:1750768991.7550218s req_ids:[8] -DEBUG 06-24 20:43:11 [manager.py:391] -ERROR 06-24 20:43:11 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:11 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:11 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:212.81194686889648ms total_cost_time:212.8584384918213ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15394 prompt_cache_len:5151 prompt_cache_ratio:0.3346108873587112 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:11 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 -DEBUG 06-24 20:43:11 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:11 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:11 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:11 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:11 [batch.py:51] router release req id 8 -INFO 06-24 20:43:11 [manager.py:224] router recive req id 8 cost time 0.10914349555969238 s -INFO 06-24 20:43:11 [manager.py:68] detokenization recv req id 8 cost time 0.11104607582092285 s -DEBUG 06-24 20:43:11 [manager.py:391] Prefill Batch: batch_id=213481428035240435327713353303699695161, time:1750768991.974523s req_ids:[8] -DEBUG 06-24 20:43:11 [manager.py:391] -ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:11 lightllm_req_id:8 first_token_cost:213.1814956665039ms total_cost_time:213.2244110107422ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15395 prompt_cache_len:5151 prompt_cache_ratio:0.3345891523221825 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 -DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:12 [batch.py:51] router release req id 8 -INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10822629928588867 s -INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11034584045410156 s -DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=96381301246285436145112724650056725543, time:1750768992.1939557s req_ids:[8] -DEBUG 06-24 20:43:12 [manager.py:391] -ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:384.7815990447998ms total_cost_time:384.8264217376709ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15396 prompt_cache_len:5151 prompt_cache_ratio:0.3345674201091193 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 -DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:12 [batch.py:51] router release req id 8 -INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10871767997741699 s -INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11065840721130371 s -DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=57806719697594403274272096543536383004, time:1750768992.5862477s req_ids:[8] -DEBUG 06-24 20:43:12 [manager.py:391] -ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:211.87186241149902ms total_cost_time:211.91692352294922ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15397 prompt_cache_len:5151 prompt_cache_ratio:0.3345456907189712 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 -DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:12 [batch.py:51] router release req id 8 -INFO 06-24 20:43:12 [manager.py:224] router recive req id 8 cost time 0.10848116874694824 s -INFO 06-24 20:43:12 [manager.py:68] detokenization recv req id 8 cost time 0.11076831817626953 s -DEBUG 06-24 20:43:12 [manager.py:391] Prefill Batch: batch_id=219660776213359549977653419765497230576, time:1750768992.803929s req_ids:[8] -DEBUG 06-24 20:43:12 [manager.py:391] -ERROR 06-24 20:43:12 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:12 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:12 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:216.2792682647705ms total_cost_time:216.3236141204834ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15398 prompt_cache_len:5151 prompt_cache_ratio:0.33452396415118846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:12 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 -DEBUG 06-24 20:43:12 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:12 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:12 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:12 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:12 [batch.py:51] router release req id 8 -INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10858869552612305 s -INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11043334007263184 s -DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=124706528384044252875859282675346320134, time:1750768993.0296085s req_ids:[8] -DEBUG 06-24 20:43:13 [manager.py:391] -ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:12 lightllm_req_id:8 first_token_cost:218.68252754211426ms total_cost_time:218.72901916503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15399 prompt_cache_len:5151 prompt_cache_ratio:0.33450224040522114 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 -DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:13 [batch.py:51] router release req id 8 -INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10816168785095215 s -INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11008620262145996 s -DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=232024296140760365638428958816223515262, time:1750768993.2540321s req_ids:[8] -DEBUG 06-24 20:43:13 [manager.py:391] -ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:217.1928882598877ms total_cost_time:217.2374725341797ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15400 prompt_cache_len:5151 prompt_cache_ratio:0.3344805194805195 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 -DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:13 [batch.py:51] router release req id 8 -INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10905265808105469 s -INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11109161376953125 s -DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=58136029423578194316573177133878374348, time:1750768993.4777226s req_ids:[8] -DEBUG 06-24 20:43:13 [manager.py:391] -ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:217.01979637145996ms total_cost_time:217.06366539001465ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15401 prompt_cache_len:5151 prompt_cache_ratio:0.334458801376534 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 -DEBUG 06-24 20:43:13 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:13 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:13 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:13 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:13 [batch.py:51] router release req id 8 -INFO 06-24 20:43:13 [manager.py:224] router recive req id 8 cost time 0.10912156105041504 s -INFO 06-24 20:43:13 [manager.py:68] detokenization recv req id 8 cost time 0.11156368255615234 s -DEBUG 06-24 20:43:13 [manager.py:391] Prefill Batch: batch_id=239367475946870249514322205844673974904, time:1750768993.700736s req_ids:[8] -DEBUG 06-24 20:43:13 [manager.py:391] -ERROR 06-24 20:43:13 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:13 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:13 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:388.5383605957031ms total_cost_time:388.582706451416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15402 prompt_cache_len:5151 prompt_cache_ratio:0.3344370860927152 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:13 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 -DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:14 [batch.py:51] router release req id 8 -INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10821652412414551 s -INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11035895347595215 s -DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=151164019357857465822206331908738888446, time:1750768994.0968673s req_ids:[8] -DEBUG 06-24 20:43:14 [manager.py:391] -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:14 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:13 lightllm_req_id:8 first_token_cost:212.2197151184082ms total_cost_time:212.2633457183838ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15403 prompt_cache_len:5151 prompt_cache_ratio:0.3344153736285139 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 -DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:14 [batch.py:51] router release req id 8 -INFO 06-24 20:43:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10928821563720703 s -INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11142516136169434 s -DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=331395454763253572283641362036003778945, time:1750768994.316298s req_ids:[8] -DEBUG 06-24 20:43:14 [manager.py:391] -ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:213.24396133422852ms total_cost_time:213.2883071899414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15404 prompt_cache_len:5151 prompt_cache_ratio:0.33439366398338094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 -DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:14 [batch.py:51] router release req id 8 -INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10882997512817383 s -INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11083388328552246 s -DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=124013980840683801576894085890826652153, time:1750768994.5370975s req_ids:[8] -DEBUG 06-24 20:43:14 [manager.py:391] -ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:213.75608444213867ms total_cost_time:213.79947662353516ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15405 prompt_cache_len:5151 prompt_cache_ratio:0.33437195715676726 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 -DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:14 [batch.py:51] router release req id 8 -INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10836458206176758 s -INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11024045944213867 s -DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=302877489830753757722032464645096476937, time:1750768994.7566955s req_ids:[8] -DEBUG 06-24 20:43:14 [manager.py:391] -ERROR 06-24 20:43:14 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:14 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:14 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:217.756986618042ms total_cost_time:217.80085563659668ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15406 prompt_cache_len:5151 prompt_cache_ratio:0.3343502531481241 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:14 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 -DEBUG 06-24 20:43:14 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:14 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:14 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:14 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:14 [batch.py:51] router release req id 8 -INFO 06-24 20:43:14 [manager.py:224] router recive req id 8 cost time 0.10835695266723633 s -INFO 06-24 20:43:14 [manager.py:68] detokenization recv req id 8 cost time 0.11048412322998047 s -DEBUG 06-24 20:43:14 [manager.py:391] Prefill Batch: batch_id=24581509388758548075957499746046130690, time:1750768994.9801626s req_ids:[8] -DEBUG 06-24 20:43:14 [manager.py:391] -ERROR 06-24 20:43:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:14 lightllm_req_id:8 first_token_cost:216.02725982666016ms total_cost_time:216.08662605285645ms,out_token_counter:1 mean_per_token_cost_time: 0.05936622619628906ms prompt_token_num:15407 prompt_cache_len:5151 prompt_cache_ratio:0.3343285519569027 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 -DEBUG 06-24 20:43:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:15 [batch.py:51] router release req id 8 -INFO 06-24 20:43:15 [manager.py:224] router recive req id 8 cost time 0.3141491413116455 s -INFO 06-24 20:43:15 [manager.py:68] detokenization recv req id 8 cost time 0.31620049476623535 s -DEBUG 06-24 20:43:15 [manager.py:391] Prefill Batch: batch_id=45656487084254762653541826288689922393, time:1750768995.4146621s req_ids:[8] -DEBUG 06-24 20:43:15 [manager.py:391] -ERROR 06-24 20:43:15 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:15 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:15 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 first_token_cost:436.81812286376953ms total_cost_time:436.8605613708496ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15408 prompt_cache_len:5151 prompt_cache_ratio:0.3343068535825545 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 -DEBUG 06-24 20:43:15 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:15 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:15 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:15 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:15 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:15 [batch.py:51] router release req id 8 -INFO 06-24 20:43:15 [manager.py:224] router recive req id 8 cost time 0.11034822463989258 s -INFO 06-24 20:43:15 [manager.py:68] detokenization recv req id 8 cost time 0.4668753147125244 s -DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=179764857156876102112194523042355233840, time:1750768996.0324202s req_ids:[8] -DEBUG 06-24 20:43:16 [manager.py:391] -ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:15 lightllm_req_id:8 first_token_cost:614.6852970123291ms total_cost_time:614.7286891937256ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15409 prompt_cache_len:5151 prompt_cache_ratio:0.3342851580245311 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 -DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:16 [batch.py:51] router release req id 8 -INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10853028297424316 s -INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s -DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=312962332055338874345956745086123473610, time:1750768996.2694905s req_ids:[8] -DEBUG 06-24 20:43:16 [manager.py:391] -ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:220.7329273223877ms total_cost_time:220.77584266662598ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15410 prompt_cache_len:5151 prompt_cache_ratio:0.33426346528228423 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 -DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:16 [batch.py:51] router release req id 8 -INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10837054252624512 s -INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.10977959632873535 s -DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=184278810902614614469661160053231905718, time:1750768996.4954352s req_ids:[8] -DEBUG 06-24 20:43:16 [manager.py:391] -ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:216.17555618286133ms total_cost_time:216.2189483642578ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15411 prompt_cache_len:5151 prompt_cache_ratio:0.3342417753552657 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 -DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:16 [batch.py:51] router release req id 8 -INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10907387733459473 s -INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.11047863960266113 s -DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=127334781582033291877612998857628704528, time:1750768996.730037s req_ids:[8] -DEBUG 06-24 20:43:16 [manager.py:391] -ERROR 06-24 20:43:16 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:16 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:16 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:230.37409782409668ms total_cost_time:230.41820526123047ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15412 prompt_cache_len:5151 prompt_cache_ratio:0.3342200882429276 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 -DEBUG 06-24 20:43:16 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:16 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:16 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:16 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:16 [batch.py:51] router release req id 8 -INFO 06-24 20:43:16 [manager.py:224] router recive req id 8 cost time 0.10807085037231445 s -INFO 06-24 20:43:16 [manager.py:68] detokenization recv req id 8 cost time 0.1100459098815918 s -DEBUG 06-24 20:43:16 [manager.py:391] Prefill Batch: batch_id=206383878400547312186242012635412106189, time:1750768996.9565449s req_ids:[8] -DEBUG 06-24 20:43:16 [manager.py:391] -INFO 06-24 20:43:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:16 lightllm_req_id:8 first_token_cost:384.3717575073242ms total_cost_time:384.4037055969238ms,out_token_counter:1 mean_per_token_cost_time: 0.031948089599609375ms prompt_token_num:15413 prompt_cache_len:5151 prompt_cache_ratio:0.334198403944722 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 -DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:17 [batch.py:51] router release req id 8 -INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.10870766639709473 s -INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11110234260559082 s -DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=125220546088489368585121117453757955615, time:1750768997.3469355s req_ids:[8] -DEBUG 06-24 20:43:17 [manager.py:391] -ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:215.54183959960938ms total_cost_time:215.58451652526855ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15414 prompt_cache_len:5151 prompt_cache_ratio:0.3341767224601012 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 -DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:17 [batch.py:51] router release req id 8 -INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.10894107818603516 s -INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11038947105407715 s -DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=10271938347883595421600931731739267664, time:1750768997.569174s req_ids:[8] -DEBUG 06-24 20:43:17 [manager.py:391] -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:17 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:217.9241180419922ms total_cost_time:217.98467636108398ms,out_token_counter:1 mean_per_token_cost_time: 0.060558319091796875ms prompt_token_num:15415 prompt_cache_len:5151 prompt_cache_ratio:0.3341550437885177 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 -DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:17 [batch.py:51] router release req id 8 -INFO 06-24 20:43:17 [manager.py:224] router recive req id 8 cost time 0.1117558479309082 s -INFO 06-24 20:43:17 [manager.py:68] detokenization recv req id 8 cost time 0.11353754997253418 s -DEBUG 06-24 20:43:17 [manager.py:391] Prefill Batch: batch_id=299861424305003713188570558129473653595, time:1750768997.7940192s req_ids:[8] -DEBUG 06-24 20:43:17 [manager.py:391] -ERROR 06-24 20:43:17 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:17 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:216.65525436401367ms total_cost_time:216.70007705688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15416 prompt_cache_len:5151 prompt_cache_ratio:0.334133367929424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:17 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 -DEBUG 06-24 20:43:17 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:17 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:17 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:17 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:17 [batch.py:51] router release req id 8 -INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10855364799499512 s -INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11046457290649414 s -DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=129765180899556088813968405685969006848, time:1750768998.0230882s req_ids:[8] -DEBUG 06-24 20:43:18 [manager.py:391] -ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:17 lightllm_req_id:8 first_token_cost:225.85725784301758ms total_cost_time:225.90255737304688ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15417 prompt_cache_len:5151 prompt_cache_ratio:0.3341116948822728 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 -DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:18 [batch.py:51] router release req id 8 -INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10993432998657227 s -INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11182641983032227 s -DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=189023724908253657618418827555334444259, time:1750768998.2482333s req_ids:[8] -DEBUG 06-24 20:43:18 [manager.py:391] -ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:387.33863830566406ms total_cost_time:387.38131523132324ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15418 prompt_cache_len:5151 prompt_cache_ratio:0.33409002464651705 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 -DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:18 [batch.py:51] router release req id 8 -INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10862183570861816 s -INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11064600944519043 s -DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=325010068512206289651033121211828969125, time:1750768998.6425078s req_ids:[8] -DEBUG 06-24 20:43:18 [manager.py:391] -ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:216.26520156860352ms total_cost_time:216.30859375ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15419 prompt_cache_len:5151 prompt_cache_ratio:0.3340683572216097 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 -DEBUG 06-24 20:43:18 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:18 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:18 [batch.py:51] router release req id 8 -INFO 06-24 20:43:18 [manager.py:224] router recive req id 8 cost time 0.10863685607910156 s -INFO 06-24 20:43:18 [manager.py:68] detokenization recv req id 8 cost time 0.11051654815673828 s -DEBUG 06-24 20:43:18 [manager.py:391] Prefill Batch: batch_id=260660830272828802047573391917372451747, time:1750768998.8690102s req_ids:[8] -DEBUG 06-24 20:43:18 [manager.py:391] -ERROR 06-24 20:43:18 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:18 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:218.65081787109375ms total_cost_time:218.69373321533203ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15420 prompt_cache_len:5151 prompt_cache_ratio:0.33404669260700387 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 -DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:19 [batch.py:51] router release req id 8 -INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10846996307373047 s -INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.1105659008026123 s -DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=271177456271730926536685791785396149031, time:1750768999.0920513s req_ids:[8] -DEBUG 06-24 20:43:19 [manager.py:391] -ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:18 lightllm_req_id:8 first_token_cost:214.9369716644287ms total_cost_time:214.9813175201416ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15421 prompt_cache_len:5151 prompt_cache_ratio:0.3340250308021529 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 -DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:19 [batch.py:51] router release req id 8 -INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10835599899291992 s -INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.11019301414489746 s -DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=251787789265713170359376828944363800646, time:1750768999.3146327s req_ids:[8] -DEBUG 06-24 20:43:19 [manager.py:391] -ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:259.42230224609375ms total_cost_time:259.46640968322754ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15422 prompt_cache_len:5151 prompt_cache_ratio:0.3340033718065102 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 -DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:19 [batch.py:51] router release req id 8 -INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10930490493774414 s -INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.11123132705688477 s -DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=280130801048184147228352792616082019649, time:1750768999.6012118s req_ids:[8] -DEBUG 06-24 20:43:19 [manager.py:391] -ERROR 06-24 20:43:19 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:19 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:221.84133529663086ms total_cost_time:221.89807891845703ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:15423 prompt_cache_len:5151 prompt_cache_ratio:0.3339817156195293 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:19 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 -DEBUG 06-24 20:43:19 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:19 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:19 [batch.py:51] router release req id 8 -INFO 06-24 20:43:19 [manager.py:224] router recive req id 8 cost time 0.10887408256530762 s -INFO 06-24 20:43:19 [manager.py:68] detokenization recv req id 8 cost time 0.1102292537689209 s -DEBUG 06-24 20:43:19 [manager.py:391] Prefill Batch: batch_id=236997033003466197485780446928212902286, time:1750768999.826101s req_ids:[8] -DEBUG 06-24 20:43:19 [manager.py:391] -ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:19 lightllm_req_id:8 first_token_cost:344.8307514190674ms total_cost_time:344.8760509490967ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15424 prompt_cache_len:5151 prompt_cache_ratio:0.3339600622406639 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 -DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:20 [batch.py:51] router release req id 8 -INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.10805130004882812 s -INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11015486717224121 s -DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=57309986884507969375818372613517005954, time:1750769000.178851s req_ids:[8] -DEBUG 06-24 20:43:20 [manager.py:391] -ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:213.73748779296875ms total_cost_time:213.78302574157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15425 prompt_cache_len:5151 prompt_cache_ratio:0.3339384116693679 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 -DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:20 [batch.py:51] router release req id 8 -INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.10901594161987305 s -INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11085796356201172 s -DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=120139984236611335976036335208686722033, time:1750769000.40728s req_ids:[8] -DEBUG 06-24 20:43:20 [manager.py:391] -ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:231.99105262756348ms total_cost_time:232.15413093566895ms,out_token_counter:1 mean_per_token_cost_time: 0.16307830810546875ms prompt_token_num:15426 prompt_cache_len:5151 prompt_cache_ratio:0.3339167639050953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 -DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:20 [batch.py:51] router release req id 8 -INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.11100220680236816 s -INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11310529708862305 s -DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=178239456482495432555398965045462208962, time:1750769000.6715631s req_ids:[8] -DEBUG 06-24 20:43:20 [manager.py:391] -ERROR 06-24 20:43:20 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:20 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:255.23805618286133ms total_cost_time:255.28311729431152ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15427 prompt_cache_len:5151 prompt_cache_ratio:0.3338951189473002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:20 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 -DEBUG 06-24 20:43:20 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:20 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:20 [batch.py:51] router release req id 8 -INFO 06-24 20:43:20 [manager.py:224] router recive req id 8 cost time 0.11085247993469238 s -INFO 06-24 20:43:20 [manager.py:68] detokenization recv req id 8 cost time 0.11293339729309082 s -DEBUG 06-24 20:43:20 [manager.py:391] Prefill Batch: batch_id=241827451097481701816494216152067596274, time:1750769000.9060152s req_ids:[8] -DEBUG 06-24 20:43:20 [manager.py:391] -ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:20 lightllm_req_id:8 first_token_cost:214.76197242736816ms total_cost_time:214.80441093444824ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15428 prompt_cache_len:5151 prompt_cache_ratio:0.3338734767954369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 -DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:21 [batch.py:51] router release req id 8 -INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.10837912559509277 s -INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11051201820373535 s -DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=267478173731040531160448048982231957021, time:1750769001.1244347s req_ids:[8] -DEBUG 06-24 20:43:21 [manager.py:391] -ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:43:21 [stats.py:37] Avg tokens(prompt+generate) throughput: 57399.840 tokens/s -DEBUG 06-24 20:43:21 [stats.py:37] Avg prompt tokens throughput: 57392.392 tokens/s -DEBUG 06-24 20:43:21 [stats.py:37] Avg generate tokens throughput: 7.448 tokens/s -INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:389.1007900238037ms total_cost_time:389.14942741394043ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:15429 prompt_cache_len:5151 prompt_cache_ratio:0.33385183744895974 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 -DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:21 [batch.py:51] router release req id 8 -INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.1093900203704834 s -INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11147904396057129 s -DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=157527296228540410486096363381835443000, time:1750769001.5206366s req_ids:[8] -DEBUG 06-24 20:43:21 [manager.py:391] -ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:218.97482872009277ms total_cost_time:219.03038024902344ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:15430 prompt_cache_len:5151 prompt_cache_ratio:0.3338302009073234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 -DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:21 [batch.py:51] router release req id 8 -INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.11124801635742188 s -DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=338832660266854091497349862000465714120, time:1750769001.7454457s req_ids:[8] -DEBUG 06-24 20:43:21 [manager.py:391] -INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.11341428756713867 s -ERROR 06-24 20:43:21 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:21 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:210.35408973693848ms total_cost_time:210.39700508117676ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15431 prompt_cache_len:5151 prompt_cache_ratio:0.33380856716998253 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:21 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 -DEBUG 06-24 20:43:21 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:21 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:21 [batch.py:51] router release req id 8 -INFO 06-24 20:43:21 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s -INFO 06-24 20:43:21 [manager.py:68] detokenization recv req id 8 cost time 0.1099236011505127 s -DEBUG 06-24 20:43:21 [manager.py:391] Prefill Batch: batch_id=8811457152464451264968692518318777534, time:1750769001.9638553s req_ids:[8] -DEBUG 06-24 20:43:21 [manager.py:391] -ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:21 lightllm_req_id:8 first_token_cost:217.67520904541016ms total_cost_time:217.71883964538574ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15432 prompt_cache_len:5151 prompt_cache_ratio:0.33378693623639194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 -DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:22 [batch.py:51] router release req id 8 -INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.10793471336364746 s -INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.10970568656921387 s -DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=53760691995464876594379747515243920548, time:1750769002.188636s req_ids:[8] -DEBUG 06-24 20:43:22 [manager.py:391] -ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:174.33476448059082ms total_cost_time:174.3781566619873ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15433 prompt_cache_len:5151 prompt_cache_ratio:0.33376530810600663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 -DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:22 [batch.py:51] router release req id 8 -INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.10853791236877441 s -INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.1109762191772461 s -DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=38032441872155846565838070902272483626, time:1750769002.3701382s req_ids:[8] -DEBUG 06-24 20:43:22 [manager.py:391] -ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:344.6993827819824ms total_cost_time:344.7437286376953ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15434 prompt_cache_len:5151 prompt_cache_ratio:0.3337436827782817 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 -DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:22 [batch.py:51] router release req id 8 -INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.11163687705993652 s -DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=262153972283909031013284660982623243805, time:1750769002.7198937s req_ids:[8] -DEBUG 06-24 20:43:22 [manager.py:391] -INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.11339735984802246 s -ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:172.82676696777344ms total_cost_time:172.87015914916992ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15435 prompt_cache_len:5151 prompt_cache_ratio:0.3337220602526725 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 -DEBUG 06-24 20:43:22 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:22 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:22 [batch.py:51] router release req id 8 -INFO 06-24 20:43:22 [manager.py:224] router recive req id 8 cost time 0.11005949974060059 s -INFO 06-24 20:43:22 [manager.py:68] detokenization recv req id 8 cost time 0.1120445728302002 s -DEBUG 06-24 20:43:22 [manager.py:391] Prefill Batch: batch_id=270058030800655076917920579116954086705, time:1750769002.9012778s req_ids:[8] -DEBUG 06-24 20:43:22 [manager.py:391] -ERROR 06-24 20:43:22 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:22 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:205.0013542175293ms total_cost_time:205.0468921661377ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15436 prompt_cache_len:5151 prompt_cache_ratio:0.33370044052863435 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:22 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 -DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:23 [batch.py:51] router release req id 8 -INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10875415802001953 s -INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.1107327938079834 s -DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=65619476569552218348962656888527820624, time:1750769003.1157167s req_ids:[8] -DEBUG 06-24 20:43:23 [manager.py:391] -ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:22 lightllm_req_id:8 first_token_cost:214.39886093139648ms total_cost_time:214.44106101989746ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15437 prompt_cache_len:5151 prompt_cache_ratio:0.33367882360562284 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 -INFO 06-24 20:43:23 [statics_utils.py:24] mean first cost: 233.27891738854936 ms -INFO 06-24 20:43:23 [statics_utils.py:24] mean per token cost: 0.05690889423068175 ms -DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:23 [batch.py:51] router release req id 8 -INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10909080505371094 s -INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.11149430274963379 s -DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=59041762474116765485562589127778992167, time:1750769003.334885s req_ids:[8] -DEBUG 06-24 20:43:23 [manager.py:391] -ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:212.97001838684082ms total_cost_time:213.0138874053955ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15438 prompt_cache_len:5151 prompt_cache_ratio:0.3336572094830937 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 -DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:23 [batch.py:51] router release req id 8 -INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10874819755554199 s -INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.11063814163208008 s -DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=163291143409158833628341115754739092869, time:1750769003.557256s req_ids:[8] -DEBUG 06-24 20:43:23 [manager.py:391] -ERROR 06-24 20:43:23 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:23 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:387.91823387145996ms total_cost_time:387.96329498291016ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15439 prompt_cache_len:5151 prompt_cache_ratio:0.33363559816050264 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:23 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 -DEBUG 06-24 20:43:23 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:23 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:23 [batch.py:51] router release req id 8 -INFO 06-24 20:43:23 [manager.py:224] router recive req id 8 cost time 0.10738015174865723 s -INFO 06-24 20:43:23 [manager.py:68] detokenization recv req id 8 cost time 0.10911989212036133 s -DEBUG 06-24 20:43:23 [manager.py:391] Prefill Batch: batch_id=229808869300752675759341439956250764466, time:1750769003.9604712s req_ids:[8] -DEBUG 06-24 20:43:23 [manager.py:391] -ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:23 lightllm_req_id:8 first_token_cost:227.89478302001953ms total_cost_time:227.9372215270996ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15440 prompt_cache_len:5151 prompt_cache_ratio:0.3336139896373057 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 -DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:24 [batch.py:51] router release req id 8 -INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.10939478874206543 s -INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.1115720272064209 s -DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=37132088448787553993466938728754968678, time:1750769004.1874907s req_ids:[8] -DEBUG 06-24 20:43:24 [manager.py:391] -ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:214.4761085510254ms total_cost_time:214.51759338378906ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15441 prompt_cache_len:5151 prompt_cache_ratio:0.333592383912959 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 -DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:24 [batch.py:51] router release req id 8 -INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.10942554473876953 s -INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11134600639343262 s -DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=314276141382556848510486457349007866900, time:1750769004.426429s req_ids:[8] -DEBUG 06-24 20:43:24 [manager.py:391] -ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:270.92695236206055ms total_cost_time:270.9698677062988ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15442 prompt_cache_len:5151 prompt_cache_ratio:0.3335707809869188 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 -DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:24 [batch.py:51] router release req id 8 -INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.11108183860778809 s -INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11325907707214355 s -DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=190968883731145992986182063731898355058, time:1750769004.7023172s req_ids:[8] -DEBUG 06-24 20:43:24 [manager.py:391] -ERROR 06-24 20:43:24 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:24 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:217.15092658996582ms total_cost_time:217.1950340270996ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15443 prompt_cache_len:5151 prompt_cache_ratio:0.33354918085864144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:24 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 -DEBUG 06-24 20:43:24 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:24 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:24 [batch.py:51] router release req id 8 -INFO 06-24 20:43:24 [manager.py:224] router recive req id 8 cost time 0.1082298755645752 s -INFO 06-24 20:43:24 [manager.py:68] detokenization recv req id 8 cost time 0.11023688316345215 s -DEBUG 06-24 20:43:24 [manager.py:391] Prefill Batch: batch_id=196763527768202311873037372773450053327, time:1750769004.9253538s req_ids:[8] -DEBUG 06-24 20:43:24 [manager.py:391] -ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:24 lightllm_req_id:8 first_token_cost:219.42687034606934ms total_cost_time:219.47002410888672ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15444 prompt_cache_len:5151 prompt_cache_ratio:0.33352758352758355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 -DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:25 [batch.py:51] router release req id 8 -INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10905122756958008 s -INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.11112761497497559 s -DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=58386318805146299272037676221056832045, time:1750769005.1626537s req_ids:[8] -DEBUG 06-24 20:43:25 [manager.py:391] -ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:399.8548984527588ms total_cost_time:399.8987674713135ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15445 prompt_cache_len:5151 prompt_cache_ratio:0.33350598899320166 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 -DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:25 [batch.py:51] router release req id 8 -INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10901379585266113 s -INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.11108016967773438 s -DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=236306962793082257345209269303516878151, time:1750769005.5599859s req_ids:[8] -DEBUG 06-24 20:43:25 [manager.py:391] -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:25 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:217.03815460205078ms total_cost_time:217.08154678344727ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15446 prompt_cache_len:5151 prompt_cache_ratio:0.3334843972549527 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 -DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:25 [batch.py:51] router release req id 8 -INFO 06-24 20:43:25 [manager.py:224] router recive req id 8 cost time 0.10959148406982422 s -INFO 06-24 20:43:25 [manager.py:68] detokenization recv req id 8 cost time 0.1118319034576416 s -DEBUG 06-24 20:43:25 [manager.py:391] Prefill Batch: batch_id=145501889432507285585211031145695928294, time:1750769005.7931159s req_ids:[8] -DEBUG 06-24 20:43:25 [manager.py:391] -ERROR 06-24 20:43:25 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:25 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:223.7563133239746ms total_cost_time:223.8013744354248ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15447 prompt_cache_len:5151 prompt_cache_ratio:0.33346280831229363 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:25 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 -DEBUG 06-24 20:43:25 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:25 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:25 [batch.py:51] router release req id 8 -INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.11140751838684082 s -INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11356925964355469 s -DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=339725403631621527710553801225795733746, time:1750769006.0332572s req_ids:[8] -DEBUG 06-24 20:43:26 [manager.py:391] -ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:25 lightllm_req_id:8 first_token_cost:235.92329025268555ms total_cost_time:235.96835136413574ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15448 prompt_cache_len:5151 prompt_cache_ratio:0.3334412221646815 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 -DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:26 [batch.py:51] router release req id 8 -INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.11177849769592285 s -INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11383533477783203 s -DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=169702736765862436870389350487073749295, time:1750769006.2760806s req_ids:[8] -DEBUG 06-24 20:43:26 [manager.py:391] -ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:234.91382598876953ms total_cost_time:234.95745658874512ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15449 prompt_cache_len:5151 prompt_cache_ratio:0.33341963881157355 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 -DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:26 [batch.py:51] router release req id 8 -INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.10898494720458984 s -INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.11101436614990234 s -DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=236657486488954502226365137269568090389, time:1750769006.504517s req_ids:[8] -DEBUG 06-24 20:43:26 [manager.py:391] -ERROR 06-24 20:43:26 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:26 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:213.85979652404785ms total_cost_time:213.90295028686523ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15450 prompt_cache_len:5151 prompt_cache_ratio:0.3333980582524272 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:26 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 -DEBUG 06-24 20:43:26 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:26 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:26 [batch.py:51] router release req id 8 -INFO 06-24 20:43:26 [manager.py:224] router recive req id 8 cost time 0.31144189834594727 s -INFO 06-24 20:43:26 [manager.py:68] detokenization recv req id 8 cost time 0.31356167793273926 s -DEBUG 06-24 20:43:26 [manager.py:391] Prefill Batch: batch_id=5557421891798285109525589155678990965, time:1750769006.934319s req_ids:[8] -DEBUG 06-24 20:43:26 [manager.py:391] -ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:26 lightllm_req_id:8 first_token_cost:431.17356300354004ms total_cost_time:431.229829788208ms,out_token_counter:1 mean_per_token_cost_time: 0.05626678466796875ms prompt_token_num:15451 prompt_cache_len:5151 prompt_cache_ratio:0.3333764804866999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 -DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:27 [batch.py:51] router release req id 8 -INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.11226201057434082 s -INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11411046981811523 s -DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=286829608933489572007865618870403139389, time:1750769007.1763513s req_ids:[8] -DEBUG 06-24 20:43:27 [manager.py:391] -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:27 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:233.34693908691406ms total_cost_time:233.38985443115234ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15452 prompt_cache_len:5151 prompt_cache_ratio:0.33335490551384933 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 -DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:27 [batch.py:51] router release req id 8 -INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10942244529724121 s -INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11141633987426758 s -DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=224048784967695890038946223843890988933, time:1750769007.4058323s req_ids:[8] -DEBUG 06-24 20:43:27 [manager.py:391] -ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:213.1357192993164ms total_cost_time:213.1783962249756ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15453 prompt_cache_len:5151 prompt_cache_ratio:0.3333333333333333 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 -DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:27 [batch.py:51] router release req id 8 -INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10915660858154297 s -INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11101555824279785 s -DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=123668264819005789502262842458729418571, time:1750769007.626951s req_ids:[8] -DEBUG 06-24 20:43:27 [manager.py:391] -ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:214.92433547973633ms total_cost_time:214.9674892425537ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15454 prompt_cache_len:5151 prompt_cache_ratio:0.3333117639446098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 -DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:27 [batch.py:51] router release req id 8 -INFO 06-24 20:43:27 [manager.py:224] router recive req id 8 cost time 0.10917448997497559 s -INFO 06-24 20:43:27 [manager.py:68] detokenization recv req id 8 cost time 0.11126160621643066 s -DEBUG 06-24 20:43:27 [manager.py:391] Prefill Batch: batch_id=99933064138580819749610954395884721208, time:1750769007.8481743s req_ids:[8] -DEBUG 06-24 20:43:27 [manager.py:391] -ERROR 06-24 20:43:27 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:27 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:217.77963638305664ms total_cost_time:217.82541275024414ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15455 prompt_cache_len:5151 prompt_cache_ratio:0.33329019734713683 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:27 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 -DEBUG 06-24 20:43:27 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:27 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:27 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:27 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:27 [batch.py:51] router release req id 8 -INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11059713363647461 s -INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.11274552345275879 s -DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=2285963685280466275844023781773615237, time:1750769008.0875654s req_ids:[8] -DEBUG 06-24 20:43:28 [manager.py:391] -ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:27 lightllm_req_id:8 first_token_cost:393.904447555542ms total_cost_time:393.963098526001ms,out_token_counter:1 mean_per_token_cost_time: 0.058650970458984375ms prompt_token_num:15456 prompt_cache_len:5151 prompt_cache_ratio:0.33326863354037267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 -DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:28 [batch.py:51] router release req id 8 -INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11050629615783691 s -INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.1125028133392334 s -DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=131277016215271845423939224382614101736, time:1750769008.477619s req_ids:[8] -DEBUG 06-24 20:43:28 [manager.py:391] -ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:227.58078575134277ms total_cost_time:227.62632369995117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15457 prompt_cache_len:5151 prompt_cache_ratio:0.3332470725237756 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 -DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:28 [batch.py:51] router release req id 8 -INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.1099538803100586 s -INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.1120610237121582 s -DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=280283602041049408341498935033118877930, time:1750769008.7204874s req_ids:[8] -DEBUG 06-24 20:43:28 [manager.py:391] -ERROR 06-24 20:43:28 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:28 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:225.05831718444824ms total_cost_time:225.10361671447754ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15458 prompt_cache_len:5151 prompt_cache_ratio:0.33322551429680425 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:28 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 -DEBUG 06-24 20:43:28 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:28 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:28 [batch.py:51] router release req id 8 -INFO 06-24 20:43:28 [manager.py:224] router recive req id 8 cost time 0.11077666282653809 s -INFO 06-24 20:43:28 [manager.py:68] detokenization recv req id 8 cost time 0.11269736289978027 s -DEBUG 06-24 20:43:28 [manager.py:391] Prefill Batch: batch_id=324105153404654701639401294464583416091, time:1750769008.9559665s req_ids:[8] -DEBUG 06-24 20:43:28 [manager.py:391] -ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:28 lightllm_req_id:8 first_token_cost:227.64897346496582ms total_cost_time:227.69570350646973ms,out_token_counter:1 mean_per_token_cost_time: 0.04673004150390625ms prompt_token_num:15459 prompt_cache_len:5151 prompt_cache_ratio:0.3332039588589171 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 -DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:29 [batch.py:51] router release req id 8 -INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.10947012901306152 s -INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.1116032600402832 s -DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=126954439773292175961168287538622488286, time:1750769009.182194s req_ids:[8] -DEBUG 06-24 20:43:29 [manager.py:391] -ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:218.9338207244873ms total_cost_time:218.99151802062988ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:15460 prompt_cache_len:5151 prompt_cache_ratio:0.33318240620957307 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 -DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:29 [batch.py:51] router release req id 8 -INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.11010313034057617 s -DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=189565023201435435948852307635204997817, time:1750769009.4067895s req_ids:[8] -DEBUG 06-24 20:43:29 [manager.py:391] -INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.11206483840942383 s -ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:384.8536014556885ms total_cost_time:384.91201400756836ms,out_token_counter:1 mean_per_token_cost_time: 0.05841255187988281ms prompt_token_num:15461 prompt_cache_len:5151 prompt_cache_ratio:0.33316085634823106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 -DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:29 [batch.py:51] router release req id 8 -INFO 06-24 20:43:29 [manager.py:224] router recive req id 8 cost time 0.1094961166381836 s -INFO 06-24 20:43:29 [manager.py:68] detokenization recv req id 8 cost time 0.11060070991516113 s -DEBUG 06-24 20:43:29 [manager.py:391] Prefill Batch: batch_id=239441346017877375543889587438427279271, time:1750769009.8019886s req_ids:[8] -DEBUG 06-24 20:43:29 [manager.py:391] -ERROR 06-24 20:43:29 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:29 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:219.3284034729004ms total_cost_time:219.37131881713867ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15462 prompt_cache_len:5151 prompt_cache_ratio:0.33313930927435004 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:29 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 -DEBUG 06-24 20:43:29 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:29 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:29 [batch.py:51] router release req id 8 -INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10935711860656738 s -INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.1114358901977539 s -DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=221204952879915705818642985049516004425, time:1750769010.027834s req_ids:[8] -DEBUG 06-24 20:43:30 [manager.py:391] -ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:29 lightllm_req_id:8 first_token_cost:220.414400100708ms total_cost_time:220.46971321105957ms,out_token_counter:1 mean_per_token_cost_time: 0.0553131103515625ms prompt_token_num:15463 prompt_cache_len:5151 prompt_cache_ratio:0.33311776498738926 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 -DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:30 [batch.py:51] router release req id 8 -INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10989665985107422 s -INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11185979843139648 s -DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=104790985737874088387315238630219527358, time:1750769010.2546425s req_ids:[8] -DEBUG 06-24 20:43:30 [manager.py:391] -ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:215.179443359375ms total_cost_time:215.2235507965088ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15464 prompt_cache_len:5151 prompt_cache_ratio:0.33309622348680806 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 -DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:30 [batch.py:51] router release req id 8 -INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10973358154296875 s -INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.1118617057800293 s -DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=302018470920699872595036178330893233693, time:1750769010.4780586s req_ids:[8] -DEBUG 06-24 20:43:30 [manager.py:391] -ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:217.82684326171875ms total_cost_time:217.88668632507324ms,out_token_counter:1 mean_per_token_cost_time: 0.05984306335449219ms prompt_token_num:15465 prompt_cache_len:5151 prompt_cache_ratio:0.333074684772066 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 -DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:30 [batch.py:51] router release req id 8 -INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10962462425231934 s -INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11161327362060547 s -DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=210025485215097564114753896535914509387, time:1750769010.7010825s req_ids:[8] -DEBUG 06-24 20:43:30 [manager.py:391] -ERROR 06-24 20:43:30 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:30 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:30 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:211.1499309539795ms total_cost_time:211.1952304840088ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15466 prompt_cache_len:5151 prompt_cache_ratio:0.33305314884262255 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:30 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 -DEBUG 06-24 20:43:30 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:30 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:30 [batch.py:51] router release req id 8 -INFO 06-24 20:43:30 [manager.py:224] router recive req id 8 cost time 0.10875296592712402 s -INFO 06-24 20:43:30 [manager.py:68] detokenization recv req id 8 cost time 0.11078310012817383 s -DEBUG 06-24 20:43:30 [manager.py:391] Prefill Batch: batch_id=184426979977695687461517933117575577203, time:1750769010.9202573s req_ids:[8] -DEBUG 06-24 20:43:30 [manager.py:391] -ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:30 lightllm_req_id:8 first_token_cost:380.0070285797119ms total_cost_time:380.051851272583ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15467 prompt_cache_len:5151 prompt_cache_ratio:0.33303161569793754 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 -DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:31 [batch.py:51] router release req id 8 -INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10895299911499023 s -INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11087369918823242 s -DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=305570453939615981126622214639287995897, time:1750769011.3077374s req_ids:[8] -DEBUG 06-24 20:43:31 [manager.py:391] -ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:43:31 [stats.py:37] Avg tokens(prompt+generate) throughput: 60186.263 tokens/s -DEBUG 06-24 20:43:31 [stats.py:37] Avg prompt tokens throughput: 60178.472 tokens/s -DEBUG 06-24 20:43:31 [stats.py:37] Avg generate tokens throughput: 7.791 tokens/s -INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:217.96512603759766ms total_cost_time:218.00994873046875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15468 prompt_cache_len:5151 prompt_cache_ratio:0.3330100853374709 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 -DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:31 [batch.py:51] router release req id 8 -INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.1089484691619873 s -INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.1110081672668457 s -DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=99060813480093921108669805488079059722, time:1750769011.5321562s req_ids:[8] -DEBUG 06-24 20:43:31 [manager.py:391] -ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:215.66486358642578ms total_cost_time:215.70873260498047ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15469 prompt_cache_len:5151 prompt_cache_ratio:0.33298855776068265 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 -DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:31 [batch.py:51] router release req id 8 -INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10861921310424805 s -INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11071062088012695 s -DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=312295098165254681390643443013766764798, time:1750769011.7536666s req_ids:[8] -DEBUG 06-24 20:43:31 [manager.py:391] -ERROR 06-24 20:43:31 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:31 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:212.29004859924316ms total_cost_time:212.33463287353516ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15470 prompt_cache_len:5151 prompt_cache_ratio:0.33296703296703295 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:31 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 -DEBUG 06-24 20:43:31 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:31 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:31 [batch.py:51] router release req id 8 -INFO 06-24 20:43:31 [manager.py:224] router recive req id 8 cost time 0.10924267768859863 s -INFO 06-24 20:43:31 [manager.py:68] detokenization recv req id 8 cost time 0.11113262176513672 s -DEBUG 06-24 20:43:31 [manager.py:391] Prefill Batch: batch_id=72984598762621632635097769896538917076, time:1750769011.9749513s req_ids:[8] -DEBUG 06-24 20:43:31 [manager.py:391] -ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:31 lightllm_req_id:8 first_token_cost:219.02084350585938ms total_cost_time:219.06375885009766ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15471 prompt_cache_len:5151 prompt_cache_ratio:0.3329455109559822 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 -DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:32 [batch.py:51] router release req id 8 -INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.10824799537658691 s -INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.11019110679626465 s -DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=136026491060450398138517916074245982929, time:1750769012.1988928s req_ids:[8] -DEBUG 06-24 20:43:32 [manager.py:391] -ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:384.2761516571045ms total_cost_time:384.3202590942383ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15472 prompt_cache_len:5151 prompt_cache_ratio:0.3329239917269907 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 -DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:32 [batch.py:51] router release req id 8 -INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.1090857982635498 s -INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.11110711097717285 s -DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=73868169285685713245807988527796424875, time:1750769012.590482s req_ids:[8] -DEBUG 06-24 20:43:32 [manager.py:391] -ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:218.6727523803711ms total_cost_time:218.71709823608398ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15473 prompt_cache_len:5151 prompt_cache_ratio:0.33290247527951916 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 -DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:32 [batch.py:51] router release req id 8 -INFO 06-24 20:43:32 [manager.py:224] router recive req id 8 cost time 0.10828709602355957 s -INFO 06-24 20:43:32 [manager.py:68] detokenization recv req id 8 cost time 0.110382080078125 s -DEBUG 06-24 20:43:32 [manager.py:391] Prefill Batch: batch_id=67274642922715787807076773425212407903, time:1750769012.8166347s req_ids:[8] -DEBUG 06-24 20:43:32 [manager.py:391] -ERROR 06-24 20:43:32 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:32 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:212.49032020568848ms total_cost_time:212.53299713134766ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15474 prompt_cache_len:5151 prompt_cache_ratio:0.3328809616130283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 -DEBUG 06-24 20:43:32 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:32 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:32 [batch.py:51] router release req id 8 -INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10964417457580566 s -INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11161136627197266 s -DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=53667939613524688608361647364584573029, time:1750769013.0361764s req_ids:[8] -DEBUG 06-24 20:43:33 [manager.py:391] -ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:32 lightllm_req_id:8 first_token_cost:215.73996543884277ms total_cost_time:215.78407287597656ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15475 prompt_cache_len:5151 prompt_cache_ratio:0.332859450726979 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 -DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:33 [batch.py:51] router release req id 8 -INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.1082603931427002 s -INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.1109914779663086 s -DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=107275762603649880429416896818172619369, time:1750769013.2599587s req_ids:[8] -DEBUG 06-24 20:43:33 [manager.py:391] -ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:217.42010116577148ms total_cost_time:217.46420860290527ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15476 prompt_cache_len:5151 prompt_cache_ratio:0.33283794262083227 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 -DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:33 [batch.py:51] router release req id 8 -INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10973119735717773 s -INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11173176765441895 s -DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=153036940948665084371104056569659416867, time:1750769013.487214s req_ids:[8] -DEBUG 06-24 20:43:33 [manager.py:391] -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:33 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:387.67504692077637ms total_cost_time:387.71867752075195ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15477 prompt_cache_len:5151 prompt_cache_ratio:0.33281643729404925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 -DEBUG 06-24 20:43:33 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:33 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:33 [batch.py:51] router release req id 8 -INFO 06-24 20:43:33 [manager.py:224] router recive req id 8 cost time 0.10835552215576172 s -INFO 06-24 20:43:33 [manager.py:68] detokenization recv req id 8 cost time 0.11038017272949219 s -DEBUG 06-24 20:43:33 [manager.py:391] Prefill Batch: batch_id=131429948924080757927873330310697027713, time:1750769013.8777986s req_ids:[8] -DEBUG 06-24 20:43:33 [manager.py:391] -ERROR 06-24 20:43:33 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:33 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:217.46373176574707ms total_cost_time:217.50950813293457ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15478 prompt_cache_len:5151 prompt_cache_ratio:0.33279493474609123 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:33 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 -DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:34 [batch.py:51] router release req id 8 -INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.108062744140625 s -INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.1101231575012207 s -DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=240716361288372971613221214414598235470, time:1750769014.1030223s req_ids:[8] -DEBUG 06-24 20:43:34 [manager.py:391] -ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:33 lightllm_req_id:8 first_token_cost:210.06369590759277ms total_cost_time:210.10923385620117ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15479 prompt_cache_len:5151 prompt_cache_ratio:0.33277343497641965 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 -DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:34 [batch.py:51] router release req id 8 -INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.1082620620727539 s -INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11044645309448242 s -DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=156938767727576743828337823644899685455, time:1750769014.3244421s req_ids:[8] -DEBUG 06-24 20:43:34 [manager.py:391] -ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:222.18036651611328ms total_cost_time:222.22590446472168ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15480 prompt_cache_len:5151 prompt_cache_ratio:0.33275193798449615 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 -DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:34 [batch.py:51] router release req id 8 -INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.10965895652770996 s -INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11166501045227051 s -DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=119835342933129630132236326942008096500, time:1750769014.5491033s req_ids:[8] -DEBUG 06-24 20:43:34 [manager.py:391] -ERROR 06-24 20:43:34 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:34 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:34 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:217.49615669250488ms total_cost_time:217.54002571105957ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15481 prompt_cache_len:5151 prompt_cache_ratio:0.33273044376978234 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:34 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 -DEBUG 06-24 20:43:34 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:34 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:34 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:34 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:34 [batch.py:51] router release req id 8 -INFO 06-24 20:43:34 [manager.py:224] router recive req id 8 cost time 0.10910940170288086 s -INFO 06-24 20:43:34 [manager.py:68] detokenization recv req id 8 cost time 0.11089539527893066 s -DEBUG 06-24 20:43:34 [manager.py:391] Prefill Batch: batch_id=282507000742130658888453064996081948521, time:1750769014.77378s req_ids:[8] -DEBUG 06-24 20:43:34 [manager.py:391] -ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:34 lightllm_req_id:8 first_token_cost:390.78283309936523ms total_cost_time:390.82860946655273ms,out_token_counter:1 mean_per_token_cost_time: 0.0457763671875ms prompt_token_num:15482 prompt_cache_len:5151 prompt_cache_ratio:0.3327089523317401 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 -DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:35 [batch.py:51] router release req id 8 -INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10924935340881348 s -INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11108207702636719 s -DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=251889490099763646579944707063002492875, time:1750769015.1706903s req_ids:[8] -DEBUG 06-24 20:43:35 [manager.py:391] -ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:216.18270874023438ms total_cost_time:216.22872352600098ms,out_token_counter:1 mean_per_token_cost_time: 0.04601478576660156ms prompt_token_num:15483 prompt_cache_len:5151 prompt_cache_ratio:0.33268746366983143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 -DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:35 [batch.py:51] router release req id 8 -INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10914778709411621 s -DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=185327335961143310989529372401487027081, time:1750769015.3925962s req_ids:[8] -DEBUG 06-24 20:43:35 [manager.py:391] -INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11093425750732422 s -ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:212.7859592437744ms total_cost_time:212.8283977508545ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15484 prompt_cache_len:5151 prompt_cache_ratio:0.3326659777835185 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 -DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:35 [batch.py:51] router release req id 8 -INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10917401313781738 s -INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11100506782531738 s -INFO 06-24 20:43:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=156352584738741434355173611217471607218, time:1750769015.6198106s req_ids:[8] -DEBUG 06-24 20:43:35 [manager.py:391] -ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:216.35770797729492ms total_cost_time:216.4008617401123ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15485 prompt_cache_len:5151 prompt_cache_ratio:0.3326444946722635 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 -DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:35 [batch.py:51] router release req id 8 -INFO 06-24 20:43:35 [manager.py:224] router recive req id 8 cost time 0.10853719711303711 s -INFO 06-24 20:43:35 [manager.py:68] detokenization recv req id 8 cost time 0.11038994789123535 s -DEBUG 06-24 20:43:35 [manager.py:391] Prefill Batch: batch_id=90190926452675642297566359561849615636, time:1750769015.8393862s req_ids:[8] -DEBUG 06-24 20:43:35 [manager.py:391] -ERROR 06-24 20:43:35 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:35 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:35 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:215.46339988708496ms total_cost_time:215.50798416137695ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15486 prompt_cache_len:5151 prompt_cache_ratio:0.3326230143355289 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:35 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 -DEBUG 06-24 20:43:35 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:35 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:35 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:35 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:35 [batch.py:51] router release req id 8 -INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10935282707214355 s -INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11129403114318848 s -DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=62113987517455922359902916587392281832, time:1750769016.0620043s req_ids:[8] -DEBUG 06-24 20:43:36 [manager.py:391] -ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:35 lightllm_req_id:8 first_token_cost:220.16668319702148ms total_cost_time:220.21198272705078ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15487 prompt_cache_len:5151 prompt_cache_ratio:0.33260153677277715 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 -DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:36 [batch.py:51] router release req id 8 -INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10828351974487305 s -INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11025691032409668 s -DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=225312501493360974579127034886677379280, time:1750769016.2881653s req_ids:[8] -DEBUG 06-24 20:43:36 [manager.py:391] -ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:422.806978225708ms total_cost_time:422.8498935699463ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15488 prompt_cache_len:5151 prompt_cache_ratio:0.33258006198347106 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 -DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:36 [batch.py:51] router release req id 8 -INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10875320434570312 s -INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11066293716430664 s -DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=94719725824781149228032226210658312208, time:1750769016.7179585s req_ids:[8] -DEBUG 06-24 20:43:36 [manager.py:391] -ERROR 06-24 20:43:36 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:36 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:36 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:203.75299453735352ms total_cost_time:203.7985324859619ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15489 prompt_cache_len:5151 prompt_cache_ratio:0.3325585899670734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:36 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 -DEBUG 06-24 20:43:36 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:36 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:36 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:36 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:36 [batch.py:51] router release req id 8 -DEBUG 06-24 20:43:36 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:36 [manager.py:283] -DEBUG 06-24 20:43:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:36 [manager.py:284] -INFO 06-24 20:43:36 [manager.py:224] router recive req id 8 cost time 0.10907626152038574 s -INFO 06-24 20:43:36 [manager.py:68] detokenization recv req id 8 cost time 0.11105656623840332 s -DEBUG 06-24 20:43:36 [manager.py:391] Prefill Batch: batch_id=196026483954770485932375621299786134364, time:1750769016.9293842s req_ids:[8] -DEBUG 06-24 20:43:36 [manager.py:391] -ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:36 lightllm_req_id:8 first_token_cost:214.73979949951172ms total_cost_time:214.78271484375ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15490 prompt_cache_len:5151 prompt_cache_ratio:0.33253712072304714 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 -DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:37 [batch.py:51] router release req id 8 -INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.10830116271972656 s -INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.10978007316589355 s -DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=332235020123944962942246435542303478277, time:1750769017.150864s req_ids:[8] -DEBUG 06-24 20:43:37 [manager.py:391] -ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:214.72620964050293ms total_cost_time:214.7691249847412ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15491 prompt_cache_len:5151 prompt_cache_ratio:0.33251565425085533 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 -DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:37 [batch.py:51] router release req id 8 -INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.10905289649963379 s -INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.11128711700439453 s -DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=169890904052221588498594811705163407816, time:1750769017.3724678s req_ids:[8] -DEBUG 06-24 20:43:37 [manager.py:391] -ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:213.59705924987793ms total_cost_time:213.6397361755371ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15492 prompt_cache_len:5151 prompt_cache_ratio:0.33249419054996127 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 -DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:37 [batch.py:51] router release req id 8 -INFO 06-24 20:43:37 [manager.py:224] router recive req id 8 cost time 0.1094675064086914 s -INFO 06-24 20:43:37 [manager.py:68] detokenization recv req id 8 cost time 0.11138677597045898 s -DEBUG 06-24 20:43:37 [manager.py:391] Prefill Batch: batch_id=322676009228767463733138541071397237828, time:1750769017.5916646s req_ids:[8] -DEBUG 06-24 20:43:37 [manager.py:391] -ERROR 06-24 20:43:37 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:37 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:37 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:215.20471572875977ms total_cost_time:215.24786949157715ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15493 prompt_cache_len:5151 prompt_cache_ratio:0.3324727296198283 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:37 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 -DEBUG 06-24 20:43:37 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:37 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:37 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:37 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:37 [batch.py:51] router release req id 8 -INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.31024932861328125 s -INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.31502318382263184 s -DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=154176038929949792403952196384885986923, time:1750769018.018621s req_ids:[8] -DEBUG 06-24 20:43:38 [manager.py:391] -ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:37 lightllm_req_id:8 first_token_cost:427.809476852417ms total_cost_time:427.8557300567627ms,out_token_counter:1 mean_per_token_cost_time: 0.046253204345703125ms prompt_token_num:15494 prompt_cache_len:5151 prompt_cache_ratio:0.33245127145992 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 -DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:38 [batch.py:51] router release req id 8 -INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10812211036682129 s -INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11049509048461914 s -DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=176293531940193866655195328798697408596, time:1750769018.2486646s req_ids:[8] -DEBUG 06-24 20:43:38 [manager.py:391] -ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:214.2167091369629ms total_cost_time:214.26129341125488ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15495 prompt_cache_len:5151 prompt_cache_ratio:0.3324298160696999 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 -DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:38 [batch.py:51] router release req id 8 -INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10844874382019043 s -INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11204886436462402 s -DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=144846425567057627243962262319234119388, time:1750769018.473652s req_ids:[8] -DEBUG 06-24 20:43:38 [manager.py:391] -ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:201.89785957336426ms total_cost_time:201.94077491760254ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15496 prompt_cache_len:5151 prompt_cache_ratio:0.3324083634486319 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 -DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:38 [batch.py:51] router release req id 8 -INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10819530487060547 s -INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.10994768142700195 s -DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=90968109130511610281869163205376469076, time:1750769018.691005s req_ids:[8] -DEBUG 06-24 20:43:38 [manager.py:391] -ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:209.58638191223145ms total_cost_time:209.63025093078613ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15497 prompt_cache_len:5151 prompt_cache_ratio:0.3323869135961799 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 -DEBUG 06-24 20:43:38 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:38 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:38 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:38 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:38 [batch.py:51] router release req id 8 -INFO 06-24 20:43:38 [manager.py:224] router recive req id 8 cost time 0.10927915573120117 s -INFO 06-24 20:43:38 [manager.py:68] detokenization recv req id 8 cost time 0.11118483543395996 s -DEBUG 06-24 20:43:38 [manager.py:391] Prefill Batch: batch_id=53953272495919721536225720168864675600, time:1750769018.9026737s req_ids:[8] -DEBUG 06-24 20:43:38 [manager.py:391] -ERROR 06-24 20:43:38 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:38 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:38 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:203.83739471435547ms total_cost_time:203.88293266296387ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15498 prompt_cache_len:5151 prompt_cache_ratio:0.332365466511808 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:38 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 -DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:39 [batch.py:51] router release req id 8 -INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.1075601577758789 s -INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.10952067375183105 s -DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=179672858928877123196054330801667626126, time:1750769019.119352s req_ids:[8] -DEBUG 06-24 20:43:39 [manager.py:391] -ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:38 lightllm_req_id:8 first_token_cost:373.5947608947754ms total_cost_time:373.6388683319092ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15499 prompt_cache_len:5151 prompt_cache_ratio:0.3323440221949803 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 -DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:39 [batch.py:51] router release req id 8 -INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10801005363464355 s -INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11068034172058105 s -DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=326237757909633856153384566180308443622, time:1750769019.515276s req_ids:[8] -DEBUG 06-24 20:43:39 [manager.py:391] -ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:239.72749710083008ms total_cost_time:239.76993560791016ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15500 prompt_cache_len:5151 prompt_cache_ratio:0.3323225806451613 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 -DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:39 [batch.py:51] router release req id 8 -INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10840535163879395 s -INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11039304733276367 s -DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=98342287027970499537167805550769379722, time:1750769019.7317693s req_ids:[8] -DEBUG 06-24 20:43:39 [manager.py:391] -ERROR 06-24 20:43:39 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:39 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:39 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:207.47852325439453ms total_cost_time:207.5207233428955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15501 prompt_cache_len:5151 prompt_cache_ratio:0.33230114186181536 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:39 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 -DEBUG 06-24 20:43:39 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:39 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:39 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:39 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:39 [batch.py:51] router release req id 8 -INFO 06-24 20:43:39 [manager.py:224] router recive req id 8 cost time 0.10872769355773926 s -INFO 06-24 20:43:39 [manager.py:68] detokenization recv req id 8 cost time 0.11060380935668945 s -DEBUG 06-24 20:43:39 [manager.py:391] Prefill Batch: batch_id=316351813828968168688975933987935746619, time:1750769019.960206s req_ids:[8] -DEBUG 06-24 20:43:39 [manager.py:391] -ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:39 lightllm_req_id:8 first_token_cost:229.83813285827637ms total_cost_time:229.88367080688477ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15502 prompt_cache_len:5151 prompt_cache_ratio:0.3322797058444072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 -DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:40 [batch.py:51] router release req id 8 -INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10823583602905273 s -INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11039900779724121 s -DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=49120880856124430077460048045797745168, time:1750769020.184157s req_ids:[8] -DEBUG 06-24 20:43:40 [manager.py:391] -ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:213.62853050231934ms total_cost_time:213.67263793945312ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15503 prompt_cache_len:5151 prompt_cache_ratio:0.3322582725924015 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 -DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:40 [batch.py:51] router release req id 8 -INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10865330696105957 s -INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11075806617736816 s -DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=278881800412086961829088317152244328712, time:1750769020.404828s req_ids:[8] -DEBUG 06-24 20:43:40 [manager.py:391] -ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:362.5960350036621ms total_cost_time:362.6413345336914ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15504 prompt_cache_len:5151 prompt_cache_ratio:0.33223684210526316 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 -DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:40 [batch.py:51] router release req id 8 -INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10818123817443848 s -INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.11027264595031738 s -DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=139379794584315504223860311709632557893, time:1750769020.7747266s req_ids:[8] -DEBUG 06-24 20:43:40 [manager.py:391] -ERROR 06-24 20:43:40 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:40 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:211.0278606414795ms total_cost_time:211.07172966003418ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15505 prompt_cache_len:5151 prompt_cache_ratio:0.3322154143824573 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 -DEBUG 06-24 20:43:40 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:40 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:40 [batch.py:51] router release req id 8 -INFO 06-24 20:43:40 [manager.py:224] router recive req id 8 cost time 0.10772895812988281 s -INFO 06-24 20:43:40 [manager.py:68] detokenization recv req id 8 cost time 0.10985684394836426 s -DEBUG 06-24 20:43:40 [manager.py:391] Prefill Batch: batch_id=276999116135120299237225054605357287, time:1750769020.9926078s req_ids:[8] -DEBUG 06-24 20:43:40 [manager.py:391] -ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:40 lightllm_req_id:8 first_token_cost:211.36164665222168ms total_cost_time:211.40480041503906ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15506 prompt_cache_len:5151 prompt_cache_ratio:0.33219398942344897 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 -DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:41 [batch.py:51] router release req id 8 -INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.1097109317779541 s -INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11178469657897949 s -DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=323019034965407069123328577334358345369, time:1750769021.2118876s req_ids:[8] -DEBUG 06-24 20:43:41 [manager.py:391] -ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:216.28785133361816ms total_cost_time:216.33052825927734ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15507 prompt_cache_len:5151 prompt_cache_ratio:0.3321725672277036 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 -DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:41 [batch.py:51] router release req id 8 -INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10851693153381348 s -INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11034893989562988 s -DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=338686882482975875892332852139445727500, time:1750769021.434208s req_ids:[8] -DEBUG 06-24 20:43:41 [manager.py:391] -DEBUG 06-24 20:43:41 [stats.py:37] Avg tokens(prompt+generate) throughput: 61806.758 tokens/s -DEBUG 06-24 20:43:41 [stats.py:37] Avg prompt tokens throughput: 61798.877 tokens/s -DEBUG 06-24 20:43:41 [stats.py:37] Avg generate tokens throughput: 7.880 tokens/s -ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:212.97788619995117ms total_cost_time:213.01889419555664ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:15508 prompt_cache_len:5151 prompt_cache_ratio:0.33215114779468663 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 -DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:41 [batch.py:51] router release req id 8 -INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10841083526611328 s -INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.11022520065307617 s -DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=261928917451178508832671656259816829495, time:1750769021.6518214s req_ids:[8] -DEBUG 06-24 20:43:41 [manager.py:391] -ERROR 06-24 20:43:41 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:41 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:166.74184799194336ms total_cost_time:166.78404808044434ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15509 prompt_cache_len:5151 prompt_cache_ratio:0.33212973112386357 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:41 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 -DEBUG 06-24 20:43:41 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:41 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:41 [batch.py:51] router release req id 8 -INFO 06-24 20:43:41 [manager.py:224] router recive req id 8 cost time 0.10819625854492188 s -INFO 06-24 20:43:41 [manager.py:68] detokenization recv req id 8 cost time 0.10960817337036133 s -DEBUG 06-24 20:43:41 [manager.py:391] Prefill Batch: batch_id=5273772548952963671445260707588477053, time:1750769021.8250737s req_ids:[8] -DEBUG 06-24 20:43:41 [manager.py:391] -ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:41 lightllm_req_id:8 first_token_cost:319.5171356201172ms total_cost_time:319.5605278015137ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15510 prompt_cache_len:5151 prompt_cache_ratio:0.3321083172147002 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 -DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:42 [batch.py:51] router release req id 8 -INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10884642601013184 s -INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.11066818237304688 s -DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=19619467589783348822045253082794053381, time:1750769022.1836612s req_ids:[8] -DEBUG 06-24 20:43:42 [manager.py:391] -ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:234.19904708862305ms total_cost_time:234.24148559570312ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15511 prompt_cache_len:5151 prompt_cache_ratio:0.33208690606666236 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 -DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:42 [batch.py:51] router release req id 8 -INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10812544822692871 s -DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=316269426407312303547313796351305791702, time:1750769022.3916593s req_ids:[8] -DEBUG 06-24 20:43:42 [manager.py:391] -INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.10962510108947754 s -ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:199.63908195495605ms total_cost_time:199.68461990356445ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15512 prompt_cache_len:5151 prompt_cache_ratio:0.3320654976792161 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 -DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:42 [batch.py:51] router release req id 8 -INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.1075887680053711 s -INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.10952448844909668 s -DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=329017144932209401201276784159304588777, time:1750769022.6012936s req_ids:[8] -DEBUG 06-24 20:43:42 [manager.py:391] -ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:212.99004554748535ms total_cost_time:213.03439140319824ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15513 prompt_cache_len:5151 prompt_cache_ratio:0.3320440920518275 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 -DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:42 [batch.py:51] router release req id 8 -INFO 06-24 20:43:42 [manager.py:224] router recive req id 8 cost time 0.10810232162475586 s -INFO 06-24 20:43:42 [manager.py:68] detokenization recv req id 8 cost time 0.1108400821685791 s -DEBUG 06-24 20:43:42 [manager.py:391] Prefill Batch: batch_id=303196132137554750414265443312942272038, time:1750769022.824088s req_ids:[8] -DEBUG 06-24 20:43:42 [manager.py:391] -ERROR 06-24 20:43:42 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:42 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:42 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:232.99384117126465ms total_cost_time:233.03937911987305ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15514 prompt_cache_len:5151 prompt_cache_ratio:0.3320226891839629 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:42 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 -DEBUG 06-24 20:43:42 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:42 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:42 [batch.py:51] router release req id 8 -INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.10845279693603516 s -INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11036467552185059 s -DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=307230671625728604567787563093723081941, time:1750769023.0726013s req_ids:[8] -DEBUG 06-24 20:43:43 [manager.py:391] -ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:42 lightllm_req_id:8 first_token_cost:396.67487144470215ms total_cost_time:396.72040939331055ms,out_token_counter:1 mean_per_token_cost_time: 0.04553794860839844ms prompt_token_num:15515 prompt_cache_len:5151 prompt_cache_ratio:0.33200128907508863 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 -DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:43 [batch.py:51] router release req id 8 -INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.10805821418762207 s -INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.1099388599395752 s -DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=45754913226802713990027559320085193234, time:1750769023.467828s req_ids:[8] -DEBUG 06-24 20:43:43 [manager.py:391] -ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:220.01290321350098ms total_cost_time:220.05701065063477ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15516 prompt_cache_len:5151 prompt_cache_ratio:0.3319798917246713 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 -DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:43 [batch.py:51] router release req id 8 -INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.1091923713684082 s -INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11132264137268066 s -DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=318214888497941041975188300981612976238, time:1750769023.6943517s req_ids:[8] -DEBUG 06-24 20:43:43 [manager.py:391] -ERROR 06-24 20:43:43 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:43 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:218.61982345581055ms total_cost_time:218.66297721862793ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15517 prompt_cache_len:5151 prompt_cache_ratio:0.3319584971321776 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 -DEBUG 06-24 20:43:43 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:43 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:43 [batch.py:51] router release req id 8 -INFO 06-24 20:43:43 [manager.py:224] router recive req id 8 cost time 0.1096038818359375 s -INFO 06-24 20:43:43 [manager.py:68] detokenization recv req id 8 cost time 0.11199712753295898 s -DEBUG 06-24 20:43:43 [manager.py:391] Prefill Batch: batch_id=109436801085966099966744629989268400186, time:1750769023.918232s req_ids:[8] -DEBUG 06-24 20:43:43 [manager.py:391] -ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:43 lightllm_req_id:8 first_token_cost:213.72485160827637ms total_cost_time:213.76681327819824ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15518 prompt_cache_len:5151 prompt_cache_ratio:0.3319371052970744 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 -DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:44 [batch.py:51] router release req id 8 -INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10857915878295898 s -INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11065101623535156 s -DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=103802723867093625407415147389185553408, time:1750769024.1391547s req_ids:[8] -DEBUG 06-24 20:43:44 [manager.py:391] -ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:213.38510513305664ms total_cost_time:213.43016624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15519 prompt_cache_len:5151 prompt_cache_ratio:0.33191571621882854 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 -DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:44 [batch.py:51] router release req id 8 -INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.1097710132598877 s -INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11186552047729492 s -DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=242984402063095259225873652698373280291, time:1750769024.360155s req_ids:[8] -DEBUG 06-24 20:43:44 [manager.py:391] -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:44 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:385.99467277526855ms total_cost_time:386.03997230529785ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15520 prompt_cache_len:5151 prompt_cache_ratio:0.3318943298969072 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 -DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:44 [batch.py:51] router release req id 8 -INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10904550552368164 s -INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11099576950073242 s -DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=102059917900976279482206318202318222254, time:1750769024.752716s req_ids:[8] -DEBUG 06-24 20:43:44 [manager.py:391] -ERROR 06-24 20:43:44 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:44 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:218.8570499420166ms total_cost_time:218.89996528625488ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15521 prompt_cache_len:5151 prompt_cache_ratio:0.33187294633077763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:44 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 -DEBUG 06-24 20:43:44 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:44 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:44 [batch.py:51] router release req id 8 -INFO 06-24 20:43:44 [manager.py:224] router recive req id 8 cost time 0.10829567909240723 s -INFO 06-24 20:43:44 [manager.py:68] detokenization recv req id 8 cost time 0.11029386520385742 s -DEBUG 06-24 20:43:44 [manager.py:391] Prefill Batch: batch_id=299729287154150161561299181764875804344, time:1750769024.9782145s req_ids:[8] -DEBUG 06-24 20:43:44 [manager.py:391] -ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:44 lightllm_req_id:8 first_token_cost:217.8504467010498ms total_cost_time:217.8940773010254ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15522 prompt_cache_len:5151 prompt_cache_ratio:0.33185156551990724 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 -DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:45 [batch.py:51] router release req id 8 -INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.1095130443572998 s -INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11152219772338867 s -DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=207303940375172202598675770025417374171, time:1750769025.2024257s req_ids:[8] -DEBUG 06-24 20:43:45 [manager.py:391] -ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:211.37762069702148ms total_cost_time:211.42101287841797ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15523 prompt_cache_len:5151 prompt_cache_ratio:0.33183018746376347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 -DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:45 [batch.py:51] router release req id 8 -INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.1092832088470459 s -INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11099910736083984 s -DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=142950815813708327940461603860703940202, time:1750769025.4204268s req_ids:[8] -DEBUG 06-24 20:43:45 [manager.py:391] -ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:211.83252334594727ms total_cost_time:211.87758445739746ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15524 prompt_cache_len:5151 prompt_cache_ratio:0.33180881216181396 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 -DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:45 [batch.py:51] router release req id 8 -INFO 06-24 20:43:45 [manager.py:224] router recive req id 8 cost time 0.10939145088195801 s -INFO 06-24 20:43:45 [manager.py:68] detokenization recv req id 8 cost time 0.11132168769836426 s -DEBUG 06-24 20:43:45 [manager.py:391] Prefill Batch: batch_id=101950293631671734011448909659977498726, time:1750769025.6506877s req_ids:[8] -DEBUG 06-24 20:43:45 [manager.py:391] -ERROR 06-24 20:43:45 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:45 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:404.6914577484131ms total_cost_time:404.73484992980957ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15525 prompt_cache_len:5151 prompt_cache_ratio:0.3317874396135266 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 -DEBUG 06-24 20:43:45 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:45 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:45 [batch.py:51] router release req id 8 -INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10904622077941895 s -INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.1109311580657959 s -DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=332371293557360053851842754272485921015, time:1750769026.0508275s req_ids:[8] -DEBUG 06-24 20:43:46 [manager.py:391] -ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:45 lightllm_req_id:8 first_token_cost:219.32601928710938ms total_cost_time:219.37060356140137ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15526 prompt_cache_len:5151 prompt_cache_ratio:0.3317660698183692 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 -DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:46 [batch.py:51] router release req id 8 -INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10840916633605957 s -INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11032652854919434 s -DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=30911605617061220223646506835168024326, time:1750769026.2763624s req_ids:[8] -DEBUG 06-24 20:43:46 [manager.py:391] -ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:214.61892127990723ms total_cost_time:214.66398239135742ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15527 prompt_cache_len:5151 prompt_cache_ratio:0.33174470277580986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 -DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:46 [batch.py:51] router release req id 8 -INFO 06-24 20:43:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10843610763549805 s -INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11064028739929199 s -DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=126905134549935820575579145840257770058, time:1750769026.4983764s req_ids:[8] -DEBUG 06-24 20:43:46 [manager.py:391] -ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:218.4758186340332ms total_cost_time:218.51778030395508ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15528 prompt_cache_len:5151 prompt_cache_ratio:0.3317233384853168 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 -DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:46 [batch.py:51] router release req id 8 -INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.1088407039642334 s -INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11062455177307129 s -DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=102514456566867554588566489105507614995, time:1750769026.7221158s req_ids:[8] -DEBUG 06-24 20:43:46 [manager.py:391] -ERROR 06-24 20:43:46 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:46 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:214.47443962097168ms total_cost_time:214.5540714263916ms,out_token_counter:1 mean_per_token_cost_time: 0.07963180541992188ms prompt_token_num:15529 prompt_cache_len:5151 prompt_cache_ratio:0.33170197694635845 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 -DEBUG 06-24 20:43:46 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:46 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:46 [batch.py:51] router release req id 8 -INFO 06-24 20:43:46 [manager.py:224] router recive req id 8 cost time 0.10939574241638184 s -INFO 06-24 20:43:46 [manager.py:68] detokenization recv req id 8 cost time 0.11149168014526367 s -DEBUG 06-24 20:43:46 [manager.py:391] Prefill Batch: batch_id=204619035539067143220960315319791039486, time:1750769026.9447358s req_ids:[8] -DEBUG 06-24 20:43:46 [manager.py:391] -ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:46 lightllm_req_id:8 first_token_cost:217.1328067779541ms total_cost_time:217.1781063079834ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15530 prompt_cache_len:5151 prompt_cache_ratio:0.3316806181584031 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 -DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:47 [batch.py:51] router release req id 8 -INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.1081230640411377 s -INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11090254783630371 s -DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=150118097590983921166223722144446790883, time:1750769027.1711185s req_ids:[8] -DEBUG 06-24 20:43:47 [manager.py:391] -INFO 06-24 20:43:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:389.64176177978516ms total_cost_time:389.68634605407715ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15531 prompt_cache_len:5151 prompt_cache_ratio:0.33165926212091945 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 -DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:47 [batch.py:51] router release req id 8 -INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.10819673538208008 s -INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11037588119506836 s -DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=242542600559515427446593851441123828876, time:1750769027.5656703s req_ids:[8] -DEBUG 06-24 20:43:47 [manager.py:391] -ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:209.35606956481934ms total_cost_time:209.39970016479492ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15532 prompt_cache_len:5151 prompt_cache_ratio:0.33163790883337624 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 -DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:47 [batch.py:51] router release req id 8 -INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.10970306396484375 s -INFO 06-24 20:43:47 [manager.py:68] detokenization recv req id 8 cost time 0.11189413070678711 s -DEBUG 06-24 20:43:47 [manager.py:391] Prefill Batch: batch_id=171464810881854301774516463915539359504, time:1750769027.7821412s req_ids:[8] -DEBUG 06-24 20:43:47 [manager.py:391] -ERROR 06-24 20:43:47 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:47 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:47 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:214.22362327575684ms total_cost_time:214.26820755004883ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15533 prompt_cache_len:5151 prompt_cache_ratio:0.3316165582952424 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 -DEBUG 06-24 20:43:47 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:47 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:47 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:47 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:47 [batch.py:51] router release req id 8 -INFO 06-24 20:43:47 [manager.py:224] router recive req id 8 cost time 0.1089773178100586 s -INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11106634140014648 s -DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=308284938768527598047522113813846306806, time:1750769028.003821s req_ids:[8] -DEBUG 06-24 20:43:48 [manager.py:391] -ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:47 lightllm_req_id:8 first_token_cost:218.7483310699463ms total_cost_time:218.79339218139648ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15534 prompt_cache_len:5151 prompt_cache_ratio:0.3315952105059869 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 -DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:48 [batch.py:51] router release req id 8 -INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.1097109317779541 s -INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11153721809387207 s -DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=306627584847641138759538466045388416844, time:1750769028.2294636s req_ids:[8] -DEBUG 06-24 20:43:48 [manager.py:391] -ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:216.54891967773438ms total_cost_time:216.59088134765625ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15535 prompt_cache_len:5151 prompt_cache_ratio:0.33157386546507883 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 -DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:48 [batch.py:51] router release req id 8 -INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.11142349243164062 s -INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.11330986022949219 s -DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=249088319425165120783365185202831381600, time:1750769028.4527485s req_ids:[8] -DEBUG 06-24 20:43:48 [manager.py:391] -ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:217.0267105102539ms total_cost_time:217.0712947845459ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15536 prompt_cache_len:5151 prompt_cache_ratio:0.33155252317198763 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 -DEBUG 06-24 20:43:48 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:48 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:48 [batch.py:51] router release req id 8 -INFO 06-24 20:43:48 [manager.py:224] router recive req id 8 cost time 0.3107578754425049 s -INFO 06-24 20:43:48 [manager.py:68] detokenization recv req id 8 cost time 0.31271815299987793 s -DEBUG 06-24 20:43:48 [manager.py:391] Prefill Batch: batch_id=46338616987134804412961997750335130830, time:1750769028.8855793s req_ids:[8] -DEBUG 06-24 20:43:48 [manager.py:391] -ERROR 06-24 20:43:48 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:48 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:429.05402183532715ms total_cost_time:429.09908294677734ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15537 prompt_cache_len:5151 prompt_cache_ratio:0.33153118362618267 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:48 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 -DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:49 [batch.py:51] router release req id 8 -INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10877561569213867 s -INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11074161529541016 s -DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=256304896901180767019829689296353044728, time:1750769029.111985s req_ids:[8] -DEBUG 06-24 20:43:49 [manager.py:391] -ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:48 lightllm_req_id:8 first_token_cost:216.85051918029785ms total_cost_time:216.89343452453613ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15538 prompt_cache_len:5151 prompt_cache_ratio:0.33150984682713347 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 -DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:49 [batch.py:51] router release req id 8 -INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10911345481872559 s -INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11107611656188965 s -DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=11914350465585293193335753998047384224, time:1750769029.3375275s req_ids:[8] -DEBUG 06-24 20:43:49 [manager.py:391] -ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:218.6570167541504ms total_cost_time:218.69945526123047ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15539 prompt_cache_len:5151 prompt_cache_ratio:0.3314885127743098 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 -DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:49 [batch.py:51] router release req id 8 -INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10962080955505371 s -INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11184453964233398 s -DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=92559025657682593139588003299918682449, time:1750769029.563439s req_ids:[8] -DEBUG 06-24 20:43:49 [manager.py:391] -ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:217.23246574401855ms total_cost_time:217.27585792541504ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15540 prompt_cache_len:5151 prompt_cache_ratio:0.33146718146718146 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 -DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:49 [batch.py:51] router release req id 8 -INFO 06-24 20:43:49 [manager.py:224] router recive req id 8 cost time 0.10815238952636719 s -INFO 06-24 20:43:49 [manager.py:68] detokenization recv req id 8 cost time 0.11005711555480957 s -DEBUG 06-24 20:43:49 [manager.py:391] Prefill Batch: batch_id=138787423009346355910998276199849501270, time:1750769029.7868407s req_ids:[8] -DEBUG 06-24 20:43:49 [manager.py:391] -ERROR 06-24 20:43:49 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:49 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:219.39587593078613ms total_cost_time:219.4387912750244ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15541 prompt_cache_len:5151 prompt_cache_ratio:0.33144585290521844 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:49 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 -DEBUG 06-24 20:43:49 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:49 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:49 [batch.py:51] router release req id 8 -INFO 06-24 20:43:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10800313949584961 s -INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.10933399200439453 s -DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=15917486968578099426193658055216877790, time:1750769030.0150003s req_ids:[8] -DEBUG 06-24 20:43:50 [manager.py:391] -ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:49 lightllm_req_id:8 first_token_cost:406.0220718383789ms total_cost_time:406.08739852905273ms,out_token_counter:1 mean_per_token_cost_time: 0.06532669067382812ms prompt_token_num:15542 prompt_cache_len:5151 prompt_cache_ratio:0.3314245270878909 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 -DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:50 [batch.py:51] router release req id 8 -INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10936927795410156 s -INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11150431632995605 s -DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=74724516942241345359130765108610129002, time:1750769030.4255998s req_ids:[8] -DEBUG 06-24 20:43:50 [manager.py:391] -ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:220.32666206359863ms total_cost_time:220.37053108215332ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15543 prompt_cache_len:5151 prompt_cache_ratio:0.331403204014669 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 -DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:50 [batch.py:51] router release req id 8 -INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10883212089538574 s -INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11093354225158691 s -DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=237161794849529447737333821955647323542, time:1750769030.6510277s req_ids:[8] -DEBUG 06-24 20:43:50 [manager.py:391] -ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:217.75317192077637ms total_cost_time:217.79704093933105ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15544 prompt_cache_len:5151 prompt_cache_ratio:0.33138188368502314 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 -DEBUG 06-24 20:43:50 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:50 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:50 [batch.py:51] router release req id 8 -INFO 06-24 20:43:50 [manager.py:224] router recive req id 8 cost time 0.10813164710998535 s -INFO 06-24 20:43:50 [manager.py:68] detokenization recv req id 8 cost time 0.11019206047058105 s -DEBUG 06-24 20:43:50 [manager.py:391] Prefill Batch: batch_id=197860938431525613644076390093338610885, time:1750769030.8760686s req_ids:[8] -DEBUG 06-24 20:43:50 [manager.py:391] -ERROR 06-24 20:43:50 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:50 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:219.44522857666016ms total_cost_time:219.49052810668945ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15545 prompt_cache_len:5151 prompt_cache_ratio:0.3313605660984239 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 -DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:51 [batch.py:51] router release req id 8 -INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10980010032653809 s -INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11174654960632324 s -DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=118969098824032969123341260923366932207, time:1750769031.105517s req_ids:[8] -DEBUG 06-24 20:43:51 [manager.py:391] -ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:50 lightllm_req_id:8 first_token_cost:221.2235927581787ms total_cost_time:221.266508102417ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15546 prompt_cache_len:5151 prompt_cache_ratio:0.33133925125434194 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 -DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:51 [batch.py:51] router release req id 8 -INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10801362991333008 s -INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11009836196899414 s -DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=297666862153480019537062366943576659205, time:1750769031.3304858s req_ids:[8] -DEBUG 06-24 20:43:51 [manager.py:391] -ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:43:51 [stats.py:37] Avg tokens(prompt+generate) throughput: 59569.973 tokens/s -DEBUG 06-24 20:43:51 [stats.py:37] Avg prompt tokens throughput: 59562.203 tokens/s -DEBUG 06-24 20:43:51 [stats.py:37] Avg generate tokens throughput: 7.770 tokens/s -INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:387.17007637023926ms total_cost_time:387.21346855163574ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15547 prompt_cache_len:5151 prompt_cache_ratio:0.331317939152248 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 -DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:51 [batch.py:51] router release req id 8 -INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.1094655990600586 s -INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11140894889831543 s -DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=254781524972102490119496142615516110804, time:1750769031.7256694s req_ids:[8] -DEBUG 06-24 20:43:51 [manager.py:391] -ERROR 06-24 20:43:51 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:51 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:220.6132411956787ms total_cost_time:220.6583023071289ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15548 prompt_cache_len:5151 prompt_cache_ratio:0.33129662979161306 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 -DEBUG 06-24 20:43:51 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:51 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:51 [batch.py:51] router release req id 8 -INFO 06-24 20:43:51 [manager.py:224] router recive req id 8 cost time 0.10832619667053223 s -INFO 06-24 20:43:51 [manager.py:68] detokenization recv req id 8 cost time 0.11049365997314453 s -DEBUG 06-24 20:43:51 [manager.py:391] Prefill Batch: batch_id=70308892265831410812602855225644730294, time:1750769031.9526315s req_ids:[8] -DEBUG 06-24 20:43:51 [manager.py:391] -ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:51 lightllm_req_id:8 first_token_cost:215.53516387939453ms total_cost_time:215.5773639678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04220008850097656ms prompt_token_num:15549 prompt_cache_len:5151 prompt_cache_ratio:0.33127532317190816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 -DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:52 [batch.py:51] router release req id 8 -INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10905170440673828 s -INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11087799072265625 s -DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=97539143484511959104897698645568178075, time:1750769032.174345s req_ids:[8] -DEBUG 06-24 20:43:52 [manager.py:391] -ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:214.7085666656494ms total_cost_time:214.7524356842041ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15550 prompt_cache_len:5151 prompt_cache_ratio:0.3312540192926045 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 -DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:52 [batch.py:51] router release req id 8 -INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10947918891906738 s -INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11160016059875488 s -DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=282804342949524630407896981436059801484, time:1750769032.3964672s req_ids:[8] -DEBUG 06-24 20:43:52 [manager.py:391] -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:52 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:215.5911922454834ms total_cost_time:215.63315391540527ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15551 prompt_cache_len:5151 prompt_cache_ratio:0.33123271815317346 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 -DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:52 [batch.py:51] router release req id 8 -INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10886693000793457 s -INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.11066007614135742 s -DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=309164303217772779578861095620899456232, time:1750769032.6172438s req_ids:[8] -DEBUG 06-24 20:43:52 [manager.py:391] -ERROR 06-24 20:43:52 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:52 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:216.37916564941406ms total_cost_time:216.42065048217773ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15552 prompt_cache_len:5151 prompt_cache_ratio:0.33121141975308643 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:52 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 -DEBUG 06-24 20:43:52 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:52 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:52 [batch.py:51] router release req id 8 -INFO 06-24 20:43:52 [manager.py:224] router recive req id 8 cost time 0.10906863212585449 s -INFO 06-24 20:43:52 [manager.py:68] detokenization recv req id 8 cost time 0.1109459400177002 s -DEBUG 06-24 20:43:52 [manager.py:391] Prefill Batch: batch_id=121579550962863252033105642091039241369, time:1750769032.840269s req_ids:[8] -DEBUG 06-24 20:43:52 [manager.py:391] -ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:52 lightllm_req_id:8 first_token_cost:375.5974769592285ms total_cost_time:375.6420612335205ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15553 prompt_cache_len:5151 prompt_cache_ratio:0.3311901240918151 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 -DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:53 [batch.py:51] router release req id 8 -INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10796952247619629 s -INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.10979294776916504 s -INFO 06-24 20:43:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=50347335171169126184684192751692342884, time:1750769033.223577s req_ids:[8] -DEBUG 06-24 20:43:53 [manager.py:391] -INFO 06-24 20:43:53 [statics_utils.py:24] mean first cost: 233.45740679691613 ms -INFO 06-24 20:43:53 [statics_utils.py:24] mean per token cost: 0.05678636001360444 ms -ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:220.28183937072754ms total_cost_time:220.32499313354492ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15554 prompt_cache_len:5151 prompt_cache_ratio:0.33116883116883117 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 -DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:53 [batch.py:51] router release req id 8 -INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10824871063232422 s -INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.10987401008605957 s -DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=275208670064836212021675735656053088391, time:1750769033.4508846s req_ids:[8] -DEBUG 06-24 20:43:53 [manager.py:391] -ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:173.6316680908203ms total_cost_time:173.67267608642578ms,out_token_counter:1 mean_per_token_cost_time: 0.04100799560546875ms prompt_token_num:15555 prompt_cache_len:5151 prompt_cache_ratio:0.33114754098360655 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 -DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:53 [batch.py:51] router release req id 8 -INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.1094212532043457 s -INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.11130285263061523 s -DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=60087378125098382631853072830392824417, time:1750769033.6325872s req_ids:[8] -DEBUG 06-24 20:43:53 [manager.py:391] -ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:213.26160430908203ms total_cost_time:213.30571174621582ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:15556 prompt_cache_len:5151 prompt_cache_ratio:0.33112625353561326 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 -DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:53 [batch.py:51] router release req id 8 -INFO 06-24 20:43:53 [manager.py:224] router recive req id 8 cost time 0.10945820808410645 s -INFO 06-24 20:43:53 [manager.py:68] detokenization recv req id 8 cost time 0.11155509948730469 s -DEBUG 06-24 20:43:53 [manager.py:391] Prefill Batch: batch_id=39011637767286973061479284200907232230, time:1750769033.8521712s req_ids:[8] -DEBUG 06-24 20:43:53 [manager.py:391] -ERROR 06-24 20:43:53 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:53 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:216.94326400756836ms total_cost_time:216.99070930480957ms,out_token_counter:1 mean_per_token_cost_time: 0.04744529724121094ms prompt_token_num:15557 prompt_cache_len:5151 prompt_cache_ratio:0.33110496882432344 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 -DEBUG 06-24 20:43:53 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:53 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:53 [batch.py:51] router release req id 8 -INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10886120796203613 s -INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11095285415649414 s -DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=26632357620585585361142215201953563942, time:1750769034.076187s req_ids:[8] -DEBUG 06-24 20:43:54 [manager.py:391] -ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:53 lightllm_req_id:8 first_token_cost:420.6821918487549ms total_cost_time:420.72558403015137ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15558 prompt_cache_len:5151 prompt_cache_ratio:0.3310836868492094 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 -DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:54 [batch.py:51] router release req id 8 -INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10837960243225098 s -INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11043381690979004 s -DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=277416376479513446008779012406765754350, time:1750769034.5074878s req_ids:[8] -DEBUG 06-24 20:43:54 [manager.py:391] -ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:226.792573928833ms total_cost_time:226.8376350402832ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15559 prompt_cache_len:5151 prompt_cache_ratio:0.33106240760974354 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 -DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:54 [batch.py:51] router release req id 8 -INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10803532600402832 s -INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11012959480285645 s -DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=60558359434636768029288516204385319708, time:1750769034.7373013s req_ids:[8] -DEBUG 06-24 20:43:54 [manager.py:391] -ERROR 06-24 20:43:54 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:54 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:220.03936767578125ms total_cost_time:220.08371353149414ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15560 prompt_cache_len:5151 prompt_cache_ratio:0.33104113110539846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:54 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 -DEBUG 06-24 20:43:54 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:54 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:54 [batch.py:51] router release req id 8 -INFO 06-24 20:43:54 [manager.py:224] router recive req id 8 cost time 0.10810565948486328 s -INFO 06-24 20:43:54 [manager.py:68] detokenization recv req id 8 cost time 0.11007189750671387 s -DEBUG 06-24 20:43:54 [manager.py:391] Prefill Batch: batch_id=34865227742933153772796010995088812573, time:1750769034.962294s req_ids:[8] -DEBUG 06-24 20:43:54 [manager.py:391] -ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:54 lightllm_req_id:8 first_token_cost:219.09403800964355ms total_cost_time:219.13719177246094ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15561 prompt_cache_len:5151 prompt_cache_ratio:0.3310198573356468 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 -DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:55 [batch.py:51] router release req id 8 -INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10815930366516113 s -INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.11016368865966797 s -DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=293460912257357475780100269515807154621, time:1750769035.1969485s req_ids:[8] -DEBUG 06-24 20:43:55 [manager.py:391] -ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:231.1840057373047ms total_cost_time:231.22859001159668ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:15562 prompt_cache_len:5151 prompt_cache_ratio:0.33099858629996143 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 -DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:55 [batch.py:51] router release req id 8 -INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10921025276184082 s -INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.11129212379455566 s -DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=106826062946423968061798768291017682418, time:1750769035.4250224s req_ids:[8] -DEBUG 06-24 20:43:55 [manager.py:391] -ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:386.80553436279297ms total_cost_time:386.84892654418945ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15563 prompt_cache_len:5151 prompt_cache_ratio:0.3309773179978153 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 -DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:55 [batch.py:51] router release req id 8 -INFO 06-24 20:43:55 [manager.py:224] router recive req id 8 cost time 0.10807585716247559 s -INFO 06-24 20:43:55 [manager.py:68] detokenization recv req id 8 cost time 0.1101679801940918 s -DEBUG 06-24 20:43:55 [manager.py:391] Prefill Batch: batch_id=96053962834386165345156740521155962988, time:1750769035.81826s req_ids:[8] -DEBUG 06-24 20:43:55 [manager.py:391] -ERROR 06-24 20:43:55 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:55 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:242.84934997558594ms total_cost_time:242.8908348083496ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:15564 prompt_cache_len:5151 prompt_cache_ratio:0.3309560524286816 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 -DEBUG 06-24 20:43:55 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:55 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:55 [batch.py:51] router release req id 8 -INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10907101631164551 s -INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.1111140251159668 s -DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=267241612546131875695847738842228354737, time:1750769036.0807421s req_ids:[8] -DEBUG 06-24 20:43:56 [manager.py:391] -ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:55 lightllm_req_id:8 first_token_cost:227.97656059265137ms total_cost_time:228.02042961120605ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15565 prompt_cache_len:5151 prompt_cache_ratio:0.33093478959203343 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 -DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:56 [batch.py:51] router release req id 8 -INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10927510261535645 s -INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11139130592346191 s -DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=216870776308263750617614993022373564315, time:1750769036.3034408s req_ids:[8] -DEBUG 06-24 20:43:56 [manager.py:391] -ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:218.06931495666504ms total_cost_time:218.11413764953613ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15566 prompt_cache_len:5151 prompt_cache_ratio:0.3309135294873442 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 -DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:56 [batch.py:51] router release req id 8 -INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10832381248474121 s -INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11050820350646973 s -DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=27891753634078613310716833744905710286, time:1750769036.5279362s req_ids:[8] -DEBUG 06-24 20:43:56 [manager.py:391] -ERROR 06-24 20:43:56 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:56 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:218.16229820251465ms total_cost_time:218.21045875549316ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:15567 prompt_cache_len:5151 prompt_cache_ratio:0.3308922721140875 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:56 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 -DEBUG 06-24 20:43:56 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:56 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:56 [batch.py:51] router release req id 8 -INFO 06-24 20:43:56 [manager.py:224] router recive req id 8 cost time 0.10973596572875977 s -INFO 06-24 20:43:56 [manager.py:68] detokenization recv req id 8 cost time 0.11185550689697266 s -DEBUG 06-24 20:43:56 [manager.py:391] Prefill Batch: batch_id=148498682648414606884499959145094835653, time:1750769036.7533002s req_ids:[8] -DEBUG 06-24 20:43:56 [manager.py:391] -ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:56 lightllm_req_id:8 first_token_cost:440.08874893188477ms total_cost_time:440.13404846191406ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15568 prompt_cache_len:5151 prompt_cache_ratio:0.3308710174717369 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 -DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:57 [batch.py:51] router release req id 8 -INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10959744453430176 s -INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11164569854736328 s -DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=18957748361223813692530092821044491426, time:1750769037.199362s req_ids:[8] -DEBUG 06-24 20:43:57 [manager.py:391] -ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:255.60450553894043ms total_cost_time:255.6462287902832ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15569 prompt_cache_len:5151 prompt_cache_ratio:0.3308497655597662 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 -DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:57 [batch.py:51] router release req id 8 -INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10805416107177734 s -INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.10989713668823242 s -DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=221903994885307164879450266370046169234, time:1750769037.472642s req_ids:[8] -DEBUG 06-24 20:43:57 [manager.py:391] -ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:228.04856300354004ms total_cost_time:228.09219360351562ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15570 prompt_cache_len:5151 prompt_cache_ratio:0.3308285163776493 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 -DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:57 [batch.py:51] router release req id 8 -INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.11089396476745605 s -INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11359977722167969 s -DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=144701399391632958195971870606180546345, time:1750769037.6988287s req_ids:[8] -DEBUG 06-24 20:43:57 [manager.py:391] -ERROR 06-24 20:43:57 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:57 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:221.29273414611816ms total_cost_time:221.33660316467285ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15571 prompt_cache_len:5151 prompt_cache_ratio:0.3308072699248603 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 -DEBUG 06-24 20:43:57 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:57 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:57 [batch.py:51] router release req id 8 -INFO 06-24 20:43:57 [manager.py:224] router recive req id 8 cost time 0.10839557647705078 s -INFO 06-24 20:43:57 [manager.py:68] detokenization recv req id 8 cost time 0.11039185523986816 s -DEBUG 06-24 20:43:57 [manager.py:391] Prefill Batch: batch_id=323883346104674957022216729842675927066, time:1750769037.9213834s req_ids:[8] -DEBUG 06-24 20:43:57 [manager.py:391] -ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:57 lightllm_req_id:8 first_token_cost:218.0006504058838ms total_cost_time:218.04547309875488ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15572 prompt_cache_len:5151 prompt_cache_ratio:0.3307860262008734 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 -DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:58 [batch.py:51] router release req id 8 -INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10829472541809082 s -INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11043000221252441 s -DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=160500517572896204814024325261048404589, time:1750769038.1482825s req_ids:[8] -DEBUG 06-24 20:43:58 [manager.py:391] -ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:215.3482437133789ms total_cost_time:215.39068222045898ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15573 prompt_cache_len:5151 prompt_cache_ratio:0.3307647852051628 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 -DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:58 [batch.py:51] router release req id 8 -INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10851383209228516 s -INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11050248146057129 s -DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=242814369642300358416487634402168749632, time:1750769038.3708375s req_ids:[8] -DEBUG 06-24 20:43:58 [manager.py:391] -ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:383.8672637939453ms total_cost_time:383.9116096496582ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15574 prompt_cache_len:5151 prompt_cache_ratio:0.33074354693720304 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 -DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:58 [batch.py:51] router release req id 8 -INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10952639579772949 s -INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11153697967529297 s -DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=82362349742169250307623793880029417966, time:1750769038.7604861s req_ids:[8] -DEBUG 06-24 20:43:58 [manager.py:391] -ERROR 06-24 20:43:58 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:58 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:216.45045280456543ms total_cost_time:216.4938449859619ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15575 prompt_cache_len:5151 prompt_cache_ratio:0.3307223113964687 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:58 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 -DEBUG 06-24 20:43:58 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:58 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:58 [batch.py:51] router release req id 8 -INFO 06-24 20:43:58 [manager.py:224] router recive req id 8 cost time 0.10793685913085938 s -INFO 06-24 20:43:58 [manager.py:68] detokenization recv req id 8 cost time 0.11028456687927246 s -DEBUG 06-24 20:43:58 [manager.py:391] Prefill Batch: batch_id=274867563622895852186579429952359736612, time:1750769038.9884715s req_ids:[8] -DEBUG 06-24 20:43:58 [manager.py:391] -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:43:58 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:58 lightllm_req_id:8 first_token_cost:221.94623947143555ms total_cost_time:221.99106216430664ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15576 prompt_cache_len:5151 prompt_cache_ratio:0.3307010785824345 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 -DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:59 [batch.py:51] router release req id 8 -INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10811305046081543 s -INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.11012792587280273 s -DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=57935775165940663640154449206823411741, time:1750769039.2132866s req_ids:[8] -DEBUG 06-24 20:43:59 [manager.py:391] -ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:217.34952926635742ms total_cost_time:217.4086570739746ms,out_token_counter:1 mean_per_token_cost_time: 0.0591278076171875ms prompt_token_num:15577 prompt_cache_len:5151 prompt_cache_ratio:0.3306798484945753 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 -DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:59 [batch.py:51] router release req id 8 -INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10883426666259766 s -INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.11069869995117188 s -DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=245573929203060346338848107669351688401, time:1750769039.4372563s req_ids:[8] -DEBUG 06-24 20:43:59 [manager.py:391] -ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:217.03338623046875ms total_cost_time:217.07606315612793ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15578 prompt_cache_len:5151 prompt_cache_ratio:0.33065862113236616 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 -DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:59 [batch.py:51] router release req id 8 -INFO 06-24 20:43:59 [manager.py:224] router recive req id 8 cost time 0.10820817947387695 s -INFO 06-24 20:43:59 [manager.py:68] detokenization recv req id 8 cost time 0.1101982593536377 s -DEBUG 06-24 20:43:59 [manager.py:391] Prefill Batch: batch_id=199329132013138563119489454335687447116, time:1750769039.6621404s req_ids:[8] -DEBUG 06-24 20:43:59 [manager.py:391] -ERROR 06-24 20:43:59 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:43:59 [manager.py:162] detoken release req id 8 -INFO 06-24 20:43:59 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:214.20645713806152ms total_cost_time:214.2481803894043ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:15579 prompt_cache_len:5151 prompt_cache_ratio:0.3306373964952821 mtp_avg_token_per_step:1.0 -INFO 06-24 20:43:59 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 -DEBUG 06-24 20:43:59 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:43:59 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:43:59 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:43:59 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:43:59 [batch.py:51] router release req id 8 -INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.3109908103942871 s -INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.31294822692871094 s -DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=217158442847504299065006605246316436188, time:1750769040.0884008s req_ids:[8] -DEBUG 06-24 20:44:00 [manager.py:391] -ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:43:59 lightllm_req_id:8 first_token_cost:429.81815338134766ms total_cost_time:429.86297607421875ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:15580 prompt_cache_len:5151 prompt_cache_ratio:0.33061617458279846 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 -DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:00 [batch.py:51] router release req id 8 -INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10846376419067383 s -INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11043548583984375 s -DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=136596989182133262460614423106407248956, time:1750769040.3191776s req_ids:[8] -DEBUG 06-24 20:44:00 [manager.py:391] -ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:217.90814399719238ms total_cost_time:217.95201301574707ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15581 prompt_cache_len:5151 prompt_cache_ratio:0.3305949553943906 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 -DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:00 [batch.py:51] router release req id 8 -INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10970783233642578 s -INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11163592338562012 s -DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=141374874344924916512631725113746745222, time:1750769040.5451803s req_ids:[8] -DEBUG 06-24 20:44:00 [manager.py:391] -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:44:00 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:221.36259078979492ms total_cost_time:221.4062213897705ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15582 prompt_cache_len:5151 prompt_cache_ratio:0.33057373892953407 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 -DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:00 [batch.py:51] router release req id 8 -INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.11149716377258301 s -INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.11348748207092285 s -DEBUG 06-24 20:44:00 [manager.py:391] Prefill Batch: batch_id=223543650573796889152688291723017509287, time:1750769040.7816372s req_ids:[8] -DEBUG 06-24 20:44:00 [manager.py:391] -ERROR 06-24 20:44:00 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:00 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:00 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:224.3812084197998ms total_cost_time:224.4248390197754ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15583 prompt_cache_len:5151 prompt_cache_ratio:0.33055252518770456 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:00 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 -DEBUG 06-24 20:44:00 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:00 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:00 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:00 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:00 [batch.py:51] router release req id 8 -INFO 06-24 20:44:00 [manager.py:224] router recive req id 8 cost time 0.10783004760742188 s -INFO 06-24 20:44:00 [manager.py:68] detokenization recv req id 8 cost time 0.10973930358886719 s -DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=119291079920543445537748036772034945876, time:1750769041.0060463s req_ids:[8] -DEBUG 06-24 20:44:01 [manager.py:391] -ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:00 lightllm_req_id:8 first_token_cost:176.26595497131348ms total_cost_time:176.30863189697266ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15584 prompt_cache_len:5151 prompt_cache_ratio:0.33053131416837783 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 -DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:01 [batch.py:51] router release req id 8 -INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10798859596252441 s -INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.10965895652770996 s -DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=230067573530730370665353556733192068988, time:1750769041.1878889s req_ids:[8] -DEBUG 06-24 20:44:01 [manager.py:391] -ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:372.8814125061035ms total_cost_time:372.9245662689209ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15585 prompt_cache_len:5151 prompt_cache_ratio:0.33051010587102986 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 -DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:01 [batch.py:51] router release req id 8 -INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10814404487609863 s -INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.1101217269897461 s -DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=79148854643542804781375309673619687319, time:1750769041.5670257s req_ids:[8] -DEBUG 06-24 20:44:01 [manager.py:391] -ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -DEBUG 06-24 20:44:01 [stats.py:37] Avg tokens(prompt+generate) throughput: 60295.758 tokens/s -DEBUG 06-24 20:44:01 [stats.py:37] Avg prompt tokens throughput: 60288.012 tokens/s -DEBUG 06-24 20:44:01 [stats.py:37] Avg generate tokens throughput: 7.746 tokens/s -INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:221.7864990234375ms total_cost_time:221.82941436767578ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:15586 prompt_cache_len:5151 prompt_cache_ratio:0.33048890029513667 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 -DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:01 [batch.py:51] router release req id 8 -INFO 06-24 20:44:01 [manager.py:224] router recive req id 8 cost time 0.10878109931945801 s -INFO 06-24 20:44:01 [manager.py:68] detokenization recv req id 8 cost time 0.11073112487792969 s -DEBUG 06-24 20:44:01 [manager.py:391] Prefill Batch: batch_id=70481385614710390788127995766509348133, time:1750769041.7953067s req_ids:[8] -DEBUG 06-24 20:44:01 [manager.py:391] -ERROR 06-24 20:44:01 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:01 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:01 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:216.71438217163086ms total_cost_time:216.75682067871094ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:15587 prompt_cache_len:5151 prompt_cache_ratio:0.3304676974401745 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:01 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 -DEBUG 06-24 20:44:01 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:01 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:01 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:01 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:01 [batch.py:51] router release req id 8 -INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10994648933410645 s -INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11202478408813477 s -DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=71709303262601156528156648155815578320, time:1750769042.0212622s req_ids:[8] -DEBUG 06-24 20:44:02 [manager.py:391] -ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:01 lightllm_req_id:8 first_token_cost:217.73982048034668ms total_cost_time:217.78368949890137ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15588 prompt_cache_len:5151 prompt_cache_ratio:0.3304464973056197 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 -DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:02 [batch.py:51] router release req id 8 -INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10897445678710938 s -INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11103248596191406 s -DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=338796347442452518000609775651903139234, time:1750769042.2442138s req_ids:[8] -DEBUG 06-24 20:44:02 [manager.py:391] -ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:214.60390090942383ms total_cost_time:214.6470546722412ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15589 prompt_cache_len:5151 prompt_cache_ratio:0.33042529989094876 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 -DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:02 [batch.py:51] router release req id 8 -INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10928654670715332 s -INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11127018928527832 s -DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=162655247608858853905420879662288003624, time:1750769042.463818s req_ids:[8] -DEBUG 06-24 20:44:02 [manager.py:391] -ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:387.0246410369873ms total_cost_time:387.0689868927002ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15590 prompt_cache_len:5151 prompt_cache_ratio:0.33040410519563823 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 -DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:02 [batch.py:51] router release req id 8 -INFO 06-24 20:44:02 [manager.py:224] router recive req id 8 cost time 0.10901427268981934 s -INFO 06-24 20:44:02 [manager.py:68] detokenization recv req id 8 cost time 0.11086583137512207 s -DEBUG 06-24 20:44:02 [manager.py:391] Prefill Batch: batch_id=32848749479221368454130089598432842669, time:1750769042.8598044s req_ids:[8] -DEBUG 06-24 20:44:02 [manager.py:391] -ERROR 06-24 20:44:02 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:02 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:02 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:219.8486328125ms total_cost_time:219.89178657531738ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15591 prompt_cache_len:5151 prompt_cache_ratio:0.3303829132191649 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:02 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 -DEBUG 06-24 20:44:02 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:02 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:02 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:02 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:02 [batch.py:51] router release req id 8 -INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10799121856689453 s -INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.1098639965057373 s -DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=205391140681309039459769396076018025086, time:1750769043.085207s req_ids:[8] -DEBUG 06-24 20:44:03 [manager.py:391] -ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:02 lightllm_req_id:8 first_token_cost:212.66961097717285ms total_cost_time:212.72635459899902ms,out_token_counter:1 mean_per_token_cost_time: 0.056743621826171875ms prompt_token_num:15592 prompt_cache_len:5151 prompt_cache_ratio:0.33036172396100566 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 -DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:03 [batch.py:51] router release req id 8 -INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.11019492149353027 s -INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11204075813293457 s -DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=19380018098002271237111663721783244297, time:1750769043.3134947s req_ids:[8] -DEBUG 06-24 20:44:03 [manager.py:391] -ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:225.51798820495605ms total_cost_time:225.56114196777344ms,out_token_counter:1 mean_per_token_cost_time: 0.04315376281738281ms prompt_token_num:15593 prompt_cache_len:5151 prompt_cache_ratio:0.33034053742063746 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 -DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:03 [batch.py:51] router release req id 8 -INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10837745666503906 s -INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11027693748474121 s -DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=279974663404867781579620751132709683047, time:1750769043.5385728s req_ids:[8] -DEBUG 06-24 20:44:03 [manager.py:391] -ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:217.72384643554688ms total_cost_time:217.76747703552246ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15594 prompt_cache_len:5151 prompt_cache_ratio:0.3303193535975375 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 -DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:03 [batch.py:51] router release req id 8 -INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10806703567504883 s -INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.10996437072753906 s -DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=70156263583445701593495362126811458634, time:1750769043.7625334s req_ids:[8] -DEBUG 06-24 20:44:03 [manager.py:391] -ERROR 06-24 20:44:03 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:03 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:03 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:218.89591217041016ms total_cost_time:218.94025802612305ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15595 prompt_cache_len:5151 prompt_cache_ratio:0.3302981724911831 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:03 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 -DEBUG 06-24 20:44:03 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:03 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:03 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:03 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:03 [batch.py:51] router release req id 8 -INFO 06-24 20:44:03 [manager.py:224] router recive req id 8 cost time 0.10865163803100586 s -INFO 06-24 20:44:03 [manager.py:68] detokenization recv req id 8 cost time 0.11051082611083984 s -DEBUG 06-24 20:44:03 [manager.py:391] Prefill Batch: batch_id=329634273476295423486024366947665062063, time:1750769043.9869545s req_ids:[8] -DEBUG 06-24 20:44:03 [manager.py:391] -ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:03 lightllm_req_id:8 first_token_cost:377.44736671447754ms total_cost_time:377.49266624450684ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:15596 prompt_cache_len:5151 prompt_cache_ratio:0.33027699410105155 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 -DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:04 [batch.py:51] router release req id 8 -INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.10796856880187988 s -INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.10987067222595215 s -DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=322557533967463232520268152032680585279, time:1750769044.3729513s req_ids:[8] -DEBUG 06-24 20:44:04 [manager.py:391] -ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:212.50200271606445ms total_cost_time:212.55922317504883ms,out_token_counter:1 mean_per_token_cost_time: 0.057220458984375ms prompt_token_num:15597 prompt_cache_len:5151 prompt_cache_ratio:0.3302558184266205 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 -DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:04 [batch.py:51] router release req id 8 -INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.10915160179138184 s -INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.11124968528747559 s -DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=312329460033288899770323793729315839120, time:1750769044.5944364s req_ids:[8] -DEBUG 06-24 20:44:04 [manager.py:391] -ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:213.4549617767334ms total_cost_time:213.4988307952881ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15598 prompt_cache_len:5151 prompt_cache_ratio:0.33023464546736764 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 -DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:04 [batch.py:51] router release req id 8 -INFO 06-24 20:44:04 [manager.py:224] router recive req id 8 cost time 0.1076958179473877 s -INFO 06-24 20:44:04 [manager.py:68] detokenization recv req id 8 cost time 0.10963559150695801 s -DEBUG 06-24 20:44:04 [manager.py:391] Prefill Batch: batch_id=79843873979181730478863219092620128921, time:1750769044.8120863s req_ids:[8] -DEBUG 06-24 20:44:04 [manager.py:391] -ERROR 06-24 20:44:04 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:04 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:04 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:213.21964263916016ms total_cost_time:213.26470375061035ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:15599 prompt_cache_len:5151 prompt_cache_ratio:0.33021347522277067 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:04 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 -DEBUG 06-24 20:44:04 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:04 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:04 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:04 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:04 [batch.py:51] router release req id 8 -INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.10822296142578125 s -INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11021590232849121 s -DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=139948103148018515538760352719222388026, time:1750769045.032056s req_ids:[8] -DEBUG 06-24 20:44:05 [manager.py:391] -ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:04 lightllm_req_id:8 first_token_cost:215.45767784118652ms total_cost_time:215.5015468597412ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15600 prompt_cache_len:5151 prompt_cache_ratio:0.3301923076923077 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 -DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:05 [batch.py:51] router release req id 8 -INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.1096811294555664 s -INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11170363426208496 s -DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=330887729930812472779024537647204646611, time:1750769045.2546244s req_ids:[8] -DEBUG 06-24 20:44:05 [manager.py:391] -ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:381.21843338012695ms total_cost_time:381.26659393310547ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:15601 prompt_cache_len:5151 prompt_cache_ratio:0.3301711428754567 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 -DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:05 [batch.py:51] router release req id 8 -INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.1088416576385498 s -INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11067390441894531 s -DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=333422561239658681084997008958819542978, time:1750769045.643798s req_ids:[8] -DEBUG 06-24 20:44:05 [manager.py:391] -ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:219.23518180847168ms total_cost_time:219.29216384887695ms,out_token_counter:1 mean_per_token_cost_time: 0.05698204040527344ms prompt_token_num:15602 prompt_cache_len:5151 prompt_cache_ratio:0.33014998077169594 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 -DEBUG 06-24 20:44:05 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:05 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:05 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:05 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:05 [batch.py:51] router release req id 8 -INFO 06-24 20:44:05 [manager.py:224] router recive req id 8 cost time 0.11032629013061523 s -INFO 06-24 20:44:05 [manager.py:68] detokenization recv req id 8 cost time 0.11219000816345215 s -DEBUG 06-24 20:44:05 [manager.py:391] Prefill Batch: batch_id=325538447371627327855414416757787265963, time:1750769045.8725553s req_ids:[8] -DEBUG 06-24 20:44:05 [manager.py:391] -ERROR 06-24 20:44:05 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:05 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:05 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:216.50314331054688ms total_cost_time:216.56346321105957ms,out_token_counter:1 mean_per_token_cost_time: 0.06031990051269531ms prompt_token_num:15603 prompt_cache_len:5151 prompt_cache_ratio:0.33012882138050376 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:05 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 -DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:06 [batch.py:51] router release req id 8 -INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10970854759216309 s -INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.1116495132446289 s -DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=294196879378598490746288246174432098368, time:1750769046.0933654s req_ids:[8] -DEBUG 06-24 20:44:06 [manager.py:391] -ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:05 lightllm_req_id:8 first_token_cost:217.484712600708ms total_cost_time:217.5285816192627ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15604 prompt_cache_len:5151 prompt_cache_ratio:0.3301076647013586 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 -DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:06 [batch.py:51] router release req id 8 -INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10951852798461914 s -INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11154556274414062 s -DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=85075338928227049722017323278502291142, time:1750769046.3187554s req_ids:[8] -DEBUG 06-24 20:44:06 [manager.py:391] -ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:221.36449813842773ms total_cost_time:221.40789031982422ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:15605 prompt_cache_len:5151 prompt_cache_ratio:0.3300865107337392 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 -DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:06 [batch.py:51] router release req id 8 -INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.1085820198059082 s -INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11063885688781738 s -DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=285285321113052875781959915141359871321, time:1750769046.5466163s req_ids:[8] -DEBUG 06-24 20:44:06 [manager.py:391] -ERROR 06-24 20:44:06 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:06 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:06 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:390.86270332336426ms total_cost_time:390.90704917907715ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15606 prompt_cache_len:5151 prompt_cache_ratio:0.3300653594771242 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:06 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 -DEBUG 06-24 20:44:06 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:06 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:06 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:06 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:06 [batch.py:51] router release req id 8 -INFO 06-24 20:44:06 [manager.py:224] router recive req id 8 cost time 0.10804033279418945 s -INFO 06-24 20:44:06 [manager.py:68] detokenization recv req id 8 cost time 0.11003613471984863 s -DEBUG 06-24 20:44:06 [manager.py:391] Prefill Batch: batch_id=335833418022450136740304770396554491188, time:1750769046.9432716s req_ids:[8] -DEBUG 06-24 20:44:06 [manager.py:391] -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 20:44:06 [manager.py:248] dp_i 0 token used ratio: 0.9425329428989752 contain prompt cache tree unrefed token -ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:06 lightllm_req_id:8 first_token_cost:215.1169776916504ms total_cost_time:215.16132354736328ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15607 prompt_cache_len:5151 prompt_cache_ratio:0.3300442109309925 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 -DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:07 [batch.py:51] router release req id 8 -INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10828161239624023 s -INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11024904251098633 s -DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=113670200968217977586127072351317721349, time:1750769047.1649494s req_ids:[8] -DEBUG 06-24 20:44:07 [manager.py:391] -ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:217.74983406066895ms total_cost_time:217.79417991638184ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15608 prompt_cache_len:5151 prompt_cache_ratio:0.33002306509482315 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 -DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:07 [batch.py:51] router release req id 8 -INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10779404640197754 s -INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.10955476760864258 s -DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=265541947445309822006754916157873628141, time:1750769047.3889048s req_ids:[8] -DEBUG 06-24 20:44:07 [manager.py:391] -ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:173.54249954223633ms total_cost_time:173.5851764678955ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:15609 prompt_cache_len:5151 prompt_cache_ratio:0.3300019219680953 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 -DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:07 [batch.py:51] router release req id 8 -INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10896611213684082 s -INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11059355735778809 s -DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=193005792664032784804455577603626392482, time:1750769047.570233s req_ids:[8] -DEBUG 06-24 20:44:07 [manager.py:391] -ERROR 06-24 20:44:07 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:07 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:07 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:174.96323585510254ms total_cost_time:175.00758171081543ms,out_token_counter:1 mean_per_token_cost_time: 0.044345855712890625ms prompt_token_num:15610 prompt_cache_len:5151 prompt_cache_ratio:0.32998078155028826 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:07 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 -DEBUG 06-24 20:44:07 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:07 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:07 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:07 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:07 [batch.py:51] router release req id 8 -INFO 06-24 20:44:07 [manager.py:224] router recive req id 8 cost time 0.10877060890197754 s -INFO 06-24 20:44:07 [manager.py:68] detokenization recv req id 8 cost time 0.11060094833374023 s -DEBUG 06-24 20:44:07 [manager.py:391] Prefill Batch: batch_id=321716917940814754644547379286405604032, time:1750769047.7514484s req_ids:[8] -DEBUG 06-24 20:44:07 [manager.py:391] -ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:07 lightllm_req_id:8 first_token_cost:379.25219535827637ms total_cost_time:379.2986869812012ms,out_token_counter:1 mean_per_token_cost_time: 0.04649162292480469ms prompt_token_num:15611 prompt_cache_len:5151 prompt_cache_ratio:0.32995964384088144 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 -DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:08 [batch.py:51] router release req id 8 -INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10910964012145996 s -INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11100554466247559 s -DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=316127366364544474139136975786156098144, time:1750769048.1380901s req_ids:[8] -DEBUG 06-24 20:44:08 [manager.py:391] -ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:216.7067527770996ms total_cost_time:216.7503833770752ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15612 prompt_cache_len:5151 prompt_cache_ratio:0.3299385088393543 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 -DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:08 [batch.py:51] router release req id 8 -INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10815620422363281 s -INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11016511917114258 s -DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=45763653949086645735861526297465872720, time:1750769048.3611634s req_ids:[8] -DEBUG 06-24 20:44:08 [manager.py:391] -ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:213.98210525512695ms total_cost_time:214.02406692504883ms,out_token_counter:1 mean_per_token_cost_time: 0.041961669921875ms prompt_token_num:15613 prompt_cache_len:5151 prompt_cache_ratio:0.3299173765451867 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 -DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:08 [batch.py:51] router release req id 8 -INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10932779312133789 s -INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.11132931709289551 s -DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=268533939464099257755510652993975488838, time:1750769048.5812194s req_ids:[8] -DEBUG 06-24 20:44:08 [manager.py:391] -ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:213.94586563110352ms total_cost_time:213.9897346496582ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:15614 prompt_cache_len:5151 prompt_cache_ratio:0.32989624695785835 mtp_avg_token_per_step:1.0 -INFO 06-24 20:44:08 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 -DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:08 [batch.py:51] router release req id 8 -INFO 06-24 20:44:08 [manager.py:224] router recive req id 8 cost time 0.10983800888061523 s -INFO 06-24 20:44:08 [manager.py:68] detokenization recv req id 8 cost time 0.1118314266204834 s -DEBUG 06-24 20:44:08 [manager.py:391] Prefill Batch: batch_id=45696873797961603328040586443287464642, time:1750769048.802643s req_ids:[8] -DEBUG 06-24 20:44:08 [manager.py:391] -ERROR 06-24 20:44:08 [decode_impl.py:126] req_id: 8 forced to finished, it not in g_success_kv_move_task_cache -INFO 06-24 20:44:08 [manager.py:162] detoken release req id 8 -INFO 06-24 20:44:08 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:44:08 lightllm_req_id:8 first_token_cost:211.9007110595703ms total_cost_time:211.9443416595459ms,out_token_counter:1 mean_per_token_cost_time: 0.04363059997558594ms prompt_token_num:15615 prompt_cache_len:5151 prompt_cache_ratio:0.32987512007684916 mtp_avg_token_per_step:1.0 -DEBUG 06-24 20:44:08 [req_manager.py:78] freed all request size 1008 -DEBUG 06-24 20:44:08 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix refed token num 0 -DEBUG 06-24 20:44:08 [infer_batch.py:156] radix hold token num 15450 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager can alloc token num 942 -DEBUG 06-24 20:44:08 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:44:08 [batch.py:51] router release req id 8 -INFO 06-24 20:44:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:44:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:44:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:44:37 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:44:37 [manager.py:283] -DEBUG 06-24 20:44:37 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:44:37 [manager.py:284] -INFO 06-24 20:44:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:44:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:44:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:44:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:45:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:45:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:45:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:45:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:45:38 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:45:38 [manager.py:283] -DEBUG 06-24 20:45:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:45:38 [manager.py:284] -INFO 06-24 20:45:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:45:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:45:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:45:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:46:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:46:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:46:38 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:46:38 [manager.py:283] -DEBUG 06-24 20:46:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:46:38 [manager.py:284] -INFO 06-24 20:46:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:46:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:46:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:46:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:47:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:47:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:47:39 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:47:39 [manager.py:283] -DEBUG 06-24 20:47:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:47:39 [manager.py:284] -INFO 06-24 20:47:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:47:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:47:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:47:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:48:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:48:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:48:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:48:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:48:40 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:48:40 [manager.py:283] -DEBUG 06-24 20:48:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:48:40 [manager.py:284] -INFO 06-24 20:48:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:48:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:48:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:48:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:48:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:48:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:49:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:49:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:49:40 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:49:40 [manager.py:283] -DEBUG 06-24 20:49:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:49:40 [manager.py:284] -INFO 06-24 20:49:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:49:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:50:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:50:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:50:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:50:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:50:41 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:50:41 [manager.py:283] -DEBUG 06-24 20:50:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:50:41 [manager.py:284] -INFO 06-24 20:50:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:50:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:50:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:50:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:50:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:50:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:51:42 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:51:42 [manager.py:283] -DEBUG 06-24 20:51:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:51:42 [manager.py:284] -INFO 06-24 20:51:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:51:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:51:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:51:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:52:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:52:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:52:42 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:52:42 [manager.py:283] -DEBUG 06-24 20:52:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:52:42 [manager.py:284] -INFO 06-24 20:52:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:52:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:52:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:52:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:52:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:52:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:53:43 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:53:43 [manager.py:283] -DEBUG 06-24 20:53:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:53:43 [manager.py:284] -INFO 06-24 20:53:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:53:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:53:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:53:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:54:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:54:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:54:44 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:54:44 [manager.py:283] -DEBUG 06-24 20:54:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:54:44 [manager.py:284] -INFO 06-24 20:54:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:54:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:54:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:55:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:55:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:55:44 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:55:44 [manager.py:283] -DEBUG 06-24 20:55:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:55:44 [manager.py:284] -INFO 06-24 20:55:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:55:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:55:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:55:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:56:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:56:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:56:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:56:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:56:45 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:56:45 [manager.py:283] -DEBUG 06-24 20:56:45 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:56:45 [manager.py:284] -INFO 06-24 20:56:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:56:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:56:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:56:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:56:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:56:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:57:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:57:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:57:46 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:57:46 [manager.py:283] -DEBUG 06-24 20:57:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:57:46 [manager.py:284] -INFO 06-24 20:57:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:57:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:57:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:57:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:57:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:57:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:58:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:58:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 20:58:46 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:58:46 [manager.py:283] -DEBUG 06-24 20:58:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:58:46 [manager.py:284] -INFO 06-24 20:58:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:58:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:58:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:58:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:59:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:59:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 20:59:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 20:59:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:59:47 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:59:47 [manager.py:283] -DEBUG 06-24 20:59:47 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:59:47 [manager.py:284] -INFO 06-24 20:59:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:59:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 20:59:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:00:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:00:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:00:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:00:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:00:48 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:00:48 [manager.py:283] -DEBUG 06-24 21:00:48 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:00:48 [manager.py:284] -INFO 06-24 21:00:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:00:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:00:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:00:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:00:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:01:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:01:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:01:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:01:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:01:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:01:49 [manager.py:283] -DEBUG 06-24 21:01:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:01:49 [manager.py:284] -INFO 06-24 21:01:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:01:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:01:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:01:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:01:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:02:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:02:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:02:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:02:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:02:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:02:49 [manager.py:283] -DEBUG 06-24 21:02:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:02:49 [manager.py:284] -INFO 06-24 21:02:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:02:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:02:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:03:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:03:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:03:50 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:03:50 [manager.py:283] -DEBUG 06-24 21:03:50 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:03:50 [manager.py:284] -INFO 06-24 21:03:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:03:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:03:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:03:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:04:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:04:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:04:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:04:51 [manager.py:283] -DEBUG 06-24 21:04:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:04:51 [manager.py:284] -INFO 06-24 21:04:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:04:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:04:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:04:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:05:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:05:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:05:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:05:51 [manager.py:283] -DEBUG 06-24 21:05:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:05:51 [manager.py:284] -INFO 06-24 21:05:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:05:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:05:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:05:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:06:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:06:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:06:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:06:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:06:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:06:52 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:06:52 [manager.py:283] -DEBUG 06-24 21:06:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:06:52 [manager.py:284] -INFO 06-24 21:06:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:06:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:06:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:06:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:06:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:07:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:07:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:07:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:07:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:07:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:07:53 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:07:53 [manager.py:283] -DEBUG 06-24 21:07:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:07:53 [manager.py:284] -INFO 06-24 21:07:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:07:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:07:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:07:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:07:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:08:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:08:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:08:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -DEBUG 06-24 21:08:53 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:08:53 [manager.py:283] -DEBUG 06-24 21:08:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:08:53 [manager.py:284] -INFO 06-24 21:08:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:08:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:09:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:09:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:09:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:09:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:09:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:09:54 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:09:54 [manager.py:283] -DEBUG 06-24 21:09:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:09:54 [manager.py:284] -INFO 06-24 21:09:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:10:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:10:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:10:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:10:55 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:10:55 [manager.py:283] -DEBUG 06-24 21:10:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:10:55 [manager.py:284] -INFO 06-24 21:10:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:11:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:11:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:11:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:11:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:11:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:11:55 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:11:55 [manager.py:283] -DEBUG 06-24 21:11:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:11:55 [manager.py:284] -INFO 06-24 21:11:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:12:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:12:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:12:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:12:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -DEBUG 06-24 21:12:56 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:12:56 [manager.py:283] -DEBUG 06-24 21:12:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:12:56 [manager.py:284] -INFO 06-24 21:12:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:13:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:13:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:13:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:13:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:13:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:13:57 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:13:57 [manager.py:283] -DEBUG 06-24 21:13:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:13:57 [manager.py:284] -INFO 06-24 21:13:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:14:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:14:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:14:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:14:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:14:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:14:57 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:14:57 [manager.py:283] -DEBUG 06-24 21:14:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:14:57 [manager.py:284] -INFO 06-24 21:14:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:15:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:15:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:15:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:15:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:15:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:15:58 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:15:58 [manager.py:283] -DEBUG 06-24 21:15:58 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:15:58 [manager.py:284] -INFO 06-24 21:15:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:16:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:16:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:16:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:16:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:16:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:16:59 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:16:59 [manager.py:283] -DEBUG 06-24 21:16:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:16:59 [manager.py:284] -INFO 06-24 21:17:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:17:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:17:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:17:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:17:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:17:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:17:59 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:17:59 [manager.py:283] -DEBUG 06-24 21:17:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:17:59 [manager.py:284] -INFO 06-24 21:18:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:18:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:18:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:18:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:19:00 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:19:00 [manager.py:283] -DEBUG 06-24 21:19:00 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:19:00 [manager.py:284] -INFO 06-24 21:19:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:19:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:19:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:19:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:19:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:19:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:20:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:20:01 [manager.py:283] -DEBUG 06-24 21:20:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:20:01 [manager.py:284] -INFO 06-24 21:20:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:20:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:20:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:20:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:20:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:20:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:21:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:21:01 [manager.py:283] -DEBUG 06-24 21:21:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:21:01 [manager.py:284] -INFO 06-24 21:21:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:21:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:21:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:21:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:21:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:21:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:22:02 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:22:02 [manager.py:283] -DEBUG 06-24 21:22:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:22:02 [manager.py:284] -INFO 06-24 21:22:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:22:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:22:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:22:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:22:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:22:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:23:03 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:23:03 [manager.py:283] -DEBUG 06-24 21:23:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:23:03 [manager.py:284] -INFO 06-24 21:23:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:23:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:23:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:23:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:23:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:23:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:24:03 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:24:03 [manager.py:283] -DEBUG 06-24 21:24:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:24:03 [manager.py:284] -INFO 06-24 21:24:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:24:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:24:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:24:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:24:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:24:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:25:04 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:25:04 [manager.py:283] -DEBUG 06-24 21:25:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:25:04 [manager.py:284] -INFO 06-24 21:25:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:25:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:25:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:25:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:25:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:25:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:26:05 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:26:05 [manager.py:283] -DEBUG 06-24 21:26:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:26:05 [manager.py:284] -INFO 06-24 21:26:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:26:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:26:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:26:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:26:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:26:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:27:05 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:27:05 [manager.py:283] -DEBUG 06-24 21:27:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:27:05 [manager.py:284] -INFO 06-24 21:27:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:27:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:27:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:27:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:27:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:27:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:28:06 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:28:06 [manager.py:283] -DEBUG 06-24 21:28:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:28:06 [manager.py:284] -INFO 06-24 21:28:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:28:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:28:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:28:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:29:07 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:29:07 [manager.py:283] -DEBUG 06-24 21:29:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:29:07 [manager.py:284] -INFO 06-24 21:29:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:29:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:29:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:29:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:29:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:29:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:30:07 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:30:07 [manager.py:283] -DEBUG 06-24 21:30:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:30:07 [manager.py:284] -INFO 06-24 21:30:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:30:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:30:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:30:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:30:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:30:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:30:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:30:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:30:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:30:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:31:08 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:31:08 [manager.py:283] -DEBUG 06-24 21:31:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:31:08 [manager.py:284] -INFO 06-24 21:31:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:31:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:31:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:31:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:32:09 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:32:09 [manager.py:283] -DEBUG 06-24 21:32:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:32:09 [manager.py:284] -INFO 06-24 21:32:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:32:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:32:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:32:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:32:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:32:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:33:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:33:10 [manager.py:283] -DEBUG 06-24 21:33:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:33:10 [manager.py:284] -INFO 06-24 21:33:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:33:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:33:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:33:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:33:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:33:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:33:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:33:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:33:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:33:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:34:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:34:10 [manager.py:283] -DEBUG 06-24 21:34:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:34:10 [manager.py:284] -INFO 06-24 21:34:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:34:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:34:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:34:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:34:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:34:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:35:11 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:35:11 [manager.py:283] -DEBUG 06-24 21:35:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:35:11 [manager.py:284] -INFO 06-24 21:35:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:35:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:35:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:35:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:36:12 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:36:12 [manager.py:283] -DEBUG 06-24 21:36:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:36:12 [manager.py:284] -INFO 06-24 21:36:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:36:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:36:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:36:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:36:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:36:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:37:12 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:37:12 [manager.py:283] -DEBUG 06-24 21:37:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:37:12 [manager.py:284] -INFO 06-24 21:37:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:37:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:37:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:37:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:37:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:37:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:38:13 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:38:13 [manager.py:283] -DEBUG 06-24 21:38:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:38:13 [manager.py:284] -INFO 06-24 21:38:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:38:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:38:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:38:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:38:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:38:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:39:14 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:39:14 [manager.py:283] -DEBUG 06-24 21:39:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:39:14 [manager.py:284] -INFO 06-24 21:39:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:39:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:39:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:39:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:39:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:39:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:40:14 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:40:14 [manager.py:283] -DEBUG 06-24 21:40:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:40:14 [manager.py:284] -INFO 06-24 21:40:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:40:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:40:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:40:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:41:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:41:15 [manager.py:283] -DEBUG 06-24 21:41:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:41:15 [manager.py:284] -INFO 06-24 21:41:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:41:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:41:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:41:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:42:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:42:16 [manager.py:283] -DEBUG 06-24 21:42:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:42:16 [manager.py:284] -INFO 06-24 21:42:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:42:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:42:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:42:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:42:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:42:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:42:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:42:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:43:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:43:16 [manager.py:283] -DEBUG 06-24 21:43:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:43:16 [manager.py:284] -INFO 06-24 21:43:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:43:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:43:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:43:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:43:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:43:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:44:17 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:44:17 [manager.py:283] -DEBUG 06-24 21:44:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:44:17 [manager.py:284] -INFO 06-24 21:44:17 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:44:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:44:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:44:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:44:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:47 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:44:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:44:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:44:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:45:17 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:45:17 [manager.py:283] -DEBUG 06-24 21:45:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:45:17 [manager.py:284] -INFO 06-24 21:45:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:45:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:45:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:45:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:45:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:45:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:45:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:45:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:45:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:45:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:46:18 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:46:18 [manager.py:283] -DEBUG 06-24 21:46:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:46:18 [manager.py:284] -INFO 06-24 21:46:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:46:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:46:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:46:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:46:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:46:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:46:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:46:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:46:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:47:18 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:47:18 [manager.py:283] -DEBUG 06-24 21:47:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:47:18 [manager.py:284] -INFO 06-24 21:47:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:47:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:47:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:39 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:46 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:47:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:47:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:47:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:48:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:48:19 [manager.py:283] -DEBUG 06-24 21:48:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:48:19 [manager.py:284] -INFO 06-24 21:48:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:48:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:48:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:48:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:48:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:53 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:48:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:48:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:48:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:48:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:00 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:07 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:14 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:49:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:49:20 [manager.py:283] -DEBUG 06-24 21:49:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:49:20 [manager.py:284] -INFO 06-24 21:49:21 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:49:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:49:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:49:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:28 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:35 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:42 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:49:49 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:49:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:49:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:49:56 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:49:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:03 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:10 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:17 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:50:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:50:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:50:20 [manager.py:283] -DEBUG 06-24 21:50:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:50:20 [manager.py:284] -INFO 06-24 21:50:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:50:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:50:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:50:24 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:31 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:38 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:45 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:50:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:52 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:50:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:50:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:50:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:50:59 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:06 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:13 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:20 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:51:21 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:51:21 [manager.py:283] -DEBUG 06-24 21:51:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:51:21 [manager.py:284] -INFO 06-24 21:51:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:51:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:51:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:27 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:34 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:41 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:48 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:51:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:51:55 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:51:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:02 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:09 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:16 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:52:21 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:52:21 [manager.py:283] -DEBUG 06-24 21:52:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:52:21 [manager.py:284] -INFO 06-24 21:52:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:52:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:52:23 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:30 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:37 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:44 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:51 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:52:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:52:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:52:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:53:22 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:53:22 [manager.py:283] -DEBUG 06-24 21:53:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:53:22 [manager.py:284] -INFO 06-24 21:53:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:53:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:53:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:53:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:53:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:53:58 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:05 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:12 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:19 [decode_infer_rpyc.py:178] kv time out reqs: [] -ERROR 06-24 21:54:19 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:19 [pd_loop.py:121] no close frame received or sent -ERROR 06-24 21:54:19 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task -ERROR 06-24 21:54:19 [pd_loop.py:121] recv_bytes = await websocket.recv() -ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv -ERROR 06-24 21:54:19 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc -ERROR 06-24 21:54:19 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent -INFO 06-24 21:54:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -DEBUG 06-24 21:54:23 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:54:23 [manager.py:283] -DEBUG 06-24 21:54:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:54:23 [manager.py:284] -INFO 06-24 21:54:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:54:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:54:26 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:29 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:29 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:29 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:29 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:29 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:29 [pd_loop.py:121] return await self -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:29 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:33 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:39 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:39 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:39 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:39 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:39 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:39 [pd_loop.py:121] return await self -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:39 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:40 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:43 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:47 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:48 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:49 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:49 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:49 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:49 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:49 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:49 [pd_loop.py:121] return await self -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:49 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:50 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:53 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:53 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:54:53 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -INFO 06-24 21:54:54 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:57 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:54:59 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:59 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:59 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:59 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:59 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:59 [pd_loop.py:121] return await self -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:59 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:55:01 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:04 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:08 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:09 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:55:09 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:55:09 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:55:09 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:55:09 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:55:09 [pd_loop.py:121] return await self -ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:55:09 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:55:09 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:55:09 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:55:09 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:55:09 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:55:09 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:55:09 [decode_kv_move_manager.py:206] connect id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df disconnect -ERROR 06-24 21:55:09 [decode_trans_obj.py:180] put_to_radix_loop thread quit, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 -ERROR 06-24 21:55:09 [decode_trans_obj.py:136] kv_move_loop thread quit -ERROR 06-24 21:55:09 [decode_trans_obj.py:226] trans obj del start, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 -ERROR 06-24 21:55:09 [decode_trans_obj.py:249] trans obj deled, info: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 -INFO 06-24 21:55:09 [decode_trans_process.py:123] destory PDTransLeaveInfo(decode_id=147275795944234129756100418482494441380, prefill_id=163479035537597727162519172725806046247, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') nccl communicator. -INFO 06-24 21:55:11 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:15 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:18 [up_status.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:55:18 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:19 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:55:19 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:55:19 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:55:19 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:55:19 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:55:19 [pd_loop.py:121] return await self -ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:55:19 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:55:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:55:19 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:55:19 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:55:19 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:55:19 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:55:22 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:23 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:55:23 [statics_utils.py:24] mean first cost: 233.53787300822012 ms -INFO 06-24 21:55:23 [statics_utils.py:24] mean per token cost: 0.05672142331302719 ms -DEBUG 06-24 21:55:23 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:55:23 [manager.py:283] -DEBUG 06-24 21:55:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:55:23 [manager.py:284] -INFO 06-24 21:55:25 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:29 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:29 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:55:29 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:55:29 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:55:29 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:55:29 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:55:29 [pd_loop.py:121] return await self -ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:55:29 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:55:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:55:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:55:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:55:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:55:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:55:32 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:36 [decode_infer_rpyc.py:178] kv time out reqs: [] -INFO 06-24 21:55:38 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... - -ERROR 06-24 21:55:38 [decode_kv_move_manager.py:301] -Traceback (most recent call last): - File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/decode_node_impl/decode_kv_move_manager.py", line 299, in timer_loop - time.sleep(3.5) -KeyboardInterrupt -INFO 06-24 21:55:38 [start_utils.py:106] Killing child process 1214345 -INFO 06-24 21:55:38 [start_utils.py:106] Killing child process 1214699 -INFO 06-24 21:55:38 [start_utils.py:108] Killing parent process 1214339 -INFO 06-24 21:55:38 [start_utils.py:53] Killing parent process 1212075 -INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213381 -INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213863 -INFO 06-24 21:55:38 [start_utils.py:51] Killing child process 1213865 -INFO 06-24 21:55:38 [start_utils.py:53] Killing parent process 1212190 -INFO 06-24 21:55:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 21:55:39 [start_utils.py:108] Killing parent process 1214339 -INFO 06-24 21:55:39 [start_utils.py:53] Killing parent process 1212190 -INFO 06-24 21:55:39 [start_utils.py:53] Killing parent process 1212191 -INFO 06-24 21:55:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 21:55:39 [start_utils.py:108] Killing parent process 1214339 -WARNING 06-24 21:55:39 [start_utils.py:56] Process 1212191 does not exist. -INFO 06-24 21:55:39 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 21:55:39 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_master.log b/pd_master.log deleted file mode 100644 index 0dd78a2f7..000000000 --- a/pd_master.log +++ /dev/null @@ -1,4529 +0,0 @@ -INFO 06-24 21:55:19 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:20 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:21 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:55:23 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:23 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:23 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -[api_server] pd_master_start -[pd_master_start] pd_chunk_size: 0 -INFO 06-24 21:55:23 [api_start.py:344] use tgi api: False -INFO 06-24 21:55:23 [api_start.py:345] all start args:Namespace(run_mode='pd_master', host='127.0.1.1', port=60011, httpserver_workers=1, zmq_mode='ipc:///tmp/', pd_master_ip='0.0.0.0', pd_master_port=1212, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=None, mem_fraction=0.9, batch_max_tokens=None, eos_id=None, tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=28765, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type=None, return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=None, visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=0, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, pd_node_id=0) -INFO 06-24 21:55:25 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 21:55:25 [api_start.py:57] start process pid 1410832 -INFO 06-24 21:55:25 [api_start.py:58] http server pid 1411519 -INFO 06-24 21:55:28 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:29 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:30 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:55:32 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:32 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:32 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:55:32 [api_http.py:326] server start up -INFO 06-24 21:55:32 [atomic_lock.py:29] link lock shm 60011_pd_master_req_id_gen_lock -INFO 06-24 21:55:33 [api_http.py:330] server start up ok, loop use is -INFO 06-24 21:56:03 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 21:56:03 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:56:28 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 33900 -INFO 06-24 21:56:28 [api_http.py:268] recieved regist_json {'node_id': 287595743282619216970276961428881885738, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10216, 'detokenization_port': 10253, 'detokenization_pub_port': 10154, 'visual_port': 10095, 'audio_port': 10138, 'cache_port': 10064, 'metric_port': 10217, 'pd_node_infer_rpyc_ports': [10133], 'pd_node_id': 287595743282619216970276961428881885738, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 21:56:28 [manager.py:67] mode: prefill url: 127.0.1.1:8017 registed -INFO 06-24 21:56:36 [api_http.py:291] kv_move_status Client connected from IP: 127.0.0.1, Port: 39458 -INFO 06-24 21:56:44 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 48452 -INFO 06-24 21:56:44 [api_http.py:268] recieved regist_json {'node_id': 148730891575017957868136796871489876076, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10011, 'detokenization_port': 10239, 'detokenization_pub_port': 10144, 'visual_port': 10176, 'audio_port': 10271, 'cache_port': 10117, 'metric_port': 10125, 'pd_node_infer_rpyc_ports': [10126], 'pd_node_id': 148730891575017957868136796871489876076, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 21:56:44 [manager.py:67] mode: decode url: 127.0.1.1:8118 registed -INFO 06-24 21:57:03 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 21:57:03 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:72 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:80 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:88 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:96 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:104 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:112 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:120 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:128 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:136 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:144 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:152 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:160 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:168 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:176 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:184 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:192 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:200 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:208 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:216 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:224 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:232 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:240 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:248 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:256 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:264 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:272 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:280 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:288 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:296 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:304 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:312 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:320 -INFO 06-24 21:57:15 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:328 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 -INFO 06-24 21:57:16 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14560, pd_chunk_size: 0 -INFO 06-24 21:57:33 [statics_utils.py:24] mean first cost: 9286.72189950943 ms -INFO 06-24 21:57:33 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:58:03 [statics_utils.py:24] mean first cost: 9286.72189950943 ms -INFO 06-24 21:58:03 [statics_utils.py:24] mean per token cost: 0.0 ms -WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 8 kv move time out err, server is busy now. -ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 8 -ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:17 [api_http.py:183] await fut -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:17 [api_http.py:183] raise e -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46184 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 24 kv move time out err, server is busy now. -ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 24 -ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:17 [api_http.py:183] await fut -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:17 [api_http.py:183] raise e -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46214 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:17 [manager.py:236] group_request_id: 32 kv move time out err, server is busy now. -ERROR 06-24 21:58:17 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:17 [manager.py:349] aborted group_request_id 32 -ERROR 06-24 21:58:17 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:17 [api_http.py:183] await fut -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:17 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:17 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:17 [api_http.py:183] -ERROR 06-24 21:58:17 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:17 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:17 [api_http.py:183] raise e -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:17 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:17 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:17 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:17 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46216 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:18 [manager.py:236] group_request_id: 40 kv move time out err, server is busy now. -ERROR 06-24 21:58:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:18 [manager.py:349] aborted group_request_id 40 -ERROR 06-24 21:58:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:18 [api_http.py:183] await fut -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:18 [api_http.py:183] raise e -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46218 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:18 [manager.py:236] group_request_id: 48 kv move time out err, server is busy now. -ERROR 06-24 21:58:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:18 [manager.py:349] aborted group_request_id 48 -ERROR 06-24 21:58:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:18 [api_http.py:183] await fut -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:18 [api_http.py:183] -ERROR 06-24 21:58:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:18 [api_http.py:183] raise e -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46234 - "POST /generate HTTP/1.1" 503 -INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 -INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 -INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 -INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 -INFO 06-24 21:58:18 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 56 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 56 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46238 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 64 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 64 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46252 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 72 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 72 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46254 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 80 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 80 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46262 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 88 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 88 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46272 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 96 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 96 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46282 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:19 [manager.py:236] group_request_id: 104 kv move time out err, server is busy now. -ERROR 06-24 21:58:19 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:19 [manager.py:349] aborted group_request_id 104 -ERROR 06-24 21:58:19 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:19 [api_http.py:183] await fut -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:19 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:19 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:19 [api_http.py:183] -ERROR 06-24 21:58:19 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:19 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:19 [api_http.py:183] raise e -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:19 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:19 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:19 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:19 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46294 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 112 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 112 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46300 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 120 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 120 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46310 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 128 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 128 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46324 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 136 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 136 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46340 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 144 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 144 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46348 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 152 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 152 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46362 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 160 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 160 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46376 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:20 [manager.py:236] group_request_id: 168 kv move time out err, server is busy now. -ERROR 06-24 21:58:20 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:20 [manager.py:349] aborted group_request_id 168 -ERROR 06-24 21:58:20 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:20 [api_http.py:183] await fut -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:20 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:20 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:20 [api_http.py:183] -ERROR 06-24 21:58:20 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:20 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:20 [api_http.py:183] raise e -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:20 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:20 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:20 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:20 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46386 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 176 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 176 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46402 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 184 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 184 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46414 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 192 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 192 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46430 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 200 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 200 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46436 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 208 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 208 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46448 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 216 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 216 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46456 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:21 [manager.py:236] group_request_id: 224 kv move time out err, server is busy now. -ERROR 06-24 21:58:21 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:21 [manager.py:349] aborted group_request_id 224 -ERROR 06-24 21:58:21 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:21 [api_http.py:183] await fut -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:21 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:21 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:21 [api_http.py:183] -ERROR 06-24 21:58:21 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:21 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:21 [api_http.py:183] raise e -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:21 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:21 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:21 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:21 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46472 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 232 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 232 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46482 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 240 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 240 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46494 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 248 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 248 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46500 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 256 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 256 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46506 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 264 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 264 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46520 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 272 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 272 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46532 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:22 [manager.py:236] group_request_id: 280 kv move time out err, server is busy now. -ERROR 06-24 21:58:22 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:22 [manager.py:349] aborted group_request_id 280 -ERROR 06-24 21:58:22 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:22 [api_http.py:183] await fut -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:22 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:22 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:22 [api_http.py:183] -ERROR 06-24 21:58:22 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:22 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:22 [api_http.py:183] raise e -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:22 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:22 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:22 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:22 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46542 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 288 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 288 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46560 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 296 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 296 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46558 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 304 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 304 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46570 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 312 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 312 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46586 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 320 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 320 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46594 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 328 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 328 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46608 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 336 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 336 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46612 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:23 [manager.py:236] group_request_id: 344 kv move time out err, server is busy now. -ERROR 06-24 21:58:23 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:23 [manager.py:349] aborted group_request_id 344 -ERROR 06-24 21:58:23 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:23 [api_http.py:183] await fut -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:23 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:23 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:23 [api_http.py:183] -ERROR 06-24 21:58:23 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:23 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:23 [api_http.py:183] raise e -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:23 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:23 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:23 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:23 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46620 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 352 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 352 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46622 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 360 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 360 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46636 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 368 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 368 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46638 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 376 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 376 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46652 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 384 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 384 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46660 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 392 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 392 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46670 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:24 [manager.py:236] group_request_id: 400 kv move time out err, server is busy now. -ERROR 06-24 21:58:24 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:24 [manager.py:349] aborted group_request_id 400 -ERROR 06-24 21:58:24 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:24 [api_http.py:183] await fut -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:24 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:24 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:24 [api_http.py:183] -ERROR 06-24 21:58:24 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:24 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:24 [api_http.py:183] raise e -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:24 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:24 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:24 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:24 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46678 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 408 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 408 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46690 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 416 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 416 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46706 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 424 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 424 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46718 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 432 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 432 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46734 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 440 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 440 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46748 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 448 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 448 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46762 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:25 [manager.py:236] group_request_id: 456 kv move time out err, server is busy now. -ERROR 06-24 21:58:25 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:25 [manager.py:349] aborted group_request_id 456 -ERROR 06-24 21:58:25 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:25 [api_http.py:183] await fut -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:25 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:25 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:25 [api_http.py:183] -ERROR 06-24 21:58:25 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:25 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:25 [api_http.py:183] raise e -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:25 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:25 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:25 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:25 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46768 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 464 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 464 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46780 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 472 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 472 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46792 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 480 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 480 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46802 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 488 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 488 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46814 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 496 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 496 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46830 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 504 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 504 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46846 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:26 [manager.py:236] group_request_id: 512 kv move time out err, server is busy now. -ERROR 06-24 21:58:26 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:26 [manager.py:349] aborted group_request_id 512 -ERROR 06-24 21:58:26 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:26 [api_http.py:183] await fut -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:26 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:26 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:26 [api_http.py:183] -ERROR 06-24 21:58:26 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:26 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:26 [api_http.py:183] raise e -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:26 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:26 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:26 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:26 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46856 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 520 kv move time out err, server is busy now. -ERROR 06-24 21:58:27 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:27 [manager.py:349] aborted group_request_id 520 -ERROR 06-24 21:58:27 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:27 [api_http.py:183] await fut -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:27 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:27 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:27 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:27 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:27 [api_http.py:183] raise e -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:27 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:27 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46858 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 528 kv move time out err, server is busy now. -ERROR 06-24 21:58:27 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:27 [manager.py:349] aborted group_request_id 528 -ERROR 06-24 21:58:27 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:27 [api_http.py:183] await fut -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:27 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:27 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:27 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:27 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:27 [api_http.py:183] -ERROR 06-24 21:58:27 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:27 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:27 [api_http.py:183] raise e -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:27 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:27 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:27 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:27 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46862 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:27 [manager.py:236] group_request_id: 536 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 536 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46870 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 544 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 544 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46878 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 552 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 552 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46880 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 560 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 560 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46896 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 568 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 568 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46900 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:28 [manager.py:236] group_request_id: 576 kv move time out err, server is busy now. -ERROR 06-24 21:58:28 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:28 [manager.py:349] aborted group_request_id 576 -ERROR 06-24 21:58:28 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:28 [api_http.py:183] await fut -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:28 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:28 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:28 [api_http.py:183] -ERROR 06-24 21:58:28 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:28 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:28 [api_http.py:183] raise e -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:28 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:28 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:28 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:28 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46916 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 584 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 584 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46918 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 592 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 592 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46926 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 600 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 600 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46928 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 608 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 608 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46932 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 616 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 616 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46938 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 624 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 624 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46948 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 632 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 632 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46956 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 640 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 640 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46962 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 648 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 648 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46974 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 656 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 656 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46976 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 664 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 664 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46986 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 672 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 672 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:46990 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 680 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:29 [manager.py:349] aborted group_request_id 680 -ERROR 06-24 21:58:29 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:29 [api_http.py:183] await fut -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:29 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:29 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:29 [api_http.py:183] -ERROR 06-24 21:58:29 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:29 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:29 [api_http.py:183] raise e -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:29 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:29 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:29 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:29 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47002 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:29 [manager.py:236] group_request_id: 688 kv move time out err, server is busy now. -ERROR 06-24 21:58:29 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:30 [manager.py:349] aborted group_request_id 688 -ERROR 06-24 21:58:30 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:30 [api_http.py:183] await fut -ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:30 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:30 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:30 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:30 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:30 [api_http.py:183] -ERROR 06-24 21:58:30 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:30 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:30 [api_http.py:183] raise e -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:30 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:30 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:30 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:30 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47008 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 696 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 696 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47016 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 704 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 704 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47018 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 712 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 712 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47026 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 720 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 720 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47028 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 728 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 728 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47036 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 736 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 736 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47042 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 744 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 744 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47052 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:31 [manager.py:236] group_request_id: 752 kv move time out err, server is busy now. -ERROR 06-24 21:58:31 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:31 [manager.py:349] aborted group_request_id 752 -ERROR 06-24 21:58:31 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:31 [api_http.py:183] await fut -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:31 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:31 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:31 [api_http.py:183] -ERROR 06-24 21:58:31 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:31 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:31 [api_http.py:183] raise e -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:31 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:31 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:31 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:31 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47054 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 760 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 760 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47056 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 768 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 768 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47068 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 776 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 776 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47078 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 784 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 784 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47074 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 792 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 792 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47086 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:58:32 [manager.py:236] group_request_id: 800 kv move time out err, server is busy now. -ERROR 06-24 21:58:32 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:58:32 [manager.py:349] aborted group_request_id 800 -ERROR 06-24 21:58:32 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:58:32 [api_http.py:183] await fut -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] return fut.result() -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:58:32 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:58:32 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:58:32 [api_http.py:183] -ERROR 06-24 21:58:32 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:58:32 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:58:32 [api_http.py:183] raise e -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:58:32 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:58:32 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:58:32 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:58:32 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:47094 - "POST /generate HTTP/1.1" 503 -INFO 06-24 21:58:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms -INFO 06-24 21:58:33 [statics_utils.py:24] mean per token cost: 0.0 ms -WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 808 kv move time out err, server is busy now. -ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 808 -ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:59:18 [api_http.py:183] await fut -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:59:18 [api_http.py:183] raise e -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:51404 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 816 kv move time out err, server is busy now. -ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 816 -ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:59:18 [api_http.py:183] await fut -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:59:18 [api_http.py:183] raise e -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:51416 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 824 kv move time out err, server is busy now. -ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 824 -ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:59:18 [api_http.py:183] await fut -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:59:18 [api_http.py:183] raise e -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:51424 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 832 kv move time out err, server is busy now. -ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 832 -ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:59:18 [api_http.py:183] await fut -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:59:18 [api_http.py:183] raise e -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:51432 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 21:59:18 [manager.py:236] group_request_id: 840 kv move time out err, server is busy now. -ERROR 06-24 21:59:18 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 21:59:18 [manager.py:349] aborted group_request_id 840 -ERROR 06-24 21:59:18 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 21:59:18 [api_http.py:183] await fut -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] return fut.result() -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 21:59:18 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 21:59:18 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 21:59:18 [api_http.py:183] -ERROR 06-24 21:59:18 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 21:59:18 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 21:59:18 [api_http.py:183] raise e -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 21:59:18 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 21:59:18 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 21:59:18 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 21:59:18 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:51436 - "POST /generate HTTP/1.1" 503 -INFO 06-24 21:59:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms -INFO 06-24 21:59:33 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 22:00:03 [statics_utils.py:24] mean first cost: 8877.26598013015 ms -INFO 06-24 22:00:03 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 22:00:33 [statics_utils.py:24] mean first cost: 8877.26598013015 ms -INFO 06-24 22:00:33 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 22:01:03 [statics_utils.py:24] mean first cost: 8877.26598013015 ms -INFO 06-24 22:01:03 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 22:01:39 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... - -INFO 06-24 22:01:39 [start_utils.py:106] Killing child process 1412175 -INFO 06-24 22:01:39 [start_utils.py:106] Killing child process 1412182 -INFO 06-24 22:01:39 [start_utils.py:108] Killing parent process 1411519 -INFO 06-24 22:01:39 [start_utils.py:53] Killing parent process 1411353 -INFO 06-24 22:01:39 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 22:01:39 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_master_4096.log b/pd_master_4096.log deleted file mode 100644 index ea00b53d9..000000000 --- a/pd_master_4096.log +++ /dev/null @@ -1,15896 +0,0 @@ -INFO 06-24 19:53:44 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:53:45 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:53:46 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:53:48 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:53:48 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:53:48 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -[api_server] pd_master_start -[pd_master_start] pd_chunk_size: 4096 -INFO 06-24 19:53:48 [api_start.py:344] use tgi api: False -INFO 06-24 19:53:48 [api_start.py:345] all start args:Namespace(run_mode='pd_master', host='127.0.1.1', port=60011, httpserver_workers=1, zmq_mode='ipc:///tmp/', pd_master_ip='0.0.0.0', pd_master_port=1212, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=None, mem_fraction=0.9, batch_max_tokens=None, eos_id=None, tool_call_parser=None, running_max_req_size=1000, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16384, nccl_host='127.0.0.1', nccl_port=28765, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type=None, return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=None, visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=False, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=0, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=4096, pd_node_id=0) -INFO 06-24 19:53:50 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 19:53:50 [api_start.py:57] start process pid 1210636 -INFO 06-24 19:53:50 [api_start.py:58] http server pid 1211009 -INFO 06-24 19:53:53 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:53:54 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:53:55 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:53:57 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:53:57 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:53:57 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:53:57 [api_http.py:326] server start up -INFO 06-24 19:53:57 [atomic_lock.py:29] link lock shm 60011_pd_master_req_id_gen_lock -INFO 06-24 19:53:57 [api_http.py:330] server start up ok, loop use is -INFO 06-24 19:54:27 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:54:27 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:54:43 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 36276 -INFO 06-24 19:54:43 [api_http.py:268] recieved regist_json {'node_id': 163479035537597727162519172725806046247, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10173, 'detokenization_port': 10076, 'detokenization_pub_port': 10098, 'visual_port': 10080, 'audio_port': 10160, 'cache_port': 10233, 'metric_port': 10089, 'pd_node_infer_rpyc_ports': [10247], 'pd_node_id': 163479035537597727162519172725806046247, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 19:54:43 [manager.py:67] mode: prefill url: 127.0.1.1:8017 registed -INFO 06-24 19:54:46 [api_http.py:291] kv_move_status Client connected from IP: 127.0.0.1, Port: 36288 -INFO 06-24 19:54:53 [api_http.py:266] Client connected from IP: 127.0.0.1, Port: 41776 -INFO 06-24 19:54:53 [api_http.py:268] recieved regist_json {'node_id': 147275795944234129756100418482494441380, 'client_ip_port': '127.0.1.1:8118', 'mode': 'decode', 'start_args': {'run_mode': 'decode', 'host': '127.0.1.1', 'port': 8118, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_12322_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 1000, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16384, 'nccl_host': '127.0.0.1', 'nccl_port': 12322, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 0, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': False, 'graph_max_batch_size': 16, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 2048, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10135, 'detokenization_port': 10143, 'detokenization_pub_port': 10207, 'visual_port': 10165, 'audio_port': 10059, 'cache_port': 10114, 'metric_port': 10051, 'pd_node_infer_rpyc_ports': [10236], 'pd_node_id': 147275795944234129756100418482494441380, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 19:54:53 [manager.py:67] mode: decode url: 127.0.1.1:8118 registed -INFO 06-24 19:54:57 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:54:57 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:55:27 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:27 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:55:57 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:57 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:56:57 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:56:57 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:57:57 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:57:57 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:58:27 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:58:27 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:58:57 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:58:57 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:59:27 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:59:27 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 -INFO 06-24 19:59:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:43 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 first_token_cost:3264.9035453796387ms total_cost_time:3589.8795127868652ms,out_token_counter:2 mean_per_token_cost_time: 162.48798370361328ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40222 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:43 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:44 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 first_token_cost:4415.666103363037ms total_cost_time:4719.113349914551ms,out_token_counter:2 mean_per_token_cost_time: 151.72362327575684ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40270 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:45 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 first_token_cost:5572.38507270813ms total_cost_time:5953.195095062256ms,out_token_counter:2 mean_per_token_cost_time: 190.405011177063ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40344 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 -INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 first_token_cost:5570.23286819458ms total_cost_time:5978.773593902588ms,out_token_counter:2 mean_per_token_cost_time: 204.2703628540039ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40366 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 -INFO 06-24 19:59:46 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 first_token_cost:5569.038391113281ms total_cost_time:6003.514051437378ms,out_token_counter:2 mean_per_token_cost_time: 217.23783016204834ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40380 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:46 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:47 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 first_token_cost:6644.745111465454ms total_cost_time:6978.367805480957ms,out_token_counter:2 mean_per_token_cost_time: 166.81134700775146ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40416 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:47 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 -INFO 06-24 19:59:47 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 first_token_cost:6642.567157745361ms total_cost_time:7003.440856933594ms,out_token_counter:2 mean_per_token_cost_time: 180.4368495941162ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40438 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:47 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:50 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 first_token_cost:9978.71732711792ms total_cost_time:10221.765756607056ms,out_token_counter:2 mean_per_token_cost_time: 121.52421474456787ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40610 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:50 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:51 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 first_token_cost:11109.405755996704ms total_cost_time:11378.748655319214ms,out_token_counter:2 mean_per_token_cost_time: 134.67144966125488ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40710 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:51 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:53 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 first_token_cost:13133.885145187378ms total_cost_time:13412.688255310059ms,out_token_counter:2 mean_per_token_cost_time: 139.40155506134033ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40866 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:53 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:55 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 first_token_cost:15290.080785751343ms total_cost_time:15553.339958190918ms,out_token_counter:2 mean_per_token_cost_time: 131.6295862197876ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40976 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:55 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:57 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 first_token_cost:10523.409843444824ms total_cost_time:10836.05980873108ms,out_token_counter:2 mean_per_token_cost_time: 156.32498264312744ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:48768 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:57 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 19:59:57 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 first_token_cost:5573.999404907227ms total_cost_time:5863.377809524536ms,out_token_counter:2 mean_per_token_cost_time: 144.68920230865479ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:48836 - "POST /generate HTTP/1.1" 200 -INFO 06-24 19:59:57 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 -INFO 06-24 19:59:57 [statics_utils.py:24] mean first cost: 8480.433994105884 ms -INFO 06-24 19:59:57 [statics_utils.py:24] mean per token cost: 161.66261526254507 ms -INFO 06-24 20:00:27 [statics_utils.py:24] mean first cost: 8409.328593616992 ms -INFO 06-24 20:00:27 [statics_utils.py:24] mean per token cost: 161.66261526254507 ms -INFO 06-24 20:00:32 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 first_token_cost:6648.284196853638ms total_cost_time:52575.37841796875ms,out_token_counter:1379 mean_per_token_cost_time: 33.30463685360051ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40402 - "POST /generate HTTP/1.1" 200 -INFO 06-24 20:00:32 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 -WARNING 06-24 20:00:40 [manager.py:236] group_request_id: 16 kv move time out err, server is busy now. -ERROR 06-24 20:00:40 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:40 [manager.py:349] aborted group_request_id 16 -ERROR 06-24 20:00:40 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:40 [api_http.py:183] await fut -ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:40 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:40 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:40 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:40 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:40 [api_http.py:183] -ERROR 06-24 20:00:40 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:40 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:40 [api_http.py:183] raise e -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:40 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:40 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:40 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:40 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40082 - "POST /generate HTTP/1.1" 503 -INFO 06-24 20:00:40 [manager.py:161] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 24 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 24 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40102 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 32 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 32 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40118 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 40 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 40 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40124 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 48 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 48 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40136 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 56 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 56 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40148 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 64 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 64 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40156 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:41 [manager.py:236] group_request_id: 72 kv move time out err, server is busy now. -ERROR 06-24 20:00:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:41 [manager.py:349] aborted group_request_id 72 -ERROR 06-24 20:00:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:41 [api_http.py:183] await fut -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:41 [api_http.py:183] -ERROR 06-24 20:00:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:41 [api_http.py:183] raise e -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40160 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 80 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 80 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40162 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 88 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 88 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40178 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 96 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 96 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40194 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 104 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 104 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40204 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 112 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 112 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40214 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:43 [manager.py:236] group_request_id: 136 kv move time out err, server is busy now. -ERROR 06-24 20:00:43 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:43 [manager.py:349] aborted group_request_id 136 -ERROR 06-24 20:00:43 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:43 [api_http.py:183] await fut -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:43 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:43 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:43 [api_http.py:183] -ERROR 06-24 20:00:43 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:43 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:43 [api_http.py:183] raise e -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:43 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:43 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:43 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:43 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40224 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 144 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 144 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40240 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 152 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 152 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40244 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 160 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 160 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40258 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 168 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 168 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40268 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 184 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 184 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40282 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 192 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 192 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40298 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:44 [manager.py:236] group_request_id: 200 kv move time out err, server is busy now. -ERROR 06-24 20:00:44 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:44 [manager.py:349] aborted group_request_id 200 -ERROR 06-24 20:00:44 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:44 [api_http.py:183] await fut -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:44 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:44 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:44 [api_http.py:183] -ERROR 06-24 20:00:44 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:44 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:44 [api_http.py:183] raise e -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:44 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:44 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:44 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:44 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40308 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 208 kv move time out err, server is busy now. -ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 208 -ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:45 [api_http.py:183] await fut -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:45 [api_http.py:183] raise e -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40312 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 216 kv move time out err, server is busy now. -ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 216 -ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:45 [api_http.py:183] await fut -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:45 [api_http.py:183] raise e -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40336 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 224 kv move time out err, server is busy now. -ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 224 -ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:45 [api_http.py:183] await fut -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:45 [api_http.py:183] raise e -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40320 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 232 kv move time out err, server is busy now. -ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 232 -ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:45 [api_http.py:183] await fut -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:45 [api_http.py:183] raise e -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40338 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:45 [manager.py:236] group_request_id: 248 kv move time out err, server is busy now. -ERROR 06-24 20:00:45 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:45 [manager.py:349] aborted group_request_id 248 -ERROR 06-24 20:00:45 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:45 [api_http.py:183] await fut -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:45 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:45 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:45 [api_http.py:183] -ERROR 06-24 20:00:45 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:45 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:45 [api_http.py:183] raise e -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:45 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:45 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:45 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:45 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40360 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 272 kv move time out err, server is busy now. -ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 272 -ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:46 [api_http.py:183] await fut -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:46 [api_http.py:183] raise e -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40390 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 288 kv move time out err, server is busy now. -ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 288 -ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:46 [api_http.py:183] await fut -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:46 [api_http.py:183] raise e -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40404 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 296 kv move time out err, server is busy now. -ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 296 -ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:46 [api_http.py:183] await fut -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:46 [api_http.py:183] raise e -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40410 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:46 [manager.py:236] group_request_id: 312 kv move time out err, server is busy now. -ERROR 06-24 20:00:46 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:46 [manager.py:349] aborted group_request_id 312 -ERROR 06-24 20:00:46 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:46 [api_http.py:183] await fut -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:46 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:46 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:46 [api_http.py:183] -ERROR 06-24 20:00:46 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:46 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:46 [api_http.py:183] raise e -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:46 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:46 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:46 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:46 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40424 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 328 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 328 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40446 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 336 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 336 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40448 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 344 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 344 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40464 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 352 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 352 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40476 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 360 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 360 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40486 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 368 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 368 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40496 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 376 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 376 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40512 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:48 [manager.py:236] group_request_id: 384 kv move time out err, server is busy now. -ERROR 06-24 20:00:48 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:48 [manager.py:349] aborted group_request_id 384 -ERROR 06-24 20:00:48 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:48 [api_http.py:183] await fut -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:48 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:48 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:48 [api_http.py:183] -ERROR 06-24 20:00:48 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:48 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:48 [api_http.py:183] raise e -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:48 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:48 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:48 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:48 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40528 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 392 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 392 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40538 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 408 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 408 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40550 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 416 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 416 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40554 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 424 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 424 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40568 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 432 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 432 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40572 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 440 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 440 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40586 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:49 [manager.py:236] group_request_id: 448 kv move time out err, server is busy now. -ERROR 06-24 20:00:49 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:49 [manager.py:349] aborted group_request_id 448 -ERROR 06-24 20:00:49 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:49 [api_http.py:183] await fut -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:49 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:49 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:49 [api_http.py:183] -ERROR 06-24 20:00:49 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:49 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:49 [api_http.py:183] raise e -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:49 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:49 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:49 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:49 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40598 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 464 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 464 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40620 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 472 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 472 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40632 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 480 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 480 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40648 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 488 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 488 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40664 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 496 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 496 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40680 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:50 [manager.py:236] group_request_id: 504 kv move time out err, server is busy now. -ERROR 06-24 20:00:50 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:50 [manager.py:349] aborted group_request_id 504 -ERROR 06-24 20:00:50 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:50 [api_http.py:183] await fut -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:50 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:50 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:50 [api_http.py:183] -ERROR 06-24 20:00:50 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:50 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:50 [api_http.py:183] raise e -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:50 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:50 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:50 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:50 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40684 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 512 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 512 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40694 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 520 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 520 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40700 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 536 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 536 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40726 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 544 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 544 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40728 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 552 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 552 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40740 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 560 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 560 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40746 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:51 [manager.py:236] group_request_id: 568 kv move time out err, server is busy now. -ERROR 06-24 20:00:51 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:51 [manager.py:349] aborted group_request_id 568 -ERROR 06-24 20:00:51 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:51 [api_http.py:183] await fut -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:51 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:51 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:51 [api_http.py:183] -ERROR 06-24 20:00:51 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:51 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:51 [api_http.py:183] raise e -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:51 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:51 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:51 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:51 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40758 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 576 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 576 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40762 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 584 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 584 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40776 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 592 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 592 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40790 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 600 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 600 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40804 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 608 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 608 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40816 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 616 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 616 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40832 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:52 [manager.py:236] group_request_id: 624 kv move time out err, server is busy now. -ERROR 06-24 20:00:52 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:52 [manager.py:349] aborted group_request_id 624 -ERROR 06-24 20:00:52 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:52 [api_http.py:183] await fut -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:52 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:52 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:52 [api_http.py:183] -ERROR 06-24 20:00:52 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:52 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:52 [api_http.py:183] raise e -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:52 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:52 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:52 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:52 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40834 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 632 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 632 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40846 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 640 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 640 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40850 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 648 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 648 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40858 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 656 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 656 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40862 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 672 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 672 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40878 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:53 [manager.py:236] group_request_id: 680 kv move time out err, server is busy now. -ERROR 06-24 20:00:53 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:53 [manager.py:349] aborted group_request_id 680 -ERROR 06-24 20:00:53 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:53 [api_http.py:183] await fut -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:53 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:53 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:53 [api_http.py:183] -ERROR 06-24 20:00:53 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:53 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:53 [api_http.py:183] raise e -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:53 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:53 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:53 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:53 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40894 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 688 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 688 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40900 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 696 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 696 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40910 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 704 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 704 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40924 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 712 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 712 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40932 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 720 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 720 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40938 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 728 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 728 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40952 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 736 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 736 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40964 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:54 [manager.py:236] group_request_id: 744 kv move time out err, server is busy now. -ERROR 06-24 20:00:54 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:54 [manager.py:349] aborted group_request_id 744 -ERROR 06-24 20:00:54 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:54 [api_http.py:183] await fut -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:54 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:54 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:54 [api_http.py:183] -ERROR 06-24 20:00:54 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:54 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:54 [api_http.py:183] raise e -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:54 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:54 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:54 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:54 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40968 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:55 [manager.py:236] group_request_id: 760 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 760 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40980 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 768 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 768 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40992 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 776 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 776 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:40998 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 784 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 784 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:41014 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 792 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 792 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:41030 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 800 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 800 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:41040 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 808 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 808 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48728 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 816 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 816 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48740 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 824 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 824 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48752 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 840 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 840 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48784 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 848 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 848 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48796 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 856 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 856 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48804 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:56 [manager.py:236] group_request_id: 864 kv move time out err, server is busy now. -ERROR 06-24 20:00:56 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:56 [manager.py:349] aborted group_request_id 864 -ERROR 06-24 20:00:56 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:56 [api_http.py:183] await fut -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:56 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:56 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:56 [api_http.py:183] -ERROR 06-24 20:00:56 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:56 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:56 [api_http.py:183] raise e -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:56 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:56 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:56 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:56 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:48820 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 880 kv move time out err, server is busy now. -ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 880 -ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:57 [api_http.py:183] await fut -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:57 [api_http.py:183] raise e -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:59482 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 888 kv move time out err, server is busy now. -ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 888 -ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:57 [api_http.py:183] await fut -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:57 [api_http.py:183] raise e -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:59494 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:00:57 [manager.py:236] group_request_id: 896 kv move time out err, server is busy now. -ERROR 06-24 20:00:57 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:57 [manager.py:349] aborted group_request_id 896 -ERROR 06-24 20:00:57 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:57 [api_http.py:183] await fut -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:57 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:57 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:57 [api_http.py:183] -ERROR 06-24 20:00:57 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:57 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:57 [api_http.py:183] raise e -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:57 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:57 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:57 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:57 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:59496 - "POST /generate HTTP/1.1" 503 -INFO 06-24 20:00:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:00:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms -WARNING 06-24 20:00:58 [manager.py:236] group_request_id: 904 kv move time out err, server is busy now. -ERROR 06-24 20:00:58 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:00:58 [manager.py:349] aborted group_request_id 904 -ERROR 06-24 20:00:58 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:00:58 [api_http.py:183] await fut -ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:00:58 [api_http.py:183] return fut.result() -ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:00:58 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:00:58 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:00:58 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:00:58 [api_http.py:183] -ERROR 06-24 20:00:58 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:00:58 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:00:58 [api_http.py:183] raise e -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:00:58 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:00:58 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:00:58 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:00:58 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:59504 - "POST /generate HTTP/1.1" 503 -INFO 06-24 20:01:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:01:28 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms -WARNING 06-24 20:01:33 [manager.py:236] group_request_id: 912 kv move time out err, server is busy now. -ERROR 06-24 20:01:33 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:01:33 [manager.py:349] aborted group_request_id 912 -ERROR 06-24 20:01:33 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:01:33 [api_http.py:183] await fut -ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:01:33 [api_http.py:183] return fut.result() -ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:01:33 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:01:33 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:01:33 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:01:33 [api_http.py:183] -ERROR 06-24 20:01:33 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:01:33 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:01:33 [api_http.py:183] raise e -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:01:33 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:01:33 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:01:33 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:01:33 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:39294 - "POST /generate HTTP/1.1" 503 -WARNING 06-24 20:01:41 [manager.py:236] group_request_id: 920 kv move time out err, server is busy now. -ERROR 06-24 20:01:41 [manager.py:149] has exception Server is busy, please try again later (Status code: 503) -WARNING 06-24 20:01:41 [manager.py:349] aborted group_request_id 920 -ERROR 06-24 20:01:41 [api_http.py:183] Server is busy, please try again later (Status code: 503) -ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/locks.py", line 226, in wait -ERROR 06-24 20:01:41 [api_http.py:183] await fut -ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 490, in wait_for -ERROR 06-24 20:01:41 [api_http.py:183] return fut.result() -ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.CancelledError -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] The above exception was the direct cause of the following exception: -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 234, in fetch_stream -ERROR 06-24 20:01:41 [api_http.py:183] await asyncio.wait_for(up_status_event.wait(), timeout=60) -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/asyncio/tasks.py", line 492, in wait_for -ERROR 06-24 20:01:41 [api_http.py:183] raise exceptions.TimeoutError() from exc -ERROR 06-24 20:01:41 [api_http.py:183] asyncio.exceptions.TimeoutError -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] During handling of the above exception, another exception occurred: -ERROR 06-24 20:01:41 [api_http.py:183] -ERROR 06-24 20:01:41 [api_http.py:183] Traceback (most recent call last): -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_http.py", line 181, in generate -ERROR 06-24 20:01:41 [api_http.py:183] return await g_objs.g_generate_func(request, g_objs.httpserver_manager) -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/api_lightllm.py", line 55, in lightllm_generate -ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 151, in generate -ERROR 06-24 20:01:41 [api_http.py:183] raise e -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 145, in generate -ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, request_output, metadata, finish_status in results_generator: -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 302, in _wait_to_token_package -ERROR 06-24 20:01:41 [api_http.py:183] async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( -ERROR 06-24 20:01:41 [api_http.py:183] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver_for_pd_master/manager.py", line 237, in fetch_stream -ERROR 06-24 20:01:41 [api_http.py:183] raise ServerBusyError() -ERROR 06-24 20:01:41 [api_http.py:183] lightllm.utils.error_utils.ServerBusyError: Server is busy, please try again later (Status code: 503) -127.0.0.1:39304 - "POST /generate HTTP/1.1" 503 -INFO 06-24 20:01:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:01:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 -INFO 06-24 20:02:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:02:28 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 454, pd_chunk_size: 4096 -INFO 06-24 20:02:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:02:58 [statics_utils.py:24] mean per token cost: 152.49418823333477 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10272, pd_chunk_size: 4096 -INFO 06-24 20:03:14 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 first_token_cost:3266.073703765869ms total_cost_time:214615.10252952576ms,out_token_counter:4288 mean_per_token_cost_time: 49.28848620003729ms prompt_token_num:5345 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40218 - "POST /generate HTTP/1.1" 200 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 301, pd_chunk_size: 4096 -INFO 06-24 20:03:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:03:28 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 -INFO 06-24 20:03:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:03:58 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 -INFO 06-24 20:04:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:04:28 [statics_utils.py:24] mean per token cost: 145.6138080977816 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 20:04:55 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 first_token_cost:8981.547832489014ms total_cost_time:315349.3595123291ms,out_token_counter:4631 mean_per_token_cost_time: 66.15586518675018ms prompt_token_num:5679 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40544 - "POST /generate HTTP/1.1" 200 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9886, pd_chunk_size: 4096 -INFO 06-24 20:04:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:04:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9621, pd_chunk_size: 4096 -INFO 06-24 20:05:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:05:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9486, pd_chunk_size: 4096 -INFO 06-24 20:06:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:06:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9216, pd_chunk_size: 4096 -INFO 06-24 20:07:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:07:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9071, pd_chunk_size: 4096 -INFO 06-24 20:07:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:07:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8925, pd_chunk_size: 4096 -INFO 06-24 20:08:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:08:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8793, pd_chunk_size: 4096 -INFO 06-24 20:08:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:08:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8660, pd_chunk_size: 4096 -INFO 06-24 20:09:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:09:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8526, pd_chunk_size: 4096 -INFO 06-24 20:09:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:09:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8394, pd_chunk_size: 4096 -INFO 06-24 20:10:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:10:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8131, pd_chunk_size: 4096 -INFO 06-24 20:11:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:11:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7872, pd_chunk_size: 4096 -INFO 06-24 20:12:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:12:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7740, pd_chunk_size: 4096 -INFO 06-24 20:12:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:12:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7607, pd_chunk_size: 4096 -INFO 06-24 20:13:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:13:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7346, pd_chunk_size: 4096 -INFO 06-24 20:14:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:14:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7220, pd_chunk_size: 4096 -INFO 06-24 20:14:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:14:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7090, pd_chunk_size: 4096 -INFO 06-24 20:15:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:15:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6959, pd_chunk_size: 4096 -INFO 06-24 20:15:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:15:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6702, pd_chunk_size: 4096 -INFO 06-24 20:16:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:16:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6572, pd_chunk_size: 4096 -INFO 06-24 20:17:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:17:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6314, pd_chunk_size: 4096 -INFO 06-24 20:18:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:18:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6187, pd_chunk_size: 4096 -INFO 06-24 20:18:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:18:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5931, pd_chunk_size: 4096 -INFO 06-24 20:19:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:19:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5805, pd_chunk_size: 4096 -INFO 06-24 20:20:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:20:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5676, pd_chunk_size: 4096 -INFO 06-24 20:20:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:20:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5548, pd_chunk_size: 4096 -INFO 06-24 20:21:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:21:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5419, pd_chunk_size: 4096 -INFO 06-24 20:21:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:21:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5162, pd_chunk_size: 4096 -INFO 06-24 20:22:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:22:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5033, pd_chunk_size: 4096 -INFO 06-24 20:23:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:23:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4905, pd_chunk_size: 4096 -INFO 06-24 20:23:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:23:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4777, pd_chunk_size: 4096 -INFO 06-24 20:24:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:24:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4651, pd_chunk_size: 4096 -INFO 06-24 20:24:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:24:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4398, pd_chunk_size: 4096 -INFO 06-24 20:25:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:25:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4148, pd_chunk_size: 4096 -INFO 06-24 20:26:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:26:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4022, pd_chunk_size: 4096 -INFO 06-24 20:27:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:27:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3896, pd_chunk_size: 4096 -INFO 06-24 20:27:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:27:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3771, pd_chunk_size: 4096 -INFO 06-24 20:28:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:28:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3642, pd_chunk_size: 4096 -INFO 06-24 20:28:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:28:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3522, pd_chunk_size: 4096 -INFO 06-24 20:29:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:29:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3282, pd_chunk_size: 4096 -INFO 06-24 20:30:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:30:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3037, pd_chunk_size: 4096 -INFO 06-24 20:31:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:31:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2915, pd_chunk_size: 4096 -INFO 06-24 20:31:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:31:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2794, pd_chunk_size: 4096 -INFO 06-24 20:32:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:32:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2673, pd_chunk_size: 4096 -INFO 06-24 20:32:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:32:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2430, pd_chunk_size: 4096 -INFO 06-24 20:33:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:33:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2310, pd_chunk_size: 4096 -INFO 06-24 20:34:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:34:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2190, pd_chunk_size: 4096 -INFO 06-24 20:34:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:34:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2069, pd_chunk_size: 4096 -INFO 06-24 20:35:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:35:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1988, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1828, pd_chunk_size: 4096 -INFO 06-24 20:36:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:36:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1751, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1632, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1587, pd_chunk_size: 4096 -INFO 06-24 20:37:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:37:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1511, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1467, pd_chunk_size: 4096 -INFO 06-24 20:37:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:37:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1393, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1345, pd_chunk_size: 4096 -INFO 06-24 20:38:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:38:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1274, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1227, pd_chunk_size: 4096 -INFO 06-24 20:38:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:38:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1160, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1108, pd_chunk_size: 4096 -INFO 06-24 20:39:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:39:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1099, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1098, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1097, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1096, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1095, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1094, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1093, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1092, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1091, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1090, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1089, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1088, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1087, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1086, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1085, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1084, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1083, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1082, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1081, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1080, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1079, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1078, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1077, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1076, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1075, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1074, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1073, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1072, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1071, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1070, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1069, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1068, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1067, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1066, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1065, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1064, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1063, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1062, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1061, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1060, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1059, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1058, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1057, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1056, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1055, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1054, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1053, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1052, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1051, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1050, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1049, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1048, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1047, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1046, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1045, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1044, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1043, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1042, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1041, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1040, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1039, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1038, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1037, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1036, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1035, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1034, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1033, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1032, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1031, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1030, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1029, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1028, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1027, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1026, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1025, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1024, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1023, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1022, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1021, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1020, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1019, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1018, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1017, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1016, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1015, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1014, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1013, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1012, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1011, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1010, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1009, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1008, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1007, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1006, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1005, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1004, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1003, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1002, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1001, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1000, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 999, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 998, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 997, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 996, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 995, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 994, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 993, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 992, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 991, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 990, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 989, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 988, pd_chunk_size: 4096 -INFO 06-24 20:39:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:39:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 987, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 986, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 985, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 984, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 983, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 982, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 981, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 980, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 979, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 978, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 977, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 976, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 975, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 974, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 973, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 972, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 971, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 970, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 969, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 968, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 967, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 966, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 965, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 964, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 963, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 962, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 961, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 960, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 959, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 958, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 957, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 956, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 955, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 954, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 953, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 952, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 951, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 950, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 949, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 948, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 947, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 946, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 945, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 944, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 943, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 942, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 941, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 940, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 939, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 938, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 937, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 936, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 935, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 934, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 933, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 932, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 931, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 930, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 929, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 928, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 927, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 926, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 925, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 924, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 923, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 922, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 921, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 920, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 919, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 918, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 917, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 916, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 915, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 914, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 913, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 912, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 911, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 910, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 909, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 908, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 907, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 906, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 905, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 904, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 903, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 902, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 901, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 900, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 899, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 898, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 897, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 896, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 895, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 894, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 893, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 892, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 891, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 890, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 889, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 888, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 887, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 886, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 885, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 884, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 883, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 882, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 881, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 880, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 879, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 878, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 877, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 876, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 875, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 874, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 873, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 872, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 871, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 870, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 869, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 868, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 867, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 866, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 865, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 864, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 863, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 862, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 861, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 860, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 859, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 858, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 857, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 856, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 855, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 854, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 853, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 852, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 851, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 850, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 849, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 848, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 847, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 846, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 845, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 844, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 843, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 842, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 841, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 840, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 839, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 838, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 837, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 836, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 835, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 834, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 833, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 832, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 831, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 830, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 829, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 828, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 827, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 826, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 825, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 824, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 823, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 822, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 821, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 820, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 819, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 818, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 817, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 816, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 815, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 814, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 813, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 812, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 811, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 810, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 809, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 808, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 807, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 806, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 805, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 804, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 803, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 802, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 801, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 800, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 799, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 798, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 797, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 796, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 795, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 794, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 793, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 792, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 791, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 790, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 789, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 788, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 787, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 786, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 785, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 784, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 783, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 782, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 781, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 780, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 779, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 778, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 777, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 776, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 775, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 774, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 773, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 772, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 771, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 770, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 769, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 768, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 767, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 766, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 765, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 764, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 763, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 762, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 761, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 760, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 759, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 758, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 757, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 756, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 755, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 754, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 753, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 752, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 751, pd_chunk_size: 4096 -INFO 06-24 20:40:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:40:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 750, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 749, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 748, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 747, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 746, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 745, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 744, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 743, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 742, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 741, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 740, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 739, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 738, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 737, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 736, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 735, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 734, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 733, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 732, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 731, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 730, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 729, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 728, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 727, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 726, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 725, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 724, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 723, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 722, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 721, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 720, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 719, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 718, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 717, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 716, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 715, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 714, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 713, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 712, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 711, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 710, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 709, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 708, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 707, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 706, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 705, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 704, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 703, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 702, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 701, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 700, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 699, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 698, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 697, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 696, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 695, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 694, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 693, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 692, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 691, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 690, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 689, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 688, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 687, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 686, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 685, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 684, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 683, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 682, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 681, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 680, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 679, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 678, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 677, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 676, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 675, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 674, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 673, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 672, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 671, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 670, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 669, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 668, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 667, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 666, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 665, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 664, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 663, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 662, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 661, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 660, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 659, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 658, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 657, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 656, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 655, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 654, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 653, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 652, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 651, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 650, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 649, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 648, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 647, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 646, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 645, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 644, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 643, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 642, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 641, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 640, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 639, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 638, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 637, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 636, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 635, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 634, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 633, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 632, pd_chunk_size: 4096 -INFO 06-24 20:41:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:41:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 631, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 630, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 629, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 628, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 627, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 626, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 625, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 624, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 623, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 622, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 621, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 620, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 619, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 618, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 617, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 616, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 615, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 614, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 613, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 612, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 611, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 610, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 609, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 608, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 607, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 606, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 605, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 604, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 603, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 602, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 601, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 600, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 599, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 598, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 597, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 596, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 595, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 594, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 593, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 592, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 591, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 590, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 589, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 588, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 587, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 586, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 585, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 584, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 583, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 582, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 581, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 580, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 579, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 578, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 577, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 576, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 575, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 574, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 573, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 572, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 571, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 570, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 569, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 568, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 567, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 566, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 565, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 564, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 563, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 562, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 561, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 560, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 559, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 558, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 557, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 556, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 555, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 554, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 553, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 552, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 551, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 550, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 549, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 548, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 547, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 546, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 545, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 544, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 543, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 542, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 541, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 540, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 539, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 538, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 537, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 536, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 535, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 534, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 533, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 532, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 531, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 530, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 529, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 528, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 527, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 526, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 525, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 524, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 523, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 522, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 521, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 520, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 519, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 518, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 517, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 516, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 515, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 514, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 513, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 512, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 511, pd_chunk_size: 4096 -INFO 06-24 20:41:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:41:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 510, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 509, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 508, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 507, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 506, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 505, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 504, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 503, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 502, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 501, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 500, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 499, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 498, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 497, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 496, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 495, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 494, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 493, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 492, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 491, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 490, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 489, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 488, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 487, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 486, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 485, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 484, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 483, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 482, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 481, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 480, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 479, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 478, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 477, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 476, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 475, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 474, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 473, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 472, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 471, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 470, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 469, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 468, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 467, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 466, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 465, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 464, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 463, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 462, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 461, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 460, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 459, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 458, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 457, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 456, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 455, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 454, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 453, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 452, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 451, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 450, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 449, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 448, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 447, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 446, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 445, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 444, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 443, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 442, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 441, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 440, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 439, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 438, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 437, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 436, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 435, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 434, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 433, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 432, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 431, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 430, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 429, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 428, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 427, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 426, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 425, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 424, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 423, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 422, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 421, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 420, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 419, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 418, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 417, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 416, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 415, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 414, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 413, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 412, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 411, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 410, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 409, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 408, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 407, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 406, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 405, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 404, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 403, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 402, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 401, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 400, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 399, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 398, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 397, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 396, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 395, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 394, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 393, pd_chunk_size: 4096 -INFO 06-24 20:42:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:42:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 392, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 391, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 390, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 389, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 388, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 387, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 386, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 385, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 384, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 383, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 382, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 381, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 380, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 379, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 378, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 377, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 376, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 375, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 374, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 373, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 372, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 371, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 370, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 369, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 368, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 367, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 366, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 365, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 364, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 363, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 362, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 361, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 360, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 359, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 358, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 357, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 356, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 355, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 354, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 353, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 352, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 351, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 350, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 349, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 348, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 347, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 346, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 345, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 344, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 343, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 342, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 341, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 340, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 339, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 338, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 337, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 336, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 335, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 334, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 333, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 332, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 331, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 330, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 329, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 328, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 327, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 326, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 325, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 324, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 323, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 322, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 321, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 320, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 319, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 318, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 317, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 316, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 315, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 314, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 313, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 312, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 311, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 310, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 309, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 308, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 307, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 306, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 305, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 304, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 303, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 302, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 301, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 300, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 299, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 298, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 297, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 296, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 295, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 294, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 293, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 292, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 291, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 290, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 289, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 288, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 287, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 286, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 285, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 284, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 283, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 282, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 281, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 280, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 279, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 278, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 277, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 276, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 275, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 274, pd_chunk_size: 4096 -INFO 06-24 20:42:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:42:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 273, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 272, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 271, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 270, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 269, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 268, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 267, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 266, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 265, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 264, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 263, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 262, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 261, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 260, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 259, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 258, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 257, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 256, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 255, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 254, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 253, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 252, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 251, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 250, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 249, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 248, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 247, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 246, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 245, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 244, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 243, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 242, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 241, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 240, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 239, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 238, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 237, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 236, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 235, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 234, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 233, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 232, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 231, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 230, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 229, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 228, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 227, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 226, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 225, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 224, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 223, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 222, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 221, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 220, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 219, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 218, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 217, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 216, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 215, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 214, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 213, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 212, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 211, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 210, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 209, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 208, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 207, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 206, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 205, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 204, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 203, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 202, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 201, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 200, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 199, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 198, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 197, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 196, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 195, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 194, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 193, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 192, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 191, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 190, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 189, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 188, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 187, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 186, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 185, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 184, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 183, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 182, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 181, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 180, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 179, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 178, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 177, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 176, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 175, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 174, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 173, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 172, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 171, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 170, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 169, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 168, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 167, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 166, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 165, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 164, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 163, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 162, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 161, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 160, pd_chunk_size: 4096 -INFO 06-24 20:43:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:43:28 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 159, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 158, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 157, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 156, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 155, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 154, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 153, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 152, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 151, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 150, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 149, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 148, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 147, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 146, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 145, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 144, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 143, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 142, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 141, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 140, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 139, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 138, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 137, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 136, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 135, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 134, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 133, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 132, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 131, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 130, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 129, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 128, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 127, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 126, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 125, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 124, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 123, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 122, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 121, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 120, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 119, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 118, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 117, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 116, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 115, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 114, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 113, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 112, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 111, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 110, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 109, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 108, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 107, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 106, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 105, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 104, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 103, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 102, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 101, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 100, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 99, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 98, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 97, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 96, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 95, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 94, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 93, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 92, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 91, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 90, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 89, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 88, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 87, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 86, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 85, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 84, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 83, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 82, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 81, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 80, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 79, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 78, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 77, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 76, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 75, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 74, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 73, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 72, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 71, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 70, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 69, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 68, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 67, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 66, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 65, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 64, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 63, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 62, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 61, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 60, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 59, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 58, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 57, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 56, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 55, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 54, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 53, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 52, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 51, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 50, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 49, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 48, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 47, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 46, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 45, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 44, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 43, pd_chunk_size: 4096 -INFO 06-24 20:43:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:43:58 [statics_utils.py:24] mean per token cost: 140.64768666584214 ms -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 42, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 41, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 40, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 39, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 38, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 37, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 36, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 35, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 34, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 33, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 32, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 31, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 30, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 29, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 28, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 27, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 26, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 25, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 24, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 23, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 22, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 21, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 20, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 19, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 18, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 17, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 16, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 15, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 14, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 13, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 12, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 11, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 10, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 9, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 8, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 7, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 6, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 5, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 4, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 3, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 2, pd_chunk_size: 4096 -[fetch_stream] wait_to_ready, chunk_finished: False, remaining_tokens: 1, pd_chunk_size: 4096 -INFO 06-24 20:44:08 [manager.py:329] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 first_token_cost:825.5589008331299ms total_cost_time:2668792.4263477325ms,out_token_counter:14561 mean_per_token_cost_time: 183.22689838932075ms prompt_token_num:15615 prompt_cache_len:0 prompt_cache_ratio:0.0 -127.0.0.1:40092 - "POST /generate HTTP/1.1" 200 -INFO 06-24 20:44:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:44:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:44:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:44:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:45:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:45:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:45:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:45:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:46:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:46:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:46:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:46:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:47:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:47:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:47:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:47:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:48:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:48:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:49:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:49:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:49:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:49:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:50:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:50:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:51:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:51:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:51:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:51:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:52:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:52:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:52:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:52:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:53:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:53:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:54:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:54:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:54:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:54:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:55:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:55:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:56:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:56:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:56:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:56:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:57:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:57:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:57:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:57:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:58:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:58:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:58:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:58:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:59:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:59:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 20:59:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 20:59:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:00:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:00:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:01:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:01:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:01:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:01:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:02:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:02:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:02:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:02:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:03:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:03:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:03:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:03:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:04:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:04:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:04:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:04:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:05:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:05:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:05:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:05:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:06:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:06:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:06:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:06:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:07:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:07:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:07:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:07:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:08:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:08:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:08:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:08:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:09:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:09:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:09:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:09:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:10:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:10:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:10:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:10:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:11:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:11:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:11:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:11:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:12:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:12:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:12:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:12:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:13:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:13:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:13:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:13:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:14:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:14:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:14:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:14:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:15:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:15:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:16:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:16:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:17:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:17:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:18:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:18:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:19:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:19:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:19:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:19:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:20:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:20:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:21:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:21:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:21:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:21:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:22:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:22:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:23:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:23:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:24:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:24:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:24:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:24:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:25:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:25:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:26:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:26:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:26:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:26:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:27:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:27:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:28:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:28:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:28:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:28:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:29:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:29:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:30:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:30:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:31:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:31:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:32:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:32:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:32:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:32:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:33:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:33:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:34:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:34:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:34:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:34:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:35:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:35:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:35:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:35:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:36:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:36:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:36:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:36:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:37:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:37:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:37:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:37:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:38:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:38:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:39:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:39:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:40:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:40:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:40:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:40:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:41:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:41:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:41:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:41:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:42:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:42:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:43:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:43:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:44:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:44:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:45:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:45:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:45:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:45:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:46:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:46:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:47:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:47:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:48:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:48:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:48:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:48:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:49:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:49:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:50:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:50:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:50:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:50:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:51:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:51:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:52:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:52:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:52:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:52:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:53:28 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:53:28 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:53:58 [statics_utils.py:24] mean first cost: 8266.630672371906 ms -INFO 06-24 21:53:58 [statics_utils.py:24] mean per token cost: 143.15234617898795 ms -INFO 06-24 21:54:19 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... - -INFO 06-24 21:54:19 [start_utils.py:106] Killing child process 1211478 -INFO 06-24 21:54:19 [start_utils.py:106] Killing child process 1211483 -INFO 06-24 21:54:19 [start_utils.py:108] Killing parent process 1211009 -INFO 06-24 21:54:19 [start_utils.py:53] Killing parent process 1210907 -INFO 06-24 21:54:19 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 21:54:19 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_p.log b/pd_p.log deleted file mode 100644 index da5287651..000000000 --- a/pd_p.log +++ /dev/null @@ -1,1512 +0,0 @@ -INFO 06-24 21:55:24 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:24 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:25 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:55:27 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:27 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:27 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:55:27 [api_start.py:79] zmq mode head: ipc:///tmp/_2732_0_ -INFO 06-24 21:55:27 [api_start.py:81] use tgi api: False -INFO 06-24 21:55:27 [api_start.py:192] alloced ports: [10216, 10253, 10154, 10095, 10138, 10064, 10217, 10074, 10133] -INFO 06-24 21:55:27 [api_start.py:233] all start args:Namespace(run_mode='prefill', host='127.0.1.1', port=8017, httpserver_workers=1, zmq_mode='ipc:///tmp/_2732_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=128, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16000, nccl_host='127.0.0.1', nccl_port=2732, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=True, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=16000, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10216, detokenization_port=10253, detokenization_pub_port=10154, visual_port=10095, audio_port=10138, cache_port=10064, metric_port=10217, pd_node_infer_rpyc_ports=[10133], pd_node_id=287595743282619216970276961428881885738, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) -INFO 06-24 21:55:29 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 21:55:31 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:32 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:32 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:55:33 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:55:33 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 21:55:34 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:55:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:55:35 [shm_req_manager.py:59] create lock shm 2732_0_req_shm_total -INFO 06-24 21:55:35 [atomic_array_lock.py:29] create lock shm 2732_0_array_reqs_lock -INFO 06-24 21:55:35 [atomic_lock.py:26] create lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_mem_manger_can_use_token_num_0 -INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_shared_token_load -INFO 06-24 21:55:35 [shared_arr.py:17] create shm 2732_0_shared_token_load_ext_infos -INFO 06-24 21:55:36 [model_rpc.py:70] Initialized RPC server for rank 0. -INFO 06-24 21:55:36 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 21:55:36 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 21:55:36 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 21:55:36 [model_rpc.py:184] use ChunckedPrefillForPrefillNode -WARNING 06-24 21:55:36 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:55:36 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:55:36 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:55:36 [manager.py:41] pub_to_httpserver sendhwm 1000 -INFO 06-24 21:55:36 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 21:55:36 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 21:55:36 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_shared_token_load -INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos -INFO 06-24 21:55:38 [shared_arr.py:17] create shm 2732_0_dp_rank_0_lock_tp_infos -INFO 06-24 21:55:38 [basemodel.py:134] Initial quantization. The default quantization method is none -INFO 06-24 21:55:38 [mem_utils.py:11] mode setting params: [] -INFO 06-24 21:55:38 [mem_utils.py:25] Model kv cache using mode normal -INFO 06-24 21:55:38 [shared_arr.py:20] link shm 2732_0_mem_manger_can_use_token_num_0 -INFO 06-24 21:56:04 [basemodel.py:652] begin check max_len infer -INFO 06-24 21:56:05 [basemodel.py:680] check max_len 8448 infer ok -INFO 06-24 21:56:05 [shared_arr.py:17] create shm 2732_0_refed_tokens_num_0 -INFO 06-24 21:56:05 [shared_arr.py:17] create shm 2732_0_tree_total_tokens_num_0 -INFO 06-24 21:56:05 [base_backend.py:135] loaded model class -INFO 06-24 21:56:05 [prefill_impl.py:36] lock_nccl_group ranks 0 -INFO 06-24 21:56:05 [shared_arr.py:20] link shm 2732_0_refed_tokens_num_0 -INFO 06-24 21:56:05 [shared_arr.py:20] link shm 2732_0_tree_total_tokens_num_0 -INFO 06-24 21:56:05 [manager.py:196] use req queue QueueForPDChunkedPrefill -INFO 06-24 21:56:08 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:09 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:10 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:13 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:13 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:13 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:13 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 21:56:13 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 21:56:13 [prefill_kv_move_manager.py:55] rpyc connect to infer rpyc port: 10133 ok -INFO 06-24 21:56:13 [net_utils.py:51] get hostname ip 127.0.1.1 -INFO 06-24 21:56:13 [prefill_trans_process.py:154] prefill trans kv process for device: 0 started! -INFO 06-24 21:56:15 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:16 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:18 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:20 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:20 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:20 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:20 [prefill_infer_rpyc.py:51] put mem manager to mem_queue ok -INFO 06-24 21:56:20 [prefill_kv_move_manager.py:246] prefill kv move manager process started -INFO 06-24 21:56:20 [start_utils.py:37] init func start_router_process : init ok -INFO 06-24 21:56:20 [start_utils.py:37] init func start_detokenization_process : init ok -INFO 06-24 21:56:20 [api_start.py:57] start process pid 1411123 -INFO 06-24 21:56:20 [api_start.py:58] http server pid 1414655 -INFO 06-24 21:56:24 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 21:56:24 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 21:56:25 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 21:56:27 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 21:56:27 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 21:56:27 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 21:56:27 [api_http.py:326] server start up -INFO 06-24 21:56:27 [atomic_array_lock.py:32] link lock shm 2732_0_lightllm_resource_lock -INFO 06-24 21:56:27 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 21:56:27 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 21:56:27 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 21:56:28 [atomic_lock.py:29] link lock shm 2732_0_req_id_gen_lock -INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_latest_success_infer_time_mark -INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_shared_token_load -INFO 06-24 21:56:28 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos -INFO 06-24 21:56:28 [api_http.py:330] server start up ok, loop use is -INFO 06-24 21:56:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:56:28 [pd_loop.py:92] Sent registration JSON: {'node_id': 287595743282619216970276961428881885738, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10216, 'detokenization_port': 10253, 'detokenization_pub_port': 10154, 'visual_port': 10095, 'audio_port': 10138, 'cache_port': 10064, 'metric_port': 10217, 'pd_node_infer_rpyc_ports': [10133], 'pd_node_id': 287595743282619216970276961428881885738, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 21:56:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:56:58 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 21:56:58 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 -INFO 06-24 21:57:15 [manager.py:224] router recive req id 8 cost time 0.07434654235839844 s -INFO 06-24 21:57:15 [manager.py:224] router recive req id 16 cost time 0.025234222412109375 s -INFO 06-24 21:57:15 [manager.py:224] router recive req id 24 cost time 0.014957904815673828 s -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 -INFO 06-24 21:57:15 [manager.py:224] router recive req id 32 cost time 0.009561538696289062 s -INFO 06-24 21:57:15 [manager.py:224] router recive req id 40 cost time 0.010047435760498047 s -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 8 cost time 0.10156798362731934 s -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 16 cost time 0.052925825119018555 s -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 24 cost time 0.043247222900390625 s -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 32 cost time 0.03420853614807129 s -INFO 06-24 21:57:15 [manager.py:224] router recive req id 48 cost time 0.0063211917877197266 s -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 40 cost time 0.025928497314453125 s -INFO 06-24 21:57:15 [manager.py:68] detokenization recv req id 48 cost time 0.00784921646118164 s -INFO 06-24 21:57:15 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 -DEBUG 06-24 21:57:15 [manager.py:391] Prefill Batch: batch_id=183192395226738958363005591887549515689, time:1750773435.9941852s req_ids:[8, 16, 24, 32, 40, 48] -DEBUG 06-24 21:57:15 [manager.py:391] -DEBUG 06-24 21:57:15 [stats.py:37] Avg tokens(prompt+generate) throughput: 63.452 tokens/s -DEBUG 06-24 21:57:15 [stats.py:37] Avg prompt tokens throughput: 63.452 tokens/s -DEBUG 06-24 21:57:15 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:72 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:80 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:88 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:96 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:104 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:112 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:120 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:128 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:136 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:144 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:152 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:160 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:168 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:176 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:184 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:192 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:200 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:208 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:216 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:224 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:232 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:240 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:248 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:256 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:264 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:272 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:280 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:288 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:296 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:304 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:312 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:320 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:328 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 -INFO 06-24 21:57:16 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 -INFO 06-24 21:57:17 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 8 -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 16 -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 24 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:8 first_token_cost:2066.8702125549316ms total_cost_time:2066.9257640838623ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 32 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:16 first_token_cost:2017.4884796142578ms total_cost_time:2017.5154209136963ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:24 first_token_cost:2007.4079036712646ms total_cost_time:2007.4307918548584ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 40 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:32 first_token_cost:1997.9596138000488ms total_cost_time:1997.981071472168ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:17 [manager.py:162] detoken release req id 48 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:40 first_token_cost:1989.2513751983643ms total_cost_time:1989.2728328704834ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:17 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:48 first_token_cost:1971.0087776184082ms total_cost_time:1971.02952003479ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 21:57:18 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:18 [infer_batch.py:156] radix refed token num 6345 -DEBUG 06-24 21:57:18 [infer_batch.py:156] radix hold token num 6345 -DEBUG 06-24 21:57:18 [infer_batch.py:156] mem manager can alloc token num 10047 -DEBUG 06-24 21:57:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:18 [batch.py:51] router release req id 8 -INFO 06-24 21:57:18 [batch.py:51] router release req id 16 -INFO 06-24 21:57:18 [batch.py:51] router release req id 24 -INFO 06-24 21:57:18 [batch.py:51] router release req id 32 -INFO 06-24 21:57:18 [batch.py:51] router release req id 40 -INFO 06-24 21:57:18 [batch.py:51] router release req id 48 -INFO 06-24 21:57:18 [manager.py:224] router recive req id 56 cost time 2.0529732704162598 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 64 cost time 2.0450782775878906 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 56 cost time 2.054810047149658 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 72 cost time 2.036593198776245 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 80 cost time 2.0283164978027344 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 88 cost time 1.9868121147155762 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 64 cost time 2.047435998916626 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 96 cost time 1.981055736541748 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 104 cost time 1.9764375686645508 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 112 cost time 1.9717299938201904 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 72 cost time 2.039720058441162 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 120 cost time 1.9665844440460205 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 128 cost time 1.9616596698760986 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 80 cost time 2.032367706298828 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 136 cost time 1.953277349472046 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 144 cost time 1.9455409049987793 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 152 cost time 1.9381029605865479 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 88 cost time 1.9917616844177246 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 160 cost time 1.9307143688201904 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 168 cost time 1.9222908020019531 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 96 cost time 1.9869613647460938 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 176 cost time 1.9146487712860107 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 184 cost time 1.9070994853973389 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 192 cost time 1.8989145755767822 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 104 cost time 1.983229160308838 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 200 cost time 1.8905799388885498 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 112 cost time 1.9795911312103271 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 208 cost time 1.883430004119873 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 216 cost time 1.8658819198608398 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 224 cost time 1.8562607765197754 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 232 cost time 1.8481740951538086 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 120 cost time 1.976130723953247 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 240 cost time 1.8409795761108398 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 248 cost time 1.8338754177093506 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 256 cost time 1.8289406299591064 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 264 cost time 1.8243746757507324 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 272 cost time 1.8194279670715332 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 128 cost time 1.9731316566467285 s -INFO 06-24 21:57:18 [manager.py:224] router recive req id 280 cost time 1.814791202545166 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 136 cost time 1.9656925201416016 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 144 cost time 1.9588980674743652 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 152 cost time 1.9523286819458008 s -DEBUG 06-24 21:57:18 [manager.py:391] Prefill Batch: batch_id=183435466452217083455255300120862165865, time:1750773438.0651114s req_ids:[56, 64, 72, 80, 88, 96, 104] -DEBUG 06-24 21:57:18 [manager.py:391] -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 160 cost time 1.978437900543213 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 168 cost time 1.9713377952575684 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 176 cost time 1.964714765548706 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 184 cost time 1.958141565322876 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 192 cost time 1.951369047164917 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 200 cost time 1.943962574005127 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 208 cost time 1.9369051456451416 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 216 cost time 1.9202401638031006 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 224 cost time 1.9116225242614746 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 232 cost time 1.904437780380249 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 240 cost time 1.8981175422668457 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 248 cost time 1.8918724060058594 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 256 cost time 1.8877604007720947 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 264 cost time 1.8840248584747314 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 272 cost time 1.8799102306365967 s -INFO 06-24 21:57:18 [manager.py:68] detokenization recv req id 280 cost time 1.8761711120605469 s -INFO 06-24 21:57:18 [rpyc_fix_utils.py:36] change socket buffer from 2626560 131072 change to 4194304 -INFO 06-24 21:57:18 [prefill_trans_process.py:61] connect start PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=-1, prefill_id=287595743282619216970276961428881885738, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') -INFO 06-24 21:57:18 [prefill_trans_process.py:64] connect src_id 287595743282619216970276961428881885738 dest_id 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:18 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 -INFO 06-24 21:57:18 [pynccl.py:180] LightLLM is using nccl==2.21.5 -INFO 06-24 21:57:19 [manager.py:224] router recive req id 288 cost time 2.843327045440674 s -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 56 -INFO 06-24 21:57:19 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 64 -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 72 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:56 first_token_cost:3107.999324798584ms total_cost_time:3108.043432235718ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:15 lightllm_req_id:64 first_token_cost:3099.2417335510254ms total_cost_time:3099.2672443389893ms,out_token_counter:1 mean_per_token_cost_time: 0.025510787963867188ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 80 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:72 first_token_cost:3090.4550552368164ms total_cost_time:3090.477228164673ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 88 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:80 first_token_cost:3082.355737686157ms total_cost_time:3082.376480102539ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:88 first_token_cost:3040.7509803771973ms total_cost_time:3040.7721996307373ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 96 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:96 first_token_cost:3034.8153114318848ms total_cost_time:3034.8355770111084ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:104 first_token_cost:3030.2133560180664ms total_cost_time:3030.233383178711ms,out_token_counter:1 mean_per_token_cost_time: 0.02002716064453125ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:19 [manager.py:162] detoken release req id 104 -INFO 06-24 21:57:19 [prefill_trans_process.py:81] PDTransJoinInfo(decode_id=148730891575017957868136796871489876076, decode_device_id=-1, prefill_id=287595743282619216970276961428881885738, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc') kv trans connected! -INFO 06-24 21:57:19 [prefill_trans_obj.py:104] create KVTransConnectObj success: connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc decode_node_id: 148730891575017957868136796871489876076 prefill_node_id: 287595743282619216970276961428881885738 device_index: 0 -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 288 cost time 2.852398157119751 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 8 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1529920101165771 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 16 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1361424922943115 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 24 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.1195614337921143 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.10410737991333 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.090341329574585 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 1.0786843299865723 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008833646774291992 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0019044876098632812 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 8 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.013906478881835938 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 8 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 24 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 32 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 40 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 48 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 56 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 64 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009841203689575195 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 80 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0033121109008789062 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc to start kv movequeue time 1.1567583084106445 s -INFO 06-24 21:57:19 [prefill_trans_process.py:34] trans start: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007048368453979492 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 72 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 80 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 88 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0039234161376953125 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 88 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.004856109619140625 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 88 not send, decode is busy -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0009615421295166016 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.004712343215942383 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 96 not send, decode is busy -INFO 06-24 21:57:19 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 8 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 24 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 32 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 40 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 48 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 56 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 64 -DEBUG 06-24 21:57:19 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:19 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:19 [infer_batch.py:156] radix refed token num 6333 -DEBUG 06-24 21:57:19 [infer_batch.py:156] radix hold token num 13738 -DEBUG 06-24 21:57:19 [infer_batch.py:156] mem manager can alloc token num 2654 -DEBUG 06-24 21:57:19 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 72 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 80 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 88 -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 96 -INFO 06-24 21:57:19 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00585484504699707 s -INFO 06-24 21:57:19 [batch.py:51] router release req id 56 -INFO 06-24 21:57:19 [batch.py:51] router release req id 64 -INFO 06-24 21:57:19 [batch.py:51] router release req id 72 -INFO 06-24 21:57:19 [batch.py:51] router release req id 80 -INFO 06-24 21:57:19 [batch.py:51] router release req id 88 -INFO 06-24 21:57:19 [batch.py:51] router release req id 96 -INFO 06-24 21:57:19 [batch.py:51] router release req id 104 -INFO 06-24 21:57:19 [manager.py:224] router recive req id 296 cost time 2.904078483581543 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 304 cost time 2.9004077911376953 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 296 cost time 2.9058501720428467 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 312 cost time 2.89624285697937 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.006551504135131836 s -INFO 06-24 21:57:19 [prefill_trans_obj.py:166] prefill node kv move task req_id: 104 not send, decode is busy -INFO 06-24 21:57:19 [manager.py:224] router recive req id 320 cost time 2.8917062282562256 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 328 cost time 2.8868372440338135 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 304 cost time 2.9025967121124268 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 336 cost time 2.882467031478882 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 344 cost time 2.8421084880828857 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 352 cost time 2.8355062007904053 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 312 cost time 2.899076223373413 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 360 cost time 2.830348253250122 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 368 cost time 2.8254940509796143 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 376 cost time 2.820925235748291 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 320 cost time 2.8958187103271484 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 384 cost time 2.8162660598754883 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 392 cost time 2.8115522861480713 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 400 cost time 2.806875705718994 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 328 cost time 2.891998052597046 s -INFO 06-24 21:57:19 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 104 -INFO 06-24 21:57:19 [manager.py:224] router recive req id 408 cost time 2.8029000759124756 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 336 cost time 2.888749361038208 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 416 cost time 2.798424005508423 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 424 cost time 2.7936484813690186 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 432 cost time 2.7890119552612305 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 344 cost time 2.849390983581543 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 440 cost time 2.7844791412353516 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 448 cost time 2.7797770500183105 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 352 cost time 2.843766689300537 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 456 cost time 2.7765212059020996 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 360 cost time 2.8396759033203125 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 464 cost time 2.7719948291778564 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 472 cost time 2.767430067062378 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 480 cost time 2.7624762058258057 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 368 cost time 2.8358266353607178 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 488 cost time 2.757695198059082 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 496 cost time 2.75347638130188 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 504 cost time 2.7491462230682373 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 376 cost time 2.832227945327759 s -INFO 06-24 21:57:19 [manager.py:224] router recive req id 512 cost time 2.744821310043335 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 384 cost time 2.8284521102905273 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 392 cost time 2.824519157409668 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 400 cost time 2.820510149002075 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 408 cost time 2.816723346710205 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 416 cost time 2.812837600708008 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 424 cost time 2.808816432952881 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 432 cost time 2.804975748062134 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 440 cost time 2.8012473583221436 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 448 cost time 2.797304391860962 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 456 cost time 2.7937395572662354 s -DEBUG 06-24 21:57:19 [manager.py:391] Prefill Batch: batch_id=92541181842414176735446229458234978367, time:1750773439.1856368s req_ids:[112, 120, 128, 136, 144, 152, 160, 168] -DEBUG 06-24 21:57:19 [manager.py:391] -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 464 cost time 2.7900025844573975 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 472 cost time 2.814335584640503 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 480 cost time 2.811356544494629 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 488 cost time 2.8079729080200195 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 496 cost time 2.804640293121338 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 504 cost time 2.801222324371338 s -INFO 06-24 21:57:19 [manager.py:68] detokenization recv req id 512 cost time 2.7986178398132324 s -INFO 06-24 21:57:19 [prefill_trans_process.py:42] trans finished: id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc move len: 1055 -INFO 06-24 21:57:20 [manager.py:224] router recive req id 520 cost time 4.18721604347229 s -INFO 06-24 21:57:20 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 112 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 120 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 128 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:112 first_token_cost:4553.349018096924ms total_cost_time:4553.433895111084ms,out_token_counter:1 mean_per_token_cost_time: 0.08487701416015625ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 136 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:120 first_token_cost:4548.264026641846ms total_cost_time:4548.290967941284ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 144 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:128 first_token_cost:4543.472051620483ms total_cost_time:4543.494939804077ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 152 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:136 first_token_cost:4535.346984863281ms total_cost_time:4535.3686809539795ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 160 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:144 first_token_cost:4527.52161026001ms total_cost_time:4527.5421142578125ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:152 first_token_cost:4519.891738891602ms total_cost_time:4519.912958145142ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:162] detoken release req id 168 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:160 first_token_cost:4512.29453086853ms total_cost_time:4512.31575012207ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:168 first_token_cost:4503.759384155273ms total_cost_time:4503.7806034088135ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 520 cost time 4.196249008178711 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0020177364349365234 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008722305297851562 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 112 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048978328704833984 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00770878791809082 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 120 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007648468017578125 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0018413066864013672 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012303352355957031 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 128 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 136 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009046792984008789 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0028464794158935547 s -INFO 06-24 21:57:20 [prefill_trans_process.py:44] trans cost time: 1.5370166301727295,move_total_kv_len: 1055, id: 16 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc -INFO 06-24 21:57:20 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 16 cost total time: 2.695488214492798 s -INFO 06-24 21:57:20 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 112 -INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.01164865493774414 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 144 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 152 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009674549102783203 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00382232666015625 s -DEBUG 06-24 21:57:20 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:20 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:20 [infer_batch.py:156] radix refed token num 8408 -DEBUG 06-24 21:57:20 [infer_batch.py:156] radix hold token num 15808 -DEBUG 06-24 21:57:20 [infer_batch.py:156] mem manager can alloc token num 584 -DEBUG 06-24 21:57:20 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:20 [batch.py:51] router release req id 112 -INFO 06-24 21:57:20 [batch.py:51] router release req id 120 -INFO 06-24 21:57:20 [batch.py:51] router release req id 128 -INFO 06-24 21:57:20 [batch.py:51] router release req id 136 -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 120 -INFO 06-24 21:57:20 [batch.py:51] router release req id 144 -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 128 -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 136 -INFO 06-24 21:57:20 [batch.py:51] router release req id 152 -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 16 -INFO 06-24 21:57:20 [batch.py:51] router release req id 160 -INFO 06-24 21:57:20 [batch.py:51] router release req id 168 -INFO 06-24 21:57:20 [manager.py:224] router recive req id 528 cost time 4.2441792488098145 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 536 cost time 4.239896059036255 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 544 cost time 4.23553466796875 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 528 cost time 4.245772361755371 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 552 cost time 4.231782913208008 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 560 cost time 4.22771143913269 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 568 cost time 4.223820924758911 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 536 cost time 4.2426581382751465 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 576 cost time 4.219863414764404 s -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 144 -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 544 cost time 4.23926568031311 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 584 cost time 4.21586275100708 s -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 152 -INFO 06-24 21:57:20 [manager.py:224] router recive req id 592 cost time 4.212351322174072 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 552 cost time 4.235877513885498 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 600 cost time 4.207972526550293 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 608 cost time 4.203743934631348 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 560 cost time 4.23270320892334 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 616 cost time 4.199443817138672 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 624 cost time 4.19502067565918 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 632 cost time 4.190509080886841 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 640 cost time 4.186006546020508 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 568 cost time 4.22925877571106 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 648 cost time 4.181662082672119 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 656 cost time 4.177393674850464 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 664 cost time 4.172720193862915 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 576 cost time 4.2260565757751465 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 672 cost time 4.168277740478516 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 680 cost time 4.16365385055542 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 688 cost time 4.159157991409302 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 696 cost time 4.15469765663147 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 704 cost time 4.150240898132324 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 712 cost time 4.1457295417785645 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 584 cost time 4.223072528839111 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 720 cost time 4.14218282699585 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 728 cost time 4.13811993598938 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 736 cost time 4.133428573608398 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 592 cost time 4.220565557479858 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 744 cost time 4.128777742385864 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 752 cost time 4.12431788444519 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 760 cost time 4.119714975357056 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 768 cost time 4.114940881729126 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 776 cost time 4.110274791717529 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 784 cost time 4.105071544647217 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 600 cost time 4.217778921127319 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 792 cost time 4.100350618362427 s -INFO 06-24 21:57:20 [manager.py:224] router recive req id 800 cost time 4.095543622970581 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 608 cost time 4.214792966842651 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 616 cost time 4.211430549621582 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 624 cost time 4.208020210266113 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.02581334114074707 s -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 160 not send, decode is busy -INFO 06-24 21:57:20 [prefill_trans_obj.py:166] prefill node kv move task req_id: 168 not send, decode is busy -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 632 cost time 4.204531669616699 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 640 cost time 4.2012012004852295 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 648 cost time 4.197704315185547 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 656 cost time 4.194422245025635 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 664 cost time 4.190845489501953 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 672 cost time 4.187482833862305 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 680 cost time 4.1839280128479 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 688 cost time 4.18047571182251 s -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 160 -DEBUG 06-24 21:57:20 [manager.py:391] Prefill Batch: batch_id=201455354752215698501524051058319870647, time:1750773440.7145548s req_ids:[176, 184, 192, 200, 208, 216, 224] -DEBUG 06-24 21:57:20 [manager.py:391] -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 696 cost time 4.177005767822266 s -INFO 06-24 21:57:20 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 168 -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 704 cost time 4.173741340637207 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 712 cost time 4.170109510421753 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 720 cost time 4.16648006439209 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 728 cost time 4.163381576538086 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 736 cost time 4.159599781036377 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 744 cost time 4.155829429626465 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 752 cost time 4.152259588241577 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 760 cost time 4.14850926399231 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 768 cost time 4.144503593444824 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 776 cost time 4.1405723094940186 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 784 cost time 4.136146783828735 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 792 cost time 4.132099390029907 s -INFO 06-24 21:57:20 [manager.py:68] detokenization recv req id 800 cost time 4.127938747406006 s -INFO 06-24 21:57:21 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 176 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 184 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:176 first_token_cost:5522.639989852905ms total_cost_time:5522.682666778564ms,out_token_counter:1 mean_per_token_cost_time: 0.04267692565917969ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:184 first_token_cost:5515.013217926025ms total_cost_time:5515.039682388306ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 192 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:192 first_token_cost:5506.82806968689ms total_cost_time:5506.8519115448ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 200 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:200 first_token_cost:5498.515605926514ms total_cost_time:5498.550653457642ms,out_token_counter:1 mean_per_token_cost_time: 0.03504753112792969ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:208 first_token_cost:5490.27419090271ms total_cost_time:5490.3223514556885ms,out_token_counter:1 mean_per_token_cost_time: 0.048160552978515625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 208 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:216 first_token_cost:5472.749710083008ms total_cost_time:5472.784519195557ms,out_token_counter:1 mean_per_token_cost_time: 0.034809112548828125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 216 -INFO 06-24 21:57:21 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:224 first_token_cost:5463.237047195435ms total_cost_time:5463.2627964019775ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:21 [manager.py:162] detoken release req id 224 -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008868217468261719 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002635955810546875 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012252092361450195 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 176 not send, decode is busy -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 184 not send, decode is busy -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009368896484375 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003036975860595703 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009622812271118164 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 192 not send, decode is busy -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 200 not send, decode is busy -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005233287811279297 s -INFO 06-24 21:57:21 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007521152496337891 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 208 not send, decode is busy -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 176 -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006842136383056641 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.000782012939453125 s -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 184 -DEBUG 06-24 21:57:21 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:21 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:21 [infer_batch.py:156] radix refed token num 5290 -DEBUG 06-24 21:57:21 [infer_batch.py:156] radix hold token num 15809 -DEBUG 06-24 21:57:21 [infer_batch.py:156] mem manager can alloc token num 583 -DEBUG 06-24 21:57:21 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 192 -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 200 -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 208 -INFO 06-24 21:57:21 [batch.py:51] router release req id 176 -INFO 06-24 21:57:21 [batch.py:51] router release req id 184 -INFO 06-24 21:57:21 [batch.py:51] router release req id 192 -INFO 06-24 21:57:21 [batch.py:51] router release req id 200 -INFO 06-24 21:57:21 [batch.py:51] router release req id 208 -INFO 06-24 21:57:21 [batch.py:51] router release req id 216 -INFO 06-24 21:57:21 [batch.py:51] router release req id 224 -INFO 06-24 21:57:21 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010659933090209961 s -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 216 not send, decode is busy -INFO 06-24 21:57:21 [prefill_trans_obj.py:166] prefill node kv move task req_id: 224 not send, decode is busy -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 216 -INFO 06-24 21:57:21 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 224 -DEBUG 06-24 21:57:21 [manager.py:391] Prefill Batch: batch_id=128383818688016428732547559065532079804, time:1750773441.7364323s req_ids:[232, 240, 248, 256, 264, 272, 280] -DEBUG 06-24 21:57:21 [manager.py:391] -INFO 06-24 21:57:22 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 232 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 240 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 248 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 256 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 264 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:232 first_token_cost:6473.076820373535ms total_cost_time:6473.118305206299ms,out_token_counter:1 mean_per_token_cost_time: 0.041484832763671875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:240 first_token_cost:6465.898275375366ms total_cost_time:6465.925931930542ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 272 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:248 first_token_cost:6458.858489990234ms total_cost_time:6458.880662918091ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:256 first_token_cost:6453.812837600708ms total_cost_time:6453.834533691406ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:162] detoken release req id 280 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:264 first_token_cost:6449.409961700439ms total_cost_time:6449.431657791138ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:272 first_token_cost:6444.46873664856ms total_cost_time:6444.489240646362ms,out_token_counter:1 mean_per_token_cost_time: 0.020503997802734375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:280 first_token_cost:6439.7313594818115ms total_cost_time:6439.752101898193ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 232 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.001619100570678711 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 232 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007566690444946289 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 232 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 240 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004763364791870117 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 240 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007576942443847656 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 240 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0076656341552734375 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002526998519897461 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007674455642700195 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 248 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 256 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00623011589050293 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 272 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0016736984252929688 s -INFO 06-24 21:57:22 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 232 -DEBUG 06-24 21:57:22 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:22 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:22 [infer_batch.py:156] radix refed token num 6334 -DEBUG 06-24 21:57:22 [infer_batch.py:156] radix hold token num 15843 -DEBUG 06-24 21:57:22 [infer_batch.py:156] mem manager can alloc token num 549 -DEBUG 06-24 21:57:22 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 240 -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 248 -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 256 -INFO 06-24 21:57:22 [batch.py:51] router release req id 232 -INFO 06-24 21:57:22 [batch.py:51] router release req id 240 -INFO 06-24 21:57:22 [batch.py:51] router release req id 248 -INFO 06-24 21:57:22 [batch.py:51] router release req id 256 -INFO 06-24 21:57:22 [batch.py:51] router release req id 264 -INFO 06-24 21:57:22 [batch.py:51] router release req id 272 -INFO 06-24 21:57:22 [batch.py:51] router release req id 280 -INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009365320205688477 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 264 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 272 not send, decode is busy -INFO 06-24 21:57:22 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 280 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0067102909088134766 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 280 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.005312204360961914 s -INFO 06-24 21:57:22 [prefill_trans_obj.py:166] prefill node kv move task req_id: 280 not send, decode is busy -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 264 -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 272 -INFO 06-24 21:57:22 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 280 -DEBUG 06-24 21:57:22 [manager.py:391] Prefill Batch: batch_id=105466661178272767147654849707186787718, time:1750773442.742199s req_ids:[288, 296, 304, 312, 320, 328, 336, 344] -DEBUG 06-24 21:57:22 [manager.py:391] -INFO 06-24 21:57:23 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 288 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 296 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:288 first_token_cost:7568.840265274048ms total_cost_time:7568.888187408447ms,out_token_counter:1 mean_per_token_cost_time: 0.04792213439941406ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 304 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:296 first_token_cost:7564.2688274383545ms total_cost_time:7564.295768737793ms,out_token_counter:1 mean_per_token_cost_time: 0.026941299438476562ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 312 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:304 first_token_cost:7559.7474575042725ms total_cost_time:7559.769868850708ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 320 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:312 first_token_cost:7555.351972579956ms total_cost_time:7555.373668670654ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:320 first_token_cost:7550.553798675537ms total_cost_time:7550.575494766235ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:328 first_token_cost:7545.566082000732ms total_cost_time:7545.602083206177ms,out_token_counter:1 mean_per_token_cost_time: 0.03600120544433594ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:336 first_token_cost:7541.104316711426ms total_cost_time:7541.126489639282ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:344 first_token_cost:7500.508069992065ms total_cost_time:7500.529766082764ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 328 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 336 -INFO 06-24 21:57:23 [manager.py:162] detoken release req id 344 -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 288 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0034797191619873047 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 288 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010988235473632812 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 288 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 296 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009880542755126953 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 304 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005002260208129883 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 296 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009933948516845703 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 296 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 304 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010309934616088867 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 320 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0040853023529052734 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008486032485961914 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 312 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 320 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00830388069152832 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003463268280029297 s -INFO 06-24 21:57:23 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 288 -DEBUG 06-24 21:57:23 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:23 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:23 [infer_batch.py:156] radix refed token num 7383 -DEBUG 06-24 21:57:23 [infer_batch.py:156] radix hold token num 15833 -DEBUG 06-24 21:57:23 [infer_batch.py:156] mem manager can alloc token num 559 -DEBUG 06-24 21:57:23 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:23 [batch.py:51] router release req id 288 -INFO 06-24 21:57:23 [batch.py:51] router release req id 296 -INFO 06-24 21:57:23 [batch.py:51] router release req id 304 -INFO 06-24 21:57:23 [batch.py:51] router release req id 312 -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 296 -INFO 06-24 21:57:23 [batch.py:51] router release req id 320 -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 304 -INFO 06-24 21:57:23 [batch.py:51] router release req id 328 -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 312 -INFO 06-24 21:57:23 [batch.py:51] router release req id 336 -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 320 -INFO 06-24 21:57:23 [batch.py:51] router release req id 344 -INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009533166885375977 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 328 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 336 not send, decode is busy -INFO 06-24 21:57:23 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009120464324951172 s -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 328 -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 336 -INFO 06-24 21:57:23 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00877833366394043 s -INFO 06-24 21:57:23 [prefill_trans_obj.py:166] prefill node kv move task req_id: 344 not send, decode is busy -INFO 06-24 21:57:23 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 344 -DEBUG 06-24 21:57:23 [manager.py:391] Prefill Batch: batch_id=24528051848475343217428906132102045920, time:1750773443.890494s req_ids:[352, 360, 368, 376, 384, 392, 400] -DEBUG 06-24 21:57:23 [manager.py:391] -INFO 06-24 21:57:24 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 352 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 360 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:352 first_token_cost:8504.795789718628ms total_cost_time:8504.839897155762ms,out_token_counter:1 mean_per_token_cost_time: 0.04410743713378906ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 368 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:360 first_token_cost:8499.641180038452ms total_cost_time:8499.666452407837ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:368 first_token_cost:8494.734048843384ms total_cost_time:8494.755506515503ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 376 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:376 first_token_cost:8490.172386169434ms total_cost_time:8490.193605422974ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 384 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:384 first_token_cost:8485.517740249634ms total_cost_time:8485.54015159607ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 392 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:392 first_token_cost:8480.722188949585ms total_cost_time:8480.743408203125ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:400 first_token_cost:8475.736141204834ms total_cost_time:8475.756406784058ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:24 [manager.py:162] detoken release req id 400 -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0008697509765625 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012676715850830078 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 352 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009419918060302734 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004806995391845703 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009534835815429688 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 360 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 368 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009966135025024414 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004705905914306641 s -INFO 06-24 21:57:24 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 352 -DEBUG 06-24 21:57:24 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:24 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:24 [infer_batch.py:156] radix refed token num 6332 -DEBUG 06-24 21:57:24 [infer_batch.py:156] radix hold token num 15824 -DEBUG 06-24 21:57:24 [infer_batch.py:156] mem manager can alloc token num 568 -DEBUG 06-24 21:57:24 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 360 -INFO 06-24 21:57:24 [batch.py:51] router release req id 352 -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 368 -INFO 06-24 21:57:24 [batch.py:51] router release req id 360 -INFO 06-24 21:57:24 [batch.py:51] router release req id 368 -INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010133743286132812 s -INFO 06-24 21:57:24 [batch.py:51] router release req id 376 -INFO 06-24 21:57:24 [batch.py:51] router release req id 384 -INFO 06-24 21:57:24 [batch.py:51] router release req id 392 -INFO 06-24 21:57:24 [batch.py:51] router release req id 400 -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 376 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 384 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011520147323608398 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 400 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007290840148925781 s -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 376 -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 384 -INFO 06-24 21:57:24 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009365081787109375 s -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 392 not send, decode is busy -INFO 06-24 21:57:24 [prefill_trans_obj.py:166] prefill node kv move task req_id: 400 not send, decode is busy -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 392 -INFO 06-24 21:57:24 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 400 -DEBUG 06-24 21:57:24 [manager.py:391] Prefill Batch: batch_id=191998082800694326435444267982571911092, time:1750773444.8943431s req_ids:[408, 416, 424, 432, 440, 448, 456] -DEBUG 06-24 21:57:24 [manager.py:391] -INFO 06-24 21:57:25 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 408 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 416 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:408 first_token_cost:9469.240427017212ms total_cost_time:9469.283819198608ms,out_token_counter:1 mean_per_token_cost_time: 0.043392181396484375ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 424 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:416 first_token_cost:9464.5094871521ms total_cost_time:9464.535474777222ms,out_token_counter:1 mean_per_token_cost_time: 0.025987625122070312ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 432 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:424 first_token_cost:9459.718704223633ms total_cost_time:9459.740400314331ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:432 first_token_cost:9454.83422279358ms total_cost_time:9454.855680465698ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 440 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:440 first_token_cost:9450.320720672607ms total_cost_time:9450.342893600464ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 448 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:448 first_token_cost:9445.702314376831ms total_cost_time:9445.725440979004ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:456 first_token_cost:9441.13302230835ms total_cost_time:9441.15424156189ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:25 [manager.py:162] detoken release req id 456 -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 408 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004118204116821289 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 408 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009727954864501953 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 408 not send, decode is busy -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 416 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0096893310546875 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 424 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004786968231201172 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 416 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00877523422241211 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 416 not send, decode is busy -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 424 not send, decode is busy -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 432 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009337425231933594 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 440 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004602670669555664 s -INFO 06-24 21:57:25 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 408 -DEBUG 06-24 21:57:25 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:25 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:25 [infer_batch.py:156] radix refed token num 6334 -DEBUG 06-24 21:57:25 [infer_batch.py:156] radix hold token num 15836 -DEBUG 06-24 21:57:25 [infer_batch.py:156] mem manager can alloc token num 556 -DEBUG 06-24 21:57:25 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:25 [batch.py:51] router release req id 408 -INFO 06-24 21:57:25 [batch.py:51] router release req id 416 -INFO 06-24 21:57:25 [batch.py:51] router release req id 424 -INFO 06-24 21:57:25 [batch.py:51] router release req id 432 -INFO 06-24 21:57:25 [batch.py:51] router release req id 440 -INFO 06-24 21:57:25 [batch.py:51] router release req id 448 -INFO 06-24 21:57:25 [batch.py:51] router release req id 456 -INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 432 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011211633682250977 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 432 not send, decode is busy -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 440 not send, decode is busy -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 416 -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.01125788688659668 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00641179084777832 s -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 424 -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 432 -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 440 -INFO 06-24 21:57:25 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011875152587890625 s -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 448 not send, decode is busy -INFO 06-24 21:57:25 [prefill_trans_obj.py:166] prefill node kv move task req_id: 456 not send, decode is busy -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 448 -INFO 06-24 21:57:25 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 456 -DEBUG 06-24 21:57:25 [manager.py:391] Prefill Batch: batch_id=218303380001311569318333645359995508613, time:1750773445.8957012s req_ids:[464, 472, 480, 488, 496, 504, 512] -DEBUG 06-24 21:57:25 [manager.py:391] -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 current batch size: 7 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 estimated_peak_token_count: 7405 -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 21:57:25 [manager.py:248] dp_i 0 token used ratio: 0.9660810151293314 contain prompt cache tree unrefed token -DEBUG 06-24 21:57:26 [stats.py:37] Avg tokens(prompt+generate) throughput: 5661.956 tokens/s -DEBUG 06-24 21:57:26 [stats.py:37] Avg prompt tokens throughput: 5650.788 tokens/s -DEBUG 06-24 21:57:26 [stats.py:37] Avg generate tokens throughput: 11.169 tokens/s -INFO 06-24 21:57:26 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 464 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 472 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 480 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:464 first_token_cost:10434.598922729492ms total_cost_time:10434.705018997192ms,out_token_counter:1 mean_per_token_cost_time: 0.10609626770019531ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:472 first_token_cost:10430.545806884766ms total_cost_time:10430.574655532837ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 488 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:480 first_token_cost:10425.76789855957ms total_cost_time:10425.791263580322ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 496 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:488 first_token_cost:10420.909881591797ms total_cost_time:10420.931339263916ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 504 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:496 first_token_cost:10416.791915893555ms total_cost_time:10416.813850402832ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:162] detoken release req id 512 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:504 first_token_cost:10412.447452545166ms total_cost_time:10412.468910217285ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:512 first_token_cost:10408.00404548645ms total_cost_time:10408.025979995728ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048732757568359375 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011396169662475586 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 464 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 472 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011648893356323242 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 480 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006359100341796875 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 488 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0016443729400634766 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 472 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010746479034423828 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 472 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 480 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 488 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008312463760375977 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 504 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0033416748046875 s -INFO 06-24 21:57:26 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 464 -DEBUG 06-24 21:57:26 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:26 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:26 [infer_batch.py:156] radix refed token num 6338 -DEBUG 06-24 21:57:26 [infer_batch.py:156] radix hold token num 15846 -DEBUG 06-24 21:57:26 [infer_batch.py:156] mem manager can alloc token num 546 -DEBUG 06-24 21:57:26 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 472 -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 480 -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 488 -INFO 06-24 21:57:26 [batch.py:51] router release req id 464 -INFO 06-24 21:57:26 [batch.py:51] router release req id 472 -INFO 06-24 21:57:26 [batch.py:51] router release req id 480 -INFO 06-24 21:57:26 [batch.py:51] router release req id 488 -INFO 06-24 21:57:26 [batch.py:51] router release req id 496 -INFO 06-24 21:57:26 [batch.py:51] router release req id 504 -INFO 06-24 21:57:26 [batch.py:51] router release req id 512 -INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010193109512329102 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 496 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 504 not send, decode is busy -INFO 06-24 21:57:26 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 512 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00936269760131836 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 512 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008563041687011719 s -INFO 06-24 21:57:26 [prefill_trans_obj.py:166] prefill node kv move task req_id: 512 not send, decode is busy -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 496 -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 504 -INFO 06-24 21:57:26 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 512 -DEBUG 06-24 21:57:26 [manager.py:391] Prefill Batch: batch_id=161650577233271773526771599162437663158, time:1750773446.900781s req_ids:[520, 528, 536, 544, 552, 560, 568, 576] -DEBUG 06-24 21:57:26 [manager.py:391] -INFO 06-24 21:57:27 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 520 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 528 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:520 first_token_cost:11542.181491851807ms total_cost_time:11542.22321510315ms,out_token_counter:1 mean_per_token_cost_time: 0.04172325134277344ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:528 first_token_cost:11537.727355957031ms total_cost_time:11537.752628326416ms,out_token_counter:1 mean_per_token_cost_time: 0.025272369384765625ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 536 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:536 first_token_cost:11533.485889434814ms total_cost_time:11533.507585525513ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 544 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:544 first_token_cost:11529.413938522339ms total_cost_time:11529.436111450195ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 552 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:552 first_token_cost:11525.158405303955ms total_cost_time:11525.179624557495ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:560 first_token_cost:11521.02780342102ms total_cost_time:11521.04902267456ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 560 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:568 first_token_cost:11516.887664794922ms total_cost_time:11516.90936088562ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 568 -INFO 06-24 21:57:27 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:576 first_token_cost:11512.858629226685ms total_cost_time:11512.879371643066ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:27 [manager.py:162] detoken release req id 576 -INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 520 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007232189178466797 s -INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 528 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0026750564575195312 s -INFO 06-24 21:57:27 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 520 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.012886285781860352 s -INFO 06-24 21:57:27 [prefill_trans_obj.py:166] prefill node kv move task req_id: 520 not send, decode is busy -INFO 06-24 21:57:27 [prefill_trans_obj.py:166] prefill node kv move task req_id: 528 not send, decode is busy -INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.011229753494262695 s -INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 544 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.006566762924194336 s -INFO 06-24 21:57:27 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0017480850219726562 s -INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009779691696166992 s -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 536 not send, decode is busy -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 544 not send, decode is busy -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 552 not send, decode is busy -INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007314920425415039 s -INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 568 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0025243759155273438 s -INFO 06-24 21:57:28 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 520 -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 528 -DEBUG 06-24 21:57:28 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:28 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:28 [infer_batch.py:156] radix refed token num 6346 -DEBUG 06-24 21:57:28 [infer_batch.py:156] radix hold token num 15840 -DEBUG 06-24 21:57:28 [infer_batch.py:156] mem manager can alloc token num 552 -DEBUG 06-24 21:57:28 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:28 [batch.py:51] router release req id 520 -INFO 06-24 21:57:28 [batch.py:51] router release req id 528 -INFO 06-24 21:57:28 [batch.py:51] router release req id 536 -INFO 06-24 21:57:28 [batch.py:51] router release req id 544 -INFO 06-24 21:57:28 [batch.py:51] router release req id 552 -INFO 06-24 21:57:28 [batch.py:51] router release req id 560 -INFO 06-24 21:57:28 [batch.py:51] router release req id 568 -INFO 06-24 21:57:28 [batch.py:51] router release req id 576 -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 536 -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 544 -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 552 -INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.009374856948852539 s -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 560 not send, decode is busy -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 568 not send, decode is busy -INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0072596073150634766 s -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 560 -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 568 -INFO 06-24 21:57:28 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010232925415039062 s -INFO 06-24 21:57:28 [prefill_trans_obj.py:166] prefill node kv move task req_id: 576 not send, decode is busy -INFO 06-24 21:57:28 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 576 -DEBUG 06-24 21:57:28 [manager.py:391] Prefill Batch: batch_id=67017997309100946241665083556177100204, time:1750773448.0456958s req_ids:[584, 592, 600, 608, 616, 624, 632] -DEBUG 06-24 21:57:28 [manager.py:391] -INFO 06-24 21:57:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:28 [statics_utils.py:24] mean first cost: 7008.399413691626 ms -INFO 06-24 21:57:28 [statics_utils.py:24] mean per token cost: 0.027954578399658203 ms -INFO 06-24 21:57:28 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 584 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 592 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:584 first_token_cost:12511.72685623169ms total_cost_time:12511.76929473877ms,out_token_counter:1 mean_per_token_cost_time: 0.042438507080078125ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:592 first_token_cost:12507.427453994751ms total_cost_time:12507.45415687561ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 600 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:600 first_token_cost:12503.117799758911ms total_cost_time:12503.142595291138ms,out_token_counter:1 mean_per_token_cost_time: 0.0247955322265625ms prompt_token_num:1071 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 608 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:608 first_token_cost:12498.862743377686ms total_cost_time:12498.884439468384ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:616 first_token_cost:12494.251251220703ms total_cost_time:12494.272232055664ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 616 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:624 first_token_cost:12489.952325820923ms total_cost_time:12489.9742603302ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:632 first_token_cost:12485.601663589478ms total_cost_time:12485.623121261597ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 624 -INFO 06-24 21:57:28 [manager.py:162] detoken release req id 632 -INFO 06-24 21:57:28 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.002023935317993164 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011422157287597656 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 584 not send, decode is busy -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009478092193603516 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0046541690826416016 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008721351623535156 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 592 not send, decode is busy -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 600 not send, decode is busy -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.009311199188232422 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 616 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004762172698974609 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.0055027008056640625 s -INFO 06-24 21:57:29 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 608 not send, decode is busy -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 616 not send, decode is busy -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 584 -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005312681198120117 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0006544589996337891 s -DEBUG 06-24 21:57:29 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:29 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:29 [infer_batch.py:156] radix refed token num 6347 -DEBUG 06-24 21:57:29 [infer_batch.py:156] radix hold token num 15857 -DEBUG 06-24 21:57:29 [infer_batch.py:156] mem manager can alloc token num 535 -DEBUG 06-24 21:57:29 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 592 -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 600 -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 608 -INFO 06-24 21:57:29 [batch.py:51] router release req id 584 -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 616 -INFO 06-24 21:57:29 [batch.py:51] router release req id 592 -INFO 06-24 21:57:29 [batch.py:51] router release req id 600 -INFO 06-24 21:57:29 [batch.py:51] router release req id 608 -INFO 06-24 21:57:29 [batch.py:51] router release req id 616 -INFO 06-24 21:57:29 [batch.py:51] router release req id 624 -INFO 06-24 21:57:29 [batch.py:51] router release req id 632 -INFO 06-24 21:57:29 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007875204086303711 s -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 624 not send, decode is busy -INFO 06-24 21:57:29 [prefill_trans_obj.py:166] prefill node kv move task req_id: 632 not send, decode is busy -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 624 -INFO 06-24 21:57:29 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 632 -DEBUG 06-24 21:57:29 [manager.py:391] Prefill Batch: batch_id=47045407989267376152470951590131429777, time:1750773449.049589s req_ids:[640, 648, 656, 664, 672, 680, 688] -DEBUG 06-24 21:57:29 [manager.py:391] -INFO 06-24 21:57:29 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 640 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 648 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:640 first_token_cost:13478.892087936401ms total_cost_time:13478.918552398682ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:648 first_token_cost:13474.328994750977ms total_cost_time:13474.342823028564ms,out_token_counter:1 mean_per_token_cost_time: 0.013828277587890625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 656 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:656 first_token_cost:13469.989776611328ms total_cost_time:13470.000267028809ms,out_token_counter:1 mean_per_token_cost_time: 0.01049041748046875ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:664 first_token_cost:13465.176105499268ms total_cost_time:13465.185642242432ms,out_token_counter:1 mean_per_token_cost_time: 0.0095367431640625ms prompt_token_num:1043 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 664 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:672 first_token_cost:13460.595607757568ms total_cost_time:13460.603952407837ms,out_token_counter:1 mean_per_token_cost_time: 0.008344650268554688ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:680 first_token_cost:13455.767631530762ms total_cost_time:13455.775499343872ms,out_token_counter:1 mean_per_token_cost_time: 0.007867813110351562ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 672 -INFO 06-24 21:57:29 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:688 first_token_cost:13451.16138458252ms total_cost_time:13451.169967651367ms,out_token_counter:1 mean_per_token_cost_time: 0.00858306884765625ms prompt_token_num:1069 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 680 -INFO 06-24 21:57:29 [manager.py:162] detoken release req id 688 -INFO 06-24 21:57:29 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003117799758911133 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.006980419158935547 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 640 not send, decode is busy -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0006282329559326172 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003368854522705078 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 648 not send, decode is busy -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004578828811645508 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.0031185150146484375 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 656 not send, decode is busy -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 664 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.008547067642211914 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 664 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003646373748779297 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 664 not send, decode is busy -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 672 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0026831626892089844 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 672 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.002583026885986328 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 672 not send, decode is busy -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005922794342041016 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.003292560577392578 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 680 not send, decode is busy -INFO 06-24 21:57:30 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 640 -DEBUG 06-24 21:57:30 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:30 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:30 [infer_batch.py:156] radix refed token num 6342 -DEBUG 06-24 21:57:30 [infer_batch.py:156] radix hold token num 15883 -DEBUG 06-24 21:57:30 [infer_batch.py:156] mem manager can alloc token num 509 -DEBUG 06-24 21:57:30 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 648 -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 656 -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 664 -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 672 -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 680 -INFO 06-24 21:57:30 [batch.py:51] router release req id 640 -INFO 06-24 21:57:30 [batch.py:51] router release req id 648 -INFO 06-24 21:57:30 [batch.py:51] router release req id 656 -INFO 06-24 21:57:30 [batch.py:51] router release req id 664 -INFO 06-24 21:57:30 [batch.py:51] router release req id 672 -INFO 06-24 21:57:30 [batch.py:51] router release req id 680 -INFO 06-24 21:57:30 [batch.py:51] router release req id 688 -INFO 06-24 21:57:30 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.012074470520019531 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010558128356933594 s -INFO 06-24 21:57:30 [prefill_trans_obj.py:166] prefill node kv move task req_id: 688 not send, decode is busy -INFO 06-24 21:57:30 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 688 -DEBUG 06-24 21:57:30 [manager.py:391] Prefill Batch: batch_id=248254461732847854709012582094359501133, time:1750773450.086136s req_ids:[696, 704, 712, 720, 728, 736, 744, 752] -DEBUG 06-24 21:57:30 [manager.py:391] -INFO 06-24 21:57:31 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 696 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 704 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:696 first_token_cost:14621.362209320068ms total_cost_time:14621.402740478516ms,out_token_counter:1 mean_per_token_cost_time: 0.040531158447265625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:704 first_token_cost:14617.124557495117ms total_cost_time:14617.151260375977ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 712 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:712 first_token_cost:14612.886428833008ms total_cost_time:14612.910747528076ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 720 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:720 first_token_cost:14608.647346496582ms total_cost_time:14608.66928100586ms,out_token_counter:1 mean_per_token_cost_time: 0.02193450927734375ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:728 first_token_cost:14604.609727859497ms total_cost_time:14604.631900787354ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 728 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:736 first_token_cost:14600.08454322815ms total_cost_time:14600.10552406311ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 736 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:744 first_token_cost:14595.526695251465ms total_cost_time:14595.547914505005ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:752 first_token_cost:14591.10713005066ms total_cost_time:14591.128587722778ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 744 -INFO 06-24 21:57:31 [manager.py:162] detoken release req id 752 -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005808353424072266 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010880708694458008 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 696 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010863780975341797 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 712 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0048520565032958984 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011660099029541016 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 704 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 712 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010832786560058594 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004339456558227539 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011260509490966797 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 720 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 728 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010160446166992188 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004326820373535156 s -INFO 06-24 21:57:31 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 696 -DEBUG 06-24 21:57:31 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:57:31 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:31 [infer_batch.py:156] radix refed token num 7375 -DEBUG 06-24 21:57:31 [infer_batch.py:156] radix hold token num 15828 -DEBUG 06-24 21:57:31 [infer_batch.py:156] mem manager can alloc token num 564 -DEBUG 06-24 21:57:31 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 704 -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 712 -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 720 -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 728 -INFO 06-24 21:57:31 [batch.py:51] router release req id 696 -INFO 06-24 21:57:31 [batch.py:51] router release req id 704 -INFO 06-24 21:57:31 [batch.py:51] router release req id 712 -INFO 06-24 21:57:31 [batch.py:51] router release req id 720 -INFO 06-24 21:57:31 [batch.py:51] router release req id 728 -INFO 06-24 21:57:31 [batch.py:51] router release req id 736 -INFO 06-24 21:57:31 [batch.py:51] router release req id 744 -INFO 06-24 21:57:31 [batch.py:51] router release req id 752 -INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011093854904174805 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 736 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 744 not send, decode is busy -INFO 06-24 21:57:31 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010010242462158203 s -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 736 -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 744 -INFO 06-24 21:57:31 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.008131027221679688 s -INFO 06-24 21:57:31 [prefill_trans_obj.py:166] prefill node kv move task req_id: 752 not send, decode is busy -INFO 06-24 21:57:31 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 752 -DEBUG 06-24 21:57:31 [manager.py:391] Prefill Batch: batch_id=159894059491602842148363584452156276559, time:1750773451.243592s req_ids:[760, 768, 776, 784, 792, 800] -DEBUG 06-24 21:57:31 [manager.py:391] -INFO 06-24 21:57:32 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 760 -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 768 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:760 first_token_cost:15483.526229858398ms total_cost_time:15483.655452728271ms,out_token_counter:1 mean_per_token_cost_time: 0.12922286987304688ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 776 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:768 first_token_cost:15479.77352142334ms total_cost_time:15479.809999465942ms,out_token_counter:1 mean_per_token_cost_time: 0.03647804260253906ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:776 first_token_cost:15475.495100021362ms total_cost_time:15475.528001785278ms,out_token_counter:1 mean_per_token_cost_time: 0.032901763916015625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 784 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:784 first_token_cost:15470.622777938843ms total_cost_time:15470.65258026123ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:792 first_token_cost:15466.00604057312ms total_cost_time:15466.034650802612ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 792 -INFO 06-24 21:57:32 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:57:16 lightllm_req_id:800 first_token_cost:15461.296558380127ms total_cost_time:15461.327075958252ms,out_token_counter:1 mean_per_token_cost_time: 0.030517578125ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:57:32 [manager.py:162] detoken release req id 800 -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010465860366821289 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004469871520996094 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011321306228637695 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 760 not send, decode is busy -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 768 not send, decode is busy -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 776 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.010179758071899414 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 784 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.004068851470947266 s -INFO 06-24 21:57:32 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 21:57:32 [req_manager.py:78] freed all request size 136 -INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 776 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.010899782180786133 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 776 not send, decode is busy -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 784 not send, decode is busy -DEBUG 06-24 21:57:32 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:57:32 [infer_batch.py:156] radix refed token num 6350 -DEBUG 06-24 21:57:32 [infer_batch.py:156] radix hold token num 15847 -DEBUG 06-24 21:57:32 [infer_batch.py:156] mem manager can alloc token num 545 -DEBUG 06-24 21:57:32 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.00947117805480957 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 800 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003470897674560547 s -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 760 -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 768 -INFO 06-24 21:57:32 [batch.py:51] router release req id 760 -INFO 06-24 21:57:32 [batch.py:51] router release req id 768 -INFO 06-24 21:57:32 [batch.py:51] router release req id 776 -INFO 06-24 21:57:32 [batch.py:51] router release req id 784 -INFO 06-24 21:57:32 [batch.py:51] router release req id 792 -INFO 06-24 21:57:32 [batch.py:51] router release req id 800 -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 776 -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 784 -INFO 06-24 21:57:32 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.011971712112426758 s -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 792 not send, decode is busy -INFO 06-24 21:57:32 [prefill_trans_obj.py:166] prefill node kv move task req_id: 800 not send, decode is busy -INFO 06-24 21:57:32 [shm_req_manager.py:119] all shm req has been release ok -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 792 -INFO 06-24 21:57:32 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 800 -DEBUG 06-24 21:57:36 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:57:36 [manager.py:283] -DEBUG 06-24 21:57:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:57:36 [manager.py:284] -INFO 06-24 21:57:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:57:58 [statics_utils.py:24] mean first cost: 8960.396783351898 ms -INFO 06-24 21:57:58 [statics_utils.py:24] mean per token cost: 0.027647018432617188 ms -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:18 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_99 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_99 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_99 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_99 -INFO 06-24 21:58:18 [manager.py:224] router recive req id 808 cost time 0.03365135192871094 s -INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 -INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 808 cost time 0.035724639892578125 s -DEBUG 06-24 21:58:18 [manager.py:391] Prefill Batch: batch_id=116507823906490739156615915078303123438, time:1750773498.0802565s req_ids:[808] -DEBUG 06-24 21:58:18 [manager.py:391] -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 current batch size: 1 -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 estimated_peak_token_count: 1059 -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 21:58:18 [manager.py:248] dp_i 0 token used ratio: 0.9667520741825281 contain prompt cache tree unrefed token -DEBUG 06-24 21:58:18 [stats.py:37] Avg tokens(prompt+generate) throughput: 764.296 tokens/s -DEBUG 06-24 21:58:18 [stats.py:37] Avg prompt tokens throughput: 762.754 tokens/s -DEBUG 06-24 21:58:18 [stats.py:37] Avg generate tokens throughput: 1.541 tokens/s -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_98 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_98 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_98 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_98 -INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_97 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_97 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_97 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_97 -INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_96 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_96 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_96 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_96 -INFO 06-24 21:58:18 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_95 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_logprobs_95 -WARNING 06-24 21:58:18 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_95 and create again -INFO 06-24 21:58:18 [shm_array.py:30] create shm 2732_0_shm_prompts_95 -INFO 06-24 21:58:18 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:58:18 [manager.py:162] detoken release req id 808 -INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:808 first_token_cost:207.29327201843262ms total_cost_time:207.34190940856934ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:58:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 21:58:18 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:58:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:58:18 [infer_batch.py:156] radix refed token num 1058 -DEBUG 06-24 21:58:18 [infer_batch.py:156] radix hold token num 15836 -DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager can alloc token num 556 -DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:58:18 [batch.py:51] router release req id 808 -INFO 06-24 21:58:18 [manager.py:224] router recive req id 816 cost time 0.17960238456726074 s -INFO 06-24 21:58:18 [manager.py:224] router recive req id 824 cost time 0.16028761863708496 s -INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 816 cost time 0.1804039478302002 s -INFO 06-24 21:58:18 [manager.py:224] router recive req id 832 cost time 0.14226198196411133 s -INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 824 cost time 0.1611027717590332 s -INFO 06-24 21:58:18 [manager.py:224] router recive req id 840 cost time 0.12623143196105957 s -INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 832 cost time 0.14295721054077148 s -INFO 06-24 21:58:18 [manager.py:68] detokenization recv req id 840 cost time 0.12695002555847168 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 808 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.005694866180419922 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 808 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.00798654556274414 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 808 not send, decode is busy -INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 808 -DEBUG 06-24 21:58:18 [manager.py:391] Prefill Batch: batch_id=180573588197561696527024042514760098171, time:1750773498.271277s req_ids:[816, 824, 832, 840] -DEBUG 06-24 21:58:18 [manager.py:391] -INFO 06-24 21:58:18 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 21:58:18 [manager.py:162] detoken release req id 816 -INFO 06-24 21:58:18 [manager.py:162] detoken release req id 824 -INFO 06-24 21:58:18 [manager.py:162] detoken release req id 832 -INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:816 first_token_cost:766.4666175842285ms total_cost_time:766.5369510650635ms,out_token_counter:1 mean_per_token_cost_time: 0.07033348083496094ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:58:18 [manager.py:162] detoken release req id 840 -INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:824 first_token_cost:747.0569610595703ms total_cost_time:747.0846176147461ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:832 first_token_cost:728.865385055542ms total_cost_time:728.8897037506104ms,out_token_counter:1 mean_per_token_cost_time: 0.024318695068359375ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:58:18 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 21:58:18 lightllm_req_id:840 first_token_cost:712.9158973693848ms total_cost_time:712.9440307617188ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 816 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.007913351058959961 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 824 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0014026165008544922 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 816 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007066965103149414 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 816 not send, decode is busy -INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 824 not send, decode is busy -INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.0028319358825683594 s -INFO 06-24 21:58:18 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 816 -INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 824 -DEBUG 06-24 21:58:18 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 21:58:18 [infer_batch.py:156] free a batch state: -DEBUG 06-24 21:58:18 [infer_batch.py:156] radix refed token num 2115 -DEBUG 06-24 21:58:18 [infer_batch.py:156] radix hold token num 15834 -DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager can alloc token num 558 -DEBUG 06-24 21:58:18 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 21:58:18 [batch.py:51] router release req id 816 -INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007506370544433594 s -INFO 06-24 21:58:18 [batch.py:51] router release req id 824 -INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 832 not send, decode is busy -INFO 06-24 21:58:18 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc queue time 0.003538370132446289 s -INFO 06-24 21:58:18 [batch.py:51] router release req id 832 -INFO 06-24 21:58:18 [batch.py:51] router release req id 840 -INFO 06-24 21:58:18 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: 11e3f6d2-2b1a-4c7c-8ebe-e30ab6464dbc cost time: 0.007813692092895508 s -INFO 06-24 21:58:18 [prefill_trans_obj.py:166] prefill node kv move task req_id: 840 not send, decode is busy -INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 832 -INFO 06-24 21:58:18 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 840 -INFO 06-24 21:58:18 [shm_req_manager.py:119] all shm req has been release ok -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:19 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:20 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:21 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:22 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:23 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:24 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:25 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:26 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:28 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:58:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:58:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 21:58:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:29 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:30 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:31 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:58:32 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 21:58:38 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:58:38 [manager.py:283] -DEBUG 06-24 21:58:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:58:38 [manager.py:284] -INFO 06-24 21:58:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:58:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 21:58:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 21:59:18 [manager.py:590] aborted group_request_id not exist -INFO 06-24 21:59:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:59:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 21:59:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -DEBUG 06-24 21:59:38 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:59:38 [manager.py:283] -DEBUG 06-24 21:59:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:59:38 [manager.py:284] -INFO 06-24 21:59:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:59:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 21:59:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -INFO 06-24 22:00:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:00:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 22:00:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -DEBUG 06-24 22:00:39 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:00:39 [manager.py:283] -DEBUG 06-24 22:00:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 22:00:39 [manager.py:284] -INFO 06-24 22:00:58 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:00:58 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 22:00:58 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -INFO 06-24 22:01:28 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 22:01:28 [statics_utils.py:24] mean first cost: 8563.83120445978 ms -INFO 06-24 22:01:28 [statics_utils.py:24] mean per token cost: 0.028226489112490698 ms -ERROR 06-24 22:01:39 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 22:01:39 [pd_loop.py:121] no close frame received or sent -ERROR 06-24 22:01:39 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task -ERROR 06-24 22:01:39 [pd_loop.py:121] recv_bytes = await websocket.recv() -ERROR 06-24 22:01:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv -ERROR 06-24 22:01:39 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc -ERROR 06-24 22:01:39 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent -DEBUG 06-24 22:01:40 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 22:01:40 [manager.py:283] -DEBUG 06-24 22:01:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 22:01:40 [manager.py:284] -INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... - -ERROR 06-24 22:01:41 [prefill_kv_move_manager.py:96] -Traceback (most recent call last): - File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_kv_move_manager.py", line 85, in task_dispatcher_loop - move_task: KVMoveTask = self.info_queue.get() - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/queues.py", line 103, in get - res = self._recv_bytes() - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes - buf = self._recv_bytes(maxlength) - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes - buf = self._recv(4) - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 379, in _recv - chunk = read(handle, remaining) -KeyboardInterrupt -INFO 06-24 22:01:41 [start_utils.py:106] Killing child process 1414656 -INFO 06-24 22:01:41 [start_utils.py:106] Killing child process 1415230 -INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 -INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411713 -INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 -INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1413850 -INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1414262 -INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411872 -INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:41 [start_utils.py:108] Killing parent process 1414655 -INFO 06-24 22:01:41 [start_utils.py:51] Killing child process 1413850 -INFO 06-24 22:01:41 [start_utils.py:53] Killing parent process 1411872 -INFO 06-24 22:01:41 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... -INFO 06-24 22:01:42 [start_utils.py:108] Killing parent process 1414655 -INFO 06-24 22:01:42 [start_utils.py:53] Killing parent process 1411872 -INFO 06-24 22:01:42 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 22:01:42 [api_start.py:30] All processes have been forcefully terminated. diff --git a/pd_p_4096.log b/pd_p_4096.log deleted file mode 100644 index 3c689fff0..000000000 --- a/pd_p_4096.log +++ /dev/null @@ -1,2843 +0,0 @@ -INFO 06-24 19:53:53 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:53:54 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:53:55 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:53:57 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:53:57 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:53:57 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:53:57 [api_start.py:79] zmq mode head: ipc:///tmp/_2732_0_ -INFO 06-24 19:53:57 [api_start.py:81] use tgi api: False -INFO 06-24 19:53:57 [api_start.py:192] alloced ports: [10173, 10076, 10098, 10080, 10160, 10233, 10089, 10220, 10247] -INFO 06-24 19:53:57 [api_start.py:233] all start args:Namespace(run_mode='prefill', host='127.0.1.1', port=8017, httpserver_workers=1, zmq_mode='ipc:///tmp/_2732_0_', pd_master_ip='127.0.1.1', pd_master_port=60011, pd_decode_rpyc_port=42000, config_server_host=None, config_server_port=None, model_name='default_model_name', model_dir='/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', tokenizer_mode='fast', load_way='HF', max_total_token_num=16392, mem_fraction=0.9, batch_max_tokens=8448, eos_id=[151643], tool_call_parser=None, running_max_req_size=128, nnodes=1, node_rank=0, multinode_httpmanager_port=12345, multinode_router_gloo_port=20001, tp=1, dp=1, max_req_total_len=16000, nccl_host='127.0.0.1', nccl_port=2732, use_config_server_to_init_nccl=False, mode=[], trust_remote_code=False, disable_log_stats=False, log_stats_interval=10, router_token_ratio=0.0, router_max_new_token_len=1024, router_max_wait_tokens=6, disable_aggressive_schedule=False, use_dynamic_prompt_cache=False, disable_dynamic_prompt_cache=False, chunked_prefill_size=4096, disable_chunked_prefill=False, diverse_mode=False, token_healing_mode=False, output_constraint_mode='none', first_token_constraint_mode=False, enable_multimodal=False, enable_multimodal_audio=False, enable_mps=False, disable_custom_allreduce=False, enable_custom_allgather=False, enable_tpsp_mix_mode=False, enable_prefill_microbatch_overlap=False, enable_decode_microbatch_overlap=False, enable_flashinfer_prefill=False, enable_flashinfer_decode=False, enable_fa3=False, cache_capacity=200, cache_reserved_ratio=0.5, data_type='bfloat16', return_all_prompt_logprobs=False, use_reward_model=False, long_truncation_mode=None, use_tgi_api=False, health_monitor=False, metric_gateway=None, job_name='lightllm', grouping_key=[], push_interval=10, visual_infer_batch_size=1, visual_gpu_ids=[0], visual_tp=1, visual_dp=1, visual_nccl_ports=[29500], enable_monitor_auth=False, disable_cudagraph=True, graph_max_batch_size=256, graph_split_batch_size=32, graph_grow_step_size=16, graph_max_len_in_batch=16000, quant_type='none', quant_cfg=None, vit_quant_type='none', vit_quant_cfg=None, sampling_backend='triton', ep_redundancy_expert_config_path=None, auto_update_redundancy_expert=False, mtp_mode=None, mtp_draft_model_dir=None, mtp_step=0, pd_chunk_size=0, router_port=10173, detokenization_port=10076, detokenization_pub_port=10098, visual_port=10080, audio_port=10160, cache_port=10233, metric_port=10089, pd_node_infer_rpyc_ports=[10247], pd_node_id=163479035537597727162519172725806046247, pd_p_allowed_port_min=20000, pd_p_allowed_port_max=30000) -INFO 06-24 19:53:58 [start_utils.py:37] init func start_metric_manager : init ok -INFO 06-24 19:54:00 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:01 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:02 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:03 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:03 [__init__.py:239] Automatically detected platform cuda. -INFO 06-24 19:54:04 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:05 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:05 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:05 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:05 [shm_req_manager.py:59] create lock shm 2732_0_req_shm_total -INFO 06-24 19:54:05 [atomic_array_lock.py:29] create lock shm 2732_0_array_reqs_lock -INFO 06-24 19:54:05 [atomic_lock.py:26] create lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_mem_manger_can_use_token_num_0 -INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_shared_token_load -INFO 06-24 19:54:05 [shared_arr.py:17] create shm 2732_0_shared_token_load_ext_infos -INFO 06-24 19:54:05 [model_rpc.py:70] Initialized RPC server for rank 0. -INFO 06-24 19:54:05 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 19:54:05 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 19:54:05 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 19:54:05 [model_rpc.py:184] use ChunckedPrefillForPrefillNode -WARNING 06-24 19:54:05 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:05 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:05 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:06 [manager.py:41] pub_to_httpserver sendhwm 1000 -INFO 06-24 19:54:06 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 19:54:06 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 19:54:06 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_shared_token_load -INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos -INFO 06-24 19:54:07 [shared_arr.py:17] create shm 2732_0_dp_rank_0_lock_tp_infos -INFO 06-24 19:54:07 [basemodel.py:134] Initial quantization. The default quantization method is none -INFO 06-24 19:54:07 [mem_utils.py:11] mode setting params: [] -INFO 06-24 19:54:07 [mem_utils.py:25] Model kv cache using mode normal -INFO 06-24 19:54:07 [shared_arr.py:20] link shm 2732_0_mem_manger_can_use_token_num_0 -INFO 06-24 19:54:20 [basemodel.py:652] begin check max_len infer -INFO 06-24 19:54:21 [basemodel.py:680] check max_len 8448 infer ok -INFO 06-24 19:54:21 [shared_arr.py:17] create shm 2732_0_refed_tokens_num_0 -INFO 06-24 19:54:21 [shared_arr.py:17] create shm 2732_0_tree_total_tokens_num_0 -INFO 06-24 19:54:21 [base_backend.py:135] loaded model class -INFO 06-24 19:54:21 [prefill_impl.py:36] lock_nccl_group ranks 0 -INFO 06-24 19:54:21 [shared_arr.py:20] link shm 2732_0_refed_tokens_num_0 -INFO 06-24 19:54:21 [shared_arr.py:20] link shm 2732_0_tree_total_tokens_num_0 -INFO 06-24 19:54:21 [manager.py:196] use req queue QueueForPDChunkedPrefill -INFO 06-24 19:54:23 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:24 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:26 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:28 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:28 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:28 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:28 [rpyc_fix_utils.py:85] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 19:54:28 [rpyc_fix_utils.py:113] change socket buffer from 212992 212992 change to 4194304 -INFO 06-24 19:54:28 [prefill_kv_move_manager.py:55] rpyc connect to infer rpyc port: 10247 ok -INFO 06-24 19:54:28 [net_utils.py:51] get hostname ip 127.0.1.1 -INFO 06-24 19:54:28 [prefill_trans_process.py:154] prefill trans kv process for device: 0 started! -INFO 06-24 19:54:30 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:31 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:33 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:35 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:35 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:35 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:35 [prefill_infer_rpyc.py:51] put mem manager to mem_queue ok -INFO 06-24 19:54:35 [prefill_kv_move_manager.py:246] prefill kv move manager process started -INFO 06-24 19:54:35 [start_utils.py:37] init func start_router_process : init ok -INFO 06-24 19:54:35 [start_utils.py:37] init func start_detokenization_process : init ok -INFO 06-24 19:54:35 [api_start.py:57] start process pid 1211011 -INFO 06-24 19:54:35 [api_start.py:58] http server pid 1213612 -INFO 06-24 19:54:39 [cache_tensor_manager.py:17] USE_GPU_TENSOR_CACHE is On -INFO 06-24 19:54:40 [importing.py:53] Triton module has been replaced with a placeholder. -INFO 06-24 19:54:40 [__init__.py:239] Automatically detected platform cuda. -WARNING 06-24 19:54:42 [light_utils.py:13] lightllm_kernel is not installed, you can't use the api of it. -WARNING 06-24 19:54:42 [grouped_fused_moe_ep.py:26] no deepep or deep_gemm -INFO 06-24 19:54:42 [communication_op.py:61] deep_ep is not installed, you can't use the api of it. -INFO 06-24 19:54:42 [api_http.py:326] server start up -INFO 06-24 19:54:42 [atomic_array_lock.py:32] link lock shm 2732_0_lightllm_resource_lock -INFO 06-24 19:54:42 [shm_req_manager.py:62] link lock shm 2732_0_req_shm_total -INFO 06-24 19:54:42 [atomic_array_lock.py:32] link lock shm 2732_0_array_reqs_lock -INFO 06-24 19:54:42 [atomic_lock.py:29] link lock shm 2732_0_shm_reqs_manager_lock -INFO 06-24 19:54:43 [atomic_lock.py:29] link lock shm 2732_0_req_id_gen_lock -INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_latest_success_infer_time_mark -INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_shared_token_load -INFO 06-24 19:54:43 [shared_arr.py:20] link shm 2732_0_shared_token_load_ext_infos -INFO 06-24 19:54:43 [api_http.py:330] server start up ok, loop use is -INFO 06-24 19:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:54:43 [pd_loop.py:92] Sent registration JSON: {'node_id': 163479035537597727162519172725806046247, 'client_ip_port': '127.0.1.1:8017', 'mode': 'prefill', 'start_args': {'run_mode': 'prefill', 'host': '127.0.1.1', 'port': 8017, 'httpserver_workers': 1, 'zmq_mode': 'ipc:///tmp/_2732_0_', 'pd_master_ip': '127.0.1.1', 'pd_master_port': 60011, 'pd_decode_rpyc_port': 42000, 'config_server_host': None, 'config_server_port': None, 'model_name': 'default_model_name', 'model_dir': '/mnt/youwei-data/zhuohang/model/Qwen/Qwen2.5-14B', 'tokenizer_mode': 'fast', 'load_way': 'HF', 'max_total_token_num': 16392, 'mem_fraction': 0.9, 'batch_max_tokens': 8448, 'eos_id': [151643], 'tool_call_parser': None, 'running_max_req_size': 128, 'nnodes': 1, 'node_rank': 0, 'multinode_httpmanager_port': 12345, 'multinode_router_gloo_port': 20001, 'tp': 1, 'dp': 1, 'max_req_total_len': 16000, 'nccl_host': '127.0.0.1', 'nccl_port': 2732, 'use_config_server_to_init_nccl': False, 'mode': [], 'trust_remote_code': False, 'disable_log_stats': False, 'log_stats_interval': 10, 'router_token_ratio': 0.0, 'router_max_new_token_len': 1024, 'router_max_wait_tokens': 6, 'disable_aggressive_schedule': False, 'use_dynamic_prompt_cache': False, 'disable_dynamic_prompt_cache': False, 'chunked_prefill_size': 4096, 'disable_chunked_prefill': False, 'diverse_mode': False, 'token_healing_mode': False, 'output_constraint_mode': 'none', 'first_token_constraint_mode': False, 'enable_multimodal': False, 'enable_multimodal_audio': False, 'enable_mps': False, 'disable_custom_allreduce': False, 'enable_custom_allgather': False, 'enable_tpsp_mix_mode': False, 'enable_prefill_microbatch_overlap': False, 'enable_decode_microbatch_overlap': False, 'enable_flashinfer_prefill': False, 'enable_flashinfer_decode': False, 'enable_fa3': False, 'cache_capacity': 200, 'cache_reserved_ratio': 0.5, 'data_type': 'bfloat16', 'return_all_prompt_logprobs': False, 'use_reward_model': False, 'long_truncation_mode': None, 'use_tgi_api': False, 'health_monitor': False, 'metric_gateway': None, 'job_name': 'lightllm', 'grouping_key': [], 'push_interval': 10, 'visual_infer_batch_size': 1, 'visual_gpu_ids': [0], 'visual_tp': 1, 'visual_dp': 1, 'visual_nccl_ports': [29500], 'enable_monitor_auth': False, 'disable_cudagraph': True, 'graph_max_batch_size': 256, 'graph_split_batch_size': 32, 'graph_grow_step_size': 16, 'graph_max_len_in_batch': 16000, 'quant_type': 'none', 'quant_cfg': None, 'vit_quant_type': 'none', 'vit_quant_cfg': None, 'sampling_backend': 'triton', 'ep_redundancy_expert_config_path': None, 'auto_update_redundancy_expert': False, 'mtp_mode': None, 'mtp_draft_model_dir': None, 'mtp_step': 0, 'pd_chunk_size': 0, 'router_port': 10173, 'detokenization_port': 10076, 'detokenization_pub_port': 10098, 'visual_port': 10080, 'audio_port': 10160, 'cache_port': 10233, 'metric_port': 10089, 'pd_node_infer_rpyc_ports': [10247], 'pd_node_id': 163479035537597727162519172725806046247, 'pd_p_allowed_port_min': 20000, 'pd_p_allowed_port_max': 30000}} -INFO 06-24 19:55:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:13 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 19:55:41 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:55:41 [manager.py:283] -DEBUG 06-24 19:55:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:55:41 [manager.py:284] -INFO 06-24 19:55:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:55:43 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:55:43 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:56:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:56:13 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 19:56:42 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:56:42 [manager.py:283] -DEBUG 06-24 19:56:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:56:42 [manager.py:284] -INFO 06-24 19:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:56:43 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:56:43 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:57:13 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 19:57:43 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:57:43 [manager.py:283] -DEBUG 06-24 19:57:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:57:43 [manager.py:284] -INFO 06-24 19:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:57:43 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:57:43 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:58:13 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:58:43 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:58:43 [statics_utils.py:24] mean per token cost: 0.0 ms -DEBUG 06-24 19:58:43 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:58:43 [manager.py:283] -DEBUG 06-24 19:58:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 19:58:43 [manager.py:284] -INFO 06-24 19:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:13 [statics_utils.py:24] mean first cost: 0.0 ms -INFO 06-24 19:59:13 [statics_utils.py:24] mean per token cost: 0.0 ms -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 8 cost time 0.0771784782409668 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 16 cost time 0.01605367660522461 s -DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=5056365483121303145815933528891045344, time:1750766380.2746513s req_ids:[8] -DEBUG 06-24 19:59:40 [manager.py:391] -DEBUG 06-24 19:59:40 [stats.py:37] Avg tokens(prompt+generate) throughput: 3.153 tokens/s -DEBUG 06-24 19:59:40 [stats.py:37] Avg prompt tokens throughput: 3.153 tokens/s -DEBUG 06-24 19:59:40 [stats.py:37] Avg generate tokens throughput: 0.000 tokens/s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 8 cost time 0.11862468719482422 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 16 cost time 0.023453235626220703 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 -INFO 06-24 19:59:40 [manager.py:162] detoken release req id 8 -DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=287395042470880448688684009838947316744, time:1750766380.5638816s req_ids:[16] -DEBUG 06-24 19:59:40 [manager.py:391] -INFO 06-24 19:59:40 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 -INFO 06-24 19:59:40 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 -INFO 06-24 19:59:40 [batch.py:51] router release req id 8 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 24 cost time 0.4605579376220703 s -INFO 06-24 19:59:40 [manager.py:162] detoken release req id 16 -INFO 06-24 19:59:40 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 24 cost time 0.46323204040527344 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 -INFO 06-24 19:59:40 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 19:59:40 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:40 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:40 [infer_batch.py:156] radix refed token num 2108 -DEBUG 06-24 19:59:40 [infer_batch.py:156] radix hold token num 2108 -DEBUG 06-24 19:59:40 [infer_batch.py:156] mem manager can alloc token num 14284 -DEBUG 06-24 19:59:40 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:40 [batch.py:51] router release req id 16 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 32 cost time 0.4595165252685547 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 40 cost time 0.4420509338378906 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 48 cost time 0.43045759201049805 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 32 cost time 0.46100616455078125 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 56 cost time 0.4211440086364746 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 64 cost time 0.4114036560058594 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 72 cost time 0.40201258659362793 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 80 cost time 0.39416027069091797 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 40 cost time 0.4444599151611328 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 88 cost time 0.3561263084411621 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 96 cost time 0.34813475608825684 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 104 cost time 0.3421444892883301 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 112 cost time 0.3359825611114502 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 120 cost time 0.32976698875427246 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 48 cost time 0.433704137802124 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 128 cost time 0.3224823474884033 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 136 cost time 0.3159162998199463 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 144 cost time 0.30929017066955566 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 152 cost time 0.30318641662597656 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 56 cost time 0.425382137298584 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 160 cost time 0.297224760055542 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 168 cost time 0.29160284996032715 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 176 cost time 0.2858412265777588 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 184 cost time 0.28053855895996094 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 64 cost time 0.416536808013916 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 192 cost time 0.2750709056854248 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 200 cost time 0.2698476314544678 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 208 cost time 0.2634468078613281 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 216 cost time 0.2484593391418457 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 72 cost time 0.4081766605377197 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 224 cost time 0.24333763122558594 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 232 cost time 0.23853564262390137 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 240 cost time 0.23358726501464844 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 248 cost time 0.22908997535705566 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 80 cost time 0.4013192653656006 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 256 cost time 0.22463440895080566 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 264 cost time 0.22019624710083008 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 272 cost time 0.2154369354248047 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 280 cost time 0.20968985557556152 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 88 cost time 0.3643667697906494 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 288 cost time 0.20473432540893555 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 296 cost time 0.2002854347229004 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 304 cost time 0.19563555717468262 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 96 cost time 0.35745882987976074 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 312 cost time 0.19149160385131836 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 320 cost time 0.18721485137939453 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 328 cost time 0.18253755569458008 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 104 cost time 0.3525278568267822 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 336 cost time 0.17823219299316406 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 344 cost time 0.1357724666595459 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 352 cost time 0.13075518608093262 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 360 cost time 0.12569165229797363 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 112 cost time 0.34745216369628906 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 368 cost time 0.12105774879455566 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 -INFO 06-24 19:59:40 [manager.py:224] router recive req id 376 cost time 0.1166226863861084 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 384 cost time 0.11048221588134766 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 392 cost time 0.10562920570373535 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 120 cost time 0.3422739505767822 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 400 cost time 0.10108423233032227 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 408 cost time 0.0954592227935791 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 416 cost time 0.09096550941467285 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 424 cost time 0.08623027801513672 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 432 cost time 0.08154058456420898 s -INFO 06-24 19:59:40 [manager.py:224] router recive req id 440 cost time 0.07657957077026367 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 -DEBUG 06-24 19:59:40 [manager.py:391] Prefill Batch: batch_id=267698894269622210945840868524439186934, time:1750766380.767022s req_ids:[24, 32, 40, 48, 56, 64, 72] -DEBUG 06-24 19:59:40 [manager.py:391] -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 128 cost time 0.3727076053619385 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 136 cost time 0.3678281307220459 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 144 cost time 0.3624389171600342 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 152 cost time 0.3575727939605713 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 160 cost time 0.35327982902526855 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 168 cost time 0.34880661964416504 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 176 cost time 0.34411168098449707 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 184 cost time 0.33986663818359375 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 192 cost time 0.33549952507019043 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 200 cost time 0.33135485649108887 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 208 cost time 0.3260159492492676 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 216 cost time 0.3121187686920166 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 224 cost time 0.3080432415008545 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 232 cost time 0.3041059970855713 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 240 cost time 0.3001420497894287 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 248 cost time 0.2966794967651367 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 256 cost time 0.2932896614074707 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 264 cost time 0.289884090423584 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 272 cost time 0.2861654758453369 s -INFO 06-24 19:59:40 [rpyc_fix_utils.py:36] change socket buffer from 2626560 131072 change to 4194304 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 280 cost time 0.28146958351135254 s -INFO 06-24 19:59:40 [prefill_trans_process.py:61] connect start PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=-1, prefill_id=163479035537597727162519172725806046247, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') -INFO 06-24 19:59:40 [prefill_trans_process.py:64] connect src_id 163479035537597727162519172725806046247 dest_id f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:40 [pynccl_wrapper.py:75] Found nccl from library libnccl.so.2 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 288 cost time 0.27756190299987793 s -INFO 06-24 19:59:40 [pynccl.py:180] LightLLM is using nccl==2.21.5 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 296 cost time 0.2741262912750244 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 304 cost time 0.27051210403442383 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 312 cost time 0.2665543556213379 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 320 cost time 0.26325225830078125 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 328 cost time 0.2595937252044678 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 336 cost time 0.2561604976654053 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 344 cost time 0.21455073356628418 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 352 cost time 0.21040844917297363 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 360 cost time 0.20623016357421875 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 368 cost time 0.20246386528015137 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 376 cost time 0.19890666007995605 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 384 cost time 0.22502851486206055 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 392 cost time 0.22162365913391113 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 400 cost time 0.2180180549621582 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 408 cost time 0.21332144737243652 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 416 cost time 0.20965194702148438 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 424 cost time 0.2062544822692871 s -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 432 cost time 0.20240402221679688 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 -INFO 06-24 19:59:40 [manager.py:68] detokenization recv req id 440 cost time 0.19823169708251953 s -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 -INFO 06-24 19:59:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 -INFO 06-24 19:59:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:8 first_token_cost:764.474630355835ms total_cost_time:764.5032405853271ms,out_token_counter:1 mean_per_token_cost_time: 0.0286102294921875ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:40 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:16 first_token_cost:668.4184074401855ms total_cost_time:668.4293746948242ms,out_token_counter:1 mean_per_token_cost_time: 0.010967254638671875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [prefill_trans_process.py:81] PDTransJoinInfo(decode_id=147275795944234129756100418482494441380, decode_device_id=-1, prefill_id=163479035537597727162519172725806046247, prefill_device_id=0, pd_prefill_nccl_ip='127.0.1.1', pd_prefill_nccl_port=20000, connect_id='f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df') kv trans connected! -INFO 06-24 19:59:41 [prefill_trans_obj.py:104] create KVTransConnectObj success: connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df decode_node_id: 147275795944234129756100418482494441380 prefill_node_id: 163479035537597727162519172725806046247 device_index: 0 -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 8 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 1.196134328842163 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 16 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 1.0328316688537598 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 8 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009828329086303711 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 16 not send, decode is busy -INFO 06-24 19:59:41 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 1.206552505493164 s -INFO 06-24 19:59:41 [prefill_trans_process.py:34] trans start: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:41 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 16 -INFO 06-24 19:59:41 [manager.py:224] router recive req id 448 cost time 1.2438838481903076 s -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 24 -INFO 06-24 19:59:41 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 32 -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 40 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:24 first_token_cost:1665.039300918579ms total_cost_time:1665.0831699371338ms,out_token_counter:1 mean_per_token_cost_time: 0.0438690185546875ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:32 first_token_cost:1653.5532474517822ms total_cost_time:1653.5794734954834ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 48 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:40 first_token_cost:1636.0180377960205ms total_cost_time:1636.0392570495605ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 56 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:48 first_token_cost:1624.2146492004395ms total_cost_time:1624.2358684539795ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 64 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:56 first_token_cost:1615.1435375213623ms total_cost_time:1615.1671409606934ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:64 first_token_cost:1605.3791046142578ms total_cost_time:1605.4003238677979ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:162] detoken release req id 72 -INFO 06-24 19:59:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:72 first_token_cost:1596.0474014282227ms total_cost_time:1596.0681438446045ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:41 [manager.py:68] detokenization recv req id 448 cost time 1.2521979808807373 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 24 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007410764694213867 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 24 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009976387023925781 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 24 not send, decode is busy -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006712436676025391 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 32 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007524251937866211 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 32 not send, decode is busy -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0020258426666259766 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 40 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007321357727050781 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 40 not send, decode is busy -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009424448013305664 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 48 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008481264114379883 s -INFO 06-24 19:59:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 48 not send, decode is busy -INFO 06-24 19:59:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008547306060791016 s -INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 56 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008028745651245117 s -INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 56 not send, decode is busy -INFO 06-24 19:59:42 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007581472396850586 s -INFO 06-24 19:59:42 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 24 -DEBUG 06-24 19:59:42 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:42 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:42 [infer_batch.py:156] radix refed token num 7398 -DEBUG 06-24 19:59:42 [infer_batch.py:156] radix hold token num 9510 -DEBUG 06-24 19:59:42 [infer_batch.py:156] mem manager can alloc token num 6882 -DEBUG 06-24 19:59:42 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 32 -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 40 -INFO 06-24 19:59:42 [batch.py:51] router release req id 24 -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 48 -INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 64 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00882101058959961 s -INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 64 not send, decode is busy -INFO 06-24 19:59:42 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007585048675537109 s -INFO 06-24 19:59:42 [batch.py:51] router release req id 32 -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 56 -INFO 06-24 19:59:42 [batch.py:51] router release req id 40 -INFO 06-24 19:59:42 [batch.py:51] router release req id 48 -INFO 06-24 19:59:42 [batch.py:51] router release req id 56 -INFO 06-24 19:59:42 [batch.py:51] router release req id 64 -INFO 06-24 19:59:42 [batch.py:51] router release req id 72 -INFO 06-24 19:59:42 [manager.py:224] router recive req id 456 cost time 1.3277161121368408 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 464 cost time 1.3242998123168945 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 472 cost time 1.319894552230835 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 456 cost time 1.3300683498382568 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 480 cost time 1.3157711029052734 s -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 64 -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 464 cost time 1.326838493347168 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 488 cost time 1.3120489120483398 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 472 cost time 1.322997808456421 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 496 cost time 1.3070104122161865 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 504 cost time 1.3010101318359375 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 512 cost time 1.296341896057129 s -INFO 06-24 19:59:42 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 72 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009949684143066406 s -INFO 06-24 19:59:42 [prefill_trans_obj.py:166] prefill node kv move task req_id: 72 not send, decode is busy -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 480 cost time 1.3195230960845947 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 520 cost time 1.291534662246704 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 528 cost time 1.2869133949279785 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 536 cost time 1.2822480201721191 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 488 cost time 1.3156139850616455 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 544 cost time 1.2775328159332275 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 552 cost time 1.2729871273040771 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 496 cost time 1.3109245300292969 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 560 cost time 1.2685813903808594 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 568 cost time 1.2642817497253418 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 576 cost time 1.2597098350524902 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 504 cost time 1.305795431137085 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 584 cost time 1.2549967765808105 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 592 cost time 1.2503442764282227 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 600 cost time 1.2441017627716064 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 512 cost time 1.302056074142456 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 608 cost time 1.2394473552703857 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 616 cost time 1.235461950302124 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 520 cost time 1.2981574535369873 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 624 cost time 1.2309205532073975 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 632 cost time 1.2266817092895508 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 640 cost time 1.2221033573150635 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 528 cost time 1.294480323791504 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 648 cost time 1.2175545692443848 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 656 cost time 1.21309232711792 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 664 cost time 1.2083725929260254 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 536 cost time 1.2907042503356934 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 672 cost time 1.2038202285766602 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 680 cost time 1.1997058391571045 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 544 cost time 1.286849021911621 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 688 cost time 1.1952989101409912 s -INFO 06-24 19:59:42 [manager.py:224] router recive req id 696 cost time 1.1898579597473145 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 552 cost time 1.2832188606262207 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 560 cost time 1.2795476913452148 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 568 cost time 1.276412010192871 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 576 cost time 1.2727677822113037 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 584 cost time 1.268918514251709 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 592 cost time 1.2651641368865967 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 600 cost time 1.2598137855529785 s -DEBUG 06-24 19:59:42 [manager.py:391] Prefill Batch: batch_id=148194245146493766126448738228854701509, time:1750766382.0499694s req_ids:[80, 88, 96, 104, 112, 120, 128, 136] -DEBUG 06-24 19:59:42 [manager.py:391] -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 608 cost time 1.2560806274414062 s -INFO 06-24 19:59:42 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 72 -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 616 cost time 1.2526047229766846 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 624 cost time 1.2489118576049805 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 632 cost time 1.2455463409423828 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 640 cost time 1.241865873336792 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 648 cost time 1.2382326126098633 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 656 cost time 1.2346761226654053 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 664 cost time 1.2308566570281982 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 672 cost time 1.2271971702575684 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 680 cost time 1.2235558032989502 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 688 cost time 1.2199194431304932 s -INFO 06-24 19:59:42 [manager.py:68] detokenization recv req id 696 cost time 1.2149651050567627 s -INFO 06-24 19:59:42 [prefill_trans_process.py:42] trans finished: id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1055 -INFO 06-24 19:59:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 19:59:43 [statics_utils.py:24] mean first cost: 1425.3653685251873 ms -INFO 06-24 19:59:43 [statics_utils.py:24] mean per token cost: 0.02418624030219184 ms -INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 1.5516793727874756,move_total_kv_len: 1055, id: 8 in_len:1055 v_len: 1055 move_len: 1055 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 8 cost total time: 2.7596399784088135 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 8 -INFO 06-24 19:59:43 [manager.py:224] router recive req id 704 cost time 2.6499106884002686 s -INFO 06-24 19:59:43 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 80 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 88 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 96 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:80 first_token_cost:3159.6274375915527ms total_cost_time:3159.6851348876953ms,out_token_counter:1 mean_per_token_cost_time: 0.057697296142578125ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:88 first_token_cost:3121.9112873077393ms total_cost_time:3121.9427585601807ms,out_token_counter:1 mean_per_token_cost_time: 0.03147125244140625ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 104 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:96 first_token_cost:3114.2942905426025ms total_cost_time:3114.32147026062ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:104 first_token_cost:3108.484983444214ms total_cost_time:3108.5076332092285ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:112 first_token_cost:3102.428913116455ms total_cost_time:3102.4506092071533ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 112 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:120 first_token_cost:3096.4436531066895ms total_cost_time:3096.466302871704ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:128 first_token_cost:3089.165449142456ms total_cost_time:3089.1857147216797ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:136 first_token_cost:3082.70263671875ms total_cost_time:3082.723379135132ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 120 -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 128 -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 80 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0018470287322998047 s -INFO 06-24 19:59:43 [manager.py:162] detoken release req id 136 -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 704 cost time 2.6649019718170166 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 80 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.018401384353637695 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 80 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 88 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.014518260955810547 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 96 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008265972137451172 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 104 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021898746490478516 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 88 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010574102401733398 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 88 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 96 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 104 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0076749324798583984 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 120 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0008678436279296875 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 112 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009312868118286133 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 112 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004452228546142578 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.012329578399658203 s -INFO 06-24 19:59:43 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 80 -INFO 06-24 19:59:43 [prefill_trans_process.py:34] trans start: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -DEBUG 06-24 19:59:43 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:43 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:43 [infer_batch.py:156] radix refed token num 7389 -DEBUG 06-24 19:59:43 [infer_batch.py:156] radix hold token num 15845 -DEBUG 06-24 19:59:43 [infer_batch.py:156] mem manager can alloc token num 547 -DEBUG 06-24 19:59:43 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:43 [batch.py:51] router release req id 80 -INFO 06-24 19:59:43 [batch.py:51] router release req id 88 -INFO 06-24 19:59:43 [batch.py:51] router release req id 96 -INFO 06-24 19:59:43 [batch.py:51] router release req id 104 -INFO 06-24 19:59:43 [batch.py:51] router release req id 112 -INFO 06-24 19:59:43 [batch.py:51] router release req id 120 -INFO 06-24 19:59:43 [batch.py:51] router release req id 128 -INFO 06-24 19:59:43 [batch.py:51] router release req id 136 -INFO 06-24 19:59:43 [manager.py:224] router recive req id 712 cost time 2.7033867835998535 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 720 cost time 2.6990702152252197 s -INFO 06-24 19:59:43 [prefill_trans_process.py:42] trans finished: id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 -INFO 06-24 19:59:43 [manager.py:224] router recive req id 728 cost time 2.6946513652801514 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 736 cost time 2.6902432441711426 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 712 cost time 2.70529842376709 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 88 -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 96 -INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 128 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010231494903564453 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 744 cost time 2.6858882904052734 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 104 -INFO 06-24 19:59:43 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009371519088745117 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 112 -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 720 cost time 2.701856851577759 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 752 cost time 2.68165922164917 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 760 cost time 2.67747163772583 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 768 cost time 2.6727747917175293 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 776 cost time 2.667965888977051 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 784 cost time 2.662921667098999 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 792 cost time 2.6576287746429443 s -INFO 06-24 19:59:43 [manager.py:224] router recive req id 800 cost time 2.652892827987671 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 728 cost time 2.6993229389190674 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 736 cost time 2.696143627166748 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 744 cost time 2.6924619674682617 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 752 cost time 2.6888301372528076 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 760 cost time 2.685490608215332 s -INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 0.017946243286132812,move_total_kv_len: 1058, id: 120 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 120 cost total time: 0.03179788589477539 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.025861501693725586 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 768 cost time 2.681823968887329 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 136 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011013269424438477 s -INFO 06-24 19:59:43 [prefill_trans_obj.py:166] prefill node kv move task req_id: 136 not send, decode is busy -INFO 06-24 19:59:43 [prefill_trans_process.py:34] trans start: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 776 cost time 2.678018808364868 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 784 cost time 2.6740620136260986 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 120 -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 136 -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 792 cost time 2.6707096099853516 s -INFO 06-24 19:59:43 [manager.py:68] detokenization recv req id 800 cost time 2.6670334339141846 s -INFO 06-24 19:59:43 [prefill_trans_process.py:42] trans finished: id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -DEBUG 06-24 19:59:43 [manager.py:391] Prefill Batch: batch_id=294565578027902777965125904067623474870, time:1750766383.592034s req_ids:[144, 152, 160, 168, 176, 184, 192, 200] -DEBUG 06-24 19:59:43 [manager.py:391] -INFO 06-24 19:59:43 [prefill_trans_process.py:44] trans cost time: 0.020441293716430664,move_total_kv_len: 1049, id: 128 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:43 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 128 cost total time: 0.04755067825317383 s -INFO 06-24 19:59:43 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 128 -INFO 06-24 19:59:43 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 -WARNING 06-24 19:59:43 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_16 and create again -INFO 06-24 19:59:43 [shm_array.py:30] create shm 2732_0_shm_logprobs_16 -WARNING 06-24 19:59:43 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_16 and create again -INFO 06-24 19:59:43 [shm_array.py:30] create shm 2732_0_shm_prompts_16 -INFO 06-24 19:59:44 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 144 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 152 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:144 first_token_cost:4231.854200363159ms total_cost_time:4231.899261474609ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:152 first_token_cost:4225.930213928223ms total_cost_time:4225.956916809082ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:160 first_token_cost:4220.216751098633ms total_cost_time:4220.238924026489ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 160 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:168 first_token_cost:4214.667320251465ms total_cost_time:4214.688777923584ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:176 first_token_cost:4208.98175239563ms total_cost_time:4209.003448486328ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:184 first_token_cost:4203.855276107788ms total_cost_time:4203.876495361328ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1050 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 168 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:192 first_token_cost:4198.5790729522705ms total_cost_time:4198.600053787231ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:200 first_token_cost:4193.4356689453125ms total_cost_time:4193.456411361694ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 176 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 184 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 192 -INFO 06-24 19:59:44 [manager.py:162] detoken release req id 200 -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0053310394287109375 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 144 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.01036977767944336 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 144 not send, decode is busy -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010135412216186523 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 160 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0039844512939453125 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 152 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010167598724365234 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 152 not send, decode is busy -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 160 not send, decode is busy -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009825944900512695 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 176 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004091024398803711 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 168 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010155677795410156 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 168 not send, decode is busy -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008224964141845703 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 192 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0022199153900146484 s -INFO 06-24 19:59:44 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 144 -DEBUG 06-24 19:59:44 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:44 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:44 [infer_batch.py:156] radix refed token num 7373 -DEBUG 06-24 19:59:44 [infer_batch.py:156] radix hold token num 15810 -DEBUG 06-24 19:59:44 [infer_batch.py:156] mem manager can alloc token num 582 -DEBUG 06-24 19:59:44 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:44 [batch.py:51] router release req id 144 -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 152 -INFO 06-24 19:59:44 [batch.py:51] router release req id 152 -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 160 -INFO 06-24 19:59:44 [batch.py:51] router release req id 160 -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 168 -INFO 06-24 19:59:44 [batch.py:51] router release req id 168 -INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 184 in_len:1050 v_len: 1050 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009021282196044922 s -INFO 06-24 19:59:44 [batch.py:51] router release req id 176 -INFO 06-24 19:59:44 [batch.py:51] router release req id 184 -INFO 06-24 19:59:44 [batch.py:51] router release req id 192 -INFO 06-24 19:59:44 [batch.py:51] router release req id 200 -INFO 06-24 19:59:44 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.024348974227905273 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 184 not send, decode is busy -INFO 06-24 19:59:44 [manager.py:224] router recive req id 808 cost time 0.850881814956665 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 192 not send, decode is busy -INFO 06-24 19:59:44 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007191896438598633 s -INFO 06-24 19:59:44 [prefill_trans_process.py:34] trans start: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [manager.py:68] detokenization recv req id 808 cost time 0.8527519702911377 s -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 184 -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 192 -INFO 06-24 19:59:44 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 200 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008372306823730469 s -INFO 06-24 19:59:44 [prefill_trans_obj.py:166] prefill node kv move task req_id: 200 not send, decode is busy -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 200 -INFO 06-24 19:59:44 [prefill_trans_process.py:42] trans finished: id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 -DEBUG 06-24 19:59:44 [manager.py:391] Prefill Batch: batch_id=182561919414357009603534939896602708878, time:1750766384.7506483s req_ids:[208, 216, 224, 232, 240, 248, 256, 264] -DEBUG 06-24 19:59:44 [manager.py:391] -INFO 06-24 19:59:44 [prefill_trans_process.py:44] trans cost time: 0.0439300537109375,move_total_kv_len: 1056, id: 176 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:44 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 176 cost total time: 0.07047057151794434 s -INFO 06-24 19:59:44 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 176 -INFO 06-24 19:59:45 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 -WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_24 and create again -INFO 06-24 19:59:45 [shm_array.py:30] create shm 2732_0_shm_logprobs_24 -WARNING 06-24 19:59:45 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_24 and create again -INFO 06-24 19:59:45 [shm_array.py:30] create shm 2732_0_shm_prompts_24 -INFO 06-24 19:59:45 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 208 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 216 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 224 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:208 first_token_cost:5349.753856658936ms total_cost_time:5349.806070327759ms,out_token_counter:1 mean_per_token_cost_time: 0.05221366882324219ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 232 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:216 first_token_cost:5335.107803344727ms total_cost_time:5335.139274597168ms,out_token_counter:1 mean_per_token_cost_time: 0.03147125244140625ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 240 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:224 first_token_cost:5330.266237258911ms total_cost_time:5330.289125442505ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:232 first_token_cost:5325.3490924835205ms total_cost_time:5325.370073318481ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 248 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:240 first_token_cost:5320.688724517822ms total_cost_time:5320.711135864258ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 256 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:248 first_token_cost:5316.360712051392ms total_cost_time:5316.3816928863525ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:256 first_token_cost:5311.980485916138ms total_cost_time:5312.000751495361ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [manager.py:162] detoken release req id 264 -INFO 06-24 19:59:45 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:264 first_token_cost:5307.828664779663ms total_cost_time:5307.848930358887ms,out_token_counter:1 mean_per_token_cost_time: 0.020265579223632812ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008646726608276367 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 216 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002033233642578125 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 208 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011253595352172852 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 208 not send, decode is busy -INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 216 not send, decode is busy -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007774829864501953 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 232 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0018138885498046875 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 224 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008698463439941406 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 224 not send, decode is busy -INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 232 not send, decode is busy -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 240 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006409168243408203 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 248 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0017321109771728516 s -INFO 06-24 19:59:45 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 208 -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 216 -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 224 -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 232 -INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 240 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010566473007202148 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:166] prefill node kv move task req_id: 248 not send, decode is busy -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007519245147705078 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 264 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00293731689453125 s -DEBUG 06-24 19:59:45 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:45 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:45 [infer_batch.py:156] radix refed token num 4230 -DEBUG 06-24 19:59:45 [infer_batch.py:156] radix hold token num 15820 -DEBUG 06-24 19:59:45 [infer_batch.py:156] mem manager can alloc token num 572 -DEBUG 06-24 19:59:45 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:45 [batch.py:51] router release req id 208 -INFO 06-24 19:59:45 [batch.py:51] router release req id 216 -INFO 06-24 19:59:45 [batch.py:51] router release req id 224 -INFO 06-24 19:59:45 [batch.py:51] router release req id 232 -INFO 06-24 19:59:45 [batch.py:51] router release req id 240 -INFO 06-24 19:59:45 [batch.py:51] router release req id 248 -INFO 06-24 19:59:45 [batch.py:51] router release req id 256 -INFO 06-24 19:59:45 [batch.py:51] router release req id 264 -INFO 06-24 19:59:45 [manager.py:224] router recive req id 816 cost time 0.8788070678710938 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.021255970001220703 s -INFO 06-24 19:59:45 [manager.py:68] detokenization recv req id 816 cost time 0.8805432319641113 s -INFO 06-24 19:59:45 [prefill_trans_process.py:34] trans start: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 248 -INFO 06-24 19:59:45 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 256 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011115550994873047 s -INFO 06-24 19:59:45 [prefill_trans_process.py:42] trans finished: id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1063 -DEBUG 06-24 19:59:45 [manager.py:391] Prefill Batch: batch_id=190779151374460836427467471628907477635, time:1750766385.9125724s req_ids:[272, 280, 288, 296, 304, 312, 320] -DEBUG 06-24 19:59:45 [manager.py:391] -INFO 06-24 19:59:45 [prefill_trans_process.py:44] trans cost time: 0.0259249210357666,move_total_kv_len: 1063, id: 240 in_len:1063 v_len: 1063 move_len: 1063 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 240 cost total time: 0.04833054542541504 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.03835797309875488 s -INFO 06-24 19:59:45 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 264 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.033788442611694336 s -INFO 06-24 19:59:45 [prefill_trans_process.py:34] trans start: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:45 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 240 -INFO 06-24 19:59:45 [prefill_trans_process.py:42] trans finished: id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 2108 -INFO 06-24 19:59:46 [prefill_trans_process.py:44] trans cost time: 0.14321660995483398,move_total_kv_len: 2108, id: 256 in_len:1054 v_len: 1054 move_len: 1054 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 256 cost total time: 0.18323612213134766 s -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 256 -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 264 -INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 -WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_32 and create again -INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_logprobs_32 -WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_32 and create again -INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_prompts_32 -INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 -WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_31 and create again -INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_logprobs_31 -WARNING 06-24 19:59:46 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_31 and create again -INFO 06-24 19:59:46 [shm_array.py:30] create shm 2732_0_shm_prompts_31 -INFO 06-24 19:59:46 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 -INFO 06-24 19:59:46 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 272 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 280 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:272 first_token_cost:6383.890151977539ms total_cost_time:6383.934736251831ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 288 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:280 first_token_cost:6378.338098526001ms total_cost_time:6378.364562988281ms,out_token_counter:1 mean_per_token_cost_time: 0.026464462280273438ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 296 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:288 first_token_cost:6373.648405075073ms total_cost_time:6373.671770095825ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:296 first_token_cost:6369.301319122314ms total_cost_time:6369.323492050171ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 304 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:304 first_token_cost:6365.0031089782715ms total_cost_time:6365.0267124176025ms,out_token_counter:1 mean_per_token_cost_time: 0.023603439331054688ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:312 first_token_cost:6360.0428104400635ms total_cost_time:6360.0640296936035ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 312 -INFO 06-24 19:59:46 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:320 first_token_cost:6355.86953163147ms total_cost_time:6355.890274047852ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:46 [manager.py:162] detoken release req id 320 -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 272 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0007350444793701172 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 272 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010283470153808594 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 272 not send, decode is busy -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 280 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0067899227142333984 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 288 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002086639404296875 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 280 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010869979858398438 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 288 not send, decode is busy -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 296 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00860905647277832 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 304 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003374814987182617 s -INFO 06-24 19:59:46 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 272 -DEBUG 06-24 19:59:46 [req_manager.py:78] freed all request size 136 -INFO 06-24 19:59:46 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.027638673782348633 s -DEBUG 06-24 19:59:46 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:46 [infer_batch.py:156] radix refed token num 6334 -DEBUG 06-24 19:59:46 [infer_batch.py:156] radix hold token num 15842 -DEBUG 06-24 19:59:46 [infer_batch.py:156] mem manager can alloc token num 550 -DEBUG 06-24 19:59:46 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 288 -INFO 06-24 19:59:46 [batch.py:51] router release req id 272 -INFO 06-24 19:59:46 [prefill_trans_process.py:34] trans start: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [batch.py:51] router release req id 280 -INFO 06-24 19:59:46 [batch.py:51] router release req id 288 -INFO 06-24 19:59:46 [batch.py:51] router release req id 296 -INFO 06-24 19:59:46 [batch.py:51] router release req id 304 -INFO 06-24 19:59:46 [batch.py:51] router release req id 312 -INFO 06-24 19:59:46 [batch.py:51] router release req id 320 -INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 296 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010961771011352539 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 296 not send, decode is busy -INFO 06-24 19:59:46 [manager.py:224] router recive req id 824 cost time 0.7054061889648438 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010041952133178711 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 320 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0052471160888671875 s -INFO 06-24 19:59:46 [manager.py:224] router recive req id 832 cost time 0.6817638874053955 s -INFO 06-24 19:59:46 [manager.py:224] router recive req id 840 cost time 0.655919075012207 s -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 824 cost time 0.7071444988250732 s -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 832 cost time 0.6850030422210693 s -INFO 06-24 19:59:46 [prefill_trans_process.py:42] trans finished: id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1058 -INFO 06-24 19:59:46 [manager.py:68] detokenization recv req id 840 cost time 0.6609628200531006 s -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 296 -INFO 06-24 19:59:46 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 312 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011251211166381836 s -INFO 06-24 19:59:46 [prefill_trans_obj.py:166] prefill node kv move task req_id: 312 not send, decode is busy -DEBUG 06-24 19:59:46 [manager.py:391] Prefill Batch: batch_id=70930678938869571698225208165763613302, time:1750766386.987919s req_ids:[328, 336, 344, 352, 360, 368, 376, 384] -DEBUG 06-24 19:59:46 [manager.py:391] -INFO 06-24 19:59:46 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 312 -INFO 06-24 19:59:46 [prefill_trans_process.py:44] trans cost time: 0.035775184631347656,move_total_kv_len: 1058, id: 280 in_len:1058 v_len: 1058 move_len: 1058 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:46 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 280 cost total time: 0.0642862319946289 s -INFO 06-24 19:59:46 [task_queue.py:39] queue ready_kv_trans_task_queue left size: 1 -INFO 06-24 19:59:46 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.049445390701293945 s -INFO 06-24 19:59:46 [prefill_trans_process.py:34] trans start: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 280 -INFO 06-24 19:59:47 [prefill_trans_process.py:42] trans finished: id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1051 -INFO 06-24 19:59:47 [prefill_trans_process.py:44] trans cost time: 0.07567572593688965,move_total_kv_len: 1051, id: 304 in_len:1051 v_len: 1051 move_len: 1051 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 304 cost total time: 0.12643718719482422 s -INFO 06-24 19:59:47 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.11698675155639648 s -INFO 06-24 19:59:47 [prefill_trans_process.py:34] trans start: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [prefill_trans_process.py:42] trans finished: id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1057 -INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 304 -INFO 06-24 19:59:47 [prefill_trans_process.py:44] trans cost time: 0.04365849494934082,move_total_kv_len: 1057, id: 320 in_len:1057 v_len: 1057 move_len: 1057 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:47 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 320 cost total time: 0.1618814468383789 s -INFO 06-24 19:59:47 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 320 -INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_39 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_logprobs_39 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_39 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_prompts_39 -INFO 06-24 19:59:47 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_38 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_logprobs_38 -WARNING 06-24 19:59:47 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_38 and create again -INFO 06-24 19:59:47 [shm_array.py:30] create shm 2732_0_shm_prompts_38 -INFO 06-24 19:59:48 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 328 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 336 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 344 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 352 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 360 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:328 first_token_cost:7552.9944896698ms total_cost_time:7553.060293197632ms,out_token_counter:1 mean_per_token_cost_time: 0.06580352783203125ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:336 first_token_cost:7548.905849456787ms total_cost_time:7548.933506011963ms,out_token_counter:1 mean_per_token_cost_time: 0.02765655517578125ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 368 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:344 first_token_cost:7506.665468215942ms total_cost_time:7506.687164306641ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 376 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:352 first_token_cost:7502.08854675293ms total_cost_time:7502.110242843628ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:162] detoken release req id 384 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:360 first_token_cost:7497.266054153442ms total_cost_time:7497.288942337036ms,out_token_counter:1 mean_per_token_cost_time: 0.02288818359375ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:368 first_token_cost:7492.713928222656ms total_cost_time:7492.736101150513ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:376 first_token_cost:7488.4033203125ms total_cost_time:7488.430023193359ms,out_token_counter:1 mean_per_token_cost_time: 0.026702880859375ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:384 first_token_cost:7482.416868209839ms total_cost_time:7482.438087463379ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1063 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002575397491455078 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 328 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009096384048461914 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 328 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007551670074462891 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 344 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028002262115478516 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 336 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009668111801147461 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 336 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 344 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008342266082763672 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 360 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0036711692810058594 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 352 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008313894271850586 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 352 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 360 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007716655731201172 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 376 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028553009033203125 s -INFO 06-24 19:59:48 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 328 -DEBUG 06-24 19:59:48 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:48 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:48 [infer_batch.py:156] radix refed token num 7387 -DEBUG 06-24 19:59:48 [infer_batch.py:156] radix hold token num 15841 -DEBUG 06-24 19:59:48 [infer_batch.py:156] mem manager can alloc token num 551 -DEBUG 06-24 19:59:48 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:48 [batch.py:51] router release req id 328 -INFO 06-24 19:59:48 [batch.py:51] router release req id 336 -INFO 06-24 19:59:48 [batch.py:51] router release req id 344 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 336 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 344 -INFO 06-24 19:59:48 [batch.py:51] router release req id 352 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 352 -INFO 06-24 19:59:48 [batch.py:51] router release req id 360 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 360 -INFO 06-24 19:59:48 [batch.py:51] router release req id 368 -INFO 06-24 19:59:48 [batch.py:51] router release req id 376 -INFO 06-24 19:59:48 [batch.py:51] router release req id 384 -INFO 06-24 19:59:48 [manager.py:224] router recive req id 848 cost time 0.8817059993743896 s -INFO 06-24 19:59:48 [manager.py:224] router recive req id 856 cost time 0.8530170917510986 s -INFO 06-24 19:59:48 [manager.py:68] detokenization recv req id 848 cost time 0.8824746608734131 s -INFO 06-24 19:59:48 [manager.py:68] detokenization recv req id 856 cost time 0.8538038730621338 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 368 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011055707931518555 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 368 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 376 not send, decode is busy -INFO 06-24 19:59:48 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009758234024047852 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 384 in_len:1063 v_len: 1063 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.0060765743255615234 s -INFO 06-24 19:59:48 [prefill_trans_obj.py:166] prefill node kv move task req_id: 384 not send, decode is busy -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 368 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 376 -INFO 06-24 19:59:48 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 384 -DEBUG 06-24 19:59:48 [manager.py:391] Prefill Batch: batch_id=189601775423375408021768785101695884949, time:1750766388.1974154s req_ids:[392, 400, 408, 416, 424, 432, 440, 448] -DEBUG 06-24 19:59:48 [manager.py:391] -INFO 06-24 19:59:49 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 392 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 400 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:392 first_token_cost:8616.84775352478ms total_cost_time:8616.903305053711ms,out_token_counter:1 mean_per_token_cost_time: 0.05555152893066406ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 408 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:400 first_token_cost:8612.566709518433ms total_cost_time:8612.594604492188ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:408 first_token_cost:8607.21755027771ms total_cost_time:8607.239723205566ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 416 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:416 first_token_cost:8602.810382843018ms total_cost_time:8602.831602096558ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 424 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:424 first_token_cost:8598.429679870605ms total_cost_time:8598.450899124146ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 432 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:432 first_token_cost:8593.9359664917ms total_cost_time:8593.95718574524ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:440 first_token_cost:8589.045763015747ms total_cost_time:8589.066743850708ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 440 -INFO 06-24 19:59:49 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:448 first_token_cost:8584.46478843689ms total_cost_time:8584.48576927185ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:49 [manager.py:162] detoken release req id 448 -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005452871322631836 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 400 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0007920265197753906 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 392 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011265277862548828 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 392 not send, decode is busy -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 408 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007835626602172852 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 416 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00324249267578125 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.018041372299194336 s -INFO 06-24 19:59:49 [prefill_trans_process.py:34] trans start: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 408 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009248495101928711 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 408 not send, decode is busy -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 416 not send, decode is busy -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 424 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008563041687011719 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 432 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003607034683227539 s -INFO 06-24 19:59:49 [prefill_trans_process.py:42] trans finished: id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -INFO 06-24 19:59:49 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 392 -DEBUG 06-24 19:59:49 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:49 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:49 [infer_batch.py:156] radix refed token num 7386 -DEBUG 06-24 19:59:49 [infer_batch.py:156] radix hold token num 15826 -DEBUG 06-24 19:59:49 [infer_batch.py:156] mem manager can alloc token num 566 -DEBUG 06-24 19:59:49 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:49 [batch.py:51] router release req id 392 -INFO 06-24 19:59:49 [batch.py:51] router release req id 400 -INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 424 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009217023849487305 s -INFO 06-24 19:59:49 [batch.py:51] router release req id 408 -INFO 06-24 19:59:49 [batch.py:51] router release req id 416 -INFO 06-24 19:59:49 [batch.py:51] router release req id 424 -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 424 not send, decode is busy -INFO 06-24 19:59:49 [batch.py:51] router release req id 432 -INFO 06-24 19:59:49 [batch.py:51] router release req id 440 -INFO 06-24 19:59:49 [batch.py:51] router release req id 448 -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 432 not send, decode is busy -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 440 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009834527969360352 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 448 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005139827728271484 s -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 408 -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 416 -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 424 -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 432 -INFO 06-24 19:59:49 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 440 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008382081985473633 s -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 440 not send, decode is busy -INFO 06-24 19:59:49 [prefill_trans_obj.py:166] prefill node kv move task req_id: 448 not send, decode is busy -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 440 -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 448 -INFO 06-24 19:59:49 [prefill_trans_process.py:44] trans cost time: 0.03542947769165039,move_total_kv_len: 1049, id: 400 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:49 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 400 cost total time: 0.054984331130981445 s -DEBUG 06-24 19:59:49 [manager.py:391] Prefill Batch: batch_id=326357583043305760939398804875066805400, time:1750766389.3391924s req_ids:[456, 464, 472, 480, 488, 496, 504] -DEBUG 06-24 19:59:49 [manager.py:391] -INFO 06-24 19:59:49 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 400 -INFO 06-24 19:59:50 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 456 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 464 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:456 first_token_cost:9583.629608154297ms total_cost_time:9583.716869354248ms,out_token_counter:1 mean_per_token_cost_time: 0.08726119995117188ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 472 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:464 first_token_cost:9579.71978187561ms total_cost_time:9579.748630523682ms,out_token_counter:1 mean_per_token_cost_time: 0.028848648071289062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 480 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:472 first_token_cost:9575.04153251648ms total_cost_time:9575.064182281494ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 488 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:480 first_token_cost:9570.974111557007ms total_cost_time:9570.996284484863ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:488 first_token_cost:9566.270112991333ms total_cost_time:9566.292762756348ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 496 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:496 first_token_cost:9560.660123825073ms total_cost_time:9560.681343078613ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [manager.py:162] detoken release req id 504 -INFO 06-24 19:59:50 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:504 first_token_cost:9554.535627365112ms total_cost_time:9554.557085037231ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007656574249267578 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 464 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0028052330017089844 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 456 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010640859603881836 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 464 not send, decode is busy -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 472 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009193897247314453 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 480 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004563331604003906 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.020784616470336914 s -INFO 06-24 19:59:50 [prefill_trans_process.py:34] trans start: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [prefill_trans_process.py:42] trans finished: id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 -INFO 06-24 19:59:50 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 464 -INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 472 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010582923889160156 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 472 not send, decode is busy -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 480 not send, decode is busy -DEBUG 06-24 19:59:50 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:50 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:50 [infer_batch.py:156] radix refed token num 6346 -DEBUG 06-24 19:59:50 [infer_batch.py:156] radix hold token num 15845 -DEBUG 06-24 19:59:50 [infer_batch.py:156] mem manager can alloc token num 547 -DEBUG 06-24 19:59:50 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 488 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.012292623519897461 s -INFO 06-24 19:59:50 [batch.py:51] router release req id 456 -INFO 06-24 19:59:50 [batch.py:51] router release req id 464 -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 496 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00767827033996582 s -INFO 06-24 19:59:50 [batch.py:51] router release req id 472 -INFO 06-24 19:59:50 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 504 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003205537796020508 s -INFO 06-24 19:59:50 [batch.py:51] router release req id 480 -INFO 06-24 19:59:50 [batch.py:51] router release req id 488 -INFO 06-24 19:59:50 [batch.py:51] router release req id 496 -INFO 06-24 19:59:50 [batch.py:51] router release req id 504 -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 472 -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 480 -INFO 06-24 19:59:50 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 488 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.012189865112304688 s -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 488 not send, decode is busy -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 496 not send, decode is busy -INFO 06-24 19:59:50 [prefill_trans_obj.py:166] prefill node kv move task req_id: 504 not send, decode is busy -INFO 06-24 19:59:50 [prefill_trans_process.py:44] trans cost time: 0.03583383560180664,move_total_kv_len: 1062, id: 456 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:50 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 456 cost total time: 0.05907273292541504 s -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 488 -DEBUG 06-24 19:59:50 [manager.py:391] Prefill Batch: batch_id=332681513737580430563604603620588698251, time:1750766390.3448286s req_ids:[512, 520, 528, 536, 544, 552, 560, 568] -DEBUG 06-24 19:59:50 [manager.py:391] -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 496 -DEBUG 06-24 19:59:50 [stats.py:37] Avg tokens(prompt+generate) throughput: 7349.145 tokens/s -DEBUG 06-24 19:59:50 [stats.py:37] Avg prompt tokens throughput: 7336.634 tokens/s -DEBUG 06-24 19:59:50 [stats.py:37] Avg generate tokens throughput: 12.512 tokens/s -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 504 -INFO 06-24 19:59:50 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 456 -INFO 06-24 19:59:50 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 -WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again -INFO 06-24 19:59:50 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 -WARNING 06-24 19:59:50 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again -INFO 06-24 19:59:50 [shm_array.py:30] create shm 2732_0_shm_prompts_62 -INFO 06-24 19:59:51 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 512 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 520 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:512 first_token_cost:10687.07275390625ms total_cost_time:10687.12043762207ms,out_token_counter:1 mean_per_token_cost_time: 0.0476837158203125ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:520 first_token_cost:10682.308197021484ms total_cost_time:10682.335376739502ms,out_token_counter:1 mean_per_token_cost_time: 0.027179718017578125ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 528 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:528 first_token_cost:10677.851438522339ms total_cost_time:10677.873849868774ms,out_token_counter:1 mean_per_token_cost_time: 0.022411346435546875ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:536 first_token_cost:10673.033714294434ms total_cost_time:10673.054695129395ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:544 first_token_cost:10668.126106262207ms total_cost_time:10668.146848678589ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:552 first_token_cost:10663.379192352295ms total_cost_time:10663.398504257202ms,out_token_counter:1 mean_per_token_cost_time: 0.019311904907226562ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 536 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:560 first_token_cost:10658.718585968018ms total_cost_time:10658.739566802979ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:568 first_token_cost:10654.187440872192ms total_cost_time:10654.206991195679ms,out_token_counter:1 mean_per_token_cost_time: 0.019550323486328125ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 544 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 552 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 560 -INFO 06-24 19:59:51 [manager.py:162] detoken release req id 568 -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 512 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.004446506500244141 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 512 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007849693298339844 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 512 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 520 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007852554321289062 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 528 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0032262802124023438 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 520 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007605075836181641 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 520 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006117343902587891 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 544 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0013079643249511719 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 536 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006703376770019531 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 536 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 544 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003616809844970703 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.018597126007080078 s -INFO 06-24 19:59:51 [prefill_trans_process.py:34] trans start: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 512 -INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 552 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007677316665649414 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 552 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0069620609283447266 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 568 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021970272064208984 s -DEBUG 06-24 19:59:51 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:51 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:51 [infer_batch.py:156] radix refed token num 7377 -DEBUG 06-24 19:59:51 [infer_batch.py:156] radix hold token num 15834 -DEBUG 06-24 19:59:51 [infer_batch.py:156] mem manager can alloc token num 558 -DEBUG 06-24 19:59:51 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:51 [batch.py:51] router release req id 512 -INFO 06-24 19:59:51 [batch.py:51] router release req id 520 -INFO 06-24 19:59:51 [batch.py:51] router release req id 528 -INFO 06-24 19:59:51 [batch.py:51] router release req id 536 -INFO 06-24 19:59:51 [batch.py:51] router release req id 544 -INFO 06-24 19:59:51 [batch.py:51] router release req id 552 -INFO 06-24 19:59:51 [batch.py:51] router release req id 560 -INFO 06-24 19:59:51 [batch.py:51] router release req id 568 -INFO 06-24 19:59:51 [prefill_trans_process.py:42] trans finished: id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1045 -INFO 06-24 19:59:51 [manager.py:224] router recive req id 864 cost time 0.907036304473877 s -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 520 -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 536 -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 544 -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 552 -INFO 06-24 19:59:51 [manager.py:68] detokenization recv req id 864 cost time 0.9086446762084961 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 560 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006991863250732422 s -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 560 not send, decode is busy -INFO 06-24 19:59:51 [prefill_trans_obj.py:166] prefill node kv move task req_id: 568 not send, decode is busy -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 560 -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 568 -INFO 06-24 19:59:51 [prefill_trans_process.py:44] trans cost time: 0.024519681930541992,move_total_kv_len: 1045, id: 528 in_len:1045 v_len: 1045 move_len: 1045 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:51 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 528 cost total time: 0.044325828552246094 s -DEBUG 06-24 19:59:51 [manager.py:391] Prefill Batch: batch_id=151190849069827501310392242175690041406, time:1750766391.4910963s req_ids:[576, 584, 592, 600, 608, 616, 624] -DEBUG 06-24 19:59:51 [manager.py:391] -INFO 06-24 19:59:51 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 528 -INFO 06-24 19:59:51 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 -WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_70 and create again -INFO 06-24 19:59:51 [shm_array.py:30] create shm 2732_0_shm_logprobs_70 -WARNING 06-24 19:59:51 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_70 and create again -INFO 06-24 19:59:51 [shm_array.py:30] create shm 2732_0_shm_prompts_70 -INFO 06-24 19:59:52 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 576 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 584 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:576 first_token_cost:11679.4753074646ms total_cost_time:11679.52013015747ms,out_token_counter:1 mean_per_token_cost_time: 0.04482269287109375ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 592 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:584 first_token_cost:11674.811840057373ms total_cost_time:11674.840211868286ms,out_token_counter:1 mean_per_token_cost_time: 0.028371810913085938ms prompt_token_num:1068 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:592 first_token_cost:11670.287609100342ms total_cost_time:11670.311450958252ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 600 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:600 first_token_cost:11664.368629455566ms total_cost_time:11664.392709732056ms,out_token_counter:1 mean_per_token_cost_time: 0.024080276489257812ms prompt_token_num:1071 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 608 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:608 first_token_cost:11659.80839729309ms total_cost_time:11659.832239151001ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 616 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:616 first_token_cost:11655.45129776001ms total_cost_time:11655.477523803711ms,out_token_counter:1 mean_per_token_cost_time: 0.026226043701171875ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:624 first_token_cost:11650.835990905762ms total_cost_time:11650.865077972412ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:52 [manager.py:162] detoken release req id 624 -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006216764450073242 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 576 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009234428405761719 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 576 not send, decode is busy -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009853839874267578 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 592 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003866434097290039 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 584 in_len:1068 v_len: 1068 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008233308792114258 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 584 not send, decode is busy -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 592 not send, decode is busy -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006408214569091797 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 600 in_len:1071 v_len: 1071 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007149934768676758 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 600 not send, decode is busy -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007205009460449219 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 616 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0011563301086425781 s -INFO 06-24 19:59:52 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 576 -DEBUG 06-24 19:59:52 [req_manager.py:78] freed all request size 136 -INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 608 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007665872573852539 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 608 not send, decode is busy -DEBUG 06-24 19:59:52 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:52 [infer_batch.py:156] radix refed token num 6360 -DEBUG 06-24 19:59:52 [infer_batch.py:156] radix hold token num 15856 -DEBUG 06-24 19:59:52 [infer_batch.py:156] mem manager can alloc token num 536 -DEBUG 06-24 19:59:52 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 616 not send, decode is busy -INFO 06-24 19:59:52 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0030508041381835938 s -INFO 06-24 19:59:52 [batch.py:51] router release req id 576 -INFO 06-24 19:59:52 [batch.py:51] router release req id 584 -INFO 06-24 19:59:52 [batch.py:51] router release req id 592 -INFO 06-24 19:59:52 [batch.py:51] router release req id 600 -INFO 06-24 19:59:52 [batch.py:51] router release req id 608 -INFO 06-24 19:59:52 [batch.py:51] router release req id 616 -INFO 06-24 19:59:52 [batch.py:51] router release req id 624 -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 584 -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 592 -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 600 -INFO 06-24 19:59:52 [manager.py:224] router recive req id 872 cost time 0.7806687355041504 s -INFO 06-24 19:59:52 [manager.py:68] detokenization recv req id 872 cost time 0.7828049659729004 s -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 608 -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 616 -INFO 06-24 19:59:52 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 624 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006276130676269531 s -INFO 06-24 19:59:52 [prefill_trans_obj.py:166] prefill node kv move task req_id: 624 not send, decode is busy -INFO 06-24 19:59:52 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 624 -DEBUG 06-24 19:59:52 [manager.py:391] Prefill Batch: batch_id=41061000598215678357118032570746485017, time:1750766392.5295331s req_ids:[632, 640, 648, 656, 664, 672, 680] -DEBUG 06-24 19:59:52 [manager.py:391] -INFO 06-24 19:59:53 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 632 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 640 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:632 first_token_cost:12656.394720077515ms total_cost_time:12656.439304351807ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:640 first_token_cost:12651.873588562012ms total_cost_time:12651.901721954346ms,out_token_counter:1 mean_per_token_cost_time: 0.028133392333984375ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 648 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:648 first_token_cost:12647.441148757935ms total_cost_time:12647.463321685791ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 656 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:656 first_token_cost:12643.292665481567ms total_cost_time:12643.314838409424ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 664 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:664 first_token_cost:12638.623237609863ms total_cost_time:12638.644218444824ms,out_token_counter:1 mean_per_token_cost_time: 0.0209808349609375ms prompt_token_num:1062 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:672 first_token_cost:12634.015321731567ms total_cost_time:12634.036540985107ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1043 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 672 -INFO 06-24 19:59:53 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:680 first_token_cost:12629.651308059692ms total_cost_time:12629.672050476074ms,out_token_counter:1 mean_per_token_cost_time: 0.020742416381835938ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:53 [manager.py:162] detoken release req id 680 -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.006348371505737305 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 640 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0016994476318359375 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 632 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010278701782226562 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 632 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 640 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00801229476928711 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 656 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.003419637680053711 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 648 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.007851362228393555 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 648 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 656 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 664 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007013559341430664 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 672 in_len:1043 v_len: 1043 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0025191307067871094 s -INFO 06-24 19:59:53 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 19:59:53 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:53 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:53 [infer_batch.py:156] radix refed token num 7386 -DEBUG 06-24 19:59:53 [infer_batch.py:156] radix hold token num 15874 -DEBUG 06-24 19:59:53 [infer_batch.py:156] mem manager can alloc token num 518 -DEBUG 06-24 19:59:53 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 632 -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 640 -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 648 -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 656 -INFO 06-24 19:59:53 [batch.py:51] router release req id 632 -INFO 06-24 19:59:53 [batch.py:51] router release req id 640 -INFO 06-24 19:59:53 [batch.py:51] router release req id 648 -INFO 06-24 19:59:53 [batch.py:51] router release req id 656 -INFO 06-24 19:59:53 [batch.py:51] router release req id 664 -INFO 06-24 19:59:53 [batch.py:51] router release req id 672 -INFO 06-24 19:59:53 [batch.py:51] router release req id 680 -INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 664 in_len:1062 v_len: 1062 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010159492492675781 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 672 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008017778396606445 s -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 672 -INFO 06-24 19:59:53 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.026720762252807617 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 680 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00932931900024414 s -INFO 06-24 19:59:53 [prefill_trans_obj.py:166] prefill node kv move task req_id: 680 not send, decode is busy -INFO 06-24 19:59:53 [prefill_trans_process.py:34] trans start: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 680 -INFO 06-24 19:59:53 [prefill_trans_process.py:42] trans finished: id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1062 -DEBUG 06-24 19:59:53 [manager.py:391] Prefill Batch: batch_id=168428558191206249765469831658675004253, time:1750766393.5315804s req_ids:[688, 696, 704, 712, 720, 728, 736, 744] -DEBUG 06-24 19:59:53 [manager.py:391] -INFO 06-24 19:59:53 [prefill_trans_process.py:44] trans cost time: 0.03470134735107422,move_total_kv_len: 1062, id: 664 in_len:1062 v_len: 1062 move_len: 1062 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:53 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 664 cost total time: 0.06349658966064453 s -INFO 06-24 19:59:53 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 664 -INFO 06-24 19:59:53 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 -WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_84 and create again -INFO 06-24 19:59:53 [shm_array.py:30] create shm 2732_0_shm_logprobs_84 -WARNING 06-24 19:59:53 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_84 and create again -INFO 06-24 19:59:53 [shm_array.py:30] create shm 2732_0_shm_prompts_84 -INFO 06-24 19:59:54 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 688 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 696 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:688 first_token_cost:13776.899814605713ms total_cost_time:13776.966333389282ms,out_token_counter:1 mean_per_token_cost_time: 0.06651878356933594ms prompt_token_num:1069 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 704 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:696 first_token_cost:13771.39139175415ms total_cost_time:13771.419286727905ms,out_token_counter:1 mean_per_token_cost_time: 0.027894973754882812ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 712 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:704 first_token_cost:13766.984701156616ms total_cost_time:13767.007827758789ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 720 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:712 first_token_cost:13762.664794921875ms total_cost_time:13762.687921524048ms,out_token_counter:1 mean_per_token_cost_time: 0.023126602172851562ms prompt_token_num:1048 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 728 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:720 first_token_cost:13758.100509643555ms total_cost_time:13758.121967315674ms,out_token_counter:1 mean_per_token_cost_time: 0.021457672119140625ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:728 first_token_cost:13753.724813461304ms total_cost_time:13753.75747680664ms,out_token_counter:1 mean_per_token_cost_time: 0.03266334533691406ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 736 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:736 first_token_cost:13749.449014663696ms total_cost_time:13749.486207962036ms,out_token_counter:1 mean_per_token_cost_time: 0.03719329833984375ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [manager.py:162] detoken release req id 744 -INFO 06-24 19:59:54 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:744 first_token_cost:13745.047092437744ms total_cost_time:13745.08261680603ms,out_token_counter:1 mean_per_token_cost_time: 0.03552436828613281ms prompt_token_num:1047 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010353565216064453 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 696 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00538325309753418 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 688 in_len:1069 v_len: 1069 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010210990905761719 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 688 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 696 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.011600494384765625 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 712 in_len:1048 v_len: 1048 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007045745849609375 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 720 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0024051666259765625 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 704 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010615110397338867 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 704 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 712 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 720 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008868217468261719 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 736 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0021393299102783203 s -INFO 06-24 19:59:54 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 688 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 696 -DEBUG 06-24 19:59:54 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:54 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:54 [infer_batch.py:156] radix refed token num 6322 -DEBUG 06-24 19:59:54 [infer_batch.py:156] radix hold token num 15830 -DEBUG 06-24 19:59:54 [infer_batch.py:156] mem manager can alloc token num 562 -DEBUG 06-24 19:59:54 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:54 [batch.py:51] router release req id 688 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 704 -INFO 06-24 19:59:54 [batch.py:51] router release req id 696 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 712 -INFO 06-24 19:59:54 [batch.py:51] router release req id 704 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 720 -INFO 06-24 19:59:54 [batch.py:51] router release req id 712 -INFO 06-24 19:59:54 [batch.py:51] router release req id 720 -INFO 06-24 19:59:54 [batch.py:51] router release req id 728 -INFO 06-24 19:59:54 [batch.py:51] router release req id 736 -INFO 06-24 19:59:54 [batch.py:51] router release req id 744 -INFO 06-24 19:59:54 [manager.py:224] router recive req id 880 cost time 0.8910543918609619 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 728 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009208917617797852 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 728 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 736 not send, decode is busy -INFO 06-24 19:59:54 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007225751876831055 s -INFO 06-24 19:59:54 [manager.py:68] detokenization recv req id 880 cost time 0.8926985263824463 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 744 in_len:1047 v_len: 1047 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.006890773773193359 s -INFO 06-24 19:59:54 [prefill_trans_obj.py:166] prefill node kv move task req_id: 744 not send, decode is busy -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 728 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 736 -INFO 06-24 19:59:54 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 744 -DEBUG 06-24 19:59:54 [manager.py:391] Prefill Batch: batch_id=298089166584524292011397622308017931149, time:1750766394.6946175s req_ids:[752, 760, 768, 776, 784, 792, 800] -DEBUG 06-24 19:59:54 [manager.py:391] -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 current batch size: 7 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 paused req num: 0 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 frozen token num: 0 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 estimated_peak_token_count: 7410 -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.0 not contain prompt cache tree unrefed token -DEBUG 06-24 19:59:54 [manager.py:248] dp_i 0 token used ratio: 0.9657149829184968 contain prompt cache tree unrefed token -INFO 06-24 19:59:55 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 752 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:752 first_token_cost:14746.919870376587ms total_cost_time:14746.965169906616ms,out_token_counter:1 mean_per_token_cost_time: 0.045299530029296875ms prompt_token_num:1053 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 760 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:760 first_token_cost:14742.695808410645ms total_cost_time:14742.723226547241ms,out_token_counter:1 mean_per_token_cost_time: 0.027418136596679688ms prompt_token_num:1064 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:768 first_token_cost:14738.271713256836ms total_cost_time:14738.295555114746ms,out_token_counter:1 mean_per_token_cost_time: 0.02384185791015625ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 768 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:776 first_token_cost:14733.774900436401ms total_cost_time:14733.810663223267ms,out_token_counter:1 mean_per_token_cost_time: 0.035762786865234375ms prompt_token_num:1051 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 776 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:784 first_token_cost:14728.860855102539ms total_cost_time:14728.896141052246ms,out_token_counter:1 mean_per_token_cost_time: 0.03528594970703125ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 784 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:792 first_token_cost:14723.912000656128ms total_cost_time:14723.941802978516ms,out_token_counter:1 mean_per_token_cost_time: 0.029802322387695312ms prompt_token_num:1061 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:40 lightllm_req_id:800 first_token_cost:14719.4185256958ms total_cost_time:14719.455480575562ms,out_token_counter:1 mean_per_token_cost_time: 0.03695487976074219ms prompt_token_num:1065 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 792 -INFO 06-24 19:59:55 [manager.py:162] detoken release req id 800 -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00774383544921875 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 760 in_len:1064 v_len: 1064 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002983570098876953 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 752 in_len:1053 v_len: 1053 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011865377426147461 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 760 not send, decode is busy -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010986804962158203 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 776 in_len:1051 v_len: 1051 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0045206546783447266 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.02506709098815918 s -INFO 06-24 19:59:55 [prefill_trans_process.py:34] trans start: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 760 -INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 768 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.01067972183227539 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 768 not send, decode is busy -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 776 not send, decode is busy -INFO 06-24 19:59:55 [prefill_trans_process.py:42] trans finished: id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1053 -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 784 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.012121438980102539 s -DEBUG 06-24 19:59:55 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:55 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:55 [infer_batch.py:156] radix refed token num 6339 -DEBUG 06-24 19:59:55 [infer_batch.py:156] radix hold token num 15847 -DEBUG 06-24 19:59:55 [infer_batch.py:156] mem manager can alloc token num 545 -DEBUG 06-24 19:59:55 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 792 in_len:1061 v_len: 1061 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007155895233154297 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 800 in_len:1065 v_len: 1065 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0024378299713134766 s -INFO 06-24 19:59:55 [batch.py:51] router release req id 752 -INFO 06-24 19:59:55 [batch.py:51] router release req id 760 -INFO 06-24 19:59:55 [batch.py:51] router release req id 768 -INFO 06-24 19:59:55 [batch.py:51] router release req id 776 -INFO 06-24 19:59:55 [batch.py:51] router release req id 784 -INFO 06-24 19:59:55 [batch.py:51] router release req id 792 -INFO 06-24 19:59:55 [batch.py:51] router release req id 800 -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 768 -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 776 -INFO 06-24 19:59:55 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 784 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010683774948120117 s -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 784 not send, decode is busy -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 792 not send, decode is busy -INFO 06-24 19:59:55 [prefill_trans_obj.py:166] prefill node kv move task req_id: 800 not send, decode is busy -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 784 -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 792 -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 800 -DEBUG 06-24 19:59:55 [manager.py:391] Prefill Batch: batch_id=190381160306182571724730900033463728660, time:1750766395.7016985s req_ids:[808, 816, 824, 832, 840, 848, 856, 864] -DEBUG 06-24 19:59:55 [manager.py:391] -INFO 06-24 19:59:55 [prefill_trans_process.py:44] trans cost time: 0.038568973541259766,move_total_kv_len: 1053, id: 752 in_len:1053 v_len: 1053 move_len: 1053 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:55 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 752 cost total time: 0.06527590751647949 s -INFO 06-24 19:59:55 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 752 -INFO 06-24 19:59:55 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 -WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_99 and create again -INFO 06-24 19:59:55 [shm_array.py:30] create shm 2732_0_shm_logprobs_99 -WARNING 06-24 19:59:55 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_99 and create again -INFO 06-24 19:59:55 [shm_array.py:30] create shm 2732_0_shm_prompts_99 -INFO 06-24 19:59:56 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 808 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:43 lightllm_req_id:808 first_token_cost:12903.241634368896ms total_cost_time:12903.290271759033ms,out_token_counter:1 mean_per_token_cost_time: 0.04863739013671875ms prompt_token_num:1046 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 816 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:45 lightllm_req_id:816 first_token_cost:11768.028497695923ms total_cost_time:11768.057584762573ms,out_token_counter:1 mean_per_token_cost_time: 0.029087066650390625ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 824 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:824 first_token_cost:10520.995140075684ms total_cost_time:10521.018505096436ms,out_token_counter:1 mean_per_token_cost_time: 0.023365020751953125ms prompt_token_num:1055 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:832 first_token_cost:10497.162580490112ms total_cost_time:10497.184753417969ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 832 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:46 lightllm_req_id:840 first_token_cost:10471.440553665161ms total_cost_time:10471.463203430176ms,out_token_counter:1 mean_per_token_cost_time: 0.022649765014648438ms prompt_token_num:1059 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:848 first_token_cost:9490.027904510498ms total_cost_time:9490.050077438354ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1052 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 840 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:47 lightllm_req_id:856 first_token_cost:9461.206197738647ms total_cost_time:9461.227893829346ms,out_token_counter:1 mean_per_token_cost_time: 0.021696090698242188ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 848 -INFO 06-24 19:59:56 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:50 lightllm_req_id:864 first_token_cost:6223.236322402954ms total_cost_time:6223.257541656494ms,out_token_counter:1 mean_per_token_cost_time: 0.021219253540039062ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 856 -INFO 06-24 19:59:56 [manager.py:162] detoken release req id 864 -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 808 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002652406692504883 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 808 in_len:1046 v_len: 1046 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008903980255126953 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 808 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 816 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0076906681060791016 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 824 in_len:1055 v_len: 1055 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0029401779174804688 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 816 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010135412216186523 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 816 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 824 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010025501251220703 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 840 in_len:1059 v_len: 1059 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.005650520324707031 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 832 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008464336395263672 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 840 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 848 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.009446859359741211 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 856 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.002905130386352539 s -INFO 06-24 19:59:56 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 808 -DEBUG 06-24 19:59:56 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:56 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:56 [infer_batch.py:156] radix refed token num 7400 -DEBUG 06-24 19:59:56 [infer_batch.py:156] radix hold token num 15849 -DEBUG 06-24 19:59:56 [infer_batch.py:156] mem manager can alloc token num 543 -DEBUG 06-24 19:59:56 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:56 [batch.py:51] router release req id 808 -INFO 06-24 19:59:56 [batch.py:51] router release req id 816 -INFO 06-24 19:59:56 [batch.py:51] router release req id 824 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 816 -INFO 06-24 19:59:56 [batch.py:51] router release req id 832 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 824 -INFO 06-24 19:59:56 [batch.py:51] router release req id 840 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 840 -INFO 06-24 19:59:56 [batch.py:51] router release req id 848 -INFO 06-24 19:59:56 [batch.py:51] router release req id 856 -INFO 06-24 19:59:56 [batch.py:51] router release req id 864 -INFO 06-24 19:59:56 [manager.py:224] router recive req id 888 cost time 0.8922204971313477 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 848 in_len:1052 v_len: 1052 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.00862264633178711 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 848 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 856 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 864 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.007134914398193359 s -INFO 06-24 19:59:56 [manager.py:68] detokenization recv req id 888 cost time 0.894127368927002 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.02945232391357422 s -INFO 06-24 19:59:56 [prefill_trans_process.py:34] trans start: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 864 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008223772048950195 s -INFO 06-24 19:59:56 [prefill_trans_obj.py:166] prefill node kv move task req_id: 864 not send, decode is busy -INFO 06-24 19:59:56 [prefill_trans_process.py:42] trans finished: id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1056 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 848 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 856 -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 864 -DEBUG 06-24 19:59:56 [manager.py:391] Prefill Batch: batch_id=266402877888843830617698295371328410035, time:1750766396.8458936s req_ids:[872, 880, 888] -DEBUG 06-24 19:59:56 [manager.py:391] -INFO 06-24 19:59:56 [prefill_trans_process.py:44] trans cost time: 0.04648113250732422,move_total_kv_len: 1056, id: 832 in_len:1056 v_len: 1056 move_len: 1056 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:56 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 832 cost total time: 0.07764625549316406 s -INFO 06-24 19:59:56 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 832 -INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_prompts_62 -INFO 06-24 19:59:57 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 872 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:51 lightllm_req_id:872 first_token_cost:5556.049346923828ms total_cost_time:5556.09393119812ms,out_token_counter:1 mean_per_token_cost_time: 0.04458427429199219ms prompt_token_num:1049 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 880 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:53 lightllm_req_id:880 first_token_cost:3500.9469985961914ms total_cost_time:3500.9727478027344ms,out_token_counter:1 mean_per_token_cost_time: 0.02574920654296875ms prompt_token_num:1057 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 888 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:55 lightllm_req_id:888 first_token_cost:1350.1379489898682ms total_cost_time:1350.1601219177246ms,out_token_counter:1 mean_per_token_cost_time: 0.022172927856445312ms prompt_token_num:1060 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 872 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.00251007080078125 s -INFO 06-24 19:59:57 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 19:59:57 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:57 [infer_batch.py:156] radix refed token num 3166 -DEBUG 06-24 19:59:57 [infer_batch.py:156] radix hold token num 15846 -DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager can alloc token num 546 -DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 872 in_len:1049 v_len: 1049 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010227203369140625 s -INFO 06-24 19:59:57 [batch.py:51] router release req id 872 -INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 880 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.008426189422607422 s -INFO 06-24 19:59:57 [batch.py:51] router release req id 880 -INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 888 in_len:1060 v_len: 1060 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0015916824340820312 s -INFO 06-24 19:59:57 [batch.py:51] router release req id 888 -INFO 06-24 19:59:57 [manager.py:224] router recive req id 896 cost time 0.17604541778564453 s -INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 896 cost time 0.17776012420654297 s -INFO 06-24 19:59:57 [prefill_trans_obj.py:222] kv_trans_handle_loop get task id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df to start kv movequeue time 0.022619962692260742 s -INFO 06-24 19:59:57 [prefill_trans_process.py:34] trans start: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 880 in_len:1057 v_len: 1057 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.010534048080444336 s -INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 880 not send, decode is busy -INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 888 not send, decode is busy -INFO 06-24 19:59:57 [prefill_trans_process.py:42] trans finished: id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df move len: 1049 -INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 880 -INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 888 -DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=84970949193112820747633257429598105154, time:1750766397.3190756s req_ids:[896] -DEBUG 06-24 19:59:57 [manager.py:391] -INFO 06-24 19:59:57 [prefill_trans_process.py:44] trans cost time: 0.02783513069152832,move_total_kv_len: 1049, id: 872 in_len:1049 v_len: 1049 move_len: 1049 dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df -INFO 06-24 19:59:57 [prefill_trans_obj.py:196] _transfer_kv data ok, req_id: 872 cost total time: 0.05187344551086426 s -INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 872 -INFO 06-24 19:59:57 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:57 [manager.py:162] detoken release req id 896 -INFO 06-24 19:59:57 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:896 first_token_cost:368.4573173522949ms total_cost_time:368.544340133667ms,out_token_counter:1 mean_per_token_cost_time: 0.08702278137207031ms prompt_token_num:1054 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:57 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 19:59:57 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:57 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:57 [infer_batch.py:156] radix refed token num 1054 -DEBUG 06-24 19:59:57 [infer_batch.py:156] radix hold token num 15849 -DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager can alloc token num 543 -DEBUG 06-24 19:59:57 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:57 [batch.py:51] router release req id 896 -INFO 06-24 19:59:57 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 896 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0046384334564208984 s -INFO 06-24 19:59:57 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 896 in_len:1054 v_len: 1054 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.008706331253051758 s -INFO 06-24 19:59:57 [prefill_trans_obj.py:166] prefill node kv move task req_id: 896 not send, decode is busy -INFO 06-24 19:59:57 [shm_req_manager.py:119] all shm req has been release ok -INFO 06-24 19:59:57 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 896 -INFO 06-24 19:59:57 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 -WARNING 06-24 19:59:57 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again -INFO 06-24 19:59:57 [shm_array.py:30] create shm 2732_0_shm_prompts_62 -INFO 06-24 19:59:57 [manager.py:224] router recive req id 904 cost time 0.023071765899658203 s -INFO 06-24 19:59:57 [manager.py:68] detokenization recv req id 904 cost time 0.025092363357543945 s -DEBUG 06-24 19:59:57 [manager.py:391] Prefill Batch: batch_id=265990561747531797382892875336867628745, time:1750766397.6218994s req_ids:[904] -DEBUG 06-24 19:59:57 [manager.py:391] -INFO 06-24 19:59:58 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 19:59:58 [manager.py:162] detoken release req id 904 -INFO 06-24 19:59:58 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 19:59:57 lightllm_req_id:904 first_token_cost:436.77806854248047ms total_cost_time:436.82312965393066ms,out_token_counter:1 mean_per_token_cost_time: 0.04506111145019531ms prompt_token_num:1056 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 19:59:58 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 19:59:58 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 19:59:58 [infer_batch.py:156] free a batch state: -DEBUG 06-24 19:59:58 [infer_batch.py:156] radix refed token num 1056 -DEBUG 06-24 19:59:58 [infer_batch.py:156] radix hold token num 15848 -DEBUG 06-24 19:59:58 [infer_batch.py:156] mem manager can alloc token num 544 -DEBUG 06-24 19:59:58 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 19:59:58 [batch.py:51] router release req id 904 -INFO 06-24 19:59:58 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 904 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.010128498077392578 s -INFO 06-24 19:59:58 [shm_req_manager.py:119] all shm req has been release ok -INFO 06-24 19:59:58 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 904 in_len:1056 v_len: 1056 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009553194046020508 s -INFO 06-24 19:59:58 [prefill_trans_obj.py:166] prefill node kv move task req_id: 904 not send, decode is busy -INFO 06-24 19:59:58 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 904 -DEBUG 06-24 20:00:00 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:00:00 [manager.py:283] -DEBUG 06-24 20:00:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:00:01 [manager.py:284] -INFO 06-24 20:00:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:00:13 [statics_utils.py:24] mean first cost: 8103.882593391216 ms -INFO 06-24 20:00:13 [statics_utils.py:24] mean per token cost: 0.02849207515210177 ms -INFO 06-24 20:00:32 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 -WARNING 06-24 20:00:32 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again -INFO 06-24 20:00:32 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 -WARNING 06-24 20:00:32 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again -INFO 06-24 20:00:32 [shm_array.py:30] create shm 2732_0_shm_prompts_62 -INFO 06-24 20:00:32 [manager.py:224] router recive req id 912 cost time 0.011658906936645508 s -INFO 06-24 20:00:32 [manager.py:68] detokenization recv req id 912 cost time 0.013936042785644531 s -DEBUG 06-24 20:00:32 [manager.py:391] Prefill Batch: batch_id=242973563606902050196922819935859844168, time:1750766432.9127753s req_ids:[912] -DEBUG 06-24 20:00:32 [manager.py:391] -DEBUG 06-24 20:00:32 [stats.py:37] Avg tokens(prompt+generate) throughput: 1069.843 tokens/s -DEBUG 06-24 20:00:32 [stats.py:37] Avg prompt tokens throughput: 1067.494 tokens/s -DEBUG 06-24 20:00:32 [stats.py:37] Avg generate tokens throughput: 2.349 tokens/s -INFO 06-24 20:00:33 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 20:00:33 [manager.py:162] detoken release req id 912 -INFO 06-24 20:00:33 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:32 lightllm_req_id:912 first_token_cost:193.95685195922852ms total_cost_time:194.00525093078613ms,out_token_counter:1 mean_per_token_cost_time: 0.04839897155761719ms prompt_token_num:1058 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 20:00:33 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 20:00:33 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 20:00:33 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:00:33 [infer_batch.py:156] radix refed token num 1058 -DEBUG 06-24 20:00:33 [infer_batch.py:156] radix hold token num 15845 -DEBUG 06-24 20:00:33 [infer_batch.py:156] mem manager can alloc token num 547 -DEBUG 06-24 20:00:33 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:00:33 [batch.py:51] router release req id 912 -INFO 06-24 20:00:33 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 912 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0037949085235595703 s -INFO 06-24 20:00:33 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 912 in_len:1058 v_len: 1058 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.009572982788085938 s -INFO 06-24 20:00:33 [prefill_trans_obj.py:166] prefill node kv move task req_id: 912 not send, decode is busy -INFO 06-24 20:00:33 [shm_req_manager.py:119] all shm req has been release ok -INFO 06-24 20:00:33 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 912 -WARNING 06-24 20:00:40 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:40 [manager.py:336] recieved req X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 -WARNING 06-24 20:00:40 [shm_array.py:25] size not same, unlink shm 2732_0_shm_logprobs_62 and create again -INFO 06-24 20:00:40 [shm_array.py:30] create shm 2732_0_shm_logprobs_62 -WARNING 06-24 20:00:40 [shm_array.py:25] size not same, unlink shm 2732_0_shm_prompts_62 and create again -INFO 06-24 20:00:40 [shm_array.py:30] create shm 2732_0_shm_prompts_62 -INFO 06-24 20:00:40 [manager.py:224] router recive req id 920 cost time 0.0210115909576416 s -INFO 06-24 20:00:41 [manager.py:68] detokenization recv req id 920 cost time 0.023111581802368164 s -DEBUG 06-24 20:00:41 [manager.py:391] Prefill Batch: batch_id=259424221880659235483337860354254147774, time:1750766441.0110836s req_ids:[920] -DEBUG 06-24 20:00:41 [manager.py:391] -INFO 06-24 20:00:41 [prefill_impl.py:93] prefill_req_handle_and_frozen_tokens -INFO 06-24 20:00:41 [manager.py:162] detoken release req id 920 -INFO 06-24 20:00:41 [manager.py:550] X-Request-Id: X-Session-Id: start_time:2025-06-24 20:00:40 lightllm_req_id:920 first_token_cost:195.88994979858398ms total_cost_time:195.93286514282227ms,out_token_counter:1 mean_per_token_cost_time: 0.04291534423828125ms prompt_token_num:1045 prompt_cache_len:0 prompt_cache_ratio:0.0 mtp_avg_token_per_step:1.0 -INFO 06-24 20:00:41 [prefill_impl.py:145] prefill_req_handle_and_frozen_tokens end -DEBUG 06-24 20:00:41 [req_manager.py:78] freed all request size 136 -DEBUG 06-24 20:00:41 [infer_batch.py:156] free a batch state: -DEBUG 06-24 20:00:41 [infer_batch.py:156] radix refed token num 1045 -DEBUG 06-24 20:00:41 [infer_batch.py:156] radix hold token num 15825 -DEBUG 06-24 20:00:41 [infer_batch.py:156] mem manager can alloc token num 567 -DEBUG 06-24 20:00:41 [infer_batch.py:156] mem manager total size 16392 -INFO 06-24 20:00:41 [prefill_trans_obj.py:140] request_kv_trans_loop get task id: 920 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df queue time 0.0008890628814697266 s -INFO 06-24 20:00:41 [batch.py:51] router release req id 920 -INFO 06-24 20:00:41 [prefill_trans_obj.py:154] request_kv_trans_loop request_data_transfer ok, id: 920 in_len:1045 v_len: 1045 move_len: None dp_index:0 connect_id: f46e3f5a-cdbb-4fc0-9d97-845cf48ee9df cost time: 0.011546134948730469 s -INFO 06-24 20:00:41 [prefill_trans_obj.py:166] prefill node kv move task req_id: 920 not send, decode is busy -INFO 06-24 20:00:41 [shm_req_manager.py:119] all shm req has been release ok -INFO 06-24 20:00:41 [prefill_infer_rpyc.py:38] unfrozen tokens for req id: 920 -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:42 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:00:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:00:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:00:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:43 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:44 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:45 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:46 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:48 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:49 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:50 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:51 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:52 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:53 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:54 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:56 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:57 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:00:58 [manager.py:590] aborted group_request_id not exist -DEBUG 06-24 20:01:02 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:01:02 [manager.py:283] -DEBUG 06-24 20:01:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:01:02 [manager.py:284] -INFO 06-24 20:01:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:01:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:01:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -WARNING 06-24 20:01:33 [manager.py:590] aborted group_request_id not exist -WARNING 06-24 20:01:41 [manager.py:590] aborted group_request_id not exist -INFO 06-24 20:01:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:02:02 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:02:02 [manager.py:283] -DEBUG 06-24 20:02:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:02:02 [manager.py:284] -INFO 06-24 20:02:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:02:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:02:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:02:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:02:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:02:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:03:03 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:03:03 [manager.py:283] -DEBUG 06-24 20:03:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:03:03 [manager.py:284] -INFO 06-24 20:03:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:03:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:03:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:03:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:03:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:03:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:04:04 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:04:04 [manager.py:283] -DEBUG 06-24 20:04:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:04:04 [manager.py:284] -INFO 06-24 20:04:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:04:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:04:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:04:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:04:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:04:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:05:04 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:05:04 [manager.py:283] -DEBUG 06-24 20:05:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:05:04 [manager.py:284] -INFO 06-24 20:05:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:05:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:05:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:05:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:05:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:05:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:06:05 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:06:05 [manager.py:283] -DEBUG 06-24 20:06:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:06:05 [manager.py:284] -INFO 06-24 20:06:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:06:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:06:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:06:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:06:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:06:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:07:06 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:07:06 [manager.py:283] -DEBUG 06-24 20:07:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:07:06 [manager.py:284] -INFO 06-24 20:07:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:07:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:07:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:07:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:07:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:07:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:08:07 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:08:07 [manager.py:283] -DEBUG 06-24 20:08:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:08:07 [manager.py:284] -INFO 06-24 20:08:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:08:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:08:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:08:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:08:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:08:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:09:07 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:09:07 [manager.py:283] -DEBUG 06-24 20:09:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:09:07 [manager.py:284] -INFO 06-24 20:09:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:09:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:09:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:09:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:09:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:09:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:10:08 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:10:08 [manager.py:283] -DEBUG 06-24 20:10:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:10:08 [manager.py:284] -INFO 06-24 20:10:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:10:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:10:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:10:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:11:09 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:11:09 [manager.py:283] -DEBUG 06-24 20:11:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:11:09 [manager.py:284] -INFO 06-24 20:11:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:11:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:11:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:11:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:11:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:11:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:12:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:12:10 [manager.py:283] -DEBUG 06-24 20:12:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:12:10 [manager.py:284] -INFO 06-24 20:12:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:12:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:12:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:12:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:12:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:12:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:13:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:13:10 [manager.py:283] -DEBUG 06-24 20:13:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:13:10 [manager.py:284] -INFO 06-24 20:13:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:13:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:13:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:13:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:13:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:14:11 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:14:11 [manager.py:283] -DEBUG 06-24 20:14:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:14:11 [manager.py:284] -INFO 06-24 20:14:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:14:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:14:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:14:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:14:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:14:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:15:12 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:15:12 [manager.py:283] -DEBUG 06-24 20:15:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:15:12 [manager.py:284] -INFO 06-24 20:15:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:15:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:15:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:15:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:16:13 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:16:13 [manager.py:283] -DEBUG 06-24 20:16:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:16:13 [manager.py:284] -INFO 06-24 20:16:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:16:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:16:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:16:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:17:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:17:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:17:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:17:13 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:17:13 [manager.py:283] -DEBUG 06-24 20:17:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:17:13 [manager.py:284] -INFO 06-24 20:17:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:17:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:17:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:18:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:18:14 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:18:14 [manager.py:283] -DEBUG 06-24 20:18:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:18:14 [manager.py:284] -INFO 06-24 20:18:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:18:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:18:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:19:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:19:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:19:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:19:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:19:15 [manager.py:283] -DEBUG 06-24 20:19:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:19:15 [manager.py:284] -INFO 06-24 20:19:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:19:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:19:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:20:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:20:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:20:15 [manager.py:283] -DEBUG 06-24 20:20:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:20:15 [manager.py:284] -INFO 06-24 20:20:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:20:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:20:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:21:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:21:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:21:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:21:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:21:16 [manager.py:283] -DEBUG 06-24 20:21:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:21:16 [manager.py:284] -INFO 06-24 20:21:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:22:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:22:17 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:22:17 [manager.py:283] -DEBUG 06-24 20:22:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:22:17 [manager.py:284] -INFO 06-24 20:22:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:22:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:22:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:23:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:23:17 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:23:17 [manager.py:283] -DEBUG 06-24 20:23:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:23:17 [manager.py:284] -INFO 06-24 20:23:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:23:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:23:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:24:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:24:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:24:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:24:18 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:24:18 [manager.py:283] -DEBUG 06-24 20:24:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:24:18 [manager.py:284] -INFO 06-24 20:24:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:25:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:25:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:25:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:25:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:25:19 [manager.py:283] -DEBUG 06-24 20:25:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:25:19 [manager.py:284] -INFO 06-24 20:25:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:25:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:25:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:26:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:26:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:26:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:26:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:26:19 [manager.py:283] -DEBUG 06-24 20:26:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:26:19 [manager.py:284] -INFO 06-24 20:26:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:26:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:26:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:27:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:27:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:27:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:27:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:27:20 [manager.py:283] -DEBUG 06-24 20:27:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:27:20 [manager.py:284] -INFO 06-24 20:27:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:27:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:27:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:28:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:28:21 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:28:21 [manager.py:283] -DEBUG 06-24 20:28:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:28:21 [manager.py:284] -INFO 06-24 20:28:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:28:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:28:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:29:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:29:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:29:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:29:22 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:29:22 [manager.py:283] -DEBUG 06-24 20:29:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:29:22 [manager.py:284] -INFO 06-24 20:29:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:29:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:29:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:30:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:30:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:30:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:30:22 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:30:22 [manager.py:283] -DEBUG 06-24 20:30:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:30:22 [manager.py:284] -INFO 06-24 20:30:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:30:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:30:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:31:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:31:23 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:31:23 [manager.py:283] -DEBUG 06-24 20:31:23 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:31:23 [manager.py:284] -INFO 06-24 20:31:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:31:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:31:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:32:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:32:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:32:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:32:24 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:32:24 [manager.py:283] -DEBUG 06-24 20:32:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:32:24 [manager.py:284] -INFO 06-24 20:32:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:32:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:32:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:33:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:33:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:33:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:33:24 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:33:24 [manager.py:283] -DEBUG 06-24 20:33:24 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:33:24 [manager.py:284] -INFO 06-24 20:33:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:34:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:34:25 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:34:25 [manager.py:283] -DEBUG 06-24 20:34:25 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:34:25 [manager.py:284] -INFO 06-24 20:34:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:34:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:34:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:35:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:35:26 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:35:26 [manager.py:283] -DEBUG 06-24 20:35:26 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:35:26 [manager.py:284] -INFO 06-24 20:35:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:35:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:35:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:36:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:36:27 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:36:27 [manager.py:283] -DEBUG 06-24 20:36:27 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:36:27 [manager.py:284] -INFO 06-24 20:36:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:36:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:36:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:37:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:37:27 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:37:27 [manager.py:283] -DEBUG 06-24 20:37:27 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:37:27 [manager.py:284] -INFO 06-24 20:37:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:37:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:37:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:38:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:38:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:38:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:38:28 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:38:28 [manager.py:283] -DEBUG 06-24 20:38:28 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:38:28 [manager.py:284] -INFO 06-24 20:38:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:39:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:39:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:39:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:39:29 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:39:29 [manager.py:283] -DEBUG 06-24 20:39:29 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:39:29 [manager.py:284] -INFO 06-24 20:39:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:39:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:39:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:40:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:40:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:40:30 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:40:30 [manager.py:283] -DEBUG 06-24 20:40:30 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:40:30 [manager.py:284] -INFO 06-24 20:40:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:40:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:40:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:41:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:41:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:41:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:41:30 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:41:30 [manager.py:283] -DEBUG 06-24 20:41:30 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:41:30 [manager.py:284] -INFO 06-24 20:41:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:42:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:42:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:42:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:42:31 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:42:31 [manager.py:283] -DEBUG 06-24 20:42:31 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:42:31 [manager.py:284] -INFO 06-24 20:42:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:42:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:42:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:43:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:43:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:43:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:43:32 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:43:32 [manager.py:283] -DEBUG 06-24 20:43:32 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:43:32 [manager.py:284] -INFO 06-24 20:43:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:43:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:43:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:44:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:44:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:44:33 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:44:33 [manager.py:283] -DEBUG 06-24 20:44:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:44:33 [manager.py:284] -INFO 06-24 20:44:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:44:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:44:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:45:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:45:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:45:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:45:33 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:45:33 [manager.py:283] -DEBUG 06-24 20:45:33 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:45:33 [manager.py:284] -INFO 06-24 20:45:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:46:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:46:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:46:34 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:46:34 [manager.py:283] -DEBUG 06-24 20:46:34 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:46:34 [manager.py:284] -INFO 06-24 20:46:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:47:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:47:35 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:47:35 [manager.py:283] -DEBUG 06-24 20:47:35 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:47:35 [manager.py:284] -INFO 06-24 20:47:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:47:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:47:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:48:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:48:36 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:48:36 [manager.py:283] -DEBUG 06-24 20:48:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:48:36 [manager.py:284] -INFO 06-24 20:48:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:48:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:48:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:49:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:49:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:49:36 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:49:36 [manager.py:283] -DEBUG 06-24 20:49:36 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:49:36 [manager.py:284] -INFO 06-24 20:49:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:49:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:49:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:50:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:50:37 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:50:37 [manager.py:283] -DEBUG 06-24 20:50:37 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:50:37 [manager.py:284] -INFO 06-24 20:50:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:50:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:50:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:51:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:51:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:51:38 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:51:38 [manager.py:283] -DEBUG 06-24 20:51:38 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:51:38 [manager.py:284] -INFO 06-24 20:51:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:51:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:51:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:52:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:52:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:52:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:52:39 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:52:39 [manager.py:283] -DEBUG 06-24 20:52:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:52:39 [manager.py:284] -INFO 06-24 20:52:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:53:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:53:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:53:39 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:53:39 [manager.py:283] -DEBUG 06-24 20:53:39 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:53:39 [manager.py:284] -INFO 06-24 20:53:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:54:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:54:40 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:54:40 [manager.py:283] -DEBUG 06-24 20:54:40 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:54:40 [manager.py:284] -INFO 06-24 20:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:54:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:54:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:55:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:55:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:55:41 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:55:41 [manager.py:283] -DEBUG 06-24 20:55:41 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:55:41 [manager.py:284] -INFO 06-24 20:55:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:55:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:55:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:56:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 20:56:42 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:56:42 [manager.py:283] -DEBUG 06-24 20:56:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:56:42 [manager.py:284] -INFO 06-24 20:56:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:56:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:56:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:57:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:57:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:57:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:57:42 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:57:42 [manager.py:283] -DEBUG 06-24 20:57:42 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:57:42 [manager.py:284] -INFO 06-24 20:57:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:58:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:58:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:58:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:58:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:58:43 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:58:43 [manager.py:283] -DEBUG 06-24 20:58:43 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:58:43 [manager.py:284] -INFO 06-24 20:59:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:59:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:59:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 20:59:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 20:59:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 20:59:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 20:59:44 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 20:59:44 [manager.py:283] -DEBUG 06-24 20:59:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 20:59:44 [manager.py:284] -INFO 06-24 21:00:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:00:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:00:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:00:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:00:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:00:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:00:44 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:00:44 [manager.py:283] -DEBUG 06-24 21:00:44 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:00:44 [manager.py:284] -INFO 06-24 21:01:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:01:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:01:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:01:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:01:45 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:01:45 [manager.py:283] -DEBUG 06-24 21:01:45 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:01:45 [manager.py:284] -INFO 06-24 21:02:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:02:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:02:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:02:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:02:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:02:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:02:46 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:02:46 [manager.py:283] -DEBUG 06-24 21:02:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:02:46 [manager.py:284] -INFO 06-24 21:03:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:03:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:03:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:03:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:03:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:03:46 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:03:46 [manager.py:283] -DEBUG 06-24 21:03:46 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:03:46 [manager.py:284] -INFO 06-24 21:04:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:04:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:04:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:04:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:04:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:04:47 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:04:47 [manager.py:283] -DEBUG 06-24 21:04:47 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:04:47 [manager.py:284] -INFO 06-24 21:05:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:05:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:05:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:05:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:05:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:05:48 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:05:48 [manager.py:283] -DEBUG 06-24 21:05:48 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:05:48 [manager.py:284] -INFO 06-24 21:06:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:06:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:06:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:06:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:06:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:06:49 [manager.py:283] -DEBUG 06-24 21:06:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:06:49 [manager.py:284] -INFO 06-24 21:07:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:07:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:07:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:07:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:07:49 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:07:49 [manager.py:283] -DEBUG 06-24 21:07:49 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:07:49 [manager.py:284] -INFO 06-24 21:08:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:08:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:08:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:08:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:08:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:08:50 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:08:50 [manager.py:283] -DEBUG 06-24 21:08:50 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:08:50 [manager.py:284] -INFO 06-24 21:09:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:09:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:09:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:09:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:09:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:09:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:09:51 [manager.py:283] -DEBUG 06-24 21:09:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:09:51 [manager.py:284] -INFO 06-24 21:10:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:10:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:10:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:10:51 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:10:51 [manager.py:283] -DEBUG 06-24 21:10:51 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:10:51 [manager.py:284] -INFO 06-24 21:11:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:11:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:11:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:11:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:11:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:11:52 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:11:52 [manager.py:283] -DEBUG 06-24 21:11:52 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:11:52 [manager.py:284] -INFO 06-24 21:12:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:12:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:12:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:12:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:12:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:12:53 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:12:53 [manager.py:283] -DEBUG 06-24 21:12:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:12:53 [manager.py:284] -INFO 06-24 21:13:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:13:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:13:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:13:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:13:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:13:53 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:13:53 [manager.py:283] -DEBUG 06-24 21:13:53 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:13:53 [manager.py:284] -INFO 06-24 21:14:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:14:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:14:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:14:54 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:14:54 [manager.py:283] -DEBUG 06-24 21:14:54 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:14:54 [manager.py:284] -INFO 06-24 21:15:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:15:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:15:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:15:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:15:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:15:55 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:15:55 [manager.py:283] -DEBUG 06-24 21:15:55 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:15:55 [manager.py:284] -INFO 06-24 21:16:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:16:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:16:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:16:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:16:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:16:56 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:16:56 [manager.py:283] -DEBUG 06-24 21:16:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:16:56 [manager.py:284] -INFO 06-24 21:17:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:17:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:17:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:17:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:17:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:17:56 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:17:56 [manager.py:283] -DEBUG 06-24 21:17:56 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:17:56 [manager.py:284] -INFO 06-24 21:18:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:18:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:18:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:18:57 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:18:57 [manager.py:283] -DEBUG 06-24 21:18:57 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:18:57 [manager.py:284] -INFO 06-24 21:19:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:19:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:19:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:19:58 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:19:58 [manager.py:283] -DEBUG 06-24 21:19:58 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:19:58 [manager.py:284] -INFO 06-24 21:20:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:20:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:20:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:20:59 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:20:59 [manager.py:283] -DEBUG 06-24 21:20:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:20:59 [manager.py:284] -INFO 06-24 21:21:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:21:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:21:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:21:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:21:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:21:59 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:21:59 [manager.py:283] -DEBUG 06-24 21:21:59 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:21:59 [manager.py:284] -INFO 06-24 21:22:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:22:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:22:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:23:00 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:23:00 [manager.py:283] -DEBUG 06-24 21:23:00 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:23:00 [manager.py:284] -INFO 06-24 21:23:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:23:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:23:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:23:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:23:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:24:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:24:01 [manager.py:283] -DEBUG 06-24 21:24:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:24:01 [manager.py:284] -INFO 06-24 21:24:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:24:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:24:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:24:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:24:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:25:01 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:25:01 [manager.py:283] -DEBUG 06-24 21:25:01 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:25:01 [manager.py:284] -INFO 06-24 21:25:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:25:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:25:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:26:02 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:26:02 [manager.py:283] -DEBUG 06-24 21:26:02 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:26:02 [manager.py:284] -INFO 06-24 21:26:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:26:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:26:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:26:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:26:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:27:03 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:27:03 [manager.py:283] -DEBUG 06-24 21:27:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:27:03 [manager.py:284] -INFO 06-24 21:27:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:27:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:27:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:27:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:27:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:28:03 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:28:03 [manager.py:283] -DEBUG 06-24 21:28:03 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:28:03 [manager.py:284] -INFO 06-24 21:28:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:28:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:28:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:29:04 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:29:04 [manager.py:283] -DEBUG 06-24 21:29:04 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:29:04 [manager.py:284] -INFO 06-24 21:29:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:29:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:29:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:30:05 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:30:05 [manager.py:283] -DEBUG 06-24 21:30:05 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:30:05 [manager.py:284] -INFO 06-24 21:30:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:30:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:30:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:30:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:31:06 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:31:06 [manager.py:283] -DEBUG 06-24 21:31:06 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:31:06 [manager.py:284] -INFO 06-24 21:31:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:31:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:31:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:31:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:31:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:32:07 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:32:07 [manager.py:283] -DEBUG 06-24 21:32:07 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:32:07 [manager.py:284] -INFO 06-24 21:32:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:32:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:32:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:32:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:32:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:33:08 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:33:08 [manager.py:283] -DEBUG 06-24 21:33:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:33:08 [manager.py:284] -INFO 06-24 21:33:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:33:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:33:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:33:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:34:08 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:34:08 [manager.py:283] -DEBUG 06-24 21:34:08 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:34:08 [manager.py:284] -INFO 06-24 21:34:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:34:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:34:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:34:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:34:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:35:09 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:35:09 [manager.py:283] -DEBUG 06-24 21:35:09 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:35:09 [manager.py:284] -INFO 06-24 21:35:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:35:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:35:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:35:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:35:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:36:10 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:36:10 [manager.py:283] -DEBUG 06-24 21:36:10 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:36:10 [manager.py:284] -INFO 06-24 21:36:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:36:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:36:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:36:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:36:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:37:11 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:37:11 [manager.py:283] -DEBUG 06-24 21:37:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:37:11 [manager.py:284] -INFO 06-24 21:37:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:37:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:37:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:37:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:37:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:38:11 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:38:11 [manager.py:283] -DEBUG 06-24 21:38:11 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:38:11 [manager.py:284] -INFO 06-24 21:38:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:38:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:38:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:38:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:38:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:39:12 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:39:12 [manager.py:283] -DEBUG 06-24 21:39:12 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:39:12 [manager.py:284] -INFO 06-24 21:39:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:39:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:39:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:39:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:39:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:40:13 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:40:13 [manager.py:283] -DEBUG 06-24 21:40:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:40:13 [manager.py:284] -INFO 06-24 21:40:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:40:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:40:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:41:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:41:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:41:13 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:41:13 [manager.py:283] -DEBUG 06-24 21:41:13 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:41:13 [manager.py:284] -INFO 06-24 21:41:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:41:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:41:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:42:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:42:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:42:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:42:14 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:42:14 [manager.py:283] -DEBUG 06-24 21:42:14 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:42:14 [manager.py:284] -INFO 06-24 21:42:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:43:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:43:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:43:15 [manager.py:283] -DEBUG 06-24 21:43:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:43:15 [manager.py:284] -INFO 06-24 21:43:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:43:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:43:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:44:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:44:15 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:44:15 [manager.py:283] -DEBUG 06-24 21:44:15 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:44:15 [manager.py:284] -INFO 06-24 21:44:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:44:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:44:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:45:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -DEBUG 06-24 21:45:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:45:16 [manager.py:283] -DEBUG 06-24 21:45:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:45:16 [manager.py:284] -INFO 06-24 21:45:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:45:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:45:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:46:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:46:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:46:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:46:16 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:46:16 [manager.py:283] -DEBUG 06-24 21:46:16 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:46:16 [manager.py:284] -INFO 06-24 21:46:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:47:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:47:17 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:47:17 [manager.py:283] -DEBUG 06-24 21:47:17 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:47:17 [manager.py:284] -INFO 06-24 21:47:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:47:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:47:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:48:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:48:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:48:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:48:18 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:48:18 [manager.py:283] -DEBUG 06-24 21:48:18 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:48:18 [manager.py:284] -INFO 06-24 21:48:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:48:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:48:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:49:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:49:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:49:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:49:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:49:19 [manager.py:283] -DEBUG 06-24 21:49:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:49:19 [manager.py:284] -INFO 06-24 21:49:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:49:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:49:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:50:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:50:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:50:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:50:19 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:50:19 [manager.py:283] -DEBUG 06-24 21:50:19 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:50:19 [manager.py:284] -INFO 06-24 21:50:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:51:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:51:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:51:20 [manager.py:283] -DEBUG 06-24 21:51:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:51:20 [manager.py:284] -INFO 06-24 21:51:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:51:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:51:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:52:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:52:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:52:20 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:52:20 [manager.py:283] -DEBUG 06-24 21:52:20 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:52:20 [manager.py:284] -INFO 06-24 21:52:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:52:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:52:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:53:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:53:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -DEBUG 06-24 21:53:21 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:53:21 [manager.py:283] -DEBUG 06-24 21:53:21 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:53:21 [manager.py:284] -INFO 06-24 21:53:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:53:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:53:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:54:13 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:13 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:54:13 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -ERROR 06-24 21:54:19 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:19 [pd_loop.py:121] no close frame received or sent -ERROR 06-24 21:54:19 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 99, in _pd_handle_task -ERROR 06-24 21:54:19 [pd_loop.py:121] recv_bytes = await websocket.recv() -ERROR 06-24 21:54:19 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/connection.py", line 322, in recv -ERROR 06-24 21:54:19 [pd_loop.py:121] raise self.protocol.close_exc from self.recv_exc -ERROR 06-24 21:54:19 [pd_loop.py:121] websockets.exceptions.ConnectionClosedError: no close frame received or sent -DEBUG 06-24 21:54:22 [manager.py:283] dp_i 0 frozen token num: 0 -DEBUG 06-24 21:54:22 [manager.py:283] -DEBUG 06-24 21:54:22 [manager.py:284] dp_i 0 estimated_peak_token_count: 0 -DEBUG 06-24 21:54:22 [manager.py:284] -INFO 06-24 21:54:29 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:29 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:29 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:29 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:29 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:29 [pd_loop.py:121] return await self -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:29 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:29 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:29 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:39 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:39 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:39 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:39 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:39 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:39 [pd_loop.py:121] return await self -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:39 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:39 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:39 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:43 [pd_loop.py:42] get pd_master_objs {0: PD_Master_Obj(node_id=0, host_ip_port='127.0.1.1:60011')} -INFO 06-24 21:54:43 [statics_utils.py:24] mean first cost: 7966.335476999698 ms -INFO 06-24 21:54:43 [statics_utils.py:24] mean per token cost: 0.02879059833029042 ms -INFO 06-24 21:54:49 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:49 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:49 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:49 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:49 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:49 [pd_loop.py:121] return await self -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:49 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:49 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:49 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:54:59 [pd_loop.py:126] reconnection to pd_master -ERROR 06-24 21:54:59 [pd_loop.py:120] connetion to pd_master has error -ERROR 06-24 21:54:59 [pd_loop.py:121] [Errno 111] Connection refused -ERROR 06-24 21:54:59 [pd_loop.py:121] Traceback (most recent call last): -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/bzh/lightllm-pd/lightllm/server/httpserver/pd_loop.py", line 74, in _pd_handle_task -ERROR 06-24 21:54:59 [pd_loop.py:121] async with websockets.connect( -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 587, in __aenter__ -ERROR 06-24 21:54:59 [pd_loop.py:121] return await self -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 541, in __await_impl__ -ERROR 06-24 21:54:59 [pd_loop.py:121] self.connection = await self.create_connection() -ERROR 06-24 21:54:59 [pd_loop.py:121] File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/site-packages/websockets/asyncio/client.py", line 467, in create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] _, connection = await loop.create_connection(factory, **kwargs) -ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2039, in create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] File "uvloop/loop.pyx", line 2016, in uvloop.loop.Loop.create_connection -ERROR 06-24 21:54:59 [pd_loop.py:121] ConnectionRefusedError: [Errno 111] Connection refused -INFO 06-24 21:55:09 [api_start.py:25] Received SIGINT (Ctrl+C), forcing immediate exit... - -ERROR 06-24 21:55:09 [prefill_kv_move_manager.py:96] -Traceback (most recent call last): - File "/home/youwei/bzh/lightllm-pd/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_kv_move_manager.py", line 85, in task_dispatcher_loop - move_task: KVMoveTask = self.info_queue.get() - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/queues.py", line 103, in get - res = self._recv_bytes() - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes - buf = self._recv_bytes(maxlength) - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes - buf = self._recv(4) - File "/home/youwei/anaconda3/envs/lightllm_pd/lib/python3.9/multiprocessing/connection.py", line 379, in _recv - chunk = read(handle, remaining) -KeyboardInterrupt -INFO 06-24 21:55:09 [start_utils.py:106] Killing child process 1213613 -INFO 06-24 21:55:09 [start_utils.py:106] Killing child process 1214162 -INFO 06-24 21:55:09 [start_utils.py:108] Killing parent process 1213612 -INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211407 -INFO 06-24 21:55:09 [start_utils.py:51] Killing child process 1212910 -INFO 06-24 21:55:09 [start_utils.py:51] Killing child process 1213222 -INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211508 -INFO 06-24 21:55:09 [start_utils.py:53] Killing parent process 1211509 -INFO 06-24 21:55:09 [start_utils.py:69] All processes terminated gracefully. -INFO 06-24 21:55:09 [api_start.py:30] All processes have been forcefully terminated. From 2387b2d390e423887adbd2a0b70fb07e7f9f838c Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:24:04 +0800 Subject: [PATCH 10/17] fix: remove unused code --- lightllm/server/httpserver_for_pd_master/manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index 295b52d5a..d737a07d2 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -300,8 +300,6 @@ async def _wait_to_token_package( unfinished_count = sampling_params.best_of is_first_token = True - max_new_tokens = sampling_params.max_new_tokens - async for sub_req_id, out_str, metadata, finish_status in self.fetch_stream( p_node, d_node, prompt, sampling_params, multimodal_params, request ): From d62544c5d5837128ffebbbc95128a0b405122751 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:47:43 +0800 Subject: [PATCH 11/17] feat: rename server arg --- lightllm/server/api_cli.py | 12 ++++++------ lightllm/server/httpserver_for_pd_master/manager.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index 293958f5b..9fcecd81f 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -60,6 +60,12 @@ def make_argument_parser() -> argparse.ArgumentParser: default="default_model_name", help="just help to distinguish internal model name, use 'host:port/get_model_name' to get", ) + parser.add_argument( + "--chunked_max_new_token", + type=int, + default=0, + help="""Specifies the chunk size for pd mode.""", + ) parser.add_argument( "--model_dir", @@ -434,10 +440,4 @@ def make_argument_parser() -> argparse.ArgumentParser: but ensure that the model is compatible with the specified step count. currently, deepseekv3 model only support 1 step""", ) - parser.add_argument( - "--pd_chunk_size", - type=int, - default=0, - help="""Specifies the chunk size for pd mode.""", - ) return parser diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index d737a07d2..d4a48685e 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -240,11 +240,11 @@ async def fetch_stream( sampling_params.suggested_dp_index = up_status_event.upkv_status.dp_index remaining_tokens = old_max_new_tokens - 1 - pd_chunk_size = self.args.pd_chunk_size + chunked_max_new_token = self.args.chunked_max_new_token current_prompt_ids = list(prompt_ids) while remaining_tokens > 0: - chunk_size = min(remaining_tokens, pd_chunk_size) if pd_chunk_size > 0 else remaining_tokens + chunk_size = min(remaining_tokens, chunked_max_new_token) if chunked_max_new_token > 0 else remaining_tokens sampling_params.max_new_tokens = chunk_size await d_node.websocket.send_bytes( pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) From 719993e5d126221d23bfd83e5fb074885ffd0fa6 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 25 Jun 2025 11:50:21 +0800 Subject: [PATCH 12/17] feat: update doc --- docs/CN/source/tutorial/api_server_args_zh.rst | 5 +++++ docs/EN/source/tutorial/api_server_args_zh.rst | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/docs/CN/source/tutorial/api_server_args_zh.rst b/docs/CN/source/tutorial/api_server_args_zh.rst index d7c055ef4..7e87b9c62 100755 --- a/docs/CN/source/tutorial/api_server_args_zh.rst +++ b/docs/CN/source/tutorial/api_server_args_zh.rst @@ -64,6 +64,11 @@ PD 分离模式参数 配置服务器模式下的端口号 + +.. option:: --chunked_max_new_token + + 分块解码最大 token 数量,默认为 ``0`` ,代表不使用分块解码 + 模型配置参数 ----------- diff --git a/docs/EN/source/tutorial/api_server_args_zh.rst b/docs/EN/source/tutorial/api_server_args_zh.rst index 3b25ae85c..d7350485b 100755 --- a/docs/EN/source/tutorial/api_server_args_zh.rst +++ b/docs/EN/source/tutorial/api_server_args_zh.rst @@ -64,6 +64,10 @@ PD disaggregation Mode Parameters Port number in configuration server mode +.. option:: --chunked_max_new_token + + Maximum token number for chunked decoding, default is ``0``, representing no chunked decoding + Model Configuration Parameters ----------------------------- From 888937cc3727171171ceccfa0c07ddf7c3015ad8 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 1 Jul 2025 17:03:26 +0800 Subject: [PATCH 13/17] feat: add decode fail retry --- .../httpserver_for_pd_master/manager.py | 54 +++++++++++++------ lightllm/utils/error_utils.py | 5 ++ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index d4a48685e..862c139a2 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -24,7 +24,7 @@ from lightllm.server.metrics.manager import MetricClient from lightllm.utils.statics_utils import MovingAverage from lightllm.server.httpserver.manager import AsyncQueue -from lightllm.utils.error_utils import ServerBusyError +from lightllm.utils.error_utils import ServerBusyError, KVMoveTimeoutError logger = init_logger(__name__) @@ -123,6 +123,9 @@ async def generate( ): start_time = time.time() group_request_id = self.id_gen.generate_id() + max_retries = 3 + retry_count = 0 + try: sampling_params.group_request_id = group_request_id # 记录请求到达的相关信息 @@ -131,22 +134,41 @@ async def generate( self.metric_client.counter_inc("lightllm_request_count") self.metric_client.histogram_observe("lightllm_request_max_new_tokens", sampling_params.max_new_tokens) - p_node, d_node = await self.select_p_d_node(prompt, sampling_params, multimodal_params) - - results_generator = self._wait_to_token_package( - p_node, - d_node, - start_time, - prompt, - sampling_params, - multimodal_params, - request, - ) - async for sub_req_id, request_output, metadata, finish_status in results_generator: - yield sub_req_id, request_output, metadata, finish_status + while retry_count <= max_retries: + try: + p_node, d_node = await self.select_p_d_node(prompt, sampling_params, multimodal_params) + + results_generator = self._wait_to_token_package( + p_node, + d_node, + start_time, + prompt, + sampling_params, + multimodal_params, + request, + ) + async for sub_req_id, request_output, metadata, finish_status in results_generator: + yield sub_req_id, request_output, metadata, finish_status + + break + + except KVMoveTimeoutError as e: + retry_count += 1 + if retry_count <= max_retries: + logger.warning(f"KV move timeout for group_request_id {group_request_id}, attempt {retry_count}/{max_retries + 1}. Retrying with new nodes...") + # 清理当前请求状态,准备重试 + await self.abort(group_request_id) + # 重新生成group_request_id避免冲突 + group_request_id = self.id_gen.generate_id() + sampling_params.group_request_id = group_request_id + continue + else: + logger.error(f"KV move timeout after {max_retries + 1} attempts for group_request_id {group_request_id}. Giving up.") + raise ServerBusyError(f"KV move timeout after {max_retries + 1} attempts, server is busy now.") except BaseException as e: - logger.error(f"has exception {str(e)}") + if not isinstance(e, KVMoveTimeoutError): + logger.error(f"has exception {str(e)}") await self.abort(group_request_id) raise e @@ -234,7 +256,7 @@ async def fetch_stream( await asyncio.wait_for(up_status_event.wait(), timeout=60) except asyncio.TimeoutError: logger.warning(f"group_request_id: {group_request_id} kv move time out err, server is busy now.") - raise ServerBusyError() + raise KVMoveTimeoutError(f"KV move timeout for group_request_id {group_request_id}") sampling_params.move_kv_to_decode_node.initialize(None) sampling_params.suggested_dp_index = up_status_event.upkv_status.dp_index diff --git a/lightllm/utils/error_utils.py b/lightllm/utils/error_utils.py index 4424fc17d..d1ffe2b7c 100644 --- a/lightllm/utils/error_utils.py +++ b/lightllm/utils/error_utils.py @@ -16,3 +16,8 @@ def __init__(self, message="Server is busy, please try again later", status_code def __str__(self): """String representation of the error""" return f"{self.message} (Status code: {self.status_code})" + +class KVMoveTimeoutError(ServerBusyError): + """KV移动超时错误,用于触发重试机制""" + def __init__(self, message="KV move timeout, please try again later", status_code=503): + super().__init__(message, status_code) \ No newline at end of file From 576b82bf138e7a32a9684e7f4c0014a7b9e4918b Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Mon, 7 Jul 2025 20:40:52 +0800 Subject: [PATCH 14/17] feat: add decode kv cache transfer back --- .../CN/source/tutorial/api_server_args_zh.rst | 4 ++ .../EN/source/tutorial/api_server_args_zh.rst | 4 ++ lightllm/server/api_cli.py | 6 ++ .../httpserver_for_pd_master/manager.py | 56 ++++++++++++++++++- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/docs/CN/source/tutorial/api_server_args_zh.rst b/docs/CN/source/tutorial/api_server_args_zh.rst index 7e87b9c62..5d8971a84 100755 --- a/docs/CN/source/tutorial/api_server_args_zh.rst +++ b/docs/CN/source/tutorial/api_server_args_zh.rst @@ -69,6 +69,10 @@ PD 分离模式参数 分块解码最大 token 数量,默认为 ``0`` ,代表不使用分块解码 +.. option:: --pd_max_retry_count + + PD 模式下 kv 传输失败的最大重试次数,默认为 ``3`` + 模型配置参数 ----------- diff --git a/docs/EN/source/tutorial/api_server_args_zh.rst b/docs/EN/source/tutorial/api_server_args_zh.rst index d7350485b..694e1b88c 100755 --- a/docs/EN/source/tutorial/api_server_args_zh.rst +++ b/docs/EN/source/tutorial/api_server_args_zh.rst @@ -68,6 +68,10 @@ PD disaggregation Mode Parameters Maximum token number for chunked decoding, default is ``0``, representing no chunked decoding +.. option:: --pd_max_retry_count + + Maximum retry count for kv transmission in PD mode, default is ``3`` + Model Configuration Parameters ----------------------------- diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index a2d92cb15..5124d182e 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -66,6 +66,12 @@ def make_argument_parser() -> argparse.ArgumentParser: default=0, help="""Specifies the chunk size for pd mode.""", ) + parser.add_argument( + "--pd_max_retry_count", + type=int, + default=3, + help="""Specifies the max retry count for pd mode.""", + ) parser.add_argument( "--model_dir", diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index 862c139a2..ca2550198 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -123,7 +123,7 @@ async def generate( ): start_time = time.time() group_request_id = self.id_gen.generate_id() - max_retries = 3 + max_retries = self.args.max_retries retry_count = 0 try: @@ -268,6 +268,27 @@ async def fetch_stream( while remaining_tokens > 0: chunk_size = min(remaining_tokens, chunked_max_new_token) if chunked_max_new_token > 0 else remaining_tokens sampling_params.max_new_tokens = chunk_size + + # 如果不是第一个chunk,需要重新将KV Cache从prefill发送到decode节点 + if remaining_tokens < old_max_new_tokens - 1: + # 重新设置KV Cache迁移参数,将KV Cache从prefill发送到decode节点 + sampling_params.move_kv_to_decode_node.initialize(decode_node_dict) + sampling_params.suggested_dp_index = -1 + + # 创建新的迁移事件并等待KV Cache迁移完成 + up_status_event = req_status.up_status_event + up_status_event.clear() + + try: + await asyncio.wait_for(up_status_event.wait(), timeout=60) + except asyncio.TimeoutError: + logger.warning(f"group_request_id: {group_request_id} kv move time out err, server is busy now.") + raise KVMoveTimeoutError(f"KV move timeout for group_request_id {group_request_id}") + + # 迁移完成后,重置参数 + sampling_params.move_kv_to_decode_node.initialize(None) + sampling_params.suggested_dp_index = up_status_event.upkv_status.dp_index + await d_node.websocket.send_bytes( pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) ) @@ -304,6 +325,39 @@ async def fetch_stream( if final_finish_status.is_finished(): break + # 如果不是最后一个chunk,需要将KV Cache从decode节点发送回prefill节点 + if remaining_tokens > 0: + p_start_args = p_node.start_args + prefill_node_dict = { + "node_id": p_start_args["pd_node_id"], + "ip": p_start_args["host"], + "rpyc_port": p_start_args.get("pd_prefill_rpyc_port", p_start_args["rpyc_port"]), + "max_new_tokens": 0, # 表示这是回传操作 + "pd_master_node_id": self.args.pd_node_id, + } + + # 使用一个特殊的请求将KV Cache发送回prefill节点 + sampling_params.move_kv_to_decode_node.initialize(prefill_node_dict) + sampling_params.suggested_dp_index = -1 + + # 向decode节点发送回传KV Cache的指令 + await d_node.websocket.send_bytes( + pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) + ) + + # 等待KV Cache回传完成 + up_status_event = req_status.up_status_event + up_status_event.clear() + + try: + await asyncio.wait_for(up_status_event.wait(), timeout=60) + except asyncio.TimeoutError: + logger.warning(f"group_request_id: {group_request_id} kv move back time out err, server is busy now.") + raise KVMoveTimeoutError(f"KV move back timeout for group_request_id {group_request_id}") + + # 回传完成后,重置参数 + sampling_params.move_kv_to_decode_node.initialize(None) + return async def _wait_to_token_package( From 037772175ad9a5544a0b204c1c27aef35fa31eb1 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Mon, 7 Jul 2025 21:26:51 +0800 Subject: [PATCH 15/17] feat: better status --- lightllm/server/httpserver_for_pd_master/manager.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index ca2550198..feda34d49 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -212,6 +212,7 @@ async def fetch_stream( self.req_id_to_out_inf[group_request_id] = req_status up_status_event = req_status.up_status_event + up_status_event.clear() d_start_args = d_node.start_args decode_node_dict = { @@ -331,21 +332,20 @@ async def fetch_stream( prefill_node_dict = { "node_id": p_start_args["pd_node_id"], "ip": p_start_args["host"], - "rpyc_port": p_start_args.get("pd_prefill_rpyc_port", p_start_args["rpyc_port"]), - "max_new_tokens": 0, # 表示这是回传操作 + "rpyc_port": d_start_args["pd_decode_rpyc_port"], + "max_new_tokens": 0, "pd_master_node_id": self.args.pd_node_id, } # 使用一个特殊的请求将KV Cache发送回prefill节点 + sampling_params.max_new_tokens = 0 sampling_params.move_kv_to_decode_node.initialize(prefill_node_dict) sampling_params.suggested_dp_index = -1 - # 向decode节点发送回传KV Cache的指令 - await d_node.websocket.send_bytes( + await p_node.websocket.send_bytes( pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) ) - # 等待KV Cache回传完成 up_status_event = req_status.up_status_event up_status_event.clear() From aba310bf06aac7ca4955d4156ba2d5389735c685 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Tue, 8 Jul 2025 15:59:50 +0800 Subject: [PATCH 16/17] fix: retry cli --- lightllm/server/httpserver_for_pd_master/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index feda34d49..06ae02eb6 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -123,7 +123,7 @@ async def generate( ): start_time = time.time() group_request_id = self.id_gen.generate_id() - max_retries = self.args.max_retries + max_retries = self.args.pd_max_retry_count retry_count = 0 try: From 641a5b35c1b9760a97fa0665ccd32eff03add8a9 Mon Sep 17 00:00:00 2001 From: pigKiller <22373017@buaa.edu.cn> Date: Wed, 9 Jul 2025 13:36:18 +0800 Subject: [PATCH 17/17] fix: pd send --- .../httpserver_for_pd_master/manager.py | 33 +++---------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py index 06ae02eb6..57765dbfa 100644 --- a/lightllm/server/httpserver_for_pd_master/manager.py +++ b/lightllm/server/httpserver_for_pd_master/manager.py @@ -155,7 +155,7 @@ async def generate( except KVMoveTimeoutError as e: retry_count += 1 if retry_count <= max_retries: - logger.warning(f"KV move timeout for group_request_id {group_request_id}, attempt {retry_count}/{max_retries + 1}. Retrying with new nodes...") + logger.warning(f"KV move timeout for group_request_id {group_request_id}, attempt {retry_count}/{max_retries}. Retrying with new nodes...") # 清理当前请求状态,准备重试 await self.abort(group_request_id) # 重新生成group_request_id避免冲突 @@ -270,26 +270,6 @@ async def fetch_stream( chunk_size = min(remaining_tokens, chunked_max_new_token) if chunked_max_new_token > 0 else remaining_tokens sampling_params.max_new_tokens = chunk_size - # 如果不是第一个chunk,需要重新将KV Cache从prefill发送到decode节点 - if remaining_tokens < old_max_new_tokens - 1: - # 重新设置KV Cache迁移参数,将KV Cache从prefill发送到decode节点 - sampling_params.move_kv_to_decode_node.initialize(decode_node_dict) - sampling_params.suggested_dp_index = -1 - - # 创建新的迁移事件并等待KV Cache迁移完成 - up_status_event = req_status.up_status_event - up_status_event.clear() - - try: - await asyncio.wait_for(up_status_event.wait(), timeout=60) - except asyncio.TimeoutError: - logger.warning(f"group_request_id: {group_request_id} kv move time out err, server is busy now.") - raise KVMoveTimeoutError(f"KV move timeout for group_request_id {group_request_id}") - - # 迁移完成后,重置参数 - sampling_params.move_kv_to_decode_node.initialize(None) - sampling_params.suggested_dp_index = up_status_event.upkv_status.dp_index - await d_node.websocket.send_bytes( pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) ) @@ -328,16 +308,17 @@ async def fetch_stream( # 如果不是最后一个chunk,需要将KV Cache从decode节点发送回prefill节点 if remaining_tokens > 0: + up_status_event = req_status.up_status_event + up_status_event.clear() p_start_args = p_node.start_args prefill_node_dict = { "node_id": p_start_args["pd_node_id"], "ip": p_start_args["host"], - "rpyc_port": d_start_args["pd_decode_rpyc_port"], + "rpyc_port": p_start_args["pd_decode_rpyc_port"], "max_new_tokens": 0, "pd_master_node_id": self.args.pd_node_id, } - # 使用一个特殊的请求将KV Cache发送回prefill节点 sampling_params.max_new_tokens = 0 sampling_params.move_kv_to_decode_node.initialize(prefill_node_dict) sampling_params.suggested_dp_index = -1 @@ -346,18 +327,12 @@ async def fetch_stream( pickle.dumps((ObjType.REQ, (current_prompt_ids, sampling_params, multimodal_params))) ) - up_status_event = req_status.up_status_event - up_status_event.clear() - try: await asyncio.wait_for(up_status_event.wait(), timeout=60) except asyncio.TimeoutError: logger.warning(f"group_request_id: {group_request_id} kv move back time out err, server is busy now.") raise KVMoveTimeoutError(f"KV move back timeout for group_request_id {group_request_id}") - # 回传完成后,重置参数 - sampling_params.move_kv_to_decode_node.initialize(None) - return async def _wait_to_token_package(